Skip to content

Commit 436b564

Browse files
committed
Keep track of the partial attributes. Still need to actually check them...
1 parent 3dc9990 commit 436b564

File tree

2 files changed

+58
-17
lines changed

2 files changed

+58
-17
lines changed

src/edu/stanford/nlp/semgraph/semgrex/NodeAttributes.java

+5
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ public NodeAttributes() {
3939
empty = false;
4040
attributes = new ArrayList<>();
4141
positiveAttributes = new HashSet<>();
42+
contains = new ArrayList<>();
4243
}
4344

4445
public void setRoot(boolean root) {
@@ -74,4 +75,8 @@ public void addContains(String annotation, String key, String value) {
7475
public List<Triple<String, String, Boolean>> attributes() {
7576
return Collections.unmodifiableList(attributes);
7677
}
78+
79+
public List<Triple<String, String, String>> contains() {
80+
return Collections.unmodifiableList(contains);
81+
}
7782
}

src/edu/stanford/nlp/semgraph/semgrex/NodePattern.java

+53-17
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import java.util.Iterator;
66
import java.util.List;
77
import java.util.Map;
8+
import java.util.TreeMap;
89
import java.util.regex.Matcher;
910
import java.util.regex.Pattern;
1011

@@ -32,6 +33,11 @@ public class NodePattern extends SemgrexPattern {
3233
* Otherwise, the type will be a Pattern, and you must use Pattern.matches().
3334
*/
3435
private final List<Attribute> attributes;
36+
/**
37+
* Attributes which represent Maps (e.g. CoNLLUFeats)
38+
* and only partial matches are necessary
39+
*/
40+
private final Map<String, Attribute> partialAttributes;
3541
private final boolean isRoot;
3642
private final boolean isLink;
3743
private final boolean isEmpty;
@@ -58,6 +64,9 @@ public NodePattern(GraphRelation r, boolean negDesc,
5864
// order the attributes so that the pattern stays the same when
5965
// printing a compiled pattern
6066
this.attributes = new ArrayList<>();
67+
// same with partial attributes - use a TreeMap to keep things in order
68+
this.partialAttributes = new TreeMap<>();
69+
6170
descString = "{";
6271
for (Triple<String, String, Boolean> entry : attrs.attributes()) {
6372
if (!descString.equals("{"))
@@ -70,23 +79,7 @@ public NodePattern(GraphRelation r, boolean negDesc,
7079
if (value.equals("__")) {
7180
attributes.add(new Attribute(key, true, true, negated));
7281
} else if (value.matches("/.*/")) {
73-
boolean isRegexp = false;
74-
for (int i = 1; i < value.length() - 1; ++i) {
75-
char chr = value.charAt(i);
76-
if ( !( (chr >= 'A' && chr <= 'Z') || (chr >= 'a' && chr <= 'z') || (chr >= '0' && chr <= '9') ) ) {
77-
isRegexp = true;
78-
break;
79-
}
80-
}
81-
String patternContent = value.substring(1, value.length() - 1);
82-
if (isRegexp) {
83-
attributes.add(new Attribute(key,
84-
Pattern.compile(patternContent),
85-
Pattern.compile(patternContent, Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE),
86-
negated));
87-
} else {
88-
attributes.add(new Attribute(key, patternContent, patternContent, negated));
89-
}
82+
attributes.add(buildRegexAttribute(key, value, negated));
9083
} else { // raw description
9184
attributes.add(new Attribute(key, value, value, negated));
9285
}
@@ -98,6 +91,25 @@ public NodePattern(GraphRelation r, boolean negDesc,
9891
}
9992
}
10093

94+
for (Triple<String, String, String> entry : attrs.contains()) {
95+
String annotation = entry.first();
96+
String key = entry.second();
97+
String value = entry.third();
98+
99+
// Add the attributes for this key
100+
if (value.equals("__")) {
101+
partialAttributes.put(annotation, new Attribute(key, true, true, false));
102+
} else if (value.matches("/.*/")) {
103+
partialAttributes.put(annotation, buildRegexAttribute(key, value, false));
104+
} else { // raw description
105+
partialAttributes.put(annotation, new Attribute(key, value, value, false));
106+
}
107+
108+
if (!descString.equals("{"))
109+
descString += ";";
110+
descString += (annotation + "@" + key + "=" + value);
111+
}
112+
101113
if (attrs.root()) {
102114
if (!descString.equals("{"))
103115
descString += ";";
@@ -118,6 +130,30 @@ public NodePattern(GraphRelation r, boolean negDesc,
118130
this.variableGroups = Collections.unmodifiableList(variableGroups);
119131
}
120132

133+
/**
134+
* Tests the value to see if it's really a regex, or just a string wrapped in regex.
135+
* Return an Attribute which matches this expression
136+
*/
137+
private Attribute buildRegexAttribute(String key, String value, boolean negated) {
138+
boolean isRegexp = false;
139+
for (int i = 1; i < value.length() - 1; ++i) {
140+
char chr = value.charAt(i);
141+
if ( !( (chr >= 'A' && chr <= 'Z') || (chr >= 'a' && chr <= 'z') || (chr >= '0' && chr <= '9') ) ) {
142+
isRegexp = true;
143+
break;
144+
}
145+
}
146+
String patternContent = value.substring(1, value.length() - 1);
147+
if (isRegexp) {
148+
return new Attribute(key,
149+
Pattern.compile(patternContent),
150+
Pattern.compile(patternContent, Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE),
151+
negated);
152+
} else {
153+
return new Attribute(key, patternContent, patternContent, negated);
154+
}
155+
}
156+
121157
@SuppressWarnings("unchecked")
122158
public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean ignoreCase) {
123159
// System.out.println(node.word());

0 commit comments

Comments
 (0)