Skip to content

Commit 1d96e04

Browse files
committed
Parse a node containment option... need to process it in the NodePattern still
Keep track of the partial attributes. Still need to actually check them...
1 parent bff3588 commit 1d96e04

File tree

4 files changed

+181
-69
lines changed

4 files changed

+181
-69
lines changed

src/edu/stanford/nlp/semgraph/semgrex/NodeAttributes.java

+16
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,20 @@ public class NodeAttributes {
2626
// String, String, Boolean: key, value, negated
2727
private List<Triple<String, String, Boolean>> attributes;
2828
private Set<String> positiveAttributes;
29+
// Some annotations, especially morpho features (CoreAnnotations.CoNLLUFeats)
30+
// are represented by Maps. In some cases it will be easier to search
31+
// for individual elements of that map rather than turn the map into a string
32+
// and search on its contents that way. This is especially true since there
33+
// is no guarantee the map will be in a consistent order.
34+
// String, String, String: node attribute for a map (such as CoNLLUFeats), key in that map, value to match
35+
private List<Triple<String, String, String>> contains;
2936

3037
/**
 * Creates a description with nothing set: neither the root nor the empty
 * flag is on, and the attribute, positive-attribute, and containment
 * collections all start out empty.
 */
public NodeAttributes() {
  this.attributes = new ArrayList<>();
  this.positiveAttributes = new HashSet<>();
  this.contains = new ArrayList<>();
  this.root = false;
  this.empty = false;
}
3644

3745
public void setRoot(boolean root) {
@@ -60,7 +68,15 @@ public void setAttribute(String key, String value, boolean negated) {
6068
attributes.add(new Triple(key, value, negated));
6169
}
6270

71+
public void addContains(String annotation, String key, String value) {
72+
contains.add(new Triple(annotation, key, value));
73+
}
74+
6375
public List<Triple<String, String, Boolean>> attributes() {
6476
return Collections.unmodifiableList(attributes);
6577
}
78+
79+
public List<Triple<String, String, String>> contains() {
80+
return Collections.unmodifiableList(contains);
81+
}
6682
}

src/edu/stanford/nlp/semgraph/semgrex/NodePattern.java

+84-35
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ public class NodePattern extends SemgrexPattern {
3232
* Otherwise, the type will be a Pattern, and you must use Pattern.matches().
3333
*/
3434
private final List<Attribute> attributes;
35+
/**
36+
* Attributes which represent Maps (e.g. CoNLLUFeats)
37+
* and only partial matches are necessary
38+
*/
39+
private final List<Pair<String, Attribute>> partialAttributes;
3540
private final boolean isRoot;
3641
private final boolean isLink;
3742
private final boolean isEmpty;
@@ -58,6 +63,9 @@ public NodePattern(GraphRelation r, boolean negDesc,
5863
// order the attributes so that the pattern stays the same when
5964
// printing a compiled pattern
6065
this.attributes = new ArrayList<>();
66+
// same with partial attributes
67+
this.partialAttributes = new ArrayList<>();
68+
6169
descString = "{";
6270
for (Triple<String, String, Boolean> entry : attrs.attributes()) {
6371
if (!descString.equals("{"))
@@ -70,23 +78,7 @@ public NodePattern(GraphRelation r, boolean negDesc,
7078
if (value.equals("__")) {
7179
attributes.add(new Attribute(key, true, true, negated));
7280
} else if (value.matches("/.*/")) {
73-
boolean isRegexp = false;
74-
for (int i = 1; i < value.length() - 1; ++i) {
75-
char chr = value.charAt(i);
76-
if ( !( (chr >= 'A' && chr <= 'Z') || (chr >= 'a' && chr <= 'z') || (chr >= '0' && chr <= '9') ) ) {
77-
isRegexp = true;
78-
break;
79-
}
80-
}
81-
String patternContent = value.substring(1, value.length() - 1);
82-
if (isRegexp) {
83-
attributes.add(new Attribute(key,
84-
Pattern.compile(patternContent),
85-
Pattern.compile(patternContent, Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE),
86-
negated));
87-
} else {
88-
attributes.add(new Attribute(key, patternContent, patternContent, negated));
89-
}
81+
attributes.add(buildRegexAttribute(key, value, negated));
9082
} else { // raw description
9183
attributes.add(new Attribute(key, value, value, negated));
9284
}
@@ -98,6 +90,27 @@ public NodePattern(GraphRelation r, boolean negDesc,
9890
}
9991
}
10092

93+
for (Triple<String, String, String> entry : attrs.contains()) {
94+
String annotation = entry.first();
95+
String key = entry.second();
96+
String value = entry.third();
97+
98+
final Attribute attr;
99+
// Add the attributes for this key
100+
if (value.equals("__")) {
101+
attr = new Attribute(key, true, true, false);
102+
} else if (value.matches("/.*/")) {
103+
attr = buildRegexAttribute(key, value, false);
104+
} else { // raw description
105+
attr = new Attribute(key, value, value, false);
106+
}
107+
partialAttributes.add(new Pair<>(annotation, attr));
108+
109+
if (!descString.equals("{"))
110+
descString += ";";
111+
descString += (annotation + "@" + key + "=" + value);
112+
}
113+
101114
if (attrs.root()) {
102115
if (!descString.equals("{"))
103116
descString += ";";
@@ -118,6 +131,53 @@ public NodePattern(GraphRelation r, boolean negDesc,
118131
this.variableGroups = Collections.unmodifiableList(variableGroups);
119132
}
120133

134+
/**
135+
* Tests the value to see if it's really a regex, or just a string wrapped in regex.
136+
* Return an Attribute which matches this expression
137+
*/
138+
private Attribute buildRegexAttribute(String key, String value, boolean negated) {
139+
boolean isRegexp = false;
140+
for (int i = 1; i < value.length() - 1; ++i) {
141+
char chr = value.charAt(i);
142+
if ( !( (chr >= 'A' && chr <= 'Z') || (chr >= 'a' && chr <= 'z') || (chr >= '0' && chr <= '9') ) ) {
143+
isRegexp = true;
144+
break;
145+
}
146+
}
147+
String patternContent = value.substring(1, value.length() - 1);
148+
if (isRegexp) {
149+
return new Attribute(key,
150+
Pattern.compile(patternContent),
151+
Pattern.compile(patternContent, Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE),
152+
negated);
153+
} else {
154+
return new Attribute(key, patternContent, patternContent, negated);
155+
}
156+
}
157+
158+
private boolean checkMatch(Attribute attr, boolean ignoreCase, String nodeValue) {
159+
boolean matches;
160+
161+
Object toMatch = ignoreCase ? attr.caseless : attr.cased;
162+
if (toMatch instanceof Boolean) {
163+
matches = ((Boolean) toMatch);
164+
} else if (toMatch instanceof String) {
165+
if (ignoreCase) {
166+
matches = nodeValue.equalsIgnoreCase(toMatch.toString());
167+
} else {
168+
matches = nodeValue.equals(toMatch.toString());
169+
}
170+
} else if (toMatch instanceof Pattern) {
171+
matches = ((Pattern) toMatch).matcher(nodeValue).matches();
172+
} else {
173+
throw new IllegalStateException("Unknown matcher type: " + toMatch + " (of class + " + toMatch.getClass() + ")");
174+
}
175+
if (attr.negated) {
176+
matches = !matches;
177+
}
178+
return matches;
179+
}
180+
121181
@SuppressWarnings("unchecked")
122182
public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean ignoreCase) {
123183
// System.out.println(node.word());
@@ -156,31 +216,20 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
156216
return negDesc;
157217

158218
// Get the node pattern
159-
Object toMatch = ignoreCase ? attr.caseless : attr.cased;
160-
boolean matches;
161-
if (toMatch instanceof Boolean) {
162-
matches = ((Boolean) toMatch);
163-
} else if (toMatch instanceof String) {
164-
if (ignoreCase) {
165-
matches = nodeValue.equalsIgnoreCase(toMatch.toString());
166-
} else {
167-
matches = nodeValue.equals(toMatch.toString());
168-
}
169-
} else if (toMatch instanceof Pattern) {
170-
matches = ((Pattern) toMatch).matcher(nodeValue).matches();
171-
} else {
172-
throw new IllegalStateException("Unknown matcher type: " + toMatch + " (of class + " + toMatch.getClass() + ")");
173-
}
174-
if (attr.negated) {
175-
matches = !matches;
176-
}
219+
boolean matches = checkMatch(attr, ignoreCase, nodeValue);
177220

178221
if (!matches) {
179222
// System.out.println("doesn't match");
180223
// System.out.println("");
181224
return negDesc;
182225
}
183226
}
227+
for (Pair<String, Attribute> partialAttribute : partialAttributes) {
228+
String annotation = partialAttribute.first();
229+
Attribute attr = partialAttribute.second();
230+
// TODO
231+
}
232+
184233
// System.out.println("matches");
185234
// System.out.println("");
186235
return !negDesc;

0 commit comments

Comments
 (0)