8
8
import java .util .regex .Matcher ;
9
9
import java .util .regex .Pattern ;
10
10
11
+ import edu .stanford .nlp .ling .AnnotationLookup ;
11
12
import edu .stanford .nlp .ling .IndexedWord ;
12
13
import edu .stanford .nlp .semgraph .SemanticGraph ;
13
14
import edu .stanford .nlp .semgraph .SemanticGraphEdge ;
@@ -32,6 +33,11 @@ public class NodePattern extends SemgrexPattern {
32
33
* Otherwise, the type will be a Pattern, and you must use Pattern.matches().
33
34
*/
34
35
private final List <Attribute > attributes ;
36
+ /**
37
+ * Attributes whose annotation values are Maps (e.g. CoNLLUFeats),
38
+ * for which matching a single key/value of the Map is sufficient
39
+ */
40
+ private final List <Pair <String , Attribute >> partialAttributes ;
35
41
private final boolean isRoot ;
36
42
private final boolean isLink ;
37
43
private final boolean isEmpty ;
@@ -58,6 +64,9 @@ public NodePattern(GraphRelation r, boolean negDesc,
58
64
// order the attributes so that the pattern stays the same when
59
65
// printing a compiled pattern
60
66
this .attributes = new ArrayList <>();
67
+ // same with partial attributes
68
+ this .partialAttributes = new ArrayList <>();
69
+
61
70
descString = "{" ;
62
71
for (Triple <String , String , Boolean > entry : attrs .attributes ()) {
63
72
if (!descString .equals ("{" ))
@@ -70,23 +79,7 @@ public NodePattern(GraphRelation r, boolean negDesc,
70
79
if (value .equals ("__" )) {
71
80
attributes .add (new Attribute (key , true , true , negated ));
72
81
} else if (value .matches ("/.*/" )) {
73
- boolean isRegexp = false ;
74
- for (int i = 1 ; i < value .length () - 1 ; ++i ) {
75
- char chr = value .charAt (i );
76
- if ( !( (chr >= 'A' && chr <= 'Z' ) || (chr >= 'a' && chr <= 'z' ) || (chr >= '0' && chr <= '9' ) ) ) {
77
- isRegexp = true ;
78
- break ;
79
- }
80
- }
81
- String patternContent = value .substring (1 , value .length () - 1 );
82
- if (isRegexp ) {
83
- attributes .add (new Attribute (key ,
84
- Pattern .compile (patternContent ),
85
- Pattern .compile (patternContent , Pattern .CASE_INSENSITIVE |Pattern .UNICODE_CASE ),
86
- negated ));
87
- } else {
88
- attributes .add (new Attribute (key , patternContent , patternContent , negated ));
89
- }
82
+ attributes .add (buildRegexAttribute (key , value , negated ));
90
83
} else { // raw description
91
84
attributes .add (new Attribute (key , value , value , negated ));
92
85
}
@@ -98,6 +91,33 @@ public NodePattern(GraphRelation r, boolean negDesc,
98
91
}
99
92
}
100
93
94
+ for (Triple <String , String , String > entry : attrs .contains ()) {
95
+ String annotation = entry .first ();
96
+ String key = entry .second ();
97
+ String value = entry .third ();
98
+
99
+ Class <?> clazz = AnnotationLookup .getValueType (AnnotationLookup .toCoreKey (annotation ));
100
+ boolean isMap = clazz != null && Map .class .isAssignableFrom (clazz );
101
+ if (!isMap ) {
102
+ throw new SemgrexParseException ("Cannot process a single key/value from annotation " + annotation + " as it is not a Map" );
103
+ }
104
+
105
+ final Attribute attr ;
106
+ // Add the attributes for this key
107
+ if (value .equals ("__" )) {
108
+ attr = new Attribute (key , true , true , false );
109
+ } else if (value .matches ("/.*/" )) {
110
+ attr = buildRegexAttribute (key , value , false );
111
+ } else { // raw description
112
+ attr = new Attribute (key , value , value , false );
113
+ }
114
+ partialAttributes .add (new Pair <>(annotation , attr ));
115
+
116
+ if (!descString .equals ("{" ))
117
+ descString += ";" ;
118
+ descString += (annotation + "@" + key + "=" + value );
119
+ }
120
+
101
121
if (attrs .root ()) {
102
122
if (!descString .equals ("{" ))
103
123
descString += ";" ;
@@ -118,6 +138,30 @@ public NodePattern(GraphRelation r, boolean negDesc,
118
138
this .variableGroups = Collections .unmodifiableList (variableGroups );
119
139
}
120
140
141
+ /**
142
+ * Tests the value to see if it's really a regex, or just a string wrapped in regex.
143
+ * Return an Attribute which matches this expression
144
+ */
145
+ private Attribute buildRegexAttribute (String key , String value , boolean negated ) {
146
+ boolean isRegexp = false ;
147
+ for (int i = 1 ; i < value .length () - 1 ; ++i ) {
148
+ char chr = value .charAt (i );
149
+ if ( !( (chr >= 'A' && chr <= 'Z' ) || (chr >= 'a' && chr <= 'z' ) || (chr >= '0' && chr <= '9' ) ) ) {
150
+ isRegexp = true ;
151
+ break ;
152
+ }
153
+ }
154
+ String patternContent = value .substring (1 , value .length () - 1 );
155
+ if (isRegexp ) {
156
+ return new Attribute (key ,
157
+ Pattern .compile (patternContent ),
158
+ Pattern .compile (patternContent , Pattern .CASE_INSENSITIVE |Pattern .UNICODE_CASE ),
159
+ negated );
160
+ } else {
161
+ return new Attribute (key , patternContent , patternContent , negated );
162
+ }
163
+ }
164
+
121
165
private boolean checkMatch (Attribute attr , boolean ignoreCase , String nodeValue ) {
122
166
if (nodeValue == null ) {
123
167
// treat non-existent attributes as having matched a negated expression
@@ -189,6 +233,29 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
189
233
return negDesc ;
190
234
}
191
235
}
236
+ for (Pair <String , Attribute > partialAttribute : partialAttributes ) {
237
+ String annotation = partialAttribute .first ();
238
+ Attribute attr = partialAttribute .second ();
239
+
240
+ Class clazz = Env .lookupAnnotationKey (env , annotation );
241
+ Object rawmap = node .get (clazz );
242
+ // if the map is null, it can't possibly match...
243
+ if (rawmap == null ) {
244
+ return negDesc ;
245
+ }
246
+ if (!(rawmap instanceof Map ))
247
+ throw new RuntimeException ("Can only use partial attributes with Maps... this should have been checked at creation time!" );
248
+ Map <String , ?> map = (Map ) rawmap ;
249
+
250
+ // TODO: allow for regex match on the keys?
251
+ Object value = map .get (attr .key );
252
+ final String nodeValue = (value == null ) ? null : value .toString ();
253
+ boolean matches = checkMatch (attr , ignoreCase , nodeValue );
254
+ if (!matches ) {
255
+ return negDesc ;
256
+ }
257
+ }
258
+
192
259
// System.out.println("matches");
193
260
// System.out.println("");
194
261
return !negDesc ;
0 commit comments