5
5
import java .util .Iterator ;
6
6
import java .util .List ;
7
7
import java .util .Map ;
8
+ import java .util .TreeMap ;
8
9
import java .util .regex .Matcher ;
9
10
import java .util .regex .Pattern ;
10
11
@@ -32,6 +33,11 @@ public class NodePattern extends SemgrexPattern {
32
33
* Otherwise, the type will be a Pattern, and you must use Pattern.matches().
33
34
*/
34
35
private final List <Attribute > attributes ;
36
+ /**
37
+ * Attributes which represent Maps (e.g., CoNLLUFeats),
38
+ * for which only partial matches are necessary
39
+ */
40
+ private final Map <String , Attribute > partialAttributes ;
35
41
private final boolean isRoot ;
36
42
private final boolean isLink ;
37
43
private final boolean isEmpty ;
@@ -58,6 +64,9 @@ public NodePattern(GraphRelation r, boolean negDesc,
58
64
// order the attributes so that the pattern stays the same when
59
65
// printing a compiled pattern
60
66
this .attributes = new ArrayList <>();
67
+ // same with partial attributes - use a TreeMap to keep things in order
68
+ this .partialAttributes = new TreeMap <>();
69
+
61
70
descString = "{" ;
62
71
for (Triple <String , String , Boolean > entry : attrs .attributes ()) {
63
72
if (!descString .equals ("{" ))
@@ -70,23 +79,7 @@ public NodePattern(GraphRelation r, boolean negDesc,
70
79
if (value .equals ("__" )) {
71
80
attributes .add (new Attribute (key , true , true , negated ));
72
81
} else if (value .matches ("/.*/" )) {
73
- boolean isRegexp = false ;
74
- for (int i = 1 ; i < value .length () - 1 ; ++i ) {
75
- char chr = value .charAt (i );
76
- if ( !( (chr >= 'A' && chr <= 'Z' ) || (chr >= 'a' && chr <= 'z' ) || (chr >= '0' && chr <= '9' ) ) ) {
77
- isRegexp = true ;
78
- break ;
79
- }
80
- }
81
- String patternContent = value .substring (1 , value .length () - 1 );
82
- if (isRegexp ) {
83
- attributes .add (new Attribute (key ,
84
- Pattern .compile (patternContent ),
85
- Pattern .compile (patternContent , Pattern .CASE_INSENSITIVE |Pattern .UNICODE_CASE ),
86
- negated ));
87
- } else {
88
- attributes .add (new Attribute (key , patternContent , patternContent , negated ));
89
- }
82
+ attributes .add (buildRegexAttribute (key , value , negated ));
90
83
} else { // raw description
91
84
attributes .add (new Attribute (key , value , value , negated ));
92
85
}
@@ -98,6 +91,25 @@ public NodePattern(GraphRelation r, boolean negDesc,
98
91
}
99
92
}
100
93
94
+ for (Triple <String , String , String > entry : attrs .contains ()) {
95
+ String annotation = entry .first ();
96
+ String key = entry .second ();
97
+ String value = entry .third ();
98
+
99
+ // Add the attributes for this key
100
+ if (value .equals ("__" )) {
101
+ partialAttributes .put (annotation , new Attribute (key , true , true , false ));
102
+ } else if (value .matches ("/.*/" )) {
103
+ partialAttributes .put (annotation , buildRegexAttribute (key , value , false ));
104
+ } else { // raw description
105
+ partialAttributes .put (annotation , new Attribute (key , value , value , false ));
106
+ }
107
+
108
+ if (!descString .equals ("{" ))
109
+ descString += ";" ;
110
+ descString += (annotation + "@" + key + "=" + value );
111
+ }
112
+
101
113
if (attrs .root ()) {
102
114
if (!descString .equals ("{" ))
103
115
descString += ";" ;
@@ -118,6 +130,30 @@ public NodePattern(GraphRelation r, boolean negDesc,
118
130
this .variableGroups = Collections .unmodifiableList (variableGroups );
119
131
}
120
132
133
+ /**
134
+ * Tests the value to see if it's really a regex, or just a string wrapped in regex.
135
+ * Return an Attribute which matches this expression
136
+ */
137
+ private Attribute buildRegexAttribute (String key , String value , boolean negated ) {
138
+ boolean isRegexp = false ;
139
+ for (int i = 1 ; i < value .length () - 1 ; ++i ) {
140
+ char chr = value .charAt (i );
141
+ if ( !( (chr >= 'A' && chr <= 'Z' ) || (chr >= 'a' && chr <= 'z' ) || (chr >= '0' && chr <= '9' ) ) ) {
142
+ isRegexp = true ;
143
+ break ;
144
+ }
145
+ }
146
+ String patternContent = value .substring (1 , value .length () - 1 );
147
+ if (isRegexp ) {
148
+ return new Attribute (key ,
149
+ Pattern .compile (patternContent ),
150
+ Pattern .compile (patternContent , Pattern .CASE_INSENSITIVE |Pattern .UNICODE_CASE ),
151
+ negated );
152
+ } else {
153
+ return new Attribute (key , patternContent , patternContent , negated );
154
+ }
155
+ }
156
+
121
157
@ SuppressWarnings ("unchecked" )
122
158
public boolean nodeAttrMatch (IndexedWord node , final SemanticGraph sg , boolean ignoreCase ) {
123
159
// System.out.println(node.word());
0 commit comments