@@ -101,10 +101,20 @@ public String printSemanticGraph(SemanticGraph basicSg, SemanticGraph enhancedSg
101
101
102
102
// don't use after() directly; it returns a default of ""
103
103
if (token .get (CoreAnnotations .AfterAnnotation .class ) != null && token .after ().equals ("" )) {
104
- if (misc .equals ("_" )) {
105
- misc = "SpaceAfter=No" ;
106
- } else {
107
- misc = misc + "|SpaceAfter=No" ;
104
+ IndexedWord nextVertex = tokenSg .getNodeByIndex (token .index () + 1 );
105
+ // the next word needs to exist and be part of the same MWT
106
+ // and either this word is the start of the MWT
107
+ // or this word is the middle of the same MWT as the next word
108
+ // if that is true, we will skip the SpaceAfter annotation
109
+ boolean inMWT = ((nextVertex != null && isMWTbutNotStart (nextVertex )) &&
110
+ ((token .containsKey (CoreAnnotations .IsFirstWordOfMWTAnnotation .class ) && token .get (CoreAnnotations .IsFirstWordOfMWTAnnotation .class )) ||
111
+ (isMWTbutNotStart (token ))));
112
+ if (!inMWT ) {
113
+ if (misc .equals ("_" )) {
114
+ misc = "SpaceAfter=No" ;
115
+ } else {
116
+ misc = misc + "|SpaceAfter=No" ;
117
+ }
108
118
}
109
119
}
110
120
@@ -151,19 +161,29 @@ public static void printSpan(StringBuilder sb, AbstractCoreLabel token) {
151
161
}
152
162
}
153
163
164
+ /**
165
+ * Is the word part of an MWT, but not the start?
166
+ */
167
+ public static boolean isMWTbutNotStart (IndexedWord nextVertex ) {
168
+ if (nextVertex .containsKey (CoreAnnotations .IsFirstWordOfMWTAnnotation .class ) &&
169
+ nextVertex .get (CoreAnnotations .IsFirstWordOfMWTAnnotation .class )) {
170
+ return false ;
171
+ }
172
+ if (!nextVertex .containsKey (CoreAnnotations .IsMultiWordTokenAnnotation .class ) ||
173
+ !nextVertex .get (CoreAnnotations .IsMultiWordTokenAnnotation .class )) {
174
+ return false ;
175
+ }
176
+ return true ;
177
+ }
178
+
154
179
public static void printMWT (StringBuilder sb , SemanticGraph graph , IndexedWord token ) {
155
180
int startIndex = token .index ();
156
181
int endIndex = startIndex ;
157
182
// advance endIndex until we reach the end of the sentence, the start of the next MWT,
158
183
// or a word which isn't part of any MWT
159
184
IndexedWord nextVertex ;
160
185
while ((nextVertex = graph .getNodeByIndex (endIndex +1 )) != null ) {
161
- if (nextVertex .containsKey (CoreAnnotations .IsFirstWordOfMWTAnnotation .class ) &&
162
- nextVertex .get (CoreAnnotations .IsFirstWordOfMWTAnnotation .class )) {
163
- break ;
164
- }
165
- if (!nextVertex .containsKey (CoreAnnotations .IsMultiWordTokenAnnotation .class ) ||
166
- !nextVertex .get (CoreAnnotations .IsMultiWordTokenAnnotation .class )) {
186
+ if (!isMWTbutNotStart (nextVertex )) {
167
187
break ;
168
188
}
169
189
++endIndex ;
0 commit comments