@@ -100,12 +100,25 @@ public Tree transformTree(Tree t) {
100
100
// Tsurgeon operation that wraps the span from the node named `left` through the
// node named `right` in a new QP subtree. It is applied with splitMoneyTregex in
// QPtransform; presumably that pattern binds `left` and `right` (not visible here).
private static final TsurgeonPattern splitMoneyTsurgeon =
101
101
Tsurgeon .parseOperation ("createSubtree QP left right" );
102
102
103
+ // Group two leading adverbs of a QP into an ADVP. This fixes a very rare subset of parses
104
+ // such as "(NP (QP just about all) the losses) ..."
105
+ // in fact, that's the only example in ptb3-revised
106
+ // because of previous MWE combinations, we may already get
107
+ // "(NP (QP at least a) day)"
108
+ // -> "(NP (QP (ADVP at least) a) day)"
109
+ // and therefore flattenAdvmodTregex (which allows ADVP as the first child) will also match that parse
110
// Pattern breakdown: an NP with a QP child whose first and second children are RB
// (named `first` and `second`) and whose third child is a DT with no immediate
// right sister; the QP must also have a later sister whose label starts with N.
+ private static final TregexPattern groupADVPTregex =
111
+ TregexPattern .compile ("NP < (QP <1 RB=first <2 RB=second <3 (DT !$+ __) $++ /^N/)" );
112
+
113
// Wraps the nodes named `first` through `second` (bound by groupADVPTregex)
// in a new ADVP subtree.
+ private static final TsurgeonPattern groupADVPTsurgeon =
114
+ Tsurgeon .parseOperation ("createSubtree ADVP first second" );
115
+
103
116
// Remove QP in a structure such as
104
117
// (NP (QP nearly_RB all_DT) stuff_NN)
105
118
// so that the converter can attach both `nearly` and `all` to `stuff`
106
119
// not using a nummod, either, which is kind of annoying
107
120
// Pattern breakdown: an NP with a QP child (named `remove`) whose second child is
// a DT with no immediate right sister, and which has a later sister whose label
// starts with N. The removed (-) line requires the first child to be RB; the
// added (+) line additionally lists ADVP as an alternative for the first child.
// NOTE(review): the added literal reads `ADVP| RB` with a space after `|` --
// confirm Tregex tolerates it, or that it is only an artifact of this diff view.
private static final TregexPattern flattenAdvmodTregex =
108
- TregexPattern .compile ("NP < (QP=remove <1 RB <2 (DT !$+ __) $++ /^N/)" );
121
+ TregexPattern .compile ("NP < (QP=remove <1 ADVP| RB <2 (DT !$+ __) $++ /^N/)" );
109
122
110
123
// Excises the node named `remove` (the QP matched by flattenAdvmodTregex), so
// that its children become direct children of the enclosing NP.
private static final TsurgeonPattern flattenAdvmodTsurgeon =
111
124
Tsurgeon .parseOperation ("excise remove remove" );
@@ -131,6 +144,7 @@ public Tree QPtransform(Tree t) {
131
144
}
132
145
t = Tsurgeon .processPattern (splitCCTregex , splitCCTsurgeon , t );
133
146
t = Tsurgeon .processPattern (splitMoneyTregex , splitMoneyTsurgeon , t );
147
+ t = Tsurgeon .processPattern (groupADVPTregex , groupADVPTsurgeon , t );
134
148
t = Tsurgeon .processPattern (flattenAdvmodTregex , flattenAdvmodTsurgeon , t );
135
149
return t ;
136
150
}
0 commit comments