Skip to content

Commit 6245acc

Browse files
committed
Also flatten combined RB or ADVP phrases
1 parent 8b2b668 commit 6245acc

File tree

1 file changed

+15
-1
lines changed

1 file changed

+15
-1
lines changed

src/edu/stanford/nlp/trees/QPTreeTransformer.java

+15-1
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,25 @@ public Tree transformTree(Tree t) {
100100
private static final TsurgeonPattern splitMoneyTsurgeon =
101101
Tsurgeon.parseOperation("createSubtree QP left right");
102102

103+
// This fixes a very rare subset of parses
104+
// such as "(NP (QP just about all) the losses) ..."
105+
// (in fact, that is the only such example in ptb3-revised).
106+
// Because of earlier MWE (multi-word expression) combinations, we may already get
107+
// "(NP (QP at least a) day)"
108+
// -> "(NP (QP (ADVP at least) a) day)"
109+
// and therefore flattenAdvmodTregex, which accepts ADVP as well as RB, will also match that parse
110+
private static final TregexPattern groupADVPTregex =
111+
TregexPattern.compile("NP < (QP <1 RB=first <2 RB=second <3 (DT !$+ __) $++ /^N/)");
112+
113+
private static final TsurgeonPattern groupADVPTsurgeon =
114+
Tsurgeon.parseOperation("createSubtree ADVP first second");
115+
103116
// Remove QP in a structure such as
104117
// (NP (QP nearly_RB all_DT) stuff_NN)
105118
// so that the converter can attach both `nearly` and `all` to `stuff`
106119
// not using a nummod, either, which is kind of annoying
107120
private static final TregexPattern flattenAdvmodTregex =
108-
TregexPattern.compile("NP < (QP=remove <1 RB <2 (DT !$+ __) $++ /^N/)");
121+
TregexPattern.compile("NP < (QP=remove <1 ADVP|RB <2 (DT !$+ __) $++ /^N/)");
109122

110123
private static final TsurgeonPattern flattenAdvmodTsurgeon =
111124
Tsurgeon.parseOperation("excise remove remove");
@@ -131,6 +144,7 @@ public Tree QPtransform(Tree t) {
131144
}
132145
t = Tsurgeon.processPattern(splitCCTregex, splitCCTsurgeon, t);
133146
t = Tsurgeon.processPattern(splitMoneyTregex, splitMoneyTsurgeon, t);
147+
t = Tsurgeon.processPattern(groupADVPTregex, groupADVPTsurgeon, t);
134148
t = Tsurgeon.processPattern(flattenAdvmodTregex, flattenAdvmodTsurgeon, t);
135149
return t;
136150
}

0 commit comments

Comments
 (0)