@@ -105,6 +105,11 @@ public Tree transformTree(Tree t) {
105
105
log .info ("After MWETransform: " + t );
106
106
}
107
107
108
+ t = MWFlatTransform (t );
109
+ if (VERBOSE ) {
110
+ log .info ("After MWFlatTransform: " + t );
111
+ }
112
+
108
113
t = prepCCTransform (t );
109
114
if (VERBOSE ) {
110
115
log .info ("After prepCCTransform: " + t );
@@ -688,7 +693,6 @@ private static Tree findCCparent(Tree t, Tree root) {
688
693
TregexPattern .compile ("@QP|XS < ((JJR|RBR||RB|RP|IN=node1 < /^(?i)(up)$/) $+ (IN|TO=node2 < /^(?i)to$/))" ), // up to
689
694
TregexPattern .compile ("@QP < ((JJR|RBR|RB|RP|IN=node1 < /^(?i)up$/) $+ (IN|TO=node2 < /^(?i)to$/))" ), //up to
690
695
TregexPattern .compile ("@S|SQ|VP|ADVP|PP < (@ADVP < ((IN|RB=node1 < /^(?i)at$/) $+ (JJS|RBS=node2 < /^(?i)least$/)) !$+ (RB < /(?i)(once|twice)/))" ), //at least
691
-
692
696
};
693
697
694
698
/**
 * Tsurgeon operation made of two bracketed steps: first wrap the span from node1 to node2 in a
 * newly created subtree labelled MWE; then, only when a node named node3 was bound by the match,
 * move node3 relative to node2.
 * NOTE(review): "$-" is presumably Tsurgeon's "right sister of" location, i.e. node3 ends up
 * immediately after node2 -- confirm against the Tsurgeon documentation.
 */
private static final TsurgeonPattern MWE_OPERATION = Tsurgeon .parseOperation ("[createSubtree MWE node1 node2] [if exists node3 move node3 $- node2]" );
@@ -727,7 +731,20 @@ public static Tree MWETransform(Tree t) {
727
731
return t ;
728
732
}
729
733
730
-
734
/**
 * Tregex patterns that MWFlatTransform iterates over. Each pattern binds the names node1 and
 * node2, which the accompanying Tsurgeon operation refers to.
 * The single entry targets the expression "en masse" (matched case-insensitively) under a node
 * whose category is NP or ADVP; the part-of-speech tags of the two words are left unconstrained.
 * NOTE(review): the multi-child relation used here presumably requires that the two words are
 * the only children of the matched node -- confirm against the Tregex documentation.
 */
+ private static final TregexPattern [] MW_FLAT_PATTERNS = {
735
+ TregexPattern .compile ("@NP|ADVP <... {(__=node1 < /^(?i)en$/); (__=node2 < /^(?i)masse$/)}" ), // en masse, which is tagged in different ways in PTB
736
+ };
737
+
738
/**
 * Tsurgeon operation applied by MWFlatTransform to every MW_FLAT_PATTERNS match: wraps the span
 * from node1 to node2 in a newly created subtree labelled FLAT.
 * NOTE(review): the second step only acts when a node named node3 exists, but the pattern
 * currently listed in MW_FLAT_PATTERNS binds node1 and node2 only. The step looks like a
 * carry-over from MWE_OPERATION and presumably never fires -- confirm before relying on it.
 */
+ private static final TsurgeonPattern MW_FLAT_OPERATION = Tsurgeon .parseOperation ("[createSubtree FLAT node1 node2] [if exists node3 move node3 $- node2]" );
739
+
740
+ public static Tree MWFlatTransform (Tree t ) {
741
+ for (TregexPattern p : MW_FLAT_PATTERNS ) {
742
+ Tsurgeon .processPattern (p , MW_FLAT_OPERATION , t );
743
+ }
744
+
745
+ return t ;
746
+ }
747
+
731
748
/**
 * Tregex pattern for a PP that starts with two coordinated prepositions. It binds three names:
 * p1, the first child of the PP, tagged IN or TO; cc, a CC directly following p1; and p2, a
 * node tagged IN or TO directly following cc.
 * NOTE(review): reading of the first-child and immediate-sister relations is taken from the
 * usual Tregex syntax -- confirm against the Tregex documentation.
 */
private static final TregexPattern FLAT_PREP_CC_PATTERN = TregexPattern .compile ("PP <, (/^(IN|TO)$/=p1 $+ (CC=cc $+ /^(IN|TO)$/=p2))" );
732
749
/**
 * Tsurgeon operation that uses the p1, cc and p2 names bound by FLAT_PREP_CC_PATTERN: it wraps
 * the span from p1 to cc in a newly created subtree labelled PCONJP and then moves p2 relative
 * to cc.
 * NOTE(review): "$-" is presumably Tsurgeon's "right sister of" location, which would place p2
 * inside the new PCONJP directly after cc -- confirm against the Tsurgeon documentation.
 */
private static final TsurgeonPattern FLAT_PREP_CC_OPERATION = Tsurgeon .parseOperation ("[createSubtree PCONJP p1 cc] [move p2 $- cc]" );
733
750
0 commit comments