Skip to content

Commit cb338cd

Browse files
committed
Add a FLAT relation for phrases such as 'en masse', which aren't considered FIXED or MWT expressions. Addresses a tiny part of UniversalDependencies/docs#717
1 parent bc4acf1 commit cb338cd

4 files changed

+28
-2
lines changed

src/edu/stanford/nlp/trees/CoordinationTransformer.java

+19-2
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,11 @@ public Tree transformTree(Tree t) {
105105
log.info("After MWETransform: " + t);
106106
}
107107

108+
t = MWFlatTransform(t);
109+
if (VERBOSE) {
110+
log.info("After MWFlatTransform: " + t);
111+
}
112+
108113
t = prepCCTransform(t);
109114
if (VERBOSE) {
110115
log.info("After prepCCTransform: " + t);
@@ -688,7 +693,6 @@ private static Tree findCCparent(Tree t, Tree root) {
688693
TregexPattern.compile("@QP|XS < ((JJR|RBR||RB|RP|IN=node1 < /^(?i)(up)$/) $+ (IN|TO=node2 < /^(?i)to$/))"), // up to
689694
TregexPattern.compile("@QP < ((JJR|RBR|RB|RP|IN=node1 < /^(?i)up$/) $+ (IN|TO=node2 < /^(?i)to$/))"), //up to
690695
TregexPattern.compile("@S|SQ|VP|ADVP|PP < (@ADVP < ((IN|RB=node1 < /^(?i)at$/) $+ (JJS|RBS=node2 < /^(?i)least$/)) !$+ (RB < /(?i)(once|twice)/))"), //at least
691-
692696
};
693697

694698
private static final TsurgeonPattern MWE_OPERATION = Tsurgeon.parseOperation("[createSubtree MWE node1 node2] [if exists node3 move node3 $- node2]");
@@ -727,7 +731,20 @@ public static Tree MWETransform(Tree t) {
727731
return t;
728732
}
729733

730-
734+
// Tregex patterns for multi-word phrases that should be grouped under a FLAT node.
// Each pattern names the two adjacent words as node1 and node2.
private static final TregexPattern[] MW_FLAT_PATTERNS = {
735+
TregexPattern.compile("@NP|ADVP <... {(__=node1 < /^(?i)en$/); (__=node2 < /^(?i)masse$/)}"), // en masse, which is tagged in different ways in PTB
736+
};
737+
738+
// Tsurgeon operation applied for every match of MW_FLAT_PATTERNS: wraps node1..node2 in a new FLAT subtree.
// NOTE(review): the only pattern visible in MW_FLAT_PATTERNS binds node1 and node2 but not node3,
// so the "if exists node3" clause looks like a carry-over from MWE_OPERATION -- confirm whether it is needed.
private static final TsurgeonPattern MW_FLAT_OPERATION = Tsurgeon.parseOperation("[createSubtree FLAT node1 node2] [if exists node3 move node3 $- node2]");
739+
740+
/**
 * Runs MW_FLAT_OPERATION on the given tree for every pattern in MW_FLAT_PATTERNS.
 *
 * @param t the tree to process
 * @return the same {@code t} reference that was passed in
 */
public static Tree MWFlatTransform(Tree t) {
741+
for (TregexPattern p : MW_FLAT_PATTERNS) {
742+
// NOTE(review): the value returned by processPattern is discarded; presumably the
// operation edits t in place -- confirm against the Tsurgeon documentation.
Tsurgeon.processPattern(p, MW_FLAT_OPERATION, t);
743+
}
744+
745+
return t;
746+
}
747+
731748
private static final TregexPattern FLAT_PREP_CC_PATTERN = TregexPattern.compile("PP <, (/^(IN|TO)$/=p1 $+ (CC=cc $+ /^(IN|TO)$/=p2))");
732749
private static final TsurgeonPattern FLAT_PREP_CC_OPERATION = Tsurgeon.parseOperation("[createSubtree PCONJP p1 cc] [move p2 $- cc]");
733750

src/edu/stanford/nlp/trees/GrammaticalStructure.java

+2
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ public GrammaticalStructure(Tree t, Collection<GrammaticalRelation> relations,
164164
// avoiding a wasteful copy of the labels.
165165
Trees.setLeafLabels(treeGraph, t.yield());
166166
Trees.setLeafTagsIfUnset(treeGraph);
167+
//System.out.println(treeGraph.toPrettyString(2));
167168
if (transformer != null) {
168169
Tree transformed = transformer.transformTree(treeGraph);
169170
if (!(transformed instanceof TreeGraphNode)) {
@@ -173,6 +174,7 @@ public GrammaticalStructure(Tree t, Collection<GrammaticalRelation> relations,
173174
} else {
174175
this.root = treeGraph;
175176
}
177+
//System.out.println(this.root.toPrettyString(2));
176178
indexNodes(this.root);
177179
// add head word and tag to phrase nodes
178180
if (hf == null) {

src/edu/stanford/nlp/trees/UniversalEnglishGrammaticalRelations.java

+6
Original file line numberDiff line numberDiff line change
@@ -1235,6 +1235,11 @@ private UniversalEnglishGrammaticalRelations() {}
12351235
MODIFIER, "MWE", tregexCompiler,
12361236
"MWE < (IN|TO|RB|NP|NN|JJ|VB|CC|VBZ|VBD|ADVP|PP|JJS|RBS=target)");
12371237

1238+
/**
 * The "flat expression" grammatical relation (short name "flat").
 * <br>
 * It is constructed with MODIFIER and the "FLAT" label, and its target pattern
 * selects an IN or FW node directly under a FLAT node.
 */
public static final GrammaticalRelation FLAT_EXPRESSION =
1239+
new GrammaticalRelation(Language.UniversalEnglish, "flat", "flat expression",
1240+
MODIFIER, "FLAT", tregexCompiler,
1241+
"FLAT < (IN|FW=target)");
1242+
12381243
/**
12391244
* The "determiner" grammatical relation.
12401245
* <br>
@@ -1613,6 +1618,7 @@ private UniversalEnglishGrammaticalRelations() {}
16131618
ADVERBIAL_MODIFIER,
16141619
NEGATION_MODIFIER,
16151620
MULTI_WORD_EXPRESSION,
1621+
FLAT_EXPRESSION,
16161622
DETERMINER,
16171623
PREDETERMINER,
16181624
PRECONJUNCT,

src/edu/stanford/nlp/trees/UniversalSemanticHeadFinder.java

+1
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ private void ruleChanges() {
194194

195195
// Special constituent for multi-word expressions
196196
nonTerminalInfo.put("MWE", new String[][]{{"left"}});
197+
nonTerminalInfo.put("FLAT", new String[][]{{"left"}});
197198

198199
nonTerminalInfo.put("PCONJP", new String[][]{{"left"}});
199200

0 commit comments

Comments
 (0)