Skip to content

Commit 30f2f8e

Browse files
committed
Update the UniversalPOSMapper to assign the AUX tag to a large share of auxiliary words by reusing the patterns from UniversalEnglishGrammaticalRelations to find those words. It currently relabels more words than it should, but the error rate is significantly lower than it was without this change.
1 parent 7f70ad8 commit 30f2f8e

File tree

3 files changed

+42
-27
lines changed

3 files changed

+42
-27
lines changed

src/edu/stanford/nlp/trees/GrammaticalRelation.java

+4
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,10 @@ private void addChild(GrammaticalRelation child) {
362362
children.add(child);
363363
}
364364

365+
public List<TregexPattern> targetPatterns() {
366+
return Collections.unmodifiableList(targetPatterns);
367+
}
368+
365369
/** Given a {@code Tree} node {@code t}, attempts to
366370
* return a list of nodes to which node {@code t} has this
367371
* grammatical relation, with {@code t} as the governor.

src/edu/stanford/nlp/trees/UniversalEnglishGrammaticalRelations.java

+15-4
Original file line numberDiff line numberDiff line change
@@ -139,14 +139,21 @@ private UniversalEnglishGrammaticalRelations() {}
139139
* <br>
140140
* Example: <br>
141141
* "Reagan has died" &rarr; {@code aux}(died, has)
142+
* <br>
143+
* For any pattern in AUX_MODIFIER, AUX_PASSIVE_MODIFIER, and COPULA
144+
* where the target is not the verb itself, but rather the enclosing
145+
* constituent, there is a tregex named variable:
146+
* =aux
147+
* Please make sure to maintain this. The =aux named nodes are used in
148+
* UniversalPOSMapper to update the tags
142149
*/
143150
public static final GrammaticalRelation AUX_MODIFIER =
144151
new GrammaticalRelation(Language.UniversalEnglish, "aux", "auxiliary",
145152
DEPENDENT, "VP|SQ|SINV|CONJP", tregexCompiler,
146153
"VP < VP < (/^(?:MD|VB.*|AUXG?|POS)$/=target)",
147154
"SQ|SINV < (/^(?:VB|MD|AUX)/=target $++ /^(?:VP|ADJP)/)",
148155
// add handling of tricky VP fronting cases...
149-
"SINV < (VP=target < (/^(?:VB|AUX|POS)/ < " + beAuxiliaryRegex + ") $-- (VP < VBG))");
156+
"SINV < (VP=target < (/^(?:VB|AUX|POS)/=aux < " + beAuxiliaryRegex + ") $-- (VP < VBG))");
150157

151158

152159
/**
@@ -156,15 +163,17 @@ private UniversalEnglishGrammaticalRelations() {}
156163
*
157164
* Example: <br>
158165
* "Kennedy has been killed" &rarr; {@code auxpass}(killed, been)
166+
* <br>
167+
* See AUX_MODIFIER for an explanation of the =aux named nodes
159168
*/
160169
public static final GrammaticalRelation AUX_PASSIVE_MODIFIER =
161170
new GrammaticalRelation(Language.UniversalEnglish, "aux:pass", "passive auxiliary",
162171
AUX_MODIFIER, "VP|SQ|SINV", tregexCompiler,
163172
"VP < (/^(?:VB|AUX|POS)/=target < " + passiveAuxWordRegex + " ) < (VP|ADJP [ < VBN|VBD | < (VP|ADJP < VBN|VBD) < CC ] )",
164173
"SQ|SINV < (/^(?:VB|AUX|POS)/=target < " + beAuxiliaryRegex + " $++ (VP < VBD|VBN))",
165174
// add handling of tricky VP fronting cases...
166-
"SINV < (VP=target < (/^(?:VB|AUX|POS)/ < " + beAuxiliaryRegex + ") $-- (VP < VBD|VBN))",
167-
"SINV < (VP=target < (VP < (/^(?:VB|AUX|POS)/ < " + beAuxiliaryRegex + ")) $-- (VP < VBD|VBN))");
175+
"SINV < (VP=target < (/^(?:VB|AUX|POS)/=aux < " + beAuxiliaryRegex + ") $-- (VP < VBD|VBN))",
176+
"SINV < (VP=target < (VP < (/^(?:VB|AUX|POS)/=aux < " + beAuxiliaryRegex + ")) $-- (VP < VBD|VBN))");
168177

169178
/**
170179
* The "copula" grammatical relation. A copula is the relation between
@@ -173,6 +182,8 @@ private UniversalEnglishGrammaticalRelations() {}
173182
* Examples: <br>
174183
* "Bill is big" &rarr; {@code cop}(big, is) <br>
175184
* "Bill is an honest man" &rarr; {@code cop}(man, is)
185+
* <br>
186+
* See AUX_MODIFIER for an explanation of the =aux named nodes
176187
*/
177188
public static final GrammaticalRelation COPULA =
178189
new GrammaticalRelation(Language.UniversalEnglish, "cop", "copula",
@@ -182,7 +193,7 @@ private UniversalEnglishGrammaticalRelations() {}
182193
// matches (what, is) in "what is that" after the SQ has been flattened out of the tree
183194
"SBARQ < (/^(?:VB|AUX)/=target < " + copularWordRegex + ") < (WHNP < WP)",
184195
// "Such a great idea this was"
185-
"SINV <# (NP $++ (NP $++ (VP=target < (/^(?:VB|AUX)/ < " + copularWordRegex + "))))");
196+
"SINV <# (NP $++ (NP $++ (VP=target < (/^(?:VB|AUX)/=aux < " + copularWordRegex + "))))");
186197

187198
/**
188199
* The "conjunct" grammatical relation. A conjunct is the relation between

src/edu/stanford/nlp/trees/UniversalPOSMapper.java

+23-23
Original file line numberDiff line numberDiff line change
@@ -70,30 +70,30 @@ public static void load() {
7070

7171
}
7272

73+
List<TregexPattern> auxPatterns = new ArrayList<>();
74+
auxPatterns.addAll(UniversalEnglishGrammaticalRelations.AUX_MODIFIER.targetPatterns());
75+
auxPatterns.addAll(UniversalEnglishGrammaticalRelations.AUX_PASSIVE_MODIFIER.targetPatterns());
76+
auxPatterns.addAll(UniversalEnglishGrammaticalRelations.COPULA.targetPatterns());
77+
for (TregexPattern pattern : auxPatterns) {
78+
// note that the original patterns capture both VB and AUX...
79+
// if we capture AUX here, infinite loop!
80+
// also, we don't relabel POS, since that would be a really weird UPOS/XPOS combination
81+
final String newTregex;
82+
final String newTsurgeon;
83+
if (pattern.knownVariables().contains("aux")) {
84+
newTregex = pattern.pattern() + ": (=aux == /^(?:VB)/)";
85+
newTsurgeon = "relabel aux AUX";
86+
} else {
87+
newTregex = pattern.pattern() + ": (=target == /^(?:VB)/)";
88+
newTsurgeon = "relabel target AUX";
89+
}
90+
operations.add(new Pair<>(TregexPattern.compile(newTregex),
91+
Tsurgeon.parseOperation(newTsurgeon)));
92+
}
93+
7394
String [][] otherContextMappings = new String [][] {
74-
// Don't do this, we are now treating these as copular constructions
75-
// VB.* -> AUX (for passives where main verb is part of an ADJP)
76-
// @VP < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase|get|got|getting|gets|gotten)$/ ) < (@ADJP [ < VBN|VBD | < (@VP|ADJP < VBN|VBD) < CC ] )
77-
//relabel target AUX",
78-
79-
// VB.* -> AUX (for cases with fronted main VPs)
80-
{ "@SINV < (@VP < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase)$/ ) $-- (@VP < VBD|VBN))",
81-
"AUX", },
82-
// VB.* -> AUX (another, rarer case of fronted VPs)
83-
{ "@SINV < (@VP < (@VP < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase)$/ )) $-- (@VP < VBD|VBN))",
84-
"AUX", },
85-
86-
// VB.* -> AUX (passive, case 2)
87-
//"%SQ|SINV < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase)$/ $++ (VP < VBD|VBN))",
88-
//"%relabel target AUX",
89-
// VB.* -> AUX (active, case 1)
90-
{ "VP < VP < (/^VB.*$/=target <: /^(?i:will|have|can|would|do|is|was|be|are|has|could|should|did|been|may|were|had|'ll|'ve|does|am|might|ca|'m|being|'s|must|'d|'re|wo|shall|get|ve|s|got|r|m|getting|having|d|re|ll|wilt|v|of|my|nt|gets|du|wud|woud|with|willl|wil|wase|shoul|shal|`s|ould|-ll|most|made|hvae|hav|cold|as|art|ai|ar|a)$/)",
91-
"AUX", },
92-
93-
// VB -> AUX (active, case 2)
94-
{ "@SQ|SINV < (/^VB/=target $++ /^(?:VP)/ <... {/.*/})", "AUX" },
95-
96-
// otherwise, VB.* -> VERB
95+
// this will capture all verbs not found by the AUX_MODIFIER, AUX_PASSIVE_MODIFIER, and COPULA expressions above
96+
// VB.* -> VERB
9797
{ "/^VB.*/=target <... {/.*/}", "VERB", },
9898

9999
// IN -> SCONJ (subordinating conjunctions)

0 commit comments

Comments
 (0)