Skip to content

Commit a2de460

Browse files
committed
When processing "not only" and similar phrases into UD, separate them from the CONJP (sometimes ADVP by error) that they show up in. This allows the later part of the converter to connect both of them to the parent with advmod.
As part of this, turn the UPOS of "not" into PART. Also, update the corrector to make a few changes to the structure, which may help usages of the trees or of SD as well as the UD. The UD changes, though, are written to accommodate the structural errors in the original PTB.
1 parent 2945cac commit a2de460

5 files changed

+27
-7
lines changed

src/edu/stanford/nlp/trees/CoordinationTransformer.java

+8
Original file line numberDiff line numberDiff line change
@@ -728,6 +728,13 @@ private static Tree findCCparent(Tree t, Tree root) {
728728
private static final TregexPattern BUT_ALSO_PATTERN = TregexPattern.compile("CONJP=conjp < (CC=cc < but) < (RB=rb < also) ?$+ (__=nextNode < (__ < __))");
729729
private static final TsurgeonPattern BUT_ALSO_OPERATION = Tsurgeon.parseOperation("[move cc $- conjp] [move rb $- cc] [if exists nextNode move rb >1 nextNode] [createSubtree ADVP rb] [delete conjp]");
730730

731+
/*
732+
* "not only" is not a MWE, so break up the CONJP similar to "but also".
733+
* Also compensates for "only" sometimes being tagged JJ in this expression.
734+
*/
735+
// Matches a CONJP (or mis-labelled ADVP) containing RB "not" plus RB/JJ "only|just|merely|even",
// optionally followed by a sister (nextNode) that has at least one non-leaf child.
// The alternation is grouped so that ^ and $ anchor every alternative; ungrouped,
// "^only|just|merely|even$" anchors only the first and last words and, because Tregex
// node regexes are searched rather than fully matched, would also accept e.g. "seven" or "adjust".
private static final TregexPattern NOT_ONLY_PATTERN = TregexPattern.compile("CONJP|ADVP=conjp < (RB=not < /^(?i)not$/) < (RB|JJ=only < /^(?i:only|just|merely|even)$/) ?$+ (__=nextNode < (__ < __))");
736+
// Tsurgeon steps, in order:
//  1. move "not" to just right of the matched CONJP/ADVP node, then "only" to just right of "not";
//  2. if nextNode was matched, move "only" and then "not" to be its first child
//     ("only" goes first so that "not" ends up in front of it, preserving word order);
//  3. wrap "not" and "only" each in its own ADVP;
//  4. delete the original conjp node.
private static final TsurgeonPattern NOT_ONLY_OPERATION = Tsurgeon.parseOperation("[move not $- conjp] [move only $- not] [if exists nextNode move only >1 nextNode] [if exists nextNode move not >1 nextNode] [createSubtree ADVP not] [createSubtree ADVP only] [delete conjp]");
737+
731738
/* at least / at most / at best / at worst / ... should be treated as if "at"
732739
was a preposition and the RBS was a noun. Assumes that the MWE "at least"
733740
has already been extracted. */
@@ -749,6 +756,7 @@ public static Tree MWETransform(Tree t) {
749756

750757
Tsurgeon.processPattern(ACCORDING_TO_PATTERN, ACCORDING_TO_OPERATION, t);
751758
Tsurgeon.processPattern(BUT_ALSO_PATTERN, BUT_ALSO_OPERATION, t);
759+
Tsurgeon.processPattern(NOT_ONLY_PATTERN, NOT_ONLY_OPERATION, t);
752760
Tsurgeon.processPattern(AT_RBS_PATTERN, AT_RBS_OPERATION, t);
753761
Tsurgeon.processPattern(AT_ALL_PATTERN, AT_ALL_OPERATION, t);
754762

src/edu/stanford/nlp/trees/UniversalEnglishGrammaticalRelations.java

-1
Original file line numberDiff line numberDiff line change
@@ -1312,7 +1312,6 @@ private UniversalEnglishGrammaticalRelations() {}
13121312
MODIFIER,
13131313
"S|VP|ADJP|PP|ADVP|UCP(?:-TMP|-ADV)?|NX|NML|SBAR|NP(?:-TMP|-ADV)?", tregexCompiler,
13141314
"NP|NP-TMP|NP-ADV|NX|NML < (PDT|CC|DT=target < /^(?i:either|neither|both)$/ $++ CC)",
1315-
"NP|NP-TMP|NP-ADV|NX|NML < (CONJP=target < (RB < /^(?i:not)$/) < (RB|JJ < /^(?i:only|merely|just)$/) $++ CC|CONJP)",
13161315
// This matches weird/wrong NP-internal preconjuncts where you get (NP PDT (NP NP CC NP)) or similar
13171316
"NP|NP-TMP|NP-ADV|NX|NML < (PDT|CC|DT=target < /^(?i:either|neither|both)$/ ) < (NP < CC)",
13181317
"/^S|VP|ADJP|PP|ADVP|UCP(?:-TMP|-ADV)?|NX|NML|SBAR$/ < (PDT|DT|CC=target < /^(?i:either|neither|both)$/ $++ CC)",

src/edu/stanford/nlp/trees/UniversalPOSMapper.java

+3
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,9 @@ public static void load() {
134134
// RB -> PART when it is verbal negation (not or its reductions)
135135
{ "@VP|SINV|SQ|FRAG|ADVP < (RB=target < /^(?i:not|n't|nt|t|n)$/)", "PART" },
136136

137+
// "not" as part of a phrase such as "not only", "not just", etc is tagged as PART in UD
138+
{ "@ADVP|CONJP <1 (RB=target < /^(?i:not|n't|nt|t|n)$/) <2 (__ < only|just|merely|even) !<3 __", "PART" },
139+
137140
// Otherwise RB -> ADV
138141
{ "RB=target <... {/.*/}", "ADV" },
139142

src/edu/stanford/nlp/trees/treebank/EnglishPTBTreebankCorrector.java

+10
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,16 @@ private static BufferedReader getBufferedReader(String source) {
168168
"adjoin (NP NN@) newnp\n" +
169169
'\n') +
170170

171+
// Fix not_RB only_JJ, which should generally be not_RB only_RB
172+
// and put it under a CONJP instead of an ADVP
173+
("ADVP|CONJP <1 (__ < /^(?i:not)$/) <2 (JJ=bad < only|just|merely|even) !<3 __\n" +
174+
"relabel bad RB\n" +
175+
'\n') +
176+
177+
("ADVP=bad <1 (__ < /^(?i:not)$/) <2 (RB < only|just|merely|even) !<3 __\n" +
178+
"relabel bad CONJP\n" +
179+
'\n') +
180+
171181
// Fix some cases of 'as well as' not made into a CONJP unit
172182
// There are a few other weird cases that should also be reviewed with the tregex
173183
// well|Well|WELL , as|AS|As . as|AS|As !>(__ > @CONJP)

test/src/edu/stanford/nlp/trees/UniversalEnglishGrammaticalStructureTest.java

+6-6
Original file line numberDiff line numberDiff line change
@@ -522,8 +522,8 @@ public static Collection<Object[]> testCases() {
522522
"( (S (NP (PRP I)) (VP (VBP like) (NP (CONJP (RB not) (RB only)) (NP (NNS cats)) (CONJP (CC but) (RB also)) (NP (NN dogs)))) (. .)))",
523523
"nsubj(like-2, I-1)\n" +
524524
"root(ROOT-0, like-2)\n" +
525-
"advmod(only-4, not-3)\n" +
526-
"cc:preconj(cats-5, only-4)\n" +
525+
"advmod(cats-5, not-3)\n" +
526+
"advmod(cats-5, only-4)\n" +
527527
"obj(like-2, cats-5)\n" +
528528
"cc(dogs-8, but-6)\n" +
529529
"advmod(dogs-8, also-7)\n" +
@@ -2497,8 +2497,8 @@ public static Collection<Object[]> testCases() {
24972497
"( (S (NP (PRP I)) (VP (VBP like) (NP (CONJP (RB not) (RB only)) (NP (NNS cats)) (CONJP (CC but) (RB also)) (NP (NN dogs)))) (. .)))",
24982498
"nsubj(like-2, I-1)\n" +
24992499
"root(ROOT-0, like-2)\n" +
2500-
"advmod(only-4, not-3)\n" +
2501-
"cc:preconj(cats-5, only-4)\n" +
2500+
"advmod(cats-5, not-3)\n" +
2501+
"advmod(cats-5, only-4)\n" +
25022502
"obj(like-2, cats-5)\n" +
25032503
"cc(dogs-8, but-6)\n" +
25042504
"advmod(dogs-8, also-7)\n" +
@@ -2510,8 +2510,8 @@ public static Collection<Object[]> testCases() {
25102510
"nsubj(flew-2', Fred-1)\n" +
25112511
"root(ROOT-0, flew-2)\n" +
25122512
"conj:and(flew-2, flew-2')\n" +
2513-
"advmod(only-4, not-3)\n" +
2514-
"cc:preconj(Greece-6, only-4)\n" +
2513+
"advmod(Greece-6, not-3)\n" +
2514+
"advmod(Greece-6, only-4)\n" +
25152515
"case(Greece-6, to-5)\n" +
25162516
"obl:to(flew-2, Greece-6)\n" +
25172517
"cc(flew-2', but-7)\n" +

0 commit comments

Comments
 (0)