Skip to content

Commit 850e588

Browse files
committed
Add lemmas to a few of the MWTs that we combine for English. A few others are still TODO, such as the suite of contractions (n't, 'll, etc.).
1 parent 1dd746c commit 850e588

File tree

1 file changed

+16
-2
lines changed

1 file changed

+16
-2
lines changed

src/edu/stanford/nlp/trees/ud/EnglishMWTCombiner.java

+16-2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ public SemanticGraph combineMWTs(SemanticGraph sg) {
1515

1616
// combine using the CombineMWT operation, using the default concatenation for the MWT text
1717
String mwt = String.join(newline,
18+
// TODO: separate the contractions so we can adjust the lemmas?
19+
// In some other way fix those lemmas?
1820
"<ssurgeon-pattern-list>",
1921
" <ssurgeon-pattern>",
2022
" <uid>1</uid>",
@@ -30,9 +32,19 @@ public SemanticGraph combineMWTs(SemanticGraph sg) {
3032
" </ssurgeon-pattern>",
3133
" <ssurgeon-pattern>",
3234
" <uid>3</uid>",
33-
" <notes>Edit a node's MWT for wanna/gonna</notes>",
34-
" <semgrex>" + XMLUtils.escapeXML("{word:/(?i)wan|gon/;after://}=first . {word:/(?i)na/}=second") + "</semgrex>",
35+
" <notes>Edit a node's MWT for wanna</notes>",
36+
" <semgrex>" + XMLUtils.escapeXML("{word:/(?i)wan/;after://}=first . {word:/(?i)na/}=second") + "</semgrex>",
3537
" <edit-list>CombineMWT -node first -node second</edit-list>",
38+
" <edit-list>EditNode -node first -lemma want</edit-list>",
39+
" <edit-list>EditNode -node second -lemma to</edit-list>",
40+
" </ssurgeon-pattern>",
41+
" <ssurgeon-pattern>",
42+
" <uid>3b</uid>",
43+
" <notes>Edit a node's MWT for gonna</notes>",
44+
" <semgrex>" + XMLUtils.escapeXML("{word:/(?i)gon/;after://}=first . {word:/(?i)na/}=second") + "</semgrex>",
45+
" <edit-list>CombineMWT -node first -node second</edit-list>",
46+
" <edit-list>EditNode -node first -lemma go</edit-list>",
47+
" <edit-list>EditNode -node second -lemma to</edit-list>",
3648
" </ssurgeon-pattern>",
3749
" <ssurgeon-pattern>",
3850
" <uid>4</uid>",
@@ -45,6 +57,8 @@ public SemanticGraph combineMWTs(SemanticGraph sg) {
4557
" <notes>Edit a node's MWT for 'tis and 'twas</notes>",
4658
" <semgrex>" + XMLUtils.escapeXML("{word:/'[tT]/}=first . {word:/(?i)is|was/}=second") + "</semgrex>",
4759
" <edit-list>CombineMWT -node first -node second</edit-list>",
60+
" <edit-list>EditNode -node first -lemma it</edit-list>",
61+
" <edit-list>EditNode -node second -lemma be</edit-list>",
4862
" </ssurgeon-pattern>",
4963
" <ssurgeon-pattern>",
5064
" <uid>6</uid>",

0 commit comments

Comments
 (0)