Skip to content

Commit 8e7d121

Browse files
committed
Add functionality to Ssurgeon that allows for removing a field (such as lemma) from a node
1 parent 156fad1 commit 8e7d121

File tree

3 files changed

+123
-4
lines changed

3 files changed

+123
-4
lines changed

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/EditNode.java

+21-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
package edu.stanford.nlp.semgraph.semgrex.ssurgeon;
22

3+
import java.util.ArrayList;
34
import java.util.Collections;
5+
import java.util.List;
46
import java.util.Map;
57
import java.util.TreeMap;
68

9+
import edu.stanford.nlp.ling.AnnotationLookup;
10+
import edu.stanford.nlp.ling.CoreAnnotation;
711
import edu.stanford.nlp.ling.CoreAnnotations;
812
import edu.stanford.nlp.ling.CoreLabel;
913
import edu.stanford.nlp.ling.IndexedWord;
@@ -20,15 +24,16 @@ public class EditNode extends SsurgeonEdit {
2024
public static final String LABEL = "editNode";
2125

2226
final String nodeName;
27+
final List<String> removedAttributes;
2328
final Map<String, String> attributes;
2429
final Map<String, String> updateMorphoFeatures;
2530

26-
public EditNode(String nodeName, Map<String, String> attributes, String updateMorphoFeatures) {
31+
public EditNode(String nodeName, Map<String, String> attributes, String updateMorphoFeatures, List<String> removedAttributes) {
2732
if (nodeName == null) {
2833
throw new SsurgeonParseException("Cannot make an EditNode with no nodeName");
2934
}
30-
if (attributes.size() == 0 && updateMorphoFeatures == null) {
31-
throw new SsurgeonParseException("Cannot make an EditNode with no attributes or updated morphological features");
35+
if (attributes.size() == 0 && updateMorphoFeatures == null && removedAttributes.size() == 0) {
36+
throw new SsurgeonParseException("Cannot make an EditNode with no updated attributes, removed attributes, or updated morphological features");
3237
}
3338
AddDep.checkIllegalAttributes(attributes);
3439
this.nodeName = nodeName;
@@ -38,6 +43,12 @@ public EditNode(String nodeName, Map<String, String> attributes, String updateMo
3843
} else {
3944
this.updateMorphoFeatures = Collections.emptyMap();
4045
}
46+
this.removedAttributes = new ArrayList<>(removedAttributes);
47+
for (String attr : removedAttributes) {
48+
if (AnnotationLookup.toCoreKey(attr) == null) {
49+
throw new SsurgeonParseException("Unknown attribute |" + attr + "| when building an EditNode operation");
50+
}
51+
}
4152
}
4253

4354

@@ -107,6 +118,13 @@ public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
107118
}
108119
}
109120

121+
for (String key : removedAttributes) {
122+
Class<? extends CoreAnnotation<?>> clazz = AnnotationLookup.toCoreKey(key);
123+
if (word.remove((Class) clazz) != null) {
124+
changed = true;
125+
}
126+
}
127+
110128
return changed;
111129
}
112130
}

src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/Ssurgeon.java

+7-1
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,7 @@ public Collection<SsurgeonWordlist> getResources() {
418418
public static final String POSITION_ARG = "-position";
419419
public static final String UPDATE_MORPHO_FEATURES = "-updateMorphoFeatures";
420420
public static final String UPDATE_MORPHO_FEATURES_LOWER = "-updatemorphofeatures";
421+
public static final String REMOVE = "-remove";
421422

422423

423424
// args for Ssurgeon edits, allowing us to not
@@ -450,6 +451,8 @@ protected static class SsurgeonArgs {
450451
public Integer headIndex = null;
451452

452453
public Map<String, String> annotations = new TreeMap<>();
454+
455+
public List<String> remove = new ArrayList<>();
453456
}
454457

455458
/**
@@ -530,6 +533,9 @@ private static SsurgeonArgs parseArgsBox(String args, Map<String, String> additi
530533
case UPDATE_MORPHO_FEATURES_LOWER:
531534
argsBox.updateMorphoFeatures = argsValue;
532535
break;
536+
case REMOVE:
537+
argsBox.remove.add(argsValue);
538+
break;
533539
default:
534540
String key = argsKey.substring(1);
535541
Class<? extends CoreAnnotation<?>> annotation = AnnotationLookup.toCoreKey(key);
@@ -595,7 +601,7 @@ public static SsurgeonEdit parseEditLine(String editLine, Map<String, String> at
595601
if (argsBox.nodes.size() != 1) {
596602
throw new SsurgeonParseException("Cannot make an EditNode out of " + argsBox.nodes.size() + " nodes. Please use exactly one -node");
597603
}
598-
return new EditNode(argsBox.nodes.get(0), argsBox.annotations, argsBox.updateMorphoFeatures);
604+
return new EditNode(argsBox.nodes.get(0), argsBox.annotations, argsBox.updateMorphoFeatures, argsBox.remove);
599605
} else if (command.equalsIgnoreCase(Lemmatize.LABEL)) {
600606
if (argsBox.nodes.size() != 1) {
601607
throw new SsurgeonParseException("Cannot make a Lemmatize out of " + argsBox.nodes.size() + " nodes. Please use exactly one -node");

test/src/edu/stanford/nlp/semgraph/semgrex/ssurgeon/SsurgeonTest.java

+95
Original file line numberDiff line numberDiff line change
@@ -1494,6 +1494,93 @@ public void readXMLEditNode() {
14941494
assertEquals("blue", blueVertex.value());
14951495
}
14961496

1497+
/**
1498+
* Test that trying to build an EditNode with an illegal removed attribute fails
1499+
*/
1500+
@Test
1501+
public void readXMLEditNodeIllegalRemove() {
1502+
// sanity check that the key we will use does not actually mean anything
1503+
String missingKey = "zzzzzz";
1504+
assertNull(AnnotationLookup.toCoreKey(missingKey));
1505+
1506+
try {
1507+
Ssurgeon inst = Ssurgeon.inst();
1508+
String remove = String.join(newline,
1509+
"<ssurgeon-pattern-list>",
1510+
" <ssurgeon-pattern>",
1511+
" <uid>38</uid>",
1512+
" <notes>Edit a node</notes>",
1513+
" <semgrex>" + XMLUtils.escapeXML("{word:blue}=blue") + "</semgrex>",
1514+
" <edit-list>EditNode -node blue -remove " + missingKey + "</edit-list>",
1515+
" </ssurgeon-pattern>",
1516+
"</ssurgeon-pattern-list>");
1517+
inst.readFromString(remove);
1518+
throw new AssertionError("Expected a parse exception!");
1519+
} catch(SsurgeonParseException e) {
1520+
// yay
1521+
}
1522+
}
1523+
1524+
/**
1525+
* Check that we can add and remove lemmas using EditNode
1526+
*
1527+
* Specifically tests that the remove functionality works
1528+
*/
1529+
@Test
1530+
public void readXMLEditNodeRemove() {
1531+
Ssurgeon inst = Ssurgeon.inst();
1532+
1533+
// use "dep" as the dependency so as to be language-agnostic in this test
1534+
String add = String.join(newline,
1535+
"<ssurgeon-pattern-list>",
1536+
" <ssurgeon-pattern>",
1537+
" <uid>38</uid>",
1538+
" <notes>Edit a node</notes>",
1539+
" <semgrex>" + XMLUtils.escapeXML("{word:blue}=blue") + "</semgrex>",
1540+
" <edit-list>EditNode -node blue -lemma blue</edit-list>",
1541+
" </ssurgeon-pattern>",
1542+
"</ssurgeon-pattern-list>");
1543+
List<SsurgeonPattern> patterns = inst.readFromString(add);
1544+
assertEquals(patterns.size(), 1);
1545+
SsurgeonPattern editSsurgeon = patterns.get(0);
1546+
1547+
SemanticGraph sg = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> [antennae-4 dep> blue-3]]");
1548+
IndexedWord blueVertex = sg.getNodeByIndexSafe(3);
1549+
assertEquals("blue", blueVertex.value());
1550+
assertNull(blueVertex.lemma());
1551+
1552+
SemanticGraph newSG = editSsurgeon.iterate(sg).first;
1553+
SemanticGraph expected = SemanticGraph.valueOf("[has-2 nsubj> Jennifer-1 obj> [antennae-4 dep> blue-3]]");
1554+
assertEquals(expected, newSG);
1555+
// this ssurgeon only sets the lemma on the "blue" node; the graph structure is unchanged
1556+
blueVertex = newSG.getNodeByIndexSafe(3);
1557+
assertNotNull(blueVertex);
1558+
assertNull(blueVertex.tag());
1559+
assertEquals("blue", blueVertex.value());
1560+
assertEquals("blue", blueVertex.lemma());
1561+
1562+
String remove = String.join(newline,
1563+
"<ssurgeon-pattern-list>",
1564+
" <ssurgeon-pattern>",
1565+
" <uid>38</uid>",
1566+
" <notes>Edit a node</notes>",
1567+
" <semgrex>" + XMLUtils.escapeXML("{word:blue}=blue") + "</semgrex>",
1568+
" <edit-list>EditNode -node blue -remove lemma</edit-list>",
1569+
" </ssurgeon-pattern>",
1570+
"</ssurgeon-pattern-list>");
1571+
patterns = inst.readFromString(remove);
1572+
assertEquals(patterns.size(), 1);
1573+
editSsurgeon = patterns.get(0);
1574+
1575+
SemanticGraph noLemmaSG = editSsurgeon.iterate(newSG).first;
1576+
assertEquals(expected, noLemmaSG);
1577+
blueVertex = noLemmaSG.getNodeByIndexSafe(3);
1578+
assertNotNull(blueVertex);
1579+
assertNull(blueVertex.tag());
1580+
assertEquals("blue", blueVertex.value());
1581+
assertNull(blueVertex.lemma());
1582+
}
1583+
14971584
/**
14981585
* A couple tests of updating the morpho map on a word using EditNode
14991586
* <br>
@@ -2200,4 +2287,12 @@ public void simpleTest() {
22002287
String firstGraphString = newSgs.iterator().next().toCompactString().trim();
22012288
assertEquals("[bartender nsubj>Joe det>the cop>is]", firstGraphString);
22022289
}
2290+
2291+
/**
2292+
* Test that a couple of fields used in Ssurgeon don't conflict with annotation keys in AnnotationLookup
2293+
*/
2294+
@Test
2295+
public void annotationNamesTest() {
2296+
assertNull(AnnotationLookup.toCoreKey(Ssurgeon.REMOVE));
2297+
}
22032298
}

0 commit comments

Comments
 (0)