Skip to content

Commit 7d0cb36

Browse files
committed
Add a negated containment operator ("!@") to semgrex to complement the existing containment ("@") option
1 parent 819a6e0 commit 7d0cb36

File tree

7 files changed

+105
-56
lines changed

7 files changed

+105
-56
lines changed

src/edu/stanford/nlp/semgraph/semgrex/NodeAttributes.java

+6-5
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import java.util.List;
77
import java.util.Set;
88

9+
import edu.stanford.nlp.util.Quadruple;
910
import edu.stanford.nlp.util.Triple;
1011

1112
/**
@@ -31,8 +32,8 @@ public class NodeAttributes {
3132
// for individual elements of that map rather than turn the map into a string
3233
// and search on its contents that way. This is especially true since there
3334
// is no guarantee the map will be in a consistent order.
34-
// String, String, String: node attribute for a map (such as CoNLLUFeats), key in that map, value to match
35-
private List<Triple<String, String, String>> contains;
35+
// String, String, String, Boolean: node attribute for a map (such as CoNLLUFeats), key in that map, value to match, negated?
36+
private List<Quadruple<String, String, String, Boolean>> contains;
3637

3738
public NodeAttributes() {
3839
root = false;
@@ -68,15 +69,15 @@ public void setAttribute(String key, String value, boolean negated) {
6869
attributes.add(new Triple(key, value, negated));
6970
}
7071

71-
public void addContains(String annotation, String key, String value) {
72-
contains.add(new Triple(annotation, key, value));
72+
public void addContains(String annotation, String key, String value, Boolean negated) {
73+
contains.add(new Quadruple(annotation, key, value, negated));
7374
}
7475

7576
public List<Triple<String, String, Boolean>> attributes() {
7677
return Collections.unmodifiableList(attributes);
7778
}
7879

79-
public List<Triple<String, String, String>> contains() {
80+
public List<Quadruple<String, String, String, Boolean>> contains() {
8081
return Collections.unmodifiableList(contains);
8182
}
8283
}

src/edu/stanford/nlp/semgraph/semgrex/NodePattern.java

+16-12
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import edu.stanford.nlp.semgraph.SemanticGraph;
1414
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
1515
import edu.stanford.nlp.util.Pair;
16+
import edu.stanford.nlp.util.Quadruple;
1617
import edu.stanford.nlp.util.Triple;
1718
import edu.stanford.nlp.util.logging.Redwood;
1819

@@ -91,10 +92,11 @@ public NodePattern(GraphRelation r, boolean negDesc,
9192
}
9293
}
9394

94-
for (Triple<String, String, String> entry : attrs.contains()) {
95+
for (Quadruple<String, String, String, Boolean> entry : attrs.contains()) {
9596
String annotation = entry.first();
9697
String key = entry.second();
9798
String value = entry.third();
99+
boolean negated = entry.fourth();
98100

99101
Class<?> clazz = AnnotationLookup.getValueType(AnnotationLookup.toCoreKey(annotation));
100102
boolean isMap = clazz != null && Map.class.isAssignableFrom(clazz);
@@ -105,11 +107,11 @@ public NodePattern(GraphRelation r, boolean negDesc,
105107
final Attribute attr;
106108
// Add the attributes for this key
107109
if (value.equals("__")) {
108-
attr = new Attribute(key, true, true, false);
110+
attr = new Attribute(key, true, true, negated);
109111
} else if (value.matches("/.*/")) {
110-
attr = buildRegexAttribute(key, value, false);
112+
attr = buildRegexAttribute(key, value, negated);
111113
} else { // raw description
112-
attr = new Attribute(key, value, value, false);
114+
attr = new Attribute(key, value, value, negated);
113115
}
114116
partialAttributes.add(new Pair<>(annotation, attr));
115117

@@ -239,17 +241,19 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
239241

240242
Class clazz = Env.lookupAnnotationKey(env, annotation);
241243
Object rawmap = node.get(clazz);
242-
// if the map is null, it can't possibly match...
244+
final String nodeValue;
243245
if (rawmap == null) {
244-
return negDesc;
246+
nodeValue = null;
247+
} else {
248+
if (!(rawmap instanceof Map))
249+
throw new RuntimeException("Can only use partial attributes with Maps... this should have been checked at creation time!");
250+
Map<String, ?> map = (Map) rawmap;
251+
252+
// TODO: allow for regex match on the keys?
253+
Object value = map.get(attr.key);
254+
nodeValue = (value == null) ? null : value.toString();
245255
}
246-
if (!(rawmap instanceof Map))
247-
throw new RuntimeException("Can only use partial attributes with Maps... this should have been checked at creation time!");
248-
Map<String, ?> map = (Map) rawmap;
249256

250-
// TODO: allow for regex match on the keys?
251-
Object value = map.get(attr.key);
252-
final String nodeValue = (value == null) ? null : value.toString();
253257
boolean matches = checkMatch(attr, ignoreCase, nodeValue);
254258
if (!matches) {
255259
return negDesc;

src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.java

+45-30
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
6565
case 11:
6666
case 15:
6767
case 17:
68-
case 23:{
68+
case 24:{
6969
node = SubNode(GraphRelation.ROOT);
7070
children.add(node);
7171
label_1:
@@ -135,7 +135,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
135135
}
136136
case 15:
137137
case 17:
138-
case 23:{
138+
case 24:{
139139
result = ModNode(r);
140140
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
141141
case RELATION:
@@ -397,7 +397,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
397397
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
398398
case 15:
399399
case 17:
400-
case 23:{
400+
case 24:{
401401
node = ModNode(reln);
402402
break;
403403
}
@@ -454,7 +454,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
454454
case 14:
455455
case 15:
456456
case 17:
457-
case 23:{
457+
case 24:{
458458
;
459459
break;
460460
}
@@ -485,7 +485,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
485485
boolean startUnderNeg;
486486
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
487487
case 17:
488-
case 23:{
488+
case 24:{
489489
child = Child(r);
490490
break;
491491
}
@@ -512,7 +512,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
512512
child = NodeDisj(r);
513513
break;
514514
}
515-
case 23:{
515+
case 24:{
516516
child = Description(r);
517517
break;
518518
}
@@ -569,8 +569,22 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
569569
}
570570
break;
571571
}
572-
case ALIGNRELN:{
573-
attrType = jj_consume_token(ALIGNRELN);
572+
case ALIGNRELN:
573+
case 23:{
574+
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
575+
case ALIGNRELN:{
576+
attrType = jj_consume_token(ALIGNRELN);
577+
break;
578+
}
579+
case 23:{
580+
attrType = jj_consume_token(23);
581+
break;
582+
}
583+
default:
584+
jj_la1[25] = jj_gen;
585+
jj_consume_token(-1);
586+
throw new ParseException();
587+
}
574588
key = jj_consume_token(IDENTIFIER);
575589
jj_consume_token(21);
576590
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
@@ -583,19 +597,20 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
583597
break;
584598
}
585599
default:
586-
jj_la1[25] = jj_gen;
600+
jj_la1[26] = jj_gen;
587601
jj_consume_token(-1);
588602
throw new ParseException();
589603
}
590604
if (attr == null || key == null || value == null) {
591605
{if (true) throw new SemgrexParseException("null while parsing semgrex expression: attr=" + attr +
592606
" key=" + key + " value=" + value);}
593607
}
594-
attributes.addContains(attr.image, key.image, value.image);
608+
boolean negated = attrType.image.equals("!@");
609+
attributes.addContains(attr.image, key.image, value.image, negated);
595610
break;
596611
}
597612
default:
598-
jj_la1[26] = jj_gen;
613+
jj_la1[27] = jj_gen;
599614
jj_consume_token(-1);
600615
throw new ParseException();
601616
}
@@ -612,7 +627,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
612627
break;
613628
}
614629
default:
615-
jj_la1[27] = jj_gen;
630+
jj_la1[28] = jj_gen;
616631
jj_consume_token(-1);
617632
throw new ParseException();
618633
}
@@ -622,7 +637,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
622637
boolean link = false;
623638
NodeAttributes attributes = new NodeAttributes();
624639
NodePattern pat;
625-
jj_consume_token(23);
640+
jj_consume_token(24);
626641
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
627642
case IDENTIFIER:
628643
case EMPTY:
@@ -631,24 +646,24 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
631646
label_6:
632647
while (true) {
633648
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
634-
case 24:{
649+
case 25:{
635650
;
636651
break;
637652
}
638653
default:
639-
jj_la1[28] = jj_gen;
654+
jj_la1[29] = jj_gen;
640655
break label_6;
641656
}
642-
jj_consume_token(24);
657+
jj_consume_token(25);
643658
AddAttribute(attributes);
644659
}
645660
break;
646661
}
647662
default:
648-
jj_la1[29] = jj_gen;
663+
jj_la1[30] = jj_gen;
649664
;
650665
}
651-
jj_consume_token(25);
666+
jj_consume_token(26);
652667
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
653668
case 21:{
654669
jj_consume_token(21);
@@ -665,7 +680,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
665680
break;
666681
}
667682
default:
668-
jj_la1[30] = jj_gen;
683+
jj_la1[31] = jj_gen;
669684
;
670685
}
671686
pat = new NodePattern(r, underNodeNegation, attributes, link, name != null ? name.image : null);
@@ -682,13 +697,13 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
682697
public Token jj_nt;
683698
private int jj_ntk;
684699
private int jj_gen;
685-
final private int[] jj_la1 = new int[31];
700+
final private int[] jj_la1 = new int[32];
686701
static private int[] jj_la1_0;
687702
static {
688703
jj_la1_init_0();
689704
}
690705
private static void jj_la1_init_0() {
691-
jj_la1_0 = new int[] {0x400,0x828808,0x3801c,0x3801c,0x828800,0x2000,0x3c01c,0x4000,0x3801c,0x2001c,0x80000,0x10,0x110,0x110,0x100000,0x200000,0x1c,0x828800,0x2000,0x82c000,0x4000,0x828000,0x820000,0x400400,0x110,0x110,0x400408,0xd0,0x1000000,0xd0,0x200000,};
706+
jj_la1_0 = new int[] {0x400,0x1028808,0x3801c,0x3801c,0x1028800,0x2000,0x3c01c,0x4000,0x3801c,0x2001c,0x80000,0x10,0x110,0x110,0x100000,0x200000,0x1c,0x1028800,0x2000,0x102c000,0x4000,0x1028000,0x1020000,0x400400,0x110,0x800008,0x110,0xc00408,0xd0,0x2000000,0xd0,0x200000,};
692707
}
693708

694709
/** Constructor with InputStream. */
@@ -702,7 +717,7 @@ public SemgrexParser(java.io.InputStream stream, String encoding) {
702717
token = new Token();
703718
jj_ntk = -1;
704719
jj_gen = 0;
705-
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
720+
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
706721
}
707722

708723
/** Reinitialise. */
@@ -716,7 +731,7 @@ public void ReInit(java.io.InputStream stream, String encoding) {
716731
token = new Token();
717732
jj_ntk = -1;
718733
jj_gen = 0;
719-
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
734+
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
720735
}
721736

722737
/** Constructor. */
@@ -726,7 +741,7 @@ public SemgrexParser(java.io.Reader stream) {
726741
token = new Token();
727742
jj_ntk = -1;
728743
jj_gen = 0;
729-
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
744+
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
730745
}
731746

732747
/** Reinitialise. */
@@ -744,7 +759,7 @@ public void ReInit(java.io.Reader stream) {
744759
token = new Token();
745760
jj_ntk = -1;
746761
jj_gen = 0;
747-
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
762+
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
748763
}
749764

750765
/** Constructor with generated Token Manager. */
@@ -753,7 +768,7 @@ public SemgrexParser(SemgrexParserTokenManager tm) {
753768
token = new Token();
754769
jj_ntk = -1;
755770
jj_gen = 0;
756-
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
771+
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
757772
}
758773

759774
/** Reinitialise. */
@@ -762,7 +777,7 @@ public void ReInit(SemgrexParserTokenManager tm) {
762777
token = new Token();
763778
jj_ntk = -1;
764779
jj_gen = 0;
765-
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
780+
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
766781
}
767782

768783
private Token jj_consume_token(int kind) throws ParseException {
@@ -813,12 +828,12 @@ private int jj_ntk_f() {
813828
/** Generate ParseException. */
814829
public ParseException generateParseException() {
815830
jj_expentries.clear();
816-
boolean[] la1tokens = new boolean[26];
831+
boolean[] la1tokens = new boolean[27];
817832
if (jj_kind >= 0) {
818833
la1tokens[jj_kind] = true;
819834
jj_kind = -1;
820835
}
821-
for (int i = 0; i < 31; i++) {
836+
for (int i = 0; i < 32; i++) {
822837
if (jj_la1[i] == jj_gen) {
823838
for (int j = 0; j < 32; j++) {
824839
if ((jj_la1_0[i] & (1<<j)) != 0) {
@@ -827,7 +842,7 @@ public ParseException generateParseException() {
827842
}
828843
}
829844
}
830-
for (int i = 0; i < 26; i++) {
845+
for (int i = 0; i < 27; i++) {
831846
if (la1tokens[i]) {
832847
jj_expentry = new int[1];
833848
jj_expentry[0] = i;

src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.jj

+3-2
Original file line numberDiff line numberDiff line change
@@ -283,13 +283,14 @@ void AddAttribute(NodeAttributes attributes) : {
283283
}
284284
})
285285
|
286-
(attrType = "@") (key = <IDENTIFIER>) "=" (value = <IDENTIFIER> | value = <REGEX>)
286+
(attrType = "@" | attrType = "!@") (key = <IDENTIFIER>) "=" (value = <IDENTIFIER> | value = <REGEX>)
287287
{
288288
if (attr == null || key == null || value == null) {
289289
throw new SemgrexParseException("null while parsing semgrex expression: attr=" + attr +
290290
" key=" + key + " value=" + value);
291291
}
292-
attributes.addContains(attr.image, key.image, value.image);
292+
boolean negated = attrType.image.equals("!@");
293+
attributes.addContains(attr.image, key.image, value.image, negated);
293294
})
294295
)
295296
|

src/edu/stanford/nlp/semgraph/semgrex/SemgrexParserConstants.java

+1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ interface SemgrexParserConstants {
5555
"\"~\"",
5656
"\"=\"",
5757
"\"!:\"",
58+
"\"!@\"",
5859
"\"{\"",
5960
"\";\"",
6061
"\"}\"",

0 commit comments

Comments
 (0)