Skip to content

Commit a350aaa

Browse files
committed
Rewrite the contains syntax to look a bit more like a map.
This map allows repeated elements inside the same brackets.
1 parent 29fa00a commit a350aaa

File tree

6 files changed

+139
-53
lines changed

6 files changed

+139
-53
lines changed

src/edu/stanford/nlp/semgraph/semgrex/NodePattern.java

+4-2
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,10 @@ public NodePattern(GraphRelation r, boolean negDesc,
117117

118118
if (!descString.equals("{"))
119119
descString += ";";
120-
String separator = negated ? "!=" : "=";
121-
descString += (annotation + "@" + key + separator + value);
120+
String separator = negated ? "!:" : ":";
121+
// TODO: the descString might look nicer if multiple contains
122+
// for the same attribute were collapsed into the same map
123+
descString += (annotation + ":{" + key + ":" + value + "}");
122124
}
123125

124126
if (attrs.root()) {

src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.java

+84-35
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
6565
case 11:
6666
case 15:
6767
case 17:
68-
case 24:{
68+
case 26:{
6969
node = SubNode(GraphRelation.ROOT);
7070
children.add(node);
7171
label_1:
@@ -135,7 +135,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
135135
}
136136
case 15:
137137
case 17:
138-
case 24:{
138+
case 26:{
139139
result = ModNode(r);
140140
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
141141
case RELATION:
@@ -397,7 +397,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
397397
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
398398
case 15:
399399
case 17:
400-
case 24:{
400+
case 26:{
401401
node = ModNode(reln);
402402
break;
403403
}
@@ -454,7 +454,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
454454
case 14:
455455
case 15:
456456
case 17:
457-
case 24:{
457+
case 26:{
458458
;
459459
break;
460460
}
@@ -485,7 +485,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
485485
boolean startUnderNeg;
486486
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
487487
case 17:
488-
case 24:{
488+
case 26:{
489489
child = Child(r);
490490
break;
491491
}
@@ -512,7 +512,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
512512
child = NodeDisj(r);
513513
break;
514514
}
515-
case 24:{
515+
case 26:{
516516
child = Description(r);
517517
break;
518518
}
@@ -529,6 +529,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
529529
Token key = null;
530530
Token value = null;
531531
Token attrType = null;
532+
boolean negated = false;
532533
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
533534
case IDENTIFIER:{
534535
attr = jj_consume_token(IDENTIFIER);
@@ -564,29 +565,28 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
564565
throw new ParseException();
565566
}
566567
if (attr != null && value != null) {
567-
boolean negated = attrType.image.equals("!:");
568+
negated = attrType.image.equals("!:");
568569
attributes.setAttribute(attr.image, value.image, negated);
569570
}
570571
break;
571572
}
572-
case ALIGNRELN:
573573
case 23:{
574+
jj_consume_token(23);
575+
key = jj_consume_token(IDENTIFIER);
574576
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
575-
case ALIGNRELN:{
576-
attrType = jj_consume_token(ALIGNRELN);
577+
case 10:{
578+
attrType = jj_consume_token(10);
577579
break;
578580
}
579-
case 23:{
580-
attrType = jj_consume_token(23);
581+
case 22:{
582+
attrType = jj_consume_token(22);
581583
break;
582584
}
583585
default:
584586
jj_la1[25] = jj_gen;
585587
jj_consume_token(-1);
586588
throw new ParseException();
587589
}
588-
key = jj_consume_token(IDENTIFIER);
589-
jj_consume_token(21);
590590
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
591591
case IDENTIFIER:{
592592
value = jj_consume_token(IDENTIFIER);
@@ -605,12 +605,61 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
605605
{if (true) throw new SemgrexParseException("null while parsing semgrex expression: attr=" + attr +
606606
" key=" + key + " value=" + value);}
607607
}
608-
boolean negated = attrType.image.equals("!@");
608+
negated = attrType.image.equals("!:");
609609
attributes.addContains(attr.image, key.image, value.image, negated);
610+
label_6:
611+
while (true) {
612+
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
613+
case 24:{
614+
;
615+
break;
616+
}
617+
default:
618+
jj_la1[27] = jj_gen;
619+
break label_6;
620+
}
621+
jj_consume_token(24);
622+
key = jj_consume_token(IDENTIFIER);
623+
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
624+
case 10:{
625+
attrType = jj_consume_token(10);
626+
break;
627+
}
628+
case 22:{
629+
attrType = jj_consume_token(22);
630+
break;
631+
}
632+
default:
633+
jj_la1[28] = jj_gen;
634+
jj_consume_token(-1);
635+
throw new ParseException();
636+
}
637+
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
638+
case IDENTIFIER:{
639+
value = jj_consume_token(IDENTIFIER);
640+
break;
641+
}
642+
case REGEX:{
643+
value = jj_consume_token(REGEX);
644+
break;
645+
}
646+
default:
647+
jj_la1[29] = jj_gen;
648+
jj_consume_token(-1);
649+
throw new ParseException();
650+
}
651+
if (attr == null || key == null || value == null) {
652+
{if (true) throw new SemgrexParseException("null while parsing semgrex expression: attr=" + attr +
653+
" key=" + key + " value=" + value);}
654+
}
655+
negated = attrType.image.equals("!:");
656+
attributes.addContains(attr.image, key.image, value.image, negated);
657+
}
658+
jj_consume_token(25);
610659
break;
611660
}
612661
default:
613-
jj_la1[27] = jj_gen;
662+
jj_la1[30] = jj_gen;
614663
jj_consume_token(-1);
615664
throw new ParseException();
616665
}
@@ -627,7 +676,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
627676
break;
628677
}
629678
default:
630-
jj_la1[28] = jj_gen;
679+
jj_la1[31] = jj_gen;
631680
jj_consume_token(-1);
632681
throw new ParseException();
633682
}
@@ -637,33 +686,33 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
637686
boolean link = false;
638687
NodeAttributes attributes = new NodeAttributes();
639688
NodePattern pat;
640-
jj_consume_token(24);
689+
jj_consume_token(26);
641690
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
642691
case IDENTIFIER:
643692
case EMPTY:
644693
case ROOT:{
645694
AddAttribute(attributes);
646-
label_6:
695+
label_7:
647696
while (true) {
648697
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
649-
case 25:{
698+
case 24:{
650699
;
651700
break;
652701
}
653702
default:
654-
jj_la1[29] = jj_gen;
655-
break label_6;
703+
jj_la1[32] = jj_gen;
704+
break label_7;
656705
}
657-
jj_consume_token(25);
706+
jj_consume_token(24);
658707
AddAttribute(attributes);
659708
}
660709
break;
661710
}
662711
default:
663-
jj_la1[30] = jj_gen;
712+
jj_la1[33] = jj_gen;
664713
;
665714
}
666-
jj_consume_token(26);
715+
jj_consume_token(25);
667716
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
668717
case 21:{
669718
jj_consume_token(21);
@@ -680,7 +729,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
680729
break;
681730
}
682731
default:
683-
jj_la1[31] = jj_gen;
732+
jj_la1[34] = jj_gen;
684733
;
685734
}
686735
pat = new NodePattern(r, underNodeNegation, attributes, link, name != null ? name.image : null);
@@ -697,13 +746,13 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
697746
public Token jj_nt;
698747
private int jj_ntk;
699748
private int jj_gen;
700-
final private int[] jj_la1 = new int[32];
749+
final private int[] jj_la1 = new int[35];
701750
static private int[] jj_la1_0;
702751
static {
703752
jj_la1_init_0();
704753
}
705754
private static void jj_la1_init_0() {
706-
jj_la1_0 = new int[] {0x400,0x1028808,0x3801c,0x3801c,0x1028800,0x2000,0x3c01c,0x4000,0x3801c,0x2001c,0x80000,0x10,0x110,0x110,0x100000,0x200000,0x1c,0x1028800,0x2000,0x102c000,0x4000,0x1028000,0x1020000,0x400400,0x110,0x800008,0x110,0xc00408,0xd0,0x2000000,0xd0,0x200000,};
755+
jj_la1_0 = new int[] {0x400,0x4028808,0x3801c,0x3801c,0x4028800,0x2000,0x3c01c,0x4000,0x3801c,0x2001c,0x80000,0x10,0x110,0x110,0x100000,0x200000,0x1c,0x4028800,0x2000,0x402c000,0x4000,0x4028000,0x4020000,0x400400,0x110,0x400400,0x110,0x1000000,0x400400,0x110,0xc00400,0xd0,0x1000000,0xd0,0x200000,};
707756
}
708757

709758
/** Constructor with InputStream. */
@@ -717,7 +766,7 @@ public SemgrexParser(java.io.InputStream stream, String encoding) {
717766
token = new Token();
718767
jj_ntk = -1;
719768
jj_gen = 0;
720-
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
769+
for (int i = 0; i < 35; i++) jj_la1[i] = -1;
721770
}
722771

723772
/** Reinitialise. */
@@ -731,7 +780,7 @@ public void ReInit(java.io.InputStream stream, String encoding) {
731780
token = new Token();
732781
jj_ntk = -1;
733782
jj_gen = 0;
734-
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
783+
for (int i = 0; i < 35; i++) jj_la1[i] = -1;
735784
}
736785

737786
/** Constructor. */
@@ -741,7 +790,7 @@ public SemgrexParser(java.io.Reader stream) {
741790
token = new Token();
742791
jj_ntk = -1;
743792
jj_gen = 0;
744-
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
793+
for (int i = 0; i < 35; i++) jj_la1[i] = -1;
745794
}
746795

747796
/** Reinitialise. */
@@ -759,7 +808,7 @@ public void ReInit(java.io.Reader stream) {
759808
token = new Token();
760809
jj_ntk = -1;
761810
jj_gen = 0;
762-
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
811+
for (int i = 0; i < 35; i++) jj_la1[i] = -1;
763812
}
764813

765814
/** Constructor with generated Token Manager. */
@@ -768,7 +817,7 @@ public SemgrexParser(SemgrexParserTokenManager tm) {
768817
token = new Token();
769818
jj_ntk = -1;
770819
jj_gen = 0;
771-
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
820+
for (int i = 0; i < 35; i++) jj_la1[i] = -1;
772821
}
773822

774823
/** Reinitialise. */
@@ -777,7 +826,7 @@ public void ReInit(SemgrexParserTokenManager tm) {
777826
token = new Token();
778827
jj_ntk = -1;
779828
jj_gen = 0;
780-
for (int i = 0; i < 32; i++) jj_la1[i] = -1;
829+
for (int i = 0; i < 35; i++) jj_la1[i] = -1;
781830
}
782831

783832
private Token jj_consume_token(int kind) throws ParseException {
@@ -833,7 +882,7 @@ public ParseException generateParseException() {
833882
la1tokens[jj_kind] = true;
834883
jj_kind = -1;
835884
}
836-
for (int i = 0; i < 32; i++) {
885+
for (int i = 0; i < 35; i++) {
837886
if (jj_la1[i] == jj_gen) {
838887
for (int j = 0; j < 32; j++) {
839888
if ((jj_la1_0[i] & (1<<j)) != 0) {

src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.jj

+15-3
Original file line numberDiff line numberDiff line change
@@ -274,24 +274,36 @@ void AddAttribute(NodeAttributes attributes) : {
274274
Token key = null;
275275
Token value = null;
276276
Token attrType = null;
277+
boolean negated = false;
277278
} {
278279
(attr = <IDENTIFIER>
279280
(( (attrType = ":" | attrType = "!:") (value = <IDENTIFIER> | value = <REGEX>) {
280281
if (attr != null && value != null) {
281-
boolean negated = attrType.image.equals("!:");
282+
negated = attrType.image.equals("!:");
282283
attributes.setAttribute(attr.image, value.image, negated);
283284
}
284285
})
285286
|
286-
(attrType = "@" | attrType = "!@") (key = <IDENTIFIER>) "=" (value = <IDENTIFIER> | value = <REGEX>)
287+
( ":{"
288+
((key = <IDENTIFIER>) (attrType = ":" | attrType = "!:") (value = <IDENTIFIER> | value = <REGEX>)
287289
{
288290
if (attr == null || key == null || value == null) {
289291
throw new SemgrexParseException("null while parsing semgrex expression: attr=" + attr +
290292
" key=" + key + " value=" + value);
291293
}
292-
boolean negated = attrType.image.equals("!@");
294+
negated = attrType.image.equals("!:");
293295
attributes.addContains(attr.image, key.image, value.image, negated);
294296
})
297+
( ";" (key = <IDENTIFIER>) (attrType = ":" | attrType = "!:") (value = <IDENTIFIER> | value = <REGEX>)
298+
{
299+
if (attr == null || key == null || value == null) {
300+
throw new SemgrexParseException("null while parsing semgrex expression: attr=" + attr +
301+
" key=" + key + " value=" + value);
302+
}
303+
negated = attrType.image.equals("!:");
304+
attributes.addContains(attr.image, key.image, value.image, negated);
305+
})*
306+
"}" ))
295307
)
296308
|
297309
( attr = <ROOT> { attributes.setRoot(true); } )

src/edu/stanford/nlp/semgraph/semgrex/SemgrexParserConstants.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,10 @@ interface SemgrexParserConstants {
5555
"\"~\"",
5656
"\"=\"",
5757
"\"!:\"",
58-
"\"!@\"",
59-
"\"{\"",
58+
"\":{\"",
6059
"\";\"",
6160
"\"}\"",
61+
"\"{\"",
6262
};
6363

6464
}

0 commit comments

Comments
 (0)