Skip to content

Commit 3dc9990

Browse files
committed
Parse a node containment option... need to process it in the NodePattern still
1 parent bff3588 commit 3dc9990

File tree

3 files changed

+92
-34
lines changed

3 files changed

+92
-34
lines changed

src/edu/stanford/nlp/semgraph/semgrex/NodeAttributes.java

+11
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,13 @@ public class NodeAttributes {
2626
// String, String, Boolean: key, value, negated
2727
private List<Triple<String, String, Boolean>> attributes;
2828
private Set<String> positiveAttributes;
29+
// Some annotations, especially morphological features (CoreAnnotations.CoNLLUFeats)
30+
// are represented by Maps. In some cases it will be easier to search
31+
// for individual elements of that map rather than turn the map into a string
32+
// and search on its contents that way. This is especially true since there
33+
// is no guarantee the map will be in a consistent order.
34+
// String, String, String: node attribute for a map (such as CoNLLUFeats), key in that map, value to match
35+
private List<Triple<String, String, String>> contains;
2936

3037
public NodeAttributes() {
3138
root = false;
@@ -60,6 +67,10 @@ public void setAttribute(String key, String value, boolean negated) {
6067
attributes.add(new Triple(key, value, negated));
6168
}
6269

70+
/**
 * Records a containment constraint: the map-valued node attribute
 * {@code annotation} is expected to hold {@code value} under {@code key}.
 * The triple is appended to the {@code contains} list as
 * (annotation, key, value).
 *
 * NOTE(review): no initialization of {@code contains} is visible in this
 * diff; confirm the constructor assigns it, otherwise this call throws a
 * NullPointerException.
 *
 * @param annotation node attribute for a map (such as CoNLLUFeats)
 * @param key key in that map
 * @param value value to match; the parser passes the image of either an
 *              IDENTIFIER or a REGEX token here
 */
public void addContains(String annotation, String key, String value) {
71+
contains.add(new Triple(annotation, key, value));
72+
}
73+
6374
public List<Triple<String, String, Boolean>> attributes() {
6475
return Collections.unmodifiableList(attributes);
6576
}

src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.java

+70-32
Original file line numberDiff line numberDiff line change
@@ -526,40 +526,77 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
526526
}
527527

528528
final public void AddAttribute(NodeAttributes attributes) throws ParseException {Token attr = null;
529+
Token key = null;
529530
Token value = null;
530531
Token attrType = null;
531532
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
533+
case ALIGNRELN:
532534
case IDENTIFIER:{
533-
attr = jj_consume_token(IDENTIFIER);
534535
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
535-
case 10:{
536-
attrType = jj_consume_token(10);
537-
break;
536+
case IDENTIFIER:{
537+
attr = jj_consume_token(IDENTIFIER);
538+
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
539+
case 10:{
540+
attrType = jj_consume_token(10);
541+
break;
542+
}
543+
case 22:{
544+
attrType = jj_consume_token(22);
545+
break;
546+
}
547+
default:
548+
jj_la1[23] = jj_gen;
549+
jj_consume_token(-1);
550+
throw new ParseException();
538551
}
539-
case 22:{
540-
attrType = jj_consume_token(22);
541-
break;
552+
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
553+
case IDENTIFIER:{
554+
value = jj_consume_token(IDENTIFIER);
555+
break;
556+
}
557+
case REGEX:{
558+
value = jj_consume_token(REGEX);
559+
break;
560+
}
561+
default:
562+
jj_la1[24] = jj_gen;
563+
jj_consume_token(-1);
564+
throw new ParseException();
542565
}
543-
default:
544-
jj_la1[23] = jj_gen;
545-
jj_consume_token(-1);
546-
throw new ParseException();
547-
}
548-
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
549-
case IDENTIFIER:{
550-
value = jj_consume_token(IDENTIFIER);
551566
break;
552567
}
553-
case REGEX:{
554-
value = jj_consume_token(REGEX);
568+
case ALIGNRELN:{
569+
attrType = jj_consume_token(ALIGNRELN);
570+
key = jj_consume_token(IDENTIFIER);
571+
jj_consume_token(21);
572+
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
573+
case IDENTIFIER:{
574+
value = jj_consume_token(IDENTIFIER);
575+
break;
576+
}
577+
case REGEX:{
578+
value = jj_consume_token(REGEX);
579+
break;
580+
}
581+
default:
582+
jj_la1[25] = jj_gen;
583+
jj_consume_token(-1);
584+
throw new ParseException();
585+
}
555586
break;
556587
}
557588
default:
558-
jj_la1[24] = jj_gen;
589+
jj_la1[26] = jj_gen;
559590
jj_consume_token(-1);
560591
throw new ParseException();
561592
}
562-
if (attr != null && value != null) {
593+
if (attrType.image.equals("@")) {
594+
if (attr == null || key == null || value == null) {
595+
{if (true) throw new SemgrexParseException("null while parsing semgrex expression: attr=" + attr +
596+
" key=" + key + " value=" + value);}
597+
}
598+
attributes.addContains(attr.image, key.image, value.image);
599+
} else if (attr != null && value != null) {
563600
boolean negated = attrType.image.equals("!:");
564601
attributes.setAttribute(attr.image, value.image, negated);
565602
}
@@ -576,7 +613,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
576613
break;
577614
}
578615
default:
579-
jj_la1[25] = jj_gen;
616+
jj_la1[27] = jj_gen;
580617
jj_consume_token(-1);
581618
throw new ParseException();
582619
}
@@ -588,6 +625,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
588625
NodePattern pat;
589626
jj_consume_token(23);
590627
switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) {
628+
case ALIGNRELN:
591629
case IDENTIFIER:
592630
case EMPTY:
593631
case ROOT:{
@@ -600,7 +638,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
600638
break;
601639
}
602640
default:
603-
jj_la1[26] = jj_gen;
641+
jj_la1[28] = jj_gen;
604642
break label_6;
605643
}
606644
jj_consume_token(24);
@@ -609,7 +647,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
609647
break;
610648
}
611649
default:
612-
jj_la1[27] = jj_gen;
650+
jj_la1[29] = jj_gen;
613651
;
614652
}
615653
jj_consume_token(25);
@@ -629,7 +667,7 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
629667
break;
630668
}
631669
default:
632-
jj_la1[28] = jj_gen;
670+
jj_la1[30] = jj_gen;
633671
;
634672
}
635673
pat = new NodePattern(r, underNodeNegation, attributes, link, name != null ? name.image : null);
@@ -646,13 +684,13 @@ final public SemgrexPattern Root() throws ParseException {// Root pattern for th
646684
public Token jj_nt;
647685
private int jj_ntk;
648686
private int jj_gen;
649-
final private int[] jj_la1 = new int[29];
687+
final private int[] jj_la1 = new int[31];
650688
static private int[] jj_la1_0;
651689
static {
652690
jj_la1_init_0();
653691
}
654692
private static void jj_la1_init_0() {
655-
jj_la1_0 = new int[] {0x400,0x828808,0x3801c,0x3801c,0x828800,0x2000,0x3c01c,0x4000,0x3801c,0x2001c,0x80000,0x10,0x110,0x110,0x100000,0x200000,0x1c,0x828800,0x2000,0x82c000,0x4000,0x828000,0x820000,0x400400,0x110,0xd0,0x1000000,0xd0,0x200000,};
693+
jj_la1_0 = new int[] {0x400,0x828808,0x3801c,0x3801c,0x828800,0x2000,0x3c01c,0x4000,0x3801c,0x2001c,0x80000,0x10,0x110,0x110,0x100000,0x200000,0x1c,0x828800,0x2000,0x82c000,0x4000,0x828000,0x820000,0x400400,0x110,0x110,0x18,0xd8,0x1000000,0xd8,0x200000,};
656694
}
657695

658696
/** Constructor with InputStream. */
@@ -666,7 +704,7 @@ public SemgrexParser(java.io.InputStream stream, String encoding) {
666704
token = new Token();
667705
jj_ntk = -1;
668706
jj_gen = 0;
669-
for (int i = 0; i < 29; i++) jj_la1[i] = -1;
707+
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
670708
}
671709

672710
/** Reinitialise. */
@@ -680,7 +718,7 @@ public void ReInit(java.io.InputStream stream, String encoding) {
680718
token = new Token();
681719
jj_ntk = -1;
682720
jj_gen = 0;
683-
for (int i = 0; i < 29; i++) jj_la1[i] = -1;
721+
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
684722
}
685723

686724
/** Constructor. */
@@ -690,7 +728,7 @@ public SemgrexParser(java.io.Reader stream) {
690728
token = new Token();
691729
jj_ntk = -1;
692730
jj_gen = 0;
693-
for (int i = 0; i < 29; i++) jj_la1[i] = -1;
731+
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
694732
}
695733

696734
/** Reinitialise. */
@@ -708,7 +746,7 @@ public void ReInit(java.io.Reader stream) {
708746
token = new Token();
709747
jj_ntk = -1;
710748
jj_gen = 0;
711-
for (int i = 0; i < 29; i++) jj_la1[i] = -1;
749+
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
712750
}
713751

714752
/** Constructor with generated Token Manager. */
@@ -717,7 +755,7 @@ public SemgrexParser(SemgrexParserTokenManager tm) {
717755
token = new Token();
718756
jj_ntk = -1;
719757
jj_gen = 0;
720-
for (int i = 0; i < 29; i++) jj_la1[i] = -1;
758+
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
721759
}
722760

723761
/** Reinitialise. */
@@ -726,7 +764,7 @@ public void ReInit(SemgrexParserTokenManager tm) {
726764
token = new Token();
727765
jj_ntk = -1;
728766
jj_gen = 0;
729-
for (int i = 0; i < 29; i++) jj_la1[i] = -1;
767+
for (int i = 0; i < 31; i++) jj_la1[i] = -1;
730768
}
731769

732770
private Token jj_consume_token(int kind) throws ParseException {
@@ -782,7 +820,7 @@ public ParseException generateParseException() {
782820
la1tokens[jj_kind] = true;
783821
jj_kind = -1;
784822
}
785-
for (int i = 0; i < 29; i++) {
823+
for (int i = 0; i < 31; i++) {
786824
if (jj_la1[i] == jj_gen) {
787825
for (int j = 0; j < 32; j++) {
788826
if ((jj_la1_0[i] & (1<<j)) != 0) {

src/edu/stanford/nlp/semgraph/semgrex/SemgrexParser.jj

+11-2
Original file line numberDiff line numberDiff line change
@@ -271,12 +271,21 @@ SemgrexPattern Child(GraphRelation r) : {
271271

272272
void AddAttribute(NodeAttributes attributes) : {
273273
Token attr = null;
274+
Token key = null;
274275
Token value = null;
275276
Token attrType = null;
276277
} {
277-
((attr = <IDENTIFIER> (attrType = ":" | attrType = "!:") (value = <IDENTIFIER> | value = <REGEX>) )
278+
((attr = <IDENTIFIER>
279+
( (attrType = ":" | attrType = "!:") (value = <IDENTIFIER> | value = <REGEX>) ) |
280+
( (attrType = "@") (key = <IDENTIFIER>) "=" (value = <IDENTIFIER> | value = <REGEX>) ) )
278281
{
279-
if (attr != null && value != null) {
282+
if (attrType.image.equals("@")) {
283+
if (attr == null || key == null || value == null) {
284+
throw new SemgrexParseException("null while parsing semgrex expression: attr=" + attr +
285+
" key=" + key + " value=" + value);
286+
}
287+
attributes.addContains(attr.image, key.image, value.image);
288+
} else if (attr != null && value != null) {
280289
boolean negated = attrType.image.equals("!:");
281290
attributes.setAttribute(attr.image, value.image, negated);
282291
}

0 commit comments

Comments
 (0)