@@ -90,8 +90,11 @@ private static class TreeToSemanticGraphIterator implements Iterator<Pair<Semant
90
90
private Iterator <Tree > treeIterator ;
91
91
private Tree currentTree ; // = null;
92
92
93
- public TreeToSemanticGraphIterator (Iterator <Tree > treeIterator ) {
93
+ private TreeTransformer corrector ; // = null;
94
+
95
/**
 * Creates an iterator over the given trees.
 *
 * @param treeIterator source of the trees; stored as-is in the treeIterator field
 * @param corrector transformer stored in the corrector field; may be null, in which
 *                  case next() skips its correction step (it checks corrector != null)
 */
+ public TreeToSemanticGraphIterator (Iterator <Tree > treeIterator , TreeTransformer corrector ) {
94
96
this .treeIterator = treeIterator ;
97
+ this .corrector = corrector ;
95
98
}
96
99
97
100
@ Override
@@ -102,6 +105,25 @@ public boolean hasNext() {
102
105
/**
 * Takes the next tree from the wrapped iterator, optionally runs it through the
 * corrector, remembers it in currentTree, and returns a pair whose first element is
 * convertTreeToBasic(t) and whose second element is always null.
 *
 * When a corrector is set, the word and tag of each leaf label are patched up after
 * the transformation (see the inline comments below).
 *
 * @throws RuntimeException if, after correction, a leaf's label is not a CoreLabel
 */
@ Override
103
106
public Pair <SemanticGraph , SemanticGraph > next () {
104
107
Tree t = treeIterator .next ();
108
+ if (corrector != null ) {
109
+ t = corrector .transformTree (t );
110
+ // The corrector uses tsurgeon, with two limitations:
111
+ // - adjoin nodes don't set word(), just set value()
112
+ // - rearranging tags doesn't update the tag() of a leaf
113
+ List <Tree > preterminals = Trees .preTerminals (t );
114
+ for (Tree preterminal : preterminals ) {
115
// Sanity check only: Java evaluates assert statements only when assertions are enabled (-ea).
+ assert preterminal .children ().length == 1 ;
116
+ Tree leaf = preterminal .children ()[0 ];
117
+ if (!(leaf .label () instanceof CoreLabel )) {
118
+ throw new RuntimeException ("These should all be CoreLabels!" );
119
+ }
120
+ CoreLabel leafWord = (CoreLabel ) leaf .label ();
121
// Fill in a missing word() from value(); an existing word() is left untouched.
+ if (leafWord .word () == null && leafWord .value () != null ) {
122
+ leafWord .setWord (leafWord .value ());
123
+ }
124
// Always overwrite the leaf's tag with the value of its preterminal.
+ leafWord .setTag (preterminal .value ());
125
+ }
126
+ }
105
127
// Remember the (possibly corrected) tree; main() reads it back through getCurrentTree().
currentTree = t ;
106
128
return new Pair <>(convertTreeToBasic (t ), null );
107
129
}
@@ -246,10 +268,15 @@ public static void main(String[] args) {
246
268
Iterator <Pair <SemanticGraph , SemanticGraph >> sgIterator ; // = null;
247
269
248
270
if (treeFileName != null ) {
249
- MemoryTreebank tb = new MemoryTreebank (new NPTmpRetainingTreeNormalizer (0 , false , 1 , false , true ));
271
+ NPTmpRetainingTreeNormalizer normalizer = new NPTmpRetainingTreeNormalizer (0 , false , 1 , false , true );
272
+ MemoryTreebank tb = new MemoryTreebank (normalizer );
250
273
tb .loadPath (treeFileName );
251
274
Iterator <Tree > treeIterator = tb .iterator ();
252
- sgIterator = new TreeToSemanticGraphIterator (treeIterator );
275
+ TreeTransformer ptbCorrector = null ;
276
+ if (correctPTB ) {
277
+ ptbCorrector = new CompositeTreeTransformer (new EnglishPTBTreebankCorrector (), normalizer );
278
+ }
279
+ sgIterator = new TreeToSemanticGraphIterator (treeIterator , ptbCorrector );
253
280
} else if (conlluFileName != null ) {
254
281
CoNLLUDocumentReader reader = new CoNLLUDocumentReader ();
255
282
try {
@@ -274,27 +301,17 @@ public static void main(String[] args) {
274
301
275
302
UniversalDependenciesFeatureAnnotator featureAnnotator = (addFeatures ) ? new UniversalDependenciesFeatureAnnotator () : null ;
276
303
EnglishMWTCombiner mwtCombiner = (combineMWTs ) ? new EnglishMWTCombiner () : null ;
277
- EnglishPTBTreebankCorrector ptbCorrector = (correctPTB ) ? new EnglishPTBTreebankCorrector () : null ;
278
304
279
305
CoNLLUDocumentWriter writer = new CoNLLUDocumentWriter ();
280
306
281
307
int graphIdx = 0 ;
282
308
while (sgIterator .hasNext ()) {
283
- Pair <SemanticGraph , SemanticGraph > sgs = sgIterator .next ();
309
+ final Pair <SemanticGraph , SemanticGraph > sgs = sgIterator .next ();
284
310
SemanticGraph sg = sgs .first ();
285
311
286
312
if (treeFileName != null ) {
287
313
//add UPOS tags
288
314
Tree tree = ((TreeToSemanticGraphIterator ) sgIterator ).getCurrentTree ();
289
- if (ptbCorrector != null ) {
290
- tree = ptbCorrector .transformTree (tree );
291
- List <Label > xposLabels = tree .preTerminalYield ();
292
- for (IndexedWord token : sg .vertexListSorted ()) {
293
- int idx = token .index () - 1 ;
294
- String xposTag = xposLabels .get (idx ).value ();
295
- token .set (CoreAnnotations .PartOfSpeechAnnotation .class , xposTag );
296
- }
297
- }
298
315
Tree uposTree = UniversalPOSMapper .mapTree (tree );
299
316
List <Label > uposLabels = uposTree .preTerminalYield ();
300
317
for (IndexedWord token : sg .vertexListSorted ()) {
0 commit comments