Skip to content

Commit e87f437

Browse files
committed
Also output the known tags in a dataset after the dataset has been retagged in the srparser
1 parent 614b936 commit e87f437

File tree

2 files changed

+22
-0
lines changed

2 files changed

+22
-0
lines changed

src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceParser.java

+1
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,7 @@ private void train(List<Pair<String, FileFilter>> trainTreebankPath,
537537
log.info("Retagging with tagger model: " + op.testOptions.taggerSerializedFile);
538538
log.info("Known tags in the tagger model: " + tagger.tagSet());
539539
redoTags(binarizedTrees, tagger, nThreads);
540+
log.info("Tags in training set: " + Trees.uniqueTags(binarizedTrees));
540541
retagTimer.done("Retagging");
541542
}
542543

src/edu/stanford/nlp/trees/Trees.java

+21
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,27 @@ private static void preTerminals(Tree t, List<Tree> l) {
186186
}
187187
}
188188

189+
public static Set<String> uniqueTags(List<Tree> trees) {
190+
Set<String> allTags = new HashSet<>();
191+
for (Tree tree : trees) {
192+
uniqueTags(tree, allTags);
193+
}
194+
return allTags;
195+
}
196+
197+
public static Set<String> uniqueTags(Tree tree) {
198+
List<Label> labels = tree.preTerminalYield();
199+
return uniqueTags(tree, new HashSet<>());
200+
}
201+
202+
public static Set<String> uniqueTags(Tree tree, Set<String> tags) {
203+
List<Label> labels = tree.preTerminalYield();
204+
for (Label label : labels) {
205+
tags.add(label.value());
206+
}
207+
return tags;
208+
}
209+
189210

190211
/**
191212
* returns the labels of the leaves in a Tree in the order that they're found.

0 commit comments

Comments
 (0)