Skip to content

Commit bc72bce

Browse files
committed Mar 14, 2024 ·
Simplify all the 1-1 tag mappings in UniversalPOSMapper
1 parent 6fa8d4d commit bc72bce

File tree

1 file changed

+41
-231
lines changed

1 file changed

+41
-231
lines changed
 

‎src/edu/stanford/nlp/trees/UniversalPOSMapper.java

+41 -231
Original file line number | Diff line number | Diff line change
@@ -139,237 +139,47 @@ public static void load() {
139139
Tsurgeon.parseOperation("relabel target " + newOp[1])));
140140

141141
}
142-
String newLine = System.lineSeparator();
143-
String rawPattern = String.join(newLine,
144-
// ------------------------------
145-
// 1 to 1 mappings
146-
// ------------------------------
147-
// CC -> CCONJ
148-
"CC=target <... {/.*/}",
149-
"",
150-
"relabel target CCONJ",
151-
"",
152-
153-
// CD -> NUM
154-
"CD=target <... {/.*/}",
155-
"",
156-
"relabel target NUM",
157-
"",
158-
159-
// EX -> PRON
160-
"EX=target <... {/.*/}",
161-
"",
162-
"relabel target PRON",
163-
"",
164-
165-
// FW -> X
166-
"FW=target <... {/.*/}",
167-
"",
168-
"relabel target X",
169-
"",
170-
171-
// JJ.* -> ADJ
172-
"/^JJ.*$/=target < __",
173-
"",
174-
"relabel target ADJ",
175-
"",
176-
177-
// LS -> X
178-
"LS=target <... {/.*/}",
179-
"",
180-
"relabel target X",
181-
"",
182-
183-
// MD -> AUX
184-
"MD=target <... {/.*/}",
185-
"",
186-
"relabel target AUX",
187-
"",
188-
189-
// NNS -> NOUN
190-
"NNS=target <... {/.*/}",
191-
"",
192-
"relabel target NOUN",
193-
"",
194-
195-
// NNP -> PROPN
196-
"NNP=target <... {/.*/}",
197-
"",
198-
"relabel target PROPN",
199-
"",
200-
201-
// NNPS -> PROPN
202-
"NNPS=target <... {/.*/}",
203-
"",
204-
"relabel target PROPN",
205-
"",
206-
207-
// PDT -> DET
208-
"PDT=target <... {/.*/}",
209-
"",
210-
"relabel target DET",
211-
"",
212-
213-
// POS -> PART
214-
"POS=target <... {/.*/}",
215-
"",
216-
"relabel target PART",
217-
"",
218-
219-
// PRP -> PRON
220-
"PRP=target <... {/.*/}",
221-
"",
222-
"relabel target PRON",
223-
"",
224-
225-
// PRP$ -> PRON
226-
"/^PRP\\$$/=target <... {/.*/}",
227-
"",
228-
"relabel target PRON",
229-
"",
230-
231-
// RBR -> ADV
232-
"RBR=target <... {/.*/}",
233-
"",
234-
"relabel target ADV",
235-
"",
236-
237-
// RBS -> ADV
238-
"RBS=target <... {/.*/}",
239-
"",
240-
"relabel target ADV",
241-
"",
242-
243-
// RP -> ADP
244-
"RP=target <... {/.*/}",
245-
"",
246-
"relabel target ADP",
247-
"",
248-
249-
// UH -> INTJ
250-
"UH=target <... {/.*/}",
251-
"",
252-
"relabel target INTJ",
253-
"",
254-
255-
// WP -> PRON
256-
"WP=target <... {/.*/}",
257-
"",
258-
"relabel target PRON",
259-
"",
260-
261-
// WP$ -> PRON
262-
"/^WP\\$$/=target <... {/.*/}",
263-
"",
264-
"relabel target PRON",
265-
"",
266-
267-
// WRB -> ADV
268-
"WRB=target <... {/.*/}",
269-
"",
270-
"relabel target ADV",
271-
"",
272-
273-
// `` -> PUNCT
274-
"/^``$/=target <... {/.*/}",
275-
"",
276-
"relabel target PUNCT",
277-
"",
278-
279-
// '' -> PUNCT
280-
"/^''$/=target < __",
281-
"",
282-
"relabel target PUNCT",
283-
"",
284-
285-
// ( -> PUNCT
286-
"/^\\($/=target <... {/.*/}",
287-
"",
288-
"relabel target PUNCT",
289-
"",
290-
291-
// ) -> PUNCT
292-
"/^\\)$/=target <... {/.*/}",
293-
"",
294-
"relabel target PUNCT",
295-
"",
296-
297-
// -LRB- -> PUNCT
298-
"/^-LRB-$/=target <... {/.*/}",
299-
"",
300-
"relabel target PUNCT",
301-
"",
302-
303-
// -RRB- -> PUNCT
304-
"/^-RRB-$/=target <... {/.*/}",
305-
"",
306-
"relabel target PUNCT",
307-
"",
308-
309-
// , -> PUNCT
310-
"/^,$/=target <... {/.*/}",
311-
"",
312-
"relabel target PUNCT",
313-
"",
314-
315-
// . -> PUNCT
316-
"/^\\.$/=target <... {/.*/}",
317-
"",
318-
"relabel target PUNCT",
319-
"",
320-
321-
// : -> PUNCT
322-
"/^:$/=target <... {/.*/}",
323-
"",
324-
"relabel target PUNCT",
325-
"",
326-
327-
// HYPH -> PUNCT
328-
"HYPH=target <... {/.*/}",
329-
"",
330-
"relabel target PUNCT",
331-
"",
332-
333-
// # -> SYM
334-
"/^#$/=target <... {/.*/}",
335-
"",
336-
"relabel target SYM",
337-
"",
338-
339-
// $ -> SYM. Also note that there is a no-op rule of SYM -> SYM!
340-
"/^\\$$/=target <... {/.*/}",
341-
"",
342-
"relabel target SYM",
343-
"",
344-
345-
// ADD -> X
346-
"ADD=target <... {/.*/}",
347-
"",
348-
"relabel target X",
349-
"",
350-
351-
// AFX -> X
352-
"AFX=target <... {/.*/}",
353-
"",
354-
"relabel target X",
355-
"",
356-
357-
// GW -> X
358-
"GW=target <... {/.*/}",
359-
"",
360-
"relabel target X",
361-
"",
362-
363-
// XX -> X
364-
"XX=target <... {/.*/}",
365-
"",
366-
"relabel target X");
367-
StringReader reader = new StringReader(rawPattern);
368-
try (BufferedReader buffered = new BufferedReader(reader)) {
369-
List<Pair<TregexPattern, TsurgeonPattern>> newOperations = Tsurgeon.getOperationsFromReader(buffered, new TregexPatternCompiler());
370-
operations.addAll(newOperations);
371-
} catch (IOException e) {
372-
throw new RuntimeIOException(e);
142+
143+
144+
String [][] one2oneMappings = new String [][] {
145+
{"CC", "CCONJ"},
146+
{"CD", "NUM"},
147+
{"EX", "PRON"},
148+
{"FW", "X"},
149+
{"/^JJ.*$/", "ADJ"},
150+
{"LS", "X"},
151+
{"MD", "AUX"},
152+
{"NNS", "NOUN"},
153+
{"NNP", "PROPN"},
154+
{"NNPS", "PROPN"},
155+
{"PDT", "DET"},
156+
{"POS", "PART"},
157+
{"PRP", "PRON"},
158+
{"/^PRP[$]$/", "PRON"},
159+
{"RBR", "ADV"},
160+
{"RBS", "ADV"},
161+
{"RP", "ADP"},
162+
{"UH", "INTJ"},
163+
{"WP", "PRON"},
164+
{"/^WP[$]$/", "PRON"},
165+
{"WRB", "ADV"},
166+
{"/^``$/", "PUNCT"},
167+
{"/^''$/", "PUNCT"},
168+
{"/^[()]$/", "PUNCT"},
169+
{"/^-[RL]RB-$/", "PUNCT"},
170+
{"/^[,.:]$/", "PUNCT"},
171+
{"HYPH", "PUNCT"},
172+
// Also note that there is a no-op rule of SYM -> SYM!
173+
{"/^[#$]$/", "SYM"},
174+
{"ADD", "X"},
175+
{"AFX", "X"},
176+
{"GW", "X"},
177+
{"XX", "X"},
178+
};
179+
for (String[] newOp : one2oneMappings) {
180+
operations.add(new Pair<>(TregexPattern.compile(newOp[0] + "=target <: __"),
181+
Tsurgeon.parseOperation("relabel target " + newOp[1])));
182+
373183
}
374184
loaded = true;
375185
}

0 commit comments

Comments
 (0)
Please sign in to comment.