@@ -139,237 +139,47 @@ public static void load() {
139
139
Tsurgeon .parseOperation ("relabel target " + newOp [1 ])));
140
140
141
141
}
142
- String newLine = System .lineSeparator ();
143
- String rawPattern = String .join (newLine ,
144
- // ------------------------------
145
- // 1 to 1 mappings
146
- // ------------------------------
147
- // CC -> CCONJ
148
- "CC=target <... {/.*/}" ,
149
- "" ,
150
- "relabel target CCONJ" ,
151
- "" ,
152
-
153
- // CD -> NUM
154
- "CD=target <... {/.*/}" ,
155
- "" ,
156
- "relabel target NUM" ,
157
- "" ,
158
-
159
- // EX -> PRON
160
- "EX=target <... {/.*/}" ,
161
- "" ,
162
- "relabel target PRON" ,
163
- "" ,
164
-
165
- // FW -> X
166
- "FW=target <... {/.*/}" ,
167
- "" ,
168
- "relabel target X" ,
169
- "" ,
170
-
171
- // JJ.* -> ADJ
172
- "/^JJ.*$/=target < __" ,
173
- "" ,
174
- "relabel target ADJ" ,
175
- "" ,
176
-
177
- // LS -> X
178
- "LS=target <... {/.*/}" ,
179
- "" ,
180
- "relabel target X" ,
181
- "" ,
182
-
183
- // MD -> AUX
184
- "MD=target <... {/.*/}" ,
185
- "" ,
186
- "relabel target AUX" ,
187
- "" ,
188
-
189
- // NNS -> NOUN
190
- "NNS=target <... {/.*/}" ,
191
- "" ,
192
- "relabel target NOUN" ,
193
- "" ,
194
-
195
- // NNP -> PROPN
196
- "NNP=target <... {/.*/}" ,
197
- "" ,
198
- "relabel target PROPN" ,
199
- "" ,
200
-
201
- // NNPS -> PROPN
202
- "NNPS=target <... {/.*/}" ,
203
- "" ,
204
- "relabel target PROPN" ,
205
- "" ,
206
-
207
- // PDT -> DET
208
- "PDT=target <... {/.*/}" ,
209
- "" ,
210
- "relabel target DET" ,
211
- "" ,
212
-
213
- // POS -> PART
214
- "POS=target <... {/.*/}" ,
215
- "" ,
216
- "relabel target PART" ,
217
- "" ,
218
-
219
- // PRP -> PRON
220
- "PRP=target <... {/.*/}" ,
221
- "" ,
222
- "relabel target PRON" ,
223
- "" ,
224
-
225
- // PRP$ -> PRON
226
- "/^PRP\\ $$/=target <... {/.*/}" ,
227
- "" ,
228
- "relabel target PRON" ,
229
- "" ,
230
-
231
- // RBR -> ADV
232
- "RBR=target <... {/.*/}" ,
233
- "" ,
234
- "relabel target ADV" ,
235
- "" ,
236
-
237
- // RBS -> ADV
238
- "RBS=target <... {/.*/}" ,
239
- "" ,
240
- "relabel target ADV" ,
241
- "" ,
242
-
243
- // RP -> ADP
244
- "RP=target <... {/.*/}" ,
245
- "" ,
246
- "relabel target ADP" ,
247
- "" ,
248
-
249
- // UH -> INTJ
250
- "UH=target <... {/.*/}" ,
251
- "" ,
252
- "relabel target INTJ" ,
253
- "" ,
254
-
255
- // WP -> PRON
256
- "WP=target <... {/.*/}" ,
257
- "" ,
258
- "relabel target PRON" ,
259
- "" ,
260
-
261
- // WP$ -> PRON
262
- "/^WP\\ $$/=target <... {/.*/}" ,
263
- "" ,
264
- "relabel target PRON" ,
265
- "" ,
266
-
267
- // WRB -> ADV
268
- "WRB=target <... {/.*/}" ,
269
- "" ,
270
- "relabel target ADV" ,
271
- "" ,
272
-
273
- // `` -> PUNCT
274
- "/^``$/=target <... {/.*/}" ,
275
- "" ,
276
- "relabel target PUNCT" ,
277
- "" ,
278
-
279
- // '' -> PUNCT
280
- "/^''$/=target < __" ,
281
- "" ,
282
- "relabel target PUNCT" ,
283
- "" ,
284
-
285
- // ( -> PUNCT
286
- "/^\\ ($/=target <... {/.*/}" ,
287
- "" ,
288
- "relabel target PUNCT" ,
289
- "" ,
290
-
291
- // ) -> PUNCT
292
- "/^\\ )$/=target <... {/.*/}" ,
293
- "" ,
294
- "relabel target PUNCT" ,
295
- "" ,
296
-
297
- // -LRB- -> PUNCT
298
- "/^-LRB-$/=target <... {/.*/}" ,
299
- "" ,
300
- "relabel target PUNCT" ,
301
- "" ,
302
-
303
- // -RRB- -> PUNCT
304
- "/^-RRB-$/=target <... {/.*/}" ,
305
- "" ,
306
- "relabel target PUNCT" ,
307
- "" ,
308
-
309
- // , -> PUNCT
310
- "/^,$/=target <... {/.*/}" ,
311
- "" ,
312
- "relabel target PUNCT" ,
313
- "" ,
314
-
315
- // . -> PUNCT
316
- "/^\\ .$/=target <... {/.*/}" ,
317
- "" ,
318
- "relabel target PUNCT" ,
319
- "" ,
320
-
321
- // : -> PUNCT
322
- "/^:$/=target <... {/.*/}" ,
323
- "" ,
324
- "relabel target PUNCT" ,
325
- "" ,
326
-
327
- // HYPH -> PUNCT
328
- "HYPH=target <... {/.*/}" ,
329
- "" ,
330
- "relabel target PUNCT" ,
331
- "" ,
332
-
333
- // # -> SYM
334
- "/^#$/=target <... {/.*/}" ,
335
- "" ,
336
- "relabel target SYM" ,
337
- "" ,
338
-
339
- // $ -> SYM. Also note that there is a no-op rule of SYM -> SYM!
340
- "/^\\ $$/=target <... {/.*/}" ,
341
- "" ,
342
- "relabel target SYM" ,
343
- "" ,
344
-
345
- // ADD -> X
346
- "ADD=target <... {/.*/}" ,
347
- "" ,
348
- "relabel target X" ,
349
- "" ,
350
-
351
- // AFX -> X
352
- "AFX=target <... {/.*/}" ,
353
- "" ,
354
- "relabel target X" ,
355
- "" ,
356
-
357
- // GW -> X
358
- "GW=target <... {/.*/}" ,
359
- "" ,
360
- "relabel target X" ,
361
- "" ,
362
-
363
- // XX -> X
364
- "XX=target <... {/.*/}" ,
365
- "" ,
366
- "relabel target X" );
367
- StringReader reader = new StringReader (rawPattern );
368
- try (BufferedReader buffered = new BufferedReader (reader )) {
369
- List <Pair <TregexPattern , TsurgeonPattern >> newOperations = Tsurgeon .getOperationsFromReader (buffered , new TregexPatternCompiler ());
370
- operations .addAll (newOperations );
371
- } catch (IOException e ) {
372
- throw new RuntimeIOException (e );
142
+
143
+
144
+ String [][] one2oneMappings = new String [][] {
145
+ {"CC" , "CCONJ" },
146
+ {"CD" , "NUM" },
147
+ {"EX" , "PRON" },
148
+ {"FW" , "X" },
149
+ {"/^JJ.*$/" , "ADJ" },
150
+ {"LS" , "X" },
151
+ {"MD" , "AUX" },
152
+ {"NNS" , "NOUN" },
153
+ {"NNP" , "PROPN" },
154
+ {"NNPS" , "PROPN" },
155
+ {"PDT" , "DET" },
156
+ {"POS" , "PART" },
157
+ {"PRP" , "PRON" },
158
+ {"/^PRP[$]$/" , "PRON" },
159
+ {"RBR" , "ADV" },
160
+ {"RBS" , "ADV" },
161
+ {"RP" , "ADP" },
162
+ {"UH" , "INTJ" },
163
+ {"WP" , "PRON" },
164
+ {"/^WP[$]$/" , "PRON" },
165
+ {"WRB" , "ADV" },
166
+ {"/^``$/" , "PUNCT" },
167
+ {"/^''$/" , "PUNCT" },
168
+ {"/^[()]$/" , "PUNCT" },
169
+ {"/^-[RL]RB-$/" , "PUNCT" },
170
+ {"/^[,.:]$/" , "PUNCT" },
171
+ {"HYPH" , "PUNCT" },
172
+ // Also note that there is a no-op rule of SYM -> SYM!
173
+ {"/^[#$]$/" , "SYM" },
174
+ {"ADD" , "X" },
175
+ {"AFX" , "X" },
176
+ {"GW" , "X" },
177
+ {"XX" , "X" },
178
+ };
179
+ for (String [] newOp : one2oneMappings ) {
180
+ operations .add (new Pair <>(TregexPattern .compile (newOp [0 ] + "=target <: __" ),
181
+ Tsurgeon .parseOperation ("relabel target " + newOp [1 ])));
182
+
373
183
}
374
184
loaded = true ;
375
185
}
0 commit comments