-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathnormalize-lexer-rules.sh
137 lines (118 loc) · 3.13 KB
/
normalize-lexer-rules.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#
# Command-line option handling.
#   -x  turn on shell tracing and remember the flag in $optx so it can be
#       passed on to the helper scripts this script invokes.
#   -h  print the manual text to stdout and exit successfully.
# Any other option is rejected: getopts reports it, the manual text goes to
# stderr and the script exits with status 2 instead of pretending success.
# After the loop the positional parameters hold only the grammar files.

# Start from a known value so an inherited environment variable cannot leak
# into the helper-script command lines.
optx=""

# Script name used in the manual text; quoted so that a path containing
# spaces is handled correctly.
prog=$(basename -- "$0")

# Print the manual text for this script on stdout.
show_help() {
cat <<EOF
NAME
$prog -- rename lexer rule names using standardized Unicode names
SYNOPSIS
$prog ([-x | -h])* [grammar-files]
DESCRIPTION
Renames lexer rule names using the standard Unicode name in an Antlr4 grammar.
This script must be run under Linux Bash or Windows MSYS2 Bash or Windows WSL Linux.
OPTIONS
-h
Output this help message.
-x
Execute "set -x" to debug script.
EXAMPLE USAGE
git clone https://github.com/antlr/grammars-v4.git
cd grammars-v4/abb
$prog *.g4
cd ../java/java20
trparse -t ANTLRv4 *.g4 | $prog
EOF
}

while getopts 'xh' opt; do
  case "$opt" in
    x)
      set -x
      optx="-x"
      ;;
    h)
      show_help
      exit 0
      ;;
    *)
      # getopts sets opt to "?" for an unknown option and has already
      # printed its own diagnostic.
      show_help >&2
      exit 2
      ;;
  esac
done
shift $((OPTIND - 1))
# Grammar files named on the command line (may be empty, in which case the
# script reads an already-parsed stream from standard input).
files=("$@")
# Scratch file that receives the parse-tree stream. Abort if it cannot be
# created, and remove it on every exit path -- including the early "exit 1"
# taken when a prerequisite is missing -- so it is never left behind.
temp=$(mktemp) || {
  echo "Could not create a temporary file." >&2
  exit 1
}
trap 'rm -f -- "$temp"' EXIT
# Prerequisite check: every external program must be reachable through PATH.
# The first one that is missing is reported and the script stops with
# status 1.
command -v dotnet &> /dev/null || {
  echo "'dotnet' could not be found. Install Microsoft NET."
  exit 1
}
# Trash Toolkit commands, probed in a fixed order: the first four are
# referenced in this file, the last two are referenced elsewhere.
trash_tools=(trparse trxgrep trrename trsponge trquery trunfold)
for required_tool in "${trash_tools[@]}"; do
  command -v "$required_tool" &> /dev/null && continue
  echo "'$required_tool' could not be found. Install Trash Toolkit."
  exit 1
done
# Fill $temp with the parse-tree stream: parse the grammar files given on the
# command line, or, when none were given, copy standard input (the help text
# shows it being fed from "trparse -t ANTLRv4 *.g4").
# The expansions are quoted so that file names containing spaces or glob
# characters reach trparse as single, unmodified arguments.
if (( ${#files[@]} > 0 )); then
  trparse -t ANTLRv4 "${files[@]}" > "$temp"
else
  cat - > "$temp"
fi
# Absolute path of this script and of the directory that contains it; the
# helper scripts and UCD/NamesList.txt are looked up relative to that
# directory. Quoted so that an install path containing spaces still works.
full_path_script=$(realpath -- "$0")
full_path_script_dir=$(dirname -- "$full_path_script")
# Both queries select the same lexer rules: a rule block made of a single
# alternative (no OR), without lexer commands, holding exactly one element
# with no EBNF suffix, whose atom is a terminalDef without element options
# and whose STRING_LITERAL text is shorter than 4 characters.
#
# Query 1 writes the literal of each such rule to chars.txt, one per line,
# with the leading and trailing single quote stripped.
# The parse stream is read straight from "$temp" (quoted, no extra cat
# process) and both quote removals are done by one sed invocation.
trxgrep -e '
//lexerRuleSpec
/lexerRuleBlock
/lexerAltList[not(OR)]
/lexerAlt[not(lexerCommands)]
/lexerElements[count(*)=1]
/lexerElement[not(ebnfSuffix)]
/lexerAtom
/terminalDef[not(elementOptions)]
/STRING_LITERAL[string-length(.) < 4]
/text()' < "$temp" | sed -e "s/^'//" -e "s/'$//" > chars.txt
# Query 2 writes the rule name (TOKEN_REF) of each such rule to
# original_names.txt; the script later pairs the two files line by line.
trxgrep -e '
//lexerRuleSpec
[
lexerRuleBlock
/lexerAltList[not(OR)]
/lexerAlt[not(lexerCommands)]
/lexerElements[count(*)=1]
/lexerElement[not(ebnfSuffix)]
/lexerAtom
/terminalDef[not(elementOptions)]
/STRING_LITERAL[string-length(.) < 4]]
/TOKEN_REF
/text()' < "$temp" > original_names.txt
#######################################
# Print the Unicode name for the character found on each line of a file.
# Arguments:
#   $1 - file holding one character per line
#   $2 - path to a Unicode NamesList.txt
# Outputs:
#   Exactly one line per input line: the name with spaces and hyphens
#   replaced by underscores, or an empty line when the input line is not a
#   single byte or no entry starting with "00<HEX>" exists in the list.
#   Emitting one line per input line keeps the output aligned with the
#   rule-name file it is pasted against.
#######################################
unicode_names_for_chars() {
  local chars_file=$1 names_list=$2
  local ch hex name
  while IFS= read -r ch || [[ -n "$ch" ]]; do
    ch=${ch%$'\r'}
    # -v stops od from collapsing repeated output, -An drops the offsets.
    hex=$(printf '%s' "$ch" | od -An -v -t x1 | tr -d ' \t\n')
    name=""
    if (( ${#hex} == 2 )); then
      # cut -c 6- skips the 4-digit code point and the separator after it.
      name=$(grep -m 1 "^00${hex^^}" "$names_list" \
        | cut -c 6- | tr -d '\r' | sed 's/[ -]/_/g')
    fi
    printf '%s\n' "$name"
  done < "$chars_file"
}

# Number of lexer rules selected for renaming (0 when the file is missing).
name_count=0
if [[ -f original_names.txt ]]; then
  name_count=$(wc -l < original_names.txt)
fi

if (( name_count != 0 )); then
  char_count=0
  if [[ -f chars.txt ]]; then
    char_count=$(wc -l < chars.txt)
  fi

  # Always leave a (possibly empty) rename table behind so that the rename
  # step never reads a missing or stale file.
  rm -f new_names.txt
  : > renames.txt
  if (( name_count == char_count )); then
    unicode_names_for_chars chars.txt \
      "$full_path_script_dir/UCD/NamesList.txt" > new_names.txt
    # Build "old,new" pairs; pairs without a usable new name are dropped
    # rather than renaming a rule to the empty string.
    paste original_names.txt new_names.txt \
      | tr -d '\r' \
      | awk -F '\t' '$1 != "" && $2 != "" { print $1 "," $2 }' > renames.txt
  else
    echo "Rule names and literals do not line up ($name_count vs $char_count); no rules will be renamed." >&2
  fi

  echo ""
  echo Renaming lexer symbols ...
  trrename -R renames.txt < "$temp" | trsponge -c
  echo ""
  echo Unfold string literals into all parser rules ...
  # ${optx:+...} passes -x only when tracing was requested.
  "$full_path_script_dir/unfold-string-literals.sh" ${optx:+"$optx"} "${files[@]}"
  echo ""
  echo Removing unused parentheses ...
  "$full_path_script_dir/delete-useless-parentheses.sh" ${optx:+"$optx"} "${files[@]}"
  echo ""
  echo Done.
fi

# Remove the work files created in the current directory and the parse stream.
rm -f -- renames.txt original_names.txt new_names.txt chars.txt "$temp"