-
Notifications
You must be signed in to change notification settings - Fork 30
/
Copy pathgen-ocaml
executable file
·245 lines (212 loc) · 5.64 KB
/
gen-ocaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
#! /usr/bin/env bash
#
# Generate OCaml parsers
#
# For lang=ruby, the file hierarchy we create looks like this:
#
# ocaml-src
# ├── bin
# │ ├── dune
# │ └── Main.ml
# ├── lib
# │ ├── bindings.c
# │ ├── Boilerplate.ml
# │ ├── CST.ml
# │ ├── dune
# │ ├── Parse.ml
# │ ├── Parse.mli
# │ ├── parser.c
# │ ├── scanner.cc
# │ └── tree_sitter
# │ └── parser.h
# └── tree-sitter-lang.opam
#
# Abort on any command failure, on use of an unset variable, and on a failure
# anywhere in a pipeline.
set -eu -o pipefail

# NOTE(review): project_root is not referenced anywhere else in this script,
# yet computing it makes the script exit (because of 'set -e') when it is run
# outside of a git checkout.
project_root=$(git rev-parse --show-toplevel)

# The ocaml-tree-sitter executable and the runtime library are assumed
# to have been installed with 'make install'.
#
# The lookup is guarded explicitly: with 'set -e', a bare failing command
# substitution would terminate the script before any diagnostic is printed.
ocaml_tree_sitter=$(command -v ocaml-tree-sitter) || {
  echo "Error: missing executable ocaml-tree-sitter" >&2
  exit 1
}
# Default values for the --dst, --src and --lang options. They are shown in
# the help message and used when the corresponding option is not given.
default_dst_dir="ocaml-src"
default_src_dir="src"
default_lang="lang"
# Print the command-line help on stdout.
#
# Every "Default:" shown is taken from the default_* variables so that the
# help text stays correct even when an option was overridden earlier on the
# same command line (e.g. '--src foo --help').
usage() {
  cat <<EOF
Usage: $(basename "$0") [OPTIONS]
Call ocaml-tree-sitter to derive an OCaml parsing library and executable
from a tree-sitter grammar in json format, normally generated by
tree-sitter as 'src/grammar.json'.
Options:
--dst DST_DIR
Specify the output directory. Default: $default_dst_dir
--src SRC_DIR
Location of the 'src' folder with some of its contents generated
by tree-sitter. It must contain 'grammar.json', 'parser.c',
and optionally other C files ('scanner.c' or 'scanner.cc') needed
to build the C parser. Default: $default_src_dir
--help
Show this help message and exit.
--lang NAME
Name of the programming language. It will be part of the name
of the library and of the OCaml module. Case conversion and conversions
between dashes and underscores will take place as needed.
Default: $default_lang
--trace
Print debugging info during parsing.
EOF
}
# Report a fatal problem: write "Error: <arguments>" to stderr, then
# terminate the script with exit status 1.
error() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}
# Stop right away if the code generator cannot be executed.
if [[ ! -x "$ocaml_tree_sitter" ]]; then
  error "missing executable $ocaml_tree_sitter"
fi

# Start from the defaults; the command-line options may override them.
dst_dir="$default_dst_dir"
src_dir="$default_src_dir"
lang="$default_lang"

# Extra arguments forwarded to ocaml-tree-sitter (empty unless --trace).
trace_option=()
# Parse the command-line options.
#
# Options that take a value consume two arguments: one 'shift' inside the
# branch, plus the common 'shift' at the end of the loop body. The presence
# of the value is checked explicitly so that a trailing '--dst', '--lang' or
# '--src' produces a readable error instead of bash's "unbound variable"
# failure triggered by 'set -u'.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --dst)
      [[ $# -ge 2 ]] || error "Missing value for option $1"
      dst_dir="$2"
      shift
      ;;
    --help)
      usage
      exit 0
      ;;
    --lang)
      [[ $# -ge 2 ]] || error "Missing value for option $1"
      lang="$2"
      shift
      ;;
    --src)
      [[ $# -ge 2 ]] || error "Missing value for option $1"
      src_dir="$2"
      shift
      ;;
    --trace)
      # trace_option is an array of extra arguments for ocaml-tree-sitter.
      trace_option=(--trace)
      ;;
    *)
      error "Unsupported argument: $1"
      ;;
  esac
  shift
done
# Derive the two spellings of the language name used in generated files:
# lowercase with dashes and lowercase with underscores.
#
# printf is used rather than echo so that a value which looks like an echo
# option (such as '-n' or '-e') is passed through to tr unchanged.
lang_dashes=$(printf '%s\n' "$lang" | tr 'A-Z_' 'a-z-')
lang_underscores=$(printf '%s\n' "$lang" | tr 'A-Z-' 'a-z_')
# Start from a clean output folder: anything left from a previous run is
# removed, then the 'lib' subfolder is recreated. It will receive at least
# the C sources and headers copied from the source folder.
#
rm -rf "$dst_dir"
mkdir -p "$dst_dir/lib"
# Copy the C/C++ sources into lib/ and record, without file extensions, the
# names of the files to compile as C (c_files) and as C++ (cxx_files).
# 'parser' and 'bindings' are always compiled; an external scanner is added
# only when the corresponding source file exists.
#
c_files="parser bindings"
cxx_files=""

cp "$src_dir/parser.c" "$dst_dir/lib"

if [[ -e "$src_dir/scanner.c" ]]; then
  cp "$src_dir/scanner.c" "$dst_dir/lib"
  c_files="scanner $c_files"
fi

if [[ -e "$src_dir/scanner.cc" ]]; then
  cp "$src_dir/scanner.cc" "$dst_dir/lib"
  cxx_files="scanner"
fi
# Copy the C headers: every top-level *.h file, then the whole 'tree_sitter'
# folder. With nullglob enabled, the loop body is simply skipped when no *.h
# file matches.
#
shopt -s nullglob
for header in "$src_dir"/*.h; do
  cp "$header" "$dst_dir/lib"
done
cp -a "$src_dir/tree_sitter" "$dst_dir/lib/tree_sitter"
# Generate the OCaml code needed to parse the examples/*.out json files.
#
# The trace_option array is expanded through the ${var[@]+...} form: on bash
# versions older than 4.4, expanding an empty array directly while 'set -u'
# is active is reported as an unbound variable and aborts the script.
#
"$ocaml_tree_sitter" \
  "$lang_underscores" \
  "$src_dir"/grammar.json \
  -d "$dst_dir" \
  ${trace_option[@]+"${trace_option[@]}"}
# Write the C stubs that expose the generated tree-sitter parser to OCaml.
# The heredoc is unquoted on purpose: ${lang_underscores} is substituted into
# the names of the C functions.
#
# In the generated stub, the 'unit' argument is registered with CAMLparam1
# since it is a parameter of type 'value', and the function definition is not
# followed by a stray ';'.
cat > "$dst_dir"/lib/bindings.c <<EOF
/*
  Generated by ocaml-tree-sitter for $lang_underscores.
*/

#include <string.h>
#include <tree_sitter/api.h>

#include <caml/alloc.h>
#include <caml/bigarray.h>
#include <caml/callback.h>
#include <caml/custom.h>
#include <caml/memory.h>
#include <caml/mlvalues.h>
#include <caml/threads.h>

// Implemented by parser.c
TSLanguage *tree_sitter_${lang_underscores}();

// Payload of the OCaml custom block: a pointer to the tree-sitter parser.
typedef struct _parser {
  TSParser *parser;
} parser_W;

// Finalizer of the custom block: releases the tree-sitter parser.
static void finalize_parser(value v) {
  parser_W *p;
  p = (parser_W *)Data_custom_val(v);
  ts_parser_delete(p->parser);
}

static struct custom_operations parser_custom_ops = {
  .identifier = "parser handling",
  .finalize = finalize_parser,
  .compare = custom_compare_default,
  .hash = custom_hash_default,
  .serialize = custom_serialize_default,
  .deserialize = custom_deserialize_default
};

// OCaml function
CAMLprim value octs_create_parser_${lang_underscores}(value unit) {
  CAMLparam1(unit);
  CAMLlocal1(v);

  parser_W parserWrapper;
  TSParser *parser = ts_parser_new();
  parserWrapper.parser = parser;

  v = caml_alloc_custom(&parser_custom_ops, sizeof(parser_W), 0, 1);
  memcpy(Data_custom_val(v), &parserWrapper, sizeof(parser_W));
  ts_parser_set_language(parser, tree_sitter_${lang_underscores}());
  CAMLreturn(v);
}
EOF
# Write the dune file for the generated library. The heredoc is unquoted so
# that ${lang_dashes}, ${lang_underscores}, ${c_files} and ${cxx_files} are
# substituted by the shell; the %{env:...} form contains no '$' and is
# written out literally.
cat > "$dst_dir"/lib/dune <<EOF
(library
(public_name tree-sitter-lang.${lang_dashes})
(name tree_sitter_${lang_underscores})
(preprocess (pps ppx_sexp_conv))
(libraries atdgen-runtime tree-sitter.run)
; A copy of the C headers for the tree-sitter library is found locally.
; This is because it's important to use the right version of 'parser.h'.
;
(foreign_stubs
(language c)
(names ${c_files})
(flags -std=c99
-fPIC
-I .)
)
; TREESITTER_LIBDIR can be set to its correct value using pkg-config:
; pkg-config --libs-only-L tree-sitter | sed -e 's/^-L//'
;
(c_library_flags (-L%{env:TREESITTER_LIBDIR=/usr/local/lib}
-lstdc++
-ltree-sitter)
)
(foreign_stubs
(language cxx)
(names ${cxx_files})
(flags -fPIC
-I .)
)
)
EOF
# Write the dune file for the 'parse-<lang>' executable, which links against
# the library declared in lib/dune.
# NOTE(review): this script only creates "$dst_dir"/lib itself; the bin/
# folder is presumably created by the ocaml-tree-sitter call — confirm.
cat > "$dst_dir"/bin/dune <<EOF
(executable
(package tree-sitter-lang)
(public_name parse-${lang_dashes})
(name Main)
(libraries tree-sitter-lang.${lang_dashes})
)
EOF
# dune needs an opam file for the 'tree-sitter-lang' package in order to
# build; an empty one is enough.
#
touch "$dst_dir/tree-sitter-lang.opam"