-
Notifications
You must be signed in to change notification settings - Fork 30
/
Copy pathlang-stat
executable file
·139 lines (122 loc) · 3.28 KB
/
lang-stat
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#! /usr/bin/env bash
#
# Fetch git projects from a list and run our parser on the files for
# a language of interest.
#
# Strict mode: stop on a failing command, on expansion of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
# Name this script was invoked under, without its directory part; it is
# interpolated into the usage text.
progname="$(basename "$0")"
# Print the command-line help text on stdout.
# Globals:
#   progname (read) - script name shown in the synopsis and in the example.
# The example line lists all three required arguments (LANG first) so that it
# agrees with the synopsis; "javascript" is only an illustrative LANG value.
usage() {
  cat <<EOF
Usage: $progname LANG PROJECTS_FILE EXTENSIONS_FILE
For each project specified by a git URL in PROJECTS_FILE, run the local
parser on all the files of the project whose extension matches one of the
extensions in EXTENSIONS_FILE.
Example: $progname javascript projects.txt extensions.txt
EOF
}
# Report a fatal problem and terminate the script.
# Arguments: the message; all arguments are joined as by "$*".
# Outputs:   "Error: <message>" on stderr.
# Exits with status 1 (never returns to the caller).
error() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}
# readarray, used below, needs bash 4 or newer.
[[ "${BASH_VERSION%%.*}" -ge 4 ]] || error "requires bash >= 4"

# Exactly three positional arguments are required. On misuse the help text is
# a diagnostic, so it goes to stderr rather than stdout.
if [[ $# != 3 ]]; then
  usage >&2
  exit 1
fi
lang="$1"
projects_file="$2"
extensions_file="$3"

# Fail early on unreadable list files: grep runs inside a process
# substitution below, where its failure would go unnoticed and simply leave
# the array empty.
[[ -r "$projects_file" ]] || error "Cannot read projects file '$projects_file'."
[[ -r "$extensions_file" ]] \
  || error "Cannot read extensions file '$extensions_file'."

# Read lines into array after removing comments and blank lines.
# An extended regex is used because '\|' in a basic regex is a GNU extension.
readarray -t urls < <(grep -Ev '^ *(#|$)' "$projects_file")
readarray -t extensions < <(grep -Ev '^ *(#|$)' "$extensions_file")

# Output locations, all relative to the current directory.
csv=stat.csv
tmp=stat.tmp
failed_inputs=stat.fail

# CSV header (overwrites any previous result file).
printf '%s\n' 'Name,URL,Files,"Failed files",Lines,"Failed lines"' > "$csv"

# The parser executable is looked up under the current directory and is
# named after the requested language.
parser=$(pwd)/ocaml-src/_build/install/default/bin/parse-"$lang"
[[ -x "$parser" ]] || error "Missing parser '$parser'."
# Run the parser on one input file of the current project.
# Globals (read):
#   proj_workspace - directory the parser is run from
#   parser         - parser command
#   input          - file to parse, as a path relative to proj_workspace
# Outputs:
#   The parser's stdout is discarded. Its stderr, and any diagnostic from the
#   subshell itself, is written to $proj_workspace/parse.log, which is
#   overwritten on every call.
# Returns: the parser's exit status, or non-zero if the workspace cannot be
#   entered.
parse() {
  (
    # Callers invoke parse from an 'if', where errexit is not in effect, so
    # a failed cd must be handled explicitly to avoid running the parser in
    # the wrong directory.
    cd "$proj_workspace" || exit 1
    "$parser" "$input" > /dev/null
  ) > "$proj_workspace"/parse.log 2>&1
}
# Process one project: clone it (or reuse an existing clone), list the files
# matching the configured extensions, parse each one and append a row of
# statistics to the CSV file.
# Globals:
#   url, tmp, extensions, csv, failed_inputs (read)
#   global_line_count, global_error_line_count (updated)
# Calls: parse, which reads the proj_workspace and input variables set here.
# Outputs: progress messages on stdout; paths of files that failed to parse
#   are appended to $failed_inputs.
# Returns: non-zero, without writing a CSV row, if the workspace cannot be
#   prepared, the clone fails, or an existing clone has a different origin URL.
handle_project() {
  local name proj_workspace num_files errors line_count error_line_count
  local input file_line_count
  local -a inputs=()

  name=$(basename "${url%.git}")
  proj_workspace="$tmp"/"$name"
  mkdir -p "$proj_workspace" || return 1

  # This function is invoked on the left of '||', where errexit is not in
  # effect, so every failure in the subshell is checked explicitly and the
  # subshell's status is propagated to the caller.
  (
    cd "$proj_workspace" || exit 1
    # Always (re)create the list so it exists even when nothing matches.
    : > inputs
    if [[ ! -d "$name" ]]; then
      echo "Clone '$name' from '$url'."
      git clone --depth 1 "$url" "$name" || exit 1
    else
      echo "Use local git repo for '$name'."
      origin_url=$(git -C "$name" remote get-url origin) || exit 1
      if [[ "$url" != "$origin_url" ]]; then
        cat >&2 <<EOF
Wrong remote URL found in cloned repository '$name':
found $origin_url
expected $url
Check that you don't have two project URLs with the same repo name.
EOF
        exit 1
      fi
    fi
    # The ${arr[@]+...} form keeps an empty array from tripping 'set -u' on
    # older bash 4 releases.
    for ext in ${extensions[@]+"${extensions[@]}"}; do
      if [[ -n "$ext" ]]; then
        # Regular files only: a directory whose name ends with the extension
        # is not a parser input.
        find "$name" -type f -name "*$ext" >> inputs || exit 1
      fi
    done
  ) || return 1

  readarray -t inputs < "$proj_workspace"/inputs
  num_files=${#inputs[@]}
  echo "Found $num_files files."

  errors=0
  line_count=0
  error_line_count=0
  for input in ${inputs[@]+"${inputs[@]}"}; do
    printf '%s' "$input"
    # Read from stdin so wc prints only the number; the arithmetic expansion
    # strips the padding some wc implementations add.
    file_line_count=$(wc -l < "$proj_workspace/$input") || file_line_count=0
    file_line_count=$(( file_line_count ))
    printf ' (%s lines)' "$file_line_count"
    line_count=$(( line_count + file_line_count ))
    if parse; then
      echo " OK"
    else
      error_line_count=$(( error_line_count + file_line_count ))
      echo "$proj_workspace/$input" >> "$failed_inputs"
      errors=$(( errors + 1 ))
      echo " ERROR"
    fi
  done

  global_line_count=$(( global_line_count + line_count ))
  global_error_line_count=$(( global_error_line_count + error_line_count ))
  echo "$name,$url,$num_files,$errors,$line_count,$error_line_count" >> "$csv"
}
mkdir -p "$tmp"
rm -f "$failed_inputs"
global_line_count=0
global_error_line_count=0

# Start every run with an empty failure log and append to it, so that the log
# lists all failed projects rather than only the last one.
log="$tmp"/lang-stat.log
: > "$log"

# handle_project must run in the current shell (not in a pipeline stage),
# otherwise its updates to the global counters would be lost.
# The ${arr[@]+...} form keeps an empty list from tripping 'set -u' on older
# bash 4 releases.
for url in ${urls[@]+"${urls[@]}"}; do
  if ! handle_project 2>&1; then
    echo "Failed on $url" | tee -a "$log"
  fi
done

# Percentage of lines belonging to files that parsed successfully. Guard the
# division: with no lines at all, awk would abort on a division by zero and
# the summary below would never be printed.
if (( global_line_count > 0 )); then
  line_coverage=$(
    awk -v failed="$global_error_line_count" -v total="$global_line_count" \
      'BEGIN { printf "%.3f%%\n", 100 * (1 - failed / total) }'
  )
else
  line_coverage="n/a"
fi

cat <<EOF
Failed inputs: $failed_inputs
Result file: $csv
Line count: $global_line_count
Line coverage: $line_coverage
EOF