-
Notifications
You must be signed in to change notification settings - Fork 863
/
Copy pathk8s-infra-diff-buckets.sh
executable file
·126 lines (106 loc) · 3.45 KB
/
k8s-infra-diff-buckets.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/usr/bin/env bash
# Copyright 2021 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Diff the listings of two bucket paths
#
# usage: <this script> gs://old-bucket/path gs://new-bucket/path [exclude_regex]
#
# Environment:
#   DEBUG  when "true", a deterministic working directory under /tmp is used
#          (so existing listings are reused) and its files are listed, not
#          removed, on exit.

# Fail fast: abort on any unhandled command failure, on use of an unset
# variable, and when any stage of a pipeline fails.
set -o errexit
set -o nounset
set -o pipefail

# common vars

# Name of this script without directory or extension; used in the usage
# message and as the prefix of the working directory.
# NB: take the basename BEFORE stripping the extension. Stripping first
# operates on the whole path, so an extension-less script invoked as
# "./name", or living under a directory containing a dot, would yield an
# empty or mangled name.
script_name="$(basename "${BASH_SOURCE[0]}")"
script_name="${script_name%.*}"
readonly script_name
# Create the working directory, store its path in the read-only global TMPDIR,
# and install an EXIT trap for it.
#
# Globals:
#   DEBUG       (read)    "true" selects the cache-friendly mode below
#   script_name (read)    prefix of the directory name
#   TMPDIR      (written) path of the working directory
# Arguments:
#   any strings; in DEBUG mode they are hashed so that the same arguments map
#   to the same directory
#
# Normal mode: a fresh mktemp directory that is removed on exit.
# DEBUG mode:  /tmp/<script_name>.<first 7 hex chars of sha1 of the args>,
#              kept on exit; its files are listed (prefixed with "# ") instead.
function setup_tmpdir() {
  local debug_mode="${DEBUG:-false}"

  if [ "${debug_mode}" != "true" ]; then
    TMPDIR="$(mktemp -d "/tmp/${script_name}.XXX")"
    trap 'rm -rf "${TMPDIR}"' EXIT
  else
    local digest
    digest="$(echo "${@}" | sha1sum | cut -c1-7)"
    TMPDIR="/tmp/${script_name}.${digest}"
    mkdir -p "${TMPDIR}"
    trap 'echo; find "${TMPDIR}" -type f -exec ls -l {} + | sed -e "s/^/# /"' EXIT
  fi

  readonly TMPDIR
}
# Print the current local date and time as YYYY-MM-DDTHH:MM:SS.
function iso_dt() {
  local fmt='%Y-%m-%dT%H:%M:%S'
  date "+${fmt}"
}
# Print a log line to stdout: "# <timestamp from iso_dt> - <all arguments
# joined into one string>".
function log() {
  printf '# %s - %s\n' "$(iso_dt)" "${*}"
}
# Diff the listings of two bucket paths and print a summary.
#
# Globals:
#   script_name (read) used in the usage / error messages
#   TMPDIR      (read) working directory, set by setup_tmpdir
# Arguments:
#   $1 - old bucket path; "gs://" is prepended when missing
#   $2 - new bucket path; "gs://" is prepended when missing
#   $3 - optional extended regex; listing lines matching it are excluded
#        (default: ^$, i.e. blank lines)
# Outputs:
#   progress lines and the summary on stdout. In the working directory:
#   {old,new}.raw.txt (sorted listings with the bucket path stripped),
#   {old,new}.txt (after exclusion), common.txt, only.old.txt, only.new.txt,
#   either.txt and summary.txt.
# Exits:
#   1 when fewer than two arguments are given; grep's status when the
#   exclusion filter itself fails (e.g. invalid regex).
function main() {
  # usage
  if [ $# -lt 2 ]; then
    >&2 echo "usage: ${script_name} gs://old-bucket/path gs://new-bucket/path [exclude_regex]"
    exit 1
  fi

  local old="${1}"
  local new="${2}"
  # NB: do not inline the default as "${3:-'^$'}": inside double quotes the
  # single quotes are literal, which made the default regex the four
  # characters '^$' (matching nothing) instead of ^$.
  local default_exclude_regex='^$'
  local exclude_regex="${3:-${default_exclude_regex}}"
  local var v f rc

  # ensure buckets are of form gs://foo
  for var in old new; do
    v="${!var}"
    if [ "${v:0:5}" != "gs://" ]; then
      printf -v "${var}" '%s' "gs://${v}"
    fi
  done

  # get into a working directory
  setup_tmpdir "${old}" "${new}"
  pushd "${TMPDIR}" >/dev/null

  # get listings, sort for benefit of comm and diff below
  for var in old new; do
    f="${var}.raw.txt"
    v="${!var}"
    # an existing listing is reused as-is
    if ! [ -f "${f}" ]; then
      log "listing ${v}"
      # strip the bucket path (minus one trailing '*') from each line so that
      # old and new listings are comparable; write to a scratch file first so
      # a failed listing is never mistaken for a reusable one
      gsutil ls "${v}" | sed -e "s|${v%%\*}||" | sort >"${f}.partial"
      mv "${f}.partial" "${f}"
    fi
  done

  log "filtering to exclude lines matching '${exclude_regex}'"
  for var in old new; do
    # grep exits 1 when it selects no lines (everything excluded, or an empty
    # listing); that is a valid result here, only status > 1 is an error.
    # -e keeps a regex starting with '-' from being parsed as an option.
    rc=0
    grep -E -v -e "${exclude_regex}" <"${var}.raw.txt" >"${var}.txt" || rc=$?
    if [ "${rc}" -gt 1 ]; then
      >&2 echo "${script_name}: grep failed (exit ${rc}) filtering ${var}.raw.txt"
      exit "${rc}"
    fi
  done

  # NB: either is computed at end with fewer lines for speed
  log "computing lines in common, only old, only new, and either"
  comm -12 old.txt new.txt >common.txt
  comm -23 old.txt new.txt >only.old.txt
  comm -13 old.txt new.txt >only.new.txt
  sort common.txt only.old.txt only.new.txt >either.txt

  log "computing counts of lines in common, only old, only new, and either"
  local num_either num_common num_only_old num_only_new
  for var in either common only_old only_new; do
    # only_old -> only.old.txt, etc.; %d normalizes any padding from wc
    v="$(wc -l <"${var/_/.}.txt")"
    printf -v "num_${var}" '%d' "${v}"
  done

  # setup max padding for summarize below: the longer of the two paths plus
  # the length of the "only in " prefix and one spare column
  local len="${#old}"
  if [ "${#old}" -lt "${#new}" ]; then len="${#new}"; fi
  len=$((len + 9))

  # setup max count for summarize below
  local total="${num_either:?}"

  # Print one summary row: description, count, and percentage of total.
  #   $1 - suffix of the num_* variable holding the count
  #   $2 - description shown in the first column
  # Reads len and total from main's scope.
  function summarize() {
    local var="num_${1}" description="${2}"
    local v pct
    v="${!var}"
    if [ "${total}" -eq 0 ]; then
      # nothing listed on either side; avoid dividing by zero
      pct=0
    else
      pct="$(bc -l <<< "100*${v}/${total}")"
    fi
    printf "# %-${len}s : %6d (%5.1f%%)\n" "${description}" "${v}" "${pct}"
  }

  echo
  (
    echo "# date: $(iso_dt)"
    echo "# old: ${old}"
    echo "# new: ${new}"
    echo "# exclude_regex: ${exclude_regex}"
    echo "# summary:"
    summarize "either" "total (in either)"
    summarize "common" "common (in both)"
    summarize "only_old" "only in ${old}"
    summarize "only_new" "only in ${new}"
  ) | tee summary.txt
}
# Entry point: run main with all command-line arguments, each passed through
# as a separate word.
main "$@"