#!/bin/bash -xxx
# Source: http://git.openstack.org/cgit/openstack-infra/tripleo-ci/tree/scripts/getthelogs
# (the upstream script is no longer maintained or cared about)
RETRY_LOCAL=${RETRY_LOCAL:-false}
set -eu -o pipefail
# Directory names to skip while crawling the index pages
DROP="\b(ara|ara_oooq|docs|build|stackviz|conf\.modules\.d|\.git)\b"
# File name patterns to exclude from the download list
NOT="log\.[0-9]|~lock~|md5"
function usage(){
  echo "Helper script for downloading tripleo/rdo/osp/os-infra CI job logs"
  echo "Downloads the logs and starts a shell from the logs root directory"
  echo
  echo "Example:"
  echo "getthelogs http://logs.openstack.org/00/123456/7/check/gate-tripleo-ci-foo/d3adbeef"
  echo
  echo "Example 2 - Rebuild from local cache"
  echo "(helpful to get the logs of another run of the same job - retaining the file structure):"
  echo "RETRY_LOCAL=/tmp/entries.123456789O getthelogs http://logs.openstack.org/00/123456/8/check/gate-tripleo-ci-foo/d3adbeef"
  echo
  echo "Repeat prepared download jobs stored in a local file"
  echo "RETRY_LOCAL=all getthelogs"
}
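# Number of parallel download workers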
WORKERS=6
SSO="${SSO:-null}"
COOK=""
if [ "$SSO" != "null" ] ; then
curl --fail --negotiate -u: -b cookies -c cookies -L "$SSO" &> /dev/null
COOK="-b cookies -Ok"
fi
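# getit <workers> <filter>: run the prepared curl jobs from /tmp/curl-jobs-shuf.txt
# in parallel, piping any curl output through the given filter (cat or zcat)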
function getit(){
  # Do not fail if something is missing / cannot be downloaded
  set +e
  cat /tmp/curl-jobs-shuf.txt | xargs -r -n1 -P ${1} -I{} sh -c "curl {} | ${2}"
  set -e
}
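# EXIT/SIGINT/SIGTERM handler: report where the processed URL list is stored and
# drop into an interactive shell rooted at the download directory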
function finish(){
  rc=${rc:-$?}
  echo "Processed URLs stored in $urls:"
  #cat $urls
  trap - EXIT
  cd $TDIR
  echo "Download job exited ${rc}"
  PS1="JOBLOGS ]\$ " bash --noprofile --norc
}
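# get_jobs: recursively walk the server's index pages starting at the given URL,
# collecting downloadable file URLs into $urls; with the "cached" flag it rebuilds
# the list from a previously recorded entries tree instead of crawling.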
# args: url, [parent_entry,] [cached?]
function get_jobs(){
  local lurls=$(mktemp "$fentries/urls.XXXXXXXXXX")
  local entries
  local e
  local paths
  if [ "${3:-}" ]; then
    entries=$2
    local prefix="${1%/logs/*}"
    local postfix="logs/${1##*/logs/}"
  else
    entries=$(mktemp "$fentries/entries.XXXXXXXXXX")
    [ "${2:-}" ] && echo $2 > $entries # add to tree of cached entries
    f=$(basename ${1%/})
    if [ "x$COOK" = "x" ]; then
      curl -sLk $COOK "$1" 2> /dev/null > "$f"
    else
      curl -sLk $COOK "${1%/}" 2> /dev/null
    fi
    cat "$f" | sed -r 's/></>\n</g' | $filter |\
      grep -E '\[DIR|href=\S+\/?<' | grep -vE "$DROP|href=\"?http" |\
      sed -e "s,.*href=[\"']\([^\"']*\)[\"'].*,${1}\1,g" |\
      awk -F'index' '{print $1}' | tee -a $entries
  fi
  grep -q '^http' $entries || return
  set +u
  while read -r d; do
    echo $d | grep -q '^http' || continue # ignore tree metadata
    echo $d | grep -Eq '\.\.\/|\/\/$' && continue
    echo "Processing URLs for $d"
    if echo $d | grep -q /$; then # list directories via recursion
      if [ "${3:-}" ]; then
        # TODO: test read back through the tree entries
        # allowing different URLs to prefix the known log files structure
        e=$(head -1 $entries)
        echo $e | grep -q '^http' && e=' ' # reached a local bottom
        paths=$(grep -rl $e $(dirname $entries))
        e="$e cached"
      else
        # grow the tree
        e=$entries
      fi
      cat $lurls | grep -q "${d%*/}/ " || get_jobs "${d%*/}/" $e
    else # also list files
      grep -q "$d" $urls && continue
      grep -q "$d" $lurls && continue
      cat $lurls | grep -q "$d " || echo "$d" >> $lurls
    fi
  done < <(cat -- $entries)
  set -u
  cat $lurls >> $urls
}
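# RETRY_LOCAL=all: skip crawling and just re-run the download jobs prepared by a previous run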
if [ "$RETRY_LOCAL" = "all" ]; then
getit $WORKERS cat
exit 0
fi
[[ "${1:--}" =~ ^\s+?- ]] && (usage; exit 1)
trap finish EXIT SIGINT SIGTERM
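# Temp file accumulating every URL selected for download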
urls=$(mktemp -t tmp.XXXXXXXXXX)
if [ "$RETRY_LOCAL" = "false" ]; then
fentries=$(mktemp -d -t entries-XXXXXXXXXX)
else
fentries=$RETRY_LOCAL
fi
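# Mirror the URL path layout under /tmp and download into it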
BASEURL=${1%/}
SC=$(dirname $BASEURL | grep -o \/ | wc -w)
TDIR=${BASEURL##*http://}
TDIR=${TDIR##*https://}
TDIR=/tmp/${TDIR}
mkdir -p "$TDIR"
cd "$TDIR"
echo "Target dir for download: $TDIR"
echo Will download logs from the following URLs:
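# Pick the download filter: zcat if the server gzip-encodes its responses, cat otherwise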
filter="cat"
if [ "x$COOK" = "x" ]; then
curl -sIk $COOK "$BASEURL/" | grep -qi 'content-encoding: gzip' && filter="zcat"
else
curl -sIk $COOK "$BASEURL"
f=$(basename "$BASEURL")
grep -qi 'content-encoding: gzip' "$f" && filter="zcat"
fi
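# Build the URL list: crawl the server, or replay the cached entries tree from RETRY_LOCAL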
if [ "$RETRY_LOCAL" = "false" ]; then
get_jobs "$BASEURL/"
else
# rebuild urls descending the tree (top is the oldest entry dir)
# TODO: it must traverse all paths to all bottoms...
get_jobs "$BASEURL/" $(ls -t1 "${fentries}/entries.*" | tail -1) cached
fi
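# Turn every collected URL into a curl job line: "-Lfk [cookies] <url> -o <local path>"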
rm -f /tmp/curl-jobs.txt
while read -r d; do
  echo $d | grep -q '^http' || continue
  echo $d | grep -Eq "$NOT" && continue
  mkdir -p $(dirname "${d##*logs/}") ||:
  echo "-Lfk ${COOK} ${d} -o $(dirname ${d##*logs/})/$(basename ${d##*logs/})" >> /tmp/curl-jobs.txt
done < <(cat -- $urls)
# Deduplicate and shuffle the prepared download jobs
sort -u /tmp/curl-jobs.txt | shuf > /tmp/curl-jobs-shuf.txt
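# First download pass, WORKERS parallel curl processes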
getit $WORKERS $filter
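# Second pass over the same job list; effectively retries anything that failed the first time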
set +e
cat /tmp/curl-jobs-shuf.txt | xargs -r -n1 -P ${WORKERS} -I{} sh -c "curl {} | $filter"
set -e
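# Post-processing: decompress whatever is still gzip-compressed on disk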
if [ "$filter" = "cat" ]; then
find . -type f -name "*.gz" | xargs -r -n1 gunzip
else
find . -type f -name "*.*" | xargs -r -n1 -I{} bash -c 'mv -f "{}" "{}_" && zcat "{}_" > "{}" && rm -f "{}_"'
fi