55
66SINCE=" ${SINCE:- 12h} "
77ENVS=" ${ENVS:- community-tc fx-ci} "
8- LIMIT=200
8+ LIMIT=500
99TMPDIR_BASE=$( mktemp -d)
1010trap ' rm -rf "$TMPDIR_BASE"' EXIT
1111
@@ -39,19 +39,32 @@ query_env() {
3939 --since " $SINCE " --limit " $LIMIT " \
4040 > " $dir /api_403.jsonl" &
4141
42+ query -e " $env " --type worker-requested --since " $SINCE " --limit " $LIMIT " \
43+ > " $dir /worker_requested.jsonl" &
44+
4245 query -e " $env " --type worker-stopped --since " $SINCE " --limit " $LIMIT " \
4346 > " $dir /worker_stopped.jsonl" &
4447
45- query -e " $env " \
48+ # Claim-expired tasks (task-exception from claim-resolver)
49+ query -e " $env " --type task-exception \
4650 --filter ' jsonPayload.Logger="taskcluster.queue.claim-resolver"' \
4751 --since " $SINCE " --limit " $LIMIT " \
4852 > " $dir /claim_expired.jsonl" &
4953
50- query -e " $env " \
54+ # Deadline-exceeded tasks (task-exception from deadline-resolver)
55+ query -e " $env " --type task-exception \
5156 --filter ' jsonPayload.Logger="taskcluster.queue.deadline-resolver"' \
5257 --since " $SINCE " --limit " $LIMIT " \
5358 > " $dir /deadline_exceeded.jsonl" &
5459
60+ # Worker-manager periodic loop failures
61+ for loop in workerScanner workerScannerAzure provisioner; do
62+ query -e " $env " --type monitor.periodic \
63+ --where " name=$loop " --where ' status!=success' \
64+ --since " $SINCE " --limit " $LIMIT " \
65+ > " $dir /periodic_${loop} .jsonl" &
66+ done
67+
5568 wait
5669}
5770
@@ -112,21 +125,29 @@ summarize_env() {
112125 local nce; nce=$( count " $dir /claim_expired.jsonl" )
113126 echo " "
114127 echo " CLAIM-EXPIRED ($nce tasks)"
115- # taskQueueId not present in resolver logs; show taskId sample instead
116128 if [[ $nce -gt 0 ]]; then
117- echo " Sample task IDs:"
118- jq -r ' .taskId // "?"' " $dir /claim_expired.jsonl" 2> /dev/null | head -5 \
119- | awk ' {print " " $0}' || true
129+ echo " By task queue:"
130+ top_n " $dir /claim_expired.jsonl" ' .taskQueueId // "unknown"'
120131 fi
121132
122133 # Deadline-exceeded
123134 local nde; nde=$( count " $dir /deadline_exceeded.jsonl" )
124135 echo " "
125136 echo " DEADLINE-EXCEEDED ($nde tasks)"
126137 if [[ $nde -gt 0 ]]; then
127- echo " Sample task IDs:"
128- jq -r ' .taskId // "?"' " $dir /deadline_exceeded.jsonl" 2> /dev/null | head -5 \
129- | awk ' {print " " $0}' || true
138+ echo " By task queue:"
139+ top_n " $dir /deadline_exceeded.jsonl" ' .taskQueueId // "unknown"'
140+ fi
141+
142+ # Workers requested
143+ local nwr; nwr=$( count " $dir /worker_requested.jsonl" )
144+ echo " "
145+ echo " WORKERS REQUESTED ($nwr total)"
146+ if [[ $nwr -gt 0 ]]; then
147+ echo " By pool (top 8):"
148+ top_n " $dir /worker_requested.jsonl" ' .workerPoolId // "unknown"' 8
149+ echo " By provider:"
150+ top_n " $dir /worker_requested.jsonl" ' .providerId // "unknown"'
130151 fi
131152
132153 # Worker stopped
@@ -140,10 +161,29 @@ summarize_env() {
140161 top_n " $dir /worker_stopped.jsonl" ' .reason // "(no reason)"' 8
141162 fi
142163
164+ # Worker-manager periodic loop failures
165+ echo " "
166+ echo " PERIODIC LOOP FAILURES"
167+ local any_periodic_fail=0
168+ for loop in workerScanner workerScannerAzure provisioner; do
169+ local f=" $dir /periodic_${loop} .jsonl"
170+ local nfail; nfail=$( count " $f " )
171+ if [[ $nfail -eq 0 ]]; then continue ; fi
172+ any_periodic_fail=1
173+ echo " $loop : ${nfail} failures"
174+ echo " By status:"
175+ top_n " $f " ' .status // "unknown"'
176+ echo " Max duration:"
177+ local max_dur; max_dur=$( jq -r ' .duration // 0' " $f " 2> /dev/null | sort -n | tail -1)
178+ echo " ${max_dur} s"
179+ done
180+ [[ $any_periodic_fail -eq 0 ]] && echo " ✓ All loops healthy (no non-success runs)"
181+
143182 # Limit warnings
144183 echo " "
145184 local warned=0
146- for f in errors api_500 api_403 claim_expired deadline_exceeded worker_stopped; do
185+ for f in errors api_500 api_403 claim_expired deadline_exceeded worker_requested worker_stopped \
186+ periodic_workerScanner periodic_workerScannerAzure periodic_provisioner; do
147187 local c; c=$( count " $dir /$f .jsonl" )
148188 if [[ " $c " -ge " $LIMIT " ]]; then
149189 echo " ⚠ $f hit limit ($LIMIT ) — results truncated, tighten filters or reduce --since"
0 commit comments