Skip to content

Commit 39c8aa6

Browse files
authored
Add a summary of the most often failing tests to the daily build report (#1351)
* Add an option to the build status script to summarize by test.
* Add to workflow.
* Add to workflow.
* Change formatting, and include crash/timeout in the list.
* Add an option of whether to include crashes in the list.
* Add column for whether is error/flake.
* Reformat results to fit in a proper table, and include the latest date.
* Rephrase slightly.
* Change formatting of logs list.
* Formatting.
* Link to code search for specific tests named.
1 parent b186c24 commit 39c8aa6

File tree

2 files changed

+165
-16
lines changed

2 files changed

+165
-16
lines changed

.github/workflows/build-report.yml

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ env:
99
GITHUB_TOKEN: ${{ github.token }}
1010
numDays: 7
1111
numDaysExtended: 30
12+
numTestsSummary: 10
1213

1314
jobs:
1415
generate-report:
@@ -47,6 +48,7 @@ jobs:
4748
run: |
4849
python3 scripts/gha/report_build_status.py --token ${{ github.token }} --days ${{ env.numDays }} --output_markdown --read_cache build_status_short.cache > report_short.md
4950
python3 scripts/gha/report_build_status.py --token ${{ github.token }} --days ${{ env.numDaysExtended }} --output_markdown --read_cache build_status.cache > report_extended.md
51+
python3 scripts/gha/report_build_status.py --token ${{ github.token }} --days ${{ env.numDaysExtended }} --output_markdown --read_cache build_status.cache --report=test_summary --summary_count=${{ env.numTestsSummary }} > test_summary.md
5052
python3 scripts/gha/report_build_status.py --token ${{ github.token }} --days ${{ env.numDays }} --nooutput_header --read_cache build_status_short.cache > report.txt
5153
- name: Generate comment string
5254
run: |
@@ -56,20 +58,26 @@ jobs:
5658
cat report_short.md >> comment.md
5759
cat >> comment.md <<EOF
5860
59-
<details><summary>View extended history</summary>
61+
<details><summary>View extended history (last ${{ env.numDaysExtended }} days)</summary>
6062
6163
EOF
6264
cat report_extended.md >> comment.md
6365
cat >> comment.md <<EOF
6466
</details>
67+
<details><summary>Top ${{ env.numTestsSummary }} flakes/failures (last ${{ env.numDaysExtended }} days)</summary>
68+
69+
EOF
70+
cat test_summary.md >> comment.md
71+
cat >> comment.md <<EOF
72+
</details>
6573
<details><summary>📄</summary><pre>
6674
EOF
6775
cat report.txt >> comment.md
6876
cat >> comment.md <<EOF
6977
</pre></details>
7078
7179
***
72-
80+
7381
EOF
7482
- name: Show comment string
7583
run: |

scripts/gha/report_build_status.py

Lines changed: 155 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,23 @@
9595
"read_cache", None,
9696
"Read a cache file that was written by a previous run via --write_cache.")
9797

98+
flags.DEFINE_enum(
99+
"report", "daily_log",
100+
["daily_log", "test_summary"],
101+
"Choose whether to report a daily build/test log or a summary of failing and flaky tests.")
102+
103+
flags.DEFINE_integer(
104+
"summary_count", 10,
105+
"If --report=test_summary, how many of the top tests to show.")
106+
107+
flags.DEFINE_enum(
108+
"summary_type", "all", ["all", "errors", "flakes"],
109+
"Whether to include flakes, errors, or all in the test summary.")
110+
111+
flags.DEFINE_bool(
112+
"summary_include_crashes", True,
113+
"Whether to include CRASH/TIMEOUT in the test summary.")
114+
98115
_WORKFLOW_TESTS = 'integration_tests.yml'
99116
_WORKFLOW_PACKAGING = 'cpp-packaging.yml'
100117
_TRIGGER_USER = 'firebase-workflow-trigger[bot]'
@@ -142,13 +159,14 @@ def analyze_log(text, url):
142159
"""Do a simple analysis of the log summary text to determine if the build
143160
or test succeeded, flaked, or failed.
144161
"""
162+
if not text: text = ""
145163
build_status = decorate_url(_PASS_TEXT, url)
146164
test_status = decorate_url(_PASS_TEXT, url)
147-
if '[BUILD] [ERROR]' in text:
165+
if '[BUILD] [ERROR]' in text or '[BUILD] [FAILURE]' in text:
148166
build_status = decorate_url(_FAILURE_TEXT, url)
149167
elif '[BUILD] [FLAKINESS]' in text:
150168
build_status =decorate_url(_FLAKY_TEXT, url)
151-
if '[TEST] [ERROR]' in text:
169+
if '[TEST] [ERROR]' in text or '[TEST] [FAILURE]' in text:
152170
test_status = decorate_url(_FAILURE_TEXT, url)
153171
elif '[TEST] [FLAKINESS]' in text:
154172
test_status = decorate_url(_FLAKY_TEXT, url)
@@ -224,15 +242,20 @@ def format_errors(all_errors, severity, event):
224242
return final_text
225243

226244

227-
def create_notes(text):
228-
"""Combine the sets of errors into a single string.
229-
"""
230-
if not text: return ''
231-
errors = {}
245+
def aggregate_errors_from_log(text, debug=False):
246+
if not text: return {}
232247
text += '\n'
248+
errors = {}
233249
lines = text.split('\n')
234250
current_product = None
251+
event = None
252+
severity = None
253+
platform = None
254+
other = None
255+
product = None
256+
235257
for line in lines:
258+
if debug: print(line)
236259
if not current_product:
237260
m = re.search(r'^([a-z_]+):', line)
238261
if m:
@@ -243,11 +266,12 @@ def create_notes(text):
243266
current_product = None
244267
else:
245268
m = re.search(
246-
r'\[(BUILD|TEST)\] \[(ERROR|FLAKINESS)\] \[([a-zA-Z]+)\] (\[.*\])',
269+
r'\[(BUILD|TEST)\] \[(ERROR|FAILURE|FLAKINESS)\] \[([a-zA-Z]+)\] (\[.*\])',
247270
line)
248271
if m:
249272
event = m.group(1)
250273
severity = m.group(2)
274+
if severity == "FAILURE": severity = "ERROR"
251275
platform = m.group(3)
252276
other = m.group(4)
253277
product = current_product
@@ -259,8 +283,24 @@ def create_notes(text):
259283
if product not in errors[severity][event]:
260284
errors[severity][event][product] = {}
261285
if platform not in errors[severity][event][product]:
262-
errors[severity][event][product][platform] = set()
263-
errors[severity][event][product][platform].add(other)
286+
errors[severity][event][product][platform] = {}
287+
errors[severity][event][product][platform]['description'] = set()
288+
errors[severity][event][product][platform]['test_list'] = set()
289+
errors[severity][event][product][platform]['description'].add(other)
290+
else:
291+
m2 = re.search(r"failed tests: \[\'(.*)\'\]", line)
292+
if m2:
293+
test_list = m2.group(1).split("', '")
294+
for test_name in test_list:
295+
errors[severity][event][product][platform]['test_list'].add(test_name)
296+
return errors
297+
298+
299+
def create_notes(text, debug=False):
300+
"""Combine the sets of errors into a single string.
301+
"""
302+
if not text: return ''
303+
errors = aggregate_errors_from_log(text, debug)
264304

265305
log_items = []
266306
text = format_errors(errors, 'ERROR', 'BUILD')
@@ -470,8 +510,10 @@ def main(argv):
470510
prev_notes = ''
471511
last_good_day = None
472512

513+
output = ""
514+
473515
if FLAGS.output_markdown:
474-
print("### Testing History (last %d days)\n" % len(all_days))
516+
output += "### Testing History (last %d days)\n\n" % len(all_days)
475517

476518
table_fields = (
477519
["Date"] +
@@ -493,9 +535,9 @@ def main(argv):
493535
table_row_fmt = row_prefix + row_separator.join(["%s" for f in table_fields]) + row_suffix
494536

495537
if FLAGS.output_header:
496-
print(table_header_string)
538+
output += table_header_string + "\n"
497539
if FLAGS.output_markdown:
498-
print(table_row_fmt.replace("%s", "---").replace(" ", ""))
540+
output += table_row_fmt.replace("%s", "---").replace(" ", "") + "\n"
499541

500542
days_sorted = sorted(all_days)
501543
if FLAGS.reverse: days_sorted = reversed(days_sorted)
@@ -534,7 +576,106 @@ def main(argv):
534576
package_tests_log[1],
535577
notes]
536578
)
537-
print(table_row_fmt % tuple(table_row_contents))
579+
output += (table_row_fmt % tuple(table_row_contents)) + "\n"
580+
581+
if FLAGS.report == "daily_log":
582+
print(output)
583+
elif FLAGS.report == "test_summary":
584+
test_list = {}
585+
for day in days_sorted:
586+
if source_tests[day]['log_results']:
587+
errors = aggregate_errors_from_log(source_tests[day]['log_results'])
588+
test_link = source_tests[day]['html_url']
589+
elif package_tests[day]['log_results']:
590+
errors = aggregate_errors_from_log(package_tests[day]['log_results'])
591+
test_link = package_tests[day]['html_url']
592+
else:
593+
continue
594+
595+
sev_list = []
596+
if FLAGS.summary_type == "all" or FLAGS.summary_type == "flakes":
597+
sev_list.append('FLAKINESS')
598+
if FLAGS.summary_type == "all" or FLAGS.summary_type == "errors":
599+
sev_list.append('ERROR')
600+
for sev in sev_list:
601+
if sev in errors and 'TEST' in errors[sev]:
602+
test_entries = errors[sev]['TEST']
603+
for product, platform_dict in test_entries.items():
604+
if product == "missing_log":
605+
continue
606+
platforms = list(platform_dict.keys())
607+
for platform in platforms:
608+
test_names = list(test_entries[product][platform]['test_list'])
609+
if not test_names:
610+
test_names = ['Unspecified test']
611+
for test_name in test_names:
612+
if test_name == "CRASH/TIMEOUT":
613+
if not FLAGS.summary_include_crashes: continue
614+
else: test_name = "Crash or timeout"
615+
test_id = "%s | %s | %s | %s" % (sev.lower(), product, platform, test_name)
616+
if test_id not in test_list:
617+
test_list[test_id] = {}
618+
test_list[test_id]['count'] = 0
619+
test_list[test_id]['links'] = []
620+
test_list[test_id]['count'] += 1
621+
test_list[test_id]['links'].append(test_link)
622+
test_list[test_id]['latest'] = day
623+
624+
test_list_sorted = reversed(sorted(test_list.keys(), key=lambda x: test_list[x]['count']))
625+
if FLAGS.output_header:
626+
if FLAGS.output_markdown:
627+
print("| # | Latest | Product | Platform | Test Info |")
628+
print("|---|---|---|---|---|")
629+
else:
630+
print("Count\tLatest\tSeverity\tProduct\tPlatform\tTest Name")
631+
632+
num_shown = 0
633+
634+
for test_id in test_list_sorted:
635+
(severity, product, platform, test_name) = test_id.split(" | ")
636+
days_ago = (dateutil.utils.today() - dateutil.parser.parse(test_list[test_id]['latest'])).days
637+
if days_ago <= 0:
638+
latest = "Today"
639+
else:
640+
latest = "%s day%s ago" % (days_ago, '' if days_ago == 1 else 's')
641+
if FLAGS.output_markdown:
642+
if severity == "error":
643+
severity = "(failure)"
644+
elif severity == "flakiness":
645+
severity = "(flaky)"
646+
latest = latest.replace(" ", "&nbsp;")
647+
product = product.replace("_", " ")
648+
product = product.upper() if product == "gma" else product.title()
649+
if len(test_list[test_id]['links']) > 0:
650+
latest = "[%s](%s)" % (latest, test_list[test_id]['links'][-1])
651+
652+
link_list = []
653+
seen = set()
654+
num = 1
655+
656+
for link in test_list[test_id]['links']:
657+
if link not in seen:
658+
seen.add(link)
659+
link_list.append("[%d](%s)" % (num, link))
660+
num += 1
661+
# If test_name looks like FirebaseSomethingTest.Something, link it to code search.
662+
m = re.match(r"(Firebase[A-Za-z]*Test)\.(.*)", test_name)
663+
if m:
664+
search_url = "http://github.com/search?q=repo:firebase/firebase-cpp-sdk%%20\"%s,%%20%s\"" % (m.group(1), m.group(2))
665+
test_name_str = "[%s](%s)" % (test_name, search_url)
666+
else:
667+
test_name_str = test_name
668+
669+
print("| %d | %s | %s | %s | %s&nbsp;%s<br/>&nbsp;&nbsp;&nbsp;Logs: %s |" % (
670+
test_list[test_id]['count'], latest,
671+
product, platform,
672+
test_name_str, severity, " ".join(link_list)))
673+
else:
674+
print("%d\t%s\t%s\t%s\t%s\t%s" % (test_list[test_id]['count'], latest, severity, product, platform, test_name))
675+
num_shown += 1
676+
if num_shown >= FLAGS.summary_count:
677+
break
678+
538679

539680
if __name__ == "__main__":
540681
flags.mark_flag_as_required("token")

0 commit comments

Comments
 (0)