
Commit 43bed73

Rework project reports

phi-go committed

Signed-off-by: phi-go <[email protected]>
1 parent 2541882 commit 43bed73

4 files changed: +227 -59 lines changed

tools/web-fuzzing-introspection/app/static/assets/db/web_db_creator_from_summary.py

+138 -16
@@ -24,8 +24,10 @@
 import subprocess
 import zipfile
 import tarfile
+import statistics
+from pathlib import Path
 from threading import Thread
-from typing import List, Any, Optional, Dict
+from typing import List, Any, Optional, Dict, Tuple, Set
 
 import constants
 import oss_fuzz
@@ -34,7 +36,7 @@
 DB_JSON_ALL_PROJECT_TIMESTAMP = 'all-project-timestamps.json'
 DB_JSON_ALL_FUNCTIONS = 'all-functions-db-{PROJ}.json'
 DB_JSON_ALL_CONSTRUCTORS = 'all-constructors-db-{PROJ}.json'
-DB_JSON_ALL_CURRENT_FUNCS = 'all-project-current.json'
+DB_JSON_ALL_CURRENT = 'all-project-current.json'
 DB_JSON_ALL_BRANCH_BLOCKERS = 'all-branch-blockers.json'
 DB_BUILD_STATUS_JSON = 'build-status.json'
 #DB_RAW_INTROSPECTOR_REPORTS = 'raw-introspector-reports'
@@ -44,7 +46,7 @@
     DB_JSON_ALL_PROJECT_TIMESTAMP,
     DB_JSON_ALL_FUNCTIONS,
     DB_JSON_ALL_CONSTRUCTORS,
-    DB_JSON_ALL_CURRENT_FUNCS,
+    DB_JSON_ALL_CURRENT,
 ]
 
 INTROSPECTOR_WEBAPP_ZIP = (
@@ -53,6 +55,9 @@
 FI_EXCLUDE_ALL_NON_MUSTS = bool(int(os.getenv('FI_EXCLUDE_ALL_NON_MUSTS',
                                               '0')))
 
+NUM_RECENT_DAYS = 30
+FUZZER_COVERAGE_IS_DEGRADED = 5  # 5% or more is a degradation
+
 MUST_INCLUDES = set()
 MUST_INCLUDE_WITH_LANG: List[Any] = []
 
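NUM_RECENT_DAYS bounds the look-back window that the new calculate_recent_results function (below) iterates over, and FUZZER_COVERAGE_IS_DEGRADED is the percentage-point drop that flags a fuzzer. As a rough, self-contained sketch of that window: the real code builds the dates via the module's get_date_at_offset_as_str helper, so the implementation here is only an approximation of the same idea.

from datetime import date, timedelta

NUM_RECENT_DAYS = 30  # value used in this commit

def recent_dates(num_days: int = NUM_RECENT_DAYS) -> list:
    """YYYY-MM-DD strings for the last `num_days` days, oldest first."""
    today = date.today()
    return [(today + timedelta(days=offset)).isoformat()
            for offset in range(-num_days, 0)]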
@@ -896,11 +901,106 @@ def extend_db_timestamps(db_timestamp, output_directory):
         json.dump(existing_timestamps, f)
 
 
-def extend_db_json_files(project_timestamps, output_directory):
+def per_fuzzer_coverage_analysis(project_name: str,
+                                 coverages: Dict[str, List[Tuple[int, str]]],
+                                 lost_fuzzers):
+    """Go through the recent coverage results and combine them into a short
+    summary, including an assessment of whether the fuzzer got worse over
+    time."""
+
+    # TODO This might not be a good metric when coverage is not meaningful,
+    # for example for very small projects or projects that already have low
+    # coverage. Though, this might not be too bad as we are looking at
+    # per-fuzzer coverage, which should already be normalized to what can be
+    # reached.
+    # TODO What would be a good percentage to mark as coverage degradation?
+    # Taking 5% for now, but this should be observed; maybe it should also be
+    # configurable per project.
+    results = {}
+    for ff, data in coverages.items():
+        if len(data) > 0:
+            values = [dd[0] for dd in data]
+            dates = [dd[1] for dd in data]
+            latest_date_with_value = next(dd[1] for dd in reversed(data)
+                                          if dd[0] is not None)
+            if latest_date_with_value is not None:
+                report_url = oss_fuzz.get_fuzzer_code_coverage_summary_url(
+                    project_name, latest_date_with_value.replace('-', ''), ff)
+                report_url = report_url.removesuffix(
+                    'summary.json') + 'index.html'
+            else:
+                report_url = None
+            max_cov = max(values[:-1], default=0)
+            avg_cov = round(statistics.fmean(values), 2)
+            current = values[-1]
+            results[ff] = {
+                'report_url': report_url,
+                'report_date': latest_date_with_value,
+                'coverages_values': values,
+                'coverages_dates': dates,
+                'max': max_cov,
+                'avg': avg_cov,
+                'current': current,
+                'has_degraded':
+                (max_cov - current) > FUZZER_COVERAGE_IS_DEGRADED,
+                'got_lost': ff in lost_fuzzers,
+            }
+    return results
+
+
+def calculate_recent_results(projects_with_new_results, timestamps,
+                             num_days: int):
+    """Analyse recent project data to detect possible degradations of fuzzer
+    efficiency."""
+    from collections import defaultdict
+
+    data: Dict[str, Dict[str, Dict[str, Any]]] = defaultdict(dict)
+    for pt in timestamps:
+        project_name = pt['project_name']
+        if project_name in projects_with_new_results:
+            data[project_name][pt['date']] = pt
+
+    results = {}
+    for project_name, project_data in data.items():
+        fuzzers_past = set()
+        fuzzers_current: Set[str] = set()
+        per_fuzzer_coverages = defaultdict(list)
+
+        for do in (get_date_at_offset_as_str(ii)
+                   for ii in range(-num_days, 0, 1)):
+            try:
+                date_data = project_data[do]
+                per_fuzzer_coverage_data = date_data[
+                    'per-fuzzer-coverage-data']
+
+                fuzzers_past |= fuzzers_current
+                fuzzers_current = set(per_fuzzer_coverage_data.keys())
+
+                for ff, cov_data in per_fuzzer_coverage_data.items():
+                    try:
+                        perc = round(
+                            100 * cov_data['covered'] / cov_data['count'], 2)
+                    except Exception:
+                        perc = 0
+
+                    per_fuzzer_coverages[ff].append((perc, do))
+            except Exception:
+                continue
+
+        fuzzer_diff = fuzzers_past - fuzzers_current
+        per_fuzzer_coverages = per_fuzzer_coverage_analysis(
+            project_name, per_fuzzer_coverages, fuzzer_diff)
+
+        results[project_name] = per_fuzzer_coverages
+
+    return results
+
+
+def extend_db_json_files(project_timestamps, output_directory,
+                         should_include_details):
     """Extends a set of DB .json files."""
 
     existing_timestamps = []
-    logging.info('Loading existing timestamps 1')
+    logging.info('Loading existing timestamps')
     if os.path.isfile(
             os.path.join(output_directory, DB_JSON_ALL_PROJECT_TIMESTAMP)):
         with open(
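To make the summary that per_fuzzer_coverage_analysis produces concrete, here is a minimal, self-contained sketch of the same max/avg/current/has_degraded computation on made-up coverage points; the dates and percentages are invented, and the real function additionally resolves a coverage report URL via oss_fuzz.

import statistics

# Hypothetical recent data for one fuzzer: (coverage percent, date).
points = [(41.2, '2024-05-01'), (40.8, '2024-05-02'), (33.5, '2024-05-03')]

values = [p[0] for p in points]
max_cov = max(values[:-1], default=0)         # best of the earlier days
avg_cov = round(statistics.fmean(values), 2)  # mean over the window
current = values[-1]                          # most recent day
has_degraded = (max_cov - current) > 5        # FUZZER_COVERAGE_IS_DEGRADED

print(max_cov, avg_cov, current, has_degraded)  # 41.2 38.5 33.5 True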
@@ -919,10 +1019,11 @@ def extend_db_json_files(project_timestamps, output_directory):
         existing_timestamp_mapping = dict()
 
     for es in existing_timestamps:
-        if not es['project_name'] in existing_timestamp_mapping:
+        if es['project_name'] not in existing_timestamp_mapping:
             existing_timestamp_mapping[es['project_name']] = set()
         existing_timestamp_mapping[es['project_name']].add(es['date'])
 
+    projects_with_new_results = set()
     for new_ts in project_timestamps:
         to_add = True
 
@@ -932,24 +1033,44 @@ def extend_db_json_files(project_timestamps, output_directory):
                 to_add = False
         if to_add:
             existing_timestamps.append(new_ts)
+            projects_with_new_results.add(new_ts['project_name'])
             have_added = True
 
     if FI_EXCLUDE_ALL_NON_MUSTS:
-        new_timestamps = []
+        # Filter existing timestamps to only those in MUST_INCLUDES.
+        kept_timestamps = []
         for ts in existing_timestamps:
             if ts['project_name'] in MUST_INCLUDES:
-                new_timestamps.append(ts)
-        existing_timestamps = new_timestamps
+                kept_timestamps.append(ts)
+        existing_timestamps = kept_timestamps
 
-        new_project_stamps = []
+        # Also filter the current project results.
+        kept_project_stamps = []
         for project_stamp in project_timestamps:
             if project_stamp['project_name'] in MUST_INCLUDES:
-                new_project_stamps.append(project_stamp)
-        project_timestamps = new_project_stamps
+                kept_project_stamps.append(project_stamp)
+        project_timestamps = kept_project_stamps
+
+    if should_include_details:
+        recent_results = calculate_recent_results(projects_with_new_results,
+                                                  existing_timestamps,
+                                                  NUM_RECENT_DAYS)
+        # TODO these results might detect issues that should be communicated
+        # to project maintainers. The best approach might be to load the
+        # project_timestamps file (all-project-current.json) separately, look
+        # up the recent results there and maybe issue warnings.
+        for pt in project_timestamps:
+            try:
+                pt['recent_results'] = recent_results.get(pt['project_name'])
+            except Exception as exc:
+                logging.warning(
+                    f'Could not get recent results for {pt["project_name"]}: {exc}'
+                )
+    else:
+        recent_results = None
 
-    logging.info('Dumping all current projects')
-    with open(os.path.join(output_directory, DB_JSON_ALL_CURRENT_FUNCS),
-              'w') as f:
+    logging.info('Dumping current project data')
+    with open(os.path.join(output_directory, DB_JSON_ALL_CURRENT), 'w') as f:
         json.dump(project_timestamps, f)
 
     # Remove any light-introspector files because they should not be saved in the
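With should_include_details enabled, each entry written to all-project-current.json gains a recent_results mapping keyed by fuzzer name. A hypothetical excerpt as a Python literal (the field names follow the diff above; the project name, fuzzer name and values are made up):

{
    'project_name': 'someproject',            # made-up project
    'recent_results': {
        'fuzz_target_1': {                    # one entry per fuzzer
            'report_url': 'https://.../index.html',
            'report_date': '2024-05-03',
            'coverages_values': [41.2, 40.8, 33.5],
            'coverages_dates': ['2024-05-01', '2024-05-02', '2024-05-03'],
            'max': 41.2,
            'avg': 38.5,
            'current': 33.5,
            'has_degraded': True,
            'got_lost': False,
        },
    },
}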
@@ -1017,7 +1138,8 @@ def update_db_files(db_timestamp,
         f.write(json.dumps(all_header_files))
 
     logging.info('Extending DB json files')
-    extend_db_json_files(project_timestamps, output_directory)
+    extend_db_json_files(project_timestamps, output_directory,
+                         should_include_details)
 
     logging.info('Extending DB time stamps')
     extend_db_timestamps(db_timestamp, output_directory)

tools/web-fuzzing-introspection/app/webapp/__init__.py

+3 -4
@@ -60,8 +60,6 @@ def load_db() -> None:
                 project_name=project_timestamp['project_name'],
                 language=project_timestamp['language'],
                 coverage_data=project_timestamp['coverage-data'],
-                per_fuzzer_coverage_data=project_timestamp.get(
-                    'per-fuzzer-coverage-data', None),
                 introspector_data=project_timestamp['introspector-data'],
                 fuzzer_count=project_timestamp['fuzzer-count'],
                 introspector_url=project_timestamp.get('introspector_url',
@@ -87,8 +85,9 @@ def load_db() -> None:
                 introspector_data=project_timestamp['introspector-data'],
                 fuzzer_count=project_timestamp['fuzzer-count'],
                 project_repository=project_timestamp['project_repository'],
-                light_analysis=project_timestamp.get('light-introspector',
-                                                     {})))
+                light_analysis=project_timestamp.get('light-introspector', {}),
+                recent_results=project_timestamp.get('recent_results'),
+            ))
 
     introspector_data = project_timestamp.get('introspector-data', None)
     if introspector_data is None:
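Since recent_results is read with .get(), project entries written before this change simply yield None and the webapp keeps loading them unchanged. A small sketch with hypothetical entries:

old_entry = {'project_name': 'someproject'}   # predates this change
new_entry = {'project_name': 'someproject',
             'recent_results': {'fuzz_target_1': {'has_degraded': True}}}

print(old_entry.get('recent_results'))  # None
print(new_entry.get('recent_results'))  # {'fuzz_target_1': {'has_degraded': True}}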

tools/web-fuzzing-introspection/app/webapp/models.py

+7 -4
@@ -30,7 +30,8 @@ def __init__(self, name: str, language: str, date: str,
                  introspector_data: Optional[Dict[str,
                                                   Any]], fuzzer_count: int,
                  project_repository: Optional[str], light_analysis: Dict[Any,
-                                                                          Any]):
+                                                                          Any],
+                 recent_results: Optional[Dict[str, Any]]):
         self.name = name
         self.language = language
         self.date = date
@@ -39,9 +40,13 @@ def __init__(self, name: str, language: str, date: str,
         self.fuzzer_count = fuzzer_count
         self.project_repository = project_repository
         self.light_analysis = light_analysis
+        self.recent_results = recent_results
 
     def has_introspector(self) -> bool:
-        return self.introspector_data != None
+        return self.introspector_data is not None
+
+    def has_recent_results(self) -> bool:
+        return self.recent_results is not None
 
 
 class DBTimestamp:
@@ -78,7 +83,6 @@ def __init__(self,
                  date: str,
                  language: str,
                  coverage_data: Optional[Dict[str, Any]],
-                 per_fuzzer_coverage_data: Optional[Dict[str, Dict[str, Any]]],
                  introspector_data: Optional[Dict[str, Any]],
                  fuzzer_count: int,
                  introspector_url: Optional[str] = None,
@@ -89,7 +93,6 @@ def __init__(self,
         self.date = date
         self.language = language
         self.coverage_data = coverage_data
-        self.per_fuzzer_coverage_data = per_fuzzer_coverage_data
         self.introspector_data = introspector_data
         self.fuzzer_count = fuzzer_count
         self.introspector_url = introspector_url
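A consumer of the model can guard on the new accessor before touching the per-fuzzer summaries. A hypothetical helper, not part of this commit, might look like this:

def degraded_or_lost_fuzzers(project) -> list:
    """Names of fuzzers whose recent coverage dropped or that disappeared."""
    if not project.has_recent_results():
        return []
    return [name for name, summary in project.recent_results.items()
            if summary.get('has_degraded') or summary.get('got_lost')]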
