import subprocess
import zipfile
import tarfile
+import statistics
+from pathlib import Path
+from collections import defaultdict
from threading import Thread
-from typing import List, Any, Optional, Dict
+from typing import List, Any, Optional, Dict, Tuple, Set

import constants
import oss_fuzz

DB_JSON_ALL_PROJECT_TIMESTAMP = 'all-project-timestamps.json'
DB_JSON_ALL_FUNCTIONS = 'all-functions-db-{PROJ}.json'
DB_JSON_ALL_CONSTRUCTORS = 'all-constructors-db-{PROJ}.json'
-DB_JSON_ALL_CURRENT_FUNCS = 'all-project-current.json'
+DB_JSON_ALL_CURRENT = 'all-project-current.json'
DB_JSON_ALL_BRANCH_BLOCKERS = 'all-branch-blockers.json'
DB_BUILD_STATUS_JSON = 'build-status.json'
#DB_RAW_INTROSPECTOR_REPORTS = 'raw-introspector-reports'

ALL_JSON_FILES = [
    DB_JSON_ALL_PROJECT_TIMESTAMP,
    DB_JSON_ALL_FUNCTIONS,
    DB_JSON_ALL_CONSTRUCTORS,
-    DB_JSON_ALL_CURRENT_FUNCS,
+    DB_JSON_ALL_CURRENT,
]

INTROSPECTOR_WEBAPP_ZIP = (
FI_EXCLUDE_ALL_NON_MUSTS = bool(int(os.getenv('FI_EXCLUDE_ALL_NON_MUSTS',
                                              '0')))

+NUM_RECENT_DAYS = 30
+FUZZER_COVERAGE_IS_DEGRADED = 5  # 5% or more is a degradation
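+# Used by per_fuzzer_coverage_analysis below: a fuzzer is flagged as
+# degraded when its best earlier coverage percentage exceeds the most
+# recent one by more than this many percentage points.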
+
MUST_INCLUDES = set()
MUST_INCLUDE_WITH_LANG: List[Any] = []
@@ -896,11 +901,106 @@ def extend_db_timestamps(db_timestamp, output_directory):
        json.dump(existing_timestamps, f)


-def extend_db_json_files(project_timestamps, output_directory):
+def per_fuzzer_coverage_analysis(project_name: str,
+                                 coverages: Dict[str, List[Tuple[int, str]]],
+                                 lost_fuzzers):
+    """Combines the recent coverage results into a short summary,
+    including an assessment of whether each fuzzer got worse over time.
+    """
+
+    # TODO: This might not be a good metric when coverage is not meaningful,
+    # for example for very small projects or projects that already have low
+    # coverage. Though this may not be too bad, since we look at per-fuzzer
+    # coverage, which should already be normalized to what can be reached.
+    # TODO: What would be a good percentage to mark as a coverage
+    # degradation? Taking 5% for now, but this should be observed; maybe it
+    # should also be configurable per project.
+    results = {}
+    for ff, data in coverages.items():
+        if len(data) > 0:
+            values = [dd[0] for dd in data]
+            dates = [dd[1] for dd in data]
+            latest_date_with_value = next(
+                (dd[1] for dd in reversed(data) if dd[0] is not None), None)
+            if latest_date_with_value is not None:
+                report_url = oss_fuzz.get_fuzzer_code_coverage_summary_url(
+                    project_name, latest_date_with_value.replace('-', ''), ff)
+                report_url = report_url.removesuffix(
+                    'summary.json') + 'index.html'
+            else:
+                report_url = None
+            max_cov = max(values[:-1], default=0)
+            avg_cov = round(statistics.fmean(values), 2)
+            current = values[-1]
+            results[ff] = {
+                'report_url': report_url,
+                'report_date': latest_date_with_value,
+                'coverages_values': values,
+                'coverages_dates': dates,
+                'max': max_cov,
+                'avg': avg_cov,
+                'current': current,
+                'has_degraded':
+                (max_cov - current) > FUZZER_COVERAGE_IS_DEGRADED,
+                'got_lost': ff in lost_fuzzers,
+            }
+    return results
+
+
+def calculate_recent_results(projects_with_new_results, timestamps,
+                             num_days: int):
+    """Analyses recent project data to detect possible degradations of
+    fuzzer efficiency."""
+    data: Dict[str, Dict[str, Dict[str, Any]]] = defaultdict(dict)
+    for pt in timestamps:
+        project_name = pt['project_name']
+        if project_name in projects_with_new_results:
+            data[project_name][pt['date']] = pt
+
+    results = {}
+    for project_name, project_data in data.items():
+        fuzzers_past = set()
+        fuzzers_current: Set[str] = set()
+        per_fuzzer_coverages = defaultdict(list)
+
+        for do in (get_date_at_offset_as_str(ii)
+                   for ii in range(-num_days, 0, 1)):
+            try:
+                date_data = project_data[do]
+                per_fuzzer_coverage_data = date_data[
+                    'per-fuzzer-coverage-data']
+
+                fuzzers_past |= fuzzers_current
+                fuzzers_current = set(per_fuzzer_coverage_data.keys())
+
+                for ff, cov_data in per_fuzzer_coverage_data.items():
+                    try:
+                        perc = round(
+                            100 * cov_data['covered'] / cov_data['count'], 2)
+                    except (KeyError, ZeroDivisionError):
+                        perc = 0
+
+                    per_fuzzer_coverages[ff].append((perc, do))
+            except KeyError:
+                continue
+
+        fuzzer_diff = fuzzers_past - fuzzers_current
+        per_fuzzer_coverages = per_fuzzer_coverage_analysis(
+            project_name, per_fuzzer_coverages, fuzzer_diff)
+
+        results[project_name] = per_fuzzer_coverages
+
+    return results
+
+
+def extend_db_json_files(project_timestamps, output_directory,
+                         should_include_details):
    """Extends a set of DB .json files."""

    existing_timestamps = []
-    logging.info('Loading existing timestamps 1')
+    logging.info('Loading existing timestamps')
    if os.path.isfile(
            os.path.join(output_directory, DB_JSON_ALL_PROJECT_TIMESTAMP)):
        with open(
@@ -919,10 +1019,11 @@ def extend_db_json_files(project_timestamps, output_directory):
    existing_timestamp_mapping = dict()

    for es in existing_timestamps:
-        if not es['project_name'] in existing_timestamp_mapping:
+        if es['project_name'] not in existing_timestamp_mapping:
            existing_timestamp_mapping[es['project_name']] = set()
        existing_timestamp_mapping[es['project_name']].add(es['date'])

+    projects_with_new_results = set()
    for new_ts in project_timestamps:
        to_add = True
@@ -932,24 +1033,44 @@ def extend_db_json_files(project_timestamps, output_directory):
                to_add = False
        if to_add:
            existing_timestamps.append(new_ts)
+            projects_with_new_results.add(new_ts['project_name'])
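+            # Only projects that contributed a new timestamp are considered
+            # in the recent-results analysis further below.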
            have_added = True

    if FI_EXCLUDE_ALL_NON_MUSTS:
-        new_timestamps = []
+        # Filter existing timestamps to only those in MUST_INCLUDES.
+        kept_timestamps = []
        for ts in existing_timestamps:
            if ts['project_name'] in MUST_INCLUDES:
-                new_timestamps.append(ts)
-        existing_timestamps = new_timestamps
+                kept_timestamps.append(ts)
+        existing_timestamps = kept_timestamps

-        new_project_stamps = []
+        # Also filter the current project results.
+        kept_project_stamps = []
        for project_stamp in project_timestamps:
            if project_stamp['project_name'] in MUST_INCLUDES:
-                new_project_stamps.append(project_stamp)
-        project_timestamps = new_project_stamps
+                kept_project_stamps.append(project_stamp)
+        project_timestamps = kept_project_stamps
+
+    if should_include_details:
+        recent_results = calculate_recent_results(projects_with_new_results,
+                                                  existing_timestamps,
+                                                  NUM_RECENT_DAYS)
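+        # recent_results maps a project name to the per-fuzzer summaries
+        # produced by per_fuzzer_coverage_analysis; each project entry
+        # written to all-project-current.json gains a 'recent_results'
+        # field below.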
+        # TODO: These results might detect issues that should be
+        # communicated to project maintainers. The best approach might be to
+        # load the project_timestamps file (all-project-current.json)
+        # separately, load the recent results there and maybe issue warnings.
+        for pt in project_timestamps:
+            try:
+                pt['recent_results'] = recent_results.get(pt['project_name'])
+            except Exception as exc:
+                logging.warning(
+                    f'Could not get recent results for {pt["project_name"]}: '
+                    f'{exc}')
+    else:
+        recent_results = None

-    logging.info('Dumping all current projects')
-    with open(os.path.join(output_directory, DB_JSON_ALL_CURRENT_FUNCS),
-              'w') as f:
+    logging.info('Dumping current project data')
+    with open(os.path.join(output_directory, DB_JSON_ALL_CURRENT), 'w') as f:
        json.dump(project_timestamps, f)

    # Remove any light-introspector files because they should not be saved in the
@@ -1017,7 +1138,8 @@ def update_db_files(db_timestamp,
        f.write(json.dumps(all_header_files))

    logging.info('Extending DB json files')
-    extend_db_json_files(project_timestamps, output_directory)
+    extend_db_json_files(project_timestamps, output_directory,
+                         should_include_details)

    logging.info('Extending DB time stamps')
    extend_db_timestamps(db_timestamp, output_directory)