6
6
import logging
7
7
8
8
import requests
9
+ from problog import get_evaluatable
10
+ from problog .logic import Term
11
+ from problog .program import PrologString
9
12
from sqlalchemy import ForeignKey , String
10
13
from sqlalchemy .orm import Mapped , mapped_column
11
14
@@ -65,184 +68,6 @@ class MaliciousMetadataFacts(CheckFacts):
65
68
}
66
69
67
70
68
- # This list contains the heuristic analyzer classes
69
- # When implementing new analyzer, appending the classes to this list
70
- ANALYZERS : list = [
71
- EmptyProjectLinkAnalyzer ,
72
- SourceCodeRepoAnalyzer ,
73
- OneReleaseAnalyzer ,
74
- HighReleaseFrequencyAnalyzer ,
75
- UnchangedReleaseAnalyzer ,
76
- CloserReleaseJoinDateAnalyzer ,
77
- SuspiciousSetupAnalyzer ,
78
- WheelAbsenceAnalyzer ,
79
- AnomalousVersionAnalyzer ,
80
- ]
81
-
82
-
83
- # The HeuristicResult sequence is aligned with the sequence of ANALYZERS list
84
- SUSPICIOUS_COMBO : dict [
85
- tuple [
86
- HeuristicResult ,
87
- HeuristicResult ,
88
- HeuristicResult ,
89
- HeuristicResult ,
90
- HeuristicResult ,
91
- HeuristicResult ,
92
- HeuristicResult ,
93
- HeuristicResult ,
94
- HeuristicResult ,
95
- ],
96
- float ,
97
- ] = {
98
- (
99
- HeuristicResult .FAIL , # Empty Project
100
- HeuristicResult .SKIP , # Source Code Repo
101
- HeuristicResult .FAIL , # One Release
102
- HeuristicResult .SKIP , # High Release Frequency
103
- HeuristicResult .SKIP , # Unchanged Release
104
- HeuristicResult .FAIL , # Closer Release Join Date
105
- HeuristicResult .FAIL , # Suspicious Setup
106
- HeuristicResult .FAIL , # Wheel Absence
107
- HeuristicResult .FAIL , # Anomalous Version
108
- # No project link, only one release, and the maintainer released it shortly
109
- # after account registration.
110
- # The setup.py file contains suspicious imports and .whl file isn't present.
111
- # Anomalous version has no effect.
112
- ): Confidence .HIGH ,
113
- (
114
- HeuristicResult .FAIL , # Empty Project
115
- HeuristicResult .SKIP , # Source Code Repo
116
- HeuristicResult .FAIL , # One Release
117
- HeuristicResult .SKIP , # High Release Frequency
118
- HeuristicResult .SKIP , # Unchanged Release
119
- HeuristicResult .FAIL , # Closer Release Join Date
120
- HeuristicResult .FAIL , # Suspicious Setup
121
- HeuristicResult .FAIL , # Wheel Absence
122
- HeuristicResult .PASS , # Anomalous Version
123
- # No project link, only one release, and the maintainer released it shortly
124
- # after account registration.
125
- # The setup.py file contains suspicious imports and .whl file isn't present.
126
- # Anomalous version has no effect.
127
- ): Confidence .HIGH ,
128
- (
129
- HeuristicResult .FAIL , # Empty Project
130
- HeuristicResult .SKIP , # Source Code Repo
131
- HeuristicResult .PASS , # One Release
132
- HeuristicResult .FAIL , # High Release Frequency
133
- HeuristicResult .FAIL , # Unchanged Release
134
- HeuristicResult .FAIL , # Closer Release Join Date
135
- HeuristicResult .FAIL , # Suspicious Setup
136
- HeuristicResult .FAIL , # Wheel Absence
137
- HeuristicResult .SKIP , # Anomalous Version
138
- # No project link, frequent releases of multiple versions without modifying the content,
139
- # and the maintainer released it shortly after account registration.
140
- # The setup.py file contains suspicious imports and .whl file isn't present.
141
- ): Confidence .HIGH ,
142
- (
143
- HeuristicResult .FAIL , # Empty Project
144
- HeuristicResult .SKIP , # Source Code Repo
145
- HeuristicResult .PASS , # One Release
146
- HeuristicResult .FAIL , # High Release Frequency
147
- HeuristicResult .PASS , # Unchanged Release
148
- HeuristicResult .FAIL , # Closer Release Join Date
149
- HeuristicResult .FAIL , # Suspicious Setup
150
- HeuristicResult .FAIL , # Wheel Absence
151
- HeuristicResult .SKIP , # Anomalous Version
152
- # No project link, frequent releases of multiple versions,
153
- # and the maintainer released it shortly after account registration.
154
- # The setup.py file contains suspicious imports and .whl file isn't present.
155
- ): Confidence .HIGH ,
156
- (
157
- HeuristicResult .FAIL , # Empty Project
158
- HeuristicResult .SKIP , # Source Code Repo
159
- HeuristicResult .PASS , # One Release
160
- HeuristicResult .FAIL , # High Release Frequency
161
- HeuristicResult .FAIL , # Unchanged Release
162
- HeuristicResult .FAIL , # Closer Release Join Date
163
- HeuristicResult .PASS , # Suspicious Setup
164
- HeuristicResult .PASS , # Wheel Absence
165
- HeuristicResult .SKIP , # Anomalous Version
166
- # No project link, frequent releases of multiple versions without modifying the content,
167
- # and the maintainer released it shortly after account registration. Presence/Absence of
168
- # .whl file has no effect
169
- ): Confidence .MEDIUM ,
170
- (
171
- HeuristicResult .FAIL , # Empty Project
172
- HeuristicResult .SKIP , # Source Code Repo
173
- HeuristicResult .PASS , # One Release
174
- HeuristicResult .FAIL , # High Release Frequency
175
- HeuristicResult .FAIL , # Unchanged Release
176
- HeuristicResult .FAIL , # Closer Release Join Date
177
- HeuristicResult .PASS , # Suspicious Setup
178
- HeuristicResult .FAIL , # Wheel Absence
179
- HeuristicResult .SKIP , # Anomalous Version
180
- # No project link, frequent releases of multiple versions without modifying the content,
181
- # and the maintainer released it shortly after account registration. Presence/Absence of
182
- # .whl file has no effect
183
- ): Confidence .MEDIUM ,
184
- (
185
- HeuristicResult .PASS , # Empty Project
186
- HeuristicResult .FAIL , # Source Code Repo
187
- HeuristicResult .PASS , # One Release
188
- HeuristicResult .FAIL , # High Release Frequency
189
- HeuristicResult .PASS , # Unchanged Release
190
- HeuristicResult .FAIL , # Closer Release Join Date
191
- HeuristicResult .FAIL , # Suspicious Setup
192
- HeuristicResult .FAIL , # Wheel Absence
193
- HeuristicResult .SKIP , # Anomalous Version
194
- # No source code repo, frequent releases of multiple versions,
195
- # and the maintainer released it shortly after account registration.
196
- # The setup.py file contains suspicious imports and .whl file isn't present.
197
- ): Confidence .HIGH ,
198
- (
199
- HeuristicResult .FAIL , # Empty Project
200
- HeuristicResult .SKIP , # Source Code Repo
201
- HeuristicResult .FAIL , # One Release
202
- HeuristicResult .SKIP , # High Release Frequency
203
- HeuristicResult .SKIP , # Unchanged Release
204
- HeuristicResult .FAIL , # Closer Release Join Date
205
- HeuristicResult .PASS , # Suspicious Setup
206
- HeuristicResult .PASS , # Wheel Absence
207
- HeuristicResult .FAIL , # Anomalous Version
208
- # No project link, only one release, and the maintainer released it shortly
209
- # after account registration.
210
- # The setup.py file has no effect and .whl file is present.
211
- # The version number is anomalous.
212
- ): Confidence .MEDIUM ,
213
- (
214
- HeuristicResult .FAIL , # Empty Project
215
- HeuristicResult .SKIP , # Source Code Repo
216
- HeuristicResult .FAIL , # One Release
217
- HeuristicResult .SKIP , # High Release Frequency
218
- HeuristicResult .SKIP , # Unchanged Release
219
- HeuristicResult .FAIL , # Closer Release Join Date
220
- HeuristicResult .FAIL , # Suspicious Setup
221
- HeuristicResult .PASS , # Wheel Absence
222
- HeuristicResult .FAIL , # Anomalous Version
223
- # No project link, only one release, and the maintainer released it shortly
224
- # after account registration.
225
- # The setup.py file has no effect and .whl file is present.
226
- # The version number is anomalous.
227
- ): Confidence .MEDIUM ,
228
- (
229
- HeuristicResult .FAIL , # Empty Project
230
- HeuristicResult .SKIP , # Source Code Repo
231
- HeuristicResult .FAIL , # One Release
232
- HeuristicResult .SKIP , # High Release Frequency
233
- HeuristicResult .SKIP , # Unchanged Release
234
- HeuristicResult .FAIL , # Closer Release Join Date
235
- HeuristicResult .SKIP , # Suspicious Setup
236
- HeuristicResult .PASS , # Wheel Absence
237
- HeuristicResult .FAIL , # Anomalous Version
238
- # No project link, only one release, and the maintainer released it shortly
239
- # after account registration.
240
- # The setup.py file has no effect and .whl file is present.
241
- # The version number is anomalous.
242
- ): Confidence .MEDIUM ,
243
- }
244
-
245
-
246
71
class DetectMaliciousMetadataCheck (BaseCheck ):
247
72
"""This check analyzes the metadata of a package for malicious behavior."""
248
73
@@ -303,6 +128,41 @@ def validate_malware(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[boo
303
128
is_malware , detail_info = sourcecode_analyzer .analyze ()
304
129
return is_malware , detail_info
305
130
131
+ def evaluate_heuristic_results (self , heuristic_results : dict [Heuristics , HeuristicResult ]) -> float | None :
132
+ """Analyse the heuristic results to determine the maliciousness of the package.
133
+
+ Each heuristic result is translated into one ProbLog clause named after the heuristic's value: SKIP becomes a
+ fact annotated with probability 0.0, PASS a rule whose body is ``true`` and any other result (FAIL) a rule
+ whose body is ``false``. These clauses are placed in front of ``malware_rules_problog_model`` and the combined
+ program is evaluated; the value reported for the term named by ``problog_result_access`` is the confidence.
+
134
+ Parameters
135
+ ----------
136
+ heuristic_results: dict[Heuristics, HeuristicResult]
137
+ Dictionary of Heuristic keys with HeuristicResult values, results of each heuristic scan.
138
+
139
+ Returns
140
+ -------
141
+ float | None
142
+ Returns the confidence associated with the detected malicious combination, otherwise None if no associated
143
+ malicious combination was triggered.
144
+ """
145
+ facts_list : list [str ] = []  # collects one ProbLog clause per heuristic
146
+ for heuristic , result in heuristic_results .items ():
147
+ if result == HeuristicResult .SKIP :
148
+ facts_list .append (f"0.0::{ heuristic .value } ." )  # skipped: fact annotated with probability 0.0
149
+ elif result == HeuristicResult .PASS :
150
+ facts_list .append (f"{ heuristic .value } :- true." )  # passed: rule whose body is `true`
151
+ else : # HeuristicResult.FAIL (any result other than SKIP or PASS lands here)
152
+ facts_list .append (f"{ heuristic .value } :- false." )  # failed: rule whose body is `false`
153
+
154
+ facts = "\n " .join (facts_list )  # all heuristic clauses joined into a single string
155
+ problog_code = f"{ facts } \n \n { self .malware_rules_problog_model } "  # heuristic clauses first, then the class-level rule model
156
+ logger .debug ("Problog model used for evaluation:\n %s" , problog_code )
157
+
158
+ problog_model = PrologString (problog_code )
159
+ problog_results : dict [Term , float ] = get_evaluatable ().create_from (problog_model ).evaluate ()  # annotated as a mapping from Term to float
160
+
161
+ confidence : float | None = problog_results .get (Term (self .problog_result_access ))  # .get() yields None if the term is absent from the results
162
+ if confidence == 0.0 :
163
+ return None # no rules were triggered
164
+ return confidence
165
+
306
166
def run_heuristics (
307
167
self , pypi_package_json : PyPIPackageJsonAsset
308
168
) -> tuple [dict [Heuristics , HeuristicResult ], dict [str , JsonType ]]:
@@ -326,7 +186,7 @@ def run_heuristics(
326
186
results : dict [Heuristics , HeuristicResult ] = {}
327
187
detail_info : dict [str , JsonType ] = {}
328
188
329
- for _analyzer in ANALYZERS :
189
+ for _analyzer in self . analyzers :
330
190
analyzer : BaseHeuristicAnalyzer = _analyzer ()
331
191
logger .debug ("Instantiating %s" , _analyzer .__name__ )
332
192
@@ -418,8 +278,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
418
278
except HeuristicAnalyzerValueError :
419
279
return CheckResultData (result_tables = [], result_type = CheckResultType .UNKNOWN )
420
280
421
- result_combo : tuple = tuple (result .values ())
422
- confidence : float | None = SUSPICIOUS_COMBO .get (result_combo , None )
281
+ confidence = self .evaluate_heuristic_results (result )
423
282
result_type = CheckResultType .FAILED
424
283
if confidence is None :
425
284
confidence = Confidence .HIGH
@@ -448,5 +307,66 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
448
307
# Return UNKNOWN result for unsupported ecosystems.
449
308
return CheckResultData (result_tables = [], result_type = CheckResultType .UNKNOWN )
450
309
310
+ # This list contains the heuristic analyzer classes
311
+ # When implementing a new analyzer, append its class to this list.
312
+ analyzers : list = [
313
+ EmptyProjectLinkAnalyzer ,
314
+ SourceCodeRepoAnalyzer ,
315
+ OneReleaseAnalyzer ,
316
+ HighReleaseFrequencyAnalyzer ,
317
+ UnchangedReleaseAnalyzer ,
318
+ CloserReleaseJoinDateAnalyzer ,
319
+ SuspiciousSetupAnalyzer ,
320
+ WheelAbsenceAnalyzer ,
321
+ AnomalousVersionAnalyzer ,
322
+ ]
323
+
324
+ problog_result_access = "result"
325
+
326
+ malware_rules_problog_model = f"""
327
+ % Heuristic groupings
328
+ % These are common combinations of heuristics that are used in many of the rules, thus themselves representing
329
+ % certain behaviors. When changing or adding rules here, if there are frequent combinations of particular
330
+ % heuristics, group them together here.
331
+
332
+ % Maintainer has recently joined, publishing an undetailed page with no links.
333
+ quickUndetailed :- not { Heuristics .EMPTY_PROJECT_LINK .value } , not { Heuristics .CLOSER_RELEASE_JOIN_DATE .value } .
334
+
335
+ % Maintainer releases a suspicious setup.py and forces it to run by omitting a .whl file.
336
+ forceSetup :- not { Heuristics .SUSPICIOUS_SETUP .value } , not { Heuristics .WHEEL_ABSENCE .value } .
337
+
338
+ % Suspicious Combinations
339
+
340
+ % Package released recently with little detail, forcing the setup.py to run.
341
+ { Confidence .HIGH .value } ::high :- quickUndetailed, forceSetup, not { Heuristics .ONE_RELEASE .value } .
342
+ { Confidence .HIGH .value } ::high :- quickUndetailed, forceSetup, not { Heuristics .HIGH_RELEASE_FREQUENCY .value } .
343
+
344
+ % Package released recently with little detail, with some more refined trust markers introduced: project links,
345
+ % multiple different releases, but there is no source code repository matching it and the setup is suspicious.
346
+ { Confidence .HIGH .value } ::high :- not { Heuristics .SOURCE_CODE_REPO .value } ,
347
+ not { Heuristics .HIGH_RELEASE_FREQUENCY .value } ,
348
+ not { Heuristics .CLOSER_RELEASE_JOIN_DATE .value } ,
349
+ { Heuristics .UNCHANGED_RELEASE .value } ,
350
+ forceSetup.
351
+
352
+ % Package released recently with little detail, with multiple releases as a trust marker, but frequent and with
353
+ % the same code.
354
+ { Confidence .MEDIUM .value } ::medium :- quickUndetailed,
355
+ not { Heuristics .HIGH_RELEASE_FREQUENCY .value } ,
356
+ not { Heuristics .UNCHANGED_RELEASE .value } ,
357
+ { Heuristics .SUSPICIOUS_SETUP .value } .
358
+
359
+ % Package released recently with little detail and an anomalous version number for a single-release package.
360
+ { Confidence .MEDIUM .value } ::medium :- quickUndetailed,
361
+ not { Heuristics .ONE_RELEASE .value } ,
362
+ { Heuristics .WHEEL_ABSENCE .value } ,
363
+ not { Heuristics .ANOMALOUS_VERSION .value } .
364
+
365
+ { problog_result_access } :- high.
366
+ { problog_result_access } :- medium.
367
+
368
+ query({ problog_result_access } ).
369
+ """
370
+
451
371
452
372
registry .register (DetectMaliciousMetadataCheck ())
0 commit comments