Skip to content

Commit 01bf2a6

Browse files
authored
Merge pull request #2169 from aboutcode-org/advisory_grouping
Group related advisories on basis of content
2 parents 053c8fb + 391141c commit 01bf2a6

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

44 files changed

+616
-380
lines changed

vulnerabilities/api_v2.py

Lines changed: 52 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
from vulnerabilities.models import VulnerabilitySeverity
4242
from vulnerabilities.models import Weakness
4343
from vulnerabilities.throttling import PermissionBasedUserRateThrottle
44+
from vulnerabilities.utils import group_advisories_by_content
4445

4546

4647
class CharInFilter(filters.BaseInFilter, filters.CharFilter):
@@ -361,19 +362,39 @@ def get_affected_by_vulnerabilities(self, package):
361362

362363
latest_advisories = AdvisoryV2.objects.latest_for_avids(avids)
363364
advisory_by_avid = {adv.avid: adv for adv in latest_advisories}
365+
impact_by_avid = {}
364366

365-
result = {}
366-
367+
advisories = []
367368
for impact in impacts:
368369
avid = impact.advisory.avid
369370
advisory = advisory_by_avid.get(avid)
370371
if not advisory:
371372
continue
372-
fixed_by_packages = [pkg.purl for pkg in impact.fixed_by_packages.all()]
373-
result[advisory.avid] = {
374-
"advisory_id": advisory.avid,
375-
"fixed_by_packages": fixed_by_packages,
376-
}
373+
advisories.append(advisory)
374+
impact_by_avid[avid] = impact
375+
376+
grouped_advisories = group_advisories_by_content(advisories=advisories)
377+
378+
advs = []
379+
380+
for hash in grouped_advisories:
381+
advs.append(grouped_advisories[hash])
382+
383+
result = []
384+
385+
for advisory in advs:
386+
primary_advisory = advisory["primary"]
387+
avid = primary_advisory.avid
388+
impact = impact_by_avid.get(avid)
389+
if not impact:
390+
continue
391+
result.append(
392+
{
393+
"advisory_id": primary_advisory.avid,
394+
"fixed_by_packages": [pkg.purl for pkg in impact.fixed_by_packages.all()],
395+
"duplicate_advisory_ids": [adv.avid for adv in advisory["secondary"]],
396+
}
397+
)
377398

378399
return result
379400

@@ -384,7 +405,25 @@ def get_fixing_vulnerabilities(self, package):
384405

385406
latest_advisories = AdvisoryV2.objects.latest_for_avids(avids)
386407

387-
return [adv.avid for adv in latest_advisories]
408+
grouped_advisories = group_advisories_by_content(advisories=latest_advisories)
409+
410+
advs = []
411+
412+
for hash in grouped_advisories:
413+
advs.append(grouped_advisories[hash])
414+
415+
result = []
416+
417+
for advisory in advs:
418+
primary_advisory = advisory["primary"]
419+
result.append(
420+
{
421+
"advisory_id": primary_advisory.avid,
422+
"duplicate_advisory_ids": [adv.avid for adv in advisory["secondary"]],
423+
}
424+
)
425+
426+
return result
388427

389428
def get_next_non_vulnerable_version(self, package):
390429
if next_non_vulnerable := package.get_non_vulnerable_versions()[0]:
@@ -1078,14 +1117,14 @@ def list(self, request, *args, **kwargs):
10781117
return self.get_paginated_response(
10791118
{
10801119
"packages": serializer.data,
1081-
"advisories": advisory_data,
1120+
"advisories_by_id": advisory_data,
10821121
}
10831122
)
10841123

10851124
return Response(
10861125
{
10871126
"packages": serializer.data,
1088-
"advisories": advisory_data,
1127+
"advisories_by_id": advisory_data,
10891128
}
10901129
)
10911130

@@ -1160,7 +1199,7 @@ def bulk_lookup(self, request):
11601199
return Response(
11611200
{
11621201
"packages": package_data,
1163-
"advisories": advisory_data,
1202+
"advisories_by_id": advisory_data,
11641203
}
11651204
)
11661205

@@ -1254,7 +1293,7 @@ def bulk_search(self, request):
12541293
return Response(
12551294
{
12561295
"packages": package_data,
1257-
"advisories": advisory_data,
1296+
"advisories_by_id": advisory_data,
12581297
}
12591298
)
12601299

@@ -1308,7 +1347,7 @@ def bulk_search(self, request):
13081347
return Response(
13091348
{
13101349
"packages": package_data,
1311-
"advisories": advisory_data,
1350+
"advisories_by_id": advisory_data,
13121351
}
13131352
)
13141353

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Generated by Django 4.2.25 on 2026-02-10 12:46
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """
    Add the ``precedence`` integer field to the ``advisoryv2`` model.
    """

    # This migration is applied after migration 0112 of the "vulnerabilities" app.
    dependencies = [
        ("vulnerabilities", "0112_alter_advisoryseverity_scoring_system_and_more"),
    ]

    operations = [
        # The field is declared with null=True and blank=True and has no default.
        migrations.AddField(
            model_name="advisoryv2",
            name="precedence",
            field=models.IntegerField(
                blank=True,
                help_text="Precedence indicates the priority of advisory from different datasources. It is determined based on the reliability of the datasource and how close it is to the source.",
                null=True,
            ),
        ),
    ]

vulnerabilities/models.py

Lines changed: 49 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import csv
1111
import datetime
1212
import hashlib
13+
import json
1314
import logging
1415
import uuid
1516
import xml.etree.ElementTree as ET
@@ -69,7 +70,9 @@
6970
from vulnerabilities.severity_systems import EPSS
7071
from vulnerabilities.severity_systems import SCORING_SYSTEMS
7172
from vulnerabilities.utils import compute_patch_checksum
73+
from vulnerabilities.utils import normalize_list
7274
from vulnerabilities.utils import normalize_purl
75+
from vulnerabilities.utils import normalize_text
7376
from vulnerabilities.utils import purl_to_dict
7477
from vulnerablecode import __version__ as VULNERABLECODE_VERSION
7578
from vulnerablecode.settings import VULNERABLECODE_PIPELINE_TIMEOUT
@@ -2988,11 +2991,11 @@ class AdvisoryV2(models.Model):
29882991
help_text="Weighted severity is the highest value calculated by multiplying each severity by its corresponding weight, divided by 10.",
29892992
)
29902993

2991-
# precedence = models.IntegerField(
2992-
# null=True,
2993-
# blank=True,
2994-
# help_text="Precedence indicates the priority level of addressing a vulnerability based on its overall risk",
2995-
# )
2994+
precedence = models.IntegerField(
2995+
null=True,
2996+
blank=True,
2997+
help_text="Precedence indicates the priority of advisory from different datasources. It is determined based on the reliability of the datasource and how close it is to the source.",
2998+
)
29962999

29973000
@property
29983001
def risk_score(self):
@@ -3038,16 +3041,20 @@ def to_advisory_data(self) -> "AdvisoryDataV2":
30383041

30393042
return AdvisoryDataV2(
30403043
advisory_id=self.advisory_id,
3041-
aliases=[item.alias for item in self.aliases.all()],
3044+
aliases=normalize_list([item.alias for item in self.aliases.all()]),
30423045
summary=self.summary,
3043-
affected_packages=[
3044-
impacted.to_affected_package_data() for impacted in self.impacted_packages.all()
3045-
],
3046-
references=[ref.to_reference_v2_data() for ref in self.references.all()],
3047-
patches=[patch.to_patch_data() for patch in self.patches.all()],
3046+
affected_packages=normalize_list(
3047+
[impacted.to_affected_package_data() for impacted in self.impacted_packages.all()]
3048+
),
3049+
references=normalize_list(
3050+
[ref.to_reference_v2_data() for ref in self.references.all()]
3051+
),
3052+
patches=normalize_list([patch.to_patch_data() for patch in self.patches.all()]),
30483053
date_published=self.date_published,
3049-
weaknesses=[weak.cwe_id for weak in self.weaknesses.all()],
3050-
severities=[sev.to_vulnerability_severity_data() for sev in self.severities.all()],
3054+
weaknesses=normalize_list([weak.cwe_id for weak in self.weaknesses.all()]),
3055+
severities=normalize_list(
3056+
[sev.to_vulnerability_severity_data() for sev in self.severities.all()]
3057+
),
30513058
url=self.url,
30523059
)
30533060

@@ -3058,6 +3065,35 @@ def get_aliases(self):
30583065
"""
30593066
return self.aliases.all()
30603067

3068+
def compute_advisory_content(self):
    """
    Return a SHA-256 hex digest computed from this advisory's content.

    The digest covers the summary, impacted packages, patches, severities
    and weaknesses of this advisory. Every collection is put in a
    deterministic order before the data is serialized as compact JSON with
    sorted keys, so the digest does not depend on the order in which the
    related objects are returned by their managers.

    :return: hexadecimal SHA-256 digest (str) of the normalized content
    """

    def canonical_json(item):
        # Serializing with sorted keys yields one string per distinct dict,
        # which makes it usable as a total ordering key.
        return json.dumps(item, sort_keys=True)

    impacted_packages = [impact.to_dict() for impact in self.impacted_packages.all()]
    patches = [patch.to_patch_data().to_dict() for patch in self.patches.all()]
    severities = [
        sev.to_vulnerability_severity_data().to_dict() for sev in self.severities.all()
    ]

    normalized_data = {
        "summary": normalize_text(self.summary),
        "impacted_packages": sorted(impacted_packages, key=canonical_json),
        "patches": sorted(patches, key=canonical_json),
        # Keep (system, value) as the primary order, but break ties on the full
        # canonical JSON: severities sharing system and value would otherwise
        # keep their input order and make the digest order-dependent.
        "severities": sorted(
            severities,
            key=lambda x: (x.get("system"), x.get("value"), canonical_json(x)),
        ),
        # NOTE(review): normalize_list is presumably order-insensitive - confirm.
        "weaknesses": normalize_list([weakness.cwe_id for weakness in self.weaknesses.all()]),
    }

    normalized_json = json.dumps(normalized_data, separators=(",", ":"), sort_keys=True)
    return hashlib.sha256(normalized_json.encode("utf-8")).hexdigest()
3096+
30613097
alias = get_aliases
30623098

30633099

vulnerabilities/pipelines/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ class VulnerableCodeBaseImporterPipelineV2(VulnerableCodePipeline):
265265
spdx_license_expression = None
266266
repo_url = None
267267
ignorable_versions = []
268+
precedence = 0
268269

269270
# Control how often progress log is shown (range: 1–100, higher value = less frequent log)
270271
progress_step = 10
@@ -318,6 +319,7 @@ def collect_and_store_advisories(self):
318319
advisory=advisory,
319320
pipeline_id=self.pipeline_id,
320321
logger=self.log,
322+
precedence=self.precedence,
321323
):
322324
collected_advisory_count += 1
323325
except Exception as e:

vulnerabilities/pipelines/v2_importers/alpine_linux_importer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ class AlpineLinuxImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
3838
license_url = "https://secdb.alpinelinux.org/license.txt"
3939
repo_url = "git+https://github.com/aboutcode-org/aboutcode-mirror-alpine-secdb/"
4040

41+
precedence = 200
42+
4143
@classmethod
4244
def steps(cls):
4345
return (

vulnerabilities/pipelines/v2_importers/aosp_importer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ class AospImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
3131
spdx_license_expression = "Apache-2.0"
3232
license_url = "https://github.com/quarkslab/aosp_dataset/blob/master/LICENSE"
3333

34+
precedence = 200
35+
3436
@classmethod
3537
def steps(cls):
3638
return (

vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,8 @@ class ApacheHTTPDImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
152152
license_url = "https://www.apache.org/licenses/LICENSE-2.0"
153153
base_url = "https://httpd.apache.org/security/json/"
154154

155+
precedence = 200
156+
155157
links = []
156158

157159
ignorable_versions = frozenset(

vulnerabilities/pipelines/v2_importers/apache_kafka_importer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ class ApacheKafkaImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
4848
"CVE-2021-4104",
4949
]
5050

51+
precedence = 200
52+
5153
@classmethod
5254
def steps(cls):
5355
return (

vulnerabilities/pipelines/v2_importers/apache_tomcat_importer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ class ApacheTomcatImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
4040
license_url = "https://www.apache.org/licenses/LICENSE-2.0"
4141
base_url = "https://tomcat.apache.org/security"
4242

43+
precedence = 200
44+
4345
def fetch_advisory_links(self):
4446
"""
4547
Yield the URLs of each Tomcat version security-related page.

vulnerabilities/pipelines/v2_importers/archlinux_importer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ class ArchLinuxImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
2828
spdx_license_expression = "MIT"
2929
license_url = "https://github.com/archlinux/arch-security-tracker/blob/master/LICENSE"
3030

31+
precedence = 200
32+
3133
@classmethod
3234
def steps(cls):
3335
return (

0 commit comments

Comments
 (0)