Skip to content

Commit 976c5ad

Browse files
committed
Add initial improver for collect repo fix commits
Signed-off-by: ziad hany <[email protected]>
1 parent dcb0511 commit 976c5ad

File tree

4 files changed

+467
-0
lines changed

4 files changed

+467
-0
lines changed

vulnerabilities/improvers/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
from vulnerabilities.pipelines import flag_ghost_packages
2020
from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline
2121
from vulnerabilities.pipelines import remove_duplicate_advisories
22+
from vulnerabilities.pipelines.v2_improvers import (
23+
collect_repo_fix_commits as collect_repo_fix_commits_v2,
24+
)
2225
from vulnerabilities.pipelines.v2_improvers import compute_advisory_todo as compute_advisory_todo_v2
2326
from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2
2427
from vulnerabilities.pipelines.v2_improvers import (
@@ -67,6 +70,7 @@
6770
compute_package_risk_v2.ComputePackageRiskPipeline,
6871
compute_version_rank_v2.ComputeVersionRankPipeline,
6972
compute_advisory_todo_v2.ComputeToDo,
73+
collect_repo_fix_commits_v2.CollectRepoFixCommitPipeline,
7074
compute_advisory_todo.ComputeToDo,
7175
]
7276
)
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
import bisect
10+
import re
11+
from collections import defaultdict
12+
from typing import List
13+
from typing import Optional
14+
from typing import Tuple
15+
16+
from git import Commit
17+
from git import Repo
18+
19+
from vulnerabilities.models import AdvisoryV2
20+
from vulnerabilities.models import CodeFixV2
21+
from vulnerabilities.pipelines import VulnerableCodePipeline
22+
23+
24+
class CollectRepoFixCommitPipeline(VulnerableCodePipeline):
    """
    Pipeline to collect fix commits from any git repository.

    The pipeline runs two steps (see ``steps``): one that scans commit messages for
    CVE identifiers and one that stores the matching commit URLs as code fixes.
    """

    # Identifier string for this pipeline.
    # NOTE(review): presumably used by the pipeline registry to look it up - confirm
    # against VulnerableCodePipeline.
    pipeline_id = "repo_fix_commit_pipeline"
    # VCS URL ("git+" prefix followed by the HTTPS URL) of the repository to scan.
    repositories_url = "git+https://github.com/the-tcpdump-group/tcpdump"
32+
@classmethod
def steps(cls):
    """
    Return the step callables of this pipeline as a tuple.

    Collection of fix commits is listed first, storage of the results second.
    """
    ordered_steps = (cls.collect_fix_commits, cls.store_fix_commits)
    return ordered_steps
38+
39+
def classify_commit_type(self, commit) -> str:
    """
    Label ``commit`` according to how many parents it has.

    Return "root" for a commit without parents, "normal" for exactly one parent
    and "merge" for two or more parents.
    """
    parent_count = len(commit.parents)
    if parent_count > 1:
        return "merge"
    return "normal" if parent_count == 1 else "root"
47+
48+
def detect_fix_commit(self, commit) -> str:
    """
    Classify a commit by looking for a CVE identifier in its message.

    The message is lower-cased before matching, so the check is case-insensitive.
    Return "vulnerability_fix" when a CVE id is found, "other" otherwise.
    """
    lowered_message = commit.message.lower()
    # Each entry is a regex whose match marks the commit as a vulnerability fix.
    # Currently only CVE identifiers are recognised.
    security_markers = (r"\bcve-[0-9]{4}-[0-9]{4,19}\b",)
    for marker in security_markers:
        if re.search(marker, lowered_message):
            return "vulnerability_fix"
    return "other"
61+
62+
def extract_cves(self, text: str) -> List[str]:
    """
    Return the CVE identifiers mentioned in ``text``.

    Matching is case-insensitive. Identifiers are upper-cased, de-duplicated and
    returned in order of first appearance, so the result is the same on every run.
    Return an empty list when ``text`` is empty or None.
    """
    if not text:
        return []
    matches = re.findall(r"cve-[0-9]{4}-[0-9]{4,19}", text, flags=re.IGNORECASE)
    # dict.fromkeys drops duplicates while keeping insertion order; a set would
    # yield an order that changes with string hash randomization.
    return list(dict.fromkeys(match.upper() for match in matches))
67+
68+
def get_previous_releases(
    self,
    release_tags_sorted: List[Tuple[str, int]],
    dates: List[int],
    commit_date: int,
) -> List[str]:
    """
    Return the names of all tags dated strictly before ``commit_date``.

    ``release_tags_sorted`` holds (tag name, date) pairs and ``dates`` is expected to
    be the ascending list of the same dates, position by position. The cut-off
    position is found with a binary search over ``dates``.
    """
    cutoff = bisect.bisect_left(dates, commit_date)
    earlier_tags = release_tags_sorted[:cutoff]
    return [name for name, _date in earlier_tags]
76+
77+
def get_current_or_next_release(
    self,
    release_tags_sorted: List[Tuple[str, int]],
    dates: List[int],
    commit_date: int,
) -> Optional[str]:
    """
    Return the name of the first tag dated on or after ``commit_date``.

    ``release_tags_sorted`` holds (tag name, date) pairs and ``dates`` is expected to
    be the ascending list of the same dates, position by position.
    Return None when every tag is older than ``commit_date`` or there are no tags.
    """
    # bisect_left gives the leftmost position whose date is >= commit_date, which
    # covers both the "same date" and the "next later tag" case in one lookup.
    index = bisect.bisect_left(dates, commit_date)
    if index < len(dates):
        return release_tags_sorted[index][0]
    return None
92+
93+
def get_current_release(
    self, repo: Repo, commit: Commit, prev_release_by_date: Optional[str]
) -> str:
    """
    Return a release tag name for ``commit``.

    Two ``git describe`` invocations are tried in order: an exact tag match on the
    commit, then the nearest tag found with ``--abbrev=0 --first-parent``.
    If both fail, fall back to ``prev_release_by_date`` when it is set, and to the
    placeholder "NO_TAGS_AVAILABLE" otherwise.
    """
    describe_attempts = (
        ("--tags", "--exact-match"),
        ("--tags", "--abbrev=0", "--first-parent"),
    )
    for describe_options in describe_attempts:
        try:
            return repo.git.describe(*describe_options, commit.hexsha)
        except Exception:
            # Best effort: any failure just means "try the next strategy".
            continue

    return prev_release_by_date or "NO_TAGS_AVAILABLE"
110+
111+
def collect_fix_commits(self):
    """
    Scan the repository history and map each CVE id to the URLs of its fix commits.

    The web URL of the repository is derived from ``repositories_url`` by dropping
    its "git+" prefix. The repository is cloned into a temporary directory that is
    removed afterwards, unless the instance or class defines a ``repo_path``
    attribute, in which case that existing local checkout is opened instead.

    Every non-root commit whose message mentions a CVE id contributes its commit
    URL to each CVE id found in the message. The result is stored on
    ``self.fix_commits`` as ``{cve_id: [commit_url, ...]}`` with sorted URL lists.
    """
    # Only this step needs these modules, so they are imported locally.
    import shutil
    import tempfile

    self.log("Processing git repository fix commits.")

    repo_url = self.repositories_url
    if repo_url.startswith("git+"):
        repo_url = repo_url[len("git+") :]

    local_path = getattr(self, "repo_path", None)
    clone_dir = None
    if local_path:
        repo = Repo(local_path)
    else:
        clone_dir = tempfile.mkdtemp()
        try:
            repo = Repo.clone_from(repo_url, clone_dir)
        except Exception:
            # Do not leave a half-written clone behind when cloning fails.
            shutil.rmtree(clone_dir, ignore_errors=True)
            raise

    cve_list = defaultdict(set)
    try:
        for commit in repo.iter_commits("--all"):
            commit_type = self.classify_commit_type(commit)
            fix_type = self.detect_fix_commit(commit)
            if fix_type != "vulnerability_fix" or commit_type not in ("normal", "merge"):
                continue

            commit_url = f"{repo_url}/commit/{commit.hexsha}"
            for cve_id in self.extract_cves(commit.message.strip()):
                cve_list[cve_id].add(commit_url)
    finally:
        # Release the git handles before deleting the temporary clone.
        repo.close()
        if clone_dir:
            shutil.rmtree(clone_dir, ignore_errors=True)

    # Save results into pipeline state; sorting keeps the lists reproducible.
    self.fix_commits = {cve: sorted(commits) for cve, commits in cve_list.items()}
    self.log(f"Found {len(self.fix_commits)} unique CVEs with fix commits.")
157+
158+
def store_fix_commits(self):
    """
    Persist the collected fix commits as CodeFixV2 entries.

    For each CVE id in ``self.fix_commits``, look up the advisories whose
    ``advisory_id`` ends with that id (case-insensitive). For every package reached
    through ``impacted_packages`` / ``affecting_packages`` of such an advisory,
    get or create one CodeFixV2 per commit URL. The number of newly created
    entries is logged at the end.

    Log a hint and return early when ``self.fix_commits`` has not been set.
    """
    if not hasattr(self, "fix_commits"):
        self.log("No fix commits collected. Run collect_fix_commits() first.")
        return

    new_entries = 0

    # FIXME (left by the original author; presumably about the suffix-based
    # advisory lookup below - confirm)
    for cve_id, urls in self.fix_commits.items():
        matching_advisories = AdvisoryV2.objects.filter(advisory_id__iendswith=cve_id)

        if not matching_advisories.exists():
            self.log(f"No advisories found for vulnerability_id: {cve_id}")
            continue

        for advisory in matching_advisories:
            for impacted in advisory.impacted_packages.all():
                for affected in impacted.affecting_packages.all():
                    for url in urls:
                        _code_fix, was_created = CodeFixV2.objects.get_or_create(
                            commits=[url],
                            advisory=advisory,
                            affected_package=affected,
                        )
                        if was_created:
                            new_entries += 1

    self.log(f"Stored {new_entries} new CodeFixV2 entries.")

0 commit comments

Comments
 (0)