Skip to content

Commit e056b7c

Browse files
committed
feat: detect vulnerable GitHub Actions
Signed-off-by: behnazh-w <[email protected]>
1 parent 9d5de67 commit e056b7c

File tree

6 files changed

+618
-32
lines changed

6 files changed

+618
-32
lines changed

src/macaron/config/defaults.ini

+6
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,12 @@ url_netloc = api.deps.dev
544544
url_scheme = https
545545
purl_endpoint = v3alpha/purl
546546

547+
[osv_dev]
548+
url_netloc = api.osv.dev
549+
url_scheme = https
550+
query_endpoint = v1/query
551+
querybatch_endpoint = v1/querybatch
552+
547553
# Configuration options for selecting the checks to run.
548554
# Both the exclude and include are defined as list of strings:
549555
# - The exclude list is used to specify the checks that will not run.

src/macaron/database/db_custom_types.py

+32
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,38 @@ def process_result_value(self, value: None | dict, dialect: Any) -> None | dict:
102102
return value
103103

104104

105+
class DBJsonList(TypeDecorator):  # pylint: disable=W0223
    """SQLAlchemy column type that persists Python lists as JSON values."""

    # The underlying column implementation is a JSON value.
    impl = JSON

    # To prevent Sphinx from rendering the docstrings for `cache_ok`, make this docstring private.
    #: :meta private:
    cache_ok = True

    def process_bind_param(self, value: None | list, dialect: Any) -> None | list:
        """Validate a list object on its way into the database.

        Parameters
        ----------
        value: None | list
            The value being stored.
        dialect: Any
            The database dialect in use; it is not consulted here.

        Returns
        -------
        None | list
            The same list, unchanged.

        Raises
        ------
        TypeError
            If the value is not a list (this includes ``None``).
        """
        if isinstance(value, list):
            return value

        raise TypeError("DBJsonList type expects a list.")

    def process_result_value(self, value: None | list, dialect: Any) -> None | list:
        """Validate a list object on its way out of the database.

        Parameters
        ----------
        value: None | list
            The value being loaded.
        dialect: Any
            The database dialect in use; it is not consulted here.

        Returns
        -------
        None | list
            The same list, unchanged.

        Raises
        ------
        TypeError
            If the value is not a list (this includes ``None``).
        """
        if isinstance(value, list):
            return value

        raise TypeError("DBJsonList type expects a list.")
135+
136+
105137
class ProvenancePayload(TypeDecorator): # pylint: disable=W0223
106138
"""SQLAlchemy column type to serialize InTotoProvenance."""
107139

src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py

+21-30
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
import logging
77

8-
import requests
98
from problog import get_evaluatable
109
from problog.logic import Term
1110
from problog.program import PrologString
@@ -34,16 +33,16 @@
3433
from macaron.slsa_analyzer.checks.base_check import BaseCheck
3534
from macaron.slsa_analyzer.checks.check_result import CheckResultData, CheckResultType, Confidence, JustificationType
3635
from macaron.slsa_analyzer.package_registry.deps_dev import APIAccessError, DepsDevService
36+
from macaron.slsa_analyzer.package_registry.osv_dev import OSVDevService
3737
from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset, PyPIRegistry
3838
from macaron.slsa_analyzer.registry import registry
3939
from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo
40-
from macaron.util import send_post_http_raw
4140

4241
logger: logging.Logger = logging.getLogger(__name__)
4342

4443

4544
class MaliciousMetadataFacts(CheckFacts):
46-
"""The ORM mapping for justifications in pypi heuristic check."""
45+
"""The ORM mapping for justifications in malicious metadata check."""
4746

4847
__tablename__ = "_detect_malicious_metadata_check"
4948

@@ -71,14 +70,10 @@ class MaliciousMetadataFacts(CheckFacts):
7170
class DetectMaliciousMetadataCheck(BaseCheck):
7271
"""This check analyzes the metadata of a package for malicious behavior."""
7372

74-
# The OSV knowledge base query database.
75-
osv_query_url = "https://api.osv.dev/v1/query"
76-
7773
def __init__(self) -> None:
7874
"""Initialize a check instance."""
7975
check_id = "mcn_detect_malicious_metadata_1"
8076
description = """This check analyzes the metadata of a package based on reports malicious behavior.
81-
Supported ecosystem for unknown malware: PyPI.
8277
"""
8378
super().__init__(check_id=check_id, description=description, eval_reqs=[])
8479

@@ -222,38 +217,34 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
222217
package_registry_info_entries = ctx.dynamic_data["package_registries"]
223218

224219
# First check if this package is a known malware
225-
data = {"package": {"purl": ctx.component.purl}}
226-
227220
try:
228221
package_exists = bool(DepsDevService.get_package_info(ctx.component.purl))
229222
except APIAccessError as error:
230223
logger.debug(error)
231224

232225
# Known malicious packages must have been removed.
233226
if not package_exists:
234-
response = send_post_http_raw(self.osv_query_url, json_data=data, headers=None)
235-
res_obj = None
236-
if response:
237-
try:
238-
res_obj = response.json()
239-
except requests.exceptions.JSONDecodeError as error:
240-
logger.debug("Unable to get a valid response from %s: %s", self.osv_query_url, error)
241-
if res_obj:
242-
for vuln in res_obj.get("vulns", {}):
243-
if v_id := json_extract(vuln, ["id"], str):
244-
result_tables.append(
245-
MaliciousMetadataFacts(
246-
known_malware=f"https://osv.dev/vulnerability/{v_id}",
247-
result={},
248-
detail_information=vuln,
249-
confidence=Confidence.HIGH,
250-
)
227+
vulns: list = []
228+
try:
229+
vulns = OSVDevService.get_vulnerabilities_purl(ctx.component.purl)
230+
except APIAccessError as error:
231+
logger.debug(error)
232+
233+
for vuln in vulns:
234+
if v_id := json_extract(vuln, ["id"], str):
235+
result_tables.append(
236+
MaliciousMetadataFacts(
237+
known_malware=f"https://osv.dev/vulnerability/{v_id}",
238+
result={},
239+
detail_information=vuln,
240+
confidence=Confidence.HIGH,
251241
)
252-
if result_tables:
253-
return CheckResultData(
254-
result_tables=result_tables,
255-
result_type=CheckResultType.FAILED,
256242
)
243+
if result_tables:
244+
return CheckResultData(
245+
result_tables=result_tables,
246+
result_type=CheckResultType.FAILED,
247+
)
257248

258249
# If the package is not a known malware, run malware analysis heuristics.
259250
for package_registry_info_entry in package_registry_info_entries:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
"""This module contains the implementation of the GitHub Actions vulnerabilities check."""
5+
6+
import logging
7+
import os
8+
9+
from sqlalchemy import ForeignKey, String
10+
from sqlalchemy.orm import Mapped, mapped_column
11+
12+
from macaron.database.db_custom_types import DBJsonList
13+
from macaron.database.table_definitions import CheckFacts
14+
from macaron.errors import APIAccessError
15+
from macaron.json_tools import json_extract
16+
from macaron.slsa_analyzer.analyze_context import AnalyzeContext
17+
from macaron.slsa_analyzer.checks.base_check import BaseCheck, CheckResultType
18+
from macaron.slsa_analyzer.checks.check_result import CheckResultData, Confidence, JustificationType
19+
from macaron.slsa_analyzer.ci_service.github_actions.analyzer import GitHubWorkflowNode, GitHubWorkflowType
20+
from macaron.slsa_analyzer.package_registry.osv_dev import OSVDevService
21+
from macaron.slsa_analyzer.registry import registry
22+
from macaron.slsa_analyzer.slsa_req import ReqName
23+
24+
# Module-level logger named after this module.
logger: logging.Logger = logging.getLogger(__name__)
25+
26+
27+
class GitHubActionsVulnsFacts(CheckFacts):
    """ORM table holding the facts reported by the GitHub Actions vulnerabilities check."""

    __tablename__ = "_github_actions_vulnerabilities_check"

    #: The primary key, which is also a foreign key to ``_check_facts.id``.
    id: Mapped[int] = mapped_column(  # noqa: A003
        ForeignKey("_check_facts.id"),
        primary_key=True,
    )

    #: The URLs of the vulnerabilities, stored as a JSON list.
    vulnerability_urls: Mapped[list[str]] = mapped_column(
        DBJsonList,
        nullable=False,
        info={"justification": JustificationType.TEXT},
    )

    #: The identifier of the GitHub Action.
    github_actions_id: Mapped[str] = mapped_column(
        String,
        nullable=False,
        info={"justification": JustificationType.TEXT},
    )

    #: The version of the GitHub Action.
    github_actions_version: Mapped[str] = mapped_column(
        String,
        nullable=False,
        info={"justification": JustificationType.TEXT},
    )

    #: The workflow that calls the vulnerable GitHub Action, rendered as a link.
    caller_workflow: Mapped[str] = mapped_column(
        String,
        nullable=False,
        info={"justification": JustificationType.HREF},
    )

    __mapper_args__ = {
        "polymorphic_identity": "_github_actions_vulnerabilities_check",
    }
56+
57+
58+
class GitHubActionsVulnsCheck(BaseCheck):
    """This Check checks whether the GitHub Actions called from the corresponding repo have known vulnerabilities.

    Note: This check analyzes the direct GitHub Actions dependencies only.
    TODO: Check GitHub Actions dependencies recursively.
    """

    def __init__(self) -> None:
        """Initialize instance."""
        check_id = "mcn_githubactions_vulnerabilities_1"
        description = "Check whether the GitHub Actions called from the corresponding repo have known vulnerabilities.."
        depends_on: list[tuple[str, CheckResultType]] = [("mcn_version_control_system_1", CheckResultType.PASSED)]
        eval_reqs = [ReqName.SECURITY]
        super().__init__(check_id=check_id, description=description, depends_on=depends_on, eval_reqs=eval_reqs)

    @staticmethod
    def _collect_external_workflows(ctx: AnalyzeContext) -> dict[str, list]:
        """Group the versioned external and reusable workflows found in the CI call graphs by name.

        Parameters
        ----------
        ctx : AnalyzeContext
            The object containing processed data for the target repo.

        Returns
        -------
        dict[str, list]
            A mapping from workflow name to its invocations. Each invocation is a dictionary
            with the ``version`` that is called and the ``caller_path`` link of the calling workflow.
        """
        external_workflows: dict[str, list] = {}
        relevant_types = (GitHubWorkflowType.EXTERNAL, GitHubWorkflowType.REUSABLE)

        for ci_info in ctx.dynamic_data["ci_services"]:
            for callee in ci_info["callgraph"].bfs():
                if not isinstance(callee, GitHubWorkflowNode) or callee.node_type not in relevant_types:
                    continue

                if "@" not in callee.name:
                    # Most likely we have encountered an internal reusable workflow, which
                    # can be skipped.
                    logger.debug("GitHub Actions workflow %s misses a version. Skipping...", callee.name)
                    continue

                # Split on the first "@" only: unpacking an unbounded split raises ValueError
                # when the name contains more than one "@", which would abort the whole check.
                workflow_name, workflow_version = callee.name.split("@", maxsplit=1)
                if not workflow_name:
                    logger.debug("Workflow %s is not relevant. Skipping...", callee.name)
                    continue

                caller_path = callee.caller.source_path if callee.caller else None

                # The API client is looked up only for matching nodes, as in the original code.
                api_client = ci_info["service"].api_client
                relative_path = (
                    api_client.get_relative_path_of_workflow(os.path.basename(caller_path)) if caller_path else ""
                )
                external_workflows.setdefault(workflow_name, []).append(
                    {
                        "version": workflow_version,
                        "caller_path": api_client.get_file_link(
                            ctx.component.repository.full_name,
                            ctx.component.repository.commit_sha,
                            file_path=relative_path,
                        ),
                    }
                )

        return external_workflows

    def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
        """Implement the check in this method.

        Parameters
        ----------
        ctx : AnalyzeContext
            The object containing processed data for the target repo.

        Returns
        -------
        CheckResultData
            The result of the check.
        """
        ecosystem = "GitHub Actions"

        external_workflows = self._collect_external_workflows(ctx)
        if not external_workflows:
            # There is nothing to look up, so do not send an empty batch query to OSV.
            return CheckResultData(
                result_tables=[],
                result_type=CheckResultType.PASSED,
            )

        # We first send a batch query to see which GitHub Actions are potentially vulnerable.
        # OSV's querybatch returns minimal results but this allows us to only make subsequent
        # queries to get vulnerability details when needed.
        batch_query = [{"name": name, "ecosystem": ecosystem} for name in external_workflows]
        batch_vulns = []
        try:
            batch_vulns = OSVDevService.get_vulnerabilities_package_name_batch(batch_query)
        except APIAccessError as error:
            # NOTE: a failed batch query leaves `batch_vulns` empty, so the check reports PASSED.
            logger.debug(error)

        result_tables: list[CheckFacts] = []
        for vuln_res in batch_vulns:
            workflow_name = vuln_res["name"]
            try:
                vulns = OSVDevService.get_vulnerabilities_package_name(ecosystem=ecosystem, name=workflow_name)
            except APIAccessError as error:
                logger.debug(error)
                continue

            # Report one fact per invocation, listing the vulnerabilities that affect its version.
            for workflow_inv in external_workflows[workflow_name]:
                vuln_mapping = []
                for vuln in vulns:
                    if v_id := json_extract(vuln, ["id"], str):
                        try:
                            if OSVDevService.is_version_affected(
                                vuln, workflow_name, workflow_inv["version"], ecosystem
                            ):
                                vuln_mapping.append(f"https://osv.dev/vulnerability/{v_id}")
                        except APIAccessError as error:
                            logger.debug(error)

                if vuln_mapping:
                    result_tables.append(
                        GitHubActionsVulnsFacts(
                            vulnerability_urls=vuln_mapping,
                            github_actions_id=workflow_name,
                            github_actions_version=workflow_inv["version"],
                            caller_workflow=workflow_inv["caller_path"],
                            confidence=Confidence.HIGH,
                        )
                    )

        if result_tables:
            return CheckResultData(
                result_tables=result_tables,
                result_type=CheckResultType.FAILED,
            )

        return CheckResultData(
            result_tables=[],
            result_type=CheckResultType.PASSED,
        )
180+
181+
182+
# Instantiate the check and register it with the check registry when this module is imported.
registry.register(GitHubActionsVulnsCheck())

0 commit comments

Comments
 (0)