feat: NoScan Changes for JSON output (#5294)

joydeep049 · web-flow · commit a5b2e22e9964 · 2025-08-20T12:32:39.000-07:00
Signed-off-by: joydeep049 <bntripathy123@gmail.com>
diff --git a/cve_bin_tool/cli.py b/cve_bin_tool/cli.py
@@ -1164,8 +1164,9 @@ def main(argv=None):
                         LOGGER.debug(f"Triage Data: {triage_data}")
                         parsed_data[product_info] = triage_data
 
-                    if not args["no_scan"]:
-                        cve_scanner.get_cves(product_info, triage_data)
+                    # Always call get_cves to collect component information
+                    # The method handles both normal and no-scan modes internally
+                    cve_scanner.get_cves(product_info, triage_data)
             total_files = version_scanner.total_scanned_files
             LOGGER.info(f"Total files: {total_files}")
 
diff --git a/cve_bin_tool/output_engine/__init__.py b/cve_bin_tool/output_engine/__init__.py
@@ -71,53 +71,84 @@ def output_csv(
     affected_versions: int = 0,
     metrics: bool = False,
     strip_scan_dir: bool = False,
+    no_scan: bool = False,
 ):
-    """Output a CSV of CVEs"""
-    formatted_output = format_output(
-        all_cve_data,
-        scanned_dir,
-        strip_scan_dir,
-        all_cve_version_info,
-        detailed,
-        affected_versions,
-        metrics,
-    )
+    """Output a CSV of CVEs or components (in no-scan mode)"""
+    if no_scan:
+        # In no-scan mode, output component information
+        fieldnames = [
+            "vendor",
+            "product",
+            "version",
+            "paths",
+            "scan_mode",
+        ]
+        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
+        writer.writeheader()
+
+        for product_info, component_data in all_cve_data.items():
+            if strip_scan_dir:
+                paths = ", ".join(
+                    [strip_path(path, scanned_dir) for path in component_data["paths"]]
+                )
+            else:
+                paths = ", ".join(component_data["paths"])
+
+            row = {
+                "vendor": product_info.vendor,
+                "product": product_info.product,
+                "version": product_info.version,
+                "paths": paths,
+                "scan_mode": "no-scan",
+            }
+            writer.writerow(row)
+    else:
+        # Normal CVE output
+        formatted_output = format_output(
+            all_cve_data,
+            scanned_dir,
+            strip_scan_dir,
+            all_cve_version_info,
+            detailed,
+            affected_versions,
+            metrics,
+        )
 
-    # Remove triage response and justification from the CSV output.
-    for cve_entry in formatted_output:
-        cve_entry.pop("response", None)
-        cve_entry.pop("justification", None)
-
-    # Trim any leading -, =, +, @, tab or CR to avoid excel macros
-    for cve_entry in formatted_output:
-        for key, value in cve_entry.items():
-            cve_entry[key] = value.strip("-=+@\t\r")
-
-    fieldnames = [
-        "vendor",
-        "product",
-        "version",
-        "cve_number",
-        "severity",
-        "score",
-        "source",
-        "cvss_version",
-        "cvss_vector",
-        "paths",
-        "remarks",
-        "comments",
-    ]
-    if metrics:
-        fieldnames.append("epss_probability")
-        fieldnames.append("epss_percentile")
-    if detailed:
-        fieldnames.append("description")
-    if affected_versions != 0:
-        fieldnames.append("affected_versions")
-    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
-
-    writer.writeheader()
-    writer.writerows(formatted_output)
+        # Remove triage response and justification from the CSV output.
+        for cve_entry in formatted_output:
+            cve_entry.pop("response", None)
+            cve_entry.pop("justification", None)
+
+        # Trim any leading -, =, +, @, tab or CR to avoid excel macros
+        for cve_entry in formatted_output:
+            for key, value in cve_entry.items():
+                cve_entry[key] = value.strip("-=+@\t\r")
+
+        fieldnames = [
+            "vendor",
+            "product",
+            "version",
+            "cve_number",
+            "severity",
+            "score",
+            "source",
+            "cvss_version",
+            "cvss_vector",
+            "paths",
+            "remarks",
+            "comments",
+        ]
+        if metrics:
+            fieldnames.append("epss_probability")
+            fieldnames.append("epss_percentile")
+        if detailed:
+            fieldnames.append("description")
+        if affected_versions != 0:
+            fieldnames.append("affected_versions")
+        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
+
+        writer.writeheader()
+        writer.writerows(formatted_output)
 
 
 # load pdfs only if reportlab is found.  if not, make a stub that prints a
@@ -744,6 +775,7 @@ def output_cves(self, outfile, output_type="console"):
                 self.affected_versions,
                 self.metrics,
                 self.strip_scan_dir,
+                self.no_scan,
             )
         elif output_type == "json2":
             output_json2(
@@ -758,6 +790,7 @@ def output_cves(self, outfile, output_type="console"):
                 self.exploits,
                 self.metrics,
                 self.strip_scan_dir,
+                self.no_scan,
             )
         elif output_type == "csv":
             output_csv(
@@ -769,6 +802,7 @@ def output_cves(self, outfile, output_type="console"):
                 self.affected_versions,
                 self.metrics,
                 self.strip_scan_dir,
+                self.no_scan,
             )
         elif output_type == "pdf":
             output_pdf(
diff --git a/cve_bin_tool/output_engine/json_output.py b/cve_bin_tool/output_engine/json_output.py
@@ -8,7 +8,7 @@
 from typing import IO
 
 from cve_bin_tool.cvedb import CVEDB
-from cve_bin_tool.util import CVEData, ProductInfo, VersionInfo
+from cve_bin_tool.util import CVEData, ProductInfo, VersionInfo, strip_path
 from cve_bin_tool.version import VERSION
 
 from .util import format_output, get_cve_summary
@@ -54,6 +54,44 @@ def vulnerabilities_builder(
     return vulnerabilities
 
 
+def components_builder(
+    all_cve_data,  # mapping; keys expose .vendor/.product/.version, values have a "paths" entry
+    scanned_dir,  # only used as the second argument to strip_path()
+    strip_scan_dir,  # truthy: pass every path through strip_path() before joining
+):
+    """Build the component listing emitted in no-scan mode.
+    Returns {"summary": {counts}, "components": [one dict per item of all_cve_data]}.
+    """
+    components = {}
+    components["summary"] = {
+        "total_components": len(all_cve_data),  # one per key of all_cve_data
+        "components_with_paths": sum(  # counts entries whose "paths" value is truthy
+            1 for data in all_cve_data.values() if data["paths"]
+        ),
+    }
+
+    component_reports = []
+    for product_info, component_data in all_cve_data.items():  # one report entry per item
+        if strip_scan_dir:
+            paths = ", ".join(
+                [strip_path(path, scanned_dir) for path in component_data["paths"]]
+            )
+        else:
+            paths = ", ".join(component_data["paths"])  # paths reported verbatim
+
+        component_entry = {
+            "vendor": product_info.vendor,
+            "product": product_info.product,
+            "version": product_info.version,
+            "paths": paths,  # a single ", "-separated string, not a list
+            "scan_mode": "no-scan",  # same constant marker on every entry
+        }
+        component_reports.append(component_entry)
+
+    components["components"] = component_reports
+    return components
+
+
 def db_entries_count():
     """
     Retrieves the count of CVE entries from the database grouped by data source.
@@ -103,18 +141,36 @@ def output_json(
     affected_versions: int = 0,
     metrics: bool = False,
     strip_scan_dir: bool = False,
+    no_scan: bool = False,
 ):
-    """Output a JSON of CVEs"""
-    formatted_output = format_output(
-        all_cve_data,
-        scanned_dir,
-        strip_scan_dir,
-        all_cve_version_info,
-        detailed,
-        affected_versions,
-        metrics,
-    )
-    json.dump(formatted_output, outfile, indent=2)
+    """Output a JSON of CVEs or components (in no-scan mode)"""
+    if no_scan:
+        # In no-scan mode, output component information instead of CVE data
+        output = {}
+        output["scan_mode"] = "no-scan"
+        output["metadata"] = {
+            "tool": {"name": "cve-bin-tool", "version": f"{VERSION}"},
+            "generation_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            "description": "Component identification scan without vulnerability assessment",
+        }
+        output["components"] = components_builder(
+            all_cve_data,
+            scanned_dir,
+            strip_scan_dir,
+        )
+        json.dump(output, outfile, indent=2)
+    else:
+        # Normal CVE output
+        formatted_output = format_output(
+            all_cve_data,
+            scanned_dir,
+            strip_scan_dir,
+            all_cve_version_info,
+            detailed,
+            affected_versions,
+            metrics,
+        )
+        json.dump(formatted_output, outfile, indent=2)
 
 
 def output_json2(
@@ -129,23 +185,41 @@ def output_json2(
     exploits: bool = False,
     metrics: bool = False,
     strip_scan_dir: bool = False,
+    no_scan: bool = False,
 ):
-    """Output a JSON of CVEs in JSON2 format"""
-    output = {}
-    output["$schema"] = ""
-    output["metadata"] = metadata_builder(organized_parameters)
-    output["database_info"] = {
-        "last_updated": time_of_last_update.strftime("%Y-%m-%d %H:%M:%S"),
-        "total_entries": db_entries_count(),
-    }
-    output["vulnerabilities"] = vulnerabilities_builder(
-        all_cve_data,
-        exploits,
-        all_cve_version_info,
-        scanned_dir,
-        detailed,
-        affected_versions,
-        metrics,
-        strip_scan_dir,
-    )
-    json.dump(output, outfile, indent=2)
+    """Output a JSON of CVEs in JSON2 format or components (in no-scan mode)"""
+    if no_scan:
+        # In no-scan mode, output component information in JSON2 format
+        output = {}
+        output["$schema"] = ""
+        output["metadata"] = metadata_builder(organized_parameters)
+        output["scan_mode"] = "no-scan"
+        output["description"] = (
+            "Component identification scan without vulnerability assessment"
+        )
+        output["components"] = components_builder(
+            all_cve_data,
+            scanned_dir,
+            strip_scan_dir,
+        )
+        json.dump(output, outfile, indent=2)
+    else:
+        # Normal CVE output in JSON2 format
+        output = {}
+        output["$schema"] = ""
+        output["metadata"] = metadata_builder(organized_parameters)
+        output["database_info"] = {
+            "last_updated": time_of_last_update.strftime("%Y-%m-%d %H:%M:%S"),
+            "total_entries": db_entries_count(),
+        }
+        output["vulnerabilities"] = vulnerabilities_builder(
+            all_cve_data,
+            exploits,
+            all_cve_version_info,
+            scanned_dir,
+            detailed,
+            affected_versions,
+            metrics,
+            strip_scan_dir,
+        )
+        json.dump(output, outfile, indent=2)
diff --git a/cve_bin_tool/version_scanner.py b/cve_bin_tool/version_scanner.py
@@ -262,7 +262,16 @@ def scan_file(self, filename: str) -> Iterator[ScanInfo]:
         # check if it's a Linux kernel image
         is_linux_kernel, output = self.is_linux_kernel(filename)
 
-        if not is_exec and not is_linux_kernel:
+        # In no-scan mode, also check if it's a language-specific file
+        is_language_file = False
+        if self.no_scan:
+            # Check if filename matches any language parser patterns
+            for pattern in valid_files.keys():
+                if pattern in filename:
+                    is_language_file = True
+                    break
+
+        if not is_exec and not is_linux_kernel and not is_language_file:
             return None
 
         # parse binary file's strings
@@ -279,6 +288,18 @@ def scan_file(self, filename: str) -> Iterator[ScanInfo]:
                 for scan_info in parse(filename, output, self.cve_db, self.logger):
                     yield ScanInfo(scan_info.product_info, "".join(self.file_stack))
 
+        # In no-scan mode, also try to parse language-specific files directly
+        if self.no_scan and is_language_file:
+            # Create a mock output string that includes the filename pattern
+            for pattern in valid_files.keys():
+                if pattern in filename:
+                    mock_output = f"mock: {pattern}"
+                    for scan_info in parse(
+                        filename, mock_output, self.cve_db, self.logger
+                    ):
+                        yield ScanInfo(scan_info.product_info, "".join(self.file_stack))
+                    break
+
         yield from self.run_checkers(filename, lines)
 
     def run_checkers(self, filename: str, lines: str) -> Iterator[ScanInfo]:
diff --git a/test/test_json_no_scan.py b/test/test_json_no_scan.py