Skip to content

Commit e039fb7

Browse files
committed
Added helper function for converting gfm output
1 parent 2cf4b79 commit e039fb7

File tree

7 files changed

+207
-48
lines changed

7 files changed

+207
-48
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ build-backend = "hatchling.build"
66

77
[project]
88
name = "socketsecurity"
9-
version = "2.1.3"
9+
version = "2.1.4"
1010
requires-python = ">= 3.10"
1111
license = {"file" = "LICENSE"}
1212
dependencies = [

socketsecurity/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
__author__ = 'socket.dev'
2-
__version__ = '2.1.3'
2+
__version__ = '2.1.4'

socketsecurity/core/__init__.py

Lines changed: 68 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,7 @@ def create_packages_dict(self, sbom_artifacts: list[SocketArtifact]) -> dict[str
425425
packages = {}
426426
top_level_count = {}
427427
for artifact in sbom_artifacts:
428-
package = Package.from_socket_artifact(asdict(artifact))
428+
package = Package.from_diff_artifact(artifact.__dict__)
429429
if package.id in packages:
430430
print("Duplicate package?")
431431
else:
@@ -534,22 +534,44 @@ def update_package_values(pkg: Package) -> Package:
534534
pkg.url += f"/{pkg.name}/overview/{pkg.version}"
535535
return pkg
536536

537-
def get_added_and_removed_packages(self, head_full_scan_id: str, new_full_scan_id: str) -> Tuple[Dict[str, Package], Dict[str, Package]]:
537+
def get_added_and_removed_packages(
538+
self,
539+
head_full_scan_id: str,
540+
new_full_scan_id: str,
541+
merge: bool = False,
542+
external_href: str = None,
543+
) -> Tuple[Dict[str, Package], Dict[str, Package], str, str]:
538544
"""
539545
Get packages that were added and removed between scans.
540546
541547
Args:
542-
head_full_scan: Previous scan (may be None if first scan)
543-
head_full_scan_id: New scan just created
544-
548+
head_full_scan_id: Previous scan
549+
new_full_scan_id: New scan just created
550+
merge: Whether the scan is merged into the default branch
551+
external_href: External reference
545552
Returns:
546553
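Tuple of (added_packages, removed_packages, diff_url, new_full_scan_url): the two package dictionaries, followed by the diff scan URL and the new full scan URL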
547554
"""
548555

549556
log.info(f"Comparing scans - Head scan ID: {head_full_scan_id}, New scan ID: {new_full_scan_id}")
550557
diff_start = time.time()
551558
try:
552-
diff_report = self.sdk.fullscans.stream_diff(self.config.org_slug, head_full_scan_id, new_full_scan_id, use_types=True).data
559+
params = {
560+
"before": head_full_scan_id,
561+
"after": new_full_scan_id,
562+
"description": f"Diff scan between head {head_full_scan_id} and new {new_full_scan_id} scans",
563+
"merge": merge,
564+
}
565+
if external_href:
566+
params["external_href"] = external_href
567+
new_diff_scan = self.sdk.diffscans.create_from_ids(self.config.org_slug, params)
568+
data = new_diff_scan.get("diff_scan", {})
569+
diff_scan_id = data.get("id")
570+
if not diff_scan_id:
571+
log.error(f"Failed to get diff scan ID for {new_full_scan_id}")
572+
log.error(new_diff_scan)
573+
sys.exit(1)
574+
diff_report = self.sdk.diffscans.get(self.config.org_slug, diff_scan_id)
553575
except APIFailure as e:
554576
log.error(f"API Error: {e}")
555577
sys.exit(1)
@@ -559,44 +581,63 @@ def get_added_and_removed_packages(self, head_full_scan_id: str, new_full_scan_i
559581
log.error(f"Stack trace:\n{traceback.format_exc()}")
560582
raise
561583

584+
diff_data = diff_report.get("diff_scan", {})
562585
diff_end = time.time()
586+
diff_url = diff_data.get("html_url")
587+
after_data = diff_data.get("after_full_scan")
588+
if after_data:
589+
new_full_scan_url = after_data.get("html_url")
590+
else:
591+
new_full_scan_url = ""
592+
artifacts = diff_data.get("artifacts", {})
593+
added = artifacts.get("added", [])
594+
removed = artifacts.get("removed", [])
595+
unchanged = artifacts.get("unchanged", [])
596+
replaced = artifacts.get("replaced", [])
597+
updated = artifacts.get("updated", [])
563598
log.info(f"Diff Report Gathered in {diff_end - diff_start:.2f} seconds")
564599
log.info("Diff report artifact counts:")
565-
log.info(f"Added: {len(diff_report.artifacts.added)}")
566-
log.info(f"Removed: {len(diff_report.artifacts.removed)}")
567-
log.info(f"Unchanged: {len(diff_report.artifacts.unchanged)}")
568-
log.info(f"Replaced: {len(diff_report.artifacts.replaced)}")
569-
log.info(f"Updated: {len(diff_report.artifacts.updated)}")
600+
log.info(f"Added: {len(added)}")
601+
log.info(f"Removed: {len(removed)}")
602+
log.info(f"Unchanged: {len(unchanged)}")
603+
log.info(f"Replaced: {len(replaced)}")
604+
log.info(f"Updated: {len(updated)}")
570605

571-
added_artifacts = diff_report.artifacts.added + diff_report.artifacts.updated
572-
removed_artifacts = diff_report.artifacts.removed + diff_report.artifacts.replaced
606+
added_artifacts = added + updated
607+
removed_artifacts = removed
573608

574609
added_packages: Dict[str, Package] = {}
575610
removed_packages: Dict[str, Package] = {}
576611

577612
for artifact in added_artifacts:
613+
artifact_id = artifact.get("id")
614+
artifact_name = artifact.get("name")
615+
artifact_version = artifact.get("version")
578616
try:
579-
pkg = Package.from_diff_artifact(asdict(artifact))
617+
pkg = Package.from_diff_artifact(artifact)
580618
pkg = Core.update_package_values(pkg)
581-
added_packages[artifact.id] = pkg
619+
added_packages[pkg.id] = pkg
582620
except KeyError:
583-
log.error(f"KeyError: Could not create package from added artifact {artifact.id}")
584-
log.error(f"Artifact details - name: {artifact.name}, version: {artifact.version}")
621+
log.error(f"KeyError: Could not create package from added artifact {artifact_id}")
622+
log.error(f"Artifact details - name: {artifact_name}, version: {artifact_version}")
585623
log.error("No matching packages found in new_full_scan")
586624

587625
for artifact in removed_artifacts:
626+
artifact_id = artifact.get("id")
627+
artifact_name = artifact.get("name")
628+
artifact_version = artifact.get("version")
588629
try:
589-
pkg = Package.from_diff_artifact(asdict(artifact))
630+
pkg = Package.from_diff_artifact(artifact)
590631
pkg = Core.update_package_values(pkg)
591632
if pkg.namespace:
592633
pkg.purl += f"{pkg.namespace}/{pkg.purl}"
593-
removed_packages[artifact.id] = pkg
634+
removed_packages[pkg.id] = pkg
594635
except KeyError:
595-
log.error(f"KeyError: Could not create package from removed artifact {artifact.id}")
596-
log.error(f"Artifact details - name: {artifact.name}, version: {artifact.version}")
636+
log.error(f"KeyError: Could not create package from removed artifact {artifact_id}")
637+
log.error(f"Artifact details - name: {artifact_name}, version: {artifact_version}")
597638
log.error("No matching packages found in head_full_scan")
598639

599-
return added_packages, removed_packages
640+
return added_packages, removed_packages, diff_url, new_full_scan_url
600641

601642
def create_new_diff(
602643
self,
@@ -642,7 +683,6 @@ def create_new_diff(
642683
try:
643684
new_scan_start = time.time()
644685
new_full_scan = self.create_full_scan(files_for_sending, params)
645-
new_full_scan.sbom_artifacts = self.get_sbom_data(new_full_scan.id)
646686
new_scan_end = time.time()
647687
log.info(f"Total time to create new full scan: {new_scan_end - new_scan_start:.2f}")
648688
except APIFailure as e:
@@ -654,26 +694,15 @@ def create_new_diff(
654694
log.error(f"Stack trace:\n{traceback.format_exc()}")
655695
raise
656696

657-
scans_ready = self.check_full_scans_status(head_full_scan_id, new_full_scan.id)
658-
if scans_ready is False:
659-
log.error(f"Full scans did not complete within {self.config.timeout} seconds")
660-
added_packages, removed_packages = self.get_added_and_removed_packages(head_full_scan_id, new_full_scan.id)
697+
added_packages, removed_packages, diff_url, report_url = self.get_added_and_removed_packages(
698+
head_full_scan_id,
699+
new_full_scan.id
700+
)
661701

662702
diff = self.create_diff_report(added_packages, removed_packages)
663-
664-
base_socket = "https://socket.dev/dashboard/org"
665703
diff.id = new_full_scan.id
666-
667-
report_url = f"{base_socket}/{self.config.org_slug}/sbom/{diff.id}"
668-
if not params.include_license_details:
669-
report_url += "?include_license_details=false"
670704
diff.report_url = report_url
671-
672-
if head_full_scan_id is not None:
673-
diff.diff_url = f"{base_socket}/{self.config.org_slug}/diff/{head_full_scan_id}/{diff.id}"
674-
else:
675-
diff.diff_url = diff.report_url
676-
705+
diff.diff_url = diff_url
677706
return diff
678707

679708
def create_diff_report(

socketsecurity/core/classes.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ class AlertCounts(TypedDict):
9797
low: int
9898

9999
@dataclass(kw_only=True)
100-
class Package(SocketArtifactLink):
100+
class Package():
101101
"""
102102
Represents a package detected in a Socket Security scan.
103103
@@ -106,16 +106,23 @@ class Package(SocketArtifactLink):
106106
"""
107107

108108
# Common properties from both artifact types
109-
id: str
109+
type: str
110110
name: str
111111
version: str
112-
type: str
112+
release: str
113+
diffType: str
114+
id: str
115+
author: List[str] = field(default_factory=list)
113116
score: SocketScore
114117
alerts: List[SocketAlert]
115-
author: List[str] = field(default_factory=list)
116118
size: Optional[int] = None
117119
license: Optional[str] = None
118120
namespace: Optional[str] = None
121+
topLevelAncestors: Optional[List[str]] = None
122+
direct: Optional[bool] = False
123+
manifestFiles: Optional[List[SocketManifestReference]] = None
124+
dependencies: Optional[List[str]] = None
125+
artifact: Optional[SocketArtifactLink] = None
119126

120127
# Package-specific fields
121128
license_text: str = ""
@@ -203,7 +210,9 @@ def from_diff_artifact(cls, data: dict) -> "Package":
203210
manifestFiles=ref.get("manifestFiles", []),
204211
dependencies=ref.get("dependencies"),
205212
artifact=ref.get("artifact"),
206-
namespace=data.get('namespace', None)
213+
namespace=data.get('namespace', None),
214+
release=ref.get("release", None),
215+
diffType=ref.get("diffType", None),
207216
)
208217

209218
class Issue:
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
import markdown
2+
from bs4 import BeautifulSoup, NavigableString, Tag
3+
import string
4+
5+
6+
class Helper:
    """Static helpers that break a Markdown section into JSON-friendly pieces."""

    @staticmethod
    def parse_gfm_section(html_content):
        """
        Render a GitHub-Flavored Markdown section and split it around its table.

        Note that ``html_content`` is handed to ``markdown.markdown`` (with the
        ``extra`` extension), i.e. it is rendered as Markdown before parsing.

        Returns a dict with:
            "before_html": HTML of the table's preceding siblings
            "columns":     stripped text of every ``<th>`` in the table
            "rows_html":   one list per ``<tr>`` after the first, each entry
                           being a ``<td>`` serialized as an HTML string
            "after_html":  HTML of the table's following siblings

        If no table is found, the whole rendered HTML is returned as
        "before_html" and the remaining entries are empty.
        """
        rendered = markdown.markdown(html_content, extensions=['extra'])
        soup = BeautifulSoup(rendered, "html.parser")

        table = soup.find('table')
        if not table:
            # No table at all: everything counts as leading content
            return {"before_html": rendered, "columns": [], "rows_html": [], "after_html": ''}

        # Joined in the reverse of the order find_previous_siblings() returns them
        leading = [str(node) for node in table.find_previous_siblings()]
        trailing = [str(node) for node in table.find_next_siblings()]

        column_names = [th.get_text(strip=True) for th in table.find_all('th')]

        # The first <tr> is skipped as the header row
        body_rows = [
            [str(td) for td in tr.find_all('td')]
            for tr in table.find_all('tr')[1:]
        ]

        return {
            "before_html": ''.join(leading[::-1]),
            "columns": column_names,
            "rows_html": body_rows,
            "after_html": ''.join(trailing)
        }

    @staticmethod
    def parse_cell(html_td):
        """
        Convert one table cell's HTML into plain text or a link description.

        Cells without an ``<a>`` yield their stripped text. Cells with an
        ``<a>`` yield a dict with "url" and "link_text"; when that link wraps
        an ``<img>``, the dict also carries "img_src" and "title".
        """
        soup = BeautifulSoup(html_td, "html.parser")
        anchor = soup.find('a')
        if not anchor:
            return soup.get_text(strip=True)

        href = anchor.get('href', '')
        image = anchor.find('img')
        if image:
            return {
                "url": href,
                "img_src": image.get('src', ''),
                "title": image.get('title', ''),
                "link_text": anchor.get_text(strip=True)
            }
        return {"url": href, "link_text": anchor.get_text(strip=True)}

    @staticmethod
    def parse_html_parts(html_fragment):
        """
        Flatten an HTML fragment into an ordered list of parts.

        Each part is one of:
            - {"text": "..."}
            - {"link": "url", "text": "..."}
            - {"img_src": "url", "alt": "...", "title": "..."}

        Text nodes that are empty after stripping, or that consist solely of
        ``string.punctuation`` characters, are dropped.
        """
        soup = BeautifulSoup(html_fragment, 'html.parser')
        parts = []

        def collect(node):
            if isinstance(node, NavigableString):
                stripped = str(node).strip()
                # Keep only text that has at least one non-punctuation character
                if stripped and any(ch not in string.punctuation for ch in stripped):
                    parts.append({"text": stripped})
                return

            if not isinstance(node, Tag):
                return

            if node.name == 'a':
                parts.append({
                    "link": node.get('href', ''),
                    "text": node.get_text(strip=True)
                })
            elif node.name == 'img':
                parts.append({
                    "img_src": node.get('src', ''),
                    "alt": node.get('alt', ''),
                    "title": node.get('title', '')
                })
            else:
                # Any other tag contributes only through its children
                for child in node.children:
                    collect(child)

        for top_level in soup.contents:
            collect(top_level)

        return parts

    @staticmethod
    def section_to_json(section_result):
        """
        Turn the output of ``parse_gfm_section`` into structured JSON.

        Returns {"before": [...], "table": [...], "after": [...]}, where
        "table" holds one dict per row, pairing each column name with its
        parsed cell.
        """
        column_names = section_result.get('columns', [])

        table_rows = []
        for row_cells in section_result.get('rows_html', []):
            parsed = [Helper.parse_cell(cell_html) for cell_html in row_cells]
            table_rows.append(dict(zip(column_names, parsed)))

        before_parts = Helper.parse_html_parts(section_result.get('before_html', ''))
        after_parts = Helper.parse_html_parts(section_result.get('after_html', ''))

        return {
            "before": before_parts,
            "table": table_rows,
            "after": after_parts
        }

socketsecurity/core/messages.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,8 @@ def create_security_comment_json(diff: Diff) -> dict:
292292
output = {
293293
"scan_failed": scan_failed,
294294
"new_alerts": [],
295-
"full_scan_id": diff.id
295+
"full_scan_id": diff.id,
296+
"diff_url": diff.diff_url
296297
}
297298
for alert in diff.new_alerts:
298299
alert: Issue

socketsecurity/output.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ def output_console_comments(self, diff_report: Diff, sbom_file_name: Optional[st
6666

6767
console_security_comment = Messages.create_console_security_alert_table(diff_report)
6868
self.logger.info("Security issues detected by Socket Security:")
69-
self.logger.info(console_security_comment)
69+
self.logger.info(f"Diff Url: {diff_report.diff_url}")
70+
self.logger.info(f"\n{console_security_comment}")
7071

7172
def output_console_json(self, diff_report: Diff, sbom_file_name: Optional[str] = None) -> None:
7273
"""Outputs JSON formatted results"""

0 commit comments

Comments
 (0)