Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ local
*.rdb
*.aof
.vscode
.ipynb_checkpoints

# This is only created when packaging for external redistribution
/thirdparty/
12 changes: 12 additions & 0 deletions scanpipe/pipelines/deploy_to_develop.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ def steps(cls):
cls.map_javascript,
cls.map_javascript_symbols,
cls.map_elf,
cls.map_macho,
cls.map_winpe,
cls.map_go,
cls.map_rust,
cls.match_directories_to_purldb,
Expand Down Expand Up @@ -208,6 +210,16 @@ def map_elf(self):
d2d.map_elfs_with_dwarf_paths(project=self.project, logger=self.log)
d2d.map_elfs_binaries_with_symbols(project=self.project, logger=self.log)

@optional_step("MacOS")
def map_macho(self):
"""
Map Mach-O binaries to their sources using symbols.

Delegate to ``d2d.map_macho_binaries_with_symbols``, passing this
pipeline's ``project`` and its ``log`` method as the logger.
"""
d2d.map_macho_binaries_with_symbols(project=self.project, logger=self.log)

@optional_step("Windows")
def map_winpe(self):
"""
Map Windows PE (winpe) binaries to their sources using symbols.

Delegate to ``d2d.map_winpe_binaries_with_symbols``, passing this
pipeline's ``project`` and its ``log`` method as the logger.
"""
d2d.map_winpe_binaries_with_symbols(project=self.project, logger=self.log)

@optional_step("Go")
def map_go(self):
"""Map Go binaries to their sources using paths."""
Expand Down
51 changes: 50 additions & 1 deletion scanpipe/pipes/d2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
from django.db.models.functions import Concat
from django.template.defaultfilters import pluralize

from binary_inspector.binary import collect_and_parse_macho_symbols
from binary_inspector.binary import collect_and_parse_winpe_symbols
from commoncode.paths import common_prefix
from elf_inspector.binary import collect_and_parse_elf_symbols
from elf_inspector.dwarf import get_dwarf_paths
Expand Down Expand Up @@ -1942,7 +1944,7 @@ def map_elfs_binaries_with_symbols(project, logger=None):
project.codebaseresources.files().to_codebase().has_no_relation().elfs()
)

# Collect source symbols from rust source files
# Collect source symbols from elf related source files
elf_from_resources = from_resources.filter(extension__in=[".c", ".cpp", ".h"])

map_binaries_with_symbols(
Expand All @@ -1955,6 +1957,53 @@ def map_elfs_binaries_with_symbols(project, logger=None):
)


def map_macho_binaries_with_symbols(project, logger=None):
    """
    Map the Mach-O binaries of ``project`` to their sources using symbols.

    The candidate sources are the from/ codebase files whose extension is one
    of ``.c``, ``.cpp``, ``.h``, ``.m`` or ``.swift``. The candidate binaries
    are the to/ codebase files without any relation that the ``macho_binaries``
    queryset filter selects. Both sets are handed to
    ``map_binaries_with_symbols`` along with the Mach-O symbol collector, using
    the ``macho_symbols`` map type. ``logger`` is passed through unchanged.
    """
    # Source side: restrict the from/ files to macOS-related source extensions.
    candidate_sources = (
        project.codebaseresources.files()
        .from_codebase()
        .filter(extension__in=[".c", ".cpp", ".h", ".m", ".swift"])
    )

    # Binary side: only to/ files that are still unmapped.
    unmapped_binaries = (
        project.codebaseresources.files()
        .to_codebase()
        .has_no_relation()
        .macho_binaries()
    )

    map_binaries_with_symbols(
        project=project,
        from_resources=candidate_sources,
        to_resources=unmapped_binaries,
        binary_symbols_func=collect_and_parse_macho_symbols,
        map_type="macho_symbols",
        logger=logger,
    )


def map_winpe_binaries_with_symbols(project, logger=None):
    """
    Map the winpe binaries of ``project`` to their sources using symbols.

    The candidate sources are the from/ codebase files whose extension is one
    of ``.c``, ``.cpp``, ``.h`` or ``.cs``. The candidate binaries are the to/
    codebase files without any relation that the ``win_exes`` queryset filter
    selects. Both sets are handed to ``map_binaries_with_symbols`` along with
    the winpe symbol collector, using the ``winpe_symbols`` map type.
    ``logger`` is passed through unchanged.
    """
    # Source side: restrict the from/ files to Windows-related source extensions.
    candidate_sources = (
        project.codebaseresources.files()
        .from_codebase()
        .filter(extension__in=[".c", ".cpp", ".h", ".cs"])
    )

    # Binary side: only to/ files that are still unmapped.
    unmapped_binaries = (
        project.codebaseresources.files()
        .to_codebase()
        .has_no_relation()
        .win_exes()
    )

    map_binaries_with_symbols(
        project=project,
        from_resources=candidate_sources,
        to_resources=unmapped_binaries,
        binary_symbols_func=collect_and_parse_winpe_symbols,
        map_type="winpe_symbols",
        logger=logger,
    )


def map_binaries_with_symbols(
project,
from_resources,
Expand Down
18 changes: 18 additions & 0 deletions scanpipe/pipes/symbolmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
SMALL_FILE_SYMBOLS_THRESHOLD = 20
MATCHING_RATIO_RUST_SMALL_FILE = 0.4
MATCHING_RATIO_ELF = 0.05
MATCHING_RATIO_MACHO = 0.15
MATCHING_RATIO_WINPE = 0.15
MATCHING_RATIO_JAVASCRIPT = 0.7
SMALL_FILE_SYMBOLS_THRESHOLD_JAVASCRIPT = 30
MATCHING_RATIO_JAVASCRIPT_SMALL_FILE = 0.5
Expand Down Expand Up @@ -209,6 +211,22 @@ def match_source_symbols_to_binary(source_symbols, binary_symbols, map_type):
return True, stats
else:
return False, stats
elif map_type == "macho_symbols":
if (
matched_symbols_ratio > MATCHING_RATIO_MACHO
or matched_symbols_unique_ratio > MATCHING_RATIO_MACHO
):
return True, stats
else:
return False, stats
elif map_type == "winpe_symbols":
if (
matched_symbols_ratio > MATCHING_RATIO_WINPE
or matched_symbols_unique_ratio > MATCHING_RATIO_WINPE
):
return True, stats
else:
return False, stats


def match_source_paths_to_binary(
Expand Down
Binary file added scanpipe/tests/data/d2d-macho/from-lumen.zip
Binary file not shown.
Binary file added scanpipe/tests/data/d2d-macho/to-lumen.zip
Binary file not shown.
Binary file not shown.
Binary file added scanpipe/tests/data/d2d-winpe/to-translucent.zip
Binary file not shown.
62 changes: 62 additions & 0 deletions scanpipe/tests/pipes/test_d2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -1599,6 +1599,68 @@ def test_scanpipe_pipes_d2d_map_elf_symbols(self):
).count(),
)

@skipIf(sys.platform == "darwin", "Test is failing on macOS")
def test_scanpipe_pipes_d2d_map_macho_symbols(self):
    """Check the count of ``macho_symbols`` relations for the lumen inputs."""
    project = self.project1

    # Stage the from/ and to/ archives in the project input directory.
    archives = [
        self.data / "d2d-macho/from-lumen.zip",
        self.data / "d2d-macho/to-lumen.zip",
    ]
    copy_inputs(archives, project.input_path)
    self.from_files, self.to_files = d2d.get_inputs(project)

    # Extract each group of inputs under its own codebase sub-directory.
    extraction_plan = (
        (self.from_files, project.codebase_path / d2d.FROM),
        (self.to_files, project.codebase_path / d2d.TO),
    )
    for group, destination in extraction_plan:
        for archive_path in group:
            scancode.extract_archive(archive_path, destination)

    # Then extract any archives nested in the extracted codebase.
    scancode.extract_archives(project.codebase_path, recurse=True)
    pipes.collect_and_create_codebase_resources(project)

    log_output = io.StringIO()
    d2d.map_macho_binaries_with_symbols(project=project, logger=log_output.write)

    macho_relations = CodebaseRelation.objects.filter(
        project=project, map_type="macho_symbols"
    )
    self.assertEqual(9, macho_relations.count())

@skipIf(sys.platform == "darwin", "Test is failing on macOS")
def test_scanpipe_pipes_d2d_map_winpe_symbols(self):
    """Check the count of ``winpe_symbols`` relations for the translucent inputs."""
    project = self.project1

    # Stage the to/ and from/ archives in the project input directory.
    archives = [
        self.data / "d2d-winpe/to-translucent.zip",
        self.data / "d2d-winpe/from-translucent.zip",
    ]
    copy_inputs(archives, project.input_path)
    self.from_files, self.to_files = d2d.get_inputs(project)

    # Extract each group of inputs under its own codebase sub-directory.
    extraction_plan = (
        (self.from_files, project.codebase_path / d2d.FROM),
        (self.to_files, project.codebase_path / d2d.TO),
    )
    for group, destination in extraction_plan:
        for archive_path in group:
            scancode.extract_archive(archive_path, destination)

    # Then extract any archives nested in the extracted codebase.
    scancode.extract_archives(project.codebase_path, recurse=True)
    pipes.collect_and_create_codebase_resources(project)

    log_output = io.StringIO()
    d2d.map_winpe_binaries_with_symbols(project=project, logger=log_output.write)

    winpe_relations = CodebaseRelation.objects.filter(
        project=project, map_type="winpe_symbols"
    )
    self.assertEqual(4, winpe_relations.count())

@mock.patch("scanpipe.pipes.purldb.match_resources")
def test_scanpipe_pipes_d2d_match_purldb_resource_no_package_data(
self, mock_match_resource
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ install_requires =
elf-inspector==0.0.3
go-inspector==0.5.0
rust-inspector==0.1.0
binary-inspector==0.1.2
python-inspector==0.14.0
source-inspector==0.6.1; sys_platform != "darwin" and platform_machine != "arm64"
aboutcode-toolkit==11.1.1
Expand Down