Skip to content

Commit 9d41ad3

Browse files
Improve Go/Rust/Elf/Mach0 binary symbol mapping (#1810)
* Improve Go/Rust/Elf/Mach0 binary symbol mapping Reference: #1707 Signed-off-by: Ayan Sinha Mahapatra <[email protected]> * Add unittests for new d2d modules Signed-off-by: Ayan Sinha Mahapatra <[email protected]> --------- Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent ebe9058 commit 9d41ad3

File tree

6 files changed

+235
-28
lines changed

6 files changed

+235
-28
lines changed

scanpipe/pipelines/deploy_to_develop.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ def steps(cls):
7676
cls.map_javascript,
7777
cls.map_javascript_symbols,
7878
cls.map_javascript_strings,
79+
cls.get_symbols_from_binaries,
7980
cls.map_elf,
8081
cls.map_macho,
8182
cls.map_winpe,
@@ -197,6 +198,14 @@ def map_javascript_strings(self):
197198
"""Map deployed JavaScript, TypeScript to its sources using string literals."""
198199
d2d.map_javascript_strings(project=self.project, logger=self.log)
199200

201+
def get_symbols_from_binaries(self):
    """
    Extract symbols from ELF, Mach-O and Windows binaries for mapping.

    Delegates to ``d2d.extract_binary_symbols`` with this pipeline's project,
    its selected groups (used as the ecosystem options) and its logger.
    """
    extraction_kwargs = {
        "project": self.project,
        "options": self.selected_groups,
        "logger": self.log,
    }
    d2d.extract_binary_symbols(**extraction_kwargs)
208+
200209
@optional_step("Elf")
201210
def map_elf(self):
202211
"""Map ELF binaries to their sources using dwarf paths and symbols."""
@@ -215,8 +224,9 @@ def map_winpe(self):
215224

216225
@optional_step("Go")
217226
def map_go(self):
218-
"""Map Go binaries to their sources using paths."""
227+
"""Map Go binaries to their sources using paths and symbols."""
219228
d2d.map_go_paths(project=self.project, logger=self.log)
229+
d2d.map_go_binaries_with_symbols(project=self.project, logger=self.log)
220230

221231
@optional_step("Rust")
222232
def map_rust(self):

scanpipe/pipes/d2d.py

Lines changed: 107 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1930,6 +1930,53 @@ def map_go_paths(project, logger=None):
19301930
)
19311931

19321932

1933+
# Ecosystem options that trigger binary symbol extraction, grouped by the
# extractor they enable in ``extract_binary_symbols``.
RUST_BINARY_OPTIONS = ["Rust"]
ELF_BINARY_OPTIONS = ["Python", "Go", "Elf"]
MACHO_BINARY_OPTIONS = ["Rust", "Go", "MacOS"]
WINPE_BINARY_OPTIONS = ["Windows"]


def extract_binary_symbols(project, options, logger=None):
    """
    Extract binary symbols for all ELF, Mach-O and WinPE binaries
    found in the ``project`` resources, based on selected
    ecosystem ``options`` so that these symbols can be mapped to
    extracted source symbols.

    ``options`` is an iterable of ecosystem option names. Each extractor runs
    when at least one of its ``*_BINARY_OPTIONS`` is selected; an empty or
    ``None`` ``options`` selects nothing and the function returns immediately.
    Only to/ files without an existing relation are considered.
    """
    # No selected ecosystem means nothing to extract. This also avoids
    # iterating over ``None`` when no option was provided by the caller.
    if not options:
        return

    def is_selected(binary_options):
        """Return True if any selected option is in ``binary_options``."""
        return any(option in binary_options for option in options)

    to_resources = project.codebaseresources.files().to_codebase().has_no_relation()

    if is_selected(ELF_BINARY_OPTIONS):
        extract_binary_symbols_from_resources(
            resources=to_resources.elfs(),
            binary_symbols_func=collect_and_parse_elf_symbols,
            logger=logger,
        )

    if is_selected(RUST_BINARY_OPTIONS):
        extract_binary_symbols_from_resources(
            resources=to_resources.executable_binaries(),
            binary_symbols_func=collect_and_parse_rust_symbols,
            logger=logger,
        )

    if is_selected(MACHO_BINARY_OPTIONS):
        extract_binary_symbols_from_resources(
            resources=to_resources.macho_binaries(),
            binary_symbols_func=collect_and_parse_macho_symbols,
            logger=logger,
        )

    if is_selected(WINPE_BINARY_OPTIONS):
        extract_binary_symbols_from_resources(
            resources=to_resources.win_exes(),
            binary_symbols_func=collect_and_parse_winpe_symbols,
            logger=logger,
        )
1978+
1979+
19331980
def map_rust_binaries_with_symbols(project, logger=None):
19341981
"""Map Rust binaries to their source using symbols in ``project``."""
19351982
from_resources = project.codebaseresources.files().from_codebase()
@@ -1950,8 +1997,32 @@ def map_rust_binaries_with_symbols(project, logger=None):
19501997
project=project,
19511998
from_resources=rust_from_resources,
19521999
to_resources=to_binaries,
1953-
binary_symbols_func=collect_and_parse_rust_symbols,
1954-
map_type="rust_symbols",
2000+
map_types=["rust_symbols", "elf_symbols", "macho_symbols"],
2001+
logger=logger,
2002+
)
2003+
2004+
2005+
def map_go_binaries_with_symbols(project, logger=None):
    """
    Map Go binaries to their source using symbols in ``project``.

    Candidate binaries are the to/ executable binaries that have no relation
    yet; candidate sources are the from/ files whose extension is listed in
    the Go ecosystem ``source_symbol_extensions``.
    """
    # Go source files are selected by the extensions configured for symbols.
    go_symbol_extensions = d2d_config.get_ecosystem_config(
        ecosystem="Go"
    ).source_symbol_extensions
    go_sources = (
        project.codebaseresources.files()
        .from_codebase()
        .filter(extension__in=go_symbol_extensions)
    )

    unmapped_binaries = project.codebaseresources.files().to_codebase()
    unmapped_binaries = unmapped_binaries.has_no_relation().executable_binaries()

    # Symbols are looked up under either the ELF or the Mach-O map type.
    map_binaries_with_symbols(
        project=project,
        from_resources=go_sources,
        to_resources=unmapped_binaries,
        map_types=["elf_symbols", "macho_symbols"],
        logger=logger,
    )
19572028

@@ -1973,8 +2044,7 @@ def map_elfs_binaries_with_symbols(project, logger=None):
19732044
project=project,
19742045
from_resources=elf_from_resources,
19752046
to_resources=elf_binaries,
1976-
binary_symbols_func=collect_and_parse_elf_symbols,
1977-
map_type="elf_symbols",
2047+
map_types=["elf_symbols"],
19782048
logger=logger,
19792049
)
19802050

@@ -1999,8 +2069,7 @@ def map_macho_binaries_with_symbols(project, logger=None):
19992069
project=project,
20002070
from_resources=mac_from_resources,
20012071
to_resources=macho_binaries,
2002-
binary_symbols_func=collect_and_parse_macho_symbols,
2003-
map_type="macho_symbols",
2072+
map_types=["macho_symbols"],
20042073
logger=logger,
20052074
)
20062075

@@ -2022,18 +2091,29 @@ def map_winpe_binaries_with_symbols(project, logger=None):
20222091
project=project,
20232092
from_resources=windows_from_resources,
20242093
to_resources=winexe_binaries,
2025-
binary_symbols_func=collect_and_parse_winpe_symbols,
2026-
map_type="winpe_symbols",
2094+
map_types=["winpe_symbols"],
20272095
logger=logger,
20282096
)
20292097

20302098

2099+
def get_binary_symbols(resource, map_types):
    """
    Return the map_type and binary symbols from `resource` for different kind of
    binary `map_types`.

    The ``map_types`` are tried in order and the first one with a non-empty
    value in ``resource.extra_data`` wins. Return ``(None, [])`` when none of
    them has symbols, including when ``extra_data`` is missing or ``None``.
    """
    # Fall back to an empty mapping so a resource without extra data is
    # reported as "no symbols" rather than raising AttributeError.
    extra_data = getattr(resource, "extra_data", None) or {}

    for map_type in map_types:
        symbols = extra_data.get(map_type)
        if symbols:
            return map_type, symbols

    return None, []
2110+
2111+
20312112
def map_binaries_with_symbols(
20322113
project,
20332114
from_resources,
20342115
to_resources,
2035-
binary_symbols_func,
2036-
map_type,
2116+
map_types,
20372117
logger=None,
20382118
):
20392119
"""Map Binaries to their source using symbols in ``project``."""
@@ -2043,14 +2123,6 @@ def map_binaries_with_symbols(
20432123
project_files=from_resources,
20442124
)
20452125

2046-
# Collect binary symbols from rust binaries
2047-
for resource in to_resources:
2048-
try:
2049-
binary_symbols = binary_symbols_func(resource.location)
2050-
resource.update_extra_data(binary_symbols)
2051-
except Exception as e:
2052-
logger(f"Error parsing binary symbols at: {resource.location_path!r} {e!r}")
2053-
20542126
if logger:
20552127
logger(
20562128
f"Mapping {to_resources.count():,d} to/ resources using symbols "
@@ -2060,7 +2132,10 @@ def map_binaries_with_symbols(
20602132
resource_iterator = to_resources.iterator(chunk_size=2000)
20612133
progress = LoopProgress(to_resources.count(), logger)
20622134
for to_resource in progress.iter(resource_iterator):
2063-
binary_symbols = to_resource.extra_data.get(map_type)
2135+
map_type, binary_symbols = get_binary_symbols(
2136+
resource=to_resource,
2137+
map_types=map_types,
2138+
)
20642139
if not binary_symbols:
20652140
continue
20662141

@@ -2077,6 +2152,19 @@ def map_binaries_with_symbols(
20772152
)
20782153

20792154

2155+
def extract_binary_symbols_from_resources(resources, binary_symbols_func, logger=None):
    """
    Extract binary symbols from ``resources`` using the ecosystem specific
    symbol extractor function ``binary_symbols_func``.

    ``binary_symbols_func`` is called with each resource ``location`` and its
    return value is stored on the resource with ``update_extra_data``.
    Extraction is best-effort: a failure on one resource is reported through
    ``logger`` (when provided) and the remaining resources are still processed.
    """
    for resource in resources:
        try:
            binary_symbols = binary_symbols_func(resource.location)
            resource.update_extra_data(binary_symbols)
        except Exception as e:
            # Deliberately broad: one unparsable binary must not abort the
            # whole extraction. The logger is optional for callers, so guard
            # it to avoid raising a TypeError from inside this handler.
            if logger:
                logger(
                    f"Error parsing binary symbols at: {resource.location_path!r} {e!r}"
                )
2166+
2167+
20802168
def map_javascript_symbols(project, logger=None):
20812169
"""Map deployed JavaScript, TypeScript to its sources using symbols."""
20822170
project_files = project.codebaseresources.files()
@@ -2270,14 +2358,6 @@ def map_python_pyx_to_binaries(project, logger=None):
22702358
project.codebaseresources.files().to_codebase().has_no_relation().elfs()
22712359
)
22722360

2273-
# Collect binary symbols from binaries
2274-
for resource in to_resources:
2275-
try:
2276-
binary_symbols = collect_and_parse_elf_symbols(resource.location)
2277-
resource.update_extra_data(binary_symbols)
2278-
except Exception as e:
2279-
logger(f"Error parsing binary symbols at: {resource.location_path!r} {e!r}")
2280-
22812361
for resource in from_resources:
22822362
# Open Cython source file, create AST, parse it for function definitions
22832363
# and save them in a list

scanpipe/pipes/d2d_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ class EcosystemConfig:
107107
"Go": EcosystemConfig(
108108
ecosystem_option="Go",
109109
matchable_resource_extensions=[".go"],
110+
source_symbol_extensions=[".go"],
110111
),
111112
"Rust": EcosystemConfig(
112113
ecosystem_option="Rust",
1004 Bytes
Binary file not shown.
303 KB
Binary file not shown.

0 commit comments

Comments
 (0)