Skip to content

Commit a12848b

Browse files
Improve source mapping for .py and .pyi files
Signed-off-by: Aryan-SINGH-GIT <[email protected]>
1 parent eb8d4bb commit a12848b

File tree

5 files changed

+231
-1
lines changed

5 files changed

+231
-1
lines changed

scanpipe/pipelines/deploy_to_develop.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,7 @@ def map_python(self):
295295
symbols.
296296
"""
297297
d2d.map_python_pyx_to_binaries(project=self.project, logger=self.log)
298+
d2d.map_python_protobuf_files(project=self.project, logger=self.log)
298299

299300
def match_directories_to_purldb(self):
300301
"""Match selected directories in PurlDB."""

scanpipe/pipes/d2d.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2372,3 +2372,60 @@ def map_python_pyx_to_binaries(project, logger=None):
23722372
to_resource=matching_elf,
23732373
map_type="python_pyx_match",
23742374
)
2375+
2376+
2377+
def map_python_protobuf_files(project, logger=None):
    """
    Map protobuf-generated .py/.pyi files to their source .proto files.

    The ``.proto`` files of the from/ codebase are indexed by their file name
    with the trailing ``.proto`` suffix removed. Each ``.py``/``.pyi`` file of
    the to/ codebase that has no relation yet, and whose name is recognized by
    ``_extract_protobuf_base_name``, is related to the ``.proto`` file sharing
    the same base name using the ``protobuf_mapping`` map type.

    The lookup is done on the file name only: directories are not compared,
    and when several ``.proto`` files share the same name, the last one
    iterated is the one used for the mapping.

    ``logger``, when provided, is called with progress messages (the pipeline
    passes ``self.log``).
    """
    proto_suffix = ".proto"

    from_resources = (
        project.codebaseresources.files()
        .from_codebase()
        .filter(extension=proto_suffix)
    )
    to_resources = (
        project.codebaseresources.files()
        .to_codebase()
        .has_no_relation()
        .filter(extension__in=[".py", ".pyi"])
    )

    # Check each side as soon as it is counted so the second query is skipped
    # when there is nothing to map from.
    from_resources_count = from_resources.count()
    if not from_resources_count:
        return

    to_resources_count = to_resources.count()
    if not to_resources_count:
        return

    if logger:
        logger(
            f"Mapping {to_resources_count:,d} to/ Python resources against "
            f"{from_resources_count:,d} from/ .proto resources."
        )

    proto_index = {}
    for proto_resource in from_resources:
        base_name = proto_resource.name
        # Strip only the trailing suffix: str.replace() would also remove a
        # ".proto" found in the middle of the name, e.g. "my.prototype.proto".
        if base_name.endswith(proto_suffix):
            base_name = base_name[: -len(proto_suffix)]
        proto_index[base_name] = proto_resource

    mapped_count = 0
    for to_resource in to_resources:
        base_name = _extract_protobuf_base_name(to_resource.name)
        from_resource = proto_index.get(base_name) if base_name else None
        if from_resource is None:
            continue

        pipes.make_relation(
            from_resource=from_resource,
            to_resource=to_resource,
            map_type="protobuf_mapping",
            extra_data={"protobuf_base_name": base_name},
        )
        mapped_count += 1

    if logger:
        logger(f"{mapped_count:,d} protobuf-generated resources mapped.")
2418+
2419+
2420+
def _extract_protobuf_base_name(filename):
2421+
"""Extract the base name from a protobuf-generated filename."""
2422+
import re
2423+
2424+
name_without_ext = filename.rsplit(".", 1)[0]
2425+
protobuf_pattern = r"^(.+)_pb[23]$"
2426+
match = re.match(protobuf_pattern, name_without_ext)
2427+
2428+
if match:
2429+
return match.group(1)
2430+
2431+
return None

scanpipe/pipes/d2d_config.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,8 @@ class EcosystemConfig:
148148
),
149149
"Python": EcosystemConfig(
150150
ecosystem_option="Python",
151-
source_symbol_extensions=[".pyx", ".pxd"],
151+
source_symbol_extensions=[".pyx", ".pxd", ".py", ".pyi"],
152+
matchable_resource_extensions=[".py", ".pyi"],
152153
),
153154
}
154155

scanpipe/tests/pipes/test_d2d.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2155,3 +2155,107 @@ def test_scanpipe_d2d_load_ecosystem_config(self):
21552155
expected_extra_data = json.load(f)
21562156

21572157
self.assertEqual(expected_extra_data, asdict(pipeline.ecosystem_config))
2158+
2159+
def test_scanpipe_pipes_d2d_extract_protobuf_base_name(self):
    """Check the base name returned for generated and regular file names."""
    # File name -> expected base name, ``None`` meaning "not recognized".
    expected_by_filename = {
        "command_request_pb2.py": "command_request",
        "connection_request_pb2.pyi": "connection_request",
        "response_pb2.py": "response",
        "user_pb3.py": "user",
        "data_pb2.pyi": "data",
        "regular_file.py": None,
        "not_protobuf.pyi": None,
        "pb2_standalone.py": None,
    }

    for filename, expected in expected_by_filename.items():
        with self.subTest(filename=filename):
            self.assertEqual(expected, d2d._extract_protobuf_base_name(filename))
2176+
2177+
def test_scanpipe_pipes_d2d_map_python_protobuf_files(self):
    """Check that each generated .py/.pyi file is related to its .proto file."""
    proto_paths = [
        "from/valkey_glide-2.0.1/glide-core/src/protobuf/command_request.proto",
        "from/valkey_glide-2.0.1/glide-core/src/protobuf/connection_request.proto",
        "from/valkey_glide-2.0.1/glide-core/src/protobuf/response.proto",
    ]
    generated_paths = [
        "to/glide/protobuf/command_request_pb2.py",
        "to/glide/protobuf/command_request_pb2.pyi",
        "to/glide/protobuf/connection_request_pb2.py",
        "to/glide/protobuf/connection_request_pb2.pyi",
        "to/glide/protobuf/response_pb2.py",
        "to/glide/protobuf/response_pb2.pyi",
    ]

    # Resources are created in the listed order: from/ side first, then to/.
    command_proto, connection_proto, response_proto = [
        make_resource_file(self.project1, path=proto_path)
        for proto_path in proto_paths
    ]
    generated = [
        make_resource_file(self.project1, path=generated_path)
        for generated_path in generated_paths
    ]

    d2d.map_python_protobuf_files(self.project1)

    relations = self.project1.codebaserelations.filter(map_type="protobuf_mapping")
    self.assertEqual(6, relations.count())

    expected_mappings = [
        (command_proto, generated[0], "command_request"),
        (command_proto, generated[1], "command_request"),
        (connection_proto, generated[2], "connection_request"),
        (connection_proto, generated[3], "connection_request"),
        (response_proto, generated[4], "response"),
        (response_proto, generated[5], "response"),
    ]

    for proto_resource, generated_resource, base_name in expected_mappings:
        matching = relations.filter(
            from_resource=proto_resource,
            to_resource=generated_resource,
        )
        relation = matching.first()
        self.assertIsNotNone(relation)
        self.assertEqual(base_name, relation.extra_data["protobuf_base_name"])
2238+
2239+
def test_scanpipe_pipes_d2d_map_python_protobuf_files_no_proto_files(self):
    """Check that nothing is mapped when only a generated to/ file exists."""
    generated_path = "to/glide/protobuf/command_request_pb2.py"
    make_resource_file(self.project1, path=generated_path)

    d2d.map_python_protobuf_files(self.project1)

    protobuf_relations = self.project1.codebaserelations.filter(
        map_type="protobuf_mapping"
    )
    self.assertEqual(0, protobuf_relations.count())
2250+
2251+
def test_scanpipe_pipes_d2d_map_python_protobuf_files_no_py_files(self):
    """Check that nothing is mapped when only a from/ .proto file exists."""
    proto_path = (
        "from/valkey_glide-2.0.1/glide-core/src/protobuf/command_request.proto"
    )
    make_resource_file(self.project1, path=proto_path)

    d2d.map_python_protobuf_files(self.project1)

    protobuf_relations = self.project1.codebaserelations.filter(
        map_type="protobuf_mapping"
    )
    self.assertEqual(0, protobuf_relations.count())

scanpipe/tests/test_pipelines.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1701,6 +1701,73 @@ def test_scanpipe_deploy_to_develop_pipeline_integration_elfs(self):
17011701
expected_file = self.data / "d2d-elfs" / "brotli-elf-d2d.json"
17021702
self.assertPipelineResultEqual(expected_file, result_file)
17031703

1704+
def test_scanpipe_deploy_to_develop_pipeline_integration_protobuf(self):
    """
    Run the map_deploy_to_develop pipeline on protobuf inputs.

    ``.proto`` files are written under from/ and ``_pb2.py``/``_pb2.pyi``
    files under to/, the pipeline is executed with the "Python" group
    selected, and the resulting ``protobuf_mapping`` relations are checked.
    """
    pipeline_name = "map_deploy_to_develop"
    project1 = make_project(name="ProtobufTest")
    selected_groups = ["Python"]

    # ``parents=True`` also creates the intermediate from/ directory.
    proto_dir = (
        project1.codebase_path
        / "from"
        / "valkey_glide-2.0.1"
        / "glide-core"
        / "src"
        / "protobuf"
    )
    proto_dir.mkdir(parents=True)

    proto_files = [
        "command_request.proto",
        "connection_request.proto",
        "response.proto",
    ]
    for proto_file in proto_files:
        message_name = proto_file.replace(".proto", "").title()
        (proto_dir / proto_file).write_text(
            f'syntax = "proto3";\npackage glide;\nmessage {message_name} {{}}'
        )

    # ``parents=True`` also creates the intermediate to/ directory.
    py_dir = project1.codebase_path / "to" / "glide" / "protobuf"
    py_dir.mkdir(parents=True)

    py_files = [
        "command_request_pb2.py",
        "command_request_pb2.pyi",
        "connection_request_pb2.py",
        "connection_request_pb2.pyi",
        "response_pb2.py",
        "response_pb2.pyi",
    ]
    for py_file in py_files:
        # Cut at the "_pb2" marker: chaining replace("_pb2.py", "") with
        # replace("_pb2.pyi", "") left a stray "i" on the .pyi names.
        class_name = py_file.partition("_pb2")[0]
        (py_dir / py_file).write_text(
            f"# Generated by protoc\nclass {class_name}: pass"
        )

    run = project1.add_pipeline(
        pipeline_name=pipeline_name, selected_groups=selected_groups
    )
    pipeline = run.make_pipeline_instance()

    exitcode, out = pipeline.execute()
    self.assertEqual(0, exitcode, msg=out)

    protobuf_relations = project1.codebaserelations.filter(
        map_type="protobuf_mapping"
    )
    self.assertGreater(protobuf_relations.count(), 0)

    command_request_relations = protobuf_relations.filter(
        to_resource__path__contains="command_request_pb2"
    )
    self.assertGreater(command_request_relations.count(), 0)

    for relation in protobuf_relations:
        self.assertIn(".proto", relation.from_resource.path)
        self.assertIn("_pb2", relation.to_resource.path)
        self.assertEqual("protobuf_mapping", relation.map_type)
1770+
17041771
def test_scanpipe_deploy_to_develop_pipeline_extract_input_files_errors(self):
17051772
project1 = make_project()
17061773
run = project1.add_pipeline("map_deploy_to_develop")

0 commit comments

Comments
 (0)