Skip to content

Commit f221505

Browse files
committed
Add step to map Cython files to binaries in d2d pipeline
Signed-off-by: Jono Yang <jyang@nexb.com>
1 parent d9875ff commit f221505

2 files changed

Lines changed: 44 additions & 1 deletion

File tree

scanpipe/pipelines/deploy_to_develop.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ def steps(cls):
7575
cls.map_elf,
7676
cls.map_go,
7777
cls.map_rust,
78+
cls.map_python,
7879
cls.match_directories_to_purldb,
7980
cls.match_resources_to_purldb,
8081
cls.map_javascript_post_purldb_match,
@@ -218,6 +219,11 @@ def map_rust(self):
218219
"""Map Rust binaries to their sources using symbols."""
219220
d2d.map_rust_binaries_with_symbols(project=self.project, logger=self.log)
220221

222+
@optional_step("Python")
def map_python(self):
    """
    Map Cython ``.pyx`` sources to ELF binaries of the project.

    Delegates to ``d2d.map_python_pyx_to_binaries``, which relates a ``.pyx``
    file to a binary when one of its function names is found in the symbol
    data collected from that binary.
    """
    d2d.map_python_pyx_to_binaries(project=self.project, logger=self.log)
226+
221227
def match_directories_to_purldb(self):
222228
"""Match selected directories in PurlDB."""
223229
if not purldb.is_available():

scanpipe/pipes/d2d.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
from packagedcode.npm import NpmPackageJsonHandler
4747
from rust_inspector.binary import collect_and_parse_rust_symbols
4848
from summarycode.classify import LEGAL_STARTS_ENDS
49+
from source_inspector.symbols_tree_sitter import get_tree
4950

5051
from aboutcode.pipeline import LoopProgress
5152
from scanpipe import pipes
@@ -1943,7 +1944,7 @@ def map_elfs_binaries_with_symbols(project, logger=None):
19431944
)
19441945

19451946
# Collect source symbols from C/C++ and Cython source files
1946-
elf_from_resources = from_resources.filter(extension__in=[".c", ".cpp", ".h"])
1947+
elf_from_resources = from_resources.filter(extension__in=[".c", ".cpp", ".h", ".pyx", ".pxd"])
19471948

19481949
map_binaries_with_symbols(
19491950
project=project,
@@ -2097,3 +2098,39 @@ def _map_javascript_symbols(to_resource, javascript_from_resources, logger):
20972098
to_resource.update(status=flag.MAPPED)
20982099
return 1
20992100
return 0
2101+
2102+
2103+
def _get_top_level_function_names(tree):
    """
    Return the unique names of the top-level function definitions in ``tree``.

    ``tree`` is a parsed tree-sitter tree. Only direct children of the root
    node with type ``function_definition`` are considered, and the text of
    their ``identifier`` children is returned in first-seen order.
    """
    names = {}
    for node in tree.root_node.children:
        if node.type != "function_definition":
            continue
        for child in node.children:
            if child.type == "identifier":
                # A dict is used as an ordered set to drop duplicate names.
                names[child.text.decode()] = None
    return list(names)


def map_python_pyx_to_binaries(project, logger=None):
    """
    Map Cython ``.pyx`` source files to ELF binaries in ``project``.

    The mapping runs in two passes:

    1. For every ELF file of the "to" codebase that has no relation yet, the
       ELF symbols are collected and stored in the resource ``extra_data``.
    2. For every ``.pyx`` file of the "from" codebase, the names of its
       top-level function definitions are extracted, and a
       ``python_pyx_match`` relation is created with each candidate ELF whose
       ``extra_data`` contains at least one of these names.

    ``logger`` is an optional callable that receives error messages. When it
    is not provided, symbol collection errors are skipped silently.
    """
    from_resources = (
        project.codebaseresources.files()
        .from_codebase()
        .filter(extension__endswith=".pyx")
    )
    to_resources = (
        project.codebaseresources.files().to_codebase().has_no_relation().elfs()
    )

    # Collect binary symbols from binaries
    for resource in to_resources:
        try:
            binary_symbols = collect_and_parse_elf_symbols(resource.location)
            resource.update_extra_data(binary_symbols)
        except Exception as e:
            # Best effort: a binary that cannot be parsed must not abort the
            # whole step. ``logger`` is optional, so guard the call.
            if logger:
                logger(
                    f"Error parsing binary symbols at: "
                    f"{resource.location_path!r} {e!r}"
                )

    for resource in from_resources:
        tree, _ = get_tree(resource.location)
        function_names = _get_top_level_function_names(tree)
        if not function_names:
            # An empty Q() adds no restriction to the queryset, which would
            # relate this file to every candidate ELF binary.
            continue

        # NOTE(review): ``icontains`` is a case-insensitive containment lookup
        # on the stored extra data, so short or common function names may
        # over-match; consider comparing against the exact symbol list.
        identifiers_qs = Q()
        for function_name in function_names:
            identifiers_qs |= Q(extra_data__icontains=function_name)

        for matching_elf in to_resources.filter(identifiers_qs):
            pipes.make_relation(
                from_resource=resource,
                to_resource=matching_elf,
                map_type="python_pyx_match",
            )

0 commit comments

Comments
 (0)