Skip to content

Commit 05ce569

Browse files
committed
Merge main and fix conflicts
Signed-off-by: tdruez <tdruez@nexb.com>
2 parents c08076c + 90f396b commit 05ce569

47 files changed

Lines changed: 26438 additions & 24038 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

CHANGELOG.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,16 @@ v35.1.0 (unreleased)
1919
license rules used during the scan.
2020
https://github.com/aboutcode-org/scancode.io/issues/1657
2121

22+
- Add a new step to the ``DeployToDevelop`` pipeline, ``map_python``, to match
23+
Cython source files (.pyx) to their compiled binaries.
24+
https://github.com/aboutcode-org/scancode.io/pull/1703
25+
26+
- Update scancode-toolkit to v32.4.0. See CHANGELOG for updates:
27+
https://github.com/aboutcode-org/scancode-toolkit/releases/tag/v32.4.0
28+
Adds a new ``sha1_git`` attribute to the ``CodebaseResource`` model as this
29+
is now computed and returned from the ``scancode-toolkit`` ``--info`` plugin.
30+
https://github.com/aboutcode-org/scancode.io/pull/1708
31+
2232
v35.0.0 (2025-06-23)
2333
--------------------
2434

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ dependencies = [
5656
# Docker
5757
"container-inspector==33.0.0",
5858
# ScanCode-toolkit
59-
"scancode-toolkit[packages]==32.3.3",
59+
"scancode-toolkit[packages]==32.4.0",
6060
"extractcode[full]==31.0.0",
6161
"commoncode==32.3.0",
6262
"Beautifulsoup4[chardet]==4.13.4",
@@ -69,7 +69,7 @@ dependencies = [
6969
"rust-inspector==0.1.0",
7070
"binary-inspector==0.1.2",
7171
"python-inspector==0.14.0",
72-
"source-inspector==0.6.1; sys_platform != 'darwin' and platform_machine != 'arm64'",
72+
"source-inspector==0.7.0; sys_platform != 'darwin' and platform_machine != 'arm64'",
7373
"aboutcode-toolkit==11.1.1",
7474
# Utilities
7575
"XlsxWriter==3.2.5",

scanpipe/api/serializers.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,7 @@ class Meta:
369369
"sha1",
370370
"sha256",
371371
"sha512",
372+
"sha1_git",
372373
"is_binary",
373374
"is_text",
374375
"is_archive",

scanpipe/filters.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,7 @@ class Meta:
571571
"sha1",
572572
"sha256",
573573
"sha512",
574+
"sha1_git",
574575
"size",
575576
"status",
576577
"tag",
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Generated by Django 5.1.11 on 2025-06-30 15:50
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    # Adds the ``sha1_git`` checksum column to ``CodebaseResource``.
    # The field options below mirror the model declaration so that
    # ``makemigrations`` does not detect a pending ``AlterField``.

    dependencies = [
        ('scanpipe', '0072_discovereddependency_uuid_unique'),
    ]

    operations = [
        migrations.AddField(
            model_name='codebaseresource',
            name='sha1_git',
            field=models.CharField(blank=True, help_text='SHA1 checksum generated by Git, hex-encoded.', max_length=40, verbose_name='SHA1_git'),
        ),
    ]

scanpipe/models.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2748,6 +2748,12 @@ class Type(models.TextChoices):
27482748
is_text = models.BooleanField(default=False)
27492749
is_archive = models.BooleanField(default=False)
27502750
is_media = models.BooleanField(default=False)
2751+
sha1_git = models.CharField(
2752+
_("SHA1_git"),
2753+
max_length=40,
2754+
blank=True,
2755+
help_text=_("SHA1 checksum generated by Git, hex-encoded."),
2756+
)
27512757
package_data = models.JSONField(
27522758
default=list,
27532759
blank=True,

scanpipe/pipelines/deploy_to_develop.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ def steps(cls):
8181
cls.map_winpe,
8282
cls.map_go,
8383
cls.map_rust,
84+
cls.map_python,
8485
cls.match_directories_to_purldb,
8586
cls.match_resources_to_purldb,
8687
cls.map_javascript_post_purldb_match,
@@ -221,6 +222,14 @@ def map_rust(self):
221222
"""Map Rust binaries to their sources using symbols."""
222223
d2d.map_rust_binaries_with_symbols(project=self.project, logger=self.log)
223224

225+
@optional_step("Python")
def map_python(self):
    """Map Cython source files to their compiled binaries."""
    d2d.map_python_pyx_to_binaries(
        project=self.project,
        logger=self.log,
    )
232+
224233
def match_directories_to_purldb(self):
225234
"""Match selected directories in PurlDB."""
226235
if not purldb.is_available():

scanpipe/pipes/d2d.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2254,3 +2254,55 @@ def _map_javascript_strings(to_resource, javascript_from_resources, logger):
22542254
to_resource.update(status=flag.MAPPED)
22552255
return 1
22562256
return 0
2257+
2258+
2259+
def map_python_pyx_to_binaries(project, logger=None):
    """
    Map Cython source to their compiled binaries in ``project``.

    The symbols of each unmapped ELF binary of the to/ codebase are collected
    and stored in its ``extra_data``. Each Cython source file of the from/
    codebase is then parsed for its top-level function names, and a
    ``python_pyx_match`` relation is created with every ELF whose
    ``extra_data`` contains one of those names.

    ``logger`` is an optional callable taking a single message string.
    """
    from source_inspector.symbols_tree_sitter import get_tree_and_language_info

    python_config = d2d_config.get_ecosystem_config(ecosystem="Python")
    from_resources = (
        project.codebaseresources.files()
        .from_codebase()
        .filter(extension__in=python_config.source_symbol_extensions)
    )
    to_resources = (
        project.codebaseresources.files().to_codebase().has_no_relation().elfs()
    )

    # Collect binary symbols from binaries
    for resource in to_resources:
        try:
            binary_symbols = collect_and_parse_elf_symbols(resource.location)
            resource.update_extra_data(binary_symbols)
        except Exception as e:
            # ``logger`` is optional: calling ``None`` here would raise a
            # TypeError from inside the handler and abort the whole step.
            if logger:
                logger(
                    f"Error parsing binary symbols at: {resource.location_path!r} {e!r}"
                )

    for resource in from_resources:
        # Parse the Cython source file and collect the names of its top-level
        # function definitions.
        tree, _ = get_tree_and_language_info(resource.location)
        identifiers = [
            child.text.decode()
            for node in tree.root_node.children
            if node.type == "function_definition"
            for child in node.children
            if child.type == "identifier"
        ]

        # Without any identifier the lookup below would be an empty ``Q()``,
        # which filters nothing and would relate this source file to every
        # ELF of the to/ codebase.
        if not identifiers:
            continue

        # Find matching to/ resources by checking which ones have an
        # extra_data field containing a function name found in this source.
        identifiers_qs = Q()
        for identifier in identifiers:
            identifiers_qs |= Q(extra_data__icontains=identifier)

        for matching_elf in to_resources.filter(identifiers_qs):
            pipes.make_relation(
                from_resource=resource,
                to_resource=matching_elf,
                map_type="python_pyx_match",
            )

scanpipe/pipes/d2d_config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,10 @@ class EcosystemConfig:
131131
ecosystem_option="Windows",
132132
source_symbol_extensions=[".c", ".cpp", ".h", ".cs"],
133133
),
134+
"Python": EcosystemConfig(
135+
ecosystem_option="Python",
136+
source_symbol_extensions=[".pyx", ".pxd"],
137+
),
134138
}
135139

136140

scanpipe/tests/data/asgiref/asgiref-3.3.0.spdx.json

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"dataLicense": "CC0-1.0",
44
"SPDXID": "SPDXRef-DOCUMENT",
55
"name": "scancodeio_asgiref",
6-
"documentNamespace": "https://scancode.io/spdxdocs/24c1b665-7fb2-4e0c-8785-cba72fb35df0",
6+
"documentNamespace": "https://scancode.io/spdxdocs/1cdd3f3a-eea9-4c9c-b78e-9fa6bcde9cfd",
77
"creationInfo": {
88
"created": "2000-01-01T01:02:03Z",
99
"creators": [
@@ -14,7 +14,7 @@
1414
"packages": [
1515
{
1616
"name": "asgiref",
17-
"SPDXID": "SPDXRef-scancodeio-discoveredpackage-101147dd-f8a7-4ea3-87a1-01b9b0af5d4f",
17+
"SPDXID": "SPDXRef-scancodeio-discoveredpackage-b4e16c8a-f564-4379-9de9-ea2aaba08d94",
1818
"downloadLocation": "NOASSERTION",
1919
"licenseConcluded": "BSD-3-Clause",
2020
"copyrightText": "NOASSERTION",
@@ -33,7 +33,7 @@
3333
},
3434
{
3535
"name": "asgiref",
36-
"SPDXID": "SPDXRef-scancodeio-discoveredpackage-b5035991-5b4b-40be-b68b-1c9c528078cd",
36+
"SPDXID": "SPDXRef-scancodeio-discoveredpackage-80e083f1-7d05-432e-96f8-e6dfd9e494f0",
3737
"downloadLocation": "NOASSERTION",
3838
"licenseConcluded": "BSD-3-Clause",
3939
"copyrightText": "NOASSERTION",
@@ -52,7 +52,7 @@
5252
},
5353
{
5454
"name": "pytest",
55-
"SPDXID": "SPDXRef-scancodeio-discovereddependency-13818fb7-6094-4868-97ca-384a8fc8d16d",
55+
"SPDXID": "SPDXRef-scancodeio-discovereddependency-05f9bf8f-4da8-488e-9f48-6e183c4b813b",
5656
"downloadLocation": "NOASSERTION",
5757
"licenseConcluded": "NOASSERTION",
5858
"copyrightText": "NOASSERTION",
@@ -68,7 +68,7 @@
6868
},
6969
{
7070
"name": "pytest",
71-
"SPDXID": "SPDXRef-scancodeio-discovereddependency-2f1d3742-0553-4c4f-8731-1ffbbc13827d",
71+
"SPDXID": "SPDXRef-scancodeio-discovereddependency-43988fc2-bc0e-4c81-b083-7c5f21a7be50",
7272
"downloadLocation": "NOASSERTION",
7373
"licenseConcluded": "NOASSERTION",
7474
"copyrightText": "NOASSERTION",
@@ -84,7 +84,7 @@
8484
},
8585
{
8686
"name": "pytest-asyncio",
87-
"SPDXID": "SPDXRef-scancodeio-discovereddependency-fd5a81e5-0739-406e-9189-7b8a3644ef0d",
87+
"SPDXID": "SPDXRef-scancodeio-discovereddependency-ea25292c-05af-4982-9596-866c5de9d8cd",
8888
"downloadLocation": "NOASSERTION",
8989
"licenseConcluded": "NOASSERTION",
9090
"copyrightText": "NOASSERTION",
@@ -100,7 +100,7 @@
100100
},
101101
{
102102
"name": "pytest-asyncio",
103-
"SPDXID": "SPDXRef-scancodeio-discovereddependency-e175db55-d0f3-4224-b6d4-2b0ad553b865",
103+
"SPDXID": "SPDXRef-scancodeio-discovereddependency-a0b6b6e7-5e75-4b69-9742-b04fe8a594a3",
104104
"downloadLocation": "NOASSERTION",
105105
"licenseConcluded": "NOASSERTION",
106106
"copyrightText": "NOASSERTION",
@@ -116,33 +116,33 @@
116116
}
117117
],
118118
"documentDescribes": [
119-
"SPDXRef-scancodeio-discoveredpackage-101147dd-f8a7-4ea3-87a1-01b9b0af5d4f",
120-
"SPDXRef-scancodeio-discoveredpackage-b5035991-5b4b-40be-b68b-1c9c528078cd",
121-
"SPDXRef-scancodeio-discovereddependency-13818fb7-6094-4868-97ca-384a8fc8d16d",
122-
"SPDXRef-scancodeio-discovereddependency-2f1d3742-0553-4c4f-8731-1ffbbc13827d",
123-
"SPDXRef-scancodeio-discovereddependency-fd5a81e5-0739-406e-9189-7b8a3644ef0d",
124-
"SPDXRef-scancodeio-discovereddependency-e175db55-d0f3-4224-b6d4-2b0ad553b865"
119+
"SPDXRef-scancodeio-discoveredpackage-b4e16c8a-f564-4379-9de9-ea2aaba08d94",
120+
"SPDXRef-scancodeio-discoveredpackage-80e083f1-7d05-432e-96f8-e6dfd9e494f0",
121+
"SPDXRef-scancodeio-discovereddependency-05f9bf8f-4da8-488e-9f48-6e183c4b813b",
122+
"SPDXRef-scancodeio-discovereddependency-43988fc2-bc0e-4c81-b083-7c5f21a7be50",
123+
"SPDXRef-scancodeio-discovereddependency-ea25292c-05af-4982-9596-866c5de9d8cd",
124+
"SPDXRef-scancodeio-discovereddependency-a0b6b6e7-5e75-4b69-9742-b04fe8a594a3"
125125
],
126126
"files": [],
127127
"relationships": [
128128
{
129-
"spdxElementId": "SPDXRef-scancodeio-discovereddependency-13818fb7-6094-4868-97ca-384a8fc8d16d",
130-
"relatedSpdxElement": "SPDXRef-scancodeio-discoveredpackage-101147dd-f8a7-4ea3-87a1-01b9b0af5d4f",
129+
"spdxElementId": "SPDXRef-scancodeio-discovereddependency-05f9bf8f-4da8-488e-9f48-6e183c4b813b",
130+
"relatedSpdxElement": "SPDXRef-scancodeio-discoveredpackage-b4e16c8a-f564-4379-9de9-ea2aaba08d94",
131131
"relationshipType": "DEPENDENCY_OF"
132132
},
133133
{
134-
"spdxElementId": "SPDXRef-scancodeio-discovereddependency-2f1d3742-0553-4c4f-8731-1ffbbc13827d",
135-
"relatedSpdxElement": "SPDXRef-scancodeio-discoveredpackage-b5035991-5b4b-40be-b68b-1c9c528078cd",
134+
"spdxElementId": "SPDXRef-scancodeio-discovereddependency-43988fc2-bc0e-4c81-b083-7c5f21a7be50",
135+
"relatedSpdxElement": "SPDXRef-scancodeio-discoveredpackage-80e083f1-7d05-432e-96f8-e6dfd9e494f0",
136136
"relationshipType": "DEPENDENCY_OF"
137137
},
138138
{
139-
"spdxElementId": "SPDXRef-scancodeio-discovereddependency-fd5a81e5-0739-406e-9189-7b8a3644ef0d",
140-
"relatedSpdxElement": "SPDXRef-scancodeio-discoveredpackage-101147dd-f8a7-4ea3-87a1-01b9b0af5d4f",
139+
"spdxElementId": "SPDXRef-scancodeio-discovereddependency-ea25292c-05af-4982-9596-866c5de9d8cd",
140+
"relatedSpdxElement": "SPDXRef-scancodeio-discoveredpackage-b4e16c8a-f564-4379-9de9-ea2aaba08d94",
141141
"relationshipType": "DEPENDENCY_OF"
142142
},
143143
{
144-
"spdxElementId": "SPDXRef-scancodeio-discovereddependency-e175db55-d0f3-4224-b6d4-2b0ad553b865",
145-
"relatedSpdxElement": "SPDXRef-scancodeio-discoveredpackage-b5035991-5b4b-40be-b68b-1c9c528078cd",
144+
"spdxElementId": "SPDXRef-scancodeio-discovereddependency-a0b6b6e7-5e75-4b69-9742-b04fe8a594a3",
145+
"relatedSpdxElement": "SPDXRef-scancodeio-discoveredpackage-80e083f1-7d05-432e-96f8-e6dfd9e494f0",
146146
"relationshipType": "DEPENDENCY_OF"
147147
}
148148
],

0 commit comments

Comments
 (0)