Skip to content

Commit 39e2770

Browse files
Fix single-file package assembly bugs
Fix the assemble functions for many ecosystems so that top-level packages are correctly returned even when scanning a single package manifest file. Signed-off-by: Ayan Sinha Mahapatra <asmahapatra@aboutcode.org>
1 parent 6fd5054 commit 39e2770

15 files changed

Lines changed: 2139 additions & 134 deletions

File tree

src/packagedcode/cargo.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,11 @@ def assemble(cls, package_data, resource, codebase, package_adder):
6969

7070
datafile_path_patterns = CargoTomlHandler.path_patterns + CargoLockHandler.path_patterns
7171

72-
workspace_root = resource.parent(codebase)
73-
if workspace_root and workspace_package_data and workspace_members:
72+
root_resource = resource.parent(codebase)
73+
if root_resource and workspace_package_data and workspace_members:
7474
# TODO: support glob patterns found in cargo workspaces
7575
for workspace_member_path in workspace_members:
76-
workspace_directory_path = os.path.join(workspace_root.path, workspace_member_path)
76+
workspace_directory_path = os.path.join(root_resource.path, workspace_member_path)
7777
workspace_directory = codebase.get_resource(path=workspace_directory_path)
7878
if not workspace_directory:
7979
continue
@@ -104,9 +104,12 @@ def assemble(cls, package_data, resource, codebase, package_adder):
104104
package_adder=package_adder,
105105
)
106106
else:
107+
if not root_resource:
108+
root_resource = resource
109+
107110
yield from cls.assemble_from_many_datafiles(
108111
datafile_path_patterns=datafile_path_patterns,
109-
resource=workspace_root,
112+
resource=root_resource,
110113
codebase=codebase,
111114
package_adder=package_adder,
112115
)

src/packagedcode/chef.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,12 @@ def assemble(cls, package_data, resource, codebase, package_adder):
150150
ChefMetadataRbHandler.path_patterns +
151151
ChefMetadataJsonHandler.path_patterns
152152
)
153+
root_resource = resource.parent(codebase)
154+
if not root_resource:
155+
root_resource = resource
153156
yield from cls.assemble_from_many_datafiles(
154157
datafile_path_patterns=datafile_path_patterns,
155-
resource=resource.parent(codebase),
158+
resource=root_resource,
156159
codebase=codebase,
157160
package_adder=package_adder,
158161
)

src/packagedcode/cocoapods.py

Lines changed: 61 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -139,70 +139,71 @@ def assemble(cls, package_data, resource, codebase, package_adder):
139139
"""
140140
if codebase.has_single_resource:
141141
yield from models.DatafileHandler.assemble(package_data, resource, codebase)
142-
else:
143-
# do we have more than one podspec?
144-
parent = resource.parent(codebase)
145-
sibling_podspecs = [
146-
r for r in parent.children(codebase)
147-
if r.name.endswith('.podspec')
148-
]
149-
150-
siblings_counts = len(sibling_podspecs)
151-
has_single_podspec = siblings_counts == 1
152-
has_multiple_podspec = siblings_counts > 1
153-
154-
datafile_path_patterns = (
155-
PodfileLockHandler.path_patterns +
156-
PodfileHandler.path_patterns
142+
return
143+
144+
# do we have more than one podspec?
145+
parent = resource.parent(codebase)
146+
sibling_podspecs = [
147+
r for r in parent.children(codebase)
148+
if r.name.endswith('.podspec')
149+
]
150+
151+
siblings_counts = len(sibling_podspecs)
152+
has_single_podspec = siblings_counts == 1
153+
has_multiple_podspec = siblings_counts > 1
154+
155+
datafile_path_patterns = (
156+
PodfileLockHandler.path_patterns +
157+
PodfileHandler.path_patterns
158+
)
159+
if has_single_podspec:
160+
# we can treat all podfile/spec as being for one package
161+
podspec_path_patterns = (f"*{sibling_podspecs[0].name}",)
162+
yield from cls.assemble_from_many_datafiles(
163+
datafile_path_patterns=podspec_path_patterns + datafile_path_patterns,
164+
resource=parent,
165+
codebase=codebase,
166+
package_adder=package_adder,
157167
)
158-
if has_single_podspec:
159-
# we can treat all podfile/spec as being for one package
160-
podspec_path_patterns = (f"*{sibling_podspecs[0].name}",)
161-
yield from cls.assemble_from_many_datafiles(
162-
datafile_path_patterns=podspec_path_patterns + datafile_path_patterns,
163-
resource=parent,
164-
codebase=codebase,
165-
package_adder=package_adder,
166-
)
167168

168-
elif has_multiple_podspec:
169-
# treat each of podspec and podfile alone without merging
170-
# as we cannot determine easily which podfile is for which
171-
# podspec
172-
podspec = sibling_podspecs.pop()
173-
podspec_path_patterns = (f"*{podspec.name}",)
174-
yield from cls.assemble_from_many_datafiles(
175-
datafile_path_patterns=podspec_path_patterns + datafile_path_patterns,
176-
resource=parent,
177-
codebase=codebase,
178-
package_adder=package_adder,
179-
)
169+
elif has_multiple_podspec:
170+
# treat each of podspec and podfile alone without merging
171+
# as we cannot determine easily which podfile is for which
172+
# podspec
173+
podspec = sibling_podspecs.pop()
174+
podspec_path_patterns = (f"*{podspec.name}",)
175+
yield from cls.assemble_from_many_datafiles(
176+
datafile_path_patterns=podspec_path_patterns + datafile_path_patterns,
177+
resource=parent,
178+
codebase=codebase,
179+
package_adder=package_adder,
180+
)
180181

181-
for resource in sibling_podspecs:
182-
datafile_path = resource.path
183-
for package_data in resource.package_data:
184-
package_data = models.PackageData.from_dict(package_data)
185-
package = models.Package.from_package_data(
186-
package_data=package_data,
187-
datafile_path=datafile_path,
188-
)
189-
cls.assign_package_to_resources(
190-
package=package,
191-
resource=resource,
192-
codebase=codebase,
193-
package_adder=package_adder,
194-
)
195-
yield package
196-
yield resource
182+
for resource in sibling_podspecs:
183+
datafile_path = resource.path
184+
for package_data in resource.package_data:
185+
package_data = models.PackageData.from_dict(package_data)
186+
package = models.Package.from_package_data(
187+
package_data=package_data,
188+
datafile_path=datafile_path,
189+
)
190+
cls.assign_package_to_resources(
191+
package=package,
192+
resource=resource,
193+
codebase=codebase,
194+
package_adder=package_adder,
195+
)
196+
yield package
197+
yield resource
197198

198-
else:
199-
# has_no_podspec:
200-
yield from cls.assemble_from_many_datafiles(
201-
datafile_path_patterns=datafile_path_patterns,
202-
resource=parent,
203-
codebase=codebase,
204-
package_adder=package_adder,
205-
)
199+
else:
200+
# has_no_podspec:
201+
yield from cls.assemble_from_many_datafiles(
202+
datafile_path_patterns=datafile_path_patterns,
203+
resource=parent,
204+
codebase=codebase,
205+
package_adder=package_adder,
206+
)
206207

207208

208209
class PodspecHandler(BasePodHandler):

src/packagedcode/golang.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,12 @@ def assemble(cls, package_data, resource, codebase, package_adder):
3535
datafile_path_patterns = (
3636
GoModHandler.path_patterns + GoSumHandler.path_patterns
3737
)
38+
root_resource = resource.parent(codebase)
39+
if not root_resource:
40+
root_resource = resource
3841
yield from cls.assemble_from_many_datafiles(
3942
datafile_path_patterns=datafile_path_patterns,
40-
resource=resource.parent(codebase),
43+
resource=root_resource,
4144
codebase=codebase,
4245
package_adder=package_adder,
4346
)

src/packagedcode/models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1397,7 +1397,7 @@ def assemble_from_many_datafiles(
13971397
# we iterate on datafile_name_patterns because their order matters
13981398
for path_pattern in datafile_path_patterns:
13991399
for sibling in siblings:
1400-
if fnmatchcase(sibling.path, path_pattern):
1400+
if fnmatchcase(sibling.location, path_pattern):
14011401
for package_data in sibling.package_data:
14021402
package_data = PackageData.from_dict(package_data)
14031403
pkgdata_resources.append((package_data, sibling,))

src/packagedcode/pypi.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,10 @@ class BaseExtractedPythonLayout(models.DatafileHandler):
166166

167167
@classmethod
168168
def assemble(cls, package_data, resource, codebase, package_adder):
169+
if codebase.has_single_resource:
170+
yield from models.DatafileHandler.assemble(package_data, resource, codebase)
171+
return
172+
169173
# a source distribution can have many manifests
170174
datafile_name_patterns = (
171175
PipfileHandler.path_patterns + PipfileLockHandler.path_patterns
@@ -555,7 +559,7 @@ def assemble(cls, package_data, resource, codebase, package_adder):
555559
if codebase.has_single_resource:
556560
yield from models.DatafileHandler.assemble(package_data, resource, codebase, package_adder)
557561
return
558-
562+
559563
assert len(package_resource.package_data) == 1, f'Invalid pyproject.toml for {package_resource.path}'
560564
pkg_data = package_resource.package_data[0]
561565
pkg_data = models.PackageData.from_dict(pkg_data)

src/packagedcode/rubygems.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ def assemble_extracted_gem(cls, package_data, resource, codebase, package_adder)
6464
)
6565

6666
gemroot = get_ancestor(levels_up=2, resource=resource, codebase=codebase)
67+
if not gemroot:
68+
gemroot == resource
6769

6870
yield from cls.assemble_from_many_datafiles(
6971
datafile_path_patterns=datafile_path_patterns,
@@ -109,10 +111,12 @@ def assemble(cls, package_data, resource, codebase, package_adder):
109111
GemfileHandler.path_patterns +
110112
GemfileLockHandler.path_patterns
111113
)
112-
114+
root_resource = resource.parent(codebase)
115+
if not root_resource:
116+
root_resource = resource
113117
yield from cls.assemble_from_many_datafiles(
114118
datafile_path_patterns=datafile_path_patterns,
115-
resource=resource.parent(codebase),
119+
resource=root_resource,
116120
codebase=codebase,
117121
package_adder=package_adder,
118122
)

tests/packagedcode/data/cargo/cargo_toml/single-file-scan/Cargo.toml.expected

Lines changed: 131 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,131 @@
11
{
2-
"packages": [],
3-
"dependencies": [],
2+
"packages": [
3+
{
4+
"type": "cargo",
5+
"namespace": null,
6+
"name": "constant_time_eq",
7+
"version": "0.4.2",
8+
"qualifiers": {},
9+
"subpath": null,
10+
"primary_language": "Rust",
11+
"description": "Compares two equal-sized byte strings in constant time.",
12+
"release_date": null,
13+
"parties": [
14+
{
15+
"type": "person",
16+
"role": "author",
17+
"name": "Cesar Eduardo Barros",
18+
"email": "cesarb@cesarb.eti.br",
19+
"url": null
20+
}
21+
],
22+
"keywords": [
23+
"constant_time",
24+
"cryptography",
25+
"no-std"
26+
],
27+
"homepage_url": null,
28+
"download_url": null,
29+
"size": null,
30+
"sha1": null,
31+
"md5": null,
32+
"sha256": null,
33+
"sha512": null,
34+
"bug_tracking_url": null,
35+
"code_view_url": null,
36+
"vcs_url": "https://github.com/cesarb/constant_time_eq",
37+
"copyright": null,
38+
"holder": null,
39+
"declared_license_expression": "cc0-1.0 OR mit-0 OR apache-2.0",
40+
"declared_license_expression_spdx": "CC0-1.0 OR MIT-0 OR Apache-2.0",
41+
"license_detections": [
42+
{
43+
"license_expression": "cc0-1.0 OR mit-0 OR apache-2.0",
44+
"license_expression_spdx": "CC0-1.0 OR MIT-0 OR Apache-2.0",
45+
"matches": [
46+
{
47+
"license_expression": "cc0-1.0 OR mit-0 OR apache-2.0",
48+
"license_expression_spdx": "CC0-1.0 OR MIT-0 OR Apache-2.0",
49+
"from_file": "Cargo.toml",
50+
"start_line": 1,
51+
"end_line": 1,
52+
"matcher": "1-spdx-id",
53+
"score": 100.0,
54+
"matched_length": 10,
55+
"match_coverage": 100.0,
56+
"rule_relevance": 100,
57+
"rule_identifier": "spdx-license-identifier-cc0_1_0_or_mit_0_or_apache_2_0-f44a2ec174eb034bd3c662f728664281e507b20d",
58+
"rule_url": null,
59+
"matched_text": "CC0-1.0 OR MIT-0 OR Apache-2.0"
60+
}
61+
],
62+
"identifier": "cc0_1_0_or_mit_0_or_apache_2_0-3f14dd48-7cd8-cf28-d4e1-3b0174a587ee"
63+
}
64+
],
65+
"other_license_expression": null,
66+
"other_license_expression_spdx": null,
67+
"other_license_detections": [],
68+
"extracted_license_statement": "CC0-1.0 OR MIT-0 OR Apache-2.0",
69+
"notice_text": null,
70+
"source_packages": [],
71+
"is_private": false,
72+
"is_virtual": false,
73+
"extra_data": {
74+
"documentation_url": "https://docs.rs/constant_time_eq",
75+
"rust_version": "1.85.0",
76+
"rust_edition": "2024"
77+
},
78+
"repository_homepage_url": "https://crates.io/crates/constant_time_eq",
79+
"repository_download_url": "https://crates.io/api/v1/crates/constant_time_eq/0.4.2/download",
80+
"api_data_url": "https://crates.io/api/v1/crates/constant_time_eq",
81+
"package_uid": "pkg:cargo/constant_time_eq@0.4.2?uuid=fixed-uid-done-for-testing-5642512d1758",
82+
"datafile_paths": [
83+
"Cargo.toml"
84+
],
85+
"datasource_ids": [
86+
"cargo_toml"
87+
],
88+
"purl": "pkg:cargo/constant_time_eq@0.4.2"
89+
}
90+
],
91+
"dependencies": [
92+
{
93+
"purl": "pkg:cargo/criterion",
94+
"extracted_requirement": "0.5.1",
95+
"scope": "dev-dependencies",
96+
"is_runtime": false,
97+
"is_optional": false,
98+
"is_pinned": false,
99+
"is_direct": true,
100+
"resolved_package": {},
101+
"extra_data": {
102+
"version": "0.5.1",
103+
"features": [
104+
"cargo_bench_support",
105+
"html_reports"
106+
]
107+
},
108+
"dependency_uid": "pkg:cargo/criterion?uuid=fixed-uid-done-for-testing-5642512d1758",
109+
"for_package_uid": "pkg:cargo/constant_time_eq@0.4.2?uuid=fixed-uid-done-for-testing-5642512d1758",
110+
"datafile_path": "Cargo.toml",
111+
"datasource_id": "cargo_toml"
112+
},
113+
{
114+
"purl": "pkg:cargo/count_instructions",
115+
"extracted_requirement": "0.2.0",
116+
"scope": "dev-dependencies",
117+
"is_runtime": false,
118+
"is_optional": false,
119+
"is_pinned": false,
120+
"is_direct": true,
121+
"resolved_package": {},
122+
"extra_data": {},
123+
"dependency_uid": "pkg:cargo/count_instructions?uuid=fixed-uid-done-for-testing-5642512d1758",
124+
"for_package_uid": "pkg:cargo/constant_time_eq@0.4.2?uuid=fixed-uid-done-for-testing-5642512d1758",
125+
"datafile_path": "Cargo.toml",
126+
"datasource_id": "cargo_toml"
127+
}
128+
],
4129
"files": [
5130
{
6131
"path": "Cargo.toml",
@@ -119,8 +244,10 @@
119244
"purl": "pkg:cargo/constant_time_eq@0.4.2"
120245
}
121246
],
122-
"for_packages": [],
247+
"for_packages": [
248+
"pkg:cargo/constant_time_eq@0.4.2?uuid=fixed-uid-done-for-testing-5642512d1758"
249+
],
123250
"scan_errors": []
124251
}
125252
]
126-
}
253+
}

0 commit comments

Comments
 (0)