Skip to content

Commit 4001864

Browse files
committed
chore(hfh):SP-4188 include license info into hfh results
1 parent 46158c4 commit 4001864

2 files changed

Lines changed: 82 additions & 14 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1010
- Added `--format raw` option to `folder-scan` command to export HFH results in snippet-scanner JSON format
1111
- Expands directory-level HFH results into per-file entries keyed by relative file path
1212
- Assigns each file to the most specific matching `path_id` (deepest directory match wins)
13+
- Added license decoration to folder hash scan results via the dependency service
14+
- Each component version in HFH results is now decorated with license information
15+
- CycloneDX output uses pre-decorated licenses instead of making a separate dependency API call
1316

1417
## [1.50.0] - 2026-03-17
1518
### Fixed

src/scanoss/scanners/scanner_hfh.py

Lines changed: 79 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -117,17 +117,77 @@ def __init__( # noqa: PLR0913
117117

118118
def _execute_grpc_scan(self, hfh_request: Dict) -> None:
    """
    Execute folder hash scan and decorate results with license information.

    The scan response is stored in ``self.scan_results``. If the scan call
    itself fails, the error is printed to stderr and ``self.scan_results`` is
    set to ``None``.

    License decoration is best-effort: if it fails, a warning is printed and
    the scan results obtained from the service are kept.

    Args:
        hfh_request: Request dictionary for the gRPC call
    """
    try:
        self.scan_results = self.client.folder_hash_scan(hfh_request, self.use_grpc)
    except Exception as e:
        self.base.print_stderr(f'Error during folder hash scan: {e}')
        self.scan_results = None
        return

    # Kept outside the scan's try block so that a decoration problem is never
    # reported as a scan failure and never throws away a successful scan.
    try:
        self._decorate_with_licenses()
    except Exception as e:
        self.base.print_stderr(f'Warning: Failed to decorate results with license data: {e}')
130131

132+
def _decorate_with_licenses(self) -> None:
    """
    Decorate each component version in scan results with license information
    by calling the dependency service.

    Every ``purl``/``version`` pair found under
    ``self.scan_results['results'][*]['components'][*]['versions'][*]`` is sent
    (once) to ``self.client.get_dependencies``. The licenses returned for a
    pair are written in place to the ``licenses`` key of each matching
    version entry.

    Decoration is best-effort. If the service call fails a warning is printed
    to stderr. Whatever happens, every version entry ends up with a
    ``licenses`` list: the service's answer when there is one, otherwise the
    value it already had, otherwise an empty list.
    """
    if not self.scan_results or not self.client:
        return
    results = self.scan_results.get('results') or []
    if not results:
        return

    # Flatten the nested structure once. ``or`` (rather than a .get default)
    # also covers keys that are present but explicitly null.
    version_entries = []
    for result in results:
        path_id = result.get('path_id', '')
        for component in result.get('components') or []:
            purl = component.get('purl') or ''
            for version_entry in component.get('versions') or []:
                version = version_entry.get('version') or ''
                version_entries.append((path_id, purl, version, version_entry))

    # Request each (purl, version) pair only once, even if it appears under
    # several path_ids; the response is matched by purl/requirement only.
    dep_files = []
    requested = set()
    for path_id, purl, version, _ in version_entries:
        if not purl or not version or (purl, version) in requested:
            continue
        requested.add((purl, version))
        dep_files.append({
            'file': path_id,
            'purls': [{'purl': purl, 'requirement': version}],
        })

    # Build lookup: (purl, requirement) -> licenses. A tuple key avoids the
    # ambiguity of joining two strings that may themselves contain '@'.
    license_map = {}
    if dep_files:
        try:
            decorated = self.client.get_dependencies({'files': dep_files})
        except Exception as e:
            self.base.print_stderr(f'Warning: Failed to fetch license data: {e}')
            decorated = None
        for dep_file in (decorated or {}).get('files') or []:
            for dep in dep_file.get('dependencies') or []:
                dep_purl = dep.get('purl') or ''
                licenses = dep.get('licenses') or []
                if dep_purl and licenses:
                    license_map[(dep_purl, dep.get('requirement') or '')] = licenses

    # Inject licenses into each component version, without discarding a list
    # that is already present when the service had nothing to offer.
    for _, purl, version, version_entry in version_entries:
        version_entry['licenses'] = (
            license_map.get((purl, version)) or version_entry.get('licenses') or []
        )
190+
131191
def scan(self) -> Optional[Dict]:
132192
"""
133193
Scan the provided directory using the folder hashing algorithm.
@@ -218,30 +278,34 @@ def _format_cyclonedx_output(self) -> str: # noqa: PLR0911
218278
if not best_match_component.get('versions'):
219279
self.base.print_stderr('ERROR: No versions found for best match component')
220280
return ''
221-
222281
best_match_version = best_match_component['versions'][0]
223282
purl = best_match_component['purl']
283+
version = best_match_version['version']
284+
licenses = best_match_version.get('licenses', [])
224285

225-
get_dependencies_json_request = {
226-
'files': [
286+
# Build scan_results from already-decorated HFH data
287+
scan_results = {
288+
f'{best_match_component["name"]}:{version}': [
227289
{
228-
'file': f'{best_match_component["name"]}:{best_match_version["version"]}',
229-
'purls': [{'purl': purl, 'requirement': best_match_version['version']}],
290+
'id': 'dependency',
291+
'dependencies': [
292+
{
293+
'purl': purl,
294+
'component': best_match_component.get('name', ''),
295+
'version': version,
296+
'licenses': licenses,
297+
}
298+
],
230299
}
231300
]
232301
}
233302

234303
get_vulnerabilities_json_request = {
235-
'components': [{'purl': purl, 'requirement': best_match_version['version']}],
304+
'components': [{'purl': purl, 'requirement': version}],
236305
}
237-
238-
decorated_scan_results = self.scanner.client.get_dependencies(get_dependencies_json_request)
239306
vulnerabilities = self.scanner.client.get_vulnerabilities_json(get_vulnerabilities_json_request)
240307

241308
cdx = CycloneDx(self.base.debug)
242-
scan_results = {}
243-
for f in decorated_scan_results['files']:
244-
scan_results[f['file']] = [f]
245309
success, cdx_output = cdx.produce_from_json(scan_results)
246310
if not success:
247311
error_msg = 'ERROR: Failed to produce CycloneDX output'
@@ -253,7 +317,7 @@ def _format_cyclonedx_output(self) -> str: # noqa: PLR0911
253317

254318
return json.dumps(cdx_output, indent=2)
255319
except Exception as e:
256-
self.base.print_stderr(f'ERROR: Failed to get license information: {e}')
320+
self.base.print_stderr(f'ERROR: Failed to produce CycloneDX output: {e}')
257321
return None
258322

259323
def _format_spdxlite_output(self) -> str:
@@ -414,6 +478,7 @@ def _build_snippet_entry(
414478
"""
415479
purl = component.get('purl', '')
416480
version = best_version.get('version', '')
481+
licenses = best_version.get('licenses', [])
417482

418483
url = purl2url.get_repo_url(purl) if purl else ''
419484
return {
@@ -431,7 +496,7 @@ def _build_snippet_entry(
431496
'source_hash': file_hash,
432497
'url_hash': '',
433498
'release_date': '',
434-
'licenses': [],
499+
'licenses': licenses,
435500
'lines': 'all',
436501
'oss_lines': 'all',
437502
'status': 'pending',

0 commit comments

Comments
 (0)