Skip to content

Commit c8b5be2

Browse files
committed
chore(hfh):SP-4188 include license info into hfh results
1 parent 7a4c07c commit c8b5be2

2 files changed

Lines changed: 113 additions & 14 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1010
- Added `--format raw` option to `folder-scan` command to export HFH results in snippet-scanner JSON format
1111
- Expands directory-level HFH results into per-file entries keyed by relative file path
1212
- Assigns each file to the most specific matching `path_id` (deepest directory match wins)
13+
- Added license decoration to folder hash scan results via dependency service
14+
- Each component version in HFH results is now decorated with license information
15+
- CycloneDX output uses pre-decorated licenses instead of making a separate dependency API call
1316

1417
## [1.50.0] - 2026-03-17
1518
### Fixed

src/scanoss/scanners/scanner_hfh.py

Lines changed: 110 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -117,17 +117,108 @@ def __init__( # noqa: PLR0913
117117

118118
def _execute_grpc_scan(self, hfh_request: Dict) -> None:
    """
    Execute folder hash scan and decorate results with license information.

    The scan response is stored in ``self.scan_results``; it is reset to
    ``None`` only when the scan call itself fails. License decoration is
    best-effort: if it raises, a warning is printed to stderr and the scan
    results already obtained are kept as they are.

    Args:
        hfh_request: Request dictionary for the gRPC call
    """
    try:
        self.scan_results = self.client.folder_hash_scan(hfh_request, self.use_grpc)
    except Exception as e:
        self.base.print_stderr(f'Error during folder hash scan: {e}')
        self.scan_results = None
        return

    # Decorate outside the scan's try block: an unexpected error while adding
    # licenses must not discard valid scan results, nor be reported as a
    # failure of the scan itself.
    try:
        self._decorate_with_licenses()
    except Exception as e:
        self.base.print_stderr(f'Warning: Failed to decorate results with licenses: {e}')
130131

132+
def _decorate_with_licenses(self) -> None:
    """
    Add license information to each component version in the scan results.

    Builds a dependency request from the current results, sends it through
    ``self.client.get_dependencies`` and writes the returned licenses back
    into the results in place. Returns without modifying anything when there
    are no results, no client, nothing to look up, the request raises, or
    the response has no 'files' entry.
    """
    if not (self.scan_results and self.client):
        return

    hfh_results = self.scan_results.get('results', [])
    if not hfh_results:
        return

    request_files = self._collect_dep_files(hfh_results)
    if not request_files:
        return

    try:
        response = self.client.get_dependencies({'files': request_files})
    except Exception as e:
        # Best-effort: a failed license lookup only produces a warning.
        self.base.print_stderr(f'Warning: Failed to fetch license data: {e}')
        return

    if response and 'files' in response:
        self._inject_licenses(hfh_results, self._build_license_map(response))
158+
159+
@staticmethod
def _collect_dep_files(results: List[Dict]) -> List[Dict]:
    """Collect dependency file entries for all component versions in the results.

    Args:
        results (List[Dict]): The 'results' list from the HFH scan response.

    Returns:
        List[Dict]: One ``{'file': <path_id>, 'purls': [{'purl': ...,
        'requirement': <version>}]}`` entry per component version that has
        both a non-empty purl and a non-empty version.
    """
    dep_files = []
    for result in results or []:
        path_id = result.get('path_id', '')
        # ``or []`` also covers keys that are present but explicitly None;
        # ``.get(key, [])`` alone would return None and break the loop.
        for component in result.get('components') or []:
            purl = component.get('purl', '')
            if not purl:
                continue
            for version_entry in component.get('versions') or []:
                version = version_entry.get('version', '')
                if not version:
                    continue
                dep_files.append(
                    {
                        'file': path_id,
                        'purls': [{'purl': purl, 'requirement': version}],
                    }
                )
    return dep_files
178+
179+
@staticmethod
def _build_license_map(decorated: Dict) -> Dict[str, List]:
    """Build a purl@requirement -> licenses lookup from the dependency service response.

    Args:
        decorated (Dict): The response from the dependency service containing
            decorated files with license information.

    Returns:
        Dict[str, List]: A mapping of 'purl@requirement' keys to their
        corresponding list of license dictionaries. Dependencies without a
        purl or without licenses are left out.
    """
    license_map = {}
    # ``or []`` / ``or ''`` also cover keys that are present but explicitly
    # None, which ``.get(key, default)`` alone would pass straight through.
    for dep_file in decorated.get('files') or []:
        for dep in dep_file.get('dependencies') or []:
            dep_purl = dep.get('purl', '')
            # Use 'requirement' instead of 'version' as the key because the service
            # may resolve a different version, but the requirement always matches what was sent.
            dep_requirement = dep.get('requirement') or ''
            licenses = dep.get('licenses', [])
            if dep_purl and licenses:
                license_map[f'{dep_purl}@{dep_requirement}'] = licenses
    return license_map
202+
203+
@staticmethod
def _inject_licenses(results: List[Dict], license_map: Dict[str, List]) -> None:
    """Inject licenses from the lookup map into each component version entry.

    Every version entry gets a 'licenses' key; entries with no match in
    ``license_map`` receive an empty list.

    Args:
        results (List[Dict]): The 'results' list from the HFH scan response.
            Each result contains components with version entries that will
            be mutated in place to include license data.
        license_map (Dict[str, List]): A mapping of 'purl@requirement' keys
            to their corresponding list of license dictionaries. The lookup
            key built here is the component purl plus the entry's 'version'
            value.
    """
    for result in results or []:
        # ``or []`` also covers keys that are present but explicitly None;
        # ``.get(key, [])`` alone would return None and break the loop.
        for component in result.get('components') or []:
            purl = component.get('purl', '')
            for version_entry in component.get('versions') or []:
                version = version_entry.get('version', '')
                version_entry['licenses'] = license_map.get(f'{purl}@{version}', [])
221+
131222
def scan(self) -> Optional[Dict]:
132223
"""
133224
Scan the provided directory using the folder hashing algorithm.
@@ -218,30 +309,34 @@ def _format_cyclonedx_output(self) -> str: # noqa: PLR0911
218309
if not best_match_component.get('versions'):
219310
self.base.print_stderr('ERROR: No versions found for best match component')
220311
return ''
221-
222312
best_match_version = best_match_component['versions'][0]
223313
purl = best_match_component['purl']
314+
version = best_match_version['version']
315+
licenses = best_match_version.get('licenses', [])
224316

225-
get_dependencies_json_request = {
226-
'files': [
317+
# Build scan_results from already-decorated HFH data
318+
scan_results = {
319+
f'{best_match_component["name"]}:{version}': [
227320
{
228-
'file': f'{best_match_component["name"]}:{best_match_version["version"]}',
229-
'purls': [{'purl': purl, 'requirement': best_match_version['version']}],
321+
'id': 'dependency',
322+
'dependencies': [
323+
{
324+
'purl': purl,
325+
'component': best_match_component.get('name', ''),
326+
'version': version,
327+
'licenses': licenses,
328+
}
329+
],
230330
}
231331
]
232332
}
233333

234334
get_vulnerabilities_json_request = {
235-
'components': [{'purl': purl, 'requirement': best_match_version['version']}],
335+
'components': [{'purl': purl, 'requirement': version}],
236336
}
237-
238-
decorated_scan_results = self.scanner.client.get_dependencies(get_dependencies_json_request)
239337
vulnerabilities = self.scanner.client.get_vulnerabilities_json(get_vulnerabilities_json_request)
240338

241339
cdx = CycloneDx(self.base.debug)
242-
scan_results = {}
243-
for f in decorated_scan_results['files']:
244-
scan_results[f['file']] = [f]
245340
success, cdx_output = cdx.produce_from_json(scan_results)
246341
if not success:
247342
error_msg = 'ERROR: Failed to produce CycloneDX output'
@@ -253,7 +348,7 @@ def _format_cyclonedx_output(self) -> str: # noqa: PLR0911
253348

254349
return json.dumps(cdx_output, indent=2)
255350
except Exception as e:
256-
self.base.print_stderr(f'ERROR: Failed to get license information: {e}')
351+
self.base.print_stderr(f'ERROR: Failed to produce CycloneDX output: {e}')
257352
return None
258353

259354
def _format_spdxlite_output(self) -> str:
@@ -414,6 +509,7 @@ def _build_file_match_entry(
414509
"""
415510
purl = component.get('purl', '')
416511
version = best_version.get('version', '')
512+
licenses = best_version.get('licenses', [])
417513

418514
url = purl2url.get_repo_url(purl) if purl else ''
419515
return {
@@ -431,7 +527,7 @@ def _build_file_match_entry(
431527
'source_hash': file_hash,
432528
'url_hash': '',
433529
'release_date': '',
434-
'licenses': [],
530+
'licenses': licenses,
435531
'lines': 'all',
436532
'oss_lines': 'all',
437533
'status': 'pending',

0 commit comments

Comments
 (0)