From f49cb1910957bd53d32979edc7340ea29f64f18c Mon Sep 17 00:00:00 2001 From: Juanje Mendoza Date: Thu, 26 Mar 2026 16:10:17 +0100 Subject: [PATCH] Fix over-merged values in canonicalization. Detected by Anas. --- src/somef/export/json_export.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/somef/export/json_export.py b/src/somef/export/json_export.py index 1b2beddf..8f1aa7f2 100644 --- a/src/somef/export/json_export.py +++ b/src/somef/export/json_export.py @@ -722,6 +722,10 @@ def canonicalize_value(value, value_type): clean_path = path return urlunparse((parsed.scheme, parsed.netloc, clean_path, '', '', '')) + domains_to_keep_path = ['github.com', 'api.github.com', 'gitlab.com', 'bitbucket.org'] + if any(d in parsed.netloc for d in domains_to_keep_path): + return urlunparse((parsed.scheme, parsed.netloc, path, '', '', '')) + # It's a directory/page → unify to domain return f"{parsed.scheme}://{parsed.netloc}" @@ -776,6 +780,7 @@ def unify_results(repo_data: dict) -> dict: seen = {} for item in items: + result = item.get(constants.PROP_RESULT, {}) normalized_type = normalize_type(result) result[constants.PROP_TYPE] = normalized_type @@ -785,7 +790,6 @@ def unify_results(repo_data: dict) -> dict: canonical = canonicalize_value(value, value_type) key = str(canonical) - if key in seen: existing = seen[key]