From 11adbe3b72e8330962b28e4c53334ce4f812bedf Mon Sep 17 00:00:00 2001 From: tdruez Date: Mon, 16 Jun 2025 13:56:51 +0400 Subject: [PATCH 1/2] Display matched snippets details in "Resource viewer" #1688 Signed-off-by: tdruez --- CHANGELOG.rst | 4 +++ scanpipe/tests/test_views.py | 12 ++++++++- scanpipe/views.py | 47 +++++++++++++++++++++++------------- 3 files changed, 45 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 538c624ae2..b0c589fb38 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,6 +4,10 @@ Changelog v34.12.0 (unreleased) --------------------- +- Display matched snippets details in "Resource viewer", including the package, + resource, and similarity values. + https://github.com/aboutcode-org/scancode.io/issues/1688 + - Add filtering by label and pipeline in the ``flush-projects`` management command. Also, a new ``--dry-run`` option is available to test the filters before applying the deletion. diff --git a/scanpipe/tests/test_views.py b/scanpipe/tests/test_views.py index 57e43ddce4..5a1f6a158c 100644 --- a/scanpipe/tests/test_views.py +++ b/scanpipe/tests/test_views.py @@ -1339,7 +1339,17 @@ def test_scanpipe_views_codebase_resource_details_get_matched_snippet_annotation resource1.save() resource1.refresh_from_db() results = CodebaseResourceDetailsView.get_matched_snippet_annotations(resource1) - expected_results = [{"start_line": 1, "end_line": 6}] + expected_results = [ + { + "start_line": 1, + "end_line": 6, + "text": ( + "package: pkg:github/isaacs/inherits@v2.0.3\n" + "resource: inherits-2.0.3/inherits.js\n" + "similarity: 1.0\n" + ), + } + ] self.assertEqual(expected_results, results) def test_project_packages_export_json(self): diff --git a/scanpipe/views.py b/scanpipe/views.py index 201ad7792b..ddaf8b18ab 100644 --- a/scanpipe/views.py +++ b/scanpipe/views.py @@ -2006,24 +2006,33 @@ def get_license_annotations(self, field_name): @staticmethod def get_matched_snippet_annotations(resource): - # convert qspan from list of ints to Spans - matched_snippet_annotations = [] matched_snippets = resource.extra_data.get("matched_snippets") - if matched_snippets: - line_by_pos = resource.extra_data.get("line_by_pos") - for matched_snippet in matched_snippets: - match_detections = matched_snippet["match_detections"] - qspan = Span(match_detections) - for span in qspan.subspans(): - # line_by_pos is stored as JSON and keys in JSON are always - # strings - matched_snippet_annotations.append( - { - "start_line": line_by_pos[str(span.start)], - "end_line": line_by_pos[str(span.end)], - } - ) - return matched_snippet_annotations + line_by_pos = resource.extra_data.get("line_by_pos") + if not matched_snippets: + return [] + + snippet_annotations = [] + for snippet in matched_snippets: + package = snippet.get("package", "") + resource = snippet.get("resource", "") + similarity = snippet.get("similarity", 0) + text = ( + f"package: {package}\nresource: {resource}\nsimilarity: {similarity}\n" + ) + + # convert qspan from list of ints to Spans + qspan = Span(snippet["match_detections"]) + for span in qspan.subspans(): + # line_by_pos is stored as JSON and keys in JSON are always strings + snippet_annotations.append( + { + "start_line": line_by_pos[str(span.start)], + "end_line": line_by_pos[str(span.end)], + "text": text, + } + ) + + return snippet_annotations def get_context_data(self, **kwargs): context = super().get_context_data(**kwargs) @@ -2055,6 +2064,10 @@ def get_context_data(self, **kwargs): matched_snippet_annotations = self.get_matched_snippet_annotations(resource) context["detected_values"]["matched snippets"] = matched_snippet_annotations + from pprint import pprint + + pprint(context["detected_values"]) + return context From cfb0f5a290a63a6df0a8b9a5ac7424396116c4c9 Mon Sep 17 00:00:00 2001 From: tdruez Date: Mon, 16 Jun 2025 13:59:14 +0400 Subject: [PATCH 2/2] Remove print statements used for debugging #1688 Signed-off-by: tdruez --- scanpipe/views.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scanpipe/views.py b/scanpipe/views.py index ddaf8b18ab..ea66581643 100644 --- a/scanpipe/views.py +++ b/scanpipe/views.py @@ -2064,10 +2064,6 @@ def get_context_data(self, **kwargs): matched_snippet_annotations = self.get_matched_snippet_annotations(resource) context["detected_values"]["matched snippets"] = matched_snippet_annotations - from pprint import pprint - - pprint(context["detected_values"]) - return context