From 2b9dbe192dbc763bbc1fbe0083e2296b1d04ba69 Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Thu, 12 Jun 2025 15:51:19 +0530 Subject: [PATCH 01/19] Add support for storing top-level paths of the codebase Signed-off-by: Aayush Kumar --- scanpipe/pipes/rootfs.py | 7 +++++++ scanpipe/tests/test_pipelines.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/scanpipe/pipes/rootfs.py b/scanpipe/pipes/rootfs.py index 9c623491a7..144b8239d6 100644 --- a/scanpipe/pipes/rootfs.py +++ b/scanpipe/pipes/rootfs.py @@ -139,6 +139,13 @@ def get_res(parent, fname): rootfs_path=rootfs_path, ) + if with_dir: + rootfs_path = pipes.normalize_path("") + yield Resource( + location=location, + rootfs_path=rootfs_path, + ) + for top, dirs, files in os.walk(location): for f in files: yield get_res(parent=top, fname=f) diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index 0852bc8418..854c396f57 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -1209,7 +1209,7 @@ def test_scanpipe_rootfs_pipeline_integration(self): exitcode, out = pipeline.execute() self.assertEqual(0, exitcode, msg=out) - self.assertEqual(16, project1.codebaseresources.count()) + self.assertEqual(17, project1.codebaseresources.count()) self.assertEqual(2, project1.discoveredpackages.count()) self.assertEqual(0, project1.discovereddependencies.count()) From 4150c235d638416d296e7fbda0867258d8e250e5 Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Thu, 12 Jun 2025 18:36:59 +0530 Subject: [PATCH 02/19] Add `ancestor` field to CodebasResource to track parent path of a resource Signed-off-by: Aayush Kumar --- ...0073_codebaseresource_ancestor_and_more.py | 22 +++++++++++++++++++ scanpipe/models.py | 9 ++++++++ scanpipe/pipes/__init__.py | 2 ++ 3 files changed, 33 insertions(+) create mode 100644 scanpipe/migrations/0073_codebaseresource_ancestor_and_more.py diff --git a/scanpipe/migrations/0073_codebaseresource_ancestor_and_more.py 
b/scanpipe/migrations/0073_codebaseresource_ancestor_and_more.py new file mode 100644 index 0000000000..6d6952e6ee --- /dev/null +++ b/scanpipe/migrations/0073_codebaseresource_ancestor_and_more.py @@ -0,0 +1,22 @@ +# Generated by Django 5.1.9 on 2025-06-12 10:32 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('scanpipe', '0072_discovereddependency_uuid_unique'), + ] + + operations = [ + migrations.AddField( + model_name='codebaseresource', + name='ancestor', + field=models.CharField(blank=True, help_text="Path of the immediate parent directory of this resource. Its '.' for top-level resources.", max_length=2000, null=True), + ), + migrations.AddIndex( + model_name='codebaseresource', + index=models.Index(fields=['project', 'ancestor'], name='scanpipe_co_project_f1a160_idx'), + ), + ] diff --git a/scanpipe/models.py b/scanpipe/models.py index ac28627808..c8c92b8145 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2739,6 +2739,14 @@ class CodebaseResource( 'Eg.: "/usr/bin/bash" for a path of "tarball-extract/rootfs/usr/bin/bash"' ), ) + + ancestor = models.CharField( + max_length=2000, + null=True, + blank=True, + help_text="Path of the immediate parent directory of this resource. Its '.' 
for top-level resources.", + ) + status = models.CharField( blank=True, max_length=50, @@ -2832,6 +2840,7 @@ class Meta: models.Index(fields=["compliance_alert"]), models.Index(fields=["is_binary"]), models.Index(fields=["is_text"]), + models.Index(fields=["project", "ancestor"]), ] constraints = [ models.UniqueConstraint( diff --git a/scanpipe/pipes/__init__.py b/scanpipe/pipes/__init__.py index d3fe69f69a..7da1534f02 100644 --- a/scanpipe/pipes/__init__.py +++ b/scanpipe/pipes/__init__.py @@ -72,6 +72,7 @@ def make_codebase_resource(project, location, save=True, **extra_fields): from scanpipe.pipes import flag relative_path = Path(location).relative_to(project.codebase_path) + parent_path = str(relative_path.parent) try: resource_data = scancode.get_resource_info(location=str(location)) except OSError as error: @@ -92,6 +93,7 @@ def make_codebase_resource(project, location, save=True, **extra_fields): codebase_resource = CodebaseResource( project=project, path=relative_path, + ancestor=parent_path, **resource_data, ) From 0e5336b4b30e7764f6879f017b98841be9728b26 Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Fri, 13 Jun 2025 16:03:24 +0530 Subject: [PATCH 03/19] fix line too long error in scanpipe/models.py Signed-off-by: Aayush Kumar --- scanpipe/models.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scanpipe/models.py b/scanpipe/models.py index c8c92b8145..5a41731bcd 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2744,7 +2744,10 @@ class CodebaseResource( max_length=2000, null=True, blank=True, - help_text="Path of the immediate parent directory of this resource. Its '.' for top-level resources.", + help_text=_( + "Path of the immediate parent directory of a resource. 
" + "For top level resources the value is '.'" + ), ) status = models.CharField( From 41e03e740c00a9bb0482613341e4247cbce75351 Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Fri, 13 Jun 2025 17:15:02 +0530 Subject: [PATCH 04/19] update tests Signed-off-by: Aayush Kumar --- .../rootfs/basic-rootfs_root_filesystems.json | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/scanpipe/tests/data/rootfs/basic-rootfs_root_filesystems.json b/scanpipe/tests/data/rootfs/basic-rootfs_root_filesystems.json index 970d672007..b64c4115f9 100644 --- a/scanpipe/tests/data/rootfs/basic-rootfs_root_filesystems.json +++ b/scanpipe/tests/data/rootfs/basic-rootfs_root_filesystems.json @@ -340,6 +340,42 @@ ], "dependencies": [], "files": [ + { + "path": "basic-rootfs.tar.gz-extract", + "type": "directory", + "name": "basic-rootfs.tar.gz-extract", + "status": "scanned", + "for_packages": [], + "tag": "", + "extension": ".tar.gz-extract", + "programming_language": "", + "detected_license_expression": "", + "detected_license_expression_spdx": "", + "license_detections": [], + "license_clues": [], + "percentage_of_license_text": null, + "copyrights": [], + "holders": [], + "authors": [], + "package_data": [], + "emails": [], + "urls": [], + "md5": "", + "sha1": "", + "sha256": "", + "sha512": "", + "sha1_git": "", + "is_binary": false, + "is_text": false, + "is_archive": false, + "is_media": false, + "is_legal": false, + "is_manifest": false, + "is_readme": false, + "is_top_level": true, + "is_key_file": false, + "extra_data": {} + }, { "path": "basic-rootfs.tar.gz-extract/etc", "type": "directory", From 6aede5809a3b87f94fdabffdb0adaacd091858bb Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Sat, 14 Jun 2025 17:51:30 +0530 Subject: [PATCH 05/19] rename `ancestor` field to `parent_directory_path` Signed-off-by: Aayush Kumar --- ...0073_codebaseresource_ancestor_and_more.py | 22 ------------------- ...resource_parent_directory_path_and_more.py | 22 +++++++++++++++++++ 
scanpipe/models.py | 6 ++--- scanpipe/pipes/__init__.py | 6 ++++- 4 files changed, 30 insertions(+), 26 deletions(-) delete mode 100644 scanpipe/migrations/0073_codebaseresource_ancestor_and_more.py create mode 100644 scanpipe/migrations/0073_codebaseresource_parent_directory_path_and_more.py diff --git a/scanpipe/migrations/0073_codebaseresource_ancestor_and_more.py b/scanpipe/migrations/0073_codebaseresource_ancestor_and_more.py deleted file mode 100644 index 6d6952e6ee..0000000000 --- a/scanpipe/migrations/0073_codebaseresource_ancestor_and_more.py +++ /dev/null @@ -1,22 +0,0 @@ -# Generated by Django 5.1.9 on 2025-06-12 10:32 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ('scanpipe', '0072_discovereddependency_uuid_unique'), - ] - - operations = [ - migrations.AddField( - model_name='codebaseresource', - name='ancestor', - field=models.CharField(blank=True, help_text="Path of the immediate parent directory of this resource. Its '.' for top-level resources.", max_length=2000, null=True), - ), - migrations.AddIndex( - model_name='codebaseresource', - index=models.Index(fields=['project', 'ancestor'], name='scanpipe_co_project_f1a160_idx'), - ), - ] diff --git a/scanpipe/migrations/0073_codebaseresource_parent_directory_path_and_more.py b/scanpipe/migrations/0073_codebaseresource_parent_directory_path_and_more.py new file mode 100644 index 0000000000..561a459c54 --- /dev/null +++ b/scanpipe/migrations/0073_codebaseresource_parent_directory_path_and_more.py @@ -0,0 +1,22 @@ +# Generated by Django 5.1.9 on 2025-06-14 10:11 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('scanpipe', '0072_discovereddependency_uuid_unique'), + ] + + operations = [ + migrations.AddField( + model_name='codebaseresource', + name='parent_directory_path', + field=models.CharField(blank=True, help_text='Path of the immediate parent directory of a resource. 
For top level resources the value is set to None', max_length=2000, null=True), + ), + migrations.AddIndex( + model_name='codebaseresource', + index=models.Index(fields=['project', 'parent_directory_path'], name='scanpipe_co_project_f4a24b_idx'), + ), + ] diff --git a/scanpipe/models.py b/scanpipe/models.py index 5a41731bcd..80dce7a71a 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2740,13 +2740,13 @@ class CodebaseResource( ), ) - ancestor = models.CharField( + parent_directory_path = models.CharField( max_length=2000, null=True, blank=True, help_text=_( "Path of the immediate parent directory of a resource. " - "For top level resources the value is '.'" + "For top level resources the value is set to None" ), ) @@ -2843,7 +2843,7 @@ class Meta: models.Index(fields=["compliance_alert"]), models.Index(fields=["is_binary"]), models.Index(fields=["is_text"]), - models.Index(fields=["project", "ancestor"]), + models.Index(fields=["project", "parent_directory_path"]), ] constraints = [ models.UniqueConstraint( diff --git a/scanpipe/pipes/__init__.py b/scanpipe/pipes/__init__.py index 7da1534f02..7ea8650cc1 100644 --- a/scanpipe/pipes/__init__.py +++ b/scanpipe/pipes/__init__.py @@ -73,6 +73,10 @@ def make_codebase_resource(project, location, save=True, **extra_fields): relative_path = Path(location).relative_to(project.codebase_path) parent_path = str(relative_path.parent) + + if parent_path == ".": + parent_path = None + try: resource_data = scancode.get_resource_info(location=str(location)) except OSError as error: @@ -93,7 +97,7 @@ def make_codebase_resource(project, location, save=True, **extra_fields): codebase_resource = CodebaseResource( project=project, path=relative_path, - ancestor=parent_path, + parent_directory_path=parent_path, **resource_data, ) From c355002b34465bac6cbe64d0620714f43d9b34ef Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Sat, 14 Jun 2025 23:59:01 +0530 Subject: [PATCH 06/19] add save() method to CodebaseResource to ensure 
`parent_directory_path` is always set Signed-off-by: Aayush Kumar --- scanpipe/models.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scanpipe/models.py b/scanpipe/models.py index 80dce7a71a..7ed00a58c1 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2856,6 +2856,11 @@ class Meta: def __str__(self): return self.path + def save(self, *args, **kwargs): + if self.path and not self.parent_directory_path: + self.parent_directory_path = parent_directory(str(self.path), with_trail=False) + super().save(*args, **kwargs) + def get_absolute_url(self): return reverse("resource_detail", args=[self.project.slug, self.path]) From 7df056c0d4d4a89bf4d60a1a57d32ca776426b9f Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Sun, 15 Jun 2025 00:23:59 +0530 Subject: [PATCH 07/19] add tests Signed-off-by: Aayush Kumar --- scanpipe/tests/test_models.py | 10 ++++++ scanpipe/tests/test_pipelines.py | 58 ++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/scanpipe/tests/test_models.py b/scanpipe/tests/test_models.py index ef3f08039e..75a1437ed3 100644 --- a/scanpipe/tests/test_models.py +++ b/scanpipe/tests/test_models.py @@ -1646,6 +1646,16 @@ def test_scanpipe_can_compute_compliance_alert_for_license_exceptions(self): resource.update(detected_license_expression=license_expression) self.assertEqual("warning", resource.compute_compliance_alert()) + def test_scanpipe_codebase_root_parent_directory_path(self): + resource1 = self.project1.codebaseresources.create(path="file") + + self.assertEqual("", resource1.parent_directory_path) + + def test_scanpipe_codebase_regular_parent_directory_path(self): + resource2 = self.project1.codebaseresources.create(path="dir1/dir2/file") + + self.assertEqual("dir1/dir2", resource2.parent_directory_path) + def test_scanpipe_scan_fields_model_mixin_methods(self): expected = [ "detected_license_expression", diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index 
854c396f57..ff251d7092 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -863,6 +863,64 @@ def test_scanpipe_scan_codebase_pipeline_integration(self): expected_file = self.data / "scancode" / "is-npm-1.0.0_scan_codebase.json" self.assertPipelineResultEqual(expected_file, result_file) + def test_scanpipe_scan_codebase_creates_top_level_paths(self): + pipeline_name = "scan_codebase" + project1 = make_project() + + filename = "is-npm-1.0.0.tgz" + input_location = self.data / "scancode" / filename + project1.copy_input_from(input_location) + + run = project1.add_pipeline(pipeline_name) + pipeline = run.make_pipeline_instance() + + exitcode, out = pipeline.execute() + self.assertEqual(0, exitcode, msg=out) + + expected_top_level_paths = ["is-npm-1.0.0.tgz", "is-npm-1.0.0.tgz-extract"] + + top_level_resources = project1.codebaseresources.filter( + parent_directory_path=None + ) + top_level_paths = [res.path for res in top_level_resources] + + self.assertListEqual(top_level_paths, expected_top_level_paths) + + def test_scanpipe_scan_codebase_creates_parent_directory_path_field(self): + pipeline_name = "scan_codebase" + project1 = make_project() + + filename = "is-npm-1.0.0.tgz" + input_location = self.data / "scancode" / filename + project1.copy_input_from(input_location) + + run = project1.add_pipeline(pipeline_name) + pipeline = run.make_pipeline_instance() + + exitcode, out = pipeline.execute() + self.assertEqual(0, exitcode, msg=out) + + expected_top_level_paths = ["is-npm-1.0.0.tgz", "is-npm-1.0.0.tgz-extract"] + expected_nested_paths = [ + "is-npm-1.0.0.tgz-extract/package/index.js", + "is-npm-1.0.0.tgz-extract/package/package.json", + "is-npm-1.0.0.tgz-extract/package/readme.md", + ] + + top_level_resources = project1.codebaseresources.filter( + parent_directory_path=None + ) + top_level_paths = [res.path for res in top_level_resources] + + self.assertListEqual(top_level_paths, expected_top_level_paths) + + nested_resources = 
project1.codebaseresources.filter( + parent_directory_path="is-npm-1.0.0.tgz-extract/package" + ) + nested_paths = [res.path for res in nested_resources] + + self.assertListEqual(nested_paths, expected_nested_paths) + def test_scanpipe_inspect_packages_creates_packages_npm(self): pipeline_name = "inspect_packages" project1 = make_project() From 8dca295d72625f1b52a6ed169d4a144928364d46 Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Sun, 15 Jun 2025 00:29:18 +0530 Subject: [PATCH 08/19] fix code format Signed-off-by: Aayush Kumar --- scanpipe/models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scanpipe/models.py b/scanpipe/models.py index 7ed00a58c1..ee54c1a05a 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2858,7 +2858,9 @@ def __str__(self): def save(self, *args, **kwargs): if self.path and not self.parent_directory_path: - self.parent_directory_path = parent_directory(str(self.path), with_trail=False) + self.parent_directory_path = parent_directory( + str(self.path), with_trail=False + ) super().save(*args, **kwargs) def get_absolute_url(self): From a10cf4136da7220b3f785fcbdafab066facf404b Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Tue, 17 Jun 2025 00:35:47 +0530 Subject: [PATCH 09/19] rename parent_directory_path field to parent_path Signed-off-by: Aayush Kumar --- ...e.py => 0073_codebaseresource_parent_path_and_more.py} | 6 +++--- scanpipe/models.py | 8 ++++---- scanpipe/pipes/__init__.py | 2 +- scanpipe/tests/test_models.py | 8 ++++---- scanpipe/tests/test_pipelines.py | 8 ++++---- 5 files changed, 16 insertions(+), 16 deletions(-) rename scanpipe/migrations/{0073_codebaseresource_parent_directory_path_and_more.py => 0073_codebaseresource_parent_path_and_more.py} (73%) diff --git a/scanpipe/migrations/0073_codebaseresource_parent_directory_path_and_more.py b/scanpipe/migrations/0073_codebaseresource_parent_path_and_more.py similarity index 73% rename from 
scanpipe/migrations/0073_codebaseresource_parent_directory_path_and_more.py rename to scanpipe/migrations/0073_codebaseresource_parent_path_and_more.py index 561a459c54..ee1f5dd4a7 100644 --- a/scanpipe/migrations/0073_codebaseresource_parent_directory_path_and_more.py +++ b/scanpipe/migrations/0073_codebaseresource_parent_path_and_more.py @@ -1,4 +1,4 @@ -# Generated by Django 5.1.9 on 2025-06-14 10:11 +# Generated by Django 5.1.9 on 2025-06-16 17:42 from django.db import migrations, models @@ -12,11 +12,11 @@ class Migration(migrations.Migration): operations = [ migrations.AddField( model_name='codebaseresource', - name='parent_directory_path', + name='parent_path', field=models.CharField(blank=True, help_text='Path of the immediate parent directory of a resource. For top level resources the value is set to None', max_length=2000, null=True), ), migrations.AddIndex( model_name='codebaseresource', - index=models.Index(fields=['project', 'parent_directory_path'], name='scanpipe_co_project_f4a24b_idx'), + index=models.Index(fields=['project', 'parent_path'], name='scanpipe_co_project_008448_idx'), ), ] diff --git a/scanpipe/models.py b/scanpipe/models.py index ee54c1a05a..c61e02fb84 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2740,7 +2740,7 @@ class CodebaseResource( ), ) - parent_directory_path = models.CharField( + parent_path = models.CharField( max_length=2000, null=True, blank=True, @@ -2843,7 +2843,7 @@ class Meta: models.Index(fields=["compliance_alert"]), models.Index(fields=["is_binary"]), models.Index(fields=["is_text"]), - models.Index(fields=["project", "parent_directory_path"]), + models.Index(fields=["project", "parent_path"]), ] constraints = [ models.UniqueConstraint( @@ -2857,8 +2857,8 @@ def __str__(self): return self.path def save(self, *args, **kwargs): - if self.path and not self.parent_directory_path: - self.parent_directory_path = parent_directory( + if self.path and not self.parent_path: + self.parent_path = parent_directory( 
str(self.path), with_trail=False ) super().save(*args, **kwargs) diff --git a/scanpipe/pipes/__init__.py b/scanpipe/pipes/__init__.py index 7ea8650cc1..c48e642d14 100644 --- a/scanpipe/pipes/__init__.py +++ b/scanpipe/pipes/__init__.py @@ -97,7 +97,7 @@ def make_codebase_resource(project, location, save=True, **extra_fields): codebase_resource = CodebaseResource( project=project, path=relative_path, - parent_directory_path=parent_path, + parent_path=parent_path, **resource_data, ) diff --git a/scanpipe/tests/test_models.py b/scanpipe/tests/test_models.py index 75a1437ed3..503072cffb 100644 --- a/scanpipe/tests/test_models.py +++ b/scanpipe/tests/test_models.py @@ -1646,15 +1646,15 @@ def test_scanpipe_can_compute_compliance_alert_for_license_exceptions(self): resource.update(detected_license_expression=license_expression) self.assertEqual("warning", resource.compute_compliance_alert()) - def test_scanpipe_codebase_root_parent_directory_path(self): + def test_scanpipe_codebase_root_parent_path(self): resource1 = self.project1.codebaseresources.create(path="file") - self.assertEqual("", resource1.parent_directory_path) + self.assertEqual("", resource1.parent_path) - def test_scanpipe_codebase_regular_parent_directory_path(self): + def test_scanpipe_codebase_regular_parent_path(self): resource2 = self.project1.codebaseresources.create(path="dir1/dir2/file") - self.assertEqual("dir1/dir2", resource2.parent_directory_path) + self.assertEqual("dir1/dir2", resource2.parent_path) def test_scanpipe_scan_fields_model_mixin_methods(self): expected = [ diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index ff251d7092..ffc368ad5a 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -880,13 +880,13 @@ def test_scanpipe_scan_codebase_creates_top_level_paths(self): expected_top_level_paths = ["is-npm-1.0.0.tgz", "is-npm-1.0.0.tgz-extract"] top_level_resources = project1.codebaseresources.filter( - 
parent_directory_path=None + parent_path=None ) top_level_paths = [res.path for res in top_level_resources] self.assertListEqual(top_level_paths, expected_top_level_paths) - def test_scanpipe_scan_codebase_creates_parent_directory_path_field(self): + def test_scanpipe_scan_codebase_creates_parent_path_field(self): pipeline_name = "scan_codebase" project1 = make_project() @@ -908,14 +908,14 @@ def test_scanpipe_scan_codebase_creates_parent_directory_path_field(self): ] top_level_resources = project1.codebaseresources.filter( - parent_directory_path=None + parent_path=None ) top_level_paths = [res.path for res in top_level_resources] self.assertListEqual(top_level_paths, expected_top_level_paths) nested_resources = project1.codebaseresources.filter( - parent_directory_path="is-npm-1.0.0.tgz-extract/package" + parent_path="is-npm-1.0.0.tgz-extract/package" ) nested_paths = [res.path for res in nested_resources] From 3a57e1acc5d17eaa141cb043f455b00dd852234f Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Tue, 17 Jun 2025 00:38:49 +0530 Subject: [PATCH 10/19] fix code format Signed-off-by: Aayush Kumar --- scanpipe/models.py | 4 +--- scanpipe/tests/test_pipelines.py | 8 ++------ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/scanpipe/models.py b/scanpipe/models.py index c61e02fb84..c6c5b6b4e2 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2858,9 +2858,7 @@ def __str__(self): def save(self, *args, **kwargs): if self.path and not self.parent_path: - self.parent_path = parent_directory( - str(self.path), with_trail=False - ) + self.parent_path = parent_directory(str(self.path), with_trail=False) super().save(*args, **kwargs) def get_absolute_url(self): diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index ffc368ad5a..14cdc117a0 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -879,9 +879,7 @@ def test_scanpipe_scan_codebase_creates_top_level_paths(self): 
expected_top_level_paths = ["is-npm-1.0.0.tgz", "is-npm-1.0.0.tgz-extract"] - top_level_resources = project1.codebaseresources.filter( - parent_path=None - ) + top_level_resources = project1.codebaseresources.filter(parent_path=None) top_level_paths = [res.path for res in top_level_resources] self.assertListEqual(top_level_paths, expected_top_level_paths) @@ -907,9 +905,7 @@ def test_scanpipe_scan_codebase_creates_parent_path_field(self): "is-npm-1.0.0.tgz-extract/package/readme.md", ] - top_level_resources = project1.codebaseresources.filter( - parent_path=None - ) + top_level_resources = project1.codebaseresources.filter(parent_path=None) top_level_paths = [res.path for res in top_level_resources] self.assertListEqual(top_level_paths, expected_top_level_paths) From 0f4094c66df8511cdb649c89169512a5f49a9ef4 Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Tue, 17 Jun 2025 17:49:33 +0530 Subject: [PATCH 11/19] minor fixes and adjustments following review feedback Signed-off-by: Aayush Kumar --- .../0073_codebaseresource_parent_path_and_more.py | 2 +- scanpipe/models.py | 10 ++++++---- scanpipe/pipes/rootfs.py | 3 ++- scanpipe/tests/test_models.py | 2 +- scanpipe/tests/test_pipelines.py | 6 +++--- 5 files changed, 13 insertions(+), 10 deletions(-) diff --git a/scanpipe/migrations/0073_codebaseresource_parent_path_and_more.py b/scanpipe/migrations/0073_codebaseresource_parent_path_and_more.py index ee1f5dd4a7..b5bd8fc442 100644 --- a/scanpipe/migrations/0073_codebaseresource_parent_path_and_more.py +++ b/scanpipe/migrations/0073_codebaseresource_parent_path_and_more.py @@ -13,7 +13,7 @@ class Migration(migrations.Migration): migrations.AddField( model_name='codebaseresource', name='parent_path', - field=models.CharField(blank=True, help_text='Path of the immediate parent directory of a resource. For top level resources the value is set to None', max_length=2000, null=True), + field=models.CharField(blank=True, help_text='The path of the resource\'s parent directory. 
Set to None for top-level (root) resources. Used to efficiently retrieve a directory\'s contents.', max_length=2000, null=True), ), migrations.AddIndex( model_name='codebaseresource', diff --git a/scanpipe/models.py b/scanpipe/models.py index c6c5b6b4e2..7127a6fba9 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2745,8 +2745,9 @@ class CodebaseResource( null=True, blank=True, help_text=_( - "Path of the immediate parent directory of a resource. " - "For top level resources the value is set to None" + "The path of the resource's parent directory. " + "Set to None for top-level (root) resources. " + "Used to efficiently retrieve a directory's contents." ), ) @@ -2858,7 +2859,7 @@ def __str__(self): def save(self, *args, **kwargs): if self.path and not self.parent_path: - self.parent_path = parent_directory(str(self.path), with_trail=False) + self.parent_path = self.parent_directory() super().save(*args, **kwargs) def get_absolute_url(self): @@ -2931,7 +2932,8 @@ def get_path_segments_with_subpath(self): def parent_directory(self): """Return the parent path for this CodebaseResource or None.""" - return parent_directory(self.path, with_trail=False) + parent_path = parent_directory(str(self.path), with_trail=False) + return None if parent_path == "" else parent_path def has_parent(self): """ diff --git a/scanpipe/pipes/rootfs.py b/scanpipe/pipes/rootfs.py index 144b8239d6..95325d38d3 100644 --- a/scanpipe/pipes/rootfs.py +++ b/scanpipe/pipes/rootfs.py @@ -139,8 +139,9 @@ def get_res(parent, fname): rootfs_path=rootfs_path, ) + # Explicitly yields the root directory as a resource when `with_dir` is True if with_dir: - rootfs_path = pipes.normalize_path("") + rootfs_path = "/" yield Resource( location=location, rootfs_path=rootfs_path, diff --git a/scanpipe/tests/test_models.py b/scanpipe/tests/test_models.py index 503072cffb..479d78eb9c 100644 --- a/scanpipe/tests/test_models.py +++ b/scanpipe/tests/test_models.py @@ -1649,7 +1649,7 @@ def 
test_scanpipe_can_compute_compliance_alert_for_license_exceptions(self): def test_scanpipe_codebase_root_parent_path(self): resource1 = self.project1.codebaseresources.create(path="file") - self.assertEqual("", resource1.parent_path) + self.assertIsNone(resource1.parent_path) def test_scanpipe_codebase_regular_parent_path(self): resource2 = self.project1.codebaseresources.create(path="dir1/dir2/file") diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index 14cdc117a0..10643282c0 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -880,7 +880,7 @@ def test_scanpipe_scan_codebase_creates_top_level_paths(self): expected_top_level_paths = ["is-npm-1.0.0.tgz", "is-npm-1.0.0.tgz-extract"] top_level_resources = project1.codebaseresources.filter(parent_path=None) - top_level_paths = [res.path for res in top_level_resources] + top_level_paths = [resource.path for resource in top_level_resources] self.assertListEqual(top_level_paths, expected_top_level_paths) @@ -906,14 +906,14 @@ def test_scanpipe_scan_codebase_creates_parent_path_field(self): ] top_level_resources = project1.codebaseresources.filter(parent_path=None) - top_level_paths = [res.path for res in top_level_resources] + top_level_paths = [resource.path for resource in top_level_resources] self.assertListEqual(top_level_paths, expected_top_level_paths) nested_resources = project1.codebaseresources.filter( parent_path="is-npm-1.0.0.tgz-extract/package" ) - nested_paths = [res.path for res in nested_resources] + nested_paths = [resource.path for resource in nested_resources] self.assertListEqual(nested_paths, expected_nested_paths) From bd11786eee38c67bfc8a58ada9213dba749f303f Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Tue, 17 Jun 2025 18:26:14 +0530 Subject: [PATCH 12/19] Simplify return statement in `parent_directory` for better readability Signed-off-by: Aayush Kumar --- scanpipe/models.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/scanpipe/models.py b/scanpipe/models.py index 7127a6fba9..5548e8ee5c 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2933,7 +2933,7 @@ def get_path_segments_with_subpath(self): def parent_directory(self): """Return the parent path for this CodebaseResource or None.""" parent_path = parent_directory(str(self.path), with_trail=False) - return None if parent_path == "" else parent_path + return parent_path or None def has_parent(self): """ From c8331dfcdac1cde80640206321fad01ecbca277e Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Tue, 8 Jul 2025 20:03:32 +0530 Subject: [PATCH 13/19] bump migration Signed-off-by: Aayush Kumar --- ...nd_more.py => 0074_codebaseresource_parent_path_and_more.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename scanpipe/migrations/{0073_codebaseresource_parent_path_and_more.py => 0074_codebaseresource_parent_path_and_more.py} (91%) diff --git a/scanpipe/migrations/0073_codebaseresource_parent_path_and_more.py b/scanpipe/migrations/0074_codebaseresource_parent_path_and_more.py similarity index 91% rename from scanpipe/migrations/0073_codebaseresource_parent_path_and_more.py rename to scanpipe/migrations/0074_codebaseresource_parent_path_and_more.py index b5bd8fc442..332bf3faaf 100644 --- a/scanpipe/migrations/0073_codebaseresource_parent_path_and_more.py +++ b/scanpipe/migrations/0074_codebaseresource_parent_path_and_more.py @@ -6,7 +6,7 @@ class Migration(migrations.Migration): dependencies = [ - ('scanpipe', '0072_discovereddependency_uuid_unique'), + ('scanpipe', '0073_add_sha1_git_checksum'), ] operations = [ From a0258405873eb47da89a7ab983e22589a443acbe Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Wed, 9 Jul 2025 01:22:06 +0530 Subject: [PATCH 14/19] update parent_path to display root files on empty string instead of None to align with the code format Signed-off-by: Aayush Kumar --- .../migrations/0074_codebaseresource_parent_path_and_more.py | 2 +- scanpipe/models.py | 3 +-- 
scanpipe/pipes/__init__.py | 2 +- scanpipe/tests/test_models.py | 2 +- scanpipe/tests/test_pipelines.py | 4 ++-- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/scanpipe/migrations/0074_codebaseresource_parent_path_and_more.py b/scanpipe/migrations/0074_codebaseresource_parent_path_and_more.py index 332bf3faaf..efd41fe53a 100644 --- a/scanpipe/migrations/0074_codebaseresource_parent_path_and_more.py +++ b/scanpipe/migrations/0074_codebaseresource_parent_path_and_more.py @@ -13,7 +13,7 @@ class Migration(migrations.Migration): migrations.AddField( model_name='codebaseresource', name='parent_path', - field=models.CharField(blank=True, help_text='The path of the resource\'s parent directory. Set to None for top-level (root) resources. Used to efficiently retrieve a directory\'s contents.', max_length=2000, null=True), + field=models.CharField(blank=True, help_text='The path of the resource\'s parent directory. Set to None for top-level (root) resources. Used to efficiently retrieve a directory\'s contents.', max_length=2000), ), migrations.AddIndex( model_name='codebaseresource', diff --git a/scanpipe/models.py b/scanpipe/models.py index 5548e8ee5c..94d70628a3 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2742,7 +2742,6 @@ class CodebaseResource( parent_path = models.CharField( max_length=2000, - null=True, blank=True, help_text=_( "The path of the resource's parent directory. 
" @@ -2859,7 +2858,7 @@ def __str__(self): def save(self, *args, **kwargs): if self.path and not self.parent_path: - self.parent_path = self.parent_directory() + self.parent_path = self.parent_directory() or "" super().save(*args, **kwargs) def get_absolute_url(self): diff --git a/scanpipe/pipes/__init__.py b/scanpipe/pipes/__init__.py index c48e642d14..788a1ce828 100644 --- a/scanpipe/pipes/__init__.py +++ b/scanpipe/pipes/__init__.py @@ -75,7 +75,7 @@ def make_codebase_resource(project, location, save=True, **extra_fields): parent_path = str(relative_path.parent) if parent_path == ".": - parent_path = None + parent_path = "" try: resource_data = scancode.get_resource_info(location=str(location)) diff --git a/scanpipe/tests/test_models.py b/scanpipe/tests/test_models.py index 479d78eb9c..503072cffb 100644 --- a/scanpipe/tests/test_models.py +++ b/scanpipe/tests/test_models.py @@ -1649,7 +1649,7 @@ def test_scanpipe_can_compute_compliance_alert_for_license_exceptions(self): def test_scanpipe_codebase_root_parent_path(self): resource1 = self.project1.codebaseresources.create(path="file") - self.assertIsNone(resource1.parent_path) + self.assertEqual("", resource1.parent_path) def test_scanpipe_codebase_regular_parent_path(self): resource2 = self.project1.codebaseresources.create(path="dir1/dir2/file") diff --git a/scanpipe/tests/test_pipelines.py b/scanpipe/tests/test_pipelines.py index 10643282c0..40b567ba04 100644 --- a/scanpipe/tests/test_pipelines.py +++ b/scanpipe/tests/test_pipelines.py @@ -879,7 +879,7 @@ def test_scanpipe_scan_codebase_creates_top_level_paths(self): expected_top_level_paths = ["is-npm-1.0.0.tgz", "is-npm-1.0.0.tgz-extract"] - top_level_resources = project1.codebaseresources.filter(parent_path=None) + top_level_resources = project1.codebaseresources.filter(parent_path="") top_level_paths = [resource.path for resource in top_level_resources] self.assertListEqual(top_level_paths, expected_top_level_paths) @@ -905,7 +905,7 @@ def 
test_scanpipe_scan_codebase_creates_parent_path_field(self): "is-npm-1.0.0.tgz-extract/package/readme.md", ] - top_level_resources = project1.codebaseresources.filter(parent_path=None) + top_level_resources = project1.codebaseresources.filter(parent_path="") top_level_paths = [resource.path for resource in top_level_resources] self.assertListEqual(top_level_paths, expected_top_level_paths) From 8c6b229de208090efd6c4db16ce57d8497e5cb9a Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Tue, 15 Jul 2025 00:32:39 +0530 Subject: [PATCH 15/19] fix `scan_single_package` not giving correct `parent_path` Signed-off-by: Aayush Kumar --- scanpipe/models.py | 2 +- scanpipe/pipes/scancode.py | 7 +++++++ scanpipe/tests/pipes/test_scancode.py | 18 ++++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/scanpipe/models.py b/scanpipe/models.py index 94d70628a3..50be1ed4e0 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -2745,7 +2745,7 @@ class CodebaseResource( blank=True, help_text=_( "The path of the resource's parent directory. " - "Set to None for top-level (root) resources. " + "Set to empty string for top-level (root) resources. " "Used to efficiently retrieve a directory's contents." ), ) diff --git a/scanpipe/pipes/scancode.py b/scanpipe/pipes/scancode.py index 8f169311d2..58af4ab9b3 100644 --- a/scanpipe/pipes/scancode.py +++ b/scanpipe/pipes/scancode.py @@ -916,6 +916,8 @@ def create_codebase_resources(project, scanned_codebase): # includes the "root". The `get_path` method is used instead. 
if field.name == "path": continue + if field.name == "parent_path": + continue value = getattr(scanned_resource, field.name, None) if value is not None: resource_data[field.name] = value @@ -924,6 +926,11 @@ def create_codebase_resources(project, scanned_codebase): resource_data["type"] = CodebaseResource.Type[resource_type] resource_path = scanned_resource.get_path(strip_root=True) + parent_path = str(Path(resource_path).parent) + if parent_path == ".": + parent_path = "" + resource_data["parent_path"] = parent_path + codebase_resource, _ = CodebaseResource.objects.get_or_create( project=project, path=resource_path, diff --git a/scanpipe/tests/pipes/test_scancode.py b/scanpipe/tests/pipes/test_scancode.py index 4904cb68f8..259d0619f3 100644 --- a/scanpipe/tests/pipes/test_scancode.py +++ b/scanpipe/tests/pipes/test_scancode.py @@ -723,3 +723,21 @@ def test_scanpipe_scancode_resolve_dependencies_no_requirements(self): resolved_dep = project1.discovereddependencies.get(name="bluebird") self.assertEqual(resolved_dep, dep_2) self.assertEqual(resolved_dep.resolved_to_package, pkg_1) + + def test_scanpipe_pipes_scancode_scan_single_package_correct_parent_path(self): + project1 = Project.objects.create(name="Analysis") + input_location = self.data / "scancode" / "is-npm-1.0.0.tgz" + project1.copy_input_from(input_location) + run = project1.add_pipeline("scan_single_package") + pipeline = run.make_pipeline_instance() + exitcode, out = pipeline.execute() + + self.assertEqual(0, exitcode, msg=out) + self.assertEqual(4, project1.codebaseresources.count()) + + root = project1.codebaseresources.get(path="package") + self.assertEqual("", root.parent_path) + self.assertNotEqual("codebase", root.parent_path) + + file1 = project1.codebaseresources.get(path="package/index.js") + self.assertEqual("package", file1.parent_path) \ No newline at end of file From 7be20a13b96da6b9bff66a62c5062c83d590ee26 Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Tue, 15 Jul 2025 00:35:13 +0530 
Subject: [PATCH 16/19] fix code format Signed-off-by: Aayush Kumar --- scanpipe/tests/pipes/test_scancode.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scanpipe/tests/pipes/test_scancode.py b/scanpipe/tests/pipes/test_scancode.py index 259d0619f3..5f37f6e754 100644 --- a/scanpipe/tests/pipes/test_scancode.py +++ b/scanpipe/tests/pipes/test_scancode.py @@ -731,13 +731,13 @@ def test_scanpipe_pipes_scancode_scan_single_package_correct_parent_path(self): run = project1.add_pipeline("scan_single_package") pipeline = run.make_pipeline_instance() exitcode, out = pipeline.execute() - + self.assertEqual(0, exitcode, msg=out) self.assertEqual(4, project1.codebaseresources.count()) root = project1.codebaseresources.get(path="package") self.assertEqual("", root.parent_path) self.assertNotEqual("codebase", root.parent_path) - + file1 = project1.codebaseresources.get(path="package/index.js") - self.assertEqual("package", file1.parent_path) \ No newline at end of file + self.assertEqual("package", file1.parent_path) From 5b218df50df7212216d9509ad95150bd33d2f754 Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Mon, 21 Jul 2025 22:56:18 +0530 Subject: [PATCH 17/19] regen tests Signed-off-by: Aayush Kumar --- scanpipe/tests/data/rootfs/basic-rootfs_root_filesystems.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scanpipe/tests/data/rootfs/basic-rootfs_root_filesystems.json b/scanpipe/tests/data/rootfs/basic-rootfs_root_filesystems.json index b64c4115f9..1a4019bcda 100644 --- a/scanpipe/tests/data/rootfs/basic-rootfs_root_filesystems.json +++ b/scanpipe/tests/data/rootfs/basic-rootfs_root_filesystems.json @@ -372,7 +372,7 @@ "is_legal": false, "is_manifest": false, "is_readme": false, - "is_top_level": true, + "is_top_level": false, "is_key_file": false, "extra_data": {} }, From 111a90115d7fb30b6a1eacecec75d07f3fbefb8d Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Tue, 22 Jul 2025 00:17:18 +0530 Subject: [PATCH 18/19] bump 
migration Signed-off-by: Aayush Kumar --- ...nd_more.py => 0075_codebaseresource_parent_path_and_more.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename scanpipe/migrations/{0074_codebaseresource_parent_path_and_more.py => 0075_codebaseresource_parent_path_and_more.py} (92%) diff --git a/scanpipe/migrations/0074_codebaseresource_parent_path_and_more.py b/scanpipe/migrations/0075_codebaseresource_parent_path_and_more.py similarity index 92% rename from scanpipe/migrations/0074_codebaseresource_parent_path_and_more.py rename to scanpipe/migrations/0075_codebaseresource_parent_path_and_more.py index efd41fe53a..5891ef9878 100644 --- a/scanpipe/migrations/0074_codebaseresource_parent_path_and_more.py +++ b/scanpipe/migrations/0075_codebaseresource_parent_path_and_more.py @@ -6,7 +6,7 @@ class Migration(migrations.Migration): dependencies = [ - ('scanpipe', '0073_add_sha1_git_checksum'), + ('scanpipe', '0074_discovered_license_models'), ] operations = [ From 16f9b4c8245b0dd7a1ad8436e09642927398cd24 Mon Sep 17 00:00:00 2001 From: Aayush Kumar Date: Thu, 24 Jul 2025 01:43:40 +0530 Subject: [PATCH 19/19] make create_codebase_resource function less complex Signed-off-by: Aayush Kumar --- scanpipe/pipes/scancode.py | 139 +++++++++++++++++++------------------ 1 file changed, 73 insertions(+), 66 deletions(-) diff --git a/scanpipe/pipes/scancode.py b/scanpipe/pipes/scancode.py index 58af4ab9b3..da0a729d35 100644 --- a/scanpipe/pipes/scancode.py +++ b/scanpipe/pipes/scancode.py @@ -900,53 +900,72 @@ def get_virtual_codebase(project, input_location): return VirtualCodebase(input_location, temp_dir=str(temp_path), max_in_memory=0) -def create_codebase_resources(project, scanned_codebase): - """ - Save the resources of a ScanCode `scanned_codebase` scancode.resource.Codebase - object to the database as a CodebaseResource of the `project`. 
- This function can be used to expend an existing `project` Codebase with new - CodebaseResource objects as the existing objects (based on the `path`) will be - skipped. - """ - for scanned_resource in scanned_codebase.walk(skip_root=True): - resource_data = {} - - for field in CodebaseResource._meta.fields: - # Do not include the path as provided by the scanned_resource since it - # includes the "root". The `get_path` method is used instead. - if field.name == "path": - continue - if field.name == "parent_path": - continue - value = getattr(scanned_resource, field.name, None) - if value is not None: - resource_data[field.name] = value - - resource_type = "FILE" if scanned_resource.is_file else "DIRECTORY" - resource_data["type"] = CodebaseResource.Type[resource_type] - resource_path = scanned_resource.get_path(strip_root=True) - - parent_path = str(Path(resource_path).parent) - if parent_path == ".": - parent_path = "" - resource_data["parent_path"] = parent_path - - codebase_resource, _ = CodebaseResource.objects.get_or_create( +def create_codebase_resource(project, scanned_resource): + """Create a CodebaseResource entry from ScanCode scanned data.""" + resource_data = {} + + for field in CodebaseResource._meta.fields: + # Do not include the path as provided by the scanned_resource since it + # includes the "root". The `get_path` method is used instead. 
+ if field.name in ["path", "parent_path"]: + continue + value = getattr(scanned_resource, field.name, None) + if value is not None: + resource_data[field.name] = value + + resource_type = "FILE" if scanned_resource.is_file else "DIRECTORY" + resource_data["type"] = CodebaseResource.Type[resource_type] + resource_path = scanned_resource.get_path(strip_root=True) + + parent_path = str(Path(resource_path).parent) + if parent_path == ".": + parent_path = "" + resource_data["parent_path"] = parent_path + + codebase_resource, _ = CodebaseResource.objects.get_or_create( + project=project, + path=resource_path, + defaults=resource_data, + ) + + # Handle package assignments + for_packages = getattr(scanned_resource, "for_packages", []) + for package_uid in for_packages: + logger.debug(f"Assign {package_uid} to {codebase_resource}") + package = project.discoveredpackages.get(package_uid=package_uid) + set_codebase_resource_for_package( + codebase_resource=codebase_resource, + discovered_package=package, + ) + + # Handle license detections + license_detections = getattr(scanned_resource, "license_detections", []) + for detection_data in license_detections: + detection_identifier = detection_data.get("identifier") + pipes.update_or_create_license_detection( project=project, - path=resource_path, - defaults=resource_data, + detection_data=detection_data, + resource_path=resource_path, + count_detection=False, ) + logger.debug(f"Add {codebase_resource} to {detection_identifier}") - for_packages = getattr(scanned_resource, "for_packages", []) - for package_uid in for_packages: - logger.debug(f"Assign {package_uid} to {codebase_resource}") - package = project.discoveredpackages.get(package_uid=package_uid) - set_codebase_resource_for_package( - codebase_resource=codebase_resource, - discovered_package=package, - ) + # Handle license clues + license_clues = getattr(scanned_resource, "license_clues", []) + for clue_data in license_clues: + pipes.update_or_create_license_detection( 
+ project=project, + detection_data=clue_data, + resource_path=resource_path, + is_license_clue=True, + ) + logger.debug(f"Add license clue at {codebase_resource}") - license_detections = getattr(scanned_resource, "license_detections", []) + # Handle package data + packages = getattr(scanned_resource, "package_data", []) + for package_data in packages: + license_detections = package_data.get("license_detections", []) + license_detections.extend(package_data.get("other_license_detections", [])) for detection_data in license_detections: detection_identifier = detection_data.get("identifier") pipes.update_or_create_license_detection( @@ -954,33 +973,21 @@ def create_codebase_resources(project, scanned_codebase): detection_data=detection_data, resource_path=resource_path, count_detection=False, + from_package=True, ) logger.debug(f"Add {codebase_resource} to {detection_identifier}") - license_clues = getattr(scanned_resource, "license_clues", []) - for clue_data in license_clues: - pipes.update_or_create_license_detection( - project=project, - detection_data=clue_data, - resource_path=resource_path, - is_license_clue=True, - ) - logger.debug(f"Add license clue at {codebase_resource}") - packages = getattr(scanned_resource, "package_data", []) - for package_data in packages: - license_detections = package_data.get("license_detections", []) - license_detections.extend(package_data.get("other_license_detections", [])) - for detection_data in license_detections: - detection_identifier = detection_data.get("identifier") - pipes.update_or_create_license_detection( - project=project, - detection_data=detection_data, - resource_path=resource_path, - count_detection=False, - from_package=True, - ) - logger.debug(f"Add {codebase_resource} to {detection_identifier}") +def create_codebase_resources(project, scanned_codebase): + """ + Save the resources of a ScanCode `scanned_codebase` scancode.resource.Codebase + object to the database as a CodebaseResource of the `project`. 
+ This function can be used to expand an existing `project` Codebase with new + CodebaseResource objects as the existing objects (based on the `path`) will be + skipped. + """ + for scanned_resource in scanned_codebase.walk(skip_root=True): + create_codebase_resource(project, scanned_resource) def create_discovered_packages(project, scanned_codebase):