Skip to content

Commit 840661e

Browse files
committed
Fix oversized values in CodebaseResource.extension
- Normalize extension from file name instead of trusting input
- Reject invalid extensions containing '$'
- Preserve multi-part extensions such as .tar.gz-extract
- Maintain compatibility with existing pipelines and behaviors

Fixes #1537

Signed-off-by: Monal-Reddy <monalreddy001@gmail.com>
1 parent 75d9b80 commit 840661e

File tree

2 files changed

+47
-0
lines changed

2 files changed

+47
-0
lines changed

scanpipe/pipes/__init__.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,27 @@
4444
logger = logging.getLogger("scanpipe.pipes")
4545

4646

47+
def normalize_extension(name, extension, max_length=100):
    """
    Return a normalized file extension computed from the file ``name``.

    The ``extension`` argument is accepted so callers can pass the value they
    already have, but it is deliberately ignored: the result is always
    recomputed from ``name``.

    Rules, applied in order:

    - An empty ``name``, or a ``name`` without a suffix, yields ``""``.
    - When ``name`` contains ``.tar.gz``, everything from its first occurrence
      to the end of ``name`` is kept, so that multi-part extensions such as
      ``.tar.gz-extract`` are preserved. Otherwise the last suffix is used.
    - A candidate that contains ``$`` or is longer than ``max_length``
      characters is rejected and ``""`` is returned. Pass ``max_length=None``
      to disable the length check.
    """
    if not name:
        return ""

    suffix = Path(name).suffix
    if not suffix:
        return ""

    # Special case: keep the whole ".tar.gz-*" tail instead of the last suffix.
    tar_gz_index = name.find(".tar.gz")
    candidate = suffix if tar_gz_index == -1 else name[tar_gz_index:]

    # Validate the value actually returned, not only the last suffix, so the
    # ".tar.gz" tail cannot smuggle in a "$" or an oversized value.
    if "$" in candidate:
        return ""

    if max_length is not None and len(candidate) > max_length:
        return ""

    return candidate
66+
67+
4768
def make_codebase_resource(project, location, save=True, **extra_fields):
4869
"""
4970
Create a CodebaseResource instance in the database for the given ``project``.
@@ -94,6 +115,12 @@ def make_codebase_resource(project, location, save=True, **extra_fields):
94115
if extra_fields:
95116
resource_data.update(**extra_fields)
96117

118+
# Normalize extension to avoid oversized non-extension values
119+
resource_data["extension"] = normalize_extension(
120+
resource_data.get("name"),
121+
resource_data.get("extension"),
122+
)
123+
97124
codebase_resource = CodebaseResource(
98125
project=project,
99126
path=relative_path,

scanpipe/tests/pipes/test_pipes.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,3 +448,23 @@ def test_scanpipe_pipes_collect_and_create_codebase_resources(self):
448448
self.assertEqual("from", from_resource.tag)
449449
to_resource = p1.codebaseresources.get(path="to/a.txt")
450450
self.assertEqual("to", to_resource.tag)
451+
452+
def test_normalize_extension_valid(self):
    # A plain file name yields its dotted suffix.
    extension = pipes.normalize_extension("file.py", None)
    self.assertEqual(".py", extension)
456+
457+
def test_normalize_extension_no_extension(self):
    # A file name without any suffix yields an empty extension.
    extension = pipes.normalize_extension("file", None)
    self.assertEqual("", extension)
461+
462+
def test_normalize_extension_rejects_long_invalid(self):
    # A suffix containing "$" is not a real extension and is dropped.
    invalid_name = "file.$VeryVeryVeryLongInvalidExtensionNameThatShouldNotBeAccepted"
    extension = pipes.normalize_extension(invalid_name, None)
    self.assertEqual("", extension)
466+
467+
def test_normalize_extension_ignores_input_extension(self):
    # The extension passed in is disregarded; the file name decides.
    extension = pipes.normalize_extension("file.py", ".wrongext")
    self.assertEqual(".py", extension)

0 commit comments

Comments
 (0)