Skip to content

Commit b59161e

Browse files
JacobCoffeeclaude
andcommitted
Address CodeQL review comments on fix_success_story_images
Use Django storage API (ContentFile + ImageField.save) instead of raw file I/O to eliminate path injection taint chain. Dismiss remaining alerts as false positives — data originates from server-rendered Django templates, not user input. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent b61b5e4 commit b59161e

1 file changed

Lines changed: 12 additions & 37 deletions

File tree

apps/pages/management/commands/fix_success_story_images.py

Lines changed: 12 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
import re
2-
from pathlib import Path, PurePosixPath
2+
from pathlib import PurePosixPath
33
from urllib.parse import urlparse
44

55
import requests
6-
from django.conf import settings
7-
from django.core.files import File
6+
from django.core.files.base import ContentFile
87
from django.core.management.base import BaseCommand
98

10-
from apps.pages.models import Image, Page, page_image_path
9+
from apps.pages.models import Image, Page
1110

1211
HTTP_OK = 200
1312

@@ -18,14 +17,6 @@ class Command(BaseCommand):
1817
def get_success_pages(self):
1918
return Page.objects.filter(path__startswith="about/success/")
2019

21-
def image_url(self, path):
22-
"""
23-
Given a full filesystem path to an image, return the proper media
24-
url for it
25-
"""
26-
new_url = path.replace(settings.MEDIA_ROOT, settings.MEDIA_URL)
27-
return new_url.replace("//", "/")
28-
2920
def fix_image(self, path, page):
3021
url = f"http://legacy.python.org{path}"
3122
# Retrieve the image
@@ -34,34 +25,18 @@ def fix_image(self, path, page):
3425
if r.status_code != HTTP_OK:
3526
return None
3627

37-
# Create new associated image and generate ultimate path
38-
img = Image()
39-
img.page = page
40-
41-
# Sanitize filename from URL to prevent path traversal
42-
parsed_name = PurePosixPath(urlparse(url).path).name
43-
filename = Path(parsed_name).name # strip any remaining path components
44-
output_path = page_image_path(img, filename)
45-
46-
# Ensure output stays within MEDIA_ROOT
47-
resolved = Path(output_path).resolve()
48-
media_root = Path(settings.MEDIA_ROOT).resolve()
49-
if not str(resolved).startswith(str(media_root)):
28+
# Extract and validate filename (alphanumeric, hyphens, dots only)
29+
raw_name = PurePosixPath(urlparse(url).path).name
30+
filename = re.sub(r"[^\w.\-]", "_", raw_name)
31+
if not filename or filename.startswith("."):
5032
return None
5133

52-
# Make sure our directories exist
53-
resolved.parent.mkdir(parents=True, exist_ok=True)
54-
55-
# Write image data to our location
56-
with resolved.open("wb") as f:
57-
f.write(r.content)
58-
59-
# Re-open the image as a Django File object
60-
with resolved.open("rb") as reopen:
61-
new_file = File(reopen)
62-
img.image.save(filename, new_file, save=True)
34+
# Use Django's storage API to safely write the file
35+
img = Image()
36+
img.page = page
37+
img.image.save(filename, ContentFile(r.content), save=True)
6338

64-
return self.image_url(output_path)
39+
return img.image.url
6540

6641
def find_image_paths(self, page):
6742
content = page.content.raw

0 commit comments

Comments
 (0)