11import re
2- from pathlib import Path , PurePosixPath
2+ from pathlib import PurePosixPath
33from urllib .parse import urlparse
44
55import requests
6- from django .conf import settings
7- from django .core .files import File
6+ from django .core .files .base import ContentFile
87from django .core .management .base import BaseCommand
98
10- from apps .pages .models import Image , Page , page_image_path
9+ from apps .pages .models import Image , Page
1110
1211HTTP_OK = 200
1312
@@ -18,14 +17,6 @@ class Command(BaseCommand):
1817 def get_success_pages (self ):
1918 return Page .objects .filter (path__startswith = "about/success/" )
2019
21- def image_url (self , path ):
22- """
23- Given a full filesystem path to an image, return the proper media
24- url for it
25- """
26- new_url = path .replace (settings .MEDIA_ROOT , settings .MEDIA_URL )
27- return new_url .replace ("//" , "/" )
28-
2920 def fix_image (self , path , page ):
3021 url = f"http://legacy.python.org{ path } "
3122 # Retrieve the image
@@ -34,34 +25,18 @@ def fix_image(self, path, page):
3425 if r .status_code != HTTP_OK :
3526 return None
3627
37- # Create new associated image and generate ultimate path
38- img = Image ()
39- img .page = page
40-
41- # Sanitize filename from URL to prevent path traversal
42- parsed_name = PurePosixPath (urlparse (url ).path ).name
43- filename = Path (parsed_name ).name # strip any remaining path components
44- output_path = page_image_path (img , filename )
45-
46- # Ensure output stays within MEDIA_ROOT
47- resolved = Path (output_path ).resolve ()
48- media_root = Path (settings .MEDIA_ROOT ).resolve ()
49- if not str (resolved ).startswith (str (media_root )):
28+ # Extract and validate filename (alphanumeric, hyphens, dots only)
29+ raw_name = PurePosixPath (urlparse (url ).path ).name
30+ filename = re .sub (r"[^\w.\-]" , "_" , raw_name )
31+ if not filename or filename .startswith ("." ):
5032 return None
5133
52- # Make sure our directories exist
53- resolved .parent .mkdir (parents = True , exist_ok = True )
54-
55- # Write image data to our location
56- with resolved .open ("wb" ) as f :
57- f .write (r .content )
58-
59- # Re-open the image as a Django File object
60- with resolved .open ("rb" ) as reopen :
61- new_file = File (reopen )
62- img .image .save (filename , new_file , save = True )
34+ # Use Django's storage API to safely write the file
35+ img = Image ()
36+ img .page = page
37+ img .image .save (filename , ContentFile (r .content ), save = True )
6338
64- return self . image_url ( output_path )
39+ return img . image . url
6540
6641 def find_image_paths (self , page ):
6742 content = page .content .raw
0 commit comments