From 53f8c41bc3a6ea23d4608a2e21783bdf001980ff Mon Sep 17 00:00:00 2001 From: Wes Appler Date: Thu, 2 Oct 2025 12:54:19 -0400 Subject: [PATCH 1/7] Initial implementation for deleting application attachements --- hypha/apply/activity/adapters/emails.py | 2 - hypha/apply/funds/apps.py | 7 ++ .../0131_delete_orphaned_attachments.py | 72 +++++++++++++++++++ hypha/apply/funds/models/mixins.py | 1 - hypha/apply/funds/signals.py | 29 ++++++++ hypha/apply/funds/views/submission_edit.py | 1 + hypha/apply/stream_forms/files.py | 2 +- hypha/apply/stream_forms/testing/factories.py | 56 ++++++++++++++- 8 files changed, 165 insertions(+), 5 deletions(-) create mode 100644 hypha/apply/funds/migrations/0131_delete_orphaned_attachments.py create mode 100644 hypha/apply/funds/signals.py diff --git a/hypha/apply/activity/adapters/emails.py b/hypha/apply/activity/adapters/emails.py index 2e9de9cff3..ca24a99980 100644 --- a/hypha/apply/activity/adapters/emails.py +++ b/hypha/apply/activity/adapters/emails.py @@ -175,8 +175,6 @@ def handle_transition(self, new_phase, source, old_phase=None, **kwargs): old_index = list(dict(PHASES).keys()).index(old_phase.name) target_index = list(dict(PHASES).keys()).index(submission.status) is_forward = old_index < target_index - print("NEW PHASE") - print(new_phase.public_name) kwargs["old_phase"] = old_phase.public_name kwargs["new_phase"] = new_phase.public_name diff --git a/hypha/apply/funds/apps.py b/hypha/apply/funds/apps.py index 19ff626a16..ab928d56c5 100644 --- a/hypha/apply/funds/apps.py +++ b/hypha/apply/funds/apps.py @@ -1,5 +1,12 @@ from django.apps import AppConfig +from django.core.signals import request_finished class ApplyConfig(AppConfig): name = "hypha.apply.funds" + + def ready(self): + # Connect the attachment deletion handler + from . import signals + + request_finished.connect(signals.delete_attachments) diff --git a/hypha/apply/funds/migrations/0131_delete_orphaned_attachments.py b/hypha/apply/funds/migrations/0131_delete_orphaned_attachments.py new file mode 100644 index 0000000000..88375e7364 --- /dev/null +++ b/hypha/apply/funds/migrations/0131_delete_orphaned_attachments.py @@ -0,0 +1,72 @@ +# Generated by Django 4.2.24 on 2025-10-01 15:39 + +from django.db import migrations +from django.core.files.storage import default_storage +import os + + +def delete_directory(directory_path): + """Delete a full directory (empty or not)""" + + directories, files = default_storage.listdir(directory_path) + + for item in directories: + item_path = os.path.join(directory_path, item) + if default_storage.exists(item_path): + # Recursively delete subdirectories + delete_directory(item_path) + + for item in files: + item_path = os.path.join(directory_path, item) + if default_storage.exists(item_path): + # Delete files + default_storage.delete(item_path) + + if default_storage.exists(directory_path): + # Delete the empty directory + default_storage.delete(directory_path) + + +def delete_orphaned_attachments(apps, schema_editor): + """Remove all attachments not associated with an application""" + + ApplicationSubmission = apps.get_model("funds", "ApplicationSubmission") + + submission_attachment_path = f"{default_storage.base_location}/submission" + + folders_to_delete = [] + folders_to_check = [] + + for folder in default_storage.listdir(submission_attachment_path)[ + 0 + ]: # `listdir` returns ([folders], [files]) + try: + folders_to_check.append(int(folder)) + except ValueError: + # Folder name is not an int, therefore not a submission ID and can be deleted (an edge case) + folders_to_delete.append(folder) + + # Get a list of all undeleted submissions that have a folder + valid_ids = set( + ApplicationSubmission.objects.filter(id__in=folders_to_check).values_list( + "id", flat=True + ) + ) + + # Find the set difference and delete those folders + folders_to_delete += list(set(folders_to_check) - valid_ids) + + for folder in folders_to_delete: + # try: + delete_directory(f"{submission_attachment_path}/{folder}") + # except FileNotFoundError: + # # Will get thrown when unit tests attempt to run migrations + # pass + + +class Migration(migrations.Migration): + dependencies = [ + ("funds", "0130_alter_applicationsubmission_status"), + ] + + operations = [migrations.RunPython(delete_orphaned_attachments)] diff --git a/hypha/apply/funds/models/mixins.py b/hypha/apply/funds/models/mixins.py index 4aa5255440..a6c7e8429e 100644 --- a/hypha/apply/funds/models/mixins.py +++ b/hypha/apply/funds/models/mixins.py @@ -161,7 +161,6 @@ def extract_files(self): for field in self.form_fields: if isinstance(field.block, UploadableMediaBlock): files[field.id] = self.data(field.id) or [] - self.form_data.pop(field.id, None) return files @classmethod diff --git a/hypha/apply/funds/signals.py b/hypha/apply/funds/signals.py new file mode 100644 index 0000000000..e70141222b --- /dev/null +++ b/hypha/apply/funds/signals.py @@ -0,0 +1,29 @@ +from django.db.models.signals import pre_delete +from django.dispatch import receiver + +from hypha.apply.funds.models.application_revisions import ApplicationRevision +from hypha.apply.funds.models.mixins import AccessFormData +from hypha.apply.funds.models.submissions import ApplicationSubmission + + +@receiver(signal=pre_delete, sender=ApplicationSubmission) +@receiver(signal=pre_delete, sender=ApplicationRevision) +def delete_attachments(sender, instance=None, **kwargs): + """ + Before the deletion of any sub class of AccessFormData, ensure the files associated with it are deleted too. + + This can include things like ApplicationSubmission & ApplicationRevision objects + """ + # This will be called anytime a deletion is ran, so ensure the object being deleted + # can have attachments + if ( + sender + and issubclass(sender, AccessFormData) + and isinstance(instance, AccessFormData) + ): + files = instance.extract_files() + for value in files.values(): + if not isinstance(value, list): # Single file field + value.delete() + else: # Multiple file fields + [sub_file.delete() for sub_file in value] diff --git a/hypha/apply/funds/views/submission_edit.py b/hypha/apply/funds/views/submission_edit.py index 241b40c18e..10da360918 100644 --- a/hypha/apply/funds/views/submission_edit.py +++ b/hypha/apply/funds/views/submission_edit.py @@ -253,6 +253,7 @@ def get_placeholder_file(self, initial_file): return [ PlaceholderUploadedFile(f.filename, size=f.size, file_id=f.name) for f in initial_file + if f ] def save_draft_and_refresh_page(self, form) -> HttpResponseRedirect: diff --git a/hypha/apply/stream_forms/files.py b/hypha/apply/stream_forms/files.py index 45b2f99e88..b68bd65a89 100644 --- a/hypha/apply/stream_forms/files.py +++ b/hypha/apply/stream_forms/files.py @@ -7,7 +7,7 @@ class StreamFieldDataEncoder(DjangoJSONEncoder): def default(self, o): - if isinstance(o, StreamFieldFile): + if isinstance(o, File): return { "name": o.name, "filename": o.filename, diff --git a/hypha/apply/stream_forms/testing/factories.py b/hypha/apply/stream_forms/testing/factories.py index de3258b839..c42a7042b3 100644 --- a/hypha/apply/stream_forms/testing/factories.py +++ b/hypha/apply/stream_forms/testing/factories.py @@ -1,9 +1,11 @@ import json +import random import uuid from collections import defaultdict import factory import wagtail_factories +from django.core.files.base import File from django.core.files.uploadedfile import SimpleUploadedFile from django.core.serializers.json import DjangoJSONEncoder from wagtail.blocks import RichTextBlock, StructValue @@ -73,6 +75,18 @@ def _create(cls, *args, form_fields=None, for_factory=None, clean=False, **kwarg or for_factory.Meta.model.form_fields.field.to_python(form_fields) } + # Get UUIDs of the file fields to add "-uploads" fields later + file_fields = [] + file_types = ("image", "file", "multi_file") + for field in form_fields: + try: + if field["type"] in file_types: + file_fields.append(field["id"]) + except TypeError: + if field.block_type in file_types: + file_fields.append(field.id) + # field["id"] for field in form_fields if field["type"] in ("file", "multi_file")] + form_data = {} for name, answer in kwargs.items(): try: @@ -92,6 +106,25 @@ def _create(cls, *args, form_fields=None, for_factory=None, clean=False, **kwarg clean_object.delete() return form_data + for id in file_fields: + uploads = [] + if entry := form_data.get(id): + if not isinstance(entry, list): + entry = [entry] + + for file in entry: + uploads.append( + { + "id": str(uuid.uuid4()), + "name": file._name, + "size": random.randint(20, 100000), + "type": "tus", + "url": "", + } + ) + + form_data[f"{id}-uploads"] = json.dumps(uploads) + return form_data @classmethod @@ -242,6 +275,17 @@ def make_answer(cls, params=None): return cls.choices[0] +class UploadedFile(SimpleUploadedFile): + """Utilized to make functionality closer to that of `StreamFieldFile` + + Requires a `filename` attribute which is pulled from the existing `_name` + """ + + def __init__(self, name, content, content_type=...): + super().__init__(name, content, content_type) + self.filename = self._name + + class UploadableMediaFactory(FormFieldBlockFactory): default_value = factory.django.FileField() @@ -252,7 +296,7 @@ def make_answer(cls, params=None): if params.get("filename") is None: params["filename"] = "test_example.pdf" file_name, file = cls.default_value._make_content(params) - return SimpleUploadedFile(file_name, file.read()) + return UploadedFile(file_name, file.read()) class ImageFieldBlockFactory(UploadableMediaFactory): @@ -276,6 +320,16 @@ def make_answer(cls, params=None): return [UploadableMediaFactory.make_answer() for _ in range(2)] +class StreamFieldDataEncoder(DjangoJSONEncoder): + def default(self, o): + if isinstance(o, File): + return { + "name": o.name, + "filename": o.filename, + } + return super().default(o) + + class StreamFieldUUIDFactory(wagtail_factories.StreamFieldFactory): def evaluate(self, instance, step, extra): params = self.build_form(extra) From 7c557280234ecba1dd061a771b4806339612db9a Mon Sep 17 00:00:00 2001 From: Wes Appler Date: Fri, 3 Oct 2025 10:39:26 -0400 Subject: [PATCH 2/7] Added `FileNotFound` exception handling for when tests are ran --- .../0131_delete_orphaned_attachments.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/hypha/apply/funds/migrations/0131_delete_orphaned_attachments.py b/hypha/apply/funds/migrations/0131_delete_orphaned_attachments.py index 88375e7364..dd2d50c13b 100644 --- a/hypha/apply/funds/migrations/0131_delete_orphaned_attachments.py +++ b/hypha/apply/funds/migrations/0131_delete_orphaned_attachments.py @@ -37,9 +37,8 @@ def delete_orphaned_attachments(apps, schema_editor): folders_to_delete = [] folders_to_check = [] - for folder in default_storage.listdir(submission_attachment_path)[ - 0 - ]: # `listdir` returns ([folders], [files]) + for folder in default_storage.listdir(submission_attachment_path)[0]: + # `listdir` returns ([folders], [files]) ^ try: folders_to_check.append(int(folder)) except ValueError: @@ -57,11 +56,11 @@ def delete_orphaned_attachments(apps, schema_editor): folders_to_delete += list(set(folders_to_check) - valid_ids) for folder in folders_to_delete: - # try: - delete_directory(f"{submission_attachment_path}/{folder}") - # except FileNotFoundError: - # # Will get thrown when unit tests attempt to run migrations - # pass + try: + delete_directory(f"{submission_attachment_path}/{folder}") + except FileNotFoundError: + # Will get thrown when unit tests attempt to run migrations + pass class Migration(migrations.Migration): From 674b4afd02d030ae1dd1cd255299e0af7a36014d Mon Sep 17 00:00:00 2001 From: Wes Appler Date: Fri, 3 Oct 2025 11:16:55 -0400 Subject: [PATCH 3/7] Added check for submission attachments directory --- .../apply/funds/migrations/0131_delete_orphaned_attachments.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hypha/apply/funds/migrations/0131_delete_orphaned_attachments.py b/hypha/apply/funds/migrations/0131_delete_orphaned_attachments.py index dd2d50c13b..6800945545 100644 --- a/hypha/apply/funds/migrations/0131_delete_orphaned_attachments.py +++ b/hypha/apply/funds/migrations/0131_delete_orphaned_attachments.py @@ -37,6 +37,9 @@ def delete_orphaned_attachments(apps, schema_editor): folders_to_delete = [] folders_to_check = [] + if not default_storage.exists(submission_attachment_path): + return + for folder in default_storage.listdir(submission_attachment_path)[0]: # `listdir` returns ([folders], [files]) ^ try: From 138b58041bbf02b67c6ce0cbe4c8b8ae791ebb6d Mon Sep 17 00:00:00 2001 From: Wes Appler Date: Thu, 9 Oct 2025 09:47:09 -0400 Subject: [PATCH 4/7] Add pytest bypass to delete signal --- .../0131_delete_orphaned_attachments.py | 33 ++++++------------- hypha/apply/funds/signals.py | 24 ++++++++++---- hypha/apply/funds/utils.py | 27 +++++++++++++++ hypha/apply/funds/views/submission_edit.py | 1 - 4 files changed, 55 insertions(+), 30 deletions(-) diff --git a/hypha/apply/funds/migrations/0131_delete_orphaned_attachments.py b/hypha/apply/funds/migrations/0131_delete_orphaned_attachments.py index 6800945545..1f415b8551 100644 --- a/hypha/apply/funds/migrations/0131_delete_orphaned_attachments.py +++ b/hypha/apply/funds/migrations/0131_delete_orphaned_attachments.py @@ -2,34 +2,19 @@ from django.db import migrations from django.core.files.storage import default_storage -import os - - -def delete_directory(directory_path): - """Delete a full directory (empty or not)""" - - directories, files = default_storage.listdir(directory_path) - - for item in directories: - item_path = os.path.join(directory_path, item) - if default_storage.exists(item_path): - # Recursively delete subdirectories - delete_directory(item_path) - - for item in files: - item_path = os.path.join(directory_path, item) - if default_storage.exists(item_path): - # Delete files - default_storage.delete(item_path) - - if default_storage.exists(directory_path): - # Delete the empty directory - default_storage.delete(directory_path) +import sys +from ..utils import delete_directory def delete_orphaned_attachments(apps, schema_editor): """Remove all attachments not associated with an application""" + # TODO: This solution is not ideal but due to our unit tests writing to the filesystem + # this can cause files belonging to the dev's local server to be deleted. Until + # these can be better isolated, this signal will do nothing when pytest is running + if "pytest" in sys.modules: + return + ApplicationSubmission = apps.get_model("funds", "ApplicationSubmission") submission_attachment_path = f"{default_storage.base_location}/submission" @@ -38,6 +23,8 @@ def delete_orphaned_attachments(apps, schema_editor): folders_to_check = [] if not default_storage.exists(submission_attachment_path): + # If specified path doesn't exist, ignore + # edge case that typically comes up in tests return for folder in default_storage.listdir(submission_attachment_path)[0]: diff --git a/hypha/apply/funds/signals.py b/hypha/apply/funds/signals.py index e70141222b..aba65d5bb4 100644 --- a/hypha/apply/funds/signals.py +++ b/hypha/apply/funds/signals.py @@ -1,9 +1,12 @@ +import sys + +from django.core.files.storage import default_storage from django.db.models.signals import pre_delete from django.dispatch import receiver from hypha.apply.funds.models.application_revisions import ApplicationRevision -from hypha.apply.funds.models.mixins import AccessFormData from hypha.apply.funds.models.submissions import ApplicationSubmission +from hypha.apply.funds.utils import delete_directory @receiver(signal=pre_delete, sender=ApplicationSubmission) @@ -14,16 +17,25 @@ def delete_attachments(sender, instance=None, **kwargs): This can include things like ApplicationSubmission & ApplicationRevision objects """ + + # TODO: This solution is not ideal but due to our unit tests writing to the filesystem + # this can cause files belonging to the dev's local server to be deleted. Until + # these can be better isolated, this signal will do nothing when pytest is running + if "pytest" in sys.modules: + return + # This will be called anytime a deletion is ran, so ensure the object being deleted # can have attachments - if ( - sender - and issubclass(sender, AccessFormData) - and isinstance(instance, AccessFormData) - ): + if issubclass(sender, ApplicationRevision): files = instance.extract_files() for value in files.values(): if not isinstance(value, list): # Single file field value.delete() else: # Multiple file fields [sub_file.delete() for sub_file in value] + elif issubclass(sender, ApplicationSubmission): + submission_attachment_path = ( + f"{default_storage.base_location}/submission/{instance.id}" + ) + + delete_directory(submission_attachment_path) diff --git a/hypha/apply/funds/utils.py b/hypha/apply/funds/utils.py index a6c27b18bb..a60e6a4b51 100644 --- a/hypha/apply/funds/utils.py +++ b/hypha/apply/funds/utils.py @@ -1,4 +1,5 @@ import csv +import os import re from datetime import datetime from functools import reduce @@ -8,6 +9,7 @@ from typing import Iterable import django_filters as filters +from django.core.files.storage import default_storage from django.urls import reverse from django.utils.encoding import force_bytes from django.utils.html import strip_tags @@ -267,3 +269,28 @@ def generate_invite_path(invite): kwargs={"uidb64": uid, "token": token}, ) return login_path + + +def delete_directory(directory_path): + """Delete a full directory (empty or not) + + Used in attachment cleanup when deleting submissions/revisions + """ + + directories, files = default_storage.listdir(directory_path) + + for item in directories: + item_path = os.path.join(directory_path, item) + if default_storage.exists(item_path): + # Recursively delete subdirectories + delete_directory(item_path) + + for item in files: + item_path = os.path.join(directory_path, item) + if default_storage.exists(item_path): + # Delete files + default_storage.delete(item_path) + + if default_storage.exists(directory_path): + # Delete the empty directory + default_storage.delete(directory_path) diff --git a/hypha/apply/funds/views/submission_edit.py b/hypha/apply/funds/views/submission_edit.py index 10da360918..241b40c18e 100644 --- a/hypha/apply/funds/views/submission_edit.py +++ b/hypha/apply/funds/views/submission_edit.py @@ -253,7 +253,6 @@ def get_placeholder_file(self, initial_file): return [ PlaceholderUploadedFile(f.filename, size=f.size, file_id=f.name) for f in initial_file - if f ] def save_draft_and_refresh_page(self, form) -> HttpResponseRedirect: From 3b515a22cabf9fa81dae5a0966ed7bb3ef52cd54 Mon Sep 17 00:00:00 2001 From: Wes Appler Date: Thu, 9 Oct 2025 10:58:25 -0400 Subject: [PATCH 5/7] Revert factory, tried to include `-upload` attribute in hopes that'd fix tests, it didn't --- hypha/apply/stream_forms/testing/factories.py | 32 ------------------- 1 file changed, 32 deletions(-) diff --git a/hypha/apply/stream_forms/testing/factories.py b/hypha/apply/stream_forms/testing/factories.py index c42a7042b3..d946ae07d8 100644 --- a/hypha/apply/stream_forms/testing/factories.py +++ b/hypha/apply/stream_forms/testing/factories.py @@ -1,5 +1,4 @@ import json -import random import uuid from collections import defaultdict @@ -75,18 +74,6 @@ def _create(cls, *args, form_fields=None, for_factory=None, clean=False, **kwarg or for_factory.Meta.model.form_fields.field.to_python(form_fields) } - # Get UUIDs of the file fields to add "-uploads" fields later - file_fields = [] - file_types = ("image", "file", "multi_file") - for field in form_fields: - try: - if field["type"] in file_types: - file_fields.append(field["id"]) - except TypeError: - if field.block_type in file_types: - file_fields.append(field.id) - # field["id"] for field in form_fields if field["type"] in ("file", "multi_file")] - form_data = {} for name, answer in kwargs.items(): try: @@ -106,25 +93,6 @@ def _create(cls, *args, form_fields=None, for_factory=None, clean=False, **kwarg clean_object.delete() return form_data - for id in file_fields: - uploads = [] - if entry := form_data.get(id): - if not isinstance(entry, list): - entry = [entry] - - for file in entry: - uploads.append( - { - "id": str(uuid.uuid4()), - "name": file._name, - "size": random.randint(20, 100000), - "type": "tus", - "url": "", - } - ) - - form_data[f"{id}-uploads"] = json.dumps(uploads) - return form_data @classmethod From a49e746ac4cd5dfc4e49f707afb4d753aa218ba4 Mon Sep 17 00:00:00 2001 From: Wes Appler Date: Tue, 14 Oct 2025 14:21:44 -0400 Subject: [PATCH 6/7] Removed unneeded `base_location` in submission paths --- .../funds/migrations/0131_delete_orphaned_attachments.py | 2 +- hypha/apply/funds/signals.py | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/hypha/apply/funds/migrations/0131_delete_orphaned_attachments.py b/hypha/apply/funds/migrations/0131_delete_orphaned_attachments.py index 1f415b8551..a9d49a4fa1 100644 --- a/hypha/apply/funds/migrations/0131_delete_orphaned_attachments.py +++ b/hypha/apply/funds/migrations/0131_delete_orphaned_attachments.py @@ -17,7 +17,7 @@ def delete_orphaned_attachments(apps, schema_editor): ApplicationSubmission = apps.get_model("funds", "ApplicationSubmission") - submission_attachment_path = f"{default_storage.base_location}/submission" + submission_attachment_path = "submission" folders_to_delete = [] folders_to_check = [] diff --git a/hypha/apply/funds/signals.py b/hypha/apply/funds/signals.py index aba65d5bb4..25f46610f3 100644 --- a/hypha/apply/funds/signals.py +++ b/hypha/apply/funds/signals.py @@ -1,6 +1,5 @@ import sys -from django.core.files.storage import default_storage from django.db.models.signals import pre_delete from django.dispatch import receiver @@ -34,8 +33,6 @@ def delete_attachments(sender, instance=None, **kwargs): else: # Multiple file fields [sub_file.delete() for sub_file in value] elif issubclass(sender, ApplicationSubmission): - submission_attachment_path = ( - f"{default_storage.base_location}/submission/{instance.id}" - ) + submission_attachment_path = f"submission/{instance.id}" delete_directory(submission_attachment_path) From 6ffaaee8f5b54c0b8f4ac8519cfdd357d8d013b9 Mon Sep 17 00:00:00 2001 From: Wes Appler Date: Tue, 14 Oct 2025 15:18:31 -0400 Subject: [PATCH 7/7] Added check to see if submission folder actually exists --- hypha/apply/funds/signals.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hypha/apply/funds/signals.py b/hypha/apply/funds/signals.py index 25f46610f3..49b2e37e1a 100644 --- a/hypha/apply/funds/signals.py +++ b/hypha/apply/funds/signals.py @@ -1,5 +1,6 @@ import sys +from django.core.files.storage import default_storage from django.db.models.signals import pre_delete from django.dispatch import receiver @@ -35,4 +36,5 @@ def delete_attachments(sender, instance=None, **kwargs): elif issubclass(sender, ApplicationSubmission): submission_attachment_path = f"submission/{instance.id}" - delete_directory(submission_attachment_path) + if default_storage.exists(submission_attachment_path): + delete_directory(submission_attachment_path)