Skip to content

Commit 3d28b88

Browse files
committed
Fix duplicate file issue on every save/submit
1 parent 1d2db99 commit 3d28b88

2 files changed

Lines changed: 85 additions & 12 deletions

File tree

hypha/apply/funds/models/mixins.py

Lines changed: 81 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import hashlib
2+
13
from django.core.files import File
24
from django.utils.safestring import mark_safe
35
from django_file_form.models import PlaceholderUploadedFile
@@ -11,6 +13,7 @@
1113
MultiInputCharFieldBlock,
1214
UploadableMediaBlock,
1315
)
16+
from hypha.apply.stream_forms.files import StreamFieldFile
1417
from hypha.apply.utils.blocks import SingleIncludeMixin
1518
from hypha.apply.utils.storage import PrivateStorage
1619

@@ -92,19 +95,87 @@ def process_file(cls, instance, field, file):
9295
else:
9396
return cls.stream_file(instance, field, file)
9497

95-
def process_file_data(self, data):
98+
def process_file_data(self, data, latest_existing_data=None):
    """Store uploaded files for every uploadable field, skipping unchanged ones.

    For each form field whose block is an ``UploadableMediaBlock`` the
    submitted value is run through ``process_file`` and compared with the
    value already stored for that field. Only when they differ are the new
    file(s) saved; otherwise the stored value is kept so the same upload is
    not written again on every save/submit.

    Args:
        data: Mapping of field id -> submitted file (or list of files).
        latest_existing_data: Mapping of field id -> file value(s) already
            stored for this object. ``None`` (the default) means nothing is
            stored yet, so every submitted file is treated as new.
    """
    # The default is None; fall back to an empty mapping so the .get() below
    # cannot raise AttributeError for callers that omit the argument.
    existing_data = latest_existing_data or {}

    for field in self.form_fields:
        if not isinstance(field.block, UploadableMediaBlock):
            continue

        new_file = data.get(field.id, [])
        existing_file = existing_data.get(field.id, [])

        # Process before comparing because placeholder files can't be read.
        new_stream_file = self.process_file(self, field, new_file)

        if self._is_same_file(existing_file, new_stream_file):
            # Unchanged upload: keep the stored value, do not save again.
            self.form_data[field.id] = existing_file
            continue

        try:
            # Single-file case.
            new_stream_file.save()
        except (AttributeError, FileNotFoundError):
            # Either a list of files (no .save attribute) or a missing file;
            # try saving item by item and deliberately ignore missing files.
            try:
                for f in new_stream_file:
                    f.save()
            except FileNotFoundError:
                pass
        self.form_data[field.id] = new_stream_file
122+
123+
def _is_same_file(self, existing, new):
124+
# Normalize to list for multi-file support
125+
if not isinstance(existing, list):
126+
existing = [existing]
127+
if not isinstance(new, list):
128+
new = [new]
129+
130+
if len(existing) != len(new):
131+
return False
132+
133+
for e, n in zip(existing, new, strict=False):
134+
e_file = self._get_file_obj(e)
135+
n_file = self._get_file_obj(n)
136+
if not e_file or not n_file:
137+
return False
138+
139+
# Compare file names
140+
if e_file.name != n_file.name:
141+
return False
142+
143+
# Compare file sizes
144+
if e_file.size != n_file.size:
145+
return False
146+
147+
# Compare file hashes(keep it after other checks to avoid checking it for every file)
148+
if self._hash_file(e_file) != self._hash_file(n_file):
149+
return False
150+
151+
return True
152+
153+
def _get_file_obj(self, file_obj):
    """Resolve ``file_obj`` to an underlying file-like object, or None.

    Resolution order:
      1. a ``StreamFieldFile`` or anything exposing ``.file`` -> that ``.file``
      2. anything exposing ``temporary_file_path()`` -> that path opened in
         binary read mode
      3. anything exposing ``read`` -> the object itself

    Any exception raised while resolving is swallowed and None is returned,
    as is the case when none of the rules apply.
    """
    try:
        if isinstance(file_obj, StreamFieldFile) or hasattr(file_obj, "file"):
            return file_obj.file
        if hasattr(file_obj, "temporary_file_path"):
            # The handle opened here is returned to the caller as-is.
            return open(file_obj.temporary_file_path(), "rb")
        if hasattr(file_obj, "read"):
            return file_obj
    except Exception:
        return None
    return None
167+
168+
def _hash_file(self, file_obj, chunk_size=4096):
    """Return the SHA-256 hex digest of a file-like object, or None on failure.

    The stream is rewound to the start before reading and again after the
    digest has been computed. It is consumed ``chunk_size`` bytes at a time.
    Any exception raised while seeking, reading or hashing results in None.
    """
    try:
        file_obj.seek(0)
        digest = hashlib.sha256()
        while True:
            block = file_obj.read(chunk_size)
            if not block:
                # An empty read marks the end of the stream.
                break
            digest.update(block)
        file_obj.seek(0)
        return digest.hexdigest()
    except Exception:
        return None
108179

109180
def extract_files(self):
110181
files = {}

hypha/apply/funds/models/submissions.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -743,7 +743,9 @@ def create_revision(
743743
def clean_submission(self):
    """Process form data, ensure a user account, then process file uploads.

    The stored copy of this submission is loaded so that its raw data can be
    handed to ``process_file_data``; this avoids saving files again on every
    submit when the file has not been updated.
    """
    self.process_form_data()
    self.ensure_user_has_account()
    # NOTE(review): presumably self.id is already set when this runs;
    # objects.get() raises DoesNotExist otherwise - confirm against callers.
    stored_submission = ApplicationSubmission.objects.get(id=self.id)
    stored_raw_data = stored_submission.from_draft().raw_data
    self.process_file_data(self.form_data, stored_raw_data)
747749

748750
def get_assigned_meta_terms(self):
749751
"""Returns assigned meta terms excluding the 'root' term"""
@@ -795,7 +797,7 @@ def save(self, *args, update_fields=None, skip_custom=False, **kwargs):
795797
f"{self.get_from_parent('submission_id_prefix')}{self.id}"
796798
)
797799

798-
self.process_file_data(files)
800+
self.process_file_data(files, self.from_draft().raw_data)
799801
AssignedReviewers.objects.bulk_create_reviewers(
800802
list(self.get_from_parent("reviewers").all()),
801803
self,

0 commit comments

Comments
 (0)