Skip to content

Commit a8f5d90

Browse files
authored
Merge pull request #466 from amahuli03/390-bugfix-upload-file-endpoint
Fixed error 1 (OpenAI title sanitization: strip surrounding quotes and truncate to 255 characters) and added unit tests
2 parents 662f29d + 4b4d727 commit a8f5d90

File tree

3 files changed

+40
-3
lines changed

3 files changed

+40
-3
lines changed

server/api/views/uploadFile/test_title.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,11 @@ def test_falls_back_to_first_page_text_if_metadata_title_does_not_match_regex(se
5353
expected_title = "Advances in Mood Disorder Pharmacotherapy: Evaluating New Antipsychotics and Mood Stabilizers for Bipolar Disorder and Schizophrenia"
5454
self.assertEqual(expected_title, title.generate_title(doc))
5555

56-
@patch("api.services.openai_services.openAIServices.openAI")
56+
@patch("api.views.uploadFile.title.openAIServices.openAI")
5757
def test_falls_back_to_chatgpt_if_no_title_found(self, mock_openAI):
5858
doc = MagicMock()
5959
doc.metadata = {"title": None}
60-
doc.get_text.return_value = []
60+
doc[0].get_text.return_value = []
6161

6262
mock_response = MagicMock()
6363
mock_response.choices = [MagicMock()]
@@ -67,3 +67,34 @@ def test_falls_back_to_chatgpt_if_no_title_found(self, mock_openAI):
6767
title.generate_title(doc)
6868

6969
self.assertTrue(mock_openAI.called)
70+
71+
@patch("api.views.uploadFile.title.openAIServices.openAI")
72+
def test_strips_quotes_from_openai_title(self, mock_openAI):
73+
doc = MagicMock()
74+
doc.metadata = {"title": None}
75+
doc[0].get_text.return_value = []
76+
77+
mock_response = MagicMock()
78+
mock_response.choices = [MagicMock()]
79+
mock_response.choices[0].message.content = '"Updated CANMAT/ISBD Guidelines for Treating Mixed Features in Bipolar Disorder"'
80+
mock_openAI.return_value = mock_response
81+
82+
result = title.generate_title(doc)
83+
84+
self.assertEqual(result, "Updated CANMAT/ISBD Guidelines for Treating Mixed Features in Bipolar Disorder")
85+
86+
@patch("api.views.uploadFile.title.openAIServices.openAI")
87+
def test_truncates_long_openai_title(self, mock_openAI):
88+
doc = MagicMock()
89+
doc.metadata = {"title": None}
90+
doc[0].get_text.return_value = []
91+
92+
mock_response = MagicMock()
93+
mock_response.choices = [MagicMock()]
94+
mock_response.choices[0].message.content = "A" * 300
95+
mock_openAI.return_value = mock_response
96+
97+
result = title.generate_title(doc)
98+
99+
# Ensure the title is truncated to fit the UploadFile model's title field (max_length=255), since OpenAI responses may exceed this limit
100+
self.assertLessEqual(len(result), 255)

server/api/views/uploadFile/title.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,4 +58,6 @@ def summarize_pdf(pdf: fitz.Document) -> str:
5858
prompt = "Please provide a title for this document. The title should be less than 256 characters and will be displayed on a webpage."
5959
response = openAIServices.openAI(
6060
first_page_content, prompt, model='gpt-4o', temp=0.0)
61-
return response.choices[0].message.content
61+
title = response.choices[0].message.content.strip().strip('"').strip("'")
62+
# Truncate to fit UploadFile model's max_length=255 title field as a final safeguard
63+
return title[:255]

server/api/views/uploadFile/views.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
import fitz
1313
from django.db import transaction
1414
from .title import generate_title
15+
import logging
16+
17+
logger = logging.getLogger(__name__)
1518

1619

1720
class UploadFileView(APIView):
@@ -124,6 +127,7 @@ def post(self, request, format=None):
124127
)
125128
except Exception as e:
126129
# Handle potential errors
130+
logger.exception("File upload failed for '%s': %s", uploaded_file.name, e)
127131
return Response({"message": f"Error processing file and embeddings: {str(e)}"},
128132
status=status.HTTP_400_BAD_REQUEST)
129133

0 commit comments

Comments
 (0)