Skip to content

Commit 1786e21

Browse files
fix: Handled the file format if it's in UPPER CASE (#1200)
1 parent 20a6843 commit 1786e21

3 files changed

Lines changed: 23 additions & 1 deletion

File tree

code/backend/batch/utilities/helpers/embedders/push_embedder.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ def __init__(self, blob_client: AzureBlobStorageClient, env_helper: EnvHelper):
3838
self.embedding_configs[ext] = processor
3939

4040
def embed_file(self, source_url: str, file_name: str):
41-
file_extension = file_name.split(".")[-1]
41+
file_extension = file_name.split(".")[-1].lower()
4242
embedding_config = self.embedding_configs.get(file_extension)
4343
self.__embed(
4444
source_url=source_url,

code/tests/functional/conftest.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -334,6 +334,12 @@ def setup_config_mocking(httpserver: HTTPServer):
334334
"loading": {"strategy": "web"},
335335
"use_advanced_image_processing": False,
336336
},
337+
{
338+
"document_type": "htm",
339+
"chunking": {"strategy": "layout", "size": 500, "overlap": 100},
340+
"loading": {"strategy": "web"},
341+
"use_advanced_image_processing": False,
342+
},
337343
{
338344
"document_type": "docx",
339345
"chunking": {"strategy": "layout", "size": 500, "overlap": 100},

code/tests/utilities/helpers/test_push_embedder.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -347,6 +347,22 @@ def test_embed_file_chunks_documents(document_loading_mock, document_chunking_mo
347347
)
348348

349349

350+
def test_embed_file_chunks_documents_upper_case(document_loading_mock, document_chunking_mock, env_helper_mock):
351+
# given
352+
push_embedder = PushEmbedder(MagicMock(), env_helper_mock)
353+
354+
# when
355+
push_embedder.embed_file(
356+
"some-url",
357+
"some-file-name.PDF",
358+
)
359+
360+
# then
361+
document_chunking_mock.return_value.chunk.assert_called_once_with(
362+
document_loading_mock.return_value.load.return_value, CHUNKING_SETTINGS
363+
)
364+
365+
350366
def test_embed_file_generates_embeddings_for_documents(llm_helper_mock, env_helper_mock):
351367
# given
352368
push_embedder = PushEmbedder(MagicMock(), env_helper_mock)

0 commit comments

Comments
 (0)