Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,7 @@ def run(
"""

if self._storage is None:
msg = f"The component {self.__class__.__name__} was not warmed up. Call 'warm_up()' before calling run()."
raise RuntimeError(msg)
self.warm_up()

filtered_documents = self._filter_documents_by_extensions(documents) if self.file_extensions else documents

Expand Down
38 changes: 9 additions & 29 deletions integrations/amazon_bedrock/tests/test_s3_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,6 @@ def test_to_dict_with_parameters(self, tmp_path):

def test_run(self, tmp_path, mock_s3_storage, mock_boto3_session):
d = S3Downloader(file_root_path=str(tmp_path))
S3Downloader.warm_up(d)
d._storage = mock_s3_storage

docs = [
Expand All @@ -141,7 +140,6 @@ def test_run(self, tmp_path, mock_s3_storage, mock_boto3_session):

def test_run_with_extensions(self, tmp_path, mock_s3_storage, mock_boto3_session):
d = S3Downloader(file_root_path=str(tmp_path), file_extensions=[".txt"])
S3Downloader.warm_up(d)
d._storage = mock_s3_storage

docs = [
Expand All @@ -155,25 +153,19 @@ def test_run_with_extensions(self, tmp_path, mock_s3_storage, mock_boto3_session

def test_run_with_input_file_meta_key(self, tmp_path, mock_s3_storage, mock_boto3_session):
d = S3Downloader(file_root_path=str(tmp_path), file_name_meta_key="custom_file_key")
S3Downloader.warm_up(d)
d._storage = mock_s3_storage

docs = [
Document(meta={"file_id": str(uuid4()), "custom_file_key": "a.txt"}),
]
docs = [Document(meta={"file_id": str(uuid4()), "custom_file_key": "a.txt"})]

out = d.run(documents=docs)
assert len(out["documents"]) == 1
assert out["documents"][0].meta["custom_file_key"] == "a.txt"

def test_run_with_s3_key_generation_function(self, tmp_path, mock_s3_storage, mock_boto3_session):
d = S3Downloader(file_root_path=str(tmp_path), s3_key_generation_function=s3_key_generation_function)
S3Downloader.warm_up(d)
d._storage = mock_s3_storage

docs = [
Document(meta={"file_id": str(uuid4()), "file_name": "a.txt"}),
]
docs = [Document(meta={"file_id": str(uuid4()), "file_name": "a.txt"})]
out = d.run(documents=docs)
assert len(out["documents"]) == 1
assert out["documents"][0].meta["file_name"] == "a.txt"
Expand All @@ -189,7 +181,6 @@ def test_run_with_s3_key_generation_function_and_file_extensions(
s3_key_generation_function=s3_key_generation_function,
file_extensions=[".txt"],
)
S3Downloader.warm_up(d)
d._storage = mock_s3_storage

docs = [
Expand All @@ -210,8 +201,6 @@ def test_run_with_s3_key_generation_function_and_file_extensions(
def test_live_run(self, tmp_path, monkeypatch):
d = S3Downloader(file_root_path=str(tmp_path))
monkeypatch.setenv("S3_DOWNLOADER_PREFIX", "")
S3Downloader.warm_up(d)

docs = [
Document(meta={"file_id": str(uuid4()), "file_name": "text-sample.txt"}),
Document(meta={"file_id": str(uuid4()), "file_name": "document-sample.pdf"}),
Expand All @@ -229,9 +218,7 @@ def test_live_run(self, tmp_path, monkeypatch):
)
def test_live_run_with_no_documents(self, tmp_path):
d = S3Downloader(file_root_path=str(tmp_path))
S3Downloader.warm_up(d)
docs = []
out = d.run(documents=docs)
out = d.run(documents=[])
assert len(out["documents"]) == 0

@pytest.mark.integration
Expand All @@ -247,10 +234,8 @@ def test_live_run_with_no_documents(self, tmp_path):
def test_live_run_with_custom_meta_key(self, tmp_path, monkeypatch):
d = S3Downloader(file_root_path=str(tmp_path), file_name_meta_key="custom_name")
monkeypatch.setenv("S3_DOWNLOADER_PREFIX", "")
S3Downloader.warm_up(d)
docs = [
Document(meta={"custom_name": "text-sample.txt"}),
]
d.warm_up()
docs = [Document(meta={"custom_name": "text-sample.txt"})]
out = d.run(documents=docs)
assert len(out["documents"]) == 1
assert out["documents"][0].meta["custom_name"] == "text-sample.txt"
Expand All @@ -263,11 +248,8 @@ def test_live_run_with_custom_meta_key(self, tmp_path, monkeypatch):
def test_live_run_with_prefix(self, tmp_path, monkeypatch):
d = S3Downloader(file_root_path=str(tmp_path))
monkeypatch.setenv("S3_DOWNLOADER_PREFIX", "subfolder/")

S3Downloader.warm_up(d)
docs = [
Document(meta={"file_name": "employees.json"}),
]
d.warm_up()
docs = [Document(meta={"file_name": "employees.json"})]
out = d.run(documents=docs)
assert len(out["documents"]) == 1
assert out["documents"][0].meta["file_name"] == "employees.json"
Expand All @@ -286,10 +268,8 @@ def test_live_run_with_s3_key_generation_function_and_file_extensions(self, tmp_
file_name_meta_key="file_name",
s3_key_generation_function=s3_key_generation_function,
)
S3Downloader.warm_up(d)
docs = [
Document(meta={"file_name": "dog.jpg"}),
]
d.warm_up()
docs = [Document(meta={"file_name": "dog.jpg"})]
out = d.run(documents=docs)
assert len(out["documents"]) == 1
assert out["documents"][0].meta["file_name"] == "dog.jpg"