Skip to content

Commit 9c143e9

Browse files
authored
feat: Add configurable environment variable names to S3FileDownloader init params (#3015)
* feat: Add configurable environment variable names to S3FileDownloader init params
* Modify default values for S3Storage.from_env class method
* docs: Update docstrings
* docs: Fix ruff docstring error
1 parent 52aa7fc commit 9c143e9

3 files changed

Lines changed: 35 additions & 4 deletions

File tree

integrations/amazon_bedrock/src/haystack_integrations/common/s3/utils.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,28 @@ def download(self, key: str, local_file_path: Path) -> None:
100100
raise S3StorageError(msg) from e
101101

102102
@classmethod
103-
def from_env(cls, *, session: Session, config: Config) -> "S3Storage":
104-
"""Create a S3Storage object from environment variables."""
105-
s3_bucket = os.getenv("S3_DOWNLOADER_BUCKET")
103+
def from_env(
104+
cls, *, session: Session, config: Config, s3_bucket_name_env: str = "S3_DOWNLOADER_BUCKET"
105+
) -> "S3Storage":
106+
"""
107+
Create a S3Storage object from environment variables.
108+
109+
The following environment variables are read:
110+
- `S3_DOWNLOADER_BUCKET` (or the value of `s3_bucket_name_env`): The name of the S3 bucket
111+
to download files from. Required — raises `ValueError` if not set.
112+
- `S3_DOWNLOADER_PREFIX`: Optional prefix to apply to all S3 keys (e.g. `"folder/subfolder/"`).
113+
- `AWS_ENDPOINT_URL`: Optional custom endpoint URL, useful for S3-compatible services
114+
such as MinIO or LocalStack.
115+
116+
:param session: The boto3 `Session` to use when creating the S3 client.
117+
:param config: The botocore `Config` to apply to the S3 client.
118+
:param s3_bucket_name_env: The name of the environment variable that holds the name of the S3 bucket to download files from.
119+
By default, the value is `"S3_DOWNLOADER_BUCKET"`.
120+
:returns: A fully initialized `S3Storage` instance.
121+
:raises ValueError: If the environment variable specified by `s3_bucket_name_env` is not set
122+
or is empty.
123+
"""
124+
s3_bucket = os.getenv(s3_bucket_name_env)
106125
if not s3_bucket:
107126
msg = (
108127
"Missing environment variable S3_DOWNLOADER_BUCKET."

integrations/amazon_bedrock/src/haystack_integrations/components/downloaders/s3/s3_downloader.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def __init__(
4545
max_workers: int = 32,
4646
max_cache_size: int = 100,
4747
s3_key_generation_function: Callable[[Document], str] | None = None,
48+
s3_bucket_name_env: str = "S3_DOWNLOADER_BUCKET",
4849
) -> None:
4950
"""
5051
Initializes the `S3Downloader` with the provided parameters.
@@ -77,6 +78,8 @@ def __init__(
7778
The function must accept a `Document` object and return a string.
7879
If the environment variable `S3_DOWNLOADER_PREFIX` is set, its value will be automatically
7980
prefixed to the generated S3 key.
81+
:param s3_bucket_name_env: The name of the environment variable that holds the name of the S3 bucket to download files from.
82+
By default, the value is `"S3_DOWNLOADER_BUCKET"`.
8083
:raises ValueError: If the `file_root_path` is not set through
8184
the constructor or the `FILE_ROOT_PATH` environment variable.
8285
@@ -105,6 +108,7 @@ def __init__(
105108
self.max_cache_size = max_cache_size
106109
self.file_name_meta_key = file_name_meta_key
107110
self.s3_key_generation_function = s3_key_generation_function
111+
self.s3_bucket_name_env = s3_bucket_name_env
108112

109113
self._storage: S3Storage | None = None
110114

@@ -126,7 +130,9 @@ def warm_up(self) -> None:
126130
"""Warm up the component by initializing the settings and storage."""
127131
if self._storage is None:
128132
self.file_root_path.mkdir(parents=True, exist_ok=True)
129-
self._storage = S3Storage.from_env(session=self._session, config=self._config)
133+
self._storage = S3Storage.from_env(
134+
session=self._session, config=self._config, s3_bucket_name_env=self.s3_bucket_name_env
135+
)
130136

131137
@component.output_types(documents=list[Document])
132138
def run(
@@ -246,6 +252,7 @@ def to_dict(self) -> dict[str, Any]:
246252
file_extensions=self.file_extensions,
247253
file_name_meta_key=self.file_name_meta_key,
248254
s3_key_generation_function=s3_key_generation_function_name,
255+
s3_bucket_name_env=self.s3_bucket_name_env,
249256
)
250257

251258
@classmethod

integrations/amazon_bedrock/tests/test_s3_downloader.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def test_init_custom_parameters(self, mock_boto3_session, tmp_path):
5959
max_cache_size=100,
6060
max_workers=32,
6161
file_name_meta_key="file_id",
62+
s3_bucket_name_env="b",
6263
)
6364
assert d.file_extensions == [".pdf", ".txt"]
6465

@@ -79,6 +80,7 @@ def test_to_dict(self, mock_boto3_session: Any, tmp_path, boto3_config: dict[str
7980
"max_workers": 32,
8081
"file_name_meta_key": "file_name",
8182
"s3_key_generation_function": None,
83+
"s3_bucket_name_env": "S3_DOWNLOADER_BUCKET",
8284
},
8385
}
8486
assert d.to_dict() == expected
@@ -95,6 +97,7 @@ def test_from_dict(self, mock_boto3_session: Any, tmp_path, boto3_config: dict[s
9597
"aws_profile_name": {"type": "env_var", "env_vars": ["AWS_PROFILE"], "strict": False},
9698
"file_root_path": str(tmp_path),
9799
"s3_key_generation_function": None,
100+
"s3_bucket_name_env": "S3_DOWNLOADER_BUCKET",
98101
},
99102
}
100103
d = S3Downloader.from_dict(data)
@@ -108,6 +111,7 @@ def test_to_dict_with_parameters(self, tmp_path):
108111
max_workers=40,
109112
file_name_meta_key="new_file_key",
110113
s3_key_generation_function=s3_key_generation_function,
114+
s3_bucket_name_env="b",
111115
)
112116
expected = {
113117
"type": TYPE,
@@ -123,6 +127,7 @@ def test_to_dict_with_parameters(self, tmp_path):
123127
"max_workers": 40,
124128
"file_name_meta_key": "new_file_key",
125129
"s3_key_generation_function": "tests.test_s3_downloader.s3_key_generation_function",
130+
"s3_bucket_name_env": "b",
126131
},
127132
}
128133
assert d.to_dict() == expected

0 commit comments

Comments
 (0)