1818import io
1919
2020import pytest
21+ import requests
2122from unittest .mock import patch , MagicMock
2223
2324from pytexera .storage .dataset_file_document import DatasetFileDocument
2425
25-
2626DEFAULT_ENDPOINT = "http://localhost:9092/api/dataset/presign-download"
2727CUSTOM_ENDPOINT = "https://example.test/api/presign"
2828
@@ -95,15 +95,19 @@ def _make_doc(self, monkeypatch, path="/bob@x.com/ds/v1/file.csv"):
9595
def test_returns_presigned_url_field_from_json_body(self, monkeypatch):
    """A 200 JSON body's ``presignedUrl`` value is what the call returns."""
    document = self._make_doc(monkeypatch)
    target = "pytexera.storage.dataset_file_document.requests.Session.get"
    with patch(target) as session_get:
        # Stub the HTTP GET so no network traffic happens.
        session_get.return_value = make_response(
            200, body={"presignedUrl": "https://signed.test/x"}
        )
        returned_url = document.get_presigned_url()
        assert returned_url == "https://signed.test/x"
103105
104106 def test_sends_bearer_authorization_header_with_jwt (self , monkeypatch ):
105107 doc = self ._make_doc (monkeypatch )
106- with patch ("pytexera.storage.dataset_file_document.requests.get" ) as mock_get :
108+ with patch (
109+ "pytexera.storage.dataset_file_document.requests.Session.get"
110+ ) as mock_get :
107111 mock_get .return_value = make_response (200 , body = {"presignedUrl" : "u" })
108112 doc .get_presigned_url ()
109113 _ , kwargs = mock_get .call_args
@@ -113,7 +117,9 @@ def test_url_encodes_filepath_query_parameter(self, monkeypatch):
113117 # urllib.parse.quote keeps "/" as safe by default, but encodes "@"
114118 # and " " — pin both pieces so the contract is explicit.
115119 doc = self ._make_doc (monkeypatch , path = "/bob@x.com/ds/v1/data file.csv" )
116- with patch ("pytexera.storage.dataset_file_document.requests.get" ) as mock_get :
120+ with patch (
121+ "pytexera.storage.dataset_file_document.requests.Session.get"
122+ ) as mock_get :
117123 mock_get .return_value = make_response (200 , body = {"presignedUrl" : "u" })
118124 doc .get_presigned_url ()
119125 _ , kwargs = mock_get .call_args
@@ -124,29 +130,37 @@ def test_url_encodes_filepath_query_parameter(self, monkeypatch):
124130
def test_calls_configured_endpoint(self, monkeypatch):
    """The GET's first positional argument must equal CUSTOM_ENDPOINT."""
    document = self._make_doc(monkeypatch)
    target = "pytexera.storage.dataset_file_document.requests.Session.get"
    with patch(target) as session_get:
        session_get.return_value = make_response(200, body={"presignedUrl": "u"})
        document.get_presigned_url()
        # call_args is an (args, kwargs) pair; index 0 holds the positionals.
        positional = session_get.call_args[0]
        assert positional[0] == CUSTOM_ENDPOINT
132140
def test_raises_runtime_error_with_status_and_body_on_failure(self, monkeypatch):
    """A 403 reply must raise RuntimeError matching ``403.*forbidden``."""
    document = self._make_doc(monkeypatch)
    target = "pytexera.storage.dataset_file_document.requests.Session.get"
    with patch(target) as session_get:
        session_get.return_value = make_response(403, body="forbidden")
        expected_failure = pytest.raises(RuntimeError, match=r"403.*forbidden")
        with expected_failure:
            document.get_presigned_url()
139149
def test_raises_when_response_body_lacks_presigned_url_key(self, monkeypatch):
    """A 200 body without ``presignedUrl`` must raise RuntimeError."""
    document = self._make_doc(monkeypatch)
    target = "pytexera.storage.dataset_file_document.requests.Session.get"
    with patch(target) as session_get:
        # The body is valid JSON but carries an unrelated key only.
        session_get.return_value = make_response(200, body={"other": "value"})
        expected_failure = pytest.raises(
            RuntimeError, match="'presignedUrl' missing"
        )
        with expected_failure:
            document.get_presigned_url()
146158
147159 def test_raises_when_response_body_is_not_valid_json (self , monkeypatch ):
148160 doc = self ._make_doc (monkeypatch )
149- with patch ("pytexera.storage.dataset_file_document.requests.get" ) as mock_get :
161+ with patch (
162+ "pytexera.storage.dataset_file_document.requests.Session.get"
163+ ) as mock_get :
150164 response = MagicMock ()
151165 response .status_code = 200
152166 response .json .side_effect = ValueError ("Expecting value" )
@@ -157,14 +171,18 @@ def test_raises_when_response_body_is_not_valid_json(self, monkeypatch):
157171
def test_raises_when_presigned_url_is_empty_string(self, monkeypatch):
    """An empty-string ``presignedUrl`` must raise RuntimeError."""
    document = self._make_doc(monkeypatch)
    target = "pytexera.storage.dataset_file_document.requests.Session.get"
    with patch(target) as session_get:
        session_get.return_value = make_response(200, body={"presignedUrl": ""})
        expected_failure = pytest.raises(
            RuntimeError, match="'presignedUrl' missing"
        )
        with expected_failure:
            document.get_presigned_url()
164180
def test_raises_when_presigned_url_is_not_a_string(self, monkeypatch):
    """A ``presignedUrl`` of None must raise RuntimeError."""
    document = self._make_doc(monkeypatch)
    target = "pytexera.storage.dataset_file_document.requests.Session.get"
    with patch(target) as session_get:
        session_get.return_value = make_response(200, body={"presignedUrl": None})
        expected_failure = pytest.raises(
            RuntimeError, match="'presignedUrl' missing"
        )
        with expected_failure:
            document.get_presigned_url()
@@ -178,7 +196,9 @@ def _make_doc(self, monkeypatch):
178196
179197 def test_returns_bytesio_with_downloaded_content (self , monkeypatch ):
180198 doc = self ._make_doc (monkeypatch )
181- with patch ("pytexera.storage.dataset_file_document.requests.get" ) as mock_get :
199+ with patch (
200+ "pytexera.storage.dataset_file_document.requests.Session.get"
201+ ) as mock_get :
182202 mock_get .side_effect = [
183203 make_response (200 , body = {"presignedUrl" : "https://signed.test/x" }),
184204 make_response (200 , content = b"hello-bytes" ),
@@ -189,14 +209,18 @@ def test_returns_bytesio_with_downloaded_content(self, monkeypatch):
189209
def test_propagates_presigned_url_failure(self, monkeypatch):
    """A 500 reply must make ``read_file`` raise a matching RuntimeError."""
    document = self._make_doc(monkeypatch)
    target = "pytexera.storage.dataset_file_document.requests.Session.get"
    with patch(target) as session_get:
        session_get.return_value = make_response(500, body="upstream down")
        expected_failure = pytest.raises(
            RuntimeError, match=r"500.*upstream down"
        )
        with expected_failure:
            document.read_file()
196218
197219 def test_raises_runtime_error_when_download_fails (self , monkeypatch ):
198220 doc = self ._make_doc (monkeypatch )
199- with patch ("pytexera.storage.dataset_file_document.requests.get" ) as mock_get :
221+ with patch (
222+ "pytexera.storage.dataset_file_document.requests.Session.get"
223+ ) as mock_get :
200224 mock_get .side_effect = [
201225 make_response (200 , body = {"presignedUrl" : "https://signed.test/x" }),
202226 make_response (404 , body = "missing" ),
@@ -206,11 +230,82 @@ def test_raises_runtime_error_when_download_fails(self, monkeypatch):
206230
def test_downloads_from_presigned_url_returned_by_first_call(self, monkeypatch):
    """The second GET must target the URL returned by the first GET."""
    document = self._make_doc(monkeypatch)
    target = "pytexera.storage.dataset_file_document.requests.Session.get"
    with patch(target) as session_get:
        # Two queued replies: the presign JSON, then an empty content reply.
        session_get.side_effect = [
            make_response(200, body={"presignedUrl": "https://signed.test/x"}),
            make_response(200, content=b""),
        ]
        document.read_file()
        # Each recorded call is an (args, kwargs) pair.
        second_call = session_get.call_args_list[1]
        positional = second_call[0]
        assert positional[0] == "https://signed.test/x"
243+
244+
class TestTimeoutsAndRetries:
    """Timeout, retry-adapter and error-wrapping checks for DatasetFileDocument."""

    # Patch target shared by every test here that stubs out the HTTP GET.
    _SESSION_GET = "pytexera.storage.dataset_file_document.requests.Session.get"

    def _make_doc(self, monkeypatch):
        """Set the JWT and endpoint environment variables, then build a document."""
        environment = {
            "USER_JWT_TOKEN": "test-jwt-token",
            "FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT": CUSTOM_ENDPOINT,
        }
        for variable, value in environment.items():
            monkeypatch.setenv(variable, value)
        return DatasetFileDocument("/bob@x.com/ds/v1/file.csv")

    def test_presigned_url_request_passes_request_timeout(self, monkeypatch):
        """The presign GET must carry ``timeout=_REQUEST_TIMEOUT``."""
        document = self._make_doc(monkeypatch)
        with patch(self._SESSION_GET) as session_get:
            session_get.return_value = make_response(200, body={"presignedUrl": "u"})
            document.get_presigned_url()
            # call_args is an (args, kwargs) pair; index 1 holds the keywords.
            sent_kwargs = session_get.call_args[1]
            assert sent_kwargs["timeout"] == DatasetFileDocument._REQUEST_TIMEOUT

    def test_download_request_passes_request_timeout(self, monkeypatch):
        """The second GET made by ``read_file`` must carry the same timeout."""
        document = self._make_doc(monkeypatch)
        with patch(self._SESSION_GET) as session_get:
            session_get.side_effect = [
                make_response(200, body={"presignedUrl": "https://signed.test/x"}),
                make_response(200, content=b"data"),
            ]
            document.read_file()
            second_call = session_get.call_args_list[1]
            sent_kwargs = second_call[1]
            assert sent_kwargs["timeout"] == DatasetFileDocument._REQUEST_TIMEOUT

    def test_session_mounts_retry_adapter_for_http_and_https(self):
        """Both URL schemes must resolve to an adapter with the expected retry policy."""
        session = DatasetFileDocument._retry_session()
        try:
            for scheme in ("http://", "https://"):
                policy = session.get_adapter(scheme).max_retries
                # The overall, connect and read budgets share one limit.
                for budget in ("total", "connect", "read"):
                    assert getattr(policy, budget) == DatasetFileDocument._MAX_RETRIES
                expected_statuses = set(DatasetFileDocument._RETRY_STATUS_FORCELIST)
                assert set(policy.status_forcelist) == expected_statuses
                # Only idempotent GETs should be retried.
                assert policy.allowed_methods == frozenset({"GET"})
        finally:
            # Release the session even when an assertion fails.
            session.close()

    def test_presigned_url_request_timeout_is_wrapped_in_runtime_error(
        self, monkeypatch
    ):
        """A ReadTimeout from the GET must surface as RuntimeError."""
        document = self._make_doc(monkeypatch)
        with patch(self._SESSION_GET) as session_get:
            session_get.side_effect = requests.exceptions.ReadTimeout("timed out")
            expected_failure = pytest.raises(RuntimeError, match="request failed")
            with expected_failure:
                document.get_presigned_url()

    def test_download_request_timeout_is_wrapped_in_runtime_error(self, monkeypatch):
        """A ConnectionError on the second GET must surface as RuntimeError."""
        document = self._make_doc(monkeypatch)
        with patch(self._SESSION_GET) as session_get:
            # First reply succeeds; the second call raises instead of returning.
            session_get.side_effect = [
                make_response(200, body={"presignedUrl": "https://signed.test/x"}),
                requests.exceptions.ConnectionError("connection reset"),
            ]
            expected_failure = pytest.raises(
                RuntimeError, match="Failed to retrieve file content"
            )
            with expected_failure:
                document.read_file()
0 commit comments