Skip to content

Commit a4b1fd9

Browse files
Add tests and modify dynamic timeout calculation
1 parent 9aae22e commit a4b1fd9

2 files changed

Lines changed: 134 additions & 2 deletions

File tree

datareservoirio/storage/storage.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ def _df_to_blob(df, blob_url, session=_BLOBSTORAGE_SESSION, dynamic_read_timeout
363363

364364

365365
def _calculate_timeout(file_size_bytes):
366-
bytes_per_second = 1 * 1024 * 1024 # 1MB/s
366+
bytes_per_second = 0.5 * 1024 * 1024 # 0.5 MB/s
367367
min_timeout = 30
368-
timeout = max(min_timeout, (file_size_bytes / bytes_per_second) * 1.5)
368+
timeout = int(max(min_timeout, (file_size_bytes / bytes_per_second) * 1.5)) # 1.5x tolerance factor
369369
return timeout

tests/test_storage/test_storage.py

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from unittest.mock import ANY, call
66

77
import pandas as pd
8+
import numpy as np
89
import pytest
910
import requests
1011
from requests import HTTPError
@@ -16,6 +17,24 @@
1617

1718
TEST_PATH = Path(__file__).parent
1819

20+
def helper_function(func):
21+
func._is_helper = True
22+
return func
23+
24+
@helper_function
25+
def generate_dataframe(target_bytes):
26+
NUM_COLUMNS = 5
27+
BYTES_PER_VALUE = 28
28+
29+
bytes_per_row = NUM_COLUMNS * BYTES_PER_VALUE
30+
num_rows = max(1, int(target_bytes / bytes_per_row))
31+
32+
data = {
33+
f'col_{i}': np.random.randn(num_rows)
34+
for i in range(NUM_COLUMNS)
35+
}
36+
37+
return pd.DataFrame(data)
1938

2039
class Test__blob_to_df:
2140
"""
@@ -556,6 +575,119 @@ def test_put(
556575

557576
mock_requests.assert_has_calls(calls_expected)
558577

578+
def test_put_dynamic_timeout_sets_correct_value(
579+
self,
580+
mock_requests,
581+
storage_no_cache,
582+
response_cases,
583+
):
584+
response_cases.set("group4")
585+
df = generate_dataframe(20_000_000) # 20 MB
586+
587+
storage_no_cache.put(
588+
df,
589+
"http://example/blob/url",
590+
(
591+
"POST",
592+
"https://reservoir-api.4subsea.net/api/files/commit",
593+
{"json": {"FileId": "1234"}, "timeout": 10},
594+
),
595+
True,
596+
)
597+
598+
calls_expected = [
599+
call(
600+
method="put",
601+
url="http://example/blob/url",
602+
headers={"x-ms-blob-type": "BlockBlob"},
603+
data=ANY,
604+
timeout=(30, 40),
605+
),
606+
call(
607+
method="POST",
608+
url="https://reservoir-api.4subsea.net/api/files/commit",
609+
json={"FileId": "1234"},
610+
timeout=10,
611+
),
612+
]
613+
614+
mock_requests.assert_has_calls(calls_expected)
615+
616+
def test_put_dynamic_timeout_sets_minimal_value(
617+
self,
618+
mock_requests,
619+
storage_no_cache,
620+
response_cases,
621+
):
622+
response_cases.set("group4")
623+
df = generate_dataframe(1_000_000) # 1 MB
624+
625+
storage_no_cache.put(
626+
df,
627+
"http://example/blob/url",
628+
(
629+
"POST",
630+
"https://reservoir-api.4subsea.net/api/files/commit",
631+
{"json": {"FileId": "1234"}, "timeout": 10},
632+
),
633+
True,
634+
)
635+
636+
calls_expected = [
637+
call(
638+
method="put",
639+
url="http://example/blob/url",
640+
headers={"x-ms-blob-type": "BlockBlob"},
641+
data=ANY,
642+
timeout=(30, 30),
643+
),
644+
call(
645+
method="POST",
646+
url="https://reservoir-api.4subsea.net/api/files/commit",
647+
json={"FileId": "1234"},
648+
timeout=10,
649+
),
650+
]
651+
652+
mock_requests.assert_has_calls(calls_expected)
653+
654+
def test_put_without_dynamic_timeout_doesnt_set_its_value(
655+
self,
656+
mock_requests,
657+
storage_no_cache,
658+
response_cases,
659+
):
660+
response_cases.set("group4")
661+
df = generate_dataframe(20_000_000) # 20 MB
662+
663+
storage_no_cache.put(
664+
df,
665+
"http://example/blob/url",
666+
(
667+
"POST",
668+
"https://reservoir-api.4subsea.net/api/files/commit",
669+
{"json": {"FileId": "1234"}, "timeout": 10},
670+
)
671+
)
672+
673+
calls_expected = [
674+
call(
675+
method="put",
676+
url="http://example/blob/url",
677+
headers={"x-ms-blob-type": "BlockBlob"},
678+
data=ANY,
679+
timeout=(30, None),
680+
),
681+
call(
682+
method="POST",
683+
url="https://reservoir-api.4subsea.net/api/files/commit",
684+
json={"FileId": "1234"},
685+
timeout=10,
686+
),
687+
]
688+
689+
mock_requests.assert_has_calls(calls_expected)
690+
559691
def test_put_raise_for_status(self, storage_no_cache, data_float):
560692
df = data_float.as_dataframe()
561693

0 commit comments

Comments
 (0)