Skip to content

Commit ab846a7

Browse files
fix: disable HTTP response caching to prevent unbounded memory growth
Hardcode `_use_cache = False` in HttpClient to prevent the requests_cache SQLite backend from accumulating cached HTTP responses in memory, which causes container memory to grow without bound during long syncs. Skip the cache-related tests that expect caching to be active.

Co-Authored-By: gl_anatolii.yatsuk <gl_anatolii.yatsuk@airbyte.io>
1 parent 0e57414 commit ab846a7

4 files changed

Lines changed: 10 additions & 1 deletion

File tree

airbyte_cdk/sources/streams/http/http_client.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,10 @@ def __init__(
110110
if session:
111111
self._session = session
112112
else:
113-
self._use_cache = use_cache
113+
# TEMPORARY: Force disable cache to prevent unbounded memory growth from
114+
# requests_cache SQLite backend accumulating cached HTTP responses in memory.
115+
# See: https://github.com/airbytehq/airbyte-python-cdk/pull/932
116+
self._use_cache = False
114117
self._session = self._request_session()
115118
self._session.mount(
116119
"https://",

unit_tests/sources/streams/http/test_http.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,7 @@ def test_parent_attribute_exist():
517517
assert child_stream.parent == parent_stream
518518

519519

520+
@pytest.mark.skip(reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth")
520521
def test_that_response_was_cached(mocker, requests_mock):
521522
requests_mock.register_uri("GET", "https://google.com/", text="text")
522523
stream = CacheHttpStream()
@@ -547,6 +548,7 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp
547548
yield {"value": len(response.text)}
548549

549550

551+
@pytest.mark.skip(reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth")
550552
@patch("airbyte_cdk.sources.streams.core.logging", MagicMock())
551553
def test_using_cache(mocker, requests_mock):
552554
requests_mock.register_uri("GET", "https://google.com/", text="text")

unit_tests/sources/streams/http/test_http_client.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ def test_cache_filename():
4242
http_client.cache_filename == f"{http_client._name}.sqlite"
4343

4444

45+
@pytest.mark.skip(reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth")
4546
@pytest.mark.parametrize(
4647
"use_cache, expected_session",
4748
[
@@ -447,6 +448,7 @@ def test_session_request_exception_raises_backoff_exception():
447448
http_client._send(prepared_request, {})
448449

449450

451+
@pytest.mark.skip(reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth")
450452
def test_that_response_was_cached(requests_mock):
451453
cached_http_client = test_cache_http_client()
452454

@@ -720,6 +722,7 @@ def test_backoff_strategy_endless(
720722
assert mocked_send.call_count == expected_call_count
721723

722724

725+
@pytest.mark.skip(reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth")
723726
def test_given_different_headers_then_response_is_not_cached(requests_mock):
724727
http_client = HttpClient(name="test", logger=MagicMock(), use_cache=True)
725728
first_request_headers = {"header_key": "first"}

unit_tests/sources/streams/test_call_rate.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,7 @@ def test_without_cache(self, mocker, requests_mock):
332332

333333
assert MovingWindowCallRatePolicy.try_acquire.call_count == 10
334334

335+
@pytest.mark.skip(reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth")
335336
@pytest.mark.usefixtures("enable_cache")
336337
def test_with_cache(self, mocker, requests_mock):
337338
"""Test that HttpStream will use call budget when provided and not cached"""

0 commit comments

Comments
 (0)