fix: pass max_line_length to readline() to prevent LineTooLong on large SSE lines with MTLS.

MarkDaoust · copybara-github · commit 192cdcec165c · 2026-05-14T10:02:58.000-07:00
When using AsyncAuthorizedSession (Vertex AI + ADC), the google-auth library
creates its own internal aiohttp.ClientSession with the default
read_bufsize=2**16. This gives the underlying StreamReader a _high_water of
131072 bytes, which is the effective limit for readline().

Streaming responses from thinking models can include large thoughtSignature
fields, or generated images, that push a single SSE data: line beyond 131072 bytes, causing aiohttp to raise LineTooLong.

The fix passes max_line_length=READ_BUFFER_SIZE (4MB) explicitly to every
readline() call in _aiter_response_stream(), overriding the limit regardless
of which code path created the underlying session. This covers both the direct
AiohttpClientSession path (API key auth) and the AsyncAuthorizedSession path
(Vertex AI + ADC).

Test: added test_aiohttp_large_sse_line_with_thought_signature which uses a
mock that enforces the real aiohttp LineTooLong limit, and streams a 150KB SSE
line that would previously fail. Manually verified, this test fails before the change.
PiperOrigin-RevId: 915454389
diff --git a/google/genai/_api_client.py b/google/genai/_api_client.py
@@ -447,7 +447,9 @@ async def _aiter_response_stream(self) -> AsyncIterator[str]:
       try:
         while True:
           # Read a line from the stream. This returns bytes.
-          line_bytes = await self.response_stream.content.readline()
+          line_bytes = await self.response_stream.content.readline(
+              max_line_length=READ_BUFFER_SIZE
+          )
           if not line_bytes:
             break
           # Decode the bytes and remove trailing whitespace and newlines.
diff --git a/google/genai/tests/client/test_async_stream.py b/google/genai/tests/client/test_async_stream.py
@@ -73,7 +73,7 @@ def __init__(self, lines: List[str]):
     self.content.readline.side_effect = self._async_read_line
     self.release = MagicMock()
 
-  async def _async_read_line(self) -> bytes:
+  async def _async_read_line(self, **kwargs) -> bytes:
     if self._read_pos >= len(self._read_data):
       return b""  # End of stream
 
@@ -240,7 +240,7 @@ async def test_aiohttp_simple_lines(responses: api_client.HttpResponse):
   results = [line async for line in responses._aiter_response_stream()]
 
   assert results == lines
-  mock_response.content.readline.assert_any_call()
+  mock_response.content.readline.assert_called()
   mock_response.release.assert_called_once()
 
 
@@ -256,7 +256,7 @@ async def test_aiohttp_data_prefix(responses: api_client.HttpResponse):
   results = [line async for line in responses._aiter_response_stream()]
 
   assert results == ["{ 'message': 'hello' }", "{ 'status': 'ok' }"]
-  mock_response.content.readline.assert_any_call()
+  mock_response.content.readline.assert_called()
   mock_response.release.assert_called_once()
 
 
@@ -278,7 +278,7 @@ async def test_aiohttp_multiple_json_chunks(responses: api_client.HttpResponse):
   results = [line async for line in responses._aiter_response_stream()]
 
   assert results == ['{ "id": 1 }', '{ "id": 2 }', '{ "id": 3 }']
-  mock_response.content.readline.assert_any_call()
+  mock_response.content.readline.assert_called()
   mock_response.release.assert_called_once()
 
 
@@ -296,7 +296,88 @@ async def test_aiohttp_incomplete_json_at_end(
   results = [line async for line in responses._aiter_response_stream()]
 
   assert results == ['{ "partial": "data"']
-  mock_response.content.readline.assert_any_call()
+  mock_response.content.readline.assert_called()
+  mock_response.release.assert_called_once()
+
+
+class MockAIOHTTPResponseWithLineLimits(aiohttp.ClientResponse):
+  """Mock that enforces aiohttp's real readline limits.
+
+  Real aiohttp StreamReader raises LineTooLong when a line exceeds
+  `_high_water` (= limit * 2) bytes. The default limit is 2**16, so lines
+  over 131072 bytes fail unless `max_line_length` is explicitly passed.
+  """
+
+  DEFAULT_HIGH_WATER = 2**16 * 2  # 131072, same as aiohttp default
+
+  def __init__(self, lines: List[str]):
+    self.content = MagicMock()
+    self.content.readline = AsyncMock()
+    self._read_data = b"\n".join(line.encode("utf-8") for line in lines) + b"\n"
+    self._read_pos = 0
+    self.content.readline.side_effect = self._async_read_line
+    self.release = MagicMock()
+
+  async def _async_read_line(
+      self, *, max_line_length=None
+  ) -> bytes:
+    if self._read_pos >= len(self._read_data):
+      return b""
+
+    newline_pos = self._read_data.find(b"\n", self._read_pos)
+    if newline_pos == -1:
+      line = self._read_data[self._read_pos:]
+      self._read_pos = len(self._read_data)
+    else:
+      line = self._read_data[self._read_pos:newline_pos + 1]
+      self._read_pos = newline_pos + 1
+
+    # Enforce limit like real aiohttp StreamReader.readuntil does
+    limit = max_line_length or self.DEFAULT_HIGH_WATER
+    if len(line) > limit:
+      from aiohttp.http_exceptions import LineTooLong
+      raise LineTooLong(line[:100] + b"...", limit)
+
+    return line
+
+
+@requires_aiohttp
+@pytest.mark.asyncio
+async def test_aiohttp_large_sse_line_with_thought_signature(
+    responses: api_client.HttpResponse,
+):
+  """Verifies large SSE lines (e.g. thoughtSignature) don't hit LineTooLong.
+
+  aiohttp's StreamReader.readline() enforces a maximum line length based on
+  the session's read_bufsize (default: 2**16), which gives a _high_water limit
+  of 131072 bytes. Thinking models can return a thoughtSignature field large
+  enough to push a single SSE data: line past this limit, causing LineTooLong.
+
+  The fix passes max_line_length=READ_BUFFER_SIZE (4MB) explicitly on the
+  readline() call in _aiter_response_stream(), overriding the limit at the
+  call site regardless of how the underlying aiohttp session was configured.
+
+  This test verifies the fix by using a mock that enforces the real aiohttp
+  readline limit and confirms a 150KB line is streamed successfully.
+  """
+  api_client.has_aiohttp = True
+
+  # Build a single SSE line larger than aiohttp's default limit (131072)
+  large_thought_sig = "A" * 150_000  # > 131072 bytes
+  large_sse_payload = (
+      f'{{"candidates": [{{"content": {{"parts": [{{"text": "",'
+      f'"thoughtSignature": "{large_thought_sig}"}}]}}}}]}}'
+  )
+  lines = [f"data: {large_sse_payload}", ""]
+
+  mock_response = MockAIOHTTPResponseWithLineLimits(lines)
+  responses.response_stream = mock_response
+
+  results = [line async for line in responses._aiter_response_stream()]
+
+  assert len(results) == 1
+  assert "thoughtSignature" in results[0]
+  assert large_thought_sig in results[0]
   mock_response.release.assert_called_once()