fix: Support generalized history config injection for Gemini 3.1 Live on Vertex AI

wyf7107 · copybara-github · commit e7eb5fe2bb9a · 2026-06-08T16:00:26.000-07:00
Port of GitHub PR: 61a3933

Expose `history_config` in `RunConfig` and map it to the LLM live connect request configuration.

Generalize the history connection logic so that, when a fresh connection (one without a session resumption handle) is seeded with prior history, `initial_history_in_client_content` is automatically set to `True` for both the Gemini API and Vertex AI backends. A value set explicitly through `RunConfig.history_config` is left unchanged.

Co-authored-by: Yifan Wang <wanyif@google.com>
PiperOrigin-RevId: 927465431
diff --git a/src/google/adk/agents/run_config.py b/src/google/adk/agents/run_config.py
@@ -247,6 +247,9 @@ class RunConfig(BaseModel):
   session_resumption: Optional[types.SessionResumptionConfig] = None
   """Configures session resumption mechanism. Only support transparent session resumption mode now."""
 
+  history_config: Optional[types.HistoryConfig] = None
+  """Configures the exchange of history between the client and the server."""
+
   context_window_compression: Optional[types.ContextWindowCompressionConfig] = (
       None
   )
diff --git a/src/google/adk/flows/llm_flows/base_llm_flow.py b/src/google/adk/flows/llm_flows/base_llm_flow.py
@@ -552,6 +552,28 @@ async def run_live(
             if session_resumption.transparent is None:
               session_resumption.transparent = True
 
+        # When seeding a fresh connection with prior conversation history, set
+        # initial_history_in_client_content to True. This tells the Live server
+        # that the provided history already includes the model's past responses,
+        # preventing the server from generating duplicate responses for those replayed turns.
+        if (
+            llm_request.contents
+            and not invocation_context.live_session_resumption_handle
+        ):
+          if not llm_request.live_connect_config:
+            llm_request.live_connect_config = types.LiveConnectConfig()
+          if not llm_request.live_connect_config.history_config:
+            llm_request.live_connect_config.history_config = (
+                types.HistoryConfig()
+            )
+          if (
+              llm_request.live_connect_config.history_config.initial_history_in_client_content
+              is None
+          ):
+            llm_request.live_connect_config.history_config.initial_history_in_client_content = (
+                True
+            )
+
         logger.info(
             'Establishing live connection for agent: %s',
             invocation_context.agent.name,
diff --git a/src/google/adk/flows/llm_flows/basic.py b/src/google/adk/flows/llm_flows/basic.py
@@ -91,6 +91,9 @@ def _build_basic_request(
   llm_request.live_connect_config.session_resumption = (
       invocation_context.run_config.session_resumption
   )
+  llm_request.live_connect_config.history_config = (
+      invocation_context.run_config.history_config
+  )
   llm_request.live_connect_config.context_window_compression = (
       invocation_context.run_config.context_window_compression
   )
diff --git a/tests/unittests/flows/llm_flows/test_base_llm_flow.py b/tests/unittests/flows/llm_flows/test_base_llm_flow.py
@@ -30,6 +30,7 @@
 from google.adk.plugins.base_plugin import BasePlugin
 from google.adk.tools.base_toolset import BaseToolset
 from google.adk.tools.google_search_tool import GoogleSearchTool
+from google.adk.utils.variant_utils import GoogleLLMVariant
 from google.genai import types
 import pytest
 from websockets.exceptions import ConnectionClosed
@@ -1386,3 +1387,141 @@ async def mock_receive_2():
         second_call_req = mock_connect.call_args_list[1][0][0]
         session_resump = second_call_req.live_connect_config.session_resumption
         assert session_resump.transparent
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    'api_backend',
+    [
+        GoogleLLMVariant.GEMINI_API,
+        GoogleLLMVariant.VERTEX_AI,
+    ],
+)
+async def test_run_live_history_config_set_for_all_backends(api_backend):
+  """Test that run_live sets history_config for all backends."""
+
+  real_model = Gemini(model='gemini-3.1-flash-live-preview')
+  mock_connection = mock.AsyncMock()
+
+  agent = Agent(name='test_agent', model=real_model)
+  invocation_context = await testing_utils.create_invocation_context(
+      agent=agent
+  )
+  invocation_context.live_request_queue = LiveRequestQueue()
+  invocation_context.run_config = RunConfig()
+
+  flow = BaseLlmFlowForTesting()
+
+  async def mock_preprocess(ctx, req):
+    req.contents = [types.Content(parts=[types.Part.from_text(text='history')])]
+    from google.adk.flows.llm_flows.basic import _build_basic_request
+
+    _build_basic_request(ctx, req)
+    yield Event(id=Event.new_id(), author='test')
+
+  with mock.patch.object(
+      flow, '_preprocess_async', side_effect=mock_preprocess
+  ):
+    with mock.patch.object(flow, '_send_to_model', new_callable=AsyncMock):
+
+      class StopTestError(Exception):
+        pass
+
+      async def mock_receive():
+        yield LlmResponse(
+            content=types.Content(parts=[types.Part.from_text(text='hi')])
+        )
+        raise StopTestError('stop')
+
+      mock_connection.receive = mock.Mock(side_effect=mock_receive)
+
+      with mock.patch(
+          'google.adk.models.google_llm.Gemini.connect'
+      ) as mock_connect:
+        mock_connect.return_value.__aenter__.return_value = mock_connection
+
+        # Mock the api_backend property
+        with mock.patch.object(
+            Gemini,
+            '_api_backend',
+            new_callable=mock.PropertyMock,
+            return_value=api_backend,
+        ):
+          try:
+            async for _ in flow.run_live(invocation_context):
+              pass
+          except StopTestError:
+            pass
+
+          assert mock_connect.call_count == 1
+          called_req = mock_connect.call_args[0][0]
+          assert called_req.live_connect_config is not None
+          assert called_req.live_connect_config.history_config is not None
+          assert (
+              called_req.live_connect_config.history_config.initial_history_in_client_content
+              is True
+          )
+
+
+@pytest.mark.asyncio
+async def test_run_live_respects_explicit_initial_history_in_client_content_false():
+  """Test that run_live respects explicit initial_history_in_client_content=False in RunConfig."""
+
+  real_model = Gemini()
+  mock_connection = mock.AsyncMock()
+
+  agent = Agent(name='test_agent', model=real_model)
+  invocation_context = await testing_utils.create_invocation_context(
+      agent=agent
+  )
+  invocation_context.live_request_queue = LiveRequestQueue()
+  run_config = RunConfig(
+      history_config=types.HistoryConfig(
+          initial_history_in_client_content=False
+      )
+  )
+  invocation_context.run_config = run_config
+
+  flow = BaseLlmFlowForTesting()
+
+  async def mock_preprocess(ctx, req):
+    req.contents = [types.Content(parts=[types.Part.from_text(text='history')])]
+    from google.adk.flows.llm_flows.basic import _build_basic_request
+
+    _build_basic_request(ctx, req)
+    yield Event(id=Event.new_id(), author='test')
+
+  with mock.patch.object(
+      flow, '_preprocess_async', side_effect=mock_preprocess
+  ):
+    with mock.patch.object(flow, '_send_to_model', new_callable=AsyncMock):
+
+      class StopTestError(Exception):
+        pass
+
+      async def mock_receive():
+        yield LlmResponse(
+            content=types.Content(parts=[types.Part.from_text(text='hi')])
+        )
+        raise StopTestError('stop')
+
+      mock_connection.receive = mock.Mock(side_effect=mock_receive)
+
+      with mock.patch(
+          'google.adk.models.google_llm.Gemini.connect'
+      ) as mock_connect:
+        mock_connect.return_value.__aenter__.return_value = mock_connection
+
+        try:
+          async for _ in flow.run_live(invocation_context):
+            pass
+        except StopTestError:
+          pass
+
+        assert mock_connect.call_count == 1
+        call_req = mock_connect.call_args[0][0]
+        assert call_req.live_connect_config.history_config is not None
+        assert (
+            call_req.live_connect_config.history_config.initial_history_in_client_content
+            is False
+        )

Original file line number	Diff line number	Diff line change
`@@ -247,6 +247,9 @@ class RunConfig(BaseModel):`
`247`	`247`	`session_resumption: Optional[types.SessionResumptionConfig] = None`
`248`	`248`	`"""Configures session resumption mechanism. Only support transparent session resumption mode now."""`
`249`	`249`
	`250`	`+ history_config: Optional[types.HistoryConfig] = None`
	`251`	`+ """Configures the exchange of history between the client and the server."""`
	`252`	`+`
`250`	`253`	`context_window_compression: Optional[types.ContextWindowCompressionConfig] = (`
`251`	`254`	`None`
`252`	`255`	`)`
Original file line number	Diff line number	Diff line change
`@@ -91,6 +91,9 @@ def _build_basic_request(`
`91`	`91`	`llm_request.live_connect_config.session_resumption = (`
`92`	`92`	`invocation_context.run_config.session_resumption`
`93`	`93`	`)`
	`94`	`+ llm_request.live_connect_config.history_config = (`
	`95`	`+ invocation_context.run_config.history_config`
	`96`	`+ )`
`94`	`97`	`llm_request.live_connect_config.context_window_compression = (`
`95`	`98`	`invocation_context.run_config.context_window_compression`
`96`	`99`	`)`