google
diff --git a/src/google/adk/memory/vertex_ai_memory_bank_service.py b/src/google/adk/memory/vertex_ai_memory_bank_service.py
Lines changed: 2 additions & 11 deletions
diff --git a/src/google/adk/models/interactions_utils.py b/src/google/adk/models/interactions_utils.py
Lines changed: 122 additions & 6 deletions
diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py
Lines changed: 89 additions & 31 deletions
@@ -26,7 +26,6 @@
 from google.genai import types
 from typing_extensions import override
 
-from ..utils._google_client_headers import get_tracking_headers
 from ..utils.vertex_ai_utils import get_express_mode_api_key
 from .base_memory_service import BaseMemoryService
 from .base_memory_service import SearchMemoryResponse
@@ -617,17 +616,9 @@ def _get_api_client(self) -> vertexai.AsyncClient:
     """
     import vertexai
 
-    http_options = types.HttpOptions(headers=get_tracking_headers())
     if self._express_mode_api_key:
-      return vertexai.Client(
-          http_options=http_options,
-          api_key=self._express_mode_api_key,
-      ).aio
-    return vertexai.Client(
-        project=self._project,
-        location=self._location,
-        http_options=http_options,
-    ).aio
+      return vertexai.Client(api_key=self._express_mode_api_key).aio
+    return vertexai.Client(project=self._project, location=self._location).aio
 
 
 def _log_ingest_task_error(task: asyncio.Task) -> None:
 
@@ -706,13 +706,23 @@ def convert_interaction_to_llm_response(
 
 @dataclasses.dataclass
 class _StreamState:
-  """Accumulates streamed parts across SSE events.
+  """Accumulates streamed parts and grounding data across SSE events.
 
   ``parts`` collects ``types.Part``s in arrival order to assemble the final
-  ``Content``.
+  ``Content``. The grounding fields accumulate google_search / citation data
+  that maps to ``grounding_metadata`` (a top-level ``LlmResponse`` field, not a
+  part) so it can be reattached to the final, persisted event.
   """
 
   parts: list[types.Part] = dataclasses.field(default_factory=list)
+  # Search queries appended by _handle_google_search_call, in arrival order.
+  web_search_queries: list[str] = dataclasses.field(default_factory=list)
+  # One web chunk per url_citation annotation (see _handle_text_annotation).
+  grounding_chunks: list[types.GroundingChunk] = dataclasses.field(
+      default_factory=list
+  )
+  # Citation segments; each refers to its chunk by index in grounding_chunks.
+  grounding_supports: list[types.GroundingSupport] = dataclasses.field(
+      default_factory=list
+  )
+  # Latest search entry point; _handle_google_search_result overwrites it.
+  search_entry_point: types.SearchEntryPoint | None = None
 
 
 def _partial_part_response(
@@ -727,6 +737,18 @@ def _partial_part_response(
   )
 
 
+def _partial_grounding_response(
+    grounding_metadata: types.GroundingMetadata, interaction_id: str | None
+) -> LlmResponse:
+  """Wrap a grounding increment in a non-final streaming ``LlmResponse``.
+
+  Args:
+    grounding_metadata: Grounding data to attach to the response.
+    interaction_id: Interaction identifier copied onto the response; may be
+      ``None``.
+
+  Returns:
+    An ``LlmResponse`` flagged ``partial=True`` and ``turn_complete=False``
+    that carries ``grounding_metadata``; no content is set here.
+  """
+  streaming_flags = {'partial': True, 'turn_complete': False}
+  return LlmResponse(
+      interaction_id=interaction_id,
+      grounding_metadata=grounding_metadata,
+      **streaming_flags,
+  )
+
+
 def _handle_text(
     delta: StepDeltaData, state: _StreamState, interaction_id: str | None
 ) -> LlmResponse | None:
@@ -862,6 +884,69 @@ def _handle_code_execution_result(
   return _partial_part_response(part, interaction_id)
 
 
+def _handle_google_search_call(
+    delta: StepDeltaData, state: _StreamState, interaction_id: str | None
+) -> LlmResponse | None:
+  """Record the search queries carried by a ``google_search_call`` delta.
+
+  Args:
+    delta: Streamed delta; ``arguments.queries`` is read when ``arguments``
+      is set.
+    state: Stream accumulator; the queries are appended to
+      ``state.web_search_queries``.
+    interaction_id: Identifier forwarded to the partial response.
+
+  Returns:
+    A partial grounding response holding only this delta's queries, or
+    ``None`` when the delta has no arguments or no queries.
+  """
+  arguments = delta.arguments
+  requested = arguments.queries if arguments else None
+  if not requested:
+    return None
+  state.web_search_queries += requested
+  # Emit just this delta's queries (as a copy), not the accumulated list.
+  incremental = types.GroundingMetadata(web_search_queries=list(requested))
+  return _partial_grounding_response(incremental, interaction_id)
+
+
+def _handle_google_search_result(
+    delta: StepDeltaData, state: _StreamState, interaction_id: str | None
+) -> LlmResponse | None:
+  """Store the search entry point from a ``google_search_result`` delta.
+
+  Only the first entry of ``delta.result`` with non-empty
+  ``search_suggestions`` is used; later entries are not inspected.
+
+  Args:
+    delta: Streamed delta whose ``result`` entries are scanned.
+    state: Stream accumulator; ``state.search_entry_point`` is replaced with
+      the newly built entry point.
+    interaction_id: Identifier forwarded to the partial response.
+
+  Returns:
+    A partial grounding response carrying the entry point, or ``None`` when
+    no entry provides search suggestions.
+  """
+  suggestions = next(
+      (
+          item.search_suggestions
+          for item in delta.result or []
+          if item.search_suggestions
+      ),
+      None,
+  )
+  if not suggestions:
+    return None
+  state.search_entry_point = types.SearchEntryPoint(
+      rendered_content=suggestions
+  )
+  return _partial_grounding_response(
+      types.GroundingMetadata(search_entry_point=state.search_entry_point),
+      interaction_id,
+  )
+
+
+def _handle_text_annotation(
+    delta: StepDeltaData, state: _StreamState, interaction_id: str | None
+) -> LlmResponse | None:
+  """Turn ``url_citation`` annotations into grounding chunks and supports.
+
+  Each citation yields one ``GroundingChunk`` (web URI and title) and one
+  ``GroundingSupport`` whose segment spans the annotation's start/end indices
+  and which points at that chunk. Annotations of any other type are ignored.
+
+  Args:
+    delta: Streamed delta whose ``annotations`` are scanned.
+    state: Stream accumulator; new chunks and supports are appended to
+      ``state.grounding_chunks`` and ``state.grounding_supports``.
+    interaction_id: Identifier forwarded to the partial response.
+
+  Returns:
+    A partial grounding response holding only the chunks and supports created
+    from this delta, or ``None`` when it contains no ``url_citation``.
+  """
+  citations = [
+      annotation
+      for annotation in delta.annotations or []
+      if getattr(annotation, 'type', None) == 'url_citation'
+  ]
+  if not citations:
+    return None
+  # Chunk indices are positions in the accumulated list, so numbering for
+  # this delta continues from the chunks gathered by earlier deltas.
+  first_index = len(state.grounding_chunks)
+  fresh_chunks = [
+      types.GroundingChunk(
+          web=types.GroundingChunkWeb(uri=citation.url, title=citation.title)
+      )
+      for citation in citations
+  ]
+  fresh_supports = [
+      types.GroundingSupport(
+          segment=types.Segment(
+              start_index=citation.start_index,
+              end_index=citation.end_index,
+          ),
+          grounding_chunk_indices=[first_index + offset],
+      )
+      for offset, citation in enumerate(citations)
+  ]
+  state.grounding_chunks.extend(fresh_chunks)
+  state.grounding_supports.extend(fresh_supports)
+  return _partial_grounding_response(
+      types.GroundingMetadata(
+          grounding_chunks=fresh_chunks,
+          grounding_supports=fresh_supports,
+      ),
+      interaction_id,
+  )
+
+
 def _handle_function_result(
     delta: StepDeltaData, state: _StreamState, interaction_id: str | None
 ) -> LlmResponse | None:
@@ -875,6 +960,24 @@ def _handle_function_result(
   return _partial_part_response(part, interaction_id)
 
 
+def _build_grounding_metadata(
+    state: _StreamState,
+) -> types.GroundingMetadata | None:
+  """Assemble the accumulated grounding data into one ``GroundingMetadata``.
+
+  Args:
+    state: Stream accumulator holding the queries, chunks, supports and
+      search entry point gathered so far.
+
+  Returns:
+    ``None`` when none of the four grounding fields holds data; otherwise a
+    ``GroundingMetadata`` in which each empty list is passed as ``None``.
+  """
+  queries = state.web_search_queries
+  chunks = state.grounding_chunks
+  supports = state.grounding_supports
+  entry_point = state.search_entry_point
+  if not any((queries, chunks, supports, entry_point)):
+    return None
+  return types.GroundingMetadata(
+      web_search_queries=queries or None,
+      grounding_chunks=chunks or None,
+      grounding_supports=supports or None,
+      search_entry_point=entry_point,
+  )
+
+
 def convert_interaction_event_to_llm_response(
     event: InteractionSSEEvent,
     state: _StreamState,
@@ -931,6 +1034,12 @@ def convert_interaction_event_to_llm_response(
       return _handle_code_execution_call(delta, state, interaction_id)
     elif delta_type == 'code_execution_result':
       return _handle_code_execution_result(delta, state, interaction_id)
+    elif delta_type == 'google_search_call':
+      return _handle_google_search_call(delta, state, interaction_id)
+    elif delta_type == 'google_search_result':
+      return _handle_google_search_result(delta, state, interaction_id)
+    elif delta_type == 'text_annotation_delta':
+      return _handle_text_annotation(delta, state, interaction_id)
     elif delta_type == 'function_result':
       return _handle_function_result(delta, state, interaction_id)
     else:
@@ -968,16 +1077,23 @@ def convert_interaction_event_to_llm_response(
     return None
 
   elif isinstance(event, InteractionCompletedEvent):
-    # Final aggregated response
-    if state.parts:
+    grounding_metadata = _build_grounding_metadata(state)
+    if state.parts or grounding_metadata is not None:
+      content = (
+          types.Content(role='model', parts=state.parts)
+          if state.parts
+          else None
+      )
       return LlmResponse(
-          content=types.Content(role='model', parts=state.parts),
+          content=content,
+          grounding_metadata=grounding_metadata,
+          usage_metadata=_usage_metadata_from_interaction(event.interaction),
           partial=False,
           turn_complete=True,
           finish_reason=types.FinishReason.STOP,
           interaction_id=interaction_id,
       )
-    # If no streaming parts were collected, convert the final interaction directly
+    # No streaming parts or grounding collected: convert the final interaction.
     return convert_interaction_to_llm_response(event.interaction)
 
   elif isinstance(event, Interaction):
 
@@ -330,6 +330,30 @@ def _get_provider_from_model(model: str) -> str:
   return ""
 
 
+# Provider names whose requests may reach an Anthropic model. Membership alone
+# is not conclusive for bedrock and vertex_ai: they are multi-model platforms,
+# so _is_anthropic_route also checks the model name for them.
+_ANTHROPIC_PROVIDERS = frozenset({"anthropic", "bedrock", "vertex_ai"})
+
+
+def _is_anthropic_provider(provider: str) -> bool:
+  """Returns True if the provider is one that can route to Anthropic models.
+
+  The lookup lower-cases ``provider`` before checking it against
+  ``_ANTHROPIC_PROVIDERS``. An empty or ``None`` provider yields False.
+  """
+  if not provider:
+    return False
+  return provider.lower() in _ANTHROPIC_PROVIDERS
+
+
+def _is_anthropic_route(provider: str, model: str) -> bool:
+  """Returns True only when requests actually reach an Anthropic Claude model.
+
+  bedrock and vertex_ai also host non-Anthropic models (Llama, Gemini), so for
+  those platforms the model name must identify a Claude model too. Formatting
+  thinking blocks for a non-Claude model triggers API validation (400) errors.
+
+  Args:
+    provider: Provider name; compared case-insensitively.
+    model: Model name, consulted only for bedrock and vertex_ai.
+
+  Returns:
+    False for providers rejected by ``_is_anthropic_provider``; the result of
+    ``_is_anthropic_model(model)`` for bedrock and vertex_ai; True otherwise.
+  """
+  if not _is_anthropic_provider(provider):
+    return False
+  # On multi-model platforms the provider alone is inconclusive.
+  multi_model_platform = provider.lower() in ("bedrock", "vertex_ai")
+  return _is_anthropic_model(model) if multi_model_platform else True
+
+
 def _infer_mime_type_from_uri(uri: str) -> Optional[str]:
   """Attempts to infer MIME type from a URI's path extension.
 
@@ -491,42 +515,48 @@ def _iter_reasoning_texts(reasoning_value: Any) -> Iterable[str]:
 
 
 def _is_thinking_blocks_format(reasoning_value: Any) -> bool:
-  """Returns True if reasoning_value is thinking_blocks format.
+  """Returns True if reasoning_value is Anthropic thinking_blocks format.
 
-  Anthropic blocks carry a 'signature'; Gemini blocks carry 'thinking'/'type'
-  without one. Match either so Gemini thought text is not dropped.
+  Anthropic thinking_blocks is a list of dicts, each with 'type', 'thinking',
+  and 'signature' keys. Only the first element is inspected: the value matches
+  when it is a non-empty list whose first item is a dict containing
+  'signature'.
   """
   if not isinstance(reasoning_value, list) or not reasoning_value:
     return False
   first = reasoning_value[0]
-  return isinstance(first, dict) and (
-      "thinking" in first or "signature" in first
-  )
+  return isinstance(first, dict) and "signature" in first
 
 
 def _convert_reasoning_value_to_parts(reasoning_value: Any) -> List[types.Part]:
   """Converts provider reasoning payloads into Gemini thought parts.
 
-  Handles Anthropic thinking_blocks (list of dicts with type/thinking/signature)
-  by preserving the signature on each part's thought_signature field. This is
-  required for Anthropic to maintain thinking across tool call boundaries.
+  Handles two formats:
+  - Anthropic thinking_blocks with 'thinking' and optional 'signature' fields.
+  - A plain string or nested structure (OpenAI/Azure/Ollama) via
+    _iter_reasoning_texts.
   """
-  if _is_thinking_blocks_format(reasoning_value):
+  if isinstance(reasoning_value, list):
     parts: List[types.Part] = []
     for block in reasoning_value:
-      if not isinstance(block, dict):
-        continue
-      block_type = block.get("type", "")
-      if block_type == "redacted":
-        continue
-      thinking_text = block.get("thinking", "")
-      signature = block.get("signature", "")
-      if not thinking_text and not signature:
-        continue
-      part = types.Part(text=thinking_text, thought=True)
-      if signature:
-        part.thought_signature = signature.encode("utf-8")
-      parts.append(part)
+      if isinstance(block, dict):
+        block_type = block.get("type", "")
+        if block_type == "redacted":
+          continue
+        if block_type == "thinking":
+          thinking_text = block.get("thinking", "")
+          if thinking_text:
+            part = types.Part(text=thinking_text, thought=True)
+            signature = block.get("signature")
+            if signature:
+              decoded_signature = _decode_thought_signature(signature)
+              part.thought_signature = decoded_signature or str(
+                  signature
+              ).encode("utf-8")
+            parts.append(part)
+          continue
+      # Fall back to text extraction for non-thinking-block items.
+      for text in _iter_reasoning_texts(block):
+        if text:
+          parts.append(types.Part(text=text, thought=True))
     return parts
   return [
       types.Part(text=text, thought=True)
@@ -538,16 +568,16 @@ def _convert_reasoning_value_to_parts(reasoning_value: Any) -> List[types.Part]:
 def _extract_reasoning_value(message: Message | Delta | None) -> Any:
   """Fetches the reasoning payload from a LiteLLM message.
 
-  Checks for 'thinking_blocks' (Anthropic structured format with signatures),
-  'reasoning_content' (LiteLLM standard, used by Azure/Foundry, Ollama via
-  LiteLLM) and 'reasoning' (used by LM Studio, vLLM).
-  Prioritizes 'thinking_blocks' when present (Anthropic models), then
-  'reasoning_content', then 'reasoning'.
+  Checks for 'thinking_blocks' (Anthropic thinking with signatures),
+  'reasoning_content' (LiteLLM standard, used by Azure/Foundry,
+  Ollama via LiteLLM), and 'reasoning' (used by LM Studio, vLLM).
+  Prioritizes 'thinking_blocks' when the key is present, as they contain
+  the signature required for Anthropic's extended thinking API.
   """
   if message is None:
     return None
-  # Anthropic models return thinking_blocks with type/thinking/signature fields.
-  # This must be preserved to maintain thinking across tool call boundaries.
+  # Prefer thinking_blocks (Anthropic) — they carry per-block signatures
+  # needed for multi-turn conversations with extended thinking.
   thinking_blocks = message.get("thinking_blocks")
   if thinking_blocks is not None:
     return thinking_blocks
@@ -999,7 +1029,7 @@ async def _content_to_message_param(
         if part.text and part.thought_signature:
           sig = part.thought_signature
           if isinstance(sig, bytes):
-            sig = sig.decode("utf-8")
+            sig = base64.b64encode(sig).decode("utf-8")
           thinking_blocks.append({
               "type": "thinking",
               "thinking": part.text,
@@ -1026,6 +1056,34 @@ async def _content_to_message_param(
       ):
         reasoning_texts.append(_decode_inline_text_data(part.inline_data.data))
 
+    # Anthropic routes require thinking blocks to be embedded directly in the
+    # message content list. LiteLLM's prompt template for Anthropic drops the
+    # top-level reasoning_content field, so thinking blocks disappear from
+    # multi-turn histories and the model stops producing them after the first
+    # turn. Signatures are required by the Anthropic API for thinking blocks in
+    # multi-turn conversations. On multi-model platforms (bedrock, vertex_ai)
+    # this must only apply to actual Claude models, not Gemini/Llama/etc.
+    if reasoning_parts and _is_anthropic_route(provider, model):
+      content_list = []
+      for part in reasoning_parts:
+        if part.text:
+          block = {"type": "thinking", "thinking": part.text}
+          if part.thought_signature:
+            sig = part.thought_signature
+            if isinstance(sig, bytes):
+              sig = base64.b64encode(sig).decode("utf-8")
+            block["signature"] = sig
+          content_list.append(block)
+      if isinstance(final_content, list):
+        content_list.extend(final_content)
+      elif final_content:
+        content_list.append({"type": "text", "text": final_content})
+      return ChatCompletionAssistantMessage(
+          role=role,
+          content=content_list or None,
+          tool_calls=tool_calls or None,
+      )
+
     # Preserve reasoning deltas exactly as received. Injecting separators
     # between fragments can corrupt provider-streamed thinking text.
     reasoning_content = "".join(text for text in reasoning_texts if text)