feat(agents): add include_sources for per-agent content source filtering

bofenghuang · bofenghuang · commit 43170f791633 · 2026-06-01T11:23:29.000+02:00
Add `include_sources: list[str] | None` to `LlmAgent` as an orthogonal
axis to the existing `include_contents` temporal-window control. Where
`include_contents` answers "how far back?", `include_sources` answers
"from whom?" — allowing agents in a multi-agent pipeline to declare an
allowlist of content sources rather than receiving every narrative-cast
peer output.

Source names: 'user' (plain human messages) and 'self' (this agent's
own prior model turns) are reserved; any other string is treated as an
agent name and matched directly against event.author before narrative
casting occurs.

Filtering runs at the event level inside _get_contents(), before
_present_other_agent_message() converts authorship into embedded text,
so source identity is read from structured metadata rather than parsed
from "[agent_name] said:" strings.

Function call (FC) / function response pairing is preserved: responses to
the current agent's own calls are tied to 'self' (dropped together with
their calls when 'self' is absent), and responses to another agent's calls
are dropped when that agent is not in the allowlist. In live mode, events
authored by the current agent (event.author == agent_name) are mapped to
the reserved name 'self', because _is_other_agent_reply() returns True for
all non-user events in live sessions.

Raises ValueError when include_sources=[] (use None to disable).
diff --git a/src/google/adk/agents/llm_agent.py b/src/google/adk/agents/llm_agent.py
@@ -349,6 +349,27 @@ class LlmAgent(BaseAgent):
     instruction and input
   """
 
+  include_sources: Optional[list[str]] = None
+  """Allowlist of content sources to include in model requests.
+
+  Orthogonal to include_contents (temporal window); this controls which
+  sources are kept from within that window.
+
+  Options:
+    None (default): all sources pass through — backward-compatible.
+    list[str]: only content from the listed sources is kept.
+
+  Source names ('user' and 'self' are reserved):
+    'user'  — plain human user messages (not tool outputs)
+    'self'  — this agent's own model outputs and its tool-call responses
+    <name>  — any other string is matched against event.author (agent name)
+
+  Example — keep full history but only user + this agent's turns:
+    include_contents='default', include_sources=['user', 'self']
+
+  Raises ValueError if set to [] (use None to disable filtering).
+  """
+
   # Controlled input/output configurations - Start
   input_schema: Optional[type[BaseModel]] = None
   """The input schema when agent is used as a tool."""
@@ -954,6 +975,17 @@ def __maybe_save_output_to_state(self, event: Event):
   def __model_validator_after(self) -> LlmAgent:
     return self
 
+  @field_validator('include_sources', mode='after')
+  @classmethod
+  def _validate_include_sources(
+      cls, v: Optional[list[str]]
+  ) -> Optional[list[str]]:
+    if v is not None and len(v) == 0:
+      raise ValueError(
+          "include_sources=[] keeps nothing. Use None to disable filtering."
+      )
+    return v
+
   @field_validator('generate_content_config', mode='after')
   @classmethod
   def validate_generate_content_config(
diff --git a/src/google/adk/flows/llm_flows/contents.py b/src/google/adk/flows/llm_flows/contents.py
@@ -68,6 +68,7 @@ async def run_async(
     instruction_related_contents = llm_request.contents
 
     is_single_turn = getattr(agent, 'mode', None) == 'single_turn'
+    source_filter = getattr(agent, 'include_sources', None)
     if agent.include_contents == 'default':
       # Include full conversation history
       llm_request.contents = _get_contents(
@@ -78,6 +79,7 @@ async def run_async(
           isolation_scope=invocation_context.isolation_scope,
           is_single_turn=is_single_turn,
           user_content=invocation_context.user_content,
+          source_filter=source_filter,
       )
     else:
       # Include current turn context only (no conversation history)
@@ -89,6 +91,7 @@ async def run_async(
           isolation_scope=invocation_context.isolation_scope,
           is_single_turn=is_single_turn,
           user_content=invocation_context.user_content,
+          source_filter=source_filter,
       )
 
     # Add instruction-related contents to proper position in conversation
@@ -504,6 +507,7 @@ def _get_contents(
     isolation_scope: Optional[str] = None,
     is_single_turn: bool = False,
     user_content: Optional[types.Content] = None,
+    source_filter: Optional[list[str]] = None,
 ) -> list[types.Content]:
   """Get the contents for the LLM request.
 
@@ -610,6 +614,7 @@ def _get_contents(
         accumulated_output_transcription = ''
 
     is_other_reply = _is_other_agent_reply(agent_name, event)
+    other_fc_author = None  # set when is_other_reply via FC attribution
 
     # Check if it's a FunctionResponse for another agent
     if not is_other_reply and event.content:
@@ -623,8 +628,43 @@ def _get_contents(
               and call_author != 'user'
           ):
             is_other_reply = True
+            other_fc_author = call_author
             break
 
+    if source_filter is not None:
+      if is_other_reply:
+        if event.author != 'user':
+          # In live mode the current agent's own events are also classified as
+          # other_reply (see _is_other_agent_reply). Map the actual agent name
+          # to the 'self' reserved name so source_filter=['self'] works.
+          effective_source = (
+              'self' if event.author == agent_name else event.author
+          )
+          if effective_source not in source_filter:
+            continue
+        else:
+          # 'user'-authored FC response to another agent's call.
+          # other_fc_author was resolved above — no second iteration needed.
+          # _present_other_agent_message converts it to text, so no raw
+          # function_response survives — but drop it when its call author is
+          # filtered to avoid "[agent_b] returned X" with no visible preceding
+          # "[agent_b] called tool Y".
+          if other_fc_author and other_fc_author not in source_filter:
+            continue
+      elif event.content:
+        if event.content.role == 'model':
+          if 'self' not in source_filter:
+            continue
+        elif event.content.role == 'user':
+          if _content_contains_function_response(event.content):
+            # FC responses are paired with the current agent's own tool calls
+            # (role='model'). Tie them to 'self' so dropping 'self' drops both
+            # sides of the pair and avoids orphaned function_response parts.
+            if 'self' not in source_filter:
+              continue
+          elif 'user' not in source_filter:
+            continue
+
     if is_other_reply:
       if converted_event := _present_other_agent_message(event):
         filtered_events.append(converted_event)
@@ -677,6 +717,7 @@ def _get_current_turn_contents(
     is_single_turn: bool = False,
     isolation_scope: Optional[str] = None,
     user_content: Optional[types.Content] = None,
+    source_filter: Optional[list[str]] = None,
 ) -> list[types.Content]:
   """Get contents for the current turn only (no conversation history).
 
@@ -712,6 +753,7 @@ def _get_current_turn_contents(
           isolation_scope=isolation_scope,
           is_single_turn=is_single_turn,
           user_content=user_content,
+          source_filter=source_filter,
       )
 
   return []
diff --git a/tests/unittests/agents/test_llm_agent_include_contents.py b/tests/unittests/agents/test_llm_agent_include_contents.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Unit tests for LlmAgent include_contents field behavior."""
+"""Unit tests for LlmAgent include_contents and include_sources field behavior."""
 
 from google.adk.agents.llm_agent import LlmAgent
 from google.adk.agents.sequential_agent import SequentialAgent
@@ -241,3 +241,131 @@ async def test_include_contents_none_sequential_agents():
   assert any(
       "Agent1 response" in str(content) for _, content in agent2_contents
   )
+
+
+# ---------------------------------------------------------------------------
+# include_sources: field validation
+# ---------------------------------------------------------------------------
+
+
+def test_include_sources_empty_list_raises():
+  """include_sources=[] must raise ValueError — use None to disable filtering."""
+  with pytest.raises(ValueError, match='include_sources=\\[\\]'):
+    LlmAgent(
+        name='agent',
+        model='gemini-2.5-flash',
+        include_sources=[],
+    )
+
+
+def test_include_sources_none_is_accepted():
+  """include_sources=None (default) must not raise."""
+  agent = LlmAgent(
+      name='agent', model='gemini-2.5-flash', include_sources=None
+  )
+  assert agent.include_sources is None
+
+
+# ---------------------------------------------------------------------------
+# include_sources: integration — user-only in sequential pipeline
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_include_sources_user_only_drops_upstream_agent_entries():
+  """Downstream agent with include_sources=['user'] receives only the human user message."""
+  agent1_model = testing_utils.MockModel.create(
+      responses=['Upstream agent reply']
+  )
+  agent1 = LlmAgent(
+      name='upstream',
+      model=agent1_model,
+      instruction='You are upstream',
+  )
+
+  agent2_model = testing_utils.MockModel.create(
+      responses=['Downstream response']
+  )
+  agent2 = LlmAgent(
+      name='downstream',
+      model=agent2_model,
+      include_sources=['user'],
+      instruction='You are downstream',
+  )
+
+  sequential = SequentialAgent(
+      name='pipeline', sub_agents=[agent1, agent2]
+  )
+  runner = testing_utils.InMemoryRunner(sequential)
+  runner.run('Original user request')
+
+  agent2_contents = testing_utils.simplify_contents(
+      agent2_model.requests[0].contents
+  )
+
+  # User message must be present
+  assert any(
+      'Original user request' in str(c) for _, c in agent2_contents
+  )
+  # Upstream agent's narrative entry must be absent
+  assert not any(
+      'Upstream agent reply' in str(c) for _, c in agent2_contents
+  )
+  assert not any('For context:' in str(c) for _, c in agent2_contents)
+
+
+# ---------------------------------------------------------------------------
+# include_sources: composing with include_contents='default' — multi-turn
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_include_sources_user_self_drops_upstream_across_turns():
+  """include_sources=['user','self'] + include_contents='default' (full history):
+  downstream agent sees all user messages and its own prior turns, but no
+  narrative entries from the upstream agent across multiple invocations.
+  """
+  agent1_model = testing_utils.MockModel.create(
+      responses=['Turn1 upstream reply', 'Turn2 upstream reply']
+  )
+  agent1 = LlmAgent(
+      name='upstream',
+      model=agent1_model,
+      instruction='You are upstream',
+  )
+
+  agent2_model = testing_utils.MockModel.create(
+      responses=['Turn1 downstream', 'Turn2 downstream']
+  )
+  agent2 = LlmAgent(
+      name='downstream',
+      model=agent2_model,
+      include_sources=['user', 'self'],
+      instruction='You are downstream',
+  )
+
+  sequential = SequentialAgent(
+      name='pipeline', sub_agents=[agent1, agent2]
+  )
+  runner = testing_utils.InMemoryRunner(sequential)
+  runner.run('Turn 1 user message')
+  runner.run('Turn 2 user message')
+
+  # Second invocation of downstream agent — should see user messages + own
+  # prior turn, but not upstream's narrative entries.
+  agent2_second_contents = testing_utils.simplify_contents(
+      agent2_model.requests[1].contents
+  )
+
+  # User messages must be present
+  assert any(
+      'Turn 1 user message' in str(c) for _, c in agent2_second_contents
+  )
+  assert any(
+      'Turn 2 user message' in str(c) for _, c in agent2_second_contents
+  )
+  # Upstream agent's narrative entries must be absent
+  assert not any(
+      'upstream reply' in str(c).lower() for _, c in agent2_second_contents
+  )
+  assert not any('For context:' in str(c) for _, c in agent2_second_contents)
diff --git a/tests/unittests/flows/llm_flows/test_contents_source_filter.py b/tests/unittests/flows/llm_flows/test_contents_source_filter.py