fix: sandbox nested persona template rendering in evaluation prompts

petrmarinec · GWeale · copybara-github · commit 30493bae56f6 · 2026-06-17T15:48:39.000-07:00
Merge #5266

### Link to Issue or Description of Change

**1. Link to an existing issue (if applicable):**

- Related: #5265

**2. Or, if no issue exists, describe the change:**

**Problem:**
Nested persona behavior strings were rendered through `render_string_filter`, which created a fresh Jinja template from persona-controlled content instead of consistently reusing a sandboxed environment. That allowed nested persona templates to execute outside the intended sandbox boundary in evaluation prompt construction.

Affected files:

- `src/google/adk/evaluation/simulation/llm_backed_user_simulator_prompts.py`
- `src/google/adk/evaluation/simulation/per_turn_user_simulator_quality_prompts.py`

**Solution:**
Render nested persona strings through `SandboxedEnvironment` and use `SandboxedEnvironment` for the per-turn evaluator prompt builder as well. This keeps supported nested placeholders such as `{{ stop_signal }}` working while blocking unsafe nested template access. Added regression tests for both the safe interpolation path and blocked unsafe attribute traversal.

### Testing Plan

**Unit Tests:**

- [x] I have added or updated unit tests for my change.
- [ ] All unit tests pass locally.

Passed in clean Linux Docker (`python:3.11-bookworm`):

- `uv sync --all-extras`
- `pytest tests/unittests/evaluation/simulation`
- Result: `70 passed`

Additional repo-wide validation:

- `pytest tests/unittests`
- Result on patched branch: `5326 passed, 1 skipped, 5 failed`
- The same 5 failures reproduce on unmodified `origin/main`
- Those failures are unrelated `tests/unittests/tools/test_skill_toolset.py` integration timeouts

**Manual End-to-End (E2E) Tests:**

- [x] Ran a live `adk web` regression test against the eval API in Linux Docker using a local non-LLM root agent.
- [x] Malicious nested persona template `{{ ''.__class__.__mro__ }}` was blocked during prompt construction with `jinja2.exceptions.SecurityError`.
- [x] A safe persona using nested `{{ stop_signal }}` placeholders did not raise `TemplateSyntaxError` or `SecurityError` and progressed beyond prompt rendering into a real Gemini model call.
- [x] The safe run did not fully complete because the test key hit `429 RESOURCE_EXHAUSTED`, but the absence of template errors and the subsequent model call confirm the sandboxed nested rendering path is functioning as intended.

### Checklist

- [x] I have read the [CONTRIBUTING.md](https://github.com/google/adk-python/blob/main/CONTRIBUTING.md) document.
- [x] I have performed a self-review of my own code.
- [ ] I have commented my code, particularly in hard-to-understand areas.
- [x] I have added tests that prove my fix is effective or that my feature works.
- [ ] New and existing unit tests pass locally with my changes.
- [x] I have manually tested my changes end-to-end.
- [x] Any dependent changes have been merged and published in downstream modules.

### Additional context

This is a small, focused fix intended to close the nested-template sandbox bypass path without changing the supported nested placeholder behavior used by existing personas.

Co-authored-by: George Weale <gweale@google.com>
COPYBARA_INTEGRATE_REVIEW=#5266 from petrmarinec:fix-jinja-ssti-sandbox edc6364
PiperOrigin-RevId: 933966100
diff --git a/src/google/adk/evaluation/simulation/llm_backed_user_simulator_prompts.py b/src/google/adk/evaluation/simulation/llm_backed_user_simulator_prompts.py
@@ -185,7 +185,6 @@ def get_llm_backed_user_simulator_prompt(
   """Formats the prompt for the llm-backed user simulator"""
   from jinja2 import DictLoader
   from jinja2 import pass_context
-  from jinja2 import Template
   from jinja2.sandbox import SandboxedEnvironment
 
   templates = {
@@ -200,7 +199,7 @@ def get_llm_backed_user_simulator_prompt(
   def _render_string_filter(context, template_string):
     if not template_string:
       return ""
-    return Template(template_string).render(context)
+    return template_env.from_string(template_string).render(context.get_all())
 
   template_env.filters["render_string_filter"] = _render_string_filter
 
diff --git a/src/google/adk/evaluation/simulation/per_turn_user_simulator_quality_prompts.py b/src/google/adk/evaluation/simulation/per_turn_user_simulator_quality_prompts.py
@@ -221,9 +221,8 @@ def get_per_turn_user_simulator_quality_prompt(
 ):
   """Formats the prompt for the per turn user simulator evaluator"""
   from jinja2 import DictLoader
-  from jinja2 import Environment
   from jinja2 import pass_context
-  from jinja2 import Template
+  from jinja2.sandbox import SandboxedEnvironment
 
   templates = {
       "verifier_instructions": (
@@ -232,13 +231,13 @@ def get_per_turn_user_simulator_quality_prompt(
           )
       ),
   }
-  template_env = Environment(loader=DictLoader(templates))
+  template_env = SandboxedEnvironment(loader=DictLoader(templates))
 
   @pass_context
   def _render_string_filter(context, template_string):
     if not template_string:
       return ""
-    return Template(template_string).render(context)
+    return template_env.from_string(template_string).render(context.get_all())
 
   template_env.filters["render_string_filter"] = _render_string_filter
 
diff --git a/tests/unittests/evaluation/simulation/test_llm_backed_user_simulator_prompts.py b/tests/unittests/evaluation/simulation/test_llm_backed_user_simulator_prompts.py
@@ -21,6 +21,7 @@
 from google.adk.evaluation.simulation.llm_backed_user_simulator_prompts import is_valid_user_simulator_template
 from google.adk.evaluation.simulation.user_simulator_personas import UserBehavior
 from google.adk.evaluation.simulation.user_simulator_personas import UserPersona
+from jinja2.exceptions import SecurityError
 import pytest
 
 _MOCK_DEFAULT_TEMPLATE = textwrap.dedent("""\
@@ -208,6 +209,57 @@ def test_get_llm_backed_user_simulator_prompt_with_persona(self, mocker):
       test stop""").strip()
     assert prompt == expected_prompt
 
+  def test_get_llm_backed_user_simulator_prompt_renders_persona_templates_in_sandbox(
+      self,
+  ):
+    user_persona = UserPersona(
+        id="test_persona",
+        description="Test persona description",
+        behaviors=[
+            UserBehavior(
+                name="Behavior {{ stop_signal }}",
+                description="Description {{ stop_signal }}",
+                behavior_instructions=["instruction {{ stop_signal }}"],
+                violation_rubrics=["rubric 1"],
+            )
+        ],
+    )
+
+    prompt = get_llm_backed_user_simulator_prompt(
+        conversation_plan="test plan",
+        conversation_history="test history",
+        stop_signal="test stop",
+        user_persona=user_persona,
+    )
+
+    assert "## Behavior test stop" in prompt
+    assert "Description test stop" in prompt
+    assert "  * instruction test stop" in prompt
+
+  def test_get_llm_backed_user_simulator_prompt_blocks_unsafe_persona_templates(
+      self,
+  ):
+    user_persona = UserPersona(
+        id="test_persona",
+        description="Test persona description",
+        behaviors=[
+            UserBehavior(
+                name="{{ ''.__class__.__mro__ }}",
+                description="Test behavior description",
+                behavior_instructions=["instruction 1"],
+                violation_rubrics=["rubric 1"],
+            )
+        ],
+    )
+
+    with pytest.raises(SecurityError):
+      get_llm_backed_user_simulator_prompt(
+          conversation_plan="test plan",
+          conversation_history="test history",
+          stop_signal="test stop",
+          user_persona=user_persona,
+      )
+
 
 class TestIsValidUserSimulatorTemplate:
   """Test cases for is_valid_user_simulator_template."""
diff --git a/tests/unittests/evaluation/simulation/test_per_turn_user_simulation_quality_prompts.py b/tests/unittests/evaluation/simulation/test_per_turn_user_simulation_quality_prompts.py
@@ -20,6 +20,8 @@
 from google.adk.evaluation.simulation.per_turn_user_simulator_quality_prompts import get_per_turn_user_simulator_quality_prompt
 from google.adk.evaluation.simulation.user_simulator_personas import UserBehavior
 from google.adk.evaluation.simulation.user_simulator_personas import UserPersona
+from jinja2.exceptions import SecurityError
+import pytest
 
 _MOCK_DEFAULT_TEMPLATE = textwrap.dedent("""\
   Default template
@@ -182,3 +184,56 @@ def test_get_per_turn_user_simulator_quality_prompt_with_persona(
       # Stop signal
       stop""").strip()
     assert prompt == expected_prompt
+
+  def test_get_per_turn_user_simulator_quality_prompt_renders_persona_templates_in_sandbox(
+      self,
+  ):
+    persona = UserPersona(
+        id="test_persona",
+        description="Test persona description.",
+        behaviors=[
+            UserBehavior(
+                name="criteria {{ stop_signal }}",
+                description="Test behavior {{ stop_signal }}.",
+                behavior_instructions=["instruction1"],
+                violation_rubrics=["violation {{ stop_signal }}"],
+            )
+        ],
+    )
+
+    prompt = get_per_turn_user_simulator_quality_prompt(
+        conversation_plan="plan",
+        conversation_history="history",
+        generated_user_response="response",
+        stop_signal="stop",
+        user_persona=persona,
+    )
+
+    assert "## Criteria: criteria stop" in prompt
+    assert "Test behavior stop." in prompt
+    assert "  * violation stop" in prompt
+
+  def test_get_per_turn_user_simulator_quality_prompt_blocks_unsafe_persona_templates(
+      self,
+  ):
+    persona = UserPersona(
+        id="test_persona",
+        description="Test persona description.",
+        behaviors=[
+            UserBehavior(
+                name="{{ ''.__class__.__mro__ }}",
+                description="Test behavior description.",
+                behavior_instructions=["instruction1"],
+                violation_rubrics=["violation1"],
+            )
+        ],
+    )
+
+    with pytest.raises(SecurityError):
+      get_per_turn_user_simulator_quality_prompt(
+          conversation_plan="plan",
+          conversation_history="history",
+          generated_user_response="response",
+          stop_signal="stop",
+          user_persona=persona,
+      )