fix: terminate infinite retry loop in LoadSkillResourceTool on RESOURCE_NOT_FOUND

google-genai-bot · xuanyang15 · commit bc45ee67cd34 · 2026-05-28T13:51:25.000-07:00
Merge #5651 Fixes #5652 ORIGINAL_AUTHOR=Botta Venkata Leela Raman Raj <110351568+Raman369AI@users.noreply.github.com> GitOrigin-RevId: e25c08e Change-Id: Ie5c151b22674845d7e92afbc88422ad873d3cd55
diff --git a/src/google/adk/tools/skill_toolset.py b/src/google/adk/tools/skill_toolset.py
@@ -83,10 +83,14 @@ def _build_skill_system_instruction(prefix: str | None = None) -> str:
       "of them in order.\n"
       f"3. The `{p}load_skill_resource` tool is for viewing files within a "
       "skill's directory (e.g., `references/*`, `assets/*`, `scripts/*`). "
-      "Do NOT use other tools to access these files.\n"
+      "It is ONLY for skill-bundled files — do NOT use it to access "
+      "documents or files provided by the user at runtime. Do NOT use "
+      "other tools to access skill files.\n"
       f"4. Use `{p}run_skill_script` to run scripts from a skill's `scripts/` "
       f"directory. Use `{p}load_skill_resource` to view script content first if "
       "needed.\n"
+      f"5. If `{p}load_skill_resource` returns any error, do not retry any "
+      "path. Report the error to the user and stop.\n"
   )
 
 
@@ -348,6 +352,23 @@ async def run_async(
       }
 
     if content is None:
+      # Invocation-scoped failure counter. Counts RESOURCE_NOT_FOUND across ALL
+      # paths so the guard fires even when the LLM hallucinates a different path
+      # on each retry. The `temp:` prefix prevents persistence to durable
+      # session storage; invocation_id isolates in-memory backends.
+      counter_key = f"temp:_adk_skill_resource_not_found_count_{tool_context.invocation_id}"
+      fail_count = int(tool_context.state.get(counter_key) or 0) + 1
+      tool_context.state[counter_key] = fail_count
+      if fail_count > 1:
+        return {
+            "error": (
+                f"Resource '{file_path}' not found in skill '{skill_name}'."
+                f" This is resource lookup failure #{fail_count} this"
+                " invocation. Do not retry any path — report the error to"
+                " the user and stop."
+            ),
+            "error_code": "RESOURCE_NOT_FOUND_FATAL",
+        }
       return {
           "error": f"Resource '{file_path}' not found in skill '{skill_name}'.",
           "error_code": "RESOURCE_NOT_FOUND",
diff --git a/tests/unittests/tools/test_skill_toolset.py b/tests/unittests/tools/test_skill_toolset.py
@@ -332,16 +332,10 @@ async def test_load_skill_run_async_state_none(
                 "error_code": "SKILL_NOT_FOUND",
             },
         ),
-        (
-            {"skill_name": "skill1", "file_path": "references/other.md"},
-            {
-                "error": (
-                    "Resource 'references/other.md' not found in skill"
-                    " 'skill1'."
-                ),
-                "error_code": "RESOURCE_NOT_FOUND",
-            },
-        ),
+        # RESOURCE_NOT_FOUND is tested separately in
+        # test_load_resource_first_missing_returns_soft_error because the
+        # counter guard requires a real state dict (mock state.get() returns a
+        # truthy MagicMock that int() coerces to 1, skipping the soft path).
         (
             {"skill_name": "skill1", "file_path": "invalid/path.txt"},
             {
@@ -479,27 +473,149 @@ def test_duplicate_skill_name_raises(mock_skill1):
 
 
 @pytest.mark.asyncio
-async def test_scripts_resource_not_found(mock_skill1, tool_context_instance):
+async def test_scripts_resource_not_found(mock_skill1):
   toolset = skill_toolset.SkillToolset([mock_skill1])
   tool = skill_toolset.LoadSkillResourceTool(toolset)
+  ctx = _make_tool_context_with_agent()
   result = await tool.run_async(
       args={"skill_name": "skill1", "file_path": "scripts/nonexistent.sh"},
-      tool_context=tool_context_instance,
+      tool_context=ctx,
   )
   assert result["error_code"] == "RESOURCE_NOT_FOUND"
 
 
+@pytest.mark.asyncio
+async def test_load_resource_first_missing_returns_soft_error(mock_skill1):
+  """First RESOURCE_NOT_FOUND in an invocation returns the soft error code."""
+  toolset = skill_toolset.SkillToolset([mock_skill1])
+  tool = skill_toolset.LoadSkillResourceTool(toolset)
+  ctx = _make_tool_context_with_agent()
+  result = await tool.run_async(
+      args={"skill_name": "skill1", "file_path": "references/other.md"},
+      tool_context=ctx,
+  )
+  assert result["error_code"] == "RESOURCE_NOT_FOUND"
+  assert result["error"] == (
+      "Resource 'references/other.md' not found in skill 'skill1'."
+  )
+
+
+@pytest.mark.asyncio
+async def test_load_resource_repeated_failure_escalates_to_fatal(mock_skill1):
+  """Any second RESOURCE_NOT_FOUND within an invocation returns RESOURCE_NOT_FOUND_FATAL."""
+  toolset = skill_toolset.SkillToolset([mock_skill1])
+  tool = skill_toolset.LoadSkillResourceTool(toolset)
+  ctx = _make_tool_context_with_agent()
+
+  args = {"skill_name": "skill1", "file_path": "references/nonexistent.md"}
+
+  result1 = await tool.run_async(args=args, tool_context=ctx)
+  assert result1["error_code"] == "RESOURCE_NOT_FOUND"
+
+  result2 = await tool.run_async(args=args, tool_context=ctx)
+  assert result2["error_code"] == "RESOURCE_NOT_FOUND_FATAL"
+  assert "Do not retry" in result2["error"]
+  assert "stop" in result2["error"].lower()
+  assert "failure #2" in result2["error"]
+
+
+@pytest.mark.asyncio
+async def test_load_resource_different_path_also_escalates_to_fatal(
+    mock_skill1,
+):
+  """A different missing path on the second call still escalates to RESOURCE_NOT_FOUND_FATAL.
+
+  The counter is path-agnostic: any second not-found within the same invocation
+  is fatal, even when the LLM hallucinates a different path on each retry.
+  """
+  toolset = skill_toolset.SkillToolset([mock_skill1])
+  tool = skill_toolset.LoadSkillResourceTool(toolset)
+  ctx = _make_tool_context_with_agent()
+
+  result1 = await tool.run_async(
+      args={"skill_name": "skill1", "file_path": "references/missing_a.md"},
+      tool_context=ctx,
+  )
+  assert result1["error_code"] == "RESOURCE_NOT_FOUND"
+
+  result2 = await tool.run_async(
+      args={"skill_name": "skill1", "file_path": "references/missing_b.md"},
+      tool_context=ctx,
+  )
+  assert result2["error_code"] == "RESOURCE_NOT_FOUND_FATAL"
+  assert "Do not retry" in result2["error"]
+
+
+@pytest.mark.asyncio
+async def test_load_resource_failures_isolated_per_invocation(mock_skill1):
+  """Failure counter does not leak across invocations.
+
+  A RESOURCE_NOT_FOUND in invocation A must not increment invocation B's
+  counter; invocation B's first missing-resource call must still return the
+  soft error, even when both invocations share the same session state dict.
+  """
+  toolset = skill_toolset.SkillToolset([mock_skill1])
+  tool = skill_toolset.LoadSkillResourceTool(toolset)
+
+  shared_state = {}
+  ctx_a = _make_tool_context_with_agent(invocation_id="inv_a")
+  ctx_a.state = shared_state
+  ctx_b = _make_tool_context_with_agent(invocation_id="inv_b")
+  ctx_b.state = shared_state
+
+  # invocation A: one failure — counter for inv_a reaches 1 (soft).
+  result_a = await tool.run_async(
+      args={"skill_name": "skill1", "file_path": "references/typo.md"},
+      tool_context=ctx_a,
+  )
+  assert result_a["error_code"] == "RESOURCE_NOT_FOUND"
+
+  # invocation B, first attempt (different path) — counter for inv_b = 1 (soft).
+  result_b1 = await tool.run_async(
+      args={"skill_name": "skill1", "file_path": "references/typo.md"},
+      tool_context=ctx_b,
+  )
+  assert result_b1["error_code"] == "RESOURCE_NOT_FOUND"
+
+  # invocation B, second attempt (different path) — counter for inv_b = 2 (fatal).
+  result_b2 = await tool.run_async(
+      args={"skill_name": "skill1", "file_path": "references/other.md"},
+      tool_context=ctx_b,
+  )
+  assert result_b2["error_code"] == "RESOURCE_NOT_FOUND_FATAL"
+
+
+@pytest.mark.asyncio
+async def test_load_resource_counter_uses_temp_prefix(mock_skill1):
+  """Failure-counter key uses the `temp:` prefix so it is not persisted."""
+  toolset = skill_toolset.SkillToolset([mock_skill1])
+  tool = skill_toolset.LoadSkillResourceTool(toolset)
+  ctx = _make_tool_context_with_agent()
+
+  await tool.run_async(
+      args={"skill_name": "skill1", "file_path": "references/missing.md"},
+      tool_context=ctx,
+  )
+
+  # The counter key must start with `temp:` so it is trimmed from the event
+  # delta and never reaches durable storage.
+  guard_keys = [k for k in ctx.state if "skill_resource_not_found_count" in k]
+  assert guard_keys, "Failure counter did not write a tracking key."
+  assert all(k.startswith("temp:") for k in guard_keys)
+
+
 # RunSkillScriptTool tests
 
 
-def _make_tool_context_with_agent(agent=None):
+def _make_tool_context_with_agent(agent=None, invocation_id="test_invocation"):
   """Creates a mock ToolContext with _invocation_context.agent."""
   ctx = mock.MagicMock(spec=tool_context.ToolContext)
   ctx._invocation_context = mock.MagicMock()
   ctx._invocation_context.agent = agent or mock.MagicMock()
   ctx._invocation_context.agent.name = "test_agent"
   ctx._invocation_context.agent_states = {}
   ctx.agent_name = "test_agent"
+  ctx.invocation_id = invocation_id
   ctx.state = {}
   return ctx