Skip to content

Commit 1fda297

Browse files
authored
fix(core): handle tool execution timeout/error causing IllegalStateException (#956)
ReActAgent throws IllegalStateException when tool calls time out or fail, because no tool result is written to memory, leaving orphaned pending tool call states that crash the agent on subsequent requests.

Root cause:
- Tool execution timeout/error propagates without writing results to memory
- Pending tool call state remains, blocking subsequent doCall() invocations
- validateAndAddToolResults() throws when user message has no tool results

Changes:
- doCall(): detect pending tool calls without user-provided results and auto-generate error results to clear the pending state
- executeToolCalls(): add onErrorResume to catch tool execution failures and generate error tool results instead of propagating exceptions
- Add generateAndAddErrorToolResults() helper to create error results for orphaned pending tool calls

This ensures the agent recovers gracefully from tool failures instead of crashing, and the model receives proper error feedback to continue processing.

Closes #951

## AgentScope-Java Version

[The version of AgentScope-Java you are working on, e.g. 1.0.9, check your pom.xml dependency version or run `mvn dependency:tree | grep agentscope-parent:pom`(only mac/linux)]

## Description

[Please describe the background, purpose, changes made, and how to test this PR]

## Checklist

Please check the following items before code is ready to be reviewed.

- [ ] Code has been formatted with `mvn spotless:apply`
- [ ] All tests are passing (`mvn test`)
- [ ] Javadoc comments are complete and follow project conventions
- [ ] Related documentation has been updated (e.g. links, examples, etc.)
- [ ] Code is ready for review
1 parent 0f48e3f commit 1fda297

3 files changed

Lines changed: 388 additions & 24 deletions

File tree

agentscope-core/src/main/java/io/agentscope/core/ReActAgent.java

Lines changed: 102 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import io.agentscope.core.hook.ActingChunkEvent;
2121
import io.agentscope.core.hook.Hook;
2222
import io.agentscope.core.hook.HookEvent;
23+
import io.agentscope.core.hook.PendingToolRecoveryHook;
2324
import io.agentscope.core.hook.PostActingEvent;
2425
import io.agentscope.core.hook.PostReasoningEvent;
2526
import io.agentscope.core.hook.PostSummaryEvent;
@@ -69,6 +70,7 @@
6970
import io.agentscope.core.tool.ToolExecutionContext;
7071
import io.agentscope.core.tool.ToolResultMessageBuilder;
7172
import io.agentscope.core.tool.Toolkit;
73+
import io.agentscope.core.util.ExceptionUtils;
7274
import io.agentscope.core.util.MessageUtils;
7375
import java.util.ArrayList;
7476
import java.util.Comparator;
@@ -268,9 +270,45 @@ protected Mono<Msg> doCall(List<Msg> msgs) {
268270
return executeIteration(0);
269271
}
270272

271-
// Has pending tools -> validate and add tool results
272-
validateAndAddToolResults(msgs, pendingIds);
273-
return hasPendingToolUse() ? acting(0) : executeIteration(0);
273+
// Has pending tools but no input -> resume (execute pending tools directly)
274+
if (msgs == null || msgs.isEmpty()) {
275+
return hasPendingToolUse() ? acting(0) : executeIteration(0);
276+
}
277+
278+
// Has pending tools + input -> check if user provided tool results
279+
List<ToolResultBlock> providedResults =
280+
msgs.stream()
281+
.flatMap(m -> m.getContentBlocks(ToolResultBlock.class).stream())
282+
.toList();
283+
284+
if (!providedResults.isEmpty()) {
285+
// User provided tool results -> validate and add
286+
validateAndAddToolResults(msgs, pendingIds);
287+
return hasPendingToolUse() ? acting(0) : executeIteration(0);
288+
}
289+
290+
// If PendingToolRecoveryHook is enabled, pending state should have been
291+
// patched during PreCallEvent. If we still reach here, the hook was disabled
292+
// and the user did not provide tool results — this is an unrecoverable state.
293+
throw new IllegalStateException(
294+
"Pending tool calls exist without results. "
295+
+ "Enable PendingToolRecoveryHook or provide tool results. "
296+
+ "Pending IDs: "
297+
+ pendingIds);
298+
}
299+
300+
/**
301+
* Build a {@link ToolResultBlock} representing a tool execution error.
302+
*
303+
* @param toolId the id of the tool call that failed
304+
* @param errorMessage the human-readable error description
305+
* @return a {@link ToolResultBlock} containing the formatted error message
306+
*/
307+
private static ToolResultBlock buildErrorToolResult(String toolId, String errorMessage) {
308+
return ToolResultBlock.builder()
309+
.id(toolId)
310+
.output(List.of(TextBlock.builder().text("[ERROR] " + errorMessage).build()))
311+
.build();
274312
}
275313

276314
/**
@@ -618,6 +656,10 @@ private Msg buildSuspendedMsg(List<Map.Entry<ToolUseBlock, ToolResultBlock>> pen
618656
/**
619657
* Execute tool calls and return paired results.
620658
*
659+
* <p>If tool execution fails (timeout, error, etc.), this method generates error tool results
660+
* for all pending tool calls instead of propagating the error. This ensures the agent can
661+
* continue processing and the model receives proper error feedback. {@link InterruptedException}
* and JVM {@code Error}s are still propagated.
662+
*
621663
* @param toolCalls The list of tool calls (potentially modified by PreActingEvent hooks)
622664
* @return Mono containing list of (ToolUseBlock, ToolResultBlock) pairs
623665
*/
@@ -628,7 +670,37 @@ private Mono<List<Map.Entry<ToolUseBlock, ToolResultBlock>>> executeToolCalls(
628670
results ->
629671
IntStream.range(0, toolCalls.size())
630672
.mapToObj(i -> Map.entry(toolCalls.get(i), results.get(i)))
631-
.toList());
673+
.toList())
674+
.onErrorResume(
675+
Exception.class,
676+
error -> {
677+
// Preserve interruption signal for agent stop policy
678+
if (error instanceof InterruptedException) {
679+
return Mono.error(error);
680+
}
681+
// Generate error tool results for all pending tool calls.
682+
// Only catch Exception subclasses; critical JVM errors
683+
// (e.g. OutOfMemoryError) are left to propagate.
684+
String errorMsg = ExceptionUtils.getErrorMessage(error);
685+
log.error(
686+
"Tool execution failed, generating error results for {} tool"
687+
+ " calls",
688+
toolCalls.size(),
689+
error);
690+
List<Map.Entry<ToolUseBlock, ToolResultBlock>> errorResults =
691+
toolCalls.stream()
692+
.map(
693+
toolCall -> {
694+
ToolResultBlock errorResult =
695+
buildErrorToolResult(
696+
toolCall.getId(),
697+
"Tool execution failed: "
698+
+ errorMsg);
699+
return Map.entry(toolCall, errorResult);
700+
})
701+
.toList();
702+
return Mono.just(errorResults);
703+
});
632704
}
633705

634706
/**
@@ -1043,6 +1115,7 @@ public static class Builder {
10431115
private PlanNotebook planNotebook;
10441116
private SkillBox skillBox;
10451117
private ToolExecutionContext toolExecutionContext;
1118+
private boolean enablePendingToolRecovery = false;
10461119

10471120
// Long-term memory configuration
10481121
private LongTermMemory longTermMemory;
@@ -1181,6 +1254,26 @@ public Builder enableMetaTool(boolean enableMetaTool) {
11811254
return this;
11821255
}
11831256

1257+
/**
1258+
* Enables or disables automatic recovery from orphaned pending tool calls.
1259+
*
1260+
* <p>When enabled, a {@link PendingToolRecoveryHook} is automatically
1261+
* registered to detect and patch orphaned pending tool calls with synthetic error
1262+
* results before agent processing begins. This prevents {@link IllegalStateException}
1263+
* when tool execution fails, times out, or is interrupted.
1264+
*
1265+
* <p>Disable this if you prefer to handle pending tool calls manually, for example
1266+
* through HITL (Human-in-the-loop) mechanisms or custom error handling strategies.
1267+
*
1268+
* @param enable true to enable auto-recovery, false to disable (the default)
1269+
* @return This builder instance for method chaining
1270+
* @see PendingToolRecoveryHook
1271+
*/
1272+
public Builder enablePendingToolRecovery(boolean enable) {
1273+
this.enablePendingToolRecovery = enable;
1274+
return this;
1275+
}
1276+
11841277
/**
11851278
* Sets the execution configuration for model API calls.
11861279
*
@@ -1449,6 +1542,11 @@ public ReActAgent build() {
14491542
agentToolkit.registerMetaTool();
14501543
}
14511544

1545+
// Register PendingToolRecoveryHook if enabled
1546+
if (enablePendingToolRecovery) {
1547+
hooks.add(new PendingToolRecoveryHook());
1548+
}
1549+
14521550
// Configure long-term memory if provided
14531551
if (longTermMemory != null) {
14541552
configureLongTermMemory(agentToolkit);
Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
/*
2+
* Copyright 2024-2026 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package io.agentscope.core.hook;
17+
18+
import io.agentscope.core.ReActAgent;
19+
import io.agentscope.core.agent.Agent;
20+
import io.agentscope.core.memory.Memory;
21+
import io.agentscope.core.message.Msg;
22+
import io.agentscope.core.message.MsgRole;
23+
import io.agentscope.core.message.TextBlock;
24+
import io.agentscope.core.message.ToolResultBlock;
25+
import io.agentscope.core.message.ToolUseBlock;
26+
import io.agentscope.core.tool.ToolResultMessageBuilder;
27+
import java.util.List;
28+
import java.util.Set;
29+
import java.util.stream.Collectors;
30+
import org.slf4j.Logger;
31+
import org.slf4j.LoggerFactory;
32+
import reactor.core.publisher.Mono;
33+
34+
/**
35+
* Hook that automatically recovers from orphaned pending tool calls by generating error
36+
* {@link ToolResultBlock}s before the agent processes new input.
37+
*
38+
* <p>When tool execution fails, times out, or is interrupted, tool call states may remain in
39+
* memory without corresponding results. This hook detects such orphaned pending tool calls at
40+
* {@link PreCallEvent} time and patches them with synthetic error results, allowing the agent
41+
* to continue processing instead of crashing with {@link IllegalStateException}.
42+
*
43+
* <p>This hook is registered by default in {@link ReActAgent.Builder}. Users can disable it
44+
* via {@link ReActAgent.Builder#enablePendingToolRecovery(boolean)} if they prefer to handle
45+
* pending tool calls manually (e.g., through HITL mechanisms).
46+
*
47+
* <p><b>Behavior:</b>
48+
* <ul>
49+
* <li>Only activates when the agent is a {@link ReActAgent}</li>
50+
* <li>Only patches when pending tool calls exist AND user input does not contain
51+
* {@link ToolResultBlock}s (i.e., user is not providing results themselves)</li>
52+
* <li>Generated error results are added to memory as TOOL-role messages</li>
53+
* </ul>
54+
*
55+
* @see ReActAgent
56+
* @see PreCallEvent
57+
*/
58+
public class PendingToolRecoveryHook implements Hook {
59+
60+
private static final Logger log = LoggerFactory.getLogger(PendingToolRecoveryHook.class);
61+
62+
@Override
63+
public <T extends HookEvent> Mono<T> onEvent(T event) {
64+
if (event instanceof PreCallEvent preCallEvent) {
65+
@SuppressWarnings("unchecked")
66+
Mono<T> result = (Mono<T>) handlePreCall(preCallEvent);
67+
return result;
68+
}
69+
return Mono.just(event);
70+
}
71+
72+
@Override
73+
public int priority() {
74+
// High priority — must run before other hooks that depend on memory state
75+
return 10;
76+
}
77+
78+
/**
79+
* Detect and patch orphaned pending tool calls before agent processing begins.
80+
*
81+
* @param event the PreCallEvent containing agent and input messages
82+
* @return Mono containing the unmodified event after patching is complete
83+
*/
84+
private Mono<PreCallEvent> handlePreCall(PreCallEvent event) {
85+
Agent agent = event.getAgent();
86+
if (!(agent instanceof ReActAgent reactAgent)) {
87+
return Mono.just(event);
88+
}
89+
90+
Memory memory = reactAgent.getMemory();
91+
if (memory == null) {
92+
return Mono.just(event);
93+
}
94+
95+
// Find pending tool call IDs (tool calls without corresponding results)
96+
Set<String> pendingIds = findPendingToolUseIds(memory);
97+
if (pendingIds.isEmpty()) {
98+
return Mono.just(event);
99+
}
100+
101+
// Check if user already provided tool results in the input
102+
List<Msg> inputMessages = event.getInputMessages();
103+
104+
// If input is empty/null, the user is resuming (wants to continue acting).
105+
// Do NOT patch — let ReActAgent's doCall handle the resume flow.
106+
if (inputMessages == null || inputMessages.isEmpty()) {
107+
return Mono.just(event);
108+
}
109+
110+
boolean userProvidedResults =
111+
inputMessages.stream().anyMatch(m -> m.hasContentBlocks(ToolResultBlock.class));
112+
if (userProvidedResults) {
113+
return Mono.just(event);
114+
}
115+
116+
// Auto-patch: generate error tool results for orphaned pending tool calls
117+
log.warn(
118+
"Pending tool calls detected without results, auto-generating error results."
119+
+ " Pending IDs: {}",
120+
pendingIds);
121+
122+
patchPendingToolCalls(reactAgent, memory, pendingIds);
123+
return Mono.just(event);
124+
}
125+
126+
/**
127+
* Find tool call IDs from the last assistant message that have no corresponding
128+
* {@link ToolResultBlock} in memory.
129+
*
130+
* @param memory the agent's memory
131+
* @return set of pending tool use IDs, empty if none
132+
*/
133+
private Set<String> findPendingToolUseIds(Memory memory) {
134+
List<Msg> messages = memory.getMessages();
135+
136+
// Find last assistant message
137+
Msg lastAssistant = null;
138+
for (int i = messages.size() - 1; i >= 0; i--) {
139+
if (messages.get(i).getRole() == MsgRole.ASSISTANT) {
140+
lastAssistant = messages.get(i);
141+
break;
142+
}
143+
}
144+
145+
if (lastAssistant == null || !lastAssistant.hasContentBlocks(ToolUseBlock.class)) {
146+
return Set.of();
147+
}
148+
149+
// Collect all existing tool result IDs in memory
150+
Set<String> existingResultIds =
151+
messages.stream()
152+
.flatMap(m -> m.getContentBlocks(ToolResultBlock.class).stream())
153+
.map(ToolResultBlock::getId)
154+
.collect(Collectors.toSet());
155+
156+
// Return tool call IDs that have no result yet
157+
return lastAssistant.getContentBlocks(ToolUseBlock.class).stream()
158+
.map(ToolUseBlock::getId)
159+
.filter(id -> !existingResultIds.contains(id))
160+
.collect(Collectors.toSet());
161+
}
162+
163+
/**
164+
* Generate error {@link ToolResultBlock}s for each pending tool call and add them
165+
* to memory as TOOL-role messages.
166+
*
167+
* @param agent the ReActAgent instance
168+
* @param memory the agent's memory
169+
* @param pendingIds the set of pending tool use IDs to patch
170+
*/
171+
private void patchPendingToolCalls(ReActAgent agent, Memory memory, Set<String> pendingIds) {
172+
List<Msg> messages = memory.getMessages();
173+
174+
// Find last assistant message to get ToolUseBlock details
175+
Msg lastAssistant = null;
176+
for (int i = messages.size() - 1; i >= 0; i--) {
177+
if (messages.get(i).getRole() == MsgRole.ASSISTANT) {
178+
lastAssistant = messages.get(i);
179+
break;
180+
}
181+
}
182+
if (lastAssistant == null) {
183+
return;
184+
}
185+
186+
List<ToolUseBlock> pendingToolCalls =
187+
lastAssistant.getContentBlocks(ToolUseBlock.class).stream()
188+
.filter(toolUse -> pendingIds.contains(toolUse.getId()))
189+
.toList();
190+
191+
for (ToolUseBlock toolCall : pendingToolCalls) {
192+
ToolResultBlock errorResult = buildErrorToolResult(toolCall);
193+
Msg toolResultMsg =
194+
ToolResultMessageBuilder.buildToolResultMsg(
195+
errorResult, toolCall, agent.getName());
196+
memory.addMessage(toolResultMsg);
197+
198+
log.info(
199+
"Auto-generated error result for pending tool call: {} ({})",
200+
toolCall.getName(),
201+
toolCall.getId());
202+
}
203+
}
204+
205+
/**
206+
* Build an error {@link ToolResultBlock} for a failed or orphaned tool call.
207+
*
208+
* @param toolCall the tool call that has no result
209+
* @return a ToolResultBlock containing a formatted error message
210+
*/
211+
private static ToolResultBlock buildErrorToolResult(ToolUseBlock toolCall) {
212+
return ToolResultBlock.builder()
213+
.id(toolCall.getId())
214+
.output(
215+
List.of(
216+
TextBlock.builder()
217+
.text(
218+
"[ERROR] Previous tool execution failed or was"
219+
+ " interrupted. Tool: "
220+
+ toolCall.getName())
221+
.build()))
222+
.build();
223+
}
224+
}

0 commit comments

Comments
 (0)