Skip to content

Commit f52e8cd

Browse files
authored
feat(agent): classify structured agent send errors (#356)
## Summary

- Add a public agent stream error taxonomy with ownership, code, retry, feedback, and resolution metadata.
- Convert accepted-turn send failures and empty provider replies into structured `message.stream` error events while preserving turn completion semantics.
- Classify user agent, user LLM provider, AionUI, and unknown upstream failures with priority-aware matching and redacted technical detail.
- Preserve mainline channel warmup/stream subscription behavior and adapt affected mocks/tests to `AgentSendError`.

## Verification

- [x] `just push --force-with-lease origin agent-error-taxonomy` (cargo fix, clippy fix, fmt, nextest, push)
- [x] `cargo nextest run --workspace` via `just push`: 5735 passed, 18 skipped
- [x] `cargo test -p aionui-channel`: 204 unit tests plus channel integration tests passed
1 parent 3481956 commit f52e8cd

33 files changed

Lines changed: 1762 additions & 121 deletions

crates/aionui-ai-agent/src/agent_runtime.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ use std::sync::{Arc, RwLock};
1414

1515
use tokio::sync::broadcast;
1616

17+
use aionui_api_types::AgentStreamErrorData;
1718
use aionui_common::{ConversationStatus, TimestampMs, now_ms};
1819

1920
use crate::protocol::events::{AgentStreamEvent, ErrorEventData, FinishEventData};
@@ -127,6 +128,12 @@ impl AgentRuntime {
127128
/// Atomic: set status ← Finished AND broadcast `Error { message }`.
128129
/// Idempotent in the Finished absorbing state (no-op).
129130
pub fn emit_error(&self, message: impl Into<String>) {
131+
self.emit_error_data(ErrorEventData::legacy(message, None));
132+
}
133+
134+
/// Atomic: set status ← Finished AND broadcast the structured error payload.
135+
/// Idempotent in the Finished absorbing state (no-op).
136+
pub fn emit_error_data(&self, data: AgentStreamErrorData) {
130137
let already_finished = {
131138
let mut guard = self.status.write().unwrap_or_else(|e| e.into_inner());
132139
let was_finished = matches!(*guard, Some(ConversationStatus::Finished));
@@ -138,10 +145,7 @@ impl AgentRuntime {
138145
if already_finished {
139146
return;
140147
}
141-
let _ = self.event_tx.send(AgentStreamEvent::Error(ErrorEventData {
142-
message: message.into(),
143-
code: None,
144-
}));
148+
let _ = self.event_tx.send(AgentStreamEvent::Error(data));
145149
}
146150
}
147151

crates/aionui-ai-agent/src/agent_task.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ use crate::manager::nanobot::NanobotAgentManager;
2222
use crate::manager::openclaw::OpenClawAgentManager;
2323
use crate::manager::remote::RemoteAgentManager;
2424
use crate::protocol::events::AgentStreamEvent;
25+
use crate::protocol::send_error::AgentSendError;
2526
use crate::types::SendMessageData;
2627

2728
use aionui_api_types::{
@@ -62,7 +63,7 @@ pub trait IAgentTask: Send + Sync {
6263
/// Send a user message to the agent. Returns once the agent has
6364
/// accepted the turn; actual streaming proceeds on the broadcast
6465
/// channel returned by [`Self::subscribe`].
65-
async fn send_message(&self, data: SendMessageData) -> Result<(), AppError>;
66+
async fn send_message(&self, data: SendMessageData) -> Result<(), AgentSendError>;
6667

6768
/// Stop the current streaming response without killing the agent.
6869
async fn cancel(&self) -> Result<(), AppError>;
@@ -220,7 +221,7 @@ impl AgentInstance {
220221
}
221222

222223
/// Send a user message to the agent.
223-
pub async fn send_message(&self, data: SendMessageData) -> Result<(), AppError> {
224+
pub async fn send_message(&self, data: SendMessageData) -> Result<(), AgentSendError> {
224225
self.as_task().send_message(data).await
225226
}
226227

crates/aionui-ai-agent/src/capability/backend_output_sink.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -131,10 +131,9 @@ impl OutputSink for BackendOutputSink {
131131
}
132132

133133
fn emit_error(&self, msg: &str) {
134-
let _ = self.event_tx.send(AgentStreamEvent::Error(ErrorEventData {
135-
message: msg.to_owned(),
136-
code: None,
137-
}));
134+
let _ = self
135+
.event_tx
136+
.send(AgentStreamEvent::Error(ErrorEventData::legacy(msg, None)));
138137
}
139138

140139
fn emit_info(&self, msg: &str) {

crates/aionui-ai-agent/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ pub use factory::{AgentFactoryDeps, build_agent_factory};
3232
pub use idle_scanner::start_idle_scanner;
3333
pub use persistence::AcpSessionSyncService;
3434
pub use protocol::events::AgentStreamEvent;
35+
pub use protocol::send_error::AgentSendError;
3536
pub use registry::{AgentRegistry, UnavailableReason};
3637
pub use routes::{AgentRouterState, RemoteAgentRouterState, agent_routes, remote_agent_routes};
3738
pub use services::AgentService;

crates/aionui-ai-agent/src/manager/acp/agent.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use crate::manager::process_registry::{register_session_process, unregister_agen
1111
use crate::protocol::acp::AcpProtocol;
1212
use crate::protocol::error::{AcpError, CloseReason};
1313
use crate::protocol::events::{AgentStreamEvent, FinishEventData};
14+
use crate::protocol::send_error::AgentSendError;
1415
use crate::registry::CatalogSender;
1516
use crate::shared_kernel::{ModeId, ModelId, SessionId as DomainSessionId};
1617
use crate::types::SendMessageData;
@@ -605,17 +606,17 @@ impl crate::agent_task::IAgentTask for AcpAgentManager {
605606
}
606607

607608
#[tracing::instrument(skip_all, fields(conversation_id = %self.params.conversation_id, msg_id = %data.msg_id))]
608-
async fn send_message(&self, data: SendMessageData) -> Result<(), AppError> {
609+
async fn send_message(&self, data: SendMessageData) -> Result<(), AgentSendError> {
609610
self.runtime.bump_activity();
610611

611-
let result = self.ensure_session_and_send(&data).await;
612-
match &result {
612+
match self.ensure_session_and_send(&data).await {
613613
Ok(()) => {
614614
info!("ACP send_message completed");
615615
// ACP pattern: Finish with session_id = None (default).
616616
// If ACP later wants to include the session_id in Finish,
617617
// read it from `self.session.read().await.session_id()`.
618618
self.runtime.emit_finish(None);
619+
Ok(())
619620
}
620621
Err(err) => {
621622
// Build a CloseReason that captures whatever context we still
@@ -628,22 +629,23 @@ impl crate::agent_task::IAgentTask for AcpAgentManager {
628629
// stderr-augmentation heuristic for the SDK's "default
629630
// Internal error" shape; otherwise the user-facing form
630631
// of the AppError is the best we can do.
631-
let close_reason = self.build_close_reason_from_error(err).await;
632+
let close_reason = self.build_close_reason_from_error(&err).await;
632633

633634
// Operator log: full error chain + the (raw, pre-redaction)
634635
// stderr peek so on-call can correlate. The redacted summary
635636
// is what reaches the UI.
636637
let summary = close_reason.user_facing_message();
637-
warn!(error = %ErrorChain(err), close_reason_summary = %summary, "ACP send_message failed");
638+
error!(error = %ErrorChain(&err), close_reason_summary = %summary, "ACP send_message failed");
638639

639640
{
640641
let mut session = self.session.write().await;
641642
session.record_close_reason(Some(close_reason));
642643
}
643-
self.runtime.emit_error(summary);
644+
let send_error = AgentSendError::from_app_error(err);
645+
self.runtime.emit_error_data(send_error.stream_error().clone());
646+
Err(send_error)
644647
}
645648
}
646-
result
647649
}
648650

649651
#[tracing::instrument(skip_all, fields(conversation_id = %self.params.conversation_id))]

crates/aionui-ai-agent/src/manager/acp/agent_session_flow.rs

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ use crate::protocol::events::{
77
use crate::shared_kernel::SessionId as DomainSessionId;
88
use crate::types::SendMessageData;
99
use agent_client_protocol::schema::{ContentBlock, LoadSessionRequest, PromptRequest, SessionId, StopReason};
10+
use aionui_api_types::{
11+
AgentErrorCode, AgentErrorOwnership, AgentErrorResolution, AgentErrorResolutionKind, AgentErrorResolutionTarget,
12+
};
1013
use aionui_common::AppError;
1114
use serde_json::Value;
1215
use tokio::sync::broadcast::error::TryRecvError;
@@ -259,13 +262,9 @@ impl AcpAgentManager {
259262
// user already initiated the cancel and doesn't need a second
260263
// notification.
261264
if !matches!(prompt_response.stop_reason, StopReason::Cancelled) && is_empty_turn(&mut probe_rx) {
262-
self.runtime.emit(AgentStreamEvent::Error(ErrorEventData {
263-
// TODO(i18n): wire to a frontend translation key once a
264-
// pattern is established. For now this is the user-facing
265-
// English string.
266-
message: empty_finish_diagnostic_message(prompt_response.stop_reason),
267-
code: Some("acp.empty_finish".into()),
268-
}));
265+
self.runtime.emit(AgentStreamEvent::Error(empty_finish_diagnostic_error(
266+
prompt_response.stop_reason,
267+
)));
269268
}
270269

271270
// Emit Finish event
@@ -377,6 +376,24 @@ fn event_is_user_visible_output(event: &AgentStreamEvent) -> bool {
377376
)
378377
}
379378

379+
fn empty_finish_diagnostic_error(stop_reason: StopReason) -> ErrorEventData {
380+
ErrorEventData::classified(
381+
// TODO(i18n): wire to a frontend translation key once a
382+
// pattern is established. For now this is the user-facing
383+
// English string.
384+
empty_finish_diagnostic_message(stop_reason),
385+
AgentErrorCode::UnknownUpstreamError,
386+
AgentErrorOwnership::UnknownUpstream,
387+
Some("Agent completed the turn without producing visible output.".into()),
388+
true,
389+
true,
390+
Some(AgentErrorResolution::new(
391+
AgentErrorResolutionKind::SendFeedback,
392+
Some(AgentErrorResolutionTarget::Feedback),
393+
)),
394+
)
395+
}
396+
380397
/// Build the user-facing message shown when the agent finished a turn
381398
/// without emitting any output. Wording is deliberately concrete so the
382399
/// user has something to act on (retry, reword, check provider).
@@ -580,6 +597,7 @@ mod tests {
580597
ToolCallStatus,
581598
};
582599
use agent_client_protocol::schema::StopReason;
600+
use aionui_api_types::{AgentErrorResolutionKind, AgentErrorResolutionTarget};
583601
use tokio::sync::broadcast;
584602

585603
/// Lifecycle-only events (`Start`/`Finish`) must NOT count as
@@ -671,4 +689,15 @@ mod tests {
671689
let refusal = super::empty_finish_diagnostic_message(StopReason::Refusal);
672690
assert!(refusal.to_lowercase().contains("refused"));
673691
}
692+
693+
#[test]
694+
fn empty_finish_diagnostic_error_has_feedback_resolution() {
695+
let error = super::empty_finish_diagnostic_error(StopReason::EndTurn);
696+
697+
let resolution = error
698+
.resolution
699+
.expect("empty-finish classified errors must include a resolution");
700+
assert_eq!(resolution.kind, AgentErrorResolutionKind::SendFeedback);
701+
assert_eq!(resolution.target, Some(AgentErrorResolutionTarget::Feedback));
702+
}
674703
}

crates/aionui-ai-agent/src/manager/aionrs/agent.rs

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use crate::agent_runtime::AgentRuntime;
2121
use crate::capability::backend_output_sink::BackendOutputSink;
2222
use crate::capability::backend_protocol_sink::BackendProtocolSink;
2323
use crate::protocol::events::AgentStreamEvent;
24+
use crate::protocol::send_error::AgentSendError;
2425
use crate::types::{AionrsResolvedConfig, SendMessageData};
2526

2627
pub struct AionrsAgentManager {
@@ -185,7 +186,7 @@ impl crate::agent_task::IAgentTask for AionrsAgentManager {
185186
self.runtime.subscribe()
186187
}
187188

188-
async fn send_message(&self, data: SendMessageData) -> Result<(), AppError> {
189+
async fn send_message(&self, data: SendMessageData) -> Result<(), AgentSendError> {
189190
let started_at = now_ms();
190191
info!(
191192
conversation_id = %self.runtime.conversation_id(),
@@ -230,9 +231,10 @@ impl crate::agent_task::IAgentTask for AionrsAgentManager {
230231
error = %ErrorChain(&e),
231232
"Aionrs engine.run() failed, emitting Error+Finish"
232233
);
233-
self.runtime.emit_error(error_msg.clone());
234+
let send_error = aionrs_engine_error_to_send_error(error_msg);
235+
self.runtime.emit_error_data(send_error.stream_error().clone());
234236
self.runtime.emit_finish(None);
235-
Err(AppError::Internal(error_msg))
237+
Err(send_error)
236238
}
237239
None => {
238240
self.runtime.emit_error("Stopped by user");
@@ -353,6 +355,14 @@ fn parse_session_mode(s: &str) -> SessionMode {
353355
}
354356
}
355357

358+
fn aionrs_engine_error_to_send_error(error_msg: String) -> AgentSendError {
359+
let lower = error_msg.to_ascii_lowercase();
360+
if lower.contains("provider error") || lower.contains("provider:") {
361+
return AgentSendError::from_app_error(AppError::BadGateway(error_msg));
362+
}
363+
AgentSendError::from_app_error(AppError::Internal(error_msg))
364+
}
365+
356366
#[cfg(test)]
357367
mod tests {
358368
use super::*;
@@ -488,4 +498,22 @@ mod tests {
488498
other => panic!("Expected Finish, got {:?}", other),
489499
}
490500
}
501+
502+
#[test]
503+
fn aionrs_provider_connection_error_is_user_llm_provider_error() {
504+
let send_error = aionrs_engine_error_to_send_error(
505+
"Aionrs agent error: Provider error: Connection error: Signable request error: failed to create canonical request"
506+
.to_owned(),
507+
);
508+
509+
assert_eq!(
510+
send_error.code(),
511+
Some(aionui_api_types::AgentErrorCode::UserLlmProviderConfigError)
512+
);
513+
assert_eq!(
514+
send_error.ownership(),
515+
Some(aionui_api_types::AgentErrorOwnership::UserLlmProvider)
516+
);
517+
assert_eq!(send_error.stream_error().retryable, Some(false));
518+
}
491519
}

crates/aionui-ai-agent/src/manager/nanobot/agent.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use crate::agent_runtime::AgentRuntime;
1212
use crate::capability::cli_process::CliAgentProcess;
1313
use crate::manager::process_registry::register_session_process;
1414
use crate::protocol::events::AgentStreamEvent;
15+
use crate::protocol::send_error::AgentSendError;
1516
use crate::types::SendMessageData;
1617
use std::path::PathBuf;
1718

@@ -187,7 +188,7 @@ impl crate::agent_task::IAgentTask for NanobotAgentManager {
187188
self.runtime.subscribe()
188189
}
189190

190-
async fn send_message(&self, data: SendMessageData) -> Result<(), AppError> {
191+
async fn send_message(&self, data: SendMessageData) -> Result<(), AgentSendError> {
191192
self.runtime.bump_activity();
192193

193194
{
@@ -205,7 +206,19 @@ impl crate::agent_task::IAgentTask for NanobotAgentManager {
205206
}
206207
});
207208

208-
self.process.send(&payload).await
209+
match self.process.send(&payload).await {
210+
Ok(()) => Ok(()),
211+
Err(err) => {
212+
error!(
213+
conversation_id = %self.runtime.conversation_id(),
214+
error = %ErrorChain(&err),
215+
"Nanobot send_message failed, emitting Error"
216+
);
217+
let send_error = AgentSendError::from_app_error(err);
218+
self.runtime.emit_error_data(send_error.stream_error().clone());
219+
Err(send_error)
220+
}
221+
}
209222
}
210223

211224
async fn cancel(&self) -> Result<(), AppError> {

crates/aionui-ai-agent/src/manager/openclaw/agent/mod.rs

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use crate::agent_runtime::AgentRuntime;
1111
use crate::capability::cli_process::CliAgentProcess;
1212
use crate::manager::process_registry::register_session_process;
1313
use crate::protocol::events::AgentStreamEvent;
14+
use crate::protocol::send_error::AgentSendError;
1415
use crate::types::SendMessageData;
1516
use aionui_api_types::OpenClawBuildExtra;
1617

@@ -403,7 +404,7 @@ impl crate::agent_task::IAgentTask for OpenClawAgentManager {
403404
self.runtime.subscribe()
404405
}
405406

406-
async fn send_message(&self, data: SendMessageData) -> Result<(), AppError> {
407+
async fn send_message(&self, data: SendMessageData) -> Result<(), AgentSendError> {
407408
self.runtime.bump_activity();
408409

409410
let is_first = {
@@ -419,17 +420,20 @@ impl crate::agent_task::IAgentTask for OpenClawAgentManager {
419420
text_state.reset_for_new_turn();
420421
}
421422

422-
let result = self.do_send_message(is_first, data).await;
423-
if let Err(ref e) = result {
424-
error!(
425-
conversation_id = %self.runtime.conversation_id(),
426-
error = %ErrorChain(e),
427-
"OpenClaw send_message failed, emitting Error+Finish"
428-
);
429-
self.runtime.emit_error(format!("OpenClaw send failed: {e}"));
430-
self.runtime.emit_finish(None);
423+
match self.do_send_message(is_first, data).await {
424+
Ok(()) => Ok(()),
425+
Err(err) => {
426+
error!(
427+
conversation_id = %self.runtime.conversation_id(),
428+
error = %ErrorChain(&err),
429+
"OpenClaw send_message failed, emitting Error+Finish"
430+
);
431+
let send_error = AgentSendError::from_app_error(err);
432+
self.runtime.emit_error_data(send_error.stream_error().clone());
433+
self.runtime.emit_finish(None);
434+
Err(send_error)
435+
}
431436
}
432-
result
433437
}
434438

435439
async fn cancel(&self) -> Result<(), AppError> {

crates/aionui-ai-agent/src/manager/openclaw/event_mapper.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -133,10 +133,7 @@ fn map_chat_event(
133133
}
134134
ChatEventState::Error => {
135135
let msg = chat.error_message.unwrap_or_else(|| "Unknown chat error".into());
136-
events.push(AgentStreamEvent::Error(ErrorEventData {
137-
message: msg,
138-
code: None,
139-
}));
136+
events.push(AgentStreamEvent::Error(ErrorEventData::legacy(msg, None)));
140137
text_state.turn_active = false;
141138
}
142139
}

0 commit comments

Comments
 (0)