Skip to content

Commit 9f71c52

Browse files
authored
chore(logging): audit ai-agent and conversation crates for sentry-ready logs (#257)
Pre-release logging audit. Runtime log level is info and user-reported issues only upload logs once, so every error/warn must carry enough context to diagnose from a single shot. Common: - Add aionui_common::ErrorChain to render std::error::Error::source() chains as "outer: inner1: inner2" in one log field. All error! / warn! callers switch to ErrorChain so nested AppError / sqlx / tungstenite causes stop being swallowed. Conversation: - stream_relay.rs: wrap consume() + persistence/broadcast helpers in #[instrument(skip_all, fields(conversation_id, msg_id))]. All 12 repository error! calls now carry the full span context, removing the "Failed to update streaming message" floaters that were unlinkable to a specific turn. - service.rs: add #[instrument] to create/get/list/update/delete/reset/ update_extra/send_message/cancel/warmup/create_acp_session_row with user_id + conversation_id. CRUD success emits info so Sentry can reconstruct the lifecycle. send_message logs per-step (user message persisted → agent task ready → stream dispatched). model-change kill path is no longer silent (let _ = → explicit warn on failure). insert_message / status write / agent.cancel failures emit warn with ErrorChain before propagating. AI-agent: - spawn_sdk.rs / spawn_legacy.rs: promote spawn / spawned / exit from debug to info so subprocess lifecycle is visible under info. Back-fill error logs on all four ok_or_else stdio/PID capture sites. - manager/acp/agent.rs: #[instrument] on ensure_session_opened / warmup_session / send_message / cancel. take_stdio ok_or_else gets an error log. connect / kill errors use ErrorChain. - protocol/acp.rs: broadcast send failure (event_tx.send) is no longer ignored — logs at debug when no subscribers (expected post-turn). - Other managers (aionrs / openclaw / remote / nanobot) + idle_scanner: ErrorChain migration for existing error! / warn! calls. No structural changes — these already carried conversation_id. Verified: cargo check / clippy / test all green across the three crates (common 65 + conversation 231 + ai-agent 337 tests passing).
1 parent cfcbe24 commit 9f71c52

13 files changed

Lines changed: 213 additions & 113 deletions

File tree

crates/aionui-ai-agent/src/capability/cli_process/spawn_legacy.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
use std::sync::Arc;
22

3-
use aionui_common::{AppError, CommandSpec};
3+
use aionui_common::{AppError, CommandSpec, ErrorChain};
44
use aionui_runtime::Builder as CmdBuilder;
55
use tokio::io::{AsyncBufReadExt, BufReader};
66
use tokio::process::Child;
77
use tokio::sync::{Mutex, broadcast, watch};
8-
use tracing::{debug, error, trace, warn};
8+
use tracing::{debug, error, info, trace, warn};
99

1010
use super::{CliAgentProcess, EVENT_CHANNEL_CAPACITY, STDERR_BUFFER_MAX};
1111

@@ -32,7 +32,7 @@ impl CliAgentProcess {
3232
}
3333

3434
let mut child: Child = cmd.spawn().map_err(|e| {
35-
error!(command = %config.command.display(), error = %e, "Failed to spawn CLI process");
35+
error!(command = %config.command.display(), error = %ErrorChain(&e), "Failed to spawn CLI process");
3636
AppError::Internal(format!(
3737
"Failed to spawn CLI process '{}': {}",
3838
config.command.display(),
@@ -43,7 +43,7 @@ impl CliAgentProcess {
4343
let pid = child
4444
.id()
4545
.ok_or_else(|| AppError::Internal("Failed to obtain PID from spawned process".into()))?;
46-
debug!(pid, command = %config.command.display(), "CLI process spawned");
46+
info!(pid, command = %config.command.display(), "CLI process spawned");
4747

4848
let stdout = child
4949
.stdout
@@ -117,11 +117,11 @@ impl CliAgentProcess {
117117
let exit_handle = tokio::spawn(async move {
118118
match child.wait().await {
119119
Ok(status) => {
120-
debug!(pid, ?status, "CLI process exited");
120+
info!(pid, ?status, "CLI process exited");
121121
let _ = exit_tx.send(Some(status));
122122
}
123123
Err(e) => {
124-
error!(pid, error = %e, "Failed to wait on CLI process");
124+
error!(pid, error = %ErrorChain(&e), "Failed to wait on CLI process");
125125
// Signal exit even on error so callers don't hang
126126
let _ = exit_tx.send(None);
127127
}

crates/aionui-ai-agent/src/capability/cli_process/spawn_sdk.rs

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
use aionui_common::{AppError, CommandSpec};
1+
use aionui_common::{AppError, CommandSpec, ErrorChain};
22
use aionui_runtime::Builder as CmdBuilder;
33
use std::path::Path;
44
use std::sync::Arc;
55
use tokio::io::{AsyncBufReadExt, BufReader};
66
use tokio::process::Child;
77
use tokio::sync::{Mutex, broadcast, watch};
8-
use tracing::{debug, error, warn};
8+
use tracing::{debug, error, info, warn};
99

1010
use super::{CliAgentProcess, EVENT_CHANNEL_CAPACITY, STDERR_BUFFER_MAX};
1111

@@ -36,33 +36,34 @@ impl CliAgentProcess {
3636
if let Some(ref cwd) = config.cwd {
3737
cmd.current_dir(cwd);
3838
}
39-
debug!("Spawning CLI process (SDK mode) with command: {:?}", &cmd);
39+
info!(command = %config.command.display(), "Spawning CLI process (SDK mode)");
4040
let mut child: Child = cmd.spawn().map_err(|e| {
41-
error!(command = %config.command.display(), error = %e, "Failed to spawn CLI process");
41+
error!(command = %config.command.display(), error = %ErrorChain(&e), "Failed to spawn CLI process");
4242
AppError::Internal(format!(
4343
"Failed to spawn CLI process '{}': {}",
4444
config.command.display(),
4545
e
4646
))
4747
})?;
4848

49-
let pid = child
50-
.id()
51-
.ok_or_else(|| AppError::Internal("Failed to obtain PID from spawned process".into()))?;
52-
debug!(pid, command = %config.command.display(), "CLI process spawned (SDK mode)");
53-
54-
let stdout = child
55-
.stdout
56-
.take()
57-
.ok_or_else(|| AppError::Internal("Failed to capture stdout from child process".into()))?;
58-
let stderr = child
59-
.stderr
60-
.take()
61-
.ok_or_else(|| AppError::Internal("Failed to capture stderr from child process".into()))?;
62-
let stdin = child
63-
.stdin
64-
.take()
65-
.ok_or_else(|| AppError::Internal("Failed to capture stdin for child process".into()))?;
49+
let pid = child.id().ok_or_else(|| {
50+
error!(command = %config.command.display(), "Failed to obtain PID from spawned process");
51+
AppError::Internal("Failed to obtain PID from spawned process".into())
52+
})?;
53+
info!(pid, command = %config.command.display(), "CLI process spawned (SDK mode)");
54+
55+
let stdout = child.stdout.take().ok_or_else(|| {
56+
error!(pid, "Failed to capture stdout from child process");
57+
AppError::Internal("Failed to capture stdout from child process".into())
58+
})?;
59+
let stderr = child.stderr.take().ok_or_else(|| {
60+
error!(pid, "Failed to capture stderr from child process");
61+
AppError::Internal("Failed to capture stderr from child process".into())
62+
})?;
63+
let stdin = child.stdin.take().ok_or_else(|| {
64+
error!(pid, "Failed to capture stdin for child process");
65+
AppError::Internal("Failed to capture stdin for child process".into())
66+
})?;
6667

6768
let (event_tx, _) = broadcast::channel(EVENT_CHANNEL_CAPACITY);
6869
let (exit_tx, exit_rx) = watch::channel(None);
@@ -95,11 +96,11 @@ impl CliAgentProcess {
9596
let exit_handle = tokio::spawn(async move {
9697
match child.wait().await {
9798
Ok(status) => {
98-
debug!(pid, ?status, "CLI process exited");
99+
info!(pid, ?status, "CLI process exited");
99100
let _ = exit_tx.send(Some(status));
100101
}
101102
Err(e) => {
102-
error!(pid, error = %e, "Failed to wait on CLI process");
103+
error!(pid, error = %ErrorChain(&e), "Failed to wait on CLI process");
103104
let _ = exit_tx.send(None);
104105
}
105106
}

crates/aionui-ai-agent/src/idle_scanner.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use std::sync::Arc;
22
use std::time::Duration;
33

4-
use aionui_common::AgentKillReason;
4+
use aionui_common::{AgentKillReason, ErrorChain};
55
use tracing::{debug, info, warn};
66

77
use crate::task_manager::IWorkerTaskManager;
@@ -66,7 +66,7 @@ fn scan_and_cleanup(manager: &Arc<dyn IWorkerTaskManager>, threshold_ms: i64) {
6666
if let Err(e) = manager.kill(&id, Some(AgentKillReason::IdleTimeout)) {
6767
warn!(
6868
conversation_id = %id,
69-
error = %e,
69+
error = %ErrorChain(&e),
7070
"Failed to kill idle agent"
7171
);
7272
}

crates/aionui-ai-agent/src/manager/acp/agent.rs

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@ use agent_client_protocol::schema::{
1414
};
1515
use aionui_api_types::{AgentHandshake, SlashCommandItem};
1616
use aionui_common::{
17-
AgentKillReason, AgentType, AppError, ConversationStatus, TimestampMs, normalize_keys_to_snake_case,
17+
AgentKillReason, AgentType, AppError, ConversationStatus, ErrorChain, TimestampMs, normalize_keys_to_snake_case,
1818
};
1919
use serde_json::Value;
2020
use std::sync::Arc;
2121
use std::time::Duration;
2222
use tokio::sync::{Mutex, RwLock, broadcast, mpsc};
23-
use tracing::{error, info};
23+
use tracing::{debug, error, info, warn};
2424

2525
use super::mode_normalize::normalize_requested_mode;
2626

@@ -136,10 +136,10 @@ impl AcpAgentManager {
136136
AppError,
137137
> {
138138
let process = CliAgentProcess::spawn_for_sdk(params.command_spec.clone(), &params.data_dir).await?;
139-
let (stdin, stdout) = process
140-
.take_stdio()
141-
.await
142-
.ok_or_else(|| AppError::Internal("Failed to take stdio from CLI process".into()))?;
139+
let (stdin, stdout) = process.take_stdio().await.ok_or_else(|| {
140+
error!(conversation_id = %params.conversation_id, "Failed to take stdio from CLI process");
141+
AppError::Internal("Failed to take stdio from CLI process".into())
142+
})?;
143143

144144
// Dedicated channel for raw SDK SessionNotifications → session tracker.
145145
// This channel is separate from event_tx so the tracker never re-applies
@@ -154,7 +154,7 @@ impl AcpAgentManager {
154154
.map_err(|e| {
155155
error!(
156156
conversation_id = %params.conversation_id,
157-
error = %e,
157+
error = %ErrorChain(&e),
158158
"Failed to establish ACP protocol connection"
159159
);
160160
AppError::from(e)
@@ -380,7 +380,9 @@ impl AcpAgentManager {
380380
/// 1. No sid at all → `open_session_new`
381381
/// 2. Sid present but CLI has not opened it (fresh task) → `open_session_resume`
382382
/// 3. Already opened → noop, return the existing sid
383+
#[tracing::instrument(skip_all, fields(conversation_id = %self.params.conversation_id))]
383384
async fn ensure_session_opened(&self) -> Result<String, AppError> {
385+
debug!("Ensuring ACP session is opened");
384386
let _lock = self.session_lock.lock().await;
385387

386388
let (session_id, opened) = {
@@ -449,8 +451,15 @@ impl AcpAgentManager {
449451
/// factory after `AcpAgentManager::build` so `POST /warmup` returns
450452
/// only after the session is ready to accept `set_mode` / `set_model`
451453
/// / `prompt`. Idempotent — if already opened, returns immediately.
454+
#[tracing::instrument(skip_all, fields(conversation_id = %self.params.conversation_id))]
452455
pub async fn warmup_session(&self) -> Result<(), AppError> {
453-
self.ensure_session_opened().await.map(|_sid| ())
456+
info!("Warming up ACP session");
457+
let result = self.ensure_session_opened().await.map(|_sid| ());
458+
match &result {
459+
Ok(()) => info!("ACP session warmed up"),
460+
Err(e) => warn!(error = %ErrorChain(e), "ACP session warmup failed"),
461+
}
462+
result
454463
}
455464
}
456465

@@ -480,25 +489,30 @@ impl crate::agent_task::IAgentTask for AcpAgentManager {
480489
self.runtime.subscribe()
481490
}
482491

492+
#[tracing::instrument(skip_all, fields(conversation_id = %self.params.conversation_id, msg_id = %data.msg_id))]
483493
async fn send_message(&self, data: SendMessageData) -> Result<(), AppError> {
484494
self.runtime.bump_activity();
485495

486496
let result = self.ensure_session_and_send(&data).await;
487497
match &result {
488498
Ok(()) => {
499+
info!("ACP send_message completed");
489500
// ACP pattern: Finish with session_id = None (default).
490501
// If ACP later wants to include the session_id in Finish,
491502
// read it from `self.session.read().await.session_id()`.
492503
self.runtime.emit_finish(None);
493504
}
494505
Err(err) => {
506+
warn!(error = %ErrorChain(err), "ACP send_message failed");
495507
self.runtime.emit_error(err.to_string());
496508
}
497509
}
498510
result
499511
}
500512

513+
#[tracing::instrument(skip_all, fields(conversation_id = %self.params.conversation_id))]
501514
async fn cancel(&self) -> Result<(), AppError> {
515+
info!("Cancelling ACP session");
502516
let session_id = self.session.read().await.session_id().map(ToOwned::to_owned);
503517
if let Some(sid) = session_id {
504518
self.protocol.cancel(CancelNotification::new(SessionId::new(sid)));
@@ -534,7 +548,7 @@ impl crate::agent_task::IAgentTask for AcpAgentManager {
534548

535549
tokio::spawn(async move {
536550
if let Err(e) = process.kill(grace).await {
537-
error!(error = %e, "Failed to kill ACP process");
551+
error!(error = %ErrorChain(&e), "Failed to kill ACP process");
538552
}
539553
});
540554

crates/aionui-ai-agent/src/manager/aionrs/agent.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ use aion_mcp::manager::McpManager;
1010
use aion_protocol::commands::SessionMode;
1111
use aion_protocol::{ToolApprovalManager, ToolApprovalResult};
1212
use aionui_api_types::AgentModeResponse;
13-
use aionui_common::{AgentKillReason, AgentType, AppError, Confirmation, ConversationStatus, TimestampMs, now_ms};
13+
use aionui_common::{
14+
AgentKillReason, AgentType, AppError, Confirmation, ConversationStatus, ErrorChain, TimestampMs, now_ms,
15+
};
1416
use serde_json::Value;
1517
use tokio::sync::{Mutex, broadcast};
1618
use tracing::{debug, error, info};
@@ -110,7 +112,7 @@ impl AionrsAgentManager {
110112
if !is_resume && let Err(e) = engine.init_session(&provider_label, &workspace, Some(&conversation_id)) {
111113
error!(
112114
conversation_id = %conversation_id,
113-
error = %e,
115+
error = %ErrorChain(&*e),
114116
"Failed to init session, continuing without persistence"
115117
);
116118
}
@@ -204,7 +206,7 @@ impl crate::agent_task::IAgentTask for AionrsAgentManager {
204206
error!(
205207
conversation_id = %self.runtime.conversation_id(),
206208
elapsed_ms,
207-
error = %e,
209+
error = %ErrorChain(&e),
208210
"Aionrs engine.run() failed, emitting Error+Finish"
209211
);
210212
self.runtime.emit_error(error_msg.clone());

crates/aionui-ai-agent/src/manager/nanobot/agent.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use std::sync::Arc;
22
use std::time::Duration;
33

4-
use aionui_common::{AgentKillReason, AgentType, AppError, Confirmation, ConversationStatus, TimestampMs};
4+
use aionui_common::{AgentKillReason, AgentType, AppError, Confirmation, ConversationStatus, ErrorChain, TimestampMs};
55
use serde_json::{Value, json};
66
use tokio::sync::{Mutex, RwLock, broadcast};
77
use tracing::{debug, error, info, warn};
@@ -209,7 +209,7 @@ impl crate::agent_task::IAgentTask for NanobotAgentManager {
209209
let grace = Duration::from_millis(NANOBOT_KILL_GRACE_MS);
210210
tokio::spawn(async move {
211211
if let Err(e) = process.kill(grace).await {
212-
error!(error = %e, "Failed to kill Nanobot process");
212+
error!(error = %ErrorChain(&e), "Failed to kill Nanobot process");
213213
}
214214
});
215215

crates/aionui-ai-agent/src/manager/openclaw/agent/mod.rs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use std::collections::HashMap;
22
use std::sync::Arc;
33
use std::time::Duration;
44

5-
use aionui_common::{AgentKillReason, AgentType, AppError, Confirmation, ConversationStatus, TimestampMs};
5+
use aionui_common::{AgentKillReason, AgentType, AppError, Confirmation, ConversationStatus, ErrorChain, TimestampMs};
66
use serde_json::{Value, json};
77
use tokio::sync::{Mutex, RwLock, broadcast};
88
use tracing::{debug, error, info, warn};
@@ -126,14 +126,13 @@ impl OpenClawAgentManager {
126126

127127
let (connection, hello) = OpenClawConnection::connect(&ws_url, auth, &identity)
128128
.await
129-
.map_err(|e| {
129+
.inspect_err(|e| {
130130
error!(
131131
conversation_id = %conversation_id,
132132
url = %ws_url,
133-
error = %e,
133+
error = %ErrorChain(e),
134134
"Failed to connect to OpenClaw gateway"
135135
);
136-
e
137136
})?;
138137

139138
if let Some(ref auth_info) = hello.auth
@@ -321,7 +320,7 @@ impl OpenClawAgentManager {
321320
Err(e) => {
322321
warn!(
323322
conversation_id = %self.runtime.conversation_id(),
324-
error = %e,
323+
error = %ErrorChain(&e),
325324
"Failed to resume OpenClaw session, falling back to sessions.reset"
326325
);
327326
}
@@ -414,7 +413,7 @@ impl crate::agent_task::IAgentTask for OpenClawAgentManager {
414413
if let Err(ref e) = result {
415414
error!(
416415
conversation_id = %self.runtime.conversation_id(),
417-
error = %e,
416+
error = %ErrorChain(e),
418417
"OpenClaw send_message failed, emitting Error+Finish"
419418
);
420419
self.runtime.emit_error(format!("OpenClaw send failed: {e}"));
@@ -479,7 +478,7 @@ impl crate::agent_task::IAgentTask for OpenClawAgentManager {
479478
let grace = Duration::from_millis(OPENCLAW_KILL_GRACE_MS);
480479
tokio::spawn(async move {
481480
if let Err(e) = process.kill(grace).await {
482-
error!(error = %e, "Failed to kill OpenClaw gateway process");
481+
error!(error = %ErrorChain(&e), "Failed to kill OpenClaw gateway process");
483482
}
484483
});
485484
}

crates/aionui-ai-agent/src/manager/remote/agent.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use std::collections::HashMap;
22
use std::sync::Arc;
33

44
use aionui_common::{
5-
AgentKillReason, AgentType, AppError, Confirmation, ConversationStatus, RemoteAgentStatus, TimestampMs,
5+
AgentKillReason, AgentType, AppError, Confirmation, ConversationStatus, ErrorChain, RemoteAgentStatus, TimestampMs,
66
};
77
use futures_util::{SinkExt, StreamExt};
88
use serde_json::{Value, json};
@@ -86,7 +86,7 @@ impl RemoteAgentManager {
8686
let url = &self.remote_config.url;
8787

8888
let (ws_stream, _response) = tokio_tungstenite::connect_async(url).await.map_err(|e| {
89-
error!(url = url, error = %e, "Failed to connect to remote agent");
89+
error!(url = url, error = %ErrorChain(&e), "Failed to connect to remote agent");
9090
AppError::Internal(format!("WebSocket connection failed: {e}"))
9191
})?;
9292

@@ -134,7 +134,7 @@ impl RemoteAgentManager {
134134
Err(e) => {
135135
debug!(
136136
conversation_id = %self.runtime.conversation_id(),
137-
error = %e,
137+
error = %ErrorChain(&e),
138138
"Non-JSON WebSocket message, skipping"
139139
);
140140
}
@@ -150,7 +150,7 @@ impl RemoteAgentManager {
150150
Err(e) => {
151151
warn!(
152152
conversation_id = %self.runtime.conversation_id(),
153-
error = %e,
153+
error = %ErrorChain(&e),
154154
"WebSocket read error"
155155
);
156156
break;
@@ -231,7 +231,7 @@ impl RemoteAgentManager {
231231
sink.send(Message::Text(text.into())).await.map_err(|e| {
232232
error!(
233233
conversation_id = %self.runtime.conversation_id(),
234-
error = %e,
234+
error = %ErrorChain(&e),
235235
"Failed to send WebSocket message"
236236
);
237237
AppError::Internal(format!("WebSocket send failed: {e}"))

0 commit comments

Comments
 (0)