Skip to content

Commit 5304c82

Browse files
committed
feat(subagent): 为subagent添加日志追踪
1 parent 2a8ed83 commit 5304c82

11 files changed

Lines changed: 362 additions & 117 deletions

File tree

astrbot/core/agent/runners/tool_loop_agent_runner.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -989,6 +989,13 @@ def _append_tool_call_result(tool_call_id: str, content: str) -> None:
989989
),
990990
)
991991

992+
# 获取 trace span:优先使用 subagent trace_span,否则回退到 event.trace
993+
_agent_ctx = getattr(self.run_context, "context", None)
994+
_trace = getattr(_agent_ctx, "trace_span", None)
995+
if _trace is None and _agent_ctx is not None:
996+
_event = getattr(_agent_ctx, "event", None)
997+
_trace = getattr(_event, "trace", None)
998+
992999
# 执行函数调用
9931000
for func_tool_name, func_tool_args, func_tool_id in zip(
9941001
llm_response.tools_call_name,
@@ -1012,6 +1019,9 @@ def _append_tool_call_result(tool_call_id: str, content: str) -> None:
10121019
],
10131020
)
10141021
)
1022+
# 记录工具调用追踪
1023+
if _trace:
1024+
_trace.record("agent_tool_call", tool_name=func_tool_name)
10151025
try:
10161026
if not req.func_tool:
10171027
return
@@ -1251,6 +1261,15 @@ def _append_tool_call_result(tool_call_id: str, content: str) -> None:
12511261
)
12521262
)
12531263
logger.info(f"Tool `{func_tool_name}` Result: {tool_result_content}")
1264+
# 记录工具结果追踪
1265+
if _trace:
1266+
_trace.record(
1267+
"agent_tool_result",
1268+
tool_name=func_tool_name,
1269+
tool_result=tool_result_content[:500]
1270+
if tool_result_content
1271+
else None,
1272+
)
12541273

12551274
# 处理函数调用响应
12561275
if tool_call_result_blocks:

astrbot/core/astr_agent_context.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@ class AstrAgentContext:
1818
"""The message event associated with the agent context."""
1919
extra: dict[str, Any] = Field(default_factory=dict)
2020
"""Customized extra data."""
21+
trace_span: Any = Field(default=None)
22+
"""Optional custom TraceSpan for subagent tracing. When set, tool calls within
23+
the agent loop will be recorded to this trace instead of event.trace.
24+
This prevents concurrent subagent and main agent tool calls from mixing up
25+
trace records."""
2126

2227

2328
AgentContextWrapper = ContextWrapper[AstrAgentContext]

astrbot/core/astr_agent_run_util.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -185,13 +185,6 @@ async def run_agent(
185185
if resp.type == "tool_call_result":
186186
msg_chain = resp.data["chain"]
187187

188-
astr_event.trace.record(
189-
"agent_tool_result",
190-
tool_result=msg_chain.get_plain_text(
191-
with_other_comps_mark=True
192-
),
193-
)
194-
195188
if msg_chain.type == "tool_direct_result":
196189
# tool_direct_result 用于标记 llm tool 需要直接发送给用户的内容
197190
await astr_event.send(msg_chain)
@@ -218,10 +211,6 @@ async def run_agent(
218211
yield MessageChain(chain=[], type="break")
219212

220213
tool_info = _extract_chain_json_data(resp.data["chain"])
221-
astr_event.trace.record(
222-
"agent_tool_call",
223-
tool_name=tool_info if tool_info else "unknown",
224-
)
225214
_record_tool_call_name(tool_info, tool_name_by_call_id)
226215

227216
if astr_event.get_platform_name() == "webchat":

astrbot/core/astr_agent_tool_exec.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ async def _execute_handoff(
342342
ctx = run_context.context.context
343343
event = run_context.context.event
344344
umo = event.unified_msg_origin
345+
agent_name = getattr(tool.agent, "name", "unknown")
345346

346347
# Use per-subagent provider override if configured; otherwise fall back
347348
# to the current/default provider resolution.
@@ -368,10 +369,38 @@ async def _execute_handoff(
368369
agent_max_step = int(prov_settings.get("max_agent_step", 30))
369370
stream = prov_settings.get("streaming_response", False)
370371

372+
# Create trace span for subagent execution
373+
from astrbot.core.utils.trace import TraceSpan
374+
375+
parent_trace = getattr(event, "trace", None)
376+
subagent_trace = TraceSpan(
377+
name=f"SubAgent:{agent_name}",
378+
umo=event.unified_msg_origin,
379+
sender_name=event.get_sender_name()
380+
if hasattr(event, "get_sender_name")
381+
else None,
382+
message_outline=f"Handoff to {agent_name}: {input_[:100] if input_ else ''}",
383+
parent_span_id=parent_trace.span_id if parent_trace else None,
384+
)
385+
subagent_trace.record(
386+
"subagent_execution_begin",
387+
agent_name=agent_name,
388+
input_preview=input_[:500] if input_ else None,
389+
image_count=len(image_urls),
390+
tools=[t.name for t in toolset] if toolset else [],
391+
max_steps=agent_max_step,
392+
stream=stream,
393+
)
394+
371395
# 获取子代理的历史上下文
372396
subagent_history, agent_name = cls._load_subagent_history(umo, tool)
373397
# 如果有历史上下文,合并到 contexts 中
374398
if subagent_history:
399+
subagent_trace.record(
400+
"subagent_history_loaded",
401+
agent_name=agent_name,
402+
history_messages_count=len(subagent_history),
403+
)
375404
if contexts is None:
376405
contexts = subagent_history
377406
else:
@@ -381,6 +410,14 @@ async def _execute_handoff(
381410
subagent_system_prompt = cls._build_subagent_system_prompt(
382411
umo, tool, prov_settings
383412
)
413+
subagent_trace.record(
414+
"subagent_system_prompt",
415+
agent_name=agent_name,
416+
prompt_length=len(subagent_system_prompt),
417+
prompt_preview=subagent_system_prompt[:300]
418+
if subagent_system_prompt
419+
else None,
420+
)
384421

385422
# 构建子代理的追加内容
386423
extra_content_parts = SubAgentManager.build_subagent_extra_content_parts(
@@ -408,6 +445,7 @@ async def _run_subagent():
408445
stream=stream,
409446
runner_messages=runner_messages,
410447
extra_user_content_parts=extra_content_parts,
448+
trace_span=subagent_trace,
411449
)
412450

413451
# 添加执行超时控制
@@ -419,6 +457,10 @@ async def _run_subagent():
419457
except asyncio.TimeoutError:
420458
# 若超时,保存已产生的部分历史
421459
cls._save_subagent_history(umo, runner_messages, agent_name)
460+
subagent_trace.record(
461+
"subagent_execution_timeout",
462+
timeout_seconds=execution_timeout,
463+
)
422464
error_msg = f"SubAgent '{agent_name}' execution timeout after {execution_timeout:.1f} seconds."
423465
logger.warning(f"[SubAgent:Timeout] {error_msg}")
424466

@@ -433,8 +475,26 @@ async def _run_subagent():
433475
else:
434476
# 不设置超时
435477
llm_resp = await _run_subagent()
478+
479+
execution_time = time.time() - subagent_trace.started_at
480+
subagent_trace.record(
481+
"subagent_execution_complete",
482+
agent_name=agent_name,
483+
result_preview=llm_resp.completion_text[:500]
484+
if hasattr(llm_resp, "completion_text") and llm_resp.completion_text
485+
else None,
486+
result_length=len(llm_resp.completion_text)
487+
if hasattr(llm_resp, "completion_text") and llm_resp.completion_text
488+
else 0,
489+
execution_time=execution_time,
490+
)
491+
436492
# 保存历史上下文
437493
cls._save_subagent_history(umo, runner_messages, agent_name)
494+
subagent_trace.record(
495+
"subagent_history_saved",
496+
messages_count=len(runner_messages),
497+
)
438498

439499
yield mcp.types.CallToolResult(
440500
content=[mcp.types.TextContent(type="text", text=llm_resp.completion_text)]
@@ -467,6 +527,26 @@ async def _execute_handoff_background(
467527

468528
original_task_id = uuid.uuid4().hex
469529

530+
# Create trace span for background task creation
531+
from astrbot.core.utils.trace import TraceSpan
532+
533+
parent_trace = getattr(event, "trace", None)
534+
bg_trace = TraceSpan(
535+
name=f"SubAgentBackground:{agent_name}",
536+
umo=event.unified_msg_origin,
537+
sender_name=event.get_sender_name()
538+
if hasattr(event, "get_sender_name")
539+
else None,
540+
message_outline=f"Background handoff to {agent_name}",
541+
parent_span_id=parent_trace.span_id if parent_trace else None,
542+
)
543+
bg_trace.record(
544+
"subagent_background_task_created",
545+
agent_name=agent_name,
546+
subagent_task_id=subagent_task_id,
547+
original_task_id=original_task_id,
548+
)
549+
470550
async def _run_handoff_in_background() -> None:
471551
try:
472552
await cls._do_handoff_background(
@@ -515,6 +595,25 @@ async def _do_handoff_background(
515595
event = run_context.context.event
516596
umo = event.unified_msg_origin
517597
agent_name = getattr(tool.agent, "name", None)
598+
599+
# Create trace span for background subagent execution
600+
from astrbot.core.utils.trace import TraceSpan
601+
602+
parent_trace = getattr(event, "trace", None)
603+
bg_trace = TraceSpan(
604+
name=f"SubAgentBackground:{agent_name}",
605+
umo=event.unified_msg_origin,
606+
sender_name=event.get_sender_name()
607+
if hasattr(event, "get_sender_name")
608+
else None,
609+
message_outline=f"Background handoff to {agent_name}",
610+
parent_span_id=parent_trace.span_id if parent_trace else None,
611+
)
612+
bg_trace.record(
613+
"subagent_background_execution_start",
614+
agent_name=agent_name,
615+
)
616+
518617
# 获取SubAgent的超时时间
519618
execution_timeout = cls._get_subagent_execution_timeout()
520619

@@ -550,6 +649,14 @@ async def _run():
550649
)
551650

552651
execution_time = time.time() - start_time
652+
bg_trace.record(
653+
"subagent_background_execution_end",
654+
agent_name=agent_name,
655+
success=error_text is None,
656+
result_preview=result_text[:500] if result_text else None,
657+
execution_time=execution_time,
658+
)
659+
553660
# Check if it's enhanced subagent
554661
is_managed = cls._is_managed_subagent(umo, agent_name)
555662
if is_managed:

astrbot/core/star/context.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ async def tool_loop_agent(
226226
agent_context = AstrAgentContext(
227227
context=self,
228228
event=event,
229+
trace_span=kwargs.get("trace_span"),
229230
)
230231
agent_runner = ToolLoopAgentRunner()
231232
tool_executor = FunctionToolExecutor()

astrbot/core/subagent_manager.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ class SubAgentSession:
6767
) # 后台subagent结果存储: {agent_name: {task_id: SubAgentExecutionResult}}
6868
# 任务计数器: {agent_name: next_task_id}
6969
background_task_counters: dict = field(default_factory=dict)
70+
subagent_traces: dict = field(default_factory=dict) # {agent_name: TraceSpan}
7071
last_activity_at: float = field(default_factory=time.time) # 最后活跃时间戳
7172

7273

@@ -778,7 +779,16 @@ def set_shared_context_enabled(cls, session_id: str, enabled: bool) -> None:
778779
def set_subagent_status(cls, session_id: str, agent_name: str, status: str) -> None:
779780
session = cls._get_or_create_session(session_id)
780781
if agent_name in session.subagents:
782+
old_status = session.subagent_status.get(agent_name, "UNKNOWN")
781783
session.subagent_status[agent_name] = status
784+
trace = session.subagent_traces.get(agent_name)
785+
if trace:
786+
trace.record(
787+
"subagent_status_change",
788+
agent_name=agent_name,
789+
from_status=old_status,
790+
to_status=status,
791+
)
782792

783793
# for read-only operations
784794
@classmethod
@@ -845,11 +855,21 @@ async def create_subagent(
845855
protected: If True, the subagent will not be auto-cleaned per turn.
846856
Static subagents from config should be protected.
847857
"""
858+
from astrbot.core.utils.trace import TraceSpan
859+
860+
trace = TraceSpan(name=f"SubAgent:{config.name}", umo=session_id)
848861
session = cls._get_or_create_session(session_id)
849862
if config.name not in session.subagents:
850863
# Check max count limit
851864
active_count = len(session.subagents.keys())
852865
if active_count >= cls._max_subagent_count:
866+
trace.record(
867+
"subagent_created",
868+
agent_name=config.name,
869+
success=False,
870+
reason="max_count_reached",
871+
max_count=cls._max_subagent_count,
872+
)
853873
return (
854874
f"Error: Maximum number of subagents ({cls._max_subagent_count}) reached. More subagents is not allowed.",
855875
None,
@@ -892,6 +912,16 @@ async def create_subagent(
892912
# 如果标记为protected,则加入protected集合
893913
if protected:
894914
session.protected_agents.add(config.name)
915+
trace.record(
916+
"subagent_created",
917+
agent_name=config.name,
918+
success=True,
919+
tools=list(config.tools) if config.tools else [],
920+
skills=list(config.skills) if config.skills else [],
921+
protected=protected,
922+
provider_id=config.provider_id,
923+
)
924+
session.subagent_traces[config.name] = trace
895925
logger.info(
896926
"[SubAgent:Create] Created subagent: %s (protected=%s)",
897927
config.name,
@@ -927,6 +957,19 @@ def register_static_subagent(
927957
if (
928958
config.name not in session.subagents
929959
): # if the static agent already exists, pass
960+
from astrbot.core.utils.trace import TraceSpan
961+
962+
trace = TraceSpan(name=f"SubAgent:{config.name}", umo=session_id)
963+
trace.record(
964+
"subagent_created",
965+
agent_name=config.name,
966+
success=True,
967+
static=True,
968+
tools=list(config.tools) if config.tools else [],
969+
skills=list(config.skills) if config.skills else [],
970+
)
971+
session.subagent_traces[config.name] = trace
972+
930973
if config.tools is None:
931974
config.tools = None
932975
if config.tools is not None and not config.tools:
@@ -984,6 +1027,7 @@ def _remove_by_name(name):
9841027
session.subagent_histories.pop(name, None)
9851028
session.subagent_background_results.pop(name, None)
9861029
session.background_task_counters.pop(name, None)
1030+
session.subagent_traces.pop(name, None)
9871031
# 清理公共上下文中包含该Agent的内容
9881032
cls.cleanup_shared_context_by_agent(session_id, name)
9891033

@@ -1004,6 +1048,7 @@ def _remove_by_name(name):
10041048
session.shared_context.clear()
10051049
session.subagent_background_results.clear()
10061050
session.background_task_counters.clear()
1051+
session.subagent_traces.clear()
10071052
logger.info("[SubAgent:Cleanup] All subagents cleaned.")
10081053
return "__SUBAGENT_REMOVED__: All subagents have been removed."
10091054
else:
@@ -1165,6 +1210,17 @@ def store_subagent_result(
11651210
if metadata:
11661211
task_store[task_id].metadata.update(metadata)
11671212

1213+
trace = session.subagent_traces.get(agent_name)
1214+
if trace:
1215+
trace.record(
1216+
"subagent_result_stored",
1217+
agent_name=agent_name,
1218+
task_id=task_id,
1219+
success=success,
1220+
execution_time=execution_time,
1221+
has_error=error is not None,
1222+
)
1223+
11681224
@classmethod
11691225
def get_subagent_result(
11701226
cls, session_id: str, agent_name: str, task_id: str | None = None

0 commit comments

Comments
 (0)