feat: refactor agent runner, enhance follow-up, skill manager and history saver

AstrBot Local · claude · AstrBot Local · commit 7d534795298b · 2026-03-12T21:13:27.000+08:00
- Remove empty completion retry logic and the forced context-truncation retry on token-limit errors from ToolLoopAgentRunner
- Detect skill-set changes in astr_main_agent via a fingerprint and inject a system reminder when the skill list has changed
- Extend follow_up pipeline stage with stale order-state cleanup on runner unregister and a 30-second wait timeout for follow-up turns
- Add get_skills_fingerprint() to skill_manager for lightweight skill-set change detection
- Improve history_saver utility

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
diff --git a/astrbot/core/agent/runners/tool_loop_agent_runner.py b/astrbot/core/agent/runners/tool_loop_agent_runner.py
@@ -28,7 +28,6 @@
 )
 from astrbot.core.provider.entities import (
     LLMResponse,
-    LLM_CONTROL_CODE_EMPTY_COMPLETION_RETRY,
     LLM_CONTROL_CODE_UNKNOWN_TOOL_CALL,
     ProviderRequest,
     ToolCallsResult,
@@ -88,10 +87,6 @@ def _get_persona_custom_error_message(self) -> str | None:
         event = getattr(self.run_context.context, "event", None)
         return extract_persona_custom_error_message_from_event(event)
 
-    @staticmethod
-    def _is_empty_completion_retry(resp: LLMResponse) -> bool:
-        return resp.control_code == LLM_CONTROL_CODE_EMPTY_COMPLETION_RETRY
-
     @staticmethod
     def _is_unknown_tool_call(resp: LLMResponse) -> bool:
         return resp.control_code == LLM_CONTROL_CODE_UNKNOWN_TOOL_CALL
@@ -257,24 +252,6 @@ async def _iter_llm_responses_with_fallback(
                         yield resp
                         continue
 
-                    if self._is_empty_completion_retry(resp):
-                        # Empty/unparseable response from model, retry same provider once
-                        logger.warning(
-                            "Chat Model %s returned empty/unparseable completion, retrying once...",
-                            candidate_id,
-                        )
-                        try:
-                            async for retry_resp in self._iter_llm_responses(include_model=idx == 0):
-                                if retry_resp.is_chunk:
-                                    yield retry_resp
-                                    continue
-                                yield retry_resp
-                                return
-                        except Exception as retry_exc:
-                            logger.warning("Retry also failed: %s", retry_exc)
-                            last_exception = retry_exc
-                        break
-
                     if (
                         resp.role == "err"
                         and not has_stream_output
@@ -294,80 +271,6 @@ async def _iter_llm_responses_with_fallback(
                     return
             except Exception as exc:  # noqa: BLE001
                 last_exception = exc
-                _exc_str = str(exc).lower()
-                # Auto-compress context when model_max_prompt_tokens_exceeded
-                if (
-                    "model_max_prompt_tokens_exceeded" in _exc_str
-                    or "prompt token count" in _exc_str
-                    or "tokens_exceeded" in _exc_str
-                    or "context_length_exceeded" in _exc_str
-                    or ("token" in _exc_str and "exceed" in _exc_str)
-                ):
-                    logger.warning(
-                        "Chat Model %s: token limit exceeded, forcing context compression and retrying...",
-                        candidate_id,
-                    )
-                    try:
-                        from astrbot.core.agent.context.truncator import ContextTruncator
-                        _truncator = ContextTruncator()
-                        _before_total = len(self.run_context.messages)
-                        # Aggressively halve until small enough (up to 5 rounds)
-                        _compression_success = False
-                        for _halve_round in range(5):
-                            _before = len(self.run_context.messages)
-                            self.run_context.messages = _truncator.truncate_by_halving(
-                                self.run_context.messages
-                            )
-                            _after = len(self.run_context.messages)
-                            logger.info(
-                                "Forced context truncation round %d: %d -> %d messages.",
-                                _halve_round + 1, _before, _after,
-                            )
-                            if _after <= 4:
-                                break  # Can't shrink further
-                            # Retry same candidate
-                            try:
-                                async for resp in self._iter_llm_responses(include_model=idx == 0):
-                                    if resp.is_chunk:
-                                        has_stream_output = True
-                                        yield resp
-                                        continue
-                                    yield resp
-                                    logger.info(
-                                        "Context truncation succeeded after %d round(s): %d -> %d messages.",
-                                        _halve_round + 1, _before_total, _after,
-                                    )
-                                    _compression_success = True
-                                    return
-                                if has_stream_output:
-                                    _compression_success = True
-                                    return
-                                _compression_success = True
-                                break  # succeeded without stream output
-                            except Exception as retry_exc:
-                                _exc_str2 = str(retry_exc).lower()
-                                if not (
-                                    "model_max_prompt_tokens_exceeded" in _exc_str2
-                                    or "prompt token count" in _exc_str2
-                                    or "tokens_exceeded" in _exc_str2
-                                    or "context_length_exceeded" in _exc_str2
-                                    or ("token" in _exc_str2 and "exceed" in _exc_str2)
-                                ):
-                                    last_exception = retry_exc
-                                    logger.warning(
-                                        "Chat Model %s retry after compression failed: %s",
-                                        candidate_id, retry_exc,
-                                    )
-                                    break
-                                last_exception = retry_exc
-                                logger.warning(
-                                    "Chat Model %s still token-exceeded after round %d, halving again...",
-                                    candidate_id, _halve_round + 1,
-                                )
-                                continue
-                    except Exception as compress_exc:
-                        logger.error("Failed to compress context: %s", compress_exc)
-                    continue
                 logger.warning(
                     "Chat Model %s request error: %s",
                     candidate_id,
@@ -404,15 +307,21 @@ def follow_up(
         *,
         message_text: str,
     ) -> FollowUpTicket | None:
-        """Queue a follow-up message for the next tool result."""
+        """Queue a follow-up message to be injected into the next tool result.
+
+        Returns None if the agent is already done (message arrived too late) or
+        if the message text is empty.
+        """
         if self.done():
+            logger.debug("follow_up: agent already done, message discarded.")
             return None
         text = (message_text or "").strip()
         if not text:
             return None
         ticket = FollowUpTicket(seq=self._follow_up_seq, text=text)
         self._follow_up_seq += 1
         self._pending_follow_ups.append(ticket)
+        logger.debug("follow_up: queued ticket seq=%d, pending=%d", ticket.seq, len(self._pending_follow_ups))
         return ticket
 
     def _resolve_unconsumed_follow_ups(self) -> None:
@@ -431,15 +340,16 @@ def _consume_follow_up_notice(self) -> str:
         for ticket in follow_ups:
             ticket.consumed = True
             ticket.resolved.set()
+
         follow_up_lines = "\n".join(
             f"{idx}. {ticket.text}" for idx, ticket in enumerate(follow_ups, start=1)
         )
+        count = len(follow_ups)
+        plural = "messages" if count > 1 else "message"
         return (
-            "\n\n[FOLLOW-UP] The user sent additional message(s) while you were working. "
-            "Treat these as supplementary instructions for the current task — DO NOT stop "
-            "or restart the current operation. Instead, seamlessly incorporate them into "
-            "your ongoing work. Continue the task flow without interrupting it. "
-            "Do NOT acknowledge receipt explicitly; just act on them naturally.\n"
+            f"\n\n[FOLLOW-UP x{count}] The user sent {count} {plural} while you were working. "
+            "Incorporate them as supplementary instructions seamlessly — "
+            "do NOT stop, restart, or explicitly acknowledge receipt; just act naturally.\n"
             f"{follow_up_lines}"
         )
 
@@ -778,7 +688,7 @@ async def step_until_done(
             self.run_context.messages.append(
                 Message(
                     role="user",
-                    content="工具调用次数已达到上限，请停止使用工具，并根据已经收集到的信息，对你的任务和发现进行总结，然后直接回复用户。",
+                    content="工具调用次数已达到上限，请停止使用工具，并根据已经收集到的信息，对你的任务和发现进行总结，然后直接回复用户。(Tool call limit reached. Stop using tools and summarize your findings directly for the user.)",
                 )
             )
             # 再执行最后一步
diff --git a/astrbot/core/astr_main_agent.py b/astrbot/core/astr_main_agent.py
@@ -59,7 +59,7 @@
 from astrbot.core.platform.astr_message_event import AstrMessageEvent
 from astrbot.core.provider import Provider
 from astrbot.core.provider.entities import ProviderRequest
-from astrbot.core.skills.skill_manager import SkillManager, build_skills_prompt
+from astrbot.core.skills.skill_manager import SkillManager, build_skills_prompt, get_skills_fingerprint
 from astrbot.core.star.context import Context
 from astrbot.core.star.star_handler import star_map
 from astrbot.core.tools.cron_tools import (
@@ -353,6 +353,33 @@ async def _ensure_persona_and_skills(
                     "You cannot use shell or Python to perform skills. "
                     "If you need to use these capabilities, ask the user to enable Computer Use in the AstrBot WebUI -> Config."
                 )
+
+    # --- Dynamic skill update detection (lightweight) ---
+    # Compute current fingerprint using only stat() calls (no file I/O).
+    # If the fingerprint changed since the last turn (stored on the event extra),
+    # inject a one-line system reminder into extra_user_content_parts so the LLM
+    # is aware that the skill list may have changed.  This avoids rebuilding the
+    # full system prompt mid-conversation (which many providers ignore anyway).
+    current_fp = get_skills_fingerprint()
+    prev_fp = event.get_extra("_skills_fp")
+    if prev_fp is not None and prev_fp != current_fp:
+        # Skills changed since the last request in this session — notify the LLM.
+        skill_names = [s.name for s in skills]
+        skills_list_str = ", ".join(skill_names) if skill_names else "(none)"
+        req.extra_user_content_parts.append(
+            TextPart(
+                text=(
+                    "<system_reminder>"
+                    "The available skill list has been updated since the last turn. "
+                    f"Current active skills: {skills_list_str}. "
+                    "Please refer to this updated list for any skill-related requests."
+                    "</system_reminder>"
+                )
+            )
+        )
+        logger.debug("Skills fingerprint changed (%s -> %s), injected update reminder.", prev_fp, current_fp)
+    event.set_extra("_skills_fp", current_fp)
+    # --- end dynamic skill update detection ---
     tmgr = plugin_context.get_llm_tool_manager()
 
     # inject toolset in the persona
diff --git a/astrbot/core/pipeline/process_stage/follow_up.py b/astrbot/core/pipeline/process_stage/follow_up.py
@@ -41,6 +41,28 @@ def register_active_runner(umo: str, runner: AgentRunner) -> None:
 def unregister_active_runner(umo: str, runner: AgentRunner) -> None:
     if _ACTIVE_AGENT_RUNNERS.get(umo) is runner:
         _ACTIVE_AGENT_RUNNERS.pop(umo, None)
+        # Best-effort cleanup: if no follow-up state is pending any more, drop the
+        # UMO entry to avoid accumulating stale entries when sessions end abnormally.
+        state = _FOLLOW_UP_ORDER_STATE.get(umo)
+        if state is not None:
+            statuses = state.get("statuses")
+            if not statuses:
+                _FOLLOW_UP_ORDER_STATE.pop(umo, None)
+            else:
+                # There are still pending/active entries — notify the condition so
+                # any waiter in _activate_and_wait_follow_up_turn can re-check and
+                # potentially hit the timeout branch.
+                condition = state.get("condition")
+                if isinstance(condition, asyncio.Condition):
+                    async def _notify_condition(cond: asyncio.Condition) -> None:
+                        async with cond:
+                            cond.notify_all()
+                    try:
+                        loop = asyncio.get_event_loop()
+                        if loop.is_running():
+                            loop.create_task(_notify_condition(condition))
+                    except RuntimeError:
+                        pass
 
 
 def _get_follow_up_order_state(umo: str) -> dict[str, object]:
@@ -108,6 +130,10 @@ async def _mark_follow_up_consumed(umo: str, seq: int) -> None:
             _FOLLOW_UP_ORDER_STATE.pop(umo, None)
 
 
+_FOLLOW_UP_WAIT_TIMEOUT: float = 30.0
+"""Max seconds a follow-up turn will wait for its predecessor to finish."""
+
+
 async def _activate_and_wait_follow_up_turn(umo: str, seq: int) -> None:
     state = _FOLLOW_UP_ORDER_STATE.get(umo)
     if not state:
@@ -121,12 +147,27 @@ async def _activate_and_wait_follow_up_turn(umo: str, seq: int) -> None:
             statuses[seq] = "active"
 
         # Strict ordering: only the head (`next_turn`) can continue.
+        # Use a timeout to guard against predecessor runner crashes.
+        deadline = asyncio.get_event_loop().time() + _FOLLOW_UP_WAIT_TIMEOUT
         while True:
             next_turn = state["next_turn"]
             assert isinstance(next_turn, int)
             if next_turn == seq:
                 break
-            await condition.wait()
+            remaining = deadline - asyncio.get_event_loop().time()
+            if remaining <= 0:
+                # Predecessor never finished; forcibly advance to avoid permanent hang.
+                logger.warning(
+                    "Follow-up wait timeout for umo=%s seq=%s; advancing turn to prevent hang.",
+                    umo,
+                    seq,
+                )
+                state["next_turn"] = seq
+                break
+            try:
+                await asyncio.wait_for(condition.wait(), timeout=remaining)
+            except asyncio.TimeoutError:
+                pass  # re-check on next iteration
 
 
 async def _finish_follow_up_turn(umo: str, seq: int) -> None:
@@ -152,8 +193,19 @@ async def _monitor_follow_up_ticket(
     ticket: FollowUpTicket,
     order_seq: int,
 ) -> None:
-    """Advance consumed slots immediately on resolution to avoid wake-order drift."""
+    """Advance consumed slots immediately on resolution to avoid wake-order drift.
+
+    Only marks the order slot consumed here when *ticket* was consumed by the
+    runner (i.e. injected into a tool-result).  If the ticket was *not* consumed
+    (i.e. the runner finished without ever flushing pending follow-ups),
+    ``prepare_follow_up_capture`` handles the mark via its own branch, so we
+    must not double-call ``_mark_follow_up_consumed`` here.
+    """
     await ticket.resolved.wait()
+    # Guard: only act when consumed=True AND prepare_follow_up_capture has not
+    # already handled this seq (it sets consumed_marked and calls us via the
+    # captured branch).  The state dict check inside _mark_follow_up_consumed
+    # is idempotent, so a double-call is safe, but we skip it when not needed.
     if ticket.consumed:
         await _mark_follow_up_consumed(umo, order_seq)
 
diff --git a/astrbot/core/skills/skill_manager.py b/astrbot/core/skills/skill_manager.py
@@ -464,3 +464,44 @@ def install_skill_from_zip(self, zip_path: str, *, overwrite: bool = True) -> st
 
         self.set_skill_active(skill_name, True)
         return skill_name
+
+
+# ---------------------------------------------------------------------------
+# Lightweight skill-set fingerprint for dynamic update detection
+# ---------------------------------------------------------------------------
+
+def get_skills_fingerprint(skills_root: str | None = None) -> str:
+    """Return a lightweight fingerprint of the current skill set.
+
+    Uses only os.stat() calls (no file I/O) so it is extremely cheap.
+    The fingerprint changes whenever:
+      - skills.json is modified (new skill installed / toggled)
+      - a new SKILL.md appears or disappears under skills_root
+    """
+    import hashlib
+
+    data_path = Path(get_astrbot_data_path())
+    config_path = str(data_path / SKILLS_CONFIG_FILENAME)
+    root = Path(skills_root or get_astrbot_skills_path())
+
+    parts: list[str] = []
+
+    # 1. skills.json mtime (catches installs / enable-disable)
+    try:
+        parts.append(str(os.path.getmtime(config_path)))
+    except OSError:
+        parts.append("no-config")
+
+    # 2. Count of skill dirs that have SKILL.md (catches new folders)
+    try:
+        count = sum(
+            1
+            for e in root.iterdir()
+            if e.is_dir() and (e / "SKILL.md").exists()
+        )
+        parts.append(str(count))
+    except OSError:
+        parts.append("0")
+
+    raw = "|".join(parts)
+    return hashlib.md5(raw.encode()).hexdigest()[:12]
diff --git a/astrbot/core/utils/history_saver.py b/astrbot/core/utils/history_saver.py