@@ -173,6 +173,25 @@ def build_system_prompt(
173173If the user inputs another language, internally translate it, then reply IN ENGLISH ONLY.
174174VIOLATING THIS RULE IS A CRITICAL ERROR.
175175
176+ ## TOOL ERROR RULE — HARD STOP (NEVER TROUBLESHOOT ERRORS):
177+ When ANY tool returns an error, exception, or non-zero exit code:
178+ 1. STOP immediately. Do not attempt to fix, diagnose, or retry.
179+ 2. Say in ONE sentence: what you were doing and what failed.
180+ Example: "The audit tool hit an unexpected error and needs to be reported."
181+ 3. Then say: "Would you like to report this bug?"
182+ 4. Wait. Do nothing else. The user will decide.
183+ This tool is not designed to fix itself. Fail fast, report quickly.
184+
185+ ## RESPONSE STYLE RULE — CONVERSATIONAL PLAIN ENGLISH:
186+ Always respond in natural sentences, like a helpful colleague would.
187+ - NEVER dump raw tool output, JSON, tables of IDs, or code blocks in your reply.
188+ - Summarize what you found in 1-3 plain sentences.
189+ - If a command found issues: say how many and what kind.
190+ - If everything is fine: say so briefly.
191+ - Details go in the tool result panel; your words give the meaning.
192+ Example good: "Audit found 3 issues: LEDGER.md is missing and 2 requirements lack tests."
193+ Example bad: "The tool returned: [\u2717 ] LEDGER.md MISSING, [\u2717 ] REQ-001 uncovered..."
194+
176195You are an AEE-integrated specsmith agent for this project.
177196
178197## Project Governance
@@ -281,6 +300,7 @@ def __init__(
281300 self ._skills : list [Skill ] = load_skills (Path (self .project_dir ))
282301 self ._hooks = HookRegistry ()
283302 self ._system_prompt = ""
303+ self ._hard_stop : bool = False # set True when a critical tool crash is detected
284304
285305 # Execution profile — loaded from scaffold.yml at session start
286306 from specsmith import profiles
@@ -379,6 +399,76 @@ def _has_non_english(self, text: str) -> bool:
379399 hits = len (self ._NON_ASCII_BLOCKS .findall (text ))
380400 return hits > 5 and (hits / max (len (text ), 1 )) > 0.05
381401
# ---- Crash signatures in tool output that trigger a hard stop ----
# Each entry is one regex alternative; they are OR-ed together into a single
# case-insensitive pattern.  None of the alternatives is anchored, so a hit
# anywhere in the text counts.
# NOTE(review): because matching is unanchored and case-insensitive, output
# that merely *mentions* e.g. "ImportError" (such as the contents of a source
# file) would also match - confirm this is intended.
_CRITICAL_PATTERNS = re.compile(
    "|".join(
        (
            r"Traceback \(most recent call last\)",
            r"\[ERROR\]",
            r"UnicodeDecodeError",
            r"UnicodeEncodeError",
            r"ImportError",
            r"ModuleNotFoundError",
            r"AttributeError: '",
            r"TypeError: unsupported",
            r"PermissionError",
            r"OSError: ",
            r"RuntimeError: ",
        )
    ),
    re.IGNORECASE,
)
417+
@staticmethod
def _is_critical_error(output: str) -> bool:
    """Tell whether tool output carries the signature of an unexpected crash.

    A non-zero exit code on its own is not considered here (an audit that
    finds issues exits non-zero, for example); the decision rests purely on
    whether ``AgentRunner._CRITICAL_PATTERNS`` matches somewhere in the text.

    Args:
        output: Text produced by a tool call.  Empty or falsy output is
            never treated as critical.

    Returns:
        ``True`` when a critical-error pattern is found, ``False`` otherwise.
    """
    # Skip the regex entirely for empty output so falsy values never reach it.
    hit = AgentRunner._CRITICAL_PATTERNS.search(output) if output else None
    return hit is not None
430+
def _collect_diagnostics(self, tool_name: str, output: str) -> dict:
    """Collect diagnostic context for a crash report.

    Args:
        tool_name: Name of the tool whose output is being reported.
        output: Raw text the tool produced (normally containing the error).

    Returns:
        A dict with the keys ``tool``, ``summary`` (one line, at most 200
        characters), ``detail`` (first 4000 characters of ``output``),
        ``specsmith_version``, ``python_version``, ``os_info``,
        ``project_type`` and ``repo``.
    """
    import platform as _platform
    import sys as _sys

    from specsmith import __version__ as _ver

    text = output or ""

    # Best-effort lookup of the project type from scaffold.yml.  Any failure
    # here (PyYAML missing, unreadable or malformed file) is deliberately
    # ignored so that building the crash report can never itself crash.
    project_type = ""
    try:
        import yaml as _yaml

        sf = Path(self.project_dir) / "scaffold.yml"
        if sf.exists():
            raw = _yaml.safe_load(sf.read_text(encoding="utf-8")) or {}
            project_type = str(raw.get("type", ""))
    except Exception:  # noqa: BLE001
        pass

    # Every report produced by this runner is attributed to the specsmith CLI
    # repository (per the original author's note, extension/bridge errors do
    # not go through this runner).
    repo = "specsmith"

    # Pick the last line that starts with an exception name for the summary.
    # Python prints non-builtin exceptions with their module path
    # ("pkg.mod.SomeError: msg"), so an optional dotted prefix is accepted;
    # class names ending in "Exception" are recognised as well.
    lines = text.strip().splitlines()
    _err_pat = re.compile(r"(?:\w+\.)*(?:\w+Error|\w*Exception)")
    summary_line = next(
        (ln.strip() for ln in reversed(lines) if _err_pat.match(ln.strip())),
        # No exception-looking line: fall back to the first line of output.
        lines[0] if lines else "Unknown error",
    )[:200]

    return {
        "tool": tool_name,
        "summary": summary_line,
        "detail": text[:4000],
        "specsmith_version": _ver,
        "python_version": _sys.version.split()[0],
        "os_info": f"{_platform.system()} {_platform.release()}",
        "project_type": project_type,
        "repo": repo,
    }
471+
382472 def _agent_turn (self , user_input : str , silent : bool = False ) -> str :
383473 """Execute one user→agent turn with tool loop."""
384474 # Inject a lightweight English-only reminder into every user message.
@@ -443,9 +533,15 @@ def _agent_turn(self, user_input: str, silent: bool = False) -> str:
443533 break
444534
445535 # Process tool calls
536+ self ._hard_stop = False # reset before each batch
446537 tool_results = self ._execute_tool_calls (response .tool_calls , silent = silent )
447538 self ._state .tool_calls_made += len (tool_results )
448539
540+ # Fail fast: a critical tool crash was detected — break immediately
541+ # without sending the error back to the LLM (which would try to fix it).
542+ if self ._hard_stop :
543+ break
544+
449545 # Add assistant message with tool calls
450546 self ._state .messages .append (
451547 Message (
@@ -543,7 +639,12 @@ def _call_provider(self, messages: list[Message], silent: bool = False) -> Compl
543639 def _execute_tool_calls (
544640 self , tool_calls : list [dict [str , Any ]], silent : bool = False
545641 ) -> list [ToolResult ]:
546- """Execute tool calls and return results."""
642+ """Execute tool calls and return results.
643+
644+ Sets ``self._hard_stop = True`` if any tool produces a critical error
645+ (Python exception, import error, etc.) so the caller can break the
646+ agentic loop immediately without sending the error to the LLM.
647+ """
547648 from specsmith import profiles as _profiles
548649
549650 results : list [ToolResult ] = []
@@ -669,6 +770,22 @@ def _execute_tool_calls(
669770
670771 elapsed = time .time () * 1000 - start_ms
671772
773+ # ---- Fail-fast: detect critical errors -------------------------
774+ # A critical error is an unexpected crash (Python exception, import
775+ # failure, etc.) — NOT a normal governance failure (audit issues,
776+ # missing files) which the LLM should describe conversationally.
777+ if self ._is_critical_error (output ):
778+ self ._hard_stop = True
779+ diagnostics = self ._collect_diagnostics (name , output )
780+ if not silent and self ._json_events :
781+ self ._emit_event (type = "tool_crash" , ** diagnostics )
782+ elif not silent :
783+ self ._print (
784+ f"\n [CRITICAL ERROR in { name } ] "
785+ f"{ diagnostics ['summary' ]} \n "
786+ "Session stopped. Please report this bug."
787+ )
788+
672789 if not silent :
673790 if self ._json_events :
674791 self ._emit_event (type = "tool_finished" , name = name , result = output , is_error = error )
0 commit comments