Skip to content

Commit 1734334

Browse files
tbitcs and oz-agent committed
fix: suppress non-English LLM responses before they reach the UI
Root cause: _call_provider emitted the llm_chunk JSON event immediately after provider.complete() returned, before _agent_turn could run the _has_non_english() check. Thai/Chinese responses were therefore sent to the VS Code extension and displayed even when the correction turn later produced an English reply.

Changes:
- Add defer_emit: bool = False param to _call_provider. When True, skip the llm_chunk emission so the caller controls when content is surfaced.
- _agent_turn: set _defer_emit = not silent and self._json_events (only applies in the VS Code bridge mode; interactive REPL unaffected).
- Emit llm_chunk from _agent_turn AFTER _has_non_english() passes (final text responses) or after confirming content is English (tool-call partial text). Non-English partial text on tool-call responses is silently dropped instead of displayed.
- Remove the _iteration == 0 guard on the correction turn — non-English responses on any iteration are now caught and re-prompted, not just the first one.

Co-Authored-By: Oz <oz-agent@warp.dev>
1 parent e4d96f2 commit 1734334

1 file changed

Lines changed: 35 additions & 7 deletions

File tree

src/specsmith/agent/runner.py

Lines changed: 35 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -480,14 +480,21 @@ def _agent_turn(self, user_input: str, silent: bool = False) -> str:
480480
# Add user message
481481
self._state.messages.append(Message(role=Role.USER, content=user_input))
482482

483+
# In json_events mode we defer the llm_chunk emission so we can run the
484+
# language check BEFORE the UI sees the text. Without this, a Thai/Chinese
485+
# response would be displayed even when the correction turn later succeeds.
486+
_defer_emit = not silent and self._json_events
487+
483488
final_response = ""
484489
for _iteration in range(self._max_iterations):
485490
messages_with_system = [
486491
Message(role=Role.SYSTEM, content=self._system_prompt)
487492
] + self._state.messages
488493

489494
try:
490-
response = self._call_provider(messages_with_system, silent=silent)
495+
response = self._call_provider(
496+
messages_with_system, silent=silent, defer_emit=_defer_emit
497+
)
491498
except Exception as e: # noqa: BLE001
492499
error_msg = f"[Provider error] {e}"
493500
if not silent:
@@ -515,9 +522,12 @@ def _agent_turn(self, user_input: str, silent: bool = False) -> str:
515522
final_response = response.content
516523

517524
if not response.has_tool_calls:
518-
# Non-English correction: if response appears to be in another language,
519-
# issue a single correction turn rather than showing the wrong-language response.
520-
if response.content and self._has_non_english(response.content) and _iteration == 0:
525+
# Non-English correction: if the final response is in a non-English
526+
# language, silently issue a correction turn instead of surfacing the
527+
# bad text. _defer_emit ensures the UI never sees the Thai/Chinese
528+
# content — we only emit llm_chunk once we have an English reply.
529+
# The iteration limit acts as a natural loop-break (max_iterations).
530+
if response.content and self._has_non_english(response.content):
521531
correction = (
522532
"[LANG:EN] CRITICAL: Your last response was in a non-English language. "
523533
"You MUST respond in English ONLY. Please re-answer in English."
@@ -526,12 +536,22 @@ def _agent_turn(self, user_input: str, silent: bool = False) -> str:
526536
Message(role=Role.ASSISTANT, content=response.content)
527537
)
528538
self._state.messages.append(Message(role=Role.USER, content=correction))
529-
# Continue the loop to get an English response
539+
# Continue the loop to get an English response (don't emit bad content)
530540
continue
541+
# Language OK — emit the deferred llm_chunk now
542+
if _defer_emit and response.content:
543+
self._emit_event(type="llm_chunk", text=response.content)
531544
# Final response — add to history
532545
self._state.messages.append(Message(role=Role.ASSISTANT, content=response.content))
533546
break
534547

548+
# Has tool calls — emit any deferred partial text (planning/thinking text
549+
# that precedes the tool invocation), but only if it is in English.
550+
# Some Qwen/DeepSeek variants emit Thai/Chinese thinking text before tool
551+
# calls; silently drop that rather than surfacing it to the UI.
552+
if _defer_emit and response.content and not self._has_non_english(response.content):
553+
self._emit_event(type="llm_chunk", text=response.content)
554+
535555
# Process tool calls
536556
self._hard_stop = False # reset before each batch
537557
tool_results = self._execute_tool_calls(response.tool_calls, silent=silent)
@@ -571,12 +591,20 @@ def _agent_turn(self, user_input: str, silent: bool = False) -> str:
571591

572592
return final_response
573593

574-
def _call_provider(self, messages: list[Message], silent: bool = False) -> CompletionResponse:
594+
def _call_provider(
595+
self,
596+
messages: list[Message],
597+
silent: bool = False,
598+
defer_emit: bool = False,
599+
) -> CompletionResponse:
575600
"""Call the LLM provider with optimization engine pre/post hooks.
576601
577602
Streaming is disabled when tools are registered (streaming drops tool_call blocks).
578603
When an OptimizationEngine is active, pre_call() may return a cache hit
579604
or transform messages/model/tools before the actual provider call.
605+
606+
``defer_emit=True`` suppresses the ``llm_chunk`` event so the caller can
607+
validate the response (e.g. language check) before surfacing it to the UI.
580608
"""
581609
provider: Any = self._provider
582610
tools = self._tools
@@ -613,7 +641,7 @@ def _call_provider(self, messages: list[Message], silent: bool = False) -> Compl
613641
response = CompletionResponse(content=accumulated, model=str(provider.model))
614642
else:
615643
response = cast(CompletionResponse, provider.complete(messages, tools=tools))
616-
if not silent and response.content:
644+
if not silent and not defer_emit and response.content:
617645
if self._json_events:
618646
self._emit_event(type="llm_chunk", text=response.content)
619647
else:

0 commit comments

Comments
 (0)