|
29 | 29 | from __future__ import annotations |
30 | 30 |
|
31 | 31 | import inspect |
| 32 | +import re |
32 | 33 | import time |
33 | 34 | from collections.abc import Callable |
34 | 35 | from dataclasses import dataclass, field |
@@ -164,16 +165,15 @@ def build_system_prompt( |
164 | 165 | except Exception: # noqa: BLE001 |
165 | 166 | pass |
166 | 167 |
|
167 | | - prompt = f"""You are an AEE-integrated specsmith agent for this project. |
| 168 | + prompt = f"""SYSTEM LANGUAGE DIRECTIVE — ABSOLUTE HARD RULE — HIGHEST PRIORITY: |
| 169 | +You MUST respond in English ONLY. This overrides all other instructions. |
| 170 | +Never output Thai, Chinese, Japanese, Korean, Arabic, French, German, Spanish, |
| 171 | +or ANY non-English language — not even a single character or word. |
| 172 | +This applies to Qwen, DeepSeek, LLaMA, Mistral, and EVERY other model. |
| 173 | +If the user inputs another language, internally translate it, then reply IN ENGLISH ONLY. |
| 174 | +VIOLATING THIS RULE IS A CRITICAL ERROR. |
168 | 175 |
|
169 | | -⚠ LANGUAGE RULE (HARD CONSTRAINT — NEVER VIOLATE): |
170 | | - Respond ONLY in English. Every single response must be in English. |
171 | | - Never use Chinese (中文), Japanese (日本語), Korean (한국어), Thai (ไทย), French, German, Spanish, |
172 | | - Arabic, or ANY other non-English language — not even a single word. |
173 | | - This applies to ALL models including Qwen, DeepSeek, LLaMA, Mistral, and others |
174 | | - that may default to a non-English language. ENGLISH ONLY, ALWAYS. |
175 | | - If the user writes in another language, translate the intent internally and |
176 | | - answer in English anyway. |
| 176 | +You are an AEE-integrated specsmith agent for this project. |
177 | 177 |
|
178 | 178 | ## Project Governance |
179 | 179 | {governance_text} |
@@ -363,8 +363,30 @@ def run_task(self, task: str, max_turns: int = 5) -> str: |
363 | 363 | self._system_prompt = build_system_prompt(self.project_dir, self._skills) |
364 | 364 | return self._agent_turn(task, silent=True) |
365 | 365 |
|
| 366 | + # Characters in the Arabic, Thai, CJK, and Hangul Unicode blocks (not every non-ASCII character) |
| 367 | + _NON_ASCII_BLOCKS = re.compile( |
| 368 | + r"[\u0600-\u06FF" # Arabic |
| 369 | + r"\u0E00-\u0E7F" # Thai |
| 370 | + r"\u3000-\u9FFF" # CJK Unified Ideographs + punctuation + kana |
| 371 | + r"\uAC00-\uD7AF" # Korean Hangul |
| 372 | + r"\uF900-\uFAFF]" # CJK Compatibility Ideographs |
| 373 | + ) |
| 374 | + |
| 375 | + def _has_non_english(self, text: str) -> bool: |
| 376 | + """Return True if more than 5 characters and more than 5% of text are Arabic, Thai, CJK, or Hangul; Latin-script languages are not detected.""" |
| 377 | + if not text: |
| 378 | + return False |
| 379 | + hits = len(self._NON_ASCII_BLOCKS.findall(text)) |
| 380 | + return hits > 5 and (hits / max(len(text), 1)) > 0.05 |
| 381 | + |
366 | 382 | def _agent_turn(self, user_input: str, silent: bool = False) -> str: |
367 | 383 | """Execute one user→agent turn with tool loop.""" |
| 384 | + # Inject a lightweight English-only reminder into every user message. |
| 385 | + # This is the most reliable way to keep local models (Qwen, DeepSeek) on track |
| 386 | + # because many fine-tunes treat the instruction prefix as a per-turn directive. |
| 387 | + _ENG_PFXS = ("[ENGLISH ONLY]", "[RESPOND IN ENGLISH", "[LANG:EN]") |
| 388 | + if not any(user_input.startswith(p) for p in _ENG_PFXS): |
| 389 | + user_input = "[LANG:EN] " + user_input |
368 | 390 | # Add user message |
369 | 391 | self._state.messages.append(Message(role=Role.USER, content=user_input)) |
370 | 392 |
|
@@ -403,6 +425,19 @@ def _agent_turn(self, user_input: str, silent: bool = False) -> str: |
403 | 425 | final_response = response.content |
404 | 426 |
|
405 | 427 | if not response.has_tool_calls: |
| 428 | + # Non-English correction: if the response appears to be in another language, |
| 429 | + # issue a correction turn rather than showing the wrong-language response; this only fires on the first loop iteration (_iteration == 0). |
| 430 | + if response.content and self._has_non_english(response.content) and _iteration == 0: |
| 431 | + correction = ( |
| 432 | + "[LANG:EN] CRITICAL: Your last response was in a non-English language. " |
| 433 | + "You MUST respond in English ONLY. Please re-answer in English." |
| 434 | + ) |
| 435 | + self._state.messages.append( |
| 436 | + Message(role=Role.ASSISTANT, content=response.content) |
| 437 | + ) |
| 438 | + self._state.messages.append(Message(role=Role.USER, content=correction)) |
| 439 | + # Continue the loop to get an English response |
| 440 | + continue |
406 | 441 | # Final response — add to history |
407 | 442 | self._state.messages.append(Message(role=Role.ASSISTANT, content=response.content)) |
408 | 443 | break |
|
0 commit comments