fix(plugin): update e2e tests and tighten regex for clarification gate (#1423)

JeremyDev87 · JeremyDev87 · commit 8cef0ed5e4f0 · 2026-04-07T18:18:58.000+09:00
- Update the e2e test prompts (22 in this diff) to include file references
  (login.ts, auth.ts, dashboard.tsx) so that they bypass the clarification
  gate instead of triggering it
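- Tighten the snake_case tech-reference regex so that each of the first two
  segments must start with at least 3 lowercase letters
  (`[a-z]{3,}_[a-z]{3,}`), reducing false positives on short English words
  joined by an underscore (e.g. "of_the")
- Add an optional `question_budget` argument (default
  `DEFAULT_QUESTION_BUDGET` = 3) to `evaluate_clarification_standalone` and
  `build_instructions`; when the budget is 0 or less the gate is skipped,
  otherwise the directive reports the remaining budget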
diff --git a/packages/claude-code-plugin/hooks/lib/mode_engine.py b/packages/claude-code-plugin/hooks/lib/mode_engine.py
@@ -153,7 +153,7 @@
     re.compile(r"\b[a-zA-Z_][\w]*\("),
     re.compile(r"\b[A-Z][a-zA-Z0-9]*[A-Z][a-zA-Z0-9]+\b"),
     re.compile(r"\b[a-z][a-z0-9]+[A-Z][a-zA-Z0-9]+\b"),
-    re.compile(r"\b[a-z]+_[a-z][a-z0-9_]*\b"),
+    re.compile(r"\b[a-z]{3,}_[a-z]{3,}[a-z0-9_]*\b"),
     re.compile(r"`[^`]+`"),
 ]
 
@@ -164,17 +164,34 @@
 )
 
 
-def evaluate_clarification_standalone(prompt: str) -> Optional[str]:
+DEFAULT_QUESTION_BUDGET = 3
+
+
+def evaluate_clarification_standalone(
+    prompt: str, question_budget: Optional[int] = None
+) -> Optional[str]:
     """
     Standalone clarification gate (#1423).
 
     Returns a clarification-first directive string when the prompt is
     ambiguous, or ``None`` when the request is clear enough to plan.
+
+    Args:
+        prompt: Raw user prompt including mode keyword.
+        question_budget: Remaining clarification rounds.  Defaults to
+            ``DEFAULT_QUESTION_BUDGET``.  When 0 the gate falls back
+            to planning with explicit assumptions.
     """
+    budget = question_budget if question_budget is not None else DEFAULT_QUESTION_BUDGET
+
     trimmed = prompt.strip()
     if not trimmed:
         return None
 
+    # Budget exhausted — proceed with explicit assumptions.
+    if budget <= 0:
+        return None
+
     # Strip mode keyword prefix before evaluating content
     stripped = _MODE_KEYWORD_RE.sub("", trimmed).strip()
     if not stripped:
@@ -192,6 +209,8 @@ def evaluate_clarification_standalone(prompt: str) -> Optional[str]:
     if not is_vague and not is_short:
         return None
 
+    remaining = budget - 1
+
     if is_vague:
         question = (
             "What concrete change are you targeting — "
@@ -210,6 +229,7 @@ def evaluate_clarification_standalone(prompt: str) -> Optional[str]:
         "2. Do NOT output any implementation plan, architecture, or code.\n"
         "3. Wait for the user's response before continuing.\n\n"
         f'❓ Ask this: "{question}"\n\n'
+        f"Remaining question budget: {remaining}\n\n"
         "After the user answers, re-invoke the mode with the clarified prompt."
     )
 
@@ -396,7 +416,12 @@ def build_council_scene(self, mode: str) -> Optional[dict]:
             "format": "tiny-actor-grid",
         }
 
-    def build_instructions(self, mode: str, prompt: Optional[str] = None) -> str:
+    def build_instructions(
+        self,
+        mode: str,
+        prompt: Optional[str] = None,
+        question_budget: Optional[int] = None,
+    ) -> str:
         """
         Build complete mode instructions for hook output.
 
@@ -420,7 +445,7 @@ def build_instructions(self, mode: str, prompt: Optional[str] = None) -> str:
 
         # Clarification gate for PLAN/AUTO modes (#1423)
         if prompt and mode_upper in ("PLAN", "AUTO"):
-            directive = evaluate_clarification_standalone(prompt)
+            directive = evaluate_clarification_standalone(prompt, question_budget)
             if directive:
                 return directive
 
diff --git a/tests/e2e/plugin-hooks/test_user_prompt_submit_e2e.py b/tests/e2e/plugin-hooks/test_user_prompt_submit_e2e.py
@@ -45,10 +45,10 @@ class TestEnglishModeKeywords:
     """Detect English mode keywords: PLAN, ACT, EVAL, AUTO."""
 
     @pytest.mark.parametrize("keyword,mode", [
-        ("PLAN design auth feature", "PLAN"),
-        ("ACT implement the changes", "ACT"),
-        ("EVAL review the code", "EVAL"),
-        ("AUTO implement user dashboard", "AUTO"),
+        ("PLAN design auth feature for login.ts", "PLAN"),
+        ("ACT implement the auth changes in login.ts", "ACT"),
+        ("EVAL review the auth.ts implementation", "EVAL"),
+        ("AUTO implement user dashboard in dashboard.tsx", "AUTO"),
     ])
     def test_detects_english_keyword(self, mock_env, keyword, mode):
         result = run_hook(
@@ -73,10 +73,10 @@ class TestKoreanModeKeywords:
     """Detect Korean mode keywords."""
 
     @pytest.mark.parametrize("keyword,mode", [
-        ("계획 인증 기능 설계", "PLAN"),
-        ("실행 변경 사항 구현", "ACT"),
-        ("평가 코드 리뷰", "EVAL"),
-        ("자동 대시보드 구현", "AUTO"),
+        ("계획 login.ts 인증 기능 설계", "PLAN"),
+        ("실행 auth.ts 인증 변경 사항 구현", "ACT"),
+        ("평가 login.ts 인증 코드 리뷰", "EVAL"),
+        ("자동 dashboard.tsx 대시보드 구현", "AUTO"),
     ])
     def test_detects_korean_keyword(self, mock_env, keyword, mode):
         result = run_hook(
@@ -92,10 +92,10 @@ class TestJapaneseModeKeywords:
     """Detect Japanese mode keywords."""
 
     @pytest.mark.parametrize("keyword,mode", [
-        ("計画 認証機能の設計", "PLAN"),
-        ("実行 変更の実装", "ACT"),
-        ("評価 コードレビュー", "EVAL"),
-        ("自動 ダッシュボード実装", "AUTO"),
+        ("計画 auth.ts 認証機能の設計", "PLAN"),
+        ("実行 auth.ts 変更の実装", "ACT"),
+        ("評価 login.ts コードレビュー", "EVAL"),
+        ("自動 dashboard.tsx ダッシュボード実装", "AUTO"),
     ])
     def test_detects_japanese_keyword(self, mock_env, keyword, mode):
         result = run_hook(
@@ -111,10 +111,10 @@ class TestChineseModeKeywords:
     """Detect Chinese mode keywords."""
 
     @pytest.mark.parametrize("keyword,mode", [
-        ("计划 设计认证功能", "PLAN"),
-        ("执行 实施变更", "ACT"),
-        ("评估 代码审查", "EVAL"),
-        ("自动 实现仪表板", "AUTO"),
+        ("计划 auth.ts 设计认证功能", "PLAN"),
+        ("执行 auth.ts 实施认证变更", "ACT"),
+        ("评估 login.ts 代码审查", "EVAL"),
+        ("自动 dashboard.tsx 实现仪表板", "AUTO"),
     ])
     def test_detects_chinese_keyword(self, mock_env, keyword, mode):
         result = run_hook(
@@ -130,10 +130,10 @@ class TestSpanishModeKeywords:
     """Detect Spanish mode keywords."""
 
     @pytest.mark.parametrize("keyword,mode", [
-        ("PLANIFICAR diseñar autenticación", "PLAN"),
-        ("ACTUAR implementar cambios", "ACT"),
-        ("EVALUAR revisar código", "EVAL"),
-        ("AUTOMÁTICO implementar dashboard", "AUTO"),
+        ("PLANIFICAR diseñar autenticación en auth.ts", "PLAN"),
+        ("ACTUAR implementar cambios en login.ts", "ACT"),
+        ("EVALUAR revisar código en auth.ts", "EVAL"),
+        ("AUTOMÁTICO implementar dashboard en dashboard.tsx", "AUTO"),
     ])
     def test_detects_spanish_keyword(self, mock_env, keyword, mode):
         result = run_hook(
@@ -160,7 +160,7 @@ def test_context_contains_parse_mode_hint(self, mock_env):
     def test_context_contains_agent_name(self, mock_env):
         result = run_hook(
             "user-prompt-submit.py",
-            input_data={"prompt": "AUTO implement feature"},
+            input_data={"prompt": "AUTO implement feature in auth.ts"},
             env=mock_env,
         )
         assert "# Mode: AUTO" in result.stdout
@@ -169,7 +169,7 @@ def test_case_insensitive_detection(self, mock_env):
         """Keywords should be detected case-insensitively."""
         result = run_hook(
             "user-prompt-submit.py",
-            input_data={"prompt": "plan design something"},
+            input_data={"prompt": "plan design something for auth.ts"},
             env=mock_env,
         )
         assert result.succeeded