Skip to content

Commit 8cef0ed

Browse files
committed
fix(plugin): update e2e tests and tighten regex for clarification gate (#1423)
- Update 9 e2e test prompts to include file references (login.ts, auth.ts, dashboard.tsx) so that they bypass the clarification gate instead of triggering it.
- Tighten the snake_case tech-reference regex to require segments of 3+ characters, reducing false positives on short English word pairs (e.g. "of_the").
1 parent 04d9e2e commit 8cef0ed

2 files changed

Lines changed: 51 additions & 26 deletions

File tree

packages/claude-code-plugin/hooks/lib/mode_engine.py

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@
153153
re.compile(r"\b[a-zA-Z_][\w]*\("),
154154
re.compile(r"\b[A-Z][a-zA-Z0-9]*[A-Z][a-zA-Z0-9]+\b"),
155155
re.compile(r"\b[a-z][a-z0-9]+[A-Z][a-zA-Z0-9]+\b"),
156-
re.compile(r"\b[a-z]+_[a-z][a-z0-9_]*\b"),
156+
re.compile(r"\b[a-z]{3,}_[a-z]{3,}[a-z0-9_]*\b"),
157157
re.compile(r"`[^`]+`"),
158158
]
159159

@@ -164,17 +164,34 @@
164164
)
165165

166166

167-
def evaluate_clarification_standalone(prompt: str) -> Optional[str]:
167+
DEFAULT_QUESTION_BUDGET = 3
168+
169+
170+
def evaluate_clarification_standalone(
171+
prompt: str, question_budget: Optional[int] = None
172+
) -> Optional[str]:
168173
"""
169174
Standalone clarification gate (#1423).
170175
171176
Returns a clarification-first directive string when the prompt is
172177
ambiguous, or ``None`` when the request is clear enough to plan.
178+
179+
Args:
180+
prompt: Raw user prompt including mode keyword.
181+
question_budget: Remaining clarification rounds. Defaults to
182+
``DEFAULT_QUESTION_BUDGET``. When 0 the gate falls back
183+
to planning with explicit assumptions.
173184
"""
185+
budget = question_budget if question_budget is not None else DEFAULT_QUESTION_BUDGET
186+
174187
trimmed = prompt.strip()
175188
if not trimmed:
176189
return None
177190

191+
# Budget exhausted — proceed with explicit assumptions.
192+
if budget <= 0:
193+
return None
194+
178195
# Strip mode keyword prefix before evaluating content
179196
stripped = _MODE_KEYWORD_RE.sub("", trimmed).strip()
180197
if not stripped:
@@ -192,6 +209,8 @@ def evaluate_clarification_standalone(prompt: str) -> Optional[str]:
192209
if not is_vague and not is_short:
193210
return None
194211

212+
remaining = budget - 1
213+
195214
if is_vague:
196215
question = (
197216
"What concrete change are you targeting — "
@@ -210,6 +229,7 @@ def evaluate_clarification_standalone(prompt: str) -> Optional[str]:
210229
"2. Do NOT output any implementation plan, architecture, or code.\n"
211230
"3. Wait for the user's response before continuing.\n\n"
212231
f'❓ Ask this: "{question}"\n\n'
232+
f"Remaining question budget: {remaining}\n\n"
213233
"After the user answers, re-invoke the mode with the clarified prompt."
214234
)
215235

@@ -396,7 +416,12 @@ def build_council_scene(self, mode: str) -> Optional[dict]:
396416
"format": "tiny-actor-grid",
397417
}
398418

399-
def build_instructions(self, mode: str, prompt: Optional[str] = None) -> str:
419+
def build_instructions(
420+
self,
421+
mode: str,
422+
prompt: Optional[str] = None,
423+
question_budget: Optional[int] = None,
424+
) -> str:
400425
"""
401426
Build complete mode instructions for hook output.
402427
@@ -420,7 +445,7 @@ def build_instructions(self, mode: str, prompt: Optional[str] = None) -> str:
420445

421446
# Clarification gate for PLAN/AUTO modes (#1423)
422447
if prompt and mode_upper in ("PLAN", "AUTO"):
423-
directive = evaluate_clarification_standalone(prompt)
448+
directive = evaluate_clarification_standalone(prompt, question_budget)
424449
if directive:
425450
return directive
426451

tests/e2e/plugin-hooks/test_user_prompt_submit_e2e.py

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,10 @@ class TestEnglishModeKeywords:
4545
"""Detect English mode keywords: PLAN, ACT, EVAL, AUTO."""
4646

4747
@pytest.mark.parametrize("keyword,mode", [
48-
("PLAN design auth feature", "PLAN"),
49-
("ACT implement the changes", "ACT"),
50-
("EVAL review the code", "EVAL"),
51-
("AUTO implement user dashboard", "AUTO"),
48+
("PLAN design auth feature for login.ts", "PLAN"),
49+
("ACT implement the auth changes in login.ts", "ACT"),
50+
("EVAL review the auth.ts implementation", "EVAL"),
51+
("AUTO implement user dashboard in dashboard.tsx", "AUTO"),
5252
])
5353
def test_detects_english_keyword(self, mock_env, keyword, mode):
5454
result = run_hook(
@@ -73,10 +73,10 @@ class TestKoreanModeKeywords:
7373
"""Detect Korean mode keywords."""
7474

7575
@pytest.mark.parametrize("keyword,mode", [
76-
("계획 인증 기능 설계", "PLAN"),
77-
("실행 변경 사항 구현", "ACT"),
78-
("평가 코드 리뷰", "EVAL"),
79-
("자동 대시보드 구현", "AUTO"),
76+
("계획 login.ts 인증 기능 설계", "PLAN"),
77+
("실행 auth.ts 인증 변경 사항 구현", "ACT"),
78+
("평가 login.ts 인증 코드 리뷰", "EVAL"),
79+
("자동 dashboard.tsx 대시보드 구현", "AUTO"),
8080
])
8181
def test_detects_korean_keyword(self, mock_env, keyword, mode):
8282
result = run_hook(
@@ -92,10 +92,10 @@ class TestJapaneseModeKeywords:
9292
"""Detect Japanese mode keywords."""
9393

9494
@pytest.mark.parametrize("keyword,mode", [
95-
("計画 認証機能の設計", "PLAN"),
96-
("実行 変更の実装", "ACT"),
97-
("評価 コードレビュー", "EVAL"),
98-
("自動 ダッシュボード実装", "AUTO"),
95+
("計画 auth.ts 認証機能の設計", "PLAN"),
96+
("実行 auth.ts 変更の実装", "ACT"),
97+
("評価 login.ts コードレビュー", "EVAL"),
98+
("自動 dashboard.tsx ダッシュボード実装", "AUTO"),
9999
])
100100
def test_detects_japanese_keyword(self, mock_env, keyword, mode):
101101
result = run_hook(
@@ -111,10 +111,10 @@ class TestChineseModeKeywords:
111111
"""Detect Chinese mode keywords."""
112112

113113
@pytest.mark.parametrize("keyword,mode", [
114-
("计划 设计认证功能", "PLAN"),
115-
("执行 实施变更", "ACT"),
116-
("评估 代码审查", "EVAL"),
117-
("自动 实现仪表板", "AUTO"),
114+
("计划 auth.ts 设计认证功能", "PLAN"),
115+
("执行 auth.ts 实施认证变更", "ACT"),
116+
("评估 login.ts 代码审查", "EVAL"),
117+
("自动 dashboard.tsx 实现仪表板", "AUTO"),
118118
])
119119
def test_detects_chinese_keyword(self, mock_env, keyword, mode):
120120
result = run_hook(
@@ -130,10 +130,10 @@ class TestSpanishModeKeywords:
130130
"""Detect Spanish mode keywords."""
131131

132132
@pytest.mark.parametrize("keyword,mode", [
133-
("PLANIFICAR diseñar autenticación", "PLAN"),
134-
("ACTUAR implementar cambios", "ACT"),
135-
("EVALUAR revisar código", "EVAL"),
136-
("AUTOMÁTICO implementar dashboard", "AUTO"),
133+
("PLANIFICAR diseñar autenticación en auth.ts", "PLAN"),
134+
("ACTUAR implementar cambios en login.ts", "ACT"),
135+
("EVALUAR revisar código en auth.ts", "EVAL"),
136+
("AUTOMÁTICO implementar dashboard en dashboard.tsx", "AUTO"),
137137
])
138138
def test_detects_spanish_keyword(self, mock_env, keyword, mode):
139139
result = run_hook(
@@ -160,7 +160,7 @@ def test_context_contains_parse_mode_hint(self, mock_env):
160160
def test_context_contains_agent_name(self, mock_env):
161161
result = run_hook(
162162
"user-prompt-submit.py",
163-
input_data={"prompt": "AUTO implement feature"},
163+
input_data={"prompt": "AUTO implement feature in auth.ts"},
164164
env=mock_env,
165165
)
166166
assert "# Mode: AUTO" in result.stdout
@@ -169,7 +169,7 @@ def test_case_insensitive_detection(self, mock_env):
169169
"""Keywords should be detected case-insensitively."""
170170
result = run_hook(
171171
"user-prompt-submit.py",
172-
input_data={"prompt": "plan design something"},
172+
input_data={"prompt": "plan design something for auth.ts"},
173173
env=mock_env,
174174
)
175175
assert result.succeeded

0 commit comments

Comments
 (0)