Skip to content

Commit eefb9d4

Browse files
committed
test: guard codex active instruction drift
1 parent: 1272689 · commit: eefb9d4

12 files changed

Lines changed: 30 additions & 12 deletions

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ The format follows Keep a Changelog, and marketplace/plugin versions follow Sema
1818
- Context7 MCP now pins `@upstash/context7-mcp@3.0.0`, matching the current
1919
npm stable package in `.mcp.json` and `config/mcp-runtime-versions.env`.
2020
- Shared MCP runtime pins now match current upstream stable packages:
21-
`serena-agent==1.5.3`, `chrome-devtools-mcp@1.1.0`, and `shadcn@4.8.1`.
21+
`serena-agent==1.5.3`, `chrome-devtools-mcp@1.1.1`, and `shadcn@4.8.1`.
2222

2323
### Fixed
2424

config/mcp-runtime-versions.env

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,6 @@ SERENA_AGENT_VERSION=1.5.3
1212
SEMGREP_VERSION=1.163.0
1313
SEQUENTIAL_THINKING_MCP_VERSION=2025.12.18
1414
PLAYWRIGHT_MCP_VERSION=0.0.75
15-
CHROME_DEVTOOLS_MCP_VERSION=1.1.0
15+
CHROME_DEVTOOLS_MCP_VERSION=1.1.1
1616
CONTEXT7_MCP_VERSION=3.0.0
1717
SHADCN_VERSION=4.8.1

plugins/rldyour-mcps/.mcp.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@
4545
"chrome-devtools": {
4646
"command": "bunx",
4747
"args": [
48-
"chrome-devtools-mcp@1.1.0",
48+
"chrome-devtools-mcp@1.1.1",
4949
"--headless",
5050
"--isolated",
5151
"--no-usage-statistics",

scripts/validate_instruction_docs.py

Lines changed: 19 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,21 @@
2222
CODEX_DOC = "AGENTS.md"
2323
CLAUDE_DOC = ".claude/CLAUDE.md"
2424
LEGACY_CLAUDE_DOC = "CLAUDE.md"
25+
ACTIVE_DOCS = (
26+
CODEX_DOC,
27+
CLAUDE_DOC,
28+
"README.md",
29+
"docs/contract-matrix.md",
30+
"system/AGENTS.md",
31+
)
32+
FORBIDDEN_ACTIVE_CLAIMS = {
33+
"[features].plugin_hooks = true": "Codex 0.134 treats plugin_hooks as a removed feature flag",
34+
"features.plugin_hooks = true": "Codex 0.134 treats plugin_hooks as a removed feature flag",
35+
"active `hooks`, `plugin_hooks`, and `multi_agent`": "plugin hooks are verified through hooks/list, not an active feature flag",
36+
"active hooks, plugin_hooks, and multi_agent": "plugin hooks are verified through hooks/list, not an active feature flag",
37+
":danger-no-sandbox": "current Codex built-ins use :danger-full-access for the danger profile",
38+
"currently pinned at v1.15.4": "active current-pin wording must match the current OpenCode baseline",
39+
}
2540

2641

2742
def run_state(root: Path) -> dict[str, object]:
@@ -53,6 +68,9 @@ def validate_file_content(root: Path, relative: str, errors: list[str], warnings
5368
lines = text.splitlines()
5469
if SECRET_RE.search(text):
5570
errors.append(f"{relative}: contains secret-looking content")
71+
for needle, reason in FORBIDDEN_ACTIVE_CLAIMS.items():
72+
if needle in text:
73+
errors.append(f"{relative}: forbidden active claim {needle!r}: {reason}")
5674
if relative == CODEX_DOC:
5775
if len(lines) > 260:
5876
warnings.append(f"{relative}: {len(lines)} lines; keep Codex instructions compact")
@@ -103,7 +121,7 @@ def main() -> int:
103121
if (root / LEGACY_CLAUDE_DOC).is_file():
104122
errors.append(f"{LEGACY_CLAUDE_DOC}: use .claude/CLAUDE.md for Claude Code project memory")
105123

106-
for relative in (CODEX_DOC, CLAUDE_DOC):
124+
for relative in ACTIVE_DOCS:
107125
validate_file_content(root, relative, errors, warnings)
108126

109127
payload = {

system/agents/architecture-reviewer.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ mcp_servers.playwright.tool_timeout_sec = 180
1717

1818
mcp_servers.chrome-devtools.enabled = false
1919
mcp_servers.chrome-devtools.command = "bunx"
20-
mcp_servers.chrome-devtools.args = ["chrome-devtools-mcp@1.1.0", "--headless", "--isolated", "--no-usage-statistics", "--no-performance-crux"]
20+
mcp_servers.chrome-devtools.args = ["chrome-devtools-mcp@1.1.1", "--headless", "--isolated", "--no-usage-statistics", "--no-performance-crux"]
2121
mcp_servers.chrome-devtools.startup_timeout_sec = 90
2222
mcp_servers.chrome-devtools.tool_timeout_sec = 180
2323

system/agents/browser-tester.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ mcp_servers.playwright.tool_timeout_sec = 180
1717

1818
mcp_servers.chrome-devtools.enabled = false
1919
mcp_servers.chrome-devtools.command = "bunx"
20-
mcp_servers.chrome-devtools.args = ["chrome-devtools-mcp@1.1.0", "--headless", "--isolated", "--no-usage-statistics", "--no-performance-crux"]
20+
mcp_servers.chrome-devtools.args = ["chrome-devtools-mcp@1.1.1", "--headless", "--isolated", "--no-usage-statistics", "--no-performance-crux"]
2121
mcp_servers.chrome-devtools.startup_timeout_sec = 90
2222
mcp_servers.chrome-devtools.tool_timeout_sec = 180
2323

system/agents/consistency-reviewer.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ mcp_servers.playwright.tool_timeout_sec = 180
1717

1818
mcp_servers.chrome-devtools.enabled = false
1919
mcp_servers.chrome-devtools.command = "bunx"
20-
mcp_servers.chrome-devtools.args = ["chrome-devtools-mcp@1.1.0", "--headless", "--isolated", "--no-usage-statistics", "--no-performance-crux"]
20+
mcp_servers.chrome-devtools.args = ["chrome-devtools-mcp@1.1.1", "--headless", "--isolated", "--no-usage-statistics", "--no-performance-crux"]
2121
mcp_servers.chrome-devtools.startup_timeout_sec = 90
2222
mcp_servers.chrome-devtools.tool_timeout_sec = 180
2323

system/agents/quality-reviewer.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ mcp_servers.playwright.tool_timeout_sec = 180
1717

1818
mcp_servers.chrome-devtools.enabled = false
1919
mcp_servers.chrome-devtools.command = "bunx"
20-
mcp_servers.chrome-devtools.args = ["chrome-devtools-mcp@1.1.0", "--headless", "--isolated", "--no-usage-statistics", "--no-performance-crux"]
20+
mcp_servers.chrome-devtools.args = ["chrome-devtools-mcp@1.1.1", "--headless", "--isolated", "--no-usage-statistics", "--no-performance-crux"]
2121
mcp_servers.chrome-devtools.startup_timeout_sec = 90
2222
mcp_servers.chrome-devtools.tool_timeout_sec = 180
2323

system/agents/research-explorer.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ mcp_servers.playwright.tool_timeout_sec = 180
1818

1919
mcp_servers.chrome-devtools.enabled = false
2020
mcp_servers.chrome-devtools.command = "bunx"
21-
mcp_servers.chrome-devtools.args = ["chrome-devtools-mcp@1.1.0", "--headless", "--isolated", "--no-usage-statistics", "--no-performance-crux"]
21+
mcp_servers.chrome-devtools.args = ["chrome-devtools-mcp@1.1.1", "--headless", "--isolated", "--no-usage-statistics", "--no-performance-crux"]
2222
mcp_servers.chrome-devtools.startup_timeout_sec = 90
2323
mcp_servers.chrome-devtools.tool_timeout_sec = 180
2424

system/agents/security-audit.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ mcp_servers.playwright.tool_timeout_sec = 180
1717

1818
mcp_servers.chrome-devtools.enabled = false
1919
mcp_servers.chrome-devtools.command = "bunx"
20-
mcp_servers.chrome-devtools.args = ["chrome-devtools-mcp@1.1.0", "--headless", "--isolated", "--no-usage-statistics", "--no-performance-crux"]
20+
mcp_servers.chrome-devtools.args = ["chrome-devtools-mcp@1.1.1", "--headless", "--isolated", "--no-usage-statistics", "--no-performance-crux"]
2121
mcp_servers.chrome-devtools.startup_timeout_sec = 90
2222
mcp_servers.chrome-devtools.tool_timeout_sec = 180
2323

0 commit comments

Comments (0)