Skip to content

Commit 814d7a3

Browse files
ndycode and codex committed
fix(matrix): update smoke harness for modern Codex CLI
Use non-interactive 'codex exec' invocation, drop deprecated args, and resolve Windows .cmd wrappers to node script entries to avoid shell arg flattening. Also tighten pass/fail classification and add wrapper-resolution tests. Co-authored-by: Codex <noreply@openai.com>
1 parent 14de812 commit 814d7a3

2 files changed

Lines changed: 86 additions & 19 deletions

File tree

scripts/test-model-matrix.js

Lines changed: 47 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import { existsSync } from "node:fs";
1+
import { existsSync, readFileSync } from "node:fs";
22
import { readFile, writeFile, rm, mkdir } from "node:fs/promises";
33
import { spawnSync } from "node:child_process";
44
import { dirname, join, resolve } from "node:path";
@@ -13,16 +13,45 @@ const scenarioTemplates = {
1313
modern: join(repoRoot, "config", "codex-modern.json"),
1414
};
1515

16-
const defaultPromptPrefix = "Reply exactly:";
17-
const modelProviderId = "openai";
1816
const pluginPackageName = "codex-multi-auth";
1917
const DEFAULT_MATRIX_TIMEOUT_MS = 120000;
2018

19+
/**
 * Find the JavaScript entry script that a Windows `.cmd` wrapper launches.
 *
 * The wrapper text is scanned for the first double-quoted path that is
 * anchored at the wrapper's own directory and ends in `.js`. Two spellings of
 * "directory of this wrapper" are recognised: the `%dp0%` variable and the
 * batch built-in `%~dp0` expansion.
 *
 * @param {string} commandPath - Path of the command to inspect.
 * @returns {string | null} Absolute path of the referenced script, or `null`
 *   when `commandPath` is not a `.cmd` file, cannot be read, contains no such
 *   reference, or the referenced script does not exist on disk.
 */
function resolveCmdScriptEntry(commandPath) {
  if (!/\.cmd$/i.test(commandPath)) {
    return null;
  }
  try {
    const raw = readFileSync(commandPath, "utf8");
    // `[^"\r\n]+` keeps the match inside one quoted token, so an earlier
    // quoted path that does not end in `.js` (e.g. "...\node.exe") is skipped.
    const match = raw.match(/"%(?:dp0%|~dp0)\\([^"\r\n]+\.js)"/i);
    if (!match || typeof match[1] !== "string") {
      return null;
    }
    // Normalise separators so the relative path resolves on any platform.
    const relScriptPath = match[1].replace(/[\\/]+/g, "/");
    const scriptPath = resolve(dirname(commandPath), relScriptPath);
    return existsSync(scriptPath) ? scriptPath : null;
  } catch {
    // Best effort: an unreadable wrapper means "no script entry found".
    return null;
  }
}
36+
37+
/**
 * Build the spawn descriptor for a Codex command.
 *
 * When `command` is a `.cmd` wrapper whose script entry can be resolved, the
 * descriptor runs that script directly with the current Node binary and no
 * shell, keeping the wrapper path only for display. Otherwise the command is
 * returned as-is, with `shell` enabled only for `.cmd` paths.
 *
 * @param {string} command - Command path or name to execute.
 * @returns {{command: string, shell: boolean, prefixArgs?: string[], displayCommand?: string}}
 *   Descriptor consumed by the spawn call sites.
 */
function buildExecutable(command) {
  const nodeEntry = resolveCmdScriptEntry(command);

  // No resolvable script: run the command itself, via a shell only for .cmd.
  if (!nodeEntry) {
    const needsShell = /\.cmd$/i.test(command);
    return { command, shell: needsShell };
  }

  return {
    command: process.execPath,
    shell: false,
    prefixArgs: [nodeEntry],
    displayCommand: command,
  };
}
49+
2150
export function resolveCodexExecutable() {
2251
const envOverride = process.env.CODEX_BIN;
2352
if (envOverride && envOverride.trim().length > 0) {
2453
const command = envOverride.trim();
25-
return { command, shell: /\.cmd$/i.test(command) };
54+
return buildExecutable(command);
2655
}
2756

2857
if (process.platform !== "win32") {
@@ -53,12 +82,12 @@ export function resolveCodexExecutable() {
5382
/npm\\Codex\.cmd$/i.test(candidate),
5483
);
5584
if (exactCmd) {
56-
return { command: exactCmd, shell: true };
85+
return buildExecutable(exactCmd);
5786
}
5887

5988
const anyCmd = candidates.find((candidate) => /\.cmd$/i.test(candidate));
6089
if (anyCmd) {
61-
return { command: anyCmd, shell: true };
90+
return buildExecutable(anyCmd);
6291
}
6392

6493
return { command: candidates[0], shell: false };
@@ -215,23 +244,24 @@ function enumerateCases(models, smoke, maxCases) {
215244
return selected;
216245
}
217246

218-
function executeModelCase(caseInfo, index, port) {
247+
function executeModelCase(caseInfo, index) {
219248
const token = `MODEL_MATRIX_OK_${index}`;
220-
const message = `${defaultPromptPrefix} ${token}`;
249+
const message = token;
221250
const args = [
222-
"run",
251+
"exec",
223252
message,
224253
"--model",
225-
`${modelProviderId}/${caseInfo.model}`,
226-
"--port",
227-
String(port),
254+
caseInfo.model,
255+
"--json",
256+
"--skip-git-repo-check",
228257
];
229258
if (caseInfo.variant) {
230-
args.push("--variant", caseInfo.variant);
259+
args.push("-c", `model_reasoning_effort="${caseInfo.variant}"`);
231260
}
232261

233262
const timeoutMs = resolveMatrixTimeoutMs();
234-
const finalized = spawnSync(CodexExecutable.command, args, {
263+
const commandArgs = [...(CodexExecutable.prefixArgs ?? []), ...args];
264+
const finalized = spawnSync(CodexExecutable.command, commandArgs, {
235265
cwd: repoRoot,
236266
encoding: "utf8",
237267
windowsHide: true,
@@ -258,9 +288,8 @@ function executeModelCase(caseInfo, index, port) {
258288

259289
const combinedOutput = `${finalized.stdout ?? ""}\n${finalized.stderr ?? ""}`.trim();
260290
const hasToken = combinedOutput.includes(token);
261-
const hasFatalError = /ProviderModelNotFoundError|Model not found/i.test(combinedOutput);
262291
const exitCode = finalized.status ?? 1;
263-
const ok = exitCode === 0 && hasToken && !hasFatalError;
292+
const ok = exitCode === 0 && hasToken;
264293

265294
return {
266295
...caseInfo,
@@ -346,7 +375,6 @@ async function runScenario(scenario, options) {
346375
const result = executeModelCase(
347376
caseInfo,
348377
i + 1,
349-
options.portStart + i,
350378
);
351379
results.push(result);
352380
const variantLabel = result.variant ? ` [variant=${result.variant}]` : "";
@@ -401,7 +429,7 @@ async function main() {
401429
console.log(`Scenarios: ${scenarios.join(", ")}`);
402430
console.log(`Mode: ${smoke ? "smoke" : "full"}`);
403431
console.log(`Plugin: ${pluginRef}`);
404-
console.log(`Codex command: ${CodexExecutable.command}`);
432+
console.log(`Codex command: ${CodexExecutable.displayCommand ?? CodexExecutable.command}`);
405433

406434
const backups = await backupLocalConfigs();
407435
const allResults = [];
@@ -437,7 +465,7 @@ async function main() {
437465
scenarios,
438466
mode: smoke ? "smoke" : "full",
439467
plugin: pluginRef,
440-
CodexCommand: CodexExecutable.command,
468+
CodexCommand: CodexExecutable.displayCommand ?? CodexExecutable.command,
441469
totals: {
442470
total: allResults.length,
443471
passed,

test/test-model-matrix-script.test.ts

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,6 @@
1+
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
2+
import { tmpdir } from "node:os";
3+
import { dirname, join } from "node:path";
14
import { beforeEach, describe, expect, it, vi } from "vitest";
25

36
const spawnSync = vi.fn();
@@ -24,6 +27,42 @@ describe("test-model-matrix script helpers", () => {
2427
});
2528
});
2629

30+
it("resolves CODEX_BIN .cmd wrapper to node + script entry when available", async () => {
31+
const fixtureRoot = mkdtempSync(join(tmpdir(), "matrix-cmd-wrapper-"));
32+
try {
33+
const scriptPath = join(
34+
fixtureRoot,
35+
"node_modules",
36+
"codex-multi-auth",
37+
"scripts",
38+
"codex.js",
39+
);
40+
mkdirSync(dirname(scriptPath), { recursive: true });
41+
writeFileSync(scriptPath, "#!/usr/bin/env node\n", "utf8");
42+
43+
const cmdPath = join(fixtureRoot, "Codex.cmd");
44+
writeFileSync(
45+
cmdPath,
46+
[
47+
"@ECHO off",
48+
'endLocal & goto #_undefined_# 2>NUL || title %COMSPEC% & "%_prog%" "%dp0%\\node_modules\\codex-multi-auth\\scripts\\codex.js" %*',
49+
].join("\r\n"),
50+
"utf8",
51+
);
52+
vi.stubEnv("CODEX_BIN", cmdPath);
53+
54+
const mod = await import("../scripts/test-model-matrix.js");
55+
expect(mod.resolveCodexExecutable()).toEqual({
56+
command: process.execPath,
57+
shell: false,
58+
prefixArgs: [scriptPath],
59+
displayCommand: cmdPath,
60+
});
61+
} finally {
62+
rmSync(fixtureRoot, { recursive: true, force: true });
63+
}
64+
});
65+
2766
it("falls back to default timeout when CODEX_MATRIX_TIMEOUT_MS is invalid", async () => {
2867
vi.stubEnv("CODEX_MATRIX_TIMEOUT_MS", "abc");
2968
const mod = await import("../scripts/test-model-matrix.js");

0 commit comments

Comments
 (0)