Skip to content

Commit 111f7bf

Browse files
egavrin and devagent committed
fix(validation): include anthropic smoke coverage
- Add Anthropic to the provider smoke matrix with explicit blocked reporting
- Refactor the helper so script lint stays quiet after the coverage change

Co-Authored-By: devagent <devagent@egavrin>
1 parent 751bbaa commit 111f7bf

1 file changed

Lines changed: 175 additions & 87 deletions

File tree

scripts/live-validation/provider-smoke.ts

Lines changed: 175 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,20 @@
11
#!/usr/bin/env bun
22

3+
import { PROTOCOL_VERSION, type TaskExecutionRequest } from "@devagent-sdk/types";
34
import { spawn } from "node:child_process";
45
import { mkdir, mkdtemp, writeFile } from "node:fs/promises";
56
import { tmpdir } from "node:os";
67
import { dirname, join } from "node:path";
78
import { fileURLToPath } from "node:url";
8-
import { PROTOCOL_VERSION, type TaskExecutionRequest } from "@devagent-sdk/types";
9+
910
import {
1011
CredentialStore,
1112
getProviderCredentialDescriptor,
1213
type CredentialInfo,
1314
} from "../../packages/runtime/src/index.ts";
1415

1516
type ProviderId =
17+
| "anthropic"
1618
| "devagent-api"
1719
| "openai"
1820
| "openrouter"
@@ -58,6 +60,22 @@ interface OllamaModelSelection {
5860
readonly blockedReason?: string;
5961
}
6062

63+
/**
 * Shared, per-provider state handed to every smoke-check helper.
 *
 * One context is built per provider run; the helpers read paths and CLI
 * settings from it and append their results to `checks`.
 */
interface ProviderCheckContext {
  /** Provider under test; forwarded to the CLI as `--provider`. */
  readonly provider: ProviderId;
  /** Model name forwarded to the CLI as `--model`. */
  readonly model: string;
  /** Directory that receives the stdout/stderr artifacts of each check. */
  readonly providerDir: string;
  /** Working directory for spawned commands and for scratch files (prompt, request, repo). */
  readonly workDir: string;
  /** Environment passed to every spawned command. */
  readonly env: NodeJS.ProcessEnv;
  /** Executable plus base arguments used to invoke the devagent CLI. */
  readonly command: ReturnType<typeof getDevagentCommand>;
  /** Result list; the reference is fixed but the array itself is appended to as checks complete. */
  readonly checks: ProviderSmokeCheck[];
}
72+
73+
/**
 * Callback that runs one CLI check and records its outcome.
 *
 * @param label - Short identifier for the check (e.g. "quiet-query").
 * @param args - Arguments appended after the CLI's base arguments.
 * @param timeoutMs - Time budget in milliseconds forwarded to the command runner.
 */
type ProviderCheckRecorder = (
  label: string,
  args: string[],
  timeoutMs: number,
) => Promise<void>;
78+
6179
function extractStatusCode(output: string): number | null {
6280
const match = output.match(/statusCode:\s*(\d{3})/);
6381
return match ? Number.parseInt(match[1]!, 10) : null;
@@ -67,7 +85,7 @@ function extractUrl(output: string): string | null {
6785
const quoted = output.match(/url:\s*'([^']+)'/);
6886
if (quoted) return quoted[1]!;
6987
const plain = output.match(/https?:\/\/[^\s'"]+/);
70-
return plain ? plain[0]! : null;
88+
return plain?.[0] ?? null;
7189
}
7290

7391
export function classifyProviderFailure(result: CommandResult): Pick<ProviderSmokeCheck, "status" | "blockedReason"> {
@@ -101,6 +119,7 @@ export function classifyProviderFailure(result: CommandResult): Pick<ProviderSmo
101119
}
102120

103121
const DEFAULT_MODELS: Readonly<Record<Exclude<ProviderId, "ollama">, string>> = {
122+
anthropic: "claude-sonnet-4-20250514",
104123
"devagent-api": "cortex",
105124
openai: "gpt-5.4-mini",
106125
openrouter: "openai/gpt-4o-mini",
@@ -287,7 +306,49 @@ async function writeSmokeArtifacts(
287306
return { stdoutPath, stderrPath };
288307
}
289308

290-
function buildExecuteRequest(repoRoot: string, provider: ProviderId, model: string): TaskExecutionRequest {
309+
/**
 * Builds the single repository descriptor used by the provider smoke request.
 *
 * Every field except `repoRoot` is a fixed placeholder; the id "repo-1" is the
 * one referenced by the rest of the request.
 *
 * @param repoRoot - Path recorded as the repository root.
 * @returns The repository entry for `TaskExecutionRequest.repositories`.
 */
function buildRepository(repoRoot: string): TaskExecutionRequest["repositories"][number] {
  return {
    id: "repo-1",
    workspaceId: "workspace-1",
    alias: "primary",
    name: "repo",
    repoRoot,
    repoFullName: "local/provider-smoke",
    defaultBranch: "main",
    provider: "local",
  };
}
321+
322+
/**
 * Builds the execution section of the provider smoke request.
 *
 * Declares a single repository ("repo-1") that is worked on as a temporary
 * copy of `repoRoot`, based on `main`, on the `devagent/provider-smoke` branch.
 *
 * @param repoRoot - Path used as the source repository path.
 * @returns The value for `TaskExecutionRequest.execution`.
 */
function buildExecution(repoRoot: string): TaskExecutionRequest["execution"] {
  return {
    primaryRepositoryId: "repo-1",
    repositories: [
      {
        repositoryId: "repo-1",
        alias: "primary",
        sourceRepoPath: repoRoot,
        baseRef: "main",
        workBranch: "devagent/provider-smoke",
        isolation: "temp-copy",
      },
    ],
  };
}
335+
336+
/**
 * Builds the capabilities section of the provider smoke request.
 *
 * @returns A capability set with every flag enabled.
 */
function buildCapabilities(): TaskExecutionRequest["capabilities"] {
  return {
    canSyncTasks: true,
    canCreateTask: true,
    canComment: true,
    canReview: true,
    canMerge: true,
    canOpenReviewable: true,
  };
}
346+
347+
function buildExecuteRequest(
348+
repoRoot: string,
349+
provider: ProviderId,
350+
model: string,
351+
): TaskExecutionRequest {
291352
return {
292353
protocolVersion: PROTOCOL_VERSION,
293354
taskId: `provider-smoke-${provider}`,
@@ -298,34 +359,15 @@ function buildExecuteRequest(repoRoot: string, provider: ProviderId, model: stri
298359
provider: "local",
299360
primaryRepositoryId: "repo-1",
300361
},
301-
repositories: [{
302-
id: "repo-1",
303-
workspaceId: "workspace-1",
304-
alias: "primary",
305-
name: "repo",
306-
repoRoot,
307-
repoFullName: "local/provider-smoke",
308-
defaultBranch: "main",
309-
provider: "local",
310-
}],
362+
repositories: [buildRepository(repoRoot)],
311363
workItem: {
312364
id: "item-1",
313365
kind: "local-task",
314366
externalId: "provider-smoke",
315367
title: "Create a tiny plan",
316368
repositoryId: "repo-1",
317369
},
318-
execution: {
319-
primaryRepositoryId: "repo-1",
320-
repositories: [{
321-
repositoryId: "repo-1",
322-
alias: "primary",
323-
sourceRepoPath: repoRoot,
324-
baseRef: "main",
325-
workBranch: "devagent/provider-smoke",
326-
isolation: "temp-copy",
327-
}],
328-
},
370+
execution: buildExecution(repoRoot),
329371
targetRepositoryIds: ["repo-1"],
330372
executor: {
331373
executorId: "devagent",
@@ -339,14 +381,7 @@ function buildExecuteRequest(repoRoot: string, provider: ProviderId, model: stri
339381
timeoutSec: 120,
340382
allowNetwork: true,
341383
},
342-
capabilities: {
343-
canSyncTasks: true,
344-
canCreateTask: true,
345-
canComment: true,
346-
canReview: true,
347-
canMerge: true,
348-
canOpenReviewable: true,
349-
},
384+
capabilities: buildCapabilities(),
350385
context: {
351386
summary: "Return a short plan only.",
352387
issueBody: "Do not modify files.",
@@ -356,49 +391,41 @@ function buildExecuteRequest(repoRoot: string, provider: ProviderId, model: stri
356391
};
357392
}
358393

359-
async function runProviderChecks(
360-
devagentRoot: string,
361-
outputRoot: string,
394+
function createBlockedProviderReport(
362395
provider: ProviderId,
363396
model: string,
364-
): Promise<ProviderSmokeReport> {
365-
const providerDir = join(outputRoot, provider);
366-
await mkdir(providerDir, { recursive: true });
367-
const storedCredential = loadStoredCredentials()[provider];
368-
const blockedReason = providerBlockedReason(provider, storedCredential);
369-
if (blockedReason) {
370-
return {
371-
provider,
372-
model,
397+
blockedReason: string,
398+
): ProviderSmokeReport {
399+
return {
400+
provider,
401+
model,
402+
status: "blocked",
403+
checks: [{
404+
label: "credential",
405+
command: "auth status",
406+
durationMs: 0,
373407
status: "blocked",
374-
checks: [{
375-
label: "credential",
376-
command: "auth status",
377-
durationMs: 0,
378-
status: "blocked",
379-
blockedReason,
380-
}],
381-
};
382-
}
383-
384-
const env = await createIsolatedEnv(outputRoot, provider);
385-
const command = getDevagentCommand(devagentRoot);
386-
const workDir = join(outputRoot, provider, "workspace");
387-
const checks: ProviderSmokeCheck[] = [];
408+
blockedReason,
409+
}],
410+
};
411+
}
388412

389-
const runAndRecord = async (
390-
label: string,
391-
args: string[],
392-
timeoutMs: number,
393-
): Promise<void> => {
394-
const result = await runCommand(command.executable, [...command.baseArgs, ...args], workDir, env, timeoutMs);
395-
const paths = await writeSmokeArtifacts(providerDir, label, result);
413+
function createProviderCheckRecorder(context: ProviderCheckContext): ProviderCheckRecorder {
414+
return async (label, args, timeoutMs): Promise<void> => {
415+
const result = await runCommand(
416+
context.command.executable,
417+
[...context.command.baseArgs, ...args],
418+
context.workDir,
419+
context.env,
420+
timeoutMs,
421+
);
422+
const paths = await writeSmokeArtifacts(context.providerDir, label, result);
396423
const classified = result.exitCode === 0
397424
? { status: "passed" as const, blockedReason: undefined }
398425
: classifyProviderFailure(result);
399-
checks.push({
426+
context.checks.push({
400427
label,
401-
command: [command.executable, ...command.baseArgs, ...args].join(" "),
428+
command: [context.command.executable, ...context.command.baseArgs, ...args].join(" "),
402429
exitCode: result.exitCode,
403430
durationMs: result.durationMs,
404431
status: classified.status,
@@ -407,29 +434,89 @@ async function runProviderChecks(
407434
stderrPath: paths.stderrPath,
408435
});
409436
};
437+
}
438+
439+
/**
 * Runs the two checks specific to the "devagent-api" provider: an inline
 * quiet query and the same query read from a prompt file.
 *
 * @param context - Provider run state; `workDir` receives `prompt.md`.
 * @param runAndRecord - Recorder used to execute and log each check.
 */
async function runDevagentApiChecks(
  context: ProviderCheckContext,
  runAndRecord: ProviderCheckRecorder,
): Promise<void> {
  const promptPath = join(context.workDir, "prompt.md");
  const sharedArgs = ["--provider", context.provider, "--model", context.model, "--quiet"];

  // The prompt file must exist before the first check so both checks see the same workspace.
  await writeFile(promptPath, "Reply with exactly: OK\n");
  await runAndRecord("quiet-query", [...sharedArgs, "Reply with exactly: OK"], 120_000);
  await runAndRecord("file-query", [...sharedArgs, "-f", promptPath], 120_000);
}
410447

411-
if (provider === "devagent-api") {
412-
await writeFile(join(workDir, "prompt.md"), "Reply with exactly: OK\n");
413-
await runAndRecord("quiet-query", ["--provider", provider, "--model", model, "--quiet", "Reply with exactly: OK"], 120_000);
414-
await runAndRecord("file-query", ["--provider", provider, "--model", model, "--quiet", "-f", join(workDir, "prompt.md")], 120_000);
415-
} else if (provider === "openai" || provider === "deepseek" || provider === "chatgpt" || provider === "github-copilot" || provider === "ollama") {
416-
await runAndRecord("quiet-query", ["--provider", provider, "--model", model, "--quiet", "Reply with exactly: OK"], provider === "ollama" ? 180_000 : 120_000);
417-
} else if (provider === "openrouter") {
418-
await mkdir(join(workDir, "repo"), { recursive: true });
419-
await runCommand("git", ["init", "-q"], join(workDir, "repo"), env, 10_000);
420-
await writeFile(join(workDir, "repo", "hello.txt"), "hello\n");
421-
const requestPath = join(workDir, "request.json");
422-
await writeFile(requestPath, JSON.stringify(buildExecuteRequest(join(workDir, "repo"), provider, model), null, 2));
423-
await mkdir(join(workDir, "artifacts"), { recursive: true });
424-
await runAndRecord("execute", ["execute", "--request", requestPath, "--artifact-dir", join(workDir, "artifacts")], 180_000);
448+
/**
 * Runs the "execute" check used for the OpenRouter provider.
 *
 * Prepares a scratch git repository with one file, writes a task execution
 * request pointing at it, then invokes `execute` through the recorder.
 *
 * @param context - Provider run state; `workDir` receives `repo/`, `request.json` and `artifacts/`.
 * @param runAndRecord - Recorder used to execute and log the check.
 */
async function runOpenRouterExecuteCheck(
  context: ProviderCheckContext,
  runAndRecord: ProviderCheckRecorder,
): Promise<void> {
  const repoDir = join(context.workDir, "repo");
  const requestPath = join(context.workDir, "request.json");
  const artifactDir = join(context.workDir, "artifacts");

  await mkdir(repoDir, { recursive: true });
  // NOTE(review): the result of `git init` is not inspected, so a failure here only
  // surfaces indirectly through the "execute" check below — confirm that is intended.
  await runCommand("git", ["init", "-q"], repoDir, context.env, 10_000);
  await writeFile(join(repoDir, "hello.txt"), "hello\n");

  const request = buildExecuteRequest(repoDir, context.provider, context.model);
  await writeFile(requestPath, JSON.stringify(request, null, 2));

  await mkdir(artifactDir, { recursive: true });
  await runAndRecord(
    "execute",
    ["execute", "--request", requestPath, "--artifact-dir", artifactDir],
    180_000,
  );
}
463+
464+
/**
 * Returns the time budget for a quiet-query check.
 *
 * @param provider - Provider under test.
 * @returns 180 000 ms for "ollama", 120 000 ms for every other provider.
 */
function quietQueryTimeout(provider: ProviderId): number {
  return provider === "ollama" ? 180_000 : 120_000;
}
467+
468+
/**
 * Runs the generic "quiet-query" check: a single `--quiet` prompt asking the
 * model to reply with exactly "OK".
 *
 * @param context - Provider run state supplying the provider and model.
 * @param runAndRecord - Recorder used to execute and log the check.
 */
async function runQuietQueryCheck(
  context: ProviderCheckContext,
  runAndRecord: ProviderCheckRecorder,
): Promise<void> {
  const { provider, model } = context;
  const args = ["--provider", provider, "--model", model, "--quiet", "Reply with exactly: OK"];
  await runAndRecord("quiet-query", args, quietQueryTimeout(provider));
}
478+
479+
/**
 * Dispatches to the check suite that matches the provider under test.
 *
 * "devagent-api" and "openrouter" have dedicated suites; every other provider
 * gets the generic quiet-query check.
 *
 * @param context - Provider run state; `provider` selects the suite.
 * @param runAndRecord - Recorder passed through to the selected suite.
 */
async function runProviderSpecificChecks(
  context: ProviderCheckContext,
  runAndRecord: ProviderCheckRecorder,
): Promise<void> {
  switch (context.provider) {
    case "devagent-api":
      await runDevagentApiChecks(context, runAndRecord);
      return;
    case "openrouter":
      await runOpenRouterExecuteCheck(context, runAndRecord);
      return;
    default:
      await runQuietQueryCheck(context, runAndRecord);
  }
}
491+
492+
/**
 * Collapses individual check results into one provider-level status.
 *
 * - "passed" when at least one check ran and all of them passed;
 * - "failed" when any check failed, or when no check was recorded at all;
 * - "blocked" otherwise (some check was blocked and none failed).
 *
 * @param checks - Results recorded for one provider.
 * @returns The aggregate status.
 */
function summarizeProviderChecks(checks: ReadonlyArray<ProviderSmokeCheck>): SmokeStatus {
  // `every` is vacuously true for an empty array; a run that recorded nothing
  // has validated nothing, so it must not be reported as a pass.
  if (checks.length === 0) {
    return "failed";
  }
  if (checks.every((check) => check.status === "passed")) {
    return "passed";
  }
  return checks.some((check) => check.status === "failed") ? "failed" : "blocked";
}
498+
499+
/**
 * Runs the smoke checks for one provider and returns its report.
 *
 * If the provider's credentials make it non-runnable, a blocked report is
 * returned without spawning any command. Otherwise the provider-specific
 * suite runs in an isolated environment and its results are summarized.
 *
 * @param devagentRoot - Root used to resolve the devagent CLI command.
 * @param outputRoot - Directory under which `<provider>/` and `<provider>/workspace` live.
 * @param provider - Provider under test.
 * @param model - Model name passed to the CLI.
 * @returns The provider report with aggregate status and per-check details.
 */
async function runProviderChecks(
  devagentRoot: string,
  outputRoot: string,
  provider: ProviderId,
  model: string,
): Promise<ProviderSmokeReport> {
  const providerDir = join(outputRoot, provider);
  await mkdir(providerDir, { recursive: true });

  const storedCredential = loadStoredCredentials()[provider];
  const blockedReason = providerBlockedReason(provider, storedCredential);
  if (blockedReason) {
    return createBlockedProviderReport(provider, model, blockedReason);
  }

  const env = await createIsolatedEnv(outputRoot, provider);
  const command = getDevagentCommand(devagentRoot);
  const workDir = join(outputRoot, provider, "workspace");
  const checks: ProviderSmokeCheck[] = [];
  // Annotated so a missing or mistyped field is reported here rather than at the call sites.
  const context: ProviderCheckContext = { provider, model, providerDir, workDir, env, command, checks };

  // The recorder appends to `checks` through the shared context.
  await runProviderSpecificChecks(context, createProviderCheckRecorder(context));
  return { provider, model, status: summarizeProviderChecks(checks), checks };
}
434521

435522
function renderMarkdown(reports: ReadonlyArray<ProviderSmokeReport>): string {
@@ -460,6 +547,7 @@ async function main(): Promise<void> {
460547

461548
const ollamaSelection = await selectOllamaModel();
462549
const matrix: Array<{ provider: ProviderId; model: string | null }> = [
550+
{ provider: "anthropic", model: DEFAULT_MODELS.anthropic },
463551
{ provider: "devagent-api", model: DEFAULT_MODELS["devagent-api"] },
464552
{ provider: "openai", model: DEFAULT_MODELS.openai },
465553
{ provider: "openrouter", model: DEFAULT_MODELS.openrouter },

0 commit comments

Comments
 (0)