Skip to content

Commit 991db1f

Browse files
committed
Emit agent task artifact envelopes
1 parent 66677af commit 991db1f

5 files changed

Lines changed: 172 additions & 7 deletions

File tree

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
"test:runtime-php-snippets": "tsx tests/runtime-php-snippets.test.ts",
7777
"test:browser-runner-template": "tsx tests/browser-runner-template.test.ts",
7878
"test:editor-actions": "tsx tests/editor-actions.test.ts",
79+
"test:artifact-result-envelope": "tsx tests/artifact-result-envelope.test.ts",
7980
"test:artifact-path-primitives": "tsx tests/artifact-path-primitives.test.ts",
8081
"test:browser-callback-materialization-contracts": "tsx tests/browser-callback-materialization-contracts.test.ts",
8182
"test:materialize-replay-package-command": "tsx tests/materialize-replay-package-command.test.ts",

packages/cli/src/commands/agent-task-run.ts

Lines changed: 114 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { mkdir, mkdtemp, readFile, rm, writeFile } from "node:fs/promises"
22
import { tmpdir } from "node:os"
33
import { join } from "node:path"
4-
import { buildAgentTaskRecipe, DEFAULT_WORDPRESS_VERSION, normalizeAgentRuntimeWorkload, normalizeAgentTaskRunResult, normalizeAgentTerminalResult, normalizeTaskInput, parseCommandJson, parseCommandOptions, resolveEffectiveRuntimeToolPolicy, type AgentTaskRunInput, type AgentTaskRunResultSummary, type AgentTerminalResult, type SandboxToolPolicySnapshot } from "@automattic/wp-codebox-core"
4+
import { artifactResultEnvelope, buildAgentTaskRecipe, DEFAULT_WORDPRESS_VERSION, normalizeAgentRuntimeWorkload, normalizeAgentTaskRunResult, normalizeAgentTerminalResult, normalizeTaskInput, parseCommandJson, parseCommandOptions, resolveEffectiveRuntimeToolPolicy, type AgentTaskRunInput, type AgentTaskRunResultSummary, type AgentTerminalResult, type ArtifactResultEnvelope, type SandboxToolPolicySnapshot } from "@automattic/wp-codebox-core"
55
import { stripUndefined } from "@automattic/wp-codebox-core/internals"
66
import { runRecipeRunCommand } from "./recipe-run.js"
77

@@ -30,6 +30,9 @@ interface AgentTaskRunOutput {
3030
completion_outcome: Record<string, unknown>
3131
component_contracts: Array<Record<string, unknown>>
3232
structured_artifacts: Array<Record<string, unknown>>
33+
typed_artifacts: Array<Record<string, unknown>>
34+
outputs: Record<string, unknown>
35+
artifact_result: ArtifactResultEnvelope
3336
run: Record<string, unknown>
3437
diagnostics: Array<Record<string, unknown>>
3538
agent_runtime_diagnostics: Record<string, unknown>
@@ -147,11 +150,34 @@ export async function runAgentTask(input: AgentTaskRunInput, options: AgentTaskR
147150
const failureEvidence = success ? undefined : buildFailureEvidence({ input, task, wpVersion, artifacts, recipePath, generatedRecipeArtifact, run, capture })
148151
const outputDiagnostics = [...diagnostics(run, success ? 0 : capture.exitCode, success, failureEvidence), ...(hasAgentBundle ? workload.diagnostics.map((diagnostic) => ({ ...diagnostic })) : [])]
149152
const agentTaskRunResult = success ? normalizedRunResult : withFailureEvidence(normalizedRunResult, failureEvidence, outputDiagnostics)
153+
const session = sandboxSession(input, run, artifacts, success ? "completed" : "failed")
154+
const structuredArtifacts = structuredArtifactRefs(agentTaskResult)
155+
const outputs = stripUndefined({ ...workload.outputs })
156+
const typedArtifacts = typedArtifactRefs(agentTaskResult, outputs)
157+
const evidence = evidenceRefs(run, artifacts, failureEvidence)
158+
const artifactResult = artifactResultEnvelope({
159+
operation: "agent-task-run",
160+
status: success ? "created" : "failed",
161+
artifactBundle: agentTaskRunResult.refs.artifact_bundles[0],
162+
artifactRefs: [...agentTaskRunResult.refs.artifact_bundles, ...agentTaskRunResult.artifacts],
163+
result: {
164+
structured_artifacts: structuredArtifacts,
165+
typed_artifacts: typedArtifacts,
166+
agent_reply: agentReply(agentResult, terminalResult, agentTaskRunResult),
167+
transcript_refs: agentTaskRunResult.refs.transcripts,
168+
evidence_refs: evidence,
169+
preview: previewMetadata(session, run),
170+
session,
171+
outputs,
172+
},
173+
diagnostics: artifactResultDiagnostics(outputDiagnostics),
174+
metadata: artifactResultMetadata(run, input, agentTaskRunResult),
175+
})
150176
const output: AgentTaskRunOutput = {
151177
success,
152178
schema: "wp-codebox/agent-task-run/v1",
153179
status: agentTaskRunResult.status,
154-
session: sandboxSession(input, run, artifacts, success ? "completed" : "failed"),
180+
session,
155181
task,
156182
task_input: taskInput,
157183
wp: wpVersion,
@@ -162,11 +188,14 @@ export async function runAgentTask(input: AgentTaskRunInput, options: AgentTaskR
162188
terminal_result: terminalResult,
163189
completion_outcome: completionOutcome,
164190
component_contracts: componentContractReport(run),
165-
structured_artifacts: structuredArtifactRefs(agentTaskResult),
191+
structured_artifacts: structuredArtifacts,
192+
typed_artifacts: typedArtifacts,
193+
outputs: { ...outputs, artifact_result: artifactResult },
194+
artifact_result: artifactResult,
166195
run,
167196
diagnostics: outputDiagnostics,
168197
agent_runtime_diagnostics: await buildAgentRuntimeDiagnostics(run, input),
169-
evidence_refs: evidenceRefs(run, artifacts, failureEvidence),
198+
evidence_refs: evidence,
170199
failure_evidence: failureEvidence,
171200
run_metadata: stripUndefined({
172201
run_id: stringValue(runRecord.runId),
@@ -181,6 +210,7 @@ export async function runAgentTask(input: AgentTaskRunInput, options: AgentTaskR
181210
agent_runtime: {
182211
workload,
183212
},
213+
artifact_result: artifactResult,
184214
},
185215
}
186216
return output
@@ -191,11 +221,29 @@ export async function runAgentTask(input: AgentTaskRunInput, options: AgentTaskR
191221
const failureEvidence = buildFailureEvidence({ input, task, wpVersion, artifacts, recipePath, generatedRecipeArtifact, run, capture, error })
192222
const failureDiagnostics = diagnostics(run, capture?.exitCode ?? 1, false, failureEvidence)
193223
const agentTaskRunResult = withFailureEvidence(normalizedRunResult, failureEvidence, failureDiagnostics)
224+
const session = sandboxSession(input, run, artifacts, "failed")
225+
const evidence = evidenceRefs(run, artifacts, failureEvidence)
226+
const artifactResult = artifactResultEnvelope({
227+
operation: "agent-task-run",
228+
status: "failed",
229+
artifactBundle: agentTaskRunResult.refs.artifact_bundles[0],
230+
artifactRefs: [...agentTaskRunResult.refs.artifact_bundles, ...agentTaskRunResult.artifacts],
231+
result: {
232+
agent_reply: agentReply({}, normalizeAgentTerminalResult(run, { compatMode: true }), agentTaskRunResult),
233+
transcript_refs: agentTaskRunResult.refs.transcripts,
234+
evidence_refs: evidence,
235+
preview: previewMetadata(session, run),
236+
session,
237+
outputs: {},
238+
},
239+
diagnostics: artifactResultDiagnostics(failureDiagnostics),
240+
metadata: artifactResultMetadata(run, input, agentTaskRunResult),
241+
})
194242
return {
195243
success: false,
196244
schema: "wp-codebox/agent-task-run/v1",
197245
status: agentTaskRunResult.status,
198-
session: sandboxSession(input, run, artifacts, "failed"),
246+
session,
199247
task,
200248
task_input: taskInput,
201249
wp: wpVersion,
@@ -207,10 +255,13 @@ export async function runAgentTask(input: AgentTaskRunInput, options: AgentTaskR
207255
completion_outcome: {},
208256
component_contracts: componentContractReport(run),
209257
structured_artifacts: [],
258+
typed_artifacts: [],
259+
outputs: { artifact_result: artifactResult },
260+
artifact_result: artifactResult,
210261
run,
211262
diagnostics: failureDiagnostics,
212263
agent_runtime_diagnostics: await buildAgentRuntimeDiagnostics(run, input),
213-
evidence_refs: evidenceRefs(run, artifacts, failureEvidence),
264+
evidence_refs: evidence,
214265
failure_evidence: failureEvidence,
215266
run_metadata: stripUndefined({
216267
sandbox_session_id: stringValue(input.sandbox_session_id),
@@ -224,6 +275,7 @@ export async function runAgentTask(input: AgentTaskRunInput, options: AgentTaskR
224275
diagnostics: failureDiagnostics,
225276
},
226277
},
278+
artifact_result: artifactResult,
227279
},
228280
}
229281
} finally {
@@ -508,6 +560,62 @@ function structuredArtifactRefs(agentTaskResult: Record<string, unknown>): Array
508560
return fromOutputs.filter((entry): entry is Record<string, unknown> => Boolean(objectValue(entry)))
509561
}
510562

563+
/**
 * Gathers typed artifact entries from every place they may be reported.
 *
 * Three candidate lists are read, in this order: `agentTaskResult.typed_artifacts`,
 * `agentTaskResult.outputs.typed_artifacts`, and `workloadOutputs.typed_artifacts`.
 * A candidate that is not an array contributes nothing.
 *
 * @param agentTaskResult - Raw agent task result record.
 * @param workloadOutputs - Normalized workload outputs; defaults to an empty record.
 * @returns The entries accepted by `objectValue`, passed through `dedupeRecords`.
 */
function typedArtifactRefs(agentTaskResult: Record<string, unknown>, workloadOutputs: Record<string, unknown> = {}): Array<Record<string, unknown>> {
  const topLevelList: unknown = agentTaskResult.typed_artifacts
  const nestedOutputs = objectValue(agentTaskResult.outputs) || {}
  const candidateLists: unknown[] = [topLevelList, nestedOutputs.typed_artifacts, workloadOutputs.typed_artifacts]

  // Flatten in source order so dedupeRecords sees entries in the same sequence as before.
  const entries = candidateLists.flatMap((list): unknown[] => (Array.isArray(list) ? list : []))
  const records = entries.filter((entry): entry is Record<string, unknown> => Boolean(objectValue(entry)))
  return dedupeRecords(records)
}
570+
571+
/**
 * Builds the `agent_reply` section of the artifact result envelope.
 *
 * @param agentResult - Raw agent result; `reply`, `message` and `response` are tried in that order for the reply text.
 * @param terminalResult - Normalized terminal result, if any; its status takes precedence over the run status.
 * @param runResult - Normalized run result, used as the fallback for summary and status.
 * @returns Whatever `nonEmptyObject` yields for the assembled record (typed as possibly `undefined`).
 */
function agentReply(agentResult: Record<string, unknown>, terminalResult: AgentTerminalResult | undefined, runResult: AgentTaskRunResultSummary): Record<string, unknown> | undefined {
  const replyText = stringValue(agentResult.reply) || stringValue(agentResult.message) || stringValue(agentResult.response)
  const replySummary = stringValue(agentResult.summary) || runResult.summary
  const replyStatus = terminalResult?.status || runResult.status

  // Falsy text/summary are mapped to undefined so stripUndefined drops the keys entirely.
  const reply = stripUndefined({
    text: replyText ? replyText : undefined,
    summary: replySummary ? replySummary : undefined,
    status: replyStatus,
    metadata: nonEmptyObject(stripUndefined({ terminal_result: terminalResult })),
  })
  return nonEmptyObject(reply)
}
582+
583+
/**
 * Derives preview metadata for the artifact result envelope.
 *
 * Starts from `run.runtime.preview` and overrides `url` with the first usable value among
 * `run.runtime.preview.url`, `run.runtime.previewUrl` and `session.artifacts.preview_url`.
 *
 * @param session - Sandbox session record; only `artifacts.preview_url` is read.
 * @param run - Recipe run record; only `runtime` is read.
 * @returns Whatever `nonEmptyObject` yields for the merged record (typed as possibly `undefined`).
 */
function previewMetadata(session: Record<string, unknown>, run: Record<string, unknown>): Record<string, unknown> | undefined {
  const runtimeRecord = objectValue(run.runtime) || {}
  const runtimePreview = objectValue(runtimeRecord.preview) || {}
  const artifactsRecord = objectValue(session.artifacts) || {}

  const previewUrl = stringValue(runtimePreview.url) || stringValue(runtimeRecord.previewUrl) || stringValue(artifactsRecord.preview_url) || undefined

  // `url` is written after the spread so the resolved value replaces any raw `preview.url`.
  const merged = stripUndefined({ ...runtimePreview, url: previewUrl })
  return nonEmptyObject(merged)
}
592+
593+
/**
 * Assembles the `metadata` section of the artifact result envelope.
 *
 * Run and runtime identifiers are read from the live `run` record first and fall back to the
 * values already stored in `runResult.metadata`.
 *
 * @param run - Recipe run record; `run.run` and `run.runtime` are read.
 * @param input - Agent task input; supplies the sandbox session id, orchestrator and parent request schema.
 * @param runResult - Normalized run result; supplies status, success and fallback identifiers.
 * @returns The metadata record after `stripUndefined`.
 */
function artifactResultMetadata(run: Record<string, unknown>, input: AgentTaskRunInput, runResult: AgentTaskRunResultSummary): Record<string, unknown> {
  const liveRun = objectValue(run.run) || {}
  const liveRuntime = objectValue(run.runtime) || {}

  const runId = stringValue(liveRun.runId) || stringValue(runResult.metadata.run_id) || undefined
  const runStatus = stringValue(liveRun.status) || stringValue(runResult.metadata.run_status) || undefined
  const runtimeId = stringValue(liveRuntime.id) || stringValue(runResult.metadata.runtime_id) || undefined
  const runtimeStatus = stringValue(liveRuntime.status) || stringValue(runResult.metadata.runtime_status) || undefined
  const sandboxSessionId = stringValue(input.sandbox_session_id) || undefined
  const parentRequestSchema = stringValue(input.parent_request?.schema) || undefined

  // Key order is kept stable because it is the order a serializer will emit.
  return stripUndefined({
    status: runResult.status,
    success: runResult.success,
    run_id: runId,
    run_status: runStatus,
    runtime_id: runtimeId,
    runtime_status: runtimeStatus,
    sandbox_session_id: sandboxSessionId,
    orchestrator: input.orchestrator,
    parent_request_schema: parentRequestSchema,
  })
}
608+
609+
/**
 * Converts loosely shaped task diagnostics into the diagnostic entries passed to the artifact result envelope.
 *
 * Each candidate field is tested on its own, so an empty or non-string `code` no longer hides a
 * usable `class` or `kind`, and unusable `data` no longer hides `metadata`. (A `??` chain only
 * falls through on `null`/`undefined`.) This matches the `stringValue(a) || stringValue(b)`
 * fallback style used by the sibling helpers in this file.
 *
 * @param diagnostics - Diagnostic records produced by the task run.
 * @returns One entry per input record. `code` and `message` always have a value; `severity` is kept
 *   only when it is exactly "info", "warning" or "error"; other fields are removed by
 *   `stripUndefined` when absent.
 */
function artifactResultDiagnostics(diagnostics: Array<Record<string, unknown>>): Array<{ code: string, message: string, severity?: "info" | "warning" | "error", phase?: string, metadata?: Record<string, unknown> }> {
  return diagnostics.map((diagnostic) => stripUndefined({
    // Precedence is unchanged (code, then class, then kind); only the fall-through rule is relaxed.
    code: stringValue(diagnostic.code) || stringValue(diagnostic.class) || stringValue(diagnostic.kind) || "wp-codebox.agent_task_diagnostic",
    message: stringValue(diagnostic.message) || "WP Codebox agent task diagnostic.",
    severity: diagnostic.severity === "info" || diagnostic.severity === "warning" || diagnostic.severity === "error" ? diagnostic.severity : undefined,
    phase: stringValue(diagnostic.phase) || undefined,
    // Prefer `data`, but fall back to `metadata` when `data` is missing, not an object, or empty.
    metadata: nonEmptyObject(objectValue(diagnostic.data)) || nonEmptyObject(objectValue(diagnostic.metadata)),
  }))
}
618+
511619
async function readJsonRecord(path: string): Promise<Record<string, unknown> | undefined> {
512620
if (!path) return undefined
513621
try {

packages/runtime-core/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ export * from "./artifact-review.js"
88
export * from "./artifact-diagnostics.js"
99
export * from "./artifact-test-results.js"
1010
export * from "./artifact-export-links.js"
11+
export * from "./artifact-result-envelope.js"
1112
export * from "./runtime-contracts.js"
1213
export * from "./runtime-neutral-contracts.js"
1314
export * from "./runtime-boundary-contracts.js"

tests/agent-task-contracts.test.ts

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import assert from "node:assert/strict"
22
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"
33
import { tmpdir } from "node:os"
44
import { join } from "node:path"
5-
import { buildAgentTaskRecipe, normalizeAgentRuntimeWorkload, normalizeAgentTaskRunResult, normalizeAgentTerminalResult, normalizeTaskInput } from "../packages/runtime-core/src/index.js"
5+
import { ARTIFACT_RESULT_ENVELOPE_SCHEMA, buildAgentTaskRecipe, normalizeAgentRuntimeWorkload, normalizeAgentTaskRunResult, normalizeAgentTerminalResult, normalizeTaskInput } from "../packages/runtime-core/src/index.js"
66
import { effectivePolicyCommands } from "../packages/runtime-core/src/contracts.js"
77
import { commandCatalogOutput } from "../packages/cli/src/commands/discovery.js"
88
import { agentTaskRunExitCode } from "../packages/cli/src/commands/agent-task-run.js"
@@ -44,6 +44,23 @@ const strictRuntimeWorkload = normalizeAgentRuntimeWorkload({ outputs: { answer:
4444
assert.deepEqual(strictRuntimeWorkload.outputs, {})
4545
assert.equal(strictRuntimeWorkload.diagnostics.some((diagnostic) => diagnostic.class === "wp-codebox.normalizer.compat_mode_used"), false)
4646

47+
const compatRuntimeWorkload = normalizeAgentRuntimeWorkload({ outputs: { answer: "legacy" } }, { compatMode: true })
48+
assert.deepEqual(compatRuntimeWorkload.outputs, { answer: "legacy" })
49+
assert.equal(compatRuntimeWorkload.diagnostics.some((diagnostic) => diagnostic.class === "wp-codebox.normalizer.compat_mode_used"), true)
50+
51+
const normalizedWithArtifactEnvelope = normalizeAgentTaskRunResult({
52+
success: true,
53+
run: { artifactRefs: [{ id: "bundle-1", kind: "artifact-bundle", directory: "artifacts/run-1" }] },
54+
agentResult: {
55+
artifacts: { directory: "artifacts/run-1" },
56+
summary: "Changed one file",
57+
transcript: { artifact: "files/transcript.json" },
58+
},
59+
}, { exitStatus: 0 })
60+
assert.equal(normalizedWithArtifactEnvelope.refs.artifact_bundles[0].path, "artifacts/run-1")
61+
assert.equal(normalizedWithArtifactEnvelope.refs.transcripts[0].kind, "codebox-transcript")
62+
assert.equal(ARTIFACT_RESULT_ENVELOPE_SCHEMA, "wp-codebox/artifact-result-envelope/v1")
63+
4764
const catalog = commandCatalogOutput()
4865
const agentSandboxRun = catalog.commands.find((command) => command.id === "wp-codebox.agent-sandbox-run")
4966
assert.ok(agentSandboxRun, "catalog includes wp-codebox.agent-sandbox-run")
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import assert from "node:assert/strict"
2+
import { ARTIFACT_RESULT_ENVELOPE_SCHEMA, artifactResultEnvelope, normalizeArtifactResultEnvelope } from "../packages/runtime-core/src/index.js"
3+
4+
// Contract check 1: build an envelope through the factory and inspect the fields it exposes.
const createdEnvelope = artifactResultEnvelope({
  operation: "agent-task-run",
  status: "created",
  artifactBundle: { kind: "artifact-bundle", path: "artifacts/run-1", digest: { algorithm: "sha256", value: "abc" } },
  artifactRefs: [{ kind: "codebox-patch", path: "files/patch.diff" }],
  result: {
    typed_artifacts: [{ name: "report", artifact_schema: "example/report/v1", payload: { ok: true } }],
    outputs: { answer: 42 },
  },
  diagnostics: [{ code: "wp-codebox.test", message: "test diagnostic", severity: "info" }],
  metadata: { runtime_id: "runtime-1" },
})

assert.strictEqual(createdEnvelope.schema, ARTIFACT_RESULT_ENVELOPE_SCHEMA)
assert.strictEqual(createdEnvelope.success, true)
assert.strictEqual(createdEnvelope.artifactBundle?.path, "artifacts/run-1")
// One ref was supplied but two are expected; presumably the bundle is counted among the refs (confirm in the envelope module).
assert.strictEqual(createdEnvelope.artifactRefs.length, 2)
assert.deepStrictEqual(createdEnvelope.result?.outputs, { answer: 42 })
assert.strictEqual(createdEnvelope.result?.typed_artifacts?.[0]?.name, "report")

// Contract check 2: normalize an already-shaped envelope that carries no diagnostics.
const normalizedEnvelope = normalizeArtifactResultEnvelope({
  schema: ARTIFACT_RESULT_ENVELOPE_SCHEMA,
  operation: "agent-task-run",
  status: "created",
  artifactBundle: { kind: "bundle", path: "artifacts/run-2" },
  artifactRefs: [{ kind: "log", path: "files/log.txt" }],
  result: { ok: true },
})

assert.strictEqual(normalizedEnvelope.artifactBundle?.kind, "bundle")
// The first ref is expected to carry the bundle path, not the supplied log path.
assert.strictEqual(normalizedEnvelope.artifactRefs[0].path, "artifacts/run-2")
assert.deepStrictEqual(normalizedEnvelope.result, { ok: true })
assert.deepStrictEqual(normalizedEnvelope.diagnostics, [])

console.log("artifact result envelope contract passed")

0 commit comments

Comments
 (0)