agenta/web/packages/agenta-playground/src/executeWorkflowRevision.ts at 585c6adbd8bf1f94b232b615f32bb1f09e32ae0d · Agenta-AI/agenta · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
/**
 * executeWorkflowRevision
 *
 * Standalone imperative function that runs a single workflow revision against
 * a given input data object using the full playground execution infrastructure
 * (concurrency limiting, abort, chain execution, URL/payload resolution via
 * workflowMolecule).
 *
 * Unlike the playground's interactive execution path, this function:
 * - Does NOT require a React context or any playground UI atoms
 * - Creates a transient synthetic loadable row for the duration of the call
 * - Cleans up all transient state after the call completes
 * - Returns a plain promise with the result
 *
 * Usage (from an evaluation context):
 *
 * ```typescript
 * import { executeWorkflowRevision } from '@agenta/playground'
 * import { workflowMolecule } from '@agenta/entities/workflow'
 *
 * // Pre-seed the fetched workflow into the default store
 * workflowMolecule.set.seedEntity(revisionId, fetchedWorkflow)
 *
 * const result = await executeWorkflowRevision({
 *   revisionId,
 *   inputData: { country: "France", city: "Paris" },
 *   projectId,
 * })
 * // result: { status, output, structuredOutput, traceId, spanId, error }
 * ```
 */

import {loadableController} from "@agenta/entities/runnable"
import type {PlaygroundNode} from "@agenta/entities/runnable"
import {testcaseMolecule} from "@agenta/entities/testcase"
import {workflowDraftAtomFamily} from "@agenta/entities/workflow"
import {getDefaultStore} from "jotai/vanilla"

import {executeStepForSessionWithExecutionItems} from "./state/execution/executionRunner"
import type {ExecutionSession} from "./state/execution/types"

// ============================================================================
// TYPES
// ============================================================================

/**
 * Arguments accepted by `executeWorkflowRevision`.
 */
export interface ExecuteWorkflowRevisionParams {
    /** The workflow revision ID (must already be seeded via workflowMolecule.set.seedEntity) */
    revisionId: string
    /**
     * Input data to pass to the workflow (key-value map). The same object is
     * used as the data of the synthetic testcase row created for the call.
     */
    inputData: Record<string, unknown>
    /**
     * Project ID scoped to the execution. Only forwarded in the session
     * options when truthy (null, undefined and "" are omitted).
     */
    projectId?: string | null
    /** Optional auth headers to forward (e.g. Authorization); defaults to an empty object */
    headers?: Record<string, string>
    /** Abort signal to cancel the execution */
    abortSignal?: AbortSignal
}

/**
 * Outcome of a single `executeWorkflowRevision` call.
 *
 * Only `status` is always present; which of the remaining fields are set
 * depends on the status (see the individual field notes).
 */
export interface ExecuteWorkflowRevisionResult {
    /** "success" when the run completed, "error" when it failed, "cancelled" when it was cancelled */
    status: "success" | "error" | "cancelled"
    /** Output reported by the runner on completion (set for "success") */
    output?: unknown
    /** Structured output reported by the runner on completion (set for "success") */
    structuredOutput?: unknown
    /** Trace ID reported by the runner; null when none was reported */
    traceId?: string | null
    /** Span ID looked up in the structured output; null when none could be found */
    spanId?: string | null
    /** Failure details (set for "error") */
    error?: {message: string; code?: string; type?: string; stacktrace?: string}
}

// ============================================================================
// IMPLEMENTATION
// ============================================================================

/**
 * Monotonic counter mixed into the transient IDs below. `Date.now()` alone is
 * not unique: calls started within the same millisecond (e.g. a batch fired
 * through `Promise.all`) would otherwise share a loadable ID and step ID.
 */
let transientInvocationCounter = 0

/**
 * Execute a single workflow revision against input data.
 *
 * Requires the revision to be pre-seeded into the default Jotai store via:
 *   workflowMolecule.set.seedEntity(revisionId, fetchedWorkflow)
 *
 * This ensures workflowMolecule selectors (invocationUrl, requestPayload, etc.)
 * resolve correctly without needing a React context or query subscription.
 *
 * Transient state created for the call (a synthetic testcase row, and the
 * temporary suppression of any unsaved draft for the revision) is rolled back
 * exactly once, whichever way the run ends.
 *
 * @param params - Revision ID, input data and optional project / headers / abort signal.
 * @returns A promise resolving to the execution outcome. Failures reported
 *   through the runner's lifecycle resolve with `status: "error"`; an aborted
 *   `abortSignal` resolves with `status: "cancelled"`.
 * @throws Rejects with the original error if the runner itself throws or
 *   rejects outside of its lifecycle callbacks (after cleaning up).
 */
export async function executeWorkflowRevision(
    params: ExecuteWorkflowRevisionParams,
): Promise<ExecuteWorkflowRevisionResult> {
    const {revisionId, inputData, projectId, headers = {}, abortSignal} = params

    // A signal that is already aborted means the caller no longer wants the
    // run: bail out before creating any transient state.
    if (abortSignal?.aborted) {
        return {status: "cancelled"}
    }

    const store = getDefaultStore()

    // Create unique transient IDs so this call doesn't interfere with any
    // existing playground loadable state or with concurrent invocations.
    const invocationKey = `${Date.now()}-${++transientInvocationCounter}`
    const loadableId = `eval-invocation:${revisionId}:${invocationKey}`
    const stepId = `step-${invocationKey}`

    // Add a synthetic testcase row to the loadable so that
    // createExecutionItemHandle can resolve displayRowIds and row data.
    // We pass inputData so that variable resolution works as a fallback,
    // even though the runner will use inputValues directly.
    const rowId: string | null = store.set(loadableController.actions.addRow, loadableId, inputData)

    if (!rowId) {
        return {
            status: "error",
            error: {message: "Failed to create synthetic testcase row for execution"},
        }
    }

    // Suppress any existing draft overlay so the execution uses only committed
    // server state. If a user has unsaved playground edits for this revision,
    // those must NOT leak into evaluation invocations.
    // NOTE(review): overlapping calls for the same revision share this atom —
    // a second call sees the already-suppressed (null) draft, and the first
    // call's cleanup restores the draft while the second may still be running.
    const draftAtom = workflowDraftAtomFamily(revisionId)
    const stashedDraft = store.get(draftAtom)
    if (stashedDraft) {
        store.set(draftAtom, null)
    }

    // Idempotent rollback of the transient state created above.
    let cleanedUp = false
    const cleanup = (): void => {
        if (cleanedUp) return
        cleanedUp = true
        try {
            // Delete the transient testcase entity to avoid memory leaks
            store.set(testcaseMolecule.actions.delete, rowId)
        } finally {
            // Restore the stashed draft so playground edits are not lost,
            // even if deleting the row failed.
            if (stashedDraft) {
                store.set(draftAtom, stashedDraft)
            }
        }
    }

    // Build a minimal single-node topology (no chain, depth=0)
    const node: PlaygroundNode = {
        id: `node-${revisionId}`,
        entityId: revisionId,
        entityType: "workflow",
        depth: 0,
    }

    const session: ExecutionSession = {
        id: `sess:${revisionId}`,
        runnableId: revisionId,
        runnableType: "workflow",
        mode: "completion",
    }

    return new Promise<ExecuteWorkflowRevisionResult>((resolve, reject) => {
        let settled = false

        const onAbort = (): void => {
            // Settle promptly for the caller. Transient state is rolled back
            // later, when the runner reports the end of the run, so the draft
            // is not restored underneath a request that is still in flight.
            // NOTE(review): the runner itself is not cancelled here; no
            // cancellation hook is visible from this file.
            settle({status: "cancelled"})
        }

        // Resolve the outer promise at most once.
        const settle = (result: ExecuteWorkflowRevisionResult): void => {
            if (settled) return
            settled = true
            abortSignal?.removeEventListener("abort", onAbort)
            resolve(result)
        }

        // Roll back transient state, then report the outcome.
        const finish = (result: ExecuteWorkflowRevisionResult): void => {
            try {
                cleanup()
            } finally {
                settle(result)
            }
        }

        // The runner blew up outside of its lifecycle callbacks: roll back and
        // propagate the error to the caller as a rejection.
        const fail = (cause: unknown): void => {
            try {
                cleanup()
            } finally {
                if (!settled) {
                    settled = true
                    abortSignal?.removeEventListener("abort", onAbort)
                    reject(cause)
                }
            }
        }

        abortSignal?.addEventListener("abort", onAbort, {once: true})

        try {
            const pending: unknown = executeStepForSessionWithExecutionItems({
                get: store.get,
                set: store.set,
                loadableId,
                stepId,
                session,
                data: inputData,
                nodes: [node],
                allConnections: [],
                sessionOptions: {
                    [session.id]: {
                        ...(projectId ? {projectId} : {}),
                        headers,
                    },
                },
                lifecycle: {
                    onStart: () => {
                        // nothing to do — no progress tracking needed here
                    },
                    onProgress: () => {
                        // nothing to do
                    },
                    onComplete: ({result}) => {
                        finish({
                            status: "success",
                            output: result.output,
                            structuredOutput: result.structuredOutput,
                            traceId: result.traceId ?? null,
                            spanId: extractSpanId(result),
                            error: undefined,
                        })
                    },
                    onFail: ({error, traceId}) => {
                        finish({
                            status: "error",
                            traceId: traceId ?? null,
                            error: error ?? {message: "Execution failed"},
                        })
                    },
                    onCancel: () => {
                        finish({status: "cancelled"})
                    },
                },
            })

            // Works whether the runner returns a promise or nothing: a
            // rejection must not leave transient state behind or leave this
            // promise pending forever.
            void Promise.resolve(pending).catch(fail)
        } catch (cause) {
            fail(cause)
        }
    })
}

/**
 * Look up a span ID in the structured output of an execution result.
 *
 * Candidates are checked in order — `span_id`, then `spanId`, then
 * `tree.span_id` — moving on only while the current candidate is null or
 * undefined. The selected candidate is returned only if it is a string.
 *
 * @param result - Object that may carry a `structuredOutput` value.
 * @returns The span ID string, or null when none is available.
 */
function extractSpanId(result: Partial<{structuredOutput?: unknown}>): string | null {
    const structured = result.structuredOutput as Record<string, unknown> | null | undefined
    if (!structured) {
        return null
    }

    let candidate: unknown = structured.span_id

    if (candidate === null || candidate === undefined) {
        candidate = structured.spanId
    }

    if (candidate === null || candidate === undefined) {
        const tree = structured.tree as Record<string, unknown> | undefined
        candidate = tree?.span_id
    }

    if (typeof candidate === "string") {
        return candidate
    }
    return null
}