fix: surface provider error messages in evaluation table

GanJiaKouN16 · GanJiaKouN16 · commit 585c6adbd8bf · 2026-06-06T14:54:06.000+08:00
The evaluation table was showing a generic 'too many requests' message instead of the actual provider error because:

1. executeViaFetch never checked for body-level errors on HTTP 200. The Python SDK can return HTTP 200 with a non-200 status.code embedded in the response body (WorkflowBatchResponse.status.code). This path was silently treated as success.
2. Error stacktrace/type/code were not propagated through the pipeline. Even when the HTTP error path was taken, only the message was extracted — the SDK's status.type, status.code, and status.stacktrace were dropped.

Changes:

- executeViaFetch: detect body-level errors on HTTP 200 by checking that responseData.status.code is present and !== 200, and return an error result
- executeViaFetch: extract stacktrace (coercing string[] to string), type, and code from both HTTP-error and body-error paths
- Add stacktrace and type to ExecutionResult, RunResult, and ExecuteWorkflowRevisionResult error shapes
- runInvocationAction: pass stacktrace and type through to upsertStepResultWithInvocation
- upsertStepResultWithInvocation: accept type field in error param

No UI changes needed — InvocationCell already renders stepError.message and stepError.stacktrace when present; extractStepError already reads error.code, error.type, error.stacktrace from persisted step data.

Closes #3324
diff --git a/web/oss/src/components/EvalRunDetails/atoms/runInvocationAction.ts b/web/oss/src/components/EvalRunDetails/atoms/runInvocationAction.ts
@@ -208,7 +208,11 @@ export const triggerRunInvocationAtom = atom(
                     traceId: result.traceId ?? undefined,
                     status: "failure",
                     references,
-                    error: {message: errorMessage},
+                    error: {
+                        message: errorMessage,
+                        ...(result.error?.stacktrace ? {stacktrace: result.error.stacktrace} : {}),
+                        ...(result.error?.type ? {type: result.error.type} : {}),
+                    },
                 })
 
                 await updateScenarioStatus(scenarioId, EvaluationStatus.FAILURE)
diff --git a/web/oss/src/services/evaluations/invocations/api.ts b/web/oss/src/services/evaluations/invocations/api.ts
@@ -69,7 +69,7 @@ export const upsertStepResultWithInvocation = async ({
     status: string
     references?: InvocationReferences
     outputs?: unknown
-    error?: {message: string; stacktrace?: string}
+    error?: {message: string; stacktrace?: string; type?: string}
 }): Promise<void> => {
     const {projectId} = getProjectValues()
 
diff --git a/web/packages/agenta-entities/src/runnable/types.ts b/web/packages/agenta-entities/src/runnable/types.ts
@@ -204,6 +204,8 @@ export interface ExecutionResult {
     error?: {
         message: string
         code?: string
+        type?: string
+        stacktrace?: string
     }
     trace?: TraceInfo
     metrics?: ExecutionMetrics
diff --git a/web/packages/agenta-playground/src/executeWorkflowRevision.ts b/web/packages/agenta-playground/src/executeWorkflowRevision.ts
@@ -62,7 +62,7 @@ export interface ExecuteWorkflowRevisionResult {
     structuredOutput?: unknown
     traceId?: string | null
     spanId?: string | null
-    error?: {message: string; code?: string}
+    error?: {message: string; code?: string; type?: string; stacktrace?: string}
 }
 
 // ============================================================================
diff --git a/web/packages/agenta-playground/src/state/execution/executionRunner.ts b/web/packages/agenta-playground/src/state/execution/executionRunner.ts
@@ -187,7 +187,7 @@ interface ExecutionSessionLifecycleCallbacks {
         chainResults?: RunResult["chainResults"]
     }) => void
     onComplete: (payload: {result: Partial<RunResult>}) => void
-    onFail: (payload: {error: {message: string; code?: string}; traceId?: string | null}) => void
+    onFail: (payload: {error: {message: string; code?: string; type?: string; stacktrace?: string}; traceId?: string | null}) => void
     onCancel: () => void
 }
 
@@ -671,13 +671,20 @@ async function executeViaFetch(params: {
         if (!response.ok) {
             const errorText = await response.text()
             let errorMessage = `Request failed with status ${response.status}`
+            let errorCode: string | undefined
+            let errorType: string | undefined
+            let errorStacktrace: string | undefined
             let traceId: string | null = null
 
             try {
                 const errorData = JSON.parse(errorText)
                 traceId = extractTraceIdFromPayload(errorData)
                 if (errorData?.status?.message) {
                     errorMessage = errorData.status.message
+                    errorCode = errorData.status.code?.toString()
+                    errorType = errorData.status.type
+                    const st = errorData.status.stacktrace
+                    errorStacktrace = Array.isArray(st) ? st.join("\n") : st
                 } else if (errorData?.detail?.message) {
                     errorMessage = errorData.detail.message
                 } else if (typeof errorData?.detail === "string") {
@@ -692,13 +699,48 @@ async function executeViaFetch(params: {
                 status: "error",
                 startedAt,
                 completedAt: new Date().toISOString(),
-                error: {message: errorMessage},
+                error: {
+                    message: errorMessage,
+                    ...(errorCode ? {code: errorCode} : {}),
+                    ...(errorType ? {type: errorType} : {}),
+                    ...(errorStacktrace ? {stacktrace: errorStacktrace} : {}),
+                },
                 ...(traceId ? {trace: {id: traceId}} : {}),
             }
         }
 
         const responseData = await response.json()
 
+        // Check for body-level error status (SDK returns HTTP 200 with error in body).
+        // The Python SDK's WorkflowBatchResponse may embed a non-200 status.code
+        // inside the response body even when the HTTP status is 200.
+        const bodyStatus = responseData?.status
+        if (bodyStatus && typeof bodyStatus === "object" && bodyStatus.code && bodyStatus.code !== 200) {
+            const traceId = extractTraceIdFromPayload(responseData)
+            const spanId = extractSpanIdFromPayload(responseData)
+            const st = bodyStatus.stacktrace
+            return {
+                executionId,
+                status: "error",
+                startedAt,
+                completedAt: new Date().toISOString(),
+                error: {
+                    message: bodyStatus.message || "Invocation failed",
+                    ...(bodyStatus.code ? {code: bodyStatus.code.toString()} : {}),
+                    ...(bodyStatus.type ? {type: bodyStatus.type} : {}),
+                    ...(st ? {stacktrace: Array.isArray(st) ? st.join("\n") : st} : {}),
+                },
+                ...(traceId
+                    ? {
+                          trace: {
+                              id: traceId,
+                              ...(spanId ? {spanId} : {}),
+                          },
+                      }
+                    : {}),
+            }
+        }
+
         // Delegate response parsing to entity-level normalizer when provided.
         // Default: unwrap `data` field if present, extract `trace_id`.
         const normalized = normalizeResponse
diff --git a/web/packages/agenta-playground/src/state/execution/types.ts b/web/packages/agenta-playground/src/state/execution/types.ts
@@ -165,7 +165,7 @@ export interface RunResult {
     /** Hash of result for comparison (optional) */
     resultHash?: string | null
     /** Error details if status is "error" */
-    error?: {message: string; code?: string} | null
+    error?: {message: string; code?: string; type?: string; stacktrace?: string} | null
     /** Timestamp when execution started (ms) */
     startedAt?: number
     /** Timestamp when execution completed (ms) */

Original file line number	Diff line number	Diff line change
`@@ -204,6 +204,8 @@ export interface ExecutionResult {`
`204`	`204`	`error?: {`
`205`	`205`	`message: string`
`206`	`206`	`code?: string`
	`207`	`+ type?: string`
	`208`	`+ stacktrace?: string`
`207`	`209`	`}`
`208`	`210`	`trace?: TraceInfo`
`209`	`211`	`metrics?: ExecutionMetrics`
Original file line number	Diff line number	Diff line change
`@@ -62,7 +62,7 @@ export interface ExecuteWorkflowRevisionResult {`
`62`	`62`	`structuredOutput?: unknown`
`63`	`63`	`traceId?: string \| null`
`64`	`64`	`spanId?: string \| null`
`65`		`- error?: {message: string; code?: string}`
	`65`	`+ error?: {message: string; code?: string; type?: string; stacktrace?: string}`
`66`	`66`	`}`
`67`	`67`
`68`	`68`	`// ============================================================================`