Skip to content

Commit 585c6ad

Browse files
committed
fix: surface provider error messages in evaluation table
The evaluation table was showing a generic 'too many requests' message instead of the actual provider error because:

1. executeViaFetch never checked for body-level errors on HTTP 200. The Python SDK can return HTTP 200 with a non-200 status.code embedded in the response body (WorkflowBatchResponse.status.code). This path was silently treated as success.
2. Error stacktrace/type/code were not propagated through the pipeline. Even when the HTTP error path was taken, only the message was extracted — the SDK's status.type, status.code, and status.stacktrace were dropped.

Changes:

- executeViaFetch: detect body-level errors on HTTP 200 by checking responseData.status.code !== 200 and return an error result
- executeViaFetch: extract stacktrace (coercing string[] to string), type, and code from both HTTP-error and body-error paths
- Add stacktrace and type to ExecutionResult, RunResult, and ExecuteWorkflowRevisionResult error shapes
- runInvocationAction: pass stacktrace and type through to upsertStepResultWithInvocation
- upsertStepResultWithInvocation: accept type field in error param

No UI changes needed — InvocationCell already renders stepError.message and stepError.stacktrace when present; extractStepError already reads error.code, error.type, error.stacktrace from persisted step data.

Closes #3324
1 parent 445fc3f commit 585c6ad

6 files changed

Lines changed: 54 additions & 6 deletions

File tree

web/oss/src/components/EvalRunDetails/atoms/runInvocationAction.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,11 @@ export const triggerRunInvocationAtom = atom(
208208
traceId: result.traceId ?? undefined,
209209
status: "failure",
210210
references,
211-
error: {message: errorMessage},
211+
error: {
212+
message: errorMessage,
213+
...(result.error?.stacktrace ? {stacktrace: result.error.stacktrace} : {}),
214+
...(result.error?.type ? {type: result.error.type} : {}),
215+
},
212216
})
213217

214218
await updateScenarioStatus(scenarioId, EvaluationStatus.FAILURE)

web/oss/src/services/evaluations/invocations/api.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ export const upsertStepResultWithInvocation = async ({
6969
status: string
7070
references?: InvocationReferences
7171
outputs?: unknown
72-
error?: {message: string; stacktrace?: string}
72+
error?: {message: string; stacktrace?: string; type?: string}
7373
}): Promise<void> => {
7474
const {projectId} = getProjectValues()
7575

web/packages/agenta-entities/src/runnable/types.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,8 @@ export interface ExecutionResult {
204204
error?: {
205205
message: string
206206
code?: string
207+
type?: string
208+
stacktrace?: string
207209
}
208210
trace?: TraceInfo
209211
metrics?: ExecutionMetrics

web/packages/agenta-playground/src/executeWorkflowRevision.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ export interface ExecuteWorkflowRevisionResult {
6262
structuredOutput?: unknown
6363
traceId?: string | null
6464
spanId?: string | null
65-
error?: {message: string; code?: string}
65+
error?: {message: string; code?: string; type?: string; stacktrace?: string}
6666
}
6767

6868
// ============================================================================

web/packages/agenta-playground/src/state/execution/executionRunner.ts

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ interface ExecutionSessionLifecycleCallbacks {
187187
chainResults?: RunResult["chainResults"]
188188
}) => void
189189
onComplete: (payload: {result: Partial<RunResult>}) => void
190-
onFail: (payload: {error: {message: string; code?: string}; traceId?: string | null}) => void
190+
onFail: (payload: {error: {message: string; code?: string; type?: string; stacktrace?: string}; traceId?: string | null}) => void
191191
onCancel: () => void
192192
}
193193

@@ -671,13 +671,20 @@ async function executeViaFetch(params: {
671671
if (!response.ok) {
672672
const errorText = await response.text()
673673
let errorMessage = `Request failed with status ${response.status}`
674+
let errorCode: string | undefined
675+
let errorType: string | undefined
676+
let errorStacktrace: string | undefined
674677
let traceId: string | null = null
675678

676679
try {
677680
const errorData = JSON.parse(errorText)
678681
traceId = extractTraceIdFromPayload(errorData)
679682
if (errorData?.status?.message) {
680683
errorMessage = errorData.status.message
684+
errorCode = errorData.status.code?.toString()
685+
errorType = errorData.status.type
686+
const st = errorData.status.stacktrace
687+
errorStacktrace = Array.isArray(st) ? st.join("\n") : st
681688
} else if (errorData?.detail?.message) {
682689
errorMessage = errorData.detail.message
683690
} else if (typeof errorData?.detail === "string") {
@@ -692,13 +699,48 @@ async function executeViaFetch(params: {
692699
status: "error",
693700
startedAt,
694701
completedAt: new Date().toISOString(),
695-
error: {message: errorMessage},
702+
error: {
703+
message: errorMessage,
704+
...(errorCode ? {code: errorCode} : {}),
705+
...(errorType ? {type: errorType} : {}),
706+
...(errorStacktrace ? {stacktrace: errorStacktrace} : {}),
707+
},
696708
...(traceId ? {trace: {id: traceId}} : {}),
697709
}
698710
}
699711

700712
const responseData = await response.json()
701713

714+
// Check for body-level error status (SDK returns HTTP 200 with error in body).
715+
// The Python SDK's WorkflowBatchResponse may embed a non-200 status.code
716+
// inside the response body even when the HTTP status is 200.
717+
const bodyStatus = responseData?.status
718+
if (bodyStatus && typeof bodyStatus === "object" && bodyStatus.code && bodyStatus.code !== 200) {
719+
const traceId = extractTraceIdFromPayload(responseData)
720+
const spanId = extractSpanIdFromPayload(responseData)
721+
const st = bodyStatus.stacktrace
722+
return {
723+
executionId,
724+
status: "error",
725+
startedAt,
726+
completedAt: new Date().toISOString(),
727+
error: {
728+
message: bodyStatus.message || "Invocation failed",
729+
...(bodyStatus.code ? {code: bodyStatus.code.toString()} : {}),
730+
...(bodyStatus.type ? {type: bodyStatus.type} : {}),
731+
...(st ? {stacktrace: Array.isArray(st) ? st.join("\n") : st} : {}),
732+
},
733+
...(traceId
734+
? {
735+
trace: {
736+
id: traceId,
737+
...(spanId ? {spanId} : {}),
738+
},
739+
}
740+
: {}),
741+
}
742+
}
743+
702744
// Delegate response parsing to entity-level normalizer when provided.
703745
// Default: unwrap `data` field if present, extract `trace_id`.
704746
const normalized = normalizeResponse

web/packages/agenta-playground/src/state/execution/types.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ export interface RunResult {
165165
/** Hash of result for comparison (optional) */
166166
resultHash?: string | null
167167
/** Error details if status is "error" */
168-
error?: {message: string; code?: string} | null
168+
error?: {message: string; code?: string; type?: string; stacktrace?: string} | null
169169
/** Timestamp when execution started (ms) */
170170
startedAt?: number
171171
/** Timestamp when execution completed (ms) */

0 commit comments

Comments
 (0)