Skip to content

Commit 4d9b1c0

Browse files
committed
fix(evaluators): list all automatic evaluators in the sidebar switcher
The switcher used fullPagePlaygroundEvaluatorsAtom, which narrows to evaluators that have a full-page playground (LLM, code) and therefore dropped the declarative matchers (exact match, regex, similarity, json diff, contains json, ...). Add nonHumanEvaluatorsAtom, which returns all non-archived evaluators except human ones (is_feedback, resolved from each evaluator's latest revision), and point the switcher at it, so every automatic evaluator is listed while human ones stay out.
1 parent e585887 commit 4d9b1c0

4 files changed

Lines changed: 40 additions & 17 deletions

File tree

web/oss/src/components/Sidebar/components/WorkflowEntityCard.tsx

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import {memo, useCallback, useMemo, useState} from "react"
22

33
import {
4-
fullPagePlaygroundEvaluatorsAtom,
4+
nonHumanEvaluatorsAtom,
55
nonArchivedAppWorkflowsAtom,
66
nonArchivedEvaluatorsAtom,
77
parseWorkflowKeyFromUri,
@@ -117,26 +117,24 @@ const WorkflowEntityCard = memo(({collapsed}: WorkflowEntityCardProps) => {
117117
const ctx = useAtomValue(currentWorkflowContextAtom)
118118
const apps = useAtomValue(nonArchivedAppWorkflowsAtom) as readonly Workflow[]
119119
const evaluators = useAtomValue(nonArchivedEvaluatorsAtom) as readonly Workflow[]
120-
// Only evaluators with a real full-page playground belong in the switcher.
121-
// `fullPagePlaygroundEvaluatorsAtom` resolves the type flags from each
122-
// evaluator's LATEST REVISION — the workflow LIST records this card reads
123-
// from `nonArchivedEvaluatorsAtom` carry NO `data.uri` and NO
124-
// `is_feedback`/`is_llm`/`is_code` flags (those live on the revision, not
125-
// the parent artifact). That's why the old `!w.flags?.is_feedback` filter
126-
// never excluded anything and human/feedback evaluators leaked into the
127-
// switcher (QA 2026-06-05). The atom drops human (`is_feedback`) AND
128-
// declarative classifier evaluators (match/exact_match/json_*/etc.) — all
129-
// of which route to an `/apps/<id>/*` destination the guard redirects back
130-
// to /evaluators, so clicking them would be a dead end.
131-
const fullPagePlaygroundEvaluators = useAtomValue(
132-
fullPagePlaygroundEvaluatorsAtom,
133-
) as readonly Workflow[]
120+
// The switcher lists every AUTOMATIC evaluator — LLM, code, AND the
121+
// declarative classifiers (exact match, regex, similarity / semantic
122+
// similarity, json diff, contains json, …). `nonHumanEvaluatorsAtom`
123+
// resolves `is_feedback` from each evaluator's LATEST REVISION — the
124+
// workflow LIST records this card reads from `nonArchivedEvaluatorsAtom`
125+
// carry NO `is_feedback`/`is_llm`/`is_code` flags (those live on the
126+
// revision, not the parent artifact), which is why the old
127+
// `!w.flags?.is_feedback` filter never excluded anything and human
128+
// evaluators leaked in (QA 2026-06-05). It drops ONLY human (`is_feedback`)
129+
// evaluators; navigation lands on the workflow's current sub-page (Overview/
130+
// Evaluations are valid for every evaluator), so matchers no longer dead-end.
131+
const automaticEvaluators = useAtomValue(nonHumanEvaluatorsAtom) as readonly Workflow[]
134132
// Gated by `EVALUATOR_FULL_PAGE_NAV_ENABLED`: while the flag is off, the
135133
// switcher dropdown hides the "Evaluators" group entirely.
136134
const switcherEvaluators: readonly Workflow[] = useMemo(() => {
137135
if (!EVALUATOR_FULL_PAGE_NAV_ENABLED) return EMPTY_WORKFLOWS
138-
return fullPagePlaygroundEvaluators
139-
}, [fullPagePlaygroundEvaluators])
136+
return automaticEvaluators
137+
}, [automaticEvaluators])
140138
const recentAppId = useAtomValue(recentAppIdAtom)
141139
const recentEvaluatorId = useAtomValue(recentEvaluatorIdAtom)
142140
const navigateToWorkflow = useSetAtom(routerAppNavigationAtom)

web/packages/agenta-entities/src/workflow/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ export {
289289
evaluatorsListDataAtom,
290290
nonArchivedEvaluatorsAtom,
291291
fullPagePlaygroundEvaluatorsAtom,
292+
nonHumanEvaluatorsAtom,
292293
// Templates
293294
evaluatorTemplatesQueryAtom,
294295
evaluatorTemplatesDataAtom,

web/packages/agenta-entities/src/workflow/state/evaluatorUtils.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,29 @@ export const fullPagePlaygroundEvaluatorsAtom = atom<Workflow[]>((get) => {
142142
})
143143
})
144144

145+
/**
146+
* Non-archived **automatic** evaluators — i.e. all evaluators except human
147+
* (`is_feedback`) ones. Unlike `fullPagePlaygroundEvaluatorsAtom`, this does
148+
* NOT narrow to evaluators that have a full-page playground, so it includes the
149+
* declarative classifiers too (exact match, regex, similarity / semantic
150+
* similarity, json diff, contains json, …). This is the right list for the
151+
* sidebar workflow switcher, which should surface every automatic evaluator.
152+
*
153+
* `is_feedback` lives on the revision (not the parent artifact), so it's
154+
* resolved from each evaluator's latest revision (batched + cached). An
155+
* evaluator whose latest revision hasn't resolved yet is held back until it
156+
* does, so a human evaluator never briefly leaks into the list.
157+
*/
158+
export const nonHumanEvaluatorsAtom = atom<Workflow[]>((get) => {
159+
const evaluators = get(nonArchivedEvaluatorsAtom)
160+
return evaluators.filter((evaluator) => {
161+
if (!evaluator.id) return false
162+
const revision = get(workflowLatestRevisionQueryAtomFamily(evaluator.id)).data
163+
if (!revision) return false
164+
return !revision.flags?.is_feedback
165+
})
166+
})
167+
145168
/**
146169
* Invalidate the evaluators list cache.
147170
* Call after create/update/archive operations on evaluator workflows.

web/packages/agenta-entities/src/workflow/state/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ export {
159159
evaluatorsListDataAtom,
160160
nonArchivedEvaluatorsAtom,
161161
fullPagePlaygroundEvaluatorsAtom,
162+
nonHumanEvaluatorsAtom,
162163
// Templates
163164
evaluatorTemplatesQueryAtom,
164165
evaluatorTemplatesDataAtom,

0 commit comments

Comments
 (0)