@@ -83,7 +83,11 @@ import {
8383 newTestcaseDataHashAtom ,
8484} from "../execution/selectors"
8585import { pruneDanglingConnections } from "../helpers/connectionGraph"
86- import { reconcileRowDataForEntity , resolveEntityInputContract } from "../helpers/entityInputContract"
86+ import {
87+ collectDownstreamReferencedColumns ,
88+ reconcileRowDataForEntity ,
89+ resolveEntityInputContract ,
90+ } from "../helpers/entityInputContract"
8791import { extractAndLoadChatMessagesAtom } from "../helpers/extractAndLoadChatMessages"
8892import { normalizeTestcaseRowsForLoad } from "../helpers/testcaseRowNormalization"
8993import type { EntitySelection , PlaygroundNode , RunnableType } from "../types"
@@ -2133,88 +2137,113 @@ function relinkLoadableSessions(
21332137 }
21342138}
21352139
2140+ type PruneStatus = "acted" | "noop" | "unresolved"
2141+
21362142/**
2137- * Reconcile every testcase row against the new primary entity's input
2138- * contract.
2143+ * Reconcile every testcase row against the given entity's input contract.
21392144 *
21402145 * Why this exists: the testcase row store (`testcaseMolecule`) is shared
21412146 * across loadables. When the user swaps the primary app in the LLM-as-a-
2142- * judge playground (anchor swap in `setEntityIdsAtom`), the row data keeps
2143- * every key the *previous* primary populated — chat `messages`, completion
2144- * template variables that the new app doesn't declare, etc. Without
2145- * reconciliation, those stale keys leak into the new app's request body
2146- * via the downstream "spread all keys" fallbacks.
2147- *
2148- * Handling at the row layer (here) makes the UI immediately reflect the new
2149- * app's variables and is the primary fix; execution-time reconciliation in
2150- * `executionRunner.ts` is only a hydration-window safety net.
2147+ * judge playground, the row data keeps every key the *previous* primary
2148+ * populated — chat `messages`, completion template variables that the new
2149+ * app doesn't declare, etc. Without reconciliation, those stale keys leak
2150+ * into the new app's request body and the downstream evaluator's envelope.
21512151 *
21522152 * Allow-list source is `inputPorts` (via `resolveEntityInputContract`), NOT
21532153 * `inputSchema.properties` — completion apps surface their variables as
21542154 * prompt template placeholders through `inputPorts` and have an EMPTY static
21552155 * input schema, so schema-based filtering keeps everything. Policy:
2156- * - App with a resolved contract → strict: keep only declared keys.
2156+ * - App with a resolved contract → strict: keep only declared (or
2157+ * downstream-evaluator-protected) keys.
21572158 * - Evaluator → chat-transport only: evaluators spread extra testcase
21582159 * columns, so we never strict-filter them.
2159- * - Unresolved contract (ports mid-hydration) → skip; the execution-time
2160- * reconciliation catches it. A console.warn is emitted so we can see
2161- * when the hydration window is hit.
2160+ * - Unresolved contract (ports mid-hydration) → no-op; returns
2161+ * `"unresolved"` so the caller can retry once the contract resolves. The
2162+ * run-time reconciliation in `webWorkerIntegration` is the backstop.
2163+ *
2164+ * Columns referenced by downstream evaluator `<input>_key` settings (e.g.
2165+ * `correct_answer_key → ground_truth`) are protected so a strict clean
2166+ * against the app contract doesn't drop intentional evaluation inputs.
21622167 *
21632168 * Mutations go through `testcaseMolecule.actions.batchUpdate` setting stale
21642169 * keys to `undefined`, which the store's update reducer interprets as a
21652170 * delete. Drafts are created as needed (one per affected row).
21662171 */
2167- function pruneTestcaseRowsForEntity ( get : Getter , set : Setter , entityId : string ) {
2172+ function pruneTestcaseRowsForEntity ( get : Getter , set : Setter , entityId : string ) : PruneStatus {
21682173 const contract = resolveEntityInputContract ( get , entityId )
21692174
21702175 // Unresolved, non-evaluator contract → we can't strict-filter safely yet.
21712176 // The evaluator path is always "resolved enough" (chat-transport strip
21722177 // works without a variable list), so only bail for non-evaluator apps.
21732178 if ( ! contract . isEvaluator && ! contract . resolved ) {
2174- console . warn ( "[playgroundController.prune] contract-not-resolved on swap" , {
2175- entityId,
2176- // Without resolved inputPorts we can't decide what to drop. The
2177- // execution-time reconciliation catches the leak window for now.
2178- } )
2179- return
2179+ return "unresolved"
21802180 }
21812181
21822182 const displayRowIds = get ( testcaseMolecule . atoms . displayRowIds )
2183- if ( ! Array . isArray ( displayRowIds ) || displayRowIds . length === 0 ) return
2183+ if ( ! Array . isArray ( displayRowIds ) || displayRowIds . length === 0 ) return "noop"
2184+
2185+ const protectedColumns = collectDownstreamReferencedColumns ( get , get ( playgroundNodesAtom ) )
21842186
21852187 const updates : { id : string ; updates : { data : Record < string , unknown > } } [ ] = [ ]
2186- const droppedPerRow : Record < string , string [ ] > = { }
2187- let strategyUsed : string | null = null
21882188
21892189 for ( const rowId of displayRowIds ) {
21902190 const row = get ( testcaseMolecule . data ( rowId ) )
21912191 const data = ( row as { data ?: Record < string , unknown > } | null ) ?. data
21922192 if ( ! data || typeof data !== "object" ) continue
21932193
2194- const { dropped, strategy} = reconcileRowDataForEntity ( get , entityId , data )
2194+ const { dropped} = reconcileRowDataForEntity ( get , entityId , data , {
2195+ protectedKeys : protectedColumns ,
2196+ } )
21952197 if ( dropped . length === 0 ) continue
2196- strategyUsed = strategy
21972198
21982199 const undefinedData : Record < string , unknown > = { }
21992200 for ( const key of dropped ) {
22002201 undefinedData [ key ] = undefined
22012202 }
22022203 updates . push ( { id : rowId , updates : { data : undefinedData } } )
2203- droppedPerRow [ rowId ] = dropped
22042204 }
22052205
2206- if ( updates . length === 0 ) return
2207-
2208- console . warn ( "[playgroundController.prune] dropped stale keys after primary swap" , {
2209- entityId,
2210- rowsAffected : updates . length ,
2211- strategy : strategyUsed ,
2212- droppedPerRow,
2213- } )
2206+ if ( updates . length === 0 ) return "noop"
22142207
22152208 set ( testcaseMolecule . actions . batchUpdate , updates )
2209+ return "acted"
22162210}
22172211
2212+ /**
2213+ * Reconcile all testcase rows against the CURRENT primary (depth-0) entity's
2214+ * input contract, on demand — call this right after a primary swap so the
2215+ * shared row is cleaned the instant the app changes, without waiting for a
2216+ * run. The run-time reconciliation in `webWorkerIntegration` is the backstop.
2217+ *
2218+ * Hydration handling: the new primary's input ports may not be resolved at
2219+ * call time (the workflow is still loading). When the prune reports
2220+ * `"unresolved"` AND the entity isn't loaded yet, we subscribe to its
2221+ * `inputPorts` and retry once they resolve, then unsubscribe. If the entity
2222+ * is already loaded but has no resolvable variables, there's nothing to wait
2223+ * for, so we don't subscribe (avoids a dangling subscription).
2224+ */
2225+ const reconcileRowsToPrimaryAtom = atom ( null , ( get , set ) => {
2226+ const nodes = get ( playgroundNodesAtom )
2227+ const primary = nodes . find ( ( node ) => node . depth === 0 )
2228+ if ( ! primary ) return
2229+ const entityId = primary . entityId
2230+
2231+ const status = pruneTestcaseRowsForEntity ( get , set , entityId )
2232+ if ( status !== "unresolved" ) return
2233+
2234+ // Unresolved: either the workflow is still loading, or it's a genuinely
2235+ // no-variable app. Only wait if it hasn't loaded yet.
2236+ const entityLoaded = get ( workflowMolecule . selectors . data ( entityId ) ) != null
2237+ if ( entityLoaded ) return
2238+
2239+ const store = getDefaultStore ( )
2240+ const unsub = store . sub ( workflowMolecule . selectors . inputPorts ( entityId ) , ( ) => {
2241+ const retryStatus = pruneTestcaseRowsForEntity ( store . get , store . set , entityId )
2242+ const nowLoaded = store . get ( workflowMolecule . selectors . data ( entityId ) ) != null
2243+ if ( retryStatus !== "unresolved" || nowLoaded ) unsub ( )
2244+ } )
2245+ } )
2246+
22182247/**
22192248 * Switch one entity for another in the displayed selection.
22202249 * Handles both single and comparison mode. The loadable-scoped re-link
@@ -2336,6 +2365,13 @@ export const playgroundController = {
23362365 /** Change the primary node */
23372366 changePrimaryNode : changePrimaryNodeAtom ,
23382367
2368+ /**
2369+ * Reconcile all testcase rows against the current primary entity's
2370+ * input contract. Call after a primary swap to clean stale keys from a
2371+ * previous app off the shared row immediately (#4525 / AGE-3793).
2372+ */
2373+ reconcileRowsToPrimary : reconcileRowsToPrimaryAtom ,
2374+
23392375 /** Disconnect from testset and reset to local mode */
23402376 disconnectAndResetToLocal : disconnectAndResetToLocalAtom ,
23412377
0 commit comments