@@ -1445,21 +1445,16 @@ async def _run_ground_truth_optimization(
14451445 attempt ,
14461446 n ,
14471447 )
1448- self ._last_run_succeeded = True
1449- self ._last_succeeded_context = last_ctx
1450- self ._safe_status_update ("success" , last_ctx , last_ctx .iteration )
1451- if self ._options .on_passing_result :
1452- try :
1453- self ._options .on_passing_result (last_ctx )
1454- except Exception :
1455- logger .exception (
1456- "[GT Attempt %d] -> on_passing_result callback failed" , attempt
1457- )
14581448 # Phase 2: optimize model/params on the frozen winning variation.
14591449 if (
14601450 self ._options .latency_optimization
14611451 or self ._options .token_optimization
14621452 ) and not self ._is_token_limit_exceeded ():
1453+ # Record Phase 1 success without firing on_passing_result yet;
1454+ # we fire it once below with the true final winner.
1455+ self ._last_run_succeeded = True
1456+ self ._last_succeeded_context = last_ctx
1457+ self ._safe_status_update ("success" , last_ctx , last_ctx .iteration )
14631458 phase1_winner = self ._last_succeeded_context
14641459 await self ._run_cost_latency_phase (
14651460 last_ctx ,
@@ -1469,11 +1464,28 @@ async def _run_ground_truth_optimization(
14691464 # No Phase 2 candidate won; restore the Phase 1 winner.
14701465 self ._last_run_succeeded = True
14711466 self ._last_succeeded_context = phase1_winner
1472- elif self ._last_succeeded_context is not phase1_winner :
1473- # Phase 2 selected a better model; return that context so
1474- # callers (including auto_commit) see the actual final winner
1475- # rather than the stale Phase 1 GT batch results.
1476- return [self ._last_succeeded_context ]
1467+ else :
1468+ self ._last_run_succeeded = True
1469+ self ._last_succeeded_context = last_ctx
1470+ self ._safe_status_update ("success" , last_ctx , last_ctx .iteration )
1471+
1472+ # Fire on_passing_result exactly once with the true final winner.
1473+ final_winner = self ._last_succeeded_context
1474+ if final_winner and self ._options .on_passing_result :
1475+ try :
1476+ self ._options .on_passing_result (final_winner )
1477+ except Exception :
1478+ logger .exception (
1479+ "[GT Attempt %d] -> on_passing_result callback failed" , attempt
1480+ )
1481+
1482+ if (
1483+ self ._last_succeeded_context is not None
1484+ and self ._last_succeeded_context is not last_ctx
1485+ ):
1486+ # Phase 2 selected a better model; return that context so
1487+ # callers (including auto_commit) see the actual final winner.
1488+ return [self ._last_succeeded_context ]
14771489 return attempt_results
14781490
14791491 # We've hit max attempts for the batches, bail at this point
@@ -2193,8 +2205,6 @@ async def _execute_agent_turn(
21932205 )
21942206 except Exception :
21952207 logger .exception ("[Iteration %d] -> Agent call failed" , iteration )
2196- if self ._options .on_failing_result :
2197- self ._options .on_failing_result (optimize_context )
21982208 raise
21992209
22002210 scores : Dict [str , JudgeResult ] = {}
@@ -2495,23 +2505,29 @@ def _apply_cost_gate(
24952505 return passed_so_far and passed , ctx
24962506
24972507 def _handle_success (
2498- self , optimize_context : OptimizationContext , iteration : int
2508+ self ,
2509+ optimize_context : OptimizationContext ,
2510+ iteration : int ,
2511+ suppress_user_callbacks : bool = False ,
24992512 ) -> Any :
25002513 """
25012514 Handle a successful optimization result.
25022515
2503- Fires the "success" status update, invokes on_passing_result if set,
2504- and returns the winning OptimizationContext.
2516+ Fires the "success" status update and (unless suppressed) invokes
2517+ on_passing_result. Pass suppress_user_callbacks=True when the caller fires
2518+ on_passing_result itself (the Phase 1 success path in _run_optimization and Phase 2
2519+ both do): the status update still runs, and the caller fires the callback once with the true final winner.
25052520
25062521 :param optimize_context: The context from the passing iteration
25072522 :param iteration: Current iteration number for logging
2523+ :param suppress_user_callbacks: When True, skip on_passing_result.
25082524 :return: The passing OptimizationContext
25092525 """
25102526 logger .info ("[Iteration %d] -> Optimization succeeded" , iteration )
25112527 self ._last_run_succeeded = True
25122528 self ._last_succeeded_context = optimize_context
25132529 self ._safe_status_update ("success" , optimize_context , iteration )
2514- if self ._options .on_passing_result :
2530+ if not suppress_user_callbacks and self ._options .on_passing_result :
25152531 try :
25162532 self ._options .on_passing_result (optimize_context )
25172533 except Exception :
@@ -2566,13 +2582,15 @@ def _pick_best_candidate(
25662582 def _score (ctx : OptimizationContext ) -> float :
25672583 total = 0.0
25682584 if (
2569- ctx .duration_ms is not None
2585+ self ._options .latency_optimization
2586+ and ctx .duration_ms is not None
25702587 and self ._baseline_duration_ms is not None
25712588 and self ._baseline_duration_ms > 0
25722589 ):
25732590 total += ctx .duration_ms / self ._baseline_duration_ms
25742591 if (
2575- ctx .estimated_cost_usd is not None
2592+ self ._options .token_optimization
2593+ and ctx .estimated_cost_usd is not None
25762594 and self ._baseline_cost_usd is not None
25772595 and self ._baseline_cost_usd > 0
25782596 ):
@@ -2706,7 +2724,9 @@ async def _run_cost_latency_phase(
27062724
27072725 if candidates :
27082726 best = self ._pick_best_candidate (candidates )
2709- self ._handle_success (best , best .iteration )
2727+ # Suppress on_passing_result here — the caller fires it once with the
2728+ # true final winner after Phase 2 returns, so it is never double-fired.
2729+ self ._handle_success (best , best .iteration , suppress_user_callbacks = True )
27102730 logger .info (
27112731 "[Phase 2] -> Best candidate selected: model=%s, duration_ms=%s, cost=%s" ,
27122732 best .current_model ,
@@ -3080,17 +3100,34 @@ async def _run_optimization(
30803100 optimize_context , iteration
30813101 )
30823102 if all_valid :
3083- self ._handle_success (optimize_context , iteration )
3084- phase1_winner = self ._last_succeeded_context
3103+ # Suppress on_passing_result in _handle_success; we fire it
3104+ # exactly once below with the true final winner so that Phase 2
3105+ # (if it runs) cannot cause a double callback.
3106+ self ._handle_success (
3107+ optimize_context , iteration , suppress_user_callbacks = True
3108+ )
30853109 if (
30863110 self ._options .latency_optimization
30873111 or self ._options .token_optimization
30883112 ) and not self ._is_token_limit_exceeded ():
3113+ phase1_winner = self ._last_succeeded_context
30893114 await self ._run_cost_latency_phase (optimize_context , iteration )
30903115 if self ._last_succeeded_context is None :
30913116 self ._last_run_succeeded = True
30923117 self ._last_succeeded_context = phase1_winner
3093- return self ._last_succeeded_context
3118+ # Fire on_passing_result exactly once with the true final winner
3119+ # (Phase 1 winner if Phase 2 was skipped/found nothing better,
3120+ # or the Phase 2 best candidate otherwise).
3121+ final_winner = self ._last_succeeded_context
3122+ if final_winner and self ._options .on_passing_result :
3123+ try :
3124+ self ._options .on_passing_result (final_winner )
3125+ except Exception :
3126+ logger .exception (
3127+ "[Iteration %d] -> on_passing_result callback failed" ,
3128+ iteration ,
3129+ )
3130+ return final_winner
30943131 if self ._is_token_limit_exceeded ():
30953132 return self ._handle_failure (last_ctx , iteration )
30963133 # Validation failed — treat as a normal failed attempt.
0 commit comments