@@ -1155,6 +1155,12 @@ async def _run_ground_truth_optimization(
11551155 sample_passed , optimize_context = self ._apply_duration_gate (sample_passed , optimize_context )
11561156 sample_passed , optimize_context = self ._apply_cost_gate (sample_passed , optimize_context )
11571157
1158+ # Flush gate scores to the API for this sample. Without this,
1159+ # the next sample's "generating" event closes out this record
1160+ # with a status-only PATCH before gate scores are sent, so only
1161+ # the last sample would ever show latency/cost gate entries.
1162+ self ._safe_status_update ("evaluating" , optimize_context , linear_iter )
1163+
11581164 if not sample_passed :
11591165 logger .info (
11601166 "[GT Attempt %d] -> Sample %d/%d FAILED" ,
@@ -2110,21 +2116,23 @@ def _apply_duration_gate(
21102116 When the gate is active (any acceptance statement implies latency optimization),
21112117 evaluates whether the candidate's duration improved by at least
21122118 _DURATION_TOLERANCE vs the baseline. A synthetic ``_latency_gate`` entry is
2113- added to scores with score=1.0 on pass or score=0.0 on fail so the outcome
2114- is visible in the API result and UI.
2119+ always added to scores with score=1.0 on pass or score=0.0 on fail so the
2120+ outcome is visible in the API result and UI for every iteration.
2121+
2122+ The gate score is recorded even when ``passed_so_far`` is False (quality
2123+ judges already failed) so that latency telemetry is visible on all
2124+ iterations, not just passing ones. In that case it is informational only
2125+ and cannot block the iteration further.
21152126
2116- The gate is skipped (no score entry added) when:
2117- - No acceptance statement implies latency optimization.
2118- - ``passed_so_far`` is already False (a prior check failed the sample).
2127+ The gate is skipped entirely (no score entry added) only when no acceptance
2128+ statement implies latency optimization.
21192129
21202130 :param passed_so_far: Whether all prior checks for this sample passed.
21212131 :param ctx: Current optimization context.
21222132 :return: (passed, updated_ctx) where passed is ``passed_so_far`` AND-ed with the gate outcome when the gate is active.
21232133 """
21242134 if not _acceptance_criteria_implies_duration_optimization (self ._options .judges ):
21252135 return passed_so_far , ctx
2126- if not passed_so_far :
2127- return passed_so_far , ctx
21282136 passed = self ._evaluate_duration (ctx )
21292137 if passed :
21302138 if self ._baseline_duration_ms is not None and ctx .duration_ms is not None :
@@ -2149,9 +2157,13 @@ def _apply_duration_gate(
21492157 score = 0.0
21502158 ctx = dataclasses .replace (
21512159 ctx ,
2152- scores = {** ctx .scores , "_latency_gate" : JudgeResult (score = score , rationale = rationale )},
2160+ scores = {** ctx .scores , "_latency_gate" : JudgeResult (
2161+ score = score ,
2162+ rationale = rationale ,
2163+ duration_ms = ctx .duration_ms ,
2164+ )},
21532165 )
2154- return passed , ctx
2166+ return passed_so_far and passed , ctx
21552167
21562168 def _apply_cost_gate (
21572169 self , passed_so_far : bool , ctx : OptimizationContext
@@ -2160,21 +2172,23 @@ def _apply_cost_gate(
21602172
21612173 When the gate is active (any acceptance statement implies cost optimization),
21622174 evaluates whether the candidate's estimated cost improved by at least
2163- _COST_TOLERANCE vs the baseline. A synthetic ``_cost_gate`` entry is
2175+ _COST_TOLERANCE vs the baseline. A synthetic ``_cost_gate`` entry is always
21642176 added to scores with score=1.0 on pass or score=0.0 on fail.
21652177
2166- The gate is skipped (no score entry added) when:
2167- - No acceptance statement implies cost optimization.
2168- - ``passed_so_far`` is already False (a prior check failed the sample).
2178+ The gate score is recorded even when ``passed_so_far`` is False (quality
2179+ judges already failed) so that cost telemetry is visible on all iterations,
2180+ not just passing ones. In that case it is informational only and cannot
2181+ block the iteration further.
2182+
2183+ The gate is skipped entirely (no score entry added) only when no acceptance
2184+ statement implies cost optimization.
21692185
21702186 :param passed_so_far: Whether all prior checks for this sample passed.
21712187 :param ctx: Current optimization context.
21722188 :return: (passed, updated_ctx) where passed is ``passed_so_far`` AND-ed with the gate outcome when the gate is active.
21732189 """
21742190 if not _acceptance_criteria_implies_cost_optimization (self ._options .judges ):
21752191 return passed_so_far , ctx
2176- if not passed_so_far :
2177- return passed_so_far , ctx
21782192 passed = self ._evaluate_cost (ctx )
21792193 if passed :
21802194 if self ._baseline_cost_usd is not None and ctx .estimated_cost_usd is not None :
@@ -2199,9 +2213,14 @@ def _apply_cost_gate(
21992213 score = 0.0
22002214 ctx = dataclasses .replace (
22012215 ctx ,
2202- scores = {** ctx .scores , "_cost_gate" : JudgeResult (score = score , rationale = rationale )},
2216+ scores = {** ctx .scores , "_cost_gate" : JudgeResult (
2217+ score = score ,
2218+ rationale = rationale ,
2219+ duration_ms = ctx .duration_ms ,
2220+ estimated_cost_usd = ctx .estimated_cost_usd ,
2221+ )},
22032222 )
2204- return passed , ctx
2223+ return passed_so_far and passed , ctx
22052224
22062225 def _handle_success (
22072226 self , optimize_context : OptimizationContext , iteration : int
0 commit comments