4 files changed: +13 -7 lines changed
Original file line number | Diff line number | Diff line change
@@ -285,7 +285,6 @@ async function main() {
285285 } ) ;
286286
287287 const summaries = [ summaryA ] ;
288- let passesAllGates = summaryA . passesGate ;
289288
290289 if ( codebaseB ) {
291290 const summaryB = await runSingleEvaluation ( {
@@ -296,8 +295,6 @@ async function main() {
296295 } ) ;
297296
298297 summaries . push ( summaryB ) ;
299- passesAllGates =
300- mode === 'discovery' ? passesAllGates : passesAllGates && summaryB . passesGate ;
301298 }
302299
303300 if ( mode === 'discovery' ) {
@@ -320,6 +317,7 @@ async function main() {
320317 process . exit ( gate . status === 'failed' ? 1 : 0 ) ;
321318 }
322319
320+ const passesAllGates = summaries . every ( ( summary ) => summary . passesGate ) ;
323321 printCombinedSummary ( summaries , mode ) ;
324322 process . exit ( passesAllGates ? 0 : 1 ) ;
325323}
Original file line number | Diff line number | Diff line change
@@ -240,7 +240,10 @@ function compareMetric(
240240 comparatorValue : number | null ,
241241 metric : DiscoveryMetricName
242242) : DiscoveryMetricComparison {
243- const lowerIsBetter = metric === 'averageEstimatedTokens' || metric === 'averageFirstRelevantHit' ;
243+ const lowerIsBetter =
244+ metric === 'averagePayloadBytes' ||
245+ metric === 'averageEstimatedTokens' ||
246+ metric === 'averageFirstRelevantHit' ;
244247 const passes =
245248 actualValue !== null &&
246249 comparatorValue !== null &&
@@ -260,7 +263,10 @@ function compareMetricWithinTolerance(
260263 metric : DiscoveryMetricName ,
261264 tolerancePercent : number
262265) : DiscoveryMetricComparison {
263- const lowerIsBetter = metric === 'averageFirstRelevantHit' ;
266+ const lowerIsBetter =
267+ metric === 'averagePayloadBytes' ||
268+ metric === 'averageEstimatedTokens' ||
269+ metric === 'averageFirstRelevantHit' ;
264270 const multiplier = 1 + tolerancePercent / 100 ;
265271 const passes =
266272 actualValue !== null &&
Original file line number | Diff line number | Diff line change
@@ -151,6 +151,7 @@ export interface DiscoverySurfaceResult {
151151
152152export type DiscoveryMetricName =
153153 | 'averageUsefulness'
154+ | 'averagePayloadBytes'
154155 | 'averageEstimatedTokens'
155156 | 'averageFirstRelevantHit'
156157 | 'bestExampleUsefulnessRate' ;
@@ -198,6 +199,7 @@ export interface DiscoveryBenchmarkProtocol {
198199
199200export interface DiscoveryComparatorMetrics {
200201 averageUsefulness ?: number | null ;
202+ averagePayloadBytes ?: number | null ;
201203 averageEstimatedTokens ?: number | null ;
202204 averageFirstRelevantHit ?: number | null ;
203205 bestExampleUsefulnessRate ?: number | null ;
Original file line number | Diff line number | Diff line change
3939 }
4040 ],
4141 "metrics" : {
42- "payloadCost" : [" payloadBytes " , " estimatedTokens " ],
43- "usefulness" : [" usefulnessScore " , " firstRelevantHit " , " bestExampleUseful " ]
42+ "payloadCost" : [" averagePayloadBytes " , " averageEstimatedTokens " ],
43+ "usefulness" : [" averageUsefulness " , " averageFirstRelevantHit " , " bestExampleUsefulnessRate " ]
4444 },
4545 "fairnessRules" : [
4646 " Use only current shipped codebase-context surfaces in the direct-tool lane." ,
You can’t perform that action at this time.
0 commit comments