@@ -47,6 +47,7 @@ import { Resource } from "@opencode-ai/console-resource"
4747import { i18n , type Key } from "~/i18n"
4848import { localeFromRequest } from "~/lib/language"
4949import { createModelTpmLimiter } from "./modelTpmLimiter"
50+ import { createModelTpsLimiter } from "./modelTpsLimiter"
5051
5152type ZenData = Awaited < ReturnType < typeof ZenData . list > >
5253type RetryOptions = {
@@ -129,6 +130,8 @@ export async function handler(
129130 logger . metric ( { source : billingSource } )
130131 const modelTpmLimiter = createModelTpmLimiter ( modelInfo . providers )
131132 const modelTpmLimits = await modelTpmLimiter ?. check ( )
133+ const modelTpsLimiter = createModelTpsLimiter ( modelInfo . providers )
134+ const modelTpsLimits = await modelTpsLimiter ?. check ( )
132135
133136 const retriableRequest = async ( retry : RetryOptions = { excludeProviders : [ ] , retryCount : 0 } ) => {
134137 const providerInfo = selectProvider (
@@ -142,6 +145,7 @@ export async function handler(
142145 retry ,
143146 stickyProvider ,
144147 modelTpmLimits ,
148+ modelTpsLimits ,
145149 )
146150 validateModelSettings ( billingSource , authInfo )
147151 updateProviderKey ( authInfo , providerInfo )
@@ -294,14 +298,17 @@ export async function handler(
294298
295299 let buffer = ""
296300 let responseLength = 0
301+ let timestampFirstByte = 0
302+ let timestampLastByte = 0
297303
298304 function pump ( ) : Promise < void > {
299305 return (
300306 reader ?. read ( ) . then ( async ( { done, value : rawValue } ) => {
301307 if ( done ) {
308+ timestampLastByte = Date . now ( ) // write the outer `let timestampLastByte` (declared beside timestampFirstByte) instead of shadowing it with a block-scoped const, so the metric and the TPS tracker read the same stored value
302309 logger . metric ( {
303310 response_length : responseLength ,
304- "timestamp.last_byte" : Date . now ( ) ,
311+ "timestamp.last_byte" : timestampLastByte ,
305312 } )
306313 dataDumper ?. flush ( )
307314 await rateLimiter ?. track ( )
@@ -311,6 +318,13 @@ export async function handler(
311318 const costInfo = calculateCost ( modelInfo , usageInfo )
312319 await trialLimiter ?. track ( usageInfo )
313320 await modelTpmLimiter ?. track ( providerInfo . id , providerInfo . model , usageInfo )
321+ await modelTpsLimiter ?. track (
322+ providerInfo . id ,
323+ providerInfo . model ,
324+ timestampFirstByte ,
325+ timestampLastByte ,
326+ usageInfo ,
327+ )
314328 await trackUsage ( sessionId , billingSource , authInfo , modelInfo , providerInfo , usageInfo , costInfo )
315329 await reload ( billingSource , authInfo , costInfo )
316330 const cost = calculateOccurredCost ( billingSource , costInfo )
@@ -321,10 +335,10 @@ export async function handler(
321335 }
322336
323337 if ( responseLength === 0 ) {
324- const now = Date . now ( )
338+ timestampFirstByte = Date . now ( )
325339 logger . metric ( {
326- time_to_first_byte : now - startTimestamp ,
327- "timestamp.first_byte" : now ,
340+ time_to_first_byte : timestampFirstByte - startTimestamp ,
341+ "timestamp.first_byte" : timestampFirstByte ,
328342 } )
329343 }
330344
@@ -478,6 +492,7 @@ export async function handler(
478492 retry : RetryOptions ,
479493 stickyProvider : string | undefined ,
480494 modelTpmLimits : Record < string , number > | undefined ,
495+ modelTpsLimits : Record < string , boolean > | undefined ,
481496 ) {
482497 const modelProvider = ( ( ) => {
483498 // Byok is top priority b/c if user set their own API key, we should use it
@@ -509,6 +524,11 @@ export async function handler(
509524 const usage = modelTpmLimits ?. [ `${ provider . id } /${ provider . model } ` ] ?? 0
510525 return usage < provider . tpmLimit * 1_000_000
511526 } )
527+ . filter ( ( provider ) => { // drop providers whose entry in modelTpsLimits is true
528+ if ( ! provider . tpsGoal ) return true // falsy tpsGoal (unset or 0): this filter never excludes the provider
529+ const isLowTps = modelTpsLimits ?. [ `${ provider . id } /${ provider . model } ` ] ?? false // an undefined map or a missing entry counts as "not low"
530+ return ! isLowTps
531+ } )
512532 . map ( ( provider ) => {
513533 topPriority = Math . min ( topPriority , provider . priority )
514534 return provider
0 commit comments