@@ -158,6 +158,7 @@ import { paintUiText, formatUiBadge, formatUiHeader, formatUiItem, formatUiKeyVa
158158import {
159159 buildBeginnerChecklist ,
160160 buildBeginnerDoctorFindings ,
161+ formatPromptCacheSnapshot ,
161162 recommendBeginnerNextAction ,
162163 summarizeBeginnerAccounts ,
163164 type BeginnerAccountSnapshot ,
@@ -278,6 +279,9 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => {
278279 lastRequestAt : number | null ;
279280 lastError : string | null ;
280281 lastErrorCategory : string | null ;
282+ promptCacheEnabledRequests : number ;
283+ promptCacheMissingRequests : number ;
284+ lastPromptCacheKey : string | null ;
281285 lastSelectedAccountIndex : number | null ;
282286 lastQuotaKey : string | null ;
283287 lastSelectionSnapshot : SelectionSnapshot | null ;
@@ -304,6 +308,9 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => {
304308 lastRequestAt : null ,
305309 lastError : null ,
306310 lastErrorCategory : null ,
311+ promptCacheEnabledRequests : 0 ,
312+ promptCacheMissingRequests : 0 ,
313+ lastPromptCacheKey : null ,
307314 lastSelectedAccountIndex : null ,
308315 lastQuotaKey : null ,
309316 lastSelectionSnapshot : null ,
@@ -1373,6 +1380,9 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => {
13731380 serverErrors : runtimeMetrics . serverErrors ,
13741381 networkErrors : runtimeMetrics . networkErrors ,
13751382 lastErrorCategory : runtimeMetrics . lastErrorCategory ,
1383+ promptCacheEnabledRequests : runtimeMetrics . promptCacheEnabledRequests ,
1384+ promptCacheMissingRequests : runtimeMetrics . promptCacheMissingRequests ,
1385+ lastPromptCacheKey : runtimeMetrics . lastPromptCacheKey ,
13761386 } ) ;
13771387
13781388 const formatDoctorSeverity = (
@@ -2025,6 +2035,12 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => {
20252035 threadIdCandidate ? `${ threadIdCandidate } :${ Date . now ( ) } ` : undefined ,
20262036 ) ;
20272037 runtimeMetrics . lastRequestAt = Date . now ( ) ;
2038+ runtimeMetrics . lastPromptCacheKey = promptCacheKey ?? null ;
2039+ if ( promptCacheKey ) {
2040+ runtimeMetrics . promptCacheEnabledRequests ++ ;
2041+ } else {
2042+ runtimeMetrics . promptCacheMissingRequests ++ ;
2043+ }
20282044 const retryBudget = new RetryBudgetTracker ( retryBudgetLimits ) ;
20292045 const consumeRetryBudget = (
20302046 bucket : RetryBudgetClass ,
@@ -2313,20 +2329,31 @@ while (attempted.size < Math.max(1, accountCount)) {
23132329 : null ;
23142330
23152331 if ( abortSignal ?. aborted ) {
2316- clearTimeout ( fetchTimeoutId ) ;
2317- fetchController . abort ( abortSignal . reason ?? new Error ( "Aborted by user" ) ) ;
2318- } else if ( abortSignal && onUserAbort ) {
2319- abortSignal . addEventListener ( "abort" , onUserAbort , { once : true } ) ;
2320- }
2332+ clearTimeout ( fetchTimeoutId ) ;
2333+ fetchController . abort ( abortSignal . reason ?? new Error ( "Aborted by user" ) ) ;
2334+ } else if ( abortSignal && onUserAbort ) {
2335+ abortSignal . addEventListener ( "abort" , onUserAbort , { once : true } ) ;
2336+ }
23212337
2322- try {
2338+ try {
2339+ // Request metrics are tracked at the fetch boundary, so retries and
2340+ // account rotation are counted consistently. These increments are
2341+ // in-memory only and run on Node's single-threaded event loop, so no
2342+ // filesystem locking or token-redaction concerns are introduced here.
23232343 runtimeMetrics . totalRequests ++ ;
23242344 response = await fetch ( url , {
23252345 ...requestInit ,
23262346 headers,
23272347 signal : fetchController . signal ,
23282348 } ) ;
2329- } catch ( networkError ) {
2349+ } catch ( networkError ) {
2350+ if ( abortSignal ?. aborted && fetchController . signal . aborted ) {
2351+ accountManager . refundToken ( account , modelFamily , model ) ;
2352+ if ( networkError instanceof Error ) {
2353+ throw networkError ;
2354+ }
2355+ throw new Error ( String ( networkError ) ) ;
2356+ }
23302357 const errorMsg = networkError instanceof Error ? networkError . message : String ( networkError ) ;
23312358 logWarn ( `Network error for account ${ account . index + 1 } : ${ errorMsg } ` ) ;
23322359 if (
@@ -2359,21 +2386,21 @@ while (attempted.size < Math.max(1, accountCount)) {
23592386 accountManager . refundToken ( account , modelFamily , model ) ;
23602387 accountManager . recordFailure ( account , modelFamily , model ) ;
23612388 break ;
2362- } finally {
2363- clearTimeout ( fetchTimeoutId ) ;
2364- if ( abortSignal && onUserAbort ) {
2365- abortSignal . removeEventListener ( "abort" , onUserAbort ) ;
2366- }
2389+ } finally {
2390+ clearTimeout ( fetchTimeoutId ) ;
2391+ if ( abortSignal && onUserAbort ) {
2392+ abortSignal . removeEventListener ( "abort" , onUserAbort ) ;
23672393 }
2368- const fetchLatencyMs = Math . round ( performance . now ( ) - fetchStart ) ;
2369-
2370- logRequest ( LOG_STAGES . RESPONSE , {
2371- status : response . status ,
2372- ok : response . ok ,
2373- statusText : response . statusText ,
2374- latencyMs : fetchLatencyMs ,
2375- headers : Object . fromEntries ( response . headers . entries ( ) ) ,
2376- } ) ;
2394+ }
2395+ const fetchLatencyMs = Math . round ( performance . now ( ) - fetchStart ) ;
2396+
2397+ logRequest ( LOG_STAGES . RESPONSE , {
2398+ status : response . status ,
2399+ ok : response . ok ,
2400+ statusText : response . statusText ,
2401+ latencyMs : fetchLatencyMs ,
2402+ headers : Object . fromEntries ( response . headers . entries ( ) ) ,
2403+ } ) ;
23772404
23782405 if ( ! response . ok ) {
23792406 const contextOverflowResult = await handleContextOverflow ( response , model ) ;
@@ -5197,6 +5224,14 @@ while (attempted.size < Math.max(1, accountCount)) {
51975224 "muted" ,
51985225 ) ,
51995226 ) ;
5227+ lines . push (
5228+ formatUiKeyValue (
5229+ ui ,
5230+ "Prompt cache" ,
5231+ formatPromptCacheSnapshot ( runtime ) ,
5232+ "muted" ,
5233+ ) ,
5234+ ) ;
52005235 }
52015236
52025237 return lines . join ( "\n" ) ;
@@ -5236,6 +5271,9 @@ while (attempted.size < Math.max(1, accountCount)) {
52365271 lines . push (
52375272 ` Runtime failures: failed=${ runtime . failedRequests } , rateLimited=${ runtime . rateLimitedResponses } , authRefreshFailed=${ runtime . authRefreshFailures } , server=${ runtime . serverErrors } , network=${ runtime . networkErrors } ` ,
52385273 ) ;
5274+ lines . push (
5275+ ` Prompt cache: ${ formatPromptCacheSnapshot ( runtime ) } ` ,
5276+ ) ;
52395277 }
52405278 return lines . join ( "\n" ) ;
52415279 } ,
0 commit comments