@@ -614,6 +614,85 @@ async function patchPayloadMetadata(
614614 }
615615}
616616
/**
 * One snapshot of the TaaS autorouter metadata observed for a session.
 *
 * Snapshots are populated by the onResponse callback installed in
 * `buildWrapper` and exposed via the `taas.autorouter.lastRoute` gateway RPC so
 * Alien AI Studio (or any other client) can pull the latest TaaS routing
 * decision for a session.
 *
 * Snapshots are kept in a map keyed by the affinity session ID we already
 * derive in `resolveSessionId(ctx.workspaceDir)`. That map is bounded — see
 * `LAST_ROUTE_LIMIT` — to avoid unbounded growth in long-lived gateways.
 */
type AutorouterCapture = {
  /** Session the snapshot was recorded for. */
  sessionId: string
  /** `Date.now()` value taken when the snapshot was recorded. */
  capturedAt: number
  /** Actual model chosen by the autorouter (`x-taas-autorouter-model`); null when the header is absent. */
  autorouterModel: string | null
  /** Algorithm used by the autorouter (`x-taas-autorouter-mode`); null when the header is absent. */
  autorouterAlgo: string | null
  /**
   * Source of that algorithm — org/dept/key/user/system
   * (`x-taas-autorouter-algorithm-source`); null when the header is absent.
   */
  autorouterAlgoSource: string | null
  /** Thinking level applied (`x-taas-thinking-applied`); null when the header is absent. */
  thinkingApplied: string | null
  /**
   * Context window of the chosen model (`x-taas-routed-context-window`);
   * null when the header is absent or is not a finite number greater than zero.
   */
  routedContextWindow: number | null
}
638+
/** Maximum number of per-session captures kept; `pruneLastRouteMap` drops the oldest beyond this. */
const LAST_ROUTE_LIMIT = 256

/** Latest autorouter capture per session, keyed by session ID. */
const lastRouteBySessionId: Map<string, AutorouterCapture> = new Map()
641+
/**
 * Shrinks `lastRouteBySessionId` back to at most `LAST_ROUTE_LIMIT` entries.
 *
 * Does nothing while the map is at or under the cap. Otherwise the entries
 * with the smallest `capturedAt` values are removed first, so the most
 * recently captured sessions survive.
 */
function pruneLastRouteMap(): void {
  const overflow = lastRouteBySessionId.size - LAST_ROUTE_LIMIT
  if (overflow <= 0) return

  // Order by capture time, oldest first. Updating an existing key keeps its
  // original position in the map, so iteration order alone is not enough.
  const oldestFirst = Array.from(lastRouteBySessionId)
  oldestFirst.sort(([, left], [, right]) => left.capturedAt - right.capturedAt)

  for (const [staleKey] of oldestFirst.slice(0, overflow)) {
    lastRouteBySessionId.delete(staleKey)
  }
}
653+
/**
 * Records the autorouter decision advertised in a response's headers as the
 * latest capture for `sessionId`, then prunes the capture map.
 *
 * Responses whose `x-taas-autorouted` header is not exactly "true" are
 * ignored; any earlier capture for the session is left as it was.
 *
 * @param sessionId - Key under which the capture is stored.
 * @param headers - Response headers. Names are matched case-insensitively and
 *   entries whose value is not a string are skipped.
 */
function captureAutorouterFromHeaders(
  sessionId: string,
  headers: Record<string, string>
): void {
  // The TaaS proxy emits canonical "X-TaaS-*" names, but Node/undici lowercases
  // incoming response headers, so index everything by lowercased name.
  const byLowerName = new Map<string, string>()
  for (const name of Object.keys(headers)) {
    const value: unknown = headers[name]
    if (typeof value === "string") byLowerName.set(name.toLowerCase(), value)
  }

  // Only autorouted responses carry a routing decision worth recording.
  if (byLowerName.get("x-taas-autorouted") !== "true") return

  const headerOrNull = (name: string): string | null =>
    byLowerName.get(name) ?? null

  // A missing, empty, non-numeric, or non-positive context window becomes null.
  const rawWindow = byLowerName.get("x-taas-routed-context-window")
  const parsedWindow = rawWindow ? Number(rawWindow) : Number.NaN
  const routedContextWindow =
    Number.isFinite(parsedWindow) && parsedWindow > 0 ? parsedWindow : null

  const capture: AutorouterCapture = {
    sessionId,
    capturedAt: Date.now(),
    autorouterModel: headerOrNull("x-taas-autorouter-model"),
    autorouterAlgo: headerOrNull("x-taas-autorouter-mode"),
    autorouterAlgoSource: headerOrNull("x-taas-autorouter-algorithm-source"),
    thinkingApplied: headerOrNull("x-taas-thinking-applied"),
    routedContextWindow,
  }
  lastRouteBySessionId.set(sessionId, capture)
  pruneLastRouteMap()

  if (!isDev) return
  console.debug(
    `[taas-affinity] captured autorouter sessionId=${sessionId} ` +
      `model=${capture.autorouterModel} algo=${capture.autorouterAlgo} ` +
      `source=${capture.autorouterAlgoSource} thinking=${capture.thinkingApplied} ` +
      `ctxWindow=${capture.routedContextWindow} `
  )
}
691+
/**
 * Looks up the most recent autorouter capture stored for a session.
 *
 * @param sessionId - Key to look up in `lastRouteBySessionId`.
 * @returns The stored capture, or null when the session has none.
 */
function getLastRouteForSession(sessionId: string): AutorouterCapture | null {
  const stored = lastRouteBySessionId.get(sessionId)
  if (stored == null) return null
  return stored
}
695+
617696function buildWrapper ( ctx : ProviderWrapStreamFnContext ) {
618697 const { streamFn } = ctx
619698 if ( ! streamFn ) return undefined
@@ -642,7 +721,23 @@ function buildWrapper(ctx: ProviderWrapStreamFnContext) {
642721 if ( prevOnPayload ) return prevOnPayload ( patched , payloadModel )
643722 return patched
644723 }
645- return inner ( model , context , { ...options , onPayload } )
724+ const prevOnResponse = options ?. onResponse
725+ const onResponse : NonNullable < typeof options > [ "onResponse" ] = async (
726+ response ,
727+ responseModel
728+ ) => {
729+ try {
730+ captureAutorouterFromHeaders ( sessionId , response ?. headers ?? { } )
731+ } catch ( err ) {
732+ if ( isDev ) {
733+ console . debug (
734+ `[taas-affinity] onResponse capture failed: ${ ( err as Error ) ?. message ?? err } `
735+ )
736+ }
737+ }
738+ if ( prevOnResponse ) await prevOnResponse ( response , responseModel )
739+ }
740+ return inner ( model , context , { ...options , onPayload, onResponse } )
646741 } as typeof inner
647742}
648743
@@ -700,5 +795,27 @@ export default {
700795 wrapStreamFn : buildWrapper ,
701796 resolveTransportTurnState : buildTransportTurnState ,
702797 } )
798+
799+ // Expose captured TaaS autorouter metadata to gateway clients (Studio, etc.).
800+ // The Alien AI Studio polls this after each turn to populate the model/algo/
801+ // thinking/context-window fields in the AgentChatPanel for cloudsigma/auto and
802+ // other autorouted requests. See PRD "Alien AI Studio - Auto-Routing Model UX"
803+ // (Confluence 1901363271).
804+ if ( typeof api . registerGatewayMethod === "function" ) api . registerGatewayMethod (
805+ "taas.autorouter.lastRoute" ,
806+ async ( { params, respond } ) => {
807+ // Accept either { workspaceDir } (preferred — derives sessionId the
808+ // same way the wrapper does) or { sessionId } (direct lookup).
809+ const p = ( params ?? { } ) as Record < string , unknown >
810+ const directSessionId =
811+ typeof p . sessionId === "string" ? p . sessionId : null
812+ const workspaceDir =
813+ typeof p . workspaceDir === "string" ? p . workspaceDir : undefined
814+ const resolvedSessionId =
815+ directSessionId ?? resolveSessionId ( workspaceDir ) . sessionId
816+ const captured = getLastRouteForSession ( resolvedSessionId )
817+ respond ( true , { sessionId : resolvedSessionId , capture : captured } )
818+ }
819+ )
703820 } ,
704821}
0 commit comments