Skip to content

Commit 520efe7

Browse files
rjcloudsigmacloudsigma
andauthored
feat: capture TaaS autorouter response headers + expose via gateway RPC (#7)
Adds an onResponse callback to the wrapStreamFn options that reads X-TaaS-Autorouter-* and X-TaaS-Thinking-Applied and X-TaaS-Routed-Context-Window response headers from autorouted TaaS calls, stores them in an in-memory per-session map (bounded to 256 entries, oldest-evicted), and exposes them via a new gateway RPC: taas.autorouter.lastRoute. Alien AI Studio (PRD Confluence 1901363271) polls this method after each turn to populate the routed model, algo, source, thinking-applied, and context-window fields in the AgentChatPanel for cloudsigma/auto and other autorouted requests. Smoke test extended to cover both the header capture and the gateway method response shape. Zero OpenClaw core changes — uses existing wrapStreamFn + registerGatewayMethod hooks. Registration is guarded for older API surfaces that lack registerGatewayMethod. Co-authored-by: cloudsigma <cloudsigma@snowcrash.tail77dc93.ts.net>
1 parent 284b854 commit 520efe7

2 files changed

Lines changed: 206 additions & 1 deletion

File tree

index.ts

Lines changed: 118 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,85 @@ async function patchPayloadMetadata(
614614
}
615615
}
616616

617+
/**
618+
* Captured autorouter metadata per-session, populated by the onResponse callback
619+
* installed in `buildWrapper`. Exposed via the `taas.autorouter.lastRoute` gateway
620+
* RPC so Alien AI Studio (or any other client) can pull the latest TaaS routing
621+
* decision for a session — including the actual model chosen by the autorouter,
622+
* the algorithm used, the source of that algorithm (org/dept/key/user/system),
623+
* the thinking level applied, and the chosen model's context window.
624+
*
625+
* The map is keyed by the affinity session ID we already derive in
626+
* `resolveSessionId(ctx.workspaceDir)`. Stored values are bounded — see
627+
* `LAST_ROUTE_LIMIT` — to avoid unbounded growth in long-lived gateways.
628+
*/
629+
type AutorouterCapture = {
630+
sessionId: string
631+
capturedAt: number
632+
autorouterModel: string | null
633+
autorouterAlgo: string | null
634+
autorouterAlgoSource: string | null
635+
thinkingApplied: string | null
636+
routedContextWindow: number | null
637+
}
638+
639+
const LAST_ROUTE_LIMIT = 256
640+
const lastRouteBySessionId = new Map<string, AutorouterCapture>()
641+
642+
function pruneLastRouteMap(): void {
643+
if (lastRouteBySessionId.size <= LAST_ROUTE_LIMIT) return
644+
// Drop oldest entries by capturedAt ascending until we're back under the cap.
645+
const entries = [...lastRouteBySessionId.entries()].sort(
646+
(a, b) => a[1].capturedAt - b[1].capturedAt
647+
)
648+
const toDrop = entries.length - LAST_ROUTE_LIMIT
649+
for (let i = 0; i < toDrop; i++) {
650+
lastRouteBySessionId.delete(entries[i][0])
651+
}
652+
}
653+
654+
function captureAutorouterFromHeaders(
655+
sessionId: string,
656+
headers: Record<string, string>
657+
): void {
658+
// Header names from TaaS proxy are emitted in canonical "X-TaaS-*" form
659+
// but Node/undici lowercases incoming response headers. Read case-insensitively.
660+
const lowered: Record<string, string> = {}
661+
for (const [k, v] of Object.entries(headers)) {
662+
if (typeof v === "string") lowered[k.toLowerCase()] = v
663+
}
664+
const autorouted = lowered["x-taas-autorouted"]
665+
if (autorouted !== "true") return // ignore non-autorouted responses
666+
const capture: AutorouterCapture = {
667+
sessionId,
668+
capturedAt: Date.now(),
669+
autorouterModel: lowered["x-taas-autorouter-model"] ?? null,
670+
autorouterAlgo: lowered["x-taas-autorouter-mode"] ?? null,
671+
autorouterAlgoSource: lowered["x-taas-autorouter-algorithm-source"] ?? null,
672+
thinkingApplied: lowered["x-taas-thinking-applied"] ?? null,
673+
routedContextWindow: (() => {
674+
const raw = lowered["x-taas-routed-context-window"]
675+
if (!raw) return null
676+
const n = Number(raw)
677+
return Number.isFinite(n) && n > 0 ? n : null
678+
})(),
679+
}
680+
lastRouteBySessionId.set(sessionId, capture)
681+
pruneLastRouteMap()
682+
if (isDev) {
683+
console.debug(
684+
`[taas-affinity] captured autorouter sessionId=${sessionId} ` +
685+
`model=${capture.autorouterModel} algo=${capture.autorouterAlgo} ` +
686+
`source=${capture.autorouterAlgoSource} thinking=${capture.thinkingApplied} ` +
687+
`ctxWindow=${capture.routedContextWindow}`
688+
)
689+
}
690+
}
691+
692+
function getLastRouteForSession(sessionId: string): AutorouterCapture | null {
693+
return lastRouteBySessionId.get(sessionId) ?? null
694+
}
695+
617696
function buildWrapper(ctx: ProviderWrapStreamFnContext) {
618697
const { streamFn } = ctx
619698
if (!streamFn) return undefined
@@ -642,7 +721,23 @@ function buildWrapper(ctx: ProviderWrapStreamFnContext) {
642721
if (prevOnPayload) return prevOnPayload(patched, payloadModel)
643722
return patched
644723
}
645-
return inner(model, context, { ...options, onPayload })
724+
const prevOnResponse = options?.onResponse
725+
const onResponse: NonNullable<typeof options>["onResponse"] = async (
726+
response,
727+
responseModel
728+
) => {
729+
try {
730+
captureAutorouterFromHeaders(sessionId, response?.headers ?? {})
731+
} catch (err) {
732+
if (isDev) {
733+
console.debug(
734+
`[taas-affinity] onResponse capture failed: ${(err as Error)?.message ?? err}`
735+
)
736+
}
737+
}
738+
if (prevOnResponse) await prevOnResponse(response, responseModel)
739+
}
740+
return inner(model, context, { ...options, onPayload, onResponse })
646741
} as typeof inner
647742
}
648743

@@ -700,5 +795,27 @@ export default {
700795
wrapStreamFn: buildWrapper,
701796
resolveTransportTurnState: buildTransportTurnState,
702797
})
798+
799+
// Expose captured TaaS autorouter metadata to gateway clients (Studio, etc.).
800+
// The Alien AI Studio polls this after each turn to populate the model/algo/
801+
// thinking/context-window fields in the AgentChatPanel for cloudsigma/auto and
802+
// other autorouted requests. See PRD "Alien AI Studio - Auto-Routing Model UX"
803+
// (Confluence 1901363271).
804+
if (typeof api.registerGatewayMethod === "function") api.registerGatewayMethod(
805+
"taas.autorouter.lastRoute",
806+
async ({ params, respond }) => {
807+
// Accept either { workspaceDir } (preferred — derives sessionId the
808+
// same way the wrapper does) or { sessionId } (direct lookup).
809+
const p = (params ?? {}) as Record<string, unknown>
810+
const directSessionId =
811+
typeof p.sessionId === "string" ? p.sessionId : null
812+
const workspaceDir =
813+
typeof p.workspaceDir === "string" ? p.workspaceDir : undefined
814+
const resolvedSessionId =
815+
directSessionId ?? resolveSessionId(workspaceDir).sessionId
816+
const captured = getLastRouteForSession(resolvedSessionId)
817+
respond(true, { sessionId: resolvedSessionId, capture: captured })
818+
}
819+
)
703820
},
704821
}

test/smoke.mjs

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,91 @@ const transportState = provider.resolveTransportTurnState({
7272
assert.match(transportState.headers["X-Session-Id"], /^oc:[a-f0-9]{16}$/)
7373

7474
console.log("smoke ok")
75+
76+
// === autorouter capture (R7.1/R7.2 from Studio PRD) ===
77+
// Verify that the wrapper threads an onResponse callback that captures
78+
// X-TaaS-* headers and that taas.autorouter.lastRoute returns them.
79+
let registeredMethod
80+
let registeredHandler
81+
const apiWithGateway = {
82+
registerProvider(candidate) {
83+
// keep previous provider too — second registration
84+
},
85+
registerGatewayMethod(name, handler) {
86+
registeredMethod = name
87+
registeredHandler = handler
88+
},
89+
}
90+
plugin.register(apiWithGateway)
91+
assert.equal(registeredMethod, "taas.autorouter.lastRoute", "gateway method registered")
92+
assert.equal(typeof registeredHandler, "function", "handler is a function")
93+
94+
// Drive the wrapper through onResponse with synthetic autorouter headers.
95+
const captureStreamFn = async (_model, _context, options = {}) => {
96+
// pi-ai protocol: call onPayload first (existing behaviour), then onResponse
97+
// with the simulated HTTP response object, then stream.
98+
if (options.onPayload) await options.onPayload({ messages: [], metadata: {} }, _model)
99+
if (options.onResponse) {
100+
await options.onResponse(
101+
{
102+
status: 200,
103+
headers: {
104+
"x-taas-autorouted": "true",
105+
"x-taas-autorouter-model": "cloudsigma/gpt-5",
106+
"x-taas-autorouter-mode": "best_fit",
107+
"x-taas-autorouter-algorithm-source": "api_key_default",
108+
"x-taas-thinking-applied": "medium",
109+
"x-taas-routed-context-window": "128000",
110+
},
111+
},
112+
_model
113+
)
114+
}
115+
}
116+
const captureWrapped = provider.wrapStreamFn({
117+
streamFn: captureStreamFn,
118+
workspaceDir: "/tmp/openclaw-token-cache-optimizer-smoke",
119+
provider: "cloudsigma",
120+
modelId: "cloudsigma/auto",
121+
model: { id: "cloudsigma/auto" },
122+
})
123+
await captureWrapped("model", { messages: [] }, {})
124+
125+
// Now invoke the registered gateway handler and assert it returns the capture.
126+
let respondedOk
127+
let respondedPayload
128+
await registeredHandler({
129+
req: { id: "test" },
130+
params: { workspaceDir: "/tmp/openclaw-token-cache-optimizer-smoke" },
131+
client: null,
132+
isWebchatConnect: () => false,
133+
respond: (ok, payload) => {
134+
respondedOk = ok
135+
respondedPayload = payload
136+
},
137+
context: {},
138+
})
139+
assert.equal(respondedOk, true, "handler responded ok")
140+
assert.ok(respondedPayload, "payload present")
141+
assert.match(respondedPayload.sessionId, /^oc:[a-f0-9]{16}$/, "sessionId looks valid")
142+
assert.ok(respondedPayload.capture, "capture present")
143+
assert.equal(respondedPayload.capture.autorouterModel, "cloudsigma/gpt-5")
144+
assert.equal(respondedPayload.capture.autorouterAlgo, "best_fit")
145+
assert.equal(respondedPayload.capture.autorouterAlgoSource, "api_key_default")
146+
assert.equal(respondedPayload.capture.thinkingApplied, "medium")
147+
assert.equal(respondedPayload.capture.routedContextWindow, 128000)
148+
149+
// Non-autorouted response should NOT overwrite (we explicitly drop it)
150+
await captureWrapped("model", { messages: [] }, {})
151+
await registeredHandler({
152+
req: { id: "t2" },
153+
params: { workspaceDir: "/tmp/openclaw-token-cache-optimizer-smoke" },
154+
client: null,
155+
isWebchatConnect: () => false,
156+
respond: (_ok, payload) => {
157+
assert.equal(payload.capture.autorouterModel, "cloudsigma/gpt-5", "still holds last good")
158+
},
159+
context: {},
160+
})
161+
162+
console.log("autorouter capture smoke ok")

0 commit comments

Comments
 (0)