From a57c23842eed738c10382194f88f7384ba8f7c6a Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Fri, 26 Sep 2025 13:08:57 +0800
Subject: [PATCH 01/62] feature gpt-5-codex responses api

---
 src/routes/messages/handler.ts                | 138 +++-
 .../messages/responses-stream-translation.ts  | 664 ++++++++++++++++++
 src/routes/messages/responses-translation.ts  | 638 +++++++++++++++++
 src/routes/responses/handler.ts               |  78 ++
 src/routes/responses/route.ts                 |  15 +
 src/routes/responses/utils.ts                 |  71 ++
 src/server.ts                                 |   3 +
 src/services/copilot/create-responses.ts      | 212 ++++++
 src/services/copilot/get-models.ts            |   4 +
 tests/responses-stream-translation.test.ts    | 137 ++++
 tests/translation.test.ts                     | 159 +++++
 11 files changed, 2115 insertions(+), 4 deletions(-)
 create mode 100644 src/routes/messages/responses-stream-translation.ts
 create mode 100644 src/routes/messages/responses-translation.ts
 create mode 100644 src/routes/responses/handler.ts
 create mode 100644 src/routes/responses/route.ts
 create mode 100644 src/routes/responses/utils.ts
 create mode 100644 src/services/copilot/create-responses.ts
 create mode 100644 tests/responses-stream-translation.test.ts
 create mode 100644 tests/translation.test.ts

diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 85dbf6243..10b97c53c 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -6,11 +6,24 @@ import { streamSSE } from "hono/streaming"
 import { awaitApproval } from "~/lib/approval"
 import { checkRateLimit } from "~/lib/rate-limit"
 import { state } from "~/lib/state"
+import {
+  createResponsesStreamState,
+  translateResponsesStreamEvent,
+} from "~/routes/messages/responses-stream-translation"
+import {
+  translateAnthropicMessagesToResponsesPayload,
+  translateResponsesResultToAnthropic,
+} from "~/routes/messages/responses-translation"
+import { getResponsesRequestOptions } from "~/routes/responses/utils"
 import {
   createChatCompletions,
   type ChatCompletionChunk,
   type ChatCompletionResponse,
 } from "~/services/copilot/create-chat-completions"
+import {
+  createResponses,
+  type ResponsesResult,
+} from "~/services/copilot/create-responses"
 
 import {
   type AnthropicMessagesPayload,
@@ -28,16 +41,31 @@ export async function handleCompletion(c: Context) {
   const anthropicPayload = await c.req.json<AnthropicMessagesPayload>()
   consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload))
 
+  const useResponsesApi = shouldUseResponsesApi(anthropicPayload.model)
+
+  if (state.manualApprove) {
+    await awaitApproval()
+  }
+
+  if (useResponsesApi) {
+    return await handleWithResponsesApi(c, anthropicPayload)
+  }
+
+  return await handleWithChatCompletions(c, anthropicPayload)
+}
+
+const RESPONSES_ENDPOINT = "/responses"
+
+const handleWithChatCompletions = async (
+  c: Context,
+  anthropicPayload: AnthropicMessagesPayload,
+) => {
   const openAIPayload = translateToOpenAI(anthropicPayload)
   consola.debug(
     "Translated OpenAI request payload:",
     JSON.stringify(openAIPayload),
   )
 
-  if (state.manualApprove) {
-    await awaitApproval()
-  }
-
   const response = await createChatCompletions(openAIPayload)
 
   if (isNonStreaming(response)) {
@@ -86,6 +114,108 @@ export async function handleCompletion(c: Context) {
   })
 }
 
+// Serves an Anthropic request via the Responses API (SSE stream or JSON).
+const handleWithResponsesApi = async (
+  c: Context,
+  anthropicPayload: AnthropicMessagesPayload,
+) => {
+  const responsesPayload =
+    translateAnthropicMessagesToResponsesPayload(anthropicPayload)
+  consola.debug(
+    "Translated Responses payload:",
+    JSON.stringify(responsesPayload),
+  )
+  const { vision, initiator } = getResponsesRequestOptions(responsesPayload)
+  const response = await createResponses(responsesPayload, {
+    vision,
+    initiator,
+  })
+
+  if (responsesPayload.stream && isAsyncIterable(response)) {
+    consola.debug("Streaming response from Copilot (Responses API)")
+    return streamSSE(c, async (stream) => {
+      const streamState = createResponsesStreamState()
+
+      for await (const chunk of response) {
+        consola.debug("Responses raw stream event:", JSON.stringify(chunk))
+        // Anthropic ping events carry a {"type":"ping"} body, never an empty one.
+        const eventName = (chunk as { event?: string }).event
+        if (eventName === "ping") {
+          await stream.writeSSE({ event: "ping", data: '{"type":"ping"}' })
+          continue
+        }
+
+        const data = (chunk as { data?: string }).data
+        if (!data) {
+          continue
+        }
+
+        if (data === "[DONE]") {
+          break
+        }
+        // Chunks that fail to parse are skipped (safeJsonParse logs a warning).
+        const parsed = safeJsonParse(data)
+        if (!parsed) {
+          continue
+        }
+
+        const events = translateResponsesStreamEvent(parsed, streamState)
+        for (const event of events) {
+          consola.debug("Translated Anthropic event:", JSON.stringify(event))
+          await stream.writeSSE({
+            event: event.type,
+            data: JSON.stringify(event),
+          })
+        }
+      }
+      // Always send a terminal event, even if upstream ended without one.
+      if (!streamState.messageCompleted) {
+        consola.warn(
+          "Responses stream ended without completion; sending fallback message_stop",
+        )
+        const fallback = { type: "message_stop" as const }
+        await stream.writeSSE({
+          event: fallback.type,
+          data: JSON.stringify(fallback),
+        })
+      }
+    })
+  }
+
+  consola.debug(
+    "Non-streaming Responses result:",
+    JSON.stringify(response).slice(-400),
+  )
+  const anthropicResponse = translateResponsesResultToAnthropic(
+    response as ResponsesResult,
+  )
+  consola.debug(
+    "Translated Anthropic response:",
+    JSON.stringify(anthropicResponse),
+  )
+  return c.json(anthropicResponse)
+}
+
+const shouldUseResponsesApi = (modelId: string): boolean => {
+  const selectedModel = state.models?.data.find((model) => model.id === modelId)
+  return (
+    selectedModel?.supported_endpoints?.includes(RESPONSES_ENDPOINT) ?? false
+  )
+}
+
 const isNonStreaming = (
   response: Awaited<ReturnType<typeof createChatCompletions>>,
 ): response is ChatCompletionResponse => Object.hasOwn(response, "choices")
+
+const isAsyncIterable = <T>(value: unknown): value is AsyncIterable<T> =>
+  Boolean(value)
+  && typeof (value as AsyncIterable<T>)[Symbol.asyncIterator] === "function"
+
+const safeJsonParse = (value: string): Record<string, unknown> | undefined => {
+  try {
+    return JSON.parse(value) as Record<string, unknown>
+  } catch (error) {
+    consola.warn("Failed to parse Responses stream chunk:", value, error)
+    return undefined
+  }
+}
diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts
new file mode 100644
index 000000000..06feab1a4
--- /dev/null
+++ b/src/routes/messages/responses-stream-translation.ts
@@ -0,0 +1,664 @@
+import { type ResponsesResult } from "~/services/copilot/create-responses"
+
+import { type AnthropicStreamEventData } from "./anthropic-types"
+import { translateResponsesResultToAnthropic } from "./responses-translation"
+
+export interface ResponsesStreamState {
+  messageStartSent: boolean
+  messageCompleted: boolean
+  nextContentBlockIndex: number
+  blockIndexByKey: Map<string, number>
+  openBlocks: Set<number>
+  blockHasDelta: Set<number>
+  currentResponseId?: string
+  currentModel?: string
+  initialInputTokens?: number
+  functionCallStateByOutputIndex: Map<number, FunctionCallStreamState>
+  functionCallOutputIndexByItemId: Map<string, number>
+}
+
+type FunctionCallStreamState = {
+  blockIndex: number
+  toolCallId: string
+  name: string
+}
+
+export const createResponsesStreamState = (): ResponsesStreamState => ({
+  messageStartSent: false,
+  messageCompleted: false,
+  nextContentBlockIndex: 0,
+  blockIndexByKey: new Map(),
+  openBlocks: new Set(),
+  blockHasDelta: new Set(),
+  functionCallStateByOutputIndex: new Map(),
+  functionCallOutputIndexByItemId: new Map(),
+})
+
+// Converts one Responses API stream event into zero or more Anthropic events.
+export const translateResponsesStreamEvent = (
+  rawEvent: Record<string, unknown>,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const eventType =
+    typeof rawEvent.type === "string" ? rawEvent.type : undefined
+  if (!eventType) {
+    return []
+  }
+  switch (eventType) {
+    case "response.created": {
+      return handleResponseCreated(rawEvent, state)
+    }
+    // Reasoning summary text shares the plain text-delta handler.
+    case "response.reasoning_summary_text.delta":
+    case "response.output_text.delta": {
+      return handleOutputTextDelta(rawEvent, state)
+    }
+
+    case "response.reasoning_summary_part.done":
+    case "response.output_text.done": {
+      return handleOutputTextDone(rawEvent, state)
+    }
+
+    case "response.output_item.added": {
+      return handleOutputItemAdded(rawEvent, state)
+    }
+
+    case "response.function_call_arguments.delta": {
+      return handleFunctionCallArgumentsDelta(rawEvent, state)
+    }
+
+    case "response.function_call_arguments.done": {
+      return handleFunctionCallArgumentsDone(rawEvent, state)
+    }
+
+    case "response.completed":
+    case "response.incomplete": {
+      return handleResponseCompleted(rawEvent, state)
+    }
+
+    case "response.failed": {
+      return handleResponseFailed(rawEvent, state)
+    }
+
+    case "error": {
+      return handleErrorEvent(rawEvent, state)
+    }
+    // Any other event type yields no Anthropic events.
+    default: {
+      return []
+    }
+  }
+}
+
+// Helper handlers to keep translateResponsesStreamEvent concise
+const handleResponseCreated = (
+  rawEvent: Record<string, unknown>,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const response = toResponsesResult(rawEvent.response)
+  if (response) {
+    cacheResponseMetadata(state, response)
+  }
+  return ensureMessageStart(state, response)
+}
+
+const handleOutputItemAdded = (
+  rawEvent: Record<string, unknown>,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const response = toResponsesResult(rawEvent.response)
+  const events = ensureMessageStart(state, response)
+
+  const functionCallDetails = extractFunctionCallDetails(rawEvent, state)
+  if (!functionCallDetails) {
+    return events
+  }
+
+  const { outputIndex, toolCallId, name, initialArguments, itemId } =
+    functionCallDetails
+
+  if (itemId) {
+    state.functionCallOutputIndexByItemId.set(itemId, outputIndex)
+  }
+
+  const blockIndex = openFunctionCallBlock(state, {
+    outputIndex,
+    toolCallId,
+    name,
+    events,
+  })
+
+  if (initialArguments !== undefined && initialArguments.length > 0) {
+    events.push({
+      type: "content_block_delta",
+      index: blockIndex,
+      delta: {
+        type: "input_json_delta",
+        partial_json: initialArguments,
+      },
+    })
+    state.blockHasDelta.add(blockIndex)
+  }
+
+  return events
+}
+
+const handleFunctionCallArgumentsDelta = (
+  rawEvent: Record<string, unknown>,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const events = ensureMessageStart(state)
+
+  const outputIndex = resolveFunctionCallOutputIndex(state, rawEvent)
+  if (outputIndex === undefined) {
+    return events
+  }
+
+  const deltaText = typeof rawEvent.delta === "string" ? rawEvent.delta : ""
+  if (!deltaText) {
+    return events
+  }
+
+  const blockIndex = openFunctionCallBlock(state, {
+    outputIndex,
+    events,
+  })
+
+  events.push({
+    type: "content_block_delta",
+    index: blockIndex,
+    delta: {
+      type: "input_json_delta",
+      partial_json: deltaText,
+    },
+  })
+  state.blockHasDelta.add(blockIndex)
+
+  return events
+}
+
+// Finishes a function call: flushes unsent arguments and stops its block.
+const handleFunctionCallArgumentsDone = (
+  rawEvent: Record<string, unknown>,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const events = ensureMessageStart(state)
+  const outputIndex = resolveFunctionCallOutputIndex(state, rawEvent)
+  if (outputIndex === undefined) {
+    return events
+  }
+
+  const blockIndex = openFunctionCallBlock(state, {
+    outputIndex,
+    events,
+  })
+
+  const finalArguments =
+    typeof rawEvent.arguments === "string" ? rawEvent.arguments : undefined
+  // Replay the final arguments only when no incremental delta was streamed.
+  if (!state.blockHasDelta.has(blockIndex) && finalArguments) {
+    events.push({
+      type: "content_block_delta",
+      index: blockIndex,
+      delta: {
+        type: "input_json_delta",
+        partial_json: finalArguments,
+      },
+    })
+    state.blockHasDelta.add(blockIndex)
+  }
+
+  closeBlockIfOpen(state, blockIndex, events)
+  // Drop the tracking entries for this call now that its block is finished.
+  const existingState = state.functionCallStateByOutputIndex.get(outputIndex)
+  if (existingState) {
+    state.functionCallOutputIndexByItemId.delete(existingState.toolCallId)
+  }
+  state.functionCallStateByOutputIndex.delete(outputIndex)
+
+  const itemId = toNonEmptyString(rawEvent.item_id)
+  if (itemId) {
+    state.functionCallOutputIndexByItemId.delete(itemId)
+  }
+
+  return events
+}
+
+const handleOutputTextDelta = (
+  rawEvent: Record<string, unknown>,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const events = ensureMessageStart(state)
+
+  const outputIndex = toNumber(rawEvent.output_index)
+  const contentIndex = toNumber(rawEvent.content_index)
+  const deltaText = typeof rawEvent.delta === "string" ? rawEvent.delta : ""
+
+  if (!deltaText) {
+    return events
+  }
+
+  const blockIndex = openTextBlockIfNeeded(state, {
+    outputIndex,
+    contentIndex,
+    events,
+  })
+
+  events.push({
+    type: "content_block_delta",
+    index: blockIndex,
+    delta: {
+      type: "text_delta",
+      text: deltaText,
+    },
+  })
+  state.blockHasDelta.add(blockIndex)
+
+  return events
+}
+
+// Finishes a text block: emits any unstreamed text, then `content_block_stop`.
+const handleOutputTextDone = (
+  rawEvent: Record<string, unknown>,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const events = ensureMessageStart(state)
+  const outputIndex = toNumber(rawEvent.output_index)
+  const contentIndex = toNumber(rawEvent.content_index)
+  const text = typeof rawEvent.text === "string" ? rawEvent.text : ""
+
+  const blockIndex = openTextBlockIfNeeded(state, {
+    outputIndex,
+    contentIndex,
+    events,
+  })
+  // Send the full text only when no delta was streamed for this block.
+  if (text && !state.blockHasDelta.has(blockIndex)) {
+    events.push({
+      type: "content_block_delta",
+      index: blockIndex,
+      delta: {
+        type: "text_delta",
+        text,
+      },
+    })
+  }
+
+  closeBlockIfOpen(state, blockIndex, events)
+
+  return events
+}
+
+// Closes open blocks, then emits `message_delta` followed by `message_stop`.
+const handleResponseCompleted = (
+  rawEvent: Record<string, unknown>,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const response = toResponsesResult(rawEvent.response)
+  const events = ensureMessageStart(state, response)
+  closeAllOpenBlocks(state, events)
+  // Stop reason and usage come from the non-streaming translator if possible.
+  if (response) {
+    const anthropic = translateResponsesResultToAnthropic(response)
+    events.push({
+      type: "message_delta",
+      delta: {
+        stop_reason: anthropic.stop_reason,
+        stop_sequence: anthropic.stop_sequence,
+      },
+      usage: anthropic.usage,
+    })
+  } else {
+    events.push({
+      type: "message_delta",
+      delta: {
+        stop_reason: null,
+        stop_sequence: null,
+      },
+    })
+  }
+
+  events.push({ type: "message_stop" })
+  state.messageCompleted = true
+
+  return events
+}
+
+// Maps `response.failed` to an Anthropic `error` event and ends the message.
+const handleResponseFailed = (
+  rawEvent: Record<string, unknown>,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const response = toResponsesResult(rawEvent.response)
+  const events = ensureMessageStart(state, response)
+  closeAllOpenBlocks(state, events)
+
+  // The failure reason normally sits in `response.error.message`; a string
+  // top-level `error` is used only when no `response.error` object exists.
+  const failed = isRecord(rawEvent.response) ? rawEvent.response.error : null
+  const detail = isRecord(failed) ? failed.message : rawEvent.error
+  const message =
+    typeof detail === "string" ? detail : "Response generation failed."
+  events.push(buildErrorEvent(message))
+  state.messageCompleted = true
+  return events
+}
+
+const handleErrorEvent = (
+  rawEvent: Record<string, unknown>,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const message =
+    typeof rawEvent.message === "string" ?
+      rawEvent.message
+    : "An unexpected error occurred during streaming."
+
+  state.messageCompleted = true
+  return [buildErrorEvent(message)]
+}
+
+// Emits `message_start` the first time it is called; afterwards returns [].
+const ensureMessageStart = (
+  state: ResponsesStreamState,
+  response?: ResponsesResult,
+): Array<AnthropicStreamEventData> => {
+  if (state.messageStartSent) {
+    return []
+  }
+  if (response) {
+    cacheResponseMetadata(state, response)
+  }
+  // Prefer the event's response, then cached metadata, then placeholders.
+  const id = response?.id ?? state.currentResponseId ?? "response"
+  const model = response?.model ?? state.currentModel ?? ""
+
+  const inputTokens =
+    response?.usage?.input_tokens ?? state.initialInputTokens ?? 0
+
+  state.messageStartSent = true
+
+  return [
+    {
+      type: "message_start",
+      message: {
+        id,
+        type: "message",
+        role: "assistant",
+        content: [],
+        model,
+        stop_reason: null,
+        stop_sequence: null,
+        usage: {
+          input_tokens: inputTokens,
+          output_tokens: 0,
+        },
+      },
+    },
+  ]
+}
+
+// Resolves the text block for an output/content pair, opening one on demand.
+const openTextBlockIfNeeded = (
+  state: ResponsesStreamState,
+  params: {
+    outputIndex: number
+    contentIndex: number
+    events: Array<AnthropicStreamEventData>
+  },
+): number => {
+  const { outputIndex, contentIndex, events } = params
+  const key = getBlockKey(outputIndex, contentIndex)
+  const existing = state.blockIndexByKey.get(key)
+  if (existing !== undefined && state.openBlocks.has(existing)) {
+    return existing
+  }
+
+  // A key whose block was already stopped must not reuse that index: each
+  // index names one block of the final message, so allocate a fresh one.
+  const blockIndex = state.nextContentBlockIndex
+  state.nextContentBlockIndex += 1
+  state.blockIndexByKey.set(key, blockIndex)
+  events.push({
+    type: "content_block_start",
+    index: blockIndex,
+    content_block: {
+      type: "text",
+      text: "",
+    },
+  })
+  state.openBlocks.add(blockIndex)
+  return blockIndex
+}
+
+const closeBlockIfOpen = (
+  state: ResponsesStreamState,
+  blockIndex: number,
+  events: Array<AnthropicStreamEventData>,
+) => {
+  if (!state.openBlocks.has(blockIndex)) {
+    return
+  }
+
+  events.push({ type: "content_block_stop", index: blockIndex })
+  state.openBlocks.delete(blockIndex)
+  state.blockHasDelta.delete(blockIndex)
+}
+
+// Stops every open content block and clears the function-call tracking maps.
+const closeAllOpenBlocks = (
+  state: ResponsesStreamState,
+  events: Array<AnthropicStreamEventData>,
+) => {
+  for (const blockIndex of state.openBlocks) {
+    closeBlockIfOpen(state, blockIndex, events)
+  }
+  state.functionCallStateByOutputIndex.clear()
+  state.functionCallOutputIndexByItemId.clear()
+}
+
+const cacheResponseMetadata = (
+  state: ResponsesStreamState,
+  response: ResponsesResult,
+) => {
+  state.currentResponseId = response.id
+  state.currentModel = response.model
+  state.initialInputTokens = response.usage?.input_tokens ?? 0
+}
+
+const buildErrorEvent = (message: string): AnthropicStreamEventData => ({
+  type: "error",
+  error: {
+    type: "api_error",
+    message,
+  },
+})
+
+const getBlockKey = (outputIndex: number, contentIndex: number): string =>
+  `${outputIndex}:${contentIndex}`
+
+// Resolves the output index from `output_index`, else from the `item_id` map.
+const resolveFunctionCallOutputIndex = (
+  state: ResponsesStreamState,
+  rawEvent: Record<string, unknown>,
+): number | undefined => {
+  if (
+    typeof rawEvent.output_index === "number"
+    || (typeof rawEvent.output_index === "string"
+      && rawEvent.output_index.length > 0)
+  ) {
+    const parsed = toOptionalNumber(rawEvent.output_index)
+    if (parsed !== undefined) {
+      return parsed
+    }
+  }
+  // Fall back to the index previously recorded for this item id, if any.
+  const itemId = toNonEmptyString(rawEvent.item_id)
+  if (itemId) {
+    const mapped = state.functionCallOutputIndexByItemId.get(itemId)
+    if (mapped !== undefined) {
+      return mapped
+    }
+  }
+  return undefined
+}
+
+// Returns the tool_use block index for an output index, opening it if needed.
+const openFunctionCallBlock = (
+  state: ResponsesStreamState,
+  params: {
+    outputIndex: number
+    toolCallId?: string
+    name?: string
+    events: Array<AnthropicStreamEventData>
+  },
+): number => {
+  const { outputIndex, toolCallId, name, events } = params
+  let functionCallState = state.functionCallStateByOutputIndex.get(outputIndex)
+  // First sighting of this output index: allocate a block and remember it.
+  if (!functionCallState) {
+    const blockIndex = state.nextContentBlockIndex
+    state.nextContentBlockIndex += 1
+    // Placeholders are used when the caller supplied no id or name.
+    const resolvedToolCallId = toolCallId ?? `tool_call_${blockIndex}`
+    const resolvedName = name ?? "function"
+
+    functionCallState = {
+      blockIndex,
+      toolCallId: resolvedToolCallId,
+      name: resolvedName,
+    }
+
+    state.functionCallStateByOutputIndex.set(outputIndex, functionCallState)
+    state.functionCallOutputIndexByItemId.set(resolvedToolCallId, outputIndex)
+  }
+
+  const { blockIndex } = functionCallState
+
+  if (!state.openBlocks.has(blockIndex)) {
+    events.push({
+      type: "content_block_start",
+      index: blockIndex,
+      content_block: {
+        type: "tool_use",
+        id: functionCallState.toolCallId,
+        name: functionCallState.name,
+        input: {},
+      },
+    })
+    state.openBlocks.add(blockIndex)
+  }
+
+  return blockIndex
+}
+
+type FunctionCallDetails = {
+  outputIndex: number
+  toolCallId: string
+  name: string
+  initialArguments?: string
+  itemId?: string
+}
+
+// Reads function-call fields from an event's `item`; undefined if not one.
+const extractFunctionCallDetails = (
+  rawEvent: Record<string, unknown>,
+  state: ResponsesStreamState,
+): FunctionCallDetails | undefined => {
+  const item = isRecord(rawEvent.item) ? rawEvent.item : undefined
+  if (!item) {
+    return undefined
+  }
+  const itemType = typeof item.type === "string" ? item.type : undefined
+  if (itemType !== "function_call") {
+    return undefined
+  }
+
+  const outputIndex = resolveFunctionCallOutputIndex(state, rawEvent)
+  if (outputIndex === undefined) {
+    return undefined
+  }
+
+  const callId = toNonEmptyString(item.call_id)
+  const itemId = toNonEmptyString(item.id)
+  const name = toNonEmptyString(item.name) ?? "function"
+  // Prefer call_id, then the item id, then an id built from the output index.
+  const toolCallId = callId ?? itemId ?? `tool_call_${outputIndex}`
+  const initialArguments =
+    typeof item.arguments === "string" ? item.arguments : undefined
+
+  return {
+    outputIndex,
+    toolCallId,
+    name,
+    initialArguments,
+    itemId,
+  }
+}
+
+const toResponsesResult = (value: unknown): ResponsesResult | undefined =>
+  isResponsesResult(value) ? value : undefined
+
+const toOptionalNumber = (value: unknown): number | undefined => {
+  if (typeof value === "number" && Number.isFinite(value)) {
+    return value
+  }
+
+  if (typeof value === "string" && value.length > 0) {
+    const parsed = Number(value)
+    if (Number.isFinite(parsed)) {
+      return parsed
+    }
+  }
+
+  return undefined
+}
+
+const toNonEmptyString = (value: unknown): string | undefined => {
+  if (typeof value === "string" && value.length > 0) {
+    return value
+  }
+
+  return undefined
+}
+
+const toNumber = (value: unknown): number => {
+  if (typeof value === "number" && Number.isFinite(value)) {
+    return value
+  }
+
+  if (typeof value === "string") {
+    const parsed = Number(value)
+    if (Number.isFinite(parsed)) {
+      return parsed
+    }
+  }
+
+  return 0
+}
+
+const isResponsesResult = (value: unknown): value is ResponsesResult => {
+  if (!isRecord(value)) {
+    return false
+  }
+
+  if (typeof value.id !== "string") {
+    return false
+  }
+
+  if (typeof value.model !== "string") {
+    return false
+  }
+
+  if (!Array.isArray(value.output)) {
+    return false
+  }
+
+  if (typeof value.object !== "string") {
+    return false
+  }
+
+  return true
+}
+
+const isRecord = (value: unknown): value is Record<string, unknown> =>
+  typeof value === "object" && value !== null
diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts
new file mode 100644
index 000000000..057a7b962
--- /dev/null
+++ b/src/routes/messages/responses-translation.ts
@@ -0,0 +1,638 @@
+import consola from "consola"
+
+import {
+  type ResponsesPayload,
+  type ResponseInputContent,
+  type ResponseInputImage,
+  type ResponseInputItem,
+  type ResponseInputMessage,
+  type ResponseInputText,
+  type ResponsesResult,
+  type ResponseOutputContentBlock,
+  type ResponseOutputFunctionCall,
+  type ResponseOutputFunctionCallOutput,
+  type ResponseOutputItem,
+  type ResponseOutputReasoning,
+  type ResponseReasoningBlock,
+  type ResponseOutputRefusal,
+  type ResponseOutputText,
+  type ResponseFunctionToolCallItem,
+  type ResponseFunctionCallOutputItem,
+} from "~/services/copilot/create-responses"
+
+import {
+  type AnthropicAssistantContentBlock,
+  type AnthropicAssistantMessage,
+  type AnthropicResponse,
+  type AnthropicImageBlock,
+  type AnthropicMessage,
+  type AnthropicMessagesPayload,
+  type AnthropicTextBlock,
+  type AnthropicTool,
+  type AnthropicToolResultBlock,
+  type AnthropicToolUseBlock,
+  type AnthropicUserContentBlock,
+  type AnthropicUserMessage,
+} from "./anthropic-types"
+
+const MESSAGE_TYPE = "message"
+
+// Builds the Responses API request payload from an Anthropic Messages payload.
+export const translateAnthropicMessagesToResponsesPayload = (
+  payload: AnthropicMessagesPayload,
+): ResponsesPayload => {
+  const input: Array<ResponseInputItem> = []
+  for (const message of payload.messages) {
+    input.push(...translateMessage(message))
+  }
+
+  const translatedTools = convertAnthropicTools(payload.tools)
+  const toolChoice = convertAnthropicToolChoice(payload.tool_choice)
+
+  const { safetyIdentifier, promptCacheKey } = parseUserId(
+    payload.metadata?.user_id,
+  )
+  // store, parallel_tool_calls, reasoning and include are always the same.
+  const responsesPayload: ResponsesPayload = {
+    model: payload.model,
+    input,
+    instructions: translateSystemPrompt(payload.system),
+    temperature: payload.temperature ?? null,
+    top_p: payload.top_p ?? null,
+    max_output_tokens: payload.max_tokens,
+    tools: translatedTools,
+    tool_choice: toolChoice,
+    metadata: payload.metadata ? { ...payload.metadata } : null,
+    safety_identifier: safetyIdentifier,
+    prompt_cache_key: promptCacheKey,
+    stream: payload.stream ?? null,
+    store: false,
+    parallel_tool_calls: true,
+    reasoning: { effort: "high", summary: "auto" },
+    include: ["reasoning.encrypted_content"],
+  }
+
+  return responsesPayload
+}
+
+const translateMessage = (
+  message: AnthropicMessage,
+): Array<ResponseInputItem> => {
+  if (message.role === "user") {
+    return translateUserMessage(message)
+  }
+
+  return translateAssistantMessage(message)
+}
+
+// Converts an Anthropic user message into ordered Responses input items.
+const translateUserMessage = (
+  message: AnthropicUserMessage,
+): Array<ResponseInputItem> => {
+  if (typeof message.content === "string") {
+    return [createMessage("user", message.content)]
+  }
+  if (!Array.isArray(message.content)) {
+    return []
+  }
+
+  const items: Array<ResponseInputItem> = []
+  const pendingContent: Array<ResponseInputContent> = []
+  // Each tool_result becomes its own item; buffered content is flushed first.
+  for (const block of message.content) {
+    if (block.type === "tool_result") {
+      flushPendingContent("user", pendingContent, items)
+      items.push(createFunctionCallOutput(block))
+      continue
+    }
+
+    const converted = translateUserContentBlock(block)
+    if (converted) {
+      pendingContent.push(converted)
+    }
+  }
+
+  flushPendingContent("user", pendingContent, items)
+
+  return items
+}
+
+const translateAssistantMessage = (
+  message: AnthropicAssistantMessage,
+): Array<ResponseInputItem> => {
+  if (typeof message.content === "string") {
+    return [createMessage("assistant", message.content)]
+  }
+
+  if (!Array.isArray(message.content)) {
+    return []
+  }
+
+  const items: Array<ResponseInputItem> = []
+  const pendingContent: Array<ResponseInputContent> = []
+
+  for (const block of message.content) {
+    if (block.type === "tool_use") {
+      flushPendingContent("assistant", pendingContent, items)
+      items.push(createFunctionToolCall(block))
+      continue
+    }
+
+    const converted = translateAssistantContentBlock(block)
+    if (converted) {
+      pendingContent.push(converted)
+    }
+  }
+
+  flushPendingContent("assistant", pendingContent, items)
+
+  return items
+}
+
+const translateUserContentBlock = (
+  block: AnthropicUserContentBlock,
+): ResponseInputContent | undefined => {
+  switch (block.type) {
+    case "text": {
+      return createTextContent(block.text)
+    }
+    case "image": {
+      return createImageContent(block)
+    }
+    case "tool_result": {
+      return undefined
+    }
+    default: {
+      return undefined
+    }
+  }
+}
+
+const translateAssistantContentBlock = (
+  block: AnthropicAssistantContentBlock,
+): ResponseInputContent | undefined => {
+  switch (block.type) {
+    case "text": {
+      return createOutPutTextContent(block.text)
+    }
+    case "thinking": {
+      return createOutPutTextContent(block.thinking)
+    }
+    case "tool_use": {
+      return undefined
+    }
+    default: {
+      return undefined
+    }
+  }
+}
+
+// Moves buffered content parts into `target` as one message, then empties it.
+const flushPendingContent = (
+  role: ResponseInputMessage["role"],
+  pendingContent: Array<ResponseInputContent>,
+  target: Array<ResponseInputItem>,
+) => {
+  if (pendingContent.length === 0) {
+    return
+  }
+  // A lone text part is sent as a bare string; anything else stays an array.
+  const messageContent =
+    pendingContent.length === 1 && isPlainText(pendingContent[0]) ?
+      pendingContent[0].text
+    : [...pendingContent]
+  target.push(createMessage(role, messageContent))
+  pendingContent.length = 0
+}
+
+const createMessage = (
+  role: ResponseInputMessage["role"],
+  content: string | Array<ResponseInputContent>,
+): ResponseInputMessage => ({
+  type: MESSAGE_TYPE,
+  role,
+  content,
+})
+
+const createTextContent = (text: string): ResponseInputText => ({
+  type: "input_text",
+  text,
+})
+
+const createOutPutTextContent = (text: string): ResponseInputText => ({
+  type: "output_text",
+  text,
+})
+
+const createImageContent = (
+  block: AnthropicImageBlock,
+): ResponseInputImage => ({
+  type: "input_image",
+  image_url: `data:${block.source.media_type};base64,${block.source.data}`,
+})
+
+const createFunctionToolCall = (
+  block: AnthropicToolUseBlock,
+): ResponseFunctionToolCallItem => ({
+  type: "function_call",
+  call_id: block.id,
+  name: block.name,
+  arguments: JSON.stringify(block.input),
+  status: "completed",
+})
+
+const createFunctionCallOutput = (
+  block: AnthropicToolResultBlock,
+): ResponseFunctionCallOutputItem => ({
+  type: "function_call_output",
+  call_id: block.tool_use_id,
+  output: block.content,
+  status: block.is_error ? "incomplete" : "completed",
+})
+
+// Builds `instructions` from the system prompt plus extra tool-use guidance.
+const translateSystemPrompt = (
+  system: string | Array<AnthropicTextBlock> | undefined,
+): string | null => {
+  if (!system) {
+    return null
+  }
+  const toolUsePrompt = `
+## Tool use
+- You have access to many tools. If a tool exists to perform a specific task, you MUST use that tool instead of running a terminal command to perform that task.
+### Bash tool
+When using the Bash tool, follow these rules:
+- always run_in_background set to false, unless you are running a long-running command (e.g., a server or a watch command).
+### BashOutput tool
+When using the BashOutput tool, follow these rules:
+- Only Bash Tool run_in_background set to true, Use BashOutput to read the output later
+### TodoWrite tool
+When using the TodoWrite tool, follow these rules:
+- Skip using the TodoWrite tool for straightforward tasks (roughly the easiest 25%).
+- Do not make single-step todo lists.
+- When you made a todo, update it after having performed one of the sub-tasks that you shared on the todo list.`
+
+  if (typeof system === "string") {
+    return system + toolUsePrompt
+  }
+  // For block arrays the guidance follows the first block; blocks join by " ".
+  const text = system
+    .map((block, index) => {
+      if (index === 0) {
+        return block.text + toolUsePrompt
+      }
+      return block.text
+    })
+    .join(" ")
+  return text.length > 0 ? text : null
+}
+
+/**
+ * Converts Anthropic tool definitions into Responses API "function" tools.
+ * Tools are always sent as non-strict; `description` is included only when
+ * it is non-empty. Returns null when no tools were supplied.
+ */
+const convertAnthropicTools = (
+  tools: Array<AnthropicTool> | undefined,
+): Array<Record<string, unknown>> | null => {
+  if (!tools?.length) return null
+
+  return tools.map(({ name, description, input_schema: parameters }) => {
+    const base = { type: "function", name, parameters, strict: false }
+    return description ? { ...base, description } : base
+  })
+}
+
+/**
+ * Maps Anthropic `tool_choice` onto the Responses API equivalent:
+ * auto -> "auto", any -> "required", none -> "none", and a named tool ->
+ * `{ type: "function", name }`. Missing or unknown choices (and a "tool"
+ * choice without a name) yield undefined.
+ */
+const convertAnthropicToolChoice = (
+  choice: AnthropicMessagesPayload["tool_choice"],
+): unknown => {
+  if (!choice) return undefined
+
+  // A specific tool can only be requested when its name is present.
+  if (choice.type === "tool") {
+    return choice.name ? { type: "function", name: choice.name } : undefined
+  }
+
+  // The remaining choice types translate to plain string modes; anything
+  // unrecognized is looked up as a miss and comes back as undefined.
+  const modes = new Map<string, string>([
+    ["auto", "auto"],
+    ["any", "required"],
+    ["none", "none"],
+  ])
+  return modes.get(choice.type)
+}
+
+// Type guard for text-like input content: the value must be an object with
+// a string `text` property and must not carry an `image_url` key (which
+// would mark it as image content).
+const isPlainText = (
+  content: ResponseInputContent,
+): content is ResponseInputText | { text: string } => {
+  if (typeof content !== "object") return false
+  if ("image_url" in content) return false
+
+  // Read through a loose shape so unknown record types are handled too.
+  const candidate = (content as { text?: unknown }).text
+  return typeof candidate === "string"
+}
+
+/**
+ * Builds an Anthropic Messages response from a non-streaming Responses API
+ * result. Structured `output` items are preferred; the flat `output_text`
+ * is used only when they produce no content blocks.
+ */
+export const translateResponsesResultToAnthropic = (
+  response: ResponsesResult,
+): AnthropicResponse => {
+  const structured = mapOutputToAnthropicContent(response.output)
+  const usage = mapResponsesUsage(response)
+  const fallback = fallbackContentBlocks(response.output_text)
+
+  return {
+    id: response.id,
+    type: "message",
+    role: "assistant",
+    content: structured.length > 0 ? structured : fallback,
+    model: response.model,
+    stop_reason: mapResponsesStopReason(response),
+    stop_sequence: null,
+    usage,
+  }
+}
+
+/**
+ * Converts Responses API output items into Anthropic assistant content
+ * blocks, preserving item order:
+ * - reasoning items become thinking blocks,
+ * - function calls become tool_use blocks,
+ * - function call outputs become text blocks,
+ * - messages, output_text items and unrecognized item types contribute a
+ *   text block built from whatever text their content carries.
+ * Items that yield no text or no block are dropped.
+ */
+const mapOutputToAnthropicContent = (
+  output: Array<ResponseOutputItem>,
+): Array<AnthropicAssistantContentBlock> => {
+  // Translates one output item, or returns null when it has nothing to add.
+  const toBlock = (
+    item: ResponseOutputItem,
+  ): AnthropicAssistantContentBlock | null => {
+    if (item.type === "reasoning") {
+      const thinking = extractReasoningText(item)
+      return thinking.length > 0 ? { type: "thinking", thinking } : null
+    }
+
+    if (item.type === "function_call") {
+      return createToolUseContentBlock(item)
+    }
+
+    if (item.type === "function_call_output") {
+      return createFunctionCallOutputBlock(item)
+    }
+
+    // Everything else is treated as text-bearing; the cast keeps this
+    // working for item types the union does not know about yet.
+    const text = combineMessageTextContent(
+      (item as { content?: Array<ResponseOutputContentBlock> }).content,
+    )
+    return text.length > 0 ? { type: "text", text } : null
+  }
+
+  const blocks: Array<AnthropicAssistantContentBlock> = []
+
+  // Null results are skipped so the output keeps only real blocks.
+  for (const item of output) {
+    const block = toBlock(item)
+    if (block) {
+      blocks.push(block)
+    }
+  }
+
+  return blocks
+}
+
+/**
+ * Concatenates the textual payload of message content blocks without a
+ * separator. Per block the first applicable source wins: a refusal block's
+ * `refusal`, then any string `text` (this covers output_text blocks), then
+ * any string `reasoning`.
+ *
+ * Only string payloads are appended. The type guards check just the `type`
+ * tag, so a tagged block whose payload is missing must be skipped rather
+ * than concatenated (which would add the literal "undefined").
+ * Non-array content and non-object entries contribute nothing.
+ */
+const combineMessageTextContent = (
+  content: Array<ResponseOutputContentBlock> | undefined,
+): string => {
+  if (!Array.isArray(content)) {
+    return ""
+  }
+
+  const textOf = (block: ResponseOutputContentBlock): string => {
+    // Defensive: tolerate null or primitive entries in the array.
+    if (typeof block !== "object" || block === null) return ""
+
+    const { type, refusal, text, reasoning } = block as Record<string, unknown>
+    // Refusal text takes precedence for refusal-tagged blocks.
+    if (type === "refusal" && typeof refusal === "string") return refusal
+
+    if (typeof text === "string") return text
+    if (typeof reasoning === "string") return reasoning
+    return ""
+  }
+
+  return content.map(textOf).join("")
+}
+
+/**
+ * Gathers all reasoning text carried by a reasoning output item and returns
+ * it as one trimmed string. Sources are read in a fixed order: the blocks
+ * in `reasoning`, the blocks in `summary`, the item's own `thinking`, and
+ * finally its `text`. Pieces are concatenated without a separator.
+ */
+const extractReasoningText = (item: ResponseOutputReasoning): string => {
+  // Per block the first string among text, thinking and reasoning is used.
+  const textOfBlock = (block: ResponseReasoningBlock): string | undefined => {
+    const candidates: Array<unknown> = [
+      block.text,
+      block.thinking,
+      (block as Record<string, unknown>).reasoning,
+    ]
+    return candidates.find(
+      (value): value is string => typeof value === "string",
+    )
+  }
+
+  // Non-array sources (missing fields included) contribute nothing.
+  const textsOf = (blocks?: Array<ResponseReasoningBlock>): Array<string> => {
+    if (!Array.isArray(blocks)) {
+      return []
+    }
+    return blocks
+      .map(textOfBlock)
+      .filter((text): text is string => text !== undefined)
+  }
+
+  const segments = [...textsOf(item.reasoning), ...textsOf(item.summary)]
+  if (typeof item.thinking === "string") {
+    segments.push(item.thinking)
+  }
+
+  const textValue = (item as Record<string, unknown>).text
+  if (typeof textValue === "string") {
+    segments.push(textValue)
+  }
+  return segments.join("").trim()
+}
+
+// Builds an Anthropic tool_use block from a Responses function call. The
+// tool id prefers `call_id` and falls back to the item `id`; a call without
+// a name or without any id is rejected with null.
+const createToolUseContentBlock = (
+  call: ResponseOutputFunctionCall,
+): AnthropicToolUseBlock | null => {
+  const { name } = call
+  const id = call.call_id ?? call.id
+
+  if (name && id) {
+    // Arguments arrive as a JSON string and must become an input object.
+    const input = parseFunctionCallArguments(call.arguments)
+    return { type: "tool_use", id, name, input }
+  }
+
+  return null
+}
+
+// Converts a function_call_output item into a plain text block.
+// Only non-empty string outputs are converted; any other value yields
+// null so the caller can skip the item.
+const createFunctionCallOutputBlock = (
+  output: ResponseOutputFunctionCallOutput,
+): AnthropicAssistantContentBlock | null => {
+  const { output: text } = output
+  const usable = typeof text === "string" && text.length > 0
+
+  if (!usable) return null
+  return { type: "text", text }
+}
+
+// Parses the JSON `arguments` string of a function call into a tool input
+// object. Blank or non-string input gives {}; a JSON array is wrapped as
+// { arguments: [...] }; JSON primitives and unparseable text come back
+// verbatim under `raw_arguments` (parse failures are logged as warnings).
+const parseFunctionCallArguments = (
+  rawArguments: string,
+): Record<string, unknown> => {
+  if (typeof rawArguments !== "string" || rawArguments.trim() === "") return {}
+
+  let parsed: unknown
+  try {
+    parsed = JSON.parse(rawArguments)
+  } catch (error) {
+    consola.warn("Failed to parse function call arguments", {
+      error,
+      rawArguments,
+    })
+    return { raw_arguments: rawArguments }
+  }
+
+  if (Array.isArray(parsed)) return { arguments: parsed }
+  if (typeof parsed === "object" && parsed !== null) {
+    return parsed as Record<string, unknown>
+  }
+  return { raw_arguments: rawArguments }
+}
+
+// Wraps the flat `output_text` of a response in a single text block.
+// An empty (or otherwise falsy) string produces no blocks at all, so the
+// caller always receives an array.
+const fallbackContentBlocks = (
+  outputText: string,
+): Array<AnthropicAssistantContentBlock> => {
+  const blocks: Array<AnthropicAssistantContentBlock> = []
+
+  if (outputText) {
+    blocks.push({ type: "text", text: outputText })
+  }
+
+  return blocks
+}
+
+/**
+ * Derives the Anthropic stop reason from a Responses API result.
+ * The Responses API reports a turn that ends in tool calls as "completed",
+ * so a completed response containing function_call items maps to
+ * "tool_use"; Anthropic clients rely on that value to run the tools.
+ * Unknown statuses and unknown incomplete reasons map to null.
+ */
+const mapResponsesStopReason = (
+  response: ResponsesResult,
+): AnthropicResponse["stop_reason"] => {
+  const { status, incomplete_details: incompleteDetails, output } = response
+  const reason = incompleteDetails?.reason
+
+  if (status === "completed") {
+    const callsTool =
+      Array.isArray(output) && output.some((o) => o.type === "function_call")
+    return callsTool ? "tool_use" : "end_turn"
+  }
+  if (status !== "incomplete") return null
+  if (reason === "max_output_tokens") return "max_tokens"
+  if (reason === "content_filter") return "end_turn"
+  return reason === "tool_use" ? "tool_use" : null
+}
+
+// Copies token usage into Anthropic's shape; a missing usage object or a
+// missing counter is reported as 0.
+const mapResponsesUsage = (
+  response: ResponsesResult,
+): AnthropicResponse["usage"] => {
+  const { usage } = response
+  return {
+    input_tokens: usage?.input_tokens ?? 0,
+    output_tokens: usage?.output_tokens ?? 0,
+  }
+}
+
+const isRecord = (value: unknown): value is Record<string, unknown> =>
+  value !== null && typeof value === "object" // arrays pass this check too
+
+// Type guard keyed on the `type` tag alone; `text` itself is not validated.
+const isResponseOutputText = (
+  block: ResponseOutputContentBlock,
+): block is ResponseOutputText => {
+  return isRecord(block) && (block as { type?: unknown }).type === "output_text"
+}
+
+// Type guard keyed on the `type` tag alone; `refusal` is not validated.
+const isResponseOutputRefusal = (
+  block: ResponseOutputContentBlock,
+): block is ResponseOutputRefusal => {
+  return isRecord(block) && (block as { type?: unknown }).type === "refusal"
+}
+
+// Extracts two identifiers from a user id string, presumably shaped like
+// "user_<id>_account..._session_<key>"; each is null when not matched.
+const parseUserId = (
+  userId: string | undefined,
+): { safetyIdentifier: string | null; promptCacheKey: string | null } => {
+  if (typeof userId !== "string" || userId === "") {
+    return { safetyIdentifier: null, promptCacheKey: null }
+  }
+
+  // safety_identifier: underscore-free text between "user_" and "_account"
+  const safetyIdentifier = /user_([^_]+)_account/.exec(userId)?.[1] ?? null
+
+  // prompt_cache_key: everything following "_session_"
+  const promptCacheKey = /_session_(.+)$/.exec(userId)?.[1] ?? null
+
+  return { safetyIdentifier, promptCacheKey }
+}
diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts
new file mode 100644
index 000000000..ef7b38b93
--- /dev/null
+++ b/src/routes/responses/handler.ts
@@ -0,0 +1,78 @@
+import type { Context } from "hono"
+
+import consola from "consola"
+import { streamSSE } from "hono/streaming"
+
+import { awaitApproval } from "~/lib/approval"
+import { checkRateLimit } from "~/lib/rate-limit"
+import { state } from "~/lib/state"
+import {
+  createResponses,
+  type ResponsesPayload,
+  type ResponsesResult,
+} from "~/services/copilot/create-responses"
+
+import { getResponsesRequestOptions } from "./utils"
+
+const RESPONSES_ENDPOINT = "/responses"
+
+export const handleResponses = async (c: Context) => {
+  await checkRateLimit(state)
+  // The JSON body is read only after checkRateLimit has resolved.
+  const payload = await c.req.json<ResponsesPayload>()
+  consola.debug("Responses request payload:", JSON.stringify(payload))
+  // Unknown models and models without "/responses" support get a 400.
+  const selectedModel = state.models?.data.find(
+    (model) => model.id === payload.model,
+  )
+  const supportsResponses =
+    selectedModel?.supported_endpoints?.includes(RESPONSES_ENDPOINT) ?? false
+
+  if (!supportsResponses) {
+    return c.json(
+      {
+        error: {
+          message:
+            "This model does not support the responses endpoint. Please choose a different model.",
+          type: "invalid_request_error",
+        },
+      },
+      400,
+    )
+  }
+  // Options derived from the payload; passed on to createResponses.
+  const { vision, initiator } = getResponsesRequestOptions(payload)
+  // With manual approval enabled, wait for it before calling upstream.
+  if (state.manualApprove) {
+    await awaitApproval()
+  }
+
+  const response = await createResponses(payload, { vision, initiator })
+  // Streaming: relay each upstream SSE message (data defaults to "").
+  if (isStreamingRequested(payload) && isAsyncIterable(response)) {
+    consola.debug("Forwarding native Responses stream")
+    return streamSSE(c, async (stream) => {
+      for await (const chunk of response) {
+        consola.debug("Responses stream chunk:", JSON.stringify(chunk))
+        await stream.writeSSE({
+          id: (chunk as { id?: string }).id,
+          event: (chunk as { event?: string }).event,
+          data: (chunk as { data?: string }).data ?? "",
+        })
+      }
+    })
+  }
+  // Otherwise log the last 400 characters of the JSON and return it all.
+  consola.debug(
+    "Forwarding native Responses result:",
+    JSON.stringify(response).slice(-400),
+  )
+  return c.json(response as ResponsesResult)
+}
+
+// True for any truthy value exposing a callable Symbol.asyncIterator.
+const isAsyncIterable = <T>(value: unknown): value is AsyncIterable<T> =>
+  !!value && typeof (value as AsyncIterable<T>)[Symbol.asyncIterator] === "function"
+
+const isStreamingRequested = ({ stream }: ResponsesPayload): boolean =>
+  !!stream // null, undefined and false all mean a non-streaming request
diff --git a/src/routes/responses/route.ts b/src/routes/responses/route.ts
new file mode 100644
index 000000000..af2423427
--- /dev/null
+++ b/src/routes/responses/route.ts
@@ -0,0 +1,15 @@
+import { Hono } from "hono"
+
+import { forwardError } from "~/lib/error"
+
+import { handleResponses } from "./handler"
+
+export const responsesRoutes = new Hono()
+
+// POST / runs the Responses handler and turns any rejection into an HTTP
+// error response via forwardError.
+responsesRoutes.post("/", (c) => {
+  const onFailure = (error: unknown) => forwardError(c, error)
+
+  return handleResponses(c).catch(onFailure)
+})
diff --git a/src/routes/responses/utils.ts b/src/routes/responses/utils.ts
new file mode 100644
index 000000000..5dea1daae
--- /dev/null
+++ b/src/routes/responses/utils.ts
@@ -0,0 +1,71 @@
+import type {
+  ResponseInputItem,
+  ResponsesPayload,
+} from "~/services/copilot/create-responses"
+
+// Derives upstream request options from the payload: whether any item has
+// image input and whether the request counts as agent- or user-initiated.
+export const getResponsesRequestOptions = (
+  payload: ResponsesPayload,
+): { vision: boolean; initiator: "agent" | "user" } => ({
+  vision: hasVisionInput(payload),
+  initiator: hasAgentInitiator(payload) ? "agent" : "user",
+})
+
+// True when any item has no role (e.g. tool calls) or role "assistant".
+export const hasAgentInitiator = (payload: ResponsesPayload): boolean =>
+  getPayloadItems(payload).some((item) => {
+    const role: unknown = "role" in item ? item.role : undefined
+    return typeof role === "string" && role !== ""
+      ? role.toLowerCase() === "assistant"
+      : !role
+  })
+
+// True when any array item of `input` or `instructions` contains image
+// input, searched recursively through nested `content` arrays.
+export const hasVisionInput = (payload: ResponsesPayload): boolean =>
+  getPayloadItems(payload).some((entry) => containsVisionContent(entry))
+
+/**
+ * Gathers every array item from `input` and `instructions` into one list,
+ * input items first. String or missing values contribute nothing.
+ */
+const getPayloadItems = (
+  payload: ResponsesPayload,
+): Array<ResponseInputItem> => {
+  const items: Array<ResponseInputItem> = []
+
+  for (const source of [payload.input, payload.instructions]) {
+    if (!Array.isArray(source)) {
+      continue
+    }
+    items.push(...source)
+  }
+  return items
+}
+
+/**
+ * Recursively checks a value for image input. Returns true for an object
+ * whose `type` is "input_image" (compared case-insensitively), for an array
+ * with such an element, or for an object whose `content` array contains
+ * one at any depth. Primitives and null never match.
+ */
+const containsVisionContent = (value: unknown): boolean => {
+  if (Array.isArray(value)) {
+    return value.some((entry) => containsVisionContent(entry))
+  }
+
+  if (typeof value !== "object" || value === null) {
+    return false
+  }
+
+  const { type, content } = value as Record<string, unknown>
+
+  if (typeof type === "string" && type.toLowerCase() === "input_image") {
+    return true
+  }
+
+  // Only `content` is descended into; other nested fields are not searched.
+  if (!Array.isArray(content)) return false
+  return content.some((entry) => containsVisionContent(entry))
+}
diff --git a/src/server.ts b/src/server.ts
index 3cb2bb860..2d792c566 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -6,6 +6,7 @@ import { completionRoutes } from "./routes/chat-completions/route"
 import { embeddingRoutes } from "./routes/embeddings/route"
 import { messageRoutes } from "./routes/messages/route"
 import { modelRoutes } from "./routes/models/route"
+import { responsesRoutes } from "./routes/responses/route"
 import { tokenRoute } from "./routes/token/route"
 import { usageRoute } from "./routes/usage/route"
 
@@ -21,11 +22,13 @@ server.route("/models", modelRoutes)
 server.route("/embeddings", embeddingRoutes)
 server.route("/usage", usageRoute)
 server.route("/token", tokenRoute)
+server.route("/responses", responsesRoutes)
 
 // Compatibility with tools that expect v1/ prefix
 server.route("/v1/chat/completions", completionRoutes)
 server.route("/v1/models", modelRoutes)
 server.route("/v1/embeddings", embeddingRoutes)
+server.route("/v1/responses", responsesRoutes)
 
 // Anthropic compatible endpoints
 server.route("/v1/messages", messageRoutes)
diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
new file mode 100644
index 000000000..9009abf6f
--- /dev/null
+++ b/src/services/copilot/create-responses.ts
@@ -0,0 +1,212 @@
+import consola from "consola"
+import { events } from "fetch-event-stream"
+
+import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config"
+import { HTTPError } from "~/lib/error"
+import { state } from "~/lib/state"
+
+export interface ResponsesPayload {
+  model: string // matched against the ids of the known models by the handler
+  input?: string | Array<ResponseInputItem> // array items are scanned for vision/initiator
+  instructions?: string | Array<ResponseInputItem> | null // array items are scanned as well
+  temperature?: number | null
+  top_p?: number | null
+  max_output_tokens?: number | null
+  tools?: Array<Record<string, unknown>> | null
+  tool_choice?: unknown
+  metadata?: Record<string, unknown> | null
+  stream?: boolean | null // truthy: createResponses returns an SSE event stream
+  response_format?: Record<string, unknown> | null
+  safety_identifier?: string | null
+  prompt_cache_key?: string | null
+  parallel_tool_calls?: boolean | null
+  store?: boolean | null
+  reasoning?: Record<string, unknown> | null
+  include?: Array<string>
+  [key: string]: unknown // extra fields are serialized with the rest of the payload
+}
+
+export interface ResponseInputMessage {
+  type?: "message"
+  role: "user" | "assistant" | "system" | "developer"
+  content?: string | Array<ResponseInputContent>
+  status?: string
+}
+
+export interface ResponseFunctionToolCallItem {
+  type: "function_call"
+  call_id: string
+  name: string
+  arguments: string
+  status?: "in_progress" | "completed" | "incomplete"
+}
+
+export interface ResponseFunctionCallOutputItem {
+  type: "function_call_output"
+  call_id: string
+  output: string
+  status?: "in_progress" | "completed" | "incomplete"
+}
+
+export type ResponseInputItem =
+  | ResponseInputMessage
+  | ResponseFunctionToolCallItem
+  | ResponseFunctionCallOutputItem
+  | Record<string, unknown>
+
+export type ResponseInputContent =
+  | ResponseInputText
+  | ResponseInputImage
+  | ResponseContentTextLike
+  | Record<string, unknown>
+
+export interface ResponseInputText {
+  type?: "input_text" | "text" | "output_text"
+  text: string
+}
+
+export interface ResponseInputImage {
+  type: "input_image"
+  image_url?: string | null
+  file_id?: string | null
+  detail?: "low" | "high" | "auto"
+}
+
+export interface ResponseContentTextLike {
+  type?: "text"
+  text: string
+}
+
+export interface ResponsesResult {
+  id: string
+  object: "response"
+  created_at: number
+  model: string
+  output: Array<ResponseOutputItem>
+  output_text: string
+  status: string
+  usage?: ResponseUsage | null
+  error: Record<string, unknown> | null
+  incomplete_details: Record<string, unknown> | null
+  instructions: string | null
+  metadata: Record<string, unknown> | null
+  parallel_tool_calls: boolean
+  temperature: number | null
+  tool_choice: unknown
+  tools: Array<Record<string, unknown>>
+  top_p: number | null
+}
+
+export type ResponseOutputItem =
+  | ResponseOutputMessage
+  | ResponseOutputReasoning
+  | ResponseOutputFunctionCall
+  | ResponseOutputFunctionCallOutput
+
+export interface ResponseOutputMessage {
+  id: string
+  type: "message" | "output_text"
+  role: "assistant"
+  status: "completed" | "in_progress" | "incomplete"
+  content?: Array<ResponseOutputContentBlock>
+}
+
+export interface ResponseOutputReasoning {
+  id: string
+  type: "reasoning"
+  reasoning?: Array<ResponseReasoningBlock>
+  summary?: Array<ResponseReasoningBlock>
+  thinking?: string
+  [key: string]: unknown
+}
+
+export interface ResponseReasoningBlock {
+  type: string
+  text?: string
+  thinking?: string
+  [key: string]: unknown
+}
+
+export interface ResponseOutputFunctionCall {
+  id: string
+  type: "function_call"
+  call_id?: string
+  name: string
+  arguments: string
+  status?: "in_progress" | "completed" | "incomplete"
+  [key: string]: unknown
+}
+
+export interface ResponseOutputFunctionCallOutput {
+  id: string
+  type: "function_call_output"
+  call_id: string
+  output: string
+  status?: "in_progress" | "completed" | "incomplete"
+  [key: string]: unknown
+}
+
+export type ResponseOutputContentBlock =
+  | ResponseOutputText
+  | ResponseOutputRefusal
+  | Record<string, unknown>
+
+export interface ResponseOutputText {
+  type: "output_text"
+  text: string
+  annotations: Array<unknown>
+}
+
+export interface ResponseOutputRefusal {
+  type: "refusal"
+  refusal: string
+}
+
+export interface ResponseUsage {
+  input_tokens: number
+  output_tokens?: number
+  total_tokens: number
+  input_tokens_details?: {
+    cached_tokens: number
+  }
+  output_tokens_details?: {
+    reasoning_tokens: number
+  }
+}
+
+export type ResponsesStream = ReturnType<typeof events>
+export type CreateResponsesReturn = ResponsesResult | ResponsesStream
+
+interface ResponsesRequestOptions {
+  vision: boolean
+  initiator: "agent" | "user"
+}
+
+export const createResponses = async (
+  payload: ResponsesPayload,
+  { vision, initiator }: ResponsesRequestOptions,
+): Promise<CreateResponsesReturn> => {
+  if (!state.copilotToken) throw new Error("Copilot token not found")
+  // Headers from copilotHeaders (given the vision flag) plus X-Initiator.
+  const headers: Record<string, string> = {
+    ...copilotHeaders(state, vision),
+    "X-Initiator": initiator,
+  }
+  // The payload is serialized as-is and POSTed to the /responses endpoint.
+  const response = await fetch(`${copilotBaseUrl(state)}/responses`, {
+    method: "POST",
+    headers,
+    body: JSON.stringify(payload),
+  })
+  // Non-OK answers are logged and thrown as HTTPError carrying the response.
+  if (!response.ok) {
+    consola.error("Failed to create responses", response)
+    throw new HTTPError("Failed to create responses", response)
+  }
+  // Streaming requests get an SSE event iterator instead of parsed JSON.
+  if (payload.stream) {
+    return events(response)
+  }
+
+  return (await response.json()) as ResponsesResult
+}
diff --git a/src/services/copilot/get-models.ts b/src/services/copilot/get-models.ts
index 792adc480..d56180852 100644
--- a/src/services/copilot/get-models.ts
+++ b/src/services/copilot/get-models.ts
@@ -28,6 +28,9 @@ interface ModelSupports {
   tool_calls?: boolean
   parallel_tool_calls?: boolean
   dimensions?: boolean
+  streaming?: boolean
+  structured_outputs?: boolean
+  vision?: boolean
 }
 
 interface ModelCapabilities {
@@ -52,4 +55,5 @@ interface Model {
     state: string
     terms: string
   }
+  supported_endpoints?: Array<string>
 }
diff --git a/tests/responses-stream-translation.test.ts b/tests/responses-stream-translation.test.ts
new file mode 100644
index 000000000..9f149e1bd
--- /dev/null
+++ b/tests/responses-stream-translation.test.ts
@@ -0,0 +1,137 @@
+import { describe, expect, test } from "bun:test"
+
+import type { AnthropicStreamEventData } from "~/routes/messages/anthropic-types"
+
+import {
+  createResponsesStreamState,
+  translateResponsesStreamEvent,
+} from "~/routes/messages/responses-stream-translation"
+
+const createFunctionCallAddedEvent = () => ({
+  type: "response.output_item.added",
+  output_index: 1,
+  item: {
+    id: "item-1",
+    type: "function_call",
+    call_id: "call-1",
+    name: "TodoWrite",
+    arguments: "",
+    status: "in_progress",
+  },
+})
+
+describe("translateResponsesStreamEvent tool calls", () => {
+  test("streams function call arguments across deltas", () => {
+    const state = createResponsesStreamState()
+
+    const events = [
+      translateResponsesStreamEvent(createFunctionCallAddedEvent(), state),
+      translateResponsesStreamEvent(
+        {
+          type: "response.function_call_arguments.delta",
+          output_index: 1,
+          delta: '{"todos":',
+        },
+        state,
+      ),
+      translateResponsesStreamEvent(
+        {
+          type: "response.function_call_arguments.delta",
+          output_index: 1,
+          delta: "[]}",
+        },
+        state,
+      ),
+      translateResponsesStreamEvent(
+        {
+          type: "response.function_call_arguments.done",
+          output_index: 1,
+          arguments: '{"todos":[]}',
+        },
+        state,
+      ),
+    ].flat()
+
+    const messageStart = events.find((event) => event.type === "message_start")
+    expect(messageStart).toBeDefined()
+
+    const blockStart = events.find(
+      (event) => event.type === "content_block_start",
+    )
+    expect(blockStart).toBeDefined()
+    if (blockStart?.type === "content_block_start") {
+      expect(blockStart.content_block).toEqual({
+        type: "tool_use",
+        id: "call-1",
+        name: "TodoWrite",
+        input: {},
+      })
+    }
+
+    const deltas = events.filter(
+      (
+        event,
+      ): event is Extract<
+        AnthropicStreamEventData,
+        { type: "content_block_delta" }
+      > => event.type === "content_block_delta",
+    )
+    expect(deltas).toHaveLength(2)
+    expect(deltas[0].delta).toEqual({
+      type: "input_json_delta",
+      partial_json: '{"todos":',
+    })
+    expect(deltas[1].delta).toEqual({
+      type: "input_json_delta",
+      partial_json: "[]}",
+    })
+
+    const blockStop = events.find(
+      (event) => event.type === "content_block_stop",
+    )
+    expect(blockStop).toBeDefined()
+
+    expect(state.openBlocks.size).toBe(0)
+    expect(state.functionCallStateByOutputIndex.size).toBe(0)
+  })
+
+  test("emits full arguments when only done payload is present", () => {
+    const state = createResponsesStreamState()
+
+    const events = [
+      translateResponsesStreamEvent(createFunctionCallAddedEvent(), state),
+      translateResponsesStreamEvent(
+        {
+          type: "response.function_call_arguments.done",
+          output_index: 1,
+          arguments:
+            '{"todos":[{"content":"Review src/routes/responses/translation.ts"}]}',
+        },
+        state,
+      ),
+    ].flat()
+
+    const deltas = events.filter(
+      (
+        event,
+      ): event is Extract<
+        AnthropicStreamEventData,
+        { type: "content_block_delta" }
+      > => event.type === "content_block_delta",
+    )
+    expect(deltas).toHaveLength(1)
+    expect(deltas[0].delta).toEqual({
+      type: "input_json_delta",
+      partial_json:
+        '{"todos":[{"content":"Review src/routes/responses/translation.ts"}]}',
+    })
+
+    const blockStop = events.find(
+      (event) => event.type === "content_block_stop",
+    )
+    expect(blockStop).toBeDefined()
+
+    expect(state.openBlocks.size).toBe(0)
+    expect(state.functionCallStateByOutputIndex.size).toBe(0)
+  })
+})
diff --git a/tests/translation.test.ts b/tests/translation.test.ts
new file mode 100644
index 000000000..84856b932
--- /dev/null
+++ b/tests/translation.test.ts
@@ -0,0 +1,159 @@
+import { describe, expect, it } from "bun:test"
+
+import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types"
+import type {
+  ResponseInputMessage,
+  ResponsesResult,
+} from "~/services/copilot/create-responses"
+
+import {
+  translateAnthropicMessagesToResponsesPayload,
+  translateResponsesResultToAnthropic,
+} from "~/routes/messages/responses-translation"
+
+const samplePayload = {
+  model: "claude-3-5-sonnet",
+  max_tokens: 1024,
+  messages: [
+    {
+      role: "user",
+      content: [
+        {
+          type: "text",
+          text: "<system-reminder>\nThis is a reminder that your todo list is currently empty. DO NOT mention this to the user explicitly because they are already aware. If you are working on tasks that would benefit from a todo list please use the TodoWrite tool to create one. If not, please feel free to ignore. Again do not mention this message to the user.\n</system-reminder>",
+        },
+        {
+          type: "text",
+          text: "<system-reminder>\nAs you answer the user's questions, you can use the following context:\n# important-instruction-reminders\nDo what has been asked; nothing more, nothing less.\nNEVER create files unless they're absolutely necessary for achieving your goal.\nALWAYS prefer editing an existing file to creating a new one.\nNEVER proactively create documentation files (*.md) or README files. Only create documentation files if explicitly requested by the User.\n\n      \n      IMPORTANT: this context may or may not be relevant to your tasks. You should not respond to this context unless it is highly relevant to your task.\n</system-reminder>",
+        },
+        {
+          type: "text",
+          text: "hi",
+        },
+        {
+          type: "text",
+          text: "<system-reminder>\nThe user opened the file c:\\Work2\\copilot-api\\src\\routes\\responses\\translation.ts in the IDE. This may or may not be related to the current task.\n</system-reminder>",
+        },
+        {
+          type: "text",
+          text: "hi",
+          cache_control: {
+            type: "ephemeral",
+          },
+        },
+      ],
+    },
+  ],
+} as unknown as AnthropicMessagesPayload
+
+describe("translateAnthropicMessagesToResponsesPayload", () => {
+  it("converts anthropic text blocks into response input messages", () => {
+    const result = translateAnthropicMessagesToResponsesPayload(samplePayload)
+
+    // All five text parts must arrive, in order, inside a single user message.
+    expect(Array.isArray(result.input)).toBe(true)
+    const input = result.input as Array<ResponseInputMessage>
+    expect(input).toHaveLength(1)
+
+    const message = input[0]
+    expect(message.role).toBe("user")
+    expect(Array.isArray(message.content)).toBe(true)
+
+    const content = message.content as Array<{ text: string }>
+    expect(content.map((item) => item.text)).toEqual([
+      "<system-reminder>\nThis is a reminder that your todo list is currently empty. DO NOT mention this to the user explicitly because they are already aware. If you are working on tasks that would benefit from a todo list please use the TodoWrite tool to create one. If not, please feel free to ignore. Again do not mention this message to the user.\n</system-reminder>",
+      "<system-reminder>\nAs you answer the user's questions, you can use the following context:\n# important-instruction-reminders\nDo what has been asked; nothing more, nothing less.\nNEVER create files unless they're absolutely necessary for achieving your goal.\nALWAYS prefer editing an existing file to creating a new one.\nNEVER proactively create documentation files (*.md) or README files. Only create documentation files if explicitly requested by the User.\n\n      \n      IMPORTANT: this context may or may not be relevant to your tasks. You should not respond to this context unless it is highly relevant to your task.\n</system-reminder>",
+      "hi",
+      "<system-reminder>\nThe user opened the file c:\\Work2\\copilot-api\\src\\routes\\responses\\translation.ts in the IDE. This may or may not be related to the current task.\n</system-reminder>",
+      "hi",
+    ])
+  })
+})
+
+describe("translateResponsesResultToAnthropic", () => {
+  it("handles reasoning and function call items", () => {
+    const responsesResult: ResponsesResult = {
+      id: "resp_123",
+      object: "response",
+      created_at: 0,
+      model: "gpt-4.1",
+      output: [
+        {
+          id: "reason_1",
+          type: "reasoning",
+          reasoning: [{ type: "text", text: "Thinking about the task." }],
+        },
+        {
+          id: "call_1",
+          type: "function_call",
+          call_id: "call_1",
+          name: "TodoWrite",
+          arguments:
+            '{"todos":[{"content":"Read src/routes/responses/translation.ts","status":"in_progress"}]}',
+          status: "completed",
+        },
+        {
+          id: "message_1",
+          type: "message",
+          role: "assistant",
+          status: "completed",
+          content: [
+            {
+              type: "output_text",
+              text: "Added the task to your todo list.",
+              annotations: [],
+            },
+          ],
+        },
+      ],
+      output_text: "Added the task to your todo list.",
+      status: "incomplete",
+      usage: {
+        input_tokens: 120,
+        output_tokens: 36,
+        total_tokens: 156,
+      },
+      error: null,
+      incomplete_details: { reason: "tool_use" },
+      instructions: null,
+      metadata: null,
+      parallel_tool_calls: false,
+      temperature: null,
+      tool_choice: null,
+      tools: [],
+      top_p: null,
+    }
+
+    const anthropicResponse =
+      translateResponsesResultToAnthropic(responsesResult)
+
+    expect(anthropicResponse.stop_reason).toBe("tool_use")
+    expect(anthropicResponse.content).toHaveLength(3)
+
+    const [thinkingBlock, toolUseBlock, textBlock] = anthropicResponse.content
+
+    expect(thinkingBlock.type).toBe("thinking")
+    if (thinkingBlock.type === "thinking") {
+      expect(thinkingBlock.thinking).toContain("Thinking about the task")
+    }
+
+    expect(toolUseBlock.type).toBe("tool_use")
+    if (toolUseBlock.type === "tool_use") {
+      expect(toolUseBlock.id).toBe("call_1")
+      expect(toolUseBlock.name).toBe("TodoWrite")
+      expect(toolUseBlock.input).toEqual({
+        todos: [
+          {
+            content: "Read src/routes/responses/translation.ts",
+            status: "in_progress",
+          },
+        ],
+      })
+    }
+
+    expect(textBlock.type).toBe("text")
+    if (textBlock.type === "text") {
+      expect(textBlock.text).toBe("Added the task to your todo list.")
+    }
+  })
+})

From 87899a137a711e571852b41b35f90a91aa7013bd Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Mon, 29 Sep 2025 15:43:04 +0800
Subject: [PATCH 02/62] feat: enhance output type for function call and add
 content conversion utility

---
 src/routes/messages/responses-translation.ts | 28 ++++++++++++++++++--
 src/services/copilot/create-responses.ts     | 10 ++-----
 2 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts
index 057a7b962..5d0ff3b98 100644
--- a/src/routes/messages/responses-translation.ts
+++ b/src/routes/messages/responses-translation.ts
@@ -246,7 +246,7 @@ const createFunctionCallOutput = (
 ): ResponseFunctionCallOutputItem => ({
   type: "function_call_output",
   call_id: block.tool_use_id,
-  output: block.content,
+  output: convertToolResultContent(block.content),
   status: block.is_error ? "incomplete" : "completed",
 })
 
@@ -268,7 +268,7 @@ When using the BashOutput tool, follow these rules:
 - Only Bash Tool run_in_background set to true, Use BashOutput to read the output later
 ### TodoWrite tool
 When using the TodoWrite tool, follow these rules:
-- Skip using the TodoWrite tool for straightforward tasks (roughly the easiest 25%).
+- Skip using the TodoWrite tool for simple or straightforward tasks (roughly the easiest 25%).
 - Do not make single-step todo lists.
 - When you made a todo, update it after having performed one of the sub-tasks that you shared on the todo list.`
 
@@ -636,3 +636,27 @@ const parseUserId = (
 
   return { safetyIdentifier, promptCacheKey }
 }
+
+const convertToolResultContent = (
+  content: string | Array<AnthropicTextBlock> | Array<AnthropicImageBlock>,
+): string | Array<ResponseInputContent> => {
+  if (typeof content === "string") {
+    return content
+  }
+
+  if (Array.isArray(content)) {
+    if (content.length > 0 && content[0].type === "text") {
+      return (content as Array<AnthropicTextBlock>).map((block) =>
+        createTextContent(block.text),
+      )
+    }
+
+    if (content.length > 0 && content[0].type === "image") {
+      return (content as Array<AnthropicImageBlock>).map((block) =>
+        createImageContent(block),
+      )
+    }
+  }
+
+  return ""
+}
diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
index 9009abf6f..52a162efa 100644
--- a/src/services/copilot/create-responses.ts
+++ b/src/services/copilot/create-responses.ts
@@ -44,7 +44,7 @@ export interface ResponseFunctionToolCallItem {
 export interface ResponseFunctionCallOutputItem {
   type: "function_call_output"
   call_id: string
-  output: string
+  output: string | Array<ResponseInputContent>
   status?: "in_progress" | "completed" | "incomplete"
 }
 
@@ -57,11 +57,10 @@ export type ResponseInputItem =
 export type ResponseInputContent =
   | ResponseInputText
   | ResponseInputImage
-  | ResponseContentTextLike
   | Record<string, unknown>
 
 export interface ResponseInputText {
-  type?: "input_text" | "text" | "output_text"
+  type?: "input_text" | "output_text"
   text: string
 }
 
@@ -72,11 +71,6 @@ export interface ResponseInputImage {
   detail?: "low" | "high" | "auto"
 }
 
-export interface ResponseContentTextLike {
-  type?: "text"
-  text: string
-}
-
 export interface ResponsesResult {
   id: string
   object: "response"

From 4fc0fa0e6d5b70623344081e6ca4eab9eb6fc128 Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Mon, 29 Sep 2025 20:09:45 +0800
Subject: [PATCH 03/62] refactor: optimize content conversion logic in
 convertToolResultContent function

---
 src/routes/messages/responses-translation.ts | 26 ++++++++++++--------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts
index 5d0ff3b98..71c877e12 100644
--- a/src/routes/messages/responses-translation.ts
+++ b/src/routes/messages/responses-translation.ts
@@ -645,17 +645,23 @@ const convertToolResultContent = (
   }
 
   if (Array.isArray(content)) {
-    if (content.length > 0 && content[0].type === "text") {
-      return (content as Array<AnthropicTextBlock>).map((block) =>
-        createTextContent(block.text),
-      )
-    }
-
-    if (content.length > 0 && content[0].type === "image") {
-      return (content as Array<AnthropicImageBlock>).map((block) =>
-        createImageContent(block),
-      )
+    const result: Array<ResponseInputContent> = []
+    for (const block of content) {
+      switch (block.type) {
+        case "text": {
+          result.push(createTextContent(block.text))
+          break
+        }
+        case "image": {
+          result.push(createImageContent(block))
+          break
+        }
+        default: {
+          break
+        }
+      }
     }
+    return result
   }
 
   return ""

From 2b9733bc0d0bc7af22ba834e7cc7d46f26cb10ae Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Tue, 30 Sep 2025 09:31:52 +0800
Subject: [PATCH 04/62] refactor: remove unused function call output type and
 simplify response output message type

---
 src/routes/messages/responses-translation.ts | 24 +-------------------
 src/services/copilot/create-responses.ts     | 12 +---------
 2 files changed, 2 insertions(+), 34 deletions(-)

diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts
index 71c877e12..41c262994 100644
--- a/src/routes/messages/responses-translation.ts
+++ b/src/routes/messages/responses-translation.ts
@@ -10,7 +10,6 @@ import {
   type ResponsesResult,
   type ResponseOutputContentBlock,
   type ResponseOutputFunctionCall,
-  type ResponseOutputFunctionCallOutput,
   type ResponseOutputItem,
   type ResponseOutputReasoning,
   type ResponseReasoningBlock,
@@ -388,15 +387,7 @@ const mapOutputToAnthropicContent = (
         }
         break
       }
-      case "function_call_output": {
-        const outputBlock = createFunctionCallOutputBlock(item)
-        if (outputBlock) {
-          contentBlocks.push(outputBlock)
-        }
-        break
-      }
-      case "message":
-      case "output_text": {
+      case "message": {
         const combinedText = combineMessageTextContent(item.content)
         if (combinedText.length > 0) {
           contentBlocks.push({ type: "text", text: combinedText })
@@ -511,19 +502,6 @@ const createToolUseContentBlock = (
   }
 }
 
-const createFunctionCallOutputBlock = (
-  output: ResponseOutputFunctionCallOutput,
-): AnthropicAssistantContentBlock | null => {
-  if (typeof output.output !== "string" || output.output.length === 0) {
-    return null
-  }
-
-  return {
-    type: "text",
-    text: output.output,
-  }
-}
-
 const parseFunctionCallArguments = (
   rawArguments: string,
 ): Record<string, unknown> => {
diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
index 52a162efa..b13349e4d 100644
--- a/src/services/copilot/create-responses.ts
+++ b/src/services/copilot/create-responses.ts
@@ -95,11 +95,10 @@ export type ResponseOutputItem =
   | ResponseOutputMessage
   | ResponseOutputReasoning
   | ResponseOutputFunctionCall
-  | ResponseOutputFunctionCallOutput
 
 export interface ResponseOutputMessage {
   id: string
-  type: "message" | "output_text"
+  type: "message"
   role: "assistant"
   status: "completed" | "in_progress" | "incomplete"
   content?: Array<ResponseOutputContentBlock>
@@ -131,15 +130,6 @@ export interface ResponseOutputFunctionCall {
   [key: string]: unknown
 }
 
-export interface ResponseOutputFunctionCallOutput {
-  id: string
-  type: "function_call_output"
-  call_id: string
-  output: string
-  status?: "in_progress" | "completed" | "incomplete"
-  [key: string]: unknown
-}
-
 export type ResponseOutputContentBlock =
   | ResponseOutputText
   | ResponseOutputRefusal

From 505f648a77af6036cd3b846b91fe1eb67c3168c1 Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Tue, 30 Sep 2025 15:43:05 +0800
Subject: [PATCH 05/62] feat: add signature and reasoning handling to responses
 translation and state management

---
 src/routes/messages/anthropic-types.ts        |   1 +
 .../messages/responses-stream-translation.ts  | 159 +++++++++++++++++-
 src/routes/messages/responses-translation.ts  |  56 +++---
 src/routes/responses/utils.ts                 |   6 +-
 src/services/copilot/create-responses.ts      |  22 ++-
 5 files changed, 200 insertions(+), 44 deletions(-)

diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts
index 881fffcc8..f07485bf0 100644
--- a/src/routes/messages/anthropic-types.ts
+++ b/src/routes/messages/anthropic-types.ts
@@ -56,6 +56,7 @@ export interface AnthropicToolUseBlock {
 export interface AnthropicThinkingBlock {
   type: "thinking"
   thinking: string
+  signature: string
 }
 
 export type AnthropicUserContentBlock =
diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts
index 06feab1a4..a3857e8a7 100644
--- a/src/routes/messages/responses-stream-translation.ts
+++ b/src/routes/messages/responses-stream-translation.ts
@@ -15,6 +15,7 @@ export interface ResponsesStreamState {
   initialInputTokens?: number
   functionCallStateByOutputIndex: Map<number, FunctionCallStreamState>
   functionCallOutputIndexByItemId: Map<string, number>
+  summryIndex: number
 }
 
 type FunctionCallStreamState = {
@@ -32,6 +33,7 @@ export const createResponsesStreamState = (): ResponsesStreamState => ({
   blockHasDelta: new Set(),
   functionCallStateByOutputIndex: new Map(),
   functionCallOutputIndexByItemId: new Map(),
+  summryIndex: 0,
 })
 
 export const translateResponsesStreamEvent = (
@@ -49,12 +51,18 @@ export const translateResponsesStreamEvent = (
       return handleResponseCreated(rawEvent, state)
     }
 
-    case "response.reasoning_summary_text.delta":
+    case "response.reasoning_summary_text.delta": {
+      return handleReasoningSummaryTextDelta(rawEvent, state)
+    }
+
     case "response.output_text.delta": {
       return handleOutputTextDelta(rawEvent, state)
     }
 
-    case "response.reasoning_summary_part.done":
+    case "response.reasoning_summary_part.done": {
+      return handleReasoningSummaryPartDone(rawEvent, state)
+    }
+
     case "response.output_text.done": {
       return handleOutputTextDone(rawEvent, state)
     }
@@ -63,6 +71,10 @@ export const translateResponsesStreamEvent = (
       return handleOutputItemAdded(rawEvent, state)
     }
 
+    case "response.output_item.done": {
+      return handleOutputItemDone(rawEvent, state)
+    }
+
     case "response.function_call_arguments.delta": {
       return handleFunctionCallArgumentsDelta(rawEvent, state)
     }
@@ -143,6 +155,51 @@ const handleOutputItemAdded = (
   return events
 }
 
+const handleOutputItemDone = (
+  rawEvent: Record<string, unknown>,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const events = ensureMessageStart(state)
+
+  const item = isRecord(rawEvent.item) ? rawEvent.item : undefined
+  if (!item) {
+    return events
+  }
+
+  const itemType = typeof item.type === "string" ? item.type : undefined
+  if (itemType !== "reasoning") {
+    return events
+  }
+
+  const outputIndex = toNumber(rawEvent.output_index)
+  const contentIndex = state.summryIndex
+
+  const blockIndex = openThinkingBlockIfNeeded(state, {
+    outputIndex,
+    contentIndex,
+    events,
+  })
+
+  const signature =
+    typeof item.encrypted_content === "string" ? item.encrypted_content : ""
+
+  if (signature) {
+    events.push({
+      type: "content_block_delta",
+      index: blockIndex,
+      delta: {
+        type: "signature_delta",
+        signature,
+      },
+    })
+    state.blockHasDelta.add(blockIndex)
+  }
+
+  closeBlockIfOpen(state, blockIndex, events)
+
+  return events
+}
+
 const handleFunctionCallArgumentsDelta = (
   rawEvent: Record<string, unknown>,
   state: ResponsesStreamState,
@@ -257,6 +314,71 @@ const handleOutputTextDelta = (
   return events
 }
 
+const handleReasoningSummaryTextDelta = (
+  rawEvent: Record<string, unknown>,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const events = ensureMessageStart(state)
+
+  const outputIndex = toNumber(rawEvent.output_index)
+  const contentIndex = toNumber(rawEvent.summary_index)
+  const deltaText = typeof rawEvent.delta === "string" ? rawEvent.delta : ""
+
+  if (!deltaText) {
+    return events
+  }
+
+  const blockIndex = openThinkingBlockIfNeeded(state, {
+    outputIndex,
+    contentIndex,
+    events,
+  })
+
+  events.push({
+    type: "content_block_delta",
+    index: blockIndex,
+    delta: {
+      type: "thinking_delta",
+      thinking: deltaText,
+    },
+  })
+  state.blockHasDelta.add(blockIndex)
+
+  return events
+}
+
+const handleReasoningSummaryPartDone = (
+  rawEvent: Record<string, unknown>,
+  state: ResponsesStreamState,
+): Array<AnthropicStreamEventData> => {
+  const events = ensureMessageStart(state)
+
+  const outputIndex = toNumber(rawEvent.output_index)
+  const contentIndex = toNumber(rawEvent.summary_index)
+  state.summryIndex = contentIndex
+  const part = isRecord(rawEvent.part) ? rawEvent.part : undefined
+  const text = part && typeof part.text === "string" ? part.text : ""
+
+  const blockIndex = openThinkingBlockIfNeeded(state, {
+    outputIndex,
+    contentIndex,
+    events,
+  })
+
+  if (text && !state.blockHasDelta.has(blockIndex)) {
+    events.push({
+      type: "content_block_delta",
+      index: blockIndex,
+      delta: {
+        type: "thinking_delta",
+        thinking: text,
+      },
+    })
+  }
+
+  return events
+}
+
 const handleOutputTextDone = (
   rawEvent: Record<string, unknown>,
   state: ResponsesStreamState,
@@ -430,6 +552,39 @@ const openTextBlockIfNeeded = (
   return blockIndex
 }
 
+const openThinkingBlockIfNeeded = (
+  state: ResponsesStreamState,
+  params: {
+    outputIndex: number
+    contentIndex: number
+    events: Array<AnthropicStreamEventData>
+  },
+): number => {
+  const { outputIndex, contentIndex, events } = params
+  const key = getBlockKey(outputIndex, contentIndex)
+  let blockIndex = state.blockIndexByKey.get(key)
+
+  if (blockIndex === undefined) {
+    blockIndex = state.nextContentBlockIndex
+    state.nextContentBlockIndex += 1
+    state.blockIndexByKey.set(key, blockIndex)
+  }
+
+  if (!state.openBlocks.has(blockIndex)) {
+    events.push({
+      type: "content_block_start",
+      index: blockIndex,
+      content_block: {
+        type: "thinking",
+        thinking: "",
+      },
+    })
+    state.openBlocks.add(blockIndex)
+  }
+
+  return blockIndex
+}
+
 const closeBlockIfOpen = (
   state: ResponsesStreamState,
   blockIndex: number,
diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts
index 41c262994..50ae3a600 100644
--- a/src/routes/messages/responses-translation.ts
+++ b/src/routes/messages/responses-translation.ts
@@ -6,6 +6,7 @@ import {
   type ResponseInputImage,
   type ResponseInputItem,
   type ResponseInputMessage,
+  type ResponseInputReasoning,
   type ResponseInputText,
   type ResponsesResult,
   type ResponseOutputContentBlock,
@@ -27,6 +28,7 @@ import {
   type AnthropicMessage,
   type AnthropicMessagesPayload,
   type AnthropicTextBlock,
+  type AnthropicThinkingBlock,
   type AnthropicTool,
   type AnthropicToolResultBlock,
   type AnthropicToolUseBlock,
@@ -137,6 +139,12 @@ const translateAssistantMessage = (
       continue
     }
 
+    if (block.type === "thinking") {
+      flushPendingContent("assistant", pendingContent, items)
+      items.push(createReasoningContent(block))
+      continue
+    }
+
     const converted = translateAssistantContentBlock(block)
     if (converted) {
       pendingContent.push(converted)
@@ -158,9 +166,6 @@ const translateUserContentBlock = (
     case "image": {
       return createImageContent(block)
     }
-    case "tool_result": {
-      return undefined
-    }
     default: {
       return undefined
     }
@@ -174,12 +179,6 @@ const translateAssistantContentBlock = (
     case "text": {
       return createOutPutTextContent(block.text)
     }
-    case "thinking": {
-      return createOutPutTextContent(block.thinking)
-    }
-    case "tool_use": {
-      return undefined
-    }
     default: {
       return undefined
     }
@@ -230,6 +229,19 @@ const createImageContent = (
   image_url: `data:${block.source.media_type};base64,${block.source.data}`,
 })
 
+const createReasoningContent = (
+  block: AnthropicThinkingBlock,
+): ResponseInputReasoning => ({
+  type: "reasoning",
+  summary: [
+    {
+      type: "summary_text",
+      text: block.thinking,
+    },
+  ],
+  encrypted_content: block.signature,
+})
+
 const createFunctionToolCall = (
   block: AnthropicToolUseBlock,
 ): ResponseFunctionToolCallItem => ({
@@ -376,7 +388,11 @@ const mapOutputToAnthropicContent = (
       case "reasoning": {
         const thinkingText = extractReasoningText(item)
         if (thinkingText.length > 0) {
-          contentBlocks.push({ type: "thinking", thinking: thinkingText })
+          contentBlocks.push({
+            type: "thinking",
+            thinking: thinkingText,
+            signature: item.encrypted_content ?? "",
+          })
         }
         break
       }
@@ -456,31 +472,11 @@ const extractReasoningText = (item: ResponseOutputReasoning): string => {
         segments.push(block.text)
         continue
       }
-
-      if (typeof block.thinking === "string") {
-        segments.push(block.thinking)
-        continue
-      }
-
-      const reasoningValue = (block as Record<string, unknown>).reasoning
-      if (typeof reasoningValue === "string") {
-        segments.push(reasoningValue)
-      }
     }
   }
 
-  collectFromBlocks(item.reasoning)
   collectFromBlocks(item.summary)
 
-  if (typeof item.thinking === "string") {
-    segments.push(item.thinking)
-  }
-
-  const textValue = (item as Record<string, unknown>).text
-  if (typeof textValue === "string") {
-    segments.push(textValue)
-  }
-
   return segments.join("").trim()
 }
 
diff --git a/src/routes/responses/utils.ts b/src/routes/responses/utils.ts
index 5dea1daae..734319cd7 100644
--- a/src/routes/responses/utils.ts
+++ b/src/routes/responses/utils.ts
@@ -31,16 +31,12 @@ const getPayloadItems = (
 ): Array<ResponseInputItem> => {
   const result: Array<ResponseInputItem> = []
 
-  const { input, instructions } = payload
+  const { input } = payload
 
   if (Array.isArray(input)) {
     result.push(...input)
   }
 
-  if (Array.isArray(instructions)) {
-    result.push(...instructions)
-  }
-
   return result
 }
 
diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
index b13349e4d..8322cacee 100644
--- a/src/services/copilot/create-responses.ts
+++ b/src/services/copilot/create-responses.ts
@@ -7,13 +7,13 @@ import { state } from "~/lib/state"
 
 export interface ResponsesPayload {
   model: string
+  instructions?: string | null
   input?: string | Array<ResponseInputItem>
-  instructions?: string | Array<ResponseInputItem> | null
+  tools?: Array<Record<string, unknown>> | null
+  tool_choice?: unknown
   temperature?: number | null
   top_p?: number | null
   max_output_tokens?: number | null
-  tools?: Array<Record<string, unknown>> | null
-  tool_choice?: unknown
   metadata?: Record<string, unknown> | null
   stream?: boolean | null
   response_format?: Record<string, unknown> | null
@@ -48,10 +48,20 @@ export interface ResponseFunctionCallOutputItem {
   status?: "in_progress" | "completed" | "incomplete"
 }
 
+export interface ResponseInputReasoning {
+  type: "reasoning"
+  summary: Array<{
+    type: "summary_text"
+    text: string
+  }>
+  encrypted_content: string
+}
+
 export type ResponseInputItem =
   | ResponseInputMessage
   | ResponseFunctionToolCallItem
   | ResponseFunctionCallOutputItem
+  | ResponseInputReasoning
   | Record<string, unknown>
 
 export type ResponseInputContent =
@@ -107,17 +117,15 @@ export interface ResponseOutputMessage {
 export interface ResponseOutputReasoning {
   id: string
   type: "reasoning"
-  reasoning?: Array<ResponseReasoningBlock>
   summary?: Array<ResponseReasoningBlock>
-  thinking?: string
+  encrypted_content?: string
+  status: "completed" | "in_progress" | "incomplete"
   [key: string]: unknown
 }
 
 export interface ResponseReasoningBlock {
   type: string
   text?: string
-  thinking?: string
-  [key: string]: unknown
 }
 
 export interface ResponseOutputFunctionCall {

From 9477b4541280246541f16c3416865d58d8170a1d Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Tue, 30 Sep 2025 15:48:32 +0800
Subject: [PATCH 06/62] feat: add signature to thinking messages and enhance
 reasoning structure in translation tests

---
 tests/anthropic-request.test.ts | 2 ++
 tests/translation.test.ts       | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/anthropic-request.test.ts b/tests/anthropic-request.test.ts
index a4a5b06b5..c86bcac13 100644
--- a/tests/anthropic-request.test.ts
+++ b/tests/anthropic-request.test.ts
@@ -136,6 +136,7 @@ describe("Anthropic to OpenAI translation logic", () => {
             {
               type: "thinking",
               thinking: "Let me think about this simple math problem...",
+              signature: "abc123",
             },
             { type: "text", text: "2+2 equals 4." },
           ],
@@ -168,6 +169,7 @@ describe("Anthropic to OpenAI translation logic", () => {
               type: "thinking",
               thinking:
                 "I need to call the weather API to get current weather information.",
+              signature: "def456",
             },
             { type: "text", text: "I'll check the weather for you." },
             {
diff --git a/tests/translation.test.ts b/tests/translation.test.ts
index 84856b932..0c3ececb2 100644
--- a/tests/translation.test.ts
+++ b/tests/translation.test.ts
@@ -81,7 +81,9 @@ describe("translateResponsesResultToAnthropic", () => {
         {
           id: "reason_1",
           type: "reasoning",
-          reasoning: [{ type: "text", text: "Thinking about the task." }],
+          summary: [{ type: "text", text: "Thinking about the task." }],
+          status: "completed",
+          encrypted_content: "encrypted_reasoning_content",
         },
         {
           id: "call_1",

From 44551f9aae3fd6cb99a27b349cce827df37021f5 Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Tue, 30 Sep 2025 15:56:34 +0800
Subject: [PATCH 07/62] refactor: remove summaryIndex from ResponsesStreamState
 and related handlers

---
 .../messages/responses-stream-translation.ts  | 33 ++++---------------
 1 file changed, 6 insertions(+), 27 deletions(-)

diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts
index a3857e8a7..3a4bdfd9f 100644
--- a/src/routes/messages/responses-stream-translation.ts
+++ b/src/routes/messages/responses-stream-translation.ts
@@ -15,7 +15,6 @@ export interface ResponsesStreamState {
   initialInputTokens?: number
   functionCallStateByOutputIndex: Map<number, FunctionCallStreamState>
   functionCallOutputIndexByItemId: Map<string, number>
-  summryIndex: number
 }
 
 type FunctionCallStreamState = {
@@ -33,7 +32,6 @@ export const createResponsesStreamState = (): ResponsesStreamState => ({
   blockHasDelta: new Set(),
   functionCallStateByOutputIndex: new Map(),
   functionCallOutputIndexByItemId: new Map(),
-  summryIndex: 0,
 })
 
 export const translateResponsesStreamEvent = (
@@ -172,13 +170,8 @@ const handleOutputItemDone = (
   }
 
   const outputIndex = toNumber(rawEvent.output_index)
-  const contentIndex = state.summryIndex
 
-  const blockIndex = openThinkingBlockIfNeeded(state, {
-    outputIndex,
-    contentIndex,
-    events,
-  })
+  const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events)
 
   const signature =
     typeof item.encrypted_content === "string" ? item.encrypted_content : ""
@@ -321,18 +314,13 @@ const handleReasoningSummaryTextDelta = (
   const events = ensureMessageStart(state)
 
   const outputIndex = toNumber(rawEvent.output_index)
-  const contentIndex = toNumber(rawEvent.summary_index)
   const deltaText = typeof rawEvent.delta === "string" ? rawEvent.delta : ""
 
   if (!deltaText) {
     return events
   }
 
-  const blockIndex = openThinkingBlockIfNeeded(state, {
-    outputIndex,
-    contentIndex,
-    events,
-  })
+  const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events)
 
   events.push({
     type: "content_block_delta",
@@ -354,16 +342,10 @@ const handleReasoningSummaryPartDone = (
   const events = ensureMessageStart(state)
 
   const outputIndex = toNumber(rawEvent.output_index)
-  const contentIndex = toNumber(rawEvent.summary_index)
-  state.summryIndex = contentIndex
   const part = isRecord(rawEvent.part) ? rawEvent.part : undefined
   const text = part && typeof part.text === "string" ? part.text : ""
 
-  const blockIndex = openThinkingBlockIfNeeded(state, {
-    outputIndex,
-    contentIndex,
-    events,
-  })
+  const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events)
 
   if (text && !state.blockHasDelta.has(blockIndex)) {
     events.push({
@@ -554,13 +536,10 @@ const openTextBlockIfNeeded = (
 
 const openThinkingBlockIfNeeded = (
   state: ResponsesStreamState,
-  params: {
-    outputIndex: number
-    contentIndex: number
-    events: Array<AnthropicStreamEventData>
-  },
+  outputIndex: number,
+  events: Array<AnthropicStreamEventData>,
 ): number => {
-  const { outputIndex, contentIndex, events } = params
+  const contentIndex = 0
   const key = getBlockKey(outputIndex, contentIndex)
   let blockIndex = state.blockIndexByKey.get(key)
 

From 708ae3377f58ff1b7902d5983e308434ee00bb4f Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Tue, 30 Sep 2025 23:21:27 +0800
Subject: [PATCH 08/62] feat: enhance streaming response handling with ping
 mechanism

---
 README.md                       | 11 ++++++-----
 src/routes/responses/handler.ts | 30 +++++++++++++++++++++++-------
 2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index e5b390991..e0aa9d3e2 100644
--- a/README.md
+++ b/README.md
@@ -184,11 +184,12 @@ The server exposes several endpoints to interact with the Copilot API. It provid
 
 These endpoints mimic the OpenAI API structure.
 
-| Endpoint                    | Method | Description                                               |
-| --------------------------- | ------ | --------------------------------------------------------- |
-| `POST /v1/chat/completions` | `POST` | Creates a model response for the given chat conversation. |
-| `GET /v1/models`            | `GET`  | Lists the currently available models.                     |
-| `POST /v1/embeddings`       | `POST` | Creates an embedding vector representing the input text.  |
+| Endpoint                    | Method | Description                                                      |
+| --------------------------- | ------ | ---------------------------------------------------------------- |
+| `POST /v1/responses`        | `POST` | Most advanced interface for generating model responses.          |
+| `POST /v1/chat/completions` | `POST` | Creates a model response for the given chat conversation.        |
+| `GET /v1/models`            | `GET`  | Lists the currently available models.                            |
+| `POST /v1/embeddings`       | `POST` | Creates an embedding vector representing the input text.         |
 
 ### Anthropic Compatible Endpoints
 
diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts
index ef7b38b93..d06d02d67 100644
--- a/src/routes/responses/handler.ts
+++ b/src/routes/responses/handler.ts
@@ -52,13 +52,29 @@ export const handleResponses = async (c: Context) => {
   if (isStreamingRequested(payload) && isAsyncIterable(response)) {
     consola.debug("Forwarding native Responses stream")
     return streamSSE(c, async (stream) => {
-      for await (const chunk of response) {
-        consola.debug("Responses stream chunk:", JSON.stringify(chunk))
-        await stream.writeSSE({
-          id: (chunk as { id?: string }).id,
-          event: (chunk as { event?: string }).event,
-          data: (chunk as { data?: string }).data ?? "",
-        })
+      const pingInterval = setInterval(async () => {
+        try {
+          await stream.writeSSE({
+            event: "ping",
+            data: JSON.stringify({ timestamp: Date.now() }),
+          })
+        } catch (error) {
+          consola.warn("Failed to send ping:", error)
+          clearInterval(pingInterval)
+        }
+      }, 3000)
+
+      try {
+        for await (const chunk of response) {
+          consola.debug("Responses stream chunk:", JSON.stringify(chunk))
+          await stream.writeSSE({
+            id: (chunk as { id?: string }).id,
+            event: (chunk as { event?: string }).event,
+            data: (chunk as { data?: string }).data ?? "",
+          })
+        }
+      } finally {
+        clearInterval(pingInterval)
       }
     })
   }

From 47fb3e46032ed8062fb50e9f20609733588bc95c Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Wed, 1 Oct 2025 18:43:11 +0800
Subject: [PATCH 09/62] feat: add cache_read_input_tokens to responses translation

---
 src/routes/messages/responses-stream-translation.ts | 11 ++++++++---
 src/routes/messages/responses-translation.ts        | 13 +++++++++----
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts
index 3a4bdfd9f..db09bf144 100644
--- a/src/routes/messages/responses-stream-translation.ts
+++ b/src/routes/messages/responses-stream-translation.ts
@@ -13,6 +13,7 @@ export interface ResponsesStreamState {
   currentResponseId?: string
   currentModel?: string
   initialInputTokens?: number
+  initialInputCachedTokens?: number
   functionCallStateByOutputIndex: Map<number, FunctionCallStreamState>
   functionCallOutputIndexByItemId: Map<string, number>
 }
@@ -476,11 +477,10 @@ const ensureMessageStart = (
   const id = response?.id ?? state.currentResponseId ?? "response"
   const model = response?.model ?? state.currentModel ?? ""
 
-  const inputTokens =
-    response?.usage?.input_tokens ?? state.initialInputTokens ?? 0
-
   state.messageStartSent = true
 
+  const inputTokens =
+    (state.initialInputTokens ?? 0) - (state.initialInputCachedTokens ?? 0)
   return [
     {
       type: "message_start",
@@ -495,6 +495,9 @@ const ensureMessageStart = (
         usage: {
           input_tokens: inputTokens,
           output_tokens: 0,
+          ...(state.initialInputCachedTokens !== undefined && {
+            cache_read_input_tokens: state.initialInputCachedTokens,
+          }),
         },
       },
     },
@@ -597,6 +600,8 @@ const cacheResponseMetadata = (
   state.currentResponseId = response.id
   state.currentModel = response.model
   state.initialInputTokens = response.usage?.input_tokens ?? 0
+  state.initialInputCachedTokens =
+    response.usage?.input_tokens_details?.cached_tokens
 }
 
 const buildErrorEvent = (message: string): AnthropicStreamEventData => ({
diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts
index 50ae3a600..00f481291 100644
--- a/src/routes/messages/responses-translation.ts
+++ b/src/routes/messages/responses-translation.ts
@@ -567,12 +567,17 @@ const mapResponsesStopReason = (
 const mapResponsesUsage = (
   response: ResponsesResult,
 ): AnthropicResponse["usage"] => {
-  const promptTokens = response.usage?.input_tokens ?? 0
-  const completionTokens = response.usage?.output_tokens ?? 0
+  const inputTokens = response.usage?.input_tokens ?? 0
+  const outputTokens = response.usage?.output_tokens ?? 0
+  const inputCachedTokens = response.usage?.input_tokens_details?.cached_tokens
 
   return {
-    input_tokens: promptTokens,
-    output_tokens: completionTokens,
+    input_tokens: inputTokens - (inputCachedTokens ?? 0),
+    output_tokens: outputTokens,
+    ...(response.usage?.input_tokens_details?.cached_tokens !== undefined && {
+      cache_read_input_tokens:
+        response.usage.input_tokens_details.cached_tokens,
+    }),
   }
 }
 

From 619d4828ad6dc54842f2b34fd7cd4d13a976fe3d Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Tue, 7 Oct 2025 20:57:05 +0800
Subject: [PATCH 10/62] feat: improve event log and enhance reasoning content
 handling by adding signature check and update prompt

---
 src/routes/messages/handler.ts               | 13 ++++++-----
 src/routes/messages/responses-translation.ts | 23 +++++++++++++++-----
 2 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index a3faf0454..cf50d8f77 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -138,28 +138,29 @@ const handleWithResponsesApi = async (
       const streamState = createResponsesStreamState()
 
       for await (const chunk of response) {
-        consola.debug("Responses raw stream event:", JSON.stringify(chunk))
-
-        const eventName = (chunk as { event?: string }).event
+        const eventName = chunk.event
         if (eventName === "ping") {
           await stream.writeSSE({ event: "ping", data: "" })
           continue
         }
 
-        const data = (chunk as { data?: string }).data
+        const data = chunk.data
         if (!data) {
           continue
         }
 
+        consola.debug("Responses raw stream event:", data)
+
         const events = translateResponsesStreamEvent(
           JSON.parse(data) as ResponseStreamEvent,
           streamState,
         )
         for (const event of events) {
-          consola.debug("Translated Anthropic event:", JSON.stringify(event))
+          const eventData = JSON.stringify(event)
+          consola.debug("Translated Anthropic event:", eventData)
           await stream.writeSSE({
             event: event.type,
-            data: JSON.stringify(event),
+            data: eventData,
           })
         }
       }
diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts
index ceb5e301a..9084fc78d 100644
--- a/src/routes/messages/responses-translation.ts
+++ b/src/routes/messages/responses-translation.ts
@@ -142,7 +142,11 @@ const translateAssistantMessage = (
       continue
     }
 
-    if (block.type === "thinking") {
+    if (
+      block.type === "thinking"
+      && block.signature
+      && block.signature.includes("@")
+    ) {
       flushPendingContent("assistant", pendingContent, items)
       items.push(createReasoningContent(block))
       continue
@@ -233,16 +237,21 @@ const createImageContent = (
 const createReasoningContent = (
   block: AnthropicThinkingBlock,
 ): ResponseInputReasoning => {
-  // allign with vscode-copilot-chat extractThinkingData, otherwise it will cause miss cache occasionally —— the usage input cached tokens to be 0
+  // align with vscode-copilot-chat extractThinkingData, should add id, otherwise it will cause miss cache occasionally —— the usage input cached tokens to be 0
   // https://github.com/microsoft/vscode-copilot-chat/blob/main/src/platform/endpoint/node/responsesApi.ts#L162
   // when use in codex cli, reasoning id is empty, so it will cause miss cache occasionally
   const array = block.signature.split("@")
   const signature = array[0]
-  const id = array.length > 1 ? array[1] : undefined
+  const id = array[1]
   return {
     id,
     type: "reasoning",
-    summary: [],
+    summary: [
+      {
+        type: "summary_text",
+        text: block.thinking,
+      },
+    ],
     encrypted_content: signature,
   }
 }
@@ -284,9 +293,11 @@ When using the BashOutput tool, follow these rules:
 - Only Bash Tool run_in_background set to true, Use BashOutput to read the output later
 ### TodoWrite tool
 When using the TodoWrite tool, follow these rules:
-- Skip using the TodoWrite tool for simple or straightforward tasks (roughly the easiest 25%).
+- Skip using the TodoWrite tool for tasks with three or fewer steps.
 - Do not make single-step todo lists.
-- When you made a todo, update it after having performed one of the sub-tasks that you shared on the todo list.`
+- When you made a todo, update it after having performed one of the sub-tasks that you shared on the todo list.
+## Special user requests
+- If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as ''date''), you should do so.`
 
   if (typeof system === "string") {
     return system + toolUsePrompt

From 5c6e4c6fae09356d60652a70a03a8dad3c59ad64 Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Wed, 8 Oct 2025 00:40:34 +0800
Subject: [PATCH 11/62] 1.fix claude code 2.0.28 warmup request consume premium
 request, forcing small model if no tools are used 2.add bun idleTimeout = 0
 3.feat: Compatible with Claude code JSONL file usage error scenarios, delay
 closeBlockIfOpen and map responses api to anthropic  support tool_use and fix
 spelling errors 4.feat: add configuration management with extra prompt
 handling and ensure config file creation

---
 src/lib/config.ts                             | 87 +++++++++++++++++++
 src/lib/paths.ts                              |  3 +
 src/routes/messages/handler.ts                |  6 ++
 .../messages/responses-stream-translation.ts  | 21 +++--
 src/routes/messages/responses-translation.ts  | 30 +++----
 src/routes/responses/handler.ts               | 30 ++-----
 src/start.ts                                  |  3 +
 tests/responses-stream-translation.test.ts    | 14 +--
 8 files changed, 130 insertions(+), 64 deletions(-)
 create mode 100644 src/lib/config.ts

diff --git a/src/lib/config.ts b/src/lib/config.ts
new file mode 100644
index 000000000..d39e4684d
--- /dev/null
+++ b/src/lib/config.ts
@@ -0,0 +1,87 @@
+import consola from "consola"
+import fs from "node:fs"
+
+import { PATHS } from "./paths"
+
+export interface AppConfig {
+  extraPrompts?: Record<string, string>
+  smallModel?: string
+}
+
+const defaultConfig: AppConfig = {
+  extraPrompts: {
+    "gpt-5-codex": `
+## Tool use
+- You have access to many tools. If a tool exists to perform a specific task, you MUST use that tool instead of running a terminal command to perform that task.
+### Bash tool
+When using the Bash tool, follow these rules:
+- always run_in_background set to false, unless you are running a long-running command (e.g., a server or a watch command).
+### BashOutput tool
+When using the BashOutput tool, follow these rules:
+- Only Bash Tool run_in_background set to true, Use BashOutput to read the output later
+### TodoWrite tool
+When using the TodoWrite tool, follow these rules:
+- Skip using the TodoWrite tool for tasks with three or fewer steps.
+- Do not make single-step todo lists.
+- When you made a todo, update it after having performed one of the sub-tasks that you shared on the todo list.
+## Special user requests
+- If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as 'date'), you should do so.
+`,
+  },
+  smallModel: "gpt-5-mini",
+}
+
+let cachedConfig: AppConfig | null = null
+
+function ensureConfigFile(): void {
+  try {
+    fs.accessSync(PATHS.CONFIG_PATH, fs.constants.R_OK | fs.constants.W_OK)
+  } catch {
+    fs.writeFileSync(
+      PATHS.CONFIG_PATH,
+      `${JSON.stringify(defaultConfig, null, 2)}\n`,
+      "utf8",
+    )
+    try {
+      fs.chmodSync(PATHS.CONFIG_PATH, 0o600)
+    } catch {
+      return
+    }
+  }
+}
+
+function readConfigFromDisk(): AppConfig {
+  ensureConfigFile()
+  try {
+    const raw = fs.readFileSync(PATHS.CONFIG_PATH, "utf8")
+    if (!raw.trim()) {
+      fs.writeFileSync(
+        PATHS.CONFIG_PATH,
+        `${JSON.stringify(defaultConfig, null, 2)}\n`,
+        "utf8",
+      )
+      return defaultConfig
+    }
+    return JSON.parse(raw) as AppConfig
+  } catch (error) {
+    consola.error("Failed to read config file, using default config", error)
+    return defaultConfig
+  }
+}
+
+export function getConfig(): AppConfig {
+  if (!cachedConfig) {
+    cachedConfig = readConfigFromDisk()
+  }
+  return cachedConfig
+}
+
+export function getExtraPromptForModel(model: string): string {
+  const config = getConfig()
+  return config.extraPrompts?.[model] ?? ""
+}
+
+export function getSmallModel(): string {
+  const config = getConfig()
+  return config.smallModel ?? "gpt-5-mini"
+}
diff --git a/src/lib/paths.ts b/src/lib/paths.ts
index 8d0a9f02b..e85c21d8a 100644
--- a/src/lib/paths.ts
+++ b/src/lib/paths.ts
@@ -5,15 +5,18 @@ import path from "node:path"
 const APP_DIR = path.join(os.homedir(), ".local", "share", "copilot-api")
 
 const GITHUB_TOKEN_PATH = path.join(APP_DIR, "github_token")
+const CONFIG_PATH = path.join(APP_DIR, "config.json")
 
 export const PATHS = {
   APP_DIR,
   GITHUB_TOKEN_PATH,
+  CONFIG_PATH,
 }
 
 export async function ensurePaths(): Promise<void> {
   await fs.mkdir(PATHS.APP_DIR, { recursive: true })
   await ensureFile(PATHS.GITHUB_TOKEN_PATH)
+  await ensureFile(PATHS.CONFIG_PATH)
 }
 
 async function ensureFile(filePath: string): Promise<void> {
diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index cf50d8f77..f6dd033d3 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -4,6 +4,7 @@ import consola from "consola"
 import { streamSSE } from "hono/streaming"
 
 import { awaitApproval } from "~/lib/approval"
+import { getSmallModel } from "~/lib/config"
 import { checkRateLimit } from "~/lib/rate-limit"
 import { state } from "~/lib/state"
 import {
@@ -42,6 +43,11 @@ export async function handleCompletion(c: Context) {
   const anthropicPayload = await c.req.json<AnthropicMessagesPayload>()
   consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload))
 
+  // fix claude code 2.0.28 warmup request consume premium request, forcing small model if no tools are used
+  if (!anthropicPayload.tools || anthropicPayload.tools.length === 0) {
+    anthropicPayload.model = getSmallModel()
+  }
+
   const useResponsesApi = shouldUseResponsesApi(anthropicPayload.model)
 
   if (state.manualApprove) {
diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts
index 56fd536e3..de4b0c3a4 100644
--- a/src/routes/messages/responses-stream-translation.ts
+++ b/src/routes/messages/responses-stream-translation.ts
@@ -176,8 +176,6 @@ const handleOutputItemDone = (
     state.blockHasDelta.add(blockIndex)
   }
 
-  closeBlockIfOpen(state, blockIndex, events)
-
   return events
 }
 
@@ -232,7 +230,6 @@ const handleFunctionCallArgumentsDone = (
     state.blockHasDelta.add(blockIndex)
   }
 
-  closeBlockIfOpen(state, blockIndex, events)
   state.functionCallStateByOutputIndex.delete(outputIndex)
   return events
 }
@@ -340,8 +337,6 @@ const handleOutputTextDone = (
     })
   }
 
-  closeBlockIfOpen(state, blockIndex, events)
-
   return events
 }
 
@@ -421,9 +416,7 @@ const messageStart = (
         usage: {
           input_tokens: inputTokens,
           output_tokens: 0,
-          ...(inputCachedTokens !== undefined && {
-            cache_creation_input_tokens: inputCachedTokens,
-          }),
+          cache_read_input_tokens: inputCachedTokens ?? 0,
         },
       },
     },
@@ -449,6 +442,7 @@ const openTextBlockIfNeeded = (
   }
 
   if (!state.openBlocks.has(blockIndex)) {
+    closeOpenBlocks(state, events)
     events.push({
       type: "content_block_start",
       index: blockIndex,
@@ -480,6 +474,7 @@ const openThinkingBlockIfNeeded = (
   }
 
   if (!state.openBlocks.has(blockIndex)) {
+    closeOpenBlocks(state, events)
     events.push({
       type: "content_block_start",
       index: blockIndex,
@@ -508,13 +503,20 @@ const closeBlockIfOpen = (
   state.blockHasDelta.delete(blockIndex)
 }
 
-const closeAllOpenBlocks = (
+const closeOpenBlocks = (
   state: ResponsesStreamState,
   events: Array<AnthropicStreamEventData>,
 ) => {
   for (const blockIndex of state.openBlocks) {
     closeBlockIfOpen(state, blockIndex, events)
   }
+}
+
+const closeAllOpenBlocks = (
+  state: ResponsesStreamState,
+  events: Array<AnthropicStreamEventData>,
+) => {
+  closeOpenBlocks(state, events)
 
   state.functionCallStateByOutputIndex.clear()
 }
@@ -562,6 +564,7 @@ const openFunctionCallBlock = (
   const { blockIndex } = functionCallState
 
   if (!state.openBlocks.has(blockIndex)) {
+    closeOpenBlocks(state, events)
     events.push({
       type: "content_block_start",
       index: blockIndex,
diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts
index 9084fc78d..d15931989 100644
--- a/src/routes/messages/responses-translation.ts
+++ b/src/routes/messages/responses-translation.ts
@@ -1,5 +1,6 @@
 import consola from "consola"
 
+import { getExtraPromptForModel } from "~/lib/config"
 import {
   type ResponsesPayload,
   type ResponseInputContent,
@@ -60,8 +61,8 @@ export const translateAnthropicMessagesToResponsesPayload = (
   const responsesPayload: ResponsesPayload = {
     model: payload.model,
     input,
-    instructions: translateSystemPrompt(payload.system),
-    temperature: payload.temperature ?? null,
+    instructions: translateSystemPrompt(payload.system, payload.model),
+    temperature: 1, // reasoning high temperature fixed to 1
     top_p: payload.top_p ?? null,
     max_output_tokens: payload.max_tokens,
     tools: translatedTools,
@@ -277,36 +278,22 @@ const createFunctionCallOutput = (
 
 const translateSystemPrompt = (
   system: string | Array<AnthropicTextBlock> | undefined,
+  model: string,
 ): string | null => {
   if (!system) {
     return null
   }
 
-  const toolUsePrompt = `
-## Tool use
-- You have access to many tools. If a tool exists to perform a specific task, you MUST use that tool instead of running a terminal command to perform that task.
-### Bash tool
-When using the Bash tool, follow these rules:
-- always run_in_background set to false, unless you are running a long-running command (e.g., a server or a watch command).
-### BashOutput tool
-When using the BashOutput tool, follow these rules:
-- Only Bash Tool run_in_background set to true, Use BashOutput to read the output later
-### TodoWrite tool
-When using the TodoWrite tool, follow these rules:
-- Skip using the TodoWrite tool for tasks with three or fewer steps.
-- Do not make single-step todo lists.
-- When you made a todo, update it after having performed one of the sub-tasks that you shared on the todo list.
-## Special user requests
-- If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as ''date''), you should do so.`
+  const extraPrompt = getExtraPromptForModel(model)
 
   if (typeof system === "string") {
-    return system + toolUsePrompt
+    return system + extraPrompt
   }
 
   const text = system
     .map((block, index) => {
       if (index === 0) {
-        return block.text + toolUsePrompt
+        return block.text + extraPrompt
       }
       return block.text
     })
@@ -548,6 +535,9 @@ const mapResponsesStopReason = (
   const { status, incomplete_details: incompleteDetails } = response
 
   if (status === "completed") {
+    if (response.output.some((item) => item.type === "function_call")) {
+      return "tool_use"
+    }
     return "end_turn"
   }
 
diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts
index d06d02d67..ef7b38b93 100644
--- a/src/routes/responses/handler.ts
+++ b/src/routes/responses/handler.ts
@@ -52,29 +52,13 @@ export const handleResponses = async (c: Context) => {
   if (isStreamingRequested(payload) && isAsyncIterable(response)) {
     consola.debug("Forwarding native Responses stream")
     return streamSSE(c, async (stream) => {
-      const pingInterval = setInterval(async () => {
-        try {
-          await stream.writeSSE({
-            event: "ping",
-            data: JSON.stringify({ timestamp: Date.now() }),
-          })
-        } catch (error) {
-          consola.warn("Failed to send ping:", error)
-          clearInterval(pingInterval)
-        }
-      }, 3000)
-
-      try {
-        for await (const chunk of response) {
-          consola.debug("Responses stream chunk:", JSON.stringify(chunk))
-          await stream.writeSSE({
-            id: (chunk as { id?: string }).id,
-            event: (chunk as { event?: string }).event,
-            data: (chunk as { data?: string }).data ?? "",
-          })
-        }
-      } finally {
-        clearInterval(pingInterval)
+      for await (const chunk of response) {
+        consola.debug("Responses stream chunk:", JSON.stringify(chunk))
+        await stream.writeSSE({
+          id: (chunk as { id?: string }).id,
+          event: (chunk as { event?: string }).event,
+          data: (chunk as { data?: string }).data ?? "",
+        })
       }
     })
   }
diff --git a/src/start.ts b/src/start.ts
index 46798ad87..b832bddd4 100644
--- a/src/start.ts
+++ b/src/start.ts
@@ -111,6 +111,9 @@ export async function runServer(options: RunServerOptions): Promise<void> {
   serve({
     fetch: server.fetch as ServerHandler,
     port: options.port,
+    bun: {
+      idleTimeout: 0,
+    },
   })
 }
 
diff --git a/tests/responses-stream-translation.test.ts b/tests/responses-stream-translation.test.ts
index 039411cf7..885ac9113 100644
--- a/tests/responses-stream-translation.test.ts
+++ b/tests/responses-stream-translation.test.ts
@@ -96,12 +96,7 @@ describe("translateResponsesStreamEvent tool calls", () => {
       partial_json: "[]}",
     })
 
-    const blockStop = events.find(
-      (event) => event.type === "content_block_stop",
-    )
-    expect(blockStop).toBeDefined()
-
-    expect(state.openBlocks.size).toBe(0)
+    expect(state.openBlocks.size).toBe(1)
     expect(state.functionCallStateByOutputIndex.size).toBe(0)
   })
 
@@ -139,12 +134,7 @@ describe("translateResponsesStreamEvent tool calls", () => {
         '{"todos":[{"content":"Review src/routes/responses/translation.ts"}]}',
     })
 
-    const blockStop = events.find(
-      (event) => event.type === "content_block_stop",
-    )
-    expect(blockStop).toBeDefined()
-
-    expect(state.openBlocks.size).toBe(0)
+    expect(state.openBlocks.size).toBe(1)
     expect(state.functionCallStateByOutputIndex.size).toBe(0)
   })
 })

From 32cb10a1ce2deffdc4a2aa5b500339aa03d2528b Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Mon, 3 Nov 2025 11:13:50 +0800
Subject: [PATCH 12/62] fix: the claude code small model where max_tokens is
 only 512, which is incompatible with gpt-5-mini

---
 src/routes/messages/responses-translation.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts
index d15931989..5c9728cdf 100644
--- a/src/routes/messages/responses-translation.ts
+++ b/src/routes/messages/responses-translation.ts
@@ -64,7 +64,7 @@ export const translateAnthropicMessagesToResponsesPayload = (
     instructions: translateSystemPrompt(payload.system, payload.model),
     temperature: 1, // reasoning high temperature fixed to 1
     top_p: payload.top_p ?? null,
-    max_output_tokens: payload.max_tokens,
+    max_output_tokens: Math.max(payload.max_tokens, 12800),
     tools: translatedTools,
     tool_choice: toolChoice,
     metadata: payload.metadata ? { ...payload.metadata } : null,

From 9051a213a2f3b9f862b88dd2992b1cfea492ee14 Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Mon, 3 Nov 2025 12:41:16 +0800
Subject: [PATCH 13/62] feat: add model reasoning efforts configuration and
 integrate into message translation

---
 src/lib/config.ts                            | 13 ++++++++++++-
 src/routes/messages/responses-translation.ts | 10 ++++++++--
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/src/lib/config.ts b/src/lib/config.ts
index d39e4684d..19642e99e 100644
--- a/src/lib/config.ts
+++ b/src/lib/config.ts
@@ -6,6 +6,7 @@ import { PATHS } from "./paths"
 export interface AppConfig {
   extraPrompts?: Record<string, string>
   smallModel?: string
+  modelReasoningEfforts?: Record<string, "minimal" | "low" | "medium" | "high">
 }
 
 const defaultConfig: AppConfig = {
@@ -29,6 +30,9 @@ When using the TodoWrite tool, follow these rules:
 `,
   },
   smallModel: "gpt-5-mini",
+  modelReasoningEfforts: {
+    "gpt-5-mini": "low",
+  },
 }
 
 let cachedConfig: AppConfig | null = null
@@ -71,7 +75,7 @@ function readConfigFromDisk(): AppConfig {
 
 export function getConfig(): AppConfig {
   if (!cachedConfig) {
-    cachedConfig = readConfigFromDisk()
+    cachedConfig ??= readConfigFromDisk()
   }
   return cachedConfig
 }
@@ -85,3 +89,10 @@ export function getSmallModel(): string {
   const config = getConfig()
   return config.smallModel ?? "gpt-5-mini"
 }
+
+export function getReasoningEffortForModel(
+  model: string,
+): "minimal" | "low" | "medium" | "high" {
+  const config = getConfig()
+  return config.modelReasoningEfforts?.[model] ?? "high"
+}
diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts
index 5c9728cdf..c8e9460b8 100644
--- a/src/routes/messages/responses-translation.ts
+++ b/src/routes/messages/responses-translation.ts
@@ -1,6 +1,9 @@
 import consola from "consola"
 
-import { getExtraPromptForModel } from "~/lib/config"
+import {
+  getExtraPromptForModel,
+  getReasoningEffortForModel,
+} from "~/lib/config"
 import {
   type ResponsesPayload,
   type ResponseInputContent,
@@ -73,7 +76,10 @@ export const translateAnthropicMessagesToResponsesPayload = (
     stream: payload.stream ?? null,
     store: false,
     parallel_tool_calls: true,
-    reasoning: { effort: "high", summary: "detailed" },
+    reasoning: {
+      effort: getReasoningEffortForModel(payload.model),
+      summary: "detailed",
+    },
     include: ["reasoning.encrypted_content"],
   }
 

From eeeb820d1ac8c6df3f9d892bbd4137d481c69784 Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Mon, 3 Nov 2025 12:56:30 +0800
Subject: [PATCH 14/62] fix: ensure application directory is created when
 config file is missing

---
 src/lib/config.ts | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/lib/config.ts b/src/lib/config.ts
index 19642e99e..a13b3fe37 100644
--- a/src/lib/config.ts
+++ b/src/lib/config.ts
@@ -41,6 +41,7 @@ function ensureConfigFile(): void {
   try {
     fs.accessSync(PATHS.CONFIG_PATH, fs.constants.R_OK | fs.constants.W_OK)
   } catch {
+    fs.mkdirSync(PATHS.APP_DIR, { recursive: true })
     fs.writeFileSync(
       PATHS.CONFIG_PATH,
       `${JSON.stringify(defaultConfig, null, 2)}\n`,
@@ -74,9 +75,7 @@ function readConfigFromDisk(): AppConfig {
 }
 
 export function getConfig(): AppConfig {
-  if (!cachedConfig) {
-    cachedConfig ??= readConfigFromDisk()
-  }
+  cachedConfig ??= readConfigFromDisk()
   return cachedConfig
 }
 

From 3f69f131860abad11c30c8bd1d8094d5a69c7c92 Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Wed, 29 Oct 2025 15:19:10 +0800
Subject: [PATCH 15/62] feat: consola file logger for handler.ts

---
 src/lib/logger.ts                      | 179 +++++++++++++++++++++++++
 src/routes/chat-completions/handler.ts |  20 +--
 src/routes/messages/handler.ts         |  32 ++---
 src/routes/responses/handler.ts        |  12 +-
 4 files changed, 214 insertions(+), 29 deletions(-)
 create mode 100644 src/lib/logger.ts

diff --git a/src/lib/logger.ts b/src/lib/logger.ts
new file mode 100644
index 000000000..466b4cabe
--- /dev/null
+++ b/src/lib/logger.ts
@@ -0,0 +1,179 @@
+import consola, { type ConsolaInstance } from "consola"
+import fs from "node:fs"
+import path from "node:path"
+import util from "node:util"
+
+import { PATHS } from "./paths"
+
+const LOG_RETENTION_DAYS = 7
+const LOG_RETENTION_MS = LOG_RETENTION_DAYS * 24 * 60 * 60 * 1000
+const CLEANUP_INTERVAL_MS = 24 * 60 * 60 * 1000
+const LOG_DIR = path.join(PATHS.APP_DIR, "logs")
+const FLUSH_INTERVAL_MS = 1000
+const MAX_BUFFER_SIZE = 100
+
+const logStreams = new Map<string, fs.WriteStream>()
+const logBuffers = new Map<string, Array<string>>()
+
+const ensureLogDirectory = () => {
+  if (!fs.existsSync(LOG_DIR)) {
+    fs.mkdirSync(LOG_DIR, { recursive: true })
+  }
+}
+
+const cleanupOldLogs = () => {
+  if (!fs.existsSync(LOG_DIR)) {
+    return
+  }
+
+  const now = Date.now()
+
+  for (const entry of fs.readdirSync(LOG_DIR)) {
+    const filePath = path.join(LOG_DIR, entry)
+
+    let stats: fs.Stats
+    try {
+      stats = fs.statSync(filePath)
+    } catch {
+      continue
+    }
+
+    if (!stats.isFile()) {
+      continue
+    }
+
+    if (now - stats.mtimeMs > LOG_RETENTION_MS) {
+      try {
+        fs.rmSync(filePath)
+      } catch {
+        continue
+      }
+    }
+  }
+}
+
+const formatArgs = (args: Array<unknown>) =>
+  args
+    .map((arg) =>
+      typeof arg === "string" ? arg : (
+        util.inspect(arg, { depth: null, colors: false })
+      ),
+    )
+    .join(" ")
+
+const sanitizeName = (name: string) => {
+  const normalized = name
+    .toLowerCase()
+    .replaceAll(/[^a-z0-9]+/g, "-")
+    .replaceAll(/^-+|-+$/g, "")
+
+  return normalized === "" ? "handler" : normalized
+}
+
+const getLogStream = (filePath: string): fs.WriteStream => {
+  let stream = logStreams.get(filePath)
+  if (!stream || stream.destroyed) {
+    stream = fs.createWriteStream(filePath, { flags: "a" })
+    logStreams.set(filePath, stream)
+
+    stream.on("error", (error: unknown) => {
+      console.warn("Log stream error", error)
+      logStreams.delete(filePath)
+    })
+  }
+  return stream
+}
+
+const flushBuffer = (filePath: string) => {
+  const buffer = logBuffers.get(filePath)
+  if (!buffer || buffer.length === 0) {
+    return
+  }
+
+  const stream = getLogStream(filePath)
+  const content = buffer.join("\n") + "\n"
+  stream.write(content, (error) => {
+    if (error) {
+      console.warn("Failed to write handler log", error)
+    }
+  })
+
+  logBuffers.set(filePath, [])
+}
+
+const flushAllBuffers = () => {
+  for (const filePath of logBuffers.keys()) {
+    flushBuffer(filePath)
+  }
+}
+
+const appendLine = (filePath: string, line: string) => {
+  let buffer = logBuffers.get(filePath)
+  if (!buffer) {
+    buffer = []
+    logBuffers.set(filePath, buffer)
+  }
+
+  buffer.push(line)
+
+  if (buffer.length >= MAX_BUFFER_SIZE) {
+    flushBuffer(filePath)
+  }
+}
+
+setInterval(flushAllBuffers, FLUSH_INTERVAL_MS)
+
+const cleanup = () => {
+  flushAllBuffers()
+  for (const stream of logStreams.values()) {
+    stream.end()
+  }
+  logStreams.clear()
+  logBuffers.clear()
+}
+
+process.on("exit", cleanup)
+process.on("SIGINT", () => {
+  cleanup()
+  process.exit(0)
+})
+process.on("SIGTERM", () => {
+  cleanup()
+  process.exit(0)
+})
+
+let lastCleanup = 0
+
+export const createHandlerLogger = (name: string): ConsolaInstance => {
+  ensureLogDirectory()
+
+  const sanitizedName = sanitizeName(name)
+  const instance = consola.withTag(name)
+
+  instance.level = 5
+  instance.setReporters([])
+
+  instance.addReporter({
+    log(logObj) {
+      ensureLogDirectory()
+
+      if (Date.now() - lastCleanup > CLEANUP_INTERVAL_MS) {
+        cleanupOldLogs()
+        lastCleanup = Date.now()
+      }
+
+      const date = logObj.date
+      const dateKey = date.toLocaleDateString("sv-SE")
+      const timestamp = date.toLocaleString("sv-SE", { hour12: false })
+      const filePath = path.join(LOG_DIR, `${sanitizedName}-${dateKey}.log`)
+      const message = formatArgs(logObj.args as Array<unknown>)
+      const line = `[${timestamp}] [${logObj.type}] [${logObj.tag || name}]${
+        message ? ` ${message}` : ""
+      }`
+
+      appendLine(filePath, line)
+    },
+  })
+
+  return instance
+}
diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts
index 04a5ae9ed..3a037a523 100644
--- a/src/routes/chat-completions/handler.ts
+++ b/src/routes/chat-completions/handler.ts
@@ -1,9 +1,9 @@
 import type { Context } from "hono"
 
-import consola from "consola"
 import { streamSSE, type SSEMessage } from "hono/streaming"
 
 import { awaitApproval } from "~/lib/approval"
+import { createHandlerLogger } from "~/lib/logger"
 import { checkRateLimit } from "~/lib/rate-limit"
 import { state } from "~/lib/state"
 import { getTokenCount } from "~/lib/tokenizer"
@@ -14,11 +14,13 @@ import {
   type ChatCompletionsPayload,
 } from "~/services/copilot/create-chat-completions"
 
+const logger = createHandlerLogger("chat-completions-handler")
+
 export async function handleCompletion(c: Context) {
   await checkRateLimit(state)
 
   let payload = await c.req.json<ChatCompletionsPayload>()
-  consola.debug("Request payload:", JSON.stringify(payload).slice(-400))
+  logger.debug("Request payload:", JSON.stringify(payload).slice(-400))
 
   // Find the selected model
   const selectedModel = state.models?.data.find(
@@ -29,12 +31,12 @@ export async function handleCompletion(c: Context) {
   try {
     if (selectedModel) {
       const tokenCount = await getTokenCount(payload, selectedModel)
-      consola.info("Current token count:", tokenCount)
+      logger.info("Current token count:", tokenCount)
     } else {
-      consola.warn("No model selected, skipping token count calculation")
+      logger.warn("No model selected, skipping token count calculation")
     }
   } catch (error) {
-    consola.warn("Failed to calculate token count:", error)
+    logger.warn("Failed to calculate token count:", error)
   }
 
   if (state.manualApprove) await awaitApproval()
@@ -44,20 +46,20 @@ export async function handleCompletion(c: Context) {
       ...payload,
       max_tokens: selectedModel?.capabilities.limits.max_output_tokens,
     }
-    consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens))
+    logger.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens))
   }
 
   const response = await createChatCompletions(payload)
 
   if (isNonStreaming(response)) {
-    consola.debug("Non-streaming response:", JSON.stringify(response))
+    logger.debug("Non-streaming response:", JSON.stringify(response))
     return c.json(response)
   }
 
-  consola.debug("Streaming response")
+  logger.debug("Streaming response")
   return streamSSE(c, async (stream) => {
     for await (const chunk of response) {
-      consola.debug("Streaming chunk:", JSON.stringify(chunk))
+      logger.debug("Streaming chunk:", JSON.stringify(chunk))
       await stream.writeSSE(chunk as SSEMessage)
     }
   })
diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index f6dd033d3..7fcccd83b 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -1,10 +1,10 @@
 import type { Context } from "hono"
 
-import consola from "consola"
 import { streamSSE } from "hono/streaming"
 
 import { awaitApproval } from "~/lib/approval"
 import { getSmallModel } from "~/lib/config"
+import { createHandlerLogger } from "~/lib/logger"
 import { checkRateLimit } from "~/lib/rate-limit"
 import { state } from "~/lib/state"
 import {
@@ -37,11 +37,13 @@ import {
 } from "./non-stream-translation"
 import { translateChunkToAnthropicEvents } from "./stream-translation"
 
+const logger = createHandlerLogger("messages-handler")
+
 export async function handleCompletion(c: Context) {
   await checkRateLimit(state)
 
   const anthropicPayload = await c.req.json<AnthropicMessagesPayload>()
-  consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload))
+  logger.debug("Anthropic request payload:", JSON.stringify(anthropicPayload))
 
   // fix claude code 2.0.28 warmup request consume premium request, forcing small model if no tools are used
   if (!anthropicPayload.tools || anthropicPayload.tools.length === 0) {
@@ -68,7 +70,7 @@ const handleWithChatCompletions = async (
   anthropicPayload: AnthropicMessagesPayload,
 ) => {
   const openAIPayload = translateToOpenAI(anthropicPayload)
-  consola.debug(
+  logger.debug(
     "Translated OpenAI request payload:",
     JSON.stringify(openAIPayload),
   )
@@ -76,19 +78,19 @@ const handleWithChatCompletions = async (
   const response = await createChatCompletions(openAIPayload)
 
   if (isNonStreaming(response)) {
-    consola.debug(
+    logger.debug(
       "Non-streaming response from Copilot:",
       JSON.stringify(response).slice(-400),
     )
     const anthropicResponse = translateToAnthropic(response)
-    consola.debug(
+    logger.debug(
       "Translated Anthropic response:",
       JSON.stringify(anthropicResponse),
     )
     return c.json(anthropicResponse)
   }
 
-  consola.debug("Streaming response from Copilot")
+  logger.debug("Streaming response from Copilot")
   return streamSSE(c, async (stream) => {
     const streamState: AnthropicStreamState = {
       messageStartSent: false,
@@ -98,7 +100,7 @@ const handleWithChatCompletions = async (
     }
 
     for await (const rawEvent of response) {
-      consola.debug("Copilot raw stream event:", JSON.stringify(rawEvent))
+      logger.debug("Copilot raw stream event:", JSON.stringify(rawEvent))
       if (rawEvent.data === "[DONE]") {
         break
       }
@@ -111,7 +113,7 @@ const handleWithChatCompletions = async (
       const events = translateChunkToAnthropicEvents(chunk, streamState)
 
       for (const event of events) {
-        consola.debug("Translated Anthropic event:", JSON.stringify(event))
+        logger.debug("Translated Anthropic event:", JSON.stringify(event))
         await stream.writeSSE({
           event: event.type,
           data: JSON.stringify(event),
@@ -127,7 +129,7 @@ const handleWithResponsesApi = async (
 ) => {
   const responsesPayload =
     translateAnthropicMessagesToResponsesPayload(anthropicPayload)
-  consola.debug(
+  logger.debug(
     "Translated Responses payload:",
     JSON.stringify(responsesPayload),
   )
@@ -139,7 +141,7 @@ const handleWithResponsesApi = async (
   })
 
   if (responsesPayload.stream && isAsyncIterable(response)) {
-    consola.debug("Streaming response from Copilot (Responses API)")
+    logger.debug("Streaming response from Copilot (Responses API)")
     return streamSSE(c, async (stream) => {
       const streamState = createResponsesStreamState()
 
@@ -155,7 +157,7 @@ const handleWithResponsesApi = async (
           continue
         }
 
-        consola.debug("Responses raw stream event:", data)
+        logger.debug("Responses raw stream event:", data)
 
         const events = translateResponsesStreamEvent(
           JSON.parse(data) as ResponseStreamEvent,
@@ -163,7 +165,7 @@ const handleWithResponsesApi = async (
         )
         for (const event of events) {
           const eventData = JSON.stringify(event)
-          consola.debug("Translated Anthropic event:", eventData)
+          logger.debug("Translated Anthropic event:", eventData)
           await stream.writeSSE({
             event: event.type,
             data: eventData,
@@ -172,7 +174,7 @@ const handleWithResponsesApi = async (
       }
 
       if (!streamState.messageCompleted) {
-        consola.warn(
+        logger.warn(
           "Responses stream ended without completion; sending fallback message_stop",
         )
         const fallback = { type: "message_stop" as const }
@@ -184,14 +186,14 @@ const handleWithResponsesApi = async (
     })
   }
 
-  consola.debug(
+  logger.debug(
     "Non-streaming Responses result:",
     JSON.stringify(response).slice(-400),
   )
   const anthropicResponse = translateResponsesResultToAnthropic(
     response as ResponsesResult,
   )
-  consola.debug(
+  logger.debug(
     "Translated Anthropic response:",
     JSON.stringify(anthropicResponse),
   )
diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts
index ef7b38b93..574d61fcf 100644
--- a/src/routes/responses/handler.ts
+++ b/src/routes/responses/handler.ts
@@ -1,9 +1,9 @@
 import type { Context } from "hono"
 
-import consola from "consola"
 import { streamSSE } from "hono/streaming"
 
 import { awaitApproval } from "~/lib/approval"
+import { createHandlerLogger } from "~/lib/logger"
 import { checkRateLimit } from "~/lib/rate-limit"
 import { state } from "~/lib/state"
 import {
@@ -14,13 +14,15 @@ import {
 
 import { getResponsesRequestOptions } from "./utils"
 
+const logger = createHandlerLogger("responses-handler")
+
 const RESPONSES_ENDPOINT = "/responses"
 
 export const handleResponses = async (c: Context) => {
   await checkRateLimit(state)
 
   const payload = await c.req.json<ResponsesPayload>()
-  consola.debug("Responses request payload:", JSON.stringify(payload))
+  logger.debug("Responses request payload:", JSON.stringify(payload))
 
   const selectedModel = state.models?.data.find(
     (model) => model.id === payload.model,
@@ -50,10 +52,10 @@ export const handleResponses = async (c: Context) => {
   const response = await createResponses(payload, { vision, initiator })
 
   if (isStreamingRequested(payload) && isAsyncIterable(response)) {
-    consola.debug("Forwarding native Responses stream")
+    logger.debug("Forwarding native Responses stream")
     return streamSSE(c, async (stream) => {
       for await (const chunk of response) {
-        consola.debug("Responses stream chunk:", JSON.stringify(chunk))
+        logger.debug("Responses stream chunk:", JSON.stringify(chunk))
         await stream.writeSSE({
           id: (chunk as { id?: string }).id,
           event: (chunk as { event?: string }).event,
@@ -63,7 +65,7 @@ export const handleResponses = async (c: Context) => {
     })
   }
 
-  consola.debug(
+  logger.debug(
     "Forwarding native Responses result:",
     JSON.stringify(response).slice(-400),
   )

From 4c0d775e1dc6b8648c7ad5f21fb783fc3246facf Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Thu, 30 Oct 2025 10:42:32 +0800
Subject: [PATCH 16/62] fix: Copilot function call returning infinite line
 breaks until max_tokens limit

---
 src/routes/messages/handler.ts                | 16 +++-
 .../messages/responses-stream-translation.ts  | 89 ++++++++++++++++++-
 2 files changed, 97 insertions(+), 8 deletions(-)

diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 7fcccd83b..19ba01687 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -8,6 +8,7 @@ import { createHandlerLogger } from "~/lib/logger"
 import { checkRateLimit } from "~/lib/rate-limit"
 import { state } from "~/lib/state"
 import {
+  buildErrorEvent,
   createResponsesStreamState,
   translateResponsesStreamEvent,
 } from "~/routes/messages/responses-stream-translation"
@@ -171,16 +172,23 @@ const handleWithResponsesApi = async (
             data: eventData,
           })
         }
+
+        if (streamState.messageCompleted) {
+          logger.debug("Message completed, ending stream")
+          break
+        }
       }
 
       if (!streamState.messageCompleted) {
         logger.warn(
-          "Responses stream ended without completion; sending fallback message_stop",
+          "Responses stream ended without completion; sending erorr event",
+        )
+        const errorEvent = buildErrorEvent(
+          "Responses stream ended without completion",
         )
-        const fallback = { type: "message_stop" as const }
         await stream.writeSSE({
-          event: fallback.type,
-          data: JSON.stringify(fallback),
+          event: errorEvent.type,
+          data: JSON.stringify(errorEvent),
         })
       }
     })
diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts
index de4b0c3a4..524b2e17f 100644
--- a/src/routes/messages/responses-stream-translation.ts
+++ b/src/routes/messages/responses-stream-translation.ts
@@ -1,5 +1,3 @@
-import consola from "consola"
-
 import {
   type ResponseCompletedEvent,
   type ResponseCreatedEvent,
@@ -21,6 +19,39 @@ import {
 import { type AnthropicStreamEventData } from "./anthropic-types"
 import { translateResponsesResultToAnthropic } from "./responses-translation"
 
+const MAX_CONSECUTIVE_FUNCTION_CALL_WHITESPACE = 20
+
+class FunctionCallArgumentsValidationError extends Error {
+  constructor(message: string) {
+    super(message)
+    this.name = "FunctionCallArgumentsValidationError"
+  }
+}
+
+const updateWhitespaceRunState = (
+  previousCount: number,
+  chunk: string,
+): {
+  nextCount: number
+  exceeded: boolean
+} => {
+  let count = previousCount
+
+  for (const char of chunk) {
+    if (char === " " || char === "\r" || char === "\n") {
+      count += 1
+      if (count > MAX_CONSECUTIVE_FUNCTION_CALL_WHITESPACE) {
+        return { nextCount: count, exceeded: true }
+      }
+      continue
+    }
+
+    count = 0
+  }
+
+  return { nextCount: count, exceeded: false }
+}
+
 export interface ResponsesStreamState {
   messageStartSent: boolean
   messageCompleted: boolean
@@ -35,6 +66,7 @@ type FunctionCallStreamState = {
   blockIndex: number
   toolCallId: string
   name: string
+  consecutiveWhitespaceCount: number
 }
 
 export const createResponsesStreamState = (): ResponsesStreamState => ({
@@ -102,7 +134,6 @@ export const translateResponsesStreamEvent = (
     }
 
     default: {
-      consola.debug("Unknown Responses stream event type:", eventType)
       return []
     }
   }
@@ -186,11 +217,45 @@ const handleFunctionCallArgumentsDelta = (
   const events = new Array<AnthropicStreamEventData>()
   const outputIndex = rawEvent.output_index
   const deltaText = rawEvent.delta
+
+  if (!deltaText) {
+    return events
+  }
+
   const blockIndex = openFunctionCallBlock(state, {
     outputIndex,
     events,
   })
 
+  const functionCallState =
+    state.functionCallStateByOutputIndex.get(outputIndex)
+  if (!functionCallState) {
+    return handleFunctionCallArgumentsValidationError(
+      new FunctionCallArgumentsValidationError(
+        "Received function call arguments delta without an open tool call block.",
+      ),
+      state,
+      events,
+    )
+  }
+
+  // fix: copolit function call returning infinite line breaks until max_tokens limit
+  // "arguments": "{\"path\":\"xxx\",\"pattern\":\"**/*.ts\",\"} }? Wait extra braces. Need correct. I should run? Wait overcame. Need proper JSON with pattern \"\n\n\n\n\n\n\n\n...
+  const { nextCount, exceeded } = updateWhitespaceRunState(
+    functionCallState.consecutiveWhitespaceCount,
+    deltaText,
+  )
+  if (exceeded) {
+    return handleFunctionCallArgumentsValidationError(
+      new FunctionCallArgumentsValidationError(
+        "Received function call arguments delta containing more than 20 consecutive whitespace characters.",
+      ),
+      state,
+      events,
+    )
+  }
+  functionCallState.consecutiveWhitespaceCount = nextCount
+
   events.push({
     type: "content_block_delta",
     index: blockIndex,
@@ -394,6 +459,21 @@ const handleErrorEvent = (
   return [buildErrorEvent(message)]
 }
 
+const handleFunctionCallArgumentsValidationError = (
+  error: FunctionCallArgumentsValidationError,
+  state: ResponsesStreamState,
+  events: Array<AnthropicStreamEventData> = [],
+): Array<AnthropicStreamEventData> => {
+  const reason = error.message
+
+  closeAllOpenBlocks(state, events)
+  state.messageCompleted = true
+
+  events.push(buildErrorEvent(reason))
+
+  return events
+}
+
 const messageStart = (
   state: ResponsesStreamState,
   response: ResponsesResult,
@@ -521,7 +601,7 @@ const closeAllOpenBlocks = (
   state.functionCallStateByOutputIndex.clear()
 }
 
-const buildErrorEvent = (message: string): AnthropicStreamEventData => ({
+export const buildErrorEvent = (message: string): AnthropicStreamEventData => ({
   type: "error",
   error: {
     type: "api_error",
@@ -556,6 +636,7 @@ const openFunctionCallBlock = (
       blockIndex,
       toolCallId: resolvedToolCallId,
       name: resolvedName,
+      consecutiveWhitespaceCount: 0,
     }
 
     state.functionCallStateByOutputIndex.set(outputIndex, functionCallState)

From 1ec12db6805443f4e6d55cb3027ca19e4c39c1be Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Mon, 3 Nov 2025 13:25:41 +0800
Subject: [PATCH 17/62] feat: add verbose logging configuration to enhance log
 detail level

---
 src/lib/logger.ts | 5 ++++-
 src/lib/state.ts  | 1 +
 src/start.ts      | 1 +
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/lib/logger.ts b/src/lib/logger.ts
index 466b4cabe..93a3b01f0 100644
--- a/src/lib/logger.ts
+++ b/src/lib/logger.ts
@@ -4,6 +4,7 @@ import path from "node:path"
 import util from "node:util"
 
 import { PATHS } from "./paths"
+import { state } from "./state"
 
 const LOG_RETENTION_DAYS = 7
 const LOG_RETENTION_MS = LOG_RETENTION_DAYS * 24 * 60 * 60 * 1000
@@ -150,7 +151,9 @@ export const createHandlerLogger = (name: string): ConsolaInstance => {
   const sanitizedName = sanitizeName(name)
   const instance = consola.withTag(name)
 
-  instance.level = 5
+  if (state.verbose) {
+    instance.level = 5
+  }
   instance.setReporters([])
 
   instance.addReporter({
diff --git a/src/lib/state.ts b/src/lib/state.ts
index 5ba4dc1d1..7c20f3c38 100644
--- a/src/lib/state.ts
+++ b/src/lib/state.ts
@@ -15,6 +15,7 @@ export interface State {
   // Rate limiting configuration
   rateLimitSeconds?: number
   lastRequestTimestamp?: number
+  verbose?: boolean
 }
 
 export const state: State = {
diff --git a/src/start.ts b/src/start.ts
index b832bddd4..9cef69f2b 100644
--- a/src/start.ts
+++ b/src/start.ts
@@ -27,6 +27,7 @@ interface RunServerOptions {
 
 export async function runServer(options: RunServerOptions): Promise<void> {
   if (options.verbose) {
+    state.verbose = true
     consola.level = 5
     consola.info("Verbose logging enabled")
   }

From 174e868e2a3803da450a13438e5f42a058fc6bd6 Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Mon, 3 Nov 2025 13:44:59 +0800
Subject: [PATCH 18/62] fix: update verbose property to be required in State
 interface and adjust runServer to set verbose level correctly

---
 src/lib/state.ts | 3 ++-
 src/start.ts     | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/lib/state.ts b/src/lib/state.ts
index 7c20f3c38..5d5bc2bb6 100644
--- a/src/lib/state.ts
+++ b/src/lib/state.ts
@@ -15,7 +15,7 @@ export interface State {
   // Rate limiting configuration
   rateLimitSeconds?: number
   lastRequestTimestamp?: number
-  verbose?: boolean
+  verbose: boolean
 }
 
 export const state: State = {
@@ -23,4 +23,5 @@ export const state: State = {
   manualApprove: false,
   rateLimitWait: false,
   showToken: false,
+  verbose: false,
 }
diff --git a/src/start.ts b/src/start.ts
index 9cef69f2b..bcd16e354 100644
--- a/src/start.ts
+++ b/src/start.ts
@@ -11,7 +11,6 @@ import { generateEnvScript } from "./lib/shell"
 import { state } from "./lib/state"
 import { setupCopilotToken, setupGitHubToken } from "./lib/token"
 import { cacheModels, cacheVSCodeVersion } from "./lib/utils"
-import { server } from "./server"
 
 interface RunServerOptions {
   port: number
@@ -26,8 +25,8 @@ interface RunServerOptions {
 }
 
 export async function runServer(options: RunServerOptions): Promise<void> {
+  state.verbose = options.verbose
   if (options.verbose) {
-    state.verbose = true
     consola.level = 5
     consola.info("Verbose logging enabled")
   }
@@ -109,6 +108,8 @@ export async function runServer(options: RunServerOptions): Promise<void> {
     `🌐 Usage Viewer: https://ericc-ch.github.io/copilot-api?endpoint=${serverUrl}/usage`,
   )
 
+  const { server } = await import("./server")
+
   serve({
     fetch: server.fetch as ServerHandler,
     port: options.port,

From 6f479267e70a8b6fcad1691b23f7e61a51107813 Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Thu, 6 Nov 2025 17:05:36 +0800
Subject: [PATCH 19/62] fix: correct typo in warning message and refine
 whitespace handling logic

---
 src/routes/messages/handler.ts                      | 2 +-
 src/routes/messages/responses-stream-translation.ts | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 19ba01687..54f4a0622 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -181,7 +181,7 @@ const handleWithResponsesApi = async (
 
       if (!streamState.messageCompleted) {
         logger.warn(
-          "Responses stream ended without completion; sending erorr event",
+          "Responses stream ended without completion; sending error event",
         )
         const errorEvent = buildErrorEvent(
           "Responses stream ended without completion",
diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts
index 524b2e17f..5a0338b90 100644
--- a/src/routes/messages/responses-stream-translation.ts
+++ b/src/routes/messages/responses-stream-translation.ts
@@ -38,7 +38,7 @@ const updateWhitespaceRunState = (
   let count = previousCount
 
   for (const char of chunk) {
-    if (char === " " || char === "\r" || char === "\n") {
+    if (char === "\r" || char === "\n") {
       count += 1
       if (count > MAX_CONSECUTIVE_FUNCTION_CALL_WHITESPACE) {
         return { nextCount: count, exceeded: true }
@@ -46,7 +46,9 @@ const updateWhitespaceRunState = (
       continue
     }
 
-    count = 0
+    if (char !== " ") {
+      count = 0
+    }
   }
 
   return { nextCount: count, exceeded: false }

From 01d4adb07158de9bf722e8ea4f5080f978c830f6 Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Mon, 10 Nov 2025 23:26:33 +0800
Subject: [PATCH 20/62] fix: update token counting logic for GPT, Claude, and
 Grok models, adjusting input token calculations and handling tool prompts

---
 src/lib/tokenizer.ts                        | 65 ++++++++++++++++-----
 src/routes/messages/count-tokens-handler.ts | 15 ++---
 2 files changed, 58 insertions(+), 22 deletions(-)

diff --git a/src/lib/tokenizer.ts b/src/lib/tokenizer.ts
index 8c3eda736..e9b83ac5b 100644
--- a/src/lib/tokenizer.ts
+++ b/src/lib/tokenizer.ts
@@ -37,7 +37,9 @@ const calculateToolCallsTokens = (
   let tokens = 0
   for (const toolCall of toolCalls) {
     tokens += constants.funcInit
-    tokens += encoder.encode(JSON.stringify(toolCall)).length
+    tokens += encoder.encode(toolCall.id).length
+    tokens += encoder.encode(toolCall.function.name).length
+    tokens += encoder.encode(toolCall.function.arguments).length
   }
   tokens += constants.funcEnd
   return tokens
@@ -158,6 +160,7 @@ const getModelConstants = (model: Model) => {
         enumInit: -3,
         enumItem: 3,
         funcEnd: 12,
+        isGpt: true,
       }
     : {
         funcInit: 7,
@@ -166,6 +169,7 @@ const getModelConstants = (model: Model) => {
         enumInit: -3,
         enumItem: 3,
         funcEnd: 12,
+        isGpt: model.id.startsWith("gpt-"),
       }
 }
 
@@ -218,8 +222,12 @@ const calculateParameterTokens = (
   const line = `${paramName}:${paramType}:${paramDesc}`
   tokens += encoder.encode(line).length
 
+  if (param.type === "array" && param["items"]) {
+    tokens += calculateParametersTokens(param["items"], encoder, constants)
+  }
+
   // Handle additional properties (excluding standard ones)
-  const excludedKeys = new Set(["type", "description", "enum"])
+  const excludedKeys = new Set(["type", "description", "enum", "items"])
   for (const propertyName of Object.keys(param)) {
     if (!excludedKeys.has(propertyName)) {
       const propertyValue = param[propertyName]
@@ -234,6 +242,27 @@ const calculateParameterTokens = (
   return tokens
 }
 
+/**
+ * Calculate tokens for properties object
+ */
+const calculatePropertiesTokens = (
+  properties: Record<string, unknown>,
+  encoder: Encoder,
+  constants: ReturnType<typeof getModelConstants>,
+): number => {
+  let tokens = 0
+  if (Object.keys(properties).length > 0) {
+    tokens += constants.propInit
+    for (const propKey of Object.keys(properties)) {
+      tokens += calculateParameterTokens(propKey, properties[propKey], {
+        encoder,
+        constants,
+      })
+    }
+  }
+  return tokens
+}
+
 /**
  * Calculate tokens for function parameters
  */
@@ -249,18 +278,17 @@ const calculateParametersTokens = (
   const params = parameters as Record<string, unknown>
   let tokens = 0
 
+  const excludedKeys = new Set(["$schema", "additionalProperties"])
   for (const [key, value] of Object.entries(params)) {
+    if (excludedKeys.has(key)) {
+      continue
+    }
     if (key === "properties") {
-      const properties = value as Record<string, unknown>
-      if (Object.keys(properties).length > 0) {
-        tokens += constants.propInit
-        for (const propKey of Object.keys(properties)) {
-          tokens += calculateParameterTokens(propKey, properties[propKey], {
-            encoder,
-            constants,
-          })
-        }
-      }
+      tokens += calculatePropertiesTokens(
+        value as Record<string, unknown>,
+        encoder,
+        constants,
+      )
     } else {
       const paramText =
         typeof value === "string" ? value : JSON.stringify(value)
@@ -306,10 +334,16 @@ export const numTokensForTools = (
   constants: ReturnType<typeof getModelConstants>,
 ): number => {
   let funcTokenCount = 0
-  for (const tool of tools) {
-    funcTokenCount += calculateToolTokens(tool, encoder, constants)
+  if (constants.isGpt) {
+    for (const tool of tools) {
+      funcTokenCount += calculateToolTokens(tool, encoder, constants)
+    }
+    funcTokenCount += constants.funcEnd
+  } else {
+    for (const tool of tools) {
+      funcTokenCount += encoder.encode(JSON.stringify(tool)).length
+    }
   }
-  funcTokenCount += constants.funcEnd
   return funcTokenCount
 }
 
@@ -335,6 +369,7 @@ export const getTokenCount = async (
   )
 
   const constants = getModelConstants(model)
+  // gpt count token https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
   let inputTokens = calculateTokens(inputMessages, encoder, constants)
   if (payload.tools && payload.tools.length > 0) {
     inputTokens += numTokensForTools(payload.tools, encoder, constants)
diff --git a/src/routes/messages/count-tokens-handler.ts b/src/routes/messages/count-tokens-handler.ts
index 2ec849cb8..70bd6a53c 100644
--- a/src/routes/messages/count-tokens-handler.ts
+++ b/src/routes/messages/count-tokens-handler.ts
@@ -33,18 +33,21 @@ export async function handleCountTokens(c: Context) {
     const tokenCount = await getTokenCount(openAIPayload, selectedModel)
 
     if (anthropicPayload.tools && anthropicPayload.tools.length > 0) {
-      let mcpToolExist = false
+      let addToolSystemPromptCount = false
       if (anthropicBeta?.startsWith("claude-code")) {
-        mcpToolExist = anthropicPayload.tools.some((tool) =>
-          tool.name.startsWith("mcp__"),
+        const toolsLength = anthropicPayload.tools.length
+        addToolSystemPromptCount = !anthropicPayload.tools.some(
+          (tool) =>
+            tool.name.startsWith("mcp__")
+            || (tool.name === "Skill" && toolsLength === 1),
         )
       }
-      if (!mcpToolExist) {
+      if (addToolSystemPromptCount) {
         if (anthropicPayload.model.startsWith("claude")) {
           // https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/overview#pricing
           tokenCount.input = tokenCount.input + 346
         } else if (anthropicPayload.model.startsWith("grok")) {
-          tokenCount.input = tokenCount.input + 480
+          tokenCount.input = tokenCount.input + 120
         }
       }
     }
@@ -52,8 +55,6 @@ export async function handleCountTokens(c: Context) {
     let finalTokenCount = tokenCount.input + tokenCount.output
     if (anthropicPayload.model.startsWith("claude")) {
       finalTokenCount = Math.round(finalTokenCount * 1.15)
-    } else if (anthropicPayload.model.startsWith("grok")) {
-      finalTokenCount = Math.round(finalTokenCount * 1.03)
     }
 
     consola.info("Token count:", finalTokenCount)

From 3cdc32c0811469da9eebec5ca3892caf068df542 Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Thu, 20 Nov 2025 07:43:01 +0800
Subject: [PATCH 21/62] fix: extend whitespace handling in
 updateWhitespaceRunState to include tab characters

---
 src/routes/messages/responses-stream-translation.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts
index 5a0338b90..5fa043c91 100644
--- a/src/routes/messages/responses-stream-translation.ts
+++ b/src/routes/messages/responses-stream-translation.ts
@@ -38,7 +38,7 @@ const updateWhitespaceRunState = (
   let count = previousCount
 
   for (const char of chunk) {
-    if (char === "\r" || char === "\n") {
+    if (char === "\r" || char === "\n" || char === "\t") {
       count += 1
       if (count > MAX_CONSECUTIVE_FUNCTION_CALL_WHITESPACE) {
         return { nextCount: count, exceeded: true }

From f7835a44f06976cab874700e4d94a5f5c0379369 Mon Sep 17 00:00:00 2001
From: SR_team <sr_team@sr.team>
Date: Sat, 22 Nov 2025 14:17:07 +0500
Subject: [PATCH 22/62] Remove incompatible with copilot responses
 `service_tier` field (#45)

Some clients, such as RooCode, may send `service_tier` to the `/responses` endpoint, but Copilot does not support this field and returns an error.
---
 src/services/copilot/create-responses.ts | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
index 4e4448ecc..1d4e06141 100644
--- a/src/services/copilot/create-responses.ts
+++ b/src/services/copilot/create-responses.ts
@@ -22,6 +22,7 @@ export interface ResponsesPayload {
   store?: boolean | null
   reasoning?: Reasoning | null
   include?: Array<ResponseIncludable>
+  service_tier?: string | null // NOTE: Unsupported by GitHub Copilot
   [key: string]: unknown
 }
 
@@ -336,6 +337,9 @@ export const createResponses = async (
     "X-Initiator": initiator,
   }
 
+  // service_tier is not supported by github copilot
+  payload.service_tier = null
+
   const response = await fetch(`${copilotBaseUrl(state)}/responses`, {
     method: "POST",
     headers,

From 318855eb868d2b8999a98cae08125602cb264503 Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Fri, 5 Dec 2025 23:33:26 +0800
Subject: [PATCH 23/62] feat(config): enhance model configuration with
 automatic defaults and expanded reasoning options and add doc

---
 README.md                                   | 24 +++++-
 src/lib/config.ts                           | 83 ++++++++++++++++-----
 src/routes/messages/count-tokens-handler.ts |  2 +-
 src/routes/messages/handler.ts              |  7 +-
 src/services/copilot/create-responses.ts    |  2 +-
 src/start.ts                                |  4 +
 6 files changed, 98 insertions(+), 24 deletions(-)

diff --git a/README.md b/README.md
index e54440f99..dee26a99f 100644
--- a/README.md
+++ b/README.md
@@ -177,6 +177,28 @@ The following command line options are available for the `start` command:
 | ------ | ------------------------- | ------- | ----- |
 | --json | Output debug info as JSON | false   | none  |
 
+## Configuration (config.json)
+
+- **Location:** `~/.local/share/copilot-api/config.json` (Linux/macOS) or `%USERPROFILE%\.local\share\copilot-api\config.json` (Windows).
+- **Default shape:**
+  ```json
+  {
+    "extraPrompts": {
+      "gpt-5-mini": "<built-in exploration prompt>",
+      "gpt-5.1-codex-max": "<built-in exploration prompt>"
+    },
+    "smallModel": "gpt-5-mini",
+    "modelReasoningEfforts": {
+      "gpt-5-mini": "low"
+    }
+  }
+  ```
+- **extraPrompts:** Map of `model -> prompt` appended to the first system prompt when translating Anthropic-style requests to Copilot. Use this to inject guardrails or guidance per model. Missing default entries are auto-added without overwriting your custom prompts.
+- **smallModel:** Fallback model used for tool-less warmup messages (e.g., Claude Code probe requests) to avoid spending premium requests; defaults to `gpt-5-mini`.
+- **modelReasoningEfforts:** Per-model `reasoning.effort` sent to the Copilot Responses API. Allowed values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. If a model isn’t listed, `high` is used by default.
+
+Edit this file to customize prompts or swap in your own fast model. Restart the server (or rerun the command) after changes so the cached config is refreshed.
+
 ## API Endpoints
 
 The server exposes several endpoints to interact with the Copilot API. It provides OpenAI-compatible endpoints and now also includes support for Anthropic-compatible endpoints, allowing for greater flexibility with different tools and services.
@@ -187,7 +209,7 @@ These endpoints mimic the OpenAI API structure.
 
 | Endpoint                    | Method | Description                                                      |
 | --------------------------- | ------ | ---------------------------------------------------------------- |
-| `POST /v1/responses`        | `POST` | Most advanced interface for generating model responses.          |
+| `POST /v1/responses`        | `POST` | OpenAI Most advanced interface for generating model responses.          |
 | `POST /v1/chat/completions` | `POST` | Creates a model response for the given chat conversation.        |
 | `GET /v1/models`            | `GET`  | Lists the currently available models.                            |
 | `POST /v1/embeddings`       | `POST` | Creates an embedding vector representing the input text.         |
diff --git a/src/lib/config.ts b/src/lib/config.ts
index a13b3fe37..dff63eb5c 100644
--- a/src/lib/config.ts
+++ b/src/lib/config.ts
@@ -6,28 +6,23 @@ import { PATHS } from "./paths"
 export interface AppConfig {
   extraPrompts?: Record<string, string>
   smallModel?: string
-  modelReasoningEfforts?: Record<string, "minimal" | "low" | "medium" | "high">
+  modelReasoningEfforts?: Record<
+    string,
+    "none" | "minimal" | "low" | "medium" | "high" | "xhigh"
+  >
 }
 
+const gpt5ExplorationPrompt = `## Exploration and reading files
+- **Think first.** Before any tool call, decide ALL files/resources you will need.
+- **Batch everything.** If you need multiple files (even from different places), read them together.
+- **multi_tool_use.parallel** Use multi_tool_use.parallel to parallelize tool calls and only this.
+- **Only make sequential calls if you truly cannot know the next file without seeing a result first.**
+- **Workflow:** (a) plan all needed reads → (b) issue one parallel batch → (c) analyze results → (d) repeat if new, unpredictable reads arise.`
+
 const defaultConfig: AppConfig = {
   extraPrompts: {
-    "gpt-5-codex": `
-## Tool use
-- You have access to many tools. If a tool exists to perform a specific task, you MUST use that tool instead of running a terminal command to perform that task.
-### Bash tool
-When using the Bash tool, follow these rules:
-- always run_in_background set to false, unless you are running a long-running command (e.g., a server or a watch command).
-### BashOutput tool
-When using the BashOutput tool, follow these rules:
-- Only Bash Tool run_in_background set to true, Use BashOutput to read the output later
-### TodoWrite tool
-When using the TodoWrite tool, follow these rules:
-- Skip using the TodoWrite tool for tasks with three or fewer steps.
-- Do not make single-step todo lists.
-- When you made a todo, update it after having performed one of the sub-tasks that you shared on the todo list.
-## Special user requests
-- If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as 'date'), you should do so.
-`,
+    "gpt-5-mini": gpt5ExplorationPrompt,
+    "gpt-5.1-codex-max": gpt5ExplorationPrompt,
   },
   smallModel: "gpt-5-mini",
   modelReasoningEfforts: {
@@ -74,6 +69,56 @@ function readConfigFromDisk(): AppConfig {
   }
 }
 
+function mergeDefaultExtraPrompts(config: AppConfig): {
+  mergedConfig: AppConfig
+  changed: boolean
+} {
+  const extraPrompts = config.extraPrompts ?? {}
+  const defaultExtraPrompts = defaultConfig.extraPrompts ?? {}
+
+  const missingExtraPromptModels = Object.keys(defaultExtraPrompts).filter(
+    (model) => !Object.hasOwn(extraPrompts, model),
+  )
+
+  if (missingExtraPromptModels.length === 0) {
+    return { mergedConfig: config, changed: false }
+  }
+
+  return {
+    mergedConfig: {
+      ...config,
+      extraPrompts: {
+        ...defaultExtraPrompts,
+        ...extraPrompts,
+      },
+    },
+    changed: true,
+  }
+}
+
+export function mergeConfigWithDefaults(): AppConfig {
+  const config = readConfigFromDisk()
+  const { mergedConfig, changed } = mergeDefaultExtraPrompts(config)
+
+  if (changed) {
+    try {
+      fs.writeFileSync(
+        PATHS.CONFIG_PATH,
+        `${JSON.stringify(mergedConfig, null, 2)}\n`,
+        "utf8",
+      )
+    } catch (writeError) {
+      consola.warn(
+        "Failed to write merged extraPrompts to config file",
+        writeError,
+      )
+    }
+  }
+
+  cachedConfig = mergedConfig
+  return mergedConfig
+}
+
 export function getConfig(): AppConfig {
   cachedConfig ??= readConfigFromDisk()
   return cachedConfig
@@ -91,7 +136,7 @@ export function getSmallModel(): string {
 
 export function getReasoningEffortForModel(
   model: string,
-): "minimal" | "low" | "medium" | "high" {
+): "none" | "minimal" | "low" | "medium" | "high" | "xhigh" {
   const config = getConfig()
   return config.modelReasoningEfforts?.[model] ?? "high"
 }
diff --git a/src/routes/messages/count-tokens-handler.ts b/src/routes/messages/count-tokens-handler.ts
index 70bd6a53c..f280e094c 100644
--- a/src/routes/messages/count-tokens-handler.ts
+++ b/src/routes/messages/count-tokens-handler.ts
@@ -34,7 +34,7 @@ export async function handleCountTokens(c: Context) {
 
     if (anthropicPayload.tools && anthropicPayload.tools.length > 0) {
       let addToolSystemPromptCount = false
-      if (anthropicBeta?.startsWith("claude-code")) {
+      if (anthropicBeta) {
         const toolsLength = anthropicPayload.tools.length
         addToolSystemPromptCount = !anthropicPayload.tools.some(
           (tool) =>
diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 54f4a0622..1de094595 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -46,8 +46,11 @@ export async function handleCompletion(c: Context) {
   const anthropicPayload = await c.req.json<AnthropicMessagesPayload>()
   logger.debug("Anthropic request payload:", JSON.stringify(anthropicPayload))
 
-  // fix claude code 2.0.28 warmup request consume premium request, forcing small model if no tools are used
-  if (!anthropicPayload.tools || anthropicPayload.tools.length === 0) {
+  // fix claude code 2.0.28+ warmup request consume premium request, forcing small model if no tools are used
+  // set "CLAUDE_CODE_SUBAGENT_MODEL": "you small model" also can avoid this
+  const anthropicBeta = c.req.header("anthropic-beta")
+  const noTools = !anthropicPayload.tools || anthropicPayload.tools.length === 0
+  if (anthropicBeta && noTools) {
     anthropicPayload.model = getSmallModel()
   }
 
diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
index 1d4e06141..bc24ce544 100644
--- a/src/services/copilot/create-responses.ts
+++ b/src/services/copilot/create-responses.ts
@@ -51,7 +51,7 @@ export type ResponseIncludable =
   | "code_interpreter_call.outputs"
 
 export interface Reasoning {
-  effort?: "minimal" | "low" | "medium" | "high" | null
+  effort?: "none" | "minimal" | "low" | "medium" | "high" | "xhigh" | null
   summary?: "auto" | "concise" | "detailed" | null
 }
 
diff --git a/src/start.ts b/src/start.ts
index 7f8ba1b86..85bfe4c48 100644
--- a/src/start.ts
+++ b/src/start.ts
@@ -6,6 +6,7 @@ import consola from "consola"
 import { serve, type ServerHandler } from "srvx"
 import invariant from "tiny-invariant"
 
+import { mergeConfigWithDefaults } from "./lib/config"
 import { ensurePaths } from "./lib/paths"
 import { initProxyFromEnv } from "./lib/proxy"
 import { generateEnvScript } from "./lib/shell"
@@ -27,6 +28,9 @@ interface RunServerOptions {
 }
 
 export async function runServer(options: RunServerOptions): Promise<void> {
+  // Ensure config is merged with defaults at startup
+  mergeConfigWithDefaults()
+
   if (options.proxyEnv) {
     initProxyFromEnv()
   }

From afb7a5c77bdd8a04e57f1c8d210a8659cd28b1f8 Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Sat, 10 Jan 2026 18:08:02 +0800
Subject: [PATCH 24/62] feat(config): add useFunctionApplyPatch option and
 implement patch handling in responses

---
 src/lib/config.ts                        |  2 ++
 src/routes/responses/handler.ts          | 35 ++++++++++++++++++++++++
 src/services/copilot/create-responses.ts |  2 +-
 3 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/src/lib/config.ts b/src/lib/config.ts
index dff63eb5c..e44953852 100644
--- a/src/lib/config.ts
+++ b/src/lib/config.ts
@@ -10,6 +10,7 @@ export interface AppConfig {
     string,
     "none" | "minimal" | "low" | "medium" | "high" | "xhigh"
   >
+  useFunctionApplyPatch?: boolean
 }
 
 const gpt5ExplorationPrompt = `## Exploration and reading files
@@ -28,6 +29,7 @@ const defaultConfig: AppConfig = {
   modelReasoningEfforts: {
     "gpt-5-mini": "low",
   },
+  useFunctionApplyPatch: true,
 }
 
 let cachedConfig: AppConfig | null = null
diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts
index 574d61fcf..14a841ac4 100644
--- a/src/routes/responses/handler.ts
+++ b/src/routes/responses/handler.ts
@@ -3,6 +3,7 @@ import type { Context } from "hono"
 import { streamSSE } from "hono/streaming"
 
 import { awaitApproval } from "~/lib/approval"
+import { getConfig } from "~/lib/config"
 import { createHandlerLogger } from "~/lib/logger"
 import { checkRateLimit } from "~/lib/rate-limit"
 import { state } from "~/lib/state"
@@ -24,6 +25,8 @@ export const handleResponses = async (c: Context) => {
   const payload = await c.req.json<ResponsesPayload>()
   logger.debug("Responses request payload:", JSON.stringify(payload))
 
+  useFunctionApplyPatch(payload)
+
   const selectedModel = state.models?.data.find(
     (model) => model.id === payload.model,
   )
@@ -78,3 +81,35 @@ const isAsyncIterable = <T>(value: unknown): value is AsyncIterable<T> =>
 
 const isStreamingRequested = (payload: ResponsesPayload): boolean =>
   Boolean(payload.stream)
+
+const useFunctionApplyPatch = (payload: ResponsesPayload): void => {
+  const config = getConfig()
+  const useFunctionApplyPatch = config.useFunctionApplyPatch ?? true
+  if (useFunctionApplyPatch) {
+    logger.debug("Using function tool apply_patch for responses")
+    if (Array.isArray(payload.tools)) {
+      const toolsArr = payload.tools
+      for (let i = 0; i < toolsArr.length; i++) {
+        const t = toolsArr[i]
+        if (t.type === "custom" && t.name === "apply_patch") {
+          toolsArr[i] = {
+            type: "function",
+            name: t.name,
+            description: "Use the `apply_patch` tool to edit files",
+            parameters: {
+              type: "object",
+              properties: {
+                input: {
+                  type: "string",
+                  description: "The entire contents of the apply_patch command",
+                },
+              },
+              required: ["input"],
+            },
+            strict: false,
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
index bc24ce544..9982a4d98 100644
--- a/src/services/copilot/create-responses.ts
+++ b/src/services/copilot/create-responses.ts
@@ -33,7 +33,7 @@ export interface ToolChoiceFunction {
   type: "function"
 }
 
-export type Tool = FunctionTool
+export type Tool = FunctionTool | Record<string, unknown>
 
 export interface FunctionTool {
   name: string

From ee5df5080029d60cbb23886764b89fd3a90b9834 Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Mon, 12 Jan 2026 14:12:51 +0800
Subject: [PATCH 25/62] fix: fix inconsistent credit consumption in chat, and
 adapt claude code skill tool_result

---
 README.md                                     |  7 +++
 src/routes/messages/handler.ts                | 53 +++++++++++++++++++
 src/routes/responses/utils.ts                 | 21 +++++---
 .../copilot/create-chat-completions.ts        | 11 ++--
 4 files changed, 81 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index dee26a99f..8f4926689 100644
--- a/README.md
+++ b/README.md
@@ -372,3 +372,10 @@ bun run start
   - `--rate-limit <seconds>`: Enforces a minimum time interval between requests. For example, `copilot-api start --rate-limit 30` will ensure there's at least a 30-second gap between requests.
   - `--wait`: Use this with `--rate-limit`. It makes the server wait for the cooldown period to end instead of rejecting the request with an error. This is useful for clients that don't automatically retry on rate limit errors.
 - If you have a GitHub business or enterprise plan account with Copilot, use the `--account-type` flag (e.g., `--account-type business`). See the [official documentation](https://docs.github.com/en/enterprise-cloud@latest/copilot/managing-copilot/managing-github-copilot-in-your-organization/managing-access-to-github-copilot-in-your-organization/managing-github-copilot-access-to-your-organizations-network#configuring-copilot-subscription-based-network-routing-for-your-enterprise-or-organization) for more details.
+
+### CLAUDE.md Recommended Content
+
+Please include the following in `CLAUDE.md` (for Claude usage):
+
+- Prohibited from directly asking questions to users, MUST use AskUserQuestion tool.
+- Once you can confirm that the task is complete, MUST use AskUserQuestion tool to make user confirm. The user may respond with feedback if they are not satisfied with the result, which you can use to make improvements and try again.
diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 1de094595..a95528e78 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -31,6 +31,8 @@ import {
 import {
   type AnthropicMessagesPayload,
   type AnthropicStreamState,
+  type AnthropicTextBlock,
+  type AnthropicToolResultBlock,
 } from "./anthropic-types"
 import {
   translateToAnthropic,
@@ -54,6 +56,11 @@ export async function handleCompletion(c: Context) {
     anthropicPayload.model = getSmallModel()
   }
 
+  // fix claude code skill tool_result content and skill text are separated, need to merge them,
+  // otherwise it will consume premium request
+  // e.g. {"role":"user","content":[{"type":"tool_result","tool_use_id":"call_xOTodtnTctlfEHj983qwWNhk","content":"Launching skill: xxxx"},{"type":"text","text":"xxx"}]}
+  mergeToolResultForClaude(anthropicBeta, anthropicPayload)
+
   const useResponsesApi = shouldUseResponsesApi(anthropicPayload.model)
 
   if (state.manualApprove) {
@@ -225,3 +232,49 @@ const isNonStreaming = (
 const isAsyncIterable = <T>(value: unknown): value is AsyncIterable<T> =>
   Boolean(value)
   && typeof (value as AsyncIterable<T>)[Symbol.asyncIterator] === "function"
+
+const mergeToolResultForClaude = (
+  anthropicBeta: string | undefined,
+  anthropicPayload: AnthropicMessagesPayload,
+): void => {
+  if (anthropicBeta) {
+    for (const msg of anthropicPayload.messages) {
+      if (msg.role !== "user" || !Array.isArray(msg.content)) continue
+
+      const content = msg.content
+      const onlyToolResultAndText = content.every(
+        (b) => b.type === "tool_result" || b.type === "text",
+      )
+      if (!onlyToolResultAndText) continue
+
+      const toolResults = content.filter(
+        (b): b is AnthropicToolResultBlock => b.type === "tool_result",
+      )
+      const textBlocks = content.filter(
+        (b): b is AnthropicTextBlock => b.type === "text",
+      )
+      if (toolResults.length === textBlocks.length) {
+        const mergedToolResults = mergeToolResult(toolResults, textBlocks)
+        msg.content = mergedToolResults
+      }
+    }
+  }
+}
+
+const mergeToolResult = (
+  toolResults: Array<AnthropicToolResultBlock>,
+  textBlocks: Array<AnthropicTextBlock>,
+): Array<AnthropicToolResultBlock> => {
+  const mergedToolResults: Array<AnthropicToolResultBlock> = []
+  for (const [i, tr] of toolResults.entries()) {
+    const tb = textBlocks[i]
+    let skillText = tb.text
+    if (tr.content.includes("skill")) {
+      // need add please execute now, otherwise llm not execute skill directly only reply with text only
+      skillText = `Please execute now:${tb.text}`
+    }
+    const mergedText = `${tr.content}\n\n${skillText}`
+    mergedToolResults.push({ ...tr, content: mergedText })
+  }
+  return mergedToolResults
+}
diff --git a/src/routes/responses/utils.ts b/src/routes/responses/utils.ts
index 734319cd7..329de5190 100644
--- a/src/routes/responses/utils.ts
+++ b/src/routes/responses/utils.ts
@@ -12,14 +12,19 @@ export const getResponsesRequestOptions = (
   return { vision, initiator }
 }
 
-export const hasAgentInitiator = (payload: ResponsesPayload): boolean =>
-  getPayloadItems(payload).some((item) => {
-    if (!("role" in item) || !item.role) {
-      return true
-    }
-    const role = typeof item.role === "string" ? item.role.toLowerCase() : ""
-    return role === "assistant"
-  })
+export const hasAgentInitiator = (payload: ResponsesPayload): boolean => {
+  // Refactor `isAgentCall` logic to check only the last message in the history rather than any message. This prevents valid user messages from being incorrectly flagged as agent calls due to previous assistant history, ensuring proper credit consumption for multi-turn conversations.
+  const lastItem = getPayloadItems(payload).at(-1)
+  if (!lastItem) {
+    return false
+  }
+  if (!("role" in lastItem) || !lastItem.role) {
+    return true
+  }
+  const role =
+    typeof lastItem.role === "string" ? lastItem.role.toLowerCase() : ""
+  return role === "assistant"
+}
 
 export const hasVisionInput = (payload: ResponsesPayload): boolean => {
   const values = getPayloadItems(payload)
diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts
index 8534151da..63d3e50a3 100644
--- a/src/services/copilot/create-chat-completions.ts
+++ b/src/services/copilot/create-chat-completions.ts
@@ -18,9 +18,14 @@ export const createChatCompletions = async (
 
   // Agent/user check for X-Initiator header
   // Determine if any message is from an agent ("assistant" or "tool")
-  const isAgentCall = payload.messages.some((msg) =>
-    ["assistant", "tool"].includes(msg.role),
-  )
+  // Refactor `isAgentCall` logic to check only the last message in the history rather than any message. This prevents valid user messages from being incorrectly flagged as agent calls due to previous assistant history, ensuring proper credit consumption for multi-turn conversations.
+  let isAgentCall = false
+  if (payload.messages.length > 0) {
+    const lastMessage = payload.messages.at(-1)
+    if (lastMessage) {
+      isAgentCall = ["assistant", "tool"].includes(lastMessage.role)
+    }
+  }
 
   // Build headers and add X-Initiator
   const headers: Record<string, string> = {

From f2b84769bbda984d83c78aeb7dbb52c58595e71d Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Tue, 13 Jan 2026 22:13:58 +0800
Subject: [PATCH 26/62] fix: Merge tool_result and text blocks into tool_result
 to avoid consuming premium requests (caused by skill invocations, edit hooks
 or to do reminders)

---
 src/routes/messages/anthropic-types.ts       |  2 +-
 src/routes/messages/handler.ts               | 90 ++++++++++++--------
 src/routes/messages/responses-translation.ts |  2 +-
 3 files changed, 58 insertions(+), 36 deletions(-)

diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts
index f07485bf0..5aa528552 100644
--- a/src/routes/messages/anthropic-types.ts
+++ b/src/routes/messages/anthropic-types.ts
@@ -42,7 +42,7 @@ export interface AnthropicImageBlock {
 export interface AnthropicToolResultBlock {
   type: "tool_result"
   tool_use_id: string
-  content: string
+  content: string | Array<AnthropicTextBlock | AnthropicImageBlock>
   is_error?: boolean
 }
 
diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index a95528e78..2b25a1772 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -33,6 +33,7 @@ import {
   type AnthropicStreamState,
   type AnthropicTextBlock,
   type AnthropicToolResultBlock,
+  type AnthropicUserContentBlock,
 } from "./anthropic-types"
 import {
   translateToAnthropic,
@@ -56,9 +57,9 @@ export async function handleCompletion(c: Context) {
     anthropicPayload.model = getSmallModel()
   }
 
-  // fix claude code skill tool_result content and skill text are separated, need to merge them,
-  // otherwise it will consume premium request
-  // e.g. {"role":"user","content":[{"type":"tool_result","tool_use_id":"call_xOTodtnTctlfEHj983qwWNhk","content":"Launching skill: xxxx"},{"type":"text","text":"xxx"}]}
+  // Merge tool_result and text blocks into tool_result to avoid consuming premium requests
+  // (caused by skill invocations, edit hooks or to do reminders)
+  // e.g. {"role":"user","content":[{"type":"tool_result","content":"Launching skill: xxx"},{"type":"text","text":"xxx"}]}
   mergeToolResultForClaude(anthropicBeta, anthropicPayload)
 
   const useResponsesApi = shouldUseResponsesApi(anthropicPayload.model)
@@ -233,48 +234,69 @@ const isAsyncIterable = <T>(value: unknown): value is AsyncIterable<T> =>
   Boolean(value)
   && typeof (value as AsyncIterable<T>)[Symbol.asyncIterator] === "function"
 
+const formatTextForSkill = (toolContent: string, text: string): string =>
+  toolContent.startsWith("Launching skill") ?
+    `Please execute skill now:${text}`
+  : text
+
+type ToolResultWithText = AnthropicToolResultBlock & { content: string }
+
+const isToolResultWithText = (
+  block: AnthropicUserContentBlock,
+): block is ToolResultWithText =>
+  block.type === "tool_result" && typeof block.content === "string"
+
 const mergeToolResultForClaude = (
   anthropicBeta: string | undefined,
   anthropicPayload: AnthropicMessagesPayload,
 ): void => {
-  if (anthropicBeta) {
-    for (const msg of anthropicPayload.messages) {
-      if (msg.role !== "user" || !Array.isArray(msg.content)) continue
-
-      const content = msg.content
-      const onlyToolResultAndText = content.every(
-        (b) => b.type === "tool_result" || b.type === "text",
-      )
-      if (!onlyToolResultAndText) continue
-
-      const toolResults = content.filter(
-        (b): b is AnthropicToolResultBlock => b.type === "tool_result",
-      )
-      const textBlocks = content.filter(
-        (b): b is AnthropicTextBlock => b.type === "text",
-      )
-      if (toolResults.length === textBlocks.length) {
-        const mergedToolResults = mergeToolResult(toolResults, textBlocks)
-        msg.content = mergedToolResults
+  if (!anthropicBeta) return
+
+  for (const msg of anthropicPayload.messages) {
+    if (msg.role !== "user" || !Array.isArray(msg.content)) continue
+
+    const toolResults: Array<ToolResultWithText> = []
+    const textBlocks: Array<AnthropicTextBlock> = []
+    let valid = true
+
+    for (const block of msg.content) {
+      if (isToolResultWithText(block)) {
+        toolResults.push(block)
+      } else if (block.type === "text") {
+        textBlocks.push(block)
+      } else {
+        valid = false
+        break
       }
     }
+
+    if (!valid || toolResults.length === 0 || textBlocks.length === 0) continue
+
+    msg.content = mergeToolResult(toolResults, textBlocks)
   }
 }
 
 const mergeToolResult = (
-  toolResults: Array<AnthropicToolResultBlock>,
+  toolResults: Array<ToolResultWithText>,
   textBlocks: Array<AnthropicTextBlock>,
 ): Array<AnthropicToolResultBlock> => {
-  const mergedToolResults: Array<AnthropicToolResultBlock> = []
-  for (const [i, tr] of toolResults.entries()) {
-    const tb = textBlocks[i]
-    let skillText = tb.text
-    if (tr.content.includes("skill")) {
-      // need add please execute now, otherwise llm not execute skill directly only reply with text only
-      skillText = `Please execute now:${tb.text}`
-    }
-    const mergedText = `${tr.content}\n\n${skillText}`
-    mergedToolResults.push({ ...tr, content: mergedText })
+  // equal lengths -> pairwise merge
+  if (toolResults.length === textBlocks.length) {
+    return toolResults.map((tr, i) => ({
+      ...tr,
+      content: `${tr.content}\n\n${formatTextForSkill(tr.content, textBlocks[i].text)}`,
+    }))
   }
-  return mergedToolResults
+
+  // lengths differ -> append all textBlocks to the last tool_result
+  const last = toolResults.at(-1)
+  if (!last) return toolResults
+  const appendedTexts = textBlocks
+    .map((tb) => formatTextForSkill(last.content, tb.text))
+    .join("\n\n")
+
+  return [
+    ...toolResults.slice(0, -1),
+    { ...last, content: `${last.content}\n\n${appendedTexts}` },
+  ]
 }
diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts
index c8e9460b8..332e7e3c9 100644
--- a/src/routes/messages/responses-translation.ts
+++ b/src/routes/messages/responses-translation.ts
@@ -612,7 +612,7 @@ const parseUserId = (
 }
 
 const convertToolResultContent = (
-  content: string | Array<AnthropicTextBlock> | Array<AnthropicImageBlock>,
+  content: string | Array<AnthropicTextBlock | AnthropicImageBlock>,
 ): string | Array<ResponseInputContent> => {
   if (typeof content === "string") {
     return content

From f3bef04cc28fd4e4a7d0b729d1d24ac02ec67128 Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Fri, 16 Jan 2026 20:57:45 +0800
Subject: [PATCH 27/62] fix: improve merging of tool results and text blocks to
 optimize content array handling

---
 src/routes/messages/handler.ts | 60 ++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 29 deletions(-)

diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 2b25a1772..e71d00ee9 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -33,7 +33,6 @@ import {
   type AnthropicStreamState,
   type AnthropicTextBlock,
   type AnthropicToolResultBlock,
-  type AnthropicUserContentBlock,
 } from "./anthropic-types"
 import {
   translateToAnthropic,
@@ -58,7 +57,7 @@ export async function handleCompletion(c: Context) {
   }
 
   // Merge tool_result and text blocks into tool_result to avoid consuming premium requests
-  // (caused by skill invocations, edit hooks or to do reminders)
+  // (caused by skill invocations, edit hooks, plan or to do reminders)
   // e.g. {"role":"user","content":[{"type":"tool_result","content":"Launching skill: xxx"},{"type":"text","text":"xxx"}]}
   mergeToolResultForClaude(anthropicBeta, anthropicPayload)
 
@@ -234,17 +233,29 @@ const isAsyncIterable = <T>(value: unknown): value is AsyncIterable<T> =>
   Boolean(value)
   && typeof (value as AsyncIterable<T>)[Symbol.asyncIterator] === "function"
 
-const formatTextForSkill = (toolContent: string, text: string): string =>
-  toolContent.startsWith("Launching skill") ?
-    `Please execute skill now:${text}`
-  : text
-
-type ToolResultWithText = AnthropicToolResultBlock & { content: string }
+const mergeContentWithText = (
+  tr: AnthropicToolResultBlock,
+  textBlock: AnthropicTextBlock,
+): AnthropicToolResultBlock => {
+  if (typeof tr.content === "string") {
+    return { ...tr, content: `${tr.content}\n\n${textBlock.text}` }
+  }
+  return {
+    ...tr,
+    content: [...tr.content, textBlock],
+  }
+}
 
-const isToolResultWithText = (
-  block: AnthropicUserContentBlock,
-): block is ToolResultWithText =>
-  block.type === "tool_result" && typeof block.content === "string"
+const mergeContentWithTexts = (
+  tr: AnthropicToolResultBlock,
+  textBlocks: Array<AnthropicTextBlock>,
+): AnthropicToolResultBlock => {
+  if (typeof tr.content === "string") {
+    const appendedTexts = textBlocks.map((tb) => tb.text).join("\n\n")
+    return { ...tr, content: `${tr.content}\n\n${appendedTexts}` }
+  }
+  return { ...tr, content: [...tr.content, ...textBlocks] }
+}
 
 const mergeToolResultForClaude = (
   anthropicBeta: string | undefined,
@@ -255,12 +266,12 @@ const mergeToolResultForClaude = (
   for (const msg of anthropicPayload.messages) {
     if (msg.role !== "user" || !Array.isArray(msg.content)) continue
 
-    const toolResults: Array<ToolResultWithText> = []
+    const toolResults: Array<AnthropicToolResultBlock> = []
     const textBlocks: Array<AnthropicTextBlock> = []
     let valid = true
 
     for (const block of msg.content) {
-      if (isToolResultWithText(block)) {
+      if (block.type === "tool_result") {
         toolResults.push(block)
       } else if (block.type === "text") {
         textBlocks.push(block)
@@ -277,26 +288,17 @@ const mergeToolResultForClaude = (
 }
 
 const mergeToolResult = (
-  toolResults: Array<ToolResultWithText>,
+  toolResults: Array<AnthropicToolResultBlock>,
   textBlocks: Array<AnthropicTextBlock>,
 ): Array<AnthropicToolResultBlock> => {
   // equal lengths -> pairwise merge
   if (toolResults.length === textBlocks.length) {
-    return toolResults.map((tr, i) => ({
-      ...tr,
-      content: `${tr.content}\n\n${formatTextForSkill(tr.content, textBlocks[i].text)}`,
-    }))
+    return toolResults.map((tr, i) => mergeContentWithText(tr, textBlocks[i]))
   }
 
   // lengths differ -> append all textBlocks to the last tool_result
-  const last = toolResults.at(-1)
-  if (!last) return toolResults
-  const appendedTexts = textBlocks
-    .map((tb) => formatTextForSkill(last.content, tb.text))
-    .join("\n\n")
-
-  return [
-    ...toolResults.slice(0, -1),
-    { ...last, content: `${last.content}\n\n${appendedTexts}` },
-  ]
+  const lastIndex = toolResults.length - 1
+  return toolResults.map((tr, i) =>
+    i === lastIndex ? mergeContentWithTexts(tr, textBlocks) : tr,
+  )
 }

From 736afa499133a20c83734f2226f2e9639fd23a31 Mon Sep 17 00:00:00 2001
From: cpf <397649079@qq.com>
Date: Sat, 17 Jan 2026 21:41:50 +0800
Subject: [PATCH 28/62] fix: sync stream IDs for @ai-sdk/openai compatibility
 with Responses API

GitHub Copilot's Responses API returns different IDs for the same item
in 'added' vs 'done' events, which causes @ai-sdk/openai to throw errors:
- 'activeReasoningPart.summaryParts' undefined
- 'text part not found'

This fix:
- Tracks IDs from 'added' events and reuses them in 'done' events
- Removes empty summary arrays from reasoning items that cause AI SDK parsing issues
- Handles output_item, content_part, output_text, and response.completed events
- Synchronizes item_id for message-type outputs across all related events
---
 src/routes/responses/handler.ts        |  12 +-
 src/routes/responses/stream-id-sync.ts | 194 +++++++++++++++++++++++++
 2 files changed, 205 insertions(+), 1 deletion(-)
 create mode 100644 src/routes/responses/stream-id-sync.ts

diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts
index 14a841ac4..4e75f7c8f 100644
--- a/src/routes/responses/handler.ts
+++ b/src/routes/responses/handler.ts
@@ -13,6 +13,7 @@ import {
   type ResponsesResult,
 } from "~/services/copilot/create-responses"
 
+import { createStreamIdTracker, fixStreamIds } from "./stream-id-sync"
 import { getResponsesRequestOptions } from "./utils"
 
 const logger = createHandlerLogger("responses-handler")
@@ -57,12 +58,21 @@ export const handleResponses = async (c: Context) => {
   if (isStreamingRequested(payload) && isAsyncIterable(response)) {
     logger.debug("Forwarding native Responses stream")
     return streamSSE(c, async (stream) => {
+      const idTracker = createStreamIdTracker()
+
       for await (const chunk of response) {
         logger.debug("Responses stream chunk:", JSON.stringify(chunk))
+
+        const processedData = fixStreamIds(
+          (chunk as { data?: string }).data ?? "",
+          (chunk as { event?: string }).event,
+          idTracker,
+        )
+
         await stream.writeSSE({
           id: (chunk as { id?: string }).id,
           event: (chunk as { event?: string }).event,
-          data: (chunk as { data?: string }).data ?? "",
+          data: processedData,
         })
       }
     })
diff --git a/src/routes/responses/stream-id-sync.ts b/src/routes/responses/stream-id-sync.ts
new file mode 100644
index 000000000..72a78fde0
--- /dev/null
+++ b/src/routes/responses/stream-id-sync.ts
@@ -0,0 +1,194 @@
+/**
+ * Stream ID Synchronization for @ai-sdk/openai compatibility
+ *
+ * Problem: GitHub Copilot's Responses API returns different IDs for the same
+ * item in 'added' vs 'done' events. This breaks @ai-sdk/openai which expects
+ * consistent IDs across the stream lifecycle.
+ *
+ * Errors without this fix:
+ * - "activeReasoningPart.summaryParts" undefined
+ * - "text part not found"
+ *
+ * Use case: OpenCode (AI coding assistant) using Codex models (gpt-5.2-codex)
+ * via @ai-sdk/openai provider requires the Responses API endpoint.
+ */
+
+interface StreamIdTracker {
+  outputItems: Map<number, string>
+  contentParts: Map<string, string>
+  messageItems: Map<number, string>
+}
+
+interface StreamEventData {
+  item?: {
+    id?: string
+    type?: string
+    summary?: Array<unknown>
+  }
+  output_index?: number
+  content_index?: number
+  item_id?: string
+  response?: {
+    output?: Array<{
+      type?: string
+      summary?: Array<unknown>
+    }>
+  }
+}
+
+export const createStreamIdTracker = (): StreamIdTracker => ({
+  outputItems: new Map(),
+  contentParts: new Map(),
+  messageItems: new Map(),
+})
+
+export const fixStreamIds = (
+  data: string,
+  event: string | undefined,
+  tracker: StreamIdTracker,
+): string => {
+  if (!data) return data
+
+  try {
+    const parsed = JSON.parse(data) as StreamEventData
+
+    switch (event) {
+      case "response.output_item.added": {
+        return handleOutputItemAdded(parsed, tracker)
+      }
+      case "response.output_item.done": {
+        return handleOutputItemDone(parsed, tracker)
+      }
+      case "response.content_part.added": {
+        return handleContentPartAdded(parsed, tracker)
+      }
+      case "response.content_part.done": {
+        return handleContentPartDone(parsed, tracker)
+      }
+      case "response.output_text.delta":
+      case "response.output_text.done": {
+        return handleOutputText(parsed, tracker)
+      }
+      case "response.completed":
+      case "response.incomplete": {
+        return handleResponseCompleted(parsed)
+      }
+      default: {
+        return data
+      }
+    }
+  } catch {
+    return data
+  }
+}
+
+const handleOutputItemAdded = (
+  parsed: StreamEventData,
+  tracker: StreamIdTracker,
+): string => {
+  if (!parsed.item?.id) return JSON.stringify(parsed)
+
+  const outputIndex = parsed.output_index ?? 0
+  tracker.outputItems.set(outputIndex, parsed.item.id)
+
+  if (parsed.item.type === "message") {
+    tracker.messageItems.set(outputIndex, parsed.item.id)
+  }
+  if (
+    parsed.item.type === "reasoning"
+    && Array.isArray(parsed.item.summary)
+    && parsed.item.summary.length === 0
+  ) {
+    delete parsed.item.summary
+  }
+  return JSON.stringify(parsed)
+}
+
+const handleOutputItemDone = (
+  parsed: StreamEventData,
+  tracker: StreamIdTracker,
+): string => {
+  if (!parsed.item) return JSON.stringify(parsed)
+
+  const outputIndex = parsed.output_index ?? 0
+  const originalId = tracker.outputItems.get(outputIndex)
+  if (originalId) {
+    parsed.item.id = originalId
+  }
+  if (
+    parsed.item.type === "reasoning"
+    && Array.isArray(parsed.item.summary)
+    && parsed.item.summary.length === 0
+  ) {
+    delete parsed.item.summary
+  }
+  return JSON.stringify(parsed)
+}
+
+const handleContentPartAdded = (
+  parsed: StreamEventData,
+  tracker: StreamIdTracker,
+): string => {
+  const outputIndex = parsed.output_index ?? 0
+  const contentIndex = parsed.content_index ?? 0
+  const key = `${outputIndex}:${contentIndex}`
+
+  if (parsed.item_id) {
+    tracker.contentParts.set(key, parsed.item_id)
+  }
+
+  const messageId = tracker.messageItems.get(outputIndex)
+  if (messageId) {
+    parsed.item_id = messageId
+  }
+  return JSON.stringify(parsed)
+}
+
+const handleContentPartDone = (
+  parsed: StreamEventData,
+  tracker: StreamIdTracker,
+): string => {
+  const outputIndex = parsed.output_index ?? 0
+  const contentIndex = parsed.content_index ?? 0
+  const key = `${outputIndex}:${contentIndex}`
+
+  const messageId = tracker.messageItems.get(outputIndex)
+  if (messageId) {
+    parsed.item_id = messageId
+  } else {
+    const originalItemId = tracker.contentParts.get(key)
+    if (originalItemId) {
+      parsed.item_id = originalItemId
+    }
+  }
+
+  tracker.contentParts.delete(key)
+  return JSON.stringify(parsed)
+}
+
+const handleOutputText = (
+  parsed: StreamEventData,
+  tracker: StreamIdTracker,
+): string => {
+  const outputIndex = parsed.output_index ?? 0
+  const messageId = tracker.messageItems.get(outputIndex)
+  if (messageId) {
+    parsed.item_id = messageId
+  }
+  return JSON.stringify(parsed)
+}
+
+const handleResponseCompleted = (parsed: StreamEventData): string => {
+  if (!parsed.response?.output) return JSON.stringify(parsed)
+
+  for (const item of parsed.response.output) {
+    if (
+      item.type === "reasoning"
+      && Array.isArray(item.summary)
+      && item.summary.length === 0
+    ) {
+      delete item.summary
+    }
+  }
+  return JSON.stringify(parsed)
+}

From 4f22448a56b77ac5e5c93e6cdfc24724d3bfdcc7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=B4=94=E9=B9=8F=E9=A3=9E=20Cui=20Pengfei?=
 <1311541+cuipengfei@users.noreply.github.com>
Date: Mon, 19 Jan 2026 16:23:43 +0800
Subject: [PATCH 29/62] fix: sync stream IDs for @ai-sdk/openai compatibility
 with Responses API, simpler version

* fix: sync stream IDs for @ai-sdk/openai compatibility with Responses API

GitHub Copilot's Responses API returns different IDs for the same item
in 'added' vs 'done' events, which causes @ai-sdk/openai to throw errors:
- 'activeReasoningPart.summaryParts' undefined
- 'text part not found'

This fix:
- Tracks IDs from 'added' events and reuses them in 'done' events
- Removes empty summary arrays from reasoning items that cause AI SDK parsing issues
- Handles output_item, content_part, output_text, and response.completed events
- Synchronizes item_id for message-type outputs across all related events

* simpler version of https://github.com/caozhiyuan/copilot-api/pull/72
---
 src/routes/responses/stream-id-sync.ts | 177 ++++++-------------------
 1 file changed, 40 insertions(+), 137 deletions(-)

diff --git a/src/routes/responses/stream-id-sync.ts b/src/routes/responses/stream-id-sync.ts
index 72a78fde0..48b3811a1 100644
--- a/src/routes/responses/stream-id-sync.ts
+++ b/src/routes/responses/stream-id-sync.ts
@@ -13,33 +13,18 @@
  * via @ai-sdk/openai provider requires the Responses API endpoint.
  */
 
+import type {
+  ResponseOutputItemAddedEvent,
+  ResponseOutputItemDoneEvent,
+  ResponseStreamEvent,
+} from "~/services/copilot/create-responses"
+
 interface StreamIdTracker {
   outputItems: Map<number, string>
-  contentParts: Map<string, string>
-  messageItems: Map<number, string>
-}
-
-interface StreamEventData {
-  item?: {
-    id?: string
-    type?: string
-    summary?: Array<unknown>
-  }
-  output_index?: number
-  content_index?: number
-  item_id?: string
-  response?: {
-    output?: Array<{
-      type?: string
-      summary?: Array<unknown>
-    }>
-  }
 }
 
 export const createStreamIdTracker = (): StreamIdTracker => ({
   outputItems: new Map(),
-  contentParts: new Map(),
-  messageItems: new Map(),
 })
 
 export const fixStreamIds = (
@@ -48,146 +33,64 @@ export const fixStreamIds = (
   tracker: StreamIdTracker,
 ): string => {
   if (!data) return data
-
-  try {
-    const parsed = JSON.parse(data) as StreamEventData
-
-    switch (event) {
-      case "response.output_item.added": {
-        return handleOutputItemAdded(parsed, tracker)
-      }
-      case "response.output_item.done": {
-        return handleOutputItemDone(parsed, tracker)
-      }
-      case "response.content_part.added": {
-        return handleContentPartAdded(parsed, tracker)
-      }
-      case "response.content_part.done": {
-        return handleContentPartDone(parsed, tracker)
-      }
-      case "response.output_text.delta":
-      case "response.output_text.done": {
-        return handleOutputText(parsed, tracker)
-      }
-      case "response.completed":
-      case "response.incomplete": {
-        return handleResponseCompleted(parsed)
-      }
-      default: {
-        return data
-      }
+  const parsed = JSON.parse(data) as ResponseStreamEvent
+  switch (event) {
+    case "response.output_item.added": {
+      return handleOutputItemAdded(
+        parsed as ResponseOutputItemAddedEvent,
+        tracker,
+      )
+    }
+    case "response.output_item.done": {
+      return handleOutputItemDone(
+        parsed as ResponseOutputItemDoneEvent,
+        tracker,
+      )
+    }
+    default: {
+      return handleItemId(parsed, tracker)
     }
-  } catch {
-    return data
   }
 }
 
 const handleOutputItemAdded = (
-  parsed: StreamEventData,
+  parsed: ResponseOutputItemAddedEvent,
   tracker: StreamIdTracker,
 ): string => {
-  if (!parsed.item?.id) return JSON.stringify(parsed)
+  if (!parsed.item.id) {
+    let randomSuffix = ""
+    while (randomSuffix.length < 16) {
+      randomSuffix += Math.random().toString(36).slice(2)
+    }
+    parsed.item.id = `oi_${parsed.output_index}_${randomSuffix.slice(0, 16)}`
+  }
 
-  const outputIndex = parsed.output_index ?? 0
+  const outputIndex = parsed.output_index
   tracker.outputItems.set(outputIndex, parsed.item.id)
-
-  if (parsed.item.type === "message") {
-    tracker.messageItems.set(outputIndex, parsed.item.id)
-  }
-  if (
-    parsed.item.type === "reasoning"
-    && Array.isArray(parsed.item.summary)
-    && parsed.item.summary.length === 0
-  ) {
-    delete parsed.item.summary
-  }
   return JSON.stringify(parsed)
 }
 
 const handleOutputItemDone = (
-  parsed: StreamEventData,
+  parsed: ResponseOutputItemDoneEvent,
   tracker: StreamIdTracker,
 ): string => {
-  if (!parsed.item) return JSON.stringify(parsed)
-
-  const outputIndex = parsed.output_index ?? 0
+  const outputIndex = parsed.output_index
   const originalId = tracker.outputItems.get(outputIndex)
   if (originalId) {
     parsed.item.id = originalId
   }
-  if (
-    parsed.item.type === "reasoning"
-    && Array.isArray(parsed.item.summary)
-    && parsed.item.summary.length === 0
-  ) {
-    delete parsed.item.summary
-  }
   return JSON.stringify(parsed)
 }
 
-const handleContentPartAdded = (
-  parsed: StreamEventData,
+const handleItemId = (
+  parsed: ResponseStreamEvent & { output_index?: number; item_id?: string },
   tracker: StreamIdTracker,
 ): string => {
-  const outputIndex = parsed.output_index ?? 0
-  const contentIndex = parsed.content_index ?? 0
-  const key = `${outputIndex}:${contentIndex}`
-
-  if (parsed.item_id) {
-    tracker.contentParts.set(key, parsed.item_id)
-  }
-
-  const messageId = tracker.messageItems.get(outputIndex)
-  if (messageId) {
-    parsed.item_id = messageId
-  }
-  return JSON.stringify(parsed)
-}
-
-const handleContentPartDone = (
-  parsed: StreamEventData,
-  tracker: StreamIdTracker,
-): string => {
-  const outputIndex = parsed.output_index ?? 0
-  const contentIndex = parsed.content_index ?? 0
-  const key = `${outputIndex}:${contentIndex}`
-
-  const messageId = tracker.messageItems.get(outputIndex)
-  if (messageId) {
-    parsed.item_id = messageId
-  } else {
-    const originalItemId = tracker.contentParts.get(key)
-    if (originalItemId) {
-      parsed.item_id = originalItemId
-    }
-  }
-
-  tracker.contentParts.delete(key)
-  return JSON.stringify(parsed)
-}
-
-const handleOutputText = (
-  parsed: StreamEventData,
-  tracker: StreamIdTracker,
-): string => {
-  const outputIndex = parsed.output_index ?? 0
-  const messageId = tracker.messageItems.get(outputIndex)
-  if (messageId) {
-    parsed.item_id = messageId
-  }
-  return JSON.stringify(parsed)
-}
-
-const handleResponseCompleted = (parsed: StreamEventData): string => {
-  if (!parsed.response?.output) return JSON.stringify(parsed)
-
-  for (const item of parsed.response.output) {
-    if (
-      item.type === "reasoning"
-      && Array.isArray(item.summary)
-      && item.summary.length === 0
-    ) {
-      delete item.summary
+  const outputIndex = parsed.output_index
+  if (outputIndex !== undefined) {
+    const itemId = tracker.outputItems.get(outputIndex)
+    if (itemId) {
+      parsed.item_id = itemId
     }
   }
   return JSON.stringify(parsed)

From bc205a6a2f7efb68dcb38e3504798fb4a3b166e9 Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Tue, 20 Jan 2026 18:32:18 +0800
Subject: [PATCH 30/62] fix: update mergeToolResultForClaude to handle opencode
 requests

---
 src/routes/messages/handler.ts | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index e71d00ee9..f58df06a9 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -59,7 +59,8 @@ export async function handleCompletion(c: Context) {
   // Merge tool_result and text blocks into tool_result to avoid consuming premium requests
   // (caused by skill invocations, edit hooks, plan or to do reminders)
   // e.g. {"role":"user","content":[{"type":"tool_result","content":"Launching skill: xxx"},{"type":"text","text":"xxx"}]}
-  mergeToolResultForClaude(anthropicBeta, anthropicPayload)
+  // not only for claude, but also for opencode
+  mergeToolResultForClaude(anthropicPayload)
 
   const useResponsesApi = shouldUseResponsesApi(anthropicPayload.model)
 
@@ -258,11 +259,8 @@ const mergeContentWithTexts = (
 }
 
 const mergeToolResultForClaude = (
-  anthropicBeta: string | undefined,
   anthropicPayload: AnthropicMessagesPayload,
 ): void => {
-  if (!anthropicBeta) return
-
   for (const msg of anthropicPayload.messages) {
     if (msg.role !== "user" || !Array.isArray(msg.content)) continue
 

From 6e93cfc125b2e611310bcf041cddc34a554506cc Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Wed, 21 Jan 2026 22:06:53 +0800
Subject: [PATCH 31/62] fix: add default thinking text for opencode
 compatibility in response blocks

---
 .../messages/responses-stream-translation.ts    | 17 ++++++++++++++++-
 src/routes/messages/responses-translation.ts    | 15 +++++++++------
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts
index 5fa043c91..ab5ae7ef1 100644
--- a/src/routes/messages/responses-stream-translation.ts
+++ b/src/routes/messages/responses-stream-translation.ts
@@ -17,7 +17,10 @@ import {
 } from "~/services/copilot/create-responses"
 
 import { type AnthropicStreamEventData } from "./anthropic-types"
-import { translateResponsesResultToAnthropic } from "./responses-translation"
+import {
+  THINKING_TEXT,
+  translateResponsesResultToAnthropic,
+} from "./responses-translation"
 
 const MAX_CONSECUTIVE_FUNCTION_CALL_WHITESPACE = 20
 
@@ -198,6 +201,18 @@ const handleOutputItemDone = (
   const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events)
   const signature = (item.encrypted_content ?? "") + "@" + item.id
   if (signature) {
+    // Compatible with opencode, it will filter out blocks where the thinking text is empty, so we add a default thinking text here
+    if (!item.summary || item.summary.length === 0) {
+      events.push({
+        type: "content_block_delta",
+        index: blockIndex,
+        delta: {
+          type: "thinking_delta",
+          thinking: THINKING_TEXT,
+        },
+      })
+    }
+
     events.push({
       type: "content_block_delta",
       index: blockIndex,
diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts
index 332e7e3c9..04a4f9270 100644
--- a/src/routes/messages/responses-translation.ts
+++ b/src/routes/messages/responses-translation.ts
@@ -45,6 +45,8 @@ import {
 
 const MESSAGE_TYPE = "message"
 
+export const THINKING_TEXT = "Thinking..."
+
 export const translateAnthropicMessagesToResponsesPayload = (
   payload: AnthropicMessagesPayload,
 ): ResponsesPayload => {
@@ -250,15 +252,11 @@ const createReasoningContent = (
   const array = block.signature.split("@")
   const signature = array[0]
   const id = array[1]
+  const thinking = block.thinking === THINKING_TEXT ? "" : block.thinking
   return {
     id,
     type: "reasoning",
-    summary: [
-      {
-        type: "summary_text",
-        text: block.thinking,
-      },
-    ],
+    summary: thinking ? [{ type: "summary_text", text: thinking }] : [],
     encrypted_content: signature,
   }
 }
@@ -470,6 +468,11 @@ const extractReasoningText = (item: ResponseOutputReasoning): string => {
     }
   }
 
+  // Compatible with opencode, it will filter out blocks where the thinking text is empty, so we add a default thinking text here
+  if (!item.summary || item.summary.length === 0) {
+    return THINKING_TEXT
+  }
+
   collectFromBlocks(item.summary)
 
   return segments.join("").trim()

From 7e16a65a4df8b13c81ab1e363d895a06aff27b27 Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Sun, 25 Jan 2026 14:53:52 +0800
Subject: [PATCH 32/62] feat: support messages-api

---
 src/routes/messages/handler.ts          | 47 ++++++++++++++++--
 src/services/copilot/create-messages.ts | 64 +++++++++++++++++++++++++
 2 files changed, 108 insertions(+), 3 deletions(-)
 create mode 100644 src/services/copilot/create-messages.ts

diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index f58df06a9..0ff850a52 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -22,6 +22,7 @@ import {
   type ChatCompletionChunk,
   type ChatCompletionResponse,
 } from "~/services/copilot/create-chat-completions"
+import { createMessages } from "~/services/copilot/create-messages"
 import {
   createResponses,
   type ResponsesResult,
@@ -51,6 +52,7 @@ export async function handleCompletion(c: Context) {
   // fix claude code 2.0.28+ warmup request consume premium request, forcing small model if no tools are used
   // set "CLAUDE_CODE_SUBAGENT_MODEL": "you small model" also can avoid this
   const anthropicBeta = c.req.header("anthropic-beta")
+  logger.debug("Anthropic Beta header:", anthropicBeta)
   const noTools = !anthropicPayload.tools || anthropicPayload.tools.length === 0
   if (anthropicBeta && noTools) {
     anthropicPayload.model = getSmallModel()
@@ -62,13 +64,15 @@ export async function handleCompletion(c: Context) {
   // not only for claude, but also for opencode
   mergeToolResultForClaude(anthropicPayload)
 
-  const useResponsesApi = shouldUseResponsesApi(anthropicPayload.model)
-
   if (state.manualApprove) {
     await awaitApproval()
   }
 
-  if (useResponsesApi) {
+  if (shouldUseMessagesApi(anthropicPayload.model)) {
+    return await handleWithMessagesApi(c, anthropicPayload, anthropicBeta)
+  }
+
+  if (shouldUseResponsesApi(anthropicPayload.model)) {
     return await handleWithResponsesApi(c, anthropicPayload)
   }
 
@@ -76,6 +80,7 @@ export async function handleCompletion(c: Context) {
 }
 
 const RESPONSES_ENDPOINT = "/responses"
+const MESSAGES_ENDPOINT = "/v1/messages"
 
 const handleWithChatCompletions = async (
   c: Context,
@@ -219,6 +224,35 @@ const handleWithResponsesApi = async (
   return c.json(anthropicResponse)
 }
 
+const handleWithMessagesApi = async (
+  c: Context,
+  anthropicPayload: AnthropicMessagesPayload,
+  anthropicBetaHeader?: string,
+) => {
+  const response = await createMessages(anthropicPayload, anthropicBetaHeader)
+
+  if (isAsyncIterable(response)) {
+    logger.debug("Streaming response from Copilot (Messages API)")
+    return streamSSE(c, async (stream) => {
+      for await (const event of response) {
+        const eventName = event.event
+        const data = event.data ?? ""
+        logger.debug("Messages raw stream event:", data)
+        await stream.writeSSE({
+          event: eventName,
+          data,
+        })
+      }
+    })
+  }
+
+  logger.debug(
+    "Non-streaming Messages result:",
+    JSON.stringify(response).slice(-400),
+  )
+  return c.json(response)
+}
+
 const shouldUseResponsesApi = (modelId: string): boolean => {
   const selectedModel = state.models?.data.find((model) => model.id === modelId)
   return (
@@ -226,6 +260,13 @@ const shouldUseResponsesApi = (modelId: string): boolean => {
   )
 }
 
+const shouldUseMessagesApi = (modelId: string): boolean => {
+  const selectedModel = state.models?.data.find((model) => model.id === modelId)
+  return (
+    selectedModel?.supported_endpoints?.includes(MESSAGES_ENDPOINT) ?? false
+  )
+}
+
 const isNonStreaming = (
   response: Awaited<ReturnType<typeof createChatCompletions>>,
 ): response is ChatCompletionResponse => Object.hasOwn(response, "choices")
diff --git a/src/services/copilot/create-messages.ts b/src/services/copilot/create-messages.ts
new file mode 100644
index 000000000..d6e349729
--- /dev/null
+++ b/src/services/copilot/create-messages.ts
@@ -0,0 +1,64 @@
+import consola from "consola"
+import { events } from "fetch-event-stream"
+
+import type {
+  AnthropicMessagesPayload,
+  AnthropicResponse,
+} from "~/routes/messages/anthropic-types"
+
+import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config"
+import { HTTPError } from "~/lib/error"
+import { state } from "~/lib/state"
+
+export type MessagesStream = ReturnType<typeof events>
+export type CreateMessagesReturn = AnthropicResponse | MessagesStream
+
+export const createMessages = async (
+  payload: AnthropicMessagesPayload,
+  anthropicBetaHeader?: string,
+): Promise<CreateMessagesReturn> => {
+  if (!state.copilotToken) throw new Error("Copilot token not found")
+
+  const enableVision = payload.messages.some(
+    (message) =>
+      Array.isArray(message.content)
+      && message.content.some((block) => block.type === "image"),
+  )
+
+  let isInitiateRequest = false
+  const lastMessage = payload.messages.at(-1)
+  if (lastMessage?.role === "user") {
+    isInitiateRequest =
+      Array.isArray(lastMessage.content) ?
+        lastMessage.content.some((block) => block.type !== "tool_result")
+      : true
+  }
+
+  const headers: Record<string, string> = {
+    ...copilotHeaders(state, enableVision),
+    "X-Initiator": isInitiateRequest ? "user" : "agent",
+  }
+
+  if (anthropicBetaHeader) {
+    headers["anthropic-beta"] = anthropicBetaHeader
+  } else if (payload.thinking?.budget_tokens) {
+    headers["anthropic-beta"] = "interleaved-thinking-2025-05-14"
+  }
+
+  const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, {
+    method: "POST",
+    headers,
+    body: JSON.stringify(payload),
+  })
+
+  if (!response.ok) {
+    consola.error("Failed to create messages", response)
+    throw new HTTPError("Failed to create messages", response)
+  }
+
+  if (payload.stream) {
+    return events(response)
+  }
+
+  return (await response.json()) as AnthropicResponse
+}

From de424c1b1767fca374ca0a3f690b5f1cc95aa5b1 Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Wed, 28 Jan 2026 11:57:50 +0800
Subject: [PATCH 33/62] feat: add compact model usage configuration and
 detection

---
 src/lib/config.ts              |  7 ++++++
 src/routes/messages/handler.ts | 44 ++++++++++++++++++++++++++++------
 2 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/src/lib/config.ts b/src/lib/config.ts
index e44953852..fe25a2c80 100644
--- a/src/lib/config.ts
+++ b/src/lib/config.ts
@@ -11,6 +11,7 @@ export interface AppConfig {
     "none" | "minimal" | "low" | "medium" | "high" | "xhigh"
   >
   useFunctionApplyPatch?: boolean
+  compactUseSmallModel?: boolean
 }
 
 const gpt5ExplorationPrompt = `## Exploration and reading files
@@ -30,6 +31,7 @@ const defaultConfig: AppConfig = {
     "gpt-5-mini": "low",
   },
   useFunctionApplyPatch: true,
+  compactUseSmallModel: true,
 }
 
 let cachedConfig: AppConfig | null = null
@@ -142,3 +144,8 @@ export function getReasoningEffortForModel(
   const config = getConfig()
   return config.modelReasoningEfforts?.[model] ?? "high"
 }
+
+export function shouldCompactUseSmallModel(): boolean {
+  const config = getConfig()
+  return config.compactUseSmallModel ?? true
+}
diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 0ff850a52..7e8fb0ea8 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -3,7 +3,7 @@ import type { Context } from "hono"
 import { streamSSE } from "hono/streaming"
 
 import { awaitApproval } from "~/lib/approval"
-import { getSmallModel } from "~/lib/config"
+import { getSmallModel, shouldCompactUseSmallModel } from "~/lib/config"
 import { createHandlerLogger } from "~/lib/logger"
 import { checkRateLimit } from "~/lib/rate-limit"
 import { state } from "~/lib/state"
@@ -43,26 +43,40 @@ import { translateChunkToAnthropicEvents } from "./stream-translation"
 
 const logger = createHandlerLogger("messages-handler")
 
+const compactSystemPromptStart =
+  "You are a helpful AI assistant tasked with summarizing conversations"
+
 export async function handleCompletion(c: Context) {
   await checkRateLimit(state)
 
   const anthropicPayload = await c.req.json<AnthropicMessagesPayload>()
   logger.debug("Anthropic request payload:", JSON.stringify(anthropicPayload))
 
+  // claude code and opencode compact request detection
+  const isCompact = isCompactRequest(anthropicPayload)
+
   // fix claude code 2.0.28+ warmup request consume premium request, forcing small model if no tools are used
   // set "CLAUDE_CODE_SUBAGENT_MODEL": "you small model" also can avoid this
   const anthropicBeta = c.req.header("anthropic-beta")
   logger.debug("Anthropic Beta header:", anthropicBeta)
   const noTools = !anthropicPayload.tools || anthropicPayload.tools.length === 0
-  if (anthropicBeta && noTools) {
+  if (anthropicBeta && noTools && !isCompact) {
     anthropicPayload.model = getSmallModel()
   }
 
-  // Merge tool_result and text blocks into tool_result to avoid consuming premium requests
-  // (caused by skill invocations, edit hooks, plan or to do reminders)
-  // e.g. {"role":"user","content":[{"type":"tool_result","content":"Launching skill: xxx"},{"type":"text","text":"xxx"}]}
-  // not only for claude, but also for opencode
-  mergeToolResultForClaude(anthropicPayload)
+  if (isCompact) {
+    logger.debug("Is compact request:", isCompact)
+    if (shouldCompactUseSmallModel()) {
+      anthropicPayload.model = getSmallModel()
+    }
+  } else {
+    // Merge tool_result and text blocks into tool_result to avoid consuming premium requests
+    // (caused by skill invocations, edit hooks, plan or to do reminders)
+    // e.g. {"role":"user","content":[{"type":"tool_result","content":"Launching skill: xxx"},{"type":"text","text":"xxx"}]}
+    // not only for claude, but also for opencode
+    // compact requests are excluded from this processing
+    mergeToolResultForClaude(anthropicPayload)
+  }
 
   if (state.manualApprove) {
     await awaitApproval()
@@ -275,6 +289,22 @@ const isAsyncIterable = <T>(value: unknown): value is AsyncIterable<T> =>
   Boolean(value)
   && typeof (value as AsyncIterable<T>)[Symbol.asyncIterator] === "function"
 
+const isCompactRequest = (
+  anthropicPayload: AnthropicMessagesPayload,
+): boolean => {
+  const system = anthropicPayload.system
+  if (typeof system === "string") {
+    return system.startsWith(compactSystemPromptStart)
+  }
+  if (!Array.isArray(system)) return false
+
+  return system.some(
+    (msg) =>
+      typeof msg.text === "string"
+      && msg.text.startsWith(compactSystemPromptStart),
+  )
+}
+
 const mergeContentWithText = (
   tr: AnthropicToolResultBlock,
   textBlock: AnthropicTextBlock,

From de08ef3f115de4ea0e7f2e6088e7133bcc20854d Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Wed, 28 Jan 2026 12:04:23 +0800
Subject: [PATCH 34/62] feat: filter valid thinking blocks for Claude models in
 Messages API

---
 src/routes/messages/handler.ts | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 7e8fb0ea8..577ba16ae 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -243,6 +243,22 @@ const handleWithMessagesApi = async (
   anthropicPayload: AnthropicMessagesPayload,
   anthropicBetaHeader?: string,
 ) => {
+  // Pre-request processing: filter thinking blocks for Claude models so only
+  // valid thinking blocks are sent to the Copilot Messages API.
+  for (const msg of anthropicPayload.messages) {
+    if (msg.role === "assistant" && Array.isArray(msg.content)) {
+      msg.content = msg.content.filter((block) => {
+        if (block.type !== "thinking") return true
+        return (
+          block.thinking
+          && block.thinking !== "Thinking..."
+          && block.signature
+          && !block.signature.includes("@")
+        )
+      })
+    }
+  }
+
   const response = await createMessages(anthropicPayload, anthropicBetaHeader)
 
   if (isAsyncIterable(response)) {

From 3c12f580bf4d269ab18838bcc259a89719f8a2cd Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Fri, 30 Jan 2026 14:49:00 +0800
Subject: [PATCH 35/62] feat: remove web_search tool in responses payload as
 it's not supported by copilot

---
 src/routes/responses/handler.ts | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts
index 4e75f7c8f..24659d912 100644
--- a/src/routes/responses/handler.ts
+++ b/src/routes/responses/handler.ts
@@ -28,6 +28,9 @@ export const handleResponses = async (c: Context) => {
 
   useFunctionApplyPatch(payload)
 
+  // Remove web_search tool as it's not supported by GitHub Copilot
+  removeWebSearchTool(payload)
+
   const selectedModel = state.models?.data.find(
     (model) => model.id === payload.model,
   )
@@ -123,3 +126,11 @@ const useFunctionApplyPatch = (payload: ResponsesPayload): void => {
     }
   }
 }
+
+const removeWebSearchTool = (payload: ResponsesPayload): void => {
+  if (!Array.isArray(payload.tools) || payload.tools.length === 0) return
+
+  payload.tools = payload.tools.filter((t) => {
+    return t.type !== "web_search"
+  })
+}

From c2d0e6aeb1e73bb55c249ee90500efed713ba16c Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Sat, 31 Jan 2026 15:30:03 +0800
Subject: [PATCH 36/62] docs: improve configuration examples and Claude Code
 settings.json examples

---
 README.md | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 8f4926689..e7fcc243e 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ A reverse-engineered proxy for the GitHub Copilot API that exposes it as an Open
 
 ## Features
 
-- **OpenAI & Anthropic Compatibility**: Exposes GitHub Copilot as an OpenAI-compatible (`/v1/chat/completions`, `/v1/models`, `/v1/embeddings`) and Anthropic-compatible (`/v1/messages`) API.
+- **OpenAI & Anthropic Compatibility**: Exposes GitHub Copilot as an OpenAI-compatible (`/v1/responses`, `/v1/chat/completions`, `/v1/models`, `/v1/embeddings`) and Anthropic-compatible (`/v1/messages`) API.
 - **Claude Code Integration**: Easily configure and launch [Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview) to use Copilot as its backend with a simple command-line flag (`--claude-code`).
 - **Usage Dashboard**: A web-based dashboard to monitor your Copilot API usage, view quotas, and see detailed statistics.
 - **Rate Limit Control**: Manage API usage with rate-limiting options (`--rate-limit`) and a waiting mechanism (`--wait`) to prevent errors from rapid requests.
@@ -190,12 +190,16 @@ The following command line options are available for the `start` command:
     "smallModel": "gpt-5-mini",
     "modelReasoningEfforts": {
       "gpt-5-mini": "low"
-    }
+    },
+    "useFunctionApplyPatch": true,
+    "compactUseSmallModel": true
   }
   ```
 - **extraPrompts:** Map of `model -> prompt` appended to the first system prompt when translating Anthropic-style requests to Copilot. Use this to inject guardrails or guidance per model. Missing default entries are auto-added without overwriting your custom prompts.
 - **smallModel:** Fallback model used for tool-less warmup messages (e.g., Claude Code probe requests) to avoid spending premium requests; defaults to `gpt-5-mini`.
 - **modelReasoningEfforts:** Per-model `reasoning.effort` sent to the Copilot Responses API. Allowed values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. If a model isn’t listed, `high` is used by default.
+- **useFunctionApplyPatch:** When `true`, the server will convert any custom tool named `apply_patch` in Responses payloads into an OpenAI-style function tool (`type: "function"`) with a parameter schema so assistants can call it using function-calling semantics to edit files. Set to `false` to leave tools unchanged. Defaults to `true`.
+- **compactUseSmallModel:** When `true`, detected "compact" requests (e.g., from Claude Code or Opencode compact mode) will automatically use the configured `smallModel` to avoid consuming premium model usage for short/background tasks. Defaults to `true`.
 
 Edit this file to customize prompts or swap in your own fast model. Restart the server (or rerun the command) after changes so the cached config is refreshed.
 
@@ -330,12 +334,15 @@ Here is an example `.claude/settings.json` file:
   "env": {
     "ANTHROPIC_BASE_URL": "http://localhost:4141",
     "ANTHROPIC_AUTH_TOKEN": "dummy",
-    "ANTHROPIC_MODEL": "gpt-4.1",
-    "ANTHROPIC_DEFAULT_SONNET_MODEL": "gpt-4.1",
-    "ANTHROPIC_SMALL_FAST_MODEL": "gpt-4.1",
-    "ANTHROPIC_DEFAULT_HAIKU_MODEL": "gpt-4.1",
+    "ANTHROPIC_MODEL": "gpt-5.2",
+    "ANTHROPIC_DEFAULT_SONNET_MODEL": "gpt-5.2",
+    "ANTHROPIC_DEFAULT_HAIKU_MODEL": "gpt-5-mini",
+    "CLAUDE_CODE_SUBAGENT_MODEL": "gpt-5-mini",
     "DISABLE_NON_ESSENTIAL_MODEL_CALLS": "1",
-    "CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1"
+    "CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1",
+    "BASH_MAX_TIMEOUT_MS": "600000",
+    "CLAUDE_CODE_ATTRIBUTION_HEADER": "0",
+    "CLAUDE_CODE_ENABLE_PROMPT_SUGGESTION": "false"
   },
   "permissions": {
     "deny": [

From f64c2c615be8f387af592892daeed01eabf2c6ab Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Thu, 5 Feb 2026 10:38:41 +0800
Subject: [PATCH 37/62] feat: update vscode and copilot versions, and refine
 anthropic-beta header handling

---
 src/lib/api-config.ts                   |  6 +++---
 src/services/copilot/create-messages.ts | 10 +++++++++-
 src/services/get-vscode-version.ts      |  2 +-
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/lib/api-config.ts b/src/lib/api-config.ts
index 83bce92ad..fbef3d5d1 100644
--- a/src/lib/api-config.ts
+++ b/src/lib/api-config.ts
@@ -7,11 +7,11 @@ export const standardHeaders = () => ({
   accept: "application/json",
 })
 
-const COPILOT_VERSION = "0.26.7"
+const COPILOT_VERSION = "0.37.0"
 const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}`
 const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}`
 
-const API_VERSION = "2025-04-01"
+const API_VERSION = "2025-10-01"
 
 export const copilotBaseUrl = (state: State) =>
   state.accountType === "individual" ?
@@ -25,7 +25,7 @@ export const copilotHeaders = (state: State, vision: boolean = false) => {
     "editor-version": `vscode/${state.vsCodeVersion}`,
     "editor-plugin-version": EDITOR_PLUGIN_VERSION,
     "user-agent": USER_AGENT,
-    "openai-intent": "conversation-panel",
+    "openai-intent": "conversation-agent",
     "x-github-api-version": API_VERSION,
     "x-request-id": randomUUID(),
     "x-vscode-user-agent-library-version": "electron-fetch",
diff --git a/src/services/copilot/create-messages.ts b/src/services/copilot/create-messages.ts
index d6e349729..115215a94 100644
--- a/src/services/copilot/create-messages.ts
+++ b/src/services/copilot/create-messages.ts
@@ -40,7 +40,15 @@ export const createMessages = async (
   }
 
   if (anthropicBetaHeader) {
-    headers["anthropic-beta"] = anthropicBetaHeader
+    // align with vscode copilot extension anthropic-beta
+    const filteredBeta = anthropicBetaHeader
+      .split(",")
+      .map((item) => item.trim())
+      .filter((item) => item !== "claude-code-20250219")
+      .join(",")
+    if (filteredBeta) {
+      headers["anthropic-beta"] = filteredBeta
+    }
   } else if (payload.thinking?.budget_tokens) {
     headers["anthropic-beta"] = "interleaved-thinking-2025-05-14"
   }
diff --git a/src/services/get-vscode-version.ts b/src/services/get-vscode-version.ts
index 6078f09b5..709d8a0da 100644
--- a/src/services/get-vscode-version.ts
+++ b/src/services/get-vscode-version.ts
@@ -1,4 +1,4 @@
-const FALLBACK = "1.104.3"
+const FALLBACK = "1.109.0"
 
 export async function getVSCodeVersion() {
   const controller = new AbortController()

From e2c437d49c8304183e7fe088b35adb4263e2fddf Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Sat, 7 Feb 2026 14:17:09 +0800
Subject: [PATCH 38/62] feat: opus4.6 thinking adaptive

---
 src/routes/messages/anthropic-types.ts |  5 ++++-
 src/routes/messages/handler.ts         | 11 +++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts
index 5aa528552..0f75cfc5c 100644
--- a/src/routes/messages/anthropic-types.ts
+++ b/src/routes/messages/anthropic-types.ts
@@ -19,10 +19,13 @@ export interface AnthropicMessagesPayload {
     name?: string
   }
   thinking?: {
-    type: "enabled"
+    type: "enabled" | "adaptive"
     budget_tokens?: number
   }
   service_tier?: "auto" | "standard_only"
+  output_config?: {
+    effort?: "low" | "medium" | "high" | "max"
+  }
 }
 
 export interface AnthropicTextBlock {
diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 577ba16ae..712adfc7f 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -259,6 +259,17 @@ const handleWithMessagesApi = async (
     }
   }
 
+  if (anthropicPayload.model === "claude-opus-4.6") {
+    anthropicPayload.thinking = {
+      type: "adaptive",
+    }
+    anthropicPayload.output_config = {
+      effort: "max",
+    }
+  }
+
+  logger.debug("Translated Messages payload:", JSON.stringify(anthropicPayload))
+
   const response = await createMessages(anthropicPayload, anthropicBetaHeader)
 
   if (isAsyncIterable(response)) {

From d0fb055e48b0c7dd7a394248fb52b0e0b66786d7 Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Sat, 7 Feb 2026 21:03:53 +0800
Subject: [PATCH 39/62] feat: enhance model capabilities with adaptive thinking
 support and update API handling

---
 src/routes/messages/handler.ts     | 45 ++++++++++++++++++++++--------
 src/services/copilot/get-models.ts |  3 ++
 2 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 712adfc7f..acf3af2e9 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -2,8 +2,14 @@ import type { Context } from "hono"
 
 import { streamSSE } from "hono/streaming"
 
+import type { Model } from "~/services/copilot/get-models"
+
 import { awaitApproval } from "~/lib/approval"
-import { getSmallModel, shouldCompactUseSmallModel } from "~/lib/config"
+import {
+  getSmallModel,
+  shouldCompactUseSmallModel,
+  getReasoningEffortForModel,
+} from "~/lib/config"
 import { createHandlerLogger } from "~/lib/logger"
 import { checkRateLimit } from "~/lib/rate-limit"
 import { state } from "~/lib/state"
@@ -82,11 +88,18 @@ export async function handleCompletion(c: Context) {
     await awaitApproval()
   }
 
-  if (shouldUseMessagesApi(anthropicPayload.model)) {
-    return await handleWithMessagesApi(c, anthropicPayload, anthropicBeta)
+  const selectedModel = state.models?.data.find(
+    (m) => m.id === anthropicPayload.model,
+  )
+
+  if (shouldUseMessagesApi(selectedModel)) {
+    return await handleWithMessagesApi(c, anthropicPayload, {
+      anthropicBetaHeader: anthropicBeta,
+      selectedModel,
+    })
   }
 
-  if (shouldUseResponsesApi(anthropicPayload.model)) {
+  if (shouldUseResponsesApi(selectedModel)) {
     return await handleWithResponsesApi(c, anthropicPayload)
   }
 
@@ -241,8 +254,9 @@ const handleWithResponsesApi = async (
 const handleWithMessagesApi = async (
   c: Context,
   anthropicPayload: AnthropicMessagesPayload,
-  anthropicBetaHeader?: string,
+  options?: { anthropicBetaHeader?: string; selectedModel?: Model },
 ) => {
+  const { anthropicBetaHeader, selectedModel } = options ?? {}
   // Pre-request processing: filter thinking blocks for Claude models so only
   // valid thinking blocks are sent to the Copilot Messages API.
   for (const msg of anthropicPayload.messages) {
@@ -259,12 +273,12 @@ const handleWithMessagesApi = async (
     }
   }
 
-  if (anthropicPayload.model === "claude-opus-4.6") {
+  if (selectedModel?.capabilities.supports.adaptive_thinking) {
     anthropicPayload.thinking = {
       type: "adaptive",
     }
     anthropicPayload.output_config = {
-      effort: "max",
+      effort: getAnthropicEffortForModel(anthropicPayload.model),
     }
   }
 
@@ -294,15 +308,13 @@ const handleWithMessagesApi = async (
   return c.json(response)
 }
 
-const shouldUseResponsesApi = (modelId: string): boolean => {
-  const selectedModel = state.models?.data.find((model) => model.id === modelId)
+const shouldUseResponsesApi = (selectedModel: Model | undefined): boolean => {
   return (
     selectedModel?.supported_endpoints?.includes(RESPONSES_ENDPOINT) ?? false
   )
 }
 
-const shouldUseMessagesApi = (modelId: string): boolean => {
-  const selectedModel = state.models?.data.find((model) => model.id === modelId)
+const shouldUseMessagesApi = (selectedModel: Model | undefined): boolean => {
   return (
     selectedModel?.supported_endpoints?.includes(MESSAGES_ENDPOINT) ?? false
   )
@@ -316,6 +328,17 @@ const isAsyncIterable = <T>(value: unknown): value is AsyncIterable<T> =>
   Boolean(value)
   && typeof (value as AsyncIterable<T>)[Symbol.asyncIterator] === "function"
 
+const getAnthropicEffortForModel = (
+  model: string,
+): "low" | "medium" | "high" | "max" => {
+  const reasoningEffort = getReasoningEffortForModel(model)
+
+  if (reasoningEffort === "xhigh") return "max"
+  if (reasoningEffort === "none" || reasoningEffort === "minimal") return "low"
+
+  return reasoningEffort
+}
+
 const isCompactRequest = (
   anthropicPayload: AnthropicMessagesPayload,
 ): boolean => {
diff --git a/src/services/copilot/get-models.ts b/src/services/copilot/get-models.ts
index 3690ad3f5..cf3f184b5 100644
--- a/src/services/copilot/get-models.ts
+++ b/src/services/copilot/get-models.ts
@@ -25,12 +25,15 @@ interface ModelLimits {
 }
 
 interface ModelSupports {
+  max_thinking_budget?: number
+  min_thinking_budget?: number
   tool_calls?: boolean
   parallel_tool_calls?: boolean
   dimensions?: boolean
   streaming?: boolean
   structured_outputs?: boolean
   vision?: boolean
+  adaptive_thinking?: boolean
 }
 
 interface ModelCapabilities {

From 7b3e7395c1a4dc13b9b0ad8bdebb03f3d744c5cb Mon Sep 17 00:00:00 2001
From: cpf <397649079@qq.com>
Date: Sun, 8 Feb 2026 21:02:42 +0800
Subject: [PATCH 40/62] fix(stream): send valid JSON in SSE ping events to
 prevent AI_JSONParseError

@ai-sdk/anthropic runs JSON.parse on all SSE data fields (only skips
[DONE]). Sending empty string as ping data caused Unexpected EOF errors,
especially visible with gpt-5.2 via /v1/messages. Changed ping data from
empty string to '{"type":"ping"}' matching Anthropic's expected schema.
---
 src/routes/messages/handler.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index acf3af2e9..1273c10cc 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -192,7 +192,7 @@ const handleWithResponsesApi = async (
       for await (const chunk of response) {
         const eventName = chunk.event
         if (eventName === "ping") {
-          await stream.writeSSE({ event: "ping", data: "" })
+          await stream.writeSSE({ event: "ping", data: '{"type":"ping"}' })
           continue
         }
 

From 4bcbffb80bb708ae4520bc92e9d3514f4969a847 Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Wed, 11 Feb 2026 11:47:19 +0800
Subject: [PATCH 41/62] feat: update vscode and copilot version

---
 src/lib/api-config.ts              | 2 +-
 src/services/get-vscode-version.ts | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/lib/api-config.ts b/src/lib/api-config.ts
index fbef3d5d1..2f7ecbf5d 100644
--- a/src/lib/api-config.ts
+++ b/src/lib/api-config.ts
@@ -7,7 +7,7 @@ export const standardHeaders = () => ({
   accept: "application/json",
 })
 
-const COPILOT_VERSION = "0.37.0"
+const COPILOT_VERSION = "0.37.4"
 const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}`
 const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}`
 
diff --git a/src/services/get-vscode-version.ts b/src/services/get-vscode-version.ts
index 709d8a0da..7cda4f7a2 100644
--- a/src/services/get-vscode-version.ts
+++ b/src/services/get-vscode-version.ts
@@ -1,4 +1,4 @@
-const FALLBACK = "1.109.0"
+const FALLBACK = "1.109.2"
 
 export async function getVSCodeVersion() {
   const controller = new AbortController()
@@ -19,6 +19,9 @@ export async function getVSCodeVersion() {
     const match = pkgbuild.match(pkgverRegex)
 
     if (match) {
+      if (match[1] === "1.109.0") {
+        return FALLBACK
+      }
       return match[1]
     }
 

From fae8adced444d68e55e3d3970150665851fefa54 Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Wed, 11 Feb 2026 23:23:25 +0800
Subject: [PATCH 42/62] feat(auth): implement API key authentication middleware
 and update README

---
 README.md               |  20 ++++++++
 src/lib/config.ts       |   6 +++
 src/lib/request-auth.ts | 101 ++++++++++++++++++++++++++++++++++++++++
 src/server.ts           |   2 +
 4 files changed, 129 insertions(+)
 create mode 100644 src/lib/request-auth.ts

diff --git a/README.md b/README.md
index e7fcc243e..1fec451a2 100644
--- a/README.md
+++ b/README.md
@@ -183,6 +183,9 @@ The following command line options are available for the `start` command:
 - **Default shape:**
   ```json
   {
+    "auth": {
+      "apiKeys": []
+    },
     "extraPrompts": {
       "gpt-5-mini": "<built-in exploration prompt>",
       "gpt-5.1-codex-max": "<built-in exploration prompt>"
@@ -195,6 +198,7 @@ The following command line options are available for the `start` command:
     "compactUseSmallModel": true
   }
   ```
+- **auth.apiKeys:** API keys used for request authentication. Supports multiple keys for rotation. Requests can authenticate with either `x-api-key: <key>` or `Authorization: Bearer <key>`. If empty or omitted, authentication is disabled.
 - **extraPrompts:** Map of `model -> prompt` appended to the first system prompt when translating Anthropic-style requests to Copilot. Use this to inject guardrails or guidance per model. Missing default entries are auto-added without overwriting your custom prompts.
 - **smallModel:** Fallback model used for tool-less warmup messages (e.g., Claude Code probe requests) to avoid spending premium requests; defaults to `gpt-5-mini`.
 - **modelReasoningEfforts:** Per-model `reasoning.effort` sent to the Copilot Responses API. Allowed values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. If a model isn’t listed, `high` is used by default.
@@ -203,6 +207,22 @@ The following command line options are available for the `start` command:
 
 Edit this file to customize prompts or swap in your own fast model. Restart the server (or rerun the command) after changes so the cached config is refreshed.
 
+## API Authentication
+
+- **Protected routes:** All routes except `/` require authentication when `auth.apiKeys` is configured and non-empty.
+- **Allowed auth headers:**
+  - `x-api-key: <your_key>`
+  - `Authorization: Bearer <your_key>`
+- **CORS preflight:** `OPTIONS` requests are always allowed.
+- **When no keys are configured:** Server starts normally and allows requests (authentication disabled).
+
+Example request:
+
+```sh
+curl http://localhost:4141/v1/models \
+  -H "x-api-key: your_api_key"
+```
+
 ## API Endpoints
 
 The server exposes several endpoints to interact with the Copilot API. It provides OpenAI-compatible endpoints and now also includes support for Anthropic-compatible endpoints, allowing for greater flexibility with different tools and services.
diff --git a/src/lib/config.ts b/src/lib/config.ts
index fe25a2c80..69b733e18 100644
--- a/src/lib/config.ts
+++ b/src/lib/config.ts
@@ -4,6 +4,9 @@ import fs from "node:fs"
 import { PATHS } from "./paths"
 
 export interface AppConfig {
+  auth?: {
+    apiKeys?: Array<string>
+  }
   extraPrompts?: Record<string, string>
   smallModel?: string
   modelReasoningEfforts?: Record<
@@ -22,6 +25,9 @@ const gpt5ExplorationPrompt = `## Exploration and reading files
 - **Workflow:** (a) plan all needed reads → (b) issue one parallel batch → (c) analyze results → (d) repeat if new, unpredictable reads arise.`
 
 const defaultConfig: AppConfig = {
+  auth: {
+    apiKeys: [],
+  },
   extraPrompts: {
     "gpt-5-mini": gpt5ExplorationPrompt,
     "gpt-5.1-codex-max": gpt5ExplorationPrompt,
diff --git a/src/lib/request-auth.ts b/src/lib/request-auth.ts
new file mode 100644
index 000000000..d974c7a06
--- /dev/null
+++ b/src/lib/request-auth.ts
@@ -0,0 +1,101 @@
+import type { Context, MiddlewareHandler } from "hono"
+
+import consola from "consola"
+
+import { getConfig } from "./config"
+
+interface AuthMiddlewareOptions {
+  getApiKeys?: () => Array<string>
+  allowUnauthenticatedPaths?: Array<string>
+  allowOptionsBypass?: boolean
+}
+
+export function normalizeApiKeys(apiKeys: unknown): Array<string> {
+  if (!Array.isArray(apiKeys)) {
+    if (apiKeys !== undefined) {
+      consola.warn("Invalid auth.apiKeys config. Expected an array of strings.")
+    }
+    return []
+  }
+
+  const normalizedKeys = apiKeys
+    .filter((key): key is string => typeof key === "string")
+    .map((key) => key.trim())
+    .filter((key) => key.length > 0)
+
+  if (normalizedKeys.length !== apiKeys.length) {
+    consola.warn(
+      "Invalid auth.apiKeys entries found. Only non-empty strings are allowed.",
+    )
+  }
+
+  return [...new Set(normalizedKeys)]
+}
+
+export function getConfiguredApiKeys(): Array<string> {
+  const config = getConfig()
+  return normalizeApiKeys(config.auth?.apiKeys)
+}
+
+export function extractRequestApiKey(c: Context): string | null {
+  const xApiKey = c.req.header("x-api-key")?.trim()
+  if (xApiKey) {
+    return xApiKey
+  }
+
+  const authorization = c.req.header("authorization")
+  if (!authorization) {
+    return null
+  }
+
+  const [scheme, ...rest] = authorization.trim().split(/\s+/)
+  if (scheme.toLowerCase() !== "bearer") {
+    return null
+  }
+
+  const bearerToken = rest.join(" ").trim()
+  return bearerToken || null
+}
+
+function createUnauthorizedResponse(c: Context): Response {
+  c.header("WWW-Authenticate", 'Bearer realm="copilot-api"')
+  return c.json(
+    {
+      error: {
+        message: "Unauthorized",
+        type: "authentication_error",
+      },
+    },
+    401,
+  )
+}
+
+export function createAuthMiddleware(
+  options: AuthMiddlewareOptions = {},
+): MiddlewareHandler {
+  const getApiKeys = options.getApiKeys ?? getConfiguredApiKeys
+  const allowUnauthenticatedPaths = options.allowUnauthenticatedPaths ?? ["/"]
+  const allowOptionsBypass = options.allowOptionsBypass ?? true
+
+  return async (c, next) => {
+    if (allowOptionsBypass && c.req.method === "OPTIONS") {
+      return next()
+    }
+
+    if (allowUnauthenticatedPaths.includes(c.req.path)) {
+      return next()
+    }
+
+    const apiKeys = getApiKeys()
+    if (apiKeys.length === 0) {
+      return next()
+    }
+
+    const requestApiKey = extractRequestApiKey(c)
+    if (!requestApiKey || !apiKeys.includes(requestApiKey)) {
+      return createUnauthorizedResponse(c)
+    }
+
+    return next()
+  }
+}
diff --git a/src/server.ts b/src/server.ts
index 7b9387e64..4dd8c4e9f 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -2,6 +2,7 @@ import { Hono } from "hono"
 import { cors } from "hono/cors"
 import { logger } from "hono/logger"
 
+import { createAuthMiddleware } from "./lib/request-auth"
 import { completionRoutes } from "./routes/chat-completions/route"
 import { embeddingRoutes } from "./routes/embeddings/route"
 import { messageRoutes } from "./routes/messages/route"
@@ -14,6 +15,7 @@ export const server = new Hono()
 
 server.use(logger())
 server.use(cors())
+server.use("*", createAuthMiddleware())
 
 server.get("/", (c) => c.text("Server running"))
 

From 5c146acd217415d9d733311816a26f0970282d64 Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Thu, 12 Feb 2026 07:57:16 +0800
Subject: [PATCH 43/62] feat: enhance response translation with assistant phase
 handling

---
 src/lib/config.ts                            | 21 +++++++
 src/routes/messages/responses-translation.ts | 64 +++++++++++++++++---
 src/services/copilot/create-responses.ts     |  1 +
 3 files changed, 76 insertions(+), 10 deletions(-)

diff --git a/src/lib/config.ts b/src/lib/config.ts
index 69b733e18..9f71065c1 100644
--- a/src/lib/config.ts
+++ b/src/lib/config.ts
@@ -24,6 +24,26 @@ const gpt5ExplorationPrompt = `## Exploration and reading files
 - **Only make sequential calls if you truly cannot know the next file without seeing a result first.**
 - **Workflow:** (a) plan all needed reads → (b) issue one parallel batch → (c) analyze results → (d) repeat if new, unpredictable reads arise.`
 
+const gpt5CommentaryPrompt = `# Working with the user
+
+You interact with the user through a terminal. You have 2 ways of communicating with the users:  
+- Share intermediary updates in \`commentary\` channel.  
+- After you have completed all your work, send a message to the \`final\` channel.  
+
+## Intermediary updates
+
+- Intermediary updates go to the \`commentary\` channel.
+- User updates are short updates while you are working, they are NOT final answers.
+- You use 1-2 sentence user updates to communicate progress and new information to the user as you are doing work.
+- Do not begin responses with conversational interjections or meta commentary. Avoid openers such as acknowledgements (“Done —”, “Got it”, “Great question, ”) or framing phrases.
+- You provide user updates frequently, every 20s.
+- Before exploring or doing substantial work, you start with a user update acknowledging the request and explaining your first step. You should include your understanding of the user request and explain what you will do. Avoid commenting on the request or using starters such as "Got it -" or "Understood -" etc.
+- When exploring, e.g. searching, reading files, you provide user updates as you go, every 20s, explaining what context you are gathering and what you've learned. Vary your sentence structure when providing these updates to avoid sounding repetitive - in particular, don't start each sentence the same way.
+- After you have sufficient context, and the work is substantial, you provide a longer plan (this is the only user update that may be longer than 2 sentences and can contain formatting).
+- Before performing file edits of any kind, you provide updates explaining what edits you are making.
+- As you are thinking, you very frequently provide updates even if not taking any actions, informing the user of your progress. You interrupt your thinking and send multiple updates in a row if thinking for more than 100 words.
+- Tone of your updates MUST match your personality.`
+
 const defaultConfig: AppConfig = {
   auth: {
     apiKeys: [],
@@ -31,6 +51,7 @@ const defaultConfig: AppConfig = {
   extraPrompts: {
     "gpt-5-mini": gpt5ExplorationPrompt,
     "gpt-5.1-codex-max": gpt5ExplorationPrompt,
+    "gpt-5.3-codex": gpt5CommentaryPrompt,
   },
   smallModel: "gpt-5-mini",
   modelReasoningEfforts: {
diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts
index 04a4f9270..d367bd87e 100644
--- a/src/routes/messages/responses-translation.ts
+++ b/src/routes/messages/responses-translation.ts
@@ -44,6 +44,7 @@ import {
 } from "./anthropic-types"
 
 const MESSAGE_TYPE = "message"
+const CODEX_PHASE_MODEL = "gpt-5.3-codex"
 
 export const THINKING_TEXT = "Thinking..."
 
@@ -53,7 +54,7 @@ export const translateAnthropicMessagesToResponsesPayload = (
   const input: Array<ResponseInputItem> = []
 
   for (const message of payload.messages) {
-    input.push(...translateMessage(message))
+    input.push(...translateMessage(message, payload.model))
   }
 
   const translatedTools = convertAnthropicTools(payload.tools)
@@ -90,12 +91,13 @@ export const translateAnthropicMessagesToResponsesPayload = (
 
 const translateMessage = (
   message: AnthropicMessage,
+  model: string,
 ): Array<ResponseInputItem> => {
   if (message.role === "user") {
     return translateUserMessage(message)
   }
 
-  return translateAssistantMessage(message)
+  return translateAssistantMessage(message, model)
 }
 
 const translateUserMessage = (
@@ -114,7 +116,7 @@ const translateUserMessage = (
 
   for (const block of message.content) {
     if (block.type === "tool_result") {
-      flushPendingContent("user", pendingContent, items)
+      flushPendingContent(pendingContent, items, { role: "user" })
       items.push(createFunctionCallOutput(block))
       continue
     }
@@ -125,16 +127,19 @@ const translateUserMessage = (
     }
   }
 
-  flushPendingContent("user", pendingContent, items)
+  flushPendingContent(pendingContent, items, { role: "user" })
 
   return items
 }
 
 const translateAssistantMessage = (
   message: AnthropicAssistantMessage,
+  model: string,
 ): Array<ResponseInputItem> => {
+  const assistantPhase = resolveAssistantPhase(model, message.content)
+
   if (typeof message.content === "string") {
-    return [createMessage("assistant", message.content)]
+    return [createMessage("assistant", message.content, assistantPhase)]
   }
 
   if (!Array.isArray(message.content)) {
@@ -146,7 +151,10 @@ const translateAssistantMessage = (
 
   for (const block of message.content) {
     if (block.type === "tool_use") {
-      flushPendingContent("assistant", pendingContent, items)
+      flushPendingContent(pendingContent, items, {
+        role: "assistant",
+        phase: assistantPhase,
+      })
       items.push(createFunctionToolCall(block))
       continue
     }
@@ -156,7 +164,10 @@ const translateAssistantMessage = (
       && block.signature
       && block.signature.includes("@")
     ) {
-      flushPendingContent("assistant", pendingContent, items)
+      flushPendingContent(pendingContent, items, {
+        role: "assistant",
+        phase: assistantPhase,
+      })
       items.push(createReasoningContent(block))
       continue
     }
@@ -167,7 +178,10 @@ const translateAssistantMessage = (
     }
   }
 
-  flushPendingContent("assistant", pendingContent, items)
+  flushPendingContent(pendingContent, items, {
+    role: "assistant",
+    phase: assistantPhase,
+  })
 
   return items
 }
@@ -202,9 +216,9 @@ const translateAssistantContentBlock = (
 }
 
 const flushPendingContent = (
-  role: ResponseInputMessage["role"],
   pendingContent: Array<ResponseInputContent>,
   target: Array<ResponseInputItem>,
+  message: Pick<ResponseInputMessage, "role" | "phase">,
 ) => {
   if (pendingContent.length === 0) {
     return
@@ -212,19 +226,49 @@ const flushPendingContent = (
 
   const messageContent = [...pendingContent]
 
-  target.push(createMessage(role, messageContent))
+  target.push(createMessage(message.role, messageContent, message.phase))
   pendingContent.length = 0
 }
 
 const createMessage = (
   role: ResponseInputMessage["role"],
   content: string | Array<ResponseInputContent>,
+  phase?: ResponseInputMessage["phase"],
 ): ResponseInputMessage => ({
   type: MESSAGE_TYPE,
   role,
   content,
+  ...(role === "assistant" && phase ? { phase } : {}),
 })
 
+const resolveAssistantPhase = (
+  model: string,
+  content: AnthropicAssistantMessage["content"],
+): ResponseInputMessage["phase"] | undefined => {
+  if (!shouldApplyCodexPhase(model)) {
+    return undefined
+  }
+
+  if (typeof content === "string") {
+    return "final_answer"
+  }
+
+  if (!Array.isArray(content)) {
+    return undefined
+  }
+
+  const hasText = content.some((block) => block.type === "text")
+  if (!hasText) {
+    return undefined
+  }
+
+  const hasToolUse = content.some((block) => block.type === "tool_use")
+  return hasToolUse ? "commentary" : "final_answer"
+}
+
+const shouldApplyCodexPhase = (model: string): boolean =>
+  model === CODEX_PHASE_MODEL
+
 const createTextContent = (text: string): ResponseInputText => ({
   type: "input_text",
   text,
diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
index 9982a4d98..391ebf845 100644
--- a/src/services/copilot/create-responses.ts
+++ b/src/services/copilot/create-responses.ts
@@ -60,6 +60,7 @@ export interface ResponseInputMessage {
   role: "user" | "assistant" | "system" | "developer"
   content?: string | Array<ResponseInputContent>
   status?: string
+  phase?: "commentary" | "final_answer"
 }
 
 export interface ResponseFunctionToolCallItem {

From 0eb8e7f9efc0ad2f8c879d54b209eb84411b3e6d Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Sun, 15 Feb 2026 15:36:54 +0800
Subject: [PATCH 44/62] feat: update fallback version for VSCode and increment
 Copilot version

---
 src/lib/api-config.ts              | 2 +-
 src/services/get-vscode-version.ts | 5 +----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/lib/api-config.ts b/src/lib/api-config.ts
index 2f7ecbf5d..3b5b03995 100644
--- a/src/lib/api-config.ts
+++ b/src/lib/api-config.ts
@@ -7,7 +7,7 @@ export const standardHeaders = () => ({
   accept: "application/json",
 })
 
-const COPILOT_VERSION = "0.37.4"
+const COPILOT_VERSION = "0.37.6"
 const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}`
 const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}`
 
diff --git a/src/services/get-vscode-version.ts b/src/services/get-vscode-version.ts
index 7cda4f7a2..23b215fd2 100644
--- a/src/services/get-vscode-version.ts
+++ b/src/services/get-vscode-version.ts
@@ -1,4 +1,4 @@
-const FALLBACK = "1.109.2"
+const FALLBACK = "1.109.3"
 
 export async function getVSCodeVersion() {
   const controller = new AbortController()
@@ -19,9 +19,6 @@ export async function getVSCodeVersion() {
     const match = pkgbuild.match(pkgverRegex)
 
     if (match) {
-      if (match[1] === "1.109.0") {
-        return FALLBACK
-      }
       return match[1]
     }
 

From 5b488b20e5db50a0a8f7fb32e93f261001b2cf18 Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Mon, 16 Feb 2026 12:43:58 +0800
Subject: [PATCH 45/62] feat: implement subagent marker integration and update
 related handlers

---
 .claude/hooks/subagent-start-marker.js        | 33 ++++++++
 .claude/settings.json                         | 15 ++++
 .opencode/plugins/subagent-marker.js          | 65 ++++++++++++++++
 README.md                                     | 40 +++++++++-
 eslint.config.js                              |  1 +
 src/routes/messages/handler.ts                | 33 ++++++--
 src/routes/messages/subagent-marker.ts        | 77 +++++++++++++++++++
 .../copilot/create-chat-completions.ts        |  5 +-
 src/services/copilot/create-messages.ts       |  6 +-
 9 files changed, 265 insertions(+), 10 deletions(-)
 create mode 100644 .claude/hooks/subagent-start-marker.js
 create mode 100644 .claude/settings.json
 create mode 100644 .opencode/plugins/subagent-marker.js
 create mode 100644 src/routes/messages/subagent-marker.ts

diff --git a/.claude/hooks/subagent-start-marker.js b/.claude/hooks/subagent-start-marker.js
new file mode 100644
index 000000000..4a2a39681
--- /dev/null
+++ b/.claude/hooks/subagent-start-marker.js
@@ -0,0 +1,33 @@
+async function readStdin() {
+  let input = "";
+  for await (const chunk of process.stdin) {
+    input += chunk;
+  }
+  return input.trim();
+}
+
+const rawInput = await readStdin();
+let hookInput = {};
+
+if (rawInput) {
+  try {
+    hookInput = JSON.parse(rawInput);
+  } catch {
+    hookInput = {};
+  }
+}
+
+const marker = `__SUBAGENT_MARKER__${JSON.stringify({
+  session_id: hookInput.session_id ?? null,
+  agent_id: hookInput.agent_id ?? null,
+  agent_type: hookInput.agent_type ?? null,
+})}`;
+
+const payload = {
+  hookSpecificOutput: {
+    hookEventName: "SubagentStart",
+    additionalContext: marker,
+  },
+};
+
+process.stdout.write(`${JSON.stringify(payload)}\n`);
diff --git a/.claude/settings.json b/.claude/settings.json
new file mode 100644
index 000000000..6d1577cd4
--- /dev/null
+++ b/.claude/settings.json
@@ -0,0 +1,15 @@
+{
+  "hooks": {
+    "SubagentStart": [
+      {
+        "matcher": "*",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "node --input-type=module -e \"import { homedir } from 'node:os'; import { join } from 'node:path'; import { readFile } from 'node:fs/promises'; const file = join(homedir(), '.claude', 'hooks', 'subagent-start-marker.js'); const source = await readFile(file, 'utf8'); const url = 'data:text/javascript;base64,' + Buffer.from(source).toString('base64'); await import(url);\""
+          }
+        ]
+      }
+    ]
+  }
+}
diff --git a/.opencode/plugins/subagent-marker.js b/.opencode/plugins/subagent-marker.js
new file mode 100644
index 000000000..df20bb8ff
--- /dev/null
+++ b/.opencode/plugins/subagent-marker.js
@@ -0,0 +1,65 @@
+const MARKER_PREFIX = "__SUBAGENT_MARKER__"
+
+const subagentSessions = new Set()
+const markedSessions = new Set()
+
+const getSessionInfo = (event) => {
+  if (!event || typeof event !== "object") return undefined
+  const properties = event.properties
+  if (!properties || typeof properties !== "object") return undefined
+  const info = properties.info
+  if (!info || typeof info !== "object") return undefined
+  return info
+}
+
+export const SubagentMarkerPlugin = async () => {
+  return {
+    event: async ({ event }) => {
+      if (event.type === "session.created") {
+        const info = getSessionInfo(event)
+        if (info?.id && info.parentID) {
+          subagentSessions.add(info.id)
+        }
+        return
+      }
+
+      if (event.type === "session.deleted") {
+        const info = getSessionInfo(event)
+        if (info?.id) {
+          subagentSessions.delete(info.id)
+          markedSessions.delete(info.id)
+        }
+      }
+    },
+    "chat.message": async (input, output) => {
+      const { sessionID } = input
+      if (!subagentSessions.has(sessionID) || markedSessions.has(sessionID)) {
+        return
+      }
+      if (!output.message?.id || !output.message?.sessionID) {
+        return
+      }
+
+      const marker = `${MARKER_PREFIX}${JSON.stringify({
+        session_id: sessionID,
+        agent_id: sessionID,
+        agent_type: input.agent ?? "opencode-subagent",
+      })}`
+
+      output.parts.unshift({
+        id: `${output.message.id}-subagent-marker`,
+        sessionID: output.message.sessionID,
+        messageID: output.message.id,
+        type: "text",
+        text: `<system-reminder>\nSubagentStart hook additional context: ${marker}\n</system-reminder>`,
+        synthetic: true,
+        time: {
+          start: Date.now(),
+          end: Date.now(),
+        },
+      })
+
+      markedSessions.add(sessionID)
+    },
+  }
+}
diff --git a/README.md b/README.md
index 1fec451a2..6342fda0a 100644
--- a/README.md
+++ b/README.md
@@ -357,7 +357,6 @@ Here is an example `.claude/settings.json` file:
     "ANTHROPIC_MODEL": "gpt-5.2",
     "ANTHROPIC_DEFAULT_SONNET_MODEL": "gpt-5.2",
     "ANTHROPIC_DEFAULT_HAIKU_MODEL": "gpt-5-mini",
-    "CLAUDE_CODE_SUBAGENT_MODEL": "gpt-5-mini",
     "DISABLE_NON_ESSENTIAL_MODEL_CALLS": "1",
     "CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1",
     "BASH_MAX_TIMEOUT_MS": "600000",
@@ -376,6 +375,45 @@ You can find more options here: [Claude Code settings](https://docs.anthropic.co
 
 You can also read more about IDE integration here: [Add Claude Code to your IDE](https://docs.anthropic.com/en/docs/claude-code/ide-integrations)
 
+### Subagent Marker Integration (Optional)
+
+This project supports `X-Initiator: agent` for subagent-originated requests.
+
+#### Claude Code hook producer
+
+Use the included hook script to inject marker context on `SubagentStart`.
+If you place the script under your user Claude directory (`~/.claude/hooks`), use the cross-platform command shown below in `.claude/settings.json`. The script is included in this repository at:
+
+- `.claude/hooks/subagent-start-marker.js`
+
+And enable it from `.claude/settings.json`:
+
+```json
+{
+  "hooks": {
+    "SubagentStart": [
+      {
+        "matcher": "*",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "node --input-type=module -e \"import { homedir } from 'node:os'; import { join } from 'node:path'; import { readFile } from 'node:fs/promises'; const file = join(homedir(), '.claude', 'hooks', 'subagent-start-marker.js'); const source = await readFile(file, 'utf8'); const url = 'data:text/javascript;base64,' + Buffer.from(source).toString('base64'); await import(url);\""
+          }
+        ]
+      }
+    ]
+  }
+}
+```
+
+#### Opencode plugin producer
+
+For opencode, use the plugin implementation at:
+
+- `.opencode/plugins/subagent-marker.js`
+
+This plugin tracks sub-sessions and prepends a marker system reminder to the first chat message of each subagent session.
+
 ## Running from Source
 
 The project can be run from source in several ways:
diff --git a/eslint.config.js b/eslint.config.js
index c9f79bea5..d26148e73 100644
--- a/eslint.config.js
+++ b/eslint.config.js
@@ -1,6 +1,7 @@
 import config from "@echristian/eslint-config"
 
 export default config({
+  ignores: [".claude/**", ".opencode/**"],
   prettier: {
     plugins: ["prettier-plugin-packagejson"],
   },
diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 1273c10cc..66d87f39c 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -46,6 +46,7 @@ import {
   translateToOpenAI,
 } from "./non-stream-translation"
 import { translateChunkToAnthropicEvents } from "./stream-translation"
+import { parseSubagentMarkerFromFirstUser } from "./subagent-marker"
 
 const logger = createHandlerLogger("messages-handler")
 
@@ -58,6 +59,12 @@ export async function handleCompletion(c: Context) {
   const anthropicPayload = await c.req.json<AnthropicMessagesPayload>()
   logger.debug("Anthropic request payload:", JSON.stringify(anthropicPayload))
 
+  const subagentMarker = parseSubagentMarkerFromFirstUser(anthropicPayload)
+  const initiatorOverride = subagentMarker ? "agent" : undefined
+  if (subagentMarker) {
+    logger.debug("Detected Subagent marker:", JSON.stringify(subagentMarker))
+  }
+
   // claude code and opencode compact request detection
   const isCompact = isCompactRequest(anthropicPayload)
 
@@ -95,15 +102,16 @@ export async function handleCompletion(c: Context) {
   if (shouldUseMessagesApi(selectedModel)) {
     return await handleWithMessagesApi(c, anthropicPayload, {
       anthropicBetaHeader: anthropicBeta,
+      initiatorOverride,
       selectedModel,
     })
   }
 
   if (shouldUseResponsesApi(selectedModel)) {
-    return await handleWithResponsesApi(c, anthropicPayload)
+    return await handleWithResponsesApi(c, anthropicPayload, initiatorOverride)
   }
 
-  return await handleWithChatCompletions(c, anthropicPayload)
+  return await handleWithChatCompletions(c, anthropicPayload, initiatorOverride)
 }
 
 const RESPONSES_ENDPOINT = "/responses"
@@ -112,6 +120,7 @@ const MESSAGES_ENDPOINT = "/v1/messages"
 const handleWithChatCompletions = async (
   c: Context,
   anthropicPayload: AnthropicMessagesPayload,
+  initiatorOverride?: "agent" | "user",
 ) => {
   const openAIPayload = translateToOpenAI(anthropicPayload)
   logger.debug(
@@ -119,7 +128,9 @@ const handleWithChatCompletions = async (
     JSON.stringify(openAIPayload),
   )
 
-  const response = await createChatCompletions(openAIPayload)
+  const response = await createChatCompletions(openAIPayload, {
+    initiator: initiatorOverride,
+  })
 
   if (isNonStreaming(response)) {
     logger.debug(
@@ -170,6 +181,7 @@ const handleWithChatCompletions = async (
 const handleWithResponsesApi = async (
   c: Context,
   anthropicPayload: AnthropicMessagesPayload,
+  initiatorOverride?: "agent" | "user",
 ) => {
   const responsesPayload =
     translateAnthropicMessagesToResponsesPayload(anthropicPayload)
@@ -181,7 +193,7 @@ const handleWithResponsesApi = async (
   const { vision, initiator } = getResponsesRequestOptions(responsesPayload)
   const response = await createResponses(responsesPayload, {
     vision,
-    initiator,
+    initiator: initiatorOverride ?? initiator,
   })
 
   if (responsesPayload.stream && isAsyncIterable(response)) {
@@ -254,9 +266,14 @@ const handleWithResponsesApi = async (
 const handleWithMessagesApi = async (
   c: Context,
   anthropicPayload: AnthropicMessagesPayload,
-  options?: { anthropicBetaHeader?: string; selectedModel?: Model },
+  options?: {
+    anthropicBetaHeader?: string
+    initiatorOverride?: "agent" | "user"
+    selectedModel?: Model
+  },
 ) => {
-  const { anthropicBetaHeader, selectedModel } = options ?? {}
+  const { anthropicBetaHeader, initiatorOverride, selectedModel } =
+    options ?? {}
   // Pre-request processing: filter thinking blocks for Claude models so only
   // valid thinking blocks are sent to the Copilot Messages API.
   for (const msg of anthropicPayload.messages) {
@@ -284,7 +301,9 @@ const handleWithMessagesApi = async (
 
   logger.debug("Translated Messages payload:", JSON.stringify(anthropicPayload))
 
-  const response = await createMessages(anthropicPayload, anthropicBetaHeader)
+  const response = await createMessages(anthropicPayload, anthropicBetaHeader, {
+    initiator: initiatorOverride,
+  })
 
   if (isAsyncIterable(response)) {
     logger.debug("Streaming response from Copilot (Messages API)")
diff --git a/src/routes/messages/subagent-marker.ts b/src/routes/messages/subagent-marker.ts
new file mode 100644
index 000000000..89c174141
--- /dev/null
+++ b/src/routes/messages/subagent-marker.ts
@@ -0,0 +1,77 @@
+import type { AnthropicMessagesPayload } from "./anthropic-types"
+
+const subagentMarkerPrefix = "__SUBAGENT_MARKER__"
+
+export interface SubagentMarker {
+  session_id: string
+  agent_id: string
+  agent_type: string
+}
+
+export const parseSubagentMarkerFromFirstUser = (
+  payload: AnthropicMessagesPayload,
+): SubagentMarker | null => {
+  const firstUserMessage = payload.messages.find((msg) => msg.role === "user")
+  if (!firstUserMessage || !Array.isArray(firstUserMessage.content)) {
+    return null
+  }
+
+  for (const block of firstUserMessage.content) {
+    if (block.type !== "text") {
+      continue
+    }
+
+    const marker = parseSubagentMarkerFromSystemReminder(block.text)
+    if (marker) {
+      return marker
+    }
+  }
+
+  return null
+}
+
+const parseSubagentMarkerFromSystemReminder = (
+  text: string,
+): SubagentMarker | null => {
+  const startTag = "<system-reminder>"
+  const endTag = "</system-reminder>"
+  let searchFrom = 0
+
+  while (true) {
+    const reminderStart = text.indexOf(startTag, searchFrom)
+    if (reminderStart === -1) {
+      break
+    }
+
+    const contentStart = reminderStart + startTag.length
+    const reminderEnd = text.indexOf(endTag, contentStart)
+    if (reminderEnd === -1) {
+      break
+    }
+
+    const reminderContent = text.slice(contentStart, reminderEnd)
+    const markerIndex = reminderContent.indexOf(subagentMarkerPrefix)
+    if (markerIndex === -1) {
+      searchFrom = reminderEnd + endTag.length
+      continue
+    }
+
+    const markerJson = reminderContent
+      .slice(markerIndex + subagentMarkerPrefix.length)
+      .trim()
+
+    try {
+      const parsed = JSON.parse(markerJson) as SubagentMarker
+      if (!parsed.session_id || !parsed.agent_id || !parsed.agent_type) {
+        continue
+      }
+
+      return parsed
+    } catch {
+      searchFrom = reminderEnd + endTag.length
+      continue
+    }
+  }
+
+  return null
+}
diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts
index 63d3e50a3..19b4512cd 100644
--- a/src/services/copilot/create-chat-completions.ts
+++ b/src/services/copilot/create-chat-completions.ts
@@ -7,6 +7,9 @@ import { state } from "~/lib/state"
 
 export const createChatCompletions = async (
   payload: ChatCompletionsPayload,
+  options?: {
+    initiator?: "agent" | "user"
+  },
 ) => {
   if (!state.copilotToken) throw new Error("Copilot token not found")
 
@@ -30,7 +33,7 @@ export const createChatCompletions = async (
   // Build headers and add X-Initiator
   const headers: Record<string, string> = {
     ...copilotHeaders(state, enableVision),
-    "X-Initiator": isAgentCall ? "agent" : "user",
+    "X-Initiator": options?.initiator ?? (isAgentCall ? "agent" : "user"),
   }
 
   const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, {
diff --git a/src/services/copilot/create-messages.ts b/src/services/copilot/create-messages.ts
index 115215a94..2b80b9d10 100644
--- a/src/services/copilot/create-messages.ts
+++ b/src/services/copilot/create-messages.ts
@@ -16,6 +16,9 @@ export type CreateMessagesReturn = AnthropicResponse | MessagesStream
 export const createMessages = async (
   payload: AnthropicMessagesPayload,
   anthropicBetaHeader?: string,
+  options?: {
+    initiator?: "agent" | "user"
+  },
 ): Promise<CreateMessagesReturn> => {
   if (!state.copilotToken) throw new Error("Copilot token not found")
 
@@ -33,10 +36,11 @@ export const createMessages = async (
         lastMessage.content.some((block) => block.type !== "tool_result")
       : true
   }
+  const initiator = options?.initiator ?? (isInitiateRequest ? "user" : "agent")
 
   const headers: Record<string, string> = {
     ...copilotHeaders(state, enableVision),
-    "X-Initiator": isInitiateRequest ? "user" : "agent",
+    "X-Initiator": initiator,
   }
 
   if (anthropicBetaHeader) {

From 88a2e42efbc6581cf16105d686af68a07401b044 Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Mon, 16 Feb 2026 22:21:43 +0800
Subject: [PATCH 46/62] fix: correct search continuation logic in
 parseSubagentMarkerFromSystemReminder

---
 src/routes/messages/subagent-marker.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/routes/messages/subagent-marker.ts b/src/routes/messages/subagent-marker.ts
index 89c174141..0d93ce507 100644
--- a/src/routes/messages/subagent-marker.ts
+++ b/src/routes/messages/subagent-marker.ts
@@ -63,6 +63,7 @@ const parseSubagentMarkerFromSystemReminder = (
     try {
       const parsed = JSON.parse(markerJson) as SubagentMarker
       if (!parsed.session_id || !parsed.agent_id || !parsed.agent_type) {
+        searchFrom = reminderEnd + endTag.length
         continue
       }
 

From 383ab1e6db90fd92365c7f7f0e75477d9405e59d Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Thu, 26 Feb 2026 13:09:53 +0800
Subject: [PATCH 47/62] feat: implement claude-plugin for SubagentStart marker
 injection and update documentation

---
 .claude-plugin/marketplace.json               | 13 +++++++
 .claude/settings.json                         | 15 --------
 README.md                                     | 38 ++++++++-----------
 claude-plugin/.claude-plugin/plugin.json      |  8 ++++
 claude-plugin/hooks/hooks.json                | 15 ++++++++
 .../scripts}/subagent-start-marker.js         |  0
 eslint.config.js                              |  2 +-
 7 files changed, 53 insertions(+), 38 deletions(-)
 create mode 100644 .claude-plugin/marketplace.json
 delete mode 100644 .claude/settings.json
 create mode 100644 claude-plugin/.claude-plugin/plugin.json
 create mode 100644 claude-plugin/hooks/hooks.json
 rename {.claude/hooks => claude-plugin/scripts}/subagent-start-marker.js (100%)

diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
new file mode 100644
index 000000000..7d6742af9
--- /dev/null
+++ b/.claude-plugin/marketplace.json
@@ -0,0 +1,13 @@
+{
+  "name": "copilot-api-marketplace",
+  "owner": {
+    "name": "copilot-api maintainers"
+  },
+  "plugins": [
+    {
+      "name": "claude-plugin",
+      "description": "Inject SubagentStart marker context for copilot-api initiator override",
+      "source": "./claude-plugin"
+    }
+  ]
+}
diff --git a/.claude/settings.json b/.claude/settings.json
deleted file mode 100644
index 6d1577cd4..000000000
--- a/.claude/settings.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-  "hooks": {
-    "SubagentStart": [
-      {
-        "matcher": "*",
-        "hooks": [
-          {
-            "type": "command",
-            "command": "node --input-type=module -e \"import { homedir } from 'node:os'; import { join } from 'node:path'; import { readFile } from 'node:fs/promises'; const file = join(homedir(), '.claude', 'hooks', 'subagent-start-marker.js'); const source = await readFile(file, 'utf8'); const url = 'data:text/javascript;base64,' + Buffer.from(source).toString('base64'); await import(url);\""
-          }
-        ]
-      }
-    ]
-  }
-}
diff --git a/README.md b/README.md
index 6342fda0a..6ee8b0292 100644
--- a/README.md
+++ b/README.md
@@ -377,35 +377,29 @@ You can also read more about IDE integration here: [Add Claude Code to your IDE]
 
 ### Subagent Marker Integration (Optional)
 
-This project supports `X-Initiator: agent` for subagent-originated requests
+This project supports `X-Initiator: agent` for subagent-originated requests.
 
-#### Claude Code hook producer
+#### Claude Code plugin producer (marketplace-based)
 
-Use the included hook script to inject marker context on `SubagentStart`.
-If you place the script under your user Claude directory (`~/.claude/hooks`), use this cross-platform command in `.claude/settings.json`:
+The marker producer is packaged as a Claude Code plugin named `claude-plugin`.
 
-- `.claude/hooks/subagent-start-marker.js`
+- Marketplace catalog in this repository: `.claude-plugin/marketplace.json`
+- Plugin source in this repository: `claude-plugin`
 
-And enable it from `.claude/settings.json`:
+Add the marketplace remotely:
 
-```json
-{
-  "hooks": {
-    "SubagentStart": [
-      {
-        "matcher": "*",
-        "hooks": [
-          {
-            "type": "command",
-            "command": "node --input-type=module -e \"import { homedir } from 'node:os'; import { join } from 'node:path'; import { readFile } from 'node:fs/promises'; const file = join(homedir(), '.claude', 'hooks', 'subagent-start-marker.js'); const source = await readFile(file, 'utf8'); const url = 'data:text/javascript;base64,' + Buffer.from(source).toString('base64'); await import(url);\""
-          }
-        ]
-      }
-    ]
-  }
-}
+```sh
+/plugin marketplace add https://github.com/ericc-ch/copilot-api.git
 ```
 
+Install the plugin from the marketplace:
+
+```sh
+/plugin install claude-plugin@copilot-api-marketplace
+```
+
+After installation, the plugin injects `__SUBAGENT_MARKER__...` on `SubagentStart`, and this proxy uses it to infer `X-Initiator: agent`.
+
 #### Opencode plugin producer
 
 For opencode, use the plugin implementation at:
diff --git a/claude-plugin/.claude-plugin/plugin.json b/claude-plugin/.claude-plugin/plugin.json
new file mode 100644
index 000000000..615fcf662
--- /dev/null
+++ b/claude-plugin/.claude-plugin/plugin.json
@@ -0,0 +1,8 @@
+{
+  "name": "claude-plugin",
+  "description": "Inject SubagentStart marker context for copilot-api initiator override",
+  "version": "1.0.0",
+  "author": {
+    "name": "copilot-api maintainers"
+  }
+}
diff --git a/claude-plugin/hooks/hooks.json b/claude-plugin/hooks/hooks.json
new file mode 100644
index 000000000..0dce9891f
--- /dev/null
+++ b/claude-plugin/hooks/hooks.json
@@ -0,0 +1,15 @@
+{
+  "hooks": {
+    "SubagentStart": [
+      {
+        "matcher": "*",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/subagent-start-marker.js\""
+          }
+        ]
+      }
+    ]
+  }
+}
diff --git a/.claude/hooks/subagent-start-marker.js b/claude-plugin/scripts/subagent-start-marker.js
similarity index 100%
rename from .claude/hooks/subagent-start-marker.js
rename to claude-plugin/scripts/subagent-start-marker.js
diff --git a/eslint.config.js b/eslint.config.js
index d26148e73..b7d56d5b7 100644
--- a/eslint.config.js
+++ b/eslint.config.js
@@ -1,7 +1,7 @@
 import config from "@echristian/eslint-config"
 
 export default config({
-  ignores: [".claude/**", ".opencode/**"],
+  ignores: ["claude-plugin/**", ".opencode/**"],
   prettier: {
     plugins: ["prettier-plugin-packagejson"],
   },

From b2dbf9d57612bdf75e87f71993567bd5315b22b5 Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Thu, 5 Mar 2026 11:36:10 +0800
Subject: [PATCH 48/62] feat: enhance anthropic beta header handling in
 createMessages function

---
 src/services/copilot/create-messages.ts | 58 ++++++++++++++++++++-----
 1 file changed, 46 insertions(+), 12 deletions(-)

diff --git a/src/services/copilot/create-messages.ts b/src/services/copilot/create-messages.ts
index 2b80b9d10..adbcfe319 100644
--- a/src/services/copilot/create-messages.ts
+++ b/src/services/copilot/create-messages.ts
@@ -13,6 +13,45 @@ import { state } from "~/lib/state"
 export type MessagesStream = ReturnType<typeof events>
 export type CreateMessagesReturn = AnthropicResponse | MessagesStream
 
+const INTERLEAVED_THINKING_BETA = "interleaved-thinking-2025-05-14"
+const allowedAnthropicBetas = new Set([
+  INTERLEAVED_THINKING_BETA,
+  "context-management-2025-06-27",
+  "advanced-tool-use-2025-11-20",
+])
+
+const buildAnthropicBetaHeader = (
+  anthropicBetaHeader: string | undefined,
+  thinking: AnthropicMessagesPayload["thinking"],
+): string | undefined => {
+  const isAdaptiveThinking = thinking?.type === "adaptive"
+
+  if (anthropicBetaHeader) {
+    const filteredBeta = anthropicBetaHeader
+      .split(",")
+      .map((item) => item.trim())
+      .filter((item) => item.length > 0)
+      .filter((item) => allowedAnthropicBetas.has(item))
+    const uniqueFilteredBetas = [...new Set(filteredBeta)]
+    const finalFilteredBetas =
+      isAdaptiveThinking ?
+        uniqueFilteredBetas.filter((item) => item !== INTERLEAVED_THINKING_BETA)
+      : uniqueFilteredBetas
+
+    if (finalFilteredBetas.length > 0) {
+      return finalFilteredBetas.join(",")
+    }
+
+    return undefined
+  }
+
+  if (thinking?.budget_tokens && !isAdaptiveThinking) {
+    return INTERLEAVED_THINKING_BETA
+  }
+
+  return undefined
+}
+
 export const createMessages = async (
   payload: AnthropicMessagesPayload,
   anthropicBetaHeader?: string,
@@ -43,18 +82,13 @@ export const createMessages = async (
     "X-Initiator": initiator,
   }
 
-  if (anthropicBetaHeader) {
-    // align with vscode copilot extension anthropic-beta
-    const filteredBeta = anthropicBetaHeader
-      .split(",")
-      .map((item) => item.trim())
-      .filter((item) => item !== "claude-code-20250219")
-      .join(",")
-    if (filteredBeta) {
-      headers["anthropic-beta"] = filteredBeta
-    }
-  } else if (payload.thinking?.budget_tokens) {
-    headers["anthropic-beta"] = "interleaved-thinking-2025-05-14"
+  // align with vscode copilot extension anthropic-beta
+  const anthropicBeta = buildAnthropicBetaHeader(
+    anthropicBetaHeader,
+    payload.thinking,
+  )
+  if (anthropicBeta) {
+    headers["anthropic-beta"] = anthropicBeta
   }
 
   const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, {

From 47764aadb45f6f526a655f29610ef9f3956ff7a7 Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Fri, 6 Mar 2026 06:02:43 +0800
Subject: [PATCH 49/62] feat: enhance phase handling in responses translation

---
 src/lib/config.ts                            | 22 ++++++++++++---
 src/routes/messages/responses-translation.ts | 25 +++++++++++------
 src/services/get-vscode-version.ts           | 29 ++------------------
 3 files changed, 37 insertions(+), 39 deletions(-)

diff --git a/src/lib/config.ts b/src/lib/config.ts
index 9f71065c1..23430c93f 100644
--- a/src/lib/config.ts
+++ b/src/lib/config.ts
@@ -50,12 +50,13 @@ const defaultConfig: AppConfig = {
   },
   extraPrompts: {
     "gpt-5-mini": gpt5ExplorationPrompt,
-    "gpt-5.1-codex-max": gpt5ExplorationPrompt,
     "gpt-5.3-codex": gpt5CommentaryPrompt,
+    "gpt-5.4": gpt5CommentaryPrompt,
   },
   smallModel: "gpt-5-mini",
   modelReasoningEfforts: {
     "gpt-5-mini": "low",
+    "gpt-5.3-codex": "xhigh",
   },
   useFunctionApplyPatch: true,
   compactUseSmallModel: true,
@@ -100,18 +101,27 @@ function readConfigFromDisk(): AppConfig {
   }
 }
 
-function mergeDefaultExtraPrompts(config: AppConfig): {
+function mergeDefaultConfig(config: AppConfig): {
   mergedConfig: AppConfig
   changed: boolean
 } {
   const extraPrompts = config.extraPrompts ?? {}
   const defaultExtraPrompts = defaultConfig.extraPrompts ?? {}
+  const modelReasoningEfforts = config.modelReasoningEfforts ?? {}
+  const defaultModelReasoningEfforts = defaultConfig.modelReasoningEfforts ?? {}
 
   const missingExtraPromptModels = Object.keys(defaultExtraPrompts).filter(
     (model) => !Object.hasOwn(extraPrompts, model),
   )
 
-  if (missingExtraPromptModels.length === 0) {
+  const missingReasoningEffortModels = Object.keys(
+    defaultModelReasoningEfforts,
+  ).filter((model) => !Object.hasOwn(modelReasoningEfforts, model))
+
+  const hasExtraPromptChanges = missingExtraPromptModels.length > 0
+  const hasReasoningEffortChanges = missingReasoningEffortModels.length > 0
+
+  if (!hasExtraPromptChanges && !hasReasoningEffortChanges) {
     return { mergedConfig: config, changed: false }
   }
 
@@ -122,6 +132,10 @@ function mergeDefaultExtraPrompts(config: AppConfig): {
         ...defaultExtraPrompts,
         ...extraPrompts,
       },
+      modelReasoningEfforts: {
+        ...defaultModelReasoningEfforts,
+        ...modelReasoningEfforts,
+      },
     },
     changed: true,
   }
@@ -129,7 +143,7 @@ function mergeDefaultExtraPrompts(config: AppConfig): {
 
 export function mergeConfigWithDefaults(): AppConfig {
   const config = readConfigFromDisk()
-  const { mergedConfig, changed } = mergeDefaultExtraPrompts(config)
+  const { mergedConfig, changed } = mergeDefaultConfig(config)
 
   if (changed) {
     try {
diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts
index d367bd87e..654d4835f 100644
--- a/src/routes/messages/responses-translation.ts
+++ b/src/routes/messages/responses-translation.ts
@@ -44,7 +44,6 @@ import {
 } from "./anthropic-types"
 
 const MESSAGE_TYPE = "message"
-const CODEX_PHASE_MODEL = "gpt-5.3-codex"
 
 export const THINKING_TEXT = "Thinking..."
 
@@ -52,9 +51,10 @@ export const translateAnthropicMessagesToResponsesPayload = (
   payload: AnthropicMessagesPayload,
 ): ResponsesPayload => {
   const input: Array<ResponseInputItem> = []
+  const applyPhase = shouldApplyPhase(payload.model)
 
   for (const message of payload.messages) {
-    input.push(...translateMessage(message, payload.model))
+    input.push(...translateMessage(message, payload.model, applyPhase))
   }
 
   const translatedTools = convertAnthropicTools(payload.tools)
@@ -92,12 +92,13 @@ export const translateAnthropicMessagesToResponsesPayload = (
 const translateMessage = (
   message: AnthropicMessage,
   model: string,
+  applyPhase: boolean,
 ): Array<ResponseInputItem> => {
   if (message.role === "user") {
     return translateUserMessage(message)
   }
 
-  return translateAssistantMessage(message, model)
+  return translateAssistantMessage(message, model, applyPhase)
 }
 
 const translateUserMessage = (
@@ -135,8 +136,13 @@ const translateUserMessage = (
 const translateAssistantMessage = (
   message: AnthropicAssistantMessage,
   model: string,
+  applyPhase: boolean,
 ): Array<ResponseInputItem> => {
-  const assistantPhase = resolveAssistantPhase(model, message.content)
+  const assistantPhase = resolveAssistantPhase(
+    model,
+    message.content,
+    applyPhase,
+  )
 
   if (typeof message.content === "string") {
     return [createMessage("assistant", message.content, assistantPhase)]
@@ -242,10 +248,11 @@ const createMessage = (
 })
 
 const resolveAssistantPhase = (
-  model: string,
+  _model: string,
   content: AnthropicAssistantMessage["content"],
+  applyPhase: boolean,
 ): ResponseInputMessage["phase"] | undefined => {
-  if (!shouldApplyCodexPhase(model)) {
+  if (!applyPhase) {
     return undefined
   }
 
@@ -266,8 +273,10 @@ const resolveAssistantPhase = (
   return hasToolUse ? "commentary" : "final_answer"
 }
 
-const shouldApplyCodexPhase = (model: string): boolean =>
-  model === CODEX_PHASE_MODEL
+const shouldApplyPhase = (model: string): boolean => {
+  const extraPrompt = getExtraPromptForModel(model)
+  return extraPrompt.includes("## Intermediary updates")
+}
 
 const createTextContent = (text: string): ResponseInputText => ({
   type: "input_text",
diff --git a/src/services/get-vscode-version.ts b/src/services/get-vscode-version.ts
index 23b215fd2..5f8f8a598 100644
--- a/src/services/get-vscode-version.ts
+++ b/src/services/get-vscode-version.ts
@@ -1,33 +1,8 @@
 const FALLBACK = "1.109.3"
 
 export async function getVSCodeVersion() {
-  const controller = new AbortController()
-  const timeout = setTimeout(() => {
-    controller.abort()
-  }, 5000)
-
-  try {
-    const response = await fetch(
-      "https://aur.archlinux.org/cgit/aur.git/plain/PKGBUILD?h=visual-studio-code-bin",
-      {
-        signal: controller.signal,
-      },
-    )
-
-    const pkgbuild = await response.text()
-    const pkgverRegex = /pkgver=([0-9.]+)/
-    const match = pkgbuild.match(pkgverRegex)
-
-    if (match) {
-      return match[1]
-    }
-
-    return FALLBACK
-  } catch {
-    return FALLBACK
-  } finally {
-    clearTimeout(timeout)
-  }
+  await Promise.resolve()
+  return FALLBACK
 }
 
 await getVSCodeVersion()

From c9686a2a793ad0d38f5cbc7bd612e1ddec11a353 Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Fri, 6 Mar 2026 20:17:34 +0800
Subject: [PATCH 50/62] feat: add context management and compaction features to
 responses API

---
 src/lib/config.ts                             |  15 ++
 src/routes/messages/handler.ts                |  26 +++-
 .../messages/responses-stream-translation.ts  |  37 ++++-
 src/routes/messages/responses-translation.ts  | 132 ++++++++++++++++--
 src/routes/responses/handler.ts               |  15 +-
 src/routes/responses/utils.ts                 |  77 ++++++++++
 src/services/copilot/create-responses.ts      |  23 +++
 7 files changed, 305 insertions(+), 20 deletions(-)

diff --git a/src/lib/config.ts b/src/lib/config.ts
index 23430c93f..380a270f6 100644
--- a/src/lib/config.ts
+++ b/src/lib/config.ts
@@ -9,6 +9,7 @@ export interface AppConfig {
   }
   extraPrompts?: Record<string, string>
   smallModel?: string
+  responsesApiContextManagementModels?: Array<string>
   modelReasoningEfforts?: Record<
     string,
     "none" | "minimal" | "low" | "medium" | "high" | "xhigh"
@@ -54,6 +55,7 @@ const defaultConfig: AppConfig = {
     "gpt-5.4": gpt5CommentaryPrompt,
   },
   smallModel: "gpt-5-mini",
+  responsesApiContextManagementModels: ["gpt-5.4", "gpt-5.3-codex"],
   modelReasoningEfforts: {
     "gpt-5-mini": "low",
     "gpt-5.3-codex": "xhigh",
@@ -179,6 +181,19 @@ export function getSmallModel(): string {
   return config.smallModel ?? "gpt-5-mini"
 }
 
+export function getResponsesApiContextManagementModels(): Array<string> {
+  const config = getConfig()
+  return (
+    config.responsesApiContextManagementModels
+    ?? defaultConfig.responsesApiContextManagementModels
+    ?? []
+  )
+}
+
+export function isResponsesApiContextManagementModel(model: string): boolean {
+  return getResponsesApiContextManagementModels().includes(model)
+}
+
 export function getReasoningEffortForModel(
   model: string,
 ): "none" | "minimal" | "low" | "medium" | "high" | "xhigh" {
diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 66d87f39c..469d494c9 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -22,7 +22,11 @@ import {
   translateAnthropicMessagesToResponsesPayload,
   translateResponsesResultToAnthropic,
 } from "~/routes/messages/responses-translation"
-import { getResponsesRequestOptions } from "~/routes/responses/utils"
+import {
+  applyResponsesApiContextManagement,
+  compactInputByLatestCompaction,
+  getResponsesRequestOptions,
+} from "~/routes/responses/utils"
 import {
   createChatCompletions,
   type ChatCompletionChunk,
@@ -108,7 +112,10 @@ export async function handleCompletion(c: Context) {
   }
 
   if (shouldUseResponsesApi(selectedModel)) {
-    return await handleWithResponsesApi(c, anthropicPayload, initiatorOverride)
+    return await handleWithResponsesApi(c, anthropicPayload, {
+      initiatorOverride,
+      selectedModel,
+    })
   }
 
   return await handleWithChatCompletions(c, anthropicPayload, initiatorOverride)
@@ -181,10 +188,23 @@ const handleWithChatCompletions = async (
 const handleWithResponsesApi = async (
   c: Context,
   anthropicPayload: AnthropicMessagesPayload,
-  initiatorOverride?: "agent" | "user",
+  options?: {
+    initiatorOverride?: "agent" | "user"
+    selectedModel?: Model
+  },
 ) => {
+  const { initiatorOverride, selectedModel } = options ?? {}
+
   const responsesPayload =
     translateAnthropicMessagesToResponsesPayload(anthropicPayload)
+
+  applyResponsesApiContextManagement(
+    responsesPayload,
+    selectedModel?.capabilities.limits.max_prompt_tokens,
+  )
+
+  compactInputByLatestCompaction(responsesPayload)
+
   logger.debug(
     "Translated Responses payload:",
     JSON.stringify(responsesPayload),
diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts
index ab5ae7ef1..6f4f7f21f 100644
--- a/src/routes/messages/responses-stream-translation.ts
+++ b/src/routes/messages/responses-stream-translation.ts
@@ -19,6 +19,7 @@ import {
 import { type AnthropicStreamEventData } from "./anthropic-types"
 import {
   THINKING_TEXT,
+  encodeCompactionCarrierSignature,
   translateResponsesResultToAnthropic,
 } from "./responses-translation"
 
@@ -193,11 +194,45 @@ const handleOutputItemDone = (
   const events = new Array<AnthropicStreamEventData>()
   const item = rawEvent.item
   const itemType = item.type
+  const outputIndex = rawEvent.output_index
+
+  if (itemType === "compaction") {
+    if (!item.id || !item.encrypted_content) {
+      return events
+    }
+
+    const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events)
+
+    if (!state.blockHasDelta.has(blockIndex)) {
+      events.push({
+        type: "content_block_delta",
+        index: blockIndex,
+        delta: {
+          type: "thinking_delta",
+          thinking: THINKING_TEXT,
+        },
+      })
+    }
+
+    events.push({
+      type: "content_block_delta",
+      index: blockIndex,
+      delta: {
+        type: "signature_delta",
+        signature: encodeCompactionCarrierSignature({
+          id: item.id,
+          encrypted_content: item.encrypted_content,
+        }),
+      },
+    })
+    state.blockHasDelta.add(blockIndex)
+    return events
+  }
+
   if (itemType !== "reasoning") {
     return events
   }
 
-  const outputIndex = rawEvent.output_index
   const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events)
   const signature = (item.encrypted_content ?? "") + "@" + item.id
   if (signature) {
diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts
index 654d4835f..7107d73d4 100644
--- a/src/routes/messages/responses-translation.ts
+++ b/src/routes/messages/responses-translation.ts
@@ -6,6 +6,7 @@ import {
 } from "~/lib/config"
 import {
   type ResponsesPayload,
+  type ResponseInputCompaction,
   type ResponseInputContent,
   type ResponseInputImage,
   type ResponseInputItem,
@@ -14,6 +15,7 @@ import {
   type ResponseInputText,
   type ResponsesResult,
   type ResponseOutputContentBlock,
+  type ResponseOutputCompaction,
   type ResponseOutputFunctionCall,
   type ResponseOutputItem,
   type ResponseOutputReasoning,
@@ -44,6 +46,8 @@ import {
 } from "./anthropic-types"
 
 const MESSAGE_TYPE = "message"
+const COMPACTION_SIGNATURE_PREFIX = "cm1#"
+const COMPACTION_SIGNATURE_SEPARATOR = "@"
 
 export const THINKING_TEXT = "Thinking..."
 
@@ -89,6 +93,44 @@ export const translateAnthropicMessagesToResponsesPayload = (
   return responsesPayload
 }
 
+type CompactionCarrier = {
+  id: string
+  encrypted_content: string
+}
+
+export const encodeCompactionCarrierSignature = (
+  compaction: CompactionCarrier,
+): string => {
+  return `${COMPACTION_SIGNATURE_PREFIX}${compaction.encrypted_content}${COMPACTION_SIGNATURE_SEPARATOR}${compaction.id}`
+}
+
+export const decodeCompactionCarrierSignature = (
+  signature: string,
+): CompactionCarrier | undefined => {
+  if (signature.startsWith(COMPACTION_SIGNATURE_PREFIX)) {
+    const raw = signature.slice(COMPACTION_SIGNATURE_PREFIX.length)
+    const separatorIndex = raw.indexOf(COMPACTION_SIGNATURE_SEPARATOR)
+
+    if (separatorIndex <= 0 || separatorIndex === raw.length - 1) {
+      return undefined
+    }
+
+    const encrypted_content = raw.slice(0, separatorIndex)
+    const id = raw.slice(separatorIndex + 1)
+
+    if (!encrypted_content) {
+      return undefined
+    }
+
+    return {
+      id,
+      encrypted_content,
+    }
+  }
+
+  return undefined
+}
+
 const translateMessage = (
   message: AnthropicMessage,
   model: string,
@@ -165,17 +207,25 @@ const translateAssistantMessage = (
       continue
     }
 
-    if (
-      block.type === "thinking"
-      && block.signature
-      && block.signature.includes("@")
-    ) {
-      flushPendingContent(pendingContent, items, {
-        role: "assistant",
-        phase: assistantPhase,
-      })
-      items.push(createReasoningContent(block))
-      continue
+    if (block.type === "thinking" && block.signature) {
+      const compactionContent = createCompactionContent(block)
+      if (compactionContent) {
+        flushPendingContent(pendingContent, items, {
+          role: "assistant",
+          phase: assistantPhase,
+        })
+        items.push(compactionContent)
+        continue
+      }
+
+      if (block.signature.includes("@")) {
+        flushPendingContent(pendingContent, items, {
+          role: "assistant",
+          phase: assistantPhase,
+        })
+        items.push(createReasoningContent(block))
+        continue
+      }
     }
 
     const converted = translateAssistantContentBlock(block)
@@ -302,15 +352,43 @@ const createReasoningContent = (
   // align with vscode-copilot-chat extractThinkingData, should add id, otherwise it will cause miss cache occasionally —— the usage input cached tokens to be 0
   // https://github.com/microsoft/vscode-copilot-chat/blob/main/src/platform/endpoint/node/responsesApi.ts#L162
   // when use in codex cli, reasoning id is empty, so it will cause miss cache occasionally
-  const array = block.signature.split("@")
-  const signature = array[0]
-  const id = array[1]
+  const { encryptedContent, id } = parseReasoningSignature(block.signature)
   const thinking = block.thinking === THINKING_TEXT ? "" : block.thinking
   return {
     id,
     type: "reasoning",
     summary: thinking ? [{ type: "summary_text", text: thinking }] : [],
-    encrypted_content: signature,
+    encrypted_content: encryptedContent,
+  }
+}
+
+const createCompactionContent = (
+  block: AnthropicThinkingBlock,
+): ResponseInputCompaction | undefined => {
+  const compaction = decodeCompactionCarrierSignature(block.signature)
+  if (!compaction) {
+    return undefined
+  }
+
+  return {
+    id: compaction.id,
+    type: "compaction",
+    encrypted_content: compaction.encrypted_content,
+  }
+}
+
+const parseReasoningSignature = (
+  signature: string,
+): { encryptedContent: string; id: string } => {
+  const splitIndex = signature.lastIndexOf("@")
+
+  if (splitIndex <= 0 || splitIndex === signature.length - 1) {
+    return { encryptedContent: signature, id: "" }
+  }
+
+  return {
+    encryptedContent: signature.slice(0, splitIndex),
+    id: signature.slice(splitIndex + 1),
   }
 }
 
@@ -456,6 +534,13 @@ const mapOutputToAnthropicContent = (
         }
         break
       }
+      case "compaction": {
+        const compactionBlock = createCompactionThinkingBlock(item)
+        if (compactionBlock) {
+          contentBlocks.push(compactionBlock)
+        }
+        break
+      }
       default: {
         // Future compatibility for unrecognized output item types.
         const combinedText = combineMessageTextContent(
@@ -549,6 +634,23 @@ const createToolUseContentBlock = (
   }
 }
 
+const createCompactionThinkingBlock = (
+  item: ResponseOutputCompaction,
+): AnthropicAssistantContentBlock | null => {
+  if (!item.id || !item.encrypted_content) {
+    return null
+  }
+
+  return {
+    type: "thinking",
+    thinking: THINKING_TEXT,
+    signature: encodeCompactionCarrierSignature({
+      id: item.id,
+      encrypted_content: item.encrypted_content,
+    }),
+  }
+}
+
 const parseFunctionCallArguments = (
   rawArguments: string,
 ): Record<string, unknown> => {
diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts
index 24659d912..b7e356bf2 100644
--- a/src/routes/responses/handler.ts
+++ b/src/routes/responses/handler.ts
@@ -14,7 +14,11 @@ import {
 } from "~/services/copilot/create-responses"
 
 import { createStreamIdTracker, fixStreamIds } from "./stream-id-sync"
-import { getResponsesRequestOptions } from "./utils"
+import {
+  applyResponsesApiContextManagement,
+  compactInputByLatestCompaction,
+  getResponsesRequestOptions,
+} from "./utils"
 
 const logger = createHandlerLogger("responses-handler")
 
@@ -31,6 +35,8 @@ export const handleResponses = async (c: Context) => {
   // Remove web_search tool as it's not supported by GitHub Copilot
   removeWebSearchTool(payload)
 
+  compactInputByLatestCompaction(payload)
+
   const selectedModel = state.models?.data.find(
     (model) => model.id === payload.model,
   )
@@ -50,6 +56,13 @@ export const handleResponses = async (c: Context) => {
     )
   }
 
+  applyResponsesApiContextManagement(
+    payload,
+    selectedModel?.capabilities.limits.max_prompt_tokens,
+  )
+
+  logger.debug("Translated Responses payload:", JSON.stringify(payload))
+
   const { vision, initiator } = getResponsesRequestOptions(payload)
 
   if (state.manualApprove) {
diff --git a/src/routes/responses/utils.ts b/src/routes/responses/utils.ts
index 329de5190..b5a7fbc0e 100644
--- a/src/routes/responses/utils.ts
+++ b/src/routes/responses/utils.ts
@@ -1,8 +1,11 @@
 import type {
+  ResponseContextManagementCompactionItem,
   ResponseInputItem,
   ResponsesPayload,
 } from "~/services/copilot/create-responses"
 
+import { isResponsesApiContextManagementModel } from "~/lib/config"
+
 export const getResponsesRequestOptions = (
   payload: ResponsesPayload,
 ): { vision: boolean; initiator: "agent" | "user" } => {
@@ -31,6 +34,80 @@ export const hasVisionInput = (payload: ResponsesPayload): boolean => {
   return values.some((item) => containsVisionContent(item))
 }
 
+export const resolveResponsesCompactThreshold = (
+  maxPromptTokens?: number,
+): number => {
+  if (typeof maxPromptTokens === "number" && maxPromptTokens > 0) {
+    return Math.floor(maxPromptTokens * 0.75)
+  }
+
+  return 50000
+}
+
+const createCompactionContextManagement = (
+  compactThreshold: number,
+): Array<ResponseContextManagementCompactionItem> => [
+  {
+    type: "compaction",
+    compact_threshold: compactThreshold,
+  },
+]
+
+export const applyResponsesApiContextManagement = (
+  payload: ResponsesPayload,
+  maxPromptTokens?: number,
+): void => {
+  if (payload.context_management !== undefined) {
+    return
+  }
+
+  if (!isResponsesApiContextManagementModel(payload.model)) {
+    return
+  }
+
+  payload.context_management = createCompactionContextManagement(
+    resolveResponsesCompactThreshold(maxPromptTokens),
+  )
+}
+
+export const compactInputByLatestCompaction = (
+  payload: ResponsesPayload,
+): void => {
+  if (!Array.isArray(payload.input) || payload.input.length === 0) {
+    return
+  }
+
+  const latestCompactionMessageIndex = getLatestCompactionMessageIndex(
+    payload.input,
+  )
+
+  if (latestCompactionMessageIndex === undefined) {
+    return
+  }
+
+  payload.input = payload.input.slice(latestCompactionMessageIndex)
+}
+
+const getLatestCompactionMessageIndex = (
+  input: Array<ResponseInputItem>,
+): number | undefined => {
+  for (let index = input.length - 1; index >= 0; index -= 1) {
+    if (isCompactionInputItem(input[index])) {
+      return index
+    }
+  }
+
+  return undefined
+}
+
+const isCompactionInputItem = (value: ResponseInputItem): boolean => {
+  return (
+    "type" in value
+    && typeof value.type === "string"
+    && value.type === "compaction"
+  )
+}
+
 const getPayloadItems = (
   payload: ResponsesPayload,
 ): Array<ResponseInputItem> => {
diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
index 391ebf845..baa0ee1a8 100644
--- a/src/services/copilot/create-responses.ts
+++ b/src/services/copilot/create-responses.ts
@@ -21,6 +21,7 @@ export interface ResponsesPayload {
   parallel_tool_calls?: boolean | null
   store?: boolean | null
   reasoning?: Reasoning | null
+  context_management?: Array<ResponseContextManagementItem> | null
   include?: Array<ResponseIncludable>
   service_tier?: string | null // NOTE: Unsupported by GitHub Copilot
   [key: string]: unknown
@@ -55,6 +56,14 @@ export interface Reasoning {
   summary?: "auto" | "concise" | "detailed" | null
 }
 
+export interface ResponseContextManagementCompactionItem {
+  type: "compaction"
+  compact_threshold: number
+}
+
+export type ResponseContextManagementItem =
+  ResponseContextManagementCompactionItem
+
 export interface ResponseInputMessage {
   type?: "message"
   role: "user" | "assistant" | "system" | "developer"
@@ -88,11 +97,18 @@ export interface ResponseInputReasoning {
   encrypted_content: string
 }
 
+export interface ResponseInputCompaction {
+  id: string
+  type: "compaction"
+  encrypted_content: string
+}
+
 export type ResponseInputItem =
   | ResponseInputMessage
   | ResponseFunctionToolCallItem
   | ResponseFunctionCallOutputItem
   | ResponseInputReasoning
+  | ResponseInputCompaction
   | Record<string, unknown>
 
 export type ResponseInputContent =
@@ -146,6 +162,7 @@ export type ResponseOutputItem =
   | ResponseOutputMessage
   | ResponseOutputReasoning
   | ResponseOutputFunctionCall
+  | ResponseOutputCompaction
 
 export interface ResponseOutputMessage {
   id: string
@@ -177,6 +194,12 @@ export interface ResponseOutputFunctionCall {
   status?: "in_progress" | "completed" | "incomplete"
 }
 
+export interface ResponseOutputCompaction {
+  id: string
+  type: "compaction"
+  encrypted_content: string
+}
+
 export type ResponseOutputContentBlock =
   | ResponseOutputText
   | ResponseOutputRefusal

From e69e6a8d32e8cb85c599e926dd3f228686b0d299 Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Sun, 8 Mar 2026 20:28:31 +0800
Subject: [PATCH 51/62] feat: align with the request headers in the copilot
 extension version 0.38.2 and update opencode plugin to set header
 x-session-id

---
 .opencode/plugins/subagent-marker.js          |  19 ++-
 README.md                                     |  26 ++-
 src/lib/api-config.ts                         |  21 ++-
 src/lib/config.ts                             |   2 +-
 src/lib/state.ts                              |   3 +
 src/lib/utils.ts                              | 157 ++++++++++++++++++
 src/routes/chat-completions/handler.ts        |  14 +-
 src/routes/messages/handler.ts                |  67 ++++++--
 src/routes/responses/handler.ts               |  15 +-
 src/routes/responses/utils.ts                 |   2 +-
 .../copilot/create-chat-completions.ts        |  25 ++-
 src/services/copilot/create-messages.ts       |  23 ++-
 src/services/copilot/create-responses.ts      |  26 ++-
 src/services/get-vscode-version.ts            |   2 +-
 src/start.ts                                  |   9 +-
 tests/create-chat-completions.test.ts         |  12 +-
 16 files changed, 369 insertions(+), 54 deletions(-)

diff --git a/.opencode/plugins/subagent-marker.js b/.opencode/plugins/subagent-marker.js
index df20bb8ff..ec3f4b381 100644
--- a/.opencode/plugins/subagent-marker.js
+++ b/.opencode/plugins/subagent-marker.js
@@ -2,6 +2,7 @@ const MARKER_PREFIX = "__SUBAGENT_MARKER__"
 
 const subagentSessions = new Set()
 const markedSessions = new Set()
+const sessionParentMap = new Map()
 
 const getSessionInfo = (event) => {
   if (!event || typeof event !== "object") return undefined
@@ -17,8 +18,13 @@ export const SubagentMarkerPlugin = async () => {
     event: async ({ event }) => {
       if (event.type === "session.created") {
         const info = getSessionInfo(event)
-        if (info?.id && info.parentID) {
-          subagentSessions.add(info.id)
+        if (info?.id) {
+          if (info.parentID) {
+            subagentSessions.add(info.id)
+            sessionParentMap.set(info.id, info.parentID)
+          } else {
+            sessionParentMap.set(info.id, info.id)
+          }
         }
         return
       }
@@ -28,6 +34,7 @@ export const SubagentMarkerPlugin = async () => {
         if (info?.id) {
           subagentSessions.delete(info.id)
           markedSessions.delete(info.id)
+          sessionParentMap.delete(info.id)
         }
       }
     },
@@ -58,8 +65,14 @@ export const SubagentMarkerPlugin = async () => {
           end: Date.now(),
         },
       })
-
       markedSessions.add(sessionID)
     },
+    "chat.headers": async (input, output) => {
+      const { sessionID } = input
+      const sessionIdValue = sessionParentMap.get(sessionID)
+      if (sessionIdValue) {
+        output.headers["x-session-id"] = sessionIdValue
+      }
+    },
   }
 }
diff --git a/README.md b/README.md
index 6ee8b0292..456b3b8e1 100644
--- a/README.md
+++ b/README.md
@@ -377,7 +377,7 @@ You can also read more about IDE integration here: [Add Claude Code to your IDE]
 
 ### Subagent Marker Integration (Optional)
 
-This project supports `X-Initiator: agent` for subagent-originated requests.
+This project supports `x-initiator: agent` for subagent-originated requests.
 
 #### Claude Code plugin producer (marketplace-based)
 
@@ -398,15 +398,31 @@ Install the plugin from the marketplace:
 /plugin install claude-plugin@copilot-api-marketplace
 ```
 
-After installation, the plugin injects `__SUBAGENT_MARKER__...` on `SubagentStart`, and this proxy uses it to infer `X-Initiator: agent`.
+After installation, the plugin injects `__SUBAGENT_MARKER__...` on `SubagentStart`, and this proxy uses it to infer `x-initiator: agent`.
 
 #### Opencode plugin producer
 
-For opencode, use the plugin implementation at:
+The marker producer is packaged as an opencode plugin located at `.opencode/plugins/subagent-marker.js`.
 
-- `.opencode/plugins/subagent-marker.js`
+**Installation:**
 
-This plugin tracks sub-sessions and prepends a marker system reminder to subagent chat messages.
+Copy the plugin file to your opencode plugins directory:
+
+```sh
+# Clone or download this repository, then copy the plugin
+cp .opencode/plugins/subagent-marker.js ~/.config/opencode/plugins/
+```
+
+Or manually create the file at `~/.config/opencode/plugins/subagent-marker.js` with the plugin content.
+
+**Features:**
+
+- Tracks sub-sessions created by subagents
+- Automatically prepends a marker system reminder (`__SUBAGENT_MARKER__...`) to subagent chat messages
+- Sets `x-session-id` header for session tracking
+- Enables this proxy to infer `x-initiator: agent` for subagent-originated requests
+
+The plugin hooks into `session.created`, `session.deleted`, `chat.message`, and `chat.headers` events to provide seamless subagent marker functionality.
 
 ## Running from Source
 
diff --git a/src/lib/api-config.ts b/src/lib/api-config.ts
index 3b5b03995..ba2b6af0e 100644
--- a/src/lib/api-config.ts
+++ b/src/lib/api-config.ts
@@ -7,7 +7,7 @@ export const standardHeaders = () => ({
   accept: "application/json",
 })
 
-const COPILOT_VERSION = "0.37.6"
+const COPILOT_VERSION = "0.38.2"
 const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}`
 const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}`
 
@@ -17,7 +17,12 @@ export const copilotBaseUrl = (state: State) =>
   state.accountType === "individual" ?
     "https://api.githubcopilot.com"
   : `https://api.${state.accountType}.githubcopilot.com`
-export const copilotHeaders = (state: State, vision: boolean = false) => {
+export const copilotHeaders = (
+  state: State,
+  requestId?: string,
+  vision: boolean = false,
+) => {
+  const requestIdValue = requestId ?? randomUUID()
   const headers: Record<string, string> = {
     Authorization: `Bearer ${state.copilotToken}`,
     "content-type": standardHeaders()["content-type"],
@@ -27,12 +32,22 @@ export const copilotHeaders = (state: State, vision: boolean = false) => {
     "user-agent": USER_AGENT,
     "openai-intent": "conversation-agent",
     "x-github-api-version": API_VERSION,
-    "x-request-id": randomUUID(),
+    "x-request-id": requestIdValue,
     "x-vscode-user-agent-library-version": "electron-fetch",
+    "x-agent-task-id": requestIdValue,
+    "x-interaction-type": "conversation-agent",
   }
 
   if (vision) headers["copilot-vision-request"] = "true"
 
+  if (state.macMachineId) {
+    headers["vscode-machineid"] = state.macMachineId
+  }
+
+  if (state.vsCodeSessionId) {
+    headers["vscode-sessionid"] = state.vsCodeSessionId
+  }
+
   return headers
 }
 
diff --git a/src/lib/config.ts b/src/lib/config.ts
index 380a270f6..129acd814 100644
--- a/src/lib/config.ts
+++ b/src/lib/config.ts
@@ -55,7 +55,7 @@ const defaultConfig: AppConfig = {
     "gpt-5.4": gpt5CommentaryPrompt,
   },
   smallModel: "gpt-5-mini",
-  responsesApiContextManagementModels: ["gpt-5.4", "gpt-5.3-codex"],
+  responsesApiContextManagementModels: [],
   modelReasoningEfforts: {
     "gpt-5-mini": "low",
     "gpt-5.3-codex": "xhigh",
diff --git a/src/lib/state.ts b/src/lib/state.ts
index 5d5bc2bb6..490ce7370 100644
--- a/src/lib/state.ts
+++ b/src/lib/state.ts
@@ -8,6 +8,9 @@ export interface State {
   models?: ModelsResponse
   vsCodeVersion?: string
 
+  macMachineId?: string
+  vsCodeSessionId?: string
+
   manualApprove: boolean
   rateLimitWait: boolean
   showToken: boolean
diff --git a/src/lib/utils.ts b/src/lib/utils.ts
index cc80be667..81317abf7 100644
--- a/src/lib/utils.ts
+++ b/src/lib/utils.ts
@@ -1,4 +1,10 @@
+import type { Context } from "hono"
+
 import consola from "consola"
+import { createHash, randomUUID } from "node:crypto"
+import { networkInterfaces } from "node:os"
+
+import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types"
 
 import { getModels } from "~/services/copilot/get-models"
 import { getVSCodeVersion } from "~/services/get-vscode-version"
@@ -24,3 +30,154 @@ export const cacheVSCodeVersion = async () => {
 
   consola.info(`Using VSCode version: ${response}`)
 }
+
+const invalidMacAddresses = new Set([
+  "00:00:00:00:00:00",
+  "ff:ff:ff:ff:ff:ff",
+  "ac:de:48:00:11:22",
+])
+
+function validateMacAddress(candidate: string): boolean {
+  const tempCandidate = candidate.replaceAll("-", ":").toLowerCase()
+  return !invalidMacAddresses.has(tempCandidate)
+}
+
+/**
+ * Returns the first MAC address reported by the OS network interfaces that
+ * validateMacAddress accepts, or null when no interface provides one.
+ */
+export function getMac(): string | null {
+  // Object.values only visits own entries, so no for-in guard is required.
+  for (const addresses of Object.values(networkInterfaces())) {
+    for (const { mac } of addresses ?? []) {
+      if (validateMacAddress(mac)) {
+        return mac
+      }
+    }
+  }
+  return null
+}
+
+export const cacheMacMachineId = () => {
+  const macAddress = getMac() ?? randomUUID()
+  state.macMachineId = createHash("sha256")
+    .update(macAddress, "utf8")
+    .digest("hex")
+  consola.debug(`Using machine ID: ${state.macMachineId}`)
+}
+
+const SESSION_REFRESH_BASE_MS = 60 * 60 * 1000
+const SESSION_REFRESH_JITTER_MS = 20 * 60 * 1000
+let vsCodeSessionRefreshTimer: ReturnType<typeof setTimeout> | null = null
+
+const generateSessionId = () => {
+  state.vsCodeSessionId = randomUUID() + Date.now().toString()
+  consola.debug(`Generated VSCode session ID: ${state.vsCodeSessionId}`)
+}
+
+export const stopVsCodeSessionRefreshLoop = () => {
+  if (vsCodeSessionRefreshTimer) {
+    clearTimeout(vsCodeSessionRefreshTimer)
+    vsCodeSessionRefreshTimer = null
+  }
+}
+
+const scheduleSessionIdRefresh = () => {
+  const randomDelay = Math.floor(Math.random() * SESSION_REFRESH_JITTER_MS)
+  const delay = SESSION_REFRESH_BASE_MS + randomDelay
+  consola.debug(
+    `Scheduling next VSCode session ID refresh in ${Math.round(
+      delay / 1000,
+    )} seconds`,
+  )
+
+  stopVsCodeSessionRefreshLoop()
+  vsCodeSessionRefreshTimer = setTimeout(() => {
+    try {
+      generateSessionId()
+    } catch (error) {
+      consola.error("Failed to refresh session ID, rescheduling...", error)
+    } finally {
+      scheduleSessionIdRefresh()
+    }
+  }, delay)
+}
+
+export const cacheVsCodeSessionId = () => {
+  stopVsCodeSessionRefreshLoop()
+  generateSessionId()
+  scheduleSessionIdRefresh()
+}
+
+interface PayloadMessage {
+  role?: string
+  content?: string | Array<{ type?: string; text?: string }> | null
+  type?: string
+}
+
+const findLastUserContent = (
+  messages: Array<PayloadMessage>,
+): string | null => {
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const msg = messages[i]
+    if (msg.role === "user" && msg.content) {
+      if (typeof msg.content === "string") {
+        return msg.content
+      } else if (Array.isArray(msg.content)) {
+        const array = msg.content
+          .filter((n) => n.type !== "tool_result")
+          .map((n) => ({ ...n, cache_control: undefined }))
+        if (array.length > 0) {
+          return JSON.stringify(array)
+        }
+      }
+    }
+  }
+  return null
+}
+
+export const generateRequestIdFromPayload = (
+  payload: {
+    messages: string | Array<PayloadMessage> | undefined
+  },
+  sessionId?: string,
+): string => {
+  const messages = payload.messages
+  if (messages) {
+    const lastUserContent =
+      typeof messages === "string" ? messages : findLastUserContent(messages)
+
+    if (lastUserContent) {
+      return getUUID(
+        (sessionId ?? "") + (state.macMachineId ?? "") + lastUserContent,
+      )
+    }
+  }
+
+  return randomUUID()
+}
+
+/**
+ * Derives a stable session UUID for a Messages request.
+ * Uses the `_session_<id>` suffix of `metadata.user_id` when present, otherwise
+ * the `x-session-id` request header.
+ * @returns getUUID(rawSessionId), or undefined when neither source yields one.
+ */
+export const getRootSessionId = (
+  anthropicPayload: AnthropicMessagesPayload,
+  c: Context,
+): string | undefined => {
+  const userId = anthropicPayload.metadata?.user_id
+  const fromUserId = userId ? /_session_(.+)$/.exec(userId)?.[1] : undefined
+  // Fall back to the header when user_id is absent OR has no session suffix,
+  // so a suffix-less user_id does not suppress an explicit x-session-id.
+  const sessionId = fromUserId ?? c.req.header("x-session-id")
+
+  return sessionId ? getUUID(sessionId) : undefined
+}
+
+export const getUUID = (content: string): string => {
+  const hash = createHash("sha256").update(content).digest("hex")
+  const hash32 = hash.slice(0, 32)
+  return `${hash32.slice(0, 8)}-${hash32.slice(8, 12)}-${hash32.slice(12, 16)}-${hash32.slice(16, 20)}-${hash32.slice(20)}`
+}
diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts
index 3a037a523..842a499f2 100644
--- a/src/routes/chat-completions/handler.ts
+++ b/src/routes/chat-completions/handler.ts
@@ -7,7 +7,7 @@ import { createHandlerLogger } from "~/lib/logger"
 import { checkRateLimit } from "~/lib/rate-limit"
 import { state } from "~/lib/state"
 import { getTokenCount } from "~/lib/tokenizer"
-import { isNullish } from "~/lib/utils"
+import { generateRequestIdFromPayload, getUUID, isNullish } from "~/lib/utils"
 import {
   createChatCompletions,
   type ChatCompletionResponse,
@@ -49,7 +49,17 @@ export async function handleCompletion(c: Context) {
     logger.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens))
   }
 
-  const response = await createChatCompletions(payload)
+  // Subagent markers are not supported on this route yet, so derive sessionId from the request ID.
+  const requestId = generateRequestIdFromPayload(payload)
+  logger.debug("Generated request ID:", requestId)
+
+  const sessionId = getUUID(requestId)
+  logger.debug("Extracted session ID:", sessionId)
+
+  const response = await createChatCompletions(payload, {
+    requestId,
+    sessionId,
+  })
 
   if (isNonStreaming(response)) {
     logger.debug("Non-streaming response:", JSON.stringify(response))
diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 469d494c9..434c9cd56 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -13,6 +13,7 @@ import {
 import { createHandlerLogger } from "~/lib/logger"
 import { checkRateLimit } from "~/lib/rate-limit"
 import { state } from "~/lib/state"
+import { generateRequestIdFromPayload, getRootSessionId } from "~/lib/utils"
 import {
   buildErrorEvent,
   createResponsesStreamState,
@@ -39,6 +40,8 @@ import {
   type ResponseStreamEvent,
 } from "~/services/copilot/create-responses"
 
+import type { SubagentMarker } from "./subagent-marker"
+
 import {
   type AnthropicMessagesPayload,
   type AnthropicStreamState,
@@ -64,11 +67,13 @@ export async function handleCompletion(c: Context) {
   logger.debug("Anthropic request payload:", JSON.stringify(anthropicPayload))
 
   const subagentMarker = parseSubagentMarkerFromFirstUser(anthropicPayload)
-  const initiatorOverride = subagentMarker ? "agent" : undefined
   if (subagentMarker) {
     logger.debug("Detected Subagent marker:", JSON.stringify(subagentMarker))
   }
 
+  const sessionId = getRootSessionId(anthropicPayload, c)
+  logger.debug("Extracted session ID:", sessionId)
+
   // claude code and opencode compact request detection
   const isCompact = isCompactRequest(anthropicPayload)
 
@@ -95,6 +100,9 @@ export async function handleCompletion(c: Context) {
     mergeToolResultForClaude(anthropicPayload)
   }
 
+  const requestId = generateRequestIdFromPayload(anthropicPayload, sessionId)
+  logger.debug("Generated request ID:", requestId)
+
   if (state.manualApprove) {
     await awaitApproval()
   }
@@ -106,19 +114,27 @@ export async function handleCompletion(c: Context) {
   if (shouldUseMessagesApi(selectedModel)) {
     return await handleWithMessagesApi(c, anthropicPayload, {
       anthropicBetaHeader: anthropicBeta,
-      initiatorOverride,
+      subagentMarker,
       selectedModel,
+      requestId,
+      sessionId,
     })
   }
 
   if (shouldUseResponsesApi(selectedModel)) {
     return await handleWithResponsesApi(c, anthropicPayload, {
-      initiatorOverride,
+      subagentMarker,
       selectedModel,
+      requestId,
+      sessionId,
     })
   }
 
-  return await handleWithChatCompletions(c, anthropicPayload, initiatorOverride)
+  return await handleWithChatCompletions(c, anthropicPayload, {
+    subagentMarker,
+    requestId,
+    sessionId,
+  })
 }
 
 const RESPONSES_ENDPOINT = "/responses"
@@ -127,8 +143,13 @@ const MESSAGES_ENDPOINT = "/v1/messages"
 const handleWithChatCompletions = async (
   c: Context,
   anthropicPayload: AnthropicMessagesPayload,
-  initiatorOverride?: "agent" | "user",
+  options: {
+    subagentMarker?: SubagentMarker | null
+    requestId: string
+    sessionId?: string
+  },
 ) => {
+  const { subagentMarker, requestId, sessionId } = options
   const openAIPayload = translateToOpenAI(anthropicPayload)
   logger.debug(
     "Translated OpenAI request payload:",
@@ -136,7 +157,9 @@ const handleWithChatCompletions = async (
   )
 
   const response = await createChatCompletions(openAIPayload, {
-    initiator: initiatorOverride,
+    subagentMarker,
+    requestId,
+    sessionId,
   })
 
   if (isNonStreaming(response)) {
@@ -188,12 +211,14 @@ const handleWithChatCompletions = async (
 const handleWithResponsesApi = async (
   c: Context,
   anthropicPayload: AnthropicMessagesPayload,
-  options?: {
-    initiatorOverride?: "agent" | "user"
+  options: {
+    subagentMarker?: SubagentMarker | null
     selectedModel?: Model
+    requestId: string
+    sessionId?: string
   },
 ) => {
-  const { initiatorOverride, selectedModel } = options ?? {}
+  const { subagentMarker, selectedModel, requestId, sessionId } = options
 
   const responsesPayload =
     translateAnthropicMessagesToResponsesPayload(anthropicPayload)
@@ -213,7 +238,10 @@ const handleWithResponsesApi = async (
   const { vision, initiator } = getResponsesRequestOptions(responsesPayload)
   const response = await createResponses(responsesPayload, {
     vision,
-    initiator: initiatorOverride ?? initiator,
+    initiator: initiator,
+    subagentMarker,
+    requestId,
+    sessionId,
   })
 
   if (responsesPayload.stream && isAsyncIterable(response)) {
@@ -286,14 +314,21 @@ const handleWithResponsesApi = async (
 const handleWithMessagesApi = async (
   c: Context,
   anthropicPayload: AnthropicMessagesPayload,
-  options?: {
+  options: {
     anthropicBetaHeader?: string
-    initiatorOverride?: "agent" | "user"
+    subagentMarker?: SubagentMarker | null
     selectedModel?: Model
+    requestId: string
+    sessionId?: string
   },
 ) => {
-  const { anthropicBetaHeader, initiatorOverride, selectedModel } =
-    options ?? {}
+  const {
+    anthropicBetaHeader,
+    subagentMarker,
+    selectedModel,
+    requestId,
+    sessionId,
+  } = options
   // Pre-request processing: filter thinking blocks for Claude models so only
   // valid thinking blocks are sent to the Copilot Messages API.
   for (const msg of anthropicPayload.messages) {
@@ -322,7 +357,9 @@ const handleWithMessagesApi = async (
   logger.debug("Translated Messages payload:", JSON.stringify(anthropicPayload))
 
   const response = await createMessages(anthropicPayload, anthropicBetaHeader, {
-    initiator: initiatorOverride,
+    subagentMarker,
+    requestId,
+    sessionId,
   })
 
   if (isAsyncIterable(response)) {
diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts
index b7e356bf2..ed8ccefe6 100644
--- a/src/routes/responses/handler.ts
+++ b/src/routes/responses/handler.ts
@@ -7,6 +7,7 @@ import { getConfig } from "~/lib/config"
 import { createHandlerLogger } from "~/lib/logger"
 import { checkRateLimit } from "~/lib/rate-limit"
 import { state } from "~/lib/state"
+import { generateRequestIdFromPayload, getUUID } from "~/lib/utils"
 import {
   createResponses,
   type ResponsesPayload,
@@ -30,6 +31,13 @@ export const handleResponses = async (c: Context) => {
   const payload = await c.req.json<ResponsesPayload>()
   logger.debug("Responses request payload:", JSON.stringify(payload))
 
+  // Subagent markers are not supported on this route yet, so derive sessionId from the request ID.
+  const requestId = generateRequestIdFromPayload({ messages: payload.input })
+  logger.debug("Generated request ID:", requestId)
+
+  const sessionId = getUUID(requestId)
+  logger.debug("Extracted session ID:", sessionId)
+
   useFunctionApplyPatch(payload)
 
   // Remove web_search tool as it's not supported by GitHub Copilot
@@ -69,7 +77,12 @@ export const handleResponses = async (c: Context) => {
     await awaitApproval()
   }
 
-  const response = await createResponses(payload, { vision, initiator })
+  const response = await createResponses(payload, {
+    vision,
+    initiator,
+    requestId,
+    sessionId: sessionId,
+  })
 
   if (isStreamingRequested(payload) && isAsyncIterable(response)) {
     logger.debug("Forwarding native Responses stream")
diff --git a/src/routes/responses/utils.ts b/src/routes/responses/utils.ts
index b5a7fbc0e..be2b31a2c 100644
--- a/src/routes/responses/utils.ts
+++ b/src/routes/responses/utils.ts
@@ -38,7 +38,7 @@ export const resolveResponsesCompactThreshold = (
   maxPromptTokens?: number,
 ): number => {
   if (typeof maxPromptTokens === "number" && maxPromptTokens > 0) {
-    return Math.floor(maxPromptTokens * 0.75)
+    return Math.floor(maxPromptTokens * 0.9)
   }
 
   return 50000
diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts
index 19b4512cd..661d07ff5 100644
--- a/src/services/copilot/create-chat-completions.ts
+++ b/src/services/copilot/create-chat-completions.ts
@@ -1,14 +1,18 @@
 import consola from "consola"
 import { events } from "fetch-event-stream"
 
+import type { SubagentMarker } from "~/routes/messages/subagent-marker"
+
 import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config"
 import { HTTPError } from "~/lib/error"
 import { state } from "~/lib/state"
 
 export const createChatCompletions = async (
   payload: ChatCompletionsPayload,
-  options?: {
-    initiator?: "agent" | "user"
+  options: {
+    subagentMarker?: SubagentMarker | null
+    requestId: string
+    sessionId?: string
   },
 ) => {
   if (!state.copilotToken) throw new Error("Copilot token not found")
@@ -19,7 +23,7 @@ export const createChatCompletions = async (
       && x.content?.some((x) => x.type === "image_url"),
   )
 
-  // Agent/user check for X-Initiator header
+  // Agent/user check for x-initiator header
   // Determine if any message is from an agent ("assistant" or "tool")
   // Refactor `isAgentCall` logic to check only the last message in the history rather than any message. This prevents valid user messages from being incorrectly flagged as agent calls due to previous assistant history, ensuring proper credit consumption for multi-turn conversations.
   let isAgentCall = false
@@ -30,10 +34,19 @@ export const createChatCompletions = async (
     }
   }
 
-  // Build headers and add X-Initiator
+  // Build headers and add x-initiator
   const headers: Record<string, string> = {
-    ...copilotHeaders(state, enableVision),
-    "X-Initiator": options?.initiator ?? (isAgentCall ? "agent" : "user"),
+    ...copilotHeaders(state, options.requestId, enableVision),
+    "x-initiator": isAgentCall ? "agent" : "user",
+  }
+
+  if (options.subagentMarker) {
+    headers["x-initiator"] = "agent"
+    headers["x-interaction-type"] = "conversation-subagent"
+  }
+
+  if (options.sessionId) {
+    headers["x-interaction-id"] = options.sessionId
   }
 
   const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, {
diff --git a/src/services/copilot/create-messages.ts b/src/services/copilot/create-messages.ts
index adbcfe319..a6a27b312 100644
--- a/src/services/copilot/create-messages.ts
+++ b/src/services/copilot/create-messages.ts
@@ -5,6 +5,7 @@ import type {
   AnthropicMessagesPayload,
   AnthropicResponse,
 } from "~/routes/messages/anthropic-types"
+import type { SubagentMarker } from "~/routes/messages/subagent-marker"
 
 import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config"
 import { HTTPError } from "~/lib/error"
@@ -54,9 +55,11 @@ const buildAnthropicBetaHeader = (
 
 export const createMessages = async (
   payload: AnthropicMessagesPayload,
-  anthropicBetaHeader?: string,
-  options?: {
-    initiator?: "agent" | "user"
+  anthropicBetaHeader: string | undefined,
+  options: {
+    subagentMarker?: SubagentMarker | null
+    requestId: string
+    sessionId?: string
   },
 ): Promise<CreateMessagesReturn> => {
   if (!state.copilotToken) throw new Error("Copilot token not found")
@@ -75,11 +78,19 @@ export const createMessages = async (
         lastMessage.content.some((block) => block.type !== "tool_result")
       : true
   }
-  const initiator = options?.initiator ?? (isInitiateRequest ? "user" : "agent")
 
   const headers: Record<string, string> = {
-    ...copilotHeaders(state, enableVision),
-    "X-Initiator": initiator,
+    ...copilotHeaders(state, options.requestId, enableVision),
+    "x-initiator": isInitiateRequest ? "user" : "agent",
+  }
+
+  if (options.subagentMarker) {
+    headers["x-initiator"] = "agent"
+    headers["x-interaction-type"] = "conversation-subagent"
+  }
+
+  if (options.sessionId) {
+    headers["x-interaction-id"] = options.sessionId
   }
 
   // align with vscode copilot extension anthropic-beta
diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
index baa0ee1a8..1679578c3 100644
--- a/src/services/copilot/create-responses.ts
+++ b/src/services/copilot/create-responses.ts
@@ -1,6 +1,8 @@
 import consola from "consola"
 import { events } from "fetch-event-stream"
 
+import type { SubagentMarker } from "~/routes/messages/subagent-marker"
+
 import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config"
 import { HTTPError } from "~/lib/error"
 import { state } from "~/lib/state"
@@ -348,17 +350,35 @@ export type CreateResponsesReturn = ResponsesResult | ResponsesStream
 interface ResponsesRequestOptions {
   vision: boolean
   initiator: "agent" | "user"
+  subagentMarker?: SubagentMarker | null
+  requestId: string
+  sessionId?: string
 }
 
 export const createResponses = async (
   payload: ResponsesPayload,
-  { vision, initiator }: ResponsesRequestOptions,
+  {
+    vision,
+    initiator,
+    subagentMarker,
+    requestId,
+    sessionId,
+  }: ResponsesRequestOptions,
 ): Promise<CreateResponsesReturn> => {
   if (!state.copilotToken) throw new Error("Copilot token not found")
 
   const headers: Record<string, string> = {
-    ...copilotHeaders(state, vision),
-    "X-Initiator": initiator,
+    ...copilotHeaders(state, requestId, vision),
+    "x-initiator": initiator,
+  }
+
+  if (subagentMarker) {
+    headers["x-initiator"] = "agent"
+    headers["x-interaction-type"] = "conversation-subagent"
+  }
+
+  if (sessionId) {
+    headers["x-interaction-id"] = sessionId
   }
 
   // service_tier is not supported by github copilot
diff --git a/src/services/get-vscode-version.ts b/src/services/get-vscode-version.ts
index 5f8f8a598..dfff314e9 100644
--- a/src/services/get-vscode-version.ts
+++ b/src/services/get-vscode-version.ts
@@ -1,4 +1,4 @@
-const FALLBACK = "1.109.3"
+const FALLBACK = "1.110.1"
 
 export async function getVSCodeVersion() {
   await Promise.resolve()
diff --git a/src/start.ts b/src/start.ts
index 85bfe4c48..037244a4e 100644
--- a/src/start.ts
+++ b/src/start.ts
@@ -12,7 +12,12 @@ import { initProxyFromEnv } from "./lib/proxy"
 import { generateEnvScript } from "./lib/shell"
 import { state } from "./lib/state"
 import { setupCopilotToken, setupGitHubToken } from "./lib/token"
-import { cacheModels, cacheVSCodeVersion } from "./lib/utils"
+import {
+  cacheMacMachineId,
+  cacheModels,
+  cacheVSCodeVersion,
+  cacheVsCodeSessionId,
+} from "./lib/utils"
 
 interface RunServerOptions {
   port: number
@@ -53,6 +58,8 @@ export async function runServer(options: RunServerOptions): Promise<void> {
 
   await ensurePaths()
   await cacheVSCodeVersion()
+  cacheMacMachineId()
+  cacheVsCodeSessionId()
 
   if (options.githubToken) {
     state.githubToken = options.githubToken
diff --git a/tests/create-chat-completions.test.ts b/tests/create-chat-completions.test.ts
index d18e741aa..369f6d2cc 100644
--- a/tests/create-chat-completions.test.ts
+++ b/tests/create-chat-completions.test.ts
@@ -23,7 +23,7 @@ const fetchMock = mock(
 // @ts-expect-error - Mock fetch doesn't implement all fetch properties
 ;(globalThis as unknown as { fetch: typeof fetch }).fetch = fetchMock
 
-test("sets X-Initiator to agent if tool/assistant present", async () => {
+test("sets x-initiator to agent if tool/assistant present", async () => {
   const payload: ChatCompletionsPayload = {
     messages: [
       { role: "user", content: "hi" },
@@ -31,15 +31,15 @@ test("sets X-Initiator to agent if tool/assistant present", async () => {
     ],
     model: "gpt-test",
   }
-  await createChatCompletions(payload)
+  await createChatCompletions(payload, { requestId: "1" })
   expect(fetchMock).toHaveBeenCalled()
   const headers = (
     fetchMock.mock.calls[0][1] as { headers: Record<string, string> }
   ).headers
-  expect(headers["X-Initiator"]).toBe("agent")
+  expect(headers["x-initiator"]).toBe("agent")
 })
 
-test("sets X-Initiator to user if only user present", async () => {
+test("sets x-initiator to user if only user present", async () => {
   const payload: ChatCompletionsPayload = {
     messages: [
       { role: "user", content: "hi" },
@@ -47,10 +47,10 @@ test("sets X-Initiator to user if only user present", async () => {
     ],
     model: "gpt-test",
   }
-  await createChatCompletions(payload)
+  await createChatCompletions(payload, { requestId: "1" })
   expect(fetchMock).toHaveBeenCalled()
   const headers = (
     fetchMock.mock.calls[1][1] as { headers: Record<string, string> }
   ).headers
-  expect(headers["X-Initiator"]).toBe("user")
+  expect(headers["x-initiator"]).toBe("user")
 })

From 8374c20e375b21ee8b3476b2ac3ffdda8839cb08 Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Wed, 11 Mar 2026 13:02:24 +0800
Subject: [PATCH 52/62] feat: enhance adaptive thinking handling in messages
 API to prevent errors with tool choices

---
 src/routes/messages/handler.ts | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 434c9cd56..7cbaa907a 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -345,7 +345,12 @@ const handleWithMessagesApi = async (
     }
   }
 
-  if (selectedModel?.capabilities.supports.adaptive_thinking) {
+  // https://platform.claude.com/docs/en/build-with-claude/extended-thinking#extended-thinking-with-tool-use
+  // Using tool_choice: {"type": "any"} or tool_choice: {"type": "tool", "name": "..."} will result in an error because these options force tool use, which is incompatible with extended thinking.
+  const toolChoice = anthropicPayload.tool_choice
+  const disableThink = toolChoice?.type === "any" || toolChoice?.type === "tool"
+
+  if (selectedModel?.capabilities.supports.adaptive_thinking && !disableThink) {
     anthropicPayload.thinking = {
       type: "adaptive",
     }

From ca803ba65790f565a29f18941b4318c53ffd6b2b Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Thu, 12 Mar 2026 17:25:07 +0800
Subject: [PATCH 53/62] feat: implement opencode OAuth handling

---
 README.md                                     |  23 ++++
 src/lib/api-config.ts                         | 120 +++++++++++++++++-
 src/lib/paths.ts                              |  14 +-
 src/lib/token.ts                              |  64 +++++++++-
 .../copilot/create-chat-completions.ts        |  19 +--
 src/services/copilot/create-messages.ts       |  19 +--
 src/services/copilot/create-responses.ts      |  15 +--
 src/services/github/get-copilot-token.ts      |   4 +-
 src/services/github/get-copilot-usage.ts      |  11 +-
 src/services/github/get-device-code.ts        |  18 ++-
 src/services/github/get-user.ts               |   4 +-
 src/services/github/poll-access-token.ts      |  30 ++---
 12 files changed, 267 insertions(+), 74 deletions(-)

diff --git a/README.md b/README.md
index 456b3b8e1..fb31db5ef 100644
--- a/README.md
+++ b/README.md
@@ -39,6 +39,7 @@ A reverse-engineered proxy for the GitHub Copilot API that exposes it as an Open
 - **Token Visibility**: Option to display GitHub and Copilot tokens during authentication and refresh for debugging (`--show-token`).
 - **Flexible Authentication**: Authenticate interactively or provide a GitHub token directly, suitable for CI/CD environments.
 - **Support for Different Account Types**: Works with individual, business, and enterprise GitHub Copilot plans.
+- **Opencode OAuth Support**: Use opencode GitHub Copilot authentication by setting `COPILOT_API_OAUTH_APP=opencode` environment variable.
 
 ## Demo
 
@@ -302,6 +303,28 @@ npx copilot-api@latest debug --json
 
 # Initialize proxy from environment variables (HTTP_PROXY, HTTPS_PROXY, etc.)
 npx copilot-api@latest start --proxy-env
+
+# Use opencode GitHub Copilot authentication
+COPILOT_API_OAUTH_APP=opencode npx @jeffreycao/copilot-api@latest start
+```
+
+### Opencode OAuth Authentication
+
+You can use opencode GitHub Copilot authentication instead of the default one:
+
+```sh
+# Set environment variable before running any command
+export COPILOT_API_OAUTH_APP=opencode
+
+# Then run start or auth commands
+npx @jeffreycao/copilot-api@latest start
+npx @jeffreycao/copilot-api@latest auth
+```
+
+Or set the environment variable inline for a single command:
+
+```sh
+COPILOT_API_OAUTH_APP=opencode npx @jeffreycao/copilot-api@latest start
 ```
 
 ## Using the Usage Viewer
diff --git a/src/lib/api-config.ts b/src/lib/api-config.ts
index ba2b6af0e..d0bb67b90 100644
--- a/src/lib/api-config.ts
+++ b/src/lib/api-config.ts
@@ -2,6 +2,98 @@ import { randomUUID } from "node:crypto"
 
 import type { State } from "./state"
 
+export const isOpencodeOauthApp = (): boolean => {
+  return process.env.COPILOT_API_OAUTH_APP === "opencode"
+}
+
+export const normalizeDomain = (input: string): string => {
+  return input
+    .trim()
+    .replace(/^https?:\/\//u, "")
+    .replace(/\/+$/u, "")
+}
+
+export const getEnterpriseDomain = (): string | null => {
+  const raw = (process.env.COPILOT_API_ENTERPRISE_URL ?? "").trim()
+  if (!raw) return null
+  const normalized = normalizeDomain(raw)
+  return normalized || null
+}
+
+export const getGitHubBaseUrl = (): string => {
+  const resolvedDomain = getEnterpriseDomain()
+  return resolvedDomain ? `https://${resolvedDomain}` : GITHUB_BASE_URL
+}
+
+export const getGitHubApiBaseUrl = (): string => {
+  const resolvedDomain = getEnterpriseDomain()
+  return resolvedDomain ?
+      `https://${resolvedDomain}/api/v3`
+    : GITHUB_API_BASE_URL
+}
+
+export const getOpencodeOauthHeaders = (): Record<string, string> => {
+  return {
+    Accept: "application/json",
+    "Content-Type": "application/json",
+    "User-Agent":
+      "opencode/1.2.16 ai-sdk/provider-utils/3.0.21 runtime/bun/1.3.10, opencode/1.2.16",
+  }
+}
+
+export const getOauthUrls = (): {
+  deviceCodeUrl: string
+  accessTokenUrl: string
+} => {
+  const githubBaseUrl = getGitHubBaseUrl()
+
+  return {
+    deviceCodeUrl: `${githubBaseUrl}/login/device/code`,
+    accessTokenUrl: `${githubBaseUrl}/login/oauth/access_token`,
+  }
+}
+
+interface OauthAppConfig {
+  clientId: string
+  headers: Record<string, string>
+  scope: string
+}
+
+export const getOauthAppConfig = (): OauthAppConfig => {
+  if (isOpencodeOauthApp()) {
+    return {
+      clientId: OPENCODE_GITHUB_CLIENT_ID,
+      headers: getOpencodeOauthHeaders(),
+      scope: GITHUB_APP_SCOPES,
+    }
+  }
+
+  return {
+    clientId: GITHUB_CLIENT_ID,
+    headers: standardHeaders(),
+    scope: GITHUB_APP_SCOPES,
+  }
+}
+
+export const prepareInteractionHeaders = (
+  sessionId: string | undefined,
+  isSubagent: boolean,
+  headers: Record<string, string>,
+) => {
+  const sendInteractionHeaders = !isOpencodeOauthApp()
+
+  if (isSubagent) {
+    headers["x-initiator"] = "agent"
+    if (sendInteractionHeaders) {
+      headers["x-interaction-type"] = "conversation-subagent"
+    }
+  }
+
+  if (sessionId && sendInteractionHeaders) {
+    headers["x-interaction-id"] = sessionId
+  }
+}
+
 export const standardHeaders = () => ({
   "content-type": "application/json",
   accept: "application/json",
@@ -13,15 +105,34 @@ const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}`
 
 const API_VERSION = "2025-10-01"
 
-export const copilotBaseUrl = (state: State) =>
-  state.accountType === "individual" ?
-    "https://api.githubcopilot.com"
-  : `https://api.${state.accountType}.githubcopilot.com`
+export const copilotBaseUrl = (state: State) => {
+  const enterpriseDomain = getEnterpriseDomain()
+  if (enterpriseDomain) {
+    return `https://copilot-api.${enterpriseDomain}`
+  }
+
+  return state.accountType === "individual" ?
+      "https://api.githubcopilot.com"
+    : `https://api.${state.accountType}.githubcopilot.com`
+}
+
 export const copilotHeaders = (
   state: State,
   requestId?: string,
   vision: boolean = false,
 ) => {
+  if (isOpencodeOauthApp()) {
+    const headers: Record<string, string> = {
+      Authorization: `Bearer ${state.copilotToken}`,
+      ...getOpencodeOauthHeaders(),
+      "Openai-Intent": "conversation-edits",
+    }
+
+    if (vision) headers["Copilot-Vision-Request"] = "true"
+
+    return headers
+  }
+
   const requestIdValue = requestId ?? randomUUID()
   const headers: Record<string, string> = {
     Authorization: `Bearer ${state.copilotToken}`,
@@ -65,3 +176,4 @@ export const githubHeaders = (state: State) => ({
 export const GITHUB_BASE_URL = "https://github.com"
 export const GITHUB_CLIENT_ID = "Iv1.b507a08c87ecfe98"
 export const GITHUB_APP_SCOPES = ["read:user"].join(" ")
+export const OPENCODE_GITHUB_CLIENT_ID = "Ov23li8tweQw6odWQebz"
diff --git a/src/lib/paths.ts b/src/lib/paths.ts
index e85c21d8a..49733a046 100644
--- a/src/lib/paths.ts
+++ b/src/lib/paths.ts
@@ -2,9 +2,17 @@ import fs from "node:fs/promises"
 import os from "node:os"
 import path from "node:path"
 
-const APP_DIR = path.join(os.homedir(), ".local", "share", "copilot-api")
+const AUTH_APP = process.env.COPILOT_API_OAUTH_APP || ""
+const ENTERPRISE_PREFIX = process.env.COPILOT_API_ENTERPRISE_URL ? "ent_" : ""
 
-const GITHUB_TOKEN_PATH = path.join(APP_DIR, "github_token")
+const DEFAULT_DIR = path.join(os.homedir(), ".local", "share", "copilot-api")
+const APP_DIR = process.env.COPILOT_API_HOME || DEFAULT_DIR
+
+const GITHUB_TOKEN_PATH = path.join(
+  APP_DIR,
+  AUTH_APP,
+  ENTERPRISE_PREFIX + "github_token",
+)
 const CONFIG_PATH = path.join(APP_DIR, "config.json")
 
 export const PATHS = {
@@ -14,7 +22,7 @@ export const PATHS = {
 }
 
 export async function ensurePaths(): Promise<void> {
-  await fs.mkdir(PATHS.APP_DIR, { recursive: true })
+  await fs.mkdir(path.join(PATHS.APP_DIR, AUTH_APP), { recursive: true })
   await ensureFile(PATHS.GITHUB_TOKEN_PATH)
   await ensureFile(PATHS.CONFIG_PATH)
 }
diff --git a/src/lib/token.ts b/src/lib/token.ts
index fc8d2785f..a46a660e8 100644
--- a/src/lib/token.ts
+++ b/src/lib/token.ts
@@ -1,6 +1,8 @@
 import consola from "consola"
 import fs from "node:fs/promises"
+import { setTimeout as delay } from "node:timers/promises"
 
+import { isOpencodeOauthApp } from "~/lib/api-config"
 import { PATHS } from "~/lib/paths"
 import { getCopilotToken } from "~/services/github/get-copilot-token"
 import { getDeviceCode } from "~/services/github/get-device-code"
@@ -10,12 +12,37 @@ import { pollAccessToken } from "~/services/github/poll-access-token"
 import { HTTPError } from "./error"
 import { state } from "./state"
 
+let copilotRefreshLoopController: AbortController | null = null
+
+export const stopCopilotRefreshLoop = () => {
+  if (!copilotRefreshLoopController) {
+    return
+  }
+
+  copilotRefreshLoopController.abort()
+  copilotRefreshLoopController = null
+}
+
 const readGithubToken = () => fs.readFile(PATHS.GITHUB_TOKEN_PATH, "utf8")
 
 const writeGithubToken = (token: string) =>
   fs.writeFile(PATHS.GITHUB_TOKEN_PATH, token)
 
 export const setupCopilotToken = async () => {
+  if (isOpencodeOauthApp()) {
+    if (!state.githubToken) throw new Error(`opencode token not found`)
+
+    state.copilotToken = state.githubToken
+
+    consola.debug("GitHub Copilot token set from opencode auth token")
+    if (state.showToken) {
+      consola.info("Copilot token:", state.copilotToken)
+    }
+
+    stopCopilotRefreshLoop()
+    return
+  }
+
   const { token, refresh_in } = await getCopilotToken()
   state.copilotToken = token
 
@@ -25,21 +52,48 @@ export const setupCopilotToken = async () => {
     consola.info("Copilot token:", token)
   }
 
-  const refreshInterval = (refresh_in - 60) * 1000
-  setInterval(async () => {
+  stopCopilotRefreshLoop()
+
+  const controller = new AbortController()
+  copilotRefreshLoopController = controller
+
+  runCopilotRefreshLoop(refresh_in, controller.signal)
+    .catch(() => {
+      consola.warn("Copilot token refresh loop stopped")
+    })
+    .finally(() => {
+      if (copilotRefreshLoopController === controller) {
+        copilotRefreshLoopController = null
+      }
+    })
+}
+
+const runCopilotRefreshLoop = async (
+  refreshIn: number,
+  signal: AbortSignal,
+) => {
+  let nextRefreshDelayMs = (refreshIn - 60) * 1000
+
+  while (!signal.aborted) {
+    await delay(nextRefreshDelayMs, undefined, { signal })
+
     consola.debug("Refreshing Copilot token")
+
     try {
-      const { token } = await getCopilotToken()
+      const { token, refresh_in } = await getCopilotToken()
       state.copilotToken = token
       consola.debug("Copilot token refreshed")
       if (state.showToken) {
         consola.info("Refreshed Copilot token:", token)
       }
+
+      nextRefreshDelayMs = (refresh_in - 60) * 1000
     } catch (error) {
       consola.error("Failed to refresh Copilot token:", error)
-      throw error
+      nextRefreshDelayMs = 15_000
+      consola.warn(`Retrying Copilot token refresh in ${nextRefreshDelayMs}ms`)
     }
-  }, refreshInterval)
+  }
 }
 
 interface SetupGitHubTokenOptions {
diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts
index 661d07ff5..d23dcde11 100644
--- a/src/services/copilot/create-chat-completions.ts
+++ b/src/services/copilot/create-chat-completions.ts
@@ -3,7 +3,11 @@ import { events } from "fetch-event-stream"
 
 import type { SubagentMarker } from "~/routes/messages/subagent-marker"
 
-import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config"
+import {
+  copilotBaseUrl,
+  copilotHeaders,
+  prepareInteractionHeaders,
+} from "~/lib/api-config"
 import { HTTPError } from "~/lib/error"
 import { state } from "~/lib/state"
 
@@ -40,14 +44,11 @@ export const createChatCompletions = async (
     "x-initiator": isAgentCall ? "agent" : "user",
   }
 
-  if (options.subagentMarker) {
-    headers["x-initiator"] = "agent"
-    headers["x-interaction-type"] = "conversation-subagent"
-  }
-
-  if (options.sessionId) {
-    headers["x-interaction-id"] = options.sessionId
-  }
+  prepareInteractionHeaders(
+    options.sessionId,
+    Boolean(options.subagentMarker),
+    headers,
+  )
 
   const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, {
     method: "POST",
diff --git a/src/services/copilot/create-messages.ts b/src/services/copilot/create-messages.ts
index a6a27b312..ab910054b 100644
--- a/src/services/copilot/create-messages.ts
+++ b/src/services/copilot/create-messages.ts
@@ -7,7 +7,11 @@ import type {
 } from "~/routes/messages/anthropic-types"
 import type { SubagentMarker } from "~/routes/messages/subagent-marker"
 
-import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config"
+import {
+  copilotBaseUrl,
+  copilotHeaders,
+  prepareInteractionHeaders,
+} from "~/lib/api-config"
 import { HTTPError } from "~/lib/error"
 import { state } from "~/lib/state"
 
@@ -84,14 +88,11 @@ export const createMessages = async (
     "x-initiator": isInitiateRequest ? "user" : "agent",
   }
 
-  if (options.subagentMarker) {
-    headers["x-initiator"] = "agent"
-    headers["x-interaction-type"] = "conversation-subagent"
-  }
-
-  if (options.sessionId) {
-    headers["x-interaction-id"] = options.sessionId
-  }
+  prepareInteractionHeaders(
+    options.sessionId,
+    Boolean(options.subagentMarker),
+    headers,
+  )
 
   // align with vscode copilot extension anthropic-beta
   const anthropicBeta = buildAnthropicBetaHeader(
diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
index 1679578c3..a9b726a34 100644
--- a/src/services/copilot/create-responses.ts
+++ b/src/services/copilot/create-responses.ts
@@ -3,7 +3,11 @@ import { events } from "fetch-event-stream"
 
 import type { SubagentMarker } from "~/routes/messages/subagent-marker"
 
-import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config"
+import {
+  copilotBaseUrl,
+  copilotHeaders,
+  prepareInteractionHeaders,
+} from "~/lib/api-config"
 import { HTTPError } from "~/lib/error"
 import { state } from "~/lib/state"
 
@@ -372,14 +376,7 @@ export const createResponses = async (
     "x-initiator": initiator,
   }
 
-  if (subagentMarker) {
-    headers["x-initiator"] = "agent"
-    headers["x-interaction-type"] = "conversation-subagent"
-  }
-
-  if (sessionId) {
-    headers["x-interaction-id"] = sessionId
-  }
+  prepareInteractionHeaders(sessionId, Boolean(subagentMarker), headers)
 
   // service_tier is not supported by github copilot
   payload.service_tier = null
diff --git a/src/services/github/get-copilot-token.ts b/src/services/github/get-copilot-token.ts
index 98744bab1..9c33c038c 100644
--- a/src/services/github/get-copilot-token.ts
+++ b/src/services/github/get-copilot-token.ts
@@ -1,10 +1,10 @@
-import { GITHUB_API_BASE_URL, githubHeaders } from "~/lib/api-config"
+import { getGitHubApiBaseUrl, githubHeaders } from "~/lib/api-config"
 import { HTTPError } from "~/lib/error"
 import { state } from "~/lib/state"
 
 export const getCopilotToken = async () => {
   const response = await fetch(
-    `${GITHUB_API_BASE_URL}/copilot_internal/v2/token`,
+    `${getGitHubApiBaseUrl()}/copilot_internal/v2/token`,
     {
       headers: githubHeaders(state),
     },
diff --git a/src/services/github/get-copilot-usage.ts b/src/services/github/get-copilot-usage.ts
index 6cdd8bc10..1af6632e4 100644
--- a/src/services/github/get-copilot-usage.ts
+++ b/src/services/github/get-copilot-usage.ts
@@ -1,11 +1,14 @@
-import { GITHUB_API_BASE_URL, githubHeaders } from "~/lib/api-config"
+import { getGitHubApiBaseUrl, githubHeaders } from "~/lib/api-config"
 import { HTTPError } from "~/lib/error"
 import { state } from "~/lib/state"
 
 export const getCopilotUsage = async (): Promise<CopilotUsageResponse> => {
-  const response = await fetch(`${GITHUB_API_BASE_URL}/copilot_internal/user`, {
-    headers: githubHeaders(state),
-  })
+  const response = await fetch(
+    `${getGitHubApiBaseUrl()}/copilot_internal/user`,
+    {
+      headers: githubHeaders(state),
+    },
+  )
 
   if (!response.ok) {
     throw new HTTPError("Failed to get Copilot usage", response)
diff --git a/src/services/github/get-device-code.ts b/src/services/github/get-device-code.ts
index cf35f4ec9..79d26ba70 100644
--- a/src/services/github/get-device-code.ts
+++ b/src/services/github/get-device-code.ts
@@ -1,18 +1,16 @@
-import {
-  GITHUB_APP_SCOPES,
-  GITHUB_BASE_URL,
-  GITHUB_CLIENT_ID,
-  standardHeaders,
-} from "~/lib/api-config"
+import { getOauthAppConfig, getOauthUrls } from "~/lib/api-config"
 import { HTTPError } from "~/lib/error"
 
 export async function getDeviceCode(): Promise<DeviceCodeResponse> {
-  const response = await fetch(`${GITHUB_BASE_URL}/login/device/code`, {
+  const { clientId, headers, scope } = getOauthAppConfig()
+  const { deviceCodeUrl } = getOauthUrls()
+
+  const response = await fetch(deviceCodeUrl, {
     method: "POST",
-    headers: standardHeaders(),
+    headers,
     body: JSON.stringify({
-      client_id: GITHUB_CLIENT_ID,
-      scope: GITHUB_APP_SCOPES,
+      client_id: clientId,
+      scope,
     }),
   })
 
diff --git a/src/services/github/get-user.ts b/src/services/github/get-user.ts
index 23e1b1c1c..6774c4492 100644
--- a/src/services/github/get-user.ts
+++ b/src/services/github/get-user.ts
@@ -1,9 +1,9 @@
-import { GITHUB_API_BASE_URL, standardHeaders } from "~/lib/api-config"
+import { getGitHubApiBaseUrl, standardHeaders } from "~/lib/api-config"
 import { HTTPError } from "~/lib/error"
 import { state } from "~/lib/state"
 
 export async function getGitHubUser() {
-  const response = await fetch(`${GITHUB_API_BASE_URL}/user`, {
+  const response = await fetch(`${getGitHubApiBaseUrl()}/user`, {
     headers: {
       authorization: `token ${state.githubToken}`,
       ...standardHeaders(),
diff --git a/src/services/github/poll-access-token.ts b/src/services/github/poll-access-token.ts
index 4639ee0dc..44c4a07b8 100644
--- a/src/services/github/poll-access-token.ts
+++ b/src/services/github/poll-access-token.ts
@@ -1,10 +1,6 @@
 import consola from "consola"
 
-import {
-  GITHUB_BASE_URL,
-  GITHUB_CLIENT_ID,
-  standardHeaders,
-} from "~/lib/api-config"
+import { getOauthAppConfig, getOauthUrls } from "~/lib/api-config"
 import { sleep } from "~/lib/utils"
 
 import type { DeviceCodeResponse } from "./get-device-code"
@@ -12,24 +8,24 @@ import type { DeviceCodeResponse } from "./get-device-code"
 export async function pollAccessToken(
   deviceCode: DeviceCodeResponse,
 ): Promise<string> {
+  const { clientId, headers } = getOauthAppConfig()
+  const { accessTokenUrl } = getOauthUrls()
+
   // Interval is in seconds, we need to multiply by 1000 to get milliseconds
   // I'm also adding another second, just to be safe
   const sleepDuration = (deviceCode.interval + 1) * 1000
   consola.debug(`Polling access token with interval of ${sleepDuration}ms`)
 
   while (true) {
-    const response = await fetch(
-      `${GITHUB_BASE_URL}/login/oauth/access_token`,
-      {
-        method: "POST",
-        headers: standardHeaders(),
-        body: JSON.stringify({
-          client_id: GITHUB_CLIENT_ID,
-          device_code: deviceCode.device_code,
-          grant_type: "urn:ietf:params:oauth:grant-type:device_code",
-        }),
-      },
-    )
+    const response = await fetch(accessTokenUrl, {
+      method: "POST",
+      headers,
+      body: JSON.stringify({
+        client_id: clientId,
+        device_code: deviceCode.device_code,
+        grant_type: "urn:ietf:params:oauth:grant-type:device_code",
+      }),
+    })
 
     if (!response.ok) {
       await sleep(sleepDuration)

From ca2c7f9e957b942cea495ff8ee4e978527375d10 Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Thu, 12 Mar 2026 22:19:09 +0800
Subject: [PATCH 54/62] feat: trim oauth env var and update docs

---
 README.md             | 2 ++
 src/lib/api-config.ts | 2 +-
 src/lib/paths.ts      | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index fb31db5ef..4275ec4ad 100644
--- a/README.md
+++ b/README.md
@@ -40,6 +40,8 @@ A reverse-engineered proxy for the GitHub Copilot API that exposes it as an Open
 - **Flexible Authentication**: Authenticate interactively or provide a GitHub token directly, suitable for CI/CD environments.
 - **Support for Different Account Types**: Works with individual, business, and enterprise GitHub Copilot plans.
 - **Opencode OAuth Support**: Use opencode GitHub Copilot authentication by setting `COPILOT_API_OAUTH_APP=opencode` environment variable.
+- **GitHub Enterprise Support**: Connect to GHE.com by setting the `COPILOT_API_ENTERPRISE_URL` environment variable (e.g., `company.ghe.com`).
+- **Custom Data Directory**: Change the default data directory (where tokens and config are stored) by setting the `COPILOT_API_HOME` environment variable.
 
 ## Demo
 
diff --git a/src/lib/api-config.ts b/src/lib/api-config.ts
index d0bb67b90..18cb46656 100644
--- a/src/lib/api-config.ts
+++ b/src/lib/api-config.ts
@@ -3,7 +3,7 @@ import { randomUUID } from "node:crypto"
 import type { State } from "./state"
 
 export const isOpencodeOauthApp = (): boolean => {
-  return process.env.COPILOT_API_OAUTH_APP === "opencode"
+  return process.env.COPILOT_API_OAUTH_APP?.trim() === "opencode"
 }
 
 export const normalizeDomain = (input: string): string => {
diff --git a/src/lib/paths.ts b/src/lib/paths.ts
index 49733a046..dba35a791 100644
--- a/src/lib/paths.ts
+++ b/src/lib/paths.ts
@@ -2,7 +2,7 @@ import fs from "node:fs/promises"
 import os from "node:os"
 import path from "node:path"
 
-const AUTH_APP = process.env.COPILOT_API_OAUTH_APP || ""
+const AUTH_APP = process.env.COPILOT_API_OAUTH_APP?.trim() || ""
 const ENTERPRISE_PREFIX = process.env.COPILOT_API_ENTERPRISE_URL ? "ent_" : ""
 
 const DEFAULT_DIR = path.join(os.homedir(), ".local", "share", "copilot-api")

From 47eb5021798399ab78237407db943aced43b467b Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Sat, 14 Mar 2026 08:55:46 +0800
Subject: [PATCH 55/62] feat: implement findEndpointModel function to enhance
 claude model mapping

---
 src/lib/models.ts                           | 75 +++++++++++++++++++++
 src/routes/messages/count-tokens-handler.ts |  7 +-
 src/routes/messages/handler.ts              |  6 +-
 3 files changed, 81 insertions(+), 7 deletions(-)
 create mode 100644 src/lib/models.ts

diff --git a/src/lib/models.ts b/src/lib/models.ts
new file mode 100644
index 000000000..f5f621eab
--- /dev/null
+++ b/src/lib/models.ts
@@ -0,0 +1,75 @@
+import type { Model } from "~/services/copilot/get-models"
+
+import { state } from "~/lib/state"
+
+export const findEndpointModel = (sdkModelId: string): Model | undefined => {
+  const models = state.models?.data ?? []
+  const exactMatch = models.find((m) => m.id === sdkModelId)
+  if (exactMatch) {
+    return exactMatch
+  }
+
+  const normalized = _normalizeSdkModelId(sdkModelId)
+  if (!normalized) {
+    return undefined
+  }
+
+  const modelName = `claude-${normalized.family}-${normalized.version}`
+  const model = models.find((m) => m.id === modelName)
+  if (model) {
+    return model
+  }
+
+  return undefined
+}
+
+/**
+ * Normalizes an SDK model ID to extract the model family and version.
+ * This method comes from the GitHub Copilot extension.
+ * Examples:
+ * - "claude-opus-4-5-20251101" -> { family: "opus", version: "4.5" }
+ * - "claude-3-5-sonnet-20241022" -> { family: "sonnet", version: "3.5" }
+ * - "claude-sonnet-4-20250514" -> { family: "sonnet", version: "4" }
+ * - "claude-haiku-3-5-20250514" -> { family: "haiku", version: "3.5" }
+ * - "claude-haiku-4.5" -> { family: "haiku", version: "4.5" }
+ */
+const _normalizeSdkModelId = (
+  sdkModelId: string,
+): { family: string; version: string } | undefined => {
+  const lower = sdkModelId.toLowerCase()
+
+  // Strip date suffix (8 digits at the end)
+  const withoutDate = lower.replace(/-\d{8}$/, "")
+
+  // Pattern 1: claude-{family}-{major}-{minor} (e.g., claude-opus-4-5, claude-haiku-3-5)
+  const pattern1 = withoutDate.match(/^claude-(\w+)-(\d+)-(\d+)$/)
+  if (pattern1) {
+    return { family: pattern1[1], version: `${pattern1[2]}.${pattern1[3]}` }
+  }
+
+  // Pattern 2: claude-{major}-{minor}-{family} (e.g., claude-3-5-sonnet)
+  const pattern2 = withoutDate.match(/^claude-(\d+)-(\d+)-(\w+)$/)
+  if (pattern2) {
+    return { family: pattern2[3], version: `${pattern2[1]}.${pattern2[2]}` }
+  }
+
+  // Pattern 3: claude-{family}-{major}.{minor} (e.g., claude-haiku-4.5)
+  const pattern3 = withoutDate.match(/^claude-(\w+)-(\d+)\.(\d+)$/)
+  if (pattern3) {
+    return { family: pattern3[1], version: `${pattern3[2]}.${pattern3[3]}` }
+  }
+
+  // Pattern 4: claude-{family}-{major} (e.g., claude-sonnet-4)
+  const pattern4 = withoutDate.match(/^claude-(\w+)-(\d+)$/)
+  if (pattern4) {
+    return { family: pattern4[1], version: pattern4[2] }
+  }
+
+  // Pattern 5: claude-{major}-{family} (e.g., claude-3-opus)
+  const pattern5 = withoutDate.match(/^claude-(\d+)-(\w+)$/)
+  if (pattern5) {
+    return { family: pattern5[2], version: pattern5[1] }
+  }
+
+  return undefined
+}
diff --git a/src/routes/messages/count-tokens-handler.ts b/src/routes/messages/count-tokens-handler.ts
index f280e094c..a361e1cc5 100644
--- a/src/routes/messages/count-tokens-handler.ts
+++ b/src/routes/messages/count-tokens-handler.ts
@@ -2,9 +2,9 @@ import type { Context } from "hono"
 
 import consola from "consola"
 
-import { state } from "~/lib/state"
 import { getTokenCount } from "~/lib/tokenizer"
 
+import { findEndpointModel } from "../../lib/models"
 import { type AnthropicMessagesPayload } from "./anthropic-types"
 import { translateToOpenAI } from "./non-stream-translation"
 
@@ -19,9 +19,8 @@ export async function handleCountTokens(c: Context) {
 
     const openAIPayload = translateToOpenAI(anthropicPayload)
 
-    const selectedModel = state.models?.data.find(
-      (model) => model.id === anthropicPayload.model,
-    )
+    const selectedModel = findEndpointModel(anthropicPayload.model)
+    anthropicPayload.model = selectedModel?.id ?? anthropicPayload.model
 
     if (!selectedModel) {
       consola.warn("Model not found, returning default token count")
diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 7cbaa907a..ad8286e48 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -11,6 +11,7 @@ import {
   getReasoningEffortForModel,
 } from "~/lib/config"
 import { createHandlerLogger } from "~/lib/logger"
+import { findEndpointModel } from "~/lib/models"
 import { checkRateLimit } from "~/lib/rate-limit"
 import { state } from "~/lib/state"
 import { generateRequestIdFromPayload, getRootSessionId } from "~/lib/utils"
@@ -107,9 +108,8 @@ export async function handleCompletion(c: Context) {
     await awaitApproval()
   }
 
-  const selectedModel = state.models?.data.find(
-    (m) => m.id === anthropicPayload.model,
-  )
+  const selectedModel = findEndpointModel(anthropicPayload.model)
+  anthropicPayload.model = selectedModel?.id ?? anthropicPayload.model
 
   if (shouldUseMessagesApi(selectedModel)) {
     return await handleWithMessagesApi(c, anthropicPayload, {

From 75de2a3c7dfc3c513c7ae43ed8c535b3481622ba Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Sat, 14 Mar 2026 10:05:45 +0800
Subject: [PATCH 56/62] fix: update subagent marker ID prefix for opencode
 1.2.26 compatibility. Change ID format from message.id-subagent-marker to
 prt-message.id-subagent-marker to match opencode 1.2.26 requirements.

---
 .opencode/plugins/subagent-marker.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.opencode/plugins/subagent-marker.js b/.opencode/plugins/subagent-marker.js
index ec3f4b381..e17a68127 100644
--- a/.opencode/plugins/subagent-marker.js
+++ b/.opencode/plugins/subagent-marker.js
@@ -54,7 +54,7 @@ export const SubagentMarkerPlugin = async () => {
       })}`
 
       output.parts.unshift({
-        id: `${output.message.id}-subagent-marker`,
+        id: `prt-${output.message.id}-subagent-marker`,
         sessionID: output.message.sessionID,
         messageID: output.message.id,
         type: "text",

From 24c0867c4b52903e6dd4bfe45aae54f54b549e9d Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Sun, 15 Mar 2026 15:03:01 +0800
Subject: [PATCH 57/62] feat: add useMessagesApi toggle to fallback to
 /chat/completions on high demand

---
 README.md                      | 4 +++-
 src/lib/config.ts              | 7 +++++++
 src/routes/messages/handler.ts | 5 +++++
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 4275ec4ad..9d73987b2 100644
--- a/README.md
+++ b/README.md
@@ -198,7 +198,8 @@ The following command line options are available for the `start` command:
       "gpt-5-mini": "low"
     },
     "useFunctionApplyPatch": true,
-    "compactUseSmallModel": true
+    "compactUseSmallModel": true,
+    "useMessagesApi": true
   }
   ```
 - **auth.apiKeys:** API keys used for request authentication. Supports multiple keys for rotation. Requests can authenticate with either `x-api-key: <key>` or `Authorization: Bearer <key>`. If empty or omitted, authentication is disabled.
@@ -207,6 +208,7 @@ The following command line options are available for the `start` command:
 - **modelReasoningEfforts:** Per-model `reasoning.effort` sent to the Copilot Responses API. Allowed values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. If a model isn’t listed, `high` is used by default.
 - **useFunctionApplyPatch:** When `true`, the server will convert any custom tool named `apply_patch` in Responses payloads into an OpenAI-style function tool (`type: "function"`) with a parameter schema so assistants can call it using function-calling semantics to edit files. Set to `false` to leave tools unchanged. Defaults to `true`.
 - **compactUseSmallModel:** When `true`, detected "compact" requests (e.g., from Claude Code or Opencode compact mode) will automatically use the configured `smallModel` to avoid consuming premium model usage for short/background tasks. Defaults to `true`.
+- **useMessagesApi:** When `true`, Claude-family models that support Copilot's native `/v1/messages` endpoint will use the Messages API; otherwise they fall back to `/chat/completions`. Set to `false` to disable Messages API routing and always use `/chat/completions`. Defaults to `true`.
 
 Edit this file to customize prompts or swap in your own fast model. Restart the server (or rerun the command) after changes so the cached config is refreshed.
 
diff --git a/src/lib/config.ts b/src/lib/config.ts
index 129acd814..396b9a2ed 100644
--- a/src/lib/config.ts
+++ b/src/lib/config.ts
@@ -16,6 +16,7 @@ export interface AppConfig {
   >
   useFunctionApplyPatch?: boolean
   compactUseSmallModel?: boolean
+  useMessagesApi?: boolean
 }
 
 const gpt5ExplorationPrompt = `## Exploration and reading files
@@ -62,6 +63,7 @@ const defaultConfig: AppConfig = {
   },
   useFunctionApplyPatch: true,
   compactUseSmallModel: true,
+  useMessagesApi: true,
 }
 
 let cachedConfig: AppConfig | null = null
@@ -205,3 +207,8 @@ export function shouldCompactUseSmallModel(): boolean {
   const config = getConfig()
   return config.compactUseSmallModel ?? true
 }
+
+export function isMessagesApiEnabled(): boolean {
+  const config = getConfig()
+  return config.useMessagesApi ?? true
+}
diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index ad8286e48..b63936704 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -9,6 +9,7 @@ import {
   getSmallModel,
   shouldCompactUseSmallModel,
   getReasoningEffortForModel,
+  isMessagesApiEnabled,
 } from "~/lib/config"
 import { createHandlerLogger } from "~/lib/logger"
 import { findEndpointModel } from "~/lib/models"
@@ -396,6 +397,10 @@ const shouldUseResponsesApi = (selectedModel: Model | undefined): boolean => {
 }
 
 const shouldUseMessagesApi = (selectedModel: Model | undefined): boolean => {
+  const useMessagesApi = isMessagesApiEnabled()
+  if (!useMessagesApi) {
+    return false
+  }
   return (
     selectedModel?.supported_endpoints?.includes(MESSAGES_ENDPOINT) ?? false
   )

From 090c61722721a7b83294df28e26e6aefe94caa26 Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Sun, 15 Mar 2026 15:52:07 +0800
Subject: [PATCH 58/62] feat: update getUUID function to generate
 standards-compliant UUIDv4 and add tests

---
 src/lib/utils.ts    | 14 +++++++++++---
 tests/utils.test.ts | 45 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 3 deletions(-)
 create mode 100644 tests/utils.test.ts

diff --git a/src/lib/utils.ts b/src/lib/utils.ts
index 81317abf7..50a35368b 100644
--- a/src/lib/utils.ts
+++ b/src/lib/utils.ts
@@ -177,7 +177,15 @@ export const getRootSessionId = (
 }
 
 export const getUUID = (content: string): string => {
-  const hash = createHash("sha256").update(content).digest("hex")
-  const hash32 = hash.slice(0, 32)
-  return `${hash32.slice(0, 8)}-${hash32.slice(8, 12)}-${hash32.slice(12, 16)}-${hash32.slice(16, 20)}-${hash32.slice(20)}`
+  const uuidBytes = createHash("sha256")
+    .update(content)
+    .digest()
+    .subarray(0, 16)
+
+  uuidBytes[6] = (uuidBytes[6] & 0x0f) | 0x40
+  uuidBytes[8] = (uuidBytes[8] & 0x3f) | 0x80
+
+  const uuidHex = uuidBytes.toString("hex")
+
+  return `${uuidHex.slice(0, 8)}-${uuidHex.slice(8, 12)}-${uuidHex.slice(12, 16)}-${uuidHex.slice(16, 20)}-${uuidHex.slice(20)}`
 }
diff --git a/tests/utils.test.ts b/tests/utils.test.ts
new file mode 100644
index 000000000..d1d8c094d
--- /dev/null
+++ b/tests/utils.test.ts
@@ -0,0 +1,45 @@
+import { expect, test } from "bun:test"
+import { createHash, randomUUID } from "node:crypto"
+
+import { getUUID } from "../src/lib/utils"
+
+const getLegacyUUID = (content: string): string => {
+  const hash32 = createHash("sha256").update(content).digest("hex").slice(0, 32)
+  return `${hash32.slice(0, 8)}-${hash32.slice(8, 12)}-${hash32.slice(12, 16)}-${hash32.slice(16, 20)}-${hash32.slice(20)}`
+}
+
+test("getUUID returns a deterministic standards-compliant UUIDv4", () => {
+  const uuid = getUUID("hello world")
+
+  expect(uuid).toBe("b94d27b9-934d-4e08-a52e-52d7da7dabfa")
+  expect(uuid).toMatch(
+    /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/,
+  )
+  expect(getUUID("hello world")).toBe(uuid)
+  expect(getUUID("hello world!")).not.toBe(uuid)
+})
+
+test("prints randomUUID and deterministic UUID for comparison", () => {
+  const input = "hello world"
+  const random = randomUUID()
+  const legacy = getLegacyUUID(input)
+  const derived = getUUID(input)
+  const derivedAgain = getUUID(input)
+
+  console.info(`randomUUID(): ${random}`)
+  console.info(`legacy getUUID(${JSON.stringify(input)}): ${legacy}`)
+  console.info(`getUUID(${JSON.stringify(input)}): ${derived}`)
+  console.info(`getUUID(${JSON.stringify(input)}) again: ${derivedAgain}`)
+
+  expect(random).toMatch(
+    /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/,
+  )
+  expect(derived).toMatch(
+    /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/,
+  )
+  expect(legacy).toBe("b94d27b9-934d-3e08-a52e-52d7da7dabfa")
+  expect(derived).toBe("b94d27b9-934d-4e08-a52e-52d7da7dabfa")
+  expect(derivedAgain).toBe(derived)
+  expect(legacy).not.toBe(derived)
+  expect(random).not.toBe(derived)
+})

From 97b11b211e7eadda3623910cb2be59bb5fcd2a58 Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Wed, 18 Mar 2026 10:04:05 +0800
Subject: [PATCH 59/62] feat: ensure that compaction message is tracked as
 agent initiated, consistent with github copilot extension and opencode

---
 README.md                                     |  2 --
 src/lib/api-config.ts                         |  9 +++++++++
 src/lib/config.ts                             |  6 ------
 src/routes/messages/handler.ts                | 19 +++++++++++++------
 .../copilot/create-chat-completions.ts        |  4 ++++
 src/services/copilot/create-messages.ts       |  4 ++++
 src/services/copilot/create-responses.ts      |  5 +++++
 7 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index 9d73987b2..747eb00fa 100644
--- a/README.md
+++ b/README.md
@@ -198,7 +198,6 @@ The following command line options are available for the `start` command:
       "gpt-5-mini": "low"
     },
     "useFunctionApplyPatch": true,
-    "compactUseSmallModel": true,
     "useMessagesApi": true
   }
   ```
@@ -207,7 +206,6 @@ The following command line options are available for the `start` command:
 - **smallModel:** Fallback model used for tool-less warmup messages (e.g., Claude Code probe requests) to avoid spending premium requests; defaults to `gpt-5-mini`.
 - **modelReasoningEfforts:** Per-model `reasoning.effort` sent to the Copilot Responses API. Allowed values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. If a model isn’t listed, `high` is used by default.
 - **useFunctionApplyPatch:** When `true`, the server will convert any custom tool named `apply_patch` in Responses payloads into an OpenAI-style function tool (`type: "function"`) with a parameter schema so assistants can call it using function-calling semantics to edit files. Set to `false` to leave tools unchanged. Defaults to `true`.
-- **compactUseSmallModel:** When `true`, detected "compact" requests (e.g., from Claude Code or Opencode compact mode) will automatically use the configured `smallModel` to avoid consuming premium model usage for short/background tasks. Defaults to `true`.
 - **useMessagesApi:** When `true`, Claude-family models that support Copilot's native `/v1/messages` endpoint will use the Messages API; otherwise they fall back to `/chat/completions`. Set to `false` to disable Messages API routing and always use `/chat/completions`. Defaults to `true`.
 
 Edit this file to customize prompts or swap in your own fast model. Restart the server (or rerun the command) after changes so the cached config is refreshed.
diff --git a/src/lib/api-config.ts b/src/lib/api-config.ts
index 18cb46656..b3a39c196 100644
--- a/src/lib/api-config.ts
+++ b/src/lib/api-config.ts
@@ -75,6 +75,15 @@ export const getOauthAppConfig = (): OauthAppConfig => {
   }
 }
 
+export const prepareForCompact = (
+  headers: Record<string, string>,
+  isCompact?: boolean,
+) => {
+  if (isCompact) {
+    headers["x-initiator"] = "agent"
+  }
+}
+
 export const prepareInteractionHeaders = (
   sessionId: string | undefined,
   isSubagent: boolean,
diff --git a/src/lib/config.ts b/src/lib/config.ts
index 396b9a2ed..d4f68adf6 100644
--- a/src/lib/config.ts
+++ b/src/lib/config.ts
@@ -62,7 +62,6 @@ const defaultConfig: AppConfig = {
     "gpt-5.3-codex": "xhigh",
   },
   useFunctionApplyPatch: true,
-  compactUseSmallModel: true,
   useMessagesApi: true,
 }
 
@@ -203,11 +202,6 @@ export function getReasoningEffortForModel(
   return config.modelReasoningEfforts?.[model] ?? "high"
 }
 
-export function shouldCompactUseSmallModel(): boolean {
-  const config = getConfig()
-  return config.compactUseSmallModel ?? true
-}
-
 export function isMessagesApiEnabled(): boolean {
   const config = getConfig()
   return config.useMessagesApi ?? true
diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index b63936704..ee87bede0 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -7,7 +7,6 @@ import type { Model } from "~/services/copilot/get-models"
 import { awaitApproval } from "~/lib/approval"
 import {
   getSmallModel,
-  shouldCompactUseSmallModel,
   getReasoningEffortForModel,
   isMessagesApiEnabled,
 } from "~/lib/config"
@@ -90,9 +89,6 @@ export async function handleCompletion(c: Context) {
 
   if (isCompact) {
     logger.debug("Is compact request:", isCompact)
-    if (shouldCompactUseSmallModel()) {
-      anthropicPayload.model = getSmallModel()
-    }
   } else {
     // Merge tool_result and text blocks into tool_result to avoid consuming premium requests
     // (caused by skill invocations, edit hooks, plan or to do reminders)
@@ -119,6 +115,7 @@ export async function handleCompletion(c: Context) {
       selectedModel,
       requestId,
       sessionId,
+      isCompact,
     })
   }
 
@@ -128,6 +125,7 @@ export async function handleCompletion(c: Context) {
       selectedModel,
       requestId,
       sessionId,
+      isCompact,
     })
   }
 
@@ -135,6 +133,7 @@ export async function handleCompletion(c: Context) {
     subagentMarker,
     requestId,
     sessionId,
+    isCompact,
   })
 }
 
@@ -148,9 +147,10 @@ const handleWithChatCompletions = async (
     subagentMarker?: SubagentMarker | null
     requestId: string
     sessionId?: string
+    isCompact?: boolean
   },
 ) => {
-  const { subagentMarker, requestId, sessionId } = options
+  const { subagentMarker, requestId, sessionId, isCompact } = options
   const openAIPayload = translateToOpenAI(anthropicPayload)
   logger.debug(
     "Translated OpenAI request payload:",
@@ -161,6 +161,7 @@ const handleWithChatCompletions = async (
     subagentMarker,
     requestId,
     sessionId,
+    isCompact,
   })
 
   if (isNonStreaming(response)) {
@@ -217,9 +218,11 @@ const handleWithResponsesApi = async (
     selectedModel?: Model
     requestId: string
     sessionId?: string
+    isCompact?: boolean
   },
 ) => {
-  const { subagentMarker, selectedModel, requestId, sessionId } = options
+  const { subagentMarker, selectedModel, requestId, sessionId, isCompact } =
+    options
 
   const responsesPayload =
     translateAnthropicMessagesToResponsesPayload(anthropicPayload)
@@ -243,6 +246,7 @@ const handleWithResponsesApi = async (
     subagentMarker,
     requestId,
     sessionId,
+    isCompact,
   })
 
   if (responsesPayload.stream && isAsyncIterable(response)) {
@@ -321,6 +325,7 @@ const handleWithMessagesApi = async (
     selectedModel?: Model
     requestId: string
     sessionId?: string
+    isCompact?: boolean
   },
 ) => {
   const {
@@ -329,6 +334,7 @@ const handleWithMessagesApi = async (
     selectedModel,
     requestId,
     sessionId,
+    isCompact,
   } = options
   // Pre-request processing: filter thinking blocks for Claude models so only
   // valid thinking blocks are sent to the Copilot Messages API.
@@ -366,6 +372,7 @@ const handleWithMessagesApi = async (
     subagentMarker,
     requestId,
     sessionId,
+    isCompact,
   })
 
   if (isAsyncIterable(response)) {
diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts
index d23dcde11..cfbd78994 100644
--- a/src/services/copilot/create-chat-completions.ts
+++ b/src/services/copilot/create-chat-completions.ts
@@ -6,6 +6,7 @@ import type { SubagentMarker } from "~/routes/messages/subagent-marker"
 import {
   copilotBaseUrl,
   copilotHeaders,
+  prepareForCompact,
   prepareInteractionHeaders,
 } from "~/lib/api-config"
 import { HTTPError } from "~/lib/error"
@@ -17,6 +18,7 @@ export const createChatCompletions = async (
     subagentMarker?: SubagentMarker | null
     requestId: string
     sessionId?: string
+    isCompact?: boolean
   },
 ) => {
   if (!state.copilotToken) throw new Error("Copilot token not found")
@@ -50,6 +52,8 @@ export const createChatCompletions = async (
     headers,
   )
 
+  prepareForCompact(headers, options.isCompact)
+
   const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, {
     method: "POST",
     headers,
diff --git a/src/services/copilot/create-messages.ts b/src/services/copilot/create-messages.ts
index ab910054b..c32cf52a1 100644
--- a/src/services/copilot/create-messages.ts
+++ b/src/services/copilot/create-messages.ts
@@ -10,6 +10,7 @@ import type { SubagentMarker } from "~/routes/messages/subagent-marker"
 import {
   copilotBaseUrl,
   copilotHeaders,
+  prepareForCompact,
   prepareInteractionHeaders,
 } from "~/lib/api-config"
 import { HTTPError } from "~/lib/error"
@@ -64,6 +65,7 @@ export const createMessages = async (
     subagentMarker?: SubagentMarker | null
     requestId: string
     sessionId?: string
+    isCompact?: boolean
   },
 ): Promise<CreateMessagesReturn> => {
   if (!state.copilotToken) throw new Error("Copilot token not found")
@@ -94,6 +96,8 @@ export const createMessages = async (
     headers,
   )
 
+  prepareForCompact(headers, options.isCompact)
+
   // align with vscode copilot extension anthropic-beta
   const anthropicBeta = buildAnthropicBetaHeader(
     anthropicBetaHeader,
diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
index a9b726a34..ba50d0cc7 100644
--- a/src/services/copilot/create-responses.ts
+++ b/src/services/copilot/create-responses.ts
@@ -6,6 +6,7 @@ import type { SubagentMarker } from "~/routes/messages/subagent-marker"
 import {
   copilotBaseUrl,
   copilotHeaders,
+  prepareForCompact,
   prepareInteractionHeaders,
 } from "~/lib/api-config"
 import { HTTPError } from "~/lib/error"
@@ -357,6 +358,7 @@ interface ResponsesRequestOptions {
   subagentMarker?: SubagentMarker | null
   requestId: string
   sessionId?: string
+  isCompact?: boolean
 }
 
 export const createResponses = async (
@@ -367,6 +369,7 @@ export const createResponses = async (
     subagentMarker,
     requestId,
     sessionId,
+    isCompact,
   }: ResponsesRequestOptions,
 ): Promise<CreateResponsesReturn> => {
   if (!state.copilotToken) throw new Error("Copilot token not found")
@@ -378,6 +381,8 @@ export const createResponses = async (
 
   prepareInteractionHeaders(sessionId, Boolean(subagentMarker), headers)
 
+  prepareForCompact(headers, isCompact)
+
   // service_tier is not supported by github copilot
   payload.service_tier = null
 

From dc4c62a29f91631fdfa4b20ee4e1dbe0c24cf0ba Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Fri, 20 Mar 2026 12:42:41 +0800
Subject: [PATCH 60/62] feat: enhance user ID parsing with JSON support and
 legacy fallback

---
 src/lib/utils.ts                             | 64 ++++++++++++++++----
 src/routes/messages/responses-translation.ts | 21 +------
 tests/responses-translation.test.ts          | 37 +++++++++++
 tests/utils.test.ts                          | 55 ++++++++++++++++-
 4 files changed, 144 insertions(+), 33 deletions(-)

diff --git a/src/lib/utils.ts b/src/lib/utils.ts
index 50a35368b..29707357d 100644
--- a/src/lib/utils.ts
+++ b/src/lib/utils.ts
@@ -115,6 +115,50 @@ interface PayloadMessage {
   type?: string
 }
 
+const isRecord = (value: unknown): value is Record<string, unknown> =>
+  typeof value === "object" && value !== null
+
+const getUserIdJsonField = (
+  userIdPayload: Record<string, unknown> | null,
+  field: string,
+): string | null => {
+  const value = userIdPayload?.[field]
+  return typeof value === "string" && value.length > 0 ? value : null
+}
+
+const parseJsonUserId = (userId: string): Record<string, unknown> | null => {
+  try {
+    const parsed: unknown = JSON.parse(userId)
+    return isRecord(parsed) ? parsed : null
+  } catch {
+    return null
+  }
+}
+
+export const parseUserIdMetadata = (
+  userId: string | undefined,
+): { safetyIdentifier: string | null; sessionId: string | null } => {
+  if (!userId || typeof userId !== "string") {
+    return { safetyIdentifier: null, sessionId: null }
+  }
+
+  const legacySafetyIdentifier =
+    userId.match(/user_([^_]+)_account/)?.[1] ?? null
+  const legacySessionId = userId.match(/_session_(.+)$/)?.[1] ?? null
+
+  const parsedUserId =
+    legacySafetyIdentifier && legacySessionId ? null : parseJsonUserId(userId)
+
+  const safetyIdentifier =
+    legacySafetyIdentifier
+    ?? getUserIdJsonField(parsedUserId, "device_id")
+    ?? getUserIdJsonField(parsedUserId, "account_uuid")
+  const sessionId =
+    legacySessionId ?? getUserIdJsonField(parsedUserId, "session_id")
+
+  return { safetyIdentifier, sessionId }
+}
+
 const findLastUserContent = (
   messages: Array<PayloadMessage>,
 ): string | null => {
@@ -161,19 +205,13 @@ export const getRootSessionId = (
   anthropicPayload: AnthropicMessagesPayload,
   c: Context,
 ): string | undefined => {
-  let sessionId: string | undefined
-  if (anthropicPayload.metadata?.user_id) {
-    const sessionMatch = new RegExp(/_session_(.+)$/).exec(
-      anthropicPayload.metadata.user_id,
-    )
-    sessionId = sessionMatch ? sessionMatch[1] : undefined
-  } else {
-    sessionId = c.req.header("x-session-id")
-  }
-  if (sessionId) {
-    return getUUID(sessionId)
-  }
-  return sessionId
+  const userId = anthropicPayload.metadata?.user_id
+  const sessionId =
+    userId ?
+      parseUserIdMetadata(userId).sessionId || undefined
+    : c.req.header("x-session-id")
+
+  return sessionId ? getUUID(sessionId) : sessionId
 }
 
 export const getUUID = (content: string): string => {
diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts
index 7107d73d4..855d45aa3 100644
--- a/src/routes/messages/responses-translation.ts
+++ b/src/routes/messages/responses-translation.ts
@@ -4,6 +4,7 @@ import {
   getExtraPromptForModel,
   getReasoningEffortForModel,
 } from "~/lib/config"
+import { parseUserIdMetadata } from "~/lib/utils"
 import {
   type ResponsesPayload,
   type ResponseInputCompaction,
@@ -64,7 +65,7 @@ export const translateAnthropicMessagesToResponsesPayload = (
   const translatedTools = convertAnthropicTools(payload.tools)
   const toolChoice = convertAnthropicToolChoice(payload.tool_choice)
 
-  const { safetyIdentifier, promptCacheKey } = parseUserId(
+  const { safetyIdentifier, sessionId: promptCacheKey } = parseUserIdMetadata(
     payload.metadata?.user_id,
   )
 
@@ -751,24 +752,6 @@ const isResponseOutputRefusal = (
   && "type" in block
   && (block as { type?: unknown }).type === "refusal"
 
-const parseUserId = (
-  userId: string | undefined,
-): { safetyIdentifier: string | null; promptCacheKey: string | null } => {
-  if (!userId || typeof userId !== "string") {
-    return { safetyIdentifier: null, promptCacheKey: null }
-  }
-
-  // Parse safety_identifier: content between "user_" and "_account"
-  const userMatch = userId.match(/user_([^_]+)_account/)
-  const safetyIdentifier = userMatch ? userMatch[1] : null
-
-  // Parse prompt_cache_key: content after "_session_"
-  const sessionMatch = userId.match(/_session_(.+)$/)
-  const promptCacheKey = sessionMatch ? sessionMatch[1] : null
-
-  return { safetyIdentifier, promptCacheKey }
-}
-
 const convertToolResultContent = (
   content: string | Array<AnthropicTextBlock | AnthropicImageBlock>,
 ): string | Array<ResponseInputContent> => {
diff --git a/tests/responses-translation.test.ts b/tests/responses-translation.test.ts
index 3ce5f7083..8d1804e9d 100644
--- a/tests/responses-translation.test.ts
+++ b/tests/responses-translation.test.ts
@@ -46,6 +46,15 @@ const samplePayload = {
   ],
 } as unknown as AnthropicMessagesPayload
 
+const jsonStyleUserId = JSON.stringify({
+  device_id: "3f4a1b7c8d9e0f1234567890abcdef1234567890abcdef1234567890abcdef12",
+  account_uuid: "",
+  session_id: "2c4e1cf0-7a67-4d2e-9a4b-1d16d3f44752",
+})
+
+const legacyStyleUserId =
+  "user_8b7e2c1d4f6a9b3c0d1e2f3456789abcdeffedcba9876543210fedcba1234567_account__session_7d0e2f61-4b5c-4a9d-8f11-2c3d4e5f6a7b"
+
 describe("translateAnthropicMessagesToResponsesPayload", () => {
   it("converts anthropic text blocks into response input messages", () => {
     const result = translateAnthropicMessagesToResponsesPayload(samplePayload)
@@ -67,6 +76,34 @@ describe("translateAnthropicMessagesToResponsesPayload", () => {
       "hi",
     ])
   })
+
+  it("extracts identifiers from JSON-like user_id metadata", () => {
+    const result = translateAnthropicMessagesToResponsesPayload({
+      ...samplePayload,
+      metadata: {
+        user_id: jsonStyleUserId,
+      },
+    })
+
+    expect(result.safety_identifier).toBe(
+      "3f4a1b7c8d9e0f1234567890abcdef1234567890abcdef1234567890abcdef12",
+    )
+    expect(result.prompt_cache_key).toBe("2c4e1cf0-7a67-4d2e-9a4b-1d16d3f44752")
+  })
+
+  it("keeps legacy user_id parsing before JSON fallback", () => {
+    const result = translateAnthropicMessagesToResponsesPayload({
+      ...samplePayload,
+      metadata: {
+        user_id: legacyStyleUserId,
+      },
+    })
+
+    expect(result.safety_identifier).toBe(
+      "8b7e2c1d4f6a9b3c0d1e2f3456789abcdeffedcba9876543210fedcba1234567",
+    )
+    expect(result.prompt_cache_key).toBe("7d0e2f61-4b5c-4a9d-8f11-2c3d4e5f6a7b")
+  })
 })
 
 describe("translateResponsesResultToAnthropic", () => {
diff --git a/tests/utils.test.ts b/tests/utils.test.ts
index d1d8c094d..00de3673f 100644
--- a/tests/utils.test.ts
+++ b/tests/utils.test.ts
@@ -1,7 +1,20 @@
+import type { Context } from "hono"
+
 import { expect, test } from "bun:test"
 import { createHash, randomUUID } from "node:crypto"
 
-import { getUUID } from "../src/lib/utils"
+import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types"
+
+import { getRootSessionId, getUUID } from "../src/lib/utils"
+
+const jsonStyleUserId = JSON.stringify({
+  device_id: "3f4a1b7c8d9e0f1234567890abcdef1234567890abcdef1234567890abcdef12",
+  account_uuid: "",
+  session_id: "2c4e1cf0-7a67-4d2e-9a4b-1d16d3f44752",
+})
+
+const legacyStyleUserId =
+  "user_8b7e2c1d4f6a9b3c0d1e2f3456789abcdeffedcba9876543210fedcba1234567_account__session_7d0e2f61-4b5c-4a9d-8f11-2c3d4e5f6a7b"
 
 const getLegacyUUID = (content: string): string => {
   const hash32 = createHash("sha256").update(content).digest("hex").slice(0, 32)
@@ -43,3 +56,43 @@ test("prints randomUUID and deterministic UUID for comparison", () => {
   expect(legacy).not.toBe(derived)
   expect(random).not.toBe(derived)
 })
+
+test("getRootSessionId supports JSON-like user_id metadata", () => {
+  const anthropicPayload = {
+    model: "claude-3-5-sonnet",
+    messages: [],
+    max_tokens: 0,
+    metadata: {
+      user_id: jsonStyleUserId,
+    },
+  } as AnthropicMessagesPayload
+  const context = {
+    req: {
+      header: (_name: string) => undefined, // stub: every header lookup returns undefined
+    },
+  } as unknown as Context
+
+  expect(getRootSessionId(anthropicPayload, context)).toBe(
+    getUUID("2c4e1cf0-7a67-4d2e-9a4b-1d16d3f44752"), // same value as session_id in jsonStyleUserId
+  )
+})
+
+test("getRootSessionId keeps legacy parsing before JSON fallback", () => {
+  const anthropicPayload = {
+    model: "claude-3-5-sonnet",
+    messages: [],
+    max_tokens: 0,
+    metadata: {
+      user_id: legacyStyleUserId,
+    },
+  } as AnthropicMessagesPayload
+  const context = {
+    req: {
+      header: (_name: string) => undefined, // stub: every header lookup returns undefined
+    },
+  } as unknown as Context
+
+  expect(getRootSessionId(anthropicPayload, context)).toBe(
+    getUUID("7d0e2f61-4b5c-4a9d-8f11-2c3d4e5f6a7b"), // same value as the trailing "_session_" segment of legacyStyleUserId
+  )
+})

From a64984309b5b1b0f6f387aeafcb5e99db7f0ba22 Mon Sep 17 00:00:00 2001
From: caozhiyuan <568022847@qq.com>
Date: Sat, 21 Mar 2026 22:51:02 +0800
Subject: [PATCH 61/62] fix: update GitHub Enterprise API URL format

---
 src/lib/api-config.ts | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/lib/api-config.ts b/src/lib/api-config.ts
index b3a39c196..54d084c2b 100644
--- a/src/lib/api-config.ts
+++ b/src/lib/api-config.ts
@@ -27,9 +27,7 @@ export const getGitHubBaseUrl = (): string => {
 
 export const getGitHubApiBaseUrl = (): string => {
   const resolvedDomain = getEnterpriseDomain()
-  return resolvedDomain ?
-      `https://${resolvedDomain}/api/v3`
-    : GITHUB_API_BASE_URL
+  return resolvedDomain ? `https://api.${resolvedDomain}` : GITHUB_API_BASE_URL // NOTE(review): api.<domain> is the GHE.com form; GitHub Enterprise Server serves REST at https://<host>/api/v3 — confirm intended target
 }
 
 export const getOpencodeOauthHeaders = (): Record<string, string> => {

From ce8224c55933f811abe5bf9ba42f9336a7852997 Mon Sep 17 00:00:00 2001
From: "Jeffrey.Cao" <Jeffrey.Cao@budweiserapac.com>
Date: Tue, 31 Mar 2026 17:47:57 +0800
Subject: [PATCH 62/62] fix: strip cache_control from payload before sending to
 Copilot Messages API

---
 src/routes/messages/handler.ts | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index ee87bede0..edf6f0d3b 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -336,6 +336,12 @@ const handleWithMessagesApi = async (
     sessionId,
     isCompact,
   } = options
+
+  // Remove the `scope` field from cache_control on system content blocks: the
+  // Copilot Messages API rejects extra cache_control fields such as scope.
+  // commit by nicktogo
+  stripCacheControl(anthropicPayload)
+
   // Pre-request processing: filter thinking blocks for Claude models so only
   // valid thinking blocks are sent to the Copilot Messages API.
   for (const msg of anthropicPayload.messages) {
@@ -514,3 +520,17 @@ const mergeToolResult = (
     i === lastIndex ? mergeContentWithTexts(tr, textBlocks) : tr,
   )
 }
+
+const stripCacheControl = (payload: AnthropicMessagesPayload): void => {
+  // In place: drop only `scope` from each system block's cache_control (Claude Code only adds that unsupported field there).
+  if (Array.isArray(payload.system)) { // a non-array system value is left untouched
+    for (const block of payload.system) {
+      const b = block as unknown as Record<string, unknown>
+      const cc = b.cache_control
+      if (cc && typeof cc === "object") { // skip blocks with no (or non-object) cache_control
+        const { scope, ...rest } = cc as Record<string, unknown> // `scope` is discarded; every other key is kept
+        b.cache_control = rest
+      }
+    }
+  }
+}