diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
new file mode 100644
index 000000000..7d6742af9
--- /dev/null
+++ b/.claude-plugin/marketplace.json
@@ -0,0 +1,13 @@
+{
+ "name": "copilot-api-marketplace",
+ "owner": {
+ "name": "copilot-api maintainers"
+ },
+ "plugins": [
+ {
+ "name": "claude-plugin",
+ "description": "Inject SubagentStart marker context for copilot-api initiator override",
+ "source": "./claude-plugin"
+ }
+ ]
+}
diff --git a/.opencode/plugins/subagent-marker.js b/.opencode/plugins/subagent-marker.js
new file mode 100644
index 000000000..e17a68127
--- /dev/null
+++ b/.opencode/plugins/subagent-marker.js
@@ -0,0 +1,78 @@
+const MARKER_PREFIX = "__SUBAGENT_MARKER__"
+
+const subagentSessions = new Set()
+const markedSessions = new Set()
+const sessionParentMap = new Map()
+
+const getSessionInfo = (event) => {
+ if (!event || typeof event !== "object") return undefined
+ const properties = event.properties
+ if (!properties || typeof properties !== "object") return undefined
+ const info = properties.info
+ if (!info || typeof info !== "object") return undefined
+ return info
+}
+
+export const SubagentMarkerPlugin = async () => {
+ return {
+ event: async ({ event }) => {
+ if (event.type === "session.created") {
+ const info = getSessionInfo(event)
+ if (info?.id) {
+ if (info.parentID) {
+ subagentSessions.add(info.id)
+ sessionParentMap.set(info.id, info.parentID)
+ } else {
+ sessionParentMap.set(info.id, info.id)
+ }
+ }
+ return
+ }
+
+ if (event.type === "session.deleted") {
+ const info = getSessionInfo(event)
+ if (info?.id) {
+ subagentSessions.delete(info.id)
+ markedSessions.delete(info.id)
+ sessionParentMap.delete(info.id)
+ }
+ }
+ },
+ "chat.message": async (input, output) => {
+ const { sessionID } = input
+ if (!subagentSessions.has(sessionID) || markedSessions.has(sessionID)) {
+ return
+ }
+ if (!output.message?.id || !output.message?.sessionID) {
+ return
+ }
+
+ const marker = `${MARKER_PREFIX}${JSON.stringify({
+ session_id: sessionID,
+ agent_id: sessionID,
+ agent_type: input.agent ?? "opencode-subagent",
+ })}`
+
+ output.parts.unshift({
+ id: `prt-${output.message.id}-subagent-marker`,
+ sessionID: output.message.sessionID,
+ messageID: output.message.id,
+ type: "text",
+ text: `\nSubagentStart hook additional context: ${marker}\n`,
+ synthetic: true,
+ time: {
+ start: Date.now(),
+ end: Date.now(),
+ },
+ })
+ markedSessions.add(sessionID)
+ },
+ "chat.headers": async (input, output) => {
+ const { sessionID } = input
+ const sessionIdValue = sessionParentMap.get(sessionID)
+ if (sessionIdValue) {
+ output.headers["x-session-id"] = sessionIdValue
+ }
+ },
+ }
+}
diff --git a/README.md b/README.md
index 0d36c13c9..747eb00fa 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ A reverse-engineered proxy for the GitHub Copilot API that exposes it as an Open
## Features
-- **OpenAI & Anthropic Compatibility**: Exposes GitHub Copilot as an OpenAI-compatible (`/v1/chat/completions`, `/v1/models`, `/v1/embeddings`) and Anthropic-compatible (`/v1/messages`) API.
+- **OpenAI & Anthropic Compatibility**: Exposes GitHub Copilot as an OpenAI-compatible (`/v1/responses`, `/v1/chat/completions`, `/v1/models`, `/v1/embeddings`) and Anthropic-compatible (`/v1/messages`) API.
- **Claude Code Integration**: Easily configure and launch [Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview) to use Copilot as its backend with a simple command-line flag (`--claude-code`).
- **Usage Dashboard**: A web-based dashboard to monitor your Copilot API usage, view quotas, and see detailed statistics.
- **Rate Limit Control**: Manage API usage with rate-limiting options (`--rate-limit`) and a waiting mechanism (`--wait`) to prevent errors from rapid requests.
@@ -39,6 +39,9 @@ A reverse-engineered proxy for the GitHub Copilot API that exposes it as an Open
- **Token Visibility**: Option to display GitHub and Copilot tokens during authentication and refresh for debugging (`--show-token`).
- **Flexible Authentication**: Authenticate interactively or provide a GitHub token directly, suitable for CI/CD environments.
- **Support for Different Account Types**: Works with individual, business, and enterprise GitHub Copilot plans.
+- **Opencode OAuth Support**: Use opencode GitHub Copilot authentication by setting `COPILOT_API_OAUTH_APP=opencode` environment variable.
+- **GitHub Enterprise Support**: Connect to GHE.com by setting `COPILOT_API_ENTERPRISE_URL` environment variable (e.g., `company.ghe.com`).
+- **Custom Data Directory**: Change the default data directory (where tokens and config are stored) by setting `COPILOT_API_HOME` environment variable.
## Demo
@@ -177,6 +180,52 @@ The following command line options are available for the `start` command:
| ------ | ------------------------- | ------- | ----- |
| --json | Output debug info as JSON | false | none |
+## Configuration (config.json)
+
+- **Location:** `~/.local/share/copilot-api/config.json` (Linux/macOS) or `%USERPROFILE%\.local\share\copilot-api\config.json` (Windows).
+- **Default shape:**
+ ```json
+ {
+ "auth": {
+ "apiKeys": []
+ },
+ "extraPrompts": {
+ "gpt-5-mini": "",
+ "gpt-5.1-codex-max": ""
+ },
+ "smallModel": "gpt-5-mini",
+ "modelReasoningEfforts": {
+ "gpt-5-mini": "low"
+ },
+ "useFunctionApplyPatch": true,
+ "useMessagesApi": true
+ }
+ ```
+- **auth.apiKeys:** API keys used for request authentication. Supports multiple keys for rotation. Requests can authenticate with either `x-api-key: <key>` or `Authorization: Bearer <key>`. If empty or omitted, authentication is disabled.
+- **extraPrompts:** Map of `model -> prompt` appended to the first system prompt when translating Anthropic-style requests to Copilot. Use this to inject guardrails or guidance per model. Missing default entries are auto-added without overwriting your custom prompts.
+- **smallModel:** Fallback model used for tool-less warmup messages (e.g., Claude Code probe requests) to avoid spending premium requests; defaults to `gpt-5-mini`.
+- **modelReasoningEfforts:** Per-model `reasoning.effort` sent to the Copilot Responses API. Allowed values are `none`, `minimal`, `low`, `medium`, `high`, and `xhigh`. If a model isn’t listed, `high` is used by default.
+- **useFunctionApplyPatch:** When `true`, the server will convert any custom tool named `apply_patch` in Responses payloads into an OpenAI-style function tool (`type: "function"`) with a parameter schema so assistants can call it using function-calling semantics to edit files. Set to `false` to leave tools unchanged. Defaults to `true`.
+- **useMessagesApi:** When `true`, Claude-family models that support Copilot's native `/v1/messages` endpoint will use the Messages API; otherwise they fall back to `/chat/completions`. Set to `false` to disable Messages API routing and always use `/chat/completions`. Defaults to `true`.
+
+Edit this file to customize prompts or swap in your own fast model. Restart the server (or rerun the command) after changes so the cached config is refreshed.
+
+## API Authentication
+
+- **Protected routes:** All routes except `/` require authentication when `auth.apiKeys` is configured and non-empty.
+- **Allowed auth headers:**
+ - `x-api-key: <key>`
+ - `Authorization: Bearer <key>`
+- **CORS preflight:** `OPTIONS` requests are always allowed.
+- **When no keys are configured:** Server starts normally and allows requests (authentication disabled).
+
+Example request:
+
+```sh
+curl http://localhost:4141/v1/models \
+ -H "x-api-key: your_api_key"
+```
+
## API Endpoints
The server exposes several endpoints to interact with the Copilot API. It provides OpenAI-compatible endpoints and now also includes support for Anthropic-compatible endpoints, allowing for greater flexibility with different tools and services.
@@ -185,11 +234,12 @@ The server exposes several endpoints to interact with the Copilot API. It provid
These endpoints mimic the OpenAI API structure.
-| Endpoint | Method | Description |
-| --------------------------- | ------ | --------------------------------------------------------- |
-| `POST /v1/chat/completions` | `POST` | Creates a model response for the given chat conversation. |
-| `GET /v1/models` | `GET` | Lists the currently available models. |
-| `POST /v1/embeddings` | `POST` | Creates an embedding vector representing the input text. |
+| Endpoint | Method | Description |
+| --------------------------- | ------ | ---------------------------------------------------------------- |
+| `POST /v1/responses` | `POST` | OpenAI's most advanced interface for generating model responses. |
+| `POST /v1/chat/completions` | `POST` | Creates a model response for the given chat conversation. |
+| `GET /v1/models` | `GET` | Lists the currently available models. |
+| `POST /v1/embeddings` | `POST` | Creates an embedding vector representing the input text. |
### Anthropic Compatible Endpoints
@@ -255,6 +305,28 @@ npx copilot-api@latest debug --json
# Initialize proxy from environment variables (HTTP_PROXY, HTTPS_PROXY, etc.)
npx copilot-api@latest start --proxy-env
+
+# Use opencode GitHub Copilot authentication
+COPILOT_API_OAUTH_APP=opencode npx @jeffreycao/copilot-api@latest start
+```
+
+### Opencode OAuth Authentication
+
+You can use opencode GitHub Copilot authentication instead of the default one:
+
+```sh
+# Set environment variable before running any command
+export COPILOT_API_OAUTH_APP=opencode
+
+# Then run start or auth commands
+npx @jeffreycao/copilot-api@latest start
+npx @jeffreycao/copilot-api@latest auth
+```
+
+Or use inline environment variable:
+
+```sh
+COPILOT_API_OAUTH_APP=opencode npx @jeffreycao/copilot-api@latest start
```
## Using the Usage Viewer
@@ -307,12 +379,14 @@ Here is an example `.claude/settings.json` file:
"env": {
"ANTHROPIC_BASE_URL": "http://localhost:4141",
"ANTHROPIC_AUTH_TOKEN": "dummy",
- "ANTHROPIC_MODEL": "gpt-4.1",
- "ANTHROPIC_DEFAULT_SONNET_MODEL": "gpt-4.1",
- "ANTHROPIC_SMALL_FAST_MODEL": "gpt-4.1",
- "ANTHROPIC_DEFAULT_HAIKU_MODEL": "gpt-4.1",
+ "ANTHROPIC_MODEL": "gpt-5.2",
+ "ANTHROPIC_DEFAULT_SONNET_MODEL": "gpt-5.2",
+ "ANTHROPIC_DEFAULT_HAIKU_MODEL": "gpt-5-mini",
"DISABLE_NON_ESSENTIAL_MODEL_CALLS": "1",
- "CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1"
+ "CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1",
+ "BASH_MAX_TIMEOUT_MS": "600000",
+ "CLAUDE_CODE_ATTRIBUTION_HEADER": "0",
+ "CLAUDE_CODE_ENABLE_PROMPT_SUGGESTION": "false"
},
"permissions": {
"deny": [
@@ -326,6 +400,55 @@ You can find more options here: [Claude Code settings](https://docs.anthropic.co
You can also read more about IDE integration here: [Add Claude Code to your IDE](https://docs.anthropic.com/en/docs/claude-code/ide-integrations)
+### Subagent Marker Integration (Optional)
+
+This project supports `x-initiator: agent` for subagent-originated requests.
+
+#### Claude Code plugin producer (marketplace-based)
+
+The marker producer is packaged as a Claude Code plugin named `claude-plugin`.
+
+- Marketplace catalog in this repository: `.claude-plugin/marketplace.json`
+- Plugin source in this repository: `claude-plugin`
+
+Add the marketplace remotely:
+
+```sh
+/plugin marketplace add https://github.com/ericc-ch/copilot-api.git
+```
+
+Install the plugin from the marketplace:
+
+```sh
+/plugin install claude-plugin@copilot-api-marketplace
+```
+
+After installation, the plugin injects `__SUBAGENT_MARKER__...` on `SubagentStart`, and this proxy uses it to infer `x-initiator: agent`.
+
+#### Opencode plugin producer
+
+The marker producer is packaged as an opencode plugin located at `.opencode/plugins/subagent-marker.js`.
+
+**Installation:**
+
+Copy the plugin file to your opencode plugins directory:
+
+```sh
+# Clone or download this repository, then copy the plugin
+cp .opencode/plugins/subagent-marker.js ~/.config/opencode/plugins/
+```
+
+Or manually create the file at `~/.config/opencode/plugins/subagent-marker.js` with the plugin content.
+
+**Features:**
+
+- Tracks sub-sessions created by subagents
+- Automatically prepends a marker system reminder (`__SUBAGENT_MARKER__...`) to subagent chat messages
+- Sets `x-session-id` header for session tracking
+- Enables this proxy to infer `x-initiator: agent` for subagent-originated requests
+
+The plugin hooks into `session.created`, `session.deleted`, `chat.message`, and `chat.headers` events to provide seamless subagent marker functionality.
+
## Running from Source
The project can be run from source in several ways:
@@ -349,3 +472,10 @@ bun run start
- `--rate-limit <seconds>`: Enforces a minimum time interval between requests. For example, `copilot-api start --rate-limit 30` will ensure there's at least a 30-second gap between requests.
- `--wait`: Use this with `--rate-limit`. It makes the server wait for the cooldown period to end instead of rejecting the request with an error. This is useful for clients that don't automatically retry on rate limit errors.
- If you have a GitHub business or enterprise plan account with Copilot, use the `--account-type` flag (e.g., `--account-type business`). See the [official documentation](https://docs.github.com/en/enterprise-cloud@latest/copilot/managing-copilot/managing-github-copilot-in-your-organization/managing-access-to-github-copilot-in-your-organization/managing-github-copilot-access-to-your-organizations-network#configuring-copilot-subscription-based-network-routing-for-your-enterprise-or-organization) for more details.
+
+### CLAUDE.md Recommended Content
+
+Please include the following in `CLAUDE.md` (for Claude usage):
+
+- You are prohibited from asking the user questions directly; you MUST use the AskUserQuestion tool instead.
+- Once you can confirm that the task is complete, you MUST use the AskUserQuestion tool to ask the user for confirmation. The user may respond with feedback if they are not satisfied with the result, which you can use to make improvements and try again.
diff --git a/claude-plugin/.claude-plugin/plugin.json b/claude-plugin/.claude-plugin/plugin.json
new file mode 100644
index 000000000..615fcf662
--- /dev/null
+++ b/claude-plugin/.claude-plugin/plugin.json
@@ -0,0 +1,8 @@
+{
+ "name": "claude-plugin",
+ "description": "Inject SubagentStart marker context for copilot-api initiator override",
+ "version": "1.0.0",
+ "author": {
+ "name": "copilot-api maintainers"
+ }
+}
diff --git a/claude-plugin/hooks/hooks.json b/claude-plugin/hooks/hooks.json
new file mode 100644
index 000000000..0dce9891f
--- /dev/null
+++ b/claude-plugin/hooks/hooks.json
@@ -0,0 +1,15 @@
+{
+ "hooks": {
+ "SubagentStart": [
+ {
+ "matcher": "*",
+ "hooks": [
+ {
+ "type": "command",
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/subagent-start-marker.js\""
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/claude-plugin/scripts/subagent-start-marker.js b/claude-plugin/scripts/subagent-start-marker.js
new file mode 100644
index 000000000..4a2a39681
--- /dev/null
+++ b/claude-plugin/scripts/subagent-start-marker.js
@@ -0,0 +1,33 @@
+async function readStdin() {
+ let input = "";
+ for await (const chunk of process.stdin) {
+ input += chunk;
+ }
+ return input.trim();
+}
+
+const rawInput = await readStdin();
+let hookInput = {};
+
+if (rawInput) {
+ try {
+ hookInput = JSON.parse(rawInput);
+ } catch {
+ hookInput = {};
+ }
+}
+
+const marker = `__SUBAGENT_MARKER__${JSON.stringify({
+ session_id: hookInput.session_id ?? null,
+ agent_id: hookInput.agent_id ?? null,
+ agent_type: hookInput.agent_type ?? null,
+})}`;
+
+const payload = {
+ hookSpecificOutput: {
+ hookEventName: "SubagentStart",
+ additionalContext: marker,
+ },
+};
+
+process.stdout.write(`${JSON.stringify(payload)}\n`);
diff --git a/eslint.config.js b/eslint.config.js
index c9f79bea5..b7d56d5b7 100644
--- a/eslint.config.js
+++ b/eslint.config.js
@@ -1,6 +1,7 @@
import config from "@echristian/eslint-config"
export default config({
+ ignores: ["claude-plugin/**", ".opencode/**"],
prettier: {
plugins: ["prettier-plugin-packagejson"],
},
diff --git a/src/lib/api-config.ts b/src/lib/api-config.ts
index 83bce92ad..54d084c2b 100644
--- a/src/lib/api-config.ts
+++ b/src/lib/api-config.ts
@@ -2,22 +2,145 @@ import { randomUUID } from "node:crypto"
import type { State } from "./state"
+export const isOpencodeOauthApp = (): boolean => {
+ return process.env.COPILOT_API_OAUTH_APP?.trim() === "opencode"
+}
+
+export const normalizeDomain = (input: string): string => {
+ return input
+ .trim()
+ .replace(/^https?:\/\//u, "")
+ .replace(/\/+$/u, "")
+}
+
+export const getEnterpriseDomain = (): string | null => {
+ const raw = (process.env.COPILOT_API_ENTERPRISE_URL ?? "").trim()
+ if (!raw) return null
+ const normalized = normalizeDomain(raw)
+ return normalized || null
+}
+
+export const getGitHubBaseUrl = (): string => {
+ const resolvedDomain = getEnterpriseDomain()
+ return resolvedDomain ? `https://${resolvedDomain}` : GITHUB_BASE_URL
+}
+
+export const getGitHubApiBaseUrl = (): string => {
+ const resolvedDomain = getEnterpriseDomain()
+ return resolvedDomain ? `https://api.${resolvedDomain}` : GITHUB_API_BASE_URL
+}
+
+export const getOpencodeOauthHeaders = (): Record<string, string> => {
+ return {
+ Accept: "application/json",
+ "Content-Type": "application/json",
+ "User-Agent":
+ "opencode/1.2.16 ai-sdk/provider-utils/3.0.21 runtime/bun/1.3.10, opencode/1.2.16",
+ }
+}
+
+export const getOauthUrls = (): {
+ deviceCodeUrl: string
+ accessTokenUrl: string
+} => {
+ const githubBaseUrl = getGitHubBaseUrl()
+
+ return {
+ deviceCodeUrl: `${githubBaseUrl}/login/device/code`,
+ accessTokenUrl: `${githubBaseUrl}/login/oauth/access_token`,
+ }
+}
+
+interface OauthAppConfig {
+ clientId: string
+ headers: Record<string, string>
+ scope: string
+}
+
+export const getOauthAppConfig = (): OauthAppConfig => {
+ if (isOpencodeOauthApp()) {
+ return {
+ clientId: OPENCODE_GITHUB_CLIENT_ID,
+ headers: getOpencodeOauthHeaders(),
+ scope: GITHUB_APP_SCOPES,
+ }
+ }
+
+ return {
+ clientId: GITHUB_CLIENT_ID,
+ headers: standardHeaders(),
+ scope: GITHUB_APP_SCOPES,
+ }
+}
+
+export const prepareForCompact = (
+ headers: Record<string, string>,
+ isCompact?: boolean,
+) => {
+ if (isCompact) {
+ headers["x-initiator"] = "agent"
+ }
+}
+
+export const prepareInteractionHeaders = (
+ sessionId: string | undefined,
+ isSubagent: boolean,
+ headers: Record<string, string>,
+) => {
+ const sendInteractionHeaders = !isOpencodeOauthApp()
+
+ if (isSubagent) {
+ headers["x-initiator"] = "agent"
+ if (sendInteractionHeaders) {
+ headers["x-interaction-type"] = "conversation-subagent"
+ }
+ }
+
+ if (sessionId && sendInteractionHeaders) {
+ headers["x-interaction-id"] = sessionId
+ }
+}
+
export const standardHeaders = () => ({
"content-type": "application/json",
accept: "application/json",
})
-const COPILOT_VERSION = "0.26.7"
+const COPILOT_VERSION = "0.38.2"
const EDITOR_PLUGIN_VERSION = `copilot-chat/${COPILOT_VERSION}`
const USER_AGENT = `GitHubCopilotChat/${COPILOT_VERSION}`
-const API_VERSION = "2025-04-01"
+const API_VERSION = "2025-10-01"
+
+export const copilotBaseUrl = (state: State) => {
+ const enterpriseDomain = getEnterpriseDomain()
+ if (enterpriseDomain) {
+ return `https://copilot-api.${enterpriseDomain}`
+ }
+
+ return state.accountType === "individual" ?
+ "https://api.githubcopilot.com"
+ : `https://api.${state.accountType}.githubcopilot.com`
+}
-export const copilotBaseUrl = (state: State) =>
- state.accountType === "individual" ?
- "https://api.githubcopilot.com"
- : `https://api.${state.accountType}.githubcopilot.com`
-export const copilotHeaders = (state: State, vision: boolean = false) => {
+export const copilotHeaders = (
+ state: State,
+ requestId?: string,
+ vision: boolean = false,
+) => {
+ if (isOpencodeOauthApp()) {
+ const headers: Record<string, string> = {
+ Authorization: `Bearer ${state.copilotToken}`,
+ ...getOpencodeOauthHeaders(),
+ "Openai-Intent": "conversation-edits",
+ }
+
+ if (vision) headers["Copilot-Vision-Request"] = "true"
+
+ return headers
+ }
+
+ const requestIdValue = requestId ?? randomUUID()
 const headers: Record<string, string> = {
Authorization: `Bearer ${state.copilotToken}`,
"content-type": standardHeaders()["content-type"],
@@ -25,14 +148,24 @@ export const copilotHeaders = (state: State, vision: boolean = false) => {
"editor-version": `vscode/${state.vsCodeVersion}`,
"editor-plugin-version": EDITOR_PLUGIN_VERSION,
"user-agent": USER_AGENT,
- "openai-intent": "conversation-panel",
+ "openai-intent": "conversation-agent",
"x-github-api-version": API_VERSION,
- "x-request-id": randomUUID(),
+ "x-request-id": requestIdValue,
"x-vscode-user-agent-library-version": "electron-fetch",
+ "x-agent-task-id": requestIdValue,
+ "x-interaction-type": "conversation-agent",
}
if (vision) headers["copilot-vision-request"] = "true"
+ if (state.macMachineId) {
+ headers["vscode-machineid"] = state.macMachineId
+ }
+
+ if (state.vsCodeSessionId) {
+ headers["vscode-sessionid"] = state.vsCodeSessionId
+ }
+
return headers
}
@@ -50,3 +183,4 @@ export const githubHeaders = (state: State) => ({
export const GITHUB_BASE_URL = "https://github.com"
export const GITHUB_CLIENT_ID = "Iv1.b507a08c87ecfe98"
export const GITHUB_APP_SCOPES = ["read:user"].join(" ")
+export const OPENCODE_GITHUB_CLIENT_ID = "Ov23li8tweQw6odWQebz"
diff --git a/src/lib/config.ts b/src/lib/config.ts
new file mode 100644
index 000000000..d4f68adf6
--- /dev/null
+++ b/src/lib/config.ts
@@ -0,0 +1,208 @@
+import consola from "consola"
+import fs from "node:fs"
+
+import { PATHS } from "./paths"
+
+export interface AppConfig {
+ auth?: {
+ apiKeys?: Array<string>
+ }
+ extraPrompts?: Record<string, string>
+ smallModel?: string
+ responsesApiContextManagementModels?: Array<string>
+ modelReasoningEfforts?: Record<
+ string,
+ "none" | "minimal" | "low" | "medium" | "high" | "xhigh"
+ >
+ useFunctionApplyPatch?: boolean
+ compactUseSmallModel?: boolean
+ useMessagesApi?: boolean
+}
+
+const gpt5ExplorationPrompt = `## Exploration and reading files
+- **Think first.** Before any tool call, decide ALL files/resources you will need.
+- **Batch everything.** If you need multiple files (even from different places), read them together.
+- **multi_tool_use.parallel** Use multi_tool_use.parallel to parallelize tool calls and only this.
+- **Only make sequential calls if you truly cannot know the next file without seeing a result first.**
+- **Workflow:** (a) plan all needed reads → (b) issue one parallel batch → (c) analyze results → (d) repeat if new, unpredictable reads arise.`
+
+const gpt5CommentaryPrompt = `# Working with the user
+
+You interact with the user through a terminal. You have 2 ways of communicating with the users:
+- Share intermediary updates in \`commentary\` channel.
+- After you have completed all your work, send a message to the \`final\` channel.
+
+## Intermediary updates
+
+- Intermediary updates go to the \`commentary\` channel.
+- User updates are short updates while you are working, they are NOT final answers.
+- You use 1-2 sentence user updates to communicate progress and new information to the user as you are doing work.
+- Do not begin responses with conversational interjections or meta commentary. Avoid openers such as acknowledgements (“Done —”, “Got it”, “Great question, ”) or framing phrases.
+- You provide user updates frequently, every 20s.
+- Before exploring or doing substantial work, you start with a user update acknowledging the request and explaining your first step. You should include your understanding of the user request and explain what you will do. Avoid commenting on the request or using starters such as "Got it -" or "Understood -" etc.
+- When exploring, e.g. searching, reading files, you provide user updates as you go, every 20s, explaining what context you are gathering and what you've learned. Vary your sentence structure when providing these updates to avoid sounding repetitive - in particular, don't start each sentence the same way.
+- After you have sufficient context, and the work is substantial, you provide a longer plan (this is the only user update that may be longer than 2 sentences and can contain formatting).
+- Before performing file edits of any kind, you provide updates explaining what edits you are making.
+- As you are thinking, you very frequently provide updates even if not taking any actions, informing the user of your progress. You interrupt your thinking and send multiple updates in a row if thinking for more than 100 words.
+- Tone of your updates MUST match your personality.`
+
+const defaultConfig: AppConfig = {
+ auth: {
+ apiKeys: [],
+ },
+ extraPrompts: {
+ "gpt-5-mini": gpt5ExplorationPrompt,
+ "gpt-5.3-codex": gpt5CommentaryPrompt,
+ "gpt-5.4": gpt5CommentaryPrompt,
+ },
+ smallModel: "gpt-5-mini",
+ responsesApiContextManagementModels: [],
+ modelReasoningEfforts: {
+ "gpt-5-mini": "low",
+ "gpt-5.3-codex": "xhigh",
+ },
+ useFunctionApplyPatch: true,
+ useMessagesApi: true,
+}
+
+let cachedConfig: AppConfig | null = null
+
+function ensureConfigFile(): void {
+ try {
+ fs.accessSync(PATHS.CONFIG_PATH, fs.constants.R_OK | fs.constants.W_OK)
+ } catch {
+ fs.mkdirSync(PATHS.APP_DIR, { recursive: true })
+ fs.writeFileSync(
+ PATHS.CONFIG_PATH,
+ `${JSON.stringify(defaultConfig, null, 2)}\n`,
+ "utf8",
+ )
+ try {
+ fs.chmodSync(PATHS.CONFIG_PATH, 0o600)
+ } catch {
+ return
+ }
+ }
+}
+
+function readConfigFromDisk(): AppConfig {
+ ensureConfigFile()
+ try {
+ const raw = fs.readFileSync(PATHS.CONFIG_PATH, "utf8")
+ if (!raw.trim()) {
+ fs.writeFileSync(
+ PATHS.CONFIG_PATH,
+ `${JSON.stringify(defaultConfig, null, 2)}\n`,
+ "utf8",
+ )
+ return defaultConfig
+ }
+ return JSON.parse(raw) as AppConfig
+ } catch (error) {
+ consola.error("Failed to read config file, using default config", error)
+ return defaultConfig
+ }
+}
+
+function mergeDefaultConfig(config: AppConfig): {
+ mergedConfig: AppConfig
+ changed: boolean
+} {
+ const extraPrompts = config.extraPrompts ?? {}
+ const defaultExtraPrompts = defaultConfig.extraPrompts ?? {}
+ const modelReasoningEfforts = config.modelReasoningEfforts ?? {}
+ const defaultModelReasoningEfforts = defaultConfig.modelReasoningEfforts ?? {}
+
+ const missingExtraPromptModels = Object.keys(defaultExtraPrompts).filter(
+ (model) => !Object.hasOwn(extraPrompts, model),
+ )
+
+ const missingReasoningEffortModels = Object.keys(
+ defaultModelReasoningEfforts,
+ ).filter((model) => !Object.hasOwn(modelReasoningEfforts, model))
+
+ const hasExtraPromptChanges = missingExtraPromptModels.length > 0
+ const hasReasoningEffortChanges = missingReasoningEffortModels.length > 0
+
+ if (!hasExtraPromptChanges && !hasReasoningEffortChanges) {
+ return { mergedConfig: config, changed: false }
+ }
+
+ return {
+ mergedConfig: {
+ ...config,
+ extraPrompts: {
+ ...defaultExtraPrompts,
+ ...extraPrompts,
+ },
+ modelReasoningEfforts: {
+ ...defaultModelReasoningEfforts,
+ ...modelReasoningEfforts,
+ },
+ },
+ changed: true,
+ }
+}
+
+export function mergeConfigWithDefaults(): AppConfig {
+ const config = readConfigFromDisk()
+ const { mergedConfig, changed } = mergeDefaultConfig(config)
+
+ if (changed) {
+ try {
+ fs.writeFileSync(
+ PATHS.CONFIG_PATH,
+ `${JSON.stringify(mergedConfig, null, 2)}\n`,
+ "utf8",
+ )
+ } catch (writeError) {
+ consola.warn(
+ "Failed to write merged extraPrompts to config file",
+ writeError,
+ )
+ }
+ }
+
+ cachedConfig = mergedConfig
+ return mergedConfig
+}
+
+export function getConfig(): AppConfig {
+ cachedConfig ??= readConfigFromDisk()
+ return cachedConfig
+}
+
+export function getExtraPromptForModel(model: string): string {
+ const config = getConfig()
+ return config.extraPrompts?.[model] ?? ""
+}
+
+export function getSmallModel(): string {
+ const config = getConfig()
+ return config.smallModel ?? "gpt-5-mini"
+}
+
+export function getResponsesApiContextManagementModels(): Array<string> {
+ const config = getConfig()
+ return (
+ config.responsesApiContextManagementModels
+ ?? defaultConfig.responsesApiContextManagementModels
+ ?? []
+ )
+}
+
+export function isResponsesApiContextManagementModel(model: string): boolean {
+ return getResponsesApiContextManagementModels().includes(model)
+}
+
+export function getReasoningEffortForModel(
+ model: string,
+): "none" | "minimal" | "low" | "medium" | "high" | "xhigh" {
+ const config = getConfig()
+ return config.modelReasoningEfforts?.[model] ?? "high"
+}
+
+export function isMessagesApiEnabled(): boolean {
+ const config = getConfig()
+ return config.useMessagesApi ?? true
+}
diff --git a/src/lib/logger.ts b/src/lib/logger.ts
new file mode 100644
index 000000000..93a3b01f0
--- /dev/null
+++ b/src/lib/logger.ts
@@ -0,0 +1,182 @@
+import consola, { type ConsolaInstance } from "consola"
+import fs from "node:fs"
+import path from "node:path"
+import util from "node:util"
+
+import { PATHS } from "./paths"
+import { state } from "./state"
+
+const LOG_RETENTION_DAYS = 7
+const LOG_RETENTION_MS = LOG_RETENTION_DAYS * 24 * 60 * 60 * 1000
+const CLEANUP_INTERVAL_MS = 24 * 60 * 60 * 1000
+const LOG_DIR = path.join(PATHS.APP_DIR, "logs")
+const FLUSH_INTERVAL_MS = 1000
+const MAX_BUFFER_SIZE = 100
+
+const logStreams = new Map<string, fs.WriteStream>()
+const logBuffers = new Map<string, Array<string>>()
+
+const ensureLogDirectory = () => {
+ if (!fs.existsSync(LOG_DIR)) {
+ fs.mkdirSync(LOG_DIR, { recursive: true })
+ }
+}
+
+const cleanupOldLogs = () => {
+ if (!fs.existsSync(LOG_DIR)) {
+ return
+ }
+
+ const now = Date.now()
+
+ for (const entry of fs.readdirSync(LOG_DIR)) {
+ const filePath = path.join(LOG_DIR, entry)
+
+ let stats: fs.Stats
+ try {
+ stats = fs.statSync(filePath)
+ } catch {
+ continue
+ }
+
+ if (!stats.isFile()) {
+ continue
+ }
+
+ if (now - stats.mtimeMs > LOG_RETENTION_MS) {
+ try {
+ fs.rmSync(filePath)
+ } catch {
+ continue
+ }
+ }
+ }
+}
+
+const formatArgs = (args: Array<unknown>) =>
+ args
+ .map((arg) =>
+ typeof arg === "string" ? arg : (
+ util.inspect(arg, { depth: null, colors: false })
+ ),
+ )
+ .join(" ")
+
+const sanitizeName = (name: string) => {
+ const normalized = name
+ .toLowerCase()
+ .replaceAll(/[^a-z0-9]+/g, "-")
+ .replaceAll(/^-+|-+$/g, "")
+
+ return normalized === "" ? "handler" : normalized
+}
+
+const getLogStream = (filePath: string): fs.WriteStream => {
+ let stream = logStreams.get(filePath)
+ if (!stream || stream.destroyed) {
+ stream = fs.createWriteStream(filePath, { flags: "a" })
+ logStreams.set(filePath, stream)
+
+ stream.on("error", (error: unknown) => {
+ console.warn("Log stream error", error)
+ logStreams.delete(filePath)
+ })
+ }
+ return stream
+}
+
+const flushBuffer = (filePath: string) => {
+ const buffer = logBuffers.get(filePath)
+ if (!buffer || buffer.length === 0) {
+ return
+ }
+
+ const stream = getLogStream(filePath)
+ const content = buffer.join("\n") + "\n"
+ stream.write(content, (error) => {
+ if (error) {
+ console.warn("Failed to write handler log", error)
+ }
+ })
+
+ logBuffers.set(filePath, [])
+}
+
+const flushAllBuffers = () => {
+ for (const filePath of logBuffers.keys()) {
+ flushBuffer(filePath)
+ }
+}
+
+const appendLine = (filePath: string, line: string) => {
+ let buffer = logBuffers.get(filePath)
+ if (!buffer) {
+ buffer = []
+ logBuffers.set(filePath, buffer)
+ }
+
+ buffer.push(line)
+
+ if (buffer.length >= MAX_BUFFER_SIZE) {
+ flushBuffer(filePath)
+ }
+}
+
+setInterval(flushAllBuffers, FLUSH_INTERVAL_MS)
+
+const cleanup = () => {
+ flushAllBuffers()
+ for (const stream of logStreams.values()) {
+ stream.end()
+ }
+ logStreams.clear()
+ logBuffers.clear()
+}
+
+process.on("exit", cleanup)
+process.on("SIGINT", () => {
+ cleanup()
+ process.exit(0)
+})
+process.on("SIGTERM", () => {
+ cleanup()
+ process.exit(0)
+})
+
+let lastCleanup = 0
+
+export const createHandlerLogger = (name: string): ConsolaInstance => {
+ ensureLogDirectory()
+
+ const sanitizedName = sanitizeName(name)
+ const instance = consola.withTag(name)
+
+ if (state.verbose) {
+ instance.level = 5
+ }
+ instance.setReporters([])
+
+ instance.addReporter({
+ log(logObj) {
+ ensureLogDirectory()
+
+ if (Date.now() - lastCleanup > CLEANUP_INTERVAL_MS) {
+ cleanupOldLogs()
+ lastCleanup = Date.now()
+ }
+
+ const date = logObj.date
+ const dateKey = date.toLocaleDateString("sv-SE")
+ const timestamp = date.toLocaleString("sv-SE", { hour12: false })
+ const filePath = path.join(LOG_DIR, `${sanitizedName}-${dateKey}.log`)
+ const message = formatArgs(logObj.args as Array<unknown>)
+ const line = `[${timestamp}] [${logObj.type}] [${logObj.tag || name}]${
+ message ? ` ${message}` : ""
+ }`
+
+ appendLine(filePath, line)
+ },
+ })
+
+ return instance
+}
diff --git a/src/lib/models.ts b/src/lib/models.ts
new file mode 100644
index 000000000..f5f621eab
--- /dev/null
+++ b/src/lib/models.ts
@@ -0,0 +1,75 @@
+import type { Model } from "~/services/copilot/get-models"
+
+import { state } from "~/lib/state"
+
+export const findEndpointModel = (sdkModelId: string): Model | undefined => {
+ const models = state.models?.data ?? []
+ const exactMatch = models.find((m) => m.id === sdkModelId)
+ if (exactMatch) {
+ return exactMatch
+ }
+
+ const normalized = _normalizeSdkModelId(sdkModelId)
+ if (!normalized) {
+ return undefined
+ }
+
+ const modelName = `claude-${normalized.family}-${normalized.version}`
+ const model = models.find((m) => m.id === modelName)
+ if (model) {
+ return model
+ }
+
+ return undefined
+}
+
+/**
+ * Normalizes an SDK model ID to extract the model family and version.
+ * This normalization logic is adapted from the GitHub Copilot extension.
+ * Examples:
+ * - "claude-opus-4-5-20251101" -> { family: "opus", version: "4.5" }
+ * - "claude-3-5-sonnet-20241022" -> { family: "sonnet", version: "3.5" }
+ * - "claude-sonnet-4-20250514" -> { family: "sonnet", version: "4" }
+ * - "claude-haiku-3-5-20250514" -> { family: "haiku", version: "3.5" }
+ * - "claude-haiku-4.5" -> { family: "haiku", version: "4.5" }
+ */
+const _normalizeSdkModelId = (
+ sdkModelId: string,
+): { family: string; version: string } | undefined => {
+ const lower = sdkModelId.toLowerCase()
+
+ // Strip date suffix (8 digits at the end)
+ const withoutDate = lower.replace(/-\d{8}$/, "")
+
+ // Pattern 1: claude-{family}-{major}-{minor} (e.g., claude-opus-4-5, claude-haiku-3-5)
+ const pattern1 = withoutDate.match(/^claude-(\w+)-(\d+)-(\d+)$/)
+ if (pattern1) {
+ return { family: pattern1[1], version: `${pattern1[2]}.${pattern1[3]}` }
+ }
+
+ // Pattern 2: claude-{major}-{minor}-{family} (e.g., claude-3-5-sonnet)
+ const pattern2 = withoutDate.match(/^claude-(\d+)-(\d+)-(\w+)$/)
+ if (pattern2) {
+ return { family: pattern2[3], version: `${pattern2[1]}.${pattern2[2]}` }
+ }
+
+ // Pattern 3: claude-{family}-{major}.{minor} (e.g., claude-haiku-4.5)
+ const pattern3 = withoutDate.match(/^claude-(\w+)-(\d+)\.(\d+)$/)
+ if (pattern3) {
+ return { family: pattern3[1], version: `${pattern3[2]}.${pattern3[3]}` }
+ }
+
+ // Pattern 4: claude-{family}-{major} (e.g., claude-sonnet-4)
+ const pattern4 = withoutDate.match(/^claude-(\w+)-(\d+)$/)
+ if (pattern4) {
+ return { family: pattern4[1], version: pattern4[2] }
+ }
+
+ // Pattern 5: claude-{major}-{family} (e.g., claude-3-opus)
+ const pattern5 = withoutDate.match(/^claude-(\d+)-(\w+)$/)
+ if (pattern5) {
+ return { family: pattern5[2], version: pattern5[1] }
+ }
+
+ return undefined
+}
diff --git a/src/lib/paths.ts b/src/lib/paths.ts
index 8d0a9f02b..dba35a791 100644
--- a/src/lib/paths.ts
+++ b/src/lib/paths.ts
@@ -2,18 +2,29 @@ import fs from "node:fs/promises"
import os from "node:os"
import path from "node:path"
-const APP_DIR = path.join(os.homedir(), ".local", "share", "copilot-api")
+const AUTH_APP = process.env.COPILOT_API_OAUTH_APP?.trim() || ""
+const ENTERPRISE_PREFIX = process.env.COPILOT_API_ENTERPRISE_URL ? "ent_" : ""
-const GITHUB_TOKEN_PATH = path.join(APP_DIR, "github_token")
+const DEFAULT_DIR = path.join(os.homedir(), ".local", "share", "copilot-api")
+const APP_DIR = process.env.COPILOT_API_HOME || DEFAULT_DIR
+
+const GITHUB_TOKEN_PATH = path.join(
+ APP_DIR,
+ AUTH_APP,
+ ENTERPRISE_PREFIX + "github_token",
+)
+const CONFIG_PATH = path.join(APP_DIR, "config.json")
export const PATHS = {
APP_DIR,
GITHUB_TOKEN_PATH,
+ CONFIG_PATH,
}
export async function ensurePaths(): Promise<void> {
- await fs.mkdir(PATHS.APP_DIR, { recursive: true })
+ await fs.mkdir(path.join(PATHS.APP_DIR, AUTH_APP), { recursive: true })
await ensureFile(PATHS.GITHUB_TOKEN_PATH)
+ await ensureFile(PATHS.CONFIG_PATH)
}
async function ensureFile(filePath: string): Promise<void> {
diff --git a/src/lib/request-auth.ts b/src/lib/request-auth.ts
new file mode 100644
index 000000000..d974c7a06
--- /dev/null
+++ b/src/lib/request-auth.ts
@@ -0,0 +1,101 @@
+import type { Context, MiddlewareHandler } from "hono"
+
+import consola from "consola"
+
+import { getConfig } from "./config"
+
+interface AuthMiddlewareOptions {
+ getApiKeys?: () => Array<string>
+ allowUnauthenticatedPaths?: Array<string>
+ allowOptionsBypass?: boolean
+}
+
+export function normalizeApiKeys(apiKeys: unknown): Array<string> {
+ if (!Array.isArray(apiKeys)) {
+ if (apiKeys !== undefined) {
+ consola.warn("Invalid auth.apiKeys config. Expected an array of strings.")
+ }
+ return []
+ }
+
+ const normalizedKeys = apiKeys
+ .filter((key): key is string => typeof key === "string")
+ .map((key) => key.trim())
+ .filter((key) => key.length > 0)
+
+ if (normalizedKeys.length !== apiKeys.length) {
+ consola.warn(
+ "Invalid auth.apiKeys entries found. Only non-empty strings are allowed.",
+ )
+ }
+
+ return [...new Set(normalizedKeys)]
+}
+
+export function getConfiguredApiKeys(): Array<string> {
+ const config = getConfig()
+ return normalizeApiKeys(config.auth?.apiKeys)
+}
+
+export function extractRequestApiKey(c: Context): string | null {
+ const xApiKey = c.req.header("x-api-key")?.trim()
+ if (xApiKey) {
+ return xApiKey
+ }
+
+ const authorization = c.req.header("authorization")
+ if (!authorization) {
+ return null
+ }
+
+ const [scheme, ...rest] = authorization.trim().split(/\s+/)
+ if (scheme.toLowerCase() !== "bearer") {
+ return null
+ }
+
+ const bearerToken = rest.join(" ").trim()
+ return bearerToken || null
+}
+
+function createUnauthorizedResponse(c: Context): Response {
+ c.header("WWW-Authenticate", 'Bearer realm="copilot-api"')
+ return c.json(
+ {
+ error: {
+ message: "Unauthorized",
+ type: "authentication_error",
+ },
+ },
+ 401,
+ )
+}
+
+export function createAuthMiddleware(
+ options: AuthMiddlewareOptions = {},
+): MiddlewareHandler {
+ const getApiKeys = options.getApiKeys ?? getConfiguredApiKeys
+ const allowUnauthenticatedPaths = options.allowUnauthenticatedPaths ?? ["/"]
+ const allowOptionsBypass = options.allowOptionsBypass ?? true
+
+ return async (c, next) => {
+ if (allowOptionsBypass && c.req.method === "OPTIONS") {
+ return next()
+ }
+
+ if (allowUnauthenticatedPaths.includes(c.req.path)) {
+ return next()
+ }
+
+ const apiKeys = getApiKeys()
+ if (apiKeys.length === 0) {
+ return next()
+ }
+
+ const requestApiKey = extractRequestApiKey(c)
+ if (!requestApiKey || !apiKeys.includes(requestApiKey)) {
+ return createUnauthorizedResponse(c)
+ }
+
+ return next()
+ }
+}
diff --git a/src/lib/state.ts b/src/lib/state.ts
index 5ba4dc1d1..490ce7370 100644
--- a/src/lib/state.ts
+++ b/src/lib/state.ts
@@ -8,6 +8,9 @@ export interface State {
models?: ModelsResponse
vsCodeVersion?: string
+ macMachineId?: string
+ vsCodeSessionId?: string
+
manualApprove: boolean
rateLimitWait: boolean
showToken: boolean
@@ -15,6 +18,7 @@ export interface State {
// Rate limiting configuration
rateLimitSeconds?: number
lastRequestTimestamp?: number
+ verbose: boolean
}
export const state: State = {
@@ -22,4 +26,5 @@ export const state: State = {
manualApprove: false,
rateLimitWait: false,
showToken: false,
+ verbose: false,
}
diff --git a/src/lib/token.ts b/src/lib/token.ts
index fc8d2785f..a46a660e8 100644
--- a/src/lib/token.ts
+++ b/src/lib/token.ts
@@ -1,6 +1,8 @@
import consola from "consola"
import fs from "node:fs/promises"
+import { setTimeout as delay } from "node:timers/promises"
+import { isOpencodeOauthApp } from "~/lib/api-config"
import { PATHS } from "~/lib/paths"
import { getCopilotToken } from "~/services/github/get-copilot-token"
import { getDeviceCode } from "~/services/github/get-device-code"
@@ -10,12 +12,37 @@ import { pollAccessToken } from "~/services/github/poll-access-token"
import { HTTPError } from "./error"
import { state } from "./state"
+let copilotRefreshLoopController: AbortController | null = null
+
+export const stopCopilotRefreshLoop = () => {
+ if (!copilotRefreshLoopController) {
+ return
+ }
+
+ copilotRefreshLoopController.abort()
+ copilotRefreshLoopController = null
+}
+
const readGithubToken = () => fs.readFile(PATHS.GITHUB_TOKEN_PATH, "utf8")
const writeGithubToken = (token: string) =>
fs.writeFile(PATHS.GITHUB_TOKEN_PATH, token)
export const setupCopilotToken = async () => {
+ if (isOpencodeOauthApp()) {
+ if (!state.githubToken) throw new Error(`opencode token not found`)
+
+ state.copilotToken = state.githubToken
+
+ consola.debug("GitHub Copilot token set from opencode auth token")
+ if (state.showToken) {
+ consola.info("Copilot token:", state.copilotToken)
+ }
+
+ stopCopilotRefreshLoop()
+ return
+ }
+
const { token, refresh_in } = await getCopilotToken()
state.copilotToken = token
@@ -25,21 +52,48 @@ export const setupCopilotToken = async () => {
consola.info("Copilot token:", token)
}
- const refreshInterval = (refresh_in - 60) * 1000
- setInterval(async () => {
+ stopCopilotRefreshLoop()
+
+ const controller = new AbortController()
+ copilotRefreshLoopController = controller
+
+ runCopilotRefreshLoop(refresh_in, controller.signal)
+ .catch(() => {
+ consola.warn("Copilot token refresh loop stopped")
+ })
+ .finally(() => {
+ if (copilotRefreshLoopController === controller) {
+ copilotRefreshLoopController = null
+ }
+ })
+}
+
+const runCopilotRefreshLoop = async (
+ refreshIn: number,
+ signal: AbortSignal,
+) => {
+ let nextRefreshDelayMs = (refreshIn - 60) * 1000
+
+ while (!signal.aborted) {
+ await delay(nextRefreshDelayMs, undefined, { signal })
+
consola.debug("Refreshing Copilot token")
+
try {
- const { token } = await getCopilotToken()
+ const { token, refresh_in } = await getCopilotToken()
state.copilotToken = token
consola.debug("Copilot token refreshed")
if (state.showToken) {
consola.info("Refreshed Copilot token:", token)
}
+
+ nextRefreshDelayMs = (refresh_in - 60) * 1000
} catch (error) {
consola.error("Failed to refresh Copilot token:", error)
- throw error
+ nextRefreshDelayMs = 15_000
+ consola.warn(`Retrying Copilot token refresh in ${nextRefreshDelayMs}ms`)
}
- }, refreshInterval)
+ }
}
interface SetupGitHubTokenOptions {
diff --git a/src/lib/tokenizer.ts b/src/lib/tokenizer.ts
index 8c3eda736..e9b83ac5b 100644
--- a/src/lib/tokenizer.ts
+++ b/src/lib/tokenizer.ts
@@ -37,7 +37,9 @@ const calculateToolCallsTokens = (
let tokens = 0
for (const toolCall of toolCalls) {
tokens += constants.funcInit
- tokens += encoder.encode(JSON.stringify(toolCall)).length
+ tokens += encoder.encode(toolCall.id).length
+ tokens += encoder.encode(toolCall.function.name).length
+ tokens += encoder.encode(toolCall.function.arguments).length
}
tokens += constants.funcEnd
return tokens
@@ -158,6 +160,7 @@ const getModelConstants = (model: Model) => {
enumInit: -3,
enumItem: 3,
funcEnd: 12,
+ isGpt: true,
}
: {
funcInit: 7,
@@ -166,6 +169,7 @@ const getModelConstants = (model: Model) => {
enumInit: -3,
enumItem: 3,
funcEnd: 12,
+ isGpt: model.id.startsWith("gpt-"),
}
}
@@ -218,8 +222,12 @@ const calculateParameterTokens = (
const line = `${paramName}:${paramType}:${paramDesc}`
tokens += encoder.encode(line).length
+ if (param.type === "array" && param["items"]) {
+ tokens += calculateParametersTokens(param["items"], encoder, constants)
+ }
+
// Handle additional properties (excluding standard ones)
- const excludedKeys = new Set(["type", "description", "enum"])
+ const excludedKeys = new Set(["type", "description", "enum", "items"])
for (const propertyName of Object.keys(param)) {
if (!excludedKeys.has(propertyName)) {
const propertyValue = param[propertyName]
@@ -234,6 +242,27 @@ const calculateParameterTokens = (
return tokens
}
+/**
+ * Calculate tokens for properties object
+ */
+const calculatePropertiesTokens = (
+ properties: Record<string, unknown>,
+ encoder: Encoder,
+ constants: ReturnType<typeof getModelConstants>,
+): number => {
+ let tokens = 0
+ if (Object.keys(properties).length > 0) {
+ tokens += constants.propInit
+ for (const propKey of Object.keys(properties)) {
+ tokens += calculateParameterTokens(propKey, properties[propKey], {
+ encoder,
+ constants,
+ })
+ }
+ }
+ return tokens
+}
+
/**
* Calculate tokens for function parameters
*/
@@ -249,18 +278,17 @@ const calculateParametersTokens = (
const params = parameters as Record<string, unknown>
let tokens = 0
+ const excludedKeys = new Set(["$schema", "additionalProperties"])
for (const [key, value] of Object.entries(params)) {
+ if (excludedKeys.has(key)) {
+ continue
+ }
if (key === "properties") {
- const properties = value as Record
- if (Object.keys(properties).length > 0) {
- tokens += constants.propInit
- for (const propKey of Object.keys(properties)) {
- tokens += calculateParameterTokens(propKey, properties[propKey], {
- encoder,
- constants,
- })
- }
- }
+ tokens += calculatePropertiesTokens(
+ value as Record<string, unknown>,
+ encoder,
+ constants,
+ )
} else {
const paramText =
typeof value === "string" ? value : JSON.stringify(value)
@@ -306,10 +334,16 @@ export const numTokensForTools = (
constants: ReturnType<typeof getModelConstants>,
): number => {
let funcTokenCount = 0
- for (const tool of tools) {
- funcTokenCount += calculateToolTokens(tool, encoder, constants)
+ if (constants.isGpt) {
+ for (const tool of tools) {
+ funcTokenCount += calculateToolTokens(tool, encoder, constants)
+ }
+ funcTokenCount += constants.funcEnd
+ } else {
+ for (const tool of tools) {
+ funcTokenCount += encoder.encode(JSON.stringify(tool)).length
+ }
}
- funcTokenCount += constants.funcEnd
return funcTokenCount
}
@@ -335,6 +369,7 @@ export const getTokenCount = async (
)
const constants = getModelConstants(model)
+ // GPT token counting reference: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
let inputTokens = calculateTokens(inputMessages, encoder, constants)
if (payload.tools && payload.tools.length > 0) {
inputTokens += numTokensForTools(payload.tools, encoder, constants)
diff --git a/src/lib/utils.ts b/src/lib/utils.ts
index cc80be667..29707357d 100644
--- a/src/lib/utils.ts
+++ b/src/lib/utils.ts
@@ -1,4 +1,10 @@
+import type { Context } from "hono"
+
import consola from "consola"
+import { createHash, randomUUID } from "node:crypto"
+import { networkInterfaces } from "node:os"
+
+import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types"
import { getModels } from "~/services/copilot/get-models"
import { getVSCodeVersion } from "~/services/get-vscode-version"
@@ -24,3 +30,200 @@ export const cacheVSCodeVersion = async () => {
consola.info(`Using VSCode version: ${response}`)
}
+
+const invalidMacAddresses = new Set([
+ "00:00:00:00:00:00",
+ "ff:ff:ff:ff:ff:ff",
+ "ac:de:48:00:11:22",
+])
+
+function validateMacAddress(candidate: string): boolean {
+ const tempCandidate = candidate.replaceAll("-", ":").toLowerCase()
+ return !invalidMacAddresses.has(tempCandidate)
+}
+
+export function getMac(): string | null {
+ const ifaces = networkInterfaces()
+ // eslint-disable-next-line guard-for-in
+ for (const name in ifaces) {
+ const networkInterface = ifaces[name]
+ if (networkInterface) {
+ for (const { mac } of networkInterface) {
+ if (validateMacAddress(mac)) {
+ return mac
+ }
+ }
+ }
+ }
+ return null
+}
+
+export const cacheMacMachineId = () => {
+ const macAddress = getMac() ?? randomUUID()
+ state.macMachineId = createHash("sha256")
+ .update(macAddress, "utf8")
+ .digest("hex")
+ consola.debug(`Using machine ID: ${state.macMachineId}`)
+}
+
+const SESSION_REFRESH_BASE_MS = 60 * 60 * 1000
+const SESSION_REFRESH_JITTER_MS = 20 * 60 * 1000
+let vsCodeSessionRefreshTimer: ReturnType<typeof setTimeout> | null = null
+
+const generateSessionId = () => {
+ state.vsCodeSessionId = randomUUID() + Date.now().toString()
+ consola.debug(`Generated VSCode session ID: ${state.vsCodeSessionId}`)
+}
+
+export const stopVsCodeSessionRefreshLoop = () => {
+ if (vsCodeSessionRefreshTimer) {
+ clearTimeout(vsCodeSessionRefreshTimer)
+ vsCodeSessionRefreshTimer = null
+ }
+}
+
+const scheduleSessionIdRefresh = () => {
+ const randomDelay = Math.floor(Math.random() * SESSION_REFRESH_JITTER_MS)
+ const delay = SESSION_REFRESH_BASE_MS + randomDelay
+ consola.debug(
+ `Scheduling next VSCode session ID refresh in ${Math.round(
+ delay / 1000,
+ )} seconds`,
+ )
+
+ stopVsCodeSessionRefreshLoop()
+ vsCodeSessionRefreshTimer = setTimeout(() => {
+ try {
+ generateSessionId()
+ } catch (error) {
+ consola.error("Failed to refresh session ID, rescheduling...", error)
+ } finally {
+ scheduleSessionIdRefresh()
+ }
+ }, delay)
+}
+
+export const cacheVsCodeSessionId = () => {
+ stopVsCodeSessionRefreshLoop()
+ generateSessionId()
+ scheduleSessionIdRefresh()
+}
+
+interface PayloadMessage {
+ role?: string
+ content?: string | Array<{ type?: string; text?: string }> | null
+ type?: string
+}
+
+const isRecord = (value: unknown): value is Record<string, unknown> =>
+ typeof value === "object" && value !== null
+
+const getUserIdJsonField = (
+ userIdPayload: Record<string, unknown> | null,
+ field: string,
+): string | null => {
+ const value = userIdPayload?.[field]
+ return typeof value === "string" && value.length > 0 ? value : null
+}
+
+const parseJsonUserId = (userId: string): Record<string, unknown> | null => {
+ try {
+ const parsed: unknown = JSON.parse(userId)
+ return isRecord(parsed) ? parsed : null
+ } catch {
+ return null
+ }
+}
+
+export const parseUserIdMetadata = (
+ userId: string | undefined,
+): { safetyIdentifier: string | null; sessionId: string | null } => {
+ if (!userId || typeof userId !== "string") {
+ return { safetyIdentifier: null, sessionId: null }
+ }
+
+ const legacySafetyIdentifier =
+ userId.match(/user_([^_]+)_account/)?.[1] ?? null
+ const legacySessionId = userId.match(/_session_(.+)$/)?.[1] ?? null
+
+ const parsedUserId =
+ legacySafetyIdentifier && legacySessionId ? null : parseJsonUserId(userId)
+
+ const safetyIdentifier =
+ legacySafetyIdentifier
+ ?? getUserIdJsonField(parsedUserId, "device_id")
+ ?? getUserIdJsonField(parsedUserId, "account_uuid")
+ const sessionId =
+ legacySessionId ?? getUserIdJsonField(parsedUserId, "session_id")
+
+ return { safetyIdentifier, sessionId }
+}
+
+const findLastUserContent = (
+ messages: Array<PayloadMessage>,
+): string | null => {
+ for (let i = messages.length - 1; i >= 0; i--) {
+ const msg = messages[i]
+ if (msg.role === "user" && msg.content) {
+ if (typeof msg.content === "string") {
+ return msg.content
+ } else if (Array.isArray(msg.content)) {
+ const array = msg.content
+ .filter((n) => n.type !== "tool_result")
+ .map((n) => ({ ...n, cache_control: undefined }))
+ if (array.length > 0) {
+ return JSON.stringify(array)
+ }
+ }
+ }
+ }
+ return null
+}
+
+export const generateRequestIdFromPayload = (
+ payload: {
+ messages: string | Array<PayloadMessage> | undefined
+ },
+ sessionId?: string,
+): string => {
+ const messages = payload.messages
+ if (messages) {
+ const lastUserContent =
+ typeof messages === "string" ? messages : findLastUserContent(messages)
+
+ if (lastUserContent) {
+ return getUUID(
+ (sessionId ?? "") + (state.macMachineId ?? "") + lastUserContent,
+ )
+ }
+ }
+
+ return randomUUID()
+}
+
+export const getRootSessionId = (
+ anthropicPayload: AnthropicMessagesPayload,
+ c: Context,
+): string | undefined => {
+ const userId = anthropicPayload.metadata?.user_id
+ const sessionId =
+ userId ?
+ parseUserIdMetadata(userId).sessionId || undefined
+ : c.req.header("x-session-id")
+
+ return sessionId ? getUUID(sessionId) : sessionId
+}
+
+export const getUUID = (content: string): string => {
+ const uuidBytes = createHash("sha256")
+ .update(content)
+ .digest()
+ .subarray(0, 16)
+
+ uuidBytes[6] = (uuidBytes[6] & 0x0f) | 0x40
+ uuidBytes[8] = (uuidBytes[8] & 0x3f) | 0x80
+
+ const uuidHex = uuidBytes.toString("hex")
+
+ return `${uuidHex.slice(0, 8)}-${uuidHex.slice(8, 12)}-${uuidHex.slice(12, 16)}-${uuidHex.slice(16, 20)}-${uuidHex.slice(20)}`
+}
diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts
index 04a5ae9ed..842a499f2 100644
--- a/src/routes/chat-completions/handler.ts
+++ b/src/routes/chat-completions/handler.ts
@@ -1,24 +1,26 @@
import type { Context } from "hono"
-import consola from "consola"
import { streamSSE, type SSEMessage } from "hono/streaming"
import { awaitApproval } from "~/lib/approval"
+import { createHandlerLogger } from "~/lib/logger"
import { checkRateLimit } from "~/lib/rate-limit"
import { state } from "~/lib/state"
import { getTokenCount } from "~/lib/tokenizer"
-import { isNullish } from "~/lib/utils"
+import { generateRequestIdFromPayload, getUUID, isNullish } from "~/lib/utils"
import {
createChatCompletions,
type ChatCompletionResponse,
type ChatCompletionsPayload,
} from "~/services/copilot/create-chat-completions"
+const logger = createHandlerLogger("chat-completions-handler")
+
export async function handleCompletion(c: Context) {
await checkRateLimit(state)
let payload = await c.req.json()
- consola.debug("Request payload:", JSON.stringify(payload).slice(-400))
+ logger.debug("Request payload:", JSON.stringify(payload).slice(-400))
// Find the selected model
const selectedModel = state.models?.data.find(
@@ -29,12 +31,12 @@ export async function handleCompletion(c: Context) {
try {
if (selectedModel) {
const tokenCount = await getTokenCount(payload, selectedModel)
- consola.info("Current token count:", tokenCount)
+ logger.info("Current token count:", tokenCount)
} else {
- consola.warn("No model selected, skipping token count calculation")
+ logger.warn("No model selected, skipping token count calculation")
}
} catch (error) {
- consola.warn("Failed to calculate token count:", error)
+ logger.warn("Failed to calculate token count:", error)
}
if (state.manualApprove) await awaitApproval()
@@ -44,20 +46,30 @@ export async function handleCompletion(c: Context) {
...payload,
max_tokens: selectedModel?.capabilities.limits.max_output_tokens,
}
- consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens))
+ logger.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens))
}
- const response = await createChatCompletions(payload)
+ // Subagent markers are not supported here for now; derive sessionId as getUUID(requestId)
+ const requestId = generateRequestIdFromPayload(payload)
+ logger.debug("Generated request ID:", requestId)
+
+ const sessionId = getUUID(requestId)
+ logger.debug("Extracted session ID:", sessionId)
+
+ const response = await createChatCompletions(payload, {
+ requestId,
+ sessionId,
+ })
if (isNonStreaming(response)) {
- consola.debug("Non-streaming response:", JSON.stringify(response))
+ logger.debug("Non-streaming response:", JSON.stringify(response))
return c.json(response)
}
- consola.debug("Streaming response")
+ logger.debug("Streaming response")
return streamSSE(c, async (stream) => {
for await (const chunk of response) {
- consola.debug("Streaming chunk:", JSON.stringify(chunk))
+ logger.debug("Streaming chunk:", JSON.stringify(chunk))
await stream.writeSSE(chunk as SSEMessage)
}
})
diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts
index 881fffcc8..0f75cfc5c 100644
--- a/src/routes/messages/anthropic-types.ts
+++ b/src/routes/messages/anthropic-types.ts
@@ -19,10 +19,13 @@ export interface AnthropicMessagesPayload {
name?: string
}
thinking?: {
- type: "enabled"
+ type: "enabled" | "adaptive"
budget_tokens?: number
}
service_tier?: "auto" | "standard_only"
+ output_config?: {
+ effort?: "low" | "medium" | "high" | "max"
+ }
}
export interface AnthropicTextBlock {
@@ -42,7 +45,7 @@ export interface AnthropicImageBlock {
export interface AnthropicToolResultBlock {
type: "tool_result"
tool_use_id: string
- content: string
+ content: string | Array
is_error?: boolean
}
@@ -56,6 +59,7 @@ export interface AnthropicToolUseBlock {
export interface AnthropicThinkingBlock {
type: "thinking"
thinking: string
+ signature: string
}
export type AnthropicUserContentBlock =
diff --git a/src/routes/messages/count-tokens-handler.ts b/src/routes/messages/count-tokens-handler.ts
index 2ec849cb8..a361e1cc5 100644
--- a/src/routes/messages/count-tokens-handler.ts
+++ b/src/routes/messages/count-tokens-handler.ts
@@ -2,9 +2,9 @@ import type { Context } from "hono"
import consola from "consola"
-import { state } from "~/lib/state"
import { getTokenCount } from "~/lib/tokenizer"
+import { findEndpointModel } from "../../lib/models"
import { type AnthropicMessagesPayload } from "./anthropic-types"
import { translateToOpenAI } from "./non-stream-translation"
@@ -19,9 +19,8 @@ export async function handleCountTokens(c: Context) {
const openAIPayload = translateToOpenAI(anthropicPayload)
- const selectedModel = state.models?.data.find(
- (model) => model.id === anthropicPayload.model,
- )
+ const selectedModel = findEndpointModel(anthropicPayload.model)
+ anthropicPayload.model = selectedModel?.id ?? anthropicPayload.model
if (!selectedModel) {
consola.warn("Model not found, returning default token count")
@@ -33,18 +32,21 @@ export async function handleCountTokens(c: Context) {
const tokenCount = await getTokenCount(openAIPayload, selectedModel)
if (anthropicPayload.tools && anthropicPayload.tools.length > 0) {
- let mcpToolExist = false
- if (anthropicBeta?.startsWith("claude-code")) {
- mcpToolExist = anthropicPayload.tools.some((tool) =>
- tool.name.startsWith("mcp__"),
+ let addToolSystemPromptCount = false
+ if (anthropicBeta) {
+ const toolsLength = anthropicPayload.tools.length
+ addToolSystemPromptCount = !anthropicPayload.tools.some(
+ (tool) =>
+ tool.name.startsWith("mcp__")
+ || (tool.name === "Skill" && toolsLength === 1),
)
}
- if (!mcpToolExist) {
+ if (addToolSystemPromptCount) {
if (anthropicPayload.model.startsWith("claude")) {
// https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/overview#pricing
tokenCount.input = tokenCount.input + 346
} else if (anthropicPayload.model.startsWith("grok")) {
- tokenCount.input = tokenCount.input + 480
+ tokenCount.input = tokenCount.input + 120
}
}
}
@@ -52,8 +54,6 @@ export async function handleCountTokens(c: Context) {
let finalTokenCount = tokenCount.input + tokenCount.output
if (anthropicPayload.model.startsWith("claude")) {
finalTokenCount = Math.round(finalTokenCount * 1.15)
- } else if (anthropicPayload.model.startsWith("grok")) {
- finalTokenCount = Math.round(finalTokenCount * 1.03)
}
consola.info("Token count:", finalTokenCount)
diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 85dbf6243..edf6f0d3b 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -1,59 +1,183 @@
import type { Context } from "hono"
-import consola from "consola"
import { streamSSE } from "hono/streaming"
+import type { Model } from "~/services/copilot/get-models"
+
import { awaitApproval } from "~/lib/approval"
+import {
+ getSmallModel,
+ getReasoningEffortForModel,
+ isMessagesApiEnabled,
+} from "~/lib/config"
+import { createHandlerLogger } from "~/lib/logger"
+import { findEndpointModel } from "~/lib/models"
import { checkRateLimit } from "~/lib/rate-limit"
import { state } from "~/lib/state"
+import { generateRequestIdFromPayload, getRootSessionId } from "~/lib/utils"
+import {
+ buildErrorEvent,
+ createResponsesStreamState,
+ translateResponsesStreamEvent,
+} from "~/routes/messages/responses-stream-translation"
+import {
+ translateAnthropicMessagesToResponsesPayload,
+ translateResponsesResultToAnthropic,
+} from "~/routes/messages/responses-translation"
+import {
+ applyResponsesApiContextManagement,
+ compactInputByLatestCompaction,
+ getResponsesRequestOptions,
+} from "~/routes/responses/utils"
import {
createChatCompletions,
type ChatCompletionChunk,
type ChatCompletionResponse,
} from "~/services/copilot/create-chat-completions"
+import { createMessages } from "~/services/copilot/create-messages"
+import {
+ createResponses,
+ type ResponsesResult,
+ type ResponseStreamEvent,
+} from "~/services/copilot/create-responses"
+
+import type { SubagentMarker } from "./subagent-marker"
import {
type AnthropicMessagesPayload,
type AnthropicStreamState,
+ type AnthropicTextBlock,
+ type AnthropicToolResultBlock,
} from "./anthropic-types"
import {
translateToAnthropic,
translateToOpenAI,
} from "./non-stream-translation"
import { translateChunkToAnthropicEvents } from "./stream-translation"
+import { parseSubagentMarkerFromFirstUser } from "./subagent-marker"
+
+const logger = createHandlerLogger("messages-handler")
+
+const compactSystemPromptStart =
+ "You are a helpful AI assistant tasked with summarizing conversations"
export async function handleCompletion(c: Context) {
await checkRateLimit(state)
const anthropicPayload = await c.req.json()
- consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload))
+ logger.debug("Anthropic request payload:", JSON.stringify(anthropicPayload))
- const openAIPayload = translateToOpenAI(anthropicPayload)
- consola.debug(
- "Translated OpenAI request payload:",
- JSON.stringify(openAIPayload),
- )
+ const subagentMarker = parseSubagentMarkerFromFirstUser(anthropicPayload)
+ if (subagentMarker) {
+ logger.debug("Detected Subagent marker:", JSON.stringify(subagentMarker))
+ }
+
+ const sessionId = getRootSessionId(anthropicPayload, c)
+ logger.debug("Extracted session ID:", sessionId)
+
+ // claude code and opencode compact request detection
+ const isCompact = isCompactRequest(anthropicPayload)
+
+ // Fix Claude Code 2.0.28+ warmup requests consuming a premium request by forcing the small model when no tools are used.
+ // Setting "CLAUDE_CODE_SUBAGENT_MODEL" to your small model also avoids this.
+ const anthropicBeta = c.req.header("anthropic-beta")
+ logger.debug("Anthropic Beta header:", anthropicBeta)
+ const noTools = !anthropicPayload.tools || anthropicPayload.tools.length === 0
+ if (anthropicBeta && noTools && !isCompact) {
+ anthropicPayload.model = getSmallModel()
+ }
+
+ if (isCompact) {
+ logger.debug("Is compact request:", isCompact)
+ } else {
+ // Merge tool_result and text blocks into tool_result to avoid consuming premium requests
+ // (caused by skill invocations, edit hooks, plan or to-do reminders)
+ // e.g. {"role":"user","content":[{"type":"tool_result","content":"Launching skill: xxx"},{"type":"text","text":"xxx"}]}
+ // not only for claude, but also for opencode
+ // compact requests are excluded from this processing
+ mergeToolResultForClaude(anthropicPayload)
+ }
+
+ const requestId = generateRequestIdFromPayload(anthropicPayload, sessionId)
+ logger.debug("Generated request ID:", requestId)
if (state.manualApprove) {
await awaitApproval()
}
- const response = await createChatCompletions(openAIPayload)
+ const selectedModel = findEndpointModel(anthropicPayload.model)
+ anthropicPayload.model = selectedModel?.id ?? anthropicPayload.model
+
+ if (shouldUseMessagesApi(selectedModel)) {
+ return await handleWithMessagesApi(c, anthropicPayload, {
+ anthropicBetaHeader: anthropicBeta,
+ subagentMarker,
+ selectedModel,
+ requestId,
+ sessionId,
+ isCompact,
+ })
+ }
+
+ if (shouldUseResponsesApi(selectedModel)) {
+ return await handleWithResponsesApi(c, anthropicPayload, {
+ subagentMarker,
+ selectedModel,
+ requestId,
+ sessionId,
+ isCompact,
+ })
+ }
+
+ return await handleWithChatCompletions(c, anthropicPayload, {
+ subagentMarker,
+ requestId,
+ sessionId,
+ isCompact,
+ })
+}
+
+const RESPONSES_ENDPOINT = "/responses"
+const MESSAGES_ENDPOINT = "/v1/messages"
+
+const handleWithChatCompletions = async (
+ c: Context,
+ anthropicPayload: AnthropicMessagesPayload,
+ options: {
+ subagentMarker?: SubagentMarker | null
+ requestId: string
+ sessionId?: string
+ isCompact?: boolean
+ },
+) => {
+ const { subagentMarker, requestId, sessionId, isCompact } = options
+ const openAIPayload = translateToOpenAI(anthropicPayload)
+ logger.debug(
+ "Translated OpenAI request payload:",
+ JSON.stringify(openAIPayload),
+ )
+
+ const response = await createChatCompletions(openAIPayload, {
+ subagentMarker,
+ requestId,
+ sessionId,
+ isCompact,
+ })
if (isNonStreaming(response)) {
- consola.debug(
+ logger.debug(
"Non-streaming response from Copilot:",
JSON.stringify(response).slice(-400),
)
const anthropicResponse = translateToAnthropic(response)
- consola.debug(
+ logger.debug(
"Translated Anthropic response:",
JSON.stringify(anthropicResponse),
)
return c.json(anthropicResponse)
}
- consola.debug("Streaming response from Copilot")
+ logger.debug("Streaming response from Copilot")
return streamSSE(c, async (stream) => {
const streamState: AnthropicStreamState = {
messageStartSent: false,
@@ -63,7 +187,7 @@ export async function handleCompletion(c: Context) {
}
for await (const rawEvent of response) {
- consola.debug("Copilot raw stream event:", JSON.stringify(rawEvent))
+ logger.debug("Copilot raw stream event:", JSON.stringify(rawEvent))
if (rawEvent.data === "[DONE]") {
break
}
@@ -76,7 +200,7 @@ export async function handleCompletion(c: Context) {
const events = translateChunkToAnthropicEvents(chunk, streamState)
for (const event of events) {
- consola.debug("Translated Anthropic event:", JSON.stringify(event))
+ logger.debug("Translated Anthropic event:", JSON.stringify(event))
await stream.writeSSE({
event: event.type,
data: JSON.stringify(event),
@@ -86,6 +210,327 @@ export async function handleCompletion(c: Context) {
})
}
+const handleWithResponsesApi = async (
+ c: Context,
+ anthropicPayload: AnthropicMessagesPayload,
+ options: {
+ subagentMarker?: SubagentMarker | null
+ selectedModel?: Model
+ requestId: string
+ sessionId?: string
+ isCompact?: boolean
+ },
+) => {
+ const { subagentMarker, selectedModel, requestId, sessionId, isCompact } =
+ options
+
+ const responsesPayload =
+ translateAnthropicMessagesToResponsesPayload(anthropicPayload)
+
+ applyResponsesApiContextManagement(
+ responsesPayload,
+ selectedModel?.capabilities.limits.max_prompt_tokens,
+ )
+
+ compactInputByLatestCompaction(responsesPayload)
+
+ logger.debug(
+ "Translated Responses payload:",
+ JSON.stringify(responsesPayload),
+ )
+
+ const { vision, initiator } = getResponsesRequestOptions(responsesPayload)
+ const response = await createResponses(responsesPayload, {
+ vision,
+ initiator: initiator,
+ subagentMarker,
+ requestId,
+ sessionId,
+ isCompact,
+ })
+
+ if (responsesPayload.stream && isAsyncIterable(response)) {
+ logger.debug("Streaming response from Copilot (Responses API)")
+ return streamSSE(c, async (stream) => {
+ const streamState = createResponsesStreamState()
+
+ for await (const chunk of response) {
+ const eventName = chunk.event
+ if (eventName === "ping") {
+ await stream.writeSSE({ event: "ping", data: '{"type":"ping"}' })
+ continue
+ }
+
+ const data = chunk.data
+ if (!data) {
+ continue
+ }
+
+ logger.debug("Responses raw stream event:", data)
+
+ const events = translateResponsesStreamEvent(
+ JSON.parse(data) as ResponseStreamEvent,
+ streamState,
+ )
+ for (const event of events) {
+ const eventData = JSON.stringify(event)
+ logger.debug("Translated Anthropic event:", eventData)
+ await stream.writeSSE({
+ event: event.type,
+ data: eventData,
+ })
+ }
+
+ if (streamState.messageCompleted) {
+ logger.debug("Message completed, ending stream")
+ break
+ }
+ }
+
+ if (!streamState.messageCompleted) {
+ logger.warn(
+ "Responses stream ended without completion; sending error event",
+ )
+ const errorEvent = buildErrorEvent(
+ "Responses stream ended without completion",
+ )
+ await stream.writeSSE({
+ event: errorEvent.type,
+ data: JSON.stringify(errorEvent),
+ })
+ }
+ })
+ }
+
+ logger.debug(
+ "Non-streaming Responses result:",
+ JSON.stringify(response).slice(-400),
+ )
+ const anthropicResponse = translateResponsesResultToAnthropic(
+ response as ResponsesResult,
+ )
+ logger.debug(
+ "Translated Anthropic response:",
+ JSON.stringify(anthropicResponse),
+ )
+ return c.json(anthropicResponse)
+}
+
/**
 * Serve an Anthropic Messages request via the Copilot Messages API.
 *
 * The payload is forwarded nearly as-is after sanitizing fields Copilot
 * rejects; streaming responses are piped through untranslated because both
 * sides already speak the Anthropic SSE event format.
 *
 * NOTE(review): mutates `anthropicPayload` in place (cache_control strip,
 * thinking-block filter, thinking/output_config injection).
 */
const handleWithMessagesApi = async (
  c: Context,
  anthropicPayload: AnthropicMessagesPayload,
  options: {
    anthropicBetaHeader?: string
    subagentMarker?: SubagentMarker | null
    selectedModel?: Model
    requestId: string
    sessionId?: string
    isCompact?: boolean
  },
) => {
  const {
    anthropicBetaHeader,
    subagentMarker,
    selectedModel,
    requestId,
    sessionId,
    isCompact,
  } = options

  // Strip cache_control from system content blocks as the
  // Copilot Messages API does not support them (rejects extra fields like scope).
  // commit by nicktogo
  stripCacheControl(anthropicPayload)

  // Pre-request processing: filter thinking blocks for Claude models so only
  // valid thinking blocks are sent to the Copilot Messages API.
  // Keeps a block only when it has real thinking text and a signature without
  // "@" (presumably "@"-bearing signatures are synthetic carriers produced by
  // the Responses translation — verify against responses-translation.ts).
  for (const msg of anthropicPayload.messages) {
    if (msg.role === "assistant" && Array.isArray(msg.content)) {
      msg.content = msg.content.filter((block) => {
        if (block.type !== "thinking") return true
        return (
          block.thinking
          && block.thinking !== "Thinking..."
          && block.signature
          && !block.signature.includes("@")
        )
      })
    }
  }

  // https://platform.claude.com/docs/en/build-with-claude/extended-thinking#extended-thinking-with-tool-use
  // Using tool_choice: {"type": "any"} or tool_choice: {"type": "tool", "name": "..."} will result in an error because these options force tool use, which is incompatible with extended thinking.
  const toolChoice = anthropicPayload.tool_choice
  const disableThink = toolChoice?.type === "any" || toolChoice?.type === "tool"

  // Enable adaptive thinking only when the model supports it and thinking is
  // compatible with the requested tool_choice.
  if (selectedModel?.capabilities.supports.adaptive_thinking && !disableThink) {
    anthropicPayload.thinking = {
      type: "adaptive",
    }
    anthropicPayload.output_config = {
      effort: getAnthropicEffortForModel(anthropicPayload.model),
    }
  }

  logger.debug("Translated Messages payload:", JSON.stringify(anthropicPayload))

  const response = await createMessages(anthropicPayload, anthropicBetaHeader, {
    subagentMarker,
    requestId,
    sessionId,
    isCompact,
  })

  if (isAsyncIterable(response)) {
    logger.debug("Streaming response from Copilot (Messages API)")
    // Pass-through: upstream events are already Anthropic-shaped SSE.
    return streamSSE(c, async (stream) => {
      for await (const event of response) {
        const eventName = event.event
        const data = event.data ?? ""
        logger.debug("Messages raw stream event:", data)
        await stream.writeSSE({
          event: eventName,
          data,
        })
      }
    })
  }

  logger.debug(
    "Non-streaming Messages result:",
    JSON.stringify(response).slice(-400),
  )
  return c.json(response)
}
+
+const shouldUseResponsesApi = (selectedModel: Model | undefined): boolean => {
+ return (
+ selectedModel?.supported_endpoints?.includes(RESPONSES_ENDPOINT) ?? false
+ )
+}
+
+const shouldUseMessagesApi = (selectedModel: Model | undefined): boolean => {
+ const useMessagesApi = isMessagesApiEnabled()
+ if (!useMessagesApi) {
+ return false
+ }
+ return (
+ selectedModel?.supported_endpoints?.includes(MESSAGES_ENDPOINT) ?? false
+ )
+}
+
const isNonStreaming = (
response: Awaited>,
): response is ChatCompletionResponse => Object.hasOwn(response, "choices")
+
+const isAsyncIterable = (value: unknown): value is AsyncIterable =>
+ Boolean(value)
+ && typeof (value as AsyncIterable)[Symbol.asyncIterator] === "function"
+
+const getAnthropicEffortForModel = (
+ model: string,
+): "low" | "medium" | "high" | "max" => {
+ const reasoningEffort = getReasoningEffortForModel(model)
+
+ if (reasoningEffort === "xhigh") return "max"
+ if (reasoningEffort === "none" || reasoningEffort === "minimal") return "low"
+
+ return reasoningEffort
+}
+
+const isCompactRequest = (
+ anthropicPayload: AnthropicMessagesPayload,
+): boolean => {
+ const system = anthropicPayload.system
+ if (typeof system === "string") {
+ return system.startsWith(compactSystemPromptStart)
+ }
+ if (!Array.isArray(system)) return false
+
+ return system.some(
+ (msg) =>
+ typeof msg.text === "string"
+ && msg.text.startsWith(compactSystemPromptStart),
+ )
+}
+
+const mergeContentWithText = (
+ tr: AnthropicToolResultBlock,
+ textBlock: AnthropicTextBlock,
+): AnthropicToolResultBlock => {
+ if (typeof tr.content === "string") {
+ return { ...tr, content: `${tr.content}\n\n${textBlock.text}` }
+ }
+ return {
+ ...tr,
+ content: [...tr.content, textBlock],
+ }
+}
+
+const mergeContentWithTexts = (
+ tr: AnthropicToolResultBlock,
+ textBlocks: Array,
+): AnthropicToolResultBlock => {
+ if (typeof tr.content === "string") {
+ const appendedTexts = textBlocks.map((tb) => tb.text).join("\n\n")
+ return { ...tr, content: `${tr.content}\n\n${appendedTexts}` }
+ }
+ return { ...tr, content: [...tr.content, ...textBlocks] }
+}
+
+const mergeToolResultForClaude = (
+ anthropicPayload: AnthropicMessagesPayload,
+): void => {
+ for (const msg of anthropicPayload.messages) {
+ if (msg.role !== "user" || !Array.isArray(msg.content)) continue
+
+ const toolResults: Array = []
+ const textBlocks: Array = []
+ let valid = true
+
+ for (const block of msg.content) {
+ if (block.type === "tool_result") {
+ toolResults.push(block)
+ } else if (block.type === "text") {
+ textBlocks.push(block)
+ } else {
+ valid = false
+ break
+ }
+ }
+
+ if (!valid || toolResults.length === 0 || textBlocks.length === 0) continue
+
+ msg.content = mergeToolResult(toolResults, textBlocks)
+ }
+}
+
+const mergeToolResult = (
+ toolResults: Array,
+ textBlocks: Array,
+): Array => {
+ // equal lengths -> pairwise merge
+ if (toolResults.length === textBlocks.length) {
+ return toolResults.map((tr, i) => mergeContentWithText(tr, textBlocks[i]))
+ }
+
+ // lengths differ -> append all textBlocks to the last tool_result
+ const lastIndex = toolResults.length - 1
+ return toolResults.map((tr, i) =>
+ i === lastIndex ? mergeContentWithTexts(tr, textBlocks) : tr,
+ )
+}
+
+const stripCacheControl = (payload: AnthropicMessagesPayload): void => {
+ // Claude Code only adds unsupported scope field to system block cache_control
+ if (Array.isArray(payload.system)) {
+ for (const block of payload.system) {
+ const b = block as unknown as Record
+ const cc = b.cache_control
+ if (cc && typeof cc === "object") {
+ const { scope, ...rest } = cc as Record
+ b.cache_control = rest
+ }
+ }
+ }
+}
diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts
new file mode 100644
index 000000000..6f4f7f21f
--- /dev/null
+++ b/src/routes/messages/responses-stream-translation.ts
@@ -0,0 +1,743 @@
+import {
+ type ResponseCompletedEvent,
+ type ResponseCreatedEvent,
+ type ResponseErrorEvent,
+ type ResponseFailedEvent,
+ type ResponseFunctionCallArgumentsDeltaEvent,
+ type ResponseFunctionCallArgumentsDoneEvent,
+ type ResponseIncompleteEvent,
+ type ResponseOutputItemAddedEvent,
+ type ResponseOutputItemDoneEvent,
+ type ResponseReasoningSummaryTextDeltaEvent,
+ type ResponseReasoningSummaryTextDoneEvent,
+ type ResponsesResult,
+ type ResponseStreamEvent,
+ type ResponseTextDeltaEvent,
+ type ResponseTextDoneEvent,
+} from "~/services/copilot/create-responses"
+
+import { type AnthropicStreamEventData } from "./anthropic-types"
+import {
+ THINKING_TEXT,
+ encodeCompactionCarrierSignature,
+ translateResponsesResultToAnthropic,
+} from "./responses-translation"
+
+const MAX_CONSECUTIVE_FUNCTION_CALL_WHITESPACE = 20
+
+class FunctionCallArgumentsValidationError extends Error {
+ constructor(message: string) {
+ super(message)
+ this.name = "FunctionCallArgumentsValidationError"
+ }
+}
+
+const updateWhitespaceRunState = (
+ previousCount: number,
+ chunk: string,
+): {
+ nextCount: number
+ exceeded: boolean
+} => {
+ let count = previousCount
+
+ for (const char of chunk) {
+ if (char === "\r" || char === "\n" || char === "\t") {
+ count += 1
+ if (count > MAX_CONSECUTIVE_FUNCTION_CALL_WHITESPACE) {
+ return { nextCount: count, exceeded: true }
+ }
+ continue
+ }
+
+ if (char !== " ") {
+ count = 0
+ }
+ }
+
+ return { nextCount: count, exceeded: false }
+}
+
+export interface ResponsesStreamState {
+ messageStartSent: boolean
+ messageCompleted: boolean
+ nextContentBlockIndex: number
+ blockIndexByKey: Map
+ openBlocks: Set
+ blockHasDelta: Set
+ functionCallStateByOutputIndex: Map
+}
+
+type FunctionCallStreamState = {
+ blockIndex: number
+ toolCallId: string
+ name: string
+ consecutiveWhitespaceCount: number
+}
+
+export const createResponsesStreamState = (): ResponsesStreamState => ({
+ messageStartSent: false,
+ messageCompleted: false,
+ nextContentBlockIndex: 0,
+ blockIndexByKey: new Map(),
+ openBlocks: new Set(),
+ blockHasDelta: new Set(),
+ functionCallStateByOutputIndex: new Map(),
+})
+
+export const translateResponsesStreamEvent = (
+ rawEvent: ResponseStreamEvent,
+ state: ResponsesStreamState,
+): Array => {
+ const eventType = rawEvent.type
+ switch (eventType) {
+ case "response.created": {
+ return handleResponseCreated(rawEvent, state)
+ }
+
+ case "response.output_item.added": {
+ return handleOutputItemAdded(rawEvent, state)
+ }
+
+ case "response.reasoning_summary_text.delta": {
+ return handleReasoningSummaryTextDelta(rawEvent, state)
+ }
+
+ case "response.output_text.delta": {
+ return handleOutputTextDelta(rawEvent, state)
+ }
+
+ case "response.reasoning_summary_text.done": {
+ return handleReasoningSummaryTextDone(rawEvent, state)
+ }
+
+ case "response.output_text.done": {
+ return handleOutputTextDone(rawEvent, state)
+ }
+ case "response.output_item.done": {
+ return handleOutputItemDone(rawEvent, state)
+ }
+
+ case "response.function_call_arguments.delta": {
+ return handleFunctionCallArgumentsDelta(rawEvent, state)
+ }
+
+ case "response.function_call_arguments.done": {
+ return handleFunctionCallArgumentsDone(rawEvent, state)
+ }
+
+ case "response.completed":
+ case "response.incomplete": {
+ return handleResponseCompleted(rawEvent, state)
+ }
+
+ case "response.failed": {
+ return handleResponseFailed(rawEvent, state)
+ }
+
+ case "error": {
+ return handleErrorEvent(rawEvent, state)
+ }
+
+ default: {
+ return []
+ }
+ }
+}
+
+// Helper handlers to keep translateResponsesStreamEvent concise
+const handleResponseCreated = (
+ rawEvent: ResponseCreatedEvent,
+ state: ResponsesStreamState,
+): Array => {
+ return messageStart(state, rawEvent.response)
+}
+
+const handleOutputItemAdded = (
+ rawEvent: ResponseOutputItemAddedEvent,
+ state: ResponsesStreamState,
+): Array => {
+ const events = new Array()
+ const functionCallDetails = extractFunctionCallDetails(rawEvent)
+ if (!functionCallDetails) {
+ return events
+ }
+
+ const { outputIndex, toolCallId, name, initialArguments } =
+ functionCallDetails
+ const blockIndex = openFunctionCallBlock(state, {
+ outputIndex,
+ toolCallId,
+ name,
+ events,
+ })
+
+ if (initialArguments !== undefined && initialArguments.length > 0) {
+ events.push({
+ type: "content_block_delta",
+ index: blockIndex,
+ delta: {
+ type: "input_json_delta",
+ partial_json: initialArguments,
+ },
+ })
+ state.blockHasDelta.add(blockIndex)
+ }
+
+ return events
+}
+
+const handleOutputItemDone = (
+ rawEvent: ResponseOutputItemDoneEvent,
+ state: ResponsesStreamState,
+): Array => {
+ const events = new Array()
+ const item = rawEvent.item
+ const itemType = item.type
+ const outputIndex = rawEvent.output_index
+
+ if (itemType === "compaction") {
+ if (!item.id || !item.encrypted_content) {
+ return events
+ }
+
+ const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events)
+
+ if (!state.blockHasDelta.has(blockIndex)) {
+ events.push({
+ type: "content_block_delta",
+ index: blockIndex,
+ delta: {
+ type: "thinking_delta",
+ thinking: THINKING_TEXT,
+ },
+ })
+ }
+
+ events.push({
+ type: "content_block_delta",
+ index: blockIndex,
+ delta: {
+ type: "signature_delta",
+ signature: encodeCompactionCarrierSignature({
+ id: item.id,
+ encrypted_content: item.encrypted_content,
+ }),
+ },
+ })
+ state.blockHasDelta.add(blockIndex)
+ return events
+ }
+
+ if (itemType !== "reasoning") {
+ return events
+ }
+
+ const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events)
+ const signature = (item.encrypted_content ?? "") + "@" + item.id
+ if (signature) {
+ // Compatible with opencode, it will filter out blocks where the thinking text is empty, so we add a default thinking text here
+ if (!item.summary || item.summary.length === 0) {
+ events.push({
+ type: "content_block_delta",
+ index: blockIndex,
+ delta: {
+ type: "thinking_delta",
+ thinking: THINKING_TEXT,
+ },
+ })
+ }
+
+ events.push({
+ type: "content_block_delta",
+ index: blockIndex,
+ delta: {
+ type: "signature_delta",
+ signature,
+ },
+ })
+ state.blockHasDelta.add(blockIndex)
+ }
+
+ return events
+}
+
+const handleFunctionCallArgumentsDelta = (
+ rawEvent: ResponseFunctionCallArgumentsDeltaEvent,
+ state: ResponsesStreamState,
+): Array => {
+ const events = new Array()
+ const outputIndex = rawEvent.output_index
+ const deltaText = rawEvent.delta
+
+ if (!deltaText) {
+ return events
+ }
+
+ const blockIndex = openFunctionCallBlock(state, {
+ outputIndex,
+ events,
+ })
+
+ const functionCallState =
+ state.functionCallStateByOutputIndex.get(outputIndex)
+ if (!functionCallState) {
+ return handleFunctionCallArgumentsValidationError(
+ new FunctionCallArgumentsValidationError(
+ "Received function call arguments delta without an open tool call block.",
+ ),
+ state,
+ events,
+ )
+ }
+
+ // fix: copolit function call returning infinite line breaks until max_tokens limit
+ // "arguments": "{\"path\":\"xxx\",\"pattern\":\"**/*.ts\",\"} }? Wait extra braces. Need correct. I should run? Wait overcame. Need proper JSON with pattern \"\n\n\n\n\n\n\n\n...
+ const { nextCount, exceeded } = updateWhitespaceRunState(
+ functionCallState.consecutiveWhitespaceCount,
+ deltaText,
+ )
+ if (exceeded) {
+ return handleFunctionCallArgumentsValidationError(
+ new FunctionCallArgumentsValidationError(
+ "Received function call arguments delta containing more than 20 consecutive whitespace characters.",
+ ),
+ state,
+ events,
+ )
+ }
+ functionCallState.consecutiveWhitespaceCount = nextCount
+
+ events.push({
+ type: "content_block_delta",
+ index: blockIndex,
+ delta: {
+ type: "input_json_delta",
+ partial_json: deltaText,
+ },
+ })
+ state.blockHasDelta.add(blockIndex)
+
+ return events
+}
+
+const handleFunctionCallArgumentsDone = (
+ rawEvent: ResponseFunctionCallArgumentsDoneEvent,
+ state: ResponsesStreamState,
+): Array => {
+ const events = new Array()
+ const outputIndex = rawEvent.output_index
+ const blockIndex = openFunctionCallBlock(state, {
+ outputIndex,
+ events,
+ })
+
+ const finalArguments =
+ typeof rawEvent.arguments === "string" ? rawEvent.arguments : undefined
+
+ if (!state.blockHasDelta.has(blockIndex) && finalArguments) {
+ events.push({
+ type: "content_block_delta",
+ index: blockIndex,
+ delta: {
+ type: "input_json_delta",
+ partial_json: finalArguments,
+ },
+ })
+ state.blockHasDelta.add(blockIndex)
+ }
+
+ state.functionCallStateByOutputIndex.delete(outputIndex)
+ return events
+}
+
+const handleOutputTextDelta = (
+ rawEvent: ResponseTextDeltaEvent,
+ state: ResponsesStreamState,
+): Array => {
+ const events = new Array()
+ const outputIndex = rawEvent.output_index
+ const contentIndex = rawEvent.content_index
+ const deltaText = rawEvent.delta
+
+ if (!deltaText) {
+ return events
+ }
+
+ const blockIndex = openTextBlockIfNeeded(state, {
+ outputIndex,
+ contentIndex,
+ events,
+ })
+
+ events.push({
+ type: "content_block_delta",
+ index: blockIndex,
+ delta: {
+ type: "text_delta",
+ text: deltaText,
+ },
+ })
+ state.blockHasDelta.add(blockIndex)
+
+ return events
+}
+
+const handleReasoningSummaryTextDelta = (
+ rawEvent: ResponseReasoningSummaryTextDeltaEvent,
+ state: ResponsesStreamState,
+): Array => {
+ const outputIndex = rawEvent.output_index
+ const deltaText = rawEvent.delta
+ const events = new Array()
+ const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events)
+
+ events.push({
+ type: "content_block_delta",
+ index: blockIndex,
+ delta: {
+ type: "thinking_delta",
+ thinking: deltaText,
+ },
+ })
+ state.blockHasDelta.add(blockIndex)
+
+ return events
+}
+
+const handleReasoningSummaryTextDone = (
+ rawEvent: ResponseReasoningSummaryTextDoneEvent,
+ state: ResponsesStreamState,
+): Array => {
+ const outputIndex = rawEvent.output_index
+ const text = rawEvent.text
+ const events = new Array()
+ const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events)
+
+ if (text && !state.blockHasDelta.has(blockIndex)) {
+ events.push({
+ type: "content_block_delta",
+ index: blockIndex,
+ delta: {
+ type: "thinking_delta",
+ thinking: text,
+ },
+ })
+ }
+
+ return events
+}
+
+const handleOutputTextDone = (
+ rawEvent: ResponseTextDoneEvent,
+ state: ResponsesStreamState,
+): Array => {
+ const events = new Array()
+ const outputIndex = rawEvent.output_index
+ const contentIndex = rawEvent.content_index
+ const text = rawEvent.text
+
+ const blockIndex = openTextBlockIfNeeded(state, {
+ outputIndex,
+ contentIndex,
+ events,
+ })
+
+ if (text && !state.blockHasDelta.has(blockIndex)) {
+ events.push({
+ type: "content_block_delta",
+ index: blockIndex,
+ delta: {
+ type: "text_delta",
+ text,
+ },
+ })
+ }
+
+ return events
+}
+
+const handleResponseCompleted = (
+ rawEvent: ResponseCompletedEvent | ResponseIncompleteEvent,
+ state: ResponsesStreamState,
+): Array => {
+ const response = rawEvent.response
+ const events = new Array()
+
+ closeAllOpenBlocks(state, events)
+ const anthropic = translateResponsesResultToAnthropic(response)
+ events.push(
+ {
+ type: "message_delta",
+ delta: {
+ stop_reason: anthropic.stop_reason,
+ stop_sequence: anthropic.stop_sequence,
+ },
+ usage: anthropic.usage,
+ },
+ { type: "message_stop" },
+ )
+ state.messageCompleted = true
+ return events
+}
+
+const handleResponseFailed = (
+ rawEvent: ResponseFailedEvent,
+ state: ResponsesStreamState,
+): Array => {
+ const response = rawEvent.response
+ const events = new Array()
+ closeAllOpenBlocks(state, events)
+
+ const message =
+ response.error?.message ?? "The response failed due to an unknown error."
+
+ events.push(buildErrorEvent(message))
+ state.messageCompleted = true
+
+ return events
+}
+
+const handleErrorEvent = (
+ rawEvent: ResponseErrorEvent,
+ state: ResponsesStreamState,
+): Array => {
+ const message =
+ typeof rawEvent.message === "string" ?
+ rawEvent.message
+ : "An unexpected error occurred during streaming."
+
+ state.messageCompleted = true
+ return [buildErrorEvent(message)]
+}
+
+const handleFunctionCallArgumentsValidationError = (
+ error: FunctionCallArgumentsValidationError,
+ state: ResponsesStreamState,
+ events: Array = [],
+): Array => {
+ const reason = error.message
+
+ closeAllOpenBlocks(state, events)
+ state.messageCompleted = true
+
+ events.push(buildErrorEvent(reason))
+
+ return events
+}
+
+const messageStart = (
+ state: ResponsesStreamState,
+ response: ResponsesResult,
+): Array => {
+ state.messageStartSent = true
+ const inputCachedTokens = response.usage?.input_tokens_details?.cached_tokens
+ const inputTokens =
+ (response.usage?.input_tokens ?? 0) - (inputCachedTokens ?? 0)
+ return [
+ {
+ type: "message_start",
+ message: {
+ id: response.id,
+ type: "message",
+ role: "assistant",
+ content: [],
+ model: response.model,
+ stop_reason: null,
+ stop_sequence: null,
+ usage: {
+ input_tokens: inputTokens,
+ output_tokens: 0,
+ cache_read_input_tokens: inputCachedTokens ?? 0,
+ },
+ },
+ },
+ ]
+}
+
+const openTextBlockIfNeeded = (
+ state: ResponsesStreamState,
+ params: {
+ outputIndex: number
+ contentIndex: number
+ events: Array
+ },
+): number => {
+ const { outputIndex, contentIndex, events } = params
+ const key = getBlockKey(outputIndex, contentIndex)
+ let blockIndex = state.blockIndexByKey.get(key)
+
+ if (blockIndex === undefined) {
+ blockIndex = state.nextContentBlockIndex
+ state.nextContentBlockIndex += 1
+ state.blockIndexByKey.set(key, blockIndex)
+ }
+
+ if (!state.openBlocks.has(blockIndex)) {
+ closeOpenBlocks(state, events)
+ events.push({
+ type: "content_block_start",
+ index: blockIndex,
+ content_block: {
+ type: "text",
+ text: "",
+ },
+ })
+ state.openBlocks.add(blockIndex)
+ }
+
+ return blockIndex
+}
+
+const openThinkingBlockIfNeeded = (
+ state: ResponsesStreamState,
+ outputIndex: number,
+ events: Array,
+): number => {
+ //thinking blocks has multiple summary_index, should combine into one block
+ const summaryIndex = 0
+ const key = getBlockKey(outputIndex, summaryIndex)
+ let blockIndex = state.blockIndexByKey.get(key)
+
+ if (blockIndex === undefined) {
+ blockIndex = state.nextContentBlockIndex
+ state.nextContentBlockIndex += 1
+ state.blockIndexByKey.set(key, blockIndex)
+ }
+
+ if (!state.openBlocks.has(blockIndex)) {
+ closeOpenBlocks(state, events)
+ events.push({
+ type: "content_block_start",
+ index: blockIndex,
+ content_block: {
+ type: "thinking",
+ thinking: "",
+ },
+ })
+ state.openBlocks.add(blockIndex)
+ }
+
+ return blockIndex
+}
+
+const closeBlockIfOpen = (
+ state: ResponsesStreamState,
+ blockIndex: number,
+ events: Array,
+) => {
+ if (!state.openBlocks.has(blockIndex)) {
+ return
+ }
+
+ events.push({ type: "content_block_stop", index: blockIndex })
+ state.openBlocks.delete(blockIndex)
+ state.blockHasDelta.delete(blockIndex)
+}
+
+const closeOpenBlocks = (
+ state: ResponsesStreamState,
+ events: Array,
+) => {
+ for (const blockIndex of state.openBlocks) {
+ closeBlockIfOpen(state, blockIndex, events)
+ }
+}
+
+const closeAllOpenBlocks = (
+ state: ResponsesStreamState,
+ events: Array,
+) => {
+ closeOpenBlocks(state, events)
+
+ state.functionCallStateByOutputIndex.clear()
+}
+
+export const buildErrorEvent = (message: string): AnthropicStreamEventData => ({
+ type: "error",
+ error: {
+ type: "api_error",
+ message,
+ },
+})
+
+const getBlockKey = (outputIndex: number, contentIndex: number): string =>
+ `${outputIndex}:${contentIndex}`
+
+const openFunctionCallBlock = (
+ state: ResponsesStreamState,
+ params: {
+ outputIndex: number
+ toolCallId?: string
+ name?: string
+ events: Array
+ },
+): number => {
+ const { outputIndex, toolCallId, name, events } = params
+
+ let functionCallState = state.functionCallStateByOutputIndex.get(outputIndex)
+
+ if (!functionCallState) {
+ const blockIndex = state.nextContentBlockIndex
+ state.nextContentBlockIndex += 1
+
+ const resolvedToolCallId = toolCallId ?? `tool_call_${blockIndex}`
+ const resolvedName = name ?? "function"
+
+ functionCallState = {
+ blockIndex,
+ toolCallId: resolvedToolCallId,
+ name: resolvedName,
+ consecutiveWhitespaceCount: 0,
+ }
+
+ state.functionCallStateByOutputIndex.set(outputIndex, functionCallState)
+ }
+
+ const { blockIndex } = functionCallState
+
+ if (!state.openBlocks.has(blockIndex)) {
+ closeOpenBlocks(state, events)
+ events.push({
+ type: "content_block_start",
+ index: blockIndex,
+ content_block: {
+ type: "tool_use",
+ id: functionCallState.toolCallId,
+ name: functionCallState.name,
+ input: {},
+ },
+ })
+ state.openBlocks.add(blockIndex)
+ }
+
+ return blockIndex
+}
+
+type FunctionCallDetails = {
+ outputIndex: number
+ toolCallId: string
+ name: string
+ initialArguments?: string
+}
+
+const extractFunctionCallDetails = (
+ rawEvent: ResponseOutputItemAddedEvent,
+): FunctionCallDetails | undefined => {
+ const item = rawEvent.item
+ const itemType = item.type
+ if (itemType !== "function_call") {
+ return undefined
+ }
+
+ const outputIndex = rawEvent.output_index
+ const toolCallId = item.call_id
+ const name = item.name
+ const initialArguments = item.arguments
+ return {
+ outputIndex,
+ toolCallId,
+ name,
+ initialArguments,
+ }
+}
diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts
new file mode 100644
index 000000000..855d45aa3
--- /dev/null
+++ b/src/routes/messages/responses-translation.ts
@@ -0,0 +1,783 @@
+import consola from "consola"
+
+import {
+ getExtraPromptForModel,
+ getReasoningEffortForModel,
+} from "~/lib/config"
+import { parseUserIdMetadata } from "~/lib/utils"
+import {
+ type ResponsesPayload,
+ type ResponseInputCompaction,
+ type ResponseInputContent,
+ type ResponseInputImage,
+ type ResponseInputItem,
+ type ResponseInputMessage,
+ type ResponseInputReasoning,
+ type ResponseInputText,
+ type ResponsesResult,
+ type ResponseOutputContentBlock,
+ type ResponseOutputCompaction,
+ type ResponseOutputFunctionCall,
+ type ResponseOutputItem,
+ type ResponseOutputReasoning,
+ type ResponseReasoningBlock,
+ type ResponseOutputRefusal,
+ type ResponseOutputText,
+ type ResponseFunctionToolCallItem,
+ type ResponseFunctionCallOutputItem,
+ type Tool,
+ type ToolChoiceFunction,
+ type ToolChoiceOptions,
+} from "~/services/copilot/create-responses"
+
+import {
+ type AnthropicAssistantContentBlock,
+ type AnthropicAssistantMessage,
+ type AnthropicResponse,
+ type AnthropicImageBlock,
+ type AnthropicMessage,
+ type AnthropicMessagesPayload,
+ type AnthropicTextBlock,
+ type AnthropicThinkingBlock,
+ type AnthropicTool,
+ type AnthropicToolResultBlock,
+ type AnthropicToolUseBlock,
+ type AnthropicUserContentBlock,
+ type AnthropicUserMessage,
+} from "./anthropic-types"
+
+const MESSAGE_TYPE = "message"
+const COMPACTION_SIGNATURE_PREFIX = "cm1#"
+const COMPACTION_SIGNATURE_SEPARATOR = "@"
+
+export const THINKING_TEXT = "Thinking..."
+
+export const translateAnthropicMessagesToResponsesPayload = (
+ payload: AnthropicMessagesPayload,
+): ResponsesPayload => {
+ const input: Array = []
+ const applyPhase = shouldApplyPhase(payload.model)
+
+ for (const message of payload.messages) {
+ input.push(...translateMessage(message, payload.model, applyPhase))
+ }
+
+ const translatedTools = convertAnthropicTools(payload.tools)
+ const toolChoice = convertAnthropicToolChoice(payload.tool_choice)
+
+ const { safetyIdentifier, sessionId: promptCacheKey } = parseUserIdMetadata(
+ payload.metadata?.user_id,
+ )
+
+ const responsesPayload: ResponsesPayload = {
+ model: payload.model,
+ input,
+ instructions: translateSystemPrompt(payload.system, payload.model),
+ temperature: 1, // reasoning high temperature fixed to 1
+ top_p: payload.top_p ?? null,
+ max_output_tokens: Math.max(payload.max_tokens, 12800),
+ tools: translatedTools,
+ tool_choice: toolChoice,
+ metadata: payload.metadata ? { ...payload.metadata } : null,
+ safety_identifier: safetyIdentifier,
+ prompt_cache_key: promptCacheKey,
+ stream: payload.stream ?? null,
+ store: false,
+ parallel_tool_calls: true,
+ reasoning: {
+ effort: getReasoningEffortForModel(payload.model),
+ summary: "detailed",
+ },
+ include: ["reasoning.encrypted_content"],
+ }
+
+ return responsesPayload
+}
+
+// Carrier for a "compaction" item round-tripped through the Anthropic
+// thinking-block `signature` field: the server-side id plus the opaque
+// encrypted payload.
+type CompactionCarrier = {
+ id: string
+ encrypted_content: string
+}
+
+// Packs a compaction carrier into a single signature string of the form
+// "cm1#<encrypted_content>@<id>" (see COMPACTION_SIGNATURE_PREFIX /
+// COMPACTION_SIGNATURE_SEPARATOR). Inverse of decodeCompactionCarrierSignature.
+export const encodeCompactionCarrierSignature = (
+ compaction: CompactionCarrier,
+): string => {
+ return `${COMPACTION_SIGNATURE_PREFIX}${compaction.encrypted_content}${COMPACTION_SIGNATURE_SEPARATOR}${compaction.id}`
+}
+
+// Parses a signature produced by encodeCompactionCarrierSignature.
+// Returns undefined when the prefix is missing, the separator is absent,
+// or either field would be empty.
+export const decodeCompactionCarrierSignature = (
+ signature: string,
+): CompactionCarrier | undefined => {
+ if (signature.startsWith(COMPACTION_SIGNATURE_PREFIX)) {
+ const raw = signature.slice(COMPACTION_SIGNATURE_PREFIX.length)
+ // NOTE(review): splits at the FIRST separator, while parseReasoningSignature
+ // splits at the LAST "@". This assumes encrypted_content never contains the
+ // separator character — TODO confirm the payload alphabet guarantees that.
+ const separatorIndex = raw.indexOf(COMPACTION_SIGNATURE_SEPARATOR)
+
+ if (separatorIndex <= 0 || separatorIndex === raw.length - 1) {
+ return undefined
+ }
+
+ const encrypted_content = raw.slice(0, separatorIndex)
+ const id = raw.slice(separatorIndex + 1)
+
+ if (!encrypted_content) {
+ return undefined
+ }
+
+ return {
+ id,
+ encrypted_content,
+ }
+ }
+
+ return undefined
+}
+
+const translateMessage = (
+ message: AnthropicMessage,
+ model: string,
+ applyPhase: boolean,
+): Array => {
+ if (message.role === "user") {
+ return translateUserMessage(message)
+ }
+
+ return translateAssistantMessage(message, model, applyPhase)
+}
+
+const translateUserMessage = (
+ message: AnthropicUserMessage,
+): Array => {
+ if (typeof message.content === "string") {
+ return [createMessage("user", message.content)]
+ }
+
+ if (!Array.isArray(message.content)) {
+ return []
+ }
+
+ const items: Array = []
+ const pendingContent: Array = []
+
+ for (const block of message.content) {
+ if (block.type === "tool_result") {
+ flushPendingContent(pendingContent, items, { role: "user" })
+ items.push(createFunctionCallOutput(block))
+ continue
+ }
+
+ const converted = translateUserContentBlock(block)
+ if (converted) {
+ pendingContent.push(converted)
+ }
+ }
+
+ flushPendingContent(pendingContent, items, { role: "user" })
+
+ return items
+}
+
+const translateAssistantMessage = (
+ message: AnthropicAssistantMessage,
+ model: string,
+ applyPhase: boolean,
+): Array => {
+ const assistantPhase = resolveAssistantPhase(
+ model,
+ message.content,
+ applyPhase,
+ )
+
+ if (typeof message.content === "string") {
+ return [createMessage("assistant", message.content, assistantPhase)]
+ }
+
+ if (!Array.isArray(message.content)) {
+ return []
+ }
+
+ const items: Array = []
+ const pendingContent: Array = []
+
+ for (const block of message.content) {
+ if (block.type === "tool_use") {
+ flushPendingContent(pendingContent, items, {
+ role: "assistant",
+ phase: assistantPhase,
+ })
+ items.push(createFunctionToolCall(block))
+ continue
+ }
+
+ if (block.type === "thinking" && block.signature) {
+ const compactionContent = createCompactionContent(block)
+ if (compactionContent) {
+ flushPendingContent(pendingContent, items, {
+ role: "assistant",
+ phase: assistantPhase,
+ })
+ items.push(compactionContent)
+ continue
+ }
+
+ if (block.signature.includes("@")) {
+ flushPendingContent(pendingContent, items, {
+ role: "assistant",
+ phase: assistantPhase,
+ })
+ items.push(createReasoningContent(block))
+ continue
+ }
+ }
+
+ const converted = translateAssistantContentBlock(block)
+ if (converted) {
+ pendingContent.push(converted)
+ }
+ }
+
+ flushPendingContent(pendingContent, items, {
+ role: "assistant",
+ phase: assistantPhase,
+ })
+
+ return items
+}
+
+const translateUserContentBlock = (
+ block: AnthropicUserContentBlock,
+): ResponseInputContent | undefined => {
+ switch (block.type) {
+ case "text": {
+ return createTextContent(block.text)
+ }
+ case "image": {
+ return createImageContent(block)
+ }
+ default: {
+ return undefined
+ }
+ }
+}
+
+const translateAssistantContentBlock = (
+ block: AnthropicAssistantContentBlock,
+): ResponseInputContent | undefined => {
+ switch (block.type) {
+ case "text": {
+ return createOutPutTextContent(block.text)
+ }
+ default: {
+ return undefined
+ }
+ }
+}
+
+const flushPendingContent = (
+ pendingContent: Array,
+ target: Array,
+ message: Pick,
+) => {
+ if (pendingContent.length === 0) {
+ return
+ }
+
+ const messageContent = [...pendingContent]
+
+ target.push(createMessage(message.role, messageContent, message.phase))
+ pendingContent.length = 0
+}
+
+const createMessage = (
+ role: ResponseInputMessage["role"],
+ content: string | Array,
+ phase?: ResponseInputMessage["phase"],
+): ResponseInputMessage => ({
+ type: MESSAGE_TYPE,
+ role,
+ content,
+ ...(role === "assistant" && phase ? { phase } : {}),
+})
+
+const resolveAssistantPhase = (
+ _model: string,
+ content: AnthropicAssistantMessage["content"],
+ applyPhase: boolean,
+): ResponseInputMessage["phase"] | undefined => {
+ if (!applyPhase) {
+ return undefined
+ }
+
+ if (typeof content === "string") {
+ return "final_answer"
+ }
+
+ if (!Array.isArray(content)) {
+ return undefined
+ }
+
+ const hasText = content.some((block) => block.type === "text")
+ if (!hasText) {
+ return undefined
+ }
+
+ const hasToolUse = content.some((block) => block.type === "tool_use")
+ return hasToolUse ? "commentary" : "final_answer"
+}
+
+const shouldApplyPhase = (model: string): boolean => {
+ const extraPrompt = getExtraPromptForModel(model)
+ return extraPrompt.includes("## Intermediary updates")
+}
+
+const createTextContent = (text: string): ResponseInputText => ({
+ type: "input_text",
+ text,
+})
+
+// Wraps assistant-authored text as an "output_text" content part (used when
+// replaying assistant history back to the Responses API).
+// NOTE(review): name spelling "OutPut" is unconventional ("Output"), and the
+// return annotation reuses ResponseInputText for an output_text part —
+// presumably the structural shape matches; verify against the type definitions
+// in create-responses.
+const createOutPutTextContent = (text: string): ResponseInputText => ({
+ type: "output_text",
+ text,
+})
+
+const createImageContent = (
+ block: AnthropicImageBlock,
+): ResponseInputImage => ({
+ type: "input_image",
+ image_url: `data:${block.source.media_type};base64,${block.source.data}`,
+ detail: "auto",
+})
+
+const createReasoningContent = (
+ block: AnthropicThinkingBlock,
+): ResponseInputReasoning => {
+ // align with vscode-copilot-chat extractThinkingData, should add id, otherwise it will cause miss cache occasionally —— the usage input cached tokens to be 0
+ // https://github.com/microsoft/vscode-copilot-chat/blob/main/src/platform/endpoint/node/responsesApi.ts#L162
+ // when use in codex cli, reasoning id is empty, so it will cause miss cache occasionally
+ const { encryptedContent, id } = parseReasoningSignature(block.signature)
+ const thinking = block.thinking === THINKING_TEXT ? "" : block.thinking
+ return {
+ id,
+ type: "reasoning",
+ summary: thinking ? [{ type: "summary_text", text: thinking }] : [],
+ encrypted_content: encryptedContent,
+ }
+}
+
+const createCompactionContent = (
+ block: AnthropicThinkingBlock,
+): ResponseInputCompaction | undefined => {
+ const compaction = decodeCompactionCarrierSignature(block.signature)
+ if (!compaction) {
+ return undefined
+ }
+
+ return {
+ id: compaction.id,
+ type: "compaction",
+ encrypted_content: compaction.encrypted_content,
+ }
+}
+
+// Splits a reasoning signature of the form "<encrypted_content>@<id>"
+// (assembled in mapOutputToAnthropicContent) back into its parts.
+// Splits at the LAST "@" so an "@" inside the encrypted payload does not
+// truncate it. When no valid separator exists, the whole string is treated
+// as the encrypted content and the id is empty.
+const parseReasoningSignature = (
+ signature: string,
+): { encryptedContent: string; id: string } => {
+ const splitIndex = signature.lastIndexOf("@")
+
+ // <= 0: no separator, or it is the first char (empty content);
+ // length - 1: separator is last char (empty id).
+ if (splitIndex <= 0 || splitIndex === signature.length - 1) {
+ return { encryptedContent: signature, id: "" }
+ }
+
+ return {
+ encryptedContent: signature.slice(0, splitIndex),
+ id: signature.slice(splitIndex + 1),
+ }
+}
+
+const createFunctionToolCall = (
+ block: AnthropicToolUseBlock,
+): ResponseFunctionToolCallItem => ({
+ type: "function_call",
+ call_id: block.id,
+ name: block.name,
+ arguments: JSON.stringify(block.input),
+ status: "completed",
+})
+
+const createFunctionCallOutput = (
+ block: AnthropicToolResultBlock,
+): ResponseFunctionCallOutputItem => ({
+ type: "function_call_output",
+ call_id: block.tool_use_id,
+ output: convertToolResultContent(block.content),
+ status: block.is_error ? "incomplete" : "completed",
+})
+
+const translateSystemPrompt = (
+ system: string | Array | undefined,
+ model: string,
+): string | null => {
+ if (!system) {
+ return null
+ }
+
+ const extraPrompt = getExtraPromptForModel(model)
+
+ if (typeof system === "string") {
+ return system + extraPrompt
+ }
+
+ const text = system
+ .map((block, index) => {
+ if (index === 0) {
+ return block.text + extraPrompt
+ }
+ return block.text
+ })
+ .join(" ")
+ return text.length > 0 ? text : null
+}
+
+const convertAnthropicTools = (
+ tools: Array | undefined,
+): Array | null => {
+ if (!tools || tools.length === 0) {
+ return null
+ }
+
+ return tools.map((tool) => ({
+ type: "function",
+ name: tool.name,
+ parameters: tool.input_schema,
+ strict: false,
+ ...(tool.description ? { description: tool.description } : {}),
+ }))
+}
+
+const convertAnthropicToolChoice = (
+ choice: AnthropicMessagesPayload["tool_choice"],
+): ToolChoiceOptions | ToolChoiceFunction => {
+ if (!choice) {
+ return "auto"
+ }
+
+ switch (choice.type) {
+ case "auto": {
+ return "auto"
+ }
+ case "any": {
+ return "required"
+ }
+ case "tool": {
+ return choice.name ? { type: "function", name: choice.name } : "auto"
+ }
+ case "none": {
+ return "none"
+ }
+ default: {
+ return "auto"
+ }
+ }
+}
+
+export const translateResponsesResultToAnthropic = (
+ response: ResponsesResult,
+): AnthropicResponse => {
+ const contentBlocks = mapOutputToAnthropicContent(response.output)
+ const usage = mapResponsesUsage(response)
+ let anthropicContent = fallbackContentBlocks(response.output_text)
+ if (contentBlocks.length > 0) {
+ anthropicContent = contentBlocks
+ }
+
+ const stopReason = mapResponsesStopReason(response)
+
+ return {
+ id: response.id,
+ type: "message",
+ role: "assistant",
+ content: anthropicContent,
+ model: response.model,
+ stop_reason: stopReason,
+ stop_sequence: null,
+ usage,
+ }
+}
+
+const mapOutputToAnthropicContent = (
+ output: Array,
+): Array => {
+ const contentBlocks: Array = []
+
+ for (const item of output) {
+ switch (item.type) {
+ case "reasoning": {
+ const thinkingText = extractReasoningText(item)
+ if (thinkingText.length > 0) {
+ contentBlocks.push({
+ type: "thinking",
+ thinking: thinkingText,
+ signature: (item.encrypted_content ?? "") + "@" + item.id,
+ })
+ }
+ break
+ }
+ case "function_call": {
+ const toolUseBlock = createToolUseContentBlock(item)
+ if (toolUseBlock) {
+ contentBlocks.push(toolUseBlock)
+ }
+ break
+ }
+ case "message": {
+ const combinedText = combineMessageTextContent(item.content)
+ if (combinedText.length > 0) {
+ contentBlocks.push({ type: "text", text: combinedText })
+ }
+ break
+ }
+ case "compaction": {
+ const compactionBlock = createCompactionThinkingBlock(item)
+ if (compactionBlock) {
+ contentBlocks.push(compactionBlock)
+ }
+ break
+ }
+ default: {
+ // Future compatibility for unrecognized output item types.
+ const combinedText = combineMessageTextContent(
+ (item as { content?: Array }).content,
+ )
+ if (combinedText.length > 0) {
+ contentBlocks.push({ type: "text", text: combinedText })
+ }
+ }
+ }
+ }
+
+ return contentBlocks
+}
+
+const combineMessageTextContent = (
+ content: Array | undefined,
+): string => {
+ if (!Array.isArray(content)) {
+ return ""
+ }
+
+ let aggregated = ""
+
+ for (const block of content) {
+ if (isResponseOutputText(block)) {
+ aggregated += block.text
+ continue
+ }
+
+ if (isResponseOutputRefusal(block)) {
+ aggregated += block.refusal
+ continue
+ }
+
+ if (typeof (block as { text?: unknown }).text === "string") {
+ aggregated += (block as { text: string }).text
+ continue
+ }
+
+ if (typeof (block as { reasoning?: unknown }).reasoning === "string") {
+ aggregated += (block as { reasoning: string }).reasoning
+ continue
+ }
+ }
+
+ return aggregated
+}
+
+const extractReasoningText = (item: ResponseOutputReasoning): string => {
+ const segments: Array = []
+
+ const collectFromBlocks = (blocks?: Array) => {
+ if (!Array.isArray(blocks)) {
+ return
+ }
+
+ for (const block of blocks) {
+ if (typeof block.text === "string") {
+ segments.push(block.text)
+ continue
+ }
+ }
+ }
+
+ // Compatible with opencode, it will filter out blocks where the thinking text is empty, so we add a default thinking text here
+ if (!item.summary || item.summary.length === 0) {
+ return THINKING_TEXT
+ }
+
+ collectFromBlocks(item.summary)
+
+ return segments.join("").trim()
+}
+
+const createToolUseContentBlock = (
+ call: ResponseOutputFunctionCall,
+): AnthropicToolUseBlock | null => {
+ const toolId = call.call_id
+ if (!call.name || !toolId) {
+ return null
+ }
+
+ const input = parseFunctionCallArguments(call.arguments)
+
+ return {
+ type: "tool_use",
+ id: toolId,
+ name: call.name,
+ input,
+ }
+}
+
+const createCompactionThinkingBlock = (
+ item: ResponseOutputCompaction,
+): AnthropicAssistantContentBlock | null => {
+ if (!item.id || !item.encrypted_content) {
+ return null
+ }
+
+ return {
+ type: "thinking",
+ thinking: THINKING_TEXT,
+ signature: encodeCompactionCarrierSignature({
+ id: item.id,
+ encrypted_content: item.encrypted_content,
+ }),
+ }
+}
+
+// Best-effort decode of a function call's raw JSON argument string into the
+// object shape Anthropic tool_use blocks require.
+// - empty/blank input        -> {}
+// - JSON array               -> wrapped as { arguments: [...] }
+// - JSON object              -> returned as-is
+// - JSON scalar or bad JSON  -> { raw_arguments: <original string> }
+//   (scalars fall through WITHOUT a warning; only parse failures warn)
+const parseFunctionCallArguments = (
+ rawArguments: string,
+): Record => {
+ if (typeof rawArguments !== "string" || rawArguments.trim().length === 0) {
+ return {}
+ }
+
+ try {
+ const parsed: unknown = JSON.parse(rawArguments)
+
+ if (Array.isArray(parsed)) {
+ return { arguments: parsed }
+ }
+
+ if (parsed && typeof parsed === "object") {
+ return parsed as Record
+ }
+ } catch (error) {
+ consola.warn("Failed to parse function call arguments", {
+ error,
+ rawArguments,
+ })
+ }
+
+ return { raw_arguments: rawArguments }
+}
+
+const fallbackContentBlocks = (
+ outputText: string,
+): Array => {
+ if (!outputText) {
+ return []
+ }
+
+ return [
+ {
+ type: "text",
+ text: outputText,
+ },
+ ]
+}
+
+// Maps a Responses API result status to the Anthropic stop_reason contract:
+// completed + any function_call output -> "tool_use"
+// completed otherwise                  -> "end_turn"
+// incomplete/max_output_tokens         -> "max_tokens"
+// incomplete/content_filter            -> "end_turn"
+// anything else                        -> null
+const mapResponsesStopReason = (
+ response: ResponsesResult,
+): AnthropicResponse["stop_reason"] => {
+ const { status, incomplete_details: incompleteDetails } = response
+
+ if (status === "completed") {
+ if (response.output.some((item) => item.type === "function_call")) {
+ return "tool_use"
+ }
+ return "end_turn"
+ }
+
+ if (status === "incomplete") {
+ if (incompleteDetails?.reason === "max_output_tokens") {
+ return "max_tokens"
+ }
+ if (incompleteDetails?.reason === "content_filter") {
+ return "end_turn"
+ }
+ }
+
+ return null
+}
+
+// Converts Responses usage accounting to Anthropic's shape.
+// Anthropic reports input_tokens as the UNCACHED portion, so the cached-token
+// count is subtracted from the raw input total and surfaced separately as
+// cache_read_input_tokens (only when the upstream details are present).
+const mapResponsesUsage = (
+ response: ResponsesResult,
+): AnthropicResponse["usage"] => {
+ const inputTokens = response.usage?.input_tokens ?? 0
+ const outputTokens = response.usage?.output_tokens ?? 0
+ const inputCachedTokens = response.usage?.input_tokens_details?.cached_tokens
+
+ return {
+ input_tokens: inputTokens - (inputCachedTokens ?? 0),
+ output_tokens: outputTokens,
+ ...(response.usage?.input_tokens_details?.cached_tokens !== undefined && {
+ cache_read_input_tokens:
+ response.usage.input_tokens_details.cached_tokens,
+ }),
+ }
+}
+
+const isRecord = (value: unknown): value is Record =>
+ typeof value === "object" && value !== null
+
+const isResponseOutputText = (
+ block: ResponseOutputContentBlock,
+): block is ResponseOutputText =>
+ isRecord(block)
+ && "type" in block
+ && (block as { type?: unknown }).type === "output_text"
+
+const isResponseOutputRefusal = (
+ block: ResponseOutputContentBlock,
+): block is ResponseOutputRefusal =>
+ isRecord(block)
+ && "type" in block
+ && (block as { type?: unknown }).type === "refusal"
+
+const convertToolResultContent = (
+ content: string | Array,
+): string | Array => {
+ if (typeof content === "string") {
+ return content
+ }
+
+ if (Array.isArray(content)) {
+ const result: Array = []
+ for (const block of content) {
+ switch (block.type) {
+ case "text": {
+ result.push(createTextContent(block.text))
+ break
+ }
+ case "image": {
+ result.push(createImageContent(block))
+ break
+ }
+ default: {
+ break
+ }
+ }
+ }
+ return result
+ }
+
+ return ""
+}
diff --git a/src/routes/messages/subagent-marker.ts b/src/routes/messages/subagent-marker.ts
new file mode 100644
index 000000000..0d93ce507
--- /dev/null
+++ b/src/routes/messages/subagent-marker.ts
@@ -0,0 +1,78 @@
+import type { AnthropicMessagesPayload } from "./anthropic-types"
+
+const subagentMarkerPrefix = "__SUBAGENT_MARKER__"
+
+export interface SubagentMarker {
+ session_id: string
+ agent_id: string
+ agent_type: string
+}
+
+export const parseSubagentMarkerFromFirstUser = (
+ payload: AnthropicMessagesPayload,
+): SubagentMarker | null => {
+ const firstUserMessage = payload.messages.find((msg) => msg.role === "user")
+ if (!firstUserMessage || !Array.isArray(firstUserMessage.content)) {
+ return null
+ }
+
+ for (const block of firstUserMessage.content) {
+ if (block.type !== "text") {
+ continue
+ }
+
+ const marker = parseSubagentMarkerFromSystemReminder(block.text)
+ if (marker) {
+ return marker
+ }
+ }
+
+ return null
+}
+
+// Scans the text for <system-reminder>…</system-reminder> spans and extracts
+// the first valid __SUBAGENT_MARKER__ JSON payload found inside one.
+// Returns null when no span contains a well-formed marker with all of
+// session_id, agent_id, and agent_type set.
+const parseSubagentMarkerFromSystemReminder = (
+ text: string,
+): SubagentMarker | null => {
+ const startTag = "<system-reminder>"
+ const endTag = "</system-reminder>"
+ let searchFrom = 0
+
+ // Walk every reminder span; skip spans without a marker or with an
+ // invalid/incomplete payload, and keep scanning past them.
+ while (true) {
+ const reminderStart = text.indexOf(startTag, searchFrom)
+ if (reminderStart === -1) {
+ break
+ }
+
+ const contentStart = reminderStart + startTag.length
+ const reminderEnd = text.indexOf(endTag, contentStart)
+ if (reminderEnd === -1) {
+ // Unterminated span — nothing more to parse.
+ break
+ }
+
+ const reminderContent = text.slice(contentStart, reminderEnd)
+ const markerIndex = reminderContent.indexOf(subagentMarkerPrefix)
+ if (markerIndex === -1) {
+ searchFrom = reminderEnd + endTag.length
+ continue
+ }
+
+ // Everything after the prefix (trimmed) is expected to be the JSON body.
+ const markerJson = reminderContent
+ .slice(markerIndex + subagentMarkerPrefix.length)
+ .trim()
+
+ try {
+ const parsed = JSON.parse(markerJson) as SubagentMarker
+ // All three identity fields are required for the marker to be usable.
+ if (!parsed.session_id || !parsed.agent_id || !parsed.agent_type) {
+ searchFrom = reminderEnd + endTag.length
+ continue
+ }
+
+ return parsed
+ } catch {
+ searchFrom = reminderEnd + endTag.length
+ continue
+ }
+ }
+
+ return null
+}
diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts
new file mode 100644
index 000000000..ed8ccefe6
--- /dev/null
+++ b/src/routes/responses/handler.ts
@@ -0,0 +1,162 @@
+import type { Context } from "hono"
+
+import { streamSSE } from "hono/streaming"
+
+import { awaitApproval } from "~/lib/approval"
+import { getConfig } from "~/lib/config"
+import { createHandlerLogger } from "~/lib/logger"
+import { checkRateLimit } from "~/lib/rate-limit"
+import { state } from "~/lib/state"
+import { generateRequestIdFromPayload, getUUID } from "~/lib/utils"
+import {
+ createResponses,
+ type ResponsesPayload,
+ type ResponsesResult,
+} from "~/services/copilot/create-responses"
+
+import { createStreamIdTracker, fixStreamIds } from "./stream-id-sync"
+import {
+ applyResponsesApiContextManagement,
+ compactInputByLatestCompaction,
+ getResponsesRequestOptions,
+} from "./utils"
+
+const logger = createHandlerLogger("responses-handler")
+
+const RESPONSES_ENDPOINT = "/responses"
+
+export const handleResponses = async (c: Context) => {
+ await checkRateLimit(state)
+
+ const payload = await c.req.json()
+ logger.debug("Responses request payload:", JSON.stringify(payload))
+
+ // not support subagent marker for now , set sessionId = getUUID(requestId)
+ const requestId = generateRequestIdFromPayload({ messages: payload.input })
+ logger.debug("Generated request ID:", requestId)
+
+ const sessionId = getUUID(requestId)
+ logger.debug("Extracted session ID:", sessionId)
+
+ useFunctionApplyPatch(payload)
+
+ // Remove web_search tool as it's not supported by GitHub Copilot
+ removeWebSearchTool(payload)
+
+ compactInputByLatestCompaction(payload)
+
+ const selectedModel = state.models?.data.find(
+ (model) => model.id === payload.model,
+ )
+ const supportsResponses =
+ selectedModel?.supported_endpoints?.includes(RESPONSES_ENDPOINT) ?? false
+
+ if (!supportsResponses) {
+ return c.json(
+ {
+ error: {
+ message:
+ "This model does not support the responses endpoint. Please choose a different model.",
+ type: "invalid_request_error",
+ },
+ },
+ 400,
+ )
+ }
+
+ applyResponsesApiContextManagement(
+ payload,
+ selectedModel?.capabilities.limits.max_prompt_tokens,
+ )
+
+ logger.debug("Translated Responses payload:", JSON.stringify(payload))
+
+ const { vision, initiator } = getResponsesRequestOptions(payload)
+
+ if (state.manualApprove) {
+ await awaitApproval()
+ }
+
+ const response = await createResponses(payload, {
+ vision,
+ initiator,
+ requestId,
+ sessionId: sessionId,
+ })
+
+ if (isStreamingRequested(payload) && isAsyncIterable(response)) {
+ logger.debug("Forwarding native Responses stream")
+ return streamSSE(c, async (stream) => {
+ const idTracker = createStreamIdTracker()
+
+ for await (const chunk of response) {
+ logger.debug("Responses stream chunk:", JSON.stringify(chunk))
+
+ const processedData = fixStreamIds(
+ (chunk as { data?: string }).data ?? "",
+ (chunk as { event?: string }).event,
+ idTracker,
+ )
+
+ await stream.writeSSE({
+ id: (chunk as { id?: string }).id,
+ event: (chunk as { event?: string }).event,
+ data: processedData,
+ })
+ }
+ })
+ }
+
+ logger.debug(
+ "Forwarding native Responses result:",
+ JSON.stringify(response).slice(-400),
+ )
+ return c.json(response as ResponsesResult)
+}
+
+const isAsyncIterable = (value: unknown): value is AsyncIterable =>
+ Boolean(value)
+ && typeof (value as AsyncIterable)[Symbol.asyncIterator] === "function"
+
+const isStreamingRequested = (payload: ResponsesPayload): boolean =>
+ Boolean(payload.stream)
+
+// When enabled via config (default true), rewrites any custom "apply_patch"
+// tool in the payload into an equivalent function tool with a single string
+// "input" parameter, since some backends only accept function-typed tools.
+// Mutates payload.tools in place.
+// NOTE(review): the local const deliberately(?) shadows the function's own
+// name — harmless here, but consider renaming for clarity.
+const useFunctionApplyPatch = (payload: ResponsesPayload): void => {
+ const config = getConfig()
+ const useFunctionApplyPatch = config.useFunctionApplyPatch ?? true
+ if (useFunctionApplyPatch) {
+ logger.debug("Using function tool apply_patch for responses")
+ if (Array.isArray(payload.tools)) {
+ const toolsArr = payload.tools
+ for (let i = 0; i < toolsArr.length; i++) {
+ const t = toolsArr[i]
+ if (t.type === "custom" && t.name === "apply_patch") {
+ toolsArr[i] = {
+ type: "function",
+ name: t.name,
+ description: "Use the `apply_patch` tool to edit files",
+ parameters: {
+ type: "object",
+ properties: {
+ input: {
+ type: "string",
+ description: "The entire contents of the apply_patch command",
+ },
+ },
+ required: ["input"],
+ },
+ strict: false,
+ }
+ }
+ }
+ }
+ }
+}
+
+const removeWebSearchTool = (payload: ResponsesPayload): void => {
+ if (!Array.isArray(payload.tools) || payload.tools.length === 0) return
+
+ payload.tools = payload.tools.filter((t) => {
+ return t.type !== "web_search"
+ })
+}
diff --git a/src/routes/responses/route.ts b/src/routes/responses/route.ts
new file mode 100644
index 000000000..af2423427
--- /dev/null
+++ b/src/routes/responses/route.ts
@@ -0,0 +1,15 @@
+import { Hono } from "hono"
+
+import { forwardError } from "~/lib/error"
+
+import { handleResponses } from "./handler"
+
+export const responsesRoutes = new Hono()
+
+responsesRoutes.post("/", async (c) => {
+ try {
+ return await handleResponses(c)
+ } catch (error) {
+ return await forwardError(c, error)
+ }
+})
diff --git a/src/routes/responses/stream-id-sync.ts b/src/routes/responses/stream-id-sync.ts
new file mode 100644
index 000000000..48b3811a1
--- /dev/null
+++ b/src/routes/responses/stream-id-sync.ts
@@ -0,0 +1,97 @@
+/**
+ * Stream ID Synchronization for @ai-sdk/openai compatibility
+ *
+ * Problem: GitHub Copilot's Responses API returns different IDs for the same
+ * item in 'added' vs 'done' events. This breaks @ai-sdk/openai which expects
+ * consistent IDs across the stream lifecycle.
+ *
+ * Errors without this fix:
+ * - "activeReasoningPart.summaryParts" undefined
+ * - "text part not found"
+ *
+ * Use case: OpenCode (AI coding assistant) using Codex models (gpt-5.2-codex)
+ * via @ai-sdk/openai provider requires the Responses API endpoint.
+ */
+
+import type {
+ ResponseOutputItemAddedEvent,
+ ResponseOutputItemDoneEvent,
+ ResponseStreamEvent,
+} from "~/services/copilot/create-responses"
+
+interface StreamIdTracker {
+ outputItems: Map
+}
+
+export const createStreamIdTracker = (): StreamIdTracker => ({
+ outputItems: new Map(),
+})
+
+// Rewrites a single SSE data payload so item IDs stay consistent across the
+// stream (see module header): 'added' events record/synthesize the item id per
+// output_index, 'done' events restore the recorded id, and every other event
+// carrying an output_index gets its item_id backfilled from the tracker.
+// NOTE(review): JSON.parse will throw on any non-empty, non-JSON payload
+// (e.g. a "[DONE]" sentinel) — assumes upstream only emits JSON data frames;
+// confirm against the Copilot Responses stream format.
+export const fixStreamIds = (
+ data: string,
+ event: string | undefined,
+ tracker: StreamIdTracker,
+): string => {
+ if (!data) return data
+ const parsed = JSON.parse(data) as ResponseStreamEvent
+ switch (event) {
+ case "response.output_item.added": {
+ return handleOutputItemAdded(
+ parsed as ResponseOutputItemAddedEvent,
+ tracker,
+ )
+ }
+ case "response.output_item.done": {
+ return handleOutputItemDone(
+ parsed as ResponseOutputItemDoneEvent,
+ tracker,
+ )
+ }
+ default: {
+ return handleItemId(parsed, tracker)
+ }
+ }
+}
+
+// Records (and, when upstream omits one, synthesizes) the item id for an
+// 'output_item.added' event, keyed by output_index, so later events for the
+// same item can be rewritten to match. Returns the re-serialized payload.
+const handleOutputItemAdded = (
+ parsed: ResponseOutputItemAddedEvent,
+ tracker: StreamIdTracker,
+): string => {
+ if (!parsed.item.id) {
+ // Build a 16-char random suffix; a single Math.random().toString(36)
+ // slice can be shorter than 16 chars, hence the loop.
+ let randomSuffix = ""
+ while (randomSuffix.length < 16) {
+ randomSuffix += Math.random().toString(36).slice(2)
+ }
+ parsed.item.id = `oi_${parsed.output_index}_${randomSuffix.slice(0, 16)}`
+ }
+
+ const outputIndex = parsed.output_index
+ tracker.outputItems.set(outputIndex, parsed.item.id)
+ return JSON.stringify(parsed)
+}
+
+// For an 'output_item.done' event, overwrites the (possibly different) id
+// reported by upstream with the id recorded at 'added' time for the same
+// output_index, keeping the lifecycle consistent for @ai-sdk/openai.
+const handleOutputItemDone = (
+ parsed: ResponseOutputItemDoneEvent,
+ tracker: StreamIdTracker,
+): string => {
+ const outputIndex = parsed.output_index
+ const originalId = tracker.outputItems.get(outputIndex)
+ if (originalId) {
+ parsed.item.id = originalId
+ }
+ return JSON.stringify(parsed)
+}
+
+// For all other events that reference an output item (delta events etc.),
+// backfills item_id from the tracker when an output_index is present.
+// Events without an output_index pass through re-serialized but unchanged.
+const handleItemId = (
+ parsed: ResponseStreamEvent & { output_index?: number; item_id?: string },
+ tracker: StreamIdTracker,
+): string => {
+ const outputIndex = parsed.output_index
+ if (outputIndex !== undefined) {
+ const itemId = tracker.outputItems.get(outputIndex)
+ if (itemId) {
+ parsed.item_id = itemId
+ }
+ }
+ return JSON.stringify(parsed)
+}
diff --git a/src/routes/responses/utils.ts b/src/routes/responses/utils.ts
new file mode 100644
index 000000000..be2b31a2c
--- /dev/null
+++ b/src/routes/responses/utils.ts
@@ -0,0 +1,149 @@
+import type {
+ ResponseContextManagementCompactionItem,
+ ResponseInputItem,
+ ResponsesPayload,
+} from "~/services/copilot/create-responses"
+
+import { isResponsesApiContextManagementModel } from "~/lib/config"
+
+export const getResponsesRequestOptions = (
+ payload: ResponsesPayload,
+): { vision: boolean; initiator: "agent" | "user" } => {
+ const vision = hasVisionInput(payload)
+ const initiator = hasAgentInitiator(payload) ? "agent" : "user"
+
+ return { vision, initiator }
+}
+
+// Decides whether this request was initiated by an agent (vs. a user) by
+// inspecting only the LAST input item:
+// - no items at all           -> user
+// - last item has no role     -> agent (non-message items, e.g. tool output)
+// - last role === "assistant" -> agent; anything else -> user
+export const hasAgentInitiator = (payload: ResponsesPayload): boolean => {
+ // Refactor `isAgentCall` logic to check only the last message in the history rather than any message. This prevents valid user messages from being incorrectly flagged as agent calls due to previous assistant history, ensuring proper credit consumption for multi-turn conversations.
+ const lastItem = getPayloadItems(payload).at(-1)
+ if (!lastItem) {
+ return false
+ }
+ if (!("role" in lastItem) || !lastItem.role) {
+ return true
+ }
+ const role =
+ typeof lastItem.role === "string" ? lastItem.role.toLowerCase() : ""
+ return role === "assistant"
+}
+
+export const hasVisionInput = (payload: ResponsesPayload): boolean => {
+ const values = getPayloadItems(payload)
+ return values.some((item) => containsVisionContent(item))
+}
+
+export const resolveResponsesCompactThreshold = (
+ maxPromptTokens?: number,
+): number => {
+ if (typeof maxPromptTokens === "number" && maxPromptTokens > 0) {
+ return Math.floor(maxPromptTokens * 0.9)
+ }
+
+ return 50000
+}
+
+const createCompactionContextManagement = (
+ compactThreshold: number,
+): Array => [
+ {
+ type: "compaction",
+ compact_threshold: compactThreshold,
+ },
+]
+
+export const applyResponsesApiContextManagement = (
+ payload: ResponsesPayload,
+ maxPromptTokens?: number,
+): void => {
+ if (payload.context_management !== undefined) {
+ return
+ }
+
+ if (!isResponsesApiContextManagementModel(payload.model)) {
+ return
+ }
+
+ payload.context_management = createCompactionContextManagement(
+ resolveResponsesCompactThreshold(maxPromptTokens),
+ )
+}
+
+export const compactInputByLatestCompaction = (
+ payload: ResponsesPayload,
+): void => {
+ if (!Array.isArray(payload.input) || payload.input.length === 0) {
+ return
+ }
+
+ const latestCompactionMessageIndex = getLatestCompactionMessageIndex(
+ payload.input,
+ )
+
+ if (latestCompactionMessageIndex === undefined) {
+ return
+ }
+
+ payload.input = payload.input.slice(latestCompactionMessageIndex)
+}
+
+const getLatestCompactionMessageIndex = (
+ input: Array,
+): number | undefined => {
+ for (let index = input.length - 1; index >= 0; index -= 1) {
+ if (isCompactionInputItem(input[index])) {
+ return index
+ }
+ }
+
+ return undefined
+}
+
+const isCompactionInputItem = (value: ResponseInputItem): boolean => {
+ return (
+ "type" in value
+ && typeof value.type === "string"
+ && value.type === "compaction"
+ )
+}
+
+const getPayloadItems = (
+ payload: ResponsesPayload,
+): Array => {
+ const result: Array = []
+
+ const { input } = payload
+
+ if (Array.isArray(input)) {
+ result.push(...input)
+ }
+
+ return result
+}
+
+// Recursively checks an input item (or array of items) for any part with
+// type "input_image" (case-insensitive), descending into array values and
+// into a `content` array on objects. Used to decide whether to request
+// vision-capable handling.
+const containsVisionContent = (value: unknown): boolean => {
+ if (!value) return false
+
+ if (Array.isArray(value)) {
+ return value.some((entry) => containsVisionContent(entry))
+ }
+
+ if (typeof value !== "object") {
+ return false
+ }
+
+ const record = value as Record
+ const type =
+ typeof record.type === "string" ? record.type.toLowerCase() : undefined
+
+ if (type === "input_image") {
+ return true
+ }
+
+ // Message-style items nest their parts under `content`.
+ if (Array.isArray(record.content)) {
+ return record.content.some((entry) => containsVisionContent(entry))
+ }
+
+ return false
+}
diff --git a/src/server.ts b/src/server.ts
index 462a278f3..4dd8c4e9f 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -2,10 +2,12 @@ import { Hono } from "hono"
import { cors } from "hono/cors"
import { logger } from "hono/logger"
+import { createAuthMiddleware } from "./lib/request-auth"
import { completionRoutes } from "./routes/chat-completions/route"
import { embeddingRoutes } from "./routes/embeddings/route"
import { messageRoutes } from "./routes/messages/route"
import { modelRoutes } from "./routes/models/route"
+import { responsesRoutes } from "./routes/responses/route"
import { tokenRoute } from "./routes/token/route"
import { usageRoute } from "./routes/usage/route"
@@ -13,6 +15,7 @@ export const server = new Hono()
server.use(logger())
server.use(cors())
+server.use("*", createAuthMiddleware())
server.get("/", (c) => c.text("Server running"))
@@ -21,11 +24,13 @@ server.route("/models", modelRoutes)
server.route("/embeddings", embeddingRoutes)
server.route("/usage", usageRoute)
server.route("/token", tokenRoute)
+server.route("/responses", responsesRoutes)
// Compatibility with tools that expect v1/ prefix
server.route("/v1/chat/completions", completionRoutes)
server.route("/v1/models", modelRoutes)
server.route("/v1/embeddings", embeddingRoutes)
+server.route("/v1/responses", responsesRoutes)
// Anthropic compatible endpoints
server.route("/v1/messages", messageRoutes)
diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts
index 8534151da..cfbd78994 100644
--- a/src/services/copilot/create-chat-completions.ts
+++ b/src/services/copilot/create-chat-completions.ts
@@ -1,12 +1,25 @@
import consola from "consola"
import { events } from "fetch-event-stream"
-import { copilotHeaders, copilotBaseUrl } from "~/lib/api-config"
+import type { SubagentMarker } from "~/routes/messages/subagent-marker"
+
+import {
+ copilotBaseUrl,
+ copilotHeaders,
+ prepareForCompact,
+ prepareInteractionHeaders,
+} from "~/lib/api-config"
import { HTTPError } from "~/lib/error"
import { state } from "~/lib/state"
export const createChatCompletions = async (
payload: ChatCompletionsPayload,
+ options: {
+ subagentMarker?: SubagentMarker | null
+ requestId: string
+ sessionId?: string
+ isCompact?: boolean
+ },
) => {
if (!state.copilotToken) throw new Error("Copilot token not found")
@@ -16,18 +29,31 @@ export const createChatCompletions = async (
&& x.content?.some((x) => x.type === "image_url"),
)
- // Agent/user check for X-Initiator header
+ // Agent/user check for x-initiator header
// Determine if any message is from an agent ("assistant" or "tool")
- const isAgentCall = payload.messages.some((msg) =>
- ["assistant", "tool"].includes(msg.role),
- )
+ // Only inspect the last message in the history: earlier assistant/tool turns must not flag a fresh user request as an agent call, which would cause incorrect credit consumption in multi-turn conversations.
+ let isAgentCall = false
+ if (payload.messages.length > 0) {
+ const lastMessage = payload.messages.at(-1)
+ if (lastMessage) {
+ isAgentCall = ["assistant", "tool"].includes(lastMessage.role)
+ }
+ }
- // Build headers and add X-Initiator
+ // Build headers and add x-initiator
+ const headers: Record<string, string> = {
- ...copilotHeaders(state, enableVision),
- "X-Initiator": isAgentCall ? "agent" : "user",
+ ...copilotHeaders(state, options.requestId, enableVision),
+ "x-initiator": isAgentCall ? "agent" : "user",
}
+ prepareInteractionHeaders(
+ options.sessionId,
+ Boolean(options.subagentMarker),
+ headers,
+ )
+
+ prepareForCompact(headers, options.isCompact)
+
const response = await fetch(`${copilotBaseUrl(state)}/chat/completions`, {
method: "POST",
headers,
diff --git a/src/services/copilot/create-messages.ts b/src/services/copilot/create-messages.ts
new file mode 100644
index 000000000..c32cf52a1
--- /dev/null
+++ b/src/services/copilot/create-messages.ts
@@ -0,0 +1,126 @@
+import consola from "consola"
+import { events } from "fetch-event-stream"
+
+import type {
+ AnthropicMessagesPayload,
+ AnthropicResponse,
+} from "~/routes/messages/anthropic-types"
+import type { SubagentMarker } from "~/routes/messages/subagent-marker"
+
+import {
+ copilotBaseUrl,
+ copilotHeaders,
+ prepareForCompact,
+ prepareInteractionHeaders,
+} from "~/lib/api-config"
+import { HTTPError } from "~/lib/error"
+import { state } from "~/lib/state"
+
+export type MessagesStream = ReturnType<typeof events>
+export type CreateMessagesReturn = AnthropicResponse | MessagesStream
+
+const INTERLEAVED_THINKING_BETA = "interleaved-thinking-2025-05-14"
+const allowedAnthropicBetas = new Set([
+ INTERLEAVED_THINKING_BETA,
+ "context-management-2025-06-27",
+ "advanced-tool-use-2025-11-20",
+])
+
+const buildAnthropicBetaHeader = (
+ anthropicBetaHeader: string | undefined,
+ thinking: AnthropicMessagesPayload["thinking"],
+): string | undefined => {
+ const isAdaptiveThinking = thinking?.type === "adaptive"
+
+ if (anthropicBetaHeader) {
+ const filteredBeta = anthropicBetaHeader
+ .split(",")
+ .map((item) => item.trim())
+ .filter((item) => item.length > 0)
+ .filter((item) => allowedAnthropicBetas.has(item))
+ const uniqueFilteredBetas = [...new Set(filteredBeta)]
+ const finalFilteredBetas =
+ isAdaptiveThinking ?
+ uniqueFilteredBetas.filter((item) => item !== INTERLEAVED_THINKING_BETA)
+ : uniqueFilteredBetas
+
+ if (finalFilteredBetas.length > 0) {
+ return finalFilteredBetas.join(",")
+ }
+
+ return undefined
+ }
+
+ if (thinking?.budget_tokens && !isAdaptiveThinking) {
+ return INTERLEAVED_THINKING_BETA
+ }
+
+ return undefined
+}
+
+export const createMessages = async (
+ payload: AnthropicMessagesPayload,
+ anthropicBetaHeader: string | undefined,
+ options: {
+ subagentMarker?: SubagentMarker | null
+ requestId: string
+ sessionId?: string
+ isCompact?: boolean
+ },
+): Promise<CreateMessagesReturn> => {
+ if (!state.copilotToken) throw new Error("Copilot token not found")
+
+ const enableVision = payload.messages.some(
+ (message) =>
+ Array.isArray(message.content)
+ && message.content.some((block) => block.type === "image"),
+ )
+
+ let isInitiateRequest = false
+ const lastMessage = payload.messages.at(-1)
+ if (lastMessage?.role === "user") {
+ isInitiateRequest =
+ Array.isArray(lastMessage.content) ?
+ lastMessage.content.some((block) => block.type !== "tool_result")
+ : true
+ }
+
+ const headers: Record<string, string> = {
+ ...copilotHeaders(state, options.requestId, enableVision),
+ "x-initiator": isInitiateRequest ? "user" : "agent",
+ }
+
+ prepareInteractionHeaders(
+ options.sessionId,
+ Boolean(options.subagentMarker),
+ headers,
+ )
+
+ prepareForCompact(headers, options.isCompact)
+
+ // align with vscode copilot extension anthropic-beta
+ const anthropicBeta = buildAnthropicBetaHeader(
+ anthropicBetaHeader,
+ payload.thinking,
+ )
+ if (anthropicBeta) {
+ headers["anthropic-beta"] = anthropicBeta
+ }
+
+ const response = await fetch(`${copilotBaseUrl(state)}/v1/messages`, {
+ method: "POST",
+ headers,
+ body: JSON.stringify(payload),
+ })
+
+ if (!response.ok) {
+ consola.error("Failed to create messages", response)
+ throw new HTTPError("Failed to create messages", response)
+ }
+
+ if (payload.stream) {
+ return events(response)
+ }
+
+ return (await response.json()) as AnthropicResponse
+}
diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
new file mode 100644
index 000000000..ba50d0cc7
--- /dev/null
+++ b/src/services/copilot/create-responses.ts
@@ -0,0 +1,405 @@
+import consola from "consola"
+import { events } from "fetch-event-stream"
+
+import type { SubagentMarker } from "~/routes/messages/subagent-marker"
+
+import {
+ copilotBaseUrl,
+ copilotHeaders,
+ prepareForCompact,
+ prepareInteractionHeaders,
+} from "~/lib/api-config"
+import { HTTPError } from "~/lib/error"
+import { state } from "~/lib/state"
+
+export interface ResponsesPayload {
+ model: string
+ instructions?: string | null
+ input?: string | Array<ResponseInputItem>
+ tools?: Array<Tool> | null
+ tool_choice?: ToolChoiceOptions | ToolChoiceFunction
+ temperature?: number | null
+ top_p?: number | null
+ max_output_tokens?: number | null
+ metadata?: Metadata | null
+ stream?: boolean | null
+ safety_identifier?: string | null
+ prompt_cache_key?: string | null
+ parallel_tool_calls?: boolean | null
+ store?: boolean | null
+ reasoning?: Reasoning | null
+ context_management?: Array<ResponseContextManagementItem> | null
+ include?: Array<ResponseIncludable>
+ service_tier?: string | null // NOTE: Unsupported by GitHub Copilot
+ [key: string]: unknown
+}
+
+export type ToolChoiceOptions = "none" | "auto" | "required"
+
+export interface ToolChoiceFunction {
+ name: string
+ type: "function"
+}
+
+export type Tool = FunctionTool | Record<string, unknown>
+
+export interface FunctionTool {
+ name: string
+ parameters: { [key: string]: unknown } | null
+ strict: boolean | null
+ type: "function"
+ description?: string | null
+}
+
+export type ResponseIncludable =
+ | "file_search_call.results"
+ | "message.input_image.image_url"
+ | "computer_call_output.output.image_url"
+ | "reasoning.encrypted_content"
+ | "code_interpreter_call.outputs"
+
+export interface Reasoning {
+ effort?: "none" | "minimal" | "low" | "medium" | "high" | "xhigh" | null
+ summary?: "auto" | "concise" | "detailed" | null
+}
+
+export interface ResponseContextManagementCompactionItem {
+ type: "compaction"
+ compact_threshold: number
+}
+
+export type ResponseContextManagementItem =
+ ResponseContextManagementCompactionItem
+
+export interface ResponseInputMessage {
+ type?: "message"
+ role: "user" | "assistant" | "system" | "developer"
+ content?: string | Array<ResponseInputContent>
+ status?: string
+ phase?: "commentary" | "final_answer"
+}
+
+export interface ResponseFunctionToolCallItem {
+ type: "function_call"
+ call_id: string
+ name: string
+ arguments: string
+ status?: "in_progress" | "completed" | "incomplete"
+}
+
+export interface ResponseFunctionCallOutputItem {
+ type: "function_call_output"
+ call_id: string
+ output: string | Array<ResponseInputContent>
+ status?: "in_progress" | "completed" | "incomplete"
+}
+
+export interface ResponseInputReasoning {
+ id?: string
+ type: "reasoning"
+ summary: Array<{
+ type: "summary_text"
+ text: string
+ }>
+ encrypted_content: string
+}
+
+export interface ResponseInputCompaction {
+ id: string
+ type: "compaction"
+ encrypted_content: string
+}
+
+export type ResponseInputItem =
+ | ResponseInputMessage
+ | ResponseFunctionToolCallItem
+ | ResponseFunctionCallOutputItem
+ | ResponseInputReasoning
+ | ResponseInputCompaction
+ | Record<string, unknown>
+
+export type ResponseInputContent =
+ | ResponseInputText
+ | ResponseInputImage
+ | Record<string, unknown>
+
+export interface ResponseInputText {
+ type: "input_text" | "output_text"
+ text: string
+}
+
+export interface ResponseInputImage {
+ type: "input_image"
+ image_url?: string | null
+ file_id?: string | null
+ detail: "low" | "high" | "auto"
+}
+
+export interface ResponsesResult {
+ id: string
+ object: "response"
+ created_at: number
+ model: string
+ output: Array<ResponseOutputItem>
+ output_text: string
+ status: string
+ usage?: ResponseUsage | null
+ error: ResponseError | null
+ incomplete_details: IncompleteDetails | null
+ instructions: string | null
+ metadata: Metadata | null
+ parallel_tool_calls: boolean
+ temperature: number | null
+ tool_choice: unknown
+ tools: Array<Tool>
+ top_p: number | null
+}
+
+export type Metadata = { [key: string]: string }
+
+export interface IncompleteDetails {
+ reason?: "max_output_tokens" | "content_filter"
+}
+
+export interface ResponseError {
+ message: string
+}
+
+export type ResponseOutputItem =
+ | ResponseOutputMessage
+ | ResponseOutputReasoning
+ | ResponseOutputFunctionCall
+ | ResponseOutputCompaction
+
+export interface ResponseOutputMessage {
+ id: string
+ type: "message"
+ role: "assistant"
+ status: "completed" | "in_progress" | "incomplete"
+ content?: Array
+}
+
+export interface ResponseOutputReasoning {
+ id: string
+ type: "reasoning"
+ summary?: Array<ResponseReasoningBlock>
+ encrypted_content?: string
+ status?: "completed" | "in_progress" | "incomplete"
+}
+
+export interface ResponseReasoningBlock {
+ type: string
+ text?: string
+}
+
+export interface ResponseOutputFunctionCall {
+ id?: string
+ type: "function_call"
+ call_id: string
+ name: string
+ arguments: string
+ status?: "in_progress" | "completed" | "incomplete"
+}
+
+export interface ResponseOutputCompaction {
+ id: string
+ type: "compaction"
+ encrypted_content: string
+}
+
+export type ResponseOutputContentBlock =
+ | ResponseOutputText
+ | ResponseOutputRefusal
+ | Record<string, unknown>
+
+export interface ResponseOutputText {
+ type: "output_text"
+ text: string
+ annotations: Array<unknown>
+}
+
+export interface ResponseOutputRefusal {
+ type: "refusal"
+ refusal: string
+}
+
+export interface ResponseUsage {
+ input_tokens: number
+ output_tokens?: number
+ total_tokens: number
+ input_tokens_details?: {
+ cached_tokens: number
+ }
+ output_tokens_details?: {
+ reasoning_tokens: number
+ }
+}
+
+export type ResponseStreamEvent =
+ | ResponseCompletedEvent
+ | ResponseIncompleteEvent
+ | ResponseCreatedEvent
+ | ResponseErrorEvent
+ | ResponseFunctionCallArgumentsDeltaEvent
+ | ResponseFunctionCallArgumentsDoneEvent
+ | ResponseFailedEvent
+ | ResponseOutputItemAddedEvent
+ | ResponseOutputItemDoneEvent
+ | ResponseReasoningSummaryTextDeltaEvent
+ | ResponseReasoningSummaryTextDoneEvent
+ | ResponseTextDeltaEvent
+ | ResponseTextDoneEvent
+
+export interface ResponseCompletedEvent {
+ response: ResponsesResult
+ sequence_number: number
+ type: "response.completed"
+}
+
+export interface ResponseIncompleteEvent {
+ response: ResponsesResult
+ sequence_number: number
+ type: "response.incomplete"
+}
+
+export interface ResponseCreatedEvent {
+ response: ResponsesResult
+ sequence_number: number
+ type: "response.created"
+}
+
+export interface ResponseErrorEvent {
+ code: string | null
+ message: string
+ param: string | null
+ sequence_number: number
+ type: "error"
+}
+
+export interface ResponseFunctionCallArgumentsDeltaEvent {
+ delta: string
+ item_id: string
+ output_index: number
+ sequence_number: number
+ type: "response.function_call_arguments.delta"
+}
+
+export interface ResponseFunctionCallArgumentsDoneEvent {
+ arguments: string
+ item_id: string
+ name: string
+ output_index: number
+ sequence_number: number
+ type: "response.function_call_arguments.done"
+}
+
+export interface ResponseFailedEvent {
+ response: ResponsesResult
+ sequence_number: number
+ type: "response.failed"
+}
+
+export interface ResponseOutputItemAddedEvent {
+ item: ResponseOutputItem
+ output_index: number
+ sequence_number: number
+ type: "response.output_item.added"
+}
+
+export interface ResponseOutputItemDoneEvent {
+ item: ResponseOutputItem
+ output_index: number
+ sequence_number: number
+ type: "response.output_item.done"
+}
+
+export interface ResponseReasoningSummaryTextDeltaEvent {
+ delta: string
+ item_id: string
+ output_index: number
+ sequence_number: number
+ summary_index: number
+ type: "response.reasoning_summary_text.delta"
+}
+
+export interface ResponseReasoningSummaryTextDoneEvent {
+ item_id: string
+ output_index: number
+ sequence_number: number
+ summary_index: number
+ text: string
+ type: "response.reasoning_summary_text.done"
+}
+
+export interface ResponseTextDeltaEvent {
+ content_index: number
+ delta: string
+ item_id: string
+ output_index: number
+ sequence_number: number
+ type: "response.output_text.delta"
+}
+
+export interface ResponseTextDoneEvent {
+ content_index: number
+ item_id: string
+ output_index: number
+ sequence_number: number
+ text: string
+ type: "response.output_text.done"
+}
+
+export type ResponsesStream = ReturnType<typeof events>
+export type CreateResponsesReturn = ResponsesResult | ResponsesStream
+
+interface ResponsesRequestOptions {
+ vision: boolean
+ initiator: "agent" | "user"
+ subagentMarker?: SubagentMarker | null
+ requestId: string
+ sessionId?: string
+ isCompact?: boolean
+}
+
+export const createResponses = async (
+ payload: ResponsesPayload,
+ {
+ vision,
+ initiator,
+ subagentMarker,
+ requestId,
+ sessionId,
+ isCompact,
+ }: ResponsesRequestOptions,
+): Promise<CreateResponsesReturn> => {
+ if (!state.copilotToken) throw new Error("Copilot token not found")
+
+ const headers: Record<string, string> = {
+ ...copilotHeaders(state, requestId, vision),
+ "x-initiator": initiator,
+ }
+
+ prepareInteractionHeaders(sessionId, Boolean(subagentMarker), headers)
+
+ prepareForCompact(headers, isCompact)
+
+ // service_tier is not supported by github copilot
+ payload.service_tier = null
+
+ const response = await fetch(`${copilotBaseUrl(state)}/responses`, {
+ method: "POST",
+ headers,
+ body: JSON.stringify(payload),
+ })
+
+ if (!response.ok) {
+ consola.error("Failed to create responses", response)
+ throw new HTTPError("Failed to create responses", response)
+ }
+
+ if (payload.stream) {
+ return events(response)
+ }
+
+ return (await response.json()) as ResponsesResult
+}
diff --git a/src/services/copilot/get-models.ts b/src/services/copilot/get-models.ts
index 3cfa30af0..cf3f184b5 100644
--- a/src/services/copilot/get-models.ts
+++ b/src/services/copilot/get-models.ts
@@ -25,9 +25,15 @@ interface ModelLimits {
}
interface ModelSupports {
+ max_thinking_budget?: number
+ min_thinking_budget?: number
tool_calls?: boolean
parallel_tool_calls?: boolean
dimensions?: boolean
+ streaming?: boolean
+ structured_outputs?: boolean
+ vision?: boolean
+ adaptive_thinking?: boolean
}
interface ModelCapabilities {
@@ -52,4 +58,5 @@ export interface Model {
state: string
terms: string
}
+ supported_endpoints?: Array<string>
}
diff --git a/src/services/get-vscode-version.ts b/src/services/get-vscode-version.ts
index 6078f09b5..dfff314e9 100644
--- a/src/services/get-vscode-version.ts
+++ b/src/services/get-vscode-version.ts
@@ -1,33 +1,8 @@
-const FALLBACK = "1.104.3"
+const FALLBACK = "1.110.1"
export async function getVSCodeVersion() {
- const controller = new AbortController()
- const timeout = setTimeout(() => {
- controller.abort()
- }, 5000)
-
- try {
- const response = await fetch(
- "https://aur.archlinux.org/cgit/aur.git/plain/PKGBUILD?h=visual-studio-code-bin",
- {
- signal: controller.signal,
- },
- )
-
- const pkgbuild = await response.text()
- const pkgverRegex = /pkgver=([0-9.]+)/
- const match = pkgbuild.match(pkgverRegex)
-
- if (match) {
- return match[1]
- }
-
- return FALLBACK
- } catch {
- return FALLBACK
- } finally {
- clearTimeout(timeout)
- }
+ await Promise.resolve()
+ return FALLBACK
}
await getVSCodeVersion()
diff --git a/src/services/github/get-copilot-token.ts b/src/services/github/get-copilot-token.ts
index 98744bab1..9c33c038c 100644
--- a/src/services/github/get-copilot-token.ts
+++ b/src/services/github/get-copilot-token.ts
@@ -1,10 +1,10 @@
-import { GITHUB_API_BASE_URL, githubHeaders } from "~/lib/api-config"
+import { getGitHubApiBaseUrl, githubHeaders } from "~/lib/api-config"
import { HTTPError } from "~/lib/error"
import { state } from "~/lib/state"
export const getCopilotToken = async () => {
const response = await fetch(
- `${GITHUB_API_BASE_URL}/copilot_internal/v2/token`,
+ `${getGitHubApiBaseUrl()}/copilot_internal/v2/token`,
{
headers: githubHeaders(state),
},
diff --git a/src/services/github/get-copilot-usage.ts b/src/services/github/get-copilot-usage.ts
index 6cdd8bc10..1af6632e4 100644
--- a/src/services/github/get-copilot-usage.ts
+++ b/src/services/github/get-copilot-usage.ts
@@ -1,11 +1,14 @@
-import { GITHUB_API_BASE_URL, githubHeaders } from "~/lib/api-config"
+import { getGitHubApiBaseUrl, githubHeaders } from "~/lib/api-config"
import { HTTPError } from "~/lib/error"
import { state } from "~/lib/state"
export const getCopilotUsage = async (): Promise => {
- const response = await fetch(`${GITHUB_API_BASE_URL}/copilot_internal/user`, {
- headers: githubHeaders(state),
- })
+ const response = await fetch(
+ `${getGitHubApiBaseUrl()}/copilot_internal/user`,
+ {
+ headers: githubHeaders(state),
+ },
+ )
if (!response.ok) {
throw new HTTPError("Failed to get Copilot usage", response)
diff --git a/src/services/github/get-device-code.ts b/src/services/github/get-device-code.ts
index cf35f4ec9..79d26ba70 100644
--- a/src/services/github/get-device-code.ts
+++ b/src/services/github/get-device-code.ts
@@ -1,18 +1,16 @@
-import {
- GITHUB_APP_SCOPES,
- GITHUB_BASE_URL,
- GITHUB_CLIENT_ID,
- standardHeaders,
-} from "~/lib/api-config"
+import { getOauthAppConfig, getOauthUrls } from "~/lib/api-config"
import { HTTPError } from "~/lib/error"
export async function getDeviceCode(): Promise {
- const response = await fetch(`${GITHUB_BASE_URL}/login/device/code`, {
+ const { clientId, headers, scope } = getOauthAppConfig()
+ const { deviceCodeUrl } = getOauthUrls()
+
+ const response = await fetch(deviceCodeUrl, {
method: "POST",
- headers: standardHeaders(),
+ headers,
body: JSON.stringify({
- client_id: GITHUB_CLIENT_ID,
- scope: GITHUB_APP_SCOPES,
+ client_id: clientId,
+ scope,
}),
})
diff --git a/src/services/github/get-user.ts b/src/services/github/get-user.ts
index 23e1b1c1c..6774c4492 100644
--- a/src/services/github/get-user.ts
+++ b/src/services/github/get-user.ts
@@ -1,9 +1,9 @@
-import { GITHUB_API_BASE_URL, standardHeaders } from "~/lib/api-config"
+import { getGitHubApiBaseUrl, standardHeaders } from "~/lib/api-config"
import { HTTPError } from "~/lib/error"
import { state } from "~/lib/state"
export async function getGitHubUser() {
- const response = await fetch(`${GITHUB_API_BASE_URL}/user`, {
+ const response = await fetch(`${getGitHubApiBaseUrl()}/user`, {
headers: {
authorization: `token ${state.githubToken}`,
...standardHeaders(),
diff --git a/src/services/github/poll-access-token.ts b/src/services/github/poll-access-token.ts
index 4639ee0dc..44c4a07b8 100644
--- a/src/services/github/poll-access-token.ts
+++ b/src/services/github/poll-access-token.ts
@@ -1,10 +1,6 @@
import consola from "consola"
-import {
- GITHUB_BASE_URL,
- GITHUB_CLIENT_ID,
- standardHeaders,
-} from "~/lib/api-config"
+import { getOauthAppConfig, getOauthUrls } from "~/lib/api-config"
import { sleep } from "~/lib/utils"
import type { DeviceCodeResponse } from "./get-device-code"
@@ -12,24 +8,24 @@ import type { DeviceCodeResponse } from "./get-device-code"
export async function pollAccessToken(
deviceCode: DeviceCodeResponse,
): Promise {
+ const { clientId, headers } = getOauthAppConfig()
+ const { accessTokenUrl } = getOauthUrls()
+
// Interval is in seconds, we need to multiply by 1000 to get milliseconds
// I'm also adding another second, just to be safe
const sleepDuration = (deviceCode.interval + 1) * 1000
consola.debug(`Polling access token with interval of ${sleepDuration}ms`)
while (true) {
- const response = await fetch(
- `${GITHUB_BASE_URL}/login/oauth/access_token`,
- {
- method: "POST",
- headers: standardHeaders(),
- body: JSON.stringify({
- client_id: GITHUB_CLIENT_ID,
- device_code: deviceCode.device_code,
- grant_type: "urn:ietf:params:oauth:grant-type:device_code",
- }),
- },
- )
+ const response = await fetch(accessTokenUrl, {
+ method: "POST",
+ headers,
+ body: JSON.stringify({
+ client_id: clientId,
+ device_code: deviceCode.device_code,
+ grant_type: "urn:ietf:params:oauth:grant-type:device_code",
+ }),
+ })
if (!response.ok) {
await sleep(sleepDuration)
diff --git a/src/start.ts b/src/start.ts
index 14abbbdff..037244a4e 100644
--- a/src/start.ts
+++ b/src/start.ts
@@ -6,13 +6,18 @@ import consola from "consola"
import { serve, type ServerHandler } from "srvx"
import invariant from "tiny-invariant"
+import { mergeConfigWithDefaults } from "./lib/config"
import { ensurePaths } from "./lib/paths"
import { initProxyFromEnv } from "./lib/proxy"
import { generateEnvScript } from "./lib/shell"
import { state } from "./lib/state"
import { setupCopilotToken, setupGitHubToken } from "./lib/token"
-import { cacheModels, cacheVSCodeVersion } from "./lib/utils"
-import { server } from "./server"
+import {
+ cacheMacMachineId,
+ cacheModels,
+ cacheVSCodeVersion,
+ cacheVsCodeSessionId,
+} from "./lib/utils"
interface RunServerOptions {
port: number
@@ -28,10 +33,14 @@ interface RunServerOptions {
}
export async function runServer(options: RunServerOptions): Promise {
+ // Ensure config is merged with defaults at startup
+ mergeConfigWithDefaults()
+
if (options.proxyEnv) {
initProxyFromEnv()
}
+ state.verbose = options.verbose
if (options.verbose) {
consola.level = 5
consola.info("Verbose logging enabled")
@@ -49,6 +58,8 @@ export async function runServer(options: RunServerOptions): Promise {
await ensurePaths()
await cacheVSCodeVersion()
+ cacheMacMachineId()
+ cacheVsCodeSessionId()
if (options.githubToken) {
state.githubToken = options.githubToken
@@ -114,9 +125,14 @@ export async function runServer(options: RunServerOptions): Promise {
`🌐 Usage Viewer: https://ericc-ch.github.io/copilot-api?endpoint=${serverUrl}/usage`,
)
+ const { server } = await import("./server")
+
serve({
fetch: server.fetch as ServerHandler,
port: options.port,
+ bun: {
+ idleTimeout: 0,
+ },
})
}
diff --git a/tests/anthropic-request.test.ts b/tests/anthropic-request.test.ts
index 06c663778..89f240f3c 100644
--- a/tests/anthropic-request.test.ts
+++ b/tests/anthropic-request.test.ts
@@ -136,6 +136,7 @@ describe("Anthropic to OpenAI translation logic", () => {
{
type: "thinking",
thinking: "Let me think about this simple math problem...",
+ signature: "abc123",
},
{ type: "text", text: "2+2 equals 4." },
],
@@ -168,6 +169,7 @@ describe("Anthropic to OpenAI translation logic", () => {
type: "thinking",
thinking:
"I need to call the weather API to get current weather information.",
+ signature: "def456",
},
{ type: "text", text: "I'll check the weather for you." },
{
diff --git a/tests/create-chat-completions.test.ts b/tests/create-chat-completions.test.ts
index d18e741aa..369f6d2cc 100644
--- a/tests/create-chat-completions.test.ts
+++ b/tests/create-chat-completions.test.ts
@@ -23,7 +23,7 @@ const fetchMock = mock(
// @ts-expect-error - Mock fetch doesn't implement all fetch properties
;(globalThis as unknown as { fetch: typeof fetch }).fetch = fetchMock
-test("sets X-Initiator to agent if tool/assistant present", async () => {
+test("sets x-initiator to agent if tool/assistant present", async () => {
const payload: ChatCompletionsPayload = {
messages: [
{ role: "user", content: "hi" },
@@ -31,15 +31,15 @@ test("sets X-Initiator to agent if tool/assistant present", async () => {
],
model: "gpt-test",
}
- await createChatCompletions(payload)
+ await createChatCompletions(payload, { requestId: "1" })
expect(fetchMock).toHaveBeenCalled()
const headers = (
fetchMock.mock.calls[0][1] as { headers: Record }
).headers
- expect(headers["X-Initiator"]).toBe("agent")
+ expect(headers["x-initiator"]).toBe("agent")
})
-test("sets X-Initiator to user if only user present", async () => {
+test("sets x-initiator to user if only user present", async () => {
const payload: ChatCompletionsPayload = {
messages: [
{ role: "user", content: "hi" },
@@ -47,10 +47,10 @@ test("sets X-Initiator to user if only user present", async () => {
],
model: "gpt-test",
}
- await createChatCompletions(payload)
+ await createChatCompletions(payload, { requestId: "1" })
expect(fetchMock).toHaveBeenCalled()
const headers = (
fetchMock.mock.calls[1][1] as { headers: Record }
).headers
- expect(headers["X-Initiator"]).toBe("user")
+ expect(headers["x-initiator"]).toBe("user")
})
diff --git a/tests/responses-stream-translation.test.ts b/tests/responses-stream-translation.test.ts
new file mode 100644
index 000000000..885ac9113
--- /dev/null
+++ b/tests/responses-stream-translation.test.ts
@@ -0,0 +1,140 @@
+import { describe, expect, test } from "bun:test"
+
+import type { AnthropicStreamEventData } from "~/routes/messages/anthropic-types"
+import type {
+ ResponseOutputItemAddedEvent,
+ ResponseFunctionCallArgumentsDeltaEvent,
+ ResponseFunctionCallArgumentsDoneEvent,
+} from "~/services/copilot/create-responses"
+
+import {
+ createResponsesStreamState,
+ translateResponsesStreamEvent,
+} from "~/routes/messages/responses-stream-translation"
+
+const createFunctionCallAddedEvent = (): ResponseOutputItemAddedEvent => ({
+ type: "response.output_item.added",
+ sequence_number: 1,
+ output_index: 1,
+ item: {
+ id: "item-1",
+ type: "function_call",
+ call_id: "call-1",
+ name: "TodoWrite",
+ arguments: "",
+ status: "in_progress",
+ },
+})
+
+describe("translateResponsesStreamEvent tool calls", () => {
+ test("streams function call arguments across deltas", () => {
+ const state = createResponsesStreamState()
+
+ const events = [
+ translateResponsesStreamEvent(createFunctionCallAddedEvent(), state),
+ translateResponsesStreamEvent(
+ {
+ type: "response.function_call_arguments.delta",
+ item_id: "item-1",
+ output_index: 1,
+ sequence_number: 2,
+ delta: '{"todos":',
+ } as ResponseFunctionCallArgumentsDeltaEvent,
+ state,
+ ),
+ translateResponsesStreamEvent(
+ {
+ type: "response.function_call_arguments.delta",
+ item_id: "item-1",
+ output_index: 1,
+ sequence_number: 3,
+ delta: "[]}",
+ } as ResponseFunctionCallArgumentsDeltaEvent,
+ state,
+ ),
+ translateResponsesStreamEvent(
+ {
+ type: "response.function_call_arguments.done",
+ item_id: "item-1",
+ name: "TodoWrite",
+ output_index: 1,
+ sequence_number: 4,
+ arguments: '{"todos":[]}',
+ } as ResponseFunctionCallArgumentsDoneEvent,
+ state,
+ ),
+ ].flat()
+
+ const blockStart = events.find(
+ (event) => event.type === "content_block_start",
+ )
+ expect(blockStart).toBeDefined()
+ if (blockStart?.type === "content_block_start") {
+ expect(blockStart.content_block).toEqual({
+ type: "tool_use",
+ id: "call-1",
+ name: "TodoWrite",
+ input: {},
+ })
+ }
+
+ const deltas = events.filter(
+ (
+ event,
+ ): event is Extract<
+ AnthropicStreamEventData,
+ { type: "content_block_delta" }
+ > => event.type === "content_block_delta",
+ )
+ expect(deltas).toHaveLength(2)
+ expect(deltas[0].delta).toEqual({
+ type: "input_json_delta",
+ partial_json: '{"todos":',
+ })
+ expect(deltas[1].delta).toEqual({
+ type: "input_json_delta",
+ partial_json: "[]}",
+ })
+
+ expect(state.openBlocks.size).toBe(1)
+ expect(state.functionCallStateByOutputIndex.size).toBe(0)
+ })
+
+ test("emits full arguments when only done payload is present", () => {
+ const state = createResponsesStreamState()
+
+ const events = [
+ translateResponsesStreamEvent(createFunctionCallAddedEvent(), state),
+ translateResponsesStreamEvent(
+ {
+ type: "response.function_call_arguments.done",
+ item_id: "item-1",
+ name: "TodoWrite",
+ output_index: 1,
+ sequence_number: 2,
+ arguments:
+ '{"todos":[{"content":"Review src/routes/responses/translation.ts"}]}',
+ } as ResponseFunctionCallArgumentsDoneEvent,
+ state,
+ ),
+ ].flat()
+
+ const deltas = events.filter(
+ (
+ event,
+ ): event is Extract<
+ AnthropicStreamEventData,
+ { type: "content_block_delta" }
+ > => event.type === "content_block_delta",
+ )
+ expect(deltas).toHaveLength(1)
+ expect(deltas[0].delta).toEqual({
+ type: "input_json_delta",
+ partial_json:
+ '{"todos":[{"content":"Review src/routes/responses/translation.ts"}]}',
+ })
+
+ expect(state.openBlocks.size).toBe(1)
+ expect(state.functionCallStateByOutputIndex.size).toBe(0)
+ })
+})
diff --git a/tests/responses-translation.test.ts b/tests/responses-translation.test.ts
new file mode 100644
index 000000000..8d1804e9d
--- /dev/null
+++ b/tests/responses-translation.test.ts
@@ -0,0 +1,197 @@
+import { describe, expect, it } from "bun:test"
+
+import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types"
+import type {
+ ResponseInputMessage,
+ ResponsesResult,
+} from "~/services/copilot/create-responses"
+
+import {
+ translateAnthropicMessagesToResponsesPayload,
+ translateResponsesResultToAnthropic,
+} from "~/routes/messages/responses-translation"
+
+const samplePayload = {
+ model: "claude-3-5-sonnet",
+ max_tokens: 1024,
+ messages: [
+ {
+ role: "user",
+ content: [
+ {
+ type: "text",
+ text: "\nThis is a reminder that your todo list is currently empty. DO NOT mention this to the user explicitly because they are already aware. If you are working on tasks that would benefit from a todo list please use the TodoWrite tool to create one. If not, please feel free to ignore. Again do not mention this message to the user.\n",
+ },
+ {
+ type: "text",
+ text: "\nAs you answer the user's questions, you can use the following context:\n# important-instruction-reminders\nDo what has been asked; nothing more, nothing less.\nNEVER create files unless they're absolutely necessary for achieving your goal.\nALWAYS prefer editing an existing file to creating a new one.\nNEVER proactively create documentation files (*.md) or README files. Only create documentation files if explicitly requested by the User.\n\n \n IMPORTANT: this context may or may not be relevant to your tasks. You should not respond to this context unless it is highly relevant to your task.\n",
+ },
+ {
+ type: "text",
+ text: "hi",
+ },
+ {
+ type: "text",
+ text: "\nThe user opened the file c:\\Work2\\copilot-api\\src\\routes\\responses\\translation.ts in the IDE. This may or may not be related to the current task.\n",
+ },
+ {
+ type: "text",
+ text: "hi",
+ cache_control: {
+ type: "ephemeral",
+ },
+ },
+ ],
+ },
+ ],
+} as unknown as AnthropicMessagesPayload
+
+const jsonStyleUserId = JSON.stringify({
+ device_id: "3f4a1b7c8d9e0f1234567890abcdef1234567890abcdef1234567890abcdef12",
+ account_uuid: "",
+ session_id: "2c4e1cf0-7a67-4d2e-9a4b-1d16d3f44752",
+})
+
+const legacyStyleUserId =
+ "user_8b7e2c1d4f6a9b3c0d1e2f3456789abcdeffedcba9876543210fedcba1234567_account__session_7d0e2f61-4b5c-4a9d-8f11-2c3d4e5f6a7b"
+
+describe("translateAnthropicMessagesToResponsesPayload", () => {
+ it("converts anthropic text blocks into response input messages", () => {
+ const result = translateAnthropicMessagesToResponsesPayload(samplePayload)
+
+ expect(Array.isArray(result.input)).toBe(true)
+ const input = result.input as Array<ResponseInputMessage>
+ expect(input).toHaveLength(1)
+
+ const message = input[0]
+ expect(message.role).toBe("user")
+ expect(Array.isArray(message.content)).toBe(true)
+
+ const content = message.content as Array<{ text: string }>
+ expect(content.map((item) => item.text)).toEqual([
+ "\nThis is a reminder that your todo list is currently empty. DO NOT mention this to the user explicitly because they are already aware. If you are working on tasks that would benefit from a todo list please use the TodoWrite tool to create one. If not, please feel free to ignore. Again do not mention this message to the user.\n",
+ "\nAs you answer the user's questions, you can use the following context:\n# important-instruction-reminders\nDo what has been asked; nothing more, nothing less.\nNEVER create files unless they're absolutely necessary for achieving your goal.\nALWAYS prefer editing an existing file to creating a new one.\nNEVER proactively create documentation files (*.md) or README files. Only create documentation files if explicitly requested by the User.\n\n \n IMPORTANT: this context may or may not be relevant to your tasks. You should not respond to this context unless it is highly relevant to your task.\n",
+ "hi",
+ "\nThe user opened the file c:\\Work2\\copilot-api\\src\\routes\\responses\\translation.ts in the IDE. This may or may not be related to the current task.\n",
+ "hi",
+ ])
+ })
+
+ it("extracts identifiers from JSON-like user_id metadata", () => {
+ const result = translateAnthropicMessagesToResponsesPayload({
+ ...samplePayload,
+ metadata: {
+ user_id: jsonStyleUserId,
+ },
+ })
+
+ expect(result.safety_identifier).toBe(
+ "3f4a1b7c8d9e0f1234567890abcdef1234567890abcdef1234567890abcdef12",
+ )
+ expect(result.prompt_cache_key).toBe("2c4e1cf0-7a67-4d2e-9a4b-1d16d3f44752")
+ })
+
+ it("keeps legacy user_id parsing before JSON fallback", () => {
+ const result = translateAnthropicMessagesToResponsesPayload({
+ ...samplePayload,
+ metadata: {
+ user_id: legacyStyleUserId,
+ },
+ })
+
+ expect(result.safety_identifier).toBe(
+ "8b7e2c1d4f6a9b3c0d1e2f3456789abcdeffedcba9876543210fedcba1234567",
+ )
+ expect(result.prompt_cache_key).toBe("7d0e2f61-4b5c-4a9d-8f11-2c3d4e5f6a7b")
+ })
+})
+
+describe("translateResponsesResultToAnthropic", () => {
+ it("handles reasoning and function call items", () => {
+ const responsesResult: ResponsesResult = {
+ id: "resp_123",
+ object: "response",
+ created_at: 0,
+ model: "gpt-4.1",
+ output: [
+ {
+ id: "reason_1",
+ type: "reasoning",
+ summary: [{ type: "summary_text", text: "Thinking about the task." }],
+ status: "completed",
+ encrypted_content: "encrypted_reasoning_content",
+ },
+ {
+ id: "call_1",
+ type: "function_call",
+ call_id: "call_1",
+ name: "TodoWrite",
+ arguments:
+ '{"todos":[{"content":"Read src/routes/responses/translation.ts","status":"in_progress"}]}',
+ status: "completed",
+ },
+ {
+ id: "message_1",
+ type: "message",
+ role: "assistant",
+ status: "completed",
+ content: [
+ {
+ type: "output_text",
+ text: "Added the task to your todo list.",
+ annotations: [],
+ },
+ ],
+ },
+ ],
+ output_text: "Added the task to your todo list.",
+ status: "incomplete",
+ usage: {
+ input_tokens: 120,
+ output_tokens: 36,
+ total_tokens: 156,
+ },
+ error: null,
+ incomplete_details: { reason: "content_filter" },
+ instructions: null,
+ metadata: null,
+ parallel_tool_calls: false,
+ temperature: null,
+ tool_choice: null,
+ tools: [],
+ top_p: null,
+ }
+
+ const anthropicResponse =
+ translateResponsesResultToAnthropic(responsesResult)
+
+ expect(anthropicResponse.stop_reason).toBe("end_turn")
+ expect(anthropicResponse.content).toHaveLength(3)
+
+ const [thinkingBlock, toolUseBlock, textBlock] = anthropicResponse.content
+
+ expect(thinkingBlock.type).toBe("thinking")
+ if (thinkingBlock.type === "thinking") {
+ expect(thinkingBlock.thinking).toContain("Thinking about the task")
+ }
+
+ expect(toolUseBlock.type).toBe("tool_use")
+ if (toolUseBlock.type === "tool_use") {
+ expect(toolUseBlock.id).toBe("call_1")
+ expect(toolUseBlock.name).toBe("TodoWrite")
+ expect(toolUseBlock.input).toEqual({
+ todos: [
+ {
+ content: "Read src/routes/responses/translation.ts",
+ status: "in_progress",
+ },
+ ],
+ })
+ }
+
+ expect(textBlock.type).toBe("text")
+ if (textBlock.type === "text") {
+ expect(textBlock.text).toBe("Added the task to your todo list.")
+ }
+ })
+})
diff --git a/tests/utils.test.ts b/tests/utils.test.ts
new file mode 100644
index 000000000..00de3673f
--- /dev/null
+++ b/tests/utils.test.ts
@@ -0,0 +1,98 @@
+import type { Context } from "hono"
+
+import { expect, test } from "bun:test"
+import { createHash, randomUUID } from "node:crypto"
+
+import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types"
+
+import { getRootSessionId, getUUID } from "../src/lib/utils"
+
+const jsonStyleUserId = JSON.stringify({
+ device_id: "3f4a1b7c8d9e0f1234567890abcdef1234567890abcdef1234567890abcdef12",
+ account_uuid: "",
+ session_id: "2c4e1cf0-7a67-4d2e-9a4b-1d16d3f44752",
+})
+
+const legacyStyleUserId =
+ "user_8b7e2c1d4f6a9b3c0d1e2f3456789abcdeffedcba9876543210fedcba1234567_account__session_7d0e2f61-4b5c-4a9d-8f11-2c3d4e5f6a7b"
+
+const getLegacyUUID = (content: string): string => {
+ const hash32 = createHash("sha256").update(content).digest("hex").slice(0, 32)
+ return `${hash32.slice(0, 8)}-${hash32.slice(8, 12)}-${hash32.slice(12, 16)}-${hash32.slice(16, 20)}-${hash32.slice(20)}`
+}
+
+test("getUUID returns a deterministic standards-compliant UUIDv4", () => {
+ const uuid = getUUID("hello world")
+
+ expect(uuid).toBe("b94d27b9-934d-4e08-a52e-52d7da7dabfa")
+ expect(uuid).toMatch(
+ /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/,
+ )
+ expect(getUUID("hello world")).toBe(uuid)
+ expect(getUUID("hello world!")).not.toBe(uuid)
+})
+
+test("prints randomUUID and deterministic UUID for comparison", () => {
+ const input = "hello world"
+ const random = randomUUID()
+ const legacy = getLegacyUUID(input)
+ const derived = getUUID(input)
+ const derivedAgain = getUUID(input)
+
+ console.info(`randomUUID(): ${random}`)
+ console.info(`legacy getUUID(${JSON.stringify(input)}): ${legacy}`)
+ console.info(`getUUID(${JSON.stringify(input)}): ${derived}`)
+ console.info(`getUUID(${JSON.stringify(input)}) again: ${derivedAgain}`)
+
+ expect(random).toMatch(
+ /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/,
+ )
+ expect(derived).toMatch(
+ /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/,
+ )
+ expect(legacy).toBe("b94d27b9-934d-3e08-a52e-52d7da7dabfa")
+ expect(derived).toBe("b94d27b9-934d-4e08-a52e-52d7da7dabfa")
+ expect(derivedAgain).toBe(derived)
+ expect(legacy).not.toBe(derived)
+ expect(random).not.toBe(derived)
+})
+
+test("getRootSessionId supports JSON-like user_id metadata", () => {
+ const anthropicPayload = {
+ model: "claude-3-5-sonnet",
+ messages: [],
+ max_tokens: 0,
+ metadata: {
+ user_id: jsonStyleUserId,
+ },
+ } as AnthropicMessagesPayload
+ const context = {
+ req: {
+ header: (_name: string) => undefined,
+ },
+ } as unknown as Context
+
+ expect(getRootSessionId(anthropicPayload, context)).toBe(
+ getUUID("2c4e1cf0-7a67-4d2e-9a4b-1d16d3f44752"),
+ )
+})
+
+test("getRootSessionId keeps legacy parsing before JSON fallback", () => {
+ const anthropicPayload = {
+ model: "claude-3-5-sonnet",
+ messages: [],
+ max_tokens: 0,
+ metadata: {
+ user_id: legacyStyleUserId,
+ },
+ } as AnthropicMessagesPayload
+ const context = {
+ req: {
+ header: (_name: string) => undefined,
+ },
+ } as unknown as Context
+
+ expect(getRootSessionId(anthropicPayload, context)).toBe(
+ getUUID("7d0e2f61-4b5c-4a9d-8f11-2c3d4e5f6a7b"),
+ )
+})