Commit 3492708

lyzgeorge and claude committed

docs: document reasoning and thinking translation, add handler tests

Add a Reasoning & Extended Thinking section to the README, highlight the feature in the intro and features list, and cover the capability gating with new handler tests for the Anthropic /v1/messages surface and additional cases for /v1/chat/completions.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

1 parent 3a65946 · commit 3492708

4 files changed: 474 additions & 1 deletion

File tree

README.md

Lines changed: 53 additions & 0 deletions
@@ -1,5 +1,7 @@
# Copilot API Proxy

**One Copilot subscription. Every frontier reasoning model. OpenAI and Anthropic shaped.** Point Claude Code, Cline, or your own scripts at a single localhost URL and unlock Claude Sonnet 4.6, GPT-5, Gemini, and friends — with real reasoning traces and thinking budgets routed to whichever knob the upstream model actually supports.

> [!WARNING]
> This is a reverse-engineered proxy of the GitHub Copilot API. It is not supported by GitHub and may break unexpectedly. Use at your own risk.

@@ -32,6 +34,7 @@ A reverse-engineered proxy for the GitHub Copilot API that exposes it as an Open
## Features

- **OpenAI & Anthropic Compatibility**: Exposes GitHub Copilot as an OpenAI-compatible (`/v1/chat/completions`, `/v1/models`, `/v1/embeddings`) and Anthropic-compatible (`/v1/messages`) API.
- **Reasoning & Extended Thinking**: Capability-aware translation of `reasoning_effort` and Anthropic `thinking` blocks. Thinking traces, signatures, and `reasoning_opaque` tokens flow through both non-streaming and streaming responses without you having to know which upstream flag each model wants.
- **Claude Code Integration**: Easily configure and launch [Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview) to use Copilot as its backend with a simple command-line flag (`--claude-code`).
- **Usage Dashboard**: A web-based dashboard to monitor your Copilot API usage, view quotas, and see detailed statistics.
- **Rate Limit Control**: Manage API usage with rate-limiting options (`--rate-limit`) and a waiting mechanism (`--wait`) to prevent errors from rapid requests.

@@ -278,6 +281,56 @@ The dashboard provides a user-friendly interface to view your Copilot usage data
- **URL-based Configuration**: You can also specify the API endpoint directly in the URL using a query parameter. This is useful for bookmarks or sharing links. For example:
  `https://ericc-ch.github.io/copilot-api?endpoint=http://your-api-server/usage`

## Reasoning & Extended Thinking

Each Copilot model advertises its own reasoning knobs under `capabilities.supports`. The proxy reads them at startup and translates requests accordingly, so the same client call works across Claude, GPT, Gemini, and friends.
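The capability gating described above can be sketched as follows. This is a simplified TypeScript illustration, not the proxy's actual code; the `Supports` shape and the `gateReasoning` helper are hypothetical names:

```typescript
// Simplified sketch of capability-gated reasoning translation.
// `Supports` mirrors the `capabilities.supports` shape described above.
interface Supports {
  reasoning_effort?: string[]
  adaptive_thinking?: boolean
}

function gateReasoning(
  supports: Supports,
  reasoningEffort?: string,
  thinkingBudget?: number,
) {
  const hasEffort = (supports.reasoning_effort?.length ?? 0) > 0
  return {
    // passed through only when the model lists supported effort levels
    reasoningEffort: hasEffort ? reasoningEffort : undefined,
    // passed through only for adaptive-thinking models
    thinkingBudget: supports.adaptive_thinking ? thinkingBudget : undefined,
  }
}
```

The same client request can therefore be sent to any model; fields the model does not advertise simply never reach the upstream API.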
### OpenAI-shaped requests (`/v1/chat/completions`)

- `reasoning_effort` (`low` | `medium` | `high`, plus `minimal` for the GPT-5 family) is passed through to any model whose `supports.reasoning_effort` is non-empty. Other models get it stripped.
- `thinking_budget` is passed through only when the model advertises `supports.adaptive_thinking` (currently Claude Sonnet 4.5+/4.6, Opus 4.6). Unsupported models silently drop it.
- Claude reasoning responses surface as `reasoning_text` and `reasoning_opaque` on the assistant message.

```sh
# GPT-5 mini with heavy reasoning
curl http://localhost:4141/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gpt-5-mini",
    "reasoning_effort": "high",
    "messages": [{"role": "user", "content": "Think carefully: what is 17*23?"}]
  }'

# Claude Sonnet 4.6 with an explicit thinking budget
curl http://localhost:4141/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "claude-sonnet-4.6",
    "reasoning_effort": "high",
    "thinking_budget": 2048,
    "messages": [{"role": "user", "content": "Think carefully: what is 17*23?"}]
  }'
```
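For a Claude model, the assistant message in a non-streaming response then carries the reasoning fields described above. A rough illustration (field values are made up and the response is truncated to the relevant parts):

```json
{
  "choices": [
    {
      "index": 0,
      "finish_reason": "stop",
      "message": {
        "role": "assistant",
        "content": "17 * 23 = 391.",
        "reasoning_text": "17 * 23 = 17 * 20 + 17 * 3 = 340 + 51 = 391.",
        "reasoning_opaque": "<opaque signature token>"
      }
    }
  ]
}
```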
### Anthropic-shaped requests (`/v1/messages`)

- `thinking: {"type": "enabled", "budget_tokens": N}` is translated into `reasoning_effort: "high"` for any reasoning-capable model, plus `thinking_budget` for adaptive-thinking models.
- `thinking: {"type": "disabled"}` suppresses both fields upstream.
- If the selected model supports neither knob, the thinking config is silently stripped and logged at debug level — the request still succeeds.
- Claude thinking streams emit `content_block_start` / `thinking_delta` / `signature_delta` / `content_block_stop` events before the text block, so Claude Code and similar clients see native thinking UIs.

```sh
# Extended thinking via the Anthropic surface
curl http://localhost:4141/v1/messages \
  -H "Content-Type: application/json" \
  -d '{
    "model": "claude-sonnet-4.6",
    "max_tokens": 1024,
    "thinking": {"type": "enabled", "budget_tokens": 2048},
    "messages": [{"role": "user", "content": "Think carefully: what is 17*23?"}]
  }'
```
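The translation rules above can be sketched in TypeScript. This is a simplified illustration; `translateThinking` and its types are hypothetical names, not the proxy's actual helpers:

```typescript
// Simplified sketch of the Anthropic `thinking` → upstream field
// translation described above. Names and types are illustrative.
type Thinking =
  | { type: "enabled"; budget_tokens: number }
  | { type: "disabled" }

interface Supports {
  reasoning_effort?: string[]
  adaptive_thinking?: boolean
}

function translateThinking(thinking: Thinking | undefined, supports: Supports) {
  // disabled (or absent) thinking suppresses both upstream fields
  if (!thinking || thinking.type === "disabled") return {}
  const reasoningCapable = (supports.reasoning_effort?.length ?? 0) > 0
  return {
    // any reasoning-capable model gets effort pinned to "high"
    reasoningEffort: reasoningCapable ? "high" : undefined,
    // only adaptive-thinking models keep the explicit token budget
    thinkingBudget: supports.adaptive_thinking
      ? thinking.budget_tokens
      : undefined,
  }
}
```

Note that a model supporting neither knob falls through to an effectively empty context, matching the "silently stripped" behavior above.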
## Using with Claude Code

This proxy can be used to power [Claude Code](https://docs.anthropic.com/en/claude-code), an experimental conversational AI assistant for developers from Anthropic.

tests/anthropic-request.test.ts

Lines changed: 120 additions & 1 deletion
@@ -3,8 +3,36 @@ import { z } from "zod"

import type { AnthropicMessagesPayload } from "~/routes/messages/anthropic-types"

import type { Model } from "../src/services/copilot/get-models"

import { translateToOpenAI } from "../src/routes/messages/non-stream-translation"
import {
  buildAnthropicReasoningContext,
  buildOpenAIReasoningContext,
} from "../src/routes/reasoning-context"

function makeModel(
  id: string,
  supports: Model["capabilities"]["supports"],
): Model {
  return {
    id,
    model_picker_enabled: true,
    name: id,
    object: "model",
    preview: false,
    vendor: "test",
    version: "1",
    capabilities: {
      family: id,
      limits: {},
      object: "model_capabilities",
      supports,
      tokenizer: "test",
      type: "chat",
    },
  }
}

const disabledReasoningContext = {
  reasoningEffort: undefined,
@@ -364,6 +392,97 @@ describe("reasoning context helpers", () => {
  })
})

test("reasoning_effort-only model gets reasoning_effort but no thinking_budget", () => {
  expect(
    buildAnthropicReasoningContext(
      {
        model: "gpt-5-mini",
        messages: [],
        max_tokens: 1024,
        thinking: { type: "enabled", budget_tokens: 2048 },
      },
      makeModel("gpt-5-mini", {
        reasoning_effort: ["low", "medium", "high"],
      }),
    ),
  ).toEqual({
    reasoningEffort: "high",
    thinkingBudget: undefined,
  })
})

test("disabled thinking returns an empty context regardless of capability", () => {
  expect(
    buildAnthropicReasoningContext(
      {
        model: "claude-sonnet-4.6",
        messages: [],
        max_tokens: 1024,
        thinking: { type: "disabled" },
      },
      makeModel("claude-sonnet-4.6", {
        adaptive_thinking: true,
        reasoning_effort: ["low", "medium", "high"],
      }),
    ),
  ).toEqual({})
})

test("buildOpenAIReasoningContext keeps supported fields and drops unsupported ones", () => {
  const claudeModel = makeModel("claude-sonnet-4.6", {
    adaptive_thinking: true,
    reasoning_effort: ["low", "medium", "high"],
  })
  expect(
    buildOpenAIReasoningContext(
      {
        model: "claude-sonnet-4.6",
        messages: [],
        reasoning_effort: "high",
        thinking_budget: 2048,
      },
      claudeModel,
    ),
  ).toEqual({
    reasoningEffort: "high",
    thinkingBudget: 2048,
  })

  const gptModel = makeModel("gpt-5-mini", {
    reasoning_effort: ["low", "medium", "high"],
  })
  expect(
    buildOpenAIReasoningContext(
      {
        model: "gpt-5-mini",
        messages: [],
        reasoning_effort: "high",
        thinking_budget: 2048,
      },
      gptModel,
    ),
  ).toEqual({
    reasoningEffort: "high",
    thinkingBudget: undefined,
  })

  const plainModel = makeModel("gpt-4o", {})
  expect(
    buildOpenAIReasoningContext(
      {
        model: "gpt-4o",
        messages: [],
        reasoning_effort: "high",
        thinking_budget: 2048,
      },
      plainModel,
    ),
  ).toEqual({
    reasoningEffort: undefined,
    thinkingBudget: undefined,
  })
})

test("unsupported model does not expose Anthropic adaptive thinking fields", () => {
  expect(
    buildAnthropicReasoningContext(

tests/chat-completions-handler.test.ts

Lines changed: 88 additions & 0 deletions
@@ -200,4 +200,92 @@ describe("handleCompletion reasoning normalization", () => {
      "gpt-adaptive",
    )
  })

  test("reasoning_effort-only model keeps reasoning_effort and drops thinking_budget", async () => {
    state.models = {
      object: "list",
      data: [
        {
          id: "gpt-reasoning",
          name: "GPT Reasoning",
          object: "model",
          model_picker_enabled: true,
          preview: false,
          vendor: "openai",
          version: "1",
          capabilities: {
            family: "gpt",
            object: "model_capabilities",
            tokenizer: "gpt",
            type: "chat",
            supports: {
              reasoning_effort: ["low", "medium", "high"],
            },
            limits: {
              max_output_tokens: 4096,
            },
          },
        },
      ],
    }

    const payload = {
      messages: [{ role: "user", content: "hello" }],
      model: "gpt-reasoning",
      reasoning_effort: "high",
      thinking_budget: 2048,
    } satisfies ChatCompletionsPayload

    await handleCompletion(createContext(payload))

    expect(fetchMock).toHaveBeenCalledTimes(1)
    const body = getLastRequestBody()
    expect(body.reasoning_effort).toBe("high")
    expect(body.thinking_budget).toBeUndefined()
    expect(debugMock).toHaveBeenCalledWith(
      "Dropping unsupported OpenAI thinking_budget for model:",
      "gpt-reasoning",
    )
  })

  test("plain model without reasoning capabilities drops both fields", async () => {
    state.models = {
      object: "list",
      data: [
        {
          id: "gpt-4o",
          name: "GPT-4o",
          object: "model",
          model_picker_enabled: true,
          preview: false,
          vendor: "openai",
          version: "1",
          capabilities: {
            family: "gpt",
            object: "model_capabilities",
            tokenizer: "gpt",
            type: "chat",
            supports: {},
            limits: {
              max_output_tokens: 4096,
            },
          },
        },
      ],
    }

    const payload = {
      messages: [{ role: "user", content: "hello" }],
      model: "gpt-4o",
      reasoning_effort: "high",
      thinking_budget: 2048,
    } satisfies ChatCompletionsPayload

    await handleCompletion(createContext(payload))

    expect(fetchMock).toHaveBeenCalledTimes(1)
    const body = getLastRequestBody()
    expect(body.reasoning_effort).toBeUndefined()
    expect(body.thinking_budget).toBeUndefined()
  })
})
