Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -184,11 +184,12 @@ The server exposes several endpoints to interact with the Copilot API. It provid

These endpoints mimic the OpenAI API structure.

| Endpoint | Method | Description |
| --------------------------- | ------ | --------------------------------------------------------- |
| `POST /v1/chat/completions` | `POST` | Creates a model response for the given chat conversation. |
| `GET /v1/models` | `GET` | Lists the currently available models. |
| `POST /v1/embeddings` | `POST` | Creates an embedding vector representing the input text. |
| Endpoint | Method | Description |
| --------------------------- | ------ | ---------------------------------------------------------------- |
| `POST /v1/responses` | `POST` | Most advanced interface for generating model responses. |
| `POST /v1/chat/completions` | `POST` | Creates a model response for the given chat conversation. |
| `GET /v1/models` | `GET` | Lists the currently available models. |
| `POST /v1/embeddings` | `POST` | Creates an embedding vector representing the input text. |

### Anthropic Compatible Endpoints

Expand Down
1 change: 1 addition & 0 deletions src/routes/messages/anthropic-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ export interface AnthropicToolUseBlock {
/**
 * An Anthropic content block that carries model reasoning ("thinking") text.
 */
export interface AnthropicThinkingBlock {
/** Discriminator tag identifying this block as a thinking block. */
type: "thinking"
/** The reasoning text itself. */
thinking: string
/**
 * Opaque value that accompanies the thinking text.
 * NOTE(review): the Responses translation appears to round-trip this as the
 * reasoning item's `encrypted_content` — confirm against the callers.
 */
signature: string
}

export type AnthropicUserContentBlock =
Expand Down
149 changes: 144 additions & 5 deletions src/routes/messages/responses-stream-translation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export interface ResponsesStreamState {
currentResponseId?: string
currentModel?: string
initialInputTokens?: number
initialInputCachedTokens?: number
functionCallStateByOutputIndex: Map<number, FunctionCallStreamState>
functionCallOutputIndexByItemId: Map<string, number>
}
Expand Down Expand Up @@ -49,12 +50,18 @@ export const translateResponsesStreamEvent = (
return handleResponseCreated(rawEvent, state)
}

case "response.reasoning_summary_text.delta":
case "response.reasoning_summary_text.delta": {
return handleReasoningSummaryTextDelta(rawEvent, state)
}

case "response.output_text.delta": {
return handleOutputTextDelta(rawEvent, state)
}

case "response.reasoning_summary_part.done":
case "response.reasoning_summary_part.done": {
return handleReasoningSummaryPartDone(rawEvent, state)
}

case "response.output_text.done": {
return handleOutputTextDone(rawEvent, state)
}
Expand All @@ -63,6 +70,10 @@ export const translateResponsesStreamEvent = (
return handleOutputItemAdded(rawEvent, state)
}

case "response.output_item.done": {
return handleOutputItemDone(rawEvent, state)
}

case "response.function_call_arguments.delta": {
return handleFunctionCallArgumentsDelta(rawEvent, state)
}
Expand Down Expand Up @@ -143,6 +154,46 @@ const handleOutputItemAdded = (
return events
}

/**
 * Handles a `response.output_item.done` event.
 *
 * Only items whose `type` is `"reasoning"` produce output here: the matching
 * thinking block is opened if necessary, a `signature_delta` is emitted when
 * the item carries a non-empty string `encrypted_content`, and the block is
 * then closed. Any other item yields only the events returned by
 * `ensureMessageStart`.
 *
 * @param rawEvent - The raw stream event payload.
 * @param state - Mutable translation state shared across the stream.
 * @returns The Anthropic stream events produced for this event.
 */
const handleOutputItemDone = (
  rawEvent: Record<string, unknown>,
  state: ResponsesStreamState,
): Array<AnthropicStreamEventData> => {
  const events = ensureMessageStart(state)

  const item = isRecord(rawEvent.item) ? rawEvent.item : undefined
  if (!item) {
    return events
  }

  // Anything other than a completed reasoning item is ignored by this handler.
  if (item.type !== "reasoning") {
    return events
  }

  const outputIndex = toNumber(rawEvent.output_index)
  const thinkingIndex = openThinkingBlockIfNeeded(state, outputIndex, events)

  // Forward the encrypted payload as the block signature, but only when it is
  // a non-empty string.
  const encrypted = item.encrypted_content
  if (typeof encrypted === "string" && encrypted !== "") {
    events.push({
      type: "content_block_delta",
      index: thinkingIndex,
      delta: {
        type: "signature_delta",
        signature: encrypted,
      },
    })
    state.blockHasDelta.add(thinkingIndex)
  }

  closeBlockIfOpen(state, thinkingIndex, events)

  return events
}

const handleFunctionCallArgumentsDelta = (
rawEvent: Record<string, unknown>,
state: ResponsesStreamState,
Expand Down Expand Up @@ -257,6 +308,60 @@ const handleOutputTextDelta = (
return events
}

/**
 * Handles a `response.reasoning_summary_text.delta` event by forwarding the
 * delta text as a `thinking_delta` on the thinking block for the event's
 * output index, opening that block first if it is not already open.
 *
 * Events whose `delta` is missing, not a string, or empty produce no
 * thinking output.
 *
 * @param rawEvent - The raw stream event payload.
 * @param state - Mutable translation state shared across the stream.
 * @returns The Anthropic stream events produced for this event.
 */
const handleReasoningSummaryTextDelta = (
  rawEvent: Record<string, unknown>,
  state: ResponsesStreamState,
): Array<AnthropicStreamEventData> => {
  const events = ensureMessageStart(state)

  const outputIndex = toNumber(rawEvent.output_index)
  const { delta } = rawEvent

  // Nothing to forward without a non-empty string delta.
  if (typeof delta !== "string" || delta === "") {
    return events
  }

  const thinkingIndex = openThinkingBlockIfNeeded(state, outputIndex, events)

  events.push({
    type: "content_block_delta",
    index: thinkingIndex,
    delta: {
      type: "thinking_delta",
      thinking: delta,
    },
  })
  // Record that this block has streamed content.
  state.blockHasDelta.add(thinkingIndex)

  return events
}

/**
 * Handles a `response.reasoning_summary_part.done` event.
 *
 * The thinking block for the event's output index is opened if needed. The
 * part's full text is emitted as a single `thinking_delta` only when the
 * block has not already been recorded in `state.blockHasDelta`, so text that
 * was streamed incrementally is not repeated.
 *
 * @param rawEvent - The raw stream event payload.
 * @param state - Mutable translation state shared across the stream.
 * @returns The Anthropic stream events produced for this event.
 */
const handleReasoningSummaryPartDone = (
  rawEvent: Record<string, unknown>,
  state: ResponsesStreamState,
): Array<AnthropicStreamEventData> => {
  const events = ensureMessageStart(state)

  const outputIndex = toNumber(rawEvent.output_index)
  const part = isRecord(rawEvent.part) ? rawEvent.part : undefined

  let summaryText = ""
  if (part && typeof part.text === "string") {
    summaryText = part.text
  }

  const thinkingIndex = openThinkingBlockIfNeeded(state, outputIndex, events)

  if (!summaryText) {
    return events
  }

  // Deltas were already emitted for this block; do not send the text again.
  if (state.blockHasDelta.has(thinkingIndex)) {
    return events
  }

  events.push({
    type: "content_block_delta",
    index: thinkingIndex,
    delta: {
      type: "thinking_delta",
      thinking: summaryText,
    },
  })

  return events
}

const handleOutputTextDone = (
rawEvent: Record<string, unknown>,
state: ResponsesStreamState,
Expand Down Expand Up @@ -372,11 +477,10 @@ const ensureMessageStart = (
const id = response?.id ?? state.currentResponseId ?? "response"
const model = response?.model ?? state.currentModel ?? ""

const inputTokens =
response?.usage?.input_tokens ?? state.initialInputTokens ?? 0

state.messageStartSent = true

const inputTokens =
(state.initialInputTokens ?? 0) - (state.initialInputCachedTokens ?? 0)
Comment on lines +482 to +483

Copilot AI Oct 1, 2025

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Streaming usage metadata uses cache_creation_input_tokens whereas non-streaming usage uses cache_read_input_tokens, creating an inconsistent external contract. Standardize the cache metric naming or emit both (with clear semantics) to avoid client divergence.

Copilot uses AI. Check for mistakes.
return [
{
type: "message_start",
Expand All @@ -391,6 +495,9 @@ const ensureMessageStart = (
usage: {
input_tokens: inputTokens,
output_tokens: 0,
...(state.initialInputCachedTokens !== undefined && {
cache_creation_input_tokens: state.initialInputCachedTokens,

Copilot AI Oct 1, 2025

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Streaming usage metadata uses cache_creation_input_tokens whereas non-streaming usage uses cache_read_input_tokens, creating an inconsistent external contract. Standardize the cache metric naming or emit both (with clear semantics) to avoid client divergence.

Suggested change
cache_creation_input_tokens: state.initialInputCachedTokens,
cache_creation_input_tokens: state.initialInputCachedTokens,
cache_read_input_tokens: state.initialInputCachedTokens,

Copilot uses AI. Check for mistakes.
}),
},
},
},
Expand Down Expand Up @@ -430,6 +537,36 @@ const openTextBlockIfNeeded = (
return blockIndex
}

/**
 * Resolves the content block index used for thinking output at the given
 * output index, allocating a new index when none is registered, and emits a
 * `content_block_start` event if that block is not currently open.
 *
 * @param state - Mutable translation state; may gain a new key mapping and an
 *   open-block entry.
 * @param outputIndex - Output index of the reasoning item in the stream.
 * @param events - Event list that receives the start event when one is needed.
 * @returns The content block index for the thinking block.
 */
const openThinkingBlockIfNeeded = (
  state: ResponsesStreamState,
  outputIndex: number,
  events: Array<AnthropicStreamEventData>,
): number => {
  // Thinking blocks are always keyed with content index 0.
  const key = getBlockKey(outputIndex, 0)
  const registered = state.blockIndexByKey.get(key)

  let blockIndex: number
  if (registered === undefined) {
    // First sighting of this key: claim the next free block index.
    blockIndex = state.nextContentBlockIndex
    state.nextContentBlockIndex = blockIndex + 1
    state.blockIndexByKey.set(key, blockIndex)
  } else {
    blockIndex = registered
  }

  if (state.openBlocks.has(blockIndex)) {
    return blockIndex
  }

  events.push({
    type: "content_block_start",
    index: blockIndex,
    content_block: {
      type: "thinking",
      thinking: "",
    },
  })
  state.openBlocks.add(blockIndex)

  return blockIndex
}
Comment on lines +540 to +568

Copilot AI Oct 1, 2025

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] Logic duplicates patterns from openTextBlockIfNeeded with only content type and fixed contentIndex differences; consider refactoring to a generic helper (e.g., openBlockIfNeeded(kind, outputIndex, contentIndex?)) to reduce code duplication and future divergence risk.

Copilot uses AI. Check for mistakes.

const closeBlockIfOpen = (
state: ResponsesStreamState,
blockIndex: number,
Expand Down Expand Up @@ -463,6 +600,8 @@ const cacheResponseMetadata = (
state.currentResponseId = response.id
state.currentModel = response.model
state.initialInputTokens = response.usage?.input_tokens ?? 0
state.initialInputCachedTokens =
response.usage?.input_tokens_details?.cached_tokens
}

const buildErrorEvent = (message: string): AnthropicStreamEventData => ({
Expand Down
69 changes: 35 additions & 34 deletions src/routes/messages/responses-translation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import {
type ResponseInputImage,
type ResponseInputItem,
type ResponseInputMessage,
type ResponseInputReasoning,
type ResponseInputText,
type ResponsesResult,
type ResponseOutputContentBlock,
Expand All @@ -27,6 +28,7 @@ import {
type AnthropicMessage,
type AnthropicMessagesPayload,
type AnthropicTextBlock,
type AnthropicThinkingBlock,
type AnthropicTool,
type AnthropicToolResultBlock,
type AnthropicToolUseBlock,
Expand Down Expand Up @@ -137,6 +139,12 @@ const translateAssistantMessage = (
continue
}

if (block.type === "thinking") {
flushPendingContent("assistant", pendingContent, items)
items.push(createReasoningContent(block))
continue
}

const converted = translateAssistantContentBlock(block)
if (converted) {
pendingContent.push(converted)
Expand All @@ -158,9 +166,6 @@ const translateUserContentBlock = (
case "image": {
return createImageContent(block)
}
case "tool_result": {
return undefined
}
default: {
return undefined
}
Expand All @@ -174,12 +179,6 @@ const translateAssistantContentBlock = (
case "text": {
return createOutPutTextContent(block.text)
}
case "thinking": {
return createOutPutTextContent(block.thinking)
}
case "tool_use": {
return undefined
}
default: {
return undefined
}
Expand Down Expand Up @@ -230,6 +229,19 @@ const createImageContent = (
image_url: `data:${block.source.media_type};base64,${block.source.data}`,
})

/**
 * Builds a Responses `reasoning` input item from an Anthropic thinking block.
 *
 * The thinking text becomes the single `summary_text` entry of `summary`, and
 * the block's `signature` is passed through as `encrypted_content`.
 *
 * @param block - The Anthropic thinking block to convert.
 * @returns The equivalent reasoning input item.
 */
const createReasoningContent = (
  block: AnthropicThinkingBlock,
): ResponseInputReasoning => {
  const { thinking, signature } = block

  return {
    type: "reasoning",
    summary: [{ type: "summary_text", text: thinking }],
    encrypted_content: signature,
  }
}

const createFunctionToolCall = (
block: AnthropicToolUseBlock,
): ResponseFunctionToolCallItem => ({
Expand Down Expand Up @@ -376,7 +388,11 @@ const mapOutputToAnthropicContent = (
case "reasoning": {
const thinkingText = extractReasoningText(item)
if (thinkingText.length > 0) {
contentBlocks.push({ type: "thinking", thinking: thinkingText })
contentBlocks.push({
type: "thinking",
thinking: thinkingText,
signature: item.encrypted_content ?? "",
})
}
break
}
Expand Down Expand Up @@ -456,31 +472,11 @@ const extractReasoningText = (item: ResponseOutputReasoning): string => {
segments.push(block.text)
continue
}

if (typeof block.thinking === "string") {
segments.push(block.thinking)
continue
}

const reasoningValue = (block as Record<string, unknown>).reasoning
if (typeof reasoningValue === "string") {
segments.push(reasoningValue)
}
}
}

collectFromBlocks(item.reasoning)
collectFromBlocks(item.summary)

if (typeof item.thinking === "string") {
segments.push(item.thinking)
}

const textValue = (item as Record<string, unknown>).text
if (typeof textValue === "string") {
segments.push(textValue)
}

return segments.join("").trim()
}

Expand Down Expand Up @@ -571,12 +567,17 @@ const mapResponsesStopReason = (
const mapResponsesUsage = (
response: ResponsesResult,
): AnthropicResponse["usage"] => {
const promptTokens = response.usage?.input_tokens ?? 0
const completionTokens = response.usage?.output_tokens ?? 0
const inputTokens = response.usage?.input_tokens ?? 0
const outputTokens = response.usage?.output_tokens ?? 0
const inputCachedTokens = response.usage?.input_tokens_details?.cached_tokens

return {
input_tokens: promptTokens,
output_tokens: completionTokens,
input_tokens: inputTokens - (inputCachedTokens ?? 0),
output_tokens: outputTokens,
...(response.usage?.input_tokens_details?.cached_tokens !== undefined && {
cache_read_input_tokens:

Copilot AI Oct 1, 2025

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Usage field uses cache_read_input_tokens here, while the streaming path emits cache_creation_input_tokens (see ensureMessageStart). Divergent field names for analogous cache token metrics can confuse clients; align on a single naming convention or document the semantic distinction if both are required.

Suggested change
cache_read_input_tokens:
cache_creation_input_tokens:

Copilot uses AI. Check for mistakes.
response.usage.input_tokens_details.cached_tokens,
}),
}
}

Expand Down
Loading