diff --git a/README.md b/README.md index e5b390991..e0aa9d3e2 100644 --- a/README.md +++ b/README.md @@ -184,11 +184,12 @@ The server exposes several endpoints to interact with the Copilot API. It provid These endpoints mimic the OpenAI API structure. -| Endpoint | Method | Description | -| --------------------------- | ------ | --------------------------------------------------------- | -| `POST /v1/chat/completions` | `POST` | Creates a model response for the given chat conversation. | -| `GET /v1/models` | `GET` | Lists the currently available models. | -| `POST /v1/embeddings` | `POST` | Creates an embedding vector representing the input text. | +| Endpoint | Method | Description | +| --------------------------- | ------ | ---------------------------------------------------------------- | +| `POST /v1/responses` | `POST` | Most advanced interface for generating model responses. | +| `POST /v1/chat/completions` | `POST` | Creates a model response for the given chat conversation. | +| `GET /v1/models` | `GET` | Lists the currently available models. | +| `POST /v1/embeddings` | `POST` | Creates an embedding vector representing the input text. | ### Anthropic Compatible Endpoints diff --git a/src/routes/messages/anthropic-types.ts b/src/routes/messages/anthropic-types.ts index 881fffcc8..f07485bf0 100644 --- a/src/routes/messages/anthropic-types.ts +++ b/src/routes/messages/anthropic-types.ts @@ -56,6 +56,7 @@ export interface AnthropicToolUseBlock { export interface AnthropicThinkingBlock { type: "thinking" thinking: string + signature: string } export type AnthropicUserContentBlock = diff --git a/src/routes/messages/responses-stream-translation.ts b/src/routes/messages/responses-stream-translation.ts index 06feab1a4..db09bf144 100644 --- a/src/routes/messages/responses-stream-translation.ts +++ b/src/routes/messages/responses-stream-translation.ts @@ -13,6 +13,7 @@ export interface ResponsesStreamState { currentResponseId?: string currentModel?: string initialInputTokens?: number + initialInputCachedTokens?: number functionCallStateByOutputIndex: Map functionCallOutputIndexByItemId: Map } @@ -49,12 +50,18 @@ export const translateResponsesStreamEvent = ( return handleResponseCreated(rawEvent, state) } - case "response.reasoning_summary_text.delta": + case "response.reasoning_summary_text.delta": { + return handleReasoningSummaryTextDelta(rawEvent, state) + } + case "response.output_text.delta": { return handleOutputTextDelta(rawEvent, state) } - case "response.reasoning_summary_part.done": + case "response.reasoning_summary_part.done": { + return handleReasoningSummaryPartDone(rawEvent, state) + } + case "response.output_text.done": { return handleOutputTextDone(rawEvent, state) } @@ -63,6 +70,10 @@ export const translateResponsesStreamEvent = ( return handleOutputItemAdded(rawEvent, state) } + case "response.output_item.done": { + return handleOutputItemDone(rawEvent, state) + } + case "response.function_call_arguments.delta": { return handleFunctionCallArgumentsDelta(rawEvent, state) } @@ -143,6 +154,46 @@ const handleOutputItemAdded = ( return events } +const handleOutputItemDone = ( + rawEvent: Record, + state: ResponsesStreamState, +): Array => { + const events = ensureMessageStart(state) + + const item = isRecord(rawEvent.item) ? rawEvent.item : undefined + if (!item) { + return events + } + + const itemType = typeof item.type === "string" ? item.type : undefined + if (itemType !== "reasoning") { + return events + } + + const outputIndex = toNumber(rawEvent.output_index) + + const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events) + + const signature = + typeof item.encrypted_content === "string" ? item.encrypted_content : "" + + if (signature) { + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "signature_delta", + signature, + }, + }) + state.blockHasDelta.add(blockIndex) + } + + closeBlockIfOpen(state, blockIndex, events) + + return events +} + const handleFunctionCallArgumentsDelta = ( rawEvent: Record, state: ResponsesStreamState, @@ -257,6 +308,60 @@ const handleOutputTextDelta = ( return events } +const handleReasoningSummaryTextDelta = ( + rawEvent: Record, + state: ResponsesStreamState, +): Array => { + const events = ensureMessageStart(state) + + const outputIndex = toNumber(rawEvent.output_index) + const deltaText = typeof rawEvent.delta === "string" ? rawEvent.delta : "" + + if (!deltaText) { + return events + } + + const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events) + + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "thinking_delta", + thinking: deltaText, + }, + }) + state.blockHasDelta.add(blockIndex) + + return events +} + +const handleReasoningSummaryPartDone = ( + rawEvent: Record, + state: ResponsesStreamState, +): Array => { + const events = ensureMessageStart(state) + + const outputIndex = toNumber(rawEvent.output_index) + const part = isRecord(rawEvent.part) ? rawEvent.part : undefined + const text = part && typeof part.text === "string" ? part.text : "" + + const blockIndex = openThinkingBlockIfNeeded(state, outputIndex, events) + + if (text && !state.blockHasDelta.has(blockIndex)) { + events.push({ + type: "content_block_delta", + index: blockIndex, + delta: { + type: "thinking_delta", + thinking: text, + }, + }) + } + + return events +} + const handleOutputTextDone = ( rawEvent: Record, state: ResponsesStreamState, @@ -372,11 +477,10 @@ const ensureMessageStart = ( const id = response?.id ?? state.currentResponseId ?? "response" const model = response?.model ?? state.currentModel ?? "" - const inputTokens = - response?.usage?.input_tokens ?? state.initialInputTokens ?? 0 - state.messageStartSent = true + const inputTokens = + (state.initialInputTokens ?? 0) - (state.initialInputCachedTokens ?? 0) return [ { type: "message_start", @@ -391,6 +495,9 @@ const ensureMessageStart = ( usage: { input_tokens: inputTokens, output_tokens: 0, + ...(state.initialInputCachedTokens !== undefined && { + cache_creation_input_tokens: state.initialInputCachedTokens, + }), }, }, }, @@ -430,6 +537,36 @@ const openTextBlockIfNeeded = ( return blockIndex } +const openThinkingBlockIfNeeded = ( + state: ResponsesStreamState, + outputIndex: number, + events: Array, +): number => { + const contentIndex = 0 + const key = getBlockKey(outputIndex, contentIndex) + let blockIndex = state.blockIndexByKey.get(key) + + if (blockIndex === undefined) { + blockIndex = state.nextContentBlockIndex + state.nextContentBlockIndex += 1 + state.blockIndexByKey.set(key, blockIndex) + } + + if (!state.openBlocks.has(blockIndex)) { + events.push({ + type: "content_block_start", + index: blockIndex, + content_block: { + type: "thinking", + thinking: "", + }, + }) + state.openBlocks.add(blockIndex) + } + + return blockIndex +} + const closeBlockIfOpen = ( state: ResponsesStreamState, blockIndex: number, @@ -463,6 +600,8 @@ const cacheResponseMetadata = ( state.currentResponseId = response.id state.currentModel = response.model state.initialInputTokens = response.usage?.input_tokens ?? 0 + state.initialInputCachedTokens = + response.usage?.input_tokens_details?.cached_tokens } const buildErrorEvent = (message: string): AnthropicStreamEventData => ({ diff --git a/src/routes/messages/responses-translation.ts b/src/routes/messages/responses-translation.ts index 41c262994..00f481291 100644 --- a/src/routes/messages/responses-translation.ts +++ b/src/routes/messages/responses-translation.ts @@ -6,6 +6,7 @@ import { type ResponseInputImage, type ResponseInputItem, type ResponseInputMessage, + type ResponseInputReasoning, type ResponseInputText, type ResponsesResult, type ResponseOutputContentBlock, @@ -27,6 +28,7 @@ import { type AnthropicMessage, type AnthropicMessagesPayload, type AnthropicTextBlock, + type AnthropicThinkingBlock, type AnthropicTool, type AnthropicToolResultBlock, type AnthropicToolUseBlock, @@ -137,6 +139,12 @@ const translateAssistantMessage = ( continue } + if (block.type === "thinking") { + flushPendingContent("assistant", pendingContent, items) + items.push(createReasoningContent(block)) + continue + } + const converted = translateAssistantContentBlock(block) if (converted) { pendingContent.push(converted) @@ -158,9 +166,6 @@ const translateUserContentBlock = ( case "image": { return createImageContent(block) } - case "tool_result": { - return undefined - } default: { return undefined } @@ -174,12 +179,6 @@ const translateAssistantContentBlock = ( case "text": { return createOutPutTextContent(block.text) } - case "thinking": { - return createOutPutTextContent(block.thinking) - } - case "tool_use": { - return undefined - } default: { return undefined } @@ -230,6 +229,19 @@ const createImageContent = ( image_url: `data:${block.source.media_type};base64,${block.source.data}`, }) +const createReasoningContent = ( + block: AnthropicThinkingBlock, +): ResponseInputReasoning => ({ + type: "reasoning", + summary: [ + { + type: "summary_text", + text: block.thinking, + }, + ], + encrypted_content: block.signature, +}) + const createFunctionToolCall = ( block: AnthropicToolUseBlock, ): ResponseFunctionToolCallItem => ({ @@ -376,7 +388,11 @@ const mapOutputToAnthropicContent = ( case "reasoning": { const thinkingText = extractReasoningText(item) if (thinkingText.length > 0) { - contentBlocks.push({ type: "thinking", thinking: thinkingText }) + contentBlocks.push({ + type: "thinking", + thinking: thinkingText, + signature: item.encrypted_content ?? "", + }) } break } @@ -456,31 +472,11 @@ const extractReasoningText = (item: ResponseOutputReasoning): string => { segments.push(block.text) continue } - - if (typeof block.thinking === "string") { - segments.push(block.thinking) - continue - } - - const reasoningValue = (block as Record).reasoning - if (typeof reasoningValue === "string") { - segments.push(reasoningValue) - } } } - collectFromBlocks(item.reasoning) collectFromBlocks(item.summary) - if (typeof item.thinking === "string") { - segments.push(item.thinking) - } - - const textValue = (item as Record).text - if (typeof textValue === "string") { - segments.push(textValue) - } - return segments.join("").trim() } @@ -571,12 +567,17 @@ const mapResponsesStopReason = ( const mapResponsesUsage = ( response: ResponsesResult, ): AnthropicResponse["usage"] => { - const promptTokens = response.usage?.input_tokens ?? 0 - const completionTokens = response.usage?.output_tokens ?? 0 + const inputTokens = response.usage?.input_tokens ?? 0 + const outputTokens = response.usage?.output_tokens ?? 0 + const inputCachedTokens = response.usage?.input_tokens_details?.cached_tokens return { - input_tokens: promptTokens, - output_tokens: completionTokens, + input_tokens: inputTokens - (inputCachedTokens ?? 0), + output_tokens: outputTokens, + ...(response.usage?.input_tokens_details?.cached_tokens !== undefined && { + cache_read_input_tokens: + response.usage.input_tokens_details.cached_tokens, + }), } } diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts index ef7b38b93..d06d02d67 100644 --- a/src/routes/responses/handler.ts +++ b/src/routes/responses/handler.ts @@ -52,13 +52,29 @@ export const handleResponses = async (c: Context) => { if (isStreamingRequested(payload) && isAsyncIterable(response)) { consola.debug("Forwarding native Responses stream") return streamSSE(c, async (stream) => { - for await (const chunk of response) { - consola.debug("Responses stream chunk:", JSON.stringify(chunk)) - await stream.writeSSE({ - id: (chunk as { id?: string }).id, - event: (chunk as { event?: string }).event, - data: (chunk as { data?: string }).data ?? "", - }) + const pingInterval = setInterval(async () => { + try { + await stream.writeSSE({ + event: "ping", + data: JSON.stringify({ timestamp: Date.now() }), + }) + } catch (error) { + consola.warn("Failed to send ping:", error) + clearInterval(pingInterval) + } + }, 3000) + + try { + for await (const chunk of response) { + consola.debug("Responses stream chunk:", JSON.stringify(chunk)) + await stream.writeSSE({ + id: (chunk as { id?: string }).id, + event: (chunk as { event?: string }).event, + data: (chunk as { data?: string }).data ?? "", + }) + } + } finally { + clearInterval(pingInterval) } }) } diff --git a/src/routes/responses/utils.ts b/src/routes/responses/utils.ts index 5dea1daae..734319cd7 100644 --- a/src/routes/responses/utils.ts +++ b/src/routes/responses/utils.ts @@ -31,16 +31,12 @@ const getPayloadItems = ( ): Array => { const result: Array = [] - const { input, instructions } = payload + const { input } = payload if (Array.isArray(input)) { result.push(...input) } - if (Array.isArray(instructions)) { - result.push(...instructions) - } - return result } diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts index b13349e4d..8322cacee 100644 --- a/src/services/copilot/create-responses.ts +++ b/src/services/copilot/create-responses.ts @@ -7,13 +7,13 @@ import { state } from "~/lib/state" export interface ResponsesPayload { model: string + instructions?: string | null input?: string | Array - instructions?: string | Array | null + tools?: Array> | null + tool_choice?: unknown temperature?: number | null top_p?: number | null max_output_tokens?: number | null - tools?: Array> | null - tool_choice?: unknown metadata?: Record | null stream?: boolean | null response_format?: Record | null @@ -48,10 +48,20 @@ export interface ResponseFunctionCallOutputItem { status?: "in_progress" | "completed" | "incomplete" } +export interface ResponseInputReasoning { + type: "reasoning" + summary: Array<{ + type: "summary_text" + text: string + }> + encrypted_content: string +} + export type ResponseInputItem = | ResponseInputMessage | ResponseFunctionToolCallItem | ResponseFunctionCallOutputItem + | ResponseInputReasoning | Record export type ResponseInputContent = @@ -107,17 +117,15 @@ export interface ResponseOutputMessage { export interface ResponseOutputReasoning { id: string type: "reasoning" - reasoning?: Array summary?: Array - thinking?: string + encrypted_content?: string + status: "completed" | "in_progress" | "incomplete" [key: string]: unknown } export interface ResponseReasoningBlock { type: string text?: string - thinking?: string - [key: string]: unknown } export interface ResponseOutputFunctionCall { diff --git a/tests/anthropic-request.test.ts b/tests/anthropic-request.test.ts index a4a5b06b5..c86bcac13 100644 --- a/tests/anthropic-request.test.ts +++ b/tests/anthropic-request.test.ts @@ -136,6 +136,7 @@ describe("Anthropic to OpenAI translation logic", () => { { type: "thinking", thinking: "Let me think about this simple math problem...", + signature: "abc123", }, { type: "text", text: "2+2 equals 4." }, ], @@ -168,6 +169,7 @@ describe("Anthropic to OpenAI translation logic", () => { type: "thinking", thinking: "I need to call the weather API to get current weather information.", + signature: "def456", }, { type: "text", text: "I'll check the weather for you." }, { diff --git a/tests/translation.test.ts b/tests/translation.test.ts index 84856b932..0c3ececb2 100644 --- a/tests/translation.test.ts +++ b/tests/translation.test.ts @@ -81,7 +81,9 @@ describe("translateResponsesResultToAnthropic", () => { { id: "reason_1", type: "reasoning", - reasoning: [{ type: "text", text: "Thinking about the task." }], + summary: [{ type: "text", text: "Thinking about the task." }], + status: "completed", + encrypted_content: "encrypted_reasoning_content", }, { id: "call_1",