Skip to content

Commit 71219c1

Browse files
committed
fix(chat): return reasoning content on /v1/chat/completions
Upstream Codex SSE only emits response.reasoning_summary_text.delta when reasoning.summary is set, but TranslateRequest only sent reasoning.effort -- so thinking text never reached the client. The non-stream path also dropped reasoning entirely (compactMessage had no field for it, and the handler's accumulator skipped the reasoning events).

- Translator now sets reasoning.summary = "auto" (even when no reasoning effort is given) so upstream produces the summary stream.
- streamDelta gains reasoning (OpenAI o1 style, what Cherry Studio reads by default) alongside its existing reasoning_content (DeepSeek style), and compactMessage gains both fields; we emit both for cross-client compatibility.
- Non-stream path collects reasoning_summary_text.delta / reasoning_text.delta into fullReasoning and threads it through BuildCompactResponse.
- TranslateCompactResponse extracts reasoning items' summary/content text from response.output for the /v1/responses-fed compact path.

Closes #156
1 parent aad9e92 commit 71219c1

2 files changed

Lines changed: 74 additions & 19 deletions

File tree

proxy/handler.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2367,6 +2367,7 @@ func (h *Handler) ChatCompletions(c *gin.Context) {
23672367
}
23682368
} else {
23692369
var fullContent strings.Builder
2370+
var fullReasoning strings.Builder
23702371
var toolCalls []ToolCallResult
23712372

23722373
readErr = ReadSSEStream(resp.Body, func(data []byte) bool {
@@ -2381,6 +2382,8 @@ func (h *Handler) ChatCompletions(c *gin.Context) {
23812382
delta := parsed.Get("delta").String()
23822383
deltaCharCount += len(delta)
23832384
fullContent.WriteString(delta)
2385+
case "response.reasoning_summary_text.delta", "response.reasoning_text.delta":
2386+
fullReasoning.WriteString(parsed.Get("delta").String())
23842387
case "response.function_call_arguments.delta":
23852388
deltaCharCount += len(parsed.Get("delta").String())
23862389
case "response.completed":
@@ -2400,7 +2403,7 @@ func (h *Handler) ChatCompletions(c *gin.Context) {
24002403
return true
24012404
})
24022405

2403-
compactResult = BuildCompactResponse(chunkID, model, created, fullContent.String(), toolCalls, usage)
2406+
compactResult = BuildCompactResponse(chunkID, model, created, fullContent.String(), fullReasoning.String(), toolCalls, usage)
24042407
}
24052408

24062409
// 断流检测 + token 估算
@@ -2540,6 +2543,7 @@ func (h *Handler) handleStreamResponse(c *gin.Context, body io.Reader, model, ch
25402543
// handleCompactResponse 处理非流式响应
25412544
func (h *Handler) handleCompactResponse(c *gin.Context, body io.Reader, model, chunkID string, created int64) {
25422545
var fullContent strings.Builder
2546+
var fullReasoning strings.Builder
25432547
var usage *UsageInfo
25442548

25452549
_ = ReadSSEStream(body, func(data []byte) bool {
@@ -2548,6 +2552,8 @@ func (h *Handler) handleCompactResponse(c *gin.Context, body io.Reader, model, c
25482552
case "response.output_text.delta":
25492553
delta := gjson.GetBytes(data, "delta").String()
25502554
fullContent.WriteString(delta)
2555+
case "response.reasoning_summary_text.delta", "response.reasoning_text.delta":
2556+
fullReasoning.WriteString(gjson.GetBytes(data, "delta").String())
25512557
case "response.completed":
25522558
usage = extractUsage(data)
25532559
return false
@@ -2557,7 +2563,7 @@ func (h *Handler) handleCompactResponse(c *gin.Context, body io.Reader, model, c
25572563
return true
25582564
})
25592565

2560-
result := BuildCompactResponse(chunkID, model, created, fullContent.String(), nil, usage)
2566+
result := BuildCompactResponse(chunkID, model, created, fullContent.String(), fullReasoning.String(), nil, usage)
25612567

25622568
c.Data(http.StatusOK, "application/json", result)
25632569
}

proxy/translator.go

Lines changed: 66 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,15 @@ type streamChoice struct {
8787
FinishReason *string `json:"finish_reason"`
8888
}
8989

90-
// streamDelta 流式块中的增量内容
90+
// streamDelta 流式块中的增量内容。
91+
//
92+
// reasoning 字段同时输出两种命名,兼容不同客户端:
93+
// - reasoning: OpenAI 官方 o1/GPT-5 风格(Cherry Studio 等默认走这个)
94+
// - reasoning_content: DeepSeek / OpenRouter / new-api 等克隆站点风格
9195
type streamDelta struct {
9296
Role string `json:"role,omitempty"`
9397
Content *string `json:"content,omitempty"`
98+
Reasoning *string `json:"reasoning,omitempty"`
9499
ReasoningContent *string `json:"reasoning_content,omitempty"`
95100
ToolCalls []toolCallDelta `json:"tool_calls,omitempty"`
96101
}
@@ -126,11 +131,13 @@ type compactChoice struct {
126131
FinishReason string `json:"finish_reason"`
127132
}
128133

129-
// compactMessage 非流式响应中的消息
134+
// compactMessage 非流式响应中的消息。reasoning / reasoning_content 同时输出兼容多端。
130135
type compactMessage struct {
131-
Role string `json:"role"`
132-
Content *string `json:"content"`
133-
ToolCalls []compactToolCallOut `json:"tool_calls,omitempty"`
136+
Role string `json:"role"`
137+
Content *string `json:"content"`
138+
Reasoning *string `json:"reasoning,omitempty"`
139+
ReasoningContent *string `json:"reasoning_content,omitempty"`
140+
ToolCalls []compactToolCallOut `json:"tool_calls,omitempty"`
134141
}
135142

136143
// compactToolCallOut 非流式响应中的工具调用
@@ -1147,9 +1154,16 @@ func TranslateRequest(rawJSON []byte) ([]byte, error) {
11471154
normalizeResponsesInputMessageContent(out)
11481155
normalizeResponsesInputItemIDs(out)
11491156

1150-
// 2. reasoning effort
1157+
// 2. reasoning effort + summary
1158+
// 显式向 Codex 请求 summary,否则上游不会发 response.reasoning_summary_text.delta,
1159+
// chat/completions 客户端就拿不到思考内容(issue #156)。
11511160
if effort := normalizeReasoningEffort(req.ReasoningEffort); effort != "" {
1152-
out["reasoning"] = map[string]any{"effort": effort}
1161+
out["reasoning"] = map[string]any{
1162+
"effort": effort,
1163+
"summary": "auto",
1164+
}
1165+
} else {
1166+
out["reasoning"] = map[string]any{"summary": "auto"}
11531167
}
11541168

11551169
// 3. service tier(兼容客户端字段;只有 fast/priority 会显式传给 Codex 上游)
@@ -2235,13 +2249,17 @@ func newContentChunk(id, model string, created int64, content string) []byte {
22352249
return b
22362250
}
22372251

2238-
// newReasoningChunk 构建推理内容流式块
2252+
// newReasoningChunk 构建推理内容流式块。
2253+
// 同时填入 reasoning 与 reasoning_content,兼容 OpenAI/DeepSeek 两套客户端风格。
22392254
func newReasoningChunk(id, model string, created int64, reasoning string) []byte {
22402255
chunk := openAIStreamChunk{
22412256
ID: id, Object: "chat.completion.chunk", Created: created, Model: model,
22422257
Choices: []streamChoice{{
22432258
Index: 0,
2244-
Delta: &streamDelta{ReasoningContent: &reasoning},
2259+
Delta: &streamDelta{
2260+
Reasoning: &reasoning,
2261+
ReasoningContent: &reasoning,
2262+
},
22452263
}},
22462264
}
22472265
b, _ := json.Marshal(chunk)
@@ -2458,11 +2476,12 @@ func (st *StreamTranslator) Translate(eventData []byte) ([]byte, bool) {
24582476

24592477
// TranslateCompactResponse 将 Codex 非流式响应转换为 OpenAI 格式
24602478
func TranslateCompactResponse(responseData []byte, model string, id string) []byte {
2461-
var outputText string
2479+
var outputText, reasoningText string
24622480
output := gjson.GetBytes(responseData, "output")
24632481
if output.IsArray() {
24642482
output.ForEach(func(_, item gjson.Result) bool {
2465-
if item.Get("type").String() == "message" {
2483+
switch item.Get("type").String() {
2484+
case "message":
24662485
content := item.Get("content")
24672486
if content.IsArray() {
24682487
content.ForEach(func(_, part gjson.Result) bool {
@@ -2472,23 +2491,47 @@ func TranslateCompactResponse(responseData []byte, model string, id string) []by
24722491
return true
24732492
})
24742493
}
2494+
case "reasoning":
2495+
// Codex 在 response.output 里把思考过程作为 reasoning item,
2496+
// content/summary 数组下每个元素是 {type, text} 形式。
2497+
summary := item.Get("summary")
2498+
if summary.IsArray() {
2499+
summary.ForEach(func(_, part gjson.Result) bool {
2500+
reasoningText += part.Get("text").String()
2501+
return true
2502+
})
2503+
}
2504+
content := item.Get("content")
2505+
if content.IsArray() {
2506+
content.ForEach(func(_, part gjson.Result) bool {
2507+
reasoningText += part.Get("text").String()
2508+
return true
2509+
})
2510+
}
24752511
}
24762512
return true
24772513
})
24782514
}
24792515

24802516
usage := extractUsage(responseData)
24812517

2518+
msg := compactMessage{
2519+
Role: "assistant",
2520+
Content: &outputText,
2521+
}
2522+
if reasoningText != "" {
2523+
r := reasoningText
2524+
msg.Reasoning = &r
2525+
msg.ReasoningContent = &r
2526+
}
2527+
24822528
resp := openAICompactResponse{
24832529
ID: id,
24842530
Object: "chat.completion",
24852531
Model: model,
24862532
Choices: []compactChoice{{
2487-
Index: 0,
2488-
Message: compactMessage{
2489-
Role: "assistant",
2490-
Content: &outputText,
2491-
},
2533+
Index: 0,
2534+
Message: msg,
24922535
FinishReason: "stop",
24932536
}},
24942537
Usage: usage,
@@ -2499,12 +2542,18 @@ func TranslateCompactResponse(responseData []byte, model string, id string) []by
24992542

25002543
// BuildCompactResponse 构建非流式完整响应(供 handler.go 调用,替代内联 sjson)
25012544
// 当有 toolCalls 且 content 为空时,content 输出为 JSON null
2502-
func BuildCompactResponse(id, model string, created int64, content string, toolCalls []ToolCallResult, usage *UsageInfo) []byte {
2545+
// reasoning 为思考过程拼接文本,空字符串时 reasoning / reasoning_content 字段被省略。
2546+
func BuildCompactResponse(id, model string, created int64, content, reasoning string, toolCalls []ToolCallResult, usage *UsageInfo) []byte {
25032547
finishReason := "stop"
25042548
msg := compactMessage{
25052549
Role: "assistant",
25062550
Content: &content,
25072551
}
2552+
if reasoning != "" {
2553+
r := reasoning
2554+
msg.Reasoning = &r
2555+
msg.ReasoningContent = &r
2556+
}
25082557

25092558
if len(toolCalls) > 0 {
25102559
finishReason = "tool_calls"

0 commit comments

Comments
 (0)