Fix Claude thinking conversion and SSE usage

root · root · commit 89a0d13e59f8 · 2026-04-29T20:44:46.000+08:00
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -0,0 +1,35 @@
+name: tests
+# Runs the cargo test step below on pushes to master, on pull requests, and on manual dispatch.
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+  workflow_dispatch:
+# Workflow-wide env; presumably consumed by a libclang-based build script (e.g. bindgen) — TODO confirm.
+env:
+  LIBCLANG_PATH: /usr/lib/llvm-18/lib
+
+jobs:
+  cargo-tests:
+    runs-on: ubuntu-24.04
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+
+      - name: Install system build dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y --no-install-recommends clang libclang-dev
+
+      - name: Install Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+        with:
+          toolchain: 1.89.0
+
+      - name: Cache cargo registry
+        uses: Swatinem/rust-cache@v2
+      # NOTE(review): per the cargo docs `--tests` also runs lib/bin unit tests, not only tests/ — confirm the step name.
+      - name: Run integration tests
+        run: cargo test --tests
diff --git a/src/format.rs b/src/format.rs
@@ -53,6 +53,7 @@ struct InternalRequest {
     stream: bool,
     tools: Vec<InternalTool>,
     tool_choice: Option<Value>,
+    thinking: Option<Value>,
     extra: Map<String, Value>,
 }
 
@@ -480,6 +481,7 @@ fn parse_openai_chat(body: &Map<String, Value>) -> Result<InternalRequest> {
         stream: body.get("stream").and_then(Value::as_bool).unwrap_or(false),
         tools,
         tool_choice: body.get("tool_choice").cloned(),
+        thinking: None,
         extra: filter_keys(body, &["messages", "model", "stream", "tools", "tool_choice"]),
     })
 }
@@ -615,7 +617,8 @@ fn parse_claude_chat(body: &Map<String, Value>) -> Result<InternalRequest> {
         stream: body.get("stream").and_then(Value::as_bool).unwrap_or(false),
         tools: parse_claude_tools(body.get("tools")),
         tool_choice: body.get("tool_choice").cloned(),
-        extra: filter_keys(body, &["system", "messages", "model", "stream", "tools", "tool_choice"]),
+        thinking: body.get("thinking").cloned(),
+        extra: filter_keys(body, &["system", "messages", "model", "stream", "tools", "tool_choice", "thinking"]),
     })
 }
 
@@ -683,7 +686,8 @@ fn parse_claude_code(body: &Map<String, Value>) -> Result<InternalRequest> {
         stream: false,
         tools: Vec::new(),
         tool_choice: options.get("tool_choice").cloned(),
-        extra: filter_keys(&options, &["model", "systemPrompt", "mcpServers", "tool_choice"]),
+        thinking: options.get("thinking").cloned(),
+        extra: filter_keys(&options, &["model", "systemPrompt", "mcpServers", "tool_choice", "thinking"]),
     })
 }
 
@@ -762,6 +766,7 @@ fn parse_openai_responses(body: &Map<String, Value>) -> Result<InternalRequest>
         stream: body.get("stream").and_then(Value::as_bool).unwrap_or(false),
         tools: parse_responses_tools(body.get("tools")),
         tool_choice: body.get("tool_choice").cloned(),
+        thinking: None,
         extra,
     })
 }
@@ -1049,6 +1054,7 @@ fn parse_gemini_chat(body: &Map<String, Value>, path: &str) -> Result<InternalRe
         stream: path.contains("streamGenerateContent"),
         tools: parse_gemini_tools(body.get("tools")),
         tool_choice: body.get("toolConfig").cloned(),
+        thinking: None,
         extra,
     })
 }
@@ -1086,6 +1092,9 @@ fn emit_openai_chat(req: &InternalRequest) -> Value {
     if let Some(tool_choice) = &req.tool_choice {
         body.insert("tool_choice".to_string(), tool_choice.clone());
     }
+    if let Some(reasoning) = normalize_claude_thinking_for_openai(req.thinking.as_ref()) {
+        body.insert("reasoning".to_string(), reasoning);
+    }
     body.extend(req.extra.clone());
     Value::Object(body)
 }
@@ -1166,6 +1175,9 @@ fn emit_openai_responses(req: &InternalRequest) -> Value {
             normalize_tool_choice_for_openai_responses(tool_choice),
         );
     }
+    if let Some(reasoning) = normalize_claude_thinking_for_openai(req.thinking.as_ref()) {
+        body.insert("reasoning".to_string(), reasoning);
+    }
     Value::Object(body)
 }
 
@@ -1282,6 +1294,7 @@ fn strip_tools(req: InternalRequest) -> InternalRequest {
         stream: req.stream,
         tools: Vec::new(),
         tool_choice: None,
+        thinking: req.thinking,
         extra: req.extra,
     }
 }
@@ -1678,6 +1691,17 @@ fn normalize_tool_choice_for_openai_responses(tool_choice: &Value) -> Value {
     }
 }
 
+/// Maps a Claude `thinking` object to a `reasoning` object of the form `{"max_tokens": n}`.
+/// Yields `None` unless `type` is `"enabled"` and `budget_tokens` reads as an `i64`.
+fn normalize_claude_thinking_for_openai(thinking: Option<&Value>) -> Option<Value> {
+    let config = thinking?.as_object()?;
+    if config.get("type").and_then(Value::as_str) != Some("enabled") {
+        return None;
+    }
+    let budget = config.get("budget_tokens").and_then(Value::as_i64)?;
+    Some(json!({"max_tokens": budget}))
+}
+
 fn normalize_extra_for_openai_responses(extra: &Map<String, Value>) -> Map<String, Value> {
     if extra.is_empty() {
         return Map::new();
@@ -1934,4 +1958,54 @@ mod tests {
             other => panic!("unexpected error: {other:?}"),
         }
     }
+
+    /// A Claude `thinking` budget must surface as `reasoning.max_tokens` in the OpenAI chat body.
+    #[test]
+    fn transforms_claude_thinking_into_openai_chat_reasoning() {
+        let transform_cfg = json!({
+            "format_transform": {
+                "enabled": true,
+                "from": "claude_chat",
+                "to": "openai_chat"
+            }
+        });
+        let claude_request = json!({
+            "model": "gpt-4.1-mini",
+            "thinking": {
+                "type": "enabled",
+                "budget_tokens": 2048
+            },
+            "messages": [{"role": "user", "content": "Hi"}]
+        });
+        let plan = process_request(&transform_cfg, "/v1/messages", &[], claude_request)
+            .expect("request should transform");
+        assert_eq!(plan.target_format, Some(RequestFormat::OpenAiChat));
+        assert_eq!(plan.body.get("thinking"), None);
+        assert_eq!(plan.body.get("reasoning"), Some(&json!({"max_tokens": 2048})));
+    }
+
+    /// A Claude `thinking` budget must surface as `reasoning.max_tokens` in the OpenAI responses body.
+    #[test]
+    fn transforms_claude_thinking_into_openai_responses_reasoning() {
+        let transform_cfg = json!({
+            "format_transform": {
+                "enabled": true,
+                "from": "claude_chat",
+                "to": "openai_responses"
+            }
+        });
+        let claude_request = json!({
+            "model": "gpt-4.1-mini",
+            "thinking": {
+                "type": "enabled",
+                "budget_tokens": 1024
+            },
+            "messages": [{"role": "user", "content": "Hi"}]
+        });
+        let plan = process_request(&transform_cfg, "/v1/messages", &[], claude_request)
+            .expect("request should transform");
+        assert_eq!(plan.target_format, Some(RequestFormat::OpenAiResponses));
+        assert_eq!(plan.body.get("thinking"), None);
+        assert_eq!(plan.body.get("reasoning"), Some(&json!({"max_tokens": 1024})));
+    }
 }
diff --git a/src/proxy.rs b/src/proxy.rs
@@ -108,6 +108,10 @@ async fn proxy_entry_with_cfg(
                 }
             }
         })?;
+    let estimated_prompt_tokens = estimate_prompt_tokens_for_stream(
+        request_plan.target_format.or(request_plan.source_format),
+        &request_plan.body,
+    );
     let basic_mod_cfg = parsed
         .config
         .get("basic_moderation")
@@ -271,6 +275,7 @@ async fn proxy_entry_with_cfg(
         request_plan.target_format,
         delay_stream_header,
         is_stream,
+        estimated_prompt_tokens,
         &moderation_debug,
     )
     .await
@@ -435,6 +440,7 @@ async fn build_proxy_response(
     upstream_format: Option<crate::format::RequestFormat>,
     delay_stream_header: bool,
     request_expects_stream: bool,
+    estimated_prompt_tokens: Option<i64>,
     moderation_debug: &HeaderMap,
 ) -> Result<Response, ApiError> {
     let headers = filtered_response_headers(upstream_response.headers());
@@ -446,6 +452,7 @@ async fn build_proxy_response(
             upstream_format,
             delay_stream_header,
             header_says_stream,
+            estimated_prompt_tokens,
             &headers,
             moderation_debug,
         )
@@ -471,6 +478,7 @@ async fn build_streaming_proxy_response(
     upstream_format: Option<crate::format::RequestFormat>,
     delay_stream_header: bool,
     header_says_stream: bool,
+    estimated_prompt_tokens: Option<i64>,
     headers: &HeaderMap,
     moderation_debug: &HeaderMap,
 ) -> Result<Response, ApiError> {
@@ -568,6 +576,7 @@ async fn build_streaming_proxy_response(
             upstream,
             upstream_format.expect("upstream format for transformed stream"),
             client_format.expect("client format for transformed stream"),
+            estimated_prompt_tokens,
         )
     } else {
         build_passthrough_stream_body(buffered, upstream)
@@ -708,6 +717,7 @@ fn build_transformed_stream_body(
     upstream: ReqByteStream,
     from_format: crate::format::RequestFormat,
     to_format: crate::format::RequestFormat,
+    estimated_prompt_tokens: Option<i64>,
 ) -> BoxBody {
     struct TransformState {
         buffered: VecDeque<Bytes>,
@@ -720,7 +730,7 @@ fn build_transformed_stream_body(
     let state = TransformState {
         buffered: VecDeque::from(buffered),
         upstream,
-        transcoder: StreamTranscoder::new(from_format, to_format),
+        transcoder: StreamTranscoder::new(from_format, to_format, estimated_prompt_tokens),
         ready: VecDeque::new(),
         flushed: false,
     };
@@ -768,6 +778,38 @@ fn build_transformed_stream_body(
     boxed(Body::wrap_stream(stream))
 }
 
+/// Estimates prompt tokens from the request's moderation text; `None` if no format or blank text.
+fn estimate_prompt_tokens_for_stream(
+    format: Option<crate::format::RequestFormat>,
+    body: &Value,
+) -> Option<i64> {
+    let known_format = format?;
+    let prompt_text = extract::extract_text_for_moderation(body, known_format.as_str());
+    estimate_tokens_from_text(&prompt_text)
+}
+
+/// Roughly estimates the token count of `text`.
+///
+/// Surrounding whitespace is trimmed first; `None` is returned if nothing remains.
+/// Interior ASCII whitespace is ignored, every other ASCII character weighs 1 and
+/// every non-ASCII character weighs 2. The summed weight is divided by 4, rounding
+/// up, with a floor of one token.
+fn estimate_tokens_from_text(text: &str) -> Option<i64> {
+    let content = text.trim();
+    if content.is_empty() {
+        return None;
+    }
+
+    let weight: i64 = content
+        .chars()
+        .filter(|ch| !ch.is_ascii_whitespace())
+        .map(|ch| if ch.is_ascii() { 1 } else { 2 })
+        .sum();
+
+    // Ceiling division by four, never reporting fewer than one token.
+    Some(((weight + 3) / 4).max(1))
+}
+
 async fn collect_stream_bytes(
     buffered: Vec<Bytes>,
     mut upstream: ReqByteStream,
diff --git a/src/streaming.rs b/src/streaming.rs
@@ -16,7 +16,7 @@ pub fn maybe_transform_sse(
         return None;
     }
 
-    let mut transcoder = StreamTranscoder::new(from_format, to_format);
+    let mut transcoder = StreamTranscoder::new(from_format, to_format, None);
     let mut out = transcoder.feed_chunk(raw);
     out.extend(transcoder.flush());
     Some(out)
@@ -71,11 +71,26 @@ pub struct StreamTranscoder {
 }
 
 impl StreamTranscoder {
-    pub fn new(from_format: RequestFormat, to_format: RequestFormat) -> Self {
+    pub fn new(
+        from_format: RequestFormat,
+        to_format: RequestFormat,
+        estimated_prompt_tokens: Option<i64>,
+    ) -> Self {
+        let mut meta = Map::new();
+        if let Some(tokens) = estimated_prompt_tokens.filter(|tokens| *tokens > 0) {
+            meta.insert(
+                "usage".to_string(),
+                json!({
+                    "prompt_tokens": tokens,
+                    "completion_tokens": 0,
+                    "total_tokens": tokens
+                }),
+            );
+        }
         Self {
             from_format,
             sink: create_sink(to_format),
-            meta: Map::new(),
+            meta,
             started: false,
             seen_tool_calls: HashMap::new(),
             pending: Vec::new(),
@@ -491,14 +506,19 @@ impl InternalSink for ClaudeSink {
             return Vec::new();
         }
         self.started = true;
+        let usage = meta
+            .get("usage")
+            .and_then(chat_usage_to_claude_stream_usage)
+            .unwrap_or_else(|| json!({"input_tokens": 0, "output_tokens": 0}));
         vec![encode_json_sse_with_event(
             &json!({
                 "type": "message_start",
                 "message": {
                     "id": self.id,
                     "model": self.model,
                     "role": "assistant",
-                    "content": []
+                    "content": [],
+                    "usage": usage
                 }
             }),
             "message_start",
@@ -559,9 +579,8 @@ impl InternalSink for ClaudeSink {
             _ => "end_turn",
         };
         let usage_obj = usage
-            .and_then(|usage| usage.get("output_tokens").cloned())
-            .map(|output_tokens| json!({"output_tokens": output_tokens}))
-            .unwrap_or_else(|| json!({"output_tokens": 0}));
+            .and_then(chat_usage_to_claude_stream_usage)
+            .unwrap_or_else(|| json!({"input_tokens": 0, "output_tokens": 0}));
         vec![
             encode_json_sse_with_event(
                 &json!({
@@ -769,6 +788,9 @@ fn decode_openai_chat(
         meta.entry("created".to_string())
             .or_insert_with(|| json!(now_timestamp()));
     }
+    if let Some(usage) = event.get("usage").cloned() {
+        meta.insert("usage".to_string(), usage);
+    }
 
     let mut out = vec![InternalEvent::Start { meta: meta.clone() }];
     let delta = choice.get("delta").and_then(Value::as_object);
@@ -1280,6 +1302,17 @@ fn chat_usage_to_responses_usage(usage: &Value) -> Option<Value> {
     }))
 }
 
+/// Converts chat-style (or already Claude-style) usage into a Claude stream usage object.
+/// Returns `None` when `usage` is not a JSON object; counters that are absent become 0.
+fn chat_usage_to_claude_stream_usage(usage: &Value) -> Option<Value> {
+    let fields = usage.as_object()?;
+    let details = fields.get("prompt_tokens_details").and_then(Value::as_object);
+    let count = |primary: &str, fallback: &str| fields.get(primary).or_else(|| fields.get(fallback)).cloned().unwrap_or_else(|| json!(0));
+    let cached = |key: &str| details.and_then(|map| map.get(key)).cloned().unwrap_or_else(|| json!(0));
+    let (input, output) = (count("prompt_tokens", "input_tokens"), count("completion_tokens", "output_tokens"));
+    Some(json!({"input_tokens": input, "output_tokens": output, "cache_creation_input_tokens": cached("cached_creation_tokens"), "cache_read_input_tokens": cached("cached_tokens")}))
+}
+
 fn push_string_array_value(map: &mut Map<String, Value>, key: &str, value: String) {
     if key.is_empty() {
         return;
diff --git a/tests/http_proxy_stream_tests.rs b/tests/http_proxy_stream_tests.rs