Skip to content

Commit 89a0d13

Browse files
author
root
committed
Fix Claude thinking conversion and SSE usage
1 parent 3e92c12 commit 89a0d13

5 files changed

Lines changed: 299 additions & 13 deletions

File tree

.github/workflows/tests.yml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
name: tests
2+
3+
on:
4+
push:
5+
branches:
6+
- master
7+
pull_request:
8+
workflow_dispatch:
9+
10+
env:
11+
LIBCLANG_PATH: /usr/lib/llvm-18/lib
12+
13+
jobs:
14+
cargo-tests:
15+
runs-on: ubuntu-24.04
16+
17+
steps:
18+
- name: Checkout
19+
uses: actions/checkout@v6
20+
21+
- name: Install system build dependencies
22+
run: |
23+
sudo apt-get update
24+
sudo apt-get install -y --no-install-recommends clang libclang-dev
25+
26+
- name: Install Rust toolchain
27+
uses: dtolnay/rust-toolchain@stable
28+
with:
29+
toolchain: 1.89.0
30+
31+
- name: Cache cargo registry
32+
uses: Swatinem/rust-cache@v2
33+
34+
- name: Run integration tests
35+
run: cargo test --tests

src/format.rs

Lines changed: 76 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ struct InternalRequest {
5353
stream: bool,
5454
tools: Vec<InternalTool>,
5555
tool_choice: Option<Value>,
56+
thinking: Option<Value>,
5657
extra: Map<String, Value>,
5758
}
5859

@@ -480,6 +481,7 @@ fn parse_openai_chat(body: &Map<String, Value>) -> Result<InternalRequest> {
480481
stream: body.get("stream").and_then(Value::as_bool).unwrap_or(false),
481482
tools,
482483
tool_choice: body.get("tool_choice").cloned(),
484+
thinking: None,
483485
extra: filter_keys(body, &["messages", "model", "stream", "tools", "tool_choice"]),
484486
})
485487
}
@@ -615,7 +617,8 @@ fn parse_claude_chat(body: &Map<String, Value>) -> Result<InternalRequest> {
615617
stream: body.get("stream").and_then(Value::as_bool).unwrap_or(false),
616618
tools: parse_claude_tools(body.get("tools")),
617619
tool_choice: body.get("tool_choice").cloned(),
618-
extra: filter_keys(body, &["system", "messages", "model", "stream", "tools", "tool_choice"]),
620+
thinking: body.get("thinking").cloned(),
621+
extra: filter_keys(body, &["system", "messages", "model", "stream", "tools", "tool_choice", "thinking"]),
619622
})
620623
}
621624

@@ -683,7 +686,8 @@ fn parse_claude_code(body: &Map<String, Value>) -> Result<InternalRequest> {
683686
stream: false,
684687
tools: Vec::new(),
685688
tool_choice: options.get("tool_choice").cloned(),
686-
extra: filter_keys(&options, &["model", "systemPrompt", "mcpServers", "tool_choice"]),
689+
thinking: options.get("thinking").cloned(),
690+
extra: filter_keys(&options, &["model", "systemPrompt", "mcpServers", "tool_choice", "thinking"]),
687691
})
688692
}
689693

@@ -762,6 +766,7 @@ fn parse_openai_responses(body: &Map<String, Value>) -> Result<InternalRequest>
762766
stream: body.get("stream").and_then(Value::as_bool).unwrap_or(false),
763767
tools: parse_responses_tools(body.get("tools")),
764768
tool_choice: body.get("tool_choice").cloned(),
769+
thinking: None,
765770
extra,
766771
})
767772
}
@@ -1049,6 +1054,7 @@ fn parse_gemini_chat(body: &Map<String, Value>, path: &str) -> Result<InternalRe
10491054
stream: path.contains("streamGenerateContent"),
10501055
tools: parse_gemini_tools(body.get("tools")),
10511056
tool_choice: body.get("toolConfig").cloned(),
1057+
thinking: None,
10521058
extra,
10531059
})
10541060
}
@@ -1086,6 +1092,9 @@ fn emit_openai_chat(req: &InternalRequest) -> Value {
10861092
if let Some(tool_choice) = &req.tool_choice {
10871093
body.insert("tool_choice".to_string(), tool_choice.clone());
10881094
}
1095+
if let Some(reasoning) = normalize_claude_thinking_for_openai(req.thinking.as_ref()) {
1096+
body.insert("reasoning".to_string(), reasoning);
1097+
}
10891098
body.extend(req.extra.clone());
10901099
Value::Object(body)
10911100
}
@@ -1166,6 +1175,9 @@ fn emit_openai_responses(req: &InternalRequest) -> Value {
11661175
normalize_tool_choice_for_openai_responses(tool_choice),
11671176
);
11681177
}
1178+
if let Some(reasoning) = normalize_claude_thinking_for_openai(req.thinking.as_ref()) {
1179+
body.insert("reasoning".to_string(), reasoning);
1180+
}
11691181
Value::Object(body)
11701182
}
11711183

@@ -1282,6 +1294,7 @@ fn strip_tools(req: InternalRequest) -> InternalRequest {
12821294
stream: req.stream,
12831295
tools: Vec::new(),
12841296
tool_choice: None,
1297+
thinking: req.thinking,
12851298
extra: req.extra,
12861299
}
12871300
}
@@ -1678,6 +1691,17 @@ fn normalize_tool_choice_for_openai_responses(tool_choice: &Value) -> Value {
16781691
}
16791692
}
16801693

1694+
fn normalize_claude_thinking_for_openai(thinking: Option<&Value>) -> Option<Value> {
1695+
let thinking = thinking?.as_object()?;
1696+
match thinking.get("type").and_then(Value::as_str) {
1697+
Some("enabled") => {
1698+
let budget_tokens = thinking.get("budget_tokens").and_then(Value::as_i64)?;
1699+
Some(json!({"max_tokens": budget_tokens}))
1700+
}
1701+
_ => None,
1702+
}
1703+
}
1704+
16811705
fn normalize_extra_for_openai_responses(extra: &Map<String, Value>) -> Map<String, Value> {
16821706
if extra.is_empty() {
16831707
return Map::new();
@@ -1934,4 +1958,54 @@ mod tests {
19341958
other => panic!("unexpected error: {other:?}"),
19351959
}
19361960
}
1961+
1962+
#[test]
1963+
fn transforms_claude_thinking_into_openai_chat_reasoning() {
1964+
let config = json!({
1965+
"format_transform": {
1966+
"enabled": true,
1967+
"from": "claude_chat",
1968+
"to": "openai_chat"
1969+
}
1970+
});
1971+
let body = json!({
1972+
"model": "gpt-4.1-mini",
1973+
"thinking": {
1974+
"type": "enabled",
1975+
"budget_tokens": 2048
1976+
},
1977+
"messages": [{"role": "user", "content": "Hi"}]
1978+
});
1979+
1980+
let plan = process_request(&config, "/v1/messages", &[], body).expect("request should transform");
1981+
1982+
assert_eq!(plan.target_format, Some(RequestFormat::OpenAiChat));
1983+
assert_eq!(plan.body.get("thinking"), None);
1984+
assert_eq!(plan.body.get("reasoning"), Some(&json!({"max_tokens": 2048})));
1985+
}
1986+
1987+
#[test]
1988+
fn transforms_claude_thinking_into_openai_responses_reasoning() {
1989+
let config = json!({
1990+
"format_transform": {
1991+
"enabled": true,
1992+
"from": "claude_chat",
1993+
"to": "openai_responses"
1994+
}
1995+
});
1996+
let body = json!({
1997+
"model": "gpt-4.1-mini",
1998+
"thinking": {
1999+
"type": "enabled",
2000+
"budget_tokens": 1024
2001+
},
2002+
"messages": [{"role": "user", "content": "Hi"}]
2003+
});
2004+
2005+
let plan = process_request(&config, "/v1/messages", &[], body).expect("request should transform");
2006+
2007+
assert_eq!(plan.target_format, Some(RequestFormat::OpenAiResponses));
2008+
assert_eq!(plan.body.get("thinking"), None);
2009+
assert_eq!(plan.body.get("reasoning"), Some(&json!({"max_tokens": 1024})));
2010+
}
19372011
}

src/proxy.rs

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,10 @@ async fn proxy_entry_with_cfg(
108108
}
109109
}
110110
})?;
111+
let estimated_prompt_tokens = estimate_prompt_tokens_for_stream(
112+
request_plan.target_format.or(request_plan.source_format),
113+
&request_plan.body,
114+
);
111115
let basic_mod_cfg = parsed
112116
.config
113117
.get("basic_moderation")
@@ -271,6 +275,7 @@ async fn proxy_entry_with_cfg(
271275
request_plan.target_format,
272276
delay_stream_header,
273277
is_stream,
278+
estimated_prompt_tokens,
274279
&moderation_debug,
275280
)
276281
.await
@@ -435,6 +440,7 @@ async fn build_proxy_response(
435440
upstream_format: Option<crate::format::RequestFormat>,
436441
delay_stream_header: bool,
437442
request_expects_stream: bool,
443+
estimated_prompt_tokens: Option<i64>,
438444
moderation_debug: &HeaderMap,
439445
) -> Result<Response, ApiError> {
440446
let headers = filtered_response_headers(upstream_response.headers());
@@ -446,6 +452,7 @@ async fn build_proxy_response(
446452
upstream_format,
447453
delay_stream_header,
448454
header_says_stream,
455+
estimated_prompt_tokens,
449456
&headers,
450457
moderation_debug,
451458
)
@@ -471,6 +478,7 @@ async fn build_streaming_proxy_response(
471478
upstream_format: Option<crate::format::RequestFormat>,
472479
delay_stream_header: bool,
473480
header_says_stream: bool,
481+
estimated_prompt_tokens: Option<i64>,
474482
headers: &HeaderMap,
475483
moderation_debug: &HeaderMap,
476484
) -> Result<Response, ApiError> {
@@ -568,6 +576,7 @@ async fn build_streaming_proxy_response(
568576
upstream,
569577
upstream_format.expect("upstream format for transformed stream"),
570578
client_format.expect("client format for transformed stream"),
579+
estimated_prompt_tokens,
571580
)
572581
} else {
573582
build_passthrough_stream_body(buffered, upstream)
@@ -708,6 +717,7 @@ fn build_transformed_stream_body(
708717
upstream: ReqByteStream,
709718
from_format: crate::format::RequestFormat,
710719
to_format: crate::format::RequestFormat,
720+
estimated_prompt_tokens: Option<i64>,
711721
) -> BoxBody {
712722
struct TransformState {
713723
buffered: VecDeque<Bytes>,
@@ -720,7 +730,7 @@ fn build_transformed_stream_body(
720730
let state = TransformState {
721731
buffered: VecDeque::from(buffered),
722732
upstream,
723-
transcoder: StreamTranscoder::new(from_format, to_format),
733+
transcoder: StreamTranscoder::new(from_format, to_format, estimated_prompt_tokens),
724734
ready: VecDeque::new(),
725735
flushed: false,
726736
};
@@ -768,6 +778,38 @@ fn build_transformed_stream_body(
768778
boxed(Body::wrap_stream(stream))
769779
}
770780

781+
fn estimate_prompt_tokens_for_stream(
782+
format: Option<crate::format::RequestFormat>,
783+
body: &Value,
784+
) -> Option<i64> {
785+
let format = format?;
786+
let request_format = format.as_str();
787+
let text = extract::extract_text_for_moderation(body, request_format);
788+
estimate_tokens_from_text(&text)
789+
}
790+
791+
fn estimate_tokens_from_text(text: &str) -> Option<i64> {
792+
let trimmed = text.trim();
793+
if trimmed.is_empty() {
794+
return None;
795+
}
796+
797+
let mut estimate = 0_i64;
798+
for ch in trimmed.chars() {
799+
if ch.is_ascii_whitespace() {
800+
continue;
801+
}
802+
if ch.is_ascii() {
803+
estimate += 1;
804+
} else {
805+
estimate += 2;
806+
}
807+
}
808+
809+
let estimated_tokens = ((estimate + 3) / 4).max(1);
810+
Some(estimated_tokens)
811+
}
812+
771813
async fn collect_stream_bytes(
772814
buffered: Vec<Bytes>,
773815
mut upstream: ReqByteStream,

src/streaming.rs

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ pub fn maybe_transform_sse(
1616
return None;
1717
}
1818

19-
let mut transcoder = StreamTranscoder::new(from_format, to_format);
19+
let mut transcoder = StreamTranscoder::new(from_format, to_format, None);
2020
let mut out = transcoder.feed_chunk(raw);
2121
out.extend(transcoder.flush());
2222
Some(out)
@@ -71,11 +71,26 @@ pub struct StreamTranscoder {
7171
}
7272

7373
impl StreamTranscoder {
74-
pub fn new(from_format: RequestFormat, to_format: RequestFormat) -> Self {
74+
pub fn new(
75+
from_format: RequestFormat,
76+
to_format: RequestFormat,
77+
estimated_prompt_tokens: Option<i64>,
78+
) -> Self {
79+
let mut meta = Map::new();
80+
if let Some(tokens) = estimated_prompt_tokens.filter(|tokens| *tokens > 0) {
81+
meta.insert(
82+
"usage".to_string(),
83+
json!({
84+
"prompt_tokens": tokens,
85+
"completion_tokens": 0,
86+
"total_tokens": tokens
87+
}),
88+
);
89+
}
7590
Self {
7691
from_format,
7792
sink: create_sink(to_format),
78-
meta: Map::new(),
93+
meta,
7994
started: false,
8095
seen_tool_calls: HashMap::new(),
8196
pending: Vec::new(),
@@ -491,14 +506,19 @@ impl InternalSink for ClaudeSink {
491506
return Vec::new();
492507
}
493508
self.started = true;
509+
let usage = meta
510+
.get("usage")
511+
.and_then(chat_usage_to_claude_stream_usage)
512+
.unwrap_or_else(|| json!({"input_tokens": 0, "output_tokens": 0}));
494513
vec![encode_json_sse_with_event(
495514
&json!({
496515
"type": "message_start",
497516
"message": {
498517
"id": self.id,
499518
"model": self.model,
500519
"role": "assistant",
501-
"content": []
520+
"content": [],
521+
"usage": usage
502522
}
503523
}),
504524
"message_start",
@@ -559,9 +579,8 @@ impl InternalSink for ClaudeSink {
559579
_ => "end_turn",
560580
};
561581
let usage_obj = usage
562-
.and_then(|usage| usage.get("output_tokens").cloned())
563-
.map(|output_tokens| json!({"output_tokens": output_tokens}))
564-
.unwrap_or_else(|| json!({"output_tokens": 0}));
582+
.and_then(chat_usage_to_claude_stream_usage)
583+
.unwrap_or_else(|| json!({"input_tokens": 0, "output_tokens": 0}));
565584
vec![
566585
encode_json_sse_with_event(
567586
&json!({
@@ -769,6 +788,9 @@ fn decode_openai_chat(
769788
meta.entry("created".to_string())
770789
.or_insert_with(|| json!(now_timestamp()));
771790
}
791+
if let Some(usage) = event.get("usage").cloned() {
792+
meta.insert("usage".to_string(), usage);
793+
}
772794

773795
let mut out = vec![InternalEvent::Start { meta: meta.clone() }];
774796
let delta = choice.get("delta").and_then(Value::as_object);
@@ -1280,6 +1302,17 @@ fn chat_usage_to_responses_usage(usage: &Value) -> Option<Value> {
12801302
}))
12811303
}
12821304

1305+
fn chat_usage_to_claude_stream_usage(usage: &Value) -> Option<Value> {
1306+
let usage = usage.as_object()?;
1307+
let prompt_details = usage.get("prompt_tokens_details").and_then(Value::as_object);
1308+
Some(json!({
1309+
"input_tokens": usage.get("prompt_tokens").cloned().unwrap_or_else(|| usage.get("input_tokens").cloned().unwrap_or_else(|| json!(0))),
1310+
"output_tokens": usage.get("completion_tokens").cloned().unwrap_or_else(|| usage.get("output_tokens").cloned().unwrap_or_else(|| json!(0))),
1311+
"cache_creation_input_tokens": prompt_details.and_then(|details| details.get("cached_creation_tokens").cloned()).unwrap_or_else(|| json!(0)),
1312+
"cache_read_input_tokens": prompt_details.and_then(|details| details.get("cached_tokens").cloned()).unwrap_or_else(|| json!(0)),
1313+
}))
1314+
}
1315+
12831316
fn push_string_array_value(map: &mut Map<String, Value>, key: &str, value: String) {
12841317
if key.is_empty() {
12851318
return;

0 commit comments

Comments
 (0)