Skip to content

Commit 251895e

Browse files
v0.16.0 apiproxy: compress tool_results in last user message
Previously the entire last user message was skipped for compression, which meant every tool_result block in agentic turns (bash output, file reads, web fetches — often megabytes) passed through uncompressed.

New behaviour:
- text blocks in the last user message: still verbatim (the human's current question must reach the LLM unaltered)
- tool_result blocks in the last user message: compressed like any other tool_result; the LLM can expand via omc_proxy_expand_ref if it needs the full content

This is the largest single remaining compression gap for Claude Code sessions, where the last message is almost always an array of tool_result blocks from bash/read_file/web_fetch calls.

Tests: 16/16 green (+1 new: last_user_tool_results_are_compressed)
1 parent 61335e7 commit 251895e

1 file changed

Lines changed: 74 additions & 2 deletions

File tree

omnimcode-apiproxy/src/main.rs

Lines changed: 74 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -614,8 +614,28 @@ fn rewrite_request_body(body: &[u8], state: &AppState) -> Result<(Bytes, Rewrite
614614
.map(|(i, _)| i);
615615

616616
for (idx, msg) in messages.iter_mut().enumerate() {
617-
if Some(idx) == last_user_idx { continue; }
617+
let is_last_user = Some(idx) == last_user_idx;
618618
let Some(content) = msg.get_mut("content") else { continue };
619+
620+
if is_last_user {
621+
// The last user message contains the human's current question — we
622+
// never rewrite text blocks there so the LLM sees it verbatim.
623+
// However, tool_result blocks in the same message (bash output, file
624+
// reads, etc.) can be megabytes of already-executed output that the
625+
// LLM doesn't need to re-read in full to answer. Compress those.
626+
if let Value::Array(blocks) = content {
627+
for block in blocks.iter_mut() {
628+
if block.get("type").and_then(Value::as_str) == Some("tool_result") {
629+
if let Some(inner) = block.get_mut("content") {
630+
rewrite_tool_result_content(inner, state, &mut out, &mut seen);
631+
}
632+
}
633+
// text blocks in the last user message: pass through verbatim.
634+
}
635+
}
636+
// String content (plain question): skip entirely.
637+
continue;
638+
}
619639
match content {
620640
Value::String(s) => {
621641
if s.len() >= state.rewrite_threshold {
@@ -1147,6 +1167,8 @@ mod tests {
11471167
/// what was asked.
11481168
#[test]
11491169
fn last_user_message_never_rewritten() {
1170+
// Text content (the human's actual question) in the last user message
1171+
// must always pass through verbatim — the LLM needs to see it to respond.
11501172
let state = test_state(256);
11511173
let big_question = "Please analyze: ".to_string() + &"Q".repeat(1000);
11521174
let req = json!({
@@ -1162,7 +1184,57 @@ mod tests {
11621184
let v: Value = serde_json::from_slice(&out).unwrap();
11631185
let last = v["messages"][2]["content"].as_str().unwrap();
11641186
assert_eq!(last, big_question,
1165-
"last user message must be byte-identical to input");
1187+
"last user message text content must be byte-identical to input");
1188+
}
1189+
1190+
/// tool_result blocks in the last user message ARE compressed even though
/// the wrapping message is "last user". In agentic workflows the last
/// message is nearly always an array of tool_results (bash output, file
/// reads, …) — often megabytes — and the LLM can expand via
/// omc_proxy_expand_ref if it needs the full content.
///
/// The request built here mixes one oversized `tool_result` block with one
/// short `text` block in the final user message, then checks that only the
/// former was replaced by an `<omc:ref` marker.
#[test]
fn last_user_tool_results_are_compressed() {
    let threshold = 256;
    let state = test_state(threshold);

    // A large tool_result body — clearly above threshold.
    let big_output = "line: data output\n".repeat(100); // ~1.8KB
    assert!(big_output.len() > threshold, "pre-condition: must exceed threshold");

    let req = json!({
        "model": "test", "max_tokens": 10,
        "messages": [
            {"role": "user", "content": "run the script"},
            {"role": "assistant", "content": [
                {"type": "tool_use", "id": "tu_1", "name": "bash",
                 "input": {"command": "echo hello"}}
            ]},
            {"role": "user", "content": [
                {"type": "tool_result", "tool_use_id": "tu_1",
                 "content": big_output.clone()},
                {"type": "text", "text": "What does this mean?"}
            ]}
        ]
    });

    let body = serde_json::to_vec(&req).unwrap();
    let (out, outcome) = rewrite_request_body(&body, &state).unwrap();
    let v: Value = serde_json::from_slice(&out).unwrap();

    // tool_result block must have been rewritten to a marker
    let last_msg_content = v["messages"][2]["content"]
        .as_array()
        .expect("last user message content must still be an array of blocks");
    let tool_result_block = &last_msg_content[0];
    let tr_content = tool_result_block["content"]
        .as_str()
        .expect("tool_result content must still be a string after rewriting");
    // Build the failure preview from chars rather than a byte slice:
    // `&tr_content[..80]` would itself panic if the rewritten content were
    // shorter than 80 bytes or if byte 80 fell inside a multi-byte char,
    // hiding the real assertion message.
    assert!(tr_content.starts_with("<omc:ref"),
        "tool_result in last user message must be compressed; got: {}",
        tr_content.chars().take(80).collect::<String>());

    // text block must pass through verbatim
    let text_block = &last_msg_content[1];
    assert_eq!(text_block["text"].as_str().unwrap(), "What does this mean?",
        "text block in last user message must be verbatim");

    // stat must have ticked
    assert!(outcome.any(), "rewrite outcome must report at least one rewritten block");
}
11671239

11681240
/// Marker round-trip: any text we compress must come back IDENTICAL via

0 commit comments

Comments
 (0)