@@ -614,8 +614,28 @@ fn rewrite_request_body(body: &[u8], state: &AppState) -> Result<(Bytes, Rewrite
614614 . map ( |( i, _) | i) ;
615615
616616 for ( idx, msg) in messages. iter_mut ( ) . enumerate ( ) {
617- if Some ( idx) == last_user_idx { continue ; }
617+ let is_last_user = Some ( idx) == last_user_idx;
618618 let Some ( content) = msg. get_mut ( "content" ) else { continue } ;
619+
620+ if is_last_user {
621+ // The last user message contains the human's current question — we
622+ // never rewrite text blocks there so the LLM sees it verbatim.
623+ // However, tool_result blocks in the same message (bash output, file
624+ // reads, etc.) can be megabytes of already-executed output that the
625+ // LLM doesn't need to re-read in full to answer. Compress those.
626+ if let Value :: Array ( blocks) = content {
627+ for block in blocks. iter_mut ( ) {
628+ if block. get ( "type" ) . and_then ( Value :: as_str) == Some ( "tool_result" ) {
629+ if let Some ( inner) = block. get_mut ( "content" ) {
630+ rewrite_tool_result_content ( inner, state, & mut out, & mut seen) ;
631+ }
632+ }
633+ // text blocks in the last user message: pass through verbatim.
634+ }
635+ }
636+ // String content (plain question): skip entirely.
637+ continue ;
638+ }
619639 match content {
620640 Value :: String ( s) => {
621641 if s. len ( ) >= state. rewrite_threshold {
@@ -1147,6 +1167,8 @@ mod tests {
11471167 /// what was asked.
11481168 #[ test]
11491169 fn last_user_message_never_rewritten ( ) {
1170+ // Text content (the human's actual question) in the last user message
1171+ // must always pass through verbatim — the LLM needs to see it to respond.
11501172 let state = test_state ( 256 ) ;
11511173 let big_question = "Please analyze: " . to_string ( ) + & "Q" . repeat ( 1000 ) ;
11521174 let req = json ! ( {
@@ -1162,7 +1184,57 @@ mod tests {
11621184 let v: Value = serde_json:: from_slice ( & out) . unwrap ( ) ;
11631185 let last = v[ "messages" ] [ 2 ] [ "content" ] . as_str ( ) . unwrap ( ) ;
11641186 assert_eq ! ( last, big_question,
1165- "last user message must be byte-identical to input" ) ;
1187+ "last user message text content must be byte-identical to input" ) ;
1188+ }
1189+
/// tool_result blocks in the last user message ARE compressed even though
/// the wrapping message is "last user". In agentic workflows the last
/// message is nearly always an array of tool_results (bash output, file
/// reads, …) — often megabytes — and the LLM can expand via
/// omc_proxy_expand_ref if it needs the full content.
///
/// The sibling `text` block in the same message must still pass through
/// verbatim, and the returned outcome must report that a rewrite happened.
#[test]
fn last_user_tool_results_are_compressed() {
    let threshold = 256;
    let state = test_state(threshold);

    // A large tool_result body — clearly above threshold (~1.9 KB).
    let big_output = "line: data output\n ".repeat(100);
    assert!(
        big_output.len() > threshold,
        "pre-condition: must exceed threshold"
    );

    // `json!` only borrows interpolated expressions, so no clone is needed.
    let req = json!({
        "model": "test", "max_tokens": 10,
        "messages": [
            { "role": "user", "content": "run the script" },
            { "role": "assistant", "content": [
                { "type": "tool_use", "id": "tu_1", "name": "bash",
                  "input": { "command": "echo hello" } }
            ] },
            { "role": "user", "content": [
                { "type": "tool_result", "tool_use_id": "tu_1",
                  "content": big_output },
                { "type": "text", "text": "What does this mean?" }
            ] }
        ]
    });

    let body = serde_json::to_vec(&req).unwrap();
    let (out, outcome) = rewrite_request_body(&body, &state).unwrap();
    let v: Value = serde_json::from_slice(&out).unwrap();

    // tool_result block must have been rewritten to a marker
    let last_msg_content = v["messages"][2]["content"]
        .as_array()
        .expect("last user message content must still be an array of blocks");
    let tool_result_block = &last_msg_content[0];
    let tr_content = tool_result_block["content"]
        .as_str()
        .expect("tool_result content must be a string after rewriting");
    // Build the preview by chars rather than `[..80]`: a byte slice would
    // itself panic on short content or a non-char-boundary index, hiding the
    // real assertion message. The argument is only evaluated on failure.
    assert!(
        tr_content.starts_with("<omc:ref"),
        "tool_result in last user message must be compressed; got: {}",
        tr_content.chars().take(80).collect::<String>()
    );

    // text block must pass through verbatim
    let text_block = &last_msg_content[1];
    assert_eq!(
        text_block["text"].as_str(),
        Some("What does this mean?"),
        "text block in last user message must be verbatim"
    );

    // stat must have ticked
    assert!(
        outcome.any(),
        "rewrite outcome must report at least one rewritten block"
    );
}
11671239
11681240 /// Marker round-trip: any text we compress must come back IDENTICAL via
0 commit comments