@@ -107,6 +107,8 @@ struct RewriteStats {
107107 remember_calls : u64 ,
108108 /// omc_proxy_recall calls resolved by the proxy.
109109 recall_calls : u64 ,
110+ /// Bytes saved by replacing repeated base64 image blocks with text markers.
111+ bytes_saved_images : u64 ,
110112}
111113
112114/// Per-conversation state the proxy remembers across turns. Key is a stable
@@ -137,6 +139,10 @@ struct AppState {
137139 store : Arc < MemoryStore > ,
138140 /// Named key→hash index for omc_proxy_remember / omc_proxy_recall.
139141 named_refs : Arc < std:: sync:: Mutex < std:: collections:: HashMap < String , i64 > > > ,
142+ /// Hashes of base64 image blocks seen in previous turns; repeated images
143+ /// are replaced with a compact text marker instead of re-sending the full
144+ /// base64 payload (which can be hundreds of KB per image).
145+ image_hashes : Arc < std:: sync:: Mutex < std:: collections:: HashSet < u64 > > > ,
140146 stats : Arc < std:: sync:: Mutex < RewriteStats > > ,
141147 /// v0.14.6: per-conversation state, keyed by `conversation_id` (hash of
142148 /// system + tools + first user message). Bounded to ~256 conversations
@@ -184,6 +190,7 @@ async fn main() -> Result<()> {
184190 store : Arc :: new ( MemoryStore :: from_env ( ) ) ,
185191 stats : Arc :: new ( std:: sync:: Mutex :: new ( RewriteStats :: default ( ) ) ) ,
186192 named_refs : Arc :: new ( std:: sync:: Mutex :: new ( std:: collections:: HashMap :: new ( ) ) ) ,
193+ image_hashes : Arc :: new ( std:: sync:: Mutex :: new ( std:: collections:: HashSet :: new ( ) ) ) ,
187194 conversations : Arc :: new ( std:: sync:: Mutex :: new (
188195 std:: collections:: HashMap :: new ( ) ) ) ,
189196 prefix_index : Arc :: new ( std:: sync:: Mutex :: new (
@@ -251,6 +258,7 @@ async fn handle_messages(State(state): State<AppState>, req: Request) -> Respons
251258 s. bytes_saved_system += outcome. bytes_system as u64 ;
252259 s. bytes_saved_tool_use_input += outcome. bytes_tool_use_input as u64 ;
253260 s. bytes_saved_tool_definitions += outcome. bytes_tool_definitions as u64 ;
261+ s. bytes_saved_images += outcome. bytes_images as u64 ;
254262 }
255263 b
256264 }
@@ -606,6 +614,7 @@ struct RewriteOutcome {
606614 bytes_system : usize ,
607615 bytes_tool_use_input : usize ,
608616 bytes_tool_definitions : usize ,
617+ bytes_images : usize ,
609618}
610619
611620impl RewriteOutcome {
@@ -620,7 +629,7 @@ async fn stats_endpoint(State(state): State<AppState>) -> Response {
620629 } else { 0.0 } ;
621630 let total_saved = s. bytes_saved_messages + s. bytes_saved_tool_result
622631 + s. bytes_saved_system + s. bytes_saved_tool_use_input
623- + s. bytes_saved_tool_definitions ;
632+ + s. bytes_saved_tool_definitions + s . bytes_saved_images ;
624633 let json = serde_json:: to_string_pretty ( & serde_json:: json!( {
625634 "requests_processed" : s. requests,
626635 "bytes_in_total" : s. bytes_in,
@@ -634,6 +643,7 @@ async fn stats_endpoint(State(state): State<AppState>) -> Response {
634643 "system_prompt" : s. bytes_saved_system,
635644 "tool_use_input" : s. bytes_saved_tool_use_input,
636645 "tool_definitions" : s. bytes_saved_tool_definitions,
646+ "images" : s. bytes_saved_images,
637647 } ,
638648 "cache_control_inserted_count" : s. cache_control_inserted,
639649 "conversations_seen" : s. conversation_count,
@@ -793,6 +803,39 @@ fn rewrite_request_body(body: &[u8], state: &AppState) -> Result<(Bytes, Rewrite
793803 rewrite_strings_recursive ( input, state, & mut out, & mut seen) ;
794804 }
795805 }
806+ "image" => {
807+     // Dedup of repeated base64 images (e.g. the same screenshot resent on
808+     // every turn, often hundreds of KB): the first sighting passes through
809+     // unchanged, later sightings are swapped for a compact text note.
810+     // NOTE(review): `image_hashes` lives on the shared AppState, so it is not
811+     // scoped per conversation — confirm cross-conversation elision is intended.
812+     if let Some(src) = block.get("source") {
813+         if src.get("type").and_then(Value::as_str) == Some("base64") {
814+             let data = src.get("data").and_then(Value::as_str).unwrap_or("");
815+             let media_type = src.get("media_type")
816+                 .and_then(Value::as_str).unwrap_or("image");
817+             let byte_len = data.len();
818+             // Hash the whole payload: distinct images often share a 256-char prefix.
819+             let hash_key = omnimcode_core::tokenizer::fnv1a_64(
820+                 data.as_bytes()) as u64;
821+             let already_seen = !data.is_empty() && {
822+                 let mut set = state.image_hashes.lock().unwrap();
823+                 !set.insert(hash_key)
824+             };
825+             if already_seen {
826+                 // Build the note first; it ends the borrows of `block` taken above.
827+                 let note = format!(
828+                     "[image repeated from prior turn — {}, {} bytes, hash={:x}]",
829+                     media_type, byte_len, hash_key
830+                 );
831+                 out.bytes_images += byte_len;
832+                 out.rewritten_count += 1;
833+                 *block = json!({ "type": "text", "text": note });
834+             }
835+             // First sighting, or an empty/missing payload: left untouched.
836+         }
837+     }
838+ }
796839 _ => { }
797840 }
798841 }
@@ -1292,6 +1335,7 @@ mod tests {
12921335 store : Arc :: new ( MemoryStore :: from_env ( ) ) ,
12931336 stats : Arc :: new ( std:: sync:: Mutex :: new ( RewriteStats :: default ( ) ) ) ,
12941337 named_refs : Arc :: new ( std:: sync:: Mutex :: new ( std:: collections:: HashMap :: new ( ) ) ) ,
1338+ image_hashes : Arc :: new ( std:: sync:: Mutex :: new ( std:: collections:: HashSet :: new ( ) ) ) ,
12951339 conversations : Arc :: new ( std:: sync:: Mutex :: new (
12961340 std:: collections:: HashMap :: new ( ) ) ) ,
12971341 prefix_index : Arc :: new ( std:: sync:: Mutex :: new (
@@ -2033,4 +2077,43 @@ mod tests {
20332077 assert ! ( hashes. contains( & "1234567" ) ) ;
20342078 assert ! ( hashes. contains( & "9999999" ) ) ;
20352079 }
2080+
2081+ /// A base64 image block is forwarded untouched the first time it is seen
2082+ /// and swapped for a compact text note when the same block shows up again.
2083+ #[test]
2084+ fn image_dedup_second_occurrence_compressed() {
2085+     let state = test_state(64);
2086+     // Serialize a request, run it through the rewriter, and parse the result back.
2087+     let rewrite = |request: &serde_json::Value| {
2088+         let (bytes, outcome) = rewrite_request_body(&serde_json::to_vec(request).unwrap(), &state).unwrap();
2089+         let parsed: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
2090+         (parsed, outcome)
2091+     };
2092+     let payload = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJ".repeat(20);
2093+     let image = json!({
2094+         "type": "image",
2095+         "source": { "type": "base64", "media_type": "image/png", "data": payload }
2096+     });
2097+
2098+     // Request 1: the image has not been seen yet, so it must survive as-is.
2099+     let (first, _) = rewrite(&json!({ "model": "m", "messages": [
2100+         { "role": "user", "content": "look at this" },
2101+         { "role": "assistant", "content": [image.clone()] }
2102+     ] }));
2103+     assert_eq!(first["messages"][1]["content"][0]["type"].as_str().unwrap(), "image",
2104+         "first occurrence must pass through as image block");
2105+
2106+     // Request 2: the same image is now history and must collapse to a text note.
2107+     let (second, outcome) = rewrite(&json!({ "model": "m", "messages": [
2108+         { "role": "user", "content": "look at this" },
2109+         { "role": "assistant", "content": [image.clone()] },
2110+         { "role": "user", "content": "and now?" },
2111+     ] }));
2112+     let replaced = &second["messages"][1]["content"][0];
2113+     assert_eq!(replaced["type"].as_str().unwrap(), "text",
2114+         "second occurrence must be replaced with text marker");
2115+     assert!(replaced["text"].as_str().unwrap().contains("image repeated"),
2116+         "text marker must mention 'image repeated'");
2117+     assert!(outcome.bytes_images > 0, "bytes_images outcome must be positive");
2118+ }
20362119}
0 commit comments