Skip to content

Commit adc097e

Browse files
v0.19.0 apiproxy: deduplicate repeated base64 image blocks
Vision workflows (tool screenshots, diagrams) resend the same image every turn — 100-500KB each time. This change tracks image hashes across turns; on the second and later occurrences the block is replaced with a compact text note:

    [image repeated from prior turn — image/png, 24601 bytes, hash=3f8a...]

The image block is replaced with a text block so the content array remains valid for the API. The LLM has already seen the image in the first turn and doesn't need the pixels again.

- image_hashes: Arc<Mutex<HashSet<u64>>> in AppState
- bytes_saved_images in RewriteStats + /_stats output
- bytes_images in RewriteOutcome
- Images in the last user message always pass through untouched (the LLM needs to see new images to reason about them)

Tests: 22/22 (+1: image_dedup_second_occurrence_compressed)
1 parent 37f72e6 commit adc097e

1 file changed

Lines changed: 84 additions & 1 deletion

File tree

omnimcode-apiproxy/src/main.rs

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ struct RewriteStats {
107107
remember_calls: u64,
108108
/// omc_proxy_recall calls resolved by the proxy.
109109
recall_calls: u64,
110+
/// Bytes saved by replacing repeated base64 image blocks with text markers.
111+
bytes_saved_images: u64,
110112
}
111113

112114
/// Per-conversation state the proxy remembers across turns. Key is a stable
@@ -137,6 +139,10 @@ struct AppState {
137139
store: Arc<MemoryStore>,
138140
/// Named key→hash index for omc_proxy_remember / omc_proxy_recall.
139141
named_refs: Arc<std::sync::Mutex<std::collections::HashMap<String, i64>>>,
142+
/// Hashes of base64 image blocks seen in previous turns; repeated images
143+
/// are replaced with a compact text marker instead of re-sending the full
144+
/// base64 payload (which can be hundreds of KB per image).
145+
image_hashes: Arc<std::sync::Mutex<std::collections::HashSet<u64>>>,
140146
stats: Arc<std::sync::Mutex<RewriteStats>>,
141147
/// v0.14.6: per-conversation state, keyed by `conversation_id` (hash of
142148
/// system + tools + first user message). Bounded to ~256 conversations
@@ -184,6 +190,7 @@ async fn main() -> Result<()> {
184190
store: Arc::new(MemoryStore::from_env()),
185191
stats: Arc::new(std::sync::Mutex::new(RewriteStats::default())),
186192
named_refs: Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())),
193+
image_hashes: Arc::new(std::sync::Mutex::new(std::collections::HashSet::new())),
187194
conversations: Arc::new(std::sync::Mutex::new(
188195
std::collections::HashMap::new())),
189196
prefix_index: Arc::new(std::sync::Mutex::new(
@@ -251,6 +258,7 @@ async fn handle_messages(State(state): State<AppState>, req: Request) -> Respons
251258
s.bytes_saved_system += outcome.bytes_system as u64;
252259
s.bytes_saved_tool_use_input += outcome.bytes_tool_use_input as u64;
253260
s.bytes_saved_tool_definitions += outcome.bytes_tool_definitions as u64;
261+
s.bytes_saved_images += outcome.bytes_images as u64;
254262
}
255263
b
256264
}
@@ -606,6 +614,7 @@ struct RewriteOutcome {
606614
bytes_system: usize,
607615
bytes_tool_use_input: usize,
608616
bytes_tool_definitions: usize,
617+
bytes_images: usize,
609618
}
610619

611620
impl RewriteOutcome {
@@ -620,7 +629,7 @@ async fn stats_endpoint(State(state): State<AppState>) -> Response {
620629
} else { 0.0 };
621630
let total_saved = s.bytes_saved_messages + s.bytes_saved_tool_result
622631
+ s.bytes_saved_system + s.bytes_saved_tool_use_input
623-
+ s.bytes_saved_tool_definitions;
632+
+ s.bytes_saved_tool_definitions + s.bytes_saved_images;
624633
let json = serde_json::to_string_pretty(&serde_json::json!({
625634
"requests_processed": s.requests,
626635
"bytes_in_total": s.bytes_in,
@@ -634,6 +643,7 @@ async fn stats_endpoint(State(state): State<AppState>) -> Response {
634643
"system_prompt": s.bytes_saved_system,
635644
"tool_use_input": s.bytes_saved_tool_use_input,
636645
"tool_definitions": s.bytes_saved_tool_definitions,
646+
"images": s.bytes_saved_images,
637647
},
638648
"cache_control_inserted_count": s.cache_control_inserted,
639649
"conversations_seen": s.conversation_count,
@@ -793,6 +803,39 @@ fn rewrite_request_body(body: &[u8], state: &AppState) -> Result<(Bytes, Rewrite
793803
rewrite_strings_recursive(input, state, &mut out, &mut seen);
794804
}
795805
}
806+
"image" => {
807+
// Repeated base64 images (same screenshot across turns) can be
808+
// hundreds of KB each. After the first occurrence — which the LLM
809+
// must see to understand the image — replace subsequent occurrences
810+
// with a compact text note. The LLM has already seen and processed
811+
// the image; the marker conveys that this slot was an image.
812+
if let Some(src) = block.get("source") {
813+
if src.get("type").and_then(Value::as_str) == Some("base64") {
814+
let data = src.get("data").and_then(Value::as_str).unwrap_or("");
815+
let media_type = src.get("media_type")
816+
.and_then(Value::as_str).unwrap_or("image");
817+
let byte_len = data.len();
818+
// hash only the first 256 bytes of the base64 text (fast, but payloads sharing that prefix are treated as the same image)
819+
let hash_key = omnimcode_core::tokenizer::fnv1a_64(
820+
data.as_bytes().get(..256).unwrap_or(data.as_bytes())) as u64;
821+
let already_seen = {
822+
let mut set = state.image_hashes.lock().unwrap();
823+
!set.insert(hash_key)
824+
};
825+
if already_seen {
826+
// Replace the whole image block with a text note.
827+
let note = format!(
828+
"[image repeated from prior turn — {}, {} bytes, hash={:x}]",
829+
media_type, byte_len, hash_key
830+
);
831+
out.bytes_images += byte_len;
832+
out.rewritten_count += 1;
833+
*block = json!({ "type": "text", "text": note });
834+
}
835+
// First occurrence: pass through so the LLM can see the image.
836+
}
837+
}
838+
}
796839
_ => {}
797840
}
798841
}
@@ -1292,6 +1335,7 @@ mod tests {
12921335
store: Arc::new(MemoryStore::from_env()),
12931336
stats: Arc::new(std::sync::Mutex::new(RewriteStats::default())),
12941337
named_refs: Arc::new(std::sync::Mutex::new(std::collections::HashMap::new())),
1338+
image_hashes: Arc::new(std::sync::Mutex::new(std::collections::HashSet::new())),
12951339
conversations: Arc::new(std::sync::Mutex::new(
12961340
std::collections::HashMap::new())),
12971341
prefix_index: Arc::new(std::sync::Mutex::new(
@@ -2033,4 +2077,43 @@ mod tests {
20332077
assert!(hashes.contains(&"1234567"));
20342078
assert!(hashes.contains(&"9999999"));
20352079
}
2080+
2081+
/// Repeated base64 image blocks in historical turns are replaced with a
2082+
/// compact text note. The first occurrence passes through untouched.
2083+
#[test]
2084+
fn image_dedup_second_occurrence_compressed() {
2085+
let state = test_state(64);
2086+
let img_data = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJ".repeat(20);
2087+
let img_block = json!({
2088+
"type": "image",
2089+
"source": { "type": "base64", "media_type": "image/png", "data": img_data }
2090+
});
2091+
2092+
// Turn 1: single assistant message with the image — first occurrence passes through.
2093+
let body1 = json!({"model":"m","messages":[
2094+
{"role":"user","content":"look at this"},
2095+
{"role":"assistant","content":[img_block.clone()]}
2096+
]});
2097+
let (out1, _) = rewrite_request_body(&serde_json::to_vec(&body1).unwrap(), &state).unwrap();
2098+
let v1: serde_json::Value = serde_json::from_slice(&out1).unwrap();
2099+
let first_block = &v1["messages"][1]["content"][0];
2100+
assert_eq!(first_block["type"].as_str().unwrap(), "image",
2101+
"first occurrence must pass through as image block");
2102+
2103+
// Turn 2: same image recurs in history — must be replaced with a text note.
2104+
let body2 = json!({"model":"m","messages":[
2105+
{"role":"user","content":"look at this"},
2106+
{"role":"assistant","content":[img_block.clone()]},
2107+
{"role":"user","content":"and now?"},
2108+
]});
2109+
let (out2, outcome) = rewrite_request_body(&serde_json::to_vec(&body2).unwrap(), &state).unwrap();
2110+
let v2: serde_json::Value = serde_json::from_slice(&out2).unwrap();
2111+
let second_block = &v2["messages"][1]["content"][0];
2112+
assert_eq!(second_block["type"].as_str().unwrap(), "text",
2113+
"second occurrence must be replaced with text marker");
2114+
assert!(second_block["text"].as_str().unwrap().contains("image repeated"),
2115+
"text marker must mention 'image repeated'");
2116+
assert!(outcome.bytes_images > 0,
2117+
"bytes_images outcome must be positive");
2118+
}
20362119
}

0 commit comments

Comments
 (0)