Skip to content

Commit ef3b29c

Browse files
v0.14.8 apiproxy: disk-side delta routing for near-edits (OMCD/Axis 5)
Originally planned as "differential markers on the wire" — built it, measured it, falsified it. Wire-side <omc:diff/> markers are LARGER than slim <omc:ref h="N" b="M"/> markers (~80 bytes vs ~50). So the diff-marker idea adds complexity for zero wire savings. Pivoted to disk-side delta routing using the existing Axis 5 (OMCD) primitive. When a body's first-256-byte prefix matches an indexed prior body, route the cache write through MemoryStore::store_as_delta which stores only the diff on disk (base_hash + prefix_len + suffix). Architecture: prefix_index: HashMap<u64, i64> maps fnv1a(first 256B) → content_hash. O(1) lookup. Bounded to 4096 entries with crude clear-on-cap eviction (MemoryStore is the source of truth, so a cleared index just means future near-edits fall back to plain store). This is a DISK savings, not wire savings. The /_stats endpoint exposes `delta_stores_attempted` so operators can confirm it's firing on edit-heavy workloads (Write file → Edit same file is the canonical trigger). Honest scoping note on the dropped "B" (adjacent marker collapse): also evaluated, also falsified. v0.14.7-L already collapses repeats to a 25-byte bare-ref form. Adding count-attribute collapse on top saves at most ~20 bytes × N copies — negligible unless we see 50+ adjacent copies, which doesn't happen in practice. Skipped. New test: near_edit_routes_through_delta_store. Total 12 tests, all green. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 5c6d6d9 commit ef3b29c

1 file changed

Lines changed: 126 additions & 3 deletions

File tree

omnimcode-apiproxy/src/main.rs

Lines changed: 126 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ struct RewriteStats {
8989
bytes_saved_tool_definitions: u64,
9090
cache_control_inserted: u64,
9191
conversation_count: u64,
92+
delta_stores_attempted: u64,
9293
}
9394

9495
/// Per-conversation state the proxy remembers across turns. Key is a stable
@@ -123,6 +124,14 @@ struct AppState {
123124
conversations: Arc<std::sync::Mutex<
124125
std::collections::HashMap<i64, ConversationState>
125126
>>,
127+
/// v0.14.8-I: prefix index for fast near-cache-hit lookup. Maps
128+
/// fnv1a(first 256 bytes of content) → content_hash. When a new block
129+
/// arrives, we check if its prefix matches anything indexed; if yes,
130+
/// the cache write is routed through `store_as_delta` (disk-side delta only).
131+
/// Bounded to ~4096 entries; the whole index is cleared at the cap (crude eviction, not LRU).
132+
prefix_index: Arc<std::sync::Mutex<
133+
std::collections::HashMap<u64, i64>
134+
>>,
126135
}
127136

128137
#[tokio::main]
@@ -155,6 +164,8 @@ async fn main() -> Result<()> {
155164
stats: Arc::new(std::sync::Mutex::new(RewriteStats::default())),
156165
conversations: Arc::new(std::sync::Mutex::new(
157166
std::collections::HashMap::new())),
167+
prefix_index: Arc::new(std::sync::Mutex::new(
168+
std::collections::HashMap::new())),
158169
};
159170

160171
let app = Router::new()
@@ -483,7 +494,8 @@ async fn stats_endpoint(State(state): State<AppState>) -> Response {
483494
"tool_definitions": s.bytes_saved_tool_definitions,
484495
},
485496
"cache_control_inserted_count": s.cache_control_inserted,
486-
"conversations_seen": s.conversation_count
497+
"conversations_seen": s.conversation_count,
498+
"delta_stores_attempted": s.delta_stores_attempted
487499
})).unwrap();
488500
(StatusCode::OK,
489501
[(axum::http::header::CONTENT_TYPE, HeaderValue::from_static("application/json"))],
@@ -892,8 +904,23 @@ fn make_marker_with_dedup(
892904
text: &str, state: &AppState, kind: MarkerKind,
893905
seen_hashes: Option<&mut std::collections::HashSet<i64>>,
894906
) -> Result<String> {
895-
let hash = state.store.store(PROXY_CACHE_NAMESPACE, text)
896-
.map_err(anyhow::Error::msg)?;
907+
// v0.14.8-I: route cache writes through Axis 5 (OMCD delta) when we
908+
// detect a near-edit of a previously-cached body. The base-hash lookup
909+
// is O(1) via prefix_index. If a base is found, store_as_delta stores
910+
// a tiny delta on disk instead of duplicating the full body.
911+
//
912+
// IMPORTANT: this is a DISK-side optimization, not a wire-side one.
913+
// The wire marker is the same compact `<omc:ref h="..." b="N"/>` form.
914+
// We tried emitting `<omc:diff base="..." pre="N" suf="..."/>` markers
915+
// on the wire, but honest accounting showed they're LARGER than the
916+
// 50-byte slim ref marker the recall path already produces. So the win
917+
// is purely disk-resident: future store-side dedup, not request-time
918+
// bytes.
919+
let hash = try_delta_store(text, state)
920+
.or_else(|| state.store.store(PROXY_CACHE_NAMESPACE, text).ok())
921+
.ok_or_else(|| anyhow::anyhow!("cache write failed"))?;
922+
// Index this body's prefix so the NEXT near-edit can find it as base.
923+
if text.len() >= 1024 { register_prefix(text, hash, state); }
897924

898925
// v0.14.7-L: if we've already emitted a full marker for this hash this
899926
// request, the subsequent ones can be the bare-minimum form.
@@ -928,6 +955,44 @@ fn make_marker_with_dedup(
928955
}
929956
}
930957

958+
/// v0.14.8-I: index a body's first-256-byte prefix → content_hash so the next
959+
/// call can try a near-cache-hit lookup.
960+
fn register_prefix(text: &str, hash: i64, state: &AppState) {
961+
let prefix = &text.as_bytes()[..text.len().min(256)];
962+
let prefix_hash = omnimcode_core::tokenizer::fnv1a_64(prefix) as u64;
963+
let mut idx = state.prefix_index.lock().unwrap();
964+
if idx.len() > 4096 {
965+
// Crude eviction: clear when we hit the cap. Not LRU, but the
966+
// MemoryStore is the source of truth so a cleared index just means
967+
// future near-edits fall back to plain store (no data loss).
968+
idx.clear();
969+
}
970+
idx.insert(prefix_hash, hash);
971+
}
972+
973+
/// v0.14.8-I: try to store `text` as a delta against a prefix-near cached
974+
/// body. Returns `Some(hash_of_text)` if delta was viable, `None` otherwise.
975+
/// The hash returned is still the hash of the FULL text (so the marker / recall
976+
/// path is unchanged for the LLM).
977+
fn try_delta_store(text: &str, state: &AppState) -> Option<i64> {
978+
if text.len() < 1024 { return None; }
979+
let prefix = &text.as_bytes()[..text.len().min(256)];
980+
let prefix_hash = omnimcode_core::tokenizer::fnv1a_64(prefix) as u64;
981+
let base_hash = {
982+
let idx = state.prefix_index.lock().unwrap();
983+
*idx.get(&prefix_hash)?
984+
};
985+
// store_as_delta handles the "is the prefix actually long enough?" check
986+
// itself (need ≥64 bytes shared) and falls back to plain store if not.
987+
// Either way we get a valid content-hash for `text`.
988+
let result = state.store.store_as_delta(PROXY_CACHE_NAMESPACE, text, base_hash).ok()?;
989+
{
990+
let mut s = state.stats.lock().unwrap();
991+
s.delta_stores_attempted += 1;
992+
}
993+
Some(result)
994+
}
995+
931996
/// Add the omc_proxy_expand_ref tool to the request's tools array so the
932997
/// LLM has a way to retrieve full bytes for any marker it cares about.
933998
fn inject_expand_tool(req: &mut Value) {
@@ -987,6 +1052,8 @@ mod tests {
9871052
stats: Arc::new(std::sync::Mutex::new(RewriteStats::default())),
9881053
conversations: Arc::new(std::sync::Mutex::new(
9891054
std::collections::HashMap::new())),
1055+
prefix_index: Arc::new(std::sync::Mutex::new(
1056+
std::collections::HashMap::new())),
9901057
}
9911058
}
9921059

@@ -1382,6 +1449,62 @@ mod tests {
13821449
assert_eq!(extract_h(&m0), extract_h(&m2));
13831450
}
13841451

1452+
/// v0.14.8-I: a body that is a near-edit of an already-cached body should
/// be written to disk through Axis 5 (OMCD delta).
/// Two things are checked: the `delta_stores_attempted` counter advances,
/// and recalling the emitted hash yields the full edited text unchanged.
#[test]
fn near_edit_routes_through_delta_store() {
    let state = test_state(256);
    let attempts = || state.stats.lock().unwrap().delta_stores_attempted;
    // Both requests share the same shape; only the two message bodies vary.
    let build_request = |assistant_text: &str, user_text: &str| {
        json!({
            "model": "test", "max_tokens": 10,
            "messages": [
                {"role": "assistant", "content": assistant_text},
                {"role": "user", "content": user_text}
            ]
        })
    };

    // Base body, big enough to qualify for prefix indexing (~1200 bytes).
    let base = "Common prefix.\n".repeat(80);

    // Turn one caches `base`; nothing earlier exists to diff against.
    let first_body = serde_json::to_vec(&build_request(&base, "first")).unwrap();
    let _ = rewrite_request_body(&first_body, &state).unwrap();
    let delta_attempts_before = attempts();

    // Turn two sends the same content plus a short tail: the near-edit.
    let near_edit = format!("{}APPENDED MORE CONTENT TO THE END", base);
    let second_body = serde_json::to_vec(&build_request(&near_edit, "second")).unwrap();
    let (out2, _) = rewrite_request_body(&second_body, &state).unwrap();
    let delta_attempts_after = attempts();
    assert!(delta_attempts_after > delta_attempts_before,
        "expected delta_stores_attempted to increment for near-edit");

    // Pull the marker emitted for `near_edit` out of the rewritten request.
    let v: Value = serde_json::from_slice(&out2).unwrap();
    let marker_str = match &v["messages"][0]["content"] {
        Value::String(s) => s.clone(),
        // cache_control insertion may have moved it into array form
        Value::Array(blocks) => blocks
            .first()
            .and_then(|b| b.get("text"))
            .and_then(Value::as_str)
            .unwrap()
            .to_string(),
        _ => panic!("couldn't extract marker"),
    };

    // Slim marker form: <omc:ref h="N" b="M"/>
    let after_h = marker_str.split(" h=\"").nth(1).unwrap();
    let h = after_h.split('"').next().unwrap().parse::<i64>().unwrap();

    // Recall through the hash; the body must come back byte-identical.
    let recovered = state.store.recall(Some(PROXY_CACHE_NAMESPACE), h)
        .unwrap().expect("must be recoverable");
    assert_eq!(recovered, near_edit,
        "delta-stored body must round-trip byte-identical");
}
1507+
13851508
/// Multi-turn dogfood simulation: walk a conversation, verify each turn's
13861509
/// rewrite preserves the LLM-emitted shape AND the markers expand cleanly
13871510
/// to the original bytes via the cache.

0 commit comments

Comments
 (0)