@@ -89,6 +89,7 @@ struct RewriteStats {
8989 bytes_saved_tool_definitions : u64 ,
9090 cache_control_inserted : u64 ,
9191 conversation_count : u64 ,
92+ delta_stores_attempted : u64 ,
9293}
9394
9495/// Per-conversation state the proxy remembers across turns. Key is a stable
@@ -123,6 +124,14 @@ struct AppState {
123124 conversations : Arc < std:: sync:: Mutex <
124125 std:: collections:: HashMap < i64 , ConversationState >
125126 > > ,
127+ /// v0.14.8-I: prefix index for fast near-cache-hit lookup. Maps
128+ /// fnv1a(first 256 bytes of content) → content_hash. When a new block
129+ /// arrives, we look up its prefix hash; on a match, the indexed hash is
130+ /// handed to `store_as_delta` as the delta base (a disk-side optimization).
131+ /// Bounded to ~4096 entries; the whole index is cleared once full (not LRU).
132+ prefix_index : Arc < std:: sync:: Mutex <
133+ std:: collections:: HashMap < u64 , i64 >
134+ > > ,
126135}
127136
128137#[ tokio:: main]
@@ -155,6 +164,8 @@ async fn main() -> Result<()> {
155164 stats : Arc :: new ( std:: sync:: Mutex :: new ( RewriteStats :: default ( ) ) ) ,
156165 conversations : Arc :: new ( std:: sync:: Mutex :: new (
157166 std:: collections:: HashMap :: new ( ) ) ) ,
167+ prefix_index : Arc :: new ( std:: sync:: Mutex :: new (
168+ std:: collections:: HashMap :: new ( ) ) ) ,
158169 } ;
159170
160171 let app = Router :: new ( )
@@ -483,7 +494,8 @@ async fn stats_endpoint(State(state): State<AppState>) -> Response {
483494 "tool_definitions" : s. bytes_saved_tool_definitions,
484495 } ,
485496 "cache_control_inserted_count" : s. cache_control_inserted,
486- "conversations_seen" : s. conversation_count
497+ "conversations_seen" : s. conversation_count,
498+ "delta_stores_attempted" : s. delta_stores_attempted
487499 } ) ) . unwrap ( ) ;
488500 ( StatusCode :: OK ,
489501 [ ( axum:: http:: header:: CONTENT_TYPE , HeaderValue :: from_static ( "application/json" ) ) ] ,
@@ -892,8 +904,23 @@ fn make_marker_with_dedup(
892904 text : & str , state : & AppState , kind : MarkerKind ,
893905 seen_hashes : Option < & mut std:: collections:: HashSet < i64 > > ,
894906) -> Result < String > {
895- let hash = state. store . store ( PROXY_CACHE_NAMESPACE , text)
896- . map_err ( anyhow:: Error :: msg) ?;
907+ // v0.14.8-I: route cache writes through Axis 5 (OMCD delta) when we
908+ // detect a near-edit of a previously-cached body. The base-hash lookup
909+ // is O(1) via prefix_index. If a base is found, store_as_delta stores
910+ // a tiny delta on disk instead of duplicating the full body.
911+ //
912+ // IMPORTANT: this is a DISK-side optimization, not a wire-side one.
913+ // The wire marker is the same compact `<omc:ref h="..." b="N"/>` form.
914+ // We tried emitting `<omc:diff base="..." pre="N" suf="..."/>` markers
915+ // on the wire, but honest accounting showed they're LARGER than the
916+ // 50-byte slim ref marker the recall path already produces. So the win
917+ // is purely disk-resident: future store-side dedup, not request-time
918+ // bytes.
919+ let hash = try_delta_store ( text, state)
920+ . or_else ( || state. store . store ( PROXY_CACHE_NAMESPACE , text) . ok ( ) )
921+ . ok_or_else ( || anyhow:: anyhow!( "cache write failed" ) ) ?;
922+ // Index this body's prefix so the NEXT near-edit can find it as base.
923+ if text. len ( ) >= 1024 { register_prefix ( text, hash, state) ; }
897924
898925 // v0.14.7-L: if we've already emitted a full marker for this hash this
899926 // request, the subsequent ones can be the bare-minimum form.
@@ -928,6 +955,44 @@ fn make_marker_with_dedup(
928955 }
929956}
930957
958+ /// v0.14.8-I: index a body's first-256-byte prefix → content_hash so the next
959+ /// call can try a near-cache-hit lookup.
960+ fn register_prefix ( text : & str , hash : i64 , state : & AppState ) {
961+ let prefix = & text. as_bytes ( ) [ ..text. len ( ) . min ( 256 ) ] ;
962+ let prefix_hash = omnimcode_core:: tokenizer:: fnv1a_64 ( prefix) as u64 ;
963+ let mut idx = state. prefix_index . lock ( ) . unwrap ( ) ;
964+ if idx. len ( ) > 4096 {
965+ // Crude eviction: clear when we hit the cap. Not LRU, but the
966+ // MemoryStore is the source of truth so a cleared index just means
967+ // future near-edits fall back to plain store (no data loss).
968+ idx. clear ( ) ;
969+ }
970+ idx. insert ( prefix_hash, hash) ;
971+ }
972+
973+ /// v0.14.8-I: try to store `text` as a delta against a prefix-near cached
974+ /// body. Returns `Some(hash_of_text)` if delta was viable, `None` otherwise.
975+ /// The hash returned is still the hash of the FULL text (so the marker / recall
976+ /// path is unchanged for the LLM).
977+ fn try_delta_store ( text : & str , state : & AppState ) -> Option < i64 > {
978+ if text. len ( ) < 1024 { return None ; }
979+ let prefix = & text. as_bytes ( ) [ ..text. len ( ) . min ( 256 ) ] ;
980+ let prefix_hash = omnimcode_core:: tokenizer:: fnv1a_64 ( prefix) as u64 ;
981+ let base_hash = {
982+ let idx = state. prefix_index . lock ( ) . unwrap ( ) ;
983+ * idx. get ( & prefix_hash) ?
984+ } ;
985+ // store_as_delta handles the "is the prefix actually long enough?" check
986+ // itself (need ≥64 bytes shared) and falls back to plain store if not.
987+ // Either way we get a valid content-hash for `text`.
988+ let result = state. store . store_as_delta ( PROXY_CACHE_NAMESPACE , text, base_hash) . ok ( ) ?;
989+ {
990+ let mut s = state. stats . lock ( ) . unwrap ( ) ;
991+ s. delta_stores_attempted += 1 ;
992+ }
993+ Some ( result)
994+ }
995+
931996/// Add the omc_proxy_expand_ref tool to the request's tools array so the
932997/// LLM has a way to retrieve full bytes for any marker it cares about.
933998fn inject_expand_tool ( req : & mut Value ) {
@@ -987,6 +1052,8 @@ mod tests {
9871052 stats : Arc :: new ( std:: sync:: Mutex :: new ( RewriteStats :: default ( ) ) ) ,
9881053 conversations : Arc :: new ( std:: sync:: Mutex :: new (
9891054 std:: collections:: HashMap :: new ( ) ) ) ,
1055+ prefix_index : Arc :: new ( std:: sync:: Mutex :: new (
1056+ std:: collections:: HashMap :: new ( ) ) ) ,
9901057 }
9911058 }
9921059
@@ -1382,6 +1449,62 @@ mod tests {
13821449 assert_eq ! ( extract_h( & m0) , extract_h( & m2) ) ;
13831450 }
13841451
/// v0.14.8-I: a body that is a near-edit of an already-cached body should be
/// written through the delta path (Axis 5 / OMCD). Two things are checked:
/// the `delta_stores_attempted` counter advances on the second request, and
/// recalling the hash from the emitted marker yields the near-edit text
/// byte-for-byte.
#[test]
fn near_edit_routes_through_delta_store() {
    let state = test_state(256);

    // Snapshot of the delta counter at the moment of the call.
    let delta_count = || state.stats.lock().unwrap().delta_stores_attempted;

    // Serialise a minimal assistant+user request.
    let build_request = |assistant_text: &str, user_text: &str| {
        serde_json::to_vec(&json!({
            "model": "test", "max_tokens": 10,
            "messages": [
                { "role": "assistant", "content": assistant_text },
                { "role": "user", "content": user_text }
            ]
        }))
        .unwrap()
    };

    // Turn 1: cache the base body (~1200 bytes, large enough for prefix
    // indexing). Nothing is indexed yet, so no delta can happen here.
    let base = "Common prefix.\n".repeat(80);
    let _ = rewrite_request_body(&build_request(&base, "first"), &state).unwrap();
    let before = delta_count();

    // Turn 2: same content plus a small suffix — should hit the delta path.
    let near_edit = format!("{}APPENDED MORE CONTENT TO THE END", base);
    let (out2, _) =
        rewrite_request_body(&build_request(&near_edit, "second"), &state).unwrap();
    let after = delta_count();
    assert!(after > before,
        "expected delta_stores_attempted to increment for near-edit");

    // Pull the marker that replaced the near-edit body out of the rewritten
    // request. The content is either a bare string or, if cache_control
    // insertion converted it, an array whose first block carries the text.
    let v: Value = serde_json::from_slice(&out2).unwrap();
    let marker_str = match &v["messages"][0]["content"] {
        Value::String(s) => s.clone(),
        Value::Array(blocks) => blocks
            .first()
            .and_then(|b| b.get("text"))
            .and_then(Value::as_str)
            .unwrap()
            .to_string(),
        _ => panic!("couldn't extract marker"),
    };

    // Slim marker form: <omc:ref h="N" b="M"/> — take the digits after h=".
    let after_h = marker_str.split(" h=\"").nth(1).unwrap();
    let hash_digits = after_h.split('"').next().unwrap();
    let h = hash_digits.parse::<i64>().unwrap();

    // Recalling by that hash must reproduce the original near-edit body.
    let recovered = state
        .store
        .recall(Some(PROXY_CACHE_NAMESPACE), h)
        .unwrap()
        .expect("must be recoverable");
    assert_eq!(recovered, near_edit,
        "delta-stored body must round-trip byte-identical");
}
1507+
13851508 /// Multi-turn dogfood simulation: walk a conversation, verify each turn's
13861509 /// rewrite preserves the LLM-emitted shape AND the markers expand cleanly
13871510 /// to the original bytes via the cache.
0 commit comments