@@ -50,8 +50,9 @@ use tracing::{debug, info, warn};
5050
/// Namespace string used for the proxy's cache.
/// NOTE(review): the code that consumes this value is outside this view — confirm usage.
const PROXY_CACHE_NAMESPACE: &str = "_apiproxy_cache";

/// Name of the injected proxy tool for expanding a reference.
const EXPAND_TOOL_NAME: &str = "omc_proxy_expand_ref";

/// Name of the injected proxy tool that takes a `key` and a `value` to remember.
const REMEMBER_TOOL_NAME: &str = "omc_proxy_remember";

/// Name of the injected proxy tool that recalls by `key`.
const RECALL_TOOL_NAME: &str = "omc_proxy_recall";

/// Name of the injected proxy tool that lists every `<omc:ref/>` marker found
/// in the current request body. The tool takes no arguments.
const LIST_REFS_TOOL_NAME: &str = "omc_proxy_list_refs";
5556
5657#[ derive( Parser , Debug , Clone ) ]
5758#[ command( name = "omnimcode-apiproxy" , version = env!( "CARGO_PKG_VERSION" ) ) ]
@@ -367,6 +368,14 @@ async fn handle_with_expand_loop(
367368 info ! ( "omc_proxy_recall: key={:?}" , key) ;
368369 ( id. clone ( ) , text)
369370 }
371+ ProxyCall :: ListRefs { id } => {
372+ let req_val: Value = serde_json:: from_slice ( & current_body)
373+ . unwrap_or_default ( ) ;
374+ let markers = find_markers_in_value ( & req_val) ;
375+ info ! ( "omc_proxy_list_refs: found {} markers" , markers. len( ) ) ;
376+ ( id. clone ( ) ,
377+ serde_json:: to_string_pretty ( & markers) . unwrap_or_default ( ) )
378+ }
370379 } ;
371380 tool_results. push ( json ! ( {
372381 "type" : "tool_result" ,
@@ -392,6 +401,7 @@ enum ProxyCall {
392401 ExpandRef { id : String , hash_str : String } ,
393402 Remember { id : String , key : String , value : String } ,
394403 Recall { id : String , key : String } ,
404+ ListRefs { id : String } ,
395405}
396406
397407fn collect_proxy_tool_calls ( resp : & Value ) -> Vec < ProxyCall > {
@@ -410,7 +420,7 @@ fn collect_proxy_tool_calls(resp: &Value) -> Vec<ProxyCall> {
410420 . unwrap_or ( "" ) . to_string ( ) ;
411421 if !id. is_empty ( ) && !hash_str. is_empty ( ) {
412422 calls. push ( ProxyCall :: ExpandRef { id, hash_str } ) ;
413- } else { return vec ! [ ] ; } // malformed — pass through
423+ } else { return vec ! [ ] ; }
414424 }
415425 n if n == REMEMBER_TOOL_NAME => {
416426 let key = inp. get ( "key" ) . and_then ( Value :: as_str) . unwrap_or ( "" ) . to_string ( ) ;
@@ -425,6 +435,11 @@ fn collect_proxy_tool_calls(resp: &Value) -> Vec<ProxyCall> {
425435 calls. push ( ProxyCall :: Recall { id, key } ) ;
426436 } else { return vec ! [ ] ; }
427437 }
438+ n if n == LIST_REFS_TOOL_NAME => {
439+ if !id. is_empty ( ) {
440+ calls. push ( ProxyCall :: ListRefs { id } ) ;
441+ }
442+ }
428443 _ => return vec ! [ ] , // non-proxy tool → client must handle
429444 }
430445 }
@@ -440,6 +455,55 @@ fn lookup_expand(hash_str: &str, state: &AppState) -> Result<String> {
440455 Ok ( body)
441456}
442457
/// Extract a double-quoted attribute value from an `<omc:ref .../>` marker string.
///
/// An attribute is recognised when its name is preceded by any whitespace
/// character (space, tab, newline, ...) and followed by `=` and an opening
/// `"`. Whitespace on either side of the `=` is tolerated, as XML attribute
/// syntax permits. Requiring leading whitespace means a lookup for `h` never
/// matches the tail of a longer name, and a lookup for `h` does not match
/// `hash_str` because the name must be followed directly by `=`.
///
/// # Parameters
/// * `marker` - the marker text to search.
/// * `attr`   - the exact attribute name to look for.
///
/// # Returns
/// The text between the quotes, borrowed from `marker`. Returns `None` when
/// `attr` is empty, when no matching attribute exists, or when the first
/// matching attribute's value has no closing `"`.
fn extract_marker_attr<'a>(marker: &'a str, attr: &str) -> Option<&'a str> {
    // An empty needle matches everywhere and would never advance the scan.
    if attr.is_empty() {
        return None;
    }

    let mut search_from = 0;
    while let Some(offset) = marker[search_from..].find(attr) {
        let name_start = search_from + offset;
        let name_end = name_start + attr.len();
        // `name_end` sits right after a full match, so it is a char boundary.
        search_from = name_end;

        // The name must start a new token, i.e. be preceded by whitespace.
        let after_whitespace = marker[..name_start]
            .chars()
            .next_back()
            .map_or(false, char::is_whitespace);
        if !after_whitespace {
            continue;
        }

        // Accept `name="v"` as well as `name = "v"`.
        let value = marker[name_end..]
            .trim_start()
            .strip_prefix('=')
            .and_then(|rest| rest.trim_start().strip_prefix('"'));
        if let Some(value) = value {
            // An unterminated value yields `None` rather than a partial slice.
            let close = value.find('"')?;
            return Some(&value[..close]);
        }
    }
    None
}
465+
466+ /// Walk a JSON Value tree and collect every `<omc:ref/>` marker (deduplicated).
467+ /// Returns `[{"hash": "...", "bytes": N}, ...]`.
468+ fn find_markers_in_value ( val : & Value ) -> Vec < Value > {
469+ let mut seen = std:: collections:: HashSet :: < String > :: new ( ) ;
470+ let mut results = Vec :: < Value > :: new ( ) ;
471+ find_markers_rec ( val, & mut seen, & mut results) ;
472+ results
473+ }
474+
475+ fn find_markers_rec (
476+ val : & Value ,
477+ seen : & mut std:: collections:: HashSet < String > ,
478+ out : & mut Vec < Value > ,
479+ ) {
480+ match val {
481+ Value :: String ( s) => {
482+ let mut pos = 0usize ;
483+ while let Some ( rel) = s[ pos..] . find ( "<omc:ref" ) {
484+ let abs = pos + rel;
485+ let end = s[ abs..] . find ( "/>" ) . map ( |e| abs + e + 2 ) . unwrap_or ( s. len ( ) ) ;
486+ let marker = & s[ abs..end] ;
487+ let hash = extract_marker_attr ( marker, "hash_str" )
488+ . or_else ( || extract_marker_attr ( marker, "h" ) )
489+ . map ( |h| h. to_string ( ) ) ;
490+ let bytes: Option < u64 > = extract_marker_attr ( marker, "bytes" )
491+ . or_else ( || extract_marker_attr ( marker, "b" ) )
492+ . and_then ( |b| b. parse ( ) . ok ( ) ) ;
493+ if let Some ( h) = hash {
494+ if seen. insert ( h. clone ( ) ) {
495+ out. push ( json ! ( { "hash" : h, "bytes" : bytes } ) ) ;
496+ }
497+ }
498+ pos = end;
499+ }
500+ }
501+ Value :: Array ( arr) => { for v in arr { find_markers_rec ( v, seen, out) ; } }
502+ Value :: Object ( map) => { for ( _, v) in map { find_markers_rec ( v, seen, out) ; } }
503+ _ => { }
504+ }
505+ }
506+
443507fn rebuild_response ( status : StatusCode , headers : & HeaderMap , body : Bytes ) -> Response {
444508 let mut resp = Response :: builder ( ) . status ( status) ;
445509 for ( k, v) in headers. iter ( ) {
@@ -1186,12 +1250,23 @@ fn inject_proxy_tools(req: &mut Value) {
11861250 "required" : [ "key" ]
11871251 }
11881252 } ) ) ;
1253+
1254+ // ── omc_proxy_list_refs ─────────────────────────────────────────────────
1255+ tools_arr. push ( json ! ( {
1256+ "name" : LIST_REFS_TOOL_NAME ,
1257+ "description" : "Return a JSON array describing every <omc:ref/> marker \
1258+ currently present in the conversation context. Each entry \
1259+ has {\" hash\" , \" bytes\" } so you can decide which to expand. \
1260+ Takes no arguments.",
1261+ "input_schema" : { "type" : "object" , "properties" : { } , "required" : [ ] }
1262+ } ) ) ;
11891263}
11901264
1191- /// Compatibility shim — callers that used inject_expand_tool still work.
1265+ /// Compatibility shim -- callers that used inject_expand_tool still work.
11921266#[ allow( dead_code) ]
11931267fn inject_expand_tool ( req : & mut Value ) { inject_proxy_tools ( req) ; }
11941268
1269+
11951270#[ cfg( test) ]
11961271mod tests {
11971272 use super :: * ;
@@ -1927,8 +2002,35 @@ mod tests {
19272002 let tools = req[ "tools" ] . as_array ( ) . expect ( "tools array must exist" ) ;
19282003 let names: Vec < & str > = tools. iter ( )
19292004 . filter_map ( |t| t[ "name" ] . as_str ( ) ) . collect ( ) ;
1930- assert ! ( names. contains( & EXPAND_TOOL_NAME ) , "expand_ref must be injected" ) ;
1931- assert ! ( names. contains( & REMEMBER_TOOL_NAME ) , "remember must be injected" ) ;
1932- assert ! ( names. contains( & RECALL_TOOL_NAME ) , "recall must be injected" ) ;
2005+ assert ! ( names. contains( & EXPAND_TOOL_NAME ) , "expand_ref must be injected" ) ;
2006+ assert ! ( names. contains( & REMEMBER_TOOL_NAME ) , "remember must be injected" ) ;
2007+ assert ! ( names. contains( & RECALL_TOOL_NAME ) , "recall must be injected" ) ;
2008+ assert ! ( names. contains( & LIST_REFS_TOOL_NAME ) , "list_refs must be injected" ) ;
2009+ }
2010+
/// `find_markers_in_value` discovers every `<omc:ref>` marker in a JSON tree
/// (including inside nested arrays/objects), deduplicates by hash, keeps the
/// first-seen order, and carries the parsed byte count alongside each hash.
#[test]
fn list_refs_finds_markers_in_value() {
    let text_with_marker =
        "Here is a compressed block: <omc:ref h=\"1234567\" b=\"4096\"/> and that's it.";
    let text_with_two =
        "<omc:ref h=\"1234567\" b=\"4096\"/> and again <omc:ref h=\"9999999\" b=\"512\"/>";
    let val = json!({
        "messages": [
            { "role": "user", "content": text_with_marker },
            { "role": "assistant", "content": [
                { "type": "text", "text": text_with_two }
            ] },
            // Duplicate of first marker — should be deduped
            { "role": "user", "content": text_with_marker }
        ]
    });

    let markers = find_markers_in_value(&val);
    assert_eq!(markers.len(), 2, "must find exactly 2 distinct hashes");

    // Check hash and byte count together, in the order the markers were met.
    let found: Vec<(Option<&str>, Option<u64>)> = markers
        .iter()
        .map(|m| (m["hash"].as_str(), m["bytes"].as_u64()))
        .collect();
    assert_eq!(
        found,
        vec![
            (Some("1234567"), Some(4096)),
            (Some("9999999"), Some(512)),
        ],
        "each marker must report its hash and byte count in first-seen order"
    );
}
19342036}
0 commit comments