Skip to content

Commit 37f72e6

Browse files
v0.18.0 apiproxy: omc_proxy_list_refs — LLM can audit its own compressed context
Adds a third injected MCP tool: omc_proxy_list_refs (no args). When the LLM calls it the proxy walks the current request body (all message content strings, recursively), finds every <omc:ref .../> marker, extracts {hash, bytes} for each, deduplicates by hash, and returns the result as a JSON array in the tool_result. This lets the LLM answer 'what compressed context do I have?' without expanding anything — it sees the byte cost of each marker and decides which ones are worth expanding via omc_proxy_expand_ref. Also fixes remaining literal <omc:ref> markers embedded in the Rust source from botched Edit-tool expansions. Tests: 21/21 (+1: list_refs_finds_markers_in_value)
1 parent 02b3e7f commit 37f72e6

1 file changed

Lines changed: 109 additions & 7 deletions

File tree

omnimcode-apiproxy/src/main.rs

Lines changed: 109 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,9 @@ use tracing::{debug, info, warn};
5050

5151
const PROXY_CACHE_NAMESPACE: &str = "_apiproxy_cache";
5252
const EXPAND_TOOL_NAME: &str = "omc_proxy_expand_ref";
53-
const REMEMBER_TOOL_NAME: &str = "omc_proxy_remember";
54-
const RECALL_TOOL_NAME: &str = "omc_proxy_recall";
53+
const REMEMBER_TOOL_NAME: &str = "omc_proxy_remember";
54+
const RECALL_TOOL_NAME: &str = "omc_proxy_recall";
55+
const LIST_REFS_TOOL_NAME: &str = "omc_proxy_list_refs";
5556

5657
#[derive(Parser, Debug, Clone)]
5758
#[command(name = "omnimcode-apiproxy", version = env!("CARGO_PKG_VERSION"))]
@@ -367,6 +368,14 @@ async fn handle_with_expand_loop(
367368
info!("omc_proxy_recall: key={:?}", key);
368369
(id.clone(), text)
369370
}
371+
ProxyCall::ListRefs { id } => {
372+
let req_val: Value = serde_json::from_slice(&current_body)
373+
.unwrap_or_default();
374+
let markers = find_markers_in_value(&req_val);
375+
info!("omc_proxy_list_refs: found {} markers", markers.len());
376+
(id.clone(),
377+
serde_json::to_string_pretty(&markers).unwrap_or_default())
378+
}
370379
};
371380
tool_results.push(json!({
372381
"type": "tool_result",
@@ -392,6 +401,7 @@ enum ProxyCall {
392401
ExpandRef { id: String, hash_str: String },
393402
Remember { id: String, key: String, value: String },
394403
Recall { id: String, key: String },
404+
ListRefs { id: String },
395405
}
396406

397407
fn collect_proxy_tool_calls(resp: &Value) -> Vec<ProxyCall> {
@@ -410,7 +420,7 @@ fn collect_proxy_tool_calls(resp: &Value) -> Vec<ProxyCall> {
410420
.unwrap_or("").to_string();
411421
if !id.is_empty() && !hash_str.is_empty() {
412422
calls.push(ProxyCall::ExpandRef { id, hash_str });
413-
} else { return vec![]; } // malformed — pass through
423+
} else { return vec![]; }
414424
}
415425
n if n == REMEMBER_TOOL_NAME => {
416426
let key = inp.get("key").and_then(Value::as_str).unwrap_or("").to_string();
@@ -425,6 +435,11 @@ fn collect_proxy_tool_calls(resp: &Value) -> Vec<ProxyCall> {
425435
calls.push(ProxyCall::Recall { id, key });
426436
} else { return vec![]; }
427437
}
438+
n if n == LIST_REFS_TOOL_NAME => {
439+
if !id.is_empty() {
440+
calls.push(ProxyCall::ListRefs { id });
441+
}
442+
}
428443
_ => return vec![], // non-proxy tool → client must handle
429444
}
430445
}
@@ -440,6 +455,55 @@ fn lookup_expand(hash_str: &str, state: &AppState) -> Result<String> {
440455
Ok(body)
441456
}
442457

458+
/// Look up `attr` inside an `<omc:ref .../>` marker and return its value.
///
/// The attribute must appear as ` attr="value"` (preceded by a single space
/// and using double quotes). The returned slice borrows from `marker` and
/// excludes the quotes. Returns `None` when the attribute is absent or its
/// closing quote is missing.
fn extract_marker_attr<'a>(marker: &'a str, attr: &str) -> Option<&'a str> {
    // The leading space keeps a short name such as `h` from matching the
    // tail of a longer attribute name.
    let opener = [" ", attr, "=\""].concat();
    let (_, after_opener) = marker.split_once(opener.as_str())?;
    let (value, _) = after_opener.split_once('"')?;
    Some(value)
}
465+
466+
/// Walk a JSON Value tree and collect every `<omc:ref/>` marker (deduplicated).
467+
/// Returns `[{"hash": "...", "bytes": N}, ...]`.
468+
fn find_markers_in_value(val: &Value) -> Vec<Value> {
469+
let mut seen = std::collections::HashSet::<String>::new();
470+
let mut results = Vec::<Value>::new();
471+
find_markers_rec(val, &mut seen, &mut results);
472+
results
473+
}
474+
475+
fn find_markers_rec(
476+
val: &Value,
477+
seen: &mut std::collections::HashSet<String>,
478+
out: &mut Vec<Value>,
479+
) {
480+
match val {
481+
Value::String(s) => {
482+
let mut pos = 0usize;
483+
while let Some(rel) = s[pos..].find("<omc:ref") {
484+
let abs = pos + rel;
485+
let end = s[abs..].find("/>").map(|e| abs + e + 2).unwrap_or(s.len());
486+
let marker = &s[abs..end];
487+
let hash = extract_marker_attr(marker, "hash_str")
488+
.or_else(|| extract_marker_attr(marker, "h"))
489+
.map(|h| h.to_string());
490+
let bytes: Option<u64> = extract_marker_attr(marker, "bytes")
491+
.or_else(|| extract_marker_attr(marker, "b"))
492+
.and_then(|b| b.parse().ok());
493+
if let Some(h) = hash {
494+
if seen.insert(h.clone()) {
495+
out.push(json!({ "hash": h, "bytes": bytes }));
496+
}
497+
}
498+
pos = end;
499+
}
500+
}
501+
Value::Array(arr) => { for v in arr { find_markers_rec(v, seen, out); } }
502+
Value::Object(map) => { for (_, v) in map { find_markers_rec(v, seen, out); } }
503+
_ => {}
504+
}
505+
}
506+
443507
fn rebuild_response(status: StatusCode, headers: &HeaderMap, body: Bytes) -> Response {
444508
let mut resp = Response::builder().status(status);
445509
for (k, v) in headers.iter() {
@@ -1186,12 +1250,23 @@ fn inject_proxy_tools(req: &mut Value) {
11861250
"required": ["key"]
11871251
}
11881252
}));
1253+
1254+
// ── omc_proxy_list_refs ─────────────────────────────────────────────────
1255+
tools_arr.push(json!({
1256+
"name": LIST_REFS_TOOL_NAME,
1257+
"description": "Return a JSON array describing every <omc:ref/> marker \
1258+
currently present in the conversation context. Each entry \
1259+
has {\"hash\", \"bytes\"} so you can decide which to expand. \
1260+
Takes no arguments.",
1261+
"input_schema": { "type": "object", "properties": {}, "required": [] }
1262+
}));
11891263
}
11901264

1191-
/// Compatibility shim callers that used inject_expand_tool still work.
1265+
/// Compatibility shim -- callers that used inject_expand_tool still work.
11921266
#[allow(dead_code)]
11931267
fn inject_expand_tool(req: &mut Value) { inject_proxy_tools(req); }
11941268

1269+
11951270
#[cfg(test)]
11961271
mod tests {
11971272
use super::*;
@@ -1927,8 +2002,35 @@ mod tests {
19272002
let tools = req["tools"].as_array().expect("tools array must exist");
19282003
let names: Vec<&str> = tools.iter()
19292004
.filter_map(|t| t["name"].as_str()).collect();
1930-
assert!(names.contains(&EXPAND_TOOL_NAME), "expand_ref must be injected");
1931-
assert!(names.contains(&REMEMBER_TOOL_NAME), "remember must be injected");
1932-
assert!(names.contains(&RECALL_TOOL_NAME), "recall must be injected");
2005+
assert!(names.contains(&EXPAND_TOOL_NAME), "expand_ref must be injected");
2006+
assert!(names.contains(&REMEMBER_TOOL_NAME), "remember must be injected");
2007+
assert!(names.contains(&RECALL_TOOL_NAME), "recall must be injected");
2008+
assert!(names.contains(&LIST_REFS_TOOL_NAME), "list_refs must be injected");
2009+
}
2010+
2011+
/// `find_markers_in_value` discovers every `<omc:ref>` marker in a JSON tree
/// (including inside nested arrays/objects), deduplicates by hash, and
/// reports the byte size carried by each marker.
#[test]
fn list_refs_finds_markers_in_value() {
    // Plain literals: nothing is interpolated, so `format!` is unnecessary.
    let text_with_marker =
        "Here is a compressed block: <omc:ref h=\"1234567\" b=\"4096\"/> and that's it.";
    let text_with_two =
        "<omc:ref h=\"1234567\" b=\"4096\"/> and again <omc:ref h=\"9999999\" b=\"512\"/>";
    let val = json!({
        "messages": [
            {"role": "user", "content": text_with_marker},
            {"role": "assistant", "content": [
                {"type": "text", "text": text_with_two}
            ]},
            // Duplicate of first marker — should be deduped
            {"role": "user", "content": text_with_marker},
        ]
    });

    let markers = find_markers_in_value(&val);
    assert_eq!(markers.len(), 2, "must find exactly 2 distinct hashes");

    // Check hash and size together: the byte cost is what the LLM uses to
    // decide which refs are worth expanding, so it must survive extraction.
    let found: Vec<(&str, Option<u64>)> = markers
        .iter()
        .filter_map(|m| Some((m["hash"].as_str()?, m["bytes"].as_u64())))
        .collect();
    assert!(found.contains(&("1234567", Some(4096))));
    assert!(found.contains(&("9999999", Some(512))));
}
19342036
}

0 commit comments

Comments
 (0)