Skip to content

Commit 3ba719f

Browse files
v0.20.0 apiproxy: omc_proxy_forward — LLM-to-LLM routing via the proxy
Adds omc_proxy_forward(endpoint, message, model, context_refs[]) as an injected MCP tool. When the LLM calls it, the proxy: 1. Resolves context_refs hashes from MemoryStore, appending each expanded body as additional context in the sub-request. 2. Constructs a /v1/messages request to the target endpoint (default: same upstream as the calling LLM; 'self' is an alias). 3. Forwards original request auth headers so the sub-request authenticates identically to the parent. 4. Compresses the sub-LLM's reply to an omc:ref marker when it exceeds the rewrite_threshold; returns the marker as the tool_result so the calling LLM pays only marker cost for the reply, not full-text cost. Key insight: the MemoryStore is shared across all agents connected to the same proxy instance, so a 100KB context block stored once can be referenced by N agents at ~50 bytes each — 2000× reduction for context passing between agents at scale. Also: forward_calls stat added to /_stats. Tests: 22/22 green (+1 assertion in proxy_tools_injected).
1 parent adc097e commit 3ba719f

1 file changed

Lines changed: 123 additions & 0 deletions

File tree

omnimcode-apiproxy/src/main.rs

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ const EXPAND_TOOL_NAME: &str = "omc_proxy_expand_ref";
5353
const REMEMBER_TOOL_NAME: &str = "omc_proxy_remember";
5454
const RECALL_TOOL_NAME: &str = "omc_proxy_recall";
5555
const LIST_REFS_TOOL_NAME: &str = "omc_proxy_list_refs";
56+
const FORWARD_TOOL_NAME: &str = "omc_proxy_forward";
5657

5758
#[derive(Parser, Debug, Clone)]
5859
#[command(name = "omnimcode-apiproxy", version = env!("CARGO_PKG_VERSION"))]
@@ -109,6 +110,8 @@ struct RewriteStats {
109110
recall_calls: u64,
110111
/// Bytes saved by replacing repeated base64 image blocks with text markers.
111112
bytes_saved_images: u64,
113+
/// omc_proxy_forward calls resolved by the proxy.
114+
forward_calls: u64,
112115
}
113116

114117
/// Per-conversation state the proxy remembers across turns. Key is a stable
@@ -289,6 +292,75 @@ async fn handle_messages(State(state): State<AppState>, req: Request) -> Respons
289292
/// tool_result synthetically appended, and re-call upstream. Bounded to
290293
/// MAX_EXPAND_ROUNDS to prevent runaway loops if the LLM keeps asking
291294
/// to expand.
295+
/// Dispatch an `omc_proxy_forward` call: send `message` (optionally with
296+
/// expanded `context_refs`) to `endpoint`/v1/messages and return the
297+
/// sub-LLM's reply text (compressed to a marker if large).
298+
async fn dispatch_forward(
299+
id: &str, endpoint: &str, message: &str, model: &str,
300+
context_refs: &[String], headers: &HeaderMap, state: &AppState,
301+
) -> (String, String) {
302+
// Resolve context_refs from MemoryStore
303+
let mut parts = vec![message.to_string()];
304+
for hash_str in context_refs {
305+
if let Ok(h) = hash_str.parse::<i64>() {
306+
if let Ok(Some(text)) = state.store.recall(Some(PROXY_CACHE_NAMESPACE), h) {
307+
parts.push(format!("---\nContext ref {}:\n{}", hash_str, text));
308+
}
309+
}
310+
}
311+
let user_content = parts.join("\n\n");
312+
313+
let target_url = if endpoint.is_empty() || endpoint == "self" {
314+
format!("{}/v1/messages", state.upstream.trim_end_matches('/'))
315+
} else {
316+
format!("{}/v1/messages", endpoint.trim_end_matches('/'))
317+
};
318+
319+
let effective_model = if model.is_empty() { "claude-haiku-4-5" } else { model };
320+
let req_body = serde_json::json!({
321+
"model": effective_model,
322+
"max_tokens": 4096,
323+
"messages": [{"role": "user", "content": user_content}]
324+
});
325+
326+
let mut req = state.http.post(&target_url).json(&req_body);
327+
for (k, v) in headers.iter() {
328+
if k == "x-api-key" || k == "authorization" || k == "anthropic-version" {
329+
req = req.header(k, v);
330+
}
331+
}
332+
333+
match req.send().await {
334+
Ok(resp) => {
335+
match resp.json::<Value>().await {
336+
Ok(body) => {
337+
let reply = body["content"][0]["text"]
338+
.as_str().unwrap_or("").to_string();
339+
let result = if reply.len() > state.rewrite_threshold {
340+
let hash = state.store
341+
.store(PROXY_CACHE_NAMESPACE, &reply)
342+
.unwrap_or(-1);
343+
let preview: String = reply.chars()
344+
.take(state.preview_bytes).collect();
345+
format!("<omc:ref h=\"{}\" b=\"{}\" preview={:?}/>",
346+
hash, reply.len(), preview)
347+
} else {
348+
reply
349+
};
350+
state.stats.lock().unwrap().forward_calls += 1;
351+
info!("omc_proxy_forward: endpoint={:?} model={:?} → {} bytes",
352+
endpoint, effective_model, result.len());
353+
(id.to_string(), result)
354+
}
355+
Err(e) => (id.to_string(),
356+
format!("[omc_proxy_forward: decode error: {}]", e))
357+
}
358+
}
359+
Err(e) => (id.to_string(),
360+
format!("[omc_proxy_forward: network error: {}]", e))
361+
}
362+
}
363+
292364
async fn handle_with_expand_loop(
293365
state: &AppState, headers: &HeaderMap, initial_body: Bytes,
294366
) -> Response {
@@ -384,6 +456,10 @@ async fn handle_with_expand_loop(
384456
(id.clone(),
385457
serde_json::to_string_pretty(&markers).unwrap_or_default())
386458
}
459+
ProxyCall::Forward { id, endpoint, message, model, context_refs } => {
460+
dispatch_forward(id, endpoint, message, model, context_refs,
461+
headers, state).await
462+
}
387463
};
388464
tool_results.push(json!({
389465
"type": "tool_result",
@@ -410,6 +486,9 @@ enum ProxyCall {
410486
Remember { id: String, key: String, value: String },
411487
Recall { id: String, key: String },
412488
ListRefs { id: String },
489+
/// Route a message to another LLM endpoint and return its compressed reply.
490+
Forward { id: String, endpoint: String, message: String, model: String,
491+
context_refs: Vec<String> },
413492
}
414493

415494
fn collect_proxy_tool_calls(resp: &Value) -> Vec<ProxyCall> {
@@ -448,6 +527,23 @@ fn collect_proxy_tool_calls(resp: &Value) -> Vec<ProxyCall> {
448527
calls.push(ProxyCall::ListRefs { id });
449528
}
450529
}
530+
n if n == FORWARD_TOOL_NAME => {
531+
let endpoint = inp.get("endpoint")
532+
.and_then(Value::as_str).unwrap_or("").to_string();
533+
let message = inp.get("message")
534+
.and_then(Value::as_str).unwrap_or("").to_string();
535+
let model = inp.get("model")
536+
.and_then(Value::as_str).unwrap_or("claude-opus-4-5").to_string();
537+
let context_refs = inp.get("context_refs")
538+
.and_then(Value::as_array)
539+
.map(|arr| arr.iter()
540+
.filter_map(|v| v.as_str().map(|s| s.to_string()))
541+
.collect())
542+
.unwrap_or_default();
543+
if !id.is_empty() && !message.is_empty() {
544+
calls.push(ProxyCall::Forward { id, endpoint, message, model, context_refs });
545+
}
546+
}
451547
_ => return vec![], // non-proxy tool → client must handle
452548
}
453549
}
@@ -651,6 +747,7 @@ async fn stats_endpoint(State(state): State<AppState>) -> Response {
651747
"streaming_passthrough": s.streaming_passthrough,
652748
"remember_calls": s.remember_calls,
653749
"recall_calls": s.recall_calls,
750+
"forward_calls": s.forward_calls,
654751
})).unwrap();
655752
(StatusCode::OK,
656753
[(axum::http::header::CONTENT_TYPE, HeaderValue::from_static("application/json"))],
@@ -1303,6 +1400,31 @@ fn inject_proxy_tools(req: &mut Value) {
13031400
Takes no arguments.",
13041401
"input_schema": { "type": "object", "properties": {}, "required": [] }
13051402
}));
1403+
1404+
// ── omc_proxy_forward ────────────────────────────────────────────────
1405+
tools_arr.push(json!({
1406+
"name": FORWARD_TOOL_NAME,
1407+
"description": "Route a message to another LLM and return its compressed \
1408+
reply as an <omc:ref/> marker. Use `endpoint` to target a \
1409+
different proxy instance (default: same upstream). \
1410+
Optionally pass `context_refs` (list of hash strings from \
1411+
omc_proxy_list_refs) to expand relevant prior context into \
1412+
the sub-request — sharing memory costs O(marker) not O(content).",
1413+
"input_schema": {
1414+
"type": "object",
1415+
"properties": {
1416+
"message": { "type": "string",
1417+
"description": "The message to send to the target LLM." },
1418+
"model": { "type": "string",
1419+
"description": "Model to use (default: claude-opus-4-5)." },
1420+
"endpoint": { "type": "string",
1421+
"description": "Target proxy/API base URL (omit or 'self' for same upstream)." },
1422+
"context_refs": { "type": "array", "items": { "type": "string" },
1423+
"description": "Hash strings of context markers to include in the sub-request." }
1424+
},
1425+
"required": ["message"]
1426+
}
1427+
}));
13061428
}
13071429

13081430
/// Compatibility shim -- callers that used inject_expand_tool still work.
@@ -2050,6 +2172,7 @@ mod tests {
20502172
assert!(names.contains(&REMEMBER_TOOL_NAME), "remember must be injected");
20512173
assert!(names.contains(&RECALL_TOOL_NAME), "recall must be injected");
20522174
assert!(names.contains(&LIST_REFS_TOOL_NAME), "list_refs must be injected");
2175+
assert!(names.contains(&FORWARD_TOOL_NAME), "forward must be injected");
20532176
}
20542177

20552178
/// find_markers_in_value correctly discovers all <omc:ref> markers in a

0 commit comments

Comments
 (0)