Skip to content

Commit d1b0677

Browse files
add llm_judge, llm_compare, file_ls builtins
- New: llm_judge(responses[], criteria, model?) — score N responses 1-10 via structured LLM eval; returns an array of {idx, score, reason}
- New: llm_compare(a, b, criteria, model?) — pick the winner of two responses; returns {winner: "A"|"B", reason: "..."}
- New: file_ls(path?) — list directory entries as a sorted string array
- ALL_BUILTINS list deduplicated and extended with the new LLM builtins; is_known_builtin also updated for first-class function support

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 7ba616f commit d1b0677

2 files changed

Lines changed: 147 additions & 4 deletions

File tree

omnimcode-core/src/interpreter.rs

Lines changed: 57 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2229,9 +2229,11 @@ impl Interpreter {
22292229
| "sha256" | "sha512" | "base64_encode" | "base64_decode"
22302230
// LLM builtins
22312231
| "llm_call" | "llm_chat" | "llm_embed" | "llm_models" | "llm_system"
2232-
| "llm_stream_print"
2232+
| "llm_stream_print" | "llm_judge" | "llm_compare"
22332233
| "llm_tools" | "substrate_embed"
22342234
| "batch_llm_call" | "batch_llm_chat"
2235+
// File utilities
2236+
| "file_ls"
22352237
// HTTP builtins
22362238
| "http_get" | "http_post" | "http_post_json" | "http_put" | "http_delete"
22372239
| "now_iso" | "now_unix" | "format_time" | "parse_time"
@@ -4867,6 +4869,22 @@ impl Interpreter {
48674869
let exists = std::path::Path::new(&path).exists();
48684870
Ok(Value::HInt(HInt::new(if exists { 1 } else { 0 })))
48694871
}
4872+
"file_ls" => {
4873+
let path = if args.is_empty() {
4874+
".".to_string()
4875+
} else {
4876+
self.eval_expr(&args[0])?.to_display_string()
4877+
};
4878+
let entries = std::fs::read_dir(&path)
4879+
.map_err(|e| format!("file_ls: {}", e))?;
4880+
let mut names: Vec<Value> = Vec::new();
4881+
for entry in entries.flatten() {
4882+
let name = entry.file_name().to_string_lossy().to_string();
4883+
names.push(Value::String(name));
4884+
}
4885+
names.sort_by(|a, b| a.to_display_string().cmp(&b.to_display_string()));
4886+
Ok(Value::Array(HArray::from_vec(names)))
4887+
}
48704888
// Introspection and utility.
48714889
"type_of" => {
48724890
if args.is_empty() {
@@ -9601,6 +9619,39 @@ impl Interpreter {
96019619
};
96029620
crate::llm_builtins::llm_stream_print(&prompt, system.as_deref(), model.as_deref())
96039621
}
9622+
// llm_judge(responses, criteria, model?) -> dict[]
9623+
// Score each response in an array; returns [{idx, score, reason}] sorted best-first.
9624+
"llm_judge" => {
9625+
if args.len() < 2 {
9626+
return Err("llm_judge requires (responses, criteria, model?)".to_string());
9627+
}
9628+
let responses = self.eval_expr(&args[0])?;
9629+
let criteria = self.eval_expr(&args[1])?.to_display_string();
9630+
let model = if args.len() > 2 {
9631+
match self.eval_expr(&args[2])? {
9632+
Value::Null => None,
9633+
v => Some(v.to_display_string()),
9634+
}
9635+
} else { None };
9636+
crate::llm_builtins::llm_judge(&responses, &criteria, model.as_deref())
9637+
}
9638+
// llm_compare(a, b, criteria, model?) -> dict
9639+
// Pick the better of two responses; returns {winner: "A"|"B", reason: "..."}.
9640+
"llm_compare" => {
9641+
if args.len() < 3 {
9642+
return Err("llm_compare requires (a, b, criteria, model?)".to_string());
9643+
}
9644+
let a = self.eval_expr(&args[0])?.to_display_string();
9645+
let b = self.eval_expr(&args[1])?.to_display_string();
9646+
let criteria = self.eval_expr(&args[2])?.to_display_string();
9647+
let model = if args.len() > 3 {
9648+
match self.eval_expr(&args[3])? {
9649+
Value::Null => None,
9650+
v => Some(v.to_display_string()),
9651+
}
9652+
} else { None };
9653+
crate::llm_builtins::llm_compare(&a, &b, &criteria, model.as_deref())
9654+
}
96049655
// llm_models() -> dict[]
96059656
// Returns the list of models available from the active provider.
96069657
// Each element is a dict with at least {"id": string, "provider": string}.
@@ -14555,8 +14606,10 @@ pub(crate) const HEAL_BUILTIN_NAMES: &[&str] = &[
1455514606
"re_match", "re_find", "re_find_all", "re_replace", "re_split",
1455614607
"json_parse", "json_stringify", "json_extract", "str_format",
1455714608
"sha256", "sha512", "base64_encode", "base64_decode",
14558-
// LLM builtins (Anthropic API — enabled with llm-builtins feature)
14559-
"llm_call", "llm_chat", "llm_embed",
14609+
// LLM builtins
14610+
"llm_call", "llm_chat", "llm_embed", "llm_models", "llm_system",
14611+
"llm_stream_print", "llm_judge", "llm_compare",
14612+
"llm_tools", "substrate_embed",
1456014613
"batch_llm_call", "batch_llm_chat",
1456114614
// Native HTTP builtins
1456214615
"http_get", "http_post", "http_post_json", "http_put", "http_delete",
@@ -14633,7 +14686,7 @@ pub(crate) const HEAL_BUILTIN_NAMES: &[&str] = &[
1463314686
"is_singularity", "ensure_clean", "collapse", "invert",
1463414687
"quantize", "quantization_ratio",
1463514688
// I/O
14636-
"read_file", "write_file", "file_exists", "print",
14689+
"read_file", "write_file", "file_exists", "file_ls", "print",
1463714690
"println", "print_raw",
1463814691
// Time / random / conversion / introspection
1463914692
"now_ms", "random_int", "random_float", "random_seed",

omnimcode-core/src/llm_builtins.rs

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,96 @@ pub fn llm_stream_print(
205205
Err("llm_stream_print: recompile with --features native-llm".to_string())
206206
}
207207

208+
/// `llm_judge(responses, criteria, model?) -> dict[]`
///
/// Scores each response (array of strings) against `criteria` and returns
/// an array of `{idx, score, reason}` dicts sorted best-first.
///
/// Every element of `responses` is rendered with `to_display_string()` and
/// numbered from 0 in the prompt. The model is asked to reply with a JSON
/// array only; the first parseable JSON array found in the reply is used.
/// Entries are ordered by descending numeric `score` (ties keep the order the
/// model produced; entries without a numeric `score` go last).
///
/// Returns an empty array when the reply is not a string or contains no
/// parseable JSON array.
///
/// # Errors
///
/// Returns `Err` when `responses` is not an array, or when the underlying
/// `llm_call_sys` call fails.
// Gated on the feature so it does not collide with the
// `#[cfg(not(feature = "native-llm"))]` stub of the same name.
#[cfg(feature = "native-llm")]
pub fn llm_judge(
    responses: &Value,
    criteria: &str,
    model_override: Option<&str>,
) -> Result<Value, String> {
    let items = match responses {
        Value::Array(a) => a.items.borrow().clone(),
        _ => return Err("llm_judge: first arg must be an array of strings".to_string()),
    };

    let mut prompt = format!(
        "Score each response below (1-10) based on: {criteria}\n\
         Return ONLY JSON: [{{\"idx\":0,\"score\":8,\"reason\":\"...\"}}, ...]\n\n"
    );
    for (i, item) in items.iter().enumerate() {
        prompt.push_str(&format!("[{}]: {}\n---\n", i, item.to_display_string()));
    }

    let sys = "You are a precise evaluator. Output only valid JSON with no extra text.";
    let raw = llm_call_sys(&prompt, model_override, Some(sys))?;
    let text = match raw {
        Value::String(s) => s,
        _ => return Ok(Value::Array(HArray::from_vec(vec![]))),
    };

    // Entries lacking a numeric score compare as the lowest possible score.
    let score_of = |entry: &serde_json::Value| {
        entry
            .get("score")
            .and_then(serde_json::Value::as_f64)
            .unwrap_or(f64::NEG_INFINITY)
    };

    // The model may wrap the JSON in prose or code fences, so try every '['
    // as a candidate start. '[' is ASCII, hence `start` is always a char
    // boundary and slicing from it cannot panic. The streaming deserializer
    // parses exactly one value and ignores whatever follows it, which avoids
    // both slicing at arbitrary byte offsets and re-parsing every prefix.
    for (start, _) in text.match_indices('[') {
        let mut stream = serde_json::Deserializer::from_str(&text[start..])
            .into_iter::<serde_json::Value>();
        if let Some(Ok(mut parsed)) = stream.next() {
            if let serde_json::Value::Array(entries) = &mut parsed {
                // Stable sort, highest score first.
                entries.sort_by(|a, b| {
                    score_of(b)
                        .partial_cmp(&score_of(a))
                        .unwrap_or(std::cmp::Ordering::Equal)
                });
            }
            return Ok(json_to_value(&parsed));
        }
    }
    Ok(Value::Array(HArray::from_vec(vec![])))
}
247+
248+
#[cfg(not(feature = "native-llm"))]
249+
pub fn llm_judge(
250+
_responses: &Value,
251+
_criteria: &str,
252+
_model_override: Option<&str>,
253+
) -> Result<Value, String> {
254+
Err("llm_judge: recompile with --features native-llm".to_string())
255+
}
256+
257+
/// `llm_compare(a, b, criteria, model?) -> dict`
///
/// Compares two responses and returns `{winner: "A"|"B", reason: "..."}`.
///
/// The two responses are presented to the model labelled `[A]` and `[B]`
/// together with `criteria`, and the model is asked to reply with a JSON
/// object only. The first parseable JSON object found in the reply is
/// converted with `json_to_value` and returned as-is.
///
/// Returns `Value::Null` when the reply is not a string or contains no
/// parseable JSON object.
///
/// # Errors
///
/// Returns `Err` when the underlying `llm_call_sys` call fails.
// Gated on the feature so it does not collide with the
// `#[cfg(not(feature = "native-llm"))]` stub of the same name.
#[cfg(feature = "native-llm")]
pub fn llm_compare(
    a: &str,
    b: &str,
    criteria: &str,
    model_override: Option<&str>,
) -> Result<Value, String> {
    let prompt = format!(
        "Compare these two responses based on: {criteria}\n\n\
         [A]: {a}\n\n[B]: {b}\n\n\
         Return ONLY JSON: {{\"winner\":\"A\",\"reason\":\"...\"}}"
    );
    let sys = "You are an impartial judge. Output only valid JSON.";
    let raw = llm_call_sys(&prompt, model_override, Some(sys))?;
    let text = match raw {
        Value::String(s) => s,
        _ => return Ok(Value::Null),
    };

    // The model may wrap the JSON in prose or code fences, so try every '{'
    // as a candidate start. '{' is ASCII, hence `start` is always a char
    // boundary and slicing from it cannot panic. The streaming deserializer
    // parses exactly one value and ignores whatever follows it, which avoids
    // both slicing at arbitrary byte offsets and re-parsing every prefix.
    for (start, _) in text.match_indices('{') {
        let mut stream = serde_json::Deserializer::from_str(&text[start..])
            .into_iter::<serde_json::Value>();
        if let Some(Ok(parsed)) = stream.next() {
            return Ok(json_to_value(&parsed));
        }
    }
    Ok(Value::Null)
}
287+
288+
#[cfg(not(feature = "native-llm"))]
289+
pub fn llm_compare(
290+
_a: &str,
291+
_b: &str,
292+
_criteria: &str,
293+
_model_override: Option<&str>,
294+
) -> Result<Value, String> {
295+
Err("llm_compare: recompile with --features native-llm".to_string())
296+
}
297+
208298
/// `batch_llm_call(prompts, model?, concurrency?) -> string[]`
209299
///
210300
/// Send multiple prompts to the LLM sequentially and return all responses in

0 commit comments

Comments
 (0)