add llm_stream_print + voting_ensemble demo

RandomCoder-lab · claude · RandomCoder-lab · commit 7ba616fa8455 · 2026-05-18T20:36:10.000-05:00
- New: llm_stream_print(prompt, system?, model?) — SSE streaming to stdout
  token-by-token; returns full accumulated text; works for both Anthropic and
  OpenAI providers (handles different delta shapes per provider)
- New: examples/demos/voting_ensemble.omc — N agents vote on a question via
  batch_llm_call, majority wins; tests tech choice, startup strategy, ML algo
- Docs: llm_stream_print entry in llm_workflow category
- llm_call now accepts optional 3rd system arg to match examples/lib/llm.omc

Co-Authored-By: Claude Sonnet 4.6 &lt;noreply@anthropic.com&gt;
diff --git a/examples/demos/voting_ensemble.omc b/examples/demos/voting_ensemble.omc
@@ -0,0 +1,118 @@
+# voting_ensemble.omc — multiple agents vote on a decision, majority wins
+#
+# Pattern:
+#   1. Ask N agents the same question (batch_llm_call)
+#   2. Each agent returns one of the allowed choices
+#   3. Tally votes, find majority winner
+#   4. Optional: ask dissenting agents to reconsider (soft consensus)
+
+fn extract_choice(text, choices) {
+    h i = 0
+    while i < arr_len(choices) {
+        if str_contains(str_upper(text), str_upper(choices[i])) {
+            return choices[i]
+        }
+        i = i + 1
+    }
+    return choices[0]
+}
+
+fn tally_votes(votes) {
+    h counts = {}
+    h i = 0
+    while i < arr_len(votes) {
+        h v = votes[i]
+        if dict_has(counts, v) {
+            counts[v] = counts[v] + 1
+        } else {
+            counts[v] = 1
+        }
+        i = i + 1
+    }
+    return counts
+}
+
+fn find_winner(counts) {
+    h best = ""
+    h best_count = 0
+    h keys = dict_keys(counts)
+    h i = 0
+    while i < arr_len(keys) {
+        h k = keys[i]
+        if counts[k] > best_count {
+            best_count = counts[k]
+            best = k
+        }
+        i = i + 1
+    }
+    return best
+}
+
+fn vote_ensemble(question, choices, n_agents, system_prompt) {
+    print(str_concat("Question: ", question))
+    print(str_concat("Choices: ", arr_join(choices, " | ")))
+    print(str_concat("Agents: ", to_str(n_agents)))
+    print("")
+
+    h choice_str = arr_join(choices, ", ")
+    h prompt = str_concat(
+        question, "\n\n",
+        "Reply with ONLY one of: ", choice_str,
+        ". No explanation. Just the choice word."
+    )
+    h sys = str_concat(
+        system_prompt, " ",
+        "You must respond with exactly one word from: ", choice_str
+    )
+
+    h prompts = arr_fill({prompt: prompt, system: sys}, n_agents)
+    h responses = batch_llm_call(prompts)
+
+    h votes = par_map(responses, fn(r) {
+        return extract_choice(r, choices)
+    })
+
+    h counts = tally_votes(votes)
+    h winner = find_winner(counts)
+
+    print("Vote results:")
+    h i = 0
+    while i < arr_len(choices) {
+        h c = choices[i]
+        h cnt = 0
+        if dict_has(counts, c) { cnt = counts[c] }
+        print(str_concat("  ", c, ": ", to_str(cnt), "/", to_str(n_agents)))
+        i = i + 1
+    }
+    print(str_concat("Winner: ", winner))
+    return {winner: winner, counts: counts, votes: votes}
+}
+
+# ── Decision 1: tech choice ──────────────────────────────────────────────────
+h r1 = vote_ensemble(
+    "Which programming language is best for building production AI systems in 2025?",
+    ["Python", "Rust", "Go"],
+    5,
+    "You are a senior software architect with 10 years of production AI experience."
+)
+print("")
+
+# ── Decision 2: strategy ─────────────────────────────────────────────────────
+h r2 = vote_ensemble(
+    "Should a startup with $500k runway prioritize hiring or marketing?",
+    ["Hiring", "Marketing"],
+    7,
+    "You are a startup advisor who has helped 50+ early-stage companies."
+)
+print("")
+
+# ── Decision 3: algorithm ────────────────────────────────────────────────────
+h r3 = vote_ensemble(
+    "For a real-time recommendation system with 10M users, which approach is better?",
+    ["Collaborative", "Content-based", "Hybrid"],
+    6,
+    "You are a machine learning engineer specializing in recommender systems."
+)
+print("")
+
+print("=== Ensemble voting complete ===")
diff --git a/omnimcode-core/src/docs.rs b/omnimcode-core/src/docs.rs
@@ -1352,6 +1352,20 @@ pub const BUILTINS: &[BuiltinDoc] = &[
 print(answer)"#,
         unique_to_omc: true,
     },
+    BuiltinDoc {
+        name: "llm_stream_print", category: "llm_workflow",
+        signature: "(prompt: string, system?: string, model?: string) -> string",
+        description: concat!(
+            "Stream the LLM response token-by-token to stdout as it arrives, then return the full accumulated text. ",
+            "Uses SSE streaming (stream:true in the API body). ",
+            "Works with both Anthropic and OpenAI providers (auto-detected via LLM_PROVIDER). ",
+            "Ideal for interactive CLI tools and demos where you want visible token-by-token output. ",
+            "Returns the complete response string when finished."
+        ),
+        example: r#"h full = llm_stream_print("Write a haiku about recursion", "You are a poet.")
+print(str_concat("Total chars: ", to_str(str_len(full))))"#,
+        unique_to_omc: true,
+    },
     BuiltinDoc {
         name: "llm_tools", category: "llm_workflow",
         signature: "(messages: dict[], tools: dict[], model?: string) -> dict",
diff --git a/omnimcode-core/src/interpreter.rs b/omnimcode-core/src/interpreter.rs
@@ -2229,6 +2229,7 @@ impl Interpreter {
             | "sha256" | "sha512" | "base64_encode" | "base64_decode"
             // LLM builtins
             | "llm_call" | "llm_chat" | "llm_embed" | "llm_models" | "llm_system"
+            | "llm_stream_print"
             | "llm_tools" | "substrate_embed"
             | "batch_llm_call" | "batch_llm_chat"
             // HTTP builtins
@@ -9574,6 +9575,32 @@ impl Interpreter {
                 };
                 crate::llm_builtins::llm_system(&prompt, &system, model.as_deref())
             }
+            // llm_stream_print(prompt, system?, model?) -> string
+            //   Streams LLM response to stdout token-by-token, returns full text.
+            //   Uses SSE streaming API. system defaults to null (no system prompt).
+            "llm_stream_print" => {
+                if args.is_empty() {
+                    return Err("llm_stream_print requires (prompt, system?, model?)".to_string());
+                }
+                let prompt = self.eval_expr(&args[0])?.to_display_string();
+                let system = if args.len() > 1 {
+                    match self.eval_expr(&args[1])? {
+                        Value::Null => None,
+                        v => Some(v.to_display_string()),
+                    }
+                } else {
+                    None
+                };
+                let model = if args.len() > 2 {
+                    match self.eval_expr(&args[2])? {
+                        Value::Null => None,
+                        v => Some(v.to_display_string()),
+                    }
+                } else {
+                    None
+                };
+                crate::llm_builtins::llm_stream_print(&prompt, system.as_deref(), model.as_deref())
+            }
             // llm_models() -> dict[]
             //   Returns the list of models available from the active provider.
             //   Each element is a dict with at least {"id": string, "provider": string}.
diff --git a/omnimcode-core/src/llm_builtins.rs b/omnimcode-core/src/llm_builtins.rs
@@ -92,6 +92,119 @@ pub fn llm_system(
     llm_call_sys(prompt, model_override, Some(system))
 }
 
+/// `llm_stream_print(prompt, system?, model?) -> string`
+///
+/// Streams the LLM response token-by-token to stdout, then returns the full
+/// accumulated text. Uses SSE streaming (stream:true). Supports both Anthropic
+/// and OpenAI providers (auto-detected via LLM_PROVIDER env var).
+#[cfg(feature = "native-llm")]
+pub fn llm_stream_print(
+    prompt: &str,
+    system: Option<&str>,
+    model_override: Option<&str>,
+) -> Result<Value, String> {
+    use std::io::{BufRead, BufReader, Write};
+
+    let cfg = Config::from_env()?;
+    let model = model_override.unwrap_or(&cfg.model).to_string();
+
+    // Build messages list
+    let mut messages: Vec<ChatMessage> = Vec::new();
+    if let Some(sys) = system {
+        if !sys.is_empty() {
+            messages.push(ChatMessage { role: "system".to_string(), content: sys.to_string() });
+        }
+    }
+    messages.push(ChatMessage { role: "user".to_string(), content: prompt.to_string() });
+
+    match cfg.provider {
+        Provider::Anthropic => {
+            let mut system_parts: Vec<String> = Vec::new();
+            let mut msgs_json: Vec<serde_json::Value> = Vec::new();
+            for m in &messages {
+                if m.role == "system" {
+                    system_parts.push(m.content.clone());
+                } else {
+                    msgs_json.push(serde_json::json!({ "role": m.role, "content": m.content }));
+                }
+            }
+            let mut body = serde_json::json!({
+                "model": model, "max_tokens": 4096,
+                "messages": msgs_json, "stream": true
+            });
+            if !system_parts.is_empty() {
+                body["system"] = serde_json::Value::String(system_parts.join("\n\n"));
+            }
+            let resp = ureq::post(&cfg.base_url)
+                .set("Content-Type", "application/json")
+                .set("Authorization", &format!("Bearer {}", cfg.api_key))
+                .set("anthropic-version", "2023-06-01")
+                .set("x-api-key", &cfg.api_key)
+                .send_json(body)
+                .map_err(|e| format!("llm_stream HTTP error: {}", e))?;
+            let reader = BufReader::new(resp.into_reader());
+            let mut full_text = String::new();
+            for line in reader.lines() {
+                let line = line.map_err(|e| format!("llm_stream read error: {}", e))?;
+                if let Some(data) = line.strip_prefix("data: ") {
+                    if data == "[DONE]" { break; }
+                    if let Ok(event) = serde_json::from_str::<serde_json::Value>(data) {
+                        if event["type"] == "content_block_delta" {
+                            if let Some(text) = event["delta"]["text"].as_str() {
+                                print!("{}", text);
+                                let _ = std::io::stdout().flush();
+                                full_text.push_str(text);
+                            }
+                        }
+                    }
+                }
+            }
+            println!();
+            Ok(Value::String(full_text))
+        }
+        Provider::OpenAI => {
+            let msgs_json: Vec<serde_json::Value> = messages
+                .iter()
+                .map(|m| serde_json::json!({ "role": m.role, "content": m.content }))
+                .collect();
+            let body = serde_json::json!({
+                "model": model, "messages": msgs_json, "stream": true
+            });
+            let resp = ureq::post(&cfg.base_url)
+                .set("Content-Type", "application/json")
+                .set("Authorization", &format!("Bearer {}", cfg.api_key))
+                .send_json(body)
+                .map_err(|e| format!("llm_stream HTTP error: {}", e))?;
+            let reader = BufReader::new(resp.into_reader());
+            let mut full_text = String::new();
+            for line in reader.lines() {
+                let line = line.map_err(|e| format!("llm_stream read error: {}", e))?;
+                if let Some(data) = line.strip_prefix("data: ") {
+                    if data == "[DONE]" { break; }
+                    if let Ok(event) = serde_json::from_str::<serde_json::Value>(data) {
+                        if let Some(text) = event["choices"][0]["delta"]["content"].as_str() {
+                            print!("{}", text);
+                            let _ = std::io::stdout().flush();
+                            full_text.push_str(text);
+                        }
+                    }
+                }
+            }
+            println!();
+            Ok(Value::String(full_text))
+        }
+    }
+}
+
+#[cfg(not(feature = "native-llm"))]
+pub fn llm_stream_print(
+    _prompt: &str,
+    _system: Option<&str>,
+    _model_override: Option<&str>,
+) -> Result<Value, String> {
+    Err("llm_stream_print: recompile with --features native-llm".to_string())
+}
+
 /// `batch_llm_call(prompts, model?, concurrency?) -> string[]`
 ///
 /// Send multiple prompts to the LLM sequentially and return all responses in