userFRM · userFRM · May 29, 2026 · May 29, 2026 · May 29, 2026 · May 29, 2026
diff --git a/crates/rpg-cli/src/main.rs b/crates/rpg-cli/src/main.rs
@@ -134,11 +134,11 @@ enum Commands {
     /// Autonomous LLM-driven semantic lifting (fire-and-forget)
     #[cfg(feature = "lift")]
     Lift {
-        /// LLM provider: "anthropic" or "openai"
+        /// LLM provider: "anthropic", "openai", or "ollama" (local, no API key)
         #[arg(long, default_value = "anthropic")]
         provider: String,
 
-        /// Model override (default: haiku for anthropic, gpt-4o-mini for openai)
+        /// Model override (default: haiku for anthropic, gpt-4o-mini for openai, qwen2.5-coder:3b for ollama)
         #[arg(long)]
         model: Option<String>,
 
@@ -1114,12 +1114,13 @@ fn cmd_lift(
         anyhow::bail!("No RPG found. Run `rpg-encoder build` first.");
     }
 
-    // Resolve API key from arg or environment
+    // Resolve API key from arg or environment. Local providers (ollama) need none.
     let api_key = api_key
         .map(String::from)
         .or_else(|| match provider_name {
             "anthropic" => std::env::var("ANTHROPIC_API_KEY").ok(),
             "openai" => std::env::var("OPENAI_API_KEY").ok(),
+            "ollama" => Some(String::new()), // local daemon, no key required
             _ => None,
         })
         .ok_or_else(|| {

diff --git a/crates/rpg-lift/Cargo.toml b/crates/rpg-lift/Cargo.toml
@@ -8,9 +8,10 @@ repository.workspace = true
 description = "Autonomous LLM-driven semantic lifting for RPG"
 
 [features]
-default = ["anthropic", "openai"]
+default = ["anthropic", "openai", "ollama"]
 anthropic = []
 openai = []
+ollama = []
 
 [dependencies]
 rpg-core = { workspace = true }

diff --git a/crates/rpg-lift/src/provider.rs b/crates/rpg-lift/src/provider.rs
@@ -297,6 +297,109 @@ impl LlmProvider for OpenAiProvider {
     }
 }
 
+// ---------------------------------------------------------------------------
+// Ollama (local) — native /api/chat, no API key, zero cost
+// ---------------------------------------------------------------------------
+
+/// Provider backed by a locally running Ollama daemon. It needs no API key,
+/// and both `cost_per_mtok_*` rates are zero: lifting is paid for once, in
+/// local compute at index time, rather than in per-token API charges.
+#[cfg(feature = "ollama")]
+pub struct OllamaProvider {
+    model: String, // model tag sent as `model` in every `/api/chat` request
+    base_url: String, // daemon root URL; `/api/chat` is appended per request
+    agent: ureq::Agent, // HTTP agent built in `new`, carrying the global timeout
+}
+
+#[cfg(feature = "ollama")]
+impl OllamaProvider {
+    /// Default model: a small code-tuned local model, fast on CPU.
+    pub const DEFAULT_MODEL: &str = "qwen2.5-coder:3b";
+    const DEFAULT_BASE_URL: &str = "http://localhost:11434"; // used when no base URL is given
+
+    /// Build a provider; `None` falls back to the default model / base URL.
+    pub fn new(model: Option<String>, base_url: Option<String>) -> Self {
+        let config = ureq::config::Config::builder()
+            .http_status_as_error(false) // keep 4xx/5xx bodies: Ollama explains failures in JSON
+            .timeout_global(Some(std::time::Duration::from_mins(5))) // CPU inference can be slow
+            .build();
+        Self {
+            model: model.unwrap_or_else(|| Self::DEFAULT_MODEL.to_string()),
+            base_url: base_url.unwrap_or_else(|| Self::DEFAULT_BASE_URL.to_string()),
+            agent: ureq::Agent::new_with_config(config),
+        }
+    }
+}
+
+/// Turn the JSON body of a non-streaming Ollama `/api/chat` reply into an
+/// `LlmResponse`. Side-effect free, so tests need no running daemon.
+#[cfg(feature = "ollama")]
+fn parse_ollama_response(json: &Value) -> Result<LlmResponse, ProviderError> {
+    // A top-level `error` field takes precedence over any message content.
+    if let Some(failure) = json.get("error") {
+        let message = failure.as_str().unwrap_or("unknown error").to_string();
+        return Err(ProviderError::Api { status: 400, message });
+    }
+
+    // Missing, non-string, and empty `message.content` are all "no reply".
+    let content = json.get("message").and_then(|msg| msg.get("content"));
+    let text = match content.and_then(Value::as_str) {
+        Some(reply) if !reply.is_empty() => reply.to_string(),
+        _ => return Err(ProviderError::EmptyResponse),
+    };
+
+    // Token counters are optional; absent or non-u64 values become `None`.
+    let counter = |key: &str| json.get(key).and_then(Value::as_u64);
+    Ok(LlmResponse {
+        text,
+        input_tokens: counter("prompt_eval_count"),
+        output_tokens: counter("eval_count"),
+    })
+}
+
+#[cfg(feature = "ollama")]
+impl LlmProvider for OllamaProvider {
+    /// Send one system + user exchange to `/api/chat` and parse the reply.
+    fn complete(&self, system: &str, user: &str) -> Result<LlmResponse, ProviderError> {
+        // Tolerate a trailing slash on the configured base URL.
+        let endpoint = format!("{}/api/chat", self.base_url.trim_end_matches('/'));
+        let messages = serde_json::json!([
+            {"role": "system", "content": system},
+            {"role": "user", "content": user}
+        ]);
+        let payload = serde_json::json!({
+            "model": self.model,
+            "stream": false,
+            "options": {"temperature": 0},
+            "messages": messages
+        });
+        let request = self
+            .agent
+            .post(&endpoint)
+            .header("content-type", "application/json");
+        let mut reply = request
+            .send_json(&payload)
+            .map_err(|e| ProviderError::Http(e.to_string()))?;
+        let parsed: Value = reply
+            .body_mut()
+            .read_json()
+            .map_err(|e| ProviderError::Parse(e.to_string()))?;
+        parse_ollama_response(&parsed)
+    }
+
+    fn model_name(&self) -> &str {
+        &self.model
+    }
+
+    fn cost_per_mtok_input(&self) -> f64 {
+        0.0 // nothing is billed for local inference
+    }
+
+    fn cost_per_mtok_output(&self) -> f64 {
+        0.0
+    }
+}
+
 /// Create a provider from CLI arguments.
 pub fn create_provider(
     provider_name: &str,
@@ -316,6 +419,11 @@ pub fn create_provider(
             model.map(String::from),
             base_url.map(String::from),
         ))),
+        #[cfg(feature = "ollama")]
+        "ollama" => Ok(Box::new(OllamaProvider::new(
+            model.map(String::from),
+            base_url.map(String::from),
+        ))),
         other => Err(ProviderError::Http(format!(
             "unknown provider: '{}'. Available: {}",
             other,
@@ -331,5 +439,63 @@ pub fn available_providers() -> Vec<&'static str> {
         "anthropic",
         #[cfg(feature = "openai")]
         "openai",
+        #[cfg(feature = "ollama")]
+        "ollama",
     ]
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[cfg(feature = "ollama")]
+    #[test]
+    fn parse_ollama_extracts_text_and_token_counts() {
+        let reply = serde_json::json!({
+            "message": {"role": "assistant", "content": "fetch user record, validate id"},
+            "prompt_eval_count": 412,
+            "eval_count": 17
+        });
+        let parsed = parse_ollama_response(&reply).unwrap();
+        let tokens = (parsed.input_tokens, parsed.output_tokens);
+        assert_eq!(parsed.text, "fetch user record, validate id");
+        assert_eq!(tokens, (Some(412), Some(17)));
+    }
+
+    /// An empty `content` string must not be returned as a successful reply.
+    #[cfg(feature = "ollama")]
+    #[test]
+    fn parse_ollama_empty_content_is_empty_response() {
+        // Only the `message.content` path is present, and it is empty.
+        let reply = serde_json::json!({"message": {"content": ""}});
+        let outcome = parse_ollama_response(&reply);
+        assert!(matches!(outcome, Err(ProviderError::EmptyResponse)));
+    }
+
+    /// A top-level `error` field is reported as an API error.
+    #[cfg(feature = "ollama")]
+    #[test]
+    fn parse_ollama_surfaces_api_error() {
+        // Body contains only the `error` field, no `message`.
+        let reply = serde_json::json!({"error": "model 'x' not found, try pulling it first"});
+        let outcome = parse_ollama_response(&reply);
+        assert!(matches!(outcome, Err(ProviderError::Api { .. })));
+    }
+
+    #[cfg(feature = "ollama")]
+    #[test]
+    fn ollama_provider_defaults_to_qwen_coder_and_zero_cost() {
+        let provider = OllamaProvider::new(None, None);
+        let costs = (provider.cost_per_mtok_input(), provider.cost_per_mtok_output());
+        assert_eq!(provider.model_name(), "qwen2.5-coder:3b");
+        assert_eq!(costs, (0.0, 0.0));
+    }
+
+    #[cfg(feature = "ollama")]
+    #[test]
+    fn create_provider_supports_ollama_without_api_key() {
+        assert!(available_providers().contains(&"ollama"));
+        let provider = create_provider("ollama", "", None, None).unwrap();
+        assert_eq!(provider.model_name(), "qwen2.5-coder:3b");
+    }
+}
diff --git a/crates/rpg-mcp/src/params.rs b/crates/rpg-mcp/src/params.rs
@@ -273,13 +273,13 @@ pub(crate) struct DetectCyclesParams {
 /// Parameters for the `auto_lift` tool.
 #[derive(Deserialize, JsonSchema)]
 pub(crate) struct AutoLiftParams {
-    /// LLM provider: "anthropic", "openai", or any OpenAI-compatible endpoint.
+    /// LLM provider: "anthropic", "openai", "ollama" (local, no API key), or any OpenAI-compatible endpoint.
     pub(crate) provider: String,
     /// API key for the provider. Use this OR api_key_env (not both). Prefer api_key_env to avoid exposing keys in tool call transcripts.
     pub(crate) api_key: Option<String>,
     /// Environment variable name containing the API key (e.g., "ANTHROPIC_API_KEY"). Safer than passing the key directly — the key never appears in tool call logs.
     pub(crate) api_key_env: Option<String>,
-    /// Model override (default: claude-haiku-4-5-20251001 for anthropic, gpt-4o-mini for openai).
+    /// Model override (default: claude-haiku-4-5-20251001 for anthropic, gpt-4o-mini for openai, qwen2.5-coder:3b for ollama).
     pub(crate) model: Option<String>,
     /// Base URL for OpenAI-compatible endpoints (e.g., "https://openrouter.ai/api/v1" for OpenRouter, "https://generativelanguage.googleapis.com/v1beta/openai" for Gemini).
     pub(crate) base_url: Option<String>,

diff --git a/crates/rpg-mcp/src/prompts/server_instructions.md b/crates/rpg-mcp/src/prompts/server_instructions.md
@@ -166,8 +166,9 @@ Fallbacks when no delegation mechanism is available:
   `finalize_lifting` ONCE after all scopes are complete — calling it mid-flow
   auto-routes pending entities against incomplete signals and locks the
   hierarchy in early.
-- **CLI autonomous lift** (unlifted entities only): `rpg-encoder lift --provider anthropic|openai`
-  uses an external API key directly — no agent subscription involvement. **After the CLI
+- **CLI autonomous lift** (unlifted entities only): `rpg-encoder lift --provider anthropic|openai|ollama`
+  — `anthropic`/`openai` use an external API key directly; `ollama` runs a fully-local model
+  (default `qwen2.5-coder:3b`, no API key, no cost). No agent subscription involvement. **After the CLI
   finishes, call `reload_rpg` in this session** so the server picks up the updated
   `.rpg/graph.json` — otherwise subsequent queries will still see the pre-lift state.
   Note: the CLI lifts entities with no features; it does not re-lift stale entities