Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions crates/rpg-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,11 @@ enum Commands {
/// Autonomous LLM-driven semantic lifting (fire-and-forget)
#[cfg(feature = "lift")]
Lift {
/// LLM provider: "anthropic" or "openai"
/// LLM provider: "anthropic", "openai", or "ollama" (local, no API key)
#[arg(long, default_value = "anthropic")]
provider: String,

/// Model override (default: haiku for anthropic, gpt-4o-mini for openai)
/// Model override (default: haiku for anthropic, gpt-4o-mini for openai, qwen2.5-coder:3b for ollama)
#[arg(long)]
model: Option<String>,

Expand Down Expand Up @@ -1114,12 +1114,13 @@ fn cmd_lift(
anyhow::bail!("No RPG found. Run `rpg-encoder build` first.");
}

// Resolve API key from arg or environment
// Resolve API key from arg or environment. Local providers (ollama) need none.
let api_key = api_key
.map(String::from)
.or_else(|| match provider_name {
"anthropic" => std::env::var("ANTHROPIC_API_KEY").ok(),
"openai" => std::env::var("OPENAI_API_KEY").ok(),
"ollama" => Some(String::new()), // local daemon, no key required
_ => None,
})
.ok_or_else(|| {
Expand Down
3 changes: 2 additions & 1 deletion crates/rpg-lift/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@ repository.workspace = true
description = "Autonomous LLM-driven semantic lifting for RPG"

[features]
default = ["anthropic", "openai"]
default = ["anthropic", "openai", "ollama"]
anthropic = []
openai = []
ollama = []

[dependencies]
rpg-core = { workspace = true }
Expand Down
166 changes: 166 additions & 0 deletions crates/rpg-lift/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,109 @@ impl LlmProvider for OpenAiProvider {
}
}

// ---------------------------------------------------------------------------
// Ollama (local) — native /api/chat, no API key, zero cost
// ---------------------------------------------------------------------------

/// LLM provider that talks to an Ollama daemon over HTTP.
///
/// Requests are posted to `{base_url}/api/chat`. No API key is sent, and both
/// per-token cost accessors of the `LlmProvider` impl return `0.0`.
#[cfg(feature = "ollama")]
pub struct OllamaProvider {
/// Model name placed in the `model` field of every chat request.
model: String,
/// Root URL of the daemon; `/api/chat` is appended for each request.
base_url: String,
/// HTTP agent built once in `new` and reused for every request.
agent: ureq::Agent,
}

#[cfg(feature = "ollama")]
impl OllamaProvider {
    /// Model requested when the caller does not name one.
    pub const DEFAULT_MODEL: &str = "qwen2.5-coder:3b";
    /// Daemon address used when the caller does not supply a base URL.
    const DEFAULT_BASE_URL: &str = "http://localhost:11434";

    /// Builds a provider for the given model and daemon URL.
    ///
    /// `None` for either argument selects [`Self::DEFAULT_MODEL`] or the
    /// default base URL (`http://localhost:11434`) respectively.
    pub fn new(model: Option<String>, base_url: Option<String>) -> Self {
        // Local models can be slow on CPU, so each request may take up to
        // five minutes end to end before the agent gives up.
        let request_timeout = std::time::Duration::from_secs(5 * 60);
        let http_config = ureq::config::Config::builder()
            .timeout_global(Some(request_timeout))
            .build();

        let model = model.unwrap_or_else(|| Self::DEFAULT_MODEL.to_owned());
        let base_url = base_url.unwrap_or_else(|| Self::DEFAULT_BASE_URL.to_owned());

        Self {
            model,
            base_url,
            agent: ureq::Agent::new_with_config(http_config),
        }
    }
}

/// Converts a decoded, non-streaming Ollama `/api/chat` reply into an
/// [`LlmResponse`].
///
/// The function only inspects already-parsed JSON and performs no I/O, so it
/// can be unit tested without a live daemon.
///
/// # Errors
///
/// - [`ProviderError::Api`] when the body carries an `error` field. The status
///   is fixed at 400 because the JSON body alone does not include the HTTP
///   status. A string payload is used verbatim; any other JSON value is kept
///   in serialized form so the daemon's diagnostic is never discarded.
/// - [`ProviderError::EmptyResponse`] when `message.content` is missing, is
///   not a string, or is the empty string.
#[cfg(feature = "ollama")]
fn parse_ollama_response(json: &Value) -> Result<LlmResponse, ProviderError> {
    if let Some(err) = json.get("error") {
        // Prefer the plain string; fall back to the raw JSON rather than a
        // generic placeholder when the payload is an object, number, etc.
        let message = err.as_str().map_or_else(|| err.to_string(), str::to_owned);
        return Err(ProviderError::Api {
            status: 400,
            message,
        });
    }

    let text = json
        .get("message")
        .and_then(|m| m.get("content"))
        .and_then(Value::as_str)
        .filter(|s| !s.is_empty())
        .ok_or(ProviderError::EmptyResponse)?
        .to_string();

    // Token counters are optional in the reply; absent values stay `None`.
    let input_tokens = json.get("prompt_eval_count").and_then(Value::as_u64);
    let output_tokens = json.get("eval_count").and_then(Value::as_u64);

    Ok(LlmResponse {
        text,
        input_tokens,
        output_tokens,
    })
}

#[cfg(feature = "ollama")]
impl LlmProvider for OllamaProvider {
    /// Sends one system + user exchange to `{base_url}/api/chat` with
    /// streaming disabled and temperature 0, and returns the parsed reply.
    ///
    /// # Errors
    ///
    /// - [`ProviderError::Http`] when the request cannot be sent, or when the
    ///   daemon answers with a non-2xx status and no usable JSON `error` body.
    /// - [`ProviderError::Parse`] when a 2xx reply is not valid JSON.
    /// - Whatever `parse_ollama_response` returns for the decoded body,
    ///   including the daemon's own `error` message on failed requests.
    fn complete(&self, system: &str, user: &str) -> Result<LlmResponse, ProviderError> {
        let url = format!("{}/api/chat", self.base_url.trim_end_matches('/'));

        let body = serde_json::json!({
            "model": self.model,
            "stream": false,
            "options": {"temperature": 0},
            "messages": [
                {"role": "system", "content": system},
                {"role": "user", "content": user}
            ]
        });

        // By default the HTTP client reports non-2xx statuses as an error
        // before the body is read, which would throw away the daemon's JSON
        // `error` message (e.g. "model not found, try pulling it first").
        // Disable that for this request so the body can be inspected first.
        let mut response = self
            .agent
            .post(&url)
            .config()
            .http_status_as_error(false)
            .build()
            .header("content-type", "application/json")
            .send_json(&body)
            .map_err(|e| ProviderError::Http(e.to_string()))?;

        let status = response.status();
        let parsed: Result<Value, _> = response.body_mut().read_json();

        match parsed {
            // Success, or a failure that carries a structured `error` field:
            // the response parser knows how to report both.
            Ok(json) if status.is_success() || json.get("error").is_some() => {
                parse_ollama_response(&json)
            }
            // A 2xx reply whose body is not JSON is a decoding problem.
            Err(e) if status.is_success() => Err(ProviderError::Parse(e.to_string())),
            // Non-2xx without a usable error body: report the bare status.
            _ => Err(ProviderError::Http(format!(
                "http status: {}",
                status.as_u16()
            ))),
        }
    }

    /// Returns the model name this provider sends with each request.
    fn model_name(&self) -> &str {
        &self.model
    }

    /// Input-token price per million tokens; always zero for this provider.
    fn cost_per_mtok_input(&self) -> f64 {
        0.0 // local inference is free
    }

    /// Output-token price per million tokens; always zero for this provider.
    fn cost_per_mtok_output(&self) -> f64 {
        0.0
    }
}

/// Create a provider from CLI arguments.
pub fn create_provider(
provider_name: &str,
Expand All @@ -316,6 +419,11 @@ pub fn create_provider(
model.map(String::from),
base_url.map(String::from),
))),
#[cfg(feature = "ollama")]
"ollama" => Ok(Box::new(OllamaProvider::new(
model.map(String::from),
base_url.map(String::from),
))),
other => Err(ProviderError::Http(format!(
"unknown provider: '{}'. Available: {}",
other,
Expand All @@ -331,5 +439,63 @@ pub fn available_providers() -> Vec<&'static str> {
"anthropic",
#[cfg(feature = "openai")]
"openai",
#[cfg(feature = "ollama")]
"ollama",
]
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A well-formed reply yields the message text and both token counters.
    #[cfg(feature = "ollama")]
    #[test]
    fn parse_ollama_extracts_text_and_token_counts() {
        let reply = serde_json::json!({
            "message": {"role": "assistant", "content": "fetch user record, validate id"},
            "prompt_eval_count": 412,
            "eval_count": 17
        });

        let parsed = parse_ollama_response(&reply).unwrap();

        assert_eq!(parsed.text, "fetch user record, validate id");
        assert_eq!(parsed.input_tokens, Some(412));
        assert_eq!(parsed.output_tokens, Some(17));
    }

    /// Empty `message.content` is reported as `EmptyResponse`.
    #[cfg(feature = "ollama")]
    #[test]
    fn parse_ollama_empty_content_is_empty_response() {
        let reply = serde_json::json!({"message": {"content": ""}});
        let outcome = parse_ollama_response(&reply);
        assert!(matches!(outcome, Err(ProviderError::EmptyResponse)));
    }

    /// A body with an `error` field is surfaced as an API error.
    #[cfg(feature = "ollama")]
    #[test]
    fn parse_ollama_surfaces_api_error() {
        let reply = serde_json::json!({"error": "model 'x' not found, try pulling it first"});
        let outcome = parse_ollama_response(&reply);
        assert!(matches!(outcome, Err(ProviderError::Api { .. })));
    }

    /// With no overrides the provider uses the default model and costs nothing.
    #[cfg(feature = "ollama")]
    #[test]
    fn ollama_provider_defaults_to_qwen_coder_and_zero_cost() {
        let provider = OllamaProvider::new(None, None);
        assert_eq!(provider.model_name(), "qwen2.5-coder:3b");
        assert_eq!(provider.cost_per_mtok_input(), 0.0);
        assert_eq!(provider.cost_per_mtok_output(), 0.0);
    }

    /// The factory accepts "ollama" with an empty API key and lists it as available.
    #[cfg(feature = "ollama")]
    #[test]
    fn create_provider_supports_ollama_without_api_key() {
        let provider = create_provider("ollama", "", None, None).unwrap();
        assert_eq!(provider.model_name(), "qwen2.5-coder:3b");

        let listed = available_providers();
        assert!(listed.contains(&"ollama"));
    }
}
4 changes: 2 additions & 2 deletions crates/rpg-mcp/src/params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,13 +273,13 @@ pub(crate) struct DetectCyclesParams {
/// Parameters for the `auto_lift` tool.
#[derive(Deserialize, JsonSchema)]
pub(crate) struct AutoLiftParams {
/// LLM provider: "anthropic", "openai", or any OpenAI-compatible endpoint.
/// LLM provider: "anthropic", "openai", "ollama" (local, no API key), or any OpenAI-compatible endpoint.
pub(crate) provider: String,
/// API key for the provider. Use this OR api_key_env (not both). Prefer api_key_env to avoid exposing keys in tool call transcripts.
pub(crate) api_key: Option<String>,
/// Environment variable name containing the API key (e.g., "ANTHROPIC_API_KEY"). Safer than passing the key directly — the key never appears in tool call logs.
pub(crate) api_key_env: Option<String>,
/// Model override (default: claude-haiku-4-5-20251001 for anthropic, gpt-4o-mini for openai).
/// Model override (default: claude-haiku-4-5-20251001 for anthropic, gpt-4o-mini for openai, qwen2.5-coder:3b for ollama).
pub(crate) model: Option<String>,
/// Base URL for OpenAI-compatible endpoints (e.g., "https://openrouter.ai/api/v1" for OpenRouter, "https://generativelanguage.googleapis.com/v1beta/openai" for Gemini).
pub(crate) base_url: Option<String>,
Expand Down
5 changes: 3 additions & 2 deletions crates/rpg-mcp/src/prompts/server_instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,9 @@ Fallbacks when no delegation mechanism is available:
`finalize_lifting` ONCE after all scopes are complete — calling it mid-flow
auto-routes pending entities against incomplete signals and locks the
hierarchy in early.
- **CLI autonomous lift** (unlifted entities only): `rpg-encoder lift --provider anthropic|openai`
uses an external API key directly — no agent subscription involvement. **After the CLI
- **CLI autonomous lift** (unlifted entities only): `rpg-encoder lift --provider anthropic|openai|ollama`
— `anthropic`/`openai` use an external API key directly; `ollama` runs a fully-local model
(default `qwen2.5-coder:3b`, no API key, no cost). No agent subscription involvement. **After the CLI
finishes, call `reload_rpg` in this session** so the server picks up the updated
`.rpg/graph.json` — otherwise subsequent queries will still see the pre-lift state.
Note: the CLI lifts entities with no features; it does not re-lift stale entities
Expand Down
Loading