Skip to content

Commit ba1993e

Browse files
authored
refactor: remove Ollama and add custom provider configuration (#546)
* feat(config): add ExecutionConfig for runtime behavior settings

  Add ExecutionConfig struct for controlling runtime execution:
  - max_agent_threads: Maximum concurrent agent threads
  - max_tool_threads: Maximum concurrent tool executions
  - command_timeout_secs: Timeout for shell commands
  - http_timeout_secs: Timeout for HTTP requests
  - streaming_enabled: Enable/disable streaming responses

  This allows users to customize execution behavior via config files.

* feat(config): add custom providers configuration support

  Add CustomProviderConfig and CustomModelConfig types for user-defined LLM providers in config.toml:
  - base_url: Provider API endpoint
  - api_type: openai/anthropic/openai-compatible
  - api_key_env: Environment variable for API key
  - default_model: Default model for provider
  - models: Available models with capabilities
  - headers: Custom HTTP headers

  Enables users to add their own providers without code changes.

* feat(cli): add runtime execution configuration arguments

  Add CLI arguments for controlling execution behavior:
  - --max-agent-threads: Maximum concurrent agent threads
  - --max-tool-threads: Maximum concurrent tool executions
  - --command-timeout: Timeout for shell commands in seconds
  - --http-timeout: Timeout for HTTP requests in seconds
  - --no-streaming: Disable streaming responses

  These arguments override config file settings at runtime.

* refactor: remove cortex-ollama crate and all backend references

  Remove Ollama integration from the backend:
  - Delete cortex-ollama crate entirely
  - Remove from workspace members and dependencies
  - Remove Ollama model presets from cortex-common
  - Remove OllamaEmbedder from cortex-engine
  - Remove Ollama provider references from CLI utilities
  - Remove Ollama options from app-server config
  - Clean up related imports and references

  Users can now add custom local providers via the new custom providers configuration system.
* refactor(gui): remove Ollama provider from cortex-gui

  Remove all Ollama provider references from the GUI:
  - Delete OllamaProvider.ts implementation
  - Remove Ollama from AIProvider enum (backend)
  - Remove Ollama from LLMProviderType union type
  - Remove Ollama from provider configuration objects
  - Remove Ollama from model selectors and settings
  - Remove Ollama from onboarding wizard
  - Update package-lock.json dependencies

  The custom providers system now allows users to configure local inference endpoints without built-in Ollama support.
1 parent 6a4d543 commit ba1993e

43 files changed

Lines changed: 329 additions & 2608 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Cargo.lock

Lines changed: 0 additions & 25 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,6 @@ members = [
8080
# CLI - LM Studio Integration
8181
"src/cortex-lmstudio",
8282

83-
# CLI - Ollama Integration
84-
"src/cortex-ollama",
85-
8683
# CLI - TUI
8784
"src/cortex-core",
8885
"src/cortex-tui",
@@ -224,7 +221,6 @@ cortex-experimental = { path = "src/cortex-experimental" }
224221
cortex-linux-sandbox = { path = "src/cortex-linux-sandbox" }
225222
cortex-windows-sandbox = { path = "src/cortex-windows-sandbox" }
226223
cortex-lmstudio = { path = "src/cortex-lmstudio" }
227-
cortex-ollama = { path = "src/cortex-ollama" }
228224
cortex-skills = { path = "src/cortex-skills" }
229225
cortex-prompt-harness = { path = "src/cortex-prompt-harness" }
230226

src/cortex-app-server/src/config.rs

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -374,9 +374,6 @@ pub struct ProviderConfig {
374374
pub azure_api_key: Option<String>,
375375
/// Azure OpenAI deployment.
376376
pub azure_deployment: Option<String>,
377-
/// Ollama base URL.
378-
#[serde(default = "default_ollama_url")]
379-
pub ollama_url: String,
380377
/// Default provider.
381378
#[serde(default = "default_provider")]
382379
pub default_provider: String,
@@ -385,10 +382,6 @@ pub struct ProviderConfig {
385382
pub default_model: String,
386383
}
387384

388-
fn default_ollama_url() -> String {
389-
"http://localhost:11434".to_string()
390-
}
391-
392385
fn default_provider() -> String {
393386
"openai".to_string()
394387
}

src/cortex-cli/src/agent_cmd/utils.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ pub fn validate_model_name(model: &str) -> Result<String> {
3939
"mistral",
4040
"xai",
4141
"deepseek",
42-
"ollama",
42+
"groq",
4343
];
4444
let provider = parts[0].to_lowercase();
4545
if !valid_providers.contains(&provider.as_str()) {

src/cortex-cli/src/cli/args.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,26 @@ pub struct InteractiveArgs {
208208
#[arg(long = "search", default_value_t = false, help_heading = "Features")]
209209
pub web_search: bool,
210210

211+
/// Maximum number of concurrent agent threads
212+
#[arg(long = "max-agent-threads", value_name = "N", help_heading = "Execution")]
213+
pub max_agent_threads: Option<usize>,
214+
215+
/// Maximum number of concurrent tool executions
216+
#[arg(long = "max-tool-threads", value_name = "N", help_heading = "Execution")]
217+
pub max_tool_threads: Option<usize>,
218+
219+
/// Timeout for shell commands in seconds
220+
#[arg(long = "command-timeout", value_name = "SECONDS", help_heading = "Execution")]
221+
pub command_timeout: Option<u64>,
222+
223+
/// Timeout for HTTP requests in seconds
224+
#[arg(long = "http-timeout", value_name = "SECONDS", help_heading = "Execution")]
225+
pub http_timeout: Option<u64>,
226+
227+
/// Disable streaming responses
228+
#[arg(long = "no-streaming", default_value_t = false, help_heading = "Execution")]
229+
pub no_streaming: bool,
230+
211231
/// Set log verbosity level (error, warn, info, debug, trace)
212232
#[arg(
213233
long = "log-level",

src/cortex-cli/src/models_cmd.rs

Lines changed: 0 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -411,35 +411,6 @@ fn get_available_models() -> Vec<ModelInfo> {
411411
input_cost_per_million: Some(0.55),
412412
output_cost_per_million: Some(2.19),
413413
},
414-
// Local/OSS models (Ollama) - no API cost, run locally
415-
ModelInfo {
416-
id: "qwen2.5-coder:32b".to_string(),
417-
name: "Qwen 2.5 Coder 32B".to_string(),
418-
provider: "ollama".to_string(),
419-
capabilities: ModelCapabilities {
420-
vision: false,
421-
tools: true,
422-
parallel_tools: false, // Ollama models typically don't support parallel tools
423-
streaming: true,
424-
json_mode: true,
425-
},
426-
input_cost_per_million: None, // Local model, no API cost
427-
output_cost_per_million: None,
428-
},
429-
ModelInfo {
430-
id: "llama3.3:70b".to_string(),
431-
name: "Llama 3.3 70B".to_string(),
432-
provider: "ollama".to_string(),
433-
capabilities: ModelCapabilities {
434-
vision: false,
435-
tools: true,
436-
parallel_tools: false, // Ollama models typically don't support parallel tools
437-
streaming: true,
438-
json_mode: true,
439-
},
440-
input_cost_per_million: None, // Local model, no API cost
441-
output_cost_per_million: None,
442-
},
443414
]
444415
}
445416

src/cortex-cli/src/stats_cmd.rs

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -286,11 +286,6 @@ fn get_model_pricing(model: &str) -> ModelPricing {
286286
input_per_million: 2.0,
287287
output_per_million: 10.0,
288288
},
289-
// Local/OSS (free)
290-
m if m.contains("ollama") || m.contains("qwen") || m.contains("llama3") => ModelPricing {
291-
input_per_million: 0.0,
292-
output_per_million: 0.0,
293-
},
294289
// Default (conservative estimate)
295290
_ => ModelPricing {
296291
input_per_million: 3.0,
@@ -319,16 +314,16 @@ fn infer_provider(model: &str) -> String {
319314
"openai".to_string()
320315
} else if model_lower.contains("gemini") {
321316
"google".to_string()
322-
} else if model_lower.contains("llama") && !model_lower.contains("ollama") {
317+
} else if model_lower.contains("llama") {
323318
"groq".to_string()
324319
} else if model_lower.contains("mistral") || model_lower.contains("codestral") {
325320
"mistral".to_string()
326321
} else if model_lower.contains("deepseek") {
327322
"deepseek".to_string()
328323
} else if model_lower.contains("grok") {
329324
"xai".to_string()
330-
} else if model_lower.contains("ollama") || model_lower.contains("qwen") {
331-
"ollama".to_string()
325+
} else if model_lower.contains("qwen") {
326+
"deepseek".to_string() // Qwen models often available via DeepSeek
332327
} else {
333328
"unknown".to_string()
334329
}
@@ -735,9 +730,9 @@ mod tests {
735730
let cost = calculate_cost("claude-sonnet-4", 1_000_000, 1_000_000);
736731
assert!((cost - 18.0).abs() < 0.001);
737732

738-
// Free local model
739-
let cost = calculate_cost("ollama:llama3", 1_000_000, 1_000_000);
740-
assert_eq!(cost, 0.0);
733+
// GPT-4o: $2.50/$10 per 1M
734+
let cost = calculate_cost("gpt-4o", 1_000_000, 1_000_000);
735+
assert!((cost - 12.5).abs() < 0.001);
741736
}
742737

743738
#[test]

src/cortex-cli/src/utils/model.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ pub const KNOWN_PROVIDERS: &[&str] = &[
1313
"mistral",
1414
"xai",
1515
"deepseek",
16-
"ollama",
16+
"groq",
1717
"lmstudio",
1818
"llamacpp",
1919
"vllm",

src/cortex-cli/src/utils/validation.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ pub fn validate_model_name(model: &str) -> Result<String> {
303303
"mistral",
304304
"xai",
305305
"deepseek",
306-
"ollama",
306+
"groq",
307307
];
308308
let provider = parts[0].to_lowercase();
309309
if !valid_providers.contains(&provider.as_str()) {

0 commit comments

Comments
 (0)