itwizardo · Sigmachan · Jun 12, 2026 · Jun 14, 2026 · Jun 15, 2026 · Jun 15, 2026
diff --git a/.DS_Store b/.DS_Store
diff --git a/.github/.DS_Store b/.github/.DS_Store
diff --git a/.gitignore b/.gitignore
@@ -10,3 +10,9 @@ archive/
 .claw/sessions/
 .clawhip/
 status-help.txt
+
+# HackCode local artifacts (sessions, agents) — never commit to a public repo
+.hackcode/
+.hackcode-agents/
+# macOS
+.DS_Store
diff --git a/.hackcode-todos.json b/.hackcode-todos.json
diff --git a/.hackcode/.DS_Store b/.hackcode/.DS_Store
diff --git a/.hackcode/sessions/bc692bcac69824a6/session-1776068029370-0.jsonl b/.hackcode/sessions/bc692bcac69824a6/session-1776068029370-0.jsonl
diff --git a/cheatsheets/.DS_Store b/cheatsheets/.DS_Store
diff --git a/rust/.DS_Store b/rust/.DS_Store
diff --git a/rust/.hackcode/sessions/e90808530b36ab3d/session-1775737207380-0.jsonl b/rust/.hackcode/sessions/e90808530b36ab3d/session-1775737207380-0.jsonl
diff --git a/rust/.hackcode/sessions/e90808530b36ab3d/session-1775737663410-0.jsonl b/rust/.hackcode/sessions/e90808530b36ab3d/session-1775737663410-0.jsonl
diff --git a/rust/.hackcode/sessions/e90808530b36ab3d/session-1775738904028-0.jsonl b/rust/.hackcode/sessions/e90808530b36ab3d/session-1775738904028-0.jsonl
diff --git a/rust/.hackcode/sessions/e90808530b36ab3d/session-1775739267268-0.jsonl b/rust/.hackcode/sessions/e90808530b36ab3d/session-1775739267268-0.jsonl
diff --git a/rust/.hackcode/sessions/e90808530b36ab3d/session-1775740141320-0.jsonl b/rust/.hackcode/sessions/e90808530b36ab3d/session-1775740141320-0.jsonl
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
diff --git a/rust/crates/api/src/client.rs b/rust/crates/api/src/client.rs
@@ -40,6 +40,10 @@ impl ProviderClient {
                     Some(meta) if meta.auth_env == "DASHSCOPE_API_KEY" => {
                         OpenAiCompatConfig::dashscope()
                     }
+                    Some(meta) if meta.auth_env == "OPENCODE_API_KEY" => {
+                        OpenAiCompatConfig::opencode()
+                    }
+                    Some(meta) if meta.auth_env == "NIM_API_KEY" => OpenAiCompatConfig::nim(),
                     _ => OpenAiCompatConfig::openai(),
                 };
                 Ok(Self::OpenAi(OpenAiCompatClient::from_env(config)?))
@@ -168,7 +172,7 @@ mod tests {
 
     #[test]
     fn resolves_existing_and_grok_aliases() {
-        assert_eq!(resolve_model_alias("opus"), "claude-opus-4-6");
+        assert_eq!(resolve_model_alias("opus"), "claude-opus-4-8");
         assert_eq!(resolve_model_alias("grok"), "grok-3");
         assert_eq!(resolve_model_alias("grok-mini"), "grok-3-mini");
     }

diff --git a/rust/crates/api/src/providers/mod.rs b/rust/crates/api/src/providers/mod.rs
@@ -78,6 +78,15 @@ const MODEL_REGISTRY: &[(&str, ProviderMetadata)] = &[
             default_base_url: anthropic::DEFAULT_BASE_URL,
         },
     ),
+    (
+        "fable",
+        ProviderMetadata {
+            provider: ProviderKind::Anthropic,
+            auth_env: "ANTHROPIC_API_KEY",
+            base_url_env: "ANTHROPIC_BASE_URL",
+            default_base_url: anthropic::DEFAULT_BASE_URL,
+        },
+    ),
     (
         "grok",
         ProviderMetadata {
@@ -143,9 +152,10 @@ pub fn resolve_model_alias(model: &str) -> String {
         .find_map(|(alias, metadata)| {
             (*alias == lower).then_some(match metadata.provider {
                 ProviderKind::Anthropic => match *alias {
-                    "opus" => "claude-opus-4-6",
+                    "opus" => "claude-opus-4-8",
                     "sonnet" => "claude-sonnet-4-6",
-                    "haiku" => "claude-haiku-4-5-20251213",
+                    "haiku" => "claude-haiku-4-5",
+                    "fable" => "claude-fable-5",
                     _ => trimmed,
                 },
                 ProviderKind::Xai => match *alias {
@@ -154,7 +164,13 @@ pub fn resolve_model_alias(model: &str) -> String {
                     "grok-2" => "grok-2",
                     _ => trimmed,
                 },
-                ProviderKind::OpenAi | ProviderKind::Ollama => trimmed,
+                ProviderKind::OpenAi => match *alias {
+                    // `kimi` is the friendly alias for Moonshot's current flagship
+                    // on DashScope; expand it to the canonical model id.
+                    "kimi" => "kimi-k2.5",
+                    _ => trimmed,
+                },
+                ProviderKind::Ollama => trimmed,
             })
         })
         .map_or_else(|| trimmed.to_string(), ToOwned::to_owned)
@@ -204,6 +220,39 @@ pub fn metadata_for_model(model: &str) -> Option<ProviderMetadata> {
             default_base_url: openai_compat::DEFAULT_DASHSCOPE_BASE_URL,
         });
     }
+    // Moonshot Kimi models (kimi-k2.5, kimi-k1.5, kimi/<id>) also speak the
+    // OpenAI-compat shape on DashScope. Matched before the Ollama colon-heuristic
+    // so a model id like `kimi-k2.5` is not misrouted to a local backend.
+    if canonical.starts_with("kimi/") || canonical.starts_with("kimi-") {
+        return Some(ProviderMetadata {
+            provider: ProviderKind::OpenAi,
+            auth_env: "DASHSCOPE_API_KEY",
+            base_url_env: "DASHSCOPE_BASE_URL",
+            default_base_url: openai_compat::DEFAULT_DASHSCOPE_BASE_URL,
+        });
+    }
+    // opencode "Zen" gateway — OpenAI-compatible, free models (minimax-m3,
+    // deepseek-v4-pro, fable-5-go, glm-5.1, …). Routed via the `opencode/` prefix;
+    // the bare model id is sent on the wire. Key in OPENCODE_API_KEY.
+    if canonical.starts_with("opencode/") {
+        return Some(ProviderMetadata {
+            provider: ProviderKind::OpenAi,
+            auth_env: "OPENCODE_API_KEY",
+            base_url_env: "OPENCODE_BASE_URL",
+            default_base_url: openai_compat::DEFAULT_OPENCODE_BASE_URL,
+        });
+    }
+    // NVIDIA NIM — OpenAI-compatible, free quota. Routed via the `nim/` prefix;
+    // the vendor/model id after the prefix is sent on the wire
+    // (e.g. nim/deepseek-ai/deepseek-r1 → deepseek-ai/deepseek-r1). Key in NIM_API_KEY.
+    if canonical.starts_with("nim/") {
+        return Some(ProviderMetadata {
+            provider: ProviderKind::OpenAi,
+            auth_env: "NIM_API_KEY",
+            base_url_env: "NIM_BASE_URL",
+            default_base_url: openai_compat::DEFAULT_NIM_BASE_URL,
+        });
+    }
     // Ollama local models — HuggingFace models pulled via `ollama pull hf.co/...`
     // and any model containing a colon (e.g. "llama3.2:1b", "gemma:7b")
     if canonical.starts_with("hf.co/")
@@ -229,6 +278,19 @@ pub fn metadata_for_model(model: &str) -> Option<ProviderMetadata> {
 #[must_use]
 pub fn detect_provider_kind(model: &str) -> ProviderKind {
     if let Some(metadata) = metadata_for_model(model) {
+        // An explicitly-configured OpenAI-compatible endpoint takes precedence
+        // over the local-Ollama *fallback heuristic* — model ids like
+        // "qwen2.5-coder:7b" only match Ollama by shape (a colon), so when the
+        // user has pointed OPENAI_BASE_URL at their own server, route there.
+        // Explicit provider metadata (anthropic/grok/dashscope/openai/...) is
+        // resolved before the Ollama branch in `metadata_for_model`, so this
+        // only reinterprets ids that fell through to the Ollama classification.
+        if metadata.provider == ProviderKind::Ollama
+            && std::env::var_os("OPENAI_BASE_URL").is_some()
+            && openai_compat::has_api_key("OPENAI_API_KEY")
+        {
+            return ProviderKind::OpenAi;
+        }
         return metadata.provider;
     }
     // When OPENAI_BASE_URL is set, the user explicitly configured an
@@ -257,7 +319,9 @@ pub fn detect_provider_kind(model: &str) -> ProviderKind {
 pub const fn model_family_identity_for_kind(kind: ProviderKind) -> runtime::ModelFamilyIdentity {
     match kind {
         ProviderKind::Anthropic => runtime::ModelFamilyIdentity::Claude,
-        ProviderKind::Xai | ProviderKind::OpenAi => runtime::ModelFamilyIdentity::Generic,
+        ProviderKind::Xai | ProviderKind::OpenAi | ProviderKind::Ollama => {
+            runtime::ModelFamilyIdentity::Generic
+        }
     }
 }
 
@@ -293,14 +357,27 @@ pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> {
     let canonical = resolve_model_alias(model);
     let base_model = canonical.rsplit('/').next().unwrap_or(canonical.as_str());
     match base_model {
+        "claude-opus-4-8" | "claude-fable-5" => Some(ModelTokenLimit {
+            max_output_tokens: 128_000,
+            context_window_tokens: 1_000_000,
+        }),
         "claude-opus-4-6" => Some(ModelTokenLimit {
             max_output_tokens: 32_000,
             context_window_tokens: 200_000,
         }),
-        "claude-sonnet-4-6" | "claude-haiku-4-5-20251213" => Some(ModelTokenLimit {
+        "claude-sonnet-4-6" => Some(ModelTokenLimit {
             max_output_tokens: 64_000,
+            // Sonnet 4.6 ships a 200K context window by default (the 1M window is
+            // a separate beta opt-in); advertising 1M here would let the preflight
+            // pass oversized requests that the API then rejects.
             context_window_tokens: 200_000,
         }),
+        "claude-haiku-4-5" | "claude-haiku-4-5-20251001" | "claude-haiku-4-5-20251213" => {
+            Some(ModelTokenLimit {
+                max_output_tokens: 64_000,
+                context_window_tokens: 200_000,
+            })
+        }
         "grok-3" | "grok-3-mini" => Some(ModelTokenLimit {
             max_output_tokens: 64_000,
             context_window_tokens: 131_072,

diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs
@@ -20,6 +20,8 @@ pub const DEFAULT_XAI_BASE_URL: &str = "https://api.x.ai/v1";
 pub const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1";
 pub const DEFAULT_DASHSCOPE_BASE_URL: &str = "https://dashscope.aliyuncs.com/compatible-mode/v1";
 pub const DEFAULT_OLLAMA_BASE_URL: &str = "http://localhost:11434/v1";
+pub const DEFAULT_OPENCODE_BASE_URL: &str = "https://opencode.ai/zen/go/v1";
+pub const DEFAULT_NIM_BASE_URL: &str = "https://integrate.api.nvidia.com/v1";
 const REQUEST_ID_HEADER: &str = "request-id";
 const ALT_REQUEST_ID_HEADER: &str = "x-request-id";
 const DEFAULT_INITIAL_BACKOFF: Duration = Duration::from_secs(1);
@@ -43,6 +45,8 @@ const XAI_ENV_VARS: &[&str] = &["XAI_API_KEY"];
 const OPENAI_ENV_VARS: &[&str] = &["OPENAI_API_KEY"];
 const DASHSCOPE_ENV_VARS: &[&str] = &["DASHSCOPE_API_KEY"];
 const OLLAMA_ENV_VARS: &[&str] = &["OLLAMA_API_KEY"];
+const OPENCODE_ENV_VARS: &[&str] = &["OPENCODE_API_KEY"];
+const NIM_ENV_VARS: &[&str] = &["NIM_API_KEY"];
 
 // Provider-specific request body size limits in bytes
 const XAI_MAX_REQUEST_BODY_BYTES: usize = 52_428_800; // 50MB
@@ -101,13 +105,42 @@ impl OpenAiCompatConfig {
         }
     }
 
+    /// opencode "Zen" gateway (`https://opencode.ai/zen/go/v1`) — OpenAI-compatible,
+    /// serves models like minimax-m3, deepseek-v4-pro, fable-5-go, glm-5.1.
+    /// Key comes from the opencode auth store, exported as OPENCODE_API_KEY.
+    #[must_use]
+    pub const fn opencode() -> Self {
+        Self {
+            provider_name: "opencode-go",
+            api_key_env: "OPENCODE_API_KEY",
+            base_url_env: "OPENCODE_BASE_URL",
+            default_base_url: DEFAULT_OPENCODE_BASE_URL,
+            max_request_body_bytes: OPENAI_MAX_REQUEST_BODY_BYTES,
+        }
+    }
+
+    /// NVIDIA NIM (`https://integrate.api.nvidia.com/v1`) — OpenAI-compatible,
+    /// free quota. Serves vendor/model ids (deepseek-ai/deepseek-r1, etc.).
+    #[must_use]
+    pub const fn nim() -> Self {
+        Self {
+            provider_name: "NVIDIA-NIM",
+            api_key_env: "NIM_API_KEY",
+            base_url_env: "NIM_BASE_URL",
+            default_base_url: DEFAULT_NIM_BASE_URL,
+            max_request_body_bytes: OPENAI_MAX_REQUEST_BODY_BYTES,
+        }
+    }
+
     #[must_use]
     pub fn credential_env_vars(self) -> &'static [&'static str] {
         match self.provider_name {
             "xAI" => XAI_ENV_VARS,
             "OpenAI" => OPENAI_ENV_VARS,
             "DashScope" => DASHSCOPE_ENV_VARS,
             "Ollama" => OLLAMA_ENV_VARS,
+            "opencode-go" => OPENCODE_ENV_VARS,
+            "NVIDIA-NIM" => NIM_ENV_VARS,
             _ => &[],
         }
     }
@@ -897,7 +930,7 @@ fn strip_routing_prefix(model: &str) -> &str {
         let prefix = &model[..pos];
         // Only strip if the prefix before "/" is a known routing prefix,
         // not if "/" appears in the middle of the model name for other reasons.
-        if matches!(prefix, "openai" | "xai" | "grok" | "qwen" | "kimi") {
+        if matches!(prefix, "openai" | "xai" | "grok" | "qwen" | "kimi" | "opencode" | "nim") {
             &model[pos + 1..]
         } else {
             model
@@ -1019,9 +1052,29 @@ pub fn build_chat_completion_request(
         payload["reasoning_effort"] = json!(effort);
     }
 
+    // Local aeon (Qwen3.5-MTP) intermittently opens a `<think>` block and never
+    // emits the closing `</think>` within the token budget — even on trivial
+    // turns. The server's `--reasoning-parser qwen3` then cannot split the
+    // output, so `content` comes back empty and the REPL prints "(no response)".
+    // Disable thinking for this model via the Qwen chat template switch so it
+    // answers directly. Keyed on the wire model name alone, whichever backend serves it.
+    if model_disables_thinking(wire_model) {
+        payload["chat_template_kwargs"] = json!({ "enable_thinking": false });
+    }
+
     payload
 }
 
+/// Returns true for models that must run with thinking disabled so an unclosed
+/// `<think>` block cannot swallow the response: the local vLLM "aeon" (Qwen3.5-MTP),
+/// matched as base name `aeon` or a `qwen3.5`/`qwen3_5` prefix (ASCII case-insensitive).
+#[must_use]
+pub fn model_disables_thinking(wire_model: &str) -> bool {
+    let canonical = wire_model.rsplit('/').next().unwrap_or(wire_model);
+    let lowered = canonical.to_ascii_lowercase();
+    lowered == "aeon" || lowered.starts_with("qwen3.5") || lowered.starts_with("qwen3_5")
+}
+
 /// Returns true for models that do NOT support the `is_error` field in tool results.
 /// kimi models (via Moonshot AI/Dashscope) reject this field with 400 Bad Request.
 /// Returns true for models that do NOT support the `is_error` field in tool results.

diff --git a/rust/crates/commands/src/lib.rs b/rust/crates/commands/src/lib.rs
@@ -5735,11 +5735,11 @@ mod tests {
         let _guard = env_guard();
         let workspace = temp_dir("mcp-degrades-144");
         let config_home = temp_dir("mcp-degrades-144-cfg");
-        fs::create_dir_all(workspace.join(".claw")).expect("create workspace .claw dir");
+        fs::create_dir_all(workspace.join(".hackcode")).expect("create workspace .claw dir");
         fs::create_dir_all(&config_home).expect("create config home");
         // One valid server + one malformed entry missing `command`.
         fs::write(
-            workspace.join(".claw.json"),
+            workspace.join(".hackcode.json"),
             r#"{
   "mcpServers": {
     "everything": {"command": "npx", "args": ["-y", "@modelcontextprotocol/server-everything"]},

diff --git a/rust/crates/runtime/src/bash.rs b/rust/crates/runtime/src/bash.rs
@@ -176,32 +176,17 @@ async fn execute_bash_async(
 
     let mut command = prepare_tokio_command(&input.command, &cwd, &sandbox_status, true);
 
-    // The model often passes timeout values thinking they're seconds (e.g. 60)
-    // but the parameter is in milliseconds.  Enforce a 30-second floor so
-    // network commands (nmap, curl, ping) actually have time to finish.
+    // `timeout` is honored exactly as supplied (milliseconds). We deliberately do
+    // not floor it: an explicit timeout should mean what it says so callers can
+    // bound a hung `cargo test`/network probe precisely. Sensible defaults for
+    // long-running network commands belong at the caller, not a silent override.
     let output_result = if let Some(timeout_ms) = input.timeout {
-        let timeout_ms = timeout_ms.max(30_000);
         match timeout(Duration::from_millis(timeout_ms), command.output()).await {
             Ok(result) => (result?, false),
-            Err(_) => {
-                return Ok(BashCommandOutput {
-                    stdout: String::new(),
-                    stderr: format!("Command exceeded timeout of {timeout_ms} ms"),
-                    raw_output_path: None,
-                    interrupted: true,
-                    is_image: None,
-                    background_task_id: None,
-                    backgrounded_by_user: None,
-                    assistant_auto_backgrounded: None,
-                    dangerously_disable_sandbox: input.dangerously_disable_sandbox,
-                    return_code_interpretation: Some(String::from("timeout")),
-                    no_output_expected: Some(true),
-                    structured_content: None,
-                    persisted_output_path: None,
-                    persisted_output_size: None,
-                    sandbox_status: Some(sandbox_status),
-                });
-            }
+            // On timeout, classify the failure (a hung `cargo test`/`pytest`
+            // reads differently from a slow network command) and emit structured
+            // provenance instead of a bare "timeout" string.
+            Err(_) => return Ok(timeout_output(&input, timeout_ms, sandbox_status)),
-    let mut command = prepare_tokio_command(&input.command, &cwd, &sandbox_status, true);
-
-    // The model often passes timeout values thinking they're seconds (e.g. 60)
-    // but the parameter is in milliseconds.  Enforce a 30-second floor so
-    // network commands (nmap, curl, ping) actually have time to finish.
-    // `timeout` is honored exactly as supplied (milliseconds). We deliberately do
-    // not floor it: an explicit timeout should mean what it says so callers can
-    // bound a hung `cargo test`/network probe precisely. Sensible defaults for
-    // long-running network commands belong at the caller, not a silent override.
-    let output_result = if let Some(timeout_ms) = input.timeout {
-        let timeout_ms = timeout_ms.max(30_000);
-        match timeout(Duration::from_millis(timeout_ms), command.output()).await {
-            Ok(result) => (result?, false),
-            Err(_) => {
-                return Ok(BashCommandOutput {
-                    stdout: String::new(),
-                    stderr: format!("Command exceeded timeout of {timeout_ms} ms"),
-                    raw_output_path: None,
-                    interrupted: true,
-                    is_image: None,
-                    background_task_id: None,
-                    backgrounded_by_user: None,
-                    assistant_auto_backgrounded: None,
-                    dangerously_disable_sandbox: input.dangerously_disable_sandbox,
-                    return_code_interpretation: Some(String::from("timeout")),
-                    no_output_expected: Some(true),
-                    structured_content: None,
-                    persisted_output_path: None,
-                    persisted_output_size: None,
-                    sandbox_status: Some(sandbox_status),
-                });
-            }
-            // On timeout, classify the failure (a hung `cargo test`/`pytest`
-            // reads differently from a slow network command) and emit structured
-            // provenance instead of a bare "timeout" string.
-            Err(_) => return Ok(timeout_output(&input, timeout_ms, sandbox_status)),
+    let mut command = prepare_tokio_command(&input.command, &cwd, &sandbox_status, true);
+
+    // `timeout` is honored exactly as supplied (milliseconds). We deliberately do
+    // not floor it: an explicit timeout should mean what it says so callers can
+    // bound a hung `cargo test`/network probe precisely. Sensible defaults for
+    // long-running network commands belong at the caller, not a silent override.
+    let output_result = if let Some(timeout_ms) = input.timeout {
+        command.kill_on_drop(true);
+        match timeout(Duration::from_millis(timeout_ms), command.output()).await {
+            Ok(result) => (result?, false),
+            // On timeout, classify the failure (a hung `cargo test`/`pytest`
+            // reads differently from a slow network command) and emit structured
+            // provenance instead of a bare "timeout" string.
+            Err(_) => return Ok(timeout_output(&input, timeout_ms, sandbox_status)),
-    let mut command = prepare_tokio_command(&input.command, &cwd, &sandbox_status, true);
-
-    // The model often passes timeout values thinking they're seconds (e.g. 60)
-    // but the parameter is in milliseconds.  Enforce a 30-second floor so
-    // network commands (nmap, curl, ping) actually have time to finish.
-    // `timeout` is honored exactly as supplied (milliseconds). We deliberately do
-    // not floor it: an explicit timeout should mean what it says so callers can
-    // bound a hung `cargo test`/network probe precisely. Sensible defaults for
-    // long-running network commands belong at the caller, not a silent override.
-    let output_result = if let Some(timeout_ms) = input.timeout {
-        let timeout_ms = timeout_ms.max(30_000);
-        match timeout(Duration::from_millis(timeout_ms), command.output()).await {
-            Ok(result) => (result?, false),
-            Err(_) => {
-                return Ok(BashCommandOutput {
-                    stdout: String::new(),
-                    stderr: format!("Command exceeded timeout of {timeout_ms} ms"),
-                    raw_output_path: None,
-                    interrupted: true,
-                    is_image: None,
-                    background_task_id: None,
-                    backgrounded_by_user: None,
-                    assistant_auto_backgrounded: None,
-                    dangerously_disable_sandbox: input.dangerously_disable_sandbox,
-                    return_code_interpretation: Some(String::from("timeout")),
-                    no_output_expected: Some(true),
-                    structured_content: None,
-                    persisted_output_path: None,
-                    persisted_output_size: None,
-                    sandbox_status: Some(sandbox_status),
-                });
-            }
-            // On timeout, classify the failure (a hung `cargo test`/`pytest`
-            // reads differently from a slow network command) and emit structured
-            // provenance instead of a bare "timeout" string.
-            Err(_) => return Ok(timeout_output(&input, timeout_ms, sandbox_status)),
+    let mut command = prepare_tokio_command(&input.command, &cwd, &sandbox_status, true);
+
+    // `timeout` is honored exactly as supplied (milliseconds). We deliberately do
+    // not floor it: an explicit timeout should mean what it says so callers can
+    // bound a hung `cargo test`/network probe precisely. Sensible defaults for
+    // long-running network commands belong at the caller, not a silent override.
+    let output_result = if let Some(timeout_ms) = input.timeout {
+        command.kill_on_drop(true);
+        match timeout(Duration::from_millis(timeout_ms), command.output()).await {
+            Ok(result) => (result?, false),
+            // On timeout, classify the failure (a hung `cargo test`/`pytest`
+            // reads differently from a slow network command) and emit structured
+            // provenance instead of a bare "timeout" string.
+            Err(_) => return Ok(timeout_output(&input, timeout_ms, sandbox_status)),
         }
     } else {
         (command.output().await?, false)