superagent-ai
diff --git a/‎.cursor/rules/development-workflow.mdc‎
Lines changed: 1 addition & 1 deletion b/‎.cursor/rules/development-workflow.mdc‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.cursor/rules/project-overview.mdc‎
Lines changed: 5 additions & 6 deletions b/‎.cursor/rules/project-overview.mdc‎
Lines changed: 5 additions & 6 deletions
diff --git a/‎.env.example‎
Lines changed: 2 additions & 2 deletions b/‎.env.example‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎README.md‎
Lines changed: 25 additions & 21 deletions b/‎README.md‎
Lines changed: 25 additions & 21 deletions
diff --git a/‎package.json‎
Lines changed: 1 addition & 1 deletion b/‎package.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/agent/agent.ts‎
Lines changed: 4 additions & 3 deletions b/‎src/agent/agent.ts‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎src/agent/batch-mode.test.ts‎
Lines changed: 1 addition & 1 deletion b/‎src/agent/batch-mode.test.ts‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/agent/recap.test.ts‎
Lines changed: 2 additions & 2 deletions b/‎src/agent/recap.test.ts‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/agent/vision-input.test.ts‎
Lines changed: 20 additions & 0 deletions b/‎src/agent/vision-input.test.ts‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎src/grok/client.test.ts‎
Lines changed: 18 additions & 18 deletions b/‎src/grok/client.test.ts‎
Lines changed: 18 additions & 18 deletions
@@ -42,7 +42,7 @@ bun run start
 |----|----|----|
 | `GROK_API_KEY` | Yes | API key for xAI Grok |
 | `GROK_BASE_URL` | No | Custom API endpoint (default: `https://api.x.ai/v1`) |
-| `GROK_MODEL` | No | Model override (default: `grok-4-1-fast`) |
+| `GROK_MODEL` | No | Model override (default: `grok-4.3`) |
 | `GROK_MAX_TOKENS` | No | Max tokens per response (default: 16384) |
 
 Copy `.env.example` to `.env` and fill in your values.
 
@@ -53,12 +53,11 @@ src/
 
 ## Latest Grok Models
 
-- grok-4-0709 (flagship reasoning)
-- grok-4.20-beta-0309 (multi-agent, 2M context)
-- grok-4-fast (fast reasoning, 2M context)
-- grok-4-1-fast (latest fast, 2M context)
-- grok-code-fast-1 (code optimized)
-- grok-3, grok-3-mini
+- grok-4.3 (recommended flagship reasoning)
+- grok-4.20-non-reasoning (recommended non-reasoning)
+- grok-4.20-multi-agent-0309 (multi-agent, 2M context)
+- grok-4.20-0309-reasoning (reasoning, 2M context)
+- grok-3-mini (compact model with reasoning effort controls)
 
 ## CI/CD
 
 
@@ -4,8 +4,8 @@ GROK_API_KEY=your_grok_api_key_here
 # Optional: Custom API base URL (default: https://api.x.ai/v1)
 # GROK_BASE_URL=https://api.x.ai/v1
 
-# Optional: Default model (default: grok-4-1-fast-reasoning)
-# GROK_MODEL=grok-4-1-fast-reasoning
+# Optional: Default model (default: grok-4.3)
+# GROK_MODEL=grok-4.3
 
 # Optional: Max tokens per response (default: 16384)
 # GROK_MAX_TOKENS=16384
 
@@ -1,10 +1,10 @@
-# grok-cli — an open-source coding agent for the Grok API
+# grok-cli: an open-source coding agent for the Grok API
 
-[![CI](https://github.com/superagent-ai/grok-cli/actions/workflows/typecheck.yml/badge.svg)](https://github.com/superagent-ai/grok-cli/actions/workflows/typecheck.yml)
-[![npm](https://img.shields.io/npm/v/grok-dev.svg)](https://www.npmjs.com/package/grok-dev)
-[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](./LICENSE)
-[![TypeScript](https://img.shields.io/badge/TypeScript-5.9-3178C6?logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
-[![Bun](https://img.shields.io/badge/Bun-1.x-000000?logo=bun&logoColor=white)](https://bun.sh/)
+[CI](https://github.com/superagent-ai/grok-cli/actions/workflows/typecheck.yml)
+[npm](https://www.npmjs.com/package/grok-dev)
+[License: MIT](./LICENSE)
+[TypeScript](https://www.typescriptlang.org/)
+[Bun](https://bun.sh/)
 
 > **Disclaimer:** This project is community-built, open-source, and **not affiliated with, endorsed by, or sponsored by xAI Corp.** "Grok" is a trademark of xAI Corp. This tool uses the publicly available Grok API.
 
@@ -173,19 +173,19 @@ You keep using a text model for the session, and Grok saves generated media unde
 
 | Thing                             | What it means                                                                                                                                                                                                              |
 | --------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| **Built for the Grok API**        | Defaults tuned for the xAI API; models like `**grok-code-fast-1`**, `**grok-4-1-fast-reasoning**`, `**grok-4.20-multi-agent-0309**`, plus flagship and fast variants—run `grok models` for the full menu.                         |
-| **X + web search**                | `**search_x`** and `**search_web**` tools—live posts and docs without pretending the internet stopped in 2023.                                                                                                             |
-| **Media generation**              | Built-in `**generate_image`** and `**generate_video**` tools for text-to-image, image editing, text-to-video, and image-to-video flows. Generated files are saved locally so you can reuse them after the xAI URLs expire. |
-| **Sub-agents (default behavior)** | Foreground `**task`** delegation (e.g. explore, general, or computer) plus background `**delegate**` for read-only deep dives—parallelize like you mean it.                                                                |
-| **Verify**                        | `**/verify`** or `**--verify**` — inspects your app, builds, tests, boots it, and runs browser smoke checks in a sandboxed environment. Screenshots and video included.                                                    |
-| **Computer use**                  | Built-in `**computer`** sub-agent for host desktop automation via `**agent-desktop**`. It prefers semantic accessibility snapshots and stable refs, with screenshots saved under `**.grok/computer/**` when requested.     |
-| **Custom sub-agents**             | Define named agents with `**subAgents`** in `**~/.grok/user-settings.json**` and manage them from the TUI with `**/agents**`.                                                                                              |
+| **Built for the Grok API**        | Defaults tuned for the xAI API; models like `grok-4.3`, `grok-4.20-non-reasoning`, `grok-4.20-multi-agent-0309`, plus current flagship and multi-agent variants—run `grok models` for the full menu.                       |
+| **X + web search**                | **`search_x`** and **`search_web`** tools—live posts and docs without pretending the internet stopped in 2023.                                                                                                             |
+| **Media generation**              | Built-in **`generate_image`** and **`generate_video`** tools for text-to-image, image editing, text-to-video, and image-to-video flows. Generated files are saved locally so you can reuse them after the xAI URLs expire. |
+| **Sub-agents (default behavior)** | Foreground **`task`** delegation (e.g. explore, general, or computer) plus background **`delegate`** for read-only deep dives—parallelize like you mean it.                                                                |
+| **Verify**                        | **`/verify`** or **`--verify`** — inspects your app, builds, tests, boots it, and runs browser smoke checks in a sandboxed environment. Screenshots and video included.                                                    |
+| **Computer use**                  | Built-in **`computer`** sub-agent for host desktop automation via **`agent-desktop`**. It prefers semantic accessibility snapshots and stable refs, with screenshots saved under **`.grok/computer/`** when requested.     |
+| **Custom sub-agents**             | Define named agents with **`subAgents`** in **`~/.grok/user-settings.json`** and manage them from the TUI with **`/agents`**.                                                                                              |
 | **Remote control**                | Pair **Telegram** from the TUI (`/remote-control` → Telegram): DM your bot, `**/pair`**, approve the code in-terminal. Keep the CLI running while you ping it from your phone.                                             |
 | **No “mystery meat” UI**          | OpenTUI React terminal UI—fast, keyboard-driven, not whatever glitchy thing you’re thinking of.                                                                                                                            |
-| **Skills**                        | Agent Skills under `**.agents/skills/<name>/SKILL.md`** (project) or `**~/.agents/skills/**` (user). Use `**/skills**` in the TUI to list what’s installed.                                                                |
-| **MCPs**                          | Extend with Model Context Protocol servers—configure via `**/mcps`** in the TUI or `**.grok/settings.json**` (`mcpServers`).                                                                                               |
+| **Skills**                        | Agent Skills under **`.agents/skills/<name>/SKILL.md`** (project) or **`~/.agents/skills/`** (user). Use **`/skills`** in the TUI to list what’s installed.                                                                |
+| **MCPs**                          | Extend with Model Context Protocol servers—configure via **`/mcps`** in the TUI or **`.grok/settings.json`** (`mcpServers`).                                                                                               |
 | **Sessions**                      | Conversations persist; `**--session latest`** picks up where you left off.                                                                                                                                                 |
-| **Headless**                      | `**--prompt`** / `**-p**` for non-interactive runs—pipe it, script it, bench it.                                                                                                                                           |
+| **Headless**                      | **`--prompt`** / **`-p`** for non-interactive runs—pipe it, script it, bench it.                                                                                                                                           |
 | **Hackable**                      | TypeScript, clear agent loop, bash-first tools—fork it, shamelessly.                                                                                                                                                       |
 
 
@@ -228,7 +228,7 @@ Optional `**subAgents**` — custom foreground sub-agents. Each entry needs `**n
   "subAgents": [
     {
       "name": "security-review",
-      "model": "grok-code-fast-1",
+      "model": "grok-4.3",
       "instruction": "Prioritize security implications and suggest concrete fixes."
     }
   ]
@@ -320,7 +320,7 @@ Hook commands receive JSON on **stdin** (event details) and can return JSON on *
 
 ## Instructions & project brain
 
-- `**AGENTS.md**` — merged from git root down to your cwd (Codex-style; see repo docs). `**AGENTS.override.md**` wins per directory when present.
+- **`AGENTS.md`** — merged from git root down to your cwd (Codex-style; see repo docs). **`AGENTS.override.md`** wins per directory when present.
 
 ---
 
@@ -350,7 +350,7 @@ All settings are saved in `~/.grok/user-settings.json` (user) and `.grok/setting
 
 ### Verify
 
-Run `**/verify`** in the TUI or `**--verify**` on the CLI to verify your app locally:
+Run **`/verify`** in the TUI or **`--verify`** on the CLI to verify your app locally:
 
 ```bash
 grok --verify
@@ -370,6 +370,7 @@ Common issues and solutions:
 **Install script fails on macOS**
 
 Make sure you have a modern shell and `curl` available:
+
 ```bash
 # Verify curl is installed
 which curl
@@ -381,6 +382,7 @@ bash -c "$(curl -fsSL https://raw.githubusercontent.com/superagent-ai/grok-cli/m
 **Bun not found**
 
 The install script bundles Bun, but if you want to use your own:
+
 ```bash
 curl -fsSL https://bun.sh/install | bash
 bun add -g grok-dev
@@ -391,6 +393,7 @@ bun add -g grok-dev
 **"Missing GROK_API_KEY" error**
 
 Set your API key using one of these methods:
+
 ```bash
 # Environment variable
 export GROK_API_KEY=your_key_here
@@ -406,6 +409,7 @@ Get your API key from [x.ai](https://x.ai).
 **UI doesn't render correctly**
 
 Try a different terminal emulator. Recommended:
+
 - WezTerm (cross-platform)
 - Alacritty (cross-platform)
 - Ghostty (macOS/Linux)
@@ -439,7 +443,7 @@ If you're on Intel Mac or Linux, sandbox mode is not available. Use standard mod
 **Slow response times**
 
 - Check your network connection to x.ai API
-- Try `grok-code-fast-1` model for faster responses
+- Try `grok-4.20-non-reasoning` for non-reasoning workloads
 - Reduce `--max-tool-rounds` for headless runs
 
 **High memory usage**
@@ -487,4 +491,4 @@ bun run lint
 
 ## License
 
-MIT
+MIT
@@ -1,6 +1,6 @@
 {
   "name": "grok-dev",
-  "version": "1.1.5",
+  "version": "1.1.6",
   "description": "An open-source AI coding agent powered by Grok, built with Bun and OpenTUI.",
   "type": "module",
   "main": "dist/index.js",
 
@@ -99,8 +99,8 @@ import { containsEncryptedReasoning, sanitizeModelMessages } from "./reasoning";
 import { buildVisionUserMessages } from "./vision-input";
 
 const MAX_TOOL_ROUNDS = 400;
-const VISION_MODEL = "grok-4-1-fast-reasoning";
-const COMPUTER_MODEL = "grok-4.20-0309-reasoning";
+const VISION_MODEL = "grok-4.3";
+const COMPUTER_MODEL = "grok-4.3";
 
 interface AgentOptions {
   persistSession?: boolean;
@@ -1839,7 +1839,8 @@ export class Agent {
     await this.fireHook(promptInput, signal).catch(() => {});
 
     await this.consumeBackgroundNotifications();
-    const userModelMessage: ModelMessage = { role: "user", content: userMessage };
+    const userModelMessages = await buildVisionUserMessages(userMessage, this.bash.getCwd(), signal);
+    const userModelMessage = userModelMessages[0] ?? ({ role: "user", content: userMessage } satisfies ModelMessage);
     this.messages.push(userModelMessage);
     this.messageSeqs.push(null);
 
 
@@ -109,7 +109,7 @@ describe("Agent batch mode", () => {
         childMessages: [{ role: "user", content: "Do the thing" }],
         childSystem: "system",
         childRuntime: {
-          modelId: "grok-4-1-fast-reasoning",
+          modelId: "grok-4.3",
           modelInfo: {
             supportsClientTools: false,
             supportsMaxOutputTokens: true,
 
@@ -5,7 +5,7 @@ async function importAgentModuleWithRecapMocks() {
 
   const generateRecap = vi.fn(async () => ({
     recap: "Recovered the latest session state.",
-    modelId: "grok-4-1-fast-non-reasoning",
+    modelId: "grok-4.20-non-reasoning",
     usage: {
       inputTokens: 10,
       outputTokens: 4,
@@ -83,7 +83,7 @@ describe("Agent session recap", () => {
       workspaceId: "workspace-1",
       title: null,
       recap: null,
-      model: "grok-4-1-fast",
+      model: "grok-4.3",
       mode: "agent",
       cwdAtStart: process.cwd(),
       cwdLast: process.cwd(),
 
@@ -36,4 +36,24 @@ describe("buildVisionUserMessages", () => {
       text: `Validate the image at ${imagePath}`,
     });
   });
+
+  it("recognizes shell-escaped screenshot paths", async () => {
+    const imageName = "Screenshot 2026-05-06 at 10.02.18.png";
+    const imagePath = path.join(tempDir, imageName);
+    fs.writeFileSync(imagePath, Buffer.from([1, 2, 3, 4]));
+    const escapedPath = path.join(tempDir, "Screenshot\\ 2026-05-06\\ at\\ 10.02.18.png");
+
+    const messages = await buildVisionUserMessages(`${escapedPath}\nExplain this image`, tempDir);
+
+    const content = messages[0]?.content as Array<Record<string, unknown>>;
+    expect(content[0]).toMatchObject({
+      type: "file",
+      mediaType: "image/png",
+    });
+    expect(content[0]?.data).toBeInstanceOf(Uint8Array);
+    expect(content[1]).toMatchObject({
+      type: "text",
+      text: `${escapedPath}\nExplain this image`,
+    });
+  });
 });
@@ -4,7 +4,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 import * as settings from "../utils/settings";
 import { generateRecap, resolveModelRuntime } from "./client";
 
-const mockGenerateText = vi.fn();
+const mockGenerateText = vi.hoisted(() => vi.fn());
 
 vi.mock("ai", () => {
   return {
@@ -34,7 +34,7 @@ describe("client", () => {
 
       expect(result).toEqual({
         recap: "Wrapped up the parser fix. Next step is wiring the new recap banner.",
-        modelId: "grok-4-1-fast-non-reasoning",
+        modelId: "grok-4.20-non-reasoning",
         usage: { inputTokens: 11, outputTokens: 7, totalTokens: 18 },
       });
       expect(mockGenerateText).toHaveBeenCalledWith(
@@ -54,7 +54,7 @@ describe("client", () => {
 
       expect(result).toEqual({
         recap: "",
-        modelId: "grok-4-1-fast-non-reasoning",
+        modelId: "grok-4.20-non-reasoning",
       });
     });
   });
@@ -67,21 +67,21 @@ describe("client", () => {
         expect(runtime.providerOptions).toBeUndefined();
       });
 
-      it("does not include providerOptions for grok-4-0709 even though it has reasoning flag", () => {
+      it("normalizes retired flagship reasoning models to grok-4.3", () => {
         const runtime = resolveModelRuntime(mockProvider, "grok-4-0709");
-        expect(runtime.modelId).toBe("grok-4-0709");
+        expect(runtime.modelId).toBe("grok-4.3");
         expect(runtime.providerOptions).toBeUndefined();
       });
 
-      it("does not include providerOptions for grok-code-fast-1", () => {
+      it("normalizes retired code models to grok-4.3", () => {
         const runtime = resolveModelRuntime(mockProvider, "grok-code-fast-1");
-        expect(runtime.modelId).toBe("grok-code-fast-1");
+        expect(runtime.modelId).toBe("grok-4.3");
         expect(runtime.providerOptions).toBeUndefined();
       });
 
-      it("does not include providerOptions for grok-4-1-fast-reasoning", () => {
+      it("normalizes retired fast reasoning models to grok-4.3", () => {
         const runtime = resolveModelRuntime(mockProvider, "grok-4-1-fast-reasoning");
-        expect(runtime.modelId).toBe("grok-4-1-fast-reasoning");
+        expect(runtime.modelId).toBe("grok-4.3");
         expect(runtime.providerOptions).toBeUndefined();
       });
 
@@ -91,9 +91,9 @@ describe("client", () => {
         expect(runtime.providerOptions).toBeUndefined();
       });
 
-      it("does not include providerOptions for grok-3", () => {
+      it("normalizes retired non-reasoning models to grok-4.20-non-reasoning", () => {
         const runtime = resolveModelRuntime(mockProvider, "grok-3");
-        expect(runtime.modelId).toBe("grok-3");
+        expect(runtime.modelId).toBe("grok-4.20-non-reasoning");
         expect(runtime.providerOptions).toBeUndefined();
       });
     });
@@ -129,24 +129,24 @@ describe("client", () => {
         });
       });
 
-      it("does not include providerOptions for grok-4-0709 even when effort is configured", () => {
+      it("does not include providerOptions for retired reasoning aliases even when effort is configured", () => {
         vi.spyOn(settings, "getReasoningEffortForModel").mockReturnValue("high");
         const runtime = resolveModelRuntime(mockProvider, "grok-4-0709");
-        expect(runtime.modelId).toBe("grok-4-0709");
+        expect(runtime.modelId).toBe("grok-4.3");
         expect(runtime.providerOptions).toBeUndefined();
       });
 
-      it("does not include providerOptions for grok-code-fast-1 even when effort is configured", () => {
+      it("does not include providerOptions for retired code aliases even when effort is configured", () => {
         vi.spyOn(settings, "getReasoningEffortForModel").mockReturnValue("high");
         const runtime = resolveModelRuntime(mockProvider, "grok-code-fast-1");
-        expect(runtime.modelId).toBe("grok-code-fast-1");
+        expect(runtime.modelId).toBe("grok-4.3");
         expect(runtime.providerOptions).toBeUndefined();
       });
 
-      it("does not include providerOptions for grok-4-1-fast-reasoning even when effort is configured", () => {
+      it("does not include providerOptions for grok-4.3 even when effort is configured", () => {
         vi.spyOn(settings, "getReasoningEffortForModel").mockReturnValue("high");
-        const runtime = resolveModelRuntime(mockProvider, "grok-4-1-fast-reasoning");
-        expect(runtime.modelId).toBe("grok-4-1-fast-reasoning");
+        const runtime = resolveModelRuntime(mockProvider, "grok-4.3");
+        expect(runtime.modelId).toBe("grok-4.3");
         expect(runtime.providerOptions).toBeUndefined();
       });
     });
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "grok-dev",`
`3`		`- "version": "1.1.5",`
	`3`	`+ "version": "1.1.6",`
`4`	`4`	`"description": "An open-source AI coding agent powered by Grok, built with Bun and OpenTUI.",`
`5`	`5`	`"type": "module",`
`6`	`6`	`"main": "dist/index.js",`