diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6e77eca..489bb07 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,7 +32,29 @@ jobs: node-version: ${{ env.NODE_VERSION }} cache: npm - name: Install dependencies - run: npm ci --ignore-scripts + run: | + # npm/cli#4828: platform-specific optionalDependencies (rollup + # native binaries like @rollup/rollup-linux-x64-gnu) can be + # missing from a package-lock.json generated on a different + # platform. `npm ci` then fails with "Cannot find module + # @rollup/rollup-linux-x64-gnu" on Linux runners when the + # lockfile was generated on macOS. Fall back to a clean + # `npm install` in that case so CI self-heals without needing + # to regenerate the lockfile on every contributor's machine. + npm ci --ignore-scripts || { + echo "::warning::npm ci failed — falling back to npm install (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } + # npm/cli#4828: even when `npm ci` succeeds, platform-specific + # rollup optionalDependencies can be silently skipped on Linux + # runners if the lockfile was generated on macOS. Probe and + # self-heal if the native binding is missing. + node -e "require('rollup/dist/native.js')" 2>/dev/null || { + echo "::warning::rollup native binding missing — reinstalling (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } - name: Prettier --check run: npx prettier --check 'src/**/*.ts' 'test/**/*.ts' @@ -49,7 +71,25 @@ jobs: node-version: ${{ env.NODE_VERSION }} cache: npm - name: Install dependencies - run: npm ci --ignore-scripts + run: | + # npm/cli#4828: platform-specific optionalDependencies (rollup + # native binaries like @rollup/rollup-linux-x64-gnu) can be + # missing from a package-lock.json generated on a different + # platform. 
`npm ci` then fails with "Cannot find module + # @rollup/rollup-linux-x64-gnu" on Linux runners when the + # lockfile was generated on macOS. Fall back to a clean + # `npm install` in that case so CI self-heals without needing + # to regenerate the lockfile on every contributor's machine. + npm ci --ignore-scripts || { + echo "::warning::npm ci failed — falling back to npm install (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } + node -e "require('rollup/dist/native.js')" 2>/dev/null || { + echo "::warning::rollup native binding missing — reinstalling (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } - name: ESLint run: npx eslint 'src/**/*.ts' --max-warnings=100 @@ -66,7 +106,25 @@ jobs: node-version: ${{ env.NODE_VERSION }} cache: npm - name: Install dependencies - run: npm ci --ignore-scripts + run: | + # npm/cli#4828: platform-specific optionalDependencies (rollup + # native binaries like @rollup/rollup-linux-x64-gnu) can be + # missing from a package-lock.json generated on a different + # platform. `npm ci` then fails with "Cannot find module + # @rollup/rollup-linux-x64-gnu" on Linux runners when the + # lockfile was generated on macOS. Fall back to a clean + # `npm install` in that case so CI self-heals without needing + # to regenerate the lockfile on every contributor's machine. 
+ npm ci --ignore-scripts || { + echo "::warning::npm ci failed — falling back to npm install (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } + node -e "require('rollup/dist/native.js')" 2>/dev/null || { + echo "::warning::rollup native binding missing — reinstalling (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } - name: TypeScript run: npx tsc -p tsconfig.json --noEmit @@ -88,7 +146,25 @@ jobs: node-version: ${{ matrix.node }} cache: npm - name: Install dependencies - run: npm ci --ignore-scripts + run: | + # npm/cli#4828: platform-specific optionalDependencies (rollup + # native binaries like @rollup/rollup-linux-x64-gnu) can be + # missing from a package-lock.json generated on a different + # platform. `npm ci` then fails with "Cannot find module + # @rollup/rollup-linux-x64-gnu" on Linux runners when the + # lockfile was generated on macOS. Fall back to a clean + # `npm install` in that case so CI self-heals without needing + # to regenerate the lockfile on every contributor's machine. 
+ npm ci --ignore-scripts || { + echo "::warning::npm ci failed — falling back to npm install (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } + node -e "require('rollup/dist/native.js')" 2>/dev/null || { + echo "::warning::rollup native binding missing — reinstalling (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } - name: Rebuild native bindings (better-sqlite3 prebuild) run: npm rebuild better-sqlite3 - name: Run vitest @@ -116,7 +192,25 @@ jobs: node-version: ${{ env.NODE_VERSION }} cache: npm - name: Install dependencies - run: npm ci --ignore-scripts + run: | + # npm/cli#4828: platform-specific optionalDependencies (rollup + # native binaries like @rollup/rollup-linux-x64-gnu) can be + # missing from a package-lock.json generated on a different + # platform. `npm ci` then fails with "Cannot find module + # @rollup/rollup-linux-x64-gnu" on Linux runners when the + # lockfile was generated on macOS. Fall back to a clean + # `npm install` in that case so CI self-heals without needing + # to regenerate the lockfile on every contributor's machine. 
+ npm ci --ignore-scripts || { + echo "::warning::npm ci failed — falling back to npm install (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } + node -e "require('rollup/dist/native.js')" 2>/dev/null || { + echo "::warning::rollup native binding missing — reinstalling (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } - name: Rebuild native bindings (better-sqlite3 prebuild) run: npm rebuild better-sqlite3 - name: vitest --coverage @@ -144,7 +238,25 @@ jobs: node-version: ${{ env.NODE_VERSION }} cache: npm - name: Install dependencies - run: npm ci --ignore-scripts + run: | + # npm/cli#4828: platform-specific optionalDependencies (rollup + # native binaries like @rollup/rollup-linux-x64-gnu) can be + # missing from a package-lock.json generated on a different + # platform. `npm ci` then fails with "Cannot find module + # @rollup/rollup-linux-x64-gnu" on Linux runners when the + # lockfile was generated on macOS. Fall back to a clean + # `npm install` in that case so CI self-heals without needing + # to regenerate the lockfile on every contributor's machine. 
+ npm ci --ignore-scripts || { + echo "::warning::npm ci failed — falling back to npm install (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } + node -e "require('rollup/dist/native.js')" 2>/dev/null || { + echo "::warning::rollup native binding missing — reinstalling (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } - name: Rebuild native bindings (better-sqlite3 prebuild) run: npm rebuild better-sqlite3 - name: Build (tsc + asset copy) @@ -201,7 +313,25 @@ jobs: node-version: ${{ env.NODE_VERSION }} cache: npm - name: Install dependencies - run: npm ci --ignore-scripts + run: | + # npm/cli#4828: platform-specific optionalDependencies (rollup + # native binaries like @rollup/rollup-linux-x64-gnu) can be + # missing from a package-lock.json generated on a different + # platform. `npm ci` then fails with "Cannot find module + # @rollup/rollup-linux-x64-gnu" on Linux runners when the + # lockfile was generated on macOS. Fall back to a clean + # `npm install` in that case so CI self-heals without needing + # to regenerate the lockfile on every contributor's machine. 
+ npm ci --ignore-scripts || { + echo "::warning::npm ci failed — falling back to npm install (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } + node -e "require('rollup/dist/native.js')" 2>/dev/null || { + echo "::warning::rollup native binding missing — reinstalling (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } - name: npm audit (high) run: npm audit --audit-level=high || true diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 35b8218..9e451fa 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -24,7 +24,19 @@ jobs: node-version: '20' registry-url: https://registry.npmjs.org cache: npm - - run: npm ci --ignore-scripts + - name: Install dependencies + # npm/cli#4828 self-heal (see ci.yml for rationale). + run: | + npm ci --ignore-scripts || { + echo "::warning::npm ci failed — falling back to npm install (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } + node -e "require('rollup/dist/native.js')" 2>/dev/null || { + echo "::warning::rollup native binding missing — reinstalling (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } - run: npm rebuild better-sqlite3 - run: npm run build - run: npx vitest run diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 71f3c48..834de65 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -36,7 +36,19 @@ jobs: with: node-version: ${{ env.NODE_VERSION }} cache: npm - - run: npm ci --ignore-scripts + - name: Install dependencies + # npm/cli#4828 self-heal (see ci.yml for rationale). 
+ run: | + npm ci --ignore-scripts || { + echo "::warning::npm ci failed — falling back to npm install (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } + node -e "require('rollup/dist/native.js')" 2>/dev/null || { + echo "::warning::rollup native binding missing — reinstalling (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } - run: npm rebuild better-sqlite3 - run: npm run build - run: npx vitest run @@ -73,7 +85,19 @@ jobs: with: node-version: ${{ env.NODE_VERSION }} cache: npm - - run: npm ci --ignore-scripts + - name: Install dependencies + # npm/cli#4828 self-heal (see ci.yml for rationale). + run: | + npm ci --ignore-scripts || { + echo "::warning::npm ci failed — falling back to npm install (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } + node -e "require('rollup/dist/native.js')" 2>/dev/null || { + echo "::warning::rollup native binding missing — reinstalling (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } - run: npm rebuild better-sqlite3 - run: npm run build - run: npm pack @@ -194,7 +218,19 @@ jobs: node-version: ${{ env.NODE_VERSION }} registry-url: https://registry.npmjs.org cache: npm - - run: npm ci --ignore-scripts + - name: Install dependencies + # npm/cli#4828 self-heal (see ci.yml for rationale). 
+ run: | + npm ci --ignore-scripts || { + echo "::warning::npm ci failed — falling back to npm install (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } + node -e "require('rollup/dist/native.js')" 2>/dev/null || { + echo "::warning::rollup native binding missing — reinstalling (npm/cli#4828)" + rm -rf node_modules package-lock.json + npm install --ignore-scripts --no-audit --no-fund + } - run: npm rebuild better-sqlite3 - run: npm run build - run: npm publish --provenance --access public diff --git a/DEMO.md b/DEMO.md new file mode 100644 index 0000000..1f3bb42 --- /dev/null +++ b/DEMO.md @@ -0,0 +1,248 @@ +
+

Forge demo

+ Forge logo +
+ Local-first, multi-agent, programmable software-engineering runtime. +
+ +--- + +## What this document is + +A hands-on tour of Forge's three surfaces — the **interactive REPL**, the **one-shot CLI**, and the **web dashboard** — all driving the same runtime (`src/core/orchestrator.ts`). Every screenshot below is real output; every video clip is an unedited screen capture. + +Jump to: + +- [Before you start](#before-you-start) +- [The REPL](#1-the-repl--forge) +- [The one-shot CLI](#2-the-one-shot-cli--forge-run-) +- [The web dashboard](#3-the-web-dashboard--forge-ui-start) +- [Common workflows](#common-workflows) +- [Tips & gotchas](#tips--gotchas) + +--- + +## Before you start + +```bash +# Install +npm install -g @hoangsonw/forge + +# Health check — lists reachable providers + role→model mapping +forge doctor + +# Pick a local model if you don't have one yet (free, runs on your box) +ollama pull llama3:8b # ~4.7 GB, general-purpose +ollama pull qwen2.5:7b # ~4.4 GB, better at code +``` + +If `forge doctor` shows at least one green provider, you're ready. + +--- + +## 1. The REPL · `forge` + +Interactive shell with multi-turn prompts, slash-command autocomplete, digit shortcuts for interactive prompts, streamed markdown rendering, and live file-change tracking. The REPL is the right surface when you want a **conversation** — asking follow-up questions, iterating on a plan, or exploring a codebase. + +### Screenshot + +![Forge REPL](images/repl.png) + +### Video + +https://github.com/user-attachments/assets/550c76ed-ee05-438f-a55d-5be09e2cf78f + +> If your markdown viewer doesn't render the video inline, open [`images/REPL.mp4`](images/REPL.mp4) directly. + +### Try it + +```bash +forge +``` + +Then at the prompt: + +``` +[1] forge ❯ summarize src/core/loop.ts in this project +[2] forge ❯ what are the key state transitions it manages? 
+[3] forge ❯ /mode heavy +[4] forge ❯ add a small helper that counts step retries +``` + +Each turn threads the previous ones into the planner's context via `composeDescription` (see `src/core/conversation.ts`), so follow-ups resolve against real prior turns — not hallucinated history. + +### What to look for in the demo + +- **Launch banner** — mode, task, and phase breadcrumbs (`classify → plan → approve → execute → verify`) print above the progress rail. +- **Live streaming** — the model's answer reflows token-by-token with markdown formatting (headings, fenced code, lists) forming up in place. +- **Slash-command dropdown** — type `/` and the fuzzy-ranked slash catalog appears above the prompt. Arrow keys pick, Tab accepts, digit keys jump. +- **Status line** — shows mode, provider:model, cwd, context usage, turn number, conversation id, plus any active permission flags (`+files`, `+shell`, …). +- **DONE block** — duration, files changed, and a final completion line after each task. + +--- + +## 2. The one-shot CLI · `forge run "..."` + +A single task end-to-end: classify → plan → approve → execute → verify → report. Ideal for CI jobs, batch scripts, and "I know exactly what I want" invocations. + +### Screenshot + +![Forge CLI](images/cli.png) + +### Video + +https://github.com/user-attachments/assets/9e1cbbd0-764c-46b4-a937-447ef37fe31a + +> If your markdown viewer doesn't render the video inline, open [`images/CLI.mp4`](images/CLI.mp4) directly. 
+ +### Try it + +```bash +# A read-only analysis (no mutation risk) +forge run "summarize src/core/loop.ts" + +# A bugfix with auto-approve (skip the plan-approval prompt) +forge run --yes "fix the off-by-one in pagination.ts" + +# Produce a plan without executing it +forge run --plan-only "add a /health endpoint to the Express server" + +# Pick a mode explicitly +forge run --mode heavy "refactor the auth middleware to use JWTs" + +# Deterministic output for reproducibility (temperature 0) +forge run --deterministic "add JSDoc to every exported fn in src/types" +``` + +### Flags worth knowing + +| Flag | Effect | +|---|---| +| `--yes` | auto-approve plan | +| `--plan-only` | produce plan, stop | +| `--mode <mode>` | `fast` · `balanced` · `heavy` · `plan` · `audit` · `debug` · `architect` · `offline-safe` | +| `--strict` | confirm every action | +| `--allow-files` / `--allow-shell` / `--allow-network` / `--allow-web` / `--allow-mcp` | session-scoped permission grants | +| `--skip-permissions` | skip routine prompts (high-risk still asked) | +| `--deterministic` | temperature 0 for reproducible output | +| `--non-interactive` | deny any prompt silently (CI-safe) | +| `--trace` | emit full trace (implies `--debug`) | + +See `forge run --help` for the full list. + +### What to look for in the demo + +- **`━━━ LAUNCHING ━━━`** banner at the start (mode, task, phase pills). +- **Plan approval prompt** with `Approve / Edit / Reject` — Edit opens `$EDITOR` with the plan JSON. +- **Per-step execution** with spinner + tool-result echoes. +- **`━━━ DONE ━━━`** banner at the end with duration, files changed, model cost (when billable). + +--- + +## 3. The web dashboard · `forge ui start` + +A local HTTP + WebSocket dashboard (vanilla JS, <120 KB, no CDN). Runs on `http://127.0.0.1:7823`. Best for watching multiple tasks, browsing history, reading long outputs, or driving Forge from a browser tab. 
+ +### Screenshot + +![Forge Web Dashboard](images/ui.png) + +### Video + +https://github.com/user-attachments/assets/49a9e479-5be6-4cc7-ab5e-c906d0103316 + +> If your markdown viewer doesn't render the video inline, open [`images/UI.mp4`](images/UI.mp4) directly. + +### Try it + +```bash +forge ui start +# open http://127.0.0.1:7823 +``` + +Or via Docker Compose (Forge + Ollama + UI in one command): + +```bash +docker compose -f docker/docker-compose.yml up -d +``` + +### What you can do in the dashboard + +- **Hero input on the Dashboard** — type a prompt, pick a project path (autocomplete from known projects, or hit **Browse…** for a server-side `$HOME`-scoped directory picker), fire the task. +- **Chat view** — multi-turn conversations with markdown-rendered bot replies. +- **Task detail view** — live stream of phase events, working-spinner, streamed model output, and a follow-up input that threads prior turns into the next task. +- **Tasks view** — full history, searchable; click any row to expand/continue. +- **Plan approval / Edit modal** — when a task hits approval, the plan viewer offers **Reject / Edit… / Approve & run**. Edit opens an inline JSON editor; save re-enters the approval loop with the new plan. +- **Permission modal** — per-call risk-classified prompts (`Deny / Allow once / Allow for session`). +- **Live cost + token counters** — for local providers, shows token count; for hosted (OpenAI / Anthropic), shows estimated USD. +- **Historical-task replay** — click a past task in the history table and the dashboard replays its saved plan + summary + file list even though the WebSocket subscription only streams live tasks. + +### What to look for in the demo + +- **Project picker** under the hero input — dropdown of known projects plus a **Browse…** button. +- **Streaming markdown** reflowing live in the task stream. +- **Plan viewer** with per-step chips (type, risk, id, target) and three-button footer. 
+- **Follow-up composer** at the bottom of each task view — continues the conversation by spawning a new task with composed prior-turn context. + +--- + +## Common workflows + +### Analyze a file without touching it + +```bash +forge run "summarize src/core/loop.ts" +``` + +The classifier tags this as `intent=analysis`, so the planner is forbidden from emitting mutation steps (`edit_file`, `write_file`, `run_tests`). The narrator pass turns the gathered context into a human-readable summary. + +### Iterate on a change in the REPL + +``` +[1] forge ❯ find everywhere we call `saveTask` without wrapping in try/catch +[2] forge ❯ wrap those with a shared helper that logs the error +[3] forge ❯ run the tests +``` + +Each turn's context is threaded into the next, so the model knows what the previous turns touched. + +### Plan-first, approve later (CI-friendly) + +```bash +forge run --plan-only "add a /health endpoint" > plan.json +# review plan.json in your PR +forge run --yes "add a /health endpoint" +``` + +### Drive Forge from a browser tab + +```bash +forge ui start +``` + +Open `http://127.0.0.1:7823`, set the project path once (sticky until you change it), fire any prompt — plan approval and permissions surface as modals. + +### Mix surfaces in one session + +Same SQLite index, same tasks, same conversation files. Start a task in the REPL, watch it finish in the dashboard's Active view, continue the conversation from either side. Each surface is a view over the runtime, not a sandbox. + +--- + +## Tips & gotchas + +- **`forge doctor`** is your friend. If something's off — provider unreachable, keychain not available, model role unmapped — this tells you. +- **First turn is slower.** Local models cold-start; Forge emits a `MODEL_WARMING` event so you can see it. +- **`~/.forge/logs/forge.log`** is the authoritative debug log. Trace-level with `--trace` or `FORGE_LOG_LEVEL=debug`. 
+- **Cancel** any running task with `Ctrl+C` in the REPL or the CLI, or with the dashboard's **Cancel** button. +- **Permission grants are scoped.** An "allow for session" only applies to that REPL / CLI invocation; it doesn't persist across runs unless you explicitly set it in `~/.forge/config.json`. +- **Your local model matters.** Forge's planner and narrator expect a model ≥ 7B for reasonable instruction-following; 3B chat models will produce noisy plans. `ollama pull qwen2.5:7b` is a solid default. + +--- + +## Where to next + +- [`README.md`](README.md) — full feature list, architecture, runtime metrics. +- [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) — hot paths, mode caps, state machine. +- [`docs/SETUP.md`](docs/SETUP.md) — contributor setup. +- [`FLYWHEEL.md`](FLYWHEEL.md) — the plan → bead → code methodology. +- [`CLAUDE.md`](CLAUDE.md) / [`AGENTS.md`](AGENTS.md) — context for AI agents working on this repo. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..13e43a9 --- /dev/null +++ b/Makefile @@ -0,0 +1,301 @@ +# Forge — local-first, multi-agent, programmable software-engineering runtime. +# +# This Makefile is a thin, self-documenting wrapper over npm scripts, Docker, +# and a handful of shell one-liners that we'd otherwise retype a dozen times +# a day. It is intentionally NOT the canonical build system — package.json +# scripts are; this just gives them short names and groups them sensibly so +# `make help` answers "how do I …" for new contributors. +# +# Invariants followed: +# - Every target is .PHONY unless it produces the named file. +# - Every user-facing target has a "##" doc comment on its line; `make help` +# parses those into a categorised table. +# - Recipes are idempotent where possible — running twice is safe. +# - No target silently swallows errors: unless a recipe is deliberately prefixed with `-`, a failing step fails `make`. 
+ +# --------------------------------------------------------------------------- +# Configuration +# --------------------------------------------------------------------------- + +SHELL := /usr/bin/env bash +.SHELLFLAGS := -euo pipefail -c +.ONESHELL: +.DEFAULT_GOAL := help + +# Project metadata (derived from package.json so rename-the-package Just Works) +PKG_NAME := $(shell node -p "require('./package.json').name" 2>/dev/null || echo @hoangsonw/forge) +PKG_VERSION := $(shell node -p "require('./package.json').version" 2>/dev/null || echo 0.0.0) + +# Runtime +NODE ?= node +NPM ?= npm +NPX ?= npx + +# Docker / OCI +DOCKER ?= docker +IMAGE ?= ghcr.io/hoangsonw/forge-agentic-coding-cli +TAG ?= dev +IMAGE_FULL := $(IMAGE):$(TAG) +PLATFORMS ?= linux/amd64,linux/arm64 +COMPOSE_FILE ?= docker/docker-compose.yml + +# Where test harnesses drop throwaway state. Override via env: +# make test FORGE_HOME=/tmp/forge-ci +FORGE_HOME ?= $(HOME)/.forge + +# --------------------------------------------------------------------------- +# Self-documenting help (parses `##` annotations from this file) +# --------------------------------------------------------------------------- + +.PHONY: help +help: ## Show this help (default target) + @awk 'BEGIN { \ + FS = ":.*##"; \ + printf "\n\033[1;36mForge\033[0m \033[2m%s@%s\033[0m · make targets\n\n", "$(PKG_NAME)", "$(PKG_VERSION)" \ + } \ + /^##@/ { \ + printf "\n\033[1;35m%s\033[0m\n", substr($$0, 5); next \ + } \ + /^[a-zA-Z0-9_.-]+:.*##/ { \ + printf " \033[32m%-22s\033[0m %s\n", $$1, $$2 \ + }' $(MAKEFILE_LIST) + @printf "\nOverride knobs (env or \`make VAR=...\`):\n" + @printf " \033[2mTAG=\033[0m%-14s image tag for docker targets (default: dev)\n" "$(TAG)" + @printf " \033[2mPLATFORMS=\033[0m%-9s docker buildx platforms (default: linux/amd64,linux/arm64)\n" "$(PLATFORMS)" + @printf " \033[2mFORGE_HOME=\033[0m%-9s state dir for smoke runs (default: ~/.forge)\n" "$(FORGE_HOME)" + @printf "\n" + +##@ Setup + +.PHONY: install +install: 
## Install dependencies (npm ci, matches package-lock.json exactly) + $(NPM) ci --ignore-scripts + +.PHONY: install-dev +install-dev: ## Install dependencies with devDeps (first-time contributor path) + $(NPM) install + +.PHONY: link +link: build ## npm link — make `forge` on PATH resolve to this checkout + $(NPM) link + +.PHONY: unlink +unlink: ## Remove the npm-linked binary (`@hoangsonw/forge`) from your PATH + -$(NPM) unlink -g $(PKG_NAME) + +.PHONY: relink +relink: unlink link ## unlink + link in one step (after a pull / branch switch) + +##@ Build + +.PHONY: build +build: ## Compile TypeScript + copy non-code assets into dist/ + $(NPM) run build + +.PHONY: watch +watch: ## Rebuild on every file change (tsc --watch; UI assets don't auto-copy) + $(NPM) run build:watch + +.PHONY: typecheck +typecheck: ## Type-check without emitting files (fast; CI-safe) + $(NPM) run typecheck + +.PHONY: clean +clean: ## Remove dist/ and any coverage output + rm -rf dist coverage .tsbuildinfo + +.PHONY: distclean +distclean: clean ## clean + nuke node_modules (forces a fresh install next time) + rm -rf node_modules + +##@ Quality + +.PHONY: lint +lint: ## ESLint over src/ (errors only; warnings OK) + $(NPM) run lint + +.PHONY: format +format: ## Prettier write (src/ + test/) + $(NPM) run format + +.PHONY: format-check +format-check: ## Prettier verify (fails if anything would be reformatted) + $(NPM) run format:check + +.PHONY: test +test: ## Run the full vitest suite (97 files, 570+ tests) + $(NPM) test + +.PHONY: test-watch +test-watch: ## Run vitest in watch mode (auto-reruns on change) + $(NPM) run test:watch + +.PHONY: test-coverage +test-coverage: ## Run tests with v8 coverage → coverage/ + index.html + $(NPM) run test:coverage + +.PHONY: test-one +test-one: ## Run ONE test file: make test-one FILE=test/unit/foo.test.ts + @if [[ -z "$${FILE:-}" ]]; then echo "usage: make test-one FILE=test/unit/foo.test.ts"; exit 2; fi + $(NPX) vitest run "$$FILE" + +.PHONY: verify +verify: 
format-check lint typecheck build test ## Everything CI runs, in one shot + +##@ Metrics + +.PHONY: metrics +metrics: ## Regenerate docs/metrics.json (counts, sizes, test count, …) + bash scripts/metrics.sh + +.PHONY: bundle +bundle: build ## Build an offline tarball bundle (via scripts/bundle.js) + $(NODE) scripts/bundle.js + +##@ Run locally + +.PHONY: start +start: build ## Run the compiled CLI (`./bin/forge.js`) with no args → REPL + $(NODE) ./bin/forge.js + +.PHONY: dev +dev: ## Run the CLI via ts-node (no build step; slower cold start) + $(NPM) run dev + +.PHONY: doctor +doctor: build ## Sanity-check providers + role→model mapping (<1 s cold) + $(NODE) ./bin/forge.js doctor --no-banner + +.PHONY: repl +repl: build ## Alias: open the Forge REPL against this checkout + $(NODE) ./bin/forge.js + +.PHONY: ui +ui: build ## Launch the local dashboard at http://127.0.0.1:7823 + $(NODE) ./bin/forge.js ui start --bind 127.0.0.1 --port 7823 + +.PHONY: ui-stop +ui-stop: ## Kill any running Forge UI process bound to :7823 + -lsof -ti tcp:7823 2>/dev/null | xargs -r kill -9 + +##@ Docker + +.PHONY: docker-build +docker-build: ## Build a single-arch image locally: $(IMAGE_FULL) + $(DOCKER) build -f docker/Dockerfile -t $(IMAGE_FULL) . + +.PHONY: docker-build-multi +docker-build-multi: ## Multi-arch build (buildx; linux/amd64 + linux/arm64). Adds --push if PUSH=1 + $(DOCKER) buildx build \ + --platform $(PLATFORMS) \ + -f docker/Dockerfile \ + -t $(IMAGE_FULL) \ + $(if $(filter 1 true,$(PUSH)),--push,--load) \ + . 
+ +.PHONY: docker-run +docker-run: docker-build ## Run the image with the current repo mounted as /workspace + $(DOCKER) run --rm -it \ + -v forge-home:/data \ + -v "$$(pwd):/workspace" \ + $(IMAGE_FULL) forge doctor --no-banner + +.PHONY: docker-ui +docker-ui: docker-build ## Run the containerised dashboard at http://127.0.0.1:7823 + $(DOCKER) run --rm -p 7823:7823 -v forge-home:/data \ + $(IMAGE_FULL) forge ui start --bind 0.0.0.0 + +.PHONY: compose-up +compose-up: ## Bring up the full stack (forge + ollama + ui) via docker-compose + $(DOCKER) compose -f $(COMPOSE_FILE) up -d + +.PHONY: compose-down +compose-down: ## Tear down the compose stack (keeps volumes) + $(DOCKER) compose -f $(COMPOSE_FILE) down + +.PHONY: compose-nuke +compose-nuke: ## Tear down the compose stack AND delete all named volumes + $(DOCKER) compose -f $(COMPOSE_FILE) down --volumes --remove-orphans + +.PHONY: compose-logs +compose-logs: ## Tail logs from the compose stack + $(DOCKER) compose -f $(COMPOSE_FILE) logs -f --tail=200 + +##@ Release (maintainer-only) + +.PHONY: pack +pack: build ## Produce an npm tarball in the repo root (no publish) + $(NPM) pack + +.PHONY: publish-dry +publish-dry: build ## Dry-run `npm publish --access public` (shows what would be uploaded) + $(NPM) publish --access public --dry-run + +.PHONY: tag +tag: ## Create & push a git tag `v$(PKG_VERSION)` (triggers release.yml) + @echo "Tagging v$(PKG_VERSION)" + git tag -a "v$(PKG_VERSION)" -m "Release v$(PKG_VERSION)" + git push origin "v$(PKG_VERSION)" + +##@ Maintenance + +.PHONY: audit +audit: ## npm audit (production deps, fails on high/critical) + $(NPM) audit --omit=dev --audit-level=high + +.PHONY: outdated +outdated: ## List packages that have newer versions available + -$(NPM) outdated + +.PHONY: tree +tree: ## Show the dep tree (production only) + $(NPM) ls --omit=dev --all + +.PHONY: locs +locs: ## Lines of code by language (requires `cloc`; brew install cloc) + @command -v cloc >/dev/null || { echo 
"install cloc: brew install cloc"; exit 1; } + cloc --quiet --exclude-dir=node_modules,dist,coverage,.git . + +##@ Troubleshooting + +.PHONY: where +where: ## Print resolved paths and versions that builds/tests will use + @printf "package : $(PKG_NAME)@$(PKG_VERSION)\n" + @printf "node : $$($(NODE) --version) (at: $$(which $(NODE)))\n" + @printf "npm : $$($(NPM) --version) (at: $$(which $(NPM)))\n" + @printf "forge (dist) : $$(ls dist/cli/index.js 2>/dev/null || echo 'not built (make build)')\n" + @printf "forge (bin) : ./bin/forge.js\n" + @printf "FORGE_HOME : $(FORGE_HOME)\n" + @printf "docker : $$($(DOCKER) --version 2>/dev/null || echo 'not installed')\n" + +.PHONY: smoke +smoke: build ## End-to-end smoke check (--help + doctor) in an isolated, auto-removed FORGE_HOME + @tmp=$$(mktemp -d -t forge-smoke.XXXXXX); \ + trap 'rm -rf "$$tmp"' EXIT; \ + echo "Using FORGE_HOME=$$tmp"; \ + FORGE_HOME=$$tmp $(NODE) ./bin/forge.js --help >/dev/null; \ + FORGE_HOME=$$tmp $(NODE) ./bin/forge.js doctor --no-banner; \ + echo "smoke: OK" + +.PHONY: kill-stale +kill-stale: ## Kill stray forge UI / daemon processes (useful after dev crashes) + -pgrep -f "bin/forge.js ui start" | xargs -r kill -9 || true + -pgrep -f "bin/forge.js daemon" | xargs -r kill -9 || true + -lsof -ti tcp:7823 2>/dev/null | xargs -r kill -9 || true + @echo "cleaned up" + +# --------------------------------------------------------------------------- +# Footer: ensure every user-facing target declared above is marked .PHONY so +# stale files with the same name can't shadow them. 
+# --------------------------------------------------------------------------- + +.PHONY: help install install-dev link unlink relink \ + build watch typecheck clean distclean \ + lint format format-check test test-watch test-coverage test-one verify \ + metrics bundle \ + start dev doctor repl ui ui-stop \ + docker-build docker-build-multi docker-run docker-ui \ + compose-up compose-down compose-nuke compose-logs \ + pack publish-dry tag \ + audit outdated tree locs \ + where smoke kill-stale diff --git a/README.md b/README.md index 6b3df5d..88ffd21 100644 --- a/README.md +++ b/README.md @@ -4,14 +4,16 @@ # Forge -**A local-first, multi-agent, programmable software-engineering runtime.** +**A local-first, plan-first, multi-agent, and programmable software-engineering runtime.** *Not an assistant. A runtime.* Forge brings its own scheduler, sandbox, permission system, state machine, agentic loop, memory layers, and plugin ecosystem. You pick the model. You approve the actions. Everything is inspectable, replayable, and yours. -**[Install](docs/INSTALL.md) · [Dev setup](docs/SETUP.md) · [Architecture](docs/ARCHITECTURE.md) · [Releases & versioning](RELEASES.md) · [Wiki Page](index.html) · [NPM Package](https://www.npmjs.com/package/@hoangsonw/forge) · [License](LICENSE)** +Forge logo + +**[Install](https://github.com/hoangsonww/Forge-Agentic-Coding-CLI/blob/master/docs/INSTALL.md) · [Dev setup](https://github.com/hoangsonww/Forge-Agentic-Coding-CLI/blob/master/docs/SETUP.md) · [Architecture](https://github.com/hoangsonww/Forge-Agentic-Coding-CLI/blob/master/docs/ARCHITECTURE.md) · [Releases & versioning](https://github.com/hoangsonww/Forge-Agentic-Coding-CLI/blob/master/RELEASES.md) · [Demo walkthrough](DEMO.md) · [Wiki Page](https://hoangsonww.github.io/Forge-Agentic-Coding-CLI/) · [NPM Package](https://www.npmjs.com/package/@hoangsonw/forge) · [License](LICENSE)** @@ -42,25 +44,25 @@ is inspectable, replayable, and yours. 
## At a glance -Forge is a local-first, multi-agent, programmable software-engineering runtime. Unlike Claude Code or OpenAI Codex, Forge is local-first infrastructure, not a hosted assistant. It brings its own scheduler, sandbox, permission system, state machine, agentic loop, memory layers, and plugin ecosystem. You pick & host the model. You approve the actions. Everything is inspectable, replayable, and yours. +Forge is a local-first, plan-first, multi-agent, and programmable software-engineering runtime. Unlike Claude Code or OpenAI Codex, Forge is local-first infrastructure, not a hosted assistant. It brings its own scheduler, sandbox, permission system, state machine, agentic loop, memory layers, and plugin ecosystem. You pick & host the model. You approve the actions. Everything is inspectable, replayable, and yours.
-| | value | reproducer | -|---|---|---| -| ⚡ **`forge doctor` cold-start** | **173 ms** | `time node bin/forge.js doctor --no-banner` | -| ⚡ **`forge --help` cold-start** | **238 ms** | `time node bin/forge.js --help` | -| 📦 **UI shell · zero CDN** | **90 KB** uncompressed | `wc -c src/ui/public/app.js` | -| 🌐 **Provider probe timeout** | **1.5 s** | `src/models/openai.ts#isAvailable` | -| 🔌 **Model providers** (auto-detected) | **6** | ollama · lmstudio · vllm · llama.cpp · openai-compat · anthropic | -| 🧠 **Model families** classified | **41** | Llama / Qwen / DeepSeek / Gemma / Phi / Mistral / Codestral / … | -| 🤖 **Built-in agents** | **6** | planner · architect · executor · reviewer · debugger · memory | -| 🛠 **Tools** available to agents | **18** | read · write · edit · grep · glob · run_command · git · web · … | -| 💬 **CLI subcommands · slash commands** | **24 · 55** | `forge --help` · `/help` in REPL | -| 🎛 **Modes** | **9** | fast · balanced · heavy · plan · execute · audit · debug · architect · offline-safe | -| ✅ **Tests** | **548 / 97 files** · 100% passing · ~5.5 s wall-clock | `npx vitest run` | -| 🐳 **CI jobs · release stages** | **9 · 6** | [`.github/workflows/`](.github/workflows) | -| 📦 **Container image** | ~355 MB · multi-arch · non-root · HEALTHCHECK | `docker pull ghcr.io/hoangsonw/forge-agentic-coding-cli:latest` | +| | value | reproducer | +|-----------------------------------------|-------------------------------------------------------|-------------------------------------------------------------------------------------| +| ⚡ **`forge doctor` cold-start** | **173 ms** | `time node bin/forge.js doctor --no-banner` | +| ⚡ **`forge --help` cold-start** | **238 ms** | `time node bin/forge.js --help` | +| 📦 **UI shell · zero CDN** | **90 KB** uncompressed | `wc -c src/ui/public/app.js` | +| 🌐 **Provider probe timeout** | **1.5 s** | `src/models/openai.ts#isAvailable` | +| 🔌 **Model providers** (auto-detected) | **6** | ollama · lmstudio · vllm 
· llama.cpp · openai-compat · anthropic | +| 🧠 **Model families** classified | **41** | Llama / Qwen / DeepSeek / Gemma / Phi / Mistral / Codestral / … | +| 🤖 **Built-in agents** | **6** | planner · architect · executor · reviewer · debugger · memory | +| 🛠 **Tools** available to agents | **18** | read · write · edit · grep · glob · run_command · git · web · … | +| 💬 **CLI subcommands · slash commands** | **24 · 55** | `forge --help` · `/help` in REPL | +| 🎛 **Modes** | **9** | fast · balanced · heavy · plan · execute · audit · debug · architect · offline-safe | +| ✅ **Tests** | **548 / 97 files** · 100% passing · ~5.5 s wall-clock | `npx vitest run` | +| 🐳 **CI jobs · release stages** | **9 · 6** | [`.github/workflows/`](.github/workflows) | +| 📦 **Container image** | ~355 MB · multi-arch · non-root · HEALTHCHECK | `docker pull ghcr.io/hoangsonw/forge-agentic-coding-cli:latest` |
@@ -235,9 +237,38 @@ docker compose -f docker/docker-compose.yml up -d # open http://127.0.0.1:7823 ``` -**Requirements:** Node ≥ 20 *and/or* Docker ≥ 25. At least one LLM source -(local runtime or API key). See [`docs/INSTALL.md`](docs/INSTALL.md) for -per-OS notes. +### System requirements + +| | Minimum | Notes | +|---|---|---| +| **Node.js** | **≥ 20** (22 tested) | Enforced via `package.json#engines`. Not needed if you use Docker. | +| **OS** | macOS · Linux · Windows (WSL recommended) | `better-sqlite3` ships prebuilds for darwin-x64, darwin-arm64, linux-x64, linux-arm64, win32-x64 — no compile step. | +| **Disk** | ~150 MB for `node_modules`; state under `~/.forge` grows with history | Override via `FORGE_HOME`. | +| **RAM** | Forge ~100 MB; your local model consumes its own RAM/VRAM | `forge doctor` cold-starts in ~170 ms. | +| **Docker** (alt path) | ≥ 25 | Multi-arch (amd64, arm64) image on GHCR. Zero host Node needed. | +| **At least one model source** | Ollama · LM Studio · vLLM · llama.cpp · Anthropic · OpenAI-compatible | `forge doctor` tells you which are reachable. | + +**Runtime npm dependencies** (13, zero optional): `@modelcontextprotocol/sdk`, `better-sqlite3` (native, prebuilt), `chalk`, `cli-table3`, `commander`, `dotenv`, `ora`, `prompts`, `semver`, `undici`, `ws`, `yaml`, `zod`. No Python, Rust, or Go toolchain. + +**Recommended** (not required): `ripgrep` (fast `grep` tool path), `git` (diff/status tools + project-root detection), `$EDITOR` (used when you pick "Edit" on a plan). + +See [`docs/INSTALL.md`](docs/INSTALL.md) for per-OS notes and [`docs/SETUP.md`](docs/SETUP.md) for contributor setup. + +### See it running + +Three surfaces, one runtime. 
+ +**REPL (Interactive Terminal) Mode** + +https://github.com/user-attachments/assets/eb592bbf-62a1-4d74-a540-7e066ebe56a4 + +**CLI (Headless, One-shot run) Mode** + +https://github.com/user-attachments/assets/bc3b3204-fd87-436f-9467-604535edb4e2 + +**Web UI Dashboard** + +https://github.com/user-attachments/assets/218cd64f-40fe-4836-9c62-c7a08538056b --- @@ -496,6 +527,42 @@ warns once, never refuses to route. Unknown models are accepted too — Forge rates them as generic executors rather than refusing to route. +### Model size & capability notes + +The agentic loop is cheap for the runtime but expensive for the *model*. +Every step is a multi-turn tool-use conversation that returns strict JSON. +Small models struggle with this in recognisable ways — please pick the +right tool for the job. + +| Work you want to do | Safe local floor | What fails below the floor | +|---|---|---| +| Pure chat ("explain closures") | any 3B instruct (phi-3:mini, gemma-3:2b) | fine — conversation fast-path bypasses tool use entirely | +| Summarize a file, explain a snippet | 7B instruct (qwen2.5:7b, llama3.1:8b) | summary is a line of "I read the file" instead of content | +| Single-file edits / small features | **7B+ code specialist** (deepseek-coder:6.7b, qwen2.5-coder:7b) | picks wrong tool (run_command to write files), splits "create empty + edit" patterns, escalates to ask_user on tool errors | +| Multi-file refactors, new features | 14B+ code specialist or a hosted frontier model | plan quality drops; step IDs get inconsistent; validation retries exhausted | +| Architecture-level changes | hosted (Claude Opus/Sonnet, GPT-4 class) realistically | budgets blow out; changes go off-plan | + +Forge ships with defences so a small model fails *loudly* instead of +silently corrupting files: the executor prompt spells out step-type → +tool mappings, `ask_user` rejects empty/too-short questions as +non-retryable, `edit_file` handles "create empty then fill" gracefully, +parent directories 
auto-create, provider warm-up is explicit, and the +router streams prose without `jsonMode` for narrator/conversation +paths. The result is that a small model will often tell you it can't +finish a task; it will rarely write the wrong code into a file. + +If in doubt: configure a code specialist for the `code` role, keep +something lighter for `fast`, and set `ANTHROPIC_API_KEY` or +`OPENAI_API_KEY` as a fallback — the router uses the hosted provider +automatically when the local one fails or trips its circuit breaker. + +```bash +forge config set models.code deepseek-coder:6.7b +forge config set models.planner qwen2.5:7b +forge config set models.fast phi3:mini +export ANTHROPIC_API_KEY=sk-… # optional fallback +``` + --- ## Safety model (not optional) @@ -567,6 +634,8 @@ Each mode is an **enforceable budget** — not a hint to the model. See ## CLI reference +> **▶ See each surface in action** in [DEMO.md](DEMO.md) — REPL walkthrough, `forge run` one-shots, and the web dashboard. + 24 subcommands. Full surface: ``` @@ -697,6 +766,8 @@ API key auth. Tokens stored in the OS keychain. Single hardened image (non-root, HEALTHCHECK, OCI labels, ~355 MB) that serves both CLI and UI. +> [▶ Dashboard demo](images/UI.mp4) — `forge ui start` driving a full task end-to-end (plan approval, streamed model output, follow-up thread). More in [DEMO.md](DEMO.md). + ```bash # Pull (multi-arch: linux/amd64 + linux/arm64): docker pull ghcr.io/hoangsonw/forge-agentic-coding-cli:latest diff --git a/RELEASES.md b/RELEASES.md index 1478ebc..51a7476 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -6,8 +6,6 @@ # Releases & Versioning -
- **How Forge versions, tags, builds, signs, and ships.** Who this is for: @@ -15,8 +13,6 @@ Who this is for: - **Maintainers** cutting a release or shipping a hotfix. - **Integrators** consuming Forge from CI, Docker, or the npm registry. -
- --- ## Table of contents @@ -480,12 +476,12 @@ flowchart TD H --> C1{{"match?"}}:::step L --> C1 C1 -->|yes| OK1["layer 1 ok"]:::ok - C1 -->|no| F1["REFUSE — retain existing binary"]:::fail + C1 -->|no| F1["REFUSE — retain existing binary"]:::fail OK1 --> VER["Ed25519.verify(
public_key = trusted_keys[i],
message = manifest.json,
signature = manifest.sig
)"]:::step VER --> C2{{"any trusted key verifies?"}}:::step C2 -->|yes| OK2["layer 2 ok
install"]:::ok - C2 -->|no| F2["REFUSE — unless
FORGE_ALLOW_UNSIGNED=1 (dev only)"]:::fail + C2 -->|no| F2["REFUSE — unless
FORGE_ALLOW_UNSIGNED=1 (dev only)"]:::fail ``` ### Verifying by hand diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 70c8d3f..54461de 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -345,6 +345,48 @@ total — into `{class, roles, contextTokens}`. model isn't installed on the user's provider. Picks best-fit from what's actually there, caches per process, warns once. +### 6.1 Model-capability assumptions and the runtime guards that defend them + +Forge does not assume a frontier model. The agentic loop is shaped so that +small, cheap, local-first models (down to the 7B tier) can drive it usefully +— but not silently. Every observed small-model failure mode has a +corresponding runtime guard so that either: + +- the model recovers cleanly (retry with different args, switch tool, set + `done:true`), or +- the tool fails loudly and non-retryably, forcing the executor to change + strategy instead of looping, or +- the task ends with an honest failure message rather than corrupted state. + +| Failure mode (small / mid models) | Where it manifests | Runtime guard | +|---|---|---| +| Wrong tool selection (e.g. 
`run_command` to write file contents) | `src/agents/executor.ts` | System prompt spells out `step.type → tool` mapping and forbids `run_command` for file writes | +| Splitting "create empty file → edit to fill" across steps | planner output → `src/tools/edit-file.ts` | `edit_file` with `oldText=""` on an empty/missing file writes the full body instead of erroring | +| Missing-parent-directory `ENOENT` on `write_file` | `src/tools/write-file.ts` | `createDirs` defaults to `true` (mkdir-p); opt out explicitly to get the old behaviour | +| Escalating to `ask_user` on tool errors, stalling the step | `src/tools/ask-user.ts` | Rejects questions < 3 chars as non-retryable; description tells the model "tool errors are for you to recover from, not escalate" | +| Cold-load timeout treated as a model failure and fallback to hosted | `src/models/ollama.ts`, `src/models/router.ts` | Headers-timeout floor at 300 s; proactive `warm()` with `/api/ps` preflight; explicit `MODEL_WARMING`/`MODEL_WARMED` events drive the spinner | +| Malformed JSON breaking `{actions, summary, done}` | `src/agents/executor.ts` | Parse-through-first-fence + schema validation; per-step retry budget capped; loop detector catches thrashing | +| Reviewer rejecting analysis tasks for "no file changes" | `src/agents/reviewer.ts` | Classifier sets `requiresReview=false` for intent=analysis; loop short-circuits the verify phase; reviewer prompt knows analysis tasks have no diff | +| Two concurrent edits racing on the same file | `src/sandbox/file-lock.ts` | Per-process path-keyed mutex serializes read-modify-write; atomic `writeAtomic` via temp + rename prevents torn reads | +| `create_file` step that emits an empty file | `src/agents/planner.ts` | Planner prompt requires a single `create_file` step with the full intended body; `edit_file`-on-empty safety-net if ignored | + +**Consequences for capability tiers** (measured empirically, not specced — +expect some variance across model families): + +| Work | 
Local floor | Above the floor | Below the floor | +|---|---|---|---| +| Conversation / concept Q&A | 3B instruct | — | fast-path skips tool use, so even 3B works | +| Summarize / explain | 7B instruct | clean streaming narrator output | summary reduces to "I read the file" | +| Single-file edits | 7B code specialist (deepseek-coder, qwen2.5-coder) | reliable tool calls, minimal retries | wrong-tool selection, step retries, occasional loop-detector trips | +| Multi-file / new feature | 14B+ code specialist OR hosted | plan quality holds; dependencies tracked | plan IDs drift; validation retries exhausted | +| Architecture / refactor | hosted frontier | end-to-end runs without intervention | not practical today | + +When the local-first path is insufficient, the router's fallback wiring +(circuit breaker + `fallback` field on `RoutingDecision`) transparently +routes the next call to the hosted provider if one is configured. No code +change, no flag — just set `ANTHROPIC_API_KEY` or `OPENAI_API_KEY` and the +system degrades gracefully under model failure. + --- ## 7. Permission + sandbox model diff --git a/docs/INSTALL.md b/docs/INSTALL.md index 6c9b91b..65c3298 100644 --- a/docs/INSTALL.md +++ b/docs/INSTALL.md @@ -5,6 +5,7 @@ ## Table of contents +- [0. System requirements](#0-system-requirements) - [1. Choose your install path](#1-choose-your-install-path) - [2. npm (global)](#2-npm-global) - [3. Docker](#3-docker) @@ -18,6 +19,25 @@ --- +## 0. System requirements + +Forge runs anywhere Node 20+ runs. The Docker path has no host-side Node requirement at all. + +| | Minimum | Notes | +|---|---|---| +| **Node.js** | **≥ 20** (22 tested in CI) | Enforced via `package.json#engines`. Skip if you use Docker. | +| **OS** | macOS · Linux · Windows (native or WSL) | `better-sqlite3` ships prebuilds for darwin-x64, darwin-arm64, linux-x64, linux-arm64, win32-x64 — no toolchain needed on `npm install`. 
| +| **Disk** | ~150 MB `node_modules`; state under `~/.forge` grows with history | Override via `FORGE_HOME`. | +| **RAM** | Forge: ~100 MB resident. Your local model: whatever the model needs. | `forge doctor` cold-starts in ~170 ms. | +| **Docker** (alt path) | ≥ 25 | Multi-arch image `ghcr.io/hoangsonw/forge-agentic-coding-cli:latest`. Amd64 + arm64. | +| **At least one model source** | Local runtime or hosted key | See [§7](#7-model-runtimes-you-can-point-forge-at). `forge doctor` probes all of them. | + +**Runtime npm dependencies** (13 total, **zero optional**): `@modelcontextprotocol/sdk`, `better-sqlite3`, `chalk`, `cli-table3`, `commander`, `dotenv`, `ora`, `prompts`, `semver`, `undici`, `ws`, `yaml`, `zod`. No Python, Rust, or Go required — `better-sqlite3` is the only native module and ships prebuilt binaries. + +**Recommended** (not required): `ripgrep` (fast path for the `grep` tool), `git` (for `git_diff`/`git_status` tools and project-root detection), `$EDITOR` (used when you pick "Edit" on a plan approval). + +--- + ## 1. Choose your install path ```mermaid @@ -277,6 +297,53 @@ Granite-Code, CodeLlama, Codestral, StarCoder, Yi, Solar, Zephyr, MiniCPM, LLaVA, TinyLlama, SmolLM, Aya, and more. Unknown models still get a routable role rather than being refused. +### Picking a model that fits the work + +Forge's agentic loop is multi-turn tool use with strict JSON output. That's +easy for frontier hosted models and hard for small local ones. These are +the tiers we've observed in practice — pull the right size for what you +intend to do, and set a hosted fallback for when you hit the ceiling. + +| Task type | Local floor we trust | Example pulls | Notes | +|---|---|---|---| +| Chat / concept Q&A | 3B instruct | `phi3:mini`, `gemma3:2b`, `qwen2.5:3b` | Uses the conversation fast-path; no tool use required. | +| Summarize / explain code | 7B instruct | `qwen2.5:7b`, `llama3.1:8b` | Narrator pass runs non-JSON and streams cleanly. 
| +| Single-file edits / small features | **7B+ code specialist** | `deepseek-coder:6.7b`, `qwen2.5-coder:7b` | Multi-step tool use; general 7B models often pick the wrong tool here. | +| Multi-file refactors / new features | 14B+ code specialist | `qwen2.5-coder:14b`, `deepseek-coder:33b` | Or route through a hosted frontier model. | +| Architecture-level changes | hosted only, realistically | Claude Opus/Sonnet, GPT-4-class | Context windows + plan quality matter. | + +**Expected failure modes below the floor** (the rail guards flag these +rather than silently corrupting files): + +- Wrong tool selection — e.g. `run_command` to write file contents. + Executor prompt maps step types explicitly; unrecoverable calls surface + loudly instead of looping. +- Escalating to `ask_user` on tool errors instead of retrying or switching + tools. `ask_user` rejects empty/too-short questions as non-retryable. +- Splitting "create empty file, then edit to fill" across two steps. + `edit_file` now handles empty-oldText on an empty file as a full-body + write, so this legitimate pattern succeeds. +- Malformed JSON that breaks the executor's `{actions, summary, done}` + contract. The run fails cleanly; no partial state is written. + +**Configuring per-role models:** + +```bash +forge config set models.planner qwen2.5:7b +forge config set models.code deepseek-coder:6.7b +forge config set models.fast phi3:mini + +# Hosted fallback — router engages automatically on local failure / breaker open. +export ANTHROPIC_API_KEY=sk-… +# or: +export OPENAI_API_KEY=sk-… +``` + +First use of a local model triggers a visible `warming <model>` phase +before the first call — cold-loading a 7B into RAM/VRAM can take up to a +minute on slower machines. Subsequent calls are fast while Ollama keeps +it resident (5 min default). + ### Runtime selection flow ```mermaid diff --git a/docs/SETUP.md b/docs/SETUP.md index 2f576ab..6b987c8 100644 --- a/docs/SETUP.md +++ b/docs/SETUP.md @@ -21,16 +21,64 @@ ## 1.
Prerequisites -| | Version | +### Host toolchain + +| | Version | Why | +|---|---|---| +| **Node.js** | **≥ 20** (22 tested in CI) | Enforced via `package.json#engines`. Uses async iterators on `undici` request bodies, `node:events`, and native ESM/CJS interop. | +| **npm** | bundled with Node | For `npm ci` / `npm link`. | +| **git** | any modern | Project-root detection, `git_diff` / `git_status` tools. | +| **ripgrep** | any | Optional but recommended — fast path for the `grep` tool. Falls back to a Node glob walker. | +| **Docker** | ≥ 25 | Only needed for building the image or using the compose stack. | +| **$EDITOR** | any | Used when you pick "Edit" on a plan; falls back to `vi`. | + +### OS support + +| OS | Status | +|---|---| +| macOS (darwin-x64, darwin-arm64) | first-class, tested in CI | +| Linux (linux-x64, linux-arm64) | first-class, tested in CI | +| Windows (native + WSL) | supported via native `better-sqlite3` prebuilds; WSL recommended for POSIX symlink / ripgrep parity | + +### Runtime npm dependencies + +Forge declares **13 runtime deps** and **zero optional deps**. None require a C/C++/Rust/Go/Python toolchain at install time — `better-sqlite3` is the only native module and ships prebuilds for every supported triple. 
+ +| Package | Version | What for | +|---|---|---| +| `@modelcontextprotocol/sdk` | ^1.0.0 | MCP bridge (stdio/http_stream/websocket transports) | +| `better-sqlite3` | ^11.3.0 | Local index DB (`~/.forge/forge.db`), FTS5 cold memory | +| `chalk` | ^4.1.2 | ANSI color (v4 kept for CJS) | +| `cli-table3` | ^0.6.5 | Tables in `forge doctor`, `task list`, `model list` | +| `commander` | ^12.1.0 | CLI argv parsing | +| `dotenv` | ^16.4.5 | `.env` loading | +| `ora` | ^5.4.1 | Progress spinner (v5 kept for CJS) | +| `prompts` | ^2.4.2 | Non-TTY fallback for the numbered-select helper | +| `semver` | ^7.6.3 | Update-check version comparison | +| `undici` | ^6.19.2 | HTTP client for Ollama / Anthropic / OpenAI streams | +| `ws` | ^8.18.0 | UI dashboard WebSocket | +| `yaml` | ^2.5.0 | Skill-file frontmatter | +| `zod` | ^3.23.8 | Runtime validation of plans and tool args | + +### Model source — you need at least one + +Local runtimes (auto-detected on standard ports with a ~1.5 s probe): + +| Runtime | Default endpoint | Env override | +|---|---|---| +| Ollama | `http://127.0.0.1:11434` | `OLLAMA_ENDPOINT` | +| LM Studio | `http://127.0.0.1:1234/v1` | `LMSTUDIO_ENDPOINT` | +| vLLM | `http://127.0.0.1:8000/v1` | `VLLM_ENDPOINT` | +| llama.cpp (`server`) | `http://127.0.0.1:8080/v1` | `LLAMACPP_ENDPOINT` | + +Hosted runtimes (API key via env or OS keychain): + +| Runtime | Env var | |---|---| -| Node.js | ≥ 20 (22 tested) | -| npm | bundled with Node | -| git | any | -| ripgrep | optional but recommended — used by tools | -| Docker (for image work) | ≥ 25 | +| Anthropic | `ANTHROPIC_API_KEY` | +| OpenAI-compatible | `OPENAI_API_KEY` (+ `OPENAI_BASE_URL` for non-OpenAI endpoints) | -Optional: Ollama / LM Studio / vLLM / llama.cpp for testing against a real -local model. Hosted `ANTHROPIC_API_KEY` / `OPENAI_API_KEY` also works. +If no provider is reachable, `forge doctor` reports it explicitly and prints the exact command to start one — no silent fallbacks. 
--- @@ -222,6 +270,16 @@ flowchart TB providers the router sees as up. - **Events log:** `~/.forge/logs/events.jsonl` is append-only JSONL and trivially `jq`-queryable. +- **"Is this a model-capability bug or a Forge bug?"** — when tracking a + failing task, check the capability tier before changing code. Small + models (<7B, or any general 7B on multi-step edits) produce failure + modes that the runtime deliberately surfaces loudly rather than hides: + wrong-tool selection, `ask_user` escalation, split create-empty-then-fill + plans. See [ARCHITECTURE §6.1](ARCHITECTURE.md#61-model-capability-assumptions-and-the-runtime-guards-that-defend-them) + for the full table of failure modes → runtime guards. If you reproduce + the same failure on a hosted frontier model, it's a Forge bug. If only + on a small local, check the guard exists and that your change hasn't + regressed it. --- diff --git a/images/CLI.mp4 b/images/CLI.mp4 new file mode 100644 index 0000000..d27b0de Binary files /dev/null and b/images/CLI.mp4 differ diff --git a/images/REPL.mp4 b/images/REPL.mp4 new file mode 100644 index 0000000..73f3b59 Binary files /dev/null and b/images/REPL.mp4 differ diff --git a/images/UI.mp4 b/images/UI.mp4 new file mode 100644 index 0000000..34a99ef Binary files /dev/null and b/images/UI.mp4 differ diff --git a/images/cli.png b/images/cli.png new file mode 100644 index 0000000..541153e Binary files /dev/null and b/images/cli.png differ diff --git a/images/logo.jpeg b/images/logo.jpeg new file mode 100644 index 0000000..c09e5dc Binary files /dev/null and b/images/logo.jpeg differ diff --git a/images/repl.png b/images/repl.png index b424798..097c894 100644 Binary files a/images/repl.png and b/images/repl.png differ diff --git a/images/ui.png b/images/ui.png index 65a2050..8b89e2d 100644 Binary files a/images/ui.png and b/images/ui.png differ diff --git a/index.html b/index.html index 8461e0e..dd84a67 100644 --- a/index.html +++ b/index.html @@ -128,6 +128,7 @@

Jump anywhere

@@ -182,10 +184,14 @@

Every capability, highlighted.

Forge REPL Interface
-
Forge Web Dashboard
- Forge Web Dashboard +
Forge CLI
+ Forge CLI
+
+
Forge Web Dashboard
+ Forge Web Dashboard +
/ 01

Local-first

Auto-detects Ollama, LM Studio, vLLM, llama.cpp. Hosted Anthropic / OpenAI / Azure / Groq / LocalAI / Together / Fireworks are opt-in.

ollama · lmstudio · vllm · llama.cpp
/ 02

Iterative executor

Model sees every tool result (stdout / stderr / exit) and adapts within a step. Mode-capped turn budgets.

adaptive · bounded
@@ -209,6 +215,73 @@

Every capability, highlighted.

+ +
+
+
+ Live demos +

See it running.

+

Screen captures of each Forge surface — the interactive REPL, the one-shot CLI, and the web dashboard — all driving the same runtime.

+
+ +
+
+
▶ REPL
+

Interactive session

+

Multi-turn prompts with slash-command autocomplete, status line, digit shortcuts for prompts, streamed markdown rendering, and live file-change tracking.

+
stream · slash · autocomplete
+
+
+
▶ CLI
+

One-shot runs

+

forge run "…" launches a full classify → plan → approve → execute → verify pipeline in the terminal with a progress rail and completion block.

+
--yes · --plan-only · ci-friendly
+
+
+
▶ UI
+

Web dashboard

+

Live WebSocket stream of plan approval, permission prompts, model deltas, and task results. Historical tasks replay from disk; follow-ups thread the conversation.

+
WebSocket · stream · history
+
+
+ +
+
+

What every demo is actually showing

+

The same src/core/orchestrator.ts runtime drives all three surfaces. Any task you run in one surface is a real row in the SQLite index — pickable from another surface, visible in forge sessions, cancellable from the dashboard.

+

Deltas stream token-by-token from the provider (emitDelta → event bus → WebSocket / REPL progress rail). Markdown reflows in place so headings, fences, and lists form up live instead of dumping at the end.

+
+
+

Run these for yourself

+

REPL

+
forge
+

One-shot

+
forge run "summarize src/core/loop.ts"
+

Dashboard

+
forge ui start   # http://127.0.0.1:7823
+
+
+ +
+
REPL demo · forge
+ +
+
+
CLI demo · forge run
+ +
+
+
Web dashboard demo · forge ui start
+ +
+
+
+ @@ -450,6 +523,50 @@

Model families → preferred roles

+ +

Model size & capability tiers

+

+ The agentic loop is multi-turn tool use with strict JSON output. Small + local models can drive it, but not every kind of work is realistic at + every size. Pick by the work you intend to do, and set a hosted + fallback for when you hit the ceiling — the router degrades gracefully + via its circuit breaker. +

+
+ + + + + + + + + + + +
Work | Local floor we trust | Example pulls
Chat / concept Q&A | 3B instruct | phi3:mini · gemma3:2b · qwen2.5:3b
Summarize / explain code | 7B instruct | qwen2.5:7b · llama3.1:8b
Single-file edits / small features | 7B+ code specialist | deepseek-coder:6.7b · qwen2.5-coder:7b
Multi-file refactors / new features | 14B+ code specialist | qwen2.5-coder:14b · deepseek-coder:33b
Architecture-level changes | hosted only, realistically | Claude Opus/Sonnet · GPT-4-class
+
+ +

Observed small-model failure modes & runtime guards

+

+ Below the tier floor, models fail in recognisable ways. Forge catches + each so a small model fails loudly instead of corrupting + state. +

+
+ + + + + + + + + + + +
Failure mode | Runtime guard
Picks run_command to write file contents | Executor prompt spells out step.type → tool mapping and forbids run_command for file writes.
Escalates to ask_user on any tool error, stalling the step | ask_user rejects empty / too-short questions as non-retryable; model has to switch tools.
Splits "create empty file → edit to fill" | edit_file with oldText="" on an empty/missing file writes the full body.
write_file ENOENT because parent dir doesn't exist | createDirs defaults to true (mkdir-p).
Cold-load timeout interpreted as model failure | Headers-timeout floor 300 s; proactive warm() with /api/ps preflight.
Reviewer rejects analysis tasks for "no file changes" | Classifier sets requiresReview=false for intent=analysis; narrator pass writes the real answer.
Two concurrent edits race on the same file | Per-process path-mutex + atomic temp+rename.
+
@@ -652,12 +769,89 @@

Add MCP connector

+
+
+
+ 13 · System requirements +

Node 20+. Or just Docker.

+
+

+ Forge runs on any platform Node 20 runs on, or anywhere Docker runs. There is no host-side Python, Rust, or Go requirement. better-sqlite3 is the only native module and ships prebuilds for every supported triple — no toolchain needed on npm install. +

+
+
+
/ host
+

Host toolchain

+

+ Node.js ≥ 20 (22 tested).
+ OS: macOS · Linux · Windows (native or WSL).
+ Architectures: x64 · arm64.
+ Docker ≥ 25 (only if you prefer the container path). +

+
node 20+ · darwin · linux · win32 · arm64
+
+
+
/ footprint
+

Disk & RAM

+

+ Disk: ~150 MB node_modules; state under ~/.forge grows with session history (override with FORGE_HOME).
+ RAM: ~100 MB for Forge itself. Your local model uses its own RAM/VRAM on top.
+ Cold start: forge doctor ~170 ms. +

+
~150 MB · ~100 MB RAM
+
+
+
/ model
+

Model source (pick ≥ 1)

+

+ Local: Ollama · LM Studio · vLLM · llama.cpp — auto-detected on standard ports.
+ Hosted: ANTHROPIC_API_KEY · OPENAI_API_KEY (+ OPENAI_BASE_URL for any OpenAI-compatible server).
+ forge doctor probes all of them and tells you which are reachable. +

+
local-first · hosted fallback
+
+
+ +

Runtime npm dependencies

+

+ 13 runtime packages, zero optional dependencies. Listed below so you can audit them before npm install. +

+
+
package.json · dependencies · 13 total
+
@modelcontextprotocol/sdk  # MCP bridge (stdio / http_stream / websocket)
+better-sqlite3             # local index DB · FTS5 cold memory · native, prebuilt
+chalk                      # ANSI color
+cli-table3                 # tables in `forge doctor`, `task list`
+commander                  # CLI argv parsing
+dotenv                     # .env loading
+ora                        # progress spinner
+prompts                    # non-TTY fallback for the numbered-select helper
+semver                     # update-check version comparison
+undici                     # HTTP client · Ollama / Anthropic / OpenAI streams
+ws                         # UI dashboard WebSocket
+yaml                       # skill-file frontmatter
+zod                        # runtime validation of plans & tool args
+
+
+ +

Recommended (not required)

+

+ ripgrep — fast path for the grep tool; falls back to a Node glob walker.
+ git — enables git_diff / git_status tools and project-root detection.
+ $EDITOR — used when you pick "Edit" on a plan approval; falls back to vi. +

+
+
+ +
- 13 · Install + 14 · Install

Three paths. Pick one.

@@ -704,7 +898,7 @@

03 / Compose

- 14 · Container posture + 15 · Container posture

Single image. CLI + UI + daemon.

@@ -743,7 +937,7 @@

Single image. CLI + UI + daemon.

- 15 · CI/CD + 16 · CI/CD

9 jobs per PR. 6 release stages.

CI · every PR + push

@@ -819,7 +1013,7 @@

Release · on v* tag
- 16 · Runtime metrics + 17 · Runtime metrics

What it actually costs to run.

All measured locally — reproducers in the table at the bottom. No synthetic benchmarks, no comparisons against straw-man tools.

@@ -895,7 +1089,7 @@

UI shell asset sizes

- 17 · Agent-facing files + 18 · Agent-facing files

Works with every coding agent.

Context files so agents don't re-learn the repo every turn.

diff --git a/install/install.sh b/install/install.sh index 8f61908..1889e2e 100755 --- a/install/install.sh +++ b/install/install.sh @@ -9,7 +9,7 @@ set -euo pipefail # # End result (either path): `forge` on your PATH. -FORGE_PKG="${FORGE_PKG:-@forge/cli}" +FORGE_PKG="${FORGE_PKG:-@hoangsonw/forge}" FORGE_VERSION="${FORGE_VERSION:-latest}" FORGE_MODE="${FORGE_MODE:-auto}" @@ -35,7 +35,7 @@ fi SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" IN_REPO=0 -if [ -f "${REPO_ROOT}/package.json" ] && grep -q '"name": "@forge/cli"' "${REPO_ROOT}/package.json"; then +if [ -f "${REPO_ROOT}/package.json" ] && grep -q '"name": "@hoangsonw/forge"' "${REPO_ROOT}/package.json"; then IN_REPO=1 fi diff --git a/package-lock.json b/package-lock.json index 15a5be9..a88b2f3 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { - "name": "@forge/cli", - "version": "0.1.0", + "name": "@hoangsonw/forge", + "version": "0.1.2", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "@forge/cli", - "version": "0.1.0", + "name": "@hoangsonw/forge", + "version": "0.1.2", "hasInstallScript": true, "license": "MIT", "dependencies": { @@ -151,74 +151,6 @@ "@jridgewell/sourcemap-codec": "^1.4.10" } }, - "node_modules/@esbuild/aix-ppc64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz", - "integrity": "sha512-1SDgH6ZSPTlggy1yI6+Dbkiz8xzpHJEVAlF/AM1tHPLsf5STom9rwtjE4hKAF20FfXXNTFqEYXyJNWh1GiZedQ==", - "cpu": [ - "ppc64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "aix" - ], - "engines": { - "node": ">=12" - } - }, - "node_modules/@esbuild/android-arm": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.21.5.tgz", - "integrity": "sha512-vCPvzSjpPHEi1siZdlvAlsPxXl7WbOVUBBAowWug4rJHb68Ox8KualB+1ocNvT5fjv6wpkX6o/iEpbDrf68zcg==", - "cpu": [ - "arm" - ], - "dev": true, - "license": "MIT", - 
"optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=12" - } - }, - "node_modules/@esbuild/android-arm64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.21.5.tgz", - "integrity": "sha512-c0uX9VAUBQ7dTDCjq+wdyGLowMdtR/GoC2U5IYk/7D1H1JYC0qseD7+11iMP2mRLN9RcCMRcjC4YMclCzGwS/A==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=12" - } - }, - "node_modules/@esbuild/android-x64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.21.5.tgz", - "integrity": "sha512-D7aPRUUNHRBwHxzxRvp856rjUHRFW1SdQATKXH2hqA0kAZb1hKmi02OpYRacl0TxIGz/ZmXWlbZgjwWYaCakTA==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=12" - } - }, "node_modules/@esbuild/darwin-arm64": { "version": "0.21.5", "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.21.5.tgz", @@ -236,312 +168,6 @@ "node": ">=12" } }, - "node_modules/@esbuild/darwin-x64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.21.5.tgz", - "integrity": "sha512-se/JjF8NlmKVG4kNIuyWMV/22ZaerB+qaSi5MdrXtd6R08kvs2qCN4C09miupktDitvh8jRFflwGFBQcxZRjbw==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">=12" - } - }, - "node_modules/@esbuild/freebsd-arm64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.21.5.tgz", - "integrity": "sha512-5JcRxxRDUJLX8JXp/wcBCy3pENnCgBR9bN6JsY4OmhfUtIHe3ZW0mawA7+RDAcMLrMIZaf03NlQiX9DGyB8h4g==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "freebsd" - ], - "engines": { - "node": ">=12" - } - }, - "node_modules/@esbuild/freebsd-x64": { - 
"version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.21.5.tgz", - "integrity": "sha512-J95kNBj1zkbMXtHVH29bBriQygMXqoVQOQYA+ISs0/2l3T9/kj42ow2mpqerRBxDJnmkUDCaQT/dfNXWX/ZZCQ==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "freebsd" - ], - "engines": { - "node": ">=12" - } - }, - "node_modules/@esbuild/linux-arm": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.21.5.tgz", - "integrity": "sha512-bPb5AHZtbeNGjCKVZ9UGqGwo8EUu4cLq68E95A53KlxAPRmUyYv2D6F0uUI65XisGOL1hBP5mTronbgo+0bFcA==", - "cpu": [ - "arm" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=12" - } - }, - "node_modules/@esbuild/linux-arm64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.21.5.tgz", - "integrity": "sha512-ibKvmyYzKsBeX8d8I7MH/TMfWDXBF3db4qM6sy+7re0YXya+K1cem3on9XgdT2EQGMu4hQyZhan7TeQ8XkGp4Q==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=12" - } - }, - "node_modules/@esbuild/linux-ia32": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.21.5.tgz", - "integrity": "sha512-YvjXDqLRqPDl2dvRODYmmhz4rPeVKYvppfGYKSNGdyZkA01046pLWyRKKI3ax8fbJoK5QbxblURkwK/MWY18Tg==", - "cpu": [ - "ia32" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=12" - } - }, - "node_modules/@esbuild/linux-loong64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.21.5.tgz", - "integrity": "sha512-uHf1BmMG8qEvzdrzAqg2SIG/02+4/DHB6a9Kbya0XDvwDEKCoC8ZRWI5JJvNdUjtciBGFQ5PuBlpEOXQj+JQSg==", - "cpu": [ - "loong64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": 
{ - "node": ">=12" - } - }, - "node_modules/@esbuild/linux-mips64el": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.21.5.tgz", - "integrity": "sha512-IajOmO+KJK23bj52dFSNCMsz1QP1DqM6cwLUv3W1QwyxkyIWecfafnI555fvSGqEKwjMXVLokcV5ygHW5b3Jbg==", - "cpu": [ - "mips64el" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=12" - } - }, - "node_modules/@esbuild/linux-ppc64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.21.5.tgz", - "integrity": "sha512-1hHV/Z4OEfMwpLO8rp7CvlhBDnjsC3CttJXIhBi+5Aj5r+MBvy4egg7wCbe//hSsT+RvDAG7s81tAvpL2XAE4w==", - "cpu": [ - "ppc64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=12" - } - }, - "node_modules/@esbuild/linux-riscv64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.21.5.tgz", - "integrity": "sha512-2HdXDMd9GMgTGrPWnJzP2ALSokE/0O5HhTUvWIbD3YdjME8JwvSCnNGBnTThKGEB91OZhzrJ4qIIxk/SBmyDDA==", - "cpu": [ - "riscv64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=12" - } - }, - "node_modules/@esbuild/linux-s390x": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.21.5.tgz", - "integrity": "sha512-zus5sxzqBJD3eXxwvjN1yQkRepANgxE9lgOW2qLnmr8ikMTphkjgXu1HR01K4FJg8h1kEEDAqDcZQtbrRnB41A==", - "cpu": [ - "s390x" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=12" - } - }, - "node_modules/@esbuild/linux-x64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.21.5.tgz", - "integrity": "sha512-1rYdTpyv03iycF1+BhzrzQJCdOuAOtaqHTWJZCWvijKD2N5Xu0TtVC8/+1faWqcP9iBCWOmjmhoH94dH82BxPQ==", - "cpu": [ - "x64" - ], - 
"dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=12" - } - }, - "node_modules/@esbuild/netbsd-x64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.21.5.tgz", - "integrity": "sha512-Woi2MXzXjMULccIwMnLciyZH4nCIMpWQAs049KEeMvOcNADVxo0UBIQPfSmxB3CWKedngg7sWZdLvLczpe0tLg==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "netbsd" - ], - "engines": { - "node": ">=12" - } - }, - "node_modules/@esbuild/openbsd-x64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.21.5.tgz", - "integrity": "sha512-HLNNw99xsvx12lFBUwoT8EVCsSvRNDVxNpjZ7bPn947b8gJPzeHWyNVhFsaerc0n3TsbOINvRP2byTZ5LKezow==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "openbsd" - ], - "engines": { - "node": ">=12" - } - }, - "node_modules/@esbuild/sunos-x64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.21.5.tgz", - "integrity": "sha512-6+gjmFpfy0BHU5Tpptkuh8+uw3mnrvgs+dSPQXQOv3ekbordwnzTVEb4qnIvQcYXq6gzkyTnoZ9dZG+D4garKg==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "sunos" - ], - "engines": { - "node": ">=12" - } - }, - "node_modules/@esbuild/win32-arm64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.21.5.tgz", - "integrity": "sha512-Z0gOTd75VvXqyq7nsl93zwahcTROgqvuAcYDUr+vOv8uHhNSKROyU961kgtCD1e95IqPKSQKH7tBTslnS3tA8A==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">=12" - } - }, - "node_modules/@esbuild/win32-ia32": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.21.5.tgz", - "integrity": 
"sha512-SWXFF1CL2RVNMaVs+BBClwtfZSvDgtL//G/smwAc5oVK/UPu2Gu9tIaRgFmYFFKrmg3SyAjSrElf0TiJ1v8fYA==", - "cpu": [ - "ia32" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">=12" - } - }, - "node_modules/@esbuild/win32-x64": { - "version": "0.21.5", - "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.21.5.tgz", - "integrity": "sha512-tQd/1efJuzPC6rCFwEvLtci/xNFcTZknmXs98FYDfGE4wP9ClFV98nyKrzJKVPMhdDnjzLhdUyMX4PsQAPjwIw==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">=12" - } - }, "node_modules/@eslint-community/eslint-utils": { "version": "4.9.1", "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.9.1.tgz", @@ -961,34 +587,6 @@ "node": ">=14" } }, - "node_modules/@rollup/rollup-android-arm-eabi": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.60.2.tgz", - "integrity": "sha512-dnlp69efPPg6Uaw2dVqzWRfAWRnYVb1XJ8CyyhIbZeaq4CA5/mLeZ1IEt9QqQxmbdvagjLIm2ZL8BxXv5lH4Yw==", - "cpu": [ - "arm" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ] - }, - "node_modules/@rollup/rollup-android-arm64": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.60.2.tgz", - "integrity": "sha512-OqZTwDRDchGRHHm/hwLOL7uVPB9aUvI0am/eQuWMNyFHf5PSEQmyEeYYheA0EPPKUO/l0uigCp+iaTjoLjVoHg==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ] - }, "node_modules/@rollup/rollup-darwin-arm64": { "version": "4.60.2", "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.60.2.tgz", @@ -1003,314 +601,6 @@ "darwin" ] }, - "node_modules/@rollup/rollup-darwin-x64": { - "version": "4.60.2", - "resolved": 
"https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.60.2.tgz", - "integrity": "sha512-gjEtURKLCC5VXm1I+2i1u9OhxFsKAQJKTVB8WvDAHF+oZlq0GTVFOlTlO1q3AlCTE/DF32c16ESvfgqR7343/g==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ] - }, - "node_modules/@rollup/rollup-freebsd-arm64": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.60.2.tgz", - "integrity": "sha512-Bcl6CYDeAgE70cqZaMojOi/eK63h5Me97ZqAQoh77VPjMysA/4ORQBRGo3rRy45x4MzVlU9uZxs8Uwy7ZaKnBw==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "freebsd" - ] - }, - "node_modules/@rollup/rollup-freebsd-x64": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.60.2.tgz", - "integrity": "sha512-LU+TPda3mAE2QB0/Hp5VyeKJivpC6+tlOXd1VMoXV/YFMvk/MNk5iXeBfB4MQGRWyOYVJ01625vjkr0Az98OJQ==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "freebsd" - ] - }, - "node_modules/@rollup/rollup-linux-arm-gnueabihf": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.60.2.tgz", - "integrity": "sha512-2QxQrM+KQ7DAW4o22j+XZ6RKdxjLD7BOWTP0Bv0tmjdyhXSsr2Ul1oJDQqh9Zf5qOwTuTc7Ek83mOFaKnodPjg==", - "cpu": [ - "arm" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-arm-musleabihf": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.60.2.tgz", - "integrity": "sha512-TbziEu2DVsTEOPif2mKWkMeDMLoYjx95oESa9fkQQK7r/Orta0gnkcDpzwufEcAO2BLBsD7mZkXGFqEdMRRwfw==", - "cpu": [ - "arm" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - 
"node_modules/@rollup/rollup-linux-arm64-gnu": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.60.2.tgz", - "integrity": "sha512-bO/rVDiDUuM2YfuCUwZ1t1cP+/yqjqz+Xf2VtkdppefuOFS2OSeAfgafaHNkFn0t02hEyXngZkxtGqXcXwO8Rg==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-arm64-musl": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.60.2.tgz", - "integrity": "sha512-hr26p7e93Rl0Za+JwW7EAnwAvKkehh12BU1Llm9Ykiibg4uIr2rbpxG9WCf56GuvidlTG9KiiQT/TXT1yAWxTA==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-loong64-gnu": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.60.2.tgz", - "integrity": "sha512-pOjB/uSIyDt+ow3k/RcLvUAOGpysT2phDn7TTUB3n75SlIgZzM6NKAqlErPhoFU+npgY3/n+2HYIQVbF70P9/A==", - "cpu": [ - "loong64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-loong64-musl": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.60.2.tgz", - "integrity": "sha512-2/w+q8jszv9Ww1c+6uJT3OwqhdmGP2/4T17cu8WuwyUuuaCDDJ2ojdyYwZzCxx0GcsZBhzi3HmH+J5pZNXnd+Q==", - "cpu": [ - "loong64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-ppc64-gnu": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.60.2.tgz", - "integrity": "sha512-11+aL5vKheYgczxtPVVRhdptAM2H7fcDR5Gw4/bTcteuZBlH4oP9f5s9zYO9aGZvoGeBpqXI/9TZZihZ609wKw==", - "cpu": [ - "ppc64" - ], - "dev": true, - 
"license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-ppc64-musl": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.60.2.tgz", - "integrity": "sha512-i16fokAGK46IVZuV8LIIwMdtqhin9hfYkCh8pf8iC3QU3LpwL+1FSFGej+O7l3E/AoknL6Dclh2oTdnRMpTzFQ==", - "cpu": [ - "ppc64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-riscv64-gnu": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.60.2.tgz", - "integrity": "sha512-49FkKS6RGQoriDSK/6E2GkAsAuU5kETFCh7pG4yD/ylj9rKhTmO3elsnmBvRD4PgJPds5W2PkhC82aVwmUcJ7A==", - "cpu": [ - "riscv64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-riscv64-musl": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.60.2.tgz", - "integrity": "sha512-mjYNkHPfGpUR00DuM1ZZIgs64Hpf4bWcz9Z41+4Q+pgDx73UwWdAYyf6EG/lRFldmdHHzgrYyge5akFUW0D3mQ==", - "cpu": [ - "riscv64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-s390x-gnu": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.60.2.tgz", - "integrity": "sha512-ALyvJz965BQk8E9Al/JDKKDLH2kfKFLTGMlgkAbbYtZuJt9LU8DW3ZoDMCtQpXAltZxwBHevXz5u+gf0yA0YoA==", - "cpu": [ - "s390x" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-x64-gnu": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.60.2.tgz", - "integrity": 
"sha512-UQjrkIdWrKI626Du8lCQ6MJp/6V1LAo2bOK9OTu4mSn8GGXIkPXk/Vsp4bLHCd9Z9Iz2OTEaokUE90VweJgIYQ==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-x64-musl": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.60.2.tgz", - "integrity": "sha512-bTsRGj6VlSdn/XD4CGyzMnzaBs9bsRxy79eTqTCBsA8TMIEky7qg48aPkvJvFe1HyzQ5oMZdg7AnVlWQSKLTnw==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-openbsd-x64": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.60.2.tgz", - "integrity": "sha512-6d4Z3534xitaA1FcMWP7mQPq5zGwBmGbhphh2DwaA1aNIXUu3KTOfwrWpbwI4/Gr0uANo7NTtaykFyO2hPuFLg==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "openbsd" - ] - }, - "node_modules/@rollup/rollup-openharmony-arm64": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.60.2.tgz", - "integrity": "sha512-NetAg5iO2uN7eB8zE5qrZ3CSil+7IJt4WDFLcC75Ymywq1VZVD6qJ6EvNLjZ3rEm6gB7XW5JdT60c6MN35Z85Q==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "openharmony" - ] - }, - "node_modules/@rollup/rollup-win32-arm64-msvc": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.60.2.tgz", - "integrity": "sha512-NCYhOotpgWZ5kdxCZsv6Iudx0wX8980Q/oW4pNFNihpBKsDbEA1zpkfxJGC0yugsUuyDZ7gL37dbzwhR0VI7pQ==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ] - }, - "node_modules/@rollup/rollup-win32-ia32-msvc": { - "version": "4.60.2", - "resolved": 
"https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.60.2.tgz", - "integrity": "sha512-RXsaOqXxfoUBQoOgvmmijVxJnW2IGB0eoMO7F8FAjaj0UTywUO/luSqimWBJn04WNgUkeNhh7fs7pESXajWmkg==", - "cpu": [ - "ia32" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ] - }, - "node_modules/@rollup/rollup-win32-x64-gnu": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.60.2.tgz", - "integrity": "sha512-qdAzEULD+/hzObedtmV6iBpdL5TIbKVztGiK7O3/KYSf+HIzU257+MX1EXJcyIiDbMAqmbwaufcYPvyRryeZtA==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ] - }, - "node_modules/@rollup/rollup-win32-x64-msvc": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.60.2.tgz", - "integrity": "sha512-Nd/SgG27WoA9e+/TdK74KnHz852TLa94ovOYySo/yMPuTmpckK/jIF2jSwS3g7ELSKXK13/cVdmg1Z/DaCWKxA==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ] - }, "node_modules/@tsconfig/node10": { "version": "1.0.12", "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.12.tgz", diff --git a/package.json b/package.json index 0366376..441f4d8 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@hoangsonw/forge", - "version": "0.1.1", + "version": "0.1.2", "description": "Forge - a local-first, multi-agent, programmable software-engineering runtime", "license": "MIT", "author": "Son Nguyen (hoangsonw)", @@ -35,7 +35,7 @@ "prepare": "npm run build", "start": "node ./bin/forge.js", "link": "npm run build && npm link", - "unlink": "npm unlink -g @forge/cli", + "unlink": "npm unlink -g @hoangsonw/forge", "relink": "npm run unlink --silent; npm run link", "postinstall": "node scripts/postinstall.js" }, diff --git a/src/agents/executor.ts b/src/agents/executor.ts index fc361da..c064fe3 100644 
--- a/src/agents/executor.ts +++ b/src/agents/executor.ts @@ -42,9 +42,21 @@ Each turn, output STRICT JSON: Protocol: - Choose tools from the catalog only. Each action's args must match the tool's input schema. - You will receive the result of every tool call before your next turn. Read errors carefully. -- If a tool fails, either retry with different args, switch tools, or set "done": true with a summary explaining what is blocked and why. +- If a tool fails, EITHER retry with different args OR switch to a different tool OR set "done": true with a summary explaining the blockage. DO NOT call ask_user to recover from tool errors — ask_user is ONLY for when the ORIGINAL user request is genuinely ambiguous. - Set "done": true with an empty actions array once the step is satisfied. -- Never include prose outside JSON. No code fences outside a single JSON object.`; +- Never include prose outside JSON. No code fences outside a single JSON object. + +Step-type → tool mapping (the plan step's \`type\` field tells you which tool to use): +- type="create_file" → ONE call to write_file with the FULL intended file body in \`content\`. Do NOT create the file empty and edit it afterwards. Infer the body from the step description, prior context, and the overall task. +- type="edit_file" → edit_file with exact oldText/newText snippets. If the target file is empty or does not exist yet, use write_file (or edit_file with oldText="") to write the whole body. +- type="delete_file" → delete_file. +- type="apply_patch" → apply_patch with a unified-diff \`patch\`. +- type="run_command" → run_command with a single shell command. Never use run_command to write file contents — use write_file instead. +- type="run_tests" → run_tests with no args (framework auto-detects). Only pass framework/target if the auto-detection was wrong. +- type="analyze" / "retrieve_context" → read_file, grep, list_dir, glob. No mutations. +- type="review" / "debug" → summarise what you found and set done:true. 
No mutations unless the step clearly demands them. + +When writing or editing code, produce the complete, compilable body — do not emit placeholders like "// TODO implement" unless the step explicitly says to scaffold.`; const TRUNCATE = 2_000; diff --git a/src/agents/narrator.ts b/src/agents/narrator.ts new file mode 100644 index 0000000..1481ebc --- /dev/null +++ b/src/agents/narrator.ts @@ -0,0 +1,225 @@ +/** + * Narrator agent — produces the user-facing prose answer for analysis / + * informational tasks (summarize, explain, describe, audit). + * + * Why this exists: + * The executor runs with `jsonMode: true` because every turn must return a + * parseable `{actions, summary, done}` object. That JSON contract forces + * the `summary` field to describe what the agent did this turn + * ("File has been read successfully"), not the *content* the user asked + * for. For code-change tasks that's fine — the deliverable is the diff. For + * informational tasks the diff is empty and the step summaries read as + * nonsense to the user. + * + * The narrator is a single non-JSON call run after the executor completes. + * It takes the tool outputs the executor gathered (file contents, grep + * hits, etc.) and asks a capable general-purpose model to actually answer + * the user's original question. Because it doesn't set `jsonMode`, the + * streaming path in the router kicks in and the user watches the answer + * render live via the progress rail. 
+ * + * @author Son Nguyen + */ + +import { Mode, ToolResult } from '../types'; +import { callModel } from '../models/router'; +import { assembleTaskPrompt } from '../prompts/assembler'; +import { log } from '../logging/logger'; + +export interface NarratorToolResult { + tool: string; + args: unknown; + result: ToolResult; +} + +export interface NarrateParams { + taskTitle: string; + taskDescription: string; + toolResults: NarratorToolResult[]; + mode: Mode; + taskId?: string; + projectId?: string; +} + +/** Per-tool-result size cap so a giant file doesn't blow the context window. */ +const PER_RESULT_MAX = 18_000; +/** Aggregate cap across all results. */ +const TOTAL_CONTEXT_MAX = 60_000; + +const truncate = (s: string, max: number): string => + s.length > max ? s.slice(0, max) + `\n…[truncated ${s.length - max} chars]` : s; + +/** + * Best-effort extraction of "human-readable content" from a tool result. + * Tools that produce file contents or search hits are the common case; for + * everything else we fall back to a JSON dump so the model still has signal. + */ +const extractContent = (tr: NarratorToolResult): string => { + if (!tr.result.success) { + const msg = tr.result.error?.message ?? 'tool failed'; + return `[error] ${msg}`; + } + const out = tr.result.output as unknown; + if (out == null) return ''; + if (typeof out === 'string') return out; + if (typeof out === 'object') { + const rec = out as Record; + // Canonical content-bearing shapes used across Forge's tools. + if (typeof rec.content === 'string') return String(rec.content); + if (Array.isArray(rec.matches)) { + return rec.matches.map((m) => (typeof m === 'string' ? 
m : JSON.stringify(m))).join('\n'); + } + if (typeof rec.text === 'string') return String(rec.text); + if (typeof rec.body === 'string') return String(rec.body); + try { + return JSON.stringify(out, null, 2); + } catch { + return String(out); + } + } + return String(out); +}; + +const toolLabel = (tr: NarratorToolResult): string => { + const args = tr.args as Record | undefined; + const target = (args && (args.path || args.file || args.pattern || args.url || args.query)) ?? ''; + return target ? `${tr.tool} · ${String(target).slice(0, 120)}` : tr.tool; +}; + +/** + * Ask the model to write the user-facing answer. Streams by virtue of not + * setting jsonMode; the router emits deltas via the event bus which the + * progress rail renders live. + */ +export const narrateAnalysis = async (params: NarrateParams): Promise => { + const blocks: string[] = []; + let total = 0; + for (const tr of params.toolResults) { + const content = extractContent(tr); + if (!content) continue; + const capped = truncate(content, PER_RESULT_MAX); + const fenced = `### ${toolLabel(tr)}\n\n${capped}`; + if (total + fenced.length > TOTAL_CONTEXT_MAX) break; + blocks.push(fenced); + total += fenced.length; + } + const context = blocks.join('\n\n---\n\n') || '(no tool output was gathered)'; + + const prompt = assembleTaskPrompt({ + mode: params.mode, + title: params.taskTitle, + description: params.taskDescription, + additionalUserText: `You are answering an informational / analysis task. The executor has already gathered the relevant context below via tools. Your job now is to write the answer the user asked for — directly, concretely, in well-formatted Markdown. + +Rules: +- Write the answer. Do NOT narrate what you're about to do. +- Do NOT wrap the answer in JSON or code fences (unless you're quoting code). +- Prefer short paragraphs, bullet points for lists of things, and inline \`code\` for identifiers. 
+- If the task is to summarize, produce a summary — don't just say "the file was read". + +USER TASK: +${params.taskDescription || params.taskTitle} + +GATHERED CONTEXT: +${context} + +Now write the answer.`, + }); + + try { + // No jsonMode → router will invoke stream() on the provider and emit + // per-token deltas over the event bus. CLI/REPL/UI already render those + // live. The accumulated text is returned as `response.content`. + const { response } = await callModel( + 'planner', + params.mode, + prompt.messages, + { + temperature: 0.3, + maxTokens: 2_000, + timeoutMs: 180_000, + }, + { taskId: params.taskId, projectId: params.projectId, role: 'planner' }, + ); + return response.content.trim(); + } catch (err) { + log.warn('narrator failed', { err: String(err) }); + // Fall back to a minimal note so the caller still has something usable; + // loop.ts keeps the step summaries as backup content. + return ''; + } +}; + +export interface ConversationParams { + input: string; + mode: Mode; + taskId?: string; + projectId?: string; + /** + * Composed prior-turns context (from composeDescription in the REPL path). + * Pre-formatted as markdown with `## Current request` / `## Conversation + * so far` sections. Passed verbatim to the model so follow-ups like + * "what have we talked about?" can actually recall. + */ + description?: string; +} + +/** + * Answer a pure conversational question with no tool access. Used by the + * orchestrator's conversation fast-path — same streaming contract as + * `narrateAnalysis` (no jsonMode → router emits deltas → progress rail + * renders live) but without a GATHERED CONTEXT block, since the whole + * point is that no tools need to run. 
+ * + * Multi-turn history: when `description` is supplied (REPL / UI wrap the + * new user message with prior turns via composeDescription), we hand the + * whole thing to the model under an explicit "CONVERSATION HISTORY" block + * so follow-up questions ("what have we talked about?") resolve against + * the actual prior turns instead of the model hallucinating. + */ +export const respondConversation = async (params: ConversationParams): Promise => { + // If we got a composed description (multi-turn context), use it as the + // primary payload. Otherwise the raw input is enough. + const desc = params.description; + const hasHistory = + typeof desc === 'string' && + desc.length > params.input.length && + desc.includes('Conversation so far'); + const payload = hasHistory && desc ? desc : params.input; + + const prompt = assembleTaskPrompt({ + mode: params.mode, + title: 'Conversation', + description: params.input, + additionalUserText: `You are a helpful software-engineering assistant answering a general conversational question. The user is NOT asking about a specific codebase — answer from general knowledge. + +Rules: +- Answer directly. Do NOT narrate what you're about to do. +- Do NOT wrap the answer in JSON. +- Use concise Markdown: short paragraphs, inline \`code\` for identifiers, fenced \`\`\` blocks only for code samples. +- If the question is ambiguous, pick the most common interpretation and answer. +${hasHistory ? '- Use the CONVERSATION HISTORY below as ground truth when answering follow-ups. If the user asks "what have we talked about?" or similar, summarize ONLY the actual prior turns listed — do NOT invent topics that were not discussed.' : ''} + +${hasHistory ? 
'CONVERSATION HISTORY & CURRENT REQUEST:\n' : 'QUESTION:\n'}${payload} + +Now write the answer.`, + }); + + try { + const { response } = await callModel( + 'planner', + params.mode, + prompt.messages, + { + temperature: 0.4, + maxTokens: 1_500, + timeoutMs: 120_000, + }, + { taskId: params.taskId, projectId: params.projectId, role: 'planner' }, + ); + return response.content.trim(); + } catch (err) { + log.warn('conversation responder failed', { err: String(err) }); + throw err; + } +}; diff --git a/src/agents/planner.ts b/src/agents/planner.ts index 51ed74f..cce995c 100644 --- a/src/agents/planner.ts +++ b/src/agents/planner.ts @@ -41,7 +41,19 @@ Rules: - Keep the plan minimal — no busywork steps. - Prefer reading before writing. Always include verification (tests or review) before completion. - If user approval may be needed for a destructive step, mark risk accordingly. -- Reference concrete file paths where known. If unknown, include a retrieve_context step first.`; +- Reference concrete file paths where known ONLY IF you have been shown them in the task description or context. NEVER invent file paths. If you're unsure, include a retrieve_context step first; do not guess. +- When creating a new file with content, emit ONE step of type create_file with the full body (do not split into "create empty" + "edit to fill" — edit_file cannot target an empty file). +- Prefer edit_file for surgical modifications of existing content (pass a unique oldText snippet); use write_file only when rewriting the entire body. + +INTENT-SPECIFIC RULES: +- When INTENT=analysis (summarize / explain / describe / audit / review): + * DO NOT emit edit_file, create_file, write_file, delete_file, apply_patch, run_command, or run_tests. These are mutations; analysis is read-only. + * The deliverable is the spoken answer, NOT a diff. A post-step narrator synthesises the user-facing summary from whatever context you gathered. 
+ * A good analysis plan is ONE step: retrieve_context (or read_file) targeting the exact file(s) the user named. Stop there. That's it. + * Do NOT emit an "analyze" step after retrieve_context — it's redundant and the executor has nothing new to call. +- When INTENT=bugfix / feature / refactor / optimization / test / setup: + * Normal mutation rules apply. +- Never write a file the user did not ask you to touch. "Summarize X" does NOT grant permission to edit X.`; const buildFallbackPlan = (task: Task): Plan => { const steps: PlanStep[] = [ @@ -140,6 +152,11 @@ export const buildPlannerPrompt = (task: Task, projectRoot: string, mode: Mode) }); const patternBlock = learnedPatternBlock(task); const contextBlocks = patternBlock ? [...retrieved.blocks, patternBlock] : retrieved.blocks; + // Surfacing intent directly in the user prompt: without this the planner + // would read "summarize src/core/loop.ts" and sometimes plan an edit_file + // step that appends a summary COMMENT to the source — which nobody asked + // for and modifies code for a read-only task. + const intent = task.profile?.intent ?? 'other'; return assembleTaskPrompt({ mode, title: task.title, @@ -148,7 +165,7 @@ export const buildPlannerPrompt = (task: Task, projectRoot: string, mode: Mode) projectInstructions: loadProjectInstructions(projectRoot), contextBlocks, tools: allTools(), - additionalUserText: `${planSchemaPrompt}\n\nTASK:\n${task.title}\n${task.description ?? ''}`, + additionalUserText: `${planSchemaPrompt}\n\nINTENT: ${intent}\n\nTASK:\n${task.title}\n${task.description ?? ''}`, }); }; diff --git a/src/agents/reviewer.ts b/src/agents/reviewer.ts index 0020ca8..c99cb53 100644 --- a/src/agents/reviewer.ts +++ b/src/agents/reviewer.ts @@ -26,11 +26,17 @@ Output STRICT JSON: "summary": string } -Approve only if: -- The requested change is actually complete. -- No obvious regressions introduced. -- Tests (if any) pass. 
-- No security issues introduced (secrets, unsafe commands, unsanitized input).`; +Approval rules: +- For tasks that modify code (bugfix/feature/refactor/test/optimization): + Approve only if the requested change is complete, no obvious regressions + were introduced, any tests pass, and no security issues were introduced + (secrets, unsafe commands, unsanitized input). +- For analysis / informational tasks (summarize, explain, describe, audit): + The deliverable is the answer itself, NOT a diff. Do NOT reject such a + task merely because there are no file changes — that's the expected + shape of the work. Approve if the requested analysis was produced and + appears coherent and on-topic. Reject only if the analysis is absent, + contradictory, or the agent clearly didn't perform the work.`; const parse = (content: string): ReviewVerdict | null => { const fence = /```(?:json)?\s*([\s\S]+?)\s*```/i.exec(content); @@ -57,18 +63,24 @@ export interface ReviewerInput { changesSummary: string; filesChanged: string[]; testsPassed?: boolean; + /** Intent from the classifier — lets the reviewer judge analysis tasks by analysis content, not by file-change count. */ + intent?: import('../types').TaskType; } export const reviewOutcome = async ( input: ReviewerInput, mode: import('../types').Mode, ): Promise => { + const intentLine = input.intent + ? `Task intent: ${input.intent}${input.intent === 'analysis' ? ' (informational — no file changes expected)' : ''}` + : ''; const prompt = assembleTaskPrompt({ mode, title: `Review: ${input.taskTitle}`, description: input.changesSummary, additionalUserText: `${reviewerSchema} +${intentLine} Files changed: ${input.filesChanged.join(', ') || '(none)'} Tests passed: ${input.testsPassed ?? 
'unknown'} diff --git a/src/classifier/classifier.ts b/src/classifier/classifier.ts index 127f333..c503ca5 100644 --- a/src/classifier/classifier.ts +++ b/src/classifier/classifier.ts @@ -5,7 +5,7 @@ */ import { Mode, TaskProfile } from '../types'; -import { heuristicClassify } from './heuristics'; +import { heuristicClassify, looksConversational } from './heuristics'; import { callModel } from '../models/router'; import { log } from '../logging/logger'; @@ -24,6 +24,7 @@ const DEFAULT_AGENTS_BY_TYPE: Record = { setup: ['executor'], test: ['executor', 'reviewer'], optimization: ['planner', 'executor', 'reviewer'], + conversation: [], other: ['planner', 'executor'], }; @@ -39,6 +40,26 @@ explanation (string, <=160 chars). Do not include prose. Only JSON.`; export const classify = async (params: ClassifyParams): Promise => { + // Fast-path: clearly-conversational questions ("what is X?", "why does Y + // work this way?") skip the plan/approve/execute pipeline entirely. The + // heuristic is deliberately conservative — false-negatives just mean the + // user waits through planning, false-positives would answer from general + // knowledge when they wanted code analysis. + if (looksConversational(params.input)) { + return { + intent: 'conversation', + secondary: [], + complexity: 'trivial', + scope: 'single-file', + risk: 'low', + requiresPlan: false, + requiresTests: false, + requiresReview: false, + agents: DEFAULT_AGENTS_BY_TYPE.conversation, + skills: [], + explanation: 'heuristic: conversational question (no repo refs, no imperatives)', + }; + } const heuristic = heuristicClassify(params.input, params.filesReferenced?.length ?? 
0); let enriched = heuristic; @@ -73,7 +94,11 @@ export const classify = async (params: ClassifyParams): Promise => const requiresPlan = enriched.complexity !== 'trivial' || params.mode === 'plan'; const requiresTests = enriched.type === 'bugfix' || enriched.type === 'feature' || enriched.type === 'refactor'; - const requiresReview = enriched.complexity !== 'trivial'; + // Analysis / explain / summarize tasks produce no file changes by design — + // the deliverable is the answer printed to the user. Gating completion on + // a reviewer that expects a diff here guarantees a false "Review did not + // approve: no file changes" rejection. Skip review for analysis intents. + const requiresReview = enriched.complexity !== 'trivial' && enriched.type !== 'analysis'; return { intent: enriched.type, diff --git a/src/classifier/heuristics.ts b/src/classifier/heuristics.ts index d26e18d..59fab58 100644 --- a/src/classifier/heuristics.ts +++ b/src/classifier/heuristics.ts @@ -6,6 +6,60 @@ import { TaskType, Complexity, Risk, Scope } from '../types'; +/** + * Conservative detector for "pure conversational question" inputs that don't + * need the plan/approve/execute pipeline — e.g. "what's the difference + * between a Map and a Dict?" or "explain closures". + * + * Err strongly toward false (treat as normal task). A missed-conversation + * just means the user waits through planning for a tiny reply; a + * false-positive means we answer from general knowledge when the user was + * asking about their codebase. + * + * Rejects anything that: + * - references a file/path/extension in the repo + * - mentions "this codebase / this file / our code / …" + * - contains any imperative code verb (create/fix/refactor/write/…) + * - is longer than ~400 chars (chat questions are short) + * Accepts if it starts with an interrogative (what/why/how/…) or ends with `?`. 
+ */ +export const looksConversational = (input: string): boolean => { + const s = input.trim(); + if (!s || s.length > 400) return false; + + // Any reference to repo artifacts disqualifies — the user is asking about + // their code, not a general concept. + const repoRef = + /\b(src|lib|app|test|tests|docs|bin|dist|node_modules|public|scripts)\/[\w./-]+|\.(ts|tsx|js|jsx|mjs|cjs|py|go|rs|java|rb|cs|php|swift|kt|md|json|yaml|yml|toml|html|css|scss|sh|sql|proto|graphql)\b|\bthis (repo|repository|codebase|code ?base|project|file|function|module|component|class|package|method|script|service)\b|\bthe (codebase|repo|project|file)\b|\b(README|CHANGELOG|LICENSE|NOTICE|CONTRIBUTING|SECURITY|Dockerfile|Makefile|package\.json|tsconfig|eslintrc|prettierrc)\b/; + if (repoRef.test(s)) return false; + + // Imperative code / analysis actions imply the user wants something DONE + // on concrete targets, not chatted about abstractly. + const imperative = + /\b(create|add|build|implement|introduce|fix|patch|refactor|rewrite|edit|modify|update|change|write|delete|remove|drop|migrate|install|upgrade|downgrade|optimi[sz]e|scaffold|deploy|setup|configure|rename|move|copy|generate|run|execute|test|summari[sz]e|analy[sz]e|audit|review|investigate|debug|trace|lint|format)\b/i; + if (imperative.test(s)) return false; + + // Greetings / short chat openers. Pulled out because they're common REPL + // starters and don't match any interrogative pattern. + const greeting = + /^(hi|hello|hey|yo|sup|howdy|greetings|good (morning|afternoon|evening|night)|thanks?|thx|ty|ok|okay|cool|nice|great|sure|yep|yeah|yes|no|nope|bye|goodbye|cheers)\b[!?. ]*$/i; + if (greeting.test(s)) return true; + + // Question shape: starts with an interrogative or ends with '?'. 
+ const interrogative = + /^(what|why|how|when|where|who|which|is|are|can|could|should|would|will|does|do|did|explain|compare|contrast|tell me|describe (?!src)|define|difference between)\b/i; + if (interrogative.test(s)) return true; + if (s.endsWith('?')) return true; + + // Short, non-imperative, no-repo-ref text (<=5 words, <=40 chars) is + // almost always small talk or a clarifying remark rather than a coding + // task. Takes the fast-path. + const wordCount = s.split(/\s+/).filter(Boolean).length; + if (s.length <= 40 && wordCount <= 5) return true; + + return false; +}; + interface KeywordRule { re: RegExp; type: TaskType; @@ -26,7 +80,7 @@ const RULES: KeywordRule[] = [ weight: 3, }, { - re: /\b(explain|understand|analy[sz]e|describe|what does|why does)\b/i, + re: /\b(explain|understand|analy[sz]e|describe|summari[sz]e|summary|audit|review|what does|why does|how does)\b/i, type: 'analysis', weight: 2, }, diff --git a/src/cli/banners.ts b/src/cli/banners.ts index b07d6f4..01815eb 100644 --- a/src/cli/banners.ts +++ b/src/cli/banners.ts @@ -139,13 +139,34 @@ export const section = (title: string, glyph = '◆'): string => { return `\n${chalk.rgb(...PALETTE.cyan)(glyph)} ${chalk.bold(title)}\n`; }; +// Inner width between │'s: 46 cols. Title rows reserve ` ` (3) + `✔ ` (3) +// + ` ` (2) of chrome = 8, leaving 38 for the title. Body rows reserve +// ` ` (3) + ` ` (2) = 5, leaving 41 for the body. +const BOX_TITLE_PAD = 38; +const BOX_BODY_PAD = 41; + +// Strip ANSI SGR escapes so padding math matches *display* width, not raw +// byte length. Callers routinely pre-style strings with chalk (e.g. doctor +// passes `chalk.dim('try: forge run …')` to success()) and those invisible +// `\x1b[…m` bytes would otherwise count toward .padEnd and push the trailing +// │ out of alignment. 
+// eslint-disable-next-line no-control-regex -- CSI SGR pattern; controls are the whole point +const ANSI_RE = /\x1b\[[0-9;]*m/g; +const padVisible = (s: string, width: number): string => { + const visibleLen = s.replace(ANSI_RE, '').length; + const deficit = width - visibleLen; + return deficit > 0 ? s + ' '.repeat(deficit) : s; +}; + /** Big celebratory success frame. */ export const success = (title: string, body?: string[]): string => { const lines = [ chalk.green(' ╭──────────────────────────────────────────────╮'), - chalk.green(' │ ') + chalk.bold.green('✔ ' + title.padEnd(40)) + chalk.green(' │'), + chalk.green(' │ ') + + chalk.bold.green('✔ ' + padVisible(title, BOX_TITLE_PAD)) + + chalk.green(' │'), ...(body ?? []).map( - (b) => chalk.green(' │ ') + chalk.dim(b.padEnd(40)) + chalk.green(' │'), + (b) => chalk.green(' │ ') + chalk.dim(padVisible(b, BOX_BODY_PAD)) + chalk.green(' │'), ), chalk.green(' ╰──────────────────────────────────────────────╯'), ]; @@ -156,8 +177,12 @@ export const success = (title: string, body?: string[]): string => { export const failure = (title: string, body?: string[]): string => { const lines = [ chalk.red(' ╭──────────────────────────────────────────────╮'), - chalk.red(' │ ') + chalk.bold.red('✖ ' + title.padEnd(40)) + chalk.red(' │'), - ...(body ?? []).map((b) => chalk.red(' │ ') + chalk.dim(b.padEnd(40)) + chalk.red(' │')), + chalk.red(' │ ') + + chalk.bold.red('✖ ' + padVisible(title, BOX_TITLE_PAD)) + + chalk.red(' │'), + ...(body ?? 
[]).map( + (b) => chalk.red(' │ ') + chalk.dim(padVisible(b, BOX_BODY_PAD)) + chalk.red(' │'), + ), chalk.red(' ╰──────────────────────────────────────────────╯'), ]; return lines.join('\n'); @@ -167,9 +192,11 @@ export const failure = (title: string, body?: string[]): string => { export const attention = (title: string, body?: string[]): string => { const lines = [ chalk.yellow(' ╭──────────────────────────────────────────────╮'), - chalk.yellow(' │ ') + chalk.bold.yellow('⚠ ' + title.padEnd(40)) + chalk.yellow(' │'), + chalk.yellow(' │ ') + + chalk.bold.yellow('⚠ ' + padVisible(title, BOX_TITLE_PAD)) + + chalk.yellow(' │'), ...(body ?? []).map( - (b) => chalk.yellow(' │ ') + chalk.dim(b.padEnd(40)) + chalk.yellow(' │'), + (b) => chalk.yellow(' │ ') + chalk.dim(padVisible(b, BOX_BODY_PAD)) + chalk.yellow(' │'), ), chalk.yellow(' ╰──────────────────────────────────────────────╯'), ]; @@ -228,24 +255,30 @@ export const tag = (severity: 'info' | 'warning' | 'error' | 'critical'): string /** Completion summary, used by `run` at the end of a task. * The `title` may contain markdown produced by the reviewer/model — * render it through the terminal markdown renderer so headings, code, - * and bold render instead of showing literal asterisks/backticks. */ + * and bold render instead of showing literal asterisks/backticks. + * + * When `skipTitle` is true, the caller has already streamed the answer to + * the terminal live (see progress.ts) and we don't want to repeat it. In + * that case we emit just the divider + metadata. */ export const completionSummary = ( title: string, filesChanged: string[], durationMs: number, cost?: number, + skipTitle = false, ): string => { // Single-line summaries get inline markdown; multi-line ones get the // full block renderer so lists / headings / code blocks breathe. const isMultiline = title.includes('\n'); - const rendered = isMultiline - ? 
renderMarkdown(title, { indent: 2 }) - : ` ${sparkles()} ${renderMarkdown(title, { oneLine: false })}`; + const rendered = skipTitle + ? '' + : isMultiline + ? renderMarkdown(title, { indent: 2 }) + : ` ${sparkles()} ${renderMarkdown(title, { oneLine: false })}`; const lines = [ divider('done'), '', - rendered, - '', + ...(rendered ? [rendered, ''] : []), kv('duration', chalk.rgb(...PALETTE.cyan)(`${(durationMs / 1000).toFixed(1)}s`)), kv( 'files changed', diff --git a/src/cli/choose.ts b/src/cli/choose.ts new file mode 100644 index 0000000..859bd21 --- /dev/null +++ b/src/cli/choose.ts @@ -0,0 +1,238 @@ +/** + * Interactive numbered-select prompt. + * + * Users can pick a choice by either: + * - Pressing the digit that matches the item (1, 2, 3, …) for instant pick. + * - Using ↑/↓ (or j/k) to highlight and Enter to confirm. + * + * Why not just use `prompts` select? `prompts` supports arrow-nav but has no + * digit hotkey path, and we want the common case (3–4 choices) to be a + * single keystroke. Raw-mode stdin gives us that without a new dep. + * + * Falls back to `prompts` when stdin/stdout isn't a TTY (CI, pipes), so + * scripts and test harnesses continue to work unchanged. + * + * @author Son Nguyen + */ + +import prompts from 'prompts'; +import chalk from 'chalk'; +import { PALETTE } from './banners'; + +export interface NumberedChoice { + title: string; + value: T; + /** Optional trailing hint shown in dim color next to the title. */ + hint?: string; + /** Optional color override for the title. */ + color?: 'green' | 'red' | 'yellow' | 'cyan' | 'default'; +} + +export interface NumberedSelectOptions { + message: string; + choices: NumberedChoice[]; + /** Default-highlighted index (0-based). */ + initial?: number; + /** Emitted under the prompt; defaults to "press 1–N or ↑↓ + Enter". 
*/ + hint?: string; +} + +const cyan = chalk.rgb(...PALETTE.cyan); +const dim = chalk.rgb(...PALETTE.muted); +const green = chalk.rgb(...PALETTE.green); + +const colorFor = (c: NumberedChoice['color']): ((s: string) => string) => { + switch (c) { + case 'green': + return chalk.green; + case 'red': + return chalk.red; + case 'yellow': + return chalk.yellow; + case 'cyan': + return cyan; + default: + return (s: string) => s; + } +}; + +export const chooseNumbered = async (opts: NumberedSelectOptions): Promise => { + const n = opts.choices.length; + if (n === 0) return undefined; + if (n > 9) { + // Digit shortcuts only cover 1–9. For longer lists, fall through to the + // arrow-only experience via the `prompts` library (still usable, just no + // hotkeys). In practice Forge never shows more than 4 options. + const resp = await prompts({ + type: 'select', + name: 'value', + message: opts.message, + choices: opts.choices.map((c) => ({ + title: c.hint ? `${c.title} ${dim(c.hint)}` : c.title, + value: c.value, + })), + initial: opts.initial ?? 0, + }); + return resp?.value as T | undefined; + } + + const tty = Boolean(process.stdin.isTTY && process.stdout.isTTY); + if (!tty) { + const resp = await prompts({ + type: 'select', + name: 'value', + message: opts.message, + choices: opts.choices.map((c, i) => ({ + title: `${i + 1}. ${c.title}${c.hint ? ' ' + dim(c.hint) : ''}`, + value: c.value, + })), + initial: opts.initial ?? 0, + }); + return resp?.value as T | undefined; + } + + return runRawSelect(opts); +}; + +const runRawSelect = (opts: NumberedSelectOptions): Promise => { + const choices = opts.choices; + const n = choices.length; + let idx = Math.max(0, Math.min(n - 1, opts.initial ?? 0)); + + const out = process.stdout; + // Number of lines we've painted since the prompt header; tracked so we + // can clear exactly that many on each re-render / exit without nuking + // unrelated terminal output above. + let linesDrawn = 0; + + const hintText = opts.hint ?? 
`press ${cyan('1')}–${cyan(String(n))} or ${cyan('↑↓ Enter')}`; + + const hideCursor = (): void => { + out.write('\x1b[?25l'); + }; + const showCursor = (): void => { + out.write('\x1b[?25h'); + }; + + const clear = (): void => { + if (linesDrawn === 0) return; + // Move cursor up `linesDrawn` lines, clear each one top-down. + out.write(`\x1b[${linesDrawn}A`); + for (let i = 0; i < linesDrawn; i++) { + out.write('\x1b[2K'); // clear entire line + if (i < linesDrawn - 1) out.write('\x1b[1B'); // move down + } + out.write(`\x1b[${linesDrawn - 1}A`); // back to top + out.write('\r'); + linesDrawn = 0; + }; + + const render = (): void => { + clear(); + const lines: string[] = []; + lines.push(`${green('?')} ${chalk.bold(opts.message)} ${dim(hintText)}`); + for (let i = 0; i < n; i++) { + const c = choices[i]; + const color = colorFor(c.color); + const num = cyan(`${i + 1}.`); + const title = color(c.title); + const hint = c.hint ? ` ${dim(c.hint)}` : ''; + if (i === idx) { + lines.push(` ${cyan('▸')} ${num} ${chalk.bold(title)}${hint}`); + } else { + lines.push(` ${num} ${title}${hint}`); + } + } + out.write(lines.join('\n') + '\n'); + linesDrawn = lines.length; + }; + + return new Promise((resolve) => { + const stdin = process.stdin; + let finished = false; + + const finish = (value: T | undefined, picked?: number): void => { + if (finished) return; + finished = true; + stdin.removeListener('data', onData); + // Do NOT reset raw mode or pause stdin here. When called from the + // REPL (via task-approval / permission prompts), the line editor + // owns stdin and expects raw mode + a resumed stream. If we toggle + // those off, the next readline.emitKeypressEvents tick gets nothing + // and the REPL silently dies (Node exits once stdin is paused and + // no other handles keep the loop alive). Let the outer owner manage + // global stdin state; we only unregister our own data listener. 
+ // Replace the prompt with a compact "answered" summary line so the + // scrollback stays tidy. + clear(); + if (picked != null && picked >= 0 && picked < n) { + const c = choices[picked]; + const color = colorFor(c.color); + out.write(`${green('✔')} ${chalk.bold(opts.message)} ${color(c.title)}\n`); + } else { + out.write(`${dim('·')} ${chalk.bold(opts.message)} ${dim('(cancelled)')}\n`); + } + showCursor(); + resolve(value); + }; + + const onData = (chunk: Buffer): void => { + const key = chunk.toString('utf8'); + // digit 1..9 + if (/^[1-9]$/.test(key)) { + const pick = parseInt(key, 10) - 1; + if (pick < n) { + finish(choices[pick].value, pick); + } + return; + } + // Enter + if (key === '\r' || key === '\n') { + finish(choices[idx].value, idx); + return; + } + // Ctrl-C or Esc → cancel + if (key === '\x03' || key === '\x1b') { + finish(undefined); + return; + } + // Arrow up / k + if (key === '\x1b[A' || key === 'k') { + idx = (idx - 1 + n) % n; + render(); + return; + } + // Arrow down / j + if (key === '\x1b[B' || key === 'j') { + idx = (idx + 1) % n; + render(); + return; + } + // Home / 'g' + if (key === '\x1b[H' || key === 'g') { + idx = 0; + render(); + return; + } + // End / 'G' + if (key === '\x1b[F' || key === 'G') { + idx = n - 1; + render(); + return; + } + }; + + try { + stdin.setRawMode(true); + } catch { + // Non-TTY fell through earlier but be defensive: if raw mode fails, + // fall back to the prompts() path synchronously inside the Promise. 
+ resolve(undefined); + return; + } + stdin.resume(); + stdin.on('data', onData); + hideCursor(); + render(); + }); +}; diff --git a/src/cli/commands/run.ts b/src/cli/commands/run.ts index 4486d5c..6591795 100644 --- a/src/cli/commands/run.ts +++ b/src/cli/commands/run.ts @@ -25,6 +25,7 @@ import { } from '../ui'; import chalk from 'chalk'; import { bootstrap } from '../bootstrap'; +import { startProgress } from '../progress'; const runOptions = (cmd: Command) => cmd @@ -85,13 +86,19 @@ runCommand.action(async (promptParts: string[], opts) => { nonInteractive: Boolean(opts.nonInteractive), }; - const result = await orchestrateRun({ - input: promptText, - mode, - autoApprove: Boolean(opts.yes), - planOnly: Boolean(opts.planOnly) || mode === 'plan', - flags, - }); + const progress = opts.trace ? null : startProgress({ initial: 'classifying request' }); + let result; + try { + result = await orchestrateRun({ + input: promptText, + mode, + autoApprove: Boolean(opts.yes), + planOnly: Boolean(opts.planOnly) || mode === 'plan', + flags, + }); + } finally { + progress?.stop(); + } if (result.result.success) { process.stdout.write( @@ -101,6 +108,7 @@ runCommand.action(async (promptParts: string[], opts) => { result.result.filesChanged, result.result.durationMs, result.result.costUsd, + progress?.didStream() === true, ), ); ok(`Task complete.`); diff --git a/src/cli/markdown.ts b/src/cli/markdown.ts index d38623c..b4787f2 100644 --- a/src/cli/markdown.ts +++ b/src/cli/markdown.ts @@ -23,10 +23,90 @@ interface RenderOptions { indent?: number; } +/** + * Normalise fenced code blocks that arrived flattened on a single line. 
+ * + * Small local models occasionally emit code blocks like: + * + * ```javascript const numbers = [1,2,3]; numbers.forEach(x=>x); ``` + * + * CommonMark requires ``` + language tag alone on a line, so the block + * parser rejects this and falls through to inline-code rendering — which + * in turn treats consecutive backticks as empty code spans, producing + * ugly `` `` javascript … `` `` output in the terminal. + * + * This pre-pass rewrites any inline fence (opening + body + closing all on + * the same physical line) into canonical multi-line form so the block + * parser picks it up as a real fenced block. Leaves well-formed blocks + * (already multi-line) untouched. + */ +/** + * Renumber ordered lists that LLMs emit as "1. … 1. … 1. …". + * + * CommonMark's spec says all ordered-list markers can literally be `1.` and + * a compliant renderer auto-numbers sequentially. Models rely on that and + * constantly emit repeated `1.`. Our renderer's own ordered-list handler + * does renumber within a contiguous run — but when the list has nested + * content (a bullet sub-list, an indented code block), the run ends and + * the next `1.` starts a fresh list with n=1. Result: "1. Foo → bullets → + * 1. Bar → bullets → 1. Baz" renders as three separate "1.". + * + * This pre-pass walks line-by-line, keeps a counter per indent level, and + * rewrites `1.` markers when we're past the first item at that indent. It + * deliberately leaves sources that *already* use sequential numbering + * (`1.`, `2.`, `3.`) alone — only the literal-`1.`-for-every-item pattern + * gets rewritten. Counters reset on headings and fenced code blocks + * (genuine section boundaries). + */ +const renumberOrderedLists = (input: string): string => { + const lines = input.split('\n'); + const counters = new Map(); + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + // Section break: reset all counters so subsequent ordered lists start + // numbering from whatever the source says. 
+ if (/^\s*(?:#{1,6}\s|```+|~~~+)/.test(line)) { + counters.clear(); + continue; + } + const m = /^(\s*)(\d+)\.\s(.*)$/.exec(line); + if (!m) continue; + const indent = m[1].length; + const originalNum = parseInt(m[2], 10); + const body = m[3]; + const prev = counters.get(indent) ?? 0; + if (originalNum === 1 && prev >= 1) { + // Model wrote `1.` where we're already past item 1 at this indent. + // Renumber to maintain the visible sequence. + const next = prev + 1; + counters.set(indent, next); + lines[i] = `${m[1]}${next}. ${body}`; + } else { + // Trust the source number (could be 2. 3. 4. …, or a fresh 1. that + // starts a new top-level list after the counter was empty). + counters.set(indent, originalNum); + } + } + return lines.join('\n'); +}; + +const normaliseInlineFences = (input: string): string => { + // Matches the whole inline block: opening fence with optional language, + // then anything (non-greedy) up to the closing fence, ALL on one physical + // line (no literal newline in the match). + // eslint-disable-next-line no-useless-escape -- escape kept for readability + const re = /```([\w-]*)[ \t]+([^\n]*?)[ \t]*```/g; + return input.replace(re, (_match, lang: string, body: string) => { + // Preserve the body as-is so spaces inside string literals survive. + const trimmed = body.replace(/\s+$/, ''); + return `\n\`\`\`${lang}\n${trimmed}\n\`\`\`\n`; + }); +}; + /** Render CommonMark-ish text to ANSI-coloured output. 
*/ export const renderMarkdown = (input: string, opts: RenderOptions = {}): string => { if (!input) return ''; - const rendered = renderBlocks(input); + const rendered = renderBlocks(renumberOrderedLists(normaliseInlineFences(input))); if (opts.oneLine) { return stripAnsi(rendered).replace(/\s+/g, ' ').trim(); } @@ -52,14 +132,29 @@ const renderBlocks = (text: string): string => { while (i < lines.length) { const line = lines[i]; - // Fenced code block (``` or ~~~, optional language tag) - const fenceMatch = /^```+\s*([\w-]*)\s*$|^~~~+\s*([\w-]*)\s*$/.exec(line); + // Fenced code block (``` or ~~~, optional language tag). + // + // LLMs routinely nest code blocks inside bullet / numbered lists, which + // means the fence arrives with 2–4 spaces of leading indent. CommonMark + // officially allows up to 3 spaces, and in practice we see up to 6 from + // small models. Be liberal: strip any leading whitespace before matching. + // The closing fence must be on its own line too but may have the same + // (or matching) leading indent — we strip to compare. + const fenceMatch = /^\s*```+\s*([\w-]*)\s*$|^\s*~~~+\s*([\w-]*)\s*$/.exec(line); if (fenceMatch) { const lang = fenceMatch[1] || fenceMatch[2] || ''; + // Remember how much the opener was indented so the body can strip the + // same prefix — otherwise the rendered code block inherits the list's + // indentation as trailing-whitespace inside each code line. + const openerIndent = (line.match(/^\s*/)?.[0] ?? '').length; const codeLines: string[] = []; i++; - while (i < lines.length && !/^(?:```+|~~~+)\s*$/.test(lines[i])) { - codeLines.push(lines[i]); + while (i < lines.length && !/^\s*(?:```+|~~~+)\s*$/.test(lines[i])) { + // Strip up to `openerIndent` leading spaces so "- foo\n ```js\n x" + // renders as just `x`, not ` x`. + const raw = lines[i]; + const prefixLen = Math.min(openerIndent, (raw.match(/^[ \t]*/)?.[0] ?? 
'').length); + codeLines.push(raw.slice(prefixLen)); i++; } if (i < lines.length) i++; // consume closing fence @@ -97,13 +192,18 @@ const renderBlocks = (text: string): string => { continue; } - // Ordered list + // Ordered list — honour the actual source number on each line rather + // than rerolling from 1 per run. `renumberOrderedLists` has already + // rewritten LLM-style "1. 1. 1." input into proper sequential numbers, + // so trusting the source here renders them correctly across runs that + // get broken by sub-lists or paragraphs. if (/^\s*\d+\.\s+/.test(line)) { - let n = 1; - while (i < lines.length && /^\s*\d+\.\s+/.test(lines[i])) { + while (i < lines.length) { + const m = /^\s*(\d+)\.\s+/.exec(lines[i]); + if (!m) break; + const n = parseInt(m[1], 10); const item = lines[i].replace(/^\s*\d+\.\s+/, ''); out.push(' ' + chalk.cyan(`${n}.`) + ' ' + formatInline(item)); - n++; i++; } continue; @@ -138,7 +238,9 @@ const renderBlocks = (text: string): string => { /** True if a line opens a new block (so we stop collecting paragraph text). */ const isBlockBoundary = (line: string): boolean => { if (!line.trim()) return true; - if (/^```+|^~~~+/.test(line)) return true; + // Fences may be indented (inside list items, blockquotes); match the same + // permissive rule the block-fence handler uses. + if (/^\s*(?:```+|~~~+)/.test(line)) return true; if (/^#{1,6}\s/.test(line)) return true; if (/^\s*>/.test(line)) return true; if (/^\s*[-*+]\s+/.test(line)) return true; diff --git a/src/cli/progress.ts b/src/cli/progress.ts new file mode 100644 index 0000000..79c529c --- /dev/null +++ b/src/cli/progress.ts @@ -0,0 +1,345 @@ +/** + * Shared progress display for any CLI surface that drives `orchestrateRun` + * (the `forge run` one-shot, the REPL turn loop). Subscribes to the + * in-process event bus and reflects loop phases + streaming model output on + * a single spinner line so the terminal never looks frozen. 
+ * + * Behavior: + * - Spinner text tracks the current phase (classify → plan → approve → + * execute → verify) via TASK_* events. + * - On `TASK_PLANNED`, the spinner pauses so the approval prompt can render + * cleanly. It resumes on `TASK_APPROVED`. + * - Streaming model deltas print inline under the spinner in a dimmed + * "thinking" rail; the spinner remains visible below them. The rail is + * cleared between phases. + * + * Keep this module framework-free — it's imported by repl.ts on every turn + * and by run.ts once per invocation. + * + * @author Son Nguyen + */ + +import chalk from 'chalk'; +import { spinner as makeSpinner } from './ui'; +import { PALETTE } from './banners'; +import { renderMarkdown } from './markdown'; +import { eventBus, ModelDeltaEvent } from '../persistence/events'; +import type { ForgeEvent } from '../types'; +import type { Ora } from 'ora'; + +export interface ProgressHandle { + stop(): void; + /** Pause the spinner without unsubscribing — used to surface a prompt. */ + pause(): void; + resume(text?: string): void; + /** + * True iff at least one streaming model delta was rendered. Callers use + * this to decide whether the completion summary should repeat the task + * result text (no — already on screen) or just show metadata. + */ + didStream(): boolean; +} + +const phaseText = (e: ForgeEvent): string | null => { + switch (e.type) { + case 'TASK_CREATED': + return 'classifying request'; + case 'TASK_CLASSIFIED': + return 'planning'; + case 'TASK_PLANNED': + return 'plan ready'; + case 'TASK_APPROVED': + return 'executing'; + case 'TASK_SCHEDULED': + return 'scheduling'; + case 'TASK_STARTED': + return 'running'; + case 'TASK_STEP_STARTED': + return e.message.replace(/^→\s*/, ''); + case 'TASK_STEP_COMPLETED': + return 'step complete'; + case 'TASK_VERIFYING': + return 'verifying'; + case 'MODEL_WARMING': { + const p = (e.payload as { provider?: string; model?: string } | undefined) ?? {}; + return p.model + ? 
`warming ${p.model} · loading into memory (first call, ~30–60s)` + : 'warming model · loading into memory (first call)'; + } + case 'MODEL_WARMED': { + const p = (e.payload as { model?: string; durationMs?: number } | undefined) ?? {}; + if (p.durationMs != null && p.model) { + return `warmed ${p.model} in ${(p.durationMs / 1000).toFixed(1)}s`; + } + return 'model ready'; + } + case 'MODEL_CALLED': { + const p = (e.payload as { provider?: string; model?: string } | undefined) ?? {}; + return p.model ? `thinking · ${p.model}` : 'thinking'; + } + case 'TOOL_CALLED': + return e.message || 'running tool'; + case 'TOOL_COMPLETED': + return e.message || 'tool complete'; + default: + return null; + } +}; + +export interface ProgressOptions { + /** Print streaming deltas inline under the spinner. Default: true. */ + showDeltas?: boolean; + /** Initial spinner text. */ + initial?: string; + /** Task id to filter deltas by; if unset, shows all deltas in-process. */ + taskId?: string; +} + +export const startProgress = (opts: ProgressOptions = {}): ProgressHandle => { + const showDeltas = opts.showDeltas !== false; + const spinner = makeSpinner(opts.initial ?? 'working').start(); + // Streaming rail: accumulate chars on the current "thinking" line; when the + // spinner advances to a new phase we drop the rail so the terminal doesn't + // keep a half-formed paragraph around. + // Streaming prose, two-pass: + // Pass 1 (live): tokens get written to stdout immediately with a fixed + // 2-space indent and soft-wrap at the terminal width. The user sees + // characters appear as the model emits them — no perceptible batching. + // Pass 2 (finalize): when a paragraph boundary (\n\n outside an open + // fenced ``` block) is crossed, we rewind the cursor over exactly the + // terminal rows we painted, clear them, and rewrite that block through + // renderMarkdown. Same region, now with headings/lists/code styled. 
+ // + // Consequences we care about: + // - Zero output duplication (we replace, we don't append). + // - Works per-block — a 2000-token answer re-renders paragraph by + // paragraph as each one completes, never all-at-once at the end. + // - Only the currently-unfinished block is rewritable. Finalized blocks + // above remain untouched even if the answer keeps growing. + // - `didStream()` lets callers skip the duplicate completion summary. + let streamBuffer = ''; + let streamedSomething = false; + // Cursor bookkeeping for the currently-in-flight block. rawLines is the + // number of `\n` we've emitted since the block started (= rows above the + // current cursor row). currentCol is the column of the cursor on the row + // we're currently painting. Updated on every write so we can rewind + // precisely when the block finalizes. + let rawLines = 0; + let currentCol = 0; + const BLOCK_INDENT = ' '; + const termWidth = (): number => Math.max(20, process.stdout.columns ?? 100); + const closeRail = (): void => { + // Phase-break separator so the next spinner line doesn't crash into + // the just-rendered text. Only relevant after a fully-flushed block. + if (streamedSomething && streamBuffer.length === 0) { + process.stdout.write('\n'); + } + }; + const dim = chalk.rgb(...PALETTE.muted); + + const accent = chalk.rgb(...PALETTE.cyan); + + // Print a step's completion summary above the spinner. The executor uses + // jsonMode (so streaming is off), which means the *only* user-visible proof + // that the model did something useful is the `summary` field it returns per + // step. Dump it to stdout as the step finishes so the user watches work + // accumulate in real time instead of staring at a bare spinner. 
+ const printStepSummary = (stepId: string, summary: string): void => { + pauseSpinner(spinner); + const body = summary.trim(); + if (!body) return; + process.stdout.write(`\n ${accent('◇')} ${chalk.bold(stepId)} ${dim('·')} ${body}\n`); + }; + + const onEvent = (e: ForgeEvent): void => { + if (opts.taskId && e.taskId && e.taskId !== opts.taskId) return; + // TASK_STEP_COMPLETED carries the model's per-step prose in the payload. + // Render it inline so the user sees the work being produced — otherwise + // a task like "summarize X" looks like it output nothing until the final + // completion box. + if (e.type === 'TASK_STEP_COMPLETED') { + const payload = (e.payload as { summary?: string } | undefined) ?? {}; + const stepId = (e.message || '').replace(/^✔\s*/, '') || 'step'; + if (payload.summary && payload.summary.trim().length > 0) { + printStepSummary(stepId, payload.summary); + } + // Fall through to also update spinner text below. + } + const text = phaseText(e); + if (!text) return; + closeRail(); + if (e.type === 'TASK_PLANNED') { + // About to hit the approval prompt. Yield the terminal. + try { + spinner.stopAndPersist({ symbol: '◆', text: chalk.dim('plan ready · awaiting approval') }); + } catch { + spinner.stop(); + } + return; + } + try { + spinner.text = text; + if (!spinner.isSpinning) spinner.start(); + } catch { + // ora can throw if the terminal was torn down; ignore. + } + }; + + // True if `text` currently has an unclosed ``` fence — we must NOT try to + // render a partial code block as markdown, it'll mangle. Counts triple + // backticks at line starts; odd count = open fence. + const hasOpenFence = (text: string): boolean => { + let count = 0; + for (const line of text.split('\n')) { + if (/^\s*```/.test(line)) count++; + } + return count % 2 !== 0; + }; + + // Write raw tokens live with 2-space indent and soft-wrap. 
We keep + // `rawLines` (= newlines emitted since block start) and `currentCol` + // (visible column on the current line) in sync with what we painted, so + // `clearRawBlock` can rewind precisely. + const writeRaw = (text: string): void => { + if (!text) return; + if (!streamedSomething) { + pauseSpinner(spinner); + // Separator from any preceding spinner/step-summary output. + process.stdout.write('\n'); + streamedSomething = true; + } + const width = termWidth(); + for (const ch of text) { + if (ch === '\n') { + process.stdout.write('\n'); + rawLines++; + currentCol = 0; + continue; + } + if (currentCol === 0) { + process.stdout.write(BLOCK_INDENT); + currentCol = BLOCK_INDENT.length; + } + process.stdout.write(ch); + currentCol++; + // Soft-wrap one column early so the terminal's own auto-wrap doesn't + // advance the cursor without us noticing — our rewind math depends on + // every row transition going through our counter. + if (currentCol >= width) { + process.stdout.write('\n'); + rawLines++; + currentCol = 0; + } + } + }; + + // Rewind the cursor to the start of the current block and erase down to + // the end of the screen. Used right before replacing the raw paint with + // the markdown-rendered version. + const clearRawBlock = (): void => { + // Move to the start of the current line. + process.stdout.write('\r'); + // If we're below the block's first line, walk back up. + if (rawLines > 0) { + process.stdout.write(`\x1b[${rawLines}A`); + } + // Erase from cursor to end of screen — wipes the raw-painted region. + process.stdout.write('\x1b[J'); + rawLines = 0; + currentCol = 0; + }; + + // Finalize a completed block: rewind over its raw paint and write the + // markdown-rendered version in the same region. Trailing `\n\n` gives + // paragraph spacing before the next (possibly already in-progress) block. + const finalizeBlock = (content: string): void => { + if (!content.trim()) { + // Still clear whatever we painted so we don't leave orphaned text. 
+ clearRawBlock(); + return; + } + clearRawBlock(); + const rendered = renderMarkdown(content, { indent: 2 }).replace(/\s+$/, ''); + if (rendered) process.stdout.write(rendered + '\n\n'); + }; + + const flushAll = (): void => { + if (!streamBuffer.trim()) { + streamBuffer = ''; + return; + } + finalizeBlock(streamBuffer); + streamBuffer = ''; + }; + + const onDelta = (d: ModelDeltaEvent): void => { + if (!showDeltas) return; + if (opts.taskId && d.taskId && d.taskId !== opts.taskId) return; + if (d.done) { + flushAll(); + return; + } + if (!d.text) return; + // Live paint first — this is what makes streaming feel responsive. + writeRaw(d.text); + streamBuffer += d.text; + // Then opportunistically finalize the prefix up to the last paragraph + // break outside an open fence. This replaces the raw paint of that + // prefix with its markdown-rendered form, and continues painting any + // bytes past the break as the start of the next block. + if (!streamBuffer.includes('\n\n')) return; + let splitAt = streamBuffer.lastIndexOf('\n\n'); + while (splitAt > 0 && hasOpenFence(streamBuffer.slice(0, splitAt))) { + splitAt = streamBuffer.lastIndexOf('\n\n', splitAt - 1); + } + if (splitAt <= 0) return; + const ready = streamBuffer.slice(0, splitAt); + const rest = streamBuffer.slice(splitAt + 2); + // We painted the entire `ready + \n\n + rest` region as raw. Clear it + // all, write the rendered version, then re-paint the `rest` as raw so + // streaming continues from the right visible position. + finalizeBlock(ready); + streamBuffer = rest; + if (rest) writeRaw(rest); + }; + + eventBus.on('event', onEvent); + eventBus.on('delta', onDelta); + + return { + stop(): void { + eventBus.off('event', onEvent); + eventBus.off('delta', onDelta); + // Flush anything left over so we never drop a partial answer. 
+ flushAll(); + try { + spinner.stop(); + } catch { + // ignore + } + }, + pause(): void { + closeRail(); + pauseSpinner(spinner); + }, + resume(text?: string): void { + if (text) spinner.text = text; + try { + if (!spinner.isSpinning) spinner.start(); + } catch { + // ignore + } + }, + didStream(): boolean { + return streamedSomething; + }, + }; +}; + +const pauseSpinner = (s: Ora): void => { + try { + if (s.isSpinning) s.stop(); + } catch { + // ignore + } +}; diff --git a/src/cli/repl-input.ts b/src/cli/repl-input.ts index b787f12..fec5895 100644 --- a/src/cli/repl-input.ts +++ b/src/cli/repl-input.ts @@ -85,6 +85,12 @@ export class LineEditor { private resolveDone?: () => void; private readonly hooks: LineEditorHooks; + // Number of rows currently painted ABOVE the prompt row for the slash- + // command dropdown. Tracked precisely so per-keystroke redraws can rewind + // exactly that many rows, clear from there to end-of-screen, and repaint + // — no guessing, no save/restore cursor escapes, no drift. + private dropdownRowsAbove = 0; + // Kill ring — last text removed by Ctrl+U / Ctrl+K / Ctrl+W / Alt+Backspace. // Retrieved via Ctrl+Y (yank). Single-slot for simplicity; good enough. private killRing = ''; @@ -491,16 +497,25 @@ export class LineEditor { private async submit(): Promise { const raw = this.buf; let picked: Suggestion | undefined; - // If the input starts with a slash and there are suggestions, apply the - // highest-ranked match when the literal input doesn't exactly equal any - // command. + // Slash-command dispatch at submit. Two cases the user might want: + // 1. They navigated the dropdown with ↑/↓ and pressed Enter — honor + // that selection even if the literal buffer would exactly-match a + // different command. + // 2. They just typed a full command and pressed Enter with no + // navigation — fall through to the literal text (no rewrite). 
+ // The earlier version always picked `lastSuggestions[0]` and ignored + // `this.sel`, so arrow-key selection never applied. if (raw.startsWith('/') && this.lastSuggestions.length) { - const first = this.lastSuggestions[0]; const head = raw.slice(1).split(/\s+/, 1)[0] ?? ''; const exact = this.lastSuggestions.find((s) => s.value.slice(1) === head); - if (!exact) { - // rewrite buf so display + handler see the normalised command - picked = first; + // User-navigated selection beats top-of-list. If they didn't move + // the cursor (`this.sel === 0`) and the buffer already exactly-matches + // a command, leave the buffer alone — they typed what they meant. + const userNavigated = this.sel > 0; + if (userNavigated) { + picked = this.lastSuggestions[this.sel]; + } else if (!exact) { + picked = this.lastSuggestions[0]; } } this.eraseBelowAndPromptRow(); @@ -526,6 +541,21 @@ export class LineEditor { chalk.red(`\nEditor submit error: ${e instanceof Error ? e.message : String(e)}\n`), ); } finally { + // Sub-prompts fired during the task (chooseNumbered for plan approval + // / permission decisions, the ask_user tool) grab stdin and restore it + // to a sensible-for-them state on exit — which is NOT our state. + // Specifically, chooseNumbered calls `stdin.pause()` + `setRawMode(false)` + // at the end. If we just re-render without reacquiring stdin, the REPL + // silently dies: no keypress events flow, no other event-loop work is + // pending, Node exits. Reacquire here so the editor is always ready + // for the next turn. + try { + if (process.stdin.isTTY) process.stdin.setRawMode(true); + process.stdin.resume(); + } catch { + // best-effort; if TTY semantics changed under us, render will still + // paint and the user can at least see state. 
+ } this.blocked = false; } if (!this.done) this.render(true); @@ -689,49 +719,57 @@ export class LineEditor { } private render(initial = false): void { - // Refresh suggestions for current buffer this.lastSuggestions = this.hooks.suggestions(this.buf); if (this.sel >= this.lastSuggestions.length) this.sel = 0; - if (!initial) this.eraseBelowAndPromptRow(); - else { - // First render: just sit at current cursor position, don't erase above - process.stdout.write(esc.clearScreenDown); - } - + // Layout (top → bottom, painted once per render): + // + // ← only on initial / resume / post-submit + // ─┐ + // ├ painted only when buf starts with '/'; tracked + // … │ in this.dropdownRowsAbove so the next render + // ─┘ can rewind precisely. + // ← cursor lives here + // + // Key invariant: the prompt row is always the LAST row of the editor's + // painted region. Nothing is painted below it. Per-keystroke, we rewind + // `dropdownRowsAbove` rows from the prompt row, clear from there to end + // of screen, and repaint dropdown + prompt. The status line is NEVER + // part of the per-keystroke redraw — it stays stable up-scroll. const prompt = this.hooks.prompt(); const promptWidth = visibleWidth(prompt); - // Render newlines inside the input as a visible ↵ glyph so a multi-line - // compose (Alt+Enter) stays on the same visual row. The raw buffer keeps - // the \n for submission; only the display is munged. const NL_GLYPH = chalk.dim('↵ '); const renderBuf = this.buf.replace(/\n/g, NL_GLYPH); const ghost = this.ghostSuffix(); - const inputSegment = renderBuf + (ghost ? chalk.dim(ghost) : ''); - process.stdout.write(prompt + inputSegment); - - // Cursor column = promptWidth + visible chars before cursor. Each \n - // before the cursor becomes 2 visible chars (the ↵ glyph + space). const newlinesBefore = (this.buf.slice(0, this.cursor).match(/\n/g) ?? 
[]).length; const cursorCol = promptWidth + this.cursor + newlinesBefore; + const dropdownRows = this.renderDropdown(); - // Render dropdown + status line below - const belowLines: string[] = []; - const drop = this.renderDropdown(); - if (drop.length) belowLines.push(...drop, ''); - else belowLines.push(''); - belowLines.push(this.hooks.statusLine()); - - let belowCount = 0; - for (const line of belowLines) { - process.stdout.write('\n' + line); - belowCount++; + if (initial) { + // First paint / resume / post-submit: status line, then dropdown (if + // any), then prompt row. All newlines flow downward — no cursor-up + // gymnastics. + process.stdout.write(esc.clearScreenDown); + process.stdout.write(this.hooks.statusLine() + '\n'); + for (const line of dropdownRows) process.stdout.write(line + '\n'); + process.stdout.write(prompt + inputSegment); + process.stdout.write(esc.cursorTo(cursorCol)); + this.dropdownRowsAbove = dropdownRows.length; + return; } - // Move cursor up back to input row, then to correct column - if (belowCount > 0) process.stdout.write(esc.cursorUp(belowCount)); + // Per-keystroke: rewind to the top of (dropdown ∪ prompt) region, clear + // to end of screen, repaint dropdown + prompt. + process.stdout.write('\r'); + if (this.dropdownRowsAbove > 0) { + process.stdout.write(esc.cursorUp(this.dropdownRowsAbove)); + } + process.stdout.write(esc.clearScreenDown); + for (const line of dropdownRows) process.stdout.write(line + '\n'); + process.stdout.write(prompt + inputSegment); process.stdout.write(esc.cursorTo(cursorCol)); + this.dropdownRowsAbove = dropdownRows.length; } private renderDropdown(): string[] { @@ -769,9 +807,7 @@ export class LineEditor { const descText = fit(sg.description ?? '', descBudget); const labelColored = isSel ? chalk.bold.cyan(labelText) : chalk.cyan(labelText); const descColored = chalk.dim(descText); - // Row body, by construction visible width == innerW. 
const row = ` ${arrow} ${labelColored} ${descColored}`; - // Safety assertion in case label or desc contained stray wide chars const surplus = visibleWidth(row) - innerW; const safeRow = surplus > 0 ? row.slice(0, row.length - surplus) : row; lines.push(pad + chalk.dim('│') + safeRow + chalk.dim('│')); @@ -787,8 +823,14 @@ export class LineEditor { } private eraseBelowAndPromptRow(): void { - // Move to col 0 of input row, wipe it + everything below + // Clear the editor's painted region — dropdown rows above us + the + // prompt row we're on. Status line (even further above) is left intact + // so it stays visible while we echo the submitted line. process.stdout.write(esc.cursorCol0); + if (this.dropdownRowsAbove > 0) { + process.stdout.write(esc.cursorUp(this.dropdownRowsAbove)); + } process.stdout.write(esc.clearScreenDown); + this.dropdownRowsAbove = 0; } } diff --git a/src/cli/repl.ts b/src/cli/repl.ts index ceb42f7..ef173ce 100644 --- a/src/cli/repl.ts +++ b/src/cli/repl.ts @@ -28,7 +28,19 @@ const pkg = require('../../package.json') as { version?: string }; import { Command, CommanderError } from 'commander'; import chalk from 'chalk'; import { PALETTE } from './banners'; -import { ok, err, info, dim, accent, warn } from './ui'; +import { + ok, + err, + info, + dim, + accent, + warn, + divider, + rocket, + completionSummary, + failure, + breadcrumbs, +} from './ui'; import { bootstrap } from './bootstrap'; import { setConsoleOutput } from '../logging/logger'; import { orchestrateRun } from '../core/orchestrator'; @@ -45,7 +57,7 @@ import { rankSlash, } from './repl-commands'; import { listProviders } from '../models/provider'; -import { renderMarkdown } from './markdown'; +import { startProgress } from './progress'; import { Conversation, ConversationTurn, @@ -62,7 +74,7 @@ import { watchConversationFile, } from '../core/conversation'; import { ConversationWatcher } from '../persistence/conversation-store'; -import { checkForUpdate, currentVersion } from 
'../daemon/updater'; +import { checkForUpdate, currentVersion, readCache } from '../daemon/updater'; // ---------- Types ---------- @@ -98,18 +110,54 @@ const turnsOf = (state: ReplState): ConversationTurn[] => state.conversation.tur const HISTORY_FILE = path.join(FORGE_HOME, 'history'); const HISTORY_MAX = 1000; -const loadHistory = (): string[] => { +const loadHistory = (projectRoot?: string): string[] => { + const seen = new Set(); + const merged: string[] = []; + + // 1) Flat per-FORGE_HOME history file. The canonical source: every submit + // appends to this, lets arrow-up recall across REPL invocations even if + // there's no conversation context. try { - if (!fs.existsSync(HISTORY_FILE)) return []; - const raw = fs.readFileSync(HISTORY_FILE, 'utf8'); - return raw - .split('\n') - .map((s) => s.trim()) - .filter(Boolean) - .slice(-HISTORY_MAX); + if (fs.existsSync(HISTORY_FILE)) { + const raw = fs.readFileSync(HISTORY_FILE, 'utf8'); + for (const line of raw.split('\n')) { + const s = line.trim(); + if (!s || seen.has(s)) continue; + seen.add(s); + merged.push(s); + } + } } catch { - return []; + /* best-effort */ + } + + // 2) Prior conversation turns for THIS project. Without this, a freshly + // provisioned FORGE_HOME (e.g. `rm -rf /tmp/forge-repl`) or a machine + // where the flat history was lost would leave arrow-up empty even + // though the user has perfectly good inputs in conversation history. + // Read-only, oldest-first, deduped against the flat file. + if (projectRoot) { + try { + const convos = listConversations(projectRoot); + // Sort by createdAt ascending so older inputs appear earlier in the + // final history (newer-at-tail matches the flat-file convention). + const ordered = [...convos].sort((a, b) => a.createdAt.localeCompare(b.createdAt)); + for (const meta of ordered) { + const conv = loadConversation(projectRoot, meta.id); + if (!conv) continue; + for (const t of conv.turns) { + const s = (t.input ?? 
'').trim(); + if (!s || seen.has(s)) continue; + seen.add(s); + merged.push(s); + } + } + } catch { + /* best-effort — if the project subdir is weird, don't fail REPL boot */ + } } + + return merged.slice(-HISTORY_MAX); }; const appendHistory = (line: string): void => { @@ -811,7 +859,18 @@ const runTaskTurn = async ( state.abort = new AbortController(); const composed = composeDescription(effectiveInput, state.conversation.turns.slice(0, -1)); - process.stdout.write('\n'); + // Launch banner — parity with `forge run` so REPL users see the same + // mode/task/phase breadcrumbs. Rendered synchronously (no animation) so it + // doesn't fight the line editor or the progress spinner. + const displayedPrompt = + effectiveInput.length > 100 ? effectiveInput.slice(0, 100) + '…' : effectiveInput; + process.stdout.write('\n' + divider('launching') + '\n\n'); + info(`${rocket()} mode=${accent(turn.mode)}`); + process.stdout.write(` ${chalk.dim('task:')} ${chalk.white(displayedPrompt)}\n`); + process.stdout.write( + ' ' + breadcrumbs(['classify', 'plan', 'approve', 'execute', 'verify'], 0) + '\n\n', + ); + const progress = startProgress({ initial: 'classifying request' }); try { const out = await orchestrateRun({ input: effectiveInput, @@ -847,28 +906,37 @@ const runTaskTurn = async ( } process.stdout.write('\n'); if (r.success) { - const costBit = r.costUsd && r.costUsd > 0 ? chalk.dim(` · $${r.costUsd.toFixed(4)}`) : ''; - ok( - `turn ${turnsOf(state).length} done ${chalk.dim( - `(${((r.durationMs ?? 0) / 1000).toFixed(1)}s · ${r.filesChanged?.length ?? 0} files)`, - )}${costBit}`, + // DONE block — parity with `forge run` so REPL and CLI surfaces emit + // the same divider + metadata + file list. `skipTitle` avoids + // re-rendering the summary when the progress rail already streamed it + // live; otherwise completionSummary renders the markdown itself. + process.stdout.write( + completionSummary( + r.summary ?? '', + r.filesChanged ?? [], + r.durationMs ?? 
0, + r.costUsd, + progress.didStream() === true, + ), ); - if (r.summary) { - process.stdout.write('\n' + renderMarkdown(r.summary, { indent: 2 }) + '\n'); - } - if (r.filesChanged?.length) { - process.stdout.write('\n'); - for (const f of r.filesChanged.slice(0, 8)) { - process.stdout.write(` ${chalk.rgb(...PALETTE.teal)('▸')} ${chalk.white(f)}\n`); - } - if (r.filesChanged.length > 8) { - process.stdout.write(chalk.dim(` …+${r.filesChanged.length - 8} more\n`)); - } - } + ok(`turn ${turnsOf(state).length} done.`); } else { - err(`turn ${turnsOf(state).length} failed`); + // Error summaries contain identifiers like `step_001`, `tool_error`, + // `not_found`, file paths — running them through the markdown renderer + // would strip underscores (interpreted as italic markers) and mangle + // paths with `*` in them. Emit the failure frame, then the raw + // dim-wrapped summary, matching `forge run`'s failure path. + process.stdout.write( + '\n' + + failure(`turn ${turnsOf(state).length} failed`, [ + (r.summary ?? '').slice(0, 40), + 'see forge session list for replay', + ]) + + '\n', + ); if (r.summary) { - process.stdout.write('\n' + renderMarkdown(r.summary, { indent: 2 }) + '\n'); + const lines = r.summary.split('\n').map((l) => ' ' + chalk.dim(l)); + process.stdout.write('\n' + lines.join('\n') + '\n'); } } } catch (e) { @@ -893,6 +961,7 @@ const runTaskTurn = async ( } err(`Turn crashed: ${e instanceof Error ? e.message : String(e)}`); } finally { + progress.stop(); state.running = false; state.abort = undefined; } @@ -1009,34 +1078,34 @@ export const startRepl = async ( process.stdout.write(hero(state, pkg.version ?? '0.1.0')); if (conversation.turns.length) printResumedSummary(state); - // Fire-and-forget update check on every REPL start. `shouldCheckNow` in the - // updater rate-limits actual network hits to `cfg.update.checkIntervalHours` - // (default 24h) so this is cheap (cache read) on repeat boots. 
Print a - // single-line notice when an update is available and the user hasn't - // opted out via `update.notify = false`. - void (async () => { - try { - const res = await checkForUpdate(); - if (!res || !res.hasUpdate) return; - if (!loadGlobalConfig().update.notify) return; - const msg = - ' ' + - chalk.bgRgb(...PALETTE.violet).white(' update ') + - ' ' + - chalk.white(`Forge ${res.latestVersion} available`) + - chalk.dim(` (you're on ${currentVersion()}).`) + - chalk.dim(' Run ') + - chalk.bold('/update') + - chalk.dim(' to install · ') + - chalk.bold('/update ignore ' + res.latestVersion) + - chalk.dim(' to silence.\n'); - process.stdout.write('\n' + msg + '\n'); - } catch { - /* best-effort — never block the REPL */ - } - })(); + // Update check — strictly SYNC, printed before the editor starts. Reads + // the on-disk cache (sub-ms). A fire-and-forget refresh is scheduled so + // the cache is fresh for NEXT boot, but it's run *after* the editor has + // closed (see the end of startRepl) so there is zero possibility of a + // stray stdout write landing behind the line editor's back and desync-ing + // its cursor tracking. An earlier version fired the refresh here with a + // direct stdout.write on resolution — and every keystroke after that point + // repainted on a fresh row. 
+ const cached = readCache(); + if (cached?.hasUpdate && loadGlobalConfig().update.notify) { + const msg = + ' ' + + chalk.bgRgb(...PALETTE.violet).white(' update ') + + ' ' + + chalk.white(`Forge ${cached.latestVersion} available`) + + chalk.dim(` (you're on ${currentVersion()}).`) + + chalk.dim(' Run ') + + chalk.bold('/update') + + chalk.dim(' to install · ') + + chalk.bold('/update ignore ' + cached.latestVersion) + + chalk.dim(' to silence.'); + process.stdout.write('\n' + msg + '\n'); + } - const history = loadHistory(); + // Seed from both the flat history file AND prior conversation turns for + // this project so arrow-up recalls inputs across sessions, not just the + // current one. + const history = loadHistory(projectRoot); let editor: LineEditor | null = null; let lastSigint = 0; @@ -1139,6 +1208,11 @@ export const startRepl = async ( // same process (unusual, but safe to do). setConsoleOutput(true); + // Cache-only update refresh now that the editor is gone. Never writes to + // stdout; result is picked up by the NEXT REPL boot's synchronous cache + // read above. Best-effort — swallows network errors. + void checkForUpdate().catch(() => {}); + if (!closed) process.stdout.write('\n'); process.stdout.write(dim(` session ${state.conversation.meta.id} saved.`) + '\n'); process.stdout.write( diff --git a/src/core/interactive-host.ts b/src/core/interactive-host.ts index e97ce26..bab7088 100644 --- a/src/core/interactive-host.ts +++ b/src/core/interactive-host.ts @@ -19,6 +19,14 @@ export interface InteractiveHost { /** Returns 'approve' (run the plan), 'cancel' (stop the task), 'edit' (open an editor — only CLI implements this). */ confirmPlan(plan: Plan, taskId: string): Promise<'approve' | 'cancel' | 'edit'>; + /** + * Optional host-specific plan editor. When present, the loop calls this + * instead of the default `$EDITOR` flow — the UI uses it to surface an + * inline editor over the WebSocket so users can tweak the JSON without a + * terminal. 
Returns the edited plan (possibly unchanged). + */ + editPlan?(plan: Plan, taskId: string): Promise; + /** Decide whether a tool action is permitted. Honor flags (allow-files, strict, non-interactive, etc.). */ requestPermission(req: PermissionRequest, flags: PermissionFlags): Promise; diff --git a/src/core/loop.ts b/src/core/loop.ts index f8a8f29..c584dac 100644 --- a/src/core/loop.ts +++ b/src/core/loop.ts @@ -1,5 +1,5 @@ import chalk from 'chalk'; -import prompts from 'prompts'; +import { chooseNumbered } from '../cli/choose'; import { Task, TaskResult, Mode, Plan } from '../types'; import { ForgeRuntimeError } from '../types/errors'; import { saveTask, transitionTask } from '../persistence/tasks'; @@ -10,6 +10,7 @@ import { newRunId, newSessionId } from '../logging/trace'; import { plannerAgent } from '../agents/planner'; import { runStep } from '../agents/executor'; import { reviewOutcome } from '../agents/reviewer'; +import { narrateAnalysis } from '../agents/narrator'; import { diagnose } from '../agents/debugger'; import { topoSort, validatePlan } from '../scheduler/dag'; import { concurrency } from '../scheduler/resource-manager'; @@ -61,18 +62,16 @@ const confirmPlan = async ( const host = currentHost(); if (host) return host.confirmPlan(plan, taskId); printPlanSummary(plan); - const resp = await prompts({ - type: 'select', - name: 'value', + const value = await chooseNumbered<'approve' | 'cancel' | 'edit'>({ message: 'Approve plan?', + initial: 0, choices: [ - { title: chalk.green('Approve'), value: 'approve' }, - { title: 'Cancel', value: 'cancel' }, - { title: 'Edit (opens $EDITOR)', value: 'edit' }, + { title: 'Approve', value: 'approve', color: 'green' }, + { title: 'Cancel', value: 'cancel', color: 'red' }, + { title: 'Edit', value: 'edit', hint: '(opens $EDITOR)' }, ], - initial: 0, }); - return (resp?.value as 'approve' | 'cancel' | 'edit') ?? 'cancel'; + return value ?? 
'cancel'; }; const editPlanInEditor = async (plan: Plan): Promise => { @@ -95,6 +94,60 @@ const editPlanInEditor = async (plan: Plan): Promise => { } }; +/** + * Translate a thrown error into a message shaped for end-user display. + * + * The failure paths in the loop surface ForgeRuntimeError with a `class` + * (e.g. `not_found`, `permission_denied`, `retry_exhausted`, `timeout`, + * `user_input`) and sometimes a `recoveryHint`. The raw message is often + * adequate — "oldText not present in src/x.ts", "File src/y.ts: ENOENT" — + * but it's still a runtime-error shape, not a sentence the user writes + * down in a bug report. This adds a single-line prefix keyed off `class` + * and appends the hint when present. + */ +const humaniseFailure = (err: unknown): string => { + const raw = + err instanceof ForgeRuntimeError + ? err.message + : err instanceof Error + ? err.message + : String(err); + const cls = err instanceof ForgeRuntimeError ? err.class : null; + const hint = err instanceof ForgeRuntimeError ? err.recoveryHint : undefined; + const prefixFor = (c: string | null): string => { + switch (c) { + case 'not_found': + return 'Not found'; + case 'permission_denied': + return 'Permission denied'; + case 'timeout': + return 'Timed out'; + case 'retry_exhausted': + return 'Retries exhausted'; + case 'plan_invalid': + return 'Planner produced an invalid plan'; + case 'state_invalid': + return 'Invalid task state transition'; + case 'resource_exhausted': + return 'Resource limit hit'; + case 'user_input': + return 'Invalid input'; + case 'tool_error': + return 'Tool failed'; + case 'model_error': + return 'Model call failed'; + case 'sandbox_violation': + return 'Sandbox policy rejected the action'; + default: + return 'Task failed'; + } + }; + const head = prefixFor(cls); + const body = raw && raw !== head ? `: ${raw}` : ''; + const tail = hint ? 
`\nHint: ${hint}` : ''; + return `${head}${body}${tail}`; +}; + export interface LoopResult { task: Task; result: TaskResult; @@ -147,6 +200,20 @@ export const runAgenticLoop = async (task: Task, options: LoopOptions): Promise< let current = task; const filesChanged = new Set(); const errors: ForgeRuntimeError[] = []; + // Step summaries are the model's own prose for each plan step. For + // analysis tasks these ARE the deliverable (the answer the user wanted); + // for change tasks they describe what was done and are useful alongside + // the reviewer's verdict. We aggregate them so the final TaskResult.summary + // shows the actual work rather than just the reviewer's one-line verdict. + const stepSummaries: Array<{ stepId: string; summary: string }> = []; + // Retained across the executor loop so the narrator (for analysis tasks) + // can see what tools returned — file contents, grep hits, etc. — and + // synthesize the real answer the user asked for. + const allToolResults: Array<{ + tool: string; + args: unknown; + result: import('../types').ToolResult; + }> = []; try { // ---------- PLAN ---------- @@ -174,6 +241,62 @@ export const runAgenticLoop = async (task: Task, options: LoopOptions): Promise< log.info('plan auto-fixed', { notes: fixerReport.notes }); plan = fixerReport.plan; } + // Runtime enforcement: analysis intent is READ-ONLY. Strip any + // mutation steps the planner emitted despite the prompt rule. Also + // drop redundant "analyze" steps that follow a retrieve_context / + // read — those add no signal and trip up small models that don't + // know what to do for an already-read file, often leading to + // hallucinated file paths ("src/main/java/com/example/MyClass.java" + // in a TypeScript project). The narrator handles synthesis post-steps. 
+ if (current.profile?.intent === 'analysis') { + const READ_ONLY_TYPES = new Set([ + 'retrieve_context', + 'analyze', + 'plan', + 'review', + 'debug', + 'ask_user', + 'custom', + ]); + const MUTATION_TYPES = new Set([ + 'edit_file', + 'create_file', + 'delete_file', + 'apply_patch', + 'run_command', + 'run_tests', + ]); + const before = plan.steps.length; + let seenRetrieve = false; + const filtered = plan.steps.filter((s) => { + if (MUTATION_TYPES.has(s.type)) return false; + if (s.type === 'analyze' && seenRetrieve) return false; + if (s.type === 'retrieve_context') seenRetrieve = true; + return READ_ONLY_TYPES.has(s.type); + }); + if (filtered.length === 0) { + // Planner produced only mutation steps on an analysis task. Fall + // back to a minimal read-only plan targeting any file the user + // named in the title/description. + const titleFiles = + (current.title + ' ' + (current.description ?? '')).match(/[\w./-]+\.\w+/) ?? []; + const target = titleFiles[0]; + filtered.push({ + id: 'step_001', + type: 'retrieve_context', + description: target ? `Read ${target}` : `Gather context for: ${current.title}`, + target, + }); + } + if (filtered.length !== before) { + log.info('analysis-intent plan filtered', { + before, + after: filtered.length, + dropped: plan.steps.length - filtered.length, + }); + } + plan = { ...plan, steps: filtered }; + } current.plan = plan; saveTask(options.projectRoot, current); session({ type: 'plan', content: plan, timestamp: new Date().toISOString() }); @@ -200,7 +323,11 @@ export const runAgenticLoop = async (task: Task, options: LoopOptions): Promise< // ---------- APPROVAL ---------- let decision = await confirmPlan(plan, options.autoApprove ?? false, current.id); while (decision === 'edit') { - plan = await editPlanInEditor(plan); + // Prefer the host's own editor if it implements one (UI uses an + // inline modal — no `$EDITOR` available). Fall back to the terminal + // editor for CLI. 
+ const host = currentHost(); + plan = host?.editPlan ? await host.editPlan(plan, current.id) : await editPlanInEditor(plan); current.plan = plan; saveTask(options.projectRoot, current); decision = await confirmPlan(plan, false, current.id); @@ -266,6 +393,7 @@ export const runAgenticLoop = async (task: Task, options: LoopOptions): Promise< runId, }); for (const toolRes of out.toolResults) { + allToolResults.push(toolRes); session({ type: 'tool_call', content: { tool: toolRes.tool, args: toolRes.args }, @@ -291,27 +419,64 @@ export const runAgenticLoop = async (task: Task, options: LoopOptions): Promise< // the executor's `completed` flag, falling back to the legacy // "no surviving failures at the tail" check so we still catch the // case where no recovery was attempted. - const tail = out.toolResults[out.toolResults.length - 1]; - const stepFailed = !out.completed || (tail ? !tail.result.success : false); + const lastFailure = [...out.toolResults].reverse().find((r) => !r.result.success); + const anyToolFailed = Boolean(lastFailure); + // A step is "failed" only when there was an actual tool error OR + // the executor produced nothing at all (no tool calls AND no done + // flag — means the model is wedged). Running out of turns AFTER + // productive work is treated as a soft completion: we have + // signals we can carry forward, so don't blow up the whole task. + const stepWedged = !out.completed && out.toolResults.length === 0 && !out.summary.trim(); + const stepFailed = anyToolFailed || stepWedged; if (stepFailed) { - const lastFailure = [...out.toolResults].reverse().find((r) => !r.result.success); loopDetector.record({ stepId: step.id, success: false, errorClass: lastFailure?.result.error?.class, timestamp: Date.now(), }); + // Respect the underlying tool's retryable flag. A `not_found` + // read or similar terminal error won't become findable on + // retry — retrying just burns model calls. 
Preserve true when + // the tool actually marked itself retryable (transient network, + // rate-limit, etc.) so those still get their three attempts. + const retryable = lastFailure?.result.error?.retryable ?? false; throw new ForgeRuntimeError({ - class: 'tool_error', - message: lastFailure?.result.error?.message ?? `Step ${step.id} did not complete`, - retryable: true, + class: lastFailure?.result.error?.class ?? 'tool_error', + message: + lastFailure?.result.error?.message ?? + `Step ${step.id} produced no tool calls or summary within the turn budget`, + retryable, recoveryHint: lastFailure?.result.error?.recoveryHint, }); } + if (!out.completed) { + // Soft completion: tools ran, we just didn't get an explicit + // done. Log it but don't fail — later steps use the gathered + // context. + log.info('step soft-completed (no explicit done, but tools succeeded)', { + step: step.id, + toolCalls: out.toolResults.length, + }); + } loopDetector.record({ stepId: step.id, success: true, timestamp: Date.now() }); stepOk = true; + if (out.summary && out.summary.trim().length > 0) { + stepSummaries.push({ stepId: step.id, summary: out.summary.trim() }); + } event('TASK_STEP_COMPLETED', `✔ ${step.id}`, { summary: out.summary }); } catch (err) { + // Non-retryable errors fail fast — no point burning additional + // attempts on something the tool told us won't succeed. 
+ if (err instanceof ForgeRuntimeError && err.retryable === false) { + event( + 'TASK_STEP_FAILED', + `${step.id} failed (${err.class}): ${err.message}`, + { err: String(err) }, + 'error', + ); + throw err; + } event( 'RETRY_ATTEMPTED', `retry ${attempts}/${maxRetries} on ${step.id}`, @@ -382,16 +547,28 @@ export const runAgenticLoop = async (task: Task, options: LoopOptions): Promise< // ---------- VERIFY ---------- current = transitionTask(options.projectRoot, current.id, 'verifying'); event('TASK_VERIFYING', 'Review pass'); - const verdict = await reviewOutcome( - { - taskTitle: current.title, - changesSummary: `Executed ${ordered.length} steps. Files: ${[...filesChanged].join(', ') || '(none)'}`, - filesChanged: [...filesChanged], - }, - options.mode, - ); + // The profile can mark a task as not-requiring-review (typically + // analysis/explain/summarize, which produce no diff). Respect that and + // skip the reviewer entirely — otherwise we'd burn a model call to get + // a guaranteed "no changes" rejection. + const skipReview = current.profile?.requiresReview === false; + const verdict = skipReview + ? { + approved: true, + issues: [], + summary: 'informational task complete (no review required — no code changes expected)', + } + : await reviewOutcome( + { + taskTitle: current.title, + changesSummary: `Executed ${ordered.length} steps. 
Files: ${[...filesChanged].join(', ') || '(none)'}`, + filesChanged: [...filesChanged], + intent: current.profile?.intent, + }, + options.mode, + ); session({ type: 'event', content: { review: verdict }, timestamp: new Date().toISOString() }); - if (!verdict.approved && cfg.completion.requireReview) { + if (!verdict.approved && cfg.completion.requireReview && !skipReview) { throw new ForgeRuntimeError({ class: 'state_invalid', message: `Review did not approve: ${verdict.summary}`, @@ -399,16 +576,62 @@ export const runAgenticLoop = async (task: Task, options: LoopOptions): Promise< }); } + // For analysis / informational tasks the executor's jsonMode responses + // give us worthless step summaries like "file read successfully" — the + // real deliverable is prose that actually answers the user. Run a single + // non-JSON narrator pass (which streams live via the router's event bus) + // over the tool outputs the executor gathered, and use that as the + // user-facing summary. + const isAnalysis = current.profile?.intent === 'analysis'; + let narratorText = ''; + if (isAnalysis && allToolResults.length > 0) { + event('TASK_STEP_STARTED', '→ narrator: writing the answer'); + try { + narratorText = await narrateAnalysis({ + taskTitle: current.title, + taskDescription: current.description ?? current.title, + toolResults: allToolResults, + mode: options.mode, + taskId: current.id, + projectId: current.projectId, + }); + } catch (err) { + log.warn('narrator pass failed', { err: String(err) }); + } + } + + // Prefer the narrator's answer for analysis tasks; otherwise use step + // summaries (meaningful for code-change tasks). The reviewer verdict is + // appended as a quiet footer for change tasks where a review ran. 
+ const stepBlock = stepSummaries + .map((s) => s.summary) + .filter((s) => s.length > 0) + .join('\n\n'); + let finalSummary: string; + if (narratorText) { + finalSummary = narratorText; + } else if (stepBlock) { + finalSummary = + skipReview || !verdict.summary ? stepBlock : `${stepBlock}\n\n_${verdict.summary}_`; + } else { + finalSummary = verdict.summary || 'Task completed.'; + } const finalResult: TaskResult = { success: true, - summary: verdict.summary || 'Task completed.', + summary: finalSummary, filesChanged: [...filesChanged], durationMs: Date.now() - started, }; current = transitionTask(options.projectRoot, current.id, 'completed', { result: finalResult, }); - event('TASK_COMPLETED', finalResult.summary, { files: finalResult.filesChanged }); + // Short event message; the full summary lives in `task.result` and any + // narrator stream already rendered it. Emitting the whole summary as + // the event `message` caused the UI to render it again as a log line. + event('TASK_COMPLETED', 'Task completed', { + files: finalResult.filesChanged, + summary: finalResult.summary, + }); session({ type: 'result', content: finalResult, timestamp: new Date().toISOString() }); // Learning: reinforce the successful pattern (intent + scope). if (cfg.memory.learningEnabled && current.profile) { @@ -421,9 +644,13 @@ export const runAgenticLoop = async (task: Task, options: LoopOptions): Promise< return { task: current, result: finalResult }; } catch (err) { log.error('agentic loop failed', { err: String(err) }); + // Shape the user-facing summary based on the error class so the REPL + // box / run box shows something actionable instead of a raw runtime + // error. Falls back to the bare message for unexpected error types. + const summary = humaniseFailure(err); const res: TaskResult = { success: false, - summary: err instanceof Error ? 
err.message : String(err), + summary, filesChanged: [...filesChanged], durationMs: Date.now() - started, errors: errors.map((e) => e.toJSON()), diff --git a/src/core/orchestrator.ts b/src/core/orchestrator.ts index 0fbf4ee..3752ea5 100644 --- a/src/core/orchestrator.ts +++ b/src/core/orchestrator.ts @@ -4,15 +4,17 @@ * @author Son Nguyen */ -import { Mode, Task } from '../types'; +import { Mode, Task, TaskResult } from '../types'; import { newTaskId, newTraceId, newRunId } from '../logging/trace'; import { findProjectRoot, loadGlobalConfig } from '../config/loader'; import { projectId as computeProjectId } from '../config/paths'; import { classify } from '../classifier/classifier'; -import { saveTask } from '../persistence/tasks'; +import { saveTask, transitionTask } from '../persistence/tasks'; import { runAgenticLoop, LoopOptions } from './loop'; import { emit } from '../persistence/events'; import { PermissionFlags } from '../permissions/manager'; +import { respondConversation } from '../agents/narrator'; +import { log } from '../logging/logger'; export interface OrchestratorInput { input: string; @@ -23,6 +25,15 @@ export interface OrchestratorInput { planOnly?: boolean; title?: string; description?: string; + /** + * Caller-provided task id. Used by the UI task-runner so its `active` + * map key matches the id the orchestrator emits on events + deltas — + * without this the per-task WebSocket bridge silently drops every + * `model.delta` because the runner-side id (crypto hex) and the + * orchestrator-side id (`task_`) are unrelated strings. REPL/CLI + * don't set it because they don't bridge events over a socket. + */ + taskId?: string; } export const orchestrateRun = async (params: OrchestratorInput) => { @@ -39,7 +50,7 @@ export const orchestrateRun = async (params: OrchestratorInput) => { const title = params.title ?? params.input.slice(0, 80); const now = new Date().toISOString(); const task: Task = { - id: newTaskId(), + id: params.taskId ?? 
newTaskId(), projectId: pid, title, description: params.description ?? params.input, @@ -76,6 +87,77 @@ export const orchestrateRun = async (params: OrchestratorInput) => { timestamp: now, }); + // Fast-path: a conversational question doesn't need planning, approval, + // execution, or review. Just stream an answer and record a terminal task. + // The progress rail / UI still get TASK_STARTED / TASK_COMPLETED events + // and per-token deltas via the router, so UX is identical to any other + // streaming response — minus the ~3 s plan/approval overhead. + if (profile.intent === 'conversation') { + const started = Date.now(); + emit(root, { + type: 'TASK_STARTED', + taskId: task.id, + projectId: pid, + traceId: task.traceId, + runId: task.runId, + severity: 'info', + message: 'conversation · direct answer', + timestamp: new Date().toISOString(), + }); + let answer = ''; + try { + answer = await respondConversation({ + input: params.input, + // Pass the composed multi-turn context when present (REPL / UI + // wrap prior turns into `description` via composeDescription). + // The responder uses it as ground truth for follow-up questions + // like "what have we talked about?". + description: params.description, + mode: params.mode, + taskId: task.id, + projectId: pid, + }); + } catch (err) { + log.warn('conversation fast-path failed', { err: String(err) }); + const failResult: TaskResult = { + success: false, + summary: err instanceof Error ? 
err.message : String(err), + filesChanged: [], + durationMs: Date.now() - started, + }; + const failed = transitionTask(root, task.id, 'failed', { result: failResult }); + emit(root, { + type: 'TASK_FAILED', + taskId: task.id, + projectId: pid, + traceId: task.traceId, + runId: task.runId, + severity: 'error', + message: failResult.summary, + timestamp: new Date().toISOString(), + }); + return { task: failed, result: failResult }; + } + const result: TaskResult = { + success: true, + summary: answer || '(empty response)', + filesChanged: [], + durationMs: Date.now() - started, + }; + const done = transitionTask(root, task.id, 'completed', { result }); + emit(root, { + type: 'TASK_COMPLETED', + taskId: task.id, + projectId: pid, + traceId: task.traceId, + runId: task.runId, + severity: 'info', + message: 'conversation answered', + timestamp: new Date().toISOString(), + }); + return { task: done, result }; + } + const options: LoopOptions = { projectRoot: root, mode: params.mode, diff --git a/src/daemon/updater.ts b/src/daemon/updater.ts index 6541e21..ee9c933 100644 --- a/src/daemon/updater.ts +++ b/src/daemon/updater.ts @@ -24,17 +24,22 @@ interface CacheShape { notes?: string; } -const readPkgVersion = (): string => { +const readPkg = (): { name: string; version: string } => { try { const pkg = JSON.parse( fs.readFileSync(path.join(__dirname, '..', '..', 'package.json'), 'utf8'), ); - return String(pkg.version ?? '0.0.0'); + return { + name: String(pkg.name ?? '@hoangsonw/forge'), + version: String(pkg.version ?? '0.0.0'), + }; } catch { - return '0.0.0'; + return { name: '@hoangsonw/forge', version: '0.0.0' }; } }; +const readPkgVersion = (): string => readPkg().version; + const shouldCheckNow = (): boolean => { try { if (!fs.existsSync(CACHE_FILE)) return true; @@ -61,10 +66,15 @@ const writeCache = (data: CacheShape): void => { }; const fetchLatest = async (channel: string): Promise => { - // Default to the npm registry; teams can host their own. 
We treat the - // network fetch as best-effort and never block. + // Derive the registry path from package.json#name so renaming the package + // (or forking it) Just Works. The previous hard-coded `@forge/cli` pointed + // at an unrelated package that happened to exist on npm — update prompts + // were wildly wrong ("Forge 12.18.0 available"). Scoped names include a + // slash which must be URL-encoded in the registry path. + const { name } = readPkg(); + const registryPath = encodeURIComponent(name).replace('%40', '@'); try { - const res = await request('https://registry.npmjs.org/@forge/cli', { + const res = await request(`https://registry.npmjs.org/${registryPath}`, { method: 'GET', headersTimeout: 8000, bodyTimeout: 8000, diff --git a/src/models/anthropic.ts b/src/models/anthropic.ts index 4833c14..355f121 100644 --- a/src/models/anthropic.ts +++ b/src/models/anthropic.ts @@ -14,6 +14,7 @@ import { ModelMessage, ModelCallOptions, ModelResponse, + ModelStreamChunk, } from '../types'; import { ForgeRuntimeError } from '../types/errors'; import { loadGlobalConfig } from '../config/loader'; @@ -139,4 +140,125 @@ export class AnthropicProvider implements ModelProvider { }); } } + + /** + * Stream messages from Anthropic's /v1/messages SSE endpoint. Anthropic's + * wire format is event-typed SSE: `event: content_block_delta` frames carry + * text deltas under `.delta.text`; `message_delta` carries usage updates; + * `message_stop` terminates. We coalesce those into our provider-neutral + * `ModelStreamChunk` shape. 
+ */ + async *stream( + model: string, + messages: ModelMessage[], + options: ModelCallOptions = {}, + ): AsyncGenerator { + if (!this.apiKey) { + throw new ForgeRuntimeError({ + class: 'model_error', + message: 'Anthropic provider selected but ANTHROPIC_API_KEY is not set.', + retryable: false, + }); + } + const started = Date.now(); + const systemParts = messages.filter((m) => m.role === 'system').map((m) => m.content); + const conversationMessages = messages + .filter((m) => m.role !== 'system') + .map((m) => ({ role: m.role, content: m.content })); + const body: Record = { + model, + messages: conversationMessages, + max_tokens: options.maxTokens ?? 4096, + temperature: options.deterministic ? 0 : (options.temperature ?? 0.3), + stream: true, + }; + if (systemParts.length) body.system = systemParts.join('\n\n'); + if (options.stop) body.stop_sequences = options.stop; + + let res; + try { + res = await request(`${this.endpoint}/v1/messages`, { + method: 'POST', + headers: { + 'content-type': 'application/json', + accept: 'text/event-stream', + 'anthropic-version': '2023-06-01', + 'x-api-key': this.apiKey, + }, + body: JSON.stringify(body), + bodyTimeout: options.timeoutMs ?? 600_000, + headersTimeout: options.timeoutMs ?? 
180_000, + }); + } catch (err) { + throw new ForgeRuntimeError({ + class: 'model_error', + message: `Anthropic stream request failed: ${String(err)}`, + retryable: true, + cause: err, + }); + } + if (res.statusCode !== 200) { + const text = await res.body.text(); + throw new ForgeRuntimeError({ + class: 'model_error', + message: `Anthropic ${res.statusCode}: ${text.slice(0, 500)}`, + retryable: res.statusCode === 429 || res.statusCode >= 500, + }); + } + + let buffer = ''; + let inputTokens: number | undefined; + let outputTokens: number | undefined; + let finishReason: 'stop' | 'length' = 'stop'; + + for await (const chunk of res.body as AsyncIterable) { + buffer += chunk.toString('utf8'); + let sep = buffer.indexOf('\n\n'); + while (sep !== -1) { + const frame = buffer.slice(0, sep); + buffer = buffer.slice(sep + 2); + sep = buffer.indexOf('\n\n'); + let eventType = ''; + let dataLine = ''; + for (const rawLine of frame.split('\n')) { + const line = rawLine.trim(); + if (line.startsWith('event:')) eventType = line.slice(6).trim(); + else if (line.startsWith('data:')) dataLine = line.slice(5).trim(); + } + if (!dataLine) continue; + let data: { + type?: string; + delta?: { type?: string; text?: string; stop_reason?: string }; + usage?: { input_tokens?: number; output_tokens?: number }; + message?: { usage?: { input_tokens?: number; output_tokens?: number } }; + }; + try { + data = JSON.parse(dataLine); + } catch { + continue; + } + if (eventType === 'content_block_delta' && data.delta?.type === 'text_delta') { + const delta = data.delta.text ?? ''; + if (delta) yield { delta, done: false }; + } else if (eventType === 'message_start' && data.message?.usage) { + inputTokens = data.message.usage.input_tokens ?? 
inputTokens; + } else if (eventType === 'message_delta') { + if (data.usage?.output_tokens != null) outputTokens = data.usage.output_tokens; + const sr = data.delta?.stop_reason; + if (sr === 'max_tokens') finishReason = 'length'; + else if (sr) finishReason = 'stop'; + } + } + } + yield { + delta: '', + done: true, + model, + provider: 'anthropic', + inputTokens, + outputTokens, + durationMs: Date.now() - started, + finishReason, + }; + } } diff --git a/src/models/ollama.ts b/src/models/ollama.ts index 65fa814..b6f5e55 100644 --- a/src/models/ollama.ts +++ b/src/models/ollama.ts @@ -11,11 +11,22 @@ import { ModelMessage, ModelCallOptions, ModelResponse, + ModelStreamChunk, } from '../types'; import { ForgeRuntimeError } from '../types/errors'; import { loadGlobalConfig } from '../config/loader'; import { classifyModel } from './local-catalog'; +// Ollama cold-loads a model into RAM/VRAM before it streams the first byte of +// a reply. That window is unrelated to generation, and can exceed the agent's +// per-call `timeoutMs` (60–90s) on first use of a large model. Floor the +// *headers* timeout so we don't flip to the hosted fallback while Ollama is +// still loading. Override with FORGE_OLLAMA_HEADERS_TIMEOUT_MS (ms). +const headersTimeoutFloor = (): number => { + const env = Number(process.env.FORGE_OLLAMA_HEADERS_TIMEOUT_MS); + return Number.isFinite(env) && env > 0 ? env : 300_000; +}; + export class OllamaProvider implements ModelProvider { readonly name = 'ollama'; @@ -30,6 +41,58 @@ export class OllamaProvider implements ModelProvider { } } + /** + * Pre-load a model into memory so the first real call doesn't eat the + * cold-load latency. Uses Ollama's /api/generate with an empty prompt — + * the documented idiom for warming. `keep_alive:"5m"` matches Ollama's + * default so we don't accidentally shorten the resident window. + * + * Cheap (~50ms) when the model is already loaded, blocking for the + * real cold-load otherwise. 
Never throws: a warm failure is not worth + * failing the run; the real call will surface any concrete error. + */ + async warm(model: string): Promise { + try { + // Cheap preflight: skip the warm if Ollama already has the model + // resident. /api/ps lists currently-loaded models. + try { + const ps = await request(`${this.endpoint}/api/ps`, { + method: 'GET', + headersTimeout: 2_000, + bodyTimeout: 2_000, + }); + if (ps.statusCode === 200) { + const body = (await ps.body.json()) as { models?: Array<{ name?: string }> }; + if ((body.models ?? []).some((m) => m.name === model)) return; + } else { + try { + await ps.body.dump(); + } catch { + // ignore + } + } + } catch { + // Older Ollama (<0.1.33) doesn't have /api/ps — fall through to warm. + } + const res = await request(`${this.endpoint}/api/generate`, { + method: 'POST', + headers: { 'content-type': 'application/json' }, + body: JSON.stringify({ model, keep_alive: '5m' }), + // Warm is allowed to take a while — first-time load of a 7B can + // exceed a minute on slower machines. 10 min is a defensible cap. + bodyTimeout: 600_000, + headersTimeout: Math.max(headersTimeoutFloor(), 600_000), + }); + try { + await res.body.dump(); + } catch { + // ignore + } + } catch { + // Swallow — caller will get a clearer error from the real call. + } + } + async listModels(): Promise { try { const res = await request(`${this.endpoint}/api/tags`, { method: 'GET' }); @@ -74,7 +137,7 @@ export class OllamaProvider implements ModelProvider { headers: { 'content-type': 'application/json' }, body: JSON.stringify(body), bodyTimeout: options.timeoutMs ?? 120_000, - headersTimeout: options.timeoutMs ?? 120_000, + headersTimeout: Math.max(options.timeoutMs ?? 0, headersTimeoutFloor()), }); if (res.statusCode !== 200) { const text = await res.body.text(); @@ -109,4 +172,99 @@ export class OllamaProvider implements ModelProvider { }); } } + + /** + * Stream a chat completion. 
Ollama speaks line-delimited JSON: one object + * per chunk, each with `message.content` plus a `done:true` marker on the + * final frame. We yield deltas as they arrive and a terminal frame carrying + * usage/finish metadata. + */ + async *stream( + model: string, + messages: ModelMessage[], + options: ModelCallOptions = {}, + ): AsyncGenerator { + const started = Date.now(); + const body = { + model, + messages: messages.map((m) => ({ role: m.role, content: m.content })), + stream: true, + format: options.jsonMode ? 'json' : undefined, + options: { + temperature: options.deterministic ? 0 : (options.temperature ?? 0.2), + num_predict: options.maxTokens ?? 2048, + stop: options.stop, + }, + }; + let res; + try { + res = await request(`${this.endpoint}/api/chat`, { + method: 'POST', + headers: { 'content-type': 'application/json' }, + body: JSON.stringify(body), + bodyTimeout: options.timeoutMs ?? 600_000, + headersTimeout: Math.max(options.timeoutMs ?? 0, headersTimeoutFloor()), + }); + } catch (err) { + throw new ForgeRuntimeError({ + class: 'model_error', + message: `Ollama stream request failed: ${String(err)}`, + retryable: true, + cause: err, + }); + } + if (res.statusCode !== 200) { + const text = await res.body.text(); + throw new ForgeRuntimeError({ + class: 'model_error', + message: `Ollama ${res.statusCode}: ${text.slice(0, 500)}`, + retryable: res.statusCode >= 500, + }); + } + + let buffer = ''; + let inputTokens: number | undefined; + let outputTokens: number | undefined; + let finishReason: 'stop' | 'length' = 'stop'; + + for await (const chunk of res.body as AsyncIterable) { + buffer += chunk.toString('utf8'); + let nl = buffer.indexOf('\n'); + while (nl !== -1) { + const line = buffer.slice(0, nl).trim(); + buffer = buffer.slice(nl + 1); + nl = buffer.indexOf('\n'); + if (!line) continue; + let obj: { + message?: { content?: string }; + done?: boolean; + done_reason?: string; + prompt_eval_count?: number; + eval_count?: number; + }; + try { + obj = 
JSON.parse(line); + } catch { + continue; + } + const delta = obj.message?.content ?? ''; + if (delta) yield { delta, done: false }; + if (obj.done) { + inputTokens = obj.prompt_eval_count; + outputTokens = obj.eval_count; + finishReason = obj.done_reason === 'length' ? 'length' : 'stop'; + } + } + } + yield { + delta: '', + done: true, + model, + provider: 'ollama', + inputTokens, + outputTokens, + durationMs: Date.now() - started, + finishReason, + }; + } } diff --git a/src/models/openai.ts b/src/models/openai.ts index 5bfb561..48094c1 100644 --- a/src/models/openai.ts +++ b/src/models/openai.ts @@ -13,6 +13,7 @@ import { ModelMessage, ModelCallOptions, ModelResponse, + ModelStreamChunk, } from '../types'; import { ForgeRuntimeError } from '../types/errors'; import { classifyModel } from './local-catalog'; @@ -147,6 +148,114 @@ export class OpenAIProvider implements ModelProvider { }); } } + + /** + * Stream chat completions using SSE. Every OpenAI-compatible server — + * api.openai.com, LM Studio, vLLM, llama.cpp `server`, LocalAI, Together — + * emits `data: {...}\n\n` frames with a `[DONE]` sentinel. Parses + * `choices[0].delta.content` as the incremental text and `usage` on the + * terminal frame (when the server bothers to send it). + */ + async *stream( + model: string, + messages: ModelMessage[], + options: ModelCallOptions = {}, + ): AsyncGenerator { + const started = Date.now(); + const body = { + model, + messages: messages.map((m) => ({ role: m.role, content: m.content })), + temperature: options.deterministic ? 0 : (options.temperature ?? 0.3), + max_tokens: options.maxTokens ?? 2048, + stop: options.stop, + stream: true, + stream_options: { include_usage: true }, + response_format: options.jsonMode ? 
{ type: 'json_object' } : undefined, + }; + const headers: Record = { + 'content-type': 'application/json', + accept: 'text/event-stream', + }; + if (this.apiKey) headers.authorization = `Bearer ${this.apiKey}`; + let res; + try { + res = await request(`${this.endpoint}/chat/completions`, { + method: 'POST', + headers, + body: JSON.stringify(body), + bodyTimeout: options.timeoutMs ?? 600_000, + headersTimeout: options.timeoutMs ?? 180_000, + }); + } catch (err) { + throw new ForgeRuntimeError({ + class: 'model_error', + message: `${this.name} stream request failed: ${String(err)}`, + retryable: true, + cause: err, + }); + } + if (res.statusCode !== 200) { + const text = await res.body.text(); + throw new ForgeRuntimeError({ + class: 'model_error', + message: `${this.name} ${res.statusCode}: ${text.slice(0, 500)}`, + retryable: res.statusCode === 429 || res.statusCode >= 500, + }); + } + + let buffer = ''; + let inputTokens: number | undefined; + let outputTokens: number | undefined; + let finishReason: 'stop' | 'length' | 'tool_call' = 'stop'; + + for await (const chunk of res.body as AsyncIterable) { + buffer += chunk.toString('utf8'); + // SSE frames are separated by a blank line (\n\n). + let sep = buffer.indexOf('\n\n'); + while (sep !== -1) { + const frame = buffer.slice(0, sep); + buffer = buffer.slice(sep + 2); + sep = buffer.indexOf('\n\n'); + for (const rawLine of frame.split('\n')) { + const line = rawLine.trim(); + if (!line || !line.startsWith('data:')) continue; + const data = line.slice(5).trim(); + if (!data || data === '[DONE]') continue; + let obj: { + choices?: Array<{ + delta?: { content?: string }; + finish_reason?: string | null; + }>; + usage?: { prompt_tokens?: number; completion_tokens?: number }; + }; + try { + obj = JSON.parse(data); + } catch { + continue; + } + const delta = obj.choices?.[0]?.delta?.content ?? 
''; + if (delta) yield { delta, done: false }; + const fr = obj.choices?.[0]?.finish_reason; + if (fr) + finishReason = fr === 'length' ? 'length' : fr === 'tool_calls' ? 'tool_call' : 'stop'; + if (obj.usage) { + inputTokens = obj.usage.prompt_tokens; + outputTokens = obj.usage.completion_tokens; + } + } + } + } + yield { + delta: '', + done: true, + model, + provider: this.name, + inputTokens, + outputTokens, + durationMs: Date.now() - started, + finishReason, + }; + } } // Model classification has moved to `./local-catalog.ts` so every provider diff --git a/src/models/router.ts b/src/models/router.ts index eb9b907..f96e3bf 100644 --- a/src/models/router.ts +++ b/src/models/router.ts @@ -6,7 +6,14 @@ * @author Son Nguyen */ -import { Mode, ModelMessage, ModelCallOptions, ModelResponse, ModelRole } from '../types'; +import { + Mode, + ModelMessage, + ModelCallOptions, + ModelResponse, + ModelRole, + ModelProvider, +} from '../types'; import { loadGlobalConfig } from '../config/loader'; import { ForgeRuntimeError } from '../types/errors'; import { getProvider, listProviders, firstAvailableProvider } from './provider'; @@ -16,6 +23,90 @@ import * as rateLimit from './rate-limit'; import * as breaker from './circuit-breaker'; import * as cost from './cost'; import { resolveLocalModel, isLocalProvider } from './adapter'; +import { emitDelta, eventBus } from '../persistence/events'; + +// Per-process cache of (provider:model) pairs we've already asked to warm. +// Warming is idempotent and cheap when already loaded, so the only reason to +// gate is to avoid emitting the "warming…" spinner text on every single call. +const warmed = new Set(); +const inflightWarms = new Map>(); + +/** + * Ensure the given provider+model is loaded into memory before we send the + * first real request. Emits TASK-scoped MODEL_WARMING/MODEL_WARMED events so + * the CLI spinner and UI can tell users exactly why they're waiting. 
+ * + * Concurrent callers for the same (provider, model) share one underlying + * warm promise. Warming errors are logged and swallowed — the next real call + * will surface any real problem with a clearer error. + */ +const ensureWarm = async ( + provider: ModelProvider, + model: string, + ctx: CallContext, +): Promise => { + const warmFn = provider.warm; + if (typeof warmFn !== 'function') return; + const key = `${provider.name}:${model}`; + if (warmed.has(key)) return; + const existing = inflightWarms.get(key); + if (existing) return existing; + + const started = Date.now(); + const task = (async () => { + const timestamp = new Date().toISOString(); + eventBus.emit('event', { + type: 'MODEL_WARMING', + taskId: ctx.taskId, + projectId: ctx.projectId, + severity: 'info', + message: `warming ${model}`, + payload: { provider: provider.name, model }, + timestamp, + }); + try { + await warmFn.call(provider, model); + } catch (err) { + log.debug('warm threw despite contract', { provider: provider.name, err: String(err) }); + } finally { + warmed.add(key); + inflightWarms.delete(key); + eventBus.emit('event', { + type: 'MODEL_WARMED', + taskId: ctx.taskId, + projectId: ctx.projectId, + severity: 'info', + message: `warmed ${model}`, + payload: { provider: provider.name, model, durationMs: Date.now() - started }, + timestamp: new Date().toISOString(), + }); + } + })(); + inflightWarms.set(key, task); + return task; +}; + +/** + * Best-effort fire-and-forget warm — used by the REPL at startup so the + * model is ready by the time the user's first prompt arrives. Callers don't + * await it and any errors are fully isolated. + */ +export const backgroundWarm = (providerName: string, model: string): void => { + try { + const provider = getProvider(providerName); + void ensureWarm(provider, model, {}); + } catch { + // provider not registered or something similar; silent per contract + } +}; + +/** + * Exposed for tests — clears the warmed set so a fresh run is observable. 
+ */ +export const _resetWarmedForTest = (): void => { + warmed.clear(); + inflightWarms.clear(); +}; export interface RoutingDecision { provider: string; @@ -109,8 +200,91 @@ export const resolveModel = async (params: { export interface CallContext { projectId?: string; taskId?: string; + role?: ModelRole; + /** + * Disable streaming for this call even if the provider supports it. Used by + * callers that explicitly need a single-shot response (e.g. strict JSON + * mode with validators that parse the full body). + */ + noStream?: boolean; } +/** + * Call a provider and accumulate a full `ModelResponse`, streaming deltas via + * the in-process event bus along the way if the provider supports it. Falls + * back cleanly to `complete()` for providers without `stream()`, callers that + * opt out with `ctx.noStream`, or when `jsonMode` is set (JSON responses are + * only useful whole). + */ +const callProvider = async ( + provider: ModelProvider, + model: string, + messages: ModelMessage[], + options: ModelCallOptions, + ctx: CallContext, +): Promise => { + // Pre-warm on first use of this (provider, model) combo. This is the + // difference between a mysterious 60+ second silence and an explicit + // "warming qwen2.5:7b…" phase the user can see ticking. + await ensureWarm(provider, model, ctx); + + const streamFn = !ctx.noStream && !options.jsonMode ? 
provider.stream : undefined; + if (!streamFn) return provider.complete(model, messages, options); + + const started = Date.now(); + let text = ''; + let inputTokens: number | undefined; + let outputTokens: number | undefined; + let finishReason: 'stop' | 'length' | 'error' | 'tool_call' = 'stop'; + try { + for await (const chunk of streamFn.call(provider, model, messages, options)) { + if (chunk.delta) { + text += chunk.delta; + emitDelta({ + text: chunk.delta, + taskId: ctx.taskId, + projectId: ctx.projectId, + role: ctx.role, + model, + provider: provider.name, + done: false, + }); + } + if (chunk.done) { + inputTokens = chunk.inputTokens ?? inputTokens; + outputTokens = chunk.outputTokens ?? outputTokens; + finishReason = chunk.finishReason ?? finishReason; + emitDelta({ + text: '', + taskId: ctx.taskId, + projectId: ctx.projectId, + role: ctx.role, + model, + provider: provider.name, + done: true, + }); + } + } + } catch (err) { + // If streaming fails mid-flight, fall back to a blocking call so we don't + // return a torn response to agents that expected a full body. 
+ log.debug('stream failed; falling back to complete()', { + provider: provider.name, + err: String(err), + }); + return provider.complete(model, messages, options); + } + return { + content: text, + model, + provider: provider.name, + inputTokens, + outputTokens, + durationMs: Date.now() - started, + finishReason, + }; +}; + export const callModel = async ( role: ModelRole, mode: Mode, @@ -150,8 +324,9 @@ export const callModel = async ( provider: decision.provider, model: decision.model, }); + const effectiveCtx: CallContext = { ...ctx, role }; try { - const response = await provider.complete(decision.model, messages, options); + const response = await callProvider(provider, decision.model, messages, options, effectiveCtx); breaker.reportSuccess(decision.provider); cache.store(decision.provider, decision.model, messages, options, response); const usd = cost.record(ctx, response); @@ -166,7 +341,13 @@ export const callModel = async ( await rateLimit.acquire(decision.fallback.provider); try { const fb = getProvider(decision.fallback.provider); - const response = await fb.complete(decision.fallback.model, messages, options); + const response = await callProvider( + fb, + decision.fallback.model, + messages, + options, + effectiveCtx, + ); breaker.reportSuccess(decision.fallback.provider); cache.store( decision.fallback.provider, diff --git a/src/permissions/manager.ts b/src/permissions/manager.ts index 03b9a88..f103ab1 100644 --- a/src/permissions/manager.ts +++ b/src/permissions/manager.ts @@ -8,7 +8,7 @@ */ import chalk from 'chalk'; -import prompts from 'prompts'; +import { chooseNumbered } from '../cli/choose'; import { PermissionRequest, PermissionDecision } from '../types'; import { ForgeRuntimeError } from '../types/errors'; import { loadPermissionGrants, savePermissionGrant, PermissionRow } from '../persistence/index-db'; @@ -84,25 +84,31 @@ const promptUser = async (req: PermissionRequest): Promise = }`, ); - const choices = [ - { title: 'Allow once', 
value: 'allow' }, - { title: 'Allow for this session', value: 'allow_session' }, - { title: 'Deny', value: 'deny' }, + // Color choices so digit hotkeys feel obvious (green = allow family, + // red = deny). `allow_project` appears only for medium/low risk since + // a persistent grant on a `high`/`critical` tool is too sharp an edge. + const choices: Array<{ + title: string; + value: PermissionDecision | 'allow_project'; + color?: 'green' | 'red' | 'yellow' | 'cyan'; + hint?: string; + }> = [ + { title: 'Allow once', value: 'allow', color: 'green' }, + { title: 'Allow for this session', value: 'allow_session', color: 'green' }, ]; - // Only non-high-risk tools can be saved as persistent grants. if (req.risk !== 'critical' && req.risk !== 'high') { - choices.splice(2, 0, { title: 'Allow for this project (remember)', value: 'allow_project' }); + choices.push({ + title: 'Allow for this project', + value: 'allow_project', + color: 'cyan', + hint: '(remember)', + }); } + choices.push({ title: 'Deny', value: 'deny', color: 'red' }); - const resp = await prompts({ - type: 'select', - name: 'value', - message: 'Decision', - choices, - initial: 0, - }); - if (!resp || !resp.value) return 'deny'; - if (resp.value === 'allow_project') { + const value = await chooseNumbered({ message: 'Decision', choices, initial: 0 }); + if (!value) return 'deny'; + if (value === 'allow_project') { savePermissionGrant({ tool: req.tool, project_id: req.projectId, @@ -112,7 +118,7 @@ const promptUser = async (req: PermissionRequest): Promise = }); return 'allow_session'; } - return resp.value as PermissionDecision; + return value as PermissionDecision; }; export const requestPermission = async ( @@ -143,9 +149,15 @@ export const requestPermission = async ( return blanket; } - // Cached decision + // Cached decision — an explicit "Allow for session/project/global" from + // the user is a first-class authorization and MUST be honored on + // subsequent calls. 
The old `!shouldAlwaysAsk` gate here was double- + // counting: it treated every `execute`/`network` tool as "always ask" + // even after the user had explicitly granted it, so users saw the same + // prompt 3+ times and their grant never took effect. Only `critical` + // risk should ignore the cache — those must re-confirm every call. const cached = tryCachedGrant(req); - if (cached && !shouldAlwaysAsk({ risk: req.risk, sideEffect: req.sideEffect }, true)) { + if (cached && req.risk !== 'critical') { return cached; } diff --git a/src/persistence/events.ts b/src/persistence/events.ts index dec48e4..54a77ba 100644 --- a/src/persistence/events.ts +++ b/src/persistence/events.ts @@ -7,6 +7,7 @@ */ import * as path from 'path'; +import { EventEmitter } from 'node:events'; import { ensureProjectDir } from '../config/paths'; import { appendJsonl, streamJsonl } from './jsonl'; import { ForgeEvent } from '../types'; @@ -16,8 +17,50 @@ const eventFile = (projectRoot: string): string => { return path.join(sub.logs, 'events.jsonl'); }; +/** + * In-process event bus. Fires synchronously after `emit()` appends to the + * JSONL log so subscribers (CLI spinner, REPL, UI WebSocket bridge) can react + * without tailing a file. Subscribers MUST not throw — a throwing listener + * would tear down the agentic loop. We isolate them in a try/catch. + * + * The `'event'` channel fires for every ForgeEvent. The `'delta'` channel + * fires for streaming model output and carries `{ taskId?, projectId?, text, + * role?, model? }` — this is separate from `MODEL_DELTA` ForgeEvents because + * streaming text is high-frequency and we don't want to fsync each chunk. + */ +export const eventBus = new EventEmitter(); +eventBus.setMaxListeners(100); + +export interface ModelDeltaEvent { + text: string; + projectId?: string; + taskId?: string; + role?: string; + model?: string; + provider?: string; + /** True on the final (done) frame. 
*/ + done?: boolean; +} + export const emit = (projectRoot: string, event: ForgeEvent): void => { appendJsonl(eventFile(projectRoot), event); + try { + eventBus.emit('event', event); + } catch { + // Listener misbehaved; swallow so persistence is still king. + } +}; + +/** + * Emit a streaming-text delta. Not persisted to the JSONL log — that would + * spam the file with per-token lines — but delivered to in-process listeners. + */ +export const emitDelta = (evt: ModelDeltaEvent): void => { + try { + eventBus.emit('delta', evt); + } catch { + // listener misbehaved; ignore + } }; export const streamEvents = (projectRoot: string): AsyncGenerator => diff --git a/src/persistence/tasks.ts b/src/persistence/tasks.ts index e415a8f..fb52389 100644 --- a/src/persistence/tasks.ts +++ b/src/persistence/tasks.ts @@ -15,7 +15,12 @@ import { indexTask, upsertProject, deleteTaskFromIndex } from './index-db'; import * as pathModule from 'path'; const LEGAL_TRANSITIONS: Record = { - draft: ['planned', 'cancelled'], + // `completed` and `failed` direct from `draft` are used by the + // conversation fast-path (`TaskType === 'conversation'`), where no + // planning/execution ever occurs — the orchestrator calls the model + // directly and records a terminal result. Keeping these transitions + // explicit is honest about the lifecycle a task actually went through. + draft: ['planned', 'cancelled', 'completed', 'failed'], planned: ['approved', 'cancelled', 'blocked'], approved: ['scheduled', 'cancelled'], scheduled: ['running', 'cancelled', 'blocked'], diff --git a/src/sandbox/file-lock.ts b/src/sandbox/file-lock.ts new file mode 100644 index 0000000..7c91a65 --- /dev/null +++ b/src/sandbox/file-lock.ts @@ -0,0 +1,83 @@ +/** + * Per-process file mutex + atomic write primitives. + * + * File-editing tools (write_file, edit_file) are otherwise a naive + * read-modify-write: two concurrent tool calls on the same path would race + * — one edit silently overwrites the other. 
We also want writes to be + * *atomic* from the perspective of any reader (no half-written bytes). + * + * Scope: + * - Serializes ONLY within this forge process. Two separate `forge run` + * invocations are already extremely unusual against the same working + * tree and are left to OS-level semantics (POSIX rename is atomic, so + * concurrent readers never observe a torn write even cross-process). + * - Writes go through a `{dir}/.{basename}.forge-tmp.{pid}.{rand}` temp + * file and then `fs.renameSync` onto the final path — POSIX guarantees + * rename-within-a-filesystem is atomic. + * + * @author Son Nguyen + */ + +import * as fs from 'fs'; +import * as path from 'path'; +import * as crypto from 'crypto'; + +// Per-path mutex. Each entry is a Promise chain; new callers `await` the +// previous tail and append themselves. The map entry is cleared when the +// chain drains so we don't leak memory for files touched once and never again. +const inflight = new Map>(); + +/** + * Run `fn` with exclusive access to `absPath` within this process. Serializes + * all callers that hand in the same path. The function is re-entrant-unsafe: + * do NOT call `withFileLock` recursively on the same path from within `fn` + * or you will deadlock. + */ +export const withFileLock = async (absPath: string, fn: () => Promise): Promise => { + const key = path.resolve(absPath); + const prev = inflight.get(key) ?? Promise.resolve(); + // The next holder waits on the previous tail. + const run = prev.catch(() => undefined).then(fn); + // Chain cleanup so we don't leak the last holder. + const cleanup = run + .catch(() => undefined) + .finally(() => { + if (inflight.get(key) === cleanup) inflight.delete(key); + }); + inflight.set(key, cleanup); + return run; +}; + +/** + * Atomic write: writes `content` to a sibling temp file in the same directory + * (rename-within-fs is atomic on POSIX; cross-fs rename on Windows is too, + * though not technically crash-atomic there). 
Concurrent readers either see + * the pre-write content or the full post-write content — never a torn read. + * + * NB: intentionally synchronous to match the existing fs.*Sync usage in + * edit_file / write_file. Switching to async would ripple through both tools + * and their tests; not worth it for the microsecond-scale difference. + */ +export const writeAtomic = (absPath: string, content: string): void => { + const dir = path.dirname(absPath); + const base = path.basename(absPath); + const tmp = path.join( + dir, + `.${base}.forge-tmp.${process.pid}.${crypto.randomBytes(6).toString('hex')}`, + ); + try { + fs.writeFileSync(tmp, content, 'utf8'); + fs.renameSync(tmp, absPath); + } catch (err) { + // Best-effort cleanup: if rename failed the temp file is orphaned. + try { + fs.unlinkSync(tmp); + } catch { + // ignore + } + throw err; + } +}; + +/** Exposed for tests — wipes the per-path mutex map. */ +export const _resetFileLocksForTest = (): void => inflight.clear(); diff --git a/src/sandbox/fs.ts b/src/sandbox/fs.ts index a8d5fb7..c65204d 100644 --- a/src/sandbox/fs.ts +++ b/src/sandbox/fs.ts @@ -6,8 +6,25 @@ import * as path from 'path'; import * as fs from 'fs'; +import * as os from 'os'; import { ForgeRuntimeError } from '../types/errors'; +// `~` is shell syntax; Node treats it as a literal directory name. LLMs +// routinely produce paths like `~/project/src/file.ts` when the user +// mentions a home-relative directory, and without pre-expansion those get +// joined against the project root as `/~/project/src/file.ts` — the +// exact ENOENT noise users see. Expanding here means the resolved absolute +// path still goes through the regular `allowedRoots` containment check +// below, so sandbox guarantees are unchanged. 
+const expandTilde = (p: string): string => { + if (!p) return p; + if (p === '~') return os.homedir(); + if (p.startsWith('~/') || p.startsWith('~' + path.sep)) { + return path.join(os.homedir(), p.slice(2)); + } + return p; +}; + // Paths we NEVER allow regardless of scope configuration. const ALWAYS_FORBIDDEN = [ '/etc/passwd', @@ -40,7 +57,7 @@ export const resolveSafe = ( policy: SandboxPolicy, mode: 'read' | 'write', ): string => { - const abs = path.resolve(policy.projectRoot, inputPath); + const abs = path.resolve(policy.projectRoot, expandTilde(inputPath)); const normalized = path.normalize(abs); for (const forbidden of ALWAYS_FORBIDDEN) { diff --git a/src/tools/ask-user.ts b/src/tools/ask-user.ts index 7f93705..fa46713 100644 --- a/src/tools/ask-user.ts +++ b/src/tools/ask-user.ts @@ -11,6 +11,7 @@ import prompts from 'prompts'; import { Tool, ToolResult } from '../types'; import { ForgeRuntimeError } from '../types/errors'; +import { chooseNumbered } from '../cli/choose'; interface Args { question: string; @@ -22,7 +23,8 @@ interface Args { export const askUserTool: Tool = { schema: { name: 'ask_user', - description: 'Ask the user a clarifying question. Use sparingly.', + description: + 'Ask the user a clarifying question when the ORIGINAL request is genuinely ambiguous. Do NOT call this to recover from tool errors — retry with different args or switch tools instead. Requires a clear, non-empty question (>= 3 chars).', sideEffect: 'pure', risk: 'low', permissionDefault: 'allow', @@ -41,6 +43,23 @@ export const askUserTool: Tool = { }, async execute(args): Promise> { const start = Date.now(); + // Reject malformed calls fast so the executor can recover (non-retryable, + // so it'll switch tools instead of looping on the same bad call). + // Common failure mode: a smaller model calls ask_user after a tool error + // to "ask the user how to proceed" with a malformed or empty question. + // The executor prompt forbids that — this is belt-and-braces. 
+ if (typeof args.question !== 'string' || args.question.trim().length < 3) { + return { + success: false, + error: { + class: 'user_input', + message: + 'ask_user requires a clear, non-empty question (>= 3 chars). Tool errors should be recovered by retrying with different args or switching tools — not by calling ask_user.', + retryable: false, + }, + durationMs: Date.now() - start, + }; + } if (!process.stdin.isTTY) { if (args.nonInteractiveDefault !== undefined) { return { @@ -62,13 +81,12 @@ export const askUserTool: Tool = { try { let answer = ''; if (args.choices && args.choices.length) { - const resp = await prompts({ - type: 'select', - name: 'value', + const picked = await chooseNumbered({ message: args.question, choices: args.choices.map((c) => ({ title: c, value: c })), + initial: args.defaultValue ? Math.max(0, args.choices.indexOf(args.defaultValue)) : 0, }); - answer = resp?.value ?? args.defaultValue ?? ''; + answer = picked ?? args.defaultValue ?? ''; } else { const resp = await prompts({ type: 'text', diff --git a/src/tools/edit-file.ts b/src/tools/edit-file.ts index 9944f90..6595314 100644 --- a/src/tools/edit-file.ts +++ b/src/tools/edit-file.ts @@ -8,6 +8,7 @@ import * as fs from 'fs'; import { Tool, ToolResult } from '../types'; import { ForgeRuntimeError } from '../types/errors'; import { resolveSafe } from '../sandbox/fs'; +import { withFileLock, writeAtomic } from '../sandbox/file-lock'; interface Args { path: string; @@ -53,40 +54,61 @@ export const editFileTool: Tool = { const start = Date.now(); try { const real = resolveSafe(args.path, { projectRoot: ctx.projectRoot }, 'write'); - const original = fs.existsSync(real) ? 
fs.readFileSync(real, 'utf8') : ''; - const needle = args.oldText; - if (!needle) { - throw new ForgeRuntimeError({ - class: 'user_input', - message: 'edit_file requires non-empty oldText', - retryable: false, - }); - } - const occurrences = countOccurrences(original, needle); - if (occurrences === 0) { - throw new ForgeRuntimeError({ - class: 'not_found', - message: `oldText not present in ${args.path}`, - retryable: false, - recoveryHint: 'Use read_file first to verify the exact snippet.', - }); - } - if (occurrences > 1 && !args.replaceAll) { - throw new ForgeRuntimeError({ - class: 'conflict', - message: `oldText matches ${occurrences} occurrences; pass replaceAll=true or narrow the anchor.`, - retryable: false, - }); - } - const updated = args.replaceAll - ? original.split(needle).join(args.newText) - : original.replace(needle, args.newText); - fs.writeFileSync(real, updated, 'utf8'); - return { - success: true, - output: { replacements: occurrences, bytesWritten: Buffer.byteLength(updated) }, - durationMs: Date.now() - start, - }; + // Entire read-modify-write runs under the per-path mutex so two + // concurrent edit_file (or write_file) calls against the same path + // serialize instead of racing. The re-read MUST happen inside the + // lock — reading before the lock could give us content that the + // previous holder has since replaced, and we'd then overwrite their + // change. + return await withFileLock(real, async () => { + const original = fs.existsSync(real) ? fs.readFileSync(real, 'utf8') : ''; + const needle = args.oldText; + if (!needle) { + // Planner pattern: create_file (empty) → edit_file (add content). + // When the target is empty/missing and oldText is empty, the intent + // is "just write this as the file body". Honor it. On a file that + // already has content, empty oldText is ambiguous — keep the error. 
+ if (original.length === 0) { + writeAtomic(real, args.newText); + return { + success: true, + output: { replacements: 1, bytesWritten: Buffer.byteLength(args.newText) }, + durationMs: Date.now() - start, + }; + } + throw new ForgeRuntimeError({ + class: 'user_input', + message: 'edit_file requires non-empty oldText when the file already has content', + retryable: false, + recoveryHint: 'Use write_file to overwrite, or pass an exact oldText snippet.', + }); + } + const occurrences = countOccurrences(original, needle); + if (occurrences === 0) { + throw new ForgeRuntimeError({ + class: 'not_found', + message: `oldText not present in ${args.path}`, + retryable: false, + recoveryHint: 'Use read_file first to verify the exact snippet.', + }); + } + if (occurrences > 1 && !args.replaceAll) { + throw new ForgeRuntimeError({ + class: 'conflict', + message: `oldText matches ${occurrences} occurrences; pass replaceAll=true or narrow the anchor.`, + retryable: false, + }); + } + const updated = args.replaceAll + ? original.split(needle).join(args.newText) + : original.replace(needle, args.newText); + writeAtomic(real, updated); + return { + success: true, + output: { replacements: occurrences, bytesWritten: Buffer.byteLength(updated) }, + durationMs: Date.now() - start, + }; + }); } catch (err) { return { success: false, diff --git a/src/tools/move-file.ts b/src/tools/move-file.ts index ef012b7..c79c114 100644 --- a/src/tools/move-file.ts +++ b/src/tools/move-file.ts @@ -20,7 +20,8 @@ interface Args { export const moveFileTool: Tool = { schema: { name: 'move_file', - description: 'Move (rename) a file or directory within the sandbox.', + description: + 'Move (rename) a file or directory within the sandbox. 
Missing parent directories at the destination are created automatically; pass createDirs:false to disable.', sideEffect: 'write', risk: 'medium', permissionDefault: 'ask', @@ -56,7 +57,9 @@ export const moveFileTool: Tool = { retryable: false, }); } - if (args.createDirs) fs.mkdirSync(path.dirname(dst), { recursive: true }); + // Match write_file semantics: mkdir-p is the default. Opt out with + // `createDirs: false` if you want a missing destination parent to fail. + if (args.createDirs !== false) fs.mkdirSync(path.dirname(dst), { recursive: true }); fs.renameSync(src, dst); return { success: true, diff --git a/src/tools/run-tests.ts b/src/tools/run-tests.ts index e823458..2e1d7c0 100644 --- a/src/tools/run-tests.ts +++ b/src/tools/run-tests.ts @@ -11,11 +11,33 @@ import { runCommand } from '../sandbox/shell'; import { ForgeRuntimeError } from '../types/errors'; interface Args { - framework?: 'auto' | 'npm' | 'pnpm' | 'yarn' | 'pytest' | 'go' | 'cargo'; + framework?: 'auto' | 'npm' | 'pnpm' | 'yarn' | 'pytest' | 'go' | 'cargo' | 'node'; target?: string; timeoutMs?: number; } +// Node 20+ ships `node --test` as a first-class runner for *.test.{js,mjs,cjs,ts} +// files. We probe for such files in conventional locations when there's no +// other framework configured, so a project can use node:test without needing +// package.json / Jest / Mocha. +const hasNodeTestFiles = (root: string): boolean => { + const dirs = ['test', 'tests', '__tests__', 'src']; + for (const d of dirs) { + const full = path.join(root, d); + try { + const st = fs.statSync(full); + if (!st.isDirectory()) continue; + // Shallow scan only — deep recursion could be expensive on large repos, + // and conventional test layouts keep tests one level deep. 
+ const entries = fs.readdirSync(full); + if (entries.some((f) => /\.test\.(?:m?js|cjs|ts)$/.test(f))) return true; + } catch { + // missing or unreadable — not an error + } + } + return false; +}; + interface Output { framework: string; stdout: string; @@ -41,6 +63,10 @@ const detectFramework = (root: string): string => { } if (fs.existsSync(path.join(root, 'go.mod'))) return 'go'; if (fs.existsSync(path.join(root, 'Cargo.toml'))) return 'cargo'; + // Fallback: if we see *.test.{js,mjs,cjs,ts} files in a conventional + // location, assume `node --test` (built into Node 20+). Lets toy repos + // run tests without a full toolchain setup. + if (hasNodeTestFiles(root)) return 'node'; return 'none'; }; @@ -58,6 +84,11 @@ const commandFor = (framework: string, target?: string): string | null => { return `go test ${target ?? './...'}`; case 'cargo': return `cargo test${target ? ` ${target}` : ''}`; + case 'node': + // Node's built-in runner picks up *.test.* files under the target + // dir(s). We pass dirs explicitly (empty arg tree = current dir) so + // no globbing subtleties across shells. Target overrides. + return `node --test ${target ?? ''}`.trim(); default: return null; } diff --git a/src/tools/write-file.ts b/src/tools/write-file.ts index dd59557..12a3bab 100644 --- a/src/tools/write-file.ts +++ b/src/tools/write-file.ts @@ -9,6 +9,7 @@ import * as path from 'path'; import { Tool, ToolResult } from '../types'; import { ForgeRuntimeError } from '../types/errors'; import { resolveSafe } from '../sandbox/fs'; +import { withFileLock, writeAtomic } from '../sandbox/file-lock'; interface Args { path: string; @@ -20,7 +21,8 @@ interface Args { export const writeFileTool: Tool = { schema: { name: 'write_file', - description: 'Write (create/overwrite/append) a text file inside the sandbox.', + description: + 'Write (create/overwrite/append) a text file inside the sandbox. 
Missing parent directories are created automatically; pass createDirs:false to disable.', sideEffect: 'write', risk: 'medium', permissionDefault: 'ask', @@ -41,28 +43,41 @@ export const writeFileTool: Tool { + const existed = fs.existsSync(real); + if (args.mode === 'create_only' && existed) { + throw new ForgeRuntimeError({ + class: 'conflict', + message: `${args.path} already exists (mode=create_only).`, + retryable: false, + }); + } + // Default to mkdir-p so "create src/foo/bar.js" works without the + // agent having to predict a separate mkdir step first. Opt out + // with `createDirs: false` to fail when the parent doesn't exist. + if (args.createDirs !== false) { + fs.mkdirSync(path.dirname(real), { recursive: true }); + } + if (args.mode === 'append') { + // Append is NOT made atomic here. Atomic append would require a + // full-file rewrite (read all, append in memory, writeAtomic). + // That's too expensive for large logs and changes the semantics. + // Callers that need torn-read safety should use overwrite mode. 
+ fs.appendFileSync(real, args.content, { encoding: 'utf8' }); + } else { + writeAtomic(real, args.content); + } + const stat = fs.statSync(real); + return { + success: true, + output: { bytesWritten: stat.size, existed }, + durationMs: Date.now() - start, + }; + }); } catch (err) { return { success: false, diff --git a/src/types/index.ts b/src/types/index.ts index 9a98f23..7a5347a 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -54,6 +54,7 @@ export type TaskType = | 'setup' | 'test' | 'optimization' + | 'conversation' | 'other'; export type Complexity = 'trivial' | 'simple' | 'moderate' | 'complex'; @@ -173,8 +174,11 @@ export type EventType = | 'TOOL_COMPLETED' | 'TOOL_FAILED' | 'MODEL_CALLED' + | 'MODEL_DELTA' | 'MODEL_COMPLETED' | 'MODEL_FAILED' + | 'MODEL_WARMING' + | 'MODEL_WARMED' | 'PERMISSION_REQUESTED' | 'PERMISSION_GRANTED' | 'PERMISSION_DENIED' @@ -245,6 +249,24 @@ export interface ModelResponse { finishReason?: 'stop' | 'length' | 'error' | 'tool_call'; } +/** + * One frame of a streaming completion. Providers yield any number of + * `done:false` chunks (each carrying a text `delta`), terminated by a single + * `done:true` chunk that may also carry usage, finishReason, and duration. + * The final `delta` on a done:true frame is usually empty but implementations + * may concatenate it verbatim. + */ +export interface ModelStreamChunk { + delta: string; + done: boolean; + model?: string; + provider?: string; + inputTokens?: number; + outputTokens?: number; + durationMs?: number; + finishReason?: 'stop' | 'length' | 'error' | 'tool_call'; +} + export interface ModelProvider { readonly name: string; isAvailable(): Promise; @@ -254,6 +276,24 @@ export interface ModelProvider { messages: ModelMessage[], options?: ModelCallOptions, ): Promise; + /** + * Optional streaming completion. When absent, callers should treat the + * provider as non-streaming and fall back to `complete()`. 
+ */ + stream?( + model: string, + messages: ModelMessage[], + options?: ModelCallOptions, + ): AsyncIterable; + /** + * Optional pre-warm hook. For runtimes that load models into RAM/VRAM on + * demand (Ollama, LM Studio), calling `warm` before the first real call + * hides the cold-load latency behind an explicit "warming" phase instead + * of a mysterious headers-timeout. Should be idempotent — already-loaded + * models return quickly. Must not throw: failures are treated as "did + * what we could" and the next real call surfaces any real error. + */ + warm?(model: string): Promise; } // ---------- Prompts ---------- diff --git a/src/ui/public/app.js b/src/ui/public/app.js index ca165a9..97bd0d8 100644 --- a/src/ui/public/app.js +++ b/src/ui/public/app.js @@ -391,7 +391,7 @@ views.dashboard = async () => { title updated - ${tasks.map((t) => ` + ${tasks.map((t) => ` ${esc(t.id)} ${badge(t.status)} ${esc(t.mode)} @@ -435,6 +435,12 @@ views.dashboard = async () => {
+
+ in + + + +
${chipsHtml}
run · ⇧ ⏎ open advanced form
@@ -452,8 +458,25 @@ views.dashboard = async () => {
Spend
-
$${Number(cost.totals?.usd ?? 0).toFixed(3)}
-
${Number(cost.totals?.tokens ?? 0).toLocaleString()} tokens
+ ${(() => { + const usd = Number(cost.totals?.usd ?? 0); + const toks = Number(cost.totals?.tokens ?? 0); + // Local providers (Ollama, llama.cpp) have no per-token pricing, + // so usd is always 0 even when tokens flow. Showing $0.000 in + // the headline makes the card look broken. When there is no + // billable cost but tokens are being used, promote the token + // count to the headline and annotate as "local · free". + if (usd > 0) { + return `
$${usd.toFixed(3)}
+
${toks.toLocaleString()} tokens
`; + } + if (toks > 0) { + return `
${toks.toLocaleString()}
+
tokens · local · free
`; + } + return `
$0.000
+
no tokens yet
`; + })()}
Provider
@@ -474,17 +497,34 @@ views.dashboard = async () => { `); const input = document.getElementById('hero-input'); + const cwdInput = document.getElementById('hero-cwd'); const go = async (prompt = null) => { const p = (prompt ?? input.value).trim(); if (!p) return; pushPromptHistory(p); + const cwd = (cwdInput?.value || '').trim() || undefined; try { - const { taskId } = await apiPost('/api/tasks/run', { prompt: p, autoApprove: false }); + const { taskId } = await apiPost('/api/tasks/run', { prompt: p, autoApprove: false, cwd }); toast('Task started', 'ok'); openTask(taskId); } catch (e) { toast(String(e), 'err'); } }; document.getElementById('hero-go').addEventListener('click', () => go()); + // Known-projects autocomplete + Browse modal for the hero cwd input so + // the user can pick a dir directly from the Dashboard before running + // their first task, instead of having to dig into the New-task form. + api('/api/projects').then((ps) => { + const dl = document.getElementById('hero-cwd-list'); + if (!dl) return; + dl.innerHTML = (ps || []) + .map((p) => ``) + .join(''); + }).catch(() => {}); + document.getElementById('hero-browse')?.addEventListener('click', () => { + openDirPicker((picked) => { + if (cwdInput) cwdInput.value = picked; + }); + }); attachPromptHistory(input); input.addEventListener('keydown', (e) => { if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); go(); } @@ -876,7 +916,11 @@ views.run = async () => {
- +
+ + + +
@@ -927,6 +971,22 @@ views.run = async () => { }); document.getElementById('run-reset').addEventListener('click', () => setView('run')); document.getElementById('run-prompt').focus(); + // Populate the project datalist + wire the Browse button for the dir + // picker. Known projects come from Forge's project index so the user can + // jump to repos they've run tasks against before without retyping a path. + api('/api/projects').then((ps) => { + const dl = document.getElementById('run-cwd-list'); + if (!dl) return; + dl.innerHTML = (ps || []) + .map((p) => ``) + .join(''); + }).catch(() => {}); + document.getElementById('run-browse')?.addEventListener('click', () => { + openDirPicker((picked) => { + const el = document.getElementById('run-cwd'); + if (el) el.value = picked; + }); + }); }; // ---------- Task detail ---------- @@ -935,7 +995,7 @@ const openTask = (taskId) => { currentView = 'task'; document.querySelectorAll('.nav-item').forEach((b) => b.classList.remove('active')); app.innerHTML = page(` - ${pageHeader('Task · ' + taskId, 'Live stream from the interactive host.', ` + ${pageHeader('Conversation · ' + taskId, 'Live stream from the interactive host. Type below to send a follow-up.', ` `)} @@ -949,18 +1009,240 @@ const openTask = (taskId) => {

+
+
+

Follow-up

+ +
+
+ +
+ + + + +
+
+
+ + 0 prior turns +
+ +
+
+
`); + // Pre-populate the project path from /api/status so the user can see where + // tasks will run and override before sending. Without this the server-side + // cwd is whatever the `forge ui start` process inherited and the user has + // no visibility, which led to paths being joined against the wrong root + // (e.g. "~/Forge-Agentic-Coding-CLI/src/..." resolved under the real cwd). + api('/api/status').then((s) => { + const el = document.getElementById('followup-cwd'); + if (el && s?.cwd) el.value = s.cwd; + }).catch(() => {}); + + // Populate the datalist with recent/known projects. Users can either type + // to autocomplete, pick from the dropdown, or click Browse for a full + // server-side directory picker. + api('/api/projects').then((ps) => { + const dl = document.getElementById('followup-cwd-list'); + if (!dl) return; + dl.innerHTML = (ps || []) + .map((p) => ``) + .join(''); + }).catch(() => {}); + + // Browse modal — walks directories under $HOME and lets the user click a + // folder to set it as the project. Server enforces the $HOME containment + // rule so the picker can't be used as a system enumerator. + document.getElementById('followup-browse')?.addEventListener('click', () => { + openDirPicker((picked) => { + const el = document.getElementById('followup-cwd'); + if (el) el.value = picked; + }); + }); const stream = document.getElementById('task-stream'); const planSec = document.getElementById('task-plan'); let currentPlanPromptId = null; + // Conversation state: each task under this view contributes one turn. + // We compose them on submit into a `description` that mirrors what + // `composeDescription` in the REPL does server-side — the orchestrator + // treats it as ground truth for follow-ups like "what did we talk + // about?". `activeTaskId` tracks which task's WS we're currently + // subscribed to (swaps on each follow-up). 
+ const convoTurns = []; + let activeTaskId = taskId; + const composeDescription = (newInput) => { + const prior = convoTurns.filter((t) => t.summary); + if (!prior.length) return newInput; + const lines = [ + '## Current request', + newInput, + '', + '## Conversation so far (earliest → latest)', + ]; + prior.slice(-8).forEach((t, i) => { + lines.push(`${i + 1}. user: ${t.input.replace(/\s+/g, ' ').slice(0, 240)}`); + lines.push(` assistant: ${t.success === false ? 'FAILED' : 'OK'} — ${(t.summary || '').replace(/\s+/g, ' ').slice(0, 240)}`); + }); + lines.push('', '## Notes', '- "Current request" is the user\'s latest message; prior turns are context only.'); + return lines.join('\n'); + }; + const updateTurnCount = () => { + const el = document.getElementById('followup-turns'); + if (el) el.textContent = `${convoTurns.filter((t) => t.summary).length} prior turn${convoTurns.filter((t) => t.summary).length === 1 ? '' : 's'}`; + }; + + // Stream flows earliest → latest top-to-bottom (chat-style). Each line gets + // a right-aligned local timestamp. Auto-scroll sticks to the bottom as long + // as the user hasn't deliberately scrolled up to read history. + const isAtBottom = () => { + const gap = stream.scrollHeight - stream.scrollTop - stream.clientHeight; + return gap < 48; + }; + const scrollToBottom = () => { + stream.scrollTop = stream.scrollHeight; + }; + const tsNow = () => { + const d = new Date(); + const hh = String(d.getHours()).padStart(2, '0'); + const mm = String(d.getMinutes()).padStart(2, '0'); + const ss = String(d.getSeconds()).padStart(2, '0'); + return `${hh}:${mm}:${ss}`; + }; const push = (line) => { const el = document.createElement('div'); el.className = line.cls; - el.innerHTML = line.html; - stream.insertBefore(el, stream.firstChild); - while (stream.childElementCount > 300) stream.lastChild?.remove(); + // Wrap message in a flex row so the right-aligned timestamp never + // collides with the body. 
`.log-body` is a div (not span) because + // callers pass block-level markdown such as `
` and `
` — a span around block content is invalid HTML and + // triggered subtle inline-baseline artifacts between code lines in + // some browsers. + el.innerHTML = + `
${line.html}
` + + `${tsNow()}`; + const stick = isAtBottom(); + stream.appendChild(el); + while (stream.childElementCount > 300) stream.firstChild?.remove(); + if (stick) scrollToBottom(); + return el; + }; + + // Markdown renderer reused by the chat view. Falls back to plain-text + // escaping when the markdown helper script hasn't loaded for some reason. + const md = (s) => + window.forgeMd && window.forgeMd.mdToHtml ? window.forgeMd.mdToHtml(s || '') : esc(s || ''); + + // Live "working" spinner lives between STARTED and DONE. REPL/CLI already + // show an ora spinner during this phase; without something equivalent in + // the UI the task just sits on "STARTED" for ~2 minutes. The spinner goes + // away the moment streaming starts, DONE/FAILED/ERROR arrives, or the + // task is cancelled. + let workingEl = null; + const showWorking = (phase) => { + if (workingEl) { + const ph = workingEl.querySelector('.working-phase'); + if (ph) ph.textContent = phase; + return; + } + workingEl = document.createElement('div'); + workingEl.className = 'log-line log-line-working'; + workingEl.innerHTML = + `WORKING · ${esc(phase)}` + + `${tsNow()}`; + const stick = isAtBottom(); + stream.appendChild(workingEl); + if (stick) scrollToBottom(); + // Tiny inline spinner animation — swap the braille glyph every 80ms. Kept + // in-element so one interval per spawned spinner, cleared on hide(). + const frames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']; + let i = 0; + const sp = workingEl.querySelector('.working-spinner'); + workingEl._timer = setInterval(() => { + if (!workingEl || !sp) return; + i = (i + 1) % frames.length; + sp.textContent = frames[i]; + }, 80); + }; + const hideWorking = () => { + if (!workingEl) return; + if (workingEl._timer) clearInterval(workingEl._timer); + workingEl.remove(); + workingEl = null; + }; + + // Streaming model output: the REPL/CLI render accumulated markdown live + // as tokens arrive (headings, fences, lists all form up in place). 
The UI + // now does the same — each delta appends to a per-stream buffer, and a + // requestAnimationFrame-coalesced re-render pipes the buffer through the + // same `mdToHtml` the rest of the UI uses. One rAF tick per frame caps + // work at ~60 re-renders/sec even if tokens arrive faster. + let deltaEl = null; + let deltaKey = ''; + let deltaBuf = ''; + let deltaRaf = 0; + // Track whether any delta was rendered during this task. If yes, the + // final `task.result` frame shouldn't repeat the full summary as a DONE + // block — the user already read it live. + let deltaStreamed = false; + const flushDeltaRender = () => { + deltaRaf = 0; + if (!deltaEl) return; + const span = deltaEl.querySelector('.stream-text'); + if (!span) return; + const stick = isAtBottom(); + // Render the whole accumulator so mid-stream markdown (headings, + // partially-closed fences, list sequences) reflows cleanly. + span.innerHTML = md(deltaBuf); + if (stick) scrollToBottom(); + }; + const appendDelta = (msg) => { + const key = `${msg.provider || ''}/${msg.model || ''}/${msg.role || ''}`; + if (msg.done) { + // Final render pass so the closing tokens (last list items, closing + // code fence, etc.) are reflected even if the last rAF tick hadn't + // fired yet. + if (deltaRaf) cancelAnimationFrame(deltaRaf); + flushDeltaRender(); + if (deltaEl) deltaEl.classList.add('log-line-done'); + deltaEl = null; + deltaKey = ''; + deltaBuf = ''; + return; + } + if (!msg.text) return; + if (!deltaEl || deltaKey !== key) { + deltaEl = document.createElement('div'); + deltaEl.className = 'log-line log-line-stream'; + // `.stream-text` needs to be block-level because the streamed + // markdown may include `
` / `
    ` / `` — wrapping a block + // like `
    ` in an inline `` made browsers render subtle
    +      // inline-baseline artifacts between code lines (the "weird lines"
    +      // users saw). Use a div container for the body with an inline head
    +      // row for the model label; the markdown target is its own div.
    +      deltaEl.innerHTML =
    +        `
    ${esc(msg.model || 'model')}
    ` + + `${tsNow()}`; + const stick = isAtBottom(); + stream.appendChild(deltaEl); + while (stream.childElementCount > 300) stream.firstChild?.remove(); + if (stick) scrollToBottom(); + deltaKey = key; + deltaBuf = ''; + } + deltaBuf += msg.text; + deltaStreamed = true; + // Coalesce re-renders. Multiple deltas within the same frame collapse + // into one innerHTML assignment so fast providers (local Ollama can + // emit 100+ tokens/sec) don't thrash the layout. + if (!deltaRaf) deltaRaf = requestAnimationFrame(flushDeltaRender); }; const renderPlan = (plan) => { @@ -987,12 +1269,23 @@ const openTask = (taskId) => {
    ${steps}
    +
`; planSec.querySelectorAll('[data-plan-action]').forEach((b) => b.addEventListener('click', async () => { if (!currentPlanPromptId) return; + if (b.dataset.planAction === 'edit') { + // Show the inline JSON editor. When the user saves, we'll + // receive a fresh `plan_edit` prompt from the server (because + // the loop re-calls confirmPlan after editPlan returns) — this + // click itself only needs to resolve the CURRENT approval + // prompt with value='edit'. + await apiPost('/api/prompts/respond', { promptId: currentPlanPromptId, value: 'edit' }); + currentPlanPromptId = null; + return; + } await apiPost('/api/prompts/respond', { promptId: currentPlanPromptId, value: b.dataset.planAction }); planSec.hidden = true; planSec.innerHTML = ''; @@ -1003,48 +1296,219 @@ const openTask = (taskId) => { app.querySelector('[data-action="back"]').addEventListener('click', () => setView('active')); app.querySelector('[data-action="cancel"]').addEventListener('click', async () => { - try { await apiPost(`/api/tasks/${taskId}/cancel`); toast('cancel requested', 'warn'); } + try { await apiPost(`/api/tasks/${activeTaskId}/cancel`); toast('cancel requested', 'warn'); } catch (e) { toast(String(e), 'err'); } }); - if (taskConnections.has(taskId)) { try { taskConnections.get(taskId).close(); } catch {} } - const url = `${location.protocol === 'https:' ? 'wss' : 'ws'}://${location.host}/ws/tasks/${taskId}`; - const ws = new WebSocket(url); - taskConnections.set(taskId, ws); - const meta = document.getElementById('task-meta'); - ws.onopen = () => { meta.textContent = 'live'; }; - ws.onclose = () => { meta.textContent = 'disconnected'; }; - ws.onmessage = (e) => { - let msg; - try { msg = JSON.parse(e.data); } catch { return; } - if (msg.kind === 'event') { - const ev = msg.event; - push({ - cls: `log-line ${ev.severity ?? 
'info'}`, - html: `${esc(ev.type)} · ${esc(ev.message)}`, - }); - } else if (msg.kind === 'prompt') { - if (msg.promptType === 'plan_approval') { - currentPlanPromptId = msg.promptId; - renderPlan(msg.plan); - } else if (msg.promptType === 'permission') { - openPermissionModal(msg); - } else if (msg.promptType === 'user_input') { - openUserInputModal(msg); + + // Attach a WebSocket to a given taskId; swaps the listener when a + // follow-up turn spawns a new task. Returns the socket so we can close + // it when swapping again. + // Replay a historical task's saved detail into the stream. The WS server + // only streams live/active tasks, so when the user opens a completed task + // from the Tasks / Dashboard tables we need to hydrate the view from the + // persisted JSON instead of leaving it blank with "disconnected". Safe to + // call at any point — `push` is idempotent per event, and we guard with a + // flag so a brief race where the WS opens + detail arrives doesn't + // double-render. + let hydrated = false; + const hydrateHistorical = async (id) => { + if (hydrated) return; + try { + const t = await api(`/api/tasks/${id}`); + if (!t || hydrated) return; + hydrated = true; + meta.textContent = `historical · ${String(t.status || 'done')}`; + push({ cls: 'log-line', html: `PROMPT · ${esc((t.title || '').slice(0, 200))}` }); + if (t.plan) renderPlan(t.plan); + const ok = t.result?.success !== false; + const summary = t.result?.summary || ''; + if (summary) { + const multiline = summary.includes('\n'); + push({ + cls: `log-line ${ok ? '' : 'error'}`, + html: multiline + ? `${ok ? 'DONE' : 'FAILED'}
${md(summary)}
` + : `${ok ? 'DONE' : 'FAILED'} · ${md(summary)}`, + }); + } + for (const f of (t.result?.filesChanged || []).slice(0, 12)) { + push({ cls: 'log-line', html: `FILE · ${esc(f)}` }); + } + } catch (e) { + // Task not found or error — show a one-line note so the page isn't + // silently empty. + push({ cls: 'log-line warning', html: `HISTORY · unable to load task detail (${esc(String(e).slice(0, 120))})` }); + } + }; + + const attachWs = (id) => { + if (taskConnections.has(id)) { try { taskConnections.get(id).close(); } catch {} } + const url = `${location.protocol === 'https:' ? 'wss' : 'ws'}://${location.host}/ws/tasks/${id}`; + const ws = new WebSocket(url); + taskConnections.set(id, ws); + // If the WS hasn't opened within 800ms, assume this is a historical + // task (the server closes with 1008 'unknown task' almost instantly in + // that case) and hydrate from persisted state. 800ms is long enough + // that live tasks reliably OPEN first, short enough that the UX feels + // instant. + let openedOrErrored = false; + setTimeout(() => { if (!openedOrErrored && ws.readyState !== ws.OPEN) hydrateHistorical(id); }, 800); + ws.onopen = () => { openedOrErrored = true; meta.textContent = 'live · ' + id.slice(0, 8); }; + ws.onclose = () => { + openedOrErrored = true; + // Close with code 1008 (policy violation) is how the server says + // "unknown task" for historical rows — treat that as a cue to load + // the detail view. For tasks that opened live and then closed, the + // stream already has content; leave the meta as "disconnected". + if (!hydrated) { hydrateHistorical(id); } + else { meta.textContent = 'disconnected'; } + }; + ws.onmessage = (e) => { + let msg; + try { msg = JSON.parse(e.data); } catch { return; } + if (msg.kind === 'event') { + const ev = msg.event; + push({ + cls: `log-line ${ev.severity ?? 
'info'}`, + html: `${esc(ev.type)} · ${esc(ev.message)}`, + }); + // Keep the "working" spinner label in sync with the latest phase + // event so the user sees what Forge is doing (classify / plan / + // step_001 reading src/foo…) instead of a static "classifying + // request" label for the full run. + if (ev?.message) showWorking(ev.message.slice(0, 80)); + } else if (msg.kind === 'prompt') { + if (msg.promptType === 'plan_approval') { + currentPlanPromptId = msg.promptId; + renderPlan(msg.plan); + } else if (msg.promptType === 'plan_edit') { + openPlanEditModal(msg.promptId, msg.plan); + } else if (msg.promptType === 'permission') { + openPermissionModal(msg); + } else if (msg.promptType === 'user_input') { + openUserInputModal(msg); + } + } else if (msg.kind === 'task.started') { + push({ cls: 'log-line', html: `STARTED · ${esc(msg.prompt.slice(0, 120))}` }); + showWorking('classifying request…'); + } else if (msg.kind === 'task.result') { + hideWorking(); + const ok = msg.result?.success; + const summary = msg.result?.summary ?? ''; + if (deltaStreamed) { + // The narrator / conversation answer already streamed into the + // view as model deltas — rendering the same text again as a + // DONE block is pure duplication. Emit a single status line + // instead so the user sees the task finished. + push({ + cls: `log-line ${ok ? '' : 'error'}`, + html: `${ok ? 'DONE' : 'FAILED'}${ok ? '' : ` · ${md(summary)}`}`, + }); + } else { + // No live stream happened (planner-only task, executor used + // jsonMode, etc.) — render the full summary so the user has + // something to read. + const isMultiline = summary.includes('\n'); + if (isMultiline) { + push({ + cls: `log-line ${ok ? '' : 'error'}`, + html: `${ok ? 'DONE' : 'FAILED'}
${md(summary)}
`, + }); + } else { + push({ + cls: `log-line ${ok ? '' : 'error'}`, + html: `${ok ? 'DONE' : 'FAILED'} · ${md(summary)}`, + }); + } + } + toast(ok ? 'Task complete' : 'Task failed', ok ? 'ok' : 'err'); + // Reset for the next task in this tab. + deltaStreamed = false; + // Record the summary against the most recently-sent turn so future + // follow-ups can thread it into the composed description. + const pending = convoTurns[convoTurns.length - 1]; + if (pending && pending.taskId === id && !pending.summary) { + pending.summary = summary; + pending.success = ok; + updateTurnCount(); + } + // Re-enable the follow-up input once the task finishes. + const sendBtn = document.getElementById('followup-send'); + const statusEl = document.getElementById('followup-status'); + if (sendBtn) sendBtn.disabled = false; + if (statusEl) statusEl.textContent = 'ready'; + } else if (msg.kind === 'task.error') { + hideWorking(); + push({ cls: 'log-line error', html: `ERROR · ${esc(msg.error)}` }); + const sendBtn = document.getElementById('followup-send'); + const statusEl = document.getElementById('followup-status'); + if (sendBtn) sendBtn.disabled = false; + if (statusEl) statusEl.textContent = 'ready'; + } else if (msg.kind === 'task.cancel_requested') { + hideWorking(); + push({ cls: 'log-line warning', html: `CANCEL · requested` }); + } else if (msg.kind === 'model.delta') { + // First token arriving means the model is speaking — the generic + // "working" spinner has served its purpose; the streamed text is + // the new source of motion. + if (msg.text) hideWorking(); + appendDelta(msg); } - } else if (msg.kind === 'task.started') { - push({ cls: 'log-line', html: `STARTED · ${esc(msg.prompt.slice(0, 120))}` }); - } else if (msg.kind === 'task.result') { - const ok = msg.result?.success; - push({ cls: `log-line ${ok ? '' : 'error'}`, html: `${ok ? 'DONE' : 'FAILED'} · ${esc(msg.result?.summary ?? '')}` }); - toast(ok ? 'Task complete' : 'Task failed', ok ? 
'ok' : 'err'); - } else if (msg.kind === 'task.error') { - push({ cls: 'log-line error', html: `ERROR · ${esc(msg.error)}` }); - } else if (msg.kind === 'task.cancel_requested') { - push({ cls: 'log-line warning', html: `CANCEL · requested` }); + }; + return ws; + }; + + attachWs(taskId); + + // Follow-up input — typed message spawns a new task with prior-turns + // context threaded in via `description`. The server's orchestrator uses + // that for the conversation fast-path; for non-conversational intents + // it's handed to the planner as context. + const input = document.getElementById('followup-input'); + const sendBtn = document.getElementById('followup-send'); + const autoCk = document.getElementById('followup-auto'); + const cwdInput = document.getElementById('followup-cwd'); + const statusEl = document.getElementById('followup-status'); + const submitFollowup = async () => { + const text = (input?.value || '').trim(); + if (!text) return; + sendBtn.disabled = true; + statusEl.textContent = 'sending…'; + // Push an echo of the user's turn into the stream so the conversation + // reads linearly. 
+ push({ cls: 'log-line', html: `YOU · ${esc(text)}` }); + const description = composeDescription(text); + const cwd = (cwdInput?.value || '').trim() || undefined; + try { + const body = await apiPost('/api/tasks/run', { + prompt: text, + autoApprove: !!autoCk?.checked, + description, + cwd, + }); + const newTaskId = body.taskId; + convoTurns.push({ taskId: newTaskId, input: text, summary: null }); + updateTurnCount(); + activeTaskId = newTaskId; + statusEl.textContent = 'running · ' + newTaskId.slice(0, 8); + input.value = ''; + attachWs(newTaskId); + } catch (e) { + toast(String(e), 'err'); + sendBtn.disabled = false; + statusEl.textContent = 'error'; } }; + sendBtn?.addEventListener('click', submitFollowup); + input?.addEventListener('keydown', (e) => { + if (e.key === 'Enter' && !e.shiftKey) { + e.preventDefault(); + void submitFollowup(); + } + }); + input?.focus(); }; // ---------- Active / tasks ---------- @@ -1105,7 +1569,7 @@ views.tasks = async () => { title updated - ${rows.map((t) => ` + ${rows.map((t) => ` ${esc(t.id)} ${badge(t.status)} ${esc(t.mode)} @@ -1116,6 +1580,9 @@ views.tasks = async () => { `).join('')}
` : `
No matching tasks
`; + body.querySelectorAll('[data-open-task]').forEach((el) => + el.addEventListener('click', () => openTask(el.dataset.openTask)), + ); }; const input = document.getElementById('q'); let h; @@ -1414,7 +1881,16 @@ views.cost = async () => {
Calls
${esc(t.calls)}
all providers
Tokens
${Number(t.tokens).toLocaleString()}
input + output
-
Spend
$${Number(t.usd).toFixed(4)}
estimated USD
+
Spend
${(() => { + const usd = Number(t.usd); + const toks = Number(t.tokens); + // Cost view: if only local providers were used, usd is 0 by design + // (Ollama, llama.cpp are free). Surface that explicitly instead of + // a deceptive "$0.0000 estimated USD" line. + if (usd > 0) return `
$${usd.toFixed(4)}
estimated USD
`; + if (toks > 0) return `
local · free
no billable providers
`; + return `
$0.0000
no calls yet
`; + })()}

Recent calls

${rows.length @@ -1614,6 +2090,64 @@ const mountOverlay = (innerHTML, { closeOnClickOutside = true, onClose, onKey } return { overlay, close }; }; +// Inline plan editor. The user clicked "Edit…" on a plan approval; the +// server's loop re-called `host.editPlan(plan)` which surfaces this +// prompt. We show the plan JSON in a textarea; on Save we POST the new +// plan back as the response, the loop installs it, and re-surfaces a +// fresh plan_approval prompt so the user can approve/reject/edit-again. +const openPlanEditModal = (promptId, plan) => { + const initial = JSON.stringify(plan, null, 2); + const { overlay, close } = mountOverlay(` + `, { closeOnClickOutside: false }); + const ta = overlay.querySelector('#plan-edit-text'); + const errEl = overlay.querySelector('#plan-edit-err'); + ta.value = initial; + ta.focus(); + overlay.querySelector('[data-pe="cancel"]').addEventListener('click', async () => { + // Cancel leaves the plan unchanged; responding with the original + // plan satisfies the edit contract without mutating anything. + await apiPost('/api/prompts/respond', { promptId, value: plan }); + close(); + }); + overlay.querySelector('[data-pe="save"]').addEventListener('click', async () => { + let parsed; + try { + parsed = JSON.parse(ta.value); + } catch (e) { + errEl.textContent = 'Invalid JSON: ' + String(e).slice(0, 160); + return; + } + if (!parsed || !Array.isArray(parsed.steps)) { + errEl.textContent = 'Plan needs a `steps` array.'; + return; + } + try { + await apiPost('/api/prompts/respond', { promptId, value: parsed }); + close(); + } catch (e) { + errEl.textContent = 'Save failed: ' + String(e).slice(0, 160); + } + }); +}; + const openPermissionModal = (msg) => { const { overlay, close } = mountOverlay(`