diff --git a/.gitignore b/.gitignore index 5fa546b..9753a7a 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,12 @@ coverage/ *.swp *.swo *~ +TEMP/ +temp/ +tmp/ + +# Screenshots +*.png # IDE .vscode/ @@ -60,10 +66,8 @@ cli/.pytest_cache/ cli/.ruff_cache/ .codeforge/data/ -# Dashboard-specific -dashboard/.svelte-kit/ -dashboard/build/ -dashboard/mockups/ +# Dashboard (deprecated — migrating to CodeDirective, a separate repo) +dashboard/ # Docs-specific docs/.astro/ @@ -74,3 +78,4 @@ dev-debug.log npm-debug.log* yarn-debug.log* yarn-error.log* +.research/ diff --git a/CLAUDE.md b/CLAUDE.md index 598f664..94bc371 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,6 +1,6 @@ # CodeForge Monorepo -This repository contains four packages. Each package manages its own dependencies independently. +This repository contains three packages. Each package manages its own dependencies independently. ## Packages @@ -9,7 +9,6 @@ This repository contains four packages. Each package manages its own dependencie | `container/` | Node.js | npm | | `cli/` | Bun | bun | | `docs/` | Node.js | npm | -| `dashboard/` | Bun | npm (frontend) / bun (server) | ## Development Rules @@ -27,8 +26,6 @@ Each package has its own `CLAUDE.md` with package-specific development rules: - [`container/CLAUDE.md`](container/CLAUDE.md) — changelog, documentation, and configuration rules for the devcontainer package - `cli/` — Bun/TypeScript CLI; run `bun test` for tests - `docs/` — Astro/Starlight site; run `npm run build` to verify -- [`dashboard/CLAUDE.md`](dashboard/CLAUDE.md) | [`dashboard/README.md`](dashboard/README.md) — Svelte 5 SPA + Bun backend for session analytics - ### Cross-Package Changes When a change spans multiple packages, make the changes in a single branch and PR. 
@@ -41,21 +38,3 @@ Run tests for each affected package before committing: - **Container**: `cd container && npm test` - **CLI**: `cd cli && bun test` - **Docs**: `cd docs && npm run build` -- **Dashboard**: `cd dashboard && bun test` - -### Dashboard vs CLI - -The `dashboard/` and `cli/` packages serve different audiences: - -- **CLI** (`codeforge` command) — terminal-first, text/JSON output, scriptable, - runs inside or outside the container. Features: session search, task search/list/show, - plan search, plugin management, index/config commands. -- **Dashboard** (Svelte 5 SPA) — visual analytics, charts, expandable detail views, - real-time SSE updates. Features: session browsing with conversation replay, - task/plan/agent/memory views, project analytics, cost tracking. - -When adding a new data view: -- If it's browsable/visual (tables, charts, detail drill-down) → dashboard -- If it's scriptable/automatable (piped output, filters, JSON) → CLI -- If it's both → implement in both, but don't import CLI as a dashboard dependency. - Fork patterns instead. 
diff --git a/README.md b/README.md index 791e353..d385080 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,8 @@ Monorepo for CodeForge — an AI-powered development environment for Claude Code | Package | Description | Version | |---------|-------------|---------| -| [`container/`](container/) | CodeForge DevContainer (`@coredirective/cf-container` on npm) | 2.1.0 | -| [`cli/`](cli/) | CodeForge CLI (`codeforge-cli`) | 0.1.0 | -| [`dashboard/`](dashboard/) | Session analytics dashboard (Svelte 5 SPA + Bun backend) | — | +| [`container/`](container/) | CodeForge DevContainer (`@coredirective/cf-container` on npm) | 2.2.0 | +| [`cli/`](cli/) | CodeForge CLI (`codeforge-cli`) | 0.2.0 | | [`docs/`](docs/) | Documentation site ([codeforge.core-directive.com](https://codeforge.core-directive.com)) | — | ## Quick Start @@ -31,9 +30,6 @@ cd container && npm test # CLI (Bun) cd cli && bun test -# Dashboard (Bun) -cd dashboard && bun test - # Docs (npm) cd docs && npm run build ``` diff --git a/cli/CHANGELOG.md b/cli/CHANGELOG.md index e9eb2d4..5aa4617 100644 --- a/cli/CHANGELOG.md +++ b/cli/CHANGELOG.md @@ -1,5 +1,22 @@ # CodeForge CLI Changelog +## v0.2.0 — 2026-04-16 + +### New Command + +- **`codeforge session tokens`** — Analyze thinking token usage across Claude Code sessions + - Shows exact billed output tokens (from API) and visible content breakdown (thinking, text, tool chars) + - **Thinking Density** table: % of turns with thinking, avg chars per thinking turn, session intensity breakdown (none/light/medium/heavy) + - **Per-session breakdown**: turns with thinking, density %, avg chars per thinking turn + - Filter by `--project`, `--model`, `--since`, `--until` + - Output formats: text (colorized tables) and JSON (`--format json`) + +### Purpose + +Benchmarks thinking token costs to compare extended thinking behavior across models (e.g., Opus 4.5 vs 4.6). 
Since `output_tokens` is a combined total (thinking + text + tool_use) with no separate `thinking_tokens` field, this command provides visibility into thinking patterns through empirical measurement of visible content. + +--- + ## v0.1.0 — 2026-03-14 (Experimental) ### Package Rename diff --git a/cli/package.json b/cli/package.json index 8840310..122180f 100644 --- a/cli/package.json +++ b/cli/package.json @@ -3,7 +3,7 @@ "publishConfig": { "access": "public" }, - "version": "0.1.0", + "version": "0.2.0", "description": "CLI for CodeForge development workflows", "keywords": [ "codeforge", diff --git a/cli/src/commands/session/tokens.ts b/cli/src/commands/session/tokens.ts new file mode 100644 index 0000000..1b2dde3 --- /dev/null +++ b/cli/src/commands/session/tokens.ts @@ -0,0 +1,544 @@ +import chalk from "chalk"; +import type { Command } from "commander"; +import { basename } from "path"; +import { readLines } from "../../search/engine.js"; +import { discoverSessionFiles } from "../../utils/glob.js"; +import { parseRelativeTime, parseTime } from "../../utils/time.js"; + +interface TokensCommandOptions { + project?: string; + since?: string; + until?: string; + model?: string; + format: string; + color?: boolean; +} + +interface AssistantMessage { + type: "assistant"; + timestamp: string; + sessionId: string; + message: { + role: "assistant"; + model?: string; + content: Array<{ type: string; thinking?: string; text?: string }>; + usage?: { output_tokens?: number }; + }; +} + +export interface SessionTokenStats { + sessionId: string; + filePath: string; + project?: string; + model: string; + isSubagent: boolean; + parentProject?: string; + turns: number; + turnsWithThinking: number; // Turns that have thinking blocks + outputTokens: number; // Exact billed tokens from API + thinkingChars: number; // Visible thinking content + textChars: number; // Visible text content + toolChars: number; // Tool use JSON content +} + +export interface ModelSummary { + model: 
/**
 * Extracts the project slug from a session file path.
 *
 * Expected layouts:
 *   ~/.claude/projects/{project-slug}/{session}.jsonl
 *   ~/.claude/projects/{project-slug}/subagents/{session}.jsonl
 *
 * Both `/` and `\` are accepted as separators so Windows-style paths resolve
 * too.
 *
 * @param filePath Path of a session `.jsonl` file.
 * @returns The segment following the `projects` directory, or `undefined`
 *   when the path has no `projects` segment or nothing follows it.
 */
function extractProjectFromPath(filePath: string): string | undefined {
  const parts = filePath.split(/[\\/]/);

  // Prefer the `projects` directory that sits directly under `.claude`; an
  // ancestor directory that merely happens to be named `projects` (e.g.
  // /srv/projects/me/.claude/projects/slug) must not win.
  let projectsIdx = parts.findIndex(
    (segment, i) => segment === "projects" && parts[i - 1] === ".claude",
  );
  // Otherwise keep the original behavior: first `projects` segment.
  if (projectsIdx < 0) {
    projectsIdx = parts.indexOf("projects");
  }
  if (projectsIdx < 0) return undefined;

  // An empty segment (trailing separator) is not a usable project name.
  const slug = parts[projectsIdx + 1];
  return slug ? slug : undefined;
}

/**
 * Reports whether a session file lives inside a `subagents` directory.
 *
 * @param filePath Path of a session `.jsonl` file.
 * @returns `true` when the path contains a `subagents` directory segment.
 */
function isSubagentPath(filePath: string): boolean {
  return /[\\/]subagents[\\/]/.test(filePath);
}

/**
 * Resolves the project that owns a subagent session.
 *
 * @param filePath Path of a session `.jsonl` file.
 * @returns The owning project slug for subagent paths; `undefined` for
 *   non-subagent paths or when no project can be extracted.
 */
function getParentProject(filePath: string): string | undefined {
  if (!isSubagentPath(filePath)) return undefined;
  // The subagents folder is inside the project folder, so the project slug
  // is extracted exactly as for a main session.
  return extractProjectFromPath(filePath);
}
/**
 * Collapses a full model identifier into a short `variant-major[-minor]`
 * label so sessions can be grouped by model family and version.
 *
 * Examples:
 *   claude-3-5-sonnet-20241022 -> sonnet-3-5
 *   claude-opus-4-5-20250101   -> opus-4-5
 *   claude-sonnet-4-6-20250201 -> sonnet-4-6
 *   claude-sonnet-4-20250514   -> sonnet-4   (date stamp is not a minor version)
 *
 * @param model Raw model identifier as recorded in the session log.
 * @returns The normalized label; identifiers that match neither naming scheme
 *   are returned with a trailing 8-digit date removed and truncated to 20
 *   characters.
 */
function normalizeModelName(model: string): string {
  // Variant-first scheme: claude-{variant}-{major}[-{minor}][-{date}].
  // The minor version is limited to 1–2 digits that are NOT followed by
  // another digit; otherwise the 8-digit date stamp of an id without a minor
  // version would be captured as the minor.
  const variantFirst = model.match(
    /claude[-_]?(opus|sonnet|haiku)[-_]?(\d+)(?:[-_](\d{1,2})(?!\d))?/i,
  );
  if (variantFirst) {
    // The pattern is case-insensitive, so lower-case the variant to keep a
    // single grouping key per model.
    const variant = (variantFirst[1] ?? "").toLowerCase();
    const major = variantFirst[2];
    const minor = variantFirst[3];
    return minor ? `${variant}-${major}-${minor}` : `${variant}-${major}`;
  }

  // Version-first scheme: claude-{major}[-{minor}]-{variant}[-{date}].
  const versionFirst = model.match(
    /claude[-_](\d+)[-_](\d+)?[-_]?(opus|sonnet|haiku)/i,
  );
  if (versionFirst) {
    const major = versionFirst[1];
    const minor = versionFirst[2];
    const variant = (versionFirst[3] ?? "").toLowerCase();
    return minor ? `${variant}-${major}-${minor}` : `${variant}-${major}`;
  }

  // Unknown scheme: drop a trailing date stamp and shorten.
  return model.replace(/[-_]\d{8}$/, "").slice(0, 20);
}
/**
 * Formats a count compactly for table cells: values below 1000 are printed
 * as-is, thousands get a `K` suffix and millions an `M` suffix, each with one
 * decimal place.
 *
 * @param n The count to format.
 * @returns The compact string, e.g. `999`, `1.5K`, `2.5M`.
 */
function formatNumber(n: number): string {
  if (n < 1000) return n.toString();

  if (n < 1000000) {
    const thousands = (n / 1000).toFixed(1);
    // Rounding to one decimal can carry a value such as 999999 up to
    // "1000.0"; in that case fall through and report it in millions instead
    // of printing "1000.0K".
    if (thousands !== "1000.0") return `${thousands}K`;
  }

  return `${(n / 1000000).toFixed(1)}M`;
}
m.model.slice(0, 15); + const density = `${Math.round(m.thinkingDensity * 100)}%`; + lines.push( + `${chalk.bold(modelName.padEnd(15))} │ ${String(m.turns).padStart(7)} │ ${String(m.turnsWithThinking).padStart(7)} │ ${density.padStart(7)} │ ${formatNumber(m.avgThinkingWhenPresent).padStart(9)} │ ${String(m.sessionsNoThinking).padStart(6)} │ ${String(m.sessionsLight).padStart(6)} │ ${String(m.sessionsMedium).padStart(6)} │ ${String(m.sessionsHeavy).padStart(6)}`, + ); + } + lines.push(chalk.dim("─".repeat(90))); + lines.push( + chalk.dim( + "Density = % turns with thinking. Avg Chars = avg per thinking turn. None/Light/Med/Heavy = session count by total thinking (<5K/<50K/50K+).", + ), + ); + lines.push(""); + } + + // Main Sessions Table + if (result.mainSessions.length > 0) { + lines.push(chalk.bold("Main Sessions")); + lines.push(chalk.dim("─".repeat(95))); + lines.push( + chalk.dim( + `${"Session".padEnd(24)} │ ${"Model".padEnd(12)} │ ${"Turns".padStart(5)} │ ${"W/Thnk".padStart(6)} │ ${"Density".padStart(7)} │ ${"Output".padStart(8)} │ ${"Think".padStart(8)} │ ${"Avg/Turn".padStart(8)}`, + ), + ); + lines.push(chalk.dim("─".repeat(95))); + + for (const s of result.mainSessions.slice(0, 15)) { + const sessionShort = s.sessionId.slice(0, 22); + const modelShort = s.model.slice(0, 12); + const density = + s.turns > 0 + ? `${Math.round((s.turnsWithThinking / s.turns) * 100)}%` + : "0%"; + const avgPerTurn = + s.turnsWithThinking > 0 + ? formatNumber(Math.round(s.thinkingChars / s.turnsWithThinking)) + : "—"; + lines.push( + `${sessionShort.padEnd(24)} │ ${chalk.cyan(modelShort.padEnd(12))} │ ${String(s.turns).padStart(5)} │ ${String(s.turnsWithThinking).padStart(6)} │ ${density.padStart(7)} │ ${formatNumber(s.outputTokens).padStart(8)} │ ${formatNumber(s.thinkingChars).padStart(8)} │ ${avgPerTurn.padStart(8)}`, + ); + } + + if (result.mainSessions.length > 15) { + lines.push( + chalk.dim(` ... 
and ${result.mainSessions.length - 15} more sessions`), + ); + } + lines.push(chalk.dim("─".repeat(95))); + lines.push(""); + } + + // Subagent Sessions Table + if (result.subagentSessions.length > 0) { + lines.push(chalk.bold("Subagent Sessions")); + lines.push(chalk.dim("─".repeat(95))); + lines.push( + chalk.dim( + `${"Agent ID".padEnd(24)} │ ${"Model".padEnd(12)} │ ${"Turns".padStart(5)} │ ${"W/Thnk".padStart(6)} │ ${"Density".padStart(7)} │ ${"Output".padStart(8)} │ ${"Think".padStart(8)} │ ${"Avg/Turn".padStart(8)}`, + ), + ); + lines.push(chalk.dim("─".repeat(95))); + + for (const s of result.subagentSessions.slice(0, 10)) { + const agentShort = s.sessionId.slice(0, 22); + const modelShort = s.model.slice(0, 12); + const density = + s.turns > 0 + ? `${Math.round((s.turnsWithThinking / s.turns) * 100)}%` + : "0%"; + const avgPerTurn = + s.turnsWithThinking > 0 + ? formatNumber(Math.round(s.thinkingChars / s.turnsWithThinking)) + : "—"; + lines.push( + `${agentShort.padEnd(24)} │ ${chalk.yellow(modelShort.padEnd(12))} │ ${String(s.turns).padStart(5)} │ ${String(s.turnsWithThinking).padStart(6)} │ ${density.padStart(7)} │ ${formatNumber(s.outputTokens).padStart(8)} │ ${formatNumber(s.thinkingChars).padStart(8)} │ ${avgPerTurn.padStart(8)}`, + ); + } + + if (result.subagentSessions.length > 10) { + lines.push( + chalk.dim( + ` ... 
and ${result.subagentSessions.length - 10} more subagent sessions`, + ), + ); + } + lines.push(chalk.dim("─".repeat(95))); + lines.push(""); + } + + // Summary + lines.push( + chalk.dim( + `Total: ${result.totalSessions} sessions, ${result.totalTurns} turns, ${formatNumber(result.totalOutputTokens)} output tokens`, + ), + ); + + return lines.join("\n"); +} + +function formatTokensJson(result: TokenAnalysisResult): string { + return JSON.stringify(result, null, 2); +} + +export function registerTokensCommand(parent: Command): void { + parent + .command("tokens") + .description("Analyze thinking token usage across sessions") + .option("--project ", "Filter by project directory") + .option("--since