codeflash-ai · KRRT7 · Apr 23, 2026 · Apr 23, 2026 · Apr 23, 2026
diff --git a/.claude/hooks/bash-guard.sh b/.claude/hooks/bash-guard.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+# PreToolUse hook: Block Bash calls that should use dedicated tools.
+# Exit 0 = allow, Exit 2 = block (message on stderr).
+#
+# Reads the hook payload (JSON) from stdin and inspects `.tool_input.command`.
+# Only the first command word (after leading VAR=value assignments) is checked,
+# plus any line that starts with `echo` and contains whitespace followed by `>`.
+
+INPUT=$(cat 2>/dev/null || true)
+# If jq is missing or the payload is not valid JSON, COMMAND ends up empty and
+# the call is allowed.
+COMMAND=$(printf '%s' "$INPUT" | jq -r '.tool_input.command // empty' 2>/dev/null || true)
+
+[ -z "$COMMAND" ] && exit 0
+
+# Strip leading env vars (FOO=bar cmd ...) and whitespace to get the actual command
+STRIPPED=$(printf '%s\n' "$COMMAND" | sed 's/^[[:space:]]*\([A-Za-z_][A-Za-z0-9_]*=[^[:space:]]*[[:space:]]*\)*//')
+# Take the first word of the first non-blank line only. Printing $1 for every
+# line would turn FIRST_CMD into a multi-line string for multi-line commands,
+# which matches none of the patterns below and lets the command through.
+FIRST_CMD=$(printf '%s\n' "$STRIPPED" | awk 'NF { print $1; exit }')
+
+case "$FIRST_CMD" in
+    grep|egrep|fgrep|rg)
+        echo "BLOCKED: Use the Grep tool instead of \`$FIRST_CMD\`. It provides better output and permissions handling." >&2
+        exit 2
+        ;;
+    find)
+        echo "BLOCKED: Use the Glob tool instead of \`find\`. Glob is faster and returns results sorted by modification time." >&2
+        exit 2
+        ;;
+    cat|head|tail)
+        echo "BLOCKED: Use the Read tool instead of \`$FIRST_CMD\`. Read provides line numbers and supports images/PDFs." >&2
+        exit 2
+        ;;
+    sed)
+        # Only in-place edits are blocked; a plain `sed` filter falls through.
+        if printf '%s\n' "$COMMAND" | grep -qE '(^|[[:space:]])sed[[:space:]]+-i'; then
+            echo "BLOCKED: Use the Edit tool instead of \`sed -i\`. Edit tracks changes properly." >&2
+            exit 2
+        fi
+        ;;
+esac
+
+# echo with file redirection (echo "..." > file)
+if printf '%s\n' "$STRIPPED" | grep -qE '^echo\b.*[[:space:]]>'; then
+    echo "BLOCKED: Use the Write tool instead of \`echo >\`. Write provides proper file creation." >&2
+    exit 2
+fi
+
+exit 0
diff --git a/.claude/hooks/post-compact.sh b/.claude/hooks/post-compact.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+# PreCompact hook: Inject state preservation guidance before context compaction.
+
+cd "$CLAUDE_PROJECT_DIR" 2>/dev/null || exit 0
+
+STATE=""
+
+BRANCH=$(git branch --show-current 2>/dev/null)
+[ -n "$BRANCH" ] && STATE="${STATE}Branch: ${BRANCH}\n"
+
+DIRTY=$(git status --porcelain 2>/dev/null)
+if [ -n "$DIRTY" ]; then
+    COUNT=$(echo "$DIRTY" | wc -l | tr -d ' ')
+    STATE="${STATE}Uncommitted files (${COUNT}):\n${DIRTY}\n"
+fi
+
+UPSTREAM=$(git rev-parse --abbrev-ref '@{upstream}' 2>/dev/null)
+if [ -n "$UPSTREAM" ]; then
+    AHEAD=$(git rev-list --count "${UPSTREAM}..HEAD" 2>/dev/null)
+    [ "$AHEAD" -gt 0 ] 2>/dev/null && STATE="${STATE}Unpushed commits: ${AHEAD}\n"
+fi
+
+RECENT=$(git log --oneline -5 2>/dev/null)
+[ -n "$RECENT" ] && STATE="${STATE}Recent commits:\n${RECENT}\n"
+
+LATEST_HANDOFF=$(ls -t "$CLAUDE_PROJECT_DIR/.claude/handoffs/"*.md 2>/dev/null | head -1)
+if [ -n "$LATEST_HANDOFF" ] && [ -f "$LATEST_HANDOFF" ]; then
+    HANDOFF_CONTENT=$(head -40 "$LATEST_HANDOFF" 2>/dev/null)
+    [ -n "$HANDOFF_CONTENT" ] && STATE="${STATE}\nHandoff context:\n${HANDOFF_CONTENT}\n"
+fi
+
+STATE="${STATE}\nProject conventions to preserve:\n"
+STATE="${STATE}- Python 3.9+, uv for all tooling, ruff + mypy via prek\n"
+STATE="${STATE}- Verification: uv run prek (single command for lint/format/types)\n"
+STATE="${STATE}- Pre-push: uv run prek run --from-ref origin/<base>\n"
+STATE="${STATE}- Conventional commits: fix:, feat:, refactor:, test:, chore:\n"
+STATE="${STATE}- Result type: Success(value) / Failure(error), check with is_successful()\n"
+STATE="${STATE}- Language singleton: set_current_language() / current_language()\n"
+STATE="${STATE}- libcst for code transforms, ast for read-only analysis\n"
+
+[ -z "$STATE" ] && exit 0
+
+cat <<EOF
+{
+  "systemMessage": "PRESERVE the following session state through compaction:\n$(echo -e "$STATE" | sed 's/"/\\"/g' | sed ':a;N;$!ba;s/\n/\\n/g')"
+}
+EOF
+
+exit 0
diff --git a/.claude/hooks/post-edit-lint.sh b/.claude/hooks/post-edit-lint.sh
@@ -1,5 +1,4 @@
 #!/usr/bin/env bash
-# Everyone is on macOS so this should be fine, we don't account for Windows
 set -euo pipefail
 
 input=$(cat)
@@ -10,6 +9,5 @@ if [[ -z "$file_path" || ! -f "$file_path" ]]; then
 fi
 
 if [[ "$file_path" == *.py ]]; then
-    # First run auto-fixes formatting; second run catches real lint errors
     uv run prek --files "$file_path" 2>/dev/null || uv run prek --files "$file_path"
 fi
diff --git a/.claude/hooks/require-read.sh b/.claude/hooks/require-read.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+# PreToolUse hook: Block Write/Edit on existing files that haven't been Read first.
+# Exit 0 = allow, Exit 2 = block (message on stderr).
+
+INPUT=$(cat 2>/dev/null || true)
+FILE_PATH=$(echo "$INPUT" | jq -r '.tool_input.file_path // empty' 2>/dev/null || true)
+
+[ -z "$FILE_PATH" ] && exit 0
+
+# New files don't need prior reads
+[ ! -f "$FILE_PATH" ] && exit 0
+
+TRACKER="$CLAUDE_PROJECT_DIR/.claude/.read-tracker"
+
+if [ ! -f "$TRACKER" ]; then
+    echo "BLOCKED: Read \`$(basename "$FILE_PATH")\` first before modifying it." >&2
+    exit 2
+fi
+
+if grep -qxF "$FILE_PATH" "$TRACKER"; then
+    exit 0
+fi
+
+echo "BLOCKED: Read \`$(basename "$FILE_PATH")\` first before modifying it." >&2
+exit 2
diff --git a/.claude/hooks/status-line.sh b/.claude/hooks/status-line.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+# Status line: derive context from git state.
+
+input=$(cat)
+project_dir=$(echo "$input" | jq -r '.workspace.project_dir')
+
+user=$(whoami)
+branch=$(git -C "$project_dir" branch --show-current 2>/dev/null)
+
+changed=$(git -C "$project_dir" diff --name-only HEAD 2>/dev/null)
+[ -z "$changed" ] && changed=$(git -C "$project_dir" diff --name-only 2>/dev/null)
+[ -z "$changed" ] && changed=$(git -C "$project_dir" diff --name-only --cached 2>/dev/null)
+
+if [ -n "$changed" ]; then
+    area=$(echo "$changed" | sed 's|/.*||' | sort | uniq -c | sort -rn | head -1 | awk '{print $2}')
+else
+    area=""
+fi
+
+context=""
+case "$area" in
+    codeflash)
+        subsystem=$(echo "$changed" | grep '^codeflash/' | sed 's|^codeflash/||; s|/.*||' | sort | uniq -c | sort -rn | head -1 | awk '{print $2}')
+        [ -n "$subsystem" ] && context="editing $subsystem" ;;
+    tests)
+        target=$(echo "$changed" | grep '^tests/' | sed 's|^tests/||; s|/.*||' | sort -u | head -1)
+        [ -n "$target" ] && context="testing $target" ;;
+    .claude)
+        context="configuring claude" ;;
+esac
+
+if [ -z "$context" ] && [ -n "$branch" ]; then
+    case "$branch" in
+        feat/*|cf-*) context="building: ${branch#feat/}" ;;
+        fix/*)       context="fixing: ${branch#fix/}" ;;
+        refactor/*)  context="refactoring: ${branch#refactor/}" ;;
+        test/*)      context="testing: ${branch#test/}" ;;
+        chore/*)     context="chore: ${branch#chore/}" ;;
+    esac
+fi
+
+dirty=""
+if [ -n "$(git -C "$project_dir" status --porcelain 2>/dev/null)" ]; then
+    dirty=" *"
+fi
+
+status="$user | codeflash"
+[ -n "$context" ] && status="$status | $context"
+[ -n "$branch" ] && status="$status | $branch$dirty"
+echo "$status"
diff --git a/.claude/hooks/track-read.sh b/.claude/hooks/track-read.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+# PostToolUse hook: Track Read calls for the require-read guard.
+
+INPUT=$(cat 2>/dev/null || true)
+FILE_PATH=$(echo "$INPUT" | jq -r '.tool_input.file_path // empty' 2>/dev/null || true)
+
+[ -z "$FILE_PATH" ] && exit 0
+
+echo "$FILE_PATH" >> "$CLAUDE_PROJECT_DIR/.claude/.read-tracker"
+exit 0
diff --git a/.claude/rules/code-style.md b/.claude/rules/code-style.md
@@ -4,10 +4,11 @@
 - **Python**: 3.9+ syntax
 - **Package management**: Always use `uv`, never `pip`
 - **Tooling**: Ruff for linting/formatting, mypy strict mode, prek for pre-commit checks
-- **Comments**: Minimal - only explain "why", not "what"
-- **Docstrings**: Do not add docstrings to new or changed code unless the user explicitly asks for them — not even one-liners. The codebase intentionally keeps functions self-documenting through clear naming and type annotations
-- **Types**: Match the type annotation style of surrounding code — the codebase uses annotations, so add them in new code
-- **Naming**: NEVER use leading underscores (`_function_name`) - Python has no true private functions, use public names
+- **Comments**: Minimal — only explain "why", not "what"
+- **Docstrings**: Do not add docstrings unless the user explicitly asks
+- **Types**: Match the type annotation style of surrounding code
+- **Naming**: No leading underscores (`_function_name`) — Python has no true private functions
 - **Paths**: Always use absolute paths
-- **Encoding**: Always pass `encoding="utf-8"` to `open()`, `read_text()`, `write_text()`, etc. in new or changed code — Windows defaults to `cp1252` which breaks on non-ASCII content. Don't flag pre-existing code that lacks it unless you're already modifying that line.
-- **Verification**: Use `uv run prek` to verify code — it handles ruff, ty, mypy in one pass. Don't run `ruff`, `mypy`, or `python -c "import ..."` separately; `prek` is the single verification command
+- **Encoding**: Always pass `encoding="utf-8"` to `open()`, `read_text()`, `write_text()` in new or changed code
+- **Verification**: Use `uv run prek` — it handles ruff, ty, mypy in one pass. Don't run them separately
+- **Code transforms**: Use `libcst` for code modification/transformation. `ast` is acceptable for read-only analysis
diff --git a/.claude/rules/debugging.md b/.claude/rules/debugging.md
@@ -0,0 +1,19 @@
+# Debugging
+
+## Root cause first
+
+When encountering a bug, investigate the root cause. Don't patch symptoms. If you're about to add a try/except, a fallback default, or a defensive check — ask whether the real fix is upstream.
+
+## Isolated testing
+
+Prefer running individual test functions over full suites. Only run the full suite when explicitly asked or before pushing.
+
+- Single function: `uv run pytest tests/test_foo.py::TestBar::test_baz -v`
+- Single module: `uv run pytest tests/test_foo.py -v`
+- Full suite: only when asked, or before `git push`
+
+When debugging a specific endpoint or integration, test it directly instead of running the entire pipeline end-to-end.
+
+## Subprocess failures
+
+When a subprocess fails, always log stdout and stderr. "Exit code 1" with no output is useless.
diff --git a/.claude/rules/git.md b/.claude/rules/git.md
@@ -1,19 +1,35 @@
-# Git Commits & Pull Requests
+# Git
 
 ## Commits
+
 - Never commit, amend, or push without explicit permission
-- Don't commit intermediate states — wait until the full implementation is complete, reviewed, and explicitly approved before committing. If the user corrects direction mid-implementation, incorporate the correction before any commit
-- Always create a new branch from `main` before starting any new work — never commit directly to `main` or reuse an existing feature branch for unrelated changes
-- Use conventional commit format: `fix:`, `feat:`, `refactor:`, `docs:`, `test:`, `chore:`
-- Keep commits atomic - one logical change per commit
-- Commit message body should be concise (1-2 sentences max)
-- Merge for simple syncs, rebase when branches have diverged significantly
-- When committing to an external/third-party repo, follow that repo's own conventions for versioning, changelog, and CI
-- Pre-commit: Run `uv run prek` before committing — fix any issues before creating the commit
-- Pre-push: Run `uv run prek run --from-ref origin/<base>` to check all changed files against the PR base — this matches CI behavior and catches issues that per-commit prek misses. To detect the base branch: `gh pr view --json baseRefName -q .baseRefName 2>/dev/null || echo main`
+- Don't commit intermediate states — wait until the full implementation is complete and approved
+- Always create a new branch from `main` — never commit directly to `main`
+- Conventional format: `fix:`, `feat:`, `refactor:`, `docs:`, `test:`, `chore:`
+- First line: imperative verb + what changed, under 72 characters
+- Body for *why*, not *what* — the diff shows what changed
+- One purpose per commit: a bug fix, a new function, a refactor — not all three
+- A commit that adds a function also adds its tests and exports — that's one logical change
+
+## Sizing
+
+- Too small: renaming a variable in one commit, updating its references in another
+- Right size: adding a function with its tests, `__init__` export, and usage update
+- Too large: implementing an entire subsystem in one commit
+
+## Pre-commit / Pre-push
+
+- Pre-commit: Run `uv run prek` before committing
+- Pre-push: Run `uv run prek run --from-ref origin/<base>` to check all changed files against the PR base
 
 ## Pull Requests
-- PR titles should use conventional format
-- Keep the PR body short and straight to the point
+
+- PR titles use conventional format
+- Keep the PR body short and to the point
 - If related to a Linear issue, include `CF-#` in the body
-- Branch naming: `cf-#-title` (lowercase, hyphenated), no other prefixes/suffixes
+- Branch naming: `cf-#-title` (lowercase, hyphenated)
+
+## Branch Hygiene
+
+- Delete feature branches locally after merging (`git branch -d <branch>`)
+- Use `/clean_gone` to prune local branches whose remote tracking branch has been deleted
diff --git a/.claude/rules/github.md b/.claude/rules/github.md
@@ -0,0 +1,5 @@
+# GitHub Interactions
+
+ALWAYS use MCP GitHub tools (`mcp__github__*`) for GitHub operations. Check for a matching MCP tool first — only fall back to `gh` via Bash when no MCP tool exists for the operation.
+
+This also applies to other MCP-connected services (Linear, Granola). MCP first, CLI second.
diff --git a/.claude/rules/language-patterns.md b/.claude/rules/language-patterns.md
@@ -6,8 +6,8 @@ paths:
 # Language Support Patterns
 
 - Current language is a module-level singleton in `languages/current.py` — use `set_current_language()` / `current_language()`, never pass language as a parameter through call chains
-- Use `get_language_support(identifier)` from `languages/registry.py` to get a `LanguageSupport` instance — never import language classes directly
-- New language support classes must use the `@register_language` decorator to register with the extension and language registries
-- `languages/__init__.py` uses `__getattr__` for lazy imports to avoid circular dependencies — follow this pattern when adding new exports
-- Prefer `LanguageSupport` protocol dispatch over `is_python()`/`is_javascript()` guards — remaining guards are being migrated to protocol methods
+- Use `get_language_support(identifier)` from `languages/registry.py` — never import language classes directly
+- New language support classes must use the `@register_language` decorator
+- `languages/__init__.py` uses `__getattr__` for lazy imports to avoid circular dependencies
+- Prefer `LanguageSupport` protocol dispatch over `is_python()`/`is_javascript()` guards
 - `is_javascript()` returns `True` for both JavaScript and TypeScript (still used in ~15 call sites pending migration)
diff --git a/.claude/rules/sessions.md b/.claude/rules/sessions.md
@@ -0,0 +1,27 @@
+# Session Discipline
+
+## Scope
+
+One task per session. Don't mix implementation with communication drafting, transcript search, or strategic planning.
+
+## Duration
+
+Cap sessions at 2-3 hours. Use `/handoff` at natural breakpoints rather than letting auto-compaction degrade context.
+
+- After 1 compaction: consider wrapping up the current task and handing off
+- After 3 compactions: stop, and tell the user to start a fresh session
+- Never continue past 5 compactions — context is too degraded
+
+## Context preservation
+
+When compacting, preserve: modified files list, current branch, test commands used, key decisions made. Use subagents for exploration to keep main context clean.
+
+## No polling
+
+Never poll background tasks. No `wc -l`, no `tail -f`, no `sleep` loops. If a command has to run in the background, start it with `run_in_background` and wait for the completion notification.
+
+## File read budget
+
+If you've read the same file 3+ times in a session, either:
+- The session is too long and compaction destroyed your context — write a handoff
+- You're not retaining key information — write it down in your response before it compacts away
diff --git a/.claude/rules/source-code.md b/.claude/rules/source-code.md
diff --git a/.claude/rules/testing.md b/.claude/rules/testing.md
@@ -4,13 +4,14 @@ paths:
   - "codeflash/**/*test*.py"
 ---
 
-# Testing Conventions
+# Testing
 
-- Code context extraction and replacement tests must always assert for full string equality, no substring matching.
-- Use pytest's `tmp_path` fixture for temp directories — do not use `tempfile.mkdtemp()`, `tempfile.TemporaryDirectory()`, or `NamedTemporaryFile`. Some existing tests still use `tempfile` but new tests must use `tmp_path`.
-- Always call `.resolve()` on Path objects before passing them to functions under test — this ensures absolute paths and resolves symlinks. Example: `source_file = (tmp_path / "example.py").resolve()`
-- Use `.as_posix()` when converting resolved paths to strings (normalizes to forward slashes).
-- Any new feature or bug fix that can be tested automatically must have test cases.
-- If changes affect existing test expectations, update the tests accordingly. Tests must always pass after changes.
-- The pytest plugin patches `time`, `random`, `uuid`, and `datetime` for deterministic test execution — never assume real randomness or real time in verification tests.
-- `conftest.py` uses an autouse fixture that calls `reset_current_language()` — tests always start with Python as the default language.
+- Full string equality for context extraction/replacement tests — no substring matching
+- Use pytest's `tmp_path` fixture — not `tempfile.mkdtemp()`, `tempfile.TemporaryDirectory()`, or `NamedTemporaryFile`
+- Always call `.resolve()` on Path objects before passing to functions under test
+- Use `.as_posix()` when converting resolved paths to strings
+- New features and bug fixes must have test cases
+- The pytest plugin patches `time`, `random`, `uuid`, `datetime` for deterministic execution
+- `conftest.py` autouse fixture calls `reset_current_language()` — tests start with Python as default
+- Prefer running individual tests over full suites: `uv run pytest tests/test_foo.py::TestBar::test_baz -v`
+- Only run the full suite when explicitly asked or before pushing
diff --git a/.claude/rules/workflow.md b/.claude/rules/workflow.md
@@ -1,13 +1,17 @@
 # Workflow
 
 ## Code Changes
-- Before making any changes, outline your approach in 3-5 numbered steps. Include which repo/branch you'll work in, what commands you'll run, and what success looks like. Wait for approval before starting
+
+Before making any changes, outline your approach in 3-5 numbered steps. Include which branch you'll work on, what commands you'll run, and what success looks like. Wait for approval before starting.
 
 ## Response Style
-- When listing items (PRs, functions, optimization targets), always provide the complete list ordered by priority on the first attempt. Do not give partial lists
+
+When listing items (PRs, functions, optimization targets), provide the complete list ordered by priority on the first attempt. No partial lists.
 
 ## Commands
-- When running long-running commands (benchmarks, profiling, optimizers like codeflash), always run them in the foreground. Do not use background processes
+
+Always run long-running commands (benchmarks, profiling, optimizers) in the foreground. Do not use background processes.
 
 ## Debugging
-- When claiming something is a pre-existing issue (e.g., test failures on main), verify by checking out main and running the tests before making that claim
+
+When claiming something is a pre-existing issue (e.g., test failures on main), verify by checking out main and running the tests before making that claim.