diff --git a/.github/scripts/agent-docs-audit.mjs b/.github/scripts/agent-docs-audit.mjs
new file mode 100755
index 0000000000..647f060d6d
--- /dev/null
+++ b/.github/scripts/agent-docs-audit.mjs
@@ -0,0 +1,241 @@
+#!/usr/bin/env node
+/**
+ * Agent-docs semantic audit.
+ *
+ * Three layers, modeled on risk-assess.mjs:
+ *   L1: deterministic scan (sizes, paths, symlinks, broken refs) - free
+ *   L2: Haiku triage per doc - needs review? - ~$0.01/doc
+ *   L3: Sonnet deep analysis on flagged docs - structured findings - ~$0.10/doc
+ *
+ * Usage:
+ *   node agent-docs-audit.mjs                # audit all flagged docs (L1+L2+L3)
+ *   node agent-docs-audit.mjs --only <path>  # audit one specific doc
+ *   node agent-docs-audit.mjs --skip-ai      # L1 only, no API calls
+ *   node agent-docs-audit.mjs --dry-run      # all layers stubbed
+ *
+ * Env:
+ *   ANTHROPIC_API_KEY  required for L2/L3; if missing the script auto-falls back to L1-only
+ *   REPO_ROOT          target repo path (default: cwd)
+ *   POLICY_FILE        path to agent-docs-policy.md (default: <REPO_ROOT>/agent-docs-policy.md)
+ *
+ * Output: Markdown report to stdout, structured JSON to /tmp/agent-docs-audit.json,
+ * L1 markdown to /tmp/agent-docs-audit-l1.md.
+ */
+
+import { existsSync, readFileSync, writeFileSync } from 'node:fs';
+import { join, resolve } from 'node:path';
+import { runL1Scan, flaggedForReview, renderL1Markdown } from './agent-docs-l1.mjs';
+
+delete process.env.CLAUDECODE;
+
+const REPO_ROOT = resolve(process.env.REPO_ROOT ?? process.cwd());
+const POLICY_FILE_DEFAULT = join(REPO_ROOT, 'agent-docs-policy.md');
+const POLICY_FILE = process.env.POLICY_FILE ?? POLICY_FILE_DEFAULT;
+
+const args = process.argv.slice(2);
+const DRY_RUN = args.includes('--dry-run');
+const SKIP_AI = args.includes('--skip-ai') || (!DRY_RUN && !process.env.ANTHROPIC_API_KEY);
+const ONLY = args.includes('--only') ? args[args.indexOf('--only') + 1] : null;
+
+// ── Layer 2: Haiku triage ──
+
+async function haikuTriage(doc, policyText) {
+  if (DRY_RUN) return { decision: 'review', reason: 'dry-run forces review', cost: 0 };
+  const Anthropic = (await import('@anthropic-ai/sdk')).default;
+  const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
+  const docContent = readFileSync(join(REPO_ROOT, doc.path), 'utf-8');
+  const prompt = `You are triaging an agent-context doc for review.
+
+Doc path: ${doc.path}
+Doc size: ${doc.lines} lines
+Deterministic flags: ${doc.reasons.join('; ')}
+
+Doc content:
+\`\`\`markdown
+${docContent}
+\`\`\`
+
+Policy excerpt:
+${policyText.slice(0, 2000)}
+
+Decide whether this doc needs deep review against the policy. Respond using the triage tool.
Bias toward "skip": answer "review" only when the deterministic flags suggest real issues (broken refs likely stale; large size likely contains content that could be trimmed/moved).`;
+
+  const result = await client.messages.create({
+    model: 'claude-haiku-4-5-20251001',
+    max_tokens: 256,
+    tools: [{
+      name: 'triage',
+      description: 'Decide whether the doc needs deep review.',
+      input_schema: {
+        type: 'object',
+        properties: {
+          decision: { type: 'string', enum: ['review', 'skip'] },
+          reason: { type: 'string', description: 'One sentence' },
+        },
+        required: ['decision', 'reason'],
+      },
+    }],
+    tool_choice: { type: 'tool', name: 'triage' },
+    messages: [{ role: 'user', content: prompt }],
+  });
+  const toolUse = result.content.find((b) => b.type === 'tool_use');
+  const cost = (result.usage.input_tokens * 0.0000008) + (result.usage.output_tokens * 0.000004);
+  return { ...toolUse.input, cost };
+}
+
+// ── Layer 3: Sonnet deep analysis ──
+
+async function sonnetDeep(doc, policyText) {
+  if (DRY_RUN) {
+    return { findings: [{ label: 'INVESTIGATE', section: '(dry-run)', claim: '', reason: 'no API call', evidence: '', suggested_action: 'rerun without --dry-run' }], cost: 0, durationMs: 0, toolCalls: [] };
+  }
+  const { query } = await import('@anthropic-ai/claude-agent-sdk');
+  const docContent = readFileSync(join(REPO_ROOT, doc.path), 'utf-8');
+
+  const prompt = `You are auditing an agent-context doc against a policy. Identify sections that are stale, incorrect, redundant, or misplaced.
+
+Doc path: ${doc.path}
+Doc size: ${doc.lines} lines
+Deterministic flags: ${doc.reasons.join('; ')}
+Deterministic broken refs (if any): ${doc.brokenRefs.join(', ') || 'none'}
+
+Doc content:
+\`\`\`markdown
+${docContent}
+\`\`\`
+
+Policy:
+${policyText}
+
+Scope your investigation tightly:
+1. Verify each deterministic broken-ref flag (real miss or just shorthand?). Emit UPDATE or INVESTIGATE.
+2. Look at the 2 largest H2 sections. Decide if either is a MOVE candidate per the policy (duplicates content elsewhere, package-specific, etc.). Cite the destination doc.
+3. Sample 2-3 specific concrete claims (a command, path, function name). Verify them.
+
+Do not pad the report. Most sections will produce no finding - that is correct. Prefer "drop the hardcoded value" over "update to the current value" when the value is likely to drift.
+
+Limit yourself to 8 tool calls total. Use Grep for identifiers, Read for short files, Glob for existence, Bash sparingly (git log, complex rg).
+
+End your response with this JSON. No markdown fences:
+
+{"findings":[{"label":"KEEP|TRIM|MOVE|UPDATE|INVESTIGATE","section":"H2 or H3 header text","claim":"verbatim excerpt or paraphrase","reason":"why this label","evidence":"what you checked and what you found","suggested_action":"concrete next step"}]}
+
+Only emit KEEP when explaining why a flagged section should remain. The default for verified content is silence.`;
+
+  let resultText = '';
+  let cost = 0;
+  let durationMs = 0;
+  const toolCalls = [];
+
+  for await (const msg of query({
+    prompt,
+    options: {
+      // allowedTools is not a strict allowlist under bypassPermissions; Bash
+      // gets through. Listing what we expect to use; relying on the prompt
+      // budget ("8 tool calls total") and maxTurns to constrain cost.
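+      // disallowedTools below is the firmer backstop: the mutating tools
+      // (Edit/Write) and subagents (Task) are blocked outright.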
+ allowedTools: ['Read', 'Glob', 'Grep', 'Bash'], + disallowedTools: ['Edit', 'Write', 'Task', 'WebFetch', 'WebSearch', 'mcp__*'], + permissionMode: 'bypassPermissions', + maxTurns: 15, + cwd: REPO_ROOT, + }, + })) { + if (msg.type === 'assistant' && msg.message?.content) { + for (const block of msg.message.content) { + if (block.type === 'text') resultText = block.text; + if (block.type === 'tool_use') { + toolCalls.push(`${block.name}: ${JSON.stringify(block.input).slice(0, 100)}`); + } + } + } + if (msg.type === 'result') { + cost = msg.total_cost_usd ?? 0; + durationMs = msg.duration_api_ms ?? msg.duration_ms ?? 0; + } + } + + const jsonMatch = resultText.match(/\{[\s\S]*"findings"[\s\S]*\}/); + if (!jsonMatch) throw new Error(`Sonnet did not produce findings JSON. Tail: ${resultText.slice(-300)}`); + const parsed = JSON.parse(jsonMatch[0]); + return { ...parsed, cost, durationMs, toolCalls }; +} + +// ── Output ── + +function renderMarkdown(report) { + const lines = [`# Agent docs audit\n`, `Target: \`${REPO_ROOT}\`\n`]; + if (DRY_RUN) lines.push('**DRY RUN** - no API calls were made.\n'); + lines.push(`Total cost: $${report.totalCost.toFixed(4)} (${report.docs.length} docs reviewed)\n`); + for (const d of report.docs) { + lines.push(`## \`${d.path}\` (${d.lines} lines)\n`); + lines.push(`Reasons flagged: ${d.reasons.join('; ')}`); + lines.push(`Triage: ${d.triage?.decision ?? 'n/a'} - ${d.triage?.reason ?? ''}`); + if (!d.deep) { lines.push(''); continue; } + if (d.deep.findings.length === 0) { + lines.push('No findings.\n'); + continue; + } + lines.push('| Label | Section | Reason | Suggested action |'); + lines.push('|---|---|---|---|'); + for (const f of d.deep.findings) { + lines.push(`| ${f.label} | ${f.section} | ${f.reason} | ${f.suggested_action} |`); + } + lines.push(''); + } + return lines.join('\n'); +} + +// ── Main ── + +async function main() { + // SKIP_AI is set automatically if ANTHROPIC_API_KEY is missing, or via --skip-ai. + // In that case the script runs L1 only and the workflow uploads an L1-only report. + + const policyText = existsSync(POLICY_FILE) + ? readFileSync(POLICY_FILE, 'utf-8') + : '# Default policy\n\n(No policy file found; using built-in defaults: root <= 120 lines, nested <= 200, label findings KEEP/TRIM/MOVE/UPDATE/INVESTIGATE.)'; + + console.error('[L1] running deterministic scan...'); + const scan = runL1Scan(REPO_ROOT); + writeFileSync('/tmp/agent-docs-audit-l1.md', renderL1Markdown(scan)); + let flagged = flaggedForReview(scan).map((f) => ({ + path: f.relPath, + lines: f.lineCount, + kind: f.isSymlink ? 'symlink' : 'file', + brokenRefs: f.brokenPathRefs, + reasons: f.reasons, + })); + if (ONLY) flagged = flagged.filter((d) => d.path === ONLY); + console.error(`[L1] ${scan.files.length} doc(s) inventoried, ${flagged.length} flagged for review${ONLY ? ` (filtered to --only ${ONLY})` : ''}`); + + if (SKIP_AI) { + console.error('[L2/L3] skipped (no ANTHROPIC_API_KEY or --skip-ai). 
Writing L1 report only.');
+    const stub = { docs: flagged.map((d) => ({ ...d, triage: null, deep: null })), totalCost: 0, l1Only: true };
+    writeFileSync('/tmp/agent-docs-audit.json', JSON.stringify(stub, null, 2));
+    console.log(renderL1Markdown(scan));
+    return;
+  }
+
+  const report = { docs: [], totalCost: 0 };
+  for (const doc of flagged) {
+    console.error(`[L2] triage: ${doc.path}`);
+    const triage = await haikuTriage(doc, policyText);
+    report.totalCost += triage.cost;
+    const entry = { ...doc, triage, deep: null };
+    if (triage.decision === 'review') {
+      console.error(`[L3] deep analysis: ${doc.path}`);
+      const deep = await sonnetDeep(doc, policyText);
+      report.totalCost += deep.cost;
+      entry.deep = deep;
+    }
+    report.docs.push(entry);
+  }
+
+  writeFileSync('/tmp/agent-docs-audit.json', JSON.stringify(report, null, 2));
+  console.log(renderMarkdown(report));
+}
+
+main().catch((err) => {
+  console.error(`audit failed: ${err.stack || err.message}`);
+  process.exit(1);
+});
diff --git a/.github/scripts/agent-docs-l1.mjs b/.github/scripts/agent-docs-l1.mjs
new file mode 100755
index 0000000000..deb61c2206
--- /dev/null
+++ b/.github/scripts/agent-docs-l1.mjs
@@ -0,0 +1,426 @@
+#!/usr/bin/env node
+/**
+ * Layer 1: deterministic agent-docs scan. No model calls, no network.
+ *
+ * Walks the repo (respecting ignore patterns), inventories every agent doc,
+ * classifies AGENTS.md/CLAUDE.md pairs, detects broken @imports and broken
+ * path refs (with context-aware resolution), surfaces over-budget files.
+ *
+ * Exported entry: runL1Scan(repoRoot) -> { files, pairs, config }
+ * CLI mode: prints a Markdown report to stdout. Usage:
+ *   node agent-docs-l1.mjs [--target <path>]
+ */
+
+import { existsSync, lstatSync, readdirSync, readFileSync, readlinkSync } from 'node:fs';
+import { dirname, isAbsolute, join, relative, resolve } from 'node:path';
+
+// ── Config ─────────────────────────────────────────────────────────────────
+// Inline for SuperDoc. If we extend this script to other repos, lift to YAML.
+
+export const CONFIG = {
+  budgets: { root: 120, nestedWarn: 200 },
+  ignore: [
+    'node_modules',
+    '.git',
+    '.tmp',
+    'dist',
+    'devtools/visual-testing/node_modules',
+    'tests/consumer-typecheck/node_modules',
+  ],
+  intentionalDifferentPairs: ['packages/superdoc/AGENTS.md:packages/superdoc/CLAUDE.md'],
+  knownCommands: [
+    'pnpm test',
+    'pnpm test:behavior',
+    'pnpm test:layout',
+    'pnpm test:visual',
+    'pnpm dev',
+    'pnpm build',
+    'pnpm corpus:upload',
+    'pnpm corpus:pull',
+    'pnpm layout:compare',
+  ],
+  docBasenames: new Set(['AGENTS.md', 'CLAUDE.md', 'CLAUDE.local.md']),
+  rulesDir: '.claude/rules',
+};
+
+// ── Walk ───────────────────────────────────────────────────────────────────
+
+function shouldIgnore(relPath) {
+  for (const ig of CONFIG.ignore) {
+    if (relPath === ig || relPath.startsWith(`${ig}/`)) return true;
+    if (relPath.includes(`/${ig}/`) || relPath.endsWith(`/${ig}`)) return true;
+  }
+  return false;
+}
+
+function findAgentDocs(repoRoot) {
+  const found = [];
+  function walk(absDir, relDir) {
+    let entries;
+    try {
+      entries = readdirSync(absDir, { withFileTypes: true });
+    } catch {
+      return;
+    }
+    for (const entry of entries) {
+      const relPath = relDir ? `${relDir}/${entry.name}` : entry.name;
+      if (shouldIgnore(relPath)) continue;
+      const abs = join(absDir, entry.name);
+      if (entry.isSymbolicLink()) {
+        // A symlinked AGENTS.md or CLAUDE.md still counts.
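+        // Record the link itself without traversing it; inspectFile() later
+        // resolves the target and flags it if broken.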
+ if (CONFIG.docBasenames.has(entry.name)) found.push(relPath); + continue; + } + if (entry.isDirectory()) { + walk(abs, relPath); + continue; + } + if (entry.isFile()) { + if (CONFIG.docBasenames.has(entry.name)) found.push(relPath); + else if (relDir.endsWith(CONFIG.rulesDir) && entry.name.endsWith('.md')) found.push(relPath); + } + } + } + walk(repoRoot, ''); + return found.sort(); +} + +// ── Per-file inspection ──────────────────────────────────────────────────── + +function inspectFile(repoRoot, relPath) { + const abs = join(repoRoot, relPath); + const lst = lstatSync(abs); + const isSymlink = lst.isSymbolicLink(); + let symlinkTarget = null; + let brokenSymlinkTarget = null; + if (isSymlink) { + const raw = readlinkSync(abs); + symlinkTarget = isAbsolute(raw) ? raw : resolve(dirname(abs), raw); + if (!existsSync(symlinkTarget)) brokenSymlinkTarget = symlinkTarget; + } + + // If the symlink target is missing, readFileSync(abs) would follow the link + // and throw ENOENT, killing the audit before any artifacts upload. Catch + // and surface as a finding instead. + if (brokenSymlinkTarget) { + return { + relPath, + absPath: abs, + isSymlink: true, + symlinkTarget, + brokenSymlinkTarget, + lineCount: 0, + brokenPathRefs: [], + brokenImports: [], + unresolvedCommands: [], + sections: [], + }; + } + + const readPath = isSymlink && symlinkTarget ? symlinkTarget : abs; + let content = ''; + try { + content = readFileSync(readPath, 'utf-8'); + } catch (err) { + return { + relPath, + absPath: abs, + isSymlink, + symlinkTarget, + brokenSymlinkTarget: readPath, + readError: err.message, + lineCount: 0, + brokenPathRefs: [], + brokenImports: [], + unresolvedCommands: [], + sections: [], + }; + } + const newlines = (content.match(/\n/g) ?? []).length; + const lineCount = content.endsWith('\n') ? 
newlines : newlines + 1; + + return { + relPath, + absPath: abs, + isSymlink, + symlinkTarget, + lineCount, + brokenPathRefs: findBrokenPathRefs(content, repoRoot, relPath), + brokenImports: findBrokenImports(content, dirname(abs)), + unresolvedCommands: findUnresolvedCommands(content, repoRoot), + sections: extractSections(content), + }; +} + +function stripCodeBlocks(content) { + return content.replace(/```[\s\S]*?```/g, ''); +} + +function findPackageRoot(docRelPath) { + const parts = docRelPath.split('/'); + if (parts.length < 2) return null; + if (['packages', 'apps', 'shared'].includes(parts[0])) return `${parts[0]}/${parts[1]}`; + if (parts[0] === 'tests') return parts.slice(0, 2).join('/'); + return null; +} + +function looksLikeFilesystemPath(s) { + if (!s || s.length < 2 || s.length > 160) return false; + if (!s.includes('/')) return false; + if (/\s/.test(s)) return false; + if (/^https?:\/\//.test(s)) return false; + if (/[*?{}[\]<>]/.test(s)) return false; + if (s.startsWith('@') || s.startsWith('~/')) return false; + if (/^[a-z]+:\/\//i.test(s)) return false; + if (/^cdn\./.test(s) || /^unpkg\./.test(s)) return false; + if (/^\/[a-z]/.test(s) && /(icon|image|img|asset|file|path)/i.test(s)) return false; + if (/^(cd|pnpm|npm|yarn|bun|node|git|ls|cat|grep|find|sed|awk|mkdir|rm|mv|cp|echo|export|source|sudo|brew|docker|curl|wget)\b/.test(s)) return false; + return /[a-zA-Z0-9_.-]\/[a-zA-Z0-9_.-]/.test(s); +} + +function resolveInContext(repoRoot, candidate, docDir, pkgRoot) { + const tryPaths = [candidate]; + if (docDir && docDir !== '.') tryPaths.push(`${docDir}/${candidate}`); + if (pkgRoot) tryPaths.push(`${pkgRoot}/${candidate}`); + tryPaths.push(`packages/${candidate}`, `apps/${candidate}`, `shared/${candidate}`); + for (const p of tryPaths) { + const norm = p.replace(/\/+/g, '/'); + if (existsSync(join(repoRoot, norm))) return true; + } + return false; +} + +function findBrokenPathRefs(content, repoRoot, docRelPath) { + const prose = stripCodeBlocks(content); + const docDir = dirname(docRelPath); + const pkgRoot = findPackageRoot(docRelPath); + const refs = new Set(); + const re = /`([^`\n]{2,200})`/g; + let m; + while ((m = re.exec(prose)) !== null) { + const candidate = m[1].trim().replace(/[#?].*$/, ''); + if (!looksLikeFilesystemPath(candidate)) continue; + if (resolveInContext(repoRoot, candidate, docDir, pkgRoot)) continue; + refs.add(candidate); + } + return [...refs].sort(); +} + +function findBrokenImports(content, fileDir) { + const refs = new Set(); + const re = /@([\w./@~-]+\.md)/g; + let m; + while ((m = re.exec(content)) !== null) { + const path = m[1]; + if (path.startsWith('~/')) continue; + const resolved = path.startsWith('/') ? path : resolve(fileDir, path); + if (!existsSync(resolved)) refs.add(path); + } + return [...refs].sort(); +} + +function findUnresolvedCommands(content, repoRoot) { + const refs = new Set(); + const re = /\bpnpm\s+(?:run\s+|--filter\s+\S+\s+(?:run\s+)?)?([a-zA-Z][\w:.\-]*)/g; + const pkgPath = join(repoRoot, 'package.json'); + let scripts = new Set(); + if (existsSync(pkgPath)) { + try { + const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8')); + scripts = new Set(Object.keys(pkg.scripts ?? 
{})); + } catch { + /* skip */ + } + } + const builtins = new Set(['install', 'add', 'remove', 'update', 'i', 'audit', 'list', 'ls', 'why', 'outdated', 'exec', 'run']); + const known = new Set(CONFIG.knownCommands.map((c) => c.replace(/^pnpm /, ''))); + let m; + while ((m = re.exec(content)) !== null) { + const script = m[1]; + if (scripts.has(script) || builtins.has(script) || known.has(script)) continue; + refs.add(`pnpm ${script}`); + } + return [...refs].sort(); +} + +function extractSections(content) { + const lines = content.split('\n'); + const sections = []; + let cur = null; + lines.forEach((line, i) => { + if (/^##\s+/.test(line)) { + if (cur) sections.push({ header: cur.header, lines: i - cur.start }); + cur = { header: line.replace(/^##\s+/, '').trim(), start: i }; + } + }); + if (cur) sections.push({ header: cur.header, lines: lines.length - cur.start }); + return sections.sort((a, b) => b.lines - a.lines); +} + +// ── Pair classification ──────────────────────────────────────────────────── + +function classifyPairs(files) { + const byDir = new Map(); + for (const f of files) { + const dir = dirname(f.relPath); + const entry = byDir.get(dir) ?? {}; + const base = f.relPath.split('/').pop(); + if (base === 'AGENTS.md') entry.agents = f; + if (base === 'CLAUDE.md') entry.claude = f; + byDir.set(dir, entry); + } + const intentional = new Set(CONFIG.intentionalDifferentPairs); + const pairs = []; + for (const [dir, e] of byDir) { + if (!e.agents && !e.claude) continue; + if (!e.agents || !e.claude) { + pairs.push({ dir, classification: 'single', detail: e.agents ? 'only AGENTS.md' : 'only CLAUDE.md' }); + continue; + } + const a = e.agents; + const c = e.claude; + const key1 = `${a.relPath}:${c.relPath}`; + const key2 = `${c.relPath}:${a.relPath}`; + if (intentional.has(key1) || intentional.has(key2)) { + pairs.push({ dir, classification: 'intentional-different', detail: `allowlisted: ${a.lineCount}L vs ${c.lineCount}L` }); + continue; + } + const linked = (a.isSymlink && a.symlinkTarget === c.absPath) || (c.isSymlink && c.symlinkTarget === a.absPath); + if (linked) { + pairs.push({ dir, classification: 'linked', detail: `canonical: ${a.isSymlink ? 'CLAUDE.md' : 'AGENTS.md'}` }); + continue; + } + // Either side might be a broken symlink we already flagged; if so we + // can't read its content, so report the pair as unexpected-duplicate + // without the byte-match check. + let aContent = ''; + let cContent = ''; + try { aContent = readFileSync(a.absPath, 'utf-8'); } catch { aContent = null; } + try { cContent = readFileSync(c.absPath, 'utf-8'); } catch { cContent = null; } + let detail; + if (aContent === null || cContent === null) { + detail = `one side unreadable (broken symlink?); cannot byte-compare`; + } else if (aContent === cContent) { + detail = `byte-for-byte duplicate (${a.lineCount}L), not symlinked`; + } else { + detail = `divergent (${a.lineCount}L vs ${c.lineCount}L), not in intentional-different allowlist`; + } + pairs.push({ dir, classification: 'unexpected-duplicate', detail }); + } + return pairs.sort((p, q) => p.dir.localeCompare(q.dir)); +} + +// ── Entry ────────────────────────────────────────────────────────────────── + +export function runL1Scan(repoRoot) { + const docPaths = findAgentDocs(repoRoot); + const files = docPaths.map((p) => inspectFile(repoRoot, p)); + const pairs = classifyPairs(files); + return { files, pairs, config: CONFIG }; +} + +// Compute deterministic flags for every file. 
+// Used both for the L1 markdown (so PR-mode reports surface single-finding
+// issues) and as input to the L2/L3 gating below.
+export function computeFlags(file) {
+  const reasons = [];
+  // Check this before the symlink early-return: a broken symlink (or an
+  // unreadable file) should surface as a deterministic finding.
+  if (file.brokenSymlinkTarget) reasons.push(`broken symlink target: ${file.brokenSymlinkTarget}`);
+  if (file.isSymlink) return reasons;
+  const isRoot = !file.relPath.includes('/');
+  if (isRoot && file.lineCount > CONFIG.budgets.root) reasons.push(`over root budget (${file.lineCount} > ${CONFIG.budgets.root})`);
+  if (!isRoot && file.lineCount > CONFIG.budgets.nestedWarn) reasons.push(`over nested-warn (${file.lineCount} > ${CONFIG.budgets.nestedWarn})`);
+  if (file.brokenPathRefs.length > 0) reasons.push(`${file.brokenPathRefs.length} broken path ref(s)`);
+  if (file.brokenImports.length > 0) reasons.push(`${file.brokenImports.length} broken @import(s)`);
+  if (file.unresolvedCommands.length > 0) reasons.push(`${file.unresolvedCommands.length} unresolved command(s)`);
+  return reasons;
+}
+
+// L2/L3 gating: stricter than computeFlags. A single-broken-ref doc still
+// appears in the L1 report (via computeFlags) but only triggers paid L2/L3
+// review when there are 2+ broken refs, any broken @import, or the budget is
+// significantly exceeded.
+export function flaggedForReview(scan) {
+  return scan.files
+    .map((f) => ({ ...f, reasons: computeFlags(f) }))
+    .filter((f) => {
+      if (f.reasons.length === 0) return false;
+      if (f.isSymlink) return false; // no content of its own to review
+      const isRoot = !f.relPath.includes('/');
+      if (f.brokenPathRefs.length >= 2) return true;
+      if (f.brokenImports.length >= 1) return true; // any broken @import is high-signal
+      if (isRoot ? f.lineCount > 144 : f.lineCount > 240) return true;
+      return false;
+    });
+}
+
+export function renderL1Markdown(scan) {
+  const lines = [`# Agent docs L1 audit\n`];
+  lines.push(`Found ${scan.files.length} agent-doc files in ${new Set(scan.files.map((f) => dirname(f.relPath))).size} directories.\n`);
+
+  // Inventory
+  lines.push('## Inventory\n');
+  lines.push('| File | Lines | Kind | Notes |');
+  lines.push('|---|---|---|---|');
+  for (const f of [...scan.files].sort((a, b) => b.lineCount - a.lineCount)) {
+    const isRoot = !f.relPath.includes('/');
+    let kind = 'file';
+    if (f.isSymlink) {
+      kind = f.brokenSymlinkTarget
+        ? `symlink → BROKEN (${relative(process.cwd(), f.brokenSymlinkTarget) || f.brokenSymlinkTarget})`
+        : `symlink → ${relative(process.cwd(), f.symlinkTarget) || '?'}`;
+    }
+    const notes = isRoot && f.lineCount > CONFIG.budgets.root
+      ? `over root budget (${f.lineCount} > ${CONFIG.budgets.root})`
+      : !isRoot && f.lineCount > CONFIG.budgets.nestedWarn
+        ? `over nested-warn (${f.lineCount} > ${CONFIG.budgets.nestedWarn})`
+        : '';
+    lines.push(`| \`${f.relPath}\` | ${f.lineCount} | ${kind} | ${notes} |`);
+  }
+
+  // Pairs
+  lines.push('\n## Pair classification\n');
+  lines.push('| Directory | Class | Detail |');
+  lines.push('|---|---|---|');
+  for (const p of scan.pairs) lines.push(`| \`${p.dir || '(root)'}\` | ${p.classification} | ${p.detail} |`);
+
+  // Deterministic findings (per-doc broken refs/imports/commands). Healthy
+  // symlinks are skipped - they share content with their target - but broken
+  // symlinks still get a finding.
+  const findingsByFile = [];
+  for (const f of scan.files) {
+    if (f.isSymlink && !f.brokenSymlinkTarget) continue;
+    const reasons = computeFlags(f);
+    if (reasons.length === 0) continue;
+    findingsByFile.push({ file: f, reasons });
+  }
+  lines.push('\n## Deterministic findings\n');
+  if (findingsByFile.length === 0) {
+    lines.push('None.\n');
+  } else {
+    for (const { file, reasons } of findingsByFile) {
+      lines.push(`### \`${file.relPath}\`\n`);
+      lines.push(reasons.map((r) => `- ${r}`).join('\n'));
+      if (file.brokenPathRefs.length > 0) {
+        lines.push('\nBroken path refs:');
+        for (const r of file.brokenPathRefs) lines.push(`  - \`${r}\``);
+      }
+      if (file.brokenImports.length > 0) {
+        lines.push('\nBroken @imports:');
+        for (const r of file.brokenImports) lines.push(`  - \`${r}\``);
+      }
+      if (file.unresolvedCommands.length > 0) {
+        lines.push('\nUnresolved commands (advisory):');
+        for (const r of file.unresolvedCommands) lines.push(`  - \`${r}\``);
+      }
+      lines.push('');
+    }
+  }
+
+  return lines.join('\n');
+}
+
+// CLI mode
+if (import.meta.url === `file://${process.argv[1]}`) {
+  const targetIdx = process.argv.indexOf('--target');
+  const target = targetIdx >= 0 ? process.argv[targetIdx + 1] : process.cwd();
+  const scan = runL1Scan(resolve(target));
+  console.log(renderL1Markdown(scan));
+}
diff --git a/.github/workflows/agent-docs-audit.yml b/.github/workflows/agent-docs-audit.yml
new file mode 100644
index 0000000000..338149f733
--- /dev/null
+++ b/.github/workflows/agent-docs-audit.yml
@@ -0,0 +1,122 @@
+name: Agent docs audit
+
+# Audit agent-context docs (CLAUDE.md, AGENTS.md, .claude/rules/) against
+# agent-docs-policy.md.
+#
+# Two layers:
+#   L1 (deterministic): line budgets, symlink/pair integrity, broken @imports,
+#       broken path refs, unresolved commands. No API calls.
+#   L2 + L3 (semantic): Haiku triage filters L1's flagged set; Sonnet verifies
+#       concrete claims via Read/Glob/Grep and produces
+#       KEEP/TRIM/MOVE/UPDATE/INVESTIGATE findings.
+#
+# Warning-only for now. Uploads artifacts and writes a Step Summary; does not
+# post PR comments, does not fail the workflow on findings.
+#
+# AI layers are skipped automatically if ANTHROPIC_API_KEY is unavailable
+# (fork PRs, secret not set). In that case the L1 report still uploads.
+
+on:
+  workflow_dispatch:
+  schedule:
+    # Daily 06:00 UTC — L1 structural check only, free. Catches new broken
+    # refs/symlink drift within a day without burning API budget.
+    - cron: '0 6 * * *'
+    # Weekly Monday 14:00 UTC — full L1+L2+L3 with semantic audit. Slower
+    # cadence is fine for semantic drift; the API cost (~$1-2/run) only
+    # makes sense weekly.
+    - cron: '0 14 * * 1'
+  pull_request:
+    paths:
+      - '**/AGENTS.md'
+      - '**/CLAUDE.md'
+      - '**/CLAUDE.local.md'
+      - '.claude/rules/**'
+      - 'agent-docs-policy.md'
+      - '.github/scripts/agent-docs-audit*'
+      - '.github/scripts/agent-docs-l1*'
+      - '.github/workflows/agent-docs-audit.yml'
+
+permissions:
+  contents: read
+
+concurrency:
+  group: agent-docs-audit-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  audit:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 20
+
+      # Detect whether AI layers can run.
+      #
+      # SECURITY: PR-triggered runs always fall back to L1-only.
+      # The audited input is markdown (CLAUDE.md, AGENTS.md) which is
+      # itself prompt text and can be modified in the PR.
+      # Running a tool-using model (Read/Glob/Grep/Bash) over PR-authored
+      # prompt files while ANTHROPIC_API_KEY is in env would be a prompt-
+      # injection pathway. risk-assess.yml is safer because its input is a
+      # code diff and it only allows Read/Glob/Grep. Until our threat model
+      # matches that, L2/L3 only run on scheduled and workflow_dispatch
+      # events (input is main, trusted by review).
+      - name: Detect AI eligibility
+        id: ai
+        run: |
+          if [ "${{ github.event_name }}" = "pull_request" ]; then
+            echo "skip=true" >> $GITHUB_OUTPUT
+            echo "Skipping AI layers on pull_request - running L1 (deterministic drift) only"
+          elif [ "${{ github.event.schedule }}" = "0 6 * * *" ]; then
+            echo "skip=true" >> $GITHUB_OUTPUT
+            echo "Daily schedule - running L1 only (structural drift). Full L1+L2+L3 runs Mondays."
+          elif [ -z "${{ secrets.ANTHROPIC_API_KEY }}" ]; then
+            echo "skip=true" >> $GITHUB_OUTPUT
+            echo "Skipping AI layers - no ANTHROPIC_API_KEY in repo secrets"
+          else
+            echo "skip=false" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Install script deps
+        if: steps.ai.outputs.skip != 'true'
+        run: npm install --prefix .github/scripts @anthropic-ai/claude-agent-sdk @anthropic-ai/sdk
+
+      - name: Run audit (L1 + L2 + L3)
+        if: steps.ai.outputs.skip != 'true'
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          REPO_ROOT: ${{ github.workspace }}
+        run: |
+          node .github/scripts/agent-docs-audit.mjs > /tmp/agent-docs-audit-summary.md
+          cat /tmp/agent-docs-audit-summary.md
+
+      - name: Run audit (L1 only)
+        if: steps.ai.outputs.skip == 'true'
+        env:
+          REPO_ROOT: ${{ github.workspace }}
+        run: |
+          node .github/scripts/agent-docs-audit.mjs --skip-ai > /tmp/agent-docs-audit-summary.md
+          cat /tmp/agent-docs-audit-summary.md
+
+      - name: Write step summary
+        if: always()
+        run: |
+          if [ -f /tmp/agent-docs-audit-summary.md ]; then
+            cat /tmp/agent-docs-audit-summary.md >> $GITHUB_STEP_SUMMARY
+          fi
+
+      - name: Upload audit artifacts
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: agent-docs-audit
+          path: |
+            /tmp/agent-docs-audit-summary.md
+            /tmp/agent-docs-audit.json
+            /tmp/agent-docs-audit-l1.md
+          if-no-files-found: warn
+          retention-days: 30
diff --git a/agent-docs-policy.md b/agent-docs-policy.md
new file mode 100644
index 0000000000..b54e8abdf4
--- /dev/null
+++ b/agent-docs-policy.md
@@ -0,0 +1,94 @@
+# Agent docs policy
+
+Core rule: agent docs (`CLAUDE.md`, `AGENTS.md`, `.claude/rules/*.md`) encode
+non-obvious truths that a competent agent would otherwise miss. They are not
+encyclopedias of the codebase.
+
+## Placement
+
+- **Root `CLAUDE.md` / `AGENTS.md`**: routing layer. Cross-package invariants,
+  high-frequency truths, the one-line "where does X go" answer. Loaded into
+  every session.
+- **Nested `<package>/AGENTS.md`**: package-specific rules and patterns.
+  Loaded only when an agent reads files in that package.
+- **`.claude/rules/<name>.md`** with `paths:` frontmatter: rules that only
+  apply to matching files (e.g. JSDoc rules for `**/*.js`); see the sketch
+  after this list.
+- **Hooks or scripts**: anything that must be enforced rather than advised.
+  Doc-level "always do X" rules that have a deterministic check belong in CI,
+  not in agent docs.
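+A minimal sketch of such a path-scoped rule file (the filename, glob, and rule
+are illustrative, not existing files):
+
+```markdown
+---
+paths:
+  - "packages/**/*.js"
+---
+
+# JSDoc
+
+- Every exported function gets a `@param`/`@returns` block.
+```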
+## Size budgets
+
+- Root: target <= 120 lines. Hard ceiling 200.
+- Nested: target <= 200 lines. Flag at 200, justify above 250.
+- Path-scoped rule files: one concern per file, <= 50 lines.
+
+## Write
+
+- Architectural invariants that prevent wrong-subsystem edits.
+- The four-file pattern, the do-not-hand-edit-derived list, the layer boundary
+  whose violation produces real bugs.
+- Non-obvious commands (workspace filters, multi-step procedures).
+- Repo conventions an agent cannot infer from one file (`pnpm run X` vs
+  `pnpm --filter Y run X`, symlink direction for `AGENTS.md` / `CLAUDE.md`).
+- Subsystem rules that have caused recurring PR feedback.
+
+## Do not write
+
+- Project structure trees that `ls packages/` would produce.
+- Standard language conventions or framework basics.
+- File-by-file descriptions of source.
+- Long restatements of what nested docs already say.
+- Marketing copy or brand prose; that belongs in `brand.md`.
+- Speculative rules, one-off preferences, or guidance whose value has not been
+  observed in real work. Agents try to satisfy written rules; unnecessary
+  requirements add cost and can reduce task success.
+
+## Verifiable claims
+
+Every concrete claim must be checkable by the audit:
+
+- **Paths in backticks** must resolve from the doc's package root, the repo
+  root, or one of `packages/`, `apps/`, `shared/`.
+- **`pnpm