sanitize docs, one last-minute fix

PatrickSys · PatrickSys · commit fadeecbb540d · 2026-02-21T17:49:35.000+01:00
diff --git a/AGENTS.md b/AGENTS.md
@@ -22,7 +22,7 @@ These are non-negotiable. Every PR, feature, and design decision must respect th
 - **Never stage/commit `.planning/**`\*\* (or any other local workflow artifacts) unless the user explicitly asks in that message.
 - **Never use `gsd-tools ... commit` wrappers** in this repo. Use plain `git add <exact files>` and `git commit -m "..."`.
 - **Before every commit:** run `git status --short` and confirm staged files match intent; abort if any `.planning/**` is staged.
-
+- **Avoid using the `any` type AT ALL COSTS.**
 ## Evaluation Integrity (NON-NEGOTIABLE)
 
 These rules prevent metric gaming, overfitting, and false quality claims. Violation of these rules means the feature CANNOT ship.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,30 +8,27 @@
 - **Scope headers in code snippets**: When requesting snippets (`includeSnippets: true`), each code block now starts with a comment like `// UserService.login()` so agents know where the code lives without extra file reads.
 - **Edit decision card**: When searching with `intent="edit"`, `intent="refactor"`, or `intent="migrate"`, results now include a decision card telling you whether there's enough evidence to proceed safely. The card shows: whether you're ready (`ready: true/false`), what to do next if not (`nextAction`), relevant team patterns to follow, a top example file, how many callers appear in results (`impact.coverage`), and what searches would help close gaps (`whatWouldHelp`).
 - **Caller coverage tracking**: The decision card shows how many of a symbol's callers are in your search results. Low coverage (less than 40% when there are lots of callers) triggers an alert so you know to search more before editing.
+- **Index versioning**: Index artifacts are versioned via `index-meta.json`. Mixed-version indexes are never served; version mismatches or corruption trigger automatic rebuild.
+- **Crash-safe rebuilds**: Full rebuilds write to `.staging/` and swap atomically only on success. Failed rebuilds don't corrupt the active index.
+- **Relationship sidecar**: New `relationships.json` artifact containing file import graph, reverse imports, and symbol export index. Updated incrementally alongside the main index.
+- **References confidence + hints**: `get_symbol_references` now includes `confidence: "syntactic"` and `isComplete: boolean` to help agents assess result completeness. `search_codebase` results now include a structured `hints` object (capped callers/consumers/tests ranked by frequency) drawn from the relationships sidecar. **`get_component_usage` removed from MCP surface (11→10 tools).** If you previously used `get_component_usage`, use `get_symbol_references` for symbol usage evidence (usageCount, top snippets, callers/consumers).
+- Tree-sitter-backed symbol extraction is now used by the Generic analyzer when available (with safe fallbacks).
+- Expanded language/extension detection to improve indexing coverage (e.g. `.pyi`, `.php`, `.kt`/`.kts`, `.cc`/`.cxx`, `.cs`, `.swift`, `.scala`, `.toml`, `.xml`).
+- New tool: `get_symbol_references` for concrete symbol usage evidence (usageCount + top snippets).
+- Multi-codebase eval runner: `npm run eval -- <codebaseA> <codebaseB>` with per-codebase reports and combined summary.
+- Shared eval scoring/reporting module (`src/eval/*`) used by both the CLI runner and the test suite.
+- Second frozen eval fixture plus an in-repo controlled TypeScript codebase for fully-offline eval runs.
+- Regression tests covering Tree-sitter Unicode slicing, parser cleanup/reset behavior, and large/generated file skipping.
 
 ### Changed
 
 - **Preflight response shape**: Renamed `reason` to `nextAction` for clarity. Removed internal fields (`evidenceLock`, `riskLevel`, `confidence`) so the output is stable and doesn't change shape unexpectedly.
-
+ 
 ### Fixed
 
 - Null-pointer crash in GenericAnalyzer when chunk content is undefined.
 - Tree-sitter symbol extraction now treats node offsets as UTF-8 byte ranges and evicts cached parsers on failures/timeouts.
 
-### More improvements (Phases 06–08)
-
-- **Index versioning (Phase 06)**: Index artifacts are versioned via `index-meta.json`. Mixed-version indexes are never served; version mismatches or corruption trigger automatic rebuild.
-- **Crash-safe rebuilds (Phase 06)**: Full rebuilds write to `.staging/` and swap atomically only on success. Failed rebuilds don't corrupt the active index.
-- **Relationship sidecar (Phase 07)**: New `relationships.json` artifact containing file import graph, reverse imports, and symbol export index. Updated incrementally alongside the main index.
-- **References confidence + hints (Phase 08)**: `get_symbol_references` now includes `confidence: "syntactic"` and `isComplete: boolean` to help agents assess result completeness. `search_codebase` results now include a structured `hints` object (capped callers/consumers/tests ranked by frequency) drawn from the relationships sidecar. `get_component_usage` removed from MCP surface (11→10 tools).
-- Tree-sitter-backed symbol extraction is now used by the Generic analyzer when available (with safe fallbacks).
-- Expanded language/extension detection to improve indexing coverage (e.g. `.pyi`, `.php`, `.kt`/`.kts`, `.cc`/`.cxx`, `.cs`, `.swift`, `.scala`, `.toml`, `.xml`).
-- New tool: `get_symbol_references` for concrete symbol usage evidence (usageCount + top snippets).
-- Multi-codebase eval runner: `npm run eval -- <codebaseA> <codebaseB>` with per-codebase reports and combined summary.
-- Shared eval scoring/reporting module (`src/eval/*`) used by both the CLI runner and the test suite.
-- Second frozen eval fixture plus an in-repo controlled TypeScript codebase for fully-offline eval runs.
-- Regression tests covering Tree-sitter Unicode slicing, parser cleanup/reset behavior, and large/generated file skipping.
-
 ## [1.6.2] - 2026-02-17
 
 Stripped it down for token efficiency, moved CLI code out of the protocol layer, and cleared structural debt.
diff --git a/README.md b/README.md
@@ -119,12 +119,21 @@ This is where it all comes together. One call returns:
 - **Code results** with `file` (path + line range), `summary`, `score`
 - **Type** per result: compact `componentType:layer` (e.g., `service:data`) — helps agents orient
 - **Pattern signals** per result: `trend` (Rising/Declining — Stable is omitted) and `patternWarning` when using legacy code
-- **Relationships** per result: `importedByCount` and `hasTests` (condensed) + **hints** (capped ranked callers, consumers, tests)
+- **Relationships** per result: `importedByCount` and `hasTests` (condensed) + **hints** (capped ranked callers, consumers, tests) — so you see suggested next reads and know what you haven't looked at yet
 - **Related memories**: up to 3 team decisions, gotchas, and failures matched to the query
 - **Search quality**: `ok` or `low_confidence` with confidence score and `hint` when low
 - **Preflight**: `ready` (boolean) with decision card when `intent="edit"|"refactor"|"migrate"`. Shows `nextAction` (if not ready), `warnings`, `patterns` (do/avoid), `bestExample`, `impact` (caller coverage), and `whatWouldHelp` (next steps). If search quality is low, `ready` is always `false`.
 
-Snippets are opt-in (`includeSnippets: true`). Default output is lean — if the agent wants code, it calls `read_file`.
+Snippets are optional (`includeSnippets: true`). When enabled, snippets that have symbol metadata (e.g. from the Generic analyzer's AST chunking or Angular component chunks) start with a scope header so you know where the code lives (e.g. `// AuthService.getToken()` or `// SpotifyApiService`). Example:
+
+```ts
+// AuthService.getToken()
+getToken(): string {
+  return this.token;
+}
+```
+
+Default output is lean — if the agent wants code, it calls `read_file`.
 
 ```json
 {
@@ -189,7 +198,7 @@ Record a decision once. It surfaces automatically in search results and prefligh
 | ------------------------------ | ------------------------------------------------------------------------------------------- |
 | `search_codebase`              | Hybrid search + decision card. Pass `intent="edit"` to get `ready`, `nextAction`, patterns, caller coverage, and `whatWouldHelp`. |
 | `get_team_patterns`            | Pattern frequencies, golden files, conflict detection                                      |
-| `get_symbol_references`        | Find concrete references to a symbol (usageCount + top snippets + confidence + completeness) |
+| `get_symbol_references`        | Find concrete references to a symbol (usageCount + top snippets). `confidence: "syntactic"` = static/source-based only; no runtime or dynamic dispatch. |
 | `remember`                     | Record a convention, decision, gotcha, or failure                                          |
 | `get_memory`                   | Query team memory with confidence decay scoring                                            |
 | `get_codebase_metadata`        | Project structure, frameworks, dependencies                                                |
@@ -200,7 +209,7 @@ Record a decision once. It surfaces automatically in search results and prefligh
 
 ## Evaluation Harness (`npm run eval`)
 
-Reproducible evaluation with frozen fixtures so ranking/chunking changes are measured honestly and regressions get caught.
+Reproducible evaluation with frozen fixtures so ranking/chunking changes are measured honestly and regressions get caught. **For contributors and CI:** run before releases or after changing search/ranking/chunking to guard against regressions.
 
 - Two codebases: `npm run eval -- <codebaseA> <codebaseB>`
 - Defaults: fixture A = `tests/fixtures/eval-angular-spotify.json`, fixture B = `tests/fixtures/eval-controlled.json`
@@ -214,11 +223,13 @@ npm run eval -- tests/fixtures/codebases/eval-controlled tests/fixtures/codebase
 ```
 
 - Flags: `--help`, `--fixture-a`, `--fixture-b`, `--skip-reindex`, `--no-rerank`, `--no-redact`
+- To save a report for later comparison, redirect stdout (e.g. `npm run eval -- <path-to-angular-spotify> --skip-reindex > internal-docs/tests/eval-runs/angular-spotify-YYYY-MM-DD.txt`).
 
 ## How the Search Works
 
 The retrieval pipeline is designed around one goal: give the agent the right context, not just any file that matches.
 
+- **Definition-first ranking** - for exact-name lookups (e.g. a symbol name), the file that *defines* the symbol ranks above files that only use it.
 - **Intent classification** - knows whether "AuthService" is a name lookup or "how does auth work" is conceptual. Adjusts keyword/semantic weights accordingly.
 - **Hybrid fusion (RRF)** - combines keyword and semantic search using Reciprocal Rank Fusion instead of brittle score averaging.
 - **Query expansion** - conceptual queries automatically expand with domain-relevant terms (auth → login, token, session, guard).
@@ -229,13 +240,15 @@ The retrieval pipeline is designed around one goal: give the agent the right con
 - **Version gating** - index artifacts are versioned; mismatches trigger automatic rebuild so mixed-version data is never served.
 - **Auto-heal** - if the index corrupts, search triggers a full re-index automatically.
 
+**Index reliability:** Rebuilds write to a staging directory and swap atomically only on success, so a failed rebuild never corrupts the active index. Version mismatches or corruption trigger an automatic full re-index (no user action required).
+
 ## Language Support
 
-Over **30+ languages** are supported for indexing + retrieval: TypeScript/JavaScript, Python (incl `.pyi`), PHP, Ruby, Java, Kotlin (`.kt`/`.kts`), Go, Rust, C/C++ (incl `.cc`/`.cxx`), C#, Swift, Scala, Shell, plus common config/markup formats (JSON/YAML/TOML/XML, etc.).
+**10 languages** have full symbol extraction (Tree-sitter): TypeScript, JavaScript, Python, Java, Kotlin, C, C++, C#, Go, Rust. **30+ languages** have indexing and retrieval coverage (keyword + semantic), including PHP, Ruby, Swift, Scala, Shell, and config/markup (JSON/YAML/TOML/XML, etc.).
 
 Enrichment is framework-specific: right now only **Angular** has a dedicated analyzer for rich conventions/context (signals, standalone components, control flow, DI patterns).
 
-For non-Angular projects, the **Generic** analyzer still provides broad coverage, and will use Tree-sitter symbol extraction when a grammar is available (otherwise it falls back to safe parsing).
+For non-Angular projects, the **Generic** analyzer uses **AST-aligned chunking** when a Tree-sitter grammar is available: symbol-bounded chunks with **scope-aware prefixes** (e.g. `// ClassName.methodName`) so snippets show where code lives. Without a grammar it falls back to safe line-based chunking.
 
 Structured filters available: `framework`, `language`, `componentType`, `layer` (presentation, business, data, state, core, shared).
 
diff --git a/docs/capabilities.md b/docs/capabilities.md
@@ -4,15 +4,15 @@ Technical reference for what `codebase-context` ships today. For the user-facing
 
 ## Tool Surface
 
-10 MCP tools + 1 optional resource (`codebase://context`).
+10 MCP tools + 1 optional resource (`codebase://context`). **Migration:** `get_component_usage` was removed; use `get_symbol_references` for symbol usage evidence.
 
 ### Core Tools
 
 | Tool                    | Input                                                             | Output                                                                                                                                                                                                                  |
 | ----------------------- | ----------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `search_codebase`       | `query`, optional `intent`, `limit`, `filters`, `includeSnippets` | Ranked results (`file`, `summary`, `score`, `type`, `trend`, `patternWarning`, `relationships`, `hints`) + `searchQuality` + decision card (`ready`, `nextAction`, `patterns`, `bestExample`, `impact`, `whatWouldHelp`) when `intent="edit"`. Hints capped at 3 per category. |
 | `get_team_patterns`     | optional `category`                                               | Pattern frequencies, trends, golden files, conflicts                                                                                                                                 |
-| `get_symbol_references` | `symbol`, optional `limit`                                        | Concrete symbol usage evidence: `usageCount` + top usage snippets + `confidence` ("syntactic") + `isComplete` boolean                                                                |
+| `get_symbol_references` | `symbol`, optional `limit`                                        | Concrete symbol usage evidence: `usageCount` + top usage snippets + `confidence` + `isComplete`. `confidence: "syntactic"` means static/source-based only (no runtime or dynamic dispatch). Replaces the removed `get_component_usage`. |
 | `remember`              | `type`, `category`, `memory`, `reason`                            | Persists to `.codebase-context/memory.json`                                                                                                                                          |
 | `get_memory`            | optional `category`, `type`, `query`, `limit`                     | Memories with confidence decay scoring                                                                                                                                               |
 
@@ -121,12 +121,12 @@ Returned as `preflight` when search `intent` is `edit`, `refactor`, or `migrate`
 ## Analyzers
 
 - **Angular**: signals, standalone components, control flow syntax, lifecycle hooks, DI patterns, component metadata
-- **Generic**: 30+ languages — TypeScript, JavaScript, Python, Java, Kotlin, C/C++, C#, Go, Rust, PHP, Ruby, Swift, Scala, Shell, config/markup formats
+- **Generic**: 30+ languages have indexing/retrieval coverage, including PHP, Ruby, Swift, Scala, Shell, and config/markup formats; 10 languages have full symbol extraction (Tree-sitter: TypeScript, JavaScript, Python, Java, Kotlin, C, C++, C#, Go, Rust).
 
 Notes:
 
 - Language detection covers common extensions including `.pyi`, `.kt`/`.kts`, `.cc`/`.cxx`, and config formats like `.toml`/`.xml`.
-- When Tree-sitter grammars are present, the Generic analyzer can derive symbol components from Tree-sitter extraction (with fallbacks).
+- When Tree-sitter grammars are present, the Generic analyzer uses AST-aligned chunking and scope-aware prefixes for symbol-aware snippets (with fallbacks).
 
 ## Evaluation Harness
 
diff --git a/src/tools/search-codebase.ts b/src/tools/search-codebase.ts
@@ -176,9 +176,8 @@ export async function handle(
                 text: JSON.stringify(
                   {
                     status: 'error',
-                    message: `Auto-heal retry failed: ${
-                      retryError instanceof Error ? retryError.message : String(retryError)
-                    }`
+                    message: `Auto-heal retry failed: ${retryError instanceof Error ? retryError.message : String(retryError)
+                      }`
                   },
                   null,
                   2
@@ -313,11 +312,13 @@ export async function handle(
 
   function buildRelationshipHints(result: SearchResult): RelationshipHints {
     const rPath = result.filePath;
+    // Graph keys are relative paths with forward slashes; normalize for comparison
+    const rPathNorm = path.relative(ctx.rootPath, rPath).replace(/\\/g, '/') || rPath.replace(/\\/g, '/');
 
     // importedBy: files that import this result (reverse lookup), collect with counts
     const importedByMap = new Map<string, number>();
     for (const [dep, importers] of reverseImports) {
-      if (dep.endsWith(rPath) || rPath.endsWith(dep)) {
+      if (dep === rPathNorm || dep.endsWith(rPathNorm) || rPathNorm.endsWith(dep)) {
         for (const importer of importers) {
           importedByMap.set(importer, (importedByMap.get(importer) || 0) + 1);
         }
@@ -326,7 +327,7 @@ export async function handle(
 
     // testedIn: heuristic — same basename with .spec/.test extension
     const testedIn: string[] = [];
-    const baseName = path.basename(rPath).replace(/\.[^.]+$/, '');
+    const baseName = path.basename(rPathNorm).replace(/\.[^.]+$/, '');
     if (importsGraph) {
       for (const file of Object.keys(importsGraph)) {
         const fileBase = path.basename(file);
@@ -616,8 +617,8 @@ export async function handle(
       }
 
       // Add patterns (do/avoid, capped at 3 each, with adoption %)
-      const doPatterns = preferredPatternsForOutput.slice(0, 3).map((p) => `${p.pattern} — ${p.frequency || 'N/A'}`);
-      const avoidPatterns = avoidPatternsForOutput.slice(0, 3).map((p) => `${p.pattern} — ${p.frequency || 'N/A'} (declining)`);
+      const doPatterns = preferredPatternsForOutput.slice(0, 3).map((p) => `${p.pattern} — ${p.adoption ? ` ${p.adoption}% adoption` : ''}`);
+      const avoidPatterns = avoidPatternsForOutput.slice(0, 3).map((p) => `${p.pattern} — ${p.adoption ? ` ${p.adoption}% adoption` : ''} (declining)`);
       if (doPatterns.length > 0 || avoidPatterns.length > 0) {
         decisionCard.patterns = {
           ...(doPatterns.length > 0 && { do: doPatterns }),
@@ -688,6 +689,10 @@ export async function handle(
     if (metadata?.functionName) {
       return metadata.functionName;
     }
+    // component chunk fallback (component or pipe name)
+    if (metadata?.componentName) {
+      return metadata.componentName;
+    }
     return null;
   }
 
@@ -712,8 +717,8 @@ export async function handle(
               confidence: searchQuality.confidence,
               ...(searchQuality.status === 'low_confidence' &&
                 searchQuality.nextSteps?.[0] && {
-                  hint: searchQuality.nextSteps[0]
-                })
+                hint: searchQuality.nextSteps[0]
+              })
             },
             ...(preflightPayload && { preflight: preflightPayload }),
             results: results.map((r) => {