Merge pull request #22 from AztecProtocol/fix/error-lookup-suppress-weak-when-semantic-useful

critesjosh · web-flow · commit 80f17feaef78 · 2026-05-04T12:36:45.000-04:00
fix(error-lookup): suppress weak catalog when semantic returned useful results + content-thin chunk filter
diff --git a/src/tools/error-lookup.ts b/src/tools/error-lookup.ts
@@ -31,6 +31,57 @@ import { checkVersionGate, formatMismatchMessage } from "../utils/version-check.
  */
 const STRONG_MATCH_THRESHOLD = 70;
 
+/**
+ * A line is "path-shaped" if it looks like a filesystem path rather
+ * than a code/docs line. Strips a leading markdown heading marker so
+ * ``# aztec-nr/.../foo.nr`` is recognized as path-shaped just like
+ * the bare ``aztec-nr/.../foo.nr``. Path-shaped means: contains ``/``
+ * and has no whitespace. Real signature lines (``pub fn foo(...)``,
+ * ``struct Bar { ... }``, ``pub use a::b;``) always have whitespace,
+ * so they never trip this predicate.
+ */
+function lineIsPathShaped(line: string): boolean {
+  const cleaned = line.replace(/^#+\s*/, "").trim();
+  return cleaned.length > 0 && cleaned.includes("/") && !/\s/.test(cleaned);
+}
+
+/**
+ * Drop semantic chunks whose body is empty or just the file path.
+ *
+ * Why this exists client-side even though docsgpt's ``/api/search``
+ * has its own equivalent guard: defense-in-depth. The MCP server is
+ * shipped to end users on whatever DocsGPT instance ``API_URL``
+ * points at — that backend may not have the latest filter applied,
+ * may be a self-hosted fork, or may reintroduce the bug in a future
+ * regression. Filtering on this side keeps the MCP UX safe regardless.
+ *
+ * Mirrors the Python helper in ``application/api/answer/routes/search.py``
+ * (``_is_empty_apiref_chunk``) — same content-shape predicate.
+ *
+ * The predicate is deliberately metadata-free. An earlier draft used
+ * ``match.source`` / ``match.title`` as a "heading-equivalent" set
+ * to strip a rendered file heading before checking the rest, but
+ * docsgpt's ``/api/search`` rewrites ``source`` to a public URL via
+ * ``_aztec_source_url`` — so the heading string never matches the
+ * post-rewrite source field. The shape-only check below works
+ * regardless of metadata transformations.
+ */
+function isUsefulSemanticChunk(match: SemanticSearchResult): boolean {
+  const text = (match.text ?? "").trim();
+  if (!text) return false;
+
+  const lines = text
+    .split("\n")
+    .map((l) => l.trim())
+    .filter((l) => l.length > 0);
+  if (lines.length === 0) return false;
+
+  // All non-empty lines are path-shaped → no real API content.
+  if (lines.every(lineIsPathShaped)) return false;
+
+  return true;
+}
+
 export type SemanticHealth =
   | "ok" // semantic returned results
   | "no_results" // semantic ran cleanly, returned empty
@@ -143,11 +194,19 @@ export async function lookupAztecError(
   }
 
   try {
-    const semanticResults = await docsgptClient.search(
+    const rawResults = await docsgptClient.search(
       `Aztec error: ${query}`,
       3
     );
 
+    // Filter content-thin / path-only chunks. If the server-side guard
+    // is in place these will be empty already, but defense-in-depth
+    // protects against older docsgpt deployments and any future
+    // regression in the apiref ingest. "Returned 3 chunks but all
+    // were just file paths" is functionally equivalent to "returned
+    // nothing useful" and we report it as such.
+    const semanticResults = rawResults.filter(isUsefulSemanticChunk);
+
     if (semanticResults.length > 0) {
       return {
         success: true,
diff --git a/src/utils/format.ts b/src/utils/format.ts
@@ -187,17 +187,27 @@ export function formatErrorLookupResult(result: ErrorLookupToolResult): string {
   const { catalogMatches, codeMatches } = result.result;
 
   // When semantic results exist AND every catalog match is below the
-  // strong-match threshold, the catalog hits are low-confidence cues
-  // that shouldn't visually dominate the response. Render semantic
-  // first under "## Related Documentation", and the catalog after
-  // under "## Lower-Confidence Catalog Hints" so the LLM consumer
-  // doesn't anchor on a misleading top hit (e.g. "note already
-  // nullified" matching "Contract already initialized" with score 54).
+  // strong-match threshold, the catalog hits are low-confidence cues.
+  // Two cases:
+  //
+  //   semanticHasResults = true  → semantic returned content-bearing
+  //     chunks (the lookupAztecError filter only sets semanticResults
+  //     when at least one chunk passed isUsefulSemanticChunk). The
+  //     weak catalog hint is now actively misleading — the user keeps
+  //     anchoring on it as the "primary answer" even though semantic
+  //     gave us better context. SUPPRESS the catalog section entirely.
+  //
+  //   semanticHasResults = false → semantic ran but produced nothing
+  //     useful (or didn't run: no client, version mismatch, backend
+  //     failed). The user has no other signal. KEEP the weak catalog
+  //     with a clear "Lower-Confidence Catalog Hints" header so they
+  //     have *something* to look at, framed honestly.
   const semanticHasResults =
     !!result.semanticResults && result.semanticResults.length > 0;
   const catalogIsWeakOnly =
     catalogMatches.length > 0 &&
     catalogMatches.every((m) => m.score < 70);
+  const suppressWeakCatalog = catalogIsWeakOnly && semanticHasResults;
   const renderSemanticFirst = semanticHasResults && catalogIsWeakOnly;
 
   function renderSemantic() {
@@ -221,6 +231,12 @@ export function formatErrorLookupResult(result: ErrorLookupToolResult): string {
 
   function renderCatalog() {
     if (catalogMatches.length === 0) return;
+    // Phase 2 suppression: when semantic returned content-bearing
+    // chunks AND the catalog is weak-only, the catalog hits are
+    // pure noise that the user keeps anchoring on. Hide them
+    // entirely. They remain in `result.catalogMatches` for
+    // programmatic consumers that need every signal.
+    if (suppressWeakCatalog) return;
     lines.push(
       catalogIsWeakOnly
         ? "## Lower-Confidence Catalog Hints"
diff --git a/tests/tools/error-lookup.test.ts b/tests/tools/error-lookup.test.ts
@@ -292,6 +292,152 @@ describe("lookupAztecError — semantic fallback", () => {
   });
 });
 
+describe("lookupAztecError — content-thin chunk filter", () => {
+  /**
+   * Defense-in-depth filter: even if docsgpt's `/api/search` regresses
+   * and starts returning path-only / empty-body apiref chunks,
+   * `isUsefulSemanticChunk` drops them before they're surfaced to the
+   * LLM consumer. Mirrors the server-side
+   * `_is_empty_apiref_chunk` helper.
+   */
+  function chunk(text: string, source = "aztec-nr/aztec/src/foo.nr") {
+    return { text, title: "foo.nr", source };
+  }
+
+  it("drops chunks with `#`-prefixed path heading even when source field is a public URL", async () => {
+    /**
+     * Regression for codex review: `/api/search` rewrites the chunk's
+     * `source` field to a public URL via `_aztec_source_url`. A chunk
+     * whose body is `# aztec-nr/.../foo.nr` (path heading only) won't
+     * match the URL-rewritten source field by string equality. The
+     * earlier filter would fail to strip the heading, then fall through
+     * to the path-shape check — which also failed because `# ...` has
+     * whitespace from the markdown marker. The new shape-only filter
+     * catches this directly.
+     */
+    const client = makeClient({
+      search: vi.fn().mockResolvedValue([
+        {
+          text: "# aztec-nr/aztec/src/context/foo.nr\n",
+          title: "foo.nr",
+          source: "https://github.com/AztecProtocol/aztec-packages/blob/v4.2.0/noir-projects/aztec-nr/aztec/src/context/foo.nr",
+        },
+      ]),
+    });
+    const result = await lookupAztecError({ query: "obscure" }, client);
+    expect(result.semanticHealth).toBe("no_results");
+  });
+
+  it("treats raw output of all path-only chunks as 'no_results'", async () => {
+    const client = makeClient({
+      search: vi.fn().mockResolvedValue([
+        chunk("\n\naztec-nr/aztec/src/context/note_existence_request.nr\n\n",
+              "aztec-nr/aztec/src/context/note_existence_request.nr"),
+        chunk("\n\naztec-nr/aztec/src/note/hinted_note.nr\n",
+              "aztec-nr/aztec/src/note/hinted_note.nr"),
+      ]),
+    });
+    const result = await lookupAztecError({ query: "obscure" }, client);
+    expect(result.semanticHealth).toBe("no_results");
+    expect(result.semanticResults).toBeUndefined();
+  });
+
+  it("keeps mixed results when at least one chunk has substantive body", async () => {
+    const client = makeClient({
+      search: vi.fn().mockResolvedValue([
+        chunk("\n\naztec-nr/aztec/src/empty.nr\n",
+              "aztec-nr/aztec/src/empty.nr"),
+        chunk(
+          "# aztec-nr/aztec/src/hash.nr\npub fn poseidon(input: [Field; N]) -> Field",
+          "aztec-nr/aztec/src/hash.nr"
+        ),
+        chunk("\n\naztec-nr/aztec/src/utils.nr\n",
+              "aztec-nr/aztec/src/utils.nr"),
+      ]),
+    });
+    const result = await lookupAztecError({ query: "poseidon" }, client);
+    expect(result.semanticHealth).toBe("ok");
+    expect(result.semanticResults).toHaveLength(1);
+    expect(result.semanticResults![0].text).toContain("poseidon");
+  });
+});
+
+describe("lookupAztecError — weak catalog suppression when semantic is useful", () => {
+  /**
+   * The user-reported "bogus result still appears" failure mode: weak
+   * catalog hits visible alongside semantic results lets the LLM
+   * consumer anchor on the wrong answer. When semantic returned
+   * useful (post-filter) chunks, the weak catalog is now suppressed
+   * from the rendered output entirely (still present in
+   * `result.catalogMatches` for programmatic consumers).
+   *
+   * This tests the data-shape that the formatter consumes; the
+   * formatter test (`tests/utils/format.test.ts`) verifies the
+   * suppression actually happens at render time.
+   */
+  it("returns semanticHealth='ok' with weak catalog still in result.catalogMatches", async () => {
+    mockLookupError.mockReturnValue({
+      query: "note already nullified",
+      catalogMatches: [
+        catalogHit(54, "Contract already initialized", "word-overlap"),
+      ],
+      codeMatches: [],
+    });
+
+    const client = makeClient({
+      search: vi.fn().mockResolvedValue([
+        {
+          text: "Notes in Aztec are nullified by emitting a nullifier...",
+          title: "Note Lifecycle",
+          source: "docs/notes.md",
+        },
+      ]),
+    });
+
+    const result = await lookupAztecError(
+      { query: "note already nullified" },
+      client
+    );
+    expect(result.semanticHealth).toBe("ok");
+    expect(result.semanticResults).toHaveLength(1);
+    // The weak catalog hit is preserved in the data — the formatter
+    // is responsible for hiding it. Programmatic consumers can still
+    // see all signals.
+    expect(result.result.catalogMatches).toHaveLength(1);
+    expect(result.result.catalogMatches[0].score).toBe(54);
+  });
+
+  it("when semantic is filtered out (all path-only) AND catalog is weak, keeps catalog", async () => {
+    mockLookupError.mockReturnValue({
+      query: "note already nullified",
+      catalogMatches: [
+        catalogHit(54, "Contract already initialized", "word-overlap"),
+      ],
+      codeMatches: [],
+    });
+
+    const client = makeClient({
+      search: vi.fn().mockResolvedValue([
+        // Path-only chunks that the filter will drop
+        { text: "\n\naztec-nr/aztec/src/foo.nr\n",
+          title: "foo.nr",
+          source: "aztec-nr/aztec/src/foo.nr" },
+      ]),
+    });
+
+    const result = await lookupAztecError(
+      { query: "note already nullified" },
+      client
+    );
+    // semantic returned empty (after filter) → no_results
+    expect(result.semanticHealth).toBe("no_results");
+    // Weak catalog stays in the result so the user has *some* signal
+    expect(result.result.catalogMatches).toHaveLength(1);
+    expect(result.message).toContain("low-confidence");
+    expect(result.message).toMatch(/no relevant documentation|Semantic search/i);
+  });
+});
+
 describe("lookupAztecError — semantic failure (sanitized message)", () => {
   it("sets semanticHealth='failed' and returns sanitized message on 401", async () => {
     const client = makeClient({
@@ -325,7 +471,7 @@ describe("lookupAztecError — version-mismatch gate", () => {
   it("blocks semantic fallback when local clone diverges from corpus", async () => {
     mockGetRepoTag.mockResolvedValue("v4.1.0");
     const client = makeClient({
-      search: vi.fn().mockResolvedValue([{ text: "x", title: "x", source: "x" }]),
+      search: vi.fn().mockResolvedValue([{ text: "Some prose body content here.", title: "T", source: "x" }]),
       getCorpusVersion: vi.fn().mockResolvedValue({ aztec_corpus_version: "v4.2.0" }),
     });
 
@@ -340,7 +486,7 @@ describe("lookupAztecError — version-mismatch gate", () => {
     mockGetRepoTag.mockResolvedValue("v4.1.0");
     const client = makeClient({
       search: vi.fn().mockResolvedValue([
-        { text: "x", title: "x", source: "x" },
+        { text: "Some prose body content here.", title: "T", source: "x" },
       ]),
       getCorpusVersion: vi.fn().mockResolvedValue({ aztec_corpus_version: "v4.2.0" }),
     });
diff --git a/tests/utils/format.test.ts b/tests/utils/format.test.ts