Skip to content

Commit 80f17fe

Browse files
authored
Merge pull request #22 from AztecProtocol/fix/error-lookup-suppress-weak-when-semantic-useful
fix(error-lookup): suppress weak catalog when semantic returned useful results + content-thin chunk filter
2 parents 011fc0d + 607fbfd commit 80f17fe

4 files changed

Lines changed: 354 additions & 9 deletions

File tree

src/tools/error-lookup.ts

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,57 @@ import { checkVersionGate, formatMismatchMessage } from "../utils/version-check.
3131
*/
3232
const STRONG_MATCH_THRESHOLD = 70;
3333

34+
/**
 * Reports whether a line looks like a bare filesystem path rather than a
 * line of code or documentation.
 *
 * A leading markdown heading marker is ignored, so
 * ``# aztec-nr/.../foo.nr`` is classified the same as the bare
 * ``aztec-nr/.../foo.nr``. After that, a line is path-shaped when it is
 * non-empty, contains ``/``, and contains no whitespace. Real signature
 * lines (``pub fn foo(...)``, ``struct Bar { ... }``, ``pub use a::b;``)
 * always have whitespace, so they never trip this predicate.
 *
 * @param line - A single line of chunk text; surrounding whitespace is ignored.
 * @returns `true` when the line is path-shaped, `false` otherwise.
 */
function lineIsPathShaped(line: string): boolean {
  // Trim BEFORE stripping the heading marker: the `^#+` anchor only matches
  // at the very start of the string, so an indented heading would otherwise
  // keep its marker and be misclassified as content because of the space
  // that follows the `#`.
  const cleaned = line.trim().replace(/^#+\s*/, "");
  return cleaned.length > 0 && cleaned.includes("/") && !/\s/.test(cleaned);
}
47+
48+
/**
 * Decides whether a semantic search hit carries real content, as opposed
 * to an empty body or a body made up solely of file paths.
 *
 * This check is duplicated client-side on purpose, even though docsgpt's
 * ``/api/search`` has an equivalent guard. The MCP server ships to end
 * users and talks to whatever DocsGPT instance ``API_URL`` points at;
 * that backend may predate the server-side filter, may be a self-hosted
 * fork, or may regress in the future. Filtering here keeps the MCP UX
 * safe in all of those cases.
 *
 * It mirrors the Python helper ``_is_empty_apiref_chunk`` in
 * ``application/api/answer/routes/search.py`` and applies the same
 * content-shape predicate.
 *
 * Only the chunk text is inspected; metadata is deliberately ignored. An
 * earlier draft compared ``match.source`` / ``match.title`` against a
 * rendered file heading in order to strip it, but docsgpt's
 * ``/api/search`` rewrites ``source`` to a public URL via
 * ``_aztec_source_url``, so the heading never equals the rewritten
 * field. A shape-only check is immune to such metadata transformations.
 *
 * @param match - One result returned by the semantic search backend.
 * @returns `false` when the text is missing, blank, or consists only of
 *   path-shaped lines; `true` otherwise.
 */
function isUsefulSemanticChunk(match: SemanticSearchResult): boolean {
  const body = (match.text ?? "").trim();
  if (body === "") {
    return false;
  }

  // Collect the trimmed, non-blank lines of the chunk body.
  const nonBlankLines: string[] = [];
  for (const rawLine of body.split("\n")) {
    const candidate = rawLine.trim();
    if (candidate.length > 0) {
      nonBlankLines.push(candidate);
    }
  }
  if (nonBlankLines.length === 0) {
    return false;
  }

  // The chunk is useful as soon as one line is something other than a
  // path; if every line is path-shaped there is no real API content.
  return nonBlankLines.some((candidate) => !lineIsPathShaped(candidate));
}
84+
3485
export type SemanticHealth =
3586
| "ok" // semantic returned results
3687
| "no_results" // semantic ran cleanly, returned empty
@@ -143,11 +194,19 @@ export async function lookupAztecError(
143194
}
144195

145196
try {
146-
const semanticResults = await docsgptClient.search(
197+
const rawResults = await docsgptClient.search(
147198
`Aztec error: ${query}`,
148199
3
149200
);
150201

202+
// Filter content-thin / path-only chunks. If the server-side guard
203+
// is in place these will be empty already, but defense-in-depth
204+
// protects against older docsgpt deployments and any future
205+
// regression in the apiref ingest. "Returned 3 chunks but all
206+
// were just file paths" is functionally equivalent to "returned
207+
// nothing useful" and we report it as such.
208+
const semanticResults = rawResults.filter(isUsefulSemanticChunk);
209+
151210
if (semanticResults.length > 0) {
152211
return {
153212
success: true,

src/utils/format.ts

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -187,17 +187,27 @@ export function formatErrorLookupResult(result: ErrorLookupToolResult): string {
187187
const { catalogMatches, codeMatches } = result.result;
188188

189189
// When semantic results exist AND every catalog match is below the
190-
// strong-match threshold, the catalog hits are low-confidence cues
191-
// that shouldn't visually dominate the response. Render semantic
192-
// first under "## Related Documentation", and the catalog after
193-
// under "## Lower-Confidence Catalog Hints" so the LLM consumer
194-
// doesn't anchor on a misleading top hit (e.g. "note already
195-
// nullified" matching "Contract already initialized" with score 54).
190+
// strong-match threshold, the catalog hits are low-confidence cues.
191+
// Two cases:
192+
//
193+
// semanticHasResults = true → semantic returned content-bearing
194+
// chunks (the lookupAztecError filter only sets semanticResults
195+
// when at least one chunk passed isUsefulSemanticChunk). The
196+
// weak catalog hint is now actively misleading — the user keeps
197+
// anchoring on it as the "primary answer" even though semantic
198+
// gave us better context. SUPPRESS the catalog section entirely.
199+
//
200+
// semanticHasResults = false → semantic ran but produced nothing
201+
// useful (or didn't run: no client, version mismatch, backend
202+
// failed). The user has no other signal. KEEP the weak catalog
203+
// with a clear "Lower-Confidence Catalog Hints" header so they
204+
// have *something* to look at, framed honestly.
196205
const semanticHasResults =
197206
!!result.semanticResults && result.semanticResults.length > 0;
198207
const catalogIsWeakOnly =
199208
catalogMatches.length > 0 &&
200209
catalogMatches.every((m) => m.score < 70);
210+
const suppressWeakCatalog = catalogIsWeakOnly && semanticHasResults;
201211
const renderSemanticFirst = semanticHasResults && catalogIsWeakOnly;
202212

203213
function renderSemantic() {
@@ -221,6 +231,12 @@ export function formatErrorLookupResult(result: ErrorLookupToolResult): string {
221231

222232
function renderCatalog() {
223233
if (catalogMatches.length === 0) return;
234+
// Phase 2 suppression: when semantic returned content-bearing
235+
// chunks AND the catalog is weak-only, the catalog hits are
236+
// pure noise that the user keeps anchoring on. Hide them
237+
// entirely. They remain in `result.result.catalogMatches` for
238+
// programmatic consumers that need every signal.
239+
if (suppressWeakCatalog) return;
224240
lines.push(
225241
catalogIsWeakOnly
226242
? "## Lower-Confidence Catalog Hints"

tests/tools/error-lookup.test.ts

Lines changed: 148 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,152 @@ describe("lookupAztecError — semantic fallback", () => {
292292
});
293293
});
294294

295+
describe("lookupAztecError — content-thin chunk filter", () => {
296+
/**
297+
* Defense-in-depth filter: even if docsgpt's `/api/search` regresses
298+
* and starts returning path-only / empty-body apiref chunks,
299+
* `isUsefulSemanticChunk` drops them before they're surfaced to the
300+
* LLM consumer. Mirrors the server-side
301+
* `_is_empty_apiref_chunk` helper.
302+
*/
303+
function chunk(text: string, source = "aztec-nr/aztec/src/foo.nr") {
304+
return { text, title: "foo.nr", source };
305+
}
306+
307+
it("drops chunks with `#`-prefixed path heading even when source field is a public URL", async () => {
308+
/**
309+
* Regression for codex review: `/api/search` rewrites the chunk's
310+
* `source` field to a public URL via `_aztec_source_url`. A chunk
311+
* whose body is `# aztec-nr/.../foo.nr` (path heading only) won't
312+
* match the URL-rewritten source field by string equality. The
313+
* earlier filter would fail to strip the heading, then fall through
314+
* to the path-shape check — which also failed because `# ...` has
315+
* whitespace from the markdown marker. The new shape-only filter
316+
* catches this directly.
317+
*/
318+
const client = makeClient({
319+
search: vi.fn().mockResolvedValue([
320+
{
321+
text: "# aztec-nr/aztec/src/context/foo.nr\n",
322+
title: "foo.nr",
323+
source: "https://github.com/AztecProtocol/aztec-packages/blob/v4.2.0/noir-projects/aztec-nr/aztec/src/context/foo.nr",
324+
},
325+
]),
326+
});
327+
const result = await lookupAztecError({ query: "obscure" }, client);
328+
expect(result.semanticHealth).toBe("no_results");
329+
});
330+
331+
it("treats raw output of all path-only chunks as 'no_results'", async () => {
332+
const client = makeClient({
333+
search: vi.fn().mockResolvedValue([
334+
chunk("\n\naztec-nr/aztec/src/context/note_existence_request.nr\n\n",
335+
"aztec-nr/aztec/src/context/note_existence_request.nr"),
336+
chunk("\n\naztec-nr/aztec/src/note/hinted_note.nr\n",
337+
"aztec-nr/aztec/src/note/hinted_note.nr"),
338+
]),
339+
});
340+
const result = await lookupAztecError({ query: "obscure" }, client);
341+
expect(result.semanticHealth).toBe("no_results");
342+
expect(result.semanticResults).toBeUndefined();
343+
});
344+
345+
it("keeps mixed results when at least one chunk has substantive body", async () => {
346+
const client = makeClient({
347+
search: vi.fn().mockResolvedValue([
348+
chunk("\n\naztec-nr/aztec/src/empty.nr\n",
349+
"aztec-nr/aztec/src/empty.nr"),
350+
chunk(
351+
"# aztec-nr/aztec/src/hash.nr\npub fn poseidon(input: [Field; N]) -> Field",
352+
"aztec-nr/aztec/src/hash.nr"
353+
),
354+
chunk("\n\naztec-nr/aztec/src/utils.nr\n",
355+
"aztec-nr/aztec/src/utils.nr"),
356+
]),
357+
});
358+
const result = await lookupAztecError({ query: "poseidon" }, client);
359+
expect(result.semanticHealth).toBe("ok");
360+
expect(result.semanticResults).toHaveLength(1);
361+
expect(result.semanticResults![0].text).toContain("poseidon");
362+
});
363+
});
364+
365+
describe("lookupAztecError — weak catalog suppression when semantic is useful", () => {
366+
/**
367+
* The user-reported "bogus result still appears" failure mode: weak
368+
* catalog hits visible alongside semantic results lets the LLM
369+
* consumer anchor on the wrong answer. When semantic returned
370+
* useful (post-filter) chunks, the weak catalog is now suppressed
371+
* from the rendered output entirely (still present in
372+
* `result.result.catalogMatches` for programmatic consumers).
373+
*
374+
* This tests the data-shape that the formatter consumes; the
375+
* formatter test (`tests/utils/format.test.ts`) verifies the
376+
* suppression actually happens at render time.
377+
*/
378+
it("returns semanticHealth='ok' with weak catalog still in result.catalogMatches", async () => {
379+
mockLookupError.mockReturnValue({
380+
query: "note already nullified",
381+
catalogMatches: [
382+
catalogHit(54, "Contract already initialized", "word-overlap"),
383+
],
384+
codeMatches: [],
385+
});
386+
387+
const client = makeClient({
388+
search: vi.fn().mockResolvedValue([
389+
{
390+
text: "Notes in Aztec are nullified by emitting a nullifier...",
391+
title: "Note Lifecycle",
392+
source: "docs/notes.md",
393+
},
394+
]),
395+
});
396+
397+
const result = await lookupAztecError(
398+
{ query: "note already nullified" },
399+
client
400+
);
401+
expect(result.semanticHealth).toBe("ok");
402+
expect(result.semanticResults).toHaveLength(1);
403+
// The weak catalog hit is preserved in the data — the formatter
404+
// is responsible for hiding it. Programmatic consumers can still
405+
// see all signals.
406+
expect(result.result.catalogMatches).toHaveLength(1);
407+
expect(result.result.catalogMatches[0].score).toBe(54);
408+
});
409+
410+
it("when semantic is filtered out (all path-only) AND catalog is weak, keeps catalog", async () => {
411+
mockLookupError.mockReturnValue({
412+
query: "note already nullified",
413+
catalogMatches: [
414+
catalogHit(54, "Contract already initialized", "word-overlap"),
415+
],
416+
codeMatches: [],
417+
});
418+
419+
const client = makeClient({
420+
search: vi.fn().mockResolvedValue([
421+
// Path-only chunks that the filter will drop
422+
{ text: "\n\naztec-nr/aztec/src/foo.nr\n",
423+
title: "foo.nr",
424+
source: "aztec-nr/aztec/src/foo.nr" },
425+
]),
426+
});
427+
428+
const result = await lookupAztecError(
429+
{ query: "note already nullified" },
430+
client
431+
);
432+
// semantic returned empty (after filter) → no_results
433+
expect(result.semanticHealth).toBe("no_results");
434+
// Weak catalog stays in the result so the user has *some* signal
435+
expect(result.result.catalogMatches).toHaveLength(1);
436+
expect(result.message).toContain("low-confidence");
437+
expect(result.message).toMatch(/no relevant documentation|Semantic search/i);
438+
});
439+
});
440+
295441
describe("lookupAztecError — semantic failure (sanitized message)", () => {
296442
it("sets semanticHealth='failed' and returns sanitized message on 401", async () => {
297443
const client = makeClient({
@@ -325,7 +471,7 @@ describe("lookupAztecError — version-mismatch gate", () => {
325471
it("blocks semantic fallback when local clone diverges from corpus", async () => {
326472
mockGetRepoTag.mockResolvedValue("v4.1.0");
327473
const client = makeClient({
328-
search: vi.fn().mockResolvedValue([{ text: "x", title: "x", source: "x" }]),
474+
search: vi.fn().mockResolvedValue([{ text: "Some prose body content here.", title: "T", source: "x" }]),
329475
getCorpusVersion: vi.fn().mockResolvedValue({ aztec_corpus_version: "v4.2.0" }),
330476
});
331477

@@ -340,7 +486,7 @@ describe("lookupAztecError — version-mismatch gate", () => {
340486
mockGetRepoTag.mockResolvedValue("v4.1.0");
341487
const client = makeClient({
342488
search: vi.fn().mockResolvedValue([
343-
{ text: "x", title: "x", source: "x" },
489+
{ text: "Some prose body content here.", title: "T", source: "x" },
344490
]),
345491
getCorpusVersion: vi.fn().mockResolvedValue({ aztec_corpus_version: "v4.2.0" }),
346492
});

0 commit comments

Comments
 (0)