Skip to content

Commit 53db43a

Browse files
committed
[recipes] Fix REVIEW-CODEX-2-P2: numeric disambiguator for slugify collisions
1 parent 159e221 commit 53db43a

2 files changed

Lines changed: 46 additions & 3 deletions

File tree

recipes/entity-wiki/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ Pick the mode that matches how you plan to consume the wikis. Each has its own c
204204

205205
| Mode | Where it lives | Pros | Cons |
206206
|------|----------------|------|------|
207-
| `file` (default) | `./wikis/<slug>.md` | Human-readable, git-versionable, Obsidian-compatible, zero DB writes | Not queryable from SQL or MCP tools; lives outside the brain |
207+
| `file` (default) | `./wikis/<slug>.md` | Human-readable, git-versionable, Obsidian-compatible, zero DB writes | Not queryable from SQL or MCP tools; lives outside the brain. The slug is derived from the entity type and `canonical_name` with non-alphanumerics stripped, so distinct entities such as `C`, `C#`, and `C++` share a base slug (e.g. `tool-c`). When the base path already belongs to a different entity, the writer appends `-1`, `-2`, ... to avoid overwriting it and logs a warning. Re-running for the same entity id still overwrites that entity's own file. |
208208
| `entity-metadata` | `entities.metadata.wiki_page` JSONB | Queryable via SQL, travels with the entity, no new rows | Not searchable via embeddings, not picked up by `search_thoughts` |
209209
| `thought` | A new row in `public.thoughts` with `metadata.type = 'dossier'` | Retrievable via normal search / MCP tools, full provenance back to the atoms it summarizes | **Requires `EMBEDDING_API_KEY`** (the dossier is embedded at write time so match_thoughts can find it). **Can pollute semantic search** — a long dossier that restates 20 atoms will match many queries and rank above the atoms themselves |
210210

recipes/entity-wiki/generate-wiki.mjs

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -594,10 +594,53 @@ function buildFrontmatter(entity, sourceCounts, provenance) {
594594
return lines.join("\n");
595595
}
596596

597+
// Resolve an output path that doesn't silently overwrite another entity's
598+
// wiki. slugify() strips non-alphanumerics, so distinct entities like `C`,
599+
// `C#`, and `C++` all collapse to the same base slug (e.g. `tool-c`). To keep
600+
// re-runs idempotent for the same entity while preventing cross-entity
601+
// clobber, we:
602+
// 1. Try the base slug first. If the file doesn't exist, use it.
603+
// 2. If it exists, peek at its `entity_id:` frontmatter line. If it belongs
604+
// to this entity, overwrite (idempotent re-run).
605+
// 3. Otherwise another entity owns the base path — append `-1`, `-2`, ...
606+
// until we find a free path (or one that already belongs to us).
607+
// Logs a warning on every collision so users see when their entities are
608+
// colliding and can pick better canonical names.
609+
function resolveOutputPath(outDir, baseSlug, entity) {
610+
const tryPath = (suffix) => path.join(outDir, `${baseSlug}${suffix}.md`);
611+
const ownedBy = (p) => {
612+
try {
613+
const head = fs.readFileSync(p, "utf8").slice(0, 2048);
614+
const match = head.match(/^entity_id:\s*(\S+)/m);
615+
return match ? String(match[1]) === String(entity.id) : false;
616+
} catch {
617+
return false;
618+
}
619+
};
620+
let candidate = tryPath("");
621+
if (!fs.existsSync(candidate) || ownedBy(candidate)) return candidate;
622+
// Collision with a different entity — warn and pick a numeric suffix.
623+
for (let i = 1; i < 1000; i++) {
624+
candidate = tryPath(`-${i}`);
625+
if (!fs.existsSync(candidate) || ownedBy(candidate)) {
626+
console.warn(
627+
`[wiki] slug collision on "${baseSlug}.md" for entity #${entity.id} ` +
628+
`${entity.canonical_name} (${entity.entity_type}); writing as ` +
629+
`"${path.basename(candidate)}". Consider disambiguating canonical names.`,
630+
);
631+
return candidate;
632+
}
633+
}
634+
throw new Error(
635+
`[wiki] gave up finding a non-colliding path for "${baseSlug}.md" ` +
636+
`(entity #${entity.id}); too many collisions in ${outDir}.`,
637+
);
638+
}
639+
597640
function writeFile(wiki, entity, sourceCounts, provenance, outDir) {
598641
fs.mkdirSync(outDir, { recursive: true });
599-
const filename = `${slugify(entity.canonical_name, entity.entity_type)}.md`;
600-
const filepath = path.join(outDir, filename);
642+
const baseSlug = slugify(entity.canonical_name, entity.entity_type);
643+
const filepath = resolveOutputPath(outDir, baseSlug, entity);
601644
fs.writeFileSync(filepath, buildFrontmatter(entity, sourceCounts, provenance) + wiki + "\n", "utf8");
602645
return filepath;
603646
}

0 commit comments

Comments
 (0)