From be1d2f64b95dd86bafb1937909501a516f6619d5 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 20 Jun 2026 19:44:50 +0000 Subject: [PATCH 1/3] =?UTF-8?q?feat(contract):=20D-GV2-2=20=E2=80=94=20per?= =?UTF-8?q?-family=20Codebook=20registry=20(gated=20guid-v2-tail)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Continues the v2 arc after #560 merged. The type + in-memory registry tier of the family→Codebook scoping (the Lance-backed/OntologyRegistry tier is deferred). Zero-dep, feature-gated (default OFF). contract::codebook: - Codebook — insertion-ordered index↔label interning, 1-byte index, CODEBOOK_CAP=256; intern() returns None on overflow (the split-the-family signal, never widen the byte). - FamilyCodebookRegistry — family(u16) → Codebook; intern(family,label), resolve(family,index) for cross-family decode. Per-family scoping: the SAME label gets INDEPENDENT indices in different families (no global codebook contamination — this is what dissolves the aiwar "60 noisy families"). The finer sibling of classid→ClassView; the family node's episodic-basin content (E-MIXIN-IS-AN-ADDRESS-REFERENCE-NOT-A-COPY); the 256×256 Morton tile (≤256 leaves for the 1-byte in-family index, E-UNIFORM-MORTON-TILE-PYRAMID). 3 tests (dedup/sequential, overflow-split, per-family scoping). --features guid-v2-tail green; default build clean (codebook absent); clippy -D warnings clean both. Plan D-GV2-2 marked PARTIAL; AGENT_LOG updated. 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01CcpLeEC3XK8Eye53GKBVvi --- .claude/board/AGENT_LOG.md | 4 + .../guid-v2-tail-per-family-codebook-v1.md | 2 +- crates/lance-graph-contract/src/codebook.rs | 174 ++++++++++++++++++ crates/lance-graph-contract/src/lib.rs | 3 + 4 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 crates/lance-graph-contract/src/codebook.rs diff --git a/.claude/board/AGENT_LOG.md b/.claude/board/AGENT_LOG.md index d148fbea..11a13b9f 100644 --- a/.claude/board/AGENT_LOG.md +++ b/.claude/board/AGENT_LOG.md @@ -1,3 +1,7 @@ +## 2026-06-20 (cont.¹⁰) — D-GV2-2 partial: per-family Codebook (contract::codebook, gated) + +**Main thread (Opus), autoattended.** #560 merged (synced main c05394f4; #558 also merged). Continued the greenlit v2 arc with **D-GV2-2** (type + in-memory registry tier): NEW `contract::codebook` (feature `guid-v2-tail`, zero-dep, default OFF) — `Codebook` (insertion-ordered index↔label, 1-byte index, `CODEBOOK_CAP=256`, overflow→None = split-the-family signal) + `FamilyCodebookRegistry` (`family→Codebook`, per-family scoping so the SAME label gets independent indices per family, `resolve(family,index)` for cross-family decode). The finer sibling of `classid→ClassView`; the family node's episodic-basin content; the 256×256 Morton tile (≤256 leaves for the 1-byte in-family index). Dissolves the aiwar "60 noisy families" at the root (per-family vocabularies are small + clean). 3 tests; `--features guid-v2-tail` green, default build clean (codebook absent), clippy clean both. **DEFERRED:** Lance-backed persistence + OntologyRegistry integration in lance-graph-ontology. Next: D-GV2-3 (soa_graph per-family edges under v2) + D-GV2-4 (aiwar re-key). New PR off main. 
**558/559 (OpenProject/Redmine bridges, other arc) still have open comments — left for that arc.** + ## 2026-06-20 (cont.⁹) — PR #560 codex P2 review fixes (gremlin bag semantics + aiwar cross-family edges) **Main thread (Opus), autoattended.** Two unresolved P2 codex threads on PR #560, both fixed: (1) `graph_gremlin.rs` `step()` silently deduped targets via a `seen` set — broke Gremlin bag/multiset semantics (`v(["A","C"]).out().count()` = 1 not 2 when both reach B). Rewrote to per-traverser emission (duplicates preserved); added explicit `dedup()` step + `out_preserves_bag_multiplicity` test. (2) `aiwar.rs` `aiwar_node_rows` put cross-category adapter bytes into the first 12 `in_family` slots (labeled `linked`), so `references` queries missed them and the label flipped with fan-out count — aiwar edges are ALL cross-family, so they now go to the 4 `out_family` slots (`references`), cap 4; test asserts `references` present + no `linked`. contract aiwar 3/3, callcenter gremlin 8/8 (+1 bag test), clippy clean (my files; pre-existing TD-CALLCENTER-QUERY-CLIPPY untouched). Pushed to #560; both review threads resolved. **558/559 (NOT mine — OpenProject/Redmine ontology bridges) checked: NOT all resolved** — #558 2 open (codex P2 seed-context-id + CodeRabbit unit-tests), #559 1 open P1 (Redmine/OpenProject entity_type_id convergence). Surfaced to operator (different arc); not auto-fixed. 
diff --git a/.claude/plans/guid-v2-tail-per-family-codebook-v1.md b/.claude/plans/guid-v2-tail-per-family-codebook-v1.md index 1c99e6a7..f47ffb97 100644 --- a/.claude/plans/guid-v2-tail-per-family-codebook-v1.md +++ b/.claude/plans/guid-v2-tail-per-family-codebook-v1.md @@ -108,7 +108,7 @@ coarse→fine left-to-right: `…·TWIG·leaf` (routing) then `family·identity` ## Deliverables (when greenlit — feature `guid-v2-tail`, default OFF) - **D-GV2-1** ✅ **SHIPPED (this PR, feature `guid-v2-tail` default OFF, additive & non-breaking).** `canonical_node`: `new_v2` (+`leaf`), `leaf()` `10..12`, `family_v2()` `12..14`, `identity_v2()` `14..16` (all `u16`), `local_key_v2()` (4 bytes), `decode_v2()`/`GuidPartsV2`, `to_hex_v2()` (uniform 4-hex), `GUID_TAIL_LAYOUT_VERSION_V2 = 2` (version gate). `hhtl::from_guid_prefix_v2` = `HEEL·HIP·TWIG·leaf` (16 nibbles; classid is the separate codebook prefix; leaf in path, family/identity NOT). v1 `new`/`family()`/`identity()` UNTOUCHED (distinct v2 names → no silent semantic swap, `I-LEGACY-API-FEATURE-GATED`). **Field-isolation matrix test** + v1/v2 coexistence + leaf-in-path tests. default 703 / `--features guid-v2-tail` 706, clippy clean both. **Cutover (rename v2→canonical, deprecate v1, bump `ENVELOPE_LAYOUT_VERSION`) = D-GV2-5.** -- **D-GV2-2** `family → Codebook` registry = **episodic basin** (the codebook + the basin's accumulated supporting edges), sibling of `classid → ClassView` in `lance-graph-ontology`: `LazyLock`/Lance-backed, masked-load lookup, head-only. 256-entry cap + split-on-overflow guard. Mixin = O(1) reference to this basin (`E-MIXIN-IS-AN-ADDRESS-REFERENCE-NOT-A-COPY`). +- **D-GV2-2** ◐ **PARTIAL — type + in-memory registry SHIPPED** (`contract::codebook`, feature `guid-v2-tail`): `Codebook` (insertion-ordered `index ↔ label`, 1-byte index, `CODEBOOK_CAP=256` with overflow→`None` split-signal) + `FamilyCodebookRegistry` (`family → Codebook`, `intern`/`resolve(family,index)`/per-family scoping). 
The finer sibling of `classid → ClassView`; the family node's episodic-basin content (`E-MIXIN-IS-AN-ADDRESS-REFERENCE-NOT-A-COPY`); the 256×256 Morton tile, ≤256 leaves for the 1-byte index. 3 tests (dedup/sequential, overflow-split, per-family scoping). **DEFERRED:** the `LazyLock`/Lance-backed persistence + `OntologyRegistry` integration in `lance-graph-ontology` (the heavyweight runtime tier). - **D-GV2-3** `soa_graph` per-family edge resolution: 12 in-family = 1-byte own-codebook index, 4 out-of-family = `(family,index)`; retire `family & 0xFF` collision-skip under v2. - **D-GV2-4** `aiwar` re-keyed on `leaf` (coarse node-type, 5 hubs) + per-family codebook (System/Stakeholder/… vocabularies) → resolves the "60 noisy families" on real data. - **D-GV2-5** cutover: flip default after the gating numbers + downstream (none today) confirmed; v1 → `#[deprecated]` no-op path with migration pointer. diff --git a/crates/lance-graph-contract/src/codebook.rs b/crates/lance-graph-contract/src/codebook.rs new file mode 100644 index 00000000..7e988a5c --- /dev/null +++ b/crates/lance-graph-contract/src/codebook.rs @@ -0,0 +1,174 @@ +//! `codebook` — per-family codebook (D-GV2-2, feature `guid-v2-tail`). +//! +//! The finer sibling of [`class_view`](crate::class_view) (`classid → ClassView`): +//! here **`family → Codebook`**. Each family owns a ≤256-entry vocabulary that its +//! nodes index by a **1-byte in-family adapter** — the 256×256 Morton centroid +//! tile of `E-UNIFORM-MORTON-TILE-PYRAMID`, with ≤256 leaves for the 1-byte index. +//! +//! Why per-family (not one global codebook): it dissolves the aiwar "60 noisy +//! families" at the root — each family's vocabulary is small and clean, and a +//! within-family reference is an **exact** index into that family's codebook (no +//! `& 0xFF` low-byte aliasing). The `family` tier (u16) selects the codebook +//! (head-only routing); the 1-byte index resolves within it. Cross-family edges +//! 
carry `(family, index)` and decode via [`FamilyCodebookRegistry::resolve`]. +//! +//! A family that outgrows 256 entries **splits** (mint a sub-family — cheap with +//! the v2 16-bit family tier), never widens the byte ([`Codebook::intern`] +//! returns `None` on overflow as the split signal). The codebook is the family +//! node's **episodic basin** content (`E-MIXIN-IS-AN-ADDRESS-REFERENCE-NOT-A-COPY`): +//! members reference it by the 1-byte index, the shared vocabulary lives once. +//! +//! This module is the TYPE + in-memory registry (the `LazyLock` tier). The +//! Lance-backed persistence + `OntologyRegistry` integration are deferred to the +//! ontology-crate wiring step (see plan `guid-v2-tail-per-family-codebook-v1.md`). + +use std::collections::HashMap; + +/// Max entries per family codebook — the 1-byte in-family index cap. A family +/// that needs more SPLITS (mint a sub-family), never widens the byte. +pub const CODEBOOK_CAP: usize = 256; + +/// A per-family codebook: insertion-ordered label interning, `index ↔ label`. +/// `index` is the 1-byte in-family adapter value (`0..len`). ≤[`CODEBOOK_CAP`]. +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct Codebook { + entries: Vec, // index → label (insertion order) + by_label: HashMap, // label → index +} + +impl Codebook { + /// An empty codebook. + pub fn new() -> Self { + Self::default() + } + + /// Intern `label` → its 1-byte index (insertion order, deduped). Returns + /// `None` if the codebook is full (256) and `label` is new — the caller must + /// SPLIT the family (the `CODEBOOK_CAP` overflow signal). An already-present + /// label always resolves (even at capacity). 
+ pub fn intern(&mut self, label: &str) -> Option { + if let Some(&i) = self.by_label.get(label) { + return Some(i); + } + if self.entries.len() >= CODEBOOK_CAP { + return None; + } + let i = self.entries.len() as u8; + self.entries.push(label.to_string()); + self.by_label.insert(label.to_string(), i); + Some(i) + } + + /// The 1-byte index of `label`, if interned. + pub fn index_of(&self, label: &str) -> Option { + self.by_label.get(label).copied() + } + + /// The label at `index`, if present. + pub fn label(&self, index: u8) -> Option<&str> { + self.entries.get(index as usize).map(String::as_str) + } + + /// Number of interned entries. + pub fn len(&self) -> usize { + self.entries.len() + } + + /// Whether the codebook holds no entries. + pub fn is_empty(&self) -> bool { + self.entries.is_empty() + } + + /// Whether the codebook is at [`CODEBOOK_CAP`] (a new label would overflow → + /// split the family). + pub fn is_full(&self) -> bool { + self.entries.len() >= CODEBOOK_CAP + } +} + +/// `family → Codebook` — the per-family codebook registry, the finer sibling of +/// `classid → ClassView`. In-memory (the `LazyLock` tier); a Lance-backed, +/// `OntologyRegistry`-integrated variant is deferred. +#[derive(Debug, Clone, Default)] +pub struct FamilyCodebookRegistry { + books: HashMap, +} + +impl FamilyCodebookRegistry { + /// An empty registry. + pub fn new() -> Self { + Self::default() + } + + /// The codebook for `family`, creating an empty one if absent. + pub fn entry(&mut self, family: u16) -> &mut Codebook { + self.books.entry(family).or_default() + } + + /// The codebook for `family`, if it exists (read-only). + pub fn get(&self, family: u16) -> Option<&Codebook> { + self.books.get(&family) + } + + /// Intern `label` into `family`'s codebook → its 1-byte index. `None` on + /// codebook overflow (split the family). 
+ pub fn intern(&mut self, family: u16, label: &str) -> Option { + self.entry(family).intern(label) + } + + /// Resolve a cross-family reference `(family, index)` → label — the decode of + /// an out-of-family adapter / `references` edge. + pub fn resolve(&self, family: u16, index: u8) -> Option<&str> { + self.books.get(&family).and_then(|cb| cb.label(index)) + } + + /// Number of families with a codebook. + pub fn families(&self) -> usize { + self.books.len() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn intern_dedups_and_assigns_sequential_indices() { + let mut cb = Codebook::new(); + assert_eq!(cb.intern("Nation"), Some(0)); + assert_eq!(cb.intern("TechCompany"), Some(1)); + assert_eq!(cb.intern("Nation"), Some(0)); // dedup + assert_eq!(cb.len(), 2); + assert_eq!(cb.index_of("TechCompany"), Some(1)); + assert_eq!(cb.label(0), Some("Nation")); + assert_eq!(cb.label(9), None); + } + + #[test] + fn codebook_overflow_signals_split() { + let mut cb = Codebook::new(); + for i in 0..CODEBOOK_CAP { + assert!(cb.intern(&format!("e{i}")).is_some()); + } + assert!(cb.is_full()); + // a NEW label overflows → None (split the family)… + assert_eq!(cb.intern("one_too_many"), None); + // …but an already-interned label still resolves at capacity. + assert_eq!(cb.intern("e0"), Some(0)); + } + + #[test] + fn registry_scopes_codebooks_per_family() { + // The SAME label gets INDEPENDENT indices in different families — the + // whole point of per-family scoping (no global contamination). 
+ let mut reg = FamilyCodebookRegistry::new(); + assert_eq!(reg.intern(0x0001, "Issue"), Some(0)); + assert_eq!(reg.intern(0x0001, "Bug"), Some(1)); + assert_eq!(reg.intern(0x0002, "Issue"), Some(0)); // family 2's own index 0 + assert_eq!(reg.families(), 2); + // cross-family resolve (family, index) → label + assert_eq!(reg.resolve(0x0001, 1), Some("Bug")); + assert_eq!(reg.resolve(0x0002, 0), Some("Issue")); + assert_eq!(reg.resolve(0x0099, 0), None); // unknown family + } +} diff --git a/crates/lance-graph-contract/src/lib.rs b/crates/lance-graph-contract/src/lib.rs index 02f1d8db..fc3eca76 100644 --- a/crates/lance-graph-contract/src/lib.rs +++ b/crates/lance-graph-contract/src/lib.rs @@ -46,6 +46,9 @@ pub mod callcenter; pub mod cam; pub mod canonical_node; pub mod class_view; +/// D-GV2-2 — per-family codebook (`family → Codebook`), gated on the v2 tail. +#[cfg(feature = "guid-v2-tail")] +pub mod codebook; pub mod codegen_manifest; pub mod codegen_spine; pub mod cognitive_shader; From 5eccf40debab7cf5ffa181c42e3983558968dac0 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 20 Jun 2026 19:58:21 +0000 Subject: [PATCH 2/3] =?UTF-8?q?docs(migration):=20ogar-vocab=20=E2=87=84?= =?UTF-8?q?=20contract=20codebook=20seam=20=E2=80=94=20migration=20doc=20+?= =?UTF-8?q?=20canon=20conflict?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Operator asked to DO the ontology-schema migration documentation. Grounded in OGAR crates/ogar-vocab/src/lib.rs (the real codebook): it already defines CODEBOOK (domain-encoded 0xDDCC), ConceptDomain + canonical_concept_domain, source_domain_concept(project|erp), canonical_concept_id, and LabelDTO — and its own note says LabelDTO "long-term belongs in lance-graph-contract; codebook id == NodeGuid.classid low u16." 
Surfaces a canon CONFLICT: merged CLASSID_OSINT=0x0007 routes to OGAR's Reserved domain (OSINT is 0x07XX); CLASSID_FMA=0x0008 sits in OGAR's OCR block (FMA/anatomy is clinical → Health 0x09XX). Root cause: 0x0007 minted from the early "OSINT is 0x0007" guess before ogar-vocab's 0xDDCC layout was consulted. New .claude/plans/ogar-vocab-contract-codebook-migration-v1.md (D-OVC-1..4): host the codebook/ConceptDomain/LabelDTO in contract, classids follow 0xDDCC (mint project 0x01XX + ERP 0x02XX; realign OSINT→0x0700, FMA→Health). The per-family codebook (D-GV2-2) is the finer scope of the same idea. NO code minted/rewritten: realigning merged OSINT/FMA rewrites canon (#557/#560 + CLAUDE.md canon block) → operator sign-off required (plan §5, three decisions). INTEGRATION_PLANS prepended; ISSUES ISS-CLASSID-OGAR-DRIFT filed; AGENT_LOG updated. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01CcpLeEC3XK8Eye53GKBVvi --- .claude/board/AGENT_LOG.md | 4 + .claude/board/INTEGRATION_PLANS.md | 6 + .claude/board/ISSUES.md | 5 + ...ar-vocab-contract-codebook-migration-v1.md | 128 ++++++++++++++++++ 4 files changed, 143 insertions(+) create mode 100644 .claude/plans/ogar-vocab-contract-codebook-migration-v1.md diff --git a/.claude/board/AGENT_LOG.md b/.claude/board/AGENT_LOG.md index 11a13b9f..c2ad067d 100644 --- a/.claude/board/AGENT_LOG.md +++ b/.claude/board/AGENT_LOG.md @@ -1,3 +1,7 @@ +## 2026-06-20 (cont.¹¹) — ogar-vocab⇄contract codebook migration doc + canon-conflict surfaced + +**Main thread (Opus), autoattended.** Operator: "point [to migration docs] as in DO it" + diagnosed the ontology/contract/q2 triangle seams. 
Grounded in OGAR `crates/ogar-vocab/src/lib.rs` (read, not guessed): it already defines `CODEBOOK` (domain-encoded `0xDDCC`, :1073), `ConceptDomain` + `canonical_concept_domain` (:1141/:1163), `source_domain_concept("project"|"erp")` (:1186), `canonical_concept_id` (:1214), `LabelDTO` (:1476) — and its own note (:1208) says `LabelDTO` "long-term belongs in lance-graph-contract; codebook id == NodeGuid.classid low u16." **Found a real canon conflict:** merged `CLASSID_OSINT=0x0007` is OGAR's *Reserved* domain (OSINT=`0x07XX`); `CLASSID_FMA=0x0008` is OGAR's *OCR* block (FMA/anatomy≈Health `0x09XX`). Wrote `.claude/plans/ogar-vocab-contract-codebook-migration-v1.md` (D-OVC-1..4): host codebook/ConceptDomain/LabelDTO in contract, classids follow `0xDDCC` (mint project `0x01XX`+ERP `0x02XX`; realign OSINT→`0x0700`, FMA→Health). INTEGRATION_PLANS prepended; ISSUES `ISS-CLASSID-OGAR-DRIFT` filed. **Did NOT mint/rewrite code:** the OSINT/FMA realign rewrites merged canon + the CLAUDE.md canon block → operator sign-off required (plan §5). Surfaced 3 decisions: (1) realign OSINT/FMA? (2) OGAR↔contract dependency direction (move vs wire-compat)? (3) FMA → Health 0x09XX or new anatomy domain? Doc committed to the jirak branch (PR #561 arc). + ## 2026-06-20 (cont.¹⁰) — D-GV2-2 partial: per-family Codebook (contract::codebook, gated) **Main thread (Opus), autoattended.** #560 merged (synced main c05394f4; #558 also merged). Continued the greenlit v2 arc with **D-GV2-2** (type + in-memory registry tier): NEW `contract::codebook` (feature `guid-v2-tail`, zero-dep, default OFF) — `Codebook` (insertion-ordered index↔label, 1-byte index, `CODEBOOK_CAP=256`, overflow→None = split-the-family signal) + `FamilyCodebookRegistry` (`family→Codebook`, per-family scoping so the SAME label gets independent indices per family, `resolve(family,index)` for cross-family decode). 
The finer sibling of `classid→ClassView`; the family node's episodic-basin content; the 256×256 Morton tile (≤256 leaves for the 1-byte in-family index). Dissolves the aiwar "60 noisy families" at the root (per-family vocabularies are small + clean). 3 tests; `--features guid-v2-tail` green, default build clean (codebook absent), clippy clean both. **DEFERRED:** Lance-backed persistence + OntologyRegistry integration in lance-graph-ontology. Next: D-GV2-3 (soa_graph per-family edges under v2) + D-GV2-4 (aiwar re-key). New PR off main. **558/559 (OpenProject/Redmine bridges, other arc) still have open comments — left for that arc.** diff --git a/.claude/board/INTEGRATION_PLANS.md b/.claude/board/INTEGRATION_PLANS.md index ca34e61d..b7685bb7 100644 --- a/.claude/board/INTEGRATION_PLANS.md +++ b/.claude/board/INTEGRATION_PLANS.md @@ -1,3 +1,9 @@ +## 2026-06-20 — ogar-vocab ⇄ contract codebook migration (PROPOSED; surfaces a canon conflict) + +Plan: `.claude/plans/ogar-vocab-contract-codebook-migration-v1.md`. Closes the ontology→contract seam: OGAR `ogar-vocab` already defines the class-identity codebook (`CODEBOOK` domain-encoded `0xDDCC`, `ConceptDomain`, `source_domain_concept`, `canonical_concept_id`, `LabelDTO`) and its own doc says `LabelDTO` "long-term belongs in lance-graph-contract … codebook ids and the NodeGuid.classid u16 low half are wire-compatible." **Conflict surfaced:** merged `CLASSID_OSINT=0x0007` routes to OGAR's *Reserved* domain (OSINT is `0x07XX`), and `CLASSID_FMA=0x0008` sits in OGAR's OCR block (FMA/anatomy ≈ Health `0x09XX`). Target: contract hosts the codebook/`ConceptDomain`/`LabelDTO`, classids follow `0xDDCC` (mint project `0x01XX` + ERP `0x02XX`; realign OSINT→`0x0700`, FMA→Health). D-OVC-1..4. **Gated on operator sign-off (canon realign of merged OSINT/FMA + the OGAR↔contract dependency direction) — see plan §5.** Per-family codebook (D-GV2-2) is the finer scope of the same idea. Cross-ref ISSUES `ISS-CLASSID-OGAR-DRIFT`. 
+ +--- + ## 2026-06-20 — guid-v2-tail + per-family codebook scoping (PROPOSED, operator what-if) Plan: `.claude/plans/guid-v2-tail-per-family-codebook-v1.md`. Repartition the 48-bit basin tail `family(u24)|identity(u24)` → `leaf(u16)|family(u16)|identity(u16)` (whole key = uniform 8×u16 tiers), and scope **codebooks per family** (`family → Codebook`, the finer sibling of `classid → ClassView`; 12 in-family edge slots = 1-byte index into own family codebook, 4 out-of-family = `(family,index)`). Dissolves the aiwar "60 noisy families" at the root (per-family ≤256 vocabularies), kills the awkward `u24`, makes family-adapter resolution exact, and gives the 3-tier subclass codebook a native home. **Blast radius measured: CONTAINED in lance-graph** (q2/smb-office-rs/medcare-rs = 0; routing prefix `from_guid_prefix`/`mailbox_scan` is tail-agnostic; ~3 layout files + ~35 mostly-test `NodeGuid::new` call sites). **PROPOSED — gated on operator sign-off (canon version bump) + two numbers:** ≤65 536 identities per `(leaf,family)` bucket, ≤256 codebook entries per family before split. Ships feature-gated `guid-v2-tail` (default OFF) per `I-LEGACY-API-FEATURE-GATED` (field-isolation matrix + version gate). D-GV2-1..5. diff --git a/.claude/board/ISSUES.md b/.claude/board/ISSUES.md index 6b7d494f..6da33661 100644 --- a/.claude/board/ISSUES.md +++ b/.claude/board/ISSUES.md @@ -263,3 +263,8 @@ flip Open entry to Superseded. **When an issue is deferred knowingly** — leave it Open here but also append a row to `TECH_DEBT.md` with cross-ref back. + +## ISS-CLASSID-OGAR-DRIFT — 2026-06-20 — OPEN (needs operator sign-off) +**What:** merged `lance-graph-contract` classids drifted from OGAR `ogar-vocab`'s domain-encoded codebook (`0xDDCC`, `crates/ogar-vocab/src/lib.rs:1073` CODEBOOK + `:1163` `canonical_concept_domain`). `CLASSID_OSINT=0x0007` → `0x00` = OGAR *Reserved* domain (OSINT is `0x07XX`); `CLASSID_FMA=0x0008` → OGAR *OCR* block (FMA/anatomy is clinical → Health `0x09XX`). 
OGAR's own note (`lib.rs:1204-1212`): codebook id == `NodeGuid.classid` low u16, and `LabelDTO` "long-term belongs in lance-graph-contract." So contract + OGAR currently disagree on what `0x07`/`0x08` mean. +**Impact:** the contract↔OGAR↔q2 triangle has an inconsistent classid space; `canonical_concept_domain(id>>8)` mis-routes contract's OSINT/FMA; project/ERP un-minted. +**Fix (proposed):** `.claude/plans/ogar-vocab-contract-codebook-migration-v1.md` D-OVC-1..4 — host the codebook/`ConceptDomain`/`LabelDTO` in contract, classids follow `0xDDCC` (mint project `0x01XX`+ERP `0x02XX`; realign OSINT→`0x0700`, FMA→Health `0x09XX`). **Realigning merged OSINT/FMA rewrites canon (#557/#560 + CLAUDE.md canon block) → operator sign-off required** (plan §5). Origin: `CLASSID_OSINT=0x0007` minted from the early "OSINT is 0x0007" guess before ogar-vocab's `0xDDCC` layout was consulted. diff --git a/.claude/plans/ogar-vocab-contract-codebook-migration-v1.md b/.claude/plans/ogar-vocab-contract-codebook-migration-v1.md new file mode 100644 index 00000000..fe3836e4 --- /dev/null +++ b/.claude/plans/ogar-vocab-contract-codebook-migration-v1.md @@ -0,0 +1,128 @@ +# Migration — OGAR `ogar-vocab` codebook ⇄ `lance-graph-contract` classid (v1) + +> **Status:** PROPOSED (2026-06-20). Surfaces a **canon conflict** between merged +> `lance-graph-contract` classids and OGAR's `ogar-vocab` codebook; the +> reconciliation rewrites merged canon (`CLASSID_OSINT`/`CLASSID_FMA`) and so is +> gated on operator sign-off. +> **The triangle:** ontology (OGAR `ogar-vocab`) → contract (`NodeGuid`/`ClassId`) +> → q2 (Quadro-2 cockpit consuming `GraphSnapshot`). + +--- + +## 1 — The seam, grounded (file:line) + +OGAR's `crates/ogar-vocab/src/lib.rs` already defines the canonical class +identity layer, and **its own doc-comment says where it belongs**: + +- **`CODEBOOK`** (`lib.rs:1073`) — curated `(canonical_concept, u16)` table, ids + assigned (never hashed). 
Domain-encoded `0xDDCC` (high byte = domain). +- **`ConceptDomain`** (`lib.rs:1141`) + **`canonical_concept_domain(id)→ConceptDomain`** + (`lib.rs:1163`, routes on `id >> 8`, O(1) no-lookup). +- **`source_domain_concept("project"|"erp"|"german-erp")→ConceptDomain`** + (`lib.rs:1186`) — the seam from `Class::source_domain` (the coarse curator tag, + `lib.rs:193`) to the typed domain. +- **`canonical_concept_id(concept)→Option`** (`lib.rs:1214`) + + `Class::canonical_id()/canonical_id_le()` (`lib.rs:1026/1034`). +- **`LabelDTO { label, id: u16, canonical }`** (`lib.rs:1476`) + `from_alias()` — + consumer alias → shared codebook id. **`lib.rs:1208`:** *"The contract type + (`LabelDTO`) lives in `ogar-vocab` today; **long-term it belongs in + `lance-graph-contract`** alongside `ClassId` and the `NodeGuid` LE layout. Wire + is the source of truth: any encoder/decoder agreeing on `u16` LE is compatible + regardless of which crate exports the DTO."* And `lib.rs:1204-1206`: *"codebook + ids and the `NodeGuid.classid` u16 low half are wire-compatible."* + +So **the OGAR codebook id IS the contract classid (low u16)** — one wire value, +two crates. Contract has none of it yet (grep: no `ogar-vocab` reference in +`lance-graph-contract`). + +## 2 — The conflict (the migration gap) + +OGAR's `0xDDCC` domain layout vs the classids contract minted this session +(#557/#560, merged): + +| Domain | OGAR `ConceptDomain` block | contract today | Aligned? 
| +|---|---|---|---| +| project-mgmt (OP↔Redmine) | `0x01XX` | — (un-minted) | n/a — **mint** | +| commerce/ERP (OSB↔Odoo) | `0x02XX` | — (un-minted) | n/a — **mint** | +| OSINT | **`0x07XX`** | `CLASSID_OSINT = 0x0007` | ❌ `0x0007 >> 8 = 0x00` = **Reserved**, not OSINT | +| OCR | `0x08XX` | `CLASSID_FMA = 0x0008` | ❌ `0x0008 >> 8 = 0x00` = **Reserved** too; only its low byte echoes OCR's `0x08` | +| Health (clinical) | `0x09XX` | (FMA anatomy ≈ Health) | ❌ FMA/anatomy is medical → belongs `0x09XX`, not `0x0008` | + +Root cause: `CLASSID_OSINT=0x0007` was minted from the early guess "OSINT is +0x0007" before `ogar-vocab`'s domain-encoded layout was consulted. Under OGAR, +the OSINT *domain* is the high byte `0x07`, so an OSINT class is `0x07CC` +(e.g. `0x0700`), and `0x0007` is a Reserved-domain slot. `0x0008 >> 8` is also +`0x00` = Reserved (only its low byte echoes OCR's `0x08`); FMA (Foundational Model of Anatomy) is clinical → Health +`0x09XX` (or a dedicated anatomy domain) — never `0x0008`. + +## 3 — Target state (single source of truth) + +Per OGAR's own note, **`lance-graph-contract` is the long-term home** for the +class-identity codebook. Reconcile onto OGAR's `0xDDCC` scheme: + +1. **Codebook + domain types live in contract.** Move (or mirror, wire-compat) + `ConceptDomain`, `canonical_concept_domain`, `source_domain_concept`, + `canonical_concept_id`, the `CODEBOOK`, and `LabelDTO` into + `lance-graph-contract` (next to `ClassId`/`NodeGuid`). `ogar-vocab` + re-exports them (OGAR→contract dep) **OR** both keep a copy and the **wire + (`u16` LE) is the contract** (no new dep). *Decision needed — see §5.* +2. **classids follow `0xDDCC`.** `NodeGuid.classid` low u16 == the codebook id. + - project-mgmt: `0x01XX` (mint `CLASSID_PROJECT = 0x0100` block). + - commerce/ERP: `0x02XX` (mint `CLASSID_ERP/COMMERCE = 0x0200` block). + - OSINT: realign `CLASSID_OSINT` → `0x0700` (Gotham domain). + - anatomy/FMA: realign `CLASSID_FMA` → Health `0x09XX` (or a new anatomy + domain block, reserved appended — never `0x0008`). +3. 
**`canonical_concept_domain` becomes the `ReadMode`/domain router** — the + `classid → ReadMode` registry keys off `id >> 8` (the domain), so OSINT/FMA/ + project/ERP all resolve by the same O(1) high-byte rule. +4. **The per-family codebook (D-GV2-2) is the FINER scope of the SAME idea.** + OGAR `CODEBOOK` = the *concept/classid* codebook (domain `0xDDCC`); the + `guid-v2-tail` `FamilyCodebookRegistry` (`contract::codebook`) = the + *within-family* label vocab. They compose: classid (domain) selects the + coarse codebook; family selects the sub-codebook. Longest-prefix-wins, one + rule (OGAR `CLAUDE.md` "Codebook scoping = the class routing prefix"). + +## 4 — Deliverables (gated on §5 decisions) + +- **D-OVC-1** Move/mirror `ConceptDomain` + `canonical_concept_domain` + + `source_domain_concept` + `canonical_concept_id` + `CODEBOOK` + `LabelDTO` + into `lance-graph-contract` (e.g. `contract::ogar_codebook`); `ogar-vocab` + re-exports (or wire-compat duplicate). Round-trip test: `LabelDTO::from_alias` + parity across both crates. +- **D-OVC-2** Mint `CLASSID_PROJECT` (`0x0100`) + `CLASSID_ERP` (`0x0200`) in + `canonical_node.rs` + `ReadMode`s, registered in `BUILTIN_READ_MODES`. Add + `soa_graph::{PROJECT, ERP}` `DomainSpec`s (siblings of `OSINT_GOTHAM`/`FMA_ANATOMY`). +- **D-OVC-3** **Canon realign (SIGN-OFF):** `CLASSID_OSINT 0x0007 → 0x0700`, + `CLASSID_FMA 0x0008 → 0x09xx` (Health) or a minted anatomy domain. Field-isolation + / version-gate per `I-LEGACY-API-FEATURE-GATED`; update `aiwar.rs`, `soa_graph.rs`, + tests, and the canon block in `lance-graph/CLAUDE.md` + OGAR `CODEBOOK`. +- **D-OVC-4** Route `classid → ReadMode` (and the domain ClassView) through + `canonical_concept_domain(classid_lo)`; q2 reads `LabelDTO`/`canonical` for + display labels (the contract→q2 leg of the triangle). + +## 5 — Decisions needed (operator) + +1. **Canon realign OSINT/FMA?** `CLASSID_OSINT 0x0007 → 0x0700`, `CLASSID_FMA + 0x0008 → 0x09XX`. 
This rewrites merged canon (#557/#560) + the `lance-graph/ + CLAUDE.md` canon block. Recommended (otherwise contract and OGAR disagree on + what `0x07`/`0x08` mean), but it's your canon to change. Alternative: keep + `0x0007/0x0008` and re-document OGAR's domain layout to match (worse — breaks + the clean `id>>8` domain route). +2. **Dependency direction for the shared types:** (a) move to contract, + `ogar-vocab` `pub use`s from it (OGAR gains a `lance-graph-contract` dep); + or (b) both define, wire (`u16` LE) is the only contract, a parity test + guards drift (no new dep). OGAR's note leans (a) ("belongs in contract"); + (b) is lighter and keeps OGAR dep-free. Recommend (b) now, (a) at a + deliberate consolidation. +3. **FMA/anatomy domain:** fold into Health `0x09XX`, or mint a dedicated + anatomy domain block (append-only reserved high byte)? + +## Cross-refs + +OGAR `crates/ogar-vocab/src/lib.rs` (`CODEBOOK`/`ConceptDomain`/`LabelDTO`/ +`source_domain`), OGAR `CLAUDE.md` "Tier interpretation 256×256 CENTROID TILE" + +"Codebook scoping = the class routing prefix"; `contract::canonical_node` +(`CLASSID_OSINT`/`CLASSID_FMA`/`BUILTIN_READ_MODES`), `contract::codebook` +(D-GV2-2 per-family), `contract::soa_graph` (`OSINT_GOTHAM`/`FMA_ANATOMY`), +`contract::aiwar`; `guid-v2-tail-per-family-codebook-v1.md`; +`E-UNIFORM-MORTON-TILE-PYRAMID`. From dcf550526cec7c744f81a025b7995f4782829439 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 20 Jun 2026 20:14:23 +0000 Subject: [PATCH 3/3] =?UTF-8?q?fix(contract):=20D-GV2-2=20codex=20P2=20?= =?UTF-8?q?=E2=80=94=20reserve=20codebook=20index=200=20(EdgeBlock=20senti?= =?UTF-8?q?nel)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #561 codex P2: Codebook::intern assigned the first label index 0, which collides with EdgeBlock's all-zero empty-slot sentinel (soa_graph skips byte==0), so references to the first codebook entry were silently dropped. 
Reserve index 0 everywhere: entries are now 1-based (1..=255), CODEBOOK_CAP = 255, label(0) -> None. Same reserve-0 rule as the 0xDDCC codebook (CC=0x00 = domain root). 3 codebook tests updated; green; clippy clean. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01CcpLeEC3XK8Eye53GKBVvi --- crates/lance-graph-contract/src/codebook.rs | 66 +++++++++++++-------- 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/crates/lance-graph-contract/src/codebook.rs b/crates/lance-graph-contract/src/codebook.rs index 7e988a5c..7bc3c7ea 100644 --- a/crates/lance-graph-contract/src/codebook.rs +++ b/crates/lance-graph-contract/src/codebook.rs @@ -24,9 +24,12 @@ use std::collections::HashMap; -/// Max entries per family codebook — the 1-byte in-family index cap. A family -/// that needs more SPLITS (mint a sub-family), never widens the byte. -pub const CODEBOOK_CAP: usize = 256; +/// Max entries per family codebook — indices `1..=255`. **Index `0` is reserved** +/// as the `EdgeBlock` empty-slot sentinel (an all-zero adapter byte means +/// "unused"; `soa_graph` skips `byte == 0`), so real entries are 1-based. This is +/// the same reserve-`0` rule the `0xDDCC` codebook uses (`CC = 0x00` = domain +/// root). A family needing >255 entries SPLITS (mint a sub-family). +pub const CODEBOOK_CAP: usize = 255; /// A per-family codebook: insertion-ordered label interning, `index ↔ label`. -/// `index` is the 1-byte in-family adapter value (`0..len`). ≤[`CODEBOOK_CAP`]. +/// `index` is the 1-byte in-family adapter value (`1..=len`; `0` is reserved). ≤[`CODEBOOK_CAP`]. @@ -42,10 +45,11 @@ impl Codebook { Self::default() } - /// Intern `label` → its 1-byte index (insertion order, deduped). Returns - /// `None` if the codebook is full (256) and `label` is new — the caller must - /// SPLIT the family (the `CODEBOOK_CAP` overflow signal). An already-present - /// label always resolves (even at capacity). + /// Intern `label` → its **1-based** 1-byte index (insertion order, deduped). 
+ /// Index `0` is reserved (the `EdgeBlock` empty-slot sentinel), so the first + /// entry is `1`. Returns `None` if the codebook is full (255 entries) and + /// `label` is new — the caller must SPLIT the family (the `CODEBOOK_CAP` + /// overflow signal). An already-present label always resolves (even at capacity). pub fn intern(&mut self, label: &str) -> Option { if let Some(&i) = self.by_label.get(label) { return Some(i); @@ -53,20 +57,25 @@ impl Codebook { if self.entries.len() >= CODEBOOK_CAP { return None; } - let i = self.entries.len() as u8; + // 1-based: index 0 is the reserved empty-slot sentinel. + let i = (self.entries.len() + 1) as u8; self.entries.push(label.to_string()); self.by_label.insert(label.to_string(), i); Some(i) } - /// The 1-byte index of `label`, if interned. + /// The 1-based 1-byte index of `label`, if interned. pub fn index_of(&self, label: &str) -> Option { self.by_label.get(label).copied() } - /// The label at `index`, if present. + /// The label at `index` (1-based), if present. Index `0` (the reserved + /// empty-slot sentinel) resolves to `None`. pub fn label(&self, index: u8) -> Option<&str> { - self.entries.get(index as usize).map(String::as_str) + if index == 0 { + return None; + } + self.entries.get((index - 1) as usize).map(String::as_str) } /// Number of interned entries. @@ -133,14 +142,16 @@ mod tests { use super::*; #[test] - fn intern_dedups_and_assigns_sequential_indices() { + fn intern_is_1_based_and_dedups() { + // Index 0 is reserved (EdgeBlock empty-slot sentinel) — entries start at 1. 
let mut cb = Codebook::new(); - assert_eq!(cb.intern("Nation"), Some(0)); - assert_eq!(cb.intern("TechCompany"), Some(1)); - assert_eq!(cb.intern("Nation"), Some(0)); // dedup + assert_eq!(cb.intern("Nation"), Some(1)); + assert_eq!(cb.intern("TechCompany"), Some(2)); + assert_eq!(cb.intern("Nation"), Some(1)); // dedup assert_eq!(cb.len(), 2); - assert_eq!(cb.index_of("TechCompany"), Some(1)); - assert_eq!(cb.label(0), Some("Nation")); + assert_eq!(cb.index_of("TechCompany"), Some(2)); + assert_eq!(cb.label(1), Some("Nation")); + assert_eq!(cb.label(0), None, "index 0 is the reserved sentinel"); assert_eq!(cb.label(9), None); } @@ -151,24 +162,27 @@ mod tests { assert!(cb.intern(&format!("e{i}")).is_some()); } assert!(cb.is_full()); + assert_eq!(cb.len(), 255); // indices 1..=255, 0 reserved // a NEW label overflows → None (split the family)… assert_eq!(cb.intern("one_too_many"), None); // …but an already-interned label still resolves at capacity. - assert_eq!(cb.intern("e0"), Some(0)); + assert_eq!(cb.intern("e0"), Some(1)); } #[test] fn registry_scopes_codebooks_per_family() { // The SAME label gets INDEPENDENT indices in different families — the - // whole point of per-family scoping (no global contamination). + // whole point of per-family scoping (no global contamination). All + // 1-based (0 reserved). 
let mut reg = FamilyCodebookRegistry::new(); - assert_eq!(reg.intern(0x0001, "Issue"), Some(0)); - assert_eq!(reg.intern(0x0001, "Bug"), Some(1)); - assert_eq!(reg.intern(0x0002, "Issue"), Some(0)); // family 2's own index 0 + assert_eq!(reg.intern(0x0001, "Issue"), Some(1)); + assert_eq!(reg.intern(0x0001, "Bug"), Some(2)); + assert_eq!(reg.intern(0x0002, "Issue"), Some(1)); // family 2's own index 1 assert_eq!(reg.families(), 2); // cross-family resolve (family, index) → label - assert_eq!(reg.resolve(0x0001, 1), Some("Bug")); - assert_eq!(reg.resolve(0x0002, 0), Some("Issue")); - assert_eq!(reg.resolve(0x0099, 0), None); // unknown family + assert_eq!(reg.resolve(0x0001, 2), Some("Bug")); + assert_eq!(reg.resolve(0x0002, 1), Some("Issue")); + assert_eq!(reg.resolve(0x0001, 0), None); // reserved sentinel + assert_eq!(reg.resolve(0x0099, 1), None); // unknown family } }