Skip to content

Commit dcf5505

Browse files
committed
fix(contract): D-GV2-2 codex P2 — reserve codebook index 0 (EdgeBlock sentinel)
#561 codex P2: Codebook::intern assigned the first label index 0, which collides with EdgeBlock's all-zero empty-slot sentinel (soa_graph skips byte==0), so references to the first codebook entry were silently dropped. Reserve index 0 everywhere: entries are now 1-based (1..=255), CODEBOOK_CAP = 255, label(0) -> None. Same reserve-0 rule as the 0xDDCC codebook (CC=0x00 = domain root). 3 codebook tests updated; green; clippy clean. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> Claude-Session: https://claude.ai/code/session_01CcpLeEC3XK8Eye53GKBVvi
1 parent 5eccf40 commit dcf5505

1 file changed

Lines changed: 39 additions & 25 deletions

File tree

crates/lance-graph-contract/src/codebook.rs

Lines changed: 39 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,12 @@
2424
2525
use std::collections::HashMap;
2626

27-
/// Max entries per family codebook — the 1-byte in-family index cap. A family
28-
/// that needs more SPLITS (mint a sub-family), never widens the byte.
29-
pub const CODEBOOK_CAP: usize = 256;
27+
/// Max entries per family codebook — indices `1..=255`. **Index `0` is reserved**
28+
/// as the `EdgeBlock` empty-slot sentinel (an all-zero adapter byte means
29+
/// "unused"; `soa_graph` skips `byte == 0`), so real entries are 1-based. This is
30+
/// the same reserve-`0` rule the `0xDDCC` codebook uses (`CC = 0x00` = domain
31+
/// root). A family needing >255 entries SPLITS (mint a sub-family).
32+
pub const CODEBOOK_CAP: usize = 255;
3033

3134
/// A per-family codebook: insertion-ordered label interning, `index ↔ label`.
3235
/// `index` is the 1-byte in-family adapter value (`1..=len`; `0` is reserved as the empty-slot sentinel). At most [`CODEBOOK_CAP`] entries.
@@ -42,31 +45,37 @@ impl Codebook {
4245
Self::default()
4346
}
4447

45-
/// Intern `label` → its 1-byte index (insertion order, deduped). Returns
46-
/// `None` if the codebook is full (256) and `label` is new — the caller must
47-
/// SPLIT the family (the `CODEBOOK_CAP` overflow signal). An already-present
48-
/// label always resolves (even at capacity).
48+
/// Intern `label` → its **1-based** 1-byte index (insertion order, deduped).
49+
/// Index `0` is reserved (the `EdgeBlock` empty-slot sentinel), so the first
50+
/// entry is `1`. Returns `None` if the codebook is full (255 entries) and
51+
/// `label` is new — the caller must SPLIT the family (the `CODEBOOK_CAP`
52+
/// overflow signal). An already-present label always resolves (even at capacity).
4953
pub fn intern(&mut self, label: &str) -> Option<u8> {
5054
if let Some(&i) = self.by_label.get(label) {
5155
return Some(i);
5256
}
5357
if self.entries.len() >= CODEBOOK_CAP {
5458
return None;
5559
}
56-
let i = self.entries.len() as u8;
60+
// 1-based: index 0 is the reserved empty-slot sentinel.
61+
let i = (self.entries.len() + 1) as u8;
5762
self.entries.push(label.to_string());
5863
self.by_label.insert(label.to_string(), i);
5964
Some(i)
6065
}
6166

62-
/// The 1-byte index of `label`, if interned.
67+
/// The 1-based 1-byte index of `label`, if interned.
6368
pub fn index_of(&self, label: &str) -> Option<u8> {
6469
self.by_label.get(label).copied()
6570
}
6671

67-
/// The label at `index`, if present.
72+
/// The label at `index` (1-based), if present. Index `0` (the reserved
73+
/// empty-slot sentinel) resolves to `None`.
6874
pub fn label(&self, index: u8) -> Option<&str> {
69-
self.entries.get(index as usize).map(String::as_str)
75+
if index == 0 {
76+
return None;
77+
}
78+
self.entries.get((index - 1) as usize).map(String::as_str)
7079
}
7180

7281
/// Number of interned entries.
@@ -133,14 +142,16 @@ mod tests {
133142
use super::*;
134143

135144
#[test]
136-
fn intern_dedups_and_assigns_sequential_indices() {
145+
fn intern_is_1_based_and_dedups() {
146+
// Index 0 is reserved (EdgeBlock empty-slot sentinel) — entries start at 1.
137147
let mut cb = Codebook::new();
138-
assert_eq!(cb.intern("Nation"), Some(0));
139-
assert_eq!(cb.intern("TechCompany"), Some(1));
140-
assert_eq!(cb.intern("Nation"), Some(0)); // dedup
148+
assert_eq!(cb.intern("Nation"), Some(1));
149+
assert_eq!(cb.intern("TechCompany"), Some(2));
150+
assert_eq!(cb.intern("Nation"), Some(1)); // dedup
141151
assert_eq!(cb.len(), 2);
142-
assert_eq!(cb.index_of("TechCompany"), Some(1));
143-
assert_eq!(cb.label(0), Some("Nation"));
152+
assert_eq!(cb.index_of("TechCompany"), Some(2));
153+
assert_eq!(cb.label(1), Some("Nation"));
154+
assert_eq!(cb.label(0), None, "index 0 is the reserved sentinel");
144155
assert_eq!(cb.label(9), None);
145156
}
146157

@@ -151,24 +162,27 @@ mod tests {
151162
assert!(cb.intern(&format!("e{i}")).is_some());
152163
}
153164
assert!(cb.is_full());
165+
assert_eq!(cb.len(), 255); // indices 1..=255, 0 reserved
154166
// a NEW label overflows → None (split the family)…
155167
assert_eq!(cb.intern("one_too_many"), None);
156168
// …but an already-interned label still resolves at capacity.
157-
assert_eq!(cb.intern("e0"), Some(0));
169+
assert_eq!(cb.intern("e0"), Some(1));
158170
}
159171

160172
#[test]
161173
fn registry_scopes_codebooks_per_family() {
162174
// The SAME label gets INDEPENDENT indices in different families — the
163-
// whole point of per-family scoping (no global contamination).
175+
// whole point of per-family scoping (no global contamination). All
176+
// 1-based (0 reserved).
164177
let mut reg = FamilyCodebookRegistry::new();
165-
assert_eq!(reg.intern(0x0001, "Issue"), Some(0));
166-
assert_eq!(reg.intern(0x0001, "Bug"), Some(1));
167-
assert_eq!(reg.intern(0x0002, "Issue"), Some(0)); // family 2's own index 0
178+
assert_eq!(reg.intern(0x0001, "Issue"), Some(1));
179+
assert_eq!(reg.intern(0x0001, "Bug"), Some(2));
180+
assert_eq!(reg.intern(0x0002, "Issue"), Some(1)); // family 2's own index 1
168181
assert_eq!(reg.families(), 2);
169182
// cross-family resolve (family, index) → label
170-
assert_eq!(reg.resolve(0x0001, 1), Some("Bug"));
171-
assert_eq!(reg.resolve(0x0002, 0), Some("Issue"));
172-
assert_eq!(reg.resolve(0x0099, 0), None); // unknown family
183+
assert_eq!(reg.resolve(0x0001, 2), Some("Bug"));
184+
assert_eq!(reg.resolve(0x0002, 1), Some("Issue"));
185+
assert_eq!(reg.resolve(0x0001, 0), None); // reserved sentinel
186+
assert_eq!(reg.resolve(0x0099, 1), None); // unknown family
173187
}
174188
}

0 commit comments

Comments
 (0)