2424
2525use std:: collections:: HashMap ;
2626
27- /// Max entries per family codebook — the 1-byte in-family index cap. A family
28- /// that needs more SPLITS (mint a sub-family), never widens the byte.
29- pub const CODEBOOK_CAP : usize = 256 ;
27+ /// Max entries per family codebook — indices `1..=255`. **Index `0` is reserved**
28+ /// as the `EdgeBlock` empty-slot sentinel (an all-zero adapter byte means
29+ /// "unused"; `soa_graph` skips `byte == 0`), so real entries are 1-based. This is
30+ /// the same reserve-`0` rule the `0xDDCC` codebook uses (`CC = 0x00` = domain
31+ /// root). A family needing >255 entries SPLITS (mint a sub-family).
32+ pub const CODEBOOK_CAP : usize = 255 ;
3033
3134/// A per-family codebook: insertion-ordered label interning, `index ↔ label`.
3235/// `index` is the 1-byte in-family adapter value (`1..=len`; `0` is reserved). ≤[`CODEBOOK_CAP`] entries.
@@ -42,31 +45,37 @@ impl Codebook {
4245 Self :: default ( )
4346 }
4447
45- /// Intern `label` → its 1-byte index (insertion order, deduped). Returns
46- /// `None` if the codebook is full (256) and `label` is new — the caller must
47- /// SPLIT the family (the `CODEBOOK_CAP` overflow signal). An already-present
48- /// label always resolves (even at capacity).
48+ /// Intern `label` → its **1-based** 1-byte index (insertion order, deduped).
49+ /// Index `0` is reserved (the `EdgeBlock` empty-slot sentinel), so the first
50+ /// entry is `1`. Returns `None` if the codebook is full (255 entries) and
51+ /// `label` is new — the caller must SPLIT the family (the `CODEBOOK_CAP`
52+ /// overflow signal). An already-present label always resolves (even at capacity).
4953 pub fn intern ( & mut self , label : & str ) -> Option < u8 > {
5054 if let Some ( & i) = self . by_label . get ( label) {
5155 return Some ( i) ;
5256 }
5357 if self . entries . len ( ) >= CODEBOOK_CAP {
5458 return None ;
5559 }
56- let i = self . entries . len ( ) as u8 ;
60+ // 1-based: index 0 is the reserved empty-slot sentinel.
61+ let i = ( self . entries . len ( ) + 1 ) as u8 ;
5762 self . entries . push ( label. to_string ( ) ) ;
5863 self . by_label . insert ( label. to_string ( ) , i) ;
5964 Some ( i)
6065 }
6166
62- /// The 1-byte index of `label`, if interned.
67+ /// The 1-based 1-byte index of `label`, if interned.
6368 pub fn index_of ( & self , label : & str ) -> Option < u8 > {
6469 self . by_label . get ( label) . copied ( )
6570 }
6671
67- /// The label at `index`, if present.
72+ /// The label at `index` (1-based), if present. Index `0` (the reserved
73+ /// empty-slot sentinel) resolves to `None`.
6874 pub fn label ( & self , index : u8 ) -> Option < & str > {
69- self . entries . get ( index as usize ) . map ( String :: as_str)
75+ if index == 0 {
76+ return None ;
77+ }
78+ self . entries . get ( ( index - 1 ) as usize ) . map ( String :: as_str)
7079 }
7180
7281 /// Number of interned entries.
@@ -133,14 +142,16 @@ mod tests {
133142 use super :: * ;
134143
135144 #[ test]
136- fn intern_dedups_and_assigns_sequential_indices ( ) {
145+ fn intern_is_1_based_and_dedups ( ) {
146+ // Index 0 is reserved (EdgeBlock empty-slot sentinel) — entries start at 1.
137147 let mut cb = Codebook :: new ( ) ;
138- assert_eq ! ( cb. intern( "Nation" ) , Some ( 0 ) ) ;
139- assert_eq ! ( cb. intern( "TechCompany" ) , Some ( 1 ) ) ;
140- assert_eq ! ( cb. intern( "Nation" ) , Some ( 0 ) ) ; // dedup
148+ assert_eq ! ( cb. intern( "Nation" ) , Some ( 1 ) ) ;
149+ assert_eq ! ( cb. intern( "TechCompany" ) , Some ( 2 ) ) ;
150+ assert_eq ! ( cb. intern( "Nation" ) , Some ( 1 ) ) ; // dedup
141151 assert_eq ! ( cb. len( ) , 2 ) ;
142- assert_eq ! ( cb. index_of( "TechCompany" ) , Some ( 1 ) ) ;
143- assert_eq ! ( cb. label( 0 ) , Some ( "Nation" ) ) ;
152+ assert_eq ! ( cb. index_of( "TechCompany" ) , Some ( 2 ) ) ;
153+ assert_eq ! ( cb. label( 1 ) , Some ( "Nation" ) ) ;
154+ assert_eq ! ( cb. label( 0 ) , None , "index 0 is the reserved sentinel" ) ;
144155 assert_eq ! ( cb. label( 9 ) , None ) ;
145156 }
146157
@@ -151,24 +162,27 @@ mod tests {
151162 assert ! ( cb. intern( & format!( "e{i}" ) ) . is_some( ) ) ;
152163 }
153164 assert ! ( cb. is_full( ) ) ;
165+ assert_eq ! ( cb. len( ) , 255 ) ; // indices 1..=255, 0 reserved
154166 // a NEW label overflows → None (split the family)…
155167 assert_eq ! ( cb. intern( "one_too_many" ) , None ) ;
156168 // …but an already-interned label still resolves at capacity.
157- assert_eq ! ( cb. intern( "e0" ) , Some ( 0 ) ) ;
169+ assert_eq ! ( cb. intern( "e0" ) , Some ( 1 ) ) ;
158170 }
159171
160172 #[ test]
161173 fn registry_scopes_codebooks_per_family ( ) {
162174 // The SAME label gets INDEPENDENT indices in different families — the
163- // whole point of per-family scoping (no global contamination).
175+ // whole point of per-family scoping (no global contamination). All
176+ // 1-based (0 reserved).
164177 let mut reg = FamilyCodebookRegistry :: new ( ) ;
165- assert_eq ! ( reg. intern( 0x0001 , "Issue" ) , Some ( 0 ) ) ;
166- assert_eq ! ( reg. intern( 0x0001 , "Bug" ) , Some ( 1 ) ) ;
167- assert_eq ! ( reg. intern( 0x0002 , "Issue" ) , Some ( 0 ) ) ; // family 2's own index 0
178+ assert_eq ! ( reg. intern( 0x0001 , "Issue" ) , Some ( 1 ) ) ;
179+ assert_eq ! ( reg. intern( 0x0001 , "Bug" ) , Some ( 2 ) ) ;
180+ assert_eq ! ( reg. intern( 0x0002 , "Issue" ) , Some ( 1 ) ) ; // family 2's own index 1
168181 assert_eq ! ( reg. families( ) , 2 ) ;
169182 // cross-family resolve (family, index) → label
170- assert_eq ! ( reg. resolve( 0x0001 , 1 ) , Some ( "Bug" ) ) ;
171- assert_eq ! ( reg. resolve( 0x0002 , 0 ) , Some ( "Issue" ) ) ;
172- assert_eq ! ( reg. resolve( 0x0099 , 0 ) , None ) ; // unknown family
183+ assert_eq ! ( reg. resolve( 0x0001 , 2 ) , Some ( "Bug" ) ) ;
184+ assert_eq ! ( reg. resolve( 0x0002 , 1 ) , Some ( "Issue" ) ) ;
185+ assert_eq ! ( reg. resolve( 0x0001 , 0 ) , None ) ; // reserved sentinel
186+ assert_eq ! ( reg. resolve( 0x0099 , 1 ) , None ) ; // unknown family
173187 }
174188}
0 commit comments