Skip to content

Commit b5370a5

Browse files
committed
refactor(F6): expose scent_u64() for CAM-PQ Phase C + birthday-paradox collision tests
1 parent 82bd03b commit b5370a5

1 file changed

Lines changed: 76 additions & 11 deletions

File tree

crates/lance-graph-callcenter/src/dn_path.rs

Lines changed: 76 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -46,27 +46,36 @@ impl DnPath {
4646
})
4747
}
4848

49-
/// Compute the scent of this DN path: FNV-1a hash of the canonical
50-
/// path string, folded to a single `u8`.
49+
/// 64-bit FNV-1a digest over the canonical hex path.
5150
///
5251
/// The canonical form is the 6 segment hashes rendered as hex and
5352
/// concatenated with `/` separators (deterministic, stable, zero-dep).
54-
/// The full 64-bit FNV-1a digest is XOR-folded into 1 byte, preserving
55-
/// avalanche properties much better than the old XOR-fold of individual
56-
/// segment hashes.
57-
///
58-
/// Future phases may replace this with ZeckBF17→Base17→CAM-PQ
59-
/// (16Kbit → 48B → 34B → 6B → 1B, ρ=0.937) once bgz-tensor
60-
/// enters the callcenter dep tree.
61-
pub fn scent(&self) -> u8 {
53+
/// CAM-PQ stages downstream (HHTL Phase C) keep the full bits;
54+
/// [`scent()`](Self::scent) folds this to u8 for HHTL Phase A bucket
55+
/// dispatch.
56+
pub fn scent_u64(&self) -> u64 {
6257
use core::fmt::Write;
6358
let mut buf = String::with_capacity(6 * 17);
6459
let segments = [self.ns, self.heel, self.hip, self.branch, self.twig, self.leaf];
6560
for (i, seg) in segments.iter().enumerate() {
6661
if i > 0 { buf.push('/'); }
6762
let _ = write!(buf, "{:016x}", seg);
6863
}
69-
let h = fnv1a(&buf);
64+
fnv1a(&buf)
65+
}
66+
67+
/// Compute the scent of this DN path: FNV-1a hash of the canonical
68+
/// path string, folded to a single `u8`.
69+
///
70+
/// XOR-folds [`scent_u64()`](Self::scent_u64) (64 → 8 bits), preserving
71+
/// avalanche properties much better than the old XOR-fold of individual
72+
/// segment hashes.
73+
///
74+
/// Future phases may replace this with ZeckBF17→Base17→CAM-PQ
75+
/// (16Kbit → 48B → 34B → 6B → 1B, ρ=0.937) once bgz-tensor
76+
/// enters the callcenter dep tree.
77+
pub fn scent(&self) -> u8 {
78+
let h = self.scent_u64();
7079
let folded = h
7180
^ (h >> 8)
7281
^ (h >> 16)
@@ -166,4 +175,60 @@ mod tests {
166175
.unwrap();
167176
assert_eq!(p.scent_stub(), p.scent());
168177
}
178+
179+
#[test]
180+
fn scent_u64_fold_matches_scent() {
181+
let p = DnPath::parse(
182+
"/tree/ada/heel/callcenter/hip/v1/branch/agents/twig/card/leaf/abc",
183+
)
184+
.unwrap();
185+
let h = p.scent_u64();
186+
let folded = (h
187+
^ (h >> 8)
188+
^ (h >> 16)
189+
^ (h >> 24)
190+
^ (h >> 32)
191+
^ (h >> 40)
192+
^ (h >> 48)
193+
^ (h >> 56)) as u8;
194+
assert_eq!(folded, p.scent());
195+
}
196+
197+
#[test]
198+
fn scent_distribution_100_paths_low_collision() {
199+
let paths: Vec<DnPath> = (0..100)
200+
.map(|i| {
201+
DnPath::parse(&format!(
202+
"/tree/tenant/heel/agent_{i}/hip/session_{i}/branch/leaf_{i}/twig/t_{i}/leaf/l_{i}"
203+
))
204+
.unwrap()
205+
})
206+
.collect();
207+
let scents: Vec<u8> = paths.iter().map(|p| p.scent()).collect();
208+
let unique: std::collections::HashSet<_> = scents.iter().copied().collect();
209+
assert!(
210+
unique.len() >= 50,
211+
"FNV-1a XOR-fold should distribute >=50 unique buckets across 100 distinct paths, got {}",
212+
unique.len()
213+
);
214+
}
215+
216+
#[test]
217+
fn scent_u64_distribution_100_paths_all_unique() {
218+
let paths: Vec<DnPath> = (0..100)
219+
.map(|i| {
220+
DnPath::parse(&format!(
221+
"/tree/tenant/heel/agent_{i}/hip/session_{i}/branch/leaf_{i}/twig/t_{i}/leaf/l_{i}"
222+
))
223+
.unwrap()
224+
})
225+
.collect();
226+
let scents: std::collections::HashSet<u64> =
227+
paths.iter().map(|p| p.scent_u64()).collect();
228+
assert_eq!(
229+
scents.len(),
230+
100,
231+
"scent_u64 in 64-bit codomain should have zero collisions in 100 paths"
232+
);
233+
}
169234
}

0 commit comments

Comments (0)