|
| 1 | +//! Bridge: deepnsm trajectory bundles → contract `ContextChain` disambiguation. |
| 2 | +//! |
| 3 | +//! ## Why this module exists |
| 4 | +//! |
| 5 | +//! The "real fingerprint" path on `ContextChain::disambiguate_with` accepts a |
| 6 | +//! `Option<CrystalFingerprint>` as `sentinel_fp` for the empty-candidates |
| 7 | +//! sentinel branch. Until this module shipped, the contract crate had no |
| 8 | +//! caller that produced a real (non-zero, non-placeholder) fingerprint — |
| 9 | +//! the parameter was effectively dead-letter typing. The "real fp" honesty |
| 10 | +//! gap came from advertising support for `Some(fp)` without anyone wiring |
| 11 | +//! a real `MarkovBundler::role_bundle()` projection through it. |
| 12 | +//! |
| 13 | +//! `disambiguate_with_trajectory` closes that gap. It sign-binarizes a |
| 14 | +//! 16,384-dim f32 trajectory bundle into a `Binary16K` fingerprint and |
| 15 | +//! threads it as the sentinel fingerprint. This way, when the chain has |
| 16 | +//! no in-window evidence (empty candidates), the result still carries a |
| 17 | +//! signal — the bundled-trajectory fingerprint of where the parser |
| 18 | +//! actually was — instead of an all-zero placeholder. |
| 19 | +//! |
| 20 | +//! ## Why sign-binarize? |
| 21 | +//! |
| 22 | +//! `Binary16K` is the 2 KB Hamming-compare format |
| 23 | +//! (`Box<[u64; 256]>`, 256 × 64 = 16,384 bits, one bit per dim). The |
| 24 | +//! canonical route from Vsa16kF32 to Binary16K is a sign threshold: |
| 25 | +//! `dim ≥ 0 → bit set`. This preserves bipolar VSA semantics for |
| 26 | +//! Hamming-distance comparison; magnitude is discarded by design (the |
| 27 | +//! magnitude lives in Vsa16kF32). |
| 28 | +//! |
| 29 | +//! ## Workspace iron rule consistency |
| 30 | +//! |
| 31 | +//! Per `I-VSA-IDENTITIES`: the trajectory bundle is an IDENTITY |
| 32 | +//! superposition of role-bound content fingerprints, not bitpacked |
| 33 | +//! content. Sign-binarizing it for Hamming compare is the canonical |
| 34 | +//! switchboard hop (Vsa16kF32 → Binary16K) called out in |
| 35 | +//! `vsa-switchboard-architecture.md`. |
| 36 | +
|
| 37 | +use lance_graph_contract::crystal::fingerprint::CrystalFingerprint; |
| 38 | +use lance_graph_contract::grammar::context_chain::{ |
| 39 | + ContextChain, DisambiguateOpts, DisambiguationResult, |
| 40 | +}; |
| 41 | + |
/// Number of `u64` words in a `Binary16K` fingerprint (256).
const BINARY16K_WORDS: usize = 256;

/// Number of bits in a `Binary16K` fingerprint (16,384), one per bundle dim.
///
/// Derived from `BINARY16K_WORDS` rather than hard-coded so the bit count
/// and the word count cannot drift apart.
const BINARY16K_BITS: usize = BINARY16K_WORDS * (u64::BITS as usize);
| 47 | + |
| 48 | +/// Disambiguate at position `i` against `candidates`, with the sentinel |
| 49 | +/// fingerprint sourced from a real trajectory `bundle` (typically |
| 50 | +/// `MarkovBundler::role_bundle(role)` or the full role-superposed |
| 51 | +/// trajectory). The bundle is sign-binarized into a `Binary16K` |
| 52 | +/// fingerprint and threaded through `DisambiguateOpts::sentinel_fp`. |
| 53 | +/// |
| 54 | +/// On the empty-candidates sentinel path the result's `winner` / |
| 55 | +/// `chosen` carry the bundled-trajectory fingerprint instead of the |
| 56 | +/// zero placeholder — closing the "real fp" honesty gap that prior |
| 57 | +/// PR-G3 work left open (the contract accepted the option but no |
| 58 | +/// caller produced a real value). |
| 59 | +/// |
| 60 | +/// `bundle` lengths shorter than 16,384 are zero-padded to 16,384; |
| 61 | +/// longer bundles are truncated. Both edge cases are intentionally |
| 62 | +/// silent — the deepnsm carrier is by-construction |
| 63 | +/// 16,384 dims (`Vsa16kF32`-shaped), so any deviation is a wiring |
| 64 | +/// bug at the call site, not user input. |
| 65 | +pub fn disambiguate_with_trajectory<I>( |
| 66 | + chain: &ContextChain, |
| 67 | + i: usize, |
| 68 | + candidates: I, |
| 69 | + bundle: &[f32], |
| 70 | +) -> DisambiguationResult |
| 71 | +where |
| 72 | + I: IntoIterator<Item = CrystalFingerprint>, |
| 73 | +{ |
| 74 | + let bits = sign_binarize_to_binary16k(bundle); |
| 75 | + chain.disambiguate_with( |
| 76 | + i, |
| 77 | + candidates, |
| 78 | + DisambiguateOpts { |
| 79 | + kernel: None, |
| 80 | + sentinel_fp: Some(CrystalFingerprint::Binary16K(bits)), |
| 81 | + }, |
| 82 | + ) |
| 83 | +} |
| 84 | + |
| 85 | +/// Sign-binarize a 16,384-dim f32 bundle into a `Binary16K` payload. |
| 86 | +/// |
| 87 | +/// Bit `i` is set iff `bundle[i] >= 0.0`. Bundle entries beyond |
| 88 | +/// `bundle.len()` (when shorter than 16,384) are treated as `< 0.0` |
| 89 | +/// (bit clear). Bundle entries past 16,384 are ignored. |
| 90 | +/// |
| 91 | +/// Returns the boxed `[u64; 256]` shape that the |
| 92 | +/// `CrystalFingerprint::Binary16K` variant wraps. |
| 93 | +pub fn sign_binarize_to_binary16k(bundle: &[f32]) -> Box<[u64; BINARY16K_WORDS]> { |
| 94 | + let mut out = Box::new([0u64; BINARY16K_WORDS]); |
| 95 | + for (i, &v) in bundle.iter().take(BINARY16K_BITS).enumerate() { |
| 96 | + if v >= 0.0 { |
| 97 | + out[i / 64] |= 1u64 << (i % 64); |
| 98 | + } |
| 99 | + } |
| 100 | + out |
| 101 | +} |
| 102 | + |
#[cfg(test)]
mod tests {
    use super::*;
    use lance_graph_contract::crystal::fingerprint::CrystalFingerprint;
    use lance_graph_contract::grammar::context_chain::ContextChain;

    /// A chain with no recorded context, so disambiguation with an empty
    /// candidate iterator exercises the sentinel branch.
    fn empty_chain() -> ContextChain {
        ContextChain::new()
    }

    /// Borrow the `Binary16K` payload of `fp`, panicking on any other
    /// variant. Keeps the variant check in one place for every test.
    fn binary16k_words(fp: &CrystalFingerprint) -> &[u64; BINARY16K_WORDS] {
        match fp {
            CrystalFingerprint::Binary16K(bits) => &**bits,
            _ => panic!("expected Binary16K sentinel"),
        }
    }

    /// Loose-end-#2 failing-test-first witness (the "real fp" honesty
    /// gap closer): on the empty-candidates sentinel path,
    /// `disambiguate_with_trajectory` with an all-positive bundle must
    /// produce a `Binary16K` fingerprint that has at least one bit
    /// set — i.e. the result is *not* the zero-sentinel.
    ///
    /// Without `disambiguate_with_trajectory` this test does not even
    /// compile, which is the whole point of the bridge: the contract
    /// accepted `Option<CrystalFingerprint>` as a passthrough but no
    /// caller produced a real value.
    #[test]
    fn test_caller_constructs_real_fingerprint_not_zero() {
        let chain = empty_chain();
        let bundle_of_all_ones = vec![1.0_f32; BINARY16K_BITS];

        let result = disambiguate_with_trajectory(
            &chain,
            0,
            std::iter::empty::<CrystalFingerprint>(),
            &bundle_of_all_ones,
        );

        // Sentinel path was taken (no candidates).
        assert_eq!(result.candidate_count, 0);
        assert_eq!(result.winner_index, usize::MAX);
        assert!(result.escalate_to_llm);

        // Critical assertion: chosen carries a non-zero fingerprint.
        let bits = binary16k_words(&result.chosen);
        assert!(
            bits.iter().any(|&w| w != 0),
            "real-fp caller must NOT produce a zero-sentinel: \
             all-positive bundle → all bits set (saw all-zero)"
        );
        // Strong form: an all-positive bundle should set ALL bits.
        assert!(
            bits.iter().all(|&w| w == u64::MAX),
            "all-positive bundle should sign-binarize to all-ones"
        );
    }

    /// Sign-binarize unit test: an all-positive bundle becomes all-1s.
    #[test]
    fn sign_binarize_all_positive_yields_all_ones() {
        let bundle = vec![0.5_f32; BINARY16K_BITS];
        let bits = sign_binarize_to_binary16k(&bundle);
        for (i, &w) in bits.iter().enumerate() {
            assert_eq!(
                w,
                u64::MAX,
                "word {i} should be all-ones for all-positive bundle"
            );
        }
    }

    /// Sign-binarize complementary test: an all-negative bundle stays
    /// all-zeros (bit set requires `>= 0.0`). Anchors the contract.
    #[test]
    fn sign_binarize_all_negative_yields_all_zeros() {
        let bundle = vec![-1.0_f32; BINARY16K_BITS];
        let bits = sign_binarize_to_binary16k(&bundle);
        for (i, &w) in bits.iter().enumerate() {
            assert_eq!(w, 0u64, "word {i} should be zero for all-negative bundle");
        }
    }

    /// Threshold edge values: `0.0` and `-0.0` satisfy `>= 0.0` and set
    /// their bits; `NaN` fails every ordered comparison and stays clear.
    #[test]
    fn sign_binarize_zero_negative_zero_and_nan() {
        let bits = sign_binarize_to_binary16k(&[0.0, -0.0, f32::NAN, -1.0, 1.0]);
        // Dims 0, 1 and 4 are set; dims 2 (NaN) and 3 (negative) are clear.
        assert_eq!(bits[0], 0b1_0011);
        assert!(bits[1..].iter().all(|&w| w == 0));
    }

    /// Round-trip: two distinct bundles (all-positive vs sign-flipped
    /// at every other dim) drive `disambiguate_with_trajectory` to two
    /// different sentinel fingerprints. Confirms the bundle actually
    /// flows into the result, not a constant.
    #[test]
    fn round_trip_different_bundles_produce_different_fingerprints() {
        let chain = empty_chain();

        let bundle_a = vec![1.0_f32; BINARY16K_BITS];
        let bundle_b: Vec<f32> = (0..BINARY16K_BITS)
            .map(|i| if i % 2 == 0 { 1.0 } else { -1.0 })
            .collect();

        let res_a = disambiguate_with_trajectory(
            &chain,
            0,
            std::iter::empty::<CrystalFingerprint>(),
            &bundle_a,
        );
        let res_b = disambiguate_with_trajectory(
            &chain,
            0,
            std::iter::empty::<CrystalFingerprint>(),
            &bundle_b,
        );

        let bits_a = binary16k_words(&res_a.chosen);
        let bits_b = binary16k_words(&res_b.chosen);

        // The two bundles must produce distinct sentinel fingerprints.
        assert_ne!(
            bits_a, bits_b,
            "different bundles must produce different sentinel fingerprints"
        );
        // Bundle B has alternating signs → exactly half the bits set.
        let popcount_b: u32 = bits_b.iter().map(|w| w.count_ones()).sum();
        assert_eq!(
            popcount_b,
            (BINARY16K_BITS / 2) as u32,
            "alternating-sign bundle should set every other bit"
        );
    }

    /// Truncation contract: a bundle longer than 16,384 silently keeps
    /// only the first 16,384 dims; tail is ignored.
    #[test]
    fn sign_binarize_truncates_oversized_bundle() {
        let mut bundle = vec![1.0_f32; BINARY16K_BITS];
        bundle.resize(BINARY16K_BITS + 100, -1.0);
        let bits = sign_binarize_to_binary16k(&bundle);
        // First 16,384 bits → all-positive → all-ones.
        for &w in bits.iter() {
            assert_eq!(w, u64::MAX);
        }
    }

    /// Short-bundle contract: dims missing from a bundle shorter than
    /// 16,384 leave their bits clear (they are NOT treated as `0.0`,
    /// which would set them).
    #[test]
    fn sign_binarize_short_bundle_leaves_tail_clear() {
        let bundle = vec![1.0_f32; 100];
        let bits = sign_binarize_to_binary16k(&bundle);
        // Dims 0..64 fill word 0; dims 64..100 fill the low 36 bits of word 1.
        assert_eq!(bits[0], u64::MAX);
        assert_eq!(bits[1], (1u64 << 36) - 1);
        assert!(bits[2..].iter().all(|&w| w == 0));
    }

    /// Degenerate input: an empty bundle yields the all-zero payload.
    #[test]
    fn sign_binarize_empty_bundle_yields_all_zeros() {
        let bits = sign_binarize_to_binary16k(&[]);
        assert!(bits.iter().all(|&w| w == 0));
    }
}
0 commit comments