|
1 | 1 | //! CrystalFingerprint — polymorphic carrier of crystal semantic content. |
2 | 2 | //! |
3 | | -//! Four native forms: |
| 3 | +//! Five native forms: |
4 | 4 | //! |
5 | 5 | //! | Variant | Size | Role | |
6 | 6 | //! |------------------|-------|----------------------------------------------| |
7 | 7 | //! | `Binary16K` | 2 KB | Compact semantic (Hamming similarity). | |
8 | 8 | //! | `Structured5x5` | 3 KB | Rich native form (5×5×5×5×5 cells). | |
9 | 9 | //! | `Vsa10kI8` | 10 KB | lancedb-native VSA (int8). | |
10 | 10 | //! | `Vsa10kF32` | 40 KB | lancedb-native VSA (f32). | |
| 11 | +//! | `Vsa16kF32` | 64 KB | Click-native switchboard carrier (f32, 16_384-D).| |
| 12 | +//! |
| 13 | +//! ## Vsa16kF32 — the inside-BBB switchboard carrier |
| 14 | +//! |
| 15 | +//! Per CLAUDE.md §The Click and the I-VSA-IDENTITIES iron rule, the |
| 16 | +//! 16,384-dimensional f32 VSA is the **switchboard carrier** for |
| 17 | +//! role-indexed bundle (Markov) and role-key bind/unbind on the |
| 18 | +//! semantic kernel. It is 1:1 bit-addressable with `Binary16K` |
| 19 | +//! (dimension i corresponds to bit i) and supports lossless bipolar |
| 20 | +//! projection in both directions via [`binary16k_to_vsa16k_bipolar`] |
| 21 | +//! and [`vsa16k_to_binary16k_threshold`]. |
| 22 | +//! |
| 23 | +//! **BBB membrane status:** `Vsa16kF32` is INSIDE-BBB only. It does |
| 24 | +//! NOT cross the `ExternalMembrane` — the Arrow-scalar commit tier |
| 25 | +//! uses the 2 KB `Binary16K` projection (Index regime) or the 6 B |
| 26 | +//! CAM-PQ scent (Argmax regime). See I1 codec regime split |
| 27 | +//! (ADR-0002). |
11 | 28 | //! |
12 | 29 | //! ## Passthrough to 10,000-D |
13 | 30 | //! |
@@ -52,6 +69,11 @@ pub enum CrystalFingerprint { |
52 | 69 |
|
53 | 70 | /// 10,000-D VSA, f32 components (lancedb-native, 40 KB). |
54 | 71 | Vsa10kF32(Box<[f32; 10_000]>), |
| 72 | + |
| 73 | + /// 16,384-D VSA, f32 components — the Click switchboard carrier (64 KB). |
| 74 | + /// One-to-one with `Binary16K` dimensions via bipolar projection. |
| 75 | + /// Inside-BBB only; never crosses `ExternalMembrane`. |
| 76 | + Vsa16kF32(Box<[f32; 16_384]>), |
55 | 77 | } |
56 | 78 |
|
57 | 79 | /// Five-dimensional quorum: consensus along each of the 5^5 axes. |
@@ -216,6 +238,20 @@ impl CrystalFingerprint { |
216 | 238 | Self::Vsa10kF32(v) => { |
217 | 239 | out.copy_from_slice(&v[..]); |
218 | 240 | } |
| 241 | + Self::Vsa16kF32(v) => { |
| 242 | + // 16_384 → 10_000 downcast: dims 0..10_000 are copied verbatim, |
| 243 | + // then the surplus 6_384 dims (10_000..16_384) are folded into |
| 244 | + // dims 0..6_384 by pairwise averaging; dims 6_384..10_000 stay unaveraged. Not lossless — reserved for cases where a |
| 245 | + // 10K-surface consumer needs the 16K carrier's content. For |
| 246 | + // lossless projection, stay on the 16K carrier. |
| 247 | + for i in 0..10_000 { |
| 248 | + out[i] = v[i]; |
| 249 | + } |
| 250 | + for j in 10_000..16_384 { |
| 251 | + let i = j - 10_000; |
| 252 | + out[i] = (out[i] + v[j]) * 0.5; |
| 253 | + } |
| 254 | + } |
219 | 255 | } |
220 | 256 | out |
221 | 257 | } |
@@ -297,6 +333,7 @@ impl CrystalFingerprint { |
297 | 333 | Self::Structured5x5 { .. } => 3125 + 5 * 4, // ~3 KB |
298 | 334 | Self::Vsa10kI8(_) => 10_000, // 10 KB |
299 | 335 | Self::Vsa10kF32(_) => 40_000, // 40 KB |
| 336 | + Self::Vsa16kF32(_) => 65_536, // 64 KB |
300 | 337 | } |
301 | 338 | } |
302 | 339 | } |
@@ -357,6 +394,93 @@ pub fn vsa_cosine(a: &[f32; 10_000], b: &[f32; 10_000]) -> f32 { |
357 | 394 | if denom < 1e-12 { 0.0 } else { dot / denom } |
358 | 395 | } |
359 | 396 |
|
| 397 | +// ── Vsa16kF32 — the Click switchboard carrier ────────────────────────── |
| 398 | +// |
| 399 | +// One-to-one bit-addressable with Binary16K (dim i ↔ bit i). Bipolar |
| 400 | +// ±1 projection is lossless in both directions under strict-threshold |
| 401 | +// inverse. Supports the semantic-kernel algebra (role-indexed bundle |
| 402 | +// for Markov, element-wise bind for role-key slice assignment) on the |
| 403 | +// f32 carrier. 64 KB per vector; inside-BBB only. |
| 404 | + |
/// Returns a boxed 16_384-dimensional f32 carrier whose components are
/// all `0.0`.
#[inline]
pub fn vsa16k_zero() -> Box<[f32; 16_384]> {
    let zeros = [0.0_f32; 16_384];
    Box::new(zeros)
}
| 410 | + |
/// Project a `Binary16K` (256 × u64 = 16_384 bits) into a bipolar
/// `Vsa16kF32`: bit i set → +1.0 at dim i; bit i clear → -1.0.
///
/// Bit `b` of word `w` (counting from the least-significant bit) maps to
/// dimension `w * 64 + b`.
///
/// Lossless under the inverse [`vsa16k_to_binary16k_threshold`].
pub fn binary16k_to_vsa16k_bipolar(bits: &[u64; 256]) -> Box<[f32; 16_384]> {
    let mut out = Box::new([0.0f32; 16_384]);
    // Each 64-dim chunk of the carrier corresponds to exactly one input word,
    // so pairing chunks with words removes the manual `w * 64 + b` indexing.
    for (lanes, &word) in out.chunks_exact_mut(64).zip(bits.iter()) {
        for (b, lane) in lanes.iter_mut().enumerate() {
            *lane = if (word >> b) & 1 == 1 { 1.0 } else { -1.0 };
        }
    }
    out
}
| 426 | + |
/// Threshold a `Vsa16kF32` carrier back to a `Binary16K`: dim > 0.0 → bit set.
///
/// Inverse of [`binary16k_to_vsa16k_bipolar`] for any vector whose signs
/// survived bundling / binding (does not require strict ±1 values —
/// any positive value decodes to 1, any non-positive to 0).
///
/// Dimension `w * 64 + b` lands in bit `b` of word `w`. Because the test is
/// a strict `> 0.0`, exact zeros (e.g. bundle ties) and NaN components both
/// decode to a clear bit.
pub fn vsa16k_to_binary16k_threshold(v: &[f32; 16_384]) -> Box<[u64; 256]> {
    let mut bits = Box::new([0u64; 256]);
    // One output word per 64-dim chunk of the carrier.
    for (word, lanes) in bits.iter_mut().zip(v.chunks_exact(64)) {
        *word = lanes.iter().enumerate().fold(0u64, |acc, (b, &x)| {
            if x > 0.0 {
                acc | (1u64 << b)
            } else {
                acc
            }
        });
    }
    bits
}
| 446 | + |
/// Element-wise multiply (bind) on the 16K carrier: assigns a role key
/// to content. Self-inverse for ±1 bipolar keys (key² = 1 elementwise).
///
/// Returns a newly allocated carrier where component `i` is `a[i] * b[i]`;
/// neither input is modified.
pub fn vsa16k_bind(a: &[f32; 16_384], b: &[f32; 16_384]) -> Box<[f32; 16_384]> {
    let mut out = Box::new([0.0f32; 16_384]);
    // Walk the three arrays in lockstep instead of indexing each one.
    for (slot, (&x, &y)) in out.iter_mut().zip(a.iter().zip(b.iter())) {
        *slot = x * y;
    }
    out
}
| 456 | + |
/// Element-wise add (bundle / superposition) on the 16K carrier.
/// Per I-SUBSTRATE-MARKOV, this is the Chapman-Kolmogorov-safe
/// merge mode for state-transition paths. Do NOT substitute XOR.
///
/// Returns the plain component-wise sum of `vectors`; the result is not
/// normalised or averaged. An empty `vectors` slice yields the all-zero
/// carrier.
pub fn vsa16k_bundle(vectors: &[&[f32; 16_384]]) -> Box<[f32; 16_384]> {
    let mut out = Box::new([0.0f32; 16_384]);
    for v in vectors {
        // Accumulate this vector into the running sum, component by component.
        for (acc, &x) in out.iter_mut().zip(v.iter()) {
            *acc += x;
        }
    }
    out
}
| 469 | + |
/// Cosine similarity between two 16K carriers.
///
/// Computes `dot(a, b) / (‖a‖ · ‖b‖)` with f32 accumulation. When the
/// product of the norms is below `1e-12` (e.g. either input is all zeros)
/// the function returns `0.0` instead of dividing.
pub fn vsa16k_cosine(a: &[f32; 16_384], b: &[f32; 16_384]) -> f32 {
    // Single pass: accumulate the dot product and both squared norms together,
    // in index order.
    let (dot, norm_a, norm_b) = a.iter().zip(b.iter()).fold(
        (0.0f32, 0.0f32, 0.0f32),
        |(dot, na, nb), (&x, &y)| (dot + x * y, na + x * x, nb + y * y),
    );
    let denom = norm_a.sqrt() * norm_b.sqrt();
    if denom < 1e-12 {
        0.0
    } else {
        dot / denom
    }
}
| 483 | + |
360 | 484 | #[cfg(test)] |
361 | 485 | mod tests { |
362 | 486 | use super::*; |
@@ -500,4 +624,107 @@ mod tests { |
500 | 624 | assert!(sim_a > 0.5, "bundle should be similar to input a"); |
501 | 625 | assert!(sim_b > 0.0, "bundle should be positively similar to b"); |
502 | 626 | } |
| 627 | + |
// A `Vsa16kF32` fingerprint must report 16_384 × 4 B = 65_536 bytes.
#[test]
fn vsa16k_byte_size_is_64k() {
    let carrier = Box::new([0.0f32; 16_384]);
    let fingerprint = CrystalFingerprint::Vsa16kF32(carrier);
    let reported = fingerprint.byte_size();
    assert_eq!(reported, 65_536);
}
| 633 | + |
// Binary16K → bipolar Vsa16kF32 → Binary16K must reproduce every word.
#[test]
fn binary16k_to_vsa16k_bipolar_roundtrip_is_lossless() {
    // Give each word a distinct bit pattern derived from its index.
    let mut bits = Box::new([0u64; 256]);
    for (idx, word) in bits.iter_mut().enumerate() {
        *word = 0xDEAD_BEEF_CAFE_BABEu64.wrapping_mul(idx as u64 + 1);
    }

    let carrier = binary16k_to_vsa16k_bipolar(&bits);
    let back = vsa16k_to_binary16k_threshold(&carrier);

    for (i, (got, want)) in back.iter().zip(bits.iter()).enumerate() {
        assert_eq!(got, want,
            "word {i}: expected {:#018x} got {:#018x}", want, got);
    }
}
| 647 | + |
// Every component of a bipolar projection must be exactly +1.0 or -1.0.
#[test]
fn vsa16k_bipolar_values_are_unit() {
    let alternating = Box::new([0xAAAA_AAAA_AAAA_AAAAu64; 256]);
    let carrier = binary16k_to_vsa16k_bipolar(&alternating);
    for (i, &value) in carrier.iter().enumerate() {
        let is_unit = value == 1.0 || value == -1.0;
        assert!(is_unit,
            "dim {i} is {} — must be strict ±1", value);
    }
}
| 657 | + |
// Binding twice with the same ±1 key must give back the original content.
#[test]
fn vsa16k_bind_is_self_inverse_for_bipolar() {
    // Key: -1 on every third dimension, +1 elsewhere.
    let mut key = Box::new([0.0f32; 16_384]);
    for (idx, k) in key.iter_mut().enumerate() {
        *k = if idx % 3 == 0 { -1.0 } else { 1.0 };
    }

    // Content: a linear ramp from -1.0 towards +1.0 across the dimensions.
    let mut content = Box::new([0.0f32; 16_384]);
    for (idx, c) in content.iter_mut().enumerate() {
        *c = (idx as f32 / 16_384.0) * 2.0 - 1.0;
    }

    let bound = vsa16k_bind(&content, &key);
    let unbound = vsa16k_bind(&bound, &key);

    for (i, (got, want)) in unbound.iter().zip(content.iter()).enumerate() {
        assert!((got - want).abs() < 1e-5,
            "dim {i}: expected {} got {}", want, got);
    }
}
| 681 | + |
// The bundle of two carriers must stay positively similar to both inputs.
#[test]
fn vsa16k_bundle_preserves_similarity_to_inputs() {
    // `a` is all +1; `b` is +1 on the first half and -1 on the second half.
    let a = Box::new([1.0f32; 16_384]);
    let mut b = Box::new([-1.0f32; 16_384]);
    for x in b.iter_mut().take(8_192) {
        *x = 1.0;
    }

    let bundled = vsa16k_bundle(&[&*a, &*b]);
    assert!(vsa16k_cosine(&bundled, &a) > 0.5);
    assert!(vsa16k_cosine(&bundled, &b) > 0.0);
}
| 698 | + |
#[test]
fn vsa16k_bundle_then_unbind_recovers_role_content() {
    // Two role slots, each with its own bipolar key; content bound to
    // each; bundled; unbind by multiplying with the role key recovers
    // the matching content above the noise floor.
    //
    // The four patterns are independent pseudo-random bit strings
    // (SplitMix64 with distinct seeds). Complementary patterns must be
    // avoided here: if role_b = -role_a and content_b = -content_a, both
    // bound vectors are identical and the bundle carries no crosstalk, so
    // the test would pass without exercising the noise floor at all.
    let pattern = |seed: u64| -> Box<[u64; 256]> {
        let mut state = seed;
        let mut words = Box::new([0u64; 256]);
        for word in words.iter_mut() {
            state = state.wrapping_add(0x9E37_79B9_7F4A_7C15);
            let mut z = state;
            z = (z ^ (z >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
            z = (z ^ (z >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
            *word = z ^ (z >> 31);
        }
        words
    };
    let role_a = binary16k_to_vsa16k_bipolar(&pattern(1));
    let role_b = binary16k_to_vsa16k_bipolar(&pattern(2));
    let content_a = binary16k_to_vsa16k_bipolar(&pattern(3));
    let content_b = binary16k_to_vsa16k_bipolar(&pattern(4));

    let bound_a = vsa16k_bind(&content_a, &role_a);
    let bound_b = vsa16k_bind(&content_b, &role_b);
    let bundled = vsa16k_bundle(&[&*bound_a, &*bound_b]);
    let recovered_a = vsa16k_bind(&bundled, &role_a);
    let recovered_b = vsa16k_bind(&bundled, &role_b);

    assert!(vsa16k_cosine(&recovered_a, &content_a)
        > vsa16k_cosine(&recovered_a, &content_b),
        "unbind(role_a) must favour content_a over content_b");
    assert!(vsa16k_cosine(&recovered_b, &content_b)
        > vsa16k_cosine(&recovered_b, &content_a),
        "unbind(role_b) must favour content_b over content_a");

    // With two quasi-orthogonal bound terms the matching content should sit
    // near cos ≈ 1/√2; 0.5 leaves a wide margin above the crosstalk noise.
    assert!(vsa16k_cosine(&recovered_a, &content_a) > 0.5,
        "unbind(role_a) must recover content_a well above the noise floor");
    assert!(vsa16k_cosine(&recovered_b, &content_b) > 0.5,
        "unbind(role_b) must recover content_b well above the noise floor");
}
| 720 | + |
// Down-projecting an all-ones 16K carrier to the 10K surface must not
// produce NaN or infinite components.
#[test]
fn vsa16k_to_vsa10k_projection_is_finite() {
    let ones = Box::new([1.0f32; 16_384]);
    let fp = CrystalFingerprint::Vsa16kF32(ones);
    let v10 = fp.to_vsa10k_f32();
    for (i, value) in v10[..10_000].iter().enumerate() {
        assert!(value.is_finite(),
            "vsa16k→vsa10k must produce finite values; dim {i} is {}", value);
    }
}
503 | 730 | } |
0 commit comments