@@ -47,10 +47,10 @@ use crate::driver::ShaderDriver;
4747use crate :: engine_bridge:: { self , unified_style, UNIFIED_STYLES } ;
4848use crate :: token_agreement:: { ReferenceModel , TokenAgreementHarness } ;
4949use crate :: wire:: {
50- WireCalibrateRequest , WireCalibrateResponse , WireCrystal , WireDispatch , WireHealth ,
51- WireIngest , WirePlanRequest , WirePlanResponse , WireProbeRequest , WireProbeResponse ,
52- WireQualia , WireRunbookRequest , WireRunbookResponse , WireRunbookStep ,
53- WireRunbookStepResult , WireStepResult , WireStyleInfo , WireSweepRequest ,
50+ WireCalibrateRequest , WireCalibrateResponse , WireCrystal , WireDispatch , WireEncode ,
51+ WireEncodeResponse , WireHealth , WireIngest , WirePlanRequest , WirePlanResponse ,
52+ WireProbeRequest , WireProbeResponse , WireQualia , WireRunbookRequest , WireRunbookResponse ,
53+ WireRunbookStep , WireRunbookStepResult , WireStepResult , WireStyleInfo , WireSweepRequest ,
5454 WireSweepResponse , WireSweepResult , WireTensorsRequest , WireTensorsResponse ,
5555 WireTokenAgreement , WireTokenAgreementResult , WireUnifiedStep ,
5656} ;
@@ -116,6 +116,8 @@ pub fn router(driver: ShaderDriver) -> Router {
116116 // Generic OrchestrationBridge gateway — route any UnifiedStep by step_type.
117117 // Composed bridges cover lg.* (planner) + nd.* (codec research).
118118 . route ( "/v1/shader/route" , post ( route_handler) )
119+ // JIT lens encode pipeline — text → DeepNSM → 512-bit VSA → 16Kbit BindSpace row.
120+ . route ( "/v1/shader/encode" , post ( encode_handler) )
119121 . with_state ( state)
120122}
121123
@@ -447,6 +449,130 @@ fn run_plan(
447449 ) )
448450}
449451
452+ // ─── Encode handler ─────────────────────────────────────────────────────────
453+
454+ /// `POST /v1/shader/encode` — text → DeepNSM → 512-bit VSA → 16Kbit BindSpace row.
455+ ///
456+ /// Pipeline:
457+ /// 1. Split text into words (whitespace + punctuation).
458+ /// 2. Hash each word to a 12-bit vocabulary rank via SplitMix64-style mixing
459+ /// (deterministic; no data files required — DeepNsm's `VsaVec::from_rank`
460+ /// accepts any u16 rank and produces a stable pseudo-random 512-bit vector).
461+ /// 3. XOR-bind each word vector with a position vector so word order matters:
462+ /// `word_fp = VsaVec::from_rank(hash(word)) XOR VsaVec::random(pos * PHI)`.
463+ /// 4. Majority-bundle all word-position vectors → 512-bit sentence fingerprint.
464+ /// 5. Expand 8 × u64 (512-bit) → 256 × u64 (16 Kbit) by tiling: each source
465+ /// u64 occupies a 32-word run in the content plane.
466+ /// 6. Write the content row into BindSpace at write_cursor, advance cursor.
467+ /// 7. Return hex fingerprint + token_count + bits_set + row_written.
468+ ///
469+ /// Why hash-based ranks instead of Vocabulary::load?
470+ /// The vocabulary requires CSV data files on disk; the encode endpoint is
471+ /// intended to be stateless and zero-I/O. `VsaVec::from_rank` is pure and
472+ /// deterministic — hashing word strings to u16 rank seeds gives the same
473+ /// VSA vectors on every call without loading any external table. When the
474+ /// data files are available, upgrade to Vocabulary::load + parser::parse for
475+ /// full SPO triple extraction.
476+ async fn encode_handler (
477+ State ( state) : State < AppState > ,
478+ Json ( req) : Json < WireEncode > ,
479+ ) -> Result < Json < WireEncodeResponse > , ( StatusCode , Json < Value > ) > {
480+ use deepnsm:: encoder:: { bundle, VsaVec , VSA_WORDS } ;
481+
482+ // ── 1. Word tokenisation (zero-I/O, no CSV needed) ───────────────────
483+ let words: Vec < & str > = req
484+ . text
485+ . split ( |c : char | c. is_whitespace ( ) || ( c. is_ascii_punctuation ( ) && c != '\'' ) )
486+ . filter ( |s| !s. is_empty ( ) )
487+ . collect ( ) ;
488+ let token_count = words. len ( ) ;
489+
490+ // ── 2 + 3. Hash word → rank, XOR-bind with position vector ───────────
491+ //
492+ // Rank derivation: FNV-1a-style fold into 12 bits.
493+ // hash = words[i].bytes().fold(2166136261u32, |h, b| {
494+ // (h ^ b as u32).wrapping_mul(16777619)
495+ // }) & 0x0FFF
496+ //
497+ // Position braid: XOR with VsaVec::random(pos * PHI) so
498+ // "dog bites man" ≠ "man bites dog".
499+ const PHI : u64 = 0x9E3779B97F4A7C15 ; // golden-ratio multiplier
500+
501+ let word_vecs: Vec < VsaVec > = words
502+ . iter ( )
503+ . enumerate ( )
504+ . map ( |( pos, word) | {
505+ // FNV-1a → 12-bit rank
506+ let hash = word
507+ . bytes ( )
508+ . fold ( 2166136261u32 , |h, b| ( h ^ b as u32 ) . wrapping_mul ( 16777619 ) ) ;
509+ let rank = ( hash & 0x0FFF ) as u16 ;
510+
511+ // Position seed: unique per (pos, golden-ratio)
512+ let pos_seed = ( pos as u64 ) . wrapping_mul ( PHI ) ;
513+ let pos_vec = VsaVec :: random ( pos_seed) ;
514+
515+ // word_fp = from_rank(rank) XOR pos_vec
516+ VsaVec :: from_rank ( rank) . bind ( & pos_vec)
517+ } )
518+ . collect ( ) ;
519+
520+ // ── 4. Bundle → 512-bit sentence fingerprint ─────────────────────────
521+ let sentence_vec = if word_vecs. is_empty ( ) {
522+ VsaVec :: ZERO
523+ } else {
524+ bundle ( & word_vecs)
525+ } ;
526+
527+ // ── 4b. Build fingerprint hex and popcount ────────────────────────────
528+ let vsa_words = sentence_vec. as_words ( ) ; // &[u64; VSA_WORDS] (VSA_WORDS = 8)
529+ let fingerprint_hex: String = vsa_words
530+ . iter ( )
531+ . map ( |w| format ! ( "{:016x}" , w) )
532+ . collect ( ) ;
533+ let bits_set = sentence_vec. popcount ( ) as usize ;
534+
535+ // ── 5. Expand 8 × u64 → 256 × u64 (16 Kbit) ─────────────────────────
536+ //
537+ // Tiling strategy: content_fp[i] = vsa_words[i / TILE_FACTOR]
538+ // TILE_FACTOR = CONTENT_WORDS / VSA_WORDS = 256 / 8 = 32.
539+ // Every source u64 occupies 32 consecutive words in the content plane.
540+ // This preserves all 512 VSA bits at stable positions; the dispatch
541+ // sweep correlates against them via Hamming distance.
542+ const CONTENT_WORDS : usize = 256 ; // WORDS_PER_FP in bindspace.rs
543+ const TILE_FACTOR : usize = CONTENT_WORDS / VSA_WORDS ; // = 32
544+ let mut content_fp = [ 0u64 ; CONTENT_WORDS ] ;
545+ for ( i, w) in content_fp. iter_mut ( ) . enumerate ( ) {
546+ * w = vsa_words[ i / TILE_FACTOR ] ;
547+ }
548+
549+ // ── 6. Write to BindSpace, advance write_cursor ───────────────────────
550+ let row_written = {
551+ let mut st = state. lock ( ) . map_err ( |_| {
552+ ( StatusCode :: INTERNAL_SERVER_ERROR , Json ( json ! ( { "error" : "lock poisoned" } ) ) )
553+ } ) ?;
554+ let cursor = st. write_cursor ;
555+ if cursor >= st. driver . bindspace . len {
556+ None
557+ } else {
558+ let bs = Arc :: get_mut ( & mut st. driver . bindspace ) . ok_or_else ( || {
559+ ( StatusCode :: CONFLICT , Json ( json ! ( { "error" : "bindspace has multiple references" } ) ) )
560+ } ) ?;
561+ bs. fingerprints . set_content ( cursor, & content_fp) ;
562+ st. write_cursor = cursor + 1 ;
563+ Some ( cursor as u32 )
564+ }
565+ } ;
566+
567+ Ok ( Json ( WireEncodeResponse {
568+ text : req. text ,
569+ token_count,
570+ fingerprint_hex,
571+ bits_set,
572+ row_written,
573+ } ) )
574+ }
575+
450576/// Runbook-step dispatcher for Plan. Maps the shared planner state +
451577/// request into a runbook step result, yielding an error string on the
452578/// with-planner=off build to flow through the runbook's error channel.
0 commit comments