@@ -131,11 +131,25 @@ impl MarkovBundler {
131131 }
132132 }
133133 }
134- // permute by position offset (rotate_right)
135- if !acc. is_empty ( ) {
136- let k = ( self . radius as usize ) % acc. len ( ) ;
137- acc. rotate_right ( k) ;
134+ // REMOVED: post-bundle acc.rotate_right(k) — corrupted role-slice alignment.
135+ // Plan called for per-sentence pre-bundle vsa_permute; that's a follow-up.
136+ // Until then, no permutation = aligned bundle.
137+
138+ // Bundle normalization (HIGH item from PR #279 review): divide by the
139+ // sum of |kernel weights| so cosine comparisons across kernel choices
140+ // are invariant to kernel-shape magnitude. Without this, MexicanHat
141+ // bundles have systematically smaller norms than Uniform bundles
142+ // simply because the kernel weights peak at 1 and decay.
143+ let radius_i = self . radius as i32 ;
144+ let total_abs_weight: f32 = ( -radius_i..=radius_i)
145+ . map ( |d| self . kernel . weight ( d, self . radius ) . abs ( ) )
146+ . sum ( ) ;
147+ if total_abs_weight > 1e-9 {
148+ for v in acc. iter_mut ( ) {
149+ * v /= total_abs_weight;
150+ }
138151 }
152+
139153 Trajectory {
140154 fingerprint : acc,
141155 radius : self . radius ,
@@ -203,4 +217,156 @@ mod tests {
203217 assert_eq ! ( GrammaticalRole :: Lokal . slice( ) . len( ) , 150 ) ;
204218 assert_eq ! ( GrammaticalRole :: Instrument . slice( ) . len( ) , 100 ) ;
205219 }
220+
221+ /// Helper: fill a bundler's window so a single push triggers `bundle_current`.
222+ fn fill_and_bundle (
223+ kernel : Kernel ,
224+ radius : u32 ,
225+ sent : WindowedSentence ,
226+ ) -> Trajectory {
227+ let mut b = MarkovBundler :: new ( radius, kernel) ;
228+ let cap = ( 2 * radius + 1 ) as usize ;
229+ let mut last: Option < Trajectory > = None ;
230+ for _ in 0 ..cap {
231+ last = b. push ( sent. clone ( ) ) ;
232+ }
233+ last. expect ( "bundler should emit a trajectory once window is full" )
234+ }
235+
236+ /// Helper: push a sequence of distinct sentences so per-position
237+ /// kernel weights actually shape the bundle. Returns the trajectory
238+ /// emitted on the final push (window saturated).
239+ fn bundle_sequence (
240+ kernel : Kernel ,
241+ radius : u32 ,
242+ sentences : Vec < WindowedSentence > ,
243+ ) -> Trajectory {
244+ let mut b = MarkovBundler :: new ( radius, kernel) ;
245+ let cap = ( 2 * radius + 1 ) as usize ;
246+ assert_eq ! ( sentences. len( ) , cap, "sequence must fill exactly one window" ) ;
247+ let mut last: Option < Trajectory > = None ;
248+ for s in sentences {
249+ last = b. push ( s) ;
250+ }
251+ last. expect ( "bundler should emit on the saturating push" )
252+ }
253+
/// REGRESSION (PR #279 CRITICAL #2): the removed `rotate_right` shifted
/// SUBJECT-slice content out of the SUBJECT slice and into neighbouring
/// regions of the fingerprint. After the fix, a SUBJECT-only window must
/// keep all non-zero content inside the SUBJECT slice and be ~zero
/// everywhere else — both before `s_start` and from `s_stop` onwards.
#[test]
fn bundle_does_not_rotate_subject_dims_outside_subject_slice() {
    // Resolve the slice bounds once and reuse them for both the fixture
    // and the assertions.
    let (s_start, s_stop) = GrammaticalRole::Subject.slice();
    let subject_len = s_stop - s_start;

    // SUBJECT-only window: every sentence has a single SUBJECT token
    // whose content_fp is all 1.0 across the SUBJECT slice.
    let sent = WindowedSentence {
        tokens: vec![TokenWithRole {
            content_fp: vec![1.0; subject_len],
            role: GrammaticalRole::Subject,
        }],
    };
    let traj = fill_and_bundle(Kernel::Uniform, 5, sent);

    // SUBJECT slice should carry non-trivial (positive) content.
    let subject_sum: f32 = traj.fingerprint[s_start..s_stop].iter().sum();
    assert!(
        subject_sum > 1.0,
        "expected non-trivial SUBJECT content, got sum={subject_sum}"
    );

    // Outside the SUBJECT slice every dim must be ~0 (no rotation). Both
    // sides are inspected so the check stays meaningful even if the
    // SUBJECT slice does not begin at index 0; an empty prefix simply
    // contributes nothing to the maximum.
    let outside_max: f32 = traj.fingerprint[..s_start]
        .iter()
        .chain(traj.fingerprint[s_stop..].iter())
        .fold(0.0f32, |acc, v| acc.max(v.abs()));
    assert!(
        outside_max < 1e-6,
        "rotation leaked SUBJECT content past slice boundary: \
         max |outside| = {outside_max}"
    );
}
290+
/// The MexicanHat and Uniform kernels must yield materially different
/// bundles for the same window; if they did not, choosing a kernel would
/// have no runtime effect. The window is deliberately lopsided: exactly
/// one off-centre position carries content and every other position is
/// blank, so the bundle reflects the weight each kernel assigns to that
/// single position (relative to its own normalization).
#[test]
fn mexican_hat_bundle_differs_from_uniform_bundle() {
    let (subj_lo, subj_hi) = GrammaticalRole::Subject.slice();
    let subject_len = subj_hi - subj_lo;
    let radius = 5u32;
    let window_len = (2 * radius + 1) as usize;

    // The lone content-bearing sentence sits at index 1 of the 11-slot
    // window, i.e. four positions before the centre slot (index 5).
    let outlier_pos = 1usize;
    let mut sentences: Vec<WindowedSentence> = Vec::with_capacity(window_len);
    for pos in 0..window_len {
        let fill = if pos == outlier_pos { 1.0 } else { 0.0 };
        sentences.push(WindowedSentence {
            tokens: vec![TokenWithRole {
                content_fp: vec![fill; subject_len],
                role: GrammaticalRole::Subject,
            }],
        });
    }

    let uni = bundle_sequence(Kernel::Uniform, radius, sentences.clone());
    let mex = bundle_sequence(Kernel::MexicanHat, radius, sentences);
    assert_eq!(uni.fingerprint.len(), mex.fingerprint.len());

    // Euclidean distance between the two fingerprints.
    let squared_distance = uni
        .fingerprint
        .iter()
        .zip(mex.fingerprint.iter())
        .map(|(a, b)| {
            let diff = a - b;
            diff * diff
        })
        .sum::<f32>();
    let l2: f32 = squared_distance.sqrt();
    assert!(
        l2 > 1e-3,
        "MexicanHat bundle should differ from Uniform bundle, l2={l2}"
    );
}
335+
/// Bundle normalization (HIGH from PR #279) is meant to keep the bundle's
/// L2 norm on a comparable scale regardless of kernel shape. For each of
/// the three kernels, a SUBJECT-only window of identical sentences must
/// give `L2 / sqrt(subject_len)` inside the loose band `[0.5, 1.5]`.
#[test]
fn bundle_l2_norm_invariant_to_kernel() {
    let (subj_lo, subj_hi) = GrammaticalRole::Subject.slice();
    let subject_len = subj_hi - subj_lo;
    // sqrt(subject_len) is the L2 norm of a vector holding 1.0 in every
    // SUBJECT dim; it is the reference scale for all kernels below.
    let scale = (subject_len as f32).sqrt();

    let sent = WindowedSentence {
        tokens: vec![TokenWithRole {
            content_fp: vec![1.0; subject_len],
            role: GrammaticalRole::Subject,
        }],
    };

    let kernels = [Kernel::Uniform, Kernel::MexicanHat, Kernel::Gaussian];
    for k in kernels {
        let traj = fill_and_bundle(k, 5, sent.clone());
        let sum_of_squares = traj.fingerprint.iter().map(|v| v * v).sum::<f32>();
        let l2: f32 = sum_of_squares.sqrt();
        // With identical all-ones sentences, each SUBJECT dim should come
        // out as (Σ w_i) / (Σ |w_i|). That ratio is exactly 1.0 whenever
        // every weight is positive (presumably Uniform and Gaussian —
        // confirm against the kernel definitions). Any negative weights
        // (presumably MexicanHat's brim) pull the ratio below 1.0, which
        // is why the band is loose rather than an equality check.
        let norm_l2 = l2 / scale;
        assert!(
            (0.5..=1.5).contains(&norm_l2),
            "kernel {k:?}: normalized L2 {norm_l2} (raw {l2}) out of [0.5, 1.5]"
        );
    }
}
206372}
0 commit comments