@@ -469,16 +469,178 @@ mod tests {
469469 }
470470
471471 #[ test]
472- fn rejects_dimension_below_2 ( ) {
473- let mut buf = BufferMut :: < f32 > :: with_capacity ( 1 ) ;
474- buf. push ( 1.0 ) ;
475- let elements = PrimitiveArray :: new :: < f32 > ( buf. freeze ( ) , Validity :: NonNullable ) ;
476- let fsl = FixedSizeListArray :: try_new ( elements. into_array ( ) , 1 , Validity :: NonNullable , 1 )
477- . unwrap ( ) ;
472+ fn mse_rejects_dimension_below_2 ( ) {
473+ let fsl = make_fsl_dim1 ( ) ;
478474 let config = TurboQuantConfig {
479475 bit_width : 2 ,
480476 seed : Some ( 0 ) ,
481477 } ;
482478 assert ! ( turboquant_encode_mse( & fsl, & config) . is_err( ) ) ;
483479 }
480+
481+ #[ test]
482+ fn qjl_rejects_dimension_below_2 ( ) {
483+ let fsl = make_fsl_dim1 ( ) ;
484+ let config = TurboQuantConfig {
485+ bit_width : 3 ,
486+ seed : Some ( 0 ) ,
487+ } ;
488+ assert ! ( turboquant_encode_qjl( & fsl, & config) . is_err( ) ) ;
489+ }
490+
491+ fn make_fsl_dim1 ( ) -> FixedSizeListArray {
492+ let mut buf = BufferMut :: < f32 > :: with_capacity ( 1 ) ;
493+ buf. push ( 1.0 ) ;
494+ let elements = PrimitiveArray :: new :: < f32 > ( buf. freeze ( ) , Validity :: NonNullable ) ;
495+ FixedSizeListArray :: try_new ( elements. into_array ( ) , 1 , Validity :: NonNullable , 1 ) . unwrap ( )
496+ }
497+
498+ // -----------------------------------------------------------------------
499+ // Verification tests for stored metadata
500+ // -----------------------------------------------------------------------
501+
502+ /// Verify that the centroids stored in the MSE array match what get_centroids() computes.
503+ #[ test]
504+ fn stored_centroids_match_computed ( ) -> VortexResult < ( ) > {
505+ let fsl = make_fsl ( 10 , 128 , 42 ) ;
506+ let config = TurboQuantConfig {
507+ bit_width : 3 ,
508+ seed : Some ( 123 ) ,
509+ } ;
510+ let encoded = turboquant_encode_mse ( & fsl, & config) ?;
511+
512+ let mut ctx = SESSION . create_execution_ctx ( ) ;
513+ let stored_centroids_prim = encoded
514+ . centroids ( )
515+ . clone ( )
516+ . execute :: < PrimitiveArray > ( & mut ctx) ?;
517+ let stored = stored_centroids_prim. as_slice :: < f32 > ( ) ;
518+
519+ let padded_dim = encoded. padded_dim ( ) ;
520+ let computed = crate :: centroids:: get_centroids ( padded_dim, 3 ) ?;
521+
522+ assert_eq ! ( stored. len( ) , computed. len( ) ) ;
523+ for i in 0 ..stored. len ( ) {
524+ assert_eq ! ( stored[ i] , computed[ i] , "Centroid mismatch at {i}" ) ;
525+ }
526+ Ok ( ( ) )
527+ }
528+
529+ /// Verify that stored rotation signs produce identical decode to seed-based decode.
530+ ///
531+ /// Encodes the same data twice: once with the new path (stored signs), and
532+ /// once by manually recomputing the rotation from the seed. Both should
533+ /// produce identical output.
534+ #[ test]
535+ fn stored_rotation_signs_produce_correct_decode ( ) -> VortexResult < ( ) > {
536+ use crate :: rotation:: RotationMatrix ;
537+
538+ let fsl = make_fsl ( 20 , 128 , 42 ) ;
539+ let config = TurboQuantConfig {
540+ bit_width : 3 ,
541+ seed : Some ( 123 ) ,
542+ } ;
543+ let encoded = turboquant_encode_mse ( & fsl, & config) ?;
544+
545+ // Decode via the stored-signs path (normal decode).
546+ let mut ctx = SESSION . create_execution_ctx ( ) ;
547+ let decoded_fsl = encoded
548+ . clone ( )
549+ . into_array ( )
550+ . execute :: < FixedSizeListArray > ( & mut ctx) ?;
551+ let decoded = decoded_fsl. elements ( ) . to_canonical ( ) ?. into_primitive ( ) ;
552+ let decoded_slice = decoded. as_slice :: < f32 > ( ) ;
553+
554+ // Verify stored signs match seed-derived signs.
555+ let rot_from_seed = RotationMatrix :: try_new ( 123 , 128 ) ?;
556+ let exported = rot_from_seed. export_inverse_signs_bool_array ( ) ;
557+ let stored_signs = encoded
558+ . rotation_signs ( )
559+ . clone ( )
560+ . execute :: < vortex_array:: arrays:: BoolArray > ( & mut ctx) ?;
561+
562+ assert_eq ! ( exported. len( ) , stored_signs. len( ) ) ;
563+ let exp_buf = exported. to_bit_buffer ( ) ;
564+ let stored_buf = stored_signs. to_bit_buffer ( ) ;
565+ for i in 0 ..exported. len ( ) {
566+ assert_eq ! (
567+ exp_buf. value( i) ,
568+ stored_buf. value( i) ,
569+ "Sign mismatch at bit {i}"
570+ ) ;
571+ }
572+
573+ // Also verify decode output is non-empty and has expected size.
574+ assert_eq ! ( decoded_slice. len( ) , 20 * 128 ) ;
575+ Ok ( ( ) )
576+ }
577+
578+ // -----------------------------------------------------------------------
579+ // QJL-specific quality tests
580+ // -----------------------------------------------------------------------
581+
582+ /// Verify that QJL's MSE component (at bit_width-1) satisfies the theoretical bound.
583+ #[ rstest]
584+ #[ case( 128 , 3 ) ]
585+ #[ case( 128 , 4 ) ]
586+ #[ case( 256 , 3 ) ]
587+ fn qjl_mse_within_theoretical_bound (
588+ #[ case] dim : usize ,
589+ #[ case] bit_width : u8 ,
590+ ) -> VortexResult < ( ) > {
591+ let num_rows = 200 ;
592+ let fsl = make_fsl ( num_rows, dim, 42 ) ;
593+ let config = TurboQuantConfig {
594+ bit_width,
595+ seed : Some ( 789 ) ,
596+ } ;
597+ let ( original, decoded) = encode_decode_qjl ( & fsl, & config) ?;
598+
599+ let normalized_mse = per_vector_normalized_mse ( & original, & decoded, dim, num_rows) ;
600+
601+ // QJL at b bits uses (b-1)-bit MSE plus a correction term.
602+ // The MSE should be at most the (b-1)-bit theoretical bound, though
603+ // in practice the QJL correction often improves it further.
604+ let mse_bound = theoretical_mse_bound ( bit_width - 1 ) ;
605+ assert ! (
606+ normalized_mse < mse_bound,
607+ "QJL MSE {normalized_mse:.6} exceeds (b-1)-bit bound {mse_bound:.6} \
608+ for dim={dim}, bits={bit_width}",
609+ ) ;
610+ Ok ( ( ) )
611+ }
612+
613+ /// Verify that high-bitwidth QJL (8-9 bits) achieves very low distortion.
614+ #[ rstest]
615+ #[ case( 128 , 8 ) ]
616+ #[ case( 128 , 9 ) ]
617+ fn high_bitwidth_qjl_is_small ( #[ case] dim : usize , #[ case] bit_width : u8 ) -> VortexResult < ( ) > {
618+ let num_rows = 200 ;
619+ let fsl = make_fsl ( num_rows, dim, 42 ) ;
620+
621+ // Compare against 4-bit QJL as reference ceiling.
622+ let config_4bit = TurboQuantConfig {
623+ bit_width : 4 ,
624+ seed : Some ( 789 ) ,
625+ } ;
626+ let ( original_4, decoded_4) = encode_decode_qjl ( & fsl, & config_4bit) ?;
627+ let mse_4bit = per_vector_normalized_mse ( & original_4, & decoded_4, dim, num_rows) ;
628+
629+ let config = TurboQuantConfig {
630+ bit_width,
631+ seed : Some ( 789 ) ,
632+ } ;
633+ let ( original, decoded) = encode_decode_qjl ( & fsl, & config) ?;
634+ let mse = per_vector_normalized_mse ( & original, & decoded, dim, num_rows) ;
635+
636+ assert ! (
637+ mse < mse_4bit,
638+ "{bit_width}-bit QJL MSE ({mse:.6}) should be < 4-bit ({mse_4bit:.6})"
639+ ) ;
640+ assert ! (
641+ mse < 0.01 ,
642+ "{bit_width}-bit QJL MSE ({mse:.6}) should be < 1%"
643+ ) ;
644+ Ok ( ( ) )
645+ }
484646}
0 commit comments