@@ -19,12 +19,16 @@ use crate::encodings::turboquant::vtable::TurboQuant;
1919/// TurboQuant array data.
2020///
2121/// TurboQuant is a lossy vector quantization encoding for [`Vector`](crate::vector::Vector)
22- /// extension arrays. It stores quantized coordinate codes and per-vector norms, along with shared
22+ /// extension arrays. It stores quantized coordinate codes for unit-norm vectors, along with shared
2323/// codebook centroids and the parameters of the current structured rotation.
2424///
25+ /// Norms should be stored externally in the [`L2Denorm`](crate::scalar_fns::l2_denorm::L2Denorm)
26+ /// `ScalarFnArray` wrapper.
27+ ///
2528/// See the [module docs](crate::encodings::turboquant) for algorithmic details.
2629///
27- /// A degenerate TurboQuant array has zero rows and `bit_width == 0`, with all slots empty.
30+ /// Note that degenerate TurboQuant arrays have zero rows and `bit_width == 0`, with all slots
31+ /// empty.
2832#[ derive( Clone , Debug ) ]
2933pub struct TurboQuantData {
3034 /// The vector dimension `d`, cached from the `FixedSizeList` storage dtype's list size.
@@ -95,16 +99,21 @@ impl TurboQuantData {
9599 pub fn validate (
96100 dtype : & DType ,
97101 codes : & ArrayRef ,
98- norms : & ArrayRef ,
99102 centroids : & ArrayRef ,
100103 rotation_signs : & ArrayRef ,
101104 ) -> VortexResult < ( ) > {
102105 let vector_metadata = TurboQuant :: validate_dtype ( dtype) ?;
103106 let dimension = vector_metadata. dimensions ( ) ;
104107 let padded_dim = dimension. next_power_of_two ( ) ;
105108
109+ // TurboQuant arrays are always non-nullable. Nullability should be handled by the external
110+ // L2Denorm ScalarFnArray wrapper.
111+ vortex_ensure ! (
112+ !dtype. is_nullable( ) ,
113+ "TurboQuant dtype must be non-nullable, got {dtype}" ,
114+ ) ;
115+
106116 // Codes must be a non-nullable FixedSizeList<u8> with list_size == padded_dim.
107- // Null vectors are represented by all-zero codes since validity lives in the norms array.
108117 let expected_codes_dtype = DType :: FixedSizeList (
109118 Arc :: new ( DType :: Primitive ( PType :: U8 , Nullability :: NonNullable ) ) ,
110119 padded_dim,
@@ -116,23 +125,6 @@ impl TurboQuantData {
116125 "codes dtype does not match expected {expected_codes_dtype}" ,
117126 ) ;
118127
119- let num_rows = codes. len ( ) ;
120- vortex_ensure_eq ! (
121- norms. len( ) ,
122- num_rows,
123- "norms length must match codes length" ,
124- ) ;
125-
126- // Norms dtype must match the element ptype of the Vector, with the parent's nullability.
127- // Norms carry the validity of the entire TurboQuant array.
128- let element_ptype = vector_metadata. element_ptype ( ) ;
129- let expected_norms_dtype = DType :: Primitive ( element_ptype, dtype. nullability ( ) ) ;
130- vortex_ensure_eq ! (
131- * norms. dtype( ) ,
132- expected_norms_dtype,
133- "norms dtype does not match expected {expected_norms_dtype}" ,
134- ) ;
135-
136128 // Centroids are always f32 regardless of element type.
137129 let centroids_dtype = DType :: Primitive ( PType :: F32 , Nullability :: NonNullable ) ;
138130 vortex_ensure_eq ! (
@@ -154,6 +146,7 @@ impl TurboQuantData {
154146 "rotation_signs dtype does not match expected {expected_signs_dtype}" ,
155147 ) ;
156148 // Degenerate (empty) case: all children must be empty, and bit_width is 0.
149+ let num_rows = codes. len ( ) ;
157150 if num_rows == 0 {
158151 vortex_ensure ! (
159152 centroids. is_empty( ) ,
@@ -198,13 +191,11 @@ impl TurboQuantData {
198191
199192 pub ( crate ) fn make_slots (
200193 codes : ArrayRef ,
201- norms : ArrayRef ,
202194 centroids : ArrayRef ,
203195 rotation_signs : ArrayRef ,
204196 ) -> Vec < Option < ArrayRef > > {
205197 let mut slots = vec ! [ None ; Slot :: COUNT ] ;
206198 slots[ Slot :: Codes as usize ] = Some ( codes) ;
207- slots[ Slot :: Norms as usize ] = Some ( norms) ;
208199 slots[ Slot :: Centroids as usize ] = Some ( centroids) ;
209200 slots[ Slot :: RotationSigns as usize ] = Some ( rotation_signs) ;
210201 slots
@@ -242,12 +233,6 @@ pub trait TurboQuantArrayExt: TypedArrayRef<TurboQuant> {
242233 . vortex_expect ( "TurboQuantArray codes slot" )
243234 }
244235
245- fn norms ( & self ) -> & ArrayRef {
246- self . as_ref ( ) . slots ( ) [ Slot :: Norms as usize ]
247- . as_ref ( )
248- . vortex_expect ( "TurboQuantArray norms slot" )
249- }
250-
251236 fn centroids ( & self ) -> & ArrayRef {
252237 self . as_ref ( ) . slots ( ) [ Slot :: Centroids as usize ]
253238 . as_ref ( )
0 commit comments