@@ -88,11 +88,23 @@ use std::sync::Arc;
8888///
8989#[ derive( Clone ) ]
9090pub struct FixedSizeBinaryArray {
91- data_type : DataType , // Must be DataType::FixedSizeBinary(value_length)
91+ /// Must be DataType::FixedSizeBinary(value_size)
92+ data_type : DataType ,
93+ /// `len` values, each `value_size` bytes
9294 value_data : Buffer ,
95+ /// Optional Null Buffer
9396 nulls : Option < NullBuffer > ,
97+ /// Number of elements in the array
9498 len : usize ,
95- value_length : i32 ,
99+ /// size of each element, validated to fit in a positive i32
100+ ///
101+ /// Corresponds to the [`byteWidth` field] in the Arrow Spec
102+ ///
103+ /// note: Arrow stores `value_len` using i32. This implementation stores it
104+ /// as a usize to ensure correct offset calculations.
105+ ///
106+ /// [`byteWidth` field]: https://github.com/apache/arrow/blob/2a89d03bbefd620b42126b8e00f8ae57e99cd638/format/Schema.fbs#L211
107+ value_size : usize ,
96108}
97109
98110impl FixedSizeBinaryArray {
@@ -124,7 +136,6 @@ impl FixedSizeBinaryArray {
124136 /// * `value_length < 0`
125137 /// * `values.len() / value_length != nulls.len()`
126138 /// * `value_length == 0 && values.len() != 0`
127- /// * `len * value_length > i32::MAX`
128139 pub fn try_new (
129140 value_length : i32 ,
130141 values : Buffer ,
@@ -163,44 +174,15 @@ impl FixedSizeBinaryArray {
163174 }
164175 } ;
165176
166- Self :: validate_lengths ( value_size, len) ?;
167-
168177 Ok ( Self {
169178 data_type,
170179 value_data : values,
171- value_length ,
180+ value_size ,
172181 nulls,
173182 len,
174183 } )
175184 }
176185
177- /// Some calculations below use i32 arithmetic which can overflow when
178- /// valid offsets are past i32::MAX. Until that is solved for real do not
179- /// permit constructing any FixedSizeBinaryArray that has a valid offset
180- /// past i32::MAX
181- fn validate_lengths ( value_size : usize , len : usize ) -> Result < ( ) , ArrowError > {
182- // the offset is also calculated for the next element (i + 1) so
183- // check `len` (not last element index) to ensure that all offsets are valid
184- let max_offset = value_size. checked_mul ( len) . ok_or_else ( || {
185- ArrowError :: InvalidArgumentError ( format ! (
186- "FixedSizeBinaryArray error: value size {value_size} * len {len} exceeds maximum valid offset"
187- ) )
188- } ) ?;
189-
190- let max_valid_offset: usize = i32:: MAX . try_into ( ) . map_err ( |_| {
191- ArrowError :: InvalidArgumentError ( format ! (
192- "FixedSizeBinaryArray error: maximum valid offset exceeds i32::MAX, got {max_offset}"
193- ) )
194- } ) ?;
195-
196- if max_offset > max_valid_offset {
197- return Err ( ArrowError :: InvalidArgumentError ( format ! (
198- "FixedSizeBinaryArray error: value size {value_size} * length {len} exceeds maximum valid offset of {max_valid_offset}"
199- ) ) ) ;
200- } ;
201- Ok ( ( ) )
202- }
203-
204186 /// Create a new [`FixedSizeBinaryArray`] of length `len` where all values are null
205187 ///
206188 /// # Panics
@@ -209,26 +191,25 @@ impl FixedSizeBinaryArray {
209191 ///
210192 /// * `value_length < 0`
211193 /// * `value_length * len` would overflow `usize`
212- /// * `value_length * len > i32::MAX`
213194 /// * `value_length * len * 8` would overflow `usize`
214195 pub fn new_null ( value_length : i32 , len : usize ) -> Self {
215196 const BITS_IN_A_BYTE : usize = 8 ;
216197 let value_size = value_length. to_usize ( ) . unwrap ( ) ;
217- Self :: validate_lengths ( value_size, len) . unwrap ( ) ;
218198 let capacity_in_bytes = value_size. checked_mul ( len) . unwrap ( ) ;
219199 let capacity_in_bits = capacity_in_bytes. checked_mul ( BITS_IN_A_BYTE ) . unwrap ( ) ;
220200 Self {
221201 data_type : DataType :: FixedSizeBinary ( value_length) ,
222202 value_data : MutableBuffer :: new_null ( capacity_in_bits) . into ( ) ,
223203 nulls : Some ( NullBuffer :: new_null ( len) ) ,
224- value_length ,
204+ value_size ,
225205 len,
226206 }
227207 }
228208
229209 /// Deconstruct this array into its constituent parts
230210 pub fn into_parts ( self ) -> ( i32 , Buffer , Option < NullBuffer > ) {
231- ( self . value_length , self . value_data , self . nulls )
211+ let value_length = self . value_length ( ) ;
212+ ( value_length, self . value_data , self . nulls )
232213 }
233214
234215 /// Returns the element at index `i` as a byte slice.
@@ -239,19 +220,14 @@ impl FixedSizeBinaryArray {
239220 /// # Panics
240221 /// Panics if index `i` is out of bounds.
241222 pub fn value ( & self , i : usize ) -> & [ u8 ] {
223+ let len = self . len ( ) ;
242224 assert ! (
243- i < self . len( ) ,
244- "Trying to access an element at index {} from a FixedSizeBinaryArray of length {}" ,
245- i,
246- self . len( )
225+ i < len,
226+ "Trying to access an element at index {i} from a FixedSizeBinaryArray of length {len}" ,
247227 ) ;
248- let offset = i + self . offset ( ) ;
228+ let position = i * self . value_size ;
249229 unsafe {
250- let pos = self . value_offset_at ( offset) ;
251- std:: slice:: from_raw_parts (
252- self . value_data . as_ptr ( ) . offset ( pos as isize ) ,
253- ( self . value_offset_at ( offset + 1 ) - pos) as usize ,
254- )
230+ std:: slice:: from_raw_parts ( self . value_data . as_ptr ( ) . add ( position) , self . value_size )
255231 }
256232 }
257233
@@ -265,30 +241,46 @@ impl FixedSizeBinaryArray {
265241 /// Caller is responsible for ensuring that the index is within the bounds
266242 /// of the array
267243 pub unsafe fn value_unchecked ( & self , i : usize ) -> & [ u8 ] {
268- let offset = i + self . offset ( ) ;
269- let pos = self . value_offset_at ( offset) ;
244+ let position = i * self . value_size ;
270245 unsafe {
271- std:: slice:: from_raw_parts (
272- self . value_data . as_ptr ( ) . offset ( pos as isize ) ,
273- ( self . value_offset_at ( offset + 1 ) - pos) as usize ,
274- )
246+ std:: slice:: from_raw_parts ( self . value_data . as_ptr ( ) . add ( position) , self . value_size )
275247 }
276248 }
277249
278250 /// Returns the offset for the element at index `i`.
279251 ///
280252 /// Note this doesn't do any bound checking, for performance reason.
253+ ///
254+ /// # Panics
255+ ///
256+ /// Panics if the computed byte offset exceeds `i32::MAX`.
257+ #[ deprecated( since = "59.0.0" , note = "Use i * value_size() instead" ) ]
281258 #[ inline]
282259 pub fn value_offset ( & self , i : usize ) -> i32 {
283- self . value_offset_at ( self . offset ( ) + i )
260+ self . value_length ( ) * i as i32
284261 }
285262
286263 /// Returns the length for an element.
287264 ///
288265 /// All elements have the same length as the array is a fixed size.
266+ ///
267+ /// Returns an `i32` to be compatible with the Arrow spec.
268+ ///
269+ /// Use [`Self::value_size`] to return a `usize`.
289270 #[ inline]
290271 pub fn value_length ( & self ) -> i32 {
291- self . value_length
272+ // This is safe: constructor validated that value_size was a valid i32
273+ self . value_size as i32
274+ }
275+
276+ /// Return the length for an element, as a usize.
277+ ///
278+ /// All elements have the same length as the array is a fixed size.
279+ ///
280+ /// Note: This value will always fit, without overflow, into an i32
281+ #[ inline]
282+ pub fn value_size ( & self ) -> usize {
283+ self . value_size
292284 }
293285
294286 /// Returns the values of this array.
@@ -311,14 +303,16 @@ impl FixedSizeBinaryArray {
311303 offset. saturating_add( len) <= self . len,
312304 "the length + offset of the sliced FixedSizeBinaryArray cannot exceed the existing length"
313305 ) ;
314-
315- let size = self . value_length as usize ;
306+ let offset_bytes = offset
307+ . checked_mul ( self . value_size )
308+ . expect ( "offset overflow" ) ;
309+ let len_bytes = len. checked_mul ( self . value_size ) . expect ( "offset overflow" ) ;
316310
317311 Self {
318312 data_type : self . data_type . clone ( ) ,
319313 nulls : self . nulls . as_ref ( ) . map ( |n| n. slice ( offset, len) ) ,
320- value_length : self . value_length ,
321- value_data : self . value_data . slice_with_length ( offset * size , len * size ) ,
314+ value_size : self . value_size ,
315+ value_data : self . value_data . slice_with_length ( offset_bytes , len_bytes ) ,
322316 len,
323317 }
324318 }
@@ -420,7 +414,6 @@ impl FixedSizeBinaryArray {
420414 let nulls = NullBuffer :: from_unsliced_buffer ( null_buf, len) ;
421415
422416 let value_size = value_size. unwrap_or ( 0 ) ;
423- Self :: validate_lengths ( value_size, len) ?;
424417 let value_length = value_size. try_into ( ) . map_err ( |_| {
425418 ArrowError :: InvalidArgumentError ( format ! (
426419 "FixedSizeBinaryArray value length exceeds i32, got {value_size}"
@@ -430,7 +423,7 @@ impl FixedSizeBinaryArray {
430423 data_type : DataType :: FixedSizeBinary ( value_length) ,
431424 value_data : buffer. into ( ) ,
432425 nulls,
433- value_length ,
426+ value_size ,
434427 len,
435428 } )
436429 }
@@ -514,14 +507,13 @@ impl FixedSizeBinaryArray {
514507 } ) ?;
515508
516509 let nulls = NullBuffer :: from_unsliced_buffer ( null_buf, len) ;
517- Self :: validate_lengths ( value_size, len) ?;
518510
519511 Ok ( Self {
520512 data_type : DataType :: FixedSizeBinary ( value_length) ,
521513 value_data : buffer. into ( ) ,
522514 nulls,
523515 len,
524- value_length ,
516+ value_size ,
525517 } )
526518 }
527519
@@ -583,7 +575,6 @@ impl FixedSizeBinaryArray {
583575 }
584576
585577 let value_size = value_size. unwrap_or ( 0 ) ;
586- Self :: validate_lengths ( value_size, len) ?;
587578 let value_length = value_size. try_into ( ) . map_err ( |_| {
588579 ArrowError :: InvalidArgumentError ( format ! (
589580 "FixedSizeBinaryArray value length exceeds i32, got {value_size}"
@@ -593,16 +584,11 @@ impl FixedSizeBinaryArray {
593584 data_type : DataType :: FixedSizeBinary ( value_length) ,
594585 value_data : buffer. into ( ) ,
595586 nulls : None ,
596- value_length ,
587+ value_size ,
597588 len,
598589 } )
599590 }
600591
601- #[ inline]
602- fn value_offset_at ( & self , i : usize ) -> i32 {
603- self . value_length * i as i32
604- }
605-
606592 /// constructs a new iterator
607593 pub fn iter ( & self ) -> FixedSizeBinaryIter < ' _ > {
608594 FixedSizeBinaryIter :: new ( self )
@@ -626,8 +612,6 @@ impl From<ArrayData> for FixedSizeBinaryArray {
626612 let value_size = value_length
627613 . to_usize ( )
628614 . expect ( "FixedSizeBinaryArray value length must be non-negative" ) ;
629- Self :: validate_lengths ( value_size, len)
630- . expect ( "FixedSizeBinaryArray offsets must fit within i32" ) ;
631615 let value_data = buffers[ 0 ] . slice_with_length (
632616 offset. checked_mul ( value_size) . expect ( "offset overflow" ) ,
633617 len. checked_mul ( value_size) . expect ( "length overflow" ) ,
@@ -638,7 +622,7 @@ impl From<ArrayData> for FixedSizeBinaryArray {
638622 nulls,
639623 len,
640624 value_data,
641- value_length ,
625+ value_size ,
642626 }
643627 }
644628}
@@ -849,7 +833,6 @@ mod tests {
849833 fixed_size_binary_array. value( 2 )
850834 ) ;
851835 assert_eq ! ( 5 , fixed_size_binary_array. value_length( ) ) ;
852- assert_eq ! ( 10 , fixed_size_binary_array. value_offset( 2 ) ) ;
853836 for i in 0 ..3 {
854837 assert ! ( fixed_size_binary_array. is_valid( i) ) ;
855838 assert ! ( !fixed_size_binary_array. is_null( i) ) ;
@@ -872,9 +855,7 @@ mod tests {
872855 fixed_size_binary_array. value( 1 )
873856 ) ;
874857 assert_eq ! ( 2 , fixed_size_binary_array. len( ) ) ;
875- assert_eq ! ( 0 , fixed_size_binary_array. value_offset( 0 ) ) ;
876858 assert_eq ! ( 5 , fixed_size_binary_array. value_length( ) ) ;
877- assert_eq ! ( 5 , fixed_size_binary_array. value_offset( 1 ) ) ;
878859 }
879860
880861 #[ test]
@@ -1122,26 +1103,6 @@ mod tests {
11221103 array. value ( 4 ) ;
11231104 }
11241105
1125- #[ test]
1126- fn test_validate_lengths_allows_empty_array ( ) {
1127- FixedSizeBinaryArray :: validate_lengths ( 1024 , 0 ) . unwrap ( ) ;
1128- }
1129-
1130- #[ test]
1131- fn test_validate_lengths_allows_i32_max_offset ( ) {
1132- FixedSizeBinaryArray :: validate_lengths ( 1 , i32:: MAX as usize ) . unwrap ( ) ;
1133- FixedSizeBinaryArray :: validate_lengths ( 262_176 , 8191 ) . unwrap ( ) ;
1134- }
1135-
1136- #[ test]
1137- fn test_validate_lengths_rejects_offset_past_i32_max ( ) {
1138- let err = FixedSizeBinaryArray :: validate_lengths ( 262_177 , 8192 ) . unwrap_err ( ) ;
1139- assert_eq ! (
1140- err. to_string( ) ,
1141- "Invalid argument error: FixedSizeBinaryArray error: value size 262177 * length 8192 exceeds maximum valid offset of 2147483647" ,
1142- ) ;
1143- }
1144-
11451106 #[ test]
11461107 fn test_constructors ( ) {
11471108 let buffer = Buffer :: from_vec ( vec ! [ 0_u8 ; 10 ] ) ;
0 commit comments