@@ -20,10 +20,11 @@ use std::{
2020} ;
2121
2222use arrow:: array:: { ArrayData , ArrayDataBuilder , AsArray } ;
23- use arrow_array:: { new_empty_array, new_null_array, Array , ArrayRef , UInt64Array } ;
24- use arrow_buffer:: { ArrowNativeType , BooleanBuffer , BooleanBufferBuilder , NullBuffer } ;
23+ use arrow_array:: { new_empty_array, new_null_array, Array , ArrayRef , OffsetSizeTrait , UInt64Array } ;
24+ use arrow_buffer:: {
25+ ArrowNativeType , BooleanBuffer , BooleanBufferBuilder , NullBuffer , ScalarBuffer ,
26+ } ;
2527use arrow_schema:: DataType ;
26- use bytemuck:: try_cast_slice;
2728use lance_arrow:: DataTypeExt ;
2829use snafu:: location;
2930
@@ -252,43 +253,42 @@ impl FixedWidthDataBlock {
252253}
253254
254255#[ derive( Debug ) ]
255- pub struct VariableWidthDataBlockBuilder {
256- offsets : Vec < u32 > ,
256+ pub struct VariableWidthDataBlockBuilder < T : OffsetSizeTrait > {
257+ offsets : Vec < T > ,
257258 bytes : Vec < u8 > ,
258259}
259260
260- impl VariableWidthDataBlockBuilder {
261+ impl < T : OffsetSizeTrait > VariableWidthDataBlockBuilder < T > {
261262 fn new ( estimated_size_bytes : u64 ) -> Self {
262263 Self {
263- offsets : vec ! [ 0u32 ] ,
264+ offsets : vec ! [ T :: from_usize ( 0 ) . unwrap ( ) ] ,
264265 bytes : Vec :: with_capacity ( estimated_size_bytes as usize ) ,
265266 }
266267 }
267268}
268269
269- impl DataBlockBuilderImpl for VariableWidthDataBlockBuilder {
270+ impl < T : OffsetSizeTrait > DataBlockBuilderImpl for VariableWidthDataBlockBuilder < T > {
270271 fn append ( & mut self , data_block : & DataBlock , selection : Range < u64 > ) {
271272 let block = data_block. as_variable_width_ref ( ) . unwrap ( ) ;
272- assert ! ( block. bits_per_offset == 32 ) ;
273+ assert ! ( block. bits_per_offset == T :: get_byte_width ( ) as u8 * 8 ) ;
273274
274- let offsets: & [ u32 ] = try_cast_slice ( & block. offsets )
275- . expect ( "cast from a bits_per_offset=32 `VariableWidthDataBlock's offsets field field to &[32] should be fine." ) ;
275+ let offsets: ScalarBuffer < T > = block. offsets . try_clone ( ) . unwrap ( ) . borrow_to_typed_slice ( ) ;
276276
277277 let start_offset = offsets[ selection. start as usize ] ;
278278 let end_offset = offsets[ selection. end as usize ] ;
279279 let mut previous_len = self . bytes . len ( ) ;
280280
281281 self . bytes
282- . extend_from_slice ( & block. data [ start_offset as usize .. end_offset as usize ] ) ;
282+ . extend_from_slice ( & block. data [ start_offset. as_usize ( ) .. end_offset. as_usize ( ) ] ) ;
283283
284284 self . offsets . extend (
285285 offsets[ selection. start as usize ..selection. end as usize ]
286286 . iter ( )
287287 . zip ( & offsets[ selection. start as usize + 1 ..=selection. end as usize ] )
288288 . map ( |( & current, & next) | {
289289 let this_value_len = next - current;
290- previous_len += this_value_len as usize ;
291- previous_len as u32
290+ previous_len += this_value_len. as_usize ( ) ;
291+ T :: from_usize ( previous_len) . unwrap ( )
292292 } ) ,
293293 ) ;
294294 }
@@ -298,7 +298,7 @@ impl DataBlockBuilderImpl for VariableWidthDataBlockBuilder {
298298 DataBlock :: VariableWidth ( VariableWidthBlock {
299299 data : LanceBuffer :: Owned ( self . bytes ) ,
300300 offsets : LanceBuffer :: reinterpret_vec ( self . offsets ) ,
301- bits_per_offset : 32 ,
301+ bits_per_offset : T :: get_byte_width ( ) as u8 * 8 ,
302302 num_values,
303303 block_info : BlockInfo :: new ( ) ,
304304 } )
@@ -1085,7 +1085,13 @@ impl DataBlock {
10851085 }
10861086 Self :: VariableWidth ( inner) => {
10871087 if inner. bits_per_offset == 32 {
1088- Box :: new ( VariableWidthDataBlockBuilder :: new ( estimated_size_bytes) )
1088+ Box :: new ( VariableWidthDataBlockBuilder :: < i32 > :: new (
1089+ estimated_size_bytes,
1090+ ) )
1091+ } else if inner. bits_per_offset == 64 {
1092+ Box :: new ( VariableWidthDataBlockBuilder :: < i64 > :: new (
1093+ estimated_size_bytes,
1094+ ) )
10891095 } else {
10901096 todo ! ( )
10911097 }
0 commit comments