@@ -468,6 +468,121 @@ impl<'a> TypedElements<'a> {
468468 )
469469 }
470470
471+ /// Check if value at given index is null.
472+ #[ inline]
473+ fn is_null_at ( & self , idx : usize ) -> bool {
474+ match self {
475+ TypedElements :: Boolean ( arr) => arr. is_null ( idx) ,
476+ TypedElements :: Int8 ( arr) => arr. is_null ( idx) ,
477+ TypedElements :: Int16 ( arr) => arr. is_null ( idx) ,
478+ TypedElements :: Int32 ( arr) => arr. is_null ( idx) ,
479+ TypedElements :: Int64 ( arr) => arr. is_null ( idx) ,
480+ TypedElements :: Float32 ( arr) => arr. is_null ( idx) ,
481+ TypedElements :: Float64 ( arr) => arr. is_null ( idx) ,
482+ TypedElements :: Date32 ( arr) => arr. is_null ( idx) ,
483+ TypedElements :: TimestampMicro ( arr) => arr. is_null ( idx) ,
484+ TypedElements :: Decimal128 ( arr, _) => arr. is_null ( idx) ,
485+ TypedElements :: String ( arr) => arr. is_null ( idx) ,
486+ TypedElements :: LargeString ( arr) => arr. is_null ( idx) ,
487+ TypedElements :: Binary ( arr) => arr. is_null ( idx) ,
488+ TypedElements :: LargeBinary ( arr) => arr. is_null ( idx) ,
489+ TypedElements :: Other ( arr, _) => arr. is_null ( idx) ,
490+ }
491+ }
492+
493+ /// Check if this is a fixed-width type (value fits in 8-byte slot).
494+ #[ inline]
495+ fn is_fixed_width ( & self ) -> bool {
496+ match self {
497+ TypedElements :: Boolean ( _)
498+ | TypedElements :: Int8 ( _)
499+ | TypedElements :: Int16 ( _)
500+ | TypedElements :: Int32 ( _)
501+ | TypedElements :: Int64 ( _)
502+ | TypedElements :: Float32 ( _)
503+ | TypedElements :: Float64 ( _)
504+ | TypedElements :: Date32 ( _)
505+ | TypedElements :: TimestampMicro ( _) => true ,
506+ TypedElements :: Decimal128 ( _, p) => * p <= MAX_LONG_DIGITS ,
507+ _ => false ,
508+ }
509+ }
510+
511+ /// Get fixed-width value as i64 for the 8-byte field slot.
512+ #[ inline]
513+ fn get_fixed_value ( & self , idx : usize ) -> i64 {
514+ match self {
515+ TypedElements :: Boolean ( arr) => {
516+ if arr. value ( idx) { 1 } else { 0 }
517+ }
518+ TypedElements :: Int8 ( arr) => arr. value ( idx) as i64 ,
519+ TypedElements :: Int16 ( arr) => arr. value ( idx) as i64 ,
520+ TypedElements :: Int32 ( arr) => arr. value ( idx) as i64 ,
521+ TypedElements :: Int64 ( arr) => arr. value ( idx) ,
522+ TypedElements :: Float32 ( arr) => ( arr. value ( idx) . to_bits ( ) as i32 ) as i64 ,
523+ TypedElements :: Float64 ( arr) => arr. value ( idx) . to_bits ( ) as i64 ,
524+ TypedElements :: Date32 ( arr) => arr. value ( idx) as i64 ,
525+ TypedElements :: TimestampMicro ( arr) => arr. value ( idx) ,
526+ TypedElements :: Decimal128 ( arr, _) => arr. value ( idx) as i64 ,
527+ _ => 0 , // Should not be called for variable-length types
528+ }
529+ }
530+
531+ /// Write variable-length data to buffer. Returns length written (0 for fixed-width).
532+ fn write_variable_value (
533+ & self ,
534+ buffer : & mut Vec < u8 > ,
535+ idx : usize ,
536+ base_offset : usize ,
537+ ) -> CometResult < usize > {
538+ match self {
539+ TypedElements :: String ( arr) => {
540+ let bytes = arr. value ( idx) . as_bytes ( ) ;
541+ let len = bytes. len ( ) ;
542+ buffer. extend_from_slice ( bytes) ;
543+ let padding = round_up_to_8 ( len) - len;
544+ buffer. extend ( std:: iter:: repeat_n ( 0u8 , padding) ) ;
545+ Ok ( len)
546+ }
547+ TypedElements :: LargeString ( arr) => {
548+ let bytes = arr. value ( idx) . as_bytes ( ) ;
549+ let len = bytes. len ( ) ;
550+ buffer. extend_from_slice ( bytes) ;
551+ let padding = round_up_to_8 ( len) - len;
552+ buffer. extend ( std:: iter:: repeat_n ( 0u8 , padding) ) ;
553+ Ok ( len)
554+ }
555+ TypedElements :: Binary ( arr) => {
556+ let bytes = arr. value ( idx) ;
557+ let len = bytes. len ( ) ;
558+ buffer. extend_from_slice ( bytes) ;
559+ let padding = round_up_to_8 ( len) - len;
560+ buffer. extend ( std:: iter:: repeat_n ( 0u8 , padding) ) ;
561+ Ok ( len)
562+ }
563+ TypedElements :: LargeBinary ( arr) => {
564+ let bytes = arr. value ( idx) ;
565+ let len = bytes. len ( ) ;
566+ buffer. extend_from_slice ( bytes) ;
567+ let padding = round_up_to_8 ( len) - len;
568+ buffer. extend ( std:: iter:: repeat_n ( 0u8 , padding) ) ;
569+ Ok ( len)
570+ }
571+ TypedElements :: Decimal128 ( arr, precision) if * precision > MAX_LONG_DIGITS => {
572+ let bytes = i128_to_spark_decimal_bytes ( arr. value ( idx) ) ;
573+ let len = bytes. len ( ) ;
574+ buffer. extend_from_slice ( & bytes) ;
575+ let padding = round_up_to_8 ( len) - len;
576+ buffer. extend ( std:: iter:: repeat_n ( 0u8 , padding) ) ;
577+ Ok ( len)
578+ }
579+ TypedElements :: Other ( arr, element_type) => {
580+ write_nested_variable_to_buffer ( buffer, element_type, arr, idx, base_offset)
581+ }
582+ _ => Ok ( 0 ) , // Fixed-width types
583+ }
584+ }
585+
471586 /// Write a range of elements to buffer in UnsafeArrayData format.
472587 /// Returns the total bytes written (including header).
473588 fn write_range_to_buffer (
@@ -1727,6 +1842,8 @@ fn write_array_element(buffer: &mut [u8], data_type: &DataType, value: i64, offs
17271842
17281843/// Writes a struct value directly to the buffer.
17291844/// Returns the unpadded length written.
1845+ ///
1846+ /// Processes each field using inline type dispatch to avoid allocation overhead.
17301847#[ inline]
17311848fn write_struct_to_buffer (
17321849 buffer : & mut Vec < u8 > ,
@@ -1744,43 +1861,79 @@ fn write_struct_to_buffer(
17441861 // Reserve space for fixed-width portion (zeros for null bits and field slots)
17451862 buffer. resize ( struct_start + nested_fixed_size, 0 ) ;
17461863
1747- // Write each field of the struct
1864+ // Write each field with inline type handling (no allocation)
17481865 for ( field_idx, field) in fields. iter ( ) . enumerate ( ) {
17491866 let column = struct_array. column ( field_idx) ;
1750- let is_null = column . is_null ( row_idx ) ;
1867+ let data_type = field . data_type ( ) ;
17511868
1752- if is_null {
1869+ if column . is_null ( row_idx ) {
17531870 // Set null bit in nested struct
1754- let word_idx = field_idx / 64 ;
1755- let bit_idx = field_idx % 64 ;
1756- let word_offset = struct_start + word_idx * 8 ;
1757- let mut word =
1758- i64:: from_le_bytes ( buffer[ word_offset..word_offset + 8 ] . try_into ( ) . unwrap ( ) ) ;
1759- word |= 1i64 << bit_idx;
1760- buffer[ word_offset..word_offset + 8 ] . copy_from_slice ( & word. to_le_bytes ( ) ) ;
1871+ set_null_bit ( buffer, struct_start, field_idx) ;
17611872 } else {
17621873 let field_offset = struct_start + nested_bitset_width + field_idx * 8 ;
17631874
1764- // Check if this field has variable-length data
1765- let var_len = write_nested_variable_to_buffer (
1766- buffer,
1767- field. data_type ( ) ,
1768- column,
1769- row_idx,
1770- struct_start,
1771- ) ?;
1875+ // Inline type dispatch for fixed-width types (most common case)
1876+ let value = match data_type {
1877+ DataType :: Boolean => {
1878+ let arr = column. as_any ( ) . downcast_ref :: < BooleanArray > ( ) . unwrap ( ) ;
1879+ Some ( if arr. value ( row_idx) { 1i64 } else { 0i64 } )
1880+ }
1881+ DataType :: Int8 => {
1882+ let arr = column. as_any ( ) . downcast_ref :: < Int8Array > ( ) . unwrap ( ) ;
1883+ Some ( arr. value ( row_idx) as i64 )
1884+ }
1885+ DataType :: Int16 => {
1886+ let arr = column. as_any ( ) . downcast_ref :: < Int16Array > ( ) . unwrap ( ) ;
1887+ Some ( arr. value ( row_idx) as i64 )
1888+ }
1889+ DataType :: Int32 => {
1890+ let arr = column. as_any ( ) . downcast_ref :: < Int32Array > ( ) . unwrap ( ) ;
1891+ Some ( arr. value ( row_idx) as i64 )
1892+ }
1893+ DataType :: Int64 => {
1894+ let arr = column. as_any ( ) . downcast_ref :: < Int64Array > ( ) . unwrap ( ) ;
1895+ Some ( arr. value ( row_idx) )
1896+ }
1897+ DataType :: Float32 => {
1898+ let arr = column. as_any ( ) . downcast_ref :: < Float32Array > ( ) . unwrap ( ) ;
1899+ Some ( ( arr. value ( row_idx) . to_bits ( ) as i32 ) as i64 )
1900+ }
1901+ DataType :: Float64 => {
1902+ let arr = column. as_any ( ) . downcast_ref :: < Float64Array > ( ) . unwrap ( ) ;
1903+ Some ( arr. value ( row_idx) . to_bits ( ) as i64 )
1904+ }
1905+ DataType :: Date32 => {
1906+ let arr = column. as_any ( ) . downcast_ref :: < Date32Array > ( ) . unwrap ( ) ;
1907+ Some ( arr. value ( row_idx) as i64 )
1908+ }
1909+ DataType :: Timestamp ( TimeUnit :: Microsecond , _) => {
1910+ let arr = column
1911+ . as_any ( )
1912+ . downcast_ref :: < TimestampMicrosecondArray > ( )
1913+ . unwrap ( ) ;
1914+ Some ( arr. value ( row_idx) )
1915+ }
1916+ DataType :: Decimal128 ( p, _) if * p <= MAX_LONG_DIGITS => {
1917+ let arr = column. as_any ( ) . downcast_ref :: < Decimal128Array > ( ) . unwrap ( ) ;
1918+ Some ( arr. value ( row_idx) as i64 )
1919+ }
1920+ _ => None , // Variable-length type
1921+ } ;
17721922
1773- if var_len > 0 {
1774- // Variable-length field: compute offset relative to struct start
1775- let padded_len = round_up_to_8 ( var_len) ;
1776- let data_offset = buffer. len ( ) - padded_len - struct_start;
1777- let offset_and_len = ( ( data_offset as i64 ) << 32 ) | ( var_len as i64 ) ;
1778- buffer[ field_offset..field_offset + 8 ]
1779- . copy_from_slice ( & offset_and_len. to_le_bytes ( ) ) ;
1923+ if let Some ( v) = value {
1924+ // Fixed-width field
1925+ buffer[ field_offset..field_offset + 8 ] . copy_from_slice ( & v. to_le_bytes ( ) ) ;
17801926 } else {
1781- // Fixed-width field: write value directly
1782- let value = get_field_value ( field. data_type ( ) , column, row_idx) ?;
1783- buffer[ field_offset..field_offset + 8 ] . copy_from_slice ( & value. to_le_bytes ( ) ) ;
1927+ // Variable-length field
1928+ let var_len =
1929+ write_nested_variable_to_buffer ( buffer, data_type, column, row_idx, struct_start) ?;
1930+ if var_len > 0 {
1931+ let padded_len = round_up_to_8 ( var_len) ;
1932+ let data_offset = buffer. len ( ) - padded_len - struct_start;
1933+ let offset_and_len = ( ( data_offset as i64 ) << 32 ) | ( var_len as i64 ) ;
1934+ buffer[ field_offset..field_offset + 8 ]
1935+ . copy_from_slice ( & offset_and_len. to_le_bytes ( ) ) ;
1936+ }
17841937 }
17851938 }
17861939 }
0 commit comments