@@ -71,13 +71,32 @@ const NESTED_TYPE_BUILDER_CAPACITY: usize = 100;
7171/// safe to call as long as:
7272/// - The index is within bounds (caller's responsibility)
7373/// - The object was constructed from valid Spark UnsafeRow/UnsafeArray data
74+ ///
75+ /// # Alignment
76+ ///
77+ /// Primitive accessor methods are implemented separately for each type because they have
78+ /// different alignment guarantees:
79+ /// - `SparkUnsafeRow`: All field offsets are 8-byte aligned (bitset width is a multiple of 8,
80+ /// and each field slot is 8 bytes), so accessors use aligned `ptr::read()`.
81+ /// - `SparkUnsafeArray`: The array base address may be unaligned when nested within a row's
82+ /// variable-length region, so accessors use `ptr::read_unaligned()`.
7483pub trait SparkUnsafeObject {
7584 /// Returns the address of the row.
7685 fn get_row_addr ( & self ) -> i64 ;
7786
7887 /// Returns the offset of the element at the given index.
7988 fn get_element_offset ( & self , index : usize , element_size : usize ) -> * const u8 ;
8089
90+ fn get_boolean ( & self , index : usize ) -> bool ;
91+ fn get_byte ( & self , index : usize ) -> i8 ;
92+ fn get_short ( & self , index : usize ) -> i16 ;
93+ fn get_int ( & self , index : usize ) -> i32 ;
94+ fn get_long ( & self , index : usize ) -> i64 ;
95+ fn get_float ( & self , index : usize ) -> f32 ;
96+ fn get_double ( & self , index : usize ) -> f64 ;
97+ fn get_date ( & self , index : usize ) -> i32 ;
98+ fn get_timestamp ( & self , index : usize ) -> i64 ;
99+
81100 /// Returns the offset and length of the element at the given index.
82101 #[ inline]
83102 fn get_offset_and_len ( & self , index : usize ) -> ( i32 , i32 ) {
@@ -87,79 +106,6 @@ pub trait SparkUnsafeObject {
87106 ( offset, len)
88107 }
89108
90- /// Returns boolean value at the given index of the object.
91- #[ inline]
92- fn get_boolean ( & self , index : usize ) -> bool {
93- let addr = self . get_element_offset ( index, 1 ) ;
94- // SAFETY: addr points to valid element data within the UnsafeRow/UnsafeArray region.
95- // The caller ensures index is within bounds.
96- debug_assert ! (
97- !addr. is_null( ) ,
98- "get_boolean: null pointer at index {index}"
99- ) ;
100- unsafe { * addr != 0 }
101- }
102-
103- /// Returns byte value at the given index of the object.
104- #[ inline]
105- fn get_byte ( & self , index : usize ) -> i8 {
106- let addr = self . get_element_offset ( index, 1 ) ;
107- // SAFETY: addr points to valid element data (1 byte) within the row/array region.
108- debug_assert ! ( !addr. is_null( ) , "get_byte: null pointer at index {index}" ) ;
109- let slice: & [ u8 ] = unsafe { std:: slice:: from_raw_parts ( addr, 1 ) } ;
110- i8:: from_le_bytes ( slice. try_into ( ) . unwrap ( ) )
111- }
112-
113- /// Returns short value at the given index of the object.
114- #[ inline]
115- fn get_short ( & self , index : usize ) -> i16 {
116- let addr = self . get_element_offset ( index, 2 ) ;
117- // SAFETY: addr points to valid element data (2 bytes) within the row/array region.
118- debug_assert ! ( !addr. is_null( ) , "get_short: null pointer at index {index}" ) ;
119- let slice: & [ u8 ] = unsafe { std:: slice:: from_raw_parts ( addr, 2 ) } ;
120- i16:: from_le_bytes ( slice. try_into ( ) . unwrap ( ) )
121- }
122-
123- /// Returns integer value at the given index of the object.
124- #[ inline]
125- fn get_int ( & self , index : usize ) -> i32 {
126- let addr = self . get_element_offset ( index, 4 ) ;
127- // SAFETY: addr points to valid element data (4 bytes) within the row/array region.
128- debug_assert ! ( !addr. is_null( ) , "get_int: null pointer at index {index}" ) ;
129- let slice: & [ u8 ] = unsafe { std:: slice:: from_raw_parts ( addr, 4 ) } ;
130- i32:: from_le_bytes ( slice. try_into ( ) . unwrap ( ) )
131- }
132-
133- /// Returns long value at the given index of the object.
134- #[ inline]
135- fn get_long ( & self , index : usize ) -> i64 {
136- let addr = self . get_element_offset ( index, 8 ) ;
137- // SAFETY: addr points to valid element data (8 bytes) within the row/array region.
138- debug_assert ! ( !addr. is_null( ) , "get_long: null pointer at index {index}" ) ;
139- let slice: & [ u8 ] = unsafe { std:: slice:: from_raw_parts ( addr, 8 ) } ;
140- i64:: from_le_bytes ( slice. try_into ( ) . unwrap ( ) )
141- }
142-
143- /// Returns float value at the given index of the object.
144- #[ inline]
145- fn get_float ( & self , index : usize ) -> f32 {
146- let addr = self . get_element_offset ( index, 4 ) ;
147- // SAFETY: addr points to valid element data (4 bytes) within the row/array region.
148- debug_assert ! ( !addr. is_null( ) , "get_float: null pointer at index {index}" ) ;
149- let slice: & [ u8 ] = unsafe { std:: slice:: from_raw_parts ( addr, 4 ) } ;
150- f32:: from_le_bytes ( slice. try_into ( ) . unwrap ( ) )
151- }
152-
153- /// Returns double value at the given index of the object.
154- #[ inline]
155- fn get_double ( & self , index : usize ) -> f64 {
156- let addr = self . get_element_offset ( index, 8 ) ;
157- // SAFETY: addr points to valid element data (8 bytes) within the row/array region.
158- debug_assert ! ( !addr. is_null( ) , "get_double: null pointer at index {index}" ) ;
159- let slice: & [ u8 ] = unsafe { std:: slice:: from_raw_parts ( addr, 8 ) } ;
160- f64:: from_le_bytes ( slice. try_into ( ) . unwrap ( ) )
161- }
162-
163109 /// Returns string value at the given index of the object.
164110 fn get_string ( & self , index : usize ) -> & str {
165111 let ( offset, len) = self . get_offset_and_len ( index) ;
@@ -190,29 +136,6 @@ pub trait SparkUnsafeObject {
190136 unsafe { std:: slice:: from_raw_parts ( addr as * const u8 , len as usize ) }
191137 }
192138
193- /// Returns date value at the given index of the object.
194- #[ inline]
195- fn get_date ( & self , index : usize ) -> i32 {
196- let addr = self . get_element_offset ( index, 4 ) ;
197- // SAFETY: addr points to valid element data (4 bytes) within the row/array region.
198- debug_assert ! ( !addr. is_null( ) , "get_date: null pointer at index {index}" ) ;
199- let slice: & [ u8 ] = unsafe { std:: slice:: from_raw_parts ( addr, 4 ) } ;
200- i32:: from_le_bytes ( slice. try_into ( ) . unwrap ( ) )
201- }
202-
203- /// Returns timestamp value at the given index of the object.
204- #[ inline]
205- fn get_timestamp ( & self , index : usize ) -> i64 {
206- let addr = self . get_element_offset ( index, 8 ) ;
207- // SAFETY: addr points to valid element data (8 bytes) within the row/array region.
208- debug_assert ! (
209- !addr. is_null( ) ,
210- "get_timestamp: null pointer at index {index}"
211- ) ;
212- let slice: & [ u8 ] = unsafe { std:: slice:: from_raw_parts ( addr, 8 ) } ;
213- i64:: from_le_bytes ( slice. try_into ( ) . unwrap ( ) )
214- }
215-
216139 /// Returns decimal value at the given index of the object.
217140 fn get_decimal ( & self , index : usize , precision : u8 ) -> i128 {
218141 if precision <= MAX_LONG_DIGITS {
@@ -244,6 +167,94 @@ pub trait SparkUnsafeObject {
244167 }
245168}
246169
/// Generates the primitive accessor methods of `SparkUnsafeObject` inside an `impl` block.
///
/// The generated methods are `get_boolean`, `get_byte`, `get_short`, `get_int`, `get_long`,
/// `get_float`, `get_double`, `get_date` and `get_timestamp`. Each one asks
/// `self.get_element_offset(index, size)` for the element address and reads a value of the
/// matching width from it, in native byte order.
///
/// `$read_method` names the raw-pointer method used for the multi-byte reads:
/// - `read` for aligned access (SparkUnsafeRow — all offsets are 8-byte aligned). The
///   implementor must guarantee every element address is aligned for the type being read.
/// - `read_unaligned` for potentially unaligned access (SparkUnsafeArray).
///
/// In debug builds every accessor asserts that the element address is non-null and, when
/// `$read_method` is `read`, that it is aligned for the type being read. Aligned `ptr::read()`
/// on a misaligned address is undefined behaviour, so the invariant is checked rather than
/// only assumed. Release builds perform no checks.
macro_rules! impl_primitive_accessors {
    ($read_method:ident) => {
        /// Returns boolean value at the given index of the object.
        #[inline]
        fn get_boolean(&self, index: usize) -> bool {
            let addr = self.get_element_offset(index, 1);
            debug_assert!(
                !addr.is_null(),
                "get_boolean: null pointer at index {index}"
            );
            // SAFETY: addr points to valid element data within the row/array region and the
            // caller ensures index is in bounds. A single byte has no alignment requirement.
            unsafe { *addr != 0 }
        }

        /// Returns byte value at the given index of the object.
        #[inline]
        fn get_byte(&self, index: usize) -> i8 {
            let addr = self.get_element_offset(index, 1);
            debug_assert!(!addr.is_null(), "get_byte: null pointer at index {index}");
            // SAFETY: addr points to valid element data (1 byte) within the row/array region.
            // A single byte has no alignment requirement.
            unsafe { *(addr as *const i8) }
        }

        /// Returns short value at the given index of the object.
        #[inline]
        fn get_short(&self, index: usize) -> i16 {
            let addr = self.get_element_offset(index, 2) as *const i16;
            debug_assert!(!addr.is_null(), "get_short: null pointer at index {index}");
            // Only the aligned `read` variant has an alignment precondition.
            debug_assert!(
                stringify!($read_method) != "read"
                    || ((addr as usize) & (std::mem::align_of::<i16>() - 1)) == 0,
                "get_short: misaligned pointer at index {index}"
            );
            // SAFETY: addr points to valid element data (2 bytes) within the row/array region,
            // and the implementor guarantees the alignment required by the chosen read method.
            unsafe { addr.$read_method() }
        }

        /// Returns integer value at the given index of the object.
        #[inline]
        fn get_int(&self, index: usize) -> i32 {
            let addr = self.get_element_offset(index, 4) as *const i32;
            debug_assert!(!addr.is_null(), "get_int: null pointer at index {index}");
            // Only the aligned `read` variant has an alignment precondition.
            debug_assert!(
                stringify!($read_method) != "read"
                    || ((addr as usize) & (std::mem::align_of::<i32>() - 1)) == 0,
                "get_int: misaligned pointer at index {index}"
            );
            // SAFETY: addr points to valid element data (4 bytes) within the row/array region,
            // and the implementor guarantees the alignment required by the chosen read method.
            unsafe { addr.$read_method() }
        }

        /// Returns long value at the given index of the object.
        #[inline]
        fn get_long(&self, index: usize) -> i64 {
            let addr = self.get_element_offset(index, 8) as *const i64;
            debug_assert!(!addr.is_null(), "get_long: null pointer at index {index}");
            // Only the aligned `read` variant has an alignment precondition.
            debug_assert!(
                stringify!($read_method) != "read"
                    || ((addr as usize) & (std::mem::align_of::<i64>() - 1)) == 0,
                "get_long: misaligned pointer at index {index}"
            );
            // SAFETY: addr points to valid element data (8 bytes) within the row/array region,
            // and the implementor guarantees the alignment required by the chosen read method.
            unsafe { addr.$read_method() }
        }

        /// Returns float value at the given index of the object.
        #[inline]
        fn get_float(&self, index: usize) -> f32 {
            let addr = self.get_element_offset(index, 4) as *const f32;
            debug_assert!(!addr.is_null(), "get_float: null pointer at index {index}");
            // Only the aligned `read` variant has an alignment precondition.
            debug_assert!(
                stringify!($read_method) != "read"
                    || ((addr as usize) & (std::mem::align_of::<f32>() - 1)) == 0,
                "get_float: misaligned pointer at index {index}"
            );
            // SAFETY: addr points to valid element data (4 bytes) within the row/array region,
            // and the implementor guarantees the alignment required by the chosen read method.
            unsafe { addr.$read_method() }
        }

        /// Returns double value at the given index of the object.
        #[inline]
        fn get_double(&self, index: usize) -> f64 {
            let addr = self.get_element_offset(index, 8) as *const f64;
            debug_assert!(!addr.is_null(), "get_double: null pointer at index {index}");
            // Only the aligned `read` variant has an alignment precondition.
            debug_assert!(
                stringify!($read_method) != "read"
                    || ((addr as usize) & (std::mem::align_of::<f64>() - 1)) == 0,
                "get_double: misaligned pointer at index {index}"
            );
            // SAFETY: addr points to valid element data (8 bytes) within the row/array region,
            // and the implementor guarantees the alignment required by the chosen read method.
            unsafe { addr.$read_method() }
        }

        /// Returns date value at the given index of the object.
        #[inline]
        fn get_date(&self, index: usize) -> i32 {
            let addr = self.get_element_offset(index, 4) as *const i32;
            debug_assert!(!addr.is_null(), "get_date: null pointer at index {index}");
            // Only the aligned `read` variant has an alignment precondition.
            debug_assert!(
                stringify!($read_method) != "read"
                    || ((addr as usize) & (std::mem::align_of::<i32>() - 1)) == 0,
                "get_date: misaligned pointer at index {index}"
            );
            // SAFETY: addr points to valid element data (4 bytes) within the row/array region,
            // and the implementor guarantees the alignment required by the chosen read method.
            unsafe { addr.$read_method() }
        }

        /// Returns timestamp value at the given index of the object.
        #[inline]
        fn get_timestamp(&self, index: usize) -> i64 {
            let addr = self.get_element_offset(index, 8) as *const i64;
            debug_assert!(
                !addr.is_null(),
                "get_timestamp: null pointer at index {index}"
            );
            // Only the aligned `read` variant has an alignment precondition.
            debug_assert!(
                stringify!($read_method) != "read"
                    || ((addr as usize) & (std::mem::align_of::<i64>() - 1)) == 0,
                "get_timestamp: misaligned pointer at index {index}"
            );
            // SAFETY: addr points to valid element data (8 bytes) within the row/array region,
            // and the implementor guarantees the alignment required by the chosen read method.
            unsafe { addr.$read_method() }
        }
    };
}
256+ pub ( crate ) use impl_primitive_accessors;
257+
247258pub struct SparkUnsafeRow {
248259 row_addr : i64 ,
249260 row_size : i32 ,
@@ -258,6 +269,11 @@ impl SparkUnsafeObject for SparkUnsafeRow {
258269 fn get_element_offset ( & self , index : usize , _: usize ) -> * const u8 {
259270 ( self . row_addr + self . row_bitset_width + ( index * 8 ) as i64 ) as * const u8
260271 }
272+
273+ // SparkUnsafeRow field offsets are always 8-byte aligned: the base address is 8-byte
274+ // aligned (JVM guarantee), bitset_width is a multiple of 8, and each field slot is
275+ // 8 bytes. This means we can safely use aligned ptr::read() for all typed accesses.
276+ impl_primitive_accessors ! ( read) ;
261277}
262278
263279impl Default for SparkUnsafeRow {
@@ -321,11 +337,13 @@ impl SparkUnsafeRow {
321337 // SAFETY: row_addr points to valid Spark UnsafeRow data with at least
322338 // ceil(num_fields/64) * 8 bytes of null bitset. The caller ensures index < num_fields.
323339 // word_offset is within the bitset region since (index >> 6) << 3 < bitset size.
340+ // The bitset starts at row_addr (8-byte aligned) and each word is at offset 8*k,
341+ // so word_offset is always 8-byte aligned — we can use aligned ptr::read().
324342 debug_assert ! ( self . row_addr != -1 , "is_null_at: row not initialized" ) ;
325343 unsafe {
326344 let mask: i64 = 1i64 << ( index & 0x3f ) ;
327345 let word_offset = ( self . row_addr + ( ( ( index >> 6 ) as i64 ) << 3 ) ) as * const i64 ;
328- let word: i64 = word_offset. read_unaligned ( ) ;
346+ let word: i64 = word_offset. read ( ) ;
329347 ( word & mask) != 0
330348 }
331349 }
@@ -336,12 +354,13 @@ impl SparkUnsafeRow {
336354 // ceil(num_fields/64) * 8 bytes of null bitset. The caller ensures index < num_fields.
337355 // word_offset is within the bitset region since (index >> 6) << 3 < bitset size.
338356 // Writing is safe because we have mutable access and the memory is owned by the JVM.
357+ // The bitset is always 8-byte aligned — we can use aligned ptr::read()/write().
339358 debug_assert ! ( self . row_addr != -1 , "set_not_null_at: row not initialized" ) ;
340359 unsafe {
341360 let mask: i64 = 1i64 << ( index & 0x3f ) ;
342361 let word_offset = ( self . row_addr + ( ( ( index >> 6 ) as i64 ) << 3 ) ) as * mut i64 ;
343- let word: i64 = word_offset. read_unaligned ( ) ;
344- word_offset. write_unaligned ( word & !mask) ;
362+ let word: i64 = word_offset. read ( ) ;
363+ word_offset. write ( word & !mask) ;
345364 }
346365 }
347366}
0 commit comments