Skip to content

Commit fe3ffd9

Browse files
committed
fix: use read_unaligned for Spark UnsafeArray data to prevent misaligned pointer panics
Extract is_null_in_bitset helper to deduplicate null-bit-checking pattern. Restore bulk append_slice fast path with runtime alignment check.
1 parent 10b89a7 commit fe3ffd9

1 file changed

Lines changed: 59 additions & 34 deletions

File tree

  • native/core/src/execution/shuffle/spark_unsafe

native/core/src/execution/shuffle/spark_unsafe/list.rs

Lines changed: 59 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -52,30 +52,36 @@ macro_rules! impl_append_to_builder {
5252
debug_assert!(!null_words.is_null(), "null_bitset_ptr is null");
5353
debug_assert!(!ptr.is_null(), "element_offset pointer is null");
5454
for idx in 0..num_elements {
55-
let word_idx = idx >> 6;
56-
let bit_idx = idx & 0x3f;
57-
// SAFETY: word_idx < ceil(num_elements/64) since idx < num_elements
58-
let is_null = unsafe { (*null_words.add(word_idx) & (1i64 << bit_idx)) != 0 };
55+
// SAFETY: null_words has ceil(num_elements/64) words, idx < num_elements
56+
let is_null = unsafe { Self::is_null_in_bitset(null_words, idx) };
5957

6058
if is_null {
6159
builder.append_null();
6260
} else {
6361
// SAFETY: ptr is within element data bounds
64-
builder.append_value(unsafe { *ptr });
62+
builder.append_value(unsafe { ptr.read_unaligned() });
6563
}
6664
// SAFETY: ptr stays within bounds, iterating num_elements times
6765
ptr = unsafe { ptr.add(1) };
6866
}
6967
} else {
7068
// SAFETY: element_offset points to contiguous data of length num_elements
7169
debug_assert!(self.element_offset != 0, "element_offset is null");
72-
let slice = unsafe {
73-
std::slice::from_raw_parts(
74-
self.element_offset as *const $element_type,
75-
num_elements,
76-
)
77-
};
78-
builder.append_slice(slice);
70+
let ptr = self.element_offset as *const $element_type;
71+
// Use bulk copy when data is properly aligned, fall back to
72+
// per-element unaligned reads otherwise
73+
if (ptr as usize).is_multiple_of(std::mem::align_of::<$element_type>()) {
74+
let slice = unsafe {
75+
std::slice::from_raw_parts(ptr, num_elements)
76+
};
77+
builder.append_slice(slice);
78+
} else {
79+
let mut ptr = ptr;
80+
for _ in 0..num_elements {
81+
builder.append_value(unsafe { ptr.read_unaligned() });
82+
ptr = unsafe { ptr.add(1) };
83+
}
84+
}
7985
}
8086
}
8187
};
@@ -158,6 +164,17 @@ impl SparkUnsafeArray {
158164
(self.row_addr + 8) as *const i64
159165
}
160166

167+
/// Checks whether the null bit at `idx` is set in the given null bitset pointer.
168+
///
169+
/// # Safety
170+
/// `null_words` must point to at least `ceil((idx+1)/64)` i64 words.
171+
#[inline]
172+
unsafe fn is_null_in_bitset(null_words: *const i64, idx: usize) -> bool {
173+
let word_idx = idx >> 6;
174+
let bit_idx = idx & 0x3f;
175+
(null_words.add(word_idx).read_unaligned() & (1i64 << bit_idx)) != 0
176+
}
177+
161178
impl_append_to_builder!(append_ints_to_builder, Int32Builder, i32);
162179
impl_append_to_builder!(append_longs_to_builder, Int64Builder, i64);
163180
impl_append_to_builder!(append_shorts_to_builder, Int16Builder, i16);
@@ -189,10 +206,8 @@ impl SparkUnsafeArray {
189206
"append_booleans: null_bitset_ptr is null"
190207
);
191208
for idx in 0..num_elements {
192-
let word_idx = idx >> 6;
193-
let bit_idx = idx & 0x3f;
194-
// SAFETY: word_idx < ceil(num_elements/64) since idx < num_elements
195-
let is_null = unsafe { (*null_words.add(word_idx) & (1i64 << bit_idx)) != 0 };
209+
// SAFETY: null_words has ceil(num_elements/64) words, idx < num_elements
210+
let is_null = unsafe { Self::is_null_in_bitset(null_words, idx) };
196211

197212
if is_null {
198213
builder.append_null();
@@ -234,16 +249,14 @@ impl SparkUnsafeArray {
234249
"append_timestamps: element_offset pointer is null"
235250
);
236251
for idx in 0..num_elements {
237-
let word_idx = idx >> 6;
238-
let bit_idx = idx & 0x3f;
239-
// SAFETY: word_idx < ceil(num_elements/64) since idx < num_elements
240-
let is_null = unsafe { (*null_words.add(word_idx) & (1i64 << bit_idx)) != 0 };
252+
// SAFETY: null_words has ceil(num_elements/64) words, idx < num_elements
253+
let is_null = unsafe { Self::is_null_in_bitset(null_words, idx) };
241254

242255
if is_null {
243256
builder.append_null();
244257
} else {
245258
// SAFETY: ptr is within element data bounds
246-
builder.append_value(unsafe { *ptr });
259+
builder.append_value(unsafe { ptr.read_unaligned() });
247260
}
248261
// SAFETY: ptr stays within bounds, iterating num_elements times
249262
ptr = unsafe { ptr.add(1) };
@@ -254,10 +267,17 @@ impl SparkUnsafeArray {
254267
self.element_offset != 0,
255268
"append_timestamps: element_offset is null"
256269
);
257-
let slice = unsafe {
258-
std::slice::from_raw_parts(self.element_offset as *const i64, num_elements)
259-
};
260-
builder.append_slice(slice);
270+
let ptr = self.element_offset as *const i64;
271+
if (ptr as usize).is_multiple_of(std::mem::align_of::<i64>()) {
272+
let slice = unsafe { std::slice::from_raw_parts(ptr, num_elements) };
273+
builder.append_slice(slice);
274+
} else {
275+
let mut ptr = ptr;
276+
for _ in 0..num_elements {
277+
builder.append_value(unsafe { ptr.read_unaligned() });
278+
ptr = unsafe { ptr.add(1) };
279+
}
280+
}
261281
}
262282
}
263283

@@ -283,16 +303,14 @@ impl SparkUnsafeArray {
283303
"append_dates: element_offset pointer is null"
284304
);
285305
for idx in 0..num_elements {
286-
let word_idx = idx >> 6;
287-
let bit_idx = idx & 0x3f;
288-
// SAFETY: word_idx < ceil(num_elements/64) since idx < num_elements
289-
let is_null = unsafe { (*null_words.add(word_idx) & (1i64 << bit_idx)) != 0 };
306+
// SAFETY: null_words has ceil(num_elements/64) words, idx < num_elements
307+
let is_null = unsafe { Self::is_null_in_bitset(null_words, idx) };
290308

291309
if is_null {
292310
builder.append_null();
293311
} else {
294312
// SAFETY: ptr is within element data bounds
295-
builder.append_value(unsafe { *ptr });
313+
builder.append_value(unsafe { ptr.read_unaligned() });
296314
}
297315
// SAFETY: ptr stays within bounds, iterating num_elements times
298316
ptr = unsafe { ptr.add(1) };
@@ -303,10 +321,17 @@ impl SparkUnsafeArray {
303321
self.element_offset != 0,
304322
"append_dates: element_offset is null"
305323
);
306-
let slice = unsafe {
307-
std::slice::from_raw_parts(self.element_offset as *const i32, num_elements)
308-
};
309-
builder.append_slice(slice);
324+
let ptr = self.element_offset as *const i32;
325+
if (ptr as usize).is_multiple_of(std::mem::align_of::<i32>()) {
326+
let slice = unsafe { std::slice::from_raw_parts(ptr, num_elements) };
327+
builder.append_slice(slice);
328+
} else {
329+
let mut ptr = ptr;
330+
for _ in 0..num_elements {
331+
builder.append_value(unsafe { ptr.read_unaligned() });
332+
ptr = unsafe { ptr.add(1) };
333+
}
334+
}
310335
}
311336
}
312337
}

0 commit comments

Comments
 (0)