Skip to content

Commit ae8e57c

Browse files
authored
fix: resolve Miri UB in null struct field test, re-enable Miri on PRs (apache#3669)
Add bounds-checking debug_assert in SparkUnsafeRow::get_element_offset to catch out-of-bounds accesses early. Fix test_append_null_struct_field_to_struct_builder which had an undersized 8-byte buffer (only null bitset, no field slot) with null bit unset, causing an out-of-bounds read in get_long. Use 16 bytes with bit 0 set to properly represent a null field. Re-enable Miri on pull_request trigger now that the upstream cargo nightly regression (apache#3499) is resolved.
1 parent b9f29c8 commit ae8e57c

2 files changed

Lines changed: 21 additions & 13 deletions

File tree

  • .github/workflows
  • native/core/src/execution/shuffle/spark_unsafe

.github/workflows/miri.yml

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,14 @@ on:
2828
- "native/core/benches/**"
2929
- "native/spark-expr/benches/**"
3030
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
31-
# Disabled until Miri compatibility is restored
32-
# https://github.com/apache/datafusion-comet/issues/3499
33-
# pull_request:
34-
# paths-ignore:
35-
# - "doc/**"
36-
# - "docs/**"
37-
# - "**.md"
38-
# - "native/core/benches/**"
39-
# - "native/spark-expr/benches/**"
40-
# - "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
31+
pull_request:
32+
paths-ignore:
33+
- "doc/**"
34+
- "docs/**"
35+
- "**.md"
36+
- "native/core/benches/**"
37+
- "native/spark-expr/benches/**"
38+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
4139
# manual trigger
4240
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
4341
workflow_dispatch:

native/core/src/execution/shuffle/spark_unsafe/row.rs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -255,8 +255,15 @@ impl SparkUnsafeObject for SparkUnsafeRow {
255255
self.row_addr
256256
}
257257

258-
fn get_element_offset(&self, index: usize, _: usize) -> *const u8 {
259-
(self.row_addr + self.row_bitset_width + (index * 8) as i64) as *const u8
258+
fn get_element_offset(&self, index: usize, element_size: usize) -> *const u8 {
259+
let offset = self.row_bitset_width + (index * 8) as i64;
260+
debug_assert!(
261+
self.row_size >= 0 && offset + element_size as i64 <= self.row_size as i64,
262+
"get_element_offset: access at offset {offset} with size {element_size} \
263+
exceeds row_size {} for index {index}",
264+
self.row_size
265+
);
266+
(self.row_addr + offset) as *const u8
260267
}
261268
}
262269

@@ -1659,7 +1666,10 @@ mod test {
16591666
let fields = Fields::from(vec![Field::new("st", data_type.clone(), true)]);
16601667
let mut struct_builder = StructBuilder::from_fields(fields, 1);
16611668
let mut row = SparkUnsafeRow::new_with_num_fields(1);
1662-
let data = [0; 8];
1669+
// 8 bytes null bitset + 8 bytes field value = 16 bytes
1670+
// Set bit 0 in the null bitset to mark field 0 as null
1671+
let mut data = [0u8; 16];
1672+
data[0] = 1;
16631673
row.point_to_slice(&data);
16641674
append_field(&data_type, &mut struct_builder, &row, 0).expect("append field");
16651675
struct_builder.append_null();

0 commit comments

Comments
 (0)