Skip to content

Commit c59b81b

Browse files
authored
Merge branch 'main' into perf/aligned-reads-spark-unsafe-row
2 parents 6963a92 + ae8e57c commit c59b81b

2 files changed

Lines changed: 21 additions & 13 deletions

File tree

  • .github/workflows
  • native/core/src/execution/shuffle/spark_unsafe

.github/workflows/miri.yml

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -28,16 +28,14 @@ on:
2828
- "native/core/benches/**"
2929
- "native/spark-expr/benches/**"
3030
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
31-
# Disabled until Miri compatibility is restored
32-
# https://github.com/apache/datafusion-comet/issues/3499
33-
# pull_request:
34-
# paths-ignore:
35-
# - "doc/**"
36-
# - "docs/**"
37-
# - "**.md"
38-
# - "native/core/benches/**"
39-
# - "native/spark-expr/benches/**"
40-
# - "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
31+
pull_request:
32+
paths-ignore:
33+
- "doc/**"
34+
- "docs/**"
35+
- "**.md"
36+
- "native/core/benches/**"
37+
- "native/spark-expr/benches/**"
38+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
4139
# manual trigger
4240
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
4341
workflow_dispatch:

native/core/src/execution/shuffle/spark_unsafe/row.rs

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -266,8 +266,15 @@ impl SparkUnsafeObject for SparkUnsafeRow {
266266
self.row_addr
267267
}
268268

269-
fn get_element_offset(&self, index: usize, _: usize) -> *const u8 {
270-
(self.row_addr + self.row_bitset_width + (index * 8) as i64) as *const u8
269+
fn get_element_offset(&self, index: usize, element_size: usize) -> *const u8 {
270+
let offset = self.row_bitset_width + (index * 8) as i64;
271+
debug_assert!(
272+
self.row_size >= 0 && offset + element_size as i64 <= self.row_size as i64,
273+
"get_element_offset: access at offset {offset} with size {element_size} \
274+
exceeds row_size {} for index {index}",
275+
self.row_size
276+
);
277+
(self.row_addr + offset) as *const u8
271278
}
272279

273280
// SparkUnsafeRow field offsets are always 8-byte aligned: the base address is 8-byte
@@ -1678,7 +1685,10 @@ mod test {
16781685
let fields = Fields::from(vec![Field::new("st", data_type.clone(), true)]);
16791686
let mut struct_builder = StructBuilder::from_fields(fields, 1);
16801687
let mut row = SparkUnsafeRow::new_with_num_fields(1);
1681-
let data = [0; 8];
1688+
// 8 bytes null bitset + 8 bytes field value = 16 bytes
1689+
// Set bit 0 in the null bitset to mark field 0 as null
1690+
let mut data = [0u8; 16];
1691+
data[0] = 1;
16821692
row.point_to_slice(&data);
16831693
append_field(&data_type, &mut struct_builder, &row, 0).expect("append field");
16841694
struct_builder.append_null();

0 commit comments

Comments
 (0)