Skip to content

Commit 27e680a

Browse files
committed
cleanup
Signed-off-by: Alexander Droste <alexander.droste@protonmail.com>
1 parent 420a350 commit 27e680a

1 file changed

Lines changed: 27 additions & 22 deletions

File tree

vortex-cuda/src/arrow/canonical.rs

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -773,9 +773,8 @@ pub(super) async fn export_arrow_validity_buffer(
773773
arrow_offset: usize,
774774
ctx: &mut CudaExecutionCtx,
775775
) -> VortexResult<(Option<BufferHandle>, i64)> {
776-
// Container export paths reach here without going through `execute_cuda`, so decode the
777-
// validity on the GPU here to turn any lazy/compressed bool array (e.g. dict-encoded, or from
778-
// take/scan) into a device-resident canonical bool before export.
776+
// Validity is exported separately from the array data. Decode it here so Arrow
777+
// gets a device-resident validity buffer alongside the array it belongs to.
779778
let validity = execute_validity_cuda(validity, len, ctx).await?;
780779
match validity {
781780
Validity::NonNullable | Validity::AllValid => Ok((None, 0)),
@@ -795,14 +794,17 @@ pub(super) async fn export_arrow_validity_buffer(
795794
})?;
796795
let BoolDataParts { bits, meta } = array.into_data().into_parts(len);
797796
let bitmap = ctx.ensure_on_device(bits).await?;
798-
// Repack only when a bit-level offset can't be expressed by Arrow's byte-addressed
799-
// validity buffer plus array offset.
800-
let bitmap = if arrow_offset == 0 && meta.offset() == 0 {
797+
// ArrowDeviceArray uses ArrowArray layout with its buffers being device pointers.
798+
//
799+
// Validity is one bit per row, addressed via the Arrow array offset. Reuse the bitmap
800+
// when Vortex's validity offset already matches Arrow's; otherwise repack on the GPU
801+
// so row i is at Arrow bit `arrow_offset + i`.
802+
let bitmap = if meta.offset() == arrow_offset {
801803
bitmap
802804
} else {
803805
repack_arrow_validity_buffer(&bitmap, meta.offset(), len, arrow_offset, ctx)?
804806
};
805-
Ok((Some(bitmap), UNKNOWN_NULL_COUNT))
807+
Ok((Some(bitmap), ARROW_UNKNOWN_NULL_COUNT))
806808
}
807809
}
808810
}
@@ -816,7 +818,7 @@ fn validity_bitmap_byte_len(len: usize, arrow_offset: usize) -> VortexResult<usi
816818
}
817819

818820
/// Arrow C Data uses -1 when the null count has not been computed.
819-
const UNKNOWN_NULL_COUNT: i64 = -1;
821+
const ARROW_UNKNOWN_NULL_COUNT: i64 = -1;
820822

821823
/// Allocate a zeroed device buffer with cuDF-safe padding for Arrow validity masks.
822824
fn device_zeroed_byte_buffer(
@@ -1663,7 +1665,10 @@ mod tests {
16631665
)
16641666
);
16651667
assert_eq!(exported.array.array.length, 4);
1666-
assert_eq!(exported.array.array.null_count, super::UNKNOWN_NULL_COUNT);
1668+
assert_eq!(
1669+
exported.array.array.null_count,
1670+
super::ARROW_UNKNOWN_NULL_COUNT
1671+
);
16671672
assert_eq!(exported.array.array.n_buffers, 2);
16681673
assert_eq!(exported.array.array.n_children, 0);
16691674
assert!(!exported.array.array.dictionary.is_null());
@@ -1721,7 +1726,7 @@ mod tests {
17211726
let children = unsafe { std::slice::from_raw_parts(exported.array.array.children, 1) };
17221727
let dict_child = unsafe { &*children[0] };
17231728
assert!(!dict_child.dictionary.is_null());
1724-
assert_eq!(dict_child.null_count, super::UNKNOWN_NULL_COUNT);
1729+
assert_eq!(dict_child.null_count, super::ARROW_UNKNOWN_NULL_COUNT);
17251730

17261731
unsafe { release_exported_array(&raw mut exported.array.array) };
17271732
Ok(())
@@ -1754,7 +1759,7 @@ mod tests {
17541759
[0, 1, 0]
17551760
);
17561761
let dictionary = unsafe { &*exported.array.array.dictionary };
1757-
assert_eq!(dictionary.null_count, super::UNKNOWN_NULL_COUNT);
1762+
assert_eq!(dictionary.null_count, super::ARROW_UNKNOWN_NULL_COUNT);
17581763
assert_eq!(private_data_buffer_i32_values(dictionary, 1)?.len(), 2);
17591764

17601765
unsafe { release_exported_array(&raw mut exported.array.array) };
@@ -2031,7 +2036,7 @@ mod tests {
20312036
assert_binary_layout(
20322037
&exported.array.array,
20332038
5,
2034-
super::UNKNOWN_NULL_COUNT,
2039+
super::ARROW_UNKNOWN_NULL_COUNT,
20352040
&[0, 0, 3, 3, 8, i32::try_from(8 + out_of_line.len())?],
20362041
&[b"\x00\xff\xfe".as_slice(), b"short", out_of_line].concat(),
20372042
)?;
@@ -2545,7 +2550,7 @@ mod tests {
25452550
[0, 1, 0, 1, 2]
25462551
);
25472552
let dictionary = unsafe { &*elements.dictionary };
2548-
assert_eq!(dictionary.null_count, super::UNKNOWN_NULL_COUNT);
2553+
assert_eq!(dictionary.null_count, super::ARROW_UNKNOWN_NULL_COUNT);
25492554

25502555
unsafe { release_exported_array(&raw mut exported.array.array) };
25512556
Ok(())
@@ -2791,7 +2796,7 @@ mod tests {
27912796
let mut exported = utf8.export_device_array_with_schema(&mut ctx).await?;
27922797
let field = Field::try_from(&exported.schema)?;
27932798
assert_eq!(field, Field::new("", DataType::Utf8View, true));
2794-
assert_varbinview_shape(&exported.array.array, 3, super::UNKNOWN_NULL_COUNT)?;
2799+
assert_varbinview_shape(&exported.array.array, 3, super::ARROW_UNKNOWN_NULL_COUNT)?;
27952800
assert_eq!(exported.array.device_type, ARROW_DEVICE_CUDA);
27962801

27972802
let private_data = unsafe { &*exported.array.array.private_data.cast::<PrivateData>() };
@@ -2819,7 +2824,7 @@ mod tests {
28192824
assert_binary_layout(
28202825
&exported.array.array,
28212826
3,
2822-
super::UNKNOWN_NULL_COUNT,
2827+
super::ARROW_UNKNOWN_NULL_COUNT,
28232828
&[0, 0, 2, i32::try_from(2 + sliced_out_of_line.len())?],
28242829
&[b"\x00\xff".as_slice(), sliced_out_of_line].concat(),
28252830
)?;
@@ -2937,7 +2942,7 @@ mod tests {
29372942
let mut primitive = assert_nullable_export(
29382943
PrimitiveArray::from_option_iter([Some(1i32), None, Some(3)]).into_array(),
29392944
2,
2940-
super::UNKNOWN_NULL_COUNT,
2945+
super::ARROW_UNKNOWN_NULL_COUNT,
29412946
&mut ctx,
29422947
)
29432948
.await?;
@@ -2966,7 +2971,7 @@ mod tests {
29662971
.into_array()
29672972
.slice(1..4)?,
29682973
2,
2969-
super::UNKNOWN_NULL_COUNT,
2974+
super::ARROW_UNKNOWN_NULL_COUNT,
29702975
&mut ctx,
29712976
)
29722977
.await?;
@@ -3015,7 +3020,7 @@ mod tests {
30153020
)
30163021
.into_array(),
30173022
2,
3018-
super::UNKNOWN_NULL_COUNT,
3023+
super::ARROW_UNKNOWN_NULL_COUNT,
30193024
&mut ctx,
30203025
)
30213026
.await?;
@@ -3045,7 +3050,7 @@ mod tests {
30453050
)
30463051
.into_array(),
30473052
2,
3048-
super::UNKNOWN_NULL_COUNT,
3053+
super::ARROW_UNKNOWN_NULL_COUNT,
30493054
&mut ctx,
30503055
)
30513056
.await?;
@@ -3068,7 +3073,7 @@ mod tests {
30683073
])
30693074
.into_array(),
30703075
4,
3071-
super::UNKNOWN_NULL_COUNT,
3076+
super::ARROW_UNKNOWN_NULL_COUNT,
30723077
&mut ctx,
30733078
)
30743079
.await?;
@@ -3092,7 +3097,7 @@ mod tests {
30923097
)?
30933098
.into_array(),
30943099
1,
3095-
super::UNKNOWN_NULL_COUNT,
3100+
super::ARROW_UNKNOWN_NULL_COUNT,
30963101
&mut ctx,
30973102
)
30983103
.await?;
@@ -3126,7 +3131,7 @@ mod tests {
31263131
)?
31273132
.into_array(),
31283133
1,
3129-
super::UNKNOWN_NULL_COUNT,
3134+
super::ARROW_UNKNOWN_NULL_COUNT,
31303135
&mut ctx,
31313136
)
31323137
.await?;

0 commit comments

Comments
 (0)