|
| 1 | +// SPDX-License-Identifier: Apache-2.0 |
| 2 | +// SPDX-FileCopyrightText: Copyright the Vortex contributors |
| 3 | + |
| 4 | +use std::sync::Arc; |
| 5 | + |
| 6 | +use arrow_array::ArrayRef as ArrowArrayRef; |
| 7 | +use arrow_array::FixedSizeBinaryArray; |
| 8 | +use vortex_error::VortexResult; |
| 9 | +use vortex_error::vortex_bail; |
| 10 | + |
| 11 | +use crate::ArrayRef; |
| 12 | +use crate::ExecutionCtx; |
| 13 | +use crate::arrays::ExtensionArray; |
| 14 | +use crate::arrays::FixedSizeListArray; |
| 15 | +use crate::arrays::PrimitiveArray; |
| 16 | +use crate::arrow::executor::validity::to_arrow_null_buffer; |
| 17 | +use crate::dtype::DType; |
| 18 | +use crate::dtype::PType; |
| 19 | +use crate::vtable::ValidityHelper; |
| 20 | + |
| 21 | +/// Convert a Vortex array to an Arrow `FixedSizeBinaryArray`. |
| 22 | +/// |
| 23 | +/// Accepts either an extension array (e.g. UUID) or a plain `FixedSizeList(Primitive(U8), size)`. |
| 24 | +pub(super) fn to_arrow_fixed_size_binary( |
| 25 | + array: ArrayRef, |
| 26 | + size: i32, |
| 27 | + ctx: &mut ExecutionCtx, |
| 28 | +) -> VortexResult<ArrowArrayRef> { |
| 29 | + let storage = if array.dtype().is_extension() { |
| 30 | + array |
| 31 | + .execute::<ExtensionArray>(ctx)? |
| 32 | + .storage_array() |
| 33 | + .clone() |
| 34 | + } else { |
| 35 | + array |
| 36 | + }; |
| 37 | + |
| 38 | + let fsl = storage.execute::<FixedSizeListArray>(ctx)?; |
| 39 | + |
| 40 | + match fsl.dtype() { |
| 41 | + DType::FixedSizeList(elem, list_size, _) |
| 42 | + if *list_size == size as u32 |
| 43 | + && matches!(elem.as_ref(), DType::Primitive(PType::U8, _)) => {} |
| 44 | + other => { |
| 45 | + vortex_bail!("FixedSizeBinary({size}) requires FixedSizeList(U8, {size}), got {other}"); |
| 46 | + } |
| 47 | + } |
| 48 | + |
| 49 | + let elements = fsl.elements().clone().execute::<PrimitiveArray>(ctx)?; |
| 50 | + let values = elements.into_buffer::<u8>().into_arrow_buffer(); |
| 51 | + let null_buffer = to_arrow_null_buffer(fsl.validity(), fsl.len(), ctx)?; |
| 52 | + |
| 53 | + Ok(Arc::new(FixedSizeBinaryArray::new( |
| 54 | + size, |
| 55 | + values, |
| 56 | + null_buffer, |
| 57 | + ))) |
| 58 | +} |
| 59 | + |
#[cfg(test)]
mod tests {
    use arrow_array::FixedSizeBinaryArray;
    use arrow_schema::DataType;
    use vortex_buffer::BitBuffer;
    use vortex_buffer::Buffer;

    use crate::IntoArray;
    use crate::LEGACY_SESSION;
    use crate::VortexSessionExecute;
    use crate::arrays::ExtensionArray;
    use crate::arrays::FixedSizeListArray;
    use crate::arrays::PrimitiveArray;
    use crate::arrow::ArrowArrayExecutor;
    use crate::dtype::Nullability;
    use crate::extension::uuid::Uuid;
    use crate::extension::uuid::vtable::UUID_BYTE_LEN;
    use crate::validity::Validity;

    /// A nullable UUID extension array (valid, null, valid) exported as
    /// `FixedSizeBinary(UUID_BYTE_LEN)` must equal the Arrow array built from the same values.
    #[expect(
        clippy::cast_possible_truncation,
        reason = "UUID_BYTE_LEN always fits u32/i32"
    )]
    #[test]
    fn test_uuid_to_fixed_size_binary() {
        let list_width = UUID_BYTE_LEN as u32;
        let binary_width = UUID_BYTE_LEN as i32;

        let first = uuid::Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000").unwrap();
        let second = uuid::Uuid::parse_str("f47ac10b-58cc-4372-a567-0e02b2c3d479").unwrap();

        // The middle slot is null; its bytes are zero-filled placeholders.
        let null_slot = [0u8; 16];
        let bytes: Vec<u8> = [
            first.as_bytes().as_slice(),
            null_slot.as_slice(),
            second.as_bytes().as_slice(),
        ]
        .concat();

        let byte_elements =
            PrimitiveArray::new(Buffer::from(bytes), Validity::NonNullable).into_array();
        let row_validity = Validity::from(BitBuffer::from_iter([true, false, true]));
        let storage = FixedSizeListArray::try_new(byte_elements, list_width, row_validity, 3)
            .unwrap()
            .into_array();
        let uuids = ExtensionArray::new(Uuid::default(Nullability::Nullable).erased(), storage);

        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let target = DataType::FixedSizeBinary(binary_width);
        let actual = uuids
            .into_array()
            .execute_arrow(Some(&target), &mut ctx)
            .unwrap();

        let rows = [
            Some(first.as_bytes().as_slice()),
            None,
            Some(second.as_bytes().as_slice()),
        ];
        let expected =
            FixedSizeBinaryArray::try_from_sparse_iter_with_size(rows.into_iter(), binary_width)
                .unwrap();
        assert_eq!(actual.as_ref(), &expected as &dyn arrow_array::Array);
    }
}
0 commit comments