Skip to content
Closed

🥸 #7932

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions encodings/parquet-variant/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use vortex_array::ExecutionCtx;
use vortex_array::IntoArray;
use vortex_array::TypedArrayRef;
use vortex_array::arrays::VariantArray;
#[expect(deprecated, reason = "TODO(aduffy): convert this to ArrowExportVTable")]
use vortex_array::arrow::ArrowArrayExecutor;
use vortex_array::arrow::FromArrowArray;
use vortex_array::arrow::to_arrow_null_buffer;
Expand Down Expand Up @@ -235,6 +236,7 @@ pub trait ParquetVariantArrayExt: TypedArrayRef<ParquetVariant> {
self.as_ref().slots()[TYPED_VALUE_SLOT].as_ref()
}

#[expect(deprecated, reason = "TODO(aduffy): convert this to ArrowExportVTable")]
fn to_arrow(&self, ctx: &mut ExecutionCtx) -> VortexResult<ArrowVariantArray> {
let metadata = self.metadata_array();
let len = metadata.len();
Expand Down
13 changes: 9 additions & 4 deletions encodings/pco/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use vortex_array::LEGACY_SESSION;
use vortex_array::VortexSessionExecute;
use vortex_array::arrays::BoolArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrow::ArrowArrayExecutor;
use vortex_array::arrow::ArrowSessionExt;
use vortex_array::assert_arrays_eq;
use vortex_array::assert_nth_scalar;
use vortex_array::dtype::DType;
Expand Down Expand Up @@ -213,9 +213,14 @@ fn test_serde() -> VortexResult<()> {
&ReadContext::new(context.to_ids()),
&SESSION,
)?;
let data_type = data.dtype().to_arrow_dtype()?;
let pco_arrow = pco.execute_arrow(Some(&data_type), &mut ctx)?;
let decoded_arrow = decoded.execute_arrow(Some(&data_type), &mut ctx)?;
let data_type = LEGACY_SESSION.arrow().to_arrow_field("", data.dtype())?;
let pco_arrow = LEGACY_SESSION
.arrow()
.execute_arrow(pco, Some(&data_type), &mut ctx)?;
let decoded_arrow =
LEGACY_SESSION
.arrow()
.execute_arrow(decoded, Some(&data_type), &mut ctx)?;
assert!(pco_arrow == decoded_arrow);
Ok(())
}
7 changes: 5 additions & 2 deletions encodings/runend/src/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ mod tests {
use vortex_array::VortexSessionExecute as _;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrays::primitive::PrimitiveArrayExt;
use vortex_array::arrow::ArrowArrayExecutor;
use vortex_array::arrow::ArrowSessionExt;
use vortex_array::arrow::FromArrowArray;
use vortex_array::assert_arrays_eq;
use vortex_array::dtype::DType;
Expand Down Expand Up @@ -301,7 +301,10 @@ mod tests {
}

fn execute(array: ArrayRef, dt: &DataType) -> VortexResult<arrow_array::ArrayRef> {
array.execute_arrow(Some(dt), &mut SESSION.create_execution_ctx())
let field = Field::new("", dt.clone(), true);
SESSION
.arrow()
.execute_arrow(array, Some(&field), &mut SESSION.create_execution_ctx())
}

#[test]
Expand Down
57 changes: 37 additions & 20 deletions encodings/sparse/src/canonical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@ mod test {
use vortex_array::arrays::VarBinArray;
use vortex_array::arrays::VarBinViewArray;
use vortex_array::arrays::listview::ListViewArrayExt;
use vortex_array::arrow::ArrowArrayExecutor;
use vortex_array::arrow::ArrowSessionExt;
use vortex_array::assert_arrays_eq;
use vortex_array::dtype::DType;
use vortex_array::dtype::DecimalDType;
Expand Down Expand Up @@ -845,23 +845,33 @@ mod test {
let fill_scalar = Scalar::decimal(DecimalValue::I32(123), decimal_dtype, Nullable);
let sparse_struct = Sparse::try_new(indices, patch_values, len, fill_scalar).unwrap();

let expected = DecimalArray::new(
buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
decimal_dtype,
// NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
)
.into_array()
.execute_arrow(None, &mut ctx)
.unwrap();
let expected = LEGACY_SESSION
.arrow()
.execute_arrow(
DecimalArray::new(
buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
decimal_dtype,
// NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
)
.into_array(),
None,
&mut ctx,
)
.unwrap();

let actual = sparse_struct
.as_array()
.clone()
.execute::<DecimalArray>(&mut ctx)
.unwrap()
.into_array()
.execute_arrow(None, &mut ctx)
let actual = LEGACY_SESSION
.arrow()
.execute_arrow(
sparse_struct
.as_array()
.clone()
.execute::<DecimalArray>(&mut ctx)
.unwrap()
.into_array(),
None,
&mut ctx,
)
.unwrap();

assert_eq!(expected.data_type(), actual.data_type());
Expand Down Expand Up @@ -1544,9 +1554,16 @@ mod test {
assert_arrays_eq!(&actual, &expected);

// Note that the preferred arrow list representation is `List` (not `ListView`).
let arrow_dtype = expected.dtype().to_arrow_dtype()?;
let actual = actual.execute_arrow(Some(&arrow_dtype), &mut ctx)?;
let expected = expected.execute_arrow(Some(&arrow_dtype), &mut ctx)?;
let arrow_dtype = LEGACY_SESSION
.arrow()
.to_arrow_field("", expected.dtype())?;
let actual = LEGACY_SESSION
.arrow()
.execute_arrow(actual, Some(&arrow_dtype), &mut ctx)?;
let expected =
LEGACY_SESSION
.arrow()
.execute_arrow(expected, Some(&arrow_dtype), &mut ctx)?;

assert_eq!(actual.data_type(), expected.data_type());
Ok(())
Expand Down
6 changes: 5 additions & 1 deletion vortex-array/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ arrow-buffer = { workspace = true }
arrow-cast = { workspace = true }
arrow-data = { workspace = true }
arrow-ord = { workspace = true }
arrow-schema = { workspace = true }
arrow-schema = { workspace = true, features = ["canonical_extension_types"] }
arrow-select = { workspace = true }
arrow-string = { workspace = true }
async-lock = { workspace = true }
Expand Down Expand Up @@ -201,3 +201,7 @@ harness = false
[[bench]]
name = "slice_dict_primitive"
harness = false

[[bench]]
name = "to_arrow"
harness = false
130 changes: 130 additions & 0 deletions vortex-array/benches/to_arrow.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#![expect(clippy::unwrap_used)]

use std::sync::Arc;

use divan::Bencher;
use vortex_array::ArrayRef;
use vortex_array::IntoArray;
use vortex_array::LEGACY_SESSION;
use vortex_array::VortexSessionExecute;
use vortex_array::arrays::DecimalArray;
use vortex_array::arrays::ListArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrays::StructArray;
#[expect(
deprecated,
reason = "benchmark comparing deprecated method with new one"
)]
use vortex_array::arrow::ArrowArrayExecutor;
use vortex_array::arrow::ArrowSessionExt;
use vortex_array::dtype::DType;
use vortex_array::dtype::DecimalDType;
use vortex_array::dtype::Nullability;
use vortex_array::dtype::PType;
use vortex_array::dtype::StructFields;

// Entry point of the benchmark binary: delegates straight to `divan::main()`.
// NOTE(review): presumably this is what discovers and runs the `#[divan::bench]`
// functions in this file — confirm against the divan documentation.
fn main() {
divan::main();
}

/// Builds the struct dtype used as input by the dtype-conversion benchmarks.
///
/// The struct itself is non-nullable and carries three nullable columns:
/// `primitive` (an `F32` primitive), `list` (a list whose elements are
/// non-nullable binary) and `decimal` (created via `DecimalDType::new(19, 10)`).
fn schema() -> DType {
    let primitive = DType::Primitive(PType::F32, Nullability::Nullable);
    let list = DType::List(
        Arc::new(DType::Binary(Nullability::NonNullable)),
        Nullability::Nullable,
    );
    let decimal = DType::Decimal(DecimalDType::new(19, 10), Nullability::Nullable);

    // Column order matters for the resulting struct: primitive, list, decimal.
    let columns = StructFields::from_iter([
        ("primitive", primitive),
        ("list", list),
        ("decimal", decimal),
    ]);
    DType::Struct(columns, Nullability::NonNullable)
}

/// Builds the 1024-row struct array used as input by the array-export benchmarks.
///
/// Columns, in order: `primitive` (the `i16` values `0..1024`), `list` (1024
/// rows, each the three strings `"a"`, `"b"`, `"c"`, with non-nullable UTF-8
/// elements) and `decimal` (the `i64` values `0..1024` with
/// `DecimalDType::new(19, 2)`).
///
/// Panics if building the list or the struct fails; acceptable in a benchmark fixture.
fn array() -> ArrayRef {
    let primitive = PrimitiveArray::from_iter(0i16..1024).into_array();

    let rows: Vec<_> = (0..1024).map(|_| vec!["a", "b", "c"]).collect();
    let list = ListArray::from_iter_slow::<u32, _>(
        rows,
        Arc::new(DType::Utf8(Nullability::NonNullable)),
    )
    .unwrap()
    .into_array();

    let decimal = DecimalArray::from_iter(0i64..1024, DecimalDType::new(19, 2)).into_array();

    StructArray::from_fields(&[
        ("primitive", primitive),
        ("list", list),
        ("decimal", decimal),
    ])
    .unwrap()
    .into_array()
}

#[divan::bench]
fn to_arrow_dtype(bencher: Bencher) {
bencher.with_inputs(schema).bench_values(|dtype| {
#[expect(deprecated, reason = "benchmarking deprecated code path")]
dtype.to_arrow_dtype().unwrap()
});
}

/// Benchmarks `to_arrow_field` on the session's Arrow extension, the
/// replacement for the deprecated path measured by `to_arrow_dtype`.
///
/// The conversion is run once before measuring so that any first-use setup in
/// the Arrow session is not attributed to the benchmark itself.
#[allow(
    non_snake_case,
    reason = "benchmark name spells out `ArrowExportVTable` so it lines up with the code path in reports"
)]
#[divan::bench]
fn ArrowExportVTable_to_arrow_field(bencher: Bencher) {
    // Warm the ArrowSession
    drop(
        LEGACY_SESSION
            .arrow()
            .to_arrow_field("", &schema())
            .unwrap(),
    );

    bencher
        .with_inputs(schema)
        .bench_values(|dtype| LEGACY_SESSION.arrow().to_arrow_field("", &dtype).unwrap())
}

/// Benchmarks the deprecated `execute_arrow` method called directly on the array.
///
/// Every run gets its own freshly built [`array`] and execution context from
/// the input generator; no target Arrow type is requested (`None`).
#[divan::bench]
fn to_arrow_array(bencher: Bencher) {
    let make_input = || (array(), LEGACY_SESSION.create_execution_ctx());

    bencher
        .with_inputs(make_input)
        .bench_values(|(input, mut exec_ctx)| {
            #[expect(deprecated, reason = "benchmarking deprecated code path")]
            input.execute_arrow(None, &mut exec_ctx).unwrap()
        });
}

/// Benchmarks `execute_arrow` on the session's Arrow extension, the
/// replacement for the deprecated path measured by `to_arrow_array`.
///
/// Every measured run gets its own freshly built [`array`] and execution
/// context; no target Arrow field is requested (`None`).
#[allow(
    non_snake_case,
    reason = "benchmark name spells out `ArrowExportVTable` so it lines up with the code path in reports"
)]
#[divan::bench]
fn ArrowExportVTable_execute_arrow(bencher: Bencher) {
    // Warm the ArrowSession. The result is unwrapped (as in the sibling
    // `to_arrow_field` benchmark) so a broken export fails loudly here instead
    // of being silently discarded before the measured runs start.
    drop(
        LEGACY_SESSION
            .arrow()
            .execute_arrow(
                array(),
                None,
                &mut LEGACY_SESSION.create_execution_ctx(),
            )
            .unwrap(),
    );

    bencher
        .with_inputs(|| (array(), LEGACY_SESSION.create_execution_ctx()))
        .bench_values(|(array, mut ctx)| {
            LEGACY_SESSION
                .arrow()
                .execute_arrow(array, None, &mut ctx)
                .unwrap()
        })
}
Loading
Loading