Skip to content

Commit abaede8

Browse files
committed
more
Signed-off-by: Adam Gutglick <adam@spiraldb.com>
1 parent 1c06acd commit abaede8

10 files changed

Lines changed: 298 additions & 77 deletions

File tree

encodings/parquet-variant/src/variant_get/mod.rs

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,20 @@ fn variant_get_impl(
101101
.ok_or_else(|| vortex_err!("variant_get did not return a StructArray"))?,
102102
)
103103
.map_err(|e| vortex_err!("failed to create VariantArray from result: {e}"))?;
104+
let value_nullable = result_variant
105+
.inner()
106+
.fields()
107+
.iter()
108+
.find(|field| field.name() == "value")
109+
.map(|field| field.is_nullable())
110+
.unwrap_or(false);
111+
let typed_value_nullable = result_variant
112+
.inner()
113+
.fields()
114+
.iter()
115+
.find(|field| field.name() == "typed_value")
116+
.map(|field| field.is_nullable())
117+
.unwrap_or(false);
104118

105119
// Ensure the result is always nullable (matching variant_get's return_dtype).
106120
// Arrow may return a non-nullable result when no nulls are present.
@@ -121,11 +135,11 @@ fn variant_get_impl(
121135
)?;
122136
let value = result_variant
123137
.value_field()
124-
.map(|v| ArrayRef::from_arrow(v as &dyn arrow_array::Array, true))
138+
.map(|v| ArrayRef::from_arrow(v as &dyn arrow_array::Array, value_nullable))
125139
.transpose()?;
126140
let typed_value = result_variant
127141
.typed_value_field()
128-
.map(|tv| ArrayRef::from_arrow(tv.as_ref(), true))
142+
.map(|tv| ArrayRef::from_arrow(tv.as_ref(), typed_value_nullable))
129143
.transpose()?;
130144

131145
let pv = ParquetVariant::try_new(validity, metadata, value, typed_value)?;

encodings/parquet-variant/src/variant_get/tests.rs

Lines changed: 108 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use parquet_variant_compute::GetOptions;
1919
use parquet_variant_compute::VariantArray as ArrowVariantArray;
2020
use parquet_variant_compute::VariantArrayBuilder;
2121
use parquet_variant_compute::json_to_variant;
22+
use rstest::fixture;
2223
use rstest::rstest;
2324
use vortex_array::ArrayRef;
2425
use vortex_array::LEGACY_SESSION;
@@ -34,29 +35,29 @@ use vortex_array::expr::variant_get;
3435
use vortex_array::expr::variant_get_as;
3536
use vortex_array::scalar_fn::fns::variant_get::VariantPath as VortexVariantPath;
3637
use vortex_error::VortexResult;
38+
use vortex_mask::Mask;
3739

3840
use crate::ParquetVariant;
3941
use crate::ParquetVariantArrayExt;
4042
use crate::ParquetVariantData;
4143

42-
/// Apply variant_get and execute through the full pipeline (including execute_parent).
43-
fn apply_variant_get(arr: &ArrayRef, path: impl Into<VortexVariantPath>) -> VortexResult<ArrayRef> {
44-
let expr = variant_get(path, root());
45-
let array = arr.clone().apply(&expr)?;
46-
let mut ctx = LEGACY_SESSION.create_execution_ctx();
47-
array.execute::<ArrayRef>(&mut ctx)
48-
}
49-
50-
/// Apply typed variant_get and execute through the full pipeline.
51-
fn apply_variant_get_as(
52-
arr: &ArrayRef,
53-
path: impl Into<VortexVariantPath>,
54-
as_dtype: DType,
55-
) -> VortexResult<ArrayRef> {
56-
let expr = variant_get_as(path, as_dtype, root());
57-
let array = arr.clone().apply(&expr)?;
58-
let mut ctx = LEGACY_SESSION.create_execution_ctx();
59-
array.execute::<ArrayRef>(&mut ctx)
44+
macro_rules! apply_variant_get {
45+
($arr:expr, $path:expr) => {{
46+
(|| -> VortexResult<ArrayRef> {
47+
let expr = variant_get($path, root());
48+
let array = $arr.clone().apply(&expr)?;
49+
let mut ctx = LEGACY_SESSION.create_execution_ctx();
50+
array.execute::<ArrayRef>(&mut ctx)
51+
})()
52+
}};
53+
($arr:expr, $path:expr, $as_dtype:expr) => {{
54+
(|| -> VortexResult<ArrayRef> {
55+
let expr = variant_get_as($path, $as_dtype, root());
56+
let array = $arr.clone().apply(&expr)?;
57+
let mut ctx = LEGACY_SESSION.create_execution_ctx();
58+
array.execute::<ArrayRef>(&mut ctx)
59+
})()
60+
}};
6061
}
6162

6263
/// Convert a Vortex result back to an Arrow VariantArray for comparison.
@@ -67,6 +68,31 @@ fn vortex_to_arrow_variant(arr: &ArrayRef) -> ArrowVariantArray {
6768
pv.to_arrow(&mut ctx).unwrap()
6869
}
6970

71+
fn assert_variant_storage_matches(expected: &ArrowVariantArray, actual: &ArrowVariantArray) {
72+
assert_eq!(actual.len(), expected.len(), "length mismatch");
73+
assert_eq!(
74+
actual.inner().column_names(),
75+
expected.inner().column_names(),
76+
"column mismatch"
77+
);
78+
assert_eq!(actual.inner().nulls(), expected.inner().nulls());
79+
assert_eq!(
80+
actual.inner().fields().len(),
81+
expected.inner().fields().len()
82+
);
83+
84+
for (expected, actual) in expected
85+
.inner()
86+
.fields()
87+
.iter()
88+
.zip(actual.inner().fields().iter())
89+
{
90+
assert_eq!(actual.name(), expected.name());
91+
assert_eq!(actual.data_type(), expected.data_type());
92+
assert_eq!(actual.is_nullable(), expected.is_nullable());
93+
}
94+
}
95+
7096
/// Run variant_get through both Arrow and Vortex on the same input, and assert
7197
/// the per-row results (value + validity) are identical by comparing at the Arrow level.
7298
fn assert_matches_arrow(json_rows: &[&str], field: &str) {
@@ -85,14 +111,10 @@ fn assert_matches_arrow(json_rows: &[&str], field: &str) {
85111
.unwrap();
86112

87113
let vortex_input = ParquetVariantData::from_arrow_variant(&arrow_variant).unwrap();
88-
let vortex_result = apply_variant_get(&vortex_input, field).unwrap();
114+
let vortex_result = apply_variant_get!(&vortex_input, field).unwrap();
89115
let vortex_as_arrow = vortex_to_arrow_variant(&vortex_result);
90116

91-
assert_eq!(
92-
vortex_as_arrow.len(),
93-
arrow_result_variant.len(),
94-
"length mismatch"
95-
);
117+
assert_variant_storage_matches(&arrow_result_variant, &vortex_as_arrow);
96118

97119
for i in 0..arrow_result_variant.len() {
98120
let arrow_is_null = arrow_result_variant.is_null(i);
@@ -135,14 +157,10 @@ fn assert_matches_arrow_with_path(
135157
.unwrap();
136158

137159
let vortex_input = ParquetVariantData::from_arrow_variant(&arrow_variant).unwrap();
138-
let vortex_result = apply_variant_get(&vortex_input, path).unwrap();
160+
let vortex_result = apply_variant_get!(&vortex_input, path).unwrap();
139161
let vortex_as_arrow = vortex_to_arrow_variant(&vortex_result);
140162

141-
assert_eq!(
142-
vortex_as_arrow.len(),
143-
arrow_result_variant.len(),
144-
"length mismatch"
145-
);
163+
assert_variant_storage_matches(&arrow_result_variant, &vortex_as_arrow);
146164

147165
for i in 0..arrow_result_variant.len() {
148166
let arrow_is_null = arrow_result_variant.is_null(i);
@@ -188,7 +206,7 @@ fn assert_typed_matches_arrow_with_path(
188206
let expected = ArrayRef::from_arrow(arrow_result.as_ref(), true)?;
189207

190208
let vortex_input = ParquetVariantData::from_arrow_variant(&arrow_variant).unwrap();
191-
let vortex_result = apply_variant_get_as(&vortex_input, path, as_dtype.clone())?;
209+
let vortex_result = apply_variant_get!(&vortex_input, path, as_dtype.clone())?;
192210

193211
assert_eq!(vortex_result.dtype(), &as_dtype.as_nullable());
194212
assert_arrays_eq!(vortex_result, expected);
@@ -222,14 +240,10 @@ fn assert_matches_arrow_nullable(json_rows: &[&str], validity: &[bool], field: &
222240
.unwrap();
223241

224242
let vortex_input = ParquetVariantData::from_arrow_variant(&arrow_variant).unwrap();
225-
let vortex_result = apply_variant_get(&vortex_input, field).unwrap();
243+
let vortex_result = apply_variant_get!(&vortex_input, field).unwrap();
226244
let vortex_as_arrow = vortex_to_arrow_variant(&vortex_result);
227245

228-
assert_eq!(
229-
vortex_as_arrow.len(),
230-
arrow_result_variant.len(),
231-
"length mismatch"
232-
);
246+
assert_variant_storage_matches(&arrow_result_variant, &vortex_as_arrow);
233247

234248
for i in 0..arrow_result_variant.len() {
235249
let arrow_is_null = arrow_result_variant.is_null(i);
@@ -373,14 +387,9 @@ fn test_variant_get_matches_arrow_typed_path() -> VortexResult<()> {
373387
)
374388
}
375389

376-
// ---------------------------------------------------------------------------
377-
// Original standalone tests
378-
// ---------------------------------------------------------------------------
379-
380-
#[test]
381-
fn test_variant_get_basic() -> VortexResult<()> {
382-
let arr = make_object_array()?;
383-
let result = apply_variant_get(&arr, "a")?;
390+
#[rstest]
391+
fn test_variant_get_basic(object_array: ArrayRef) -> VortexResult<()> {
392+
let result = apply_variant_get!(&object_array, "a")?;
384393

385394
assert_eq!(result.len(), 3);
386395

@@ -403,10 +412,9 @@ fn test_variant_get_basic() -> VortexResult<()> {
403412
Ok(())
404413
}
405414

406-
#[test]
407-
fn test_variant_get_missing_field() -> VortexResult<()> {
408-
let arr = make_object_array()?;
409-
let result = apply_variant_get(&arr, "nonexistent")?;
415+
#[rstest]
416+
fn test_variant_get_missing_field(object_array: ArrayRef) -> VortexResult<()> {
417+
let result = apply_variant_get!(&object_array, "nonexistent")?;
410418

411419
assert_eq!(result.len(), 3);
412420
for i in 0..3 {
@@ -416,10 +424,9 @@ fn test_variant_get_missing_field() -> VortexResult<()> {
416424
Ok(())
417425
}
418426

419-
#[test]
420-
fn test_variant_get_null_input() -> VortexResult<()> {
421-
let arr = make_nullable_object_array()?;
422-
let result = apply_variant_get(&arr, "a")?;
427+
#[rstest]
428+
fn test_variant_get_null_input(nullable_object_array: ArrayRef) -> VortexResult<()> {
429+
let result = apply_variant_get!(&nullable_object_array, "a")?;
423430

424431
assert_eq!(result.len(), 3);
425432
assert!(!result.scalar_at(0)?.is_null());
@@ -436,7 +443,7 @@ fn test_variant_get_non_object() -> VortexResult<()> {
436443
builder.append_variant(PqVariant::from("hello"));
437444
let arr = ParquetVariantData::from_arrow_variant(&builder.build())?;
438445

439-
let result = apply_variant_get(&arr, "a")?;
446+
let result = apply_variant_get!(&arr, "a")?;
440447

441448
assert_eq!(result.len(), 2);
442449
assert!(result.scalar_at(0)?.is_null());
@@ -445,10 +452,9 @@ fn test_variant_get_non_object() -> VortexResult<()> {
445452
Ok(())
446453
}
447454

448-
#[test]
449-
fn test_variant_get_different_field() -> VortexResult<()> {
450-
let arr = make_object_array()?;
451-
let result = apply_variant_get(&arr, "b")?;
455+
#[rstest]
456+
fn test_variant_get_different_field(object_array: ArrayRef) -> VortexResult<()> {
457+
let result = apply_variant_get!(&object_array, "b")?;
452458

453459
assert_eq!(result.len(), 3);
454460
assert!(!result.scalar_at(0)?.is_null());
@@ -458,12 +464,49 @@ fn test_variant_get_different_field() -> VortexResult<()> {
458464
Ok(())
459465
}
460466

467+
#[rstest]
468+
fn test_variant_get_through_slice_wrapper(object_array: ArrayRef) -> VortexResult<()> {
469+
let mut ctx = LEGACY_SESSION.create_execution_ctx();
470+
471+
let expr = variant_get("a", root());
472+
let actual = object_array
473+
.slice(1..3)?
474+
.apply(&expr)?
475+
.execute::<ArrayRef>(&mut ctx)?;
476+
477+
let expected = apply_variant_get!(&object_array, "a")?;
478+
479+
assert_eq!(actual.len(), 2);
480+
assert_eq!(actual.scalar_at(0)?, expected.scalar_at(1)?);
481+
assert_eq!(actual.scalar_at(1)?, expected.scalar_at(2)?);
482+
Ok(())
483+
}
484+
485+
#[rstest]
486+
fn test_variant_get_through_filter_wrapper(object_array: ArrayRef) -> VortexResult<()> {
487+
let mask = Mask::from_iter([true, false, true]);
488+
489+
let expr = variant_get("a", root());
490+
let mut ctx = LEGACY_SESSION.create_execution_ctx();
491+
492+
let array = object_array.filter(mask.clone())?.apply(&expr)?;
493+
let actual = array.execute::<ArrayRef>(&mut ctx)?;
494+
let expected = apply_variant_get!(&object_array, "a")?;
495+
496+
assert_eq!(mask.true_count(), 2);
497+
assert_eq!(actual.len(), 2);
498+
assert_eq!(actual.scalar_at(0)?, expected.scalar_at(0)?);
499+
assert_eq!(actual.scalar_at(1)?, expected.scalar_at(2)?);
500+
Ok(())
501+
}
502+
461503
// ---------------------------------------------------------------------------
462504
// Test data helpers
463505
// ---------------------------------------------------------------------------
464506

465-
/// Build a small non-null object variant array used by the standalone tests.
466-
fn make_object_array() -> VortexResult<ArrayRef> {
507+
/// Small non-null object variant array used by the standalone tests.
508+
#[fixture]
509+
fn object_array() -> ArrayRef {
467510
let mut builder = VariantArrayBuilder::new(3);
468511

469512
builder
@@ -480,11 +523,12 @@ fn make_object_array() -> VortexResult<ArrayRef> {
480523

481524
builder.new_object().with_field("b", "y").finish();
482525

483-
ParquetVariantData::from_arrow_variant(&builder.build())
526+
ParquetVariantData::from_arrow_variant(&builder.build()).unwrap()
484527
}
485528

486-
/// Build the same object array shape with an explicit top-level validity bitmap.
487-
fn make_nullable_object_array() -> VortexResult<ArrayRef> {
529+
/// The same object array shape with an explicit top-level validity bitmap.
530+
#[fixture]
531+
fn nullable_object_array() -> ArrayRef {
488532
let mut builder = VariantArrayBuilder::new(3);
489533

490534
builder.new_object().with_field("a", 10i32).finish();
@@ -499,5 +543,5 @@ fn make_nullable_object_array() -> VortexResult<ArrayRef> {
499543
)
500544
.unwrap();
501545
let arrow_variant = ArrowVariantArray::try_new(&null_struct).unwrap();
502-
ParquetVariantData::from_arrow_variant(&arrow_variant)
546+
ParquetVariantData::from_arrow_variant(&arrow_variant).unwrap()
503547
}

vortex-array/public-api.lock

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17270,7 +17270,7 @@ pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::coerce_args(&self,
1727017270

1727117271
pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::deserialize(&self, metadata: &[u8], session: &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Options>
1727217272

17273-
pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::execute(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, _args: &dyn vortex_array::scalar_fn::ExecutionArgs, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::ArrayRef>
17273+
pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::execute(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, args: &dyn vortex_array::scalar_fn::ExecutionArgs, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::ArrayRef>
1727417274

1727517275
pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::fmt_sql(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, expr: &vortex_array::expr::Expression, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
1727617276

@@ -18620,7 +18620,7 @@ pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::coerce_args(&self,
1862018620

1862118621
pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::deserialize(&self, metadata: &[u8], session: &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Options>
1862218622

18623-
pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::execute(&self, _options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, _args: &dyn vortex_array::scalar_fn::ExecutionArgs, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::ArrayRef>
18623+
pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::execute(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, args: &dyn vortex_array::scalar_fn::ExecutionArgs, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<vortex_array::ArrayRef>
1862418624

1862518625
pub fn vortex_array::scalar_fn::fns::variant_get::VariantGet::fmt_sql(&self, options: &vortex_array::scalar_fn::fns::variant_get::VariantGetOptions, expr: &vortex_array::expr::Expression, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
1862618626

vortex-array/src/arrays/filter/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ pub use vtable::FilterArray;
1010
mod execute;
1111

1212
mod kernel;
13+
mod parent_kernels;
1314
pub use kernel::FilterExecuteAdaptor;
1415
pub use kernel::FilterKernel;
1516
pub use kernel::FilterReduce;

0 commit comments

Comments
 (0)