Skip to content

Commit 1c06acd

Browse files
committed
Better expr
Signed-off-by: Adam Gutglick <adam@spiraldb.com>
1 parent 1f724a7 commit 1c06acd

11 files changed

Lines changed: 909 additions & 363 deletions

File tree

encodings/parquet-variant/src/variant_get/mod.rs

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77
88
use std::sync::Arc;
99

10-
use parquet_variant::VariantPathElement;
10+
use arrow_schema::Field;
11+
use arrow_schema::FieldRef;
12+
use parquet_variant::VariantPath;
13+
use parquet_variant::VariantPathElement as ArrowVariantPathElement;
1114
use parquet_variant_compute::GetOptions;
1215
use parquet_variant_compute::VariantArray as ArrowVariantArray;
1316
use vortex_array::ArrayRef;
@@ -19,10 +22,11 @@ use vortex_array::arrays::scalar_fn::ExactScalarFn;
1922
use vortex_array::arrays::scalar_fn::ScalarFnArrayView;
2023
use vortex_array::arrow::FromArrowArray;
2124
use vortex_array::dtype::DType;
22-
use vortex_array::dtype::FieldName;
2325
use vortex_array::dtype::Nullability;
2426
use vortex_array::kernel::ExecuteParentKernel;
2527
use vortex_array::scalar_fn::fns::variant_get::VariantGet;
28+
use vortex_array::scalar_fn::fns::variant_get::VariantGetOptions;
29+
use vortex_array::scalar_fn::fns::variant_get::VariantPathElement as VortexVariantPathElement;
2630
use vortex_array::validity::Validity;
2731
use vortex_buffer::BitBuffer;
2832
use vortex_error::VortexResult;
@@ -47,31 +51,48 @@ impl ExecuteParentKernel<ParquetVariant> for VariantGetExecuteParent {
4751
_child_idx: usize,
4852
ctx: &mut ExecutionCtx,
4953
) -> VortexResult<Option<ArrayRef>> {
50-
let field_name: &FieldName = parent.options;
51-
variant_get_impl(array, field_name, ctx).map(Some)
54+
variant_get_impl(array, parent.options, ctx).map(Some)
5255
}
5356
}
5457

5558
fn variant_get_impl(
5659
array: ArrayView<'_, ParquetVariant>,
57-
field_name: &FieldName,
60+
options: &VariantGetOptions,
5861
ctx: &mut ExecutionCtx,
5962
) -> VortexResult<ArrayRef> {
6063
// Convert to Arrow VariantArray
6164
let arrow_variant = array.to_arrow(ctx)?;
6265

63-
// Build path for a single field access
64-
let path_element = VariantPathElement::Field {
65-
name: field_name.as_ref().into(),
66-
};
67-
let options = GetOptions::new_with_path(vec![path_element].into());
66+
let path = options
67+
.path()
68+
.iter()
69+
.cloned()
70+
.map(|element| match element {
71+
VortexVariantPathElement::Field(name) => ArrowVariantPathElement::Field {
72+
name: name.to_string().into(),
73+
},
74+
VortexVariantPathElement::Index(index) => ArrowVariantPathElement::Index { index },
75+
})
76+
.collect::<Vec<_>>();
77+
let mut arrow_options = GetOptions::new_with_path(VariantPath::new(path));
78+
if let Some(as_dtype) = options.effective_as_dtype() {
79+
arrow_options = arrow_options.with_as_type(Some(FieldRef::new(Field::new(
80+
"result",
81+
as_dtype.to_arrow_dtype()?,
82+
as_dtype.is_nullable(),
83+
))));
84+
}
6885

6986
// Delegate to the parquet-variant-compute kernel.
7087
// With as_type = None, the result is itself a VariantArray.
7188
let inner: Arc<dyn arrow_array::Array> = Arc::new(arrow_variant.into_inner());
72-
let arrow_result = parquet_variant_compute::variant_get(&inner, options)
89+
let arrow_result = parquet_variant_compute::variant_get(&inner, arrow_options)
7390
.map_err(|e| vortex_err!("variant_get failed: {e}"))?;
7491

92+
if options.effective_as_dtype().is_some() {
93+
return ArrayRef::from_arrow(arrow_result.as_ref(), true);
94+
}
95+
7596
// Convert back to Vortex.
7697
let result_variant = ArrowVariantArray::try_new(
7798
arrow_result

encodings/parquet-variant/src/variant_get/tests.rs

Lines changed: 162 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,13 @@ use arrow_array::ArrayRef as ArrowArrayRef;
88
use arrow_array::StringArray;
99
use arrow_array::StructArray;
1010
use arrow_buffer::NullBuffer;
11+
use arrow_schema::DataType as ArrowDataType;
12+
use arrow_schema::Field as ArrowField;
13+
use arrow_schema::FieldRef;
1114
use parquet_variant::Variant as PqVariant;
1215
use parquet_variant::VariantBuilderExt;
1316
use parquet_variant::VariantPath;
17+
use parquet_variant::VariantPathElement;
1418
use parquet_variant_compute::GetOptions;
1519
use parquet_variant_compute::VariantArray as ArrowVariantArray;
1620
use parquet_variant_compute::VariantArrayBuilder;
@@ -20,17 +24,36 @@ use vortex_array::ArrayRef;
2024
use vortex_array::LEGACY_SESSION;
2125
use vortex_array::VortexSessionExecute;
2226
use vortex_array::arrays::variant::VariantArrayExt;
27+
use vortex_array::arrow::FromArrowArray;
28+
use vortex_array::assert_arrays_eq;
29+
use vortex_array::dtype::DType;
30+
use vortex_array::dtype::Nullability;
31+
use vortex_array::dtype::PType;
2332
use vortex_array::expr::root;
2433
use vortex_array::expr::variant_get;
34+
use vortex_array::expr::variant_get_as;
35+
use vortex_array::scalar_fn::fns::variant_get::VariantPath as VortexVariantPath;
2536
use vortex_error::VortexResult;
2637

2738
use crate::ParquetVariant;
2839
use crate::ParquetVariantArrayExt;
2940
use crate::ParquetVariantData;
3041

3142
/// Apply variant_get and execute through the full pipeline (including execute_parent).
32-
fn apply_variant_get(arr: &ArrayRef, field: &str) -> VortexResult<ArrayRef> {
33-
let expr = variant_get(field, root());
43+
fn apply_variant_get(arr: &ArrayRef, path: impl Into<VortexVariantPath>) -> VortexResult<ArrayRef> {
44+
let expr = variant_get(path, root());
45+
let array = arr.clone().apply(&expr)?;
46+
let mut ctx = LEGACY_SESSION.create_execution_ctx();
47+
array.execute::<ArrayRef>(&mut ctx)
48+
}
49+
50+
/// Apply typed variant_get and execute through the full pipeline.
51+
fn apply_variant_get_as(
52+
arr: &ArrayRef,
53+
path: impl Into<VortexVariantPath>,
54+
as_dtype: DType,
55+
) -> VortexResult<ArrayRef> {
56+
let expr = variant_get_as(path, as_dtype, root());
3457
let array = arr.clone().apply(&expr)?;
3558
let mut ctx = LEGACY_SESSION.create_execution_ctx();
3659
array.execute::<ArrayRef>(&mut ctx)
@@ -47,7 +70,6 @@ fn vortex_to_arrow_variant(arr: &ArrayRef) -> ArrowVariantArray {
4770
/// Run variant_get through both Arrow and Vortex on the same input, and assert
4871
/// the per-row results (value + validity) are identical by comparing at the Arrow level.
4972
fn assert_matches_arrow(json_rows: &[&str], field: &str) {
50-
// --- Arrow side ---
5173
let arrow_strings: ArrowArrayRef = Arc::new(StringArray::from(
5274
json_rows.iter().map(|s| Some(*s)).collect::<Vec<_>>(),
5375
));
@@ -62,12 +84,10 @@ fn assert_matches_arrow(json_rows: &[&str], field: &str) {
6284
ArrowVariantArray::try_new(arrow_result.as_any().downcast_ref::<StructArray>().unwrap())
6385
.unwrap();
6486

65-
// --- Vortex side ---
6687
let vortex_input = ParquetVariantData::from_arrow_variant(&arrow_variant).unwrap();
6788
let vortex_result = apply_variant_get(&vortex_input, field).unwrap();
6889
let vortex_as_arrow = vortex_to_arrow_variant(&vortex_result);
6990

70-
// --- Compare row-by-row at Arrow Variant level ---
7191
assert_eq!(
7292
vortex_as_arrow.len(),
7393
arrow_result_variant.len(),
@@ -94,10 +114,90 @@ fn assert_matches_arrow(json_rows: &[&str], field: &str) {
94114
}
95115
}
96116

117+
/// Run variant_get through both Arrow and Vortex for an explicit nested/index path,
118+
/// and assert the per-row results match at the Arrow variant level.
119+
fn assert_matches_arrow_with_path(
120+
json_rows: &[&str],
121+
path: VortexVariantPath,
122+
arrow_path: VariantPath<'static>,
123+
) {
124+
let arrow_strings: ArrowArrayRef = Arc::new(StringArray::from(
125+
json_rows.iter().map(|s| Some(*s)).collect::<Vec<_>>(),
126+
));
127+
let arrow_variant = json_to_variant(&arrow_strings).unwrap();
128+
let arrow_result = parquet_variant_compute::variant_get(
129+
&arrow_variant.clone().into(),
130+
GetOptions::new_with_path(arrow_path),
131+
)
132+
.unwrap();
133+
let arrow_result_variant =
134+
ArrowVariantArray::try_new(arrow_result.as_any().downcast_ref::<StructArray>().unwrap())
135+
.unwrap();
136+
137+
let vortex_input = ParquetVariantData::from_arrow_variant(&arrow_variant).unwrap();
138+
let vortex_result = apply_variant_get(&vortex_input, path).unwrap();
139+
let vortex_as_arrow = vortex_to_arrow_variant(&vortex_result);
140+
141+
assert_eq!(
142+
vortex_as_arrow.len(),
143+
arrow_result_variant.len(),
144+
"length mismatch"
145+
);
146+
147+
for i in 0..arrow_result_variant.len() {
148+
let arrow_is_null = arrow_result_variant.is_null(i);
149+
let vortex_is_null = vortex_as_arrow.is_null(i);
150+
151+
assert_eq!(
152+
vortex_is_null, arrow_is_null,
153+
"row {i}: null mismatch (vortex={vortex_is_null}, arrow={arrow_is_null})"
154+
);
155+
156+
if !arrow_is_null {
157+
let arrow_value = arrow_result_variant.value(i);
158+
let vortex_value = vortex_as_arrow.value(i);
159+
assert_eq!(
160+
vortex_value, arrow_value,
161+
"row {i}: value mismatch\n vortex: {vortex_value:?}\n arrow: {arrow_value:?}"
162+
);
163+
}
164+
}
165+
}
166+
167+
/// Run typed variant_get through both Arrow and Vortex for an explicit path,
168+
/// and assert the typed nullable result matches.
169+
fn assert_typed_matches_arrow_with_path(
170+
json_rows: &[&str],
171+
path: VortexVariantPath,
172+
arrow_path: VariantPath<'static>,
173+
as_dtype: DType,
174+
arrow_dtype: ArrowDataType,
175+
) -> VortexResult<()> {
176+
let arrow_strings: ArrowArrayRef = Arc::new(StringArray::from(
177+
json_rows.iter().map(|s| Some(*s)).collect::<Vec<_>>(),
178+
));
179+
let arrow_variant = json_to_variant(&arrow_strings).unwrap();
180+
let arrow_result =
181+
parquet_variant_compute::variant_get(
182+
&arrow_variant.clone().into(),
183+
GetOptions::new_with_path(arrow_path).with_as_type(Some(FieldRef::new(
184+
ArrowField::new("result", arrow_dtype, true),
185+
))),
186+
)
187+
.unwrap();
188+
let expected = ArrayRef::from_arrow(arrow_result.as_ref(), true)?;
189+
190+
let vortex_input = ParquetVariantData::from_arrow_variant(&arrow_variant).unwrap();
191+
let vortex_result = apply_variant_get_as(&vortex_input, path, as_dtype.clone())?;
192+
193+
assert_eq!(vortex_result.dtype(), &as_dtype.as_nullable());
194+
assert_arrays_eq!(vortex_result, expected);
195+
Ok(())
196+
}
197+
97198
/// Run variant_get through both Arrow and Vortex on nullable input (with NullBuffer),
98199
/// and assert the results match.
99200
fn assert_matches_arrow_nullable(json_rows: &[&str], validity: &[bool], field: &str) {
100-
// --- Arrow side ---
101201
let arrow_strings: ArrowArrayRef = Arc::new(StringArray::from(
102202
json_rows.iter().map(|s| Some(*s)).collect::<Vec<_>>(),
103203
));
@@ -121,12 +221,10 @@ fn assert_matches_arrow_nullable(json_rows: &[&str], validity: &[bool], field: &
121221
ArrowVariantArray::try_new(arrow_result.as_any().downcast_ref::<StructArray>().unwrap())
122222
.unwrap();
123223

124-
// --- Vortex side ---
125224
let vortex_input = ParquetVariantData::from_arrow_variant(&arrow_variant).unwrap();
126225
let vortex_result = apply_variant_get(&vortex_input, field).unwrap();
127226
let vortex_as_arrow = vortex_to_arrow_variant(&vortex_result);
128227

129-
// --- Compare ---
130228
assert_eq!(
131229
vortex_as_arrow.len(),
132230
arrow_result_variant.len(),
@@ -221,6 +319,60 @@ fn test_variant_get_matches_arrow_nested_object_result() {
221319
);
222320
}
223321

322+
#[test]
323+
fn test_variant_get_matches_arrow_nested_path() {
324+
let path = VortexVariantPath::from_name("outer").join("inner");
325+
let arrow_path = VariantPath::from_iter([
326+
VariantPathElement::field("outer".to_string()),
327+
VariantPathElement::field("inner".to_string()),
328+
]);
329+
assert_matches_arrow_with_path(
330+
&[
331+
r#"{"outer": {"inner": 42}}"#,
332+
r#"{"outer": {"inner": "x"}}"#,
333+
r#"{"outer": {"other": true}}"#,
334+
],
335+
path,
336+
arrow_path,
337+
);
338+
}
339+
340+
#[test]
341+
fn test_variant_get_matches_arrow_index_path() {
342+
let path = VortexVariantPath::from_name("arr").join(1usize);
343+
let arrow_path = VariantPath::from_iter([
344+
VariantPathElement::field("arr".to_string()),
345+
VariantPathElement::index(1),
346+
]);
347+
assert_matches_arrow_with_path(
348+
&[
349+
r#"{"arr": [1, 2, 3]}"#,
350+
r#"{"arr": ["a", "b"]}"#,
351+
r#"{"arr": [true]}"#,
352+
],
353+
path,
354+
arrow_path,
355+
);
356+
}
357+
358+
#[test]
359+
fn test_variant_get_matches_arrow_typed_path() -> VortexResult<()> {
360+
assert_typed_matches_arrow_with_path(
361+
&[
362+
r#"{"outer": {"inner": 42}}"#,
363+
r#"{"outer": {"inner": "x"}}"#,
364+
r#"{"outer": {"other": true}}"#,
365+
],
366+
VortexVariantPath::from_name("outer").join("inner"),
367+
VariantPath::from_iter([
368+
VariantPathElement::field("outer".to_string()),
369+
VariantPathElement::field("inner".to_string()),
370+
]),
371+
DType::Primitive(PType::I64, Nullability::NonNullable),
372+
ArrowDataType::Int64,
373+
)
374+
}
375+
224376
// ---------------------------------------------------------------------------
225377
// Original standalone tests
226378
// ---------------------------------------------------------------------------
@@ -310,6 +462,7 @@ fn test_variant_get_different_field() -> VortexResult<()> {
310462
// Test data helpers
311463
// ---------------------------------------------------------------------------
312464

465+
/// Build a small non-null object variant array used by the standalone tests.
313466
fn make_object_array() -> VortexResult<ArrayRef> {
314467
let mut builder = VariantArrayBuilder::new(3);
315468

@@ -330,6 +483,7 @@ fn make_object_array() -> VortexResult<ArrayRef> {
330483
ParquetVariantData::from_arrow_variant(&builder.build())
331484
}
332485

486+
/// Build the same object array shape with an explicit top-level validity bitmap.
333487
fn make_nullable_object_array() -> VortexResult<ArrayRef> {
334488
let mut builder = VariantArrayBuilder::new(3);
335489

0 commit comments

Comments
 (0)