@@ -8,9 +8,13 @@ use arrow_array::ArrayRef as ArrowArrayRef;
88use arrow_array:: StringArray ;
99use arrow_array:: StructArray ;
1010use arrow_buffer:: NullBuffer ;
11+ use arrow_schema:: DataType as ArrowDataType ;
12+ use arrow_schema:: Field as ArrowField ;
13+ use arrow_schema:: FieldRef ;
1114use parquet_variant:: Variant as PqVariant ;
1215use parquet_variant:: VariantBuilderExt ;
1316use parquet_variant:: VariantPath ;
17+ use parquet_variant:: VariantPathElement ;
1418use parquet_variant_compute:: GetOptions ;
1519use parquet_variant_compute:: VariantArray as ArrowVariantArray ;
1620use parquet_variant_compute:: VariantArrayBuilder ;
@@ -20,17 +24,36 @@ use vortex_array::ArrayRef;
2024use vortex_array:: LEGACY_SESSION ;
2125use vortex_array:: VortexSessionExecute ;
2226use vortex_array:: arrays:: variant:: VariantArrayExt ;
27+ use vortex_array:: arrow:: FromArrowArray ;
28+ use vortex_array:: assert_arrays_eq;
29+ use vortex_array:: dtype:: DType ;
30+ use vortex_array:: dtype:: Nullability ;
31+ use vortex_array:: dtype:: PType ;
2332use vortex_array:: expr:: root;
2433use vortex_array:: expr:: variant_get;
34+ use vortex_array:: expr:: variant_get_as;
35+ use vortex_array:: scalar_fn:: fns:: variant_get:: VariantPath as VortexVariantPath ;
2536use vortex_error:: VortexResult ;
2637
2738use crate :: ParquetVariant ;
2839use crate :: ParquetVariantArrayExt ;
2940use crate :: ParquetVariantData ;
3041
3142/// Apply variant_get and execute through the full pipeline (including execute_parent).
32- fn apply_variant_get ( arr : & ArrayRef , field : & str ) -> VortexResult < ArrayRef > {
33- let expr = variant_get ( field, root ( ) ) ;
43+ fn apply_variant_get ( arr : & ArrayRef , path : impl Into < VortexVariantPath > ) -> VortexResult < ArrayRef > {
44+ let expr = variant_get ( path, root ( ) ) ;
45+ let array = arr. clone ( ) . apply ( & expr) ?;
46+ let mut ctx = LEGACY_SESSION . create_execution_ctx ( ) ;
47+ array. execute :: < ArrayRef > ( & mut ctx)
48+ }
49+
50+ /// Apply typed variant_get and execute through the full pipeline.
51+ fn apply_variant_get_as (
52+ arr : & ArrayRef ,
53+ path : impl Into < VortexVariantPath > ,
54+ as_dtype : DType ,
55+ ) -> VortexResult < ArrayRef > {
56+ let expr = variant_get_as ( path, as_dtype, root ( ) ) ;
3457 let array = arr. clone ( ) . apply ( & expr) ?;
3558 let mut ctx = LEGACY_SESSION . create_execution_ctx ( ) ;
3659 array. execute :: < ArrayRef > ( & mut ctx)
@@ -47,7 +70,6 @@ fn vortex_to_arrow_variant(arr: &ArrayRef) -> ArrowVariantArray {
4770/// Run variant_get through both Arrow and Vortex on the same input, and assert
4871/// the per-row results (value + validity) are identical by comparing at the Arrow level.
4972fn assert_matches_arrow ( json_rows : & [ & str ] , field : & str ) {
50- // --- Arrow side ---
5173 let arrow_strings: ArrowArrayRef = Arc :: new ( StringArray :: from (
5274 json_rows. iter ( ) . map ( |s| Some ( * s) ) . collect :: < Vec < _ > > ( ) ,
5375 ) ) ;
@@ -62,12 +84,10 @@ fn assert_matches_arrow(json_rows: &[&str], field: &str) {
6284 ArrowVariantArray :: try_new ( arrow_result. as_any ( ) . downcast_ref :: < StructArray > ( ) . unwrap ( ) )
6385 . unwrap ( ) ;
6486
65- // --- Vortex side ---
6687 let vortex_input = ParquetVariantData :: from_arrow_variant ( & arrow_variant) . unwrap ( ) ;
6788 let vortex_result = apply_variant_get ( & vortex_input, field) . unwrap ( ) ;
6889 let vortex_as_arrow = vortex_to_arrow_variant ( & vortex_result) ;
6990
70- // --- Compare row-by-row at Arrow Variant level ---
7191 assert_eq ! (
7292 vortex_as_arrow. len( ) ,
7393 arrow_result_variant. len( ) ,
@@ -94,10 +114,90 @@ fn assert_matches_arrow(json_rows: &[&str], field: &str) {
94114 }
95115}
96116
117+ /// Run variant_get through both Arrow and Vortex for an explicit nested/index path,
118+ /// and assert the per-row results match at the Arrow variant level.
119+ fn assert_matches_arrow_with_path (
120+ json_rows : & [ & str ] ,
121+ path : VortexVariantPath ,
122+ arrow_path : VariantPath < ' static > ,
123+ ) {
124+ let arrow_strings: ArrowArrayRef = Arc :: new ( StringArray :: from (
125+ json_rows. iter ( ) . map ( |s| Some ( * s) ) . collect :: < Vec < _ > > ( ) ,
126+ ) ) ;
127+ let arrow_variant = json_to_variant ( & arrow_strings) . unwrap ( ) ;
128+ let arrow_result = parquet_variant_compute:: variant_get (
129+ & arrow_variant. clone ( ) . into ( ) ,
130+ GetOptions :: new_with_path ( arrow_path) ,
131+ )
132+ . unwrap ( ) ;
133+ let arrow_result_variant =
134+ ArrowVariantArray :: try_new ( arrow_result. as_any ( ) . downcast_ref :: < StructArray > ( ) . unwrap ( ) )
135+ . unwrap ( ) ;
136+
137+ let vortex_input = ParquetVariantData :: from_arrow_variant ( & arrow_variant) . unwrap ( ) ;
138+ let vortex_result = apply_variant_get ( & vortex_input, path) . unwrap ( ) ;
139+ let vortex_as_arrow = vortex_to_arrow_variant ( & vortex_result) ;
140+
141+ assert_eq ! (
142+ vortex_as_arrow. len( ) ,
143+ arrow_result_variant. len( ) ,
144+ "length mismatch"
145+ ) ;
146+
147+ for i in 0 ..arrow_result_variant. len ( ) {
148+ let arrow_is_null = arrow_result_variant. is_null ( i) ;
149+ let vortex_is_null = vortex_as_arrow. is_null ( i) ;
150+
151+ assert_eq ! (
152+ vortex_is_null, arrow_is_null,
153+ "row {i}: null mismatch (vortex={vortex_is_null}, arrow={arrow_is_null})"
154+ ) ;
155+
156+ if !arrow_is_null {
157+ let arrow_value = arrow_result_variant. value ( i) ;
158+ let vortex_value = vortex_as_arrow. value ( i) ;
159+ assert_eq ! (
160+ vortex_value, arrow_value,
161+ "row {i}: value mismatch\n vortex: {vortex_value:?}\n arrow: {arrow_value:?}"
162+ ) ;
163+ }
164+ }
165+ }
166+
167+ /// Run typed variant_get through both Arrow and Vortex for an explicit path,
168+ /// and assert the typed nullable result matches.
169+ fn assert_typed_matches_arrow_with_path (
170+ json_rows : & [ & str ] ,
171+ path : VortexVariantPath ,
172+ arrow_path : VariantPath < ' static > ,
173+ as_dtype : DType ,
174+ arrow_dtype : ArrowDataType ,
175+ ) -> VortexResult < ( ) > {
176+ let arrow_strings: ArrowArrayRef = Arc :: new ( StringArray :: from (
177+ json_rows. iter ( ) . map ( |s| Some ( * s) ) . collect :: < Vec < _ > > ( ) ,
178+ ) ) ;
179+ let arrow_variant = json_to_variant ( & arrow_strings) . unwrap ( ) ;
180+ let arrow_result =
181+ parquet_variant_compute:: variant_get (
182+ & arrow_variant. clone ( ) . into ( ) ,
183+ GetOptions :: new_with_path ( arrow_path) . with_as_type ( Some ( FieldRef :: new (
184+ ArrowField :: new ( "result" , arrow_dtype, true ) ,
185+ ) ) ) ,
186+ )
187+ . unwrap ( ) ;
188+ let expected = ArrayRef :: from_arrow ( arrow_result. as_ref ( ) , true ) ?;
189+
190+ let vortex_input = ParquetVariantData :: from_arrow_variant ( & arrow_variant) . unwrap ( ) ;
191+ let vortex_result = apply_variant_get_as ( & vortex_input, path, as_dtype. clone ( ) ) ?;
192+
193+ assert_eq ! ( vortex_result. dtype( ) , & as_dtype. as_nullable( ) ) ;
194+ assert_arrays_eq ! ( vortex_result, expected) ;
195+ Ok ( ( ) )
196+ }
197+
97198/// Run variant_get through both Arrow and Vortex on nullable input (with NullBuffer),
98199/// and assert the results match.
99200fn assert_matches_arrow_nullable ( json_rows : & [ & str ] , validity : & [ bool ] , field : & str ) {
100- // --- Arrow side ---
101201 let arrow_strings: ArrowArrayRef = Arc :: new ( StringArray :: from (
102202 json_rows. iter ( ) . map ( |s| Some ( * s) ) . collect :: < Vec < _ > > ( ) ,
103203 ) ) ;
@@ -121,12 +221,10 @@ fn assert_matches_arrow_nullable(json_rows: &[&str], validity: &[bool], field: &
121221 ArrowVariantArray :: try_new ( arrow_result. as_any ( ) . downcast_ref :: < StructArray > ( ) . unwrap ( ) )
122222 . unwrap ( ) ;
123223
124- // --- Vortex side ---
125224 let vortex_input = ParquetVariantData :: from_arrow_variant ( & arrow_variant) . unwrap ( ) ;
126225 let vortex_result = apply_variant_get ( & vortex_input, field) . unwrap ( ) ;
127226 let vortex_as_arrow = vortex_to_arrow_variant ( & vortex_result) ;
128227
129- // --- Compare ---
130228 assert_eq ! (
131229 vortex_as_arrow. len( ) ,
132230 arrow_result_variant. len( ) ,
@@ -221,6 +319,60 @@ fn test_variant_get_matches_arrow_nested_object_result() {
221319 ) ;
222320}
223321
322+ #[ test]
323+ fn test_variant_get_matches_arrow_nested_path ( ) {
324+ let path = VortexVariantPath :: from_name ( "outer" ) . join ( "inner" ) ;
325+ let arrow_path = VariantPath :: from_iter ( [
326+ VariantPathElement :: field ( "outer" . to_string ( ) ) ,
327+ VariantPathElement :: field ( "inner" . to_string ( ) ) ,
328+ ] ) ;
329+ assert_matches_arrow_with_path (
330+ & [
331+ r#"{"outer": {"inner": 42}}"# ,
332+ r#"{"outer": {"inner": "x"}}"# ,
333+ r#"{"outer": {"other": true}}"# ,
334+ ] ,
335+ path,
336+ arrow_path,
337+ ) ;
338+ }
339+
340+ #[ test]
341+ fn test_variant_get_matches_arrow_index_path ( ) {
342+ let path = VortexVariantPath :: from_name ( "arr" ) . join ( 1usize ) ;
343+ let arrow_path = VariantPath :: from_iter ( [
344+ VariantPathElement :: field ( "arr" . to_string ( ) ) ,
345+ VariantPathElement :: index ( 1 ) ,
346+ ] ) ;
347+ assert_matches_arrow_with_path (
348+ & [
349+ r#"{"arr": [1, 2, 3]}"# ,
350+ r#"{"arr": ["a", "b"]}"# ,
351+ r#"{"arr": [true]}"# ,
352+ ] ,
353+ path,
354+ arrow_path,
355+ ) ;
356+ }
357+
358+ #[ test]
359+ fn test_variant_get_matches_arrow_typed_path ( ) -> VortexResult < ( ) > {
360+ assert_typed_matches_arrow_with_path (
361+ & [
362+ r#"{"outer": {"inner": 42}}"# ,
363+ r#"{"outer": {"inner": "x"}}"# ,
364+ r#"{"outer": {"other": true}}"# ,
365+ ] ,
366+ VortexVariantPath :: from_name ( "outer" ) . join ( "inner" ) ,
367+ VariantPath :: from_iter ( [
368+ VariantPathElement :: field ( "outer" . to_string ( ) ) ,
369+ VariantPathElement :: field ( "inner" . to_string ( ) ) ,
370+ ] ) ,
371+ DType :: Primitive ( PType :: I64 , Nullability :: NonNullable ) ,
372+ ArrowDataType :: Int64 ,
373+ )
374+ }
375+
224376// ---------------------------------------------------------------------------
225377// Original standalone tests
226378// ---------------------------------------------------------------------------
@@ -310,6 +462,7 @@ fn test_variant_get_different_field() -> VortexResult<()> {
310462// Test data helpers
311463// ---------------------------------------------------------------------------
312464
465+ /// Build a small non-null object variant array used by the standalone tests.
313466fn make_object_array ( ) -> VortexResult < ArrayRef > {
314467 let mut builder = VariantArrayBuilder :: new ( 3 ) ;
315468
@@ -330,6 +483,7 @@ fn make_object_array() -> VortexResult<ArrayRef> {
330483 ParquetVariantData :: from_arrow_variant ( & builder. build ( ) )
331484}
332485
486+ /// Build the same object array shape with an explicit top-level validity bitmap.
333487fn make_nullable_object_array ( ) -> VortexResult < ArrayRef > {
334488 let mut builder = VariantArrayBuilder :: new ( 3 ) ;
335489
0 commit comments