@@ -78,9 +78,9 @@ use parquet::arrow::arrow_reader::{ArrowPredicate, RowFilter};
7878use parquet:: file:: metadata:: ParquetMetaData ;
7979use parquet:: schema:: types:: SchemaDescriptor ;
8080
81- use datafusion_common:: { Result , ScalarValue } ;
8281use datafusion_common:: cast:: as_boolean_array;
8382use datafusion_common:: tree_node:: { TreeNode , TreeNodeRecursion , TreeNodeVisitor } ;
83+ use datafusion_common:: { Result , ScalarValue } ;
8484use datafusion_physical_expr:: ScalarFunctionExpr ;
8585use datafusion_physical_expr:: expressions:: { Column , Literal } ;
8686use datafusion_physical_expr:: utils:: { collect_columns, reassign_expr_columns} ;
@@ -398,7 +398,7 @@ impl<'schema> PushdownChecker<'schema> {
398398/// See <https://github.com/datafusion-contrib/datafusion-variant> for the
399399/// `datafusion-variant` crate that defines these UDFs.
400400const VARIANT_UDF_NAMES : & [ & str ] = & [
401- "variant_get" , // variant_get, variant_get_str, variant_get_int, etc.
401+ "variant_get" , // variant_get, variant_get_str, variant_get_int, etc.
402402 "is_variant_null" ,
403403] ;
404404
@@ -513,76 +513,71 @@ impl TreeNodeVisitor<'_> for PushdownChecker<'_> {
513513 // - `metadata` — always needed (variant metadata dictionary)
514514 // - `value` — always needed (fallback for non-shredded values)
515515 // - `typed_value.<path...>` — the specific shredded field(s)
516- if let Some ( func_expr) = node. as_any ( ) . downcast_ref :: < ScalarFunctionExpr > ( ) {
517- if is_variant_udf_name ( func_expr. name ( ) ) {
518- if let Some ( column) = func_expr
519- . args ( )
520- . first ( )
521- . and_then ( |a| a. as_any ( ) . downcast_ref :: < Column > ( ) )
522- {
523- let Ok ( root_idx) = self . file_schema . index_of ( column. name ( ) ) else {
524- self . projected_columns = true ;
525- return Ok ( TreeNodeRecursion :: Jump ) ;
526- } ;
527-
528- // Extract the variant path from the second argument.
529- // It can be a string literal or a list of string literals.
530- let variant_path: Option < Vec < String > > =
531- func_expr. args ( ) . get ( 1 ) . and_then ( |arg| {
532- let lit = arg. as_any ( ) . downcast_ref :: < Literal > ( ) ?;
533- match lit. value ( ) {
534- ScalarValue :: Utf8 ( Some ( s) )
535- | ScalarValue :: Utf8View ( Some ( s) )
536- | ScalarValue :: LargeUtf8 ( Some ( s) ) => {
537- Some ( vec ! [ s. to_string( ) ] )
538- }
539- ScalarValue :: List ( arr) if !arr. is_null ( 0 ) => {
540- let values = arr. value ( 0 ) ;
541- let strings =
542- values. as_any ( ) . downcast_ref :: < StringArray > ( ) ?;
543- let path: Vec < String > = ( 0 ..strings. len ( ) )
544- . filter_map ( |i| {
545- strings
546- . is_valid ( i)
547- . then ( || strings. value ( i) . to_string ( ) )
548- } )
549- . collect ( ) ;
550- Some ( path)
551- }
552- _ => None ,
553- }
554- } ) ;
555-
556- // Record struct field accesses for the variant sub-fields:
557- // metadata, value, and typed_value.<path>
558- self . struct_field_accesses . push ( StructFieldAccess {
559- root_index : root_idx,
560- field_path : vec ! [ "metadata" . to_string( ) ] ,
561- } ) ;
562- self . struct_field_accesses . push ( StructFieldAccess {
563- root_index : root_idx,
564- field_path : vec ! [ "value" . to_string( ) ] ,
565- } ) ;
566-
567- if let Some ( path) = variant_path {
568- // typed_value.<field1>.<field2>...
569- let mut typed_value_path = vec ! [ "typed_value" . to_string( ) ] ;
570- typed_value_path. extend ( path) ;
571- self . struct_field_accesses . push ( StructFieldAccess {
572- root_index : root_idx,
573- field_path : typed_value_path,
574- } ) ;
575- } else {
576- // Can't determine path statically — read entire typed_value
577- self . struct_field_accesses . push ( StructFieldAccess {
578- root_index : root_idx,
579- field_path : vec ! [ "typed_value" . to_string( ) ] ,
580- } ) ;
581- }
516+ if let Some ( func_expr) = node. as_any ( ) . downcast_ref :: < ScalarFunctionExpr > ( )
517+ && is_variant_udf_name ( func_expr. name ( ) )
518+ && let Some ( column) = func_expr
519+ . args ( )
520+ . first ( )
521+ . and_then ( |a| a. as_any ( ) . downcast_ref :: < Column > ( ) )
522+ {
523+ let Ok ( root_idx) = self . file_schema . index_of ( column. name ( ) ) else {
524+ self . projected_columns = true ;
525+ return Ok ( TreeNodeRecursion :: Jump ) ;
526+ } ;
582527
583- return Ok ( TreeNodeRecursion :: Jump ) ;
584- }
528+ // Extract the variant path from the second argument.
529+ // It can be a string literal or a list of string literals.
530+ let variant_path: Option < Vec < String > > =
531+ func_expr. args ( ) . get ( 1 ) . and_then ( |arg| {
532+ let lit = arg. as_any ( ) . downcast_ref :: < Literal > ( ) ?;
533+ match lit. value ( ) {
534+ ScalarValue :: Utf8 ( Some ( s) )
535+ | ScalarValue :: Utf8View ( Some ( s) )
536+ | ScalarValue :: LargeUtf8 ( Some ( s) ) => {
537+ Some ( vec ! [ s. to_string( ) ] )
538+ }
539+ ScalarValue :: List ( arr) if !arr. is_null ( 0 ) => {
540+ let values = arr. value ( 0 ) ;
541+ let strings =
542+ values. as_any ( ) . downcast_ref :: < StringArray > ( ) ?;
543+ let path: Vec < String > = ( 0 ..strings. len ( ) )
544+ . filter ( |& i| strings. is_valid ( i) )
545+ . map ( |i| strings. value ( i) . to_string ( ) )
546+ . collect ( ) ;
547+ Some ( path)
548+ }
549+ _ => None ,
550+ }
551+ } ) ;
552+
553+ // Record struct field accesses for the variant sub-fields:
554+ // metadata, value, and typed_value.<path>
555+ self . struct_field_accesses . push ( StructFieldAccess {
556+ root_index : root_idx,
557+ field_path : vec ! [ "metadata" . to_string( ) ] ,
558+ } ) ;
559+ self . struct_field_accesses . push ( StructFieldAccess {
560+ root_index : root_idx,
561+ field_path : vec ! [ "value" . to_string( ) ] ,
562+ } ) ;
563+
564+ if let Some ( path) = variant_path {
565+ // typed_value.<field1>.<field2>...
566+ let mut typed_value_path = vec ! [ "typed_value" . to_string( ) ] ;
567+ typed_value_path. extend ( path) ;
568+ self . struct_field_accesses . push ( StructFieldAccess {
569+ root_index : root_idx,
570+ field_path : typed_value_path,
571+ } ) ;
572+ } else {
573+ // Can't determine path statically — read entire typed_value
574+ self . struct_field_accesses . push ( StructFieldAccess {
575+ root_index : root_idx,
576+ field_path : vec ! [ "typed_value" . to_string( ) ] ,
577+ } ) ;
585578 }
579+
580+ return Ok ( TreeNodeRecursion :: Jump ) ;
586581 }
587582
588583 if let Some ( column) = node. as_any ( ) . downcast_ref :: < Column > ( )
0 commit comments