@@ -78,9 +78,9 @@ use parquet::arrow::arrow_reader::{ArrowPredicate, RowFilter};
7878use parquet:: file:: metadata:: ParquetMetaData ;
7979use parquet:: schema:: types:: SchemaDescriptor ;
8080
81- use datafusion_common:: { Result , ScalarValue } ;
8281use datafusion_common:: cast:: as_boolean_array;
8382use datafusion_common:: tree_node:: { TreeNode , TreeNodeRecursion , TreeNodeVisitor } ;
83+ use datafusion_common:: { Result , ScalarValue } ;
8484use datafusion_physical_expr:: ScalarFunctionExpr ;
8585use datafusion_physical_expr:: expressions:: { Column , Literal } ;
8686use datafusion_physical_expr:: utils:: { collect_columns, reassign_expr_columns} ;
@@ -398,7 +398,7 @@ impl<'schema> PushdownChecker<'schema> {
398398/// See <https://github.com/datafusion-contrib/datafusion-variant> for the
399399/// `datafusion-variant` crate that defines these UDFs.
400400const VARIANT_UDF_NAMES : & [ & str ] = & [
401- "variant_get" , // variant_get, variant_get_str, variant_get_int, etc.
401+ "variant_get" , // variant_get, variant_get_str, variant_get_int, etc.
402402 "is_variant_null" ,
403403] ;
404404
@@ -513,76 +513,69 @@ impl TreeNodeVisitor<'_> for PushdownChecker<'_> {
513513 // - `metadata` — always needed (variant metadata dictionary)
514514 // - `value` — always needed (fallback for non-shredded values)
515515 // - `typed_value.<path...>` — the specific shredded field(s)
516- if let Some ( func_expr) = node. as_any ( ) . downcast_ref :: < ScalarFunctionExpr > ( ) {
517- if is_variant_udf_name ( func_expr. name ( ) ) {
518- if let Some ( column) = func_expr
519- . args ( )
520- . first ( )
521- . and_then ( |a| a. as_any ( ) . downcast_ref :: < Column > ( ) )
522- {
523- let Ok ( root_idx) = self . file_schema . index_of ( column. name ( ) ) else {
524- self . projected_columns = true ;
525- return Ok ( TreeNodeRecursion :: Jump ) ;
526- } ;
527-
528- // Extract the variant path from the second argument.
529- // It can be a string literal or a list of string literals.
530- let variant_path: Option < Vec < String > > =
531- func_expr. args ( ) . get ( 1 ) . and_then ( |arg| {
532- let lit = arg. as_any ( ) . downcast_ref :: < Literal > ( ) ?;
533- match lit. value ( ) {
534- ScalarValue :: Utf8 ( Some ( s) )
535- | ScalarValue :: Utf8View ( Some ( s) )
536- | ScalarValue :: LargeUtf8 ( Some ( s) ) => {
537- Some ( vec ! [ s. to_string( ) ] )
538- }
539- ScalarValue :: List ( arr) if !arr. is_null ( 0 ) => {
540- let values = arr. value ( 0 ) ;
541- let strings =
542- values. as_any ( ) . downcast_ref :: < StringArray > ( ) ?;
543- let path: Vec < String > = ( 0 ..strings. len ( ) )
544- . filter_map ( |i| {
545- strings
546- . is_valid ( i)
547- . then ( || strings. value ( i) . to_string ( ) )
548- } )
549- . collect ( ) ;
550- Some ( path)
551- }
552- _ => None ,
553- }
554- } ) ;
555-
556- // Record struct field accesses for the variant sub-fields:
557- // metadata, value, and typed_value.<path>
558- self . struct_field_accesses . push ( StructFieldAccess {
559- root_index : root_idx,
560- field_path : vec ! [ "metadata" . to_string( ) ] ,
561- } ) ;
562- self . struct_field_accesses . push ( StructFieldAccess {
563- root_index : root_idx,
564- field_path : vec ! [ "value" . to_string( ) ] ,
565- } ) ;
566-
567- if let Some ( path) = variant_path {
568- // typed_value.<field1>.<field2>...
569- let mut typed_value_path = vec ! [ "typed_value" . to_string( ) ] ;
570- typed_value_path. extend ( path) ;
571- self . struct_field_accesses . push ( StructFieldAccess {
572- root_index : root_idx,
573- field_path : typed_value_path,
574- } ) ;
575- } else {
576- // Can't determine path statically — read entire typed_value
577- self . struct_field_accesses . push ( StructFieldAccess {
578- root_index : root_idx,
579- field_path : vec ! [ "typed_value" . to_string( ) ] ,
580- } ) ;
581- }
516+ if let Some ( func_expr) = node. as_any ( ) . downcast_ref :: < ScalarFunctionExpr > ( )
517+ && is_variant_udf_name ( func_expr. name ( ) )
518+ && let Some ( column) = func_expr
519+ . args ( )
520+ . first ( )
521+ . and_then ( |a| a. as_any ( ) . downcast_ref :: < Column > ( ) )
522+ {
523+ let Ok ( root_idx) = self . file_schema . index_of ( column. name ( ) ) else {
524+ self . projected_columns = true ;
525+ return Ok ( TreeNodeRecursion :: Jump ) ;
526+ } ;
582527
583- return Ok ( TreeNodeRecursion :: Jump ) ;
584- }
528+ // Extract the variant path from the second argument.
529+ // It can be a string literal or a list of string literals.
530+ let variant_path: Option < Vec < String > > =
531+ func_expr. args ( ) . get ( 1 ) . and_then ( |arg| {
532+ let lit = arg. as_any ( ) . downcast_ref :: < Literal > ( ) ?;
533+ match lit. value ( ) {
534+ ScalarValue :: Utf8 ( Some ( s) )
535+ | ScalarValue :: Utf8View ( Some ( s) )
536+ | ScalarValue :: LargeUtf8 ( Some ( s) ) => Some ( vec ! [ s. to_string( ) ] ) ,
537+ ScalarValue :: List ( arr) if !arr. is_null ( 0 ) => {
538+ let values = arr. value ( 0 ) ;
539+ let strings =
540+ values. as_any ( ) . downcast_ref :: < StringArray > ( ) ?;
541+ let path: Vec < String > = ( 0 ..strings. len ( ) )
542+ . filter ( |& i| strings. is_valid ( i) )
543+ . map ( |i| strings. value ( i) . to_string ( ) )
544+ . collect ( ) ;
545+ Some ( path)
546+ }
547+ _ => None ,
548+ }
549+ } ) ;
550+
551+ // Record struct field accesses for the variant sub-fields:
552+ // metadata, value, and typed_value.<path>
553+ self . struct_field_accesses . push ( StructFieldAccess {
554+ root_index : root_idx,
555+ field_path : vec ! [ "metadata" . to_string( ) ] ,
556+ } ) ;
557+ self . struct_field_accesses . push ( StructFieldAccess {
558+ root_index : root_idx,
559+ field_path : vec ! [ "value" . to_string( ) ] ,
560+ } ) ;
561+
562+ if let Some ( path) = variant_path {
563+ // typed_value.<field1>.<field2>...
564+ let mut typed_value_path = vec ! [ "typed_value" . to_string( ) ] ;
565+ typed_value_path. extend ( path) ;
566+ self . struct_field_accesses . push ( StructFieldAccess {
567+ root_index : root_idx,
568+ field_path : typed_value_path,
569+ } ) ;
570+ } else {
571+ // Can't determine path statically — read entire typed_value
572+ self . struct_field_accesses . push ( StructFieldAccess {
573+ root_index : root_idx,
574+ field_path : vec ! [ "typed_value" . to_string( ) ] ,
575+ } ) ;
585576 }
577+
578+ return Ok ( TreeNodeRecursion :: Jump ) ;
586579 }
587580
588581 if let Some ( column) = node. as_any ( ) . downcast_ref :: < Column > ( )
0 commit comments