@@ -78,8 +78,8 @@ use datafusion::physical_plan::expressions::{
7878} ;
7979use datafusion:: physical_plan:: filter:: FilterExec ;
8080use datafusion:: physical_plan:: joins:: {
81- HashJoinExec , NestedLoopJoinExec , PartitionMode , SortMergeJoinExec ,
82- StreamJoinPartitionMode , SymmetricHashJoinExec ,
81+ HashJoinExec , HashTableLookupExpr , NestedLoopJoinExec , PartitionMode ,
82+ SortMergeJoinExec , StreamJoinPartitionMode , SymmetricHashJoinExec ,
8383} ;
8484use datafusion:: physical_plan:: limit:: { GlobalLimitExec , LocalLimitExec } ;
8585use datafusion:: physical_plan:: placeholder_row:: PlaceholderRowExec ;
@@ -113,6 +113,7 @@ use datafusion_expr::{
113113use datafusion_functions_aggregate:: average:: avg_udaf;
114114use datafusion_functions_aggregate:: nth_value:: nth_value_udaf;
115115use datafusion_functions_aggregate:: string_agg:: string_agg_udaf;
116+ use datafusion_physical_plan:: joins:: join_hash_map:: JoinHashMapU32 ;
116117use datafusion_proto:: physical_plan:: {
117118 AsExecutionPlan , DefaultPhysicalExtensionCodec , PhysicalExtensionCodec ,
118119} ;
@@ -2264,3 +2265,110 @@ async fn roundtrip_listing_table_with_schema_metadata() -> Result<()> {
22642265
22652266 roundtrip_test ( plan)
22662267}
2268+
2269+ #[ tokio:: test]
2270+ async fn roundtrip_async_func_exec ( ) -> Result < ( ) > {
2271+ #[ derive( Debug , PartialEq , Eq , Hash ) ]
2272+ struct TestAsyncUDF {
2273+ signature : Signature ,
2274+ }
2275+
2276+ impl TestAsyncUDF {
2277+ fn new ( ) -> Self {
2278+ Self {
2279+ signature : Signature :: exact ( vec ! [ DataType :: Int64 ] , Volatility :: Volatile ) ,
2280+ }
2281+ }
2282+ }
2283+
2284+ impl ScalarUDFImpl for TestAsyncUDF {
2285+ fn as_any ( & self ) -> & dyn Any {
2286+ self
2287+ }
2288+
2289+ fn name ( & self ) -> & str {
2290+ "test_async_udf"
2291+ }
2292+
2293+ fn signature ( & self ) -> & Signature {
2294+ & self . signature
2295+ }
2296+
2297+ fn return_type ( & self , _arg_types : & [ DataType ] ) -> Result < DataType > {
2298+ Ok ( DataType :: Int64 )
2299+ }
2300+
2301+ fn invoke_with_args ( & self , _args : ScalarFunctionArgs ) -> Result < ColumnarValue > {
2302+ not_impl_err ! ( "Must call from `invoke_async_with_args`" )
2303+ }
2304+ }
2305+
2306+ #[ async_trait:: async_trait]
2307+ impl AsyncScalarUDFImpl for TestAsyncUDF {
2308+ async fn invoke_async_with_args (
2309+ & self ,
2310+ args : ScalarFunctionArgs ,
2311+ ) -> Result < ColumnarValue > {
2312+ Ok ( args. args [ 0 ] . clone ( ) )
2313+ }
2314+ }
2315+
2316+ let ctx = SessionContext :: new ( ) ;
2317+ let async_udf = AsyncScalarUDF :: new ( Arc :: new ( TestAsyncUDF :: new ( ) ) ) ;
2318+ ctx. register_udf ( async_udf. into_scalar_udf ( ) ) ;
2319+
2320+ let physical_plan = ctx
2321+ . sql ( "select test_async_udf(1)" )
2322+ . await ?
2323+ . create_physical_plan ( )
2324+ . await ?;
2325+
2326+ roundtrip_test_with_context ( physical_plan, & ctx) ?;
2327+
2328+ Ok ( ( ) )
2329+ }
2330+
/// Test that HashTableLookupExpr serializes to lit(true)
///
/// HashTableLookupExpr contains a runtime hash table that cannot be serialized.
/// The serialization code replaces it with lit(true) which is safe because
/// it's a performance optimization filter, not a correctness requirement.
#[test]
fn roundtrip_hash_table_lookup_expr_to_lit() -> Result<()> {
    // Create a simple schema and input plan. The schema handle is not needed
    // afterwards, so it is moved into the plan instead of cloned.
    let schema = Arc::new(Schema::new(vec![Field::new("col", DataType::Int64, false)]));
    let input = Arc::new(EmptyExec::new(schema));

    // Create a HashTableLookupExpr - it will be replaced with lit(true) during serialization
    let hash_map = Arc::new(JoinHashMapU32::with_capacity(0));
    let hash_expr: Arc<dyn PhysicalExpr> = Arc::new(Column::new("col", 0));
    let lookup_expr: Arc<dyn PhysicalExpr> = Arc::new(HashTableLookupExpr::new(
        hash_expr,
        hash_map,
        "test_lookup".to_string(),
    ));

    // Create a filter with the lookup expression
    let filter = Arc::new(FilterExec::try_new(lookup_expr, input)?);

    // Serialize. The filter is not used again, so it is moved rather than cloned.
    let ctx = SessionContext::new();
    let codec = DefaultPhysicalExtensionCodec {};
    let proto: protobuf::PhysicalPlanNode =
        protobuf::PhysicalPlanNode::try_from_physical_plan(filter, &codec)
            .expect("serialization should succeed");

    // Deserialize
    let result: Arc<dyn ExecutionPlan> = proto
        .try_into_physical_plan(&ctx.task_ctx(), &codec)
        .expect("deserialization should succeed");

    // The deserialized plan should have lit(true) instead of HashTableLookupExpr.
    // Each downcast carries a message so a failure says which expectation broke.
    let result_filter = result
        .as_any()
        .downcast_ref::<FilterExec>()
        .expect("deserialized plan should be a FilterExec");
    let literal = result_filter
        .predicate()
        .as_any()
        .downcast_ref::<Literal>()
        .expect("filter predicate should be a Literal after the round trip");
    assert_eq!(
        *literal.value(),
        ScalarValue::Boolean(Some(true)),
        "HashTableLookupExpr should round-trip as lit(true)"
    );

    Ok(())
}
0 commit comments