@@ -265,15 +265,7 @@ pub fn evaluate_expression(
265265 ( Literal ( scalar) , _) => {
266266 validate_array_type ( scalar. to_array ( batch. num_rows ( ) ) ?, result_type)
267267 }
268- ( Column ( name) , _) => {
269- // Column extraction uses ordinal-based struct validation because column mapping
270- // can cause physical/logical name mismatches. apply_schema handles renaming.
271- let arr = extract_column ( batch, name) ?;
272- if let Some ( expected) = result_type {
273- ensure_data_types ( expected, arr. data_type ( ) , ValidationMode :: TypesOnly ) ?;
274- }
275- Ok ( arr)
276- }
268+ ( Column ( name) , _) => validate_array_type ( extract_column ( batch, name) ?, result_type) ,
277269 ( Struct ( fields, nullability) , Some ( DataType :: Struct ( output_schema) ) ) => {
278270 evaluate_struct_expression ( fields, batch, output_schema, nullability. as_ref ( ) )
279271 }
@@ -2396,7 +2388,7 @@ mod tests {
23962388 }
23972389
23982390 #[ test]
2399- fn column_extract_struct_with_mismatched_field_names ( ) {
2391+ fn column_extract_struct_rejects_mismatched_field_names ( ) {
24002392 let batch = make_struct_batch (
24012393 vec ! [
24022394 ArrowField :: new( "col-abc-001" , ArrowDataType :: Int64 , true ) ,
@@ -2408,7 +2400,6 @@ mod tests {
24082400 ] ,
24092401 ) ;
24102402
2411- // Logical names differ from physical names due to column mapping
24122403 let logical_type = DataType :: try_struct_type ( [
24132404 StructField :: nullable ( "my_column" , DataType :: LONG ) ,
24142405 StructField :: nullable ( "other_column" , DataType :: LONG ) ,
@@ -2417,47 +2408,22 @@ mod tests {
24172408
24182409 let expr = column_expr ! ( "stats" ) ;
24192410 let result = evaluate_expression ( & expr, & batch, Some ( & logical_type) ) ;
2420-
2421- // Ordinal-based validation passes: same field count and types by position.
2422- // The downstream apply_schema transformation handles renaming.
2423- let arr = result. expect ( "should succeed with mismatched names but matching types" ) ;
2424- let struct_arr = arr. as_any ( ) . downcast_ref :: < StructArray > ( ) . unwrap ( ) ;
2425- assert_eq ! ( struct_arr. num_columns( ) , 2 ) ;
2426- assert_eq ! ( struct_arr. len( ) , 2 ) ;
2427- }
2428-
2429- #[ test]
2430- fn column_extract_struct_rejects_mismatched_field_count ( ) {
2431- let batch = make_struct_batch (
2432- vec ! [ ArrowField :: new( "col-abc-001" , ArrowDataType :: Int64 , true ) ] ,
2433- vec ! [ Arc :: new( Int64Array :: from( vec![ Some ( 1 ) , Some ( 2 ) ] ) ) ] ,
2434- ) ;
2435-
2436- let logical_type = DataType :: try_struct_type ( [
2437- StructField :: nullable ( "a" , DataType :: LONG ) ,
2438- StructField :: nullable ( "b" , DataType :: LONG ) ,
2439- ] )
2440- . unwrap ( ) ;
2441-
2442- let expr = column_expr ! ( "stats" ) ;
2443- let result = evaluate_expression ( & expr, & batch, Some ( & logical_type) ) ;
2444- assert_result_error_with_message ( result, "Struct field count mismatch" ) ;
2411+ assert_result_error_with_message ( result, "Missing Struct fields" ) ;
24452412 }
24462413
24472414 #[ test]
24482415 fn column_extract_struct_rejects_mismatched_child_types ( ) {
24492416 let batch = make_struct_batch (
24502417 vec ! [
2451- ArrowField :: new( "col-abc-001 " , ArrowDataType :: Int64 , true ) ,
2452- ArrowField :: new( "col-abc-002 " , ArrowDataType :: Utf8 , true ) ,
2418+ ArrowField :: new( "a " , ArrowDataType :: Int64 , true ) ,
2419+ ArrowField :: new( "b " , ArrowDataType :: Utf8 , true ) ,
24532420 ] ,
24542421 vec ! [
24552422 Arc :: new( Int64Array :: from( vec![ Some ( 1 ) ] ) ) ,
24562423 Arc :: new( StringArray :: from( vec![ Some ( "x" ) ] ) ) ,
24572424 ] ,
24582425 ) ;
24592426
2460- // Expect two LONG columns, but the second arrow field is Utf8
24612427 let logical_type = DataType :: try_struct_type ( [
24622428 StructField :: nullable ( "a" , DataType :: LONG ) ,
24632429 StructField :: nullable ( "b" , DataType :: LONG ) ,
@@ -2470,7 +2436,7 @@ mod tests {
24702436 }
24712437
24722438 #[ test]
2473- fn column_extract_struct_with_matching_names_still_works ( ) {
2439+ fn column_extract_struct_with_matching_names_works ( ) {
24742440 let batch = make_struct_batch (
24752441 vec ! [
24762442 ArrowField :: new( "a" , ArrowDataType :: Int64 , true ) ,
@@ -2493,15 +2459,11 @@ mod tests {
24932459 assert ! ( result. is_ok( ) ) ;
24942460 }
24952461
2496- /// Exercises the exact code path from `get_add_transform_expr` where a `struct_from`
2497- /// expression wraps `column_expr!("add.stats_parsed")`. When the checkpoint parquet has
2498- /// stats_parsed with physical column names (e.g. `col-abc-001`) but the output schema
2499- /// uses logical names (e.g. `id`), `evaluate_struct_expression` calls
2500- /// `evaluate_expression(Column, struct_result_type)` with mismatched field names.
2501- /// Without ordinal-based validation this fails with a name mismatch error.
2462+ /// When a `struct_from` expression wraps a `Column` referencing stats_parsed, and the
2463+ /// checkpoint parquet has physical column names (e.g. `col-abc-001`) but the output schema
2464+ /// uses logical names (e.g. `id`), name-based validation correctly rejects the mismatch.
25022465 #[ test]
2503- fn struct_from_with_column_tolerates_nested_name_mismatch ( ) {
2504- // Build a batch mimicking checkpoint data: add.stats_parsed uses physical names
2466+ fn struct_from_with_column_rejects_nested_name_mismatch ( ) {
25052467 let stats_fields: Vec < ArrowField > = vec ! [
25062468 ArrowField :: new( "col-abc-001" , ArrowDataType :: Int64 , true ) ,
25072469 ArrowField :: new( "col-abc-002" , ArrowDataType :: Int64 , true ) ,
@@ -2541,7 +2503,6 @@ mod tests {
25412503 ) ] ) ;
25422504 let batch = RecordBatch :: try_new ( Arc :: new ( schema) , vec ! [ Arc :: new( add_struct) ] ) . unwrap ( ) ;
25432505
2544- // struct_from mimicking get_add_transform_expr: wraps a Column referencing stats_parsed
25452506 let expr = Expr :: struct_from ( [
25462507 column_expr_ref ! ( "add.path" ) ,
25472508 column_expr_ref ! ( "add.stats_parsed" ) ,
@@ -2561,36 +2522,6 @@ mod tests {
25612522 . unwrap ( ) ;
25622523
25632524 let result = evaluate_expression ( & expr, & batch, Some ( & output_type) ) ;
2564- result. expect ( "struct_from with Column sub-expression should tolerate field name mismatch" ) ;
2565- }
2566-
2567- #[ test]
2568- fn column_extract_nested_struct_with_mismatched_names ( ) {
2569- let inner_fields = vec ! [ ArrowField :: new( "phys-inner" , ArrowDataType :: Int64 , true ) ] ;
2570- let inner_struct = ArrowDataType :: Struct ( inner_fields. clone ( ) . into ( ) ) ;
2571- let batch = make_struct_batch (
2572- vec ! [ ArrowField :: new( "phys-outer" , inner_struct, true ) ] ,
2573- vec ! [ Arc :: new(
2574- StructArray :: try_new(
2575- inner_fields. into( ) ,
2576- vec![ Arc :: new( Int64Array :: from( vec![ Some ( 42 ) ] ) ) ] ,
2577- None ,
2578- )
2579- . unwrap( ) ,
2580- ) ] ,
2581- ) ;
2582-
2583- let logical_type = DataType :: try_struct_type ( [ StructField :: nullable (
2584- "logical_outer" ,
2585- DataType :: struct_type_unchecked ( [ StructField :: nullable (
2586- "logical_inner" ,
2587- DataType :: LONG ,
2588- ) ] ) ,
2589- ) ] )
2590- . unwrap ( ) ;
2591-
2592- let expr = column_expr ! ( "stats" ) ;
2593- let result = evaluate_expression ( & expr, & batch, Some ( & logical_type) ) ;
2594- assert ! ( result. is_ok( ) ) ;
2525+ assert_result_error_with_message ( result, "Missing Struct fields" ) ;
25952526 }
25962527}
0 commit comments