@@ -22,7 +22,7 @@ use std::fs::metadata;
2222use std:: sync:: Arc ;
2323
2424use super :: SendableRecordBatchStream ;
25- use crate :: expressions:: Column ;
25+ use crate :: expressions:: { CastExpr , Column } ;
2626use crate :: projection:: { ProjectionExec , ProjectionExpr } ;
2727use crate :: stream:: RecordBatchReceiverStream ;
2828use crate :: { ColumnStatistics , ExecutionPlan , Statistics } ;
@@ -91,18 +91,18 @@ fn build_file_list_recurse(
9191 Ok ( ( ) )
9292}
9393
94- /// Align `input`'s physical plan schema with `expected_schema` when only field names differ .
94+ /// Align `input`'s physical plan schema with `expected_schema`.
9595///
9696/// This helper is intended for operators that combine independently planned children but
9797/// expose a single declared output schema. It returns `input` unchanged when schemas already
9898/// match exactly. Otherwise, it validates that projection can safely produce the expected
9999/// schema, then wraps `input` in a [`ProjectionExec`] that keeps columns in their existing
100100/// positional order and aliases them to `expected_schema`'s field names.
101101///
102- /// [`ProjectionExec`] can rename fields but preserves column data types, nullability, field
103- /// metadata, and schema metadata from the input expressions. Therefore, this helper rejects
104- /// mismatches in those attributes rather than returning a plan whose schema still differs
105- /// from `expected_schema` .
102+ /// [`ProjectionExec`] can rename fields. When the expected field is nullable and the input
103+ /// field is not, this helper also widens nullability with a same-type [`CastExpr`]. It rejects
104+ /// differences that projection cannot safely reconcile, such as data type, field metadata,
105+ /// schema metadata, and nullability narrowing (nullable input, non-nullable expected field).
106106pub fn project_plan_to_schema (
107107 input : Arc < dyn ExecutionPlan > ,
108108 expected_schema : & SchemaRef ,
@@ -134,7 +134,7 @@ pub fn project_plan_to_schema(
134134 . find_map ( |( i, ( input_field, expected_field) ) | {
135135 if input_field. data_type ( ) != expected_field. data_type ( ) {
136136 Some ( ( i, input_field, expected_field, "data type" ) )
137- } else if input_field. is_nullable ( ) != expected_field. is_nullable ( ) {
137+ } else if input_field. is_nullable ( ) && ! expected_field. is_nullable ( ) {
138138 Some ( ( i, input_field, expected_field, "nullability" ) )
139139 } else if input_field. metadata ( ) != expected_field. metadata ( ) {
140140 Some ( ( i, input_field, expected_field, "metadata" ) )
@@ -157,9 +157,22 @@ pub fn project_plan_to_schema(
157157 . fields ( )
158158 . iter ( )
159159 . enumerate ( )
160- . map ( |( i, expected_field) | ProjectionExpr {
161- expr : Arc :: new ( Column :: new ( input_schema. field ( i) . name ( ) , i) ) ,
162- alias : expected_field. name ( ) . clone ( ) ,
160+ . map ( |( i, expected_field) | {
161+ let input_field = input_schema. field ( i) ;
162+ let column = Arc :: new ( Column :: new ( input_field. name ( ) , i) ) ;
163+ let expr = if !input_field. is_nullable ( ) && expected_field. is_nullable ( ) {
164+ Arc :: new ( CastExpr :: new_with_target_field (
165+ column,
166+ Arc :: clone ( expected_field) ,
167+ None ,
168+ ) ) as _
169+ } else {
170+ column as _
171+ } ;
172+ ProjectionExpr {
173+ expr,
174+ alias : expected_field. name ( ) . clone ( ) ,
175+ }
163176 } )
164177 . collect :: < Vec < _ > > ( ) ;
165178
@@ -484,7 +497,22 @@ mod tests {
484497 }
485498
486499 #[ test]
487- fn project_plan_to_schema_errors_on_nullability_mismatch ( ) {
500+ fn project_plan_to_schema_widens_nullability ( ) -> Result < ( ) > { // widening case: projecting onto a nullable expected field must succeed
501+ let input = empty_exec ( vec ! [ Field :: new( "a" , DataType :: Int32 , false ) ] ) ; // input field `a` is created with the nullable flag set to `false`
502+ let expected_schema = Arc :: new ( Schema :: new ( vec ! [ Field :: new(
503+ "renamed" ,
504+ DataType :: Int32 ,
505+ true , // expected field is nullable; it also differs from the input by name
506+ ) ] ) ) ;
507+
508+ let result = project_plan_to_schema ( input, & expected_schema) ?;
509+
510+ assert_eq ! ( result. schema( ) , expected_schema) ; // whole-schema equality, so both the rename and the widened nullability must be reflected
511+ Ok ( ( ) )
512+ }
513+
514+ #[ test]
515+ fn project_plan_to_schema_errors_on_nullability_narrowing ( ) {
488516 let input = empty_exec ( vec ! [ Field :: new( "a" , DataType :: Int32 , true ) ] ) ;
489517 let expected_schema = Arc :: new ( Schema :: new ( vec ! [ Field :: new(
490518 "renamed" ,
0 commit comments