Skip to content

Commit a32c4b2

Browse files
adriangbclaude
andauthored
fix: preserve source field metadata in TryCast expressions (#21390)
## Which issue does this PR close? N/A — discovered while investigating metadata propagation through cast expressions in #21322 ## Rationale for this change `Expr::Cast` preserves the source field's metadata through a dedicated `to_field` handler in `expr_schema.rs`, but `Expr::TryCast` fell through to the default case which creates a `Field::new(...)` without any metadata. This caused source column metadata to be silently dropped when using `TRY_CAST`. ## What changes are included in this PR? - Added a dedicated `to_field` handler for `Expr::TryCast` in `expr_schema.rs` that preserves source field metadata (matching `Expr::Cast` behavior), while keeping TryCast's always-nullable semantics. - Added SLT tests in `metadata.slt` verifying metadata preservation through `TRY_CAST` on both timestamp and integer columns. ## Are these changes tested? Yes — new sqllogictest cases in `metadata.slt` using `arrow_metadata()` to verify metadata is preserved through `TRY_CAST`. ## Are there any user-facing changes? `TRY_CAST` now preserves source field metadata, consistent with `CAST` behavior. 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 0b8c4c5 commit a32c4b2

File tree

2 files changed

+94
-17
lines changed

2 files changed

+94
-17
lines changed

datafusion/expr/src/expr_schema.rs

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,24 @@ pub trait ExprSchemable {
6565
-> Result<(DataType, bool)>;
6666
}
6767

68+
/// Derives the output field for a cast expression from the source field.
69+
/// For `TryCast`, `force_nullable` is `true` since a failed cast returns NULL.
70+
fn cast_output_field(
71+
source_field: &FieldRef,
72+
target_type: &DataType,
73+
force_nullable: bool,
74+
) -> Arc<Field> {
75+
let mut f = source_field
76+
.as_ref()
77+
.clone()
78+
.with_data_type(target_type.clone())
79+
.with_metadata(source_field.metadata().clone());
80+
if force_nullable {
81+
f = f.with_nullable(true);
82+
}
83+
Arc::new(f)
84+
}
85+
6886
impl ExprSchemable for Expr {
6987
/// Returns the [arrow::datatypes::DataType] of the expression
7088
/// based on [ExprSchema]
@@ -553,33 +571,25 @@ impl ExprSchemable for Expr {
553571
func.return_field_from_args(args)
554572
}
555573
// _ => Ok((self.get_type(schema)?, self.nullable(schema)?)),
556-
Expr::Cast(Cast { expr, field }) => expr
557-
.to_field(schema)
558-
.map(|(_table_ref, destination_field)| {
559-
// This propagates the nullability of the input rather than
560-
// force the nullability of the destination field. This is
561-
// usually the desired behaviour (i.e., specifying a cast
562-
// destination type usually does not force a user to pick
563-
// nullability, and assuming `true` would prevent the non-nullability
564-
// of the parent expression to make the result eligible for
565-
// optimizations that only apply to non-nullable values).
566-
destination_field
567-
.as_ref()
568-
.clone()
569-
.with_data_type(field.data_type().clone())
570-
.with_metadata(destination_field.metadata().clone())
574+
Expr::Cast(Cast { expr, field }) => {
575+
expr.to_field(schema).map(|(_table_ref, src)| {
576+
cast_output_field(&src, field.data_type(), false)
571577
})
572-
.map(Arc::new),
578+
}
573579
Expr::Placeholder(Placeholder {
574580
id: _,
575581
field: Some(field),
576582
}) => Ok(Arc::clone(field).renamed(&schema_name)),
583+
Expr::TryCast(TryCast { expr, field }) => {
584+
expr.to_field(schema).map(|(_table_ref, src)| {
585+
cast_output_field(&src, field.data_type(), true)
586+
})
587+
}
577588
Expr::Like(_)
578589
| Expr::SimilarTo(_)
579590
| Expr::Not(_)
580591
| Expr::Between(_)
581592
| Expr::Case(_)
582-
| Expr::TryCast(_)
583593
| Expr::InList(_)
584594
| Expr::InSubquery(_)
585595
| Expr::SetComparison(_)

datafusion/sqllogictest/test_files/metadata.slt

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,34 @@ FROM table_with_metadata;
220220
2020-09-08
221221
2020-09-08
222222

223+
# Regression test: CAST should preserve source field metadata
224+
query DT
225+
SELECT
226+
CAST(ts AS DATE) as casted,
227+
arrow_metadata(CAST(ts AS DATE), 'metadata_key')
228+
FROM table_with_metadata;
229+
----
230+
2020-09-08 ts non-nullable field
231+
2020-09-08 ts non-nullable field
232+
2020-09-08 ts non-nullable field
233+
234+
# Regression test: CAST preserves metadata on integer column
235+
query IT
236+
SELECT
237+
CAST(id AS BIGINT) as casted,
238+
arrow_metadata(CAST(id AS BIGINT), 'metadata_key')
239+
FROM table_with_metadata;
240+
----
241+
1 the id field
242+
NULL the id field
243+
3 the id field
244+
245+
# Regression test: CAST with single-argument arrow_metadata (returns full map)
246+
query ?
247+
select arrow_metadata(CAST(id AS BIGINT)) from table_with_metadata limit 1;
248+
----
249+
{metadata_key: the id field}
250+
223251
# Regression test: distinct with cast
224252
query D
225253
SELECT DISTINCT (ts::DATE) AS dist
@@ -347,5 +375,44 @@ select arrow_metadata(id) from table_with_metadata limit 1;
347375
----
348376
{metadata_key: the id field}
349377

378+
# Regression test: TRY_CAST should preserve source field metadata
379+
query DT
380+
SELECT
381+
TRY_CAST(ts AS DATE) as try_casted,
382+
arrow_metadata(TRY_CAST(ts AS DATE), 'metadata_key')
383+
FROM table_with_metadata;
384+
----
385+
2020-09-08 ts non-nullable field
386+
2020-09-08 ts non-nullable field
387+
2020-09-08 ts non-nullable field
388+
389+
# Regression test: TRY_CAST preserves metadata on integer column
390+
query IT
391+
SELECT
392+
TRY_CAST(id AS BIGINT) as try_casted,
393+
arrow_metadata(TRY_CAST(id AS BIGINT), 'metadata_key')
394+
FROM table_with_metadata;
395+
----
396+
1 the id field
397+
NULL the id field
398+
3 the id field
399+
400+
# Regression test: TRY_CAST preserves metadata even when cast fails (returns NULL)
401+
query IT
402+
SELECT
403+
TRY_CAST(name AS INT) as try_casted,
404+
arrow_metadata(TRY_CAST(name AS INT), 'metadata_key')
405+
FROM table_with_metadata;
406+
----
407+
NULL the name field
408+
NULL the name field
409+
NULL the name field
410+
411+
# Regression test: TRY_CAST with single-argument arrow_metadata (returns full map)
412+
query ?
413+
select arrow_metadata(TRY_CAST(id AS BIGINT)) from table_with_metadata limit 1;
414+
----
415+
{metadata_key: the id field}
416+
350417
statement ok
351418
drop table table_with_metadata;

0 commit comments

Comments
 (0)