Skip to content

Commit a16868e

Browse files
authored
Relax OneOf schema comparison to ignore nullability differences (#37)
Different table types (FileScanTable, LiveTable, MV rollups) may expose different nullability for the same column. For example: - A partition column in one table is non-nullable - The same column in a rollup MV is a file column, forced nullable for DF 52 RecordBatch validation compatibility Previously, OneOf rejected candidates with any schema difference including nullability. Now it only checks field names and data types, allowing nullable/non-nullable variants of the same column.
1 parent d77d8bf commit a16868e

1 file changed

Lines changed: 75 additions & 2 deletions

File tree

src/rewrite/exploitation.rs

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -245,10 +245,21 @@ impl ExtensionPlanner for ViewExploitationPlanner {
245245
return Ok(None);
246246
}
247247

248+
// Compare schemas ignoring nullability differences.
249+
// Different table types (FileScanTable, LiveTable, MV) may expose
250+
// different nullability for the same column. For example, a partition
251+
// column in one table is non-nullable, but the same column is a file
252+
// column in a rollup MV (forced nullable for DF 52 RecordBatch
253+
// validation compatibility). Field names and data types must match.
248254
if logical_inputs
249255
.iter()
250256
.map(|plan| plan.schema())
251-
.any(|schema| schema != logical_inputs[0].schema())
257+
.any(|schema| {
258+
!schemas_equal_ignoring_nullability(
259+
schema.as_arrow(),
260+
logical_inputs[0].schema().as_arrow(),
261+
)
262+
})
252263
{
253264
return Err(DataFusionError::Plan(
254265
"candidate logical plans should have the same schema".to_string(),
@@ -258,7 +269,9 @@ impl ExtensionPlanner for ViewExploitationPlanner {
258269
if physical_inputs
259270
.iter()
260271
.map(|plan| plan.schema())
261-
.any(|schema| schema != physical_inputs[0].schema())
272+
.any(|schema| {
273+
!schemas_equal_ignoring_nullability(&schema, &physical_inputs[0].schema())
274+
})
262275
{
263276
return Err(DataFusionError::Plan(
264277
"candidate physical plans should have the same schema".to_string(),
@@ -522,3 +535,63 @@ impl PhysicalOptimizerRule for PruneCandidates {
522535
true
523536
}
524537
}
538+
539+
/// Compare two Arrow schemas ignoring field nullability.
540+
///
541+
/// Returns true if field count, field names, and data types all match.
542+
/// Nullability differences are ignored because different table types
543+
/// (e.g. FileScanTable with forced-nullable file columns vs a table
544+
/// where the same column is a non-nullable partition column) can
545+
/// legitimately differ in nullability while being semantically equivalent.
546+
fn schemas_equal_ignoring_nullability(a: &arrow_schema::Schema, b: &arrow_schema::Schema) -> bool {
547+
a.fields().len() == b.fields().len()
548+
&& a.fields()
549+
.iter()
550+
.zip(b.fields().iter())
551+
.all(|(f1, f2)| f1.name() == f2.name() && f1.data_type() == f2.data_type())
552+
}
553+
554+
#[cfg(test)]
555+
mod tests_nullability {
556+
use super::*;
557+
use arrow_schema::{DataType, Field, Schema};
558+
559+
#[test]
560+
fn schemas_equal_when_only_nullability_differs() {
561+
let a = Schema::new(vec![
562+
Field::new("ticker", DataType::Utf8, false),
563+
Field::new("date", DataType::Utf8, false),
564+
Field::new("price", DataType::Float64, false),
565+
]);
566+
let b = Schema::new(vec![
567+
Field::new("ticker", DataType::Utf8, true),
568+
Field::new("date", DataType::Utf8, true),
569+
Field::new("price", DataType::Float64, true),
570+
]);
571+
assert!(schemas_equal_ignoring_nullability(&a, &b));
572+
}
573+
574+
#[test]
575+
fn schemas_not_equal_when_types_differ() {
576+
let a = Schema::new(vec![Field::new("x", DataType::Int32, false)]);
577+
let b = Schema::new(vec![Field::new("x", DataType::Int64, false)]);
578+
assert!(!schemas_equal_ignoring_nullability(&a, &b));
579+
}
580+
581+
#[test]
582+
fn schemas_not_equal_when_names_differ() {
583+
let a = Schema::new(vec![Field::new("ticker", DataType::Utf8, true)]);
584+
let b = Schema::new(vec![Field::new("symbol", DataType::Utf8, true)]);
585+
assert!(!schemas_equal_ignoring_nullability(&a, &b));
586+
}
587+
588+
#[test]
589+
fn schemas_not_equal_when_field_count_differs() {
590+
let a = Schema::new(vec![
591+
Field::new("x", DataType::Int32, false),
592+
Field::new("y", DataType::Int32, false),
593+
]);
594+
let b = Schema::new(vec![Field::new("x", DataType::Int32, false)]);
595+
assert!(!schemas_equal_ignoring_nullability(&a, &b));
596+
}
597+
}

0 commit comments

Comments
 (0)