Skip to content

Commit 7d107f0

Browse files
authored
fix(physical-plan): set column byte_size to 0 in FilterExec zero-row interval stats (#21999)
## Which issue does this PR close?

- Closes #.

## Rationale for this change

While reading the code around column statistics, I realized that `FilterExec` still propagates the input column's `byte_size` even when the filter is estimated to produce no rows. In that case it is more accurate to report a `byte_size` of 0.

## What changes are included in this PR?

Set `byte_size` to `Precision::Exact(0)` in the filter's column statistics when there are no rows.

## Are these changes tested?

Yes, by the existing unit tests, whose expected statistics were updated accordingly.

## Are there any user-facing changes?

No API changes; only a minimal change in the propagated statistics.
1 parent ae796ab commit 7d107f0

2 files changed

Lines changed: 7 additions & 10 deletions

File tree

datafusion/core/tests/physical_optimizer/partition_statistics.rs

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -381,8 +381,6 @@ mod test {
381381
let filter: Arc<dyn ExecutionPlan> =
382382
Arc::new(FilterExec::try_new(predicate, scan)?);
383383
let full_statistics = filter.partition_statistics(None)?;
384-
// Filter preserves original total_rows and byte_size from input
385-
// (4 total rows = 2 partitions * 2 rows each, byte_size = 4 * 4 = 16 bytes for int32)
386384
let expected_full_statistic = Statistics {
387385
num_rows: Precision::Inexact(0),
388386
total_byte_size: Precision::Inexact(0),
@@ -393,15 +391,15 @@ mod test {
393391
min_value: Precision::Exact(ScalarValue::Int32(None)),
394392
sum_value: Precision::Exact(ScalarValue::Int32(None)),
395393
distinct_count: Precision::Exact(0),
396-
byte_size: Precision::Exact(16),
394+
byte_size: Precision::Exact(0),
397395
},
398396
ColumnStatistics {
399397
null_count: Precision::Exact(0),
400398
max_value: Precision::Exact(ScalarValue::Date32(None)),
401399
min_value: Precision::Exact(ScalarValue::Date32(None)),
402400
sum_value: Precision::Exact(ScalarValue::Date32(None)),
403401
distinct_count: Precision::Exact(0),
404-
byte_size: Precision::Exact(16), // 4 rows * 4 bytes (Date32)
402+
byte_size: Precision::Exact(0),
405403
},
406404
],
407405
};
@@ -411,7 +409,6 @@ mod test {
411409
.map(|idx| filter.partition_statistics(Some(idx)))
412410
.collect::<Result<Vec<_>>>()?;
413411
assert_eq!(statistics.len(), 2);
414-
// Per-partition stats: each partition has 2 rows, byte_size = 2 * 4 = 8
415412
let expected_partition_statistic = Statistics {
416413
num_rows: Precision::Inexact(0),
417414
total_byte_size: Precision::Inexact(0),
@@ -422,15 +419,15 @@ mod test {
422419
min_value: Precision::Exact(ScalarValue::Int32(None)),
423420
sum_value: Precision::Exact(ScalarValue::Int32(None)),
424421
distinct_count: Precision::Exact(0),
425-
byte_size: Precision::Exact(8),
422+
byte_size: Precision::Exact(0),
426423
},
427424
ColumnStatistics {
428425
null_count: Precision::Exact(0),
429426
max_value: Precision::Exact(ScalarValue::Date32(None)),
430427
min_value: Precision::Exact(ScalarValue::Date32(None)),
431428
sum_value: Precision::Exact(ScalarValue::Date32(None)),
432429
distinct_count: Precision::Exact(0),
433-
byte_size: Precision::Exact(8), // 2 rows * 4 bytes (Date32)
430+
byte_size: Precision::Exact(0),
434431
},
435432
],
436433
};

datafusion/physical-plan/src/filter.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -903,7 +903,7 @@ fn collect_new_statistics(
903903
min_value: Precision::Exact(typed_null.clone()),
904904
sum_value: Precision::Exact(typed_null),
905905
distinct_count: Precision::Exact(0),
906-
byte_size: input_column_stats[idx].byte_size,
906+
byte_size: Precision::Exact(0),
907907
};
908908
};
909909
let (lower, upper) = interval.into_bounds();
@@ -1621,15 +1621,15 @@ mod tests {
16211621
sum_value: Precision::Exact(ScalarValue::Int32(None)),
16221622
distinct_count: Precision::Exact(0),
16231623
null_count: Precision::Exact(0),
1624-
byte_size: Precision::Absent,
1624+
byte_size: Precision::Exact(0),
16251625
},
16261626
ColumnStatistics {
16271627
min_value: Precision::Exact(ScalarValue::Int32(None)),
16281628
max_value: Precision::Exact(ScalarValue::Int32(None)),
16291629
sum_value: Precision::Exact(ScalarValue::Int32(None)),
16301630
distinct_count: Precision::Exact(0),
16311631
null_count: Precision::Exact(0),
1632-
byte_size: Precision::Absent,
1632+
byte_size: Precision::Exact(0),
16331633
},
16341634
]
16351635
);

0 commit comments

Comments
 (0)