Skip to content

Commit e352c28

Browse files
committed
fix: update hash join partition_statistics test for NDV preservation through merges
1 parent 159183c commit e352c28

File tree

1 file changed

+9
-6
lines changed

1 file changed

+9
-6
lines changed

datafusion/core/tests/physical_optimizer/partition_statistics.rs

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1418,6 +1418,8 @@ mod test {
14181418
byte_size: Precision::Exact(16),
14191419
},
14201420
// Left date column: all partitions (2025-03-01..2025-03-04)
1421+
// NDV (number of distinct values) is Inexact(1) because each Hive partition has exactly 1 distinct date value,
1422+
// and merging takes the max as a conservative lower bound
14211423
ColumnStatistics {
14221424
null_count: Precision::Exact(0),
14231425
max_value: Precision::Exact(ScalarValue::Date32(Some(
@@ -1427,7 +1429,7 @@ mod test {
14271429
DATE_2025_03_01,
14281430
))),
14291431
sum_value: Precision::Absent,
1430-
distinct_count: Precision::Absent,
1432+
distinct_count: Precision::Inexact(1),
14311433
byte_size: Precision::Exact(16),
14321434
},
14331435
// Right id column: partition 0 only (id 3..4)
@@ -1440,6 +1442,7 @@ mod test {
14401442
byte_size: Precision::Exact(8),
14411443
},
14421444
// Right date column: partition 0 only (2025-03-01..2025-03-02)
1445+
// NDV is Inexact(1) from the single Hive partition's date value
14431446
ColumnStatistics {
14441447
null_count: Precision::Exact(0),
14451448
max_value: Precision::Exact(ScalarValue::Date32(Some(
@@ -1449,7 +1452,7 @@ mod test {
14491452
DATE_2025_03_01,
14501453
))),
14511454
sum_value: Precision::Absent,
1452-
distinct_count: Precision::Absent,
1455+
distinct_count: Precision::Inexact(1),
14531456
byte_size: Precision::Exact(8),
14541457
},
14551458
],
@@ -1501,7 +1504,7 @@ mod test {
15011504
DATE_2025_03_01,
15021505
))),
15031506
sum_value: Precision::Absent,
1504-
distinct_count: Precision::Absent,
1507+
distinct_count: Precision::Inexact(1),
15051508
byte_size: Precision::Exact(8),
15061509
},
15071510
// Right id column: partition 0 only (id 3..4)
@@ -1523,7 +1526,7 @@ mod test {
15231526
DATE_2025_03_01,
15241527
))),
15251528
sum_value: Precision::Absent,
1526-
distinct_count: Precision::Absent,
1529+
distinct_count: Precision::Inexact(1),
15271530
byte_size: Precision::Exact(8),
15281531
},
15291532
],
@@ -1575,7 +1578,7 @@ mod test {
15751578
DATE_2025_03_01,
15761579
))),
15771580
sum_value: Precision::Absent,
1578-
distinct_count: Precision::Absent,
1581+
distinct_count: Precision::Inexact(1),
15791582
byte_size: Precision::Exact(16),
15801583
},
15811584
// Right id column: all partitions (id 1..4)
@@ -1597,7 +1600,7 @@ mod test {
15971600
DATE_2025_03_01,
15981601
))),
15991602
sum_value: Precision::Absent,
1600-
distinct_count: Precision::Absent,
1603+
distinct_count: Precision::Inexact(1),
16011604
byte_size: Precision::Exact(16),
16021605
},
16031606
],

0 commit comments

Comments
 (0)