Skip to content

Commit d7239c6

Browse files
committed
fix: update partition_statistics tests for NDV preservation
Partition columns now preserve distinct_count as Inexact(1) when merging statistics, reflecting that each partition file has a single distinct partition value.
1 parent de48b02 commit d7239c6

1 file changed

Lines changed: 7 additions & 5 deletions

File tree

datafusion/core/tests/physical_optimizer/partition_statistics.rs

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -150,13 +150,15 @@ mod test {
150150
// - null_count = 0 (partition values from paths are never null)
151151
// - min/max are the merged partition values across files in the group
152152
// - byte_size = num_rows * 4 (Date32 is 4 bytes per row)
153+
// - distinct_count = Inexact(1) per partition file (single partition value per file),
154+
// preserved via max() when merging stats across partitions
153155
let date32_byte_size = num_rows * 4;
154156
column_stats.push(ColumnStatistics {
155157
null_count: Precision::Exact(0),
156158
max_value: Precision::Exact(ScalarValue::Date32(Some(max_date))),
157159
min_value: Precision::Exact(ScalarValue::Date32(Some(min_date))),
158160
sum_value: Precision::Absent,
159-
distinct_count: Precision::Absent,
161+
distinct_count: Precision::Inexact(1),
160162
byte_size: Precision::Exact(date32_byte_size),
161163
});
162164
}
@@ -577,7 +579,7 @@ mod test {
577579
max_value: Precision::Exact(ScalarValue::Date32(Some(20151))),
578580
min_value: Precision::Exact(ScalarValue::Date32(Some(20148))),
579581
sum_value: Precision::Absent,
580-
distinct_count: Precision::Absent,
582+
distinct_count: Precision::Inexact(1),
581583
byte_size: Precision::Absent,
582584
},
583585
// column 2: right.id (Int32, file column from t2) - right partition 0: ids [3,4]
@@ -611,7 +613,7 @@ mod test {
611613
max_value: Precision::Exact(ScalarValue::Date32(Some(20151))),
612614
min_value: Precision::Exact(ScalarValue::Date32(Some(20148))),
613615
sum_value: Precision::Absent,
614-
distinct_count: Precision::Absent,
616+
distinct_count: Precision::Inexact(1),
615617
byte_size: Precision::Absent,
616618
},
617619
// column 2: right.id (Int32, file column from t2) - right partition 1: ids [1,2]
@@ -1247,7 +1249,7 @@ mod test {
12471249
DATE_2025_03_01,
12481250
))),
12491251
sum_value: Precision::Absent,
1250-
distinct_count: Precision::Absent,
1252+
distinct_count: Precision::Inexact(1),
12511253
byte_size: Precision::Exact(8),
12521254
},
12531255
ColumnStatistics::new_unknown(), // window column
@@ -1275,7 +1277,7 @@ mod test {
12751277
DATE_2025_03_03,
12761278
))),
12771279
sum_value: Precision::Absent,
1278-
distinct_count: Precision::Absent,
1280+
distinct_count: Precision::Inexact(1),
12791281
byte_size: Precision::Exact(8),
12801282
},
12811283
ColumnStatistics::new_unknown(), // window column

0 commit comments

Comments (0)