Skip to content

Commit c51490f

Browse files
committed
fix: cargo fmt
1 parent 8265113 commit c51490f

2 files changed

Lines changed: 42 additions & 44 deletions

File tree

datafusion/common/src/stats.rs

Lines changed: 16 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -1246,10 +1246,6 @@ mod tests {
12461246
assert_eq!(stats, make_stats(vec![20, 30, 20, 20, 10, 30]));
12471247
}
12481248

1249-
fn single_col_schema(dt: DataType) -> Schema {
1250-
Schema::new(vec![Field::new("a", dt, true)])
1251-
}
1252-
12531249
// Make a Statistics structure with the specified null counts for each column
12541250
fn make_stats(counts: impl IntoIterator<Item = usize>) -> Statistics {
12551251
Statistics {
@@ -1494,7 +1490,7 @@ mod tests {
14941490
);
14951491

14961492
// Merge statistics
1497-
let schema = single_col_schema(DataType::Int32);
1493+
let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
14981494
let merged_stats =
14991495
Statistics::try_merge_iter([&stats1, &stats2], &schema).unwrap();
15001496

@@ -1538,9 +1534,8 @@ mod tests {
15381534
.with_distinct_count(Precision::Exact(8)),
15391535
);
15401536

1541-
let schema = single_col_schema(DataType::Int32);
1542-
let merged =
1543-
Statistics::try_merge_iter([&stats1, &stats2], &schema).unwrap();
1537+
let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
1538+
let merged = Statistics::try_merge_iter([&stats1, &stats2], &schema).unwrap();
15441539
// No overlap -> sum of NDVs
15451540
assert_eq!(
15461541
merged.column_statistics[0].distinct_count,
@@ -1567,9 +1562,8 @@ mod tests {
15671562
.with_distinct_count(Precision::Exact(30)),
15681563
);
15691564

1570-
let schema = single_col_schema(DataType::Int32);
1571-
let merged =
1572-
Statistics::try_merge_iter([&stats1, &stats2], &schema).unwrap();
1565+
let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
1566+
let merged = Statistics::try_merge_iter([&stats1, &stats2], &schema).unwrap();
15731567
// Full overlap -> max(50, 30) = 50
15741568
assert_eq!(
15751569
merged.column_statistics[0].distinct_count,
@@ -1596,9 +1590,8 @@ mod tests {
15961590
.with_distinct_count(Precision::Exact(60)),
15971591
);
15981592

1599-
let schema = single_col_schema(DataType::Int32);
1600-
let merged =
1601-
Statistics::try_merge_iter([&stats1, &stats2], &schema).unwrap();
1593+
let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
1594+
let merged = Statistics::try_merge_iter([&stats1, &stats2], &schema).unwrap();
16021595
// overlap=[50,100], range_left=100, range_right=100, overlap_range=50
16031596
// overlap_left=80*(50/100)=40, overlap_right=60*(50/100)=30
16041597
// result = max(40,30) + (80-40) + (60-30) = 40 + 40 + 30 = 110
@@ -1613,19 +1606,16 @@ mod tests {
16131606
let stats1 = Statistics::default()
16141607
.with_num_rows(Precision::Exact(10))
16151608
.add_column_statistics(
1616-
ColumnStatistics::new_unknown()
1617-
.with_distinct_count(Precision::Exact(5)),
1609+
ColumnStatistics::new_unknown().with_distinct_count(Precision::Exact(5)),
16181610
);
16191611
let stats2 = Statistics::default()
16201612
.with_num_rows(Precision::Exact(10))
16211613
.add_column_statistics(
1622-
ColumnStatistics::new_unknown()
1623-
.with_distinct_count(Precision::Exact(8)),
1614+
ColumnStatistics::new_unknown().with_distinct_count(Precision::Exact(8)),
16241615
);
16251616

1626-
let schema = single_col_schema(DataType::Int32);
1627-
let merged =
1628-
Statistics::try_merge_iter([&stats1, &stats2], &schema).unwrap();
1617+
let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
1618+
let merged = Statistics::try_merge_iter([&stats1, &stats2], &schema).unwrap();
16291619
// No min/max -> fallback to max(5, 8)
16301620
assert_eq!(
16311621
merged.column_statistics[0].distinct_count,
@@ -1660,9 +1650,8 @@ mod tests {
16601650
.with_distinct_count(Precision::Exact(8)),
16611651
);
16621652

1663-
let schema = single_col_schema(DataType::Utf8);
1664-
let merged =
1665-
Statistics::try_merge_iter([&stats1, &stats2], &schema).unwrap();
1653+
let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);
1654+
let merged = Statistics::try_merge_iter([&stats1, &stats2], &schema).unwrap();
16661655
// distance() unsupported for strings -> fallback to max
16671656
assert_eq!(
16681657
merged.column_statistics[0].distinct_count,
@@ -1690,9 +1679,8 @@ mod tests {
16901679
.with_distinct_count(Precision::Exact(1)),
16911680
);
16921681

1693-
let schema = single_col_schema(DataType::Int32);
1694-
let merged =
1695-
Statistics::try_merge_iter([&stats1, &stats2], &schema).unwrap();
1682+
let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
1683+
let merged = Statistics::try_merge_iter([&stats1, &stats2], &schema).unwrap();
16961684
assert_eq!(
16971685
merged.column_statistics[0].distinct_count,
16981686
Precision::Inexact(1)
@@ -1716,8 +1704,7 @@ mod tests {
17161704
.with_distinct_count(Precision::Exact(1)),
17171705
);
17181706

1719-
let merged =
1720-
Statistics::try_merge_iter([&stats3, &stats4], &schema).unwrap();
1707+
let merged = Statistics::try_merge_iter([&stats3, &stats4], &schema).unwrap();
17211708
assert_eq!(
17221709
merged.column_statistics[0].distinct_count,
17231710
Precision::Inexact(2)

datafusion/datasource-parquet/src/metadata.rs

Lines changed: 26 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -560,8 +560,7 @@ fn summarize_column_statistics(
560560
})
561561
.collect();
562562

563-
let coverage =
564-
distinct_counts.len() as f64 / num_row_groups.max(1) as f64;
563+
let coverage = distinct_counts.len() as f64 / num_row_groups.max(1) as f64;
565564

566565
if coverage < PARTIAL_NDV_THRESHOLD {
567566
Precision::Absent
@@ -1045,13 +1044,8 @@ mod tests {
10451044

10461045
let stats1 =
10471046
ParquetStatistics::int32(Some(1), Some(50), Some(15), Some(0), false);
1048-
let stats2 = ParquetStatistics::int32(
1049-
Some(51),
1050-
Some(100),
1051-
None,
1052-
Some(0),
1053-
false,
1054-
);
1047+
let stats2 =
1048+
ParquetStatistics::int32(Some(51), Some(100), None, Some(0), false);
10551049

10561050
let row_group1 =
10571051
create_row_group_with_stats(&schema_descr, vec![Some(stats1)], 500);
@@ -1078,15 +1072,32 @@ mod tests {
10781072
let schema_descr = create_schema_descr(1);
10791073
let arrow_schema = create_arrow_schema(1);
10801074

1081-
let stats_with =
1082-
|ndv| ParquetStatistics::int32(Some(1), Some(100), Some(ndv), Some(0), false);
1075+
let stats_with = |ndv| {
1076+
ParquetStatistics::int32(Some(1), Some(100), Some(ndv), Some(0), false)
1077+
};
10831078
let stats_without =
10841079
ParquetStatistics::int32(Some(1), Some(100), None, Some(0), false);
10851080

1086-
let rg1 = create_row_group_with_stats(&schema_descr, vec![Some(stats_with(10))], 250);
1087-
let rg2 = create_row_group_with_stats(&schema_descr, vec![Some(stats_with(20))], 250);
1088-
let rg3 = create_row_group_with_stats(&schema_descr, vec![Some(stats_with(15))], 250);
1089-
let rg4 = create_row_group_with_stats(&schema_descr, vec![Some(stats_without)], 250);
1081+
let rg1 = create_row_group_with_stats(
1082+
&schema_descr,
1083+
vec![Some(stats_with(10))],
1084+
250,
1085+
);
1086+
let rg2 = create_row_group_with_stats(
1087+
&schema_descr,
1088+
vec![Some(stats_with(20))],
1089+
250,
1090+
);
1091+
let rg3 = create_row_group_with_stats(
1092+
&schema_descr,
1093+
vec![Some(stats_with(15))],
1094+
250,
1095+
);
1096+
let rg4 = create_row_group_with_stats(
1097+
&schema_descr,
1098+
vec![Some(stats_without)],
1099+
250,
1100+
);
10901101
let metadata =
10911102
create_parquet_metadata(schema_descr, vec![rg1, rg2, rg3, rg4]);
10921103

0 commit comments

Comments
 (0)