Skip to content

Commit 73a54c4

Browse files
committed
Add tests for additional numeric types and reversed operand order
Cover Int8, Int64, Float32 equality predicates and reversed operand order (literal = column) for NDV single-value optimization.
1 parent 95b417e commit 73a54c4

1 file changed

Lines changed: 133 additions & 0 deletions

File tree

datafusion/physical-plan/src/filter.rs

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2444,4 +2444,137 @@ mod tests {
24442444
);
24452445
Ok(())
24462446
}
2447+
2448+
#[tokio::test]
2449+
async fn test_filter_statistics_equality_int8_ndv() -> Result<()> {
2450+
// a: min=-100, max=100, ndv=50
2451+
let schema = Schema::new(vec![Field::new("a", DataType::Int8, false)]);
2452+
let input = Arc::new(StatisticsExec::new(
2453+
Statistics {
2454+
num_rows: Precision::Inexact(100),
2455+
total_byte_size: Precision::Inexact(100),
2456+
column_statistics: vec![ColumnStatistics {
2457+
min_value: Precision::Inexact(ScalarValue::Int8(Some(-100))),
2458+
max_value: Precision::Inexact(ScalarValue::Int8(Some(100))),
2459+
distinct_count: Precision::Inexact(50),
2460+
..Default::default()
2461+
}],
2462+
},
2463+
schema.clone(),
2464+
));
2465+
2466+
let predicate = Arc::new(BinaryExpr::new(
2467+
Arc::new(Column::new("a", 0)),
2468+
Operator::Eq,
2469+
Arc::new(Literal::new(ScalarValue::Int8(Some(42)))),
2470+
));
2471+
let filter: Arc<dyn ExecutionPlan> =
2472+
Arc::new(FilterExec::try_new(predicate, input)?);
2473+
let statistics = filter.partition_statistics(None)?;
2474+
assert_eq!(
2475+
statistics.column_statistics[0].distinct_count,
2476+
Precision::Exact(1)
2477+
);
2478+
Ok(())
2479+
}
2480+
2481+
#[tokio::test]
2482+
async fn test_filter_statistics_equality_int64_ndv() -> Result<()> {
2483+
// a: min=0, max=1_000_000, ndv=100_000
2484+
let schema = Schema::new(vec![Field::new("a", DataType::Int64, false)]);
2485+
let input = Arc::new(StatisticsExec::new(
2486+
Statistics {
2487+
num_rows: Precision::Inexact(100_000),
2488+
total_byte_size: Precision::Inexact(800_000),
2489+
column_statistics: vec![ColumnStatistics {
2490+
min_value: Precision::Inexact(ScalarValue::Int64(Some(0))),
2491+
max_value: Precision::Inexact(ScalarValue::Int64(Some(1_000_000))),
2492+
distinct_count: Precision::Inexact(100_000),
2493+
..Default::default()
2494+
}],
2495+
},
2496+
schema.clone(),
2497+
));
2498+
2499+
let predicate = Arc::new(BinaryExpr::new(
2500+
Arc::new(Column::new("a", 0)),
2501+
Operator::Eq,
2502+
Arc::new(Literal::new(ScalarValue::Int64(Some(42)))),
2503+
));
2504+
let filter: Arc<dyn ExecutionPlan> =
2505+
Arc::new(FilterExec::try_new(predicate, input)?);
2506+
let statistics = filter.partition_statistics(None)?;
2507+
assert_eq!(
2508+
statistics.column_statistics[0].distinct_count,
2509+
Precision::Exact(1)
2510+
);
2511+
Ok(())
2512+
}
2513+
2514+
#[tokio::test]
2515+
async fn test_filter_statistics_equality_float32_ndv() -> Result<()> {
2516+
// a: min=0.0, max=100.0, ndv=50
2517+
let schema = Schema::new(vec![Field::new("a", DataType::Float32, false)]);
2518+
let input = Arc::new(StatisticsExec::new(
2519+
Statistics {
2520+
num_rows: Precision::Inexact(100),
2521+
total_byte_size: Precision::Inexact(400),
2522+
column_statistics: vec![ColumnStatistics {
2523+
min_value: Precision::Inexact(ScalarValue::Float32(Some(0.0))),
2524+
max_value: Precision::Inexact(ScalarValue::Float32(Some(100.0))),
2525+
distinct_count: Precision::Inexact(50),
2526+
..Default::default()
2527+
}],
2528+
},
2529+
schema.clone(),
2530+
));
2531+
2532+
let predicate = Arc::new(BinaryExpr::new(
2533+
Arc::new(Column::new("a", 0)),
2534+
Operator::Eq,
2535+
Arc::new(Literal::new(ScalarValue::Float32(Some(3.14)))),
2536+
));
2537+
let filter: Arc<dyn ExecutionPlan> =
2538+
Arc::new(FilterExec::try_new(predicate, input)?);
2539+
let statistics = filter.partition_statistics(None)?;
2540+
assert_eq!(
2541+
statistics.column_statistics[0].distinct_count,
2542+
Precision::Exact(1)
2543+
);
2544+
Ok(())
2545+
}
2546+
2547+
#[tokio::test]
2548+
async fn test_filter_statistics_equality_reversed_ndv() -> Result<()> {
2549+
// a: min=1, max=100, ndv=80
2550+
let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
2551+
let input = Arc::new(StatisticsExec::new(
2552+
Statistics {
2553+
num_rows: Precision::Inexact(100),
2554+
total_byte_size: Precision::Inexact(400),
2555+
column_statistics: vec![ColumnStatistics {
2556+
min_value: Precision::Inexact(ScalarValue::Int32(Some(1))),
2557+
max_value: Precision::Inexact(ScalarValue::Int32(Some(100))),
2558+
distinct_count: Precision::Inexact(80),
2559+
..Default::default()
2560+
}],
2561+
},
2562+
schema.clone(),
2563+
));
2564+
2565+
// 42 = a (literal on the left)
2566+
let predicate = Arc::new(BinaryExpr::new(
2567+
Arc::new(Literal::new(ScalarValue::Int32(Some(42)))),
2568+
Operator::Eq,
2569+
Arc::new(Column::new("a", 0)),
2570+
));
2571+
let filter: Arc<dyn ExecutionPlan> =
2572+
Arc::new(FilterExec::try_new(predicate, input)?);
2573+
let statistics = filter.partition_statistics(None)?;
2574+
assert_eq!(
2575+
statistics.column_statistics[0].distinct_count,
2576+
Precision::Exact(1)
2577+
);
2578+
Ok(())
2579+
}
24472580
}

0 commit comments

Comments
 (0)