Skip to content

Commit ab09466

Browse files
committed
Add SLT tests for NDV=Exact(1) across numeric types
Cover Int8, Int64, and Float32 equality predicates, plus reversed operand order (literal = column) on a Float64 column, as sqllogictests in parquet_statistics.slt.
1 parent cb29d45 commit ab09466

1 file changed

Lines changed: 69 additions & 0 deletions

File tree

datafusion/sqllogictest/test_files/parquet_statistics.slt

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,75 @@ physical_plan
117117
statement ok
118118
DROP TABLE test_table;
119119

120+
######
121+
# An equality filter (column = literal) should report Distinct=Exact(1) (NDV) on the filtered column, for different numeric types
122+
######
123+
124+
statement ok
125+
set datafusion.execution.collect_statistics = true;
126+
127+
query I
128+
COPY (
129+
SELECT
130+
arrow_cast(v, 'Int8') AS i8,
131+
arrow_cast(v, 'Int64') AS i64,
132+
arrow_cast(v + 0.5, 'Float32') AS f32,
133+
arrow_cast(v + 0.5, 'Float64') AS f64
134+
FROM (VALUES (1), (2), (3), (4), (5)) AS t(v)
135+
)
136+
TO 'test_files/scratch/parquet_statistics/typed_table.parquet'
137+
STORED AS PARQUET;
138+
----
139+
5
140+
141+
statement ok
142+
CREATE EXTERNAL TABLE typed_table (
143+
i8 TINYINT,
144+
i64 BIGINT,
145+
f32 FLOAT,
146+
f64 DOUBLE
147+
) STORED AS PARQUET
148+
LOCATION 'test_files/scratch/parquet_statistics/typed_table.parquet';
149+
150+
# Int8 equality: FilterExec should report Min=Max=Exact(Int8(2)) and Distinct=Exact(1)
151+
query TT
152+
EXPLAIN SELECT i8 FROM typed_table WHERE i8 = 2;
153+
----
154+
physical_plan
155+
01)FilterExec: i8@0 = 2, statistics=[Rows=Inexact(1), Bytes=Inexact(1), [(Col[0]: Min=Exact(Int8(2)) Max=Exact(Int8(2)) Null=Inexact(0) Distinct=Exact(1) ScanBytes=Inexact(5))]]
156+
02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1, statistics=[Rows=Inexact(5), Bytes=Inexact(5), [(Col[0]: Min=Inexact(Int8(1)) Max=Inexact(Int8(5)) Null=Inexact(0) ScanBytes=Inexact(5))]]
157+
03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_statistics/typed_table.parquet]]}, projection=[i8], file_type=parquet, predicate=i8@0 = 2, pruning_predicate=i8_null_count@2 != row_count@3 AND i8_min@0 <= 2 AND 2 <= i8_max@1, required_guarantees=[i8 in (2)], statistics=[Rows=Inexact(5), Bytes=Inexact(5), [(Col[0]: Min=Inexact(Int8(1)) Max=Inexact(Int8(5)) Null=Inexact(0) ScanBytes=Inexact(5))]]
158+
159+
# Int64 equality: FilterExec should report Min=Max=Exact(Int64(2)) and Distinct=Exact(1)
160+
query TT
161+
EXPLAIN SELECT i64 FROM typed_table WHERE i64 = 2;
162+
----
163+
physical_plan
164+
01)FilterExec: i64@0 = 2, statistics=[Rows=Inexact(1), Bytes=Inexact(8), [(Col[0]: Min=Exact(Int64(2)) Max=Exact(Int64(2)) Null=Inexact(0) Distinct=Exact(1) ScanBytes=Inexact(40))]]
165+
02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1, statistics=[Rows=Inexact(5), Bytes=Inexact(40), [(Col[0]: Min=Inexact(Int64(1)) Max=Inexact(Int64(5)) Null=Inexact(0) ScanBytes=Inexact(40))]]
166+
03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_statistics/typed_table.parquet]]}, projection=[i64], file_type=parquet, predicate=i64@1 = 2, pruning_predicate=i64_null_count@2 != row_count@3 AND i64_min@0 <= 2 AND 2 <= i64_max@1, required_guarantees=[i64 in (2)], statistics=[Rows=Inexact(5), Bytes=Inexact(40), [(Col[0]: Min=Inexact(Int64(1)) Max=Inexact(Int64(5)) Null=Inexact(0) ScanBytes=Inexact(40))]]
167+
168+
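# Float32 equality: the column is cast to Float64 to compare against the literal 2.5 (so required_guarantees is empty), but FilterExec should still report Distinct=Exact(1)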
169+
query TT
170+
EXPLAIN SELECT f32 FROM typed_table WHERE f32 = 2.5;
171+
----
172+
physical_plan
173+
01)FilterExec: CAST(f32@0 AS Float64) = 2.5, statistics=[Rows=Inexact(1), Bytes=Inexact(1), [(Col[0]: Min=Exact(Float32(2.5)) Max=Exact(Float32(2.5)) Null=Inexact(0) Distinct=Exact(1) ScanBytes=Inexact(20))]]
174+
02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1, statistics=[Rows=Inexact(5), Bytes=Inexact(20), [(Col[0]: Min=Inexact(Float32(1.5)) Max=Inexact(Float32(5.5)) Null=Inexact(0) ScanBytes=Inexact(20))]]
175+
03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_statistics/typed_table.parquet]]}, projection=[f32], file_type=parquet, predicate=CAST(f32@2 AS Float64) = 2.5, pruning_predicate=f32_null_count@2 != row_count@3 AND CAST(f32_min@0 AS Float64) <= 2.5 AND 2.5 <= CAST(f32_max@1 AS Float64), required_guarantees=[], statistics=[Rows=Inexact(5), Bytes=Inexact(20), [(Col[0]: Min=Inexact(Float32(1.5)) Max=Inexact(Float32(5.5)) Null=Inexact(0) ScanBytes=Inexact(20))]]
176+
177+
# Reversed operand order: literal = column (Float64); the plan shows it normalized to f64@0 = 2.5, and Distinct=Exact(1) is still expected
178+
query TT
179+
EXPLAIN SELECT f64 FROM typed_table WHERE 2.5 = f64;
180+
----
181+
physical_plan
182+
01)FilterExec: f64@0 = 2.5, statistics=[Rows=Inexact(1), Bytes=Inexact(1), [(Col[0]: Min=Exact(Float64(2.5)) Max=Exact(Float64(2.5)) Null=Inexact(0) Distinct=Exact(1) ScanBytes=Inexact(40))]]
183+
02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1, statistics=[Rows=Inexact(5), Bytes=Inexact(40), [(Col[0]: Min=Inexact(Float64(1.5)) Max=Inexact(Float64(5.5)) Null=Inexact(0) ScanBytes=Inexact(40))]]
184+
03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_statistics/typed_table.parquet]]}, projection=[f64], file_type=parquet, predicate=f64@3 = 2.5, pruning_predicate=f64_null_count@2 != row_count@3 AND f64_min@0 <= 2.5 AND 2.5 <= f64_max@1, required_guarantees=[f64 in (2.5)], statistics=[Rows=Inexact(5), Bytes=Inexact(40), [(Col[0]: Min=Inexact(Float64(1.5)) Max=Inexact(Float64(5.5)) Null=Inexact(0) ScanBytes=Inexact(40))]]
185+
186+
statement ok
187+
DROP TABLE typed_table;
188+
120189
# Config reset
121190
statement ok
122191
RESET datafusion.execution.collect_statistics;

0 commit comments

Comments
 (0)