Skip to content

Commit a98ead3

Browse files
committed
Add SLT tests for NDV=Exact(1) across numeric types
Cover Int8, Int64, and Float32 equality predicates, plus reversed operand order (literal = column) on a Float64 column, as sqllogictests in parquet_statistics.slt.
1 parent 238ab02 commit a98ead3

1 file changed

Lines changed: 69 additions & 0 deletions

File tree

datafusion/sqllogictest/test_files/parquet_statistics.slt

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,3 +117,72 @@ physical_plan
117117
# cleanup
118118
statement ok
119119
DROP TABLE test_table;
120+
121+
######
122+
# Equality filters should report NDV as Distinct=Exact(1) for different numeric types
123+
######
124+
125+
statement ok
126+
set datafusion.execution.collect_statistics = true;
127+
128+
query I
129+
COPY (
130+
SELECT
131+
arrow_cast(v, 'Int8') AS i8,
132+
arrow_cast(v, 'Int64') AS i64,
133+
arrow_cast(v + 0.5, 'Float32') AS f32,
134+
arrow_cast(v + 0.5, 'Float64') AS f64
135+
FROM (VALUES (1), (2), (3), (4), (5)) AS t(v)
136+
)
137+
TO 'test_files/scratch/parquet_statistics/typed_table.parquet'
138+
STORED AS PARQUET;
139+
----
140+
5
141+
142+
statement ok
143+
CREATE EXTERNAL TABLE typed_table (
144+
i8 TINYINT,
145+
i64 BIGINT,
146+
f32 FLOAT,
147+
f64 DOUBLE
148+
) STORED AS PARQUET
149+
LOCATION 'test_files/scratch/parquet_statistics/typed_table.parquet';
150+
151+
# Int8 equality
152+
query TT
153+
EXPLAIN SELECT i8 FROM typed_table WHERE i8 = 2;
154+
----
155+
physical_plan
156+
01)FilterExec: i8@0 = 2, statistics=[Rows=Inexact(1), Bytes=Inexact(1), [(Col[0]: Min=Exact(Int8(2)) Max=Exact(Int8(2)) Null=Inexact(0) Distinct=Exact(1) ScanBytes=Inexact(5))]]
157+
02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1, statistics=[Rows=Inexact(5), Bytes=Inexact(5), [(Col[0]: Min=Inexact(Int8(1)) Max=Inexact(Int8(5)) Null=Inexact(0) ScanBytes=Inexact(5))]]
158+
03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_statistics/typed_table.parquet]]}, projection=[i8], file_type=parquet, predicate=i8@0 = 2, pruning_predicate=i8_null_count@2 != row_count@3 AND i8_min@0 <= 2 AND 2 <= i8_max@1, required_guarantees=[i8 in (2)], statistics=[Rows=Inexact(5), Bytes=Inexact(5), [(Col[0]: Min=Inexact(Int8(1)) Max=Inexact(Int8(5)) Null=Inexact(0) ScanBytes=Inexact(5))]]
159+
160+
# Int64 equality
161+
query TT
162+
EXPLAIN SELECT i64 FROM typed_table WHERE i64 = 2;
163+
----
164+
physical_plan
165+
01)FilterExec: i64@0 = 2, statistics=[Rows=Inexact(1), Bytes=Inexact(8), [(Col[0]: Min=Exact(Int64(2)) Max=Exact(Int64(2)) Null=Inexact(0) Distinct=Exact(1) ScanBytes=Inexact(40))]]
166+
02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1, statistics=[Rows=Inexact(5), Bytes=Inexact(40), [(Col[0]: Min=Inexact(Int64(1)) Max=Inexact(Int64(5)) Null=Inexact(0) ScanBytes=Inexact(40))]]
167+
03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_statistics/typed_table.parquet]]}, projection=[i64], file_type=parquet, predicate=i64@1 = 2, pruning_predicate=i64_null_count@2 != row_count@3 AND i64_min@0 <= 2 AND 2 <= i64_max@1, required_guarantees=[i64 in (2)], statistics=[Rows=Inexact(5), Bytes=Inexact(40), [(Col[0]: Min=Inexact(Int64(1)) Max=Inexact(Int64(5)) Null=Inexact(0) ScanBytes=Inexact(40))]]
168+
169+
# Float32 equality
170+
query TT
171+
EXPLAIN SELECT f32 FROM typed_table WHERE f32 = 2.5;
172+
----
173+
physical_plan
174+
01)FilterExec: CAST(f32@0 AS Float64) = 2.5, statistics=[Rows=Inexact(1), Bytes=Inexact(1), [(Col[0]: Min=Exact(Float32(2.5)) Max=Exact(Float32(2.5)) Null=Inexact(0) Distinct=Exact(1) ScanBytes=Inexact(20))]]
175+
02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1, statistics=[Rows=Inexact(5), Bytes=Inexact(20), [(Col[0]: Min=Inexact(Float32(1.5)) Max=Inexact(Float32(5.5)) Null=Inexact(0) ScanBytes=Inexact(20))]]
176+
03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_statistics/typed_table.parquet]]}, projection=[f32], file_type=parquet, predicate=CAST(f32@2 AS Float64) = 2.5, pruning_predicate=f32_null_count@2 != row_count@3 AND CAST(f32_min@0 AS Float64) <= 2.5 AND 2.5 <= CAST(f32_max@1 AS Float64), required_guarantees=[], statistics=[Rows=Inexact(5), Bytes=Inexact(20), [(Col[0]: Min=Inexact(Float32(1.5)) Max=Inexact(Float32(5.5)) Null=Inexact(0) ScanBytes=Inexact(20))]]
177+
178+
# Reversed operand order: literal = column (Float64)
179+
query TT
180+
EXPLAIN SELECT f64 FROM typed_table WHERE 2.5 = f64;
181+
----
182+
physical_plan
183+
01)FilterExec: f64@0 = 2.5, statistics=[Rows=Inexact(1), Bytes=Inexact(1), [(Col[0]: Min=Exact(Float64(2.5)) Max=Exact(Float64(2.5)) Null=Inexact(0) Distinct=Exact(1) ScanBytes=Inexact(40))]]
184+
02)--RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1, statistics=[Rows=Inexact(5), Bytes=Inexact(40), [(Col[0]: Min=Inexact(Float64(1.5)) Max=Inexact(Float64(5.5)) Null=Inexact(0) ScanBytes=Inexact(40))]]
185+
03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_statistics/typed_table.parquet]]}, projection=[f64], file_type=parquet, predicate=f64@3 = 2.5, pruning_predicate=f64_null_count@2 != row_count@3 AND f64_min@0 <= 2.5 AND 2.5 <= f64_max@1, required_guarantees=[f64 in (2.5)], statistics=[Rows=Inexact(5), Bytes=Inexact(40), [(Col[0]: Min=Inexact(Float64(1.5)) Max=Inexact(Float64(5.5)) Null=Inexact(0) ScanBytes=Inexact(40))]]
186+
187+
statement ok
188+
DROP TABLE typed_table;

0 commit comments

Comments
 (0)