@@ -174,10 +174,11 @@ impl RowGroupPruningTest {
174174 self ,
175175 schema : Arc < Schema > ,
176176 batches : Vec < RecordBatch > ,
177+ max_row_per_group : usize ,
177178 ) {
178179 let output = ContextWithParquet :: with_custom_data (
179180 self . scenario ,
180- RowGroup ( 2 ) ,
181+ RowGroup ( max_row_per_group ) ,
181182 schema,
182183 batches,
183184 )
@@ -1721,28 +1722,29 @@ fn make_i32_batch(
17211722#[ tokio:: test]
17221723async fn test_limit_pruning ( ) -> datafusion_common:: error:: Result < ( ) > {
17231724 // Scenario: Simple integer column, multiple row groups
1724- // Query: SELECT c1 FROM t WHERE c1 > 0 LIMIT 2
1725+ // Query: SELECT c1 FROM t WHERE c1 >= 0 LIMIT 2
17251726 // We expect 2 rows in total.
17261727
1727- // Row Group 0: c1 = [1, 2] -> Fully matched, 2 rows
1728- // Row Group 1: c1 = [3, 4] -> Fully matched, 2 rows
1729- // Row Group 2: c1 = [5, 6] -> Fully matched, 2 rows
1730- // Row Group 3: c1 = [-1, 0] -> Pruned by statistics, 0 rows
1728+ // Row Group 0: c1 = [0, -2] -> Partially matched, 1 row
1729+ // Row Group 1: c1 = [0, 0] -> Fully matched, 2 rows
1730+ // Row Group 2: c1 = [0, 0] -> Fully matched, 2 rows
1731+ // Row Group 3: c1 = [0, 0] -> Fully matched, 2 rows
1732+ // Row Group 4: c1 = [-1, -2] -> Not matched
17311733
1732- // If limit = 2, and RG0 is fully matched and has 2 rows, we should
1733- // only scan RG0 and prune other row groups (RG1, RG2, RG3)
1734- // RG3 is pruned by statistics. RG1 and RG2 are pruned by limit.
1735- // So 3 row groups are effectively pruned due to limit pruning.
1734+ // If limit = 2, and RG1 is fully matched and has 2 rows, we should
1735+ // only scan RG1 and prune other row groups
1736+ // RG4 is pruned by statistics. RG2 and RG3 are pruned by limit.
1737+ // So 2 row groups are effectively pruned due to limit pruning.
17361738
17371739 let schema = Arc :: new ( Schema :: new ( vec ! [ Field :: new( "c1" , DataType :: Int32 , false ) ] ) ) ;
1738- let query = "explain verbose SELECT c1 FROM t WHERE c1 > 0 LIMIT 2" ;
1740+ let query = "SELECT c1 FROM t WHERE c1 >= 0 LIMIT 2" ;
17391741
17401742 let batches = vec ! [
17411743 make_i32_batch( "c1" , vec![ 0 , -2 ] ) ?,
1742- make_i32_batch( "c1" , vec![ 0 , 0 ] ) ?, // RG0: Fully matched, 2 rows
1743- make_i32_batch( "c1" , vec![ 0 , 0 ] ) ?, // RG1: Fully matched, 2 rows
1744- make_i32_batch( "c1" , vec![ 0 , 0 ] ) ?, // RG2: Fully matched, 2 rows
1745- make_i32_batch( "c1" , vec![ -1 , 0 ] ) ?, // RG3: Pruned by statistics, 0 rows
1744+ make_i32_batch( "c1" , vec![ 0 , 0 ] ) ?,
1745+ make_i32_batch( "c1" , vec![ 0 , 0 ] ) ?,
1746+ make_i32_batch( "c1" , vec![ 0 , 0 ] ) ?,
1747+ make_i32_batch( "c1" , vec![ -1 , - 2 ] ) ?,
17461748 ] ;
17471749
17481750 RowGroupPruningTest :: new ( )
@@ -1751,9 +1753,9 @@ async fn test_limit_pruning() -> datafusion_common::error::Result<()> {
17511753 . with_expected_errors ( Some ( 0 ) )
17521754 . with_expected_rows ( 2 )
17531755 . with_pruned_files ( Some ( 0 ) )
1754- . with_matched_by_stats ( Some ( 5 ) ) // RG0, RG1, RG2 are matched by stats (c1 > 0 )
1755- . with_pruned_by_stats ( Some ( 0 ) ) // RG3 is pruned by stats (c1 = [-1, 0] does not satisfy c1 > 0)
1756- . with_limit_pruned_row_groups ( Some ( 4 ) ) // RG1, RG2 are pruned by limit. (RG3 is already pruned by stats )
1756+ . with_matched_by_stats ( Some ( 4 ) )
1757+ . with_pruned_by_stats ( Some ( 1 ) )
1758+ . with_limit_pruned_row_groups ( Some ( 2 ) )
17571759 . test_row_group_prune_with_custom_data ( schema, batches, 2 )
17581760 . await ;
17591761
0 commit comments