Skip to content

Commit 53aab8b

Browse files
committed
fix, add tests
1 parent 1a52a45 commit 53aab8b

2 files changed

Lines changed: 144 additions & 2 deletions

File tree

datafusion/datasource-parquet/src/metadata.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ impl<'a> DFParquetMetadata<'a> {
218218
/// extracting ordering, as it only fetches the metadata once.
219219
///
220220
/// # Returns
221-
/// A tuple of (Statistics, Option<LexOrdering>) where the ordering is `None` if:
221+
/// A tuple of (Statistics, `Option<LexOrdering>`) where the ordering is `None` if:
222222
/// - No row groups have sorting_columns
223223
/// - Row groups have inconsistent sorting_columns
224224
/// - Sorting columns cannot be mapped to the Arrow schema
@@ -578,7 +578,7 @@ pub(crate) fn sort_expr_to_sorting_column(
578578
})
579579
}
580580

581-
/// Convert a LexOrdering to Vec<SortingColumn> for Parquet.
581+
/// Convert a LexOrdering to `Vec<SortingColumn>` for Parquet.
582582
///
583583
/// Returns `Err` if any expression is not a simple column reference.
584584
pub(crate) fn lex_ordering_to_sorting_columns(

datafusion/sqllogictest/test_files/parquet.slt

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -924,3 +924,145 @@ DROP TABLE sorted_inferred;
924924

925925
statement ok
926926
DROP TABLE ordering_src;
927+
928+
###################
929+
# Test: DESC ordering inference from Parquet sorting_columns metadata
930+
###################
931+
932+
statement ok
933+
CREATE TABLE ordering_desc_src AS VALUES (5, 50), (4, 40), (3, 30), (2, 20), (1, 10);
934+
935+
query I
936+
COPY (SELECT column1 as a, column2 as b FROM ordering_desc_src ORDER BY a DESC NULLS LAST)
937+
TO 'test_files/scratch/parquet/ordering_desc.parquet'
938+
STORED AS PARQUET;
939+
----
940+
5
941+
942+
statement ok
943+
CREATE EXTERNAL TABLE sorted_desc (a INT, b INT)
944+
STORED AS PARQUET
945+
LOCATION 'test_files/scratch/parquet/ordering_desc.parquet';
946+
947+
# Verify DESC ordering is inferred - no SortExec needed
948+
query TT
949+
EXPLAIN SELECT a, b FROM sorted_desc ORDER BY a DESC NULLS LAST;
950+
----
951+
logical_plan
952+
01)Sort: sorted_desc.a DESC NULLS LAST
953+
02)--TableScan: sorted_desc projection=[a, b]
954+
physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/ordering_desc.parquet]]}, projection=[a, b], output_ordering=[a@0 DESC NULLS LAST], file_type=parquet
955+
956+
statement ok
957+
DROP TABLE sorted_desc;
958+
959+
statement ok
960+
DROP TABLE ordering_desc_src;
961+
962+
###################
963+
# Test: Reverse ordering - file is DESC but query wants ASC
964+
# The scan is planned with reverse_row_groups=true, but a SortExec is still required
965+
###################
966+
967+
statement ok
968+
CREATE TABLE ordering_reverse_src AS VALUES (5, 50), (4, 40), (3, 30), (2, 20), (1, 10);
969+
970+
query I
971+
COPY (SELECT column1 as a, column2 as b FROM ordering_reverse_src ORDER BY a DESC NULLS LAST)
972+
TO 'test_files/scratch/parquet/ordering_reverse.parquet'
973+
STORED AS PARQUET;
974+
----
975+
5
976+
977+
statement ok
978+
CREATE EXTERNAL TABLE sorted_reverse (a INT, b INT)
979+
STORED AS PARQUET
980+
LOCATION 'test_files/scratch/parquet/ordering_reverse.parquet';
981+
982+
# Query wants ASC but file is DESC - the scan is planned with reverse_row_groups=true and the SortExec is preserved
983+
query TT
984+
EXPLAIN SELECT a, b FROM sorted_reverse ORDER BY a ASC NULLS FIRST;
985+
----
986+
logical_plan
987+
01)Sort: sorted_reverse.a ASC NULLS FIRST
988+
02)--TableScan: sorted_reverse projection=[a, b]
989+
physical_plan
990+
01)SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
991+
02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/ordering_reverse.parquet]]}, projection=[a, b], file_type=parquet, reverse_row_groups=true
992+
993+
statement ok
994+
DROP TABLE sorted_reverse;
995+
996+
statement ok
997+
DROP TABLE ordering_reverse_src;
998+
999+
###################
1000+
# Test: Multi-column ordering inference from Parquet sorting_columns metadata
1001+
###################
1002+
1003+
statement ok
1004+
CREATE TABLE ordering_multi_src AS VALUES (1, 10, 100), (1, 20, 200), (2, 10, 300), (2, 20, 400);
1005+
1006+
query I
1007+
COPY (SELECT column1 as a, column2 as b, column3 as c FROM ordering_multi_src ORDER BY a ASC NULLS FIRST, b ASC NULLS FIRST)
1008+
TO 'test_files/scratch/parquet/ordering_multi.parquet'
1009+
STORED AS PARQUET;
1010+
----
1011+
4
1012+
1013+
statement ok
1014+
CREATE EXTERNAL TABLE sorted_multi (a INT, b INT, c INT)
1015+
STORED AS PARQUET
1016+
LOCATION 'test_files/scratch/parquet/ordering_multi.parquet';
1017+
1018+
# Verify multi-column ordering is inferred - no SortExec needed
1019+
query TT
1020+
EXPLAIN SELECT a, b, c FROM sorted_multi ORDER BY a ASC NULLS FIRST, b ASC NULLS FIRST;
1021+
----
1022+
logical_plan
1023+
01)Sort: sorted_multi.a ASC NULLS FIRST, sorted_multi.b ASC NULLS FIRST
1024+
02)--TableScan: sorted_multi projection=[a, b, c]
1025+
physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/ordering_multi.parquet]]}, projection=[a, b, c], output_ordering=[a@0 ASC, b@1 ASC], file_type=parquet
1026+
1027+
statement ok
1028+
DROP TABLE sorted_multi;
1029+
1030+
statement ok
1031+
DROP TABLE ordering_multi_src;
1032+
1033+
###################
1034+
# Test: Files with no ordering metadata should not infer ordering
1035+
###################
1036+
1037+
statement ok
1038+
CREATE TABLE no_ordering_src AS VALUES (3, 30), (1, 10), (2, 20);
1039+
1040+
# Write Parquet WITHOUT ORDER BY - no sorting_columns metadata
1041+
query I
1042+
COPY (SELECT column1 as a, column2 as b FROM no_ordering_src)
1043+
TO 'test_files/scratch/parquet/no_ordering.parquet'
1044+
STORED AS PARQUET;
1045+
----
1046+
3
1047+
1048+
statement ok
1049+
CREATE EXTERNAL TABLE no_ordering (a INT, b INT)
1050+
STORED AS PARQUET
1051+
LOCATION 'test_files/scratch/parquet/no_ordering.parquet';
1052+
1053+
# Verify NO ordering is inferred - SortExec IS required
1054+
query TT
1055+
EXPLAIN SELECT a, b FROM no_ordering ORDER BY a ASC NULLS FIRST;
1056+
----
1057+
logical_plan
1058+
01)Sort: no_ordering.a ASC NULLS FIRST
1059+
02)--TableScan: no_ordering projection=[a, b]
1060+
physical_plan
1061+
01)SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
1062+
02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/no_ordering.parquet]]}, projection=[a, b], file_type=parquet
1063+
1064+
statement ok
1065+
DROP TABLE no_ordering;
1066+
1067+
statement ok
1068+
DROP TABLE no_ordering_src;

0 commit comments

Comments
 (0)