Skip to content

Commit 79e89b0

Browse files
committed
Fix proto deserialization: add enable_exact_reverse_scan default
1 parent fdfa2c7 commit 79e89b0

5 files changed

Lines changed: 10 additions & 4 deletions

File tree

datafusion/common/src/file_options/parquet_writer.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,7 @@ mod tests {
465465
skip_arrow_metadata: defaults.skip_arrow_metadata,
466466
coerce_int96: None,
467467
max_predicate_cache_size: defaults.max_predicate_cache_size,
468+
enable_exact_reverse_scan: defaults.enable_exact_reverse_scan,
468469
}
469470
}
470471

@@ -579,6 +580,8 @@ mod tests {
579580
binary_as_string: global_options_defaults.binary_as_string,
580581
skip_arrow_metadata: global_options_defaults.skip_arrow_metadata,
581582
coerce_int96: None,
583+
enable_exact_reverse_scan: global_options_defaults
584+
.enable_exact_reverse_scan,
582585
},
583586
column_specific_options,
584587
key_value_metadata,

datafusion/datasource-parquet/src/opener.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -585,10 +585,10 @@ impl FileOpener for ParquetOpener {
585585
// reversal (in ReversedRowGroupStream), not at the parquet reader
586586
// level. Applying limit here would read the first N rows in forward
587587
// order and then reverse them, giving wrong results.
588-
if let Some(limit) = limit {
589-
if !reverse_rows {
590-
builder = builder.with_limit(limit)
591-
}
588+
if let Some(limit) = limit
589+
&& !reverse_rows
590+
{
591+
builder = builder.with_limit(limit)
592592
}
593593

594594
if let Some(max_predicate_cache_size) = max_predicate_cache_size {

datafusion/proto-common/src/from_proto/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1013,6 +1013,7 @@ impl TryFrom<&protobuf::ParquetOptions> for ParquetOptions {
10131013
max_predicate_cache_size: value.max_predicate_cache_size_opt.map(|opt| match opt {
10141014
protobuf::parquet_options::MaxPredicateCacheSizeOpt::MaxPredicateCacheSize(v) => Some(v as usize),
10151015
}).unwrap_or(None),
1016+
enable_exact_reverse_scan: false,
10161017
})
10171018
}
10181019
}

datafusion/proto/src/logical_plan/file_formats.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,7 @@ mod parquet {
525525
max_predicate_cache_size: proto.max_predicate_cache_size_opt.as_ref().map(|opt| match opt {
526526
parquet_options::MaxPredicateCacheSizeOpt::MaxPredicateCacheSize(size) => *size as usize,
527527
}),
528+
enable_exact_reverse_scan: false,
528529
}
529530
}
530531
}

docs/source/user-guide/configs.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ The following configuration settings are available:
9090
| datafusion.execution.parquet.coerce_int96 | NULL | (reading) If true, parquet reader will read columns of physical type int96 as originating from a different resolution than nanosecond. This is useful for reading data from systems like Spark, which stores microsecond-resolution timestamps in an int96, allowing it to write values with a larger date range than 64-bit timestamps with nanosecond resolution. |
9191
| datafusion.execution.parquet.bloom_filter_on_read | true | (reading) Use any available bloom filters when reading parquet files |
9292
| datafusion.execution.parquet.max_predicate_cache_size | NULL | (reading) The maximum predicate cache size, in bytes. When `pushdown_filters` is enabled, sets the maximum memory used to cache the results of predicate evaluation between filter evaluation and output generation. Decreasing this value will reduce memory usage, but may increase IO and CPU usage. None means use the default parquet reader setting. 0 means no caching. |
93+
| datafusion.execution.parquet.enable_exact_reverse_scan | false | (reading) If true, reverse scans produce exact descending order by reversing rows within each row group. This allows the Sort operator to be removed entirely and fetch/limit to be pushed down to the scan. If false (default), reverse scans only reverse the row group order (inexact), so a TopK operator is kept above the scan to perform the final sort. |
9394
| datafusion.execution.parquet.data_pagesize_limit | 1048576 | (writing) Sets best effort maximum size of data page in bytes |
9495
| datafusion.execution.parquet.write_batch_size | 1024 | (writing) Sets write_batch_size in bytes |
9596
| datafusion.execution.parquet.writer_version | 1.0 | (writing) Sets the parquet writer version. Valid values are "1.0" and "2.0". |

0 commit comments

Comments
 (0)