asolimando
diff --git a/‎Cargo.lock‎
Lines changed: 1 addition & 0 deletions b/‎Cargo.lock‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎benchmarks/README.md‎
Lines changed: 18 additions & 2 deletions b/‎benchmarks/README.md‎
Lines changed: 18 additions & 2 deletions
diff --git a/‎benchmarks/bench.sh‎
Lines changed: 15 additions & 1 deletion b/‎benchmarks/bench.sh‎
Lines changed: 15 additions & 1 deletion
diff --git a/‎benchmarks/sql_benchmarks/predicate_eval/benchmarks/correlation/q73.benchmark‎
Lines changed: 7 additions & 0 deletions b/‎benchmarks/sql_benchmarks/predicate_eval/benchmarks/correlation/q73.benchmark‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎benchmarks/sql_benchmarks/predicate_eval/load/corrproxy.sql‎
Lines changed: 44 additions & 0 deletions b/‎benchmarks/sql_benchmarks/predicate_eval/load/corrproxy.sql‎
Lines changed: 44 additions & 0 deletions
diff --git a/‎benchmarks/sql_benchmarks/predicate_eval/queries/correlation/q73.sql‎
Lines changed: 14 additions & 0 deletions b/‎benchmarks/sql_benchmarks/predicate_eval/queries/correlation/q73.sql‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎benchmarks/src/sort_tpch.rs‎
Lines changed: 2 additions & 2 deletions b/‎benchmarks/src/sort_tpch.rs‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎datafusion-cli/src/main.rs‎
Lines changed: 6 additions & 5 deletions b/‎datafusion-cli/src/main.rs‎
Lines changed: 6 additions & 5 deletions
diff --git a/‎datafusion-cli/src/object_storage/stdin.rs‎
Lines changed: 18 additions & 7 deletions b/‎datafusion-cli/src/object_storage/stdin.rs‎
Lines changed: 18 additions & 7 deletions
diff --git a/‎datafusion/catalog-listing/src/table.rs‎
Lines changed: 18 additions & 4 deletions b/‎datafusion/catalog-listing/src/table.rs‎
Lines changed: 18 additions & 4 deletions
@@ -558,7 +558,15 @@ Test performance of end-to-end sort SQL queries. (While the `Sort` benchmark foc
 
 Sort integration benchmark runs whole table sort queries on TPCH `lineitem` table, with different characteristics. For example, different number of sort keys, different sort key cardinality, different number of payload columns, etc.
 
-If the TPCH tables have been converted as sorted on their first column (see [Sorted Conversion](#sorted-conversion)), you can use the `--sorted` flag to indicate that the input data is pre-sorted, allowing DataFusion to leverage that order during query execution.
+The `--sorted` flag does not sort or rewrite the input files. It declares that the `lineitem` Parquet input is already sorted ascending by its first column (`l_orderkey`). DataFusion can then leverage that ordering during query execution.
+
+To generate the expected TPC-H SF=1 Parquet input for this benchmark, run:
+
+```bash
+./bench.sh data tpch
+```
+
+For the `lineitem` table used by `sort-tpch`, this uses `tpchgen-cli` to generate Parquet data that is already ordered by `l_orderkey`. If you use a different input directory, only pass `--sorted` when the `lineitem` files already have that ordering.
 
 Additionally, an optional `--limit` flag is available for the sort benchmark. When specified, this flag appends a `LIMIT n` clause to the SQL query, effectively converting the query into a TopK query. Combining the `--sorted` and `--limit` options enables benchmarking of TopK queries on pre-sorted inputs.
 
@@ -578,7 +586,7 @@ See [`sort_tpch.rs`](src/sort_tpch.rs) for more details.
  cargo run --release --bin dfbench -- sort-tpch -p './datafusion/benchmarks/data/tpch_sf1' -o '/tmp/sort_tpch.json' --query 2
 ```
 
-3. Run all queries as TopK queries on presorted data:
+3. Run all queries as TopK queries on already sorted data:
 
 ```bash
  cargo run --release --bin dfbench -- sort-tpch --sorted --limit 10 -p './datafusion/benchmarks/data/tpch_sf1' -o '/tmp/sort_tpch.json'
@@ -598,6 +606,14 @@ In addition, topk_tpch is available from the bench.sh script:
 ./bench.sh run topk_tpch
 ```
 
+To benchmark TopK queries on TPC-H `lineitem` input ordered by `l_orderkey`, use:
+
+```bash
+./bench.sh run topk_sorted_tpch
+```
+
+This runs `dfbench sort-tpch --sorted --limit 100` through the benchmark script, using `--sorted` to declare the existing `l_orderkey` ordering.
+
 ## IMDB
 
 Run Join Order Benchmark (JOB) on IMDB dataset.
 
@@ -99,6 +99,7 @@ tpcds:                  TPCDS inspired benchmark on Scale Factor (SF) 1 (~1GB),
 sort_tpch:              Benchmark of sorting speed for end-to-end sort queries on TPC-H dataset (SF=1)
 sort_tpch10:            Benchmark of sorting speed for end-to-end sort queries on TPC-H dataset (SF=10)
 topk_tpch:              Benchmark of top-k (sorting with limit) queries on TPC-H dataset (SF=1)
+topk_sorted_tpch:       Benchmark of top-k queries on TPC-H lineitem ordered by l_orderkey (SF=1)
 push_down_topk:         Benchmark of ORDER BY ... LIMIT over outer joins on TPC-H dataset (SF=1) — exercises pushing TopK through a join
 external_aggr:          External aggregation benchmark on TPC-H dataset (SF=1)
 wide_schema:            Small-projection queries on a wide synthetic dataset (1024 cols × 256 files) — measures per-file metadata overhead
@@ -346,7 +347,7 @@ main() {
                     # same data as for tpch10
                     data_tpch "10" "parquet"
                     ;;
-                topk_tpch)
+                topk_tpch|topk_sorted_tpch)
                     # same data as for tpch
                     data_tpch "1" "parquet"
                     ;;
@@ -577,6 +578,9 @@ main() {
                 topk_tpch)
                     run_topk_tpch
                     ;;
+                topk_sorted_tpch)
+                    run_topk_sorted_tpch
+                    ;;
                 push_down_topk)
                     run_push_down_topk
                     ;;
@@ -1506,6 +1510,16 @@ run_topk_tpch() {
     $CARGO_COMMAND --bin dfbench -- sort-tpch --iterations 5 --path "${TPCH_DIR}" -o "${RESULTS_FILE}" --limit 100 ${QUERY_ARG} ${LATENCY_ARG}
 }
 
+# Runs the sort-tpch integration benchmark with --sorted and --limit 100 (TopK on input declared as already sorted)
+run_topk_sorted_tpch() {
+    TPCH_DIR="${DATA_DIR}/tpch_sf1"
+    RESULTS_FILE="${RESULTS_DIR}/run_topk_sorted_tpch.json"
+    echo "RESULTS_FILE: ${RESULTS_FILE}"
+    echo "Running sorted topk tpch benchmark..."
+
+    $CARGO_COMMAND --bin dfbench -- sort-tpch --iterations 5 --path "${TPCH_DIR}" -o "${RESULTS_FILE}" --sorted --limit 100 ${QUERY_ARG} ${LATENCY_ARG}
+}
+
 # Runs the nlj benchmark
 run_nlj() {
     RESULTS_FILE="${RESULTS_DIR}/nlj.json"
 
@@ -0,0 +1,7 @@
+subgroup correlation
+
+template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template
+SUBGROUP=correlation
+QPAD=73
+DATASET=corrproxy
+NAME=correlation_q73_redundant_proxy
@@ -0,0 +1,44 @@
+-- Correlated-proxy dataset: a cheap integer predicate that is a perfect proxy
+-- for three string predicates, plus one independent string predicate.
+--
+--   c0          = 1 for ~30% of rows (cheap proxy)
+--   s1, s2, s3  each contain a marker exactly where c0 = 1     (correlated)
+--   s4          contains a marker for an independent ~30%      (independent)
+--
+-- The four string columns are deliberately *identical in shape*: same width,
+-- the same single marker at the same offset, each matched by an equally cheap
+-- regex with the same ~30% marginal selectivity. Marginally the four regex
+-- predicates are therefore indistinguishable -- same cost, same selectivity, in
+-- every position -- so neither a marginal cost/selectivity estimator nor
+-- runtime timing can prefer one over another. Only their *conditional*
+-- behaviour behind the proxy differs: after `c0 = 1`, the s1/s2/s3 regexes keep
+-- every survivor (each re-tests the proxy's condition) while the s4 regex still
+-- discards ~70%. Only joint statistics can see that; an independence assumption
+-- prices all four regexes identically in every position.
+--
+-- PRED_FILL sets the filler width on each side of the marker (a non-matching
+-- `regexp_like` must scan the whole value), and PRED_ROWS sizes the table.
+CREATE TABLE t AS
+WITH base AS (
+  SELECT
+    -- The cheap proxy and the independent control share one definition each, so
+    -- the perfect-proxy / independence invariants can't drift apart silently.
+    (value * 7)  % 100 < 30 AS proxy,   -- ~30%, drives c0 and s1/s2/s3
+    (value * 13) % 100 < 30 AS indep    -- ~30%, independent of proxy, drives s4
+  FROM generate_series(1, ${PRED_ROWS:-1000000})
+)
+SELECT
+  CASE WHEN proxy THEN 1 ELSE 0 END AS c0,
+  repeat('q', ${PRED_FILL:-30})
+    || CASE WHEN proxy THEN 'aaa' ELSE 'zzz' END
+    || repeat('q', ${PRED_FILL:-30}) AS s1,
+  repeat('q', ${PRED_FILL:-30})
+    || CASE WHEN proxy THEN 'ccc' ELSE 'zzz' END
+    || repeat('q', ${PRED_FILL:-30}) AS s2,
+  repeat('q', ${PRED_FILL:-30})
+    || CASE WHEN proxy THEN 'ddd' ELSE 'zzz' END
+    || repeat('q', ${PRED_FILL:-30}) AS s3,
+  repeat('q', ${PRED_FILL:-30})
+    || CASE WHEN indep THEN 'bbb' ELSE 'zzz' END
+    || repeat('q', ${PRED_FILL:-30}) AS s4
+FROM base;
@@ -0,0 +1,14 @@
+-- Hidden: `c0 = 1` is a perfect proxy for the s1/s2/s3 regexes -- after the
+-- cheap proxy, each of those keeps every survivor while the equally selective
+-- (~30%) s4 regex still discards ~70%. The optimal order is [c0, s4, s1/s2/s3]
+-- (one informative regex on 30% of rows, the three redundant ones on 9%), but
+-- the four regexes are marginally identical -- same width, same marker offset,
+-- same cost, same selectivity -- so ranking them requires knowing their *joint*
+-- distribution with the proxy. Written with the redundant regexes first,
+-- grouped with their proxy, as an author naturally would.
+SELECT count(*) FROM t
+WHERE c0 = 1
+  AND regexp_like(s1, 'a.a')
+  AND regexp_like(s2, 'c.c')
+  AND regexp_like(s3, 'd.d')
+  AND regexp_like(s4, 'b.b');
@@ -64,8 +64,8 @@ pub struct RunOpt {
     #[arg(short = 'm', long = "mem-table")]
     mem_table: bool,
 
-    /// Mark the first column of each table as sorted in ascending order.
-    /// The tables should have been created with the `--sort` option for this to have any effect.
+    /// Declare that the first column of the input table is already sorted in ascending order.
+    /// This flag only attaches ordering metadata; it does not sort the input files.
     #[arg(short = 't', long = "sorted")]
     sorted: bool,
 
 
@@ -335,7 +335,8 @@ fn get_session_config(args: &Args) -> Result<SessionConfig> {
         if batch_size == 0 {
             return config_err!("batch_size must be greater than 0");
         }
-        config_options.execution.batch_size = batch_size;
+        config_options.execution.batch_size =
+            datafusion_common::config::ConfigNonZeroUsize::try_new(batch_size)?;
     };
 
     // use easier to understand "tree" mode by default
@@ -641,9 +642,9 @@ mod tests {
         +-----------------------------------+-----------------+---------------------+------+------------------+
         | filename                          | file_size_bytes | metadata_size_bytes | hits | extra            |
         +-----------------------------------+-----------------+---------------------+------+------------------+
-        | alltypes_plain.parquet            | 1851            | 8794                | 2    | page_index=false |
+        | alltypes_plain.parquet            | 1851            | 8794                | 1    | page_index=false |
         | alltypes_tiny_pages.parquet       | 454233          | 268970              | 2    | page_index=true  |
-        | lz4_raw_compressed_larger.parquet | 380836          | 1331                | 2    | page_index=false |
+        | lz4_raw_compressed_larger.parquet | 380836          | 1331                | 1    | page_index=false |
         +-----------------------------------+-----------------+---------------------+------+------------------+
         ");
 
@@ -672,9 +673,9 @@ mod tests {
         +-----------------------------------+-----------------+---------------------+------+------------------+
         | filename                          | file_size_bytes | metadata_size_bytes | hits | extra            |
         +-----------------------------------+-----------------+---------------------+------+------------------+
-        | alltypes_plain.parquet            | 1851            | 8794                | 5    | page_index=false |
+        | alltypes_plain.parquet            | 1851            | 8794                | 4    | page_index=false |
         | alltypes_tiny_pages.parquet       | 454233          | 268970              | 2    | page_index=true  |
-        | lz4_raw_compressed_larger.parquet | 380836          | 1331                | 3    | page_index=false |
+        | lz4_raw_compressed_larger.parquet | 380836          | 1331                | 2    | page_index=false |
         +-----------------------------------+-----------------+---------------------+------+------------------+
         ");
 
 
@@ -99,9 +99,9 @@ impl StdinUtils {
         format!("{}:///{object_name}", Self::SCHEME)
     }
 
-    /// Returns the object store backing the `stdin://` scheme, reading and
-    /// buffering standard input on first use and reusing that buffer for any
-    /// subsequent `stdin://` table created in the same session.
+    /// Returns the object store backing the `stdin://` scheme, buffering all of
+    /// standard input when the store is first constructed and reusing that
+    /// buffer for any subsequent `stdin://` table created in the same session.
     ///
     /// stdin is a one-shot stream: it can only be read once. The object store
     /// registry keys by scheme/authority, so every `stdin://` URL maps to the
@@ -268,15 +268,26 @@ mod tests {
         // stdin can only be read once, so a second `stdin://` table must reuse
         // the store buffered by the first instead of re-reading (now-empty)
         // stdin and overwriting it.
+        //
+        // The very first read happens inside `get_or_create` -> `object_store`,
+        // which consumes the real process stdin and so cannot be driven from a
+        // unit test. Seed the registry with the store that first read would have
+        // produced (as the first `CREATE EXTERNAL TABLE` does), then drive the
+        // lookup through `get_or_create` and assert it hands back that exact
+        // store rather than rebuilding it.
         let url = Url::parse("stdin:///stdin.csv").unwrap();
-        let store =
-            StdinUtils::in_memory_object_store(&url, b"a\n1\n2\n".to_vec()).await?;
+        let path = ObjectStorePath::from_url_path(url.path())?;
+        let buffered: Arc<dyn ObjectStore> = Arc::new(InMemory::new());
+        buffered.put(&path, b"a\n1\n2\n".to_vec().into()).await?;
 
         let ctx = SessionContext::new();
-        ctx.register_object_store(&url, store);
+        ctx.register_object_store(&url, Arc::clone(&buffered));
 
         let reused = StdinUtils::get_or_create(&ctx.state(), &url).await?;
-        let path = ObjectStorePath::from_url_path(url.path())?;
+        assert!(
+            Arc::ptr_eq(&buffered, &reused),
+            "get_or_create must reuse the registered stdin store, not rebuild it"
+        );
         let bytes = reused.get(&path).await?.bytes().await?;
         assert_eq!(bytes.as_ref(), b"a\n1\n2\n");
         Ok(())
 
@@ -34,8 +34,7 @@ use datafusion_datasource::schema_adapter::SchemaAdapterFactory;
 use datafusion_datasource::{
     ListingTableUrl, PartitionedFile, TableSchemaBuilder, compute_all_files_statistics,
 };
-use datafusion_execution::cache::cache_manager::FileStatisticsCache;
-use datafusion_execution::cache::cache_manager::TableScopedPath;
+use datafusion_execution::cache::cache_manager::{FileStatisticsCache, TableScopedPath};
 use datafusion_expr::dml::InsertOp;
 use datafusion_expr::execution_props::ExecutionProps;
 use datafusion_expr::{Expr, TableProviderFilterPushDown, TableType};
@@ -264,6 +263,21 @@ impl ListingTable {
         self
     }
 
+    /// Returns the shared file-statistics cache if it may be used for this lookup.
+    ///
+    /// Yields `None` when no cache is configured (`collected_statistics` is
+    /// `None`), or when the read is anonymous (`has_table_reference` is `false`)
+    /// and the schema source is not `SchemaSource::Inferred`. In every other
+    /// case the configured cache is returned by reference.
+    fn statistics_cache(
+        &self,
+        has_table_reference: bool,
+    ) -> Option<&Arc<FileStatisticsCache>> {
+        let shared_cache = self.collected_statistics.as_ref()?;
+        if has_table_reference || self.schema_source == SchemaSource::Inferred {
+            Some(shared_cache)
+        } else {
+            // Anonymous specified-schema reads can use the same file path with
+            // different logical schemas. File statistics are schema-dependent,
+            // so avoid reusing stats computed for a different read schema.
+            None
+        }
+    }
+
     /// Specify the SQL definition for this table, if any
     pub fn with_definition(mut self, definition: Option<String>) -> Self {
         self.definition = definition;
@@ -807,7 +821,7 @@ impl ListingTable {
         let meta = &part_file.object_meta;
 
         // Check cache first - if we have valid cached statistics and ordering
-        if let Some(cache) = &self.collected_statistics
+        if let Some(cache) = self.statistics_cache(path.table.is_some())
             && let Some(cached) = cache.get(&path)
             && cached.is_valid_for(meta)
         {
@@ -825,7 +839,7 @@ impl ListingTable {
         let statistics = Arc::new(file_meta.statistics);
 
         // Store in cache
-        if let Some(cache) = &self.collected_statistics {
+        if let Some(cache) = self.statistics_cache(path.table.is_some()) {
             cache.put(
                 &path,
                 CachedFileMetadata::new(