Fix e2e SQL tests.

Rachelint · Rachelint · commit 2014a94b46b3 · 2025-04-22T00:02:57.000+08:00
diff --git a/datafusion/execution/src/runtime_env.rs b/datafusion/execution/src/runtime_env.rs
@@ -263,7 +263,7 @@ impl RuntimeEnvBuilder {
             (pool, disk_manager)
         } else {
             (
-                Arc::new(UnboundedMemoryPool::default()),
+                Arc::new(UnboundedMemoryPool::default()) as _,
                 DiskManagerConfig::Disabled,
             )
         };
diff --git a/datafusion/physical-expr/src/lib.rs b/datafusion/physical-expr/src/lib.rs
@@ -47,7 +47,7 @@ pub mod execution_props {
     pub use datafusion_expr::var_provider::{VarProvider, VarType};
 }
 
-pub use aggregate::groups_accumulator::{GroupsAccumulatorAdapter, FlatNullState};
+pub use aggregate::groups_accumulator::{FlatNullState, GroupsAccumulatorAdapter};
 pub use analysis::{analyze, AnalysisContext, ExprBoundaries};
 pub use equivalence::{
     calculate_union, AcrossPartitions, ConstExpr, EquivalenceProperties,
diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs
@@ -42,7 +42,7 @@ use datafusion_common::{internal_err, DataFusionError, Result};
 use datafusion_execution::disk_manager::RefCountedTempFile;
 use datafusion_execution::memory_pool::proxy::VecAllocExt;
 use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation};
-use datafusion_execution::{DiskManager, TaskContext};
+use datafusion_execution::TaskContext;
 use datafusion_expr::{EmitTo, GroupsAccumulator};
 use datafusion_physical_expr::expressions::Column;
 use datafusion_physical_expr::{GroupsAccumulatorAdapter, PhysicalSortExpr};
@@ -872,9 +872,9 @@ impl Stream for GroupedHashAggregateStream {
                     //   - If found `Err`, throw it, end this stream abnormally
                     //   - If found `None`, it means all blocks are polled, end this stream normally
                     //   - If found `Some`, return it and wait next polling
-                    let emit_result = self.emit(emit_to, false);
+                    let emit_result = self.emit(EmitTo::NextBlock, false);
                     let Ok(batch_opt) = emit_result else {
-                        return Poll::Ready(Some(emit_result));
+                        return Poll::Ready(Some(Err(emit_result.unwrap_err())));
                     };
 
                     let Some(batch) = batch_opt else {
@@ -888,7 +888,7 @@ impl Stream for GroupedHashAggregateStream {
                         continue;
                     };
 
-                    debug_assert!(output_batch.num_rows() > 0);
+                    debug_assert!(batch.num_rows() > 0);
                     return Poll::Ready(Some(Ok(
                         batch.record_output(&self.baseline_metrics)
                     )));
diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt
@@ -217,6 +217,7 @@ datafusion.catalog.newlines_in_values false
 datafusion.execution.batch_size 8192
 datafusion.execution.coalesce_batches true
 datafusion.execution.collect_statistics false
+datafusion.execution.enable_aggregation_blocked_groups true
 datafusion.execution.enable_recursive_ctes true
 datafusion.execution.enforce_batch_size_in_joins false
 datafusion.execution.keep_partition_by_columns false
@@ -317,6 +318,7 @@ datafusion.catalog.newlines_in_values false Specifies whether newlines in (quote
 datafusion.execution.batch_size 8192 Default batch size while creating new batches, it's especially useful for buffer-in-memory batches since creating tiny batches would result in too much metadata memory consumption
 datafusion.execution.coalesce_batches true When set to true, record batches will be examined between each operator and small batches will be coalesced into larger batches. This is helpful when there are highly selective filters or joins that could produce tiny output batches. The target batch size is determined by the configuration setting
 datafusion.execution.collect_statistics false Should DataFusion collect statistics after listing files
+datafusion.execution.enable_aggregation_blocked_groups true Should DataFusion use the the blocked approach to manage the groups values and their related states in accumulators. By default, the blocked approach will be used. And the blocked approach allocates capacity for the block based on a predefined block size firstly. When the block reaches its limit, we allocate a new block (also with the same predefined block size based capacity) instead of expanding the current one and copying the data. If setting this flag to `false`, will fall-back to use the single approach, values are managed within a single large block(can think of it as a Vec). As this block grows, it often triggers numerous copies, resulting in poor performance.
 datafusion.execution.enable_recursive_ctes true Should DataFusion support recursive CTEs
 datafusion.execution.enforce_batch_size_in_joins false Should DataFusion enforce batch size in joins or not. By default, DataFusion will not enforce batch size in joins. Enforcing batch size in joins can reduce memory usage when joining large tables with a highly-selective join filter, but is also slightly slower.
 datafusion.execution.keep_partition_by_columns false Should DataFusion keep the columns used for partition_by in the output RecordBatches

Original file line number	Diff line number	Diff line change
`@@ -263,7 +263,7 @@ impl RuntimeEnvBuilder {`
`263`	`263`	`(pool, disk_manager)`
`264`	`264`	`} else {`
`265`	`265`	`(`
`266`		`- Arc::new(UnboundedMemoryPool::default()),`
	`266`	`+ Arc::new(UnboundedMemoryPool::default()) as _,`
`267`	`267`	`DiskManagerConfig::Disabled,`
`268`	`268`	`)`
`269`	`269`	`};`
Original file line number	Diff line number	Diff line change
`@@ -47,7 +47,7 @@ pub mod execution_props {`
`47`	`47`	`pub use datafusion_expr::var_provider::{VarProvider, VarType};`
`48`	`48`	`}`
`49`	`49`
`50`		`-pub use aggregate::groups_accumulator::{GroupsAccumulatorAdapter, FlatNullState};`
	`50`	`+pub use aggregate::groups_accumulator::{FlatNullState, GroupsAccumulatorAdapter};`
`51`	`51`	`pub use analysis::{analyze, AnalysisContext, ExprBoundaries};`
`52`	`52`	`pub use equivalence::{`
`53`	`53`	`calculate_union, AcrossPartitions, ConstExpr, EquivalenceProperties,`