diff --git a/src/query/ast/src/parser/parser.rs b/src/query/ast/src/parser/parser.rs index 02aa6b85efbd8..8d8badbea13f6 100644 --- a/src/query/ast/src/parser/parser.rs +++ b/src/query/ast/src/parser/parser.rs @@ -16,17 +16,7 @@ use nom::Parser; use crate::ParseError; use crate::Result; -use crate::ast::DatabaseRef; -use crate::ast::Expr; -use crate::ast::Identifier; -use crate::ast::Literal; -use crate::ast::ProcedureIdentity; -use crate::ast::Query; -use crate::ast::SelectTarget; -use crate::ast::SetExpr; -use crate::ast::Statement; -use crate::ast::StatementWithFormat; -use crate::ast::TableRef; +use crate::ast::*; use crate::parser::Backtrace; use crate::parser::common::IResult; use crate::parser::common::comma_separated_list0; @@ -48,9 +38,6 @@ use crate::parser::statement::statement; use crate::parser::token::Token; use crate::parser::token::TokenKind; use crate::parser::token::Tokenizer; -use crate::visit::VisitControl; -use crate::visit::VisitorMut; -use crate::visit::WalkMut; pub fn tokenize_sql(sql: &str) -> Result>> { Tokenizer::new(sql).collect::>>() @@ -242,6 +229,10 @@ fn assert_reparse(sql: &str, stmt: StatementWithFormat) -> std::result::Result<( #[cfg(debug_assertions)] fn reset_ast(mut stmt: StatementWithFormat) -> StatementWithFormat { + use crate::visit::VisitControl; + use crate::visit::VisitorMut; + use crate::visit::WalkMut; + struct ResetAst; impl ResetAst { diff --git a/src/query/ast/src/visit.rs b/src/query/ast/src/visit.rs index faff5a488ee47..10f4a87fd66e3 100644 --- a/src/query/ast/src/visit.rs +++ b/src/query/ast/src/visit.rs @@ -88,6 +88,8 @@ pub enum VisitControl { Break(B), } +pub type VisitResult = Result; + /// Pre-order visitor hooks for immutable AST traversal. /// /// Returning `Continue` lets the walker descend into children automatically. diff --git a/src/query/sql/src/planner/binder/aggregate.rs b/src/query/sql/src/planner/binder/aggregate.rs index 7cf4995bfd94b..7170031a6ef16 100644 --- a/src/query/sql/src/planner/binder/aggregate.rs +++ b/src/query/sql/src/planner/binder/aggregate.rs @@ -60,8 +60,8 @@ use crate::plans::GroupingSets; use crate::plans::ScalarExpr; use crate::plans::ScalarItem; use crate::plans::UDAFCall; -use crate::plans::Visitor; -use crate::plans::VisitorMut; +use crate::plans::Visitor as ScalarVisitor; +use crate::plans::VisitorMut as ScalarVisitorMut; use crate::plans::walk_expr_mut; /// Information for `GROUPING SETS`. @@ -763,7 +763,7 @@ struct ExistingAggregateRewriter<'a> { error_message: &'a str, } -impl<'a> VisitorMut<'a> for ExistingAggregateRewriter<'a> { +impl<'a> ScalarVisitorMut<'a> for ExistingAggregateRewriter<'a> { fn visit(&mut self, expr: &'a mut ScalarExpr) -> Result<()> { match expr { ScalarExpr::AggregateFunction(aggregate) => { @@ -816,7 +816,7 @@ impl<'a> VisitorMut<'a> for ExistingAggregateRewriter<'a> { } } -impl<'a> VisitorMut<'a> for AggregateRewriter<'a> { +impl<'a> ScalarVisitorMut<'a> for AggregateRewriter<'a> { fn visit(&mut self, expr: &'a mut ScalarExpr) -> Result<()> { match expr { ScalarExpr::AggregateFunction(aggregate) => { @@ -906,8 +906,7 @@ impl Binder { } } - let original_context = bind_context.expr_context.clone(); - bind_context.set_expr_context(ExprContext::GroupClaue); + let original_context = bind_context.replace_expr_context(ExprContext::GroupClaue); let group_by = Self::expand_group(group_by.clone())?; match &group_by { @@ -935,7 +934,7 @@ impl Binder { } _ => unreachable!(), } - bind_context.set_expr_context(original_context); + bind_context.expr_context = original_context; Ok(()) } @@ -1472,6 +1471,34 @@ impl Binder { Ok((scalar.clone(), scalar.data_type()?)) } } + + pub(super) fn bind_and_rewrite_aggregate_expr( + &mut self, + bind_context: &mut BindContext, + aliases: &[(String, ScalarExpr)], + expr_context: ExprContext, + expr: &Expr, + ) -> Result { + let original_context = bind_context.replace_expr_context(expr_context); + + let mut scalar_binder = ScalarBinder::new( + bind_context, + self.ctx.clone(), + &self.name_resolution_ctx, + self.metadata.clone(), + aliases, + ); + + let (mut result, _) = scalar_binder.bind(expr)?; + AggregateRewriter::rewrite_expr( + &mut bind_context.aggregate_info, + self.metadata.clone(), + &mut result, + )?; + + bind_context.expr_context = original_context; + Ok(result) + } } fn build_replaced_aggregate_column( diff --git a/src/query/sql/src/planner/binder/aggregate_prepass.rs b/src/query/sql/src/planner/binder/aggregate_prepass.rs new file mode 100644 index 0000000000000..df4e9a47c6c29 --- /dev/null +++ b/src/query/sql/src/planner/binder/aggregate_prepass.rs @@ -0,0 +1,933 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::collections::HashSet; + +use databend_common_ast::ast::ColumnID; +use databend_common_ast::ast::ColumnRef; +use databend_common_ast::ast::Expr; +use databend_common_ast::ast::FunctionCall; +use databend_common_ast::ast::OrderByExpr; +use databend_common_ast::ast::Query; +use databend_common_ast::ast::SelectTarget; +use databend_common_ast::visit::VisitControl; +use databend_common_ast::visit::VisitResult; +use databend_common_ast::visit::Visitor; +use databend_common_ast::visit::Walk; +use databend_common_base::runtime::block_on; +use databend_common_exception::Result; +use databend_common_functions::aggregates::AggregateFunctionFactory; +use databend_common_functions::is_builtin_function; +use databend_common_meta_app::principal::UDFDefinition; +use databend_common_users::UserApiProvider; + +use super::ExprContext; +use crate::BindContext; +use crate::NameResolutionContext; +use crate::binder::Binder; +use crate::binder::select::SelectList; +use crate::normalize_identifier; +use crate::plans::ScalarExpr; + +macro_rules! try_ast_walk { + ($expr:expr) => { + match $expr? { + VisitControl::Continue | VisitControl::SkipChildren => {} + VisitControl::Break(value) => return Ok(VisitControl::Break(value)), + } + }; +} + +#[derive(Debug, Clone, PartialEq, Eq, enum_as_inner::EnumAsInner)] +pub(super) enum AggregatePrepassSource { + DirectClause, + AliasExpansion(String), +} + +fn is_aggregate_target( + name_resolution_ctx: &NameResolutionContext, + udaf_names: &HashSet, + func: &FunctionCall, +) -> bool { + if func.window.is_some() { + return false; + } + + let func_name = normalize_identifier(&func.name, name_resolution_ctx).name; + AggregateFunctionFactory::instance().contains(func_name.as_str()) + || func_name.eq_ignore_ascii_case("grouping") + || udaf_names.contains(func_name.as_str()) +} + +#[derive(Debug, Clone, PartialEq)] +pub(super) struct AggregatePrepassFact { + pub expr_context: ExprContext, + pub source: AggregatePrepassSource, + pub expr: Expr, + pub contains_window: bool, +} + +#[derive(Debug, Clone, Copy, Default)] +struct ExprFlags { + contains_aggregate: bool, + contains_window: bool, + contains_subquery: bool, +} + +trait ExprInspection { + fn observe_top_level_expr(&mut self, _expr: &Expr) {} + + fn observe_top_level_column_ref(&mut self, _column: &ColumnRef) {} + + fn mark_contains_subquery(&mut self) {} + + fn walk_expr(&mut self, expr: &Expr) + where Self: std::marker::Sized { + expr.walk(&mut ExprWalker { + inspection: self, + in_subquery: false, + }) + .unwrap(); + } +} + +impl ExprInspection for (L, R) { + fn observe_top_level_expr(&mut self, expr: &Expr) { + self.0.observe_top_level_expr(expr); + self.1.observe_top_level_expr(expr); + } + + fn observe_top_level_column_ref(&mut self, column: &ColumnRef) { + self.0.observe_top_level_column_ref(column); + self.1.observe_top_level_column_ref(column); + } + + fn mark_contains_subquery(&mut self) { + self.0.mark_contains_subquery(); + self.1.mark_contains_subquery(); + } +} + +struct ExprWalker<'a, T> { + inspection: &'a mut T, + in_subquery: bool, +} + +impl ExprWalker<'_, T> +where T: ExprInspection +{ + fn visit_query_children(&mut self, query: &Query) -> VisitResult { + if let Some(with) = &query.with { + for cte in &with.ctes { + try_ast_walk!(cte.walk(self)); + } + } + try_ast_walk!(query.body.walk(self)); + for item in &query.order_by { + try_ast_walk!(item.walk(self)); + } + for expr in &query.limit { + try_ast_walk!(expr.walk(self)); + } + if let Some(offset) = &query.offset { + try_ast_walk!(offset.walk(self)); + } + + Ok(VisitControl::Continue) + } +} + +impl Visitor for ExprWalker<'_, T> +where T: ExprInspection +{ + fn visit_expr(&mut self, expr: &Expr) -> VisitResult { + if !self.in_subquery { + self.inspection.observe_top_level_expr(expr); + if let Expr::ColumnRef { column, .. } = expr { + self.inspection.observe_top_level_column_ref(column); + } + } + Ok(VisitControl::Continue) + } + + fn visit_query(&mut self, query: &Query) -> VisitResult { + self.inspection.mark_contains_subquery(); + let was_in_subquery = self.in_subquery; + self.in_subquery = true; + let result = self.visit_query_children(query); + self.in_subquery = was_in_subquery; + result?; + Ok(VisitControl::SkipChildren) + } +} + +struct ExprFlagsProbe<'a> { + name_resolution_ctx: &'a NameResolutionContext, + udaf_names: &'a HashSet, + result: ExprFlags, +} + +impl ExprInspection for ExprFlagsProbe<'_> { + fn observe_top_level_expr(&mut self, expr: &Expr) { + match expr { + Expr::CountAll { window: None, .. } => self.result.contains_aggregate = true, + Expr::FunctionCall { func, .. } + if is_aggregate_target(self.name_resolution_ctx, self.udaf_names, func) => + { + self.result.contains_aggregate = true + } + _ if is_window_expr(expr) => { + self.result.contains_window = true; + } + _ => {} + } + } + + fn mark_contains_subquery(&mut self) { + self.result.contains_subquery = true; + } +} + +struct ReferencedAliasProbe<'a> { + name_resolution_ctx: &'a NameResolutionContext, + aliases: &'a HashSet<&'a str>, + referenced_aliases: BTreeSet, +} + +impl ExprInspection for ReferencedAliasProbe<'_> { + fn observe_top_level_column_ref(&mut self, column: &ColumnRef) { + let Some(alias) = resolve_unqualified_alias_name(self.name_resolution_ctx, column) else { + return; + }; + + if self.aliases.contains(alias.as_str()) { + self.referenced_aliases.insert(alias); + } + } +} + +struct FunctionNameProbe<'a> { + name_resolution_ctx: &'a NameResolutionContext, + names: BTreeSet, +} + +impl ExprInspection for FunctionNameProbe<'_> { + fn observe_top_level_expr(&mut self, expr: &Expr) { + let Expr::FunctionCall { func, .. } = expr else { + return; + }; + + if func.window.is_some() { + return; + } + + self.names + .insert(normalize_identifier(&func.name, self.name_resolution_ctx).name); + } +} + +fn is_window_expr(expr: &Expr) -> bool { + match expr { + Expr::CountAll { + window: Some(_), .. + } => true, + Expr::FunctionCall { func, .. } => func.window.is_some(), + _ => false, + } +} + +#[derive(Debug, Clone, Default)] +pub(super) struct AggregatePrepassFacts { + items: Vec, + direct_len: usize, +} + +impl AggregatePrepassFacts { + fn insert_prioritized_unique(&mut self, fact: AggregatePrepassFact) { + if fact.source.is_direct_clause() { + if self.items[..self.direct_len].contains(&fact) { + return; + } + self.items.push(fact); + let last = self.items.len() - 1; + self.items.swap(self.direct_len, last); + self.direct_len += 1; + } else { + if self.items[self.direct_len..].contains(&fact) { + return; + } + self.items.push(fact); + }; + } +} + +#[derive(Debug, Clone)] +pub(super) struct AggregatePrepassExprInfo { + pub ast: Expr, + pub contains_aggregate: bool, + pub contains_window: bool, + #[allow(dead_code)] + pub contains_subquery: bool, + pub referenced_aliases: Vec, +} + +impl AggregatePrepassExprInfo { + pub(super) fn analyze( + name_resolution_ctx: &NameResolutionContext, + udaf_names: &HashSet, + aliases: &HashSet<&str>, + expr: &Expr, + ) -> Self { + let mut probes = ( + ExprFlagsProbe { + name_resolution_ctx, + udaf_names, + result: ExprFlags::default(), + }, + ReferencedAliasProbe { + name_resolution_ctx, + aliases, + referenced_aliases: BTreeSet::new(), + }, + ); + probes.walk_expr(expr); + let (expr_flags_probe, alias_probe) = probes; + + Self { + ast: expr.clone(), + contains_aggregate: expr_flags_probe.result.contains_aggregate, + contains_window: expr_flags_probe.result.contains_window, + contains_subquery: expr_flags_probe.result.contains_subquery, + referenced_aliases: alias_probe.referenced_aliases.into_iter().collect(), + } + } +} + +#[derive(Debug, Default, Clone)] +pub(super) struct AggregatePrepassAliasCatalog { + items: Vec, + by_name: BTreeMap>, +} + +impl AggregatePrepassAliasCatalog { + pub(super) fn new( + name_resolution_ctx: &NameResolutionContext, + udaf_names: &HashSet, + aliases: Vec<(String, Expr)>, + ) -> Self { + let mut by_name: BTreeMap<_, Vec<_>> = BTreeMap::new(); + for (index, (name, _)) in aliases.iter().enumerate() { + by_name.entry(name.clone()).or_default().push(index); + } + + let alias_names = by_name.keys().map(String::as_str).collect(); + let items = aliases + .into_iter() + .map(|(_, ast)| { + AggregatePrepassExprInfo::analyze( + name_resolution_ctx, + udaf_names, + &alias_names, + &ast, + ) + }) + .collect(); + + Self { items, by_name } + } + + fn get_unique(&self, name: &str) -> Option<&AggregatePrepassExprInfo> { + if let [index] = self.by_name.get(name)?.as_slice() { + Some(&self.items[*index]) + } else { + None + } + } + + #[cfg(test)] + fn items(&self) -> &[AggregatePrepassExprInfo] { + &self.items + } + + pub(super) fn alias_names(&self) -> HashSet<&str> { + self.by_name.keys().map(String::as_str).collect() + } + + pub(super) fn references_aliases_matching(&self, names: &[String], predicate: &F) -> bool + where F: Fn(&AggregatePrepassExprInfo) -> bool { + names + .iter() + .any(|name| self.alias_reaches(name, predicate, &mut BTreeSet::new())) + } + + fn alias_reaches(&self, name: &str, predicate: &F, visiting: &mut BTreeSet) -> bool + where F: Fn(&AggregatePrepassExprInfo) -> bool { + let Some(alias) = self.get_unique(name) else { + return false; + }; + + if predicate(alias) { + return true; + } + + if !visiting.insert(name.to_string()) { + return false; + } + + let reached = alias + .referenced_aliases + .iter() + .any(|dep| self.alias_reaches(dep, predicate, visiting)); + visiting.remove(name); + reached + } +} + +struct Scanner<'a> { + expr_context: ExprContext, + name_resolution_ctx: &'a NameResolutionContext, + udaf_names: &'a HashSet, + ast_aliases: &'a AggregatePrepassAliasCatalog, + in_subquery: bool, + window_depth: usize, + expanding_aliases: HashSet, + expansion_stack: Vec, + facts: Vec, +} + +impl Scanner<'_> { + fn scan( + expr_context: ExprContext, + name_resolution_ctx: &NameResolutionContext, + udaf_names: &HashSet, + ast_aliases: &AggregatePrepassAliasCatalog, + expr: &Expr, + ) -> Vec { + let mut scanner = Scanner { + expr_context, + name_resolution_ctx, + udaf_names, + ast_aliases, + in_subquery: false, + window_depth: 0, + expanding_aliases: HashSet::new(), + expansion_stack: Vec::new(), + facts: Vec::new(), + }; + let _ = expr.walk(&mut scanner); + scanner.facts + } + + fn visit_query_children(&mut self, query: &Query) -> VisitResult { + if let Some(with) = &query.with { + for cte in &with.ctes { + try_ast_walk!(cte.walk(self)); + } + } + try_ast_walk!(query.body.walk(self)); + for item in &query.order_by { + try_ast_walk!(item.walk(self)); + } + for expr in &query.limit { + try_ast_walk!(expr.walk(self)); + } + if let Some(offset) = &query.offset { + try_ast_walk!(offset.walk(self)); + } + + Ok(VisitControl::Continue) + } + + fn visit_window_expr_children(&mut self, expr: &Expr) -> VisitResult { + match expr { + Expr::CountAll { + window, qualified, .. + } => { + for item in qualified { + if let databend_common_ast::ast::Indirection::Identifier(ident) = item { + try_ast_walk!(ident.walk(self)); + } + } + if let Some(window) = window { + try_ast_walk!(window.walk(self)); + } + } + Expr::FunctionCall { func, .. } => { + try_ast_walk!(func.walk(self)); + } + _ => unreachable!("window expr helper must only be called for window exprs"), + } + + Ok(VisitControl::Continue) + } + + fn handle_column_ref(&mut self, column: &ColumnRef) { + if self.in_subquery || self.window_depth > 0 { + return; + } + + let Some((alias, alias_expr)) = + Self::find_aggregate_prepass_alias(self.name_resolution_ctx, column, self.ast_aliases) + else { + return; + }; + + if self.expanding_aliases.insert(alias.clone()) { + self.expansion_stack.push(alias.clone()); + let _ = alias_expr.walk(self); + self.expansion_stack.pop(); + self.expanding_aliases.remove(&alias); + } + } + + fn enter_expr(&mut self, expr: &Expr) { + if is_window_expr(expr) { + self.window_depth += 1; + } + + if self.window_depth > 0 || self.in_subquery { + return; + } + + if let Some(fact) = match expr { + Expr::CountAll { window: None, .. } => self.build_fact(expr), + Expr::FunctionCall { func, .. } + if is_aggregate_target(self.name_resolution_ctx, self.udaf_names, func) => + { + self.build_fact(expr) + } + _ => None, + } { + self.facts.push(fact); + } + } + + fn build_fact(&self, expr: &Expr) -> Option { + let mut probe = ExprFlagsProbe { + name_resolution_ctx: self.name_resolution_ctx, + udaf_names: self.udaf_names, + result: ExprFlags::default(), + }; + probe.walk_expr(expr); + if probe.result.contains_subquery { + return None; + } + + Some(AggregatePrepassFact { + expr_context: self.expr_context, + source: self.current_source(), + expr: expr.clone(), + contains_window: probe.result.contains_window, + }) + } + + fn current_source(&self) -> AggregatePrepassSource { + match self.expansion_stack.first() { + Some(alias) => AggregatePrepassSource::AliasExpansion(alias.clone()), + None => AggregatePrepassSource::DirectClause, + } + } + + fn find_aggregate_prepass_alias<'a>( + name_resolution_ctx: &NameResolutionContext, + column: &ColumnRef, + ast_aliases: &'a AggregatePrepassAliasCatalog, + ) -> Option<(String, &'a Expr)> { + let alias = resolve_unqualified_alias_name(name_resolution_ctx, column)?; + let ast = &ast_aliases.get_unique(&alias)?.ast; + Some((alias, ast)) + } +} + +impl Visitor for Scanner<'_> { + fn visit_expr(&mut self, expr: &Expr) -> VisitResult { + self.enter_expr(expr); + + if let Expr::ColumnRef { column, .. } = expr { + self.handle_column_ref(column); + } + + if is_window_expr(expr) { + let result = self.visit_window_expr_children(expr); + self.window_depth -= 1; + result?; + return Ok(VisitControl::SkipChildren); + } + + Ok(VisitControl::Continue) + } + + fn visit_query(&mut self, query: &Query) -> VisitResult { + let was_in_subquery = self.in_subquery; + self.in_subquery = true; + let result = self.visit_query_children(query); + self.in_subquery = was_in_subquery; + result?; + Ok(VisitControl::SkipChildren) + } +} + +fn resolve_unqualified_alias_name( + name_resolution_ctx: &NameResolutionContext, + column: &ColumnRef, +) -> Option { + if column.database.is_some() || column.table.is_some() { + return None; + } + + let ColumnID::Name(ident) = &column.column else { + return None; + }; + + Some(normalize_identifier(ident, name_resolution_ctx).name) +} + +impl Binder { + pub(super) fn collect_aggregate_prepass_aliases<'a>( + &self, + udaf_names: &HashSet, + select_list: &'a SelectList<'a>, + ) -> AggregatePrepassAliasCatalog { + let aliases = select_list + .items + .iter() + .filter_map(|item| match item.select_target { + SelectTarget::AliasedExpr { expr, .. } => { + Some((item.alias.clone(), expr.as_ref().clone())) + } + _ => None, + }) + .collect(); + AggregatePrepassAliasCatalog::new(&self.name_resolution_ctx, udaf_names, aliases) + } + + pub(super) fn bind_aggregate_prepass_facts( + &mut self, + bind_context: &mut BindContext, + aliases: &[(String, ScalarExpr)], + facts: &AggregatePrepassFacts, + ) -> Result<()> { + for fact in &facts.items { + self.bind_and_rewrite_aggregate_expr( + bind_context, + aliases, + fact.expr_context, + &fact.expr, + )?; + } + + Ok(()) + } + + pub(super) fn find_and_load_udaf( + &self, + bind_context: &BindContext, + select_list: &SelectList<'_>, + having: Option<&Expr>, + qualify: Option<&Expr>, + order_by: &[OrderByExpr], + ) -> Result> { + let mut probe = FunctionNameProbe { + name_resolution_ctx: &self.name_resolution_ctx, + names: BTreeSet::new(), + }; + + for expr in select_list + .items + .iter() + .filter_map(|item| { + if let SelectTarget::AliasedExpr { box expr, .. } = item.select_target { + Some(expr) + } else { + None + } + }) + .chain(having) + .chain(qualify) + .chain(order_by.iter().map(|order| &order.expr)) + { + probe.walk_expr(expr); + } + + self.resolve_udaf_names(bind_context, probe.names) + } + + fn resolve_udaf_names( + &self, + bind_context: &BindContext, + function_names: BTreeSet, + ) -> Result> { + let mut udaf_names = HashSet::new(); + let tenant = self.ctx.get_tenant(); + let provider = UserApiProvider::instance(); + + for name in function_names { + if name.eq_ignore_ascii_case("grouping") || is_builtin_function(&name) { + continue; + } + + let udf = if let Some(udf) = bind_context.udf_cache.read().get(&name).cloned() { + udf + } else { + let udf = block_on(provider.get_udf(&tenant, &name))?; + bind_context + .udf_cache + .write() + .insert(name.clone(), udf.clone()); + udf + }; + + if let Some(udf) = udf + && matches!(udf.definition, UDFDefinition::UDAFScript(_)) + { + udaf_names.insert(name); + } + } + + Ok(udaf_names) + } + + pub(super) fn derive_aggregate_prepass_facts<'a>( + &self, + udaf_names: &HashSet, + aliases: &AggregatePrepassAliasCatalog, + ast_iter: impl Iterator, + ) -> AggregatePrepassFacts { + ast_iter + .flat_map(|(ast_expr, expr_context)| { + Scanner::scan( + expr_context, + &self.name_resolution_ctx, + udaf_names, + aliases, + ast_expr, + ) + }) + .fold(AggregatePrepassFacts::default(), |mut facts, fact| { + facts.insert_prioritized_unique(fact); + facts + }) + } +} + +#[cfg(test)] +mod tests { + use NameResolutionContext; + use databend_common_ast::parser::Dialect; + use databend_common_ast::parser::parse_expr; + use databend_common_ast::parser::tokenize_sql; + + use super::*; + + fn parse_ast_expr(text: &str) -> Expr { + let tokens = tokenize_sql(text).unwrap(); + parse_expr(&tokens, Dialect::PostgreSQL).unwrap() + } + + #[test] + fn aggregate_prepass_alias_catalog_tracks_features_and_refs() { + let name_resolution_ctx = NameResolutionContext::default(); + let udaf_names = HashSet::new(); + let aliases = AggregatePrepassAliasCatalog::new(&name_resolution_ctx, &udaf_names, vec![ + ("s".to_string(), parse_ast_expr("sum(number)")), + ( + "rn".to_string(), + parse_ast_expr("row_number() OVER (ORDER BY s)"), + ), + ( + "sub".to_string(), + parse_ast_expr("(SELECT max(number) FROM t)"), + ), + ]); + + let items = aliases.items(); + assert_eq!(items.len(), 3); + + assert!(items[0].contains_aggregate); + assert!(!items[0].contains_window); + assert!(!items[0].contains_subquery); + assert!(items[0].referenced_aliases.is_empty()); + + assert!(!items[1].contains_aggregate); + assert!(items[1].contains_window); + assert!(!items[1].contains_subquery); + assert_eq!(items[1].referenced_aliases, vec!["s".to_string()]); + + assert!(!items[2].contains_aggregate); + assert!(!items[2].contains_window); + assert!(items[2].contains_subquery); + assert!(items[2].referenced_aliases.is_empty()); + } + + #[test] + fn aggregate_prepass_facts_track_alias_expansion_source() { + let name_resolution_ctx = NameResolutionContext::default(); + let udaf_names = HashSet::new(); + let aliases = AggregatePrepassAliasCatalog::new(&name_resolution_ctx, &udaf_names, vec![( + "s".to_string(), + parse_ast_expr("sum(number)"), + )]); + + let facts = Scanner::scan( + ExprContext::HavingClause, + &name_resolution_ctx, + &udaf_names, + &aliases, + &parse_ast_expr("s > 0"), + ); + + assert_eq!(facts.len(), 1); + assert_eq!( + facts[0].source, + AggregatePrepassSource::AliasExpansion("s".to_string()) + ); + assert!(matches!(facts[0].expr_context, ExprContext::HavingClause)); + assert!(!facts[0].contains_window); + } + + #[test] + fn aggregate_prepass_duplicate_aliases_do_not_expand() { + let name_resolution_ctx = NameResolutionContext::default(); + let udaf_names = HashSet::new(); + let aliases = AggregatePrepassAliasCatalog::new(&name_resolution_ctx, &udaf_names, vec![ + ("s".to_string(), parse_ast_expr("sum(number)")), + ("s".to_string(), parse_ast_expr("max(number)")), + ]); + + let facts = Scanner::scan( + ExprContext::OrderByClause, + &name_resolution_ctx, + &udaf_names, + &aliases, + &parse_ast_expr("s > 0"), + ); + + assert!(facts.is_empty()); + } + + #[test] + fn aggregate_prepass_facts_deduplicate_identical_candidates() { + let name_resolution_ctx = NameResolutionContext::default(); + let udaf_names = HashSet::new(); + let aliases = AggregatePrepassAliasCatalog::new(&name_resolution_ctx, &udaf_names, vec![( + "s".to_string(), + parse_ast_expr("sum(number)"), + )]); + + let mut facts = AggregatePrepassFacts::default(); + for fact in Scanner::scan( + ExprContext::HavingClause, + &name_resolution_ctx, + &udaf_names, + &aliases, + &parse_ast_expr("sum(number) > 0"), + ) { + facts.insert_prioritized_unique(fact); + } + for fact in Scanner::scan( + ExprContext::HavingClause, + &name_resolution_ctx, + &udaf_names, + &aliases, + &parse_ast_expr("sum(number) > 0"), + ) { + facts.insert_prioritized_unique(fact); + } + + assert_eq!(facts.items.len(), 1); + } + + #[test] + fn aggregate_prepass_nested_alias_expansion_keeps_alias_source() { + let name_resolution_ctx = NameResolutionContext::default(); + let udaf_names = HashSet::new(); + let aliases = AggregatePrepassAliasCatalog::new(&name_resolution_ctx, &udaf_names, vec![ + ("s".to_string(), parse_ast_expr("sum(number)")), + ("a".to_string(), parse_ast_expr("s + 1")), + ]); + + let facts = Scanner::scan( + ExprContext::HavingClause, + &name_resolution_ctx, + &udaf_names, + &aliases, + &parse_ast_expr("a > 0"), + ); + + assert_eq!(facts.len(), 1); + assert_eq!( + facts[0].source, + AggregatePrepassSource::AliasExpansion("a".to_string()) + ); + } + + #[test] + fn aggregate_prepass_fact_flags_ignore_deeper_alias_expansion_features() { + let name_resolution_ctx = NameResolutionContext::default(); + let udaf_names = HashSet::new(); + let aliases = AggregatePrepassAliasCatalog::new(&name_resolution_ctx, &udaf_names, vec![( + "w".to_string(), + parse_ast_expr("row_number() OVER (ORDER BY number)"), + )]); + + let facts = Scanner::scan( + ExprContext::HavingClause, + &name_resolution_ctx, + &udaf_names, + &aliases, + &parse_ast_expr("sum(w) > 0"), + ); + + assert_eq!(facts.len(), 1); + assert!(!facts[0].contains_window); + } + + #[test] + fn aggregate_prepass_alias_catalog_tracks_transitive_aggregate_and_window_aliases() { + let name_resolution_ctx = NameResolutionContext::default(); + let udaf_names = HashSet::new(); + let aliases = AggregatePrepassAliasCatalog::new(&name_resolution_ctx, &udaf_names, vec![ + ("s".to_string(), parse_ast_expr("sum(number)")), + ("a".to_string(), parse_ast_expr("s + 1")), + ( + "rn".to_string(), + parse_ast_expr("row_number() OVER (ORDER BY number)"), + ), + ("w".to_string(), parse_ast_expr("rn + 1")), + ]); + + assert!( + &aliases.references_aliases_matching(&["a".to_string()], &|alias| { + alias.contains_aggregate + }) + ); + assert!( + !aliases.references_aliases_matching(&["a".to_string()], &|alias| { + alias.contains_window + }) + ); + assert!( + aliases.references_aliases_matching(&["w".to_string()], &|alias| { + alias.contains_window + }) + ); + assert!( + !aliases.references_aliases_matching(&["w".to_string()], &|alias| { + alias.contains_aggregate + }) + ); + } +} diff --git a/src/query/sql/src/planner/binder/bind_context.rs b/src/query/sql/src/planner/binder/bind_context.rs index 27cb9478ebfbd..559bb91099bde 100644 --- a/src/query/sql/src/planner/binder/bind_context.rs +++ b/src/query/sql/src/planner/binder/bind_context.rs @@ -66,7 +66,7 @@ use crate::plans::ScalarExpr; /// Context of current expression, this is used to check if /// the expression is valid in current context. -#[derive(Debug, Clone, Default, EnumAsInner)] +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, EnumAsInner)] pub enum ExprContext { SelectClause, WhereClause, @@ -937,8 +937,10 @@ impl BindContext { self.columns.iter().map(|c| c.index).collect() } - pub fn set_expr_context(&mut self, expr_context: ExprContext) { - self.expr_context = expr_context; + pub fn replace_expr_context(&mut self, new: ExprContext) -> ExprContext { + let old = self.expr_context; + self.expr_context = new; + old } } diff --git a/src/query/sql/src/planner/binder/bind_query/bind_select.rs b/src/query/sql/src/planner/binder/bind_query/bind_select.rs index 1ecd1f58a297e..93d2e0dd3e2c8 100644 --- a/src/query/sql/src/planner/binder/bind_query/bind_select.rs +++ b/src/query/sql/src/planner/binder/bind_query/bind_select.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashSet; use std::sync::Arc; use databend_common_ast::Span; @@ -54,67 +55,98 @@ use crate::optimizer::ir::SExpr; use crate::planner::QueryExecutor; use crate::planner::binder::BindContext; use crate::planner::binder::Binder; +use crate::planner::binder::ExprContext; +use crate::planner::binder::aggregate_prepass::AggregatePrepassAliasCatalog; +use crate::planner::binder::aggregate_prepass::AggregatePrepassExprInfo; +use crate::planner::binder::aggregate_prepass::AggregatePrepassFacts; +use crate::planner::binder::project::SelectInfo; +use crate::planner::binder::select::SelectList; +use crate::planner::binder::sort::OrderByRewriteFlags; +use crate::planner::binder::sort::OrderItems; +use crate::plans::ScalarExpr; + +#[derive(Clone)] +struct SelectClauseFact { + ast: Expr, + contains_aggregate: bool, + contains_window: bool, + referenced_aliases: Vec, + references_aggregate_aliases: bool, + references_window_aliases: bool, +} + +#[derive(Clone, Default)] +struct SelectClauseFacts { + having: Option, + qualify: Option, + order_by: Vec, + aggregate_prepass_inputs: Vec<(Expr, ExprContext)>, +} + +struct SelectGlobalView { + semantic_aliases: Vec<(String, ScalarExpr)>, + qualify: Option, + order_by: Vec, + aggregate_prepass_facts: AggregatePrepassFacts, +} + +struct SelectPreparation<'a> { + s_expr: SExpr, + from_context: BindContext, + select_list: SelectList<'a>, + global_view: SelectGlobalView, + rewritten_aliases: Vec<(String, ScalarExpr)>, +} + +struct AnalyzedSelect { + s_expr: SExpr, + from_context: BindContext, + select_info: SelectInfo, + having: Option, + qualify: Option, + order_items: OrderItems, +} impl Binder { - #[async_backtrace::framed] - pub(crate) fn bind_select( + fn bind_select_source( &mut self, bind_context: &mut BindContext, stmt: &SelectStmt, - order_by: &[OrderByExpr], - limit: Option, ) -> Result<(SExpr, BindContext)> { - if let Some(hints) = &stmt.hints { - if let Some(e) = self.opt_hints_set_var(bind_context, hints).err() { - warn!( - "In SELECT resolve optimize hints {:?} failed, err: {:?}", - hints, e - ); - } + if stmt.from.is_empty() { + return self.bind_dummy_table(bind_context, &stmt.select_list); } - // whether allow rewrite virtual column and pushdown - bind_context.allow_virtual_column = self - .ctx - .get_settings() - .get_enable_experimental_virtual_column() - .unwrap_or_default() - && LicenseManagerSwitch::instance() - .check_enterprise_enabled(self.ctx.get_license_key(), Feature::VirtualColumn) - .is_ok(); + let mut max_column_position = MaxColumnPosition::default(); + stmt.walk(&mut max_column_position)?; + self.metadata + .write() + .set_max_column_position(max_column_position.max_pos); - let mut rewriter = - SelectRewriter::new(self.name_resolution_ctx.unquoted_ident_case_sensitive) - .with_subquery_executor(self.subquery_executor.clone()); - let new_stmt = rewriter.rewrite(stmt)?; - let stmt = new_stmt.as_ref().unwrap_or(stmt); + let cross_joins = stmt + .from + .iter() + .cloned() + .reduce(|left, right| TableReference::Join { + span: None, + join: Join { + op: JoinOperator::CrossJoin, + condition: JoinCondition::None, + left: Box::new(left), + right: Box::new(right), + }, + }) + .unwrap(); + self.bind_table_reference(bind_context, &cross_joins) + } - let (mut s_expr, mut from_context) = if stmt.from.is_empty() { - let select_list = &stmt.select_list; - self.bind_dummy_table(bind_context, select_list)? - } else { - let mut max_column_position = MaxColumnPosition::default(); - stmt.walk(&mut max_column_position)?; - self.metadata - .write() - .set_max_column_position(max_column_position.max_pos); - - let cross_joins = stmt - .from - .iter() - .cloned() - .reduce(|left, right| TableReference::Join { - span: None, - join: Join { - op: JoinOperator::CrossJoin, - condition: JoinCondition::None, - left: Box::new(left), - right: Box::new(right), - }, - }) - .unwrap(); - self.bind_table_reference(bind_context, &cross_joins)? - }; + fn prepare_select_binding<'a>( + &mut self, + bind_context: &mut BindContext, + stmt: &'a SelectStmt, + order_by: &[OrderByExpr], + ) -> Result> { + let (s_expr, mut from_context) = self.bind_select_source(bind_context, stmt)?; // Try put window definitions into bind context. // This operation should be before `normalize_select_list` because window functions can be used in select list. @@ -142,17 +174,85 @@ impl Binder { } self.analyze_aggregate_select(&mut from_context, &mut select_list)?; + let udaf_names = self.find_and_load_udaf( + &from_context, + &select_list, + stmt.having.as_ref(), + stmt.qualify.as_ref(), + order_by, + )?; + let prepass_aliases = self.collect_aggregate_prepass_aliases(&udaf_names, &select_list); + let clause_facts = self.build_select_clause_facts( + &udaf_names, + &prepass_aliases, + stmt.having.as_ref(), + stmt.qualify.as_ref(), + order_by, + ); + + let aggregate_prepass_facts = self.derive_aggregate_prepass_facts( + &udaf_names, + &prepass_aliases, + clause_facts + .aggregate_prepass_inputs + .iter() + .map(|(expr, expr_context)| (expr, *expr_context)), + ); + let global_view = SelectGlobalView { + semantic_aliases, + qualify: clause_facts.qualify, + order_by: clause_facts.order_by, + aggregate_prepass_facts, + }; + + self.bind_aggregate_prepass_facts( + &mut from_context, + &global_view.semantic_aliases, + &global_view.aggregate_prepass_facts, + )?; // `analyze_window` should behind `analyze_aggregate_select`, // because `analyze_window` will rewrite the aggregate functions in the window function's arguments. self.analyze_window(&mut from_context, &mut select_list)?; - let aliases = select_list + debug_assert!( + select_list + .items + .iter() + .all(|item| !item.scalar.is_aggregate()), + "SELECT projection expects aggregate/UDAF calls to be rewritten before projection analysis", + ); + + let rewritten_aliases = select_list .items .iter() .map(|item| (item.alias.clone(), item.scalar.clone())) .collect::>(); + Ok(SelectPreparation { + s_expr, + from_context, + select_list, + global_view, + rewritten_aliases, + }) + } + + fn analyze_select_clauses( + &mut self, + stmt: &SelectStmt, + order_by: &[OrderByExpr], + limit: Option, + preparation: SelectPreparation<'_>, + ) -> Result { + let SelectPreparation { + mut s_expr, + mut from_context, + select_list, + global_view, + rewritten_aliases, + } = preparation; + // Rewrite Set-returning functions, if the argument contains aggregation function or group item, // set as lazy Set-returning functions. if !from_context.srf_info.srfs.is_empty() { @@ -167,8 +267,12 @@ impl Binder { // Bind WHERE after select-list analysis so aliases are available, but // resolve them against the original pre-rewrite select-item semantics. let where_scalar = if let Some(expr) = &stmt.selection { - let (new_expr, scalar) = - self.bind_where(&mut from_context, &semantic_aliases, expr, s_expr)?; + let (new_expr, scalar) = self.bind_where( + &mut from_context, + &global_view.semantic_aliases, + expr, + s_expr, + )?; s_expr = new_expr; Some(scalar) } else { @@ -176,32 +280,50 @@ impl Binder { }; // `analyze_projection` should behind `analyze_aggregate_select` because `analyze_aggregate_select` will rewrite `grouping`. - let (mut scalar_items, projections) = self.analyze_projection( - &from_context.aggregate_info, - &from_context.windows, - &select_list, - )?; + let mut select_info = self.analyze_projection(&from_context, &select_list)?; let having = if let Some(having) = &stmt.having { - Some(self.analyze_aggregate_having(&mut from_context, &aliases, having)?) + Some(self.analyze_aggregate_having(&mut from_context, &rewritten_aliases, having)?) } else { None }; - let qualify = if let Some(qualify) = &stmt.qualify { - Some(self.analyze_window_qualify(&mut from_context, &semantic_aliases, qualify)?) + let qualify = if let Some(qualify) = global_view.qualify.as_ref() { + Some(self.analyze_window_qualify( + &mut from_context, + &global_view.semantic_aliases, + &qualify.ast, + qualify.contains_window || qualify.references_window_aliases, + )?) } else { None }; + let order_by_rewrite_flags = global_view + .order_by + .iter() + .map(|fact| { + OrderByRewriteFlags::new( + !fact.referenced_aliases.is_empty(), + fact.contains_aggregate || fact.references_aggregate_aliases, + fact.contains_window || fact.references_window_aliases, + ) + }) + .collect::>(); + let order_items = self.analyze_order_items( &mut from_context, - &mut scalar_items, - &aliases, - &projections, + &mut select_info, + // Keep ORDER BY alias resolution on the same read-only semantic alias + // snapshot used by the clause prepass. This avoids binding against + // already-rewritten select-item scalars when a later clause only + // needs the original alias semantics. + &global_view.semantic_aliases, + &order_by_rewrite_flags, order_by, stmt.distinct, )?; + self.refresh_select_output(&from_context, &mut select_info)?; // After all analysis is done. if from_context.srf_info.srfs.is_empty() { @@ -209,7 +331,7 @@ impl Binder { self.analyze_lazy_materialization( &from_context, stmt, - &scalar_items, + &select_info, &select_list, &where_scalar, &order_items.items, @@ -217,6 +339,136 @@ impl Binder { )?; } + Ok(AnalyzedSelect { + s_expr, + from_context, + select_info, + having, + qualify, + order_items, + }) + } + + fn build_select_clause_facts( + &self, + udaf_names: &HashSet, + aliases: &AggregatePrepassAliasCatalog, + having: Option<&Expr>, + qualify: Option<&Expr>, + order_by: &[OrderByExpr], + ) -> SelectClauseFacts { + let alias_names = aliases.alias_names(); + std::iter::chain( + having + .into_iter() + .map(|expr| (expr, ExprContext::HavingClause)), + qualify + .into_iter() + .map(|expr| (expr, ExprContext::QualifyClause)), + ) + .chain( + order_by + .iter() + .map(|order| (&order.expr, ExprContext::OrderByClause)), + ) + .fold( + SelectClauseFacts::default(), + |mut facts, (expr, expr_context)| { + let AggregatePrepassExprInfo { + ast, + contains_aggregate, + contains_window, + referenced_aliases, + .. + } = AggregatePrepassExprInfo::analyze( + &self.name_resolution_ctx, + udaf_names, + &alias_names, + expr, + ); + + let references_aggregate_aliases = aliases + .references_aliases_matching(&referenced_aliases, &|alias| { + alias.contains_aggregate + }); + let references_window_aliases = aliases + .references_aliases_matching(&referenced_aliases, &|alias| { + alias.contains_window + }); + + let fact = SelectClauseFact { + ast, + contains_aggregate, + contains_window, + references_aggregate_aliases, + references_window_aliases, + referenced_aliases, + }; + + if expr_context == ExprContext::QualifyClause { + facts.qualify = Some(fact); + return facts; + } + + if contains_aggregate || references_aggregate_aliases { + facts + .aggregate_prepass_inputs + .push((fact.ast.clone(), expr_context)); + } + + match expr_context { + ExprContext::HavingClause => facts.having = Some(fact), + ExprContext::OrderByClause => facts.order_by.push(fact), + _ => unreachable!("aggregate prepass only inspects HAVING/QUALIFY/ORDER BY"), + } + facts + }, + ) + } + + #[async_backtrace::framed] + pub(crate) fn bind_select( + &mut self, + bind_context: &mut BindContext, + stmt: &SelectStmt, + order_by: &[OrderByExpr], + limit: Option, + ) -> Result<(SExpr, BindContext)> { + if let Some(hints) = &stmt.hints { + if let Some(e) = self.opt_hints_set_var(bind_context, hints).err() { + warn!( + "In SELECT resolve optimize hints {:?} failed, err: {:?}", + hints, e + ); + } + } + + // whether allow rewrite virtual column and pushdown + bind_context.allow_virtual_column = self + .ctx + .get_settings() + .get_enable_experimental_virtual_column() + .unwrap_or_default() + && LicenseManagerSwitch::instance() + .check_enterprise_enabled(self.ctx.get_license_key(), Feature::VirtualColumn) + .is_ok(); + + let mut rewriter = + SelectRewriter::new(self.name_resolution_ctx.unquoted_ident_case_sensitive) + .with_subquery_executor(self.subquery_executor.clone()); + let new_stmt = rewriter.rewrite(stmt)?; + let stmt = new_stmt.as_ref().unwrap_or(stmt); + + let preparation = self.prepare_select_binding(bind_context, stmt, order_by)?; + let AnalyzedSelect { + mut s_expr, + mut from_context, + mut select_info, + having, + qualify, + order_items, + } = self.analyze_select_clauses(stmt, order_by, limit, preparation)?; + if from_context.aggregate_info.has_aggregate_calls() || from_context.aggregate_info.has_group_items() { @@ -243,19 +495,13 @@ impl Binder { } if stmt.distinct { - s_expr = self.bind_distinct( - stmt.span, - &mut from_context, - &projections, - &mut scalar_items, - s_expr, - )?; + s_expr = self.bind_distinct(stmt.span, &mut select_info, s_expr)?; } - s_expr = self.bind_projection(&mut from_context, &projections, &scalar_items, s_expr)?; + s_expr = self.bind_projection(&mut from_context, select_info, s_expr)?; if !order_items.items.is_empty() { - s_expr = self.bind_order_by(&from_context, order_items, &select_list, s_expr)?; + s_expr = self.bind_order_by(order_items, s_expr)?; } if from_context.have_async_func { @@ -286,10 +532,8 @@ impl Binder { /// It is useful when implementing some SQL syntax sugar, /// /// to rewrite the SelectStmt, just add a new rewrite_* function and call it in the `rewrite` function. -#[allow(dead_code)] struct SelectRewriter { new_stmt: Option, - is_unquoted_ident_case_sensitive: bool, subquery_executor: Option>, } @@ -380,10 +624,9 @@ impl SelectRewriter { } impl SelectRewriter { - fn new(is_unquoted_ident_case_sensitive: bool) -> Self { + fn new(_is_unquoted_ident_case_sensitive: bool) -> Self { SelectRewriter { new_stmt: None, - is_unquoted_ident_case_sensitive, subquery_executor: None, } } diff --git a/src/query/sql/src/planner/binder/copy_into_table.rs b/src/query/sql/src/planner/binder/copy_into_table.rs index 693aa4cdc0877..f09434396fedc 100644 --- a/src/query/sql/src/planner/binder/copy_into_table.rs +++ b/src/query/sql/src/planner/binder/copy_into_table.rs @@ -523,22 +523,17 @@ impl Binder { )); }; } - let (scalar_items, projections) = self.analyze_projection( - &from_context.aggregate_info, - &from_context.windows, - &select_list, - )?; + let select_info = self.analyze_projection(&from_context, &select_list)?; - if projections.len() != plan.required_source_schema.num_fields() { + if select_info.column_count() != plan.required_source_schema.num_fields() { return Err(ErrorCode::BadArguments(format!( "Number of columns in select list ({}) does not match that of the corresponding table ({})", - projections.len(), + select_info.column_count(), plan.required_source_schema.num_fields(), ))); } - let mut s_expr = - self.bind_projection(&mut from_context, &projections, &scalar_items, s_expr)?; + let mut s_expr = self.bind_projection(&mut from_context, select_info, s_expr)?; // rewrite async function and udf s_expr = self.rewrite_udf(&mut from_context, s_expr)?; diff --git a/src/query/sql/src/planner/binder/distinct.rs b/src/query/sql/src/planner/binder/distinct.rs index 126b9a8d94510..12c33c01815a6 100644 --- a/src/query/sql/src/planner/binder/distinct.rs +++ b/src/query/sql/src/planner/binder/distinct.rs @@ -12,74 +12,36 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashMap; use std::sync::Arc; use databend_common_ast::Span; use databend_common_exception::Result; -use crate::BindContext; -use crate::Symbol; -use crate::WindowChecker; use crate::binder::Binder; -use crate::binder::ColumnBinding; +use crate::binder::project::SelectInfo; use crate::optimizer::ir::SExpr; -use crate::planner::semantic::GroupingChecker; use crate::plans::Aggregate; use crate::plans::AggregateMode; -use crate::plans::BoundColumnRef; use crate::plans::EvalScalar; -use crate::plans::ScalarExpr; -use crate::plans::ScalarItem; -use crate::plans::VisitorMut as _; - impl Binder { pub fn bind_distinct( &self, span: Span, - bind_context: &mut BindContext, - projections: &[ColumnBinding], - scalar_items: &mut HashMap, + select_info: &mut SelectInfo, child: SExpr, ) -> Result { - let scalar_items: Vec = scalar_items - .drain() - .map(|(_, item)| { - let mut scalar = item.scalar; - if bind_context.in_grouping { - let mut group_checker = GroupingChecker::new(bind_context, None); - group_checker.visit(&mut scalar)?; - } else if !bind_context.windows.window_functions.is_empty() { - let mut window_checker = WindowChecker::new(bind_context); - window_checker.visit(&mut scalar)?; - } - Ok(ScalarItem { - scalar, - index: item.index, - }) - }) - .collect::>()?; + let distinct_input = select_info.take_distinct_plan(span); + let pre_distinct_items = distinct_input.pre_distinct_items; + let group_items = distinct_input.group_items; let mut new_expr = child; - if !scalar_items.is_empty() { + if !pre_distinct_items.is_empty() { let eval_scalar = EvalScalar { - items: scalar_items, + items: pre_distinct_items, }; new_expr = SExpr::create_unary(Arc::new(eval_scalar.into()), Arc::new(new_expr)); } - // Like aggregate, we just use scalar directly. - let group_items: Vec = projections - .iter() - .map(|v| ScalarItem { - scalar: ScalarExpr::BoundColumnRef(BoundColumnRef { - span, - column: v.clone(), - }), - index: v.index, - }) - .collect(); - let distinct_plan = Aggregate { mode: AggregateMode::Initial, group_items, diff --git a/src/query/sql/src/planner/binder/having.rs b/src/query/sql/src/planner/binder/having.rs index 14b4b0f781c6f..0434c604ba2ba 100644 --- a/src/query/sql/src/planner/binder/having.rs +++ b/src/query/sql/src/planner/binder/having.rs @@ -22,8 +22,6 @@ use super::Finder; use crate::BindContext; use crate::Binder; use crate::binder::ExprContext; -use crate::binder::ScalarBinder; -use crate::binder::aggregate::AggregateRewriter; use crate::binder::split_conjunctions; use crate::optimizer::ir::SExpr; use crate::planner::semantic::GroupingChecker; @@ -41,22 +39,12 @@ impl Binder { aliases: &[(String, ScalarExpr)], having: &Expr, ) -> Result { - bind_context.set_expr_context(ExprContext::HavingClause); - - let mut scalar_binder = ScalarBinder::new( + self.bind_and_rewrite_aggregate_expr( bind_context, - self.ctx.clone(), - &self.name_resolution_ctx, - self.metadata.clone(), aliases, - ); - let (mut scalar, _) = scalar_binder.bind(having)?; - AggregateRewriter::rewrite_expr( - &mut bind_context.aggregate_info, - self.metadata.clone(), - &mut scalar, - )?; - Ok(scalar) + ExprContext::HavingClause, + having, + ) } pub fn bind_having( @@ -65,7 +53,7 @@ impl Binder { having: ScalarExpr, child: SExpr, ) -> Result { - bind_context.set_expr_context(ExprContext::HavingClause); + bind_context.expr_context = ExprContext::HavingClause; let f = |scalar: &ScalarExpr| matches!(scalar, ScalarExpr::WindowFunction(_)); let mut finder = Finder::new(&f); diff --git a/src/query/sql/src/planner/binder/mod.rs b/src/query/sql/src/planner/binder/mod.rs index 7eb93460197bc..660ef7d69b02f 100644 --- a/src/query/sql/src/planner/binder/mod.rs +++ b/src/query/sql/src/planner/binder/mod.rs @@ -13,6 +13,7 @@ // limitations under the License. mod aggregate; +mod aggregate_prepass; mod async_function_desc; mod bind_context; mod bind_mutation; @@ -92,8 +93,10 @@ pub use internal_column_factory::INTERNAL_COLUMN_FACTORY; pub use location::get_storage_params_from_options; pub use location::parse_storage_params_from_uri; pub use location::parse_uri_location; +pub use project::SelectInfo; pub use scalar::ScalarBinder; pub use scalar_common::*; +pub(crate) use sort::OrderByRewriteFlags; pub use stream_column_factory::STREAM_COLUMN_FACTORY; pub use window::WindowFunctionInfo; pub use window::WindowOrderByInfo; diff --git a/src/query/sql/src/planner/binder/project.rs b/src/query/sql/src/planner/binder/project.rs index d1f27aff789d7..a0d81f3d0a8ec 100644 --- a/src/query/sql/src/planner/binder/project.rs +++ b/src/query/sql/src/planner/binder/project.rs @@ -83,134 +83,255 @@ impl VisitorMut for RemoveIdentifierQuote { } } +pub struct SelectInfo { + pub(super) source_scalars: HashMap, + pub(super) projection_scalars: HashMap, + pub(super) columns: Vec, +} + +pub(super) struct ProjectionPlanInput { + pub items: Vec, + pub output_columns: Vec, +} + +pub(super) struct DistinctPlanInput { + pub pre_distinct_items: Vec, + pub group_items: Vec, +} + +impl SelectInfo { + pub(super) fn from_columns(columns: Vec) -> Self { + Self { + source_scalars: HashMap::new(), + projection_scalars: HashMap::new(), + columns, + } + } + + pub fn column_count(&self) -> usize { + self.columns.len() + } + + pub(super) fn column_at(&self, index: usize) -> Option<&ColumnBinding> { + self.columns.get(index) + } + + pub(super) fn into_projection_plan(self) -> Result { + let SelectInfo { + projection_scalars, + columns, + .. + } = self; + let mut output_columns = columns; + let mut items = projection_scalars.into_values().collect::>(); + + for item in &items { + if let Some(column) = output_columns + .iter_mut() + .find(|column| column.index == item.index) + { + column.data_type = Box::new(item.scalar.data_type()?); + } + } + + items.sort_by_key(|item| item.index); + Ok(ProjectionPlanInput { + items, + output_columns, + }) + } + + pub(super) fn take_distinct_plan(&mut self, span: Span) -> DistinctPlanInput { + let pre_distinct_items = self + .projection_scalars + .drain() + .map(|(_, item)| item) + .collect::>(); + let group_items = self + .columns + .iter() + .map(|column| ScalarItem { + scalar: ScalarExpr::BoundColumnRef(BoundColumnRef { + span, + column: column.clone(), + }), + index: column.index, + }) + .collect(); + + DistinctPlanInput { + pre_distinct_items, + group_items, + } + } + + pub(super) fn source_scalar_item(&self, index: Symbol) -> Option<&ScalarItem> { + self.source_scalars.get(&index) + } + + pub(super) fn insert_scalar(&mut self, source_item: ScalarItem, projection_item: ScalarItem) { + self.source_scalars.insert(source_item.index, source_item); + self.projection_scalars + .insert(projection_item.index, projection_item); + } + + pub(super) fn rebuild_projection_items(&mut self, mut prepare_item: F) -> Result<()> + where F: FnMut(&ScalarItem) -> Result { + self.projection_scalars.clear(); + for source_item in self.source_scalars.values() { + let projection_item = prepare_item(source_item)?; + self.projection_scalars + .insert(projection_item.index, projection_item); + } + Ok(()) + } +} + impl Binder { - pub fn analyze_projection( + fn use_grouping_projection(bind_context: &BindContext) -> bool { + bind_context.in_grouping + || bind_context.aggregate_info.has_group_items() + || bind_context.aggregate_info.has_aggregate_calls() + } + + pub(super) fn prepare_select_output_scalar( + &self, + bind_context: &BindContext, + scalar: &ScalarExpr, + ) -> Result { + let mut scalar = scalar.clone(); + if Self::use_grouping_projection(bind_context) { + let mut grouping_checker = GroupingChecker::new(bind_context, None); + grouping_checker.visit(&mut scalar)?; + } else { + let mut window_checker = WindowChecker::new(bind_context); + window_checker.visit(&mut scalar)?; + } + Ok(scalar) + } + + pub(super) fn prepare_select_output_item( + &self, + bind_context: &BindContext, + item: &ScalarItem, + ) -> Result { + Ok(ScalarItem { + scalar: self.prepare_select_output_scalar(bind_context, &item.scalar)?, + index: item.index, + }) + } + + pub(crate) fn refresh_select_output( + &self, + bind_context: &BindContext, + select_info: &mut SelectInfo, + ) -> Result<()> { + select_info + .rebuild_projection_items(|item| self.prepare_select_output_item(bind_context, item)) + } + + /// Resolve which output slot a select item should project. + /// + /// Aggregate/UDAF/window analysis may already have registered a reusable slot for + /// the item. Projection only decides whether to reuse that slot or allocate a new + /// derived column; it does not perform aggregate/window semantic analysis. + fn resolve_projection_column_binding( &mut self, agg_info: &AggregateInfo, window_info: &WindowInfo, + item: &SelectItem<'_>, + ) -> Result { + // This item is a grouping sets item, its data type should be nullable. + let is_grouping_sets_item = agg_info.is_grouping_sets_item(&item.scalar); + + let mut column_binding = match &item.scalar { + ScalarExpr::BoundColumnRef(column_ref) => { + let mut column_binding = column_ref.column.clone(); + // We should apply alias for the ColumnBinding, since it comes from table + column_binding.column_name = item.alias.clone(); + column_binding + } + ScalarExpr::AggregateFunction(agg) => { + debug_assert!(!is_grouping_sets_item); + agg_info + .lookup_aggregate_function_column(agg, &item.alias) + .unwrap() + } + ScalarExpr::UDAFCall(udaf) => { + debug_assert!(!is_grouping_sets_item); + agg_info.lookup_udaf_call_column(udaf, &item.alias).unwrap() + } + ScalarExpr::WindowFunction(win) => { + find_replaced_window_function(window_info, win, &item.alias).unwrap() + } + _ => self.create_derived_column_binding(item.alias.clone(), item.scalar.data_type()?), + }; + + if is_grouping_sets_item { + column_binding.data_type = Box::new(column_binding.data_type.wrap_nullable()); + } + + Ok(column_binding) + } + + pub fn analyze_projection( + &mut self, + bind_context: &BindContext, select_list: &SelectList, - ) -> Result<(HashMap, Vec)> { + ) -> Result { let mut columns = Vec::with_capacity(select_list.items.len()); - let mut scalars = HashMap::new(); + let mut source_scalars = HashMap::new(); + let mut projection_scalars = HashMap::new(); for item in select_list.items.iter() { - // This item is a grouping sets item, its data type should be nullable. - let is_grouping_sets_item = agg_info.is_grouping_sets_item(&item.scalar); - - let mut column_binding = match &item.scalar { - ScalarExpr::BoundColumnRef(column_ref) => { - let mut column_binding = column_ref.column.clone(); - // We should apply alias for the ColumnBinding, since it comes from table - column_binding.column_name = item.alias.clone(); - column_binding - } - ScalarExpr::AggregateFunction(agg) => { - // Replace to bound column to reduce duplicate derived column bindings. - debug_assert!(!is_grouping_sets_item); - agg_info - .lookup_aggregate_function_column(agg, &item.alias) - .unwrap() - } - ScalarExpr::UDAFCall(udaf) => { - debug_assert!(!is_grouping_sets_item); - agg_info.lookup_udaf_call_column(udaf, &item.alias).unwrap() - } - ScalarExpr::WindowFunction(win) => { - find_replaced_window_function(window_info, win, &item.alias).unwrap() - } - _ => { - self.create_derived_column_binding(item.alias.clone(), item.scalar.data_type()?) - } - }; - - if is_grouping_sets_item { - column_binding.data_type = Box::new(column_binding.data_type.wrap_nullable()); - } - let scalar = if let ScalarExpr::SubqueryExpr(SubqueryExpr { - span, + let column_binding = self.resolve_projection_column_binding( + &bind_context.aggregate_info, + &bind_context.windows, + item, + )?; + let mut source_scalar = item.scalar.clone(); + if let ScalarExpr::SubqueryExpr(SubqueryExpr { typ, - subquery, - child_expr, - compare_op, - data_type, - outer_columns, - output_column, + projection_index, + contain_agg, .. - }) = item.scalar.clone() + }) = &mut source_scalar + && *typ == SubqueryType::Any { - if typ == SubqueryType::Any { - ScalarExpr::SubqueryExpr(SubqueryExpr { - span, - typ, - subquery, - child_expr, - compare_op, - output_column, - projection_index: Some(column_binding.index), - data_type, - outer_columns, - contain_agg: None, - }) - } else { - item.scalar.clone() - } - } else { - item.scalar.clone() - }; - scalars.insert(column_binding.index, ScalarItem { - scalar, + *projection_index = Some(column_binding.index); + *contain_agg = None; + } + let source_item = ScalarItem { + scalar: source_scalar, index: column_binding.index, - }); + }; + let projection_item = self.prepare_select_output_item(bind_context, &source_item)?; + let mut column_binding = column_binding; + column_binding.data_type = Box::new(projection_item.scalar.data_type()?); + source_scalars.insert(source_item.index, source_item); + projection_scalars.insert(projection_item.index, projection_item); columns.push(column_binding); } - Ok((scalars, columns)) + Ok(SelectInfo { + source_scalars, + projection_scalars, + columns, + }) } pub fn bind_projection( &mut self, bind_context: &mut BindContext, - columns: &[ColumnBinding], - scalars: &HashMap, + select_info: SelectInfo, child: SExpr, ) -> Result { - bind_context.set_expr_context(ExprContext::SelectClause); - let mut columns = columns.to_vec(); - let mut scalars = scalars - .iter() - .map(|(_, item)| { - if bind_context.in_grouping { - let mut scalar = item.scalar.clone(); - let mut grouping_checker = GroupingChecker::new(bind_context, None); - grouping_checker.visit(&mut scalar)?; - - if let Some(x) = columns.iter_mut().find(|x| x.index == item.index) { - x.data_type = Box::new(scalar.data_type()?); - } - - Ok(ScalarItem { - scalar, - index: item.index, - }) - } else { - let mut scalar = item.scalar.clone(); - let mut window_checker = WindowChecker::new(bind_context); - window_checker.visit(&mut scalar)?; - Ok(ScalarItem { - scalar, - index: item.index, - }) - } - }) - .collect::>>()?; - - scalars.sort_by_key(|s| s.index); - let eval_scalar = EvalScalar { items: scalars }; + bind_context.expr_context = ExprContext::SelectClause; + let plan = select_info.into_projection_plan()?; + let eval_scalar = EvalScalar { items: plan.items }; let new_expr = SExpr::create_unary(Arc::new(eval_scalar.into()), Arc::new(child)); - // Set output columns - bind_context.columns = columns; + bind_context.columns = plan.output_columns; Ok(new_expr) } - /// Normalize select list into a BindContext. /// There are three kinds of select target: /// @@ -233,7 +354,7 @@ impl Binder { input_context: &mut BindContext, select_list: &'a [SelectTarget], ) -> Result> { - input_context.set_expr_context(ExprContext::SelectClause); + input_context.expr_context = ExprContext::SelectClause; let mut output = SelectList::default(); let mut prev_aliases = Vec::new(); diff --git a/src/query/sql/src/planner/binder/qualify.rs b/src/query/sql/src/planner/binder/qualify.rs index f30b75b24ad2b..e78e3713c4ab7 100644 --- a/src/query/sql/src/planner/binder/qualify.rs +++ b/src/query/sql/src/planner/binder/qualify.rs @@ -43,8 +43,9 @@ impl Binder { bind_context: &mut BindContext, aliases: &[(String, ScalarExpr)], qualify: &Expr, + needs_window_rewrite: bool, ) -> Result { - bind_context.set_expr_context(ExprContext::QualifyClause); + bind_context.expr_context = ExprContext::QualifyClause; let mut scalar_binder = ScalarBinder::new( bind_context, self.ctx.clone(), @@ -58,8 +59,10 @@ impl Binder { &mut scalar, "Qualify clause must not contain aggregate functions", )?; - let mut rewriter = WindowRewriter::new(bind_context, self.metadata.clone()); - rewriter.visit(&mut scalar)?; + if needs_window_rewrite { + let mut rewriter = WindowRewriter::new(bind_context, self.metadata.clone()); + rewriter.visit(&mut scalar)?; + } Ok(scalar) } @@ -69,7 +72,7 @@ impl Binder { qualify: ScalarExpr, child: SExpr, ) -> Result { - bind_context.set_expr_context(ExprContext::QualifyClause); + bind_context.expr_context = ExprContext::QualifyClause; let scalar = { let mut qualify = qualify; diff --git a/src/query/sql/src/planner/binder/select.rs b/src/query/sql/src/planner/binder/select.rs index 635e0e8284f5f..7f7d3455e1cd4 100644 --- a/src/query/sql/src/planner/binder/select.rs +++ b/src/query/sql/src/planner/binder/select.rs @@ -13,7 +13,6 @@ // limitations under the License. use std::collections::BTreeSet; -use std::collections::HashMap; use std::collections::HashSet; use std::sync::Arc; @@ -42,6 +41,7 @@ use crate::binder::ColumnBindingBuilder; use crate::binder::ExprContext; use crate::binder::INTERNAL_COLUMN_FACTORY; use crate::binder::bind_table_reference::JoinConditions; +use crate::binder::project::SelectInfo; use crate::binder::scalar_common::split_conjunctions; use crate::optimizer::ir::SExpr; use crate::planner::binder::BindContext; @@ -52,7 +52,6 @@ use crate::plans::CastExpr; use crate::plans::Filter; use crate::plans::JoinType; use crate::plans::ScalarExpr; -use crate::plans::ScalarItem; use crate::plans::UnionAll; use crate::plans::Visitor as _; @@ -77,8 +76,7 @@ impl Binder { expr: &Expr, child: SExpr, ) -> Result<(SExpr, ScalarExpr)> { - let last_expr_context = bind_context.expr_context.clone(); - bind_context.set_expr_context(ExprContext::WhereClause); + let last_expr_context = bind_context.replace_expr_context(ExprContext::WhereClause); let mut scalar_binder = ScalarBinder::new( bind_context, @@ -107,7 +105,7 @@ impl Binder { predicates: split_conjunctions(&scalar), }; let new_expr = SExpr::create_unary(Arc::new(filter_plan.into()), Arc::new(child)); - bind_context.set_expr_context(last_expr_context); + bind_context.expr_context = last_expr_context; Ok((new_expr, scalar)) } @@ -310,14 +308,9 @@ impl Binder { ); if distinct { - let columns = new_bind_context.all_column_bindings().to_vec(); - new_expr = self.bind_distinct( - left_span, - &mut new_bind_context, - &columns, - &mut HashMap::new(), - new_expr, - )?; + let mut select_info = + SelectInfo::from_columns(new_bind_context.all_column_bindings().to_vec()); + new_expr = self.bind_distinct(left_span, &mut select_info, new_expr)?; } Ok((new_expr, new_bind_context)) @@ -388,14 +381,8 @@ impl Binder { .set_cte_context(right_context.cte_context); // then apply distinct - let columns = left_context.all_column_bindings().to_vec(); - let s_expr = self.bind_distinct( - left_span, - &mut left_context, - &columns, - &mut HashMap::new(), - s_expr, - )?; + let mut select_info = SelectInfo::from_columns(left_context.all_column_bindings().to_vec()); + let s_expr = self.bind_distinct(left_span, &mut select_info, s_expr)?; Ok((s_expr, left_context)) } @@ -495,7 +482,7 @@ impl Binder { &self, bind_context: &BindContext, stmt: &SelectStmt, - scalar_items: &HashMap, + select_info: &SelectInfo, select_list: &SelectList, where_scalar: &Option, order_by: &[OrderItem], @@ -592,7 +579,7 @@ impl Binder { let mut order_by_cols = HashSet::with_capacity(order_by.len()); for o in order_by { - if let Some(scalar) = scalar_items.get(&o.index) { + if let Some(scalar) = select_info.source_scalar_item(o.index) { let cols = scalar.scalar.used_columns(); order_by_cols.extend(cols); } else { diff --git a/src/query/sql/src/planner/binder/sort.rs b/src/query/sql/src/planner/binder/sort.rs index f9608fd416c54..6315bdea5f98f 100644 --- a/src/query/sql/src/planner/binder/sort.rs +++ b/src/query/sql/src/planner/binder/sort.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashMap; use std::sync::Arc; use databend_common_ast::ast::Expr; @@ -25,10 +24,9 @@ use super::ExprContext; use crate::BindContext; use crate::Symbol; use crate::binder::Binder; -use crate::binder::ColumnBinding; use crate::binder::aggregate::AggregateRewriter; +use crate::binder::project::SelectInfo; use crate::binder::scalar::ScalarBinder; -use crate::binder::select::SelectList; use crate::binder::window::WindowRewriter; use crate::optimizer::ir::SExpr; use crate::planner::semantic::GroupingChecker; @@ -53,32 +51,72 @@ pub struct OrderItem { pub index: Symbol, pub asc: bool, pub nulls_first: bool, - pub name: String, +} + +#[derive(Clone, Copy, Debug)] +pub(crate) struct OrderByRewriteFlags { + pub needs_select_item_replacement: bool, + pub needs_aggregate_rewrite: bool, + pub needs_window_rewrite: bool, +} + +impl OrderByRewriteFlags { + pub(crate) const fn new( + needs_select_item_replacement: bool, + needs_aggregate_rewrite: bool, + needs_window_rewrite: bool, + ) -> Self { + Self { + needs_select_item_replacement, + needs_aggregate_rewrite, + needs_window_rewrite, + } + } + + pub(crate) const fn no_rewrite() -> Self { + Self::new(false, false, false) + } + + const fn needs_recursive_rewrite(self) -> bool { + self.needs_select_item_replacement + || self.needs_aggregate_rewrite + || self.needs_window_rewrite + } + + const fn needs_post_aggregate_rewrite(self) -> bool { + self.needs_select_item_replacement && self.needs_aggregate_rewrite + } } impl Binder { - pub fn analyze_order_items( + pub(crate) fn analyze_order_items( &mut self, bind_context: &mut BindContext, - scalar_items: &mut HashMap, + select_info: &mut SelectInfo, aliases: &[(String, ScalarExpr)], - projections: &[ColumnBinding], + rewrite_flags: &[OrderByRewriteFlags], order_by: &[OrderByExpr], distinct: bool, ) -> Result { - bind_context.set_expr_context(ExprContext::OrderByClause); + bind_context.expr_context = ExprContext::OrderByClause; let settings = self.ctx.get_settings(); let default_nulls_first = settings.get_nulls_first(); let mut order_items = Vec::with_capacity(order_by.len()); - for order in order_by { + assert_eq!( + rewrite_flags.len(), + order_by.len(), + "ORDER BY rewrite flags must align with ORDER BY expressions", + ); + + for (order, rewrite_flags) in order_by.iter().zip(rewrite_flags.iter().copied()) { match &order.expr { Expr::Literal { value: Literal::UInt64(index), .. } => { let index = *index as usize; - if index == 0 || index > projections.len() { + if index == 0 || index > select_info.column_count() { return Err(ErrorCode::SemanticError(format!( "ORDER BY position {} is not in select list", index @@ -87,12 +125,11 @@ impl Binder { } let index = index - 1; - let projection = &projections[index]; + let projection_column = select_info.column_at(index).unwrap(); let asc = order.asc.unwrap_or(true); order_items.push(OrderItem { - index: projection.index, - name: projection.column_name.clone(), + index: projection_column.index, asc, nulls_first: order .nulls_first @@ -109,16 +146,21 @@ impl Binder { ); let (bound_expr, _) = scalar_binder.bind(&order.expr)?; - if let Some((idx, (alias, _))) = aliases + if let Some((idx, (_, scalar))) = aliases .iter() .enumerate() .find(|(_, (_, scalar))| bound_expr.eq(scalar)) { + if bind_context.in_grouping { + let mut group_checker = GroupingChecker::new(bind_context, None); + let mut scalar = scalar.clone(); + group_checker.visit(&mut scalar)?; + } + // The order by expression is in the select list. let asc = order.asc.unwrap_or(true); order_items.push(OrderItem { - index: projections[idx].index, - name: alias.clone(), + index: select_info.column_at(idx).unwrap().index, asc, nulls_first: order .nulls_first @@ -130,30 +172,38 @@ impl Binder { .to_string(), )); } else { - let mut rewrite_scalar = self - .rewrite_scalar_with_replacement( + let mut rewrite_scalar = if rewrite_flags.needs_recursive_rewrite() { + self.rewrite_scalar_with_replacement( bind_context, &bound_expr, + rewrite_flags, &|nest_scalar| { if let ScalarExpr::BoundColumnRef(BoundColumnRef { column, .. }) = nest_scalar { - if let Some(scalar_item) = scalar_items.get(&column.index) { + if let Some(scalar_item) = + select_info.source_scalar_item(column.index) + { return Ok(Some(scalar_item.scalar.clone())); } } Ok(None) }, ) - .map_err(|e| ErrorCode::SemanticError(e.message()))?; + .map_err(|e| ErrorCode::SemanticError(e.message()))? + } else { + bound_expr + }; - AggregateRewriter::rewrite_expr( - &mut bind_context.aggregate_info, - self.metadata.clone(), - &mut rewrite_scalar, - )?; + if rewrite_flags.needs_post_aggregate_rewrite() { + AggregateRewriter::rewrite_expr( + &mut bind_context.aggregate_info, + self.metadata.clone(), + &mut rewrite_scalar, + )?; + } if let ScalarExpr::ConstantExpr(..) = rewrite_scalar { continue; @@ -172,11 +222,12 @@ impl Binder { scalar: rewrite_scalar, index: column_binding.index, }; - scalar_items.insert(column_binding.index, item); + let projection_item = + self.prepare_select_output_item(bind_context, &item)?; + select_info.insert_scalar(item, projection_item); let asc = order.asc.unwrap_or(true); order_items.push(OrderItem { index: column_binding.index, - name: column_binding.column_name, asc, nulls_first: order .nulls_first @@ -189,28 +240,9 @@ impl Binder { Ok(OrderItems { items: order_items }) } - pub fn bind_order_by( - &mut self, - from_context: &BindContext, - order_by: OrderItems, - select_list: &SelectList<'_>, - child: SExpr, - ) -> Result { + pub fn bind_order_by(&mut self, order_by: OrderItems, child: SExpr) -> Result { let mut order_by_items = Vec::with_capacity(order_by.items.len()); for order in order_by.items { - if from_context.in_grouping { - let mut group_checker = GroupingChecker::new(from_context, None); - // Perform grouping check on original scalar expression if order item is alias. - if let Some(scalar_item) = select_list - .items - .iter() - .find(|item| item.alias == order.name) - { - let mut scalar = scalar_item.scalar.clone(); - group_checker.visit(&mut scalar)?; - } - } - let order_by_item = SortItem { index: order.index, asc: order.asc, @@ -236,16 +268,24 @@ impl Binder { &self, bind_context: &mut BindContext, original_scalar: &ScalarExpr, + rewrite_flags: OrderByRewriteFlags, replacement_fn: &F, ) -> Result where F: Fn(&ScalarExpr) -> Result>, { - let replacement_opt = replacement_fn(original_scalar)?; + let replacement_opt = if rewrite_flags.needs_select_item_replacement { + replacement_fn(original_scalar)? + } else { + None + }; match replacement_opt { Some(replacement) => Ok(replacement), None => match original_scalar { aggregate @ ScalarExpr::AggregateFunction(_) => { + if !rewrite_flags.needs_aggregate_rewrite { + return Ok(aggregate.clone()); + } let mut aggregate = aggregate.clone(); AggregateRewriter::rewrite_expr( &mut bind_context.aggregate_info, @@ -255,6 +295,9 @@ impl Binder { Ok(aggregate) } udaf @ ScalarExpr::UDAFCall(_) => { + if !rewrite_flags.needs_aggregate_rewrite { + return Ok(udaf.clone()); + } let mut udaf = udaf.clone(); AggregateRewriter::rewrite_expr( &mut bind_context.aggregate_info, @@ -268,7 +311,12 @@ impl Binder { .args .iter() .map(|arg| { - self.rewrite_scalar_with_replacement(bind_context, arg, replacement_fn) + self.rewrite_scalar_with_replacement( + bind_context, + arg, + rewrite_flags, + replacement_fn, + ) }) .collect::>>()?; Ok(ScalarExpr::LambdaFunction(LambdaFunc { @@ -281,6 +329,9 @@ impl Binder { })) } window @ ScalarExpr::WindowFunction(_) => { + if !rewrite_flags.needs_window_rewrite { + return Ok(window.clone()); + } let mut window = window.clone(); let mut rewriter = WindowRewriter::new(bind_context, self.metadata.clone()); rewriter.visit(&mut window)?; @@ -291,7 +342,12 @@ impl Binder { .arguments .iter() .map(|arg| { - self.rewrite_scalar_with_replacement(bind_context, arg, replacement_fn) + self.rewrite_scalar_with_replacement( + bind_context, + arg, + rewrite_flags, + replacement_fn, + ) }) .collect::>>()?; Ok(ScalarExpr::FunctionCall(FunctionCall { @@ -310,6 +366,7 @@ impl Binder { let argument = Box::new(self.rewrite_scalar_with_replacement( bind_context, argument, + rewrite_flags, replacement_fn, )?); Ok(ScalarExpr::CastExpr(CastExpr { @@ -324,7 +381,12 @@ impl Binder { .arguments .iter() .map(|arg| { - self.rewrite_scalar_with_replacement(bind_context, arg, replacement_fn) + self.rewrite_scalar_with_replacement( + bind_context, + arg, + rewrite_flags, + replacement_fn, + ) }) .collect::>>()?; Ok(UDFCall { diff --git a/src/query/sql/src/planner/dataframe.rs b/src/query/sql/src/planner/dataframe.rs index 5d20b49d085aa..3651a39c87df5 100644 --- a/src/query/sql/src/planner/dataframe.rs +++ b/src/query/sql/src/planner/dataframe.rs @@ -41,6 +41,8 @@ use crate::Binder; use crate::Metadata; use crate::NameResolutionContext; use crate::optimizer::ir::SExpr; +use crate::planner::binder::OrderByRewriteFlags; +use crate::planner::binder::SelectInfo; use crate::plans::Limit; pub struct Dataframe { @@ -51,6 +53,25 @@ pub struct Dataframe { } impl Dataframe { + fn apply_select_output(self, select_info: SelectInfo) -> Result { + let Dataframe { + query_ctx, + mut binder, + mut bind_context, + s_expr, + } = self; + + let s_expr = binder.bind_projection(&mut bind_context, select_info, s_expr)?; + let s_expr = binder.add_internal_column_into_expr(&mut bind_context, s_expr)?; + + Ok(Self { + query_ctx, + binder, + bind_context, + s_expr, + }) + } + pub async fn scan( query_ctx: Arc, db: Option<&str>, @@ -145,23 +166,9 @@ impl Dataframe { .binder .normalize_select_list(bind_context, select_list)?; - let (scalar_items, projections) = self.binder.analyze_projection( - &bind_context.aggregate_info, - &bind_context.windows, - &select_list, - )?; - - self.s_expr = self.binder.bind_projection( - &mut self.bind_context, - &projections, - &scalar_items, - self.s_expr, - )?; - self.s_expr = self - .binder - .add_internal_column_into_expr(&mut self.bind_context, self.s_expr.clone())?; + let select_info = self.binder.analyze_projection(bind_context, &select_list)?; - Ok(self) + self.apply_select_output(select_info) } pub async fn filter(mut self, expr: Expr) -> Result { @@ -196,26 +203,15 @@ impl Dataframe { self.binder .analyze_aggregate_select(&mut self.bind_context, &mut select_list)?; - let (scalar_items, projections) = self.binder.analyze_projection( - &self.bind_context.aggregate_info, - &self.bind_context.windows, - &select_list, - )?; + let select_info = self + .binder + .analyze_projection(&self.bind_context, &select_list)?; self.s_expr = self .binder .bind_aggregate(&mut self.bind_context, self.s_expr)?; - self.s_expr = self.binder.bind_projection( - &mut self.bind_context, - &projections, - &scalar_items, - self.s_expr, - )?; - self.s_expr = self - .binder - .add_internal_column_into_expr(&mut self.bind_context, self.s_expr.clone())?; - Ok(self) + self.apply_select_output(select_info) } pub async fn aggregate( @@ -262,22 +258,11 @@ impl Dataframe { .bind_having(&mut self.bind_context, having, self.s_expr)?; } - let (scalar_items, projections) = self.binder.analyze_projection( - &self.bind_context.aggregate_info, - &self.bind_context.windows, - &select_list, - )?; - - self.s_expr = self.binder.bind_projection( - &mut self.bind_context, - &projections, - &scalar_items, - self.s_expr, - )?; - self.s_expr = self + let select_info = self .binder - .add_internal_column_into_expr(&mut self.bind_context, self.s_expr.clone())?; - Ok(self) + .analyze_projection(&self.bind_context, &select_list)?; + + self.apply_select_output(select_info) } pub fn distinct_col(self, columns: &[&str]) -> Result { @@ -302,28 +287,13 @@ impl Dataframe { .normalize_select_list(&mut self.bind_context, select_list.as_slice())?; self.binder .analyze_aggregate_select(&mut self.bind_context, &mut select_list)?; - let (mut scalar_items, projections) = self.binder.analyze_projection( - &self.bind_context.aggregate_info, - &self.bind_context.windows, - &select_list, - )?; - self.s_expr = self.binder.bind_distinct( - None, - &mut self.bind_context, - &projections, - &mut scalar_items, - self.s_expr.clone(), - )?; - self.s_expr = self.binder.bind_projection( - &mut self.bind_context, - &projections, - &scalar_items, - self.s_expr, - )?; + let mut select_info = self + .binder + .analyze_projection(&self.bind_context, &select_list)?; self.s_expr = self .binder - .add_internal_column_into_expr(&mut self.bind_context, self.s_expr.clone())?; - Ok(self) + .bind_distinct(None, &mut select_info, self.s_expr)?; + self.apply_select_output(select_info) } pub async fn limit(mut self, limit: Option, offset: usize) -> Result { @@ -333,8 +303,7 @@ impl Dataframe { offset, lazy_columns: Default::default(), }; - self.s_expr = - SExpr::create_unary(Arc::new(limit_plan.into()), Arc::new(self.s_expr.clone())); + self.s_expr = SExpr::create_unary(Arc::new(limit_plan.into()), Arc::new(self.s_expr)); Ok(self) } @@ -386,36 +355,22 @@ impl Dataframe { .iter() .map(|item| (item.alias.clone(), item.scalar.clone())) .collect::>(); - let (mut scalar_items, projections) = self.binder.analyze_projection( - &self.bind_context.aggregate_info, - &self.bind_context.windows, - &select_list, - )?; + let mut select_info = self + .binder + .analyze_projection(&self.bind_context, &select_list)?; let order_items = self.binder.analyze_order_items( &mut self.bind_context, - &mut scalar_items, + &mut select_info, &aliases, - &projections, + &vec![OrderByRewriteFlags::no_rewrite(); order.len()], &order, distinct, )?; - self.s_expr = self.binder.bind_order_by( - &self.bind_context, - order_items, - &select_list, - self.s_expr, - )?; + self.binder + .refresh_select_output(&self.bind_context, &mut select_info)?; + self.s_expr = self.binder.bind_order_by(order_items, self.s_expr)?; - self.s_expr = self.binder.bind_projection( - &mut self.bind_context, - &projections, - &scalar_items, - self.s_expr, - )?; - self.s_expr = self - .binder - .add_internal_column_into_expr(&mut self.bind_context, self.s_expr.clone())?; - Ok(self) + self.apply_select_output(select_info) } pub async fn except(mut self, dataframe: Dataframe) -> Result { diff --git a/src/query/sql/src/planner/expression/expression_parser.rs b/src/query/sql/src/planner/expression/expression_parser.rs index e0b1c6f76aab7..3180bfa70fe47 100644 --- a/src/query/sql/src/planner/expression/expression_parser.rs +++ b/src/query/sql/src/planner/expression/expression_parser.rs @@ -349,7 +349,7 @@ pub fn parse_lambda_expr( // Use parent metadata if provided (for masking policies on outer columns) // Otherwise create empty metadata (for better performance in community edition) let metadata = parent_metadata.unwrap_or_else(|| Arc::new(RwLock::new(Metadata::default()))); - lambda_context.set_expr_context(ExprContext::InLambdaFunction); + lambda_context.expr_context = ExprContext::InLambdaFunction; for (lambda_column, lambda_column_type) in lambda_columns.iter() { let column_index = lambda_context.next_column_index(); diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 7edc4985e1a64..e464c3b4137f7 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -2236,11 +2236,10 @@ impl<'a> TypeChecker<'a> { // column-first fallback so `sum(c1)` can bind a same-select alias when // there is no real `c1` column. self.in_aggregate_function = true; - let original_context = self.bind_context.expr_context.clone(); + let original_context = self.bind_context.expr_context; let disallow_alias_resolution = original_context.prefer_resolve_alias(); if disallow_alias_resolution { - self.bind_context - .set_expr_context(ExprContext::InAggregateFunction); + self.bind_context.expr_context = ExprContext::InAggregateFunction; } let arguments_result = (|| { let mut arguments = vec![]; @@ -2253,7 +2252,7 @@ impl<'a> TypeChecker<'a> { Ok::<_, ErrorCode>((arguments, arg_types)) })(); if disallow_alias_resolution { - self.bind_context.set_expr_context(original_context.clone()); + self.bind_context.expr_context = original_context; } self.in_aggregate_function = false; let (mut arguments, mut arg_types) = arguments_result?; @@ -2267,12 +2266,11 @@ impl<'a> TypeChecker<'a> { nulls_first, }| { if disallow_alias_resolution { - self.bind_context - .set_expr_context(ExprContext::InAggregateFunction); + self.bind_context.expr_context = ExprContext::InAggregateFunction; } let result = self.resolve(expr); if disallow_alias_resolution { - self.bind_context.set_expr_context(original_context.clone()); + self.bind_context.expr_context = original_context; } let box (scalar_expr, _) = result?; @@ -3288,9 +3286,9 @@ impl<'a> TypeChecker<'a> { .set_span(span)); } - let original_context = self.bind_context.expr_context.clone(); - self.bind_context - .set_expr_context(ExprContext::InSetReturningFunction); + let original_context = self + .bind_context + .replace_expr_context(ExprContext::InSetReturningFunction); let mut arguments = Vec::with_capacity(args.len()); for arg in args.iter() { @@ -3299,7 +3297,7 @@ impl<'a> TypeChecker<'a> { } // Restore the original context - self.bind_context.set_expr_context(original_context); + self.bind_context.expr_context = original_context; let srf_scalar = ScalarExpr::FunctionCall(FunctionCall { span, @@ -6486,9 +6484,9 @@ impl<'a> TypeChecker<'a> { .set_span(span), ); } - let original_context = self.bind_context.expr_context.clone(); - self.bind_context - .set_expr_context(ExprContext::InAsyncFunction); + let original_context = self + .bind_context + .replace_expr_context(ExprContext::InAsyncFunction); let result = match func_name { "nextval" => self.resolve_nextval_async_function(span, func_name, arguments)?, "dict_get" => self.resolve_dict_get_async_function(span, func_name, arguments)?, @@ -6502,7 +6500,7 @@ impl<'a> TypeChecker<'a> { } }; // Restore the original context - self.bind_context.set_expr_context(original_context); + self.bind_context.expr_context = original_context; self.bind_context.have_async_func = true; Ok(result) } diff --git a/src/query/sql/tests/it/semantic/binder.rs b/src/query/sql/tests/it/semantic/binder.rs index e744cad09e4dc..27c223dd5476c 100644 --- a/src/query/sql/tests/it/semantic/binder.rs +++ b/src/query/sql/tests/it/semantic/binder.rs @@ -13,6 +13,7 @@ // limitations under the License. use databend_common_exception::Result; +use databend_common_sql::plans::Plan; use crate::framework::golden::SqlTestCase; use crate::framework::golden::SqlTestOutcome; @@ -60,10 +61,20 @@ async fn bind_case(case: &SqlTestCase) -> Result { Ok(outcome) } -#[tokio::test(flavor = "multi_thread", worker_threads = 1)] -async fn test_binder_with_lite_table_context() -> Result<()> { - let mut file = open_golden_file("semantic", "binder.txt")?; +async fn run_binder_cases(file_name: &str, cases: &[SqlTestCase]) -> Result<()> { + let mut file = open_golden_file("semantic", file_name)?; + + for case in cases { + write_case_header(&mut file, case)?; + let outcome = bind_case(case).await?; + write_case_outcome(&mut file, &outcome)?; + } + + Ok(()) +} +#[tokio::test(flavor = "multi_thread", worker_threads = 1)] +async fn test_binder_clauses_and_ordering() -> Result<()> { let cases = [ SqlTestCase { name: "simple_aggregate_query_binds", @@ -131,6 +142,12 @@ async fn test_binder_with_lite_table_context() -> Result<()> { setup_sqls: &["CREATE TABLE t(number UInt64)"], sql: "SELECT sum(number) AS s FROM t HAVING s > 0", }, + SqlTestCase { + name: "having_aggregate_does_not_make_scalar_projection_valid", + description: "Introducing an aggregate in HAVING must not make a non-aggregated SELECT list valid.", + setup_sqls: &["CREATE TABLE t(number UInt64)"], + sql: "SELECT number FROM t HAVING count(*) > 0", + }, SqlTestCase { name: "order_by_can_introduce_aggregate_in_aggregate_query", description: "ORDER BY may introduce a new aggregate expression when the query is already aggregated.", @@ -149,6 +166,24 @@ async fn test_binder_with_lite_table_context() -> Result<()> { setup_sqls: &["CREATE TABLE t(number UInt64)"], sql: "SELECT number FROM t ORDER BY count(*) + 1", }, + SqlTestCase { + name: "order_by_expression_reuses_scalar_alias_semantics", + description: "ORDER BY expressions should still inline scalar aliases from the select semantic view when they are used inside a larger expression.", + setup_sqls: &["CREATE TABLE t(number UInt64)"], + sql: "SELECT number + 1 AS s FROM t ORDER BY s + 1", + }, + SqlTestCase { + name: "order_by_duplicate_aggregate_alias_is_ambiguous", + description: "ORDER BY should keep duplicate aggregate aliases ambiguous instead of pre-expanding one candidate.", + setup_sqls: &["CREATE TABLE t(number UInt64)"], + sql: "SELECT sum(number) AS s, max(number) AS s FROM t ORDER BY s", + }, + SqlTestCase { + name: "order_by_expression_reuses_aggregate_alias_semantics", + description: "ORDER BY expressions should keep aggregate aliases on the original semantic view instead of depending on rewritten select-item state.", + setup_sqls: &["CREATE TABLE t(number UInt64)"], + sql: "SELECT sum(number) AS s FROM t ORDER BY s + 1", + }, SqlTestCase { name: "aggregate_argument_prefers_base_column_over_select_alias", description: "Inside an aggregate function, a same-name select alias should not shadow the base column.", @@ -161,6 +196,14 @@ async fn test_binder_with_lite_table_context() -> Result<()> { setup_sqls: &["CREATE TABLE t(number UInt64)"], sql: "SELECT number % 3 AS c1, sum(c1) FROM t GROUP BY number % 3", }, + ]; + + run_binder_cases("binder_clauses.txt", &cases).await +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 1)] +async fn test_binder_window_core_paths() -> Result<()> { + let cases = [ SqlTestCase { name: "window_aggregate_does_not_become_group_aggregate", description: "An aggregate used as a window function should stay in the window phase rather than becoming a group aggregate.", @@ -179,6 +222,66 @@ async fn test_binder_with_lite_table_context() -> Result<()> { setup_sqls: &["CREATE TABLE t(number UInt64)"], sql: "SELECT row_number() OVER (ORDER BY sum(number)) FROM t", }, + SqlTestCase { + name: "order_by_window_alias_does_not_seed_window_aggregate", + description: "ORDER BY on a window alias must not pre-register aggregates that only appear inside that alias's window specification.", + setup_sqls: &["CREATE TABLE t(number UInt64)"], + sql: "SELECT row_number() OVER (ORDER BY sum(number)) AS rn FROM t ORDER BY rn", + }, + SqlTestCase { + name: "order_by_expression_reuses_window_alias_semantics", + description: "ORDER BY expressions should keep window aliases on the original semantic view instead of depending on rewritten select-item state.", + setup_sqls: &["CREATE TABLE t(number UInt64)"], + sql: "SELECT number, row_number() OVER (ORDER BY number) AS rn FROM t ORDER BY rn + 1", + }, + SqlTestCase { + name: "window_order_reuses_having_aggregate", + description: "A window ORDER BY clause should be able to reuse an aggregate introduced later by HAVING.", + setup_sqls: &["CREATE TABLE t(number UInt64)"], + sql: "SELECT row_number() OVER (ORDER BY sum(number)) FROM t HAVING sum(number) > 0", + }, + SqlTestCase { + name: "window_order_reuses_having_aggregate_alias", + description: "A window ORDER BY clause should be able to reuse an aggregate reached through a HAVING alias reference.", + setup_sqls: &["CREATE TABLE t(number UInt64)"], + sql: "SELECT sum(number) AS s, row_number() OVER (ORDER BY s) FROM t HAVING s > 0", + }, + SqlTestCase { + name: "window_order_reuses_having_udaf", + description: "A window ORDER BY clause should be able to reuse a UDAF introduced later by HAVING.", + setup_sqls: &["CREATE TABLE t(a UInt64, b UInt64)", TEST_UDAF_SQL], + sql: "SELECT row_number() OVER (ORDER BY weighted_avg(a, b)) FROM t HAVING weighted_avg(a, b) > 0", + }, + SqlTestCase { + name: "window_order_reuses_having_udaf_alias", + description: "A window ORDER BY clause should be able to reuse a UDAF reached through a HAVING alias reference.", + setup_sqls: &["CREATE TABLE t(a UInt64, b UInt64)", TEST_UDAF_SQL], + sql: "SELECT weighted_avg(a, b) AS s, row_number() OVER (ORDER BY s) FROM t HAVING s > 0", + }, + SqlTestCase { + name: "window_order_reuses_order_by_aggregate", + description: "A window ORDER BY clause should be able to reuse an aggregate introduced later by ORDER BY.", + setup_sqls: &["CREATE TABLE t(number UInt64)"], + sql: "SELECT row_number() OVER (ORDER BY sum(number)) FROM t ORDER BY sum(number)", + }, + SqlTestCase { + name: "window_order_reuses_order_by_udaf", + description: "A window ORDER BY clause should be able to reuse a UDAF introduced later by ORDER BY.", + setup_sqls: &["CREATE TABLE t(a UInt64, b UInt64)", TEST_UDAF_SQL], + sql: "SELECT row_number() OVER (ORDER BY weighted_avg(a, b)) FROM t ORDER BY weighted_avg(a, b)", + }, + SqlTestCase { + name: "window_order_reuses_order_by_aggregate_alias", + description: "A window ORDER BY clause should be able to reuse an aggregate reached through an ORDER BY alias reference.", + setup_sqls: &["CREATE TABLE t(number UInt64)"], + sql: "SELECT sum(number) AS s, row_number() OVER (ORDER BY s) FROM t ORDER BY s", + }, + SqlTestCase { + name: "window_order_reuses_order_by_udaf_alias", + description: "A window ORDER BY clause should be able to reuse a UDAF reached through an ORDER BY alias reference.", + setup_sqls: &["CREATE TABLE t(a UInt64, b UInt64)", TEST_UDAF_SQL], + sql: "SELECT weighted_avg(a, b) AS s, row_number() OVER (ORDER BY s) FROM t ORDER BY s", + }, SqlTestCase { name: "window_order_rejects_window_alias_expansion", description: "A window ORDER BY clause must still reject aliases that expand to a prior window expression.", @@ -210,11 +313,19 @@ async fn test_binder_with_lite_table_context() -> Result<()> { sql: "SELECT number % 3 AS a, number % 4 AS b, row_number() OVER (PARTITION BY b % 2 ORDER BY a) FROM t GROUP BY a, b", }, SqlTestCase { - name: "qualify_cte_then_outer_aggregate_from_sqllogictest_binds", - description: "A sqllogictest pattern that filters with QUALIFY inside a CTE before an outer aggregate should still bind.", - setup_sqls: &["CREATE TABLE t(number UInt64)"], - sql: "WITH test AS (SELECT number % 10 AS id, number AS full_matched FROM t QUALIFY row_number() OVER (PARTITION BY id ORDER BY number DESC) = 1) SELECT full_matched, count() FROM test GROUP BY full_matched HAVING full_matched = 3", + name: "within_group_window_aggregate_binds", + description: "A WITHIN GROUP window aggregate should bind its sort descriptors without turning into a grouped aggregate.", + setup_sqls: &["CREATE TABLE empsalary(depname String, empno UInt64, salary UInt64)"], + sql: "SELECT listagg(cast(salary as varchar), '|') WITHIN GROUP (ORDER BY empno DESC) OVER (PARTITION BY depname ORDER BY empno) FROM empsalary", }, + ]; + + run_binder_cases("binder_window_core.txt", &cases).await +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 1)] +async fn test_binder_named_window_paths() -> Result<()> { + let cases = [ SqlTestCase { name: "named_window_from_sqllogictest_binds", description: "A named WINDOW clause from sqllogictests should bind as a normal window specification.", @@ -257,17 +368,19 @@ async fn test_binder_with_lite_table_context() -> Result<()> { setup_sqls: &["CREATE TABLE empsalary(salary UInt64)"], sql: "SELECT sum(salary) OVER w2 FROM empsalary WINDOW w1 AS (ORDER BY salary ROWS CURRENT ROW), w2 AS (w1)", }, + ]; + + run_binder_cases("binder_window_named.txt", &cases).await +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 1)] +async fn test_binder_qualify_paths() -> Result<()> { + let cases = [ SqlTestCase { - name: "unnest_over_aggregate_is_planned_after_aggregate", - description: "A set-returning function over an aggregate should stay above the aggregate phase instead of rewriting the aggregate away early.", - setup_sqls: &[], - sql: "SELECT unnest(max([11, 12]))", - }, - SqlTestCase { - name: "duplicate_srf_expression_reuses_project_set_binding", - description: "Repeated identical SRF expressions should reuse the registered ProjectSet binding.", - setup_sqls: &[], - sql: "SELECT unnest([1, 2, 3]), unnest([1, 2, 3])", + name: "qualify_cte_then_outer_aggregate_from_sqllogictest_binds", + description: "A sqllogictest pattern that filters with QUALIFY inside a CTE before an outer aggregate should still bind.", + setup_sqls: &["CREATE TABLE t(number UInt64)"], + sql: "WITH test AS (SELECT number % 10 AS id, number AS full_matched FROM t QUALIFY row_number() OVER (PARTITION BY id ORDER BY number DESC) = 1) SELECT full_matched, count() FROM test GROUP BY full_matched HAVING full_matched = 3", }, SqlTestCase { name: "qualify_named_window_with_subquery_binds", @@ -287,6 +400,26 @@ async fn test_binder_with_lite_table_context() -> Result<()> { setup_sqls: &["CREATE TABLE t(number UInt64)"], sql: "SELECT number % 2 AS a, sum(number) AS s, row_number() OVER (ORDER BY a) AS rn FROM t GROUP BY a QUALIFY s > 0", }, + ]; + + run_binder_cases("binder_qualify.txt", &cases).await +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 1)] +async fn test_binder_grouping_and_srf_paths() -> Result<()> { + let cases = [ + SqlTestCase { + name: "unnest_over_aggregate_is_planned_after_aggregate", + description: "A set-returning function over an aggregate should stay above the aggregate phase instead of rewriting the aggregate away early.", + setup_sqls: &[], + sql: "SELECT unnest(max([11, 12]))", + }, + SqlTestCase { + name: "duplicate_srf_expression_reuses_project_set_binding", + description: "Repeated identical SRF expressions should reuse the registered ProjectSet binding.", + setup_sqls: &[], + sql: "SELECT unnest([1, 2, 3]), unnest([1, 2, 3])", + }, SqlTestCase { name: "group_by_srf_alias_from_sqllogictest_binds", description: "A sqllogictest GROUP BY pattern that groups by an SRF select alias should bind successfully.", @@ -359,12 +492,6 @@ async fn test_binder_with_lite_table_context() -> Result<()> { setup_sqls: &["CREATE TABLE empsalary(depname String, salary UInt64)"], sql: "SELECT grouping(salary), grouping(depname), sum(grouping(salary)) OVER (PARTITION BY grouping(salary) + grouping(depname) ORDER BY grouping(depname) DESC) FROM empsalary GROUP BY ROLLUP(depname, salary)", }, - SqlTestCase { - name: "within_group_window_aggregate_binds", - description: "A WITHIN GROUP window aggregate should bind its sort descriptors without turning into a grouped aggregate.", - setup_sqls: &["CREATE TABLE empsalary(depname String, empno UInt64, salary UInt64)"], - sql: "SELECT listagg(cast(salary as varchar), '|') WITHIN GROUP (ORDER BY empno DESC) OVER (PARTITION BY depname ORDER BY empno) FROM empsalary", - }, SqlTestCase { name: "within_group_group_aggregate_binds", description: "A non-window WITHIN GROUP aggregate should register its sort descriptors in the aggregate phase.", @@ -373,10 +500,57 @@ async fn test_binder_with_lite_table_context() -> Result<()> { }, ]; - for case in &cases { - write_case_header(&mut file, case)?; - let outcome = bind_case(case).await?; - write_case_outcome(&mut file, &outcome)?; + run_binder_cases("binder_grouping.txt", &cases).await +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 1)] +async fn test_clause_prepass_skips_subquery_metadata_side_effects() -> Result<()> { + let cases = [ + SqlTestCase { + name: "having_subquery_prepass_metadata", + description: "", + setup_sqls: &["CREATE TABLE t(number UInt64)"], + sql: "SELECT sum(number) FROM t HAVING EXISTS (SELECT 1 FROM t AS inner_t WHERE inner_t.number > 0)", + }, + SqlTestCase { + name: "having_alias_and_subquery_prepass_metadata", + description: "", + setup_sqls: &["CREATE TABLE t(number UInt64)"], + sql: "SELECT sum(number) AS s FROM t HAVING s > 0 AND EXISTS (SELECT 1 FROM t AS inner_t WHERE inner_t.number > 0)", + }, + SqlTestCase { + name: "having_alias_to_subquery_prepass_metadata", + description: "", + setup_sqls: &["CREATE TABLE t(number UInt64)"], + sql: "SELECT (SELECT max(number) FROM t AS inner_t) AS s FROM t HAVING s > 0", + }, + SqlTestCase { + name: "order_by_subquery_prepass_metadata", + description: "", + setup_sqls: &["CREATE TABLE t(number UInt64)"], + sql: "SELECT number FROM t ORDER BY (SELECT max(number) FROM t AS inner_t)", + }, + SqlTestCase { + name: "order_by_alias_to_subquery_prepass_metadata", + description: "", + setup_sqls: &["CREATE TABLE t(number UInt64)"], + sql: "SELECT (SELECT max(number) FROM t AS inner_t) AS s FROM t ORDER BY s", + }, + ]; + + for case in cases { + let ctx = setup_context(&case).await?; + let plan = ctx.bind_sql(case.sql).await?; + let Plan::Query { metadata, .. } = plan else { + panic!("expected query plan for {}", case.name); + }; + + let table_count = metadata.read().tables().len(); + assert_eq!( + table_count, 2, + "{} should only keep metadata for the outer query and the final subquery bind", + case.name + ); } Ok(()) diff --git a/src/query/sql/tests/it/semantic/binder.txt b/src/query/sql/tests/it/semantic/binder.txt deleted file mode 100644 index a50d71ddb6c4b..0000000000000 --- a/src/query/sql/tests/it/semantic/binder.txt +++ /dev/null @@ -1,870 +0,0 @@ -=== simple_aggregate_query_binds === -description: A plain aggregate query should bind successfully. -sql: SELECT avg(number) FROM t -status: ok -EvalScalar -├── scalars: [divide(sum(number) (#1), if(eq(count(number) (#2), 0), 1, count(number) (#2))) AS (#3)] -└── Aggregate(Initial) - ├── group items: [] - ├── aggregate functions: [sum(t.number (#0)) AS (#1), count(t.number (#0)) AS (#2)] - └── EvalScalar - ├── scalars: [t.number (#0) AS (#0)] - └── Scan - ├── table: default.t (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== where_rejects_aggregate_alias === -description: An aggregate alias must still be rejected in WHERE. -sql: SELECT sum(number) AS s FROM t WHERE s > 0 -status: error -code: 1065 -message: Where clause can't contain aggregate or window functions - -=== where_accepts_scalar_alias === -description: A scalar alias should remain usable in WHERE. -sql: SELECT number + 1 AS s FROM t WHERE s > 1 -status: ok -EvalScalar -├── scalars: [plus(t.number (#0), 1) AS (#1)] -└── Filter - ├── filters: [gt(plus(t.number (#0), 1), 1)] - └── Scan - ├── table: default.t (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== where_alias_to_srf_uses_project_set_binding === -description: A WHERE clause that references an SRF alias should keep the alias bound to the ProjectSet column instead of expanding back to the raw SRF. -sql: SELECT unnest([1, 2, 3]) AS u WHERE u = 1 -status: ok -EvalScalar -├── scalars: [get(unnest([1, 2, 3]) (#0)) AS (#1)] -└── Filter - ├── filters: [eq(get(unnest([1, 2, 3]) (#0)), 1)] - └── ProjectSet(ProjectSet { srfs: [ScalarItem { scalar: FunctionCall(FunctionCall { span: Some(7..24), func_name: "unnest", params: [], arguments: [ConstantExpr(ConstantExpr { span: Some(14..23), value: Array(UInt8([1, 2, 3])) })] }), index: 0 }] }) - └── DummyTableScan(DummyTableScan { source_table_indexes: [] }) - - -=== where_rejects_udaf === -description: A UDAF in WHERE must be rejected like any other aggregate. -sql: SELECT a FROM t WHERE weighted_avg(a, b) > 0 -status: error -code: 1065 -message: Where clause can't contain aggregate or window functions - -=== qualify_rejects_aggregate_alias === -description: An aggregate alias must still be rejected in QUALIFY. -sql: SELECT sum(number) AS s FROM t QUALIFY s > 0 -status: error -code: 1065 -message: Qualify clause must not contain aggregate functions - -=== qualify_rejects_direct_aggregate === -description: A raw aggregate expression must be rejected directly in QUALIFY. -sql: SELECT number FROM t QUALIFY sum(number) > 0 -status: error -code: 1065 -message: Qualify clause must not contain aggregate functions - -=== qualify_rejects_udaf_alias === -description: A UDAF alias must still be rejected in QUALIFY. -sql: SELECT weighted_avg(a, b) AS s FROM t QUALIFY s > 0 -status: error -code: 1065 -message: Qualify clause must not contain aggregate functions - -=== qualify_accepts_window_alias === -description: A window alias should remain usable in QUALIFY. -sql: SELECT number, row_number() OVER (ORDER BY number) AS rn FROM t QUALIFY rn = 1 -status: ok -EvalScalar -├── scalars: [t.number (#0) AS (#0), row_number() OVER (ORDER BY number) (#1) AS (#1)] -└── Filter - ├── filters: [eq(row_number() OVER (ORDER BY number) (#1), 1)] - └── Window - ├── aggregate function: row_number - ├── partition items: [] - ├── order by items: [t.number (#0) AS (#0)] - ├── frame: [Range: Preceding(None) ~ CurrentRow] - └── Sort - ├── sort keys: [t.number (#0) ASC NULLS LAST] - ├── limit: [NONE] - └── EvalScalar - ├── scalars: [t.number (#0) AS (#0)] - └── Scan - ├── table: default.t (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== qualify_alias_to_srf_uses_project_set_binding === -description: A QUALIFY clause that references an SRF alias should keep the alias bound to the ProjectSet column instead of expanding back to the raw SRF. -sql: SELECT unnest([1, 2, 3]) AS u QUALIFY u = 1 -status: ok -EvalScalar -├── scalars: [get(unnest([1, 2, 3]) (#0)) AS (#1)] -└── Filter - ├── filters: [eq(get(unnest([1, 2, 3]) (#0)), 1)] - └── ProjectSet(ProjectSet { srfs: [ScalarItem { scalar: FunctionCall(FunctionCall { span: Some(7..24), func_name: "unnest", params: [], arguments: [ConstantExpr(ConstantExpr { span: Some(14..23), value: Array(UInt8([1, 2, 3])) })] }), index: 0 }] }) - └── DummyTableScan(DummyTableScan { source_table_indexes: [] }) - - -=== having_accepts_aggregate_alias === -description: An aggregate alias should remain usable in HAVING. -sql: SELECT sum(number) AS s FROM t HAVING s > 0 -status: ok -EvalScalar -├── scalars: [sum(number) (#1) AS (#1)] -└── Filter - ├── filters: [gt(sum(number) (#1), 0)] - └── Aggregate(Initial) - ├── group items: [] - ├── aggregate functions: [sum(t.number (#0)) AS (#1)] - └── EvalScalar - ├── scalars: [t.number (#0) AS (#0)] - └── Scan - ├── table: default.t (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== order_by_can_introduce_aggregate_in_aggregate_query === -description: ORDER BY may introduce a new aggregate expression when the query is already aggregated. -sql: SELECT count(*) FROM t ORDER BY sum(number) -status: ok -Sort -├── sort keys: [sum(number) (#2) ASC NULLS LAST] -├── limit: [NONE] -└── EvalScalar - ├── scalars: [COUNT(*) (#1) AS (#1), sum(number) (#2) AS (#2)] - └── Aggregate(Initial) - ├── group items: [] - ├── aggregate functions: [count() AS (#1), sum(t.number (#0)) AS (#2)] - └── EvalScalar - ├── scalars: [t.number (#0) AS (#0)] - └── Scan - ├── table: default.t (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== order_by_aggregate_does_not_make_scalar_projection_valid === -description: Introducing an aggregate in ORDER BY must not make a non-aggregated SELECT list valid. -sql: SELECT number FROM t ORDER BY sum(number) -status: error -code: 1065 -message: column "number" must appear in the GROUP BY clause or be used in an aggregate function - -=== order_by_count_does_not_make_scalar_projection_valid === -description: The sqllogictest ORDER BY count(*) pattern must still reject a scalar projection. -sql: SELECT number FROM t ORDER BY count(*) + 1 -status: error -code: 1065 -message: column "number" must appear in the GROUP BY clause or be used in an aggregate function - -=== aggregate_argument_prefers_base_column_over_select_alias === -description: Inside an aggregate function, a same-name select alias should not shadow the base column. -sql: SELECT a AS c2, sum(c2) FROM t GROUP BY a -status: ok -EvalScalar -├── scalars: [t.a (#0) AS (#0), sum(c2) (#2) AS (#2)] -└── Aggregate(Initial) - ├── group items: [t.a (#0) AS (#0)] - ├── aggregate functions: [sum(t.c2 (#1)) AS (#2)] - └── EvalScalar - ├── scalars: [t.a (#0) AS (#0), t.c2 (#1) AS (#1)] - └── Scan - ├── table: default.t (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== aggregate_argument_can_fallback_to_select_alias_in_select_clause === -description: Inside the SELECT list, an aggregate argument should still fall back to a same-select alias when no base column exists. -sql: SELECT number % 3 AS c1, sum(c1) FROM t GROUP BY number % 3 -status: ok -EvalScalar -├── scalars: [sum(c1) (#2) AS (#2), number % 3 (#1) AS (#3)] -└── Aggregate(Initial) - ├── group items: [modulo(t.number (#0), 3) AS (#1)] - ├── aggregate functions: [sum(number % 3 (#1)) AS (#2)] - └── EvalScalar - ├── scalars: [modulo(t.number (#0), 3) AS (#1)] - └── Scan - ├── table: default.t (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== window_aggregate_does_not_become_group_aggregate === -description: An aggregate used as a window function should stay in the window phase rather than becoming a group aggregate. -sql: SELECT sum(number) OVER () FROM t -status: ok -EvalScalar -├── scalars: [sum(number) OVER () (#1) AS (#1)] -└── Window - ├── aggregate function: sum - ├── partition items: [] - ├── order by items: [] - ├── frame: [Range: Preceding(None) ~ Following(None)] - └── EvalScalar - ├── scalars: [t.number (#0) AS (#0)] - └── Scan - ├── table: default.t (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== window_partition_rejects_new_aggregate === -description: A window PARTITION BY clause must not introduce a new aggregate expression. -sql: SELECT row_number() OVER (PARTITION BY sum(number)) FROM t -status: error -code: 1065 -message: Window specification and arguments cannot contain aggregate functions - -=== window_order_rejects_new_aggregate === -description: A window ORDER BY clause must not introduce a new aggregate expression. -sql: SELECT row_number() OVER (ORDER BY sum(number)) FROM t -status: error -code: 1065 -message: Window specification and arguments cannot contain aggregate functions - -=== window_order_rejects_window_alias_expansion === -description: A window ORDER BY clause must still reject aliases that expand to a prior window expression. -sql: SELECT row_number() OVER () AS rn, row_number() OVER (ORDER BY rn) FROM t -status: error -code: 1065 -message: Window function cannot contain another window function - -=== duplicate_window_expression_reuses_window_binding === -description: Repeated identical window expressions should reuse the registered window binding. -sql: SELECT row_number() OVER (ORDER BY number), row_number() OVER (ORDER BY number) FROM t -status: ok -EvalScalar -├── scalars: [row_number() OVER (ORDER BY number) (#1) AS (#1)] -└── Window - ├── aggregate function: row_number - ├── partition items: [] - ├── order by items: [t.number (#0) AS (#0)] - ├── frame: [Range: Preceding(None) ~ CurrentRow] - └── Sort - ├── sort keys: [t.number (#0) ASC NULLS LAST] - ├── limit: [NONE] - └── EvalScalar - ├── scalars: [t.number (#0) AS (#0)] - └── Scan - ├── table: default.t (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== laglead_window_from_sqllogictest_binds === -description: A sqllogictest LEAD window pattern should still bind through the lag/lead rewrite path. -sql: SELECT lead(number, 1, 0) OVER (PARTITION BY number % 3 ORDER BY number + 1) FROM t -status: ok -EvalScalar -├── scalars: [lead(number, 1, 0) OVER (PARTITION BY number % 3 ORDER BY number + 1) (#4) AS (#4)] -└── Window - ├── aggregate function: lead - ├── partition items: [modulo(t.number (#0), 3) AS (#2)] - ├── order by items: [plus(t.number (#0), 1) AS (#3)] - ├── frame: [Rows: Following(Some(Number(1_u64))) ~ Following(Some(Number(1_u64)))] - └── Sort - ├── sort keys: [lead_part_0 (#2) ASC NULLS LAST, lead_order_0 (#3) ASC NULLS LAST] - ├── limit: [NONE] - ├── window top: NONE - ├── window function: LagLead(LagLeadFunction { is_lag: false, arg: BoundColumnRef(BoundColumnRef { span: Some(12..18), column: ColumnBinding { database_name: Some("default"), table_name: Some("t"), column_position: Some(1), table_index: Some(0), column_name: "number", column_name_lower: None, index: 0, data_type: Nullable(Number(UInt64)), visibility: Visible, virtual_expr: None, is_srf: false } }), offset: 1, default: Some(BoundColumnRef(BoundColumnRef { span: Some(23..24), column: ColumnBinding { database_name: None, table_name: None, column_position: None, table_index: None, column_name: "lead_default_value", column_name_lower: None, index: 1, data_type: Nullable(Number(UInt64)), visibility: Visible, virtual_expr: None, is_srf: false } })), return_type: Nullable(Number(UInt64)) }) - └── EvalScalar - ├── scalars: [t.number (#0) AS (#0), CAST(0 AS UInt64 NULL) AS (#1), modulo(t.number (#0), 3) AS (#2), plus(t.number (#0), 1) AS (#3)] - └── Scan - ├── table: default.t (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== nth_value_window_binds === -description: An NTH_VALUE window expression should still bind through the dedicated nth_value rewrite path. -sql: SELECT nth_value(number, 2) OVER (ORDER BY number) FROM t -status: ok -EvalScalar -├── scalars: [nth_value(number, 2) OVER (ORDER BY number) (#1) AS (#1)] -└── Window - ├── aggregate function: nth_value - ├── partition items: [] - ├── order by items: [t.number (#0) AS (#0)] - ├── frame: [Range: Preceding(None) ~ CurrentRow] - └── Sort - ├── sort keys: [t.number (#0) ASC NULLS LAST] - ├── limit: [NONE] - └── EvalScalar - ├── scalars: [t.number (#0) AS (#0), t.number (#0) AS (#0)] - └── Scan - ├── table: default.t (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== window_partition_rewrites_group_item_expression === -description: A window partition expression over grouped aliases should rewrite non-column group items back to their group-item columns. -sql: SELECT number % 3 AS a, number % 4 AS b, row_number() OVER (PARTITION BY b % 2 ORDER BY a) FROM t GROUP BY a, b -status: ok -EvalScalar -├── scalars: [row_number() OVER (PARTITION BY b % 2 ORDER BY a) (#5) AS (#5), a (#1) AS (#6), b (#2) AS (#7)] -└── Window - ├── aggregate function: row_number - ├── partition items: [modulo(b (#2), 2) AS (#3)] - ├── order by items: [a (#1) AS (#4)] - ├── frame: [Range: Preceding(None) ~ CurrentRow] - └── Sort - ├── sort keys: [row_number_part_0 (#3) ASC NULLS LAST, row_number_order_0 (#4) ASC NULLS LAST] - ├── limit: [NONE] - ├── window top: NONE - ├── window function: RowNumber - └── EvalScalar - ├── scalars: [modulo(b (#2), 2) AS (#3), a (#1) AS (#4)] - └── Aggregate(Initial) - ├── group items: [modulo(t.number (#0), 3) AS (#1), modulo(t.number (#0), 4) AS (#2)] - ├── aggregate functions: [] - └── EvalScalar - ├── scalars: [modulo(t.number (#0), 3) AS (#1), modulo(t.number (#0), 4) AS (#2)] - └── Scan - ├── table: default.t (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== qualify_cte_then_outer_aggregate_from_sqllogictest_binds === -description: A sqllogictest pattern that filters with QUALIFY inside a CTE before an outer aggregate should still bind. -sql: WITH test AS (SELECT number % 10 AS id, number AS full_matched FROM t QUALIFY row_number() OVER (PARTITION BY id ORDER BY number DESC) = 1) SELECT full_matched, count() FROM test GROUP BY full_matched HAVING full_matched = 3 -status: ok -EvalScalar -├── scalars: [t.number (#0) AS (#0), count() (#4) AS (#4)] -└── Filter - ├── filters: [eq(t.number (#0), 3)] - └── Aggregate(Initial) - ├── group items: [t.number (#0) AS (#0)] - ├── aggregate functions: [count() AS (#4)] - └── EvalScalar - ├── scalars: [t.number (#0) AS (#0)] - └── EvalScalar - ├── scalars: [t.number (#0) AS (#0), modulo(t.number (#0), 10) AS (#1)] - └── Filter - ├── filters: [eq(row_number() OVER (PARTITION BY id ORDER BY number DESC) (#3), 1)] - └── Window - ├── aggregate function: row_number - ├── partition items: [modulo(t.number (#0), 10) AS (#2)] - ├── order by items: [t.number (#0) AS (#0) DESC] - ├── frame: [Range: Preceding(None) ~ CurrentRow] - └── Sort - ├── sort keys: [row_number_part_0 (#2) ASC NULLS LAST, t.number (#0) DESC NULLS LAST] - ├── limit: [NONE] - ├── window top: NONE - ├── window function: RowNumber - └── EvalScalar - ├── scalars: [t.number (#0) AS (#0), modulo(t.number (#0), 10) AS (#2)] - └── Scan - ├── table: default.t (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== named_window_from_sqllogictest_binds === -description: A named WINDOW clause from sqllogictests should bind as a normal window specification. -sql: SELECT depname, empno, salary, sum(salary) OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY empno) -status: ok -EvalScalar -├── scalars: [empsalary.depname (#0) AS (#0), empsalary.empno (#1) AS (#1), empsalary.salary (#2) AS (#2), sum(salary) OVER w (#3) AS (#3)] -└── Window - ├── aggregate function: sum - ├── partition items: [empsalary.depname (#0) AS (#0)] - ├── order by items: [empsalary.empno (#1) AS (#1)] - ├── frame: [Range: Preceding(None) ~ CurrentRow] - └── Sort - ├── sort keys: [empsalary.depname (#0) ASC NULLS LAST, empsalary.empno (#1) ASC NULLS LAST] - ├── limit: [NONE] - ├── window top: NONE - ├── window function: Aggregate(AggregateFunction { span: Some(31..49), func_name: "sum", distinct: false, params: [], args: [BoundColumnRef(BoundColumnRef { span: Some(35..41), column: ColumnBinding { database_name: Some("default"), table_name: Some("empsalary"), column_position: Some(3), table_index: Some(0), column_name: "salary", column_name_lower: None, index: 2, data_type: Nullable(Number(UInt64)), visibility: Visible, virtual_expr: None, is_srf: false } })], return_type: Nullable(Number(UInt64)), sort_descs: [], display_name: "sum(salary) OVER w" }) - └── EvalScalar - ├── scalars: [empsalary.depname (#0) AS (#0), empsalary.empno (#1) AS (#1), empsalary.salary (#2) AS (#2)] - └── Scan - ├── table: default.empsalary (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== named_window_aggregate_order_by_existing_group_aggregate_binds === -description: A grouped query should be able to introduce an aggregate inside a named window clause and reuse it across the window aggregate and ORDER BY. -sql: SELECT depname, sum(sum(salary)) OVER w FROM empsalary GROUP BY depname WINDOW w AS (PARTITION BY 1 ORDER BY sum(salary)) -status: ok -EvalScalar -├── scalars: [empsalary.depname (#0) AS (#0), sum(sum(salary)) OVER w (#4) AS (#4)] -└── Window - ├── aggregate function: sum - ├── partition items: [1 AS (#3)] - ├── order by items: [sum(salary) (#2) AS (#2)] - ├── frame: [Range: Preceding(None) ~ CurrentRow] - └── Sort - ├── sort keys: [sum_part_0 (#3) ASC NULLS LAST, sum(salary) (#2) ASC NULLS LAST] - ├── limit: [NONE] - ├── window top: NONE - ├── window function: Aggregate(AggregateFunction { span: Some(16..39), func_name: "sum", distinct: false, params: [], args: [BoundColumnRef(BoundColumnRef { span: None, column: ColumnBinding { database_name: None, table_name: None, column_position: None, table_index: None, column_name: "sum(salary)", column_name_lower: None, index: 2, data_type: Nullable(Number(UInt64)), visibility: Visible, virtual_expr: None, is_srf: false } })], return_type: Nullable(Number(UInt64)), sort_descs: [], display_name: "sum(sum(salary)) OVER w" }) - └── EvalScalar - ├── scalars: [sum(salary) (#2) AS (#2), sum(salary) (#2) AS (#2), 1 AS (#3)] - └── Aggregate(Initial) - ├── group items: [empsalary.depname (#0) AS (#0)] - ├── aggregate functions: [sum(empsalary.salary (#1)) AS (#2)] - └── EvalScalar - ├── scalars: [empsalary.depname (#0) AS (#0), empsalary.salary (#1) AS (#1)] - └── Scan - ├── table: default.empsalary (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== inherited_named_window_from_sqllogictest_binds === -description: An inherited named WINDOW specification should bind without losing the base partition spec. -sql: SELECT rank() OVER w1, dense_rank() OVER w2 FROM empsalary WINDOW w1 AS (PARTITION BY depname), w2 AS (w1 ORDER BY salary DESC) -status: ok -EvalScalar -├── scalars: [rank() OVER w1 (#2) AS (#2), dense_rank() OVER w2 (#3) AS (#3)] -└── Window - ├── aggregate function: rank - ├── partition items: [empsalary.depname (#0) AS (#0)] - ├── order by items: [] - ├── frame: [Range: Preceding(None) ~ Following(None)] - └── Sort - ├── sort keys: [empsalary.depname (#0) ASC NULLS LAST] - ├── limit: [NONE] - ├── window top: NONE - ├── window function: Rank - └── EvalScalar - ├── scalars: [empsalary.depname (#0) AS (#0)] - └── Window - ├── aggregate function: dense_rank - ├── partition items: [empsalary.depname (#0) AS (#0)] - ├── order by items: [empsalary.salary (#1) AS (#1) DESC] - ├── frame: [Range: Preceding(None) ~ CurrentRow] - └── Sort - ├── sort keys: [empsalary.depname (#0) ASC NULLS LAST, empsalary.salary (#1) DESC NULLS LAST] - ├── limit: [NONE] - ├── window top: NONE - ├── window function: DenseRank - └── EvalScalar - ├── scalars: [empsalary.depname (#0) AS (#0), empsalary.salary (#1) AS (#1)] - └── Scan - ├── table: default.empsalary (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== recursive_named_window_chain_binds === -description: A recursive chain of named WINDOW references should resolve inherited partition and order specs. -sql: SELECT rank() OVER w3 FROM empsalary WINDOW w1 AS (PARTITION BY depname ORDER BY salary), w2 AS (w1), w3 AS (w2) -status: ok -EvalScalar -├── scalars: [rank() OVER w3 (#2) AS (#2)] -└── Window - ├── aggregate function: rank - ├── partition items: [empsalary.depname (#0) AS (#0)] - ├── order by items: [empsalary.salary (#1) AS (#1)] - ├── frame: [Range: Preceding(None) ~ CurrentRow] - └── Sort - ├── sort keys: [empsalary.depname (#0) ASC NULLS LAST, empsalary.salary (#1) ASC NULLS LAST] - ├── limit: [NONE] - ├── window top: NONE - ├── window function: Rank - └── EvalScalar - ├── scalars: [empsalary.depname (#0) AS (#0), empsalary.salary (#1) AS (#1)] - └── Scan - ├── table: default.empsalary (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== inherited_named_window_rejects_partition_override === -description: Referencing a named window must not add a new PARTITION BY clause. -sql: SELECT rank() OVER w2 FROM empsalary WINDOW w1 AS (ORDER BY salary), w2 AS (w1 PARTITION BY depname) -status: error -code: 1065 -message: WINDOW specification with named WINDOW reference cannot specify PARTITION BY - -=== inherited_named_window_rejects_duplicate_order_by === -description: Referencing a named window with ORDER BY must not specify another ORDER BY. -sql: SELECT rank() OVER w2 FROM empsalary WINDOW w1 AS (ORDER BY salary), w2 AS (w1 ORDER BY depname) -status: error -code: 1065 -message: Cannot specify ORDER BY if referenced named WINDOW specifies ORDER BY - -=== inherited_named_window_rejects_base_frame === -description: Referencing a named window that already contains a frame specification must be rejected. -sql: SELECT sum(salary) OVER w2 FROM empsalary WINDOW w1 AS (ORDER BY salary ROWS CURRENT ROW), w2 AS (w1) -status: error -code: 1065 -message: Cannot reference named WINDOW containing frame specification - -=== unnest_over_aggregate_is_planned_after_aggregate === -description: A set-returning function over an aggregate should stay above the aggregate phase instead of rewriting the aggregate away early. -sql: SELECT unnest(max([11, 12])) -status: ok -EvalScalar -├── scalars: [get(unnest(max([11, 12])) (#2)) AS (#3)] -└── ProjectSet(ProjectSet { srfs: [ScalarItem { scalar: FunctionCall(FunctionCall { span: Some(7..28), func_name: "unnest", params: [], arguments: [BoundColumnRef(BoundColumnRef { span: Some(14..27), column: ColumnBinding { database_name: None, table_name: None, column_position: None, table_index: None, column_name: "max([11, 12])", column_name_lower: None, index: 1, data_type: Nullable(Array(Number(UInt8))), visibility: InVisible, virtual_expr: None, is_srf: false } })] }), index: 2 }] }) - └── Aggregate(Initial) - ├── group items: [] - ├── aggregate functions: [max(max_arg_0 (#0)) AS (#1)] - └── EvalScalar - ├── scalars: [[11, 12] AS (#0)] - └── DummyTableScan(DummyTableScan { source_table_indexes: [] }) - - -=== duplicate_srf_expression_reuses_project_set_binding === -description: Repeated identical SRF expressions should reuse the registered ProjectSet binding. -sql: SELECT unnest([1, 2, 3]), unnest([1, 2, 3]) -status: ok -EvalScalar -├── scalars: [get(unnest([1, 2, 3]) (#0)) AS (#1), get(unnest([1, 2, 3]) (#0)) AS (#2)] -└── ProjectSet(ProjectSet { srfs: [ScalarItem { scalar: FunctionCall(FunctionCall { span: Some(7..24), func_name: "unnest", params: [], arguments: [ConstantExpr(ConstantExpr { span: Some(14..23), value: Array(UInt8([1, 2, 3])) })] }), index: 0 }] }) - └── DummyTableScan(DummyTableScan { source_table_indexes: [] }) - - -=== qualify_named_window_with_subquery_binds === -description: QUALIFY should preserve subqueries while checking named window aliases. -sql: SELECT i, p, o, row_number() OVER w AS rn FROM qt WINDOW w AS (PARTITION BY p ORDER BY o) QUALIFY rn = (SELECT i FROM qt LIMIT 1) -status: ok -EvalScalar -├── scalars: [qt.i (#0) AS (#0), qt.p (#1) AS (#1), qt.o (#2) AS (#2), row_number() OVER w (#3) AS (#3)] -└── Filter - ├── filters: [eq(row_number() OVER w (#3), SUBQUERY AS (#4))] - ├── subquerys - │ └── Subquery (Scalar) - │ ├── output_column: qt.i (#4) - │ └── Limit - │ ├── limit: [1] - │ ├── offset: [0] - │ └── EvalScalar - │ ├── scalars: [qt.i (#4) AS (#4)] - │ └── Scan - │ ├── table: default.qt (#1) - │ ├── filters: [] - │ ├── order by: [] - │ └── limit: NONE - └── Window - ├── aggregate function: row_number - ├── partition items: [qt.p (#1) AS (#1)] - ├── order by items: [qt.o (#2) AS (#2)] - ├── frame: [Range: Preceding(None) ~ CurrentRow] - └── Sort - ├── sort keys: [qt.p (#1) ASC NULLS LAST, qt.o (#2) ASC NULLS LAST] - ├── limit: [NONE] - ├── window top: NONE - ├── window function: RowNumber - └── EvalScalar - ├── scalars: [qt.p (#1) AS (#1), qt.o (#2) AS (#2)] - └── Scan - ├── table: default.qt (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== qualify_grouping_context_uses_grouping_checker === -description: QUALIFY in a grouped query should still accept grouped aliases while binding the window phase. -sql: SELECT number % 2 AS a, row_number() OVER (ORDER BY a) AS rn FROM t GROUP BY a QUALIFY a = 1 -status: ok -EvalScalar -├── scalars: [row_number() OVER (ORDER BY a) (#3) AS (#3), a (#1) AS (#4)] -└── Filter - ├── filters: [eq(a (#1), 1)] - └── Window - ├── aggregate function: row_number - ├── partition items: [] - ├── order by items: [a (#1) AS (#2)] - ├── frame: [Range: Preceding(None) ~ CurrentRow] - └── Sort - ├── sort keys: [row_number_order_0 (#2) ASC NULLS LAST] - ├── limit: [NONE] - └── EvalScalar - ├── scalars: [a (#1) AS (#2)] - └── Aggregate(Initial) - ├── group items: [modulo(t.number (#0), 2) AS (#1)] - ├── aggregate functions: [] - └── EvalScalar - ├── scalars: [modulo(t.number (#0), 2) AS (#1)] - └── Scan - ├── table: default.t (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== qualify_grouping_context_rejects_aggregate_alias === -description: QUALIFY in a grouped query must still reject aggregate aliases while using grouping-aware binding. -sql: SELECT number % 2 AS a, sum(number) AS s, row_number() OVER (ORDER BY a) AS rn FROM t GROUP BY a QUALIFY s > 0 -status: error -code: 1065 -message: Qualify clause must not contain aggregate functions - -=== group_by_srf_alias_from_sqllogictest_binds === -description: A sqllogictest GROUP BY pattern that groups by an SRF select alias should bind successfully. -sql: SELECT t.col1 AS col1, unnest(split(t.col2, ',')) AS col3 FROM t_str AS t GROUP BY col1, col3 ORDER BY col3 -status: ok -Sort -├── sort keys: [col3 (#4) ASC NULLS LAST] -├── limit: [NONE] -└── EvalScalar - ├── scalars: [t_str.col1 (#0) AS (#0), col3 (#3) AS (#4)] - └── Aggregate(Initial) - ├── group items: [t_str.col1 (#0) AS (#0), get(unnest(split(t.col2 (#1), ',')) (#2)) AS (#3)] - ├── aggregate functions: [] - └── EvalScalar - ├── scalars: [t_str.col1 (#0) AS (#0), get(unnest(split(t.col2 (#1), ',')) (#2)) AS (#3)] - └── ProjectSet(ProjectSet { srfs: [ScalarItem { scalar: FunctionCall(FunctionCall { span: Some(23..49), func_name: "unnest", params: [], arguments: [FunctionCall(FunctionCall { span: Some(30..48), func_name: "split", params: [], arguments: [BoundColumnRef(BoundColumnRef { span: Some(36..37), column: ColumnBinding { database_name: None, table_name: Some("t"), column_position: Some(2), table_index: Some(0), column_name: "col2", column_name_lower: None, index: 1, data_type: Nullable(String), visibility: Visible, virtual_expr: None, is_srf: false } }), ConstantExpr(ConstantExpr { span: Some(44..47), value: String(",") })] })] }), index: 2 }] }) - └── Scan - ├── table: default.t_str (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== group_by_all_collects_non_aggregate_select_items === -description: GROUP BY ALL should expand to the non-aggregate SELECT items only. -sql: SELECT number % 2 AS a, sum(number) FROM t GROUP BY ALL -status: ok -EvalScalar -├── scalars: [sum(number) (#2) AS (#2), a (#1) AS (#3)] -└── Aggregate(Initial) - ├── group items: [modulo(t.number (#0), 2) AS (#1)] - ├── aggregate functions: [sum(t.number (#0)) AS (#2)] - └── EvalScalar - ├── scalars: [t.number (#0) AS (#0), modulo(t.number (#0), 2) AS (#1)] - └── Scan - ├── table: default.t (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== grouped_select_udaf_binds === -description: A grouped SELECT should rewrite UDAF output through the aggregate path like builtin aggregates. -sql: SELECT a % 2 AS g, weighted_avg(a, b) FROM t GROUP BY g -status: ok -EvalScalar -├── scalars: [weighted_avg(a, b) (#5) AS (#5), g (#2) AS (#6)] -└── Aggregate(Initial) - ├── group items: [modulo(t.a (#0), 2) AS (#2)] - ├── aggregate functions: [weighted_avg(a, b) AS (#5)] - └── EvalScalar - ├── scalars: [modulo(t.a (#0), 2) AS (#2), CAST(t.a (#0) AS Int32 NULL) AS (#3), CAST(t.b (#1) AS Int32 NULL) AS (#4)] - └── Scan - ├── table: default.t (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== group_by_all_collects_non_udaf_select_items === -description: GROUP BY ALL should also skip UDAF select items when inferring grouping keys. -sql: SELECT a % 2 AS g, weighted_avg(a, b) FROM t GROUP BY ALL -status: ok -EvalScalar -├── scalars: [weighted_avg(a, b) (#5) AS (#5), g (#2) AS (#6)] -└── Aggregate(Initial) - ├── group items: [modulo(t.a (#0), 2) AS (#2)] - ├── aggregate functions: [weighted_avg(a, b) AS (#5)] - └── EvalScalar - ├── scalars: [modulo(t.a (#0), 2) AS (#2), CAST(t.a (#0) AS Int32 NULL) AS (#3), CAST(t.b (#1) AS Int32 NULL) AS (#4)] - └── Scan - ├── table: default.t (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== group_by_rejects_udaf_item === -description: Explicit GROUP BY items must reject UDAF calls just like builtin aggregates. -sql: SELECT weighted_avg(a, b) FROM t GROUP BY weighted_avg(a, b) -status: error -code: 1065 -message: GROUP BY items can't contain aggregate functions or window functions: weighted_avg(a, b) - -=== combined_grouping_sets_binds === -description: A normal GROUP BY item combined with GROUPING SETS should bind through the combined-group expansion path. -sql: SELECT quantity, brand, segment, sum(quantity) FROM sales GROUP BY brand, GROUPING SETS(segment, quantity) -status: ok -EvalScalar -├── scalars: [sales.brand (#0) AS (#0), sales.segment (#1) AS (#1), sales.quantity (#2) AS (#2), sum(quantity) (#7) AS (#7)] -└── Aggregate(Initial) - ├── group items: [sales.brand (#0) AS (#0), sales.segment (#1) AS (#1), sales.quantity (#2) AS (#2), _grouping_id (#6) AS (#6)] - ├── aggregate functions: [sum(sales.quantity (#2)) AS (#7)] - └── EvalScalar - ├── scalars: [sales.brand (#0) AS (#0), sales.segment (#1) AS (#1), sales.quantity (#2) AS (#2)] - └── Scan - ├── table: default.sales (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== grouping_outside_grouping_sets_is_rejected === -description: grouping() should still be rejected when the query is not using grouping sets semantics. -sql: SELECT a, grouping(a) FROM g GROUP BY a -status: error -code: 1065 -message: grouping can only be called in GROUP BY GROUPING SETS clauses - -=== grouping_rejects_non_group_item_argument === -description: grouping() arguments must still be actual GROUP BY expressions inside grouping sets. -sql: SELECT a, grouping(c) FROM g GROUP BY GROUPING SETS ((a), ()) -status: error -code: 1006 -message: Arguments of grouping should be group by expressions - -=== cube_grouping_function_binds === -description: CUBE should expand into grouping sets and allow grouping(...) to bind against the generated grouping id. -sql: SELECT a, b, sum(c) AS sc, grouping(a, b) FROM g GROUP BY CUBE(a, b) -status: ok -EvalScalar -├── scalars: [g.a (#0) AS (#0), g.b (#1) AS (#1), sum(c) (#6) AS (#6), grouping(_grouping_id (#5)) AS (#7)] -└── Aggregate(Initial) - ├── group items: [g.a (#0) AS (#0), g.b (#1) AS (#1), _grouping_id (#5) AS (#5)] - ├── aggregate functions: [sum(g.c (#2)) AS (#6)] - └── EvalScalar - ├── scalars: [g.a (#0) AS (#0), g.b (#1) AS (#1), g.c (#2) AS (#2)] - └── Scan - ├── table: default.g (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== aggregate_over_srf_from_sqllogictest_binds === -description: A sqllogictest aggregate-over-SRF pattern should still bind with ProjectSet below Aggregate. -sql: SELECT max(unnest(split(t.col2, ','))) FROM t_str AS t -status: ok -EvalScalar -├── scalars: [max(unnest(split(t.col2, ','))) (#3) AS (#3)] -└── Aggregate(Initial) - ├── group items: [] - ├── aggregate functions: [max(max_arg_0 (#2)) AS (#3)] - └── EvalScalar - ├── scalars: [get(unnest(split(t.col2 (#0), ',')) (#1)) AS (#2)] - └── ProjectSet(ProjectSet { srfs: [ScalarItem { scalar: FunctionCall(FunctionCall { span: Some(11..37), func_name: "unnest", params: [], arguments: [FunctionCall(FunctionCall { span: Some(18..36), func_name: "split", params: [], arguments: [BoundColumnRef(BoundColumnRef { span: Some(24..25), column: ColumnBinding { database_name: None, table_name: Some("t"), column_position: Some(1), table_index: Some(0), column_name: "col2", column_name_lower: None, index: 0, data_type: Nullable(String), visibility: Visible, virtual_expr: None, is_srf: false } }), ConstantExpr(ConstantExpr { span: Some(32..35), value: String(",") })] })] }), index: 1 }] }) - └── Scan - ├── table: default.t_str (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== unnest_over_wrapped_aggregate_from_sqllogictest_binds === -description: A sqllogictest SRF-over-aggregate pattern with an extra scalar wrapper should still bind with ProjectSet above Aggregate. -sql: SELECT unnest(split(max(t.col2), ',')) FROM t_str AS t -status: ok -EvalScalar -├── scalars: [get(unnest(split(max(t.col2), ',')) (#2)) AS (#3)] -└── ProjectSet(ProjectSet { srfs: [ScalarItem { scalar: FunctionCall(FunctionCall { span: Some(7..38), func_name: "unnest", params: [], arguments: [FunctionCall(FunctionCall { span: Some(14..37), func_name: "split", params: [], arguments: [BoundColumnRef(BoundColumnRef { span: Some(20..31), column: ColumnBinding { database_name: None, table_name: None, column_position: None, table_index: None, column_name: "max(t.col2)", column_name_lower: None, index: 1, data_type: Nullable(String), visibility: InVisible, virtual_expr: None, is_srf: false } }), ConstantExpr(ConstantExpr { span: Some(33..36), value: String(",") })] })] }), index: 2 }] }) - └── Aggregate(Initial) - ├── group items: [] - ├── aggregate functions: [max(t_str.col2 (#0)) AS (#1)] - └── EvalScalar - ├── scalars: [t_str.col2 (#0) AS (#0)] - └── Scan - ├── table: default.t_str (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== grouping_function_inside_window_over_rollup_binds === -description: A sqllogictest grouping() pattern should still rewrite correctly when used inside a window over rollup output. -sql: SELECT grouping(salary), grouping(depname), sum(grouping(salary)) OVER (PARTITION BY grouping(salary) + grouping(depname) ORDER BY grouping(depname) DESC) FROM empsalary GROUP BY ROLLUP(depname, salary) -status: ok -EvalScalar -├── scalars: [sum(grouping(salary)) OVER (PARTITION BY grouping(salary) + grouping(depname) ORDER BY grouping(depname) DESC) (#8) AS (#8), grouping(_grouping_id (#4)) AS (#9), grouping(_grouping_id (#4)) AS (#10)] -└── Window - ├── aggregate function: sum - ├── partition items: [plus(grouping(_grouping_id (#4)), grouping(_grouping_id (#4))) AS (#6)] - ├── order by items: [grouping(_grouping_id (#4)) AS (#7) DESC] - ├── frame: [Range: Preceding(None) ~ CurrentRow] - └── Sort - ├── sort keys: [sum_part_0 (#6) ASC NULLS LAST, sum_order_0 (#7) DESC NULLS LAST] - ├── limit: [NONE] - ├── window top: NONE - ├── window function: Aggregate(AggregateFunction { span: Some(44..154), func_name: "sum", distinct: false, params: [], args: [BoundColumnRef(BoundColumnRef { span: Some(48..64), column: ColumnBinding { database_name: None, table_name: None, column_position: None, table_index: None, column_name: "sum_arg_0", column_name_lower: None, index: 5, data_type: Number(UInt32), visibility: Visible, virtual_expr: None, is_srf: false } })], return_type: Nullable(Number(UInt64)), sort_descs: [], display_name: "sum(grouping(salary)) OVER (PARTITION BY grouping(salary) + grouping(depname) ORDER BY grouping(depname) DESC)" }) - └── EvalScalar - ├── scalars: [grouping(_grouping_id (#4)) AS (#5), plus(grouping(_grouping_id (#4)), grouping(_grouping_id (#4))) AS (#6), grouping(_grouping_id (#4)) AS (#7)] - └── Aggregate(Initial) - ├── group items: [empsalary.depname (#0) AS (#0), empsalary.salary (#1) AS (#1), _grouping_id (#4) AS (#4)] - ├── aggregate functions: [] - └── EvalScalar - ├── scalars: [empsalary.depname (#0) AS (#0), empsalary.salary (#1) AS (#1)] - └── Scan - ├── table: default.empsalary (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== within_group_window_aggregate_binds === -description: A WITHIN GROUP window aggregate should bind its sort descriptors without turning into a grouped aggregate. -sql: SELECT listagg(cast(salary as varchar), '|') WITHIN GROUP (ORDER BY empno DESC) OVER (PARTITION BY depname ORDER BY empno) FROM empsalary -status: ok -EvalScalar -├── scalars: [listagg(CAST(salary AS STRING), '|') WITHIN GROUP ( ORDER BY empno DESC ) OVER (PARTITION BY depname ORDER BY empno) (#4) AS (#4)] -└── Window - ├── aggregate function: listagg - ├── partition items: [empsalary.depname (#0) AS (#0)] - ├── order by items: [empsalary.empno (#1) AS (#1)] - ├── frame: [Range: Preceding(None) ~ CurrentRow] - └── Sort - ├── sort keys: [empsalary.depname (#0) ASC NULLS LAST, empsalary.empno (#1) ASC NULLS LAST] - ├── limit: [NONE] - ├── window top: NONE - ├── window function: Aggregate(AggregateFunction { span: Some(7..122), func_name: "listagg", distinct: false, params: [String("|")], args: [BoundColumnRef(BoundColumnRef { span: Some(20..26), column: ColumnBinding { database_name: None, table_name: None, column_position: None, table_index: None, column_name: "listagg_arg_0", column_name_lower: None, index: 3, data_type: Nullable(String), visibility: Visible, virtual_expr: None, is_srf: false } })], return_type: Nullable(String), sort_descs: [AggregateFunctionScalarSortDesc { expr: BoundColumnRef(BoundColumnRef { span: Some(68..73), column: ColumnBinding { database_name: Some("default"), table_name: Some("empsalary"), column_position: Some(2), table_index: Some(0), column_name: "empno", column_name_lower: None, index: 1, data_type: Nullable(Number(UInt64)), visibility: Visible, virtual_expr: None, is_srf: false } }), is_reuse_index: false, nulls_first: false, asc: false }], display_name: "listagg(CAST(salary AS STRING), '|') WITHIN GROUP ( ORDER BY empno DESC ) OVER (PARTITION BY depname ORDER BY empno)" }) - └── EvalScalar - ├── scalars: [empsalary.depname (#0) AS (#0), empsalary.empno (#1) AS (#1), empsalary.empno (#1) AS (#1), CAST(empsalary.salary (#2) AS String NULL) AS (#3)] - └── Scan - ├── table: default.empsalary (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - -=== within_group_group_aggregate_binds === -description: A non-window WITHIN GROUP aggregate should register its sort descriptors in the aggregate phase. -sql: SELECT listagg(cast(salary as varchar), '|') WITHIN GROUP (ORDER BY empno DESC) FROM empsalary -status: ok -EvalScalar -├── scalars: [listagg(CAST(salary AS STRING), '|') WITHIN GROUP ( ORDER BY empno DESC ) (#3) AS (#3)] -└── Aggregate(Initial) - ├── group items: [] - ├── aggregate functions: [listagg('|')(listagg_arg_0 (#2)) AS (#3)] - └── EvalScalar - ├── scalars: [empsalary.empno (#0) AS (#0), CAST(empsalary.salary (#1) AS String NULL) AS (#2)] - └── Scan - ├── table: default.empsalary (#0) - ├── filters: [] - ├── order by: [] - └── limit: NONE - - diff --git a/src/query/sql/tests/it/semantic/binder_clauses.txt b/src/query/sql/tests/it/semantic/binder_clauses.txt new file mode 100644 index 0000000000000..59ed6bb617e55 --- /dev/null +++ b/src/query/sql/tests/it/semantic/binder_clauses.txt @@ -0,0 +1,259 @@ +=== simple_aggregate_query_binds === +description: A plain aggregate query should bind successfully. +sql: SELECT avg(number) FROM t +status: ok +EvalScalar +├── scalars: [divide(sum(number) (#1), if(eq(count(number) (#2), 0), 1, count(number) (#2))) AS (#3)] +└── Aggregate(Initial) + ├── group items: [] + ├── aggregate functions: [sum(t.number (#0)) AS (#1), count(t.number (#0)) AS (#2)] + └── EvalScalar + ├── scalars: [t.number (#0) AS (#0)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== where_rejects_aggregate_alias === +description: An aggregate alias must still be rejected in WHERE. +sql: SELECT sum(number) AS s FROM t WHERE s > 0 +status: error +code: 1065 +message: Where clause can't contain aggregate or window functions + +=== where_accepts_scalar_alias === +description: A scalar alias should remain usable in WHERE. +sql: SELECT number + 1 AS s FROM t WHERE s > 1 +status: ok +EvalScalar +├── scalars: [plus(t.number (#0), 1) AS (#1)] +└── Filter + ├── filters: [gt(plus(t.number (#0), 1), 1)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== where_alias_to_srf_uses_project_set_binding === +description: A WHERE clause that references an SRF alias should keep the alias bound to the ProjectSet column instead of expanding back to the raw SRF. +sql: SELECT unnest([1, 2, 3]) AS u WHERE u = 1 +status: ok +EvalScalar +├── scalars: [get(unnest([1, 2, 3]) (#0)) AS (#1)] +└── Filter + ├── filters: [eq(get(unnest([1, 2, 3]) (#0)), 1)] + └── ProjectSet(ProjectSet { srfs: [ScalarItem { scalar: FunctionCall(FunctionCall { span: Some(7..24), func_name: "unnest", params: [], arguments: [ConstantExpr(ConstantExpr { span: Some(14..23), value: Array(UInt8([1, 2, 3])) })] }), index: 0 }] }) + └── DummyTableScan(DummyTableScan { source_table_indexes: [] }) + + +=== where_rejects_udaf === +description: A UDAF in WHERE must be rejected like any other aggregate. +sql: SELECT a FROM t WHERE weighted_avg(a, b) > 0 +status: error +code: 1065 +message: Where clause can't contain aggregate or window functions + +=== qualify_rejects_aggregate_alias === +description: An aggregate alias must still be rejected in QUALIFY. +sql: SELECT sum(number) AS s FROM t QUALIFY s > 0 +status: error +code: 1065 +message: Qualify clause must not contain aggregate functions + +=== qualify_rejects_direct_aggregate === +description: A raw aggregate expression must be rejected directly in QUALIFY. +sql: SELECT number FROM t QUALIFY sum(number) > 0 +status: error +code: 1065 +message: Qualify clause must not contain aggregate functions + +=== qualify_rejects_udaf_alias === +description: A UDAF alias must still be rejected in QUALIFY. +sql: SELECT weighted_avg(a, b) AS s FROM t QUALIFY s > 0 +status: error +code: 1065 +message: Qualify clause must not contain aggregate functions + +=== qualify_accepts_window_alias === +description: A window alias should remain usable in QUALIFY. +sql: SELECT number, row_number() OVER (ORDER BY number) AS rn FROM t QUALIFY rn = 1 +status: ok +EvalScalar +├── scalars: [t.number (#0) AS (#0), row_number() OVER (ORDER BY number) (#1) AS (#1)] +└── Filter + ├── filters: [eq(row_number() OVER (ORDER BY number) (#1), 1)] + └── Window + ├── aggregate function: row_number + ├── partition items: [] + ├── order by items: [t.number (#0) AS (#0)] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [t.number (#0) ASC NULLS LAST] + ├── limit: [NONE] + └── EvalScalar + ├── scalars: [t.number (#0) AS (#0)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== qualify_alias_to_srf_uses_project_set_binding === +description: A QUALIFY clause that references an SRF alias should keep the alias bound to the ProjectSet column instead of expanding back to the raw SRF. +sql: SELECT unnest([1, 2, 3]) AS u QUALIFY u = 1 +status: ok +EvalScalar +├── scalars: [get(unnest([1, 2, 3]) (#0)) AS (#1)] +└── Filter + ├── filters: [eq(get(unnest([1, 2, 3]) (#0)), 1)] + └── ProjectSet(ProjectSet { srfs: [ScalarItem { scalar: FunctionCall(FunctionCall { span: Some(7..24), func_name: "unnest", params: [], arguments: [ConstantExpr(ConstantExpr { span: Some(14..23), value: Array(UInt8([1, 2, 3])) })] }), index: 0 }] }) + └── DummyTableScan(DummyTableScan { source_table_indexes: [] }) + + +=== having_accepts_aggregate_alias === +description: An aggregate alias should remain usable in HAVING. +sql: SELECT sum(number) AS s FROM t HAVING s > 0 +status: ok +EvalScalar +├── scalars: [sum(number) (#1) AS (#1)] +└── Filter + ├── filters: [gt(sum(number) (#1), 0)] + └── Aggregate(Initial) + ├── group items: [] + ├── aggregate functions: [sum(t.number (#0)) AS (#1)] + └── EvalScalar + ├── scalars: [t.number (#0) AS (#0)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== having_aggregate_does_not_make_scalar_projection_valid === +description: Introducing an aggregate in HAVING must not make a non-aggregated SELECT list valid. +sql: SELECT number FROM t HAVING count(*) > 0 +status: error +code: 1065 +message: column "number" must appear in the GROUP BY clause or be used in an aggregate function + +=== order_by_can_introduce_aggregate_in_aggregate_query === +description: ORDER BY may introduce a new aggregate expression when the query is already aggregated. +sql: SELECT count(*) FROM t ORDER BY sum(number) +status: ok +Sort +├── sort keys: [sum(number) (#2) ASC NULLS LAST] +├── limit: [NONE] +└── EvalScalar + ├── scalars: [COUNT(*) (#1) AS (#1), sum(number) (#2) AS (#2)] + └── Aggregate(Initial) + ├── group items: [] + ├── aggregate functions: [count() AS (#1), sum(t.number (#0)) AS (#2)] + └── EvalScalar + ├── scalars: [t.number (#0) AS (#0)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== order_by_aggregate_does_not_make_scalar_projection_valid === +description: Introducing an aggregate in ORDER BY must not make a non-aggregated SELECT list valid. +sql: SELECT number FROM t ORDER BY sum(number) +status: error +code: 1065 +message: column "number" must appear in the GROUP BY clause or be used in an aggregate function + +=== order_by_count_does_not_make_scalar_projection_valid === +description: The sqllogictest ORDER BY count(*) pattern must still reject a scalar projection. +sql: SELECT number FROM t ORDER BY count(*) + 1 +status: error +code: 1065 +message: column "number" must appear in the GROUP BY clause or be used in an aggregate function + +=== order_by_expression_reuses_scalar_alias_semantics === +description: ORDER BY expressions should still inline scalar aliases from the select semantic view when they are used inside a larger expression. +sql: SELECT number + 1 AS s FROM t ORDER BY s + 1 +status: ok +Sort +├── sort keys: [s + 1 (#2) ASC NULLS LAST] +├── limit: [NONE] +└── EvalScalar + ├── scalars: [plus(t.number (#0), 1) AS (#1), plus(plus(t.number (#0), 1), 1) AS (#2)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== order_by_duplicate_aggregate_alias_is_ambiguous === +description: ORDER BY should keep duplicate aggregate aliases ambiguous instead of pre-expanding one candidate. +sql: SELECT sum(number) AS s, max(number) AS s FROM t ORDER BY s +status: error +code: 1065 +message: column s reference or alias is ambiguous, please use another alias name + +=== order_by_expression_reuses_aggregate_alias_semantics === +description: ORDER BY expressions should keep aggregate aliases on the original semantic view instead of depending on rewritten select-item state. +sql: SELECT sum(number) AS s FROM t ORDER BY s + 1 +status: ok +Sort +├── sort keys: [s + 1 (#2) ASC NULLS LAST] +├── limit: [NONE] +└── EvalScalar + ├── scalars: [sum(number) (#1) AS (#1), plus(sum(number) (#1), 1) AS (#2)] + └── Aggregate(Initial) + ├── group items: [] + ├── aggregate functions: [sum(t.number (#0)) AS (#1)] + └── EvalScalar + ├── scalars: [t.number (#0) AS (#0)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== aggregate_argument_prefers_base_column_over_select_alias === +description: Inside an aggregate function, a same-name select alias should not shadow the base column. +sql: SELECT a AS c2, sum(c2) FROM t GROUP BY a +status: ok +EvalScalar +├── scalars: [t.a (#0) AS (#0), sum(c2) (#2) AS (#2)] +└── Aggregate(Initial) + ├── group items: [t.a (#0) AS (#0)] + ├── aggregate functions: [sum(t.c2 (#1)) AS (#2)] + └── EvalScalar + ├── scalars: [t.a (#0) AS (#0), t.c2 (#1) AS (#1)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== aggregate_argument_can_fallback_to_select_alias_in_select_clause === +description: Inside the SELECT list, an aggregate argument should still fall back to a same-select alias when no base column exists. +sql: SELECT number % 3 AS c1, sum(c1) FROM t GROUP BY number % 3 +status: ok +EvalScalar +├── scalars: [sum(c1) (#2) AS (#2), number % 3 (#1) AS (#3)] +└── Aggregate(Initial) + ├── group items: [modulo(t.number (#0), 3) AS (#1)] + ├── aggregate functions: [sum(number % 3 (#1)) AS (#2)] + └── EvalScalar + ├── scalars: [modulo(t.number (#0), 3) AS (#1)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + diff --git a/src/query/sql/tests/it/semantic/binder_grouping.txt b/src/query/sql/tests/it/semantic/binder_grouping.txt new file mode 100644 index 0000000000000..aeeeb17199759 --- /dev/null +++ b/src/query/sql/tests/it/semantic/binder_grouping.txt @@ -0,0 +1,244 @@ +=== unnest_over_aggregate_is_planned_after_aggregate === +description: A set-returning function over an aggregate should stay above the aggregate phase instead of rewriting the aggregate away early. +sql: SELECT unnest(max([11, 12])) +status: ok +EvalScalar +├── scalars: [get(unnest(max([11, 12])) (#2)) AS (#3)] +└── ProjectSet(ProjectSet { srfs: [ScalarItem { scalar: FunctionCall(FunctionCall { span: Some(7..28), func_name: "unnest", params: [], arguments: [BoundColumnRef(BoundColumnRef { span: Some(14..27), column: ColumnBinding { database_name: None, table_name: None, column_position: None, table_index: None, column_name: "max([11, 12])", column_name_lower: None, index: 1, data_type: Nullable(Array(Number(UInt8))), visibility: InVisible, virtual_expr: None, is_srf: false } })] }), index: 2 }] }) + └── Aggregate(Initial) + ├── group items: [] + ├── aggregate functions: [max(max_arg_0 (#0)) AS (#1)] + └── EvalScalar + ├── scalars: [[11, 12] AS (#0)] + └── DummyTableScan(DummyTableScan { source_table_indexes: [] }) + + +=== duplicate_srf_expression_reuses_project_set_binding === +description: Repeated identical SRF expressions should reuse the registered ProjectSet binding. +sql: SELECT unnest([1, 2, 3]), unnest([1, 2, 3]) +status: ok +EvalScalar +├── scalars: [get(unnest([1, 2, 3]) (#0)) AS (#1), get(unnest([1, 2, 3]) (#0)) AS (#2)] +└── ProjectSet(ProjectSet { srfs: [ScalarItem { scalar: FunctionCall(FunctionCall { span: Some(7..24), func_name: "unnest", params: [], arguments: [ConstantExpr(ConstantExpr { span: Some(14..23), value: Array(UInt8([1, 2, 3])) })] }), index: 0 }] }) + └── DummyTableScan(DummyTableScan { source_table_indexes: [] }) + + +=== group_by_srf_alias_from_sqllogictest_binds === +description: A sqllogictest GROUP BY pattern that groups by an SRF select alias should bind successfully. +sql: SELECT t.col1 AS col1, unnest(split(t.col2, ',')) AS col3 FROM t_str AS t GROUP BY col1, col3 ORDER BY col3 +status: ok +Sort +├── sort keys: [col3 (#4) ASC NULLS LAST] +├── limit: [NONE] +└── EvalScalar + ├── scalars: [t_str.col1 (#0) AS (#0), col3 (#3) AS (#4)] + └── Aggregate(Initial) + ├── group items: [t_str.col1 (#0) AS (#0), get(unnest(split(t.col2 (#1), ',')) (#2)) AS (#3)] + ├── aggregate functions: [] + └── EvalScalar + ├── scalars: [t_str.col1 (#0) AS (#0), get(unnest(split(t.col2 (#1), ',')) (#2)) AS (#3)] + └── ProjectSet(ProjectSet { srfs: [ScalarItem { scalar: FunctionCall(FunctionCall { span: Some(23..49), func_name: "unnest", params: [], arguments: [FunctionCall(FunctionCall { span: Some(30..48), func_name: "split", params: [], arguments: [BoundColumnRef(BoundColumnRef { span: Some(36..37), column: ColumnBinding { database_name: None, table_name: Some("t"), column_position: Some(2), table_index: Some(0), column_name: "col2", column_name_lower: None, index: 1, data_type: Nullable(String), visibility: Visible, virtual_expr: None, is_srf: false } }), ConstantExpr(ConstantExpr { span: Some(44..47), value: String(",") })] })] }), index: 2 }] }) + └── Scan + ├── table: default.t_str (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== group_by_all_collects_non_aggregate_select_items === +description: GROUP BY ALL should expand to the non-aggregate SELECT items only. +sql: SELECT number % 2 AS a, sum(number) FROM t GROUP BY ALL +status: ok +EvalScalar +├── scalars: [sum(number) (#2) AS (#2), a (#1) AS (#3)] +└── Aggregate(Initial) + ├── group items: [modulo(t.number (#0), 2) AS (#1)] + ├── aggregate functions: [sum(t.number (#0)) AS (#2)] + └── EvalScalar + ├── scalars: [t.number (#0) AS (#0), modulo(t.number (#0), 2) AS (#1)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== grouped_select_udaf_binds === +description: A grouped SELECT should rewrite UDAF output through the aggregate path like builtin aggregates. +sql: SELECT a % 2 AS g, weighted_avg(a, b) FROM t GROUP BY g +status: ok +EvalScalar +├── scalars: [weighted_avg(a, b) (#5) AS (#5), g (#2) AS (#6)] +└── Aggregate(Initial) + ├── group items: [modulo(t.a (#0), 2) AS (#2)] + ├── aggregate functions: [weighted_avg(a, b) AS (#5)] + └── EvalScalar + ├── scalars: [modulo(t.a (#0), 2) AS (#2), CAST(t.a (#0) AS Int32 NULL) AS (#3), CAST(t.b (#1) AS Int32 NULL) AS (#4)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== group_by_all_collects_non_udaf_select_items === +description: GROUP BY ALL should also skip UDAF select items when inferring grouping keys. +sql: SELECT a % 2 AS g, weighted_avg(a, b) FROM t GROUP BY ALL +status: ok +EvalScalar +├── scalars: [weighted_avg(a, b) (#5) AS (#5), g (#2) AS (#6)] +└── Aggregate(Initial) + ├── group items: [modulo(t.a (#0), 2) AS (#2)] + ├── aggregate functions: [weighted_avg(a, b) AS (#5)] + └── EvalScalar + ├── scalars: [modulo(t.a (#0), 2) AS (#2), CAST(t.a (#0) AS Int32 NULL) AS (#3), CAST(t.b (#1) AS Int32 NULL) AS (#4)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== group_by_rejects_udaf_item === +description: Explicit GROUP BY items must reject UDAF calls just like builtin aggregates. +sql: SELECT weighted_avg(a, b) FROM t GROUP BY weighted_avg(a, b) +status: error +code: 1065 +message: GROUP BY items can't contain aggregate functions or window functions: weighted_avg(a, b) + +=== combined_grouping_sets_binds === +description: A normal GROUP BY item combined with GROUPING SETS should bind through the combined-group expansion path. +sql: SELECT quantity, brand, segment, sum(quantity) FROM sales GROUP BY brand, GROUPING SETS(segment, quantity) +status: ok +EvalScalar +├── scalars: [sales.brand (#0) AS (#0), sales.segment (#1) AS (#1), sales.quantity (#2) AS (#2), sum(quantity) (#7) AS (#7)] +└── Aggregate(Initial) + ├── group items: [sales.brand (#0) AS (#0), sales.segment (#1) AS (#1), sales.quantity (#2) AS (#2), _grouping_id (#6) AS (#6)] + ├── aggregate functions: [sum(sales.quantity (#2)) AS (#7)] + └── EvalScalar + ├── scalars: [sales.brand (#0) AS (#0), sales.segment (#1) AS (#1), sales.quantity (#2) AS (#2)] + └── Scan + ├── table: default.sales (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== grouping_outside_grouping_sets_is_rejected === +description: grouping() should still be rejected when the query is not using grouping sets semantics. +sql: SELECT a, grouping(a) FROM g GROUP BY a +status: error +code: 1065 +message: grouping can only be called in GROUP BY GROUPING SETS clauses + +=== grouping_rejects_non_group_item_argument === +description: grouping() arguments must still be actual GROUP BY expressions inside grouping sets. +sql: SELECT a, grouping(c) FROM g GROUP BY GROUPING SETS ((a), ()) +status: error +code: 1006 +message: Arguments of grouping should be group by expressions + +=== cube_grouping_function_binds === +description: CUBE should expand into grouping sets and allow grouping(...) to bind against the generated grouping id. +sql: SELECT a, b, sum(c) AS sc, grouping(a, b) FROM g GROUP BY CUBE(a, b) +status: ok +EvalScalar +├── scalars: [g.a (#0) AS (#0), g.b (#1) AS (#1), sum(c) (#6) AS (#6), grouping(_grouping_id (#5)) AS (#7)] +└── Aggregate(Initial) + ├── group items: [g.a (#0) AS (#0), g.b (#1) AS (#1), _grouping_id (#5) AS (#5)] + ├── aggregate functions: [sum(g.c (#2)) AS (#6)] + └── EvalScalar + ├── scalars: [g.a (#0) AS (#0), g.b (#1) AS (#1), g.c (#2) AS (#2)] + └── Scan + ├── table: default.g (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== aggregate_over_srf_from_sqllogictest_binds === +description: A sqllogictest aggregate-over-SRF pattern should still bind with ProjectSet below Aggregate. +sql: SELECT max(unnest(split(t.col2, ','))) FROM t_str AS t +status: ok +EvalScalar +├── scalars: [max(unnest(split(t.col2, ','))) (#3) AS (#3)] +└── Aggregate(Initial) + ├── group items: [] + ├── aggregate functions: [max(max_arg_0 (#2)) AS (#3)] + └── EvalScalar + ├── scalars: [get(unnest(split(t.col2 (#0), ',')) (#1)) AS (#2)] + └── ProjectSet(ProjectSet { srfs: [ScalarItem { scalar: FunctionCall(FunctionCall { span: Some(11..37), func_name: "unnest", params: [], arguments: [FunctionCall(FunctionCall { span: Some(18..36), func_name: "split", params: [], arguments: [BoundColumnRef(BoundColumnRef { span: Some(24..25), column: ColumnBinding { database_name: None, table_name: Some("t"), column_position: Some(1), table_index: Some(0), column_name: "col2", column_name_lower: None, index: 0, data_type: Nullable(String), visibility: Visible, virtual_expr: None, is_srf: false } }), ConstantExpr(ConstantExpr { span: Some(32..35), value: String(",") })] })] }), index: 1 }] }) + └── Scan + ├── table: default.t_str (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== unnest_over_wrapped_aggregate_from_sqllogictest_binds === +description: A sqllogictest SRF-over-aggregate pattern with an extra scalar wrapper should still bind with ProjectSet above Aggregate. +sql: SELECT unnest(split(max(t.col2), ',')) FROM t_str AS t +status: ok +EvalScalar +├── scalars: [get(unnest(split(max(t.col2), ',')) (#2)) AS (#3)] +└── ProjectSet(ProjectSet { srfs: [ScalarItem { scalar: FunctionCall(FunctionCall { span: Some(7..38), func_name: "unnest", params: [], arguments: [FunctionCall(FunctionCall { span: Some(14..37), func_name: "split", params: [], arguments: [BoundColumnRef(BoundColumnRef { span: Some(20..31), column: ColumnBinding { database_name: None, table_name: None, column_position: None, table_index: None, column_name: "max(t.col2)", column_name_lower: None, index: 1, data_type: Nullable(String), visibility: InVisible, virtual_expr: None, is_srf: false } }), ConstantExpr(ConstantExpr { span: Some(33..36), value: String(",") })] })] }), index: 2 }] }) + └── Aggregate(Initial) + ├── group items: [] + ├── aggregate functions: [max(t_str.col2 (#0)) AS (#1)] + └── EvalScalar + ├── scalars: [t_str.col2 (#0) AS (#0)] + └── Scan + ├── table: default.t_str (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== grouping_function_inside_window_over_rollup_binds === +description: A sqllogictest grouping() pattern should still rewrite correctly when used inside a window over rollup output. +sql: SELECT grouping(salary), grouping(depname), sum(grouping(salary)) OVER (PARTITION BY grouping(salary) + grouping(depname) ORDER BY grouping(depname) DESC) FROM empsalary GROUP BY ROLLUP(depname, salary) +status: ok +EvalScalar +├── scalars: [sum(grouping(salary)) OVER (PARTITION BY grouping(salary) + grouping(depname) ORDER BY grouping(depname) DESC) (#8) AS (#8), grouping(_grouping_id (#4)) AS (#9), grouping(_grouping_id (#4)) AS (#10)] +└── Window + ├── aggregate function: sum + ├── partition items: [plus(grouping(_grouping_id (#4)), grouping(_grouping_id (#4))) AS (#6)] + ├── order by items: [grouping(_grouping_id (#4)) AS (#7) DESC] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [sum_part_0 (#6) ASC NULLS LAST, sum_order_0 (#7) DESC NULLS LAST] + ├── limit: [NONE] + ├── window top: NONE + ├── window function: Aggregate(AggregateFunction { span: Some(44..154), func_name: "sum", distinct: false, params: [], args: [BoundColumnRef(BoundColumnRef { span: Some(48..64), column: ColumnBinding { database_name: None, table_name: None, column_position: None, table_index: None, column_name: "sum_arg_0", column_name_lower: None, index: 5, data_type: Number(UInt32), visibility: Visible, virtual_expr: None, is_srf: false } })], return_type: Nullable(Number(UInt64)), sort_descs: [], display_name: "sum(grouping(salary)) OVER (PARTITION BY grouping(salary) + grouping(depname) ORDER BY grouping(depname) DESC)" }) + └── EvalScalar + ├── scalars: [grouping(_grouping_id (#4)) AS (#5), plus(grouping(_grouping_id (#4)), grouping(_grouping_id (#4))) AS (#6), grouping(_grouping_id (#4)) AS (#7)] + └── Aggregate(Initial) + ├── group items: [empsalary.depname (#0) AS (#0), empsalary.salary (#1) AS (#1), _grouping_id (#4) AS (#4)] + ├── aggregate functions: [] + └── EvalScalar + ├── scalars: [empsalary.depname (#0) AS (#0), empsalary.salary (#1) AS (#1)] + └── Scan + ├── table: default.empsalary (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== within_group_group_aggregate_binds === +description: A non-window WITHIN GROUP aggregate should register its sort descriptors in the aggregate phase. +sql: SELECT listagg(cast(salary as varchar), '|') WITHIN GROUP (ORDER BY empno DESC) FROM empsalary +status: ok +EvalScalar +├── scalars: [listagg(CAST(salary AS STRING), '|') WITHIN GROUP ( ORDER BY empno DESC ) (#3) AS (#3)] +└── Aggregate(Initial) + ├── group items: [] + ├── aggregate functions: [listagg('|')(listagg_arg_0 (#2)) AS (#3)] + └── EvalScalar + ├── scalars: [empsalary.empno (#0) AS (#0), CAST(empsalary.salary (#1) AS String NULL) AS (#2)] + └── Scan + ├── table: default.empsalary (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + diff --git a/src/query/sql/tests/it/semantic/binder_qualify.txt b/src/query/sql/tests/it/semantic/binder_qualify.txt new file mode 100644 index 0000000000000..05a41d158c7d2 --- /dev/null +++ b/src/query/sql/tests/it/semantic/binder_qualify.txt @@ -0,0 +1,113 @@ +=== qualify_cte_then_outer_aggregate_from_sqllogictest_binds === +description: A sqllogictest pattern that filters with QUALIFY inside a CTE before an outer aggregate should still bind. +sql: WITH test AS (SELECT number % 10 AS id, number AS full_matched FROM t QUALIFY row_number() OVER (PARTITION BY id ORDER BY number DESC) = 1) SELECT full_matched, count() FROM test GROUP BY full_matched HAVING full_matched = 3 +status: ok +EvalScalar +├── scalars: [t.number (#0) AS (#0), count() (#4) AS (#4)] +└── Filter + ├── filters: [eq(t.number (#0), 3)] + └── Aggregate(Initial) + ├── group items: [t.number (#0) AS (#0)] + ├── aggregate functions: [count() AS (#4)] + └── EvalScalar + ├── scalars: [t.number (#0) AS (#0)] + └── EvalScalar + ├── scalars: [t.number (#0) AS (#0), modulo(t.number (#0), 10) AS (#1)] + └── Filter + ├── filters: [eq(row_number() OVER (PARTITION BY id ORDER BY number DESC) (#3), 1)] + └── Window + ├── aggregate function: row_number + ├── partition items: [modulo(t.number (#0), 10) AS (#2)] + ├── order by items: [t.number (#0) AS (#0) DESC] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [row_number_part_0 (#2) ASC NULLS LAST, t.number (#0) DESC NULLS LAST] + ├── limit: [NONE] + ├── window top: NONE + ├── window function: RowNumber + └── EvalScalar + ├── scalars: [t.number (#0) AS (#0), modulo(t.number (#0), 10) AS (#2)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== qualify_named_window_with_subquery_binds === +description: QUALIFY should preserve subqueries while checking named window aliases. +sql: SELECT i, p, o, row_number() OVER w AS rn FROM qt WINDOW w AS (PARTITION BY p ORDER BY o) QUALIFY rn = (SELECT i FROM qt LIMIT 1) +status: ok +EvalScalar +├── scalars: [qt.i (#0) AS (#0), qt.p (#1) AS (#1), qt.o (#2) AS (#2), row_number() OVER w (#3) AS (#3)] +└── Filter + ├── filters: [eq(row_number() OVER w (#3), SUBQUERY AS (#4))] + ├── subquerys + │ └── Subquery (Scalar) + │ ├── output_column: qt.i (#4) + │ └── Limit + │ ├── limit: [1] + │ ├── offset: [0] + │ └── EvalScalar + │ ├── scalars: [qt.i (#4) AS (#4)] + │ └── Scan + │ ├── table: default.qt (#1) + │ ├── filters: [] + │ ├── order by: [] + │ └── limit: NONE + └── Window + ├── aggregate function: row_number + ├── partition items: [qt.p (#1) AS (#1)] + ├── order by items: [qt.o (#2) AS (#2)] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [qt.p (#1) ASC NULLS LAST, qt.o (#2) ASC NULLS LAST] + ├── limit: [NONE] + ├── window top: NONE + ├── window function: RowNumber + └── EvalScalar + ├── scalars: [qt.p (#1) AS (#1), qt.o (#2) AS (#2)] + └── Scan + ├── table: default.qt (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== qualify_grouping_context_uses_grouping_checker === +description: QUALIFY in a grouped query should still accept grouped aliases while binding the window phase. +sql: SELECT number % 2 AS a, row_number() OVER (ORDER BY a) AS rn FROM t GROUP BY a QUALIFY a = 1 +status: ok +EvalScalar +├── scalars: [row_number() OVER (ORDER BY a) (#3) AS (#3), a (#1) AS (#4)] +└── Filter + ├── filters: [eq(a (#1), 1)] + └── Window + ├── aggregate function: row_number + ├── partition items: [] + ├── order by items: [a (#1) AS (#2)] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [row_number_order_0 (#2) ASC NULLS LAST] + ├── limit: [NONE] + └── EvalScalar + ├── scalars: [a (#1) AS (#2)] + └── Aggregate(Initial) + ├── group items: [modulo(t.number (#0), 2) AS (#1)] + ├── aggregate functions: [] + └── EvalScalar + ├── scalars: [modulo(t.number (#0), 2) AS (#1)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== qualify_grouping_context_rejects_aggregate_alias === +description: QUALIFY in a grouped query must still reject aggregate aliases while using grouping-aware binding. +sql: SELECT number % 2 AS a, sum(number) AS s, row_number() OVER (ORDER BY a) AS rn FROM t GROUP BY a QUALIFY s > 0 +status: error +code: 1065 +message: Qualify clause must not contain aggregate functions + diff --git a/src/query/sql/tests/it/semantic/binder_window_core.txt b/src/query/sql/tests/it/semantic/binder_window_core.txt new file mode 100644 index 0000000000000..8307091d8301d --- /dev/null +++ b/src/query/sql/tests/it/semantic/binder_window_core.txt @@ -0,0 +1,444 @@ +=== window_aggregate_does_not_become_group_aggregate === +description: An aggregate used as a window function should stay in the window phase rather than becoming a group aggregate. +sql: SELECT sum(number) OVER () FROM t +status: ok +EvalScalar +├── scalars: [sum(number) OVER () (#1) AS (#1)] +└── Window + ├── aggregate function: sum + ├── partition items: [] + ├── order by items: [] + ├── frame: [Range: Preceding(None) ~ Following(None)] + └── EvalScalar + ├── scalars: [t.number (#0) AS (#0)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== window_partition_rejects_new_aggregate === +description: A window PARTITION BY clause must not introduce a new aggregate expression. +sql: SELECT row_number() OVER (PARTITION BY sum(number)) FROM t +status: error +code: 1065 +message: Window specification and arguments cannot contain aggregate functions + +=== window_order_rejects_new_aggregate === +description: A window ORDER BY clause must not introduce a new aggregate expression. +sql: SELECT row_number() OVER (ORDER BY sum(number)) FROM t +status: error +code: 1065 +message: Window specification and arguments cannot contain aggregate functions + +=== order_by_window_alias_does_not_seed_window_aggregate === +description: ORDER BY on a window alias must not pre-register aggregates that only appear inside that alias's window specification. +sql: SELECT row_number() OVER (ORDER BY sum(number)) AS rn FROM t ORDER BY rn +status: error +code: 1065 +message: Window specification and arguments cannot contain aggregate functions + +=== order_by_expression_reuses_window_alias_semantics === +description: ORDER BY expressions should keep window aliases on the original semantic view instead of depending on rewritten select-item state. +sql: SELECT number, row_number() OVER (ORDER BY number) AS rn FROM t ORDER BY rn + 1 +status: ok +Sort +├── sort keys: [rn + 1 (#2) ASC NULLS LAST] +├── limit: [NONE] +└── EvalScalar + ├── scalars: [t.number (#0) AS (#0), row_number() OVER (ORDER BY number) (#1) AS (#1), plus(row_number() OVER (ORDER BY number) (#1), 1) AS (#2)] + └── Window + ├── aggregate function: row_number + ├── partition items: [] + ├── order by items: [t.number (#0) AS (#0)] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [t.number (#0) ASC NULLS LAST] + ├── limit: [NONE] + └── EvalScalar + ├── scalars: [t.number (#0) AS (#0)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== window_order_reuses_having_aggregate === +description: A window ORDER BY clause should be able to reuse an aggregate introduced later by HAVING. +sql: SELECT row_number() OVER (ORDER BY sum(number)) FROM t HAVING sum(number) > 0 +status: ok +EvalScalar +├── scalars: [row_number() OVER (ORDER BY sum(number)) (#2) AS (#2)] +└── Window + ├── aggregate function: row_number + ├── partition items: [] + ├── order by items: [sum(number) (#1) AS (#1)] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [sum(number) (#1) ASC NULLS LAST] + ├── limit: [NONE] + └── EvalScalar + ├── scalars: [sum(number) (#1) AS (#1)] + └── Filter + ├── filters: [gt(sum(number) (#1), 0)] + └── Aggregate(Initial) + ├── group items: [] + ├── aggregate functions: [sum(t.number (#0)) AS (#1)] + └── EvalScalar + ├── scalars: [t.number (#0) AS (#0)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== window_order_reuses_having_aggregate_alias === +description: A window ORDER BY clause should be able to reuse an aggregate reached through a HAVING alias reference. +sql: SELECT sum(number) AS s, row_number() OVER (ORDER BY s) FROM t HAVING s > 0 +status: ok +EvalScalar +├── scalars: [sum(number) (#1) AS (#1), row_number() OVER (ORDER BY s) (#2) AS (#2)] +└── Window + ├── aggregate function: row_number + ├── partition items: [] + ├── order by items: [sum(number) (#1) AS (#1)] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [sum(number) (#1) ASC NULLS LAST] + ├── limit: [NONE] + └── EvalScalar + ├── scalars: [sum(number) (#1) AS (#1)] + └── Filter + ├── filters: [gt(sum(number) (#1), 0)] + └── Aggregate(Initial) + ├── group items: [] + ├── aggregate functions: [sum(t.number (#0)) AS (#1)] + └── EvalScalar + ├── scalars: [t.number (#0) AS (#0)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== window_order_reuses_having_udaf === +description: A window ORDER BY clause should be able to reuse a UDAF introduced later by HAVING. +sql: SELECT row_number() OVER (ORDER BY weighted_avg(a, b)) FROM t HAVING weighted_avg(a, b) > 0 +status: ok +EvalScalar +├── scalars: [row_number() OVER (ORDER BY weighted_avg(a, b)) (#5) AS (#5)] +└── Window + ├── aggregate function: row_number + ├── partition items: [] + ├── order by items: [weighted_avg(a, b) (#4) AS (#4)] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [weighted_avg(a, b) (#4) ASC NULLS LAST] + ├── limit: [NONE] + └── EvalScalar + ├── scalars: [weighted_avg(a, b) (#4) AS (#4)] + └── Filter + ├── filters: [gt(weighted_avg(a, b) (#4), 0)] + └── Aggregate(Initial) + ├── group items: [] + ├── aggregate functions: [weighted_avg(a, b) AS (#4)] + └── EvalScalar + ├── scalars: [CAST(t.a (#0) AS Int32 NULL) AS (#2), CAST(t.b (#1) AS Int32 NULL) AS (#3)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== window_order_reuses_having_udaf_alias === +description: A window ORDER BY clause should be able to reuse a UDAF reached through a HAVING alias reference. +sql: SELECT weighted_avg(a, b) AS s, row_number() OVER (ORDER BY s) FROM t HAVING s > 0 +status: ok +EvalScalar +├── scalars: [weighted_avg(a, b) (#4) AS (#4), row_number() OVER (ORDER BY s) (#5) AS (#5)] +└── Window + ├── aggregate function: row_number + ├── partition items: [] + ├── order by items: [weighted_avg(a, b) (#4) AS (#4)] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [weighted_avg(a, b) (#4) ASC NULLS LAST] + ├── limit: [NONE] + └── EvalScalar + ├── scalars: [weighted_avg(a, b) (#4) AS (#4)] + └── Filter + ├── filters: [gt(weighted_avg(a, b) (#4), 0)] + └── Aggregate(Initial) + ├── group items: [] + ├── aggregate functions: [weighted_avg(a, b) AS (#4)] + └── EvalScalar + ├── scalars: [CAST(t.a (#0) AS Int32 NULL) AS (#2), CAST(t.b (#1) AS Int32 NULL) AS (#3)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== window_order_reuses_order_by_aggregate === +description: A window ORDER BY clause should be able to reuse an aggregate introduced later by ORDER BY. +sql: SELECT row_number() OVER (ORDER BY sum(number)) FROM t ORDER BY sum(number) +status: ok +Sort +├── sort keys: [sum(number) (#1) ASC NULLS LAST] +├── limit: [NONE] +└── EvalScalar + ├── scalars: [sum(number) (#1) AS (#1), row_number() OVER (ORDER BY sum(number)) (#2) AS (#2)] + └── Window + ├── aggregate function: row_number + ├── partition items: [] + ├── order by items: [sum(number) (#1) AS (#1)] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [sum(number) (#1) ASC NULLS LAST] + ├── limit: [NONE] + └── EvalScalar + ├── scalars: [sum(number) (#1) AS (#1)] + └── Aggregate(Initial) + ├── group items: [] + ├── aggregate functions: [sum(t.number (#0)) AS (#1)] + └── EvalScalar + ├── scalars: [t.number (#0) AS (#0)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== window_order_reuses_order_by_udaf === +description: A window ORDER BY clause should be able to reuse a UDAF introduced later by ORDER BY. +sql: SELECT row_number() OVER (ORDER BY weighted_avg(a, b)) FROM t ORDER BY weighted_avg(a, b) +status: ok +Sort +├── sort keys: [weighted_avg(a, b) (#4) ASC NULLS LAST] +├── limit: [NONE] +└── EvalScalar + ├── scalars: [weighted_avg(a, b) (#4) AS (#4), row_number() OVER (ORDER BY weighted_avg(a, b)) (#5) AS (#5)] + └── Window + ├── aggregate function: row_number + ├── partition items: [] + ├── order by items: [weighted_avg(a, b) (#4) AS (#4)] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [weighted_avg(a, b) (#4) ASC NULLS LAST] + ├── limit: [NONE] + └── EvalScalar + ├── scalars: [weighted_avg(a, b) (#4) AS (#4)] + └── Aggregate(Initial) + ├── group items: [] + ├── aggregate functions: [weighted_avg(a, b) AS (#4)] + └── EvalScalar + ├── scalars: [CAST(t.a (#0) AS Int32 NULL) AS (#2), CAST(t.b (#1) AS Int32 NULL) AS (#3)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== window_order_reuses_order_by_aggregate_alias === +description: A window ORDER BY clause should be able to reuse an aggregate reached through an ORDER BY alias reference. +sql: SELECT sum(number) AS s, row_number() OVER (ORDER BY s) FROM t ORDER BY s +status: ok +Sort +├── sort keys: [sum(number) (#1) ASC NULLS LAST] +├── limit: [NONE] +└── EvalScalar + ├── scalars: [sum(number) (#1) AS (#1), row_number() OVER (ORDER BY s) (#2) AS (#2)] + └── Window + ├── aggregate function: row_number + ├── partition items: [] + ├── order by items: [sum(number) (#1) AS (#1)] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [sum(number) (#1) ASC NULLS LAST] + ├── limit: [NONE] + └── EvalScalar + ├── scalars: [sum(number) (#1) AS (#1)] + └── Aggregate(Initial) + ├── group items: [] + ├── aggregate functions: [sum(t.number (#0)) AS (#1)] + └── EvalScalar + ├── scalars: [t.number (#0) AS (#0)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== window_order_reuses_order_by_udaf_alias === +description: A window ORDER BY clause should be able to reuse a UDAF reached through an ORDER BY alias reference. +sql: SELECT weighted_avg(a, b) AS s, row_number() OVER (ORDER BY s) FROM t ORDER BY s +status: ok +Sort +├── sort keys: [weighted_avg(a, b) (#4) ASC NULLS LAST] +├── limit: [NONE] +└── EvalScalar + ├── scalars: [weighted_avg(a, b) (#4) AS (#4), row_number() OVER (ORDER BY s) (#5) AS (#5)] + └── Window + ├── aggregate function: row_number + ├── partition items: [] + ├── order by items: [weighted_avg(a, b) (#4) AS (#4)] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [weighted_avg(a, b) (#4) ASC NULLS LAST] + ├── limit: [NONE] + └── EvalScalar + ├── scalars: [weighted_avg(a, b) (#4) AS (#4)] + └── Aggregate(Initial) + ├── group items: [] + ├── aggregate functions: [weighted_avg(a, b) AS (#4)] + └── EvalScalar + ├── scalars: [CAST(t.a (#0) AS Int32 NULL) AS (#2), CAST(t.b (#1) AS Int32 NULL) AS (#3)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== window_order_rejects_window_alias_expansion === +description: A window ORDER BY clause must still reject aliases that expand to a prior window expression. +sql: SELECT row_number() OVER () AS rn, row_number() OVER (ORDER BY rn) FROM t +status: error +code: 1065 +message: Window function cannot contain another window function + +=== duplicate_window_expression_reuses_window_binding === +description: Repeated identical window expressions should reuse the registered window binding. +sql: SELECT row_number() OVER (ORDER BY number), row_number() OVER (ORDER BY number) FROM t +status: ok +EvalScalar +├── scalars: [row_number() OVER (ORDER BY number) (#1) AS (#1)] +└── Window + ├── aggregate function: row_number + ├── partition items: [] + ├── order by items: [t.number (#0) AS (#0)] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [t.number (#0) ASC NULLS LAST] + ├── limit: [NONE] + └── EvalScalar + ├── scalars: [t.number (#0) AS (#0)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== laglead_window_from_sqllogictest_binds === +description: A sqllogictest LEAD window pattern should still bind through the lag/lead rewrite path. +sql: SELECT lead(number, 1, 0) OVER (PARTITION BY number % 3 ORDER BY number + 1) FROM t +status: ok +EvalScalar +├── scalars: [lead(number, 1, 0) OVER (PARTITION BY number % 3 ORDER BY number + 1) (#4) AS (#4)] +└── Window + ├── aggregate function: lead + ├── partition items: [modulo(t.number (#0), 3) AS (#2)] + ├── order by items: [plus(t.number (#0), 1) AS (#3)] + ├── frame: [Rows: Following(Some(Number(1_u64))) ~ Following(Some(Number(1_u64)))] + └── Sort + ├── sort keys: [lead_part_0 (#2) ASC NULLS LAST, lead_order_0 (#3) ASC NULLS LAST] + ├── limit: [NONE] + ├── window top: NONE + ├── window function: LagLead(LagLeadFunction { is_lag: false, arg: BoundColumnRef(BoundColumnRef { span: Some(12..18), column: ColumnBinding { database_name: Some("default"), table_name: Some("t"), column_position: Some(1), table_index: Some(0), column_name: "number", column_name_lower: None, index: 0, data_type: Nullable(Number(UInt64)), visibility: Visible, virtual_expr: None, is_srf: false } }), offset: 1, default: Some(BoundColumnRef(BoundColumnRef { span: Some(23..24), column: ColumnBinding { database_name: None, table_name: None, column_position: None, table_index: None, column_name: "lead_default_value", column_name_lower: None, index: 1, data_type: Nullable(Number(UInt64)), visibility: Visible, virtual_expr: None, is_srf: false } })), return_type: Nullable(Number(UInt64)) }) + └── EvalScalar + ├── scalars: [t.number (#0) AS (#0), CAST(0 AS UInt64 NULL) AS (#1), modulo(t.number (#0), 3) AS (#2), plus(t.number (#0), 1) AS (#3)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== nth_value_window_binds === +description: An NTH_VALUE window expression should still bind through the dedicated nth_value rewrite path. +sql: SELECT nth_value(number, 2) OVER (ORDER BY number) FROM t +status: ok +EvalScalar +├── scalars: [nth_value(number, 2) OVER (ORDER BY number) (#1) AS (#1)] +└── Window + ├── aggregate function: nth_value + ├── partition items: [] + ├── order by items: [t.number (#0) AS (#0)] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [t.number (#0) ASC NULLS LAST] + ├── limit: [NONE] + └── EvalScalar + ├── scalars: [t.number (#0) AS (#0), t.number (#0) AS (#0)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== window_partition_rewrites_group_item_expression === +description: A window partition expression over grouped aliases should rewrite non-column group items back to their group-item columns. +sql: SELECT number % 3 AS a, number % 4 AS b, row_number() OVER (PARTITION BY b % 2 ORDER BY a) FROM t GROUP BY a, b +status: ok +EvalScalar +├── scalars: [row_number() OVER (PARTITION BY b % 2 ORDER BY a) (#5) AS (#5), a (#1) AS (#6), b (#2) AS (#7)] +└── Window + ├── aggregate function: row_number + ├── partition items: [modulo(b (#2), 2) AS (#3)] + ├── order by items: [a (#1) AS (#4)] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [row_number_part_0 (#3) ASC NULLS LAST, row_number_order_0 (#4) ASC NULLS LAST] + ├── limit: [NONE] + ├── window top: NONE + ├── window function: RowNumber + └── EvalScalar + ├── scalars: [modulo(b (#2), 2) AS (#3), a (#1) AS (#4)] + └── Aggregate(Initial) + ├── group items: [modulo(t.number (#0), 3) AS (#1), modulo(t.number (#0), 4) AS (#2)] + ├── aggregate functions: [] + └── EvalScalar + ├── scalars: [modulo(t.number (#0), 3) AS (#1), modulo(t.number (#0), 4) AS (#2)] + └── Scan + ├── table: default.t (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== within_group_window_aggregate_binds === +description: A WITHIN GROUP window aggregate should bind its sort descriptors without turning into a grouped aggregate. +sql: SELECT listagg(cast(salary as varchar), '|') WITHIN GROUP (ORDER BY empno DESC) OVER (PARTITION BY depname ORDER BY empno) FROM empsalary +status: ok +EvalScalar +├── scalars: [listagg(CAST(salary AS STRING), '|') WITHIN GROUP ( ORDER BY empno DESC ) OVER (PARTITION BY depname ORDER BY empno) (#4) AS (#4)] +└── Window + ├── aggregate function: listagg + ├── partition items: [empsalary.depname (#0) AS (#0)] + ├── order by items: [empsalary.empno (#1) AS (#1)] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [empsalary.depname (#0) ASC NULLS LAST, empsalary.empno (#1) ASC NULLS LAST] + ├── limit: [NONE] + ├── window top: NONE + ├── window function: Aggregate(AggregateFunction { span: Some(7..122), func_name: "listagg", distinct: false, params: [String("|")], args: [BoundColumnRef(BoundColumnRef { span: Some(20..26), column: ColumnBinding { database_name: None, table_name: None, column_position: None, table_index: None, column_name: "listagg_arg_0", column_name_lower: None, index: 3, data_type: Nullable(String), visibility: Visible, virtual_expr: None, is_srf: false } })], return_type: Nullable(String), sort_descs: [AggregateFunctionScalarSortDesc { expr: BoundColumnRef(BoundColumnRef { span: Some(68..73), column: ColumnBinding { database_name: Some("default"), table_name: Some("empsalary"), column_position: Some(2), table_index: Some(0), column_name: "empno", column_name_lower: None, index: 1, data_type: Nullable(Number(UInt64)), visibility: Visible, virtual_expr: None, is_srf: false } }), is_reuse_index: false, nulls_first: false, asc: false }], display_name: "listagg(CAST(salary AS STRING), '|') WITHIN GROUP ( ORDER BY empno DESC ) OVER (PARTITION BY depname ORDER BY empno)" }) + └── EvalScalar + ├── scalars: [empsalary.depname (#0) AS (#0), empsalary.empno (#1) AS (#1), empsalary.empno (#1) AS (#1), CAST(empsalary.salary (#2) AS String NULL) AS (#3)] + └── Scan + ├── table: default.empsalary (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + diff --git a/src/query/sql/tests/it/semantic/binder_window_named.txt b/src/query/sql/tests/it/semantic/binder_window_named.txt new file mode 100644 index 0000000000000..8eecd30f23946 --- /dev/null +++ b/src/query/sql/tests/it/semantic/binder_window_named.txt @@ -0,0 +1,138 @@ +=== named_window_from_sqllogictest_binds === +description: A named WINDOW clause from sqllogictests should bind as a normal window specification. +sql: SELECT depname, empno, salary, sum(salary) OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY empno) +status: ok +EvalScalar +├── scalars: [empsalary.depname (#0) AS (#0), empsalary.empno (#1) AS (#1), empsalary.salary (#2) AS (#2), sum(salary) OVER w (#3) AS (#3)] +└── Window + ├── aggregate function: sum + ├── partition items: [empsalary.depname (#0) AS (#0)] + ├── order by items: [empsalary.empno (#1) AS (#1)] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [empsalary.depname (#0) ASC NULLS LAST, empsalary.empno (#1) ASC NULLS LAST] + ├── limit: [NONE] + ├── window top: NONE + ├── window function: Aggregate(AggregateFunction { span: Some(31..49), func_name: "sum", distinct: false, params: [], args: [BoundColumnRef(BoundColumnRef { span: Some(35..41), column: ColumnBinding { database_name: Some("default"), table_name: Some("empsalary"), column_position: Some(3), table_index: Some(0), column_name: "salary", column_name_lower: None, index: 2, data_type: Nullable(Number(UInt64)), visibility: Visible, virtual_expr: None, is_srf: false } })], return_type: Nullable(Number(UInt64)), sort_descs: [], display_name: "sum(salary) OVER w" }) + └── EvalScalar + ├── scalars: [empsalary.depname (#0) AS (#0), empsalary.empno (#1) AS (#1), empsalary.salary (#2) AS (#2)] + └── Scan + ├── table: default.empsalary (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== named_window_aggregate_order_by_existing_group_aggregate_binds === +description: A grouped query should be able to introduce an aggregate inside a named window clause and reuse it across the window aggregate and ORDER BY. +sql: SELECT depname, sum(sum(salary)) OVER w FROM empsalary GROUP BY depname WINDOW w AS (PARTITION BY 1 ORDER BY sum(salary)) +status: ok +EvalScalar +├── scalars: [empsalary.depname (#0) AS (#0), sum(sum(salary)) OVER w (#4) AS (#4)] +└── Window + ├── aggregate function: sum + ├── partition items: [1 AS (#3)] + ├── order by items: [sum(salary) (#2) AS (#2)] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [sum_part_0 (#3) ASC NULLS LAST, sum(salary) (#2) ASC NULLS LAST] + ├── limit: [NONE] + ├── window top: NONE + ├── window function: Aggregate(AggregateFunction { span: Some(16..39), func_name: "sum", distinct: false, params: [], args: [BoundColumnRef(BoundColumnRef { span: None, column: ColumnBinding { database_name: None, table_name: None, column_position: None, table_index: None, column_name: "sum(salary)", column_name_lower: None, index: 2, data_type: Nullable(Number(UInt64)), visibility: Visible, virtual_expr: None, is_srf: false } })], return_type: Nullable(Number(UInt64)), sort_descs: [], display_name: "sum(sum(salary)) OVER w" }) + └── EvalScalar + ├── scalars: [sum(salary) (#2) AS (#2), sum(salary) (#2) AS (#2), 1 AS (#3)] + └── Aggregate(Initial) + ├── group items: [empsalary.depname (#0) AS (#0)] + ├── aggregate functions: [sum(empsalary.salary (#1)) AS (#2)] + └── EvalScalar + ├── scalars: [empsalary.depname (#0) AS (#0), empsalary.salary (#1) AS (#1)] + └── Scan + ├── table: default.empsalary (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== inherited_named_window_from_sqllogictest_binds === +description: An inherited named WINDOW specification should bind without losing the base partition spec. +sql: SELECT rank() OVER w1, dense_rank() OVER w2 FROM empsalary WINDOW w1 AS (PARTITION BY depname), w2 AS (w1 ORDER BY salary DESC) +status: ok +EvalScalar +├── scalars: [rank() OVER w1 (#2) AS (#2), dense_rank() OVER w2 (#3) AS (#3)] +└── Window + ├── aggregate function: rank + ├── partition items: [empsalary.depname (#0) AS (#0)] + ├── order by items: [] + ├── frame: [Range: Preceding(None) ~ Following(None)] + └── Sort + ├── sort keys: [empsalary.depname (#0) ASC NULLS LAST] + ├── limit: [NONE] + ├── window top: NONE + ├── window function: Rank + └── EvalScalar + ├── scalars: [empsalary.depname (#0) AS (#0)] + └── Window + ├── aggregate function: dense_rank + ├── partition items: [empsalary.depname (#0) AS (#0)] + ├── order by items: [empsalary.salary (#1) AS (#1) DESC] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [empsalary.depname (#0) ASC NULLS LAST, empsalary.salary (#1) DESC NULLS LAST] + ├── limit: [NONE] + ├── window top: NONE + ├── window function: DenseRank + └── EvalScalar + ├── scalars: [empsalary.depname (#0) AS (#0), empsalary.salary (#1) AS (#1)] + └── Scan + ├── table: default.empsalary (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== recursive_named_window_chain_binds === +description: A recursive chain of named WINDOW references should resolve inherited partition and order specs. +sql: SELECT rank() OVER w3 FROM empsalary WINDOW w1 AS (PARTITION BY depname ORDER BY salary), w2 AS (w1), w3 AS (w2) +status: ok +EvalScalar +├── scalars: [rank() OVER w3 (#2) AS (#2)] +└── Window + ├── aggregate function: rank + ├── partition items: [empsalary.depname (#0) AS (#0)] + ├── order by items: [empsalary.salary (#1) AS (#1)] + ├── frame: [Range: Preceding(None) ~ CurrentRow] + └── Sort + ├── sort keys: [empsalary.depname (#0) ASC NULLS LAST, empsalary.salary (#1) ASC NULLS LAST] + ├── limit: [NONE] + ├── window top: NONE + ├── window function: Rank + └── EvalScalar + ├── scalars: [empsalary.depname (#0) AS (#0), empsalary.salary (#1) AS (#1)] + └── Scan + ├── table: default.empsalary (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE + + +=== inherited_named_window_rejects_partition_override === +description: Referencing a named window must not add a new PARTITION BY clause. +sql: SELECT rank() OVER w2 FROM empsalary WINDOW w1 AS (ORDER BY salary), w2 AS (w1 PARTITION BY depname) +status: error +code: 1065 +message: WINDOW specification with named WINDOW reference cannot specify PARTITION BY + +=== inherited_named_window_rejects_duplicate_order_by === +description: Referencing a named window with ORDER BY must not specify another ORDER BY. +sql: SELECT rank() OVER w2 FROM empsalary WINDOW w1 AS (ORDER BY salary), w2 AS (w1 ORDER BY depname) +status: error +code: 1065 +message: Cannot specify ORDER BY if referenced named WINDOW specifies ORDER BY + +=== inherited_named_window_rejects_base_frame === +description: Referencing a named window that already contains a frame specification must be rejected. +sql: SELECT sum(salary) OVER w2 FROM empsalary WINDOW w1 AS (ORDER BY salary ROWS CURRENT ROW), w2 AS (w1) +status: error +code: 1065 +message: Cannot reference named WINDOW containing frame specification + diff --git a/tests/sqllogictests/suites/mode/standalone/explain/aggregate.test b/tests/sqllogictests/suites/mode/standalone/explain/aggregate.test index c7efd88ec0285..23f0c1b00938a 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/aggregate.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/aggregate.test @@ -193,15 +193,15 @@ query T explain select a, avg(b) from explain_agg_t1 group by a having a > 1 and max(b) > 10; ---- EvalScalar -├── output columns: [explain_agg_t1.a (#0), sum(b) / if(count(b) = 0, 1, count(b)) (#4)] +├── output columns: [explain_agg_t1.a (#0), sum(b) / if(count(b) = 0, 1, count(b)) (#5)] ├── expressions: [sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL)] ├── estimated rows: 0.50 └── Filter ├── output columns: [sum(b) (#2), count(b) (#3), explain_agg_t1.a (#0)] - ├── filters: [is_true(max(b) (#5) > 10)] + ├── filters: [is_true(max(b) (#4) > 10)] ├── estimated rows: 0.50 └── AggregateFinal - ├── output columns: [sum(b) (#2), count(b) (#3), max(b) (#5), explain_agg_t1.a (#0)] + ├── output columns: [sum(b) (#2), count(b) (#3), max(b) (#4), explain_agg_t1.a (#0)] ├── group by: [a] ├── aggregate functions: [sum(b), count(), max(b)] ├── estimated rows: 1.00 @@ -224,12 +224,12 @@ query T explain select avg(b) from explain_agg_t1 group by a order by avg(b); ---- Sort(Single) -├── output columns: [sum(b) / if(count(b) = 0, 1, count(b)) (#4), sum(b) / if(count(b) = 0, 1, count(b)) (#5)] +├── output columns: [sum(b) / if(count(b) = 0, 1, count(b)) (#4)] ├── sort keys: [sum(b) / if(count(b) = 0, 1, count(b)) ASC NULLS LAST] ├── estimated rows: 1.00 └── EvalScalar - ├── output columns: [sum(b) / if(count(b) = 0, 1, count(b)) (#4), sum(b) / if(count(b) = 0, 1, count(b)) (#5)] - ├── expressions: [sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL), sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL)] + ├── output columns: [sum(b) / if(count(b) = 0, 1, count(b)) (#4)] + ├── expressions: [sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL)] ├── estimated rows: 1.00 └── AggregateFinal ├── output columns: [sum(b) (#2), count(b) (#3), explain_agg_t1.a (#0)] @@ -287,12 +287,12 @@ query T explain select avg(b), avg(b) + 1 from explain_agg_t1 group by a order by avg(b); ---- Sort(Single) -├── output columns: [sum(b) / if(count(b) = 0, 1, count(b)) (#4), sum(b) / if(count(b) = 0, 1, count(b)) + 1 (#5), sum(b) / if(count(b) = 0, 1, count(b)) (#6)] +├── output columns: [sum(b) / if(count(b) = 0, 1, count(b)) (#4), sum(b) / if(count(b) = 0, 1, count(b)) + 1 (#5)] ├── sort keys: [sum(b) / if(count(b) = 0, 1, count(b)) ASC NULLS LAST] ├── estimated rows: 1.00 └── EvalScalar - ├── output columns: [sum(b) / if(count(b) = 0, 1, count(b)) (#4), sum(b) / if(count(b) = 0, 1, count(b)) + 1 (#5), sum(b) / if(count(b) = 0, 1, count(b)) (#6)] - ├── expressions: [sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL), sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL) + 1, sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL)] + ├── output columns: [sum(b) / if(count(b) = 0, 1, count(b)) (#4), sum(b) / if(count(b) = 0, 1, count(b)) + 1 (#5)] + ├── expressions: [sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL), sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL) + 1] ├── estimated rows: 1.00 └── AggregateFinal ├── output columns: [sum(b) (#2), count(b) (#3), explain_agg_t1.a (#0)] @@ -318,12 +318,12 @@ query T explain select avg(b) + 1, avg(b) from explain_agg_t1 group by a order by avg(b); ---- Sort(Single) -├── output columns: [sum(b) / if(count(b) = 0, 1, count(b)) + 1 (#4), sum(b) / if(count(b) = 0, 1, count(b)) (#5), sum(b) / if(count(b) = 0, 1, count(b)) (#6)] +├── output columns: [sum(b) / if(count(b) = 0, 1, count(b)) + 1 (#4), sum(b) / if(count(b) = 0, 1, count(b)) (#5)] ├── sort keys: [sum(b) / if(count(b) = 0, 1, count(b)) ASC NULLS LAST] ├── estimated rows: 1.00 └── EvalScalar - ├── output columns: [sum(b) / if(count(b) = 0, 1, count(b)) + 1 (#4), sum(b) / if(count(b) = 0, 1, count(b)) (#5), sum(b) / if(count(b) = 0, 1, count(b)) (#6)] - ├── expressions: [sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL) + 1, sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL), sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL)] + ├── output columns: [sum(b) / if(count(b) = 0, 1, count(b)) + 1 (#4), sum(b) / if(count(b) = 0, 1, count(b)) (#5)] + ├── expressions: [sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL) + 1, sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL)] ├── estimated rows: 1.00 └── AggregateFinal ├── output columns: [sum(b) (#2), count(b) (#3), explain_agg_t1.a (#0)] @@ -349,12 +349,12 @@ query T explain select avg(b), avg(b) + 1 from explain_agg_t1 group by a order by avg(b) + 1; ---- Sort(Single) -├── output columns: [sum(b) / if(count(b) = 0, 1, count(b)) (#4), sum(b) / if(count(b) = 0, 1, count(b)) + 1 (#5), sum(b) / if(count(b) = 0, 1, count(b)) + 1 (#6)] +├── output columns: [sum(b) / if(count(b) = 0, 1, count(b)) (#4), sum(b) / if(count(b) = 0, 1, count(b)) + 1 (#5)] ├── sort keys: [sum(b) / if(count(b) = 0, 1, count(b)) + 1 ASC NULLS LAST] ├── estimated rows: 1.00 └── EvalScalar - ├── output columns: [sum(b) / if(count(b) = 0, 1, count(b)) (#4), sum(b) / if(count(b) = 0, 1, count(b)) + 1 (#5), sum(b) / if(count(b) = 0, 1, count(b)) + 1 (#6)] - ├── expressions: [sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL), sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL) + 1, sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL) + 1] + ├── output columns: [sum(b) / if(count(b) = 0, 1, count(b)) (#4), sum(b) / if(count(b) = 0, 1, count(b)) + 1 (#5)] + ├── expressions: [sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL), sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL) + 1] ├── estimated rows: 1.00 └── AggregateFinal ├── output columns: [sum(b) (#2), count(b) (#3), explain_agg_t1.a (#0)] diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/aggregate.test b/tests/sqllogictests/suites/mode/standalone/explain_native/aggregate.test index 1c6c24d59fe79..d35e53fdec794 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/aggregate.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/aggregate.test @@ -161,15 +161,15 @@ query T explain select a, avg(b) from explain_agg_t1 group by a having a > 1 and max(b) > 10; ---- EvalScalar -├── output columns: [explain_agg_t1.a (#0), sum(b) / if(count(b) = 0, 1, count(b)) (#4)] +├── output columns: [explain_agg_t1.a (#0), sum(b) / if(count(b) = 0, 1, count(b)) (#5)] ├── expressions: [sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL)] ├── estimated rows: 0.50 └── Filter ├── output columns: [sum(b) (#2), count(b) (#3), explain_agg_t1.a (#0)] - ├── filters: [is_true(max(b) (#5) > 10)] + ├── filters: [is_true(max(b) (#4) > 10)] ├── estimated rows: 0.50 └── AggregateFinal - ├── output columns: [sum(b) (#2), count(b) (#3), max(b) (#5), explain_agg_t1.a (#0)] + ├── output columns: [sum(b) (#2), count(b) (#3), max(b) (#4), explain_agg_t1.a (#0)] ├── group by: [a] ├── aggregate functions: [sum(b), count(), max(b)] ├── estimated rows: 1.00 @@ -192,15 +192,15 @@ query T explain select a, avg(b) from explain_agg_t1 group by a having a > 1 and max(b) > 10; ---- EvalScalar -├── output columns: [explain_agg_t1.a (#0), sum(b) / if(count(b) = 0, 1, count(b)) (#4)] +├── output columns: [explain_agg_t1.a (#0), sum(b) / if(count(b) = 0, 1, count(b)) (#5)] ├── expressions: [sum(b) (#2) / CAST(if(CAST(count(b) (#3) = 0 AS Boolean NULL), 1, count(b) (#3)) AS UInt64 NULL)] ├── estimated rows: 0.50 └── Filter ├── output columns: [sum(b) (#2), count(b) (#3), explain_agg_t1.a (#0)] - ├── filters: [is_true(max(b) (#5) > 10)] + ├── filters: [is_true(max(b) (#4) > 10)] ├── estimated rows: 0.50 └── AggregateFinal - ├── output columns: [sum(b) (#2), count(b) (#3), max(b) (#5), explain_agg_t1.a (#0)] + ├── output columns: [sum(b) (#2), count(b) (#3), max(b) (#4), explain_agg_t1.a (#0)] ├── group by: [a] ├── aggregate functions: [sum(b), count(), max(b)] ├── estimated rows: 1.00