fix: preserve duplicate GROUPING SETS rows

xiedeyantu · xiedeyantu · commit 3bdd9e71cc9d · 2026-04-02T23:14:14.000+08:00
diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs
@@ -45,7 +45,7 @@ use crate::utils::{
     grouping_set_expr_count, grouping_set_to_exprlist, split_conjunction,
 };
 use crate::{
-    BinaryExpr, CreateMemoryTable, CreateView, Execute, Expr, ExprSchemable,
+    BinaryExpr, CreateMemoryTable, CreateView, Execute, Expr, ExprSchemable, GroupingSet,
     LogicalPlanBuilder, Operator, Prepare, TableProviderFilterPushDown, TableSource,
     WindowFunctionDefinition, build_join_schema, expr_vec_fmt, requalify_sides_if_needed,
 };
@@ -3519,11 +3519,12 @@ impl Aggregate {
                 .into_iter()
                 .map(|(q, f)| (q, f.as_ref().clone().with_nullable(true).into()))
                 .collect::<Vec<_>>();
+            let max_ordinal = max_grouping_set_duplicate_ordinal(&group_expr);
             qualified_fields.push((
                 None,
                 Field::new(
                     Self::INTERNAL_GROUPING_ID,
-                    Self::grouping_id_type(qualified_fields.len()),
+                    Self::grouping_id_type(qualified_fields.len(), max_ordinal),
                     false,
                 )
                 .into(),
@@ -3609,15 +3610,25 @@ impl Aggregate {
     }
 
     /// Returns the data type of the grouping id.
-    /// The grouping ID value is a bitmask where each set bit
-    /// indicates that the corresponding grouping expression is
-    /// null
-    pub fn grouping_id_type(group_exprs: usize) -> DataType {
-        if group_exprs <= 8 {
+    ///
+    /// The grouping ID packs two pieces of information into a single integer:
+    /// - The low `group_exprs` bits are the semantic bitmask (a set bit means the
+    ///   corresponding grouping expression is NULL for this grouping set).
+    /// - The bits above position `group_exprs` encode a duplicate ordinal that
+    ///   distinguishes multiple occurrences of the same grouping set pattern.
+    ///
+    /// `max_ordinal` is the highest ordinal value that will appear (0 when there
+    /// are no duplicate grouping sets).  The type is chosen to be the smallest
+    /// unsigned integer that can represent both parts.
+    pub fn grouping_id_type(group_exprs: usize, max_ordinal: usize) -> DataType {
+        let ordinal_bits =
+            usize::BITS as usize - max_ordinal.leading_zeros() as usize;
+        let total_bits = group_exprs + ordinal_bits;
+        if total_bits <= 8 {
             DataType::UInt8
-        } else if group_exprs <= 16 {
+        } else if total_bits <= 16 {
             DataType::UInt16
-        } else if group_exprs <= 32 {
+        } else if total_bits <= 32 {
             DataType::UInt32
         } else {
             DataType::UInt64
@@ -3626,21 +3637,36 @@ impl Aggregate {
 
     /// Internal column used when the aggregation is a grouping set.
     ///
-    /// This column contains a bitmask where each bit represents a grouping
-    /// expression. The least significant bit corresponds to the rightmost
-    /// grouping expression. A bit value of 0 indicates that the corresponding
-    /// column is included in the grouping set, while a value of 1 means it is excluded.
+    /// This column packs two values into a single unsigned integer:
+    ///
+    /// - **Low bits (positions 0 .. n-1)**: a semantic bitmask where each bit
+    ///   represents one of the `n` grouping expressions.  The least significant
+    ///   bit corresponds to the rightmost grouping expression.  A `1` bit means
+    ///   the corresponding column is replaced with `NULL` for this grouping set;
+    ///   a `0` bit means it is included.
+    /// - **High bits (positions n and above)**: a *duplicate ordinal* that
+    ///   distinguishes multiple occurrences of the same semantic grouping set
+    ///   pattern within a single query.  The ordinal is `0` for the first
+    ///   occurrence, `1` for the second, and so on.
     ///
-    /// For example, for the grouping expressions CUBE(a, b), the grouping ID
-    /// column will have the following values:
+    /// The integer type is chosen by [`Self::grouping_id_type`] to be the
+    /// smallest `UInt8 / UInt16 / UInt32 / UInt64` that can represent both
+    /// parts.
+    ///
+    /// For example, for the grouping expressions CUBE(a, b) (no duplicates),
+    /// the grouping ID column will have the following values:
     ///     0b00: Both `a` and `b` are included
     ///     0b01: `b` is excluded
     ///     0b10: `a` is excluded
     ///     0b11: Both `a` and `b` are excluded
     ///
-    /// This internal column is necessary because excluded columns are replaced
-    /// with `NULL` values. To handle these cases correctly, we must distinguish
-    /// between an actual `NULL` value in a column and a column being excluded from the set.
+    /// When the same set appears twice and `n = 2`, the duplicate ordinal is
+    /// packed into bit 2:
+    ///     first occurrence:  `0b0_01` (ordinal = 0, mask = 0b01)
+    ///     second occurrence: `0b1_01` (ordinal = 1, mask = 0b01)
+    ///
+    /// The GROUPING function always masks the value with `(1 << n) - 1` before
+    /// interpreting it so the ordinal bits are invisible to user-facing SQL.
     pub const INTERNAL_GROUPING_ID: &'static str = "__grouping_id";
 }
 
@@ -3661,6 +3687,30 @@ impl PartialOrd for Aggregate {
     }
 }
 
+/// Returns the highest duplicate ordinal across all grouping sets in `group_expr`.
+///
+/// The ordinal counts how many times a given grouping set pattern has already
+/// appeared before the current occurrence.  For example, if the same set
+/// appears three times the ordinals are 0, 1, 2 and this function returns 2.
+/// Returns 0 when no grouping set is duplicated.
+fn max_grouping_set_duplicate_ordinal(group_expr: &[Expr]) -> usize {
+    if let Some(Expr::GroupingSet(GroupingSet::GroupingSets(sets))) = group_expr.first() {
+        let mut counts: HashMap<&Vec<Expr>, usize> =
+            HashMap::new();
+        for set in sets {
+            *counts.entry(set).or_insert(0) += 1;
+        }
+        counts
+            .values()
+            .copied()
+            .max()
+            .unwrap_or(1)
+            .saturating_sub(1)
+    } else {
+        0
+    }
+}
+
 /// Checks whether any expression in `group_expr` contains `Expr::GroupingSet`.
 fn contains_grouping_set(group_expr: &[Expr]) -> bool {
     group_expr
diff --git a/datafusion/optimizer/src/analyzer/resolve_grouping_function.rs b/datafusion/optimizer/src/analyzer/resolve_grouping_function.rs
@@ -204,17 +204,25 @@ fn grouping_function_on_id(
             Expr::Literal(ScalarValue::from(value as u64), None)
         }
     };
-
     let grouping_id_column = Expr::Column(Column::from(Aggregate::INTERNAL_GROUPING_ID));
-    // The grouping call is exactly our internal grouping id
+    // The grouping call is exactly our internal grouping id — mask the ordinal
+    // bits (above position `n`) so only the semantic bitmask is visible.
+    let n = group_by_expr_count;
+    // (1 << n) - 1 masks the low n bits.  Use saturating arithmetic to handle n == 0.
+    let semantic_mask: u64 = if n >= 64 {
+        u64::MAX
+    } else {
+        (1u64 << n).wrapping_sub(1)
+    };
+    let masked_id = bitwise_and(grouping_id_column.clone(), literal(semantic_mask as usize));
     if args.len() == group_by_expr_count
         && args
             .iter()
             .rev()
             .enumerate()
             .all(|(idx, expr)| group_by_expr.get(expr) == Some(&idx))
     {
-        return Ok(cast(grouping_id_column, DataType::Int32));
+        return Ok(cast(masked_id, DataType::Int32));
     }
 
     args.iter()
diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs
@@ -38,7 +38,7 @@ use crate::{
 use datafusion_common::config::ConfigOptions;
 use datafusion_physical_expr::utils::collect_columns;
 use parking_lot::Mutex;
-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
 
 use arrow::array::{ArrayRef, UInt8Array, UInt16Array, UInt32Array, UInt64Array};
 use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
@@ -397,6 +397,14 @@ impl PhysicalGroupBy {
         self.expr.len() + usize::from(self.has_grouping_set)
     }
 
+    /// Returns the Arrow data type of the `__grouping_id` column.
+    ///
+    /// The type is chosen to be wide enough to hold both the semantic bitmask
+    /// (in the low `n` bits) and the duplicate ordinal (in the high bits).
+    fn grouping_id_data_type(&self) -> DataType {
+        Aggregate::grouping_id_type(self.expr.len(), max_duplicate_ordinal(&self.groups))
+    }
+
     pub fn group_schema(&self, schema: &Schema) -> Result<SchemaRef> {
         Ok(Arc::new(Schema::new(self.group_fields(schema)?)))
     }
@@ -421,7 +429,7 @@ impl PhysicalGroupBy {
             fields.push(
                 Field::new(
                     Aggregate::INTERNAL_GROUPING_ID,
-                    Aggregate::grouping_id_type(self.expr.len()),
+                    self.grouping_id_data_type(),
                     false,
                 )
                 .into(),
@@ -1937,27 +1945,77 @@ fn evaluate_optional(
         .collect()
 }
 
-fn group_id_array(group: &[bool], batch: &RecordBatch) -> Result<ArrayRef> {
-    if group.len() > 64 {
+/// Builds the internal `__grouping_id` array for a single grouping set.
+///
+/// The returned array packs two values into a single integer:
+///
+/// - Low `n` bits (positions 0 .. n-1): the semantic bitmask.  A `1` bit
+///   at position `i` means that the `i`-th grouping column (counting from the
+///   least significant bit, i.e. the *last* column in the `group` slice) is
+///   `NULL` for this grouping set.
+/// - High bits (positions n and above): the duplicate `ordinal`, which
+///   distinguishes multiple occurrences of the same grouping-set pattern.  The
+///   ordinal is `0` for the first occurrence, `1` for the second, and so on.
+///
+/// The integer type is chosen to be the smallest `UInt8 / UInt16 / UInt32 /
+/// UInt64` that can represent both parts.  It matches the type returned by
+/// [`Aggregate::grouping_id_type`].
+fn group_id_array(
+    group: &[bool],
+    ordinal: usize,
+    max_ordinal: usize,
+    batch: &RecordBatch,
+) -> Result<ArrayRef> {
+    let n = group.len();
+    if n > 64 {
         return not_impl_err!(
             "Grouping sets with more than 64 columns are not supported"
         );
     }
-    let group_id = group.iter().fold(0u64, |acc, &is_null| {
+    let ordinal_bits =
+        usize::BITS as usize - max_ordinal.leading_zeros() as usize;
+    let total_bits = n + ordinal_bits;
+    if total_bits > 64 {
+        return not_impl_err!(
+            "Grouping sets with {n} columns and a maximum duplicate ordinal of \
+             {max_ordinal} require {total_bits} bits, which exceeds 64"
+        );
+    }
+    let semantic_id = group.iter().fold(0u64, |acc, &is_null| {
         (acc << 1) | if is_null { 1 } else { 0 }
     });
+    let full_id = semantic_id | ((ordinal as u64) << n);
     let num_rows = batch.num_rows();
-    if group.len() <= 8 {
-        Ok(Arc::new(UInt8Array::from(vec![group_id as u8; num_rows])))
-    } else if group.len() <= 16 {
-        Ok(Arc::new(UInt16Array::from(vec![group_id as u16; num_rows])))
-    } else if group.len() <= 32 {
-        Ok(Arc::new(UInt32Array::from(vec![group_id as u32; num_rows])))
+    if total_bits <= 8 {
+        Ok(Arc::new(UInt8Array::from(vec![full_id as u8; num_rows])))
+    } else if total_bits <= 16 {
+        Ok(Arc::new(UInt16Array::from(vec![full_id as u16; num_rows])))
+    } else if total_bits <= 32 {
+        Ok(Arc::new(UInt32Array::from(vec![full_id as u32; num_rows])))
     } else {
-        Ok(Arc::new(UInt64Array::from(vec![group_id; num_rows])))
+        Ok(Arc::new(UInt64Array::from(vec![full_id; num_rows])))
     }
 }
 
+/// Returns the highest duplicate ordinal across all grouping sets.
+///
+/// The ordinal counts how many times a grouping-set pattern has already
+/// appeared before the current occurrence.  If the same `Vec<bool>` appears
+/// three times the ordinals are 0, 1, 2 and this function returns 2.
+/// Returns 0 when no grouping set is duplicated.
+fn max_duplicate_ordinal(groups: &[Vec<bool>]) -> usize {
+    let mut counts: HashMap<&Vec<bool>, usize> = HashMap::new();
+    for group in groups {
+        *counts.entry(group).or_insert(0) += 1;
+    }
+    counts
+        .values()
+        .copied()
+        .max()
+        .unwrap_or(1)
+        .saturating_sub(1)
+}
+
 /// Evaluate a group by expression against a `RecordBatch`
 ///
 /// Arguments:
@@ -1972,6 +2030,8 @@ pub fn evaluate_group_by(
     group_by: &PhysicalGroupBy,
     batch: &RecordBatch,
 ) -> Result<Vec<Vec<ArrayRef>>> {
+    let max_ordinal = max_duplicate_ordinal(&group_by.groups);
+    let mut ordinal_per_pattern: HashMap<&Vec<bool>, usize> = HashMap::new();
     let exprs = evaluate_expressions_to_arrays(
         group_by.expr.iter().map(|(expr, _)| expr),
         batch,
@@ -1985,6 +2045,10 @@ pub fn evaluate_group_by(
         .groups
         .iter()
         .map(|group| {
+            let ordinal = ordinal_per_pattern.entry(group).or_insert(0);
+            let current_ordinal = *ordinal;
+            *ordinal += 1;
+
             let mut group_values = Vec::with_capacity(group_by.num_group_exprs());
             group_values.extend(group.iter().enumerate().map(|(idx, is_null)| {
                 if *is_null {
@@ -1994,7 +2058,7 @@ pub fn evaluate_group_by(
                 }
             }));
             if !group_by.is_single() {
-                group_values.push(group_id_array(group, batch)?);
+                group_values.push(group_id_array(group, current_ordinal, max_ordinal, batch)?);
             }
             Ok(group_values)
         })
diff --git a/datafusion/sql/src/unparser/utils.rs b/datafusion/sql/src/unparser/utils.rs
@@ -247,7 +247,14 @@ fn find_agg_expr<'a>(agg: &'a Aggregate, column: &Column) -> Result<Option<&'a E
                     )
                 }
                 Ordering::Greater => {
-                    Ok(agg.aggr_expr.get(index - grouping_expr.len() - 1))
+                    if index < grouping_expr.len() + 1 {
+                        return internal_err!(
+                            "Tried to unproject column referring to internal grouping column"
+                        );
+                    }
+                    Ok(agg
+                        .aggr_expr
+                        .get(index - grouping_expr.len() - 1))
                 }
             }
         } else {
diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt
@@ -5203,6 +5203,41 @@ NULL NULL 1
 statement ok
 drop table t;
 
+# regression: duplicate grouping sets must not be collapsed into one
+statement ok
+create table duplicate_grouping_sets(deptno int, job varchar, sal int, comm int) as values
+(10, 'CLERK', 1300, null),
+(20, 'MANAGER', 3000, null);
+
+query ITIIIII
+select deptno, job, sal, sum(comm), grouping(deptno), grouping(job), grouping(sal)
+from duplicate_grouping_sets
+group by grouping sets ((deptno, job), (deptno, sal), (deptno, job))
+order by deptno, job, sal, grouping(deptno), grouping(job), grouping(sal);
+----
+10 CLERK NULL NULL 0 0 1
+10 CLERK NULL NULL 0 0 1
+10 NULL 1300 NULL 0 1 0
+20 MANAGER NULL NULL 0 0 1
+20 MANAGER NULL NULL 0 0 1
+20 NULL 3000 NULL 0 1 0
+
+query ITII
+select deptno, job, sal, grouping(deptno, job, sal)
+from duplicate_grouping_sets
+group by grouping sets ((deptno, job), (deptno, sal), (deptno, job))
+order by deptno, job, sal, grouping(deptno, job, sal);
+----
+10 CLERK NULL 1
+10 CLERK NULL 1
+10 NULL 1300 2
+20 MANAGER NULL 1
+20 MANAGER NULL 1
+20 NULL 3000 2
+
+statement ok
+drop table duplicate_grouping_sets;
+
 # test multi group by for binary type without nulls
 statement ok
 create table t(a int, b bytea) as values (1, 0xa), (1, 0xa), (2, 0xb), (3, 0xb), (3, 0xb);

Original file line number	Diff line number	Diff line change
`@@ -247,7 +247,14 @@ fn find_agg_expr<'a>(agg: &'a Aggregate, column: &Column) -> Result<Option<&'a E`
`247`	`247`	`)`
`248`	`248`	`}`
`249`	`249`	`Ordering::Greater => {`
`250`		`- Ok(agg.aggr_expr.get(index - grouping_expr.len() - 1))`
	`250`	`+ if index < grouping_expr.len() + 1 {`
	`251`	`+ return internal_err!(`
	`252`	`+ "Tried to unproject column referring to internal grouping column"`
	`253`	`+ );`
	`254`	`+ }`
	`255`	`+ Ok(agg`
	`256`	`+ .aggr_expr`
	`257`	`+ .get(index - grouping_expr.len() - 1))`
`251`	`258`	`}`
`252`	`259`	`}`
`253`	`260`	`} else {`