Skip to content

Commit c8a6025

Browse files
Ajit Pratap Singh and claude
authored and committed
feat: implement GROUPING SETS, ROLLUP, CUBE support (SQL-99 T431)
Implement SQL-99 advanced grouping operations for aggregate queries:

## New Features

- ROLLUP(col1, col2, ...) - hierarchical subtotals
- CUBE(col1, col2, ...) - all possible subtotal combinations
- GROUPING SETS((a,b), (a), ()) - explicit grouping set specification

## Implementation Details

### AST Nodes (ast.go)

- RollupExpression: stores column list for ROLLUP operation
- CubeExpression: stores column list for CUBE operation
- GroupingSetsExpression: stores list of grouping sets (including empty sets)

### Parser (parser.go)

- parseGroupingExpressionList(): shared helper for ROLLUP/CUBE parsing
- parseRollup(): parses ROLLUP(columns) with validation
- parseCube(): parses CUBE(columns) with validation
- parseGroupingSets(): parses GROUPING SETS with nested sets support
- Updated parseGroupByClause() to detect and route to correct parser

### Tokenizer (tokenizer.go)

- Added ROLLUP, CUBE, GROUPING, SETS as keyword token types
- Added "GROUPING SETS" compound keyword support

### Keywords (keywords.go, categories.go)

- Registered keywords in ADDITIONAL_KEYWORDS
- Added to DMLKeywords and CompoundKeywords maps

## Validation

- Empty ROLLUP() returns error: "ROLLUP requires at least one expression"
- Empty CUBE() returns error: "CUBE requires at least one expression"
- Empty set in GROUPING SETS(()) is valid (SQL-99 compliant for grand total)

## Tests

- 7 formal test cases in parser_coverage_test.go
- Tests cover valid syntax, empty validation, and mixed operations

Closes #67 (Phase 1: GROUPING SETS, ROLLUP, CUBE)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent a50a055 commit c8a6025

6 files changed

Lines changed: 405 additions & 16 deletions

File tree

pkg/sql/ast/ast.go

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -230,6 +230,49 @@ func nodifyExpressions(exprs []Expression) []Node {
230230
return nodes
231231
}
232232

233+
// RollupExpression represents ROLLUP(col1, col2, ...) in GROUP BY clause
234+
// ROLLUP generates hierarchical grouping sets from right to left
235+
// Example: ROLLUP(a, b, c) generates grouping sets:
236+
//
237+
// (a, b, c), (a, b), (a), ()
238+
type RollupExpression struct {
239+
Expressions []Expression
240+
}
241+
242+
func (r *RollupExpression) expressionNode() {}
243+
func (r RollupExpression) TokenLiteral() string { return "ROLLUP" }
244+
func (r RollupExpression) Children() []Node { return nodifyExpressions(r.Expressions) }
245+
246+
// CubeExpression represents CUBE(col1, col2, ...) in GROUP BY clause
247+
// CUBE generates all possible combinations of grouping sets
248+
// Example: CUBE(a, b) generates grouping sets:
249+
//
250+
// (a, b), (a), (b), ()
251+
type CubeExpression struct {
252+
Expressions []Expression
253+
}
254+
255+
func (c *CubeExpression) expressionNode() {}
256+
func (c CubeExpression) TokenLiteral() string { return "CUBE" }
257+
func (c CubeExpression) Children() []Node { return nodifyExpressions(c.Expressions) }
258+
259+
// GroupingSetsExpression represents GROUPING SETS(...) in GROUP BY clause
260+
// Allows explicit specification of grouping sets
261+
// Example: GROUPING SETS((a, b), (a), ())
262+
type GroupingSetsExpression struct {
263+
Sets [][]Expression // Each inner slice is one grouping set
264+
}
265+
266+
func (g *GroupingSetsExpression) expressionNode() {}
267+
func (g GroupingSetsExpression) TokenLiteral() string { return "GROUPING SETS" }
268+
func (g GroupingSetsExpression) Children() []Node {
269+
children := make([]Node, 0)
270+
for _, set := range g.Sets {
271+
children = append(children, nodifyExpressions(set)...)
272+
}
273+
return children
274+
}
275+
233276
// Identifier represents a column or table name
234277
type Identifier struct {
235278
Name string

pkg/sql/keywords/categories.go

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -50,16 +50,21 @@ func (k *Keywords) initialize() {
5050
"NULLS": models.TokenTypeKeyword,
5151
"FIRST": models.TokenTypeKeyword,
5252
"LAST": models.TokenTypeKeyword,
53+
"ROLLUP": models.TokenTypeKeyword, // SQL-99 grouping operation
54+
"CUBE": models.TokenTypeKeyword, // SQL-99 grouping operation
55+
"GROUPING": models.TokenTypeKeyword, // SQL-99 GROUPING SETS
56+
"SETS": models.TokenTypeKeyword, // SQL-99 GROUPING SETS
5357
}
5458

5559
// Initialize compound keywords
5660
k.CompoundKeywords = map[string]models.TokenType{
57-
"FULL JOIN": models.TokenTypeKeyword,
58-
"CROSS JOIN": models.TokenTypeKeyword,
59-
"NATURAL JOIN": models.TokenTypeKeyword,
60-
"GROUP BY": models.TokenTypeKeyword,
61-
"ORDER BY": models.TokenTypeKeyword,
62-
"LEFT JOIN": models.TokenTypeKeyword,
61+
"FULL JOIN": models.TokenTypeKeyword,
62+
"CROSS JOIN": models.TokenTypeKeyword,
63+
"NATURAL JOIN": models.TokenTypeKeyword,
64+
"GROUP BY": models.TokenTypeKeyword,
65+
"ORDER BY": models.TokenTypeKeyword,
66+
"LEFT JOIN": models.TokenTypeKeyword,
67+
"GROUPING SETS": models.TokenTypeKeyword, // SQL-99 grouping operation
6368
}
6469

6570
// Add all keywords to the main keyword map

pkg/sql/keywords/keywords.go

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -110,6 +110,11 @@ var ADDITIONAL_KEYWORDS = []Keyword{
110110
{Word: "LEAD", Type: models.TokenTypeKeyword, Reserved: false, ReservedForTableAlias: false},
111111
{Word: "FIRST_VALUE", Type: models.TokenTypeKeyword, Reserved: false, ReservedForTableAlias: false},
112112
{Word: "LAST_VALUE", Type: models.TokenTypeKeyword, Reserved: false, ReservedForTableAlias: false},
113+
// SQL-99 grouping operations
114+
{Word: "ROLLUP", Type: models.TokenTypeKeyword, Reserved: true, ReservedForTableAlias: false},
115+
{Word: "CUBE", Type: models.TokenTypeKeyword, Reserved: true, ReservedForTableAlias: false},
116+
{Word: "GROUPING", Type: models.TokenTypeKeyword, Reserved: true, ReservedForTableAlias: false},
117+
{Word: "SETS", Type: models.TokenTypeKeyword, Reserved: true, ReservedForTableAlias: false},
113118
}
114119

115120
// addKeywordsWithCategory is a helper method to add multiple keywords
@@ -130,6 +135,7 @@ func New(dialect SQLDialect, ignoreCase bool) *Keywords {
130135
k.CompoundKeywords["FULL JOIN"] = models.TokenTypeKeyword
131136
k.CompoundKeywords["CROSS JOIN"] = models.TokenTypeKeyword
132137
k.CompoundKeywords["NATURAL JOIN"] = models.TokenTypeKeyword
138+
k.CompoundKeywords["GROUPING SETS"] = models.TokenTypeKeyword // SQL-99 grouping operation
133139

134140
// Add standard keywords
135141
k.addKeywordsWithCategory(RESERVED_FOR_TABLE_ALIAS)

pkg/sql/parser/parser.go

Lines changed: 163 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1137,6 +1137,152 @@ func (p *Parser) parseNullsClause() (*bool, error) {
11371137
return nil, nil
11381138
}
11391139

1140+
// parseGroupingExpressionList parses a parenthesized, comma-separated list of expressions
1141+
// used by ROLLUP and CUBE. Returns error if the list is empty.
1142+
func (p *Parser) parseGroupingExpressionList(keyword string) ([]ast.Expression, error) {
1143+
if p.currentToken.Type != "(" {
1144+
return nil, p.expectedError("( after " + keyword)
1145+
}
1146+
p.advance() // Consume (
1147+
1148+
// Check for empty list - not allowed for ROLLUP/CUBE
1149+
if p.currentToken.Type == ")" {
1150+
return nil, fmt.Errorf("parsing failed: %s requires at least one expression", keyword)
1151+
}
1152+
1153+
// Parse comma-separated expressions
1154+
expressions := make([]ast.Expression, 0)
1155+
for {
1156+
expr, err := p.parseExpression()
1157+
if err != nil {
1158+
return nil, err
1159+
}
1160+
expressions = append(expressions, expr)
1161+
1162+
// Check for comma (more expressions) or closing paren
1163+
if p.currentToken.Type == ")" {
1164+
break
1165+
}
1166+
if p.currentToken.Type != "," {
1167+
return nil, p.expectedError(", or ) in " + keyword)
1168+
}
1169+
p.advance() // Consume comma
1170+
}
1171+
p.advance() // Consume )
1172+
1173+
return expressions, nil
1174+
}
1175+
1176+
// parseRollup parses ROLLUP(col1, col2, ...) in GROUP BY clause
1177+
// ROLLUP generates hierarchical grouping sets from right to left
1178+
// Example: ROLLUP(a, b, c) generates: (a, b, c), (a, b), (a), ()
1179+
func (p *Parser) parseRollup() (*ast.RollupExpression, error) {
1180+
p.advance() // Consume ROLLUP
1181+
1182+
expressions, err := p.parseGroupingExpressionList("ROLLUP")
1183+
if err != nil {
1184+
return nil, err
1185+
}
1186+
1187+
return &ast.RollupExpression{
1188+
Expressions: expressions,
1189+
}, nil
1190+
}
1191+
1192+
// parseCube parses CUBE(col1, col2, ...) in GROUP BY clause
1193+
// CUBE generates all possible combinations of grouping sets
1194+
// Example: CUBE(a, b) generates: (a, b), (a), (b), ()
1195+
func (p *Parser) parseCube() (*ast.CubeExpression, error) {
1196+
p.advance() // Consume CUBE
1197+
1198+
expressions, err := p.parseGroupingExpressionList("CUBE")
1199+
if err != nil {
1200+
return nil, err
1201+
}
1202+
1203+
return &ast.CubeExpression{
1204+
Expressions: expressions,
1205+
}, nil
1206+
}
1207+
1208+
// parseGroupingSets parses GROUPING SETS(...) in GROUP BY clause
1209+
// Allows explicit specification of grouping sets
1210+
// Example: GROUPING SETS((a, b), (a), ()) generates exactly those three grouping sets
1211+
func (p *Parser) parseGroupingSets() (*ast.GroupingSetsExpression, error) {
1212+
// Handle both "GROUPING SETS" as compound keyword or separate tokens
1213+
if p.currentToken.Literal == "GROUPING SETS" {
1214+
p.advance() // Consume "GROUPING SETS" compound token
1215+
} else if p.currentToken.Type == "GROUPING" {
1216+
p.advance() // Consume GROUPING
1217+
if p.currentToken.Type != "SETS" {
1218+
return nil, p.expectedError("SETS after GROUPING")
1219+
}
1220+
p.advance() // Consume SETS
1221+
}
1222+
1223+
if p.currentToken.Type != "(" {
1224+
return nil, p.expectedError("( after GROUPING SETS")
1225+
}
1226+
p.advance() // Consume (
1227+
1228+
// Parse comma-separated grouping sets
1229+
sets := make([][]ast.Expression, 0)
1230+
for {
1231+
// Each set is either:
1232+
// 1. A parenthesized list: (col1, col2)
1233+
// 2. An empty set: ()
1234+
// 3. A single column without parens: col1 (treated as (col1))
1235+
1236+
var set []ast.Expression
1237+
if p.currentToken.Type == "(" {
1238+
p.advance() // Consume (
1239+
// Parse expressions in this set
1240+
set = make([]ast.Expression, 0)
1241+
// Handle empty set: ()
1242+
if p.currentToken.Type != ")" {
1243+
for {
1244+
expr, err := p.parseExpression()
1245+
if err != nil {
1246+
return nil, err
1247+
}
1248+
set = append(set, expr)
1249+
1250+
if p.currentToken.Type == ")" {
1251+
break
1252+
}
1253+
if p.currentToken.Type != "," {
1254+
return nil, p.expectedError(", or ) in grouping set")
1255+
}
1256+
p.advance() // Consume comma
1257+
}
1258+
}
1259+
p.advance() // Consume )
1260+
} else {
1261+
// Single column without parens
1262+
expr, err := p.parseExpression()
1263+
if err != nil {
1264+
return nil, err
1265+
}
1266+
set = []ast.Expression{expr}
1267+
}
1268+
sets = append(sets, set)
1269+
1270+
// Check for comma (more sets) or closing paren
1271+
if p.currentToken.Type == ")" {
1272+
break
1273+
}
1274+
if p.currentToken.Type != "," {
1275+
return nil, p.expectedError(", or ) in GROUPING SETS")
1276+
}
1277+
p.advance() // Consume comma
1278+
}
1279+
p.advance() // Consume )
1280+
1281+
return &ast.GroupingSetsExpression{
1282+
Sets: sets,
1283+
}, nil
1284+
}
1285+
11401286
// parseColumnDef parses a column definition
11411287
func (p *Parser) parseColumnDef() (*ast.ColumnDef, error) {
11421288
name := p.parseIdent()
@@ -1439,9 +1585,25 @@ func (p *Parser) parseSelectStatement() (ast.Statement, error) {
14391585
p.advance() // Consume BY
14401586

14411587
// Parse GROUP BY expressions (comma-separated list)
1588+
// Supports: regular expressions, ROLLUP, CUBE, GROUPING SETS
14421589
groupByExprs := make([]ast.Expression, 0)
14431590
for {
1444-
expr, err := p.parseExpression()
1591+
var expr ast.Expression
1592+
var err error
1593+
1594+
// Check for grouping operations: ROLLUP, CUBE, GROUPING SETS
1595+
// Note: GROUPING SETS may come as a compound keyword or separate tokens
1596+
if p.currentToken.Type == "ROLLUP" {
1597+
expr, err = p.parseRollup()
1598+
} else if p.currentToken.Type == "CUBE" {
1599+
expr, err = p.parseCube()
1600+
} else if p.currentToken.Literal == "GROUPING SETS" ||
1601+
(p.currentToken.Type == "GROUPING" && p.peekToken().Type == "SETS") {
1602+
expr, err = p.parseGroupingSets()
1603+
} else {
1604+
expr, err = p.parseExpression()
1605+
}
1606+
14451607
if err != nil {
14461608
return nil, err
14471609
}

0 commit comments

Comments
 (0)