Skip to content

Commit d065a28

Browse files
ajitpratap0Ajit Pratap Singh
andauthored
feat(parser): ClickHouse WITH TOTALS, LIMIT BY, ANY/ALL JOIN, DEFAULT as identifier (#482) (#503)
Co-authored-by: Ajit Pratap Singh <ajitpratapsingh@Ajits-Mac-mini-2655.local>
1 parent 9450045 commit d065a28

File tree

4 files changed

+113
-3
lines changed

4 files changed

+113
-3
lines changed
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
// Copyright 2026 GoSQLX Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
5+
package parser_test
6+
7+
import (
8+
"testing"
9+
10+
"github.com/ajitpratap0/GoSQLX/pkg/gosqlx"
11+
"github.com/ajitpratap0/GoSQLX/pkg/sql/keywords"
12+
)
13+
14+
// TestClickHouseWithTotals verifies GROUP BY ... WITH TOTALS parses.
15+
func TestClickHouseWithTotals(t *testing.T) {
16+
queries := []string{
17+
`SELECT status, count() FROM events GROUP BY status WITH TOTALS`,
18+
`SELECT status, count() FROM events GROUP BY status WITH TOTALS ORDER BY status`,
19+
}
20+
for _, q := range queries {
21+
t.Run(q[:40], func(t *testing.T) {
22+
if _, err := gosqlx.ParseWithDialect(q, keywords.DialectClickHouse); err != nil {
23+
t.Fatalf("parse failed: %v", err)
24+
}
25+
})
26+
}
27+
}
28+
29+
// TestClickHouseLimitBy verifies LIMIT N [OFFSET M] BY expr parses.
30+
func TestClickHouseLimitBy(t *testing.T) {
31+
queries := []string{
32+
`SELECT user_id, event FROM events ORDER BY ts LIMIT 3 BY user_id`,
33+
`SELECT user_id, event FROM events ORDER BY ts LIMIT 3 OFFSET 1 BY user_id`,
34+
`SELECT user_id, event, ts FROM events ORDER BY ts LIMIT 5 BY user_id, event`,
35+
}
36+
for _, q := range queries {
37+
t.Run(q[:40], func(t *testing.T) {
38+
if _, err := gosqlx.ParseWithDialect(q, keywords.DialectClickHouse); err != nil {
39+
t.Fatalf("parse failed: %v", err)
40+
}
41+
})
42+
}
43+
}
44+
45+
// TestClickHouseAnyJoin verifies the ANY/ALL join strictness prefix parses.
46+
func TestClickHouseAnyJoin(t *testing.T) {
47+
queries := map[string]string{
48+
"any_left": `SELECT * FROM a ANY LEFT JOIN b ON a.id = b.id`,
49+
"any_inner": `SELECT * FROM a ANY INNER JOIN b ON a.id = b.id`,
50+
"all_inner": `SELECT * FROM a ALL INNER JOIN b ON a.id = b.id`,
51+
"asof": `SELECT * FROM a ASOF JOIN b ON a.id = b.id AND a.ts >= b.ts`,
52+
}
53+
for name, q := range queries {
54+
q := q
55+
t.Run(name, func(t *testing.T) {
56+
if _, err := gosqlx.ParseWithDialect(q, keywords.DialectClickHouse); err != nil {
57+
t.Fatalf("parse failed: %v", err)
58+
}
59+
})
60+
}
61+
}
62+
63+
// TestClickHouseDefaultAsIdentifier verifies DEFAULT can be used as a column
64+
// name and DATABASES as a qualified table name in ClickHouse.
65+
func TestClickHouseDefaultAsIdentifier(t *testing.T) {
66+
queries := []string{
67+
`SELECT default FROM t`,
68+
`SELECT database, default FROM system.databases`,
69+
}
70+
for _, q := range queries {
71+
t.Run(q, func(t *testing.T) {
72+
if _, err := gosqlx.ParseWithDialect(q, keywords.DialectClickHouse); err != nil {
73+
t.Fatalf("parse failed: %v", err)
74+
}
75+
})
76+
}
77+
}

pkg/sql/parser/parser.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1063,7 +1063,7 @@ func (p *Parser) isNonReservedKeyword() bool {
10631063
return true
10641064
case models.TokenTypeTable, models.TokenTypeIndex, models.TokenTypeView,
10651065
models.TokenTypeKey, models.TokenTypeColumn, models.TokenTypeDatabase,
1066-
models.TokenTypePartition, models.TokenTypeRows:
1066+
models.TokenTypePartition, models.TokenTypeRows, models.TokenTypeDefault:
10671067
// DDL keywords that are commonly used as quoted identifiers in MySQL (backtick)
10681068
// and SQL Server (bracket) dialects, and as plain column names in ClickHouse
10691069
// system tables (system.parts.partition, system.replicas.table,
@@ -1073,7 +1073,7 @@ func (p *Parser) isNonReservedKeyword() bool {
10731073
// Token may have generic Type; check value for specific keywords
10741074
switch strings.ToUpper(p.currentToken.Token.Value) {
10751075
case "TARGET", "SOURCE", "MATCHED", "VALUE", "NAME", "TYPE", "STATUS",
1076-
"TABLES":
1076+
"TABLES", "DATABASES":
10771077
return true
10781078
}
10791079
}
@@ -1111,6 +1111,11 @@ func (p *Parser) isJoinKeyword() bool {
11111111
if p.dialect == string(keywords.DialectClickHouse) && p.isTokenMatch("GLOBAL") {
11121112
return true
11131113
}
1114+
// ClickHouse: ANY / ALL as join strictness prefix (ANY LEFT JOIN, ALL INNER JOIN)
1115+
if p.dialect == string(keywords.DialectClickHouse) &&
1116+
(p.isType(models.TokenTypeAny) || p.isType(models.TokenTypeAll)) {
1117+
return true
1118+
}
11141119
return false
11151120
}
11161121

pkg/sql/parser/select.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,22 @@ func (p *Parser) parseSelectStatement() (ast.Statement, error) {
179179
return nil, err
180180
}
181181

182+
// ClickHouse LIMIT BY: `LIMIT N [OFFSET M] BY expr [, expr]...`
183+
// The LIMIT and OFFSET values were already consumed above; if the next
184+
// token is BY, consume the BY-expression list (permissive, not modeled).
185+
if p.dialect == string(keywords.DialectClickHouse) && p.isType(models.TokenTypeBy) {
186+
p.advance() // Consume BY
187+
for {
188+
if _, err := p.parseExpression(); err != nil {
189+
return nil, err
190+
}
191+
if !p.isType(models.TokenTypeComma) {
192+
break
193+
}
194+
p.advance()
195+
}
196+
}
197+
182198
// FETCH FIRST / NEXT
183199
if p.isType(models.TokenTypeFetch) {
184200
if selectStmt.Fetch, err = p.parseFetchClause(); err != nil {

pkg/sql/parser/select_clauses.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,13 @@ func (p *Parser) parseJoinType() (string, bool, error) {
182182
p.advance() // consume GLOBAL; fall through to standard join parsing
183183
}
184184

185+
// ClickHouse ANY/ALL join strictness prefix — e.g. ANY LEFT JOIN, ALL INNER JOIN.
186+
// The strictness modifier is consumed but not modeled on the AST.
187+
if p.dialect == string(keywords.DialectClickHouse) &&
188+
(p.isType(models.TokenTypeAny) || p.isType(models.TokenTypeAll)) {
189+
p.advance()
190+
}
191+
185192
if p.isType(models.TokenTypeNatural) {
186193
isNatural = true
187194
p.advance()
@@ -481,7 +488,7 @@ func (p *Parser) parseGroupByClause() ([]ast.Expression, error) {
481488
p.advance()
482489
}
483490

484-
// MySQL: GROUP BY col1 WITH ROLLUP / WITH CUBE
491+
// MySQL / ClickHouse: GROUP BY col1 WITH ROLLUP / WITH CUBE / WITH TOTALS
485492
if p.isType(models.TokenTypeWith) {
486493
switch strings.ToUpper(p.peekToken().Token.Value) {
487494
case "ROLLUP":
@@ -492,6 +499,11 @@ func (p *Parser) parseGroupByClause() ([]ast.Expression, error) {
492499
p.advance() // Consume WITH
493500
p.advance() // Consume CUBE
494501
groupByExprs = []ast.Expression{&ast.CubeExpression{Expressions: groupByExprs}}
502+
case "TOTALS":
503+
// ClickHouse WITH TOTALS: adds a summary row with aggregate totals.
504+
// Consumed but not modeled on the AST (follow-up).
505+
p.advance() // Consume WITH
506+
p.advance() // Consume TOTALS
495507
}
496508
}
497509

0 commit comments

Comments
 (0)