Skip to content

Commit 4e39433

Browse files
ajitpratap0 and Ajit Pratap Singh authored
feat(parser): ClickHouse nested column types and engine clauses (#482) (#488)
Co-authored-by: Ajit Pratap Singh <ajitpratapsingh@Ajits-Mac-mini-2655.local>
1 parent 57ee598 commit 4e39433

File tree

3 files changed

+254
-24
lines changed

3 files changed

+254
-24
lines changed
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// Copyright 2026 GoSQLX Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
5+
package parser_test
6+
7+
import (
8+
"testing"
9+
10+
"github.com/ajitpratap0/GoSQLX/pkg/gosqlx"
11+
"github.com/ajitpratap0/GoSQLX/pkg/sql/keywords"
12+
)
13+
14+
// TestClickHouseNestedColumnTypes verifies CREATE TABLE column definitions
15+
// with nested/parameterised types parse for the ClickHouse dialect. Regression
16+
// for #482.
17+
func TestClickHouseNestedColumnTypes(t *testing.T) {
18+
queries := map[string]string{
19+
"array_string": `CREATE TABLE t (
20+
tags Array(String)
21+
) ENGINE = MergeTree() ORDER BY tags`,
22+
23+
"nullable_int": `CREATE TABLE t (
24+
id Nullable(Int32)
25+
) ENGINE = MergeTree() ORDER BY id`,
26+
27+
"array_nullable": `CREATE TABLE t (
28+
tags Array(Nullable(String))
29+
) ENGINE = MergeTree() ORDER BY tags`,
30+
31+
"map_string_array": `CREATE TABLE t (
32+
counts Map(String, Array(UInt32))
33+
) ENGINE = MergeTree() ORDER BY counts`,
34+
35+
"low_cardinality": `CREATE TABLE t (
36+
country LowCardinality(String)
37+
) ENGINE = MergeTree() ORDER BY country`,
38+
39+
"fixed_string": `CREATE TABLE t (
40+
hash FixedString(32)
41+
) ENGINE = MergeTree() ORDER BY hash`,
42+
43+
"datetime64_with_tz": `CREATE TABLE t (
44+
ts DateTime64(3, 'UTC')
45+
) ENGINE = MergeTree() ORDER BY ts`,
46+
47+
"decimal_precision_scale": `CREATE TABLE t (
48+
price Decimal(38, 18)
49+
) ENGINE = MergeTree() ORDER BY price`,
50+
51+
"replicated_engine": `CREATE TABLE t (
52+
id UInt64
53+
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/t', '{replica}') ORDER BY id`,
54+
55+
"distributed_engine": `CREATE TABLE t (
56+
id UInt64
57+
) ENGINE = Distributed('cluster', 'db', 'local_t', id)`,
58+
}
59+
for name, q := range queries {
60+
q := q
61+
t.Run(name, func(t *testing.T) {
62+
if _, err := gosqlx.ParseWithDialect(q, keywords.DialectClickHouse); err != nil {
63+
t.Fatalf("parse failed: %v", err)
64+
}
65+
})
66+
}
67+
}

pkg/sql/parser/ddl.go

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626

2727
"github.com/ajitpratap0/GoSQLX/pkg/models"
2828
"github.com/ajitpratap0/GoSQLX/pkg/sql/ast"
29+
"github.com/ajitpratap0/GoSQLX/pkg/sql/keywords"
2930
)
3031

3132
// isTokenMatch checks if the current token matches the given keyword
@@ -236,9 +237,69 @@ func (p *Parser) parseCreateTable(temporary bool) (*ast.CreateTableStatement, er
236237
opt.Value = p.currentToken.Token.Value
237238
p.advance()
238239
}
240+
// ClickHouse engine values may carry their own argument list:
241+
// ENGINE = MergeTree()
242+
// ENGINE = ReplicatedMergeTree('/path', '{replica}')
243+
// ENGINE = Distributed('cluster', 'db', 'local_t', sharding_key)
244+
// Consume them as a balanced block appended to the option value.
245+
if p.isType(models.TokenTypeLParen) {
246+
args, err := p.parseTypeArgsString()
247+
if err != nil {
248+
return nil, err
249+
}
250+
opt.Value += args
251+
}
239252
stmt.Options = append(stmt.Options, opt)
240253
}
241254

255+
// ClickHouse CREATE TABLE trailing clauses: ORDER BY, PARTITION BY,
256+
// PRIMARY KEY, SAMPLE BY, SETTINGS. These appear after ENGINE = ... and
257+
// are required for MergeTree-family engines. Parse permissively:
258+
// each consumes a parenthesised expression list or a single column ref.
259+
for p.dialect == string(keywords.DialectClickHouse) {
260+
if p.isType(models.TokenTypeOrder) {
261+
p.advance() // ORDER
262+
if p.isType(models.TokenTypeBy) {
263+
p.advance()
264+
}
265+
if err := p.skipClickHouseClauseExpr(); err != nil {
266+
return nil, err
267+
}
268+
continue
269+
}
270+
if p.isTokenMatch("PARTITION") {
271+
p.advance()
272+
if p.isType(models.TokenTypeBy) {
273+
p.advance()
274+
}
275+
if err := p.skipClickHouseClauseExpr(); err != nil {
276+
return nil, err
277+
}
278+
continue
279+
}
280+
if p.isType(models.TokenTypePrimary) {
281+
p.advance()
282+
if p.isType(models.TokenTypeKey) {
283+
p.advance()
284+
}
285+
if err := p.skipClickHouseClauseExpr(); err != nil {
286+
return nil, err
287+
}
288+
continue
289+
}
290+
if p.isTokenMatch("SAMPLE") {
291+
p.advance()
292+
if p.isType(models.TokenTypeBy) {
293+
p.advance()
294+
}
295+
if err := p.skipClickHouseClauseExpr(); err != nil {
296+
return nil, err
297+
}
298+
continue
299+
}
300+
break
301+
}
302+
242303
// SQLite: optional WITHOUT ROWID clause
243304
if p.isTokenMatch("WITHOUT") {
244305
p.advance() // Consume WITHOUT
@@ -550,3 +611,49 @@ func (p *Parser) parseTruncateStatement() (*ast.TruncateStatement, error) {
550611

551612
return stmt, nil
552613
}
614+
615+
// skipClickHouseClauseExpr consumes the expression following a ClickHouse
616+
// CREATE TABLE trailing clause (ORDER BY, PARTITION BY, PRIMARY KEY, SAMPLE BY).
617+
// We do not currently model these clauses on the AST; this just walks the
618+
// tokens until the start of the next clause, EOF, or ';'. Supports both
619+
// parenthesised lists and bare expressions.
620+
func (p *Parser) skipClickHouseClauseExpr() error {
621+
if p.isType(models.TokenTypeLParen) {
622+
// Balanced paren block.
623+
depth := 0
624+
for {
625+
switch p.currentToken.Token.Type {
626+
case models.TokenTypeEOF:
627+
return p.expectedError(") to close clause expression")
628+
case models.TokenTypeLParen:
629+
depth++
630+
p.advance()
631+
case models.TokenTypeRParen:
632+
depth--
633+
p.advance()
634+
if depth == 0 {
635+
return nil
636+
}
637+
default:
638+
p.advance()
639+
}
640+
}
641+
}
642+
643+
// Bare expression: consume until next clause/EOF/;.
644+
for {
645+
t := p.currentToken.Token.Type
646+
if t == models.TokenTypeEOF || t == models.TokenTypeSemicolon {
647+
return nil
648+
}
649+
// Stop at next CH trailing-clause keyword.
650+
if t == models.TokenTypeOrder || t == models.TokenTypePrimary {
651+
return nil
652+
}
653+
val := strings.ToUpper(p.currentToken.Token.Value)
654+
if val == "PARTITION" || val == "SAMPLE" || val == "SETTINGS" || val == "TTL" {
655+
return nil
656+
}
657+
p.advance()
658+
}
659+
}

pkg/sql/parser/ddl_columns.go

Lines changed: 80 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -73,32 +73,18 @@ func (p *Parser) parseColumnDef() (*ast.ColumnDef, error) {
7373

7474
dataTypeStr := dataType.Name
7575

76-
// Check for type parameters like VARCHAR(100) or DECIMAL(10,2)
76+
// Check for type parameters. The simple form is VARCHAR(100) or
77+
// DECIMAL(10,2), but ClickHouse also has nested/parameterised types like
78+
// Array(Nullable(String)), Map(String, Array(UInt32)), Tuple(a UInt8, b String),
79+
// FixedString(16), DateTime64(3, 'UTC'), LowCardinality(String), Decimal(38, 18),
80+
// and engines like ReplicatedMergeTree('/path', '{replica}'). Use a depth-tracking
81+
// token collector that round-trips the type string.
7782
if p.isType(models.TokenTypeLParen) {
78-
dataTypeStr += "("
79-
p.advance() // Consume (
80-
81-
// Parse first parameter (can be number or identifier like MAX)
82-
if p.isType(models.TokenTypeNumber) || p.isType(models.TokenTypeIdentifier) {
83-
dataTypeStr += p.currentToken.Token.Value
84-
p.advance()
85-
}
86-
87-
// Check for second parameter (e.g., DECIMAL(10,2))
88-
if p.isType(models.TokenTypeComma) {
89-
dataTypeStr += ","
90-
p.advance()
91-
if p.isType(models.TokenTypeNumber) || p.isType(models.TokenTypeIdentifier) {
92-
dataTypeStr += p.currentToken.Token.Value
93-
p.advance()
94-
}
95-
}
96-
97-
if !p.isType(models.TokenTypeRParen) {
98-
return nil, p.expectedError(") after type parameters")
83+
args, err := p.parseTypeArgsString()
84+
if err != nil {
85+
return nil, err
9986
}
100-
dataTypeStr += ")"
101-
p.advance() // Consume )
87+
dataTypeStr += args
10288
}
10389

10490
colDef := &ast.ColumnDef{
@@ -480,3 +466,73 @@ func (p *Parser) parseConstraintColumnList() ([]string, error) {
480466

481467
return columns, nil
482468
}
469+
470+
// parseTypeArgsString consumes a balanced parenthesised type-argument list
471+
// and returns it as a string (including the outer parens). Supports nested
472+
// types like Array(Nullable(String)), Map(String, Array(UInt32)),
473+
// Tuple(a UInt8, b String), DateTime64(3, 'UTC'), and engine arguments like
474+
// ReplicatedMergeTree('/path', '{replica}'). The current token must be '('.
475+
func (p *Parser) parseTypeArgsString() (string, error) {
476+
if !p.isType(models.TokenTypeLParen) {
477+
return "", p.expectedError("(")
478+
}
479+
480+
var buf strings.Builder
481+
depth := 0
482+
prevWasIdent := false // for inserting spaces between adjacent tokens (e.g. "a UInt8")
483+
484+
for {
485+
tok := p.currentToken.Token
486+
switch tok.Type {
487+
case models.TokenTypeEOF:
488+
return "", p.expectedError(") to close type arguments")
489+
case models.TokenTypeLParen:
490+
buf.WriteByte('(')
491+
depth++
492+
prevWasIdent = false
493+
p.advance()
494+
continue
495+
case models.TokenTypeRParen:
496+
buf.WriteByte(')')
497+
depth--
498+
p.advance()
499+
if depth == 0 {
500+
return buf.String(), nil
501+
}
502+
prevWasIdent = false
503+
continue
504+
case models.TokenTypeComma:
505+
buf.WriteString(", ")
506+
prevWasIdent = false
507+
p.advance()
508+
continue
509+
}
510+
511+
// Render leaf token. Quote string literals; everything else is rendered
512+
// by its raw value (numbers, identifiers, keywords like Nullable / Array).
513+
val := tok.Value
514+
if val == "" {
515+
return "", p.expectedError("type argument")
516+
}
517+
518+
// Insert a space when two adjacent leaf tokens both look like identifiers
519+
// or numbers — this preserves "name Type" pairs in named tuple elements.
520+
if prevWasIdent {
521+
buf.WriteByte(' ')
522+
}
523+
524+
switch tok.Type {
525+
case models.TokenTypeString, models.TokenTypeSingleQuotedString,
526+
models.TokenTypeDoubleQuotedString:
527+
buf.WriteByte('\'')
528+
buf.WriteString(val)
529+
buf.WriteByte('\'')
530+
prevWasIdent = false
531+
default:
532+
buf.WriteString(val)
533+
prevWasIdent = true
534+
}
535+
536+
p.advance()
537+
}
538+
}

0 commit comments

Comments
 (0)