Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions pkg/sql/parser/clickhouse_nested_types_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright 2026 GoSQLX Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");

package parser_test

import (
"testing"

"github.com/ajitpratap0/GoSQLX/pkg/gosqlx"
"github.com/ajitpratap0/GoSQLX/pkg/sql/keywords"
)

// TestClickHouseNestedColumnTypes is a regression test for issue #482:
// CREATE TABLE column definitions that use nested/parameterised ClickHouse
// types must parse successfully under the ClickHouse dialect.
func TestClickHouseNestedColumnTypes(t *testing.T) {
	cases := []struct {
		name  string
		query string
	}{
		{"array_string", `CREATE TABLE t (
tags Array(String)
) ENGINE = MergeTree() ORDER BY tags`},
		{"nullable_int", `CREATE TABLE t (
id Nullable(Int32)
) ENGINE = MergeTree() ORDER BY id`},
		{"array_nullable", `CREATE TABLE t (
tags Array(Nullable(String))
) ENGINE = MergeTree() ORDER BY tags`},
		{"map_string_array", `CREATE TABLE t (
counts Map(String, Array(UInt32))
) ENGINE = MergeTree() ORDER BY counts`},
		{"low_cardinality", `CREATE TABLE t (
country LowCardinality(String)
) ENGINE = MergeTree() ORDER BY country`},
		{"fixed_string", `CREATE TABLE t (
hash FixedString(32)
) ENGINE = MergeTree() ORDER BY hash`},
		{"datetime64_with_tz", `CREATE TABLE t (
ts DateTime64(3, 'UTC')
) ENGINE = MergeTree() ORDER BY ts`},
		{"decimal_precision_scale", `CREATE TABLE t (
price Decimal(38, 18)
) ENGINE = MergeTree() ORDER BY price`},
		{"replicated_engine", `CREATE TABLE t (
id UInt64
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/t', '{replica}') ORDER BY id`},
		{"distributed_engine", `CREATE TABLE t (
id UInt64
) ENGINE = Distributed('cluster', 'db', 'local_t', id)`},
	}

	for _, tc := range cases {
		tc := tc // loop-variable capture for pre-Go 1.22 toolchains
		t.Run(tc.name, func(t *testing.T) {
			if _, err := gosqlx.ParseWithDialect(tc.query, keywords.DialectClickHouse); err != nil {
				t.Fatalf("parse failed: %v", err)
			}
		})
	}
}
107 changes: 107 additions & 0 deletions pkg/sql/parser/ddl.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (

"github.com/ajitpratap0/GoSQLX/pkg/models"
"github.com/ajitpratap0/GoSQLX/pkg/sql/ast"
"github.com/ajitpratap0/GoSQLX/pkg/sql/keywords"
)

// isTokenMatch checks if the current token matches the given keyword
Expand Down Expand Up @@ -236,9 +237,69 @@ func (p *Parser) parseCreateTable(temporary bool) (*ast.CreateTableStatement, er
opt.Value = p.currentToken.Token.Value
p.advance()
}
// ClickHouse engine values may carry their own argument list:
// ENGINE = MergeTree()
// ENGINE = ReplicatedMergeTree('/path', '{replica}')
// ENGINE = Distributed('cluster', 'db', 'local_t', sharding_key)
// Consume them as a balanced block appended to the option value.
if p.isType(models.TokenTypeLParen) {
args, err := p.parseTypeArgsString()
if err != nil {
return nil, err
}
opt.Value += args
}
stmt.Options = append(stmt.Options, opt)
}

// ClickHouse CREATE TABLE trailing clauses: ORDER BY, PARTITION BY,
// PRIMARY KEY, SAMPLE BY, SETTINGS. These appear after ENGINE = ... and
// are required for MergeTree-family engines. Parse permissively:
// each consumes a parenthesised expression list or a single column ref.
for p.dialect == string(keywords.DialectClickHouse) {
if p.isType(models.TokenTypeOrder) {
p.advance() // ORDER
if p.isType(models.TokenTypeBy) {
p.advance()
}
if err := p.skipClickHouseClauseExpr(); err != nil {
return nil, err
}
continue
}
if p.isTokenMatch("PARTITION") {
p.advance()
if p.isType(models.TokenTypeBy) {
p.advance()
}
if err := p.skipClickHouseClauseExpr(); err != nil {
return nil, err
}
continue
}
if p.isType(models.TokenTypePrimary) {
p.advance()
if p.isType(models.TokenTypeKey) {
p.advance()
}
if err := p.skipClickHouseClauseExpr(); err != nil {
return nil, err
}
continue
}
if p.isTokenMatch("SAMPLE") {
p.advance()
if p.isType(models.TokenTypeBy) {
p.advance()
}
if err := p.skipClickHouseClauseExpr(); err != nil {
return nil, err
}
continue
}
break
}

// SQLite: optional WITHOUT ROWID clause
if p.isTokenMatch("WITHOUT") {
p.advance() // Consume WITHOUT
Expand Down Expand Up @@ -550,3 +611,49 @@ func (p *Parser) parseTruncateStatement() (*ast.TruncateStatement, error) {

return stmt, nil
}

// skipClickHouseClauseExpr walks past the expression that follows a
// ClickHouse CREATE TABLE trailing clause (ORDER BY, PARTITION BY,
// PRIMARY KEY, SAMPLE BY). These clauses are not yet modelled on the AST,
// so the tokens are simply consumed: either one balanced parenthesised
// block, or a bare expression read up to the next clause keyword, ';', or
// EOF. Returns an error only for an unterminated parenthesised block.
func (p *Parser) skipClickHouseClauseExpr() error {
	if !p.isType(models.TokenTypeLParen) {
		// Bare expression: advance until a terminator or the start of the
		// next ClickHouse trailing clause.
		for {
			tt := p.currentToken.Token.Type
			if tt == models.TokenTypeEOF || tt == models.TokenTypeSemicolon {
				return nil
			}
			if tt == models.TokenTypeOrder || tt == models.TokenTypePrimary {
				return nil
			}
			switch strings.ToUpper(p.currentToken.Token.Value) {
			case "PARTITION", "SAMPLE", "SETTINGS", "TTL":
				return nil
			}
			p.advance()
		}
	}

	// Parenthesised list: consume tokens while tracking nesting depth; the
	// block ends when the depth returns to zero on a closing paren.
	depth := 0
	for {
		tt := p.currentToken.Token.Type
		if tt == models.TokenTypeEOF {
			return p.expectedError(") to close clause expression")
		}
		if tt == models.TokenTypeLParen {
			depth++
		} else if tt == models.TokenTypeRParen {
			depth--
		}
		p.advance()
		if tt == models.TokenTypeRParen && depth == 0 {
			return nil
		}
	}
}
104 changes: 80 additions & 24 deletions pkg/sql/parser/ddl_columns.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,32 +73,18 @@ func (p *Parser) parseColumnDef() (*ast.ColumnDef, error) {

dataTypeStr := dataType.Name

// Check for type parameters like VARCHAR(100) or DECIMAL(10,2)
// Check for type parameters. The simple form is VARCHAR(100) or
// DECIMAL(10,2), but ClickHouse also has nested/parameterised types like
// Array(Nullable(String)), Map(String, Array(UInt32)), Tuple(a UInt8, b String),
// FixedString(16), DateTime64(3, 'UTC'), LowCardinality(String), Decimal(38, 18),
// and engines like ReplicatedMergeTree('/path', '{replica}'). Use a depth-tracking
// token collector that round-trips the type string.
if p.isType(models.TokenTypeLParen) {
dataTypeStr += "("
p.advance() // Consume (

// Parse first parameter (can be number or identifier like MAX)
if p.isType(models.TokenTypeNumber) || p.isType(models.TokenTypeIdentifier) {
dataTypeStr += p.currentToken.Token.Value
p.advance()
}

// Check for second parameter (e.g., DECIMAL(10,2))
if p.isType(models.TokenTypeComma) {
dataTypeStr += ","
p.advance()
if p.isType(models.TokenTypeNumber) || p.isType(models.TokenTypeIdentifier) {
dataTypeStr += p.currentToken.Token.Value
p.advance()
}
}

if !p.isType(models.TokenTypeRParen) {
return nil, p.expectedError(") after type parameters")
args, err := p.parseTypeArgsString()
if err != nil {
return nil, err
}
dataTypeStr += ")"
p.advance() // Consume )
dataTypeStr += args
}

colDef := &ast.ColumnDef{
Expand Down Expand Up @@ -480,3 +466,73 @@ func (p *Parser) parseConstraintColumnList() ([]string, error) {

return columns, nil
}

// parseTypeArgsString consumes a balanced parenthesised type-argument list
// and returns it as a string (including the outer parens). Supports nested
// types like Array(Nullable(String)), Map(String, Array(UInt32)),
// Tuple(a UInt8, b String), DateTime64(3, 'UTC'), and engine arguments like
// ReplicatedMergeTree('/path', '{replica}'). The current token must be '('.
//
// Returns an error when the current token is not '(' or when EOF is reached
// before the list is closed.
func (p *Parser) parseTypeArgsString() (string, error) {
	if !p.isType(models.TokenTypeLParen) {
		return "", p.expectedError("(")
	}

	var buf strings.Builder
	depth := 0
	prevWasIdent := false // for inserting spaces between adjacent tokens (e.g. "a UInt8")

	for {
		tok := p.currentToken.Token
		switch tok.Type {
		case models.TokenTypeEOF:
			return "", p.expectedError(") to close type arguments")
		case models.TokenTypeLParen:
			buf.WriteByte('(')
			depth++
			prevWasIdent = false
			p.advance()
			continue
		case models.TokenTypeRParen:
			buf.WriteByte(')')
			depth--
			p.advance()
			if depth == 0 {
				return buf.String(), nil
			}
			prevWasIdent = false
			continue
		case models.TokenTypeComma:
			buf.WriteString(", ")
			prevWasIdent = false
			p.advance()
			continue
		case models.TokenTypeString, models.TokenTypeSingleQuotedString,
			models.TokenTypeDoubleQuotedString:
			// String literals are handled before the empty-value guard below:
			// an empty literal ('') is valid as an argument and must not be
			// rejected. Embedded single quotes are doubled so the rendered
			// string round-trips as valid SQL ('it''s', not 'it's').
			if prevWasIdent {
				buf.WriteByte(' ')
			}
			buf.WriteByte('\'')
			buf.WriteString(strings.ReplaceAll(tok.Value, "'", "''"))
			buf.WriteByte('\'')
			prevWasIdent = false
			p.advance()
			continue
		}

		// Render any other leaf token by its raw value (numbers, identifiers,
		// keywords like Nullable / Array). An empty value here indicates a
		// malformed token stream.
		val := tok.Value
		if val == "" {
			return "", p.expectedError("type argument")
		}

		// Insert a space when two adjacent leaf tokens both look like
		// identifiers or numbers — this preserves "name Type" pairs in named
		// tuple elements.
		if prevWasIdent {
			buf.WriteByte(' ')
		}
		buf.WriteString(val)
		prevWasIdent = true
		p.advance()
	}
}
Loading