Skip to content

Commit 370e076

Browse files
Ajit Pratap SinghAjit Pratap Singh
authored and committed
fix(parser): allow TABLE/PARTITION/TABLES as identifiers in ClickHouse (#480)
ClickHouse system tables (system.replicas, system.parts, system.tables) expose columns named `table` and `partition`, and queries commonly reference `system.tables`. The parser previously rejected these because TABLE, PARTITION, and TABLES tokenize as keywords and the non-reserved-keyword-as-identifier path was gated to SQL Server only.

- Extend the gate in parsePrimaryExpression so that the path is also enabled for ClickHouse.
- Add TokenTypePartition and the "TABLES" keyword value to isNonReservedKeyword so they can serve as identifiers in expression position and after qualifiers.

Closes #480
1 parent dd0215b commit 370e076

3 files changed

Lines changed: 111 additions & 4 deletions

File tree

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
// Copyright 2026 GoSQLX Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
//     http://www.apache.org/licenses/LICENSE-2.0
8+
9+
package parser_test
10+
11+
import (
12+
"testing"
13+
14+
"github.com/ajitpratap0/GoSQLX/pkg/gosqlx"
15+
"github.com/ajitpratap0/GoSQLX/pkg/sql/keywords"
16+
)
17+
18+
// TestClickHouseTableAsIdentifier verifies that the ClickHouse dialect parses
19+
// queries that use `table` as a column identifier (in SELECT lists, function
20+
// arguments, and GROUP BY clauses), `partition` as a function argument, and
21+
// `system.tables` as a FROM target. Each query only has to parse without error
22+
// and yield a non-nil result. Regression test for issue #480.
23+
func TestClickHouseTableAsIdentifier(t *testing.T) {
24+
// Subtest name -> ClickHouse query that must parse successfully.
queries := map[string]string{
25+
"replicas_with_table_column": `SELECT
26+
database,
27+
table,
28+
is_leader,
29+
is_readonly,
30+
is_session_expired,
31+
parts_to_check,
32+
queue_size,
33+
inserts_in_queue,
34+
merges_in_queue,
35+
absolute_delay,
36+
last_queue_update,
37+
zookeeper_path
38+
FROM system.replicas
39+
ORDER BY absolute_delay DESC`,
40+
41+
"tables_with_bytes_on_disk": `SELECT
42+
database,
43+
table,
44+
engine,
45+
formatReadableSize(bytes_on_disk) AS size,
46+
parts,
47+
active_parts
48+
FROM system.tables
49+
WHERE engine LIKE '%MergeTree%'
50+
AND is_temporary = 0
51+
ORDER BY bytes_on_disk DESC
52+
LIMIT 10`,
53+
54+
"tables_with_total_bytes": `SELECT
55+
database,
56+
table,
57+
engine,
58+
formatReadableSize(total_bytes) AS size,
59+
parts,
60+
active_parts
61+
FROM system.tables
62+
WHERE engine LIKE '%MergeTree%'
63+
AND is_temporary = 0
64+
ORDER BY total_bytes DESC
65+
LIMIT 10`,
66+
67+
"parts_with_concat_table": `SELECT
68+
concat(database, '.' ,table) AS table_name,
69+
count() AS part_count,
70+
max(partition) AS latest_partition,
71+
formatReadableSize(sum(bytes_on_disk)) AS total_size
72+
FROM system.parts
73+
WHERE active = 1
74+
AND database NOT IN ('system')
75+
GROUP BY database, table
76+
ORDER BY part_count DESC
77+
LIMIT 10`,
78+
79+
"parts_having_count": `SELECT
80+
database,
81+
table,
82+
count() AS parts,
83+
formatReadableSize(sum(bytes_on_disk)) AS size
84+
FROM system.parts
85+
WHERE active = 1
86+
AND database NOT IN ('system')
87+
GROUP BY database, table
88+
HAVING parts > 300
89+
ORDER BY parts DESC`,
90+
}
91+
92+
for name, query := range queries {
93+
// Shadow the loop variable so the subtest closure captures a per-iteration
// copy (only matters on Go versions before 1.22).
query := query
94+
t.Run(name, func(t *testing.T) {
95+
parsed, err := gosqlx.ParseWithDialect(query, keywords.DialectClickHouse)
96+
if err != nil {
97+
t.Fatalf("ParseWithDialect failed: %v", err)
98+
}
99+
if parsed == nil {
100+
t.Fatal("expected non-nil AST")
101+
}
102+
})
103+
}
104+
}

pkg/sql/parser/expressions_literal.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ func (p *Parser) parsePrimaryExpression() (ast.Expression, error) {
103103
return funcCall, nil
104104
}
105105

106-
if p.isType(models.TokenTypeIdentifier) || p.isType(models.TokenTypeDoubleQuotedString) || (p.dialect == string(keywords.DialectSQLServer) && p.isNonReservedKeyword()) {
106+
if p.isType(models.TokenTypeIdentifier) || p.isType(models.TokenTypeDoubleQuotedString) || ((p.dialect == string(keywords.DialectSQLServer) || p.dialect == string(keywords.DialectClickHouse)) && p.isNonReservedKeyword()) {
107107
// Handle identifiers and function calls
108108
// Double-quoted strings are treated as identifiers in SQL (e.g., "column_name")
109109
// Non-reserved keywords (TARGET, SOURCE, etc.) can also be used as identifiers

pkg/sql/parser/parser.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -958,14 +958,17 @@ func (p *Parser) isNonReservedKeyword() bool {
958958
case models.TokenTypeTarget, models.TokenTypeSource, models.TokenTypeMatched:
959959
return true
960960
case models.TokenTypeTable, models.TokenTypeIndex, models.TokenTypeView,
961-
models.TokenTypeKey, models.TokenTypeColumn, models.TokenTypeDatabase:
961+
models.TokenTypeKey, models.TokenTypeColumn, models.TokenTypeDatabase,
962+
models.TokenTypePartition:
962963
// DDL keywords that are commonly used as quoted identifiers in MySQL (backtick)
963-
// and SQL Server (bracket) dialects.
964+
// and SQL Server (bracket) dialects, and as plain column names in ClickHouse
965+
// system tables (system.parts.partition, system.replicas.table, etc).
964966
return true
965967
case models.TokenTypeKeyword:
966968
// Token may have generic Type; check value for specific keywords
967969
switch strings.ToUpper(p.currentToken.Token.Value) {
968-
case "TARGET", "SOURCE", "MATCHED", "VALUE", "NAME", "TYPE", "STATUS":
970+
case "TARGET", "SOURCE", "MATCHED", "VALUE", "NAME", "TYPE", "STATUS",
971+
"TABLES":
969972
return true
970973
}
971974
}

0 commit comments

Comments
 (0)