Skip to content

Commit b22df7b

Browse files
authored
Expand Explain function to handle more AST types (#11)
1 parent d6aa928 commit b22df7b

File tree

12 files changed

+1301
-377
lines changed

12 files changed

+1301
-377
lines changed

TODO.md

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
# TODO: Remaining Parser and Explain Issues
2+
3+
## Current State
4+
5+
- **Tests passing:** 5,197 (76.2%)
6+
- **Tests skipped:** 1,627 (23.8%)
7+
- Parser issues: ~675
8+
- Explain mismatches: ~637
9+
10+
## Parser Issues
11+
12+
These require changes to `parser/parser.go`:
13+
14+
### Table/Database Names Starting with Numbers
15+
Tables and databases with names starting with digits fail to parse:
16+
```sql
17+
DROP TABLE IF EXISTS 03657_gby_overflow;
18+
DROP DATABASE IF EXISTS 03710_database;
19+
```
20+
21+
### FORMAT Null
22+
The `FORMAT Null` clause is not recognized:
23+
```sql
24+
SELECT ... FORMAT Null;
25+
```
26+
27+
### FETCH FIRST ... ROW ONLY
28+
SQL standard fetch syntax is not supported:
29+
```sql
30+
SELECT ... FETCH FIRST 1 ROW ONLY;
31+
```
32+
33+
### INSERT INTO FUNCTION
34+
Function-based inserts are not supported:
35+
```sql
36+
INSERT INTO FUNCTION file('file.parquet') SELECT ...;
37+
```
38+
39+
### Subquery Aliases with AS
40+
Subquery aliases in FROM clauses with keyword `AS`:
41+
```sql
42+
SELECT * FROM (SELECT 1 x) AS alias;
43+
```
44+
45+
### String Concatenation Operator ||
46+
The `||` operator fails to parse in some contexts:
47+
```sql
48+
SELECT currentDatabase() || '_test' AS key;
49+
```
50+
51+
### MOD/DIV Operators
52+
The `MOD` and `DIV` keywords are not supported as operators:
53+
```sql
54+
SELECT number MOD 3, number DIV 3 FROM ...;
55+
```
56+
57+
### Reserved Keyword Handling
58+
Keywords like `LEFT` and `RIGHT` fail to parse when used as table aliases:
59+
```sql
60+
SELECT * FROM numbers(10) AS left RIGHT JOIN ...;
61+
```
62+
63+
### Parameterized Settings
64+
Settings with `$` parameters:
65+
```sql
66+
SET param_$1 = 'Hello';
67+
```
68+
69+
### Incomplete CASE Expression
70+
CASE without END:
71+
```sql
72+
SELECT CASE number -- missing END
73+
```
74+
75+
## Explain Output Issues
76+
77+
These require changes to `internal/explain/`:
78+
79+
### Double Equals (==) Operator
80+
The `==` operator creates extra nested equals/tuple nodes:
81+
```sql
82+
SELECT value == '127.0.0.1:9181'
83+
```
84+
Expected: `Function equals` with `Identifier` and `Literal`
85+
Got: Nested `Function equals` with extra `Function tuple`
86+
87+
### CreateQuery Spacing
88+
Some ClickHouse versions output extra space before `(children`:
89+
```
90+
CreateQuery d1  (children 1) -- two spaces
91+
CreateQuery d1 (children 1) -- one space (our output)
92+
```
93+
94+
### Server Error Messages in Expected Output
95+
Some test expected outputs include trailing messages:
96+
```
97+
The query succeeded but the server error '42' was expected
98+
```
99+
These are not part of the actual EXPLAIN output.
100+
101+
## Lower Priority
102+
103+
### DateTime64 with Timezone
104+
Type parameters with string timezone:
105+
```sql
106+
DateTime64(3,'UTC')
107+
```
108+
109+
### Complex Type Expressions
110+
Nested type expressions in column definitions:
111+
```sql
112+
CREATE TABLE t (c LowCardinality(UUID));
113+
```
114+
115+
### Parameterized Views
116+
View definitions with parameters:
117+
```sql
118+
CREATE VIEW v AS SELECT ... WHERE x={parity:Int8};
119+
```
120+
121+
## Testing Notes
122+
123+
Run tests with timeout to catch infinite loops:
124+
```bash
125+
go test ./parser -timeout 5s -v
126+
```
127+
128+
Count test results:
129+
```bash
130+
go test ./parser -timeout 5s -v 2>&1 | grep -E 'PASS:|SKIP:' | cut -d':' -f1 | sort | uniq -c
131+
```
132+
133+
View explain mismatches:
134+
```bash
135+
go test ./parser -timeout 5s -v 2>&1 | grep -A 30 "TODO: Explain output mismatch" | head -100
136+
```

ast/ast.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,7 @@ type DropQuery struct {
315315
Database string `json:"database,omitempty"`
316316
Table string `json:"table,omitempty"`
317317
View string `json:"view,omitempty"`
318+
User string `json:"user,omitempty"`
318319
Temporary bool `json:"temporary,omitempty"`
319320
OnCluster string `json:"on_cluster,omitempty"`
320321
DropDatabase bool `json:"drop_database,omitempty"`
@@ -449,6 +450,7 @@ const (
449450
ShowCreateDB ShowType = "CREATE_DATABASE"
450451
ShowColumns ShowType = "COLUMNS"
451452
ShowDictionaries ShowType = "DICTIONARIES"
453+
ShowFunctions ShowType = "FUNCTIONS"
452454
)
453455

454456
// ExplainQuery represents an EXPLAIN statement.

internal/explain/explain.go

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
// Package explain provides EXPLAIN AST output functionality for ClickHouse SQL.
2+
package explain
3+
4+
import (
5+
"fmt"
6+
"strings"
7+
8+
"github.com/kyleconroy/doubleclick/ast"
9+
)
10+
11+
// Explain returns the EXPLAIN AST output for a statement, matching ClickHouse's format.
12+
func Explain(stmt ast.Statement) string {
13+
var sb strings.Builder
14+
Node(&sb, stmt, 0)
15+
return sb.String()
16+
}
17+
18+
// Node writes the EXPLAIN AST output for an AST node.
19+
func Node(sb *strings.Builder, node interface{}, depth int) {
20+
if node == nil {
21+
// nil can represent an empty tuple in function arguments
22+
indent := strings.Repeat(" ", depth)
23+
fmt.Fprintf(sb, "%sFunction tuple (children %d)\n", indent, 1)
24+
fmt.Fprintf(sb, "%s ExpressionList\n", indent)
25+
return
26+
}
27+
28+
indent := strings.Repeat(" ", depth)
29+
30+
switch n := node.(type) {
31+
// Select statements
32+
case *ast.SelectWithUnionQuery:
33+
explainSelectWithUnionQuery(sb, n, indent, depth)
34+
case *ast.SelectQuery:
35+
explainSelectQuery(sb, n, indent, depth)
36+
37+
// Tables
38+
case *ast.TablesInSelectQuery:
39+
explainTablesInSelectQuery(sb, n, indent, depth)
40+
case *ast.TablesInSelectQueryElement:
41+
explainTablesInSelectQueryElement(sb, n, indent, depth)
42+
case *ast.TableExpression:
43+
explainTableExpression(sb, n, indent, depth)
44+
case *ast.TableIdentifier:
45+
explainTableIdentifier(sb, n, indent)
46+
case *ast.ArrayJoinClause:
47+
explainArrayJoinClause(sb, n, indent, depth)
48+
case *ast.TableJoin:
49+
explainTableJoin(sb, n, indent, depth)
50+
51+
// Expressions
52+
case *ast.OrderByElement:
53+
explainOrderByElement(sb, n, indent, depth)
54+
case *ast.Identifier:
55+
explainIdentifier(sb, n, indent)
56+
case *ast.Literal:
57+
explainLiteral(sb, n, indent, depth)
58+
case *ast.BinaryExpr:
59+
explainBinaryExpr(sb, n, indent, depth)
60+
case *ast.UnaryExpr:
61+
explainUnaryExpr(sb, n, indent, depth)
62+
case *ast.Subquery:
63+
explainSubquery(sb, n, indent, depth)
64+
case *ast.AliasedExpr:
65+
explainAliasedExpr(sb, n, depth)
66+
case *ast.Asterisk:
67+
explainAsterisk(sb, n, indent)
68+
69+
// Functions
70+
case *ast.FunctionCall:
71+
explainFunctionCall(sb, n, indent, depth)
72+
case *ast.Lambda:
73+
explainLambda(sb, n, indent, depth)
74+
case *ast.CastExpr:
75+
explainCastExpr(sb, n, indent, depth)
76+
case *ast.InExpr:
77+
explainInExpr(sb, n, indent, depth)
78+
case *ast.TernaryExpr:
79+
explainTernaryExpr(sb, n, indent, depth)
80+
case *ast.ArrayAccess:
81+
explainArrayAccess(sb, n, indent, depth)
82+
case *ast.TupleAccess:
83+
explainTupleAccess(sb, n, indent, depth)
84+
case *ast.LikeExpr:
85+
explainLikeExpr(sb, n, indent, depth)
86+
case *ast.BetweenExpr:
87+
explainBetweenExpr(sb, n, indent, depth)
88+
case *ast.IsNullExpr:
89+
explainIsNullExpr(sb, n, indent, depth)
90+
case *ast.CaseExpr:
91+
explainCaseExpr(sb, n, indent, depth)
92+
case *ast.IntervalExpr:
93+
explainIntervalExpr(sb, n, indent, depth)
94+
case *ast.ExistsExpr:
95+
explainExistsExpr(sb, n, indent, depth)
96+
case *ast.ExtractExpr:
97+
explainExtractExpr(sb, n, indent, depth)
98+
99+
// DDL statements
100+
case *ast.CreateQuery:
101+
explainCreateQuery(sb, n, indent, depth)
102+
case *ast.DropQuery:
103+
explainDropQuery(sb, n, indent)
104+
case *ast.SetQuery:
105+
explainSetQuery(sb, indent)
106+
case *ast.SystemQuery:
107+
explainSystemQuery(sb, indent)
108+
case *ast.ExplainQuery:
109+
explainExplainQuery(sb, n, indent, depth)
110+
case *ast.ShowQuery:
111+
explainShowQuery(sb, n, indent)
112+
case *ast.UseQuery:
113+
explainUseQuery(sb, n, indent)
114+
case *ast.DescribeQuery:
115+
explainDescribeQuery(sb, n, indent)
116+
117+
// Types
118+
case *ast.DataType:
119+
explainDataType(sb, n, indent, depth)
120+
case *ast.Parameter:
121+
explainParameter(sb, n, indent)
122+
123+
default:
124+
// For unhandled types, just print the type name
125+
fmt.Fprintf(sb, "%s%T\n", indent, node)
126+
}
127+
}
128+
129+
// TablesWithArrayJoin handles FROM and ARRAY JOIN together as TablesInSelectQuery
130+
func TablesWithArrayJoin(sb *strings.Builder, from *ast.TablesInSelectQuery, arrayJoin *ast.ArrayJoinClause, depth int) {
131+
indent := strings.Repeat(" ", depth)
132+
133+
tableCount := 0
134+
if from != nil {
135+
tableCount = len(from.Tables)
136+
}
137+
if arrayJoin != nil {
138+
tableCount++
139+
}
140+
141+
fmt.Fprintf(sb, "%sTablesInSelectQuery (children %d)\n", indent, tableCount)
142+
143+
if from != nil {
144+
for _, t := range from.Tables {
145+
Node(sb, t, depth+1)
146+
}
147+
}
148+
149+
if arrayJoin != nil {
150+
// ARRAY JOIN is wrapped in TablesInSelectQueryElement
151+
fmt.Fprintf(sb, "%s TablesInSelectQueryElement (children %d)\n", indent, 1)
152+
Node(sb, arrayJoin, depth+2)
153+
}
154+
}
155+
156+
// Column handles column declarations
157+
func Column(sb *strings.Builder, col *ast.ColumnDeclaration, depth int) {
158+
indent := strings.Repeat(" ", depth)
159+
children := 0
160+
if col.Type != nil {
161+
children++
162+
}
163+
if col.Default != nil {
164+
children++
165+
}
166+
fmt.Fprintf(sb, "%sColumnDeclaration %s (children %d)\n", indent, col.Name, children)
167+
if col.Type != nil {
168+
Node(sb, col.Type, depth+1)
169+
}
170+
if col.Default != nil {
171+
Node(sb, col.Default, depth+1)
172+
}
173+
}

0 commit comments

Comments
 (0)