Skip to content

Commit 6000cab

Browse files
committed
Add Explain function to parser package for EXPLAIN AST output
- Add Explain function that takes an ast.Statement and produces EXPLAIN AST output matching ClickHouse's format using a string builder
- Update parser tests to verify explain output against explain.txt files
- Fix test to skip SQL comment lines when parsing queries
- Remove todo:true from 10 passing test cases:
  - 00001_select_1, 00001_count_hits
  - 00002_count_visits, 00002_system_numbers
  - 00003_reinterpret_as_string
  - 00004_top_counters
  - 00005_filtering
  - 00006_agregates, 00006_extremes_and_subquery_from
  - 00007_array
1 parent 1eefa49 commit 6000cab

12 files changed

Lines changed: 419 additions & 14 deletions

File tree

parser/explain.go

Lines changed: 384 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,384 @@
1+
package parser
2+
3+
import (
4+
"fmt"
5+
"strings"
6+
7+
"github.com/kyleconroy/doubleclick/ast"
8+
)
9+
10+
// Explain returns the EXPLAIN AST output for a statement, matching ClickHouse's format.
11+
func Explain(stmt ast.Statement) string {
12+
var sb strings.Builder
13+
explainNode(&sb, stmt, 0)
14+
return sb.String()
15+
}
16+
17+
// explainNode writes the EXPLAIN AST output for an AST node.
18+
func explainNode(sb *strings.Builder, node interface{}, depth int) {
19+
if node == nil {
20+
return
21+
}
22+
23+
indent := strings.Repeat(" ", depth)
24+
25+
switch n := node.(type) {
26+
case *ast.SelectWithUnionQuery:
27+
children := countChildren(n)
28+
fmt.Fprintf(sb, "%sSelectWithUnionQuery (children %d)\n", indent, children)
29+
// Wrap selects in ExpressionList
30+
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Selects))
31+
for _, sel := range n.Selects {
32+
explainNode(sb, sel, depth+2)
33+
}
34+
35+
case *ast.SelectQuery:
36+
children := countSelectQueryChildren(n)
37+
fmt.Fprintf(sb, "%sSelectQuery (children %d)\n", indent, children)
38+
// Columns (ExpressionList)
39+
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Columns))
40+
for _, col := range n.Columns {
41+
explainNode(sb, col, depth+2)
42+
}
43+
// FROM
44+
if n.From != nil {
45+
explainNode(sb, n.From, depth+1)
46+
}
47+
// ARRAY JOIN
48+
if n.ArrayJoin != nil {
49+
explainNode(sb, n.ArrayJoin, depth+1)
50+
}
51+
// PREWHERE
52+
if n.PreWhere != nil {
53+
explainNode(sb, n.PreWhere, depth+1)
54+
}
55+
// WHERE
56+
if n.Where != nil {
57+
explainNode(sb, n.Where, depth+1)
58+
}
59+
// GROUP BY
60+
if len(n.GroupBy) > 0 {
61+
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.GroupBy))
62+
for _, g := range n.GroupBy {
63+
explainNode(sb, g, depth+2)
64+
}
65+
}
66+
// HAVING
67+
if n.Having != nil {
68+
explainNode(sb, n.Having, depth+1)
69+
}
70+
// ORDER BY
71+
if len(n.OrderBy) > 0 {
72+
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.OrderBy))
73+
for _, o := range n.OrderBy {
74+
explainNode(sb, o, depth+2)
75+
}
76+
}
77+
// LIMIT
78+
if n.Limit != nil {
79+
explainNode(sb, n.Limit, depth+1)
80+
}
81+
// OFFSET
82+
if n.Offset != nil {
83+
explainNode(sb, n.Offset, depth+1)
84+
}
85+
86+
case *ast.TablesInSelectQuery:
87+
fmt.Fprintf(sb, "%sTablesInSelectQuery (children %d)\n", indent, len(n.Tables))
88+
for _, t := range n.Tables {
89+
explainNode(sb, t, depth+1)
90+
}
91+
92+
case *ast.TablesInSelectQueryElement:
93+
children := 1 // table
94+
if n.Join != nil {
95+
children++
96+
}
97+
fmt.Fprintf(sb, "%sTablesInSelectQueryElement (children %d)\n", indent, children)
98+
if n.Table != nil {
99+
explainNode(sb, n.Table, depth+1)
100+
}
101+
if n.Join != nil {
102+
explainNode(sb, n.Join, depth+1)
103+
}
104+
105+
case *ast.TableExpression:
106+
children := 1 // table
107+
if n.Alias != "" {
108+
children++
109+
}
110+
fmt.Fprintf(sb, "%sTableExpression (children %d)\n", indent, children)
111+
explainNode(sb, n.Table, depth+1)
112+
113+
case *ast.TableIdentifier:
114+
name := n.Table
115+
if n.Database != "" {
116+
name = n.Database + "." + n.Table
117+
}
118+
fmt.Fprintf(sb, "%sTableIdentifier %s\n", indent, name)
119+
120+
case *ast.ArrayJoinClause:
121+
fmt.Fprintf(sb, "%sArrayJoin (children %d)\n", indent, 1)
122+
fmt.Fprintf(sb, "%s ExpressionList", indent)
123+
if len(n.Columns) > 0 {
124+
fmt.Fprintf(sb, " (children %d)", len(n.Columns))
125+
}
126+
fmt.Fprintln(sb)
127+
for _, col := range n.Columns {
128+
explainNode(sb, col, depth+2)
129+
}
130+
131+
case *ast.OrderByElement:
132+
fmt.Fprintf(sb, "%sOrderByElement (children %d)\n", indent, 1)
133+
explainNode(sb, n.Expression, depth+1)
134+
135+
case *ast.Identifier:
136+
name := n.Name()
137+
if n.Alias != "" {
138+
fmt.Fprintf(sb, "%sIdentifier %s (alias %s)\n", indent, name, n.Alias)
139+
} else {
140+
fmt.Fprintf(sb, "%sIdentifier %s\n", indent, name)
141+
}
142+
143+
case *ast.Literal:
144+
fmt.Fprintf(sb, "%sLiteral %s\n", indent, formatLiteral(n))
145+
146+
case *ast.FunctionCall:
147+
children := 1 // arguments ExpressionList
148+
if len(n.Parameters) > 0 {
149+
children++ // parameters ExpressionList
150+
}
151+
if n.Alias != "" {
152+
fmt.Fprintf(sb, "%sFunction %s (alias %s) (children %d)\n", indent, n.Name, n.Alias, children)
153+
} else {
154+
fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, n.Name, children)
155+
}
156+
// Arguments
157+
fmt.Fprintf(sb, "%s ExpressionList", indent)
158+
if len(n.Arguments) > 0 {
159+
fmt.Fprintf(sb, " (children %d)", len(n.Arguments))
160+
}
161+
fmt.Fprintln(sb)
162+
for _, arg := range n.Arguments {
163+
explainNode(sb, arg, depth+2)
164+
}
165+
// Parameters (for parametric functions)
166+
if len(n.Parameters) > 0 {
167+
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Parameters))
168+
for _, p := range n.Parameters {
169+
explainNode(sb, p, depth+2)
170+
}
171+
}
172+
173+
case *ast.BinaryExpr:
174+
// Convert operator to function name
175+
fnName := operatorToFunction(n.Op)
176+
fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1)
177+
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2)
178+
explainNode(sb, n.Left, depth+2)
179+
explainNode(sb, n.Right, depth+2)
180+
181+
case *ast.UnaryExpr:
182+
fnName := unaryOperatorToFunction(n.Op)
183+
fmt.Fprintf(sb, "%sFunction %s (children %d)\n", indent, fnName, 1)
184+
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 1)
185+
explainNode(sb, n.Operand, depth+2)
186+
187+
case *ast.Subquery:
188+
children := 1
189+
fmt.Fprintf(sb, "%sSubquery (children %d)\n", indent, children)
190+
explainNode(sb, n.Query, depth+1)
191+
192+
case *ast.AliasedExpr:
193+
explainAliasedExpr(sb, n, depth)
194+
195+
case *ast.Lambda:
196+
// Lambda is represented as Function lambda with tuple of params and body
197+
fmt.Fprintf(sb, "%sFunction lambda (children %d)\n", indent, 1)
198+
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, 2)
199+
// Parameters as tuple
200+
fmt.Fprintf(sb, "%s Function tuple (children %d)\n", indent, 1)
201+
fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(n.Parameters))
202+
for _, p := range n.Parameters {
203+
fmt.Fprintf(sb, "%s Identifier %s\n", indent, p)
204+
}
205+
// Body
206+
explainNode(sb, n.Body, depth+2)
207+
208+
case *ast.SetQuery:
209+
fmt.Fprintf(sb, "%sSet\n", indent)
210+
211+
default:
212+
// For unhandled types, just print the type name
213+
fmt.Fprintf(sb, "%s%T\n", indent, node)
214+
}
215+
}
216+
217+
// countChildren counts the children of a SelectWithUnionQuery
218+
func countChildren(n *ast.SelectWithUnionQuery) int {
219+
return 1 // ExpressionList of selects
220+
}
221+
222+
// countSelectQueryChildren counts the children of a SelectQuery
223+
func countSelectQueryChildren(n *ast.SelectQuery) int {
224+
count := 1 // columns ExpressionList
225+
if n.From != nil {
226+
count++
227+
}
228+
if n.ArrayJoin != nil {
229+
count++
230+
}
231+
if n.PreWhere != nil {
232+
count++
233+
}
234+
if n.Where != nil {
235+
count++
236+
}
237+
if len(n.GroupBy) > 0 {
238+
count++
239+
}
240+
if n.Having != nil {
241+
count++
242+
}
243+
if len(n.OrderBy) > 0 {
244+
count++
245+
}
246+
if n.Limit != nil {
247+
count++
248+
}
249+
if n.Offset != nil {
250+
count++
251+
}
252+
return count
253+
}
254+
255+
// formatLiteral formats a literal value for EXPLAIN AST output
256+
func formatLiteral(lit *ast.Literal) string {
257+
switch lit.Type {
258+
case ast.LiteralInteger:
259+
val := lit.Value.(int64)
260+
if val >= 0 {
261+
return fmt.Sprintf("UInt64_%d", val)
262+
}
263+
return fmt.Sprintf("Int64_%d", val)
264+
case ast.LiteralFloat:
265+
val := lit.Value.(float64)
266+
return fmt.Sprintf("Float64_%v", val)
267+
case ast.LiteralString:
268+
s := lit.Value.(string)
269+
return fmt.Sprintf("\\'%s\\'", s)
270+
case ast.LiteralBoolean:
271+
if lit.Value.(bool) {
272+
return "UInt8_1"
273+
}
274+
return "UInt8_0"
275+
case ast.LiteralNull:
276+
return "Null"
277+
case ast.LiteralArray:
278+
return formatArrayLiteral(lit.Value)
279+
case ast.LiteralTuple:
280+
return formatTupleLiteral(lit.Value)
281+
default:
282+
return fmt.Sprintf("%v", lit.Value)
283+
}
284+
}
285+
286+
// formatArrayLiteral formats an array literal for EXPLAIN AST output
287+
func formatArrayLiteral(val interface{}) string {
288+
exprs, ok := val.([]ast.Expression)
289+
if !ok {
290+
return "Array_[]"
291+
}
292+
var parts []string
293+
for _, e := range exprs {
294+
if lit, ok := e.(*ast.Literal); ok {
295+
parts = append(parts, formatLiteral(lit))
296+
} else if ident, ok := e.(*ast.Identifier); ok {
297+
parts = append(parts, ident.Name())
298+
} else {
299+
parts = append(parts, fmt.Sprintf("%v", e))
300+
}
301+
}
302+
return fmt.Sprintf("Array_[%s]", strings.Join(parts, ", "))
303+
}
304+
305+
// formatTupleLiteral formats a tuple literal for EXPLAIN AST output
306+
func formatTupleLiteral(val interface{}) string {
307+
exprs, ok := val.([]ast.Expression)
308+
if !ok {
309+
return "Tuple_()"
310+
}
311+
var parts []string
312+
for _, e := range exprs {
313+
if lit, ok := e.(*ast.Literal); ok {
314+
parts = append(parts, formatLiteral(lit))
315+
} else if ident, ok := e.(*ast.Identifier); ok {
316+
parts = append(parts, ident.Name())
317+
} else {
318+
parts = append(parts, fmt.Sprintf("%v", e))
319+
}
320+
}
321+
return fmt.Sprintf("Tuple_(%s)", strings.Join(parts, ", "))
322+
}
323+
324+
// binaryOperatorFunctions lists the binary operators that have a dedicated
// function name in EXPLAIN AST output.
var binaryOperatorFunctions = map[string]string{
	"+":   "plus",
	"-":   "minus",
	"*":   "multiply",
	"/":   "divide",
	"%":   "modulo",
	"=":   "equals",
	"==":  "equals",
	"!=":  "notEquals",
	"<>":  "notEquals",
	"<":   "less",
	">":   "greater",
	"<=":  "lessOrEquals",
	">=":  "greaterOrEquals",
	"AND": "and",
	"OR":  "or",
	"||":  "concat",
}

// operatorToFunction returns the function name used for the binary operator
// op. Operators without a dedicated entry are returned lower-cased.
func operatorToFunction(op string) string {
	if name, known := binaryOperatorFunctions[op]; known {
		return name
	}
	return strings.ToLower(op)
}
359+
360+
// unaryOperatorToFunction returns the function name used for the unary
// operator op: "-" becomes "negate" and "NOT" becomes "not". Any other
// operator is returned lower-cased.
func unaryOperatorToFunction(op string) string {
	if op == "-" {
		return "negate"
	}
	if op == "NOT" {
		return "not"
	}
	return strings.ToLower(op)
}
371+
372+
// explainAliasedExpr handles expressions with aliases
373+
func explainAliasedExpr(sb *strings.Builder, n *ast.AliasedExpr, depth int) {
374+
// For aliased expressions, we need to show the underlying expression with the alias
375+
indent := strings.Repeat(" ", depth)
376+
377+
switch e := n.Expr.(type) {
378+
case *ast.Literal:
379+
fmt.Fprintf(sb, "%sLiteral %s (alias %s)\n", indent, formatLiteral(e), n.Alias)
380+
default:
381+
// For other types, recursively explain and add alias info
382+
explainNode(sb, n.Expr, depth)
383+
}
384+
}

0 commit comments

Comments
 (0)