Skip to content

Commit d149200

Browse files
committed
Update parser_test to test each statement with subtests
- Split query.sql into individual statements
- Create subtest for each statement (stmt1, stmt2, etc.)
- Check explain.txt for first statement, explain_N.txt for Nth statement
- Skip statements beyond first if no explain file exists (not yet regenerated)
- Skip tests with all statements commented out
- Add explain files for 00002_system_numbers (13 statements)

This allows tests with multiple SQL statements to verify each one against its corresponding explain output.
1 parent d6d8f6c commit d149200

File tree

13 files changed

+339
-116
lines changed

13 files changed

+339
-116
lines changed

parser/parser_test.go

Lines changed: 202 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"encoding/json"
66
"flag"
7+
"fmt"
78
"os"
89
"path/filepath"
910
"strings"
@@ -32,14 +33,94 @@ type testMetadata struct {
3233
ParseError bool `json:"parse_error,omitempty"` // true if query is intentionally invalid SQL
3334
}
3435

36+
// splitStatements splits SQL content into individual statements.
37+
func splitStatements(content string) []string {
38+
var statements []string
39+
var current strings.Builder
40+
41+
lines := strings.Split(content, "\n")
42+
for _, line := range lines {
43+
trimmed := strings.TrimSpace(line)
44+
45+
// Skip empty lines and full-line comments
46+
if trimmed == "" || strings.HasPrefix(trimmed, "--") {
47+
continue
48+
}
49+
50+
// Remove inline comments (-- comment at end of line)
51+
if idx := findCommentStart(trimmed); idx >= 0 {
52+
trimmed = strings.TrimSpace(trimmed[:idx])
53+
if trimmed == "" {
54+
continue
55+
}
56+
}
57+
58+
// Add to current statement
59+
if current.Len() > 0 {
60+
current.WriteString(" ")
61+
}
62+
current.WriteString(trimmed)
63+
64+
// Check if statement is complete (ends with ;)
65+
if strings.HasSuffix(trimmed, ";") {
66+
stmt := strings.TrimSpace(current.String())
67+
if stmt != "" {
68+
statements = append(statements, stmt)
69+
}
70+
current.Reset()
71+
}
72+
}
73+
74+
// Handle statement without trailing semicolon
75+
if current.Len() > 0 {
76+
stmt := strings.TrimSpace(current.String())
77+
if stmt != "" {
78+
statements = append(statements, stmt)
79+
}
80+
}
81+
82+
return statements
83+
}
84+
85+
// findCommentStart returns the byte offset at which a "--" comment begins in
// line, or -1 if the line has none.
//
// A "--" counts as a comment start only when it is outside any quoted region
// and is followed by a space, a tab, or the end of the line. Quoted regions
// are opened by ', " or ` and closed by the same character; inside one, a
// backslash causes the following byte to be skipped.
func findCommentStart(line string) int {
	const noQuote = 0

	var quote byte = noQuote // the delimiter of the open quoted region, if any
	n := len(line)

	for pos := 0; pos < n; pos++ {
		ch := line[pos]

		if quote != noQuote {
			switch {
			case ch == '\\' && pos+1 < n:
				pos++ // step over the escaped byte
			case ch == quote:
				quote = noQuote
			}
			continue
		}

		switch ch {
		case '\'', '"', '`':
			quote = ch
		case '-':
			if pos+1 >= n || line[pos+1] != '-' {
				continue
			}
			// Only treat "--" as a comment when whitespace or end of line follows.
			rest := line[pos+2:]
			if rest == "" || rest[0] == ' ' || rest[0] == '\t' {
				return pos
			}
		}
	}

	return -1
}
113+
35114
// TestParser tests the parser using test cases from the testdata directory.
36115
// Each subdirectory in testdata represents a test case with:
37-
// - query.sql: The SQL query to parse
116+
// - query.sql: The SQL query to parse (may contain multiple statements)
38117
// - metadata.json (optional): Metadata including:
39118
// - todo: true if the test is not yet expected to pass
40119
// - explain: false to skip the test (e.g., when ClickHouse couldn't parse it)
41120
// - skip: true to skip the test entirely (e.g., causes infinite loop)
42121
// - parse_error: true if the query is intentionally invalid SQL (expected to fail parsing)
122+
// - explain.txt: Expected EXPLAIN AST output for first statement
123+
// - explain_N.txt: Expected EXPLAIN AST output for Nth statement (N >= 2)
43124
func TestParser(t *testing.T) {
44125
testdataDir := "testdata"
45126

@@ -58,17 +139,13 @@ func TestParser(t *testing.T) {
58139
t.Run(entry.Name(), func(t *testing.T) {
59140
t.Parallel()
60141

61-
// Create context with 1 second timeout
62-
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
63-
defer cancel()
64-
65142
// Read the query file
66143
queryPath := filepath.Join(testDir, "query.sql")
67144
queryBytes, err := os.ReadFile(queryPath)
68145
if err != nil {
69146
t.Fatalf("Failed to read query.sql: %v", err)
70147
}
71-
query := string(queryBytes)
148+
queryContent := string(queryBytes)
72149

73150
// Read optional metadata
74151
var metadata testMetadata
@@ -93,133 +170,142 @@ func TestParser(t *testing.T) {
93170
}
94171
}
95172

96-
// Parse the query - we only check the first statement
97-
stmts, parseErr := parser.Parse(ctx, strings.NewReader(query))
98-
if len(stmts) == 0 {
99-
// If parse_error is true, this is expected - the query is intentionally invalid
100-
if metadata.ParseError {
101-
t.Skipf("Expected parse error (intentionally invalid SQL)")
102-
return
103-
}
104-
if metadata.Todo {
105-
if *checkSkipped {
106-
t.Skipf("STILL FAILING (parse error): %v", parseErr)
107-
} else {
108-
t.Skipf("TODO: Parser does not yet support (error: %v)", parseErr)
109-
}
110-
return
111-
}
112-
t.Fatalf("Parse error: %v", parseErr)
113-
}
114-
115-
// If parse_error is true but we parsed successfully, skip (our parser is more permissive)
116-
if metadata.ParseError {
117-
t.Skipf("Parsed query marked as parse_error (parser is more permissive)")
173+
// Split into individual statements
174+
statements := splitStatements(queryContent)
175+
if len(statements) == 0 {
176+
t.Skipf("No statements found in query.sql (all commented out)")
118177
return
119178
}
120179

121-
// Verify we can serialize to JSON
122-
_, jsonErr := json.Marshal(stmts[0])
123-
if jsonErr != nil {
124-
if metadata.Todo {
125-
if *checkSkipped {
126-
t.Skipf("STILL FAILING (JSON serialization): %v", jsonErr)
180+
// Test each statement as a subtest
181+
for i, stmt := range statements {
182+
stmtIndex := i + 1
183+
t.Run(fmt.Sprintf("stmt%d", stmtIndex), func(t *testing.T) {
184+
// Determine explain file path: explain.txt for first, explain_N.txt for N >= 2
185+
var explainPath string
186+
if stmtIndex == 1 {
187+
explainPath = filepath.Join(testDir, "explain.txt")
127188
} else {
128-
t.Skipf("TODO: JSON serialization failed: %v", jsonErr)
189+
explainPath = filepath.Join(testDir, fmt.Sprintf("explain_%d.txt", stmtIndex))
129190
}
130-
return
131-
}
132-
t.Fatalf("JSON marshal error: %v\nQuery: %s", jsonErr, query)
133-
}
134191

135-
// Check explain output if explain.txt exists
136-
explainPath := filepath.Join(testDir, "explain.txt")
137-
if expectedBytes, err := os.ReadFile(explainPath); err == nil {
138-
expected := strings.TrimSpace(string(expectedBytes))
139-
// Strip server error messages from expected output
140-
// These are messages like "The query succeeded but the server error '43' was expected..."
141-
if idx := strings.Index(expected, "\nThe query succeeded but the server error"); idx != -1 {
142-
expected = strings.TrimSpace(expected[:idx])
143-
}
144-
actual := strings.TrimSpace(parser.Explain(stmts[0]))
145-
// Use case-insensitive comparison since ClickHouse EXPLAIN AST has inconsistent casing
146-
// (e.g., Float64_NaN vs Float64_nan, GREATEST vs greatest)
147-
if !strings.EqualFold(actual, expected) {
148-
if metadata.Todo {
149-
if *checkSkipped {
150-
t.Skipf("STILL FAILING (explain mismatch):\nExpected:\n%s\n\nGot:\n%s", expected, actual)
151-
} else {
152-
t.Skipf("TODO: Explain output mismatch\nQuery: %s\nExpected:\n%s\n\nGot:\n%s", query, expected, actual)
192+
// For statements beyond the first, skip if no explain file exists
193+
// (these statements haven't been regenerated yet)
194+
if stmtIndex > 1 {
195+
if _, err := os.Stat(explainPath); os.IsNotExist(err) {
196+
t.Skipf("No explain_%d.txt file (run regenerate-explain to generate)", stmtIndex)
197+
return
153198
}
154-
return
155199
}
156-
t.Errorf("Explain output mismatch\nQuery: %s\nExpected:\n%s\n\nGot:\n%s", query, expected, actual)
157-
}
158-
}
159200

160-
// Check AST JSON output if ast.json exists (golden file for AST regression testing)
161-
astPath := filepath.Join(testDir, "ast.json")
162-
if expectedASTBytes, err := os.ReadFile(astPath); err == nil {
163-
actualASTBytes, _ := json.MarshalIndent(stmts[0], "", " ")
164-
expectedAST := strings.TrimSpace(string(expectedASTBytes))
165-
actualAST := strings.TrimSpace(string(actualASTBytes))
166-
if actualAST != expectedAST {
167-
if metadata.Todo {
168-
if *checkSkipped {
169-
t.Skipf("STILL FAILING (AST mismatch):\nExpected:\n%s\n\nGot:\n%s", expectedAST, actualAST)
170-
} else {
171-
t.Skipf("TODO: AST JSON mismatch\nQuery: %s\nExpected:\n%s\n\nGot:\n%s", query, expectedAST, actualAST)
201+
// Create context with 1 second timeout
202+
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
203+
defer cancel()
204+
205+
// Parse this statement
206+
stmts, parseErr := parser.Parse(ctx, strings.NewReader(stmt))
207+
if len(stmts) == 0 {
208+
// If parse_error is true, this is expected - the query is intentionally invalid
209+
if metadata.ParseError {
210+
t.Skipf("Expected parse error (intentionally invalid SQL)")
211+
return
212+
}
213+
if metadata.Todo {
214+
if *checkSkipped {
215+
t.Skipf("STILL FAILING (parse error): %v", parseErr)
216+
} else {
217+
t.Skipf("TODO: Parser does not yet support (error: %v)", parseErr)
218+
}
219+
return
172220
}
221+
t.Fatalf("Parse error: %v", parseErr)
222+
}
223+
224+
// If parse_error is true but we parsed successfully, skip (our parser is more permissive)
225+
if metadata.ParseError {
226+
t.Skipf("Parsed query marked as parse_error (parser is more permissive)")
173227
return
174228
}
175-
t.Errorf("AST JSON mismatch\nQuery: %s\nExpected:\n%s\n\nGot:\n%s", query, expectedAST, actualAST)
176-
}
177-
}
178229

179-
// Check Format output (roundtrip test)
180-
// Skip if todo_format is true, unless -check-format flag is set
181-
if !metadata.TodoFormat || *checkFormat {
182-
formatted := parser.Format(stmts)
183-
// Strip comments from expected since formatter doesn't preserve them
184-
expected := strings.TrimSpace(normalize.StripComments(query))
185-
// Compare with format normalization (whitespace + trailing semicolons)
186-
// Use case-insensitive comparison since formatter uses uppercase keywords
187-
formattedNorm := normalize.ForFormat(formatted)
188-
expectedNorm := normalize.ForFormat(expected)
189-
if !strings.EqualFold(formattedNorm, expectedNorm) {
190-
if metadata.TodoFormat {
191-
if *checkFormat {
192-
t.Logf("FORMAT STILL FAILING:\nExpected:\n%s\n\nGot:\n%s", expected, formatted)
230+
// Verify we can serialize to JSON
231+
_, jsonErr := json.Marshal(stmts[0])
232+
if jsonErr != nil {
233+
if metadata.Todo {
234+
if *checkSkipped {
235+
t.Skipf("STILL FAILING (JSON serialization): %v", jsonErr)
236+
} else {
237+
t.Skipf("TODO: JSON serialization failed: %v", jsonErr)
238+
}
239+
return
193240
}
194-
} else {
195-
t.Errorf("Format output mismatch\nExpected:\n%s\n\nGot:\n%s", expected, formatted)
241+
t.Fatalf("JSON marshal error: %v\nQuery: %s", jsonErr, stmt)
196242
}
197-
} else if metadata.TodoFormat && *checkFormat {
198-
// Automatically remove the todo_format flag from metadata.json
199-
metadata.TodoFormat = false
200-
updatedBytes, err := json.Marshal(metadata)
201-
if err != nil {
202-
t.Errorf("Failed to marshal updated metadata: %v", err)
203-
} else if err := os.WriteFile(metadataPath, append(updatedBytes, '\n'), 0644); err != nil {
204-
t.Errorf("Failed to write updated metadata.json: %v", err)
205-
} else {
206-
t.Logf("FORMAT ENABLED - removed todo_format flag from: %s", entry.Name())
243+
244+
// Check explain output if explain file exists
245+
if expectedBytes, err := os.ReadFile(explainPath); err == nil {
246+
expected := strings.TrimSpace(string(expectedBytes))
247+
// Strip server error messages from expected output
248+
if idx := strings.Index(expected, "\nThe query succeeded but the server error"); idx != -1 {
249+
expected = strings.TrimSpace(expected[:idx])
250+
}
251+
actual := strings.TrimSpace(parser.Explain(stmts[0]))
252+
// Use case-insensitive comparison since ClickHouse EXPLAIN AST has inconsistent casing
253+
if !strings.EqualFold(actual, expected) {
254+
if metadata.Todo {
255+
if *checkSkipped {
256+
t.Skipf("STILL FAILING (explain mismatch):\nExpected:\n%s\n\nGot:\n%s", expected, actual)
257+
} else {
258+
t.Skipf("TODO: Explain output mismatch\nQuery: %s\nExpected:\n%s\n\nGot:\n%s", stmt, expected, actual)
259+
}
260+
return
261+
}
262+
t.Errorf("Explain output mismatch\nQuery: %s\nExpected:\n%s\n\nGot:\n%s", stmt, expected, actual)
263+
}
207264
}
208-
}
209-
}
210265

211-
// If we get here with a todo test and -check-skipped is set, the test passes!
212-
// Automatically remove the todo flag from metadata.json
213-
if metadata.Todo && *checkSkipped {
214-
metadata.Todo = false
215-
updatedBytes, err := json.Marshal(metadata)
216-
if err != nil {
217-
t.Errorf("Failed to marshal updated metadata: %v", err)
218-
} else if err := os.WriteFile(metadataPath, append(updatedBytes, '\n'), 0644); err != nil {
219-
t.Errorf("Failed to write updated metadata.json: %v", err)
220-
} else {
221-
t.Logf("ENABLED - removed todo flag from: %s", entry.Name())
222-
}
266+
// Check Format output (roundtrip test) - only for first statement
267+
if stmtIndex == 1 && (!metadata.TodoFormat || *checkFormat) {
268+
formatted := parser.Format(stmts)
269+
// Strip comments from expected since formatter doesn't preserve them
270+
expected := strings.TrimSpace(normalize.StripComments(stmt))
271+
// Compare with format normalization (whitespace + trailing semicolons)
272+
formattedNorm := normalize.ForFormat(formatted)
273+
expectedNorm := normalize.ForFormat(expected)
274+
if !strings.EqualFold(formattedNorm, expectedNorm) {
275+
if metadata.TodoFormat {
276+
if *checkFormat {
277+
t.Logf("FORMAT STILL FAILING:\nExpected:\n%s\n\nGot:\n%s", expected, formatted)
278+
}
279+
} else {
280+
t.Errorf("Format output mismatch\nExpected:\n%s\n\nGot:\n%s", expected, formatted)
281+
}
282+
} else if metadata.TodoFormat && *checkFormat {
283+
// Automatically remove the todo_format flag from metadata.json
284+
metadata.TodoFormat = false
285+
updatedBytes, err := json.Marshal(metadata)
286+
if err != nil {
287+
t.Errorf("Failed to marshal updated metadata: %v", err)
288+
} else if err := os.WriteFile(metadataPath, append(updatedBytes, '\n'), 0644); err != nil {
289+
t.Errorf("Failed to write updated metadata.json: %v", err)
290+
} else {
291+
t.Logf("FORMAT ENABLED - removed todo_format flag from: %s", entry.Name())
292+
}
293+
}
294+
}
295+
296+
// If we get here with a todo test and -check-skipped is set on first statement, the test passes!
297+
if stmtIndex == 1 && metadata.Todo && *checkSkipped {
298+
metadata.Todo = false
299+
updatedBytes, err := json.Marshal(metadata)
300+
if err != nil {
301+
t.Errorf("Failed to marshal updated metadata: %v", err)
302+
} else if err := os.WriteFile(metadataPath, append(updatedBytes, '\n'), 0644); err != nil {
303+
t.Errorf("Failed to write updated metadata.json: %v", err)
304+
} else {
305+
t.Logf("ENABLED - removed todo flag from: %s", entry.Name())
306+
}
307+
}
308+
})
223309
}
224310
})
225311
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
SelectWithUnionQuery (children 1)
2+
ExpressionList (children 1)
3+
SelectQuery (children 3)
4+
ExpressionList (children 1)
5+
Asterisk
6+
TablesInSelectQuery (children 1)
7+
TablesInSelectQueryElement (children 1)
8+
TableExpression (children 1)
9+
TableIdentifier system.number
10+
Literal UInt64_1
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
SelectWithUnionQuery (children 1)
2+
ExpressionList (children 1)
3+
SelectQuery (children 3)
4+
ExpressionList (children 1)
5+
Asterisk
6+
TablesInSelectQuery (children 1)
7+
TablesInSelectQueryElement (children 1)
8+
TableExpression (children 1)
9+
TableIdentifier system
10+
Literal UInt64_1

0 commit comments

Comments
 (0)