// Source: parser/parser_test.go from github.com/sqlc-dev/doubleclick at commit
// e749513d5c765af4d80b3ad9e23d4be0024f5681.

package parser_test

import (
	"context"
	"encoding/json"
	"flag"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"testing"
	"time"

	"github.com/sqlc-dev/doubleclick/parser"
)

// checkExplain runs skipped explain_todo tests to see which ones now pass.
// When a statement listed in explain_todo produces the expected EXPLAIN
// output in this mode, TestParser removes its entry from explain_todo and
// rewrites that test case's metadata.json; a statement that still mismatches
// is only logged, not reported as a failure.
// Use with: go test ./parser -check-explain -v
var checkExplain = flag.Bool("check-explain", false, "Run skipped explain_todo tests to see which ones now pass")

// testMetadata holds optional metadata for a test case, decoded from the
// metadata.json file in the test case directory. TestParser also re-encodes
// this struct when it updates explain_todo, so the fields and tags here define
// what is written back to disk.
type testMetadata struct {
	ExplainTodo map[string]bool `json:"explain_todo,omitempty"` // map of stmtN -> true to skip specific statements
	Source      string          `json:"source,omitempty"`       // not read by the tests in this file; presumably the origin of the query (TODO confirm)
	Explain     *bool           `json:"explain,omitempty"`      // pointer so an absent key (nil) is distinguishable from an explicit false, which skips the test
	Skip        bool            `json:"skip,omitempty"`         // true to skip the whole test case
	ParseError  bool            `json:"parse_error,omitempty"`  // true if query is intentionally invalid SQL
}

// splitStatements breaks the contents of a query file into individual SQL
// statements.
//
// The input is processed line by line: blank lines and lines that start with
// "--" are dropped, a trailing "-- comment" (as located by findCommentStart)
// is cut off, and the remaining fragments are joined with single spaces. A
// statement is closed whenever a fragment ends in ";" (the semicolon is kept).
// Any fragments left over at the end of the input form a final statement
// without a trailing semicolon. The result is nil when no statement is found.
func splitStatements(content string) []string {
	var (
		result  []string
		pending []string // fragments of the statement currently being assembled
	)

	// flush emits the pending fragments as one statement and starts a new one.
	flush := func() {
		if len(pending) == 0 {
			return
		}
		if joined := strings.TrimSpace(strings.Join(pending, " ")); joined != "" {
			result = append(result, joined)
		}
		pending = pending[:0]
	}

	for _, raw := range strings.Split(content, "\n") {
		text := strings.TrimSpace(raw)

		// Nothing to keep on blank lines or full-line comments.
		if text == "" || strings.HasPrefix(text, "--") {
			continue
		}

		// Drop an inline comment at the end of the line, if there is one.
		if cut := findCommentStart(text); cut >= 0 {
			text = strings.TrimSpace(text[:cut])
			if text == "" {
				continue
			}
		}

		pending = append(pending, text)

		// A fragment ending in ";" terminates the current statement.
		if strings.HasSuffix(text, ";") {
			flush()
		}
	}

	// The last statement may lack a trailing semicolon.
	flush()

	return result
}

// findCommentStart returns the byte offset of the first "--" comment marker in
// line that lies outside a quoted region, or -1 if there is none.
//
// Single quotes, double quotes and backticks all open a quoted region that is
// closed by the same character; inside one, a backslash makes the following
// byte literal. A "--" only counts as a comment when it is followed by a
// space, a tab, or the end of the line.
func findCommentStart(line string) int {
	var quote byte // the quote character that opened the current region; 0 when outside
	n := len(line)

	for pos := 0; pos < n; pos++ {
		ch := line[pos]
		switch {
		case quote != 0 && ch == '\\' && pos+1 < n:
			// Backslash escape inside quotes: step over the escaped byte.
			pos++
		case quote != 0:
			if ch == quote {
				quote = 0
			}
		case ch == '\'' || ch == '"' || ch == '`':
			quote = ch
		case ch == '-' && pos+1 < n && line[pos+1] == '-':
			// Only treat it as a comment when whitespace or end of line follows.
			if pos+2 >= n || line[pos+2] == ' ' || line[pos+2] == '\t' {
				return pos
			}
		}
	}

	return -1
}

// TestParser tests the parser using test cases from the testdata directory.
// Each subdirectory in testdata represents a test case with:
// - query.sql: The SQL query to parse (may contain multiple statements)
// - metadata.json (optional): Metadata including:
//   - explain: false to skip the test (e.g., when ClickHouse couldn't parse it)
//   - skip: true to skip the test entirely (e.g., causes infinite loop)
//   - parse_error: true if the query is intentionally invalid SQL (expected to fail parsing)
//   - explain_todo: map of stmtN -> true to skip specific statements (e.g., {"stmt2": true, "stmt5": true})
// - explain.txt: Expected EXPLAIN AST output for first statement
// - explain_N.txt: Expected EXPLAIN AST output for Nth statement (N >= 2)
//
// Each test case runs as a parallel subtest named after its directory; each
// statement of its query.sql runs as a nested, sequential subtest named stmtN.
// A statement passes when it parses within one second, its first parsed
// statement marshals to JSON, and, if the explain file exists, the
// parser.Explain output matches it ignoring case and surrounding whitespace.
//
// With -check-explain, statements listed in explain_todo are run rather than
// skipped: a mismatch is only logged, and a match removes the entry from
// explain_todo and rewrites metadata.json.
func TestParser(t *testing.T) {
	testdataDir := "testdata"

	entries, err := os.ReadDir(testdataDir)
	if err != nil {
		t.Fatalf("Failed to read testdata directory: %v", err)
	}

	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}

		// The subtest below is parallel, so its body only runs after this loop
		// has finished. Copy what it needs into per-iteration variables instead
		// of capturing the range variable, which is shared across iterations
		// before Go 1.22.
		name := entry.Name()
		testDir := filepath.Join(testdataDir, name)

		t.Run(name, func(t *testing.T) {
			t.Parallel()

			// Read the query file
			queryPath := filepath.Join(testDir, "query.sql")
			queryBytes, err := os.ReadFile(queryPath)
			if err != nil {
				t.Fatalf("Failed to read query.sql: %v", err)
			}
			queryContent := string(queryBytes)

			// Read optional metadata. A missing file is fine; any other read
			// error must not be mistaken for "no metadata", because that would
			// silently ignore skip / explain_todo / parse_error settings.
			var metadata testMetadata
			metadataPath := filepath.Join(testDir, "metadata.json")
			metadataBytes, err := os.ReadFile(metadataPath)
			switch {
			case err == nil:
				if err := json.Unmarshal(metadataBytes, &metadata); err != nil {
					t.Fatalf("Failed to parse metadata.json: %v", err)
				}
			case !os.IsNotExist(err):
				t.Fatalf("Failed to read metadata.json: %v", err)
			}

			// Skip tests marked with skip: true (these cause infinite loops or other critical issues).
			// t.Skip stops the subtest, so no return is needed after it.
			if metadata.Skip {
				t.Skip("Skipping: skip is true in metadata")
			}

			// Skip tests where explain is explicitly false (e.g., ClickHouse couldn't parse it)
			if metadata.Explain != nil && !*metadata.Explain {
				t.Skip("Skipping: explain is false in metadata")
			}

			// Split into individual statements
			statements := splitStatements(queryContent)
			if len(statements) == 0 {
				t.Skip("No statements found in query.sql (all commented out)")
			}

			// Test each statement as a subtest
			for i, stmt := range statements {
				stmtIndex := i + 1
				t.Run(fmt.Sprintf("stmt%d", stmtIndex), func(t *testing.T) {
					// Determine explain file path: explain.txt for first, explain_N.txt for N >= 2
					var explainPath string
					if stmtIndex == 1 {
						explainPath = filepath.Join(testDir, "explain.txt")
					} else {
						explainPath = filepath.Join(testDir, fmt.Sprintf("explain_%d.txt", stmtIndex))
					}

					// For statements beyond the first, skip if no explain file exists
					// (these statements haven't been regenerated yet)
					if stmtIndex > 1 {
						if _, err := os.Stat(explainPath); os.IsNotExist(err) {
							t.Skipf("No explain_%d.txt file (run regenerate-explain to generate)", stmtIndex)
						}
					}

					// Skip statements marked in explain_todo (unless -check-explain is set)
					stmtKey := fmt.Sprintf("stmt%d", stmtIndex)
					isExplainTodo := metadata.ExplainTodo[stmtKey]
					if isExplainTodo && !*checkExplain {
						t.Skipf("TODO: explain_todo[%s] is true", stmtKey)
					}

					// Create context with 1 second timeout
					ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
					defer cancel()

					// Parse this statement
					stmts, parseErr := parser.Parse(ctx, strings.NewReader(stmt))
					if len(stmts) == 0 {
						// If parse_error is true, this is expected - the query is intentionally invalid
						if metadata.ParseError {
							t.Skip("Expected parse error (intentionally invalid SQL)")
						}
						t.Fatalf("Parse error: %v", parseErr)
					}

					// If parse_error is true but we parsed successfully, skip (our parser is more permissive)
					if metadata.ParseError {
						t.Skip("Parsed query marked as parse_error (parser is more permissive)")
					}

					// Verify we can serialize to JSON
					if _, jsonErr := json.Marshal(stmts[0]); jsonErr != nil {
						t.Fatalf("JSON marshal error: %v\nQuery: %s", jsonErr, stmt)
					}

					// Check explain output if explain file exists. Without an
					// explain file the statement is only checked for parsing
					// and JSON serialization.
					expectedBytes, err := os.ReadFile(explainPath)
					if os.IsNotExist(err) {
						return
					}
					if err != nil {
						t.Fatalf("Failed to read %s: %v", explainPath, err)
					}

					expected := strings.TrimSpace(string(expectedBytes))
					// Strip server error messages from expected output
					if idx := strings.Index(expected, "\nThe query succeeded but the server error"); idx != -1 {
						expected = strings.TrimSpace(expected[:idx])
					}
					actual := strings.TrimSpace(parser.Explain(stmts[0]))

					// Use case-insensitive comparison since ClickHouse EXPLAIN AST has inconsistent casing
					matches := strings.EqualFold(actual, expected)
					recheckingTodo := isExplainTodo && *checkExplain

					switch {
					case !matches && recheckingTodo:
						// Known-failing statement that is still failing: report, don't fail.
						t.Logf("EXPLAIN STILL FAILING:\nExpected:\n%s\n\nGot:\n%s", expected, actual)
					case !matches:
						t.Errorf("Explain output mismatch\nQuery: %s\nExpected:\n%s\n\nGot:\n%s", stmt, expected, actual)
					case recheckingTodo:
						// Test passes now - remove from explain_todo. Statement
						// subtests of one test case run sequentially, so this
						// mutation of the shared metadata is not concurrent.
						delete(metadata.ExplainTodo, stmtKey)
						if len(metadata.ExplainTodo) == 0 {
							// nil so that omitempty drops the key entirely.
							metadata.ExplainTodo = nil
						}
						updatedBytes, err := json.MarshalIndent(metadata, "", "  ")
						if err != nil {
							t.Errorf("Failed to marshal updated metadata: %v", err)
						} else if err := os.WriteFile(metadataPath, append(updatedBytes, '\n'), 0644); err != nil {
							t.Errorf("Failed to write updated metadata.json: %v", err)
						} else {
							t.Logf("EXPLAIN PASSES NOW - removed explain_todo[%s] from: %s", stmtKey, name)
						}
					}
				})
			}
		})
	}
}

// BenchmarkParser measures how long parser.Parse takes on one fixed,
// moderately complex SELECT (join, filter, grouping, HAVING, ordering and
// LIMIT). The benchmark aborts on the first parse error.
func BenchmarkParser(b *testing.B) {
	const complexQuery = `
		SELECT
			u.id,
			u.name,
			count(*) AS order_count,
			sum(o.amount) AS total
		FROM users u
		LEFT JOIN orders o ON u.id = o.user_id
		WHERE u.status = 'active' AND o.created_at > '2023-01-01'
		GROUP BY u.id, u.name
		HAVING count(*) > 0
		ORDER BY total DESC
		LIMIT 100
	`

	background := context.Background()

	// Keep the setup above out of the measured time.
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		if _, err := parser.Parse(background, strings.NewReader(complexQuery)); err != nil {
			b.Fatal(err)
		}
	}
}