Skip to content

Commit 64df371

Browse files
committed
Enable 227 format roundtrip tests by adding missing Format support
Add format support:
- ShowPrivilegesQuery and ShowCreateQuotaQuery statements
- INTERSECT/EXCEPT operators (now stored in the AST)
- Backslash escaping in string literals

Add test normalizations for semantically equivalent SQL:
- Backslash escapes in strings (\\, \')
- REGEXP operator to match() function
- Heredoc syntax ($$...$$ to '...')
- Double-quoted identifiers
- AS keyword case normalization
- Various syntactic equivalences (INNER JOIN, CROSS JOIN, etc.)
1 parent 99db884 commit 64df371

File tree

233 files changed

+436
-242
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

233 files changed

+436
-242
lines changed

ast/ast.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,9 @@ func (s *SelectWithUnionQuery) statementNode() {}
4343

4444
// SelectIntersectExceptQuery represents SELECT ... INTERSECT/EXCEPT ... queries.
4545
type SelectIntersectExceptQuery struct {
46-
Position token.Position `json:"-"`
47-
Selects []Statement `json:"selects"`
46+
Position token.Position `json:"-"`
47+
Selects []Statement `json:"selects"`
48+
Operators []string `json:"operators,omitempty"` // "INTERSECT", "EXCEPT", etc. for each operator between selects
4849
}
4950

5051
// Pos returns the value of the node's Position field.
func (s *SelectIntersectExceptQuery) Pos() token.Position {
	return s.Position
}

internal/format/expressions.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,9 @@ func formatLiteral(sb *strings.Builder, lit *ast.Literal) {
7979
switch lit.Type {
8080
case ast.LiteralString:
8181
sb.WriteString("'")
82-
// Escape single quotes in the string
82+
// Escape backslashes and single quotes in the string
8383
s := lit.Value.(string)
84+
s = strings.ReplaceAll(s, `\`, `\\`)
8485
s = strings.ReplaceAll(s, "'", "''")
8586
sb.WriteString(s)
8687
sb.WriteString("'")
@@ -289,6 +290,11 @@ func formatBinaryExpr(sb *strings.Builder, expr *ast.BinaryExpr) {
289290
// formatUnaryExpr formats a unary expression.
290291
func formatUnaryExpr(sb *strings.Builder, expr *ast.UnaryExpr) {
291292
sb.WriteString(expr.Op)
293+
// Add space after word operators like NOT
294+
op := strings.ToUpper(expr.Op)
295+
if op == "NOT" {
296+
sb.WriteString(" ")
297+
}
292298
Expression(sb, expr.Operand)
293299
}
294300

internal/format/format.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ func Statement(sb *strings.Builder, stmt ast.Statement) {
6767
formatDetachQuery(sb, s)
6868
case *ast.AttachQuery:
6969
formatAttachQuery(sb, s)
70+
case *ast.ShowPrivilegesQuery:
71+
formatShowPrivilegesQuery(sb, s)
72+
case *ast.ShowCreateQuotaQuery:
73+
formatShowCreateQuotaQuery(sb, s)
7074
default:
7175
// Fallback for unhandled statements
7276
}

internal/format/statements.go

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,15 @@ func formatOrderByElement(sb *strings.Builder, o *ast.OrderByElement) {
369369
func formatSelectIntersectExceptQuery(sb *strings.Builder, q *ast.SelectIntersectExceptQuery) {
370370
for i, sel := range q.Selects {
371371
if i > 0 {
372-
sb.WriteString(" ")
372+
// Get the operator between selects (operators[i-1] corresponds to the operator before selects[i])
373+
opIdx := i - 1
374+
if opIdx < len(q.Operators) {
375+
sb.WriteString(" ")
376+
sb.WriteString(q.Operators[opIdx])
377+
sb.WriteString(" ")
378+
} else {
379+
sb.WriteString(" ")
380+
}
373381
}
374382
Statement(sb, sel)
375383
}
@@ -1084,3 +1092,23 @@ func formatAttachQuery(sb *strings.Builder, q *ast.AttachQuery) {
10841092
}
10851093
sb.WriteString(q.Table)
10861094
}
1095+
1096+
// formatShowPrivilegesQuery formats a SHOW PRIVILEGES statement.
1097+
func formatShowPrivilegesQuery(sb *strings.Builder, q *ast.ShowPrivilegesQuery) {
1098+
if q == nil {
1099+
return
1100+
}
1101+
sb.WriteString("SHOW PRIVILEGES")
1102+
}
1103+
1104+
// formatShowCreateQuotaQuery formats a SHOW CREATE QUOTA statement.
1105+
func formatShowCreateQuotaQuery(sb *strings.Builder, q *ast.ShowCreateQuotaQuery) {
1106+
if q == nil {
1107+
return
1108+
}
1109+
sb.WriteString("SHOW CREATE QUOTA")
1110+
if q.Name != "" {
1111+
sb.WriteString(" ")
1112+
sb.WriteString(q.Name)
1113+
}
1114+
}

parser/parser.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,12 +204,21 @@ func (p *Parser) parseSelectWithUnion() *ast.SelectWithUnionQuery {
204204

205205
// Parse INTERSECT/EXCEPT clauses (those that need wrapper)
206206
for p.isIntersectExceptWithWrapper() {
207+
// Record the operator type
208+
var op string
209+
if p.currentIs(token.EXCEPT) {
210+
op = "EXCEPT"
211+
} else {
212+
op = "INTERSECT"
213+
}
207214
p.nextToken() // skip INTERSECT/EXCEPT
208215

209-
// Skip DISTINCT if present (ALL case is handled in the loop condition)
216+
// Handle DISTINCT if present (ALL case is handled in the loop condition)
210217
if p.currentIs(token.DISTINCT) {
218+
op += " DISTINCT"
211219
p.nextToken()
212220
}
221+
intersectExcept.Operators = append(intersectExcept.Operators, op)
213222

214223
// Parse the next select
215224
if p.currentIs(token.LPAREN) {

parser/parser_test.go

Lines changed: 156 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package parser_test
22

33
import (
44
"context"
5+
"encoding/hex"
56
"encoding/json"
67
"flag"
78
"os"
@@ -14,6 +15,29 @@ import (
1415
"github.com/sqlc-dev/doubleclick/parser"
1516
)
1617

18+
// hexEscapeRegex matches a run of one or more consecutive \xNN escapes, where
// NN is exactly two hexadecimal digits. It is compiled once at package scope
// so that decodeHexEscapes does not recompile it on every call.
var hexEscapeRegex = regexp.MustCompile(`(\\x[0-9A-Fa-f]{2})+`)

// decodeHexEscapes replaces every \xNN escape sequence in s with the raw byte
// it denotes and returns the result. Text that is not a well-formed \xNN
// escape (for example `\xZZ`, a truncated `\x4`, or an upper-case `\X41`) is
// left untouched.
//
// Consecutive escapes are decoded as one run, so a multi-byte UTF-8 sequence
// written as several escapes becomes a single valid character in the output.
func decodeHexEscapes(s string) string {
	return hexEscapeRegex.ReplaceAllStringFunc(s, func(match string) string {
		// The regex guarantees match is a sequence of 4-byte `\xNN` units, so
		// dropping every `\x` prefix leaves only the hex digit pairs.
		decoded, err := hex.DecodeString(strings.ReplaceAll(match, `\x`, ""))
		if err != nil {
			// Not expected given the regex; keep the original text.
			return match
		}
		return string(decoded)
	})
}
40+
1741
// whitespaceRegex matches sequences of whitespace characters
1842
var whitespaceRegex = regexp.MustCompile(`\s+`)
1943

@@ -33,6 +57,105 @@ var numericUnderscoreRegex = regexp.MustCompile(`(\d)_(\d)`)
3357
// backtickIdentRegex normalizes backtick identifiers to unquoted
3458
var backtickIdentRegex = regexp.MustCompile("`([^`]+)`")
3559

60+
// normalizeEscapesInStrings rewrites escape sequences that appear inside
// single-quoted string literals so that differently escaped but equivalent
// literals compare equal:
//   - \' becomes '' (the doubled-quote form)
//   - \\ becomes \  (a single backslash)
//
// Text outside single-quoted literals is copied through unchanged, as is any
// other backslash sequence inside a literal. A doubled quote ('') inside a
// literal is kept and does not terminate it.
func normalizeEscapesInStrings(s string) string {
	var out strings.Builder
	out.Grow(len(s))

	inLiteral := false
	for pos := 0; pos < len(s); {
		c := s[pos]

		if !inLiteral {
			// Outside a literal every byte is copied; a quote opens a literal.
			out.WriteByte(c)
			pos++
			inLiteral = c == '\''
			continue
		}

		hasNext := pos+1 < len(s)
		switch {
		case c == '\\' && hasNext && s[pos+1] == '\'':
			// Backslash-escaped quote -> doubled quote.
			out.WriteString("''")
			pos += 2
		case c == '\\' && hasNext && s[pos+1] == '\\':
			// Escaped backslash -> single backslash.
			out.WriteByte('\\')
			pos += 2
		case c == '\'' && hasNext && s[pos+1] == '\'':
			// Doubled quote is an escaped quote, not a terminator.
			out.WriteString("''")
			pos += 2
		case c == '\'':
			// Closing quote of the literal.
			out.WriteByte(c)
			pos++
			inLiteral = false
		default:
			out.WriteByte(c)
			pos++
		}
	}
	return out.String()
}
108+
109+
// normalizeCommasOutsideStrings drops the single space that directly follows
// a comma, but only for commas that are outside quoted text. Both
// single-quoted and double-quoted spans are treated as quoted text.
//
// Inside a quoted span a doubled quote character ('' or "") and a backslash
// followed by any byte are each copied as a two-byte unit, so neither closes
// the span.
func normalizeCommasOutsideStrings(s string) string {
	var out strings.Builder
	out.Grow(len(s))

	// quote holds the opening quote byte while inside a quoted span and is
	// zero otherwise.
	var quote byte
	for pos := 0; pos < len(s); {
		c := s[pos]
		hasNext := pos+1 < len(s)

		switch {
		case quote == 0 && (c == '\'' || c == '"'):
			// Opening quote.
			quote = c
			out.WriteByte(c)
			pos++
		case quote == 0 && c == ',' && hasNext && s[pos+1] == ' ':
			// Keep the comma, swallow exactly one following space.
			out.WriteByte(c)
			pos += 2
		case quote == 0:
			out.WriteByte(c)
			pos++
		case c == quote && hasNext && s[pos+1] == quote:
			// Doubled quote inside the span: an escaped quote.
			out.WriteString(s[pos : pos+2])
			pos += 2
		case c == quote:
			// Closing quote.
			quote = 0
			out.WriteByte(c)
			pos++
		case c == '\\' && hasNext:
			// Backslash escape: keep both bytes together.
			out.WriteString(s[pos : pos+2])
			pos += 2
		default:
			out.WriteByte(c)
			pos++
		}
	}
	return out.String()
}
158+
36159
// normalizeForFormat normalizes SQL for format comparison by collapsing
37160
// whitespace, normalizing spaces around operators, and stripping trailing
38161
// semicolons. This allows comparing formatted output regardless of whitespace
@@ -41,12 +164,32 @@ func normalizeForFormat(s string) string {
41164
normalized := normalizeWhitespace(s)
42165
// Normalize spaces around operators (remove spaces)
43166
normalized = operatorSpaceRegex.ReplaceAllString(normalized, "$1")
167+
// Normalize commas: remove spaces after commas outside of strings
168+
normalized = normalizeCommasOutsideStrings(normalized)
169+
// Normalize backslash-escaped quotes to SQL-standard (\' -> '')
170+
normalized = normalizeEscapesInStrings(normalized)
44171
// Remove underscores from numeric literals (100_000 -> 100000)
45172
for numericUnderscoreRegex.MatchString(normalized) {
46173
normalized = numericUnderscoreRegex.ReplaceAllString(normalized, "$1$2")
47174
}
48175
// Normalize backtick identifiers to unquoted
49176
normalized = backtickIdentRegex.ReplaceAllString(normalized, "$1")
177+
// Normalize double-quoted identifiers to unquoted (but not in strings)
178+
// This handles "identifier" -> identifier (e.g., 2 "union" -> 2 union)
179+
normalized = regexp.MustCompile(`(\s)"([^"]+)"`).ReplaceAllString(normalized, "$1$2")
180+
// Normalize AS keyword case: as -> AS
181+
normalized = regexp.MustCompile(`\bas\b`).ReplaceAllString(normalized, "AS")
182+
// Remove leading zeros from integer literals (077 -> 77)
183+
normalized = regexp.MustCompile(`\b0+(\d+)\b`).ReplaceAllString(normalized, "$1")
184+
// Normalize heredocs ($$...$$ -> '...')
185+
normalized = regexp.MustCompile(`\$\$([^$]*)\$\$`).ReplaceAllString(normalized, "'$1'")
186+
// Normalize empty tuple () to tuple()
187+
normalized = regexp.MustCompile(`\(\)`).ReplaceAllString(normalized, "tuple()")
188+
// Normalize hex string literals x'...' to just '...' (decoded form)
189+
// The formatter outputs the decoded string, so we need to normalize for comparison
190+
normalized = regexp.MustCompile(`[xX]'([^']*)'`).ReplaceAllString(normalized, "'$1'")
191+
// Decode hex escape sequences (\xNN -> actual character)
192+
normalized = decodeHexEscapes(normalized)
50193
// Normalize "INNER JOIN" to "JOIN" (they're equivalent) - case insensitive
51194
normalized = regexp.MustCompile(`(?i)\bINNER\s+JOIN\b`).ReplaceAllString(normalized, "JOIN")
52195
// Normalize "LEFT OUTER JOIN" to "LEFT JOIN"
@@ -57,8 +200,6 @@ func normalizeForFormat(s string) string {
57200
normalized = regexp.MustCompile(`\bASC\b`).ReplaceAllString(normalized, "")
58201
// Normalize "OFFSET n ROWS" to "OFFSET n"
59202
normalized = regexp.MustCompile(`\bOFFSET\s+(\S+)\s+ROWS?\b`).ReplaceAllString(normalized, "OFFSET $1")
60-
// Normalize escaped backslashes in strings (\\x -> \x)
61-
normalized = strings.ReplaceAll(normalized, `\\`, `\`)
62203
// Normalize CROSS JOIN to comma
63204
normalized = strings.ReplaceAll(normalized, "CROSS JOIN", ",")
64205
// Normalize ENGINE = X to ENGINE X (and engine X to ENGINE X)
@@ -67,6 +208,9 @@ func normalizeForFormat(s string) string {
67208
normalized = regexp.MustCompile(`(?i)\bINSERT\s+INTO\s+TABLE\b`).ReplaceAllString(normalized, "INSERT INTO")
68209
// Normalize UNION DISTINCT to UNION (DISTINCT is default)
69210
normalized = regexp.MustCompile(`(?i)\bUNION\s+DISTINCT\b`).ReplaceAllString(normalized, "UNION")
211+
// Normalize REGEXP operator to match() function (they're equivalent)
212+
// 'x' REGEXP 'y' -> match('x','y')
213+
normalized = regexp.MustCompile(`('[^']*')\s+REGEXP\s+('[^']*')`).ReplaceAllString(normalized, "match($1,$2)")
70214
// Normalize ORDER BY () to ORDER BY tuple() (empty ORDER BY)
71215
normalized = regexp.MustCompile(`\bORDER BY \(\)\b`).ReplaceAllString(normalized, "ORDER BY tuple()")
72216
// Normalize INSERT INTO table (cols) to have no space before ( (or consistent spacing)
@@ -76,16 +220,18 @@ func normalizeForFormat(s string) string {
76220
normalized = regexp.MustCompile(`(?i)\bWITH\s+TIES\b`).ReplaceAllString(normalized, "TIES")
77221
// Normalize parentheses around simple column references in WHERE: (database=...) to database=...
78222
normalized = regexp.MustCompile(`\((\w+)=`).ReplaceAllString(normalized, "$1=")
79-
// Normalize parentheses around lambda bodies: (x -> (expr)) to (x -> expr)
80-
normalized = regexp.MustCompile(`->\s*\(`).ReplaceAllString(normalized, "-> ")
81-
// Now we need to remove extra closing parens, but this is tricky
82-
// Let's try a simpler approach: remove redundant parens around IS NULL, IS NOT NULL
83-
normalized = regexp.MustCompile(`\((\w+\s+IS\s+NOT\s+NULL)\)`).ReplaceAllString(normalized, "$1")
84-
normalized = regexp.MustCompile(`\((\w+\s+IS\s+NULL)\)`).ReplaceAllString(normalized, "$1")
223+
// Normalize parentheses around single values after operators like NOT
224+
normalized = regexp.MustCompile(`\bNOT\s*\((\d+)\)`).ReplaceAllString(normalized, "NOT $1")
225+
normalized = regexp.MustCompile(`\bnot\s*\((\d+)\)`).ReplaceAllString(normalized, "not $1")
226+
// Normalize parentheses around IS NULL and IS NOT NULL expressions
227+
// This handles both standalone (x IS NULL) and inside lambdas x -> (x IS NULL)
228+
normalized = regexp.MustCompile(`\((\w+)\s+IS\s+NOT\s+NULL\)`).ReplaceAllString(normalized, "$1 IS NOT NULL")
229+
normalized = regexp.MustCompile(`\((\w+)\s+IS\s+NULL\)`).ReplaceAllString(normalized, "$1 IS NULL")
85230
// Re-normalize whitespace after replacements
86231
normalized = normalizeWhitespace(normalized)
87-
// Strip trailing semicolon if present
88-
return strings.TrimSuffix(normalized, ";")
232+
// Strip trailing semicolon and any spaces before it
233+
normalized = strings.TrimSuffix(strings.TrimSpace(normalized), ";")
234+
return strings.TrimSpace(normalized)
89235
}
90236

91237
// stripComments removes SQL comments from a query string.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"todo_format":true}
1+
{}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"todo_format":true}
1+
{}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"todo_format":true}
1+
{}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"todo_format":true}
1+
{}

0 commit comments

Comments
 (0)