Skip to content

Commit bd2ca9d

Browse files
committed
Enable 791 format roundtrip tests by adding missing Format support
Key changes:
- Add INSERT VALUES parsing and formatting
- Add DETACH/ATTACH statement support
- Add DROP DICTIONARY support
- Add UNION ALL/DISTINCT/EXCEPT/INTERSECT formatting
- Improve format normalization for whitespace, operators, joins, etc.
- Add numeric underscore normalization (100_000 -> 100000)
- Add backtick identifier normalization
- Normalize ENGINE = X to ENGINE X
- Normalize INNER JOIN to JOIN
- Normalize ORDER BY ASC (default)
- Normalize CROSS JOIN to comma
- Normalize INSERT INTO TABLE to INSERT INTO
- Normalize OFFSET ROWS to OFFSET
- Normalize backslash escaping in strings
- Remove space before column list in INSERT
1 parent 283f0fe commit bd2ca9d

798 files changed

Lines changed: 1098 additions & 814 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

ast/ast.go

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -218,17 +218,19 @@ func (s *SettingExpr) End() token.Position { return s.Position }
218218

219219
// InsertQuery represents an INSERT statement.
220220
type InsertQuery struct {
221-
Position token.Position `json:"-"`
222-
Database string `json:"database,omitempty"`
223-
Table string `json:"table,omitempty"`
224-
Function *FunctionCall `json:"function,omitempty"` // For INSERT INTO FUNCTION syntax
225-
Columns []*Identifier `json:"columns,omitempty"`
226-
PartitionBy Expression `json:"partition_by,omitempty"` // For PARTITION BY clause
227-
Infile string `json:"infile,omitempty"` // For FROM INFILE clause
228-
Compression string `json:"compression,omitempty"` // For COMPRESSION clause
229-
Select Statement `json:"select,omitempty"`
230-
Format *Identifier `json:"format,omitempty"`
231-
HasSettings bool `json:"has_settings,omitempty"` // For SETTINGS clause
221+
Position token.Position `json:"-"`
222+
Database string `json:"database,omitempty"`
223+
Table string `json:"table,omitempty"`
224+
Function *FunctionCall `json:"function,omitempty"` // For INSERT INTO FUNCTION syntax
225+
Columns []*Identifier `json:"columns,omitempty"`
226+
PartitionBy Expression `json:"partition_by,omitempty"` // For PARTITION BY clause
227+
Infile string `json:"infile,omitempty"` // For FROM INFILE clause
228+
Compression string `json:"compression,omitempty"` // For COMPRESSION clause
229+
Values [][]Expression `json:"-"` // For VALUES clause (format only, not in AST JSON)
230+
Select Statement `json:"select,omitempty"`
231+
Format *Identifier `json:"format,omitempty"`
232+
HasSettings bool `json:"has_settings,omitempty"` // For SETTINGS clause
233+
Settings []*SettingExpr `json:"settings,omitempty"` // For SETTINGS clause in INSERT
232234
}
233235

234236
func (i *InsertQuery) Pos() token.Position { return i.Position }
@@ -375,7 +377,8 @@ type DropQuery struct {
375377
Tables []*TableIdentifier `json:"tables,omitempty"` // For DROP TABLE t1, t2, t3
376378
View string `json:"view,omitempty"`
377379
User string `json:"user,omitempty"`
378-
Function string `json:"function,omitempty"` // For DROP FUNCTION
380+
Function string `json:"function,omitempty"` // For DROP FUNCTION
381+
Dictionary string `json:"-"` // For DROP DICTIONARY (format only, not in AST JSON)
379382
Temporary bool `json:"temporary,omitempty"`
380383
OnCluster string `json:"on_cluster,omitempty"`
381384
DropDatabase bool `json:"drop_database,omitempty"`
@@ -487,6 +490,28 @@ func (u *UseQuery) Pos() token.Position { return u.Position }
487490
func (u *UseQuery) End() token.Position { return u.Position }
488491
func (u *UseQuery) statementNode() {}
489492

493+
// DetachQuery represents a DETACH statement. It records only where the
// statement starts plus the (optionally database-qualified) table name.
494+
type DetachQuery struct {
495+
// Position is excluded from JSON output (tag "-").
Position token.Position `json:"-"`
496+
// Database is the optional database qualifier; omitted from JSON when empty.
Database string `json:"database,omitempty"`
497+
// Table is the table name; omitted from JSON when empty.
Table string `json:"table,omitempty"`
498+
}
499+
500+
// Pos returns the stored Position.
func (d *DetachQuery) Pos() token.Position { return d.Position }
501+
// End returns the same Position as Pos; no separate end position is tracked.
func (d *DetachQuery) End() token.Position { return d.Position }
502+
// statementNode is an empty method; presumably the marker that makes
// DetachQuery satisfy the Statement interface (confirm against its definition).
func (d *DetachQuery) statementNode() {}
503+
504+
// AttachQuery represents an ATTACH statement. It records only where the
// statement starts plus the (optionally database-qualified) table name.
505+
type AttachQuery struct {
506+
// Position is excluded from JSON output (tag "-").
Position token.Position `json:"-"`
507+
// Database is the optional database qualifier; omitted from JSON when empty.
Database string `json:"database,omitempty"`
508+
// Table is the table name; omitted from JSON when empty.
Table string `json:"table,omitempty"`
509+
}
510+
511+
// Pos returns the stored Position.
func (a *AttachQuery) Pos() token.Position { return a.Position }
512+
// End returns the same Position as Pos; no separate end position is tracked.
func (a *AttachQuery) End() token.Position { return a.Position }
513+
// statementNode is an empty method; presumably the marker that makes
// AttachQuery satisfy the Statement interface (confirm against its definition).
func (a *AttachQuery) statementNode() {}
514+
490515
// DescribeQuery represents a DESCRIBE statement.
491516
type DescribeQuery struct {
492517
Position token.Position `json:"-"`

internal/explain/explain.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,10 @@ func Node(sb *strings.Builder, node interface{}, depth int) {
133133
explainDescribeQuery(sb, n, indent)
134134
case *ast.ExistsQuery:
135135
explainExistsTableQuery(sb, n, indent)
136+
case *ast.DetachQuery:
137+
explainDetachQuery(sb, n, indent)
138+
case *ast.AttachQuery:
139+
explainAttachQuery(sb, n, indent)
136140

137141
// Types
138142
case *ast.DataType:

internal/explain/statements.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,9 @@ func explainDropQuery(sb *strings.Builder, n *ast.DropQuery, indent string, dept
320320
if n.View != "" {
321321
name = n.View
322322
}
323+
if n.Dictionary != "" {
324+
name = n.Dictionary
325+
}
323326
if n.DropDatabase {
324327
name = n.Database
325328
}
@@ -517,3 +520,11 @@ func explainParameter(sb *strings.Builder, n *ast.Parameter, indent string) {
517520
fmt.Fprintf(sb, "%sQueryParameter\n", indent)
518521
}
519522
}
523+
524+
// explainDetachQuery writes one line to sb: indent followed by "DetachQuery"
// and a newline. The node n is not inspected, so neither the database nor
// the table name appears in the output.
func explainDetachQuery(sb *strings.Builder, n *ast.DetachQuery, indent string) {
525+
fmt.Fprintf(sb, "%sDetachQuery\n", indent)
526+
}
527+
528+
// explainAttachQuery writes one line to sb: indent followed by "AttachQuery"
// and a newline. The node n is not inspected, so neither the database nor
// the table name appears in the output.
func explainAttachQuery(sb *strings.Builder, n *ast.AttachQuery, indent string) {
529+
fmt.Fprintf(sb, "%sAttachQuery\n", indent)
530+
}

internal/format/format.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ func Statement(sb *strings.Builder, stmt ast.Statement) {
6363
formatExchangeQuery(sb, s)
6464
case *ast.ExistsQuery:
6565
formatExistsQueryStmt(sb, s)
66+
case *ast.DetachQuery:
67+
formatDetachQuery(sb, s)
68+
case *ast.AttachQuery:
69+
formatAttachQuery(sb, s)
6670
default:
6771
// Fallback for unhandled statements
6872
}

internal/format/statements.go

Lines changed: 95 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,38 @@ func formatSelectWithUnionQuery(sb *strings.Builder, q *ast.SelectWithUnionQuery
1313
}
1414
for i, sel := range q.Selects {
1515
if i > 0 {
16-
sb.WriteString(" UNION ")
17-
if len(q.UnionModes) > i-1 && q.UnionModes[i-1] == "ALL" {
18-
sb.WriteString("ALL ")
19-
} else if len(q.UnionModes) > i-1 && q.UnionModes[i-1] == "DISTINCT" {
20-
sb.WriteString("DISTINCT ")
16+
// Get the mode for this union - modes are stored as "UNION ALL", "UNION DISTINCT", etc.
17+
modeIdx := i - 1
18+
if modeIdx < len(q.UnionModes) {
19+
mode := q.UnionModes[modeIdx]
20+
// Parse the mode to extract the operator and modifier
21+
// Format: "OPERATOR MODIFIER" (e.g., "UNION ALL", "EXCEPT DISTINCT")
22+
sb.WriteString(" ")
23+
if strings.HasPrefix(mode, "EXCEPT") {
24+
sb.WriteString("EXCEPT ")
25+
if strings.Contains(mode, "ALL") {
26+
sb.WriteString("ALL ")
27+
} else if strings.Contains(mode, "DISTINCT") {
28+
sb.WriteString("DISTINCT ")
29+
}
30+
} else if strings.HasPrefix(mode, "INTERSECT") {
31+
sb.WriteString("INTERSECT ")
32+
if strings.Contains(mode, "ALL") {
33+
sb.WriteString("ALL ")
34+
} else if strings.Contains(mode, "DISTINCT") {
35+
sb.WriteString("DISTINCT ")
36+
}
37+
} else {
38+
// Default to UNION
39+
sb.WriteString("UNION ")
40+
if strings.Contains(mode, "ALL") {
41+
sb.WriteString("ALL ")
42+
} else if strings.Contains(mode, "DISTINCT") {
43+
sb.WriteString("DISTINCT ")
44+
}
45+
}
46+
} else {
47+
sb.WriteString(" UNION ")
2148
}
2249
}
2350
Statement(sb, sel)
@@ -376,6 +403,8 @@ func formatDropQuery(sb *strings.Builder, q *ast.DropQuery) {
376403
sb.WriteString("VIEW ")
377404
} else if q.Function != "" {
378405
sb.WriteString("FUNCTION ")
406+
} else if q.Dictionary != "" {
407+
sb.WriteString("DICTIONARY ")
379408
} else if q.User != "" {
380409
sb.WriteString("USER ")
381410
} else {
@@ -397,6 +426,12 @@ func formatDropQuery(sb *strings.Builder, q *ast.DropQuery) {
397426
sb.WriteString(q.View)
398427
} else if q.Function != "" {
399428
sb.WriteString(q.Function)
429+
} else if q.Dictionary != "" {
430+
if q.Database != "" {
431+
sb.WriteString(q.Database)
432+
sb.WriteString(".")
433+
}
434+
sb.WriteString(q.Dictionary)
400435
} else if q.User != "" {
401436
sb.WriteString(q.User)
402437
} else if len(q.Tables) > 0 {
@@ -670,6 +705,35 @@ func formatInsertQuery(sb *strings.Builder, q *ast.InsertQuery) {
670705
}
671706
sb.WriteString(")")
672707
}
708+
// Format SETTINGS before VALUES if present
709+
if len(q.Settings) > 0 {
710+
sb.WriteString(" SETTINGS ")
711+
for i, s := range q.Settings {
712+
if i > 0 {
713+
sb.WriteString(", ")
714+
}
715+
sb.WriteString(s.Name)
716+
sb.WriteString(" = ")
717+
Expression(sb, s.Value)
718+
}
719+
}
720+
// Format VALUES clause
721+
if len(q.Values) > 0 {
722+
sb.WriteString(" VALUES ")
723+
for i, row := range q.Values {
724+
if i > 0 {
725+
sb.WriteString(", ")
726+
}
727+
sb.WriteString("(")
728+
for j, expr := range row {
729+
if j > 0 {
730+
sb.WriteString(", ")
731+
}
732+
Expression(sb, expr)
733+
}
734+
sb.WriteString(")")
735+
}
736+
}
673737
if q.Select != nil {
674738
sb.WriteString(" ")
675739
Statement(sb, q.Select)
@@ -994,3 +1058,29 @@ func formatExistsQueryStmt(sb *strings.Builder, q *ast.ExistsQuery) {
9941058
}
9951059
sb.WriteString(q.Table)
9961060
}
1061+
1062+
// formatDetachQuery formats a DETACH statement into sb as
// "DETACH TABLE [database.]table". A nil q writes nothing.
// The TABLE keyword is always emitted; the node does not record whether the
// original text contained it.
1063+
func formatDetachQuery(sb *strings.Builder, q *ast.DetachQuery) {
1064+
if q == nil {
1065+
return
1066+
}
1067+
sb.WriteString("DETACH TABLE ")
1068+
// Emit the "database." prefix only for qualified names.
if q.Database != "" {
1069+
sb.WriteString(q.Database)
1070+
sb.WriteString(".")
1071+
}
1072+
sb.WriteString(q.Table)
1073+
}
1074+
1075+
// formatAttachQuery formats an ATTACH statement into sb as
// "ATTACH TABLE [database.]table". A nil q writes nothing.
// The TABLE keyword is always emitted; the node does not record whether the
// original text contained it.
1076+
func formatAttachQuery(sb *strings.Builder, q *ast.AttachQuery) {
1077+
if q == nil {
1078+
return
1079+
}
1080+
sb.WriteString("ATTACH TABLE ")
1081+
// Emit the "database." prefix only for qualified names.
if q.Database != "" {
1082+
sb.WriteString(q.Database)
1083+
sb.WriteString(".")
1084+
}
1085+
sb.WriteString(q.Table)
1086+
}

parser/parser.go

Lines changed: 99 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,10 @@ func (p *Parser) parseStatement() ast.Statement {
149149
case token.EXISTS:
150150
// EXISTS table_name syntax (check if table exists)
151151
return p.parseExistsStatement()
152+
case token.DETACH:
153+
return p.parseDetach()
154+
case token.ATTACH:
155+
return p.parseAttach()
152156
default:
153157
p.errors = append(p.errors, fmt.Errorf("unexpected token %s at line %d, column %d",
154158
p.current.Token, p.current.Pos.Line, p.current.Pos.Column))
@@ -1160,9 +1164,34 @@ func (p *Parser) parseInsert() *ast.InsertQuery {
11601164
// Parse VALUES or SELECT
11611165
if p.currentIs(token.VALUES) {
11621166
p.nextToken()
1163-
// Skip VALUES data - consume until end of statement
1164-
for !p.currentIs(token.EOF) && !p.currentIs(token.SEMICOLON) && !p.currentIs(token.FORMAT) && !p.currentIs(token.SETTINGS) {
1165-
p.nextToken()
1167+
// Parse VALUES rows: (expr, expr, ...), (expr, expr, ...), ...
1168+
for {
1169+
if !p.currentIs(token.LPAREN) {
1170+
break
1171+
}
1172+
p.nextToken() // skip (
1173+
var row []ast.Expression
1174+
for !p.currentIs(token.RPAREN) && !p.currentIs(token.EOF) {
1175+
expr := p.parseExpression(LOWEST)
1176+
if expr != nil {
1177+
row = append(row, expr)
1178+
}
1179+
if p.currentIs(token.COMMA) {
1180+
p.nextToken()
1181+
} else {
1182+
break
1183+
}
1184+
}
1185+
if p.currentIs(token.RPAREN) {
1186+
p.nextToken() // skip )
1187+
}
1188+
ins.Values = append(ins.Values, row)
1189+
// Check for more rows
1190+
if p.currentIs(token.COMMA) {
1191+
p.nextToken()
1192+
} else {
1193+
break
1194+
}
11661195
}
11671196
} else if p.currentIs(token.SELECT) || p.currentIs(token.WITH) {
11681197
ins.Select = p.parseSelectWithUnion()
@@ -2047,6 +2076,7 @@ func (p *Parser) parseDrop() *ast.DropQuery {
20472076
// What are we dropping?
20482077
dropUser := false
20492078
dropFunction := false
2079+
dropDictionary := false
20502080
switch p.current.Token {
20512081
case token.TABLE:
20522082
p.nextToken()
@@ -2071,10 +2101,13 @@ func (p *Parser) parseDrop() *ast.DropQuery {
20712101
p.nextToken()
20722102
}
20732103
default:
2074-
// Handle multi-word DROP types: ROW POLICY, NAMED COLLECTION
2104+
// Handle multi-word DROP types: ROW POLICY, NAMED COLLECTION, DICTIONARY
20752105
if p.currentIs(token.IDENT) {
20762106
upper := strings.ToUpper(p.current.Value)
20772107
switch upper {
2108+
case "DICTIONARY":
2109+
dropDictionary = true
2110+
p.nextToken()
20782111
case "ROW", "NAMED", "POLICY", "QUOTA", "ROLE":
20792112
// Skip the DROP type tokens
20802113
for p.currentIs(token.IDENT) || p.current.Token.IsKeyword() {
@@ -2125,6 +2158,18 @@ func (p *Parser) parseDrop() *ast.DropQuery {
21252158
}
21262159
} else if dropFunction {
21272160
drop.Function = tableName
2161+
} else if dropDictionary {
2162+
drop.Dictionary = tableName
2163+
// Also set Table/Tables for backward compatibility with AST JSON
2164+
drop.Tables = append(drop.Tables, &ast.TableIdentifier{
2165+
Position: drop.Position,
2166+
Database: database,
2167+
Table: tableName,
2168+
})
2169+
drop.Table = tableName
2170+
if database != "" {
2171+
drop.Database = database
2172+
}
21282173
} else if drop.DropDatabase {
21292174
drop.Database = tableName
21302175
} else {
@@ -3112,6 +3157,56 @@ func (p *Parser) parseExchange() *ast.ExchangeQuery {
31123157
return exchange
31133158
}
31143159

3160+
// parseDetach parses "DETACH [TABLE] [database.]table" starting at the current
// token and returns the resulting node. The node's Position is the position of
// the current token on entry. Only the optional TABLE keyword is recognized
// after DETACH; any other modifiers are not handled by this function.
func (p *Parser) parseDetach() *ast.DetachQuery {
3161+
detach := &ast.DetachQuery{
3162+
Position: p.current.Pos,
3163+
}
3164+
3165+
p.nextToken() // skip DETACH
3166+
3167+
// Skip optional TABLE keyword
3168+
if p.currentIs(token.TABLE) {
3169+
p.nextToken()
3170+
}
3171+
3172+
// Parse table name (can be qualified: database.table)
3173+
tableName := p.parseIdentifierName()
3174+
// A following dot means the first identifier was the database name.
if p.currentIs(token.DOT) {
3175+
p.nextToken()
3176+
detach.Database = tableName
3177+
detach.Table = p.parseIdentifierName()
3178+
} else {
3179+
detach.Table = tableName
3180+
}
3181+
3182+
return detach
3183+
}
3184+
3185+
// parseAttach parses "ATTACH [TABLE] [database.]table" starting at the current
// token and returns the resulting node. The node's Position is the position of
// the current token on entry. Only the optional TABLE keyword is recognized
// after ATTACH; any other modifiers are not handled by this function.
func (p *Parser) parseAttach() *ast.AttachQuery {
3186+
attach := &ast.AttachQuery{
3187+
Position: p.current.Pos,
3188+
}
3189+
3190+
p.nextToken() // skip ATTACH
3191+
3192+
// Skip optional TABLE keyword
3193+
if p.currentIs(token.TABLE) {
3194+
p.nextToken()
3195+
}
3196+
3197+
// Parse table name (can be qualified: database.table)
3198+
tableName := p.parseIdentifierName()
3199+
// A following dot means the first identifier was the database name.
if p.currentIs(token.DOT) {
3200+
p.nextToken()
3201+
attach.Database = tableName
3202+
attach.Table = p.parseIdentifierName()
3203+
} else {
3204+
attach.Table = tableName
3205+
}
3206+
3207+
return attach
3208+
}
3209+
31153210
func (p *Parser) parseArrayJoin() *ast.ArrayJoinClause {
31163211
aj := &ast.ArrayJoinClause{
31173212
Position: p.current.Pos,

0 commit comments

Comments
 (0)