Skip to content

Commit b156a86

Browse files
committed
Fix 53 parser issues: keyword types, implicit aliases, empty tuples, CAST syntax
- Fix parseColumnDeclaration to accept keywords as column names (KEY, etc.)
- Fix parseDataType to accept keyword type names (Array, Tuple, Nested, etc.)
- Add more data types: INT, JSON, VARIANT, DYNAMIC, aggregation functions, etc.
- Support empty tuple parsing: SELECT ()
- Support CAST(x, 'Type') comma syntax in addition to CAST(x AS Type)
- Support WITH TIES clause after LIMIT
- Add implicit alias support for columns (SELECT 'a' c0 means SELECT 'a' AS c0)

Tests: 5405 passing, 1418 skipped (was 5352 passing, 1471 skipped)
1 parent d84f12f commit b156a86

2 files changed

Lines changed: 85 additions & 12 deletions

File tree

parser/expression.go

Lines changed: 66 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -80,16 +80,56 @@ func (p *Parser) parseExpressionList() []ast.Expression {
8080
return exprs
8181
}
8282

83-
exprs = append(exprs, p.parseExpression(LOWEST))
83+
expr := p.parseExpression(LOWEST)
84+
if expr != nil {
85+
// Handle implicit alias (identifier without AS)
86+
expr = p.parseImplicitAlias(expr)
87+
exprs = append(exprs, expr)
88+
}
8489

8590
for p.currentIs(token.COMMA) {
8691
p.nextToken()
87-
exprs = append(exprs, p.parseExpression(LOWEST))
92+
expr := p.parseExpression(LOWEST)
93+
if expr != nil {
94+
// Handle implicit alias (identifier without AS)
95+
expr = p.parseImplicitAlias(expr)
96+
exprs = append(exprs, expr)
97+
}
8898
}
8999

90100
return exprs
91101
}
92102

103+
// parseImplicitAlias handles implicit column aliases like "SELECT 'a' c0" (meaning 'a' AS c0)
104+
func (p *Parser) parseImplicitAlias(expr ast.Expression) ast.Expression {
105+
// If next token is a plain identifier (not a keyword), treat as implicit alias
106+
// Keywords like FROM, WHERE etc. are tokenized as their own token types, not IDENT
107+
if p.currentIs(token.IDENT) {
108+
alias := p.current.Value
109+
p.nextToken()
110+
111+
// Set alias on the expression if it supports it
112+
switch e := expr.(type) {
113+
case *ast.Identifier:
114+
e.Alias = alias
115+
return e
116+
case *ast.FunctionCall:
117+
e.Alias = alias
118+
return e
119+
case *ast.Subquery:
120+
e.Alias = alias
121+
return e
122+
default:
123+
return &ast.AliasedExpr{
124+
Position: expr.Pos(),
125+
Expr: expr,
126+
Alias: alias,
127+
}
128+
}
129+
}
130+
return expr
131+
}
132+
93133
func (p *Parser) parseExpression(precedence int) ast.Expression {
94134
left := p.parsePrefixExpression()
95135
if left == nil {
@@ -543,6 +583,16 @@ func (p *Parser) parseGroupedOrTuple() ast.Expression {
543583
pos := p.current.Pos
544584
p.nextToken() // skip (
545585

586+
// Handle empty tuple ()
587+
if p.currentIs(token.RPAREN) {
588+
p.nextToken()
589+
return &ast.Literal{
590+
Position: pos,
591+
Type: ast.LiteralTuple,
592+
Value: []ast.Expression{},
593+
}
594+
}
595+
546596
// Check for subquery
547597
if p.currentIs(token.SELECT) || p.currentIs(token.WITH) {
548598
subquery := p.parseSelectWithUnion()
@@ -661,12 +711,22 @@ func (p *Parser) parseCast() ast.Expression {
661711
// Use ALIAS_PREC to avoid consuming AS as an alias operator
662712
expr.Expr = p.parseExpression(ALIAS_PREC)
663713

664-
if !p.expect(token.AS) {
665-
return nil
714+
// Handle both CAST(x AS Type) and CAST(x, 'Type') syntax
715+
if p.currentIs(token.AS) {
716+
p.nextToken()
717+
expr.Type = p.parseDataType()
718+
} else if p.currentIs(token.COMMA) {
719+
p.nextToken()
720+
// Type is given as a string literal
721+
if p.currentIs(token.STRING) {
722+
expr.Type = &ast.DataType{
723+
Position: p.current.Pos,
724+
Name: p.current.Value,
725+
}
726+
p.nextToken()
727+
}
666728
}
667729

668-
expr.Type = p.parseDataType()
669-
670730
p.expect(token.RPAREN)
671731

672732
return expr

parser/parser.go

Lines changed: 19 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -291,6 +291,12 @@ func (p *Parser) parseSelect() *ast.SelectQuery {
291291
sel.Offset = sel.Limit
292292
sel.Limit = p.parseExpression(LOWEST)
293293
}
294+
295+
// WITH TIES modifier
296+
if p.currentIs(token.WITH) && p.peekIs(token.TIES) {
297+
p.nextToken() // skip WITH
298+
p.nextToken() // skip TIES
299+
}
294300
}
295301

296302
// Parse OFFSET clause
@@ -1136,8 +1142,8 @@ func (p *Parser) parseColumnDeclaration() *ast.ColumnDeclaration {
11361142
Position: p.current.Pos,
11371143
}
11381144

1139-
// Parse column name
1140-
if p.currentIs(token.IDENT) {
1145+
// Parse column name (can be identifier or keyword like KEY)
1146+
if p.currentIs(token.IDENT) || p.current.Token.IsKeyword() {
11411147
col.Name = p.current.Value
11421148
p.nextToken()
11431149
} else {
@@ -1188,7 +1194,8 @@ func (p *Parser) parseColumnDeclaration() *ast.ColumnDeclaration {
11881194
}
11891195

11901196
func (p *Parser) parseDataType() *ast.DataType {
1191-
if !p.currentIs(token.IDENT) {
1197+
// Type names can be identifiers or keywords (Array, Nested, Key, etc.)
1198+
if !p.currentIs(token.IDENT) && !p.current.Token.IsKeyword() {
11921199
return nil
11931200
}
11941201

@@ -1203,7 +1210,8 @@ func (p *Parser) parseDataType() *ast.DataType {
12031210
p.nextToken()
12041211
for !p.currentIs(token.RPAREN) && !p.currentIs(token.EOF) {
12051212
// Could be another data type or an expression
1206-
if p.currentIs(token.IDENT) && p.isDataTypeName(p.current.Value) {
1213+
// Type names can be identifiers or keywords (Array, Nested, etc.)
1214+
if (p.currentIs(token.IDENT) || p.current.Token.IsKeyword()) && p.isDataTypeName(p.current.Value) {
12071215
dt.Parameters = append(dt.Parameters, p.parseDataType())
12081216
} else {
12091217
dt.Parameters = append(dt.Parameters, p.parseExpression(LOWEST))
@@ -1223,9 +1231,9 @@ func (p *Parser) parseDataType() *ast.DataType {
12231231
func (p *Parser) isDataTypeName(name string) bool {
12241232
upper := strings.ToUpper(name)
12251233
types := []string{
1226-
"INT8", "INT16", "INT32", "INT64", "INT128", "INT256",
1234+
"INT", "INT8", "INT16", "INT32", "INT64", "INT128", "INT256",
12271235
"UINT8", "UINT16", "UINT32", "UINT64", "UINT128", "UINT256",
1228-
"FLOAT32", "FLOAT64",
1236+
"FLOAT32", "FLOAT64", "FLOAT",
12291237
"DECIMAL", "DECIMAL32", "DECIMAL64", "DECIMAL128", "DECIMAL256",
12301238
"STRING", "FIXEDSTRING",
12311239
"UUID", "DATE", "DATE32", "DATETIME", "DATETIME64",
@@ -1235,6 +1243,11 @@ func (p *Parser) isDataTypeName(name string) bool {
12351243
"BOOL", "BOOLEAN",
12361244
"IPV4", "IPV6",
12371245
"NOTHING", "INTERVAL",
1246+
"JSON", "OBJECT", "VARIANT",
1247+
"AGGREGATEFUNCTION", "SIMPLEAGGREGATEFUNCTION",
1248+
"POINT", "RING", "POLYGON", "MULTIPOLYGON",
1249+
"TIME64", "TIME",
1250+
"DYNAMIC",
12381251
}
12391252
for _, t := range types {
12401253
if upper == t {

0 commit comments

Comments
 (0)