diff --git a/Taskfile.yml b/Taskfile.yml index 7a9bf3fd..0bf4fd82 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -79,7 +79,15 @@ tasks: desc: Run tests with race detection (CRITICAL for production) cmds: - echo "Running tests with race detection..." - - go test -race -timeout 60s ./... + - go test -race -timeout 60s $(go list ./... | grep -v /cbinding) + + test:cbinding: + desc: Test C binding package (requires CGO) + env: + CGO_ENABLED: '1' + cmds: + - echo "Running cbinding tests with CGO enabled..." + - go test -race -timeout 60s ./pkg/cbinding/... test:pkg: desc: Run tests for a specific package (use PKG=./pkg/sql/parser) diff --git a/pkg/sql/ast/ast.go b/pkg/sql/ast/ast.go index 64b9fe9d..7816cec0 100644 --- a/pkg/sql/ast/ast.go +++ b/pkg/sql/ast/ast.go @@ -128,6 +128,7 @@ type CommonTableExpr struct { Name string Columns []string Statement Statement + ScalarExpr Expression // ClickHouse: WITH AS (scalar CTE, no subquery) Materialized *bool // nil = default, true = MATERIALIZED, false = NOT MATERIALIZED Pos models.Location // Source position of the CTE name (1-based line and column) } @@ -429,8 +430,9 @@ type SelectStatement struct { From []TableReference TableName string // Added for pool operations Joins []JoinClause - PrewhereClause Expression // ClickHouse PREWHERE clause (applied before WHERE, before reading data) - Sample *SampleClause // ClickHouse SAMPLE clause (comes after FROM/FINAL, before PREWHERE) + ArrayJoin *ArrayJoinClause // ClickHouse ARRAY JOIN / LEFT ARRAY JOIN clause + PrewhereClause Expression // ClickHouse PREWHERE clause (applied before WHERE, before reading data) + Sample *SampleClause // ClickHouse SAMPLE clause (comes after FROM/FINAL, before PREWHERE) Where Expression GroupBy []Expression Having Expression @@ -2294,6 +2296,20 @@ func (c ConnectByClause) Children() []Node { // via TABLESAMPLE, but this implementation targets SAMPLE). 
// Value is stored as a raw string to preserve the original representation // (e.g., "0.1", "1000", "1/10"). +// ArrayJoinClause represents a ClickHouse ARRAY JOIN or LEFT ARRAY JOIN clause. +// Syntax: [LEFT] ARRAY JOIN expr [AS alias], expr [AS alias], ... +type ArrayJoinClause struct { + Left bool // true for LEFT ARRAY JOIN + Elements []ArrayJoinElement // One or more join elements + Pos models.Location +} + +// ArrayJoinElement is a single expression in an ARRAY JOIN clause with an optional alias. +type ArrayJoinElement struct { + Expr Expression + Alias string +} + type SampleClause struct { // Value is the sampling size/ratio as a raw token string (e.g., "0.1", "1000", "1/10"). Value string diff --git a/pkg/sql/ast/sql.go b/pkg/sql/ast/sql.go index eea64033..9569cfe3 100644 --- a/pkg/sql/ast/sql.go +++ b/pkg/sql/ast/sql.go @@ -562,6 +562,23 @@ func (s *SelectStatement) SQL() string { sb.WriteString(joinSQL(&j)) } + if s.ArrayJoin != nil { + if s.ArrayJoin.Left { + sb.WriteString(" LEFT ARRAY JOIN ") + } else { + sb.WriteString(" ARRAY JOIN ") + } + elems := make([]string, len(s.ArrayJoin.Elements)) + for i, e := range s.ArrayJoin.Elements { + elemStr := exprSQL(e.Expr) + if e.Alias != "" { + elemStr += " AS " + e.Alias + } + elems[i] = elemStr + } + sb.WriteString(strings.Join(elems, ", ")) + } + if s.PrewhereClause != nil { sb.WriteString(" PREWHERE ") sb.WriteString(exprSQL(s.PrewhereClause)) @@ -1406,6 +1423,13 @@ func forSQL(f *ForClause) string { func cteSQL(cte *CommonTableExpr) string { sb := getBuilder() defer putBuilder(sb) + // ClickHouse scalar CTE: WITH AS + if cte.ScalarExpr != nil { + sb.WriteString(exprSQL(cte.ScalarExpr)) + sb.WriteString(" AS ") + sb.WriteString(cte.Name) + return sb.String() + } sb.WriteString(cte.Name) if len(cte.Columns) > 0 { sb.WriteString(" (") @@ -1740,6 +1764,83 @@ func (p *PeriodDefinition) SQL() string { return b.String() } +// SQL returns the SQL string for a PRAGMA statement (SQLite). 
+func (p *PragmaStatement) SQL() string { + if p == nil { + return "" + } + sb := getBuilder() + defer putBuilder(sb) + sb.WriteString("PRAGMA ") + sb.WriteString(p.Name) + if p.Arg != "" { + sb.WriteString("(") + sb.WriteString(p.Arg) + sb.WriteString(")") + } else if p.Value != "" { + sb.WriteString(" = ") + sb.WriteString(p.Value) + } + return sb.String() +} + +// SQL returns the SQL string for a SHOW statement (MySQL). +func (s *ShowStatement) SQL() string { + if s == nil { + return "" + } + sb := getBuilder() + defer putBuilder(sb) + sb.WriteString("SHOW ") + sb.WriteString(s.ShowType) + if s.ObjectName != "" { + sb.WriteString(" ") + sb.WriteString(s.ObjectName) + } + if s.From != "" { + sb.WriteString(" FROM ") + sb.WriteString(s.From) + } + return sb.String() +} + +// SQL returns the SQL string for a DESCRIBE statement (MySQL). +func (d *DescribeStatement) SQL() string { + if d == nil { + return "" + } + return "DESCRIBE " + d.TableName +} + +// SQL returns the SQL string for a REPLACE statement (MySQL). +func (r *ReplaceStatement) SQL() string { + if r == nil { + return "" + } + sb := getBuilder() + defer putBuilder(sb) + sb.WriteString("REPLACE INTO ") + sb.WriteString(r.TableName) + if len(r.Columns) > 0 { + sb.WriteString(" (") + sb.WriteString(exprListSQL(r.Columns)) + sb.WriteString(")") + } + if len(r.Values) > 0 { + sb.WriteString(" VALUES ") + rows := make([]string, len(r.Values)) + for idx, row := range r.Values { + vals := make([]string, len(row)) + for j, v := range row { + vals[j] = exprSQL(v) + } + rows[idx] = "(" + strings.Join(vals, ", ") + ")" + } + sb.WriteString(strings.Join(rows, ", ")) + } + return sb.String() +} + // ToSQL returns the SQL string for a CONNECT BY clause (MariaDB 10.2+). 
func (c *ConnectByClause) ToSQL() string {
 	var b strings.Builder
diff --git a/pkg/sql/parser/cte.go b/pkg/sql/parser/cte.go
index c0de6370..c8fc0b8e 100644
--- a/pkg/sql/parser/cte.go
+++ b/pkg/sql/parser/cte.go
@@ -23,6 +23,7 @@ import (
 	goerrors "github.com/ajitpratap0/GoSQLX/pkg/errors"
 	"github.com/ajitpratap0/GoSQLX/pkg/models"
 	"github.com/ajitpratap0/GoSQLX/pkg/sql/ast"
+	"github.com/ajitpratap0/GoSQLX/pkg/sql/keywords"
 )

 // WITH summary(region, total) AS (SELECT region, SUM(amount) FROM sales GROUP BY region) SELECT * FROM summary
@@ -124,6 +125,36 @@ func (p *Parser) parseCommonTableExpr() (*ast.CommonTableExpr, error) {
 		)
 	}

+	// ClickHouse scalar CTE: WITH <expr> AS <name>, ...
+	// Detected when the token after WITH is not an identifier, or is an
+	// identifier not followed by AS/( (which would be a standard CTE).
+	// One token of lookahead keeps expressions that merely begin with an
+	// identifier (e.g. WITH a + 1 AS b) out of the standard-CTE path.
+	nextType := p.peekToken().Token.Type
+	standardCTE := p.isIdentifier() &&
+		(nextType == models.TokenTypeAs || nextType == models.TokenTypeLParen)
+	if p.dialect == string(keywords.DialectClickHouse) && !standardCTE {
+		scalarExpr, err := p.parseExpression()
+		if err != nil {
+			return nil, err
+		}
+		if !p.isType(models.TokenTypeAs) {
+			return nil, p.expectedError("AS after scalar CTE expression")
+		}
+		p.advance() // Consume AS
+		if !p.isIdentifier() {
+			return nil, p.expectedError("name after AS in scalar CTE")
+		}
+		scalarName := p.currentToken.Token.Value
+		scalarPos := p.currentLocation()
+		p.advance()
+		return &ast.CommonTableExpr{
+			Name:       scalarName,
+			ScalarExpr: scalarExpr,
+			Pos:        scalarPos,
+		}, nil
+	}
+
 	// Parse CTE name (supports double-quoted identifiers)
 	if !p.isIdentifier() {
 		return nil, p.expectedError("CTE name")
diff --git a/pkg/sql/parser/ddl.go b/pkg/sql/parser/ddl.go
index bf3ffb50..902aa124 100644
--- a/pkg/sql/parser/ddl.go
+++ b/pkg/sql/parser/ddl.go
@@ -199,6 +199,7 @@ func (p *Parser) parseCreateTable(temporary bool) (*ast.CreateTableStatement, er
 	}

 	// CREATE TABLE ... AS SELECT — no column list, just a query.
+	// ClickHouse also: CREATE TABLE t AS source_table ENGINE = ...
if p.isType(models.TokenTypeAs) { p.advance() // AS if p.isType(models.TokenTypeSelect) || p.isType(models.TokenTypeWith) { @@ -210,6 +211,16 @@ func (p *Parser) parseCreateTable(temporary bool) (*ast.CreateTableStatement, er _ = query // CTAS query not modeled on CreateTableStatement yet return stmt, nil } + // ClickHouse: CREATE TABLE t AS ENGINE = ... + // The identifier is the source table; consume remaining clauses. + if p.dialect == string(keywords.DialectClickHouse) && p.isIdentifier() { + p.advance() // Consume source table name + // Consume ENGINE and trailing clauses + for !p.isType(models.TokenTypeEOF) && !p.isType(models.TokenTypeSemicolon) { + p.advance() + } + return stmt, nil + } return nil, p.expectedError("SELECT after AS") } diff --git a/pkg/sql/parser/ddl_view.go b/pkg/sql/parser/ddl_view.go index da1aba8f..5f153618 100644 --- a/pkg/sql/parser/ddl_view.go +++ b/pkg/sql/parser/ddl_view.go @@ -21,6 +21,7 @@ import ( goerrors "github.com/ajitpratap0/GoSQLX/pkg/errors" "github.com/ajitpratap0/GoSQLX/pkg/models" "github.com/ajitpratap0/GoSQLX/pkg/sql/ast" + "github.com/ajitpratap0/GoSQLX/pkg/sql/keywords" ) // parseCreateView parses CREATE [OR REPLACE] [TEMPORARY] VIEW statement @@ -187,6 +188,26 @@ func (p *Parser) parseCreateMaterializedView() (*ast.CreateMaterializedViewState p.advance() } + // ClickHouse: optional TO before ENGINE/AS + if p.dialect == string(keywords.DialectClickHouse) && p.isType(models.TokenTypeTo) { + p.advance() // Consume TO + toName, toErr := p.parseQualifiedName() + if toErr != nil { + return nil, p.expectedError("target table after TO") + } + stmt.Tablespace = toName // reuse Tablespace for ClickHouse TO + } + + // ClickHouse: optional ENGINE = ... ORDER BY ... 
before AS SELECT + if p.dialect == string(keywords.DialectClickHouse) { + for p.isTokenMatch("ENGINE") || p.isType(models.TokenTypeOrder) || p.isTokenMatch("PRIMARY") || p.isTokenMatch("PARTITION") || p.isTokenMatch("SETTINGS") { + // Consume all engine clauses token-by-token until AS + for !p.isType(models.TokenTypeAs) && !p.isType(models.TokenTypeEOF) && !p.isType(models.TokenTypeSemicolon) { + p.advance() + } + } + } + // Expect AS if !p.isType(models.TokenTypeAs) { return nil, p.expectedError("AS") diff --git a/pkg/sql/parser/dml_insert.go b/pkg/sql/parser/dml_insert.go index 2290ec47..f92c795d 100644 --- a/pkg/sql/parser/dml_insert.go +++ b/pkg/sql/parser/dml_insert.go @@ -68,9 +68,11 @@ func (p *Parser) parseInsertStatement() (ast.Statement, error) { p.advance() // Consume ) } - // Parse SQL Server OUTPUT clause (between column list and VALUES) + // Parse SQL Server OUTPUT clause (between column list and VALUES). + // Accept OUTPUT regardless of dialect — the keyword is unambiguous here + // and allows dialect-agnostic parsing of T-SQL INSERT statements. var outputCols []ast.Expression - if p.dialect == string(keywords.DialectSQLServer) && strings.ToUpper(p.currentToken.Token.Value) == "OUTPUT" { + if strings.ToUpper(p.currentToken.Token.Value) == "OUTPUT" { p.advance() // Consume OUTPUT var err error outputCols, err = p.parseOutputColumns() diff --git a/pkg/sql/parser/expressions_literal.go b/pkg/sql/parser/expressions_literal.go index 2ef99081..04516108 100644 --- a/pkg/sql/parser/expressions_literal.go +++ b/pkg/sql/parser/expressions_literal.go @@ -128,6 +128,29 @@ func (p *Parser) parsePrimaryExpression() (ast.Expression, error) { return p.parseExtractExpression() } + // Oracle/MariaDB pseudo-columns: ROWNUM, ROWID, LEVEL, SYSDATE, SYSTIMESTAMP. + // These are tokenized as keywords but act as column-like expressions. 
+ // We return them as zero-argument FunctionCall nodes so that implicit + // aliasing works naturally (SELECT ROWNUM rn → AliasedExpression) and + // they don't collide with the bare-Identifier alias guard. + if p.isType(models.TokenTypeKeyword) && p.isOraclePseudoColumn() { + identPos := p.currentLocation() + identName := p.currentToken.Token.Value + p.advance() + // SYSDATE() / SYSTIMESTAMP() — some drivers allow parens + if p.isType(models.TokenTypeLParen) { + funcCall, err := p.parseFunctionCall(identName) + if err != nil { + return nil, err + } + if funcCall.Pos.IsZero() { + funcCall.Pos = identPos + } + return funcCall, nil + } + return &ast.FunctionCall{Name: identName, Pos: identPos}, nil + } + if p.isType(models.TokenTypeIdentifier) || p.isType(models.TokenTypeDoubleQuotedString) || ((p.dialect == string(keywords.DialectSQLServer) || p.dialect == string(keywords.DialectClickHouse)) && p.isNonReservedKeyword()) { // Handle identifiers and function calls // Double-quoted strings are treated as identifiers in SQL (e.g., "column_name") diff --git a/pkg/sql/parser/oracle_test.go b/pkg/sql/parser/oracle_test.go new file mode 100644 index 00000000..f863b6b3 --- /dev/null +++ b/pkg/sql/parser/oracle_test.go @@ -0,0 +1,157 @@ +// Copyright 2026 GoSQLX Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+package parser_test
+
+import (
+	"testing"
+
+	"github.com/ajitpratap0/GoSQLX/pkg/gosqlx"
+	"github.com/ajitpratap0/GoSQLX/pkg/sql/ast"
+	"github.com/ajitpratap0/GoSQLX/pkg/sql/keywords"
+)
+
+func parseOracle(t *testing.T, sql string) *ast.AST {
+	t.Helper()
+	tree, err := gosqlx.ParseWithDialect(sql, keywords.DialectOracle)
+	if err != nil {
+		t.Fatalf("ParseWithDialect(Oracle) error: %v\n SQL: %s", err, sql)
+	}
+	return tree
+}
+
+// --- ROWNUM pseudo-column ---
+
+func TestOracle_Rownum_SimpleWhere(t *testing.T) {
+	tree := parseOracle(t, "SELECT * FROM users WHERE ROWNUM <= 10")
+	sel, ok := tree.Statements[0].(*ast.SelectStatement)
+	if !ok {
+		t.Fatalf("expected SelectStatement, got %T", tree.Statements[0])
+	}
+	if sel.Where == nil {
+		t.Error("expected WHERE clause")
+	}
+}
+
+func TestOracle_Rownum_Pagination(t *testing.T) {
+	sql := `SELECT * FROM (
+		SELECT a.*, ROWNUM rnum FROM (
+			SELECT * FROM users ORDER BY created_at DESC
+		) a WHERE ROWNUM <= 30
+	) WHERE rnum > 20`
+	parseOracle(t, sql)
+}
+
+func TestOracle_Level_InSelect(t *testing.T) {
+	parseOracle(t, "SELECT LEVEL, id FROM categories START WITH parent_id IS NULL CONNECT BY PRIOR id = parent_id")
+}
+
+func TestOracle_Sysdate(t *testing.T) {
+	parseOracle(t, "SELECT SYSDATE FROM DUAL")
+}
+
+// --- CONNECT BY ---
+
+func TestOracle_ConnectBy_WithIsNull(t *testing.T) {
+	sql := `SELECT employee_id, manager_id, name
+		FROM employees
+		START WITH manager_id IS NULL
+		CONNECT BY PRIOR employee_id = manager_id`
+	tree := parseOracle(t, sql)
+	sel, ok := tree.Statements[0].(*ast.SelectStatement)
+	if !ok {
+		t.Fatalf("expected SelectStatement, got %T", tree.Statements[0])
+	}
+	if sel.StartWith == nil {
+		t.Error("expected StartWith clause")
+	}
+	if sel.ConnectBy == nil {
+		t.Error("expected ConnectBy clause")
+	}
+}
+
+func TestOracle_ConnectBy_NoCycle(t *testing.T) {
+	sql := `SELECT id, parent_id FROM categories
+		START WITH parent_id IS NULL
+		CONNECT BY NOCYCLE PRIOR id = parent_id`
+	tree := parseOracle(t, sql)
+	sel, ok := tree.Statements[0].(*ast.SelectStatement) // comma-ok: fail the test instead of panicking
+	if !ok || sel.ConnectBy == nil || !sel.ConnectBy.NoCycle {
+		t.Error("expected SelectStatement with NOCYCLE modifier on ConnectBy")
+	}
+}
+
+func TestOracle_ConnectBy_SysConnectByPath(t *testing.T) {
+	parseOracle(t, `SELECT SYS_CONNECT_BY_PATH(name, '/') AS path FROM employees START WITH manager_id IS NULL CONNECT BY PRIOR id = manager_id`)
+}
+
+// --- PIVOT / UNPIVOT ---
+
+func TestOracle_Pivot(t *testing.T) {
+	sql := `SELECT * FROM (
+		SELECT product, region, sales FROM sales_data
+	) PIVOT (
+		SUM(sales) FOR region IN ('North' AS north, 'South' AS south, 'East' AS east, 'West' AS west)
+	)`
+	tree := parseOracle(t, sql)
+	sel, ok := tree.Statements[0].(*ast.SelectStatement) // comma-ok: fail the test instead of panicking
+	if !ok || len(sel.From) == 0 {
+		t.Fatalf("expected SelectStatement with FROM clause, got %T", tree.Statements[0])
+	}
+	if sel.From[0].Pivot == nil {
+		t.Error("expected PIVOT clause on first table reference")
+	}
+}
+
+func TestOracle_Unpivot(t *testing.T) {
+	sql := `SELECT product, region, sales FROM regional_sales
+		UNPIVOT (
+			sales FOR region IN (north_sales AS 'North', south_sales AS 'South', east_sales AS 'East', west_sales AS 'West')
+		)`
+	tree := parseOracle(t, sql)
+	sel, ok := tree.Statements[0].(*ast.SelectStatement) // comma-ok: fail the test instead of panicking
+	if !ok || len(sel.From) == 0 {
+		t.Fatalf("expected SelectStatement with FROM clause, got %T", tree.Statements[0])
+	}
+	if sel.From[0].Unpivot == nil {
+		t.Error("expected UNPIVOT clause on first table reference")
+	}
+}
+
+// --- Backslash in string literals ---
+
+func TestOracle_RegexpBackslash(t *testing.T) {
+	parseOracle(t, `SELECT name, email FROM users WHERE REGEXP_LIKE(email, '^\w+@[\w.]+\.\w+$')`)
+}
+
+// --- Standard Oracle features ---
+
+func TestOracle_Merge(t *testing.T) {
+	sql := `MERGE INTO target t USING source s ON t.id = s.id
+		WHEN MATCHED THEN UPDATE SET t.name = s.name
+		WHEN NOT MATCHED THEN INSERT (id, name) VALUES (s.id, s.name)`
+	parseOracle(t, sql)
+}
+
+func TestOracle_OffsetFetch(t *testing.T) {
+	parseOracle(t, "SELECT * FROM users ORDER BY id OFFSET 10 ROWS FETCH NEXT 20 ROWS ONLY")
+}
+
+func TestOracle_NVL(t *testing.T) { + parseOracle(t, "SELECT NVL(name, 'Unknown') FROM users") +} + +func TestOracle_Decode(t *testing.T) { + parseOracle(t, "SELECT DECODE(status, 'A', 'Active', 'I', 'Inactive', 'Unknown') FROM users") +} diff --git a/pkg/sql/parser/parser.go b/pkg/sql/parser/parser.go index 4c065e71..1a2ed7dd 100644 --- a/pkg/sql/parser/parser.go +++ b/pkg/sql/parser/parser.go @@ -1095,12 +1095,41 @@ func (p *Parser) isNonReservedKeyword() bool { return false } +// isOraclePseudoColumn returns true when the current keyword token is an +// Oracle/MariaDB pseudo-column that should be parsed as an expression. +func (p *Parser) isOraclePseudoColumn() bool { + return p.isOraclePseudoColumn2(p.currentToken.Token.Value) +} + +// isOraclePseudoColumn2 checks if a given name is an Oracle pseudo-column. +func (p *Parser) isOraclePseudoColumn2(name string) bool { + switch strings.ToUpper(name) { + case "ROWNUM", "ROWID", "LEVEL", "SYSDATE", "SYSTIMESTAMP": + return true + } + return false +} + +// isWindowClauseKeyword returns true if the current token is the WINDOW +// keyword that starts a named window clause (SQL:2003). Must not be consumed +// as a table alias. +func (p *Parser) isWindowClauseKeyword() bool { + return p.isIdentifier() && strings.EqualFold(p.currentToken.Token.Value, "WINDOW") +} + +// isSettingsKeyword returns true if the current token is the ClickHouse +// SETTINGS keyword. Must not be consumed as a table alias. +func (p *Parser) isSettingsKeyword() bool { + return p.dialect == string(keywords.DialectClickHouse) && + p.isTokenMatch("SETTINGS") +} + // canBeAlias checks if current token can be used as an alias. // Aliases can be IDENT, double-quoted identifiers, or certain non-reserved keywords, // but NOT contextual clause keywords that would be consumed as aliases by mistake -// (e.g. MINUS in Snowflake/Oracle, QUALIFY in Snowflake/BigQuery). +// (e.g. MINUS in Snowflake/Oracle, QUALIFY in Snowflake/BigQuery, WINDOW). 
func (p *Parser) canBeAlias() bool {
-	if p.isMinusSetOp() || p.isQualifyKeyword() {
+	if p.isMinusSetOp() || p.isQualifyKeyword() || p.isWindowClauseKeyword() || p.isSettingsKeyword() {
 		return false
 	}
 	return p.isIdentifier() || p.isNonReservedKeyword()
@@ -1116,6 +1145,13 @@ func (p *Parser) parseAlterTableStmt() (ast.Statement, error) {

 // isJoinKeyword checks if current token is a JOIN-related keyword
 func (p *Parser) isJoinKeyword() bool {
+	// ClickHouse: LEFT ARRAY JOIN is not a regular JOIN — exclude it so
+	// the ARRAY JOIN parser handles it separately.
+	if p.dialect == string(keywords.DialectClickHouse) &&
+		p.isType(models.TokenTypeLeft) &&
+		p.peekToken().Token.Type == models.TokenTypeArray {
+		return false
+	}
 	if p.isAnyType(
 		models.TokenTypeJoin, models.TokenTypeInner, models.TokenTypeLeft,
 		models.TokenTypeRight, models.TokenTypeFull, models.TokenTypeCross,
diff --git a/pkg/sql/parser/pivot.go b/pkg/sql/parser/pivot.go
index 71f1e339..8d62bde1 100644
--- a/pkg/sql/parser/pivot.go
+++ b/pkg/sql/parser/pivot.go
@@ -154,14 +154,27 @@ func (p *Parser) parsePivotClause() (*ast.PivotClause, error) {
 	}
 	p.advance() // consume (

-	// Parse IN values — identifiers (possibly bracket-quoted in SQL Server)
+	// Parse IN values — identifiers, numbers, or string literals, each with
+	// an optional AS alias (Oracle syntax: 'North' AS north).
 	var inValues []string
 	for !p.isType(models.TokenTypeRParen) && !p.isType(models.TokenTypeEOF) {
 		if !p.isIdentifier() && !p.isType(models.TokenTypeNumber) && !p.isStringLiteral() {
 			return nil, p.expectedError("value in PIVOT IN list")
 		}
-		inValues = append(inValues, renderQuotedIdent(p.currentToken.Token))
+		val := renderQuotedIdent(p.currentToken.Token)
 		p.advance()
+		// Optional alias: AS <alias>. Once AS is consumed an alias is
+		// mandatory; silently skipping it would let the next token be
+		// misread as a new IN value.
+		if p.isType(models.TokenTypeAs) {
+			p.advance() // consume AS
+			if !p.isIdentifier() && !p.isNonReservedKeyword() {
+				return nil, p.expectedError("alias after AS in PIVOT IN list")
+			}
+			val += " AS " + renderQuotedIdent(p.currentToken.Token)
+			p.advance()
+		}
+		inValues = append(inValues, val)
 		if p.isType(models.TokenTypeComma) {
 			p.advance()
 		}
@@ -231,14 +244,27 @@ func (p *Parser) parseUnpivotClause() (*ast.UnpivotClause, error) {
 	}
 	p.advance() // consume (

-	// Parse IN columns
+	// Parse IN columns — each may have an optional AS alias (Oracle:
+	// north_sales AS 'North').
 	var cols []string
 	for !p.isType(models.TokenTypeRParen) && !p.isType(models.TokenTypeEOF) {
 		if !p.isIdentifier() {
 			return nil, p.expectedError("column name in UNPIVOT IN list")
 		}
-		cols = append(cols, renderQuotedIdent(p.currentToken.Token))
+		col := renderQuotedIdent(p.currentToken.Token)
 		p.advance()
+		// Optional alias: AS <alias>. Once AS is consumed an alias is
+		// mandatory; silently skipping it would let the next token be
+		// misread as a new IN column.
+		if p.isType(models.TokenTypeAs) {
+			p.advance() // consume AS
+			if !p.isStringLiteral() && !p.isIdentifier() && !p.isNonReservedKeyword() {
+				return nil, p.expectedError("alias after AS in UNPIVOT IN list")
+			}
+			col += " AS " + renderQuotedIdent(p.currentToken.Token)
+			p.advance()
+		}
+		cols = append(cols, col)
 		if p.isType(models.TokenTypeComma) {
 			p.advance()
 		}
@@ -391,5 +417,12 @@ func (p *Parser) isSampleKeyword() bool {
 		return true
 	}
 	nextUpper := strings.ToUpper(next.Value)
-	return nextUpper == "BERNOULLI" || nextUpper == "SYSTEM" || nextUpper == "BLOCK" || nextUpper == "ROW"
+	if nextUpper == "BERNOULLI" || nextUpper == "SYSTEM" || nextUpper == "BLOCK" || nextUpper == "ROW" {
+		return true
+	}
+	// ClickHouse: SAMPLE followed by a number (SAMPLE 0.1, SAMPLE 10000, SAMPLE 1/10)
+	if p.dialect == string(keywords.DialectClickHouse) && next.Type == models.TokenTypeNumber {
+		return true
+	}
+	return false
 }
diff --git a/pkg/sql/parser/select.go b/pkg/sql/parser/select.go
index 0f9ec65b..c9a64b74 100644
--- a/pkg/sql/parser/select.go
+++ b/pkg/sql/parser/select.go
@@ -87,6 +87,13 @@ func (p *Parser) parseSelectStatement() (ast.Statement, error) {
 		TableName: tableName,
 	}

+	// ClickHouse ARRAY JOIN / LEFT ARRAY JOIN. NOTE(review): ClickHouse's grammar lists SAMPLE before ARRAY JOIN — confirm this ordering.
+	if p.dialect == string(keywords.DialectClickHouse) {
+		if selectStmt.ArrayJoin, err = p.parseArrayJoinClause(); err != nil {
+			return nil, err
+		}
+	}
+
 	// SAMPLE (ClickHouse-specific, specifies sampling rate/size; comes after FROM/FINAL)
 	if p.dialect == string(keywords.DialectClickHouse) && p.isTokenMatch("SAMPLE") {
 		if selectStmt.Sample, err = p.parseSampleClause(); err != nil {
@@ -169,6 +176,33 @@ func (p *Parser) parseSelectStatement() (ast.Statement, error) {
 		}
 	}

+	// SQL:2003 WINDOW clause: WINDOW w AS (spec), w2 AS (spec2), ...
+	// Named window definitions that can be referenced by OVER w.
+ if strings.EqualFold(p.currentToken.Token.Value, "WINDOW") { + p.advance() // Consume WINDOW + for { + if !p.isIdentifier() { + return nil, p.expectedError("window name after WINDOW") + } + winName := p.currentToken.Token.Value + p.advance() + if !p.isType(models.TokenTypeAs) { + return nil, p.expectedError("AS after window name") + } + p.advance() // Consume AS + winSpec, winErr := p.parseWindowSpec() + if winErr != nil { + return nil, winErr + } + winSpec.Name = winName + selectStmt.Windows = append(selectStmt.Windows, *winSpec) + if !p.isType(models.TokenTypeComma) { + break + } + p.advance() // Consume comma + } + } + // ORDER BY if selectStmt.OrderBy, err = p.parseOrderByClause(); err != nil { return nil, err @@ -339,7 +373,13 @@ func (p *Parser) parseSelectColumnList() ([]ast.Expression, error) { p.advance() expr = &ast.AliasedExpression{Expr: expr, Alias: alias} } else if p.canBeAlias() { - if _, ok := expr.(*ast.Identifier); !ok { + // Implicit aliasing (SELECT expr alias) is allowed for non-identifier + // expressions (functions, literals, casts, etc.) and for bare identifiers + // that are known pseudo-columns (ROWNUM, SYSDATE, LEVEL, etc.) where + // the alias pattern is idiomatic: SELECT ROWNUM rn FROM ... 
+			ident, isIdent := expr.(*ast.Identifier)
+			allowAlias := !isIdent || (ident.Table == "" && p.isOraclePseudoColumn2(ident.Name))
+			if allowAlias {
 				alias := p.currentToken.Token.Value
 				p.advance()
 				expr = &ast.AliasedExpression{Expr: expr, Alias: alias}
diff --git a/pkg/sql/parser/select_clauses.go b/pkg/sql/parser/select_clauses.go
index 444cb3c0..80f2f72e 100644
--- a/pkg/sql/parser/select_clauses.go
+++ b/pkg/sql/parser/select_clauses.go
@@ -626,8 +626,8 @@ func (p *Parser) parseLimitOffsetClause() (limit *int, offset *int, err error) {
 		_, _ = fmt.Sscanf(p.currentToken.Token.Value, "%d", &firstVal)
 		p.advance()

-		// MySQL: LIMIT offset, count
-		if p.dialect == "mysql" && p.isType(models.TokenTypeComma) {
+		// MySQL / MariaDB / ClickHouse: LIMIT offset, count
+		if (p.dialect == string(keywords.DialectMySQL) || p.dialect == string(keywords.DialectMariaDB) || p.dialect == string(keywords.DialectClickHouse)) && p.isType(models.TokenTypeComma) {
 			p.advance()
 			if !p.isNumericLiteral() {
 				return nil, nil, p.expectedError("integer for LIMIT count")
@@ -815,3 +815,55 @@ func (p *Parser) parseForClause() (*ast.ForClause, error) {

 	return forClause, nil
 }
+
+// parseArrayJoinClause parses ClickHouse [LEFT] ARRAY JOIN expr [AS alias], ...
+// Returns nil if the current token is not ARRAY (or LEFT ARRAY).
+func (p *Parser) parseArrayJoinClause() (*ast.ArrayJoinClause, error) {
+	// LEFT only starts this clause when ARRAY follows; a plain LEFT JOIN is
+	// left to the regular join parser.
+	isLeft := p.isType(models.TokenTypeLeft) &&
+		p.peekToken().Token.Type == models.TokenTypeArray
+	if !isLeft && !p.isType(models.TokenTypeArray) {
+		return nil, nil
+	}
+
+	pos := p.currentLocation()
+	if isLeft {
+		p.advance() // Consume LEFT
+	}
+	// The guard above guarantees the current token is ARRAY at this point.
+	p.advance() // Consume ARRAY
+
+	// Must be JOIN
+	if !p.isType(models.TokenTypeJoin) {
+		return nil, p.expectedError("JOIN after ARRAY")
+	}
+	p.advance() // Consume JOIN
+
+	clause := &ast.ArrayJoinClause{Left: isLeft, Pos: pos}
+	for {
+		expr, err := p.parseExpression()
+		if err != nil {
+			return nil, err
+		}
+		elem := ast.ArrayJoinElement{Expr: expr}
+		// Optional alias: AS name or bare name
+		if p.isType(models.TokenTypeAs) {
+			p.advance() // Consume AS
+			if !p.isIdentifier() {
+				return nil, p.expectedError("alias after AS in ARRAY JOIN")
+			}
+			elem.Alias = p.currentToken.Token.Value
+			p.advance()
+		} else if p.canBeAlias() {
+			elem.Alias = p.currentToken.Token.Value
+			p.advance()
+		}
+		clause.Elements = append(clause.Elements, elem)
+		if !p.isType(models.TokenTypeComma) {
+			break
+		}
+		p.advance() // Consume comma
+	}
+	return clause, nil
+}
diff --git a/pkg/sql/parser/select_subquery.go b/pkg/sql/parser/select_subquery.go
index 76fb0171..e5c94dfd 100644
--- a/pkg/sql/parser/select_subquery.go
+++ b/pkg/sql/parser/select_subquery.go
@@ -131,8 +131,11 @@ func (p *Parser) parseFromTableReference() (ast.TableReference, error) {
 		// TABLESAMPLE [method] (N [ROWS])
 		// Consume permissively — the method and paren block are consumed
 		// but not yet modeled on the AST.
- if strings.EqualFold(p.currentToken.Token.Value, "SAMPLE") || - strings.EqualFold(p.currentToken.Token.Value, "TABLESAMPLE") { + // Skip for ClickHouse when SAMPLE is followed by a number — it uses + // a SELECT-level SAMPLE clause (not a per-table clause). + if (strings.EqualFold(p.currentToken.Token.Value, "SAMPLE") || + strings.EqualFold(p.currentToken.Token.Value, "TABLESAMPLE")) && + !(p.dialect == string(keywords.DialectClickHouse) && p.peekToken().Token.Type == models.TokenTypeNumber) { p.advance() // SAMPLE / TABLESAMPLE // Optional method name upper := strings.ToUpper(p.currentToken.Token.Value) @@ -179,7 +182,7 @@ func (p *Parser) parseFromTableReference() (ast.TableReference, error) { // Similarly, START followed by WITH is a hierarchical query seed, not an alias. // Don't consume PIVOT/UNPIVOT as a table alias — they are contextual // keywords in SQL Server/Oracle and must reach the pivot-clause parser below. - if (p.isIdentifier() || p.isType(models.TokenTypeAs)) && !p.isMariaDBClauseStart() && !p.isPivotKeyword() && !p.isUnpivotKeyword() && !p.isQualifyKeyword() && !p.isMinusSetOp() && !p.isSnowflakeTimeTravelStart() && !p.isSampleKeyword() && !p.isMatchRecognizeKeyword() { + if (p.isIdentifier() || p.isType(models.TokenTypeAs)) && !p.isMariaDBClauseStart() && !p.isPivotKeyword() && !p.isUnpivotKeyword() && !p.isQualifyKeyword() && !p.isMinusSetOp() && !p.isSnowflakeTimeTravelStart() && !p.isSampleKeyword() && !p.isMatchRecognizeKeyword() && !p.isWindowClauseKeyword() && !p.isSettingsKeyword() { if p.isType(models.TokenTypeAs) { p.advance() // Consume AS if !p.isIdentifier() { @@ -310,7 +313,7 @@ func (p *Parser) parseJoinedTableRef(joinType string) (ast.TableReference, error // Similarly, START followed by WITH is a hierarchical query seed, not an alias. // Don't consume PIVOT/UNPIVOT as a table alias — they are contextual // keywords in SQL Server/Oracle and must reach the pivot-clause parser below. 
- if (p.isIdentifier() || p.isType(models.TokenTypeAs)) && !p.isMariaDBClauseStart() && !p.isPivotKeyword() && !p.isUnpivotKeyword() && !p.isQualifyKeyword() && !p.isMinusSetOp() && !p.isSnowflakeTimeTravelStart() && !p.isSampleKeyword() && !p.isMatchRecognizeKeyword() { + if (p.isIdentifier() || p.isType(models.TokenTypeAs)) && !p.isMariaDBClauseStart() && !p.isPivotKeyword() && !p.isUnpivotKeyword() && !p.isQualifyKeyword() && !p.isMinusSetOp() && !p.isSnowflakeTimeTravelStart() && !p.isSampleKeyword() && !p.isMatchRecognizeKeyword() && !p.isWindowClauseKeyword() && !p.isSettingsKeyword() { if p.isType(models.TokenTypeAs) { p.advance() if !p.isIdentifier() { diff --git a/pkg/sql/tokenizer/tokenizer.go b/pkg/sql/tokenizer/tokenizer.go index f823cda9..f7a74e5a 100644 --- a/pkg/sql/tokenizer/tokenizer.go +++ b/pkg/sql/tokenizer/tokenizer.go @@ -1041,14 +1041,25 @@ func (t *Tokenizer) readQuotedString(quote rune) (models.Token, error) { } if r == '\\' { - // Handle escape sequences - if err := t.handleEscapeSequence(&buf); err != nil { - return models.Token{}, errors.InvalidSyntaxError( - fmt.Sprintf("invalid escape sequence: %v", err), - models.Location{Line: t.pos.Line, Column: t.pos.Column}, - string(t.input), - ) + // Dialects that don't use C-style backslash escapes (Oracle, + // SQL Server, SQLite, Snowflake, ClickHouse) treat '\' as a + // literal character inside string literals. Only MySQL and + // PostgreSQL (and their derivatives) use backslash escaping. + if t.dialect == keywords.DialectMySQL || t.dialect == keywords.DialectMariaDB || + t.dialect == keywords.DialectPostgreSQL || t.dialect == "" { + if err := t.handleEscapeSequence(&buf); err != nil { + return models.Token{}, errors.InvalidSyntaxError( + fmt.Sprintf("invalid escape sequence: %v", err), + models.Location{Line: t.pos.Line, Column: t.pos.Column}, + string(t.input), + ) + } + continue } + // For other dialects, treat backslash as a literal character. 
+ buf.WriteRune(r) + t.pos.Index += size + t.pos.Column++ continue }