Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions pkg/sql/ast/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,8 @@ type TableReference struct {
// Unpivot is the SQL Server / Oracle UNPIVOT clause for column-to-row transformation.
// Example: SELECT * FROM t UNPIVOT (sales FOR region IN (north_sales, south_sales)) AS unpvt
Unpivot *UnpivotClause
// MatchRecognize is the SQL:2016 row-pattern recognition clause (Snowflake, Oracle).
MatchRecognize *MatchRecognizeClause
}

func (t *TableReference) statementNode() {}
Expand Down Expand Up @@ -275,6 +277,9 @@ func (t TableReference) Children() []Node {
if t.Unpivot != nil {
nodes = append(nodes, t.Unpivot)
}
if t.MatchRecognize != nil {
nodes = append(nodes, t.MatchRecognize)
}
return nodes
}

Expand Down Expand Up @@ -2147,6 +2152,62 @@ type PeriodDefinition struct {
Pos models.Location // Source position of the PERIOD FOR keyword (1-based line and column)
}

// MatchRecognizeClause represents the SQL:2016 MATCH_RECOGNIZE clause for
// row-pattern recognition in a FROM clause (Snowflake, Oracle, Databricks).
//
//	MATCH_RECOGNIZE (
//	    PARTITION BY symbol
//	    ORDER BY ts
//	    MEASURES MATCH_NUMBER() AS m
//	    ALL ROWS PER MATCH
//	    PATTERN (UP+ DOWN+)
//	    DEFINE UP AS price > PREV(price), DOWN AS price < PREV(price)
//	)
type MatchRecognizeClause struct {
	PartitionBy []Expression        // PARTITION BY expressions; empty when no partitioning was given
	OrderBy     []OrderByExpression // ORDER BY entries; empty when no ordering was given
	Measures    []MeasureDef        // MEASURES entries (expr AS alias); empty when absent
	RowsPerMatch string             // "ONE ROW PER MATCH" or "ALL ROWS PER MATCH" (empty = default)
	AfterMatch  string              // raw text: "SKIP TO NEXT ROW", "SKIP PAST LAST ROW", etc.
	Pattern     string              // raw pattern text: "UP+ DOWN+"
	Definitions []PatternDef        // DEFINE entries binding pattern variables to conditions
	Pos         models.Location     // source position of the MATCH_RECOGNIZE keyword
}

// MeasureDef is one MEASURES entry: expr AS alias.
type MeasureDef struct {
	Expr  Expression // measured expression, e.g. MATCH_NUMBER() or FIRST(price)
	Alias string     // alias following AS; empty when no AS clause was given
}

// PatternDef is one DEFINE entry: variable_name AS boolean_condition.
type PatternDef struct {
	Name      string     // pattern variable name referenced in PATTERN, e.g. "UP"
	Condition Expression // boolean condition that rows must satisfy to match the variable
}

// expressionNode marks MatchRecognizeClause as an Expression AST node.
func (m *MatchRecognizeClause) expressionNode() {}

// TokenLiteral returns the keyword that introduces this clause.
func (m MatchRecognizeClause) TokenLiteral() string { return "MATCH_RECOGNIZE" }
// Children returns the child AST nodes of the clause: the PARTITION BY
// expressions, the ORDER BY entries, each MEASURES expression, and each
// DEFINE condition, in declaration order.
func (m MatchRecognizeClause) Children() []Node {
	var nodes []Node
	nodes = append(nodes, nodifyExpressions(m.PartitionBy)...)
	for i := range m.OrderBy {
		// Append a pointer to a per-iteration copy, not into the slice itself.
		entry := m.OrderBy[i]
		nodes = append(nodes, &entry)
	}
	for i := range m.Measures {
		if expr := m.Measures[i].Expr; expr != nil {
			nodes = append(nodes, expr)
		}
	}
	for i := range m.Definitions {
		if cond := m.Definitions[i].Condition; cond != nil {
			nodes = append(nodes, cond)
		}
	}
	return nodes
}

// expressionNode satisfies the Expression interface so PeriodDefinition can be
// stored in CreateTableStatement.PeriodDefinitions without a separate interface type.
// Semantically it is a table column constraint, not a scalar expression.
Expand Down
5 changes: 3 additions & 2 deletions pkg/sql/parser/expressions_literal.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,9 @@ func (p *Parser) parsePrimaryExpression() (ast.Expression, error) {
return funcCall, nil
}

// Handle keywords that can be used as function names in MySQL (IF, REPLACE, etc.)
if (p.isType(models.TokenTypeIf) || p.isType(models.TokenTypeReplace)) && p.peekToken().Token.Type == models.TokenTypeLParen {
// Handle keywords that can be used as function names (IF, REPLACE, FIRST, LAST, etc.)
if (p.isType(models.TokenTypeIf) || p.isType(models.TokenTypeReplace) ||
p.isType(models.TokenTypeFirst) || p.isType(models.TokenTypeLast)) && p.peekToken().Token.Type == models.TokenTypeLParen {
kwPos := p.currentLocation()
identName := p.currentToken.Token.Value
p.advance()
Expand Down
228 changes: 228 additions & 0 deletions pkg/sql/parser/match_recognize.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
// Copyright 2026 GoSQLX Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0

// Package parser - match_recognize.go
// SQL:2016 MATCH_RECOGNIZE clause for row-pattern recognition (Snowflake, Oracle).

package parser

import (
"strings"

"github.com/ajitpratap0/GoSQLX/pkg/models"
"github.com/ajitpratap0/GoSQLX/pkg/sql/ast"
"github.com/ajitpratap0/GoSQLX/pkg/sql/keywords"
)

// isMatchRecognizeKeyword reports whether the current token is the contextual
// MATCH_RECOGNIZE keyword in a dialect that supports it.
func (p *Parser) isMatchRecognizeKeyword() bool {
	switch p.dialect {
	case string(keywords.DialectSnowflake), string(keywords.DialectOracle):
		return strings.EqualFold(p.currentToken.Token.Value, "MATCH_RECOGNIZE")
	default:
		return false
	}
}

// parseMatchRecognize parses the MATCH_RECOGNIZE clause. The current token
// must be MATCH_RECOGNIZE.
//
// Grammar:
//
//	MATCH_RECOGNIZE (
//	    [PARTITION BY expr, ...]
//	    [ORDER BY expr [ASC|DESC], ...]
//	    [MEASURES measure_expr AS alias, ...]
//	    [ONE ROW PER MATCH | ALL ROWS PER MATCH]
//	    [AFTER MATCH SKIP ...]
//	    PATTERN ( pattern_regex )
//	    DEFINE var AS condition, ...
//	)
//
// Sub-clauses are parsed in order; each is treated leniently as optional so
// partially-formed input still yields a clause for downstream error reporting.
// Returns the populated *ast.MatchRecognizeClause, or an error when a
// structural token (parenthesis, AS, expected keyword) is missing.
func (p *Parser) parseMatchRecognize() (*ast.MatchRecognizeClause, error) {
	pos := p.currentLocation()
	p.advance() // Consume MATCH_RECOGNIZE

	if !p.isType(models.TokenTypeLParen) {
		return nil, p.expectedError("( after MATCH_RECOGNIZE")
	}
	p.advance() // Consume (

	clause := &ast.MatchRecognizeClause{Pos: pos}

	// PARTITION BY expr, ...
	if p.isType(models.TokenTypePartition) {
		p.advance() // PARTITION
		if p.isType(models.TokenTypeBy) {
			p.advance() // BY
		}
		for {
			expr, err := p.parseExpression()
			if err != nil {
				return nil, err
			}
			clause.PartitionBy = append(clause.PartitionBy, expr)
			if !p.isType(models.TokenTypeComma) {
				break
			}
			p.advance() // ,
		}
	}

	// ORDER BY expr [ASC|DESC], ...
	if p.isType(models.TokenTypeOrder) {
		p.advance() // ORDER
		if p.isType(models.TokenTypeBy) {
			p.advance() // BY
		}
		for {
			expr, err := p.parseExpression()
			if err != nil {
				return nil, err
			}
			entry := ast.OrderByExpression{Expression: expr, Ascending: true}
			if p.isType(models.TokenTypeAsc) {
				p.advance()
			} else if p.isType(models.TokenTypeDesc) {
				entry.Ascending = false
				p.advance()
			}
			clause.OrderBy = append(clause.OrderBy, entry)
			if !p.isType(models.TokenTypeComma) {
				break
			}
			p.advance() // ,
		}
	}

	// MEASURES expr AS alias, ...
	if strings.EqualFold(p.currentToken.Token.Value, "MEASURES") {
		p.advance() // MEASURES
		for {
			expr, err := p.parseExpression()
			if err != nil {
				return nil, err
			}
			alias := ""
			if p.isType(models.TokenTypeAs) {
				p.advance() // AS
				// Guard: the alias must be an actual name. Without this
				// check a trailing "AS ," or "AS )" would record the
				// structural token (or EOF) as the alias.
				if p.isType(models.TokenTypeEOF) || p.isType(models.TokenTypeComma) || p.isType(models.TokenTypeRParen) {
					return nil, p.expectedError("alias after AS in MEASURES")
				}
				alias = p.currentToken.Token.Value
				p.advance() // alias name
			}
			clause.Measures = append(clause.Measures, ast.MeasureDef{
				Expr:  expr,
				Alias: alias,
			})
			if !p.isType(models.TokenTypeComma) {
				break
			}
			p.advance() // ,
		}
	}

	// ONE ROW PER MATCH / ALL ROWS PER MATCH.
	// Validate each keyword rather than blindly consuming four tokens, so a
	// malformed clause (e.g. "ONE ROW MATCH") produces a clear error instead
	// of silently swallowing unrelated tokens — possibly including the
	// closing ')' — and cascading a confusing failure later.
	if strings.EqualFold(p.currentToken.Token.Value, "ONE") {
		p.advance() // ONE
		if err := p.consumeMatchRecognizeKeywords("ROW", "PER", "MATCH"); err != nil {
			return nil, err
		}
		clause.RowsPerMatch = "ONE ROW PER MATCH"
	} else if p.isType(models.TokenTypeAll) {
		p.advance() // ALL
		if err := p.consumeMatchRecognizeKeywords("ROWS", "PER", "MATCH"); err != nil {
			return nil, err
		}
		clause.RowsPerMatch = "ALL ROWS PER MATCH"
	}

	// AFTER MATCH SKIP ... — consume as raw text until PATTERN or DEFINE
	// (or ')' / EOF, so malformed input cannot loop forever).
	if strings.EqualFold(p.currentToken.Token.Value, "AFTER") {
		var parts []string
		for {
			val := strings.ToUpper(p.currentToken.Token.Value)
			if val == "PATTERN" || val == "DEFINE" {
				break
			}
			if p.isType(models.TokenTypeEOF) || p.isType(models.TokenTypeRParen) {
				break
			}
			parts = append(parts, p.currentToken.Token.Value)
			p.advance()
		}
		clause.AfterMatch = strings.Join(parts, " ")
	}

	// PATTERN ( regex )
	if strings.EqualFold(p.currentToken.Token.Value, "PATTERN") {
		p.advance() // PATTERN
		if !p.isType(models.TokenTypeLParen) {
			return nil, p.expectedError("( after PATTERN")
		}
		p.advance() // Consume (

		// Collect pattern tokens as raw text until the matching ')'. Nested
		// parentheses (grouping in the pattern regex) are tracked by depth.
		var patParts []string
		depth := 1
		for depth > 0 {
			if p.isType(models.TokenTypeEOF) {
				return nil, p.expectedError(") to close PATTERN")
			}
			if p.isType(models.TokenTypeLParen) {
				depth++
				patParts = append(patParts, "(")
			} else if p.isType(models.TokenTypeRParen) {
				depth--
				if depth > 0 {
					patParts = append(patParts, ")")
				}
			} else {
				patParts = append(patParts, p.currentToken.Token.Value)
			}
			p.advance()
		}
		clause.Pattern = strings.Join(patParts, " ")
	}

	// DEFINE var AS condition, ...
	if strings.EqualFold(p.currentToken.Token.Value, "DEFINE") {
		p.advance() // DEFINE
		for {
			if p.isType(models.TokenTypeRParen) || p.isType(models.TokenTypeEOF) {
				break
			}
			name := p.currentToken.Token.Value
			p.advance() // variable name

			if !p.isType(models.TokenTypeAs) {
				return nil, p.expectedError("AS after pattern variable " + name)
			}
			p.advance() // AS

			cond, err := p.parseExpression()
			if err != nil {
				return nil, err
			}
			clause.Definitions = append(clause.Definitions, ast.PatternDef{
				Name:      name,
				Condition: cond,
			})
			if !p.isType(models.TokenTypeComma) {
				break
			}
			p.advance() // ,
		}
	}

	// Expect closing )
	if !p.isType(models.TokenTypeRParen) {
		return nil, p.expectedError(") to close MATCH_RECOGNIZE")
	}
	p.advance() // Consume )

	return clause, nil
}

// consumeMatchRecognizeKeywords consumes the given contextual keywords in
// order, matching case-insensitively, and returns an error naming the first
// keyword that is missing.
func (p *Parser) consumeMatchRecognizeKeywords(words ...string) error {
	for _, w := range words {
		if !strings.EqualFold(p.currentToken.Token.Value, w) {
			return p.expectedError(w + " in ROWS PER MATCH clause")
		}
		p.advance()
	}
	return nil
}
21 changes: 19 additions & 2 deletions pkg/sql/parser/select_subquery.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ func (p *Parser) parseFromTableReference() (ast.TableReference, error) {
// Similarly, START followed by WITH is a hierarchical query seed, not an alias.
// Don't consume PIVOT/UNPIVOT as a table alias — they are contextual
// keywords in SQL Server/Oracle and must reach the pivot-clause parser below.
if (p.isIdentifier() || p.isType(models.TokenTypeAs)) && !p.isMariaDBClauseStart() && !p.isPivotKeyword() && !p.isUnpivotKeyword() && !p.isQualifyKeyword() && !p.isMinusSetOp() && !p.isSnowflakeTimeTravelStart() && !p.isSampleKeyword() {
if (p.isIdentifier() || p.isType(models.TokenTypeAs)) && !p.isMariaDBClauseStart() && !p.isPivotKeyword() && !p.isUnpivotKeyword() && !p.isQualifyKeyword() && !p.isMinusSetOp() && !p.isSnowflakeTimeTravelStart() && !p.isSampleKeyword() && !p.isMatchRecognizeKeyword() {
if p.isType(models.TokenTypeAs) {
p.advance() // Consume AS
if !p.isIdentifier() {
Expand Down Expand Up @@ -237,6 +237,23 @@ func (p *Parser) parseFromTableReference() (ast.TableReference, error) {
p.parsePivotAlias(&tableRef)
}

// Snowflake / Oracle MATCH_RECOGNIZE clause
if p.isMatchRecognizeKeyword() {
mr, err := p.parseMatchRecognize()
if err != nil {
return tableRef, err
}
tableRef.MatchRecognize = mr
// Optional alias after MATCH_RECOGNIZE (...)
if p.isType(models.TokenTypeAs) {
p.advance()
}
if p.isIdentifier() {
tableRef.Alias = p.currentToken.Token.Value
p.advance()
}
}

return tableRef, nil
}

Expand Down Expand Up @@ -293,7 +310,7 @@ func (p *Parser) parseJoinedTableRef(joinType string) (ast.TableReference, error
// Similarly, START followed by WITH is a hierarchical query seed, not an alias.
// Don't consume PIVOT/UNPIVOT as a table alias — they are contextual
// keywords in SQL Server/Oracle and must reach the pivot-clause parser below.
if (p.isIdentifier() || p.isType(models.TokenTypeAs)) && !p.isMariaDBClauseStart() && !p.isPivotKeyword() && !p.isUnpivotKeyword() && !p.isQualifyKeyword() && !p.isMinusSetOp() && !p.isSnowflakeTimeTravelStart() && !p.isSampleKeyword() {
if (p.isIdentifier() || p.isType(models.TokenTypeAs)) && !p.isMariaDBClauseStart() && !p.isPivotKeyword() && !p.isUnpivotKeyword() && !p.isQualifyKeyword() && !p.isMinusSetOp() && !p.isSnowflakeTimeTravelStart() && !p.isSampleKeyword() && !p.isMatchRecognizeKeyword() {
if p.isType(models.TokenTypeAs) {
p.advance()
if !p.isIdentifier() {
Expand Down
Loading
Loading