Skip to content

Commit 627e712

Browse files
ajitpratap0 and Ajit Pratap Singh authored
feat(parser): Snowflake MATCH_RECOGNIZE clause (#483) (#506)
Co-authored-by: Ajit Pratap Singh <ajitpratapsingh@Ajits-Mac-mini-2655.local>
1 parent 387cca8 commit 627e712

File tree

5 files changed

+447
-4
lines changed

5 files changed

+447
-4
lines changed

pkg/sql/ast/ast.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,8 @@ type TableReference struct {
246246
// Unpivot is the SQL Server / Oracle UNPIVOT clause for column-to-row transformation.
247247
// Example: SELECT * FROM t UNPIVOT (sales FOR region IN (north_sales, south_sales)) AS unpvt
248248
Unpivot *UnpivotClause
249+
// MatchRecognize is the SQL:2016 row-pattern recognition clause (Snowflake, Oracle).
250+
MatchRecognize *MatchRecognizeClause
249251
}
250252

251253
func (t *TableReference) statementNode() {}
@@ -275,6 +277,9 @@ func (t TableReference) Children() []Node {
275277
if t.Unpivot != nil {
276278
nodes = append(nodes, t.Unpivot)
277279
}
280+
if t.MatchRecognize != nil {
281+
nodes = append(nodes, t.MatchRecognize)
282+
}
278283
return nodes
279284
}
280285

@@ -2147,6 +2152,62 @@ type PeriodDefinition struct {
21472152
Pos models.Location // Source position of the PERIOD FOR keyword (1-based line and column)
21482153
}
21492154

2155+
// MatchRecognizeClause represents the SQL:2016 MATCH_RECOGNIZE clause for
2156+
// row-pattern recognition in a FROM clause (Snowflake, Oracle, Databricks).
2157+
//
2158+
// MATCH_RECOGNIZE (
2159+
// PARTITION BY symbol
2160+
// ORDER BY ts
2161+
// MEASURES MATCH_NUMBER() AS m
2162+
// ALL ROWS PER MATCH
2163+
// PATTERN (UP+ DOWN+)
2164+
// DEFINE UP AS price > PREV(price), DOWN AS price < PREV(price)
2165+
// )
2166+
type MatchRecognizeClause struct {
2167+
PartitionBy []Expression
2168+
OrderBy []OrderByExpression
2169+
Measures []MeasureDef
2170+
RowsPerMatch string // "ONE ROW PER MATCH" or "ALL ROWS PER MATCH" (empty = default)
2171+
AfterMatch string // raw text: "SKIP TO NEXT ROW", "SKIP PAST LAST ROW", etc.
2172+
Pattern string // raw pattern text: "UP+ DOWN+"
2173+
Definitions []PatternDef
2174+
Pos models.Location
2175+
}
2176+
2177+
// MeasureDef is one MEASURES entry: expr AS alias.
2178+
type MeasureDef struct {
2179+
Expr Expression
2180+
Alias string
2181+
}
2182+
2183+
// PatternDef is one DEFINE entry: variable_name AS boolean_condition.
2184+
type PatternDef struct {
2185+
Name string
2186+
Condition Expression
2187+
}
2188+
2189+
// expressionNode is an empty marker method on *MatchRecognizeClause; it
// carries no behavior of its own.
func (m *MatchRecognizeClause) expressionNode() {
}
2190+
// TokenLiteral returns the keyword that introduces the clause.
func (m MatchRecognizeClause) TokenLiteral() string {
	const literal = "MATCH_RECOGNIZE"
	return literal
}
2191+
func (m MatchRecognizeClause) Children() []Node {
2192+
var nodes []Node
2193+
nodes = append(nodes, nodifyExpressions(m.PartitionBy)...)
2194+
for _, ob := range m.OrderBy {
2195+
ob := ob
2196+
nodes = append(nodes, &ob)
2197+
}
2198+
for _, md := range m.Measures {
2199+
if md.Expr != nil {
2200+
nodes = append(nodes, md.Expr)
2201+
}
2202+
}
2203+
for _, pd := range m.Definitions {
2204+
if pd.Condition != nil {
2205+
nodes = append(nodes, pd.Condition)
2206+
}
2207+
}
2208+
return nodes
2209+
}
2210+
21502211
// expressionNode satisfies the Expression interface so PeriodDefinition can be
21512212
// stored in CreateTableStatement.PeriodDefinitions without a separate interface type.
21522213
// Semantically it is a table column constraint, not a scalar expression.

pkg/sql/parser/expressions_literal.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,9 @@ func (p *Parser) parsePrimaryExpression() (ast.Expression, error) {
104104
return funcCall, nil
105105
}
106106

107-
// Handle keywords that can be used as function names in MySQL (IF, REPLACE, etc.)
108-
if (p.isType(models.TokenTypeIf) || p.isType(models.TokenTypeReplace)) && p.peekToken().Token.Type == models.TokenTypeLParen {
107+
// Handle keywords that can be used as function names (IF, REPLACE, FIRST, LAST, etc.)
108+
if (p.isType(models.TokenTypeIf) || p.isType(models.TokenTypeReplace) ||
109+
p.isType(models.TokenTypeFirst) || p.isType(models.TokenTypeLast)) && p.peekToken().Token.Type == models.TokenTypeLParen {
109110
kwPos := p.currentLocation()
110111
identName := p.currentToken.Token.Value
111112
p.advance()

pkg/sql/parser/match_recognize.go

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
// Copyright 2026 GoSQLX Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
9+
// Package parser - match_recognize.go
10+
// SQL:2016 MATCH_RECOGNIZE clause for row-pattern recognition (Snowflake, Oracle).
11+
12+
package parser
13+
14+
import (
15+
"strings"
16+
17+
"github.com/ajitpratap0/GoSQLX/pkg/models"
18+
"github.com/ajitpratap0/GoSQLX/pkg/sql/ast"
19+
"github.com/ajitpratap0/GoSQLX/pkg/sql/keywords"
20+
)
21+
22+
// isMatchRecognizeKeyword returns true if the current token is the contextual
23+
// MATCH_RECOGNIZE keyword in a dialect that supports it.
24+
func (p *Parser) isMatchRecognizeKeyword() bool {
25+
if p.dialect != string(keywords.DialectSnowflake) &&
26+
p.dialect != string(keywords.DialectOracle) {
27+
return false
28+
}
29+
return strings.EqualFold(p.currentToken.Token.Value, "MATCH_RECOGNIZE")
30+
}
31+
32+
// parseMatchRecognize parses the MATCH_RECOGNIZE clause. The current token
33+
// must be MATCH_RECOGNIZE.
34+
//
35+
// Grammar:
36+
//
37+
// MATCH_RECOGNIZE (
38+
// [PARTITION BY expr, ...]
39+
// [ORDER BY expr [ASC|DESC], ...]
40+
// [MEASURES measure_expr AS alias, ...]
41+
// [ONE ROW PER MATCH | ALL ROWS PER MATCH]
42+
// [AFTER MATCH SKIP ...]
43+
// PATTERN ( pattern_regex )
44+
// DEFINE var AS condition, ...
45+
// )
46+
func (p *Parser) parseMatchRecognize() (*ast.MatchRecognizeClause, error) {
47+
pos := p.currentLocation()
48+
p.advance() // Consume MATCH_RECOGNIZE
49+
50+
if !p.isType(models.TokenTypeLParen) {
51+
return nil, p.expectedError("( after MATCH_RECOGNIZE")
52+
}
53+
p.advance() // Consume (
54+
55+
clause := &ast.MatchRecognizeClause{Pos: pos}
56+
57+
// Parse sub-clauses in order. Each is optional except PATTERN and DEFINE.
58+
// PARTITION BY
59+
if p.isType(models.TokenTypePartition) {
60+
p.advance() // PARTITION
61+
if p.isType(models.TokenTypeBy) {
62+
p.advance() // BY
63+
}
64+
for {
65+
expr, err := p.parseExpression()
66+
if err != nil {
67+
return nil, err
68+
}
69+
clause.PartitionBy = append(clause.PartitionBy, expr)
70+
if !p.isType(models.TokenTypeComma) {
71+
break
72+
}
73+
p.advance()
74+
}
75+
}
76+
77+
// ORDER BY
78+
if p.isType(models.TokenTypeOrder) {
79+
p.advance() // ORDER
80+
if p.isType(models.TokenTypeBy) {
81+
p.advance() // BY
82+
}
83+
for {
84+
expr, err := p.parseExpression()
85+
if err != nil {
86+
return nil, err
87+
}
88+
entry := ast.OrderByExpression{Expression: expr, Ascending: true}
89+
if p.isType(models.TokenTypeAsc) {
90+
p.advance()
91+
} else if p.isType(models.TokenTypeDesc) {
92+
entry.Ascending = false
93+
p.advance()
94+
}
95+
clause.OrderBy = append(clause.OrderBy, entry)
96+
if !p.isType(models.TokenTypeComma) {
97+
break
98+
}
99+
p.advance()
100+
}
101+
}
102+
103+
// MEASURES
104+
if strings.EqualFold(p.currentToken.Token.Value, "MEASURES") {
105+
p.advance() // MEASURES
106+
for {
107+
expr, err := p.parseExpression()
108+
if err != nil {
109+
return nil, err
110+
}
111+
alias := ""
112+
if p.isType(models.TokenTypeAs) {
113+
p.advance() // AS
114+
alias = p.currentToken.Token.Value
115+
p.advance() // alias name
116+
}
117+
clause.Measures = append(clause.Measures, ast.MeasureDef{
118+
Expr: expr,
119+
Alias: alias,
120+
})
121+
if !p.isType(models.TokenTypeComma) {
122+
break
123+
}
124+
p.advance()
125+
}
126+
}
127+
128+
// ONE ROW PER MATCH / ALL ROWS PER MATCH
129+
if strings.EqualFold(p.currentToken.Token.Value, "ONE") {
130+
clause.RowsPerMatch = "ONE ROW PER MATCH"
131+
p.advance() // ONE
132+
p.advance() // ROW
133+
p.advance() // PER
134+
p.advance() // MATCH
135+
} else if p.isType(models.TokenTypeAll) {
136+
clause.RowsPerMatch = "ALL ROWS PER MATCH"
137+
p.advance() // ALL
138+
p.advance() // ROWS
139+
p.advance() // PER
140+
p.advance() // MATCH
141+
}
142+
143+
// AFTER MATCH SKIP ... — consume as raw text until PATTERN or DEFINE
144+
if strings.EqualFold(p.currentToken.Token.Value, "AFTER") {
145+
var parts []string
146+
for {
147+
val := strings.ToUpper(p.currentToken.Token.Value)
148+
if val == "PATTERN" || val == "DEFINE" {
149+
break
150+
}
151+
if p.isType(models.TokenTypeEOF) || p.isType(models.TokenTypeRParen) {
152+
break
153+
}
154+
parts = append(parts, p.currentToken.Token.Value)
155+
p.advance()
156+
}
157+
clause.AfterMatch = strings.Join(parts, " ")
158+
}
159+
160+
// PATTERN ( regex )
161+
if strings.EqualFold(p.currentToken.Token.Value, "PATTERN") {
162+
p.advance() // PATTERN
163+
if !p.isType(models.TokenTypeLParen) {
164+
return nil, p.expectedError("( after PATTERN")
165+
}
166+
p.advance() // Consume (
167+
168+
// Collect pattern tokens as raw text until the matching ')'
169+
var patParts []string
170+
depth := 1
171+
for depth > 0 {
172+
if p.isType(models.TokenTypeEOF) {
173+
return nil, p.expectedError(") to close PATTERN")
174+
}
175+
if p.isType(models.TokenTypeLParen) {
176+
depth++
177+
patParts = append(patParts, "(")
178+
} else if p.isType(models.TokenTypeRParen) {
179+
depth--
180+
if depth > 0 {
181+
patParts = append(patParts, ")")
182+
}
183+
} else {
184+
patParts = append(patParts, p.currentToken.Token.Value)
185+
}
186+
p.advance()
187+
}
188+
clause.Pattern = strings.Join(patParts, " ")
189+
}
190+
191+
// DEFINE var AS condition, ...
192+
if strings.EqualFold(p.currentToken.Token.Value, "DEFINE") {
193+
p.advance() // DEFINE
194+
for {
195+
if p.isType(models.TokenTypeRParen) || p.isType(models.TokenTypeEOF) {
196+
break
197+
}
198+
name := p.currentToken.Token.Value
199+
p.advance() // variable name
200+
201+
if !p.isType(models.TokenTypeAs) {
202+
return nil, p.expectedError("AS after pattern variable " + name)
203+
}
204+
p.advance() // AS
205+
206+
cond, err := p.parseExpression()
207+
if err != nil {
208+
return nil, err
209+
}
210+
clause.Definitions = append(clause.Definitions, ast.PatternDef{
211+
Name: name,
212+
Condition: cond,
213+
})
214+
if !p.isType(models.TokenTypeComma) {
215+
break
216+
}
217+
p.advance()
218+
}
219+
}
220+
221+
// Expect closing )
222+
if !p.isType(models.TokenTypeRParen) {
223+
return nil, p.expectedError(") to close MATCH_RECOGNIZE")
224+
}
225+
p.advance() // Consume )
226+
227+
return clause, nil
228+
}

pkg/sql/parser/select_subquery.go

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ func (p *Parser) parseFromTableReference() (ast.TableReference, error) {
179179
// Similarly, START followed by WITH is a hierarchical query seed, not an alias.
180180
// Don't consume PIVOT/UNPIVOT as a table alias — they are contextual
181181
// keywords in SQL Server/Oracle and must reach the pivot-clause parser below.
182-
if (p.isIdentifier() || p.isType(models.TokenTypeAs)) && !p.isMariaDBClauseStart() && !p.isPivotKeyword() && !p.isUnpivotKeyword() && !p.isQualifyKeyword() && !p.isMinusSetOp() && !p.isSnowflakeTimeTravelStart() && !p.isSampleKeyword() {
182+
if (p.isIdentifier() || p.isType(models.TokenTypeAs)) && !p.isMariaDBClauseStart() && !p.isPivotKeyword() && !p.isUnpivotKeyword() && !p.isQualifyKeyword() && !p.isMinusSetOp() && !p.isSnowflakeTimeTravelStart() && !p.isSampleKeyword() && !p.isMatchRecognizeKeyword() {
183183
if p.isType(models.TokenTypeAs) {
184184
p.advance() // Consume AS
185185
if !p.isIdentifier() {
@@ -237,6 +237,23 @@ func (p *Parser) parseFromTableReference() (ast.TableReference, error) {
237237
p.parsePivotAlias(&tableRef)
238238
}
239239

240+
// Snowflake / Oracle MATCH_RECOGNIZE clause
241+
if p.isMatchRecognizeKeyword() {
242+
mr, err := p.parseMatchRecognize()
243+
if err != nil {
244+
return tableRef, err
245+
}
246+
tableRef.MatchRecognize = mr
247+
// Optional alias after MATCH_RECOGNIZE (...)
248+
if p.isType(models.TokenTypeAs) {
249+
p.advance()
250+
}
251+
if p.isIdentifier() {
252+
tableRef.Alias = p.currentToken.Token.Value
253+
p.advance()
254+
}
255+
}
256+
240257
return tableRef, nil
241258
}
242259

@@ -293,7 +310,7 @@ func (p *Parser) parseJoinedTableRef(joinType string) (ast.TableReference, error
293310
// Similarly, START followed by WITH is a hierarchical query seed, not an alias.
294311
// Don't consume PIVOT/UNPIVOT as a table alias — they are contextual
295312
// keywords in SQL Server/Oracle and must reach the pivot-clause parser below.
296-
if (p.isIdentifier() || p.isType(models.TokenTypeAs)) && !p.isMariaDBClauseStart() && !p.isPivotKeyword() && !p.isUnpivotKeyword() && !p.isQualifyKeyword() && !p.isMinusSetOp() && !p.isSnowflakeTimeTravelStart() && !p.isSampleKeyword() {
313+
if (p.isIdentifier() || p.isType(models.TokenTypeAs)) && !p.isMariaDBClauseStart() && !p.isPivotKeyword() && !p.isUnpivotKeyword() && !p.isQualifyKeyword() && !p.isMinusSetOp() && !p.isSnowflakeTimeTravelStart() && !p.isSampleKeyword() && !p.isMatchRecognizeKeyword() {
297314
if p.isType(models.TokenTypeAs) {
298315
p.advance()
299316
if !p.isIdentifier() {

0 commit comments

Comments
 (0)