Skip to content

Commit 5836e08

Browse files
Ajit Pratap SinghAjit Pratap Singh
authored and committed
feat(parser): Snowflake SAMPLE/TABLESAMPLE clause (#483)
1 parent 670dd7e commit 5836e08

File tree

3 files changed

+89
-2
lines changed

3 files changed

+89
-2
lines changed

pkg/sql/parser/pivot.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,3 +374,22 @@ func (p *Parser) isSnowflakeTimeTravelStart() bool {
374374
}
375375
return false
376376
}
377+
378+
// isSampleKeyword returns true if the current token is SAMPLE or TABLESAMPLE
379+
// followed by '(' or a sampling-method keyword, indicating a sampling clause
380+
// rather than a table alias. Used to prevent the FROM-alias parser from
381+
// consuming these tokens.
382+
func (p *Parser) isSampleKeyword() bool {
383+
upper := strings.ToUpper(p.currentToken.Token.Value)
384+
if upper != "SAMPLE" && upper != "TABLESAMPLE" {
385+
return false
386+
}
387+
// Require '(' or a method keyword as lookahead to disambiguate from
388+
// a table actually named "sample".
389+
next := p.peekToken().Token
390+
if next.Type == models.TokenTypeLParen {
391+
return true
392+
}
393+
nextUpper := strings.ToUpper(next.Value)
394+
return nextUpper == "BERNOULLI" || nextUpper == "SYSTEM" || nextUpper == "BLOCK" || nextUpper == "ROW"
395+
}

pkg/sql/parser/select_subquery.go

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,41 @@ func (p *Parser) parseFromTableReference() (ast.TableReference, error) {
9595
tableRef.TableFunc = funcCall
9696
}
9797

98+
// Snowflake / ANSI SAMPLE or TABLESAMPLE clause on a table reference:
99+
// SAMPLE [BERNOULLI | SYSTEM | BLOCK | ROW] (N [ROWS])
100+
// TABLESAMPLE [method] (N [ROWS])
101+
// Consume permissively — the method and paren block are consumed
102+
// but not yet modeled on the AST.
103+
if strings.EqualFold(p.currentToken.Token.Value, "SAMPLE") ||
104+
strings.EqualFold(p.currentToken.Token.Value, "TABLESAMPLE") {
105+
p.advance() // SAMPLE / TABLESAMPLE
106+
// Optional method name
107+
upper := strings.ToUpper(p.currentToken.Token.Value)
108+
if upper == "BERNOULLI" || upper == "SYSTEM" || upper == "BLOCK" || upper == "ROW" {
109+
p.advance()
110+
}
111+
// (N [ROWS]) block
112+
if p.isType(models.TokenTypeLParen) {
113+
depth := 0
114+
for {
115+
t := p.currentToken.Token.Type
116+
if t == models.TokenTypeEOF {
117+
break
118+
}
119+
if t == models.TokenTypeLParen {
120+
depth++
121+
} else if t == models.TokenTypeRParen {
122+
depth--
123+
if depth == 0 {
124+
p.advance()
125+
break
126+
}
127+
}
128+
p.advance()
129+
}
130+
}
131+
}
132+
98133
// Snowflake time-travel / change-tracking clauses:
99134
// AT (TIMESTAMP => ...)
100135
// BEFORE (STATEMENT => ...)
@@ -113,7 +148,7 @@ func (p *Parser) parseFromTableReference() (ast.TableReference, error) {
113148
// Similarly, START followed by WITH is a hierarchical query seed, not an alias.
114149
// Don't consume PIVOT/UNPIVOT as a table alias — they are contextual
115150
// keywords in SQL Server/Oracle and must reach the pivot-clause parser below.
116-
if (p.isIdentifier() || p.isType(models.TokenTypeAs)) && !p.isMariaDBClauseStart() && !p.isPivotKeyword() && !p.isUnpivotKeyword() && !p.isQualifyKeyword() && !p.isMinusSetOp() && !p.isSnowflakeTimeTravelStart() {
151+
if (p.isIdentifier() || p.isType(models.TokenTypeAs)) && !p.isMariaDBClauseStart() && !p.isPivotKeyword() && !p.isUnpivotKeyword() && !p.isQualifyKeyword() && !p.isMinusSetOp() && !p.isSnowflakeTimeTravelStart() && !p.isSampleKeyword() {
117152
if p.isType(models.TokenTypeAs) {
118153
p.advance() // Consume AS
119154
if !p.isIdentifier() {
@@ -227,7 +262,7 @@ func (p *Parser) parseJoinedTableRef(joinType string) (ast.TableReference, error
227262
// Similarly, START followed by WITH is a hierarchical query seed, not an alias.
228263
// Don't consume PIVOT/UNPIVOT as a table alias — they are contextual
229264
// keywords in SQL Server/Oracle and must reach the pivot-clause parser below.
230-
if (p.isIdentifier() || p.isType(models.TokenTypeAs)) && !p.isMariaDBClauseStart() && !p.isPivotKeyword() && !p.isUnpivotKeyword() && !p.isQualifyKeyword() && !p.isMinusSetOp() && !p.isSnowflakeTimeTravelStart() {
265+
if (p.isIdentifier() || p.isType(models.TokenTypeAs)) && !p.isMariaDBClauseStart() && !p.isPivotKeyword() && !p.isUnpivotKeyword() && !p.isQualifyKeyword() && !p.isMinusSetOp() && !p.isSnowflakeTimeTravelStart() && !p.isSampleKeyword() {
231266
if p.isType(models.TokenTypeAs) {
232267
p.advance()
233268
if !p.isIdentifier() {
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// Copyright 2026 GoSQLX Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
5+
package parser_test
6+
7+
import (
8+
"testing"
9+
10+
"github.com/ajitpratap0/GoSQLX/pkg/gosqlx"
11+
"github.com/ajitpratap0/GoSQLX/pkg/sql/keywords"
12+
)
13+
14+
// TestSnowflakeSample verifies Snowflake SAMPLE / TABLESAMPLE clause on
15+
// table references. Regression for #483.
16+
func TestSnowflakeSample(t *testing.T) {
17+
queries := map[string]string{
18+
"sample_pct": `SELECT * FROM users SAMPLE (10)`,
19+
"tablesample_rows": `SELECT * FROM users TABLESAMPLE (100 ROWS)`,
20+
"sample_bernoulli": `SELECT * FROM users SAMPLE BERNOULLI (5)`,
21+
"tablesample_system": `SELECT * FROM users TABLESAMPLE SYSTEM (1)`,
22+
"sample_block": `SELECT * FROM users SAMPLE BLOCK (10)`,
23+
"sample_with_where": `SELECT * FROM users SAMPLE (50) WHERE id > 100`,
24+
}
25+
for name, q := range queries {
26+
q := q
27+
t.Run(name, func(t *testing.T) {
28+
if _, err := gosqlx.ParseWithDialect(q, keywords.DialectSnowflake); err != nil {
29+
t.Fatalf("parse failed: %v", err)
30+
}
31+
})
32+
}
33+
}

0 commit comments

Comments
 (0)