Skip to content

Commit d181d2b

Browse files
Ajit Pratap SinghAjit Pratap Singh
authored and committed
feat(parser): Snowflake VARIANT colon-path expressions (#483)
1 parent f480288 commit d181d2b

File tree

3 files changed

+217
-0
lines changed

3 files changed

+217
-0
lines changed

pkg/sql/ast/ast.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1050,6 +1050,39 @@ func (u *UnaryExpression) TokenLiteral() string {
10501050

10511051
// Children returns a one-element slice holding the operand expression.
func (u UnaryExpression) Children() []Node {
	return []Node{u.Expr}
}
10521052

1053+
// VariantPath represents a Snowflake VARIANT path expression:
1054+
//
1055+
// col:field.sub[0]::string
1056+
//
1057+
// The Root is the base expression (typically an Identifier or FunctionCall
1058+
// like PARSE_JSON(raw)). Segments is the chain of path steps that follow
1059+
// the leading `:`. Each segment is either a field name (Name set) or a
1060+
// bracketed index expression (Index set).
1061+
type VariantPath struct {
1062+
Root Expression
1063+
Segments []VariantPathSegment
1064+
Pos models.Location
1065+
}
1066+
1067+
// VariantPathSegment is one step in a VARIANT path: either a field name
1068+
// reached via `:` or `.`, or a bracketed index expression.
1069+
type VariantPathSegment struct {
1070+
Name string // field name (`:field` or `.field`), empty when Index is set
1071+
Index Expression // bracket subscript (`[expr]`), nil when Name is set
1072+
}
1073+
1074+
func (v *VariantPath) expressionNode() {}
1075+
func (v VariantPath) TokenLiteral() string { return ":" }
1076+
func (v VariantPath) Children() []Node {
1077+
nodes := []Node{v.Root}
1078+
for _, seg := range v.Segments {
1079+
if seg.Index != nil {
1080+
nodes = append(nodes, seg.Index)
1081+
}
1082+
}
1083+
return nodes
1084+
}
1085+
10531086
// NamedArgument represents a function argument of the form `name => expr`,
10541087
// used by Snowflake (FLATTEN(input => col), GENERATOR(rowcount => 100)),
10551088
// BigQuery, Oracle, and PostgreSQL procedural calls.

pkg/sql/parser/expressions.go

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
goerrors "github.com/ajitpratap0/GoSQLX/pkg/errors"
2929
"github.com/ajitpratap0/GoSQLX/pkg/models"
3030
"github.com/ajitpratap0/GoSQLX/pkg/sql/ast"
31+
"github.com/ajitpratap0/GoSQLX/pkg/sql/keywords"
3132
)
3233

3334
// parseExpression parses an expression with OR operators (lowest precedence)
@@ -213,6 +214,17 @@ func (p *Parser) parseJSONExpression() (ast.Expression, error) {
213214
return nil, err
214215
}
215216

217+
// Snowflake VARIANT path: `expr:field[.field|[idx]]*`. Must run before
218+
// the `::` cast loop so that `col:a.b::int` casts the full path rather
219+
// than treating the path as a bare expression.
220+
if p.dialect == string(keywords.DialectSnowflake) && p.isType(models.TokenTypeColon) {
221+
vp, err := p.parseSnowflakeVariantPath(left)
222+
if err != nil {
223+
return nil, err
224+
}
225+
left = vp
226+
}
227+
216228
// Handle type casting (::) with highest precedence
217229
// PostgreSQL: expr::type (e.g., '123'::integer, column::text)
218230
for p.isType(models.TokenTypeDoubleColon) {
@@ -380,3 +392,81 @@ func (p *Parser) isJSONOperator() bool {
380392
}
381393
return false
382394
}
395+
396+
// parseSnowflakeVariantPath parses the tail of a Snowflake VARIANT path
397+
// expression. The current token must be `:`. Returns a VariantPath with
398+
// the given root and the parsed segments.
399+
//
400+
// Grammar:
401+
//
402+
// path := ':' step ( '.' field | '[' expr ']' )*
403+
// step := field | '"' quoted '"'
404+
// field := identifier
405+
func (p *Parser) parseSnowflakeVariantPath(root ast.Expression) (*ast.VariantPath, error) {
406+
pos := p.currentLocation()
407+
if !p.isType(models.TokenTypeColon) {
408+
return nil, p.expectedError(":")
409+
}
410+
p.advance() // Consume leading :
411+
412+
vp := &ast.VariantPath{Root: root, Pos: pos}
413+
414+
// First segment must be a field name (identifier or string literal).
415+
name, err := p.parseVariantFieldName()
416+
if err != nil {
417+
return nil, err
418+
}
419+
vp.Segments = append(vp.Segments, ast.VariantPathSegment{Name: name})
420+
421+
// Subsequent segments: `.field` | `[expr]` | `:field` (rare).
422+
for {
423+
switch {
424+
case p.isType(models.TokenTypePeriod):
425+
p.advance()
426+
n, err := p.parseVariantFieldName()
427+
if err != nil {
428+
return nil, err
429+
}
430+
vp.Segments = append(vp.Segments, ast.VariantPathSegment{Name: n})
431+
case p.isType(models.TokenTypeColon):
432+
p.advance()
433+
n, err := p.parseVariantFieldName()
434+
if err != nil {
435+
return nil, err
436+
}
437+
vp.Segments = append(vp.Segments, ast.VariantPathSegment{Name: n})
438+
case p.isType(models.TokenTypeLBracket):
439+
p.advance() // Consume [
440+
idx, err := p.parseExpression()
441+
if err != nil {
442+
return nil, err
443+
}
444+
if !p.isType(models.TokenTypeRBracket) {
445+
return nil, p.expectedError("]")
446+
}
447+
p.advance() // Consume ]
448+
vp.Segments = append(vp.Segments, ast.VariantPathSegment{Index: idx})
449+
default:
450+
return vp, nil
451+
}
452+
}
453+
}
454+
455+
// parseVariantFieldName consumes one VARIANT path field name, which may be
456+
// a bare identifier or a double-quoted string. Returns the name and
457+
// advances past it.
458+
func (p *Parser) parseVariantFieldName() (string, error) {
459+
tok := p.currentToken.Token
460+
switch {
461+
case p.isIdentifier(), p.isType(models.TokenTypeDoubleQuotedString):
462+
name := tok.Value
463+
p.advance()
464+
return name, nil
465+
case p.isType(models.TokenTypeKeyword):
466+
// Keywords may appear as field names (e.g. TYPE, VALUE, STATUS).
467+
name := tok.Value
468+
p.advance()
469+
return name, nil
470+
}
471+
return "", p.expectedError("field name after `:` or `.`")
472+
}
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
// Copyright 2026 GoSQLX Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
5+
package parser_test
6+
7+
import (
8+
"testing"
9+
10+
"github.com/ajitpratap0/GoSQLX/pkg/gosqlx"
11+
"github.com/ajitpratap0/GoSQLX/pkg/sql/ast"
12+
"github.com/ajitpratap0/GoSQLX/pkg/sql/keywords"
13+
)
14+
15+
// TestSnowflakeVariantPath verifies the Snowflake VARIANT colon-path
16+
// expression (`col:field.sub[0]::string`) parses correctly. This is the
17+
// biggest Snowflake gap from the QA sweep (#483) — required for any
18+
// semi-structured / JSON workload.
19+
func TestSnowflakeVariantPath(t *testing.T) {
20+
queries := map[string]string{
21+
"bare": `SELECT col:field FROM t`,
22+
"nested": `SELECT col:field.sub FROM t`,
23+
"with_cast": `SELECT col:field.sub::string FROM t`,
24+
"bracket": `SELECT col:items[0] FROM t`,
25+
"bracket_then_dot": `SELECT col:items[0].name FROM t`,
26+
"parse_json_chain": `SELECT PARSE_JSON(raw):a::int FROM t`,
27+
"quoted_key": `SELECT col:"weird key" FROM t`,
28+
"in_where": `SELECT id FROM t WHERE payload:status::string = 'active'`,
29+
"multi_segment": `SELECT col:a.b.c.d::int AS x FROM t`,
30+
}
31+
for name, q := range queries {
32+
q := q
33+
t.Run(name, func(t *testing.T) {
34+
if _, err := gosqlx.ParseWithDialect(q, keywords.DialectSnowflake); err != nil {
35+
t.Fatalf("parse failed: %v", err)
36+
}
37+
})
38+
}
39+
}
40+
41+
// TestVariantPathASTShape asserts the VariantPath node is produced with the
42+
// expected Root and Segments, and that the trailing :: cast wraps it.
43+
func TestVariantPathASTShape(t *testing.T) {
44+
q := `SELECT col:field.sub[0]::string FROM t`
45+
tree, err := gosqlx.ParseWithDialect(q, keywords.DialectSnowflake)
46+
if err != nil {
47+
t.Fatalf("parse failed: %v", err)
48+
}
49+
var vp *ast.VariantPath
50+
var cast *ast.CastExpression
51+
var visit func(n ast.Node)
52+
visit = func(n ast.Node) {
53+
if n == nil {
54+
return
55+
}
56+
switch x := n.(type) {
57+
case *ast.VariantPath:
58+
if vp == nil {
59+
vp = x
60+
}
61+
case *ast.CastExpression:
62+
if cast == nil {
63+
cast = x
64+
}
65+
}
66+
for _, c := range n.Children() {
67+
visit(c)
68+
}
69+
}
70+
for _, s := range tree.Statements {
71+
visit(s)
72+
}
73+
if vp == nil {
74+
t.Fatal("VariantPath not found")
75+
}
76+
if vp.Root == nil {
77+
t.Fatal("VariantPath.Root nil")
78+
}
79+
if len(vp.Segments) != 3 {
80+
t.Fatalf("Segments: want 3 (field, sub, [0]), got %d", len(vp.Segments))
81+
}
82+
if vp.Segments[0].Name != "field" {
83+
t.Fatalf("Segments[0].Name: want %q, got %q", "field", vp.Segments[0].Name)
84+
}
85+
if vp.Segments[1].Name != "sub" {
86+
t.Fatalf("Segments[1].Name: want %q, got %q", "sub", vp.Segments[1].Name)
87+
}
88+
if vp.Segments[2].Index == nil {
89+
t.Fatal("Segments[2].Index (bracket subscript) missing")
90+
}
91+
if cast == nil || cast.Type != "string" {
92+
t.Fatalf("Cast: want CastExpression with Type=string, got %+v", cast)
93+
}
94+
}

0 commit comments

Comments
 (0)