Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions pkg/sql/ast/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -1057,6 +1057,39 @@ func (u *UnaryExpression) TokenLiteral() string {

// Children returns a single-element slice holding u.Expr.
func (u UnaryExpression) Children() []Node {
	return []Node{u.Expr}
}

// VariantPath represents a Snowflake VARIANT path expression:
//
//	col:field.sub[0]::string
//
// In the example above the node covers `col:field.sub[0]`; the parser
// handles the trailing `::string` cast separately, after the path has
// been parsed.
//
// The Root is the base expression (typically an Identifier or FunctionCall
// like PARSE_JSON(raw)). Segments is the chain of path steps that follow
// the leading `:`. Each segment is either a field name (Name set) or a
// bracketed index expression (Index set).
type VariantPath struct {
	// Root is the expression the path is applied to.
	Root Expression
	// Segments holds the path steps in source order.
	Segments []VariantPathSegment
	// Pos is the location the parser recorded while positioned on the
	// leading `:` of the path.
	Pos models.Location
}

// VariantPathSegment is one step in a VARIANT path: either a field name
// reached via `:` or `.`, or a bracketed index expression.
type VariantPathSegment struct {
	// Name is the field name (`:field` or `.field`); empty when Index is set.
	Name string
	// Index is the bracket subscript (`[expr]`); nil when Name is set.
	Index Expression
}

// expressionNode is an intentionally empty method on *VariantPath.
// NOTE(review): presumably the marker method that makes *VariantPath
// satisfy the Expression interface — confirm against the interface
// declaration.
func (v *VariantPath) expressionNode() {}
// TokenLiteral returns ":", the token that introduces a VARIANT path.
func (v VariantPath) TokenLiteral() string {
	return ":"
}
// Children returns the Root followed by the Index expression of every
// bracket segment, in path order. Field-name segments carry no child
// expression and therefore contribute nothing.
func (v VariantPath) Children() []Node {
	children := make([]Node, 0, 1+len(v.Segments))
	children = append(children, v.Root)
	for i := range v.Segments {
		idx := v.Segments[i].Index
		if idx == nil {
			continue
		}
		children = append(children, idx)
	}
	return children
}

// NamedArgument represents a function argument of the form `name => expr`,
// used by Snowflake (FLATTEN(input => col), GENERATOR(rowcount => 100)),
// BigQuery, Oracle, and PostgreSQL procedural calls.
Expand Down
90 changes: 90 additions & 0 deletions pkg/sql/parser/expressions.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
goerrors "github.com/ajitpratap0/GoSQLX/pkg/errors"
"github.com/ajitpratap0/GoSQLX/pkg/models"
"github.com/ajitpratap0/GoSQLX/pkg/sql/ast"
"github.com/ajitpratap0/GoSQLX/pkg/sql/keywords"
)

// parseExpression parses an expression with OR operators (lowest precedence)
Expand Down Expand Up @@ -213,6 +214,17 @@ func (p *Parser) parseJSONExpression() (ast.Expression, error) {
return nil, err
}

// Snowflake VARIANT path: `expr:field[.field|[idx]]*`. Must run before
// the `::` cast loop so that `col:a.b::int` casts the full path rather
// than treating the path as a bare expression.
if p.dialect == string(keywords.DialectSnowflake) && p.isType(models.TokenTypeColon) {
vp, err := p.parseSnowflakeVariantPath(left)
if err != nil {
return nil, err
}
left = vp
}

// Handle type casting (::) with highest precedence
// PostgreSQL: expr::type (e.g., '123'::integer, column::text)
for p.isType(models.TokenTypeDoubleColon) {
Expand Down Expand Up @@ -380,3 +392,81 @@ func (p *Parser) isJSONOperator() bool {
}
return false
}

// parseSnowflakeVariantPath parses the tail of a Snowflake VARIANT path
// expression, starting at the leading `:` that follows root. It returns a
// VariantPath whose Root is root, whose Pos is the location captured at
// that `:`, and whose Segments are the parsed steps in source order.
//
// Grammar accepted by this function:
//
//	path  := ':' field ( ('.' | ':') field | '[' expr ']' )*
//	field := whatever parseVariantFieldName accepts
//
// An error is returned when the current token is not `:`, when a field
// name is missing after `:` or `.`, when a subscript expression fails to
// parse, or when a subscript is not closed by `]`.
func (p *Parser) parseSnowflakeVariantPath(root ast.Expression) (*ast.VariantPath, error) {
	start := p.currentLocation()
	if !p.isType(models.TokenTypeColon) {
		return nil, p.expectedError(":")
	}
	p.advance() // step over the leading `:`

	// The path always opens with a named field.
	first, err := p.parseVariantFieldName()
	if err != nil {
		return nil, err
	}
	path := &ast.VariantPath{
		Root:     root,
		Pos:      start,
		Segments: []ast.VariantPathSegment{{Name: first}},
	}

	// Keep consuming steps until the next token cannot continue the path.
	for {
		// `.field` and the rarer `:field` both add a named segment.
		if p.isType(models.TokenTypePeriod) || p.isType(models.TokenTypeColon) {
			p.advance()
			field, err := p.parseVariantFieldName()
			if err != nil {
				return nil, err
			}
			path.Segments = append(path.Segments, ast.VariantPathSegment{Name: field})
			continue
		}

		if !p.isType(models.TokenTypeLBracket) {
			return path, nil
		}

		// `[expr]` adds an index segment.
		p.advance() // step over `[`
		subscript, err := p.parseExpression()
		if err != nil {
			return nil, err
		}
		if !p.isType(models.TokenTypeRBracket) {
			return nil, p.expectedError("]")
		}
		p.advance() // step over `]`
		path.Segments = append(path.Segments, ast.VariantPathSegment{Index: subscript})
	}
}

// parseVariantFieldName reads a single VARIANT path field name from the
// current token and advances past it. Accepted tokens are an identifier,
// a double-quoted string, or a keyword token (keywords such as TYPE,
// VALUE or STATUS may be used as field names). The token's Value is
// returned as the name; any other token yields an "expected" error and
// the parser is left where it was.
func (p *Parser) parseVariantFieldName() (string, error) {
	tok := p.currentToken.Token

	usable := p.isIdentifier() ||
		p.isType(models.TokenTypeDoubleQuotedString) ||
		p.isType(models.TokenTypeKeyword)
	if !usable {
		return "", p.expectedError("field name after `:` or `.`")
	}

	p.advance()
	return tok.Value, nil
}
94 changes: 94 additions & 0 deletions pkg/sql/parser/snowflake_variant_path_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Copyright 2026 GoSQLX Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");

package parser_test

import (
"testing"

"github.com/ajitpratap0/GoSQLX/pkg/gosqlx"
"github.com/ajitpratap0/GoSQLX/pkg/sql/ast"
"github.com/ajitpratap0/GoSQLX/pkg/sql/keywords"
)

// TestSnowflakeVariantPath checks that a range of Snowflake VARIANT
// colon-path expressions (such as `col:field.sub[0]::string`) parse
// without error under the Snowflake dialect. This was the biggest
// Snowflake gap from the QA sweep (#483) and is required for any
// semi-structured / JSON workload.
func TestSnowflakeVariantPath(t *testing.T) {
	cases := []struct {
		name  string
		query string
	}{
		{"bare", `SELECT col:field FROM t`},
		{"nested", `SELECT col:field.sub FROM t`},
		{"with_cast", `SELECT col:field.sub::string FROM t`},
		{"bracket", `SELECT col:items[0] FROM t`},
		{"bracket_then_dot", `SELECT col:items[0].name FROM t`},
		{"parse_json_chain", `SELECT PARSE_JSON(raw):a::int FROM t`},
		{"quoted_key", `SELECT col:"weird key" FROM t`},
		{"in_where", `SELECT id FROM t WHERE payload:status::string = 'active'`},
		{"multi_segment", `SELECT col:a.b.c.d::int AS x FROM t`},
	}

	for _, tc := range cases {
		// Subtests run synchronously (no t.Parallel), so reading tc inside
		// the closure is safe on every Go version.
		t.Run(tc.name, func(t *testing.T) {
			_, err := gosqlx.ParseWithDialect(tc.query, keywords.DialectSnowflake)
			if err != nil {
				t.Fatalf("parse failed: %v", err)
			}
		})
	}
}

// TestVariantPathASTShape parses a path made of a field, a nested field,
// a bracket subscript and a trailing `::string` cast, then walks the tree
// and checks that it contains a VariantPath with a non-nil Root and the
// three expected Segments, plus a CastExpression whose Type is "string".
func TestVariantPathASTShape(t *testing.T) {
	const query = `SELECT col:field.sub[0]::string FROM t`

	tree, err := gosqlx.ParseWithDialect(query, keywords.DialectSnowflake)
	if err != nil {
		t.Fatalf("parse failed: %v", err)
	}

	// Pre-order walk that remembers the first node found of each kind.
	var (
		path     *ast.VariantPath
		castNode *ast.CastExpression
		walk     func(ast.Node)
	)
	walk = func(node ast.Node) {
		if node == nil {
			return
		}
		if found, ok := node.(*ast.VariantPath); ok && path == nil {
			path = found
		}
		if found, ok := node.(*ast.CastExpression); ok && castNode == nil {
			castNode = found
		}
		for _, child := range node.Children() {
			walk(child)
		}
	}
	for _, stmt := range tree.Statements {
		walk(stmt)
	}

	// Each t.Fatal stops the test, so later cases never see a nil path.
	switch {
	case path == nil:
		t.Fatal("VariantPath not found")
	case path.Root == nil:
		t.Fatal("VariantPath.Root nil")
	case len(path.Segments) != 3:
		t.Fatalf("Segments: want 3 (field, sub, [0]), got %d", len(path.Segments))
	}

	if got := path.Segments[0].Name; got != "field" {
		t.Fatalf("Segments[0].Name: want %q, got %q", "field", got)
	}
	if got := path.Segments[1].Name; got != "sub" {
		t.Fatalf("Segments[1].Name: want %q, got %q", "sub", got)
	}
	if path.Segments[2].Index == nil {
		t.Fatal("Segments[2].Index (bracket subscript) missing")
	}

	if castNode == nil || castNode.Type != "string" {
		t.Fatalf("Cast: want CastExpression with Type=string, got %+v", castNode)
	}
}
Loading