diff --git a/pkg/sql/ast/ast.go b/pkg/sql/ast/ast.go
index 9241e2c8..f7250a93 100644
--- a/pkg/sql/ast/ast.go
+++ b/pkg/sql/ast/ast.go
@@ -1057,6 +1057,45 @@ func (u *UnaryExpression) TokenLiteral() string {
 
 func (u UnaryExpression) Children() []Node { return []Node{u.Expr} }
 
+// VariantPath is the AST node for a Snowflake VARIANT path expression such as
+//
+//	col:field.sub[0]::string
+//
+// Root holds the base expression the path is applied to (typically an
+// Identifier or a FunctionCall like PARSE_JSON(raw)). Segments lists the path
+// steps that follow the leading `:`; every segment carries either a field
+// name (Name set) or a bracketed index expression (Index set).
+type VariantPath struct {
+	Root     Expression
+	Segments []VariantPathSegment
+	Pos      models.Location
+}
+
+// VariantPathSegment describes a single step of a VARIANT path: a field name
+// introduced by `:` or `.`, or a bracketed index expression.
+type VariantPathSegment struct {
+	Name  string     // field name (`:field` or `.field`), empty when Index is set
+	Index Expression // bracket subscript (`[expr]`), nil when Name is set
+}
+
+func (v *VariantPath) expressionNode() {}
+
+// TokenLiteral returns ":", the token that introduces a VARIANT path.
+func (v VariantPath) TokenLiteral() string { return ":" }
+
+// Children returns Root followed by the Index expression of every bracketed
+// segment, in segment order. Field-name segments contribute no child nodes.
+func (v VariantPath) Children() []Node {
+	children := make([]Node, 0, len(v.Segments)+1)
+	children = append(children, v.Root)
+	for i := range v.Segments {
+		if idx := v.Segments[i].Index; idx != nil {
+			children = append(children, idx)
+		}
+	}
+	return children
+}
+
 // NamedArgument represents a function argument of the form `name => expr`,
 // used by Snowflake (FLATTEN(input => col), GENERATOR(rowcount => 100)),
 // BigQuery, Oracle, and PostgreSQL procedural calls.
diff --git a/pkg/sql/parser/expressions.go b/pkg/sql/parser/expressions.go
index e38b66ec..cfbced79 100644
--- a/pkg/sql/parser/expressions.go
+++ b/pkg/sql/parser/expressions.go
@@ -28,6 +28,7 @@ import (
 	goerrors "github.com/ajitpratap0/GoSQLX/pkg/errors"
 	"github.com/ajitpratap0/GoSQLX/pkg/models"
 	"github.com/ajitpratap0/GoSQLX/pkg/sql/ast"
+	"github.com/ajitpratap0/GoSQLX/pkg/sql/keywords"
 )
 
 // parseExpression parses an expression with OR operators (lowest precedence)
@@ -213,6 +214,17 @@ func (p *Parser) parseJSONExpression() (ast.Expression, error) {
 		return nil, err
 	}
 
+	// Snowflake VARIANT path: `expr:field[.field|[idx]]*`. Must run before
+	// the `::` cast loop so that `col:a.b::int` casts the full path rather
+	// than treating the path as a bare expression.
+	if p.dialect == string(keywords.DialectSnowflake) && p.isType(models.TokenTypeColon) {
+		vp, err := p.parseSnowflakeVariantPath(left)
+		if err != nil {
+			return nil, err
+		}
+		left = vp
+	}
+
 	// Handle type casting (::) with highest precedence
 	// PostgreSQL: expr::type (e.g., '123'::integer, column::text)
 	for p.isType(models.TokenTypeDoubleColon) {
@@ -380,3 +392,72 @@ func (p *Parser) isJSONOperator() bool {
 	}
 	return false
 }
+
+// parseSnowflakeVariantPath parses the tail of a Snowflake VARIANT path
+// expression. The current token must be `:`; root is the already-parsed
+// expression the path applies to. It returns a VariantPath holding root, the
+// parsed segments, and the parser location read before `:` is consumed.
+//
+// Grammar:
+//
+//	path  := ':' field ( ('.' | ':') field | '[' expr ']' )*
+//	field := identifier | double-quoted string | keyword
+func (p *Parser) parseSnowflakeVariantPath(root ast.Expression) (*ast.VariantPath, error) {
+	pos := p.currentLocation()
+	if !p.isType(models.TokenTypeColon) {
+		return nil, p.expectedError(":")
+	}
+	p.advance() // Consume leading :
+
+	vp := &ast.VariantPath{Root: root, Pos: pos}
+
+	// First segment must be a field name (see parseVariantFieldName).
+	name, err := p.parseVariantFieldName()
+	if err != nil {
+		return nil, err
+	}
+	vp.Segments = append(vp.Segments, ast.VariantPathSegment{Name: name})
+
+	// Subsequent segments: `.field` | `:field` (rare) | `[expr]`.
+	for {
+		switch {
+		case p.isType(models.TokenTypePeriod), p.isType(models.TokenTypeColon):
+			// `.` and `:` both introduce a field name and produce the same
+			// segment shape, so they share one branch.
+			p.advance() // Consume . or :
+			field, err := p.parseVariantFieldName()
+			if err != nil {
+				return nil, err
+			}
+			vp.Segments = append(vp.Segments, ast.VariantPathSegment{Name: field})
+		case p.isType(models.TokenTypeLBracket):
+			p.advance() // Consume [
+			idx, err := p.parseExpression()
+			if err != nil {
+				return nil, err
+			}
+			if !p.isType(models.TokenTypeRBracket) {
+				return nil, p.expectedError("]")
+			}
+			p.advance() // Consume ]
+			vp.Segments = append(vp.Segments, ast.VariantPathSegment{Index: idx})
+		default:
+			return vp, nil
+		}
+	}
+}
+
+// parseVariantFieldName consumes one VARIANT path field name and returns its
+// token value. A bare identifier, a double-quoted string, or a keyword is
+// accepted (keywords such as TYPE, VALUE, STATUS may appear as field names);
+// any other token is reported as an error.
+func (p *Parser) parseVariantFieldName() (string, error) {
+	if !p.isIdentifier() &&
+		!p.isType(models.TokenTypeDoubleQuotedString) &&
+		!p.isType(models.TokenTypeKeyword) {
+		return "", p.expectedError("field name after `:` or `.`")
+	}
+	name := p.currentToken.Token.Value
+	p.advance()
+	return name, nil
+}
diff --git a/pkg/sql/parser/snowflake_variant_path_test.go b/pkg/sql/parser/snowflake_variant_path_test.go
new file mode 100644
index 00000000..07b2a859
--- /dev/null
+++ b/pkg/sql/parser/snowflake_variant_path_test.go
@@ -0,0 +1,94 @@
+// Copyright 2026 GoSQLX Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+
+package parser_test
+
+import (
+	"testing"
+
+	"github.com/ajitpratap0/GoSQLX/pkg/gosqlx"
+	"github.com/ajitpratap0/GoSQLX/pkg/sql/ast"
+	"github.com/ajitpratap0/GoSQLX/pkg/sql/keywords"
+)
+
+// TestSnowflakeVariantPath verifies the Snowflake VARIANT colon-path
+// expression (`col:field.sub[0]::string`) parses correctly. This is the
+// biggest Snowflake gap from the QA sweep (#483) — required for any
+// semi-structured / JSON workload.
+func TestSnowflakeVariantPath(t *testing.T) {
+	queries := map[string]string{
+		"bare":             `SELECT col:field FROM t`,
+		"nested":           `SELECT col:field.sub FROM t`,
+		"with_cast":        `SELECT col:field.sub::string FROM t`,
+		"bracket":          `SELECT col:items[0] FROM t`,
+		"bracket_then_dot": `SELECT col:items[0].name FROM t`,
+		"parse_json_chain": `SELECT PARSE_JSON(raw):a::int FROM t`,
+		"quoted_key":       `SELECT col:"weird key" FROM t`,
+		"in_where":         `SELECT id FROM t WHERE payload:status::string = 'active'`,
+		"multi_segment":    `SELECT col:a.b.c.d::int AS x FROM t`,
+	}
+	for name, q := range queries {
+		q := q
+		t.Run(name, func(t *testing.T) {
+			if _, err := gosqlx.ParseWithDialect(q, keywords.DialectSnowflake); err != nil {
+				t.Fatalf("parse failed: %v", err)
+			}
+		})
+	}
+}
+
+// TestVariantPathASTShape asserts the VariantPath node is produced with the
+// expected Root and Segments, and that the first CastExpression has Type "string".
+func TestVariantPathASTShape(t *testing.T) {
+	query := `SELECT col:field.sub[0]::string FROM t`
+	parsed, err := gosqlx.ParseWithDialect(query, keywords.DialectSnowflake)
+	if err != nil {
+		t.Fatalf("parse failed: %v", err)
+	}
+
+	// Walk the whole tree once, remembering the first VariantPath and the
+	// first CastExpression that are encountered.
+	var path *ast.VariantPath
+	var castExpr *ast.CastExpression
+	var walk func(node ast.Node)
+	walk = func(node ast.Node) {
+		if node == nil {
+			return
+		}
+		switch typed := node.(type) {
+		case *ast.VariantPath:
+			if path == nil {
+				path = typed
+			}
+		case *ast.CastExpression:
+			if castExpr == nil {
+				castExpr = typed
+			}
+		}
+		for _, child := range node.Children() {
+			walk(child)
+		}
+	}
+	for _, stmt := range parsed.Statements {
+		walk(stmt)
+	}
+
+	// Cases are evaluated top to bottom; the first failing check aborts the test.
+	switch {
+	case path == nil:
+		t.Fatal("VariantPath not found")
+	case path.Root == nil:
+		t.Fatal("VariantPath.Root nil")
+	case len(path.Segments) != 3:
+		t.Fatalf("Segments: want 3 (field, sub, [0]), got %d", len(path.Segments))
+	case path.Segments[0].Name != "field":
+		t.Fatalf("Segments[0].Name: want %q, got %q", "field", path.Segments[0].Name)
+	case path.Segments[1].Name != "sub":
+		t.Fatalf("Segments[1].Name: want %q, got %q", "sub", path.Segments[1].Name)
+	case path.Segments[2].Index == nil:
+		t.Fatal("Segments[2].Index (bracket subscript) missing")
+	case castExpr == nil || castExpr.Type != "string":
+		t.Fatalf("Cast: want CastExpression with Type=string, got %+v", castExpr)
+	}
+}