Skip to content

Commit 7cf8e2f

Browse files
authored
feat(parser): Snowflake COPY INTO / PUT / GET / LIST stubs (#483) (#499)
1 parent a37ed1c commit 7cf8e2f

File tree

2 files changed

+102
-0
lines changed

2 files changed

+102
-0
lines changed

pkg/sql/parser/parser.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -707,6 +707,15 @@ func (p *Parser) parseStatement() (ast.Statement, error) {
707707
p.advance()
708708
return p.parsePragmaStatement()
709709
}
710+
// Snowflake stage operations may arrive as keyword tokens depending on
711+
// the active keyword table (LIST, COPY, etc. can be registered).
712+
if p.dialect == string(keywords.DialectSnowflake) {
713+
upper := strings.ToUpper(p.currentToken.Token.Value)
714+
switch upper {
715+
case "COPY", "PUT", "GET", "LIST", "REMOVE", "LS":
716+
return p.parseSnowflakeStageStatement(upper)
717+
}
718+
}
710719
case models.TokenTypeIdentifier:
711720
// PRAGMA may be tokenized as IDENTIFIER when no dialect-specific keyword
712721
// set is active (e.g. when using the default PostgreSQL tokenizer dialect).
@@ -721,6 +730,16 @@ func (p *Parser) parseStatement() (ast.Statement, error) {
721730
strings.EqualFold(p.currentToken.Token.Value, "USE") {
722731
return p.parseSnowflakeUseStatement()
723732
}
733+
// Snowflake stage operations: COPY INTO, PUT, GET, LIST, REMOVE.
734+
// All tokenize as identifiers; parse-only stubs that consume the
735+
// rest of the statement body.
736+
if p.dialect == string(keywords.DialectSnowflake) {
737+
upper := strings.ToUpper(p.currentToken.Token.Value)
738+
switch upper {
739+
case "COPY", "PUT", "GET", "LIST", "REMOVE", "LS":
740+
return p.parseSnowflakeStageStatement(upper)
741+
}
742+
}
724743
}
725744
return nil, p.expectedError("statement")
726745
}
@@ -748,6 +767,47 @@ func (p *Parser) parseSnowflakeUseStatement() (ast.Statement, error) {
748767
return stmt, nil
749768
}
750769

770+
// parseSnowflakeStageStatement parses Snowflake stage operations as stubs:
771+
//
772+
// COPY INTO <target> FROM <source> [options]
773+
// PUT file://<path> @<stage>
774+
// GET @<stage> file://<path>
775+
// LIST @<stage> (or LS)
776+
// REMOVE @<stage>/<path>
777+
//
778+
// The statement is consumed token-by-token (tracking balanced parens) until
779+
// ';' or EOF and returned as a DescribeStatement placeholder tagged with the
780+
// operation kind. No AST modeling yet; follow-up work.
781+
func (p *Parser) parseSnowflakeStageStatement(kind string) (ast.Statement, error) {
782+
p.advance() // Consume leading kind token
783+
784+
// COPY INTO: consume the INTO keyword if present.
785+
if kind == "COPY" && p.isType(models.TokenTypeInto) {
786+
p.advance()
787+
}
788+
789+
// Consume the rest of the statement body.
790+
depth := 0
791+
for {
792+
t := p.currentToken.Token.Type
793+
if t == models.TokenTypeEOF {
794+
break
795+
}
796+
if t == models.TokenTypeSemicolon && depth == 0 {
797+
break
798+
}
799+
if t == models.TokenTypeLParen {
800+
depth++
801+
} else if t == models.TokenTypeRParen {
802+
depth--
803+
}
804+
p.advance()
805+
}
806+
stub := ast.GetDescribeStatement()
807+
stub.TableName = kind
808+
return stub, nil
809+
}
810+
751811
// NewParser creates a new parser with optional configuration.
752812
func NewParser(opts ...ParserOption) *Parser {
753813
p := &Parser{}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// Copyright 2026 GoSQLX Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
5+
package parser_test
6+
7+
import (
8+
"testing"
9+
10+
"github.com/ajitpratap0/GoSQLX/pkg/gosqlx"
11+
"github.com/ajitpratap0/GoSQLX/pkg/sql/keywords"
12+
)
13+
14+
// TestSnowflakeStageOps verifies Snowflake stage operations parse as stubs.
15+
// Regression for #483.
16+
func TestSnowflakeStageOps(t *testing.T) {
17+
queries := map[string]string{
18+
"copy_into_table_with_format": `COPY INTO my_table FROM @my_stage FILE_FORMAT = (TYPE = CSV)`,
19+
20+
"copy_into_named_format": `COPY INTO my_table FROM @my_stage/file.csv FILE_FORMAT = (FORMAT_NAME = my_csv) ON_ERROR = CONTINUE`,
21+
22+
"copy_into_stage_from_table": `COPY INTO @my_stage FROM my_table FILE_FORMAT = (TYPE = PARQUET)`,
23+
24+
"put_to_stage": `PUT file:///tmp/data.csv @my_stage`,
25+
26+
"get_from_stage": `GET @my_stage file:///tmp/output/`,
27+
28+
"list_stage": `LIST @my_stage`,
29+
30+
"remove_from_stage": `REMOVE @my_stage/old_files`,
31+
32+
"ls_alias": `LS @my_stage`,
33+
}
34+
for name, q := range queries {
35+
q := q
36+
t.Run(name, func(t *testing.T) {
37+
if _, err := gosqlx.ParseWithDialect(q, keywords.DialectSnowflake); err != nil {
38+
t.Fatalf("parse failed: %v", err)
39+
}
40+
})
41+
}
42+
}

0 commit comments

Comments
 (0)