Skip to content

Commit 8f449f0

Browse files
ajitpratap0Ajit Pratap Singhclaude
authored
fix(ast): replace DescribeStatement stubs with UnsupportedStatement, fix extraction gaps (#511)
Addresses multiple issues from the v1.14.0 comprehensive project review: P0 — Critical: - Add UnsupportedStatement AST node with Kind and RawSQL fields to replace DescribeStatement misuse for Snowflake stubs (USE, COPY INTO, PUT, GET, LIST, REMOVE, CREATE STAGE/STREAM/TASK/PIPE/etc.) - Add EXTRACT(field FROM source) parser support (was missing entirely) - Fix all 7 extraction gap tests (CASE, CAST, IN, BETWEEN, EXTRACT, recursive CTEs) — previously t.Skip() stubs, now passing P1 — High: - Add AST.HasUnsupportedStatements() for stub detection - Formatter emits "-- UNSUPPORTED: ..." comment for unmodeled statements instead of producing corrupt SQL - Remove stale "CREATE TABLE not implemented" comment from coverage tests - Add TODO(v2-cleanup) markers to 5 overlapping coverage test files P2 — Medium: - Reconcile Validate() empty-input behavior (parser.ValidateBytes now rejects empty input, matching gosqlx.Validate) - Fix ParseBytes string→byte→string round-trip (now threads []byte directly to tokenizer) - Deprecate pkg/sql/monitor in favor of pkg/metrics (v2.0 removal) - Add v2.0 removal timeline to 3 deprecated parser APIs All tests pass with -race across the full project (20 files, +361/-103). Co-authored-by: Ajit Pratap Singh <ajitpratapsingh@Ajits-Mac-mini-2655.local> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 3f7f2d8 commit 8f449f0

20 files changed

+361
-103
lines changed

pkg/formatter/render.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ func FormatStatement(s ast.Statement, opts ast.FormatOptions) string {
164164
return renderShow(v, opts)
165165
case *ast.DescribeStatement:
166166
return renderDescribe(v, opts)
167+
case *ast.UnsupportedStatement:
168+
return renderUnsupported(v, opts)
167169
default:
168170
// Fallback to SQL() for unrecognized statement types
169171
return stmtSQL(s)

pkg/formatter/render_ddl.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,16 @@ func renderDescribe(s *ast.DescribeStatement, opts ast.FormatOptions) string {
119119
return sb.String()
120120
}
121121

122+
// renderUnsupported renders an UnsupportedStatement as a SQL comment
123+
// preserving the original SQL fragment. This prevents silently producing
124+
// corrupt SQL for statement types the formatter cannot handle.
125+
func renderUnsupported(s *ast.UnsupportedStatement, _ ast.FormatOptions) string {
126+
if s.RawSQL != "" {
127+
return "-- UNSUPPORTED: " + s.RawSQL
128+
}
129+
return "-- UNSUPPORTED: " + s.Kind
130+
}
131+
122132
// writeSequenceOptionsFormatted appends formatted sequence options to the builder.
123133
// It mirrors the logic in ast/sql.go writeSequenceOptions but uses the nodeFormatter
124134
// for keyword casing.

pkg/gosqlx/extract.go

Lines changed: 5 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -43,68 +43,12 @@
4343
//
4444
// For large ASTs (1000+ nodes), expect extraction times <100μs on modern hardware.
4545
//
46-
// # Parser Limitations
46+
// # Supported Expression Types
4747
//
48-
// The extraction functions in this package are subject to the following parser limitations.
49-
// These limitations represent SQL features that are partially supported or not yet fully
50-
// implemented in the GoSQLX parser. As the parser evolves, these limitations may be
51-
// addressed in future releases.
52-
//
53-
// ## Known Limitations
54-
//
55-
// 1. CASE Expressions:
56-
// CASE expressions (simple and searched CASE) are not fully supported in the parser.
57-
// Column references within CASE WHEN conditions and result expressions may not be
58-
// extracted correctly.
59-
//
60-
// Example (not fully supported):
61-
// SELECT CASE status WHEN 'active' THEN name ELSE 'N/A' END FROM users
62-
//
63-
// 2. CAST Expressions:
64-
// CAST expressions for type conversion are not fully supported. Column references
65-
// within CAST expressions may not be extracted.
66-
//
67-
// Example (not fully supported):
68-
// SELECT CAST(price AS DECIMAL(10,2)) FROM products
69-
//
70-
// 3. IN Expressions:
71-
// IN expressions with subqueries or complex value lists in WHERE clauses are not
72-
// fully supported. Column references in IN lists may not be extracted correctly.
73-
//
74-
// Example (not fully supported):
75-
// SELECT * FROM users WHERE status IN ('active', 'pending')
76-
// SELECT * FROM orders WHERE user_id IN (SELECT id FROM users)
77-
//
78-
// 4. BETWEEN Expressions:
79-
// BETWEEN expressions for range comparisons are not fully supported. Column references
80-
// in BETWEEN bounds may not be extracted correctly.
81-
//
82-
// Example (not fully supported):
83-
// SELECT * FROM products WHERE price BETWEEN min_price AND max_price
84-
//
85-
// 5. Complex Recursive CTEs:
86-
// Recursive Common Table Expressions (CTEs) with complex JOIN syntax are not fully
87-
// supported. Simple recursive CTEs work, but complex variations may fail to parse.
88-
//
89-
// Example (not fully supported):
90-
// WITH RECURSIVE org_chart AS (
91-
// SELECT id, name, manager_id, 1 as level FROM employees WHERE manager_id IS NULL
92-
// UNION ALL
93-
// SELECT e.id, e.name, e.manager_id, o.level + 1
94-
// FROM employees e
95-
// INNER JOIN org_chart o ON e.manager_id = o.id
96-
// )
97-
// SELECT * FROM org_chart
98-
//
99-
// ## Workarounds
100-
//
101-
// For queries using these unsupported features:
102-
// - Simplify complex expressions where possible
103-
// - Use alternative SQL syntax that is supported
104-
// - Extract metadata manually from the original SQL string
105-
// - Consider contributing parser enhancements to the GoSQLX project
106-
//
107-
// ## Reporting Issues
48+
// The extraction functions correctly traverse all standard expression types
49+
// including: CASE (simple and searched), CAST, IN, BETWEEN, EXTRACT,
50+
// SUBSTRING, POSITION, subqueries, recursive CTEs with JOINs, window
51+
// functions, and all arithmetic/logical operators.
10852
//
10953
// If you encounter parsing issues with SQL queries that should be supported,
11054
// please report them at: https://github.com/ajitpratap0/GoSQLX/issues

pkg/gosqlx/extract_test.go

Lines changed: 85 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,27 @@ func TestExtractTables_WithCTE(t *testing.T) {
116116
}
117117

118118
func TestExtractTables_WithRecursiveCTE(t *testing.T) {
119-
// Skipping - Recursive CTE with complex JOIN syntax not fully supported yet
120-
t.Skip("Recursive CTE with complex syntax not fully supported")
119+
sql := `WITH RECURSIVE org_chart AS (
120+
SELECT id, name, manager_id, 1 as level FROM employees WHERE manager_id IS NULL
121+
UNION ALL
122+
SELECT e.id, e.name, e.manager_id, o.level + 1
123+
FROM employees e
124+
INNER JOIN org_chart o ON e.manager_id = o.id
125+
)
126+
SELECT * FROM org_chart`
127+
128+
astNode, err := Parse(sql)
129+
if err != nil {
130+
t.Fatalf("Failed to parse recursive CTE: %v", err)
131+
}
132+
133+
tables := ExtractTables(astNode)
134+
if !contains(tables, "employees") {
135+
t.Errorf("Expected to find 'employees' table, got: %v", tables)
136+
}
137+
if !contains(tables, "org_chart") {
138+
t.Errorf("Expected to find 'org_chart' CTE reference, got: %v", tables)
139+
}
121140
}
122141

123142
func TestExtractTables_Insert(t *testing.T) {
@@ -647,23 +666,58 @@ func TestExtractMetadata_EmptyQuery(t *testing.T) {
647666
}
648667

649668
func TestExtractColumns_WithCaseExpression(t *testing.T) {
650-
// Skipping - CASE expressions not fully supported in parser yet
651-
t.Skip("CASE expressions not fully supported in parser yet")
669+
sql := "SELECT CASE status WHEN 'active' THEN name ELSE 'N/A' END FROM users"
670+
astNode, err := Parse(sql)
671+
if err != nil {
672+
t.Fatalf("Failed to parse SQL: %v", err)
673+
}
674+
675+
cols := ExtractColumns(astNode)
676+
if !contains(cols, "status") {
677+
t.Errorf("Expected 'status' in columns, got: %v", cols)
678+
}
679+
if !contains(cols, "name") {
680+
t.Errorf("Expected 'name' in columns, got: %v", cols)
681+
}
652682
}
653683

654684
func TestExtractColumns_WithInExpression(t *testing.T) {
655-
// Skipping - IN expressions in WHERE clause not fully supported yet
656-
t.Skip("IN expressions in WHERE clause not fully supported yet")
685+
sql := "SELECT * FROM users WHERE status IN ('active', 'pending')"
686+
astNode, err := Parse(sql)
687+
if err != nil {
688+
t.Fatalf("Failed to parse SQL: %v", err)
689+
}
690+
691+
cols := ExtractColumns(astNode)
692+
if !contains(cols, "status") {
693+
t.Errorf("Expected 'status' in columns, got: %v", cols)
694+
}
657695
}
658696

659697
func TestExtractColumns_WithBetweenExpression(t *testing.T) {
660-
// Skipping - BETWEEN expressions not fully supported yet
661-
t.Skip("BETWEEN expressions not fully supported yet")
698+
sql := "SELECT * FROM products WHERE price BETWEEN 10 AND 100"
699+
astNode, err := Parse(sql)
700+
if err != nil {
701+
t.Fatalf("Failed to parse SQL: %v", err)
702+
}
703+
704+
cols := ExtractColumns(astNode)
705+
if !contains(cols, "price") {
706+
t.Errorf("Expected 'price' in columns, got: %v", cols)
707+
}
662708
}
663709

664710
func TestExtractFunctions_InCaseExpression(t *testing.T) {
665-
// Skipping - CASE expressions not fully supported yet
666-
t.Skip("CASE expressions not fully supported yet")
711+
sql := "SELECT CASE WHEN COUNT(*) > 0 THEN 'yes' ELSE 'no' END FROM users"
712+
astNode, err := Parse(sql)
713+
if err != nil {
714+
t.Fatalf("Failed to parse SQL: %v", err)
715+
}
716+
717+
funcs := ExtractFunctions(astNode)
718+
if !contains(funcs, "COUNT") {
719+
t.Errorf("Expected 'COUNT' in functions, got: %v", funcs)
720+
}
667721
}
668722

669723
func TestExtractTables_WithSetOperations(t *testing.T) {
@@ -854,11 +908,27 @@ func TestExtractFunctions_NoFunctions(t *testing.T) {
854908
}
855909

856910
func TestExtractColumns_WithCastExpression(t *testing.T) {
857-
// Skipping - CAST expressions not fully supported yet
858-
t.Skip("CAST expressions not fully supported yet")
911+
sql := "SELECT CAST(price AS DECIMAL) FROM products"
912+
astNode, err := Parse(sql)
913+
if err != nil {
914+
t.Fatalf("Failed to parse SQL: %v", err)
915+
}
916+
917+
cols := ExtractColumns(astNode)
918+
if !contains(cols, "price") {
919+
t.Errorf("Expected 'price' in columns, got: %v", cols)
920+
}
859921
}
860922

861-
func TestExtractFunctions_ExtractExpression(t *testing.T) {
862-
// Skipping - EXTRACT expressions not fully supported yet
863-
t.Skip("EXTRACT expressions not fully supported yet")
923+
func TestExtractColumns_WithExtractExpression(t *testing.T) {
924+
sql := "SELECT EXTRACT(YEAR FROM created_at) FROM orders"
925+
astNode, err := Parse(sql)
926+
if err != nil {
927+
t.Fatalf("Failed to parse SQL: %v", err)
928+
}
929+
930+
cols := ExtractColumns(astNode)
931+
if !contains(cols, "created_at") {
932+
t.Errorf("Expected 'created_at' in columns, got: %v", cols)
933+
}
864934
}

pkg/gosqlx/gosqlx.go

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -256,15 +256,32 @@ func Validate(sql string) error {
256256

257257
// ParseBytes is like Parse but accepts a byte slice.
258258
//
259-
// This is useful when you already have SQL as bytes (e.g., from file I/O)
260-
// and want to avoid the string → []byte conversion overhead.
259+
// This avoids the string-to-byte conversion that Parse performs internally,
260+
// making it more efficient when you already have SQL as bytes (e.g., from
261+
// file I/O or network reads).
261262
//
262263
// Example:
263264
//
264265
// sqlBytes := []byte("SELECT * FROM users")
265266
// astNode, err := gosqlx.ParseBytes(sqlBytes)
266267
func ParseBytes(sql []byte) (*ast.AST, error) {
267-
return Parse(string(sql))
268+
tkz := tokenizer.GetTokenizer()
269+
defer tokenizer.PutTokenizer(tkz)
270+
271+
tokens, err := tkz.Tokenize(sql)
272+
if err != nil {
273+
return nil, fmt.Errorf("tokenization failed: %w", err)
274+
}
275+
276+
p := parser.GetParser()
277+
defer parser.PutParser(p)
278+
279+
astNode, err := p.ParseFromModelTokens(tokens)
280+
if err != nil {
281+
return nil, fmt.Errorf("parsing failed: %w", err)
282+
}
283+
284+
return astNode, nil
268285
}
269286

270287
// MustParse is like Parse but panics on error.

pkg/sql/ast/ast.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1892,6 +1892,18 @@ func (a AST) Children() []Node {
18921892
return children
18931893
}
18941894

1895+
// HasUnsupportedStatements returns true if the AST contains any
1896+
// UnsupportedStatement nodes — statements the parser consumed but
1897+
// could not fully model.
1898+
func (a AST) HasUnsupportedStatements() bool {
1899+
for _, stmt := range a.Statements {
1900+
if _, ok := stmt.(*UnsupportedStatement); ok {
1901+
return true
1902+
}
1903+
}
1904+
return false
1905+
}
1906+
18951907
// PragmaStatement represents a SQLite PRAGMA statement.
18961908
// Examples: PRAGMA table_info(users), PRAGMA journal_mode = WAL, PRAGMA integrity_check
18971909
type PragmaStatement struct {
@@ -1924,6 +1936,23 @@ func (d *DescribeStatement) statementNode() {}
19241936
func (d DescribeStatement) TokenLiteral() string { return "DESCRIBE" }
19251937
func (d DescribeStatement) Children() []Node { return nil }
19261938

1939+
// UnsupportedStatement represents a SQL statement that was parsed but not
1940+
// fully modeled in the AST. The parser consumed and validated the tokens
1941+
// but no dedicated AST node exists yet for this statement kind.
1942+
//
1943+
// Consumers should use Kind to identify the operation (e.g., "USE", "COPY",
1944+
// "CREATE STAGE") and RawSQL for the original text. Tools that do
1945+
// switch stmt.(type) should handle this case explicitly rather than
1946+
// falling through to a default that assumes the statement is well-structured.
1947+
type UnsupportedStatement struct {
1948+
Kind string // Operation kind: "USE", "COPY", "PUT", "GET", "LIST", "REMOVE", "CREATE STAGE", etc.
1949+
RawSQL string // Original SQL fragment for round-trip fidelity
1950+
}
1951+
1952+
func (u *UnsupportedStatement) statementNode() {}
1953+
func (u UnsupportedStatement) TokenLiteral() string { return u.Kind }
1954+
func (u UnsupportedStatement) Children() []Node { return nil }
1955+
19271956
// ReplaceStatement represents MySQL REPLACE INTO statement
19281957
type ReplaceStatement struct {
19291958
TableName string

pkg/sql/ast/pool.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,12 @@ var (
127127
},
128128
}
129129

130+
unsupportedStmtPool = sync.Pool{
131+
New: func() interface{} {
132+
return &UnsupportedStatement{}
133+
},
134+
}
135+
130136
replaceStmtPool = sync.Pool{
131137
New: func() interface{} {
132138
return &ReplaceStatement{
@@ -535,6 +541,8 @@ func releaseStatement(stmt Statement) {
535541
PutShowStatement(s)
536542
case *DescribeStatement:
537543
PutDescribeStatement(s)
544+
case *UnsupportedStatement:
545+
PutUnsupportedStatement(s)
538546
case *ReplaceStatement:
539547
PutReplaceStatement(s)
540548
case *AlterStatement:
@@ -1756,6 +1764,23 @@ func PutDescribeStatement(stmt *DescribeStatement) {
17561764
describeStmtPool.Put(stmt)
17571765
}
17581766

1767+
// GetUnsupportedStatement gets an UnsupportedStatement from the pool.
1768+
func GetUnsupportedStatement() *UnsupportedStatement {
1769+
return unsupportedStmtPool.Get().(*UnsupportedStatement)
1770+
}
1771+
1772+
// PutUnsupportedStatement returns an UnsupportedStatement to the pool.
1773+
func PutUnsupportedStatement(stmt *UnsupportedStatement) {
1774+
if stmt == nil {
1775+
return
1776+
}
1777+
1778+
stmt.Kind = ""
1779+
stmt.RawSQL = ""
1780+
1781+
unsupportedStmtPool.Put(stmt)
1782+
}
1783+
17591784
// GetReplaceStatement gets a ReplaceStatement from the pool.
17601785
func GetReplaceStatement() *ReplaceStatement {
17611786
stmt := replaceStmtPool.Get().(*ReplaceStatement)

0 commit comments

Comments
 (0)