Skip to content

Commit 36c33d9

Browse files
committed
Add scope graph + bidirectional type checking for SQL analysis
Implement a new SQL analysis system based on two concepts from programming language theory: 1. Scope graphs (internal/analysis/scope/): Model SQL name resolution as path-finding in a labeled graph. Each scope contains declarations (columns, tables, aliases) connected by edges (PARENT, ALIAS, LATERAL, OUTER). This handles joins, subqueries, CTEs, and aliases compositionally — the resolution algorithm doesn't change, only the graph structure does. 2. Bidirectional type checking (internal/analysis/typecheck/): Type information flows in two directions — synthesis (bottom-up: "what type does this expression have?") and checking (top-down: "does this match the expected type?"). This naturally handles parameter type inference: when $1 appears in `WHERE age > $1`, checking mode infers $1's type from the column's type. 3. SQL analyzer (internal/analysis/sqlanalyze/): Combines both systems by walking the sqlc AST, building scope graphs from FROM/JOIN/CTE clauses, and running bidirectional type checking on expressions. Supports SELECT, INSERT, UPDATE, DELETE with parameter inference, output column resolution, and engine-specific operator rules for PostgreSQL and MySQL. https://claude.ai/code/session_01VFJemaXKRZ2NfxYkpwXSbD
1 parent 4bf2159 commit 36c33d9

File tree

9 files changed

+3456
-0
lines changed

9 files changed

+3456
-0
lines changed

internal/analysis/scope/resolve.go

Lines changed: 274 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,274 @@
1+
package scope
2+
3+
import "fmt"
4+
5+
// ResolutionError describes why name resolution failed with full provenance.
6+
type ResolutionError struct {
7+
Name string
8+
Qualifier string // Table/alias qualifier, if any
9+
Kind ResolutionErrorKind
10+
Scope *Scope // The scope where resolution was attempted
11+
Candidates []string // For ambiguity errors, the competing names
12+
Location int // Source position of the reference
13+
}
14+
15+
// ResolutionErrorKind classifies the reason a name lookup failed.
type ResolutionErrorKind int

// Failure categories carried in ResolutionError.Kind.
const (
	// ErrNotFound is the kind used when the requested name is not found.
	ErrNotFound ResolutionErrorKind = iota
	// ErrAmbiguous is the kind used when a reference is ambiguous.
	ErrAmbiguous
	// ErrQualifierNotFound is the kind used when the qualifier itself does
	// not exist, e.g. "u.name" where "u" is not declared.
	ErrQualifierNotFound
)
22+
23+
func (e *ResolutionError) Error() string {
24+
switch e.Kind {
25+
case ErrNotFound:
26+
if e.Qualifier != "" {
27+
return fmt.Sprintf("column %q not found in %q", e.Name, e.Qualifier)
28+
}
29+
return fmt.Sprintf("column %q does not exist", e.Name)
30+
case ErrAmbiguous:
31+
return fmt.Sprintf("column reference %q is ambiguous", e.Name)
32+
case ErrQualifierNotFound:
33+
return fmt.Sprintf("table or alias %q does not exist", e.Qualifier)
34+
default:
35+
return fmt.Sprintf("resolution error for %q", e.Name)
36+
}
37+
}
38+
39+
// ResolutionPath records the edges traversed during successful resolution.
40+
// This is the provenance — it tells you exactly how a name was resolved.
41+
type ResolutionPath struct {
42+
Steps []ResolutionStep
43+
}
44+
45+
type ResolutionStep struct {
46+
Edge *Edge // nil for the final lookup step
47+
Scope *Scope // The scope where this step occurred
48+
}
49+
50+
// ResolvedName is the result of successful name resolution.
51+
type ResolvedName struct {
52+
Declaration *Declaration
53+
Path ResolutionPath
54+
}
55+
56+
// Resolve looks up an unqualified column name in this scope.
57+
// It searches local declarations first, then follows parent edges.
58+
// Returns an error if the name is not found or is ambiguous.
59+
func (s *Scope) Resolve(name string) (*ResolvedName, error) {
60+
return s.resolve(name, nil, 0)
61+
}
62+
63+
// ResolveQualified looks up a qualified name like "u.name".
64+
// First resolves the qualifier (table/alias), then looks up the column
65+
// in that table's scope.
66+
func (s *Scope) ResolveQualified(qualifier, name string) (*ResolvedName, error) {
67+
// First, find the qualifier (table name or alias)
68+
qualScope, err := s.resolveQualifier(qualifier, 0)
69+
if err != nil {
70+
return nil, &ResolutionError{
71+
Name: name,
72+
Qualifier: qualifier,
73+
Kind: ErrQualifierNotFound,
74+
Scope: s,
75+
}
76+
}
77+
78+
// Then resolve the column within that scope
79+
var matches []*Declaration
80+
for _, d := range qualScope.Declarations {
81+
if d.Name == name {
82+
matches = append(matches, d)
83+
}
84+
}
85+
86+
if len(matches) == 0 {
87+
return nil, &ResolutionError{
88+
Name: name,
89+
Qualifier: qualifier,
90+
Kind: ErrNotFound,
91+
Scope: qualScope,
92+
}
93+
}
94+
if len(matches) > 1 {
95+
return nil, &ResolutionError{
96+
Name: name,
97+
Qualifier: qualifier,
98+
Kind: ErrAmbiguous,
99+
Scope: qualScope,
100+
}
101+
}
102+
103+
return &ResolvedName{
104+
Declaration: matches[0],
105+
Path: ResolutionPath{
106+
Steps: []ResolutionStep{
107+
{Scope: s},
108+
{Scope: qualScope},
109+
},
110+
},
111+
}, nil
112+
}
113+
114+
const maxResolutionDepth = 20
115+
116+
// resolve performs recursive name resolution with cycle detection via depth limit.
117+
func (s *Scope) resolve(name string, visited map[*Scope]bool, depth int) (*ResolvedName, error) {
118+
if depth > maxResolutionDepth {
119+
return nil, &ResolutionError{Name: name, Kind: ErrNotFound, Scope: s}
120+
}
121+
if visited == nil {
122+
visited = make(map[*Scope]bool)
123+
}
124+
if visited[s] {
125+
return nil, &ResolutionError{Name: name, Kind: ErrNotFound, Scope: s}
126+
}
127+
visited[s] = true
128+
129+
// Search local declarations first
130+
var matches []*Declaration
131+
for _, d := range s.Declarations {
132+
if d.Name == name && d.Kind == DeclColumn {
133+
matches = append(matches, d)
134+
}
135+
}
136+
137+
// Also search table/alias declarations to find columns inside their scopes
138+
for _, d := range s.Declarations {
139+
if (d.Kind == DeclTable || d.Kind == DeclAlias || d.Kind == DeclCTE) && d.Scope != nil {
140+
for _, cd := range d.Scope.Declarations {
141+
if cd.Name == name && cd.Kind == DeclColumn {
142+
matches = append(matches, cd)
143+
}
144+
}
145+
}
146+
}
147+
148+
if len(matches) == 1 {
149+
return &ResolvedName{
150+
Declaration: matches[0],
151+
Path: ResolutionPath{
152+
Steps: []ResolutionStep{{Scope: s}},
153+
},
154+
}, nil
155+
}
156+
if len(matches) > 1 {
157+
return nil, &ResolutionError{Name: name, Kind: ErrAmbiguous, Scope: s}
158+
}
159+
160+
// Follow parent, lateral, and outer edges
161+
for _, edge := range s.Edges {
162+
switch edge.Kind {
163+
case EdgeParent, EdgeLateral, EdgeOuter:
164+
result, err := edge.Target.resolve(name, visited, depth+1)
165+
if err == nil {
166+
result.Path.Steps = append([]ResolutionStep{{Edge: edge, Scope: s}}, result.Path.Steps...)
167+
return result, nil
168+
}
169+
// Propagate ambiguity errors — don't swallow them
170+
if resErr, ok := err.(*ResolutionError); ok && resErr.Kind == ErrAmbiguous {
171+
return nil, resErr
172+
}
173+
}
174+
}
175+
176+
return nil, &ResolutionError{Name: name, Kind: ErrNotFound, Scope: s}
177+
}
178+
179+
// resolveQualifier finds the scope associated with a table name or alias.
180+
func (s *Scope) resolveQualifier(qualifier string, depth int) (*Scope, error) {
181+
if depth > maxResolutionDepth {
182+
return nil, fmt.Errorf("qualifier %q not found", qualifier)
183+
}
184+
185+
// Check alias edges first (higher priority)
186+
for _, edge := range s.Edges {
187+
if edge.Kind == EdgeAlias && edge.Label == qualifier {
188+
return edge.Target, nil
189+
}
190+
}
191+
192+
// Check local table/alias declarations
193+
for _, d := range s.Declarations {
194+
if d.Name == qualifier && (d.Kind == DeclTable || d.Kind == DeclAlias || d.Kind == DeclCTE) && d.Scope != nil {
195+
return d.Scope, nil
196+
}
197+
}
198+
199+
// Follow parent edges
200+
for _, edge := range s.Edges {
201+
if edge.Kind == EdgeParent || edge.Kind == EdgeLateral || edge.Kind == EdgeOuter {
202+
result, err := edge.Target.resolveQualifier(qualifier, depth+1)
203+
if err == nil {
204+
return result, nil
205+
}
206+
}
207+
}
208+
209+
return nil, fmt.Errorf("qualifier %q not found", qualifier)
210+
}
211+
212+
// ResolveColumnRef resolves a column reference that may have 1, 2, or 3 parts:
213+
// - ["name"] -> unqualified column
214+
// - ["alias", "name"] -> table-qualified column
215+
// - ["schema", "table", "name"] -> schema-qualified column (treated as qualifier="table")
216+
func (s *Scope) ResolveColumnRef(parts []string) (*ResolvedName, error) {
217+
switch len(parts) {
218+
case 1:
219+
return s.Resolve(parts[0])
220+
case 2:
221+
return s.ResolveQualified(parts[0], parts[1])
222+
case 3:
223+
// For now, ignore schema and use table.column
224+
return s.ResolveQualified(parts[1], parts[2])
225+
default:
226+
return nil, fmt.Errorf("invalid column reference with %d parts", len(parts))
227+
}
228+
}
229+
230+
// AllColumns returns all column declarations visible from this scope,
231+
// optionally filtered by a qualifier. This is used for SELECT * expansion.
232+
func (s *Scope) AllColumns(qualifier string) []*Declaration {
233+
if qualifier != "" {
234+
qualScope, err := s.resolveQualifier(qualifier, 0)
235+
if err != nil {
236+
return nil
237+
}
238+
var cols []*Declaration
239+
for _, d := range qualScope.Declarations {
240+
if d.Kind == DeclColumn {
241+
cols = append(cols, d)
242+
}
243+
}
244+
return cols
245+
}
246+
247+
// Collect from all table/alias declarations in this scope
248+
var cols []*Declaration
249+
seen := make(map[string]bool)
250+
251+
var collect func(sc *Scope, depth int)
252+
collect = func(sc *Scope, depth int) {
253+
if depth > maxResolutionDepth {
254+
return
255+
}
256+
for _, d := range sc.Declarations {
257+
if (d.Kind == DeclTable || d.Kind == DeclAlias || d.Kind == DeclCTE) && d.Scope != nil {
258+
for _, cd := range d.Scope.Declarations {
259+
if cd.Kind == DeclColumn && !seen[d.Name+"."+cd.Name] {
260+
seen[d.Name+"."+cd.Name] = true
261+
cols = append(cols, cd)
262+
}
263+
}
264+
}
265+
}
266+
for _, edge := range sc.Edges {
267+
if edge.Kind == EdgeParent {
268+
collect(edge.Target, depth+1)
269+
}
270+
}
271+
}
272+
collect(s, 0)
273+
return cols
274+
}

0 commit comments

Comments
 (0)