Skip to content

Commit 3937e21

Browse files
committed
fix: attribution fixes from production corpuses (up to 94% success rate)
Heavily done with use of Claude Code, to reduce the testing times > Root causes fixed: > > - Walker node-type gaps: rewriteRefs, walkFixups, walkCast each > missed CoalesceExpr / MinMaxExpr / AArrayExpr / RowExpr / NullTest / > BooleanTest, plus InsertStmt.WithClause in every walker. Refs and > EXTRACT fields buried in COALESCE survived reshape; CTEs on INSERT > were invisible to the whole chain. > - Transaction cascade: a mid-batch error in attributeCluster's > BEGIN; …; COMMIT; left the connection aborted and poisoned every > subsequent cluster with 25P02. ROLLBACK on error. > - Confidence label: unqualified column on a scan node was labeled > heuristic, but PG's plan structure pins it to that relation > exactly. Only mismatched qualifiers are heuristic now. > - Function Scan: use Alias as table when Relation Name is absent > (pg_catalog.pg_settings et al). > - Capture: skip session/meta statements so qshape's own probes > don't re-enter the corpus. > > Typecast additions from real corpus errors: BoolExpr→bool, > json_build_object keys→text, EXISTS SELECT→int4, JOIN ON / WHERE / > HAVING→bool, searched CASE WHEN→bool.
1 parent 2109b4b commit 3937e21

7 files changed

Lines changed: 1456 additions & 17 deletions

File tree

cmd/qshape/attribute.go

Lines changed: 132 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ func runAttribute(inPath, connStr string, top int) error {
100100
}
101101
defer conn.Close(ctx)
102102

103+
cache := newTypecastCache(conn)
103104
attributed, skipped := 0, 0
104105
for i := range doc.Clusters {
105106
if top > 0 && i >= top {
@@ -109,7 +110,7 @@ func runAttribute(inPath, connStr string, top int) error {
109110
if c.Fingerprint == "" || c.Canonical == "" {
110111
continue
111112
}
112-
params, err := attributeCluster(ctx, conn, c.Canonical)
113+
params, err := attributeCluster(ctx, conn, cache, c.Canonical)
113114
if err != nil {
114115
skipped++
115116
c.Params = []qshape.ParamAttribution{{Confidence: "none", Note: err.Error()}}
@@ -129,10 +130,45 @@ func runAttribute(inPath, connStr string, top int) error {
129130
return enc.Encode(doc)
130131
}
131132

132-
func attributeCluster(ctx context.Context, conn *pgx.Conn, canonical string) ([]qshape.ParamAttribution, error) {
133-
var planJSON []byte
134-
row := conn.QueryRow(ctx, "EXPLAIN (GENERIC_PLAN, FORMAT JSON) "+canonical)
135-
if err := row.Scan(&planJSON); err != nil {
133+
func attributeCluster(ctx context.Context, conn *pgx.Conn, cache *typecastCache, canonical string) ([]qshape.ParamAttribution, error) {
134+
// Re-normalise so clusters.json written by an older qshape version picks
135+
// up current reshape fixes (extract-field recovery, param renumbering).
136+
// Fall back to the stored form if parsing fails
137+
if renormed, err := qshape.Normalize(canonical); err == nil {
138+
canonical = renormed
139+
}
140+
explainSQL := castFuncParamRefs(ctx, cache, canonical)
141+
// PREPARE + EXPLAIN EXECUTE so Postgres sets up a parameter context
142+
// for the $N placeholders. Works on any PG version (GENERIC_PLAN alone
143+
// requires 16+, and the simple-query parser rejects bare $N otherwise).
144+
// Types come from the typecast pass we just ran; NULL values satisfy
145+
// EXECUTE's arity requirement without affecting the plan
146+
nparams := maxParamNumber(explainSQL)
147+
nulls := "NULL"
148+
for i := 1; i < nparams; i++ {
149+
nulls += ", NULL"
150+
}
151+
// force_generic_plan keeps $N in the plan output instead of inlining
152+
// the NULL arguments. Without it Postgres produces a custom plan with
153+
// `WHERE col = NULL` filters that walkPlan can't attribute
154+
script := "SET LOCAL plan_cache_mode = force_generic_plan;\n"
155+
script += "PREPARE _qshape_tmp AS " + explainSQL + ";\n"
156+
if nparams > 0 {
157+
script += "EXPLAIN (FORMAT JSON) EXECUTE _qshape_tmp(" + nulls + ");\n"
158+
} else {
159+
script += "EXPLAIN (FORMAT JSON) EXECUTE _qshape_tmp;\n"
160+
}
161+
script += "DEALLOCATE _qshape_tmp;"
162+
// SET LOCAL only applies inside a transaction — wrap the whole script
163+
script = "BEGIN;\n" + script + "\nCOMMIT;"
164+
planJSON, err := readPlanJSON(ctx, conn, script)
165+
if err != nil {
166+
// A mid-batch error aborts the BEGIN'd transaction and skips the
167+
// trailing COMMIT in the same simple-query batch, so the connection
168+
// stays in aborted state and every following cluster fails with
169+
// 25P02. ROLLBACK resets it before the next call.
170+
_, _ = conn.Exec(ctx, "ROLLBACK")
171+
_, _ = conn.Exec(ctx, "DEALLOCATE IF EXISTS _qshape_tmp")
136172
return nil, err
137173
}
138174

@@ -157,6 +193,73 @@ func attributeCluster(ctx context.Context, conn *pgx.Conn, canonical string) ([]
157193
return out, nil
158194
}
159195

196+
// readPlanJSON runs a multi-statement script (PREPARE; EXPLAIN; DEALLOCATE)
197+
// through simple-query protocol and returns the first text cell of the
198+
// first result-bearing statement. pgx's Query path sanitises $N against
199+
// bound args; here we send the script verbatim
200+
func readPlanJSON(ctx context.Context, conn *pgx.Conn, script string) ([]byte, error) {
201+
mrr := conn.PgConn().Exec(ctx, script)
202+
defer mrr.Close()
203+
var out []byte
204+
for mrr.NextResult() {
205+
rr := mrr.ResultReader()
206+
for rr.NextRow() {
207+
vals := rr.Values()
208+
if len(vals) > 0 && out == nil {
209+
out = append([]byte(nil), vals[0]...)
210+
}
211+
}
212+
if _, err := rr.Close(); err != nil {
213+
return nil, err
214+
}
215+
}
216+
if err := mrr.Close(); err != nil {
217+
return nil, err
218+
}
219+
if out == nil {
220+
return nil, fmt.Errorf("no rows returned")
221+
}
222+
return out, nil
223+
}
224+
225+
// maxParamNumber scans sql for `$N` tokens and returns the highest N seen,
226+
// or 0 if none. Skips $-tags inside string and dollar-quoted contexts
227+
func maxParamNumber(sql string) int {
228+
max := 0
229+
for i := 0; i < len(sql); i++ {
230+
c := sql[i]
231+
switch c {
232+
case '\'':
233+
// skip to matching quote, honoring doubled ''
234+
i++
235+
for i < len(sql) {
236+
if sql[i] == '\'' {
237+
if i+1 < len(sql) && sql[i+1] == '\'' {
238+
i += 2
239+
continue
240+
}
241+
break
242+
}
243+
i++
244+
}
245+
case '$':
246+
j := i + 1
247+
n := 0
248+
for j < len(sql) && sql[j] >= '0' && sql[j] <= '9' {
249+
n = n*10 + int(sql[j]-'0')
250+
j++
251+
}
252+
if j > i+1 {
253+
if n > max {
254+
max = n
255+
}
256+
i = j - 1
257+
}
258+
}
259+
}
260+
return max
261+
}
262+
160263
func walkPlan(raw json.RawMessage, parentSchema, parentTable string, ctx *attrCtx) {
161264
if len(raw) == 0 {
162265
return
@@ -166,21 +269,32 @@ func walkPlan(raw json.RawMessage, parentSchema, parentTable string, ctx *attrCt
166269
return
167270
}
168271

169-
// Track alias → table mapping so we can resolve `u.id = $1` to users.id
272+
// Track alias → table mapping so we can resolve `u.id = $1` to users.id.
273+
// Function Scan on a system view like pg_catalog.pg_settings leaves
274+
// RelationName empty but still sets Alias; use Alias as the table name
275+
// so conds like `(name = $1)` attribute to pg_settings.name.
170276
aliasToTable := map[string]tableRef{}
277+
fallbackTable := n.RelationName
278+
fallbackSchema := n.Schema
171279
if n.RelationName != "" {
172280
t := tableRef{Schema: n.Schema, Table: n.RelationName}
173281
aliasToTable[n.RelationName] = t
174282
if n.Alias != "" && n.Alias != n.RelationName {
175283
aliasToTable[n.Alias] = t
176284
}
285+
} else if n.Alias != "" {
286+
// Function Scan / Values Scan: no Relation Name but Alias names the
287+
// logical target (e.g. pg_settings). Attribute to the alias.
288+
t := tableRef{Schema: n.Schema, Table: n.Alias}
289+
aliasToTable[n.Alias] = t
290+
fallbackTable = n.Alias
177291
}
178292

179293
for _, cond := range []string{n.IndexCond, n.HashCond, n.Filter, n.RecheckCond, n.JoinFilter, n.MergeCond} {
180294
if cond == "" {
181295
continue
182296
}
183-
attributeCond(cond, aliasToTable, n.Schema, n.RelationName, ctx)
297+
attributeCond(cond, aliasToTable, fallbackSchema, fallbackTable, ctx)
184298
}
185299

186300
if len(n.Plans) > 0 {
@@ -223,10 +337,19 @@ func recordParam(aliasOrTable, col, posStr string, aliases map[string]tableRef,
223337
ref, ok := aliases[aliasOrTable]
224338
confidence := "exact"
225339
if !ok {
226-
// Bare column without an alias — attribute to the current relation.
227340
if fallbackTable != "" {
228341
ref = tableRef{Schema: fallbackSchema, Table: fallbackTable}
229-
confidence = "heuristic"
342+
// An unqualified column in plan text (e.g. `Filter: (id = $1)`
343+
// on an Index Scan over `session`) is PG telling us exactly
344+
// which scan node the column belongs to — not a guess. Only
345+
// downgrade to heuristic when we saw a qualifier that didn't
346+
// resolve (stale alias, schema-qualified name we didn't
347+
// track, or a subplan reference).
348+
if aliasOrTable == "" {
349+
confidence = "exact"
350+
} else {
351+
confidence = "heuristic"
352+
}
230353
} else {
231354
confidence = "none"
232355
}

cmd/qshape/attribute_test.go

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package main
22

33
import (
4+
"encoding/json"
45
"testing"
56

67
"github.com/boringsql/qshape"
@@ -25,20 +26,42 @@ func TestAttributeCondAliasedEqual(t *testing.T) {
2526
}
2627
}
2728

28-
func TestAttributeCondUnaliasedFallback(t *testing.T) {
29+
// PG emits bare column names in plan text when the scan is unambiguous
30+
// (e.g. Filter on a single-table Index Scan). The plan node pins the
31+
// column to its relation — that's exact attribution, not a guess.
32+
func TestAttributeCondUnqualifiedOnScanIsExact(t *testing.T) {
2933
ctx := &attrCtx{byPosition: map[int]*qshape.ParamAttribution{}}
30-
aliases := map[string]tableRef{}
34+
aliases := map[string]tableRef{
35+
"session": {Schema: "auth", Table: "session"},
36+
}
3137
attributeCond("(id = $1)", aliases, "auth", "session", ctx)
3238

3339
a, ok := ctx.byPosition[1]
3440
if !ok {
3541
t.Fatal("expected param 1 attributed")
3642
}
37-
if a.Table != "session" || a.Column != "id" {
38-
t.Errorf("wrong fallback attribution: %+v", a)
43+
if a.Table != "session" || a.Column != "id" || a.Schema != "auth" {
44+
t.Errorf("wrong attribution: %+v", a)
45+
}
46+
if a.Confidence != "exact" {
47+
t.Errorf("expected exact confidence for unqualified col on a scan node, got %s", a.Confidence)
48+
}
49+
}
50+
51+
// A qualifier that doesn't resolve (outer-scope ref, schema-qualified name,
52+
// subplan name) — attribute to the current relation as a best guess and
53+
// flag it as heuristic.
54+
func TestAttributeCondMismatchedQualifierIsHeuristic(t *testing.T) {
55+
ctx := &attrCtx{byPosition: map[int]*qshape.ParamAttribution{}}
56+
aliases := map[string]tableRef{}
57+
attributeCond("(outer_alias.id = $1)", aliases, "auth", "session", ctx)
58+
59+
a, ok := ctx.byPosition[1]
60+
if !ok {
61+
t.Fatal("expected param 1 attributed")
3962
}
4063
if a.Confidence != "heuristic" {
41-
t.Errorf("expected heuristic confidence, got %s", a.Confidence)
64+
t.Errorf("expected heuristic for mismatched qualifier, got %s", a.Confidence)
4265
}
4366
}
4467

@@ -59,6 +82,32 @@ func TestAttributeCondMultipleParams(t *testing.T) {
5982
}
6083
}
6184

85+
// PG plans system views like pg_catalog.pg_settings as a Function Scan.
86+
// The plan node has no "Relation Name" but carries the view name in
87+
// "Alias". walkPlan must still attribute conds on that node.
88+
func TestWalkPlanFunctionScanWithAliasOnly(t *testing.T) {
89+
// Index Scan on a function-backed view: Relation Name absent, Alias set.
90+
plan := json.RawMessage(`{
91+
"Node Type": "Function Scan",
92+
"Function Name": "pg_show_all_settings",
93+
"Alias": "pg_settings",
94+
"Filter": "(name = $1)"
95+
}`)
96+
c := &attrCtx{byPosition: map[int]*qshape.ParamAttribution{}}
97+
walkPlan(plan, "", "", c)
98+
99+
a, ok := c.byPosition[1]
100+
if !ok {
101+
t.Fatal("expected param 1 attributed via Alias fallback")
102+
}
103+
if a.Table != "pg_settings" || a.Column != "name" {
104+
t.Errorf("wrong attribution: %+v", a)
105+
}
106+
if a.Confidence != "exact" {
107+
t.Errorf("expected exact, got %s", a.Confidence)
108+
}
109+
}
110+
62111
func TestAttributeCondPreservesExactOverHeuristic(t *testing.T) {
63112
ctx := &attrCtx{byPosition: map[int]*qshape.ParamAttribution{}}
64113
aliases := map[string]tableRef{

cmd/qshape/capture.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ FROM pg_stat_statements s`)
6363
}
6464
where = append(where, fmt.Sprintf("s.calls > $%d", len(args)+1))
6565
args = append(args, minCalls)
66+
// Skip session/meta statements: they can't be PREPAREd (DISCARD,
67+
// BEGIN/COMMIT, SET, SHOW, VACUUM, ANALYZE, reload) and qshape's own
68+
// EXPLAIN / PREPARE / DEALLOCATE probes pollute pg_stat_statements.
69+
where = append(where, `s.query !~* '^\s*(discard|begin|start|commit|rollback|savepoint|release|set|reset|show|listen|unlisten|notify|checkpoint|vacuum|analyze|reindex|cluster|explain|prepare|deallocate|execute|close|fetch|move|lock)\M'`)
6670
sb.WriteString("\nWHERE ")
6771
sb.WriteString(strings.Join(where, " AND "))
6872
sb.WriteString("\nORDER BY s.total_exec_time DESC")

0 commit comments

Comments
 (0)