Skip to content

Commit 350001d

Browse files
authored
fix: harden sql_explain and altimate_core_validate input handling (#693)
* fix: [#691] harden sql_explain and altimate_core_validate inputs Reject empty/placeholder SQL and warehouse names in sql_explain before calling the warehouse. Add dialect-aware EXPLAIN (Snowflake, Postgres, Redshift, MySQL, DuckDB, Databricks, ClickHouse). Translate driver errors into actionable guidance. Remove altimate_core_validate no-schema hard-gate so the engine runs when schema is absent, with a has_schema flag in metadata. Closes #691 * fix: address coderabbit review on #693 - translateExplainError now catches $N and :name bind errors too, not just bare `?`. Detects a syntax-error keyword plus a bind token delimited by whitespace or quotes, covering PG, Snowflake and Oracle phrasings. Adds "there is no parameter $N" (PostgreSQL). - NO_SCHEMA_NOTE documents the flat table-map shape actually accepted by the tool, not the verbose SchemaDefinition shape. - Regression tests added for $1, :name, and the NO_SCHEMA wording.
1 parent f030bf8 commit 350001d

File tree

6 files changed

+1160
-37
lines changed

6 files changed

+1160
-37
lines changed

packages/opencode/src/altimate/native/connections/register.ts

Lines changed: 185 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,164 @@ function getWarehouseType(warehouseName?: string): string {
182182
return Registry.getConfig(warehouseName)?.type ?? "unknown"
183183
}
184184

/**
 * Dialect-aware EXPLAIN plan describing how a warehouse should be asked for
 * a query plan.
 *
 * - `prefix`: the statement prefix to prepend to the SQL ("" means this
 *   warehouse cannot be EXPLAIN'd via a simple statement prefix — the
 *   handler will return a clear error instead of issuing a broken query)
 * - `actuallyAnalyzed`: whether the prefix actually runs the query and
 *   reports runtime stats. The caller requested `analyze=true`, but some
 *   warehouses silently downgrade to plan-only (e.g. Snowflake), so we
 *   reflect the true mode back to the user in the result envelope.
 */
export interface ExplainPlan {
  // Statement prefix, e.g. "EXPLAIN" or "EXPLAIN (ANALYZE, BUFFERS)"; "" = unsupported.
  prefix: string
  // True only when the prefix really executes the query and reports runtime stats.
  actuallyAnalyzed: boolean
}
201+
202+
/**
203+
* Build a dialect-aware EXPLAIN plan for the given warehouse type.
204+
*
205+
* Warehouse-specific notes:
206+
* - Snowflake: `EXPLAIN USING TEXT`. No ANALYZE variant — silently downgraded.
207+
* - PostgreSQL: `EXPLAIN` or `EXPLAIN (ANALYZE, BUFFERS)` for runtime stats.
208+
* - Redshift: plain `EXPLAIN` only. Redshift does NOT support ANALYZE.
209+
* - MySQL / MariaDB: `EXPLAIN` or `EXPLAIN ANALYZE` (MySQL 8+).
210+
* - DuckDB: `EXPLAIN` or `EXPLAIN ANALYZE`.
211+
* - Databricks / Spark: `EXPLAIN` or `EXPLAIN FORMATTED`.
212+
* - ClickHouse: `EXPLAIN` (no ANALYZE form accepted via statement prefix).
213+
* - BigQuery: uses a dry-run API instead of any EXPLAIN statement. Not
214+
* supported via this code path — return empty prefix.
215+
* - Oracle: `EXPLAIN PLAN FOR` stores the plan in PLAN_TABLE rather than
216+
* returning it, so a bare prefix does not produce output. Not supported.
217+
* - SQL Server: requires `SET SHOWPLAN_TEXT ON` as a session setting.
218+
* Not supported via statement prefix.
219+
*/
220+
export function buildExplainPlan(warehouseType: string | undefined, analyze: boolean): ExplainPlan {
221+
const type = (warehouseType ?? "").toLowerCase()
222+
switch (type) {
223+
case "snowflake":
224+
// Snowflake: no ANALYZE; USING TEXT returns a readable plan.
225+
return { prefix: "EXPLAIN USING TEXT", actuallyAnalyzed: false }
226+
case "postgres":
227+
case "postgresql":
228+
return analyze
229+
? { prefix: "EXPLAIN (ANALYZE, BUFFERS)", actuallyAnalyzed: true }
230+
: { prefix: "EXPLAIN", actuallyAnalyzed: false }
231+
case "redshift":
232+
// Redshift supports only plain EXPLAIN — no ANALYZE/BUFFERS.
233+
return { prefix: "EXPLAIN", actuallyAnalyzed: false }
234+
case "mysql":
235+
case "mariadb":
236+
// MySQL 8+ supports `EXPLAIN ANALYZE`. Older versions will reject it
237+
// at the warehouse and the error will be surfaced to the caller.
238+
return analyze
239+
? { prefix: "EXPLAIN ANALYZE", actuallyAnalyzed: true }
240+
: { prefix: "EXPLAIN", actuallyAnalyzed: false }
241+
case "duckdb":
242+
return analyze
243+
? { prefix: "EXPLAIN ANALYZE", actuallyAnalyzed: true }
244+
: { prefix: "EXPLAIN", actuallyAnalyzed: false }
245+
case "databricks":
246+
case "spark":
247+
// Databricks/Spark `EXPLAIN FORMATTED` returns a more detailed plan but
248+
// does not actually execute the query — still a plan-only mode.
249+
return { prefix: analyze ? "EXPLAIN FORMATTED" : "EXPLAIN", actuallyAnalyzed: false }
250+
case "clickhouse":
251+
return { prefix: "EXPLAIN", actuallyAnalyzed: false }
252+
case "bigquery":
253+
// BigQuery has no EXPLAIN statement — the correct answer is a dry-run
254+
// job via the BigQuery API, which this tool does not support today.
255+
return { prefix: "", actuallyAnalyzed: false }
256+
case "oracle":
257+
// Oracle's `EXPLAIN PLAN FOR` stores the plan in PLAN_TABLE and returns
258+
// no rows, so a statement-prefix approach does not work. Callers would
259+
// need a follow-up `SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY)`.
260+
return { prefix: "", actuallyAnalyzed: false }
261+
case "mssql":
262+
case "sqlserver":
263+
// SQL Server requires `SET SHOWPLAN_TEXT ON` as a session setting,
264+
// not a statement prefix. Not supported via this code path.
265+
return { prefix: "", actuallyAnalyzed: false }
266+
default:
267+
// Unknown warehouse — fall back to plain EXPLAIN and let the driver
268+
// surface a real error if it does not understand the syntax.
269+
return analyze
270+
? { prefix: "EXPLAIN ANALYZE", actuallyAnalyzed: true }
271+
: { prefix: "EXPLAIN", actuallyAnalyzed: false }
272+
}
273+
}
274+
275+
/** @deprecated Use buildExplainPlan for richer metadata about the plan mode. */
276+
export function buildExplainPrefix(warehouseType: string | undefined, analyze: boolean): string {
277+
return buildExplainPlan(warehouseType, analyze).prefix
278+
}
279+
280+
/**
281+
* Translate a raw warehouse/Registry error into an actionable message.
282+
*
283+
* Verbatim driver errors like "Connection ? not found. Available: (none)" are
284+
* useless to an LLM caller that has no way to fix them. Detect common patterns
285+
* and rewrite them with concrete next steps.
286+
*/
287+
export function translateExplainError(
288+
raw: unknown,
289+
warehouseName: string | undefined,
290+
availableWarehouses: string[],
291+
): string {
292+
const msg = raw instanceof Error ? raw.message : String(raw)
293+
294+
// Connection/warehouse lookup failures.
295+
if (/Connection\s+.+\s+not found/i.test(msg) || /warehouse\s+.+\s+not found/i.test(msg)) {
296+
if (availableWarehouses.length > 0) {
297+
return `Warehouse ${JSON.stringify(warehouseName ?? "")} is not configured. Available warehouses: ${availableWarehouses.join(", ")}. Pass one of these as the 'warehouse' parameter, or omit it to use the default.`
298+
}
299+
return "No warehouses are configured. Run `warehouse_add` to set one up before calling sql_explain."
300+
}
301+
302+
// Unsubstituted-placeholder compilation errors.
303+
//
304+
// Bind placeholders come in three flavours: positional `?` (MySQL / JDBC),
305+
// numbered `$1`, `$2`, ... (PostgreSQL), and named `:name` (Oracle / SQLite /
306+
// some SQLAlchemy dialects). Drivers phrase the resulting syntax error in
307+
// many different ways — Snowflake says "unexpected ?", PostgreSQL says
308+
// `syntax error at or near "$1"`, Oracle says `ORA-00911: invalid character`,
309+
// etc. Rather than enumerate every phrasing, detect a bind-token next to a
310+
// syntax-error keyword and translate them all to the same guidance.
311+
const isSyntaxError = /syntax error|invalid character|unexpected token/i.test(msg)
312+
// Match a bind token that is delimited by whitespace, start/end, or a quote.
313+
// The negative lookbehind `(?<!\w)` prevents matching `$1` inside identifiers.
314+
const containsBindToken =
315+
/(?<!\w)\?(?!\w)/.test(msg) ||
316+
/(?<!\w)\$\d+/.test(msg) ||
317+
/(?<!\w):[a-zA-Z_]\w*/.test(msg)
318+
// PostgreSQL surfaces bind-param issues with its own distinctive phrasing.
319+
const isPgBindError = /there is no parameter \$\d+/i.test(msg)
320+
if ((isSyntaxError && containsBindToken) || isPgBindError) {
321+
return "SQL compilation error: the query contains an unsubstituted bind placeholder (`?`, `$1`, or `:name`). sql_explain does not support parameterized queries — inline the literal values before calling."
322+
}
323+
324+
// Snowflake-specific: ANALYZE not supported.
325+
if (/EXPLAIN\s+ANALYZE/i.test(msg) && /not\s+supported/i.test(msg)) {
326+
return "This warehouse does not support EXPLAIN ANALYZE. Retry with analyze=false to get a plan-only EXPLAIN."
327+
}
328+
329+
// Permission denials.
330+
if (/permission\s+denied/i.test(msg) || /access\s+denied/i.test(msg) || /insufficient\s+privilege/i.test(msg)) {
331+
return `The warehouse user lacks permission to EXPLAIN this query. Check role grants for the objects referenced in the SQL. Original error: ${msg}`
332+
}
333+
334+
// Generic SQL compilation error fallback.
335+
if (/SQL compilation error/i.test(msg) || /syntax error/i.test(msg)) {
336+
return `SQL compilation error in the query passed to sql_explain. Fix the SQL and retry. Original error: ${msg}`
337+
}
338+
339+
// Fall through: return the original but with a hint to aid debugging.
340+
return msg
341+
}
342+
185343
/** Register all connection-related handlers. Exported for test re-registration. */
186344
export function registerAll(): void {
187345

@@ -261,10 +419,11 @@ register("sql.execute", async (params: SqlExecuteParams): Promise<SqlExecuteResu
261419

262420
// --- sql.explain ---
263421
register("sql.explain", async (params: SqlExplainParams): Promise<SqlExplainResult> => {
422+
let warehouseName: string | undefined
423+
let warehouseType: string | undefined
264424
try {
265-
const warehouseName = params.warehouse
425+
warehouseName = params.warehouse
266426
let connector
267-
let warehouseType: string | undefined
268427

269428
if (warehouseName) {
270429
connector = await Registry.get(warehouseName)
@@ -276,13 +435,25 @@ register("sql.explain", async (params: SqlExplainParams): Promise<SqlExplainResu
276435
}
277436
connector = await Registry.get(warehouses[0].name)
278437
warehouseType = warehouses[0].type
438+
warehouseName = warehouses[0].name
279439
}
280440

281-
const explainPrefix = params.analyze ? "EXPLAIN ANALYZE" : "EXPLAIN"
282-
const result = await connector.execute(
283-
`${explainPrefix} ${params.sql}`,
284-
10000,
285-
)
441+
const plan = buildExplainPlan(warehouseType, params.analyze ?? false)
442+
if (plan.prefix === "") {
443+
// Warehouse does not support EXPLAIN via a simple statement prefix —
444+
// return a clear error rather than sending a bare statement to the
445+
// driver. BigQuery needs a dry-run job, SQL Server needs SHOWPLAN_TEXT,
446+
// Oracle needs DBMS_XPLAN, etc.
447+
return {
448+
success: false,
449+
plan_rows: [],
450+
error: `sql_explain is not supported for warehouse type ${JSON.stringify(warehouseType)}. This warehouse requires a different plan mechanism (e.g. dry-run API, SET SHOWPLAN_TEXT ON, or DBMS_XPLAN) that sql_explain cannot issue directly.`,
451+
warehouse_type: warehouseType,
452+
analyzed: false,
453+
}
454+
}
455+
456+
const result = await connector.execute(`${plan.prefix} ${params.sql}`, 10000)
286457

287458
const planText = result.rows.map((r) => String(r[0])).join("\n")
288459
const planRows = result.rows.map((r, i) => ({
@@ -295,13 +466,18 @@ register("sql.explain", async (params: SqlExplainParams): Promise<SqlExplainResu
295466
plan_text: planText,
296467
plan_rows: planRows,
297468
warehouse_type: warehouseType,
298-
analyzed: params.analyze ?? false,
469+
// Reflect the true mode: if Snowflake silently downgraded ANALYZE to a
470+
// plan-only EXPLAIN, the caller should know the plan is estimated, not
471+
// observed.
472+
analyzed: plan.actuallyAnalyzed,
299473
}
300474
} catch (e) {
475+
const available = Registry.list().warehouses.map((w) => w.name)
301476
return {
302477
success: false,
303478
plan_rows: [],
304-
error: String(e),
479+
error: translateExplainError(e, warehouseName, available),
480+
warehouse_type: warehouseType,
305481
analyzed: params.analyze ?? false,
306482
}
307483
}

packages/opencode/src/altimate/tools/altimate-core-validate.ts

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,14 @@ import type { Telemetry } from "../telemetry"
55

66
export const AltimateCoreValidateTool = Tool.define("altimate_core_validate", {
77
description:
8-
"Validate SQL syntax and schema references. Checks if tables/columns exist in the schema and if SQL is valid for the target dialect. IMPORTANT: Provide schema_context or schema_path — without schema, all table/column references will report as 'not found'.",
8+
"Validate SQL syntax and schema references. Checks if tables/columns exist in the schema and if SQL is valid for the target dialect. If no schema_path or schema_context is provided, validation still runs but schema-dependent checks (table/column existence) are skipped — syntax and dialect checks still apply. For full validation, run `schema_inspect` first on the referenced tables or pass `schema_context` inline.",
99
parameters: z.object({
1010
sql: z.string().describe("SQL query to validate"),
1111
schema_path: z.string().optional().describe("Path to YAML/JSON schema file"),
1212
schema_context: z.record(z.string(), z.any()).optional().describe("Inline schema definition"),
1313
}),
14-
async execute(args, ctx) {
14+
async execute(args, _ctx) {
1515
const hasSchema = !!(args.schema_path || (args.schema_context && Object.keys(args.schema_context).length > 0))
16-
const noSchema = !hasSchema
17-
if (noSchema) {
18-
const error =
19-
"No schema provided. Provide schema_context or schema_path so table/column references can be resolved."
20-
return {
21-
title: "Validate: NO SCHEMA",
22-
metadata: { success: false, valid: false, has_schema: false, error },
23-
output: `Error: ${error}`,
24-
}
25-
}
2616
try {
2717
const result = await Dispatcher.call("altimate_core.validate", {
2818
sql: args.sql,
@@ -38,27 +28,34 @@ export const AltimateCoreValidateTool = Tool.define("altimate_core_validate", {
3828
}))
3929
// altimate_change end
4030
return {
41-
title: `Validate: ${data.valid ? "VALID" : "INVALID"}`,
31+
title: `Validate: ${data.valid ? "VALID" : "INVALID"}${hasSchema ? "" : " (no schema)"}`,
4232
metadata: {
4333
success: true, // engine ran — validation errors are findings, not failures
4434
valid: data.valid,
4535
has_schema: hasSchema,
4636
...(error && { error }),
4737
...(findings.length > 0 && { findings }),
4838
},
49-
output: formatValidate(data),
39+
output: formatValidate(data, hasSchema),
5040
}
5141
} catch (e) {
5242
const msg = e instanceof Error ? e.message : String(e)
5343
return {
5444
title: "Validate: ERROR",
55-
metadata: { success: false, valid: false, has_schema: hasSchema, error: msg },
45+
metadata: { success: false, valid: false, has_schema: hasSchema, error: msg, error_class: "engine_failure" },
5646
output: `Failed: ${msg}`,
5747
}
5848
}
5949
},
6050
})
6151

// Exported for unit tests only — not part of the tool's public surface.
// Bundles the module-private helpers so test files can import them directly.
export const _altimateCoreValidateInternal = {
  extractValidationErrors,
  classifyValidationError,
  formatValidate,
}
58+
6259
function extractValidationErrors(data: Record<string, any>): string | undefined {
6360
if (Array.isArray(data.errors) && data.errors.length > 0) {
6461
const msgs = data.errors.map((e: any) => e.message ?? String(e)).filter(Boolean)
@@ -77,14 +74,36 @@ function classifyValidationError(message: string): string {
7774
return "validation_error"
7875
}
7976

80-
function formatValidate(data: Record<string, any>): string {
77+
// Warning appended when validation runs without a schema. Stored without a
78+
// leading blank so it can be pushed into a line array; a blank line is added
79+
// explicitly at each call site where spacing is desired.
80+
//
81+
// The hint uses the flat table-map shape accepted by the tool (see
82+
// schema-resolver.ts), not the verbose SchemaDefinition form — callers that
83+
// already know the inner Rust struct layout can use either, but the flat form
84+
// is strictly easier to construct inline.
85+
const NO_SCHEMA_NOTE =
86+
"Note: No schema was provided, so table/column existence checks were skipped. " +
87+
"To catch missing tables or columns, run `schema_inspect` on the referenced tables first " +
88+
'or pass `schema_context` inline as a table map, for example `{ users: { id: "INTEGER", email: "VARCHAR" } }`.'
89+
90+
function formatValidate(data: Record<string, any>, hasSchema: boolean): string {
8191
if (data.error) return `Error: ${data.error}`
82-
if (data.valid) return "SQL is valid."
92+
if (data.valid) {
93+
return hasSchema ? "SQL is valid." : `SQL is valid.\n\n${NO_SCHEMA_NOTE}`
94+
}
8395

8496
const lines = ["Validation failed:\n"]
8597
for (const err of data.errors ?? []) {
8698
lines.push(` • ${err.message}`)
8799
if (err.location) lines.push(` at line ${err.location.line}`)
88100
}
101+
if (!hasSchema) {
102+
// Blank separator line, then the note — avoids the double newline that
103+
// would appear if the note itself started with `\n\n` and was joined
104+
// with `\n`.
105+
lines.push("")
106+
lines.push(NO_SCHEMA_NOTE)
107+
}
89108
return lines.join("\n")
90109
}

0 commit comments

Comments
 (0)