Skip to content

Commit 1edb214

Browse files
anandgupta42 and claude committed
feat: add Jinja/dbt template preprocessing for SQL analysis tools
SQL analysis tools (analyze, format, optimize, translate) now automatically detect and preprocess Jinja-templated dbt SQL before analysis. This enables meaningful analysis of dbt models without requiring users to manually strip template syntax. The preprocessor handles: ref(), source(), config(), var(), this, comments, if/elif/else/endif, for/endfor, set, macro blocks, adapter.dispatch, and other common dbt Jinja patterns. Closes #63 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 8071ed2 commit 1edb214

7 files changed

Lines changed: 1206 additions & 12 deletions

File tree

packages/altimate-code/src/bridge/protocol.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -951,6 +951,25 @@ export interface LocalTestResult {
951951
error?: string
952952
}
953953

954+
// --- Jinja Preprocessing ---
955+
956+
/** Request payload for the `sql.preprocess_jinja` bridge method. */
export interface SqlPreprocessJinjaParams {
957+
// SQL text, possibly containing Jinja/dbt template syntax, to preprocess.
sql: string
958+
}
959+
960+
/** Response payload for the `sql.preprocess_jinja` bridge method. */
export interface SqlPreprocessJinjaResult {
961+
// Whether the preprocessing call succeeded.
success: boolean
962+
// SQL produced by the preprocessor.
preprocessed_sql: string
963+
// NOTE(review): presumably the unmodified input SQL — confirm against the engine.
original_sql: string
964+
// False is reported by the tool as "no Jinja templates detected".
was_preprocessed: boolean
965+
// Reported by the tool as "Models referenced (ref)".
refs_found: string[]
966+
// Reported by the tool as "Sources referenced".
sources_found: string[]
967+
// Reported by the tool as "Variables used (var)".
variables_found: string[]
968+
// Reported by the tool as "Macros removed".
macros_removed: string[]
969+
// Messages the tool lists under its "=== Warnings ===" section.
warnings: string[]
970+
// Optional error message.
error?: string
971+
}
972+
954973
// --- Method registry ---
955974

956975
export const BridgeMethods = {
@@ -986,6 +1005,7 @@ export const BridgeMethods = {
9861005
"schema.detect_pii": {} as { params: PiiDetectParams; result: PiiDetectResult },
9871006
"schema.tags": {} as { params: TagsGetParams; result: TagsGetResult },
9881007
"schema.tags_list": {} as { params: TagsListParams; result: TagsListResult },
1008+
"sql.preprocess_jinja": {} as { params: SqlPreprocessJinjaParams; result: SqlPreprocessJinjaResult },
9891009
"sql.diff": {} as { params: SqlDiffParams; result: SqlDiffResult },
9901010
"sql.rewrite": {} as { params: SqlRewriteParams; result: SqlRewriteResult },
9911011
"sql.schema_diff": {} as { params: SchemaDiffParams; result: SchemaDiffResult },

packages/altimate-code/src/tool/registry.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ import { FinopsRoleGrantsTool, FinopsRoleHierarchyTool, FinopsUserRolesTool } fr
5656
import { SchemaDetectPiiTool } from "./schema-detect-pii"
5757
import { SchemaTagsTool, SchemaTagsListTool } from "./schema-tags"
5858
import { SqlRewriteTool } from "./sql-rewrite"
59+
import { SqlPreprocessJinjaTool } from "./sql-preprocess-jinja"
5960

6061
import { SchemaDiffTool } from "./schema-diff"
6162
import { AltimateCoreValidateTool } from "./altimate-core-validate"
@@ -219,6 +220,7 @@ export namespace ToolRegistry {
219220
SchemaTagsTool,
220221
SchemaTagsListTool,
221222
SqlRewriteTool,
223+
SqlPreprocessJinjaTool,
222224
SchemaDiffTool,
223225
AltimateCoreValidateTool,
224226
AltimateCoreLintTool,

packages/altimate-code/src/tool/sql-preprocess-jinja.ts

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import z from "zod"
2+
import { Tool } from "./tool"
3+
import { Bridge } from "../bridge/client"
4+
import type { SqlPreprocessJinjaResult } from "../bridge/protocol"
5+
6+
/**
 * Tool that forwards SQL to the `sql.preprocess_jinja` bridge method and
 * renders the response as a human-readable report.
 *
 * Outcomes:
 * - the bridge call throws, or the response reports a failure
 *   (`success === false` or a non-empty `error`): an ERROR result;
 * - the response has `was_preprocessed === false`: a "no templates found" result;
 * - otherwise: the preprocessed SQL plus the refs/sources/variables that were found.
 */
export const SqlPreprocessJinjaTool = Tool.define("sql_preprocess_jinja", {
  description:
    "Preprocess Jinja/dbt template syntax in SQL before analysis. Stubs common dbt macros like {{ ref() }}, {{ source() }}, {{ config() }}, {{ var() }}, {{ this }}, and Jinja block tags ({% if %}, {% for %}) into plain SQL that downstream tools can parse. Use this when SQL analysis tools fail on dbt-templated SQL.",
  parameters: z.object({
    sql: z.string().describe("SQL with Jinja/dbt template syntax to preprocess"),
  }),
  async execute(args, ctx) {
    try {
      const result = await Bridge.call("sql.preprocess_jinja", {
        sql: args.sql,
      })

      // A failure reported in the payload must not be mistaken for
      // "no templates found" (a failed run typically has was_preprocessed === false).
      if (result.success === false || result.error) {
        return {
          title: "Preprocess Jinja: ERROR",
          metadata: emptyMetadata(false),
          output: `Failed to preprocess Jinja: ${result.error ?? "unknown error"}`,
        }
      }

      if (!result.was_preprocessed) {
        return {
          title: "Preprocess Jinja: no templates found",
          metadata: emptyMetadata(true),
          output: "No Jinja templates detected in the SQL. The input is already plain SQL.",
        }
      }

      return {
        title: `Preprocess Jinja: ${formatSummary(result)}`,
        metadata: {
          success: result.success,
          was_preprocessed: result.was_preprocessed,
          refs: result.refs_found,
          sources: result.sources_found,
          variables: result.variables_found,
        },
        output: formatResult(result),
      }
    } catch (e) {
      const msg = e instanceof Error ? e.message : String(e)
      return {
        title: "Preprocess Jinja: ERROR",
        metadata: emptyMetadata(false),
        output: `Failed to preprocess Jinja: ${msg}\n\nEnsure the Python bridge is running and altimate-engine is installed.`,
      }
    }
  },
})

/**
 * Metadata for results that carry no template findings.
 *
 * The explicit return type keeps every branch of `execute` on the same
 * metadata shape without per-field `as` casts.
 */
function emptyMetadata(success: boolean): {
  success: boolean
  was_preprocessed: boolean
  refs: string[]
  sources: string[]
  variables: string[]
} {
  return { success, was_preprocessed: false, refs: [], sources: [], variables: [] }
}
59+
60+
/**
 * Builds the short summary used in the tool title.
 *
 * Lists the non-zero counts of refs, sources and variables (in that order),
 * comma-separated; falls back to "templates removed" when all three are empty.
 */
function formatSummary(result: SqlPreprocessJinjaResult): string {
  const tallies: Array<[number, string]> = [
    [result.refs_found.length, "ref(s)"],
    [result.sources_found.length, "source(s)"],
    [result.variables_found.length, "var(s)"],
  ]
  const summary = tallies
    .filter(([count]) => count > 0)
    .map(([count, label]) => `${count} ${label}`)
    .join(", ")
  return summary === "" ? "templates removed" : summary
}
67+
68+
/**
 * Renders the full text report for a preprocessing result.
 *
 * Layout: the preprocessed SQL under a header, one line per non-empty
 * finding list (refs, sources, variables, removed macros), an optional
 * warnings section, and a closing note that the stubbed SQL is approximate.
 */
function formatResult(result: SqlPreprocessJinjaResult): string {
  const labelled: Array<[string, string[]]> = [
    ["Models referenced (ref)", result.refs_found],
    ["Sources referenced", result.sources_found],
    ["Variables used (var)", result.variables_found],
    ["Macros removed", result.macros_removed],
  ]
  const findings = labelled
    .filter(([, names]) => names.length > 0)
    .map(([label, names]) => `${label}: ${names.join(", ")}`)

  // The warnings section (blank line, header, entries) only appears when there are warnings.
  const warningSection =
    result.warnings.length > 0 ? ["", "=== Warnings ===", ...result.warnings.map((w) => ` ! ${w}`)] : []

  const closingNote =
    "Note: Jinja templates were stubbed with placeholder values. " +
    "Analysis results on this SQL are approximate."

  return ["=== Preprocessed SQL ===", result.preprocessed_sql, "", ...findings, ...warningSection, "", closingNote].join(
    "\n",
  )
}

packages/altimate-engine/src/altimate_engine/models.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -987,6 +987,26 @@ class AltimateCoreIsSafeParams(BaseModel):
987987
sql: str
988988

989989

990+
# --- Jinja Preprocessing ---
991+
992+
993+
# Request parameters for the "sql.preprocess_jinja" JSON-RPC method.
class SqlPreprocessJinjaParams(BaseModel):
994+
# SQL text to run through the Jinja preprocessor.
sql: str
995+
996+
997+
# Result payload for the "sql.preprocess_jinja" JSON-RPC method.
class SqlPreprocessJinjaResult(BaseModel):
998+
# Defaults to True.
success: bool = True
999+
# SQL produced by the preprocessor.
preprocessed_sql: str
1000+
# NOTE(review): presumably the unmodified input SQL — confirm against the preprocessor.
original_sql: str
1001+
# Whether the preprocessor changed anything.
was_preprocessed: bool
1002+
# The list fields below default to empty lists.
refs_found: list[str] = Field(default_factory=list)
1003+
sources_found: list[str] = Field(default_factory=list)
1004+
variables_found: list[str] = Field(default_factory=list)
1005+
macros_removed: list[str] = Field(default_factory=list)
1006+
warnings: list[str] = Field(default_factory=list)
1007+
# NOTE(review): annotated as `str` but defaults to None; an optional string
# annotation would describe the field accurately — confirm against the file's convention.
error: str = Field(default=None) # Uses same pattern as rest of file
1008+
1009+
9901010
# --- JSON-RPC ---
9911011

9921012

packages/altimate-engine/src/altimate_engine/server.py

Lines changed: 102 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,13 @@
170170
from altimate_engine.dbt.profiles import discover_dbt_connections
171171
from altimate_engine.local.schema_sync import sync_schema
172172
from altimate_engine.local.test_local import test_sql_local
173+
from altimate_engine.sql.jinja_preprocessor import (
174+
contains_jinja,
175+
preprocess_jinja,
176+
)
173177
from altimate_engine.models import (
178+
SqlPreprocessJinjaParams,
179+
SqlPreprocessJinjaResult,
174180
AltimateCoreFixParams,
175181
AltimateCorePolicyParams,
176182
AltimateCoreSemanticsParams,
@@ -298,13 +304,48 @@ def dispatch(request: JsonRpcRequest) -> JsonRpcResponse:
298304
params = request.params or {}
299305

300306
try:
301-
if method == "sql.execute":
307+
if method == "sql.preprocess_jinja":
308+
pp_params = SqlPreprocessJinjaParams(**params)
309+
pp_result = preprocess_jinja(pp_params.sql)
310+
result = SqlPreprocessJinjaResult(
311+
success=True,
312+
preprocessed_sql=pp_result.preprocessed_sql,
313+
original_sql=pp_result.original_sql,
314+
was_preprocessed=pp_result.was_preprocessed,
315+
refs_found=pp_result.refs_found,
316+
sources_found=pp_result.sources_found,
317+
variables_found=pp_result.variables_found,
318+
macros_removed=pp_result.macros_removed,
319+
warnings=pp_result.warnings,
320+
)
321+
elif method == "sql.execute":
302322
result = execute_sql(SqlExecuteParams(**params))
303323
elif method == "schema.inspect":
304324
result = inspect_schema(SchemaInspectParams(**params))
305325
elif method == "sql.analyze":
306326
params_obj = SqlAnalyzeParams(**params)
307-
statements = _split_sql_statements(params_obj.sql)
327+
328+
# Auto-preprocess Jinja if present
329+
jinja_note = ""
330+
sql_to_analyze = params_obj.sql
331+
if contains_jinja(sql_to_analyze):
332+
pp = preprocess_jinja(sql_to_analyze)
333+
if pp.was_preprocessed:
334+
sql_to_analyze = pp.preprocessed_sql
335+
parts = []
336+
if pp.refs_found:
337+
parts.append(f"refs: {', '.join(pp.refs_found)}")
338+
if pp.sources_found:
339+
parts.append(f"sources: {', '.join(pp.sources_found)}")
340+
if pp.variables_found:
341+
parts.append(f"vars: {', '.join(pp.variables_found)}")
342+
detail = f" ({'; '.join(parts)})" if parts else ""
343+
jinja_note = (
344+
f"Jinja templates were preprocessed before analysis{detail}. "
345+
"Results are based on the rendered SQL and may be approximate."
346+
)
347+
348+
statements = _split_sql_statements(sql_to_analyze)
308349
issues = []
309350
any_error = None
310351

@@ -360,37 +401,65 @@ def dispatch(request: JsonRpcRequest) -> JsonRpcResponse:
360401
)
361402
)
362403

404+
confidence_factors = []
405+
if any_error is not None:
406+
confidence_factors.append(f"Parse failed on one statement: {any_error}")
407+
if jinja_note:
408+
confidence_factors.append(jinja_note)
409+
363410
result = SqlAnalyzeResult(
364411
success=any_error is None,
365412
issues=issues,
366413
issue_count=len(issues),
367414
confidence=_compute_overall_confidence(issues),
368-
confidence_factors=[]
369-
if any_error is None
370-
else [f"Parse failed on one statement: {any_error}"],
415+
confidence_factors=confidence_factors,
371416
error=any_error,
372417
)
373418
elif method == "sql.translate":
374419
params_obj = SqlTranslateParams(**params)
420+
421+
# Auto-preprocess Jinja if present
422+
sql_to_translate = params_obj.sql
423+
jinja_warnings: list[str] = []
424+
if contains_jinja(sql_to_translate):
425+
pp = preprocess_jinja(sql_to_translate)
426+
if pp.was_preprocessed:
427+
sql_to_translate = pp.preprocessed_sql
428+
jinja_warnings.append(
429+
"Jinja templates were preprocessed before translation. "
430+
"Review the translated SQL and re-apply Jinja syntax as needed."
431+
)
432+
375433
raw = guard_transpile(
376-
params_obj.sql, params_obj.source_dialect, params_obj.target_dialect
434+
sql_to_translate, params_obj.source_dialect, params_obj.target_dialect
377435
)
436+
all_warnings = jinja_warnings + raw.get("warnings", [])
378437
result = SqlTranslateResult(
379438
success=raw.get("success", True),
380439
translated_sql=raw.get("sql", raw.get("translated_sql")),
381440
source_dialect=params_obj.source_dialect,
382441
target_dialect=params_obj.target_dialect,
383-
warnings=raw.get("warnings", []),
442+
warnings=all_warnings,
384443
error=raw.get("error"),
385444
)
386445
elif method == "sql.optimize":
387446
params_obj = SqlOptimizeParams(**params)
447+
448+
# Auto-preprocess Jinja if present
449+
sql_to_optimize = params_obj.sql
450+
jinja_preprocessed = False
451+
if contains_jinja(sql_to_optimize):
452+
pp = preprocess_jinja(sql_to_optimize)
453+
if pp.was_preprocessed:
454+
sql_to_optimize = pp.preprocessed_sql
455+
jinja_preprocessed = True
456+
388457
# Rewrite for optimization
389458
rw = guard_rewrite_sql(
390-
params_obj.sql, schema_context=params_obj.schema_context
459+
sql_to_optimize, schema_context=params_obj.schema_context
391460
)
392461
# Lint for remaining issues
393-
lint = guard_lint(params_obj.sql, schema_context=params_obj.schema_context)
462+
lint = guard_lint(sql_to_optimize, schema_context=params_obj.schema_context)
394463

395464
suggestions = []
396465
for r in rw.get("rewrites", []):
@@ -424,12 +493,17 @@ def dispatch(request: JsonRpcRequest) -> JsonRpcResponse:
424493
}
425494
)
426495

496+
opt_confidence = "high"
497+
if jinja_preprocessed:
498+
opt_confidence = "medium"
499+
427500
result = SqlOptimizeResult(
428501
success=True,
429502
original_sql=params_obj.sql,
430-
optimized_sql=rw.get("rewritten_sql", params_obj.sql),
503+
optimized_sql=rw.get("rewritten_sql", sql_to_optimize),
431504
suggestions=suggestions,
432505
anti_patterns=anti_patterns,
506+
confidence=opt_confidence,
433507
error=rw.get("error"),
434508
)
435509
elif method == "lineage.check":
@@ -492,12 +566,28 @@ def dispatch(request: JsonRpcRequest) -> JsonRpcResponse:
492566

493567
elif method == "sql.format":
494568
fmt_params = SqlFormatParams(**params)
495-
raw = guard_format_sql(fmt_params.sql, fmt_params.dialect)
569+
570+
# Auto-preprocess Jinja if present
571+
sql_to_format = fmt_params.sql
572+
jinja_fmt_note = None
573+
if contains_jinja(sql_to_format):
574+
pp = preprocess_jinja(sql_to_format)
575+
if pp.was_preprocessed:
576+
sql_to_format = pp.preprocessed_sql
577+
jinja_fmt_note = (
578+
"Note: Jinja templates were removed before formatting. "
579+
"The formatted output contains plain SQL only."
580+
)
581+
582+
raw = guard_format_sql(sql_to_format, fmt_params.dialect)
583+
fmt_error = raw.get("error")
584+
if jinja_fmt_note and not fmt_error:
585+
fmt_error = jinja_fmt_note
496586
result = SqlFormatResult(
497587
success=raw.get("success", True),
498588
formatted_sql=raw.get("formatted_sql", raw.get("sql")),
499589
statement_count=raw.get("statement_count", 1),
500-
error=raw.get("error"),
590+
error=fmt_error,
501591
)
502592
elif method == "sql.explain":
503593
result = explain_sql(SqlExplainParams(**params))

0 commit comments

Comments
 (0)