Skip to content

Commit 8f608c5

Browse files
dataders and claude authored
fix(parser): preserve Jinja templates in SQL extraction (#445)
* fix(parser): preserve Jinja templates in SQL extraction

  Treat dbt-style Jinja templates as opaque SQL-side parser tokens so SQL extraction keeps refs intact before VISUALISE splitting.

  Tests:
  - npx tree-sitter test
  - cargo test -p ggsql parser::source_tree::tests

  Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* Fix Jinja source parsing boundaries

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 9c9ab14 commit 8f608c5

3 files changed

Lines changed: 185 additions & 3 deletions

File tree

src/parser/source_tree.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,18 @@ mod tests {
264264
assert!(viz.starts_with("visualise"));
265265
}
266266

267+
/// A `{{ ref(...) }}` template used as the table in a SELECT must come back
/// verbatim from `extract_sql`, with the VISUALISE and DRAW lines left out.
#[test]
fn test_extract_sql_preserves_jinja_ref() {
    let source = "SELECT order_date, region, revenue FROM {{ ref('fct_orders') }}\nVISUALISE order_date AS x, revenue AS y, region AS color\nDRAW point";
    let expected_sql = "SELECT order_date, region, revenue FROM {{ ref('fct_orders') }}";

    let parsed = SourceTree::new(source).unwrap();
    let extracted = parsed.extract_sql().unwrap();

    assert_eq!(extracted, expected_sql);
}
278+
267279
#[test]
268280
fn test_extract_sql_no_visualise() {
269281
let query = "SELECT * FROM data WHERE x > 5";
@@ -289,6 +301,18 @@ mod tests {
289301
assert!(viz.starts_with("VISUALISE FROM mtcars"));
290302
}
291303

304+
/// `VISUALISE FROM {{ ref(...) }}` has no explicit SQL statement: the extracted
/// SQL is a `SELECT * FROM` over the template, and the VISUALISE portion still
/// begins with the original `VISUALISE FROM {{ ... }}` text.
#[test]
fn test_extract_sql_visualise_from_jinja_ref() {
    let source = "VISUALISE FROM {{ ref('fct_orders') }} DRAW point MAPPING x AS x, y AS y";
    let parsed = SourceTree::new(source).unwrap();

    // SQL side: the template is carried into the injected SELECT unchanged.
    let extracted_sql = parsed.extract_sql().unwrap();
    assert_eq!(extracted_sql, "SELECT * FROM {{ ref('fct_orders') }}");

    // VISUALISE side: the template is still present at the head of the statement.
    let visualise_part = parsed.extract_visualise().unwrap();
    assert!(visualise_part.starts_with("VISUALISE FROM {{ ref('fct_orders') }}"));
}
315+
292316
#[test]
293317
fn test_extract_sql_visualise_from_with_cte() {
294318
let query =
@@ -407,6 +431,15 @@ mod tests {
407431
assert!(sql.contains("SELECT * FROM 'mtcars.csv'"));
408432
}
409433

434+
/// FROM-first syntax with a Jinja template as the source: `FROM {{ ... }} VISUALISE ...`
/// must yield `SELECT * FROM {{ ... }}` as the SQL portion.
#[test]
fn test_extract_sql_from_first_jinja_ref() {
    let source = "FROM {{ ref('fct_orders') }} VISUALISE DRAW point MAPPING x AS x, y AS y";
    let expected_sql = "SELECT * FROM {{ ref('fct_orders') }}";

    let parsed = SourceTree::new(source).unwrap();
    let extracted = parsed.extract_sql().unwrap();

    assert_eq!(extracted, expected_sql);
}
442+
410443
#[test]
411444
fn test_extract_sql_from_first_case_insensitive() {
412445
let query = "from sales visualise DRAW point MAPPING x AS x, y AS y";

tree-sitter-ggsql/grammar.js

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ function caseInsensitive(keyword) {
1717
module.exports = grammar({
1818
name: 'ggsql',
1919

20+
inline: $ => [
21+
$.source_ref,
22+
],
23+
2024
conflicts: $ => [
2125
[$.sql_portion],
2226
],
@@ -63,6 +67,7 @@ module.exports = grammar({
6367
$.case_expression,
6468
$.cast_expression,
6569
$.function_call,
70+
$.jinja_template,
6671
$.non_from_sql_keyword,
6772
$.string,
6873
$.number,
@@ -84,6 +89,7 @@ module.exports = grammar({
8489
$.case_expression, // CASE WHEN ... THEN ... END
8590
$.cast_expression, // CAST(expr AS type), TRY_CAST(expr AS type)
8691
$.function_call, // Regular function calls like COUNT(), SUM()
92+
$.jinja_template,
8793
$.sql_keyword,
8894
$.string,
8995
$.number,
@@ -128,6 +134,7 @@ module.exports = grammar({
128134
$.identifier,
129135
$.string,
130136
$.number,
137+
$.jinja_template,
131138
$.subquery,
132139
',', '(', ')', '*', '.', '=',
133140
/[^\s;(),'"]+/
@@ -143,6 +150,7 @@ module.exports = grammar({
143150
$.identifier,
144151
$.string,
145152
$.number,
153+
$.jinja_template,
146154
$.subquery,
147155
',', '(', ')', '*', '.', '=',
148156
/[^\s;(),'"]+/
@@ -157,6 +165,7 @@ module.exports = grammar({
157165
$.identifier,
158166
$.string,
159167
$.number,
168+
$.jinja_template,
160169
$.subquery,
161170
',', '(', ')', '*', '.', '=',
162171
/[^\s;(),'"]+/
@@ -171,6 +180,7 @@ module.exports = grammar({
171180
$.identifier,
172181
$.string,
173182
$.number,
183+
$.jinja_template,
174184
$.subquery,
175185
',', '(', ')', '*', '.', '=',
176186
/[^\s;(),'"]+/
@@ -179,6 +189,7 @@ module.exports = grammar({
179189

180190
other_sql_statement: $ => prec(-1, repeat1(choice(
181191
$.non_from_sql_keyword,
192+
$.jinja_template,
182193
/[^\s;(),'"]+/,
183194
$.string,
184195
$.number,
@@ -218,6 +229,7 @@ module.exports = grammar({
218229
$.sql_keyword,
219230
$.string,
220231
$.number,
232+
$.jinja_template,
221233
$.identifier,
222234
$.subquery,
223235
',', '*', '.', '=', '<', '>', '!', '::',
@@ -242,6 +254,7 @@ module.exports = grammar({
242254
$.cast_expression,
243255
$.function_call,
244256
$.subquery, // also handles IN-lists like ('a', 'b')
257+
$.jinja_template,
245258
token('='), token('!='), token('<>'), token('<='), token('>='),
246259
token('<'), token('>'),
247260
token('+'), token('-'), token('*'), token('/'), token('%'), token('||'), token('::'),
@@ -396,6 +409,7 @@ module.exports = grammar({
396409
$.qualified_name, // Handles both simple identifiers and table.column
397410
$.number,
398411
$.string,
412+
$.jinja_template,
399413
'*',
400414
// CASE expression
401415
$.case_expression,
@@ -470,9 +484,16 @@ module.exports = grammar({
470484
repeat(seq('.', $.identifier))
471485
)),
472486

487+
source_ref: $ => choice(
488+
$.qualified_name,
489+
$.string,
490+
$.namespaced_identifier,
491+
$.jinja_template
492+
),
493+
473494
table_ref: $ => prec.right(seq(
474495
choice(
475-
field('table', choice($.qualified_name, $.string, $.namespaced_identifier)),
496+
field('table', $.source_ref),
476497
$.subquery,
477498
),
478499
optional(seq(
@@ -591,14 +612,14 @@ module.exports = grammar({
591612
// Option 1: Just FROM (inherit global mappings)
592613
seq(
593614
caseInsensitive('FROM'),
594-
field('layer_source', choice($.qualified_name, $.string, $.namespaced_identifier))
615+
field('layer_source', $.source_ref)
595616
),
596617
// Option 2: Mapping list (uses shared structure), optionally followed by FROM
597618
seq(
598619
$.mapping_list,
599620
optional(seq(
600621
caseInsensitive('FROM'),
601-
field('layer_source', choice($.qualified_name, $.string, $.namespaced_identifier))
622+
field('layer_source', $.source_ref)
602623
))
603624
)
604625
)
@@ -928,6 +949,15 @@ module.exports = grammar({
928949
$.quoted_identifier
929950
),
930951

952+
// Jinja templates are opaque SQL-side tokens. dbt/fusion renders these
953+
// before ggsql executes SQL, but the parser must preserve them while
954+
// splitting SQL from VISUALISE.
955+
jinja_template: $ => token(choice(
956+
seq('{{', repeat(choice(/[^}]+/, /}[^}]/)), '}}'),
957+
seq('{%', repeat(choice(/[^%]+/, /%[^%]/)), '%}'),
958+
seq('{#', repeat(choice(/[^#]+/, /#[^#]/)), '#}')
959+
)),
960+
931961
// Identifier for use in filter expressions - uses lower precedence so that
932962
// keywords like PARTITION and ORDER can take priority and end the filter
933963
filter_identifier: $ => token(prec(-1, /[a-zA-Z_][a-zA-Z0-9_]*/)),

tree-sitter-ggsql/test/corpus/basic.txt

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3596,3 +3596,122 @@ SELECT grade, ROUND(COUNT(CASE WHEN status = 'Default' THEN 1 END) * 100.0 / COU
35963596
(viz_clause
35973597
(draw_clause
35983598
(geom_type)))))
3599+
3600+
================================================================================
3601+
SQL source with Jinja ref
3602+
================================================================================
3603+
3604+
SELECT order_date, region, revenue FROM {{ ref('fct_orders') }}
3605+
VISUALISE order_date AS x, revenue AS y, region AS color
3606+
DRAW point
3607+
3608+
--------------------------------------------------------------------------------
3609+
3610+
(query
3611+
(sql_portion
3612+
(sql_statement
3613+
(select_statement
3614+
(select_body
3615+
(identifier
3616+
(bare_identifier))
3617+
(identifier
3618+
(bare_identifier))
3619+
(identifier
3620+
(bare_identifier))
3621+
(from_clause
3622+
(table_ref
3623+
table: (jinja_template)))))))
3624+
(visualise_statement
3625+
(visualise_keyword)
3626+
(global_mapping
3627+
(mapping_list
3628+
(mapping_element
3629+
(explicit_mapping
3630+
value: (mapping_value
3631+
(column_reference
3632+
(identifier
3633+
(bare_identifier))))
3634+
name: (aesthetic_name)))
3635+
(mapping_element
3636+
(explicit_mapping
3637+
value: (mapping_value
3638+
(column_reference
3639+
(identifier
3640+
(bare_identifier))))
3641+
name: (aesthetic_name)))
3642+
(mapping_element
3643+
(explicit_mapping
3644+
value: (mapping_value
3645+
(column_reference
3646+
(identifier
3647+
(bare_identifier))))
3648+
name: (aesthetic_name)))))
3649+
(viz_clause
3650+
(draw_clause
3651+
(geom_type)))))
3652+
3653+
================================================================================
3654+
SQL source with Jinja var containing dict literal
3655+
================================================================================
3656+
3657+
SELECT * FROM {{ var('table', {'fallback': 'orders'}) }}
3658+
VISUALISE x AS x, y AS y
3659+
DRAW point
3660+
3661+
--------------------------------------------------------------------------------
3662+
3663+
(query
3664+
(sql_portion
3665+
(sql_statement
3666+
(select_statement
3667+
(select_body
3668+
(from_clause
3669+
(table_ref
3670+
table: (jinja_template)))))))
3671+
(visualise_statement
3672+
(visualise_keyword)
3673+
(global_mapping
3674+
(mapping_list
3675+
(mapping_element
3676+
(explicit_mapping
3677+
value: (mapping_value
3678+
(column_reference
3679+
(identifier
3680+
(bare_identifier))))
3681+
name: (aesthetic_name)))
3682+
(mapping_element
3683+
(explicit_mapping
3684+
value: (mapping_value
3685+
(column_reference
3686+
(identifier
3687+
(bare_identifier))))
3688+
name: (aesthetic_name)))))
3689+
(viz_clause
3690+
(draw_clause
3691+
(geom_type)))))
3692+
3693+
================================================================================
3694+
Layer source with Jinja ref
3695+
================================================================================
3696+
3697+
VISUALISE
3698+
DRAW point MAPPING x AS x FROM {{ ref('fct_orders') }}
3699+
3700+
--------------------------------------------------------------------------------
3701+
3702+
(query
3703+
(visualise_statement
3704+
(visualise_keyword)
3705+
(viz_clause
3706+
(draw_clause
3707+
(geom_type)
3708+
(mapping_clause
3709+
(mapping_list
3710+
(mapping_element
3711+
(explicit_mapping
3712+
value: (mapping_value
3713+
(column_reference
3714+
(identifier
3715+
(bare_identifier))))
3716+
name: (aesthetic_name))))
3717+
layer_source: (jinja_template))))))

0 commit comments

Comments
 (0)