Skip to content

Commit 5912bb7

Browse files
teunbrandclaude
andauthored
Fix for side-effects seeping into data query (#419)
* Fix CREATE + VISUALISE FROM producing invalid double-wrapped SQL transform_global_sql concatenated side-effect SQL (CREATE, INSERT, …) with the VISUALISE FROM injection, then the caller wrapped the whole thing in another CREATE TABLE AS, producing invalid SQL like: CREATE TABLE __ggsql_global__ AS CREATE TEMP TABLE data … Split the return into side-effect statements (executed directly) and the queryable part (wrapped as the global temp table). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * add news bullet * cargo fmt * Fix grammar: INSERT/UPDATE/DELETE parsed as other_sql_statement The tree-sitter lexer was misclassifying INSERT, UPDATE, and DELETE keywords as bare_identifier tokens, causing them to fall through to other_sql_statement instead of their specific statement rules. This happened because keyword prefixes (e.g. IN for INSERT) derailed the lexer's specific keyword path. Fix: wrap the three leading keywords in token(prec(1, ...)) so they get dedicated high-priority lexer tokens, and downgrade other_sql_statement's catch-all regex from token() to a bare regex. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Add cross-references between the two preamble execution paths Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Rename does_consume_cte to has_executable_sql Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Execute side-effect statements before trailing SELECT transform_global_sql returned empty side-effects when a SELECT was present, so CREATE/INSERT preambles were skipped. Hoist side-effect extraction so it runs regardless of the query type. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 3e9973f commit 5912bb7

5 files changed

Lines changed: 269 additions & 24 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
### Fixed
44

5+
- Side effects like `CREATE TEMP TABLE` before the `VISUALISE` statement are now
6+
separated from directly feeding into the visualisation data (#415)
57
- Fixed bug where panel axes were unintentionally anchored to zero when using
68
`FACET ... SETTING free => 'x'/'y'` (#410).
79

src/execute/cte.rs

Lines changed: 60 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -210,35 +210,79 @@ pub fn split_with_query(source_tree: &SourceTree) -> Option<(String, String)> {
210210
Some((cte_prefix, trailing))
211211
}
212212

213-
/// Transform global SQL for execution with temp tables
213+
/// Transform global SQL for execution with temp tables.
214214
///
215-
/// If the SQL has a WITH clause followed by SELECT, extracts just the SELECT
216-
/// portion and transforms CTE references to temp table names.
217-
/// For SQL without WITH clause, just transforms any CTE references.
215+
/// Returns statements to execute directly as side effects (CREATE, INSERT, …)
216+
/// and an optional query whose result should be wrapped as the global temp
217+
/// table.
218218
pub fn transform_global_sql(
219219
source_tree: &SourceTree,
220220
materialized_ctes: &HashSet<String>,
221-
) -> Option<String> {
221+
) -> (Vec<String>, Option<String>) {
222+
// Collect side-effect statements (CREATE, INSERT, UPDATE, DELETE) that
223+
// need to run before the main query. These appear alongside a trailing
224+
// SELECT or VISUALISE FROM.
225+
//
226+
// Only structured DML is handled here — other_sql_statement nodes
227+
// (INSTALL, LOAD, SET, …) are pre-executed in prepare_data_with_reader.
228+
let root = source_tree.root();
229+
230+
let side_effect_stmts = r#"
231+
(sql_statement
232+
[(create_statement)
233+
(insert_statement)
234+
(update_statement)
235+
(delete_statement)] @stmt)
236+
"#;
237+
let side_effects: Vec<String> = source_tree
238+
.find_texts(&root, side_effect_stmts)
239+
.into_iter()
240+
.map(|s| transform_cte_references(s.trim(), materialized_ctes))
241+
.filter(|s| !s.is_empty())
242+
.collect();
243+
222244
// Try to extract trailing SELECT (WITH...SELECT or direct SELECT)
223245
let select_sql = split_with_query(source_tree)
224246
.map(|(_, select)| select)
225247
.or_else(|| {
226248
// Fallback: direct SELECT statement (no WITH clause)
227-
let root = source_tree.root();
228249
source_tree.find_text(&root, "(sql_statement (select_statement) @select)")
229250
});
230251

231252
if let Some(select_sql) = select_sql {
232-
Some(transform_cte_references(&select_sql, materialized_ctes))
233-
} else if does_consume_cte(source_tree) {
234-
// Non-SELECT executable SQL (CREATE, INSERT, UPDATE, DELETE)
235-
// OR VISUALISE FROM (which injects SELECT * FROM <source>)
236-
// Extract SQL (with injection if VISUALISE FROM) and transform CTE references
237-
let sql = source_tree.extract_sql()?;
238-
Some(transform_cte_references(&sql, materialized_ctes))
253+
return (
254+
side_effects,
255+
Some(transform_cte_references(&select_sql, materialized_ctes)),
256+
);
257+
}
258+
259+
if !has_executable_sql(source_tree) {
260+
return (vec![], None);
261+
}
262+
263+
// We have non-SELECT executable SQL and/or VISUALISE FROM.
264+
// Side-effects run directly, VISUALISE FROM becomes the queryable part.
265+
// A bare WITH clause without a trailing statement is not executable on
266+
// its own (its CTEs are already materialized separately).
267+
let viz_from_query = source_tree
268+
.find_text(
269+
&root,
270+
r#"(visualise_statement (from_clause (table_ref) @table))"#,
271+
)
272+
.map(|table| {
273+
let q = format!("SELECT * FROM {}", table);
274+
transform_cte_references(&q, materialized_ctes)
275+
});
276+
277+
if !side_effects.is_empty() || viz_from_query.is_some() {
278+
(side_effects, viz_from_query)
239279
} else {
240-
// No executable SQL (just CTEs)
241-
None
280+
// has_executable_sql was true but we found no specific statements or
281+
// VISUALISE FROM — fall back to extract_sql as the query.
282+
let query = source_tree
283+
.extract_sql()
284+
.map(|s| transform_cte_references(&s, materialized_ctes));
285+
(vec![], query)
242286
}
243287
}
244288

@@ -248,7 +292,7 @@ pub fn transform_global_sql(
248292
/// This handles cases like `WITH a AS (...), b AS (...) VISUALISE` where the WITH
249293
/// clause has no trailing SELECT - these CTEs are still extracted for layer use
250294
/// but shouldn't be executed as global data.
251-
pub fn does_consume_cte(source_tree: &SourceTree) -> bool {
295+
pub fn has_executable_sql(source_tree: &SourceTree) -> bool {
252296
let root = source_tree.root();
253297

254298
// Check for direct executable statements (SELECT, CREATE, INSERT, UPDATE,

src/execute/mod.rs

Lines changed: 77 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,7 +1043,9 @@ pub fn prepare_data_with_reader(query: &str, reader: &dyn Reader) -> Result<Prep
10431043
));
10441044
}
10451045

1046-
// Execute setup statements (INSTALL, LOAD, SET, etc.) before the main query
1046+
// Execute setup statements (INSTALL, LOAD, SET, etc.) before the main query.
1047+
// Structured DML (CREATE, INSERT, UPDATE, DELETE) is handled separately as
1048+
// side-effects in cte::transform_global_sql.
10471049
for stmt in source_tree.find_texts(&root, "(sql_statement (other_sql_statement) @stmt)") {
10481050
execute_query(&stmt)?;
10491051
}
@@ -1063,16 +1065,21 @@ pub fn prepare_data_with_reader(query: &str, reader: &dyn Reader) -> Result<Prep
10631065
// Execute global SQL if present
10641066
// If there's a WITH clause, extract just the trailing SELECT and transform CTE references.
10651067
// The global result is stored as a temp table so filtered layers can query it efficiently.
1066-
// Track whether we actually create the temp table (depends on transform_global_sql succeeding)
10671068
let mut has_global_table = false;
10681069
if sql_part.is_some() {
1069-
if let Some(transformed_sql) = cte::transform_global_sql(&source_tree, &materialized_ctes) {
1070+
let (side_effects, query) = cte::transform_global_sql(&source_tree, &materialized_ctes);
1071+
1072+
for stmt in &side_effects {
1073+
execute_query(stmt)?;
1074+
}
1075+
1076+
if let Some(query) = query {
10701077
// Materialize global result as a temp table directly on the backend
10711078
// (no roundtrip through Rust).
10721079
let statements = reader.dialect().create_or_replace_temp_table_sql(
10731080
&naming::global_table(),
10741081
&[],
1075-
&transformed_sql,
1082+
&query,
10761083
);
10771084
for stmt in &statements {
10781085
execute_query(stmt)?;
@@ -1895,6 +1902,72 @@ mod tests {
18951902
assert_eq!(result.data.get(layer1_key).unwrap().height(), 3);
18961903
}
18971904

1905+
#[cfg(feature = "duckdb")]
1906+
#[test]
1907+
fn test_visualise_from_after_create() {
1908+
let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();
1909+
1910+
let query = r#"
1911+
CREATE TEMP TABLE data(x, y) AS (VALUES
1912+
('A', 5),
1913+
('B', 2),
1914+
('C', 4),
1915+
('D', 7),
1916+
('E', 6)
1917+
)
1918+
VISUALISE x, y FROM data
1919+
DRAW area
1920+
"#;
1921+
1922+
let result = prepare_data_with_reader(query, &reader).unwrap();
1923+
let key = result.specs[0].layers[0]
1924+
.data_key
1925+
.as_ref()
1926+
.expect("Layer should have data_key");
1927+
assert_eq!(result.data.get(key).unwrap().height(), 5);
1928+
}
1929+
1930+
#[cfg(feature = "duckdb")]
1931+
#[test]
1932+
fn test_visualise_from_after_create_and_insert() {
1933+
let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();
1934+
1935+
let query = r#"
1936+
CREATE TEMP TABLE data(x INTEGER, y INTEGER);
1937+
INSERT INTO data VALUES (1, 10), (2, 20), (3, 30);
1938+
VISUALISE x, y FROM data
1939+
DRAW point
1940+
"#;
1941+
1942+
let result = prepare_data_with_reader(query, &reader).unwrap();
1943+
let key = result.specs[0].layers[0]
1944+
.data_key
1945+
.as_ref()
1946+
.expect("Layer should have data_key");
1947+
assert_eq!(result.data.get(key).unwrap().height(), 3);
1948+
}
1949+
1950+
#[cfg(feature = "duckdb")]
1951+
#[test]
1952+
fn test_select_after_create_and_insert() {
1953+
let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();
1954+
1955+
let query = r#"
1956+
CREATE TEMP TABLE data(x INTEGER, y INTEGER);
1957+
INSERT INTO data VALUES (1, 10), (2, 20), (3, 30);
1958+
SELECT * FROM data
1959+
VISUALISE x, y
1960+
DRAW point
1961+
"#;
1962+
1963+
let result = prepare_data_with_reader(query, &reader).unwrap();
1964+
let key = result.specs[0].layers[0]
1965+
.data_key
1966+
.as_ref()
1967+
.expect("Layer should have data_key");
1968+
assert_eq!(result.data.get(key).unwrap().height(), 3);
1969+
}
1970+
18981971
/// Test that literal mappings survive stat transforms (e.g., histogram grouping).
18991972
///
19001973
/// This tests the fix for issue #129 where literal aesthetic columns like

tree-sitter-ggsql/grammar.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ module.exports = grammar({
135135

136136
// INSERT statement
137137
insert_statement: $ => prec.right(seq(
138-
caseInsensitive('INSERT'),
138+
token(prec(1, caseInsensitive('INSERT'))),
139139
repeat1(choice(
140140
$.sql_keyword,
141141
$.identifier,
@@ -149,7 +149,7 @@ module.exports = grammar({
149149

150150
// UPDATE statement
151151
update_statement: $ => prec.right(seq(
152-
caseInsensitive('UPDATE'),
152+
token(prec(1, caseInsensitive('UPDATE'))),
153153
repeat1(choice(
154154
$.sql_keyword,
155155
$.identifier,
@@ -163,7 +163,7 @@ module.exports = grammar({
163163

164164
// DELETE statement
165165
delete_statement: $ => prec.right(seq(
166-
caseInsensitive('DELETE'),
166+
token(prec(1, caseInsensitive('DELETE'))),
167167
repeat1(choice(
168168
$.sql_keyword,
169169
$.identifier,
@@ -177,7 +177,7 @@ module.exports = grammar({
177177

178178
other_sql_statement: $ => prec(-1, repeat1(choice(
179179
$.non_from_sql_keyword,
180-
token(/[^\s;(),'"]+/),
180+
/[^\s;(),'"]+/,
181181
$.string,
182182
$.number,
183183
$.subquery,

tree-sitter-ggsql/test/corpus/basic.txt

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2865,6 +2865,132 @@ VISUALISE DRAW point MAPPING x AS x, y AS y
28652865
(bare_identifier))))
28662866
(aesthetic_name)))))))))
28672867

2868+
================================================================================
2869+
CREATE statement before VISUALISE FROM
2870+
================================================================================
2871+
2872+
CREATE TEMP TABLE data AS SELECT 1;
2873+
VISUALISE FROM data DRAW point
2874+
2875+
--------------------------------------------------------------------------------
2876+
2877+
(query
2878+
(sql_portion
2879+
(sql_statement
2880+
(create_statement
2881+
(select_statement
2882+
(select_body
2883+
(number))))))
2884+
(visualise_statement
2885+
(visualise_keyword)
2886+
(from_clause
2887+
(table_ref
2888+
(qualified_name
2889+
(identifier
2890+
(bare_identifier)))))
2891+
(viz_clause
2892+
(draw_clause
2893+
(geom_type)))))
2894+
2895+
================================================================================
2896+
INSERT statement before VISUALISE FROM
2897+
================================================================================
2898+
2899+
INSERT INTO data VALUES (1, 2);
2900+
VISUALISE FROM data DRAW point
2901+
2902+
--------------------------------------------------------------------------------
2903+
2904+
(query
2905+
(sql_portion
2906+
(sql_statement
2907+
(insert_statement)))
2908+
(visualise_statement
2909+
(visualise_keyword)
2910+
(from_clause
2911+
(table_ref
2912+
(qualified_name
2913+
(identifier
2914+
(bare_identifier)))))
2915+
(viz_clause
2916+
(draw_clause
2917+
(geom_type)))))
2918+
2919+
================================================================================
2920+
UPDATE statement before VISUALISE FROM
2921+
================================================================================
2922+
2923+
UPDATE data SET x = 1;
2924+
VISUALISE FROM data DRAW point
2925+
2926+
--------------------------------------------------------------------------------
2927+
2928+
(query
2929+
(sql_portion
2930+
(sql_statement
2931+
(update_statement)))
2932+
(visualise_statement
2933+
(visualise_keyword)
2934+
(from_clause
2935+
(table_ref
2936+
(qualified_name
2937+
(identifier
2938+
(bare_identifier)))))
2939+
(viz_clause
2940+
(draw_clause
2941+
(geom_type)))))
2942+
2943+
================================================================================
2944+
DELETE statement before VISUALISE FROM
2945+
================================================================================
2946+
2947+
DELETE FROM data WHERE x = 1;
2948+
VISUALISE FROM data DRAW point
2949+
2950+
--------------------------------------------------------------------------------
2951+
2952+
(query
2953+
(sql_portion
2954+
(sql_statement
2955+
(delete_statement)))
2956+
(visualise_statement
2957+
(visualise_keyword)
2958+
(from_clause
2959+
(table_ref
2960+
(qualified_name
2961+
(identifier
2962+
(bare_identifier)))))
2963+
(viz_clause
2964+
(draw_clause
2965+
(geom_type)))))
2966+
2967+
================================================================================
2968+
CREATE and INSERT before VISUALISE FROM
2969+
================================================================================
2970+
2971+
CREATE TABLE data (x INT);
2972+
INSERT INTO data VALUES (1);
2973+
VISUALISE FROM data DRAW point
2974+
2975+
--------------------------------------------------------------------------------
2976+
2977+
(query
2978+
(sql_portion
2979+
(sql_statement
2980+
(create_statement))
2981+
(sql_statement
2982+
(insert_statement)))
2983+
(visualise_statement
2984+
(visualise_keyword)
2985+
(from_clause
2986+
(table_ref
2987+
(qualified_name
2988+
(identifier
2989+
(bare_identifier)))))
2990+
(viz_clause
2991+
(draw_clause
2992+
(geom_type)))))
2993+
28682994
================================================================================
28692995
Arbitrary SQL setup statements
28702996
================================================================================

0 commit comments

Comments
 (0)