Skip to content

Commit e7f9c6e

Browse files
teunbrand and claude authored
Allow arbitrary SQL statements (#364)
* Allow arbitrary SQL setup statements (INSTALL, LOAD, SET, etc.) Relax the grammar's other_sql_statement rule to accept any non-delimiter tokens, so statements like INSTALL/LOAD/SET/ATTACH parse without error. Execute these setup statements before the main query in the pipeline. Flip DDL detection in DuckDB and SQLite readers to a returns_rows whitelist, so unknown statement types are handled gracefully. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * add news item * candles in pentagram shape for clippy * tailor helper function name better * Fix some discrepancies with leading FROM. * share the 'returns rows' check --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent b983904 commit e7f9c6e

8 files changed

Lines changed: 206 additions & 40 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ shapes (#368)
2424
- Reverted an earlier decision to materialize CTEs and the global query in Rust
2525
before registering them back to the backend. We now keep the data purely on the
2626
backend until the layer query, as was always intended (#363)
27+
- Relaxed some grammar constraints on the SQL portion before the VISUALISE
28+
portion (#364).
2729
- Simplified internal approach to DataFrame with DuckDB reader (#365)
2830
- Moved the CLI to its own module rather than be part of the main crate (#379)
2931
- Restructured CLAUDE.md to better deal with the rising complexity of the project (#382)

src/execute/cte.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ pub fn transform_global_sql(
230230

231231
if let Some(select_sql) = select_sql {
232232
Some(transform_cte_references(&select_sql, materialized_ctes))
233-
} else if has_executable_sql(source_tree) {
233+
} else if does_consume_cte(source_tree) {
234234
// Non-SELECT executable SQL (CREATE, INSERT, UPDATE, DELETE)
235235
// OR VISUALISE FROM (which injects SELECT * FROM <source>)
236236
// Extract SQL (with injection if VISUALISE FROM) and transform CTE references
@@ -248,7 +248,7 @@ pub fn transform_global_sql(
248248
/// This handles cases like `WITH a AS (...), b AS (...) VISUALISE` where the WITH
249249
/// clause has no trailing SELECT - these CTEs are still extracted for layer use
250250
/// but shouldn't be executed as global data.
251-
pub fn has_executable_sql(source_tree: &SourceTree) -> bool {
251+
pub fn does_consume_cte(source_tree: &SourceTree) -> bool {
252252
let root = source_tree.root();
253253

254254
// Check for direct executable statements (SELECT, CREATE, INSERT, UPDATE,

src/execute/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -949,6 +949,11 @@ pub fn prepare_data_with_reader(query: &str, reader: &dyn Reader) -> Result<Prep
949949
));
950950
}
951951

952+
// Execute setup statements (INSTALL, LOAD, SET, etc.) before the main query
953+
for stmt in source_tree.find_texts(&root, "(sql_statement (other_sql_statement) @stmt)") {
954+
execute_query(&stmt)?;
955+
}
956+
952957
// Extract CTE definitions from the source tree (in declaration order)
953958
let ctes = cte::extract_ctes(&source_tree);
954959

src/reader/duckdb.rs

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -211,21 +211,10 @@ impl Reader for DuckDBReader {
211211
// Rewrite ggsql:name → __ggsql_data_name__ in SQL
212212
let sql = super::data::rewrite_namespaced_sql(sql)?;
213213

214-
// Check if this is a DDL statement (CREATE, DROP, INSERT, UPDATE, DELETE, ALTER)
215-
// DDL statements don't return rows, so we handle them specially
216-
let trimmed = sql.trim().to_uppercase();
217-
let is_ddl = trimmed.starts_with("CREATE ")
218-
|| trimmed.starts_with("DROP ")
219-
|| trimmed.starts_with("INSERT ")
220-
|| trimmed.starts_with("UPDATE ")
221-
|| trimmed.starts_with("DELETE ")
222-
|| trimmed.starts_with("ALTER ");
223-
224-
if is_ddl {
225-
// For DDL, just execute and return an empty DataFrame
214+
if !super::returns_rows(&sql) {
226215
self.conn
227216
.execute(&sql, params![])
228-
.map_err(|e| GgsqlError::ReaderError(format!("Failed to execute DDL: {}", e)))?;
217+
.map_err(|e| GgsqlError::ReaderError(format!("Failed to execute SQL: {}", e)))?;
229218

230219
return Ok(DataFrame::empty());
231220
}

src/reader/mod.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,19 @@ pub(crate) fn validate_table_name(name: &str) -> Result<()> {
345345
Ok(())
346346
}
347347

348+
/// Does the SQL statement return rows?
///
/// Looks at the first keyword to decide: `SELECT`, `WITH`, `FROM`,
/// `DESCRIBE`, `SHOW` and `EXPLAIN` are treated as producing result sets;
/// everything else (DDL, DML, setup statements) is treated as not.
///
/// Leading whitespace, SQL comments (`-- …` and `/* … */`) and opening
/// parentheses are skipped before the keyword is read, and the keyword ends
/// at the first character that cannot be part of an identifier. This means
/// `SELECT*FROM t`, `(SELECT 1) UNION (SELECT 2)` and a query preceded by a
/// comment are all recognised as row-returning.
///
/// The check is purely lexical: a `WITH` that heads a DML statement is still
/// reported as returning rows, and `INSERT … RETURNING` is not.
pub(crate) fn returns_rows(sql: &str) -> bool {
    const ROW_KEYWORDS: [&str; 6] = ["SELECT", "WITH", "DESCRIBE", "SHOW", "EXPLAIN", "FROM"];

    // Strip everything that may legally precede the first keyword.
    let mut rest = sql;
    loop {
        rest = rest.trim_start_matches(|c: char| c.is_whitespace() || c == '(');
        if let Some(after) = rest.strip_prefix("--") {
            // Line comment: continue after the end of the line (or give up at EOF).
            rest = after.split_once('\n').map_or("", |(_, tail)| tail);
        } else if let Some(after) = rest.strip_prefix("/*") {
            // Block comment: continue after the terminator (or give up if unterminated).
            rest = after.split_once("*/").map_or("", |(_, tail)| tail);
        } else {
            break;
        }
    }

    // The keyword is the leading run of identifier characters, so that
    // punctuation glued to it (`SELECT*`, `FROM(`) does not hide it.
    let end = rest
        .find(|c: char| !(c.is_ascii_alphanumeric() || c == '_'))
        .unwrap_or(rest.len());
    let keyword = &rest[..end];

    ROW_KEYWORDS
        .iter()
        .any(|candidate| keyword.eq_ignore_ascii_case(candidate))
}
360+
348361
// ============================================================================
349362
// Spec - Result of reader.execute()
350363
// ============================================================================

src/reader/sqlite.rs

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -362,19 +362,10 @@ impl Reader for SqliteReader {
362362
// Rewrite ggsql:name → __ggsql_data_name__ in SQL
363363
let sql = super::data::rewrite_namespaced_sql(sql)?;
364364

365-
// Check if this is a DDL statement
366-
let trimmed = sql.trim().to_uppercase();
367-
let is_ddl = trimmed.starts_with("CREATE ")
368-
|| trimmed.starts_with("DROP ")
369-
|| trimmed.starts_with("INSERT ")
370-
|| trimmed.starts_with("UPDATE ")
371-
|| trimmed.starts_with("DELETE ")
372-
|| trimmed.starts_with("ALTER ");
373-
374-
if is_ddl {
365+
if !super::returns_rows(&sql) {
375366
self.conn
376367
.execute_batch(&sql)
377-
.map_err(|e| GgsqlError::ReaderError(format!("Failed to execute DDL: {}", e)))?;
368+
.map_err(|e| GgsqlError::ReaderError(format!("Failed to execute SQL: {}", e)))?;
378369
return Ok(DataFrame::empty());
379370
}
380371

tree-sitter-ggsql/grammar.js

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -175,19 +175,14 @@ module.exports = grammar({
175175
))
176176
)),
177177

178-
// Other SQL statements - DO NOT match if starts with keywords we handle
179-
// explicitly (WITH, SELECT, CREATE, INSERT, UPDATE, DELETE, VISUALISE, FROM).
180-
other_sql_statement: $ => {
181-
const exclude_pattern = /[^\s;(),'"WwSsCcIiUuDdVvFf]+/;
182-
return prec(-1, repeat1(choice(
183-
$.non_from_sql_keyword,
184-
token(exclude_pattern), // Tokens not starting with excluded letters
185-
$.string,
186-
$.number,
187-
$.subquery,
188-
',', '(', ')', '*', '.', '='
189-
)));
190-
},
178+
other_sql_statement: $ => prec(-1, repeat1(choice(
179+
$.non_from_sql_keyword,
180+
token(/[^\s;(),'"]+/),
181+
$.string,
182+
$.number,
183+
$.subquery,
184+
',', '(', ')', '*', '.', '='
185+
))),
191186

192187
// Subquery in parentheses - fully recursive, can contain any SQL
193188
// Prioritizes WITH/SELECT statements, falls back to token-by-token parsing
@@ -440,7 +435,7 @@ module.exports = grammar({
440435
)),
441436

442437
from_clause: $ => prec.right(1, seq(
443-
caseInsensitive('FROM'),
438+
token(prec(1, caseInsensitive('FROM'))),
444439
$.table_ref,
445440
repeat(seq(',', $.table_ref))
446441
)),

tree-sitter-ggsql/test/corpus/basic.txt

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2780,3 +2780,174 @@ FACET region SETTING scales => 'free_y', ncol => 3
27802780
(bare_identifier)))
27812781
value: (parameter_value
27822782
(number))))))))
2783+
2784+
================================================================================
2785+
Bare FROM (DuckDB-style)
2786+
================================================================================
2787+
2788+
FROM mtcars
2789+
VISUALISE DRAW point MAPPING mpg AS x, hp AS y
2790+
2791+
--------------------------------------------------------------------------------
2792+
2793+
(query
2794+
(sql_portion
2795+
(sql_statement
2796+
(from_statement
2797+
(from_clause
2798+
(table_ref
2799+
(qualified_name
2800+
(identifier
2801+
(bare_identifier))))))))
2802+
(visualise_statement
2803+
(visualise_keyword)
2804+
(viz_clause
2805+
(draw_clause
2806+
(geom_type)
2807+
(mapping_clause
2808+
(mapping_list
2809+
(mapping_element
2810+
(explicit_mapping
2811+
(mapping_value
2812+
(column_reference
2813+
(identifier
2814+
(bare_identifier))))
2815+
(aesthetic_name)))
2816+
(mapping_element
2817+
(explicit_mapping
2818+
(mapping_value
2819+
(column_reference
2820+
(identifier
2821+
(bare_identifier))))
2822+
(aesthetic_name)))))))))
2823+
2824+
================================================================================
2825+
Bare FROM with WHERE clause
2826+
================================================================================
2827+
2828+
FROM sales WHERE year = 2024
2829+
VISUALISE DRAW point MAPPING x AS x, y AS y
2830+
2831+
--------------------------------------------------------------------------------
2832+
2833+
(query
2834+
(sql_portion
2835+
(sql_statement
2836+
(from_statement
2837+
(from_clause
2838+
(table_ref
2839+
(qualified_name
2840+
(identifier
2841+
(bare_identifier)))))
2842+
(non_from_sql_keyword)
2843+
(identifier
2844+
(bare_identifier))
2845+
(number))))
2846+
(visualise_statement
2847+
(visualise_keyword)
2848+
(viz_clause
2849+
(draw_clause
2850+
(geom_type)
2851+
(mapping_clause
2852+
(mapping_list
2853+
(mapping_element
2854+
(explicit_mapping
2855+
(mapping_value
2856+
(column_reference
2857+
(identifier
2858+
(bare_identifier))))
2859+
(aesthetic_name)))
2860+
(mapping_element
2861+
(explicit_mapping
2862+
(mapping_value
2863+
(column_reference
2864+
(identifier
2865+
(bare_identifier))))
2866+
(aesthetic_name)))))))))
2867+
2868+
================================================================================
2869+
Arbitrary SQL setup statements
2870+
================================================================================
2871+
2872+
INSTALL httpfs; LOAD httpfs;
2873+
SELECT * FROM data VISUALISE DRAW point MAPPING x AS x, y AS y
2874+
2875+
--------------------------------------------------------------------------------
2876+
2877+
(query
2878+
(sql_portion
2879+
(sql_statement
2880+
(other_sql_statement))
2881+
(sql_statement
2882+
(other_sql_statement))
2883+
(sql_statement
2884+
(select_statement
2885+
(select_body
2886+
(from_clause
2887+
(table_ref
2888+
(qualified_name
2889+
(identifier
2890+
(bare_identifier)))))))))
2891+
(visualise_statement
2892+
(visualise_keyword)
2893+
(viz_clause
2894+
(draw_clause
2895+
(geom_type)
2896+
(mapping_clause
2897+
(mapping_list
2898+
(mapping_element
2899+
(explicit_mapping
2900+
(mapping_value
2901+
(column_reference
2902+
(identifier
2903+
(bare_identifier))))
2904+
(aesthetic_name)))
2905+
(mapping_element
2906+
(explicit_mapping
2907+
(mapping_value
2908+
(column_reference
2909+
(identifier
2910+
(bare_identifier))))
2911+
(aesthetic_name)))))))))
2912+
2913+
================================================================================
2914+
Arbitrary SQL setup with bare FROM
2915+
================================================================================
2916+
2917+
INSTALL httpfs;
2918+
FROM data VISUALISE DRAW point MAPPING x AS x, y AS y
2919+
2920+
--------------------------------------------------------------------------------
2921+
2922+
(query
2923+
(sql_portion
2924+
(sql_statement
2925+
(other_sql_statement))
2926+
(sql_statement
2927+
(from_statement
2928+
(from_clause
2929+
(table_ref
2930+
(qualified_name
2931+
(identifier
2932+
(bare_identifier))))))))
2933+
(visualise_statement
2934+
(visualise_keyword)
2935+
(viz_clause
2936+
(draw_clause
2937+
(geom_type)
2938+
(mapping_clause
2939+
(mapping_list
2940+
(mapping_element
2941+
(explicit_mapping
2942+
(mapping_value
2943+
(column_reference
2944+
(identifier
2945+
(bare_identifier))))
2946+
(aesthetic_name)))
2947+
(mapping_element
2948+
(explicit_mapping
2949+
(mapping_value
2950+
(column_reference
2951+
(identifier
2952+
(bare_identifier))))
2953+
(aesthetic_name)))))))))

0 commit comments

Comments
 (0)