Skip to content

Commit 7aa76ae

Browse files
committed
Merge remote-tracking branch 'upstream/main' into pr2-hybrid-reader
# Conflicts: # CHANGELOG.md
2 parents a30cda2 + 44eb17d commit 7aa76ae

7 files changed

Lines changed: 609 additions & 164 deletions

File tree

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,14 @@
44

55
- New `HybridReader` that composes any primary `Reader` with an in-process `DuckDBReader` for staging. `register()` writes to staging; `execute_sql` routes queries that reference registered names to staging and everything else to the primary. Available behind the existing `duckdb` feature.
66

7+
### Fixed
8+
9+
- Side effects like `CREATE TEMP TABLE` before the `VISUALISE` statement are now
10+
separated from directly feeding into the visualisation data (#415)
11+
- Fixed bug where panel axes were unintentionally anchored to zero when using
12+
`FACET ... SETTING free => 'x'/'y'` (#410).
13+
- Fixed bug where faceted data were matched to the incorrect panels (#409)
14+
715
## 0.3.1 - 2026-04-30
816

917
### Fixed

src/execute/cte.rs

Lines changed: 60 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -210,35 +210,79 @@ pub fn split_with_query(source_tree: &SourceTree) -> Option<(String, String)> {
210210
Some((cte_prefix, trailing))
211211
}
212212

213-
/// Transform global SQL for execution with temp tables
213+
/// Transform global SQL for execution with temp tables.
214214
///
215-
/// If the SQL has a WITH clause followed by SELECT, extracts just the SELECT
216-
/// portion and transforms CTE references to temp table names.
217-
/// For SQL without WITH clause, just transforms any CTE references.
215+
/// Returns statements to execute directly as side effects (CREATE, INSERT, …)
216+
/// and an optional query whose result should be wrapped as the global temp
217+
/// table.
218218
pub fn transform_global_sql(
219219
source_tree: &SourceTree,
220220
materialized_ctes: &HashSet<String>,
221-
) -> Option<String> {
221+
) -> (Vec<String>, Option<String>) {
222+
// Collect side-effect statements (CREATE, INSERT, UPDATE, DELETE) that
223+
// need to run before the main query. These appear alongside a trailing
224+
// SELECT or VISUALISE FROM.
225+
//
226+
// Only structured DML is handled here — other_sql_statement nodes
227+
// (INSTALL, LOAD, SET, …) are pre-executed in prepare_data_with_reader.
228+
let root = source_tree.root();
229+
230+
let side_effect_stmts = r#"
231+
(sql_statement
232+
[(create_statement)
233+
(insert_statement)
234+
(update_statement)
235+
(delete_statement)] @stmt)
236+
"#;
237+
let side_effects: Vec<String> = source_tree
238+
.find_texts(&root, side_effect_stmts)
239+
.into_iter()
240+
.map(|s| transform_cte_references(s.trim(), materialized_ctes))
241+
.filter(|s| !s.is_empty())
242+
.collect();
243+
222244
// Try to extract trailing SELECT (WITH...SELECT or direct SELECT)
223245
let select_sql = split_with_query(source_tree)
224246
.map(|(_, select)| select)
225247
.or_else(|| {
226248
// Fallback: direct SELECT statement (no WITH clause)
227-
let root = source_tree.root();
228249
source_tree.find_text(&root, "(sql_statement (select_statement) @select)")
229250
});
230251

231252
if let Some(select_sql) = select_sql {
232-
Some(transform_cte_references(&select_sql, materialized_ctes))
233-
} else if does_consume_cte(source_tree) {
234-
// Non-SELECT executable SQL (CREATE, INSERT, UPDATE, DELETE)
235-
// OR VISUALISE FROM (which injects SELECT * FROM <source>)
236-
// Extract SQL (with injection if VISUALISE FROM) and transform CTE references
237-
let sql = source_tree.extract_sql()?;
238-
Some(transform_cte_references(&sql, materialized_ctes))
253+
return (
254+
side_effects,
255+
Some(transform_cte_references(&select_sql, materialized_ctes)),
256+
);
257+
}
258+
259+
if !has_executable_sql(source_tree) {
260+
return (vec![], None);
261+
}
262+
263+
// We have non-SELECT executable SQL and/or VISUALISE FROM.
264+
// Side-effects run directly, VISUALISE FROM becomes the queryable part.
265+
// A bare WITH clause without a trailing statement is not executable on
266+
// its own (its CTEs are already materialized separately).
267+
let viz_from_query = source_tree
268+
.find_text(
269+
&root,
270+
r#"(visualise_statement (from_clause (table_ref) @table))"#,
271+
)
272+
.map(|table| {
273+
let q = format!("SELECT * FROM {}", table);
274+
transform_cte_references(&q, materialized_ctes)
275+
});
276+
277+
if !side_effects.is_empty() || viz_from_query.is_some() {
278+
(side_effects, viz_from_query)
239279
} else {
240-
// No executable SQL (just CTEs)
241-
None
280+
// has_executable_sql was true but we found no specific statements or
281+
// VISUALISE FROM — fall back to extract_sql as the query.
282+
let query = source_tree
283+
.extract_sql()
284+
.map(|s| transform_cte_references(&s, materialized_ctes));
285+
(vec![], query)
242286
}
243287
}
244288

@@ -248,7 +292,7 @@ pub fn transform_global_sql(
248292
/// This handles cases like `WITH a AS (...), b AS (...) VISUALISE` where the WITH
249293
/// clause has no trailing SELECT - these CTEs are still extracted for layer use
250294
/// but shouldn't be executed as global data.
251-
pub fn does_consume_cte(source_tree: &SourceTree) -> bool {
295+
pub fn has_executable_sql(source_tree: &SourceTree) -> bool {
252296
let root = source_tree.root();
253297

254298
// Check for direct executable statements (SELECT, CREATE, INSERT, UPDATE,

src/execute/mod.rs

Lines changed: 77 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,7 +1043,9 @@ pub fn prepare_data_with_reader(query: &str, reader: &dyn Reader) -> Result<Prep
10431043
));
10441044
}
10451045

1046-
// Execute setup statements (INSTALL, LOAD, SET, etc.) before the main query
1046+
// Execute setup statements (INSTALL, LOAD, SET, etc.) before the main query.
1047+
// Structured DML (CREATE, INSERT, UPDATE, DELETE) is handled separately as
1048+
// side-effects in cte::transform_global_sql.
10471049
for stmt in source_tree.find_texts(&root, "(sql_statement (other_sql_statement) @stmt)") {
10481050
execute_query(&stmt)?;
10491051
}
@@ -1063,16 +1065,21 @@ pub fn prepare_data_with_reader(query: &str, reader: &dyn Reader) -> Result<Prep
10631065
// Execute global SQL if present
10641066
// If there's a WITH clause, extract just the trailing SELECT and transform CTE references.
10651067
// The global result is stored as a temp table so filtered layers can query it efficiently.
1066-
// Track whether we actually create the temp table (depends on transform_global_sql succeeding)
10671068
let mut has_global_table = false;
10681069
if sql_part.is_some() {
1069-
if let Some(transformed_sql) = cte::transform_global_sql(&source_tree, &materialized_ctes) {
1070+
let (side_effects, query) = cte::transform_global_sql(&source_tree, &materialized_ctes);
1071+
1072+
for stmt in &side_effects {
1073+
execute_query(stmt)?;
1074+
}
1075+
1076+
if let Some(query) = query {
10701077
// Materialize global result as a temp table directly on the backend
10711078
// (no roundtrip through Rust).
10721079
let statements = reader.dialect().create_or_replace_temp_table_sql(
10731080
&naming::global_table(),
10741081
&[],
1075-
&transformed_sql,
1082+
&query,
10761083
);
10771084
for stmt in &statements {
10781085
execute_query(stmt)?;
@@ -1895,6 +1902,72 @@ mod tests {
18951902
assert_eq!(result.data.get(layer1_key).unwrap().height(), 3);
18961903
}
18971904

1905+
#[cfg(feature = "duckdb")]
1906+
#[test]
1907+
fn test_visualise_from_after_create() {
1908+
let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();
1909+
1910+
let query = r#"
1911+
CREATE TEMP TABLE data(x, y) AS (VALUES
1912+
('A', 5),
1913+
('B', 2),
1914+
('C', 4),
1915+
('D', 7),
1916+
('E', 6)
1917+
)
1918+
VISUALISE x, y FROM data
1919+
DRAW area
1920+
"#;
1921+
1922+
let result = prepare_data_with_reader(query, &reader).unwrap();
1923+
let key = result.specs[0].layers[0]
1924+
.data_key
1925+
.as_ref()
1926+
.expect("Layer should have data_key");
1927+
assert_eq!(result.data.get(key).unwrap().height(), 5);
1928+
}
1929+
1930+
#[cfg(feature = "duckdb")]
1931+
#[test]
1932+
fn test_visualise_from_after_create_and_insert() {
1933+
let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();
1934+
1935+
let query = r#"
1936+
CREATE TEMP TABLE data(x INTEGER, y INTEGER);
1937+
INSERT INTO data VALUES (1, 10), (2, 20), (3, 30);
1938+
VISUALISE x, y FROM data
1939+
DRAW point
1940+
"#;
1941+
1942+
let result = prepare_data_with_reader(query, &reader).unwrap();
1943+
let key = result.specs[0].layers[0]
1944+
.data_key
1945+
.as_ref()
1946+
.expect("Layer should have data_key");
1947+
assert_eq!(result.data.get(key).unwrap().height(), 3);
1948+
}
1949+
1950+
#[cfg(feature = "duckdb")]
1951+
#[test]
1952+
fn test_select_after_create_and_insert() {
1953+
let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();
1954+
1955+
let query = r#"
1956+
CREATE TEMP TABLE data(x INTEGER, y INTEGER);
1957+
INSERT INTO data VALUES (1, 10), (2, 20), (3, 30);
1958+
SELECT * FROM data
1959+
VISUALISE x, y
1960+
DRAW point
1961+
"#;
1962+
1963+
let result = prepare_data_with_reader(query, &reader).unwrap();
1964+
let key = result.specs[0].layers[0]
1965+
.data_key
1966+
.as_ref()
1967+
.expect("Layer should have data_key");
1968+
assert_eq!(result.data.get(key).unwrap().height(), 3);
1969+
}
1970+
18981971
/// Test that literal mappings survive stat transforms (e.g., histogram grouping).
18991972
///
19001973
/// This tests the fix for issue #129 where literal aesthetic columns like

src/writer/vegalite/encoding.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -924,11 +924,8 @@ fn build_column_encoding(
924924

925925
// Position scales don't include zero by default — but only when we set
926926
// an explicit domain. With free facet scales (no domain), VL computes
927-
// the domain from data values. Setting zero:false in that case can exclude
928-
// 0 from the domain, breaking charts with pre-computed stacking (y2/theta2
929-
// starts at 0). Let VL's defaults handle it instead.
930-
let is_free = is_position_free_for_aesthetic(aesthetic, ctx.free_scales);
931-
if aesthetic_ctx.is_primary_internal(aesthetic) && !is_free {
927+
// the domain from data values.
928+
if aesthetic_ctx.is_primary_internal(aesthetic) {
932929
scale_obj.insert("zero".to_string(), json!(false));
933930
}
934931

0 commit comments

Comments
 (0)