Skip to content

Commit a30cda2

Browse files
committed
fix(reader): register builtin datasets without DuckDB parquet extension
Decode embedded parquet bytes via Arrow and register them through the `arrow` virtual table function instead of writing a temp file and calling `read_parquet`. Calling `read_parquet` triggers DuckDB's autoloadable parquet extension, which fails in offline or network-restricted environments (observed as a flaky CI failure on `test_ribbon_transposed_vegalite_encoding`). This mirrors the loader path that SqliteReader already uses.
1 parent 138ddee commit a30cda2

2 files changed

Lines changed: 24 additions & 24 deletions

File tree

src/reader/data.rs

Lines changed: 23 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -62,44 +62,44 @@ pub fn builtin_parquet_bytes(name: &str) -> Option<&'static [u8]> {
6262

6363
/// Register any builtin datasets referenced in the SQL with a DuckDB connection.
6464
///
65-
/// Finds `ggsql:X` patterns in the SQL, writes the embedded parquet data to
66-
/// a temp file, and creates a table named `__ggsql_data_X__` in DuckDB.
67-
#[cfg(all(feature = "duckdb", feature = "builtin-data"))]
65+
/// Finds `ggsql:X` patterns in the SQL, decodes the embedded parquet bytes
66+
/// via Arrow, and registers the result as `__ggsql_data_X__` through DuckDB's
67+
/// `arrow` virtual table function (registered by `DuckDBReader`). This avoids
68+
/// DuckDB's autoloadable parquet extension, which fails in offline or
69+
/// network-restricted environments such as some CI runners.
70+
#[cfg(all(feature = "duckdb", feature = "builtin-data", feature = "parquet"))]
6871
pub fn register_builtin_datasets_duckdb(
6972
sql: &str,
7073
conn: &duckdb::Connection,
7174
) -> Result<(), GgsqlError> {
72-
use std::{env, fs};
75+
use duckdb::vtab::arrow::arrow_recordbatch_to_query_params;
7376

7477
let dataset_names = extract_builtin_dataset_names(sql)?;
7578
for name in dataset_names {
76-
let Some(parquet_bytes) = builtin_parquet_bytes(&name) else {
79+
if !is_known_builtin(&name) {
7780
continue;
78-
};
81+
}
7982

8083
let table_name = naming::builtin_data_table(&name);
8184

82-
// Write parquet to temp file for DuckDB's read_parquet
83-
let mut tmp_path = env::temp_dir();
84-
tmp_path.push(format!("{}.parquet", name));
85-
if !tmp_path.exists() {
86-
fs::write(&tmp_path, parquet_bytes).map_err(|e| {
87-
GgsqlError::ReaderError(format!(
88-
"Failed to write builtin dataset '{}' to {}: {}",
89-
name,
90-
tmp_path.display(),
91-
e
92-
))
93-
})?;
85+
let already_registered: i64 = conn
86+
.query_row(
87+
"SELECT COUNT(*) FROM information_schema.tables WHERE table_name = ?",
88+
duckdb::params![&table_name],
89+
|row| row.get(0),
90+
)
91+
.unwrap_or(0);
92+
if already_registered > 0 {
93+
continue;
9494
}
9595

96+
let dataframe = load_builtin_dataframe(&name)?;
97+
let params = arrow_recordbatch_to_query_params(dataframe.inner().clone());
9698
let create_sql = format!(
97-
"CREATE TABLE IF NOT EXISTS {} AS SELECT * FROM read_parquet('{}')",
98-
naming::quote_ident(&table_name),
99-
tmp_path.display()
99+
"CREATE TABLE {} AS SELECT * FROM arrow(?, ?)",
100+
naming::quote_ident(&table_name)
100101
);
101-
102-
conn.execute(&create_sql, duckdb::params![]).map_err(|e| {
102+
conn.execute(&create_sql, params).map_err(|e| {
103103
GgsqlError::ReaderError(format!(
104104
"Failed to register builtin dataset '{}': {}",
105105
name, e

src/reader/duckdb.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -205,7 +205,7 @@ fn normalize_arrow_types(batch: RecordBatch) -> Result<RecordBatch> {
205205
impl Reader for DuckDBReader {
206206
fn execute_sql(&self, sql: &str) -> Result<DataFrame> {
207207
// Register builtin datasets if referenced
208-
#[cfg(feature = "builtin-data")]
208+
#[cfg(all(feature = "builtin-data", feature = "parquet"))]
209209
super::data::register_builtin_datasets_duckdb(sql, &self.conn)?;
210210

211211
// Rewrite ggsql:name → __ggsql_data_name__ in SQL

0 commit comments

Comments
 (0)