Skip to content

Commit 5d241a4

Browse files
authored
Add Rust ADBC execution via dbc (#176)
* add rust adbc dbc execution * Fix Rust ADBC CI driver loading * Fix ADBC execution with trailing comments
1 parent e31216c commit 5d241a4

12 files changed

Lines changed: 1111 additions & 23 deletions

File tree

.github/workflows/integration.yml

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ jobs:
164164
strategy:
165165
fail-fast: false
166166
matrix:
167-
db: [postgres, bigquery, snowflake, clickhouse]
167+
db: [sqlite, duckdb, postgres, bigquery, snowflake, clickhouse]
168168

169169
services:
170170
postgres:
@@ -217,20 +217,26 @@ jobs:
217217
- name: Install dependencies
218218
run: uv sync --extra dev --extra adbc
219219

220-
- name: Install ADBC driver (best effort)
220+
- name: Install ADBC driver through dbc
221221
run: |
222+
DRIVER_DIR="$HOME/.config/adbc/drivers"
223+
echo "ADBC_DRIVER_PATH=$DRIVER_DIR" >> "$GITHUB_ENV"
224+
222225
DB="${{ matrix.db }}"
226+
DRIVER="$DB"
227+
if [ "$DB" = "postgres" ]; then
228+
DRIVER="postgresql"
229+
fi
223230
if [ "$DB" = "clickhouse" ]; then
224-
uvx dbc install --pre clickhouse
231+
uv run --with dbc dbc install --level user --pre clickhouse
232+
uv run --with dbc dbc list --level user
225233
exit 0
226234
fi
227-
PKG_DB="$DB"
228-
if [ "$DB" = "postgres" ]; then
229-
PKG_DB="postgresql"
230-
fi
231-
uv pip install "adbc_driver_${PKG_DB}" || uv pip install "adbc-driver-${PKG_DB}" || true
235+
uv run --with dbc dbc install --level user "$DRIVER"
236+
uv run --with dbc dbc list --level user
232237
233-
- name: Run ADBC smoke tests
238+
- name: Run Python ADBC smoke tests
239+
if: matrix.db != 'sqlite' && matrix.db != 'duckdb'
234240
env:
235241
ADBC_TEST: "1"
236242
ADBC_DB: ${{ matrix.db }}
@@ -246,15 +252,23 @@ jobs:
246252
- name: Install Rust for Rust ADBC probe
247253
uses: dtolnay/rust-toolchain@stable
248254

255+
- name: Export SQLite ADBC driver for Rust
256+
if: matrix.db == 'sqlite'
257+
run: |
258+
echo "SIDEMANTIC_TEST_ADBC_SQLITE_DRIVER=sqlite" >> "$GITHUB_ENV"
259+
echo "SIDEMANTIC_TEST_ADBC_SQLITE_URI=:memory:" >> "$GITHUB_ENV"
260+
echo "SIDEMANTIC_TEST_ADBC_REQUIRE=sqlite" >> "$GITHUB_ENV"
261+
262+
- name: Export DuckDB ADBC driver for Rust
263+
if: matrix.db == 'duckdb'
264+
run: |
265+
echo "SIDEMANTIC_TEST_ADBC_DUCKDB_DRIVER=duckdb" >> "$GITHUB_ENV"
266+
echo "SIDEMANTIC_TEST_ADBC_REQUIRE=duckdb" >> "$GITHUB_ENV"
267+
249268
- name: Export Postgres ADBC driver for Rust
250269
if: matrix.db == 'postgres'
251270
run: |
252-
uv pip install adbc-driver-postgresql
253-
uv run python - <<'PY' >> "$GITHUB_ENV"
254-
import adbc_driver_postgresql
255-
256-
print(f"SIDEMANTIC_TEST_ADBC_POSTGRES_DRIVER={adbc_driver_postgresql._driver_path()}")
257-
PY
271+
echo "SIDEMANTIC_TEST_ADBC_POSTGRES_DRIVER=postgresql" >> "$GITHUB_ENV"
258272
echo "SIDEMANTIC_TEST_ADBC_POSTGRES_URI=postgresql://test:test@localhost:5432/sidemantic_test" >> "$GITHUB_ENV"
259273
echo "SIDEMANTIC_TEST_ADBC_REQUIRE=postgres" >> "$GITHUB_ENV"
260274
@@ -269,7 +283,6 @@ jobs:
269283
echo "Skipping Rust BigQuery ADBC probe; BIGQUERY_ADBC_CREDENTIALS_JSON secret plus BIGQUERY_ADBC_PROJECT/BIGQUERY_ADBC_DATASET vars are required."
270284
exit 0
271285
fi
272-
uvx dbc install bigquery
273286
CREDENTIALS_FILE="$(mktemp)"
274287
printf '%s' "$BIGQUERY_ADBC_CREDENTIALS_JSON" > "$CREDENTIALS_FILE"
275288
echo "::add-mask::$BIGQUERY_ADBC_CREDENTIALS_JSON"
@@ -286,7 +299,6 @@ jobs:
286299
echo "Skipping Rust Snowflake ADBC probe; SNOWFLAKE_ADBC_URI secret is required."
287300
exit 0
288301
fi
289-
uvx dbc install snowflake
290302
echo "::add-mask::$SNOWFLAKE_ADBC_URI"
291303
echo "SIDEMANTIC_TEST_ADBC_SNOWFLAKE_DRIVER=snowflake" >> "$GITHUB_ENV"
292304
echo "SIDEMANTIC_TEST_ADBC_SNOWFLAKE_URI=$SNOWFLAKE_ADBC_URI" >> "$GITHUB_ENV"
@@ -299,5 +311,19 @@ jobs:
299311
echo "SIDEMANTIC_TEST_ADBC_CLICKHOUSE_URI=http://localhost:8123/" >> "$GITHUB_ENV"
300312
echo "SIDEMANTIC_TEST_ADBC_REQUIRE=clickhouse" >> "$GITHUB_ENV"
301313
314+
- name: Run Rust ADBC semantic smoke
315+
env:
316+
SIDEMANTIC_ADBC_TESTS: "1"
317+
SIDEMANTIC_ADBC_DB: ${{ matrix.db }}
318+
SIDEMANTIC_ADBC_REQUIRED: ${{ contains(fromJSON('["sqlite","duckdb","postgres","clickhouse"]'), matrix.db) }}
319+
POSTGRES_URL: "postgres://test:test@localhost:5432/sidemantic_test"
320+
BIGQUERY_EMULATOR_HOST: "localhost:9050"
321+
BIGQUERY_PROJECT: "test-project"
322+
BIGQUERY_DATASET: "test_dataset"
323+
CLICKHOUSE_URL: "adbc://clickhouse?uri=http://localhost:8123/"
324+
SNOWFLAKE_TEST: "1"
325+
RUST_MIN_STACK: 16777216
326+
run: cargo test --manifest-path sidemantic-rs/Cargo.toml --features adbc-exec --test adbc_engines -- --nocapture
327+
302328
- name: Run Rust ADBC probe
303329
run: cargo test --manifest-path sidemantic-rs/Cargo.toml --features adbc-exec --test adbc_driver_matrix

sidemantic-rs/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sidemantic-rs/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ crate-type = ["rlib", "staticlib", "cdylib"]
1414
default = []
1515
python = ["dep:pyo3"]
1616
python-adbc = ["python", "adbc-exec"]
17-
adbc-exec = ["dep:adbc_driver_manager", "dep:adbc_core", "dep:arrow-array", "dep:arrow-schema", "dep:arrow-ipc"]
17+
adbc-exec = ["dep:adbc_driver_manager", "dep:adbc_core", "dep:arrow-array", "dep:arrow-schema", "dep:arrow-ipc", "dep:url"]
1818
wasm = ["dep:wasm-bindgen"]
1919
mcp-server = ["dep:rmcp", "dep:tokio"]
2020
mcp-adbc = ["mcp-server", "adbc-exec"]
@@ -40,6 +40,7 @@ adbc_core = { version = "0.22.0", optional = true }
4040
arrow-array = { version = ">=53.1.0, <58", default-features = false, optional = true }
4141
arrow-schema = { version = ">=53.1.0, <58", default-features = false, optional = true }
4242
arrow-ipc = { version = ">=53.1.0, <58", default-features = false, optional = true }
43+
url = { version = "2.5", optional = true }
4344
chrono = { version = "0.4", default-features = false, features = ["clock"] }
4445
minijinja = { version = "2.5", features = ["builtins", "serde"] }
4546
wasm-bindgen = { version = "0.2", optional = true }

sidemantic-rs/src/db/adbc.rs

Lines changed: 178 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use adbc_core::{
22
options::{AdbcVersion, OptionConnection, OptionDatabase, OptionValue},
33
Connection, Database, Driver, Statement, LOAD_FLAG_DEFAULT,
44
};
5-
use adbc_driver_manager::ManagedDriver;
5+
use adbc_driver_manager::{ManagedConnection, ManagedDatabase, ManagedDriver};
66
use arrow_array::{
77
Array, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal128Array, Float16Array,
88
Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray,
@@ -12,10 +12,16 @@ use arrow_array::{
1212
UInt32Array, UInt64Array, UInt8Array,
1313
};
1414
use arrow_ipc::writer::StreamWriter;
15-
use arrow_schema::{DataType, TimeUnit};
15+
use arrow_schema::{DataType, Schema, TimeUnit};
1616
use std::io::Write;
17+
use std::path::PathBuf;
1718

19+
use crate::core::SemanticGraph;
1820
use crate::error::{Result, SidemanticError};
21+
use crate::sql::{QueryRewriter, SemanticQuery, SqlGenerator};
22+
23+
use super::result::ExecutionResult;
24+
use super::url::ConnectionSpec;
1925

2026
#[derive(Debug, Clone, PartialEq)]
2127
pub enum AdbcValue {
@@ -50,6 +56,153 @@ pub struct AdbcExecutionRequest {
5056
pub connection_options: Vec<(OptionConnection, OptionValue)>,
5157
}
5258

59+
/// Pure Rust ADBC executor for semantic queries.
60+
///
61+
/// Drivers are loaded by the ADBC driver manager. Drivers installed with
62+
/// `dbc install <driver>` are found through the manager's normal manifest
63+
/// search paths.
64+
pub struct AdbcExecutor {
65+
pub spec: ConnectionSpec,
66+
database: ManagedDatabase,
67+
connection: ManagedConnection,
68+
}
69+
70+
impl AdbcExecutor {
71+
pub fn connect(spec: ConnectionSpec) -> Result<Self> {
72+
let entrypoint = spec.entrypoint.clone();
73+
let mut driver = load_managed_driver(
74+
&spec.driver,
75+
entrypoint.as_deref(),
76+
spec.adbc_version,
77+
spec.load_flags,
78+
spec.additional_search_paths.clone(),
79+
)
80+
.map_err(|err| {
81+
SidemanticError::Database(format!(
82+
"failed to load ADBC driver '{}' through the dbc/ADBC registry. \
83+
Install the driver with `dbc install {}` and make sure the ADBC driver \
84+
manager search path can see it. Underlying error: {err}",
85+
spec.driver, spec.driver
86+
))
87+
})?;
88+
89+
let options = connection_spec_database_options(&spec);
90+
let database = if options.is_empty() {
91+
driver.new_database()
92+
} else {
93+
driver.new_database_with_opts(options)
94+
}?;
95+
let connection = database.new_connection()?;
96+
97+
Ok(Self {
98+
spec,
99+
database,
100+
connection,
101+
})
102+
}
103+
104+
pub fn connect_url(url: &str) -> Result<Self> {
105+
Self::connect(ConnectionSpec::from_url(url)?)
106+
}
107+
108+
/// Execute SQL and return an Arrow record batch reader.
109+
pub fn execute_sql(&mut self, sql: &str) -> Result<ExecutionResult> {
110+
let mut statement = self.connection.new_statement()?;
111+
statement.set_sql_query(sql)?;
112+
let mut reader = statement.execute()?;
113+
let schema = reader.schema();
114+
let mut batches = Vec::new();
115+
for batch in &mut reader {
116+
batches.push(batch?);
117+
}
118+
Ok(ExecutionResult::new(sql.to_string(), schema, batches))
119+
}
120+
121+
/// Execute a SQL statement that does not return a result set.
122+
pub fn execute_update(&mut self, sql: &str) -> Result<Option<i64>> {
123+
let mut statement = self.connection.new_statement()?;
124+
statement.set_sql_query(sql)?;
125+
Ok(statement.execute_update()?)
126+
}
127+
128+
/// Generate SQL from a semantic query and execute it through ADBC.
129+
pub fn execute_semantic_query(
130+
&mut self,
131+
graph: &SemanticGraph,
132+
query: &SemanticQuery,
133+
) -> Result<ExecutionResult> {
134+
let sql = SqlGenerator::new(graph).generate(query)?;
135+
self.execute_sql(&sql)
136+
}
137+
138+
/// Rewrite SQL through the semantic graph, then execute the rewritten SQL.
139+
pub fn rewrite_and_execute(
140+
&mut self,
141+
graph: &SemanticGraph,
142+
sql: &str,
143+
) -> Result<ExecutionResult> {
144+
let rewritten = QueryRewriter::new(graph).rewrite(sql)?;
145+
self.execute_sql(&rewritten)
146+
}
147+
148+
/// Return an ADBC metadata stream for catalogs, schemas, tables, and columns.
149+
pub fn get_objects(
150+
&self,
151+
depth: adbc_core::options::ObjectDepth,
152+
) -> Result<Box<dyn RecordBatchReader + Send + '_>> {
153+
Ok(Box::new(
154+
self.connection
155+
.get_objects(depth, None, None, None, None, None)?,
156+
))
157+
}
158+
159+
/// Get the Arrow schema for a table.
160+
pub fn get_table_schema(
161+
&self,
162+
catalog: Option<&str>,
163+
db_schema: Option<&str>,
164+
table_name: &str,
165+
) -> Result<Schema> {
166+
Ok(self
167+
.connection
168+
.get_table_schema(catalog, db_schema, table_name)?)
169+
}
170+
171+
/// Keep a reference to the ADBC database handle for advanced callers.
172+
pub fn database(&self) -> &ManagedDatabase {
173+
&self.database
174+
}
175+
}
176+
177+
fn load_managed_driver(
178+
driver: &str,
179+
entrypoint: Option<&str>,
180+
adbc_version: AdbcVersion,
181+
load_flags: adbc_core::LoadFlags,
182+
additional_search_paths: Option<Vec<PathBuf>>,
183+
) -> std::result::Result<ManagedDriver, adbc_core::error::Error> {
184+
let entrypoint_bytes = entrypoint.map(str::as_bytes);
185+
ManagedDriver::load_from_name(
186+
driver,
187+
entrypoint_bytes,
188+
adbc_version,
189+
load_flags,
190+
additional_search_paths.clone(),
191+
)
192+
.or_else(|err| {
193+
if matches!(adbc_version, AdbcVersion::V100) {
194+
return Err(err);
195+
}
196+
ManagedDriver::load_from_name(
197+
driver,
198+
entrypoint_bytes,
199+
AdbcVersion::V100,
200+
load_flags,
201+
additional_search_paths,
202+
)
203+
})
204+
}
205+
53206
fn adbc_error(context: &str, err: impl std::fmt::Display) -> SidemanticError {
54207
SidemanticError::InvalidConfig(format!("{context}: {err}"))
55208
}
@@ -89,6 +242,29 @@ fn database_options_with_uri(
89242
database_options
90243
}
91244

245+
fn connection_spec_database_options(spec: &ConnectionSpec) -> Vec<(OptionDatabase, OptionValue)> {
246+
let options = spec
247+
.database_options
248+
.iter()
249+
.map(|(key, value)| (database_option_key(key), OptionValue::String(value.clone())))
250+
.collect();
251+
database_options_with_uri(
252+
&spec.driver,
253+
spec.entrypoint.as_deref(),
254+
spec.uri.clone(),
255+
options,
256+
)
257+
}
258+
259+
fn database_option_key(key: &str) -> OptionDatabase {
260+
match key {
261+
"uri" | "adbc.uri" => OptionDatabase::Uri,
262+
"username" | "user" | "adbc.username" => OptionDatabase::Username,
263+
"password" | "adbc.password" => OptionDatabase::Password,
264+
other => OptionDatabase::Other(other.to_string()),
265+
}
266+
}
267+
92268
pub fn execute_with_adbc(request: AdbcExecutionRequest) -> Result<AdbcExecutionResult> {
93269
let AdbcExecutionRequest {
94270
driver,

sidemantic-rs/src/db/mod.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,16 @@
11
#[cfg(feature = "adbc-exec")]
22
mod adbc;
3+
#[cfg(feature = "adbc-exec")]
4+
mod result;
5+
#[cfg(feature = "adbc-exec")]
6+
mod url;
37

48
#[cfg(feature = "adbc-exec")]
59
pub use adbc::{
610
execute_with_adbc, execute_with_adbc_arrow_ipc, write_adbc_arrow_ipc, AdbcArrowIpcResult,
7-
AdbcExecutionRequest, AdbcExecutionResult, AdbcValue,
11+
AdbcExecutionRequest, AdbcExecutionResult, AdbcExecutor, AdbcValue,
812
};
13+
#[cfg(feature = "adbc-exec")]
14+
pub use result::ExecutionResult;
15+
#[cfg(feature = "adbc-exec")]
16+
pub use url::{ConnectionSpec, DBC_DRIVER_HINTS};

sidemantic-rs/src/db/result.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
//! Result wrappers for ADBC execution.
2+
3+
use arrow_array::RecordBatch;
4+
use arrow_schema::SchemaRef;
5+
6+
use crate::error::Result;
7+
8+
/// Arrow-backed result of a database query.
9+
pub struct ExecutionResult {
10+
pub sql: String,
11+
schema: SchemaRef,
12+
batches: Vec<RecordBatch>,
13+
}
14+
15+
impl ExecutionResult {
16+
pub(crate) fn new(sql: String, schema: SchemaRef, batches: Vec<RecordBatch>) -> Self {
17+
Self {
18+
sql,
19+
schema,
20+
batches,
21+
}
22+
}
23+
24+
/// Return the Arrow schema for this result.
25+
pub fn schema(&self) -> SchemaRef {
26+
self.schema.clone()
27+
}
28+
29+
/// Collect all record batches from the result stream.
30+
pub fn collect(self) -> Result<Vec<RecordBatch>> {
31+
Ok(self.batches)
32+
}
33+
}

0 commit comments

Comments
 (0)