Skip to content

Commit 3a7b9a0

Browse files
authored
Remove duckdbfs (#8468)
DuckdbFS implementation for Vortex was introduced in #6198 as opt-out, but changed to opt-in in #6564 due to performance regressions. There were multiple issues (#6709, #6565 #6685) associated with it which differ from vortex's file system behaviour. It also requires additional dependencies CI which are a blocker for #8486 since MacOS runner doesn't bundle openssl for x86_64 on arm, and builds fail. As a long term goal, calling duckdb's blocking IO inside our event loop isn't the right abstraction. We want to allow duckdb to use its own IO outside vortex. Duckdb fs is also not maintaned actively so we're removing it Signed-off-by: Mikhail Kot <mikhail@spiraldb.com>
1 parent 7ff0d87 commit 3a7b9a0

14 files changed

Lines changed: 49 additions & 691 deletions

File tree

vortex-duckdb/README.md

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,4 @@ loaded.
7777
## Testing a custom DuckDB tag
7878

7979
Change `DUCKDB_VERSION` environment variable value to a preferred hash or commit
80-
(local build), or change build.rs (for testing in CI). If you use a commit,
81-
DuckDB needs to link httpfs statically so you also need to install CURL
82-
development headers (e.g. `libcurl4-openssl-dev`).
80+
(local build), or change build.rs (for testing in CI).

vortex-duckdb/build.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -358,13 +358,12 @@ fn build_duckdb(version: &DuckDBVersion, duckdb_repo_dir: &Path) {
358358
("1", "0")
359359
};
360360

361-
// If we're building from a commit we need to build httpfs and benchmark
361+
// If we're building from a commit we need to build benchmark
362362
// extensions statically, otherwise DuckDB tries to load them from an http
363363
// endpoint with version 0.0.1 (all non-tagged builds) which doesn't exist.
364-
// httpfs static build also requires CURL dev headers
365364
let static_extensions = match version {
366365
DuckDBVersion::Release(_) => "parquet;jemalloc",
367-
DuckDBVersion::Commit(_) => "parquet;jemalloc;httpfs;tpch;tpcds",
366+
DuckDBVersion::Commit(_) => "parquet;jemalloc;tpch;tpcds",
368367
};
369368

370369
let envs = [

vortex-duckdb/cpp/copy_function.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,12 @@ unique_ptr<FunctionData> copy_to_bind(ClientContext &,
5858
}
5959

6060
unique_ptr<GlobalFunctionData>
61-
copy_to_initialize_global(ClientContext &context, FunctionData &bind_data, const string &file_path) {
61+
copy_to_initialize_global(ClientContext &, FunctionData &bind_data, const string &file_path) {
6262
void *const ffi_bind = bind_data.Cast<CopyBindData>().ffi_data->DataPtr();
63-
const auto ffi_ctx = reinterpret_cast<duckdb_client_context>(&context);
6463

6564
duckdb_vx_error error_out = nullptr;
6665
const duckdb_vx_data ffi_global =
67-
duckdb_copy_function_copy_to_initialize_global(ffi_ctx, ffi_bind, file_path.c_str(), &error_out);
66+
duckdb_copy_function_copy_to_initialize_global(ffi_bind, file_path.c_str(), &error_out);
6867
if (error_out) {
6968
throw ExecutorException(IntoErrString(error_out));
7069
}

vortex-duckdb/cpp/table_function.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -197,16 +197,14 @@ bool projection_expression_pushdown(ClientContext &, const TableFunctionProjecti
197197
* and after a query another file is added matching the glob, for second query
198198
* bind() will be called again.
199199
*/
200-
unique_ptr<FunctionData> duckdb_vx_table_function_bind(ClientContext &context,
200+
unique_ptr<FunctionData> duckdb_vx_table_function_bind(ClientContext &,
201201
TableFunctionBindInput &input,
202202
vector<LogicalType> &return_types,
203203
vector<string> &names) {
204204
CTableBindResult result = {return_types, names};
205205

206206
duckdb_vx_error error_out = nullptr;
207-
auto ctx = reinterpret_cast<duckdb_client_context>(&context);
208-
auto ffi_bind_data = duckdb_table_function_bind(ctx,
209-
reinterpret_cast<duckdb_vx_tfunc_bind_input>(&input),
207+
auto ffi_bind_data = duckdb_table_function_bind(reinterpret_cast<duckdb_vx_tfunc_bind_input>(&input),
210208
reinterpret_cast<duckdb_vx_tfunc_bind_result>(&result),
211209
&error_out);
212210
if (error_out) {

vortex-duckdb/include/vortex.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,7 @@ duckdb_vx_data duckdb_table_function_init_global(const duckdb_vx_tfunc_init_inpu
5959
extern duckdb_vx_data duckdb_table_function_init_local(void *global_init_data);
6060

6161
extern
62-
duckdb_vx_data duckdb_table_function_bind(duckdb_client_context ctx,
63-
duckdb_vx_tfunc_bind_input bind_input,
62+
duckdb_vx_data duckdb_table_function_bind(duckdb_vx_tfunc_bind_input bind_input,
6463
duckdb_vx_tfunc_bind_result bind_result,
6564
duckdb_vx_error *error_out);
6665

@@ -74,8 +73,7 @@ duckdb_vx_data duckdb_copy_function_copy_to_bind(const char *const *column_names
7473
duckdb_vx_error *error_out);
7574

7675
extern
77-
duckdb_vx_data duckdb_copy_function_copy_to_initialize_global(duckdb_client_context client_context,
78-
const void *bind_data,
76+
duckdb_vx_data duckdb_copy_function_copy_to_initialize_global(const void *bind_data,
7977
const char *file_path,
8078
duckdb_vx_error *error_out);
8179

vortex-duckdb/src/copy.rs

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
use async_fs::OpenOptions;
45
use futures::SinkExt;
56
use futures::TryStreamExt;
67
use futures::channel::mpsc;
@@ -28,9 +29,7 @@ use crate::RUNTIME;
2829
use crate::SESSION;
2930
use crate::convert::FromLogicalType;
3031
use crate::convert::data_chunk_to_vortex;
31-
use crate::duckdb::ClientContextRef;
3232
use crate::duckdb::DataChunkRef;
33-
use crate::duckdb::DuckDbFsWriter;
3433
use crate::duckdb::LogicalTypeRef;
3534

3635
#[derive(Clone)]
@@ -111,7 +110,6 @@ pub fn copy_to_finalize(init_global: &mut CopyFunctionGlobal) -> VortexResult<()
111110
}
112111

113112
pub fn copy_to_initialize_global(
114-
client_context: &ClientContextRef,
115113
bind_data: &CopyFunctionBind,
116114
file_path: String,
117115
) -> VortexResult<CopyFunctionGlobal> {
@@ -120,16 +118,16 @@ pub fn copy_to_initialize_global(
120118
let array_stream = ArrayStreamAdapter::new(bind_data.dtype.clone(), rx.into_stream());
121119

122120
let handle = SESSION.handle();
123-
// SAFETY: The ClientContext is owned by the Connection and lives for the duration of
124-
// query execution. DuckDB keeps the connection alive while this copy function runs.
125-
let ctx = unsafe { client_context.erase_lifetime() };
126121

127-
// Use DuckDB FS exclusively to match the DuckDB client context configuration.
128-
let writer = DuckDbFsWriter::new(ctx, &file_path)
129-
.map_err(|e| vortex_err!("Failed to create DuckDB FS writer for {file_path}: {e}"))?;
130-
131-
let write_task =
132-
handle.spawn(async move { SESSION.write_options().write(writer, array_stream).await });
122+
let write_task = handle.spawn(async move {
123+
let writer = OpenOptions::new()
124+
.write(true)
125+
.truncate(true)
126+
.create(true)
127+
.open(file_path)
128+
.await?;
129+
SESSION.write_options().write(writer, array_stream).await
130+
});
133131

134132
let worker_pool = RUNTIME.new_pool();
135133
worker_pool.set_workers_to_available_parallelism();

vortex-duckdb/src/duckdb/file_system.rs

Lines changed: 0 additions & 198 deletions
This file was deleted.

vortex-duckdb/src/duckdb/mod.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ mod data_chunk;
1010
mod database;
1111
mod ddb_string;
1212
mod expr;
13-
mod file_system;
1413
mod logical_type;
1514
mod macro_;
1615
mod query_result;
@@ -36,7 +35,6 @@ pub use data_chunk::*;
3635
pub use database::*;
3736
pub use ddb_string::*;
3837
pub use expr::*;
39-
pub use file_system::*;
4038
pub use logical_type::*;
4139
pub use query_result::*;
4240
pub use reusable_dict::*;

0 commit comments

Comments
 (0)