Skip to content

Commit 216cc12

Browse files
authored
Duckdb 1.5-variegata (#6627)
1.5 API changes: - Add support for reusable dictionaries. Remove duckdb_vx_set_dictionary_vector_id in favor of reusable dictionaries. - Add estimated_size to duckdb_vx_object_cache_put. - Replace set_variables with set_variable_defaults in database options. - Replace DUCKDB_STANDARD_VECTOR_SIZE with C++ API constant. - Return an error on Bloom filter pushdown requests; otherwise we hit a DuckDB assertion and a leak (see #6627 (comment)) when duckdb_vx_table_filter_get_expression tries to get an ExpressionFilter but the filter is actually a Bloom filter. - Build httpfs statically if we're on a commit #5767 Continuation of #5901 --------- Signed-off-by: Mikhail Kot <mikhail@spiraldb.com>
1 parent 95ce922 commit 216cc12

File tree

23 files changed

+312
-111
lines changed

23 files changed

+312
-111
lines changed

.github/actions/setup-rust/action.yml

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@ description: "Toolchain setup and Initial compilation"
33

44
inputs:
55
repo-token:
6-
description: "Deprecated: no longer used. Protoc is now downloaded directly from GitHub releases CDN."
6+
description: "GitHub token for accessing the repository (typically secrets.GITHUB_TOKEN)"
77
required: false
8-
default: ""
8+
default: "${{ github.token }}"
99
toolchain:
1010
description: "optional override for the toolchain version (e.g. nightly)"
1111
required: false
@@ -50,9 +50,23 @@ runs:
5050
- name: Rust Compile Cache
5151
if: inputs.enable-sccache == 'true'
5252
uses: mozilla-actions/sccache-action@v0.0.9
53-
with:
54-
version: "v0.14.0"
5553

5654
- name: Install Protoc (for lance-encoding build step)
5755
if: runner.os != 'Windows'
58-
uses: ./.github/actions/setup-protoc
56+
uses: arduino/setup-protoc@v3
57+
with:
58+
version: "29.3"
59+
repo-token: ${{ inputs.repo-token }}
60+
61+
- name: Install Ninja (for DuckDB build system)
62+
uses: seanmiddleditch/gha-setup-ninja@master
63+
64+
- name: Install Sweep
65+
shell: bash
66+
if: ${{ inputs.timestamp == 'true' && github.ref_name == 'develop' }}
67+
run: cargo install cargo-sweep
68+
69+
- name: Timestamp Cache
70+
shell: bash
71+
if: ${{ inputs.timestamp == 'true' && github.ref_name == 'develop' }}
72+
run: cargo sweep --stamp

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ __pycache__/
1212
# Distribution / packaging
1313
.Python
1414
build/
15+
ninja-build/
1516
develop-eggs/
1617
dist/
1718
downloads/

vortex-duckdb/build.rs

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ static DUCKDB_VERSION: Lazy<DuckDBVersion> = Lazy::new(|| {
2929
parse_version(&version)
3030
} else {
3131
// The default DuckDB version to use when DUCKDB_VERSION env var is not set.
32-
DuckDBVersion::Release("1.4.2".to_owned())
32+
DuckDBVersion::Release("1.5.0".to_owned())
3333
}
3434
});
3535

@@ -275,7 +275,15 @@ fn extract_duckdb_source(source_dir: &Path) -> Result<PathBuf, Box<dyn std::erro
275275
}
276276

277277
/// Build DuckDB from source. Used for commit hashes or when VX_DUCKDB_DEBUG is set.
278-
fn build_duckdb(duckdb_source_dir: &Path) -> Result<PathBuf, Box<dyn std::error::Error>> {
278+
fn build_duckdb(
279+
duckdb_source_dir: &Path,
280+
version: &DuckDBVersion,
281+
debug: bool,
282+
) -> Result<PathBuf, Box<dyn std::error::Error>> {
283+
let build_type = match debug {
284+
true => "debug",
285+
false => "release",
286+
};
279287
// Check for ninja
280288
if Command::new("ninja").arg("--version").output().is_err() {
281289
return Err(
@@ -285,10 +293,12 @@ fn build_duckdb(duckdb_source_dir: &Path) -> Result<PathBuf, Box<dyn std::error:
285293

286294
let inner_dir_name = DUCKDB_VERSION.archive_inner_dir_name();
287295
let duckdb_repo_dir = duckdb_source_dir.join(&inner_dir_name);
288-
let build_dir = duckdb_repo_dir.join("build").join("debug");
296+
let build_dir = duckdb_repo_dir.join("build").join(build_type);
289297

290-
// Check if already built
291298
let lib_dir = build_dir.join("src");
299+
let lib_dir_str = lib_dir.display();
300+
println!("cargo:info=Checking if DuckDB is already built in {lib_dir_str}",);
301+
292302
let already_built = lib_dir.join("libduckdb.dylib").exists()
293303
|| lib_dir.join("libduckdb.so").exists()
294304
|| lib_dir
@@ -309,12 +319,26 @@ fn build_duckdb(duckdb_source_dir: &Path) -> Result<PathBuf, Box<dyn std::error:
309319
("1", "0")
310320
};
311321

322+
let mut envs = vec![
323+
("GEN", "ninja"),
324+
("DISABLE_SANITIZER", asan_option),
325+
("THREADSAN", tsan_option),
326+
("BUILD_SHELL", "false"),
327+
("BUILD_UNITTESTS", "false"),
328+
("ENABLE_UNITTEST_CPP_TESTS", "false"),
329+
];
330+
331+
// If we're building from a commit (likely a pre-release), we need to
332+
// build extensions statically. Otherwise DuckDB tries to load them
333+
// from an http endpoint with version 0.0.1 (all non-tagged builds)
334+
// which doesn't exist. httpfs also requires CURL dev headers
335+
if matches!(version, DuckDBVersion::Commit(_)) {
336+
envs.push(("BUILD_EXTENSIONS", "httpfs;parquet;tpch;tpcds;jemalloc"));
337+
};
338+
312339
let output = Command::new("make")
313340
.current_dir(&duckdb_repo_dir)
314-
.env("GEN", "ninja")
315-
.env("DISABLE_SANITIZER", asan_option)
316-
.env("THREADSAN", tsan_option)
317-
.arg("debug")
341+
.envs(envs)
318342
.output()?;
319343

320344
if !output.status.success() {
@@ -398,15 +422,21 @@ fn main() {
398422
drop(fs::remove_dir_all(&duckdb_symlink));
399423
std::os::unix::fs::symlink(&extracted_source_path, &duckdb_symlink).unwrap();
400424

401-
// Determine whether to build from source or use prebuilt libraries
402425
let use_debug_build =
403426
env::var("VX_DUCKDB_DEBUG").is_ok_and(|v| matches!(v.as_str(), "1" | "true"));
427+
println!("cargo:info=DuckDB debug build: {use_debug_build}");
404428

405429
let library_path = if use_debug_build || !DUCKDB_VERSION.is_release() {
406430
// Build from source for:
407431
// - Commit hashes (no prebuilt available)
408432
// - When VX_DUCKDB_DEBUG=1 (user wants debug build)
409-
build_duckdb(&extracted_source_path).unwrap()
433+
match build_duckdb(&extracted_source_path, &DUCKDB_VERSION, use_debug_build) {
434+
Ok(path) => path,
435+
Err(err) => {
436+
println!("cargo:error={err}");
437+
panic!("duckdb build failed");
438+
}
439+
}
410440
} else {
411441
// Download prebuilt libraries for release versions
412442
let archive_path = download_duckdb_lib_archive().unwrap();
@@ -494,6 +524,7 @@ fn main() {
494524
.file("cpp/file_system.cpp")
495525
.file("cpp/logical_type.cpp")
496526
.file("cpp/object_cache.cpp")
527+
.file("cpp/reusable_dict.cpp")
497528
.file("cpp/replacement_scan.cpp")
498529
.file("cpp/scalar_function.cpp")
499530
.file("cpp/table_filter.cpp")

vortex-duckdb/cpp/config.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ duckdb_state duckdb_vx_get_config_value(duckdb_config config, const char *key, d
4040

4141
std::string key_str(key);
4242

43-
// First check set_variables (the primary location for config values)
44-
auto set_it = db_config->options.set_variables.find(key_str);
45-
if (set_it != db_config->options.set_variables.end()) {
43+
// First check set_variable_defaults (the primary location for config values)
44+
auto set_it = db_config->options.set_variable_defaults.find(key_str);
45+
if (set_it != db_config->options.set_variable_defaults.end()) {
4646
*out_value = reinterpret_cast<duckdb_value>(new Value(set_it->second));
4747
return DuckDBSuccess;
4848
}
@@ -75,8 +75,9 @@ int duckdb_vx_config_has_key(duckdb_config config, const char *key) {
7575

7676
std::string key_str(key);
7777

78-
// Check if the key exists in set_variables (primary location)
79-
if (db_config->options.set_variables.find(key_str) != db_config->options.set_variables.end()) {
78+
// Check if the key exists in set_variable_defaults (primary location)
79+
if (db_config->options.set_variable_defaults.find(key_str) !=
80+
db_config->options.set_variable_defaults.end()) {
8081
return 1;
8182
}
8283

vortex-duckdb/cpp/include/duckdb_vx.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "duckdb_vx/file_system.h"
1414
#include "duckdb_vx/logical_type.h"
1515
#include "duckdb_vx/object_cache.h"
16+
#include "duckdb_vx/reusable_dict.h"
1617
#include "duckdb_vx/replacement_scan.h"
1718
#include "duckdb_vx/scalar_function.h"
1819
#include "duckdb_vx/table_filter.h"

vortex-duckdb/cpp/include/duckdb_vx/object_cache.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ typedef void (*duckdb_vx_deleter_fn)(void *ptr);
2020
void duckdb_vx_object_cache_put(duckdb_vx_object_cache object_cache,
2121
const char *key,
2222
void *value,
23+
uint64_t estimated_size,
2324
duckdb_vx_deleter_fn deleter);
2425

2526
// Fetches the key from the object cache, returning nullptr if the key is not present.
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
#pragma once
5+
6+
#include "duckdb.h"
7+
#include "duckdb_vx/error.h"
8+
9+
#ifdef __cplusplus /* If compiled as C++, use C ABI */
10+
extern "C" {
11+
#endif
12+
13+
typedef struct duckdb_vx_reusable_dict_ *duckdb_vx_reusable_dict;
14+
15+
/// Creates a new reusable dictionary from a logical type and size.
16+
/// The returned dictionary can be used with duckdb_vx_vector_dictionary_reusable.
17+
duckdb_vx_reusable_dict duckdb_vx_reusable_dict_create(duckdb_logical_type logical_type, idx_t size);
18+
19+
/// Destroys the reusable dictionary.
20+
void duckdb_vx_reusable_dict_destroy(duckdb_vx_reusable_dict *dict);
21+
22+
/// Clones the reusable dictionary.
23+
duckdb_vx_reusable_dict duckdb_vx_reusable_dict_clone(duckdb_vx_reusable_dict dict);
24+
25+
/// Get the internal vector of the reusable dictionary.
26+
void duckdb_vx_reusable_dict_set_vector(duckdb_vx_reusable_dict reusable, duckdb_vector *out_vector);
27+
28+
/// Creates a dictionary vector using a reusable dictionary and a selection vector.
29+
void duckdb_vx_vector_dictionary_reusable(duckdb_vector vector,
30+
duckdb_vx_reusable_dict reusable,
31+
duckdb_selection_vector sel_vec);
32+
33+
#ifdef __cplusplus /* End C ABI */
34+
}
35+
#endif

vortex-duckdb/cpp/include/duckdb_vx/table_filter.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ typedef enum DUCKDB_VX_TABLE_FILTER_TYPE {
2020
DUCKDB_VX_TABLE_FILTER_TYPE_OPTIONAL_FILTER = 6, // executing filter is not required for query correctness
2121
DUCKDB_VX_TABLE_FILTER_TYPE_IN_FILTER = 7, // col IN (C1, C2, C3, ...)
2222
DUCKDB_VX_TABLE_FILTER_TYPE_DYNAMIC_FILTER = 8, // dynamic filters can be updated at run-time
23-
DUCKDB_VX_TABLE_FILTER_TYPE_EXPRESSION_FILTER = 9 // an arbitrary expression
23+
DUCKDB_VX_TABLE_FILTER_TYPE_EXPRESSION_FILTER = 9, // an arbitrary expression
24+
DUCKDB_VX_TABLE_FILTER_TYPE_BLOOM_FILTER =
25+
10 // a probabilistic filter that can test whether a value is in a set of other values
2426
} duckdb_vx_table_filter_type;
2527

2628
typedef struct duckdb_vx_table_filter_set_ *duckdb_vx_table_filter_set;

vortex-duckdb/cpp/include/duckdb_vx/vector.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,6 @@ void duckdb_vx_vector_dictionary(duckdb_vector ffi_vector,
3232
duckdb_selection_vector ffi_sel_vec,
3333
idx_t count);
3434

35-
void duckdb_vx_set_dictionary_vector_id(duckdb_vector dict, const char *id, unsigned int id_len);
36-
3735
void duckdb_vx_set_dictionary_vector_length(duckdb_vector dict, unsigned int len);
3836

3937
// Add the buffer to the string vector (basically, keep it alive as long as the vector).

vortex-duckdb/cpp/object_cache.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,17 @@ namespace vortex {
1212
class OpaqueWrapper : public duckdb::ObjectCacheEntry {
1313
public:
1414
duckdb::unique_ptr<void, duckdb_vx_deleter_fn> ptr;
15+
duckdb::optional_idx estimated_size;
1516

16-
explicit OpaqueWrapper(void *p, duckdb_vx_deleter_fn del) : ptr(p, del) {
17+
explicit OpaqueWrapper(void *p, duckdb::optional_idx estimated_size, duckdb_vx_deleter_fn del)
18+
: ptr(p, del), estimated_size(estimated_size) {
1719
}
1820
~OpaqueWrapper() override = default;
1921

22+
duckdb::optional_idx GetEstimatedCacheMemory() const override {
23+
return estimated_size;
24+
}
25+
2026
duckdb::string GetObjectType() override {
2127
return "vortex_opaque_wrapper";
2228
}
@@ -32,9 +38,11 @@ class OpaqueWrapper : public duckdb::ObjectCacheEntry {
3238
extern "C" void duckdb_vx_object_cache_put(duckdb_vx_object_cache cache,
3339
const char *key,
3440
void *value,
41+
uint64_t estimated_size,
3542
duckdb_vx_deleter_fn deleter) {
3643
auto object_cache = reinterpret_cast<duckdb::ObjectCache *>(cache);
37-
auto wrapper = duckdb::make_shared_ptr<vortex::OpaqueWrapper>(value, deleter);
44+
auto wrapper =
45+
duckdb::make_shared_ptr<vortex::OpaqueWrapper>(value, duckdb::optional_idx(estimated_size), deleter);
3846
object_cache->Put(std::string(key), wrapper);
3947
}
4048

0 commit comments

Comments
 (0)