Skip to content

Commit 6302ee8

Browse files
authored
duckdb to_string: don't copy (#7582)
1. Don't allocate new string maps in to_string, copy to existing. 2. Add EXPLAIN test for to_string 3. Fix a bug in duckdb sql logic tests runner which didn't compare JSON correctly. Signed-off-by: Mikhail Kot <to@myrrc.dev>
1 parent f3e7b3c commit 6302ee8

7 files changed

Lines changed: 75 additions & 84 deletions

File tree

vortex-duckdb/cpp/include/duckdb_vx/table_function.h

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,18 +38,10 @@ void duckdb_vx_tfunc_bind_result_add_column(duckdb_vx_tfunc_bind_result ffi_resu
3838
size_t name_len,
3939
duckdb_logical_type ffi_type);
4040

41-
// String map for to_string result
4241
typedef struct duckdb_vx_string_map_ *duckdb_vx_string_map;
43-
44-
// Create a new string map
45-
duckdb_vx_string_map duckdb_vx_string_map_create();
46-
4742
// Add a key-value pair to the string map
4843
void duckdb_vx_string_map_insert(duckdb_vx_string_map map, const char *key, const char *value);
4944

50-
// Free the string map
51-
void duckdb_vx_string_map_free(duckdb_vx_string_map map);
52-
5345
// Input data passed into the init_global and init_local callbacks.
5446
typedef struct {
5547
const void *bind_data;
@@ -153,8 +145,8 @@ typedef struct {
153145
bool (*pushdown_complex_filter)(void *bind_data, duckdb_vx_expr expr, duckdb_vx_error *error_out);
154146

155147
void *pushdown_expression;
156-
duckdb_vx_string_map (*to_string)(void *bind_data);
157-
// void *dynamic_to_string;
148+
149+
void (*to_string)(void *bind_data, duckdb_vx_string_map map);
158150

159151
double (*table_scan_progress)(duckdb_client_context ctx, void *bind_data, void *global_state);
160152

vortex-duckdb/cpp/table_function.cpp

Lines changed: 10 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -397,24 +397,18 @@ OperatorPartitionData c_get_partition_data(ClientContext & /*context*/,
397397
return OperatorPartitionData(index);
398398
}
399399

400+
extern "C" void duckdb_vx_string_map_insert(duckdb_vx_string_map map, const char *key, const char *value) {
401+
D_ASSERT(map);
402+
D_ASSERT(key);
403+
D_ASSERT(value);
404+
reinterpret_cast<InsertionOrderPreservingMap<string> *>(map)->insert(key, value);
405+
}
406+
400407
InsertionOrderPreservingMap<string> c_to_string(TableFunctionToStringInput &input) {
401408
InsertionOrderPreservingMap<string> result;
402-
auto &bind = input.bind_data->Cast<CTableBindData>();
403-
404-
// Call the Rust side to get custom string representation if available
405-
if (bind.info->vtab.to_string) {
406-
auto map = bind.info->vtab.to_string(bind.ffi_data->DataPtr());
407-
if (map) {
408-
// Copy the map contents to the result
409-
auto *cpp_map = reinterpret_cast<InsertionOrderPreservingMap<string> *>(map);
410-
for (const auto &[key, value] : *cpp_map) {
411-
result[key] = value;
412-
}
413-
// Free the map allocated by Rust
414-
duckdb_vx_string_map_free(map);
415-
}
416-
}
417-
409+
duckdb_vx_string_map ffi_map = reinterpret_cast<duckdb_vx_string_map>(&result);
410+
void *const ffi_bind = input.bind_data->Cast<CTableBindData>().ffi_data->DataPtr();
411+
static_cast<CTableFunctionInfo &>(*input.table_function.function_info).vtab.to_string(ffi_bind, ffi_map);
418412
return result;
419413
}
420414

@@ -471,25 +465,4 @@ extern "C" duckdb_state duckdb_vx_tfunc_register(duckdb_database ffi_db, const d
471465
}
472466
return DuckDBSuccess;
473467
}
474-
475-
extern "C" duckdb_vx_string_map duckdb_vx_string_map_create() {
476-
auto map = new InsertionOrderPreservingMap<string>();
477-
return reinterpret_cast<duckdb_vx_string_map>(map);
478-
}
479-
480-
extern "C" void duckdb_vx_string_map_insert(duckdb_vx_string_map map, const char *key, const char *value) {
481-
if (!map || !key || !value) {
482-
return;
483-
}
484-
auto *cpp_map = reinterpret_cast<InsertionOrderPreservingMap<string> *>(map);
485-
(*cpp_map)[string(key)] = string(value);
486-
}
487-
488-
extern "C" void duckdb_vx_string_map_free(duckdb_vx_string_map map) {
489-
if (!map) {
490-
return;
491-
}
492-
auto *cpp_map = reinterpret_cast<InsertionOrderPreservingMap<string> *>(map);
493-
delete cpp_map;
494-
}
495468
} // namespace vortex

vortex-duckdb/src/datasource.rs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ use crate::duckdb::Cardinality;
6464
use crate::duckdb::ClientContextRef;
6565
use crate::duckdb::ColumnStatistics;
6666
use crate::duckdb::DataChunkRef;
67+
use crate::duckdb::DuckdbStringMapRef;
6768
use crate::duckdb::ExpressionRef;
6869
use crate::duckdb::LogicalType;
6970
use crate::duckdb::TableFilterSetRef;
@@ -540,17 +541,12 @@ impl<T: DataSourceTableFunction> TableFunction for T {
540541
.ok_or_else(|| vortex_err!("batch id missing, no batches exported"))
541542
}
542543

543-
fn to_string(bind_data: &Self::BindData) -> Option<Vec<(String, String)>> {
544-
let mut result = Vec::new();
545-
546-
result.push(("Function".to_string(), "Vortex Scan".to_string()));
547-
544+
fn to_string(bind_data: &Self::BindData, map: &mut DuckdbStringMapRef) {
545+
map.push("Function", "Vortex Scan");
548546
if !bind_data.filter_exprs.is_empty() {
549547
let mut filters = bind_data.filter_exprs.iter().map(|f| format!("{}", f));
550-
result.push(("Filters".to_string(), filters.join(" /\\\n")));
548+
map.push("Filters", &filters.join(" /\\\n"));
551549
}
552-
553-
Some(result)
554550
}
555551
}
556552

vortex-duckdb/src/duckdb/table_function/mod.rs

Lines changed: 17 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,18 @@ pub struct ColumnStatistics {
4545
pub has_null: bool,
4646
}
4747

48+
// String map lifetime is managed by C++ code
49+
crate::lifetime_wrapper!(DuckdbStringMap, cpp::duckdb_vx_string_map, |_| {});
50+
impl DuckdbStringMapRef {
51+
pub fn push(&mut self, key: &str, value: &str) {
52+
let key = CString::new(key).unwrap_or_else(|_| CString::default());
53+
let value = CString::new(value).unwrap_or_else(|_| CString::default());
54+
unsafe {
55+
cpp::duckdb_vx_string_map_insert(self.as_ptr(), key.as_ptr(), value.as_ptr());
56+
}
57+
}
58+
}
59+
4860
/// A trait that defines the supported operations for a table function in DuckDB.
4961
///
5062
/// This trait does not yet cover the full C++ API, see table_function.hpp.
@@ -154,9 +166,7 @@ pub trait TableFunction: Sized + Debug {
154166
) -> VortexResult<u64>;
155167

156168
/// Returns a vector of key-value pairs for EXPLAIN output
157-
fn to_string(_bind_data: &Self::BindData) -> Option<Vec<(String, String)>> {
158-
None
159-
}
169+
fn to_string(bind_data: &Self::BindData, map: &mut DuckdbStringMapRef);
160170

161171
// TODO(ngates): there are many more callbacks that can be configured.
162172
}
@@ -223,35 +233,13 @@ impl DatabaseRef {
223233
}
224234
}
225235

226-
/// The to_string callback for a table function.
227236
unsafe extern "C-unwind" fn to_string_callback<T: TableFunction>(
228237
bind_data: *mut c_void,
229-
) -> cpp::duckdb_vx_string_map {
238+
map: cpp::duckdb_vx_string_map,
239+
) {
230240
let bind_data = unsafe { &*(bind_data as *const T::BindData) };
231-
232-
match T::to_string(bind_data) {
233-
Some(map) => {
234-
// Create a new C++ map
235-
let cpp_map = unsafe { cpp::duckdb_vx_string_map_create() };
236-
237-
// Fill the map with key-value pairs
238-
for (key, value) in map {
239-
let key_cstr = CString::new(key).unwrap_or_else(|_| CString::default());
240-
let value_cstr = CString::new(value).unwrap_or_else(|_| CString::default());
241-
242-
unsafe {
243-
cpp::duckdb_vx_string_map_insert(
244-
cpp_map,
245-
key_cstr.as_ptr(),
246-
value_cstr.as_ptr(),
247-
);
248-
}
249-
}
250-
251-
cpp_map
252-
}
253-
None => ptr::null_mut(),
254-
}
241+
let map = unsafe { DuckdbStringMap::borrow_mut(map) };
242+
T::to_string(bind_data, map);
255243
}
256244

257245
/// The native function callback for a table function.

vortex-duckdb/src/e2e_test/object_cache_test.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ impl TableFunction for TestTableFunction {
121121
) -> Option<ColumnStatistics> {
122122
None
123123
}
124+
125+
fn to_string(_bind_data: &Self::BindData, _map: &mut crate::duckdb::DuckdbStringMapRef) {}
124126
}
125127

126128
use crate::duckdb::Database;

vortex-sqllogictest/bin/sqllogictests-runner.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use indicatif::MultiProgress;
1717
use indicatif::ProgressBar;
1818
use indicatif::ProgressDrawTarget;
1919
use indicatif::ProgressStyle;
20+
use sqllogictest::Normalizer;
2021
use sqllogictest::Record;
2122
use sqllogictest::Runner;
2223
use sqllogictest::parse_file;
@@ -28,6 +29,18 @@ use vortex_sqllogictest::duckdb::DuckDB;
2829
use vortex_sqllogictest::duckdb::DuckDBTestError;
2930
use vortex_sqllogictest::utils::list_files;
3031

32+
fn duckdb_validator(normalizer: Normalizer, actual: &[Vec<String>], expected: &[String]) -> bool {
33+
let actual = actual.iter().flat_map(|strings| {
34+
strings
35+
.join(" ")
36+
.trim_end()
37+
.split('\n')
38+
.map(|line| line.trim_end().to_string())
39+
.collect::<Vec<_>>()
40+
});
41+
Iterator::eq(actual, expected.iter().map(normalizer))
42+
}
43+
3144
#[tokio::main]
3245
async fn main() -> anyhow::Result<()> {
3346
let args = Args::parse();
@@ -127,6 +140,7 @@ async fn main() -> anyhow::Result<()> {
127140
duckdb_runner.add_label("duckdb");
128141
duckdb_runner.with_column_validator(strict_column_validator);
129142
duckdb_runner.with_normalizer(value_normalizer);
143+
duckdb_runner.with_validator(duckdb_validator);
130144

131145
for record in records.iter() {
132146
if let Record::Halt { .. } = record {
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
include ./setup.slt
5+
6+
onlyif duckdb
7+
query I
8+
COPY (SELECT * FROM (VALUES ('Hello'), ('Hi'), ('Hey')) AS t(str)) TO '$__TEST_DIR__/explain.vortex';
9+
----
10+
3
11+
12+
onlyif duckdb
13+
query TT
14+
EXPLAIN (FORMAT json) SELECT * FROM '$__TEST_DIR__/explain.vortex';
15+
----
16+
physical_plan [
17+
{
18+
"name": "READ_VORTEX",
19+
"children": [],
20+
"extra_info": {
21+
"Function": "Vortex Scan",
22+
"Projections": "str",
23+
"Estimated Cardinality": "3"
24+
}
25+
}
26+
]

0 commit comments

Comments
 (0)