Skip to content

Commit 543dbe7

Browse files
authored
duckdb: don't copy vtab on function bind (#7616)
Store a reference to function's info in bind data to avoid copying vtab. Derive bind data from base class and not derived one to save a few bytes. Replace a few nullptr returns with D_ASSERTs. Signed-off-by: Mikhail Kot <to@myrrc.dev>
1 parent e0bbe32 commit 543dbe7

5 files changed

Lines changed: 82 additions & 95 deletions

File tree

vortex-duckdb/cpp/data_chunk.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
#include "duckdb_vx/data_chunk.h"
45
#include "duckdb_vx/duckdb_diagnostics.h"
56
DUCKDB_INCLUDES_BEGIN
67
#include "duckdb/common/types/data_chunk.hpp"
78
DUCKDB_INCLUDES_END
89

9-
#include "duckdb_vx.h"
10-
1110
const char *duckdb_data_chunk_to_string(duckdb_data_chunk chunk, duckdb_vx_error *err) {
1211
try {
1312
auto dchunk = reinterpret_cast<duckdb::DataChunk *>(chunk);

vortex-duckdb/cpp/expr.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4-
#include "duckdb_vx.h"
5-
4+
#include "duckdb_vx/expr.h"
65
#include "duckdb_vx/duckdb_diagnostics.h"
6+
77
DUCKDB_INCLUDES_BEGIN
88
#include "duckdb/planner/expression/bound_between_expression.hpp"
99
#include "duckdb/planner/expression/bound_columnref_expression.hpp"

vortex-duckdb/cpp/include/duckdb_vx/copy_function.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,4 +76,4 @@ duckdb_state duckdb_vx_copy_func_register_vtab_one(duckdb_database ffi_db);
7676

7777
#ifdef __cplusplus /* End C ABI */
7878
};
79-
#endif
79+
#endif

vortex-duckdb/cpp/logical_type.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,6 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
#include "duckdb_vx/duckdb_diagnostics.h"
5-
6-
DUCKDB_INCLUDES_BEGIN
7-
#include "duckdb/common/types.hpp"
8-
DUCKDB_INCLUDES_END
9-
10-
#include "duckdb_vx.h"
115
#include "duckdb_vx/logical_type.h"
126

137
DUCKDB_INCLUDES_BEGIN
@@ -16,7 +10,7 @@ DUCKDB_INCLUDES_END
1610
#include <cassert>
1711

1812
duckdb_logical_type duckdb_vx_logical_type_copy(duckdb_logical_type ty) {
19-
assert(ty != nullptr);
13+
D_ASSERT(ty);
2014
auto *src = reinterpret_cast<duckdb::LogicalType *>(ty);
2115
auto copy = duckdb::make_uniq<duckdb::LogicalType>(*src);
2216
return reinterpret_cast<duckdb_logical_type>(copy.release());

vortex-duckdb/cpp/table_function.cpp

Lines changed: 77 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -24,37 +24,45 @@ struct CTableFunctionInfo final : TableFunctionInfo {
2424
explicit CTableFunctionInfo(const duckdb_vx_tfunc_vtab_t &vtab) : vtab(vtab) {
2525
}
2626

27-
duckdb_vx_tfunc_vtab_t vtab;
27+
const duckdb_vx_tfunc_vtab_t vtab;
2828
};
2929

30-
struct CTableBindData final : TableFunctionData {
31-
CTableBindData(unique_ptr<CTableFunctionInfo> info_p,
30+
struct CTableBindData final : FunctionData {
31+
CTableBindData(const CTableFunctionInfo &info,
3232
unique_ptr<CData> ffi_data_p,
3333
const vector<LogicalType> &types)
34-
: info(std::move(info_p)), ffi_data(std::move(ffi_data_p)), types(types) {
34+
: info(info), ffi_data(std::move(ffi_data_p)), types(types) {
3535
}
36+
unique_ptr<FunctionData> Copy() const override;
37+
bool Equals(const FunctionData &other_base) const override;
3638

37-
unique_ptr<FunctionData> Copy() const override {
38-
D_ASSERT(info->vtab.bind_data_clone);
39-
40-
duckdb_vx_error error_out = nullptr;
41-
const auto copied_ffi_data = info->vtab.bind_data_clone(ffi_data->DataPtr(), &error_out);
42-
if (error_out) {
43-
throw BinderException(IntoErrString(error_out));
44-
}
45-
46-
auto info_p = make_uniq<CTableFunctionInfo>(info->vtab);
47-
auto ffi_data_p = unique_ptr<CData>(reinterpret_cast<CData *>(copied_ffi_data));
48-
return make_uniq<CTableBindData>(std::move(info_p), std::move(ffi_data_p), types);
49-
}
50-
51-
unique_ptr<CTableFunctionInfo> info;
39+
// Table function info lives for as long as TableFunction is alive as it's
40+
// stored inside TableFunction, so it's safe to store a reference.
41+
const CTableFunctionInfo &info;
5242
unique_ptr<CData> ffi_data;
5343
vector<LogicalType> types;
5444
};
5545

46+
unique_ptr<FunctionData> CTableBindData::Copy() const {
47+
duckdb_vx_error error_out = nullptr;
48+
const auto copied_ffi_data = info.vtab.bind_data_clone(ffi_data->DataPtr(), &error_out);
49+
if (error_out) {
50+
throw BinderException(IntoErrString(error_out));
51+
}
52+
53+
auto ffi_data_p = unique_ptr<CData>(reinterpret_cast<CData *>(copied_ffi_data));
54+
return make_uniq<CTableBindData>(info, std::move(ffi_data_p), types);
55+
}
56+
57+
bool CTableBindData::Equals(const FunctionData &other_base) const {
58+
const CTableBindData &other = other_base.Cast<CTableBindData>();
59+
// if "types" are different, "ffi_data" would also be different as it
60+
// contains types inside, so omit "types" from comparison.
61+
return &info == &other.info && ffi_data.get() == other.ffi_data.get();
62+
}
63+
5664
struct CTableGlobalData final : GlobalTableFunctionState {
57-
explicit CTableGlobalData(unique_ptr<CData> ffi_data_p) : ffi_data(std::move(ffi_data_p)) {
65+
explicit CTableGlobalData(unique_ptr<CData> ffi_data) : ffi_data(std::move(ffi_data)) {
5866
}
5967

6068
idx_t MaxThreads() const override {
@@ -65,28 +73,20 @@ struct CTableGlobalData final : GlobalTableFunctionState {
6573
};
6674

6775
struct CTableLocalData final : LocalTableFunctionState {
68-
explicit CTableLocalData(unique_ptr<CData> ffi_data_p) : ffi_data(std::move(ffi_data_p)) {
76+
explicit CTableLocalData(unique_ptr<CData> ffi_data) : ffi_data(std::move(ffi_data)) {
6977
}
7078

7179
unique_ptr<CData> ffi_data;
7280
};
7381

74-
/**
75-
* Result of the bind function encapsulates the output schema.
76-
*/
77-
struct CTableBindResult {
78-
vector<LogicalType> &return_types;
79-
vector<string> &names;
80-
};
81-
8282
double c_table_scan_progress(ClientContext &context,
8383
const FunctionData *bind_data,
8484
const GlobalTableFunctionState *global_state) {
8585
auto &bind = bind_data->Cast<CTableBindData>();
8686
duckdb_client_context c_ctx = reinterpret_cast<duckdb_client_context>(&context);
8787
void *const c_bind_data = bind.ffi_data->DataPtr();
8888
void *const c_global_state = global_state->Cast<CTableGlobalData>().ffi_data->DataPtr();
89-
return bind.info->vtab.table_scan_progress(c_ctx, c_bind_data, c_global_state);
89+
return bind.info.vtab.table_scan_progress(c_ctx, c_bind_data, c_global_state);
9090
}
9191

9292
static Value &UnwrapValue(duckdb_value value) {
@@ -148,7 +148,7 @@ c_statistics(ClientContext &context, const FunctionData *bind_data, column_t col
148148

149149
duckdb_client_context c_ctx = reinterpret_cast<duckdb_client_context>(&context);
150150
duckdb_column_statistics statistics = {};
151-
if (!bind.info->vtab.statistics(c_ctx, ffi_bind, column_index, &statistics)) {
151+
if (!bind.info.vtab.statistics(c_ctx, ffi_bind, column_index, &statistics)) {
152152
return {};
153153
}
154154

@@ -186,17 +186,17 @@ c_statistics(ClientContext &context, const FunctionData *bind_data, column_t col
186186
}
187187
}
188188

189+
struct CTableBindResult {
190+
vector<LogicalType> &return_types;
191+
vector<string> &names;
192+
};
193+
189194
unique_ptr<FunctionData> c_bind(ClientContext &context,
190195
TableFunctionBindInput &input,
191196
vector<LogicalType> &return_types,
192197
vector<string> &names) {
193198
const auto &info = input.table_function.function_info->Cast<CTableFunctionInfo>();
194-
195-
// Setup bind info to pass into the callback.
196-
CTableBindResult result = {
197-
return_types,
198-
names,
199-
};
199+
CTableBindResult result = {return_types, names};
200200

201201
duckdb_vx_error error_out = nullptr;
202202
auto ctx = reinterpret_cast<duckdb_client_context>(&context);
@@ -208,13 +208,12 @@ unique_ptr<FunctionData> c_bind(ClientContext &context,
208208
throw BinderException(IntoErrString(error_out));
209209
}
210210

211-
return make_uniq<CTableBindData>(make_uniq<CTableFunctionInfo>(info.vtab),
212-
unique_ptr<CData>(reinterpret_cast<CData *>(ffi_bind_data)),
213-
return_types);
211+
auto cdata = unique_ptr<CData>(reinterpret_cast<CData *>(ffi_bind_data));
212+
return make_uniq<CTableBindData>(info, std::move(cdata), return_types);
214213
}
215214

216215
unique_ptr<GlobalTableFunctionState> c_init_global(ClientContext &context, TableFunctionInitInput &input) {
217-
const auto &bind = input.bind_data->Cast<CTableBindData>();
216+
const CTableBindData &bind = input.bind_data->Cast<CTableBindData>();
218217

219218
duckdb_vx_tfunc_init_input ffi_input = {
220219
.bind_data = bind.ffi_data->DataPtr(),
@@ -227,7 +226,7 @@ unique_ptr<GlobalTableFunctionState> c_init_global(ClientContext &context, Table
227226
};
228227

229228
duckdb_vx_error error_out = nullptr;
230-
auto ffi_global_data = bind.info->vtab.init_global(&ffi_input, &error_out);
229+
duckdb_vx_data ffi_global_data = bind.info.vtab.init_global(&ffi_input, &error_out);
231230
if (error_out) {
232231
throw BinderException(IntoErrString(error_out));
233232
}
@@ -240,7 +239,7 @@ unique_ptr<LocalTableFunctionState> c_init_local(ExecutionContext &context,
240239
TableFunctionInitInput &input,
241240
GlobalTableFunctionState *global_state) {
242241
const auto &bind = input.bind_data->Cast<CTableBindData>();
243-
auto global_data = global_state->Cast<CTableGlobalData>().ffi_data->DataPtr();
242+
void *const ffi_global = global_state->Cast<CTableGlobalData>().ffi_data->DataPtr();
244243

245244
duckdb_vx_tfunc_init_input ffi_input = {
246245
.bind_data = bind.ffi_data->DataPtr(),
@@ -253,25 +252,26 @@ unique_ptr<LocalTableFunctionState> c_init_local(ExecutionContext &context,
253252
};
254253

255254
duckdb_vx_error error_out = nullptr;
256-
auto ffi_local_data = bind.info->vtab.init_local(&ffi_input, global_data, &error_out);
255+
duckdb_vx_data ffi_local_data = bind.info.vtab.init_local(&ffi_input, ffi_global, &error_out);
257256
if (error_out) {
258257
throw BinderException(IntoErrString(error_out));
259258
}
260259

261-
return make_uniq<CTableLocalData>(unique_ptr<CData>(reinterpret_cast<CData *>(ffi_local_data)));
260+
auto cdata = unique_ptr<CData>(reinterpret_cast<CData *>(ffi_local_data));
261+
return make_uniq<CTableLocalData>(std::move(cdata));
262262
}
263263

264264
void c_function(ClientContext &context, TableFunctionInput &input, DataChunk &output) {
265265
const auto &bind = input.bind_data->Cast<CTableBindData>();
266266

267-
auto ctx = reinterpret_cast<duckdb_client_context>(&context);
268-
const auto bind_data = bind.ffi_data->DataPtr();
269-
auto global_data = input.global_state->Cast<CTableGlobalData>().ffi_data->DataPtr();
270-
auto local_data = input.local_state->Cast<CTableLocalData>().ffi_data->DataPtr();
267+
duckdb_client_context ffi_ctx = reinterpret_cast<duckdb_client_context>(&context);
268+
void *const ffi_bind = bind.ffi_data->DataPtr();
269+
void *const ffi_global = input.global_state->Cast<CTableGlobalData>().ffi_data->DataPtr();
270+
void *const ffi_local = input.local_state->Cast<CTableLocalData>().ffi_data->DataPtr();
271271

272272
duckdb_data_chunk chunk = reinterpret_cast<duckdb_data_chunk>(&output);
273273
duckdb_vx_error error_out = nullptr;
274-
bind.info->vtab.function(ctx, bind_data, global_data, local_data, chunk, &error_out);
274+
bind.info.vtab.function(ffi_ctx, ffi_bind, ffi_global, ffi_local, chunk, &error_out);
275275
if (error_out) {
276276
throw InvalidInputException(IntoErrString(error_out));
277277
}
@@ -283,12 +283,12 @@ void c_pushdown_complex_filter(ClientContext &,
283283
vector<unique_ptr<Expression>> &filters) {
284284
auto &bind = bind_data->Cast<CTableBindData>();
285285
void *const ffi_bind = bind.ffi_data->DataPtr();
286+
duckdb_vx_error error_out = nullptr;
286287

287288
for (auto iter = filters.begin(); iter != filters.end();) {
288-
duckdb_vx_error error_out = nullptr;
289289
duckdb_vx_expr ffi_expr = reinterpret_cast<duckdb_vx_expr>(iter->get());
290290

291-
const bool pushed = bind.info->vtab.pushdown_complex_filter(ffi_bind, ffi_expr, &error_out);
291+
const bool pushed = bind.info.vtab.pushdown_complex_filter(ffi_bind, ffi_expr, &error_out);
292292
if (error_out) {
293293
throw BinderException(IntoErrString(error_out));
294294
}
@@ -302,7 +302,7 @@ unique_ptr<NodeStatistics> c_cardinality(ClientContext &, const FunctionData *bi
302302
auto &bind = bind_data->Cast<CTableBindData>();
303303

304304
duckdb_vx_node_statistics stats = {};
305-
bind.info->vtab.cardinality(bind.ffi_data->DataPtr(), &stats);
305+
bind.info.vtab.cardinality(bind.ffi_data->DataPtr(), &stats);
306306

307307
auto out = make_uniq<NodeStatistics>();
308308
out->has_estimated_cardinality = stats.has_estimated_cardinality;
@@ -315,29 +315,23 @@ unique_ptr<NodeStatistics> c_cardinality(ClientContext &, const FunctionData *bi
315315

316316
extern "C" duckdb_value duckdb_vx_tfunc_bind_input_get_parameter(duckdb_vx_tfunc_bind_input ffi_input,
317317
size_t index) {
318-
if (!ffi_input) {
319-
return nullptr;
320-
}
321-
318+
D_ASSERT(ffi_input);
322319
const TableFunctionBindInput &input = *reinterpret_cast<TableFunctionBindInput *>(ffi_input);
323-
if (index >= input.inputs.size()) {
324-
return nullptr;
325-
}
326320
return reinterpret_cast<duckdb_value>(new Value(input.inputs[index]));
327321
}
328322

329323
extern "C" void duckdb_vx_tfunc_bind_result_add_column(duckdb_vx_tfunc_bind_result ffi_result,
330324
const char *name_str,
331325
size_t name_len,
332326
duckdb_logical_type ffi_type) {
333-
if (!name_str || !ffi_type) {
334-
return;
335-
}
336-
const auto result = reinterpret_cast<CTableBindResult *>(ffi_result);
337-
const auto logical_type = reinterpret_cast<LogicalType *>(ffi_type);
338-
339-
result->names.emplace_back(name_str, name_len);
340-
result->return_types.emplace_back(*logical_type);
327+
D_ASSERT(ffi_result);
328+
D_ASSERT(name_str);
329+
D_ASSERT(ffi_type);
330+
const CTableBindResult &result = *reinterpret_cast<CTableBindResult *>(ffi_result);
331+
const LogicalType logical_type = *reinterpret_cast<LogicalType *>(ffi_type);
332+
333+
result.names.emplace_back(name_str, name_len);
334+
result.return_types.emplace_back(logical_type);
341335
}
342336

343337
OperatorPartitionData c_get_partition_data(ClientContext &, TableFunctionGetPartitionInput &input) {
@@ -350,7 +344,7 @@ OperatorPartitionData c_get_partition_data(ClientContext &, TableFunctionGetPart
350344
void *const ffi_local = input.local_state->Cast<CTableLocalData>().ffi_data->DataPtr();
351345

352346
duckdb_vx_error error_out = nullptr;
353-
const idx_t batch_index = bind.info->vtab.get_partition_data(ffi_bind, ffi_global, ffi_local, &error_out);
347+
const idx_t batch_index = bind.info.vtab.get_partition_data(ffi_bind, ffi_global, ffi_local, &error_out);
354348
if (error_out) {
355349
throw InvalidInputException(IntoErrString(error_out));
356350
}
@@ -368,18 +362,18 @@ InsertionOrderPreservingMap<string> c_to_string(TableFunctionToStringInput &inpu
368362
InsertionOrderPreservingMap<string> result;
369363
duckdb_vx_string_map ffi_map = reinterpret_cast<duckdb_vx_string_map>(&result);
370364
void *const ffi_bind = input.bind_data->Cast<CTableBindData>().ffi_data->DataPtr();
371-
static_cast<CTableFunctionInfo &>(*input.table_function.function_info).vtab.to_string(ffi_bind, ffi_map);
365+
const auto &info = static_cast<CTableFunctionInfo &>(*input.table_function.function_info);
366+
info.vtab.to_string(ffi_bind, ffi_map);
372367
return result;
373368
}
374369

375370
extern "C" duckdb_state duckdb_vx_tfunc_register(duckdb_database ffi_db, const duckdb_vx_tfunc_vtab_t *vtab) {
376-
if (!ffi_db || !vtab) {
377-
return DuckDBError;
378-
}
371+
D_ASSERT(ffi_db);
372+
D_ASSERT(vtab);
379373

380-
auto wrapper = reinterpret_cast<duckdb::DatabaseWrapper *>(ffi_db);
381-
auto db = wrapper->database->instance;
382-
auto tf = TableFunction(vtab->name, {}, c_function, c_bind, c_init_global, c_init_local);
374+
const DatabaseWrapper &wrapper = *reinterpret_cast<DatabaseWrapper *>(ffi_db);
375+
DatabaseInstance &db = *wrapper.database->instance;
376+
TableFunction tf(vtab->name, {}, c_function, c_bind, c_init_global, c_init_local);
383377

384378
tf.projection_pushdown = true;
385379
tf.filter_pushdown = true;
@@ -400,22 +394,22 @@ extern "C" duckdb_state duckdb_vx_tfunc_register(duckdb_database ffi_db, const d
400394
return {{COLUMN_IDENTIFIER_EMPTY, TableColumn("", LogicalTypeId::BOOLEAN)}};
401395
};
402396

403-
tf.arguments.reserve(vtab->parameter_count);
397+
tf.arguments.resize(vtab->parameter_count);
404398
for (size_t i = 0; i < vtab->parameter_count; i++) {
405-
auto logical_type = reinterpret_cast<LogicalType *>(vtab->parameters[i]);
406-
tf.arguments.emplace_back(*logical_type);
399+
tf.arguments[i] = *reinterpret_cast<LogicalType *>(vtab->parameters[i]);
407400
}
408401

409402
tf.function_info = make_shared_ptr<CTableFunctionInfo>(*vtab);
410403

411404
try {
412-
auto &system_catalog = Catalog::GetSystemCatalog(*db);
413-
auto data = CatalogTransaction::GetSystemTransaction(*db);
405+
auto &system_catalog = Catalog::GetSystemCatalog(db);
406+
auto data = CatalogTransaction::GetSystemTransaction(db);
414407
CreateTableFunctionInfo tf_info(tf);
415-
// Allow registering multiple overloads with the same name but different parameter types.
416408
tf_info.on_conflict = OnCreateConflict::ALTER_ON_CONFLICT;
417409
system_catalog.CreateFunction(data, tf_info);
418-
} catch (...) {
410+
} catch (const std::exception &e) {
411+
ErrorData data(e);
412+
DUCKDB_LOG_ERROR(db, "Failed to create Vortex table function:\t" + data.Message());
419413
return DuckDBError;
420414
}
421415
return DuckDBSuccess;

0 commit comments

Comments
 (0)