11// SPDX-License-Identifier: Apache-2.0
22// SPDX-FileCopyrightText: Copyright the Vortex contributors
33
4+ #include " duckdb_vx/table_function.h"
45#include " duckdb_vx/duckdb_diagnostics.h"
56
67DUCKDB_INCLUDES_BEGIN
@@ -30,8 +31,10 @@ struct CTableFunctionInfo final : TableFunctionInfo {
3031};
3132
3233struct CTableBindData final : TableFunctionData {
33- CTableBindData (unique_ptr<CTableFunctionInfo> info_p, unique_ptr<vortex::CData> ffi_data_p)
34- : info(std::move(info_p)), ffi_data(std::move(ffi_data_p)) {
34+ CTableBindData (unique_ptr<CTableFunctionInfo> info_p,
35+ unique_ptr<vortex::CData> ffi_data_p,
36+ const vector<LogicalType> &types)
37+ : info(std::move(info_p)), ffi_data(std::move(ffi_data_p)), types(types) {
3538 }
3639
3740 unique_ptr<FunctionData> Copy () const override {
@@ -43,11 +46,13 @@ struct CTableBindData final : TableFunctionData {
4346 throw BinderException (IntoErrString (error_out));
4447 }
4548 return make_uniq<CTableBindData>(make_uniq<CTableFunctionInfo>(info->vtab ),
46- unique_ptr<CData>(reinterpret_cast <CData *>(copied_ffi_data)));
49+ unique_ptr<CData>(reinterpret_cast <CData *>(copied_ffi_data)),
50+ types);
4751 }
4852
4953 unique_ptr<CTableFunctionInfo> info;
5054 unique_ptr<CData> ffi_data;
55+ vector<LogicalType> types;
5156};
5257
5358struct CTableGlobalData final : GlobalTableFunctionState {
@@ -88,6 +93,103 @@ double c_table_scan_progress(ClientContext &context,
8893 return bind.info ->vtab .table_scan_progress (c_ctx, c_bind_data, c_global_state);
8994}
9095
96+ static Value &UnwrapValue (duckdb_value value) {
97+ return *(reinterpret_cast <Value *>(value));
98+ }
99+
100+ unique_ptr<BaseStatistics> numeric_stats (duckdb_column_statistics &stats, LogicalType type) {
101+ BaseStatistics out = StringStats::CreateUnknown (type);
102+ if (stats.min ) {
103+ NumericStats::SetMin (out, UnwrapValue (stats.min ));
104+ duckdb_destroy_value (&stats.min );
105+ }
106+ if (stats.max ) {
107+ NumericStats::SetMax (out, UnwrapValue (stats.max ));
108+ duckdb_destroy_value (&stats.max );
109+ }
110+ if (!stats.has_null ) {
111+ out.Set (StatsInfo::CANNOT_HAVE_NULL_VALUES);
112+ }
113+ return out.ToUnique ();
114+ }
115+
116+ unique_ptr<BaseStatistics> string_stats (duckdb_column_statistics &stats, LogicalType type) {
117+ BaseStatistics out = StringStats::CreateUnknown (type);
118+ if (stats.min ) {
119+ StringStats::SetMin (out, StringValue::Get (UnwrapValue (stats.min )));
120+ duckdb_destroy_value (&stats.min );
121+ }
122+ if (stats.max ) {
123+ StringStats::SetMax (out, StringValue::Get (UnwrapValue (stats.max )));
124+ duckdb_destroy_value (&stats.max );
125+ }
126+ if (stats.max_string_length >> 63 ) {
127+ StringStats::SetMaxStringLength (out, uint32_t (stats.max_string_length ));
128+ }
129+ if (!stats.has_null ) {
130+ out.Set (StatsInfo::CANNOT_HAVE_NULL_VALUES);
131+ }
132+
133+ return out.ToUnique ();
134+ }
135+
136+ unique_ptr<BaseStatistics> base_stats (duckdb_column_statistics &stats, LogicalType type) {
137+ BaseStatistics out = StringStats::CreateUnknown (type);
138+ if (!stats.has_null ) {
139+ out.Set (StatsInfo::CANNOT_HAVE_NULL_VALUES);
140+ }
141+ return out.ToUnique ();
142+ }
143+
144+ unique_ptr<BaseStatistics>
145+ c_statistics (ClientContext &context, const FunctionData *bind_data, column_t column_index) {
146+ if (IsVirtualColumn (column_index)) {
147+ return {};
148+ }
149+
150+ const auto &bind = bind_data->Cast <CTableBindData>();
151+ void *const ffi_bind = bind.ffi_data ->DataPtr ();
152+
153+ duckdb_client_context c_ctx = reinterpret_cast <duckdb_client_context>(&context);
154+ duckdb_column_statistics statistics = {};
155+ if (!bind.info ->vtab .statistics (c_ctx, ffi_bind, column_index, &statistics)) {
156+ return {};
157+ }
158+
159+ const LogicalType type = bind.types [column_index];
160+
161+ switch (type.id ()) {
162+ case LogicalTypeId::BOOLEAN:
163+ case LogicalTypeId::TINYINT:
164+ case LogicalTypeId::SMALLINT:
165+ case LogicalTypeId::INTEGER:
166+ case LogicalTypeId::BIGINT:
167+ case LogicalTypeId::FLOAT:
168+ case LogicalTypeId::DOUBLE:
169+ case LogicalTypeId::UTINYINT:
170+ case LogicalTypeId::USMALLINT:
171+ case LogicalTypeId::UINTEGER:
172+ case LogicalTypeId::UBIGINT:
173+ case LogicalTypeId::UHUGEINT:
174+ case LogicalTypeId::HUGEINT: {
175+ return numeric_stats (statistics, type);
176+ }
177+ case LogicalTypeId::VARCHAR:
178+ case LogicalTypeId::BLOB: {
179+ return string_stats (statistics, type);
180+ }
181+ case LogicalTypeId::STRUCT: {
182+ // TODO(myrrc)
183+ // Duckdb's has_null has a different semantics for structs.
184+ // If we propagate our has_null, this breaks Duckdb optimizer.
185+ // You can reproduce it in struct.slt test in vortex-sqllogictests:
186+ return {};
187+ }
188+ default :
189+ return base_stats (statistics, type);
190+ }
191+ }
192+
91193unique_ptr<FunctionData> c_bind (ClientContext &context,
92194 TableFunctionBindInput &input,
93195 vector<LogicalType> &return_types,
@@ -111,7 +213,8 @@ unique_ptr<FunctionData> c_bind(ClientContext &context,
111213 }
112214
113215 return make_uniq<CTableBindData>(make_uniq<CTableFunctionInfo>(info.vtab ),
114- unique_ptr<CData>(reinterpret_cast <CData *>(ffi_bind_data)));
216+ unique_ptr<CData>(reinterpret_cast <CData *>(ffi_bind_data)),
217+ return_types);
115218}
116219
117220unique_ptr<GlobalTableFunctionState> c_init_global (ClientContext &context, TableFunctionInitInput &input) {
@@ -363,6 +466,7 @@ extern "C" duckdb_state duckdb_vx_tfunc_register(duckdb_database ffi_db, const d
363466 tf.get_virtual_columns = c_get_virtual_columns;
364467 tf.to_string = c_to_string;
365468 tf.table_scan_progress = c_table_scan_progress;
469+ tf.statistics = c_statistics;
366470
367471 // Set up the parameters
368472 tf.arguments .reserve (vtab->parameter_count );
0 commit comments