Skip to content

Commit 4d7e6ff

Browse files
committed
Added SQLNULL support and fixed explain (new optional `format` argument)
1 parent 849119d commit 4d7e6ff

10 files changed

Lines changed: 68 additions & 41 deletions

File tree

_duckdb-stubs/__init__.pyi

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -537,7 +537,9 @@ class DuckDBPyRelation:
537537
def distinct(self) -> DuckDBPyRelation: ...
538538
def except_(self, other_rel: Self) -> DuckDBPyRelation: ...
539539
def execute(self) -> DuckDBPyRelation: ...
540-
def explain(self, type: ExplainType | ExplainTypeLiteral = ExplainType.STANDARD) -> str: ...
540+
def explain(
541+
self, type: ExplainType | ExplainTypeLiteral = ExplainType.STANDARD, format: str | None = None
542+
) -> str: ...
541543
def favg(
542544
self, expression: str, groups: str = "", window_spec: str = "", projected_columns: str = ""
543545
) -> DuckDBPyRelation: ...

src/duckdb_py/include/duckdb_python/pybind11/conversions/explain_enum.hpp

Lines changed: 13 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -33,34 +33,18 @@ static ExplainType ExplainTypeFromInteger(int64_t value) {
3333
}
3434
}
3535

36-
namespace PYBIND11_NAMESPACE {
37-
namespace detail {
38-
39-
template <>
40-
struct type_caster<ExplainType> : public type_caster_base<ExplainType> {
41-
using base = type_caster_base<ExplainType>;
42-
ExplainType tmp;
43-
44-
public:
45-
bool load(handle src, bool convert) {
46-
if (base::load(src, convert)) {
47-
return true;
48-
} else if (py::isinstance<py::str>(src)) {
49-
tmp = ExplainTypeFromString(py::str(src));
50-
value = &tmp;
51-
return true;
52-
} else if (py::isinstance<py::int_>(src)) {
53-
tmp = ExplainTypeFromInteger(src.cast<int64_t>());
54-
value = &tmp;
55-
return true;
56-
}
57-
return false;
36+
//! Resolve a Python explain-type argument (ExplainType enum, str, or int) to an ExplainType.
37+
//! NOTE: deliberately NOT a pybind type_caster. A custom caster inheriting type_caster_base shadows the
38+
//! registered py::enum_ inconsistently across translation units - it ends up accepting str/int XOR the enum
39+
//! instance, never both, depending on which TU sees the specialization. Explicit dispatch at the call site is
40+
//! robust regardless of include order.
41+
static ExplainType ExplainTypeFromPython(const py::object &obj) {
42+
if (py::isinstance<py::str>(obj)) {
43+
return ExplainTypeFromString(py::str(obj));
5844
}
59-
60-
static handle cast(ExplainType src, return_value_policy policy, handle parent) {
61-
return base::cast(src, policy, parent);
45+
if (py::isinstance<py::int_>(obj)) {
46+
return ExplainTypeFromInteger(obj.cast<int64_t>());
6247
}
63-
};
64-
65-
} // namespace detail
66-
} // namespace PYBIND11_NAMESPACE
48+
// Fall through to the registered py::enum_ caster (handles an actual ExplainType, throws otherwise).
49+
return obj.cast<ExplainType>();
50+
}

src/duckdb_py/include/duckdb_python/pyrelation.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ struct DuckDBPyRelation {
244244
const Optional<py::int_> &max_col_width, const Optional<py::str> &null_value,
245245
const py::object &render_mode);
246246

247-
string Explain(ExplainType type);
247+
string Explain(ExplainType type, const string &format = "");
248248

249249
static bool IsRelation(const py::object &object);
250250

src/duckdb_py/native/python_objects.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,9 @@ static bool KeyIsHashable(const LogicalType &type) {
462462
}
463463
case LogicalTypeId::STRUCT:
464464
return false;
465+
case LogicalTypeId::SQLNULL:
466+
// A SQLNULL key is always NULL, and Python's None is hashable.
467+
return true;
465468
default:
466469
throw NotImplementedException("Unsupported type: \"%s\"", type.ToString());
467470
}

src/duckdb_py/numpy/array_wrapper.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,25 @@ struct StringConvert {
180180
}
181181
};
182182

183+
struct NullConvert {
184+
template <class DUCKDB_T, class NUMPY_T>
185+
static PyObject *ConvertValue(DUCKDB_T val, NumpyAppendData &append_data) {
186+
// A SQLNULL column contains only NULLs, so ConvertValue is never reached; every row takes NullValue.
187+
(void)val;
188+
(void)append_data;
189+
Py_RETURN_NONE;
190+
}
191+
template <class NUMPY_T, bool PANDAS>
192+
static NUMPY_T NullValue(bool &set_mask) {
193+
if (PANDAS) {
194+
set_mask = false;
195+
Py_RETURN_NONE;
196+
}
197+
set_mask = true;
198+
return nullptr;
199+
}
200+
};
201+
183202
struct BlobConvert {
184203
template <class DUCKDB_T, class NUMPY_T>
185204
static PyObject *ConvertValue(string_t val, NumpyAppendData &append_data) {
@@ -703,6 +722,11 @@ void ArrayWrapper::Append(idx_t current_offset, Vector &input, idx_t source_size
703722
case LogicalTypeId::UUID:
704723
may_have_null = ConvertColumn<hugeint_t, PyObject *, duckdb_py_convert::UUIDConvert>(append_data);
705724
break;
725+
case LogicalTypeId::SQLNULL:
726+
// An all-NULL column (e.g. an untyped NULL literal): emit an object column of None. SQLNULL's physical
727+
// type is INT32, but its data is never read since every row is NULL.
728+
may_have_null = ConvertColumn<int32_t, PyObject *, duckdb_py_convert::NullConvert>(append_data);
729+
break;
706730

707731
default:
708732
throw NotImplementedException("Unsupported type \"%s\"", input.GetType().ToString());

src/duckdb_py/numpy/raw_array_wrapper.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ static idx_t GetNumpyTypeWidth(const LogicalType &type) {
6363
case LogicalTypeId::ARRAY:
6464
case LogicalTypeId::VARIANT:
6565
case LogicalTypeId::GEOMETRY:
66+
case LogicalTypeId::SQLNULL:
6667
return sizeof(PyObject *);
6768
default:
6869
throw NotImplementedException("Unsupported type \"%s\" for DuckDB -> NumPy conversion", type.ToString());
@@ -128,6 +129,7 @@ string RawArrayWrapper::DuckDBToNumpyDtype(const LogicalType &type) {
128129
case LogicalTypeId::ARRAY:
129130
case LogicalTypeId::VARIANT:
130131
case LogicalTypeId::GEOMETRY:
132+
case LogicalTypeId::SQLNULL:
131133
return "object";
132134
case LogicalTypeId::ENUM: {
133135
auto size = EnumType::GetSize(type);

src/duckdb_py/pyrelation.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1749,17 +1749,22 @@ static void DisplayHTML(const string &html) {
17491749
display_attr(html_object);
17501750
}
17511751

1752-
string DuckDBPyRelation::Explain(ExplainType type) {
1752+
string DuckDBPyRelation::Explain(ExplainType type, const string &format) {
17531753
AssertRelation();
17541754
D_ASSERT(py::gil_check());
17551755
py::gil_scoped_release release;
17561756

1757-
auto explain_format = GetExplainFormat(type);
1757+
// An empty format means "auto": the default format, or HTML when running under Jupyter.
1758+
const bool auto_format = format.empty();
1759+
auto explain_format = auto_format ? GetExplainFormat(type) : ProfilerPrintFormat::FromString(format);
17581760
auto res = rel->Explain(type, explain_format);
17591761
D_ASSERT(res->type == duckdb::QueryResultType::MATERIALIZED_RESULT);
17601762
auto &materialized = res->Cast<MaterializedQueryResult>();
17611763
auto &coll = materialized.Collection();
1762-
if (explain_format != ProfilerPrintFormat::HTML() || !DuckDBPyConnection::IsJupyter()) {
1764+
// Only the implicit Jupyter path renders HTML inline; an explicitly requested format always returns a string.
1765+
const bool jupyter_html =
1766+
auto_format && explain_format == ProfilerPrintFormat::HTML() && DuckDBPyConnection::IsJupyter();
1767+
if (!jupyter_html) {
17631768
string result_;
17641769
for (auto &row : coll.Rows()) {
17651770
// Skip the first column because it just contains 'physical plan'

src/duckdb_py/pyrelation/initialize.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "duckdb_python/pyrelation.hpp"
22
#include "duckdb_python/pyconnection/pyconnection.hpp"
33
#include "duckdb_python/pyresult.hpp"
4+
#include "duckdb_python/pybind11/conversions/explain_enum.hpp"
45
#include "duckdb/parser/qualified_name.hpp"
56
#include "duckdb/main/client_context.hpp"
67
#include "duckdb_python/numpy/numpy_type.hpp"
@@ -262,7 +263,14 @@ static void InitializeSetOperators(py::class_<DuckDBPyRelation> &m) {
262263
static void InitializeMetaQueries(py::class_<DuckDBPyRelation> &m) {
263264
m.def("describe", &DuckDBPyRelation::Describe,
264265
"Gives basic statistics (e.g., min, max) and if NULL exists for each column of the relation.")
265-
.def("explain", &DuckDBPyRelation::Explain, py::arg("type") = "standard");
266+
.def(
267+
"explain",
268+
[](DuckDBPyRelation &self, const py::object &type, const py::object &format) {
269+
// An omitted format (None) maps to "" = auto-select (default, or HTML under Jupyter).
270+
string format_str = format.is_none() ? string() : string(py::str(format));
271+
return self.Explain(ExplainTypeFromPython(type), format_str);
272+
},
273+
py::arg("type") = "standard", py::arg("format") = py::none());
266274
}
267275

268276
void DuckDBPyRelation::Initialize(py::handle &m) {

tests/fast/pandas/test_fetch_nested.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,11 @@ def list_test_cases():
3333
)
3434
]
3535
}),
36+
# An untyped NULL list now has child type SQLNULL (previously it defaulted to INTEGER), so it
37+
# converts to an object array of None rather than a masked integer array.
3638
("SELECT list_value(NULL,NULL,NULL) as a", {
3739
'a': [
38-
np.ma.array(
39-
[0, 0, 0],
40-
mask=[1, 1, 1],
41-
)
40+
np.array([None, None, None], dtype=object)
4241
]
4342
}),
4443
("SELECT list_value() as a", {

tests/fast/test_expression.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,8 +202,8 @@ def test_column_expression_explain(self):
202202
res = rel.explain()
203203
assert "c0" in res
204204
assert "c1" in res
205-
# 'c2' is not in the explain result because it shows NULL instead
206-
assert "NULL" in res
205+
# the physical plan now renders projection column names (c0, c1, c2) rather than literal constant values
206+
assert "c2" in res
207207
res = rel.fetchall()
208208
assert res == [("a", 42, None)]
209209

0 commit comments

Comments
 (0)