Skip to content

Commit 8f63ab2

Browse files
committed
fix handling of scalars
Signed-off-by: Andrew Duffy <andrew@a10y.dev>
1 parent da6549a commit 8f63ab2

4 files changed

Lines changed: 202 additions & 8 deletions

File tree

vortex-duckdb/cpp/include/duckdb_vx/value.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,32 @@
33

44
#pragma once
55

6+
#include "duckdb_vx/duckdb_diagnostics.h"
7+
8+
DUCKDB_INCLUDES_BEGIN
9+
#include "duckdb.h"
10+
DUCKDB_INCLUDES_END
11+
612
#ifdef __cplusplus /* If compiled as C++, use C ABI */
713
extern "C" {
814
#endif
915

1016
// Create a null value with a reference to a logical type.
1117
duckdb_value duckdb_vx_value_create_null(duckdb_logical_type ty);
1218

19+
/// Creates a GEOMETRY value containing the given WKB bytes and CRS.
20+
///
21+
/// `wkb` points to `len` bytes of well-known-binary geometry data; the bytes are not validated.
22+
/// `crs` must be a NUL-terminated UTF-8 string; pass NULL or an empty string for no CRS.
23+
duckdb_value duckdb_vx_value_create_geometry(const uint8_t *wkb, idx_t len, const char *crs);
24+
25+
/// Extracts the raw WKB bytes from a GEOMETRY value as a duckdb_blob.
26+
///
27+
/// This bypasses the GEOMETRY -> BLOB default cast (which would require the spatial extension to
28+
/// be loaded). The returned `data` pointer must be freed with `duckdb_free`. The caller must
29+
/// ensure `value` is a non-null GEOMETRY value, otherwise behavior is undefined.
30+
duckdb_blob duckdb_vx_value_get_geometry(duckdb_value value);
31+
1332
#ifdef __cplusplus /* End C ABI */
1433
}
1534
#endif

vortex-duckdb/cpp/value.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "duckdb_vx/duckdb_diagnostics.h"
55

66
DUCKDB_INCLUDES_BEGIN
7+
#include "duckdb/common/types/geometry_crs.hpp"
78
#include "duckdb/common/types/value.hpp"
89
DUCKDB_INCLUDES_END
910

@@ -14,3 +15,23 @@ extern "C" duckdb_value duckdb_vx_value_create_null(duckdb_logical_type ty) {
1415
auto value = duckdb::make_uniq<duckdb::Value>(*logical_type);
1516
return reinterpret_cast<duckdb_value>(value.release());
1617
}
18+
19+
extern "C" duckdb_value duckdb_vx_value_create_geometry(const uint8_t *wkb, idx_t len, const char *crs) {
20+
const auto bytes = reinterpret_cast<duckdb::const_data_ptr_t>(wkb);
21+
auto value = (crs == nullptr || *crs == '\0')
22+
? duckdb::Value::GEOMETRY(bytes, len)
23+
: duckdb::Value::GEOMETRY(bytes, len, duckdb::CoordinateReferenceSystem(std::string(crs)));
24+
auto owned = duckdb::make_uniq<duckdb::Value>(std::move(value));
25+
return reinterpret_cast<duckdb_value>(owned.release());
26+
}
27+
28+
extern "C" duckdb_blob duckdb_vx_value_get_geometry(duckdb_value value) {
29+
const auto val = reinterpret_cast<duckdb::Value *>(value);
30+
const auto &str = duckdb::StringValue::Get(*val);
31+
const auto size = str.size();
32+
auto buf = reinterpret_cast<void *>(duckdb_malloc(size));
33+
if (size > 0) {
34+
memcpy(buf, str.c_str(), size);
35+
}
36+
return {buf, size};
37+
}

vortex-duckdb/src/convert/scalar.rs

Lines changed: 110 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ use vortex::scalar::PrimitiveScalar;
4747
use vortex::scalar::Scalar;
4848
use vortex::scalar::ScalarValue;
4949
use vortex::scalar::Utf8Scalar;
50+
use vortex_geo::extension::WellKnownBinary;
5051

5152
use crate::convert::dtype::FromLogicalType;
5253
use crate::duckdb::LogicalType;
@@ -169,9 +170,22 @@ impl ToDuckDBScalar for BinaryScalar<'_> {
169170
}
170171

171172
impl ToDuckDBScalar for ExtScalar<'_> {
172-
/// Converts an extension scalar (primarily temporal types) to a DuckDB value.
173+
/// Converts an extension scalar (temporal types or `WellKnownBinary` geometries) to a DuckDB
174+
/// value.
173175
fn try_to_duckdb_scalar(&self) -> VortexResult<Value> {
174176
let logical_type = LogicalType::try_from(&DType::Extension(self.ext_dtype().clone()))?;
177+
178+
if let Some(wkb) = self.ext_dtype().metadata_opt::<WellKnownBinary>() {
179+
let storage = self.to_storage_scalar();
180+
let binary = storage
181+
.as_binary_opt()
182+
.ok_or_else(|| vortex_err!("WellKnownBinary storage must be a binary scalar"))?;
183+
return Ok(match binary.value() {
184+
Some(bytes) => Value::new_geometry(bytes.as_slice(), wkb.crs.as_deref())?,
185+
None => Value::null(&logical_type),
186+
});
187+
}
188+
175189
let Some(temporal) = self.ext_dtype().metadata_opt::<AnyTemporal>() else {
176190
vortex_bail!("Cannot convert non-temporal extension scalar to duckdb value");
177191
};
@@ -266,7 +280,14 @@ impl<'a> TryFrom<&'a ValueRef> for Scalar {
266280
ExtractedValue::Float(v) => Ok(Scalar::primitive(v, Nullable)),
267281
ExtractedValue::Double(v) => Ok(Scalar::primitive(v, Nullable)),
268282
ExtractedValue::Varchar(s) => Ok(Scalar::utf8(s, Nullable)),
269-
ExtractedValue::Blob(b) => Ok(Scalar::binary(b, Nullable)),
283+
ExtractedValue::Blob(b) => match &dtype {
284+
DType::Binary(_) => Ok(Scalar::binary(b, Nullable)),
285+
DType::Extension(ext) if ext.is::<WellKnownBinary>() => Ok(Scalar::extension_ref(
286+
ext.clone(),
287+
Scalar::binary(b, Nullable),
288+
)),
289+
_ => vortex_bail!("Cannot convert DuckDB blob to Vortex scalar of dtype {dtype}"),
290+
},
270291
ExtractedValue::Date(days) => Ok(Scalar::extension::<Date>(
271292
TimeUnit::Days,
272293
Scalar::try_new(
@@ -350,11 +371,19 @@ impl<'a> TryFrom<&'a ValueRef> for Scalar {
350371

351372
#[cfg(test)]
352373
mod tests {
374+
use rstest::rstest;
375+
use vortex::dtype::DType;
376+
use vortex::dtype::Nullability;
377+
use vortex::dtype::extension::ExtDType;
353378
use vortex::extension::datetime::Timestamp;
354379
use vortex::extension::datetime::TimestampOptions;
355380
use vortex::scalar::Scalar;
381+
use vortex_geo::extension::GeoMetadata;
382+
use vortex_geo::extension::WellKnownBinary;
356383

357384
use crate::convert::ToDuckDBScalar;
385+
use crate::cpp::DUCKDB_TYPE;
386+
use crate::duckdb::Value;
358387

359388
#[test]
360389
fn test_scalar_round_trip() {
@@ -413,4 +442,83 @@ mod tests {
413442
assert_eq!(original_scalar, roundtrip_scalar);
414443
}
415444
}
445+
446+
/// Sample WKB bytes for `POINT(1 2)` little-endian.
447+
fn sample_wkb() -> Vec<u8> {
448+
vec![
449+
0x01, // little-endian
450+
0x01, 0x00, 0x00, 0x00, // type = 1 (Point)
451+
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f, // x = 1.0
452+
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, // y = 2.0
453+
]
454+
}
455+
456+
fn wkb_scalar(crs: Option<&str>, bytes: &[u8]) -> Scalar {
457+
Scalar::extension::<WellKnownBinary>(
458+
GeoMetadata {
459+
crs: crs.map(str::to_string),
460+
},
461+
Scalar::binary(bytes.to_vec(), Nullability::Nullable),
462+
)
463+
}
464+
465+
#[rstest]
466+
#[case::with_crs(Some("EPSG:4326"))]
467+
#[case::no_crs(None)]
468+
#[case::empty_crs(Some(""))]
469+
fn test_geometry_value_extract_round_trip(#[case] crs: Option<&str>) {
470+
let bytes = sample_wkb();
471+
let value = Value::new_geometry(&bytes, crs).unwrap();
472+
473+
// The constructed value must be a GEOMETRY logical type.
474+
assert_eq!(
475+
value.logical_type().as_type_id(),
476+
DUCKDB_TYPE::DUCKDB_TYPE_GEOMETRY
477+
);
478+
479+
// Extract back: bytes round-trip exactly.
480+
let scalar: Scalar = (&*value).try_into().unwrap();
481+
let ext = scalar.as_extension();
482+
let storage = ext.to_storage_scalar();
483+
let storage_binary = storage.as_binary();
484+
assert_eq!(storage_binary.value().unwrap().as_slice(), bytes.as_slice());
485+
486+
// The extension dtype should be `WellKnownBinary` and CRS should round-trip,
487+
// with the documented quirk that `Some("")` collapses to `None` through DuckDB.
488+
let metadata = ext.ext_dtype().metadata::<WellKnownBinary>();
489+
match crs {
490+
Some("") | None => assert_eq!(metadata.crs, None),
491+
Some(s) => assert_eq!(metadata.crs.as_deref(), Some(s)),
492+
}
493+
}
494+
495+
#[test]
496+
fn test_geometry_to_duckdb_scalar_round_trip() {
497+
let bytes = sample_wkb();
498+
let original = wkb_scalar(Some("EPSG:4326"), &bytes);
499+
500+
let duckdb_value = original.try_to_duckdb_scalar().unwrap();
501+
let roundtrip: Scalar = duckdb_value.try_into().unwrap();
502+
503+
assert_eq!(original, roundtrip);
504+
}
505+
506+
#[test]
507+
fn test_null_geometry_to_duckdb_scalar() {
508+
let dtype = ExtDType::<WellKnownBinary>::try_new(
509+
GeoMetadata {
510+
crs: Some("EPSG:4326".to_string()),
511+
},
512+
DType::Binary(Nullability::Nullable),
513+
)
514+
.unwrap()
515+
.erased();
516+
let original = Scalar::null(DType::Extension(dtype));
517+
518+
let duckdb_value = original.try_to_duckdb_scalar().unwrap();
519+
let roundtrip: Scalar = duckdb_value.try_into().unwrap();
520+
521+
assert!(roundtrip.is_null());
522+
assert_eq!(roundtrip.dtype(), original.dtype());
523+
}
416524
}

vortex-duckdb/src/duckdb/value.rs

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,19 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use std::ffi::CStr;
5+
use std::ffi::CString;
56
use std::fmt::Debug;
67
use std::fmt::Display;
78
use std::fmt::Formatter;
9+
use std::ptr;
810

911
use num_traits::AsPrimitive;
1012
use vortex::buffer::BufferString;
1113
use vortex::buffer::ByteBuffer;
1214
use vortex::dtype::NativeDType;
1315
use vortex::error::VortexError;
1416
use vortex::error::VortexExpect;
17+
use vortex::error::VortexResult;
1518
use vortex::error::vortex_err;
1619
use vortex::error::vortex_panic;
1720

@@ -97,12 +100,15 @@ impl ValueRef {
97100
ExtractedValue::Varchar(string)
98101
}
99102
DUCKDB_TYPE::DUCKDB_TYPE_BLOB => {
100-
let blob = unsafe { cpp::duckdb_get_blob(self.as_ptr()) };
101-
let slice =
102-
unsafe { std::slice::from_raw_parts(blob.data.cast::<u8>(), blob.size.as_()) };
103-
let bytes = ByteBuffer::copy_from(slice);
104-
unsafe { cpp::duckdb_free(blob.data) };
105-
ExtractedValue::Blob(bytes)
103+
ExtractedValue::Blob(unsafe { take_blob(cpp::duckdb_get_blob(self.as_ptr())) })
104+
}
105+
DUCKDB_TYPE::DUCKDB_TYPE_GEOMETRY => {
106+
// GEOMETRY values are WKB blobs in DuckDB; we read the bytes directly via a shim
107+
// that bypasses the GEOMETRY -> BLOB default cast (which requires spatial loaded).
108+
// CRS lives on the logical type and is not part of the extracted bytes.
109+
ExtractedValue::Blob(unsafe {
110+
take_blob(cpp::duckdb_vx_value_get_geometry(self.as_ptr()))
111+
})
106112
}
107113
DUCKDB_TYPE::DUCKDB_TYPE_DATE => {
108114
ExtractedValue::Date(unsafe { cpp::duckdb_get_date(self.as_ptr()).days })
@@ -250,6 +256,24 @@ impl Value {
250256
pub fn new_date(days: i32) -> Self {
251257
unsafe { Self::own(cpp::duckdb_create_date(cpp::duckdb_date { days })) }
252258
}
259+
260+
/// Creates a DuckDB GEOMETRY value from WKB bytes and an optional CRS.
261+
///
262+
/// Pass `None` (or an empty string) for `crs` to create a geometry value with no CRS.
263+
pub fn new_geometry(wkb: &[u8], crs: Option<&str>) -> VortexResult<Self> {
264+
let crs_c = crs
265+
.map(CString::new)
266+
.transpose()
267+
.map_err(|_| vortex_err!("CRS must not contain NUL bytes"))?;
268+
let crs_ptr = crs_c.as_ref().map_or(ptr::null(), |c| c.as_ptr());
269+
Ok(unsafe {
270+
Self::own(cpp::duckdb_vx_value_create_geometry(
271+
wkb.as_ptr(),
272+
wkb.len() as idx_t,
273+
crs_ptr,
274+
))
275+
})
276+
}
253277
}
254278

255279
impl Display for ValueRef {
@@ -267,6 +291,28 @@ impl Display for Value {
267291
}
268292
}
269293

294+
/// Copies the bytes of a `duckdb_blob` into a `ByteBuffer` and frees the underlying allocation.
295+
///
296+
/// # Safety
297+
///
298+
/// `blob` must be a freshly returned value from a DuckDB allocator (`duckdb_malloc` or one of the
299+
/// `duckdb_get_*` accessors that allocate).
300+
unsafe fn take_blob(blob: cpp::duckdb_blob) -> ByteBuffer {
301+
let size: usize = blob.size.as_();
302+
// `slice::from_raw_parts` requires a non-null, aligned pointer even for zero-length slices,
303+
// and `duckdb_malloc(0)` is permitted to return null. Skip the slice construction in that case.
304+
let bytes = if size == 0 {
305+
ByteBuffer::empty()
306+
} else {
307+
let slice = unsafe { std::slice::from_raw_parts(blob.data.cast::<u8>(), size) };
308+
ByteBuffer::copy_from(slice)
309+
};
310+
if !blob.data.is_null() {
311+
unsafe { cpp::duckdb_free(blob.data) };
312+
}
313+
bytes
314+
}
315+
270316
#[inline]
271317
pub fn i128_from_parts(high: i64, low: u64) -> i128 {
272318
((high as i128) << 64) | (low as i128)

0 commit comments

Comments
 (0)