Skip to content

Commit e5de6a7

Browse files
feat(vortex-geo): native Point type with planar ST_Distance
Adds a GeoArrow-style `Point` extension type (Struct<x,y,[z],[m]>, dimension-ready) and the planar `GeoDistance` scalar function between two point columns. Signed-off-by: Nemo Yu <zyu379@wisc.edu>
1 parent 4b6c382 commit e5de6a7

6 files changed

Lines changed: 532 additions & 1 deletion

File tree

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! The coordinate building block shared by geometry extension types: the `Struct<x, y, [z], [m]>`
5+
//! storage, its [`Dimension`], the decoded [`Coordinate`] value, and the readers that decode it.
6+
//! `z`/`m` are optional, so all four GeoArrow dimensions share one value type — no third-party deps.
7+
8+
use std::fmt::Display;
9+
use std::fmt::Formatter;
10+
11+
use vortex_array::ArrayRef;
12+
use vortex_array::Canonical;
13+
use vortex_array::ExecutionCtx;
14+
use vortex_array::arrays::PrimitiveArray;
15+
use vortex_array::arrays::extension::ExtensionArrayExt;
16+
use vortex_array::arrays::struct_::StructArrayExt;
17+
use vortex_array::dtype::DType;
18+
use vortex_array::dtype::FieldNames;
19+
use vortex_array::dtype::Nullability;
20+
use vortex_array::dtype::PType;
21+
use vortex_array::dtype::StructFields;
22+
use vortex_array::scalar::Scalar;
23+
use vortex_error::VortexResult;
24+
use vortex_error::vortex_bail;
25+
use vortex_error::vortex_err;
26+
27+
/// Coordinate dimensions, matching GeoArrow. Field order is fixed: x, y, then z before m.
28+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
29+
pub enum Dimension {
30+
/// 2D: `x`, `y`.
31+
Xy,
32+
/// 3D with elevation: `x`, `y`, `z`.
33+
Xyz,
34+
/// 3D with a measure: `x`, `y`, `m`.
35+
Xym,
36+
/// 4D: `x`, `y`, `z`, `m`.
37+
Xyzm,
38+
}
39+
40+
impl Dimension {
41+
/// The coordinate struct field names for this dimension, in GeoArrow order.
42+
pub fn field_names(self) -> &'static [&'static str] {
43+
match self {
44+
Dimension::Xy => &["x", "y"],
45+
Dimension::Xyz => &["x", "y", "z"],
46+
Dimension::Xym => &["x", "y", "m"],
47+
Dimension::Xyzm => &["x", "y", "z", "m"],
48+
}
49+
}
50+
51+
/// Recover the dimension from a coordinate's field names, in GeoArrow order.
52+
pub fn from_field_names(names: &[&str]) -> VortexResult<Dimension> {
53+
Ok(match names {
54+
["x", "y"] => Dimension::Xy,
55+
["x", "y", "z"] => Dimension::Xyz,
56+
["x", "y", "m"] => Dimension::Xym,
57+
["x", "y", "z", "m"] => Dimension::Xyzm,
58+
_ => vortex_bail!("not a valid GeoArrow coordinate dimension: {names:?}"),
59+
})
60+
}
61+
}
62+
63+
/// A decoded coordinate. `z`/`m` are `Some` iff the storage dimension includes them.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Coordinate {
    /// The x (longitude/easting) ordinate.
    pub x: f64,
    /// The y (latitude/northing) ordinate.
    pub y: f64,
    /// The optional z (elevation) ordinate.
    pub z: Option<f64>,
    /// The optional m (measure) ordinate.
    pub m: Option<f64>,
}

impl Coordinate {
    /// A 2D coordinate (no `z`/`m`).
    pub fn xy(x: f64, y: f64) -> Self {
        Coordinate {
            x,
            y,
            z: None,
            m: None,
        }
    }
}

/// Formats the coordinate as a WKT point.
///
/// A 2D coordinate prints as `POINT(x y)`. When `z` and/or `m` are present they are printed too,
/// and the geometry is tagged with `Z`, `M` or `ZM` so the output never silently drops an
/// ordinate: `POINT Z (x y z)`, `POINT M (x y m)`, `POINT ZM (x y z m)`.
impl Display for Coordinate {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Coordinate { x, y, z, m } = *self;
        match (z, m) {
            // Kept byte-identical to the historical 2D output.
            (None, None) => write!(f, "POINT({x} {y})"),
            (Some(z), None) => write!(f, "POINT Z ({x} {y} {z})"),
            // The `M` tag is what distinguishes a measure from an elevation in three-ordinate WKT.
            (None, Some(m)) => write!(f, "POINT M ({x} {y} {m})"),
            (Some(z), Some(m)) => write!(f, "POINT ZM ({x} {y} {z} {m})"),
        }
    }
}
93+
94+
/// The coordinate storage dtype for a dimension: `Struct<x, y, [z], [m]>` of non-nullable f64.
95+
pub fn coordinate_dtype(dim: Dimension, nullability: Nullability) -> DType {
96+
let names = dim.field_names();
97+
let fields = std::iter::repeat_n(
98+
DType::Primitive(PType::F64, Nullability::NonNullable),
99+
names.len(),
100+
)
101+
.collect::<Vec<_>>();
102+
DType::Struct(
103+
StructFields::new(FieldNames::from(names), fields),
104+
nullability,
105+
)
106+
}
107+
108+
/// Validate that `dtype` is a coordinate struct of non-nullable `f64` fields, returning its
109+
/// [`Dimension`]. Any of the four GeoArrow dimensions validates.
110+
pub fn coordinate_dimension(dtype: &DType) -> VortexResult<Dimension> {
111+
let DType::Struct(fields, _) = dtype else {
112+
vortex_bail!("coordinate storage must be a Struct, was {dtype}");
113+
};
114+
let names: Vec<&str> = fields.names().iter().map(|n| n.as_ref()).collect();
115+
for (i, field) in fields.fields().enumerate() {
116+
if !matches!(
117+
field,
118+
DType::Primitive(PType::F64, Nullability::NonNullable)
119+
) {
120+
vortex_bail!(
121+
"coordinate field {} must be non-nullable f64, was {field}",
122+
names[i]
123+
);
124+
}
125+
}
126+
Dimension::from_field_names(&names)
127+
}
128+
129+
/// Decode a [`Coordinate`] from a coordinate `Struct<x, y, [z], [m]>` scalar (`z`/`m` read iff
130+
/// present, so the same decoder serves every dimension).
131+
pub(crate) fn coordinate_from_struct(scalar: &Scalar) -> VortexResult<Coordinate> {
132+
let fields = scalar.as_struct();
133+
let required = |name: &str| -> VortexResult<f64> {
134+
f64::try_from(
135+
&fields
136+
.field(name)
137+
.ok_or_else(|| vortex_err!("coordinate missing {name}"))?,
138+
)
139+
};
140+
let optional = |name: &str| -> VortexResult<Option<f64>> {
141+
fields
142+
.field(name)
143+
.map(|value| f64::try_from(&value))
144+
.transpose()
145+
};
146+
Ok(Coordinate {
147+
x: required("x")?,
148+
y: required("y")?,
149+
z: optional("z")?,
150+
m: optional("m")?,
151+
})
152+
}
153+
154+
/// Decode a [`Coordinate`] from an extension-typed point scalar (unwrapped to its coordinate
155+
/// storage) or a bare coordinate `Struct` scalar. The per-row decode used by the distance fns.
156+
pub fn coordinate_from_scalar(scalar: &Scalar) -> VortexResult<Coordinate> {
157+
match scalar.dtype().as_extension_opt() {
158+
Some(_) => coordinate_from_struct(&scalar.as_extension().to_storage_scalar()),
159+
None => coordinate_from_struct(scalar),
160+
}
161+
}
162+
163+
/// Canonicalize a point column once and return its flat `x`/`y` `f64` columns. The bulk counterpart
164+
/// to [`coordinate_from_scalar`]; distance is planar, so `z`/`m` are ignored.
165+
pub(crate) fn xy_columns(
166+
points: &ArrayRef,
167+
ctx: &mut ExecutionCtx,
168+
) -> VortexResult<(PrimitiveArray, PrimitiveArray)> {
169+
let storage = points
170+
.clone()
171+
.execute::<Canonical>(ctx)?
172+
.into_extension()
173+
.storage_array()
174+
.clone()
175+
.execute::<Canonical>(ctx)?
176+
.into_struct();
177+
let xs = storage
178+
.unmasked_field_by_name("x")?
179+
.clone()
180+
.execute::<Canonical>(ctx)?
181+
.into_primitive();
182+
let ys = storage
183+
.unmasked_field_by_name("y")?
184+
.clone()
185+
.execute::<Canonical>(ctx)?
186+
.into_primitive();
187+
Ok((xs, ys))
188+
}

vortex-geo/src/extension/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
mod coordinate;
5+
mod point;
46
mod wkb;
57

68
use std::fmt::Display;
79

10+
pub(crate) use coordinate::xy_columns;
11+
pub use coordinate::*;
12+
pub use point::*;
813
pub use wkb::*;
914

1015
/// Extension metadata that is common to all the geospatial extension types.

vortex-geo/src/extension/point.rs

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! The [`Point`] geometry extension type (`vortex.geo.point`): a location stored columnarly as
5+
//! `Struct<x, y, [z], [m]>` of `f64`, tagged with [`GeoMetadata`] (CRS).
6+
7+
use prost::Message;
8+
use vortex_array::dtype::extension::ExtDType;
9+
use vortex_array::dtype::extension::ExtId;
10+
use vortex_array::dtype::extension::ExtVTable;
11+
use vortex_array::scalar::Scalar;
12+
use vortex_array::scalar::ScalarValue;
13+
use vortex_error::VortexResult;
14+
15+
use super::GeoMetadata;
16+
use super::coordinate::Coordinate;
17+
use super::coordinate::coordinate_dimension;
18+
use super::coordinate::coordinate_from_struct;
19+
20+
/// A single location: `geoarrow.point`, stored as `Struct<x, y, [z], [m]>` of `f64`.
21+
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
22+
pub struct Point;
23+
24+
impl ExtVTable for Point {
25+
type Metadata = GeoMetadata;
26+
type NativeValue<'a> = Coordinate;
27+
28+
fn id(&self) -> ExtId {
29+
ExtId::new_static("vortex.geo.point")
30+
}
31+
32+
fn serialize_metadata(&self, metadata: &Self::Metadata) -> VortexResult<Vec<u8>> {
33+
Ok(metadata.encode_to_vec())
34+
}
35+
36+
fn deserialize_metadata(&self, metadata: &[u8]) -> VortexResult<Self::Metadata> {
37+
Ok(GeoMetadata::decode(metadata)?)
38+
}
39+
40+
fn validate_dtype(ext_dtype: &ExtDType<Self>) -> VortexResult<()> {
41+
coordinate_dimension(ext_dtype.storage_dtype()).map(|_| ())
42+
}
43+
44+
fn unpack_native<'a>(
45+
ext_dtype: &'a ExtDType<Self>,
46+
storage_value: &'a ScalarValue,
47+
) -> VortexResult<Coordinate> {
48+
let storage = Scalar::try_new(
49+
ext_dtype.storage_dtype().clone(),
50+
Some(storage_value.clone()),
51+
)?;
52+
coordinate_from_struct(&storage)
53+
}
54+
}
55+
56+
#[cfg(test)]
mod tests {
    use vortex_array::IntoArray;
    use vortex_array::VortexSessionExecute;
    use vortex_array::arrays::ExtensionArray;
    use vortex_array::arrays::PrimitiveArray;
    use vortex_array::arrays::StructArray;
    use vortex_array::dtype::DType;
    use vortex_array::dtype::Nullability;
    use vortex_array::dtype::PType;
    use vortex_array::dtype::extension::ExtDType;
    use vortex_array::session::ArraySession;
    use vortex_error::VortexResult;
    use vortex_session::VortexSession;

    use super::Point;
    use crate::extension::Coordinate;
    use crate::extension::Dimension;
    use crate::extension::GeoMetadata;
    use crate::extension::coordinate_dimension;
    use crate::extension::coordinate_dtype;
    use crate::extension::coordinate_from_scalar;

    /// Metadata shared by every test dtype.
    fn geo_meta() -> GeoMetadata {
        GeoMetadata {
            crs: Some("EPSG:4326".to_string()),
        }
    }

    /// `Point` accepts every GeoArrow dimension; the storage carries the canonical field names and
    /// the dimension round-trips through `coordinate_dimension`.
    #[test]
    fn point_validates_every_dimension() -> VortexResult<()> {
        let cases = [
            (Dimension::Xy, ["x", "y"].as_slice()),
            (Dimension::Xyz, ["x", "y", "z"].as_slice()),
            (Dimension::Xym, ["x", "y", "m"].as_slice()),
            (Dimension::Xyzm, ["x", "y", "z", "m"].as_slice()),
        ];
        for (dim, expected_fields) in cases {
            let storage = coordinate_dtype(dim, Nullability::NonNullable);
            let DType::Struct(fields, _) = &storage else {
                unreachable!("coordinate_dtype builds a struct");
            };
            let names: Vec<&str> = fields.names().iter().map(|n| n.as_ref()).collect();
            assert_eq!(names.as_slice(), expected_fields);
            assert_eq!(coordinate_dimension(&storage)?, dim);
            ExtDType::<Point>::try_new(geo_meta(), storage)?;
        }
        Ok(())
    }

    /// Invalid storage is rejected at dtype construction: non-struct storage, a struct whose field
    /// names are not GeoArrow coordinates, a struct with `z`/`m` in the wrong order, and a struct
    /// whose fields are not `f64`.
    #[test]
    fn point_rejects_invalid_storage() -> VortexResult<()> {
        let primitive = DType::Primitive(PType::F64, Nullability::NonNullable);
        assert!(ExtDType::<Point>::try_new(geo_meta(), primitive).is_err());

        let wrong_fields = StructArray::from_fields(&[
            ("a", PrimitiveArray::from_iter(vec![0.0f64]).into_array()),
            ("b", PrimitiveArray::from_iter(vec![0.0f64]).into_array()),
        ])?
        .into_array();
        assert!(ExtDType::<Point>::try_new(geo_meta(), wrong_fields.dtype().clone()).is_err());

        // Right names, wrong order: GeoArrow requires `z` before `m`.
        let swapped_zm = StructArray::from_fields(&[
            ("x", PrimitiveArray::from_iter(vec![0.0f64]).into_array()),
            ("y", PrimitiveArray::from_iter(vec![0.0f64]).into_array()),
            ("m", PrimitiveArray::from_iter(vec![0.0f64]).into_array()),
            ("z", PrimitiveArray::from_iter(vec![0.0f64]).into_array()),
        ])?
        .into_array();
        assert!(ExtDType::<Point>::try_new(geo_meta(), swapped_zm.dtype().clone()).is_err());

        // Right names, wrong ordinate type: every field must be `f64`.
        let wrong_ptype = StructArray::from_fields(&[
            ("x", PrimitiveArray::from_iter(vec![0.0f32]).into_array()),
            ("y", PrimitiveArray::from_iter(vec![0.0f32]).into_array()),
        ])?
        .into_array();
        assert!(ExtDType::<Point>::try_new(geo_meta(), wrong_ptype.dtype().clone()).is_err());
        Ok(())
    }

    /// A `Point` column round-trips through scalar execution back to the original coordinates.
    #[test]
    fn point_unpacks_coordinates() -> VortexResult<()> {
        let session = VortexSession::empty().with::<ArraySession>();
        let mut ctx = session.create_execution_ctx();

        let storage = StructArray::from_fields(&[
            (
                "x",
                PrimitiveArray::from_iter(vec![1.0f64, -111.7610]).into_array(),
            ),
            (
                "y",
                PrimitiveArray::from_iter(vec![2.0f64, 34.8697]).into_array(),
            ),
        ])?
        .into_array();
        let dtype = ExtDType::<Point>::try_new(geo_meta(), storage.dtype().clone())?;
        let points = ExtensionArray::new(dtype.erased(), storage).into_array();

        assert_eq!(
            coordinate_from_scalar(&points.execute_scalar(0, &mut ctx)?)?,
            Coordinate::xy(1.0, 2.0)
        );
        assert_eq!(
            coordinate_from_scalar(&points.execute_scalar(1, &mut ctx)?)?,
            Coordinate::xy(-111.7610, 34.8697)
        );
        Ok(())
    }
}

vortex-geo/src/lib.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,23 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use vortex_array::dtype::session::DTypeSessionExt;
5+
use vortex_array::scalar_fn::session::ScalarFnSessionExt;
56
use vortex_session::VortexSession;
67

8+
use crate::extension::Point;
79
use crate::extension::WellKnownBinary;
10+
use crate::scalar_fn::GeoDistance;
811

912
pub mod extension;
13+
pub mod scalar_fn;
1014
/// Set up a session with support for geospatial extension types, encodings and layouts.
1115
pub fn initialize(session: &VortexSession) {
12-
// register geospatial extension types
16+
// Register the geospatial extension types.
1317
session.dtypes().register(WellKnownBinary);
18+
session.dtypes().register(Point);
19+
20+
// Register the geometry scalar functions.
21+
session.scalar_fns().register(GeoDistance);
1422
}
1523

1624
#[cfg(test)]

0 commit comments

Comments
 (0)