Skip to content
195 changes: 195 additions & 0 deletions vortex-geo/src/extension/coordinate.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Coordinate building blocks for geometry extension types: the `Struct<x, y, z?, m?>` storage,
Comment thread
connortsui20 marked this conversation as resolved.
Outdated
//! its [`Dimension`], and the decoded [`Coordinate`] value.
//!
//! The coordinate fields, where `?` marks an optional field, are:
//! - `x` — longitude or easting
//! - `y` — latitude or northing
//! - `z?` — elevation
//! - `m?` — measure: an arbitrary per-point value such as distance along a route or a timestamp
Comment on lines +8 to +12

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we always prefer f64 vs f32?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because GeoArrow and WKB both fix coordinates as float64.


use std::fmt::Display;
use std::fmt::Formatter;

use vortex_array::ArrayRef;
use vortex_array::ExecutionCtx;
use vortex_array::arrays::ExtensionArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::arrays::StructArray;
use vortex_array::arrays::extension::ExtensionArrayExt;
use vortex_array::arrays::struct_::StructArrayExt;
use vortex_array::dtype::DType;
use vortex_array::dtype::Nullability;
use vortex_array::dtype::PType;
use vortex_array::scalar::Scalar;
use vortex_error::VortexResult;
use vortex_error::vortex_bail;
use vortex_error::vortex_ensure;
use vortex_error::vortex_err;

/// The set of ordinates a coordinate carries, mirroring the GeoArrow dimensions.
///
/// Fields always appear in a fixed order: `x`, `y`, and then `z` ahead of `m` when both exist.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum Dimension {
    /// Two ordinates: `x` and `y`.
    Xy,
    /// Three ordinates, the third being elevation: `x`, `y`, `z`.
    Xyz,
    /// Three ordinates, the third being a measure: `x`, `y`, `m`.
    Xym,
    /// Four ordinates: `x`, `y`, `z`, `m`.
    Xyzm,
}

impl Dimension {
/// Recover the dimension from a coordinate's field names, in GeoArrow order.
pub(crate) fn from_field_names(names: &[&str]) -> VortexResult<Dimension> {
Ok(match names {
["x", "y"] => Dimension::Xy,
["x", "y", "z"] => Dimension::Xyz,
["x", "y", "m"] => Dimension::Xym,
["x", "y", "z", "m"] => Dimension::Xyzm,
_ => vortex_bail!("not a valid GeoArrow coordinate dimension: {names:?}"),
})
}
}

/// One decoded coordinate value. `z`/`m` are `Some` exactly when the storage dimension carries
/// them.
///
/// Unpacking a [`Point`](crate::extension::Point) scalar yields this native value; the remaining
/// coordinate machinery stays crate-internal.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Coordinate {
    /// The x ordinate (longitude or easting).
    pub x: f64,
    /// The y ordinate (latitude or northing).
    pub y: f64,
    /// The elevation ordinate `z?`, when present.
    pub z: Option<f64>,
    /// The measure ordinate `m?`, when present.
    pub m: Option<f64>,
}

impl Coordinate {
/// A 2D coordinate (`z?`/`m?` unset).
pub fn xy(x: f64, y: f64) -> Self {
Coordinate {
x,
y,
z: None,
m: None,
}
}
}

impl Display for Coordinate {
fn fmt(&self, fmt: &mut Formatter<'_>) -> std::fmt::Result {
match (self.z, self.m) {
(None, None) => write!(fmt, "POINT({} {})", self.x, self.y),
(Some(z), None) => write!(fmt, "POINT Z ({} {} {})", self.x, self.y, z),
(None, Some(m)) => write!(fmt, "POINT M ({} {} {})", self.x, self.y, m),
(Some(z), Some(m)) => write!(fmt, "POINT ZM ({} {} {} {})", self.x, self.y, z, m),
}
}
}

/// Validate that `dtype` is a coordinate struct of non-nullable `f64` fields, returning its
/// [`Dimension`]. Any of the four GeoArrow dimensions validates.
pub(crate) fn coordinate_dimension(dtype: &DType) -> VortexResult<Dimension> {
let DType::Struct(fields, _) = dtype else {
vortex_bail!("coordinate storage must be a Struct, was {dtype}");
};
let names: Vec<&str> = fields.names().iter().map(|n| n.as_ref()).collect();
for (i, field) in fields.fields().enumerate() {
vortex_ensure!(
matches!(
field,
DType::Primitive(PType::F64, Nullability::NonNullable)
),
"coordinate field {} must be non-nullable f64, was {field}",
names[i]
);
}
Dimension::from_field_names(&names)
}

/// Builds a [`Coordinate`] out of a coordinate `Struct<x, y, z?, m?>` scalar.
///
/// `x` and `y` must be present; `z?` and `m?` are read only when the struct has them, which lets
/// one decoder cover every dimension.
pub(crate) fn coordinate_from_struct(scalar: &Scalar) -> VortexResult<Coordinate> {
    let fields = scalar.as_struct();

    // A field that must exist: absence is an error.
    let required = |name: &str| -> VortexResult<f64> {
        let value = fields
            .field(name)
            .ok_or_else(|| vortex_err!("coordinate missing {name}"))?;
        f64::try_from(&value)
    };
    // A field that may be absent: absence decodes to `None`.
    let optional = |name: &str| -> VortexResult<Option<f64>> {
        match fields.field(name) {
            Some(value) => f64::try_from(&value).map(Some),
            None => Ok(None),
        }
    };

    let x = required("x")?;
    let y = required("y")?;
    let z = optional("z")?;
    let m = optional("m")?;
    Ok(Coordinate { x, y, z, m })
}

/// Decodes a [`Coordinate`] from either an extension-typed point scalar or a bare coordinate
/// `Struct` scalar. An extension scalar is first unwrapped to its storage scalar. This is the
/// per-row decode used by the distance fns.
pub(crate) fn coordinate_from_scalar(scalar: &Scalar) -> VortexResult<Coordinate> {
    if let Some(ext_scalar) = scalar.as_extension_opt() {
        let storage = ext_scalar.to_storage_scalar();
        return coordinate_from_struct(&storage);
    }
    coordinate_from_struct(scalar)
}

/// Canonicalize a point column once and return its flat `x`/`y` `f64` columns. The bulk counterpart
/// to [`coordinate_from_scalar`]; distances use only `x`/`y`, so `z?`/`m?` are ignored.
///
/// Returns `(xs, ys)`. Any error from executing `points`, its storage, or the `x`/`y` fields, or
/// from looking those fields up by name, is propagated to the caller.
// NOTE(review): the fields are fetched with `unmasked_field_by_name`, which presumably ignores
// struct-level validity — confirm that nullable point storage is rejected or handled upstream.
pub(crate) fn xy_columns(
points: &ArrayRef,
ctx: &mut ExecutionCtx,
) -> VortexResult<(PrimitiveArray, PrimitiveArray)> {
// Execute the input as an extension array, then execute its storage as a struct array.
let storage = points
.clone()
.execute::<ExtensionArray>(ctx)?
.storage_array()
.clone()
.execute::<StructArray>(ctx)?;
// Execute the `x` field as a primitive array.
let xs = storage
.unmasked_field_by_name("x")?
Comment on lines +179 to +180

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you are sure, should check the struct is non-nullable.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

at least a debug assert

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I add a vortex_ensure.

@connortsui20 connortsui20 Jun 11, 2026

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe you want to add a parse_storage helper that produces a typed struct where the construct validates the storage array? So instead of calling xy_columns on the storage array you do parse_storage(storage_array, &mut ctx) and then that given you a ParsedCoordinate struct that holds the primitive arrays you want.

Let me know if that makes sense or not! You can look at the turboquant code for inspiration (though not that we are going to delete that / I need to delete that soon)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got a try, can you look at it? @connortsui20

.clone()
.execute::<PrimitiveArray>(ctx)?;
// Execute the `y` field the same way.
let ys = storage
.unmasked_field_by_name("y")?
.clone()
.execute::<PrimitiveArray>(ctx)?;
Ok((xs, ys))
}

#[cfg(test)]
mod tests {
    use super::Coordinate;

    /// `Display` produces WKT text, appending `z?`/`m?` whenever they are set.
    #[test]
    fn display_is_wkt() {
        let cases = [
            (None, None, "POINT(1 2)"),
            (Some(3.0), None, "POINT Z (1 2 3)"),
            (None, Some(4.0), "POINT M (1 2 4)"),
            (Some(3.0), Some(4.0), "POINT ZM (1 2 3 4)"),
        ];
        for (z, m, expected) in cases {
            let coordinate = Coordinate {
                x: 1.0,
                y: 2.0,
                z,
                m,
            };
            assert_eq!(coordinate.to_string(), expected);
        }
    }
}
3 changes: 3 additions & 0 deletions vortex-geo/src/extension/mod.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

pub(crate) mod coordinate;
mod point;
mod wkb;

use std::fmt::Display;

pub use point::*;
pub use wkb::*;

/// Extension metadata that is common to all the geospatial extension types.
Expand Down
165 changes: 165 additions & 0 deletions vortex-geo/src/extension/point.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! The [`Point`] geometry extension type (`vortex.geo.point`): a location stored columnarly as
//! `Struct<x, y, z?, m?>` of `f64`, tagged with [`GeoMetadata`] (CRS). `z?` is an optional
//! elevation and `m?` an optional measure — an arbitrary per-point value such as distance along a
//! route or a timestamp.

use prost::Message;
use vortex_array::dtype::extension::ExtDType;
use vortex_array::dtype::extension::ExtId;
use vortex_array::dtype::extension::ExtVTable;
use vortex_array::scalar::Scalar;
use vortex_array::scalar::ScalarValue;
use vortex_error::VortexResult;

use super::GeoMetadata;
use super::coordinate::Coordinate;
use super::coordinate::coordinate_dimension;
use super::coordinate::coordinate_from_struct;

/// Marker type for the point geometry extension (`geoarrow.point`): each value is one location,
/// stored as a `Struct<x, y, z?, m?>` of `f64` ordinates.
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
pub struct Point;

impl ExtVTable for Point {
    type Metadata = GeoMetadata;
    type NativeValue<'a> = Coordinate;

    /// The extension id: `vortex.geo.point`.
    fn id(&self) -> ExtId {
        ExtId::new_static("vortex.geo.point")
    }

    /// Encodes the metadata to bytes with `encode_to_vec`.
    fn serialize_metadata(&self, metadata: &Self::Metadata) -> VortexResult<Vec<u8>> {
        let encoded = metadata.encode_to_vec();
        Ok(encoded)
    }

    /// Decodes metadata from bytes, propagating any decode failure.
    fn deserialize_metadata(&self, metadata: &[u8]) -> VortexResult<Self::Metadata> {
        let decoded = GeoMetadata::decode(metadata)?;
        Ok(decoded)
    }

    /// Accepts the dtype when its storage passes coordinate validation; the resolved dimension is
    /// discarded.
    fn validate_dtype(ext_dtype: &ExtDType<Self>) -> VortexResult<()> {
        coordinate_dimension(ext_dtype.storage_dtype())?;
        Ok(())
    }

    /// Rebuilds a storage scalar from the storage dtype and value, then decodes it into a
    /// [`Coordinate`].
    fn unpack_native<'a>(
        ext_dtype: &'a ExtDType<Self>,
        storage_value: &'a ScalarValue,
    ) -> VortexResult<Coordinate> {
        let storage_dtype = ext_dtype.storage_dtype().clone();
        let value = Some(storage_value.clone());
        let storage = Scalar::try_new(storage_dtype, value)?;
        coordinate_from_struct(&storage)
    }
}

#[cfg(test)]
mod tests {
    use vortex_array::IntoArray;
    use vortex_array::VortexSessionExecute;
    use vortex_array::arrays::ExtensionArray;
    use vortex_array::arrays::PrimitiveArray;
    use vortex_array::arrays::StructArray;
    use vortex_array::dtype::DType;
    use vortex_array::dtype::FieldNames;
    use vortex_array::dtype::Nullability;
    use vortex_array::dtype::PType;
    use vortex_array::dtype::StructFields;
    use vortex_array::dtype::extension::ExtDType;
    use vortex_array::session::ArraySession;
    use vortex_error::VortexResult;
    use vortex_session::VortexSession;

    use super::Point;
    use crate::extension::GeoMetadata;
    use crate::extension::coordinate::Coordinate;
    use crate::extension::coordinate::Dimension;
    use crate::extension::coordinate::coordinate_dimension;
    use crate::extension::coordinate::coordinate_from_scalar;

    /// Metadata fixture shared by the tests: CRS `EPSG:4326`.
    fn geo_meta() -> GeoMetadata {
        let crs = Some(String::from("EPSG:4326"));
        GeoMetadata { crs }
    }

    /// Builds a non-nullable struct dtype with one non-nullable `f64` field per entry in `names`.
    fn coordinate_dtype(names: &[&'static str]) -> DType {
        let ordinate = DType::Primitive(PType::F64, Nullability::NonNullable);
        let fields = vec![ordinate; names.len()];
        let struct_fields = StructFields::new(FieldNames::from(names), fields);
        DType::Struct(struct_fields, Nullability::NonNullable)
    }

    /// Every GeoArrow dimension is valid `Point` storage, and each canonical list of field names
    /// resolves to its own dimension, so swapping `z?`/`m?` or mislabeling a field is detected.
    #[test]
    fn point_validates_every_dimension() -> VortexResult<()> {
        let expectations = [
            (["x", "y"].as_slice(), Dimension::Xy),
            (["x", "y", "z"].as_slice(), Dimension::Xyz),
            (["x", "y", "m"].as_slice(), Dimension::Xym),
            (["x", "y", "z", "m"].as_slice(), Dimension::Xyzm),
        ];
        for (names, expected) in expectations {
            let storage_dtype = coordinate_dtype(names);
            let detected = coordinate_dimension(&storage_dtype)?;
            assert_eq!(detected, expected);
            ExtDType::<Point>::try_new(geo_meta(), storage_dtype)?;
        }
        Ok(())
    }

    /// Dtype construction rejects invalid storage: storage that is not a struct, and a struct
    /// whose fields are not GeoArrow coordinates.
    #[test]
    fn point_rejects_invalid_storage() -> VortexResult<()> {
        let primitive = DType::Primitive(PType::F64, Nullability::NonNullable);
        let from_primitive = ExtDType::<Point>::try_new(geo_meta(), primitive);
        assert!(from_primitive.is_err());

        let zero_column = || PrimitiveArray::from_iter(vec![0.0f64]).into_array();
        let wrong_fields =
            StructArray::from_fields(&[("a", zero_column()), ("b", zero_column())])?.into_array();
        let from_wrong_fields =
            ExtDType::<Point>::try_new(geo_meta(), wrong_fields.dtype().clone());
        assert!(from_wrong_fields.is_err());
        Ok(())
    }

    /// Scalars executed from a `Point` column decode back to the coordinates it was built from.
    #[test]
    fn point_unpacks_coordinates() -> VortexResult<()> {
        let session = VortexSession::empty().with::<ArraySession>();
        let mut ctx = session.create_execution_ctx();

        let xs = PrimitiveArray::from_iter(vec![1.0f64, -111.7610]).into_array();
        let ys = PrimitiveArray::from_iter(vec![2.0f64, 34.8697]).into_array();
        let storage = StructArray::from_fields(&[("x", xs), ("y", ys)])?.into_array();
        let dtype = ExtDType::<Point>::try_new(geo_meta(), storage.dtype().clone())?;
        let points = ExtensionArray::new(dtype.erased(), storage).into_array();

        let expected = [
            (0, Coordinate::xy(1.0, 2.0)),
            (1, Coordinate::xy(-111.7610, 34.8697)),
        ];
        for (row, coordinate) in expected {
            let scalar = points.execute_scalar(row, &mut ctx)?;
            assert_eq!(coordinate_from_scalar(&scalar)?, coordinate);
        }
        Ok(())
    }
}
Loading
Loading