|
| 1 | +// SPDX-License-Identifier: Apache-2.0 |
| 2 | +// SPDX-FileCopyrightText: Copyright the Vortex contributors |
| 3 | + |
| 4 | +//! The coordinate building block shared by geometry extension types: the `Struct<x, y, [z], [m]>` |
| 5 | +//! storage, its [`Dimension`], the decoded [`Coordinate`] value, and the readers that decode it. |
| 6 | +//! `z`/`m` are optional, so all four GeoArrow dimensions share one value type — no third-party deps. |
| 7 | +
|
| 8 | +use std::fmt::Display; |
| 9 | +use std::fmt::Formatter; |
| 10 | + |
| 11 | +use vortex_array::ArrayRef; |
| 12 | +use vortex_array::Canonical; |
| 13 | +use vortex_array::ExecutionCtx; |
| 14 | +use vortex_array::arrays::PrimitiveArray; |
| 15 | +use vortex_array::arrays::extension::ExtensionArrayExt; |
| 16 | +use vortex_array::arrays::struct_::StructArrayExt; |
| 17 | +use vortex_array::dtype::DType; |
| 18 | +use vortex_array::dtype::FieldNames; |
| 19 | +use vortex_array::dtype::Nullability; |
| 20 | +use vortex_array::dtype::PType; |
| 21 | +use vortex_array::dtype::StructFields; |
| 22 | +use vortex_array::scalar::Scalar; |
| 23 | +use vortex_error::VortexResult; |
| 24 | +use vortex_error::vortex_bail; |
| 25 | +use vortex_error::vortex_err; |
| 26 | + |
/// The ordinate layout of a coordinate, mirroring the GeoArrow dimensions.
///
/// The field order is fixed: `x`, `y`, then `z` ahead of `m`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Dimension {
    /// Two ordinates: `x` and `y`.
    Xy,
    /// Three ordinates, the third being an elevation: `x`, `y`, `z`.
    Xyz,
    /// Three ordinates, the third being a measure: `x`, `y`, `m`.
    Xym,
    /// Four ordinates: `x`, `y`, `z`, `m`.
    Xyzm,
}
| 39 | + |
| 40 | +impl Dimension { |
| 41 | + /// The coordinate struct field names for this dimension, in GeoArrow order. |
| 42 | + pub fn field_names(self) -> &'static [&'static str] { |
| 43 | + match self { |
| 44 | + Dimension::Xy => &["x", "y"], |
| 45 | + Dimension::Xyz => &["x", "y", "z"], |
| 46 | + Dimension::Xym => &["x", "y", "m"], |
| 47 | + Dimension::Xyzm => &["x", "y", "z", "m"], |
| 48 | + } |
| 49 | + } |
| 50 | + |
| 51 | + /// Recover the dimension from a coordinate's field names, in GeoArrow order. |
| 52 | + pub fn from_field_names(names: &[&str]) -> VortexResult<Dimension> { |
| 53 | + Ok(match names { |
| 54 | + ["x", "y"] => Dimension::Xy, |
| 55 | + ["x", "y", "z"] => Dimension::Xyz, |
| 56 | + ["x", "y", "m"] => Dimension::Xym, |
| 57 | + ["x", "y", "z", "m"] => Dimension::Xyzm, |
| 58 | + _ => vortex_bail!("not a valid GeoArrow coordinate dimension: {names:?}"), |
| 59 | + }) |
| 60 | + } |
| 61 | +} |
| 62 | + |
/// A decoded coordinate. `z`/`m` are `Some` iff the storage dimension includes them.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Coordinate {
    /// The x (longitude/easting) ordinate.
    pub x: f64,
    /// The y (latitude/northing) ordinate.
    pub y: f64,
    /// The optional z (elevation) ordinate.
    pub z: Option<f64>,
    /// The optional m (measure) ordinate.
    pub m: Option<f64>,
}

impl Coordinate {
    /// A 2D coordinate (no `z`/`m`).
    pub fn xy(x: f64, y: f64) -> Self {
        Coordinate {
            x,
            y,
            z: None,
            m: None,
        }
    }
}

/// Renders the coordinate as a WKT-style point.
///
/// A 2D coordinate prints as `POINT(x y)`. When `z` and/or `m` are present they are appended in
/// `z`-before-`m` order and the tag gains the matching `Z`, `M` or `ZM` qualifier, so coordinates
/// that differ only in those ordinates no longer render identically.
impl Display for Coordinate {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Coordinate { x, y, z, m } = *self;
        match (z, m) {
            // Kept byte-identical to the historical 2D output.
            (None, None) => write!(f, "POINT({} {})", x, y),
            (Some(z), None) => write!(f, "POINT Z ({} {} {})", x, y, z),
            (None, Some(m)) => write!(f, "POINT M ({} {} {})", x, y, m),
            (Some(z), Some(m)) => write!(f, "POINT ZM ({} {} {} {})", x, y, z, m),
        }
    }
}
| 93 | + |
| 94 | +/// The coordinate storage dtype for a dimension: `Struct<x, y, [z], [m]>` of non-nullable f64. |
| 95 | +pub fn coordinate_dtype(dim: Dimension, nullability: Nullability) -> DType { |
| 96 | + let names = dim.field_names(); |
| 97 | + let fields = std::iter::repeat_n( |
| 98 | + DType::Primitive(PType::F64, Nullability::NonNullable), |
| 99 | + names.len(), |
| 100 | + ) |
| 101 | + .collect::<Vec<_>>(); |
| 102 | + DType::Struct( |
| 103 | + StructFields::new(FieldNames::from(names), fields), |
| 104 | + nullability, |
| 105 | + ) |
| 106 | +} |
| 107 | + |
| 108 | +/// Validate that `dtype` is a coordinate struct of non-nullable `f64` fields, returning its |
| 109 | +/// [`Dimension`]. Any of the four GeoArrow dimensions validates. |
| 110 | +pub fn coordinate_dimension(dtype: &DType) -> VortexResult<Dimension> { |
| 111 | + let DType::Struct(fields, _) = dtype else { |
| 112 | + vortex_bail!("coordinate storage must be a Struct, was {dtype}"); |
| 113 | + }; |
| 114 | + let names: Vec<&str> = fields.names().iter().map(|n| n.as_ref()).collect(); |
| 115 | + for (i, field) in fields.fields().enumerate() { |
| 116 | + if !matches!( |
| 117 | + field, |
| 118 | + DType::Primitive(PType::F64, Nullability::NonNullable) |
| 119 | + ) { |
| 120 | + vortex_bail!( |
| 121 | + "coordinate field {} must be non-nullable f64, was {field}", |
| 122 | + names[i] |
| 123 | + ); |
| 124 | + } |
| 125 | + } |
| 126 | + Dimension::from_field_names(&names) |
| 127 | +} |
| 128 | + |
| 129 | +/// Decode a [`Coordinate`] from a coordinate `Struct<x, y, [z], [m]>` scalar (`z`/`m` read iff |
| 130 | +/// present, so the same decoder serves every dimension). |
| 131 | +pub(crate) fn coordinate_from_struct(scalar: &Scalar) -> VortexResult<Coordinate> { |
| 132 | + let fields = scalar.as_struct(); |
| 133 | + let required = |name: &str| -> VortexResult<f64> { |
| 134 | + f64::try_from( |
| 135 | + &fields |
| 136 | + .field(name) |
| 137 | + .ok_or_else(|| vortex_err!("coordinate missing {name}"))?, |
| 138 | + ) |
| 139 | + }; |
| 140 | + let optional = |name: &str| -> VortexResult<Option<f64>> { |
| 141 | + fields |
| 142 | + .field(name) |
| 143 | + .map(|value| f64::try_from(&value)) |
| 144 | + .transpose() |
| 145 | + }; |
| 146 | + Ok(Coordinate { |
| 147 | + x: required("x")?, |
| 148 | + y: required("y")?, |
| 149 | + z: optional("z")?, |
| 150 | + m: optional("m")?, |
| 151 | + }) |
| 152 | +} |
| 153 | + |
| 154 | +/// Decode a [`Coordinate`] from an extension-typed point scalar (unwrapped to its coordinate |
| 155 | +/// storage) or a bare coordinate `Struct` scalar. The per-row decode used by the distance fns. |
| 156 | +pub fn coordinate_from_scalar(scalar: &Scalar) -> VortexResult<Coordinate> { |
| 157 | + match scalar.dtype().as_extension_opt() { |
| 158 | + Some(_) => coordinate_from_struct(&scalar.as_extension().to_storage_scalar()), |
| 159 | + None => coordinate_from_struct(scalar), |
| 160 | + } |
| 161 | +} |
| 162 | + |
| 163 | +/// Canonicalize a point column once and return its flat `x`/`y` `f64` columns. The bulk counterpart |
| 164 | +/// to [`coordinate_from_scalar`]; distance is planar, so `z`/`m` are ignored. |
| 165 | +pub(crate) fn xy_columns( |
| 166 | + points: &ArrayRef, |
| 167 | + ctx: &mut ExecutionCtx, |
| 168 | +) -> VortexResult<(PrimitiveArray, PrimitiveArray)> { |
| 169 | + let storage = points |
| 170 | + .clone() |
| 171 | + .execute::<Canonical>(ctx)? |
| 172 | + .into_extension() |
| 173 | + .storage_array() |
| 174 | + .clone() |
| 175 | + .execute::<Canonical>(ctx)? |
| 176 | + .into_struct(); |
| 177 | + let xs = storage |
| 178 | + .unmasked_field_by_name("x")? |
| 179 | + .clone() |
| 180 | + .execute::<Canonical>(ctx)? |
| 181 | + .into_primitive(); |
| 182 | + let ys = storage |
| 183 | + .unmasked_field_by_name("y")? |
| 184 | + .clone() |
| 185 | + .execute::<Canonical>(ctx)? |
| 186 | + .into_primitive(); |
| 187 | + Ok((xs, ys)) |
| 188 | +} |
0 commit comments