Skip to content

Commit 4daf73b

Browse files
authored
feat(rust/sedona-functions): Add geography and CRS propagation to ST_Dump (#847)
1 parent 5650acb commit 4daf73b

1 file changed

Lines changed: 89 additions & 33 deletions

File tree

rust/sedona-functions/src/st_dump.rs

Lines changed: 89 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use arrow_array::{
1919
ListArray, StructArray,
2020
};
2121
use arrow_schema::{DataType, Field, Fields};
22-
use datafusion_common::error::Result;
22+
use datafusion_common::{config::ConfigOptions, Result};
2323
use datafusion_expr::{ColumnarValue, Volatility};
2424
use geo_traits::{
2525
GeometryCollectionTrait, GeometryTrait, GeometryType, MultiLineStringTrait, MultiPointTrait,
@@ -29,7 +29,7 @@ use sedona_common::sedona_internal_err;
2929
use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
3030
use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
3131
use sedona_schema::{
32-
datatypes::{SedonaType, WKB_GEOMETRY},
32+
datatypes::{SedonaType, WKB_GEOGRAPHY, WKB_GEOMETRY},
3333
matchers::ArgMatcher,
3434
};
3535
use std::{io::Write, sync::Arc};
@@ -40,11 +40,24 @@ use crate::executor::WkbExecutor;
4040
///
4141
/// Native implementation to dump a geometry into a list of {path, geom} structs, one per component geometry
4242
pub fn st_dump_udf() -> SedonaScalarUDF {
43-
SedonaScalarUDF::new("st_dump", vec![Arc::new(STDump)], Volatility::Immutable)
43+
SedonaScalarUDF::new(
44+
"st_dump",
45+
vec![
46+
Arc::new(STDump {
47+
matcher: ArgMatcher::new(vec![ArgMatcher::is_geometry()], WKB_GEOMETRY),
48+
}),
49+
Arc::new(STDump {
50+
matcher: ArgMatcher::new(vec![ArgMatcher::is_geography()], WKB_GEOGRAPHY),
51+
}),
52+
],
53+
Volatility::Immutable,
54+
)
4455
}
4556

4657
#[derive(Debug)]
47-
struct STDump;
58+
struct STDump {
59+
matcher: ArgMatcher,
60+
}
4861

4962
// Builder for the output list of {path, geom} structs
5063
struct STDumpBuilder {
@@ -54,10 +67,11 @@ struct STDumpBuilder {
5467
struct_offsets_builder: OffsetBufferBuilder<i32>,
5568
null_builder: NullBufferBuilder,
5669
parent_path: Vec<u32>,
70+
return_type: SedonaType,
5771
}
5872

5973
impl STDumpBuilder {
60-
fn new(num_iter: usize) -> Self {
74+
fn new(num_iter: usize, return_type: SedonaType) -> Self {
6175
let path_array_builder = UInt32Builder::with_capacity(num_iter);
6276
let path_array_offsets_builder = OffsetBufferBuilder::new(num_iter);
6377
let geom_builder =
@@ -72,6 +86,7 @@ impl STDumpBuilder {
7286
struct_offsets_builder,
7387
null_builder,
7488
parent_path: Vec::new(), // Reusable buffer to avoid allocation per row
89+
return_type,
7590
}
7691
}
7792

@@ -163,50 +178,59 @@ impl STDumpBuilder {
163178
self.null_builder.append(false);
164179
}
165180

166-
fn finish(mut self) -> ListArray {
181+
fn finish(mut self) -> Result<ListArray> {
167182
let path_array = Arc::new(self.path_array_builder.finish());
168183
let path_offsets = self.path_array_offsets_builder.finish();
169184
let geom_array = self.geom_builder.finish();
170185

171186
let path_field = Arc::new(Field::new("item", DataType::UInt32, true));
172187
let path_list = ListArray::new(path_field, path_offsets, path_array, None);
173188

174-
let fields = Fields::from(vec![
175-
Field::new(
176-
"path",
177-
DataType::List(Arc::new(Field::new("item", DataType::UInt32, true))),
178-
true,
179-
),
180-
WKB_GEOMETRY.to_storage_field("geom", true).unwrap(),
181-
]);
189+
let SedonaType::Arrow(DataType::List(return_list_field)) = self.return_type else {
190+
return sedona_internal_err!("Unexpected return type in st_dump()");
191+
};
192+
193+
let DataType::Struct(fields) = return_list_field.data_type() else {
194+
return sedona_internal_err!("Unexpected return type");
195+
};
196+
182197
let struct_array = StructArray::try_new(
183198
fields.clone(),
184199
vec![Arc::new(path_list), Arc::new(geom_array)],
185200
None,
186201
)
187202
.unwrap();
203+
188204
let struct_offsets = self.struct_offsets_builder.finish();
189-
let struct_field = Arc::new(Field::new("item", DataType::Struct(fields), true));
190205
let nulls = self.null_builder.finish();
191-
ListArray::new(struct_field, struct_offsets, Arc::new(struct_array), nulls)
206+
Ok(ListArray::new(
207+
return_list_field,
208+
struct_offsets,
209+
Arc::new(struct_array),
210+
nulls,
211+
))
192212
}
193213
}
194214

195215
impl SedonaScalarKernel for STDump {
196216
fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
197-
let matcher = ArgMatcher::new(vec![ArgMatcher::is_geometry()], geometry_dump_type());
198-
matcher.match_args(args)
217+
Ok(self
218+
.matcher
219+
.match_args(args)?
220+
.map(|output_type| geometry_dump_type(&output_type)))
199221
}
200222

201-
fn invoke_batch(
223+
fn invoke_batch_from_args(
202224
&self,
203225
arg_types: &[SedonaType],
204226
args: &[ColumnarValue],
227+
return_type: &SedonaType,
228+
_num_rows: usize,
229+
_config_options: Option<&ConfigOptions>,
205230
) -> Result<ColumnarValue> {
206231
let executor = WkbExecutor::new(arg_types, args);
207232

208-
let mut builder = STDumpBuilder::new(executor.num_iterations());
209-
233+
let mut builder = STDumpBuilder::new(executor.num_iterations(), return_type.clone());
210234
executor.execute_wkb_void(|maybe_wkb| {
211235
if let Some(wkb) = maybe_wkb {
212236
builder.append(&wkb)?;
@@ -217,22 +241,30 @@ impl SedonaScalarKernel for STDump {
217241
Ok(())
218242
})?;
219243

220-
executor.finish(Arc::new(builder.finish()))
244+
executor.finish(Arc::new(builder.finish()?))
245+
}
246+
247+
fn invoke_batch(
248+
&self,
249+
_arg_types: &[SedonaType],
250+
_args: &[ColumnarValue],
251+
) -> Result<ColumnarValue> {
252+
sedona_internal_err!("invoke_batch() should not be called for st_dump()")
221253
}
222254
}
223255

224-
fn geometry_dump_fields() -> Fields {
256+
fn geometry_dump_fields(geo_type: &SedonaType) -> Fields {
225257
let path = Field::new(
226258
"path",
227259
DataType::List(Field::new("item", DataType::UInt32, true).into()),
228260
true,
229261
);
230-
let geom = WKB_GEOMETRY.to_storage_field("geom", true).unwrap();
262+
let geom = geo_type.to_storage_field("geom", true).unwrap();
231263
vec![path, geom].into()
232264
}
233265

234-
fn geometry_dump_type() -> SedonaType {
235-
let fields = geometry_dump_fields();
266+
fn geometry_dump_type(geo_type: &SedonaType) -> SedonaType {
267+
let fields = geometry_dump_fields(geo_type);
236268
let struct_type = DataType::Struct(fields);
237269

238270
SedonaType::Arrow(DataType::List(Field::new("item", struct_type, true).into()))
@@ -243,7 +275,7 @@ mod tests {
243275
use arrow_array::{Array, ArrayRef, ListArray, StructArray, UInt32Array};
244276
use datafusion_expr::ScalarUDF;
245277
use rstest::rstest;
246-
use sedona_schema::datatypes::WKB_VIEW_GEOMETRY;
278+
use sedona_schema::{crs::lnglat, datatypes::Edges};
247279
use sedona_testing::{
248280
compare::assert_array_equal, create::create_array, testers::ScalarUdfTester,
249281
};
@@ -258,7 +290,10 @@ mod tests {
258290
}
259291

260292
#[rstest]
261-
fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) {
293+
fn udf(
294+
#[values(WKB_GEOMETRY, WKB_GEOGRAPHY, SedonaType::Wkb(Edges::Planar, lnglat()))]
295+
sedona_type: SedonaType,
296+
) {
262297
let tester = ScalarUdfTester::new(st_dump_udf().into(), vec![sedona_type.clone()]);
263298

264299
let input = create_array(
@@ -275,14 +310,26 @@ mod tests {
275310
&sedona_type,
276311
);
277312
let result = tester.invoke_array(input).unwrap();
278-
assert_dump_row(&result, 0, &[(&[], Some("POINT (1 2)"))]);
279-
assert_dump_row(&result, 1, &[(&[], Some("LINESTRING (1 1, 2 2)"))]);
280-
assert_dump_row(&result, 2, &[(&[], Some("POLYGON ((1 1, 2 2, 2 1, 1 1))"))]);
313+
assert_dump_row(&result, 0, &[(&[], Some("POINT (1 2)"))], &sedona_type);
314+
assert_dump_row(
315+
&result,
316+
1,
317+
&[(&[], Some("LINESTRING (1 1, 2 2)"))],
318+
&sedona_type,
319+
);
320+
assert_dump_row(
321+
&result,
322+
2,
323+
&[(&[], Some("POLYGON ((1 1, 2 2, 2 1, 1 1))"))],
324+
&sedona_type,
325+
);
281326
assert_dump_row(
282327
&result,
283328
3,
284329
&[(&[1], Some("POINT (1 1)")), (&[2], Some("POINT (2 2)"))],
330+
&sedona_type,
285331
);
332+
286333
assert_dump_row(
287334
&result,
288335
4,
@@ -291,6 +338,7 @@ mod tests {
291338
(&[2], Some("LINESTRING EMPTY")),
292339
(&[3], Some("LINESTRING (3 3, 4 4)")),
293340
],
341+
&sedona_type,
294342
);
295343
assert_dump_row(
296344
&result,
@@ -300,6 +348,7 @@ mod tests {
300348
(&[2], Some("POLYGON EMPTY")),
301349
(&[3], Some("POLYGON ((3 3, 4 4, 4 3, 3 3)))")),
302350
],
351+
&sedona_type,
303352
);
304353
assert_dump_row(
305354
&result,
@@ -311,6 +360,7 @@ mod tests {
311360
(&[2, 3], Some("LINESTRING (3 3, 4 4)")),
312361
(&[3], Some("LINESTRING (1 1, 2 2)")),
313362
],
363+
&sedona_type,
314364
);
315365
assert_dump_row(
316366
&result,
@@ -322,14 +372,20 @@ mod tests {
322372
(&[2, 1, 3], Some("LINESTRING (3 3, 4 4)")),
323373
(&[2, 2], Some("LINESTRING (1 1, 2 2)")),
324374
],
375+
&sedona_type,
325376
);
326377

327378
let null_input = create_array(&[None], &sedona_type);
328379
let result = tester.invoke_array(null_input).unwrap();
329380
assert_dump_row_null(&result, 0);
330381
}
331382

332-
fn assert_dump_row(result: &ArrayRef, row: usize, expected: &[(&[u32], Option<&str>)]) {
383+
fn assert_dump_row(
384+
result: &ArrayRef,
385+
row: usize,
386+
expected: &[(&[u32], Option<&str>)],
387+
sedona_type: &SedonaType,
388+
) {
333389
let list_array = result
334390
.as_ref()
335391
.as_any()
@@ -377,7 +433,7 @@ mod tests {
377433

378434
let expected_geom_values: Vec<Option<&str>> =
379435
expected.iter().map(|(_, geom)| *geom).collect();
380-
let expected_geom_array = create_array(&expected_geom_values, &WKB_GEOMETRY);
436+
let expected_geom_array = create_array(&expected_geom_values, sedona_type);
381437
assert_array_equal(dumped.column(1), &expected_geom_array);
382438
}
383439

0 commit comments

Comments
 (0)