Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sedona.common.geometryObjects;

import java.io.Serializable;
import java.util.Objects;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.geom.Geometry;

/**
 * Planar 2D bounding box with min/max X and Y. Always a valid finite bbox; absence of a bbox (e.g.
 * bbox of an empty geometry, extent over zero rows) is represented by SQL NULL at the column level
 * rather than by an in-band sentinel. This matches PostGIS behavior and leaves {@code xmin > xmax}
 * free for a future antimeridian-wraparound semantics on geography bboxes (cf. sedona-db's {@code
 * WraparoundInterval}).
 */
public final class Box2D implements Serializable {

  private final double xmin;
  private final double ymin;
  private final double xmax;
  private final double ymax;

  /**
   * Creates a box from its four bounds. The values are stored exactly as given; this constructor
   * performs no validation or reordering.
   */
  public Box2D(double xmin, double ymin, double xmax, double ymax) {
    this.xmin = xmin;
    this.ymin = ymin;
    this.xmax = xmax;
    this.ymax = ymax;
  }

  /**
   * Builds a box from the internal envelope of {@code geometry}.
   *
   * @param geometry the geometry to measure; may be {@code null}
   * @return the bounding box, or {@code null} when the geometry is {@code null} or empty
   */
  public static Box2D fromGeometry(Geometry geometry) {
    boolean hasExtent = geometry != null && !geometry.isEmpty();
    if (!hasExtent) {
      return null;
    }
    final Envelope bounds = geometry.getEnvelopeInternal();
    double minX = bounds.getMinX();
    double minY = bounds.getMinY();
    double maxX = bounds.getMaxX();
    double maxY = bounds.getMaxY();
    return new Box2D(minX, minY, maxX, maxY);
  }

  /** Returns the lower X bound. */
  public double getXMin() {
    return xmin;
  }

  /** Returns the lower Y bound. */
  public double getYMin() {
    return ymin;
  }

  /** Returns the upper X bound. */
  public double getXMax() {
    return xmax;
  }

  /** Returns the upper Y bound. */
  public double getYMax() {
    return ymax;
  }

  /**
   * Computes the smallest box covering both {@code this} and {@code other}. Neither operand is
   * modified.
   *
   * @param other the box to merge in; {@code null} means "nothing to add", which lets callers fold
   *     over a sequence that may contain nulls
   * @return a new merged box, or {@code this} itself when {@code other} is {@code null}
   */
  public Box2D expandToInclude(Box2D other) {
    if (other != null) {
      double mergedXMin = Math.min(xmin, other.xmin);
      double mergedYMin = Math.min(ymin, other.ymin);
      double mergedXMax = Math.max(xmax, other.xmax);
      double mergedYMax = Math.max(ymax, other.ymax);
      return new Box2D(mergedXMin, mergedYMin, mergedXMax, mergedYMax);
    }
    return this;
  }

  /**
   * Two boxes are equal when all four bounds compare equal under {@link Double#compare(double,
   * double)}.
   */
  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (o instanceof Box2D) {
      Box2D that = (Box2D) o;
      return sameValue(xmin, that.xmin)
          && sameValue(ymin, that.ymin)
          && sameValue(xmax, that.xmax)
          && sameValue(ymax, that.ymax);
    }
    return false;
  }

  /** Hash over the four bounds, in the same field order used by {@link #equals(Object)}. */
  @Override
  public int hashCode() {
    return Objects.hash(xmin, ymin, xmax, ymax);
  }

  /** Renders the box as {@code BOX(xmin ymin, xmax ymax)}. */
  @Override
  public String toString() {
    return new StringBuilder("BOX(")
        .append(xmin)
        .append(" ")
        .append(ymin)
        .append(", ")
        .append(xmax)
        .append(" ")
        .append(ymax)
        .append(")")
        .toString();
  }

  /** Equality of two doubles as defined by {@code Double.compare(a, b) == 0}. */
  private static boolean sameValue(double a, double b) {
    return Double.compare(a, b) == 0;
  }
}
7 changes: 6 additions & 1 deletion python/sedona/spark/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,12 @@
from sedona.spark.sql.st_constructors import *
from sedona.spark.sql.st_functions import *
from sedona.spark.sql.st_predicates import *
from sedona.spark.sql.types import GeometryType, GeographyType, RasterType
from sedona.spark.sql.types import (
Box2DType,
GeometryType,
GeographyType,
RasterType,
)
from sedona.spark.stac import Client
from sedona.spark.stac.collection_client import CollectionClient
from sedona.spark.stats.clustering.dbscan import dbscan
Expand Down
47 changes: 47 additions & 0 deletions python/sedona/spark/core/geom/box2d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.


class Box2D:
    """Axis-aligned planar bounding box described by its min/max X and Y.

    The type is meant to always hold a valid finite bbox; a missing bbox is
    modelled as ``None`` (SQL NULL) at the column level rather than by an
    in-band sentinel. This matches PostGIS behavior and leaves ``xmin > xmax``
    free for a future antimeridian-wraparound semantics on geography bboxes.

    Note that the constructor only coerces its arguments with ``float()``; it
    does not validate finiteness or ordering of the bounds.
    """

    __slots__ = ("xmin", "ymin", "xmax", "ymax")

    def __init__(self, xmin: float, ymin: float, xmax: float, ymax: float):
        # Coerce each bound to float, in declaration order.
        for field, value in zip(Box2D.__slots__, (xmin, ymin, xmax, ymax)):
            setattr(self, field, float(value))

    def __eq__(self, other: object) -> bool:
        if isinstance(other, Box2D):
            # Compare field by field with ``==`` (not tuple equality) so float
            # comparison semantics are applied to every bound.
            return all(
                getattr(self, field) == getattr(other, field)
                for field in Box2D.__slots__
            )
        return NotImplemented

    def __hash__(self) -> int:
        bounds = tuple(getattr(self, field) for field in Box2D.__slots__)
        return hash(bounds)

    def __repr__(self) -> str:
        return "Box2D({}, {}, {}, {})".format(
            self.xmin, self.ymin, self.xmax, self.ymax
        )
7 changes: 6 additions & 1 deletion python/sedona/spark/sql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,12 @@
from sedona.spark.sql.st_constructors import *
from sedona.spark.sql.st_functions import *
from sedona.spark.sql.st_predicates import *
from sedona.spark.sql.types import GeometryType, GeographyType, RasterType
from sedona.spark.sql.types import (
Box2DType,
GeometryType,
GeographyType,
RasterType,
)

__all__ = (
[
Expand Down
40 changes: 39 additions & 1 deletion python/sedona/spark/sql/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,13 @@
# specific language governing permissions and limitations
# under the License.

from pyspark.sql.types import BinaryType, UserDefinedType
from pyspark.sql.types import (
BinaryType,
DoubleType,
StructField,
StructType,
UserDefinedType,
)

# Only support RasterType when rasterio is installed
try:
Expand All @@ -34,6 +40,7 @@

from sedona.spark.utils import geometry_serde
from sedona.spark.core.geom.geography import Geography
from sedona.spark.core.geom.box2d import Box2D


class GeometryType(UserDefinedType):
Expand Down Expand Up @@ -86,6 +93,37 @@ def scalaUDT(cls):
return "org.apache.spark.sql.sedona_sql.UDT.GeographyUDT"


class Box2DType(UserDefinedType):
    """Spark SQL user-defined type that stores a ``Box2D`` as a struct of four doubles."""

    @classmethod
    def sqlType(cls):
        """Return the storage schema: non-nullable double fields xmin, ymin, xmax, ymax."""
        field_names = ("xmin", "ymin", "xmax", "ymax")
        return StructType(
            [StructField(name, DoubleType(), nullable=False) for name in field_names]
        )

    def serialize(self, obj):
        """Flatten ``obj`` into an ``(xmin, ymin, xmax, ymax)`` tuple."""
        xmin, ymin, xmax, ymax = obj.xmin, obj.ymin, obj.xmax, obj.ymax
        return (xmin, ymin, xmax, ymax)

    def deserialize(self, datum):
        """Rebuild a ``Box2D`` from the first four positional values of ``datum``."""
        xmin, ymin, xmax, ymax = (datum[position] for position in range(4))
        return Box2D(xmin, ymin, xmax, ymax)

    @classmethod
    def module(cls):
        """Return the Python module path that hosts this type."""
        return "sedona.spark.sql.types"

    def needConversion(self):
        """Report that values of this type require conversion."""
        return True

    @classmethod
    def scalaUDT(cls):
        """Return the fully qualified name of the paired JVM-side UDT class."""
        return "org.apache.spark.sql.sedona_sql.UDT.Box2DUDT"


class RasterType(UserDefinedType):

@classmethod
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.spark.sql.sedona_sql.UDT

import org.apache.sedona.common.geometryObjects.Box2D
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
import org.apache.spark.sql.types._
import org.json4s.JsonAST.JValue
import org.json4s.JsonDSL._

/**
* UDT for [[Box2D]]. Stored as a Spark struct of four non-nullable doubles (`xmin`, `ymin`,
* `xmax`, `ymax`) so values round-trip natively to Parquet and align with GeoParquet 1.1 bbox
* covering columns.
*/
class Box2DUDT extends UserDefinedType[Box2D] {

  /** Storage schema: a struct of four non-nullable doubles in xmin, ymin, xmax, ymax order. */
  override def sqlType: DataType = StructType(
    Seq(
      StructField("xmin", DoubleType, nullable = false),
      StructField("ymin", DoubleType, nullable = false),
      StructField("xmax", DoubleType, nullable = false),
      StructField("ymax", DoubleType, nullable = false)))

  /** Fully qualified name of the paired Python-side type. */
  override def pyUDT: String = "sedona.spark.sql.types.Box2DType"

  override def userClass: Class[Box2D] = classOf[Box2D]

  /**
   * Writes the four bounds of `obj` into a new 4-slot row, using the same field order as
   * [[sqlType]].
   */
  override def serialize(obj: Box2D): InternalRow = {
    val row = new GenericInternalRow(4)
    row.setDouble(0, obj.getXMin)
    row.setDouble(1, obj.getYMin)
    row.setDouble(2, obj.getXMax)
    row.setDouble(3, obj.getYMax)
    row
  }

  /**
   * Rebuilds a [[Box2D]] from the doubles at ordinals 0 to 3 of an [[InternalRow]]. Any other
   * kind of `datum` is not handled and fails the pattern match.
   */
  override def deserialize(datum: Any): Box2D = datum match {
    case row: InternalRow =>
      new Box2D(row.getDouble(0), row.getDouble(1), row.getDouble(2), row.getDouble(3))
  }

  /**
   * JSON form of this type. The "class" field is rewritten to this class's name without a
   * trailing `$`; every other field is passed through unchanged.
   */
  override private[sql] def jsonValue: JValue = {
    super.jsonValue mapField {
      case ("class", _) => "class" -> this.getClass.getName.stripSuffix("$")
      case other: Any => other
    }
  }

  /** Every [[Box2DUDT]] instance is equal to every other one; nothing else is equal. */
  override def equals(other: Any): Boolean = other match {
    case _: Box2DUDT => true
    case _ => false
  }

  /** Derived from the user class so that all instances share one hash, matching [[equals]]. */
  override def hashCode(): Int = userClass.hashCode()

  override def toString: String = "Box2DUDT"
}

/**
 * Singleton of the Box2DUDT class: the object itself extends the class and mixes in
 * `scala.Serializable`. `apply()` builds a fresh Box2DUDT instance on every call.
 */
case object Box2DUDT
extends org.apache.spark.sql.sedona_sql.UDT.Box2DUDT
with scala.Serializable {
def apply(): Box2DUDT = new Box2DUDT()
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.spark.sql.sedona_sql.UDT

import org.apache.sedona.common.S2Geography.Geography
import org.apache.sedona.common.geometryObjects.Box2D
import org.apache.spark.sql.types.UDTRegistration
import org.locationtech.jts.geom.Geometry
import org.locationtech.jts.index.SpatialIndex
Expand All @@ -28,6 +29,7 @@ object UdtRegistratorWrapper {
def registerAll(): Unit = {
  // (user class name, UDT class name) pairs, registered in the order listed.
  val bindings: Seq[(String, String)] = Seq(
    classOf[Geometry].getName -> classOf[GeometryUDT].getName,
    classOf[Geography].getName -> classOf[GeographyUDT].getName,
    classOf[Box2D].getName -> classOf[Box2DUDT].getName,
    classOf[SpatialIndex].getName -> classOf[IndexUDT].getName)

  bindings.foreach { case (userClassName, udtClassName) =>
    registerIfNotExists(userClassName, udtClassName)
  }
}

Expand Down
Loading
Loading