diff --git a/common/src/main/java/org/apache/sedona/common/S2Geography/Distance.java b/common/src/main/java/org/apache/sedona/common/S2Geography/Distance.java index da529dc9e67..c72e8136d87 100644 --- a/common/src/main/java/org/apache/sedona/common/S2Geography/Distance.java +++ b/common/src/main/java/org/apache/sedona/common/S2Geography/Distance.java @@ -24,6 +24,22 @@ public class Distance { + /** + * Compute the angular distance, in radians, from a single point to a ShapeIndex via PointTarget + * (no index is built for the point). Returns Double.POSITIVE_INFINITY if no closest edge is found. + */ + public static double S2_distancePointToIndex(S2Point point, ShapeIndexGeography geo) { + S2ClosestEdgeQuery query = S2ClosestEdgeQuery.builder().build(geo.shapeIndex); + S2ClosestEdgeQuery.PointTarget target = + new S2ClosestEdgeQuery.PointTarget<>(point); + Optional result = query.findClosestEdge(target); + if (!result.isPresent()) { + return Double.POSITIVE_INFINITY; + } + S1ChordAngle chordAngle = (S1ChordAngle) result.get().distance(); + return chordAngle.toAngle().radians(); + } + public double S2_distance(ShapeIndexGeography geo1, ShapeIndexGeography geo2) { S2ShapeIndex index1 = geo1.shapeIndex; S2ShapeIndex index2 = geo2.shapeIndex; diff --git a/common/src/main/java/org/apache/sedona/common/S2Geography/GeographySerializer.java b/common/src/main/java/org/apache/sedona/common/S2Geography/GeographySerializer.java deleted file mode 100644 index bc83c75496a..00000000000 --- a/common/src/main/java/org/apache/sedona/common/S2Geography/GeographySerializer.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.sedona.common.S2Geography; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; - -public class GeographySerializer { - public static byte[] serialize(Geography geography) throws IOException { - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - geography.encodeTagged(outputStream, new EncodeOptions()); - return outputStream.toByteArray(); - } - - public static Geography deserialize(byte[] buffer) throws IOException { - ByteArrayInputStream inputStream = new ByteArrayInputStream(buffer); - return Geography.decodeTagged(inputStream); - } -} diff --git a/common/src/main/java/org/apache/sedona/common/S2Geography/GeographyWKBSerializer.java b/common/src/main/java/org/apache/sedona/common/S2Geography/GeographyWKBSerializer.java new file mode 100644 index 00000000000..073e711d1fe --- /dev/null +++ b/common/src/main/java/org/apache/sedona/common/S2Geography/GeographyWKBSerializer.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common.S2Geography;
+
+import java.io.IOException;
+import java.util.Arrays;
+import org.locationtech.jts.io.ByteOrderValues;
+
+/**
+ * Serializer for Geography objects using WKB as the primary format. Byte layout: [4-byte SRID
+ * big-endian][WKB payload].
+ */
+public class GeographyWKBSerializer {
+
+  /** Header size: 4 bytes SRID. */
+  private static final int HEADER_SIZE = 4;
+
+  /**
+   * Serialize a Geography to bytes using WKB format.
+   *
+   * <p>A {@link WKBGeography} contributes its stored WKB bytes unchanged; any other Geography is
+   * written as 2D big-endian WKB.
+   *
+   * @param geog the Geography to serialize
+   * @return byte array with format: [SRID 4 bytes big-endian][WKB payload]
+   */
+  public static byte[] serialize(Geography geog) throws IOException {
+    byte[] wkb;
+    if (geog instanceof WKBGeography) {
+      wkb = ((WKBGeography) geog).getWKBBytes();
+    } else {
+      WKBWriter writer = new WKBWriter(2, ByteOrderValues.BIG_ENDIAN, false);
+      wkb = writer.write(geog);
+    }
+
+    byte[] result = new byte[HEADER_SIZE + wkb.length];
+    int srid = geog.getSRID();
+    // SRID header is always big-endian, independent of the byte order used inside the payload.
+    result[0] = (byte) (srid >> 24);
+    result[1] = (byte) (srid >> 16);
+    result[2] = (byte) (srid >> 8);
+    result[3] = (byte) srid;
+    System.arraycopy(wkb, 0, result, HEADER_SIZE, wkb.length);
+    return result;
+  }
+
+  /**
+   * Deserialize bytes to a Geography using the WKB format.
+   *
+   * @param buffer the byte array to deserialize; must hold the 4-byte SRID header followed by the
+   *     WKB payload
+   * @return the deserialized Geography
+   * @throws IOException if {@code buffer} is null or shorter than the SRID header
+   */
+  public static Geography deserialize(byte[] buffer) throws IOException {
+    // Fail with a descriptive error instead of an ArrayIndexOutOfBoundsException on short input.
+    if (buffer == null || buffer.length < HEADER_SIZE) {
+      throw new IOException(
+          "Serialized Geography needs at least a "
+              + HEADER_SIZE
+              + "-byte SRID header; got "
+              + (buffer == null ? "null" : buffer.length + " byte(s)"));
+    }
+    int srid =
+        ((buffer[0] & 0xFF) << 24)
+            | ((buffer[1] & 0xFF) << 16)
+            | ((buffer[2] & 0xFF) << 8)
+            | (buffer[3] & 0xFF);
+    byte[] wkb = Arrays.copyOfRange(buffer, HEADER_SIZE, buffer.length);
+    return WKBGeography.fromWKB(wkb, srid);
+  }
+}
diff --git a/common/src/main/java/org/apache/sedona/common/S2Geography/WKBGeography.java b/common/src/main/java/org/apache/sedona/common/S2Geography/WKBGeography.java
new file mode 100644
index 00000000000..2b4adec8a23
--- /dev/null
+++ b/common/src/main/java/org/apache/sedona/common/S2Geography/WKBGeography.java
@@ -0,0 +1,348 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common.S2Geography;
+
+import com.google.common.geometry.S2CellId;
+import com.google.common.geometry.S2LatLng;
+import com.google.common.geometry.S2Point;
+import com.google.common.geometry.S2PointRegion;
+import com.google.common.geometry.S2Region;
+import com.google.common.geometry.S2Shape;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.List;
+import org.locationtech.jts.geom.Geometry;
+import org.locationtech.jts.geom.PrecisionModel;
+import org.locationtech.jts.io.ParseException;
+
+/**
+ * A Geography implementation that stores WKB bytes as the primary representation, with lazy-parsed
+ * JTS Geometry and S2 Geography caches. This enables zero-parse construction from WKB and deferred
+ * S2 parsing only when spherical operations are needed.
+ *
+ * <p>Key optimizations (per paleolimbot's review):
+ *
+ * <ul>
+ *   <li>dimension() reads the WKB type word directly (no S2 parse)
+ *   <li>region()/getCellUnionBound() for points read coordinates from WKB (no S2 parse)
+ *   <li>shape()/numShapes() use WkbS2Shape for simple types (no S2Polygon construction)
+ *   <li>ShapeIndex is built from WkbS2Shape directly (skips Geography layer)
+ * </ul>
+ */
+public class WKBGeography extends Geography {
+
+  /**
+   * When true, fromWKB() eagerly builds the S2 Geography and ShapeIndex at construction time
+   * instead of lazily on first access. This eliminates cold-path overhead for predicate-heavy
+   * workloads (ST_Contains, ST_Intersects) at the cost of slower deserialization for metric-only
+   * workloads. Set via spark.sedona.geography.eagerShapeIndex or setEagerShapeIndex(). Default
+   * false.
+   */
+  private static volatile boolean eagerShapeIndex = false;
+
+  public static void setEagerShapeIndex(boolean eager) {
+    eagerShapeIndex = eager;
+  }
+
+  public static boolean isEagerShapeIndex() {
+    return eagerShapeIndex;
+  }
+
+  /** Smallest possible WKB: 1 byte-order byte + 4-byte type word. */
+  private static final int WKB_HEADER_SIZE = 5;
+
+  private final byte[] wkbBytes;
+
+  // Lazy caches — volatile for thread safety with double-checked locking
+  private volatile Geometry jtsGeometry;
+  private volatile Geography s2Geography;
+  private volatile ShapeIndexGeography shapeIndexGeography;
+
+  private WKBGeography(byte[] wkbBytes, int srid) {
+    super(GeographyKind.UNINITIALIZED);
+    // Reject null up front: every accessor dereferences the array, so a null would otherwise
+    // surface later as an unexplained NullPointerException far from the construction site.
+    if (wkbBytes == null) {
+      throw new IllegalArgumentException("WKB bytes must not be null");
+    }
+    this.wkbBytes = wkbBytes;
+    setSRID(srid);
+  }
+
+  /**
+   * Create a WKBGeography from raw WKB bytes. When eagerShapeIndex is false (default), this is
+   * zero-parse — just wraps the byte array. When eager mode is enabled, this also builds the
+   * ShapeIndex upfront.
+   *
+   * @throws IllegalArgumentException if {@code wkb} is null
+   */
+  public static WKBGeography fromWKB(byte[] wkb, int srid) {
+    WKBGeography geog = new WKBGeography(wkb, srid);
+    if (eagerShapeIndex) {
+      geog.getShapeIndexGeography();
+    }
+    return geog;
+  }
+
+  /** Create a WKBGeography from a JTS Geometry by serializing it to WKB. */
+  public static WKBGeography fromJTS(Geometry jts) {
+    org.locationtech.jts.io.WKBWriter writer =
+        new org.locationtech.jts.io.WKBWriter(
+            2, org.locationtech.jts.io.ByteOrderValues.LITTLE_ENDIAN);
+    byte[] wkb = writer.write(jts);
+    WKBGeography geog = new WKBGeography(wkb, jts.getSRID());
+    // The caller already has the parsed geometry; seed the cache so it is never re-parsed.
+    geog.jtsGeometry = jts;
+    return geog;
+  }
+
+  /** Create a WKBGeography from an existing S2 Geography by converting it to WKB. */
+  public static WKBGeography fromS2Geography(Geography s2geog) {
+    WKBWriter writer =
+        new WKBWriter(2, org.locationtech.jts.io.ByteOrderValues.LITTLE_ENDIAN, false);
+    byte[] wkb = writer.write(s2geog);
+    WKBGeography geog = new WKBGeography(wkb, s2geog.getSRID());
+    geog.s2Geography = s2geog;
+    return geog;
+  }
+
+  /** Returns the raw WKB bytes (the internal array, not a copy). Zero cost. */
+  public byte[] getWKBBytes() {
+    return wkbBytes;
+  }
+
+  /** Returns a JTS Geometry, lazily parsed from WKB on first access. */
+  public Geometry getJTSGeometry() {
+    Geometry result = jtsGeometry;
+    if (result == null) {
+      synchronized (this) {
+        result = jtsGeometry;
+        if (result == null) {
+          try {
+            org.locationtech.jts.io.WKBReader reader = new org.locationtech.jts.io.WKBReader();
+            result = reader.read(wkbBytes);
+            result.setSRID(getSRID());
+          } catch (ParseException e) {
+            throw new RuntimeException("Failed to parse WKB to JTS Geometry", e);
+          }
+          jtsGeometry = result;
+        }
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Returns a ShapeIndexGeography, lazily built on first access. For simple types (Point,
+   * LineString, Polygon), builds the index directly from WkbS2Shape — no S2 Geography construction
+   * needed.
+   */
+  public ShapeIndexGeography getShapeIndexGeography() {
+    ShapeIndexGeography result = shapeIndexGeography;
+    if (result == null) {
+      synchronized (this) {
+        result = shapeIndexGeography;
+        if (result == null) {
+          int type = wkbBaseType();
+          if (type >= 1 && type <= 3) {
+            // Point/LineString/Polygon: build ShapeIndex from WkbS2Shape
+            // Avoids S2Loop/S2Polygon internal index builds
+            result = new ShapeIndexGeography();
+            result.shapeIndex.add(new WkbS2Shape(wkbBytes));
+          } else {
+            // Multi-types and collections fall back to full S2 parse
+            result = new ShapeIndexGeography(getS2Geography());
+          }
+          shapeIndexGeography = result;
+        }
+      }
+    }
+    return result;
+  }
+
+  /** Returns an S2 Geography, lazily parsed from WKB on first access. */
+  public Geography getS2Geography() {
+    Geography result = s2Geography;
+    if (result == null) {
+      synchronized (this) {
+        result = s2Geography;
+        if (result == null) {
+          try {
+            WKBReader reader = new WKBReader();
+            result = reader.read(wkbBytes);
+            result.setSRID(getSRID());
+          } catch (ParseException e) {
+            throw new RuntimeException("Failed to parse WKB to S2 Geography", e);
+          }
+          s2Geography = result;
+        }
+      }
+    }
+    return result;
+  }
+
+  // ─── WKB-direct optimizations (no S2 parse needed) ─────────────────────
+
+  /** EWKB SRID flag (PostGIS convention). */
+  private static final int EWKB_SRID_FLAG = 0x20000000;
+  /** EWKB Z flag (PostGIS convention). */
+  private static final int EWKB_Z_FLAG = 0x80000000;
+  /** EWKB M flag (PostGIS convention). */
+  private static final int EWKB_M_FLAG = 0x40000000;
+
+  /**
+   * Read the raw 32-bit WKB type word at offset 1 (after the byte-order byte).
+   *
+   * @throws IllegalArgumentException if the stored WKB is too short to contain a type word
+   */
+  private int wkbRawType() {
+    if (wkbBytes.length < WKB_HEADER_SIZE) {
+      throw new IllegalArgumentException(
+          "WKB too short to contain a geometry type: " + wkbBytes.length + " byte(s)");
+    }
+    // A byte-order marker of 0x01 means little-endian; anything else is read as big-endian.
+    boolean le = (wkbBytes[0] == 0x01);
+    if (le) {
+      return (wkbBytes[1] & 0xFF)
+          | ((wkbBytes[2] & 0xFF) << 8)
+          | ((wkbBytes[3] & 0xFF) << 16)
+          | ((wkbBytes[4] & 0xFF) << 24);
+    } else {
+      return ((wkbBytes[1] & 0xFF) << 24)
+          | ((wkbBytes[2] & 0xFF) << 16)
+          | ((wkbBytes[3] & 0xFF) << 8)
+          | (wkbBytes[4] & 0xFF);
+    }
+  }
+
+  /**
+   * Returns the base WKB geometry type (1-7) after stripping EWKB flags and ISO Z/M offsets. Uses
+   * the same {@code (typeInt & 0xffff) % 1000} pattern as {@link WKBReader}.
+   */
+  private int wkbBaseType() {
+    return (wkbRawType() & 0xffff) % 1000;
+  }
+
+  /** Returns true if this WKB has an embedded SRID (EWKB convention). */
+  private boolean wkbHasSRID() {
+    return (wkbRawType() & EWKB_SRID_FLAG) != 0;
+  }
+
+  /**
+   * Byte offset where the geometry payload begins (after the 5-byte header, plus 4 bytes if EWKB
+   * SRID is embedded).
+   */
+  private int wkbPayloadOffset() {
+    return wkbHasSRID() ? 9 : 5;
+  }
+
+  /** Throws if the stored WKB uses Z or M dimensions (EWKB flag or ISO type {@code >= 1000}). */
+  private void requireXYOnly() {
+    int raw = wkbRawType();
+    boolean ewkbZ = (raw & EWKB_Z_FLAG) != 0;
+    boolean ewkbM = (raw & EWKB_M_FLAG) != 0;
+    boolean isoZM = (raw & 0xffff) >= 1000;
+    if (ewkbZ || ewkbM || isoZM) {
+      throw new UnsupportedOperationException(
+          "WKBGeography only supports 2D WKB; got Z/M type: 0x" + Integer.toHexString(raw));
+    }
+  }
+
+  /** Returns true if this WKB represents a single Point (type 1). */
+  public boolean isPoint() {
+    return wkbBaseType() == 1;
+  }
+
+  /**
+   * Extract the S2Point from a Point WKB without full S2 parse.
+   *
+   * @throws IllegalStateException if the stored WKB is not a Point
+   * @throws UnsupportedOperationException if the stored WKB carries Z or M ordinates
+   */
+  public S2Point extractPoint() {
+    // For any other type the bytes at the payload offset are counts, not ordinates, so decoding
+    // them as lon/lat would silently return a meaningless point.
+    if (!isPoint()) {
+      throw new IllegalStateException(
+          "extractPoint() requires Point WKB; got base type " + wkbBaseType());
+    }
+    requireXYOnly();
+    boolean le = (wkbBytes[0] == 0x01);
+    int coordOffset = wkbPayloadOffset();
+    ByteBuffer bb =
+        ByteBuffer.wrap(wkbBytes).order(le ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
+    // WKB stores X (longitude) first, then Y (latitude).
+    double lon = bb.getDouble(coordOffset);
+    double lat = bb.getDouble(coordOffset + 8);
+    return S2LatLng.fromDegrees(lat, lon).toPoint();
+  }
+
+  // ─── Geography abstract method overrides ───────────────────────────────
+
+  @Override
+  public int dimension() {
+    // Read directly from the WKB type word — no S2 parse
+    int type = wkbBaseType();
+    switch (type) {
+      case 1:
+      case 4:
+        return 0; // Point, MultiPoint
+      case 2:
+      case 5:
+        return 1; // LineString, MultiLineString
+      case 3:
+      case 6:
+        return 2; // Polygon, MultiPolygon
+      default:
+        return -1; // GeometryCollection or unknown
+    }
+  }
+
+  @Override
+  public int numShapes() {
+    int type = wkbBaseType();
+    if (type >= 1 && type <= 3) return 1;
+    return getS2Geography().numShapes();
+  }
+
+  @Override
+  public S2Shape shape(int id) {
+    int type = wkbBaseType();
+    if (type >= 1 && type <= 3) {
+      // Simple types expose exactly one shape, so id is not consulted here.
+      return new WkbS2Shape(wkbBytes);
+    }
+    return getS2Geography().shape(id);
+  }
+
+  @Override
+  public S2Region region() {
+    if (isPoint()) {
+      return new S2PointRegion(extractPoint());
+    }
+    return getS2Geography().region();
+  }
+
+  @Override
+  public void getCellUnionBound(List<S2CellId> cellIds) {
+    if (isPoint()) {
+      cellIds.add(S2CellId.fromPoint(extractPoint()));
+      return;
+    }
+    getS2Geography().getCellUnionBound(cellIds);
+  }
+
+  @Override
+  public String toString() {
+    return getS2Geography().toString();
+  }
+
+  @Override
+  public String toString(PrecisionModel precisionModel) {
+    return getS2Geography().toString(precisionModel);
+  }
+
+  @Override
+  public String toText(PrecisionModel precisionModel) {
+    return getS2Geography().toText(precisionModel);
+  }
+
+  @Override
+  public String toEWKT() {
+    Geography s2 = getS2Geography();
+    // Re-apply the SRID: it may have been changed on this object after the S2 cache was filled.
+    s2.setSRID(getSRID());
+    return s2.toEWKT();
+  }
+
+  @Override
+  public String toEWKT(PrecisionModel precisionModel) {
+    Geography s2 = getS2Geography();
+    s2.setSRID(getSRID());
+    return s2.toEWKT(precisionModel);
+  }
+
+  @Override
+  public void encode(com.esotericsoftware.kryo.io.UnsafeOutput out, EncodeOptions opts)
+      throws java.io.IOException {
+    getS2Geography().encode(out, opts);
+  }
+}
diff --git a/common/src/main/java/org/apache/sedona/common/S2Geography/WkbS2Shape.java b/common/src/main/java/org/apache/sedona/common/S2Geography/WkbS2Shape.java
new file mode 100644
index 00000000000..d1ed1f80723
--- /dev/null
+++ b/common/src/main/java/org/apache/sedona/common/S2Geography/WkbS2Shape.java
@@ -0,0 +1,281 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common.S2Geography;
+
+import com.google.common.geometry.S2;
+import com.google.common.geometry.S2EdgeUtil;
+import com.google.common.geometry.S2LatLng;
+import com.google.common.geometry.S2Point;
+import com.google.common.geometry.S2Predicates;
+import com.google.common.geometry.S2Shape;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+/**
+ * An S2Shape implementation that reads WKB bytes once, converts all coordinates to S2Points in the
+ * constructor, and stores them in an array.
This avoids constructing S2Loop/S2Polygon objects
+ * (which each build their own internal S2ShapeIndex), while also avoiding repeated trig calls on
+ * every getEdge() access.
+ *
+ * <p>Supports Point (type 1), LineString (type 2), and Polygon (type 3). Multi-types and
+ * collections should fall back to the full S2 Geography parse path.
+ *
+ * <p>Empty inputs are represented as shapes without edges: a Point whose ordinates are NaN (the
+ * usual WKB encoding of POINT EMPTY) and a Polygon with zero rings both yield {@code numEdges() ==
+ * 0}.
+ */
+public class WkbS2Shape implements S2Shape {
+
+  private static final int EWKB_SRID_FLAG = 0x20000000;
+  private static final int EWKB_Z_FLAG = 0x80000000;
+  private static final int EWKB_M_FLAG = 0x40000000;
+
+  private final int dim; // S2 dimension: 0=point, 1=line, 2=polygon
+  private final S2Point[] vertices; // all vertices, pre-converted from WKB
+  private final int totalEdges;
+  private final int[] chainStarts; // edge offset for each chain
+  private final int[] chainLengths; // edge count for each chain
+  private final int[] vertexOffsets; // index into vertices[] for first vertex of each chain
+
+  // For polygon containsOrigin — computed eagerly at construction for polygons
+  private final boolean containsOriginValue;
+
+  /**
+   * Parse 2D Point, LineString or Polygon WKB (plain or EWKB with SRID) into S2 vertices.
+   *
+   * @param wkb the WKB bytes; the first byte selects the byte order (0x01 = little-endian)
+   * @throws UnsupportedOperationException if the WKB carries Z or M ordinates
+   * @throws IllegalArgumentException if the geometry type is not 1, 2 or 3, or a coordinate or
+   *     ring count is negative
+   */
+  public WkbS2Shape(byte[] wkb) {
+    boolean le = (wkb[0] == 0x01);
+    ByteBuffer buf =
+        ByteBuffer.wrap(wkb).order(le ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
+    int typeInt = buf.getInt(1);
+    int wkbType = (typeInt & 0xffff) % 1000;
+    if ((typeInt & EWKB_Z_FLAG) != 0
+        || (typeInt & EWKB_M_FLAG) != 0
+        || (typeInt & 0xffff) >= 1000) {
+      throw new UnsupportedOperationException(
+          "WkbS2Shape only supports 2D WKB; got Z/M type: 0x" + Integer.toHexString(typeInt));
+    }
+    // Payload begins after the 5-byte header (byte-order + type). EWKB with SRID inserts a 4-byte
+    // SRID immediately after the type, so coordinates/counts start at offset 9 in that case.
+    int payloadOffset = ((typeInt & EWKB_SRID_FLAG) != 0) ? 9 : 5;
+
+    switch (wkbType) {
+      case 1: // Point
+        {
+          this.dim = 0;
+          double lon = buf.getDouble(payloadOffset);
+          double lat = buf.getDouble(payloadOffset + 8);
+          if (Double.isNaN(lon) || Double.isNaN(lat)) {
+            // POINT EMPTY: converting NaN ordinates would put a NaN S2Point into the index, so
+            // expose a shape with no edges and no chains instead.
+            this.vertices = new S2Point[0];
+            this.totalEdges = 0;
+            this.chainStarts = new int[0];
+            this.chainLengths = new int[0];
+            this.vertexOffsets = new int[0];
+          } else {
+            S2Point p = S2LatLng.fromDegrees(lat, lon).toPoint();
+            this.vertices = new S2Point[] {p};
+            // A point is modelled as one degenerate edge in a single chain.
+            this.totalEdges = 1;
+            this.chainStarts = new int[] {0};
+            this.chainLengths = new int[] {1};
+            this.vertexOffsets = new int[] {0};
+          }
+          this.containsOriginValue = false;
+          break;
+        }
+
+      case 2: // LineString
+        {
+          this.dim = 1;
+          int numCoords = requireCount(buf.getInt(payloadOffset), "coordinate");
+          this.vertices = readVertices(buf, payloadOffset + 4, numCoords);
+          this.totalEdges = Math.max(0, numCoords - 1);
+          this.chainStarts = new int[] {0};
+          this.chainLengths = new int[] {totalEdges};
+          this.vertexOffsets = new int[] {0};
+          this.containsOriginValue = false;
+          break;
+        }
+
+      case 3: // Polygon
+        {
+          this.dim = 2;
+          int numRings = requireCount(buf.getInt(payloadOffset), "ring");
+          this.chainStarts = new int[numRings];
+          this.chainLengths = new int[numRings];
+          this.vertexOffsets = new int[numRings];
+
+          // First pass: count total vertices and compute offsets
+          int totalVerts = 0;
+          int edgeCount = 0;
+          int byteOffset = payloadOffset + 4;
+          int[] ringCoordCounts = new int[numRings];
+          int[] ringByteOffsets = new int[numRings];
+          for (int r = 0; r < numRings; r++) {
+            int ringCoords = requireCount(buf.getInt(byteOffset), "ring coordinate");
+            ringCoordCounts[r] = ringCoords;
+            ringByteOffsets[r] = byteOffset + 4;
+            byteOffset += 4 + ringCoords * 16;
+
+            // A closed WKB ring repeats its first vertex, so it has (coords - 1) edges.
+            int ringEdges = Math.max(0, ringCoords - 1);
+            chainStarts[r] = edgeCount;
+            chainLengths[r] = ringEdges;
+            vertexOffsets[r] = totalVerts;
+            edgeCount += ringEdges;
+            totalVerts += ringCoords;
+          }
+          this.totalEdges = edgeCount;
+
+          // Second pass: read all vertices at once
+          this.vertices = new S2Point[totalVerts];
+          int vi = 0;
+          for (int r = 0; r < numRings; r++) {
+            S2Point[] ringVerts = readVertices(buf, ringByteOffsets[r], ringCoordCounts[r]);
+            System.arraycopy(ringVerts, 0, vertices, vi, ringVerts.length);
+            vi += ringVerts.length;
+          }
+
+          // Eagerly compute containsOrigin from first ring
+          this.containsOriginValue = computeContainsOrigin();
+          break;
+        }
+
+      default:
+        throw new IllegalArgumentException(
+            "WkbS2Shape only supports Point(1), LineString(2), Polygon(3). Got type: " + wkbType);
+    }
+  }
+
+  @Override
+  public int numEdges() {
+    return totalEdges;
+  }
+
+  @Override
+  public void getEdge(int edgeId, MutableEdge result) {
+    if (dim == 0) {
+      // Point: degenerate edge
+      result.a = vertices[0];
+      result.b = vertices[0];
+      return;
+    }
+    // Find chain
+    int chainId = findChain(edgeId);
+    int offset = edgeId - chainStarts[chainId];
+    int vi = vertexOffsets[chainId] + offset;
+    result.a = vertices[vi];
+    result.b = vertices[vi + 1];
+  }
+
+  @Override
+  public boolean hasInterior() {
+    return dim == 2;
+  }
+
+  @Override
+  public boolean containsOrigin() {
+    return containsOriginValue;
+  }
+
+  @Override
+  public int numChains() {
+    return chainStarts.length;
+  }
+
+  @Override
+  public int getChainStart(int chainId) {
+    return chainStarts[chainId];
+  }
+
+  @Override
+  public int getChainLength(int chainId) {
+    return chainLengths[chainId];
+  }
+
+  @Override
+  public void getChainEdge(int chainId, int offset, MutableEdge result) {
+    if (dim == 0) {
+      result.a = vertices[0];
+      result.b = vertices[0];
+      return;
+    }
+    int vi = vertexOffsets[chainId] + offset;
+    result.a = vertices[vi];
+    result.b = vertices[vi + 1];
+  }
+
+  @Override
+  public void getChainPosition(int edgeId, ChainPosition result) {
+    int chainId = findChain(edgeId);
+    result.set(chainId, edgeId - chainStarts[chainId]);
+  }
+
+  @Override
+  public S2Point getChainVertex(int chainId, int edgeOffset) {
+    return vertices[vertexOffsets[chainId] + edgeOffset];
+  }
+
+  @Override
+  public int dimension() {
+    return dim;
+  }
+
+  // ─── Internal helpers ──────────────────────────────────────────────────
+
+  /** Returns {@code count}, rejecting negative values read from malformed WKB. */
+  private static int requireCount(int count, String what) {
+    if (count < 0) {
+      throw new IllegalArgumentException("Invalid WKB: negative " + what + " count: " + count);
+    }
+    return count;
+  }
+
+  /** Returns the last chain whose first edge id is {@code <= edgeId} (chain 0 if none matches). */
+  private int findChain(int edgeId) {
+    for (int i = chainStarts.length - 1; i >= 0; i--) {
+      if (edgeId >= chainStarts[i]) return i;
+    }
+    return 0;
+  }
+
+  /** Read numCoords (lon, lat) doubles from WKB and convert to S2Points. */
+  private static S2Point[] readVertices(ByteBuffer buf, int byteOffset, int numCoords) {
+    S2Point[] pts = new S2Point[numCoords];
+    for (int i = 0; i < numCoords; i++) {
+      double lon = buf.getDouble(byteOffset);
+      double lat = buf.getDouble(byteOffset + 8);
+      pts[i] = S2LatLng.fromDegrees(lat, lon).toPoint();
+      byteOffset += 16;
+    }
+    return pts;
+  }
+
+  /**
+   * Compute containsOrigin for polygon outer ring using direct edge-crossing test against
+   * S2.origin(). Same algorithm as S2Loop.initOriginAndBound() but without constructing an S2Loop
+   * (which builds its own internal S2ShapeIndex).
+   *
+   * <p>Returns false for a polygon with no rings or whose first ring has fewer than three edges.
+   *
+   * <p>NOTE(review): only the first ring is examined and its vertex order is taken as-is — confirm
+   * this is the intended result for polygons with holes or a clockwise shell.
+   */
+  private boolean computeContainsOrigin() {
+    // POLYGON EMPTY has no rings; there is no first ring to inspect.
+    if (vertexOffsets.length == 0) return false;
+
+    int start = vertexOffsets[0];
+    // Edge count of the first ring. For a closed ring this equals the number of distinct
+    // vertices, because the repeated closing vertex is not counted.
+    int numVerts = chainLengths[0];
+
+    if (numVerts < 3) return false;
+
+    // Same logic as S2Loop.initOriginAndBound():
+    // 1. Guess originInside = false
+    // 2. Check if vertex(1) is inside via angle test
+    // 3. Check if contains(vertex(1)) matches — if not, flip originInside
+    S2Point v0 = vertices[start];
+    S2Point v1 = vertices[start + 1];
+    S2Point v2 = vertices[start + 2];
+
+    boolean v1Inside =
+        !v0.equalsPoint(v1) && !v2.equalsPoint(v1) && S2Predicates.angleContainsVertex(v0, v1, v2);
+
+    // Brute force contains(vertex(1)) with originInside = false
+    boolean originInside = false;
+    S2Point origin = S2.origin();
+    S2EdgeUtil.EdgeCrosser crosser = new S2EdgeUtil.EdgeCrosser(origin, v1, v0);
+    boolean inside = originInside;
+    for (int i = 1; i <= numVerts; i++) {
+      // Wrap with modulo so the final iteration closes the ring back to vertex 0.
+      S2Point next = vertices[start + (i % numVerts)];
+      inside ^= crosser.edgeOrVertexCrossing(next);
+    }
+
+    if (v1Inside != inside) {
+      originInside = true;
+    }
+    return originInside;
+  }
+}
diff --git a/common/src/main/java/org/apache/sedona/common/geography/Constructors.java b/common/src/main/java/org/apache/sedona/common/geography/Constructors.java
index 47ee74ef44b..ced16badd24 100644
--- a/common/src/main/java/org/apache/sedona/common/geography/Constructors.java
+++ b/common/src/main/java/org/apache/sedona/common/geography/Constructors.java
@@ -23,7 +23,6 @@
 import java.util.*;
 import org.apache.sedona.common.S2Geography.*;
 import org.apache.sedona.common.S2Geography.Geography;
-import org.apache.sedona.common.S2Geography.WKBReader;
 import org.apache.sedona.common.S2Geography.WKTReader;
 import org.apache.sedona.common.utils.GeoHashDecoder;
 import org.locationtech.jts.geom.*;
@@ -32,19 +31,56 @@
 public class Constructors {
 
   public static Geography geogFromWKB(byte[] wkb) throws ParseException {
-    return new WKBReader().read(wkb);
+    int srid = extractSRIDFromEWKB(wkb);
+    return WKBGeography.fromWKB(wkb, srid);
   }
 
   public static Geography geogFromWKB(byte[] wkb, int SRID) throws ParseException {
-    Geography geog = geogFromWKB(wkb);
-    geog.setSRID(SRID);
-    return geog;
+    return WKBGeography.fromWKB(wkb, SRID);
+  }
+
+  /**
+   * Extract SRID from EWKB bytes if the SRID flag is set. Returns 0 if no SRID is embedded.
+ * Also returns 0 for null or truncated input. Byte order 0x01 is little-endian; else big-endian. + */ + private static int extractSRIDFromEWKB(byte[] wkb) { + if (wkb == null || wkb.length < 5) return 0; + boolean littleEndian = (wkb[0] == 0x01); + int typeInt; + if (littleEndian) { + typeInt = + (wkb[1] & 0xFF) + | ((wkb[2] & 0xFF) << 8) + | ((wkb[3] & 0xFF) << 16) + | ((wkb[4] & 0xFF) << 24); + } else { + typeInt = + ((wkb[1] & 0xFF) << 24) + | ((wkb[2] & 0xFF) << 16) + | ((wkb[3] & 0xFF) << 8) + | (wkb[4] & 0xFF); + } + boolean hasSRID = (typeInt & 0x20000000) != 0; + if (!hasSRID || wkb.length < 9) return 0; + if (littleEndian) { + return (wkb[5] & 0xFF) + | ((wkb[6] & 0xFF) << 8) + | ((wkb[7] & 0xFF) << 16) + | ((wkb[8] & 0xFF) << 24); + } else { + return ((wkb[5] & 0xFF) << 24) + | ((wkb[6] & 0xFF) << 16) + | ((wkb[7] & 0xFF) << 8) + | (wkb[8] & 0xFF); + } } public static Geography geogFromWKT(String wkt, int srid) throws ParseException { - Geography geog = new WKTReader().read(wkt); - geog.setSRID(srid); - return geog; + // Use S2Geography WKTReader for proper spherical normalization and error messages, + // then wrap in WKBGeography for WKB-based storage. 
+ Geography s2Geog = new WKTReader().read(wkt); + s2Geog.setSRID(srid); + return WKBGeography.fromS2Geography(s2Geog); } public static Geography geogFromEWKT(String ewkt) throws ParseException { @@ -85,6 +121,10 @@ public static Geography geogFromGeoHash(String geoHash, Integer precision) { } public static Geometry geogToGeometry(Geography geography) { + if (geography == null) return null; + if (geography instanceof WKBGeography) { + return ((WKBGeography) geography).getJTSGeometry(); + } GeometryFactory geometryFactory = new GeometryFactory(new PrecisionModel(), geography.getSRID()); return geogToGeometry(geography, geometryFactory); @@ -92,6 +132,9 @@ public static Geometry geogToGeometry(Geography geography) { public static Geometry geogToGeometry(Geography geography, GeometryFactory geometryFactory) { if (geography == null) return null; + if (geography instanceof WKBGeography) { + return ((WKBGeography) geography).getJTSGeometry(); + } Geography.GeographyKind kind = Geography.GeographyKind.fromKind(geography.getKind()); switch (kind) { case SINGLEPOINT: @@ -266,6 +309,20 @@ private static Geometry collectionToGeom(Geography g, GeometryFactory gf) { } public static Geography geomToGeography(Geometry geom) { + if (geom == null) { + return null; + } + // Build S2 Geography first for proper spherical normalization (e.g., deduplication), + // then wrap in WKBGeography for WKB-based storage. + Geography s2geog = geomToS2Geography(geom); + return WKBGeography.fromS2Geography(s2geog); + } + + /** + * Convert a JTS Geometry to an S2 Geography by building S2 shapes directly. Used internally + * when S2 objects are needed (e.g., for predicate operations). Returns null for null input. 
+ */ + static Geography geomToS2Geography(Geometry geom) { if (geom == null) { return null; } @@ -389,7 +446,7 @@ private static Geography geomCollToGeog(GeometryCollection geom) { List features = new ArrayList<>(); for (int i = 0; i < geom.getNumGeometries(); i++) { Geometry g = geom.getGeometryN(i); - Geography sub = geomToGeography(g); + Geography sub = geomToS2Geography(g); if (sub != null) features.add(sub); } return new GeographyCollection(features); diff --git a/common/src/main/java/org/apache/sedona/common/geography/Functions.java b/common/src/main/java/org/apache/sedona/common/geography/Functions.java index 0957e33aca7..e3166fa9e0f 100644 --- a/common/src/main/java/org/apache/sedona/common/geography/Functions.java +++ b/common/src/main/java/org/apache/sedona/common/geography/Functions.java @@ -22,6 +22,8 @@ import java.util.ArrayList; import java.util.List; import org.apache.sedona.common.S2Geography.*; +import org.apache.sedona.common.sphere.Haversine; +import org.locationtech.jts.geom.Geometry; public class Functions { @@ -51,7 +53,6 @@ public static Geography getEnvelope(Geography geography, boolean splitAtAntiMeri Geography envelope; if (splitAtAntiMeridian && rect.lng().isInverted()) { - // Crossing → split into two polygons S2Polygon left = rectToPolygon(lngLo, latLo, 180.0, latHi); S2Polygon right = rectToPolygon(-180.0, latLo, lngHi, latHi); envelope = @@ -63,10 +64,6 @@ public static Geography getEnvelope(Geography geography, boolean splitAtAntiMeri return envelope; } - /** - * Build an S2Polygon rectangle (lng/lat in degrees), CCW ring: (lo,lo) → (hi,lo) → (hi,hi) → - * (lo,hi). 
- */ private static S2Polygon rectToPolygon(double lngLo, double latLo, double lngHi, double latHi) { ArrayList v = new ArrayList<>(4); v.add(S2LatLng.fromDegrees(latLo, lngLo).toPoint()); @@ -75,14 +72,88 @@ private static S2Polygon rectToPolygon(double lngLo, double latLo, double lngHi, v.add(S2LatLng.fromDegrees(latHi, lngLo).toPoint()); S2Loop loop = new S2Loop(v); - // Optional: normalize for canonical orientation (keeps the smaller-area side) loop.normalize(); return new S2Polygon(loop); } + // ─── Level 1: JTS-only structural operations ───────────────────────────── + + /** Return the number of points in a geography. */ + public static int nPoints(Geography g) { + if (g == null) return 0; + return toJTS(g).getNumPoints(); + } + + // ─── Level 2: JTS + S2 geodesic metrics ────────────────────────────────── + + /** + * Geometry-to-geometry geodesic distance in meters. Uses S2ClosestEdgeQuery for true minimum + * distance between any two points on the geometries (not centroid-to-centroid). Consistent with + * sedona-db's s2_distance implementation. 
+ */ + public static Double distance(Geography g1, Geography g2) { + if (g1 == null || g2 == null) return null; + if (g1 instanceof WKBGeography && g2 instanceof WKBGeography) { + WKBGeography w1 = (WKBGeography) g1; + WKBGeography w2 = (WKBGeography) g2; + // Fast path: point-to-point distance without building ShapeIndex + if (w1.isPoint() && w2.isPoint()) { + S1Angle angle = new S1Angle(w1.extractPoint(), w2.extractPoint()); + return angle.radians() * Haversine.AVG_EARTH_RADIUS; + } + // Fast path: point-to-complex uses PointTarget (avoids building ShapeIndex for point side) + if (w1.isPoint()) { + double radians = Distance.S2_distancePointToIndex(w1.extractPoint(), toShapeIndex(w2)); + return radiansToMeters(radians); + } + if (w2.isPoint()) { + double radians = Distance.S2_distancePointToIndex(w2.extractPoint(), toShapeIndex(w1)); + return radiansToMeters(radians); + } + } + // General path via ShapeIndex + Distance dist = new Distance(); + double radians = dist.S2_distance(toShapeIndex(g1), toShapeIndex(g2)); + return radiansToMeters(radians); + } + + // ─── Level 3: S2 spherical predicates ──────────────────────────────────── + + /** Spherical containment test using S2 boolean operations. 
*/ + public static boolean contains(Geography g1, Geography g2) { + if (g1 == null || g2 == null) return false; + // A point (dimension 0) cannot contain anything + if (g1.dimension() == 0) return false; + + Predicates pred = new Predicates(); + return pred.S2_contains(toShapeIndex(g1), toShapeIndex(g2), s2Options()); + } + /** Return EWKT for geography object */ public static String asEWKT(Geography geography) { return geography.toEWKT(); } + + // ─── Helpers ─────────────────────────────────────────────────────────────── + + private static Geometry toJTS(Geography g) { + if (g instanceof WKBGeography) return ((WKBGeography) g).getJTSGeometry(); + return Constructors.geogToGeometry(g); + } + + private static ShapeIndexGeography toShapeIndex(Geography g) { + if (g instanceof WKBGeography) { + return ((WKBGeography) g).getShapeIndexGeography(); + } + return new ShapeIndexGeography(g); + } + + private static S2BooleanOperation.Options s2Options() { + return new S2BooleanOperation.Options(); + } + + private static double radiansToMeters(double radians) { + return radians * Haversine.AVG_EARTH_RADIUS; + } } diff --git a/common/src/main/java/org/apache/sedona/common/geometrySerde/GeometrySerde.java b/common/src/main/java/org/apache/sedona/common/geometrySerde/GeometrySerde.java index 489efc2b4a8..c3e28a87f9c 100644 --- a/common/src/main/java/org/apache/sedona/common/geometrySerde/GeometrySerde.java +++ b/common/src/main/java/org/apache/sedona/common/geometrySerde/GeometrySerde.java @@ -26,7 +26,7 @@ import java.io.IOException; import java.io.Serializable; import org.apache.sedona.common.S2Geography.Geography; -import org.apache.sedona.common.S2Geography.GeographySerializer; +import org.apache.sedona.common.S2Geography.GeographyWKBSerializer; import org.apache.sedona.common.geometryObjects.Circle; import org.locationtech.jts.geom.Envelope; import org.locationtech.jts.geom.Geometry; @@ -91,7 +91,7 @@ private void writeGeometry(Kryo kryo, Output out, Geometry geometry) { } 
private void writeGeography(Output out, Geography geography) throws IOException { - byte[] data = GeographySerializer.serialize(geography); + byte[] data = GeographyWKBSerializer.serialize(geography); out.writeInt(data.length); out.write(data, 0, data.length); } @@ -170,7 +170,7 @@ private Geography readGeography(Input input) throws IOException { int length = input.readInt(); byte[] bytes = new byte[length]; input.readBytes(bytes); - Geography geography = GeographySerializer.deserialize(bytes); + Geography geography = GeographyWKBSerializer.deserialize(bytes); return geography; } diff --git a/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java b/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java index aac8247a8de..6787c5ad876 100644 --- a/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java +++ b/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java @@ -18,8 +18,7 @@ */ package org.apache.sedona.common.Geography; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.*; import com.google.common.geometry.S2LatLng; import com.google.common.geometry.S2LatLngRect; @@ -45,25 +44,22 @@ private static void assertLatLng(S2Point p, double expLatDeg, double expLngDeg) assertDegAlmostEqual(ll.lngDegrees(), expLngDeg); } - /** Assert a *single* rectangular envelope polygon has these 4 corners in SW→SE→NE→NW order. 
*/ private static void assertRectLoopVertices( S2Loop loop, double latLo, double lngLo, double latHi, double lngHi) { assertEquals("rect must have 4 vertices", 4, loop.numVertices()); - // SW assertLatLng(loop.vertex(0), latLo, lngLo); - // SE assertLatLng(loop.vertex(1), latLo, lngHi); - // NE assertLatLng(loop.vertex(2), latHi, lngHi); - // NW assertLatLng(loop.vertex(3), latHi, lngLo); } + // ─── Envelope tests (pre-existing) ─────────────────────────────────────── + @Test public void envelope_noSplit_antimeridian() throws Exception { String wkt = "MULTIPOINT ((-179 0), (179 1), (-180 10))"; Geography g = Constructors.geogFromWKT(wkt, 4326); - PolygonGeography env = (PolygonGeography) Functions.getEnvelope(g, /*split*/ false); + PolygonGeography env = (PolygonGeography) Functions.getEnvelope(g, false); S2LatLngRect r = g.region().getRectBound(); assertTrue(r.lng().isInverted()); @@ -73,7 +69,7 @@ public void envelope_noSplit_antimeridian() throws Exception { assertDegAlmostEqual(r.lngHi().degrees(), -179.0); S2Loop loop = env.polygon.getLoops().get(0); - assertRectLoopVertices(loop, /*latLo*/ 0, /*lngLo*/ 179, /*latHi*/ 10, /*lngHi*/ -179); + assertRectLoopVertices(loop, 0, 179, 10, -179); } @Test @@ -89,38 +85,22 @@ public void envelope_netherlands_perVertex() throws Exception { @Test public void envelope_fiji_split_perVertex() throws Exception { - // <-------------------- WESTERN HEMISPHERE | EASTERN HEMISPHERE --------------------> - // - // Longitude: ... -179.8° -180°| 180° 177.3° ... 
- // ----------------------------------+-------------------------------------------- - // | - // Latitude | - // -16° +------------------------+ +------------------------+ - // | | | | - // | POLYGON 2 | | POLYGON 1 | - // | | | | - // -18.3° +------------------------+ +------------------------+ - // | - // | - // ^ - // | - // Antimeridian - // (The map's seam at 180°) String fiji = "MULTIPOLYGON (" + "((177.285 -18.28799, 180 -18.28799, 180 -16.02088, 177.285 -16.02088, 177.285 -18.28799))," + "((-180 -18.28799, -179.7933 -18.28799, -179.7933 -16.02088, -180 -16.02088, -180 -18.28799))" + ")"; Geography g = Constructors.geogFromWKT(fiji, 4326); - Geography env = Functions.getEnvelope(g, /*split*/ true); + Geography env = Functions.getEnvelope(g, true); String expectedWKT = "MULTIPOLYGON (((177.3 -18.3, 180 -18.3, 180 -16, 177.3 -16, 177.3 -18.3)), " + "((-180 -18.3, -179.8 -18.3, -179.8 -16, -180 -16, -180 -18.3)))"; assertEquals(expectedWKT, env.toString()); - expectedWKT = "POLYGON ((177.3 -18.3, -179.8 -18.3, -179.8 -16, 177.3 -16, 177.3 -18.3))"; - env = Functions.getEnvelope(g, /*split*/ false); - assertEquals(expectedWKT, env.toString()); + String expectedWKT2 = + "POLYGON ((177.3 -18.3, -179.8 -18.3, -179.8 -16, 177.3 -16, 177.3 -18.3))"; + env = Functions.getEnvelope(g, false); + assertEquals(expectedWKT2, env.toString()); } @Test @@ -149,4 +129,62 @@ public void testEnvelopeWKTCompare() throws Exception { String expectedWKT2 = "POLYGON ((-180 53.5, 180 53.5, 180 -90, -180 -90, -180 53.5))"; assertEquals((expectedWKT2), (env.toString())); } + + // ─── Level 1: ST_NPoints ───────────────────────────────────────────────── + + @Test + public void nPoints_linestring() throws ParseException { + Geography g = Constructors.geogFromWKT("LINESTRING (0 0, 1 1, 2 2)", 4326); + assertEquals(3, Functions.nPoints(g)); + } + + @Test + public void nPoints_polygon() throws ParseException { + Geography g = Constructors.geogFromWKT("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", 
4326); + assertEquals(5, Functions.nPoints(g)); + } + + // ─── Level 2: ST_Distance ──────────────────────────────────────────────── + + @Test + public void distance_twoPoints() throws ParseException { + Geography g1 = Constructors.geogFromWKT("POINT (0 0)", 4326); + Geography g2 = Constructors.geogFromWKT("POINT (1 1)", 4326); + + Double result = Functions.distance(g1, g2); + assertNotNull(result); + // S2 geometry-to-geometry distance ~157 km (spherical model) + assertTrue("Distance should be ~157 km, got " + result, result > 155000 && result < 160000); + } + + @Test + public void distance_nullHandling() throws ParseException { + Geography g1 = Constructors.geogFromWKT("POINT (0 0)", 4326); + assertNull(Functions.distance(g1, null)); + assertNull(Functions.distance(null, g1)); + assertNull(Functions.distance(null, null)); + } + + // ─── Level 3: ST_Contains ──────────────────────────────────────────────── + + @Test + public void contains_pointInPolygon() throws ParseException { + Geography g1 = Constructors.geogFromWKT("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", 4326); + Geography g2 = Constructors.geogFromWKT("POINT (0.5 0.5)", 4326); + assertTrue(Functions.contains(g1, g2)); + } + + @Test + public void contains_pointOutsidePolygon() throws ParseException { + Geography g1 = Constructors.geogFromWKT("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", 4326); + Geography g2 = Constructors.geogFromWKT("POINT (2 2)", 4326); + assertFalse(Functions.contains(g1, g2)); + } + + @Test + public void contains_nullHandling() throws ParseException { + Geography g1 = Constructors.geogFromWKT("POINT (1 1)", 4326); + assertFalse(Functions.contains(g1, null)); + assertFalse(Functions.contains(null, g1)); + } } diff --git a/common/src/test/java/org/apache/sedona/common/S2Geography/TestHelper.java b/common/src/test/java/org/apache/sedona/common/S2Geography/TestHelper.java index 13768978ab2..fd6fcfda684 100644 --- a/common/src/test/java/org/apache/sedona/common/S2Geography/TestHelper.java +++ 
b/common/src/test/java/org/apache/sedona/common/S2Geography/TestHelper.java @@ -174,7 +174,7 @@ public static void checkWKBGeography(String wkbHex, String expectedWKT) throws P WKTReader wktReader = new WKTReader(); Geography geoWKT = wktReader.read(expectedWKT); - boolean isEqual = compareTo(geoWKT, geoWKT) == 0; + boolean isEqual = compareTo(geoWKB, geoWKT) == 0; if (!isEqual) { log.debug("geoWKB: {}", geoWKB); log.debug("geoWKT: {}", geoWKT); @@ -183,6 +183,13 @@ public static void checkWKBGeography(String wkbHex, String expectedWKT) throws P } public static int compareTo(Geography geo1, Geography geo2) { + // Empty geometries of the same runtime subtype are treated as equal, even when the WKB and + // WKT readers tag them with different GeographyKind values (e.g. SINGLEPOINT vs POINT for + // an empty POINT). This check must come before the kind-based ordering below. + if (S2_isEmpty(geo1) && S2_isEmpty(geo2) && geo1.getClass() == geo2.getClass()) { + return 0; + } + int compare = geo1.kind.getKind() - geo2.kind.getKind(); if (compare != 0) { return compare; diff --git a/common/src/test/java/org/apache/sedona/common/S2Geography/WKBGeographyTest.java b/common/src/test/java/org/apache/sedona/common/S2Geography/WKBGeographyTest.java new file mode 100644 index 00000000000..dd5ededd87e --- /dev/null +++ b/common/src/test/java/org/apache/sedona/common/S2Geography/WKBGeographyTest.java @@ -0,0 +1,438 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.common.S2Geography; + +import static org.junit.Assert.*; + +import com.google.common.geometry.S2LatLng; +import com.google.common.geometry.S2Point; +import java.io.IOException; +import org.apache.sedona.common.geography.Constructors; +import org.junit.Test; +import org.locationtech.jts.geom.Coordinate; +import org.locationtech.jts.geom.Geometry; +import org.locationtech.jts.geom.GeometryFactory; +import org.locationtech.jts.geom.Point; +import org.locationtech.jts.io.ByteOrderValues; +import org.locationtech.jts.io.ParseException; + +public class WKBGeographyTest { + + private static final double EPS = 1e-10; + + // ─── WKBGeography creation and lazy parsing ────────────────────────────── + + @Test + public void fromWKB_point_lazyParse() throws ParseException { + // Create WKB for POINT(30 10) using S2 WKBWriter + S2Point s2Pt = S2LatLng.fromDegrees(10.0, 30.0).toPoint(); + Geography s2Geog = new SinglePointGeography(s2Pt); + WKBWriter writer = new WKBWriter(2, ByteOrderValues.BIG_ENDIAN, false); + byte[] wkb = writer.write(s2Geog); + + WKBGeography geog = WKBGeography.fromWKB(wkb, 4326); + assertEquals(4326, geog.getSRID()); + assertSame(wkb, geog.getWKBBytes()); + + // Accessing JTS should parse lazily + Geometry jts = geog.getJTSGeometry(); + assertNotNull(jts); + assertTrue(jts instanceof Point); + assertEquals(30.0, ((Point) jts).getX(), EPS); + assertEquals(10.0, ((Point) jts).getY(), EPS); + assertEquals(4326, jts.getSRID()); + } + + @Test + public void fromJTS_point() { + GeometryFactory gf = new 
GeometryFactory(); + Point jtsPoint = gf.createPoint(new Coordinate(30.0, 10.0)); + jtsPoint.setSRID(4326); + + WKBGeography geog = WKBGeography.fromJTS(jtsPoint); + assertEquals(4326, geog.getSRID()); + assertNotNull(geog.getWKBBytes()); + + // JTS should be cached from construction + Geometry roundTrip = geog.getJTSGeometry(); + assertSame(jtsPoint, roundTrip); + } + + @Test + public void fromJTS_polygon() throws ParseException { + org.locationtech.jts.io.WKTReader jtsReader = new org.locationtech.jts.io.WKTReader(); + Geometry jts = jtsReader.read("POLYGON ((0 0, 0 10, 10 10, 10 0, 0 0))"); + jts.setSRID(4326); + + WKBGeography geog = WKBGeography.fromJTS(jts); + assertEquals(4326, geog.getSRID()); + + // Round-trip through JTS + Geometry roundTrip = geog.getJTSGeometry(); + assertSame(jts, roundTrip); + } + + @Test + public void fromJTS_linestring() throws ParseException { + org.locationtech.jts.io.WKTReader jtsReader = new org.locationtech.jts.io.WKTReader(); + Geometry jts = jtsReader.read("LINESTRING (1 2, 3 4, 5 6)"); + + WKBGeography geog = WKBGeography.fromJTS(jts); + Geometry roundTrip = geog.getJTSGeometry(); + assertSame(jts, roundTrip); + } + + @Test + public void fromJTS_multiPolygon() throws ParseException { + org.locationtech.jts.io.WKTReader jtsReader = new org.locationtech.jts.io.WKTReader(); + Geometry jts = + jtsReader.read( + "MULTIPOLYGON(((0 0,0 10,10 10,10 0,0 0)),((20 20,20 30,30 30,30 20,20 20)))"); + + WKBGeography geog = WKBGeography.fromJTS(jts); + Geometry roundTrip = geog.getJTSGeometry(); + assertSame(jts, roundTrip); + } + + @Test + public void fromJTS_collection() throws ParseException { + org.locationtech.jts.io.WKTReader jtsReader = new org.locationtech.jts.io.WKTReader(); + Geometry jts = jtsReader.read("GEOMETRYCOLLECTION(POINT(1 2),LINESTRING(3 4,5 6))"); + + WKBGeography geog = WKBGeography.fromJTS(jts); + Geometry roundTrip = geog.getJTSGeometry(); + assertSame(jts, roundTrip); + } + + @Test + public void 
fromS2Geography_point() { + S2Point s2Pt = S2LatLng.fromDegrees(10.0, 30.0).toPoint(); + Geography s2Geog = new SinglePointGeography(s2Pt); + s2Geog.setSRID(4326); + + WKBGeography geog = WKBGeography.fromS2Geography(s2Geog); + assertEquals(4326, geog.getSRID()); + assertNotNull(geog.getWKBBytes()); + + // S2 should be cached from construction + Geography roundTrip = geog.getS2Geography(); + assertSame(s2Geog, roundTrip); + } + + // ─── Lazy S2 delegation ────────────────────────────────────────────────── + + @Test + public void dimension_triggersS2Parse() throws ParseException { + org.locationtech.jts.io.WKTReader jtsReader = new org.locationtech.jts.io.WKTReader(); + Geometry jts = jtsReader.read("POINT (30 10)"); + WKBGeography geog = WKBGeography.fromJTS(jts); + + // dimension() should work via lazy S2 parse + assertEquals(0, geog.dimension()); // point = 0 + + // linestring = 1 + jts = jtsReader.read("LINESTRING (0 0, 1 1)"); + geog = WKBGeography.fromJTS(jts); + assertEquals(1, geog.dimension()); + + // polygon = 2 + jts = jtsReader.read("POLYGON ((0 0, 0 10, 10 10, 10 0, 0 0))"); + geog = WKBGeography.fromJTS(jts); + assertEquals(2, geog.dimension()); + } + + @Test + public void numShapes_triggersS2Parse() throws ParseException { + org.locationtech.jts.io.WKTReader jtsReader = new org.locationtech.jts.io.WKTReader(); + Geometry jts = jtsReader.read("POINT (30 10)"); + WKBGeography geog = WKBGeography.fromJTS(jts); + assertTrue(geog.numShapes() >= 1); + } + + @Test + public void region_triggersS2Parse() throws ParseException { + org.locationtech.jts.io.WKTReader jtsReader = new org.locationtech.jts.io.WKTReader(); + Geometry jts = jtsReader.read("POINT (30 10)"); + WKBGeography geog = WKBGeography.fromJTS(jts); + assertNotNull(geog.region()); + } + + @Test + public void toString_works() throws ParseException { + org.locationtech.jts.io.WKTReader jtsReader = new org.locationtech.jts.io.WKTReader(); + Geometry jts = jtsReader.read("POINT (1 1)"); + WKBGeography 
geog = WKBGeography.fromJTS(jts); + assertEquals("POINT (1 1)", geog.toString()); + } + + @Test + public void toEWKT_works() throws ParseException { + org.locationtech.jts.io.WKTReader jtsReader = new org.locationtech.jts.io.WKTReader(); + Geometry jts = jtsReader.read("POINT (1 1)"); + jts.setSRID(4326); + WKBGeography geog = WKBGeography.fromJTS(jts); + assertEquals("SRID=4326; POINT (1 1)", geog.toEWKT()); + } + + // ─── Serializer round-trip ─────────────────────────────────────────────── + + @Test + public void serialize_deserialize_point() throws IOException, ParseException { + org.locationtech.jts.io.WKTReader jtsReader = new org.locationtech.jts.io.WKTReader(); + Geometry jts = jtsReader.read("POINT (30 10)"); + jts.setSRID(4326); + WKBGeography original = WKBGeography.fromJTS(jts); + + byte[] bytes = GeographyWKBSerializer.serialize(original); + + Geography deserialized = GeographyWKBSerializer.deserialize(bytes); + assertTrue(deserialized instanceof WKBGeography); + assertEquals(4326, deserialized.getSRID()); + assertEquals("POINT (30 10)", deserialized.toString()); + } + + @Test + public void serialize_deserialize_polygon_withSRID() throws IOException, ParseException { + org.locationtech.jts.io.WKTReader jtsReader = new org.locationtech.jts.io.WKTReader(); + Geometry jts = jtsReader.read("POLYGON ((0 0, 0 10, 10 10, 10 0, 0 0))"); + jts.setSRID(32632); + WKBGeography original = WKBGeography.fromJTS(jts); + + byte[] bytes = GeographyWKBSerializer.serialize(original); + Geography deserialized = GeographyWKBSerializer.deserialize(bytes); + + assertTrue(deserialized instanceof WKBGeography); + assertEquals(32632, deserialized.getSRID()); + assertEquals(2, deserialized.dimension()); + } + + @Test + public void serialize_deserialize_collection() throws IOException, ParseException { + org.locationtech.jts.io.WKTReader jtsReader = new org.locationtech.jts.io.WKTReader(); + Geometry jts = jtsReader.read("GEOMETRYCOLLECTION(POINT(1 2),LINESTRING(3 4,5 6))"); + 
WKBGeography original = WKBGeography.fromJTS(jts); + + byte[] bytes = GeographyWKBSerializer.serialize(original); + Geography deserialized = GeographyWKBSerializer.deserialize(bytes); + + assertTrue(deserialized instanceof WKBGeography); + assertEquals( + "GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (3 4, 5 6))", deserialized.toString()); + } + + @Test + public void serialize_deserialize_emptyPoint() throws IOException, ParseException { + org.locationtech.jts.io.WKTReader jtsReader = new org.locationtech.jts.io.WKTReader(); + Geometry jts = jtsReader.read("POINT EMPTY"); + WKBGeography original = WKBGeography.fromJTS(jts); + + byte[] bytes = GeographyWKBSerializer.serialize(original); + Geography deserialized = GeographyWKBSerializer.deserialize(bytes); + assertTrue(deserialized instanceof WKBGeography); + } + + // ─── Serialize S2 Geography via new serializer ──────────────────────────── + + @Test + public void serialize_s2Geography_producesWKBFormat() throws IOException { + S2Point s2Pt = S2LatLng.fromDegrees(10.0, 30.0).toPoint(); + Geography s2Geog = new SinglePointGeography(s2Pt); + s2Geog.setSRID(4326); + + // Serialize S2 Geography (not WKBGeography) via new serializer + byte[] bytes = GeographyWKBSerializer.serialize(s2Geog); + + // Deserialize and verify + Geography deserialized = GeographyWKBSerializer.deserialize(bytes); + assertTrue(deserialized instanceof WKBGeography); + assertEquals(4326, deserialized.getSRID()); + assertEquals("POINT (30 10)", deserialized.toString()); + } + + // ─── SRID preservation ─────────────────────────────────────────────────── + + @Test + public void srid_preservedThroughRoundTrip() throws IOException, ParseException { + org.locationtech.jts.io.WKTReader jtsReader = new org.locationtech.jts.io.WKTReader(); + Geometry jts = jtsReader.read("POINT (1 2)"); + jts.setSRID(32632); + WKBGeography original = WKBGeography.fromJTS(jts); + + byte[] bytes = GeographyWKBSerializer.serialize(original); + Geography deserialized = 
GeographyWKBSerializer.deserialize(bytes); + assertEquals(32632, deserialized.getSRID()); + } + + @Test + public void srid_zero_default() throws IOException, ParseException { + org.locationtech.jts.io.WKTReader jtsReader = new org.locationtech.jts.io.WKTReader(); + Geometry jts = jtsReader.read("POINT (1 2)"); + WKBGeography original = WKBGeography.fromJTS(jts); + assertEquals(0, original.getSRID()); + + byte[] bytes = GeographyWKBSerializer.serialize(original); + Geography deserialized = GeographyWKBSerializer.deserialize(bytes); + assertEquals(0, deserialized.getSRID()); + } + + // ─── Constructor integration ───────────────────────────────────────────── + + @Test + public void geogFromWKB_returnsWKBGeography() throws ParseException { + // Create WKB bytes for POINT(30 10) + org.locationtech.jts.io.WKTReader jtsReader = new org.locationtech.jts.io.WKTReader(); + Geometry jts = jtsReader.read("POINT (30 10)"); + org.locationtech.jts.io.WKBWriter jtsWkbWriter = new org.locationtech.jts.io.WKBWriter(); + byte[] wkb = jtsWkbWriter.write(jts); + + Geography geog = Constructors.geogFromWKB(wkb, 4326); + assertTrue(geog instanceof WKBGeography); + assertEquals(4326, geog.getSRID()); + assertEquals("POINT (30 10)", geog.toString()); + } + + @Test + public void geogFromWKT_returnsWKBGeography() throws ParseException { + Geography geog = Constructors.geogFromWKT("POINT (1 1)", 4326); + assertTrue(geog instanceof WKBGeography); + assertEquals(4326, geog.getSRID()); + assertEquals("POINT (1 1)", geog.toString()); + } + + @Test + public void geomToGeography_returnsWKBGeography() { + GeometryFactory gf = new GeometryFactory(); + Point jtsPoint = gf.createPoint(new Coordinate(30.0, 10.0)); + jtsPoint.setSRID(4326); + + Geography geog = Constructors.geomToGeography(jtsPoint); + assertTrue(geog instanceof WKBGeography); + assertEquals(4326, geog.getSRID()); + } + + @Test + public void geogToGeometry_fastPath() throws ParseException { + org.locationtech.jts.io.WKTReader jtsReader 
= new org.locationtech.jts.io.WKTReader(); + Geometry jts = jtsReader.read("POINT (30 10)"); + jts.setSRID(4326); + WKBGeography geog = WKBGeography.fromJTS(jts); + + // geogToGeometry should use getJTSGeometry() fast path + Geometry result = Constructors.geogToGeometry(geog); + assertNotNull(result); + // The result should be the cached JTS object + assertSame(jts, result); + } + + @Test + public void geogFromEWKT_returnsWKBGeography() throws ParseException { + Geography geog = Constructors.geogFromEWKT("SRID=4269; POINT (1 1)"); + assertTrue(geog instanceof WKBGeography); + assertEquals(4269, geog.getSRID()); + assertEquals("SRID=4269; POINT (1 1)", geog.toEWKT()); + } + + // ─── Eager ShapeIndex mode ─────────────────────────────────────────────── + + @Test + public void eagerShapeIndex_prebuildsS2AndIndex() throws ParseException { + boolean original = WKBGeography.isEagerShapeIndex(); + try { + WKBGeography.setEagerShapeIndex(true); + + // Create WKB bytes for a polygon + org.locationtech.jts.io.WKTReader jtsReader = new org.locationtech.jts.io.WKTReader(); + Geometry jts = jtsReader.read("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))"); + org.locationtech.jts.io.WKBWriter jtsWkbWriter = new org.locationtech.jts.io.WKBWriter(); + byte[] wkb = jtsWkbWriter.write(jts); + + // fromWKB should eagerly build ShapeIndex + WKBGeography geog = WKBGeography.fromWKB(wkb, 4326); + + // ShapeIndex should already be built — getShapeIndexGeography() should return cached + ShapeIndexGeography idx = geog.getShapeIndexGeography(); + assertNotNull(idx); + assertTrue(idx.numShapes() >= 1); + + // S2 Geography should also be cached + Geography s2 = geog.getS2Geography(); + assertNotNull(s2); + assertEquals(2, s2.dimension()); // polygon = 2 + } finally { + WKBGeography.setEagerShapeIndex(original); + } + } + + @Test + public void eagerShapeIndex_defaultIsLazy() { + assertFalse(WKBGeography.isEagerShapeIndex()); + } + + // ─── EWKB / ISO Z-M decoding 
───────────────────────────────────────────── + + /** Builds a PostGIS-style EWKB Point (little-endian) with the SRID flag set. */ + private static byte[] buildEwkbPointWithSRID(double lon, double lat, int srid) { + java.nio.ByteBuffer buf = java.nio.ByteBuffer.allocate(25); + buf.order(java.nio.ByteOrder.LITTLE_ENDIAN); + buf.put((byte) 0x01); // little endian + buf.putInt(1 | 0x20000000); // POINT with EWKB SRID flag + buf.putInt(srid); // SRID + buf.putDouble(lon); + buf.putDouble(lat); + return buf.array(); + } + + /** Builds an ISO WKB PointZ (little-endian) with type 1001. */ + private static byte[] buildIsoPointZ(double lon, double lat, double z) { + java.nio.ByteBuffer buf = java.nio.ByteBuffer.allocate(29); + buf.order(java.nio.ByteOrder.LITTLE_ENDIAN); + buf.put((byte) 0x01); + buf.putInt(1001); // ISO PointZ + buf.putDouble(lon); + buf.putDouble(lat); + buf.putDouble(z); + return buf.array(); + } + + @Test + public void ewkbPoint_withSRIDFlag_decodesCorrectly() throws ParseException { + byte[] ewkb = buildEwkbPointWithSRID(30.0, 10.0, 4326); + WKBGeography geog = WKBGeography.fromWKB(ewkb, 4326); + + // isPoint() must recognize the base type after stripping the EWKB SRID flag. + assertTrue(geog.isPoint()); + assertEquals(0, geog.dimension()); + + // extractPoint() must skip the 4 SRID bytes; lon/lat should be the original values. + S2Point p = geog.extractPoint(); + S2LatLng ll = new S2LatLng(p); + assertEquals(10.0, ll.latDegrees(), EPS); + assertEquals(30.0, ll.lngDegrees(), EPS); + } + + @Test + public void isoPointZ_throwsUnsupported() { + byte[] wkbZ = buildIsoPointZ(30.0, 10.0, 5.0); + WKBGeography geog = WKBGeography.fromWKB(wkbZ, 0); + // isPoint() is safe — just tests base type — but extractPoint/shape must refuse Z/M. 
+ assertTrue(geog.isPoint()); + assertThrows(UnsupportedOperationException.class, geog::extractPoint); + assertThrows(UnsupportedOperationException.class, () -> new WkbS2Shape(wkbZ)); + } +} diff --git a/common/src/test/java/org/apache/sedona/common/S2Geography/WKBReaderTest.java b/common/src/test/java/org/apache/sedona/common/S2Geography/WKBReaderTest.java index 8a7f6166fd1..4a54b602f8d 100644 --- a/common/src/test/java/org/apache/sedona/common/S2Geography/WKBReaderTest.java +++ b/common/src/test/java/org/apache/sedona/common/S2Geography/WKBReaderTest.java @@ -383,7 +383,8 @@ public void CollectionGeographyTest() throws ParseException { @Test public void EmptyTest() throws ParseException { - TestHelper.checkWKBGeography("0101000000", "POINT EMPTY"); + // PostGIS-compatible WKB for POINT EMPTY uses NaN for both coordinates. + TestHelper.checkWKBGeography("0101000000000000000000F87F000000000000F87F", "POINT EMPTY"); TestHelper.checkWKBGeography("010300000000000000", "POLYGON EMPTY"); TestHelper.checkWKBGeography("010200000000000000", "LINESTRING EMPTY"); } diff --git a/docs/api/sql/geography/Geography-Functions.md b/docs/api/sql/geography/Geography-Functions.md index 22a1bd2b0fe..7648b0c0a1e 100644 --- a/docs/api/sql/geography/Geography-Functions.md +++ b/docs/api/sql/geography/Geography-Functions.md @@ -41,5 +41,8 @@ These functions operate on geography type objects. | Function | Return type | Description | Since | | :--- | :--- | :--- | :--- | -| [ST_AsEWKT](Geography-Functions/ST_AsEWKT.md) | String | Return the Extended Well-Known Text representation of a geography. EWKT is an extended version of WKT which includes the SRID of the geography. The format originated in PostGIS but is supported by ... | v1.8.0 | -| [ST_Envelope](Geography-Functions/ST_Envelope.md) | Geography | This function returns the bounding box (envelope) of A. It's important to note that the bounding box is calculated using a cylindrical topology, not a spherical one. 
If the envelope crosses the ant... | v1.8.0 | +| [ST_AsEWKT](Geography-Functions/ST_AsEWKT.md) | String | Return the Extended Well-Known Text representation of a geography. | v1.8.0 | +| [ST_Envelope](Geography-Functions/ST_Envelope.md) | Geography | Return the bounding box (envelope) of a geography. Supports anti-meridian splitting. | v1.8.0 | +| [ST_NPoints](Geography-Functions/ST_NPoints.md) | Integer | Return the number of points (vertices) in a geography. | v1.9.0 | +| [ST_Distance](Geography-Functions/ST_Distance.md) | Double | Return the minimum geodesic distance between two geographies in meters. | v1.9.0 | +| [ST_Contains](Geography-Functions/ST_Contains.md) | Boolean | Test whether geography A fully contains geography B. | v1.9.0 | diff --git a/docs/api/sql/geography/Geography-Functions/ST_Contains.md b/docs/api/sql/geography/Geography-Functions/ST_Contains.md new file mode 100644 index 00000000000..b7187c45f96 --- /dev/null +++ b/docs/api/sql/geography/Geography-Functions/ST_Contains.md @@ -0,0 +1,48 @@ + + +# ST_Contains + +Introduction: Tests whether geography A fully contains geography B using S2 spherical boolean operations. Returns true if every point of B lies inside A; a point lying exactly on the boundary of A is not guaranteed to be treated as contained.
+ +![ST_Contains returning true](../../../../image/ST_Contains_geography/ST_Contains_geography_true.svg "ST_Contains returning true") +![ST_Contains returning false](../../../../image/ST_Contains_geography/ST_Contains_geography_false.svg "ST_Contains returning false") + +Format: + +`ST_Contains (A: Geography, B: Geography)` + +Return type: `Boolean` + +Since: `v1.9.1` + +SQL Example + +```sql +SELECT ST_Contains( + ST_GeogFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))'), + ST_GeogFromWKT('POINT (0.5 0.5)') +); +``` + +Output: + +``` +true +``` diff --git a/docs/api/sql/geography/Geography-Functions/ST_Distance.md b/docs/api/sql/geography/Geography-Functions/ST_Distance.md new file mode 100644 index 00000000000..e7cd420c88c --- /dev/null +++ b/docs/api/sql/geography/Geography-Functions/ST_Distance.md @@ -0,0 +1,49 @@ + + +# ST_Distance + +Introduction: Returns the minimum geodesic distance between two geography objects in meters. Uses S2 geometry-to-geometry distance (S2ClosestEdgeQuery), which computes the true minimum distance between any two points on the geometries — not centroid-to-centroid. Consistent with sedona-db's implementation. + +![ST_Distance on a sphere: great-circle distance between two points](../../../../image/ST_Distance_geography/ST_Distance_geography.svg "ST_Distance on a sphere: great-circle distance between two points") + +Format: + +`ST_Distance (A: Geography, B: Geography)` + +Return type: `Double` + +Since: `v1.9.1` + +SQL Example + +```sql +SELECT ST_Distance( + ST_GeogFromWKT('POINT (0 0)'), + ST_GeogFromWKT('POINT (1 1)') +); +``` + +Output: + +``` +157249.59847404022 +``` + +The result is approximately 157 km, the great-circle distance between (0,0) and (1,1) on a spherical Earth model (mean Earth radius), not on the WGS84 ellipsoid.
diff --git a/docs/api/sql/geography/Geography-Functions/ST_NPoints.md b/docs/api/sql/geography/Geography-Functions/ST_NPoints.md new file mode 100644 index 00000000000..86040a7c723 --- /dev/null +++ b/docs/api/sql/geography/Geography-Functions/ST_NPoints.md @@ -0,0 +1,44 @@ + + +# ST_NPoints + +Introduction: Returns the number of points (vertices) in a geography object. + +![ST_NPoints on a global polyline with 4 vertices](../../../../image/ST_NPoints_geography/ST_NPoints_geography.svg "ST_NPoints on a global polyline with 4 vertices") + +Format: + +`ST_NPoints (A: Geography)` + +Return type: `Integer` + +Since: `v1.9.1` + +SQL Example + +```sql +SELECT ST_NPoints(ST_GeogFromWKT('LINESTRING (0 0, 1 1, 2 2)')); +``` + +Output: + +``` +3 +``` diff --git a/docs/image/ST_Contains_geography/ST_Contains_geography_false.svg b/docs/image/ST_Contains_geography/ST_Contains_geography_false.svg new file mode 100644 index 00000000000..1c9cbbf78d3 --- /dev/null +++ b/docs/image/ST_Contains_geography/ST_Contains_geography_false.svg @@ -0,0 +1,35 @@ + + + + + + + ST_Contains(A, D) is FALSE + ST_Contains(A, E) is FALSE + + + + + + + D + + + + E + + + A + + + + Polygon A + + + Point D (On Boundary) + + + Polygon E (Pokes Out) + diff --git a/docs/image/ST_Contains_geography/ST_Contains_geography_true.svg b/docs/image/ST_Contains_geography/ST_Contains_geography_true.svg new file mode 100644 index 00000000000..9d02e1b3cf7 --- /dev/null +++ b/docs/image/ST_Contains_geography/ST_Contains_geography_true.svg @@ -0,0 +1,35 @@ + + + + + + + ST_Contains(A, B) is TRUE + ST_Contains(A, C) is TRUE + + + + + + + C + + + + B + + + A + + + + Polygon A + + + Point B (Interior) + + + Polygon C (Fully Inside) + diff --git a/docs/image/ST_Distance_geography/ST_Distance_geography.svg b/docs/image/ST_Distance_geography/ST_Distance_geography.svg new file mode 100644 index 00000000000..7eaa13ed81b --- /dev/null +++ b/docs/image/ST_Distance_geography/ST_Distance_geography.svg @@ -0,0 +1,62 @@ + + + + + + + 
How ST_Distance "Sees" the Path + + + + + + + + + + + + + + + + + + + 0 + 2 + 4 + 6 + 8 + 10 + Longitude + + + 0.0 + 0.5 + 1.0 + 1.5 + 2.0 + Latitude + + + + + + + + + + Point A + + Point B + + + + + Planar (Geometry) + + Spherical (Geography): Great-Circle Arc + diff --git a/docs/image/ST_NPoints_geography/ST_NPoints_geography.svg b/docs/image/ST_NPoints_geography/ST_NPoints_geography.svg new file mode 100644 index 00000000000..541301e28e7 --- /dev/null +++ b/docs/image/ST_NPoints_geography/ST_NPoints_geography.svg @@ -0,0 +1,37 @@ + + + + + + + + NPoints = 4 + + + + + + + + + + v1 + + + v2 + + + v3 + + + v4 + + + Counts the vertices of a geography (same convention as ST_NPoints on geometries) + + + ST_NPoints + diff --git a/spark/common/src/main/java/org/apache/sedona/core/utils/SedonaConf.java b/spark/common/src/main/java/org/apache/sedona/core/utils/SedonaConf.java index 1b15914f671..f0f8591addd 100644 --- a/spark/common/src/main/java/org/apache/sedona/core/utils/SedonaConf.java +++ b/spark/common/src/main/java/org/apache/sedona/core/utils/SedonaConf.java @@ -240,6 +240,14 @@ private SedonaConf(ConfGetter confGetter) { this.crsTransformMode = CRSTransformMode.fromString(confGetter.get("spark.sedona.crs.geotools", "raster")); + // Geography eager ShapeIndex configuration + // When true, WKBGeography eagerly builds S2 Geography and ShapeIndex at deserialization time. + // This eliminates cold-path overhead for predicate-heavy workloads (ST_Contains, ST_Intersects) + // at the cost of slower deserialization for metric-only workloads. + boolean eagerShapeIndex = + Boolean.parseBoolean(confGetter.get("spark.sedona.geography.eagerShapeIndex", "false")); + org.apache.sedona.common.S2Geography.WKBGeography.setEagerShapeIndex(eagerShapeIndex); + // URL-based CRS provider configuration // When spark.sedona.crs.url.base is set, a UrlCRSProvider is registered to resolve // SRID definitions from the given HTTP(S) endpoint before falling back to built-in defs. 
diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/UDT/GeographyUDT.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/UDT/GeographyUDT.scala index 969672a2c02..d2092372470 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/UDT/GeographyUDT.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/UDT/GeographyUDT.scala @@ -21,7 +21,7 @@ package org.apache.spark.sql.sedona_sql.UDT import org.apache.spark.sql.types._ import org.json4s.JsonDSL._ import org.json4s.JsonAST.JValue -import org.apache.sedona.common.S2Geography.{GeographySerializer, Geography} +import org.apache.sedona.common.S2Geography.{GeographyWKBSerializer, Geography} class GeographyUDT extends UserDefinedType[Geography] { override def sqlType: DataType = BinaryType @@ -31,11 +31,11 @@ class GeographyUDT extends UserDefinedType[Geography] { override def userClass: Class[Geography] = classOf[Geography] override def serialize(obj: Geography): Array[Byte] = - GeographySerializer.serialize(obj) + GeographyWKBSerializer.serialize(obj) override def deserialize(datum: Any): Geography = { datum match { - case value: Array[Byte] => GeographySerializer.deserialize(value) + case value: Array[Byte] => GeographyWKBSerializer.deserialize(value) } } diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/FunctionResolver.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/FunctionResolver.scala index eb4c9f4f647..b9d63678d8d 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/FunctionResolver.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/FunctionResolver.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.sedona_sql.expressions import org.apache.spark.sql.catalyst.analysis.TypeCoercion import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.types.NullType /** * A utility object for 
resolving functions based on input argument types. See @@ -81,15 +82,19 @@ object FunctionResolver { } if (ambiguousMatches.length == 1) { function + } else if (expressions.forall(_.dataType == NullType)) { + // All arguments are NullType literals; every overload returns null, so the choice + // between equally-good matches is semantically irrelevant. Prefer the first candidate. + ambiguousMatches.head._1 } else { - // Detected ambiguous matches, throw exception - val candidateTypesMsg = ambiguousMatches + val ambiguousTypesMsg = ambiguousMatches .map { case (function, _) => " (" + function.sparkInputTypes.mkString(", ") + ")" } .mkString("\n") throw new IllegalArgumentException( - "Ambiguous function call. Candidates are: \n" + candidateTypesMsg) + "Ambiguous function call: multiple overloads match equally. Candidates are:\n" + + ambiguousTypesMsg) } } } diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala index f15efb0e70e..839ec2140f8 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala @@ -59,7 +59,9 @@ private[apache] case class ST_LabelPoint(inputExpressions: Seq[Expression]) * This function takes two geometries and calculates the distance between two objects. 
*/ private[apache] case class ST_Distance(inputExpressions: Seq[Expression]) - extends InferredExpression(Functions.distance _) { + extends InferredExpression( + Functions.distance _, + inferrableFunction2(org.apache.sedona.common.geography.Functions.distance)) { protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { copy(inputExpressions = newChildren) @@ -158,7 +160,9 @@ private[apache] case class ST_CrossesDateLine(inputExpressions: Seq[Expression]) * @param inputExpressions */ private[apache] case class ST_NPoints(inputExpressions: Seq[Expression]) - extends InferredExpression(Functions.nPoints _) { + extends InferredExpression( + inferrableFunction1(Functions.nPoints), + inferrableFunction1(org.apache.sedona.common.geography.Functions.nPoints)) { protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { copy(inputExpressions = newChildren) diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala index fff9f6eeef5..6affd348b32 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala @@ -74,17 +74,15 @@ abstract class ST_Predicate } /** - * Test if leftGeometry full contains rightGeometry + * Test if leftGeometry fully contains rightGeometry. Supports both Geometry (JTS) and Geography + * (S2) inputs via InferredExpression dual dispatch.
* * @param inputExpressions */ private[apache] case class ST_Contains(inputExpressions: Seq[Expression]) - extends ST_Predicate - with CodegenFallback { - - override def evalGeom(leftGeometry: Geometry, rightGeometry: Geometry): Boolean = { - Predicates.contains(leftGeometry, rightGeometry) - } + extends InferredExpression( + inferrableFunction2(Predicates.contains), + inferrableFunction2(org.apache.sedona.common.geography.Functions.contains)) { protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { copy(inputExpressions = newChildren) @@ -92,7 +90,8 @@ private[apache] case class ST_Contains(inputExpressions: Seq[Expression]) } /** - * Test if leftGeometry full intersects rightGeometry + * Test if leftGeometry intersects rightGeometry. Only Geometry (JTS) inputs are supported; + * Geography (S2) inputs are not supported yet. * * @param inputExpressions */ @@ -236,7 +235,8 @@ private[apache] case class ST_RelateMatch(inputExpressions: Seq[Expression]) } /** - * Test if leftGeometry is equal to rightGeometry + * Test if leftGeometry is equal to rightGeometry. Only Geometry (JTS) inputs are supported; + * Geography (S2) inputs are not supported yet.
* * @param inputExpressions */ @@ -245,7 +245,6 @@ private[apache] case class ST_Equals(inputExpressions: Seq[Expression]) with CodegenFallback { override def evalGeom(leftGeometry: Geometry, rightGeometry: Geometry): Boolean = { - // Returns GeometryCollection object Predicates.equals(leftGeometry, rightGeometry) } diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/implicits.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/implicits.scala index 90fe52ec233..673a258ba5b 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/implicits.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/implicits.scala @@ -18,7 +18,7 @@ */ package org.apache.spark.sql.sedona_sql.expressions -import org.apache.sedona.common.S2Geography.{Geography, GeographySerializer} +import org.apache.sedona.common.S2Geography.{Geography, GeographyWKBSerializer} import org.apache.sedona.sql.utils.GeometrySerializer import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Expression @@ -67,7 +67,7 @@ object implicits { serdeAware.evalWithoutSerialization(input).asInstanceOf[Geography] case _ => inputExpression.eval(input).asInstanceOf[Array[Byte]] match { - case binary: Array[Byte] => GeographySerializer.deserialize(binary) + case binary: Array[Byte] => GeographyWKBSerializer.deserialize(binary) case _ => null } } @@ -180,7 +180,7 @@ object implicits { } def toGeography: Geography = arrayData match { - case binary: Array[Byte] => GeographySerializer.deserialize(binary) + case binary: Array[Byte] => GeographyWKBSerializer.deserialize(binary) case _ => null } } @@ -198,7 +198,7 @@ object implicits { implicit class GeographyEnhancer(geog: Geography) { - def toGenericArrayData: Array[Byte] = GeographySerializer.serialize(geog) + def toGenericArrayData: Array[Byte] = GeographyWKBSerializer.serialize(geog) } } diff --git 
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala index 965e702afa0..233ed8a806f 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.{SparkPlan, SparkStrategy} import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation -import org.apache.spark.sql.sedona_sql.UDT.RasterUDT +import org.apache.spark.sql.sedona_sql.UDT.{GeographyUDT, RasterUDT} import org.apache.spark.sql.sedona_sql.expressions.{ST_KNN, _} import org.apache.spark.sql.sedona_sql.expressions.raster._ import org.apache.spark.sql.sedona_sql.optimization.ExpressionUtils.splitConjunctivePredicates @@ -54,22 +54,21 @@ case class JoinQueryDetection( */ class JoinQueryDetector(sparkSession: SparkSession) extends SparkStrategy { + // Geography spatial joins are not yet supported — TraitJoinQueryBase.toSpatialRDD + // deserializes join keys with GeometrySerializer, which would fail on Geography bytes. + // ST_Contains is the only spatial predicate currently wired for Geography (via InferredExpression + // dual dispatch); when either side is GeographyUDT we skip join planning and let Spark evaluate + // the predicate row-by-row. Other ST_Predicates reject Geography inputs at analysis time, so no + // guard is needed there.
+ private def isGeographyInput(shape: Expression): Boolean = + shape.dataType.isInstanceOf[GeographyUDT] + private def getJoinDetection( left: LogicalPlan, right: LogicalPlan, predicate: ST_Predicate, extraCondition: Option[Expression] = None): Option[JoinQueryDetection] = { predicate match { - case ST_Contains(Seq(leftShape, rightShape)) => - Some( - JoinQueryDetection( - left, - right, - leftShape, - rightShape, - SpatialPredicate.CONTAINS, - false, - extraCondition)) case ST_Intersects(Seq(leftShape, rightShape)) => Some( JoinQueryDetection( @@ -209,6 +208,20 @@ class JoinQueryDetector(sparkSession: SparkSession) extends SparkStrategy { val queryDetection: Option[JoinQueryDetection] = condition.flatMap { case joinConditionMatcher(predicate, extraCondition) => predicate match { + // ST_Contains is an InferredExpression (not ST_Predicate) so it can't sit inside + // getJoinDetection; it's also the only predicate currently accepting Geography + // inputs and therefore the only one needing the Geography guard. 
+ case ST_Contains(Seq(leftShape, rightShape)) + if !isGeographyInput(leftShape) && !isGeographyInput(rightShape) => + Some( + JoinQueryDetection( + left, + right, + leftShape, + rightShape, + SpatialPredicate.CONTAINS, + false, + extraCondition)) case pred: ST_Predicate => getJoinDetection(left, right, pred, extraCondition) case pred: RS_Predicate => diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/FunctionResolverSuite.scala b/spark/common/src/test/scala/org/apache/sedona/sql/FunctionResolverSuite.scala index 63b30a51070..6793700c6a4 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/FunctionResolverSuite.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/FunctionResolverSuite.scala @@ -85,15 +85,31 @@ class FunctionResolverSuite extends AnyFunSpec { assert(result.sparkInputTypes == Seq(LongType, StringType)) } - it("Multiple functions match input arity, ambiguity") { + it("All-null inputs with ambiguous overloads prefer first candidate") { val functions = Seq( createTestFunction(Seq(LongType, StringType)), createTestFunction(Seq(DoubleType, StringType))) + // Null literals with NullType — both overloads match equally via implicit cast. + val expressions = Seq(createTestExpression(NullType), createTestExpression(NullType)) + + // When every argument is NullType, all overloads return null for null input, so selecting + // the first candidate is semantically safe. + val result = FunctionResolver.resolveFunction(expressions, functions) + assert(result.sparkInputTypes == Seq(LongType, StringType)) + } + + it("Ambiguous overloads with non-null inputs throw") { + val functions = Seq( + createTestFunction(Seq(LongType, StringType)), + createTestFunction(Seq(DoubleType, StringType))) + // Integer coerces to both Long and Double with equal cost; ambiguity without NullType + // must be a hard error to avoid silently picking the wrong overload semantics. 
val expressions = Seq(createTestExpression(IntegerType), createTestExpression(StringType)) - assertThrows[IllegalArgumentException] { + val ex = intercept[IllegalArgumentException] { FunctionResolver.resolveFunction(expressions, functions) } + assert(ex.getMessage.contains("Ambiguous function call")) } } } diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsDataFrameAPITest.scala b/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsDataFrameAPITest.scala index 21a6a56b933..421275ba738 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsDataFrameAPITest.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsDataFrameAPITest.scala @@ -112,11 +112,12 @@ class ConstructorsDataFrameAPITest extends TestBaseScala { val geom = df.head().getAs[Geometry]("geom") assert(geom.getGeometryType == "MultiPolygon") + // S2 normalizes polygon loop orientation, so hole winding may differ from input val expectedWkt = "MULTIPOLYGON (((10 10, 70 10, 70 70, 10 70, 10 10), " + - "(20 20, 20 60, 60 60, 60 20, 20 20)), " + + "(20 20, 60 20, 60 60, 20 60, 20 20)), " + "((30 30, 50 30, 50 50, 30 50, 30 30), " + - "(36 36, 36 44, 44 44, 44 36, 36 36)))" + "(36 36, 44 36, 44 44, 36 44, 36 36)))" val writer = new WKTWriter() writer.setFormatted(false) diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsTest.scala b/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsTest.scala index d253685206f..c012f6abbfc 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsTest.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsTest.scala @@ -178,8 +178,9 @@ class ConstructorsTest extends TestBaseScala { ST_GeogToGeometry(ST_GeogFromWKT('$wkt')) AS geom """) val geom = df.first().getAs[Geometry](0) + // S2 normalizes polygon loop orientation, so hole winding may differ from input 
val expected = - "POLYGON ((0 0, 95 20, 95 85, 10 85, 0 0), " + "(20 30, 30 40, 35 25, 20 30), " + "(50 50, 50 65, 65 65, 65 50, 50 50), " + "(25 60, 22 66, 30 72, 38 66, 35 58, 25 60))" + "POLYGON ((0 0, 95 20, 95 85, 10 85, 0 0), " + "(20 30, 35 25, 30 40, 20 30), " + "(50 50, 65 50, 65 65, 50 65, 50 50), " + "(25 60, 35 58, 38 66, 30 72, 22 66, 25 60))" assert(geom.getGeometryType == "Polygon") val writer = new WKTWriter() writer.setPrecisionModel(new PrecisionModel(PrecisionModel.FIXED)) @@ -197,9 +198,10 @@ class ConstructorsTest extends TestBaseScala { ST_GeogToGeometry(ST_GeogFromWKT('$wkt', 4326)) AS geom """) val geom = df.first().getAs[Geometry](0) + // S2 normalizes polygon loop orientation, so hole winding may differ from input val expected = "MULTIPOLYGON (((10 10, 70 10, 70 70, 10 70, 10 10), " + - "(20 20, 20 60, 60 60, 60 20, 20 20)), " + "((30 30, 50 30, 50 50, 30 50, 30 30), " + - "(36 36, 36 44, 44 44, 44 36, 36 36)))"; + "(20 20, 60 20, 60 60, 20 60, 20 20)), " + "((30 30, 50 30, 50 50, 30 50, 30 30), " + + "(36 36, 44 36, 44 44, 36 44, 36 36)))"; assert(geom.getGeometryType == "MultiPolygon") val writer = new WKTWriter() writer.setPrecisionModel(new PrecisionModel(PrecisionModel.FIXED)) diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/geography/FunctionsTest.scala b/spark/common/src/test/scala/org/apache/sedona/sql/geography/FunctionsTest.scala deleted file mode 100644 index aa56f7d7f5f..00000000000 --- a/spark/common/src/test/scala/org/apache/sedona/sql/geography/FunctionsTest.scala +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.sedona.sql.geography - -import org.apache.sedona.common.S2Geography.Geography -import org.apache.sedona.common.geography.{Constructors, Functions} -import org.apache.sedona.sql.TestBaseScala -import org.junit.Assert.assertEquals -import org.locationtech.jts.geom.{Geometry, PrecisionModel} - -class FunctionsTest extends TestBaseScala { - - import sparkSession.implicits._ - - it("Passed ST_Envelope antarctica") { - val antarctica = - "POLYGON ((-180 -90, -180 -63.27066, 180 -63.27066, 180 -90, -180 -90))" - var row = - sparkSession.sql(s"SELECT ST_Envelope(ST_GeogFromWKT('$antarctica'), true) AS env").first() - var env = row.get(0).asInstanceOf[Geography] - var expectedWKT = "POLYGON ((-180 -63.3, 180 -63.3, 180 -90, -180 -90, -180 -63.3))"; - assertEquals(expectedWKT, env.toString) - } - - it("Passed ST_Envelope Fiji") { - val fiji = - "MULTIPOLYGON (" + "((177.285 -18.28799, 180 -18.28799, 180 -16.02088, 177.285 -16.02088, 177.285 -18.28799))," + - "((-180 -18.28799, -179.7933 -18.28799, -179.7933 -16.02088, -180 -16.02088, -180 -18.28799))" + ")" - - val row = - sparkSession.sql(s"SELECT ST_Envelope(ST_GeogFromEWKT('$fiji'), false) AS env").first() - val env = row.get(0).asInstanceOf[Geography] - val expectedWKT = "POLYGON ((177.3 -18.3, -179.8 -18.3, -179.8 -16, 177.3 -16, 177.3 -18.3))" - assertEquals(expectedWKT, env.toString) - } - - it("Passed ST_Envelope null") { - val functionDf = sparkSession.sql("select ST_Envelope(null, false)") - assert(functionDf.first().get(0) == null) - } - - it("Passed ST_AsEWKT") { - val 
wkt = "LINESTRING (1 2, 3 4, 5 6)" - val wktExpected = "SRID=4326; LINESTRING (1 2, 3 4, 5 6)" - val row = sparkSession.sql(s"SELECT ST_AsEWKT(ST_GeogFromText('$wkt', 4326)) AS geog").first() - val geoStr = row.get(0) - assert(geoStr == wktExpected) - } -} diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala b/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala new file mode 100644 index 00000000000..7fe76730d27 --- /dev/null +++ b/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sedona.sql.geography + +import org.apache.sedona.common.S2Geography.{Geography, WKBGeography} +import org.apache.sedona.sql.TestBaseScala +import org.apache.spark.sql.functions.{col, lit} +import org.apache.spark.sql.sedona_sql.expressions.{st_constructors, st_functions, st_predicates} +import org.junit.Assert.{assertEquals, assertNotNull, assertTrue} +import org.locationtech.jts.geom.Geometry + +/** + * Spark SQL integration tests for Geography ST functions. 
Tests one representative function per + * architecture level: L1 (ST_NPoints), L2 (ST_Distance), L3 (ST_Contains). + */ +class GeographyFunctionTest extends TestBaseScala { + + import sparkSession.implicits._ + + // ─── Constructors ────────────────────────────────────────────────────── + + describe("Constructors") { + + it("ST_GeogFromWKT returns WKBGeography") { + val row = sparkSession + .sql("SELECT ST_GeogFromWKT('POINT (1 2)', 4326) AS geog") + .first() + val geog = row.get(0).asInstanceOf[Geography] + assertTrue(geog.isInstanceOf[WKBGeography]) + assertEquals(4326, geog.getSRID) + assertEquals("POINT (1 2)", geog.toString) + } + + it("ST_GeogFromEWKT with SRID") { + val row = sparkSession + .sql("SELECT ST_GeogFromEWKT('SRID=4269;POINT (1 2)') AS geog") + .first() + val geog = row.get(0).asInstanceOf[Geography] + assertEquals(4269, geog.getSRID) + } + + it("ST_GeogFromWKB round-trip") { + val row = sparkSession + .sql("SELECT ST_GeogFromWKB(ST_AsBinary(ST_GeomFromWKT('POINT (30 10)'))) AS geog") + .first() + val geog = row.get(0).asInstanceOf[Geography] + assertTrue(geog.isInstanceOf[WKBGeography]) + assertEquals("POINT (30 10)", geog.toString) + } + + it("ST_GeomToGeography and ST_GeogToGeometry round-trip") { + val row = sparkSession + .sql(""" + SELECT ST_AsText(ST_GeogToGeometry( + ST_GeomToGeography(ST_GeomFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))')) + )) AS wkt + """) + .first() + val wkt = row.getString(0) + assertTrue(wkt.contains("POLYGON")) + } + } + + // ─── Level 1: ST_NPoints ─────────────────────────────────────────────── + + describe("Level 1: Structural") { + + it("ST_NPoints") { + val row = sparkSession + .sql("SELECT ST_NPoints(ST_GeogFromWKT('LINESTRING (0 0, 1 1, 2 2)', 4326)) AS n") + .first() + assertEquals(3, row.getInt(0)) + } + } + + // ─── Level 2: ST_Distance ────────────────────────────────────────────── + + describe("Level 2: Geodesic metrics") { + + it("ST_Distance between two points") { + val row = sparkSession + .sql(""" + 
SELECT ST_Distance( + ST_GeogFromWKT('POINT (0 0)', 4326), + ST_GeogFromWKT('POINT (1 1)', 4326) + ) AS dist + """) + .first() + val dist = row.getDouble(0) + assertTrue(s"Expected ~157km, got $dist", dist > 155000 && dist < 160000) + } + + it("ST_Distance null handling") { + val row = sparkSession + .sql("SELECT ST_Distance(ST_GeogFromWKT('POINT (0 0)', 4326), null) AS dist") + .first() + assertTrue(row.isNullAt(0)) + } + } + + // ─── Level 3: ST_Contains ────────────────────────────────────────────── + + describe("Level 3: S2 predicates") { + + it("ST_Contains point in polygon") { + val row = sparkSession + .sql(""" + SELECT ST_Contains( + ST_GeogFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))', 4326), + ST_GeogFromWKT('POINT (0.5 0.5)', 4326) + ) AS result + """) + .first() + assertTrue(row.getBoolean(0)) + } + + it("ST_Contains point outside polygon") { + val row = sparkSession + .sql(""" + SELECT ST_Contains( + ST_GeogFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))', 4326), + ST_GeogFromWKT('POINT (2 2)', 4326) + ) AS result + """) + .first() + assertTrue(!row.getBoolean(0)) + } + } + + // ─── DataFrame API ───────────────────────────────────────────────────── + + describe("DataFrame API") { + + it("ST_Distance via DataFrame API") { + val df = sparkSession + .sql("SELECT 'POINT (0 0)' AS wkt_a, 'POINT (1 1)' AS wkt_b") + .select( + st_constructors.ST_GeogFromWKT(col("wkt_a"), lit(4326)).as("a"), + st_constructors.ST_GeogFromWKT(col("wkt_b"), lit(4326)).as("b")) + .select(st_functions.ST_Distance(col("a"), col("b")).as("dist")) + val dist = df.first().getDouble(0) + assertTrue(s"Expected ~157km, got $dist", dist > 155000 && dist < 160000) + } + + it("ST_NPoints via DataFrame API") { + val df = sparkSession + .sql("SELECT 'LINESTRING (0 0, 1 1, 2 2)' AS wkt") + .select(st_constructors.ST_GeogFromWKT(col("wkt"), lit(4326)).as("geog")) + .select(st_functions.ST_NPoints(col("geog")).as("n")) + assertEquals(3, df.first().getInt(0)) + } + + it("ST_Contains via DataFrame 
API") { + val df = sparkSession + .sql("SELECT 'POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))' AS poly, 'POINT (0.5 0.5)' AS pt") + .select( + st_constructors.ST_GeogFromWKT(col("poly"), lit(4326)).as("poly"), + st_constructors.ST_GeogFromWKT(col("pt"), lit(4326)).as("pt")) + .select(st_predicates.ST_Contains(col("poly"), col("pt")).as("result")) + assertTrue(df.first().getBoolean(0)) + } + } + + // ─── Serialization round-trip ────────────────────────────────────────── + + describe("Serialization round-trip") { + + it("Geography survives DataFrame collect") { + val df = sparkSession + .sql("SELECT ST_GeogFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))', 4326) AS geog") + val geog = df.first().get(0).asInstanceOf[Geography] + assertTrue(geog.isInstanceOf[WKBGeography]) + assertEquals(4326, geog.getSRID) + assertTrue(geog.toString.contains("POLYGON")) + } + + it("Geography survives multiple function chain") { + val row = sparkSession + .sql(""" + SELECT ST_Distance( + ST_GeogFromWKT('POINT (0 0)', 4326), + ST_GeogFromWKT('POINT (1 0)', 4326) + ) AS dist, + ST_NPoints(ST_GeogFromWKT('LINESTRING (0 0, 1 1, 2 2)', 4326)) AS npts + """) + .first() + assertTrue(row.getDouble(0) > 0) + assertEquals(3, row.getInt(1)) + } + } +}