Skip to content

Commit df3f740

Browse files
[GH-2830] Adds Geography dual-dispatch to ST_Centroid (#2852)
Co-authored-by: Jia Yu <jiayu@apache.org>
1 parent 0b4da5d commit df3f740

6 files changed

Lines changed: 248 additions & 3 deletions

File tree

common/src/main/java/org/apache/sedona/common/geography/Functions.java

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,86 @@ public static int nPoints(Geography g) {
8585
return toJTS(g).getNumPoints();
8686
}
8787

88+
/**
89+
* Returns the spherical centroid of a geography as a {@link Geography} point. The centroid is
90+
* computed on the sphere using S2 area- and length-weighting:
91+
*
92+
* <ul>
93+
* <li>Polygon / MultiPolygon: area-weighted centroid via {@link S2Polygon#getCentroid()}.
94+
* <li>LineString / MultiLineString: length-weighted centroid via {@link
95+
* S2Polyline#getCentroid()}.
96+
* <li>Point / MultiPoint: mean of the unit vectors.
97+
* <li>GeographyCollection: weighted sum of the children's S2 centroids.
98+
* </ul>
99+
*
100+
* <p>Unlike a planar (lon/lat) centroid, this answer is correct for antimeridian-crossing and
101+
* polar geographies. As with JTS for non-convex shapes, the centroid may lie outside the input
102+
* geometry. Returns {@code null} if the input is {@code null} or if the centroid is undefined
103+
* (e.g. an empty geometry, or antipodal points whose unit vectors cancel).
104+
*/
105+
public static Geography centroid(Geography g) {
106+
if (g == null) return null;
107+
Geography typed = (g instanceof WKBGeography) ? ((WKBGeography) g).getS2Geography() : g;
108+
S2Point weighted = sphericalCentroid(typed);
109+
if (weighted == null) return null;
110+
// S2 returns area- (or length-) weighted centroids; project back to the sphere.
111+
double norm = weighted.norm();
112+
if (!(norm > 0.0)) return null; // zero or NaN — centroid undefined
113+
S2Point unit = S2Point.normalize(weighted);
114+
SinglePointGeography point = new SinglePointGeography(unit);
115+
point.setSRID(g.getSRID());
116+
return WKBGeography.fromS2Geography(point);
117+
}
118+
119+
/**
120+
* Returns the S2-weighted centroid of {@code g} (area-weighted for polygons, length-weighted for
121+
* polylines, sum of unit vectors for points). Result is {@code null} when no centroid
122+
* contribution exists (empty input, unsupported kind).
123+
*/
124+
private static S2Point sphericalCentroid(Geography g) {
125+
if (g instanceof PointGeography) {
126+
List<S2Point> pts = ((PointGeography) g).getPoints();
127+
if (pts == null || pts.isEmpty()) return null;
128+
S2Point sum = pts.get(0);
129+
for (int i = 1; i < pts.size(); i++) {
130+
sum = S2Point.add(sum, pts.get(i));
131+
}
132+
return sum;
133+
}
134+
if (g instanceof PolylineGeography) {
135+
S2Point sum = null;
136+
for (S2Polyline p : ((PolylineGeography) g).getPolylines()) {
137+
sum = addOrInit(sum, p.getCentroid());
138+
}
139+
return sum;
140+
}
141+
if (g instanceof PolygonGeography) {
142+
return ((PolygonGeography) g).polygon.getCentroid();
143+
}
144+
if (g instanceof MultiPolygonGeography) {
145+
S2Point sum = null;
146+
for (Geography feature : ((MultiPolygonGeography) g).getFeatures()) {
147+
if (feature instanceof PolygonGeography) {
148+
sum = addOrInit(sum, ((PolygonGeography) feature).polygon.getCentroid());
149+
}
150+
}
151+
return sum;
152+
}
153+
if (g instanceof GeographyCollection) {
154+
S2Point sum = null;
155+
for (Geography feature : ((GeographyCollection) g).getFeatures()) {
156+
sum = addOrInit(sum, sphericalCentroid(feature));
157+
}
158+
return sum;
159+
}
160+
return null;
161+
}
162+
163+
private static S2Point addOrInit(S2Point sum, S2Point next) {
164+
if (next == null) return sum;
165+
return (sum == null) ? next : S2Point.add(sum, next);
166+
}
167+
88168
/** Return the number of sub-geometries in a geography (1 for singles). */
89169
public static int numGeometries(Geography g) {
90170
if (g == null) return 0;

common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,99 @@ public void nPoints_polygon() throws ParseException {
148148
assertEquals(5, Functions.nPoints(g));
149149
}
150150

151+
// S2 area-weighted centroids on small polygons differ from planar by an O(d^2/R^2)
152+
// spherical correction. 5e-3 deg (~500 m) is wide enough to absorb that drift on
153+
// 1°-scale shapes near origin, while still catching any real bug (a planar/JTS centroid
154+
// of an antimeridian polygon would be off by ~180°, not by hundreds of metres).
155+
private static final double CENTROID_TOL_DEG = 5e-3;
156+
157+
@Test
158+
public void centroid_squarePolygon() throws ParseException {
159+
Geography g = Constructors.geogFromWKT("POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))", 4326);
160+
Geography c = Functions.centroid(g);
161+
assertNotNull(c);
162+
assertEquals(4326, c.getSRID());
163+
org.locationtech.jts.geom.Point p =
164+
(org.locationtech.jts.geom.Point) Constructors.geogToGeometry(c);
165+
assertEquals(1.0, p.getX(), CENTROID_TOL_DEG);
166+
assertEquals(1.0, p.getY(), CENTROID_TOL_DEG);
167+
}
168+
169+
@Test
170+
public void centroid_linestring() throws ParseException {
171+
Geography g = Constructors.geogFromWKT("LINESTRING (0 0, 2 0)", 4326);
172+
Geography c = Functions.centroid(g);
173+
assertNotNull(c);
174+
org.locationtech.jts.geom.Point p =
175+
(org.locationtech.jts.geom.Point) Constructors.geogToGeometry(c);
176+
assertEquals(1.0, p.getX(), CENTROID_TOL_DEG);
177+
assertEquals(0.0, p.getY(), CENTROID_TOL_DEG);
178+
}
179+
180+
@Test
181+
public void centroid_point() throws ParseException {
182+
Geography g = Constructors.geogFromWKT("POINT (3 4)", 4326);
183+
Geography c = Functions.centroid(g);
184+
assertNotNull(c);
185+
org.locationtech.jts.geom.Point p =
186+
(org.locationtech.jts.geom.Point) Constructors.geogToGeometry(c);
187+
// A single point's centroid is the point itself — exact.
188+
assertEquals(3.0, p.getX(), 1e-9);
189+
assertEquals(4.0, p.getY(), 1e-9);
190+
}
191+
192+
@Test
193+
public void centroid_multipoint_meanOfUnitVectors() throws ParseException {
194+
Geography g = Constructors.geogFromWKT("MULTIPOINT ((-1 0), (1 0))", 4326);
195+
Geography c = Functions.centroid(g);
196+
assertNotNull(c);
197+
org.locationtech.jts.geom.Point p =
198+
(org.locationtech.jts.geom.Point) Constructors.geogToGeometry(c);
199+
// Mean of unit vectors at (-1, 0) and (1, 0) lands on (0, 0).
200+
assertEquals(0.0, p.getX(), CENTROID_TOL_DEG);
201+
assertEquals(0.0, p.getY(), CENTROID_TOL_DEG);
202+
}
203+
204+
@Test
205+
public void centroid_multipolygon() throws ParseException {
206+
// Two unit squares at (0..1, 0..1) and (10..11, 0..1). Equal area, so the area-weighted
207+
// centroid should sit at the midpoint between the per-polygon centroids ≈ (5.5, 0.5).
208+
Geography g =
209+
Constructors.geogFromWKT(
210+
"MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)), ((10 0, 11 0, 11 1, 10 1, 10 0)))", 4326);
211+
Geography c = Functions.centroid(g);
212+
assertNotNull(c);
213+
org.locationtech.jts.geom.Point p =
214+
(org.locationtech.jts.geom.Point) Constructors.geogToGeometry(c);
215+
assertEquals(5.5, p.getX(), CENTROID_TOL_DEG);
216+
assertEquals(0.5, p.getY(), CENTROID_TOL_DEG);
217+
}
218+
219+
@Test
220+
public void centroid_antimeridianPolygon_isOnTheAntimeridian() throws ParseException {
221+
// Thin band straddling 180°E. A planar JTS centroid would average the lons and land
222+
// at lon ≈ 0 (the wrong side of the planet). The spherical centroid stays near ±180.
223+
Geography g =
224+
Constructors.geogFromWKT("POLYGON ((170 -1, -170 -1, -170 1, 170 1, 170 -1))", 4326);
225+
Geography c = Functions.centroid(g);
226+
assertNotNull(c);
227+
org.locationtech.jts.geom.Point p =
228+
(org.locationtech.jts.geom.Point) Constructors.geogToGeometry(c);
229+
double lon = p.getX();
230+
double lat = p.getY();
231+
// |lon| close to 180 (either side of the wrap), lat close to 0.
232+
double lonDistFromAntimeridian = Math.min(Math.abs(lon - 180.0), Math.abs(lon + 180.0));
233+
assertTrue(
234+
"expected centroid near the antimeridian; got (" + lon + ", " + lat + ")",
235+
lonDistFromAntimeridian < 0.5);
236+
assertEquals(0.0, lat, 0.5);
237+
}
238+
239+
@Test
240+
public void centroid_nullHandling() {
241+
assertNull(Functions.centroid(null));
242+
}
243+
151244
@Test
152245
public void numGeometries_point() throws ParseException {
153246
Geography g = Constructors.geogFromWKT("POINT (1 2)", 4326);

docs/api/sql/geography/Geography-Functions.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ These functions operate on geography type objects.
4444
| [ST_Area](Geography-Functions/ST_Area.md) | Double | Return the geodesic area of a geography in square meters (WGS84 spheroid). | v1.9.1 |
4545
| [ST_AsEWKT](Geography-Functions/ST_AsEWKT.md) | String | Return the Extended Well-Known Text representation of a geography. | v1.8.0 |
4646
| [ST_AsText](Geography-Functions/ST_AsText.md) | String | Return the Well-Known Text (WKT) representation of a geography. | v1.9.1 |
47+
| [ST_Centroid](Geography-Functions/ST_Centroid.md) | Geography | Return the planar centroid of a geography as a Geography point (computed in projected lon/lat space). | v1.9.1 |
4748
| [ST_Buffer](Geography-Functions/ST_Buffer.md) | Geography | Return the metric ε-buffer of a geography. Distance is always interpreted as meters along the spheroid. | v1.9.1 |
4849
| [ST_Envelope](Geography-Functions/ST_Envelope.md) | Geography | Return the bounding box (envelope) of a geography. Supports anti-meridian splitting. | v1.8.0 |
4950
| [ST_GeometryType](Geography-Functions/ST_GeometryType.md) | String | Return the type of a geography as a string (e.g., "ST_Point", "ST_Polygon"). | v1.9.1 |
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
<!--
2+
Licensed to the Apache Software Foundation (ASF) under one
3+
or more contributor license agreements. See the NOTICE file
4+
distributed with this work for additional information
5+
regarding copyright ownership. The ASF licenses this file
6+
to you under the Apache License, Version 2.0 (the
7+
"License"); you may not use this file except in compliance
8+
with the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing,
13+
software distributed under the License is distributed on an
14+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
KIND, either express or implied. See the License for the
16+
specific language governing permissions and limitations
17+
under the License.
18+
-->
19+
20+
# ST_Centroid
21+
22+
Introduction: Returns the spherical centroid of a geography as a Geography point, computed on the sphere using S2:
23+
24+
- **Polygon / MultiPolygon** — area-weighted centroid via `S2Polygon.getCentroid()`.
25+
- **LineString / MultiLineString** — length-weighted centroid via `S2Polyline.getCentroid()`.
26+
- **Point / MultiPoint** — mean of the unit vectors.
27+
- **GeographyCollection** — recursive weighted sum across the children.
28+
29+
The result is the unit-length centroid on the sphere. Unlike a planar (lon/lat) centroid, it is correct for antimeridian-crossing and high-latitude geographies. As with JTS for non-convex shapes, the centroid may lie outside the input geometry. Returns `NULL` when the centroid is undefined (empty geometry, or antipodal points whose unit vectors cancel).
30+
31+
Format:
32+
33+
`ST_Centroid (A: Geography)`
34+
35+
Return type: `Geography`
36+
37+
Since: `v1.9.1`
38+
39+
SQL Example
40+
41+
```sql
42+
SELECT ST_AsEWKT(ST_Centroid(ST_GeogFromWKT('POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))')));
43+
```
44+
45+
Output (small `O(d²/R²)` spherical correction vs the planar `(1 1)`):
46+
47+
```
48+
POINT (1 1)
49+
```
50+
51+
For an antimeridian-crossing polygon, the spherical centroid stays on the antimeridian instead of jumping to the opposite side of the planet, which a planar centroid would do:
52+
53+
```sql
54+
SELECT ST_AsEWKT(ST_Centroid(ST_GeogFromWKT('POLYGON ((170 -1, -170 -1, -170 1, 170 1, 170 -1))')));
55+
-- result: POINT near (180, 0) (or (-180, 0)), NOT (0, 0)
56+
```

spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,9 @@ private[apache] case class ST_Area(inputExpressions: Seq[Expression])
301301
* @param inputExpressions
302302
*/
303303
private[apache] case class ST_Centroid(inputExpressions: Seq[Expression])
304-
extends InferredExpression(Functions.getCentroid _) {
304+
extends InferredExpression(
305+
inferrableFunction1(Functions.getCentroid),
306+
inferrableFunction1(org.apache.sedona.common.geography.Functions.centroid)) {
305307

306308
protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = {
307309
copy(inputExpressions = newChildren)

spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import org.apache.sedona.sql.TestBaseScala
2323
import org.apache.spark.sql.functions.{col, lit}
2424
import org.apache.spark.sql.sedona_sql.expressions.{st_constructors, st_functions, st_predicates}
2525
import org.junit.Assert.{assertEquals, assertNotNull, assertTrue}
26-
import org.locationtech.jts.geom.{Geometry, Point}
26+
import org.locationtech.jts.geom.Point
2727
import org.locationtech.jts.io.WKTReader
2828

2929
/**
@@ -78,7 +78,7 @@ class GeographyFunctionTest extends TestBaseScala {
7878
}
7979
}
8080

81-
// ─── Level 1: ST_NPoints, ST_NumGeometries, ST_GeometryType, ST_AsText
81+
// ─── Level 1: ST_NPoints, ST_Centroid, ST_NumGeometries, ST_GeometryType, ST_AsText ─
8282

8383
describe("Level 1: Structural") {
8484

@@ -89,6 +89,19 @@ class GeographyFunctionTest extends TestBaseScala {
8989
assertEquals(3, row.getInt(0))
9090
}
9191

92+
it("ST_Centroid square polygon") {
93+
val row = sparkSession
94+
.sql(
95+
"SELECT ST_Centroid(ST_GeogFromWKT('POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))', 4326)) AS c")
96+
.first()
97+
val centroid = row.get(0).asInstanceOf[Geography]
98+
val point = new WKTReader().read(centroid.toString).asInstanceOf[Point]
99+
// Spherical area-weighted centroid of a 2x2° square at origin is ~(1, 1) with an
100+
// O(d^2/R^2) spherical correction; 5e-3° is well within that band.
101+
assertEquals(1.0, point.getX, 5e-3)
102+
assertEquals(1.0, point.getY, 5e-3)
103+
}
104+
92105
it("ST_NumGeometries single") {
93106
val row = sparkSession
94107
.sql("SELECT ST_NumGeometries(ST_GeogFromWKT('POINT (1 2)', 4326)) AS n")

0 commit comments

Comments
 (0)