Skip to content

Commit ac2e60d

Browse files
petern48 authored and jiayuasu committed
[apacheGH-2025] Geopandas.GeoSeries: Implement is_valid, is_empty, is_simple (apache#2026)
* Support elements of None type in GeoSeries
* Implement test_is_valid
* Implement is_empty
* Implement is_simple
1 parent ce22cf1 commit ac2e60d

3 files changed

Lines changed: 160 additions & 17 deletions

File tree

python/sedona/geopandas/geoseries.py

Lines changed: 109 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,9 @@ def __init__(
154154
fastpath=fastpath,
155155
)
156156
gs = gpd.GeoSeries(s)
157-
pdf = pd.Series(gs.apply(lambda geom: geom.wkb))
157+
pdf = pd.Series(
158+
gs.apply(lambda geom: geom.wkb if geom is not None else None)
159+
)
158160
# initialize the parent class pyspark Series with the pandas Series
159161
super().__init__(
160162
data=pdf,
@@ -591,18 +593,88 @@ def length(self) -> pspd.Series:
591593
).to_spark_pandas()
592594

593595
@property
594-
def is_valid(self):
595-
# Implementation of the abstract method
596-
raise NotImplementedError("This method is not implemented yet.")
596+
def is_valid(self) -> pspd.Series:
597+
"""Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for
598+
geometries that are valid.
599+
600+
Examples
601+
--------
602+
603+
An example with one invalid polygon (a bowtie geometry crossing itself)
604+
and one missing geometry:
605+
606+
>>> from shapely.geometry import Polygon
607+
>>> s = geopandas.GeoSeries(
608+
... [
609+
... Polygon([(0, 0), (1, 1), (0, 1)]),
610+
... Polygon([(0,0), (1, 1), (1, 0), (0, 1)]), # bowtie geometry
611+
... Polygon([(0, 0), (2, 2), (2, 0)]),
612+
... None
613+
... ]
614+
... )
615+
>>> s
616+
0 POLYGON ((0 0, 1 1, 0 1, 0 0))
617+
1 POLYGON ((0 0, 1 1, 1 0, 0 1, 0 0))
618+
2 POLYGON ((0 0, 2 2, 2 0, 0 0))
619+
3 None
620+
dtype: geometry
621+
622+
>>> s.is_valid
623+
0 True
624+
1 False
625+
2 True
626+
3 False
627+
dtype: bool
628+
629+
See Also
630+
--------
631+
GeoSeries.is_valid_reason : reason for invalidity
632+
"""
633+
return (
634+
self._process_geometry_column("ST_IsValid", rename="is_valid")
635+
.to_spark_pandas()
636+
.astype("bool")
637+
)
597638

598639
def is_valid_reason(self):
599640
# Implementation of the abstract method
600641
raise NotImplementedError("This method is not implemented yet.")
601642

602643
@property
603-
def is_empty(self):
604-
# Implementation of the abstract method
605-
raise NotImplementedError("This method is not implemented yet.")
644+
def is_empty(self) -> pspd.Series:
645+
"""
646+
Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for
647+
empty geometries.
648+
649+
Examples
650+
--------
651+
An example of a GeoDataFrame with one empty point, one point and one missing
652+
value:
653+
654+
>>> from shapely.geometry import Point
655+
>>> d = {'geometry': [Point(), Point(2, 1), None]}
656+
>>> gdf = geopandas.GeoDataFrame(d, crs="EPSG:4326")
657+
>>> gdf
658+
geometry
659+
0 POINT EMPTY
660+
1 POINT (2 1)
661+
2 None
662+
663+
>>> gdf.is_empty
664+
0 True
665+
1 False
666+
2 False
667+
dtype: bool
668+
669+
See Also
670+
--------
671+
GeoSeries.isna : detect missing values
672+
"""
673+
return (
674+
self._process_geometry_column("ST_IsEmpty", rename="is_empty")
675+
.to_spark_pandas()
676+
.astype("bool")
677+
)
606678

607679
def count_coordinates(self):
608680
# Implementation of the abstract method
@@ -617,9 +689,36 @@ def count_interior_rings(self):
617689
raise NotImplementedError("This method is not implemented yet.")
618690

619691
@property
620-
def is_simple(self):
621-
# Implementation of the abstract method
622-
raise NotImplementedError("This method is not implemented yet.")
692+
def is_simple(self) -> pspd.Series:
693+
"""Returns a ``Series`` of ``dtype('bool')`` with value ``True`` for
694+
geometries that do not cross themselves.
695+
696+
This is meaningful only for `LineStrings` and `LinearRings`.
697+
698+
Examples
699+
--------
700+
>>> from shapely.geometry import LineString
701+
>>> s = geopandas.GeoSeries(
702+
... [
703+
... LineString([(0, 0), (1, 1), (1, -1), (0, 1)]),
704+
... LineString([(0, 0), (1, 1), (1, -1)]),
705+
... ]
706+
... )
707+
>>> s
708+
0 LINESTRING (0 0, 1 1, 1 -1, 0 1)
709+
1 LINESTRING (0 0, 1 1, 1 -1)
710+
dtype: geometry
711+
712+
>>> s.is_simple
713+
0 False
714+
1 True
715+
dtype: bool
716+
"""
717+
return (
718+
self._process_geometry_column("ST_IsSimple", rename="is_simple")
719+
.to_spark_pandas()
720+
.astype("bool")
721+
)
623722

624723
@property
625724
def is_ring(self):

python/tests/geopandas/test_geoseries.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from sedona.geopandas import GeoSeries
2424
from tests.test_base import TestBase
2525
from shapely import wkt
26-
from shapely.geometry import Point, LineString, Polygon, GeometryCollection
26+
from shapely.geometry import Point, LineString, Polygon, GeometryCollection, LinearRing
2727
from pandas.testing import assert_series_equal
2828

2929
import sedona.geopandas as sgpd
@@ -195,13 +195,29 @@ def test_length(self):
195195
assert_series_equal(result, expected)
196196

197197
def test_is_valid(self):
198-
pass
198+
geoseries = sgpd.GeoSeries(
199+
[
200+
Polygon([(0, 0), (1, 1), (0, 1)]),
201+
Polygon([(0, 0), (1, 1), (1, 0), (0, 1)]), # bowtie geometry
202+
Polygon([(0, 0), (2, 2), (2, 0)]),
203+
None,
204+
]
205+
)
206+
result = geoseries.is_valid
207+
expected = pd.Series([True, False, True, False])
208+
assert_series_equal(result.to_pandas(), expected)
199209

200210
def test_is_valid_reason(self):
201211
pass
202212

203213
def test_is_empty(self):
204-
pass
214+
geoseries = sgpd.GeoSeries(
215+
[Point(), Point(2, 1), Polygon([(0, 0), (1, 1), (0, 1)]), None],
216+
)
217+
218+
result = geoseries.is_empty
219+
expected = pd.Series([True, False, False, False])
220+
assert_series_equal(result.to_pandas(), expected)
205221

206222
def test_count_coordinates(self):
207223
pass
@@ -213,7 +229,17 @@ def test_count_interior_rings(self):
213229
pass
214230

215231
def test_is_simple(self):
216-
pass
232+
s = sgpd.GeoSeries(
233+
[
234+
LineString([(0, 0), (1, 1), (1, -1), (0, 1)]),
235+
LineString([(0, 0), (1, 1), (1, -1)]),
236+
LinearRing([(0, 0), (1, 1), (1, -1), (0, 1)]),
237+
LinearRing([(0, 0), (-1, 1), (-1, -1), (1, -1)]),
238+
]
239+
)
240+
result = s.is_simple
241+
expected = pd.Series([False, True, False, True])
242+
assert_series_equal(result.to_pandas(), expected)
217243

218244
def test_is_ring(self):
219245
pass

python/tests/geopandas/test_match_geopandas_series.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
LineString,
3333
MultiPolygon,
3434
GeometryCollection,
35+
LinearRing,
3536
)
3637

3738
from sedona.geopandas import GeoSeries
@@ -310,13 +311,21 @@ def test_length(self):
310311
self.check_pd_series_equal(sgpd_result, gpd_result)
311312

312313
def test_is_valid(self):
313-
pass
314+
for _, geom in self.geoms:
315+
sgpd_result = GeoSeries(geom).is_valid
316+
assert isinstance(sgpd_result, ps.Series)
317+
gpd_result = gpd.GeoSeries(geom).is_valid
318+
self.check_pd_series_equal(sgpd_result, gpd_result)
314319

315320
def test_is_valid_reason(self):
316321
pass
317322

318323
def test_is_empty(self):
319-
pass
324+
for _, geom in self.geoms:
325+
sgpd_result = GeoSeries(geom).is_empty
326+
assert isinstance(sgpd_result, ps.Series)
327+
gpd_result = gpd.GeoSeries(geom).is_empty
328+
self.check_pd_series_equal(sgpd_result, gpd_result)
320329

321330
def test_count_coordinates(self):
322331
pass
@@ -328,7 +337,16 @@ def test_count_interior_rings(self):
328337
pass
329338

330339
def test_is_simple(self):
331-
pass
340+
data = [
341+
LineString([(0, 0), (0, 0)]),
342+
LineString([(0, 0), (1, 1), (1, -1), (0, 1)]),
343+
LineString([(0, 0), (1, 1), (0, 0)]),
344+
LinearRing([(0, 0), (1, 1), (1, 0), (0, 1), (0, 0)]),
345+
LinearRing([(0, 0), (-1, 1), (-1, -1), (1, -1)]),
346+
]
347+
sgpd_result = GeoSeries(data).is_simple
348+
gpd_result = gpd.GeoSeries(data).is_simple
349+
self.check_pd_series_equal(sgpd_result, gpd_result)
332350

333351
def test_is_ring(self):
334352
pass

0 commit comments

Comments
 (0)