Skip to content

Commit 88a49e1

Browse files
authored
[GH-2709] Implement GeoSeries: line_merge, count_coordinates, count_geometries, count_interior_rings, concave_hull, minimum_rotated_rectangle, exterior, extract_unique_points, remove_repeated_points (#2710)
1 parent e8a5ed6 commit 88a49e1

5 files changed

Lines changed: 547 additions & 69 deletions

File tree

python/sedona/spark/geopandas/base.py

Lines changed: 260 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -313,14 +313,94 @@ def is_empty(self):
313313
"""
314314
return _delegate_to_geometry_column("is_empty", self)
315315

316-
# def count_coordinates(self):
317-
# raise NotImplementedError("This method is not implemented yet.")
316+
def count_coordinates(self):
    """Count the coordinate tuples that make up each geometry.

    Returns
    -------
    Series (int)
        The number of coordinate tuples found in each geometry.

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Point, LineString, Polygon
    >>> s = GeoSeries(
    ...     [
    ...         Point(0, 0),
    ...         LineString([(0, 0), (1, 1), (2, 2)]),
    ...         Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
    ...     ]
    ... )
    >>> s.count_coordinates()
    0    1
    1    3
    2    5
    dtype: int32

    """
    # Forward the call through the shared delegation helper, naming the
    # operation to run.
    coordinate_counts = _delegate_to_geometry_column("count_coordinates", self)
    return coordinate_counts
343+
344+
def count_geometries(self):
    """Count the member geometries of each multi-geometry or collection.

    A geometry that is not a multi-geometry counts as 1.

    Returns
    -------
    Series (int)
        The number of member geometries for each entry.

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Point, MultiPoint, MultiLineString
    >>> s = GeoSeries(
    ...     [
    ...         Point(0, 0),
    ...         MultiPoint([(0, 0), (1, 1)]),
    ...         MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]),
    ...     ]
    ... )
    >>> s.count_geometries()
    0    1
    1    2
    2    2
    dtype: int32

    """
    # Forward the call through the shared delegation helper, naming the
    # operation to run.
    geometry_counts = _delegate_to_geometry_column("count_geometries", self)
    return geometry_counts
373+
374+
def count_interior_rings(self):
    """Count the interior rings (holes) of each polygon geometry.

    Polygons without holes and non-polygon geometries both yield 0.

    Returns
    -------
    Series (int)
        The number of interior rings for each entry.

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Point, Polygon
    >>> s = GeoSeries(
    ...     [
    ...         Polygon([(0, 0), (10, 0), (10, 10), (0, 10)],
    ...                 [[(1, 1), (2, 1), (2, 2), (1, 2)]]),
    ...         Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
    ...         Point(0, 0),
    ...     ]
    ... )
    >>> s.count_interior_rings()
    0    1
    1    0
    2    0
    dtype: int32

    """
    # Forward the call through the shared delegation helper, naming the
    # operation to run.
    ring_counts = _delegate_to_geometry_column("count_interior_rings", self)
    return ring_counts
324404

325405
@property
326406
def is_simple(self):
@@ -609,8 +689,41 @@ def centroid(self):
609689
"""
610690
return _delegate_to_geometry_column("centroid", self)
611691

612-
# def concave_hull(self, ratio=0.0, allow_holes=False):
613-
# raise NotImplementedError("This method is not implemented yet.")
692+
def concave_hull(self, ratio=0.0, allow_holes=False):
    """Compute the concave hull of each geometry.

    A concave hull is a geometry, possibly concave, that encloses the
    input geometry.

    Parameters
    ----------
    ratio : float, default 0.0
        Concaveness control in the range 0 to 1. A value of 1 yields the
        convex hull, while 0 yields the most concave hull.
    allow_holes : bool, default False
        Whether the resulting hull is permitted to contain holes.

    Returns
    -------
    GeoSeries

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import MultiPoint
    >>> s = GeoSeries(
    ...     [MultiPoint([(0, 0), (1, 0), (0.5, 0.5), (1, 1), (0, 1)])]
    ... )
    >>> s.concave_hull(ratio=0.3)
    0    POLYGON ((0 0, 0 1, 0.5 0.5, 1 1, 1 0, 0 0))
    dtype: geometry

    See Also
    --------
    GeoSeries.convex_hull : convex hull geometry
    """
    # Both options are forwarded positionally, in signature order.
    hulls = _delegate_to_geometry_column(
        "concave_hull",
        self,
        ratio,
        allow_holes,
    )
    return hulls
614727

615728
@property
616729
def convex_hull(self):
@@ -707,15 +820,87 @@ def envelope(self):
707820
"""
708821
return _delegate_to_geometry_column("envelope", self)
709822

710-
# def minimum_rotated_rectangle(self):
711-
# raise NotImplementedError("This method is not implemented yet.")
823+
def minimum_rotated_rectangle(self):
    """Compute the minimum rotated rectangle enclosing each geometry.

    This is the oriented envelope of the geometry. In contrast to
    ``envelope``, the rectangle may be rotated so that it fits the
    geometry more closely.

    Returns
    -------
    GeoSeries

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import MultiPoint
    >>> s = GeoSeries(
    ...     [MultiPoint([(0, 0), (1, 0), (0.5, 1)])]
    ... )
    >>> s.minimum_rotated_rectangle()
    0    POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))
    dtype: geometry

    See Also
    --------
    GeoSeries.envelope : axis-aligned bounding rectangle
    GeoSeries.convex_hull : convex hull geometry
    """
    # Forward the call through the shared delegation helper, naming the
    # operation to run.
    rectangles = _delegate_to_geometry_column("minimum_rotated_rectangle", self)
    return rectangles
851+
852+
@property
def exterior(self):
    """Outer boundary of each polygon geometry.

    The result is a ``GeoSeries`` of LineStrings, one per polygon, each
    tracing that polygon's exterior ring. Entries that are not polygons
    produce ``None``.

    .. note::
        Sedona's ``ST_ExteriorRing`` yields a ``LINESTRING``, whereas
        geopandas yields a ``LINEARRING``. The coordinates match those of
        the exterior ring; only the geometry type differs.

    Returns
    -------
    GeoSeries

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import Polygon
    >>> s = GeoSeries(
    ...     [Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])]
    ... )
    >>> s.exterior
    0    LINESTRING (0 0, 1 0, 1 1, 0 1, 0 0)
    dtype: geometry

    """
    # Forward the call through the shared delegation helper, naming the
    # operation to run.
    outer_rings = _delegate_to_geometry_column("exterior", self)
    return outer_rings
882+
883+
def extract_unique_points(self):
    """Collect the distinct vertices of each geometry as a MultiPoint.

    Returns
    -------
    GeoSeries
        One MultiPoint per geometry, holding all of its distinct vertices.

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import LineString
    >>> s = GeoSeries(
    ...     [LineString([(0, 0), (1, 1), (0, 0)])]
    ... )
    >>> s.extract_unique_points()
    0    MULTIPOINT ((0 0), (1 1))
    dtype: geometry

    """
    # Forward the call through the shared delegation helper, naming the
    # operation to run.
    unique_points = _delegate_to_geometry_column("extract_unique_points", self)
    return unique_points
719904

720905
# def offset_curve(self, distance, quad_segs=8, join_style="round", mitre_limit=5.0):
721906
# raise NotImplementedError("This method is not implemented yet.")
@@ -724,8 +909,33 @@ def envelope(self):
724909
# def interiors(self):
725910
# raise NotImplementedError("This method is not implemented yet.")
726911

727-
# def remove_repeated_points(self, tolerance=0.0):
728-
# raise NotImplementedError("This method is not implemented yet.")
912+
def remove_repeated_points(self, tolerance=0.0):
    """Drop duplicate points from each geometry.

    Parameters
    ----------
    tolerance : float, default 0.0
        Vertices lying within this distance of one another are removed.
        With a tolerance of 0.0 only exactly repeated coordinates are
        dropped.

    Returns
    -------
    GeoSeries

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import LineString
    >>> s = GeoSeries(
    ...     [LineString([(0, 0), (0, 0), (1, 1), (1, 1), (2, 2)])]
    ... )
    >>> s.remove_repeated_points()
    0    LINESTRING (0 0, 1 1, 2 2)
    dtype: geometry

    """
    # The tolerance is forwarded positionally after the series itself.
    deduplicated = _delegate_to_geometry_column(
        "remove_repeated_points",
        self,
        tolerance,
    )
    return deduplicated
729939

730940
# def set_precision(self, grid_size, mode="valid_output"):
731941
# raise NotImplementedError("This method is not implemented yet.")
@@ -1093,8 +1303,39 @@ def force_3d(self, z=0.0):
10931303
"""
10941304
return _delegate_to_geometry_column("force_3d", self, z)
10951305

1096-
# def line_merge(self, directed=False):
1097-
# raise NotImplementedError("This method is not implemented yet.")
1306+
def line_merge(self, directed=False):
    """Merge connected LineStrings.

    The result is a ``GeoSeries`` of (Multi)LineStrings in which
    LineStrings that connect to one another have been joined into single
    LineStrings.

    Parameters
    ----------
    directed : bool, default False
        Only ``directed=False`` is supported. Passing ``directed=True``
        will raise ``NotImplementedError``.

    Returns
    -------
    GeoSeries

    Examples
    --------
    >>> from sedona.spark.geopandas import GeoSeries
    >>> from shapely.geometry import MultiLineString
    >>> s = GeoSeries(
    ...     [
    ...         MultiLineString([[(0, 0), (1, 1)], [(1, 1), (2, 2)]]),
    ...         MultiLineString([[(0, 0), (1, 1)], [(2, 2), (3, 3)]]),
    ...     ]
    ... )
    >>> s.line_merge()
    0                  LINESTRING (0 0, 1 1, 2 2)
    1    MULTILINESTRING ((0 0, 1 1), (2 2, 3 3))
    dtype: geometry

    """
    # ``directed`` is forwarded unchanged; no validation happens in this
    # method itself.
    merged_lines = _delegate_to_geometry_column("line_merge", self, directed)
    return merged_lines
10981339

10991340
# @property
11001341
# def unary_union(self):

0 commit comments

Comments
 (0)