Skip to content
230 changes: 201 additions & 29 deletions python/sedona/geopandas/geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import os
import typing
from typing import Any, Union
from typing import Any, Union, Literal

import geopandas as gpd
Comment thread
jiayuasu marked this conversation as resolved.
import pandas as pd
Expand Down Expand Up @@ -51,13 +51,8 @@ def __repr__(self) -> str:
"""
Return a string representation of the GeoSeries in WKT format.
"""
try:
Comment thread
zhangfengcdt marked this conversation as resolved.
gpd_series = self._to_geopandas()
return gpd_series.__repr__()

except Exception as e:
# Fallback to parent's representation if conversion fails
return super().__repr__()
gpd_series = self.to_geopandas()
return gpd_series.__repr__()

def __init__(
self,
Expand Down Expand Up @@ -126,6 +121,17 @@ def __init__(

self._anchor = data
self._col_label = index

data_crs = None
if hasattr(data, "crs"):
data_crs = data.crs
if data_crs is not None and crs is not None and data_crs != crs:
raise ValueError(
"CRS mismatch between CRS of the passed geometries "
"and 'crs'. Use 'GeoSeries.set_crs(crs, "
"allow_override=True)' to overwrite CRS or "
"'GeoSeries.to_crs(crs)' to reproject geometries. "
)
else:
if isinstance(data, pd.Series):
assert index is None
Expand Down Expand Up @@ -155,6 +161,180 @@ def __init__(
fastpath=fastpath,
)

if crs:
self.set_crs(crs, inplace=True)

@property
def crs(self) -> Union["CRS", None]:
    """The Coordinate Reference System (CRS) as a ``pyproj.CRS`` object.

    Returns None if the CRS is not set.

    :getter: Returns a ``pyproj.CRS`` or None. When setting, the value
        can be anything accepted by
        :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
        such as an authority string (eg "EPSG:4326") or a WKT string.

    Note: All records in the GeoSeries are assumed to share the same CRS;
    only the SRID of the first record is read.

    Examples
    --------
    >>> s.crs  # doctest: +SKIP
    <Geographic 2D CRS: EPSG:4326>
    Name: WGS 84
    Axis Info [ellipsoidal]:
    - Lat[north]: Geodetic latitude (degree)
    - Lon[east]: Geodetic longitude (degree)
    Area of Use:
    - name: World
    - bounds: (-180.0, -90.0, 180.0, 90.0)
    Datum: World Geodetic System 1984
    - Ellipsoid: WGS 84
    - Prime Meridian: Greenwich

    See Also
    --------
    GeoSeries.set_crs : assign CRS
    GeoSeries.to_crs : re-project to another CRS
    """
    from pyproj import CRS

    srid_series = self._process_geometry_column("ST_SRID", rename="crs")
    first_srid = srid_series.take([0])[0]
    if not first_srid:
        # Sedona reports an SRID of 0 when no CRS has been assigned.
        return None
    return CRS.from_user_input(first_srid)

@crs.setter
def crs(self, value: Union["CRS", None]):
    """Assign the CRS in place by delegating to ``set_crs(value, inplace=True)``.

    ``allow_override`` is left at the ``set_crs`` default, so any error that
    ``set_crs`` raises for a conflicting CRS propagates to the caller.
    """
    self.set_crs(value, inplace=True)

# Typing-only stubs for ``set_crs``. The return type depends on ``inplace``:
#   * ``inplace=True`` (must be passed explicitly, by keyword) -> ``None``
#   * ``inplace`` omitted or ``False``                          -> ``GeoSeries``
#   * a non-literal ``bool``                                    -> either
# The ``Literal[True]`` overload deliberately has no default: the runtime
# default is ``False``, and giving both literal overloads a default makes them
# overlap for calls that omit ``inplace``.
@typing.overload
def set_crs(
    self,
    crs: Union[Any, None] = None,
    epsg: Union[int, None] = None,
    *,
    inplace: Literal[True],
    allow_override: bool = False,
) -> None: ...

@typing.overload
def set_crs(
    self,
    crs: Union[Any, None] = None,
    epsg: Union[int, None] = None,
    inplace: Literal[False] = False,
    allow_override: bool = False,
) -> "GeoSeries": ...

@typing.overload
def set_crs(
    self,
    crs: Union[Any, None] = None,
    epsg: Union[int, None] = None,
    inplace: bool = False,
    allow_override: bool = False,
) -> Union["GeoSeries", None]: ...

def set_crs(
    self,
    crs: Union[Any, None] = None,
    epsg: Union[int, None] = None,
    inplace: bool = False,
    allow_override: bool = False,
) -> Union["GeoSeries", None]:
    """
    Set the Coordinate Reference System (CRS) of a ``GeoSeries``.

    Pass ``None`` to remove CRS from the ``GeoSeries``.

    Notes
    -----
    The underlying geometries are not transformed to this CRS. To
    transform the geometries to a new CRS, use the ``to_crs`` method.

    Parameters
    ----------
    crs : pyproj.CRS | None, optional
        The value can be anything accepted
        by :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
        such as an authority string (eg "EPSG:4326") or a WKT string.
    epsg : int, optional if `crs` is specified
        EPSG code specifying the projection. Ignored when `crs` is given.
    inplace : bool, default False
        If True, the CRS of this GeoSeries is changed in place and
        ``None`` is returned. If False, a new GeoSeries is returned and
        this one is left untouched.
    allow_override : bool, default False
        If the GeoSeries already has a CRS, allow to replace the
        existing CRS, even when both are not equal.

    Returns
    -------
    GeoSeries or None
        The GeoSeries carrying the new CRS, or ``None`` when
        ``inplace=True``.

    Raises
    ------
    ValueError
        If the GeoSeries already has a CRS that differs from the requested
        one and ``allow_override`` is False, or if the requested CRS cannot
        be expressed as an EPSG code.

    Examples
    --------
    >>> from shapely.geometry import Point
    >>> s = geopandas.GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)])
    >>> s
    0    POINT (1 1)
    1    POINT (2 2)
    2    POINT (3 3)
    dtype: geometry

    Setting CRS to a GeoSeries without one:

    >>> s.crs is None
    True

    >>> s = s.set_crs('epsg:3857')
    >>> s.crs  # doctest: +SKIP
    <Projected CRS: EPSG:3857>
    Name: WGS 84 / Pseudo-Mercator
    Axis Info [cartesian]:
    - X[east]: Easting (metre)
    - Y[north]: Northing (metre)
    Area of Use:
    - name: World - 85°S to 85°N
    - bounds: (-180.0, -85.06, 180.0, 85.06)
    Coordinate Operation:
    - name: Popular Visualisation Pseudo-Mercator
    - method: Popular Visualisation Pseudo Mercator
    Datum: World Geodetic System 1984
    - Ellipsoid: WGS 84
    - Prime Meridian: Greenwich

    Overriding existing CRS:

    >>> s = s.set_crs(4326, allow_override=True)

    Without ``allow_override=True``, ``set_crs`` returns an error if you try to
    override CRS.

    See Also
    --------
    GeoSeries.to_crs : re-project to another CRS

    """
    from pyproj import CRS

    # ``crs`` takes precedence over ``epsg``; with neither given, ``crs``
    # stays None, which requests removal of the CRS.
    if crs is not None:
        crs = CRS.from_user_input(crs)
    elif epsg is not None:
        crs = CRS.from_epsg(epsg)

    # The current CRS only matters for the override check, so skip the
    # lookup entirely when overriding is allowed.
    if not allow_override:
        curr_crs = self.crs
        if curr_crs is not None and curr_crs != crs:
            raise ValueError(
                "The GeoSeries already has a CRS which is not equal to the passed "
                "CRS. Specify 'allow_override=True' to allow replacing the existing "
                "CRS without doing any transformation. If you actually want to "
                "transform the geometries, use 'GeoSeries.to_crs' instead."
            )

    if crs is None:
        # 0 indicates no srid in sedona
        new_epsg = 0
    else:
        new_epsg = crs.to_epsg()
        if new_epsg is None:
            # ``to_epsg`` returns None when no EPSG code matches; passing
            # that on as the SRID would not produce a meaningful query.
            raise ValueError(
                "The passed CRS could not be matched to an EPSG code and "
                "therefore cannot be stored as an SRID."
            )

    # Keep the same column name instead of renaming it
    result = self._process_geometry_column("ST_SetSRID", rename="", srid=new_epsg)

    if inplace:
        self._update_anchor(result._to_spark_pandas_df())
        return None

    return result

def _process_geometry_column(
self, operation: str, rename: str, *args, **kwargs
) -> "GeoSeries":
Expand All @@ -166,7 +346,7 @@ def _process_geometry_column(
operation : str
The spatial operation to apply (e.g., 'ST_Area', 'ST_Buffer').
rename : str
The name of the resulting column.
The name of the resulting column. If empty, the old column name is maintained.
args : tuple
Positional arguments for the operation.
kwargs : dict
Expand Down Expand Up @@ -197,12 +377,14 @@ def _process_geometry_column(
]
params = f", {', '.join(params_list)}"

rename = first_col if not rename else rename

if isinstance(data_type, BinaryType):
sql_expr = (
f"{operation}(ST_GeomFromWKB(`{first_col}`){params}) as {rename}"
f"{operation}(ST_GeomFromWKB(`{first_col}`){params}) as `{rename}`"
)
else:
sql_expr = f"{operation}(`{first_col}`{params}) as {rename}"
sql_expr = f"{operation}(`{first_col}`{params}) as `{rename}`"

sdf = self._internal.spark_frame.selectExpr(sql_expr)
internal = InternalFrame(
Expand All @@ -229,9 +411,11 @@ def to_geopandas(self) -> gpd.GeoSeries:
Returns:
- geopandas.GeoSeries: A geopandas GeoSeries.
"""
from pyspark.pandas.utils import log_advice

log_advice(
"`to_geopandas` loads all data into the driver's memory. "
"It should only be used if the resulting geopandas Series is expected to be small."
"It should only be used if the resulting geopandas GeoSeries is expected to be small."
)
return self._to_geopandas()

Expand All @@ -248,12 +432,10 @@ def _to_geopandas(self) -> gpd.GeoSeries:
return gpd.GeoSeries(pd_series)

def to_spark_pandas(self) -> pspd.Series:
"""
Convert the GeoSeries to a Spark pandas Series.
Returns:
- pyspark.pandas.Series: A Spark pandas Series containing the geometries in WKB format.
"""
return pspd.Series(self._to_internal_pandas())
return pspd.Series(self._psdf._to_internal_pandas())

def _to_spark_pandas_df(self) -> pspd.DataFrame:
    """Build a ``pspd.DataFrame`` from the internal frame of ``self._psdf``."""
    internal_frame = self._psdf._internal
    return pspd.DataFrame(internal_frame)

@property
def geometry(self) -> "GeoSeries":
Expand Down Expand Up @@ -318,16 +500,6 @@ def area(self) -> pspd.Series:
"""
return self._process_geometry_column("ST_Area", rename="area").to_spark_pandas()

@property
def crs(self):
# Implementation of the abstract method
raise NotImplementedError("This method is not implemented yet.")

@crs.setter
def crs(self, value):
# Implementation of the abstract method
raise NotImplementedError("This method is not implemented yet.")

@property
def geom_type(self):
# Implementation of the abstract method
Expand Down Expand Up @@ -755,7 +927,7 @@ def clip(self, mask, keep_geom_type: bool = False, sort=False) -> "GeoSeries":
# # Utils
# -----------------------------------------------------------------------------

def get_first_geometry_column(self):
def get_first_geometry_column(self) -> Union[str, None]:
first_binary_or_geometry_col = next(
(
field.name
Expand Down
21 changes: 21 additions & 0 deletions python/tests/geopandas/test_geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.

import pytest
import pandas as pd
import geopandas as gpd
import sedona.geopandas as sgpd
Expand Down Expand Up @@ -286,3 +287,23 @@ def test_contains(self):

def test_contains_properly(self):
pass

def test_set_crs(self):
    """Exercise ``set_crs`` and the ``crs`` property on a Sedona GeoSeries.

    Covers: no CRS initially, assigning by EPSG, rejection of a conflicting
    CRS without ``allow_override``, clearing the CRS, in-place assignment,
    and passing ``crs`` to the constructor.
    """
    geo_series = sgpd.GeoSeries(self.geoseries)
    assert geo_series.crs is None
    geo_series = geo_series.set_crs(epsg=4326)
    assert geo_series.crs.to_epsg() == 4326

    # Replacing an existing CRS without allow_override must be rejected,
    # both through set_crs and through the property setter.
    with pytest.raises(ValueError):
        geo_series.set_crs(4328)
    with pytest.raises(ValueError):
        geo_series.crs = None

    geo_series = geo_series.set_crs(None, allow_override=True)
    assert geo_series.crs is None

    geo_series.set_crs(4326, inplace=True)
    assert geo_series.crs.to_epsg() == 4326

    geo_series = sgpd.GeoSeries(self.geoseries, crs=4326)
    assert geo_series.crs.to_epsg() == 4326
14 changes: 14 additions & 0 deletions python/tests/geopandas/test_match_geopandas_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,20 @@ def test_contains(self):
def test_contains_properly(self):
pass

def test_set_crs(self):
    """Check that ``crs`` matches geopandas before and after each ``set_crs``.

    For every geometry fixture, the Sedona and geopandas series are taken
    through the same sequence of ``set_crs`` calls and their ``crs`` values
    are compared after each step.
    """
    crs_steps = (
        {"epsg": 4326},
        {"epsg": 3857, "allow_override": True},
    )
    for _, geom in self.geoms:
        sedona_series = GeoSeries(geom)
        geopandas_series = gpd.GeoSeries(geom)
        assert sedona_series.crs == geopandas_series.crs

        for step_kwargs in crs_steps:
            sedona_series = sedona_series.set_crs(**step_kwargs)
            geopandas_series = geopandas_series.set_crs(**step_kwargs)
            assert sedona_series.crs == geopandas_series.crs

# -----------------------------------------------------------------------------
# # Utils
# -----------------------------------------------------------------------------
Expand Down
Loading