From a89cb0132ded6ccc476e6077adc475c998f8d9f9 Mon Sep 17 00:00:00 2001
From: Peter Nguyen <petern0408@gmail.com>
Date: Wed, 25 Jun 2025 10:45:51 -0700
Subject: [PATCH 1/8] Fix small constructor bug

---
 python/sedona/geopandas/geodataframe.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/sedona/geopandas/geodataframe.py b/python/sedona/geopandas/geodataframe.py
index a3b1db624c0..e5ac7605bfa 100644
--- a/python/sedona/geopandas/geodataframe.py
+++ b/python/sedona/geopandas/geodataframe.py
@@ -148,7 +148,9 @@ def __init__(
             assert dtype is None
             assert not copy
             if index is None:
-                internal = InternalFrame(spark_frame=data._internal.spark_frame)
+                internal = InternalFrame(
+                    spark_frame=data._internal.spark_frame, index_spark_columns=None
+                )
                 object.__setattr__(self, "_internal_frame", internal)
         elif isinstance(data, SparkDataFrame):
             assert columns is None

From b971e740097e12f55f166a48d336c34f9b919033 Mon Sep 17 00:00:00 2001
From: Peter Nguyen <petern0408@gmail.com>
Date: Wed, 25 Jun 2025 14:59:11 -0700
Subject: [PATCH 2/8] Fix condition for converting to wkb

---
 python/sedona/geopandas/geodataframe.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/sedona/geopandas/geodataframe.py b/python/sedona/geopandas/geodataframe.py
index e5ac7605bfa..9e59c96e9e3 100644
--- a/python/sedona/geopandas/geodataframe.py
+++ b/python/sedona/geopandas/geodataframe.py
@@ -175,8 +175,10 @@ def __init__(
                 )
             gdf = gpd.GeoDataFrame(df)
             # convert each geometry column to wkb type
+            import shapely
             for col in gdf.columns:
-                if isinstance(gdf[col], gpd.GeoSeries):
+                # It's possible we get a list, dict, pd.Series, gpd.GeoSeries, etc of shapely.Geometry objects.
+                if len(gdf[col]) > 0 and isinstance(gdf[col].iloc[0], shapely.Geometry):
                     gdf[col] = gdf[col].apply(lambda geom: geom.wkb)
             pdf = pd.DataFrame(gdf)
             # initialize the parent class pyspark Dataframe with the pandas Series

From a75476d93ddf39512b1f3506e243f1805a960b56 Mon Sep 17 00:00:00 2001
From: Peter Nguyen <petern0408@gmail.com>
Date: Wed, 25 Jun 2025 15:00:33 -0700
Subject: [PATCH 3/8] Fix constructor to not error on sgpd and pspd inputs

---
 python/sedona/geopandas/geodataframe.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/python/sedona/geopandas/geodataframe.py b/python/sedona/geopandas/geodataframe.py
index 9e59c96e9e3..d5f05ba9e20 100644
--- a/python/sedona/geopandas/geodataframe.py
+++ b/python/sedona/geopandas/geodataframe.py
@@ -141,17 +141,12 @@ def __init__(
         if isinstance(data, (GeoDataFrame, GeoSeries)):
             assert dtype is None
             assert not copy
-            self._anchor = data
-            self._col_label = index
+            super().__init__(data, index=index, dtype=dtype, copy=copy)
         elif isinstance(data, (PandasOnSparkSeries, PandasOnSparkDataFrame)):
             assert columns is None
             assert dtype is None
             assert not copy
-            if index is None:
-                internal = InternalFrame(
-                    spark_frame=data._internal.spark_frame, index_spark_columns=None
-                )
-                object.__setattr__(self, "_internal_frame", internal)
+            super().__init__(data, index=index, dtype=dtype)
         elif isinstance(data, SparkDataFrame):
             assert columns is None
             assert dtype is None

From 8c0b15d2a8ee1863b201b270fcd415a95ba93be1 Mon Sep 17 00:00:00 2001
From: Peter Nguyen <petern0408@gmail.com>
Date: Wed, 25 Jun 2025 15:02:24 -0700
Subject: [PATCH 4/8] Add constructor tests for all input types, including
 non-geometry

---
 python/tests/geopandas/test_geodataframe.py | 67 ++++++++++++++++++---
 1 file changed, 58 insertions(+), 9 deletions(-)

diff --git a/python/tests/geopandas/test_geodataframe.py b/python/tests/geopandas/test_geodataframe.py
index b236581255c..76151499247 100644
--- a/python/tests/geopandas/test_geodataframe.py
+++ b/python/tests/geopandas/test_geodataframe.py
@@ -21,9 +21,14 @@
     Point,
 )
 
-from sedona.geopandas import GeoDataFrame
+from sedona.geopandas import GeoDataFrame, GeoSeries
 from tests.test_base import TestBase
 import pyspark.pandas as ps
+import pandas as pd
+import geopandas as gpd
+import sedona.geopandas as sgpd
+import pytest
+from pandas.testing import assert_frame_equal
 
 
 class TestDataframe(TestBase):
@@ -41,10 +46,54 @@ class TestDataframe(TestBase):
     #
     # def teardown_method(self):
     #     shutil.rmtree(self.tempdir)
-
-    def test_constructor(self):
-        df = GeoDataFrame([Point(x, x) for x in range(3)])
-        check_geodataframe(df)
+    @pytest.mark.parametrize(
+        "obj",
+        [
+            [Point(x, x) for x in range(3)],
+            {"geometry": [Point(x, x) for x in range(3)]},
+            pd.DataFrame([Point(x, x) for x in range(3)]),
+            gpd.GeoDataFrame([Point(x, x) for x in range(3)]),
+            pd.Series([Point(x, x) for x in range(3)]),
+            gpd.GeoSeries([Point(x, x) for x in range(3)]),
+            GeoSeries([Point(x, x) for x in range(3)]),
+            GeoDataFrame([Point(x, x) for x in range(3)]),
+        ],
+    )
+    def test_constructor(self, obj):
+        sgpd_df = GeoDataFrame(obj)
+        check_geodataframe(sgpd_df)
+
+    def test_constructor_pandas_on_spark(self):
+        for obj in [
+            ps.DataFrame([Point(x, x) for x in range(3)]),
+            ps.Series([Point(x, x) for x in range(3)]),
+        ]:
+            sgpd_df = GeoDataFrame(obj)
+            check_geodataframe(sgpd_df)
+
+    @pytest.mark.parametrize(
+        "obj",
+        [
+            [],
+            [0, 1, 2],
+            ["x", "y", "z"],
+            {},
+            {"a": [0, 1, 2], 1: [4, 5, 6]},
+            {"a": ["x", "y", "z"], 1: ["a", "b", "c"]},
+            pd.Series([0, 1, 2]),
+            pd.Series(["x", "y", "z"]),
+            pd.DataFrame({"x": ["x", "y", "z"]}),
+            gpd.GeoDataFrame({"x": [0, 1, 2]}),
+            ps.DataFrame({"x": ["x", "y", "z"]}),
+        ],
+    )
+    def test_non_geometry(self, obj):
+        pd_df = pd.DataFrame(obj)
+        # pd.DataFrame(obj) doesn't work correctly for pandas on spark DataFrame type, so we use to_pandas() method instead.
+        if isinstance(obj, ps.DataFrame):
+            pd_df = obj.to_pandas()
+        sgpd_df = sgpd.GeoDataFrame(obj)
+        assert_frame_equal(pd_df, sgpd_df.to_pandas())
 
     def test_psdf(self):
         # this is to make sure the spark session works with pandas on spark api
@@ -73,7 +122,7 @@ def test_type_single_geometry_column(self):
 
         # Assert the geometry column has the correct type and is not nullable
         geometry_field = schema["geometry1"]
-        assert geometry_field.dataType.typeName() == "geometrytype"
+        assert geometry_field.dataType.typeName() == "geometrytype" or geometry_field.dataType.typeName() == "binary"
         assert not geometry_field.nullable
 
         # Assert non-geometry columns are present with correct types
@@ -97,16 +146,16 @@ def test_type_multiple_geometry_columns(self):
         schema = df._internal.spark_frame.schema
         # Assert both geometry columns have the correct type
         geometry_field1 = schema["geometry1"]
-        assert geometry_field1.dataType.typeName() == "geometrytype"
+        assert geometry_field1.dataType.typeName() == "geometrytype" or geometry_field1.dataType.typeName() == "binary"
         assert not geometry_field1.nullable
 
         geometry_field2 = schema["geometry2"]
-        assert geometry_field2.dataType.typeName() == "geometrytype"
+        assert geometry_field2.dataType.typeName() == "geometrytype" or geometry_field2.dataType.typeName() == "binary"
         assert not geometry_field2.nullable
 
         # Check non-geometry column
         attribute_field = schema["attribute"]
-        assert attribute_field.dataType.typeName() != "geometrytype"
+        assert attribute_field.dataType.typeName() != "geometrytype" and attribute_field.dataType.typeName() != "binary"
 
     def test_copy(self):
         df = GeoDataFrame([Point(x, x) for x in range(3)], name="test_df")

From 72b4967c32ce97714eafb0858555f754c328f9c5 Mon Sep 17 00:00:00 2001
From: Peter Nguyen <petern0408@gmail.com>
Date: Wed, 25 Jun 2025 15:03:42 -0700
Subject: [PATCH 5/8] pre-commit reformat

---
 python/sedona/geopandas/geodataframe.py     |  1 +
 python/tests/geopandas/test_geodataframe.py | 20 ++++++++++++++++----
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/python/sedona/geopandas/geodataframe.py b/python/sedona/geopandas/geodataframe.py
index d5f05ba9e20..02a519cd5db 100644
--- a/python/sedona/geopandas/geodataframe.py
+++ b/python/sedona/geopandas/geodataframe.py
@@ -171,6 +171,7 @@ def __init__(
             gdf = gpd.GeoDataFrame(df)
             # convert each geometry column to wkb type
             import shapely
+
             for col in gdf.columns:
                 # It's possible we get a list, dict, pd.Series, gpd.GeoSeries, etc of shapely.Geometry objects.
                 if len(gdf[col]) > 0 and isinstance(gdf[col].iloc[0], shapely.Geometry):
diff --git a/python/tests/geopandas/test_geodataframe.py b/python/tests/geopandas/test_geodataframe.py
index 76151499247..3988ac4355a 100644
--- a/python/tests/geopandas/test_geodataframe.py
+++ b/python/tests/geopandas/test_geodataframe.py
@@ -122,7 +122,10 @@ def test_type_single_geometry_column(self):
 
         # Assert the geometry column has the correct type and is not nullable
         geometry_field = schema["geometry1"]
-        assert geometry_field.dataType.typeName() == "geometrytype" or geometry_field.dataType.typeName() == "binary"
+        assert (
+            geometry_field.dataType.typeName() == "geometrytype"
+            or geometry_field.dataType.typeName() == "binary"
+        )
         assert not geometry_field.nullable
 
         # Assert non-geometry columns are present with correct types
@@ -146,16 +149,25 @@ def test_type_multiple_geometry_columns(self):
         schema = df._internal.spark_frame.schema
         # Assert both geometry columns have the correct type
         geometry_field1 = schema["geometry1"]
-        assert geometry_field1.dataType.typeName() == "geometrytype" or geometry_field1.dataType.typeName() == "binary"
+        assert (
+            geometry_field1.dataType.typeName() == "geometrytype"
+            or geometry_field1.dataType.typeName() == "binary"
+        )
         assert not geometry_field1.nullable
 
         geometry_field2 = schema["geometry2"]
-        assert geometry_field2.dataType.typeName() == "geometrytype" or geometry_field2.dataType.typeName() == "binary"
+        assert (
+            geometry_field2.dataType.typeName() == "geometrytype"
+            or geometry_field2.dataType.typeName() == "binary"
+        )
         assert not geometry_field2.nullable
 
         # Check non-geometry column
         attribute_field = schema["attribute"]
-        assert attribute_field.dataType.typeName() != "geometrytype" and attribute_field.dataType.typeName() != "binary"
+        assert (
+            attribute_field.dataType.typeName() != "geometrytype"
+            and attribute_field.dataType.typeName() != "binary"
+        )
 
     def test_copy(self):
         df = GeoDataFrame([Point(x, x) for x in range(3)], name="test_df")

From 3e243f36da9dce452d7948ab63e455522bf06c8c Mon Sep 17 00:00:00 2001
From: Peter Nguyen <petern0408@gmail.com>
Date: Wed, 25 Jun 2025 15:24:38 -0700
Subject: [PATCH 6/8] Change to BaseGeometry for shapely compatibility

---
 python/sedona/geopandas/geodataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sedona/geopandas/geodataframe.py b/python/sedona/geopandas/geodataframe.py
index 02a519cd5db..28e30278ee8 100644
--- a/python/sedona/geopandas/geodataframe.py
+++ b/python/sedona/geopandas/geodataframe.py
@@ -174,7 +174,7 @@ def __init__(
 
             for col in gdf.columns:
                 # It's possible we get a list, dict, pd.Series, gpd.GeoSeries, etc of shapely.Geometry objects.
-                if len(gdf[col]) > 0 and isinstance(gdf[col].iloc[0], shapely.Geometry):
+                if len(gdf[col]) > 0 and isinstance(gdf[col].iloc[0], shapely.geometry.base.BaseGeometry):
                     gdf[col] = gdf[col].apply(lambda geom: geom.wkb)
             pdf = pd.DataFrame(gdf)
             # initialize the parent class pyspark Dataframe with the pandas Series

From aed31262b48f79024a66b1584799eb42de6e5c52 Mon Sep 17 00:00:00 2001
From: Peter Nguyen <petern0408@gmail.com>
Date: Wed, 25 Jun 2025 15:33:28 -0700
Subject: [PATCH 7/8] pre-commit fmt

---
 python/sedona/geopandas/geodataframe.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/sedona/geopandas/geodataframe.py b/python/sedona/geopandas/geodataframe.py
index 28e30278ee8..a2f90dff4b1 100644
--- a/python/sedona/geopandas/geodataframe.py
+++ b/python/sedona/geopandas/geodataframe.py
@@ -174,7 +174,9 @@ def __init__(
 
             for col in gdf.columns:
                 # It's possible we get a list, dict, pd.Series, gpd.GeoSeries, etc of shapely.Geometry objects.
-                if len(gdf[col]) > 0 and isinstance(gdf[col].iloc[0], shapely.geometry.base.BaseGeometry):
+                if len(gdf[col]) > 0 and isinstance(
+                    gdf[col].iloc[0], shapely.geometry.base.BaseGeometry
+                ):
                     gdf[col] = gdf[col].apply(lambda geom: geom.wkb)
             pdf = pd.DataFrame(gdf)
             # initialize the parent class pyspark Dataframe with the pandas Series

From ceb9a2aa2f688c0d8471484eac505ca77cb4d7c2 Mon Sep 17 00:00:00 2001
From: Peter Nguyen <petern0408@gmail.com>
Date: Wed, 25 Jun 2025 16:07:56 -0700
Subject: [PATCH 8/8] Remove empty list and dict test cases since different
 Spark versions handle them differently

---
 python/tests/geopandas/test_geodataframe.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/python/tests/geopandas/test_geodataframe.py b/python/tests/geopandas/test_geodataframe.py
index 3988ac4355a..33e0041dc20 100644
--- a/python/tests/geopandas/test_geodataframe.py
+++ b/python/tests/geopandas/test_geodataframe.py
@@ -74,10 +74,8 @@ def test_constructor_pandas_on_spark(self):
     @pytest.mark.parametrize(
         "obj",
         [
-            [],
             [0, 1, 2],
             ["x", "y", "z"],
-            {},
             {"a": [0, 1, 2], 1: [4, 5, 6]},
             {"a": ["x", "y", "z"], 1: ["a", "b", "c"]},
             pd.Series([0, 1, 2]),