apache · jiayuasu · Jul 17, 2025 · Jun 4, 2025 · Jun 5, 2025 · Jun 5, 2025
@@ -135,3 +135,117 @@ names in parentheses are python variable names
 - useSpheroid (use_spheroid) - whether to use a cartesian or spheroidal distance calculation. Default is false
 
 In both cases the output is the input DataFrame with the weights column added to each row.
+
+## Moran I
+
+Moran's I is a measure of spatial autocorrelation that combines the spatial
+location of features with a non-spatial attribute. When the value is close to 1,
+there is positive spatial correlation; when it is close to 0, there is no
+correlation and the data is randomly distributed. When the
+Moran's I value is close to -1, there is negative
+correlation. Negative correlation means that nearby features have dissimilar values.
+
+The figure below illustrates the different spatial correlation values:
+
+- on the left there is negative correlation (-1)
+- in the middle correlation is positive (1)
+- on the right the correlation is close to zero and data is random.
+
+![moranI.png](../../image/moranI.png)
+
+The Moran's I statistic is available as Scala/Java and Python functions.
+The function requires a weights DataFrame as its input. You can create the
+weights DataFrame using the Apache Sedona weighting functions. Keep
+in mind that your input must have an id column that uniquely identifies
+each feature and a value column. The minimal schema required by the Apache Sedona
+Moran's I function is:
+
+```
+ |-- id: integer (nullable = true)
+ |-- value: double (nullable = true)
+ |-- weights: array (nullable = false)
+ |    |-- element: struct (containsNull = false)
+ |    |    |-- neighbor: struct (nullable = false)
+ |    |    |    |-- id: integer (nullable = true)
+ |    |    |    |-- value: double (nullable = true)
+ |    |    |-- value: double (nullable = true)
+```
+
+You can change the names of the id and value columns using the function parameters.
+
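+To use the [Apache Sedona weight functions](#adddistancebandcolumn) you need to pass the id column and the value column in the saved attributes parameter (`savedAttributes` in Scala, `saved_attributes` in Python) so that they are kept in the output.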
+
+=== "Scala"
+
+    ```scala
+    val weights = Weighting.addDistanceBandColumn(
+          positiveCorrelationFrame,
+          1.0,
+          savedAttributes = Seq("id", "value")
+    )
+
+    val moranResult = Moran.getGlobal(weights, idColumn = "id")
+
+    // result fields
+    moranResult.getPNorm
+    moranResult.getI
+    moranResult.getZNorm
+    ```
+
+=== "Python"
+
+    ```python
+    from sedona.spark.stats.autocorrelation.moran import Moran
+    from sedona.spark.stats.weighting import add_binary_distance_band_column
+
+    result = add_binary_distance_band_column(
+        df,
+        1.0,
+        saved_attributes=["id", "value"]
+    )
+
+    moran_i_result = Moran.get_global(result)
+
+    ## result fields
+    moran_i_result.p_norm
+    moran_i_result.i
+    moran_i_result.z_norm
+    ```
+
+The result contains the Z norm, the P norm and the Moran's I value.
+
+The full signatures of the functions are:
+
+=== "Scala"
+
+    ```scala
+    def getGlobal(
+      dataframe: DataFrame,
+      twoTailed: Boolean = true,
+      idColumn: String = ID_COLUMN,
+      valueColumnName: String = VALUE_COLUMN): MoranResult
+
+    // java interface
+    public interface MoranResult {
+        public double getI();
+        public double getPNorm();
+        public double getZNorm();
+    }
+    ```
+
+=== "Python"
+
+    ```python
+    def get_global(
+        df: DataFrame,
+        two_tailed: bool = True,
+        id_column: str = "id",
+        value_column: str = "value",
+    ) -> MoranResult
+
+    @dataclass
+    class MoranResult:
+        i: float
+        p_norm: float
+        z_norm: float
+    ```
@@ -65,6 +65,7 @@ class SedonaJvmLib(Enum):
     st_predicates = "org.apache.spark.sql.sedona_sql.expressions.st_predicates"
     st_aggregates = "org.apache.spark.sql.sedona_sql.expressions.st_aggregates"
     SedonaContext = "org.apache.sedona.spark.SedonaContext"
+    Moran = "org.apache.sedona.stats.autocorrelation.Moran"
 
     @classmethod
     def from_str(cls, geo_lib: str) -> "SedonaJvmLib":

@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from dataclasses import dataclass
+
+from pyspark.sql import DataFrame
+from pyspark.sql import SparkSession
+
+
+@dataclass
+class MoranResult:  # Python-side holder for the three values Moran.get_global reads from the JVM result
+    i: float  # filled from the JVM result's getI()
+    p_norm: float  # filled from the JVM result's getPNorm()
+    z_norm: float  # filled from the JVM result's getZNorm()
+
+
+class Moran:
+
+    @staticmethod
+    def get_global(
+        df: DataFrame,
+        two_tailed: bool = True,
+        id_column: str = "id",
+        value_column: str = "value",
+    ) -> MoranResult:
+        sedona = SparkSession.getActiveSession()
+
+        _jvm = sedona._jvm
+        moran_result = (
+            sedona._jvm.org.apache.sedona.stats.autocorrelation.Moran.getGlobal(
+                df._jdf, two_tailed, id_column, value_column
+            )
+        )
+
+        return MoranResult(
+            i=moran_result.getI(),
+            p_norm=moran_result.getPNorm(),
+            z_norm=moran_result.getZNorm(),
+        )
@@ -21,7 +21,7 @@
 Geographical Analysis, 24(3), 189-206. https://doi.org/10.1111/j.1538-4632.1992.tb00261.x
 """
 
-from pyspark.sql import Column, DataFrame, SparkSession
+from pyspark.sql import DataFrame, SparkSession
 
 # todo change weights and x type to string
 
@@ -59,7 +59,7 @@ def g_local(
     sedona = SparkSession.getActiveSession()
 
     result_df = sedona._jvm.org.apache.sedona.stats.hotspotDetection.GetisOrd.gLocal(
-        dataframe, x, weights, permutations, star, island_weight
+        dataframe._jdf, x, weights, permutations, star, island_weight
     )
 
     return DataFrame(result_df, sedona)
@@ -60,18 +60,21 @@ def add_distance_band_column(
 
     """
     sedona = SparkSession.getActiveSession()
-    return sedona._jvm.org.apache.sedona.stats.Weighting.addDistanceBandColumn(
-        dataframe._jdf,
-        float(threshold),
-        binary,
-        float(alpha),
-        include_zero_distance_neighbors,
-        include_self,
-        float(self_weight),
-        geometry,
-        use_spheroid,
-        saved_attributes,
-        result_name,
+    return DataFrame(
+        sedona._jvm.org.apache.sedona.stats.Weighting.addDistanceBandColumnPython(
+            dataframe._jdf,
+            float(threshold),
+            binary,
+            float(alpha),
+            include_zero_distance_neighbors,
+            include_self,
+            float(self_weight),
+            geometry,
+            use_spheroid,
+            saved_attributes,
+            result_name,
+        ),
+        sedona,
     )
 
 
@@ -110,15 +113,21 @@ def add_binary_distance_band_column(
     """
     sedona = SparkSession.getActiveSession()
 
-    return sedona._jvm.org.apache.sedona.stats.Weighting.addBinaryDistanceBandColumn(
-        dataframe._jdf,
-        float(threshold),
-        include_zero_distance_neighbors,
-        include_self,
-        geometry,
-        use_spheroid,
-        saved_attributes,
-        result_name,
+    return DataFrame(
+        sedona._jvm.org.apache.sedona.stats.Weighting.addDistanceBandColumnPython(
+            dataframe._jdf,
+            float(threshold),
+            True,
+            float(-1.0),
+            include_zero_distance_neighbors,
+            include_self,
+            float(1.0),
+            geometry,
+            use_spheroid,
+            saved_attributes,
+            result_name,
+        ),
+        sedona,
     )
 
 
@@ -161,15 +170,19 @@ def add_weighted_distance_band_column(
     """
     sedona = SparkSession.getActiveSession()
 
-    return sedona._jvm.org.apache.sedona.stats.Weighting.addBinaryDistanceBandColumn(
-        dataframe._jdf,
-        float(threshold),
-        float(alpha),
-        include_zero_distance_neighbors,
-        include_self,
-        float(self_weight),
-        geometry,
-        use_spheroid,
-        saved_attributes,
-        result_name,
+    return DataFrame(
+        sedona._jvm.org.apache.sedona.stats.Weighting.addDistanceBandColumnPython(
+            dataframe._jdf,
+            float(threshold),
+            False,
+            alpha,
+            include_zero_distance_neighbors,
+            include_self,
+            self_weight,
+            geometry,
+            use_spheroid,
+            saved_attributes,
+            result_name,
+        ),
+        sedona,
     )