From ab8e05c8270331bf7c191051ad5d1c1601adfd0c Mon Sep 17 00:00:00 2001
From: sfc-gh-mvashishtha <mahesh.vashishtha@snowflake.com>
Date: Thu, 25 Sep 2025 03:46:13 +0800
Subject: [PATCH 1/3] SNOW-2361885: Implement DataFrameGroupby.__len__()

Signed-off-by: sfc-gh-mvashishtha <mahesh.vashishtha@snowflake.com>
---
 CHANGELOG.md                                  |  1 +
 .../modin/supported/groupby_supported.rst     |  2 +
 .../extensions/dataframe_groupby_overrides.py |  4 +-
 ...ups.py => test_groupby_ngroups___len__.py} | 59 +++++++++++--------
 4 files changed, 37 insertions(+), 29 deletions(-)
 rename tests/integ/modin/groupby/{test_groupby_ngroups.py => test_groupby_ngroups___len__.py} (67%)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b0740db7fd..3eb6af0a3b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -61,6 +61,7 @@
 
 #### New Features
 - Added support for `DataFrame.query` for dataframes with single-level indexes.
+- Added support for `DataFrameGroupby.__len__` and `SeriesGroupBy.__len__`.
 
 #### Improvements
 
diff --git a/docs/source/modin/supported/groupby_supported.rst b/docs/source/modin/supported/groupby_supported.rst
index 12352ac984..fdcb311157 100644
--- a/docs/source/modin/supported/groupby_supported.rst
+++ b/docs/source/modin/supported/groupby_supported.rst
@@ -121,6 +121,8 @@ Computations/descriptive stats
 +-----------------------------+---------------------------------+----------------------------------------------------+
 | ``last``                    | P                               | Does not support ``min_count`` parameter           |
 +-----------------------------+---------------------------------+----------------------------------------------------+
+| ``__len__``                 | Y                               |                                                    |
++-----------------------------+---------------------------------+----------------------------------------------------+
 | ``max``                     | Y                               | See ``count``                                      |
 +-----------------------------+---------------------------------+----------------------------------------------------+
 | ``mean``                    | Y                               | See ``count``                                      |
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_groupby_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_groupby_overrides.py
index 5e43baa65b..de31269a1c 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_groupby_overrides.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_groupby_overrides.py
@@ -1230,10 +1230,8 @@ def __getitem__(self, key):
         )
 
 
-@register_df_groupby_override("__len__")
 def __len__(self):
-    # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.DataFrameGroupBy functions
-    ErrorMessage.method_not_implemented_error(name="__len__", class_="GroupBy")
+    return self.ngroups
 
 
 # expanding and rolling are unique cases and need to likely be handled
diff --git a/tests/integ/modin/groupby/test_groupby_ngroups.py b/tests/integ/modin/groupby/test_groupby_ngroups___len__.py
similarity index 67%
rename from tests/integ/modin/groupby/test_groupby_ngroups.py
rename to tests/integ/modin/groupby/test_groupby_ngroups___len__.py
index e1684bd851..a9d710081c 100644
--- a/tests/integ/modin/groupby/test_groupby_ngroups.py
+++ b/tests/integ/modin/groupby/test_groupby_ngroups___len__.py
@@ -6,32 +6,39 @@
 import numpy as np
 import pandas as native_pd
 import pytest
+from pytest import param
 
 import snowflake.snowpark.modin.plugin  # noqa: F401
 from tests.integ.modin.utils import eval_snowpark_pandas_result
 from tests.integ.utils.sql_counter import sql_count_checker
 
 
-def assert_ngroups_equal(snow_res, pd_res):
-    assert snow_res.ngroups == pd_res.ngroups
+@pytest.fixture(
+    params=(
+        param(lambda groupby: groupby.ngroups, id="ngroups"),
+        param(lambda groupby: len(groupby), id="len"),
+    )
+)
+def count_function(request):
+    return request.param
 
 
 @pytest.mark.parametrize("by", ["a", "b", ["a", "b"]])
 @sql_count_checker(query_count=1)
-def test_groupby_sort_multiindex_series(native_series_multi_numeric, by):
+def test_series_with_multiindex(native_series_multi_numeric, by, count_function):
 
     snow_ser = pd.Series(native_series_multi_numeric)
     native_ser = native_series_multi_numeric
     eval_snowpark_pandas_result(
         snow_ser,
         native_ser,
-        lambda ser: ser.groupby(by=by),
-        comparator=assert_ngroups_equal,
+        lambda ser: count_function(ser.groupby(by=by)),
+        comparator=int.__eq__,
     )
 
 
 @sql_count_checker(query_count=1)
-def test_groupby_ngroups_series_nan():
+def test_series_with_nan(count_function):
     index = native_pd.Index(["a", "b", "b", "a"])
     index.names = ["grp_col"]
     native_ser = native_pd.Series([390.0, 350.0, np.nan, 20.0], index=index)
@@ -39,13 +46,13 @@ def test_groupby_ngroups_series_nan():
     eval_snowpark_pandas_result(
         snow_ser,
         native_ser,
-        lambda ser: ser.groupby(by="grp_col"),
-        comparator=assert_ngroups_equal,
+        lambda ser: count_function(ser.groupby(by="grp_col")),
+        comparator=int.__eq__,
     )
 
 
 @sql_count_checker(query_count=1)
-def test_groupby_ngroups_series_nan_all():
+def test_all_nan_series(count_function):
     index = native_pd.Index(["a", "b", "b", "a"])
     index.names = ["grp_col"]
 
@@ -56,13 +63,13 @@ def test_groupby_ngroups_series_nan_all():
     eval_snowpark_pandas_result(
         snow_ser,
         native_ser,
-        lambda ser: ser.groupby(by="grp_col"),
-        comparator=assert_ngroups_equal,
+        lambda ser: count_function(ser.groupby(by="grp_col")),
+        comparator=int.__eq__,
     )
 
 
 @sql_count_checker(query_count=1)
-def test_groupby_ngroups_series():
+def test_series_with_single_level_index(count_function):
     index = native_pd.Index(["a", "b", "b", "a"])
     index.names = ["grp_col"]
 
@@ -75,28 +82,28 @@ def test_groupby_ngroups_series():
     eval_snowpark_pandas_result(
         snow_ser,
         native_ser,
-        lambda ser: ser.groupby(by="grp_col"),
-        comparator=assert_ngroups_equal,
+        lambda ser: count_function(ser.groupby(by="grp_col")),
+        comparator=int.__eq__,
     )
 
 
 @pytest.mark.parametrize("by", ["A", ["A", "B"]])
 @sql_count_checker(query_count=1)
-def test_groupby_ngroups(by):
+def test_df(by, count_function):
     native_df = native_pd.DataFrame({"A": list("aabbcccd"), "B": list("xxxxabcx")})
     snow_df = pd.DataFrame(native_df)
 
     eval_snowpark_pandas_result(
         snow_df,
         native_df,
-        lambda df: df.groupby(by=by),
-        comparator=assert_ngroups_equal,
+        lambda df: count_function(df.groupby(by=by)),
+        comparator=int.__eq__,
     )
 
 
 @pytest.mark.parametrize("by", ["c1", ["c1", "c2"], ["c1", "c2", "c1"]])
 @sql_count_checker(query_count=1)
-def test_groupby_ngroups_nan(by):
+def test_df_with_nan(by, count_function):
     native_df = native_pd.DataFrame(
         {
             "c1": [np.nan, 3, 4, 4, "b"],
@@ -109,22 +116,22 @@ def test_groupby_ngroups_nan(by):
     eval_snowpark_pandas_result(
         snow_df,
         native_df,
-        lambda df: df.groupby(by=by),
-        comparator=assert_ngroups_equal,
+        lambda df: count_function(df.groupby(by=by)),
+        comparator=int.__eq__,
     )
 
 
 @pytest.mark.parametrize("by", ["A", ["A", "B"]])
 @sql_count_checker(query_count=1)
-def test_groupby_ngroups_empty_cols(by):
+def test_df_with_0_rows(by, count_function):
     native_df = native_pd.DataFrame({"A": [], "B": []})
     snow_df = pd.DataFrame({"A": [], "B": []})
 
     eval_snowpark_pandas_result(
         snow_df,
         native_df,
-        lambda df: df.groupby(by=by),
-        comparator=assert_ngroups_equal,
+        lambda df: count_function(df.groupby(by=by)),
+        comparator=int.__eq__,
     )
 
 
@@ -133,10 +140,10 @@ def test_groupby_ngroups_empty_cols(by):
     "level", [0, "B", [1, 1], [1, 0], ["A", "B"], [0, "A"], [-1, 0]]
 )
 @sql_count_checker(query_count=2)
-def test_groupby_ngroups_multiindex(df_multi, level):
+def test_df_with_multiindex(df_multi, level, count_function):
     eval_snowpark_pandas_result(
         df_multi,
         df_multi.to_pandas(),
-        lambda df: df.groupby(level=level),
-        comparator=assert_ngroups_equal,
+        lambda df: count_function(df.groupby(level=level)),
+        comparator=int.__eq__,
     )

From cb58e79b430de07cd38d420d6ba229900df895e1 Mon Sep 17 00:00:00 2001
From: sfc-gh-mvashishtha <mahesh.vashishtha@snowflake.com>
Date: Thu, 25 Sep 2025 05:22:08 +0800
Subject: [PATCH 2/3] SNOW-2361885: Implement DataFrameGroupby.__len__()

Signed-off-by: sfc-gh-mvashishtha <mahesh.vashishtha@snowflake.com>
---
 tests/unit/modin/test_groupby_unsupported.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/unit/modin/test_groupby_unsupported.py b/tests/unit/modin/test_groupby_unsupported.py
index 42c70e1a28..f03f9163f0 100644
--- a/tests/unit/modin/test_groupby_unsupported.py
+++ b/tests/unit/modin/test_groupby_unsupported.py
@@ -8,7 +8,6 @@
 @pytest.mark.parametrize(
     "func, func_name",
     [
-        (lambda se: se.groupby("A").__len__(), "__len__"),
         (lambda se: se.groupby("A").__bytes__(), "__bytes__"),
         (lambda se: se.groupby("A").corrwith, "corrwith"),
         (lambda se: se.groupby("A").dtypes, "dtypes"),
@@ -49,7 +48,6 @@ def test_series_groupby_unsupported_methods_raises(
 @pytest.mark.parametrize(
     "func, func_name",
     [
-        (lambda df: df.groupby("A").__len__(), "__len__"),
         (lambda df: df.groupby("A").__bytes__(), "__bytes__"),
         (lambda df: df.groupby("A").corrwith, "corrwith"),
         (lambda df: df.groupby("A").dtypes, "dtypes"),

From 0f3bc1347bfb503b4de2a50911859ed3a6e01df5 Mon Sep 17 00:00:00 2001
From: sfc-gh-mvashishtha <mahesh.vashishtha@snowflake.com>
Date: Thu, 25 Sep 2025 09:32:47 +0800
Subject: [PATCH 3/3] Really do the override so we don't rely on
 groupby.indices

Signed-off-by: sfc-gh-mvashishtha <mahesh.vashishtha@snowflake.com>
---
 .../modin/plugin/extensions/dataframe_groupby_overrides.py       | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_groupby_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_groupby_overrides.py
index de31269a1c..a97d391332 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/dataframe_groupby_overrides.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/dataframe_groupby_overrides.py
@@ -1230,6 +1230,7 @@ def __getitem__(self, key):
         )
 
 
+@register_dataframe_groupby_accessor("__len__")
 def __len__(self):
     return self.ngroups