Kotlin · Allex-Nik · May 27, 2026 · May 27, 2026 · May 27, 2026 · May 27, 2026
diff --git a/core/api/core.api b/core/api/core.api
@@ -1714,6 +1714,10 @@ public final class org/jetbrains/kotlinx/dataframe/api/CountDistinctKt {
 	public static final fun countDistinct (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Ljava/lang/String;)I
 	public static final fun countDistinct (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Lkotlin/reflect/KProperty;)I
 	public static final fun countDistinct (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;)I
+	public static final fun countDistinct (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
+	public static final fun countDistinct (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;Ljava/lang/String;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
+	public static synthetic fun countDistinct$default (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;Ljava/lang/String;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
+	public static synthetic fun countDistinct$default (Lorg/jetbrains/kotlinx/dataframe/api/Grouped;Ljava/lang/String;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
 }
 
 public final class org/jetbrains/kotlinx/dataframe/api/CountKt {

diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt
@@ -4,10 +4,18 @@ import org.jetbrains.kotlinx.dataframe.AnyColumnReference
 import org.jetbrains.kotlinx.dataframe.ColumnsSelector
 import org.jetbrains.kotlinx.dataframe.DataFrame
 import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
+import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
+import org.jetbrains.kotlinx.dataframe.annotations.Refine
+import org.jetbrains.kotlinx.dataframe.api.CountDistinctOnGroupByDocs.COLUMNS_PARAM
+import org.jetbrains.kotlinx.dataframe.api.CountDistinctOnGroupByDocs.COLUMN_SELECTION_DSL
+import org.jetbrains.kotlinx.dataframe.api.CountDistinctOnGroupByDocs.COMPARISON_OBJECT
+import org.jetbrains.kotlinx.dataframe.api.CountDistinctOnGroupByDocs.EXAMPLE
+import org.jetbrains.kotlinx.dataframe.api.CountDistinctOnGroupByDocs.SCOPE
 import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
 import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
 import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
 import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
+import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateValue
 import org.jetbrains.kotlinx.dataframe.indices
 import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
 import kotlin.reflect.KProperty
@@ -93,3 +101,81 @@ public fun <T> DataFrame<T>.countDistinct(vararg columns: AnyColumnReference): I
     countDistinct { columns.toColumnSet() }
 
 // endregion
+
+// region GroupBy
+
+/**
+ * Aggregates this [GroupBy] by counting the number of distinct {@get [COMPARISON_OBJECT] rows} in each group.
+ *
+ * Compares rows in each group based on the values in {@get [SCOPE] all} columns.
+ * Returns a new [DataFrame] where each row corresponds to a group.
+ * The resulting [DataFrame] contains:
+ * - the original group key columns,
+ * - a new column (named [resultName\], default is `"countDistinct"`)
+ * that contains the number of distinct {@get [COMPARISON_OBJECT] rows} in each group.
+ *
+ * See also:
+ * - [aggregate][Grouped.aggregate], which aggregates a [GroupBy] using the provided statistics.
+ * - [count][Grouped.count], which counts the number of rows in each group.
+ * - [distinct][DataFrame.distinct], which removes duplicate rows and returns a new [DataFrame].
+ * - [groupBy][DataFrame.groupBy], which groups the rows of a [DataFrame]
+ * based on the values in one or more specified columns.
+ *
+ * For more information: {@include [DocumentationUrls.CountDistinct]}
+ *
+ * {@get [COLUMN_SELECTION_DSL]}
+ *
+ * ### Example
+ * ```kotlin
+ * {@get [EXAMPLE]}
+ * ```
+ *
+ * @param [resultName\] The name of the result column that will store the number
+ * of distinct {@get [COMPARISON_OBJECT] rows} in each group. Defaults to `"countDistinct"`.
+ * @get [COLUMNS_PARAM]
+ * @return A new [DataFrame] with group keys and corresponding numbers of distinct {@get [COMPARISON_OBJECT] rows}.
+ */
+@ExcludeFromSources
+private interface CountDistinctOnGroupByDocs {
+    typealias COMPARISON_OBJECT = Nothing
+    typealias SCOPE = Nothing
+    typealias EXAMPLE = Nothing
+    typealias COLUMN_SELECTION_DSL = Nothing
+    typealias COLUMNS_PARAM = Nothing
+}
+
+/**
+ * @include [CountDistinctOnGroupByDocs]
+ * @set [EXAMPLE]
+ * // Counts the distinct rows within each "city" group, returning
+ * // a new DataFrame with the columns "city" and "countDistinct"
+ * df.groupBy { city }.countDistinct()
+ */
+@Refine
+@Interpretable("GroupByCountDistinct0")
+public fun <T> Grouped<T>.countDistinct(resultName: String = "countDistinct"): DataFrame<T> =
+    countDistinct(resultName) { all() }
+
+/**
+ * @include [CountDistinctOnGroupByDocs]
+ * @set [COMPARISON_OBJECT] combinations of values in the selected [columns]
+ * @set [SCOPE] the selected
+ * @set [COLUMN_SELECTION_DSL] {@include [SelectingColumns.ColumnsSelectionDsl]}
+ * @set [EXAMPLE]
+ * // Counts distinct combinations of values in the "year" and "title" columns
+ * // for each city, returning a new DataFrame with columns "city" and "countDistinct"
+ * df.groupBy { city }.countDistinct { year and title }
+ * @set [COLUMNS_PARAM] @param [columns\] The [ColumnsSelector] used to select columns
+ * that will be considered for evaluating whether the rows are distinct.
+ */
+@Refine
+@Interpretable("GroupByCountDistinct0") // NOTE(review): id shared with the overload above; confirm intended
+public fun <T, C> Grouped<T>.countDistinct(
+    resultName: String = "countDistinct",
+    columns: ColumnsSelector<T, C>,
+): DataFrame<T> =
+    aggregateValue(resultName) {
+        countDistinct(columns) default 0
+    }
+
+// endregion
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt
@@ -150,6 +150,9 @@ internal interface GroupByDocs {
      * `| `__`.`__[**`count`**][Grouped.count]**`() `**
      *
      * {@include [Indent]}
+     * `| `__`.`__[**`countDistinct`**][Grouped.countDistinct]**`() `**
+     *
+     * {@include [Indent]}
      * `| `__`.`__[**`aggregate`**][Grouped.aggregate]**`  {  `**`aggregations: `[`AggregateDsl`][AggregateDsl]**` }`**
      *
      * {@include [Indent]}
@@ -190,6 +193,8 @@ internal interface GroupByDocs {
      *
      * * [count][Grouped.count] — calculate the number of rows in each group
      *   (optionally counting only rows that satisfy the given predicate);
+     * * [countDistinct][Grouped.countDistinct] — calculate the number of distinct rows in each group
+     *   (or distinct combinations of values in selected columns);
      * * [max][Grouped.max] / [maxOf][Grouped.maxOf] / [maxFor][Grouped.maxFor] —
      *   calculate the maximum of all values on the selected columns / by a row expression /
      *   for each of the selected columns within each group;
@@ -295,6 +300,8 @@ internal interface GroupByDocs {
      *   from all rows of each group for the selected columns.
      * * [count][Grouped.count] — creates a [DataFrame] containing the grouping key columns and an additional column
      *   with the number of rows in each corresponding group;
+     * * [countDistinct][Grouped.countDistinct] — creates a [DataFrame] containing the grouping key columns
+     *   and an additional column with the number of distinct rows in each corresponding group;
      * * [aggregate][Grouped.aggregate] — performs a set of custom aggregations using [AggregateDsl],
      *   allowing you to compute one or more derived values per group;
      * * [Various aggregation statistics][AggregationStatistics] — predefined shortcuts

diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/countDistinct.kt
@@ -0,0 +1,146 @@
+package org.jetbrains.kotlinx.dataframe.api
+
+import io.kotest.matchers.shouldBe
+import org.jetbrains.kotlinx.dataframe.nrow
+import org.junit.Test
+
+class CountDistinctTests {
+
+    private val df = dataFrameOf(
+        "name" to columnOf("Alice", "Alice", "Bob", "Charlie"),
+        "age" to columnOf(15, 15, 20, 25),
+        "group" to columnOf(1, 1, 1, 2),
+    )
+
+    @Test
+    fun `countDistinct on GroupBy`() {
+        val result = df.groupBy("group").countDistinct()
+        val expected = dataFrameOf(
+            "group" to columnOf(1, 2),
+            "countDistinct" to columnOf(2, 1),
+        )
+        result shouldBe expected
+    }
+
+    @Test
+    fun `countDistinct on GroupBy with custom result name`() {
+        val result = df.groupBy("group").countDistinct("unique")
+        val expected = dataFrameOf(
+            "group" to columnOf(1, 2),
+            "unique" to columnOf(2, 1),
+        )
+        result shouldBe expected
+    }
+
+    @Test
+    fun `countDistinct on GroupBy with one unique row`() {
+        val df = dataFrameOf(
+            "name" to columnOf("Alice", "Alice", "Alice"),
+            "age" to columnOf(15, 15, 15),
+            "group" to columnOf(1, 1, 1),
+        )
+        val result = df.groupBy("group").countDistinct()
+        val expected = dataFrameOf(
+            "group" to columnOf(1),
+            "countDistinct" to columnOf(1),
+        )
+        result shouldBe expected
+    }
+
+    // TODO: also assert the result's columns once #1531 is fixed (only the row count is checked for now)
+    @Test
+    fun `countDistinct on empty GroupBy`() {
+        df
+            .drop(df.nrow)
+            .groupBy("group").countDistinct()
+            .count() shouldBe 0
+    }
+
+    @Test
+    fun `countDistinct on GroupBy with nulls`() {
+        val result = df
+            .append(null, null, 1)
+            .groupBy("group").countDistinct()
+        val expected = dataFrameOf(
+            "group" to columnOf(1, 2),
+            "countDistinct" to columnOf(3, 1),
+        )
+        result shouldBe expected
+    }
+
+    @Test
+    fun `countDistinct on GroupBy with null group key`() {
+        val result = df
+            .append("Dave", 30, null)
+            .groupBy("group").countDistinct()
+        val expected = dataFrameOf(
+            "group" to columnOf(1, 2, null),
+            "countDistinct" to columnOf(2, 1, 1),
+        )
+        result shouldBe expected
+    }
+
+    @Test
+    fun `countDistinct on GroupBy with columns selector`() {
+        val result = df.groupBy("group").countDistinct { "name"<String>() }
+        val expected = dataFrameOf(
+            "group" to columnOf(1, 2),
+            "countDistinct" to columnOf(2, 1),
+        )
+        result shouldBe expected
+    }
+
+    @Test
+    fun `countDistinct on GroupBy with columns selector (not distinct only by selected column)`() {
+        val df = dataFrameOf(
+            "name" to columnOf("Alice", "Bob", "Charlie"),
+            "age" to columnOf(15, 15, 20),
+            "group" to columnOf(1, 1, 2),
+        )
+        val result = df.groupBy("group").countDistinct { "age"<Int>() }
+        val expected = dataFrameOf(
+            "group" to columnOf(1, 2),
+            "countDistinct" to columnOf(1, 1),
+        )
+        result shouldBe expected
+    }
+
+    @Test
+    fun `countDistinct on GroupBy with multiple columns selector`() {
+        val df = dataFrameOf(
+            "name" to columnOf("Alice", "Alice", "Bob", "Charlie"),
+            "age" to columnOf(15, 15, 20, 25),
+            "group" to columnOf(1, 1, 1, 2),
+            "city" to columnOf("London", "Moscow", "London", "Paris"),
+        )
+        val result = df.groupBy("group").countDistinct { "name"<String>() and "age"<Int>() }
+        val expected = dataFrameOf(
+            "group" to columnOf(1, 2),
+            "countDistinct" to columnOf(2, 1),
+        )
+        result shouldBe expected
+    }
+
+    @Test
+    fun `countDistinct on GroupBy with columns selector and custom result name`() {
+        val result = df.groupBy("group").countDistinct(resultName = "unique") { "name"<String>() }
+        val expected = dataFrameOf(
+            "group" to columnOf(1, 2),
+            "unique" to columnOf(2, 1),
+        )
+        result shouldBe expected
+    }
+
+    @Test
+    fun `countDistinct on GroupBy with multiple columns selector with nulls`() {
+        val result = df
+            .append(null, null, 1)
+            .groupBy("group")
+            .countDistinct { "name"<String>() and "age"<Int>() }
+        val expected = dataFrameOf(
+            "group" to columnOf(1, 2),
+            "countDistinct" to columnOf(3, 1),
+        )
+        result shouldBe expected
+    }
+}
diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt
@@ -6,7 +6,6 @@ import org.jetbrains.kotlinx.dataframe.api.add
 import org.jetbrains.kotlinx.dataframe.api.after
 import org.jetbrains.kotlinx.dataframe.api.chunked
 import org.jetbrains.kotlinx.dataframe.api.colsOf
-import org.jetbrains.kotlinx.dataframe.api.countDistinct
 import org.jetbrains.kotlinx.dataframe.api.distinct
 import org.jetbrains.kotlinx.dataframe.api.distinctBy
 import org.jetbrains.kotlinx.dataframe.api.drop
@@ -431,30 +430,6 @@ class Access : TestBase() {
         // SampleEnd
     }
 
-    @Test
-    @TransformDataFrameExpressions
-    fun countDistinct() {
-        // SampleStart
-        df.countDistinct()
-        // SampleEnd
-    }
-
-    @Test
-    @TransformDataFrameExpressions
-    fun countDistinctColumns_properties() {
-        // SampleStart
-        df.countDistinct { age and name }
-        // SampleEnd
-    }
-
-    @Test
-    @TransformDataFrameExpressions
-    fun countDistinctColumns_strings() {
-        // SampleStart
-        df.countDistinct("age", "name")
-        // SampleEnd
-    }
-
     @Test
     @TransformDataFrameExpressions
     fun distinctColumns_strings() {