@@ -3,17 +3,23 @@ package org.jetbrains.kotlinx.dataframe.api
33import org.jetbrains.kotlinx.dataframe.AnyColumnReference
44import org.jetbrains.kotlinx.dataframe.ColumnsSelector
55import org.jetbrains.kotlinx.dataframe.DataFrame
6+ import org.jetbrains.kotlinx.dataframe.DataRow
67import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
78import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
89import org.jetbrains.kotlinx.dataframe.annotations.Refine
10+ import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.DESCRIPTION
11+ import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.DISTINCT_PARAM
12+ import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.DISTINCT_RETURN
13+ import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.PHRASE_ENDING
14+ import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.SEE_ALSO
915import org.jetbrains.kotlinx.dataframe.api.Select.SelectSelectingOptions
1016import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
11- import org.jetbrains.kotlinx.dataframe.columns.SingleColumn
1217import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
1318import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
1419import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate
1520import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
1621import org.jetbrains.kotlinx.dataframe.documentation.Indent
22+ import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
1723import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException
1824import org.jetbrains.kotlinx.dataframe.impl.columns.DistinctColumnSet
1925import org.jetbrains.kotlinx.dataframe.indices
@@ -23,45 +29,64 @@ import kotlin.reflect.KProperty
2329// region DataFrame
2430
/**
 * {@get [DESCRIPTION] Removes duplicated rows based on $[PHRASE_ENDING]=all columns.}
 *
 * The [rows][DataRow] in the resulting [DataFrame] are in the same order
 * as they were in the original [DataFrame].
 *
 * See also {@get [SEE_ALSO] [distinctBy] that removes duplicated rows based on the specified columns
 * and keeps all the columns in the resulting [DataFrame].}
 *
 * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
 *
 * See [Selecting Columns][SelectSelectingOptions].
 *
 * For more information:
 *
 * @include [DocumentationUrls.Distinct]
 *
 * @include [DocumentationUrls.DistinctBy]
 *
 * @get [DISTINCT_PARAM]
 *
 * @return {@get [DISTINCT_RETURN] A new [DataFrame] containing only distinct rows.}
 */
@ExcludeFromSources
@Suppress("ClassName")
private interface DistinctDocs {
    // The KDoc above is a shared template: the overloads in this file pull it in with
    // `@include [DistinctDocs]` and override the keys below with `@set`.

    // Parameter of the function (the `@param` part of the KDoc)
    interface DISTINCT_PARAM

    // Value returned by the function (the `@return` part of the KDoc)
    interface DISTINCT_RETURN

    // Description of what the function does
    interface DESCRIPTION

    // Part of the description that can be customized for a specific function
    interface PHRASE_ENDING

    // Reference to a related function (see also)
    interface SEE_ALSO
}
4572
/**
 * @include [DistinctDocs]
 * @set [DISTINCT_PARAM]
 */
public fun <T> DataFrame<T>.distinct(): DataFrame<T> = distinctBy { all() }
5278
/**
 * @include [DistinctDocs]
 * @set [DESCRIPTION] Selects the specified columns and keeps only distinct rows based on these selected columns.
 * @set [DISTINCT_PARAM] @param [columns\] The [ColumnsSelector] used to select columns
 * that will be included in the resulting [DataFrame] and considered for evaluating distinct rows.
 * @set [DISTINCT_RETURN] A new [DataFrame] containing only selected columns and distinct rows.
 */
@Refine
@Interpretable("Distinct0")
public fun <T, C> DataFrame<T>.distinct(columns: ColumnsSelector<T, C>): DataFrame<T> = select(columns).distinct()
6089
61- /* *
62- * {@include [DistinctDocs]}
63- * {@set PHRASE_ENDING the specified columns}.
64- */
6590@Deprecated(DEPRECATED_ACCESS_API )
6691@AccessApiOverload
6792public fun <T > DataFrame<T>.distinct (vararg columns : KProperty <* >): DataFrame <T > =
@@ -71,47 +96,48 @@ public fun <T> DataFrame<T>.distinct(vararg columns: KProperty<*>): DataFrame<T>
7196 }
7297
/**
 * @include [DistinctDocs]
 * @set [DESCRIPTION] Selects the specified columns and keeps only distinct rows based on these selected columns.
 * @set [DISTINCT_PARAM] @param [columns\] The names of the columns to select
 * and to consider for evaluating distinct rows.
 * @set [DISTINCT_RETURN] A new [DataFrame] containing only selected columns and distinct rows.
 */
public fun <T> DataFrame<T>.distinct(vararg columns: String): DataFrame<T> = distinct { columns.toColumnSet() }
78106
// Deprecated access-API overload: wraps the given column references in a column set
// and forwards to the selector-based `distinct`.
@Deprecated(DEPRECATED_ACCESS_API)
@AccessApiOverload
public fun <T> DataFrame<T>.distinct(vararg columns: AnyColumnReference): DataFrame<T> =
    distinct { columns.toColumnSet() }
87111
// Deprecated access-API overload: wraps the given properties in a column set
// and forwards to the selector-based `distinctBy`.
@Deprecated(DEPRECATED_ACCESS_API)
@AccessApiOverload
public fun <T> DataFrame<T>.distinctBy(vararg columns: KProperty<*>): DataFrame<T> =
    distinctBy { columns.toColumnSet() }
96116
/**
 * @include [DistinctDocs]
 * {@set [PHRASE_ENDING] the specified}
 * @set [SEE_ALSO] [distinct] that selects the specified columns
 * (if the columns are not specified, selects all columns)
 * and keeps only distinct rows based on these selected columns.
 * @set [DISTINCT_PARAM] @param [columns\]
 * The names of the columns to consider for evaluating distinct rows.
 */
public fun <T> DataFrame<T>.distinctBy(vararg columns: String): DataFrame<T> = distinctBy { columns.toColumnSet() }
102127
// Deprecated access-API overload: wraps the given column references in a column set
// and forwards to the selector-based `distinctBy`.
@Deprecated(DEPRECATED_ACCESS_API)
@AccessApiOverload
public fun <T> DataFrame<T>.distinctBy(vararg columns: AnyColumnReference): DataFrame<T> =
    distinctBy { columns.toColumnSet() }
111132
112133/* *
113- * {@include [DistinctDocs]}
114- * {@set PHRASE_ENDING the specified columns}.
134+ * @include [DistinctDocs]
135+ * {@set [PHRASE_ENDING] the specified}
136+ * @set [SEE_ALSO] [distinct] that selects the specified columns
137+ * (if the columns are not specified, selects all columns)
138+ * and keeps only distinct rows based on these selected columns.
139+ * @set [DISTINCT_PARAM] @param [columns\] The [ColumnsSelector] used to select columns
140+ * that will be considered for evaluating distinct rows.
115141 */
116142public fun <T , C > DataFrame<T>.distinctBy (columns : ColumnsSelector <T , C >): DataFrame <T > {
117143 val cols = get(columns)
@@ -124,15 +150,13 @@ public fun <T, C> DataFrame<T>.distinctBy(columns: ColumnsSelector<T, C>): DataF
124150// region ColumnsSelectionDsl
125151
126152/* *
127- * ##### Distinct {@include [ColumnsSelectionDslLink]}
153+ * Distinct {@include [ColumnsSelectionDslLink]}.
128154 *
129155 * See [Grammar] for all functions in this interface.
130156 */
131157public interface DistinctColumnsSelectionDsl {
132158
133159 /* *
134- * ## Distinct Grammar
135- *
136160 * @include [DslGrammarTemplate]
137161 * {@set [DslGrammarTemplate.DEFINITIONS]
138162 * {@include [DslGrammarTemplate.ColumnSetDef]}
@@ -152,23 +176,21 @@ public interface DistinctColumnsSelectionDsl {
152176 }
153177
154178 /* *
155- * ## Distinct
156179 * Returns a new [ColumnSet] from [this] [ColumnSet] containing only distinct columns (by path).
157180 * This is useful when you've selected the same column multiple times but only want it once.
158181 *
159- * NOTE: This doesn't solve [DuplicateColumnNamesException] if you've selected two columns with the same name.
182+ * This doesn't solve [DuplicateColumnNamesException] if you've selected two columns with the same name.
160183 * For this, you'll need to [rename][ColumnsSelectionDsl.named] one of the columns.
161184 *
162- * ### Check out: [Grammar]
163- *
164- * #### For Example:
165- * `df.`[select][DataFrame.select]` { (`[colsOf][SingleColumn.colsOf]`<`[Int][Int]`>() `[and][ColumnsSelectionDsl.and]` age).`[distinct][ColumnSet.distinct]`() }`
185+ * See also [Grammar], [named][ColumnsSelectionDsl.named], [simplify][ColumnsSelectionDsl.simplify].
166186 *
167- * `df.`[select][DataFrame.select]` { `[colsAtAnyDepth][ColumnsSelectionDsl.colsAtAnyDepth]`().`[nameStartsWith][ColumnsSelectionDsl.nameStartsWith]`("order").`[distinct][ColumnSet.distinct]`() }`
187+ * ### Examples
188+ * ```kotlin
189+ * df.select { (colsOf<Int>() and age).distinct() }
190+ * df.select { colsAtAnyDepth().nameStartsWith("order").distinct() }
191+ * ```
168192 *
169193 * @return A new [ColumnSet] containing only distinct columns (by path).
170- * @see ColumnsSelectionDsl.named
171- * @see ColumnsSelectionDsl.simplify
172194 */
173195 public fun <C > ColumnSet<C>.distinct (): ColumnSet <C > = DistinctColumnSet (this )
174196}
0 commit comments