Skip to content

Commit 466ca29

Browse files
authored
Merge pull request #1628 from Kotlin/distinct-docs
KDocs fixes for `distinct` and `distinctBy`
2 parents b4e921b + 2ea7d0d commit 466ca29

2 files changed

Lines changed: 74 additions & 49 deletions

File tree

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt

Lines changed: 70 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,23 @@ package org.jetbrains.kotlinx.dataframe.api
33
import org.jetbrains.kotlinx.dataframe.AnyColumnReference
44
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
55
import org.jetbrains.kotlinx.dataframe.DataFrame
6+
import org.jetbrains.kotlinx.dataframe.DataRow
67
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
78
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
89
import org.jetbrains.kotlinx.dataframe.annotations.Refine
10+
import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.DESCRIPTION
11+
import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.DISTINCT_PARAM
12+
import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.DISTINCT_RETURN
13+
import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.PHRASE_ENDING
14+
import org.jetbrains.kotlinx.dataframe.api.DistinctDocs.SEE_ALSO
915
import org.jetbrains.kotlinx.dataframe.api.Select.SelectSelectingOptions
1016
import org.jetbrains.kotlinx.dataframe.columns.ColumnSet
11-
import org.jetbrains.kotlinx.dataframe.columns.SingleColumn
1217
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
1318
import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
1419
import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate
1520
import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
1621
import org.jetbrains.kotlinx.dataframe.documentation.Indent
22+
import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns
1723
import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException
1824
import org.jetbrains.kotlinx.dataframe.impl.columns.DistinctColumnSet
1925
import org.jetbrains.kotlinx.dataframe.indices
@@ -23,45 +29,64 @@ import kotlin.reflect.KProperty
2329
// region DataFrame
2430

2531
/**
26-
* ## The Distinct Operation
32+
* {@get [DESCRIPTION] Removes duplicated rows based on $[PHRASE_ENDING]=all columns.}
2733
*
28-
* It removes duplicated rows based on {@get PHRASE_ENDING}.
34+
* The [rows][DataRow] in the resulting [DataFrame] are in the same order
35+
* as they were in the original [DataFrame].
2936
*
30-
* __NOTE:__ The rows in the resulting [DataFrame] are in the same order as they were in the original [DataFrame].
37+
* See also {@get [SEE_ALSO] [distinctBy], which removes duplicated rows based on the specified columns
38+
* and keeps all the columns in the resulting [DataFrame].}
3139
*
32-
* {@get [DISTINCT_PARAM] @param [columns]
33-
* The names of the columns to consider for evaluating distinct rows.}
40+
* @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention]
3441
*
35-
* @return A new DataFrame containing only distinct rows.
42+
* See [Selecting Columns][SelectSelectingOptions].
3643
*
37-
* @see [Selecting Columns][SelectSelectingOptions].
38-
* @see {@include [DocumentationUrls.Distinct]}
44+
* For more information:
45+
*
46+
* @include [DocumentationUrls.Distinct]
47+
*
48+
* @include [DocumentationUrls.DistinctBy]
49+
*
50+
* @get [DISTINCT_PARAM]
51+
*
52+
* @return {@get [DISTINCT_RETURN] A new [DataFrame] containing only distinct rows.}
3953
*/
4054
@ExcludeFromSources
4155
@Suppress("ClassName")
4256
private interface DistinctDocs {
57+
// Parameter of the function (the `@param` part of the KDoc)
4358
interface DISTINCT_PARAM
59+
60+
// Value returned by the function (the `@return` part of the KDoc)
61+
interface DISTINCT_RETURN
62+
63+
// Description of what the function does
64+
interface DESCRIPTION
65+
66+
// Part of the description that can be customized for a specific function
67+
interface PHRASE_ENDING
68+
69+
// Reference to a related function (see also)
70+
interface SEE_ALSO
4471
}
4572

4673
/**
47-
* {@include [DistinctDocs]}
48-
* {@set PHRASE_ENDING all columns}.
49-
* {@set [DistinctDocs.DISTINCT_PARAM]}
74+
* @include [DistinctDocs]
75+
* @set [DISTINCT_PARAM]
5076
*/
5177
public fun <T> DataFrame<T>.distinct(): DataFrame<T> = distinctBy { all() }
5278

5379
/**
54-
* {@include [DistinctDocs]}
55-
* {@set PHRASE_ENDING the specified columns}.
80+
* @include [DistinctDocs]
81+
* @set [DESCRIPTION] Selects the specified columns and keeps only distinct rows based on these selected columns.
82+
* @set [DISTINCT_PARAM] @param [columns\] The [ColumnsSelector] used to select columns
83+
* that will be included in the resulting [DataFrame] and considered for evaluating distinct rows.
84+
* @set [DISTINCT_RETURN] A new [DataFrame] containing only selected columns and distinct rows.
5685
*/
5786
@Refine
5887
@Interpretable("Distinct0")
5988
public fun <T, C> DataFrame<T>.distinct(columns: ColumnsSelector<T, C>): DataFrame<T> = select(columns).distinct()
6089

61-
/**
62-
* {@include [DistinctDocs]}
63-
* {@set PHRASE_ENDING the specified columns}.
64-
*/
6590
@Deprecated(DEPRECATED_ACCESS_API)
6691
@AccessApiOverload
6792
public fun <T> DataFrame<T>.distinct(vararg columns: KProperty<*>): DataFrame<T> =
@@ -71,47 +96,48 @@ public fun <T> DataFrame<T>.distinct(vararg columns: KProperty<*>): DataFrame<T>
7196
}
7297

7398
/**
74-
* {@include [DistinctDocs]}
75-
* {@set PHRASE_ENDING the specified columns}.
99+
* @include [DistinctDocs]
100+
* @set [DESCRIPTION] Selects the specified columns and keeps only distinct rows based on these selected columns.
101+
* @set [DISTINCT_PARAM] @param [columns\] The names of the columns to select
102+
* and to consider for evaluating distinct rows.
103+
* @set [DISTINCT_RETURN] A new [DataFrame] containing only selected columns and distinct rows.
76104
*/
77105
public fun <T> DataFrame<T>.distinct(vararg columns: String): DataFrame<T> = distinct { columns.toColumnSet() }
78106

79-
/**
80-
* {@include [DistinctDocs]}
81-
* {@set PHRASE_ENDING the specified columns}.
82-
*/
83107
@Deprecated(DEPRECATED_ACCESS_API)
84108
@AccessApiOverload
85109
public fun <T> DataFrame<T>.distinct(vararg columns: AnyColumnReference): DataFrame<T> =
86110
distinct { columns.toColumnSet() }
87111

88-
/**
89-
* {@include [DistinctDocs]}
90-
* {@set PHRASE_ENDING the specified columns}.
91-
*/
92112
@Deprecated(DEPRECATED_ACCESS_API)
93113
@AccessApiOverload
94114
public fun <T> DataFrame<T>.distinctBy(vararg columns: KProperty<*>): DataFrame<T> =
95115
distinctBy { columns.toColumnSet() }
96116

97117
/**
98-
* {@include [DistinctDocs]}
99-
* {@set PHRASE_ENDING the specified columns}.
118+
* @include [DistinctDocs]
119+
* {@set [PHRASE_ENDING] the specified}
120+
* @set [SEE_ALSO] [distinct], which selects the specified columns
121+
* (if the columns are not specified, selects all columns)
122+
* and keeps only distinct rows based on these selected columns.
123+
* @set [DISTINCT_PARAM] @param [columns\]
124+
* The names of the columns to consider for evaluating distinct rows.
100125
*/
101126
public fun <T> DataFrame<T>.distinctBy(vararg columns: String): DataFrame<T> = distinctBy { columns.toColumnSet() }
102127

103-
/**
104-
* {@include [DistinctDocs]}
105-
* {@set PHRASE_ENDING the specified columns}.
106-
*/
107128
@Deprecated(DEPRECATED_ACCESS_API)
108129
@AccessApiOverload
109130
public fun <T> DataFrame<T>.distinctBy(vararg columns: AnyColumnReference): DataFrame<T> =
110131
distinctBy { columns.toColumnSet() }
111132

112133
/**
113-
* {@include [DistinctDocs]}
114-
* {@set PHRASE_ENDING the specified columns}.
134+
* @include [DistinctDocs]
135+
* {@set [PHRASE_ENDING] the specified}
136+
* @set [SEE_ALSO] [distinct], which selects the specified columns
137+
* (if the columns are not specified, selects all columns)
138+
* and keeps only distinct rows based on these selected columns.
139+
* @set [DISTINCT_PARAM] @param [columns\] The [ColumnsSelector] used to select columns
140+
* that will be considered for evaluating distinct rows.
115141
*/
116142
public fun <T, C> DataFrame<T>.distinctBy(columns: ColumnsSelector<T, C>): DataFrame<T> {
117143
val cols = get(columns)
@@ -124,15 +150,13 @@ public fun <T, C> DataFrame<T>.distinctBy(columns: ColumnsSelector<T, C>): DataF
124150
// region ColumnsSelectionDsl
125151

126152
/**
127-
* ##### Distinct {@include [ColumnsSelectionDslLink]}
153+
* Distinct {@include [ColumnsSelectionDslLink]}.
128154
*
129155
* See [Grammar] for all functions in this interface.
130156
*/
131157
public interface DistinctColumnsSelectionDsl {
132158

133159
/**
134-
* ## Distinct Grammar
135-
*
136160
* @include [DslGrammarTemplate]
137161
* {@set [DslGrammarTemplate.DEFINITIONS]
138162
* {@include [DslGrammarTemplate.ColumnSetDef]}
@@ -152,23 +176,21 @@ public interface DistinctColumnsSelectionDsl {
152176
}
153177

154178
/**
155-
* ## Distinct
156179
* Returns a new [ColumnSet] from [this] [ColumnSet] containing only distinct columns (by path).
157180
* This is useful when you've selected the same column multiple times but only want it once.
158181
*
159-
* NOTE: This doesn't solve [DuplicateColumnNamesException] if you've selected two columns with the same name.
182+
* This doesn't prevent a [DuplicateColumnNamesException] if you've selected two columns with the same name.
160183
* For this, you'll need to [rename][ColumnsSelectionDsl.named] one of the columns.
161184
*
162-
* ### Check out: [Grammar]
163-
*
164-
* #### For Example:
165-
* `df.`[select][DataFrame.select]` { (`[colsOf][SingleColumn.colsOf]`<`[Int][Int]`>() `[and][ColumnsSelectionDsl.and]` age).`[distinct][ColumnSet.distinct]`() }`
185+
* See also [Grammar], [named][ColumnsSelectionDsl.named], [simplify][ColumnsSelectionDsl.simplify].
166186
*
167-
* `df.`[select][DataFrame.select]` { `[colsAtAnyDepth][ColumnsSelectionDsl.colsAtAnyDepth]`().`[nameStartsWith][ColumnsSelectionDsl.nameStartsWith]`("order").`[distinct][ColumnSet.distinct]`() }`
187+
* ### Examples
188+
* ```kotlin
189+
* df.select { (colsOf<Int>() and age).distinct() }
190+
* df.select { colsAtAnyDepth().nameStartsWith("order").distinct() }
191+
* ```
168192
*
169193
* @return A new [ColumnSet] containing only distinct columns (by path).
170-
* @see ColumnsSelectionDsl.named
171-
* @see ColumnsSelectionDsl.simplify
172194
*/
173195
public fun <C> ColumnSet<C>.distinct(): ColumnSet<C> = DistinctColumnSet(this)
174196
}

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,12 @@ internal interface DocumentationUrls {
8484
/** [See `remove` on the documentation website.]({@include [Url]}/remove.html) */
8585
interface Remove
8686

87-
/** <a href="{@include [Url]}/distinct.html">See `distinct` on the documentation website.</a> */
87+
/** [See `distinct` on the documentation website.]({@include [Url]}/distinct.html) */
8888
interface Distinct
8989

90+
/** [See `distinctBy` on the documentation website.]({@include [Url]}/distinct.html#distinctby) */
91+
interface DistinctBy
92+
9093
/** <a href="{@include [Url]}/flatten.html">See `flatten` on the documentation website.</a> */
9194
interface Flatten
9295

0 commit comments

Comments
 (0)