@@ -4,14 +4,61 @@ import org.jetbrains.kotlinx.dataframe.ColumnSelector
44import org.jetbrains.kotlinx.dataframe.DataFrame
55import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
66import org.jetbrains.kotlinx.dataframe.annotations.Refine
7+ import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
78import org.jetbrains.kotlinx.dataframe.impl.api.requireImpl
89import kotlin.reflect.typeOf
910
/**
 * Resolves [column] in this [DataFrame] and checks that its runtime type is a subtype of [C].
 * Throws if the column can't be resolved or if its type doesn't match.
 *
 * From the compiler plugin's perspective, this operation makes a new column appear in the compile-time schema.
 *
 * The aim is to help incrementally migrate workflows to the extension properties API.
 *
 * If you end up with more than a few `requireColumn` calls, consider declaring a [DataSchema]
 * and using [cast] or [convertTo] instead.
 *
 * Example:
 *
 * ```kotlin
 * val repos = DataFrame
 *     .readCsv("https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv")
 *
 * repos
 *     .filter { "stargazers_count"<Int>() > 100 }
 *     .sortByDesc("stargazers_count")
 *     .select("full_name", "stargazers_count")
 * ```
 *
 * Notice how the `"stargazers_count"` string is repeated three times.
 * We can refactor this code using `requireColumn`:
 *
 * ```kotlin
 * val repos = DataFrame
 *     .readCsv("https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv")
 *     .requireColumn { "stargazers_count"<Int>() }
 *
 * repos
 *     .filter { stargazers_count > 100 }
 *     .sortByDesc { stargazers_count }
 *     .select { "full_name" and stargazers_count }
 * ```
 *
 * This makes the code a bit more robust. For example, usages of a renamed column become
 * compile-time errors that are easy to spot and update:
 *
 * ```kotlin
 * val repos = DataFrame
 *     .readCsv("https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv")
 *     .requireColumn { "stargazers_count"<Int>() }
 *     .rename { stargazers_count }.into("stars")
 *
 * repos
 *     .filter { stars > 100 }
 *     .sortByDesc { stars }
 *     .select { "full_name" and stars }
 * ```
 *
 * @param T the schema marker type of this [DataFrame]; it is preserved in the return type.
 * @param C the type the selected column is required to have. It is reified so that the
 * full type can be captured with [typeOf] and passed on for the runtime check.
 * @param column selector of the column to resolve and check.
 * @return a [DataFrame] with the same schema marker [T]; with the compiler plugin enabled,
 * its compile-time schema additionally contains the required column.
 */
@Refine
@Interpretable("Require0")
public inline fun <T, reified C> DataFrame<T>.requireColumn(noinline column: ColumnSelector<T, C>): DataFrame<T> =
    // `column` is `noinline` because it is forwarded to `requireImpl` as a regular value.
    requireImpl(column, typeOf<C>())
64+
0 commit comments