Skip to content

Commit 37d6925

Browse files
committed
Removed the big number error from quantile functions. Expanded docs to clarify that non-primitive numbers follow the comparable rules.
1 parent 012a66b commit 37d6925

7 files changed

Lines changed: 10 additions & 33 deletions

File tree

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,11 @@ import kotlin.experimental.ExperimentalTypeInference
3030
import kotlin.reflect.KProperty
3131

3232
/* TODO KDocs
33-
* numbers -> Double or null
33+
* primitive numbers -> Double or null
3434
* comparable -> itself or null
3535
*
36+
* Careful! Non-primitive numbers (like BigDecimal) will thus follow the comparable rules: they return themselves or null.
37+
*
3638
* TODO cases where the lambda dictates the return type require explicit type arguments for
3739
* non-number, comparable overloads: https://youtrack.jetbrains.com/issue/KT-76683
3840
* so, `df.median { intCol }` works, but needs `df.median<_, String> { stringCol }` or `df.median({ dateCol })`

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/percentile.kt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,11 @@ import kotlin.experimental.ExperimentalTypeInference
3030
import kotlin.reflect.KProperty
3131

3232
/* TODO KDocs
33-
* numbers -> Double or null
33+
* primitive numbers -> Double or null
3434
* comparable -> itself or null
3535
*
36+
* Careful! Non-primitive numbers (like BigDecimal) will thus follow the comparable rules: they return themselves or null.
37+
*
3638
* TODO cases where the lambda dictates the return type require explicit type arguments for
3739
* non-number, comparable overloads: https://youtrack.jetbrains.com/issue/KT-76683
3840
* so, `df.percentile { intCol }` works, but needs `df.percentile<_, String> { stringCol }` or `df.percentile({ dateCol })`

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/median.kt

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@ import org.jetbrains.kotlinx.dataframe.impl.isIntraComparable
77
import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveNumber
88
import org.jetbrains.kotlinx.dataframe.impl.nothingType
99
import org.jetbrains.kotlinx.dataframe.impl.renderType
10-
import java.math.BigDecimal
11-
import java.math.BigInteger
1210
import kotlin.math.round
1311
import kotlin.reflect.KType
1412
import kotlin.reflect.full.withNullability
@@ -40,11 +38,6 @@ internal fun <T : Comparable<T>> Sequence<T>.medianOrNull(type: KType, skipNaN:
4038
}. Only primitive numbers or self-comparables are supported.",
4139
)
4240

43-
type == typeOf<BigDecimal>() || type == typeOf<BigInteger>() ->
44-
throw IllegalArgumentException(
45-
"Cannot calculate the median for big numbers in DataFrame. Only primitive numbers are supported.",
46-
)
47-
4841
// TODO kdocs: note about loss of precision for Long
4942
}
5043

@@ -107,11 +100,6 @@ internal fun <T : Comparable<T & Any>?> Sequence<T>.indexOfMedian(type: KType, s
107100
renderType(type)
108101
}. Only primitive numbers or self-comparables are supported.",
109102
)
110-
111-
nonNullType == typeOf<BigDecimal>() || nonNullType == typeOf<BigInteger>() ->
112-
throw IllegalArgumentException(
113-
"Cannot calculate the median for big numbers in DataFrame. Only primitive numbers are supported.",
114-
)
115103
}
116104

117105
// propagate NaN to return if they are not to be skipped

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/percentile.kt

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@ import org.jetbrains.kotlinx.dataframe.impl.isIntraComparable
66
import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveNumber
77
import org.jetbrains.kotlinx.dataframe.impl.nothingType
88
import org.jetbrains.kotlinx.dataframe.impl.renderType
9-
import java.math.BigDecimal
10-
import java.math.BigInteger
119
import kotlin.math.round
1210
import kotlin.reflect.KType
1311
import kotlin.reflect.full.withNullability
@@ -33,11 +31,6 @@ internal fun <T : Comparable<T>> Sequence<T>.percentileOrNull(percentile: Double
3331
}. Only primitive numbers or self-comparables are supported.",
3432
)
3533

36-
type == typeOf<BigDecimal>() || type == typeOf<BigInteger>() ->
37-
throw IllegalArgumentException(
38-
"Cannot calculate the percentile for big numbers in DataFrame. Only primitive numbers are supported.",
39-
)
40-
4134
// TODO kdocs: note about loss of precision for Long
4235
}
4336

@@ -99,11 +92,6 @@ internal fun <T : Comparable<T & Any>?> Sequence<T>.indexOfPercentile(
9992
renderType(type)
10093
}. Only primitive numbers or self-comparables are supported.",
10194
)
102-
103-
nonNullType == typeOf<BigDecimal>() || nonNullType == typeOf<BigInteger>() ->
104-
throw IllegalArgumentException(
105-
"Cannot calculate the percentile for big numbers in DataFrame. Only primitive numbers are supported.",
106-
)
10795
}
10896

10997
val indexedSequence = this.mapIndexedNotNull { i, it ->

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/quantile.kt

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@ import org.jetbrains.kotlinx.dataframe.impl.isIntraComparable
66
import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveNumber
77
import org.jetbrains.kotlinx.dataframe.impl.nothingType
88
import org.jetbrains.kotlinx.dataframe.impl.renderType
9-
import java.math.BigDecimal
10-
import java.math.BigInteger
119
import kotlin.math.ceil
1210
import kotlin.math.floor
1311
import kotlin.math.round
@@ -52,11 +50,6 @@ internal fun <T : Comparable<T>> Sequence<Any>.quantileOrNull(
5250
renderType(type)
5351
}. Only primitive numbers or self-comparables are supported.",
5452
)
55-
56-
type == typeOf<BigDecimal>() || type == typeOf<BigInteger>() ->
57-
throw IllegalArgumentException(
58-
"Cannot calculate the $name for big numbers in DataFrame. Only primitive numbers are supported.",
59-
)
6053
}
6154

6255
// propagate NaN to return if they are not to be skipped

docs/StardustDocs/topics/median.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ The operation is also available for self-comparable columns
2020
(so columns of type `T : Comparable<T>`, like `DateTime`, `String`, etc.)
2121
In this case, the return type remains `T?`.
2222
When the number of values is even, the median is the lower of the two middle values.
23+
NOTE: This logic also applies to other self-comparable `Number` types, like `BigDecimal`.
24+
They will not be interpolated.
2325

2426
All operations on `Double`/`Float` have the `skipNaN` option, which is
2527
set to `false` by default. This means that if a `NaN` is present in the input, it will be propagated to the result.

docs/StardustDocs/topics/percentile.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ The operation is also available for self-comparable columns
2525
In this case, the return type remains `T?`.
2626
The index of the result of the operation on these types is rounded using
2727
[Quantile Estimation Method](#quantile-estimation-methods) R3.
28+
NOTE: This logic also applies to other self-comparable `Number` types, like `BigDecimal`.
29+
They will not be interpolated.
2830

2931
All operations on `Double`/`Float` have the `skipNaN` option, which is
3032
set to `false` by default. This means that if a `NaN` is present in the input, it will be propagated to the result.

0 commit comments

Comments
 (0)