Skip to content

Commit 00557d4

Browse files
committed
Introducing KdeKernel backed string enum — kde() and kdeRandom()
1 parent 7bfb3ce commit 00557d4

5 files changed

Lines changed: 97 additions & 111 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
## 1.2.3 - WIP
44
- Adding `kde()` method for Kernel Density Estimation — returns a closure that estimates PDF or CDF from sample data, supporting 9 kernel functions with aliases
55
- Adding `kdeRandom()` method for random sampling from a Kernel Density Estimate — returns a closure that generates random floats from the KDE distribution
6+
- Introducing the `KdeKernel` string-backed enum — `kde()` and `kdeRandom()` now accept a `KdeKernel` enum case for the `kernel` parameter instead of a kernel name string
67

78
## 1.2.2 - 2026-02-21
89
- Adding `method` parameter to `quantiles()` supporting `'exclusive'` (default) and `'inclusive'` interpolation methods

README.md

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -402,13 +402,16 @@ list($slope, $intercept) = Stat::linearRegression(
402402
// $intercept = 0.0
403403
```
404404

405-
#### Stat::kde ( array $data , float $h , string $kernel = 'normal' , bool $cumulative = false )
405+
#### Stat::kde ( array $data , float $h , KdeKernel $kernel = KdeKernel::Normal , bool $cumulative = false )
406406
Create a continuous probability density function (or cumulative distribution function) from discrete sample data using Kernel Density Estimation.
407407
Returns a `Closure` that can be called with any point to estimate the density (or CDF value).
408408

409-
Supported kernels: `normal` (alias `gauss`), `logistic`, `sigmoid`, `rectangular` (alias `uniform`), `triangular`, `parabolic` (alias `epanechnikov`), `quartic` (alias `biweight`), `triweight`, `cosine`.
409+
Supported kernels: `KdeKernel::Normal` (alias `KdeKernel::Gauss`), `KdeKernel::Logistic`, `KdeKernel::Sigmoid`, `KdeKernel::Rectangular` (alias `KdeKernel::Uniform`), `KdeKernel::Triangular`, `KdeKernel::Parabolic` (alias `KdeKernel::Epanechnikov`), `KdeKernel::Quartic` (alias `KdeKernel::Biweight`), `KdeKernel::Triweight`, `KdeKernel::Cosine`.
410410

411411
```php
412+
use HiFolks\Statistics\Stat;
413+
use HiFolks\Statistics\Enums\KdeKernel;
414+
412415
$data = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2];
413416
$f = Stat::kde($data, h: 1.5);
414417
$f(2.5);
@@ -418,7 +421,7 @@ $f(2.5);
418421
Using a different kernel:
419422

420423
```php
421-
$f = Stat::kde($data, h: 1.5, kernel: 'triangular');
424+
$f = Stat::kde($data, h: 1.5, kernel: KdeKernel::Triangular);
422425
$f(2.5);
423426
```
424427

@@ -430,13 +433,16 @@ $F(2.5);
430433
// estimated CDF at x = 2.5 (probability that a value is <= 2.5)
431434
```
432435

433-
#### Stat::kdeRandom ( array $data , float $h , string $kernel = 'normal' , ?int $seed = null )
436+
#### Stat::kdeRandom ( array $data , float $h , KdeKernel $kernel = KdeKernel::Normal , ?int $seed = null )
434437
Generate random samples from a Kernel Density Estimate.
435438
Returns a `Closure` that, when called, produces a random float drawn from the KDE distribution defined by the data and bandwidth.
436439

437-
Supported kernels: `normal` (alias `gauss`), `logistic`, `sigmoid`, `rectangular` (alias `uniform`), `triangular`, `parabolic` (alias `epanechnikov`), `quartic` (alias `biweight`), `triweight`, `cosine`.
440+
Supported kernels: `KdeKernel::Normal` (alias `KdeKernel::Gauss`), `KdeKernel::Logistic`, `KdeKernel::Sigmoid`, `KdeKernel::Rectangular` (alias `KdeKernel::Uniform`), `KdeKernel::Triangular`, `KdeKernel::Parabolic` (alias `KdeKernel::Epanechnikov`), `KdeKernel::Quartic` (alias `KdeKernel::Biweight`), `KdeKernel::Triweight`, `KdeKernel::Cosine`.
438441

439442
```php
443+
use HiFolks\Statistics\Stat;
444+
use HiFolks\Statistics\Enums\KdeKernel;
445+
440446
$data = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2];
441447
$rand = Stat::kdeRandom($data, h: 1.5, seed: 8675309);
442448
$samples = [];
@@ -449,7 +455,7 @@ for ($i = 0; $i < 10; $i++) {
449455
Using a different kernel:
450456

451457
```php
452-
$rand = Stat::kdeRandom($data, h: 1.5, kernel: 'triangular', seed: 42);
458+
$rand = Stat::kdeRandom($data, h: 1.5, kernel: KdeKernel::Triangular, seed: 42);
453459
$rand();
454460
```
455461

src/Enums/KdeKernel.php

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
<?php
2+
3+
namespace HiFolks\Statistics\Enums;
4+
5+
/**
 * Kernel functions selectable for Kernel Density Estimation.
 *
 * Each case is backed by its lowercase kernel name. Four cases are aliases
 * of another kernel (Gauss, Uniform, Epanechnikov, Biweight); call
 * {@see KdeKernel::resolve()} to map any case to its canonical kernel.
 */
enum KdeKernel: string
{
    case Normal = 'normal';
    case Gauss = 'gauss';
    case Logistic = 'logistic';
    case Sigmoid = 'sigmoid';
    case Rectangular = 'rectangular';
    case Uniform = 'uniform';
    case Triangular = 'triangular';
    case Parabolic = 'parabolic';
    case Epanechnikov = 'epanechnikov';
    case Quartic = 'quartic';
    case Biweight = 'biweight';
    case Triweight = 'triweight';
    case Cosine = 'cosine';

    /**
     * Map this case to its canonical kernel.
     *
     * Alias cases return the kernel they stand for; every other case
     * returns itself, so the method is idempotent.
     *
     * @return self the canonical (non-alias) kernel case
     */
    public function resolve(): self
    {
        // Every case is listed explicitly (no `default` arm) so that adding a
        // new case without deciding how it resolves is reported by static
        // analysis, and fails with UnhandledMatchError at runtime, instead of
        // silently passing through.
        return match ($this) {
            self::Normal, self::Gauss => self::Normal,
            self::Rectangular, self::Uniform => self::Rectangular,
            self::Parabolic, self::Epanechnikov => self::Parabolic,
            self::Quartic, self::Biweight => self::Quartic,
            self::Logistic,
            self::Sigmoid,
            self::Triangular,
            self::Triweight,
            self::Cosine => $this,
        };
    }
}

src/Stat.php

Lines changed: 29 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
namespace HiFolks\Statistics;
44

5+
use HiFolks\Statistics\Enums\KdeKernel;
56
use HiFolks\Statistics\Exception\InvalidDataInputException;
67

78
class Stat
@@ -723,16 +724,16 @@ private static function ranks(array $data): array
723724
*
724725
* @param array<int|float> $data sample data
725726
* @param float $h bandwidth (smoothing parameter), must be > 0
726-
* @param string $kernel kernel name (normal, logistic, sigmoid, rectangular, triangular, parabolic, quartic, triweight, cosine) or alias
727+
* @param KdeKernel $kernel kernel to use for estimation
727728
* @param bool $cumulative if true, return CDF estimator; otherwise PDF estimator
728729
* @return \Closure a callable that takes a float and returns the estimated density or CDF value
729730
*
730-
* @throws InvalidDataInputException if data is empty, bandwidth <= 0, or kernel is invalid
731+
* @throws InvalidDataInputException if data is empty or bandwidth <= 0
731732
*/
732733
public static function kde(
733734
array $data,
734735
float $h,
735-
string $kernel = 'normal',
736+
KdeKernel $kernel = KdeKernel::Normal,
736737
bool $cumulative = false,
737738
): \Closure {
738739
if ($data === []) {
@@ -742,16 +743,7 @@ public static function kde(
742743
throw new InvalidDataInputException("Bandwidth h must be positive.");
743744
}
744745

745-
$aliases = [
746-
'gauss' => 'normal',
747-
'uniform' => 'rectangular',
748-
'epanechnikov' => 'parabolic',
749-
'biweight' => 'quartic',
750-
];
751-
$kernel = strtolower($kernel);
752-
if (isset($aliases[$kernel])) {
753-
$kernel = $aliases[$kernel];
754-
}
746+
$kernel = $kernel->resolve();
755747

756748
$sqrt2pi = sqrt(2.0 * M_PI);
757749

@@ -773,63 +765,56 @@ public static function kde(
773765
};
774766

775767
$kernels = [
776-
'normal' => [
768+
KdeKernel::Normal->value => [
777769
'pdf' => static fn(float $t): float => exp(-$t * $t / 2.0) / $sqrt2pi,
778770
'cdf' => $normalCdf,
779771
'support' => null,
780772
],
781-
'logistic' => [
773+
KdeKernel::Logistic->value => [
782774
'pdf' => static fn(float $t): float => 0.5 / (1.0 + cosh($t)),
783775
'cdf' => static fn(float $t): float => 1.0 / (1.0 + exp(-$t)),
784776
'support' => null,
785777
],
786-
'sigmoid' => [
778+
KdeKernel::Sigmoid->value => [
787779
'pdf' => static fn(float $t): float => (1.0 / M_PI) / cosh($t),
788780
'cdf' => static fn(float $t): float => (2.0 / M_PI) * atan(exp($t)),
789781
'support' => null,
790782
],
791-
'rectangular' => [
783+
KdeKernel::Rectangular->value => [
792784
'pdf' => static fn(float $t): float => 0.5,
793785
'cdf' => static fn(float $t): float => 0.5 * $t + 0.5,
794786
'support' => 1.0,
795787
],
796-
'triangular' => [
788+
KdeKernel::Triangular->value => [
797789
'pdf' => static fn(float $t): float => 1.0 - abs($t),
798790
'cdf' => static fn(float $t): float => $t >= 0
799791
? 1.0 - (1.0 - $t) * (1.0 - $t) / 2.0
800792
: (1.0 + $t) * (1.0 + $t) / 2.0,
801793
'support' => 1.0,
802794
],
803-
'parabolic' => [
795+
KdeKernel::Parabolic->value => [
804796
'pdf' => static fn(float $t): float => 0.75 * (1.0 - $t * $t),
805797
'cdf' => static fn(float $t): float => -0.25 * $t * $t * $t + 0.75 * $t + 0.5,
806798
'support' => 1.0,
807799
],
808-
'quartic' => [
800+
KdeKernel::Quartic->value => [
809801
'pdf' => static fn(float $t): float => (15.0 / 16.0) * (1.0 - $t * $t) ** 2,
810802
'cdf' => static fn(float $t): float => (15.0 * $t - 10.0 * $t ** 3 + 3.0 * $t ** 5) / 16.0 + 0.5,
811803
'support' => 1.0,
812804
],
813-
'triweight' => [
805+
KdeKernel::Triweight->value => [
814806
'pdf' => static fn(float $t): float => (35.0 / 32.0) * (1.0 - $t * $t) ** 3,
815807
'cdf' => static fn(float $t): float => (35.0 * $t - 35.0 * $t ** 3 + 21.0 * $t ** 5 - 5.0 * $t ** 7) / 32.0 + 0.5,
816808
'support' => 1.0,
817809
],
818-
'cosine' => [
810+
KdeKernel::Cosine->value => [
819811
'pdf' => static fn(float $t): float => (M_PI / 4.0) * cos(M_PI * $t / 2.0),
820812
'cdf' => static fn(float $t): float => 0.5 * sin(M_PI * $t / 2.0) + 0.5,
821813
'support' => 1.0,
822814
],
823815
];
824816

825-
if (! isset($kernels[$kernel])) {
826-
$valid = implode(', ', array_merge(array_keys($kernels), array_keys($aliases)));
827-
throw new InvalidDataInputException(
828-
"Unknown kernel '{$kernel}'. Valid kernels: {$valid}.",
829-
);
830-
}
831-
832-
$kernelDef = $kernels[$kernel];
817+
$kernelDef = $kernels[$kernel->value]; // @phpstan-ignore offsetAccess.notFound
833818
$support = $kernelDef['support'];
834819
$fn = $cumulative ? $kernelDef['cdf'] : $kernelDef['pdf'];
835820

@@ -888,16 +873,16 @@ public static function kde(
888873
*
889874
* @param array<int|float> $data sample data
890875
* @param float $h bandwidth (smoothing parameter), must be > 0
891-
* @param string $kernel kernel name or alias
876+
* @param KdeKernel $kernel kernel whose distribution the samples are drawn from; alias cases are resolved to their canonical kernel
892877
* @param int|null $seed optional seed for reproducibility
893878
* @return \Closure a callable that returns a random float from the KDE
894879
*
895-
* @throws InvalidDataInputException if data is empty, bandwidth <= 0, or kernel is invalid
880+
* @throws InvalidDataInputException if data is empty or bandwidth <= 0
896881
*/
897882
public static function kdeRandom(
898883
array $data,
899884
float $h,
900-
string $kernel = 'normal',
885+
KdeKernel $kernel = KdeKernel::Normal,
901886
?int $seed = null,
902887
): \Closure {
903888
if ($data === []) {
@@ -907,16 +892,7 @@ public static function kdeRandom(
907892
throw new InvalidDataInputException("Bandwidth h must be positive.");
908893
}
909894

910-
$aliases = [
911-
'gauss' => 'normal',
912-
'uniform' => 'rectangular',
913-
'epanechnikov' => 'parabolic',
914-
'biweight' => 'quartic',
915-
];
916-
$kernel = strtolower($kernel);
917-
if (isset($aliases[$kernel])) {
918-
$kernel = $aliases[$kernel];
919-
}
895+
$kernel = $kernel->resolve();
920896

921897
// Acklam rational approximation for standard normal inverse CDF
922898
$normalInvCdf = static function (float $p): float {
@@ -995,15 +971,15 @@ public static function kdeRandom(
995971
=> ($t < -1.0 || $t > 1.0) ? 0.0 : (35.0 / 32.0) * (1.0 - $t * $t) ** 3;
996972

997973
$invcdfMap = [
998-
'normal' => $normalInvCdf,
999-
'logistic' => static fn(float $p): float => log($p / (1.0 - $p)),
1000-
'sigmoid' => static fn(float $p): float => log(tan($p * M_PI / 2.0)),
1001-
'rectangular' => static fn(float $p): float => 2.0 * $p - 1.0,
1002-
'triangular' => static fn(float $p): float
974+
KdeKernel::Normal->value => $normalInvCdf,
975+
KdeKernel::Logistic->value => static fn(float $p): float => log($p / (1.0 - $p)),
976+
KdeKernel::Sigmoid->value => static fn(float $p): float => log(tan($p * M_PI / 2.0)),
977+
KdeKernel::Rectangular->value => static fn(float $p): float => 2.0 * $p - 1.0,
978+
KdeKernel::Triangular->value => static fn(float $p): float
1003979
=> $p < 0.5 ? sqrt(2.0 * $p) - 1.0 : 1.0 - sqrt(2.0 - 2.0 * $p),
1004-
'parabolic' => static fn(float $p): float
980+
KdeKernel::Parabolic->value => static fn(float $p): float
1005981
=> 2.0 * cos((acos(2.0 * $p - 1.0) + M_PI) / 3.0),
1006-
'quartic' => static function (float $p) use ($newtonRaphson, $quarticCdf, $quarticPdf): float {
982+
KdeKernel::Quartic->value => static function (float $p) use ($newtonRaphson, $quarticCdf, $quarticPdf): float {
1007983
if ($p <= 0.5) {
1008984
$sign = 1.0;
1009985
} else {
@@ -1021,7 +997,7 @@ public static function kdeRandom(
1021997
$x *= $sign;
1022998
return $newtonRaphson($sign === 1.0 ? $p : 1.0 - $p, $quarticCdf, $quarticPdf, $x);
1023999
},
1024-
'triweight' => static function (float $p) use ($newtonRaphson, $triweightCdf, $triweightPdf): float {
1000+
KdeKernel::Triweight->value => static function (float $p) use ($newtonRaphson, $triweightCdf, $triweightPdf): float {
10251001
if ($p <= 0.5) {
10261002
$sign = 1.0;
10271003
} else {
@@ -1035,17 +1011,10 @@ public static function kdeRandom(
10351011
$x *= $sign;
10361012
return $newtonRaphson($sign === 1.0 ? $p : 1.0 - $p, $triweightCdf, $triweightPdf, $x);
10371013
},
1038-
'cosine' => static fn(float $p): float => (2.0 / M_PI) * asin(2.0 * $p - 1.0),
1014+
KdeKernel::Cosine->value => static fn(float $p): float => (2.0 / M_PI) * asin(2.0 * $p - 1.0),
10391015
];
10401016

1041-
if (! isset($invcdfMap[$kernel])) {
1042-
$valid = implode(', ', array_merge(array_keys($invcdfMap), array_keys($aliases)));
1043-
throw new InvalidDataInputException(
1044-
"Unknown kernel '{$kernel}'. Valid kernels: {$valid}.",
1045-
);
1046-
}
1047-
1048-
$invcdf = $invcdfMap[$kernel];
1017+
$invcdf = $invcdfMap[$kernel->value]; // @phpstan-ignore offsetAccess.notFound
10491018
$n = count($data);
10501019

10511020
if ($seed !== null) {

0 commit comments

Comments
 (0)