Skip to content

Commit 42448e3

Browse files
committed
Adding confidenceInterval() method for computing confidence intervals
1 parent 27c948c commit 42448e3

7 files changed

Lines changed: 204 additions & 1 deletion

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Changelog
22

33
## 1.3.1 - WIP
4+
- Adding `confidenceInterval()` method for computing confidence intervals for the mean using the normal (z) distribution
45
- Adding `rSquared()` method for R² (coefficient of determination) — proportion of variance explained by linear regression
56

67
## 1.3.0 - 2026-02-22

README.md

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ The various mathematical statistics are listed below:
9999
| `covariance()` | the sample covariance of two inputs |
100100
| `linearRegression()` | return the slope and intercept of simple linear regression parameters estimated using ordinary least squares (supports `proportional: true` for regression through the origin) |
101101
| `rSquared()` | coefficient of determination (R²) — proportion of variance explained by linear regression |
102+
| `confidenceInterval()` | confidence interval for the mean using the normal (z) distribution |
102103
| `kde()` | kernel density estimation — returns a closure that estimates the probability density (or CDF) at any point |
103104
| `kdeRandom()` | random sampling from a kernel density estimate — returns a closure that generates random floats from the KDE distribution |
104105

@@ -624,6 +625,54 @@ list($slope, $intercept) = Stat::linearRegression(
624625
// $intercept = 0.0
625626
```
626627

628+
#### Stat::rSquared( array $x, array $y, bool $proportional = false, ?int $round = null )
629+
Return the coefficient of determination (R²) — the proportion of variance in the dependent variable explained by the linear regression model. Values range from 0 (no explanatory power) to 1 (perfect fit).
630+
631+
Requires at least 2 data points and arrays of the same length.
632+
633+
```php
634+
use HiFolks\Statistics\Stat;
635+
$r2 = Stat::rSquared([1, 2, 3, 4, 5], [2, 4, 6, 8, 10]);
636+
// 1.0 (perfect linear relationship)
637+
638+
$r2 = Stat::rSquared(
639+
[1971, 1975, 1979, 1982, 1983],
640+
[1, 2, 3, 4, 5],
641+
round: 2,
642+
);
643+
// 0.96
644+
```
645+
646+
With proportional regression (through the origin):
647+
648+
```php
649+
$r2 = Stat::rSquared(
650+
[1, 2, 3, 4, 5],
651+
[2, 4, 6, 8, 10],
652+
proportional: true,
653+
);
654+
// 1.0
655+
```
656+
657+
#### Stat::confidenceInterval( array $data, float $confidenceLevel = 0.95, ?int $round = null )
658+
Return the confidence interval for the mean using the normal (z) distribution.
659+
660+
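Computes: `mean ± z * (stdev / √n)`, where `stdev` is the sample standard deviation, `n` is the number of data points, and `z` is the critical value obtained from the inverse CDF of the standard normal distribution at `(1 + confidenceLevel) / 2`.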
661+
662+
Requires at least 2 data points. The confidence level must be between 0 and 1 exclusive. An `InvalidDataInputException` is thrown if either condition is not met.
663+
664+
```php
665+
use HiFolks\Statistics\Stat;
666+
[$lower, $upper] = Stat::confidenceInterval([2, 4, 4, 4, 5, 5, 7, 9]);
667+
// 95% CI: [3.52, 6.48] (approximately)
668+
669+
[$lower, $upper] = Stat::confidenceInterval([2, 4, 4, 4, 5, 5, 7, 9], confidenceLevel: 0.99);
670+
// 99% CI: wider interval
671+
672+
[$lower, $upper] = Stat::confidenceInterval([2, 4, 4, 4, 5, 5, 7, 9], round: 2);
673+
// [3.52, 6.48]
674+
```
675+
627676
#### Stat::kde ( array $data , float $h , KdeKernel $kernel = KdeKernel::Normal , bool $cumulative = false )
628677
Create a continuous probability density function (or cumulative distribution function) from discrete sample data using Kernel Density Estimation.
629678
Returns a `Closure` that can be called with any point to estimate the density (or CDF value).

TODO.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
- Chi-squared test
1616
- Z-test
1717
- P-value calculation
18-
- Confidence intervals
1918

2019
### Other Distributions (beyond Normal)
2120

src/Stat.php

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
use HiFolks\Statistics\Enums\KdeKernel;
66
use HiFolks\Statistics\Exception\InvalidDataInputException;
7+
use HiFolks\Statistics\NormalDist;
78

89
class Stat
910
{
@@ -1734,4 +1735,49 @@ public static function rSquared(array $x, array $y, bool $proportional = false,
17341735

17351736
return $rSquared;
17361737
}
1738+
1739+
/**
1740+
* Return the confidence interval for the mean using the normal (z) distribution.
1741+
*
1742+
* Computes: mean ± z * (stdev / √n)
1743+
*
1744+
* @param array<int|float> $data
1745+
* @param float $confidenceLevel the confidence level (e.g. 0.95 for 95%)
1746+
* @param int|null $round number of decimal places to round both bounds to; null returns the unrounded values
1747+
* @return array{0: float, 1: float} [lower bound, upper bound]
1748+
*
1749+
* @throws InvalidDataInputException if data has fewer than 2 elements or confidence level is not in (0, 1)
1750+
*/
1751+
public static function confidenceInterval(
1752+
array $data,
1753+
float $confidenceLevel = 0.95,
1754+
?int $round = null,
1755+
): array {
1756+
if (self::count($data) < 2) {
1757+
throw new InvalidDataInputException(
1758+
"Confidence interval requires at least 2 data points.",
1759+
);
1760+
}
1761+
1762+
if ($confidenceLevel <= 0.0 || $confidenceLevel >= 1.0) {
1763+
throw new InvalidDataInputException(
1764+
"Confidence level must be between 0 and 1 exclusive.",
1765+
);
1766+
}
1767+
1768+
$mean = self::mean($data);
1769+
$standardError = self::sem($data);
1770+
1771+
$zCritical = (new NormalDist(0.0, 1.0))->invCdf((1 + $confidenceLevel) / 2);
1772+
$margin = $zCritical * $standardError;
1773+
1774+
$lower = $mean - $margin;
1775+
$upper = $mean + $margin;
1776+
1777+
if ($round !== null) {
1778+
return [Math::round($lower, $round), Math::round($upper, $round)];
1779+
}
1780+
1781+
return [$lower, $upper];
1782+
}
17371783
}

src/Statistics.php

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,20 @@ public function sem(?int $round = null): float
273273
return Stat::sem($this->numericalArray(), $round);
274274
}
275275

276+
/**
277+
* Return the confidence interval for the mean using the normal (z) distribution.
278+
*
279+
* @param float $confidenceLevel the confidence level (e.g. 0.95 for 95%)
280+
* @param int|null $round number of decimal places to round both bounds to; null returns the unrounded values
281+
* @return array{0: float, 1: float} [lower bound, upper bound]
282+
*
283+
* @see Stat::confidenceInterval()
284+
*/
285+
public function confidenceInterval(float $confidenceLevel = 0.95, ?int $round = null): array
286+
{
287+
return Stat::confidenceInterval($this->numericalArray(), $confidenceLevel, $round);
288+
}
289+
276290
/**
277291
* Return the mean absolute deviation (MAD).
278292
*

tests/StatTest.php

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -785,6 +785,82 @@ public function test_r_squared_with_constant_y(): void
785785
Stat::rSquared([1, 2, 3, 4, 5], [3, 3, 3, 3, 3]);
786786
}
787787

788+
public function test_confidence_interval_95(): void
789+
{
790+
$data = [2, 4, 4, 4, 5, 5, 7, 9];
791+
[$lower, $upper] = Stat::confidenceInterval($data);
792+
// mean = 5.0, sample stdev ≈ 2.1381, sem ≈ 0.7559, z ≈ 1.96
793+
// margin ≈ 1.4815
794+
$this->assertEqualsWithDelta(3.5185, $lower, 0.01);
795+
$this->assertEqualsWithDelta(6.4815, $upper, 0.01);
796+
}
797+
798+
public function test_confidence_interval_99(): void
799+
{
800+
$data = [2, 4, 4, 4, 5, 5, 7, 9];
801+
[$lower, $upper] = Stat::confidenceInterval($data, confidenceLevel: 0.99);
802+
// 99% CI is wider than 95% CI
803+
$this->assertLessThan(3.5, $lower);
804+
$this->assertGreaterThan(6.5, $upper);
805+
}
806+
807+
public function test_confidence_interval_with_rounding(): void
808+
{
809+
$data = [2, 4, 4, 4, 5, 5, 7, 9];
810+
[$lower, $upper] = Stat::confidenceInterval($data, round: 2);
811+
$this->assertSame(3.52, $lower);
812+
$this->assertSame(6.48, $upper);
813+
}
814+
815+
public function test_confidence_interval_narrows_with_more_data(): void
816+
{
817+
$small = [2, 4, 4, 4, 5, 5, 7, 9];
818+
$large = [2, 4, 4, 4, 5, 5, 7, 9, 3, 4, 5, 6, 4, 5, 6, 5];
819+
[$sLower, $sUpper] = Stat::confidenceInterval($small);
820+
[$lLower, $lUpper] = Stat::confidenceInterval($large);
821+
$this->assertLessThan($sUpper - $sLower, $lUpper - $lLower);
822+
}
823+
824+
public function test_confidence_interval_single_element_throws(): void
825+
{
826+
$this->expectException(InvalidDataInputException::class);
827+
Stat::confidenceInterval([42]);
828+
}
829+
830+
public function test_confidence_interval_empty_throws(): void
831+
{
832+
$this->expectException(InvalidDataInputException::class);
833+
Stat::confidenceInterval([]);
834+
}
835+
836+
public function test_confidence_interval_invalid_confidence_level_throws(): void
837+
{
838+
$data = [1, 2, 3, 4, 5];
839+
$this->expectException(InvalidDataInputException::class);
840+
Stat::confidenceInterval($data, confidenceLevel: 0.0);
841+
}
842+
843+
public function test_confidence_interval_confidence_level_one_throws(): void
844+
{
845+
$data = [1, 2, 3, 4, 5];
846+
$this->expectException(InvalidDataInputException::class);
847+
Stat::confidenceInterval($data, confidenceLevel: 1.0);
848+
}
849+
850+
public function test_confidence_interval_confidence_level_above_one_throws(): void
851+
{
852+
$data = [1, 2, 3, 4, 5];
853+
$this->expectException(InvalidDataInputException::class);
854+
Stat::confidenceInterval($data, confidenceLevel: 1.5);
855+
}
856+
857+
public function test_confidence_interval_negative_confidence_level_throws(): void
858+
{
859+
$data = [1, 2, 3, 4, 5];
860+
$this->expectException(InvalidDataInputException::class);
861+
Stat::confidenceInterval($data, confidenceLevel: -0.1);
862+
}
863+
788864
public function test_kde_normal(): void
789865
{
790866
$data = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2];

tests/StatisticTest.php

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,24 @@ public function test_sem(): void
305305
$this->assertEqualsWithDelta($expected, $s->sem(), 1e-10);
306306
}
307307

308+
public function test_confidence_interval(): void
309+
{
310+
$s = Statistics::make([2, 4, 4, 4, 5, 5, 7, 9]);
311+
[$lower, $upper] = $s->confidenceInterval();
312+
[$expectedLower, $expectedUpper] = Stat::confidenceInterval([2, 4, 4, 4, 5, 5, 7, 9]);
313+
$this->assertEqualsWithDelta($expectedLower, $lower, 1e-10);
314+
$this->assertEqualsWithDelta($expectedUpper, $upper, 1e-10);
315+
}
316+
317+
public function test_confidence_interval_with_params(): void
318+
{
319+
$s = Statistics::make([2, 4, 4, 4, 5, 5, 7, 9]);
320+
[$lower, $upper] = $s->confidenceInterval(confidenceLevel: 0.99, round: 2);
321+
[$expectedLower, $expectedUpper] = Stat::confidenceInterval([2, 4, 4, 4, 5, 5, 7, 9], confidenceLevel: 0.99, round: 2);
322+
$this->assertSame($expectedLower, $lower);
323+
$this->assertSame($expectedUpper, $upper);
324+
}
325+
308326
public function test_mean_absolute_deviation(): void
309327
{
310328
$s = Statistics::make([1, 2, 3, 4, 5]);

0 commit comments

Comments
 (0)