Skip to content

Commit 27c948c

Browse files
committed
Adding rSquared() method for R² (coefficient of determination)
1 parent 302dbc4 commit 27c948c

5 files changed

Lines changed: 112 additions & 1 deletion

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# Changelog
22

3+
## 1.3.1 - WIP
4+
- Adding `rSquared()` method for R² (coefficient of determination) — proportion of variance explained by linear regression
5+
36
## 1.3.0 - 2026-02-22
47
- Adding `StreamingStat` class (experimental) for streaming/online computation of mean, variance, stdev, skewness, kurtosis, sum, min, and max with O(1) memory
58
- Adding `percentile()` method for computing the value at any percentile (0–100) with linear interpolation
@@ -12,6 +15,7 @@
1215
- Adding `zscores()` method for computing z-scores of each value in a dataset
1316
- Adding `outliers()` method for z-score based outlier detection with configurable threshold
1417
- Adding `iqrOutliers()` method for IQR-based outlier detection (box plot whiskers), robust for skewed data
18+
- Adding `rSquared()` method for R² (coefficient of determination) — proportion of variance explained by linear regression
1519

1620
## 1.2.5 - 2026-02-22
1721
- Adding `kurtosis()` method for excess kurtosis

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ The various mathematical statistics are listed below:
9898
| `correlation()` | Pearson’s or Spearman’s rank correlation coefficient for two inputs |
9999
| `covariance()` | the sample covariance of two inputs |
100100
| `linearRegression()` | return the slope and intercept of simple linear regression parameters estimated using ordinary least squares (supports `proportional: true` for regression through the origin) |
101+
| `rSquared()` | coefficient of determination (R²) — proportion of variance explained by linear regression |
101102
| `kde()` | kernel density estimation — returns a closure that estimates the probability density (or CDF) at any point |
102103
| `kdeRandom()` | random sampling from a kernel density estimate — returns a closure that generates random floats from the KDE distribution |
103104

TODO.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
- Kendall tau correlation - another rank-based correlation
1010
- Multiple/polynomial regression
11-
- R-squared (coefficient of determination)
1211

1312
### Hypothesis Testing
1413

src/Stat.php

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1680,4 +1680,58 @@ public static function linearRegression(
16801680

16811681
return [$slope, $intercept];
16821682
}
1683+
1684+
/**
 * Calculate the coefficient of determination (R²).
 *
 * R² is computed as 1 - SSres / SStot, where SSres is the sum of squared
 * residuals of the line returned by linearRegression() and SStot is the
 * sum of squared deviations of y from its mean.
 *
 * With the default ordinary least-squares fit (intercept included) the
 * result lies between 0 and 1, up to floating-point error. With
 * `proportional: true` the line is forced through the origin while SStot
 * is still centred on the mean of y, so the result can be negative when
 * the origin-constrained line fits worse than the horizontal line at
 * mean(y).
 *
 * Observations are paired by position: array keys are ignored.
 *
 * @param array<int|float> $x values of the explanatory variable
 * @param array<int|float> $y values of the response variable
 * @param bool $proportional forwarded to linearRegression(); true fits a line through the origin
 * @param int|null $round number of decimals passed to round(), or null to return the unrounded value
 * @throws InvalidDataInputException when x and y differ in length, when fewer
 *         than 2 points are given, or when y has zero variance;
 *         linearRegression() may raise it for its own input checks as well
 */
public static function rSquared(array $x, array $y, bool $proportional = false, ?int $round = null): float
{
    $countX = count($x);
    $countY = count($y);

    if ($countX !== $countY) {
        throw new InvalidDataInputException(
            "R-squared requires x and y arrays of the same length.",
        );
    }

    if ($countX < 2) {
        throw new InvalidDataInputException(
            "R-squared requires at least 2 data points.",
        );
    }

    // Re-index both inputs so the i-th x is always paired with the i-th y,
    // even when the caller passes arrays with gaps or non-matching keys
    // (for example the output of array_filter()).
    $x = array_values($x);
    $y = array_values($y);

    [$slope, $intercept] = self::linearRegression($x, $y, $proportional);
    $meanY = self::mean($y);

    $ssRes = 0.0;
    $ssTot = 0.0;

    foreach ($y as $index => $yi) {
        $predicted = $slope * $x[$index] + $intercept;
        $ssRes += ($yi - $predicted) ** 2;
        $ssTot += ($yi - $meanY) ** 2;
    }

    // $ssTot is always a float (it starts at 0.0), so a strict comparison
    // is safe; an exact zero means every y equals the mean.
    if ($ssTot === 0.0) {
        throw new InvalidDataInputException(
            "R-squared is undefined when y values are constant (zero variance).",
        );
    }

    $rSquared = 1 - $ssRes / $ssTot;

    if ($round !== null) {
        return round($rSquared, $round);
    }

    return $rSquared;
}
16831737
}

tests/StatTest.php

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,59 @@ public function test_proportional_linear_regression_with_all_zeros_x(): void
732732
Stat::linearRegression([0, 0, 0, 0, 0], [1, 2, 3, 4, 5], proportional: true);
733733
}
734734

735+
public function test_r_squared_perfect_fit(): void
{
    // Every point lies exactly on y = 2x, so the fit leaves no residual.
    $xs = [1, 2, 3, 4, 5];
    $ys = [2, 4, 6, 8, 10];

    $this->assertEqualsWithDelta(1.0, Stat::rSquared($xs, $ys), 1e-10);
}
740+
741+
public function test_r_squared_real_data(): void
{
    // For a least-squares fit with intercept, R² equals the squared
    // Pearson correlation: 31² / (100 · 10) = 0.961 for this data set.
    $years = [1971, 1975, 1979, 1982, 1983];
    $ranks = [1, 2, 3, 4, 5];

    $actual = round(Stat::rSquared($years, $ranks), 4);

    $this->assertEqualsWithDelta(0.961, $actual, 1e-4);
}
749+
750+
public function test_r_squared_with_rounding(): void
{
    // Same data set as the unrounded case (R² = 0.961), rounded to 2 decimals.
    $years = [1971, 1975, 1979, 1982, 1983];
    $ranks = [1, 2, 3, 4, 5];

    $rounded = Stat::rSquared($years, $ranks, round: 2);

    $this->assertSame(0.96, $rounded);
}
759+
760+
public function test_r_squared_proportional(): void
{
    // y = 2x passes through the origin, so the proportional fit is exact too.
    $xs = [1, 2, 3, 4, 5];
    $ys = [2, 4, 6, 8, 10];

    $result = Stat::rSquared($xs, $ys, proportional: true);

    $this->assertEqualsWithDelta(1.0, $result, 1e-10);
}
769+
770+
public function test_r_squared_with_different_lengths(): void
{
    // Three x values against two y values must be rejected.
    $threeValues = [1, 2, 3];
    $twoValues = [1, 2];

    $this->expectException(InvalidDataInputException::class);

    Stat::rSquared($threeValues, $twoValues);
}
775+
776+
public function test_r_squared_with_single_element(): void
{
    // A single observation is below the two-point minimum.
    $singleX = [1];
    $singleY = [2];

    $this->expectException(InvalidDataInputException::class);

    Stat::rSquared($singleX, $singleY);
}
781+
782+
public function test_r_squared_with_constant_y(): void
{
    // Constant y has zero variance, which makes R² undefined.
    $xs = [1, 2, 3, 4, 5];
    $flatYs = [3, 3, 3, 3, 3];

    $this->expectException(InvalidDataInputException::class);

    Stat::rSquared($xs, $flatYs);
}
787+
735788
public function test_kde_normal(): void
736789
{
737790
$data = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2];

0 commit comments

Comments
 (0)