Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## 1.2.2 - WIP
- Adding `medianGrouped()` method for estimating the median of grouped/binned continuous data using interpolation
- Adding Spearman rank correlation via `method` parameter in `correlation()` (`method='ranked'`)


## 1.2.1 - 2026-02-20
Expand Down
16 changes: 14 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ The various mathematical statistics are listed below:
| `variance()` | variance for a sample |
| `geometricMean()` | geometric mean |
| `harmonicMean()` | harmonic mean |
| `correlation()` | the Pearson’s correlation coefficient for two inputs |
| `correlation()` | Pearson’s or Spearman’s rank correlation coefficient for two inputs |
| `covariance()` | the sample covariance of two inputs |
| `linearRegression()` | return the slope and intercept of simple linear regression parameters estimated using ordinary least squares |

Expand Down Expand Up @@ -316,9 +316,11 @@ $covariance = Stat::covariance(
// -7.5
```

#### Stat::correlation ( array $x , array $y )
#### Stat::correlation ( array $x , array $y , string $method = 'linear' )
Return Pearson’s correlation coefficient for two inputs. Pearson’s correlation coefficient r takes values between -1 and +1. It measures the strength and direction of the linear relationship, where +1 means a very strong positive linear relationship, -1 a very strong negative linear relationship, and 0 no linear relationship.

Use `$method = 'ranked'` for Spearman’s rank correlation, which measures monotonic relationships (not just linear). Spearman’s correlation is computed by applying Pearson’s formula to the ranks of the data.

```php
$correlation = Stat::correlation(
[1, 2, 3, 4, 5, 6, 7, 8, 9],
Expand All @@ -335,6 +337,16 @@ $correlation = Stat::correlation(
// -1.0
```

Spearman’s rank correlation (non-linear but monotonic relationship):
```php
$correlation = Stat::correlation(
[1, 2, 3, 4, 5],
[1, 4, 9, 16, 25],
'ranked'
);
// 1.0
```

#### Stat::linearRegression ( array $x , array $y )
Return the slope and intercept of simple linear regression parameters estimated using ordinary least squares.
Simple linear regression describes the relationship between an independent variable *$x* and a dependent variable *$y* in terms of a linear function.
Expand Down
10 changes: 2 additions & 8 deletions TODO.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
Missing Functions

Python Function: median_grouped(data, interval)
Description: Median of grouped/binned continuous data
Status: Missing
────────────────────────────────────────

Python Function: kde(data, h, kernel)
Description: Kernel Density Estimation
Status: Missing
Expand All @@ -14,10 +11,7 @@

Missing Parameters/Variants

Feature: correlation() with method='ranked'
Python: Supports both Pearson and Spearman rank correlation
This Package: Only Pearson
────────────────────────────────────────

Feature: linear_regression() with proportional=True
Python: Supports proportional regression (intercept forced to 0)
This Package: No proportional option
Expand Down
47 changes: 46 additions & 1 deletion src/Stat.php
Original file line number Diff line number Diff line change
Expand Up @@ -614,8 +614,14 @@ public static function covariance(array $x, array $y): false|float
* or if the length of arrays are < 2, or if the 2 input arrays has not numeric elements,
* or if the elements of the array are constants
*/
public static function correlation(array $x, array $y): false|float
public static function correlation(array $x, array $y, string $method = 'linear'): false|float
{
if ($method !== 'linear' && $method !== 'ranked') {
throw new InvalidDataInputException(
"Correlation method must be 'linear' or 'ranked'.",
);
}

$countX = count($x);
$countY = count($y);
if ($countX !== $countY) {
Expand All @@ -628,6 +634,12 @@ public static function correlation(array $x, array $y): false|float
"Correlation requires at least two data points.",
);
}

if ($method === 'ranked') {
$x = self::ranks($x);
$y = self::ranks($y);
}

$meanX = self::mean($x);
$meanY = self::mean($y);
$a = 0;
Expand All @@ -651,6 +663,39 @@ public static function correlation(array $x, array $y): false|float
return $a / $b;
}

/**
 * Assign 1-based ranks to the given values, giving tied values the
 * average of the ranks they jointly occupy (fractional ranking).
 *
 * Ranks are returned positionally: element i of the result is the rank of
 * the i-th element of $data in iteration order, regardless of the keys
 * $data was passed with.
 *
 * Ties are detected with the same numeric comparison used for sorting,
 * so an int and a float holding the same value (e.g. 1 and 1.0) share
 * one averaged rank.
 *
 * @param array<int|float> $data
 * @return array<float>
 */
private static function ranks(array $data): array
{
    // Re-index so positions are 0..n-1 even for string or sparse keys.
    $values = array_values($data);
    $n = count($values);

    // $order[k] is the position (in $values) of the k-th smallest value.
    $order = array_keys($values);
    usort($order, static fn(int $a, int $b): int => $values[$a] <=> $values[$b]);

    $ranks = array_fill(0, $n, 0.0);
    $start = 0;
    while ($start < $n) {
        // Extend [$start, $end) over the run of values equal to the first.
        // Use <=> rather than === so the tie test agrees with the sort:
        // 1 and 1.0 sort as equal and must therefore be ranked as a tie.
        $end = $start + 1;
        while ($end < $n && ($values[$order[$end]] <=> $values[$order[$start]]) === 0) {
            $end++;
        }

        // The run occupies ranks ($start + 1) .. $end; their mean is the
        // midpoint of the first and last rank.
        $averageRank = ($start + 1 + $end) / 2.0;
        for ($k = $start; $k < $end; $k++) {
            $ranks[$order[$k]] = $averageRank;
        }

        $start = $end;
    }

    return $ranks;
}

/**
* @param array<int|float> $x
* @param array<int|float> $y
Expand Down
76 changes: 76 additions & 0 deletions tests/StatTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,82 @@ public function test_calculates_correlation(): void
$this->assertEquals(0.71, $correlation);
}

public function test_calculates_spearman_correlation(): void
{
    // Each case is [x, y, expected Spearman coefficient].
    $cases = [
        // Monotonic increasing: the ranks line up exactly.
        [[1, 2, 3, 4, 5], [2, 4, 6, 8, 10], 1.0],
        // Monotonic decreasing: the ranks are exactly reversed.
        [[1, 2, 3, 4, 5], [10, 8, 6, 4, 2], -1.0],
        // Non-linear (squares) but still monotonic: Spearman = 1, Pearson < 1.
        [[1, 2, 3, 4, 5], [1, 4, 9, 16, 25], 1.0],
    ];

    foreach ($cases as [$x, $y, $expected]) {
        $rho = Stat::correlation($x, $y, 'ranked');
        $this->assertIsFloat($rho);
        $this->assertEqualsWithDelta($expected, $rho, 1e-9);
    }
}

public function test_calculates_spearman_correlation_planets(): void
{
    // Example from the Python docs: orbital period of each planet and its
    // distance from the sun.
    $periods = [88, 225, 365, 687, 4331, 10_756, 30_687, 60_190];
    $distances = [58, 108, 150, 228, 778, 1_400, 2_900, 4_500];

    // The relationship is perfectly monotonic, so the rank correlation is 1.0.
    $spearman = Stat::correlation($periods, $distances, 'ranked');
    $this->assertEqualsWithDelta(1.0, $spearman, 1e-9);

    // The default (Pearson) correlation on the raw values is below 1.
    $pearson = Stat::correlation($periods, $distances);
    $this->assertIsFloat($pearson);
    $this->assertEquals(0.9882, round($pearson, 4));

    // Kepler's third law: period squared is linearly related to
    // distance cubed, so Pearson on the transformed data rounds to 1.0.
    $squaredPeriods = array_map(
        fn(int $period): int => $period * $period,
        $periods,
    );
    $cubedDistances = array_map(
        fn(int $distance): int => $distance * $distance * $distance,
        $distances,
    );
    $kepler = Stat::correlation($squaredPeriods, $cubedDistances);
    $this->assertIsFloat($kepler);
    $this->assertEquals(1.0, round($kepler, 4));
}

public function test_calculates_spearman_correlation_with_ties(): void
{
    // Both inputs contain a tie at the same positions; tied values are
    // expected to share an averaged rank, keeping the rank correlation at 1.
    $x = [1, 2, 2, 3];
    $y = [10, 20, 20, 30];

    $rho = Stat::correlation($x, $y, 'ranked');

    $this->assertIsFloat($rho);
    $this->assertEqualsWithDelta(1.0, $rho, 1e-9);
}

public function test_calculates_correlation_invalid_method(): void
{
    // Any method name other than 'linear' or 'ranked' must be rejected.
    $unsupportedMethod = 'invalid';

    $this->expectException(InvalidDataInputException::class);
    Stat::correlation([1, 2, 3], [4, 5, 6], $unsupportedMethod);
}

public function test_calculates_correlation_wrong_usage_different_lengths(): void
{
$this->expectException(InvalidDataInputException::class);
Expand Down