Skip to content

Commit 4826e13

Browse files
committed
Adding Spearman rank correlation
1 parent 6afab90 commit 4826e13

5 files changed

Lines changed: 137 additions & 11 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
## 1.2.2 - WIP
44
- Adding `medianGrouped()` method for estimating the median of grouped/binned continuous data using interpolation
5+
- Adding Spearman rank correlation via `method` parameter in `correlation()` (`method='ranked'`)
56

67

78
## 1.2.1 - 2026-02-20

README.md

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ The various mathematical statistics are listed below:
7979
| `variance()` | variance for a sample |
8080
| `geometricMean()` | geometric mean |
8181
| `harmonicMean()` | harmonic mean |
82-
| `correlation()` | the Pearson’s correlation coefficient for two inputs |
82+
| `correlation()` | Pearson’s or Spearman’s rank correlation coefficient for two inputs |
8383
| `covariance()` | the sample covariance of two inputs |
8484
| `linearRegression()` | return the slope and intercept of simple linear regression parameters estimated using ordinary least squares |
8585

@@ -316,9 +316,11 @@ $covariance = Stat::covariance(
316316
// -7.5
317317
```
318318

319-
#### Stat::correlation ( array $x , array $y )
319+
#### Stat::correlation ( array $x , array $y , string $method = 'linear' )
320320
Returns Pearson’s correlation coefficient for two inputs. Pearson’s correlation coefficient r takes values between -1 and +1. It measures the strength and direction of the linear relationship, where +1 means a very strong positive linear relationship, -1 a very strong negative linear relationship, and 0 no linear relationship.
321321

322+
Use `$method = 'ranked'` for Spearman’s rank correlation, which measures monotonic relationships (not just linear). Spearman’s correlation is computed by applying Pearson’s formula to the ranks of the data.
323+
322324
```php
323325
$correlation = Stat::correlation(
324326
[1, 2, 3, 4, 5, 6, 7, 8, 9],
@@ -335,6 +337,16 @@ $correlation = Stat::correlation(
335337
// -1.0
336338
```
337339

340+
Spearman’s rank correlation (non-linear but monotonic relationship):
341+
```php
342+
$correlation = Stat::correlation(
343+
[1, 2, 3, 4, 5],
344+
[1, 4, 9, 16, 25],
345+
'ranked'
346+
);
347+
// 1.0
348+
```
349+
338350
#### Stat::linearRegression ( array $x , array $y )
339351
Return the slope and intercept of simple linear regression parameters estimated using ordinary least squares.
340352
Simple linear regression describes the relationship between an independent variable *$x* and a dependent variable *$y* in terms of a linear function.

TODO.md

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
Missing Functions
22

3-
Python Function: median_grouped(data, interval)
4-
Description: Median of grouped/binned continuous data
5-
Status: Missing
6-
────────────────────────────────────────
3+
74
Python Function: kde(data, h, kernel)
85
Description: Kernel Density Estimation
96
Status: Missing
@@ -14,10 +11,7 @@
1411

1512
Missing Parameters/Variants
1613

17-
Feature: correlation() with method='ranked'
18-
Python: Supports both Pearson and Spearman rank correlation
19-
This Package: Only Pearson
20-
────────────────────────────────────────
14+
2115
Feature: linear_regression() with proportional=True
2216
Python: Supports proportional regression (intercept forced to 0)
2317
This Package: No proportional option

src/Stat.php

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -614,8 +614,14 @@ public static function covariance(array $x, array $y): false|float
614614
* or if the length of the arrays is < 2, or if the 2 input arrays have non-numeric elements,
615615
* or if the elements of the array are constants
616616
*/
617-
public static function correlation(array $x, array $y): false|float
617+
public static function correlation(array $x, array $y, string $method = 'linear'): false|float
618618
{
619+
if ($method !== 'linear' && $method !== 'ranked') {
620+
throw new InvalidDataInputException(
621+
"Correlation method must be 'linear' or 'ranked'.",
622+
);
623+
}
624+
619625
$countX = count($x);
620626
$countY = count($y);
621627
if ($countX !== $countY) {
@@ -628,6 +634,12 @@ public static function correlation(array $x, array $y): false|float
628634
"Correlation requires at least two data points.",
629635
);
630636
}
637+
638+
if ($method === 'ranked') {
639+
$x = self::ranks($x);
640+
$y = self::ranks($y);
641+
}
642+
631643
$meanX = self::mean($x);
632644
$meanY = self::mean($y);
633645
$a = 0;
@@ -651,6 +663,39 @@ public static function correlation(array $x, array $y): false|float
651663
return $a / $b;
652664
}
653665

666+
/**
 * Assign 1-based average ranks to data values (ties share the mean of the ranks they span).
 *
 * The smallest value receives rank 1.0. When several values compare equal, each of them
 * receives the arithmetic mean of the rank positions the group occupies, e.g.
 * [10, 20, 20, 30] yields [1.0, 2.5, 2.5, 4.0].
 *
 * Input keys are ignored: values are ranked by position, and the result is a list
 * (keys 0..n-1) aligned with the iteration order of $data.
 *
 * @param array<int|float> $data
 * @return array<float>
 */
private static function ranks(array $data): array
{
    // Re-index so positions are 0..n-1 even for sparse or string-keyed input;
    // reading $data[$i] directly would hit undefined offsets on such arrays.
    $values = array_values($data);
    $n = count($values);

    // Pair every value with its original position so ranks can be written back in input order.
    $indexed = [];
    foreach ($values as $position => $value) {
        $indexed[] = [$value, $position];
    }

    usort($indexed, static fn (array $a, array $b): int => $a[0] <=> $b[0]);

    $ranks = array_fill(0, $n, 0.0);
    $i = 0;
    while ($i < $n) {
        // Start at $i + 1 so each pass consumes at least one element; comparing an element
        // with itself is unnecessary and, for NAN (never identical to itself), would stall the loop.
        $j = $i + 1;
        // Detect ties with the same comparison used for sorting, so that values such as
        // 1 and 1.0 (equal under <=> but not under ===) are grouped together.
        while ($j < $n && ($indexed[$j][0] <=> $indexed[$i][0]) === 0) {
            $j++;
        }

        // Sorted positions $i..$j-1 occupy ranks $i+1..$j; their mean is ($i + 1 + $j) / 2.
        $averageRank = ($i + 1 + $j) / 2.0;
        for ($k = $i; $k < $j; $k++) {
            $ranks[$indexed[$k][1]] = $averageRank;
        }

        $i = $j;
    }

    return $ranks;
}
698+
654699
/**
655700
* @param array<int|float> $x
656701
* @param array<int|float> $y

tests/StatTest.php

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,80 @@ public function test_calculates_correlation(): void
361361
$this->assertEquals(0.71, $correlation);
362362
}
363363

364+
public function test_calculates_spearman_correlation(): void
{
    // Each case is [x, y, expected Spearman coefficient].
    $cases = [
        // Increasing linear relationship: ranks agree perfectly
        [[1, 2, 3, 4, 5], [2, 4, 6, 8, 10], 1.0],
        // Decreasing relationship: ranks are exactly reversed
        [[1, 2, 3, 4, 5], [10, 8, 6, 4, 2], -1.0],
        // Squares of x: non-linear but still monotonic, so Spearman stays at 1
        [[1, 2, 3, 4, 5], [1, 4, 9, 16, 25], 1.0],
    ];

    foreach ($cases as [$x, $y, $expected]) {
        $result = Stat::correlation($x, $y, 'ranked');

        $this->assertIsFloat($result);
        $this->assertEqualsWithDelta($expected, $result, 1e-9);
    }
}
393+
394+
public function test_calculates_spearman_correlation_planets(): void
{
    // Data set from the Python docs example: orbital periods of the planets
    // and their distances from the sun.
    $periods = [88, 225, 365, 687, 4331, 10_756, 30_687, 60_190];
    $distances = [58, 108, 150, 228, 778, 1_400, 2_900, 4_500];

    // Both series increase together, so the rank correlation is exactly 1.0
    $spearman = Stat::correlation($periods, $distances, 'ranked');
    $this->assertEqualsWithDelta(1.0, $spearman, 1e-9);

    // The default (Pearson) method reports a strong but imperfect linear fit
    $pearson = Stat::correlation($periods, $distances);
    $this->assertEquals(0.9882, round($pearson, 4));

    // Kepler's third law: period squared versus distance cubed is linear
    $squaredPeriods = array_map(static fn ($period) => $period * $period, $periods);
    $cubedDistances = array_map(
        static fn ($distance) => $distance * $distance * $distance,
        $distances,
    );
    $kepler = Stat::correlation($squaredPeriods, $cubedDistances);
    $this->assertEquals(1.0, round($kepler, 4));
}
415+
416+
public function test_calculates_spearman_correlation_with_ties(): void
{
    // Both inputs repeat a value at the same positions (indexes 1 and 2)
    $x = [1, 2, 2, 3];
    $y = [10, 20, 20, 30];

    $result = Stat::correlation($x, $y, 'ranked');

    $this->assertIsFloat($result);
    $this->assertEqualsWithDelta(1.0, $result, 1e-9);
}
427+
428+
public function test_calculates_correlation_invalid_method(): void
{
    $x = [1, 2, 3];
    $y = [4, 5, 6];

    // Any method name other than 'linear' or 'ranked' must be rejected
    $this->expectException(InvalidDataInputException::class);

    Stat::correlation($x, $y, 'invalid');
}
437+
364438
public function test_calculates_correlation_wrong_usage_different_lengths(): void
365439
{
366440
$this->expectException(InvalidDataInputException::class);

0 commit comments

Comments
 (0)