|
8 | 8 | Repeated-Average Rank Correlation Λ (Lambda) |
9 | 9 | |
10 | 10 | Introduction |
11 | | - ---------- |
| 11 | + ------------ |
12 | 12 | The Repeated-Average Rank Correlation Λ (Lambda) introduced here is a new family |
13 | 13 | of robust measures of monotone association, both symmetric and asymmetric, based on |
14 | 14 | pairwise slopes in rank space. Compared with traditional rank-based measures |
|
63 | 63 | Λ_s extends this same geometric-mean symmetrization to robust repeated average |
64 | 64 | rank-slope correlations. |
65 | 65 |
|
| 66 | + Functions |
| 67 | + --------- |
| 68 | + - lambda_corr(x, y, ...): main user-facing wrapper with validation, finite filtering, |
| 69 | + and warnings. |
| 70 | + - lambda_corr_nb(x, y, n, ...): Numba-compatible core for use inside @njit code. |
| 71 | + Assumes x and y are already prevalidated (same length n≥3, finite, non-constant). |
| 72 | + |
66 | 73 | Parameters |
67 | 74 | ---------- |
68 | 75 | x, y : 1-D array_like |
69 | 76 | Two input samples of equal length (n ≥ 3). |
70 | | - |
| 77 | + n : int (only for lambda_corr_nb) |
| 78 | + Number of observations, i.e. the common length of x and y. |
71 | 79 | pvals : {True, False}, optional |
72 | 80 | Whether to compute p-values. Default: True. |
73 | 81 | If False, all returned p-values are NaN and no permutation/asymptotic |
|
88 | 96 | - "perm": Use Monte Carlo permutation test. Valid for any tie structure. |
89 | 97 | Note: This is approximate unless all permutations are |
90 | 98 | enumerated, which is only feasible for very small n. |
91 | | - The RNG is re-seeded from OS entropy for every call so |
92 | | - permutation p-values vary across runs by default. |
| 99 | + The RNG is re-seeded for every call so permutation p-values vary across |
| 100 | + runs by default. |
93 | 101 | |
94 | 102 | Note: |
95 | 103 | The permutation test samples from the *conditional* null distribution, generated |
|
145 | 153 | adversarial noise among rank methods. |
146 | 154 | - Less biased than Spearman/Kendall relative to Pearson. |
147 | 155 | - Similar or better accuracy than Spearman/Kendall for stronger associations. |
148 | | - - Asymptotic efficiency: ~81% vs. ~91% for Spearman and Kendall. |
| 156 | + - Asymptotic efficiency for bivariate normal: ~81% vs. ~91% for Spearman and Kendall. |
149 | 157 | - Null distribution: centered, symmetric, slightly heavier tails than Spearman. |
150 | 158 | - Symmetric: Λ_s(x,y) == Λ_s(y,x). |
151 | 159 | - Invariant to strictly monotone transforms. |
@@ -550,6 +558,41 @@ def kurt_model(n, A, B): |
550 | 558 | else: # "less" |
551 | 559 | return P_z |
552 | 560 |
|
| 561 | +#Numba-compatible entry point: callable from other @njit code; performs no input validation |
| 562 | +@njit(cache=True, nogil=True, fastmath=True) |
| 563 | +def lambda_corr_nb(x, y, n, pvals=True, ptype="default", p_tol=1e-4, n_perm=10000, alt="two-sided"): |
| 564 | + |
| 565 | + # Assumes x and y are already arrays of the same length n (n >= 3), finite, and non-constant |
| 566 | + # Standardized ranks with averaged ties |
| 567 | + rx = _std_ranks(x, n) |
| 568 | + ry = _std_ranks(y, n) |
| 569 | + # Lambda correlations: symmetric (Lambda_s) and both asymmetric directions (Lambda_yx, Lambda_xy) |
| 570 | + Lambda_s, Lambda_yx, Lambda_xy = _lambda_stats(rx, ry, n) |
| 571 | + |
| 572 | + if pvals: |
| 573 | + if (ptype=="perm") or ((ptype=="default") and (n < 25)): |
| 574 | + p_s, p_yx, p_xy = _lambda_pvals(rx, ry, n, Lambda_s, Lambda_yx, Lambda_xy, |
| 575 | + p_tol=p_tol, n_perm=n_perm, alt=alt) |
| 576 | + elif (ptype=="asymp") or ((ptype=="default") and (n >= 25)): |
| 577 | + p_s = _lambda_p_asymptotic(Lambda_s, n, alt=alt) |
| 578 | + #The null distribution for the asymmetric measures was not calculated separately, |
| 579 | + #but these two Gaussian-ish random variables are not independent; |
| 580 | + #they are very strongly correlated and nearly identically distributed under the null. |
| 581 | + #Therefore, the geometric average should have approximately the same distribution, |
| 582 | + #so the same asymptotic null is applied to all three statistics. MC testing confirms this. |
| 583 | + p_yx = _lambda_p_asymptotic(Lambda_yx, n, alt=alt) |
| 584 | + p_xy = _lambda_p_asymptotic(Lambda_xy, n, alt=alt) |
| 585 | + else: |
| 586 | + p_s = p_xy = p_yx = np.nan |
| 587 | + else: |
| 588 | + p_s = p_xy = p_yx = np.nan |
| 589 | + |
| 590 | + # Asymmetry index abs(Lambda_yx - Lambda_xy) / (abs(Lambda_yx) + abs(Lambda_xy)); 0.0 when the denominator is 0 |
| 591 | + denom = abs(Lambda_yx) + abs(Lambda_xy) |
| 592 | + Lambda_a = 0.0 if denom == 0.0 else float(abs(Lambda_yx - Lambda_xy) / denom) |
| 593 | + |
| 594 | + return Lambda_s, p_s, Lambda_yx, p_yx, Lambda_xy, p_xy, Lambda_a |
| 595 | + |
553 | 596 | #@njit(cache=True, nogil=True) #njit not compatible with warnings |
554 | 597 | def lambda_corr(x, y, pvals=True, ptype="default", p_tol=1e-4, n_perm=10000, alt="two-sided"): |
555 | 598 |
|
@@ -629,32 +672,8 @@ def lambda_corr(x, y, pvals=True, ptype="default", p_tol=1e-4, n_perm=10000, alt |
629 | 672 | UserWarning |
630 | 673 | ) |
631 | 674 |
|
632 | | - # Standardized ranks with averaged ties |
633 | | - rx = _std_ranks(x, n) |
634 | | - ry = _std_ranks(y, n) |
635 | | - # Get Lambda correlations - symmetric and asymmetric |
636 | | - Lambda_s, Lambda_yx, Lambda_xy = _lambda_stats(rx, ry, n) |
637 | | - |
638 | | - if pvals: |
639 | | - if (ptype=="default" and (n < 25)) or ptype=="perm": |
640 | | - p_s, p_yx, p_xy = _lambda_pvals(rx, ry, n, Lambda_s, Lambda_yx, Lambda_xy, |
641 | | - p_tol=p_tol, n_perm=n_perm, alt=alt) |
642 | | - elif (ptype=="default" and n >= 25) or ptype=="asymp": |
643 | | - p_s = _lambda_p_asymptotic(Lambda_s, n, alt=alt) |
644 | | - #The null distribution for the asymmetric measures was not calculated seperately |
645 | | - #but these two Gaussian-ish random variables are not independent; |
646 | | - #they are very strongly correlated and nearly identically distributed under the null. |
647 | | - #Therefore, the geometric average should have approximately the same distribution. |
648 | | - #MC testing confirms this. |
649 | | - p_yx = _lambda_p_asymptotic(Lambda_yx, n, alt=alt) |
650 | | - p_xy = _lambda_p_asymptotic(Lambda_xy, n, alt=alt) |
651 | | - else: |
652 | | - p_s = p_xy = p_yx = np.nan |
653 | | - else: |
654 | | - p_s = p_xy = p_yx = np.nan |
655 | | - |
656 | | - # Asymmetry index with safe denominator |
657 | | - denom = abs(Lambda_yx) + abs(Lambda_xy) |
658 | | - Lambda_a = 0.0 if denom == 0.0 else float(abs(Lambda_yx - Lambda_xy) / denom) |
| 675 | + Lambda_s, p_s, Lambda_yx, p_yx, Lambda_xy, p_xy, Lambda_a = \ |
| 676 | + lambda_corr_nb(x, y, n, pvals=pvals, ptype=ptype, p_tol=p_tol, n_perm=n_perm, |
| 677 | + alt=alt) |
659 | 678 |
|
660 | 679 | return Lambda_s, p_s, Lambda_yx, p_yx, Lambda_xy, p_xy, Lambda_a |
0 commit comments