@@ -28,6 +28,109 @@ var sqrt = require( '@stdlib/math/base/special/sqrt' );
2828/**
2929* Computes the sample Pearson product-moment correlation coefficient of two double-precision floating-point strided arrays using Welford's algorithm.
3030*
31+ * ## Method
32+ *
33+ * - We begin by defining the co-moment \\(C_n\\)
34+ *
35+ * ```tex
36+ * C_n = \sum_{i=1}^{n} ( x_i - \bar{x}_n ) ( y_i - \bar{y}_n )
37+ * ```
38+ *
39+ * where \\(\bar{x}_n\\) and \\(\bar{y}_n\\) are the sample means for \\(x\\) and \\(y\\), respectively.
40+ *
41+ * - Based on Welford's method, we know the update formulas for the sample means are given by
42+ *
43+ * ```tex
44+ * \bar{x}_n = \bar{x}_{n-1} + \frac{x_n - \bar{x}_{n-1}}{n}
45+ * ```
46+ *
47+ * and
48+ *
49+ * ```tex
50+ * \bar{y}_n = \bar{y}_{n-1} + \frac{y_n - \bar{y}_{n-1}}{n}
51+ * ```
52+ *
53+ * - Substituting into the equation for \\(C_n\\) and rearranging terms
54+ *
55+ * ```tex
56+ * C_n = C_{n-1} + (x_n - \bar{x}_n) (y_n - \bar{y}_{n-1})
57+ * ```
58+ *
59+ * where the apparent asymmetry arises from
60+ *
61+ * ```tex
62+ * x_n - \bar{x}_n = \frac{n-1}{n} (x_n - \bar{x}_{n-1})
63+ * ```
64+ *
65+ * and, hence, the update term can be equivalently expressed
66+ *
67+ * ```tex
68+ * \frac{n-1}{n} (x_n - \bar{x}_{n-1}) (y_n - \bar{y}_{n-1})
69+ * ```
70+ *
71+ * - The covariance can be defined
72+ *
73+ * ```tex
74+ * \begin{align*}
75+ * \operatorname{cov}_n(x,y) &= \frac{C_n}{n} \\
76+ * &= \frac{C_{n-1} + (x_n - \bar{x}_n) (y_n - \bar{y}_{n-1})}{n} \\
77+ * &= \frac{(n-1)\operatorname{cov}_{n-1}(x,y) + (x_n - \bar{x}_n) (y_n - \bar{y}_{n-1})}{n}
78+ * \end{align*}
79+ * ```
80+ *
81+ * - Applying Bessel's correction, we arrive at an update formula for calculating an unbiased sample covariance
82+ *
83+ * ```tex
84+ * \begin{align*}
85+ * \operatorname{cov}_n(x,y) &= \frac{n}{n-1}\cdot\frac{(n-1)\operatorname{cov}_{n-1}(x,y) + (x_n - \bar{x}_n) (y_n - \bar{y}_{n-1})}{n} \\
86+ * &= \operatorname{cov}_{n-1}(x,y) + \frac{(x_n - \bar{x}_n) (y_n - \bar{y}_{n-1})}{n-1} \\
87+ * &= \frac{C_{n-1} + (x_n - \bar{x}_n) (y_n - \bar{y}_{n-1})}{n-1}
88+ * &= \frac{C_{n-1} + (x_n - \bar{x}_{n-1}) (y_n - \bar{y}_n)}{n-1}
89+ * \end{align*}
90+ * ```
91+ *
92+ * - To calculate the corrected sample standard deviation, we can use Welford's method, which can be derived as follows. We can express the variance as
93+ *
94+ * ```tex
95+ * \begin{align*}
96+ * S_n &= n \sigma_n^2 \\
97+ * &= \sum_{i=1}^{n} (x_i - \mu_n)^2 \\
98+ * &= \biggl(\sum_{i=1}^{n} x_i^2 \biggr) - n\mu_n^2
99+ * \end{align*}
100+ * ```
101+ *
102+ * Accordingly,
103+ *
104+ * ```tex
105+ * \begin{align*}
106+ * S_n - S_{n-1} &= \sum_{i=1}^{n} x_i^2 - n\mu_n^2 - \sum_{i=1}^{n-1} x_i^2 + (n-1)\mu_{n-1}^2 \\
107+ * &= x_n^2 - n\mu_n^2 + (n-1)\mu_{n-1}^2 \\
108+ * &= x_n^2 - \mu_{n-1}^2 + n(\mu_{n-1}^2 - \mu_n^2) \\
109+ * &= x_n^2 - \mu_{n-1}^2 + n(\mu_{n-1} - \mu_n)(\mu_{n-1} + \mu_n) \\
110+ * &= x_n^2 - \mu_{n-1}^2 + (\mu_{n-1} - x_n)(\mu_{n-1} + \mu_n) \\
111+ * &= x_n^2 - \mu_{n-1}^2 + \mu_{n-1}^2 - x_n\mu_n - x_n\mu_{n-1} + \mu_n\mu_{n-1} \\
112+ * &= x_n^2 - x_n\mu_n - x_n\mu_{n-1} + \mu_n\mu_{n-1} \\
113+ * &= (x_n - \mu_{n-1})(x_n - \mu_n) \\
114+ * &= S_{n-1} + (x_n - \mu_{n-1})(x_n - \mu_n)
115+ * \end{align*}
116+ * ```
117+ *
118+ * where we use the identity
119+ *
120+ * ```tex
121+ * x_n - \mu_{n-1} = n (\mu_n - \mu_{n-1})
122+ * ```
123+ *
124+ * - To compute the corrected sample standard deviation, we apply Bessel's correction and take the square root.
125+ *
126+ * - The sample Pearson product-moment correlation coefficient can thus be calculated as
127+ *
128+ * ```tex
129+ * r = \frac{\operatorname{cov}_n(x,y)}{\sigma_x \sigma_y}
130+ * ```
131+ *
132+ * where \\(\sigma_x\\) and \\(\sigma_y\\) are the corrected sample standard deviations for \\(x\\) and \\(y\\), respectively.
133+ *
31134* @param {PositiveInteger } N - number of indexed elements
32135* @param {Float64Array } x - first input array
33136* @param {integer } strideX - stride length of `x`
0 commit comments