feat: add computeWithYates and computeWithWilliams to ChiSquareTest

github-actions[bot] · Copilot · web-flow · commit 0566fd4c5a82 · 2026-04-04T12:10:18.000Z
Add two new continuity-correction overloads for the chi-square goodness-of-fit test: - ChiSquareTest.computeWithYates: applies Yates's continuity correction (|O - E| - 0.5) before squaring each term; recommended for df=1. - ChiSquareTest.computeWithWilliams: divides the raw chi-square by Williams's q = 1 + (k^2-1)/(6nk), which provides a better approximation for small samples across any number of categories. Both methods include XML-doc with references. Four new tests cover each correction variant. Part of #87. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
diff --git a/src/FSharp.Stats/Testing/ChiSquareTest.fs b/src/FSharp.Stats/Testing/ChiSquareTest.fs
@@ -37,20 +37,62 @@ open FSharp.Stats
 type ChiSquareTest =
 
     
-    /// Computes the Chi-Square test
-    /// n data points -&gt; degrees of freedom = n - 1 
+    /// <summary>
+    ///   Computes the Chi-Square goodness-of-fit test.
+    ///   n data points -> degrees of freedom = n - 1
+    /// </summary>
     static member compute (degreesOfFreedom:int) (expected:seq<float>) (observed:seq<float>) =
-        //let chechParams =
-        //    if expected |> Seq.exists (fun x -> abs x < 5.) then printfn "Warning: A value less than 5 is present in expected values. Results may not be correct!"
-        //    let sumEx = Seq.sum expected
-        //    let sumOb = Seq.sum observed
-        //    if Math.Round(sumEx,1) <> Math.Round(sumOb,1) then printfn "Warning: The sum of observed values does not match the sum of expected values. SumEx: %.3f SumOb: %.3f" sumEx sumOb
         let chi2 =
             Seq.zip observed expected
             |> Seq.fold (fun acc (obs,exp) -> 
                 let d = obs - exp
                 acc + (d * d) / exp) 0.0
-        
+        TestStatistics.createChiSquare chi2 (float degreesOfFreedom)
+
+    /// <summary>
+    ///   Computes the Chi-Square goodness-of-fit test with Yates's continuity correction.
+    /// </summary>
+    /// <remarks>
+    ///   Yates's correction subtracts 0.5 from each |observed - expected| term before squaring.
+    ///   It is recommended when the degrees of freedom equal 1 (two categories) and expected
+    ///   cell counts are small.  For df > 1 or large samples the uncorrected <c>compute</c> is
+    ///   preferable.
+    ///
+    ///   Reference: Yates, F. (1934). Contingency tables involving small numbers and the chi-squared
+    ///   test. Supplement to the Journal of the Royal Statistical Society, 1(2), 217-235.
+    /// </remarks>
+    static member computeWithYates (degreesOfFreedom:int) (expected:seq<float>) (observed:seq<float>) =
+        let chi2 =
+            Seq.zip observed expected
+            |> Seq.fold (fun acc (obs,exp) ->
+                let diff = abs (obs - exp) - 0.5
+                acc + (diff * diff) / exp) 0.0
+        TestStatistics.createChiSquare chi2 (float degreesOfFreedom)
+
+    /// <summary>
+    ///   Computes the Chi-Square goodness-of-fit test with Williams's correction.
+    /// </summary>
+    /// <remarks>
+    ///   Williams's correction divides the chi-square statistic by
+    ///   q = 1 + (k^2 - 1) / (6 * n * k), where k is the number of categories and
+    ///   n is the total observed count.  This provides a better approximation to the
+    ///   chi-squared distribution when sample sizes are small.
+    ///
+    ///   Reference: Williams, D. A. (1976). Improved likelihood ratio tests for complete
+    ///   contingency tables. Biometrika, 63(1), 33-37.
+    /// </remarks>
+    static member computeWithWilliams (degreesOfFreedom:int) (expected:seq<float>) (observed:seq<float>) =
+        let observedArr = Seq.toArray observed
+        let expectedArr = Seq.toArray expected
+        let k = float observedArr.Length
+        let n = Array.sum observedArr
+        let q = 1.0 + (k * k - 1.0) / (6.0 * n * k)
+        let chi2Raw =
+            Array.zip observedArr expectedArr
+            |> Array.fold (fun acc (obs,exp) ->
+                let d = obs - exp
+                acc + (d * d) / exp) 0.0
+        let chi2 = chi2Raw / q
         TestStatistics.createChiSquare chi2 (float degreesOfFreedom)
 
     static member pearsonChiSquared (table:ContingencyTable<_,_>) =
diff --git a/tests/FSharp.Stats.Tests/Testing.fs b/tests/FSharp.Stats.Tests/Testing.fs
@@ -384,12 +384,49 @@ let chiSquaredTests =
         let df = expected.Length - 1
         ChiSquareTest.compute df expected observed
         
+    // computeWithYates:
+    // R: obs <- c(45, 55); chisq.test(obs, p=c(0.5,0.5), correct=TRUE)
+    // Chi-squared = 0.81, p-value = 0.3681
+    let testCaseYates1 =
+        let expected = [50.0; 50.0]
+        let observed = [45.0; 55.0]
+        ChiSquareTest.computeWithYates 1 expected observed
+
+    // R: obs <- c(10, 20); chisq.test(obs, p=c(0.5,0.5), correct=TRUE)
+    // Chi-squared = 2.7, p-value = 0.1003
+    let testCaseYates2 =
+        let expected = [15.0; 15.0]
+        let observed = [10.0; 20.0]
+        ChiSquareTest.computeWithYates 1 expected observed
+
+    // computeWithWilliams:
+    // Williams q = 1 + (k^2-1)/(6*n*k); k=4, n=556 => q ≈ 1.001124
+    // chi2_williams = 0.4700 / 1.001124 ≈ 0.4695
+    let testCaseWilliams1 =
+        let expected = [312.75;104.25;104.25;34.75]
+        let observed = [315.;101.;108.;32.]
+        let df = expected.Length - 1
+        ChiSquareTest.computeWithWilliams df expected observed
+
+    // k=3, n=45 => q ≈ 1.009877; raw chi2 ≈ 3.3333 => williams ≈ 3.3007
+    let testCaseWilliams2 =
+        let expected = [15.0; 15.0; 15.0]
+        let observed = [10.0; 20.0; 15.0]
+        let df = expected.Length - 1
+        ChiSquareTest.computeWithWilliams df expected observed
+
     testList "Testing.ChiSquaredTest" [
         testCase "compute" <| fun () -> 
             Expect.isTrue (0.9254 = Math.Round(testCase1.PValueRight,4)) "pValue should be equal."
             Expect.isTrue (0.4700 = Math.Round(testCase1.Statistic,4)) "statistic should be equal."
             Expect.isTrue (0.000638 = Math.Round(testCase2.PValueRight,6)) "pValue should be equal."
             Expect.isTrue (19.461 = Math.Round(testCase2.Statistic,3)) "statistic should be equal."
+        testCase "computeWithYates" <| fun () ->
+            Expect.floatClose Accuracy.medium testCaseYates1.Statistic 0.81 "Yates statistic should be 0.81"
+            Expect.floatClose Accuracy.medium testCaseYates2.Statistic 2.7  "Yates statistic should be 2.7"
+        testCase "computeWithWilliams" <| fun () ->
+            Expect.floatClose Accuracy.medium testCaseWilliams1.Statistic 0.469496 "Williams statistic should be ~0.4695"
+            Expect.floatClose Accuracy.medium testCaseWilliams2.Statistic 3.300733 "Williams statistic should be ~3.3007"
             
     ]