Skip to content

Commit 0586823

Browse files
committed
adding more examples and recipes
1 parent 7625a4a commit 0586823

4 files changed

Lines changed: 339 additions & 0 deletions

File tree

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
<?php
2+
3+
require __DIR__ . '/../vendor/autoload.php';
4+
5+
use HiFolks\Statistics\NormalDist;
6+
7+
/**
8+
* Recipe: Approximating Binomial Distributions
9+
*
10+
* Adapted from the Python statistics module "Examples and Recipes":
11+
* https://docs.python.org/3/library/statistics.html#examples-and-recipes
12+
*
13+
* NormalDist can be used to approximate binomial distributions
14+
* when the sample size is large (via the Central Limit Theorem).
15+
*
16+
* Scenario: a]conference has 750 attendees. 65% prefer Python
17+
* and 35% prefer Ruby. The "Python" room holds 500 people.
18+
* What is the probability that the room will stay within capacity?
19+
*/
20+
21+
echo "=== Approximating Binomial Distributions ===" . PHP_EOL . PHP_EOL;
22+
23+
$n = 750; // Sample size (attendees)
24+
$p = 0.65; // Probability of preferring Python
25+
$q = 1.0 - $p; // Probability of preferring Ruby
26+
$k = 500; // Room capacity
27+
28+
// For a binomial distribution B(n, p):
29+
// mean = n * p
30+
// sigma = sqrt(n * p * q)
31+
$mu = $n * $p;
32+
$sigma = sqrt($n * $p * $q);
33+
34+
echo "Binomial parameters:" . PHP_EOL;
35+
echo " n = " . $n . " (attendees)" . PHP_EOL;
36+
echo " p = " . $p . " (Python preference)" . PHP_EOL;
37+
echo " Expected Python fans: " . $mu . PHP_EOL;
38+
echo " Standard deviation: " . round($sigma, 2) . PHP_EOL;
39+
echo PHP_EOL;
40+
41+
// Normal approximation with continuity correction
42+
$normal = new NormalDist($mu, $sigma);
43+
$probNormal = $normal->cdf($k + 0.5);
44+
echo "Normal approximation: P(X <= " . $k . ") = "
45+
. round($probNormal, 4) . PHP_EOL;
46+
47+
// Exact binomial calculation using log-space arithmetic.
48+
// P(X <= k) = sum from r=0 to k of C(n,r) * p^r * q^(n-r)
49+
// We use Stirling's log-gamma via log() of factorials to avoid overflow.
50+
51+
// Build log-factorial lookup table
52+
$logFact = [0.0]; // log(0!) = 0
53+
for ($i = 1; $i <= $n; $i++) {
54+
$logFact[$i] = $logFact[$i - 1] + log($i);
55+
}
56+
57+
$logTerms = [];
58+
for ($r = 0; $r <= $k; $r++) {
59+
// log(C(n,r)) = log(n!) - log(r!) - log((n-r)!)
60+
$logBinom = $logFact[$n] - $logFact[$r] - $logFact[$n - $r];
61+
$logTerms[] = $logBinom + $r * log($p) + ($n - $r) * log($q);
62+
}
63+
// Log-sum-exp for numerical stability
64+
$maxLog = max($logTerms);
65+
$sum = 0.0;
66+
foreach ($logTerms as $logTerm) {
67+
$sum += exp($logTerm - $maxLog);
68+
}
69+
$probExact = exp($maxLog + log($sum));
70+
71+
echo "Exact binomial: P(X <= " . $k . ") = "
72+
. round($probExact, 4) . PHP_EOL;
73+
74+
// Monte Carlo simulation approximation
75+
$seed = 8675309;
76+
mt_srand($seed);
77+
$trials = 10_000;
78+
$successes = 0;
79+
for ($i = 0; $i < $trials; $i++) {
80+
$count = 0;
81+
for ($j = 0; $j < $n; $j++) {
82+
if (mt_rand() / mt_getrandmax() < $p) {
83+
$count++;
84+
}
85+
}
86+
if ($count <= $k) {
87+
$successes++;
88+
}
89+
}
90+
$probSimulation = $successes / $trials;
91+
echo "Simulation (" . $trials . " trials): P(X <= " . $k . ") = "
92+
. round($probSimulation, 4) . PHP_EOL;
93+
94+
echo PHP_EOL . "All three methods should give approximately the same result (~0.84)."
95+
. PHP_EOL;
96+
97+
// --- Additional: What capacity is needed for 99% confidence? ---
98+
echo PHP_EOL . "--- Capacity Planning ---" . PHP_EOL;
99+
$needed = $normal->invCdfRounded(0.99, 0);
100+
echo "For 99% confidence, room capacity should be: "
101+
. $needed . " seats" . PHP_EOL;
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
<?php
2+
3+
require __DIR__ . '/../vendor/autoload.php';
4+
5+
use HiFolks\Statistics\NormalDist;
6+
7+
/**
8+
* Recipe: Classic Probability Problems
9+
*
10+
* Adapted from the Python statistics module "Examples and Recipes":
11+
* https://docs.python.org/3/library/statistics.html#examples-and-recipes
12+
*
13+
* Using NormalDist to solve classic probability problems.
14+
*/
15+
16+
echo "=== Classic Probability Problems ===" . PHP_EOL . PHP_EOL;
17+
18+
// --- SAT scores are normally distributed with mean 1060 and std dev 195 ---
19+
$sat = new NormalDist(1060, 195);
20+
21+
// What percentage of students score between 1100 and 1200?
22+
// Adding 0.5 applies a continuity correction for discrete scores.
23+
$fraction = $sat->cdf(1200 + 0.5) - $sat->cdf(1100 - 0.5);
24+
echo "Percentage of students scoring between 1100 and 1200: "
25+
. round($fraction * 100, 1) . "%" . PHP_EOL;
26+
27+
// Quartiles: divide SAT scores into 4 equal-probability groups
28+
echo PHP_EOL . "--- SAT Score Quartiles ---" . PHP_EOL;
29+
$quartiles = $sat->quantiles(4);
30+
echo "Quartiles (Q1, Q2, Q3): "
31+
. implode(', ', array_map(round(...), $quartiles))
32+
. PHP_EOL;
33+
34+
// Deciles: divide SAT scores into 10 equal-probability groups
35+
echo PHP_EOL . "--- SAT Score Deciles ---" . PHP_EOL;
36+
$deciles = $sat->quantiles(10);
37+
echo "Deciles: "
38+
. implode(', ', array_map(round(...), $deciles))
39+
. PHP_EOL;
40+
41+
// --- What SAT score is needed to be in the top 10%? ---
42+
echo PHP_EOL . "--- SAT Score Thresholds ---" . PHP_EOL;
43+
$top10 = $sat->invCdfRounded(0.90, 0);
44+
echo "SAT score needed for top 10%: " . $top10 . PHP_EOL;
45+
46+
$top1 = $sat->invCdfRounded(0.99, 0);
47+
echo "SAT score needed for top 1%: " . $top1 . PHP_EOL;
48+
49+
// --- Probability of scoring above a threshold ---
50+
$threshold = 1300;
51+
$probAbove = 1 - $sat->cdf($threshold);
52+
echo PHP_EOL . "Probability of scoring above " . $threshold . ": "
53+
. round($probAbove * 100, 1) . "%" . PHP_EOL;
54+
55+
// --- Z-score for a specific SAT score ---
56+
$score = 1250;
57+
$z = $sat->zscoreRounded($score, 2);
58+
echo "Z-score for SAT score of " . $score . ": " . $z . PHP_EOL;

examples/recipes_monte_carlo.php

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
<?php
2+
3+
require __DIR__ . '/../vendor/autoload.php';
4+
5+
use HiFolks\Statistics\NormalDist;
6+
use HiFolks\Statistics\Stat;
7+
8+
/**
9+
* Recipe: Monte Carlo Inputs for Simulations
10+
*
11+
* Adapted from the Python statistics module "Examples and Recipes":
12+
* https://docs.python.org/3/library/statistics.html#examples-and-recipes
13+
*
14+
* NormalDist can generate random samples to use as inputs for
15+
* Monte Carlo simulations.
16+
*/
17+
18+
echo "=== Monte Carlo Simulation ===" . PHP_EOL . PHP_EOL;
19+
20+
/**
21+
* A simple model function that combines three uncertain variables.
22+
*/
23+
function model(float $x, float $y, float $z): float
24+
{
25+
return (3 * $x + 7 * $x * $y - 5 * $y) / (11 * $z);
26+
}
27+
28+
$n = 100_000;
29+
30+
// Generate random samples from three independent normal distributions
31+
$X = (new NormalDist(10, 2.5))->samples($n, seed: 3652260728);
32+
$Y = (new NormalDist(15, 1.75))->samples($n, seed: 4582495471);
33+
$Z = (new NormalDist(50, 1.25))->samples($n, seed: 6582483453);
34+
35+
// Compute the model output for each set of inputs
36+
$results = [];
37+
for ($i = 0; $i < $n; $i++) {
38+
$results[] = model($X[$i], $Y[$i], $Z[$i]);
39+
}
40+
41+
// Find the quartiles of the model output distribution
42+
$quantiles = Stat::quantiles($results);
43+
echo "Model output quartiles (Q1, Q2, Q3):" . PHP_EOL;
44+
echo " Q1: " . round($quantiles[0], 4) . PHP_EOL;
45+
echo " Q2: " . round($quantiles[1], 4) . PHP_EOL;
46+
echo " Q3: " . round($quantiles[2], 4) . PHP_EOL;
47+
48+
// Basic descriptive statistics of the simulation
49+
echo PHP_EOL . "--- Simulation Summary ---" . PHP_EOL;
50+
echo "Mean: " . round(Stat::mean($results), 4) . PHP_EOL;
51+
echo "Stdev: " . round(Stat::stdev($results), 4) . PHP_EOL;
52+
echo "Min: " . round(min($results), 4) . PHP_EOL;
53+
echo "Max: " . round(max($results), 4) . PHP_EOL;
54+
55+
// Fit a normal distribution to the simulation results
56+
$fitted = NormalDist::fromSamples($results);
57+
echo PHP_EOL . "--- Fitted Normal Distribution ---" . PHP_EOL;
58+
echo "Estimated mu: " . $fitted->getMeanRounded(4) . PHP_EOL;
59+
echo "Estimated sigma: " . $fitted->getSigmaRounded(4) . PHP_EOL;
60+
61+
// Use the fitted distribution to answer probability questions
62+
$threshold = 2.0;
63+
$probAbove = 1 - $fitted->cdf($threshold);
64+
echo PHP_EOL . "P(result > " . $threshold . "): "
65+
. round($probAbove * 100, 1) . "%" . PHP_EOL;

examples/recipes_naive_bayes.php

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
<?php
2+
3+
require __DIR__ . '/../vendor/autoload.php';
4+
5+
use HiFolks\Statistics\NormalDist;
6+
7+
/**
8+
* Recipe: Naive Bayesian Classifier
9+
*
10+
* Adapted from the Python statistics module "Examples and Recipes":
11+
* https://docs.python.org/3/library/statistics.html#examples-and-recipes
12+
*
13+
* A simple Naive Bayes classifier using NormalDist.
14+
* Given training data for height, weight, and foot size of males
15+
* and females, classify a new person based on their measurements.
16+
*/
17+
18+
echo "=== Naive Bayesian Classifier ===" . PHP_EOL . PHP_EOL;
19+
20+
// --- Training data ---
21+
// Fit normal distributions to each feature for each class
22+
23+
echo "--- Training Phase ---" . PHP_EOL;
24+
25+
$heightMale = NormalDist::fromSamples([6, 5.92, 5.58, 5.92]);
26+
$heightFemale = NormalDist::fromSamples([5, 5.5, 5.42, 5.75]);
27+
28+
$weightMale = NormalDist::fromSamples([180, 190, 170, 165]);
29+
$weightFemale = NormalDist::fromSamples([100, 150, 130, 150]);
30+
31+
$footSizeMale = NormalDist::fromSamples([12, 11, 12, 10]);
32+
$footSizeFemale = NormalDist::fromSamples([6, 8, 7, 9]);
33+
34+
echo "Height (male): mu=" . $heightMale->getMeanRounded(2)
35+
. ", sigma=" . $heightMale->getSigmaRounded(2) . PHP_EOL;
36+
echo "Height (female): mu=" . $heightFemale->getMeanRounded(2)
37+
. ", sigma=" . $heightFemale->getSigmaRounded(2) . PHP_EOL;
38+
echo "Weight (male): mu=" . $weightMale->getMeanRounded(2)
39+
. ", sigma=" . $weightMale->getSigmaRounded(2) . PHP_EOL;
40+
echo "Weight (female): mu=" . $weightFemale->getMeanRounded(2)
41+
. ", sigma=" . $weightFemale->getSigmaRounded(2) . PHP_EOL;
42+
echo "Foot size (male): mu=" . $footSizeMale->getMeanRounded(2)
43+
. ", sigma=" . $footSizeMale->getSigmaRounded(2) . PHP_EOL;
44+
echo "Foot size (female): mu=" . $footSizeFemale->getMeanRounded(2)
45+
. ", sigma=" . $footSizeFemale->getSigmaRounded(2) . PHP_EOL;
46+
47+
// --- Classification ---
48+
echo PHP_EOL . "--- Classification Phase ---" . PHP_EOL . PHP_EOL;
49+
50+
// Person to classify
51+
$ht = 6.0; // height in feet
52+
$wt = 130; // weight in pounds
53+
$fs = 8; // foot size
54+
55+
echo "New person: height=" . $ht . "ft, weight=" . $wt
56+
. "lbs, foot size=" . $fs . PHP_EOL . PHP_EOL;
57+
58+
// Equal prior probabilities
59+
$priorMale = 0.5;
60+
$priorFemale = 0.5;
61+
62+
// Posterior ∝ prior × P(height|class) × P(weight|class) × P(foot_size|class)
63+
// Naive Bayes assumes features are conditionally independent.
64+
$posteriorMale = $priorMale
65+
* $heightMale->pdf($ht)
66+
* $weightMale->pdf($wt)
67+
* $footSizeMale->pdf($fs);
68+
69+
$posteriorFemale = $priorFemale
70+
* $heightFemale->pdf($ht)
71+
* $weightFemale->pdf($wt)
72+
* $footSizeFemale->pdf($fs);
73+
74+
echo "Posterior (male): " . sprintf("%.4e", $posteriorMale) . PHP_EOL;
75+
echo "Posterior (female): " . sprintf("%.4e", $posteriorFemale) . PHP_EOL;
76+
echo PHP_EOL;
77+
78+
$classification = $posteriorMale > $posteriorFemale ? 'male' : 'female';
79+
echo "Classification: " . $classification . PHP_EOL;
80+
81+
// Show confidence as normalized probability
82+
$total = $posteriorMale + $posteriorFemale;
83+
$confidenceMale = $posteriorMale / $total;
84+
$confidenceFemale = $posteriorFemale / $total;
85+
echo "Confidence (male): " . round($confidenceMale * 100, 1) . "%" . PHP_EOL;
86+
echo "Confidence (female): " . round($confidenceFemale * 100, 1) . "%" . PHP_EOL;
87+
88+
// --- Classify a second person ---
89+
echo PHP_EOL . "--- Classify another person ---" . PHP_EOL . PHP_EOL;
90+
91+
$ht2 = 5.5;
92+
$wt2 = 175;
93+
$fs2 = 11;
94+
95+
echo "New person: height=" . $ht2 . "ft, weight=" . $wt2
96+
. "lbs, foot size=" . $fs2 . PHP_EOL . PHP_EOL;
97+
98+
$posteriorMale2 = $priorMale
99+
* $heightMale->pdf($ht2)
100+
* $weightMale->pdf($wt2)
101+
* $footSizeMale->pdf($fs2);
102+
103+
$posteriorFemale2 = $priorFemale
104+
* $heightFemale->pdf($ht2)
105+
* $weightFemale->pdf($wt2)
106+
* $footSizeFemale->pdf($fs2);
107+
108+
$classification2 = $posteriorMale2 > $posteriorFemale2 ? 'male' : 'female';
109+
$total2 = $posteriorMale2 + $posteriorFemale2;
110+
111+
echo "Posterior (male): " . sprintf("%.4e", $posteriorMale2) . PHP_EOL;
112+
echo "Posterior (female): " . sprintf("%.4e", $posteriorFemale2) . PHP_EOL;
113+
echo "Classification: " . $classification2 . PHP_EOL;
114+
echo "Confidence: " . round(max($posteriorMale2, $posteriorFemale2) / $total2 * 100, 1)
115+
. "%" . PHP_EOL;

0 commit comments

Comments
 (0)