Skip to content

Commit 064ebc6

Browse files
authored
Merge pull request #72 from snowch/claude/binomial-distribution-calculator-QJG1z
Fix CDF plots to show proper step functions (where='post')
2 parents 9de0c8f + 619c897 commit 064ebc6

1 file changed

Lines changed: 17 additions & 17 deletions

File tree

chapter_07.md

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ The PMF shows two bars: P(X=0) = 0.7 for a negative test and P(X=1) = 0.3 for a
157157
cdf_values_viz = bernoulli_viz.cdf(k_values_viz)
158158
159159
plt.figure(figsize=(8, 4))
160-
plt.step(k_values_viz, cdf_values_viz, where='mid', color='darkgreen', linewidth=2)
160+
plt.step(k_values_viz, cdf_values_viz, where='post', color='darkgreen', linewidth=2)
161161
plt.title(f"Bernoulli CDF (p={p_viz})")
162162
plt.xlabel("Outcome")
163163
plt.ylabel("Cumulative Probability P(X <= k)")
@@ -192,7 +192,7 @@ Now that we've seen both types of visualizations, let's understand how to read a
192192
- Find P(a < X ≤ b) by calculating P(X ≤ b) - P(X ≤ a)
193193
- **Key property:** The CDF never decreases (it either jumps up or stays flat as k increases) and ends at, or approaches, 1.0
194194

195-
**Note on CDF visualization:** The charts use `where='mid'` in the step plot for visual clarity, which centers the step between points. In mathematical terms, discrete CDFs are right-continuous functions (they jump up at each value and include that value in the cumulative probability).
195+
**Note on CDF visualization:** The charts use `where='post'` in the step plot, which holds each plotted value P(X ≤ k) constant from k up to (but not including) the next plotted point. This matches the mathematics: discrete CDFs are right-continuous step functions that jump up at each value and include that value in the cumulative probability.
196196

197197
:::{admonition} Example: Medical Diagnostic Test with p = 0.1
198198
:class: tip
@@ -256,7 +256,7 @@ The PMF shows the probability of each outcome. With p = 0.1, "Negative" has prob
256256
cdf_values = bernoulli_rv.cdf(k_values)
257257
258258
plt.figure(figsize=(8, 4))
259-
plt.step(k_values, cdf_values, where='mid', color='darkgreen', linewidth=2)
259+
plt.step(k_values, cdf_values, where='post', color='darkgreen', linewidth=2)
260260
plt.title(f"Bernoulli CDF (p={p_positive})")
261261
plt.xlabel("Outcome")
262262
plt.ylabel("Cumulative Probability P(X <= k)")
@@ -434,7 +434,7 @@ The PMF shows the probability distribution for the number of heads in 10 coin fl
434434
cdf_values_viz = binomial_viz.cdf(k_values_viz)
435435
436436
plt.figure(figsize=(8, 4))
437-
plt.step(k_values_viz, cdf_values_viz, where='mid', color='darkgreen', linewidth=2)
437+
plt.step(k_values_viz, cdf_values_viz, where='post', color='darkgreen', linewidth=2)
438438
plt.title(f"Binomial CDF (n={n_viz}, p={p_viz})")
439439
plt.xlabel("Number of Successes (k)")
440440
plt.ylabel("Cumulative Probability P(X <= k)")
@@ -519,7 +519,7 @@ The PMF shows the probability distribution for the number of successful calls. W
519519
cdf_values = binomial_rv.cdf(k_values)
520520
521521
plt.figure(figsize=(8, 4))
522-
plt.step(k_values, cdf_values, where='mid', color='darkgreen', linewidth=2)
522+
plt.step(k_values, cdf_values, where='post', color='darkgreen', linewidth=2)
523523
plt.title(f"Binomial CDF (n={n_calls}, p={p_success_call})")
524524
plt.xlabel("Number of Successes (k)")
525525
plt.ylabel("Cumulative Probability P(X <= k)")
@@ -688,7 +688,7 @@ The PMF shows exponentially decreasing probabilities - you're most likely to suc
688688
cdf_values_viz = geom_viz.cdf(k_values_viz - 1)
689689
690690
plt.figure(figsize=(8, 4))
691-
plt.step(k_values_viz, cdf_values_viz, where='mid', color='darkgreen', linewidth=2)
691+
plt.step(k_values_viz, cdf_values_viz, where='post', color='darkgreen', linewidth=2)
692692
plt.title(f"Geometric CDF (p={p_viz})")
693693
plt.xlabel("Trial Number (k)")
694694
plt.ylabel("Cumulative Probability P(X <= k)")
@@ -786,7 +786,7 @@ The PMF shows exponentially decreasing probabilities for the exam example with p
786786
cdf_values = geom_rv.cdf(k_values_trials - 1) # Adjust k for scipy
787787
788788
plt.figure(figsize=(8, 4))
789-
plt.step(k_values_trials, cdf_values, where='mid', color='darkgreen', linewidth=2)
789+
plt.step(k_values_trials, cdf_values, where='post', color='darkgreen', linewidth=2)
790790
plt.title(f"Geometric CDF (p={p_pass})")
791791
plt.xlabel("Trial Number (k)")
792792
plt.ylabel("Cumulative Probability P(X <= k)")
@@ -955,7 +955,7 @@ The PMF shows the distribution is centered around the expected value r/p = 3/0.2
955955
cdf_values_viz = nbinom_viz.cdf(k_values_viz - r_viz)
956956
957957
plt.figure(figsize=(8, 4))
958-
plt.step(k_values_viz, cdf_values_viz, where='mid', color='darkgreen', linewidth=2)
958+
plt.step(k_values_viz, cdf_values_viz, where='post', color='darkgreen', linewidth=2)
959959
plt.title(f"Negative Binomial CDF (r={r_viz}, p={p_viz})")
960960
plt.xlabel("Total Number of Trials (k)")
961961
plt.ylabel("Cumulative Probability P(X <= k)")
@@ -1059,7 +1059,7 @@ The PMF shows the distribution centered around r/p = 60 components with consider
10591059
cdf_values_nb = nbinom_rv.cdf(k_values_components - r_defective) # Adjust k for scipy
10601060
10611061
plt.figure(figsize=(8, 4))
1062-
plt.step(k_values_components, cdf_values_nb, where='mid', color='darkgreen', linewidth=2)
1062+
plt.step(k_values_components, cdf_values_nb, where='post', color='darkgreen', linewidth=2)
10631063
plt.title(f"Negative Binomial CDF (r={r_defective}, p={p_defective})")
10641064
plt.xlabel("Total Number of Components Tested (k)")
10651065
plt.ylabel("Cumulative Probability P(X <= k)")
@@ -1225,7 +1225,7 @@ The PMF shows the distribution centered around λ = 4 with reasonable probabilit
12251225
cdf_values_viz = poisson_viz.cdf(k_values_viz)
12261226
12271227
plt.figure(figsize=(8, 4))
1228-
plt.step(k_values_viz, cdf_values_viz, where='mid', color='darkgreen', linewidth=2)
1228+
plt.step(k_values_viz, cdf_values_viz, where='post', color='darkgreen', linewidth=2)
12291229
plt.title(f"Poisson CDF (λ={lambda_viz})")
12301230
plt.xlabel("Number of Events (k)")
12311231
plt.ylabel("Cumulative Probability P(X <= k)")
@@ -1310,7 +1310,7 @@ The PMF shows the distribution centered around λ = 5 events.
13101310
cdf_values = poisson_rv.cdf(k_values)
13111311
13121312
plt.figure(figsize=(8, 4))
1313-
plt.step(k_values, cdf_values, where='mid', color='darkgreen', linewidth=2)
1313+
plt.step(k_values, cdf_values, where='post', color='darkgreen', linewidth=2)
13141314
plt.title(f"Poisson CDF (λ={lambda_rate})")
13151315
plt.xlabel("Number of Events (k)")
13161316
plt.ylabel("Cumulative Probability P(X <= k)")
@@ -1480,7 +1480,7 @@ The PMF shows most likely to get 0 Aces (about 0.66 probability), less likely to
14801480
cdf_values_viz = hypergeom_viz.cdf(k_values_viz)
14811481
14821482
plt.figure(figsize=(8, 4))
1483-
plt.step(k_values_viz, cdf_values_viz, where='mid', color='darkgreen', linewidth=2)
1483+
plt.step(k_values_viz, cdf_values_viz, where='post', color='darkgreen', linewidth=2)
14841484
plt.title(f"Hypergeometric CDF (N={N_viz}, K={K_viz}, n={n_viz})")
14851485
plt.xlabel("Number of Successes in Sample (k)")
14861486
plt.ylabel("Cumulative Probability P(X <= k)")
@@ -1572,7 +1572,7 @@ The PMF shows the probability distribution for the number of winning tickets in
15721572
cdf_values = hypergeom_rv.cdf(k_values)
15731573
15741574
plt.figure(figsize=(8, 4))
1575-
plt.step(k_values, cdf_values, where='mid', color='darkgreen', linewidth=2)
1575+
plt.step(k_values, cdf_values, where='post', color='darkgreen', linewidth=2)
15761576
plt.title(f"Hypergeometric CDF (N={N_population}, K={K_successes_pop}, n={n_sample})")
15771577
plt.xlabel("Number of Successes in Sample (k)")
15781578
plt.ylabel("Cumulative Probability P(X <= k)")
@@ -1741,7 +1741,7 @@ The PMF shows six equal bars, each with probability 1/6, representing the fair d
17411741
cdf_values_viz = uniform_viz.cdf(k_values_viz)
17421742
17431743
plt.figure(figsize=(8, 4))
1744-
plt.step(k_values_viz, cdf_values_viz, where='mid', color='darkgreen', linewidth=2)
1744+
plt.step(k_values_viz, cdf_values_viz, where='post', color='darkgreen', linewidth=2)
17451745
plt.title(f"Discrete Uniform CDF (a={a_viz}, b={b_viz})")
17461746
plt.xlabel("Outcome")
17471747
plt.ylabel("Cumulative Probability P(X <= k)")
@@ -1829,7 +1829,7 @@ All 20 values have equal probability of 0.05 (1/20).
18291829
cdf_values = uniform_rv.cdf(k_values)
18301830
18311831
plt.figure(figsize=(10, 4))
1832-
plt.step(k_values, cdf_values, where='mid', color='darkgreen', linewidth=2)
1832+
plt.step(k_values, cdf_values, where='post', color='darkgreen', linewidth=2)
18331833
plt.title(f"Discrete Uniform CDF (a={a_sel}, b={b_sel})")
18341834
plt.xlabel("Value")
18351835
plt.ylabel("Cumulative Probability P(X <= k)")
@@ -1996,7 +1996,7 @@ The PMF shows the different probabilities for each face of the loaded die.
19961996
cdf_values_viz = categorical_viz.cdf(values_viz)
19971997
19981998
plt.figure(figsize=(8, 4))
1999-
plt.step(values_viz, cdf_values_viz, where='mid', color='darkgreen', linewidth=2)
1999+
plt.step(values_viz, cdf_values_viz, where='post', color='darkgreen', linewidth=2)
20002000
plt.title("Categorical CDF (Loaded Die)")
20012001
plt.xlabel("Outcome")
20022002
plt.ylabel("Cumulative Probability P(X <= k)")
@@ -2082,7 +2082,7 @@ Coffee is the most popular choice, followed by tea, juice, and water.
20822082
cdf_vals = categorical_rv.cdf(choices)
20832083
20842084
plt.figure(figsize=(8, 4))
2085-
plt.step(choices, cdf_vals, where='mid', color='darkgreen', linewidth=2)
2085+
plt.step(choices, cdf_vals, where='post', color='darkgreen', linewidth=2)
20862086
plt.xticks(choices, labels)
20872087
plt.title("Categorical CDF (Customer Drink Choice)")
20882088
plt.xlabel("Choice")

0 commit comments

Comments
 (0)