Skip to content

Commit 8823ba7

Browse files
committed
updates
1 parent f738fbd commit 8823ba7

2 files changed

Lines changed: 95 additions & 88 deletions

File tree

lectures/_config.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@ sphinx:
9494
intro:
9595
- "https://intro.quantecon.org/"
9696
- null
97+
advanced:
98+
- "https://python-advanced.quantecon.org"
99+
- null
97100
mathjax3_config:
98101
tex:
99102
macros:

lectures/likelihood_ratio_process.md

Lines changed: 92 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ from numba import vectorize, jit
5353
from math import gamma
5454
from scipy.integrate import quad
5555
from scipy.optimize import brentq
56+
from scipy.stats import beta as beta_dist
57+
5658
```
5759

5860
## Likelihood Ratio Process
@@ -187,6 +189,7 @@ $\left[0, 0.01\right]$.
187189

188190
```{code-cell} ipython3
189191
plt.plot(range(T), np.sum(l_seq_g <= 0.01, axis=0) / N)
192+
plt.show()
190193
```
191194

192195
Despite the evident convergence of most probability mass to a
@@ -304,6 +307,7 @@ l_seq_f = np.cumprod(l_arr_f, axis=1)
304307
```{code-cell} ipython3
305308
N, T = l_arr_f.shape
306309
plt.plot(range(T), np.mean(l_seq_f, axis=0))
310+
plt.show()
307311
```
308312

309313
We also plot the probability that $L\left(w^t\right)$ falls into
@@ -312,6 +316,7 @@ fast probability mass diverges to $+\infty$.
312316

313317
```{code-cell} ipython3
314318
plt.plot(range(T), np.sum(l_seq_f > 10000, axis=0) / N)
319+
plt.show()
315320
```
316321

317322
## Likelihood Ratio Test
@@ -752,19 +757,17 @@ def protocol_1(π_minus_1, T, N=1000):
752757
Nature decides once at t=-1 which model to use.
753758
"""
754759
760+
# On-off coin flip for the true model
761+
true_models_F = np.random.rand(N) < π_minus_1
762+
755763
sequences = np.empty((N, T))
756-
true_models_F = np.empty(N, dtype=bool)
757764
758-
for i in range(N):
759-
# Nature flips coin
760-
if np.random.rand() < π_minus_1:
761-
# Generate entire sequence from f
762-
sequences[i, :] = np.random.beta(F_a, F_b, T)
763-
true_models_F[i] = True
764-
else:
765-
# Generate entire sequence from g
766-
sequences[i, :] = np.random.beta(G_a, G_b, T)
767-
true_models_F[i] = False
765+
n_f = np.sum(true_models_F)
766+
n_g = N - n_f
767+
if n_f > 0:
768+
sequences[true_models_F, :] = np.random.beta(F_a, F_b, (n_f, T))
769+
if n_g > 0:
770+
sequences[~true_models_F, :] = np.random.beta(G_a, G_b, (n_g, T))
768771
769772
return sequences, true_models_F
770773
```
@@ -778,19 +781,18 @@ def protocol_2(π_minus_1, T, N=1000):
778781
Nature decides at each time step which model to use.
779782
"""
780783
784+
# Coin flips for each time t upto T
785+
true_models_F = np.random.rand(N, T) < π_minus_1
786+
781787
sequences = np.empty((N, T))
782-
true_models_F = np.empty((N, T), dtype=bool)
783788
784-
for i in range(N):
785-
for t in range(T):
786-
# Nature flips coin at each time step t
787-
if np.random.rand() < π_minus_1:
788-
sequences[i, t] = np.random.beta(F_a, F_b)
789-
true_models_F[i, t] = True
790-
else:
791-
sequences[i, t] = np.random.beta(G_a, G_b)
792-
true_models_F[i, t] = False
793-
789+
n_f = np.sum(true_models_F)
790+
n_g = N * T - n_f
791+
if n_f > 0:
792+
sequences[true_models_F] = np.random.beta(F_a, F_b, n_f)
793+
if n_g > 0:
794+
sequences[~true_models_F] = np.random.beta(G_a, G_b, n_g)
795+
794796
return sequences, true_models_F
795797
```
796798

@@ -819,17 +821,8 @@ def compute_likelihood_ratios(sequences):
819821
Compute likelihood ratios for given sequences.
820822
"""
821823
822-
N, T = sequences.shape
823-
l_ratios = np.empty((N, T))
824-
L_cumulative = np.empty((N, T))
825-
826-
for i in range(N):
827-
for t in range(T):
828-
l_ratios[i, t] = f(sequences[i, t]) / g(sequences[i, t])
829-
830-
# Compute cumulative products
831-
L_cumulative[i, :] = np.cumprod(l_ratios[i, :])
832-
824+
l_ratios = f(sequences) / g(sequences)
825+
L_cumulative = np.cumprod(l_ratios, axis=1)
833826
return l_ratios, L_cumulative
834827
```
835828

@@ -868,49 +861,40 @@ Now let's simulate the protocol 1 and compute the error probabilities
868861
# Set parameters
869862
π_minus_1 = 0.5
870863
T_max = 30
871-
N_simulations = 5000
872-
873-
# Protocol 1: Bayesian Model Selection
874-
print("Protocol 1: Bayesian Model Selection")
864+
N_simulations = 10_000
875865
876866
sequences_p1, true_models_p1 = protocol_1(
877867
π_minus_1, T_max, N_simulations)
878868
l_ratios_p1, L_cumulative_p1 = compute_likelihood_ratios(sequences_p1)
879869
880870
# Compute error probabilities for different sample sizes
881-
T_range = range(1, T_max + 1)
882-
α_T = np.empty(T_max) # P(L_T < 1 | f)
883-
β_T = np.empty(T_max) # P(L_T >= 1 | g)
884-
bayesian_error_prob = np.empty(T_max)
871+
T_range = np.arange(1, T_max + 1)
885872
886-
for t in T_range:
887-
t_idx = t - 1
888-
889-
# Type I error: reject H_0 when it's true
890-
# (model f generates data)
891-
f_sequences = L_cumulative_p1[true_models_p1, t_idx]
892-
α_T[t_idx] = np.mean(f_sequences < 1)
893-
894-
# Type II error: accept H_0 when it's false
895-
# (model g generates data)
896-
g_sequences = L_cumulative_p1[~true_models_p1, t_idx]
897-
β_T[t_idx] = np.mean(g_sequences >= 1)
898-
899-
# Bayesian error probability
900-
bayesian_error_prob[t_idx] = 0.5 * (α_T[t_idx] + β_T[t_idx])
873+
# Boolean masks for true models
874+
mask_f = true_models_p1
875+
mask_g = ~true_models_p1
901876
902-
# Plot results for Protocol 1
877+
# Select cumulative likelihoods for each model
878+
L_f = L_cumulative_p1[mask_f, :]
879+
L_g = L_cumulative_p1[mask_g, :]
880+
881+
α_T = np.mean(L_f < 1, axis=0)
882+
β_T = np.mean(L_g >= 1, axis=0)
883+
884+
error_prob = 0.5 * (α_T + β_T)
885+
886+
# Plot results
903887
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
904888
905889
ax1.plot(T_range, α_T, 'b-',
906-
label=r'$\alpha_T$ (Type I error)', linewidth=2)
890+
label=r'$\alpha_T$', linewidth=2)
907891
ax1.plot(T_range, β_T, 'r-',
908-
label=r'$\beta_T$ (Type II error)', linewidth=2)
892+
label=r'$\beta_T$', linewidth=2)
909893
ax1.set_xlabel('$T$')
910894
ax1.set_ylabel('error probability')
911895
ax1.legend()
912896
913-
ax2.plot(T_range, bayesian_error_prob, 'g-',
897+
ax2.plot(T_range, error_prob, 'g-',
914898
label=r'$\frac{1}{2}(\alpha_T+\beta_T)$', linewidth=2)
915899
ax2.set_xlabel('$T$')
916900
ax2.set_ylabel('error probability')
@@ -922,7 +906,7 @@ plt.show()
922906
print(f"At T={T_max}:")
923907
print(f"α_{T_max} = {α_T[-1]:.4f}")
924908
print(f"β_{T_max} = {β_T[-1]:.4f}")
925-
print(f"Bayesian error probability = {bayesian_error_prob[-1]:.4f}")
909+
print(f"Bayesian error probability = {error_prob[-1]:.4f}")
926910
```
927911
928912
## Classification
@@ -943,9 +927,11 @@ $$
943927
Under this rule, the expected misclassification rate is
944928
945929
$$
946-
p(\textrm{misclassification}) = {1 \over 2} (\alpha_1 + \beta_1)
930+
p(\textrm{misclassification}) = {1 \over 2} (\tilde \alpha_t + \tilde \beta_t)
947931
$$ (eq:classerrorprob)
948932
933+
where $\tilde \alpha_t = {\rm Prob}(l_t < 1 \mid f)$ and $\tilde \beta_t = {\rm Prob}(l_t \geq 1 \mid g)$.
934+
949935
Now let's simulate protocol 2 and compute the error probabilities
950936
951937
```{code-cell} ipython3
@@ -956,28 +942,31 @@ l_ratios_p2, _ = compute_likelihood_ratios(sequences_p2)
956942
# Find decision boundary where f(w) = g(w)
957943
root = brentq(lambda w: f(w) / g(w) - 1, 0.001, 0.999)
958944
959-
print(f"Decision boundary points: {root}")
960-
961-
# Compute theoretical α_1 and β_1 using integration
945+
# Compute theoretical tilde α_t and tilde β_t
962946
def α_integrand(w):
963-
"""Integrand for α_1 = P(l_1 < 1 | f)"""
947+
"""Integrand for tilde α_t = P(l_t < 1 | f)"""
964948
return f(w) if f(w) / g(w) < 1 else 0
965949
966950
def β_integrand(w):
967-
"""Integrand for β_1 = P(l_1 >= 1 | g)"""
951+
"""Integrand for tilde β_t = P(l_t >= 1 | g)"""
968952
return g(w) if f(w) / g(w) >= 1 else 0
969953
970954
# Compute the integrals
971-
α_1_theory, _ = quad(α_integrand, 0, 1, limit=100)
972-
β_1_theory, _ = quad(β_integrand, 0, 1, limit=100)
955+
α_theory, _ = quad(α_integrand, 0, 1, limit=100)
956+
β_theory, _ = quad(β_integrand, 0, 1, limit=100)
973957
974-
theory_error = 0.5 * (α_1_theory + β_1_theory)
958+
theory_error = 0.5 * (α_theory + β_theory)
975959
976-
print(f"theoretical α_1 = {α_1_theory:.4f}")
977-
print(f"theoretical β_1 = {β_1_theory:.4f}")
960+
print(f"theoretical tilde α_t = {α_theory:.4f}")
961+
print(f"theoretical tilde β_t = {β_theory:.4f}")
978962
print(f"theoretical classification error probability = {theory_error:.4f}")
963+
```
964+
965+
Since for each $t$, the decision boundary is the same, we can plot the distributions of $f$ and $g$ and the decision boundary
966+
967+
```{code-cell} ipython3
968+
:tags: [hide-input]
979969
980-
# Visualization of distributions and decision boundary
981970
fig, ax = plt.subplots(figsize=(7, 6))
982971
983972
w_range = np.linspace(0.001, 0.999, 1000)
@@ -990,6 +979,19 @@ ax.plot(w_range, f_values, 'b-',
990979
ax.plot(w_range, g_values, 'r-',
991980
label=r'$g(w) \sim Beta(3,1.2)$', linewidth=2)
992981
982+
type1_prob = 1 - beta_dist.cdf(root, F_a, F_b)
983+
type2_prob = beta_dist.cdf(root, G_a, G_b)
984+
985+
w_type1 = w_range[w_range >= root]
986+
f_type1 = [f(w) for w in w_type1]
987+
ax.fill_between(w_type1, 0, f_type1, alpha=0.3, color='blue',
988+
label=fr'$\tilde \alpha_t = {type1_prob:.2f}$')
989+
990+
w_type2 = w_range[w_range <= root]
991+
g_type2 = [g(w) for w in w_type2]
992+
ax.fill_between(w_type2, 0, g_type2, alpha=0.3, color='red',
993+
label=fr'$\tilde \beta_t = {type2_prob:.2f}$')
994+
993995
ax.axvline(root, color='green', linestyle='--', alpha=0.7,
994996
label=f'decision boundary: $w=${root:.3f}')
995997
@@ -1001,56 +1003,58 @@ plt.tight_layout()
10011003
plt.show()
10021004
```
10031005
1006+
On the left of the decision boundary, $f$ is more likely than $g$ with $l_t < 1$.
1007+
1008+
On the right of the decision boundary, $g$ is more likely than $f$ with $l_t \geq 1$.
1009+
10041010
Let's see how it performs in the simulated data
10051011
10061012
```{code-cell} ipython3
1007-
correct_classifications = np.empty(T_max)
1013+
accuracy = np.empty(T_max)
10081014
10091015
for t in range(T_max):
1010-
predictions = (l_ratios_p2[:, t] > 1).astype(int)
1016+
predictions = (l_ratios_p2[:, t] >= 1)
10111017
actual = true_sources_p2[:, t]
1012-
correct_classifications[t] = np.mean(predictions == actual)
1018+
accuracy[t] = np.mean(predictions == actual)
10131019
10141020
plt.figure(figsize=(10, 6))
1015-
plt.plot(range(1, T_max + 1), correct_classifications,
1021+
plt.plot(range(1, T_max + 1), accuracy,
10161022
'b-', linewidth=2, label='empirical accuracy')
10171023
plt.axhline(1 - theory_error, color='r', linestyle='--',
10181024
label=f'theoretical accuracy = {1 - theory_error:.4f}')
1019-
plt.xlabel('time step')
1025+
plt.xlabel('$t$')
10201026
plt.ylabel('accuracy')
10211027
plt.legend()
10221028
plt.ylim(0.5, 1.0)
10231029
plt.show()
10241030
```
10251031
1026-
Let's also compare the two protocols by showing how the error probabilities evolve differently.
1032+
Let's also compare the two protocols by showing how the error probabilities evolve differently
10271033
10281034
```{code-cell} ipython3
1029-
# Comparison of error probabilities between protocols
10301035
fig, ax = plt.subplots(figsize=(7, 6))
10311036
1032-
ax.plot(T_range, bayesian_error_prob, 'b-', linewidth=2,
1033-
label='Protocol 1 (Model Selection)')
1034-
ax.axhline(theory_error, color='r', linestyle='--', linewidth=2,
1035-
label=f'Protocol 2 (Classification) = {theory_error:.4f}')
1036-
ax.set_xlabel('T')
1037+
ax.plot(T_range, error_prob, linewidth=2,
1038+
label='Protocol 1')
1039+
ax.plot(T_range, 1-accuracy, linestyle='--', linewidth=2,
1040+
label=f'Protocol 2')
10371041
ax.set_ylabel('error probability')
10381042
ax.legend()
10391043
plt.show()
10401044
```
10411045
10421046
From the figure above, we can see:
10431047
1048+
- For both protocols, the error probability starts at the same level subject to randomness.
1049+
10441050
- For protocol 1, the error probability decreases as we collect more data because we're trying to determine which single model generated the entire sequence. More data provides stronger evidence.
10451051
10461052
- For protocol 2, the error probability remains constant because each observation is classified independently. The accuracy depends only on the likelihood that the two models generates the single observation.
10471053
1048-
- Under Protocol 1, the Bayesian error probability converges to zero as $T \to \infty$, while under Protocol 2, it remains constant at ${1 \over 2}(\alpha_1 + \beta_1)$.
1049-
10501054
## Sequels
10511055
10521056
Likelihood processes play an important role in Bayesian learning, as described in {doc}`likelihood_bayes`
10531057
and as applied in {doc}`odu`.
10541058
1055-
Likelihood ratio processes appear again in {doc}`additive_functionals`, which contains another illustration
1059+
Likelihood ratio processes appear again in {doc}`advanced:additive_functionals`, which contains another illustration
10561060
of the **peculiar property** of likelihood ratio processes described above.

0 commit comments

Comments
 (0)