@@ -53,6 +53,8 @@ from numba import vectorize, jit
5353from math import gamma
5454from scipy.integrate import quad
5555from scipy.optimize import brentq
56+ from scipy.stats import beta as beta_dist
57+
5658```
5759
5860## Likelihood Ratio Process
@@ -187,6 +189,7 @@ $\left[0, 0.01\right]$.
187189
188190``` {code-cell} ipython3
189191plt.plot(range(T), np.sum(l_seq_g <= 0.01, axis=0) / N)
192+ plt.show()
190193```
191194
192195Despite the evident convergence of most probability mass to a
@@ -304,6 +307,7 @@ l_seq_f = np.cumprod(l_arr_f, axis=1)
304307``` {code-cell} ipython3
305308N, T = l_arr_f.shape
306309plt.plot(range(T), np.mean(l_seq_f, axis=0))
310+ plt.show()
307311```
308312
309313We also plot the probability that $L\left(w^t\right)$ falls into
@@ -312,6 +316,7 @@ fast probability mass diverges to $+\infty$.
312316
313317``` {code-cell} ipython3
314318plt.plot(range(T), np.sum(l_seq_f > 10000, axis=0) / N)
319+ plt.show()
315320```
316321
317322## Likelihood Ratio Test
@@ -752,19 +757,17 @@ def protocol_1(π_minus_1, T, N=1000):
752757 Nature decides once at t=-1 which model to use.
753758 """
754759
760+ # One-off coin flip for the true model
761+ true_models_F = np.random.rand(N) < π_minus_1
762+
755763 sequences = np.empty((N, T))
756- true_models_F = np.empty(N, dtype=bool)
757764
758- for i in range(N):
759- # Nature flips coin
760- if np.random.rand() < π_minus_1:
761- # Generate entire sequence from f
762- sequences[i, :] = np.random.beta(F_a, F_b, T)
763- true_models_F[i] = True
764- else:
765- # Generate entire sequence from g
766- sequences[i, :] = np.random.beta(G_a, G_b, T)
767- true_models_F[i] = False
765+ n_f = np.sum(true_models_F)
766+ n_g = N - n_f
767+ if n_f > 0:
768+ sequences[true_models_F, :] = np.random.beta(F_a, F_b, (n_f, T))
769+ if n_g > 0:
770+ sequences[~true_models_F, :] = np.random.beta(G_a, G_b, (n_g, T))
768771
769772 return sequences, true_models_F
770773```
@@ -778,19 +781,18 @@ def protocol_2(π_minus_1, T, N=1000):
778781 Nature decides at each time step which model to use.
779782 """
780783
784+ # Coin flips for each time t up to T
785+ true_models_F = np.random.rand(N, T) < π_minus_1
786+
781787 sequences = np.empty((N, T))
782- true_models_F = np.empty((N, T), dtype=bool)
783788
784- for i in range(N):
785- for t in range(T):
786- # Nature flips coin at each time step t
787- if np.random.rand() < π_minus_1:
788- sequences[i, t] = np.random.beta(F_a, F_b)
789- true_models_F[i, t] = True
790- else:
791- sequences[i, t] = np.random.beta(G_a, G_b)
792- true_models_F[i, t] = False
793-
789+ n_f = np.sum(true_models_F)
790+ n_g = N * T - n_f
791+ if n_f > 0:
792+ sequences[true_models_F] = np.random.beta(F_a, F_b, n_f)
793+ if n_g > 0:
794+ sequences[~true_models_F] = np.random.beta(G_a, G_b, n_g)
795+
794796 return sequences, true_models_F
795797```
796798
@@ -819,17 +821,8 @@ def compute_likelihood_ratios(sequences):
819821 Compute likelihood ratios for given sequences.
820822 """
821823
822- N, T = sequences.shape
823- l_ratios = np.empty((N, T))
824- L_cumulative = np.empty((N, T))
825-
826- for i in range(N):
827- for t in range(T):
828- l_ratios[i, t] = f(sequences[i, t]) / g(sequences[i, t])
829-
830- # Compute cumulative products
831- L_cumulative[i, :] = np.cumprod(l_ratios[i, :])
832-
824+ l_ratios = f(sequences) / g(sequences)
825+ L_cumulative = np.cumprod(l_ratios, axis=1)
833826 return l_ratios, L_cumulative
834827```
835828
@@ -868,49 +861,40 @@ Now let's simulate the protocol 1 and compute the error probabilities
868861# Set parameters
869862π_minus_1 = 0.5
870863T_max = 30
871- N_simulations = 5000
872-
873- # Protocol 1: Bayesian Model Selection
874- print("Protocol 1: Bayesian Model Selection")
864+ N_simulations = 10_000
875865
876866sequences_p1, true_models_p1 = protocol_1(
877867 π_minus_1, T_max, N_simulations)
878868l_ratios_p1, L_cumulative_p1 = compute_likelihood_ratios(sequences_p1)
879869
880870# Compute error probabilities for different sample sizes
881- T_range = range(1, T_max + 1)
882- α_T = np.empty(T_max) # P(L_T < 1 | f)
883- β_T = np.empty(T_max) # P(L_T >= 1 | g)
884- bayesian_error_prob = np.empty(T_max)
871+ T_range = np.arange(1, T_max + 1)
885872
886- for t in T_range:
887- t_idx = t - 1
888-
889- # Type I error: reject H_0 when it's true
890- # (model f generates data)
891- f_sequences = L_cumulative_p1[true_models_p1, t_idx]
892- α_T[t_idx] = np.mean(f_sequences < 1)
893-
894- # Type II error: accept H_0 when it's false
895- # (model g generates data)
896- g_sequences = L_cumulative_p1[~true_models_p1, t_idx]
897- β_T[t_idx] = np.mean(g_sequences >= 1)
898-
899- # Bayesian error probability
900- bayesian_error_prob[t_idx] = 0.5 * (α_T[t_idx] + β_T[t_idx])
873+ # Boolean masks for true models
874+ mask_f = true_models_p1
875+ mask_g = ~true_models_p1
901876
902- # Plot results for Protocol 1
877+ # Select cumulative likelihoods for each model
878+ L_f = L_cumulative_p1[mask_f, :]
879+ L_g = L_cumulative_p1[mask_g, :]
880+
881+ α_T = np.mean(L_f < 1, axis=0)
882+ β_T = np.mean(L_g >= 1, axis=0)
883+
884+ error_prob = 0.5 * (α_T + β_T)
885+
886+ # Plot results
903887fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
904888
905889ax1.plot(T_range, α_T, 'b-',
906- label=r'$\alpha_T$ (Type I error) ', linewidth=2)
890+ label=r'$\alpha_T$', linewidth=2)
907891ax1.plot(T_range, β_T, 'r-',
908- label=r'$\beta_T$ (Type II error) ', linewidth=2)
892+ label=r'$\beta_T$', linewidth=2)
909893ax1.set_xlabel('$T$')
910894ax1.set_ylabel('error probability')
911895ax1.legend()
912896
913- ax2.plot(T_range, bayesian_error_prob , 'g-',
897+ ax2.plot(T_range, error_prob , 'g-',
914898 label=r'$\frac{1}{2}(\alpha_T+\beta_T)$', linewidth=2)
915899ax2.set_xlabel('$T$')
916900ax2.set_ylabel('error probability')
@@ -922,7 +906,7 @@ plt.show()
922906print(f"At T={T_max}:")
923907print(f"α_{T_max} = {α_T[-1]:.4f}")
924908print(f"β_{T_max} = {β_T[-1]:.4f}")
925- print(f"Bayesian error probability = {bayesian_error_prob [-1]:.4f}")
909+ print(f"Bayesian error probability = {error_prob [-1]:.4f}")
926910```
927911
928912## Classification
943927Under this rule, the expected misclassification rate is
944928
945929$$
946- p(\textrm{misclassification}) = {1 \over 2} (\alpha_1 + \beta_1 )
930+ p(\textrm{misclassification}) = {1 \over 2} (\tilde \alpha_t + \tilde \beta_t )
947931$$ (eq:classerrorprob)
948932
933+ where $\tilde \alpha_t = {\rm Prob}(l_t < 1 \mid f)$ and $\tilde \beta_t = {\rm Prob}(l_t \geq 1 \mid g)$.
934+
949935Now let's simulate protocol 2 and compute the error probabilities
950936
951937```{code-cell} ipython3
@@ -956,28 +942,31 @@ l_ratios_p2, _ = compute_likelihood_ratios(sequences_p2)
956942# Find decision boundary where f(w) = g(w)
957943root = brentq(lambda w: f(w) / g(w) - 1, 0.001, 0.999)
958944
959- print(f"Decision boundary points: {root}")
960-
961- # Compute theoretical α_1 and β_1 using integration
945+ # Compute theoretical tilde α_t and tilde β_t
962946def α_integrand(w):
963- """Integrand for α_1 = P(l_1 < 1 | f)"""
947+ """Integrand for tilde α_t = P(l_t < 1 | f)"""
964948 return f(w) if f(w) / g(w) < 1 else 0
965949
966950def β_integrand(w):
967- """Integrand for β_1 = P(l_1 >= 1 | g)"""
951+ """Integrand for tilde β_t = P(l_t >= 1 | g)"""
968952 return g(w) if f(w) / g(w) >= 1 else 0
969953
970954# Compute the integrals
971- α_1_theory , _ = quad(α_integrand, 0, 1, limit=100)
972- β_1_theory , _ = quad(β_integrand, 0, 1, limit=100)
955+ α_theory , _ = quad(α_integrand, 0, 1, limit=100)
956+ β_theory , _ = quad(β_integrand, 0, 1, limit=100)
973957
974- theory_error = 0.5 * (α_1_theory + β_1_theory )
958+ theory_error = 0.5 * (α_theory + β_theory )
975959
976- print(f"theoretical α_1 = {α_1_theory :.4f}")
977- print(f"theoretical β_1 = {β_1_theory :.4f}")
960+ print(f"theoretical tilde α_t = {α_theory :.4f}")
961+ print(f"theoretical tilde β_t = {β_theory :.4f}")
978962print(f"theoretical classification error probability = {theory_error:.4f}")
963+ ```
964+
965+ Since for each $t$, the decision boundary is the same, we can plot the distributions of $f$ and $g$ and the decision boundary
966+
967+ ```{code-cell} ipython3
968+ :tags: [hide-input]
979969
980- # Visualization of distributions and decision boundary
981970fig, ax = plt.subplots(figsize=(7, 6))
982971
983972w_range = np.linspace(0.001, 0.999, 1000)
@@ -990,6 +979,19 @@ ax.plot(w_range, f_values, 'b-',
990979ax.plot(w_range, g_values, 'r-',
991980 label=r'$g(w) \sim Beta(3,1.2)$', linewidth=2)
992981
982+ type1_prob = 1 - beta_dist.cdf(root, F_a, F_b)
983+ type2_prob = beta_dist.cdf(root, G_a, G_b)
984+
985+ w_type1 = w_range[w_range >= root]
986+ f_type1 = [f(w) for w in w_type1]
987+ ax.fill_between(w_type1, 0, f_type1, alpha=0.3, color='blue',
988+ label=fr'$\tilde \alpha_t = {type1_prob:.2f}$')
989+
990+ w_type2 = w_range[w_range <= root]
991+ g_type2 = [g(w) for w in w_type2]
992+ ax.fill_between(w_type2, 0, g_type2, alpha=0.3, color='red',
993+ label=fr'$\tilde \beta_t = {type2_prob:.2f}$')
994+
993995ax.axvline(root, color='green', linestyle='--', alpha=0.7,
994996 label=f'decision boundary: $w=${root:.3f}')
995997
@@ -1001,56 +1003,58 @@ plt.tight_layout()
10011003plt.show()
10021004```
10031005
1006+ On the left of the decision boundary, $f$ is more likely than $g$, so $l_t \geq 1$.
1007+
1008+ On the right of the decision boundary, $g$ is more likely than $f$, so $l_t < 1$.
1009+
10041010Let's see how it performs in the simulated data
10051011
10061012```{code-cell} ipython3
1007- correct_classifications = np.empty(T_max)
1013+ accuracy = np.empty(T_max)
10081014
10091015for t in range(T_max):
1010- predictions = (l_ratios_p2[:, t] > 1).astype(int )
1016+ predictions = (l_ratios_p2[:, t] >= 1 )
10111017 actual = true_sources_p2[:, t]
1012- correct_classifications [t] = np.mean(predictions == actual)
1018+ accuracy [t] = np.mean(predictions == actual)
10131019
10141020plt.figure(figsize=(10, 6))
1015- plt.plot(range(1, T_max + 1), correct_classifications ,
1021+ plt.plot(range(1, T_max + 1), accuracy ,
10161022 'b-', linewidth=2, label='empirical accuracy')
10171023plt.axhline(1 - theory_error, color='r', linestyle='--',
10181024 label=f'theoretical accuracy = {1 - theory_error:.4f}')
1019- plt.xlabel('time step ')
1025+ plt.xlabel('$t$ ')
10201026plt.ylabel('accuracy')
10211027plt.legend()
10221028plt.ylim(0.5, 1.0)
10231029plt.show()
10241030```
10251031
1026- Let's also compare the two protocols by showing how the error probabilities evolve differently.
1032+ Let's also compare the two protocols by showing how the error probabilities evolve differently
10271033
10281034```{code-cell} ipython3
1029- # Comparison of error probabilities between protocols
10301035fig, ax = plt.subplots(figsize=(7, 6))
10311036
1032- ax.plot(T_range, bayesian_error_prob, 'b-', linewidth=2,
1033- label='Protocol 1 (Model Selection)')
1034- ax.axhline(theory_error, color='r', linestyle='--', linewidth=2,
1035- label=f'Protocol 2 (Classification) = {theory_error:.4f}')
1036- ax.set_xlabel('T')
1037+ ax.plot(T_range, error_prob, linewidth=2,
1038+ label='Protocol 1')
1039+ ax.plot(T_range, 1-accuracy, linestyle='--', linewidth=2,
1040+ label=f'Protocol 2')
10371041ax.set_ylabel('error probability')
10381042ax.legend()
10391043plt.show()
10401044```
10411045
10421046From the figure above, we can see:
10431047
1048+ - For both protocols, the error probability starts at roughly the same level, up to sampling randomness.
1049+
10441050- For protocol 1, the error probability decreases as we collect more data because we're trying to determine which single model generated the entire sequence. More data provides stronger evidence.
10451051
10461052- For protocol 2, the error probability remains constant because each observation is classified independently. The accuracy depends only on the likelihoods that the two models assign to the single observation.
10471053
1048- - Under Protocol 1, the Bayesian error probability converges to zero as $T \to \infty$, while under Protocol 2, it remains constant at ${1 \over 2}(\alpha_1 + \beta_1)$.
1049-
10501054## Sequels
10511055
10521056Likelihood processes play an important role in Bayesian learning, as described in {doc}`likelihood_bayes`
10531057and as applied in {doc}`odu`.
10541058
1055- Likelihood ratio processes appear again in {doc}`additive_functionals`, which contains another illustration
1059+ Likelihood ratio processes appear again in {doc}`advanced:additive_functionals`, which contains another illustration
10561060of the **peculiar property** of likelihood ratio processes described above.
0 commit comments