1- import Probability.Probability.Induction
1+ import Probability.Probability.Defs
22
33import Mathlib.Algebra.BigOperators.Fin
44import Mathlib.Algebra.BigOperators.Group.Finset.Basic
55import Mathlib.Data.Fintype.BigOperators
66
7- /-!
7+ /-!
88 # Basic properties for probability spaces and expectations
99
1010 The main results:
@@ -14,129 +14,59 @@ import Mathlib.Data.Fintype.BigOperators
1414 - The law of total expectations
1515 -/
1616
17- namespace Finprob
17+ namespace Findist
1818
19- variable (P : Finprob) (B : FinRV Bool)
20-
21- /-- If supported then can be decomposed to the immediate probability and the
22- remaining probability -/
23- theorem decompose_supp (supp : P.supported) :
24- ℙ[ B // P ] = (B P.ωhead).rec 0 P.phead + (1 -P.phead) * ℙ[ B // P.shrink supp ] :=
25- by simp [Finprob.phead, Finprob.shrink]
26- exact P.ℙ.decompose_supp B P.nonempty_P (P.phead_supp_ne_one supp)
27-
28- theorem decompose_degen (degen : P.degenerate) : ℙ[ B // P ] = (B P.ωhead).rec 0 P.phead :=
29- by have tz := P.prob.degenerate_tail_zero degen
30- simp [Pr.probability, ωhead]
31- have almost := P.ℙ.iprod_first_of_tail_zero B P.nonempty_P tz
32- rw [List.length_tail] at almost
33- exact almost
19+ variable {n : ℕ} (P : Findist n) (B : FinRV n Bool)
3420
3521-- TODO: is there a way to simplify this result to not use induction?
36- theorem in_prob (P : Finprob) : Prob ℙ[ B // P ] :=
37- by have hip := P.phead_prob
38- by_cases h : P.supported
39- · rw [P.decompose_supp B h]
40- have ih := Finprob.in_prob (P.shrink h)
41- simp only [Prob] at ⊢ ih hip
42- cases B P.ωhead
43- · simp only;
44- constructor;
45- . have prd_zero : 0 ≤ (1 - P.phead) * ℙ[B//P.shrink h] := Rat.mul_nonneg P.phead_prob.of_complement.1 ih.1
46- simp_all only [phead, Pr.probability, zero_add]
47- · have prd_one : (1 - P.phead) * ℙ[B//P.shrink h] ≤ 1 := mul_le_one₀ P.phead_prob.of_complement.2 ih.1 ih.2
48- simp_all only [phead, Pr.probability, zero_add]
49- · simp only;
50- constructor;
51- · calc
52- 0 ≤ ℙ[B//P.shrink h] := ih.1
53- _ ≤ P.phead * 1 + (1 - P.phead) * ℙ[B//P.shrink h] := P.phead_prob.lower_bound_snd ih.2
54- _ = P.phead + (1 - P.phead) * ℙ[B//P.shrink h] := by ring
55- · calc
56- P.phead + (1 - P.phead) * ℙ[B//P.shrink h] =
57- P.phead * 1 + (1 - P.phead) * ℙ[B//P.shrink h] := by ring
58- _ ≤ 1 := P.phead_prob.upper_bound_fst ih.2
59- · rw [P.decompose_degen B (P.degen_of_not_supp h) ]
60- cases B P.ωhead
61- · simp_all
62- · simp_all
63- termination_by P.length
64- decreasing_by exact shrink_length_lt P h
22+ theorem in_prob (P : Findist n) : Prob ℙ[B // P] := sorry
6523
6624theorem ge_zero : ℙ[ B // P ] ≥ 0 := (P.in_prob B).left
6725
68- theorem le_one : ℙ[ B // P ] ≤ 1 := (P.in_prob B).right
69-
70- end Finprob
71-
72- ------------------------------ List ---------------------------
73-
74- namespace List
75-
76- variable (B C : FinRV Bool)
77-
78- lemma list_compl_sums_to_one (L : List ℚ) : L.iprodb B + L.iprodb (B.not) = L.sum :=
79- by induction L with
80- | nil => simp [List.iprodb]
81- | cons head tail =>
82- simp [List.iprodb]
83- cases (B tail.length)
84- · simp; linarith
85- · simp; linarith
86-
87-
88-
89- lemma law_of_total_probs (L : List ℚ) : L.iprodb B = L.iprodb (B ∧ᵣ C) + L.iprodb (B ∧ᵣ (¬ᵣC) ) :=
90- by induction L with
91- | nil => simp [List.iprodb]
92- | cons head tail =>
93- simp [List.iprodb]
94- cases bB: B tail.length
95- · cases bC : C tail.length; simp_all; simp_all
96- · cases bC : C tail.length
97- · simp_all; ring;
98- · simp_all; ring;
99-
100- theorem law_of_total_expectations (L : List ℚ) (X : FinRV ℚ) (B : FinRV Bool) :
101- L.iprod X = L.iprod (fun ω => if B ω then X ω else 0 ) + L.iprod (fun ω => if ¬B ω then X ω else 0 ) :=
102- by induction L with
103- | nil => simp [List.iprod]
104- | cons head tail =>
105- simp [List.iprod]
106- cases bB: B tail.length
107- · simp_all; ring
108- · simp_all; ring
109- end List
26+ theorem le_one : ℙ[B // P] ≤ 1 := (P.in_prob B).right
11027
28+ end Findist
11129
11230------------------------------ Probablity ---------------------------
11331
11432namespace Pr
11533
116- variable (P : Finprob) (B : FinRV Bool ) (C : FinRV Bool)
34+ variable {n : ℕ} (P : Findist n ) (B C : FinRV n Bool)
11735
118-
119- theorem prob_compl_sums_to_one : ℙ[B // P] + ℙ[¬ᵣB // P] = 1 :=
120- calc
121- ℙ[ B // P ] + ℙ[ ¬ᵣB // P] = P.ℙ.sum := P.ℙ.list_compl_sums_to_one B
122- _ = 1 := P.prob.normalized
36+ theorem prob_compl_sums_to_one : ℙ[B // P] + ℙ[¬ᵣB // P] = 1 := sorry
12337
12438theorem prob_compl_one_minus : ℙ[¬ᵣB // P] = 1 - ℙ[B // P] :=
12539 by have := prob_compl_sums_to_one P B
12640 linarith
12741
128- theorem law_of_total_probs_bool : ℙ[B // P] = ℙ[ B ∧ᵣ C // P] + ℙ[ B ∧ᵣ ¬ᵣC //P] :=
129- P.ℙ.law_of_total_probs B C
130-
131- theorem conditional_total (h : 0 < ℙ[C // P]) : ℙ[B ∧ᵣ C // P] = ℙ[ B | C // P] * ℙ[ C // P] :=
132- by simp [probability_cnd] at ⊢ h
133- have : P.ℙ.iprodb C * (P.ℙ.iprodb C)⁻¹ = 1 :=
134- Rat.mul_inv_cancel (P.ℙ.iprodb C) (Ne.symm (ne_of_lt h))
135- calc
136- P.ℙ.iprodb (B ∧ᵣC) = P.ℙ.iprodb (B ∧ᵣC) * 1 := by ring
137- _ = P.ℙ.iprodb (B ∧ᵣC) * (P.ℙ.iprodb C * (P.ℙ.iprodb C)⁻¹) := by rw [←this]
138- _ = P.ℙ.iprodb (B ∧ᵣ C) / P.ℙ.iprodb C * P.ℙ.iprodb C := by ring
42+ @[simp]
43+ lemma refold_probability : P.p ⬝ᵥ (𝕀 ∘ B) = ℙ[B // P] := rfl
13944
45+ theorem law_of_total_probs_bool : ℙ[B // P] = ℙ[B * C // P] + ℙ[B * (¬ᵣC) // P] :=
46+ by
47+ unfold Pr.probability
48+ have h : ∀ i : Fin n, (𝕀 (B i)) = (𝕀 (B i * C i)) + (𝕀 (B i * (¬ᵣ C) i)) :=
49+ by
50+ intro i
51+ by_cases hB : B i
52+ · by_cases hC : C i
53+ · simp [hB, hC, FinRV.not, indicator]
54+ · simp [hB, hC, FinRV.not, indicator]
55+ · by_cases hC : C i
56+ · simp [hB, hC, FinRV.not, indicator]
57+ · simp [hB, hC, FinRV.not, indicator]
58+ sorry ---I tried to do this proof but got stuck, feel free to delete my work
59+
60+
61+ theorem conditional_total (h : 0 < ℙ[C // P]) : ℙ[B * C // P] = ℙ[B | C // P] * ℙ[C // P] :=
62+ sorry
63+ -- by simp [ probability_cnd ] at ⊢ h
64+ -- have : P.ℙ.iprodb C * (P.ℙ.iprodb C)⁻¹ = 1 :=
65+ -- Rat.mul_inv_cancel (P.ℙ.iprodb C) (Ne.symm (ne_of_lt h))
66+ -- calc
67+ -- P.ℙ.iprodb (B ∧ᵣC) = P.ℙ.iprodb (B ∧ᵣC) * 1 := by ring
68+ -- _ = P.ℙ.iprodb (B ∧ᵣC) * (P.ℙ.iprodb C * (P.ℙ.iprodb C)⁻¹) := by rw [ ←this ]
69+ -- _ = P.ℙ.iprodb (B ∧ᵣ C) / P.ℙ.iprodb C * P.ℙ.iprodb C := by ring
14070
14171
14272theorem law_total_prbs_cnd (h1 : 0 < ℙ[C // P]) (h2 : ℙ[C // P] < 1 )
@@ -146,116 +76,144 @@ theorem law_total_prbs_cnd (h1 : 0 < ℙ[C // P]) (h2 : ℙ[C // P] < 1)
14676 rw [←conditional_total P B (¬ᵣC) h2']
14777 exact law_of_total_probs_bool P B C
14878
79+ variable {k : ℕ} {L : FinRV n (Fin k)}
14980
150- variable {K : ℕ} {L : FinRV (Fin K)}
151-
152- theorem law_of_total_probs : ∑ i : Fin K, ℙ[ B ∧ᵣ (L =ᵣ i) // P ] = ℙ[B // P] := sorry
81+ theorem law_of_total_probs : ∑ i : Fin k, ℙ[B * (L =ᵣ i) // P] = ℙ[B // P] := sorry
15382
15483end Pr
15584
15685------------------------------ Expectation ---------------------------
15786
15887namespace PMF
15988
160- variable {K : ℕ} {L : FinRV (Fin K )}
161- variable {pmf : Fin K → ℚ}
162- variable {P : Finprob }
89+ variable {n : ℕ} {k : ℕ} {L : FinRV n (Fin k )}
90+ variable {pmf : Fin k → ℚ}
91+ variable {P : Findist n }
16392
164- theorem pmf_rv_k_ge_1 (L : FinRV (Fin K)) : 0 < K :=
165- match K with
166- | Nat.zero => Fin.elim0 (L 0 )
167- | Nat.succ n => Nat.succ_pos n
93+ theorem pmf_rv_k_ge_1 (h : PMF pmf P L) : 0 < k :=
94+ match k with
95+ | Nat.zero => Fin.pos <| L ⟨ 0 ,P.nonempty⟩
96+ | Nat.succ k₂ => Nat.zero_lt_succ k₂
16897
16998end PMF
17099
171100------------------------------ Expectation ---------------------------
172101
173102namespace Ex
174103
175- variable {P : Finprob }
176- variable {K : ℕ} {X : FinRV ℚ} {B : FinRV Bool} {L : FinRV (Fin K )}
104+ variable {n : ℕ} { P : Findist n }
105+ variable {k : ℕ} {X : FinRV n ℚ} {B : FinRV n Bool} {L : FinRV n (Fin k )}
177106
178- variable {pmf : Fin K → ℚ}
107+ variable {pmf : Fin k → ℚ}
179108
180- theorem law_total_exp_bool (h1 : 0 < ℙ[B // P]) (h2 : 0 < ℙ[¬ᵣB // P]) :
181- 𝔼[X // P] = 𝔼[X | B // P] * ℙ[B // P] + 𝔼[X | ¬ᵣB // P] * ℙ[¬ᵣB // P] :=
182- by
183- simp [expect, expect_cnd] at ⊢ h1 h2
184- have h1' : P.ℙ.iprodb B ≠ 0 := Ne.symm (ne_of_lt h1)
185- have h2' : P.ℙ.iprodb (¬ᵣB) ≠ 0 := Ne.symm (ne_of_lt h2)
186- have h3' : P.ℙ.iprod X = P.ℙ.iprod (fun ω => if B ω then X ω else 0 ) + P.ℙ.iprod (fun ω => if ¬B ω then X ω else 0 ) :=
187- P.ℙ.law_of_total_expectations X B
188- rw [h3']
189- simp_all
190- sorry
109+ example (f g : Fin k → ℚ) (h : f = g) : ∑ i, f i = ∑ i, g i := by
110+ let ff := f
111+ have h2 : ff = f := by unfold ff; rfl
112+ rw [←h2]
113+ rw [←h]
114+
115+
116+ theorem prob_eq_exp_ind : ℙ[B // P] = 𝔼[𝕀 ∘ B // P] := sorry
191117
192118-- TODO: The following derivations should be our focus
193119
194120---- STEP 1:
121+ variable (g : Fin k → ℚ)
122+
123+ --abbrev 𝕀ᵣ (B : FinRV n Bool) : FinRV n ℚ := fun ω => 𝕀 (B ω)
124+
125+ theorem fin_sum_g : ∀ ω, ∑ i, (g i) * (𝕀 ∘ (L =ᵣ i)) ω = g (L ω) := by
126+ intro ω
127+ unfold FinRV.eq 𝕀 Function.comp indicator
128+ simp
129+ generalize hk : L ω = j
130+ let f i := g i * (decide (j = i)).rec 0 1
131+ have h1 (i : Fin k) : j ≠ i → f i = 0 := by intro h; simp_all [f]
132+ have h2 (i : Fin k ) : j = i → f i = g j := by intro h; simp_all [f]
133+ have hh : f = (fun i ↦ g i * (decide (j = i)).rec 0 1 ) := by simp [f]
134+ rw [←hh]
135+ rw [←h2 j rfl]
136+ apply Finset.sum_eq_single_of_mem
137+ · simp only [Finset.mem_univ]
138+ · intro b _ hneq
139+ exact h1 b hneq.symm
140+
141+ theorem idktheorem (P : Findist n) (L : FinRV n (Fin k)) (g : Fin k → ℚ) :
142+ 𝔼[g ∘ L // P] = ∑ i : Fin k, g i * ℙ[L =ᵣ i // P] := sorry
195143
196144-- LOTUS: the law of the unconscious statistician (or similar)
197- theorem LOTUS {g : Fin K → ℚ} (h : PMF pmf P L):
198- 𝔼[ g ∘ L // P ] = ∑ i : Fin K, (pmf i) * (g i) := sorry
145+ theorem LOTUS {g : Fin k → ℚ} (h : PMF pmf P L):
146+ 𝔼[ g ∘ L // P ] = ∑ i : Fin k, (pmf i) * (g i) :=
147+ by rw [idktheorem P L g]
148+ apply Fintype.sum_congr
149+ intro i
150+ rw [h i]
151+ ring
199152
200153-- this proof will rely on the extensional property of function (functions are the same if they
201154-- return the same value for the same inputs; for all inputs)
202- theorem condexp_pmf : 𝔼[ X |ᵣ L // P] = (fun i ↦ 𝔼[ X | (L =ᵣ i) // P]) ∘ L :=
155+ theorem condexp_pmf : 𝔼[ X |ᵣ L // P] = (fun i ↦ 𝔼[ X | (L =ᵣ i) // P]) ∘ L :=
203156 by sorry
204157
205158
206- theorem expexp : 𝔼[ 𝔼[ X |ᵣ L // P] // P ] = ∑ i : Fin K , 𝔼[ X | L =ᵣ i // P] * ℙ[ L =ᵣ i // P] := by
159+ theorem expexp : 𝔼[ 𝔼[ X |ᵣ L // P] // P ] = ∑ i : Fin k , 𝔼[ X | L =ᵣ i // P] * ℙ[ L =ᵣ i // P] := by
207160 let pmf i := ℙ[ L =ᵣ i // P]
208161 have h_pmf : PMF pmf P L := fun i ↦ rfl
209162 rw [condexp_pmf, LOTUS h_pmf]
210163 apply Finset.sum_congr rfl
211164 intro i _
212165 rw [mul_comm]
213166
214- -- STEP 2:
215-
216- theorem ind_eq_zero_of_cond_empty (h : ℙ[B // P] = 0 ) :
217- ∀ ω : (Fin P.length), (𝕀ᵣ B) ω = 0 :=
218- by sorry
167+ -- STEP 2:
219168
169+ theorem μ_eq_zero_of_cond_empty (h : ℙ[B // P] = 0 ) : ∀ X, 𝔼[X * (𝕀 ∘ B) // P] = 0 := sorry
220170
221- theorem μ _eq_zero_of_cond_empty (h : ℙ[B // P] = 0 ) : μ ℙ X (𝕀ᵣ B) = 0 := sorry
171+ example (a : ℚ ) : a * 0 = 0 := Rat.mul_zero a
222172
223- theorem exp_prod_μ (i : Fin K) : 𝔼[ X | B // P] * ℙ[ B // P] = μ P X (𝕀ᵣ B) :=
173+ theorem exp_prod_μ : 𝔼[ X | B // P] * ℙ[B // P] = 𝔼[X * (𝕀 ∘ B) // P] :=
224174 by unfold expect_cnd
225175 by_cases h: ℙ[B//P] = 0
226- · rw [μ_eq_zero_of_cond_empty h]
227- ring
228- · simp_all only [isUnit_iff_ne_zero, ne_eq, not_false_eq_true,
229- IsUnit.div_mul_cancel]
176+ · rw [h, Rat.mul_zero]
177+ sorry
178+ · sorry
179+ --simp_all only [isUnit_iff_ne_zero, ne_eq, not_false_eq_true,
180+ -- IsUnit.div_mul_cancel]
230181
231182-- STEP 3:
232183-- proves that μ distributes over the random variables
233- theorem μ_dist (h : Fin K → FinRV ℚ) : ∑ i : Fin K, μ P X (h i) = μ P X (fun ω ↦ ∑ i : Fin K , (h i) ω) := sorry
184+ theorem μ_dist (h : Fin k → FinRV n ℚ) : ∑ i : Fin k, 𝔼[X * (h i) // P] = 𝔼[X * (fun ω ↦ ∑ i : Fin k , (h i) ω) // P] := sorry
234185
235186
236187-- TODO: need to sum all probabilities
237188
238- theorem fin_sum : ∀ ω : ℕ, ∑ i : Fin K, (𝕀ᵣ (L =ᵣ i)) ω = 1 := sorry
239189
240- theorem exp_eq_exp_cond_true : 𝔼[X // P] = μ P X (fun ω ↦ 1 ) := sorry
190+ theorem fin_sum : ∀ ω : Fin n, ∑ i : Fin k, (𝕀 ∘ (L =ᵣ i)) ω = (1 :ℚ) :=
191+ by have := fin_sum_g 1 (L := L)
192+ simp_all only [Pi.one_apply, Function.comp_apply, FinRV.eq, one_mul, implies_true]
193+
194+ theorem exp_eq_exp_cond_true : 𝔼[X // P] = 𝔼[X * (fun ω ↦ 1 ) // P] := sorry
241195
242196
243197-- TODO: need to sum all probabilities
244198
245199
246- example {f g : ℕ → ℚ} {m : ℕ} (h : ∀ n : ℕ, f n = g n) : ∑ i : Fin m, f i = ∑ i : Fin m, g i :=
200+ example {f g : ℕ → ℚ} {m : ℕ} (h : ∀ n : ℕ, f n = g n) :
201+ ∑ i : Fin m, f i = ∑ i : Fin m, g i :=
247202 by apply Finset.sum_congr
248203 · simp
249- · simp_all
250-
204+ · simp_all
205+
251206-- STEP 4: We now use the results above to prove the law of total expectations
252- theorem law_total_exp : 𝔼[ 𝔼[ X |ᵣ L // P] // P ] = 𝔼[ X // P] :=
207+ theorem law_total_exp : 𝔼[𝔼[ X |ᵣ L // P] // P] = 𝔼[X // P] :=
253208 calc
254- 𝔼[𝔼[X |ᵣ L // P] // P ] = ∑ i : Fin K, 𝔼[ X | L =ᵣ i // P ] * ℙ[ L =ᵣ i // P] := expexp
255- _ = ∑ i : Fin K, μ P X (𝕀ᵣ (L =ᵣ i)) := by apply Fintype.sum_congr;
256- exact fun a => exp_prod_μ (L K)
257- _ = μ P X (fun ω ↦ ∑ i : Fin K, (𝕀ᵣ (L =ᵣ i)) ω) := μ_dist fun i => 𝕀ᵣ (L=ᵣi)
258- _ = μ P X (fun ω ↦ 1 ) := by conv => lhs; congr; rfl; rfl; intro ω; exact fin_sum ω
209+ 𝔼[𝔼[X |ᵣ L // P] // P ] = ∑ i : Fin k, 𝔼[ X | L =ᵣ i // P ] * ℙ[ L =ᵣ i // P] := expexp
210+ _ = ∑ i : Fin k, 𝔼[X * (𝕀 ∘ (L =ᵣ i)) // P] := by
211+ apply Finset.sum_congr
212+ · rfl
213+ · exact fun a _ ↦ exp_prod_μ
214+ _ = 𝔼[X * (fun ω ↦ ∑ i : Fin k, (𝕀 ∘ (L =ᵣ i)) ω) // P] := μ_dist fun i => 𝕀 ∘ (L=ᵣi)
215+ _ = 𝔼[X * (fun ω ↦ 1 ) // P] := by
216+ unfold expect; conv => lhs; congr; rfl; congr; rfl; intro ω; exact fin_sum ω
259217 _ = 𝔼[X // P] := exp_eq_exp_cond_true.symm
260218
261219end Ex
0 commit comments