Skip to content

Commit 9031777

Browse files
authored
Merge pull request #32 from formalproofs/lotus
- Change in definition of expectations and probabilities - A partial proof of lotus
2 parents e802f1a + d79b3de commit 9031777

3 files changed

Lines changed: 213 additions & 331 deletions

File tree

Probability.lean

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,6 @@
44
import Probability.Probability.Prelude
55
import Probability.Probability.Defs
66
import Probability.Probability.Basic
7-
import Probability.Probability.Induction
7+
--import Probability.Probability.Induction
88

99
import Probability.MDP.Histories

Probability/Probability/Basic.lean

Lines changed: 115 additions & 157 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
import Probability.Probability.Induction
1+
import Probability.Probability.Defs
22

33
import Mathlib.Algebra.BigOperators.Fin
44
import Mathlib.Algebra.BigOperators.Group.Finset.Basic
55
import Mathlib.Data.Fintype.BigOperators
66

7-
/-!
7+
/-!
88
# Basic properties for probability spaces and expectations
99
1010
The main results:
@@ -14,129 +14,59 @@ import Mathlib.Data.Fintype.BigOperators
1414
- The law of total expectations
1515
-/
1616

17-
namespace Finprob
17+
namespace Findist
1818

19-
variable (P : Finprob) (B : FinRV Bool)
20-
21-
/-- If supported then can be decomposed to the immediate probability and the
22-
remaining probability -/
23-
theorem decompose_supp (supp : P.supported) :
24-
ℙ[ B // P ] = (B P.ωhead).rec 0 P.phead + (1-P.phead) * ℙ[ B // P.shrink supp ] :=
25-
by simp [Finprob.phead, Finprob.shrink]
26-
exact P.ℙ.decompose_supp B P.nonempty_P (P.phead_supp_ne_one supp)
27-
28-
theorem decompose_degen (degen : P.degenerate) : ℙ[ B // P ] = (B P.ωhead).rec 0 P.phead :=
29-
by have tz := P.prob.degenerate_tail_zero degen
30-
simp [Pr.probability, ωhead]
31-
have almost := P.ℙ.iprod_first_of_tail_zero B P.nonempty_P tz
32-
rw [List.length_tail] at almost
33-
exact almost
19+
variable {n : ℕ} (P : Findist n) (B : FinRV n Bool)
3420

3521
-- TODO: is there a way to simplify this result to not use induction?
36-
theorem in_prob (P : Finprob) : Prob ℙ[ B // P ] :=
37-
by have hip := P.phead_prob
38-
by_cases h : P.supported
39-
· rw [P.decompose_supp B h]
40-
have ih := Finprob.in_prob (P.shrink h)
41-
simp only [Prob] at ⊢ ih hip
42-
cases B P.ωhead
43-
· simp only;
44-
constructor;
45-
. have prd_zero : 0 ≤ (1 - P.phead) * ℙ[B//P.shrink h] := Rat.mul_nonneg P.phead_prob.of_complement.1 ih.1
46-
simp_all only [phead, Pr.probability, zero_add]
47-
· have prd_one : (1 - P.phead) * ℙ[B//P.shrink h] ≤ 1 := mul_le_one₀ P.phead_prob.of_complement.2 ih.1 ih.2
48-
simp_all only [phead, Pr.probability, zero_add]
49-
· simp only;
50-
constructor;
51-
· calc
52-
0 ≤ ℙ[B//P.shrink h] := ih.1
53-
_ ≤ P.phead * 1 + (1 - P.phead) * ℙ[B//P.shrink h] := P.phead_prob.lower_bound_snd ih.2
54-
_ = P.phead + (1 - P.phead) * ℙ[B//P.shrink h] := by ring
55-
· calc
56-
P.phead + (1 - P.phead) * ℙ[B//P.shrink h] =
57-
P.phead * 1 + (1 - P.phead) * ℙ[B//P.shrink h] := by ring
58-
_ ≤ 1 := P.phead_prob.upper_bound_fst ih.2
59-
· rw [P.decompose_degen B (P.degen_of_not_supp h) ]
60-
cases B P.ωhead
61-
· simp_all
62-
· simp_all
63-
termination_by P.length
64-
decreasing_by exact shrink_length_lt P h
22+
theorem in_prob (P : Findist n) : Prob ℙ[B // P] := sorry
6523

6624
theorem ge_zero : ℙ[ B // P ] ≥ 0 := (P.in_prob B).left
6725

68-
theorem le_one : ℙ[ B // P ] ≤ 1 := (P.in_prob B).right
69-
70-
end Finprob
71-
72-
------------------------------ List ---------------------------
73-
74-
namespace List
75-
76-
variable (B C : FinRV Bool)
77-
78-
lemma list_compl_sums_to_one (L : List ℚ) : L.iprodb B + L.iprodb (B.not) = L.sum :=
79-
by induction L with
80-
| nil => simp [List.iprodb]
81-
| cons head tail =>
82-
simp [List.iprodb]
83-
cases (B tail.length)
84-
· simp; linarith
85-
· simp; linarith
86-
87-
88-
89-
lemma law_of_total_probs (L : List ℚ) : L.iprodb B = L.iprodb (B ∧ᵣ C) + L.iprodb (B ∧ᵣ (¬ᵣC) ) :=
90-
by induction L with
91-
| nil => simp [List.iprodb]
92-
| cons head tail =>
93-
simp [List.iprodb]
94-
cases bB: B tail.length
95-
· cases bC : C tail.length; simp_all; simp_all
96-
· cases bC : C tail.length
97-
· simp_all; ring;
98-
· simp_all; ring;
99-
100-
theorem law_of_total_expectations (L : List ℚ) (X : FinRV ℚ) (B : FinRV Bool) :
101-
L.iprod X = L.iprod (fun ω => if B ω then X ω else 0) + L.iprod (fun ω => if ¬B ω then X ω else 0) :=
102-
by induction L with
103-
| nil => simp [List.iprod]
104-
| cons head tail =>
105-
simp [List.iprod]
106-
cases bB: B tail.length
107-
· simp_all; ring
108-
· simp_all; ring
109-
end List
26+
theorem le_one : ℙ[B // P] ≤ 1 := (P.in_prob B).right
11027

28+
end Findist
11129

11230
------------------------------ Probablity ---------------------------
11331

11432
namespace Pr
11533

116-
variable (P : Finprob) (B : FinRV Bool) (C : FinRV Bool)
34+
variable {n : ℕ} (P : Findist n) (B C : FinRV n Bool)
11735

118-
119-
theorem prob_compl_sums_to_one : ℙ[B // P] + ℙ[¬ᵣB // P] = 1 :=
120-
calc
121-
ℙ[ B // P ] + ℙ[ ¬ᵣB // P] = P.ℙ.sum := P.ℙ.list_compl_sums_to_one B
122-
_ = 1 := P.prob.normalized
36+
theorem prob_compl_sums_to_one : ℙ[B // P] + ℙ[¬ᵣB // P] = 1 := sorry
12337

12438
theorem prob_compl_one_minus : ℙ[¬ᵣB // P] = 1 - ℙ[B // P] :=
12539
by have := prob_compl_sums_to_one P B
12640
linarith
12741

128-
theorem law_of_total_probs_bool : ℙ[B // P] = ℙ[ B ∧ᵣ C // P] + ℙ[ B ∧ᵣ ¬ᵣC //P] :=
129-
P.ℙ.law_of_total_probs B C
130-
131-
theorem conditional_total (h : 0 < ℙ[C // P]) : ℙ[B ∧ᵣ C // P] = ℙ[ B | C // P] * ℙ[ C // P] :=
132-
by simp [probability_cnd] at ⊢ h
133-
have : P.ℙ.iprodb C * (P.ℙ.iprodb C)⁻¹ = 1 :=
134-
Rat.mul_inv_cancel (P.ℙ.iprodb C) (Ne.symm (ne_of_lt h))
135-
calc
136-
P.ℙ.iprodb (B ∧ᵣC) = P.ℙ.iprodb (B ∧ᵣC) * 1 := by ring
137-
_ = P.ℙ.iprodb (B ∧ᵣC) * (P.ℙ.iprodb C * (P.ℙ.iprodb C)⁻¹) := by rw [←this]
138-
_ = P.ℙ.iprodb (B ∧ᵣ C) / P.ℙ.iprodb C * P.ℙ.iprodb C := by ring
42+
@[simp]
43+
lemma refold_probability : P.p ⬝ᵥ (𝕀 ∘ B) = ℙ[B // P] := rfl
13944

45+
theorem law_of_total_probs_bool : ℙ[B // P] = ℙ[B * C // P] + ℙ[B * (¬ᵣC) // P] :=
46+
by
47+
unfold Pr.probability
48+
have h : ∀ i : Fin n, (𝕀 (B i)) = (𝕀 (B i * C i)) + (𝕀 (B i * (¬ᵣ C) i)) :=
49+
by
50+
intro i
51+
by_cases hB : B i
52+
· by_cases hC : C i
53+
· simp [hB, hC, FinRV.not, indicator]
54+
· simp [hB, hC, FinRV.not, indicator]
55+
· by_cases hC : C i
56+
· simp [hB, hC, FinRV.not, indicator]
57+
· simp [hB, hC, FinRV.not, indicator]
58+
sorry ---I tried to do this proof but got stuck, feel free to delete my work
59+
60+
61+
theorem conditional_total (h : 0 < ℙ[C // P]) : ℙ[B * C // P] = ℙ[B | C // P] * ℙ[C // P] :=
62+
sorry
63+
-- by simp [probability_cnd] at ⊢ h
64+
-- have : P.ℙ.iprodb C * (P.ℙ.iprodb C)⁻¹ = 1 :=
65+
-- Rat.mul_inv_cancel (P.ℙ.iprodb C) (Ne.symm (ne_of_lt h))
66+
-- calc
67+
-- P.ℙ.iprodb (B ∧ᵣC) = P.ℙ.iprodb (B ∧ᵣC) * 1 := by ring
68+
-- _ = P.ℙ.iprodb (B ∧ᵣC) * (P.ℙ.iprodb C * (P.ℙ.iprodb C)⁻¹) := by rw [←this]
69+
-- _ = P.ℙ.iprodb (B ∧ᵣ C) / P.ℙ.iprodb C * P.ℙ.iprodb C := by ring
14070

14171

14272
theorem law_total_prbs_cnd (h1 : 0 < ℙ[C // P]) (h2 : ℙ[C // P] < 1)
@@ -146,116 +76,144 @@ theorem law_total_prbs_cnd (h1 : 0 < ℙ[C // P]) (h2 : ℙ[C // P] < 1)
14676
rw [←conditional_total P B (¬ᵣC) h2']
14777
exact law_of_total_probs_bool P B C
14878

79+
variable {k : ℕ} {L : FinRV n (Fin k)}
14980

150-
variable {K : ℕ} {L : FinRV (Fin K)}
151-
152-
theorem law_of_total_probs : ∑ i : Fin K, ℙ[ B ∧ᵣ (L =ᵣ i) // P ] = ℙ[B // P] := sorry
81+
theorem law_of_total_probs : ∑ i : Fin k, ℙ[B * (L =ᵣ i) // P] = ℙ[B // P] := sorry
15382

15483
end Pr
15584

15685
------------------------------ Expectation ---------------------------
15786

15887
namespace PMF
15988

160-
variable {K : ℕ} {L : FinRV (Fin K)}
161-
variable {pmf : Fin K → ℚ}
162-
variable {P : Finprob}
89+
variable {n : ℕ} {k : ℕ} {L : FinRV n (Fin k)}
90+
variable {pmf : Fin k → ℚ}
91+
variable {P : Findist n}
16392

164-
theorem pmf_rv_k_ge_1 (L : FinRV (Fin K)) : 0 < K :=
165-
match K with
166-
| Nat.zero => Fin.elim0 (L 0)
167-
| Nat.succ n => Nat.succ_pos n
93+
theorem pmf_rv_k_ge_1 (h : PMF pmf P L) : 0 < k :=
94+
match k with
95+
| Nat.zero => Fin.pos <| L ⟨0,P.nonempty⟩
96+
| Nat.succ k₂ => Nat.zero_lt_succ k₂
16897

16998
end PMF
17099

171100
------------------------------ Expectation ---------------------------
172101

173102
namespace Ex
174103

175-
variable {P : Finprob}
176-
variable {K : ℕ} {X : FinRV ℚ} {B : FinRV Bool} {L : FinRV (Fin K)}
104+
variable {n : ℕ} {P : Findist n}
105+
variable {k : ℕ} {X : FinRV n ℚ} {B : FinRV n Bool} {L : FinRV n (Fin k)}
177106

178-
variable {pmf : Fin K → ℚ}
107+
variable {pmf : Fin k → ℚ}
179108

180-
theorem law_total_exp_bool (h1 : 0 < ℙ[B // P]) (h2 : 0 < ℙ[¬ᵣB // P]) :
181-
𝔼[X // P] = 𝔼[X | B // P] * ℙ[B // P] + 𝔼[X | ¬ᵣB // P] * ℙ[¬ᵣB // P] :=
182-
by
183-
simp [expect, expect_cnd] at ⊢ h1 h2
184-
have h1' : P.ℙ.iprodb B ≠ 0 := Ne.symm (ne_of_lt h1)
185-
have h2' : P.ℙ.iprodb (¬ᵣB) ≠ 0 := Ne.symm (ne_of_lt h2)
186-
have h3' : P.ℙ.iprod X = P.ℙ.iprod (fun ω => if B ω then X ω else 0) + P.ℙ.iprod (fun ω => if ¬B ω then X ω else 0) :=
187-
P.ℙ.law_of_total_expectations X B
188-
rw [h3']
189-
simp_all
190-
sorry
109+
example (f g : Fin k → ℚ) (h : f = g) : ∑ i, f i = ∑ i, g i := by
110+
let ff := f
111+
have h2 : ff = f := by unfold ff; rfl
112+
rw [←h2]
113+
rw [←h]
114+
115+
116+
theorem prob_eq_exp_ind : ℙ[B // P] = 𝔼[𝕀 ∘ B // P] := sorry
191117

192118
-- TODO: The following derivations should be our focus
193119

194120
---- STEP 1:
121+
variable (g : Fin k → ℚ)
122+
123+
--abbrev 𝕀ᵣ (B : FinRV n Bool) : FinRV n ℚ := fun ω => 𝕀 (B ω)
124+
125+
theorem fin_sum_g: ∀ ω, ∑ i, (g i) * (𝕀 ∘ (L =ᵣ i)) ω = g (L ω) := by
126+
intro ω
127+
unfold FinRV.eq 𝕀 Function.comp indicator
128+
simp
129+
generalize hk : L ω = j
130+
let f i := g i * (decide (j = i)).rec 0 1
131+
have h1 (i : Fin k) : j ≠ i → f i = 0 := by intro h; simp_all [f]
132+
have h2 (i : Fin k ) : j = i → f i = g j := by intro h; simp_all [f]
133+
have hh : f = (fun i ↦ g i * (decide (j = i)).rec 0 1) := by simp [f]
134+
rw [←hh]
135+
rw [←h2 j rfl]
136+
apply Finset.sum_eq_single_of_mem
137+
· simp only [Finset.mem_univ]
138+
· intro b _ hneq
139+
exact h1 b hneq.symm
140+
141+
theorem idktheorem (P : Findist n) (L : FinRV n (Fin k)) (g : Fin k → ℚ) :
142+
𝔼[g ∘ L // P] = ∑ i : Fin k, g i * ℙ[L =ᵣ i // P] := sorry
195143

196144
-- LOTUS: the law of the unconscious statistician (or similar)
197-
theorem LOTUS {g : Fin K → ℚ} (h : PMF pmf P L):
198-
𝔼[ g ∘ L // P ] = ∑ i : Fin K, (pmf i) * (g i) := sorry
145+
theorem LOTUS {g : Fin k → ℚ} (h : PMF pmf P L):
146+
𝔼[ g ∘ L // P ] = ∑ i : Fin k, (pmf i) * (g i) :=
147+
by rw [idktheorem P L g]
148+
apply Fintype.sum_congr
149+
intro i
150+
rw [h i]
151+
ring
199152

200153
-- this proof will rely on the extensional property of function (functions are the same if they
201154
-- return the same value for the same inputs; for all inputs)
202-
theorem condexp_pmf : 𝔼[ X |ᵣ L // P] = (fun i ↦ 𝔼[ X | (L =ᵣ i) // P]) ∘ L :=
155+
theorem condexp_pmf : 𝔼[ X |ᵣ L // P] = (fun i ↦ 𝔼[ X | (L =ᵣ i) // P]) ∘ L :=
203156
by sorry
204157

205158

206-
theorem expexp : 𝔼[ 𝔼[ X |ᵣ L // P] // P ] = ∑ i : Fin K, 𝔼[ X | L =ᵣ i // P] * ℙ[ L =ᵣ i // P] := by
159+
theorem expexp : 𝔼[ 𝔼[ X |ᵣ L // P] // P ] = ∑ i : Fin k, 𝔼[ X | L =ᵣ i // P] * ℙ[ L =ᵣ i // P] := by
207160
let pmf i := ℙ[ L =ᵣ i // P]
208161
have h_pmf : PMF pmf P L := fun i ↦ rfl
209162
rw [condexp_pmf, LOTUS h_pmf]
210163
apply Finset.sum_congr rfl
211164
intro i _
212165
rw [mul_comm]
213166

214-
-- STEP 2:
215-
216-
theorem ind_eq_zero_of_cond_empty (h : ℙ[B // P] = 0) :
217-
∀ ω : (Fin P.length), (𝕀ᵣ B) ω = 0 :=
218-
by sorry
167+
-- STEP 2:
219168

169+
theorem μ_eq_zero_of_cond_empty (h : ℙ[B // P] = 0) : ∀ X, 𝔼[X * (𝕀 ∘ B) // P] = 0 := sorry
220170

221-
theorem μ_eq_zero_of_cond_empty (h : ℙ[B // P] = 0) : μ ℙ X (𝕀ᵣ B) = 0 := sorry
171+
example (a : ) : a * 0 = 0 := Rat.mul_zero a
222172

223-
theorem exp_prod_μ (i : Fin K) : 𝔼[ X | B // P] * ℙ[ B // P] = μ P X (𝕀ᵣ B) :=
173+
theorem exp_prod_μ : 𝔼[X | B // P] * ℙ[B // P] = 𝔼[X * (𝕀 ∘ B) // P] :=
224174
by unfold expect_cnd
225175
by_cases h: ℙ[B//P] = 0
226-
· rw [μ_eq_zero_of_cond_empty h]
227-
ring
228-
· simp_all only [isUnit_iff_ne_zero, ne_eq, not_false_eq_true,
229-
IsUnit.div_mul_cancel]
176+
· rw [h, Rat.mul_zero]
177+
sorry
178+
· sorry
179+
--simp_all only [isUnit_iff_ne_zero, ne_eq, not_false_eq_true,
180+
-- IsUnit.div_mul_cancel]
230181

231182
-- STEP 3:
232183
-- proves that μ distributes over the random variables
233-
theorem μ_dist (h : Fin K → FinRV ℚ) : ∑ i : Fin K, μ P X (h i) = μ P X (fun ω ↦ ∑ i : Fin K, (h i) ω) := sorry
184+
theorem μ_dist (h : Fin k → FinRV n ℚ) : ∑ i : Fin k, 𝔼[X * (h i) // P] = 𝔼[X * (fun ω ↦ ∑ i : Fin k, (h i) ω) // P] := sorry
234185

235186

236187
-- TODO: need to sum all probabilities
237188

238-
theorem fin_sum : ∀ ω : ℕ, ∑ i : Fin K, (𝕀ᵣ (L =ᵣ i)) ω = 1 := sorry
239189

240-
theorem exp_eq_exp_cond_true : 𝔼[X // P] = μ P X (fun ω ↦ 1 ) := sorry
190+
theorem fin_sum : ∀ ω : Fin n, ∑ i : Fin k, (𝕀 ∘ (L =ᵣ i)) ω = (1:ℚ) :=
191+
by have := fin_sum_g 1 (L := L)
192+
simp_all only [Pi.one_apply, Function.comp_apply, FinRV.eq, one_mul, implies_true]
193+
194+
theorem exp_eq_exp_cond_true : 𝔼[X // P] = 𝔼[X * (fun ω ↦ 1 ) // P] := sorry
241195

242196

243197
-- TODO: need to sum all probabilities
244198

245199

246-
example {f g : ℕ → ℚ} {m : ℕ} (h : ∀ n : ℕ, f n = g n) : ∑ i : Fin m, f i = ∑ i : Fin m, g i :=
200+
example {f g : ℕ → ℚ} {m : ℕ} (h : ∀ n : ℕ, f n = g n) :
201+
∑ i : Fin m, f i = ∑ i : Fin m, g i :=
247202
by apply Finset.sum_congr
248203
· simp
249-
· simp_all
250-
204+
· simp_all
205+
251206
-- STEP 4: We now use the results above to prove the law of total expectations
252-
theorem law_total_exp : 𝔼[ 𝔼[ X |ᵣ L // P] // P ] = 𝔼[ X // P] :=
207+
theorem law_total_exp : 𝔼[𝔼[X |ᵣ L // P] // P] = 𝔼[X // P] :=
253208
calc
254-
𝔼[𝔼[X |ᵣ L // P] // P ] = ∑ i : Fin K, 𝔼[ X | L =ᵣ i // P ] * ℙ[ L =ᵣ i // P] := expexp
255-
_ = ∑ i : Fin K, μ P X (𝕀ᵣ (L =ᵣ i)) := by apply Fintype.sum_congr;
256-
exact fun a => exp_prod_μ (L K)
257-
_ = μ P X (fun ω ↦ ∑ i : Fin K, (𝕀ᵣ (L =ᵣ i)) ω) := μ_dist fun i => 𝕀ᵣ (L=ᵣi)
258-
_ = μ P X (fun ω ↦ 1) := by conv => lhs; congr; rfl; rfl; intro ω; exact fin_sum ω
209+
𝔼[𝔼[X |ᵣ L // P] // P ] = ∑ i : Fin k, 𝔼[ X | L =ᵣ i // P ] * ℙ[ L =ᵣ i // P] := expexp
210+
_ = ∑ i : Fin k, 𝔼[X * (𝕀 ∘ (L =ᵣ i)) // P] := by
211+
apply Finset.sum_congr
212+
· rfl
213+
· exact fun a _ ↦ exp_prod_μ
214+
_ = 𝔼[X * (fun ω ↦ ∑ i : Fin k, (𝕀 ∘ (L =ᵣ i)) ω) // P] := μ_dist fun i => 𝕀 ∘ (L=ᵣi)
215+
_ = 𝔼[X * (fun ω ↦ 1) // P] := by
216+
unfold expect; conv => lhs; congr; rfl; congr; rfl; intro ω; exact fin_sum ω
259217
_ = 𝔼[X // P] := exp_eq_exp_cond_true.symm
260218

261219
end Ex

0 commit comments

Comments
 (0)