Skip to content

Commit 715b08c

Browse files
authored
Merge pull request #34 from formalproofs/lotus
Proof of the law of total expectations
2 parents 76a6b0b + f889e83 commit 715b08c

3 files changed

Lines changed: 180 additions & 300 deletions

File tree

Probability/Probability/Basic.lean

Lines changed: 59 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -16,34 +16,50 @@ import Mathlib.Data.Fintype.BigOperators
1616

1717
namespace Findist
1818

19-
variable {n : ℕ} (P : Findist n) (B : FinRV n Bool)
19+
variable {n : ℕ} {P : Findist n} {B : FinRV n Bool}
2020

21-
-- TODO: is there a way to simplify this result to not use induction?
22-
theorem in_prob (P : Findist n) : Prob ℙ[B // P] := sorry
2321

24-
theorem ge_zero : ℙ[ B // P ] ≥ 0 := (P.in_prob B).left
22+
theorem ge_zero : 0 ≤ ℙ[ B // P ] :=
23+
by rw [Ex.prob_eq_exp_ind]
24+
have h : (0 : FinRV n ℚ) ≤ 𝕀∘B := ind_nneg
25+
calc 0 = 𝔼[0 // P] := exp_const.symm
26+
_ ≤ 𝔼[𝕀 ∘ B//P] := exp_monotone h
27+
2528

26-
theorem le_one : ℙ[B // P] ≤ 1 := (P.in_prob B).right
29+
theorem le_one : ℙ[B // P] ≤ 1 :=
30+
by rw [Ex.prob_eq_exp_ind]
31+
have h : 𝕀∘B ≤ (1 : FinRV n ℚ) := ind_le_one
32+
calc 𝔼[𝕀 ∘ B//P] ≤ 𝔼[1 // P] := exp_monotone h
33+
_ = 1 := exp_const
34+
35+
theorem in_prob (P : Findist n) : Prob ℙ[B // P] := ⟨ge_zero, le_one⟩
2736

2837
end Findist
2938

30-
------------------------------ Probablity ---------------------------
39+
------------------------------ Probability ---------------------------
3140

3241
namespace Pr
3342

34-
variable {n : ℕ} (P : Findist n) (B C : FinRV n Bool)
43+
variable {n : ℕ} {P : Findist n} {B C : FinRV n Bool}
44+
45+
46+
theorem prob_compl_sums_to_one : ℙ[B // P] + ℙ[¬ᵣB // P] = 1 :=
47+
by rw [Ex.prob_eq_exp_ind, Ex.prob_eq_exp_ind]
48+
rw [←exp_dists_add]
49+
rw [one_of_ind_bool_or_not]
50+
exact exp_one
3551

36-
theorem prob_compl_sums_to_one : ℙ[B // P] + ℙ[¬ᵣB // P] = 1 := sorry
52+
3753

3854
theorem prob_compl_one_minus : ℙ[¬ᵣB // P] = 1 - ℙ[B // P] :=
39-
by have := prob_compl_sums_to_one P B
55+
by have := prob_compl_sums_to_one (P:=P) (B:=B)
4056
linarith
4157

42-
@[simp]
43-
lemma refold_probability : P.p ⬝ᵥ (𝕀 ∘ B) = ℙ[B // P] := rfl
4458

45-
theorem law_of_total_probs_bool : ℙ[B // P] = ℙ[B * C // P] + ℙ[B * (¬ᵣC) // P] :=
46-
by
59+
-- TODO: I think that we can show the following results from the law of total expectations
60+
61+
--TODO: theorem law_of_total_probs_bool : ℙ[B // P] = ℙ[B * C // P] + ℙ[B * (¬ᵣC) // P] :=
62+
/- by
4763
unfold probability
4864
have h : ∀ i : Fin n, (𝕀 (B i)) = (𝕀 (B i * C i)) + (𝕀 (B i * (¬ᵣ C) i)) :=
4965
by
@@ -56,10 +72,9 @@ theorem law_of_total_probs_bool : ℙ[B // P] = ℙ[B * C // P] + ℙ[B * (¬ᵣ
5672
· simp [hB, hC, FinRV.not, indicator]
5773
· simp [hB, hC, FinRV.not, indicator]
5874
sorry ---I tried to do this proof but got stuck, feel free to delete my work
75+
-/
5976

60-
61-
theorem conditional_total (h : 0 < ℙ[C // P]) : ℙ[B * C // P] = ℙ[B | C // P] * ℙ[C // P] :=
62-
sorry
77+
--TODO: theorem conditional_total (h : 0 < ℙ[C // P]) : ℙ[B * C // P] = ℙ[B | C // P] * ℙ[C // P] :=
6378
-- by simp [probability_cnd] at ⊢ h
6479
-- have : P.ℙ.iprodb C * (P.ℙ.iprodb C)⁻¹ = 1 :=
6580
-- Rat.mul_inv_cancel (P.ℙ.iprodb C) (Ne.symm (ne_of_lt h))
@@ -69,17 +84,17 @@ theorem conditional_total (h : 0 < ℙ[C // P]) : ℙ[B * C // P] = ℙ[B | C /
6984
-- _ = P.ℙ.iprodb (B ∧ᵣ C) / P.ℙ.iprodb C * P.ℙ.iprodb C := by ring
7085

7186

72-
theorem law_total_prbs_cnd (h1 : 0 < ℙ[C // P]) (h2 : ℙ[C // P] < 1)
73-
: ℙ[B // P] = ℙ[B | C // P] * ℙ[ C // P] + ℙ[B | ¬ᵣC // P] * ℙ[¬ᵣC // P] :=
74-
by have h2' : 0 < ℙ[¬ᵣC // P] := by rw [prob_compl_one_minus]; linarith
75-
rw [←conditional_total P B C h1]
76-
rw [←conditional_total P B (¬ᵣC) h2']
77-
exact law_of_total_probs_bool P B C
87+
--TODO: theorem law_total_prbs_cnd (h1 : 0 < ℙ[C // P]) (h2 : ℙ[C // P] < 1)
88+
--: ℙ[B // P] = ℙ[B | C // P] * ℙ[ C // P] + ℙ[B | ¬ᵣC // P] * ℙ[¬ᵣC // P] :=
89+
-- by have h2' : 0 < ℙ[¬ᵣC // P] := by rw [prob_compl_one_minus]; linarith
90+
-- rw [←conditional_total P B C h1]
91+
-- rw [←conditional_total P B (¬ᵣC) h2']
92+
-- exact law_of_total_probs_bool P B C
7893

7994
variable {k : ℕ} {L : FinRV n (Fin k)}
8095

81-
-- TODO: we will prove this from the law for expectations
82-
theorem law_of_total_probs : ∑ i : Fin k, ℙ[B * (L =ᵣ i) // P] = ℙ[B // P] := sorry
96+
-- TODO: we can prove this from the law for expectations
97+
-- TODO: theorem law_of_total_probs : ∑ i : Fin k, ℙ[B * (L =ᵣ i) // P] = ℙ[B // P] := sorry
8398

8499
end Pr
85100

@@ -148,7 +163,7 @@ theorem fin_sum_g: ∀ ω, ∑ i, (g i) * (𝕀 ∘ (L =ᵣ i)) ω = g (L ω) :=
148163
variable {ρ : Type} [AddCommMonoid ρ]
149164

150165
/-- Linearity of expectation --/
151-
theorem expect_linear {m : ℕ} (Xs : Fin m → FinRV n ℚ) : 𝔼[∑ i : Fin m, Xs i // P] = ∑ i : Fin m, 𝔼[Xs i // P] :=
166+
theorem exp_linear {m : ℕ} (Xs : Fin m → FinRV n ℚ) : 𝔼[∑ i : Fin m, Xs i // P] = ∑ i : Fin m, 𝔼[Xs i // P] :=
152167
by unfold expect
153168
exact dotProduct_sum P.p Finset.univ Xs
154169

@@ -160,7 +175,7 @@ theorem fin_sum_simple : (g ∘ L) = ∑ i, (fun _ ↦ g i) * (L =ᵢ i) :=
160175
theorem idktheorem (P : Findist n) (L : FinRV n (Fin k)) (g : Fin k → ℚ) :
161176
𝔼[g ∘ L // P] = ∑ i : Fin k, g i * ℙ[L =ᵣ i // P] := by
162177
rw [fin_sum_simple]
163-
rw [expect_linear]
178+
rw [exp_linear]
164179
apply Fintype.sum_congr
165180
intro a
166181
rw [exp_prod_const_fun]
@@ -182,7 +197,9 @@ theorem LOTUS {g : Fin k → ℚ} (h : PMF pmf P L):
182197
-- this proof will rely on the extensional property of function (functions are the same if they
183198
-- return the same value for the same inputs; for all inputs)
184199
theorem condexp_pmf : 𝔼[ X |ᵣ L // P] = (fun i ↦ 𝔼[ X | (L =ᵣ i) // P]) ∘ L :=
185-
by sorry
200+
by unfold expect_cnd_rv
201+
ext ω; simp
202+
186203

187204

188205
theorem expexp : 𝔼[ 𝔼[ X |ᵣ L // P] // P ] = ∑ i : Fin k, 𝔼[ X | L =ᵣ i // P] * ℙ[ L =ᵣ i // P] := by
@@ -195,32 +212,34 @@ theorem expexp : 𝔼[ 𝔼[ X |ᵣ L // P] // P ] = ∑ i : Fin k, 𝔼[ X | L
195212

196213
-- STEP 2:
197214

198-
theorem μ_eq_zero_of_cond_empty (h : ℙ[B // P] = 0) : ∀ X, 𝔼[X * (𝕀 ∘ B) // P] = 0 := sorry
199-
200215
example (a : ℚ) : a * 0 = 0 := Rat.mul_zero a
201216

202217
theorem exp_prod_μ : 𝔼[X | B // P] * ℙ[B // P] = 𝔼[X * (𝕀 ∘ B) // P] :=
203-
by unfold expect_cnd
218+
by unfold expect_cnd
204219
by_cases h: ℙ[B//P] = 0
205220
· rw [h, Rat.mul_zero]
206-
sorry
207-
· sorry
208-
--simp_all only [isUnit_iff_ne_zero, ne_eq, not_false_eq_true,
209-
-- IsUnit.div_mul_cancel]
221+
unfold expect
222+
rw [dotProd_hadProd_comm, dotProd_hadProd_rotate, prod_zero_of_prob_zero h]
223+
exact (dotProduct_zero X).symm
224+
· simp_all
210225

211226
-- STEP 3:
212-
-- proves that μ distributes over the random variables
213-
theorem μ_dist (h : Fin k → FinRV n ℚ) : ∑ i : Fin k, 𝔼[X * (h i) // P] = 𝔼[X * (fun ω ↦ ∑ i : Fin k, (h i) ω) // P] := sorry
214227

228+
example (Xs : Fin k → FinRV n ℚ) : (fun ω ↦ ∑ i, Xs i ω) = ∑ i, Xs i := by exact Eq.symm (Finset.sum_fn Finset.univ Xs)
229+
230+
-- proves that μ distributes over the random variables
231+
theorem μ_dist (Xs : Fin k → FinRV n ℚ) : ∑ i : Fin k, 𝔼[X * (Xs i) // P] = 𝔼[X * (fun ω ↦ ∑ i : Fin k, Xs i ω) // P] := by
232+
rw [←Finset.sum_fn Finset.univ Xs]
233+
rw [←rv_prod_sum_linear]
234+
rw [exp_linear]
215235

216-
-- TODO: need to sum all probabilities
217236

218237

219238
theorem fin_sum : ∀ ω : Fin n, ∑ i : Fin k, (𝕀 ∘ (L =ᵣ i)) ω = (1:ℚ) :=
220239
by have := fin_sum_g 1 (L := L)
221240
simp_all only [Pi.one_apply, Function.comp_apply, FinRV.eq, one_mul, implies_true]
222241

223-
theorem exp_eq_exp_cond_true : 𝔼[X // P] = 𝔼[X * (fun ω1 ) // P] := sorry
242+
theorem exp_eq_exp_cond_true : 𝔼[X // P] = 𝔼[X * (fun _1 ) // P] := by simp [expect, Pi.mul_def]
224243

225244

226245
example {f g : ℕ → ℚ} {m : ℕ} (h : ∀ n : ℕ, f n = g n) :
@@ -237,9 +256,10 @@ theorem law_total_exp : 𝔼[𝔼[X |ᵣ L // P] // P] = 𝔼[X // P] :=
237256
apply Finset.sum_congr
238257
· rfl
239258
· exact fun a _ ↦ exp_prod_μ
240-
_ = 𝔼[X * (fun ω ↦ ∑ i : Fin k, (𝕀 ∘ (L =ᵣ i)) ω) // P] := μ_dist fun i => 𝕀 ∘ (L=ᵣi)
259+
_ = 𝔼[X * (fun ω ↦ ∑ i : Fin k, (𝕀 ∘ (L =ᵣ i)) ω) // P] := μ_dist (fun i 𝕀 ∘ (L=ᵣi))
241260
_ = 𝔼[X * (fun ω ↦ 1) // P] := by
242261
unfold expect; conv => lhs; congr; rfl; congr; rfl; intro ω; exact fin_sum ω
243262
_ = 𝔼[X // P] := exp_eq_exp_cond_true.symm
244263

264+
245265
end Ex

Probability/Probability/Defs.lean

Lines changed: 95 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import Probability.Probability.Prelude
33
import Mathlib.Data.Matrix.Mul -- dot product definitions and results
44
import Mathlib.Algebra.Notation.Pi.Defs -- operations on functions
55
import Mathlib.Algebra.Module.PointwisePi -- for smul_pi
6+
import Mathlib.LinearAlgebra.Matrix.DotProduct -- for monotonicity
67

78
--------------------------- Findist ---------------------------------------------------------------
89

@@ -13,7 +14,7 @@ variable {n : ℕ}
1314
structure Findist (n : ℕ) : Type where
1415
p : Fin n → ℚ
1516
prob : 1 ⬝ᵥ p = 1
16-
nneg : ∀ i, p i ≥ 0
17+
nneg : 0 p
1718

1819
namespace Findist
1920

@@ -24,7 +25,7 @@ abbrev Δ : ℕ → Type := Delta
2425
def singleton : Findist 1 :=
2526
{p := ![1],
2627
prob := by simp [Matrix.vecHead],
27-
nneg := by simp}
28+
nneg := by simp [Pi.zero_def, Pi.le_def] }
2829

2930

3031
@[simp]
@@ -52,16 +53,19 @@ end Findist
5253
Random variables are defined as function. The operations on random variables can be performed
5354
using the standard notation:
5455
55-
X + Y is elementwise addition
56-
X * Y is elementwise (Hadamard product)
57-
f ∘ X is composition
58-
c • X is scalar multiplication
56+
- X + Y is elementwise addition
57+
- X * Y is elementwise product (Hadamard product)
58+
- f ∘ X is composition
59+
- c • X is scalar multiplication
5960
6061
61-
L =ᵣ i is a boolean indicator random variable
62-
L =ᵢ i is a ℚ indicator random variable
63-
L ≤ᵣ i is a bool indicator random variable
62+
- L =ᵣ i is a boolean indicator random variable
63+
- L =ᵢ i is a ℚ indicator random variable
64+
- L ≤ᵣ i is a bool indicator random variable
6465
66+
Main results
67+
68+
- Hadamard product is linear: Y * (∑ i, Xs i) = ∑ i, Y * (Xs i)
6569
-/
6670

6771

@@ -163,13 +167,49 @@ theorem indi_eq_indr : ∀i : Fin k, (𝕀 ∘ (L =ᵣ i)) = (L =ᵢ i) := by
163167
· simp [h]
164168

165169

170+
variable {B : FinRV n Bool}
166171
/-- Indicator is 0 or 1 -/
167-
theorem ind_zero_one (cond : ρ → Bool) : ∀ ω, (𝕀∘cond) ω = 1 ∨ (𝕀∘cond) ω = 0 := by
172+
theorem ind_zero_one : ∀ ω, (𝕀∘B) ω = 1 ∨ (𝕀∘B) ω = 0 := by
168173
intro ω
169-
by_cases h : cond ω
174+
by_cases h : B ω
170175
· left; simp only [Function.comp_apply, h, indicator]
171176
· right; simp only [Function.comp_apply, h, indicator]
172177

178+
/-- Indicator is 0 or 1 -/
179+
theorem ind_nneg : (0 : FinRV n ℚ) ≤ 𝕀∘B := by
180+
intro ω
181+
simp [𝕀, indicator]
182+
by_cases h : B ω
183+
· simp [h]
184+
· simp [h]
185+
186+
theorem ind_le_one : 𝕀∘B ≤ (1 : FinRV n ℚ) :=
187+
by unfold 𝕀 indicator
188+
intro ω
189+
by_cases h : B ω
190+
· simp [h]
191+
· simp [h]
192+
193+
theorem one_of_true : 𝕀 ∘ (1 : Fin n → Bool) = (1 : Fin n → ℚ) := by ext; simp [𝕀, indicator]
194+
195+
theorem one_of_bool_or_not : B + (¬ᵣ B) = (1 : FinRV n Bool) := by ext ω; unfold FinRV.not; simp
196+
197+
theorem one_of_ind_bool_or_not : (𝕀∘B) + (𝕀∘(¬ᵣ B)) = (1 : FinRV n ℚ) :=
198+
by ext ω
199+
unfold FinRV.not 𝕀 indicator not
200+
by_cases h : B ω
201+
· simp [h]
202+
· simp [h]
203+
204+
variable {X Y: FinRV n ℚ}
205+
206+
theorem rv_le_abs : X ≤ abs ∘ X := by intro i; simp [le_abs_self (X i)]
207+
208+
theorem rv_prod_sum_linear {Xs : Fin k → FinRV n ℚ} : ∑ i, Y * (Xs i) = Y * (∑ i, Xs i) :=
209+
by ext ω
210+
simp
211+
rw [Finset.mul_sum]
212+
173213
end RandomVariable
174214

175215
------------------------------ Probability ---------------------------
@@ -182,28 +222,33 @@ def probability : ℚ := P.p ⬝ᵥ (𝕀 ∘ B)
182222

183223
notation "ℙ[" B "//" P "]" => probability P B
184224

225+
-- helps to refold is when needed
226+
lemma probability_def : P.p ⬝ᵥ (𝕀 ∘ B) = ℙ[B // P] := rfl
227+
185228
-- TODO: the sorry in the definition has to do with the decidability of the membership
186229
--theorem prob_iprod_eq_def : ℙ[B // P] = P.measure (B.preimage true) sorry := sorry
187230

188231
/-- Conditional probability of B -/
189232
def probability_cnd : ℚ := ℙ[B * C // P] / ℙ[ C // P ]
190233

191-
namespace Pr
192234

193-
theorem one_of_true : 𝕀 ∘ (1 : Fin n → Bool) = (1 : Fin n → ℚ) :=
194-
by ext
195-
simp [𝕀, indicator]
235+
---- conditional probability
236+
notation "ℙ[" B "|" C "//" P "]" => probability_cnd P B C
237+
196238

197-
theorem true_one : ℙ[ 1 // P] = 1 :=
239+
theorem prob_one_of_true : ℙ[1 // P] = 1 :=
198240
by unfold probability
199241
rw[one_of_true]
200242
rw [dotProduct_comm]
201243
exact P.prob
202244

203-
---- conditional probability
204-
notation "ℙ[" B "|" C "//" P "]" => probability_cnd P B C
245+
example {a b : ℚ} (h : 0 ≤ a) (h2 : 0 ≤ b) : 0 ≤ a * b := Rat.mul_nonneg h h2
246+
247+
variable {P : Findist n} {B : FinRV n Bool}
205248

206-
end Pr
249+
theorem prod_zero_of_prob_zero : ℙ[B // P] = 0 → (P.p * (𝕀∘B) = 0) := by
250+
intro h; exact prod_eq_zero_of_nneg_dp_zero P.nneg ind_nneg h
251+
207252

208253
------------------------------ PMF ---------------------------
209254

@@ -252,23 +297,48 @@ def expect_cnd_rv : Fin n → ℚ := fun i ↦ 𝔼[ X | L =ᵣ (L i) // P ]
252297

253298
notation "𝔼[" X "|ᵣ" L "//" P "]" => expect_cnd_rv P X L
254299

300+
end Ex
255301
--- some basic properties
256302

257-
theorem exp_dists_add : 𝔼[X + Y // P] = 𝔼[X // P] + 𝔼[Y // P] := by simp [expect]
303+
section Expectation_properties
304+
variable {P : Findist n} {X Y Z: FinRV n ℚ} {B : FinRV n Bool}
305+
306+
theorem exp_congr : (X = Y) → 𝔼[X // P] = 𝔼[Y // P] :=
307+
by intro h
308+
unfold Ex.expect dotProduct
309+
apply Fintype.sum_congr
310+
simp_all
311+
312+
theorem exp_dists_add : 𝔼[X + Y // P] = 𝔼[X // P] + 𝔼[Y // P] := by simp [Ex.expect]
313+
314+
theorem exp_mul_comm : 𝔼[X * Y // P] = 𝔼[Y * X // P] := by unfold Ex.expect; exact dotProd_hadProd_comm
315+
316+
variable {c : ℚ} {p : Fin n → ℚ}
258317

259-
variable {c : ℚ}
318+
theorem const_fun_to_one : (fun _ ↦ c : FinRV n ℚ) = c • 1 := by ext; simp;
260319

261-
theorem exp_prod_const : 𝔼[c • X // P] = c * 𝔼[X // P] := by simp only [expect, dotProduct_smul, smul_eq_mul]
320+
theorem exp_const : 𝔼[(fun _ ↦ c) // P] = c :=
321+
by unfold Ex.expect
322+
rw [const_fun_to_one]
323+
simp only [dotProduct_smul, smul_eq_mul]
324+
rw [dotProduct_comm, P.prob]
325+
simp
326+
327+
theorem exp_one : 𝔼[ 1 // P] = 1 :=
328+
by calc 𝔼[ 1 // P] = 𝔼[ (fun _ ↦ 1) // P] := rfl
329+
_ = 1 := exp_const
330+
331+
theorem exp_prod_const : 𝔼[c • X // P] = c * 𝔼[X // P] := by simp only [Ex.expect, dotProduct_smul, smul_eq_mul]
262332

263333
lemma constant_mul_eq_smul : (fun ω ↦ c * X ω) = c • X := rfl
264334

265335
theorem exp_prod_const_fun : 𝔼[(λ _ ↦ c) * X // P] = c * 𝔼[X // P] :=
266-
by simp only [expect, Pi.mul_def, constant_mul_eq_smul, dotProduct_smul, smul_eq_mul]
267-
336+
by simp only [Ex.expect, Pi.mul_def, constant_mul_eq_smul, dotProduct_smul, smul_eq_mul]
268337

269338
theorem exp_indi_eq_exp_indr : ∀i : Fin k, 𝔼[L =ᵢ i // P] = 𝔼[𝕀 ∘ (L =ᵣ i) // P] := by
270339
intro i
271340
rw [indi_eq_indr]
272341

342+
theorem exp_monotone (h: X ≤ Y) : 𝔼[X // P] ≤ 𝔼[Y // P] := dotProduct_le_dotProduct_of_nonneg_left h P.nneg
273343

274-
end Ex
344+
end Expectation_properties

0 commit comments

Comments
 (0)