|
| 1 | +/- |
| 2 | +Copyright (c) 2026 Dennj Osele. All rights reserved. |
| 3 | +Released under Apache 2.0 license as described in the file LICENSE. |
| 4 | +Authors: Dennj Osele |
| 5 | +-/ |
| 6 | +module |
| 7 | + |
| 8 | +public import Mathlib.Analysis.Convex.DoublyStochasticMatrix |
| 9 | +public import Mathlib.Analysis.Convex.StdSimplex |
| 10 | +public import Mathlib.Analysis.Normed.Lp.PiLp |
| 11 | +public import Mathlib.Order.Filter.AtTopBot.Archimedean |
| 12 | + |
| 13 | +/-! |
| 14 | +# Row-Stochastic and Column-Stochastic Matrices |
| 15 | +
|
| 16 | +This file defines row-stochastic and column-stochastic matrices, and establishes their |
| 17 | +basic properties including closure under multiplication and the L¹ non-expansiveness |
| 18 | +property that is fundamental to Markov chain theory. |
| 19 | +
|
| 20 | +## Main definitions |
| 21 | +
|
| 22 | +* `rowStochastic`: A square matrix is row-stochastic if all entries are nonnegative and |
| 23 | + each row sums to 1. |
| 24 | +* `colStochastic`: A square matrix is column-stochastic if all entries are nonnegative and |
| 25 | + each column sums to 1. |
| 26 | +* `IsStationary`: A distribution `μ` is stationary for a matrix `P` if `μ ᵥ* P = μ`. |
| 27 | +* `cesaroAverage`: The Cesàro average of the iterates of a vector under a matrix. |
| 28 | +* `uniformDistribution`: The uniform distribution on a finite nonempty type. |
| 29 | +
|
| 30 | +## Main statements |
| 31 | +
|
| 32 | +* `rowStochastic.pow_mem`: Powers of row-stochastic matrices are row-stochastic. |
| 33 | +* `rowStochastic.nnnorm_vecMul_sub_le`: Row-stochastic matrices are non-expansive |
| 34 | + operators on probability vectors in the L¹ norm. |
| 35 | +* `rowStochastic.exists_stationary_distribution`: Every row-stochastic matrix on a finite |
| 36 | + nonempty state space has a stationary distribution in the standard simplex. |
| 37 | +
|
| 38 | +## Implementation notes |
| 39 | +
|
| 40 | +Row-stochastic matrices form a submonoid of square matrices. They are the natural |
| 41 | +representation of transition matrices for discrete-time Markov chains, where `P i j` |
| 42 | +represents the probability of transitioning from state `i` to state `j`. |
| 43 | +
|
| 44 | +The relationship with `doublyStochastic` is: |
| 45 | +`doublyStochastic R n = rowStochastic R n ⊓ colStochastic R n` |
| 46 | +
|
| 47 | +## Tags |
| 48 | +
|
| 49 | +stochastic matrix, Markov chain, row-stochastic, column-stochastic, L¹ contraction |
| 50 | +-/ |
| 51 | + |
| 52 | +@[expose] public section |
| 53 | + |
| 54 | +open Finset Function Matrix ENNReal Filter |
| 55 | +open scoped BigOperators |
| 56 | + |
| 57 | +variable {R n : Type*} [Fintype n] [DecidableEq n] |
| 58 | + |
| 59 | +section OrderedSemiring |
| 60 | + |
| 61 | +variable [Semiring R] [PartialOrder R] [IsOrderedRing R] {M : Matrix n n R} |
| 62 | + |
| 63 | +/-- |
| 64 | +A square matrix is row-stochastic iff all entries are nonnegative and each row sums to 1. |
| 65 | +Equivalently, right multiplication by the all-ones column vector gives the all-ones vector. |
| 66 | +-/ |
| 67 | +def rowStochastic (R n : Type*) [Fintype n] [DecidableEq n] [Semiring R] [PartialOrder R] |
| 68 | + [IsOrderedRing R] : Submonoid (Matrix n n R) where |
| 69 | + carrier := {M | (∀ i j, 0 ≤ M i j) ∧ M *ᵥ 1 = 1} |
| 70 | + mul_mem' {M N} hM hN := |
| 71 | + ⟨fun i j => sum_nonneg fun k _ => mul_nonneg (hM.1 _ _) (hN.1 _ _), |
| 72 | + by rw [← mulVec_mulVec, hN.2, hM.2]⟩ |
| 73 | + one_mem' := by simp [zero_le_one_elem] |
| 74 | + |
| 75 | +/-- |
| 76 | +A square matrix is column-stochastic iff all entries are nonnegative and each column sums to 1. |
| 77 | +Equivalently, left multiplication by the all-ones row vector gives the all-ones vector. |
| 78 | +-/ |
| 79 | +def colStochastic (R n : Type*) [Fintype n] [DecidableEq n] [Semiring R] [PartialOrder R] |
| 80 | + [IsOrderedRing R] : Submonoid (Matrix n n R) where |
| 81 | + carrier := {M | (∀ i j, 0 ≤ M i j) ∧ 1 ᵥ* M = 1} |
| 82 | + mul_mem' {M N} hM hN := |
| 83 | + ⟨fun i j => sum_nonneg fun k _ => mul_nonneg (hM.1 _ _) (hN.1 _ _), |
| 84 | + by rw [← vecMul_vecMul, hM.2, hN.2]⟩ |
| 85 | + one_mem' := by simp [zero_le_one_elem] |
| 86 | + |
| 87 | +lemma mem_rowStochastic : M ∈ rowStochastic R n ↔ (∀ i j, 0 ≤ M i j) ∧ M *ᵥ 1 = 1 := Iff.rfl |
| 88 | + |
| 89 | +lemma mem_colStochastic : M ∈ colStochastic R n ↔ (∀ i j, 0 ≤ M i j) ∧ 1 ᵥ* M = 1 := Iff.rfl |
| 90 | + |
| 91 | +lemma mem_rowStochastic_iff_sum : |
| 92 | + M ∈ rowStochastic R n ↔ (∀ i j, 0 ≤ M i j) ∧ ∀ i, ∑ j, M i j = 1 := by |
| 93 | + simp [funext_iff, rowStochastic, mulVec, dotProduct] |
| 94 | + |
| 95 | +lemma mem_colStochastic_iff_sum : |
| 96 | + M ∈ colStochastic R n ↔ (∀ i j, 0 ≤ M i j) ∧ ∀ j, ∑ i, M i j = 1 := by |
| 97 | + simp [funext_iff, colStochastic, vecMul, dotProduct] |
| 98 | + |
| 99 | +/-- Every entry of a row-stochastic matrix is nonnegative. -/ |
| 100 | +lemma nonneg_of_mem_rowStochastic (hM : M ∈ rowStochastic R n) {i j : n} : 0 ≤ M i j := |
| 101 | + hM.1 _ _ |
| 102 | + |
| 103 | +/-- Every entry of a column-stochastic matrix is nonnegative. -/ |
| 104 | +lemma nonneg_of_mem_colStochastic (hM : M ∈ colStochastic R n) {i j : n} : 0 ≤ M i j := |
| 105 | + hM.1 _ _ |
| 106 | + |
| 107 | +/-- Each row sum of a row-stochastic matrix is 1. -/ |
| 108 | +lemma sum_row_of_mem_rowStochastic (hM : M ∈ rowStochastic R n) (i : n) : ∑ j, M i j = 1 := |
| 109 | + (mem_rowStochastic_iff_sum.1 hM).2 _ |
| 110 | + |
| 111 | +/-- Each column sum of a column-stochastic matrix is 1. -/ |
| 112 | +lemma sum_col_of_mem_colStochastic (hM : M ∈ colStochastic R n) (j : n) : ∑ i, M i j = 1 := |
| 113 | + (mem_colStochastic_iff_sum.1 hM).2 _ |
| 114 | + |
| 115 | +/-- A row-stochastic matrix multiplied with the all-ones column vector is 1. -/ |
| 116 | +lemma mulVec_one_of_mem_rowStochastic (hM : M ∈ rowStochastic R n) : M *ᵥ 1 = 1 := |
| 117 | + (mem_rowStochastic.1 hM).2 |
| 118 | + |
| 119 | +/-- The all-ones row vector multiplied with a column-stochastic matrix is 1. -/ |
| 120 | +lemma one_vecMul_of_mem_colStochastic (hM : M ∈ colStochastic R n) : 1 ᵥ* M = 1 := |
| 121 | + (mem_colStochastic.1 hM).2 |
| 122 | + |
| 123 | +/-- Every entry of a row-stochastic matrix is at most 1. -/ |
| 124 | +lemma le_one_of_mem_rowStochastic (hM : M ∈ rowStochastic R n) {i j : n} : M i j ≤ 1 := |
| 125 | + sum_row_of_mem_rowStochastic hM i ▸ single_le_sum (fun k _ => hM.1 _ k) (mem_univ j) |
| 126 | + |
| 127 | +/-- Every entry of a column-stochastic matrix is at most 1. -/ |
| 128 | +lemma le_one_of_mem_colStochastic (hM : M ∈ colStochastic R n) {i j : n} : M i j ≤ 1 := |
| 129 | + sum_col_of_mem_colStochastic hM j ▸ single_le_sum (fun k _ => hM.1 k _) (mem_univ i) |
| 130 | + |
| 131 | +/-- The set of row-stochastic matrices is convex. -/ |
| 132 | +lemma convex_rowStochastic : Convex R (rowStochastic R n : Set (Matrix n n R)) := by |
| 133 | + intro x hx y hy a b ha hb hab |
| 134 | + simp only [SetLike.mem_coe, mem_rowStochastic_iff_sum] at hx hy ⊢ |
| 135 | + simp [add_nonneg, ha, hb, mul_nonneg, hx, hy, sum_add_distrib, ← mul_sum, hab] |
| 136 | + |
| 137 | +/-- The set of column-stochastic matrices is convex. -/ |
| 138 | +lemma convex_colStochastic : Convex R (colStochastic R n : Set (Matrix n n R)) := by |
| 139 | + intro x hx y hy a b ha hb hab |
| 140 | + simp only [SetLike.mem_coe, mem_colStochastic_iff_sum] at hx hy ⊢ |
| 141 | + simp [add_nonneg, ha, hb, mul_nonneg, hx, hy, sum_add_distrib, ← mul_sum, hab] |
| 142 | + |
| 143 | +/-- Any permutation matrix is row-stochastic. -/ |
| 144 | +lemma permMatrix_mem_rowStochastic {σ : Equiv.Perm n} : σ.permMatrix R ∈ rowStochastic R n := by |
| 145 | + rw [mem_rowStochastic_iff_sum] |
| 146 | + exact ⟨fun i j => by aesop, fun _ => by simp [Equiv.toPEquiv_apply]⟩ |
| 147 | + |
| 148 | +/-- Any permutation matrix is column-stochastic. -/ |
| 149 | +lemma permMatrix_mem_colStochastic {σ : Equiv.Perm n} : σ.permMatrix R ∈ colStochastic R n := by |
| 150 | + rw [mem_colStochastic_iff_sum] |
| 151 | + exact ⟨fun i j => by aesop, fun _ => by simp [Equiv.toPEquiv_apply, ← Equiv.eq_symm_apply]⟩ |
| 152 | + |
| 153 | +/-- A matrix is doubly stochastic iff it is both row-stochastic and column-stochastic. -/ |
| 154 | +lemma doublyStochastic_eq_inf : |
| 155 | + doublyStochastic R n = rowStochastic R n ⊓ colStochastic R n := by |
| 156 | + ext M |
| 157 | + simp only [Submonoid.mem_inf, mem_doublyStochastic, mem_rowStochastic, mem_colStochastic] |
| 158 | + exact ⟨fun ⟨h1, h2, h3⟩ => ⟨⟨h1, h2⟩, ⟨h1, h3⟩⟩, fun ⟨⟨h1, h2⟩, ⟨_, h3⟩⟩ => ⟨h1, h2, h3⟩⟩ |
| 159 | + |
| 160 | +/-- The transpose of a row-stochastic matrix is column-stochastic. -/ |
| 161 | +lemma transpose_mem_colStochastic (hM : M ∈ rowStochastic R n) : Mᵀ ∈ colStochastic R n := by |
| 162 | + simp only [mem_colStochastic_iff_sum, transpose_apply] |
| 163 | + exact ⟨fun i j => hM.1 j i, fun j => sum_row_of_mem_rowStochastic hM j⟩ |
| 164 | + |
| 165 | +/-- The transpose of a column-stochastic matrix is row-stochastic. -/ |
| 166 | +lemma transpose_mem_rowStochastic (hM : M ∈ colStochastic R n) : Mᵀ ∈ rowStochastic R n := by |
| 167 | + simp only [mem_rowStochastic_iff_sum, transpose_apply] |
| 168 | + exact ⟨fun i j => hM.1 j i, fun i => sum_col_of_mem_colStochastic hM i⟩ |
| 169 | + |
| 170 | +end OrderedSemiring |
| 171 | + |
| 172 | +/-! ### Row-stochastic matrices and the standard simplex -/ |
| 173 | + |
| 174 | +section Simplex |
| 175 | + |
| 176 | +variable [Semiring R] [PartialOrder R] [IsOrderedRing R] |
| 177 | + |
| 178 | +/-- Each row of a row-stochastic matrix belongs to the standard simplex. -/ |
| 179 | +lemma row_mem_stdSimplex {M : Matrix n n R} (hM : M ∈ rowStochastic R n) (i : n) : |
| 180 | + M i ∈ stdSimplex R n := |
| 181 | + ⟨fun j => hM.1 i j, sum_row_of_mem_rowStochastic hM i⟩ |
| 182 | + |
| 183 | +/-- Each column of a column-stochastic matrix belongs to the standard simplex. -/ |
| 184 | +lemma col_mem_stdSimplex {M : Matrix n n R} (hM : M ∈ colStochastic R n) (j : n) : |
| 185 | + (fun i => M i j) ∈ stdSimplex R n := |
| 186 | + ⟨fun i => hM.1 i j, sum_col_of_mem_colStochastic hM j⟩ |
| 187 | + |
| 188 | +/-- Multiplying a probability vector on the left by a row-stochastic matrix |
| 189 | +preserves membership in the standard simplex. -/ |
| 190 | +lemma vecMul_mem_stdSimplex {M : Matrix n n R} (hM : M ∈ rowStochastic R n) |
| 191 | + {v : n → R} (hv : v ∈ stdSimplex R n) : v ᵥ* M ∈ stdSimplex R n := by |
| 192 | + refine ⟨fun j => sum_nonneg fun i _ => mul_nonneg (hv.1 i) (hM.1 i j), ?_⟩ |
| 193 | + simp only [vecMul, dotProduct] |
| 194 | + rw [sum_comm, ← hv.2] |
| 195 | + exact sum_congr rfl fun i _ => by simp [← mul_sum, sum_row_of_mem_rowStochastic hM] |
| 196 | + |
| 197 | +end Simplex |
| 198 | + |
| 199 | +/-! ### L¹ non-expansiveness for row-stochastic matrices -/ |
| 200 | + |
| 201 | +section L1Norm |
| 202 | + |
| 203 | +variable {M : Matrix n n ℝ} |
| 204 | + |
| 205 | +omit [DecidableEq n] in |
| 206 | +/-- The L¹ norm of a function equals the sum of absolute values. -/ |
| 207 | +private lemma l1_nnnorm_eq_sum (f : PiLp 1 (fun _ : n => ℝ)) : (‖f‖₊ : ℝ) = ∑ i, |f.ofLp i| := by |
| 208 | + rw [PiLp.nnnorm_eq_sum one_ne_top] |
| 209 | + simp only [ENNReal.toReal_one, NNReal.rpow_one, div_one, NNReal.coe_sum, coe_nnnorm, |
| 210 | + Real.norm_eq_abs] |
| 211 | + |
| 212 | +namespace rowStochastic |
| 213 | + |
| 214 | +/-- Row-stochastic matrices are non-expansive operators on probability vectors in the L¹ norm. |
| 215 | +This is the key contraction property for Markov chains. -/ |
| 216 | +theorem nnnorm_vecMul_sub_le (hM : M ∈ rowStochastic ℝ n) (x y : n → ℝ) : |
| 217 | + ‖WithLp.toLp 1 (x ᵥ* M - y ᵥ* M)‖₊ ≤ ‖WithLp.toLp 1 (x - y)‖₊ := by |
| 218 | + have hxy : x ᵥ* M - y ᵥ* M = fun j => ∑ i, (x i - y i) * M i j := by |
| 219 | + ext j; simp only [Pi.sub_apply, vecMul, dotProduct, sub_mul, sum_sub_distrib] |
| 220 | + have key : ∑ j, |∑ i, (x i - y i) * M i j| ≤ ∑ i, |x i - y i| := calc |
| 221 | + ∑ j, |∑ i, (x i - y i) * M i j| |
| 222 | + ≤ ∑ j, ∑ i, |x i - y i| * M i j := sum_le_sum fun j _ => (abs_sum_le_sum_abs _ _).trans |
| 223 | + (sum_le_sum fun i _ => by rw [abs_mul, abs_of_nonneg (hM.1 i j)]) |
| 224 | + _ = ∑ i, |x i - y i| := by |
| 225 | + rw [sum_comm]; simp_rw [← mul_sum, sum_row_of_mem_rowStochastic hM, mul_one] |
| 226 | + have hnorm : (‖WithLp.toLp 1 (x ᵥ* M - y ᵥ* M)‖₊ : ℝ) = ∑ j, |∑ i, (x i - y i) * M i j| := by |
| 227 | + rw [l1_nnnorm_eq_sum]; exact sum_congr rfl fun j _ => congrArg abs (congrFun hxy j) |
| 228 | + exact NNReal.coe_le_coe.mp (by rw [hnorm, l1_nnnorm_eq_sum]; exact key) |
| 229 | + |
| 230 | +/-- Row-stochastic matrices are non-expansive in L¹ norm (version with norm instead of nnnorm). -/ |
| 231 | +theorem norm_vecMul_sub_le (hM : M ∈ rowStochastic ℝ n) (x y : n → ℝ) : |
| 232 | + ‖WithLp.toLp 1 (x ᵥ* M - y ᵥ* M)‖ ≤ ‖WithLp.toLp 1 (x - y)‖ := |
| 233 | + mod_cast nnnorm_vecMul_sub_le hM x y |
| 234 | + |
| 235 | +end rowStochastic |
| 236 | + |
| 237 | +end L1Norm |
| 238 | + |
| 239 | +/-! ### Powers of stochastic matrices -/ |
| 240 | + |
| 241 | +namespace rowStochastic |
| 242 | + |
| 243 | +variable [Semiring R] [PartialOrder R] [IsOrderedRing R] |
| 244 | + |
| 245 | +/-- Powers of row-stochastic matrices are row-stochastic. -/ |
| 246 | +theorem pow_mem {M : Matrix n n R} (hM : M ∈ rowStochastic R n) (k : ℕ) : |
| 247 | + M ^ k ∈ rowStochastic R n := |
| 248 | + Submonoid.pow_mem (rowStochastic R n) hM k |
| 249 | + |
| 250 | +end rowStochastic |
| 251 | + |
| 252 | +namespace colStochastic |
| 253 | + |
| 254 | +variable [Semiring R] [PartialOrder R] [IsOrderedRing R] |
| 255 | + |
| 256 | +/-- Powers of column-stochastic matrices are column-stochastic. -/ |
| 257 | +theorem pow_mem {M : Matrix n n R} (hM : M ∈ colStochastic R n) (k : ℕ) : |
| 258 | + M ^ k ∈ colStochastic R n := |
| 259 | + Submonoid.pow_mem (colStochastic R n) hM k |
| 260 | + |
| 261 | +end colStochastic |
| 262 | + |
| 263 | +/-! ### Stationary distributions -/ |
| 264 | + |
| 265 | +section Stationary |
| 266 | + |
| 267 | +variable {R : Type*} [Semiring R] |
| 268 | + |
| 269 | +/-- A distribution `μ` is stationary for a matrix `P` if `μ ᵥ* P = μ`. -/ |
| 270 | +class IsStationary (μ : n → R) (P : Matrix n n R) : Prop where |
| 271 | + stationary : μ ᵥ* P = μ |
| 272 | + |
| 273 | +/-- Powers of a matrix preserve stationary distributions. -/ |
| 274 | +lemma IsStationary.pow (μ : n → R) (P : Matrix n n R) [IsStationary μ P] (k : ℕ) : |
| 275 | + IsStationary μ (P ^ k) := |
| 276 | + ⟨by induction k with |
| 277 | + | zero => simp [Matrix.vecMul_one] |
| 278 | + | succ k ih => rw [pow_succ, ← Matrix.vecMul_vecMul, ih, IsStationary.stationary]⟩ |
| 279 | + |
| 280 | +end Stationary |
| 281 | + |
| 282 | +/-! ### Cesàro averages and existence of stationary distributions -/ |
| 283 | + |
| 284 | +section CesaroAverage |
| 285 | + |
| 286 | +variable {R : Type*} [DivisionRing R] [PartialOrder R] [IsStrictOrderedRing R] |
| 287 | + |
| 288 | +/-- The Cesàro average of the iterates of a vector under a matrix. -/ |
| 289 | +def cesaroAverage (x₀ : n → R) (P : Matrix n n R) (k : ℕ) : n → R := |
| 290 | + (k + 1 : R)⁻¹ • ∑ i ∈ Finset.range (k + 1), x₀ ᵥ* (P ^ i) |
| 291 | + |
| 292 | +variable [Nonempty n] |
| 293 | + |
| 294 | +/-- The uniform distribution on a finite nonempty type. -/ |
| 295 | +@[reducible, nolint unusedArguments] |
| 296 | +def uniformDistribution (R : Type*) (n : Type*) [Fintype n] [DivisionRing R] : |
| 297 | + n → R := fun _ => 1 / Fintype.card n |
| 298 | + |
| 299 | +end CesaroAverage |
| 300 | + |
| 301 | +/-! ### Cesàro averages on the real simplex -/ |
| 302 | + |
| 303 | +section CesaroAverageReal |
| 304 | + |
| 305 | +variable {M : Matrix n n ℝ} |
| 306 | + |
| 307 | +/-- The Cesàro average of a probability vector under a row-stochastic matrix |
| 308 | +belongs to the standard simplex. -/ |
| 309 | +lemma cesaroAverage_mem_stdSimplex (hM : M ∈ rowStochastic ℝ n) |
| 310 | + {x₀ : n → ℝ} (hx₀ : x₀ ∈ stdSimplex ℝ n) (k : ℕ) : |
| 311 | + cesaroAverage x₀ M k ∈ stdSimplex ℝ n := by |
| 312 | + have hmem i : x₀ ᵥ* M ^ i ∈ stdSimplex ℝ n := |
| 313 | + vecMul_mem_stdSimplex (rowStochastic.pow_mem hM i) hx₀ |
| 314 | + refine ⟨fun j => mul_nonneg (inv_nonneg.mpr (by linarith : (0 : ℝ) ≤ k + 1)) |
| 315 | + (by simp only [Finset.sum_apply]; exact sum_nonneg fun i _ => (hmem i).1 j), ?_⟩ |
| 316 | + simp only [cesaroAverage, Pi.smul_apply, smul_eq_mul, Finset.sum_apply, ← mul_sum, |
| 317 | + sum_comm (γ := n)] |
| 318 | + rw [show ∑ i ∈ range (k + 1), ∑ j, (x₀ ᵥ* M ^ i) j = k + 1 from |
| 319 | + (sum_congr rfl fun i _ => (hmem i).2).trans (by simp), mul_comm] |
| 320 | + exact mul_inv_cancel₀ (by linarith : (k : ℝ) + 1 ≠ 0) |
| 321 | + |
| 322 | +omit [DecidableEq n] in |
| 323 | +/-- The L¹ norm of a probability vector is 1. -/ |
| 324 | +private lemma l1_nnnorm_eq_one {x : n → ℝ} (hx : x ∈ stdSimplex ℝ n) : ‖WithLp.toLp 1 x‖₊ = 1 := by |
| 325 | + rw [← NNReal.coe_inj, NNReal.coe_one, l1_nnnorm_eq_sum, |
| 326 | + (sum_congr rfl fun i _ => abs_of_nonneg (hx.1 i) : ∑ i, |x i| = ∑ i, x i), hx.2] |
| 327 | + |
| 328 | +/-- The Cesàro average is almost invariant: applying the matrix changes it by at most `2/(k+1)`. -/ |
| 329 | +lemma norm_cesaroAverage_vecMul_sub_le (hM : M ∈ rowStochastic ℝ n) |
| 330 | + {x₀ : n → ℝ} (hx₀ : x₀ ∈ stdSimplex ℝ n) (k : ℕ) : |
| 331 | + ‖WithLp.toLp 1 (cesaroAverage x₀ M k ᵥ* M - cesaroAverage x₀ M k)‖ ≤ 2 / (k + 1) := by |
| 332 | + have hk : (0 : ℝ) < k + 1 := by linarith |
| 333 | + have hsimp : cesaroAverage x₀ M k ᵥ* M - cesaroAverage x₀ M k = |
| 334 | + (k + 1 : ℝ)⁻¹ • (x₀ ᵥ* M ^ (k + 1) - x₀) := by |
| 335 | + unfold cesaroAverage |
| 336 | + rw [Matrix.smul_vecMul, ← smul_sub, Matrix.sum_vecMul]; congr 1 |
| 337 | + have h1 : ∀ i, (x₀ ᵥ* M ^ i) ᵥ* M = x₀ ᵥ* M ^ (i + 1) := fun i => by |
| 338 | + rw [Matrix.vecMul_vecMul, ← pow_succ] |
| 339 | + simp_rw [h1] |
| 340 | + rw [Finset.sum_range_succ' (fun i => x₀ ᵥ* M ^ i), Finset.sum_range_succ, pow_zero, |
| 341 | + Matrix.vecMul_one]; abel |
| 342 | + rw [hsimp, WithLp.toLp_smul, norm_smul, Real.norm_eq_abs, abs_of_pos (inv_pos.mpr hk)] |
| 343 | + have h1 := vecMul_mem_stdSimplex (rowStochastic.pow_mem hM (k + 1)) hx₀ |
| 344 | + have : ‖WithLp.toLp 1 (x₀ ᵥ* M ^ (k + 1) - x₀)‖ ≤ 2 := calc |
| 345 | + ‖WithLp.toLp 1 (x₀ ᵥ* M ^ (k + 1) - x₀)‖₊ |
| 346 | + _ = ‖WithLp.toLp 1 (x₀ ᵥ* M ^ (k + 1)) - WithLp.toLp 1 x₀‖₊ := by rw [← WithLp.toLp_sub] |
| 347 | + _ ≤ ‖WithLp.toLp 1 (x₀ ᵥ* M ^ (k + 1))‖₊ + ‖WithLp.toLp 1 x₀‖₊ := nnnorm_sub_le _ _ |
| 348 | + _ = 2 := by rw [l1_nnnorm_eq_one h1, l1_nnnorm_eq_one hx₀]; norm_num |
| 349 | + calc (k + 1 : ℝ)⁻¹ * ‖WithLp.toLp 1 (x₀ ᵥ* M ^ (k + 1) - x₀)‖ |
| 350 | + _ ≤ (k + 1 : ℝ)⁻¹ * 2 := by gcongr |
| 351 | + _ = 2 / (k + 1) := by ring |
| 352 | + |
| 353 | +variable [Nonempty n] |
| 354 | + |
| 355 | +omit [DecidableEq n] in |
| 356 | +/-- The uniform distribution belongs to the standard simplex. -/ |
| 357 | +lemma uniformDistribution_mem_stdSimplex : uniformDistribution ℝ (n := n) ∈ stdSimplex ℝ n := |
| 358 | + ⟨fun _ => by simp only [uniformDistribution, one_div, inv_nonneg]; positivity, |
| 359 | + by simp [uniformDistribution, Finset.card_univ, nsmul_eq_mul]⟩ |
| 360 | + |
| 361 | +namespace rowStochastic |
| 362 | + |
| 363 | +/-- Every row-stochastic matrix on a finite nonempty state space has a stationary distribution. -/ |
| 364 | +theorem exists_stationary_distribution (hM : M ∈ rowStochastic ℝ n) : |
| 365 | + ∃ μ : n → ℝ, μ ∈ stdSimplex ℝ n ∧ IsStationary μ M := by |
| 366 | + let x₀ := uniformDistribution ℝ (n := n) |
| 367 | + let xₖ : ℕ → n → ℝ := fun k => cesaroAverage x₀ M k |
| 368 | + have hxₖ k : xₖ k ∈ stdSimplex ℝ n := |
| 369 | + cesaroAverage_mem_stdSimplex hM uniformDistribution_mem_stdSimplex k |
| 370 | + obtain ⟨μ, hμ_mem, nₖ, hnₖ_mono, hnₖ_lim⟩ := (isCompact_stdSimplex n).tendsto_subseq hxₖ |
| 371 | + refine ⟨μ, hμ_mem, ⟨?_⟩⟩ |
| 372 | + have h_lim_diff : Tendsto (fun k => xₖ (nₖ k) ᵥ* M - xₖ (nₖ k)) atTop (nhds (μ ᵥ* M - μ)) := |
| 373 | + ((Continuous.matrix_vecMul continuous_id continuous_const).continuousAt.tendsto.comp |
| 374 | + hnₖ_lim).sub hnₖ_lim |
| 375 | + have h_error_tendsto : Tendsto (fun k => ‖WithLp.toLp 1 (xₖ (nₖ k) ᵥ* M - xₖ (nₖ k))‖) |
| 376 | + atTop (nhds 0) := |
| 377 | + tendsto_of_tendsto_of_tendsto_of_le_of_le tendsto_const_nhds |
| 378 | + ((tendsto_const_nhds.div_atTop tendsto_id).comp |
| 379 | + ((tendsto_natCast_atTop_atTop.comp hnₖ_mono.tendsto_atTop).atTop_add tendsto_const_nhds)) |
| 380 | + (fun _ => norm_nonneg _) |
| 381 | + (fun k => norm_cesaroAverage_vecMul_sub_le hM uniformDistribution_mem_stdSimplex (nₖ k)) |
| 382 | + have h_norm_zero := tendsto_nhds_unique |
| 383 | + (((PiLp.continuous_toLp 1 (fun _ => ℝ)).continuousAt.tendsto.comp h_lim_diff).norm) |
| 384 | + h_error_tendsto |
| 385 | + exact sub_eq_zero.mp ((WithLp.toLp_injective 1).eq_iff.mp (norm_eq_zero.mp h_norm_zero)) |
| 386 | + |
| 387 | +end rowStochastic |
| 388 | + |
| 389 | +end CesaroAverageReal |
0 commit comments