|
| 1 | +/- |
| 2 | +Copyright (c) 2026 Akhilesh Balaji. All rights reserved. |
| 3 | +Released under Apache 2.0 license as described in the file LICENSE. |
| 4 | +Authors: Akhilesh Balaji, Ching-Tsun Chou |
| 5 | +-/ |
| 6 | + |
| 7 | +module |
| 8 | + |
| 9 | +public import Cslib.Computability.Languages.RegularLanguage |
| 10 | +public import Mathlib.Data.Finite.Card |
| 11 | + |
| 12 | +/-! # Myhill-Nerode Theorem |
| 13 | +
|
| 14 | +Let `l` be a language on an alphabet `α`. The Nerode congruence (referred to as `c_l` |
| 15 | +in comments below) of a language `l` is a right congruence on strings where two strings are |
| 16 | +related iff all their right extensions are either both in the language or both not in it. |
| 17 | +
|
| 18 | +The Myhill-Nerode theorem has three parts [WikipediaMyhillNerode2026]: |
| 19 | +
|
| 20 | +(1) `l` is regular iff `c_l` has a finite number `N` of equivalence classes. |
| 21 | +
|
| 22 | +(2) `N` is the number of states of the minimal DFA accepting `l`. |
| 23 | +
|
| 24 | +(3) The minimal DFA is unique up to unique isomorphism. That is, for any |
| 25 | + minimal DFA accepting `l`, there exists exactly an isomorphism from it to the |
| 26 | + canonical DFA whose states are the equivalence classses of `c_l`, whose |
| 27 | + state transitions are of the form `⟦ x ⟧ → ⟦ x ++ [a] ⟧` (where `a : α` |
| 28 | + and `x : List α`), whose initial state is `⟦ [] ⟧`, and whose accepting states |
| 29 | + are `{ ⟦ x ⟧ | x ∈ l }`. |
| 30 | +
|
| 31 | +## References |
| 32 | +
|
| 33 | +* [J. E. Hopcroft, R. Motwani, J. D. Ullman, |
| 34 | + *Introduction to Automata Theory, Languages, and Computation*][Hopcroft2006] |
| 35 | +* [T. Malkin, *COMS W3261: Computer Science Theory, Handout 3: The Myhill-Nerode Theorem |
| 36 | + and Implications*][Malkin2024] |
| 37 | +* [Wikipedia contributors, Myhill–Nerode theorem][WikipediaMyhillNerode2026] |
| 38 | +-/ |
| 39 | + |
| 40 | +@[expose] public section |
| 41 | + |
| 42 | +variable {α : Type} |
| 43 | + |
| 44 | +namespace Language |
| 45 | + |
| 46 | +open Cslib Language Automata DA FinAcc Acceptor Function |
| 47 | +open scoped RightCongruence |
| 48 | + |
| 49 | +/-- The Nerode congruence (henceforth called `c_l`) of a language `l` is a right congruence on |
| 50 | +strings where two strings are related iff all their right extensions are either both in the language |
| 51 | +or both not in it. -/ |
| 52 | +@[implicit_reducible] |
| 53 | +def NerodeCongruence (l : Language α) : RightCongruence α where |
| 54 | + r x y := ∀ z, x ++ z ∈ l ↔ y ++ z ∈ l |
| 55 | + iseqv.refl := by grind |
| 56 | + iseqv.symm := by grind |
| 57 | + iseqv.trans := by grind |
| 58 | + right_cov.elim := by grind [Covariant] |
| 59 | + |
| 60 | +/-- The quotient type of a Nerode congruence. -/ |
| 61 | +abbrev NerodeQuotient (l : Language α) := Quotient l.NerodeCongruence.eq |
| 62 | + |
| 63 | +/-- The Nerode congruence of a language `l` gives rise to a DFA where each state corresponds to an |
| 64 | +equivalence class of the language under the Nerode congruence. Note that this is simply the DFA |
| 65 | +given rise to by the underlying right congruence with only the accept states specified here as |
| 66 | +`{⟦ x ⟧ | x ∈ l}`. -/ |
| 67 | +def NerodeCongruenceDA (l : Language α) : DA.FinAcc (l.NerodeQuotient) α := |
| 68 | + FinAcc.mk l.NerodeCongruence.toDA ((⟦·⟧) '' l) |
| 69 | + |
| 70 | +variable {l : Language α} |
| 71 | + |
| 72 | +/-- The DFA constructed from the Nerode congruence `c_l` on `l` accepts `l`. -/ |
| 73 | +@[simp, scoped grind =] |
| 74 | +theorem nerodeCongruenceDA_language_eq (l : Language α) : |
| 75 | + language (l.NerodeCongruenceDA) = l := by |
| 76 | + ext x |
| 77 | + simp only [NerodeCongruenceDA, language, Acceptor.Accepts, congr_mtr_eq, Set.mem_image] |
| 78 | + constructor |
| 79 | + · rintro ⟨y, hy, heq⟩ |
| 80 | + have h1 := Quotient.eq.mp heq [] |
| 81 | + simp only [List.append_nil] at h1 |
| 82 | + simpa [← h1] |
| 83 | + · intro hx |
| 84 | + use x, hx |
| 85 | + |
| 86 | +/-- The statement (two strings are related by the Nerode congruence `c_l` on `l` iff all their right |
| 87 | +extensions are either both in the language or both not in it) is equivalent to stating that (all |
| 88 | +their right extensions are either both accepted or rejected by the DFA given rise to by `c_l`.) -/ |
| 89 | +theorem da_nerodeCongruence_iff {State : Type*} (M : DA.FinAcc State α) (x y : List α) : |
| 90 | + ((language M).NerodeCongruence).r x y ↔ |
| 91 | + ∀ z, M.mtr (M.mtr M.start x) z ∈ M.accept ↔ M.mtr (M.mtr M.start y) z ∈ M.accept := by |
| 92 | + simp only [FLTS.mtr, ← List.foldl_append] |
| 93 | + rfl |
| 94 | + |
| 95 | +/-- If `l` is regular then the Nerode congruence on `l` has finitely many equivalence classes. -/ |
| 96 | +theorem IsRegular.finite_nerodeQuotient (h : l.IsRegular) : |
| 97 | + Finite (l.NerodeQuotient) := by |
| 98 | + obtain ⟨State, hFin, M, hM⟩ := IsRegular.iff_dfa.mp h |
| 99 | + rw [← hM] |
| 100 | + apply Finite.of_surjective (fun s ↦ ⟦ Classical.epsilon (fun x ↦ M.mtr M.start x = s) ⟧) |
| 101 | + intro q |
| 102 | + induction q using Quotient.inductionOn with |
| 103 | + | h x => |
| 104 | + use M.mtr M.start x |
| 105 | + apply Quotient.sound |
| 106 | + simp [da_nerodeCongruence_iff, |
| 107 | + @Classical.epsilon_spec _ (fun y ↦ M.mtr M.start y = M.mtr M.start x) ⟨x, rfl⟩] |
| 108 | + |
| 109 | +-- Myhill-Nerode (1) |
| 110 | + |
| 111 | +/-- `l` is regular iff the Nerode congruence on `l` has finitely many equivalence classes. -/ |
| 112 | +@[scoped grind =] |
| 113 | +theorem IsRegular.iff_finite_nerodeQuotient {l : Language α} : |
| 114 | + l.IsRegular ↔ Finite (l.NerodeQuotient) := by |
| 115 | + constructor |
| 116 | + · intro h |
| 117 | + exact IsRegular.finite_nerodeQuotient h |
| 118 | + · intro h |
| 119 | + apply IsRegular.iff_dfa.mpr |
| 120 | + use l.NerodeQuotient, h, l.NerodeCongruenceDA, nerodeCongruenceDA_language_eq l |
| 121 | + |
| 122 | +/-- Given a set of strings all distinguishable by `l` (i.e., not related to each other by the Nerode |
| 123 | +congruence on `l`), the number of states in the DFA accepting `l` is at least the number of strings |
| 124 | +in the set. -/ |
| 125 | +theorem dfa_num_state_ge |
| 126 | + {l : Language α} {ws : Set (List α)} [Finite ws] |
| 127 | + (hws : ws.Pairwise (¬ (l.NerodeCongruence).r · ·)) |
| 128 | + {State : Type*} [Finite State] {M : DA.FinAcc State α} (hm : language M = l) : |
| 129 | + Nat.card State ≥ Nat.card ws := by |
| 130 | + -- In this proof it is easier to work with `Fintype` rather than `Finite` because of the use of |
| 131 | + -- the theorem `Fintype.exists_ne_map_eq_of_card_lt` below. |
| 132 | + have : Fintype State := Fintype.ofFinite _ |
| 133 | + have : Fintype ws := Fintype.ofFinite _ |
| 134 | + simp only [Nat.card_eq_fintype_card] |
| 135 | + by_contra! h |
| 136 | + by_cases h_card : Fintype.card ws ≤ 1 |
| 137 | + · grind [Fintype.card_pos_iff.mpr ⟨M.start⟩] |
| 138 | + · obtain ⟨⟨x, hx⟩, ⟨y, hy⟩, _, _⟩ := |
| 139 | + Fintype.exists_ne_map_eq_of_card_lt (f := fun w : ws ↦ M.mtr M.start w) h |
| 140 | + grind [hws hx hy, da_nerodeCongruence_iff M x y] |
| 141 | + |
| 142 | +-- Myhill-Nerode (2) |
| 143 | + |
| 144 | +/-- All DFAs accepting `l` must have at least as many states as the number of equivalence classes |
| 145 | +of the Nerode congruence on `l`. -/ |
| 146 | +theorem dfa_num_state_min {State : Type} {M : DA.FinAcc State α} [Finite State] : |
| 147 | + Nat.card State ≥ Nat.card (language M).NerodeQuotient := by |
| 148 | + let ws : Set (List α) := Set.range (Quotient.out : NerodeQuotient (language M) → List α) |
| 149 | + have : Finite (language M).NerodeQuotient := |
| 150 | + IsRegular.iff_finite_nerodeQuotient.mp (IsRegular.iff_dfa.mpr ⟨State, inferInstance, M, rfl⟩) |
| 151 | + have : Finite ws := Set.finite_range _ |>.to_subtype |
| 152 | + have hws : ws.Pairwise (fun x y ↦ ¬((language M).NerodeCongruence).r x y) := by |
| 153 | + rintro _ ⟨qx, rfl⟩ _ ⟨qy, rfl⟩ hne h |
| 154 | + apply hne |
| 155 | + simpa using Quotient.sound h |
| 156 | + have h1 := dfa_num_state_ge hws rfl |
| 157 | + rw [Nat.card_congr (Equiv.ofInjective _ Quotient.out_injective).symm] at h1 |
| 158 | + assumption |
| 159 | + |
| 160 | +end Language |
| 161 | + |
| 162 | +namespace Cslib.Automata.DA.FinAcc |
| 163 | + |
| 164 | +open Cslib Language Automata DA FinAcc Acceptor |
| 165 | +open scoped RightCongruence |
| 166 | + |
| 167 | +/-- The minimal DFA accepting `l` has the same number of states as the number of equivalence classes |
| 168 | +of the Nerode congruence on `l`. -/ |
| 169 | +def IsMinimalAutomaton {State : Type*} (M : FinAcc State α) (l : Language α) := |
| 170 | + language M = l ∧ Nat.card State = Nat.card l.NerodeQuotient |
| 171 | + |
| 172 | +/-- Given a DFA `M`, two strings are related iff they reach the same state under when run through |
| 173 | +`M`. The Nerode congruence is the state congruence with respect to the minimal DFA accepting `l`. -/ |
| 174 | +@[implicit_reducible] |
| 175 | +def StateCongruence (M : FinAcc State α) : RightCongruence α where |
| 176 | + r x y := ∀ z, M.mtr M.start (x ++ z) = M.mtr M.start (y ++ z) |
| 177 | + iseqv.refl := by grind |
| 178 | + iseqv.symm := by grind |
| 179 | + iseqv.trans := by grind |
| 180 | + right_cov.elim := by grind [Covariant] |
| 181 | + |
| 182 | +variable {M : FinAcc State α} |
| 183 | + |
| 184 | +/-- The Nerode congruence is the most coarse state congruence given a language. -/ |
| 185 | +theorem stateCongruence_le_nerodeCongruence {x y : List α} |
| 186 | + (h : (StateCongruence M).r x y) : ((language M).NerodeCongruence).r x y := by |
| 187 | + intro z |
| 188 | + grind [h z, language, Acceptor.Accepts, FLTS.mtr_concat_eq] |
| 189 | + |
| 190 | +-- Myhill-Nerode (3) |
| 191 | + |
| 192 | +/-- The minimal DFA `M` accepting the language `l` is unique up to unique isomorphism. -/ |
| 193 | +theorem unique_minimal [Finite State] |
| 194 | + (l : Language α) (hr : l.IsRegular) (hm : M.IsMinimalAutomaton l) : |
| 195 | + ∃! φ : State ≃ l.NerodeQuotient, ∀ x, φ (M.mtr M.start x) = ⟦ x ⟧ := by |
| 196 | + obtain ⟨rfl, hc⟩ := hm |
| 197 | + have := Language.IsRegular.iff_finite_nerodeQuotient.mp hr |
| 198 | + let φ : State → Quotient ((language M).NerodeCongruence).eq := |
| 199 | + fun s ↦ ⟦ Classical.epsilon (fun x : List α ↦ M.mtr M.start x = s) ⟧ |
| 200 | + have hφ (x : List α) : φ (M.mtr M.start x) = ⟦ x ⟧ := by |
| 201 | + apply Quotient.sound |
| 202 | + apply stateCongruence_le_nerodeCongruence |
| 203 | + intro z |
| 204 | + have := @Classical.epsilon_spec _ (fun y : List α ↦ M.mtr M.start y = M.mtr M.start x) ⟨x, rfl⟩ |
| 205 | + grind [FLTS.mtr] |
| 206 | + have hφ_surj : Function.Surjective φ := fun q ↦ q.inductionOn (fun x ↦ ⟨M.mtr M.start x, hφ x⟩) |
| 207 | + have hφ_inj : Function.Injective φ := by |
| 208 | + have eqT := Classical.inhabited_of_nonempty <| Finite.card_eq.mp hc |
| 209 | + apply hφ_surj.injective_of_finite eqT.default |
| 210 | + use Equiv.ofBijective φ ⟨hφ_inj, hφ_surj⟩, hφ |
| 211 | + intro ψ hψ |
| 212 | + ext s |
| 213 | + induction h : φ s using Quotient.inductionOn with |
| 214 | + | h x => grind [hφ_inj ((hφ x).trans h.symm)] |
| 215 | + |
| 216 | +end Cslib.Automata.DA.FinAcc |
0 commit comments