(* File: Non_Regular_Languages.thy Author: Manuel Eberl <eberlm@in.tum.de> This file provides some tools for showing the non-regularity of a language, either via an infinite set of equivalence classes or via the Pumping Lemma. *) section ‹Tools for showing non-regularity of a language› theory Non_Regular_Languages imports Myhill begin subsection ‹Auxiliary material› lemma bij_betw_image_quotient: "bij_betw (λy. f -` {y}) (f ` A) (A // {(a,b). f a = f b})" by (force simp: bij_betw_def inj_on_def image_image quotient_def) lemma regular_Derivs_finite: fixes r :: "'a :: finite rexp" shows "finite (range (λw. Derivs w (lang r)))" proof - have "?thesis ⟷ finite (UNIV // ≈lang r)" unfolding str_eq_conv_Derivs by (rule bij_betw_finite bij_betw_image_quotient)+ also have "…" by (subst Myhill_Nerode [symmetric]) auto finally show ?thesis . qed lemma Nil_in_Derivs_iff: "[] ∈ Derivs w A ⟷ w ∈ A" by (auto simp: Derivs_def) (* TODO: Move to distribution? *) text ‹ The following operation repeats a list $n$ times (usually written as $w ^ n$). › primrec repeat :: "nat ⇒ 'a list ⇒ 'a list" where "repeat 0 xs = []" | "repeat (Suc n) xs = xs @ repeat n xs" lemma repeat_Cons_left: "repeat (Suc n) xs = xs @ repeat n xs" by simp lemma repeat_Cons_right: "repeat (Suc n) xs = repeat n xs @ xs" by (induction n) simp_all lemma repeat_Cons_append_commute [simp]: "repeat n xs @ xs = xs @ repeat n xs" by (subst repeat_Cons_right [symmetric]) simp lemma repeat_Cons_add [simp]: "repeat (m + n) xs = repeat m xs @ repeat n xs" by (induction m) simp_all lemma repeat_Nil [simp]: "repeat n [] = []" by (induction n) simp_all lemma repeat_conv_replicate: "repeat n xs = concat (replicate n xs)" by (induction n) simp_all (* TODO: Move to distribution? *) lemma nth_prefixes [simp]: "n ≤ length xs ⟹ prefixes xs ! n = take n xs" by (induction xs arbitrary: n) (auto simp: nth_Cons split: nat.splits) lemma nth_suffixes [simp]: "n ≤ length xs ⟹ suffixes xs ! n = drop (length xs - n) xs" by (subst suffixes_conv_prefixes) (simp_all add: rev_take) lemma length_take_prefixes: assumes "xs ∈ set (take n (prefixes ys))" shows "length xs < n" proof (cases "n ≤ Suc (length ys)") case True with assms obtain i where "i < n" "xs = take i ys" by (subst (asm) nth_image [symmetric]) auto thus ?thesis by simp next case False with assms have "prefix xs ys" by simp hence "length xs ≤ length ys" by (rule prefix_length_le) also from False have "… < n" by simp finally show ?thesis . qed subsection ‹Non-regularity by giving an infinite set of equivalence classes› text ‹ Non-regularity can be shown by giving an infinite set of non-equivalent words (w.r.t. the Myhill--Nerode relation). › lemma not_regular_langI: assumes "infinite B" "⋀x y. x ∈ B ⟹ y ∈ B ⟹ x ≠ y ⟹ ∃w. ¬(x @ w ∈ A ⟷ y @ w ∈ A)" shows "¬regular_lang (A :: 'a :: finite list set)" proof - have "(λw. Derivs w A) ` B ⊆ range (λw. Derivs w A)" by blast moreover from assms(2) have "inj_on (λw. Derivs w A) B" by (auto simp: inj_on_def Derivs_def) with assms(1) have "infinite ((λw. Derivs w A) ` B)" by (blast dest: finite_imageD) ultimately have "infinite (range (λw. Derivs w A))" by (rule infinite_super) with regular_Derivs_finite show ?thesis by blast qed lemma not_regular_langI': assumes "infinite B" "⋀x y. x ∈ B ⟹ y ∈ B ⟹ x ≠ y ⟹ ∃w. ¬(f x @ w ∈ A ⟷ f y @ w ∈ A)" shows "¬regular_lang (A :: 'a :: finite list set)" proof (rule not_regular_langI) from assms(2) have "inj_on f B" by (force simp: inj_on_def) with ‹infinite B› show "infinite (f ` B)" by (simp add: finite_image_iff) qed (insert assms, auto) subsection ‹The Pumping Lemma› text ‹ The Pumping lemma can be shown very easily from the Myhill--Nerode theorem: if we have a word whose length is more than the (finite) number of equivalence classes, then it must have two different prefixes in the same class and the difference between these two prefixes can then be ``pumped''. › lemma pumping_lemma_aux: fixes A :: "'a list set" defines "δ ≡ λw. Derivs w A" defines "n ≡ card (range δ)" assumes "z ∈ A" "finite (range δ)" "length z ≥ n" shows "∃u v w. z = u @ v @ w ∧ length (u @ v) ≤ n ∧ v ≠ [] ∧ (∀i. u @ repeat i v @ w ∈ A)" proof - define P where "P = set (take (Suc n) (prefixes z))" from ‹length z ≥ n› have [simp]: "card P = Suc n" unfolding P_def by (subst distinct_card) (auto intro!: distinct_take) have length_le: "length y ≤ n" if "y ∈ P" for y using length_take_prefixes[OF that [unfolded P_def]] by simp have "card (δ ` P) ≤ card (range δ)" by (intro card_mono assms) auto also from assms have "… < card P" by simp finally have "¬inj_on δ P" by (rule pigeonhole) then obtain a b where ab: "a ∈ P" "b ∈ P" "a ≠ b" "Derivs a A = Derivs b A" by (auto simp: inj_on_def δ_def) from ab have prefix_ab: "prefix a z" "prefix b z" by (auto simp: P_def dest: in_set_takeD) from ab have length_ab: "length a ≤ n" "length b ≤ n" by (simp_all add: length_le) have *: ?thesis if uz': "prefix u z'" "prefix z' z" "length z' ≤ n" "u ≠ z'" "Derivs z' A = Derivs u A" for u z' proof - from ‹prefix u z'› and ‹u ≠ z'› obtain v where v [simp]: "z' = u @ v" "v ≠ []" by (auto simp: prefix_def) from ‹prefix z' z› obtain w where [simp]: "z = u @ v @ w" by (auto simp: prefix_def) hence [simp]: "Derivs (repeat i v) (Derivs u A) = Derivs u A" for i by (induction i) (use uz' in simp_all) have "Derivs z A = Derivs (u @ repeat i v @ w) A" for i using uz' by simp with ‹z ∈ A› and uz' have "∀i. u @ repeat i v @ w ∈ A" by (simp add: Nil_in_Derivs_iff [of _ A, symmetric]) moreover have "z = u @ v @ w" by simp moreover from ‹length z' ≤ n› have "length (u @ v) ≤ n" by simp ultimately show ?thesis using ‹v ≠ []› by blast qed from prefix_ab have "prefix a b ∨ prefix b a" by (rule prefix_same_cases) with *[of a b] and *[of b a] and ab and prefix_ab and length_ab show ?thesis by blast qed theorem pumping_lemma: fixes r :: "'a :: finite rexp" obtains n where "⋀z. z ∈ lang r ⟹ length z ≥ n ⟹ ∃u v w. z = u @ v @ w ∧ length (u @ v) ≤ n ∧ v ≠ [] ∧ (∀i. u @ repeat i v @ w ∈ lang r)" proof - let ?n = "card (range (λw. Derivs w (lang r)))" have "∃u v w. z = u @ v @ w ∧ length (u @ v) ≤ ?n ∧ v ≠ [] ∧ (∀i. u @ repeat i v @ w ∈ lang r)" if "z ∈ lang r" and "length z ≥ ?n" for z by (intro pumping_lemma_aux[of z] that regular_Derivs_finite) thus ?thesis by (rule that) qed corollary pumping_lemma_not_regular_lang: fixes A :: "'a :: finite list set" assumes "⋀n. length (z n) ≥ n" and "⋀n. z n ∈ A" assumes "⋀n u v w. z n = u @ v @ w ⟹ length (u @ v) ≤ n ⟹ v ≠ [] ⟹ u @ repeat (i n u v w) v @ w ∉ A" shows "¬regular_lang A" proof assume "regular_lang A" then obtain r where r: "lang r = A" by blast from pumping_lemma[of r] guess n . from this[of "z n"] and assms[of n] obtain u v w where "z n = u @ v @ w" and "length (u @ v) ≤ n" and "v ≠ []" and "⋀i. u @ repeat i v @ w ∈ lang r" by (auto simp: r) with assms(3)[of n u v w] show False by (auto simp: r) qed subsection ‹Examples› text ‹ The language of all words containing the same number of $a$s and $b$s is not regular. › lemma "¬regular_lang {w. length (filter id w) = length (filter Not w)}" (is "¬regular_lang ?A") proof (rule not_regular_langI') show "infinite (UNIV :: nat set)" by simp next fix m n :: nat assume "m ≠ n" hence "replicate m True @ replicate m False ∈ ?A" and "replicate n True @ replicate m False ∉ ?A" by simp_all thus "∃w. ¬(replicate m True @ w ∈ ?A ⟷ replicate n True @ w ∈ ?A)" by blast qed text ‹ The language $\{a^i b^i\ |\ i \in \mathbb{N}\}$ is not regular. › lemma eq_replicate_iff: "xs = replicate n x ⟷ set xs ⊆ {x} ∧ length xs = n" using replicate_length_same[of xs x] by (subst eq_commute) auto lemma replicate_eq_appendE: assumes "xs @ ys = replicate n x" obtains i j where "n = i + j" "xs = replicate i x" "ys = replicate j x" proof - have "n = length (replicate n x)" by simp also note assms [symmetric] finally have "n = length xs + length ys" by simp moreover have "xs = replicate (length xs) x" and "ys = replicate (length ys) x" using assms by (auto simp: eq_replicate_iff) ultimately show ?thesis using that[of "length xs" "length ys"] by auto qed lemma "¬regular_lang (range (λi. replicate i True @ replicate i False))" (is "¬regular_lang ?A") proof (rule pumping_lemma_not_regular_lang) fix n :: nat show "length (replicate n True @ replicate n False) ≥ n" by simp show "replicate n True @ replicate n False ∈ ?A" by simp next fix n :: nat and u v w :: "bool list" assume decomp: "replicate n True @ replicate n False = u @ v @ w" and length_le: "length (u @ v) ≤ n" and v_ne: "v ≠ []" define w1 w2 where "w1 = take (n - length (u@v)) w" and "w2 = drop (n - length (u@v)) w" have w_decomp: "w = w1 @ w2" by (simp add: w1_def w2_def) have "replicate n True = take n (replicate n True @ replicate n False)" by simp also note decomp also have "take n (u @ v @ w) = u @ v @ w1" using length_le by (simp add: w1_def) finally have "u @ v @ w1 = replicate n True" by simp then obtain i j k where uvw1: "n = i + j + k" "u = replicate i True" "v = replicate j True" "w1 = replicate k True" by (elim replicate_eq_appendE) auto have "replicate n False = drop n (replicate n True @ replicate n False)" by simp also note decomp finally have [simp]: "w2 = replicate n False" using length_le by (simp add: w2_def) have "u @ repeat (Suc (Suc 0)) v @ w = replicate (n + j) True @ replicate n False" by (simp add: uvw1 w_decomp replicate_add [symmetric]) also have "… ∉ ?A" proof safe fix m assume *: "replicate (n + j) True @ replicate n False = replicate m True @ replicate m False" (is "?lhs = ?rhs") have "n = length (filter Not ?lhs)" by simp also note * also have "length (filter Not ?rhs) = m" by simp finally have [simp]: "m = n" by simp from * have "v = []" by (simp add: uvw1) with ‹v ≠ []› show False by contradiction qed finally show "u @ repeat (Suc (Suc 0)) v @ w ∉ ?A" . qed end