Theory Context_Free_Language
section "Context-Free Languages"
theory Context_Free_Language
imports
"Regular-Sets.Regular_Set"
Renaming_CFG
Disjoint_Union_CFG
begin
subsection ‹Basic Definitions›
text ‹This definition depends on the type of nonterminals of the grammar.›
definition CFL :: "'n itself ⇒ 't list set ⇒ bool" where
"CFL (TYPE('n)) L = (∃P S::'n. L = Lang P S ∧ finite P)"
text ‹Ideally one would existentially quantify over ‹'n› on the right-hand side, but we cannot
quantify over types in HOL. But we can prove that the type is irrelevant because we can always
use another type via renaming.›
text ‹For hiding the infinite type of nonterminals:›
abbreviation cfl :: "'a lang ⇒ bool" where
"cfl L ≡ CFL (TYPE(nat)) L"
subsection ‹Closure Properties›
lemma CFL_Un_closed:
assumes "CFL TYPE('n1) L1" "CFL TYPE('n2) L2"
shows "CFL TYPE(('n1+'n2)option) (L1 ∪ L2)"
proof -
from assms obtain P1 P2 and S1 :: 'n1 and S2 :: 'n2
where L: "L1 = Lang P1 S1" "L2 = Lang P2 S2" and fin: "finite P1" "finite P2"
unfolding CFL_def by blast
let ?f1 = "Some o (Inl:: 'n1 ⇒ 'n1 + 'n2)"
let ?f2 = "Some o (Inr:: 'n2 ⇒ 'n1 + 'n2)"
let ?P1 = "rename_Prods ?f1 P1"
let ?P2 = "rename_Prods ?f2 P2"
let ?S1 = "?f1 S1"
let ?S2 = "?f2 S2"
let ?P = "{(None, [Nt ?S1]), (None, [Nt ?S2])} ∪ (?P1 ∪ ?P2)"
have "Lang ?P None = Lang ?P1 ?S1 ∪ Lang ?P2 ?S2"
by (rule Lang_disj_Un2)(auto simp: Nts_Un in_Nts_rename_Prods)
moreover have "… = Lang P1 S1 ∪ Lang P2 S2"
proof -
have "Lang ?P1 ?S1 = L1" unfolding ‹L1 = _›
by (meson Lang_rename_Prods comp_inj_on inj_Inl inj_Some)
moreover have "Lang ?P2 ?S2 = L2" unfolding ‹L2 = _›
by (meson Lang_rename_Prods comp_inj_on inj_Inr inj_Some)
ultimately show ?thesis using L by argo
qed
moreover have "finite ?P" using fin by auto
ultimately show ?thesis
unfolding CFL_def using L by blast
qed
lemma CFL_concat_closed:
assumes "CFL TYPE('n1) L1" and "CFL TYPE('n2) L2"
shows "CFL TYPE(('n1 + 'n2) option) (L1 @@ L2)"
proof -
obtain P1 S1 where L1_def: "L1 = Lang P1 (S1::'n1)" "finite P1"
using assms(1) CFL_def[of L1] by auto
obtain P2 S2 where L2_def: "L2 = Lang P2 (S2::'n2)" "finite P2"
using assms(2) CFL_def[of L2] by auto
let ?f1 = "Some o (Inl:: 'n1 ⇒ 'n1 + 'n2)"
let ?f2 = "Some o (Inr:: 'n2 ⇒ 'n1 + 'n2)"
let ?P1 = "rename_Prods ?f1 P1"
let ?P2 = "rename_Prods ?f2 P2"
let ?S1 = "?f1 S1"
let ?S2 = "?f2 S2"
let ?S = "None :: ('n1+'n2)option"
let ?P = "{(None, [Nt ?S1, Nt ?S2])} ∪ (?P1 ∪ ?P2)"
have "inj ?f1" by (simp add: inj_on_def)
then have L1r_def: "L1 = Lang ?P1 ?S1"
using L1_def Lang_rename_Prods[of ?f1 P1 S1] inj_on_def by force
have "inj ?f2" by (simp add: inj_on_def)
then have L2r_def: "L2 = Lang ?P2 ?S2"
using L2_def Lang_rename_Prods[of ?f2 P2 S2] inj_on_def by force
have "Lang ?P ?S = L1 @@ L2" unfolding L1r_def L2r_def
by(rule Lang_concat_disj) (auto simp add: disjoint_iff in_Nts_rename_Prods)
moreover have "finite ?P" using ‹finite P1› ‹finite P2› by auto
ultimately show ?thesis unfolding CFL_def by blast
qed
subsection ‹CFG as an Equation System›
text ‹A CFG can be viewed as a system of equations. The least solution is denoted by ‹Lang_lfp›.›
definition inst_sym :: "('n ⇒ 't lang) ⇒ ('n, 't) sym ⇒ 't lang" where
"inst_sym L s = (case s of Tm a ⇒ {[a]} | Nt A ⇒ L A)"
definition concats :: "'a lang list ⇒ 'a lang" where
"concats Ls = foldr (@@) Ls {[]}"
definition inst_syms :: "('n ⇒ 't lang) ⇒ ('n, 't) syms ⇒ 't lang" where
"inst_syms L w = concats (map (inst_sym L) w)"
definition subst_lang :: "('n,'t)Prods ⇒ ('n ⇒ 't lang) ⇒ ('n ⇒ 't lang)" where
"subst_lang P L = (λA. ⋃w ∈ Rhss P A. inst_syms L w)"
definition Lang_lfp :: "('n, 't) Prods ⇒ 'n ⇒ 't lang" where
"Lang_lfp P = lfp (subst_lang P)"
text ‹Now we show that this ‹lfp› is a Kleene fixpoint.›
lemma inst_sym_Sup_range: "inst_sym (Sup(range F)) = (λs. UN i. inst_sym (F i) s)"
by(auto simp: inst_sym_def fun_eq_iff split: sym.splits)
lemma foldr_map_mono: "F ≤ G ⟹ foldr (@@) (map F xs) Ls ⊆ foldr (@@) (map G xs) Ls"
by(induction xs)(auto simp add: le_fun_def subset_eq)
lemma inst_sym_mono: "F ≤ G ⟹ inst_sym F s ⊆ inst_sym G s"
by (auto simp add: inst_sym_def le_fun_def subset_iff split: sym.splits)
lemma foldr_conc_map_inst_sym:
assumes "omega_chain L"
shows "foldr (@@) (map (λs. ⋃i. inst_sym (L i) s) xs) Ls = (⋃i. foldr (@@) (map (inst_sym (L i)) xs) Ls)"
proof(induction xs)
case Nil
then show ?case by simp
next
case (Cons a xs)
show ?case (is "?l = ?r")
proof
show "?l ⊆ ?r"
proof
fix w assume "w ∈ ?l"
with Cons obtain u v i j
where "w = u @ v" "u ∈ inst_sym (L i) a" "v ∈ foldr (@@) (map (inst_sym (L j)) xs) Ls" by(auto)
then show "w ∈ ?r"
using omega_chain_mono[OF assms, of i "max i j"] omega_chain_mono[OF assms, of j "max i j"]
inst_sym_mono foldr_map_mono[of "inst_sym (L j)" "inst_sym (L (max i j))" xs Ls] concI
unfolding le_fun_def by(simp) blast
qed
next
show "?r ⊆ ?l" using Cons by(fastforce)
qed
qed
lemma omega_cont_Lang_lfp: "omega_cont (subst_lang P)"
unfolding omega_cont_def subst_lang_def
proof (safe)
fix L :: "nat ⇒ 'a ⇒ 'b lang"
assume o: "omega_chain L"
show "(λA. ⋃ (inst_syms (Sup (range L)) ` Rhss P A)) = (SUP i. (λA. ⋃ (inst_syms (L i) ` Rhss P A)))"
(is "(λA. ?l A) = (λA. ?r A)")
proof
fix A :: 'a
have "?l A = ⋃(⋃i. (inst_syms (L i) ` Rhss P A))"
by(auto simp: inst_syms_def inst_sym_Sup_range concats_def foldr_conc_map_inst_sym[OF o])
also have "… = ?r A"
by(auto)
finally show "?l A = ?r A" .
qed
qed
theorem Lang_lfp_SUP: "Lang_lfp P = (SUP n. ((subst_lang P)^^n) (λA. {}))"
using Kleene_lfp[OF omega_cont_Lang_lfp] unfolding Lang_lfp_def bot_fun_def by blast
subsection ‹‹Lang_lfp = Lang››
text ‹We prove that the fixpoint characterization of the language defined by a CFG is equivalent
to the standard language definition via derivations. Both directions are proved separately›
lemma inst_syms_mono: "(⋀A. R A ⊆ R' A) ⟹ w ∈ inst_syms R α ⟹ w ∈ inst_syms R' α"
unfolding inst_syms_def concats_def
by (metis (no_types, lifting) foldr_map_mono in_mono inst_sym_mono le_fun_def)
lemma omega_cont_Lang_lfp_iterates: "omega_chain (λn. ((subst_lang P)^^n) (λA. {}))"
using omega_chain_iterates[OF mono_if_omega_cont, OF omega_cont_Lang_lfp]
unfolding bot_fun_def by blast
lemma in_subst_langD_inst_syms: "w ∈ subst_lang P L A ⟹ ∃α. (A,α)∈P ∧ w ∈ inst_syms L α"
unfolding subst_lang_def inst_syms_def Rhss_def by (auto split: prod.splits)
lemma foldr_conc_conc: "foldr (@@) xs {[]} @@ A = foldr (@@) xs A"
by (induction xs)(auto simp: conc_assoc)
lemma derives_if_inst_syms:
"w ∈ inst_syms (λA. {w. P ⊢ [Nt A] ⇒* map Tm w}) α ⟹ P ⊢ α ⇒* map Tm w"
proof (induction α arbitrary: w)
case Nil
then show ?case unfolding inst_syms_def concats_def by(auto)
next
case (Cons s α)
show ?case
proof (cases s)
case (Nt A)
then show ?thesis using Cons
unfolding inst_syms_def concats_def inst_sym_def by(fastforce simp: derives_Cons_decomp)
next
case (Tm a)
then show ?thesis using Cons
unfolding inst_syms_def concats_def inst_sym_def by(auto simp:derives_Tm_Cons)
qed
qed
lemma derives_if_in_subst_lang: "w ∈ ((subst_lang P)^^n) (λA. {}) A ⟹ P ⊢ [Nt A] ⇒* map Tm w"
proof(induction n arbitrary: w A)
case 0
then show ?case by simp
next
case (Suc n)
let ?L = "((subst_lang P)^^n) (λA. {})"
have *: "?L A ⊆ {w. P ⊢ [Nt A] ⇒* map Tm w}" for A
using Suc.IH by blast
obtain α where α: "(A,α) ∈ P" "w ∈ inst_syms ?L α"
using in_subst_langD_inst_syms[OF Suc.prems[simplified]] by blast
show ?case using α(1) derives_if_inst_syms[OF inst_syms_mono[OF *, of _ "λA. A", OF α(2)]]
by (simp add: derives_Cons_rule)
qed
lemma derives_if_Lang_lfp: "w ∈ Lang_lfp P A ⟹ P ⊢ [Nt A] ⇒* map Tm w"
unfolding Lang_lfp_SUP using derives_if_in_subst_lang
by (metis (mono_tags, lifting) SUP_apply UN_E)
lemma Lang_lfp_subset_Lang: "Lang_lfp P A ⊆ Lang P A"
unfolding Lang_def by(blast intro:derives_if_Lang_lfp)
text ‹The other direction:›
lemma inst_syms_decomp:
"⟦ ∀i < length ws. ws ! i ∈ inst_sym L (α ! i); length α = length ws ⟧
⟹ concat ws ∈ inst_syms L α"
proof (induction ws arbitrary: α)
case Nil
then show ?case unfolding inst_syms_def concats_def by simp
next
case (Cons w ws)
then obtain α1 αr where *: "α = α1 # αr" by (metis Suc_length_conv)
with Cons.prems(2) have "length αr = length ws" by simp
moreover from Cons.prems * have "∀i<length ws. ws ! i ∈ inst_sym L (αr ! i)" by auto
ultimately have "concat ws ∈ inst_syms L αr" using Cons.IH by blast
moreover from Cons.prems * have "w ∈ inst_sym L α1" by fastforce
ultimately show ?case unfolding inst_syms_def concats_def using * by force
qed
lemma Lang_lfp_if_derives_aux: "P ⊢ [Nt A] ⇒(n) map Tm w ⟹ w ∈ ((subst_lang P)^^n) (λA. {}) A"
proof(induction n arbitrary: w A rule: less_induct)
case (less n)
show ?case
proof (cases n)
case 0 then show ?thesis using less.prems by auto
next
case (Suc m)
then obtain α where α_intro: "(A,α) ∈ P" "P ⊢ α ⇒(m) map Tm w"
by (metis deriven_start1 less.prems nat.inject)
then obtain ws ms where *:
"w = concat ws ∧ length α = length ws ∧ length α = length ms
∧ sum_list ms = m ∧ (∀i < length ws. P ⊢ [α ! i] ⇒(ms ! i) map Tm (ws ! i))"
using derive_decomp_Tm by metis
have "∀i < length ws. ws ! i ∈ inst_sym (λA. ((subst_lang P)^^m) (λA. {}) A) (α ! i)"
proof (rule allI | rule impI)+
fix i
show "i < length ws ⟹ ws ! i ∈ inst_sym ((subst_lang P ^^ m) (λA. {})) (α ! i)"
unfolding inst_sym_def
proof (induction "α ! i")
case (Nt B)
with * have **: "ms ! i ≤ m"
by (metis elem_le_sum_list)
with Suc have "ms ! i < n" by force
from less.IH[OF this, of B "ws ! i"] Nt *
have "ws ! i ∈ (subst_lang P ^^ (ms ! i)) (λA. {}) B" by fastforce
with omega_chain_mono[OF omega_cont_Lang_lfp_iterates, OF **]
have "ws ! i ∈ (subst_lang P ^^ m) (λA. {}) B" by (metis le_funD subset_iff)
with Nt show ?case by (metis sym.simps(5))
next
case (Tm a)
with * have "P ⊢ map Tm [a] ⇒(ms ! i) map Tm (ws ! i)" by fastforce
then have "ws ! i ∈ {[a]}" using deriven_from_TmsD by fastforce
with Tm show ?case by (metis sym.simps(6))
qed
qed
from inst_syms_decomp[OF this] * have "w ∈ inst_syms ((subst_lang P ^^ m) (λA. {})) α" by argo
with α_intro have "w ∈ (subst_lang P) (λA. (subst_lang P ^^ m) (λA. {}) A) A"
unfolding subst_lang_def Rhss_def by force
with Suc show ?thesis by force
qed
qed
lemma Lang_lfp_if_derives: "P ⊢ [Nt A] ⇒* map Tm w ⟹ w ∈ Lang_lfp P A"
proof -
assume "P ⊢ [Nt A] ⇒* map Tm w"
then obtain n where "P ⊢ [Nt A] ⇒(n) map Tm w" by (meson rtranclp_power)
from Lang_lfp_if_derives_aux[OF this] have "w ∈ ((subst_lang P)^^n) (λA. {}) A" by argo
with Lang_lfp_SUP show "w ∈ Lang_lfp P A" by (metis (mono_tags, lifting) SUP_apply UNIV_I UN_iff)
qed
theorem Lang_lfp_eq_Lang: "Lang_lfp P A = Lang P A"
unfolding Lang_def by(blast intro: Lang_lfp_if_derives derives_if_Lang_lfp)
end