section ‹Word Problem for Context-Free Grammars› (*<*) theory Context_Free_Grammar imports Coinductive_Language "HOL-Library.FSet" begin (*>*) section ‹Context Free Languages› text ‹ A context-free grammar consists of a list of productions for every nonterminal and an initial nonterminal. The productions are required to be in weak Greibach normal form, i.e. each right hand side of a production must either be empty or start with a terminal. › abbreviation "wgreibach α ≡ (case α of (Inr N # _) ⇒ False | _ ⇒ True)" record ('t, 'n) cfg = init :: "'n :: finite" prod :: "'n ⇒ ('t + 'n) list fset" context fixes G :: "('t, 'n :: finite) cfg" begin inductive in_cfl where "in_cfl [] []" | "in_cfl α w ⟹ in_cfl (Inl a # α) (a # w)" | "fBex (prod G N) (λβ. in_cfl (β @ α) w) ⟹ in_cfl (Inr N # α) w" abbreviation lang_trad where "lang_trad ≡ {w. in_cfl [Inr (init G)] w}" fun 𝔬⇩_{P}where "𝔬⇩_{P}[] = True" | "𝔬⇩_{P}(Inl _ # _) = False" | "𝔬⇩_{P}(Inr N # α) = ([] |∈| prod G N ∧ 𝔬⇩_{P}α)" fun 𝔡⇩_{P}where "𝔡⇩_{P}[] a = {||}" | "𝔡⇩_{P}(Inl b # α) a = (if a = b then {|α|} else {||})" | "𝔡⇩_{P}(Inr N # α) a = (λβ. tl β @ α) |`| ffilter (λβ. β ≠ [] ∧ hd β = Inl a) (prod G N) |∪| (if [] |∈| prod G N then 𝔡⇩_{P}α a else {||})" primcorec subst :: "('t + 'n) list fset ⇒ 't language" where "subst P = Lang (fBex P 𝔬⇩_{P}) (λa. subst (ffUnion ((λr. 𝔡⇩_{P}r a) |`| P)))" inductive in_cfls where "fBex P 𝔬⇩_{P}⟹ in_cfls P []" | "in_cfls (ffUnion ((λα. 𝔡⇩_{P}α a) |`| P)) w ⟹ in_cfls P (a # w)" inductive_cases [elim!]: "in_cfls P []" inductive_cases [elim!]: "in_cfls P (a # w)" declare inj_eq[OF bij_is_inj[OF to_language_bij], simp] lemma subst_in_cfls: "subst P = to_language {w. in_cfls P w}" by (coinduction arbitrary: P) (auto intro: in_cfls.intros) lemma 𝔬⇩_{P}_in_cfl: "𝔬⇩_{P}α ⟹ in_cfl α []" by (induct α rule: 𝔬⇩_{P}.induct) (auto intro!: in_cfl.intros elim: fBexI[rotated]) lemma 𝔡⇩_{P}_in_cfl: "β |∈| 𝔡⇩_{P}α a ⟹ in_cfl β w ⟹ in_cfl α (a # w)" proof (induct α a arbitrary: β w rule: 𝔡⇩_{P}.induct) case (3 N α a) then show ?case by (auto simp: rev_fBexI neq_Nil_conv split: if_splits intro!: in_cfl.intros elim!: rev_fBexI[of "_ # _"]) qed (auto split: if_splits intro: in_cfl.intros) lemma in_cfls_in_cfl: "in_cfls P w ⟹ fBex P (λα. in_cfl α w)" by (induct P w rule: in_cfls.induct) (auto simp: 𝔬⇩_{P}_in_cfl 𝔡⇩_{P}_in_cfl ffUnion.rep_eq fmember.rep_eq fBex.rep_eq fBall.rep_eq intro: in_cfl.intros elim: rev_bexI) lemma in_cfls_mono: "in_cfls P w ⟹ P |⊆| Q ⟹ in_cfls Q w" proof (induct P w arbitrary: Q rule: in_cfls.induct) case (2 a P w) from 2(3) 2(2)[of "ffUnion ((λα. local.𝔡⇩_{P}α a) |`| Q)"] show ?case by (auto intro!: ffunion_mono in_cfls.intros) qed (auto intro!: in_cfls.intros) end locale cfg_wgreibach = fixes G :: "('t, 'n :: finite) cfg" assumes weakGreibach: "⋀N α. α |∈| prod G N ⟹ wgreibach α" begin lemma in_cfl_in_cfls: "in_cfl G α w ⟹ in_cfls G {|α|} w" proof (induct α w rule: in_cfl.induct) case (3 N α w) then obtain β where β: "β |∈| prod G N" and in_cfl: "in_cfl G (β @ α) w" and in_cfls: "in_cfls G {|β @ α|} w" by blast then show ?case proof (cases β) case [simp]: Nil from β in_cfls show ?thesis by (cases w) (auto intro!: in_cfls.intros elim: in_cfls_mono) next case [simp]: (Cons x γ) from weakGreibach[OF β] obtain a where [simp]: "x = Inl a" by (cases x) auto with β in_cfls show ?thesis apply - apply (rule in_cfl.cases[OF in_cfl]; auto) apply (force intro: in_cfls.intros(2) elim!: in_cfls_mono) done qed qed (auto intro!: in_cfls.intros) abbreviation lang where "lang ≡ subst G {|[Inr (init G)]|}" lemma lang_lang_trad: "lang = to_language (lang_trad G)" proof - have "in_cfls G {|[Inr (init G)]|} w ⟷ in_cfl G [Inr (init G)] w" for w by (auto dest: in_cfls_in_cfl in_cfl_in_cfls) then show ?thesis by (auto simp: subst_in_cfls) qed end text ‹ The function @{term in_language} decides the word problem for a given language. Since we can construct the language of a CFG using @{const cfg_wgreibach.lang} we obtain an executable (but not very efficient) decision procedure for CFGs for free. › abbreviation "𝔞 ≡ Inl True" abbreviation "𝔟 ≡ Inl False" abbreviation "S ≡ Inr ()" interpretation palindromes: cfg_wgreibach "⦇init = (), prod = λ_. {|[], [𝔞], [𝔟], [𝔞, S, 𝔞], [𝔟, S, 𝔟]|}⦈" by unfold_locales auto lemma "in_language palindromes.lang []" by normalization lemma "in_language palindromes.lang [True]" by normalization lemma "in_language palindromes.lang [False]" by normalization lemma "in_language palindromes.lang [True, True]" by normalization lemma "in_language palindromes.lang [True, False, True]" by normalization lemma "¬ in_language palindromes.lang [True, False]" by normalization lemma "¬ in_language palindromes.lang [True, False, True, False]" by normalization lemma "in_language palindromes.lang [True, False, True, True, False, True]" by normalization lemma "¬ in_language palindromes.lang [True, False, True, False, False, True]" by normalization interpretation Dyck: cfg_wgreibach "⦇init = (), prod = λ_. {|[], [𝔞, S, 𝔟, S]|}⦈" by unfold_locales auto lemma "in_language Dyck.lang []" by normalization lemma "¬ in_language Dyck.lang [True]" by normalization lemma "¬ in_language Dyck.lang [False]" by normalization lemma "in_language Dyck.lang [True, False, True, False]" by normalization lemma "in_language Dyck.lang [True, True, False, False]" by normalization lemma "in_language Dyck.lang [True, False, True, False]" by normalization lemma "in_language Dyck.lang [True, False, True, False, True, True, False, False]" by normalization lemma "¬ in_language Dyck.lang [True, False, True, True, False]" by normalization lemma "¬ in_language Dyck.lang [True, True, False, False, False, True]" by normalization interpretation abSSa: cfg_wgreibach "⦇init = (), prod = λ_. {|[], [𝔞, 𝔟, S, S, 𝔞]|}⦈" by unfold_locales auto lemma "in_language abSSa.lang []" by normalization lemma "¬ in_language abSSa.lang [True]" by normalization lemma "¬ in_language abSSa.lang [False]" by normalization lemma "in_language abSSa.lang [True, False, True]" by normalization lemma "in_language abSSa.lang [True, False, True, False, True, True, False, True, True]" by normalization lemma "in_language abSSa.lang [True, False, True, False, True, True]" by normalization lemma "¬ in_language abSSa.lang [True, False, True, True, False]" by normalization lemma "¬ in_language abSSa.lang [True, True, False, False, False, True]" by normalization (*<*) end (*>*)