Theory Context_Free_Grammar

theory Context_Free_Grammar
imports Coinductive_Language FSet
section ‹Word Problem for Context-Free Grammars›
(*<*)
theory Context_Free_Grammar
imports Coinductive_Language "HOL-Library.FSet"
begin
(*>*)


section ‹Context Free Languages›

text ‹
A context-free grammar consists of a list of productions for every nonterminal
and an initial nonterminal. The productions are required to be in weak Greibach
normal form, i.e. each right hand side of a production must either be empty or
start with a terminal.
›

abbreviation "wgreibach α ≡ (case α of (Inr N # _) ⇒ False | _ ⇒ True)"

record ('t, 'n) cfg =
  init :: "'n :: finite"
  prod :: "'n ⇒ ('t + 'n) list fset"

context
  fixes G :: "('t, 'n :: finite) cfg"
begin

inductive in_cfl where
  "in_cfl [] []"
| "in_cfl α w ⟹ in_cfl (Inl a # α) (a # w)"
| "fBex (prod G N) (λβ. in_cfl (β @ α) w) ⟹ in_cfl (Inr N # α) w"

abbreviation lang_trad where
  "lang_trad ≡ {w. in_cfl [Inr (init G)] w}"

fun 𝔬P where
  "𝔬P [] = True"
| "𝔬P (Inl _ # _) = False"
| "𝔬P (Inr N # α) = ([] |∈| prod G N ∧ 𝔬P α)"

fun 𝔡P where
  "𝔡P [] a = {||}"
| "𝔡P (Inl b # α) a = (if a = b then {|α|} else {||})"
| "𝔡P (Inr N # α) a =
    (λβ. tl β @ α) |`| ffilter (λβ. β ≠ [] ∧ hd β = Inl a) (prod G N) |∪|
    (if [] |∈| prod G N then 𝔡P α a else {||})"

primcorec subst :: "('t + 'n) list fset ⇒ 't language" where
  "subst P = Lang (fBex P 𝔬P) (λa. subst (ffUnion ((λr. 𝔡P r a) |`| P)))"

inductive in_cfls where
  "fBex P 𝔬P ⟹ in_cfls P []"
| "in_cfls (ffUnion ((λα. 𝔡P α a) |`| P)) w ⟹ in_cfls P (a # w)"

inductive_cases [elim!]: "in_cfls P []"
inductive_cases [elim!]: "in_cfls P (a # w)"

declare inj_eq[OF bij_is_inj[OF to_language_bij], simp]

lemma subst_in_cfls: "subst P = to_language {w. in_cfls P w}"
  by (coinduction arbitrary: P) (auto intro: in_cfls.intros)

lemma 𝔬P_in_cfl: "𝔬P α ⟹ in_cfl α []"
  by (induct α rule: 𝔬P.induct) (auto intro!: in_cfl.intros elim: fBexI[rotated])

lemma 𝔡P_in_cfl: "β |∈| 𝔡P α a ⟹ in_cfl β w ⟹ in_cfl α (a # w)"
proof (induct α a arbitrary: β w rule: 𝔡P.induct)
  case (3 N α a)
  then show ?case
    by (auto simp: rev_fBexI neq_Nil_conv split: if_splits
      intro!: in_cfl.intros  elim!: rev_fBexI[of "_ # _"])
qed (auto split: if_splits intro: in_cfl.intros)

lemma in_cfls_in_cfl: "in_cfls P w ⟹ fBex P (λα. in_cfl α w)"
  by (induct P w rule: in_cfls.induct)
    (auto simp: 𝔬P_in_cfl 𝔡P_in_cfl ffUnion.rep_eq fmember.rep_eq fBex.rep_eq fBall.rep_eq
      intro: in_cfl.intros elim: rev_bexI)

lemma in_cfls_mono: "in_cfls P w ⟹ P |⊆| Q ⟹ in_cfls Q w"
proof (induct P w arbitrary: Q rule: in_cfls.induct)
  case (2 a P w)
  from 2(3) 2(2)[of "ffUnion ((λα. local.𝔡P α a) |`| Q)"] show ?case
    by (auto intro!: ffunion_mono in_cfls.intros)
qed (auto intro!: in_cfls.intros)

end

locale cfg_wgreibach =
  fixes G :: "('t, 'n :: finite) cfg"
  assumes weakGreibach: "⋀N α. α |∈| prod G N ⟹ wgreibach α"
begin

lemma in_cfl_in_cfls: "in_cfl G α w ⟹ in_cfls G {|α|} w"
proof (induct α w rule: in_cfl.induct)
  case (3 N α w)
  then obtain β where
    β: "β |∈| prod G N" and
    in_cfl: "in_cfl G (β @ α) w" and
    in_cfls: "in_cfls G {|β @ α|} w" by blast
  then show ?case
  proof (cases β)
    case [simp]: Nil
    from β in_cfls show ?thesis
       by (cases w) (auto intro!: in_cfls.intros elim: in_cfls_mono)
  next
    case [simp]: (Cons x γ)
    from weakGreibach[OF β] obtain a where [simp]: "x = Inl a" by (cases x) auto
    with β in_cfls show ?thesis
      apply -
      apply (rule  in_cfl.cases[OF in_cfl]; auto)
      apply (force intro: in_cfls.intros(2) elim!: in_cfls_mono)
      done
  qed
qed (auto intro!: in_cfls.intros)

abbreviation lang where
  "lang ≡ subst G {|[Inr (init G)]|}"

lemma lang_lang_trad: "lang = to_language (lang_trad G)"
proof -
  have "in_cfls G {|[Inr (init G)]|} w ⟷ in_cfl G [Inr (init G)] w" for w
    by (auto dest: in_cfls_in_cfl in_cfl_in_cfls)
  then show ?thesis
    by (auto simp: subst_in_cfls)
qed

end

text ‹
The function @{term in_language} decides the word problem for a given language.
Since we can construct the language of a CFG using @{const cfg_wgreibach.lang} we obtain an
executable (but not very efficient) decision procedure for CFGs for free.
›

abbreviation "𝔞 ≡ Inl True"
abbreviation "𝔟 ≡ Inl False"
abbreviation "S ≡ Inr ()"

interpretation palindromes: cfg_wgreibach "⦇init = (), prod = λ_. {|[], [𝔞], [𝔟], [𝔞, S, 𝔞], [𝔟, S, 𝔟]|}⦈"
  by unfold_locales auto

lemma "in_language palindromes.lang []" by normalization
lemma "in_language palindromes.lang [True]" by normalization
lemma "in_language palindromes.lang [False]" by normalization
lemma "in_language palindromes.lang [True, True]" by normalization
lemma "in_language palindromes.lang [True, False, True]" by normalization
lemma "¬ in_language palindromes.lang [True, False]" by normalization
lemma "¬ in_language palindromes.lang [True, False, True, False]" by normalization
lemma "in_language palindromes.lang [True, False, True, True, False, True]" by normalization
lemma "¬ in_language palindromes.lang [True, False, True, False, False, True]" by normalization

interpretation Dyck: cfg_wgreibach "⦇init = (), prod = λ_. {|[], [𝔞, S, 𝔟, S]|}⦈"
  by unfold_locales auto
lemma "in_language Dyck.lang []" by normalization
lemma "¬ in_language Dyck.lang [True]" by normalization
lemma "¬ in_language Dyck.lang [False]" by normalization
lemma "in_language Dyck.lang [True, False, True, False]" by normalization
lemma "in_language Dyck.lang [True, True, False, False]" by normalization
lemma "in_language Dyck.lang [True, False, True, False]" by normalization
lemma "in_language Dyck.lang [True, False, True, False, True, True, False, False]" by normalization
lemma "¬ in_language Dyck.lang [True, False, True, True, False]" by normalization
lemma "¬ in_language Dyck.lang [True, True, False, False, False, True]" by normalization

interpretation abSSa: cfg_wgreibach "⦇init = (), prod = λ_. {|[], [𝔞, 𝔟, S, S, 𝔞]|}⦈"
  by unfold_locales auto
lemma "in_language abSSa.lang []" by normalization
lemma "¬ in_language abSSa.lang [True]" by normalization
lemma "¬ in_language abSSa.lang [False]" by normalization
lemma "in_language abSSa.lang [True, False, True]" by normalization
lemma "in_language abSSa.lang [True, False, True, False, True, True, False, True, True]" by normalization
lemma "in_language abSSa.lang [True, False, True, False, True, True]" by normalization
lemma "¬ in_language abSSa.lang [True, False, True, True, False]" by normalization
lemma "¬ in_language abSSa.lang [True, True, False, False, False, True]" by normalization

(*<*)
end
(*>*)