Theory Context_Free_Language

(*
Authors: Tobias Nipkow, Fabian Lehr
*)

section "Context-Free Languages"

theory Context_Free_Language
imports
  Renaming_CFG
  Disjoint_Union_CFG
begin


subsection ‹Basic Definitions›

subsubsection concats›

definition concats :: "'a lang list  'a lang" where
"concats Ls = foldr (@@) Ls {[]}"

lemma concats_simps[simp]:
  "concats [] = {[]}"
  "concats (L#Ls) = L @@ concats Ls"
by(auto simp: concats_def)

lemma foldr_conc_conc: "foldr (@@) xs {[]} @@ A = foldr (@@) xs A"
by (induction xs)(auto simp: conc_assoc)

lemma concats_append[simp]: "concats (Ls1 @ Ls2) = concats Ls1 @@ concats Ls2"
by (simp add: concats_def foldr_conc_conc)

lemma concats_empty_iff:
  "concats Ls = {}  (Lset Ls. L = {})"
by(induction Ls) auto

lemma finite_concats: "L  set Ls. finite L  finite(concats Ls)"
by(induction Ls) (simp_all add: finite_conc_if)

lemma infinite_concats:
  " L  set Ls. L  {};  L  set Ls; infinite L   infinite(concats Ls)"
apply(induction Ls arbitrary: L)
 apply simp
apply (simp)
by (metis concats_empty_iff if_finite_conc1 if_finite_conc2)


subsubsection CFL›

text ‹This definition depends on the type of nonterminals of the grammar.›

definition CFL :: "'n itself  't list set  bool" where
"CFL (TYPE('n)) L = (P S::'n. L = Lang P S  finite P)"

text ‹Ideally one would existentially quantify over 'n› on the right-hand side, but we cannot
quantify over types in HOL. But we can prove that the type is irrelevant because we can always
use another type via renaming.›

text ‹For hiding the infinite type of nonterminals:›

abbreviation cfl :: "'a lang  bool" where
"cfl L  CFL (TYPE(nat)) L"


subsection ‹Closure Properties›

lemma CFL_Un_closed:
  assumes "CFL TYPE('n1) L1" "CFL TYPE('n2) L2"
  shows "CFL TYPE(('n1+'n2)option) (L1  L2)"
proof -
  from assms obtain P1 P2 and S1 :: 'n1 and S2 :: 'n2
    where L: "L1 = Lang P1 S1" "L2 = Lang P2 S2" and fin: "finite P1" "finite P2"
    unfolding CFL_def by blast
  let ?f1 = "Some o (Inl:: 'n1  'n1 + 'n2)"
  let ?f2 = "Some o (Inr:: 'n2  'n1 + 'n2)"
  let ?P1 = "rename_Prods ?f1 P1"
  let ?P2 = "rename_Prods ?f2 P2"
  let ?S1 = "?f1 S1"
  let ?S2 = "?f2 S2"
  let ?P = "{(None, [Nt ?S1]), (None, [Nt ?S2])}  (?P1  ?P2)"
  have "Lang ?P None = Lang ?P1 ?S1  Lang ?P2 ?S2"
    by (rule Lang_disj_Un2)(auto simp: Nts_Un in_Nts_rename_Prods)
  moreover have " = Lang P1 S1  Lang P2 S2"
  proof -
    have "Lang ?P1 ?S1 = L1" unfolding L1 = _
      by (meson Lang_rename_Prods comp_inj_on inj_Inl inj_Some)
    moreover have "Lang ?P2 ?S2 = L2" unfolding L2 = _
      by (meson Lang_rename_Prods comp_inj_on inj_Inr inj_Some)
    ultimately show ?thesis using L by argo
  qed
  moreover have "finite ?P" using fin by auto
  ultimately show ?thesis
    unfolding CFL_def using L by blast 
qed

lemma CFL_concat_closed: 
assumes "CFL TYPE('n1) L1" and "CFL TYPE('n2) L2"
shows "CFL TYPE(('n1 + 'n2) option) (L1 @@ L2)"
proof -
  obtain P1 S1 where L1_def: "L1 = Lang P1 (S1::'n1)" "finite P1"
    using assms(1) CFL_def[of L1] by auto 
  obtain P2 S2 where L2_def: "L2 = Lang P2 (S2::'n2)" "finite P2"
    using assms(2) CFL_def[of L2] by auto
  let ?f1 = "Some o (Inl:: 'n1  'n1 + 'n2)"
  let ?f2 = "Some o (Inr:: 'n2  'n1 + 'n2)"
  let ?P1 = "rename_Prods ?f1 P1"
  let ?P2 = "rename_Prods ?f2 P2"
  let ?S1 = "?f1 S1"
  let ?S2 = "?f2 S2"
  let ?S = "None :: ('n1+'n2)option"
  let ?P = "{(None, [Nt ?S1, Nt ?S2])}  (?P1  ?P2)"
  have "inj ?f1" by (simp add: inj_on_def) 
  then have L1r_def: "L1 = Lang ?P1 ?S1" 
    using L1_def Lang_rename_Prods[of ?f1 P1 S1] inj_on_def by force
  have "inj ?f2" by (simp add: inj_on_def) 
  then have L2r_def: "L2 = Lang ?P2 ?S2" 
    using L2_def Lang_rename_Prods[of ?f2 P2 S2] inj_on_def by force
  have "Lang ?P ?S = L1 @@ L2" unfolding L1r_def L2r_def  
    by(rule Lang_concat_disj) (auto simp add: disjoint_iff in_Nts_rename_Prods)
  moreover have "finite ?P" using finite P1 finite P2 by auto
  ultimately show ?thesis unfolding CFL_def by blast
qed


subsection ‹CFG as an Equation System›

text ‹A CFG can be viewed as a system of equations. The least solution is denoted by Lang_lfp›.›

definition inst_sym :: "('n  't lang)  ('n, 't) sym  't lang" where
"inst_sym L s = (case s of Tm a  {[a]} | Nt A  L A)"

definition inst_syms :: "('n  't lang)  ('n, 't) syms  't lang" where
"inst_syms L w = concats (map (inst_sym L) w)"

definition subst_lang :: "('n,'t)Prods  ('n  't lang)  ('n  't lang)" where
"subst_lang P L = (λA. w  Rhss P A. inst_syms L w)"

definition Lang_lfp :: "('n, 't) Prods  'n  't lang" where
"Lang_lfp P = lfp (subst_lang P)"


subsubsection ‹Basics›

lemma inst_sym_simps[simp]:
  "inst_sym L (Tm a) = {[a]}"
  "inst_sym L (Nt A) = L A"
by(auto simp: inst_sym_def)

lemma inst_syms_Nil[simp]: "inst_syms L [] = {[]}"
by(simp add: inst_syms_def)

lemma inst_syms_Cons[simp]: "inst_syms L (s # β) = inst_sym L s @@ inst_syms L β"
by(simp add: inst_syms_def)

lemma inst_syms_append[simp]: "inst_syms L (α @ β) = inst_syms L α @@ inst_syms L β"
by(simp add: inst_syms_def)

lemma inst_syms_Tms:
  "inst_syms L (map Tm w) = {w}"
by(induction w) (auto simp add: conc_def)

lemma inst_syms_empty:
  assumes "L B = {}"
  shows "B  Nts_syms α  inst_syms L α = {}"
by(induction α) (auto simp add: assms split: sym.splits)

lemma finite_inst_syms:
  assumes "B  Nts_syms α. finite (L B)"
  shows "finite (inst_syms L α)"
proof -
  have "X. X  set(map (inst_sym L) α)  finite X"
    using assms by (auto simp: inst_sym_def Nts_syms_def split:sym.splits)
  thus ?thesis
    unfolding inst_syms_def using finite_concats by blast
qed

lemma infinite_inst_syms:
  assumes "B  Nts_syms α. L B  {}" "B  Nts_syms α" "infinite(L B)"
  shows "infinite (inst_syms L α)"
proof -
  have "X  set(map (inst_sym L) α). X  {}"
    using assms(1) by (auto simp: inst_sym_def Nts_syms_def split:sym.splits)
  from infinite_concats[OF this, of "inst_sym L (Nt B)"] show ?thesis
    using assms(2,3) unfolding inst_syms_def in_Nts_syms by force
qed


subsubsection Lang_lfp›is a Kleene fixpoint›

lemma inst_sym_Sup_range:  "inst_sym (Sup(range F)) = (λs. UN i. inst_sym (F i) s)"
  by(auto simp: inst_sym_def fun_eq_iff split: sym.splits)

lemma foldr_map_mono: "F  G  foldr (@@) (map F xs) Ls  foldr (@@) (map G xs) Ls"
by(induction xs)(auto simp add: le_fun_def subset_eq)

lemma inst_sym_mono: "F  G  inst_sym F s  inst_sym G s"
by (auto simp add: inst_sym_def le_fun_def subset_iff split: sym.splits)

lemma foldr_conc_map_inst_sym:
  assumes "omega_chain L"
  shows "foldr (@@) (map (λs. i. inst_sym (L i) s) xs) Ls = (i. foldr (@@) (map (inst_sym (L i)) xs) Ls)"
proof(induction xs)
  case Nil
  then show ?case by simp 
next
  case (Cons a xs)
  show ?case (is "?l = ?r")
  proof
    show "?l  ?r"
    proof
      fix w assume "w  ?l"
      with Cons obtain u v i j
        where "w = u @ v" "u  inst_sym (L i) a" "v  foldr (@@) (map (inst_sym (L j)) xs) Ls" by(auto)
      then show "w  ?r"
        using omega_chain_mono[OF assms, of i "max i j"] omega_chain_mono[OF assms, of j "max i j"]
          inst_sym_mono foldr_map_mono[of "inst_sym (L j)" "inst_sym (L (max i j))" xs Ls] concI
        unfolding le_fun_def by(simp) blast
    qed
  next
    show "?r  ?l" using Cons by(fastforce)
  qed
qed

lemma omega_cont_Lang_lfp: "omega_cont (subst_lang P)"
unfolding omega_cont_def subst_lang_def
proof (safe)
  fix L :: "nat  'a  'b lang"
  assume o: "omega_chain L"
  show "(λA.  (inst_syms (Sup (range L)) ` Rhss P A)) = (SUP i. (λA.  (inst_syms (L i) ` Rhss P A)))"
    (is "(λA. ?l A) = (λA. ?r A)")
  proof
    fix A :: 'a
    have "?l A = (i. (inst_syms (L i) ` Rhss P A))"
      by(auto simp: inst_syms_def inst_sym_Sup_range concats_def foldr_conc_map_inst_sym[OF o])
    also have " = ?r A"
      by(auto)
    finally show "?l A = ?r A" .
  qed
qed

theorem Lang_lfp_SUP: "Lang_lfp P = (SUP n. ((subst_lang P)^^n) (λA. {}))"
using Kleene_lfp[OF omega_cont_Lang_lfp] unfolding Lang_lfp_def bot_fun_def by blast

lemma Lang_lfp_unfold:
  "Lang_lfp P A = (α  Rhss P A. inst_syms (Lang_lfp P) α)"
unfolding Lang_lfp_def
using fun_cong[OF subst_lang_def[of P "lfp(subst_lang P)"], of A,symmetric]
by (metis lfp_unfold[OF mono_if_omega_cont[OF omega_cont_Lang_lfp]])


subsection Lang_lfp = Lang›

text ‹We prove that the fixpoint characterization of the language defined by a CFG is equivalent
to the standard language definition via derivations. Both directions are proved separately›

lemma inst_syms_mono: "(A. R A  R' A)  w  inst_syms R α  w  inst_syms R' α"
unfolding inst_syms_def concats_def
by (metis (no_types, lifting) foldr_map_mono in_mono inst_sym_mono le_fun_def)

lemma omega_cont_Lang_lfp_iterates: "omega_chain (λn. ((subst_lang P)^^n) (λA. {}))"
  using omega_chain_iterates[OF mono_if_omega_cont, OF omega_cont_Lang_lfp]
  unfolding bot_fun_def by blast

lemma in_subst_langD_inst_syms: "w  subst_lang P L A  α. (A,α)P  w  inst_syms L α"
unfolding subst_lang_def Rhss_def by (auto split: prod.splits)

lemma derives_if_inst_syms:
  "w  inst_syms (Lang P) α  P  α ⇒* map Tm w"
proof (induction α arbitrary: w)
  case Nil
  then show ?case by(auto)
next
  case (Cons s α)
  show ?case
  proof (cases s)
    case (Nt A)
    then show ?thesis using Cons unfolding Lang_def
      by(fastforce simp: derives_Cons_iff)
  next
    case (Tm a)
    then show ?thesis using Cons
      by(auto simp:derives_Tm_Cons)
  qed
qed

lemma derives_if_in_subst_lang: "w  ((subst_lang P)^^n) (λA. {}) A  w  Lang P A"
proof(induction n arbitrary: w A)
  case 0
  then show ?case by simp
next
  case (Suc n)
  let ?L = "((subst_lang P)^^n) (λA. {})"
  have *: "?L A  Lang P A" for A
    using Suc.IH by blast
  obtain α where α: "(A,α)  P" "w  inst_syms ?L α"
    using in_subst_langD_inst_syms[OF Suc.prems[simplified]] by blast
  show ?case using α(1) derives_if_inst_syms[OF inst_syms_mono[OF *, of _ "λA. A", OF α(2)]]
    unfolding Lang_def by (simp add: derives_Cons_rule)
qed

lemma derives_if_Lang_lfp: "w  Lang_lfp P A  w  Lang P A"
  unfolding Lang_lfp_SUP using derives_if_in_subst_lang
  by (metis (mono_tags, lifting) SUP_apply UN_E)

lemma Lang_lfp_subset_Lang: "Lang_lfp P A  Lang P A"
by (auto simp add: derives_if_Lang_lfp)

text ‹The other direction:›

lemma inst_syms_decomp:
  " i < length ws. ws ! i  inst_sym L (α ! i); length α = length ws 
   concat ws  inst_syms L α"
proof (induction ws arbitrary: α)
  case Nil
  then show ?case by simp
next
  case (Cons w ws)
  then obtain α1 αr where *: "α = α1 # αr" by (metis Suc_length_conv)
  with Cons.prems(2) have "length αr = length ws" by simp
  moreover from Cons.prems * have "i<length ws. ws ! i  inst_sym L (αr ! i)" by auto
  ultimately have "concat ws  inst_syms L αr" using Cons.IH by blast
  moreover from Cons.prems * have "w  inst_sym L α1" by fastforce
  ultimately show ?case using * by force
qed

lemma Lang_lfp_if_derives_aux:
  "P  [Nt A] ⇒(n) map Tm w  w  ((subst_lang P)^^n) (λA. {}) A"
proof(induction n arbitrary: w A rule: less_induct)
  case (less n)
  show ?case
  proof (cases n)
    case 0 then show ?thesis using less.prems by auto
  next
    case (Suc m)
    then obtain α where α_intro: "(A,α)  P" "P  α ⇒(m) map Tm w"
      by (metis deriven_Nt_map_TmD less.prems nat.inject)
    then obtain ws ms where *:
      "w = concat ws  length α = length ws  length α = length ms
         sum_list ms = m  (i < length ws. P  [α ! i] ⇒(ms ! i) map Tm (ws ! i))"
      using deriven_to_map_TmD by metis

    have "i < length ws. ws ! i  inst_sym (λA. ((subst_lang P)^^m) (λA. {}) A) (α ! i)"
    proof (rule allI | rule impI)+
      fix i
      show "i < length ws  ws ! i  inst_sym ((subst_lang P ^^ m) (λA. {})) (α ! i)"
      unfolding inst_sym_def
      proof (induction "α ! i")
        case (Nt B)
        with * have **: "ms ! i  m"
          by (metis elem_le_sum_list)
        with Suc have "ms ! i < n" by force
        from less.IH[OF this, of B "ws ! i"] Nt *
          have "ws ! i  (subst_lang P ^^ (ms ! i)) (λA. {}) B" by fastforce
        with omega_chain_mono[OF omega_cont_Lang_lfp_iterates, OF **]
          have "ws ! i  (subst_lang P ^^ m) (λA. {}) B" by (metis le_funD subset_iff)
        with Nt show ?case by (metis sym.simps(5))
      next
        case (Tm a)
        with * have "P  map Tm [a] ⇒(ms ! i) map Tm (ws ! i)" by fastforce
        then have "ws ! i  {[a]}" using deriven_map_TmD by fastforce
        with Tm show ?case by (metis sym.simps(6))
      qed
    qed

    from inst_syms_decomp[OF this] * have "w  inst_syms ((subst_lang P ^^ m) (λA. {})) α" by argo
    with α_intro have "w  (subst_lang P) (λA. (subst_lang P ^^ m) (λA. {}) A) A"
      unfolding subst_lang_def Rhss_def by force
    with Suc show ?thesis by force
  qed
qed

lemma Lang_subset_Lang_lfp:
  "Lang P A  Lang_lfp P A"
proof
  fix w assume "w  Lang P A"
  then obtain n where "P  [Nt A] ⇒(n) map Tm w"
    by (simp add: Lang_def) (meson rtranclp_power)
  from Lang_lfp_if_derives_aux[OF this] have "w  ((subst_lang P)^^n) (λA. {}) A" by argo
  with Lang_lfp_SUP show "w  Lang_lfp P A" by (metis (mono_tags, lifting) SUP_apply UNIV_I UN_iff)
qed

theorem Lang_lfp_eq_Lang:
  "Lang_lfp P A = Lang P A"
using Lang_subset_Lang_lfp Lang_lfp_subset_Lang by (metis subset_antisym)

subsection ‹More›

corollary Lang_unfold: "Lang P A = (α  Rhss P A. inst_syms (Lang P) α)"
  by(fact Lang_lfp_unfold[unfolded Lang_lfp_eq_Lang])

corollary inst_syms_Lang_subset_Langs:
  "inst_syms (Lang P) α  Langs P α"
using Langs_iff_derives derives_if_inst_syms by blast

lemma inst_syms_LangI: "P  α ⇒* map Tm w  w  inst_syms (Lang P) α"
proof(induction rule: converse_derives_induct)
  case base
  then show ?case by (simp add: inst_syms_Tms)
next
  case (step u A v α)
  then show ?case using Langs_prod_subset[OF (A, α)  P] conc_mono inst_syms_Lang_subset_Langs
    by fastforce
qed

lemma Langs_eq_inst_syms_Lang: "Langs P α = inst_syms (Lang P) α"
using Langs_iff_derives inst_syms_LangI inst_syms_Lang_subset_Langs by blast

end