Theory Context_Free_Language

(*
Authors: Tobias Nipkow, Fabian Lehr
*)

section "Context-Free Languages"

theory Context_Free_Language
imports
  "Regular-Sets.Regular_Set"
  Renaming_CFG
  Disjoint_Union_CFG
begin


subsection ‹Basic Definitions›

text ‹This definition depends on the type of nonterminals of the grammar.›

definition CFL :: "'n itself  't list set  bool" where
"CFL (TYPE('n)) L = (P S::'n. L = Lang P S  finite P)"

text ‹Ideally one would existentially quantify over 'n› on the right-hand side, but we cannot
quantify over types in HOL. But we can prove that the type is irrelevant because we can always
use another type via renaming.›

text ‹For hiding the infinite type of nonterminals:›

abbreviation cfl :: "'a lang  bool" where
"cfl L  CFL (TYPE(nat)) L"


subsection ‹Closure Properties›

lemma CFL_Un_closed:
  assumes "CFL TYPE('n1) L1" "CFL TYPE('n2) L2"
  shows "CFL TYPE(('n1+'n2)option) (L1  L2)"
proof -
  from assms obtain P1 P2 and S1 :: 'n1 and S2 :: 'n2
    where L: "L1 = Lang P1 S1" "L2 = Lang P2 S2" and fin: "finite P1" "finite P2"
    unfolding CFL_def by blast
  let ?f1 = "Some o (Inl:: 'n1  'n1 + 'n2)"
  let ?f2 = "Some o (Inr:: 'n2  'n1 + 'n2)"
  let ?P1 = "rename_Prods ?f1 P1"
  let ?P2 = "rename_Prods ?f2 P2"
  let ?S1 = "?f1 S1"
  let ?S2 = "?f2 S2"
  let ?P = "{(None, [Nt ?S1]), (None, [Nt ?S2])}  (?P1  ?P2)"
  have "Lang ?P None = Lang ?P1 ?S1  Lang ?P2 ?S2"
    by (rule Lang_disj_Un2)(auto simp: Nts_Un in_Nts_rename_Prods)
  moreover have " = Lang P1 S1  Lang P2 S2"
  proof -
    have "Lang ?P1 ?S1 = L1" unfolding L1 = _
      by (meson Lang_rename_Prods comp_inj_on inj_Inl inj_Some)
    moreover have "Lang ?P2 ?S2 = L2" unfolding L2 = _
      by (meson Lang_rename_Prods comp_inj_on inj_Inr inj_Some)
    ultimately show ?thesis using L by argo
  qed
  moreover have "finite ?P" using fin by auto
  ultimately show ?thesis
    unfolding CFL_def using L by blast 
qed

lemma CFL_concat_closed: 
assumes "CFL TYPE('n1) L1" and "CFL TYPE('n2) L2"
shows "CFL TYPE(('n1 + 'n2) option) (L1 @@ L2)"
proof -
  obtain P1 S1 where L1_def: "L1 = Lang P1 (S1::'n1)" "finite P1"
    using assms(1) CFL_def[of L1] by auto 
  obtain P2 S2 where L2_def: "L2 = Lang P2 (S2::'n2)" "finite P2"
    using assms(2) CFL_def[of L2] by auto
  let ?f1 = "Some o (Inl:: 'n1  'n1 + 'n2)"
  let ?f2 = "Some o (Inr:: 'n2  'n1 + 'n2)"
  let ?P1 = "rename_Prods ?f1 P1"
  let ?P2 = "rename_Prods ?f2 P2"
  let ?S1 = "?f1 S1"
  let ?S2 = "?f2 S2"
  let ?S = "None :: ('n1+'n2)option"
  let ?P = "{(None, [Nt ?S1, Nt ?S2])}  (?P1  ?P2)"
  have "inj ?f1" by (simp add: inj_on_def) 
  then have L1r_def: "L1 = Lang ?P1 ?S1" 
    using L1_def Lang_rename_Prods[of ?f1 P1 S1] inj_on_def by force
  have "inj ?f2" by (simp add: inj_on_def) 
  then have L2r_def: "L2 = Lang ?P2 ?S2" 
    using L2_def Lang_rename_Prods[of ?f2 P2 S2] inj_on_def by force
  have "Lang ?P ?S = L1 @@ L2" unfolding L1r_def L2r_def  
    by(rule Lang_concat_disj) (auto simp add: disjoint_iff in_Nts_rename_Prods)
  moreover have "finite ?P" using finite P1 finite P2 by auto
  ultimately show ?thesis unfolding CFL_def by blast
qed


subsection ‹CFG as an Equation System›

text ‹A CFG can be viewed as a system of equations. The least solution is denoted by Lang_lfp›.›

definition inst_sym :: "('n  't lang)  ('n, 't) sym  't lang" where
"inst_sym L s = (case s of Tm a  {[a]} | Nt A  L A)"

definition concats :: "'a lang list  'a lang" where
"concats Ls = foldr (@@) Ls {[]}"

definition inst_syms :: "('n  't lang)  ('n, 't) syms  't lang" where
"inst_syms L w = concats (map (inst_sym L) w)"

definition subst_lang :: "('n,'t)Prods  ('n  't lang)  ('n  't lang)" where
"subst_lang P L = (λA. w  Rhss P A. inst_syms L w)"

definition Lang_lfp :: "('n, 't) Prods  'n  't lang" where
"Lang_lfp P = lfp (subst_lang P)"

text ‹Now we show that this lfp› is a Kleene fixpoint.›

lemma inst_sym_Sup_range:  "inst_sym (Sup(range F)) = (λs. UN i. inst_sym (F i) s)"
  by(auto simp: inst_sym_def fun_eq_iff split: sym.splits)

lemma foldr_map_mono: "F  G  foldr (@@) (map F xs) Ls  foldr (@@) (map G xs) Ls"
by(induction xs)(auto simp add: le_fun_def subset_eq)

lemma inst_sym_mono: "F  G  inst_sym F s  inst_sym G s"
by (auto simp add: inst_sym_def le_fun_def subset_iff split: sym.splits)

lemma foldr_conc_map_inst_sym:
  assumes "omega_chain L"
  shows "foldr (@@) (map (λs. i. inst_sym (L i) s) xs) Ls = (i. foldr (@@) (map (inst_sym (L i)) xs) Ls)"
proof(induction xs)
  case Nil
  then show ?case by simp 
next
  case (Cons a xs)
  show ?case (is "?l = ?r")
  proof
    show "?l  ?r"
    proof
      fix w assume "w  ?l"
      with Cons obtain u v i j
        where "w = u @ v" "u  inst_sym (L i) a" "v  foldr (@@) (map (inst_sym (L j)) xs) Ls" by(auto)
      then show "w  ?r"
        using omega_chain_mono[OF assms, of i "max i j"] omega_chain_mono[OF assms, of j "max i j"]
          inst_sym_mono foldr_map_mono[of "inst_sym (L j)" "inst_sym (L (max i j))" xs Ls] concI
        unfolding le_fun_def by(simp) blast
    qed
  next
    show "?r  ?l" using Cons by(fastforce)
  qed
qed

lemma omega_cont_Lang_lfp: "omega_cont (subst_lang P)"
unfolding omega_cont_def subst_lang_def
proof (safe)
  fix L :: "nat  'a  'b lang"
  assume o: "omega_chain L"
  show "(λA.  (inst_syms (Sup (range L)) ` Rhss P A)) = (SUP i. (λA.  (inst_syms (L i) ` Rhss P A)))"
    (is "(λA. ?l A) = (λA. ?r A)")
  proof
    fix A :: 'a
    have "?l A = (i. (inst_syms (L i) ` Rhss P A))"
      by(auto simp: inst_syms_def inst_sym_Sup_range concats_def foldr_conc_map_inst_sym[OF o])
    also have " = ?r A"
      by(auto)
    finally show "?l A = ?r A" .
  qed
qed

theorem Lang_lfp_SUP: "Lang_lfp P = (SUP n. ((subst_lang P)^^n) (λA. {}))"
using Kleene_lfp[OF omega_cont_Lang_lfp] unfolding Lang_lfp_def bot_fun_def by blast


subsection Lang_lfp = Lang›

text ‹We prove that the fixpoint characterization of the language defined by a CFG is equivalent
to the standard language definition via derivations. Both directions are proved separately›

lemma inst_syms_mono: "(A. R A  R' A)  w  inst_syms R α  w  inst_syms R' α"
unfolding inst_syms_def concats_def
by (metis (no_types, lifting) foldr_map_mono in_mono inst_sym_mono le_fun_def)

lemma omega_cont_Lang_lfp_iterates: "omega_chain (λn. ((subst_lang P)^^n) (λA. {}))"
  using omega_chain_iterates[OF mono_if_omega_cont, OF omega_cont_Lang_lfp]
  unfolding bot_fun_def by blast

lemma in_subst_langD_inst_syms: "w  subst_lang P L A  α. (A,α)P  w  inst_syms L α"
unfolding subst_lang_def inst_syms_def Rhss_def by (auto split: prod.splits)

lemma foldr_conc_conc: "foldr (@@) xs {[]} @@ A = foldr (@@) xs A"
by (induction xs)(auto simp: conc_assoc)

lemma derives_if_inst_syms:
  "w  inst_syms (λA. {w. P  [Nt A] ⇒* map Tm w}) α  P  α ⇒* map Tm w"
proof (induction α arbitrary: w)
  case Nil
  then show ?case unfolding inst_syms_def concats_def by(auto)
next
  case (Cons s α)
  show ?case
  proof (cases s)
    case (Nt A)
    then show ?thesis using Cons
      unfolding inst_syms_def concats_def inst_sym_def by(fastforce simp: derives_Cons_decomp)
  next
    case (Tm a)
    then show ?thesis using Cons
      unfolding inst_syms_def concats_def inst_sym_def by(auto simp:derives_Tm_Cons)
  qed
qed

lemma derives_if_in_subst_lang: "w  ((subst_lang P)^^n) (λA. {}) A  P  [Nt A] ⇒* map Tm w"
proof(induction n arbitrary: w A)
  case 0
  then show ?case by simp
next
  case (Suc n)
  let ?L = "((subst_lang P)^^n) (λA. {})"
  have *: "?L A  {w. P  [Nt A] ⇒* map Tm w}" for A
    using Suc.IH by blast
  obtain α where α: "(A,α)  P" "w  inst_syms ?L α"
    using in_subst_langD_inst_syms[OF Suc.prems[simplified]] by blast
  show ?case using α(1) derives_if_inst_syms[OF inst_syms_mono[OF *, of _ "λA. A", OF α(2)]]
    by (simp add: derives_Cons_rule)
qed

lemma derives_if_Lang_lfp: "w  Lang_lfp P A  P  [Nt A] ⇒* map Tm w"
  unfolding Lang_lfp_SUP using derives_if_in_subst_lang
  by (metis (mono_tags, lifting) SUP_apply UN_E)

lemma Lang_lfp_subset_Lang: "Lang_lfp P A  Lang P A"
unfolding Lang_def by(blast intro:derives_if_Lang_lfp)

text ‹The other direction:›

lemma inst_syms_decomp:
  " i < length ws. ws ! i  inst_sym L (α ! i); length α = length ws 
   concat ws  inst_syms L α"
proof (induction ws arbitrary: α)
  case Nil
  then show ?case unfolding inst_syms_def concats_def by simp
next
  case (Cons w ws)
  then obtain α1 αr where *: "α = α1 # αr" by (metis Suc_length_conv)
  with Cons.prems(2) have "length αr = length ws" by simp
  moreover from Cons.prems * have "i<length ws. ws ! i  inst_sym L (αr ! i)" by auto
  ultimately have "concat ws  inst_syms L αr" using Cons.IH by blast
  moreover from Cons.prems * have "w  inst_sym L α1" by fastforce
  ultimately show ?case unfolding inst_syms_def concats_def using * by force
qed

lemma Lang_lfp_if_derives_aux: "P  [Nt A] ⇒(n) map Tm w  w  ((subst_lang P)^^n) (λA. {}) A"
proof(induction n arbitrary: w A rule: less_induct)
  case (less n)
  show ?case
  proof (cases n)
    case 0 then show ?thesis using less.prems by auto
  next
    case (Suc m)
    then obtain α where α_intro: "(A,α)  P" "P  α ⇒(m) map Tm w"
      by (metis deriven_start1 less.prems nat.inject)
    then obtain ws ms where *:
      "w = concat ws  length α = length ws  length α = length ms
         sum_list ms = m  (i < length ws. P  [α ! i] ⇒(ms ! i) map Tm (ws ! i))"
      using derive_decomp_Tm by metis

    have "i < length ws. ws ! i  inst_sym (λA. ((subst_lang P)^^m) (λA. {}) A) (α ! i)"
    proof (rule allI | rule impI)+
      fix i
      show "i < length ws  ws ! i  inst_sym ((subst_lang P ^^ m) (λA. {})) (α ! i)"
      unfolding inst_sym_def
      proof (induction "α ! i")
        case (Nt B)
        with * have **: "ms ! i  m"
          by (metis elem_le_sum_list)
        with Suc have "ms ! i < n" by force
        from less.IH[OF this, of B "ws ! i"] Nt *
          have "ws ! i  (subst_lang P ^^ (ms ! i)) (λA. {}) B" by fastforce
        with omega_chain_mono[OF omega_cont_Lang_lfp_iterates, OF **]
          have "ws ! i  (subst_lang P ^^ m) (λA. {}) B" by (metis le_funD subset_iff)
        with Nt show ?case by (metis sym.simps(5))
      next
        case (Tm a)
        with * have "P  map Tm [a] ⇒(ms ! i) map Tm (ws ! i)" by fastforce
        then have "ws ! i  {[a]}" using deriven_from_TmsD by fastforce
        with Tm show ?case by (metis sym.simps(6))
      qed
    qed

    from inst_syms_decomp[OF this] * have "w  inst_syms ((subst_lang P ^^ m) (λA. {})) α" by argo
    with α_intro have "w  (subst_lang P) (λA. (subst_lang P ^^ m) (λA. {}) A) A"
      unfolding subst_lang_def Rhss_def by force
    with Suc show ?thesis by force
  qed
qed


lemma Lang_lfp_if_derives: "P  [Nt A] ⇒* map Tm w  w  Lang_lfp P A"
proof -
  assume "P  [Nt A] ⇒* map Tm w"
  then obtain n where "P  [Nt A] ⇒(n) map Tm w" by (meson rtranclp_power)
  from Lang_lfp_if_derives_aux[OF this] have "w  ((subst_lang P)^^n) (λA. {}) A" by argo
  with Lang_lfp_SUP show "w  Lang_lfp P A" by (metis (mono_tags, lifting) SUP_apply UNIV_I UN_iff)
qed

theorem Lang_lfp_eq_Lang: "Lang_lfp P A = Lang P A"
unfolding Lang_def by(blast intro: Lang_lfp_if_derives derives_if_Lang_lfp)

end