Theory Pi_Regular_Exp

(* Author: Dmitriy Traytel *)

section ‹$\Pi$-Extended Regular Expressions›

(*<*)
theory Pi_Regular_Exp
imports Pi_Regular_Set "HOL-Library.List_Lexorder" "HOL-Library.Code_Target_Nat"
  Deriving.Compare_Instances   
begin
(*>*)
subsection ‹Syntax of regular expressions›

datatype 'a rexp =
  Zero |
  Full |
  One |
  Atom 'a |
  Plus "('a rexp)" "('a rexp)" |
  Times "('a rexp)" "('a rexp)" |
  Star "('a rexp)" |
  Not "('a rexp)" |
  Inter "('a rexp)" "('a rexp)" |
  Pr "('a rexp)"
derive linorder rexp

text ‹Lifting constructors to lists›

fun rexp_of_list where
  "rexp_of_list OPERATION N [] = N"
| "rexp_of_list OPERATION N [x] = x"
| "rexp_of_list OPERATION N (x # xs) = OPERATION x (rexp_of_list OPERATION N xs)"

abbreviation "PLUS  rexp_of_list Plus Zero"
abbreviation "TIMES  rexp_of_list Times One"
abbreviation "INTERSECT  rexp_of_list Inter Full"

lemma list_singleton_induct [case_names nil single cons]:
  assumes nil: "P []"
  assumes single: "x. P [x]"
  assumes cons: "x y xs. P (y # xs)  P (x # (y # xs))"
  shows "P xs"
  using assms
proof (induct xs)
  case (Cons x xs) thus ?case by (cases xs) auto
qed simp


subsection ‹ACI normalization›

fun toplevel_summands :: "'a rexp  'a rexp set" where
  "toplevel_summands (Plus r s) = toplevel_summands r  toplevel_summands s"
| "toplevel_summands r = {r}"

abbreviation (input) "flatten LISTOP X  LISTOP (sorted_list_of_set X)"

lemma toplevel_summands_nonempty[simp]:
  "toplevel_summands r  {}"
  by (induct r) auto

lemma toplevel_summands_finite[simp]:
  "finite (toplevel_summands r)"
  by (induct r) auto

primrec ACI_norm :: "('a::linorder) rexp  'a rexp"  («_») where
  "«Zero» = Zero"
| "«Full» = Full"
| "«One» = One"
| "«Atom a» = Atom a"
| "«Plus r s» = flatten PLUS (toplevel_summands (Plus «r» «s»))"
| "«Times r s» = Times «r» «s»"
| "«Star r» = Star «r»"
| "«Not r» = Not «r»"
| "«Inter r s» = Inter «r» «s»"
| "«Pr r» = Pr «r»"

lemma Plus_toplevel_summands:
  "Plus r s  toplevel_summands t  False"
  by (induct t) auto

lemma toplevel_summands_not_Plus[simp]:
  "(r s. x  Plus r s)  toplevel_summands x = {x}"
  by (induct x) auto

lemma toplevel_summands_PLUS_strong:
  "xs  []; list_all (λx. ¬(r s. x = Plus r s)) xs  toplevel_summands (PLUS xs) = set xs"
  by (induct xs rule: list_singleton_induct) auto

lemma toplevel_summands_flatten:
  "X  {}; finite X; x  X. ¬(r s. x = Plus r s)  toplevel_summands (flatten PLUS X) = X"
  using toplevel_summands_PLUS_strong[of "sorted_list_of_set X"]
  unfolding list_all_iff by fastforce

lemma ACI_norm_Plus:
  "«r» = Plus s t  s t. r = Plus s t"
  by (induct r) auto

lemma toplevel_summands_flatten_ACI_norm_image:
  "toplevel_summands (flatten PLUS (ACI_norm ` toplevel_summands r)) = ACI_norm ` toplevel_summands r"
  by (intro toplevel_summands_flatten) (auto dest!: ACI_norm_Plus intro: Plus_toplevel_summands)

lemma toplevel_summands_flatten_ACI_norm_image_Union:
  "toplevel_summands (flatten PLUS (ACI_norm ` toplevel_summands r  ACI_norm ` toplevel_summands s)) =
    ACI_norm ` toplevel_summands r  ACI_norm ` toplevel_summands s"
  by (intro toplevel_summands_flatten) (auto dest!: ACI_norm_Plus[OF sym] intro: Plus_toplevel_summands)

lemma toplevel_summands_ACI_norm:
  "toplevel_summands «r» = ACI_norm ` toplevel_summands r"
  by (induct r) (auto simp: toplevel_summands_flatten_ACI_norm_image_Union)

lemma ACI_norm_flatten:
  "«r» = flatten PLUS (ACI_norm ` toplevel_summands r)"
  by (induct r) (auto simp: image_Un toplevel_summands_flatten_ACI_norm_image)

theorem ACI_norm_idem[simp]:
  "««r»» = «r»"
proof (induct r)
  case (Plus r s)
  have "««Plus r s»» = «flatten PLUS (toplevel_summands «r»  toplevel_summands «s»)»"
    (is "_ = «flatten PLUS ?U»") by simp
  also have " = flatten PLUS (ACI_norm ` toplevel_summands (flatten PLUS ?U))"
    by (simp only: ACI_norm_flatten)
  also have "toplevel_summands (flatten PLUS ?U) = ?U"
    by (intro toplevel_summands_flatten) (auto intro: Plus_toplevel_summands)
  also have "flatten PLUS (ACI_norm ` ?U) = flatten PLUS (toplevel_summands «r»  toplevel_summands «s»)"
    by (simp only: image_Un toplevel_summands_ACI_norm[symmetric] Plus)
  finally show ?case by simp
qed auto

fun ACI_nPlus :: "'a::linorder rexp  'a rexp  'a rexp"
where
  "ACI_nPlus (Plus r1 r2) s = ACI_nPlus r1 (ACI_nPlus r2 s)"
| "ACI_nPlus r (Plus s1 s2) =
  (if r = s1 then Plus s1 s2
  else if r < s1 then Plus r (Plus s1 s2)
  else Plus s1 (ACI_nPlus r s2))"
| "ACI_nPlus r s =
  (if r = s then r
  else if r < s then Plus r s
  else Plus s r)"

fun ACI_norm_alt where 
  "ACI_norm_alt Zero = Zero"
| "ACI_norm_alt Full = Full"
| "ACI_norm_alt One = One"
| "ACI_norm_alt (Atom a) = Atom a"
| "ACI_norm_alt (Plus r s) = ACI_nPlus (ACI_norm_alt r) (ACI_norm_alt s)"
| "ACI_norm_alt (Times r s) = Times (ACI_norm_alt r) (ACI_norm_alt s)"
| "ACI_norm_alt (Star r) = Star (ACI_norm_alt r)"
| "ACI_norm_alt (Not r) = Not (ACI_norm_alt r)"
| "ACI_norm_alt (Inter r s) = Inter (ACI_norm_alt r) (ACI_norm_alt s)"
| "ACI_norm_alt (Pr r) = Pr (ACI_norm_alt r)"

lemma toplevel_summands_ACI_nPlus:
  "toplevel_summands (ACI_nPlus r s) = toplevel_summands (Plus r s)"
  by (induct r s rule: ACI_nPlus.induct) auto

lemma toplevel_summands_ACI_norm_alt:
  "toplevel_summands (ACI_norm_alt r) = ACI_norm_alt ` toplevel_summands r"
  by (induct r) (auto simp: toplevel_summands_ACI_nPlus)

lemma ACI_norm_alt_Plus:
  "ACI_norm_alt r = Plus s t  s t. r = Plus s t"
  by (induct r) auto

lemma toplevel_summands_flatten_ACI_norm_alt_image:
  "toplevel_summands (flatten PLUS (ACI_norm_alt ` toplevel_summands r)) = ACI_norm_alt ` toplevel_summands r"
  by (intro toplevel_summands_flatten) (auto dest!: ACI_norm_alt_Plus intro: Plus_toplevel_summands)

lemma ACI_norm_ACI_norm_alt: "«ACI_norm_alt r» = «r»"
proof (induction r)
  case (Plus r s) show ?case
    using ACI_norm_flatten [of "ACI_norm_alt (Plus r s)"] ACI_norm_flatten [of "Plus r s"]
    by (auto simp: image_Un toplevel_summands_ACI_nPlus)
      (metis Plus.IH toplevel_summands_ACI_norm)
qed auto

lemma ACI_nPlus_singleton_PLUS: 
  "xs  []; sorted xs; distinct xs; x  {x}  set xs. ¬(r s. x = Plus r s) 
  ACI_nPlus x (PLUS xs) = (if x  set xs then PLUS xs else PLUS (insort x xs))"
proof (induct xs rule: list_singleton_induct)
  case (single y)
  thus ?case
    by (cases x y rule: linorder_cases) (induct x y rule: ACI_nPlus.induct, auto)+
next
  case (cons y1 y2 ys) thus ?case by (cases x) (auto)
qed simp

lemma ACI_nPlus_PLUS:
  "xs1  []; xs2  []; x  set (xs1 @ xs2). ¬(r s. x = Plus r s); sorted xs2; distinct xs2
  ACI_nPlus (PLUS xs1) (PLUS xs2) = flatten PLUS (set (xs1 @ xs2))"
proof (induct xs1 arbitrary: xs2 rule: list_singleton_induct)
  case (single x1) thus ?case
    apply (auto intro!: trans[OF ACI_nPlus_singleton_PLUS] simp: insert_absorb simp del: sorted_list_of_set_insert_remove)
     apply (metis finite_set finite_sorted_distinct_unique sorted_list_of_set)
    apply (metis remdups_id_iff_distinct sorted_list_of_set_sort_remdups sorted_sort_id)
    done
next
  case (cons x11 x12 xs1) thus ?case
    apply (simp del: sorted_list_of_set_insert_remove)
    apply (rule trans[OF ACI_nPlus_singleton_PLUS])
         apply (auto simp del: sorted_list_of_set_insert_remove simp add: insert_commute[of x11])
       apply (auto simp only: Un_insert_left[of x11, symmetric] insert_absorb) []
      apply (auto simp only: Un_insert_right[of _ x11, symmetric] insert_absorb) []
     apply (auto simp add: insert_commute[of x12])
  done
qed simp

lemma ACI_nPlus_flatten_PLUS:
  "X1  {}; X2  {}; finite X1; finite X2; x  X1  X2. ¬(r s. x = Plus r s)
  ACI_nPlus (flatten PLUS X1) (flatten PLUS X2) = flatten PLUS (X1  X2)"
  by (rule trans[OF ACI_nPlus_PLUS]) auto

lemma ACI_nPlus_ACI_norm[simp]: "ACI_nPlus «r» «s» = «Plus r s»"
  using ACI_norm_flatten [of r] ACI_norm_flatten [of s] ACI_norm_flatten [of "Plus r s"]
  apply (auto intro!: trans [OF ACI_nPlus_flatten_PLUS])
    apply (auto simp: image_Un Un_assoc  intro!: trans [OF ACI_nPlus_flatten_PLUS])
   apply (metis ACI_norm_Plus Plus_toplevel_summands)+
  done

lemma ACI_norm_alt:
  "ACI_norm_alt r = «r»"
  by (induct r) auto

declare ACI_norm_alt[symmetric, code]




subsection ‹Finality›

primrec final :: "'a rexp  bool"
where
  "final Zero = False"
| "final Full = True"
| "final One = True"
| "final (Atom _) = False"
| "final (Plus r s) = (final r  final s)"
| "final (Times r s) = (final r  final s)"
| "final (Star _) = True"
| "final (Not r) = (~ final r)"
| "final (Inter r1 r2) = (final r1  final r2)"
| "final (Pr r) = final r"

lemma toplevel_summands_final:
  "final s = (rtoplevel_summands s. final r)"
  by (induct s) auto

lemma final_PLUS:
  "final (PLUS xs) = (r  set xs. final r)"
  by (induct xs rule: list_singleton_induct) auto

theorem ACI_norm_final[simp]:
  "final «r» = final r"
proof (induct r)
  case (Plus r1 r2) thus ?case using toplevel_summands_final by (auto simp: final_PLUS)
qed auto



subsection ‹Wellformedness w.r.t. an alphabet›

locale alphabet =
fixes Σ :: "nat  'a set" (Σ _›)
and wf_atom :: "nat  'b :: linorder  bool"
begin

primrec wf :: "nat  'b rexp  bool"
where
"wf n Zero = True" |
"wf n Full = True" |
"wf n One = True" |
"wf n (Atom a) = (wf_atom n a)" |
"wf n (Plus r s) = (wf n r  wf n s)" |
"wf n (Times r s) = (wf n r  wf n s)" |
"wf n (Star r) = wf n r" |
"wf n (Not r) = wf n r" |
"wf n (Inter r s) = (wf n r  wf n s)" |
"wf n (Pr r) = wf (n + 1) r"

primrec wf_word where
  "wf_word n [] = True"
| "wf_word n (w # ws) = ((w  Σ n)  wf_word n ws)"

lemma wf_word_snoc[simp]: "wf_word n (ws @ [w]) = ((w  Σ n)  wf_word n ws)"
  by (induct ws) auto

lemma wf_word_append[simp]: "wf_word n (ws @ vs) = (wf_word n ws  wf_word n vs)"
  by (induct ws arbitrary: vs) auto

lemma wf_word: "wf_word n w = (w  lists (Σ n))"
  by (induct w) auto

lemma toplevel_summands_wf:
  "wf n s = (rtoplevel_summands s. wf n r)"
  by (induct s) auto

lemma wf_PLUS[simp]:
  "wf n (PLUS xs) = (r  set xs. wf n r)"
  by (induct xs rule: list_singleton_induct) auto
  
lemma wf_TIMES[simp]:
  "wf n (TIMES xs) = (r  set xs. wf n r)"
  by (induct xs rule: list_singleton_induct) auto

lemma wf_flatten_PLUS[simp]:
  "finite X  wf n (flatten PLUS X) = (r  X. wf n r)"
  using wf_PLUS[of n "sorted_list_of_set X"] by fastforce

theorem ACI_norm_wf[simp]:
  "wf n «r» = wf n r"
proof (induct r arbitrary: n)
  case (Plus r1 r2) thus ?case
   using toplevel_summands_wf[of n "«r1»"] toplevel_summands_wf[of n "«r2»"] by auto
qed auto

lemma wf_INTERSECT[simp]:
  "wf n (INTERSECT xs) = (r  set xs. wf n r)"
  by (induct xs rule: list_singleton_induct) auto

lemma wf_flatten_INTERSECT[simp]:
  "finite X  wf n (flatten INTERSECT X) = (r  X. wf n r)"
  using wf_INTERSECT[of n "sorted_list_of_set X"] by fastforce

end



subsection ‹Language›

locale project =
  alphabet Σ wf_atom for Σ :: "nat  'a set" and wf_atom :: "nat  'b :: linorder  bool" +
fixes project :: "'a  'a"
and lookup :: "'b  'a  bool"
assumes project: "a. a  Σ (Suc n)  project a  Σ n"
begin

primrec lang :: "nat  'b rexp => 'a lang" where
"lang n Zero = {}" |
"lang n Full = lists (Σ n)" |
"lang n One = {[]}" |
"lang n (Atom b) = {[x] | x. lookup b x  x  Σ n}" |
"lang n (Plus r s) = (lang n r)  (lang n s)" |
"lang n (Times r s) = conc (lang n r) (lang n s)" |
"lang n (Star r) = star (lang n r)" |
"lang n (Not r) = lists (Σ n) - lang n r" |
"lang n (Inter r s) = (lang n r  lang n s)" |
"lang n (Pr r) = map project ` lang (n + 1) r"

lemma wf_word_map_project[simp]: "wf_word (Suc n) ws  wf_word n (map project ws)"
  by (induct ws arbitrary: n) (auto intro: project)

lemma wf_lang_wf_word: "wf n r  w  lang n r. wf_word n w"
  by (induct r arbitrary: n) (auto elim: rev_subsetD[OF _ conc_mono] star_induct intro: iffD2[OF wf_word])

lemma lang_subset_lists: "wf n r  lang n r  lists (Σ n)"
proof (induct r arbitrary: n)
  case Pr thus ?case by (fastforce intro!: project)
qed (auto simp: conc_subset_lists star_subset_lists)

lemma toplevel_summands_lang:
  "r  toplevel_summands s  lang n r  lang n s"
  by (induct s) auto

lemma toplevel_summands_lang_UN:
  "lang n s = (rtoplevel_summands s. lang n r)"
  by (induct s) auto

lemma toplevel_summands_in_lang:
  "w  lang n s = (rtoplevel_summands s. w  lang n r)"
  by (induct s) auto

lemma lang_PLUS[simp]:
  "lang n (PLUS xs) = (r  set xs. lang n r)"
  by (induct xs rule: list_singleton_induct) auto
  
lemma lang_TIMES[simp]:
  "lang n (TIMES xs) = foldr (@@) (map (lang n) xs) {[]}"
  by (induct xs rule: list_singleton_induct) auto

lemma lang_flatten_PLUS:
  "finite X  lang n (flatten PLUS X) = (r  X. lang n r)"
  using lang_PLUS[of n "sorted_list_of_set X"] by fastforce

theorem ACI_norm_lang[simp]:
  "lang n «r» = lang n r"
proof (induct r arbitrary: n)
  case (Plus r1 r2)
  moreover
  from Plus[symmetric] have "lang n (Plus r1 r2)  lang n «Plus r1 r2»"
    using toplevel_summands_in_lang[of _ n "«r1»"] toplevel_summands_in_lang[of _ n "«r2»"]
    by auto
  ultimately show ?case by (fastforce dest!: toplevel_summands_lang)
qed auto

lemma lang_final: "final r = ([]  lang n r)"
  using concI[of "[]" _ "[]"] by (induct r arbitrary: n) auto

lemma in_lang_INTERSECT:
  "wf_word n w  w  lang n (INTERSECT xs) = (r  set xs. w  lang n r)"
  by (induct xs rule: list_singleton_induct) (auto simp: wf_word)
  
lemma lang_INTERSECT:
  "lang n (INTERSECT xs) = (if xs = [] then lists (Σ n) else r  set xs. lang n r)"
  by (induct xs rule: list_singleton_induct) auto

lemma lang_flatten_INTERSECT[simp]:
  assumes "finite X" "X  {}" "rX. wf n r"
  shows "w  lang n (flatten INTERSECT X) = (r  X. w  lang n r)" (is "?L = ?R")
proof
  assume ?L
  thus ?R using in_lang_INTERSECT[OF bspec[OF wf_lang_wf_word[OF iffD2[OF wf_flatten_INTERSECT]]],
    OF assms(1,3) ?L, of "sorted_list_of_set X"] assms(1)
    by auto
next
  assume ?R
  with assms show ?L by (intro iffD2[OF in_lang_INTERSECT]) (auto dest: wf_lang_wf_word)
qed

end

(*<*)
end
(*>*)