Theory Regexp_Constructions

(*
  File:     Regexp_Constructions.thy
  Author:   Manuel Eberl <manuel@pruvisto.org>

  Some simple constructions on regular expressions to illustrate closure properties of regular
  languages: reversal, substitution, prefixes, suffixes, subwords ("fragments")
*)
section ‹Basic constructions on regular expressions›
theory Regexp_Constructions
imports
  Main
  "HOL-Library.Sublist"
  Regular_Exp
begin

subsection ‹Reverse language›

lemma rev_conc [simp]: "rev ` (A @@ B) = rev ` B @@ rev ` A"
  unfolding conc_def image_def by force

lemma rev_compower [simp]: "rev ` (A ^^ n) = (rev ` A) ^^ n"
  by (induction n) (simp_all add: conc_pow_comm)

lemma rev_star [simp]: "rev ` star A = star (rev ` A)"
  by (simp add: star_def image_UN)


subsection ‹Substituting characters in a language›    

definition subst_word :: "('a  'b list)  'a list  'b list" where
  "subst_word f xs = concat (map f xs)"
  
lemma subst_word_Nil [simp]: "subst_word f [] = []"
  by (simp add: subst_word_def)
    
lemma subst_word_singleton [simp]: "subst_word f [x] = f x"
  by (simp add: subst_word_def)
    
lemma subst_word_append [simp]: "subst_word f (xs @ ys) = subst_word f xs @ subst_word f ys"
  by (simp add: subst_word_def)
    
lemma subst_word_Cons [simp]: "subst_word f (x # xs) = f x @ subst_word f xs"
  by (simp add: subst_word_def)
    
lemma subst_word_conc [simp]: "subst_word f ` (A @@ B) = subst_word f ` A @@ subst_word f ` B"
  unfolding conc_def image_def by force 

lemma subst_word_compower [simp]: "subst_word f ` (A ^^ n) = (subst_word f ` A) ^^ n"
  by (induction n) simp_all
    
lemma subst_word_star [simp]: "subst_word f ` (star A) = star (subst_word f ` A)"
  by (simp add: star_def image_UN)
    

text ‹Suffix language›

definition Suffixes :: "'a list set  'a list set" where
  "Suffixes A = {w. q. q @ w  A}"

lemma Suffixes_altdef [code]: "Suffixes A = (wA. set (suffixes w))"
  unfolding Suffixes_def set_suffixes_eq suffix_def by blast

lemma Nil_in_Suffixes_iff [simp]: "[]  Suffixes A  A  {}"
  by (auto simp: Suffixes_def)

lemma Suffixes_empty [simp]: "Suffixes {} = {}"
  by (auto simp: Suffixes_def)
    
lemma Suffixes_empty_iff [simp]: "Suffixes A = {}  A = {}"
  by (auto simp: Suffixes_altdef)
    
lemma Suffixes_singleton [simp]: "Suffixes {xs} = set (suffixes xs)"
  by (auto simp: Suffixes_altdef)
    
lemma Suffixes_insert: "Suffixes (insert xs A) = set (suffixes xs)  Suffixes A"
  by (simp add: Suffixes_altdef)

lemma Suffixes_conc [simp]: "A  {}  Suffixes (A @@ B) = Suffixes B  (Suffixes A @@ B)"
  unfolding Suffixes_altdef conc_def by (force simp: suffix_append)
    
lemma Suffixes_union [simp]: "Suffixes (A  B) = Suffixes A  Suffixes B"
  by (auto simp: Suffixes_def)
  
lemma Suffixes_UNION [simp]: "Suffixes ((f ` A)) = ((λx. Suffixes (f x)) ` A)"
  by (auto simp: Suffixes_def)

lemma Suffixes_compower: 
  assumes "A  {}"
  shows   "Suffixes (A ^^ n) = insert [] (Suffixes A @@ (k<n. A ^^ k))"
proof (induction n)
  case (Suc n)
  from Suc have "Suffixes (A ^^ Suc n) = 
                   insert [] (Suffixes A @@ ((k<n. A ^^ k)  A ^^ n))"
    by (simp_all add: assms conc_Un_distrib)
  also have "(k<n. A ^^ k)  A ^^ n = (kinsert n {..<n}. A ^^ k)"  by blast
  also have "insert n {..<n} = {..<Suc n}" by auto
  finally show ?case .
qed simp_all

lemma Suffixes_star [simp]: 
  assumes "A  {}"
  shows   "Suffixes (star A) = Suffixes A @@ star A"
proof -
  have "star A = (n. A ^^ n)" unfolding star_def ..
  also have "Suffixes  = (x. Suffixes (A ^^ x))" by simp
  also have " = (n. insert [] (Suffixes A @@ (k<n. A ^^ k)))"
    using assms by (subst Suffixes_compower) auto
  also have " = insert [] (Suffixes A @@ (n. (k<n. A ^^ k)))"
    by (simp_all add: conc_UNION_distrib)
  also have "(n. (k<n. A ^^ k)) = (n. A ^^ n)" by auto
  also have " = star A" unfolding star_def ..
  also have "insert [] (Suffixes A @@ star A) = Suffixes A @@ star A" 
    using assms by auto
  finally show ?thesis .
qed
  
text ‹Prefix language›

definition Prefixes :: "'a list set  'a list set" where
  "Prefixes A = {w. q. w @ q  A}"

lemma Prefixes_altdef [code]: "Prefixes A = (wA. set (prefixes w))"
  unfolding Prefixes_def set_prefixes_eq prefix_def by blast

lemma Nil_in_Prefixes_iff [simp]: "[]  Prefixes A  A  {}"
  by (auto simp: Prefixes_def)

lemma Prefixes_empty [simp]: "Prefixes {} = {}"
  by (auto simp: Prefixes_def)
    
lemma Prefixes_empty_iff [simp]: "Prefixes A = {}  A = {}"
  by (auto simp: Prefixes_altdef)
    
lemma Prefixes_singleton [simp]: "Prefixes {xs} = set (prefixes xs)"
  by (auto simp: Prefixes_altdef)
    
lemma Prefixes_insert: "Prefixes (insert xs A) = set (prefixes xs)  Prefixes A"
  by (simp add: Prefixes_altdef)

lemma Prefixes_conc [simp]: "B  {}  Prefixes (A @@ B) = Prefixes A  (A @@ Prefixes B)"
  unfolding Prefixes_altdef conc_def by (force simp: prefix_append)
    
lemma Prefixes_union [simp]: "Prefixes (A  B) = Prefixes A  Prefixes B"
  by (auto simp: Prefixes_def)
  
lemma Prefixes_UNION [simp]: "Prefixes ((f ` A)) = ((λx. Prefixes (f x)) ` A)"
  by (auto simp: Prefixes_def)


lemma Prefixes_rev: "Prefixes (rev ` A) = rev ` Suffixes A"
  by (auto simp: Prefixes_altdef prefixes_rev Suffixes_altdef)

lemma Suffixes_rev: "Suffixes (rev ` A) = rev ` Prefixes A"
  by (auto simp: Prefixes_altdef suffixes_rev Suffixes_altdef)    


lemma Prefixes_compower:
  assumes "A  {}"
  shows   "Prefixes (A ^^ n) = insert [] ((k<n. A ^^ k) @@ Prefixes A)"
proof -
  have "A ^^ n = rev ` ((rev ` A) ^^ n)" by (simp add: image_image)
  also have "Prefixes  = insert [] ((k<n. A ^^ k) @@ Prefixes A)"
    unfolding Prefixes_rev 
    by (subst Suffixes_compower) (simp_all add: image_UN image_image Suffixes_rev assms)
  finally show ?thesis .
qed

lemma Prefixes_star [simp]:
  assumes "A  {}"
  shows   "Prefixes (star A) = star A @@ Prefixes A"
proof -
  have "star A = rev ` star (rev ` A)" by (simp add: image_image)
  also have "Prefixes  = star A @@ Prefixes A"
    unfolding Prefixes_rev
    by (subst Suffixes_star) (simp_all add: assms image_image Suffixes_rev)
  finally show ?thesis .
qed
  

subsection ‹Subword language›

text ‹
  The language of all sub-words, i.e. all words that are a contiguous sublist of a word in
  the original language.
›
definition Sublists :: "'a list set  'a list set" where
  "Sublists A = {w. qA. sublist w q}"

lemma Sublists_altdef [code]: "Sublists A = (wA. set (sublists w))"
  by (auto simp: Sublists_def)

lemma Sublists_empty [simp]: "Sublists {} = {}"
  by (auto simp: Sublists_def)
    
lemma Sublists_singleton [simp]: "Sublists {w} = set (sublists w)"
  by (auto simp: Sublists_altdef)

lemma Sublists_insert: "Sublists (insert w A) = set (sublists w)  Sublists A"
  by (auto simp: Sublists_altdef)
    
lemma Sublists_Un [simp]: "Sublists (A  B) = Sublists A  Sublists B"
  by (auto simp: Sublists_altdef)

lemma Sublists_UN [simp]: "Sublists ((f ` A)) = ((λx. Sublists (f x)) ` A)"
  by (auto simp: Sublists_altdef)

lemma Sublists_conv_Prefixes: "Sublists A = Prefixes (Suffixes A)"
  by (auto simp: Sublists_def Prefixes_def Suffixes_def sublist_def)
    
lemma Sublists_conv_Suffixes: "Sublists A = Suffixes (Prefixes A)"
  by (auto simp: Sublists_def Prefixes_def Suffixes_def sublist_def)

lemma Sublists_conc [simp]: 
  assumes "A  {}" "B  {}"
  shows   "Sublists (A @@ B) = Sublists A  Sublists B  Suffixes A @@ Prefixes B"
  using assms unfolding Sublists_conv_Suffixes by auto

lemma star_not_empty [simp]: "star A  {}"
  by auto
    
lemma Sublists_star:
  "A  {}  Sublists (star A) = Sublists A  Suffixes A @@ star A @@ Prefixes A"
  by (simp add: Sublists_conv_Prefixes)

lemma Prefixes_subset_Sublists: "Prefixes A  Sublists A"
  unfolding Prefixes_def Sublists_def by auto

lemma Suffixes_subset_Sublists: "Suffixes A  Sublists A"
  unfolding Suffixes_def Sublists_def by auto


subsection ‹Fragment language›
  
text ‹
  The following is the fragment language of a given language, i.e. the set of all words that
  are (not necessarily contiguous) sub-sequences of a word in the original language.
›
definition Subseqs where "Subseqs A = (wA. set (subseqs w))"

lemma Subseqs_empty [simp]: "Subseqs {} = {}"
  by (simp add: Subseqs_def)

lemma Subseqs_insert [simp]: "Subseqs (insert xs A) = set (subseqs xs)  Subseqs A"
  by (simp add: Subseqs_def)
    
lemma Subseqs_singleton [simp]: "Subseqs {xs} = set (subseqs xs)"
  by simp
    
lemma Subseqs_Un [simp]: "Subseqs (A  B) = Subseqs A  Subseqs B"
  by (simp add: Subseqs_def)
    
lemma Subseqs_UNION [simp]: "Subseqs ((f ` A)) = ((λx. Subseqs (f x)) ` A)"
  by (simp add: Subseqs_def)
  
lemma Subseqs_conc [simp]: "Subseqs (A @@ B) = Subseqs A @@ Subseqs B"
proof safe
  fix xs assume "xs  Subseqs (A @@ B)"
  then obtain ys zs where *: "ys  A" "zs  B" "subseq xs (ys @ zs)" 
    by (auto simp: Subseqs_def conc_def)
  from *(3) obtain xs1 xs2 where "xs = xs1 @ xs2" "subseq xs1 ys" "subseq xs2 zs"
    by (rule subseq_appendE)
  with *(1,2) show "xs  Subseqs A @@ Subseqs B" by (auto simp: Subseqs_def set_subseqs_eq)
next
  fix xs assume "xs  Subseqs A @@ Subseqs B"
  then obtain xs1 xs2 ys zs 
    where "xs = xs1 @ xs2" "subseq xs1 ys" "subseq xs2 zs" "ys  A" "zs  B"
    by (auto simp: conc_def Subseqs_def)
  thus "xs  Subseqs (A @@ B)" by (force simp: Subseqs_def conc_def intro: list_emb_append_mono)
qed

lemma Subseqs_compower [simp]: "Subseqs (A ^^ n) = Subseqs A ^^ n"
  by (induction n) simp_all
    
lemma Subseqs_star [simp]: "Subseqs (star A) = star (Subseqs A)"
  by (simp add: star_def)
    
lemma Sublists_subset_Subseqs: "Sublists A  Subseqs A"
  by (auto simp: Sublists_def Subseqs_def dest!: sublist_imp_subseq)


subsection ‹Various regular expression constructions›

text ‹A construction for language reversal of a regular expression:›

primrec rexp_rev where
  "rexp_rev Zero = Zero"
| "rexp_rev One = One"
| "rexp_rev (Atom x) = Atom x"
| "rexp_rev (Plus r s) = Plus (rexp_rev r) (rexp_rev s)"
| "rexp_rev (Times r s) = Times (rexp_rev s) (rexp_rev r)"
| "rexp_rev (Star r) = Star (rexp_rev r)"

lemma lang_rexp_rev [simp]: "lang (rexp_rev r) = rev ` lang r"
  by (induction r) (simp_all add: image_Un)  
    

text ‹The obvious construction for a singleton-language regular expression:›

fun rexp_of_word where
  "rexp_of_word [] = One"
| "rexp_of_word [x] = Atom x"
| "rexp_of_word (x#xs) = Times (Atom x) (rexp_of_word xs)"
  
lemma lang_rexp_of_word [simp]: "lang (rexp_of_word xs) = {xs}"
  by (induction xs rule: rexp_of_word.induct) (simp_all add: conc_def)

lemma size_rexp_of_word [simp]: "size (rexp_of_word xs) = Suc (2 * (length xs - 1))"
  by (induction xs rule: rexp_of_word.induct) auto


text ‹Character substitution in a regular expression:›

primrec rexp_subst where
  "rexp_subst f Zero = Zero"
| "rexp_subst f One = One"
| "rexp_subst f (Atom x) = rexp_of_word (f x)"
| "rexp_subst f (Plus r s) = Plus (rexp_subst f r) (rexp_subst f s)"
| "rexp_subst f (Times r s) = Times (rexp_subst f r) (rexp_subst f s)"
| "rexp_subst f (Star r) = Star (rexp_subst f r)"

lemma lang_rexp_subst: "lang (rexp_subst f r) = subst_word f ` lang r"
  by (induction r) (simp_all add: image_Un)


text ‹Suffix language of a regular expression:›

primrec suffix_rexp :: "'a rexp  'a rexp" where
  "suffix_rexp Zero = Zero"
| "suffix_rexp One = One"
| "suffix_rexp (Atom a) = Plus (Atom a) One"
| "suffix_rexp (Plus r s) = Plus (suffix_rexp r) (suffix_rexp s)"
| "suffix_rexp (Times r s) =
    (if rexp_empty r then Zero else Plus (Times (suffix_rexp r) s) (suffix_rexp s))"
| "suffix_rexp (Star r) =
    (if rexp_empty r then One else Times (suffix_rexp r) (Star r))"

theorem lang_suffix_rexp [simp]:
  "lang (suffix_rexp r) = Suffixes (lang r)"
  by (induction r) (auto simp: rexp_empty_iff)


text ‹Prefix language of a regular expression:›

primrec prefix_rexp :: "'a rexp  'a rexp" where
  "prefix_rexp Zero = Zero"
| "prefix_rexp One = One"
| "prefix_rexp (Atom a) = Plus (Atom a) One"
| "prefix_rexp (Plus r s) = Plus (prefix_rexp r) (prefix_rexp s)"
| "prefix_rexp (Times r s) =
    (if rexp_empty s then Zero else Plus (Times r (prefix_rexp s)) (prefix_rexp r))"
| "prefix_rexp (Star r) =
    (if rexp_empty r then One else Times (Star r) (prefix_rexp r))"

theorem lang_prefix_rexp [simp]:
  "lang (prefix_rexp r) = Prefixes (lang r)"
  by (induction r) (auto simp: rexp_empty_iff)


text ‹Sub-word language of a regular expression›    

primrec sublist_rexp :: "'a rexp  'a rexp" where
  "sublist_rexp Zero = Zero"
| "sublist_rexp One = One"
| "sublist_rexp (Atom a) = Plus (Atom a) One"
| "sublist_rexp (Plus r s) = Plus (sublist_rexp r) (sublist_rexp s)"
| "sublist_rexp (Times r s) =
    (if rexp_empty r  rexp_empty s then Zero else 
       Plus (sublist_rexp r) (Plus (sublist_rexp s) (Times (suffix_rexp r) (prefix_rexp s))))"
| "sublist_rexp (Star r) =
    (if rexp_empty r then One else 
       Plus (sublist_rexp r) (Times (suffix_rexp r) (Times (Star r) (prefix_rexp r))))"

theorem lang_sublist_rexp [simp]:
  "lang (sublist_rexp r) = Sublists (lang r)"
  by (induction r) (auto simp: rexp_empty_iff Sublists_star)


text ‹Fragment language of a regular expression:›
  
primrec subseqs_rexp :: "'a rexp  'a rexp" where
  "subseqs_rexp Zero = Zero"
| "subseqs_rexp One = One"
| "subseqs_rexp (Atom x) = Plus (Atom x) One"
| "subseqs_rexp (Plus r s) = Plus (subseqs_rexp r) (subseqs_rexp s)"
| "subseqs_rexp (Times r s) = Times (subseqs_rexp r) (subseqs_rexp s)"
| "subseqs_rexp (Star r) = Star (subseqs_rexp r)"

lemma lang_subseqs_rexp [simp]: "lang (subseqs_rexp r) = Subseqs (lang r)"
  by (induction r) auto


text ‹Subword language of a regular expression›


end