Theory RegExp2NAe

(*  Author:     Tobias Nipkow
    Copyright   1998 TUM
*)

section "From regular expressions to nondeterministic automata with epsilon"

theory RegExp2NAe
imports "Regular-Sets.Regular_Exp" NAe
begin

type_synonym 'a bitsNAe = "('a,bool list)nae"

definition
 epsilon :: "'a bitsNAe" where
"epsilon = ([],λa s. {}, λs. s=[])"

definition
"atom"  :: "'a  'a bitsNAe" where
"atom a = ([True],
            λb s. if s=[True]  b=Some a then {[False]} else {},
            λs. s=[False])"

definition
 or :: "'a bitsNAe  'a bitsNAe  'a bitsNAe" where
"or = (λ(ql,dl,fl)(qr,dr,fr).
   ([],
    λa s. case s of
            []  if a=None then {True#ql,False#qr} else {}
          | left#s  if left then True ## dl a s
                              else False ## dr a s,
    λs. case s of []  False | left#s  if left then fl s else fr s))"

definition
 conc :: "'a bitsNAe  'a bitsNAe  'a bitsNAe" where
"conc = (λ(ql,dl,fl)(qr,dr,fr).
   (True#ql,
    λa s. case s of
            []  {}
          | left#s  if left then (True ## dl a s) 
                                   (if fl s  a=None then {False#qr} else {})
                              else False ## dr a s,
    λs. case s of []  False | left#s  ¬left  fr s))"

definition
 star :: "'a bitsNAe  'a bitsNAe" where
"star = (λ(q,d,f).
   ([],
    λa s. case s of
            []  if a=None then {True#q} else {}
          | left#s  if left then (True ## d a s) 
                                   (if f s  a=None then {True#q} else {})
                              else {},
    λs. case s of []  True | left#s  left  f s))"

primrec rexp2nae :: "'a rexp  'a bitsNAe" where
"rexp2nae Zero       = ([], λa s. {}, λs. False)" |
"rexp2nae One        = epsilon" |
"rexp2nae(Atom a)    = atom a" |
"rexp2nae(Plus r s)  = or   (rexp2nae r) (rexp2nae s)" |
"rexp2nae(Times r s) = conc (rexp2nae r) (rexp2nae s)" |
"rexp2nae(Star r)    = star (rexp2nae r)"

declare split_paired_all[simp]

(******************************************************)
(*                     epsilon                        *)
(******************************************************)

lemma step_epsilon[simp]: "step epsilon a = {}"
by(simp add:epsilon_def step_def)

lemma steps_epsilon: "((p,q)  steps epsilon w) = (w=[]  p=q)"
by (induct "w") auto

lemma accepts_epsilon[simp]: "accepts epsilon w = (w = [])"
by (metis (mono_tags, lifting) NAe.accepts_def epsilon_def fin_def prod.sel
      start_def steps_epsilon)

(******************************************************)
(*                       atom                         *)
(******************************************************)

lemma fin_atom: "(fin (atom a) q) = (q = [False])"
by(simp add:atom_def)

lemma start_atom: "start (atom a) = [True]"
by(simp add:atom_def)

(* Use {x. False} = {}? *)

lemma eps_atom[simp]:
 "eps(atom a) = {}"
by (simp add:atom_def step_def)

lemma in_step_atom_Some[simp]:
 "(p,q) : step (atom a) (Some b) = (p=[True]  q=[False]  b=a)"
by (simp add:atom_def step_def)

lemma False_False_in_steps_atom:
  "([False],[False])  steps (atom a) w = (w = [])"
  by (induct "w") (auto simp add: relcomp_unfold)

lemma start_fin_in_steps_atom:
  "(start (atom a), [False])  steps (atom a) w = (w = [a])"
proof (induct "w")
  case Nil
  then show ?case  
    by (simp add: start_atom rtrancl_empty)
next
  case (Cons a w)
  then show ?case 
    by (simp add: False_False_in_steps_atom relcomp_unfold start_atom)
qed

lemma accepts_atom: "accepts (atom a) w = (w = [a])"
by (simp add: accepts_def start_fin_in_steps_atom fin_atom)


(******************************************************)
(*                      or                            *)
(******************************************************)

(***** lift True/False over fin *****)

lemma fin_or_True[iff]:
 "L R. fin (or L R) (True#p) = fin L p"
by(simp add:or_def)

lemma fin_or_False[iff]:
 "L R. fin (or L R) (False#p) = fin R p"
by(simp add:or_def)

(***** lift True/False over step *****)

lemma True_in_step_or[iff]:
  "L R. (True#p,q) : step (or L R) a = (r. q = True#r  (p,r) : step L a)"
by (auto simp add:or_def step_def)

lemma False_in_step_or[iff]:
  "L R. (False#p,q) : step (or L R) a = (r. q = False#r  (p,r) : step R a)"
by (auto simp add:or_def step_def)


(***** lift True/False over epsclosure *****)

lemma lemma1a:
 "(tp,tq)  (eps(or L R))*  
 (p. tp = True#p  q. (p,q)  (eps L)*  tq = True#q)"
by (induct rule:rtrancl_induct; blast intro: rtrancl_into_rtrancl)

lemma lemma1b:
 "(tp,tq)  (eps(or L R))*  
 (p. tp = False#p  q. (p,q)  (eps R)*  tq = False#q)"
by (induct rule:rtrancl_induct; blast intro: rtrancl_into_rtrancl)

lemma lemma2a:
 "(p,q)  (eps L)*   (True#p, True#q)  (eps(or L R))*"
by (induct rule:rtrancl_induct; blast intro: rtrancl_into_rtrancl)

lemma lemma2b:
 "(p,q)  (eps R)*   (False#p, False#q)  (eps(or L R))*"
by (induct rule:rtrancl_induct; blast intro: rtrancl_into_rtrancl)

lemma True_epsclosure_or[iff]:
 "(True#p,q)  (eps(or L R))* = (r. q = True#r  (p,r)  (eps L)*)"
by (blast dest: lemma1a lemma2a)

lemma False_epsclosure_or[iff]:
 "(False#p,q)  (eps(or L R))* = (r. q = False#r  (p,r)  (eps R)*)"
by (blast dest: lemma1b lemma2b)

(***** lift True/False over steps *****)

lemma lift_True_over_steps_or[iff]:
  "p. (True#p,q):steps (or L R) w = (r. q = True # r  (p,r):steps L w)"
by (induct "w"; force)

lemma lift_False_over_steps_or[iff]:
  "p. (False#p,q):steps (or L R) w = (r. q = False#r  (p,r):steps R w)"
by (induct "w"; force)

(***** Epsilon closure of start state *****)

lemma unfold_rtrancl2:
  "R* = Id  (R O R*)"
by (metis r_comp_rtrancl_eq rtrancl_unfold)

lemma in_unfold_rtrancl2:
 "(p,q)  R* = (q = p  (r. (p,r)  R  (r,q)  R*))"
  by (metis converse_rtranclE converse_rtrancl_into_rtrancl rtrancl.simps)

lemmas [iff] = in_unfold_rtrancl2[where ?p = "start(or L R)"] for L R

lemma start_eps_or[iff]:
 "L R. (start(or L R),q)  eps(or L R) = 
       (q = True#start L  q = False#start R)"
by (simp add:or_def step_def)

lemma not_start_step_or_Some[iff]:
 "L R. (start(or L R),q)  step (or L R) (Some a)"
by (simp add:or_def step_def)

lemma steps_or:
 "((start (RegExp2NAe.or L R), q)
      NAe.steps (RegExp2NAe.or L R) w) 
    (w = []  q = start (RegExp2NAe.or L R) 
     (p. q = True # p  (start L, p)  NAe.steps L w 
          q = False # p  (start R, p)  NAe.steps R w))"
  by (cases "w"; simp; blast) 

lemma start_or_not_final[iff]:
 "L R. ¬ fin (or L R) (start(or L R))"
by (simp add:or_def)

lemma accepts_or:
  "accepts (or L R) w = (accepts L w | accepts R w)"
by (auto simp add:accepts_def steps_or)


(******************************************************)
(*                      conc                          *)
(******************************************************)

(** True/False in fin **)

lemma in_conc_True[iff]:
 "L R. fin (conc L R) (True#p) = False"
by (simp add:conc_def)

lemma fin_conc_False[iff]:
 "L R. fin (conc L R) (False#p) = fin R p"
by (simp add:conc_def)

(** True/False in step **)

lemma True_step_conc[iff]:
 "L R. (True#p,q) : step (conc L R) a = 
       ((r. q=True#r  (p,r): step L a) | 
        (fin L p  a=None  q=False#start R))"
by (simp add:conc_def step_def) (blast)

lemma False_step_conc[iff]:
 "L R. (False#p,q) : step (conc L R) a = 
       (r. q = False#r  (p,r) : step R a)"
by (simp add:conc_def step_def) (blast)

(** False in epsclosure **)

lemma lemma1b':
 "(tp,tq)  (eps(conc L R))*  
  (p. tp = False#p  q. (p,q)  (eps R)*  tq = False#q)"
by (induct rule:rtrancl_induct; blast intro: rtrancl_into_rtrancl)

lemma lemma2b':
  "(p,q)  (eps R)*  (False#p, False#q)  (eps(conc L R))*"
by (induct rule:rtrancl_induct; blast intro: rtrancl_into_rtrancl)

lemma False_epsclosure_conc[iff]:
  "((False # p, q)  (eps (conc L R))*) = (r. q = False # r  (p, r)  (eps R)*)"
by (meson lemma1b' lemma2b')

(** False in steps **)

lemma False_steps_conc[iff]:
  "p. (False#p,q) steps (conc L R) w = (r. q=False#r  (p,r) steps R w)"
by (induct "w"; force)

(** True in epsclosure **)

lemma True_True_eps_concI:
 "(p,q)  (eps L)*  (True#p,True#q)  (eps(conc L R))*"
by (induct rule:rtrancl_induct; blast intro: rtrancl_into_rtrancl)

lemma True_True_steps_concI:
  "p. (p,q)  steps L w  (True#p,True#q)  steps (conc L R) w"
proof (induct "w")
  case Nil
  then show ?case 
    by (simp add: True_True_eps_concI)
next
  case (Cons a w)
  then show ?case
    by (auto intro: True_True_eps_concI)
qed

lemma lem:
 "L R. (p,q) : step R None  (False#p, False#q) : step (conc L R) None"
by(simp add: conc_def step_def)

lemma lemma2b'':
 "(p,q)  (eps R)*  (False#p, False#q)  (eps(conc L R))*"
  by blast

lemma True_False_eps_concI:
 "L R. fin L p  (True#p, False#start R)  eps(conc L R)"
by(simp add: conc_def step_def)

lemma True_epsclosure_conc[iff]:
 "((True#p,q)  (eps(conc L R))*) = 
 ((r. (p,r)  (eps L)*  q = True#r) 
  (r. (p,r)  (eps L)*  fin L r 
        (s. (start R, s)  (eps R)*  q = False#s)))"
  (is "?lhs = ?rhs")
proof
  show "?lhs  ?rhs"
    by (induct rule:rtrancl_induct; blast intro: rtrancl_into_rtrancl)
  assume ?rhs
  then show ?lhs
  proof (elim disjE exE conjE)
    fix r
    assume "(p, r)  (eps L)*" "q = True # r"
    then show "(True # p, q)  (eps (RegExp2NAe.conc L R))*"
      by (simp add: True_True_eps_concI)
  next
    fix r s
    assume "(p, r)  (eps L)*" "fin L r" "(start R, s)  (eps R)*" "q = False # s"
    then have "(True # r, False # s)  (eps (RegExp2NAe.conc L R))*"
      by (meson False_epsclosure_conc True_step_conc in_unfold_rtrancl2)
    then show "(True # p, q)  (eps (RegExp2NAe.conc L R))*"
      by (metis True_True_eps_concI (p, r)  (eps L)* q = False # s rtrancl_trans)
  qed
qed

(** True in steps **)

lemma True_steps_concD:
 "(True#p,q)  steps (conc L R) w 
     ((r. (p,r)  steps L w  q = True#r)  
      (u v. w = u@v  (r. (p,r)  steps L u  fin L r 
              (s. (start R,s)  steps R v  q = False#s))))"
proof (induction w arbitrary: p)
  case Nil
  then show ?case by auto
next
  case (Cons a w)
  obtain u v r
    where §: "(u,v)  step (RegExp2NAe.conc L R) (Some a)"
      "(v, q)  NAe.steps (RegExp2NAe.conc L R) w"
      "(p, r)  (eps L)*"
      and "u = True # r  fin L r  (s. (start R, s)  (eps R)*  u = False # s)"
    using Cons.prems
    by simp blast
  then consider "u = True # r" | s where "fin L r" "(start R, s)  (eps R)*  u = False # s"
    by blast
  then show ?case
  proof cases
    case 1
    with § obtain r' where r': "(True#r',q)  steps (conc L R) w"
           and : "(r, r')  step L (Some a)" "v = True # r'"
      by blast
    from Cons.IH [OF r'] show ?thesis
    proof (elim exE disjE conjE)
      fix u' v' r'' s'
      assume : "w = u' @ v'" "(start R, s')  NAe.steps R v'" "q = False # s'"
        and "fin L r''" "(r', r'')  NAe.steps L u'"
      then have "r. (p, r)  (eps L)* O step L (Some a) O NAe.steps L u'  fin L r"
        using  § by blast 
      with  show ?thesis
        by (metis NAe.steps.simps(2) append_Cons) 
    qed (use 1 § Cons in auto)
  next
    case 2
    with § show ?thesis
      by fastforce
  qed
qed

lemma True_steps_conc:
 "(True#p,q)  steps (conc L R) w = 
 ((r. (p,r)  steps L w  q = True#r)  | 
  (u v. w = u@v  (r. (p,r)  steps L u  fin L r  
          (s. (start R,s)  steps R v  q = False#s))))"
by (blast dest: True_steps_concD
    intro: True_True_steps_concI in_steps_epsclosure)

(** starting from the start **)

lemma start_conc:
  "L R. start(conc L R) = True#start L"
by (simp add: conc_def)

lemma final_conc:
 "L R. fin(conc L R) p = (s. p = False#s  fin R s)"
by (simp add:conc_def split: list.split)

lemma accepts_conc:
 "accepts (conc L R) w = (u v. w = u@v  accepts L u  accepts R v)"
by (fastforce simp add: accepts_def True_steps_conc final_conc start_conc)

(******************************************************)
(*                       star                         *)
(******************************************************)

lemma True_in_eps_star[iff]:
 "A. (True#p,q)  eps(star A) = 
     ( (r. q = True#r  (p,r)  eps A)  (fin A p  q = True#start A) )"
by (simp add:star_def step_def) (blast)

lemma True_True_step_starI:
  "A. (p,q) : step A a  (True#p, True#q) : step (star A) a"
by (simp add:star_def step_def)

lemma True_True_eps_starI:
  "(p,r)  (eps A)*  (True#p, True#r)  (eps(star A))*"
proof (induct rule: rtrancl_induct)
  case base
  then show ?case by blast
next
  case (step y z)
  then show ?case by (blast intro: True_True_step_starI rtrancl_into_rtrancl)
qed

lemma True_start_eps_starI:
 "A. fin A p  (True#p,True#start A)  eps(star A)"
by (simp add:star_def step_def)

lemma True_eps_star[iff]:
 "((True#p,s)  (eps(star A))*) = 
 (r. ((p,r)  (eps A)* 
        (q. (p,q)  (eps A)*  fin A q  (start A,r)  (eps A)*)) 
       s = True#r)"
  (is "?lhs = ?rhs")
proof
  show "?lhs  ?rhs"
  proof (induct rule: rtrancl_induct)
    case base
    then show ?case
      by blast
  next
    case (step y z)
    then show ?case
      by (meson True_in_eps_star rtrancl.rtrancl_into_rtrancl rtrancl.rtrancl_refl)
  qed
  show "?rhs  ?lhs"
  by (metis (no_types, opaque_lifting) rtrancl_trans  True_start_eps_starI True_True_eps_starI converse_rtrancl_into_rtrancl)
qed

(** True in step Some **)

lemma True_step_star[iff]:
 "A. (True#p,r)  step (star A) (Some a) =
     (q. (p,q)  step A (Some a)  r=True#q)"
by (simp add:star_def step_def) (blast)


(** True in steps **)

(* reverse list induction! Complicates matters for conc? *)
lemma True_start_steps_starD:
  "(True#start A,rr)  steps (star A) w 
     (us v. w = concat us @ v 
             (uset us. accepts A u) 
             (r. (start A,r)  steps A v  rr = True#r))"
proof (induction w arbitrary: rr rule: rev_induct)
  case Nil
  then show ?case
    using split_list_cycles by fastforce
next
  case (snoc x xs)
  then obtain u v 
    where §: "(v, rr)  (eps (RegExp2NAe.star A))*"
        "(True # start A, u)  NAe.steps (RegExp2NAe.star A) xs"
        "(u, v)  step (RegExp2NAe.star A) (Some x)"
    by (auto simp: O_assoc[symmetric] epsclosure_steps)
  then obtain uss vs r 
    where  "xs = concat uss @ vs" "(aset uss. NAe.accepts A a)" 
           "(start A, r)  NAe.steps A vs"  "u = True # r"
    using snoc.IH by meson
  with § show ?case
    apply (clarify; elim disjE exE)
     apply (rule_tac x = "uss" in exI)
     apply (rule_tac x = "vs@[x]" in exI)
     apply (fastforce simp add: O_assoc[symmetric] epsclosure_steps)
    apply (rule_tac x = "uss@[vs@[x]]" in exI)
    apply (rule_tac x = "[]" in exI)
    apply (fastforce simp add: accepts_def)
    done
qed

lemma True_True_steps_starI:
  "p. (p,q)  steps A w  (True#p,True#q)  steps (star A) w"
by (induct "w") (auto intro: True_True_eps_starI True_True_step_starI)

lemma steps_star_cycle:
 "(u  set us. accepts A u) 
 (True#start A,True#start A)  steps (star A) (concat us)"
proof (induct "us")
  case Nil
  then show ?case
    by simp
next
  case (Cons a us)
  then have "(True # start A, True # start A)  NAe.steps (RegExp2NAe.star A) (concat us)"
    by auto
  moreover
  obtain q where "(start A, q)  NAe.steps A a" "fin A q"
    by (meson Cons.prems NAe.accepts_def list.set_intros(1))
  ultimately 
    have "(True # start A, True # start A)
               NAe.steps (RegExp2NAe.star A) a O NAe.steps (RegExp2NAe.star A) (concat us)"
    by (meson True_True_steps_starI True_start_eps_starI in_epsclosure_steps r_into_rtrancl
        relcomp.relcompI)
  then show ?case
    by (auto simp: accepts_def)
qed

(* Better stated directly with start(star A)? Loop in star A back to start(star A)?*)
lemma True_start_steps_star:
 "(True#start A,rr)  steps (star A) w = 
 (us v. w = concat us @ v 
             (uset us. accepts A u) 
             (r. (start A,r)  steps A v  rr = True#r))"
  (is "?lhs = ?rhs")
proof
  show "?lhs  ?rhs"
    by (simp add: True_start_steps_starD)
  show "?rhs  ?lhs"
    by (blast intro: steps_star_cycle True_True_steps_starI)
qed

(** the start state **)

lemma start_step_star[iff]:
  "A. (start(star A),r) : step (star A) a = (a=None  r = True#start A)"
by (simp add:star_def step_def)

lemmas epsclosure_start_step_star =
  in_unfold_rtrancl2[where ?p = "start (star A)"] for A

lemma start_steps_star:
  "(start(star A),r)  steps (star A) w 
   ((w=[]  r = start(star A)) | (True#start A,r)  steps (star A) w)"
  (is "?lhs = ?rhs")
proof
  assume L: ?lhs
  show ?rhs
  proof (cases w)
    case Nil
    with L show ?thesis 
      by (simp add: epsclosure_start_step_star)
  next
    case (Cons u us)
    with L show ?thesis
      by (clarsimp simp add: epsclosure_start_step_star) blast
  qed
next 
  have ?lhs if "(True # start A, r)  NAe.steps (RegExp2NAe.star A) w"
    by (meson in_steps_epsclosure r_into_rtrancl start_step_star that)
  then show "?rhs  ?lhs" by auto
qed

lemma fin_star_True[iff]: "A. fin (star A) (True#p) = fin A p"
by (simp add:star_def)

lemma fin_star_start[iff]: "A. fin (star A) (start(star A))"
by (simp add:star_def)

(* too complex! Simpler if loop back to start(star A)? *)
lemma accepts_star:
 "accepts (star A) w  (us. (u  set(us). accepts A u)  (w = concat us))"
  (is "?lhs = ?rhs")
proof
  assume ?lhs
  then obtain q where "(start (RegExp2NAe.star A), q)  NAe.steps (RegExp2NAe.star A) w"
                 and qfin: "fin (RegExp2NAe.star A) q"
    by (auto simp: accepts_def)
  then consider "w = []" "q = start (RegExp2NAe.star A)" | "(True # start A, q)  NAe.steps (RegExp2NAe.star A) w"
    by (auto simp add: start_steps_star)
  then show ?rhs
  proof cases
    case 1
    then show ?thesis
      using split_list_first by fastforce
  next
    case 2
    with qfin obtain us v r where §: "w = concat us @ v" 
           "uset us. NAe.accepts A u" "(start A, r)  NAe.steps A v" "fin A r"
      using True_start_steps_starD qfin by blast
    have "concat us @ v = concat (us @ [v])"
      by auto
    with 2 §
     show ?thesis
       by (metis NAe.accepts_def rotate1.simps(2) set_ConsD set_rotate1)
  qed
next
  assume ?rhs
  then obtain us where us: "uset us. q. (start A, q)  NAe.steps A u  fin A q"
           "w = concat us"
    using NAe.accepts_def by blast
  show ?lhs
  proof (cases us rule: rev_exhaust)
    case Nil
    with us show ?thesis
      by (force simp: NAe.accepts_def)
  next
    case (snoc ys y)
    with us show ?thesis
      apply simp
      by (metis NAe.accepts_def NAe.steps_append True_start_steps_star fin_star_True
          start_steps_star)
  qed
qed


(***** Correctness of r2n *****)

lemma accepts_rexp2nae:
 "w. accepts (rexp2nae r) w = (w : lang r)"
proof (induct "r")
  case Zero
  then show ?case
    by (simp add: accepts_def)
next
  case (Times r1 r2)
  then show ?case
    by (metis accepts_conc concE concI lang.simps(5) rexp2nae.simps(5))
next
  case (Star r)
  then show ?case
    by (simp add: accepts_star in_star_iff_concat subset_iff Ball_def)
qed (auto simp add: accepts_atom accepts_or)

end