Theory Lexer3

(*  Title:       POSIX Lexing with Derivatives of Extended Regular Expressions
    Author:      Christian Urban <christian.urban at kcl.ac.uk>, 2022
    Maintainer:  Christian Urban <christian.urban at kcl.ac.uk>
*) 

theory Lexer3
  imports "Derivatives3"
begin

section ‹Values›

(* Values (parse trees) over an alphabet 'a.  One constructor per way a
   regular expression can match: the empty match, a single atom, a
   sequence, the two alternatives of a choice, a list of iterations, and
   a labelled record.
   NOTE: do not reorder the constructors; proofs in this theory cite
   generated facts by position (for instance val.inject(5) for Stars). *)
datatype 'a val = 
  Void
| Atm 'a
| Seq "'a val" "'a val"
| Right "'a val"
| Left "'a val"
| Stars "('a val) list"
| Recv string "'a val"


section ‹The string behind a value›

(* flat v is the underlying string of the value v: the atoms at its
   leaves, read left to right.  Injection tags (Left/Right) and record
   labels (Recv) contribute nothing.
   The equations are kept in this order because later proofs refer to
   flat.simps(7) and flat.simps(8) by index. *)
fun 
  flat :: "'a val ⇒ 'a list"
where
  "flat (Void) = []"
| "flat (Atm c) = [c]"
| "flat (Left v) = flat v"
| "flat (Right v) = flat v"
| "flat (Seq v1 v2) = (flat v1) @ (flat v2)"
| "flat (Stars []) = []"
| "flat (Stars (v#vs)) = (flat v) @ (flat (Stars vs))" 
| "flat (Recv l v) = flat v"

(* flats vs: concatenation of the flattened strings of a list of values. *)
abbreviation
  "flats vs ≡ concat (map flat vs)"

(* Closed form of flat on Stars: flattening a Stars node is the
   concatenation of the flattened elements.  Declared [simp] so the
   simplifier rewrites flat (Stars vs) to this form. *)
lemma flat_Stars [simp]:
 "flat (Stars vs) = concat (map flat vs)"
  by (induct vs) (auto)

(* If every value in vs flattens to the empty string, so does the
   whole list. *)
lemma flats_empty:
  assumes "(∀v∈set vs. flat v = [])"
  shows "flats vs = []"
using assms
by(induct vs) (simp_all)

section ‹Relation between values and regular expressions›

(* ⊢ v : r  --  the value v is a well-formed parse tree for the regular
   expression r.
   Iteration cases: Star, Upto and the second From rule only admit
   iterations with non-empty flat; NTimes and the first From rule allow a
   suffix vs2 of empty-flat iterations so that the exact count n can be
   reached.
   NOTE: the rules are unnamed and cited by position elsewhere in this
   theory (Prf.intros(6) .. Prf.intros(12)); do not reorder them. *)
inductive 
  Prf :: "'a val ⇒ 'a rexp ⇒ bool" (‹⊢ _ : _› [100, 100] 100)
where
 "⟦⊢ v1 : r1; ⊢ v2 : r2⟧ ⟹ ⊢ Seq v1 v2 : Times r1 r2"
| "⊢ v1 : r1 ⟹ ⊢ Left v1 : Plus r1 r2"
| "⊢ v2 : r2 ⟹ ⊢ Right v2 : Plus r1 r2"
| "⊢ Void : One"
| "⊢ Atm c : Atom c"
| "⟦∀v ∈ set vs. ⊢ v : r ∧ flat v ≠ []⟧ ⟹ ⊢ Stars vs : Star r"
| "⟦∀v ∈ set vs1. ⊢ v : r ∧ flat v ≠ []; 
    ∀v ∈ set vs2. ⊢ v : r ∧ flat v = []; 
    length (vs1 @ vs2) = n⟧ ⟹ ⊢ Stars (vs1 @ vs2) : NTimes r n"
| "⟦∀v ∈ set vs. ⊢ v : r ∧ flat v ≠ []; length vs ≤ n⟧ ⟹ ⊢ Stars vs : Upto r n"
| "⟦∀v ∈ set vs1. ⊢ v : r ∧ flat v ≠ []; 
    ∀v ∈ set vs2. ⊢ v : r ∧ flat v = []; 
    length (vs1 @ vs2) = n⟧ ⟹ ⊢ Stars (vs1 @ vs2) : From r n"
| "⟦∀v ∈ set vs. ⊢ v : r ∧ flat v ≠ []; length vs > n⟧ ⟹ ⊢ Stars vs : From r n"
| "⊢ v : r ⟹ ⊢ Recv l v : Rec l r"
| "c ∈ cs ⟹ ⊢ Atm c : Charset cs"

(* Elimination (inversion) rules for Prf, one per regular-expression
   constructor.  Referenced by index below (for instance Prf_elims(1) is
   the Zero case, Prf_elims(6) the Star case); keep the order. *)
inductive_cases Prf_elims:
  "⊢ v : Zero"
  "⊢ v : Times r1 r2"
  "⊢ v : Plus r1 r2"
  "⊢ v : One"
  "⊢ v : Atom c"
  "⊢ v : Star r"
  "⊢ v : NTimes r n"
  "⊢ v : Upto r n"
  "⊢ v : From r n"
  "⊢ v : Rec l r"
  "⊢ v : Charset cs"

(* Special case of the NTimes rule: n iterations that all flatten to the
   empty string form a valid value for NTimes r n (take vs1 = []). *)
lemma Prf_NTimes_empty:
  assumes "∀v ∈ set vs. ⊢ v : r ∧ flat v = []" 
  and     "length vs = n"
  shows "⊢ Stars vs : NTimes r n"
  using assms
  by (metis Prf.intros(7) empty_iff eq_Nil_appendI list.set(1))
  

(* A string in the concatenation A @@ B splits into a part in A followed
   by a part in B. *)
lemma Times_decomp:
  assumes "s ∈ A @@ B"
  shows "∃s1 s2. s = s1 @ s2 ∧ s1 ∈ A ∧ s2 ∈ B"
  using assms
  by blast

(* A string in the n-fold power of A is the concatenation of exactly n
   strings, each taken from A. *)
lemma pow_string:
  assumes "s ∈ A ^^ n"
  shows "∃ss. concat ss = s ∧ (∀s ∈ set ss. s ∈ A) ∧ length ss = n"
using assms
  apply(induct n arbitrary: s)
  apply(auto dest!: Times_decomp)
  apply(drule_tac x="s2" in meta_spec)
  apply(auto)
  apply(rule_tac x="s1 # ss" in exI)
  apply(simp)
  done

(* If every value in vs flattens to a string in A, then the flattened
   list lies in the (length vs)-fold power of A. *)
lemma pow_Prf:
  assumes "∀v∈set vs. ⊢ v : r ∧ flat v ∈ A"
  shows "flats vs ∈ A ^^ (length vs)"
  using assms
  by (induct vs) (auto)

(* A string in star A is the concatenation of a list of strings from A. *)
lemma Star_string:
  assumes "s ∈ star A"
  shows "∃ss. concat ss = s ∧ (∀s ∈ set ss. s ∈ A)"
using assms
by (metis in_star_iff_concat subsetD)

(* Given a list of strings each of which is the flat of some value for r,
   there is a list of values for r with the same concatenated flat in
   which every value has a non-empty flat (values with empty flat can be
   dropped without changing the concatenation). *)
lemma Star_val:
  assumes "∀s∈set ss. ∃v. s = flat v ∧ ⊢ v : r"
  shows "∃vs. flats vs = concat ss ∧ (∀v∈set vs. ⊢ v : r ∧ flat v ≠ [])"
using assms
apply(induct ss)
apply(auto)
apply (metis empty_iff list.set(1))
by (metis append.simps(1) flat.simps(7) flat_Stars set_ConsD)

(* Concatenating a list that contains only empty strings yields the
   empty string. *)
lemma Aux:
  assumes "∀s∈set ss. s = []"
  shows "concat ss = []"
using assms
by (induct ss) (auto)

(* Refinement of pow_string: the n witnesses can be arranged so that all
   non-empty strings (ss1) come before all empty ones (ss2).  This is the
   shape required by the NTimes and From rules of Prf. *)
lemma pow_cstring:
  assumes "s ∈ A ^^ n"
  shows "∃ss1 ss2. concat (ss1 @ ss2) = s ∧ length (ss1 @ ss2) = n ∧ 
         (∀s ∈ set ss1. s ∈ A ∧ s ≠ []) ∧ (∀s ∈ set ss2. s ∈ A ∧ s = [])"
using assms
apply(induct n arbitrary: s)
  apply(auto)[1]
  apply(auto dest!: Times_decomp simp add: Seq_def)
  apply(drule_tac x="s2" in meta_spec)
  apply(simp)
apply(erule exE)+
  apply(clarify)
(* an empty head string goes to the front of ss2, a non-empty one to the
   front of ss1 *)
apply(case_tac "s1 = []")
apply(simp)
apply(rule_tac x="ss1" in exI)
apply(rule_tac x="s1 # ss2" in exI)
apply(simp)
apply(rule_tac x="s1 # ss1" in exI)
apply(rule_tac x="ss2" in exI)
  apply(simp)
  done

(* Value-level counterpart of pow_cstring: strings that are flats of
   values for r can be realised by two lists of values, vs1 with non-empty
   flats and vs2 with empty flats, of the same total length as ss and with
   flats vs1 equal to concat ss. *)
lemma flats_cval:
  assumes "∀s∈set ss. ∃v. s = flat v ∧ ⊢ v : r"
  shows "∃vs1 vs2. flats vs1 = concat ss ∧ length (vs1 @ vs2) = length ss ∧ 
          (∀v∈set vs1. ⊢ v : r ∧ flat v ≠ []) ∧
          (∀v∈set vs2. ⊢ v : r ∧ flat v = [])"
using assms
apply(induct ss rule: rev_induct)
apply(rule_tac x="[]" in exI)+
apply(simp)
apply(simp)
  apply(clarify)
  (* the new last value joins vs2 if its flat is empty, vs1 otherwise *)
  apply(case_tac "flat v = []")
  apply(rule_tac x="vs1" in exI)
  apply(simp)
apply(rule_tac x="v#vs2" in exI)
apply(simp)
  apply(rule_tac x="vs1 @ [v]" in exI)
  apply(simp)
apply(rule_tac x="vs2" in exI)
apply(simp)
  done

(* Consequence of flats_cval that keeps only the non-empty values: the
   resulting list is at most as long as ss. *)
lemma flats_cval2:
  assumes "∀s∈set ss. ∃v. s = flat v ∧ ⊢ v : r"
  shows "∃vs. flats vs = concat ss ∧ length vs ≤ length ss ∧ (∀v∈set vs. ⊢ v : r ∧ flat v ≠ [])"
  using assms
  apply -
  apply(drule flats_cval)
  apply(auto)
  done


(* Soundness of values: the flat of a well-formed value for r is a string
   in the language of r. *)
lemma Prf_flat_lang:
  assumes "⊢ v : r" shows "flat v ∈ lang r"
using assms
  apply(induct v r rule: Prf.induct) 
  apply(auto simp add: concat_in_star subset_eq lang_pow_add)
  apply(meson concI pow_Prf)
  apply(meson atMost_iff pow_Prf)
  apply(subgoal_tac "flats vs1 @ flats vs2 ∈ lang r ^^ length vs1")
  apply (metis add_diff_cancel_left' atLeast_iff diff_is_0_eq empty_pow_add last_in_set length_0_conv order_refl)
  apply (metis (no_types, opaque_lifting) Aux imageE list.set_map pow_Prf self_append_conv)
  apply (meson atLeast_iff less_imp_le_nat pow_Prf)
  done

(* Completeness of values: every string in the language of r is the flat
   of some well-formed value for r.  Proved by induction on r; the
   iteration cases first decompose the string (Star_val, pow_cstring) and
   then lift the pieces to values (flats_cval, flats_cval2). *)
lemma L_flat_Prf2:
  assumes "s ∈ lang r" 
  shows "∃v. ⊢ v : r ∧ flat v = s"
using assms
proof(induct r arbitrary: s)
  case (Star r s)
  have IH: "⋀s. s ∈ lang r ⟹ ∃v. ⊢ v : r ∧ flat v = s" by fact
  have "s ∈ lang (Star r)" by fact
  then obtain ss where "concat ss = s" "∀s ∈ set ss. s ∈ lang r ∧ s ≠ []"
    by (smt (z3) IH Prf_flat_lang Star_val imageE in_star_iff_concat lang.simps(6) list.set_map subset_iff)  
  then obtain vs where "flats vs = s" "∀v∈set vs. ⊢ v : r ∧ flat v ≠ []"
  using IH by (metis Star_val) 
  then show "∃v. ⊢ v : Star r ∧ flat v = s"
  using Prf.intros(6) flat_Stars by blast
next 
  case (Times r1 r2 s)
  then show "∃v. ⊢ v : Times r1 r2 ∧ flat v = s"
  unfolding Seq_def lang.simps by (fastforce intro: Prf.intros)
next
  case (Plus r1 r2 s)
  then show "∃v. ⊢ v : Plus r1 r2 ∧ flat v = s"
  unfolding lang.simps by (fastforce intro: Prf.intros)
next
  case (NTimes r n)
  have IH: "⋀s. s ∈ lang r ⟹ ∃v. ⊢ v : r ∧ flat v = s" by fact
  have "s ∈ lang (NTimes r n)" by fact
  then obtain ss1 ss2 where "concat (ss1 @ ss2) = s" "length (ss1 @ ss2) = n" 
    "∀s ∈ set ss1. s ∈ lang r ∧ s ≠ []" "∀s ∈ set ss2. s ∈ lang r ∧ s = []"
  using pow_cstring by force
  then obtain vs1 vs2 where "flats (vs1 @ vs2) = s" "length (vs1 @ vs2) = n" 
      "∀v∈set vs1. ⊢ v : r ∧ flat v ≠ []" "∀v∈set vs2. ⊢ v : r ∧ flat v = []"
    using IH flats_cval  
  apply -
  apply(drule_tac x="ss1 @ ss2" in meta_spec)
  apply(drule_tac x="r" in meta_spec)
  apply(drule meta_mp)
  apply(simp)
  apply (metis Un_iff)
  apply(clarify)
  apply(drule_tac x="vs1" in meta_spec)
  apply(drule_tac x="vs2" in meta_spec)
  apply(simp)
  done
  then show "∃v. ⊢ v : NTimes r n ∧ flat v = s"
    using Prf.intros(7) flat_Stars by blast
next
  case (Upto r n)
  have IH: "⋀s. s ∈ lang r ⟹ ∃v. ⊢ v : r ∧ flat v = s" by fact
  have "s ∈ lang (Upto r n)" by fact
  then obtain ss where "concat ss = s" "∀s ∈ set ss. s ∈ lang r ∧ s ≠ []" "length ss ≤ n"
    apply(auto)
    by (smt (verit) Nil_eq_concat_conv pow_cstring concat_append le0 le_add_same_cancel1 le_trans length_append self_append_conv)    
  then obtain vs where "flats vs = s" "∀v∈set vs. ⊢ v : r ∧ flat v ≠ []" "length vs ≤ n"
  using IH flats_cval2
  by (smt (verit, best) le_trans) 
  then show "∃v. ⊢ v : Upto r n ∧ flat v = s"
    by (meson Prf.intros(8) flat_Stars) 
next
  case (From r n)
  have IH: "⋀s. s ∈ lang r ⟹ ∃v. ⊢ v : r ∧ flat v = s" by fact
  have "s ∈ lang (From r n)" by fact
  then obtain ss1 ss2 k where "concat (ss1 @ ss2) = s" "length (ss1 @ ss2) = k"  "n ≤ k"
    "∀s ∈ set ss1. s ∈ lang r ∧ s ≠ []" "∀s ∈ set ss2. s ∈ lang r ∧ s = []"
    using pow_cstring by force 
  then obtain vs1 vs2 where "flats (vs1 @ vs2) = s" "length (vs1 @ vs2) = k" "n ≤ k"
      "∀v∈set vs1. ⊢ v : r ∧ flat v ≠ []" "∀v∈set vs2. ⊢ v : r ∧ flat v = []"
    using IH flats_cval  
  apply -
  apply(drule_tac x="ss1 @ ss2" in meta_spec)
  apply(drule_tac x="r" in meta_spec)
  apply(drule meta_mp)
  apply(simp)
  apply (metis Un_iff)
  apply(clarify)
  apply(drule_tac x="vs1" in meta_spec)
  apply(drule_tac x="vs2" in meta_spec)
  apply(simp)
  done
  (* If there are at most n non-empty iterations, pad with empty ones up to
     exactly n (first From rule); otherwise the non-empty ones alone already
     exceed n (second From rule). *)
  then show "∃v. ⊢ v : From r n ∧ flat v = s"
    apply(case_tac "length vs1 ≤ n")
    apply(rule_tac x="Stars (vs1 @ take (n - length vs1) vs2)" in exI)
     apply(simp)
     apply(subgoal_tac "flats (take (n - length vs1) vs2) = []")
      apply(auto)
       apply(rule Prf.intros(9))
       apply(auto)
    apply (meson in_set_takeD)
    apply (simp add: Aux)
    apply (meson in_set_takeD)
    apply(rule_tac x="Stars vs1" in exI)
    by (simp add: Prf.intros(10))
next
  case (Rec l r)
  then show ?case apply(auto)
    using Prf.intros(11) flat.simps(8) by blast
qed (auto intro: Prf.intros)

(* The language of r is exactly the set of flats of its well-formed
   values (combines L_flat_Prf2 and Prf_flat_lang). *)
lemma L_flat_Prf:
  "lang r = {flat v | v. ⊢ v : r}"
  using L_flat_Prf2 Prf_flat_lang by blast


section ‹Sulzmann and Lu functions›

(* mkeps r builds a value for r that flattens to the empty string (see
   mkeps_flat and mkeps_nullable below, both under the assumption that r
   is nullable).  For Plus the left alternative is preferred when it is
   nullable.  There are no equations for Zero, Atom and Charset. *)
fun 
  mkeps :: "'a rexp ⇒ 'a val"
where
  "mkeps(One) = Void"
| "mkeps(Times r1 r2) = Seq (mkeps r1) (mkeps r2)"
| "mkeps(Plus r1 r2) = (if nullable(r1) then Left (mkeps r1) else Right (mkeps r2))"
| "mkeps(Star r) = Stars []"
| "mkeps(Upto r n) = Stars []"
| "mkeps(NTimes r n) = Stars (replicate n (mkeps r))"
| "mkeps(From r n) = Stars (replicate n (mkeps r))"
| "mkeps(Rec l r) = Recv l (mkeps r)"

(* injval r c v turns a value v for the derivative of r by c into a value
   for r whose flat starts with c (see Prf_injval and Prf_injval_flat).
   The Left (Seq ..) and Right patterns for Times cover the shape of the
   derivative when r1 is nullable; in the Right case the first component
   is filled in with mkeps r1.
   The equations are kept in this order because injval.simps(12) is cited
   by index in Prf_injval. *)
fun injval :: "'a rexp ⇒ 'a ⇒ 'a val ⇒ 'a val"
where
  "injval (Atom d) c Void = Atm c"
| "injval (Plus r1 r2) c (Left v1) = Left(injval r1 c v1)"
| "injval (Plus r1 r2) c (Right v2) = Right(injval r2 c v2)"
| "injval (Times r1 r2) c (Seq v1 v2) = Seq (injval r1 c v1) v2"
| "injval (Times r1 r2) c (Left (Seq v1 v2)) = Seq (injval r1 c v1) v2"
| "injval (Times r1 r2) c (Right v2) = Seq (mkeps r1) (injval r2 c v2)"
| "injval (Star r) c (Seq v (Stars vs)) = Stars ((injval r c v) # vs)" 
| "injval (NTimes r n) c (Seq v (Stars vs)) = Stars ((injval r c v) # vs)" 
| "injval (Upto r n) c (Seq v (Stars vs)) = Stars ((injval r c v) # vs)" 
| "injval (From r n) c (Seq v (Stars vs)) = Stars ((injval r c v) # vs)"
| "injval (Rec l r) c v = Recv l (injval r c v)"
| "injval (Charset cs) c Void = Atm c"

section ‹Mkeps, injval›

(* For a nullable r, the value produced by mkeps flattens to the empty
   string.  Proved along the recursion structure of mkeps. *)
lemma mkeps_flat:
  assumes "nullable(r)" 
  shows "flat (mkeps r) = []"
using assms
  by (induct rule: mkeps.induct) (auto)

(* For a nullable r, mkeps r is a well-formed value for r. *)
lemma mkeps_nullable:
  assumes "nullable r" 
  shows "⊢ mkeps r : r"
using assms
  apply (induct r) 
  apply (auto intro: Prf.intros split: if_splits)
  apply (metis Prf.intros(7) append_Nil2 in_set_replicate list.size(3) replicate_0)
  apply(rule Prf_NTimes_empty)
  apply(auto simp add: mkeps_flat)
  apply (metis Prf.intros(9) append_Nil empty_iff list.set(1) list.size(3))
  by (metis Prf.intros(9) append_Nil empty_iff in_set_replicate length_replicate list.set(1) mkeps_flat)

(* Injecting c into a value for the derivative prepends c to the
   underlying string. *)
lemma Prf_injval_flat:
  assumes "⊢ v : deriv c r" 
  shows "flat (injval r c v) = c # (flat v)"
using assms
apply(induct c r arbitrary: v rule: deriv.induct)
apply(auto elim!: Prf_elims intro: mkeps_flat split: if_splits)
done

(* Injecting c into a well-formed value for the derivative of r by c
   yields a well-formed value for r.  The first seven constructor cases
   are discharged by one auto call; the bounded repetitions, Rec and
   Charset are handled by the explicit script that follows. *)
lemma Prf_injval:
  assumes "⊢ v : deriv c r" 
  shows "⊢ (injval r c v) : r"
using assms
apply(induct r arbitrary: c v rule: rexp.induct)
apply(auto intro!: Prf.intros mkeps_nullable elim!: Prf_elims simp add: Prf_injval_flat split: if_splits)[7]
(* NTimes *)
apply(case_tac x2)
apply(simp)
apply(simp)
apply(subst append.simps(2)[symmetric])
apply(rule Prf.intros)
apply(auto simp add: Prf_injval_flat)[4]
(* Upto *)
apply(case_tac x2)
apply(simp)
using Prf_elims(1) apply blast
apply(simp)
apply(erule Prf_elims)
apply(erule Prf_elims(8))
apply(simp)
apply(rule Prf.intros(8))
apply(auto simp add: Prf_injval_flat)[2]  
(* From *)
apply(simp)  
apply(case_tac x2)
apply(simp)
apply(erule Prf_elims)
apply(simp)
apply(erule Prf_elims(6))
apply(simp)
apply (simp add: Prf.intros(10) Prf_injval_flat)
apply(simp)
apply(erule Prf_elims)
apply(simp)
apply(erule Prf_elims(9))
apply(simp)
apply (smt (verit, best) Cons_eq_appendI Prf.intros(9) Prf_injval_flat length_Cons length_append list.discI set_ConsD)
apply(simp add: Prf.intros(10) Prf_injval_flat)
(* Rec and Charset *)
apply(simp add: Prf.intros(11))
by (metis Prf.intros(12) Prf_elims(1) Prf_elims(4) deriv.simps(11) injval.simps(12))


section ‹Our Alternative Posix definition›

(* s ∈ r → v  --  v is the POSIX value for matching the string s against r.
   Plus prefers the left alternative: Right is only allowed when s is not
   in the language of r1.  In the sequence and iteration rules the negated
   existential is the longest-match condition: no non-empty prefix s3 of
   s2 can be moved over to the first component.
   The iteration rules additionally require flat v to be non-empty for a
   step that consumes input; the NTimes2 and From2 rules cover the empty
   string with a list of n values that each match the empty string.
   NOTE: the rules are also cited by position (Posix.intros(6) and
   Posix.intros(7) are the two Star rules); do not reorder them. *)
inductive 
  Posix :: "'a list ⇒ 'a rexp ⇒ 'a val ⇒ bool" (‹_ ∈ _ → _› [100, 100, 100] 100)
where
  Posix_One: "[] ∈ One → Void"
| Posix_Atom: "[c] ∈ (Atom c) → (Atm c)"
| Posix_Plus1: "s ∈ r1 → v ⟹ s ∈ (Plus r1 r2) → (Left v)"
| Posix_Plus2: "⟦s ∈ r2 → v; s ∉ lang r1⟧ ⟹ s ∈ (Plus r1 r2) → (Right v)"
| Posix_Times: "⟦s1 ∈ r1 → v1; s2 ∈ r2 → v2;
    ¬(∃s3 s4. s3 ≠ [] ∧ s3 @ s4 = s2 ∧ (s1 @ s3) ∈ lang r1 ∧ s4 ∈ lang r2)⟧ ⟹ 
    (s1 @ s2) ∈ (Times r1 r2) → (Seq v1 v2)"
| Posix_Star1: "⟦s1 ∈ r → v; s2 ∈ Star r → Stars vs; flat v ≠ [];
    ¬(∃s3 s4. s3 ≠ [] ∧ s3 @ s4 = s2 ∧ (s1 @ s3) ∈ lang r ∧ s4 ∈ lang (Star r))⟧
    ⟹ (s1 @ s2) ∈ Star r → Stars (v # vs)"
| Posix_Star2: "[] ∈ Star r → Stars []"
| Posix_NTimes1: "⟦s1 ∈ r → v; s2 ∈ NTimes r n → Stars vs; flat v ≠ []; 
    ¬(∃s3 s4. s3 ≠ [] ∧ s3 @ s4 = s2 ∧ (s1 @ s3) ∈ lang r ∧ s4 ∈ lang (NTimes r n))⟧
    ⟹ (s1 @ s2) ∈ NTimes r (n + 1) → Stars (v # vs)"
| Posix_NTimes2: "⟦∀v ∈ set vs. [] ∈ r → v; length vs = n⟧
    ⟹ [] ∈ NTimes r n → Stars vs" 
| Posix_Upto1: "⟦s1 ∈ r → v; s2 ∈ Upto r n → Stars vs; flat v ≠ [];
    ¬(∃s3 s4. s3 ≠ [] ∧ s3 @ s4 = s2 ∧ (s1 @ s3) ∈ lang r ∧ s4 ∈ lang (Upto r n))⟧
    ⟹ (s1 @ s2) ∈ Upto r (n + 1) → Stars (v # vs)"
| Posix_Upto2: "[] ∈ Upto r n → Stars []"
| Posix_From2: "⟦∀v ∈ set vs. [] ∈ r → v; length vs = n⟧
    ⟹ [] ∈ From r n → Stars vs"
| Posix_From1: "⟦s1 ∈ r → v; s2 ∈ From r (n - 1) → Stars vs; flat v ≠ []; 0 < n;
    ¬(∃s3 s4. s3 ≠ [] ∧ s3 @ s4 = s2 ∧ (s1 @ s3) ∈ lang r ∧ s4 ∈ lang (From r (n - 1)))⟧
    ⟹ (s1 @ s2) ∈ From r n → Stars (v # vs)"  
| Posix_From3: "⟦s1 ∈ r → v; s2 ∈ Star r → Stars vs; flat v ≠ [];
    ¬(∃s3 s4. s3 ≠ [] ∧ s3 @ s4 = s2 ∧ (s1 @ s3) ∈ lang r ∧ s4 ∈ lang (Star r))⟧
    ⟹ (s1 @ s2) ∈ From r 0 → Stars (v # vs)"  
| Posix_Rec: "s ∈ r → v ⟹ s ∈ (Rec l r) → (Recv l v)"
| Posix_Cset: "c ∈ cs ⟹ [c] ∈ (Charset cs) → (Atm c)"

(* Elimination (inversion) rules for Posix, one per regular-expression
   constructor.  Referenced by index below (for instance Posix_elims(1) is
   the Zero case, Posix_elims(10) the Rec case); keep the order. *)
inductive_cases Posix_elims:
  "s ∈ Zero → v"
  "s ∈ One → v"
  "s ∈ Atom c → v"
  "s ∈ Plus r1 r2 → v"
  "s ∈ Times r1 r2 → v"
  "s ∈ Star r → v"
  "s ∈ NTimes r n → v"
  "s ∈ Upto r n → v"
  "s ∈ From r n → v"
  "s ∈ Rec l r → v"
  "s ∈ Charset cs → v"

(* Basic sanity of the Posix relation: a string with a POSIX value for r
   is in the language of r (Posix1(1)), and the value flattens back to
   that string (Posix1(2)). *)
lemma Posix1:
  assumes "s ∈ r → v"
  shows "s ∈ lang r" "flat v = s"
using assms
  apply (induct s r v rule: Posix.induct) 
  apply(auto simp add: pow_empty_iff)
  apply (meson ex_in_conv set_empty)
  apply(metis Suc_pred atMost_iff concI lang_pow.simps(2) not_less_eq_eq)
  apply (meson atLeast_iff dual_order.refl in_set_conv_nth)
  apply (metis Suc_le_mono Suc_pred atLeast_iff concI lang_pow.simps(2))
  by (simp add: star_pow)
  

(* Every POSIX value is a well-formed value for its regular expression. *)
lemma Posix1a:
  assumes "s ∈ r → v"
  shows "⊢ v : r"
using assms
  apply(induct s r v rule: Posix.induct)
  apply(auto intro: Prf.intros)
  apply (metis Prf.intros(6) Prf_elims(6) set_ConsD val.inject(5))
  prefer 2
  using Posix1(2) Prf_NTimes_empty apply blast
  apply(erule Prf_elims)
  apply(auto)
  apply(subst append.simps(2)[symmetric])
  apply(rule Prf.intros)
  apply(auto)
  apply (metis (no_types, lifting) Prf.intros(8) Prf_elims(8) Suc_le_mono length_Cons set_ConsD val.inject(5))
  apply (metis Posix1(2) Prf.intros(9) append_Nil empty_iff list.set(1))
  apply(erule Prf_elims)
  apply(auto)
  apply (smt (verit, best) Cons_eq_appendI Prf.intros(9) Suc_pred length_Cons length_append set_ConsD)
  apply (simp add: Prf.intros(10))
  apply(erule Prf_elims)
  apply(auto)
  by (simp add: Prf.intros(10))
      

(* For a nullable r, mkeps r is the POSIX value of the empty string. *)
lemma Posix_mkeps:
  assumes "nullable r"
  shows "[] ∈ r → mkeps r"
using assms
apply(induct r)
apply(auto intro: Posix.intros simp add: nullable_iff)
(* rewrite [] as [] @ [] so that a two-part Posix rule can be applied *)
apply(subst append.simps(1)[symmetric])
apply(rule Posix.intros)
apply(auto)
apply(simp add: Posix_NTimes2 pow_empty_iff)
apply(simp add: Posix_From2 pow_empty_iff)
done

(* Two lists of equal length are equal if they agree pointwise, expressed
   via the pairs of their zip.  Used in Posix_determ for the NTimes2 and
   From2 cases. *)
lemma List_eq_zipI:
  assumes "∀(v1, v2) ∈ set (zip vs1 vs2). v1 = v2" 
  and "length vs1 = length vs2"
  shows "vs1 = vs2"  
 using assms
  apply(induct vs1 arbitrary: vs2)
   apply(case_tac vs2)
   apply(simp)    
   apply(simp)
   apply(case_tac vs2)
   apply(simp)
  apply(simp)
done  


text ‹Our Posix definition determines a unique value.›

(* Determinism of the Posix relation: a string and a regular expression
   have at most one POSIX value.  Proved by rule induction on the first
   derivation with the second value arbitrary; each case inverts the
   second derivation and uses the longest-match side condition together
   with Posix1 to show that both derivations split the string at the same
   point. *)
lemma Posix_determ:
  assumes "s ∈ r → v1" "s ∈ r → v2"
  shows "v1 = v2"
using assms
proof (induct s r v1 arbitrary: v2 rule: Posix.induct)
  case (Posix_One v2)
  have "[] ∈ One → v2" by fact
  then show "Void = v2" by cases auto
next 
  case (Posix_Atom c v2)
  have "[c] ∈ Atom c → v2" by fact
  then show "Atm c = v2" by cases auto
next 
  case (Posix_Plus1 s r1 v r2 v2)
  have "s ∈ Plus r1 r2 → v2" by fact
  moreover
  have "s ∈ r1 → v" by fact
  then have "s ∈ lang r1" by (simp add: Posix1)
  ultimately obtain v' where eq: "v2 = Left v'" "s ∈ r1 → v'" by cases auto 
  moreover
  have IH: "⋀v2. s ∈ r1 → v2 ⟹ v = v2" by fact
  ultimately have "v = v'" by simp
  then show "Left v = v2" using eq by simp
next 
  case (Posix_Plus2 s r2 v r1 v2)
  have "s ∈ Plus r1 r2 → v2" by fact
  moreover
  have "s ∉ lang r1" by fact
  ultimately obtain v' where eq: "v2 = Right v'" "s ∈ r2 → v'" 
    by cases (auto simp add: Posix1) 
  moreover
  have IH: "⋀v2. s ∈ r2 → v2 ⟹ v = v2" by fact
  ultimately have "v = v'" by simp
  then show "Right v = v2" using eq by simp
next
  case (Posix_Times s1 r1 v1 s2 r2 v2 v')
  have "(s1 @ s2) ∈ Times r1 r2 → v'" 
       "s1 ∈ r1 → v1" "s2 ∈ r2 → v2"
       "¬ (∃s3 s4. s3 ≠ [] ∧ s3 @ s4 = s2 ∧ s1 @ s3 ∈ lang r1 ∧ s4 ∈ lang r2)" by fact+
  then obtain v1' v2' where "v' = Seq v1' v2'" "s1 ∈ r1 → v1'" "s2 ∈ r2 → v2'"
  apply(cases) apply (auto simp add: append_eq_append_conv2)
  using Posix1(1) by fastforce+
  moreover
  have IHs: "⋀v1'. s1 ∈ r1 → v1' ⟹ v1 = v1'"
            "⋀v2'. s2 ∈ r2 → v2' ⟹ v2 = v2'" by fact+
  ultimately show "Seq v1 v2 = v'" by simp
next
  case (Posix_Star1 s1 r v s2 vs v2)
  have "(s1 @ s2) ∈ Star r → v2" 
       "s1 ∈ r → v" "s2 ∈ Star r → Stars vs" "flat v ≠ []"
       "¬ (∃s3 s4. s3 ≠ [] ∧ s3 @ s4 = s2 ∧ s1 @ s3 ∈ lang r ∧ s4 ∈ lang (Star r))" by fact+
  then obtain v' vs' where "v2 = Stars (v' # vs')" "s1 ∈ r → v'" "s2 ∈ (Star r) → (Stars vs')"
  apply(cases) apply (auto simp add: append_eq_append_conv2)
  using Posix1(1) apply fastforce
  apply (metis Posix1(1) Posix_Star1.hyps(6) append_Nil append_Nil2)
  using Posix1(2) by blast
  moreover
  have IHs: "⋀v2. s1 ∈ r → v2 ⟹ v = v2"
            "⋀v2. s2 ∈ Star r → v2 ⟹ Stars vs = v2" by fact+
  ultimately show "Stars (v # vs) = v2" by auto
next
  case (Posix_Star2 r v2)
  have "[] ∈ Star r → v2" by fact
  then show "Stars [] = v2" by cases (auto simp add: Posix1)
next
  case (Posix_NTimes2 vs r n v2)
  then show "Stars vs = v2"
    apply(erule_tac Posix_elims)
     apply(auto)
     apply (simp add: Posix1(2))    
    apply(rule List_eq_zipI)
     apply(auto)
    by (meson in_set_zipE)
next
  case (Posix_NTimes1 s1 r v s2 n vs)
  have "(s1 @ s2) ∈ NTimes r (n + 1) → v2" 
       "s1 ∈ r → v" "s2 ∈ NTimes r n → Stars vs" "flat v ≠ []"
       "¬ (∃s3 s4. s3 ≠ [] ∧ s3 @ s4 = s2 ∧ s1 @ s3 ∈ lang r ∧ s4 ∈ lang (NTimes r n))" by fact+
  then obtain v' vs' where "v2 = Stars (v' # vs')" "s1 ∈ r → v'" "s2 ∈ (NTimes r n) → (Stars vs')"
  apply(cases) apply (auto simp add: append_eq_append_conv2)
    using Posix1(1) apply fastforce
    apply (metis Posix1(1) Posix_NTimes1.hyps(6) append.right_neutral append_Nil)
  using Posix1(2) by blast
  moreover
  have IHs: "⋀v2. s1 ∈ r → v2 ⟹ v = v2"
            "⋀v2. s2 ∈ NTimes r n → v2 ⟹ Stars vs = v2" by fact+
  ultimately show "Stars (v # vs) = v2" by auto
next
  case (Posix_Upto1 s1 r v s2 n vs)
  have "(s1 @ s2) ∈ Upto r (n + 1) → v2" 
       "s1 ∈ r → v" "s2 ∈ Upto r n → Stars vs" "flat v ≠ []"
       "¬ (∃s3 s4. s3 ≠ [] ∧ s3 @ s4 = s2 ∧ s1 @ s3 ∈ lang r ∧ s4 ∈ lang (Upto r n))" by fact+
  then obtain v' vs' where "v2 = Stars (v' # vs')" "s1 ∈ r → v'" "s2 ∈ (Upto r n) → (Stars vs')"
    apply(cases) apply (auto simp add: append_eq_append_conv2)
    using Posix1(1) apply fastforce
    apply (metis Posix1(1) Posix_Upto1.hyps(6) append.right_neutral append_Nil)
  using Posix1(2) by blast
  moreover
  have IHs: "⋀v2. s1 ∈ r → v2 ⟹ v = v2"
            "⋀v2. s2 ∈ Upto r n → v2 ⟹ Stars vs = v2" by fact+
  ultimately show "Stars (v # vs) = v2" by auto
next
  case (Posix_Upto2 r n)
  have "[] ∈ Upto r n → v2" by fact
  then show "Stars [] = v2" by cases (auto simp add: Posix1)
next
  case (Posix_From2 vs r n v2)
  then show "Stars vs = v2"
    apply(erule_tac Posix_elims)
     apply(auto)
    apply(rule List_eq_zipI)
     apply(auto)
      apply(meson in_set_zipE)
     apply (simp add: Posix1(2))
    using Posix1(2) by blast
next
  case (Posix_From1 s1 r v s2 n vs)
  have "(s1 @ s2) ∈ From r n → v2" 
       "s1 ∈ r → v" "s2 ∈ From r (n - 1) → Stars vs" "flat v ≠ []" "0 < n"
       "¬ (∃s3 s4. s3 ≠ [] ∧ s3 @ s4 = s2 ∧ s1 @ s3 ∈ lang r ∧ s4 ∈ lang (From r (n - 1 )))" by fact+
  then obtain v' vs' where "v2 = Stars (v' # vs')" "s1 ∈ r → v'" "s2 ∈ (From r (n - 1)) → (Stars vs')"
  apply(cases) apply (auto simp add: append_eq_append_conv2)
    using Posix1(1) Posix1(2) apply blast 
     apply(case_tac n)
      apply(simp)
     apply(simp)
    apply (smt (verit, ccfv_threshold) Posix1(1) UN_E append_eq_append_conv2 lang.simps(9))
    by (metis One_nat_def Posix1(1) Posix_From1.hyps(7) append_Nil2 append_self_conv2)
  moreover
  have IHs: "⋀v2. s1 ∈ r → v2 ⟹ v = v2"
            "⋀v2. s2 ∈ From r (n - 1) → v2 ⟹ Stars vs = v2" by fact+
  ultimately show "Stars (v # vs) = v2" by auto
next
  case (Posix_From3 s1 r v s2 vs)
  have "(s1 @ s2) ∈ From r 0 → v2" 
       "s1 ∈ r → v" "s2 ∈ Star r → Stars vs" "flat v ≠ []"
       "¬ (∃s3 s4. s3 ≠ [] ∧ s3 @ s4 = s2 ∧ s1 @ s3 ∈ lang r ∧ s4 ∈ lang (Star r))" by fact+
  then obtain v' vs' where "v2 = Stars (v' # vs')" "s1 ∈ r → v'" "s2 ∈ (Star r) → (Stars vs')"
  apply(cases) apply (auto simp add: append_eq_append_conv2)
    using Posix1(2) apply fastforce
    using Posix1(1) apply fastforce
    by (metis Posix1(1) Posix_From3.hyps(6) append.right_neutral append_Nil)
  moreover
  have IHs: "⋀v2. s1 ∈ r → v2 ⟹ v = v2"
            "⋀v2. s2 ∈ Star r → v2 ⟹ Stars vs = v2" by fact+
  ultimately show "Stars (v # vs) = v2" by auto  
next
  case (Posix_Rec s r v l v2)
  then show "Recv l v = v2" by (metis Posix_elims(10))
next 
  case (Posix_Cset c cs v2)
  have "[c] ∈ Charset cs → v2" by fact
  then show "Atm c = v2" by cases auto
qed


lemma Posix_injval:
  assumes "s  (deriv c r)  v"
  shows "(c # s)  r  (injval r c v)"
using assms
proof(induct r arbitrary: s v rule: rexp.induct)
  case Zero
  have "s  deriv c Zero  v" by fact
  then have "s  Zero  v" by simp
  then have "False" by cases
  then show "(c # s)  Zero  (injval Zero c v)" by simp
next
  case One
  have "s  deriv c One  v" by fact
  then have "s  Zero  v" by simp
  then have "False" by cases
  then show "(c # s)  One  (injval One c v)" by simp
next 
  case (Atom d)
  consider (eq) "c = d" | (ineq) "c  d" by blast
  then show "(c # s)  (Atom d)  (injval (Atom d) c v)"
  proof (cases)
    case eq
    have "s  deriv c (Atom d)  v" by fact
    then have "s  One  v" using eq by simp
    then have eqs: "s = []  v = Void" by cases simp
    show "(c # s)  Atom d  injval (Atom d) c v" using eq eqs 
    by (auto intro: Posix.intros)
  next
    case ineq
    have "s  deriv c (Atom d)  v" by fact
    then have "s  Zero  v" using ineq by simp
    then have "False" by cases
    then show "(c # s)  Atom d  injval (Atom d) c v" by simp
  qed
next
  case (Plus r1 r2)
  have IH1: "s v. s  deriv c r1  v  (c # s)  r1  injval r1 c v" by fact
  have IH2: "s v. s  deriv c r2  v  (c # s)  r2  injval r2 c v" by fact
  have "s  deriv c (Plus r1 r2)  v" by fact
  then have "s  Plus (deriv c r1) (deriv c r2)  v" by simp
  then consider (left) v' where "v = Left v'" "s  deriv c r1  v'" 
              | (right) v' where "v = Right v'" "s  lang (deriv c r1)" "s  deriv c r2  v'" 
              by cases auto
  then show "(c # s)  Plus r1 r2  injval (Plus r1 r2) c v"
  proof (cases)
    case left
    have "s  deriv c r1  v'" by fact
    then have "(c # s)  r1  injval r1 c v'" using IH1 by simp
    then have "(c # s)  Plus r1 r2  injval (Plus r1 r2) c (Left v')" by (auto intro: Posix.intros)
    then show "(c # s)  Plus r1 r2  injval (Plus r1 r2) c v" using left by simp
  next 
    case right
    have "s  lang (deriv c r1)" by fact
    then have "c # s  lang r1" by (simp add: lang_deriv Deriv_def)
    moreover 
    have "s  deriv c r2  v'" by fact
    then have "(c # s)  r2  injval r2 c v'" using IH2 by simp
    ultimately have "(c # s)  Plus r1 r2  injval (Plus r1 r2) c (Right v')" 
      by (auto intro: Posix.intros)
    then show "(c # s)  Plus r1 r2  injval (Plus r1 r2) c v" using right by simp
  qed
next
  case (Times r1 r2)
  have IH1: "s v. s  deriv c r1  v  (c # s)  r1  injval r1 c v" by fact
  have IH2: "s v. s  deriv c r2  v  (c # s)  r2  injval r2 c v" by fact
  have "s  deriv c (Times r1 r2)  v" by fact
  then consider 
        (left_nullable) v1 v2 s1 s2 where 
        "v = Left (Seq v1 v2)"  "s = s1 @ s2" 
        "s1  deriv c r1  v1" "s2  r2  v2" "nullable r1" 
        "¬ (s3 s4. s3  []  s3 @ s4 = s2  s1 @ s3  lang (deriv c r1)  s4  lang r2)"
      | (right_nullable) v1 s1 s2 where 
        "v = Right v1" "s = s1 @ s2"  
        "s  deriv c r2  v1" "nullable r1" "s1 @ s2  lang (Times (deriv c r1) r2)"
      | (not_nullable) v1 v2 s1 s2 where
        "v = Seq v1 v2" "s = s1 @ s2" 
        "s1  deriv c r1  v1" "s2  r2  v2" "¬nullable r1" 
        "¬ (s3 s4. s3  []  s3 @ s4 = s2  s1 @ s3  lang (deriv c r1)  s4  lang r2)"
        by (force split: if_splits elim!: Posix_elims simp add: lang_deriv Deriv_def)   
  then show "(c # s)  Times r1 r2  injval (Times r1 r2) c v" 
    proof (cases)
      case left_nullable
      have "s1  deriv c r1  v1" by fact
      then have "(c # s1)  r1  injval r1 c v1" using IH1 by simp
      moreover
      have "¬ (s3 s4. s3  []  s3 @ s4 = s2  s1 @ s3  lang (deriv c r1)  s4  lang r2)" by fact
      then have "¬ (s3 s4. s3  []  s3 @ s4 = s2  (c # s1) @ s3  lang r1  s4  lang r2)" 
         by (simp add: lang_deriv Deriv_def)
      ultimately have "((c # s1) @ s2)  Times r1 r2  Seq (injval r1 c v1) v2" using left_nullable by (rule_tac Posix.intros)
      then show "(c # s)  Times r1 r2  injval (Times r1 r2) c v" using left_nullable by simp
    next
      case right_nullable
      have "nullable r1" by fact
      then have "[]  r1  (mkeps r1)" by (rule Posix_mkeps)
      moreover
      have "s  deriv c r2  v1" by fact
      then have "(c # s)  r2  (injval r2 c v1)" using IH2 by simp
      moreover
      have "s1 @ s2  lang (Times (deriv c r1) r2)" by fact
      then have "¬ (s3 s4. s3  []  s3 @ s4 = c # s  [] @ s3  lang r1  s4  lang r2)" 
        using right_nullable 
        apply (auto simp add: lang_deriv Deriv_def append_eq_Cons_conv)
        by (metis concI mem_Collect_eq)
      ultimately have "([] @ (c # s))  Times r1 r2  Seq (mkeps r1) (injval r2 c v1)"
      by(rule Posix.intros)
      then show "(c # s)  Times r1 r2  injval (Times r1 r2) c v" using right_nullable by simp
    next
      case not_nullable
      have "s1  deriv c r1  v1" by fact
      then have "(c # s1)  r1  injval r1 c v1" using IH1 by simp
      moreover
      have "¬ (s3 s4. s3  []  s3 @ s4 = s2  s1 @ s3  lang (deriv c r1)  s4  lang r2)" by fact
      then have "¬ (s3 s4. s3  []  s3 @ s4 = s2  (c # s1) @ s3  lang r1  s4  lang r2)" by (simp add: lang_deriv Deriv_def)
      ultimately have "((c # s1) @ s2)  Times r1 r2  Seq (injval r1 c v1) v2" using not_nullable 
        by (rule_tac Posix.intros) (simp_all) 
      then show "(c # s)  Times r1 r2  injval (Times r1 r2) c v" using not_nullable by simp
    qed
next
  case (Star r)
  have IH: "s v. s  deriv c r  v  (c # s)  r  injval r c v" by fact
  have "s  deriv c (Star r)  v" by fact
  then consider
      (cons) v1 vs s1 s2 where 
        "v = Seq v1 (Stars vs)" "s = s1 @ s2" 
        "s1  deriv c r  v1" "s2  (Star r)  (Stars vs)"
        "¬ (s3 s4. s3  []  s3 @ s4 = s2  s1 @ s3  lang (deriv c r)  s4  lang (Star r))" 
        apply(auto elim!: Posix_elims(1-5) simp add: lang_deriv Deriv_def intro: Posix.intros)
        apply(rotate_tac 3)
        apply(erule_tac Posix_elims(6))
        apply (simp add: Posix.intros(6))
        using Posix.intros(7) by blast
    then show "(c # s)  Star r  injval (Star r) c v" 
    proof (cases)
      case cons
          have "s1  deriv c r  v1" by fact
          then have "(c # s1)  r  injval r c v1" using IH by simp
        moreover
          have "s2  Star r  Stars vs" by fact
        moreover 
          have "(c # s1)  r  injval r c v1" by fact 
          then have "flat (injval r c v1) = (c # s1)" by (rule Posix1)
          then have "flat (injval r c v1)  []" by simp
        moreover 
          have "¬ (s3 s4. s3  []  s3 @ s4 = s2  s1 @ s3  lang (deriv c r)  s4  lang (Star r))" by fact
          then have "¬ (s3 s4. s3  []  s3 @ s4 = s2  (c # s1) @ s3  lang r  s4  lang (Star r))" 
            by (simp add: lang_deriv Deriv_def)
        ultimately 
        have "((c # s1) @ s2)  Star r  Stars (injval r c v1 # vs)" by (rule Posix.intros)
        then show "(c # s)  Star r  injval (Star r) c v" using cons by(simp)
      qed
next
  case (NTimes r n)
  have IH: "s v. s  deriv c r  v  (c # s)  r  injval r c v" by fact
  have "s  deriv c (NTimes r n)  v" by fact
  then consider
      (cons) v1 vs s1 s2 where 
        "v = Seq v1 (Stars vs)" "s = s1 @ s2" 
        "s1  deriv c r  v1" "s2  (NTimes r (n - 1))  (Stars vs)" "0 < n"
        "¬ (s3 s4. s3  []  s3 @ s4 = s2  s1 @ s3  lang (deriv c r)  s4  lang (NTimes r (n - 1)))" 
    apply(auto elim: Posix_elims simp add: lang_derivs Deriv_def intro: Posix.intros split: if_splits)
    apply(erule Posix_elims)
    apply(simp)
    apply(subgoal_tac "vss. v2 = Stars vss")
    apply(clarify)
    apply(drule_tac x="vss" in meta_spec)
    apply(drule_tac x="s1" in meta_spec)
    apply(drule_tac x="s2" in meta_spec)
     apply(simp add: lang_derivs Deriv_def)
    apply(erule Posix_elims)
     apply(auto)
      done
    then show "(c # s)  (NTimes r n)  injval (NTimes r n) c v" 
    proof (cases)
      case cons
          have "s1  deriv c r  v1" by fact
          then have "(c # s1)  r  injval r c v1" using IH by simp
        moreover
          have "s2  (NTimes r (n - 1))  Stars vs" by fact
        moreover 
          have "(c # s1)  r  injval r c v1" by fact 
          then have "flat (injval r c v1) = (c # s1)" by (rule Posix1)
          then have "flat (injval r c v1)  []" by simp
        moreover 
          have "¬ (s3 s4. s3  []  s3 @ s4 = s2  s1 @ s3  lang (deriv c r)  s4  lang (NTimes r (n - 1)))" by fact
          then have "¬ (s3 s4. s3  []  s3 @ s4 = s2  (c # s1) @ s3  lang r  s4  lang (NTimes r (n - 1)))"
            by (simp add: lang_deriv Deriv_def)
        ultimately 
        have "((c # s1) @ s2)  NTimes r n  Stars (injval r c v1 # vs)"
          by (metis One_nat_def Posix_NTimes1 Suc_pred add.commute cons(5) plus_1_eq_Suc)
        then show "(c # s)  NTimes r n  injval (NTimes r n) c v" using cons by(simp)
      qed  
next
  case (Upto r n)
  have IH: "s v. s  deriv c r  v  (c # s)  r  injval r c v" by fact
  have "s  deriv c (Upto r n)  v" by fact
  then consider
      (cons) v1 vs s1 s2 where 
        "v = Seq v1 (Stars vs)" "s = s1 @ s2" 
        "s1  deriv c r  v1" "s2  (Upto r (n - 1))  (Stars vs)"
        "¬ (s3 s4. s3  []  s3 @ s4 = s2  s1 @ s3  lang (deriv c r)  s4  lang (Upto r (n - 1)))" 
    apply(auto elim!: Posix_elims simp add: lang_deriv Deriv_def intro: Posix.intros)    
    apply(case_tac n)
     apply(auto)
    using Posix_elims(1) apply blast
    apply(erule_tac Posix_elims)
    apply(auto)
    by (metis Posix1a Prf_elims(8) UN_E cons diff_Suc_1 lang.simps(8))
    then show "(c # s)  Upto r n  injval (Upto r n) c v" 
    proof (cases)
      case cons
          have "s1  deriv c r  v1" by fact
          then have "(c # s1)  r  injval r c v1" using IH by simp
        moreover
          have "s2  Upto r (n - 1)  Stars vs" by fact
        moreover 
          have "(c # s1)  r  injval r c v1" by fact 
          then have "flat (injval r c v1) = (c # s1)" by (rule Posix1)
          then have "flat (injval r c v1)  []" by simp
        moreover 
          have "¬ (s3 s4. s3  []  s3 @ s4 = s2  s1 @ s3  lang (deriv c r)  s4  lang (Upto r (n - 1)))" by fact
          then have "¬ (s3 s4. s3  []  s3 @ s4 = s2  (c # s1) @ s3  lang r  s4  lang (Upto r (n - 1)))" 
            by (simp add: lang_deriv Deriv_def)
        ultimately 
        have "((c # s1) @ s2)  Upto r n  Stars (injval r c v1 # vs)"
          by (metis One_nat_def Posix_Upto1 Posix_elims(1) Suc_pred Upto.prems add.commute bot_nat_0.not_eq_extremum deriv.simps(8) plus_1_eq_Suc)
        then show "(c # s)  Upto r n  injval (Upto r n) c v" using cons by(simp)
      qed
next
  case (From r n)
  have IH: "s v. s  deriv c r  v  (c # s)  r  injval r c v" by fact
  have "s  deriv c (From r n)  v" by fact
  then consider
      (cons) v1 vs s1 s2 where 
        "v = Seq v1 (Stars vs)" "s = s1 @ s2" 
        "s1  deriv c r  v1" "s2  (From r (n - 1))  (Stars vs)" "0 < n"
        "¬ (s3 s4. s3  []  s3 @ s4 = s2  s1 @ s3  lang (deriv c r)  s4  lang (From r (n - 1)))"
     | (null) v1 vs s1 s2 where 
        "v = Seq v1 (Stars vs)" "s = s1 @ s2"  "s2  (Star r)  (Stars vs)" 
        "s1  deriv c r  v1" "n = 0"
         "¬ (s3 s4. s3  []  s3 @ s4 = s2  s1 @ s3  lang (deriv c r)  s4  lang (Star r))"  
    apply(auto elim: Posix_elims simp add: lang_deriv Deriv_def intro: Posix.intros split: if_splits)
    apply(erule Posix_elims)
    apply(auto)
    apply(auto elim: Posix_elims simp add: lang_deriv Deriv_def intro: Posix.intros split: if_splits)
    apply(metis Posix1a Prf_elims(6))     
    apply(erule Posix_elims)
    apply(auto)
    apply(erule Posix_elims(9))
    apply (metis (no_types, lifting) Nil_is_append_conv Posix_From2)
     apply (simp add: Posix_From1 that(1))
    by (simp add: Posix_From3 that(1))
    then show "(c # s)  (From r n)  injval (From r n) c v" 
    proof (cases)
      case cons
          have "s1  deriv c r  v1" by fact
          then have "(c # s1)  r  injval r c v1" using IH by simp
        moreover
          have "s2  (From r (n - 1))  Stars vs" by fact
        moreover 
          have "(c # s1)  r  injval r c v1" by fact 
          then have "flat (injval r c v1) = (c # s1)" by (rule Posix1)
          then have "flat (injval r c v1)  []" by simp
        moreover 
          have "¬ (s3 s4. s3  []  s3 @ s4 = s2  s1 @ s3  lang (deriv c r)  s4  lang (From r (n - 1)))" by fact
          then have "¬ (s3 s4. s3  []  s3 @ s4 = s2  (c # s1) @ s3  lang r  s4  lang (From r (n - 1)))" 
            by (simp add: lang_deriv Deriv_def)
        ultimately 
        have "((c # s1) @ s2)  From r n  Stars (injval r c v1 # vs)"
          by (meson Posix_From1 cons(5))
        then show "(c # s)  From r n  injval (From r n) c v" using cons by(simp)
      next 
       case null
          have "s1  deriv c r  v1" by fact
          then have "(c # s1)  r  injval r c v1" using IH by simp
          moreover 
            have "s2  Star r  Stars vs" by fact
          moreover 
          have "(c # s1)  r  injval r c v1" by fact 
          then have "flat (injval r c v1) = (c # s1)" by (rule Posix1)
          then have "flat (injval r c v1)  []" by simp
          moreover
          have "¬ (s3 s4. s3  []  s3 @ s4 = s2  s1 @ s3  lang (deriv c r)  s4  lang (Star r))" by fact
          then have "¬ (s3 s4. s3  []  s3 @ s4 = s2  (c # s1) @ s3  lang r  s4  lang (Star r))" 
            by (simp add: lang_deriv Deriv_def)
        ultimately 
        have "((c # s1) @ s2)  From r 0  Stars (injval r c v1 # vs)"
          by (metis Posix_From3) 
        then show "(c # s)  From r n  injval (From r n) c v" using null by (simp)
      qed  
next
  case (Rec l r)
  then show "(c # s)  Rec l r  injval (Rec l r) c v"
    by (simp add: Posix_Rec)
next 
  case (Charset cs)
  consider (eq) "c  cs" | (ineq) "c  cs" by blast
  then show "(c # s)  (Charset cs)  (injval (Charset cs) c v)"
  proof (cases)
    case eq
    have "s  deriv c (Charset cs)  v" by fact
    then have "s  One  v" using eq by simp
    then have eqs: "s = []  v = Void" by cases simp
    show "(c # s)  Charset cs  injval (Charset cs) c v" using eq eqs 
    by (auto intro: Posix.intros)
  next
    case ineq
    have "s  deriv c (Charset cs)  v" by fact
    then have "s  Zero  v" using ineq by simp
    then have "False" by cases
    then show "(c # s)  Charset cs  injval (Charset cs) c v" by simp
  qed
qed


section ‹The Lexer by Sulzmann and Lu›

(* Derivative-based lexer; it consumes the input one character at a time.
   - Empty input: the result is Some (mkeps r) if r is nullable, else None.
   - Input c#s: the rest s is lexed against the derivative (deriv c r);
     a resulting value v is mapped to a value for r by injval r c,
     while a failure (None) is propagated unchanged. *)
fun 
  lexer :: "'a rexp ⇒ 'a list ⇒ ('a val) option"
where
  "lexer r [] = (if nullable r then Some(mkeps r) else None)"
| "lexer r (c#s) = (case (lexer (deriv c r) s) of  
                    None ⇒ None
                  | Some(v) ⇒ Some(injval r c v))"


(* The lexer returns None exactly for the strings that are not in the
   language of r.
   Proof: induction on s with r arbitrary. The empty-string case is closed
   with nullable_iff; in the cons case the induction hypothesis is
   instantiated at (deriv a r) and lang_deriv / Deriv_def are unfolded. *)
lemma lexer_correct_None:
  shows "s ∉ lang r ⟷ lexer r s = None"
apply(induct s arbitrary: r)
apply(simp add: nullable_iff)
apply(drule_tac x="deriv a r" in meta_spec)
apply(auto simp add: lang_deriv Deriv_def)
done

(* The lexer returns some value exactly for the strings in the language
   of r, and that value stands in the Posix relation to s and r.
   Proof: induction on s with r arbitrary. The empty-string case uses
   Posix_mkeps and nullable_iff; in the cons case the induction hypothesis
   is instantiated at (deriv a r), and both directions of the equivalence
   are discharged with Posix_injval and Posix1(1). *)
lemma lexer_correct_Some:
  shows "s ∈ lang r ⟷ (∃v. lexer r s = Some(v) ∧ s ∈ r → v)"
apply(induct s arbitrary: r)
apply(auto simp add: Posix_mkeps nullable_iff)[1]
apply(drule_tac x="deriv a r" in meta_spec)
apply(simp add: lang_deriv Deriv_def)
apply(rule iffI)
apply(auto intro: Posix_injval simp add: Posix1(1))
done 

(* Characterisation of both possible lexer results by the Posix relation:
   - lexer r s = Some v  holds iff  v is a Posix value for s and r;
   - lexer r s = None    holds iff  no Posix value for s and r exists.
   Proof: after splitting with auto, the four remaining goals follow from
   lexer_correct_None and lexer_correct_Some together with Posix1(1) and
   Posix_determ. *)
lemma lexer_correctness:
  shows "(lexer r s = Some v) ⟷ s ∈ r → v"
  and   "(lexer r s = None) ⟷ ¬(∃v. s ∈ r → v)"
apply(auto)
using lexer_correct_None lexer_correct_Some apply fastforce
using Posix1(1) Posix_determ lexer_correct_Some apply blast
using Posix1(1) lexer_correct_None apply blast
using lexer_correct_None lexer_correct_Some by blast



end