Theory Simplifying3

(*  Title:       POSIX Lexing with Derivatives of Extended Regular Expressions
    Author:      Christian Urban <christian.urban at kcl.ac.uk>, 2022
    Maintainer:  Christian Urban <christian.urban at kcl.ac.uk>
*) 

theory Simplifying3
  imports Lexer3 
begin

section ‹Lexer including simplifications›


fun F_RIGHT where
  "F_RIGHT f v = Right (f v)"

fun F_LEFT where
  "F_LEFT f v = Left (f v)"

fun F_Plus where
  "F_Plus f1 f2 (Right v) = Right (f2 v)"
| "F_Plus f1 f2 (Left v) = Left (f1 v)"  
| "F_Plus f1 f2 v = v"


fun F_Times1 where
  "F_Times1 f1 f2 v = Seq (f1 Void) (f2 v)"

fun F_Times2 where 
  "F_Times2 f1 f2 v = Seq (f1 v) (f2 Void)"

fun F_Times where 
  "F_Times f1 f2 (Seq v1 v2) = Seq (f1 v1) (f2 v2)"
| "F_Times f1 f2 v = v"

fun simp_Plus where
  "simp_Plus (Zero, f1) (r2, f2) = (r2, F_RIGHT f2)"
| "simp_Plus (r1, f1) (Zero, f2) = (r1, F_LEFT f1)"
| "simp_Plus (r1, f1) (r2, f2) = 
  (if r1 = r2 then (r1, F_LEFT f1) else (Plus r1 r2, F_Plus f1 f2))"

fun simp_Times where
  "simp_Times (Zero, f1) (r2, f2) = (Zero, undefined)"
| "simp_Times (r1, f1) (Zero, f2) = (Zero, undefined)"
| "simp_Times (One, f1) (r2, f2) = (r2, F_Times1 f1 f2)"
| "simp_Times (r1, f1) (One, f2) = (r1, F_Times2 f1 f2)"
| "simp_Times (r1, f1) (r2, f2) = (Times r1 r2, F_Times f1 f2)"  

lemma simp_Times_simps[simp]:
  "simp_Times p1 p2 = (if (fst p1 = Zero) then (Zero, undefined)
                    else (if (fst p2 = Zero) then (Zero, undefined)
                    else (if (fst p1 = One) then (fst p2, F_Times1 (snd p1) (snd p2))
                    else (if (fst p2 = One) then (fst p1, F_Times2 (snd p1) (snd p2))
                    else (Times (fst p1) (fst p2), F_Times (snd p1) (snd p2))))))"
 by (induct p1 p2 rule: simp_Times.induct)(auto)

lemma simp_Plus_simps[simp]:
  "simp_Plus p1 p2 = (if (fst p1 = Zero) then (fst p2, F_RIGHT (snd p2))
                    else (if (fst p2 = Zero) then (fst p1, F_LEFT (snd p1))
                    else (if (fst p1 = fst p2) then (fst p1, F_LEFT (snd p1))
                    else (Plus (fst p1) (fst p2), F_Plus (snd p1) (snd p2)))))"
by (induct p1 p2 rule: simp_Plus.induct) (auto)

fun 
  simp :: "'a rexp  'a rexp * ('a val  'a val)"
where
  "simp (Plus r1 r2) = simp_Plus (simp r1) (simp r2)" 
| "simp (Times r1 r2) = simp_Times (simp r1) (simp r2)" 
| "simp r = (r, id)"

fun 
  slexer :: "'a rexp  'a list  ('a val) option"
where
  "slexer r [] = (if nullable r then Some(mkeps r) else None)"
| "slexer r (c#s) = (let (rs, fr) = simp (deriv c r) in
                         (case (slexer rs s) of  
                            None  None
                          | Some(v)  Some(injval r c (fr v))))"

lemma slexer_better_simp:
  "slexer r (c#s) = (case (slexer (fst (simp (deriv c r))) s) of  
                            None  None
                          | Some(v)  Some(injval r c ((snd (simp (deriv c r))) v)))"
by (auto split: prod.split option.split)


lemma L_fst_simp:
  shows "lang r = lang (fst (simp r))"
by (induct r) (auto)

lemma Posix_simp:
  assumes "s  (fst (simp r))  v" 
  shows "s  r  ((snd (simp r)) v)"
using assms
proof(induct r arbitrary: s v rule: rexp.induct)
  case (Plus r1 r2 s v)
  have IH1: "s v. s  fst (simp r1)  v  s  r1  snd (simp r1) v" by fact
  have IH2: "s v. s  fst (simp r2)  v  s  r2  snd (simp r2) v" by fact
  have as: "s  fst (simp (Plus r1 r2))  v" by fact
  consider (Zero_Zero) "fst (simp r1) = Zero" "fst (simp r2) = Zero"
         | (Zero_NZero) "fst (simp r1) = Zero" "fst (simp r2)  Zero"
         | (NZero_Zero) "fst (simp r1)  Zero" "fst (simp r2) = Zero"
         | (NZero_NZero1) "fst (simp r1)  Zero" "fst (simp r2)  Zero" "fst (simp r1) = fst (simp r2)"
         | (NZero_NZero2) "fst (simp r1)  Zero" "fst (simp r2)  Zero" "fst (simp r1)  fst (simp r2)" by auto
  then show "s  Plus r1 r2  snd (simp (Plus r1 r2)) v" 
    proof(cases)
      case (Zero_Zero)
      with as have "s  Zero  v" by simp 
      then show "s  Plus r1 r2  snd (simp (Plus r1 r2)) v" by (rule Posix_elims(1))
    next
      case (Zero_NZero)
      with as have "s  fst (simp r2)  v" by simp
      with IH2 have "s  r2  snd (simp r2) v" by simp
      moreover
      from Zero_NZero have "fst (simp r1) = Zero" by simp
      then have "lang (fst (simp r1)) = {}" by simp
      then have "lang r1 = {}" using L_fst_simp by auto
      then have "s  lang r1" by simp 
      ultimately have "s  Plus r1 r2  Right (snd (simp r2) v)" by (rule Posix_Plus2)
      then show "s  Plus r1 r2  snd (simp (Plus r1 r2)) v"
      using Zero_NZero by simp
    next
      case (NZero_Zero)
      with as have "s  fst (simp r1)  v" by simp
      with IH1 have "s  r1  snd (simp r1) v" by simp
      then have "s  Plus r1 r2  Left (snd (simp r1) v)" by (rule Posix_Plus1) 
      then show "s  Plus r1 r2  snd (simp (Plus r1 r2)) v" using NZero_Zero by simp
    next
      case (NZero_NZero1)
      with as have a: "s  fst (simp r1)  v" by simp
        then show "s  Plus r1 r2  snd (simp (Plus r1 r2)) v" 
        using IH1 NZero_NZero1 Posix_Plus1 a by fastforce
    next
      case (NZero_NZero2)
      with as have "s  Plus (fst (simp r1)) (fst (simp r2))  v" by simp
      then consider (Left) v1 where "v = Left v1" "s  (fst (simp r1))  v1"
                  | (Right) v2 where "v = Right v2" "s  (fst (simp r2))  v2" "s  lang (fst (simp r1))"
                  by (erule_tac Posix_elims(4)) 
      then show "s  Plus r1 r2  snd (simp (Plus r1 r2)) v"
      proof(cases)
        case (Left)
        then have "v = Left v1" "s  r1  (snd (simp r1) v1)" using IH1 by simp_all
        then show "s  Plus r1 r2  snd (simp (Plus r1 r2)) v" using NZero_NZero2
          by (simp_all add: Posix_Plus1)
      next 
        case (Right)
        then have "v = Right v2" "s  r2  (snd (simp r2) v2)" "s  lang r1" using IH2 L_fst_simp by auto
        then show "s  Plus r1 r2  snd (simp (Plus r1 r2)) v" using NZero_NZero2
          by (simp_all add: Posix_Plus2)
      qed
    qed
next
  case (Times r1 r2 s v)
  have IH1: "s v. s  fst (simp r1)  v  s  r1  snd (simp r1) v" by fact
  have IH2: "s v. s  fst (simp r2)  v  s  r2  snd (simp r2) v" by fact
  have as: "s  fst (simp (Times r1 r2))  v" by fact
  consider (Zero)  "fst (simp r1) = Zero  fst (simp r2) = Zero"
         | (One_One) "fst (simp r1) = One" "fst (simp r2) = One"
         | (One_NOne) "fst (simp r1) = One" "fst (simp r2)  One" "fst (simp r2)  Zero"
         | (NOne_One) "fst (simp r1)  One" "fst (simp r2) = One" "fst (simp r1)  Zero"
         | (NOne_NOne) "fst (simp r1)  One" "fst (simp r2)  One" 
                       "fst (simp r1)  Zero" "fst (simp r2)  Zero" by auto
  then show "s  Times r1 r2  snd (simp (Times r1 r2)) v" 
  proof(cases)
      case (Zero)
        with as have "False" 
      by (metis Posix_elims(1) fst_conv simp.simps(2) simp_Times_simps) 
      then show "s  Times r1 r2  snd (simp (Times r1 r2)) v" by simp     
  next
      case (One_One)
      with as have b: "s  One  v" by simp 
      from b have "s  r1  snd (simp r1) v" using IH1 One_One by simp
      moreover
      from b have c: "s = []" "v = Void" using Posix_elims(2) by auto
      moreover
      have "[]  One  Void" by (simp add: Posix_One)
      then have "[]  fst (simp r2)  Void" using One_One by simp
      then have "[]  r2  snd (simp r2) Void" using IH2 by simp
      ultimately have "([] @ [])  Times r1 r2  Seq (snd (simp r1) Void) (snd (simp r2) Void)"
        using Posix_Times by blast 
      then show "s  Times r1 r2  snd (simp (Times r1 r2)) v" using c One_One by simp
    next
      case (One_NOne)
      with as have b: "s  fst (simp r2)  v" by simp 
      from b have "s  r2  snd (simp r2) v" using IH2 One_NOne by simp
      moreover
      have "[]  One  Void" by (simp add: Posix_One)
      then have "[]  fst (simp r1)  Void" using One_NOne by simp
      then have "[]  r1  snd (simp r1) Void" using IH1 by simp
      moreover
      from One_NOne(1) have "lang (fst (simp r1)) = {[]}" by simp
      then have "lang r1 = {[]}" by (simp add: L_fst_simp[symmetric])
      ultimately have "([] @ s)  Times r1 r2  Seq (snd (simp r1) Void) (snd (simp r2) v)"
        by(rule_tac Posix_Times) auto
      then show "s  Times r1 r2  snd (simp (Times r1 r2)) v" using One_NOne by simp
    next
      case (NOne_One)
        with as have "s  fst (simp r1)  v" by simp
        with IH1 have "s  r1  snd (simp r1) v" by simp
      moreover
        have "[]  One  Void" by (simp add: Posix_One)
        then have "[]  fst (simp r2)  Void" using NOne_One by simp
        then have "[]  r2  snd (simp r2) Void" using IH2 by simp
      ultimately have "(s @ [])  Times r1 r2  Seq (snd (simp r1) v) (snd (simp r2) Void)"
        by(rule_tac Posix_Times) auto
      then show "s  Times r1 r2  snd (simp (Times r1 r2)) v" using NOne_One by simp
    next
      case (NOne_NOne)
      with as have "s  Times (fst (simp r1)) (fst (simp r2))  v" by simp
      then obtain s1 s2 v1 v2 where eqs: "s = s1 @ s2" "v = Seq v1 v2"
                     "s1  (fst (simp r1))  v1" "s2  (fst (simp r2))  v2"
                     "¬ (s3 s4. s3  []  s3 @ s4 = s2  s1 @ s3  lang r1  s4  lang r2)"
                     by (erule_tac Posix_elims(5)) (auto simp add: L_fst_simp[symmetric]) 
      then have "s1  r1  (snd (simp r1) v1)" "s2  r2  (snd (simp r2) v2)"
        using IH1 IH2 by auto             
      then show "s  Times r1 r2  snd (simp (Times r1 r2)) v" using eqs NOne_NOne
        by(auto intro: Posix_Times)
    qed
qed (simp_all)


lemma slexer_correctness:
  shows "slexer r s = lexer r s"
proof(induct s arbitrary: r)
  case Nil
  show "slexer r [] = lexer r []" by simp
next 
  case (Cons c s r)
  have IH: "r. slexer r s = lexer r s" by fact
  show "slexer r (c # s) = lexer r (c # s)" 
   proof (cases "s  lang (deriv c r)")
     case True
       assume a1: "s  lang (deriv c r)"
       then obtain v1 where a2: "lexer (deriv c r) s = Some v1" "s  deriv c r  v1"
         using lexer_correct_Some by auto
       from a1 have "s  lang (fst (simp (deriv c r)))" using L_fst_simp[symmetric] by auto
       then obtain v2 where a3: "lexer (fst (simp (deriv c r))) s = Some v2" "s  (fst (simp (deriv c r)))  v2"
          using lexer_correct_Some by auto
       then have a4: "slexer (fst (simp (deriv c r))) s = Some v2" using IH by simp
       from a3(2) have "s  deriv c r  (snd (simp (deriv c r))) v2" using Posix_simp by auto
       with a2(2) have "v1 = (snd (simp (deriv c r))) v2" using Posix_determ by auto
       with a2(1) a4 show "slexer r (c # s) = lexer r (c # s)" by (auto split: prod.split)
     next 
     case False
       assume b1: "s  lang (deriv c r)"
       then have "lexer (deriv c r) s = None" using lexer_correct_None by auto
       moreover
       from b1 have "s  lang (fst (simp (deriv c r)))" using L_fst_simp[symmetric] by auto
       then have "lexer (fst (simp (deriv c r))) s = None" using lexer_correct_None by auto
       then have "slexer (fst (simp (deriv c r))) s = None" using IH by simp
       ultimately show "slexer r (c # s) = lexer r (c # s)" 
         by (simp del: slexer.simps add: slexer_better_simp)
   qed
qed  

end