Theory RegExpInterface

(*************************************************************************
 * Copyright (C) 
 *               2019      The University of Exeter 
 *               2018-2019 The University of Paris-Saclay
 *               2018      The University of Sheffield
 *
 * License:
 *   This program can be redistributed and/or modified under the terms
 *   of the 2-clause BSD-style license.
 *
 *   SPDX-License-Identifier: BSD-2-Clause
 *************************************************************************)

chapter‹The High-Level Interface to the Automata-Library›

theory RegExpInterface
  imports "Functional-Automata.Execute"
  keywords
  "reflect_ML_exports" :: thy_decl

begin


text‹ The implementation of the monitoring concept follows the following design decisions:
 We re-use generated code from the AFP submissions @{theory "Regular-Sets.Regular_Set"} and 
  @{theory "Functional-Automata.Automata"}, converted by the code-generator into executable SML code
  (ports to future Isabelle versions should just reuse future versions of these)
 Monitor-Expressions are regular expressions (in some adapted syntax) 
  over Document Class identifiers; they denote the language of all possible document object
  instances belonging to these classes
 Instead of expanding the sub-class relation (and building the product automaton of all 
  monitor expressions), we convert the monitor expressions into automata over class-id's
  executed in parallel, in order to avoid blowup.
 For efficiency reasons, the class-ids were internally abstracted to integers; the
  encoding table is called environment ‹env›.
 For reusability reasons, we did NOT abstract the internal state representation in the
  deterministic automata construction (lists of lists of bits - sic !) by replacing them
  by unique keys via a suitable coding-table; rather, we opted for keeping the automatas small
  (no products, no subclass-expansion).
›

section‹Monitor Syntax over RegExp - constructs›

notation Star  ((_)* [0]100)
notation Plus  (infixr || 55)
notation Times (infixr ~~ 60)
notation Atom  (_ 65)

definition rep1 :: "'a rexp  'a rexp" ((_)+)
  where "A+   A ~~ A*"
    
definition opt :: "'a rexp  'a rexp" ((_))
   where "A   A || One"

value "Star (Conc(Alt (Atom(CHR ''a'')) (Atom(CHR ''b''))) (Atom(CHR ''c'')))"
text‹or better equivalently:›
value "(CHR ''a'' || CHR ''b'') ~~ CHR ''c''*"

section‹Some Standard and Derived Semantics›
text‹ This is just a reminder - already defined in @{theory "Regular-Sets.Regular_Exp"} 
as @{term lang}.›

text‹In the following, we give a semantics for our regular expressions, which so far have
just been a term language (i.e. abstract syntax). The semantics is a ``denotational semantics'',
i.e. we give a direct meaning for regular expressions in some universe of ``denotations''. 

This universe of denotations is in our concrete case:›

text‹Now the denotational semantics for regular expression can be defined on a post-card:›

fun       Lang :: "'a rexp => 'a lang"
  where   L_Emp :   "Lang Zero        = {}"
         |L_One:    "Lang One         = {[]}"
         |L_Atom:   "Lang (a)       = {[a]}"
         |L_Un:     "Lang (el || er)  = (Lang el)  (Lang er)"
         |L_Conc:   "Lang (el ~~ er)  = {xs@ys | xs ys. xs  Lang el  ys  Lang er}"
         |L_Star:   "Lang (Star e)    = Regular_Set.star(Lang e)"


text‹A more useful definition is the sub-language - definition›
fun       Lsub :: "'a::order rexp => 'a lang"
  where   Lsub_Emp:    "Lsub Zero        = {}"
         |Lsub_One:    "Lsub One         = {[]}"
         |Lsub_Atom:   "Lsub (a)       = {z . x. x  a  z=[x]}"
         |Lsub_Un:     "Lsub (el || er)  = (Lsub el)  (Lsub er)"
         |Lsub_Conc:   "Lsub (el ~~ er)  = {xs@ys | xs ys. xs  Lsub el  ys  Lsub er}"
         |Lsub_Star:   "Lsub (Star e)    = Regular_Set.star(Lsub e)"


definition XX where "XX = (rexp2na example_expression)"
definition YY where "YY = na2da(rexp2na example_expression)"
(* reminder from execute *)
value "NA.accepts (rexp2na example_expression) [0,1,1,0,0,1]"
value "DA.accepts (na2da (rexp2na example_expression)) [0,1,1,0,0,1]"

section‹HOL - Adaptions and Export to SML›

definition enabled :: "('a, set)da    set  'a list   'a list" 
  where   "enabled A σ = filter (λx. next A x σ  {}) "


definition zero where "zero = (0::nat)"
definition one where "one = (1::nat)"

export_code   zero one Suc Int.nat  nat_of_integer int_of_integer  (* for debugging *)
             example_expression                                   (* for debugging *)

             Zero One     Atom     Plus  Times  Star              (* regexp abstract syntax *)

             rexp2na      na2da    enabled                        (* low-level automata interface *)
             NA.accepts   DA.accepts  
             in SML   module_name RegExpChecker 

subsection‹Infrastructure for Reflecting exported SML code›
MLfun reflect_local_ML_exports args trans =  let
    fun eval_ML_context ctxt = let 
      fun is_sml_file f = String.isSuffix ".ML" (Path.implode (#path f))
      val files = (map (Generated_Files.check_files_in (Context.proof_of ctxt)) args) 
      val ml_files = filter is_sml_file (map #1 (maps Generated_Files.get_files_in files))
      val ml_content = map (fn f => Syntax.read_input (Bytes.content (#content f))) ml_files
      fun eval ml_content   = fold (fn sml => (ML_Context.exec 
                                           (fn () => ML_Context.eval_source ML_Compiler.flags sml))) 
                                   ml_content 
    in 
      (eval ml_content #> Local_Theory.propagate_ml_env) ctxt
    end
  in
    Toplevel.generic_theory eval_ML_context trans
  end


  val files_in_theory =
    (Parse.underscore >> K [] || Scan.repeat1 Parse.path_binding) --
      Scan.option (keyword( |-- Parse.!!! (keywordin 
                     |-- Parse.theory_name --| keyword)));

  val _ =
    Outer_Syntax.command command_keywordreflect_ML_exports 
      "evaluate generated Standard ML files"
      (Parse.and_list1 files_in_theory  >> (fn args => reflect_local_ML_exports args));



reflect_ML_exports _



section‹The Abstract Interface For Monitor Expressions›
text‹Here comes the hic : The reflection of the HOL-Automata module into an SML module 
with an abstract interface hiding some generation artefacts like the internal states 
of the deterministic automata ...›


MLstructure RegExpInterface : sig
    type automaton
    type env 
    type cid
    val  alphabet    : term list -> env
    val  ext_alphabet: env -> term list -> env
    val  conv        : theory -> term -> env -> int RegExpChecker.rexp (* for debugging *)
    val  rexp_term2da: theory -> env -> term -> automaton
    val  enabled     : automaton -> env -> cid list  
    val  next        : automaton -> env -> cid -> automaton
    val  final       : automaton -> bool
    val  accepts     : automaton -> env -> cid list -> bool
  end
 =
struct
local open RegExpChecker in

  type state = bool list RegExpChecker.set
  type env = string list
  type cid = string

  type automaton = state * ((Int.int -> state -> state) * (state -> bool))

  val add_atom = fold_aterms (fn Const (c as (_, Typerexp _)) => insert (op=) c | _=> I);
  fun alphabet termS  =  rev(map fst (fold add_atom termS []));
  fun ext_alphabet env termS  =  
         let val res = rev(map fst (fold add_atom termS [])) @ env;
             val _ = if has_duplicates  (op=) res
                     then error("reject and accept alphabets must be disjoint!")
                     else ()
         in res end;

  fun conv _ Const_Regular_Exp.rexp.Zero _ _ = Zero
     |conv _ Const_Regular_Exp.rexp.One _ _ = Onea 
     |conv thy Const_Regular_Exp.rexp.Times _ for X Y env = Times(conv thy X env, conv thy Y env)
     |conv thy Const_Regular_Exp.rexp.Plus _ for X Y env = Plus(conv thy X env, conv thy Y env)
     |conv thy Const_Regular_Exp.rexp.Star _ for X env = Star(conv thy X env)
     |conv thy Const_RegExpInterface.opt _ for X env = Plus(conv thy X env, Onea)
     |conv thy Const_RegExpInterface.rep1 _ for X env = Times(conv thy X env, Star(conv thy X env))
     |conv _ (Const (s, Typerexp _)) env =
               let val n = find_index (fn x => x = s) env 
                   val _ = if n<0 then error"conversion error of regexp."  else ()
               in  Atom(n) end
     |conv thy S _ = error("conversion error of regexp:" ^ (Syntax.string_of_term_global thy S))

   val eq_int = {equal = curry(op =) : Int.int -> Int.int -> bool};
   val eq_bool_list = {equal = curry(op =) : bool list  -> bool list  -> bool};

   fun rexp_term2da thy env term = let val rexp = conv thy term env;
                                   val nda = RegExpChecker.rexp2na eq_int rexp;
                                   val da = RegExpChecker.na2da eq_bool_list nda;
                               in  da end;


   (* here comes the main interface of the module: 
      - "enabled" gives the part of the alphabet "env" for which the automatan does not
        go into a final state
      - next provides an automata transformation that produces an automaton that
        recognizes the rest of a word after a *)
   fun enabled (da as (state,(_,_))) env  = 
                              let val inds = RegExpChecker.enabled da state (0 upto (length env - 1))
                              in  map (fn i => nth env i) inds end

   fun next  (current_state, (step,fin)) env a =
                              let val index = find_index (fn x => x = a) env   
                              in  if index < 0 then error"undefined id for monitor"
                                  else (step index current_state,(step,fin))
                              end

   fun final (current_state, (_,fin)) = fin current_state

   fun accepts da env word = let fun index a = find_index (fn x => x = a) env   
                                 val indexL = map index word
                                 val _ = if forall (fn x => x >= 0) indexL then ()
                                         else error"undefined id for monitor"
                             in  RegExpChecker.accepts da indexL end

end; (* local *)
end  (* struct *)

lemma regexp_sub : "a  b  Lsub (a)  Lsub (b)"
  using dual_order.trans by auto

lemma regexp_seq_mono:
      "Lang(a)  Lang (a')  Lang(b)  Lang (b')  Lang(a ~~ b)  Lang(a' ~~ b')"  by auto

lemma regexp_seq_mono':
      "Lsub(a)  Lsub (a')  Lsub(b)  Lsub (b')  Lsub(a ~~ b)  Lsub(a' ~~ b')"  by auto

lemma regexp_alt_mono :"Lang(a)  Lang (a')  Lang(a || b)  Lang(a' || b)"  by auto

lemma regexp_alt_mono' :"Lsub(a)  Lsub (a')  Lsub(a || b)  Lsub(a' || b)"  by auto

lemma regexp_alt_commute : "Lang(a || b) = Lang(b || a)"  by auto

lemma regexp_alt_commute' : "Lsub(a || b) = Lsub(b || a)"  by auto

lemma regexp_unit_right : "Lang (a) = Lang (a ~~ One) " by simp 

lemma regexp_unit_right' : "Lsub (a) = Lsub (a ~~ One) " by simp 

lemma regexp_unit_left  : "Lang (a) = Lang (One ~~ a) " by simp 

lemma regexp_unit_left'  : "Lsub (a) = Lsub (One ~~ a) " by simp

lemma opt_star_incl :"Lang (opt a)  Lang (Star a)"  by (simp add: opt_def subset_iff)

lemma opt_star_incl':"Lsub (opt a)  Lsub (Star a)"  by (simp add: opt_def subset_iff)

lemma rep1_star_incl:"Lang (rep1 a)  Lang (Star a)"
  unfolding rep1_def by(subst L_Star, subst L_Conc)(force)

lemma rep1_star_incl':"Lsub (rep1 a)  Lsub (Star a)"
  unfolding rep1_def by(subst Lsub_Star, subst Lsub_Conc)(force)

lemma cancel_rep1 : "Lang (a)  Lang (rep1 a)"
  unfolding rep1_def by auto

lemma cancel_rep1' : "Lsub (a)  Lsub (rep1 a)"
  unfolding rep1_def by auto

lemma seq_cancel_opt : "Lang (a)  Lang (c)  Lang (a)  Lang (opt b ~~ c)"
  by(subst regexp_unit_left, rule regexp_seq_mono)(simp_all add: opt_def)

lemma seq_cancel_opt' : "Lsub (a)  Lsub (c)  Lsub (a)  Lsub (opt b ~~ c)"
  by(subst regexp_unit_left', rule regexp_seq_mono')(simp_all add: opt_def)

lemma seq_cancel_Star : "Lang (a)  Lang (c)  Lang (a)  Lang (Star b ~~ c)" 
  by auto

lemma seq_cancel_Star' : "Lsub (a)  Lsub (c)  Lsub (a)  Lsub (Star b ~~ c)" 
  by auto

lemma mono_Star : "Lang (a)  Lang (b)  Lang (Star a)  Lang (Star b)" 
  by(auto)(metis in_star_iff_concat order.trans)

lemma mono_Star' : "Lsub (a)  Lsub (b)  Lsub (Star a)  Lsub (Star b)" 
  by(auto)(metis in_star_iff_concat order.trans)

lemma mono_rep1_star:"Lang (a)  Lang (b)  Lang (rep1 a)  Lang (Star b)"
  using mono_Star rep1_star_incl by blast

lemma mono_rep1_star':"Lsub (a)  Lsub (b)  Lsub (rep1 a)  Lsub (Star b)"
  using mono_Star' rep1_star_incl' by blast


no_notation Star  ((_)* [0]100)
no_notation Plus  (infixr || 55)
no_notation Times (infixr ~~ 60)
no_notation Atom  (_ 65)
no_notation rep1 ((_)+)
no_notation opt  ((_))

MLstructure RegExpInterface_Notations =
struct
val Star = (termRegular_Exp.Star, Mixfix (Syntax.read_input "⦃(_)⦄*", [0], 100, Position.no_range))
val Plus = (termRegular_Exp.Plus, Infixr (Syntax.read_input "||", 55, Position.no_range))
val Times = (termRegular_Exp.Times, Infixr (Syntax.read_input "~~", 60, Position.no_range))
val Atom = (termRegular_Exp.Atom, Mixfix (Syntax.read_input "⌊_⌋", [], 65, Position.no_range))
val opt = (termRegExpInterface.opt, Mixfix (Syntax.read_input "⟦(_)⟧", [], 1000, Position.no_range))
val rep1 = (termRegExpInterface.rep1, Mixfix (Syntax.read_input "⦃(_)⦄+", [], 1000, Position.no_range))
val notations = [Star, Plus, Times, Atom, rep1, opt]
end

end