Theory RegExpInterface
chapter‹The High-Level Interface to the Automata-Library›
theory RegExpInterface
imports "Functional-Automata.Execute"
keywords
"reflect_ML_exports" :: thy_decl
begin
text‹ The implementation of the monitoring concept is based on the following design decisions:
▸ We re-use generated code from the AFP submissions @{theory "Regular-Sets.Regular_Set"} and
@{theory "Functional-Automata.Automata"}, converted by the code generator into executable SML code
(ports to future Isabelle versions should just reuse future versions of these entries).
▸ Monitor expressions are regular expressions (in a slightly adapted syntax)
over document class identifiers; they denote the language of all possible document object
instances belonging to these classes.
▸ Instead of expanding the sub-class relation (and building the product automaton of all
monitor expressions), we convert the monitor expressions into automata over class-ids
which are executed in parallel, in order to avoid a blow-up.
▸ For efficiency reasons, the class-ids are internally abstracted to integers; the
encoding table is called environment ▩‹env› (see the small ML sketch after this list).
▸ For reusability reasons, we did NOT abstract the internal state representation of the
deterministic automata construction (lists of lists of bits - sic!) by replacing it
with unique keys via a suitable coding table; rather, we opted for keeping the automata small
(no products, no sub-class expansion).
›
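text‹As a purely illustrative sketch of the ▩‹env› encoding: an environment is just a table
of class-ids, and the integer code of a class-id is its position in that table. The class-id
names below are hypothetical; the actual interface at the end of this theory (cf. ▩‹alphabet›
and ▩‹conv›) follows the same idea.›
ML‹
local
  val demo_env = ["title", "figure", "conclusion"]         (* hypothetical class-ids *)
  fun code_of env cid = find_index (fn x => x = cid) env   (* yields ~1 for unknown ids *)
in
  val _ = @{assert} (code_of demo_env "figure" = 1)
end
›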
section‹Monitor Syntax over RegExp Constructs›
notation Star (‹⦃(_)⦄⇧*› [0]100)
notation Plus (infixr ‹||› 55)
notation Times (infixr ‹~~› 60)
notation Atom (‹⌊_⌋› 65)
definition rep1 :: "'a rexp ⇒ 'a rexp" (‹⦃(_)⦄⇧+›)
where "⦃A⦄⇧+ ≡ A ~~ ⦃A⦄⇧*"
definition opt :: "'a rexp ⇒ 'a rexp" (‹⟦(_)⟧›)
where "⟦A⟧ ≡ A || One"
value "Star (Conc(Alt (Atom(CHR ''a'')) (Atom(CHR ''b''))) (Atom(CHR ''c'')))"
text‹or, equivalently, in the more readable notation:›
value "⦃(⌊CHR ''a''⌋ || ⌊CHR ''b''⌋) ~~ ⌊CHR ''c''⌋⦄⇧*"
section‹Some Standard and Derived Semantics›
text‹ This is just a reminder: a language semantics is already defined in
@{theory "Regular-Sets.Regular_Exp"} as @{term lang}.›
text‹In the following, we give a semantics for our regular expressions, which so far have
just been a term language (i.e. abstract syntax). The semantics is a ``denotational semantics'',
i.e. we give a direct meaning to regular expressions in some universe of ``denotations''.
In our concrete case, this universe of denotations is ‹'a lang›, i.e. sets of words over ‹'a›.›
text‹Now the denotational semantics for regular expressions can be defined on a postcard:›
fun Lang :: "'a rexp => 'a lang"
where L_Emp : "Lang Zero = {}"
|L_One: "Lang One = {[]}"
|L_Atom: "Lang (⌊a⌋) = {[a]}"
|L_Un: "Lang (el || er) = (Lang el) ∪ (Lang er)"
|L_Conc: "Lang (el ~~ er) = {xs@ys | xs ys. xs ∈ Lang el ∧ ys ∈ Lang er}"
|L_Star: "Lang (Star e) = Regular_Set.star(Lang e)"
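text‹A small, purely illustrative sanity check (the concrete expression is chosen arbitrarily):›
lemma "Lang (⌊(0::nat)⌋ || One) = {[0], []}" by auto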
text‹For monitoring, a more useful notion is the following sub-language semantics: it differs
from @{term Lang} only in the atom case, where ⌊a⌋ denotes all one-element words ‹[x]› with
‹x ≤ a›, i.e. it also takes the sub-class ordering on class identifiers into account.›
fun L⇩s⇩u⇩b :: "'a::order rexp => 'a lang"
where L⇩s⇩u⇩b_Emp: "L⇩s⇩u⇩b Zero = {}"
|L⇩s⇩u⇩b_One: "L⇩s⇩u⇩b One = {[]}"
|L⇩s⇩u⇩b_Atom: "L⇩s⇩u⇩b (⌊a⌋) = {z . ∃x. x ≤ a ∧ z=[x]}"
|L⇩s⇩u⇩b_Un: "L⇩s⇩u⇩b (el || er) = (L⇩s⇩u⇩b el) ∪ (L⇩s⇩u⇩b er)"
|L⇩s⇩u⇩b_Conc: "L⇩s⇩u⇩b (el ~~ er) = {xs@ys | xs ys. xs ∈ L⇩s⇩u⇩b el ∧ ys ∈ L⇩s⇩u⇩b er}"
|L⇩s⇩u⇩b_Star: "L⇩s⇩u⇩b (Star e) = Regular_Set.star(L⇩s⇩u⇩b e)"
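text‹In contrast to @{term Lang}, the sub-language of an atom also contains the words built
from smaller atoms; the following two lemmas are merely illustrative:›
lemma "[0] ∉ Lang (⌊(1::nat)⌋)" by simp
lemma "[0] ∈ L⇩s⇩u⇩b (⌊(1::nat)⌋)" by auto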
text‹Some sample automata and sanity checks for the constructions provided by the
Functional-Automata entry:›
definition XX where "XX = (rexp2na example_expression)"
definition YY where "YY = na2da(rexp2na example_expression)"
value "NA.accepts (rexp2na example_expression) [0,1,1,0,0,1]"
value "DA.accepts (na2da (rexp2na example_expression)) [0,1,1,0,0,1]"
section‹HOL Adaptations and Export to SML›
definition enabled :: "('a,'σ set)da ⇒ 'σ set ⇒ 'a list ⇒ 'a list"
where "enabled A σ = filter (λx. next A x σ ≠ {}) "
text‹The constants ▩‹zero› and ▩‹one› make the natural numbers 0 and 1 available as named
constants in the generated SML code:›
definition zero where "zero = (0::nat)"
definition one where "one = (1::nat)"
export_code zero one Suc Int.nat nat_of_integer int_of_integer
example_expression
Zero One Atom Plus Times Star
rexp2na na2da enabled
NA.accepts DA.accepts
in SML module_name RegExpChecker
subsection‹Infrastructure for Reflecting Exported SML Code›
ML‹
(* Evaluate the generated SML files selected by args (e.g. those produced by export_code)
   within the current ML environment of the theory. *)
fun reflect_local_ML_exports args trans = let
fun eval_ML_context ctxt = let
fun is_sml_file f = String.isSuffix ".ML" (Path.implode (#path f))
val files = (map (Generated_Files.check_files_in (Context.proof_of ctxt)) args)
val ml_files = filter is_sml_file (map #1 (maps Generated_Files.get_files_in files))
val ml_content = map (fn f => Syntax.read_input (Bytes.content (#content f))) ml_files
fun eval ml_content = fold (fn sml => (ML_Context.exec
(fn () => ML_Context.eval_source ML_Compiler.flags sml)))
ml_content
in
(eval ml_content #> Local_Theory.propagate_ml_env) ctxt
end
in
Toplevel.generic_theory eval_ML_context trans
end
val files_in_theory =
(Parse.underscore >> K [] || Scan.repeat1 Parse.path_binding) --
Scan.option (\<^keyword>‹(› |-- Parse.!!! (\<^keyword>‹in›
|-- Parse.theory_name --| \<^keyword>‹)›));
val _ =
Outer_Syntax.command \<^command_keyword>‹reflect_ML_exports›
"evaluate generated Standard ML files"
(Parse.and_list1 files_in_theory >> (fn args => reflect_local_ML_exports args));
›
reflect_ML_exports _
section‹The Abstract Interface for Monitor Expressions›
text‹Here comes the crux: the reflection of the HOL-Automata module into an SML module
with an abstract interface hiding generation artefacts such as the internal states
of the deterministic automata ...›
ML‹
structure RegExpInterface : sig
type automaton
type env
type cid
val alphabet    : term list -> env                 (* extract the class-id alphabet *)
val ext_alphabet: env -> term list -> env          (* extend an alphabet; ids must stay distinct *)
val conv        : theory -> term -> env -> int RegExpChecker.rexp  (* translate a HOL rexp term *)
val rexp_term2da: theory -> env -> term -> automaton  (* compile a rexp term into an automaton *)
val enabled     : automaton -> env -> cid list     (* class-ids enabled in the current state *)
val next        : automaton -> env -> cid -> automaton  (* perform one transition step *)
val final       : automaton -> bool                (* is the current state accepting? *)
val accepts     : automaton -> env -> cid list -> bool  (* run a complete word *)
end
=
struct
local open RegExpChecker in
type state = bool list RegExpChecker.set
type env = string list
type cid = string
type automaton = state * ((Int.int -> state -> state) * (state -> bool))
(* collect all constants of type _ rexp occurring in a term; such constants represent
   the document class-ids, i.e. the atoms of monitor expressions *)
val add_atom = fold_aterms (fn Const (c as (_, \<^Type>‹rexp _›)) => insert (op=) c | _=> I);
fun alphabet termS = rev(map fst (fold add_atom termS []));
fun ext_alphabet env termS =
let val res = rev(map fst (fold add_atom termS [])) @ env;
val _ = if has_duplicates (op=) res
then error("reject and accept alphabets must be disjoint!")
else ()
in res end;
(* translate a HOL regular expression term into the reflected RegExpChecker.rexp,
   mapping every class-id constant onto its index in the environment env *)
fun conv _ \<^Const_>‹Regular_Exp.rexp.Zero _› _ = Zero
|conv _ \<^Const_>‹Regular_Exp.rexp.One _› _ = Onea
|conv thy \<^Const_>‹Regular_Exp.rexp.Times _ for X Y› env = Times(conv thy X env, conv thy Y env)
|conv thy \<^Const_>‹Regular_Exp.rexp.Plus _ for X Y› env = Plus(conv thy X env, conv thy Y env)
|conv thy \<^Const_>‹Regular_Exp.rexp.Star _ for X› env = Star(conv thy X env)
|conv thy \<^Const_>‹RegExpInterface.opt _ for X› env = Plus(conv thy X env, Onea)
|conv thy \<^Const_>‹RegExpInterface.rep1 _ for X› env = Times(conv thy X env, Star(conv thy X env))
|conv _ (Const (s, \<^Type>‹rexp _›)) env =
let val n = find_index (fn x => x = s) env
val _ = if n<0 then error"conversion error of regexp." else ()
in Atom(n) end
|conv thy S _ = error("conversion error of regexp:" ^ (Syntax.string_of_term_global thy S))
val eq_int = {equal = curry(op =) : Int.int -> Int.int -> bool};
val eq_bool_list = {equal = curry(op =) : bool list -> bool list -> bool};
(* pipeline: HOL rexp term -> reflected rexp -> nondeterministic -> deterministic automaton *)
fun rexp_term2da thy env term = let val rexp = conv thy term env;
val nda = RegExpChecker.rexp2na eq_int rexp;
val da = RegExpChecker.na2da eq_bool_list nda;
in da end;
(* the class-ids that have a non-empty successor state from the current state *)
fun enabled (da as (state,(_,_))) env =
let val inds = RegExpChecker.enabled da state (0 upto (length env - 1))
in map (fn i => nth env i) inds end
(* perform one transition step for the class-id a; unknown ids raise an error *)
fun next (current_state, (step,fin)) env a =
let val index = find_index (fn x => x = a) env
in if index < 0 then error"undefined id for monitor"
else (step index current_state,(step,fin))
end
fun final (current_state, (_,fin)) = fin current_state
fun accepts da env word = let fun index a = find_index (fn x => x = a) env
val indexL = map index word
val _ = if forall (fn x => x >= 0) indexL then ()
else error"undefined id for monitor"
in RegExpChecker.accepts da indexL end
end;
end
›
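text‹The following ML block is a purely illustrative sanity check of the reflected
▩‹RegExpChecker› module and is not part of the interface: the integers ‹0› and ‹1› stand
for two hypothetical class-ids, and the construction mirrors the calls used in
▩‹rexp_term2da› and ▩‹accepts› above.›
ML‹
local
  open RegExpChecker
  val eq_int       = {equal = curry (op =) : Int.int -> Int.int -> bool}
  val eq_bool_list = {equal = curry (op =) : bool list -> bool list -> bool}
  (* deterministic automaton for  ⌊0⌋ ~~ ⦃⌊1⌋⦄⇧*  , i.e. Times (Atom 0, Star (Atom 1)) *)
  val da = na2da eq_bool_list (rexp2na eq_int (Times (Atom 0, Star (Atom 1))))
in
  val _ = @{assert} (accepts da [0, 1, 1])   (* one 0 followed by arbitrarily many 1s *)
  val _ = @{assert} (not (accepts da [1]))   (* a word not starting with 0 is rejected *)
end
›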
lemma regexp_sub : "a ≤ b ⟹ L⇩s⇩u⇩b (⌊a⌋) ⊆ L⇩s⇩u⇩b (⌊b⌋)"
using dual_order.trans by auto
lemma regexp_seq_mono:
"Lang(a) ⊆ Lang (a') ⟹ Lang(b) ⊆ Lang (b') ⟹ Lang(a ~~ b) ⊆ Lang(a' ~~ b')" by auto
lemma regexp_seq_mono':
"L⇩s⇩u⇩b(a) ⊆ L⇩s⇩u⇩b (a') ⟹ L⇩s⇩u⇩b(b) ⊆ L⇩s⇩u⇩b (b') ⟹ L⇩s⇩u⇩b(a ~~ b) ⊆ L⇩s⇩u⇩b(a' ~~ b')" by auto
lemma regexp_alt_mono :"Lang(a) ⊆ Lang (a') ⟹ Lang(a || b) ⊆ Lang(a' || b)" by auto
lemma regexp_alt_mono' :"L⇩s⇩u⇩b(a) ⊆ L⇩s⇩u⇩b (a') ⟹ L⇩s⇩u⇩b(a || b) ⊆ L⇩s⇩u⇩b(a' || b)" by auto
lemma regexp_alt_commute : "Lang(a || b) = Lang(b || a)" by auto
lemma regexp_alt_commute' : "L⇩s⇩u⇩b(a || b) = L⇩s⇩u⇩b(b || a)" by auto
lemma regexp_unit_right : "Lang (a) = Lang (a ~~ One) " by simp
lemma regexp_unit_right' : "L⇩s⇩u⇩b (a) = L⇩s⇩u⇩b (a ~~ One) " by simp
lemma regexp_unit_left : "Lang (a) = Lang (One ~~ a) " by simp
lemma regexp_unit_left' : "L⇩s⇩u⇩b (a) = L⇩s⇩u⇩b (One ~~ a) " by simp
lemma opt_star_incl :"Lang (opt a) ⊆ Lang (Star a)" by (simp add: opt_def subset_iff)
lemma opt_star_incl':"L⇩s⇩u⇩b (opt a) ⊆ L⇩s⇩u⇩b (Star a)" by (simp add: opt_def subset_iff)
lemma rep1_star_incl:"Lang (rep1 a) ⊆ Lang (Star a)"
unfolding rep1_def by(subst L_Star, subst L_Conc)(force)
lemma rep1_star_incl':"L⇩s⇩u⇩b (rep1 a) ⊆ L⇩s⇩u⇩b (Star a)"
unfolding rep1_def by(subst L⇩s⇩u⇩b_Star, subst L⇩s⇩u⇩b_Conc)(force)
lemma cancel_rep1 : "Lang (a) ⊆ Lang (rep1 a)"
unfolding rep1_def by auto
lemma cancel_rep1' : "L⇩s⇩u⇩b (a) ⊆ L⇩s⇩u⇩b (rep1 a)"
unfolding rep1_def by auto
lemma seq_cancel_opt : "Lang (a) ⊆ Lang (c) ⟹ Lang (a) ⊆ Lang (opt b ~~ c)"
by(subst regexp_unit_left, rule regexp_seq_mono)(simp_all add: opt_def)
lemma seq_cancel_opt' : "L⇩s⇩u⇩b (a) ⊆ L⇩s⇩u⇩b (c) ⟹ L⇩s⇩u⇩b (a) ⊆ L⇩s⇩u⇩b (opt b ~~ c)"
by(subst regexp_unit_left', rule regexp_seq_mono')(simp_all add: opt_def)
lemma seq_cancel_Star : "Lang (a) ⊆ Lang (c) ⟹ Lang (a) ⊆ Lang (Star b ~~ c)"
by auto
lemma seq_cancel_Star' : "L⇩s⇩u⇩b (a) ⊆ L⇩s⇩u⇩b (c) ⟹ L⇩s⇩u⇩b (a) ⊆ L⇩s⇩u⇩b (Star b ~~ c)"
by auto
lemma mono_Star : "Lang (a) ⊆ Lang (b) ⟹ Lang (Star a) ⊆ Lang (Star b)"
by(auto)(metis in_star_iff_concat order.trans)
lemma mono_Star' : "L⇩s⇩u⇩b (a) ⊆ L⇩s⇩u⇩b (b) ⟹ L⇩s⇩u⇩b (Star a) ⊆ L⇩s⇩u⇩b (Star b)"
by(auto)(metis in_star_iff_concat order.trans)
lemma mono_rep1_star:"Lang (a) ⊆ Lang (b) ⟹ Lang (rep1 a) ⊆ Lang (Star b)"
using mono_Star rep1_star_incl by blast
lemma mono_rep1_star':"L⇩s⇩u⇩b (a) ⊆ L⇩s⇩u⇩b (b) ⟹ L⇩s⇩u⇩b (rep1 a) ⊆ L⇩s⇩u⇩b (Star b)"
using mono_Star' rep1_star_incl' by blast
no_notation Star (‹⦃(_)⦄⇧*› [0]100)
no_notation Plus (infixr ‹||› 55)
no_notation Times (infixr ‹~~› 60)
no_notation Atom (‹⌊_⌋› 65)
no_notation rep1 (‹⦃(_)⦄⇧+›)
no_notation opt (‹⟦(_)⟧›)
ML‹
(* The (term, mixfix) pairs of the notations removed above, bundled so that client
   theories can re-install the monitor-expression syntax locally when needed. *)
structure RegExpInterface_Notations =
struct
val Star = (\<^term>‹Regular_Exp.Star›, Mixfix (Syntax.read_input "⦃(_)⦄⇧*", [0], 100, Position.no_range))
val Plus = (\<^term>‹Regular_Exp.Plus›, Infixr (Syntax.read_input "||", 55, Position.no_range))
val Times = (\<^term>‹Regular_Exp.Times›, Infixr (Syntax.read_input "~~", 60, Position.no_range))
val Atom = (\<^term>‹Regular_Exp.Atom›, Mixfix (Syntax.read_input "⌊_⌋", [], 65, Position.no_range))
val opt = (\<^term>‹RegExpInterface.opt›, Mixfix (Syntax.read_input "⟦(_)⟧", [], 1000, Position.no_range))
val rep1 = (\<^term>‹RegExpInterface.rep1›, Mixfix (Syntax.read_input "⦃(_)⦄⇧+", [], 1000, Position.no_range))
val notations = [Star, Plus, Times, Atom, rep1, opt]
end
›
end