Theory Certification_Monads.Parser_Monad
section ‹Monadic Parser Combinators›
theory Parser_Monad
imports
Error_Monad
Show.Show
begin
abbreviation (input) "tab ≡ CHR 0x09"
abbreviation (input) "carriage_return ≡ CHR 0x0D"
abbreviation (input) "wspace ≡ [CHR '' '', CHR ''⏎'', tab, carriage_return]"
definition trim :: "string ⇒ string"
where "trim = dropWhile (λc. c ∈ set wspace)"
lemma trim:
"∃w. s = w @ trim s"
by (unfold trim_def) (metis takeWhile_dropWhile_id)
text ‹
A parser takes a list of tokes and returns either an error message or
a result together with the remaining tokens.
›
type_synonym
('t, 'a) gen_parser = "'t list ⇒ string + ('a × 't list)"
type_synonym
'a parser = "(char, 'a) gen_parser"
subsection ‹Monad-Setup for Parsers›
definition return :: "'a ⇒ ('t, 'a) gen_parser"
where
"return x = (λts. Error_Monad.return (x, ts))"
definition error :: "string ⇒ ('t, 'a) gen_parser"
where
"error e = (λ_. Error_Monad.error e)"
definition bind :: "('t, 'a) gen_parser ⇒ ('a ⇒ ('t, 'b) gen_parser) ⇒ ('t, 'b) gen_parser"
where
"bind m f ts = do {
(x, ts') ← m ts;
f x ts'
}"
adhoc_overloading
Monad_Syntax.bind bind
lemma bind_cong [fundef_cong]:
fixes m1 :: "('t, 'a) gen_parser"
assumes "m1 ts2 = m2 ts2"
and "⋀ y ts. m2 ts2 = Inr (y, ts) ⟹ f1 y ts = f2 y ts"
and "ts1 = ts2"
shows "((m1 ⤜ f1) ts1) = ((m2 ⤜ f2) ts2)"
using assms unfolding bind_def by (cases "m1 ts1") auto
definition update_tokens :: "('t list ⇒ 't list) ⇒ ('t, 't list) gen_parser"
where
"update_tokens f ts = Error_Monad.return (ts, f ts)"
definition get_tokens :: "('t, 't list) gen_parser"
where
"get_tokens = update_tokens (λx. x)"
definition set_tokens :: "'t list ⇒ ('t, unit) gen_parser"
where
[code_unfold]: "set_tokens ts = update_tokens (λ_. ts) ⪢ return ()"
definition err_expecting :: "string ⇒ ('t::show, 'a) gen_parser"
where
"err_expecting msg ts = Error_Monad.error
(''expecting '' @ msg @ '', but found: '' @ shows_quote (shows (take 30 ts)) [])"
fun eoi :: "('t :: show, unit) gen_parser"
where
"eoi [] = Error_Monad.return ((), [])" |
"eoi ts = err_expecting ''end of input'' ts"
fun exactly_aux :: "string ⇒ string ⇒ string ⇒ string parser"
where
"exactly_aux s i (x # xs) (y # ys) =
(if x = y then exactly_aux s i xs ys
else err_expecting (''\"'' @ s @ ''\"'') i)" |
"exactly_aux s i [] xs = Error_Monad.return (s, trim xs)" |
"exactly_aux s i (x # xs) [] = err_expecting (''\"'' @ s @ ''\"'') i"
fun oneof_aux :: "string list ⇒ string list ⇒ string parser"
where
"oneof_aux allowed (x # xs) ts =
(if map snd (zip x ts) = x then Error_Monad.return (x, trim (List.drop (length x) ts))
else oneof_aux allowed xs ts)" |
"oneof_aux allowed [] ts = err_expecting (''one of '' @ shows_list allowed []) ts"
definition is_parser :: "'a parser ⇒ bool" where
"is_parser p ⟷ (∀s r x. p s = Inr (x, r) ⟶ length s ≥ length r)"
lemma is_parserI [intro]:
assumes "⋀s r x. p s = Inr (x, r) ⟹ length s ≥ length r"
shows "is_parser p"
using assms unfolding is_parser_def by blast
lemma is_parserE [elim]:
assumes "is_parser p"
and "(⋀s r x. p s = Inr (x, r) ⟹ length s ≥ length r) ⟹ P"
shows "P"
using assms by (auto simp: is_parser_def)
lemma is_parser_length:
assumes "is_parser p" and "p s = Inr (x, r)"
shows "length s ≥ length r"
using assms by blast
text ‹A \emph{consuming parser} (cparser for short) consumes at least one token of input.›
definition is_cparser :: "'a parser ⇒ bool"
where
"is_cparser p ⟷ (∀s r x. p s = Inr (x, r) ⟶ length s > length r)"
lemma is_cparserI [intro]:
assumes "⋀s r x. p s = Inr (x, r) ⟹ length s > length r"
shows "is_cparser p"
using assms unfolding is_cparser_def by blast
lemma is_cparserE [elim]:
assumes "is_cparser p"
and "(⋀s r x. p s = Inr (x, r) ⟹ length s > length r) ⟹ P"
shows "P"
using assms by (auto simp: is_cparser_def)
lemma is_cparser_length:
assumes "is_cparser p" and "p s = Inr (x, r)"
shows "length s > length r"
using assms by blast
lemma is_parser_bind [intro, simp]:
assumes p: "is_parser p" and q: "⋀ x. is_parser (q x)"
shows "is_parser (p ⤜ q)"
proof
fix s r x
assume "(p ⤜ q) s = Inr (x, r)"
then obtain y t
where P: "p s = Inr (y, t)" and Q: "q y t = Inr (x, r)"
unfolding bind_def by (cases "p s") auto
have "length r ≤ length t" using q and Q by (auto simp: is_parser_def)
also have "… ≤ length s" using p and P by (auto simp: is_parser_def)
finally show "length r ≤ length s" .
qed
definition oneof :: "string list ⇒ string parser"
where
"oneof xs = oneof_aux xs xs"
lemma oneof_result:
assumes "oneof xs s = Inr (y, r)"
shows "∃ w. s = y @ w @ r ∧ y ∈ set xs"
proof -
{
fix ys
assume "oneof_aux ys xs s = Inr (y,r)"
hence "∃ w. s = y @ w @ r ∧ y ∈ set xs"
proof (induct xs)
case Nil thus ?case by (simp add: err_expecting_def)
next
case (Cons z zs)
thus ?case
proof (cases "map snd (zip z s) = z")
case False with Cons show ?thesis by simp
next
case True
hence s: "s = z @ drop (length z) s"
proof (induct z arbitrary: s)
case (Cons a zz)
thus ?case
by (cases s, auto)
qed simp
from trim[of "drop (length z) s"] obtain w where "drop (length z) s = w @ trim (drop (length z) s)" by blast
with s have s: "s = z @ w @ trim (drop (length z) s)" by auto
from True Cons(2) have yz: "y = z" and r: "r = trim (drop (length y) s)" by auto
show ?thesis
by (simp add: yz r, rule exI, rule s)
qed
qed
}
from this [OF assms [unfolded oneof_def]]
show ?thesis .
qed
definition exactly :: "string ⇒ string parser"
where
"exactly s x = exactly_aux s x s x"
lemma exactly_result:
assumes "exactly x s = Inr (y, r)"
shows "∃ w. s = x @ w @ r ∧ y = x"
proof -
{
fix a b
assume "exactly_aux a b x s = Inr (y,r)"
hence "∃ w. s = x @ w @ r ∧ y = a"
proof (induct x arbitrary: s)
case Nil
thus ?case using trim[of s] by auto
next
case (Cons c xs) note xs = this
show ?case
proof (cases s)
case Nil
with xs show ?thesis by (auto simp: err_expecting_def)
next
case (Cons d ss)
note xs = xs[unfolded Cons]
from xs(2) have "exactly_aux a b xs ss = Inr (y, r) ∧ c = d"
by (cases "c = d") (auto simp: err_expecting_def)
hence res: "exactly_aux a b xs ss = Inr (y, r)" and c: "c = d" by auto
from xs(1)[OF res]
show ?thesis unfolding Cons c by auto
qed
qed
}
from this[OF assms [unfolded exactly_def]] show ?thesis .
qed
hide_const oneof_aux exactly_aux
lemma oneof_length:
assumes "oneof xs s = Inr (y, r)"
shows "length s ≥ length y + length r ∧ y ∈ set xs"
proof -
from oneof_result [OF assms]
obtain w where "s = y @ w @ r ∧ y ∈ set xs" ..
thus ?thesis by auto
qed
lemma is_parser_oneof [intro]:
"is_parser (oneof ts)"
proof
fix s r x
assume "oneof ts s = Inr (x ,r)"
from oneof_length [OF this] show " length s ≥ length r" by auto
qed
lemma is_cparser_oneof [intro, simp]:
assumes "∀x∈set ts. length x ≥ 1"
shows "is_cparser (oneof ts)"
proof
fix s r x
assume "oneof ts s = Inr (x ,r)"
from oneof_length [OF this] assms
show " length s > length r" by auto
qed
lemma exactly_length:
assumes "exactly x s = Inr (y, r)"
shows "length s ≥ length x + length r"
proof -
from exactly_result [OF assms]
obtain w where "s = x @ w @ r" by auto
thus ?thesis by auto
qed
lemma is_parser_exactly [intro]:
"is_parser (exactly xs)"
proof
fix s r x
assume "exactly xs s = Inr (x ,r)"
from exactly_length [OF this]
show "length s ≥ length r" by auto
qed
lemma is_cparser_exactly [intro]:
assumes "length xs ≥ 1"
shows "is_cparser (exactly xs)"
proof
fix s r x
assume "exactly xs s = Inr (x, r)"
from exactly_length [OF this]
show "length s > length r" using assms by auto
qed
fun many :: "(char ⇒ bool) ⇒ (char list) parser"
where
"many P (t # ts) =
(if P t then do {
(rs, ts') ← many P ts;
Error_Monad.return (t # rs, ts')
} else Error_Monad.return ([], t # ts))" |
"many P [] = Error_Monad.return ([], [])"
lemma is_parser_many [intro]:
"is_parser (many P)"
proof
fix s r x
assume "many P s = Inr (x, r)"
thus "length r ≤ length s"
proof (induct s arbitrary: x r)
case (Cons t ts)
thus ?case by (cases "P t", cases "many P ts") force+
qed simp
qed
definition manyof :: "char list ⇒ (char list) parser"
where
[code_unfold]: "manyof cs = many (λc. c ∈ set cs)"
lemma is_parser_manyof [intro]:
"is_parser (manyof cs)"
unfolding manyof_def by blast
definition spaces :: "unit parser"
where
[code_unfold]: "spaces = manyof wspace ⪢ return ()"
lemma is_parser_return [intro]:
"is_parser (return x)"
by (auto simp: is_parser_def return_def)
lemma is_parser_error [intro]:
"is_parser (error x)"
by (auto simp: is_parser_def error_def)
lemma is_parser_If [intro!]:
assumes "is_parser p" and "is_parser q"
shows "is_parser (if b then p else q)"
using assms by (cases b) auto
lemma is_parser_Let [intro!]:
assumes "is_parser (f y)"
shows "is_parser (let x = y in f x)"
using assms by auto
lemma is_parser_spaces [intro]:
"is_parser spaces"
unfolding spaces_def by blast
fun scan_upto :: "string ⇒ string parser"
where
"scan_upto end (t # ts) =
(if map snd (zip end (t # ts)) = end then do {
Error_Monad.return (end, List.drop (length end) (t # ts))
} else do {
(res, ts') ← scan_upto end ts;
Error_Monad.return (t # res, ts')
})" |
"scan_upto end [] = Error_Monad.error (''did not find end-marker '' @ shows_quote (shows end) [])"
lemma scan_upto_length:
assumes "scan_upto end s = Inr (y, r)"
shows "length s ≥ length end + length r"
using assms
proof (induct s arbitrary: y r)
case (Cons t ts)
show ?case
proof (cases "map snd (zip end (t # ts)) = end")
case True
then obtain tss where tss: "tss = t # ts" and map: "map snd (zip end tss) = end" by auto
from map have len: "length tss ≥ length end"
proof (induct "end" arbitrary: tss)
case (Cons e en)
thus ?case by (cases tss, auto)
qed simp
from True tss Cons(2) have y: "y = end" and r: "r = List.drop (length end) tss" by auto
show ?thesis by (simp only: tss[symmetric], simp add: y r, auto simp: len)
next
case False
with Cons obtain res ts'
where "scan_upto end ts = Inr (res,ts')" by (cases "scan_upto end ts") (auto)
from Cons(1)[OF this] Cons(2) False this show ?thesis by auto
qed
qed simp
lemma is_parser_scan_upto [intro]:
"is_parser (scan_upto end)"
unfolding is_parser_def using scan_upto_length [of "end"] by force
lemma is_cparser_scan_upto [intro]:
"is_cparser (scan_upto (e # end))"
unfolding is_cparser_def using scan_upto_length [of "e # end"] by force
end