(* Title: Xml Author: Christian Sternagel Author: René Thiemann *) section ‹Parsing and Printing XML Documents› theory Xml imports Certification_Monads.Parser_Monad "HOL-Library.Char_ord" "HOL-Library.Code_Abstract_Char" begin datatype xml = ― ‹node-name, attributes, child-nodes› XML string "(string × string) list" "xml list" | XML_text string datatype xmldoc = ― ‹header, body› XMLDOC "string list" (root_node: xml) fun tag :: "xml ⇒ string" where "tag (XML name _ _ ) = name" | "tag (XML_text _) = []" hide_const (open) tag fun children :: "xml ⇒ xml list" where "children (XML _ _ cs) = cs" | "children (XML_text _) = []" hide_const (open) children fun num_children :: "xml ⇒ nat" where "num_children (XML _ _ cs) = length cs" | "num_children (XML_text _) = 0" hide_const (open) num_children subsection ‹Printing of XML Nodes and Documents› instantiation xml :: "show" begin definition shows_attr :: "string × string ⇒ shows" where "shows_attr av = shows (fst av) o shows_string (''=\"'' @ snd av @ ''\"'')" definition shows_attrs :: "(string × string) list ⇒ shows" where "shows_attrs as = foldr (λa. '' '' +#+ shows_attr a) as" fun shows_XML_indent :: "string ⇒ nat ⇒ xml ⇒ shows" where "shows_XML_indent ind i (XML n a c) = (''⏎'' +#+ ind +#+ ''<'' +#+ shows n +@+ shows_attrs a +@+ (if c = [] then shows_string ''/>'' else ( ''>'' +#+ foldr (shows_XML_indent (replicate i (CHR '' '') @ ind) i) c +@+ ''⏎'' +#+ ind +#+ ''</'' +#+ shows n +@+ shows_string ''>'')))" | "shows_XML_indent ind i (XML_text t) = shows_string t" definition "shows_prec (d::nat) xml = shows_XML_indent '''' 2 xml" definition "shows_list (xs :: xml list) = showsp_list shows_prec 0 xs" lemma shows_attr_append: "(s +#+ shows_attr av) (r @ t) = (s +#+ shows_attr av) r @ t" unfolding shows_attr_def by (cases av) (auto simp: show_law_simps) lemma shows_attrs_append [show_law_simps]: "shows_attrs as (r @ s) = shows_attrs as r @ s" using shows_attr_append by (induct as) (simp_all add: shows_attrs_def) lemma append_xml': "shows_XML_indent ind i xml (r @ s) = shows_XML_indent ind i xml r @ s" by (induct xml arbitrary: ind r s) (auto simp: show_law_simps) lemma shows_prec_xml_append [show_law_simps]: "shows_prec d (xml::xml) (r @ s) = shows_prec d xml r @ s" unfolding shows_prec_xml_def by (rule append_xml') instance by standard (simp_all add: show_law_simps shows_list_xml_def) end instantiation xmldoc :: "show" begin fun shows_xmldoc where "shows_xmldoc (XMLDOC h x) = shows_lines h o shows_nl o shows x" definition "shows_prec (d::nat) doc = shows_xmldoc doc" definition "shows_list (xs :: xmldoc list) = showsp_list shows_prec 0 xs" lemma shows_prec_xmldoc_append [show_law_simps]: "shows_prec d (x::xmldoc) (r @ s) = shows_prec d x r @ s" by (cases x) (auto simp: shows_prec_xmldoc_def show_law_simps) instance by standard (simp_all add: show_law_simps shows_list_xmldoc_def) end subsection ‹XML-Parsing› definition parse_text :: "string option parser" where "parse_text = do { ts ← many ((≠) CHR ''<''); let text = trim ts; if text = [] then return None else return (Some (List.rev (trim (List.rev text)))) }" lemma is_parser_parse_text [intro]: "is_parser parse_text" by (auto simp: parse_text_def) lemma parse_text_consumes: assumes *: "ts ≠ []" "hd ts ≠ CHR ''<''" and parse: "parse_text ts = Inr (t, ts')" shows "length ts' < length ts" proof - from * obtain a tss where ts: "ts = a # tss" and not: "a ≠ CHR ''<''" by (cases ts, auto) note parse = parse [unfolded parse_text_def Let_def ts] from parse obtain x1 x2 where many: "many ((≠) CHR ''<'') tss = Inr (x1, x2)" using not by (cases "many ((≠) CHR ''<'') tss", auto simp: bind_def) from is_parser_many many have len: "length x2 ≤ length tss" by blast from parse many have "length ts' ≤ length x2" using not by (simp add: bind_def return_def split: if_splits) with len show ?thesis unfolding ts by auto qed definition parse_attribute_value :: "string parser" where "parse_attribute_value = do { exactly [CHR ''\"'']; v ← many ((≠) CHR ''\"''); exactly [CHR ''\"'']; return v }" lemma is_parser_parse_attribute_value [intro]: "is_parser parse_attribute_value" by (auto simp: parse_attribute_value_def) text ‹A list of characters that are considered to be "letters" for tag-names.› definition letters :: "char list" where "letters = ''abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789&;:-''" definition is_letter :: "char ⇒ bool" where "is_letter c ⟷ c ∈ set letters" lemma is_letter_pre_code: "is_letter c ⟷ CHR ''a'' ≤ c ∧ c ≤ CHR ''z'' ∨ CHR ''A'' ≤ c ∧ c ≤ CHR ''Z'' ∨ CHR ''0'' ≤ c ∧ c ≤ CHR ''9'' ∨ c ∈ set ''_&;:-''" by (cases c) (simp add: less_eq_char_def is_letter_def letters_def) definition many_letters :: "string parser" where [simp]: "many_letters = manyof letters" lemma many_letters [code, code_unfold]: "many_letters = many is_letter" by (simp add: is_letter_def [abs_def] manyof_def) definition parse_name :: "string parser" where "parse_name s = (do { n ← many_letters; spaces; if n = [] then error (''expected letter '' @ letters @ '' but first symbol is \"'' @ take 1 s @ ''\"'') else return n }) s" lemma is_parser_parse_name [intro]: "is_parser parse_name" proof fix s r x assume res: "parse_name s = Inr (x, r)" let ?exp = "do { n ← many_letters; spaces; if n = [] then error (''expected letter '' @ letters @ '' but first symbol is \"'' @ take 1 s @ ''\"'') else return n }" have isp: "is_parser ?exp" by auto have id: "parse_name s = ?exp s" by (simp add: parse_name_def) from isp [unfolded is_parser_def, rule_format, OF res [unfolded id]] show "length r ≤ length s" . qed function (sequential) parse_attributes :: "(string × string) list parser" where "parse_attributes [] = Error_Monad.return ([], [])" | "parse_attributes (c # s) = (if c ∈ set ''/>'' then Error_Monad.return ([], c # s) else (do { k ← parse_name; exactly ''=''; v ← parse_attribute_value; atts ← parse_attributes; return ((k, v) # atts) }) (c # s))" by pat_completeness auto termination parse_attributes proof show "wf (measure length)" by simp next fix c s y ts ya tsa yb tsb assume pn: "parse_name (c # s) = Inr (y, ts)" and oo: "exactly ''='' ts = Inr (ya, tsa)" and pav: "parse_attribute_value tsa = Inr (yb, tsb)" have cp: "is_cparser (exactly ''='')" by auto from cp [unfolded is_cparser_def] oo have 1: "length ts > length tsa" by auto from is_parser_parse_name [unfolded is_parser_def] pn have 2: "length (c # s) ≥ length ts" by force from is_parser_parse_attribute_value [unfolded is_parser_def] pav have 3: "length tsa ≥ length tsb" by force from 1 2 3 show "(tsb, c # s) ∈ measure length" by auto qed lemma is_parser_parse_attributes [intro]: "is_parser parse_attributes" proof fix s r x assume "parse_attributes s = Inr (x, r)" then show "length r ≤ length s" proof (induct arbitrary: x rule: parse_attributes.induct) case (2 c s) show ?case proof (cases "c ∈ set ''/>''") case True with 2(2) show ?thesis by simp next case False from False 2(2) obtain y1 s1 where pn: "parse_name (c # s) = Inr (y1, s1)" by (cases "parse_name (c # s)") (auto simp: bind_def) from False 2(2) pn obtain y2 s2 where oo: "exactly ''='' s1 = Inr (y2, s2)" by (cases "exactly ''='' s1") (auto simp: bind_def) from False 2(2) pn oo obtain y3 s3 where pav: "parse_attribute_value s2 = Inr (y3, s3)" by (cases "parse_attribute_value s2") (auto simp: bind_def) from False 2(2) pn oo pav obtain y4 where patts: "parse_attributes s3 = Inr (y4, r)" by (cases "parse_attributes s3") (auto simp: return_def bind_def) have "length r ≤ length s3" using 2(1)[OF False pn oo pav patts] . also have "… ≤ length s2" using is_parser_parse_attribute_value [unfolded is_parser_def] pav by auto also have "… ≤ length s1" using is_parser_exactly [unfolded is_parser_def] oo by auto also have "… ≤ length (c # s)" using is_parser_parse_name [unfolded is_parser_def] pn by force finally show "length r ≤ length (c # s)" by auto qed qed simp qed context notes [[function_internals]] begin function parse_nodes :: "xml list parser" where "parse_nodes ts = (if ts = [] ∨ take 2 ts = ''</'' then return [] ts else if hd ts ≠ CHR ''<'' then (do { t ← parse_text; ns ← parse_nodes; return (XML_text (the t) # ns) }) ts else (do { exactly ''<''; n ← parse_name; atts ← parse_attributes; e ← oneof [''/>'', ''>'']; (λ ts'. if e = ''/>'' then (do { cs ← parse_nodes; return (XML n atts [] # cs) }) ts' else (do { cs ← parse_nodes; exactly ''</''; exactly n; exactly ''>''; ns ← parse_nodes; return (XML n atts cs # ns) }) ts') }) ts)" by pat_completeness auto end lemma parse_nodes_help: "parse_nodes_dom s ∧ (∀ x r. parse_nodes s = Inr (x, r) ⟶ length r ≤ length s)" (is "?prop s") proof (induct rule: wf_induct [where P = ?prop and r = "measure length"]) fix s assume "∀ t. (t, s) ∈ measure length ⟶ ?prop t" then have ind1: "⋀ t. length t < length s ⟹ parse_nodes_dom t" and ind2: "⋀ t x r. length t < length s ⟹ parse_nodes t = Inr (x,r) ⟹ length r ≤ length t" by auto let ?check = "λ s. s = [] ∨ take 2 s = ''</''" let ?check2 = "hd s ≠ CHR ''<''" have dom: "parse_nodes_dom s" proof fix y assume "parse_nodes_rel y s" then show "parse_nodes_dom y" proof fix ts ya tsa assume *: "y = tsa" "s = ts" "¬ (ts = [] ∨ take 2 ts = ''</'')" "hd ts ≠ CHR ''<''" and parse: "parse_text ts = Inr (ya, tsa)" from parse_text_consumes[OF _ _ parse] *(3-4) have "length tsa < length ts" by auto with * have len: "length s > length y" by simp from ind1[OF this] show "parse_nodes_dom y" . next fix ts ya tsa yaa tsb yb tsc yc tsd assume "y = tsd" and "s = ts" and "¬ ?check ts" and "exactly ''<'' ts = Inr (ya, tsa)" and "parse_name tsa = Inr (yaa, tsb)" and "parse_attributes tsb = Inr (yb, tsc)" and "oneof [''/>'', ''>''] tsc = Inr (yc, tsd)" and "yc = ''/>''" then have len: "length s > length y" using is_cparser_exactly [of "''<''"] and is_parser_oneof [of "[''/>'', ''>'']"] and is_parser_parse_attributes and is_parser_parse_name by (auto dest!: is_parser_length is_cparser_length) with ind1[OF len] show "parse_nodes_dom y" by simp next fix ts ya tsa yaa tsb yb tsc yc tsd assume "y = tsd" and "s = ts" and "¬ ?check ts" and "exactly ''<'' ts = Inr (ya, tsa)" and "parse_name tsa = Inr (yaa, tsb)" and "parse_attributes tsb = Inr (yb, tsc)" and "oneof [''/>'', ''>''] tsc = Inr (yc, tsd)" then have len: "length s > length y" using is_cparser_exactly [of "''<''", simplified] and is_parser_oneof [of "[''/>'', ''>'']"] and is_parser_parse_attributes and is_parser_parse_name by (auto dest!: is_parser_length is_cparser_length) with ind1[OF len] show "parse_nodes_dom y" by simp next fix ts ya tsa yaa tsb yb tsc yc tse ye tsf yf tsg yg tsh yh tsi yi tsj assume y: "y = tsj" and "s = ts" and "¬ ?check ts" and "exactly ''<'' ts = Inr (ya, tsa)" and "parse_name tsa = Inr (yaa, tsb)" and "parse_attributes tsb = Inr (yb, tsc)" and "oneof [''/>'', ''>''] tsc = Inr (yc, tse)" and rec: "parse_nodes_sumC tse = Inr (ye, tsf)" and last: "exactly ''</'' tsf = Inr (yf, tsg)" "exactly yaa tsg = Inr (yg, tsh)" "exactly ''>'' tsh = Inr (yi, tsj)" then have len: "length s > length tse" using is_cparser_exactly [of "''<''", simplified] and is_parser_oneof [of "[''/>'', ''>'']"] and is_parser_parse_attributes and is_parser_parse_name by (auto dest!: is_parser_length is_cparser_length) from last(1) last(2) have len2a: "length tsf ≥ length tsh" using is_parser_exactly [of "''</''"] and is_parser_exactly [of yaa] and is_parser_parse_name by (auto dest!: is_parser_length) have len2c: "length tsh ≥ length y" using last(3) using is_parser_exactly [of "''>''"] by (auto simp: y dest!: is_parser_length) from len2a len2c have len2: "length tsf ≥ length y" by simp from ind2[OF len rec[unfolded parse_nodes_def[symmetric]]] len len2 have "length s > length y" by simp from ind1[OF this] show "parse_nodes_dom y" . qed qed note psimps = parse_nodes.psimps[OF dom] show "?prop s" proof (intro conjI, rule dom, intro allI impI) fix x r assume res: "parse_nodes s = Inr (x,r)" note res = res[unfolded psimps] then show "length r ≤ length s" proof (cases "?check s") case True then show ?thesis using res by (simp add: return_def) next case False note oFalse = this show ?thesis proof (cases ?check2) case True note res = res[simplified False True, simplified] from res obtain y1 s1 where pt: "parse_text s = Inr (y1, s1)" by (cases "parse_text s", auto simp: bind_def) note res = res[unfolded bind_def pt, simplified] from res obtain y2 s2 where pn: "parse_nodes s1 = Inr (y2, s2)" by (cases "parse_nodes s1") (auto simp: bind_def) note res = res[simplified bind_def pn, simplified] from res have r: "r = s2" by (simp add: return_def bind_def) from parse_text_consumes[OF _ True pt] False have lens: "length s1 < length s" by auto from ind2[OF lens pn] have "length s2 ≤ length s1" . then show ?thesis using lens unfolding r by auto next case False note ooFalse = this note res = res[simplified oFalse ooFalse, simplified] from res obtain y1 s1 where oo: "exactly ''<'' s = Inr (y1, s1)" by (cases "exactly ''<'' s", auto simp: bind_def) note res = res[unfolded bind_def oo, simplified] from res obtain y2 s2 where pn: "parse_name s1 = Inr (y2, s2)" by (cases "parse_name s1") (auto simp: bind_def psimps) note res = res[simplified bind_def pn, simplified] from res obtain y3 s3 where pa: "parse_attributes s2 = Inr (y3, s3)" by (cases "parse_attributes s2") (auto simp: return_def bind_def) note res = res[simplified pa, simplified] from res obtain y4 s4 where oo2: "oneof [''/>'', ''>''] s3 = Inr (y4, s4)" by (cases "oneof [''/>'', ''>''] s3") (auto simp: return_def bind_def) note res = res[unfolded oo2, simplified] from is_parser_parse_attributes and is_parser_oneof [of "[''/>'', ''>'']"] and is_cparser_exactly [of "''<''", simplified] and is_parser_parse_name and oo pn pa oo2 have s_s4: "length s > length s4" by (auto dest!: is_parser_length is_cparser_length) show ?thesis proof (cases "y4 = ''/>''") case True from res True obtain y5 where pns: "parse_nodes s4 = Inr (y5, r)" by (cases "parse_nodes s4") (auto simp: return_def bind_def) from ind2[OF s_s4 pns] s_s4 show "length r ≤ length s" by simp next case False note res = res[simplified False, simplified] from res obtain y6 s6 where pns: "parse_nodes s4 = Inr (y6, s6)" by (cases "parse_nodes s4") (auto simp: return_def bind_def) note res = res[unfolded bind_def pns, simplified, unfolded bind_def] from res obtain y7 s7 where oo3: "exactly ''</'' s6 = Inr (y7, s7)" by (cases "exactly ''</'' s6", auto) note res = res[unfolded oo3, simplified, unfolded bind_def, simplified, unfolded bind_def] from res obtain y8 s8 where oo4: "exactly y2 s7 = Inr (y8, s8)" by (cases "exactly y2 s7", auto) note res = res[unfolded oo4 bind_def, simplified] from res obtain y10 s10 where oo5: "exactly ''>'' s8 = Inr (y10,s10)" by (cases "exactly ''>'' s8", auto simp: bind_def) note res = res[unfolded oo5 bind_def, simplified] from res obtain y11 s11 where pns2: "parse_nodes s10 = Inr (y11, s11)" by (cases "parse_nodes s10", auto simp: bind_def) note res = res[unfolded bind_def pns2, simplified] note one = is_parser_oneof [unfolded is_parser_def, rule_format] note exact = is_parser_exactly [unfolded is_parser_def, rule_format] from ind2[OF s_s4 pns] s_s4 exact[OF oo3] exact[OF oo4] have s_s7: "length s > length s8" unfolding is_parser_def by force with exact[OF oo5] have s_s10: "length s > length s10" by simp with ind2[OF s_s10 pns2] have s_s11: "length s > length s11" by simp then show "length r ≤ length s" using res by (auto simp: return_def) qed qed qed qed qed simp termination parse_nodes using parse_nodes_help by blast lemma parse_nodes [intro]: "is_parser parse_nodes" unfolding is_parser_def using parse_nodes_help by blast text ‹A more efficient variant of @{term "oneof [''/>'', ''>'']"}.› fun oneof_closed :: "string parser" where "oneof_closed (x # xs) = (if x = CHR ''>'' then Error_Monad.return (''>'', trim xs) else if x = CHR ''/'' ∧ (case xs of [] ⇒ False | y # ys ⇒ y = CHR ''>'') then Error_Monad.return (''/>'', trim (tl xs)) else err_expecting (''one of [/>, >]'') (x # xs))" | "oneof_closed xs = err_expecting (''one of [/>, >]'') xs" lemma oneof_closed: "oneof [''/>'', ''>''] = oneof_closed" (is "?l = ?r") proof (rule ext) fix xs have id: "''one of '' @ shows_list [''/>'', ''>''] [] = ''one of [/>, >]''" by (simp add: shows_list_list_def showsp_list_def pshowsp_list_def shows_list_gen_def shows_string_def shows_prec_list_def shows_list_char_def) note d = oneof_def oneof_aux.simps id show "?l xs = ?r xs" proof (cases xs) case Nil show ?thesis unfolding Nil d by simp next case (Cons x xs) note oCons = this show ?thesis proof (cases "x = CHR ''>''") case True show ?thesis unfolding Cons d True by simp next case False note oFalse = this show ?thesis proof (cases "x = CHR ''/''") case False show ?thesis unfolding Cons d using False oFalse by simp next case True show ?thesis proof (cases xs) case Nil show ?thesis unfolding Cons Nil d by auto next case (Cons y ys) show ?thesis unfolding oCons Cons d by simp qed qed qed qed qed lemma If_removal: "(λ e x. if b e then f e x else g e x) = (λ e. if b e then f e else g e)" by (intro ext) auto declare parse_nodes.simps [unfolded oneof_closed, unfolded If_removal [of "λ e. e = ''/>''"], code] definition parse_node :: "xml parser" where "parse_node = do { exactly ''<''; n ← parse_name; atts ← parse_attributes; e ← oneof [''/>'', ''>'']; if e = ''/>'' then return (XML n atts []) else do { cs ← parse_nodes; exactly ''</''; exactly n; exactly ''>''; return (XML n atts cs) } }" declare parse_node_def [unfolded oneof_closed, code] function parse_header :: "string list parser" where "parse_header ts = (if take 2 (trim ts) = ''<?'' then (do { h ← scan_upto ''?>''; hs ← parse_header; return (h # hs) }) ts else (do { spaces; return [] }) ts)" by pat_completeness auto termination parse_header proof fix ts y tsa assume "scan_upto ''?>'' ts = Inr (y, tsa)" with is_cparser_scan_upto have "length ts > length tsa" unfolding is_cparser_def by force then show "(tsa, ts) ∈ measure length" by simp qed simp definition "comment_error (x :: unit) = Code.abort (STR ''comment not terminated'') (λ _. Nil :: string)" definition "comment_error_hyphen (x :: unit) = Code.abort (STR ''double hyphen within comment'') (λ _. Nil :: string)" fun rc_aux where "rc_aux False (c # cs) = (if c = CHR ''<'' ∧ take 3 cs = ''!--'' then rc_aux True (drop 3 cs) else c # rc_aux False cs)" | "rc_aux True (c # cs) = (if c = CHR ''-'' ∧ take 1 cs = ''-'' then if take 2 cs = ''-'' then comment_error () else if take 2 cs = ''->'' then rc_aux False (drop 2 cs) else comment_error_hyphen () else rc_aux True cs)" | "rc_aux False [] = []" | "rc_aux True [] = comment_error ()" definition "remove_comments xs = rc_aux False xs" definition "rc_open_1 xs = rc_aux False xs" definition "rc_open_2 xs = rc_aux False (CHR ''<'' # xs)" definition "rc_open_3 xs = rc_aux False (CHR ''<'' # CHR ''!'' # xs)" definition "rc_open_4 xs = rc_aux False (CHR ''<'' # CHR ''!'' # CHR ''-'' # xs)" definition "rc_close_1 xs = rc_aux True xs" definition "rc_close_2 xs = rc_aux True (CHR ''-'' # xs)" definition "rc_close_3 xs = rc_aux True (CHR ''-'' # CHR ''-'' # xs)" lemma remove_comments_code[code]: "remove_comments xs = rc_open_1 xs" unfolding remove_comments_def rc_open_1_def .. lemma char_eq_via_integer_eq: "c = d ⟷ integer_of_char c = integer_of_char d" unfolding integer_of_char_def by simp lemma integer_of_char_simps[simp]: "integer_of_char (CHR ''<'') = 60" "integer_of_char (CHR ''>'') = 62" "integer_of_char (CHR ''/'') = 47" "integer_of_char (CHR ''!'') = 33" "integer_of_char (CHR ''-'') = 45" by code_simp+ lemma rc_open_close_simp[code]: "rc_open_1 (c # cs) = (if integer_of_char c = 60 then rc_open_2 cs else c # rc_open_1 cs)" "rc_open_1 [] = []" "rc_open_2 (c # cs) = (let ic = integer_of_char c in if ic = 33 then rc_open_3 cs else if ic = 60 then c # rc_open_2 cs else CHR ''<'' # c # rc_open_1 cs)" "rc_open_2 [] = ''<''" "rc_open_3 (c # cs) = (let ic = integer_of_char c in if ic = 45 then rc_open_4 cs else if ic = 60 then c # CHR ''!'' # rc_open_2 cs else CHR ''<'' # CHR ''!'' # c # rc_open_1 cs)" "rc_open_3 [] = ''<!''" "rc_open_4 (c # cs) = (let ic = integer_of_char c in if ic = 45 then rc_close_1 cs else if ic = 60 then c # CHR ''!'' # CHR ''-'' # rc_open_2 cs else CHR ''<'' # CHR ''!'' # CHR ''-'' # c # rc_open_1 cs)" "rc_open_4 [] = ''<!-''" "rc_close_1 (c # cs) = (if integer_of_char c = 45 then rc_close_2 cs else rc_close_1 cs)" "rc_close_1 [] = comment_error ()" "rc_close_2 (c # cs) = (if integer_of_char c = 45 then rc_close_3 cs else rc_close_1 cs)" "rc_close_2 [] = comment_error ()" "rc_close_3 (c # cs) = (if integer_of_char c = 62 then rc_open_1 cs else comment_error_hyphen ())" "rc_close_3 [] = comment_error ()" unfolding rc_open_1_def rc_open_2_def rc_open_3_def rc_open_4_def rc_close_1_def rc_close_2_def rc_close_3_def by (simp_all add: char_eq_via_integer_eq Let_def) definition parse_doc :: "xmldoc parser" where "parse_doc = do { update_tokens remove_comments; h ← parse_header; xml ← parse_node; eoi; return (XMLDOC h xml) }" definition doc_of_string :: "string ⇒ string + xmldoc" where "doc_of_string s = do { (doc, _) ← parse_doc s; Error_Monad.return doc }" subsection ‹More efficient code equations› lemma trim_code[code]: "trim = dropWhile (λ c. let ci = integer_of_char c in if ci ≥ 34 then False else ci = 32 ∨ ci = 10 ∨ ci = 9 ∨ ci = 13)" unfolding trim_def apply (rule arg_cong[of _ _ dropWhile], rule ext) unfolding Let_def in_set_simps less_eq_char_code char_eq_via_integer_eq by (auto simp: integer_of_char_def Let_def) fun parse_text_main :: "string ⇒ string ⇒ string × string" where "parse_text_main [] res = ('''', rev (trim res))" | "parse_text_main (c # cs) res = (if c = CHR ''<'' then (c # cs, rev (trim res)) else parse_text_main cs (c # res))" definition "parse_text_impl cs = (case parse_text_main (trim cs) '''' of (rem, txt) ⇒ if txt = [] then Inr (None, rem) else Inr (Some txt, rem))" lemma parse_text_main: "parse_text_main xs ys = (dropWhile ((≠) CHR ''<'') xs, rev (trim (rev (takeWhile ((≠) CHR ''<'') xs) @ ys)))" by (induct xs arbitrary: ys, auto) lemma many_take_drop: "many f xs = Inr (takeWhile f xs, dropWhile f xs)" by (induct f xs rule: many.induct, auto) lemma trim_takeWhile_inside: "trim (takeWhile ((≠) CHR ''<'') cs) = takeWhile ((≠) CHR ''<'') (trim cs)" unfolding trim_def by (induct cs, auto) lemma trim_dropWhile_inside: "dropWhile ((≠) CHR ''<'') cs = dropWhile ((≠) CHR ''<'') (trim cs)" unfolding trim_def by (induct cs, auto) declare [[code drop: parse_text]] lemma parse_text_code[code]: "parse_text cs = parse_text_impl cs" proof - define xs where "xs = trim cs" show ?thesis unfolding parse_text_def unfolding Parser_Monad.bind_def Error_Monad.bind_def unfolding Let_def unfolding many_take_drop sum.simps split unfolding trim_takeWhile_inside trim_dropWhile_inside[of cs] Parser_Monad.return_def unfolding parse_text_impl_def unfolding xs_def[symmetric] unfolding parse_text_main split apply (simp, intro conjI impI, force simp: trim_def) proof define ys where "ys = takeWhile ((≠) CHR ''<'') xs" assume "trim (rev (takeWhile ((≠) CHR ''<'') xs)) = []" and "takeWhile ((≠) CHR ''<'') xs ≠ []" hence "trim (rev ys) = []" and "ys ≠ []" unfolding ys_def by auto from this(1) have ys: "⋀ y. y ∈ set ys ⟹ y ∈ set wspace" unfolding trim_def by simp with ‹ys ≠ []› show False unfolding ys_def xs_def trim_def by (metis (no_types, lifting) dropWhile_eq_Nil_conv dropWhile_idem trim_def trim_takeWhile_inside xs_def) qed qed declare [[code drop: parse_text_main]] lemma parse_text_main_code[code]: "parse_text_main [] res = ('''', rev (trim res))" "parse_text_main (c # cs) res = (if integer_of_char c = 60 then (c # cs, rev (trim res)) else parse_text_main cs (c # res))" unfolding parse_text_main.simps by (auto simp: char_eq_via_integer_eq) lemma exactly_head: "exactly [c] (c # cs) = Inr ([c],trim cs)" unfolding exactly_def by simp lemma take_1_test: "(case cs of [] ⇒ False | c # x ⇒ c = CHR ''/'') = (take 1 cs = ''/'')" by (cases cs, auto) definition "exactly_close = exactly ''>''" definition "exactly_end = exactly ''</''" lemma exactly_close_code[code]: "exactly_close [] = err_expecting (''\">\"'') []" "exactly_close (c # cs) = (if integer_of_char c = 62 then Inr (''>'', trim cs) else err_expecting (''\">\"'') (c # cs))" unfolding exactly_close_def exactly_def exactly_aux.simps by (auto simp: char_eq_via_integer_eq) lemma exactly_end_code[code]: "exactly_end [] = err_expecting (''\"</\"'') []" "exactly_end [c] = err_expecting (''\"</\"'') [c]" "exactly_end (c # d # cs) = (if integer_of_char c = 60 ∧ integer_of_char d = 47 then Inr (''</'', trim cs) else err_expecting (''\"</\"'') (c # d # cs))" unfolding exactly_end_def exactly_def exactly_aux.simps by (auto simp: char_eq_via_integer_eq) fun oneof_closed_combined :: "'a parser ⇒ 'a parser ⇒ 'a parser" where "oneof_closed_combined p q (x # xs) = (if x = CHR ''>'' then q (trim xs) else if x = CHR ''/'' ∧ (case xs of [] ⇒ False | y # ys ⇒ y = CHR ''>'') then p (trim (tl xs)) else err_expecting (''one of [/>, >]'') (x # xs))" | "oneof_closed_combined p q xs = err_expecting (''one of [/>, >]'') xs" lemma oneof_closed_combined: "oneof_closed_combined p q = (oneof_closed ⤜ (λe. if e = ''/>'' then p else q))" (is "?l = ?r") proof (intro ext) fix xs show "?l xs = ?r xs" unfolding Parser_Monad.bind_def Error_Monad.bind_def by (cases xs, auto split: sum.splits simp: err_expecting_def) qed declare [[code drop: oneof_closed_combined]] lemma oneof_closed_combined_code[code]: "oneof_closed_combined p q [] = err_expecting (''one of [/>, >]'') ''''" "oneof_closed_combined p q (x # xs) = (let xi = integer_of_char x in (if xi = 62 then q (trim xs) else (if xi = 47 then (case xs of [] ⇒ err_expecting (''one of [/>, >]'') (x # xs) | y # ys ⇒ if integer_of_char y = 62 then p (trim ys) else err_expecting (''one of [/>, >]'') (x # xs)) else err_expecting (''one of [/>, >]'') (x # xs))))" unfolding oneof_closed_combined.simps Let_def by (auto split: list.splits simp: char_eq_via_integer_eq) lemmas parse_nodes_current_code = parse_nodes.simps[unfolded oneof_closed, unfolded If_removal [of "λ e. e = ''/>''"]] lemma parse_nodes_pre_code: "parse_nodes (c # cs) = (if c = CHR ''<'' then if (case cs of [] ⇒ False | c # _ ⇒ c = CHR ''/'') then Parser_Monad.return [] (c # cs) else (parse_name ⤜ (λn. parse_attributes ⤜ (λatts. oneof_closed_combined (parse_nodes ⤜ (λcs. Parser_Monad.return (XML n atts [] # cs))) (parse_nodes ⤜ (λcs. exactly_end ⤜ (λ_. exactly n ⤜ (λ_. exactly_close ⤜ (λ_. parse_nodes ⤜ (λns. Parser_Monad.return (XML n atts cs # ns)))))))))) (trim cs) else (parse_text ⤜ (λt. parse_nodes ⤜ (λns. Parser_Monad.return (XML_text (the t) # ns)))) (c # cs))" unfolding parse_nodes_current_code[of "c # cs"] exactly_close_def exactly_end_def oneof_closed_combined by (simp_all add: Parser_Monad.bind_def exactly_head take_1_test) declare [[code drop: parse_nodes]] lemma parse_nodes_code[code]: "parse_nodes [] = Parser_Monad.return [] ''''" "parse_nodes (c # cs) = (if integer_of_char c = 60 then if (case cs of [] ⇒ False | d # _ ⇒ d = CHR ''/'') then Parser_Monad.return [] (c # cs) else (parse_name ⤜ (λn. parse_attributes ⤜ (λatts. oneof_closed_combined (parse_nodes ⤜ (λcs. Parser_Monad.return (XML n atts [] # cs))) (parse_nodes ⤜ (λcs. exactly_end ⤜ (λ_. exactly n ⤜ (λ_. exactly_close ⤜ (λ_. parse_nodes ⤜ (λns. Parser_Monad.return (XML n atts cs # ns)))))))))) (trim cs) else (parse_text ⤜ (λt. parse_nodes ⤜ (λns. Parser_Monad.return (XML_text (the t) # ns)))) (c # cs))" unfolding parse_nodes_pre_code unfolding Let_def by (auto simp: char_eq_via_integer_eq) declare [[code drop: parse_attributes]] lemma parse_attributes_code[code]: "parse_attributes [] = Error_Monad.return ([], [])" "parse_attributes (c # s) = (let ic = integer_of_char c in (if ic = 47 ∨ ic = 62 then Inr ([], c # s) else (parse_name ⤜ (λk. exactly ''='' ⤜ (λ_. parse_attribute_value ⤜ (λv. parse_attributes ⤜ (λatts. Parser_Monad.return ((k, v) # atts)))))) (c # s)))" unfolding parse_attributes.simps unfolding Let_def in_set_simps by (auto simp: char_eq_via_integer_eq) declare [[code drop: is_letter]] lemma is_letter_code[code]: "is_letter c = (let ci = integer_of_char c in (97 ≤ ci ∧ ci ≤ 122 ∨ 65 ≤ ci ∧ ci ≤ 90 ∨ 48 ≤ ci ∧ ci ≤ 59 ∨ ci = 95 ∨ ci = 38 ∨ ci = 45))" proof - define d where "d = integer_of_char c" have "d ≤ 59 ⟷ (d ≤ 57 ∨ d = 58 ∨ d = 59)" for d :: int by auto hence "d ≤ 59 ⟷ (d ≤ 57 ∨ d = 58 ∨ d = 59)" by (metis int_of_integer_numeral integer_eqI integer_less_eq_iff verit_comp_simplify1(2)) thus ?thesis unfolding is_letter_pre_code in_set_simps Let_def d_def less_eq_char_code char_eq_via_integer_eq unfolding integer_of_char_def by auto qed declare spaces_def[code_unfold del] lemma spaces_code[code]: "spaces cs = Inr ((), trim cs)" unfolding spaces_def trim_def manyof_def many_take_drop Parser_Monad.bind_def Parser_Monad.return_def by auto declare many_letters[code del, code_unfold del] fun many_letters_main where "many_letters_main [] = ([], [])" | "many_letters_main (c # cs) = (if is_letter c then case many_letters_main cs of (ds,es) ⇒ (c # ds, es) else ([], c # cs))" lemma many_letters_code[code]: "many_letters cs = Inr (many_letters_main cs)" unfolding many_letters_def manyof_def many_take_drop by (rule arg_cong[of _ _ Inr], rule sym, induct cs, auto simp: is_letter_def) lemma parse_name_code[code]: "parse_name s = (case many_letters_main s of (n, ts) ⇒ if n = [] then Inl (''expected letter '' @ letters @ '' but first symbol is \"'' @ take 1 s @ ''\"'') else Inr (n, trim ts))" unfolding parse_name_def many_letters_code spaces_code Parser_Monad.bind_def Error_Monad.bind_def sum.simps split Parser_Monad.error_def Parser_Monad.return_def if_distribR by auto end