Theory KMP
theory KMP
imports
Theory_Of_Lists
begin
hide_const abs
section‹ Knuth-Morris-Pratt matching according to Bird \label{sec:KMP} ›
subsection‹ Step 1: Specification \label{sec:KMP:specification} ›
text‹
We begin with the specification of string matching given by \<^citet>‹‹Chapter~16› in
"Bird:PearlsofFAD:2010"›. (References to ``Bird'' in the following are to this text.) Note that
we assume @{const ‹eq›} has some nice properties (see \S\ref{sec:equality}) and
use strict lists.
›
fixrec endswith :: "[:'a::Eq_def:] → [:'a:] → tr" where
[simp del]: "endswith⋅pat = selem⋅pat oo stails"
fixrec matches :: "[:'a::Eq_def:] → [:'a:] → [:Integer:]" where
[simp del]: "matches⋅pat = smap⋅slength oo sfilter⋅(endswith⋅pat) oo sinits"
text‹
Bird describes @{term "matches⋅pat⋅xs"} as returning ``a list of integers ‹p› such that ‹pat› is a
suffix of @{term "stake⋅p⋅xs"}.''
The following examples illustrate this behaviour:
›
lemma "matches⋅[::]⋅[::] = [:0:]"
unfolding matches.unfold endswith.unfold by simp
lemma "matches⋅[::]⋅[:10::Integer, 20, 30:] = [:0, 1, 2, 3:]"
unfolding matches.unfold endswith.unfold by simp
lemma "matches⋅[:1::Integer,2,3,1,2:]⋅[:1,2,1,2,3,1,2,3,1,2:] = [:7, 10:]"
unfolding matches.unfold endswith.unfold
by (simp add: sfilter_scons_let del: sfilter_strict_scons sfilter.simps)
lemma endswith_strict[simp]:
"endswith⋅⊥ = ⊥"
"endswith⋅pat⋅⊥ = ⊥"
by (fixrec_simp; simp add: cfun_eq_iff)+
lemma matches_strict[simp]:
"matches⋅⊥ = ⊥"
"matches⋅pat⋅⊥ = ⊥"
by (fixrec_simp; clarsimp simp: cfun_eq_iff)+
text‹
Bird's strategy for deriving KMP from this specification is encoded in the following lemmas:
if we can rewrite @{const ‹endswith›} as a composition of a predicate with a
@{const ‹sfoldl›}, then we can rewrite @{const ‹matches›} into a @{const ‹sscanl›}.
›
lemma fork_sfoldl:
shows "sfoldl⋅f1⋅z1 && sfoldl⋅f2⋅z2 = sfoldl⋅(Λ (a, b) z. (f1⋅a⋅z, f2⋅b⋅z))⋅(z1, z2)" (is "?lhs = ?rhs")
proof(rule cfun_eqI)
fix xs show "?lhs⋅xs = ?rhs⋅xs"
by (induct xs arbitrary: z1 z2) simp_all
qed
lemma smap_sfilter_split_cfcomp:
assumes "f⋅⊥ = ⊥"
assumes "p⋅⊥ = ⊥"
shows "smap⋅f oo sfilter⋅(p oo g) = smap⋅cfst oo sfilter⋅(p oo csnd) oo smap⋅(f && g)" (is "?lhs = ?rhs")
proof(rule cfun_eqI)
fix xs show "?lhs⋅xs = ?rhs⋅xs"
using assms by (induct xs) (simp_all add: If2_def[symmetric] split: If2_splits)
qed
lemma Bird_strategy:
assumes endswith: "endswith⋅pat = p oo sfoldl⋅op⋅z"
assumes step: "step = (Λ (n, x) y. (n + 1, op⋅x⋅y))"
assumes "p⋅⊥ = ⊥"
shows "matches⋅pat = smap⋅cfst oo sfilter⋅(p oo csnd) oo sscanl⋅step⋅(0, z)"
apply (simp add: matches.simps assoc_oo endswith)
apply (subst smap_sfilter_split_cfcomp, fastforce, fact)
apply (subst slength_sfoldl)
apply (subst fork_sfoldl)
apply (simp add: oo_assoc[symmetric])
apply (subst sinits_sscanl)
apply (fold step)
apply (rule refl)
done
text‹
Bird proceeds by reworking @{const ‹endswith›} into the form required by @{thm [source] "Bird_strategy"}.
This is eased by an alternative definition of @{const ‹endswith›}.
›
lemma sfilter_supto:
assumes "0 ≤ d"
shows "sfilter⋅(Λ x. le⋅(MkI⋅n - x)⋅(MkI⋅d))⋅(supto⋅(MkI⋅m)⋅(MkI⋅n))
= supto⋅(MkI⋅(if m ≤ n - d then n - d else m))⋅(MkI⋅n)" (is "?sfilterp⋅?suptomn = _")
proof(cases "m ≤ n - d")
case True
moreover
from True assms have "?sfilterp⋅?suptomn = ?sfilterp⋅(supto⋅(MkI⋅m)⋅(MkI⋅(n - d - 1)) :@ supto⋅(MkI⋅(n - d))⋅(MkI⋅n))"
using supto_split1 by auto
moreover from True assms have "?sfilterp⋅(supto⋅(MkI⋅m)⋅(MkI⋅(n - d - 1))) = [::]" by auto
ultimately show ?thesis by (clarsimp intro!: trans[OF sfilter_cong[OF refl] sfilter_const_TT])
next
case False then show ?thesis
by (clarsimp intro!: trans[OF sfilter_cong[OF refl] sfilter_const_TT])
qed
lemma endswith_eq_sdrop: "endswith⋅pat⋅xs = eq⋅pat⋅(sdrop⋅(slength⋅xs - slength⋅pat)⋅xs)"
proof(cases "pat = ⊥" "xs = ⊥" rule: bool.exhaust[case_product bool.exhaust])
case False_False then show ?thesis
by (cases "endswith⋅pat⋅xs";
simp only: endswith.simps cfcomp2 stails_sdrop';
force simp: zero_Integer_def one_Integer_def elim: slengthE)
qed simp_all
lemma endswith_def2:
shows "endswith⋅pat⋅xs = eq⋅pat⋅(shead⋅(sfilter⋅(Λ x. prefix⋅x⋅pat)⋅(stails⋅xs)))" (is "?lhs = ?rhs")
proof(cases "pat = ⊥" "xs = ⊥" rule: bool.exhaust[case_product bool.exhaust])
case False_False
from False_False obtain patl xsl where *: "slength⋅xs = MkI⋅xsl" "0 ≤ xsl" "slength⋅pat = MkI⋅patl" "0 ≤ patl"
by (meson Integer.exhaust slength_bottom_iff slength_ge_0)
let ?patl_xsl = "if patl ≤ xsl then xsl - patl else 0"
have "?rhs = eq⋅pat⋅(shead⋅(sfilter⋅(Λ x. le⋅(slength⋅x)⋅(slength⋅pat) andalso prefix⋅x⋅pat)⋅(stails⋅xs)))"
by (subst prefix_slength_strengthen) simp
also have "… = eq⋅pat⋅(shead⋅(sfilter⋅(Λ x. prefix⋅x⋅pat)⋅(sfilter⋅(Λ x. le⋅(slength⋅x)⋅(slength⋅pat))⋅(stails⋅xs))))"
by (simp add: sfilter_sfilter')
also have "… = eq⋅pat⋅(shead⋅(smap⋅(Λ k. sdrop⋅k⋅xs)⋅(sfilter⋅(Λ k. prefix⋅(sdrop⋅k⋅xs)⋅pat)⋅(sfilter⋅(Λ k. le⋅(slength⋅(sdrop⋅k⋅xs))⋅(MkI⋅patl))⋅(supto⋅(MkI⋅0)⋅(MkI⋅xsl))))))"
using ‹slength⋅xs = MkI⋅xsl› ‹slength⋅pat = MkI⋅patl›
by (simp add: stails_sdrop' sfilter_smap' cfcomp1 zero_Integer_def)
also have "… = eq⋅pat⋅(shead⋅(smap⋅(Λ k. sdrop⋅k⋅xs)⋅(sfilter⋅(Λ k. prefix⋅(sdrop⋅k⋅xs)⋅pat)⋅(sfilter⋅(Λ k. le⋅(MkI⋅xsl - k)⋅(MkI⋅patl))⋅(supto⋅(MkI⋅0)⋅(MkI⋅xsl))))))"
using ‹slength⋅xs = MkI⋅xsl›
by (subst (2) sfilter_cong[where p'="Λ x. le⋅(MkI⋅xsl - x)⋅(MkI⋅patl)"]; fastforce simp: zero_Integer_def)
also have "… = If prefix⋅(sdrop⋅(MkI⋅?patl_xsl)⋅xs)⋅pat
then eq⋅pat⋅(sdrop⋅(MkI⋅?patl_xsl)⋅xs)
else eq⋅pat⋅(shead⋅(smap⋅(Λ k. sdrop⋅k⋅xs)⋅(sfilter⋅(Λ x. prefix⋅(sdrop⋅x⋅xs)⋅pat)⋅(supto⋅(MkI⋅(?patl_xsl + 1))⋅(MkI⋅xsl)))))"
using False_False ‹0 ≤ xsl› ‹0 ≤ patl›
by (subst sfilter_supto) (auto simp: If_distr one_Integer_def)
also have "… = ?lhs" (is "If ?c then _ else ?else = _")
proof(cases ?c)
case TT with ‹slength⋅xs = MkI⋅xsl› ‹slength⋅pat = MkI⋅patl›
show ?thesis by (simp add: endswith_eq_sdrop sdrop_neg zero_Integer_def)
next
case FF
have "?else = shead⋅(smap⋅(Λ x. eq⋅pat⋅(sdrop⋅x⋅xs))⋅(sfilter⋅(Λ x. prefix⋅(sdrop⋅x⋅xs)⋅pat)⋅(supto⋅(MkI⋅(?patl_xsl + 1))⋅(MkI⋅xsl))))"
using FF by (subst shead_smap_distr[where f="eq⋅pat", symmetric]) (auto simp: cfcomp1)
also have "… = shead⋅(smap⋅(Λ x. seq⋅x⋅FF)⋅(sfilter⋅(Λ x. prefix⋅(sdrop⋅x⋅xs)⋅pat)⋅(supto⋅(MkI⋅(?patl_xsl + 1))⋅(MkI⋅xsl))))"
using False_False * by (subst smap_cong[OF refl, where f'="Λ x. seq⋅x⋅FF"]) (auto simp: zero_Integer_def split: if_splits)
also from * FF have "… = ?lhs"
apply (auto 0 0 simp: shead_smap_distr seq_conv_if endswith_eq_sdrop zero_Integer_def dest!: prefix_FF_not_snilD)
apply (metis (full_types) le_MkI_MkI linorder_not_less order_refl prefix_FF_not_snilD sdrop_all zless_imp_add1_zle)
using FF apply fastforce
apply (metis add.left_neutral le_MkI_MkI linorder_not_less order_refl prefix_FF_not_snilD sdrop_0(1) sdrop_all zero_Integer_def zless_imp_add1_zle)
done
finally show ?thesis using FF by simp
qed (simp add: False_False)
finally show ?thesis ..
qed simp_all
text‹
Bird then generalizes @{term ‹sfilter⋅(Λ x. prefix⋅x⋅pat) oo stails›} to @{term ‹split›},
where ``‹split⋅pat⋅xs› splits ‹pat› into two lists ‹us› and ‹vs› so that
@{prop ‹us :@ vs = pat›} and ‹us› is the longest suffix of ‹xs› that is a prefix of ‹pat›.''
›
fixrec split :: "[:'a::Eq_def:] → [:'a:] → [:'a:] × [:'a:]" where
[simp del]: "split⋅pat⋅xs = If prefix⋅xs⋅pat then (xs, sdrop⋅(slength⋅xs)⋅pat) else split⋅pat⋅(stail⋅xs)"
lemma split_strict[simp]:
shows "split⋅⊥ = ⊥"
and "split⋅pat⋅⊥ = ⊥"
by fixrec_simp+
lemma split_bottom_iff[simp]: "(split⋅pat⋅xs = ⊥) ⟷ (pat = ⊥ ∨ xs = ⊥)"
by (cases "pat = ⊥"; clarsimp) (induct xs; subst split.unfold; simp)
lemma split_snil[simp]:
assumes "pat ≠ ⊥"
shows "split⋅pat⋅[::] = ([::], pat)"
using assms by fixrec_simp
lemma split_pattern:
assumes "xs ≠ ⊥"
assumes "split⋅pat⋅xs = (us, vs)"
shows "us :@ vs = pat"
using assms
proof(cases "pat = ⊥", simp, induct xs arbitrary: us vs)
case snil then show ?case by (subst (asm) split.unfold) simp
next
case (scons x xs) then show ?case
by (subst (asm) (3) split.unfold)
(fastforce dest: prefix_sdrop_slength simp: If2_def[symmetric] split: If2_splits)
qed simp
lemma endswith_split:
shows "endswith⋅pat = snull oo csnd oo split⋅pat"
proof(rule cfun_eqI)
fix xs show "endswith⋅pat⋅xs = (snull oo csnd oo split⋅pat)⋅xs"
proof(cases "pat = ⊥", simp, induct xs)
case (scons x xs) then show ?case
unfolding endswith_def2
by (subst split.unfold)
(fastforce dest: prefix_sdrop_prefix_eq simp: If2_def[symmetric] If_distr snull_eq_snil split: If2_splits)
qed (simp_all add: snull_eq_snil endswith.simps)
qed
lemma split_length_lt:
assumes "pat ≠ ⊥"
assumes "xs ≠ ⊥"
shows "lt⋅(slength⋅(prod.fst (split⋅pat⋅xs)))⋅(slength⋅xs + 1) = TT"
using assms
proof(induct xs)
case (scons x xs) then show ?case
by (subst split.unfold)
(auto simp: If2_def[symmetric] one_Integer_def split: If2_splits elim!: slengthE elim: lt_trans)
qed simp_all
text‹
The predicate ‹p› required by @{thm [source] "Bird_strategy"} is therefore ‹snull oo csnd›. It
remains to find ‹op› and ‹z› such that:
▪ @{term ‹split⋅pat⋅[::] = z›}
▪ @{term ‹split⋅pat⋅(xs :@ [:x:]) = op⋅(split⋅pat⋅xs)⋅x›}
›
text‹
so that @{term ‹split = sfoldl⋅op⋅z›}.
We obtain @{term ‹z = ([::], pat)›} directly from the definition of @{term ‹split›}.
Bird derives ‹op› on the basis of this crucial observation:
›
lemma split_snoc:
shows "split⋅pat⋅(xs :@ [:x:]) = split⋅pat⋅(cfst⋅(split⋅pat⋅xs) :@ [:x:])"
proof(cases "pat = ⊥", simp, cases "x = ⊥", simp, induct xs)
case (scons x xs) then show ?case
apply -
apply (subst (1 3) split.unfold)
apply (clarsimp simp: If2_def[symmetric] split: If2_splits; intro conjI impI)
apply (subst split.unfold; fastforce)
apply (subst split.unfold; fastforce)
apply (simp add: append_prefixD)
done
qed simp_all
fixrec
op :: "[:'a::Eq_def:] → [:'a:] × [:'a:] → 'a → [:'a:] × [:'a:]"
where
[simp del]:
"op⋅pat⋅(us, vs)⋅x =
( If prefix⋅[:x:]⋅vs then (us :@ [:x:], stail⋅vs)
else If snull⋅us then ([::], pat)
else op⋅pat⋅(split⋅pat⋅(stail⋅us))⋅x )"
lemma op_strict[simp]:
"op⋅pat⋅⊥ = ⊥"
"op⋅pat⋅(us, ⊥) = ⊥"
"op⋅pat⋅usvs⋅⊥ = ⊥"
by fixrec_simp+
text‹
Bird demonstrates that @{const ‹op›} is partially correct wrt @{const ‹split›}, i.e.,
@{prop "op⋅pat⋅(split⋅pat⋅xs)⋅x ⊑ split⋅pat⋅(xs :@ [:x:])"}. For total correctness we
essentially prove that @{const ‹op›} terminates on well-defined arguments with an inductive argument.
›
lemma op_induct[case_names step]:
fixes usvs:: "[:'a:] × 'b"
assumes step: "⋀usvs. (⋀usvs'. lt⋅(slength⋅(cfst⋅usvs'))⋅(slength⋅(cfst⋅usvs)) = TT ⟹ P usvs') ⟹ P usvs"
shows "P usvs"
proof(induct usvs rule: wf_induct)
let ?r = "{ (usvs', usvs) |(usvs :: [:'a:] × 'b) (usvs' :: [:'a:] × 'b). lt⋅(slength⋅(cfst⋅usvs'))⋅(slength⋅(cfst⋅usvs)) = TT }"
show "wf ?r"
proof(rule wf_subset[OF wf_inv_image[where f="λ(x, _). slength⋅x", OF wf_subset[OF wf_Integer_ge_less_than[where d=0]]]])
let ?rslen = "{ (slength⋅us', slength⋅us) |(us :: [:'a:]) (us' :: [:'a:]). lt⋅(slength⋅us')⋅(slength⋅us) = TT }"
show "?rslen ⊆ Integer_ge_less_than 0"
apply (clarsimp simp: Integer_ge_less_than_def zero_Integer_def)
apply (metis Integer.exhaust dist_eq_tr(4) dist_eq_tr(6) lt_Integer_bottom_iff lt_MkI_MkI slength_ge_0)
done
show "?r ⊆ inv_image ?rslen (λ(x, _). slength⋅x)" by (auto 0 3)
qed
fix usvs
assume "∀usvs'. (usvs', usvs) ∈ ?r ⟶ P usvs'"
then show "P usvs"
by - (rule step; clarsimp; metis eq_fst_iff)
qed
lemma op_induct'[case_names step]:
assumes step: "⋀us. (⋀us'. lt⋅(slength⋅us')⋅(slength⋅us) = TT ⟹ P us') ⟹ P us"
shows "P us"
by (rule op_induct[where P="P ∘ prod.fst" and usvs="(us, vs)" for vs::unit, simplified])
(fastforce intro: step)
lemma split_snoc_op:
"split⋅pat⋅(xs :@ [:x:]) = op⋅pat⋅(split⋅pat⋅xs)⋅x"
proof(induct "split⋅pat⋅xs" arbitrary: xs rule: op_induct)
case (step xs) show ?case
proof(cases "pat = ⊥" "xs = ⊥" "x = ⊥" rule: bool.exhaust[case_product bool.exhaust bool.exhaust])
case False_False_False
obtain us vs where *: "split⋅pat⋅xs = (us, vs)" by fastforce
from False_False_False * have **: "prefix⋅(us :@ [:x:])⋅pat = prefix⋅[:x:]⋅vs"
using split_pattern same_prefix_prefix sappend_bottom_iff by blast
from False_False_False * **
have ***: "sdrop⋅(slength⋅(us :@ [:x:]))⋅pat = stail⋅vs" if "prefix⋅(us :@ [:x:])⋅pat = TT"
using sdrop_sappend_same[where xs="us :@ [:x:]"] that
by (cases vs; clarsimp) (fastforce dest!: split_pattern)
from False_False_False * ** *** show ?thesis
apply -
apply (subst split_snoc)
apply (subst split.unfold)
apply (subst op.unfold)
apply (fastforce simp: If2_def[symmetric] snull_FF_conv split: If2_splits intro: step split_length_lt)
done
qed simp_all
qed
lemma split_sfoldl_op:
assumes "pat ≠ ⊥"
shows "sfoldl⋅(op⋅pat)⋅([::], pat) = split⋅pat" (is "?lhs = ?rhs")
proof -
have "?lhs⋅xs = ?rhs⋅xs" for xs
using assms by (induct xs rule: srev_induct) (simp_all add: split_snoc_op)
then show ?thesis by (simp add: cfun_eq_iff)
qed
lemma matches_op:
shows "matches⋅pat = smap⋅cfst oo sfilter⋅(snull oo csnd oo csnd)
oo sscanl⋅(Λ (n, usvs) x. (n + 1, op⋅pat⋅usvs⋅x))⋅(0, ([::], pat))" (is "?lhs = ?rhs")
proof(cases "pat = ⊥")
case True
then have "?lhs⋅xs = ?rhs⋅xs" for xs by (cases xs; clarsimp)
then show ?thesis by (simp add: cfun_eq_iff)
next
case False then show ?thesis
apply (subst (2) oo_assoc)
apply (rule Bird_strategy)
apply (simp_all add: endswith_split split_sfoldl_op oo_assoc)
done
qed
text‹
Using @{thm [source] "split_sfoldl_op"} we can rewrite @{const ‹op›} into a more perspicuous form
that exhibits how KMP handles the failure of the text to continue matching the pattern:
›
fixrec
op' :: "[:'a::Eq_def:] → [:'a:] × [:'a:] → 'a → [:'a:] × [:'a:]"
where
[simp del]:
"op'⋅pat⋅(us, vs)⋅x =
( If prefix⋅[:x:]⋅vs then (us :@ [:x:], stail⋅vs)
else If snull⋅us then ([::], pat)
else sfoldl⋅(op'⋅pat)⋅([::], pat)⋅(stail⋅us :@ [:x:])
)"
text‹
Intuitively if ‹x› continues the pattern match then we
extend the @{const ‹split›} of ‹pat›
recorded in ‹us› and ‹vs›. Otherwise we
need to find a prefix of ‹pat› to continue matching
with. If we have yet to make any progress (i.e., ‹us =
[::]›) we restart with the entire ‹pat› (aka
‹z›) and discard ‹x›. Otherwise, because a
match cannot begin with @{term ‹us :@ [:x:]›}, we @{const
‹split›} ‹pat› (aka ‹z›) by
iterating @{const ‹op'›} over @{term
‹stail⋅us :@ [:x:]›}. The remainder of the
development is about memoising this last computation.
This is not yet the full KMP algorithm as it lacks what we call the
`K' optimisation, which we add in \S\ref{sec:KMP:data_refinement}.
Note that a termination proof for @{const "op'"} in HOL is tricky due
to its use of higher-order nested recursion via @{const
‹sfoldl›}.
›
lemma op'_strict[simp]:
"op'⋅pat⋅⊥ = ⊥"
"op'⋅pat⋅(us, ⊥) = ⊥"
"op'⋅pat⋅usvs⋅⊥ = ⊥"
by fixrec_simp+
lemma sfoldl_op'_strict[simp]:
"op'⋅pat⋅(sfoldl⋅(op'⋅pat)⋅(us, ⊥)⋅xs)⋅x = ⊥"
by (induct xs arbitrary: x rule: srev_induct) simp_all
lemma op'_op:
shows "op'⋅pat⋅usvs⋅x = op⋅pat⋅usvs⋅x"
proof(cases "pat = ⊥" "x = ⊥" rule: bool.exhaust[case_product bool.exhaust])
case True_False then show ?thesis
apply (subst op'.unfold)
apply (subst op.unfold)
apply simp
done
next
case False_False then show ?thesis
proof(induct usvs arbitrary: x rule: op_induct)
case (step usvs x)
have *: "sfoldl⋅(op'⋅pat)⋅([::], pat)⋅xs = sfoldl⋅(op⋅pat)⋅([::], pat)⋅xs"
if "lt⋅(slength⋅xs)⋅(slength⋅(cfst⋅usvs)) = TT" for xs
using that
proof(induct xs rule: srev_induct)
case (ssnoc x' xs')
from ssnoc(1,2,4) have "lt⋅(slength⋅xs')⋅(slength⋅(cfst⋅usvs)) = TT"
using lt_slength_0(2) lt_trans by auto
moreover
from step(2) ssnoc(1,2,4) have "lt⋅(slength⋅(cfst⋅(split⋅pat⋅xs')))⋅(slength⋅(cfst⋅usvs)) = TT"
using lt_trans split_length_lt by (auto 10 0)
ultimately show ?case by (simp add: ssnoc.hyps split_sfoldl_op split_snoc_op step)
qed simp_all
from step.prems show ?case
apply (subst op'.unfold)
apply (subst op.unfold)
apply (clarsimp simp: If2_def[symmetric] snull_FF_conv split_sfoldl_op[symmetric] * split: If2_splits)
apply (clarsimp simp: split_sfoldl_op step split_length_lt)
done
qed
qed simp_all
subsection‹ Step 2: Data refinement and the `K' optimisation \label{sec:KMP:data_refinement} ›
text‹
Bird memoises the restart computation in @{const ‹op'›} in two steps. The first reifies
the control structure of @{const ‹op'›} into a non-wellfounded tree, which we discuss here. The
second increases the sharing in this tree; see \S\ref{sec:KMP:increase_sharing}.
Briefly, we cache the @{term ‹sfoldl⋅(op'⋅pat)⋅([::], pat)⋅(stail⋅us :@ [:x:])›}
computation in @{const ‹op'›} by finding a ``representation'' type @{typ "'t"}
for the ``abstract'' type @{typ ‹[:'a::Eq_def:] × [:'a:]›}, a
pair of functions @{term ‹rep :: [:'a::Eq_def:] × [:'a:] → 't›},
@{term ‹abs :: 't → [:'a::Eq_def:] × [:'a:]›} where @{prop ‹abs oo rep = ID›}, and then
finding a derived form of @{const ‹op'›} that works on @{typ "'t"} rather
than @{typ "[:'a::Eq_def:] × [:'a:]"}. We also take the opportunity to add the `K' optimisation in the form of the @{term ‹next›}
function.
As such steps are essentially @{emph ‹deus ex machina›}, we try to provide some intuition
after showing the new definitions.
›
domain 'a tree
= Null
| Node (label :: 'a) (lazy left :: "'a tree") (lazy right :: "'a tree")
lemma tree_injects'[simp]:
"(Node⋅a⋅l⋅r = Node⋅a'⋅l'⋅r') ⟷ (a = a' ∧ (a ≠ ⊥ ⟶ l = l' ∧ r = r'))"
by (cases "a = ⊥"; clarsimp)
lemma match_Null_match_Node_tree_case: "match_Null⋅t⋅k1 +++ match_Node⋅t⋅k2 = tree_case⋅k1⋅k2⋅t"
by (cases t) simp_all
lemma match_Node_mplus_match_Node: "match_Node⋅x⋅k1 +++ match_Node⋅x⋅k2 = match_Node⋅x⋅(Λ v l r. k1⋅v⋅l⋅r +++ k2⋅v⋅l⋅r)"
by (cases x; clarsimp)
lemma tree_case_distr:
"f⋅⊥ = ⊥ ⟹ f⋅(tree_case⋅g⋅h⋅t) = tree_case⋅(f⋅g)⋅(Λ x l r. f⋅(h⋅x⋅l⋅r))⋅t"
"(tree_case⋅g'⋅h'⋅t)⋅z = tree_case⋅(g'⋅z)⋅(Λ x l r. h'⋅x⋅l⋅r⋅z)⋅t"
by (case_tac [!] t) simp_all
lemma tree_case_cong:
assumes "t = t'"
assumes "t' = Null ⟹ n = n'"
assumes "⋀v l r. ⟦t' = Node⋅v⋅l⋅r; v ≠ ⊥⟧ ⟹ c v l r = c' v l r"
assumes "cont (λ(x, y, z). c x y z)"
assumes "cont (λ(x, y, z). c' x y z)"
shows "tree_case⋅n⋅(Λ v l r. c v l r)⋅t = tree_case⋅n'⋅(Λ v l r. c' v l r)⋅t'"
using assms by (cases t; cases t'; clarsimp simp: prod_cont_iff)
lemma tree_take_smaller:
assumes "tree_take i⋅t = tree_take i⋅u"
assumes "j ≤ i"
shows "tree_take j⋅t = tree_take j⋅u"
using assms by (metis min.orderE tree.take_take)
fixrec tree_map' :: "('a → 'b) → 'a tree → 'b tree" where
"tree_map'⋅f⋅Null = Null"
| "a ≠ ⊥ ⟹ tree_map'⋅f⋅(Node⋅a⋅l⋅r) = Node⋅(f⋅a)⋅(tree_map'⋅f⋅l)⋅(tree_map'⋅f⋅r)"
lemma tree_map'_strict[simp]: "tree_map'⋅f⋅⊥ = ⊥"
by fixrec_simp
lemma tree_map'_ID': "tree_map'⋅ID⋅xs = xs"
by (induct xs) simp_all
lemma tree_map'_ID[simp]: "tree_map'⋅ID = ID"
by (clarsimp simp: cfun_eq_iff tree_map'_ID')
lemma tree_map'_strict_scons[simp]:
assumes "f⋅⊥ = ⊥"
shows "tree_map'⋅f⋅(Node⋅a⋅l⋅r) = Node⋅(f⋅a)⋅(tree_map'⋅f⋅l)⋅(tree_map'⋅f⋅r)"
using assms by (cases "a = ⊥"; clarsimp)
lemma tree_map'_comp'[simp]:
assumes "f⋅⊥ = ⊥"
shows "tree_map'⋅f⋅(tree_map'⋅g⋅t) = tree_map'⋅(f oo g)⋅t"
using assms by (induct t) simp_all
lemma tree_map'_comp[simp]:
assumes "f⋅⊥ = ⊥"
shows "tree_map'⋅f oo tree_map'⋅g = tree_map'⋅(f oo g)"
using assms by (clarsimp simp: cfun_eq_iff)
lemma tree_unique:
fixes x :: "'a tree"
assumes xfx: "x = f⋅x"
assumes f: "⋀i t u. tree_take i⋅t = tree_take i⋅u
⟹ tree_take (Suc i)⋅(f⋅t) = tree_take (Suc i)⋅(f⋅u)"
shows "x = fix⋅f"
proof(rule tree.take_lemma)
fix i show "tree_take i⋅x = tree_take i⋅(fix⋅f)"
proof(induct i)
case (Suc i) from xfx f[OF Suc, folded fix_eq] show ?case by simp
qed simp
qed
fixrec "next" :: "[:'a::Eq_def:] → ([:'a:] × [:'a:]) tree → ([:'a:] × [:'a:]) tree" where
"next⋅[::]⋅t = t"
| "⟦x ≠ ⊥; xs ≠ ⊥⟧ ⟹
next⋅(x :# xs)⋅Null = Null"
| "⟦x ≠ ⊥; xs ≠ ⊥⟧ ⟹
next⋅(x :# xs)⋅(Node⋅(us, [::])⋅l⋅r) = Node⋅(us, [::])⋅l⋅r"
| "⟦v ≠ ⊥; vs ≠ ⊥; x ≠ ⊥; xs ≠ ⊥⟧ ⟹
next⋅(x :# xs)⋅(Node⋅(us, v :# vs)⋅l⋅r) = If eq⋅x⋅v then l else Node⋅(us, v :# vs)⋅l⋅r"
fixrec
root2 :: "[:'a::Eq_def:] → ([:'a:] × [:'a:]) tree"
and op2 :: "[:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree"
and rep2 :: "[:'a:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree"
and left2 :: "[:'a:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree"
and right2 :: "[:'a:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree"
where
[simp del]:
"root2⋅pat = rep2⋅pat⋅([::], pat)"
| "op2⋅pat⋅Null⋅x = root2⋅pat"
| "usvs ≠ ⊥ ⟹
op2⋅pat⋅(Node⋅usvs⋅l⋅r)⋅x = If prefix⋅[:x:]⋅(csnd⋅usvs) then r else op2⋅pat⋅l⋅x"
| [simp del]:
"rep2⋅pat⋅usvs = Node⋅usvs⋅(left2⋅pat⋅usvs)⋅(right2⋅pat⋅usvs)"
| "left2⋅pat⋅([::], vs) = next⋅vs⋅Null"
| "⟦u ≠ ⊥; us ≠ ⊥⟧ ⟹
left2⋅pat⋅(u :# us, vs) = next⋅vs⋅(sfoldl⋅(op2⋅pat)⋅(root2⋅pat)⋅us)"
| "right2⋅pat⋅(us, [::]) = Null"
| "⟦v ≠ ⊥; vs ≠ ⊥⟧ ⟹
right2⋅pat⋅(us, v :# vs) = rep2⋅pat⋅(us :@ [:v:], vs)"
fixrec abs2 :: "([:'a:] × [:'a:]) tree → [:'a:] × [:'a:]" where
"usvs ≠ ⊥ ⟹ abs2⋅(Node⋅usvs⋅l⋅r) = usvs"
fixrec matches2 :: "[:'a::Eq_def:] → [:'a:] → [:Integer:]" where
[simp del]: "matches2⋅pat = smap⋅cfst oo sfilter⋅(snull oo csnd oo abs2 oo csnd)
oo sscanl⋅(Λ (n, x) y. (n + 1, op2⋅pat⋅x⋅y))⋅(0, root2⋅pat)"
text‹
\begin{figure}
\centering
\begin{tikzpicture}[
shorten >=1pt,
node distance=1.5cm,
on grid,
auto,
initial text=,
thick,
accepting/.style = {rectangle,minimum size=0.3cm}
]
\node[state,accepting] (q_0i) {};
\node[state,initial] (q_0) [right=of q_0i] {$q_0$};
\node[state] (q_1) [right=of q_0] {$q_1$};
\node[state] (q_2) [right=of q_1] {$q_2$};
\node[state] (q_3) [right=of q_2] {$q_3$};
\node[state] (q_4) [right=of q_3] {$q_4$};
\node[state,double] (q_5) [right=of q_4] {$q_5$};
\node[state,accepting] (q_5r) [right=of q_5] {};
\path[->] (q_0) edge [bend left] node [above] {0} (q_1)
(q_1) edge [bend left] node [above] {1} (q_2)
(q_2) edge [bend left] node [above] {0} (q_3)
(q_3) edge [bend left] node [above] {0} (q_4)
(q_4) edge [bend left] node [above] {1} (q_5)
(q_5) edge [bend left] node [above] {*} (q_5r);
\path[->] (q_0) edge [bend right] (q_0i)
(q_1) edge [bend left] (q_0)
(q_2) edge [bend left] (q_0) % MP
(q_2) edge [bend left,color=red] (q_0i) % K opt
(q_3) edge [bend left] (q_1)
(q_4) edge [bend left] (q_1) % MP
(q_4) edge [bend left,color=red] (q_0) % K opt
(q_5) edge [bend left] (q_2);
\end{tikzpicture}
\caption{An example from \<^citet>‹‹\S2.1› in "CrochemoreRytter:2002"›. The MP tree for the
pattern $01001$ is drawn in black: right transitions are labelled
with a symbol, whereas left transitions are unlabelled. The two
`K'-optimised left transitions are shown in red. The boxes denote
@{const ‹Null›}. The root node is $q_0$.}
\label{fig:example_tree}
\end{figure}
This tree can be interpreted as a sort of automaton\footnote{\<^citet>‹‹\S3.1› in "Bird:2012"› suggests it can
be thought of as a doubly-linked list, following \<^citet>‹"TakeichiAkama:1990"›.)}, where @{const
‹op2›} goes @{const ‹right›} if the pattern
continues with the next element of the text, and @{const
‹left›} otherwise, to determine how much of a prefix of
the pattern could still be in play. Figure~\ref{fig:example_tree}
visualises such an automaton for the pattern $01001$, used by \<^citet>‹‹\S2.1› in "CrochemoreRytter:2002"› to
illustrate the difference between Morris-Pratt (MP) and
Knuth-Morris-Pratt (KMP) preprocessing as we discuss below. Note that
these are not the classical Mealy machines that correspond to regular
expressions, where all outgoing transitions are labelled with symbols.
The following lemma shows how our sample automaton is encoded as a non-wellfounded tree.
›
lemma concrete_tree_KMP:
shows "root2⋅[:0::Integer, 1, 0, 0, 1:]
= (μ q0. Node⋅([::], [:0, 1, 0, 0, 1:])
⋅Null
⋅(μ q1. Node⋅([:0:], [:1, 0, 0, 1:])
⋅q0
⋅(μ q2. Node⋅([:0,1:], [:0, 0, 1:])
⋅Null
⋅(Node⋅([:0,1,0:], [:0, 1:])
⋅q1
⋅(Node⋅([:0,1,0,0:], [:1:])
⋅q0
⋅(Node⋅([:0,1,0,0,1:], [::])⋅q2⋅Null))))))"
(is "?lhs = fix⋅?F")
proof(rule tree_unique)
note rep2.simps[simp]
show "?lhs = ?F⋅?lhs"
apply (subst root2.unfold; simp)
apply (rule tree_unique; simp)
apply (intro conjI)
apply (subst (1) root2.unfold; simp)
apply (subst (1) root2.unfold; fastforce)
apply (rule tree_unique; simp)
apply (intro conjI)
apply (subst (1) root2.unfold; simp)
apply (subst (1) root2.unfold; simp)
apply (subst (1) root2.unfold; simp)
apply (subst (1) root2.unfold; fastforce)
apply (subst (1) root2.unfold; simp)
apply (subst (1) root2.unfold; simp)
apply (subst (1) root2.unfold; simp)
apply (subst (1 2) root2.unfold; fastforce)
apply (subst (1) root2.unfold; simp)
apply (subst (1) root2.unfold; simp)
apply (subst (1) root2.unfold; fastforce)
apply (rename_tac i t u; case_tac i; clarsimp)
apply (rename_tac t u i; case_tac i; clarsimp)
apply (rename_tac t u i; case_tac i; clarsimp)
apply (meson Suc_n_not_le_n linear tree_take_smaller)
apply (rule parallel_fix_ind; simp)
apply (rename_tac i t u x y; case_tac i; clarsimp)
apply (rename_tac i; case_tac i; clarsimp; intro conjI)
apply (meson Suc_n_not_le_n linear tree_take_smaller)
apply (rename_tac i; case_tac i; clarsimp)
apply (rename_tac i; case_tac i; clarsimp)
apply (meson Suc_n_not_le_n linear tree_take_smaller)
done
fix i :: nat
fix t u :: "([:Integer:] × [:Integer:]) tree"
assume "tree_take i⋅t = tree_take i⋅u"
then show "tree_take (Suc i)⋅(?F⋅t) = tree_take (Suc i)⋅(?F⋅u)"
apply simp
apply (rule parallel_fix_ind; simp)
apply (case_tac i; clarsimp; intro conjI)
apply (meson Suc_n_not_le_n linear tree_take_smaller)
apply (rule parallel_fix_ind; simp)
apply (rename_tac j t0 t1; case_tac j; clarsimp)
apply (rename_tac j; case_tac j; clarsimp; intro conjI)
apply (meson Suc_n_not_le_n linear tree_take_smaller)
apply (rename_tac j; case_tac j; clarsimp; intro conjI)
apply (meson Suc_n_not_le_n linear tree_take_smaller)
apply (rename_tac j; case_tac j; clarsimp)
apply (meson Suc_n_not_le_n linear tree_take_smaller)
done
qed
text‹
The sharing that we expect from a lazy (call-by-need) evaluator is here implied by the use of
nested fixed points.
The KMP preprocessor is expressed by the @{const ‹left2›} function, where @{const ‹op2›} is used
to match the pattern against itself; the use of @{const ‹op2›} in @{const ‹matches2›} (``the driver'')
is responsible for matching the (preprocessed) pattern against the text. This formally cashes in
an observation by \<^citet>‹‹\S5› in "vanderWoude:1989"›, that these two algorithms
are essentially the same, which has eluded other presentations\footnote{For instance, contrast
our shared use of @{const ‹op2›} with the separated \texttt{match}
and \texttt{rematch} functions of \<^citet>‹‹Figure~1› in "AgerDanvyRohde:2006"›.}.
Bird uses @{const ‹Null›} on a left path to signal to the driver that it should discard the
current element of the text and restart matching from the beginning of the pattern (i.e,
@{const ‹root2›}). This is a step towards the removal of @{term ‹us›} in \S\ref{sec:KMP:step8}.
Note that the @{const ‹Null›} at the end of the rightmost path is unreachable: the rightmost
@{const ‹Node›} has @{term "vs = [::]"} and therefore @{const ‹op2›} always takes the left branch.
The `K' optimisation is perhaps best understood by example. Consider
the automaton in Figure~\ref{fig:example_tree}, and a text beginning
with \texttt{011}. Using the MP (black) transitions we take the path
$\rightarrow q_0 \stackrel{{\mathtt{0}}}{\rightarrow} q_1
\stackrel{\mathtt{1}}{\rightarrow} \overbrace{q_2 \rightarrow q_0
\rightarrow \Box}$. Now, due to the failure of the comparison of the
current element of the text (\texttt{1}) at $q_2$, we can predict that
the (identical) comparison at node $q_0$ will fail as well, and
therefore have $q_2$ left-branch directly to $\Box$. This saves a
comparison in the driver at the cost of another in the preprocessor
(in @{const ‹next›}). These optimisations are the red
arrows in the diagram, and can in general save an arbitrary number of
driver comparisons; consider the pattern $\mathtt{1}^n$ for instance.
More formally, @{const ‹next›} ensures that the heads of
the suffixes of the pattern (@{term ‹vs›}) on consecutive
labels on left paths are distinct; see below for a proof of this fact
in our setting, and \<^citet>‹‹\S3.3.4› in
"Gusfield:1997"› for a classical account. Unlike Bird's suggestion
(p134), our @{const ‹next›} function is not recursive.
We note in passing that while MP only allows ‹Null› on
the left of the root node, ‹Null› can be on the left of
any KMP node except for the rightmost
(i.e., the one that signals a complete pattern match) where no optimisation is possible.
We proceed with the formalities of the data refinement.
›
schematic_goal root2_op2_rep2_left2_right2_def:
"( root2 :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
, op2 :: [:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
, rep2 :: [:'a:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree
, left2 :: [:'a:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree
, right2 :: [:'a:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )
= fix⋅?F"
unfolding op2_def root2_def rep2_def left2_def right2_def by simp
lemma abs2_strict[simp]:
"abs2⋅⊥ = ⊥"
"abs2⋅Null = ⊥"
by fixrec_simp+
lemma next_strict[simp]:
"next⋅⊥ = ⊥"
"next⋅xs⋅⊥ = ⊥"
"next⋅(x :# xs)⋅(Node⋅(us, ⊥)⋅l⋅r) = ⊥"
apply fixrec_simp
apply (cases xs; fixrec_simp; simp)
apply (cases "x = ⊥"; cases "xs = ⊥"; cases "us = ⊥"; fixrec_simp)
done
lemma next_Null[simp]:
assumes "xs ≠ ⊥"
shows "next⋅xs⋅Null = Null"
using assms by (cases xs) simp_all
lemma next_snil[simp]:
assumes "xs ≠ ⊥"
shows "next⋅xs⋅(Node⋅(us, [::])⋅l⋅r) = Node⋅(us, [::])⋅l⋅r"
using assms by (cases xs) simp_all
lemma op2_rep2_left2_right2_strict[simp]:
"op2⋅pat⋅⊥ = ⊥"
"op2⋅pat⋅(Node⋅(us, ⊥)⋅l⋅r) = ⊥"
"op2⋅pat⋅(Node⋅usvs⋅l⋅r)⋅⊥ = ⊥"
"rep2⋅pat⋅⊥ = ⊥"
"left2⋅pat⋅(⊥, vs) = ⊥"
"left2⋅pat⋅(us, ⊥) = ⊥"
"right2⋅pat⋅(us, ⊥) = ⊥"
apply fixrec_simp
apply (cases "us = ⊥"; fixrec_simp; simp)
apply (cases "usvs = ⊥"; fixrec_simp; simp)
apply fixrec_simp
apply fixrec_simp
apply (cases us; fixrec_simp)
apply fixrec_simp
done
lemma snd_abs_root2_bottom[simp]: "prod.snd (abs2⋅(root2⋅⊥)) = ⊥"
by (simp add: root2.unfold rep2.unfold)
lemma abs_rep2_ID'[simp]: "abs2⋅(rep2⋅pat⋅usvs) = usvs"
by (cases "usvs = ⊥"; subst rep2.unfold; clarsimp)
lemma abs_rep2_ID: "abs2 oo rep2⋅pat = ID"
by (clarsimp simp: cfun_eq_iff)
lemma rep2_snoc_right2:
assumes "prefix⋅[:x:]⋅vs = TT"
shows "rep2⋅pat⋅(us :@ [:x:], stail⋅vs) = right2⋅pat⋅(us, vs)"
using assms by (cases "x = ⊥"; cases vs; clarsimp)
lemma not_prefix_op2_next:
assumes "prefix⋅[:x:]⋅xs = FF"
shows "op2⋅pat⋅(next⋅xs⋅(rep2⋅pat⋅usvs))⋅x = op2⋅pat⋅(rep2⋅pat⋅usvs)⋅x"
proof -
obtain us vs where "usvs = (us, vs)" by force
with assms show ?thesis
by (cases xs; cases us; clarsimp; cases vs;
clarsimp simp: rep2.simps prefix_singleton_FF If2_def[symmetric] split: If2_splits)
qed
text‹
Bird's appeal to ‹foldl_fusion› (p130) is too weak to
justify this data refinement as his condition (iii) requires the
worker functions to coincide on all representation values. Concretely
he asks that:
\begin{center}
@{prop "rep2⋅pat⋅(op⋅pat⋅(abs2⋅t)⋅x) = op2⋅pat⋅t⋅x"} ―‹Bird (17.2)›
\end{center}
where ‹t› is an arbitrary tree. This does not hold for junk representations
such as:
\begin{center}
@{term ‹t = Node⋅(pat, [::])⋅Null⋅Null›}
\end{center}
Using worker/wrapper fusion \<^citep>‹"GillHutton:2009" and "Gammie:2011"› specialised to @{const
‹sscanl›} (@{thm [source] "sscanl_ww_fusion"}) we only
need to establish this identity for valid representations, i.e., when
‹t› lies under the image of ‹rep2›. In
pictures, we show that this diagram commutes:
\begin{center}
\begin{tikzcd}[column sep=8em]
usvs \arrow[r, "‹Λ usvs. op⋅pat⋅usvs⋅x›"] \arrow[d, "‹rep2⋅pat›"] & usvs' \arrow[d, "‹rep2⋅pat›"] \\
t \arrow[r, "‹Λ usvs. op2⋅pat⋅usvs⋅x ›"] & t'
\end{tikzcd}
\end{center}
Clearly this result self-composes: after an initial @{term
‹rep2⋅pat›} step, we can repeatedly simulate
@{const ‹op›} steps with @{const ‹op2›} steps.
›
lemma op_op2_refinement:
assumes "pat ≠ ⊥"
shows "rep2⋅pat⋅(op⋅pat⋅usvs⋅x) = op2⋅pat⋅(rep2⋅pat⋅usvs)⋅x"
proof(cases "x = ⊥" "usvs = ⊥" rule: bool.exhaust[case_product bool.exhaust])
case False_False
then have "x ≠ ⊥" "usvs ≠ ⊥" by simp_all
then show ?thesis
proof(induct usvs arbitrary: x rule: op_induct)
case (step usvs)
obtain us vs where usvs: "usvs = (us, vs)" by fastforce
have *: "sfoldl⋅(op2⋅pat)⋅(root2⋅pat)⋅xs = rep2⋅pat⋅(split⋅pat⋅xs)" if "lt⋅(slength⋅xs)⋅(slength⋅us) = TT" for xs
using that
proof(induct xs rule: srev_induct)
case (ssnoc x xs)
from ssnoc(1,2,4) have IH: "sfoldl⋅(op2⋅pat)⋅(root2⋅pat)⋅xs = rep2⋅pat⋅(split⋅pat⋅xs)"
by - (rule ssnoc; auto intro: lt_trans dest: lt_slength_0)
obtain us' vs' where us'vs': "split⋅pat⋅xs = (us', vs')" by fastforce
from ‹pat ≠ ⊥› ssnoc(1,2,4) usvs show ?case
apply (clarsimp simp: split_sfoldl_op[symmetric] IH)
apply (rule step(1)[simplified abs_rep2_ID', simplified, symmetric])
using lt_trans split_length_lt split_sfoldl_op apply fastforce+
done
qed (fastforce simp: ‹pat ≠ ⊥› root2.unfold)+
have **: "If snull⋅us then rep2⋅pat⋅([::], pat) else rep2⋅pat⋅(op⋅pat⋅(split⋅pat⋅(stail⋅us))⋅x)
= op2⋅pat⋅(left2⋅pat⋅(us, vs))⋅x" if "prefix⋅[:x:]⋅vs = FF"
proof(cases us)
case snil with that show ?thesis
by simp (metis next_Null op2.simps(1) prefix.simps(1) prefix_FF_not_snilD root2.simps)
next
case (scons u' us')
from ‹pat ≠ ⊥› scons have "lt⋅(slength⋅(cfst⋅(split⋅pat⋅us')))⋅(slength⋅us) = TT"
using split_length_lt by fastforce
from ‹pat ≠ ⊥› ‹x ≠ ⊥› usvs that scons this show ?thesis
by (clarsimp simp: * step(1)[simplified abs_rep2_ID'] not_prefix_op2_next)
qed simp
from ‹usvs ≠ ⊥› usvs show ?case
apply (subst (2) rep2.unfold)
apply (subst op2.unfold)
apply (subst op.unfold)
apply (clarsimp simp: If_distr rep2_snoc_right2 ** cong: If_cong)
done
qed
qed (simp_all add: rep2.unfold)
text‹
Therefore the result of this data refinement is extensionally equal to
the specification:
›
lemma data_refinement:
shows "matches = matches2"
proof(intro cfun_eqI)
fix pat xs :: "[:'a:]" show "matches⋅pat⋅xs = matches2⋅pat⋅xs"
proof(cases "pat = ⊥")
case True then show ?thesis by (cases xs; clarsimp simp: matches2.simps)
next
case False then show ?thesis
unfolding matches2.simps
apply (subst matches_op)
apply (subst sscanl_ww_fusion[where wrap="ID ** abs2" and unwrap="ID ** rep2⋅pat" and f'="Λ (n, x) y. (n + 1, op2⋅pat⋅x⋅y)"])
apply (simp add: abs_rep2_ID)
apply (simp add: op_op2_refinement)
apply (simp add: oo_assoc sfilter_smap root2.unfold)
apply (simp add: oo_assoc[symmetric])
done
qed
qed
text‹
This computation can be thought of as a pair coroutines with a
producer (@{const ‹root2›}/@{const ‹rep2›})
/ consumer (@{const ‹op2›}) structure. It turns out that
laziness is not essential (see \S\ref{sec:implementations}), though it
does depend on being able to traverse incompletely defined trees.
The key difficulty in defining this computation in HOL using present
technology is that @{const ‹op2›} is neither terminating
nor @{emph ‹friendly›} in the terminology of \<^citet>‹"BlanchetteEtAl:2017"›.
While this representation works for automata with this sort of
structure, it is unclear how general it is; in particular it may not
work so well if @{const ‹left›} branches can go forward
as well as back. See also the commentary in \<^citet>‹"HinzeJeuring:2001"›, who observe that sharing is easily lost, and so
it is probably only useful in ``closed'' settings like the present
one, unless the language is extended in unusual ways \<^citep>‹"JeanninEtAl:2017"›.
\label{thm:k_property}
We conclude by proving that @{const ‹rep2›} produces
trees that have the `K' property, viz that labels on consecutive nodes
on a left path do not start with the same symbol. This also
establishes the productivity of @{const ‹root2›}. The
pattern of proof used here -- induction nested in coinduction --
recurs in \S\ref{sec:KMP:increase_sharing}.
›
coinductive K :: "([:'a::Eq:] × [:'a:]) tree ⇒ bool" where
"K Null"
| "⟦ usvs ≠ ⊥; K l; K r;
⋀v vs. csnd⋅usvs = v :# vs ⟹ l = Null ∨ (∃v' vs'. csnd⋅(label⋅l) = v' :# vs' ∧ eq⋅v⋅v' = FF)
⟧ ⟹ K (Node⋅usvs⋅l⋅r)"
declare K.intros[intro!, simp]
lemma sfoldl_op2_root2_rep2_split:
assumes "pat ≠ ⊥"
shows "sfoldl⋅(op2⋅pat)⋅(root2⋅pat)⋅xs = rep2⋅pat⋅(split⋅pat⋅xs)"
proof(induct xs rule: srev_induct)
case (ssnoc x xs) with ‹pat ≠ ⊥› ssnoc show ?case by (clarsimp simp: split_sfoldl_op[symmetric] op_op2_refinement)
qed (simp_all add: ‹pat ≠ ⊥› root2.unfold)
lemma K_rep2:
assumes "pat ≠ ⊥"
assumes "us :@ vs = pat"
shows "K (rep2⋅pat⋅(us, vs))"
using assms
proof(coinduction arbitrary: us vs)
case (K us vs) then show ?case
proof(induct us arbitrary: vs rule: op_induct')
case (step us)
from step.prems have "us ≠ ⊥" "vs ≠ ⊥" by auto
show ?case
proof(cases us)
case bottom with ‹us ≠ ⊥› show ?thesis by simp
next
case snil with step.prems show ?thesis by (cases vs; force simp: rep2.simps)
next
case (scons u' us')
from ‹pat ≠ ⊥› scons ‹us ≠ ⊥› ‹vs ≠ ⊥›
obtain usl vsl where splitl: "split⋅pat⋅us' = (usl, vsl)" "usl ≠ ⊥" "vsl ≠ ⊥" "usl :@ vsl = pat"
by (metis (no_types, opaque_lifting) Rep_cfun_strict1 prod.collapse sappend_strict sappend_strict2 split_pattern)
from scons obtain l r where r: "rep2⋅pat⋅(us, vs) = Node⋅(us, vs)⋅l⋅r" by (simp add: rep2.simps)
moreover
have "(∃us vs. l = rep2⋅pat⋅(us, vs) ∧ us :@ vs = pat) ∨ K l"
proof(cases vs)
case snil with scons splitl r show ?thesis
by (clarsimp simp: rep2.simps sfoldl_op2_root2_rep2_split)
next
case scons
with ‹pat ≠ ⊥› ‹us = u' :# us'› ‹u' ≠ ⊥› ‹us' ≠ ⊥› ‹vs ≠ ⊥› r splitl show ?thesis
apply (clarsimp simp: rep2.simps sfoldl_op2_root2_rep2_split)
apply (cases vsl; cases usl; clarsimp simp: If2_def[symmetric] sfoldl_op2_root2_rep2_split split: If2_splits)
apply (rename_tac ul' usl')
apply (cut_tac us'="prod.fst (split⋅pat⋅usl')" and vs="prod.snd (split⋅pat⋅usl')" in step(1); clarsimp simp: split_pattern)
apply (metis fst_conv lt_trans slength.simps(2) split_length_lt step.prems(1))
apply (erule disjE; clarsimp simp: sfoldl_op2_root2_rep2_split)
apply (rename_tac b l r)
apply (case_tac b; clarsimp simp: rep2.simps)
apply (auto simp: If2_def[symmetric] rep2.simps dest: split_pattern[rotated] split: If2_splits)
done
qed (simp add: ‹vs ≠ ⊥›)
moreover
from ‹us :@ vs = pat› ‹us ≠ ⊥› ‹vs ≠ ⊥› r
have "(∃usr vsr. r = rep2⋅pat⋅(usr, vsr) ∧ usr :@ vsr = pat) ∨ K r"
by (cases vs; clarsimp simp: rep2.simps)
moreover
have "l = Null ∨ (∃v' vs'. csnd⋅(label⋅l) = v' :# vs' ∧ eq⋅v⋅v' = FF)" if "vs = v :# vs'" for v vs'
proof(cases vsl)
case snil with ‹us :@ vs = pat› ‹us = u' :# us'› splitl show ?thesis
using split_length_lt[where pat=pat and xs=us']
by (force elim: slengthE simp: one_Integer_def split: if_splits)
next
case scons
from splitl have "lt⋅(slength⋅usl)⋅(slength⋅us' + 1) = TT"
by (metis fst_conv fst_strict split_bottom_iff split_length_lt)
with scons ‹pat ≠ ⊥› ‹us = u' :# us'› ‹u' ≠ ⊥› ‹us' ≠ ⊥› ‹vs ≠ ⊥› r splitl ‹vs = v :# vs'› show ?thesis
using step(1)[OF _ ‹pat ≠ ⊥›, where us'="prod.fst (split⋅pat⋅us')" and vs="prod.snd (split⋅pat⋅us')"]
by (clarsimp simp: rep2.simps sfoldl_op2_root2_rep2_split If2_def[symmetric] split: If2_splits)
qed (simp add: ‹vsl ≠ ⊥›)
moreover note ‹pat ≠ ⊥› ‹us ≠ ⊥› ‹vs ≠ ⊥›
ultimately show ?thesis by auto
qed
qed
qed
theorem K_root2:
assumes "pat ≠ ⊥"
shows "K (root2⋅pat)"
using assms unfolding root2.unfold by (simp add: K_rep2)
text‹
The remaining steps are as follows:
▪ 3. introduce an accumulating parameter (‹grep›).
▪ 4. inline ‹rep› and simplify.
▪ 5. simplify to Bird's ``simpler forms.''
▪ 6. memoise ‹left›.
▪ 7. simplify, unfold ‹prefix›.
▪ 8. discard ‹us›.
▪ 9. factor out ‹pat›.
›
subsection‹ Step 3: Introduce an accumulating parameter (grep) ›
text‹
Next we prepare for the second memoization step (\S\ref{sec:KMP:increase_sharing})
by introducing an accumulating parameter to @{const ‹rep2›} that supplies the value of the left
subtree.
We retain @{const ‹rep2›} as a wrapper for now, and inline @{const ‹right2›} to speed up
simplification.
›
fixrec
root3 :: "[:'a::Eq_def:] → ([:'a:] × [:'a:]) tree"
and op3 :: "[:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree"
and rep3 :: "[:'a:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree"
and grep3 :: "[:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree"
where
[simp del]:
"root3⋅pat = rep3⋅pat⋅([::], pat)"
| "op3⋅pat⋅Null⋅x = root3⋅pat"
| "usvs ≠ ⊥ ⟹
op3⋅pat⋅(Node⋅usvs⋅l⋅r)⋅x = If prefix⋅[:x:]⋅(csnd⋅usvs) then r else op3⋅pat⋅l⋅x"
| [simp del]:
"rep3⋅pat⋅usvs = grep3⋅pat⋅(case cfst⋅usvs of [::] ⇒ Null | u :# us ⇒ sfoldl⋅(op3⋅pat)⋅(root3⋅pat)⋅us)⋅usvs"
| [simp del]:
"grep3⋅pat⋅l⋅usvs = Node⋅usvs⋅(next⋅(csnd⋅usvs)⋅l)⋅(case csnd⋅usvs of
[::] ⇒ Null
| v :# vs ⇒ rep3⋅pat⋅(cfst⋅usvs :@ [:v:], vs))"
schematic_goal root3_op3_rep3_grep3_def:
"( root3 :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
, op3 :: [:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
, rep3 :: [:'a:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree
, grep3 :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )
= fix⋅?F"
unfolding root3_def op3_def rep3_def grep3_def by simp
lemma r3_2:
"(Λ (root, op, rep, grep). (root, op, rep))⋅
( root3 :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
, op3 :: [:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
, rep3 :: [:'a:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree
, grep3 :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )
= (Λ (root, op, rep, left, right). (root, op, rep))⋅
( root2 :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
, op2 :: [:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
, rep2 :: [:'a::Eq_def:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree
, left2 :: [:'a::Eq_def:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree
, right2 :: [:'a::Eq_def:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )"
unfolding root2_op2_rep2_left2_right2_def root3_op3_rep3_grep3_def
apply (simp add: match_snil_match_scons_slist_case match_Null_match_Node_tree_case slist_case_distr tree_case_distr)
apply (simp add: fix_cprod fix_const)
apply (simp add: slist_case_distr)
done
subsection‹ Step 4: Inline rep ›
text‹
We further simplify by inlining @{const ‹rep3›} into @{const ‹root3›} and @{const ‹grep3›}.
›
fixrec
root4 :: "[:'a::Eq_def:] → ([:'a:] × [:'a:]) tree"
and op4 :: "[:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree"
and grep4 :: "[:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree"
where
[simp del]:
"root4⋅pat = grep4⋅pat⋅Null⋅([::], pat)"
| "op4⋅pat⋅Null⋅x = root4⋅pat"
| "usvs ≠ ⊥ ⟹
op4⋅pat⋅(Node⋅usvs⋅l⋅r)⋅x = If prefix⋅[:x:]⋅(csnd⋅usvs) then r else op4⋅pat⋅l⋅x"
| [simp del]:
"grep4⋅pat⋅l⋅usvs = Node⋅usvs⋅(next⋅(csnd⋅usvs)⋅l)⋅(case csnd⋅usvs of
[::] ⇒ Null
| v :# vs ⇒ grep4⋅pat⋅(case cfst⋅usvs :@ [:v:] of
[::] ⇒ Null
| u :# us ⇒ sfoldl⋅(op4⋅pat)⋅(root4⋅pat)⋅us)⋅(cfst⋅usvs :@ [:v:], vs))"
schematic_goal root4_op4_grep4_def:
"( root4 :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
, op4 :: [:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
, grep4 :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )
= fix⋅?F"
unfolding root4_def op4_def grep4_def by simp
lemma fix_syn4_permute:
assumes "cont (λ(X1, X2, X3, X4). F1 X1 X2 X3 X4)"
assumes "cont (λ(X1, X2, X3, X4). F2 X1 X2 X3 X4)"
assumes "cont (λ(X1, X2, X3, X4). F3 X1 X2 X3 X4)"
assumes "cont (λ(X1, X2, X3, X4). F4 X1 X2 X3 X4)"
shows "fix_syn (λ(X1, X2, X3, X4). (F1 X1 X2 X3 X4, F2 X1 X2 X3 X4, F3 X1 X2 X3 X4, F4 X1 X2 X3 X4))
= (λ(x1, x2, x4, x3). (x1, x2, x3, x4))
(fix_syn (λ(X1, X2, X4, X3). (F1 X1 X2 X3 X4, F2 X1 X2 X3 X4, F4 X1 X2 X3 X4, F3 X1 X2 X3 X4)))"
by (induct rule: parallel_fix_ind) (use assms in ‹auto simp: prod_cont_iff›)
lemma r4_3:
"( root4 :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
, op4 :: [:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
, grep4 :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )
= (Λ (root, op, rep, grep). (root, op, grep))⋅
( root3 :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
, op3 :: [:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
, rep3 :: [:'a:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree
, grep3 :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )"
unfolding root3_op3_rep3_grep3_def root4_op4_grep4_def
apply (clarsimp simp: slist_case_distr match_Null_match_Node_tree_case tree_case_distr eta_cfun)
apply (subst fix_syn4_permute; clarsimp simp: fix_cprod fix_const)
done
subsection‹ Step 5: Simplify to Bird's ``simpler forms'' ›
text‹
The remainder of @{const ‹left2›} in @{const ‹grep4›} can be simplified by transforming the
@{text "case"} scrutinee from @{term "cfst⋅usvs :@ [:v:]"} into @{term "cfst⋅usvs"}.
›
fixrec
root5 :: "[:'a::Eq_def:] → ([:'a:] × [:'a:]) tree"
and op5 :: "[:'a::Eq_def:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree"
and grep5 :: "[:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree"
where
[simp del]:
"root5⋅pat = grep5⋅pat⋅Null⋅([::], pat)"
| "op5⋅pat⋅Null⋅x = root5⋅pat"
| "usvs ≠ ⊥ ⟹
op5⋅pat⋅(Node⋅usvs⋅l⋅r)⋅x = If prefix⋅[:x:]⋅(csnd⋅usvs) then r else op5⋅pat⋅l⋅x"
| [simp del]:
"grep5⋅pat⋅l⋅usvs = Node⋅usvs⋅(next⋅(csnd⋅usvs)⋅l)⋅(case csnd⋅usvs of
[::] ⇒ Null
| v :# vs ⇒ grep5⋅pat⋅(case cfst⋅usvs of
[::] ⇒ root5⋅pat
| u :# us ⇒ sfoldl⋅(op5⋅pat)⋅(root5⋅pat)⋅(us :@ [:v:]))⋅(cfst⋅usvs :@ [:v:], vs))"
schematic_goal root5_op5_grep5_def:
"( root5 :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
, op5 :: [:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
, grep5 :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )
= fix⋅?F"
unfolding root5_def op5_def grep5_def by simp
lemma op5_grep5_strict[simp]:
"op5⋅pat⋅⊥ = ⊥"
"op5⋅pat⋅(Node⋅(us, ⊥)⋅l⋅r) = ⊥"
"op5⋅pat⋅(Node⋅usvs⋅l⋅r)⋅⊥ = ⊥"
"grep5⋅pat⋅l⋅⊥ = ⊥"
apply fixrec_simp
apply (cases "us = ⊥"; fixrec_simp; simp)
apply (cases "usvs = ⊥"; fixrec_simp; simp)
apply fixrec_simp
done
lemma r5_4:
"( root5 :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
, op5 :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
, grep5 :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )
= ( root4 :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
, op4 :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
, grep4 :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )"
unfolding root4_op4_grep4_def root5_op5_grep5_def
by (clarsimp simp: slist_case_distr slist_case_snoc stail_sappend cong: slist_case_cong)
subsection‹ Step 6: Memoize left \label{sec:KMP:increase_sharing} ›
text‹
The last substantial step is to memoise the computation of the left subtrees by tying the knot.
Note this makes the computation of ‹us› in the tree redundant; we remove it in \S\ref{sec:KMP:step8}.
›
fixrec
root6 :: "[:'a::Eq_def:] → ([:'a:] × [:'a:]) tree"
and op6 :: "[:'a::Eq_def:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree"
and grep6 :: "[:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree"
where
[simp del]:
"root6⋅pat = grep6⋅pat⋅Null⋅([::], pat)"
| "op6⋅pat⋅Null⋅x = root6⋅pat"
| "usvs ≠ ⊥ ⟹
op6⋅pat⋅(Node⋅usvs⋅l⋅r)⋅x = If prefix⋅[:x:]⋅(csnd⋅usvs) then r else op6⋅pat⋅l⋅x"
| [simp del]:
"grep6⋅pat⋅l⋅usvs = Node⋅usvs⋅(next⋅(csnd⋅usvs)⋅l)⋅(case csnd⋅usvs of
[::] ⇒ Null
| v :# vs ⇒ grep6⋅pat⋅(op6⋅pat⋅l⋅v)⋅(cfst⋅usvs :@ [:v:], vs))"
schematic_goal root6_op6_grep6_def:
"( root6 :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
, op6 :: [:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
, grep6 :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )
= fix⋅?F"
unfolding root6_def op6_def grep6_def by simp
lemma op6_grep6_strict[simp]:
"op6⋅pat⋅⊥ = ⊥"
"op6⋅pat⋅(Node⋅(us, ⊥)⋅l⋅r) = ⊥"
"op6⋅pat⋅(Node⋅usvs⋅l⋅r)⋅⊥ = ⊥"
"grep6⋅pat⋅l⋅⊥ = ⊥"
apply fixrec_simp
apply (cases "us = ⊥"; fixrec_simp; simp)
apply (cases "usvs = ⊥"; fixrec_simp; simp)
apply fixrec_simp
done
text‹
Intuitively this step cashes in the fact that, in the context of
@{term ‹grep6⋅pat⋅l⋅usvs›}, @{term
"sfoldl⋅(op6⋅pat)⋅(root6⋅pat)⋅us"} is
equal to @{term ‹l›}.
Connecting this step with the previous one is not simply a matter of
equational reasoning; we can see this by observing that the right
subtree of @{term ‹grep5⋅pat⋅l⋅usvs›}
does not depend on @{term ‹l›} whereas that of @{term
‹grep6⋅pat⋅l⋅usvs›} does, and therefore
these cannot be extensionally equal. Furthermore the computations of
the corresponding @{term ‹root›}s do not proceed in
lockstep: consider the computation of the left subtree.
For our purposes it is enough to show that the trees @{const
‹root5›} and @{const ‹root6›} are equal,
from which it follows that @{prop "op6 = op5"} by induction on its
tree argument. The equality is established by exhibiting a @{emph
‹tree bisimulation›} (@{const tree_bisim}) that relates
the corresponding ``producer'' ‹grep› functions. Such a
relation ‹R› must satisfy:
▪ ‹R ⊥ ⊥›;
▪ ‹R Null Null›; and
▪ if ‹R (Node⋅x⋅l⋅r) (Node⋅x'⋅l'⋅r')› then ‹x = x'›, ‹R l l'›, and ‹R r r'›.
›
text‹
The following pair of ‹left› functions define suitable left
paths from the corresponding @{term ‹root›}s.
›
fixrec left5 :: "[:'a::Eq_def:] → [:'a:] → ([:'a:] × [:'a:]) tree" where
"left5⋅pat⋅[::] = Null"
| "⟦u ≠ ⊥; us ≠ ⊥⟧ ⟹
left5⋅pat⋅(u :# us) = sfoldl⋅(op5⋅pat)⋅(root5⋅pat)⋅us"
fixrec left6 :: "[:'a::Eq_def:] → [:'a:] → ([:'a:] × [:'a:]) tree" where
"left6⋅pat⋅[::] = Null"
| "⟦u ≠ ⊥; us ≠ ⊥⟧ ⟹
left6⋅pat⋅(u :# us) = sfoldl⋅(op6⋅pat)⋅(root6⋅pat)⋅us"
inductive
root_bisim :: "[:'a::Eq_def:] ⇒ ([:'a:] × [:'a:]) tree ⇒ ([:'a:] × [:'a:]) tree ⇒ bool"
for pat :: "[:'a:]"
where
bottom: "root_bisim pat ⊥ ⊥"
| Null: "root_bisim pat Null Null"
| gl: "⟦pat ≠ ⊥; us ≠ ⊥; vs ≠ ⊥⟧
⟹ root_bisim pat (grep6⋅pat⋅(left6⋅pat⋅us)⋅(us, vs)) (grep5⋅pat⋅(left5⋅pat⋅us)⋅(us, vs))"
declare root_bisim.intros[intro!, simp]
lemma left6_left5_strict[simp]:
"left6⋅pat⋅⊥ = ⊥"
"left5⋅pat⋅⊥ = ⊥"
by fixrec_simp+
lemma op6_left6: "⟦us ≠ ⊥; v ≠ ⊥⟧ ⟹ op6⋅pat⋅(left6⋅pat⋅us)⋅v = left6⋅pat⋅(us :@ [:v:])"
by (cases us) simp_all
lemma op5_left5: "⟦us ≠ ⊥; v ≠ ⊥⟧ ⟹ op5⋅pat⋅(left5⋅pat⋅us)⋅v = left5⋅pat⋅(us :@ [:v:])"
by (cases us) simp_all
lemma root5_left5: "v ≠ ⊥ ⟹ root5⋅pat = left5⋅pat⋅[:v:]"
by simp
lemma op5_sfoldl_left5: "⟦us ≠ ⊥; u ≠ ⊥; v ≠ ⊥⟧ ⟹
op5⋅pat⋅(sfoldl⋅(op5⋅pat)⋅(root5⋅pat)⋅us)⋅v = left5⋅pat⋅(u :# us :@ [:v:])"
by simp
lemma root_bisim_root:
assumes "pat ≠ ⊥"
shows "root_bisim pat (root6⋅pat) (root5⋅pat)"
unfolding root6.unfold root5.unfold using assms
by simp (metis (no_types, lifting) left5.simps(1) left6.simps(1) root_bisim.simps slist.con_rews(3))
lemma next_grep6_cases[consumes 3, case_names gl nl]:
assumes "vs ≠ ⊥"
assumes "xs ≠ ⊥"
assumes "P (next⋅xs⋅(grep6⋅pat⋅(left6⋅pat⋅us)⋅(us, vs)))"
obtains (gl) "P (grep6⋅pat⋅(left6⋅pat⋅us)⋅(us, vs))" | (nl) "P (next⋅vs⋅(left6⋅pat⋅us))"
using assms
apply atomize_elim
apply (subst grep6.unfold)
apply (subst (asm) grep6.unfold)
apply (cases xs; clarsimp)
apply (cases vs; clarsimp simp: If2_def[symmetric] split: If2_splits)
done
lemma root_bisim_op_next56:
assumes "root_bisim pat t6 t5"
assumes "prefix⋅[:x:]⋅xs = FF"
shows "op6⋅pat⋅(next⋅xs⋅t6)⋅x = op6⋅pat⋅t6⋅x ∧ op5⋅pat⋅(next⋅xs⋅t5)⋅x = op5⋅pat⋅t5⋅x"
using ‹root_bisim pat t6 t5›
proof cases
case Null with assms(2) show ?thesis by (cases xs) simp_all
next
case (gl us vs) with assms(2) show ?thesis
apply (cases "x = ⊥", simp)
apply (cases xs; clarsimp)
apply (subst (1 2) grep6.simps)
apply (subst (1 2) grep5.simps)
apply (cases vs; clarsimp simp: If2_def[symmetric] split: If2_splits)
done
qed simp
text‹
The main part of establishing that @{const ‹root_bisim›}
is a @{const ‹tree_bisim›} is in showing that the left
paths constructed by the ‹grep›s are @{const
‹root_bisim›}-related. We do this by inducting over the
length of the pattern so far matched (‹us›), as we did
when proving that this tree has the `K' property in
\S\ref{thm:k_property}.
›
lemma
assumes "pat ≠ ⊥"
shows root_bisim_op: "root_bisim pat t6 t5 ⟹ root_bisim pat (op6⋅pat⋅t6⋅x) (op5⋅pat⋅t5⋅x)"
and root_bisim_next_left: "root_bisim pat (next⋅vs⋅(left6⋅pat⋅us)) (next⋅vs⋅(left5⋅pat⋅us))" (is "?rbnl us vs")
proof -
let ?ogl5 = "λus vs. op5⋅pat⋅(grep5⋅pat⋅(left5⋅pat⋅us)⋅(us, vs))⋅x"
let ?ogl6 = "λus vs. op6⋅pat⋅(grep6⋅pat⋅(left6⋅pat⋅us)⋅(us, vs))⋅x"
let ?for5 = "λus. sfoldl⋅(op5⋅pat)⋅(root5⋅pat)⋅us"
let ?for6 = "λus. sfoldl⋅(op6⋅pat)⋅(root6⋅pat)⋅us"
have "⟦?ogl6 us vs = Node⋅usvs⋅l⋅r; cfst⋅usvs ≠ ⊥; x ≠ ⊥⟧ ⟹ le⋅(slength⋅(cfst⋅usvs))⋅(slength⋅us + 1) = TT"
and "⟦op6⋅pat⋅(next⋅xs⋅(grep6⋅pat⋅(left6⋅pat⋅us)⋅(us, vs)))⋅x = Node⋅usvs⋅l⋅r; cfst⋅usvs ≠ ⊥; x ≠ ⊥; us ≠ ⊥; vs ≠ ⊥⟧ ⟹ le⋅(slength⋅(cfst⋅usvs))⋅(slength⋅us + 1) = TT"
and "⟦?for6 us = Node⋅usvs⋅l⋅r; cfst⋅usvs ≠ ⊥⟧ ⟹ lt⋅(slength⋅(cfst⋅usvs))⋅(slength⋅us + 1) = TT"
and "⟦us ≠ ⊥; vs ≠ ⊥⟧ ⟹ root_bisim pat (?ogl6 us vs) (?ogl5 us vs)"
and "root_bisim pat (?for6 us) (?for5 us)"
and "?rbnl us vs"
for usvs l r xs us vs
proof(induct us arbitrary: usvs l r vs x xs rule: op_induct')
case (step us)
have rbl: "root_bisim pat (left6⋅pat⋅us) (left5⋅pat⋅us)"
by (cases us; fastforce intro: step(5) simp: left6.unfold left5.unfold)
{ case (1 usvs l r vs x)
from rbl
have L: "le⋅(slength⋅(prod.fst usvs'))⋅(slength⋅us + 1) = TT"
if "op6⋅pat⋅(next⋅vs⋅(left6⋅pat⋅us))⋅x = Node⋅usvs'⋅l⋅r"
and "cfst⋅usvs' ≠ ⊥"
and "vs ≠ ⊥"
for usvs' l r
proof cases
case bottom with that show ?thesis by simp
next
case Null with that show ?thesis
apply simp
apply (subst (asm) root6.unfold)
apply (subst (asm) grep6.unfold)
apply (fastforce intro: le_plus_1)
done
next
case (gl us'' vs'') show ?thesis
proof(cases us)
case bottom with that show ?thesis by simp
next
case snil with that show ?thesis
apply simp
apply (subst (asm) root6.unfold)
apply (subst (asm) grep6.unfold)
apply clarsimp
done
next
case (scons ush ust)
moreover from that gl scons ‹x ≠ ⊥› have "le⋅(slength⋅(cfst⋅usvs'))⋅(slength⋅us'' + 1) = TT"
apply simp
apply (subst (asm) (2) grep6.unfold)
apply (fastforce dest: step(2, 3)[rotated])
done
moreover from gl scons have "lt⋅(slength⋅us'')⋅(slength⋅(stail⋅us) + 1) = TT"
apply simp
apply (subst (asm) grep6.unfold)
apply (fastforce dest: step(3)[rotated])
done
ultimately show ?thesis
apply clarsimp
apply (metis Integer_le_both_plus_1 Ord_linear_class.le_trans le_iff_lt_or_eq)
done
qed
qed
from 1 show ?case
apply (subst (asm) grep6.unfold)
apply (cases vs; clarsimp simp: If2_def[symmetric] split: If2_splits)
apply (rule L; fastforce)
apply (metis (no_types, lifting) ab_semigroup_add_class.add_ac(1) fst_conv grep6.simps le_refl_Integer sappend_snil_id_right slength.simps(2) slength_bottom_iff slength_sappend slist.con_rews(3) tree_injects')
apply (rule L; fastforce)
done }
note slength_ogl = this
{ case (2 usvs l r vs x xs)
from 2 have "xs ≠ ⊥" by clarsimp
from ‹vs ≠ ⊥› ‹xs ≠ ⊥› 2(1) show ?case
proof(cases rule: next_grep6_cases)
case gl with ‹cfst⋅usvs ≠ ⊥› ‹x ≠ ⊥› show ?thesis using slength_ogl by blast
next
case nl
from rbl show ?thesis
proof cases
case bottom with nl ‹cfst⋅usvs ≠ ⊥› show ?thesis by simp
next
case Null with nl ‹us ≠ ⊥› ‹vs ≠ ⊥› show ?thesis
apply simp
apply (subst (asm) root6.unfold)
apply (subst (asm) grep6.unfold)
apply (clarsimp simp: zero_Integer_def one_Integer_def elim!: slengthE)
done
next
case (gl us'' vs'') show ?thesis
proof(cases us)
case bottom with ‹us ≠ ⊥› show ?thesis by simp
next
case snil with gl show ?thesis by (subst (asm) grep6.unfold) simp
next
case (scons u' us') with 2 nl gl show ?thesis
apply clarsimp
apply (subst (asm) (4) grep6.unfold)
apply clarsimp
apply (drule step(3)[rotated]; clarsimp)
apply (drule step(2)[rotated]; clarsimp)
apply (clarsimp simp: lt_le)
apply (metis Integer_le_both_plus_1 Ord_linear_class.le_trans)
done
qed
qed
qed }
{ case (3 usvs l r) show ?case
proof(cases us rule: srev_cases)
case snil with 3 show ?thesis
apply (subst (asm) root6.unfold)
apply (subst (asm) grep6.unfold)
apply fastforce
done
next
case (ssnoc u' us')
then have "root_bisim pat (?for6 us') (?for5 us')" by (fastforce intro: step(5))
then show ?thesis
proof cases
case bottom with 3 ssnoc show ?thesis by simp
next
case Null with 3 ssnoc show ?thesis
apply simp
apply (subst (asm) root6.unfold)
apply (subst (asm) grep6.unfold)
apply (clarsimp simp: zero_Integer_def one_Integer_def elim!: slengthE)
done
next
case (gl us'' vs'') with 3 ssnoc show ?thesis
apply clarsimp
apply (subst (asm) (2) grep6.unfold)
apply (fastforce simp: zero_Integer_def one_Integer_def split: if_splits dest!: step(1)[rotated] step(3)[rotated] elim!: slengthE)
done
qed
qed (use 3 in simp) }
{ case (4 vs x) show ?case
proof(cases "prefix⋅[:x:]⋅vs")
case bottom then show ?thesis
apply (subst grep6.unfold)
apply (subst grep5.unfold)
apply auto
done
next
case TT with ‹pat ≠ ⊥› ‹us ≠ ⊥› show ?thesis
apply (subst grep6.unfold)
apply (subst grep5.unfold)
apply (cases vs; clarsimp simp: op6_left6)
apply (cases us; clarsimp simp del: left6.simps left5.simps simp add: root5_left5)
apply (metis (no_types, lifting) op5_sfoldl_left5 root5_left5 root_bisim.simps sappend_bottom_iff slist.con_rews(3) slist.con_rews(4))
done
next
case FF with ‹pat ≠ ⊥› ‹us ≠ ⊥› show ?thesis
apply (subst grep6.unfold)
apply (subst grep5.unfold)
using rbl
apply (auto simp: root_bisim_op_next56 elim!: root_bisim.cases intro: root_bisim_root)
apply (subst (asm) grep6.unfold)
apply (cases us; fastforce dest: step(3)[rotated] intro: step(4))
done
qed }
{ case 5 show ?case
proof(cases us rule: srev_cases)
case (ssnoc u' us')
then have "root_bisim pat (?for6 us') (?for5 us')" by (fastforce intro: step(5))
then show ?thesis
proof cases
case (gl us'' vs'') with ssnoc show ?thesis
apply clarsimp
apply (subst (asm) grep6.unfold)
apply (fastforce dest: step(3)[rotated] intro: step(4))
done
qed (use ‹pat ≠ ⊥› ssnoc root_bisim_root in auto)
qed (use ‹pat ≠ ⊥› root_bisim_root in auto) }
{ case (6 vs)
from rbl ‹pat ≠ ⊥› show rbnl: "?rbnl us vs"
proof cases
case bottom then show ?thesis by fastforce
next
case Null then show ?thesis by (cases vs) auto
next
case (gl us'' vs'') then show ?thesis
apply clarsimp
apply (subst grep5.unfold)
apply (subst grep6.unfold)
apply (subst (asm) grep5.unfold)
apply (subst (asm) grep6.unfold)
apply (cases us; clarsimp; cases vs''; clarsimp)
apply (metis Rep_cfun_strict1 bottom left5.simps(2) left6.simps(2) next_snil next_strict(1) rbl)
apply (cases vs; clarsimp)
using rbl apply (fastforce dest: step(3)[rotated] intro: step(6) simp: If2_def[symmetric] simp del: eq_FF split: If2_splits)+
done
qed }
qed
from ‹pat ≠ ⊥› this(4) show "root_bisim pat t6 t5 ⟹ root_bisim pat (op6⋅pat⋅t6⋅x) (op5⋅pat⋅t5⋅x)"
by (auto elim!: root_bisim.cases intro: root_bisim_root)
show ‹root_bisim pat (next⋅vs⋅(left6⋅pat⋅us)) (next⋅vs⋅(left5⋅pat⋅us))› by fact
qed
text‹
With this result in hand the remainder is technically fiddly but straightforward.
›
lemmas tree_bisimI = iffD2[OF fun_cong[OF tree.bisim_def[unfolded atomize_eq]], rule_format]
lemma tree_bisim_root_bisim:
shows "tree_bisim (root_bisim pat)"
proof(rule tree_bisimI, erule root_bisim.cases, goal_cases bottom Null Node)
case (Node x y us vs) then show ?case
apply (subst (asm) grep5.unfold)
apply (subst (asm) grep6.unfold)
apply hypsubst_thin
apply (clarsimp simp: root_bisim_next_left)
apply (cases vs; clarsimp)
apply (cases us; clarsimp simp del: left6.simps left5.simps simp add: op5_sfoldl_left5 op6_left6)
apply (metis (no_types, lifting) root5_left5 root_bisim.simps slist.con_rews(3,4))
apply (metis (no_types, lifting) op5_sfoldl_left5 root_bisim.simps sappend_bottom_iff slist.con_rews(3, 4))
done
qed simp_all
lemma r6_5:
shows "(root6⋅pat, op6⋅pat) = (root5⋅pat, op5⋅pat)"
proof(cases "pat = ⊥")
case True
from True have "root6⋅pat = root5⋅pat"
apply (subst root6.unfold)
apply (subst grep6.unfold)
apply (subst root5.unfold)
apply (subst grep5.unfold)
apply simp
done
moreover
from True ‹root6⋅pat = root5⋅pat› have "op6⋅pat⋅t⋅x = op5⋅pat⋅t⋅x" for t x
by (induct t) simp_all
ultimately show ?thesis by (simp add: cfun_eq_iff)
next
case False
then have root: "root6⋅pat = root5⋅pat"
by (rule tree.coinduct[OF tree_bisim_root_bisim root_bisim_root])
moreover
from root have "op6⋅pat⋅t⋅x = op5⋅pat⋅t⋅x" for t x by (induct t) simp_all
ultimately show ?thesis by (simp add: cfun_eq_iff)
qed
text‹
We conclude this section by observing that accumulator-introduction is a well known technique
(see, for instance, \<^citet>‹‹\S13.6› in "Hutton:2016"›), but the examples in the
literature assume that the type involved is defined inductively. Bird adopts this strategy without
considering what the mixed inductive/coinductive rule is that justifies the preservation of total
correctness.
The difficulty of this step is why we wired in the `K' opt earlier: it allows us to preserve the
shape of the tree all the way from the data refinement to the final version.
›
subsection‹ Step 7: Simplify, unfold prefix ›
text‹
The next step (Bird, bottom of p132) is to move the case split in @{const ‹grep6›} on ‹vs› above the
‹Node› constructor, which makes @{term ‹grep7›} strict in that parameter and therefore not
extensionally equal to @{const ‹grep6›}. We establish a weaker correspondence using fixed-point induction.
We also unfold @{const ‹prefix›} in @{const ‹op6›}.
›
fixrec
root7 :: "[:'a::Eq_def:] → ([:'a:] × [:'a:]) tree"
and op7 :: "[:'a::Eq_def:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree"
and grep7 :: "[:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree"
where
[simp del]:
"root7⋅pat = grep7⋅pat⋅Null⋅([::], pat)"
| "op7⋅pat⋅Null⋅x = root7⋅pat"
| "op7⋅pat⋅(Node⋅(us, [::])⋅l⋅r)⋅x = op7⋅pat⋅l⋅x"
| "⟦v ≠ ⊥; vs ≠ ⊥⟧ ⟹
op7⋅pat⋅(Node⋅(us, v :# vs)⋅l⋅r)⋅x = If eq⋅x⋅v then r else op7⋅pat⋅l⋅x"
| [simp del]:
"grep7⋅pat⋅l⋅(us, [::]) = Node⋅(us, [::])⋅l⋅Null"
| "⟦v ≠ ⊥; vs ≠ ⊥⟧ ⟹
grep7⋅pat⋅l⋅(us, v :# vs) = Node⋅(us, v :# vs)⋅(next⋅(v :# vs)⋅l)⋅(grep7⋅pat⋅(op7⋅pat⋅l⋅v)⋅(us :@ [:v:], vs))"
schematic_goal root7_op7_grep7_def:
"( root7 :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
, op7 :: [:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
, grep7 :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )
= fix⋅?F"
unfolding root7_def op7_def grep7_def by simp
lemma r7_6_aux:
assumes "pat ≠ ⊥"
shows
"(Λ (root, op, grep). (root⋅pat, seq⋅x⋅(op⋅pat⋅t⋅x), grep⋅pat⋅l⋅(us, vs)))⋅
( root7 :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
, op7 :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
, grep7 :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )
= (Λ (root, op, grep). (root⋅pat, seq⋅x⋅(op⋅pat⋅t⋅x), seq⋅vs⋅(grep⋅pat⋅l⋅(us, vs))))⋅
( root6 :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
, op6 :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
, grep6 :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )"
unfolding root6_op6_grep6_def root7_op7_grep7_def
proof(induct arbitrary: t x l us vs rule: parallel_fix_ind[case_names adm bottom step])
case (step X Y t x l us vs) then show ?case
apply -
apply (cases X, cases Y)
apply (rename_tac r10 o10 g10 r9 o9 g9)
apply (clarsimp simp: cfun_eq_iff assms match_Node_mplus_match_Node match_Null_match_Node_tree_case tree_case_distr match_snil_match_scons_slist_case slist_case_distr If_distr)
apply (intro allI conjI)
apply (case_tac t; clarsimp)
apply (rename_tac us vs l r)
apply (case_tac "x = ⊥"; clarsimp)
apply (case_tac vs; clarsimp; fail)
apply (case_tac vs; clarsimp)
apply (metis ID1 seq_simps(3))
done
qed simp_all
lemma r7_6:
assumes "pat ≠ ⊥"
shows "root7⋅pat = root6⋅pat"
and "x ≠ ⊥ ⟹ op7⋅pat⋅t⋅x = op6⋅pat⋅t⋅x"
using r7_6_aux[OF assms] by (force simp: cfun_eq_iff dest: spec[where x=x])+
subsection‹ Step 8: Discard us \label{sec:KMP:step8} ›
text‹
We now discard ‹us› from the tree as it is no longer used. This requires a new
definition of @{const ‹next›}.
This is essentially another data refinement.
›
fixrec next' :: "'a::Eq_def → [:'a:] tree → [:'a:] tree" where
"next'⋅x⋅Null = Null"
| "next'⋅x⋅(Node⋅[::]⋅l⋅r) = Node⋅[::]⋅l⋅r"
| "⟦v ≠ ⊥; vs ≠ ⊥; x ≠ ⊥⟧ ⟹
next'⋅x⋅(Node⋅(v :# vs)⋅l⋅r) = If eq⋅x⋅v then l else Node⋅(v :# vs)⋅l⋅r"
fixrec
root8 :: "[:'a::Eq_def:] → [:'a:] tree"
and op8 :: "[:'a:] → [:'a:] tree → 'a → [:'a:] tree"
and grep8 :: "[:'a:] → [:'a:] tree → [:'a:] → [:'a:] tree"
where
[simp del]:
"root8⋅pat = grep8⋅pat⋅Null⋅pat"
| "op8⋅pat⋅Null⋅x = root8⋅pat"
| "op8⋅pat⋅(Node⋅[::]⋅l⋅r)⋅x = op8⋅pat⋅l⋅x"
| "⟦v ≠ ⊥; vs ≠ ⊥⟧ ⟹
op8⋅pat⋅(Node⋅(v :# vs)⋅l⋅r)⋅x = If eq⋅x⋅v then r else op8⋅pat⋅l⋅x"
| "grep8⋅pat⋅l⋅[::] = Node⋅[::]⋅l⋅Null"
| "⟦v ≠ ⊥; vs ≠ ⊥⟧ ⟹
grep8⋅pat⋅l⋅(v :# vs) = Node⋅(v :# vs)⋅(next'⋅v⋅l)⋅(grep8⋅pat⋅(op8⋅pat⋅l⋅v)⋅vs)"
fixrec ok8 :: "[:'a:] tree → tr" where
"vs ≠ ⊥ ⟹ ok8⋅(Node⋅vs⋅l⋅r) = snull⋅vs"
schematic_goal root8_op8_grep8_def:
"( root8 :: [:'a::Eq_def:] → [:'a:] tree
, op8 :: [:'a:] → [:'a:] tree → 'a → [:'a:] tree
, grep8 :: [:'a:] → [:'a:] tree → [:'a:] → [:'a:] tree )
= fix⋅?F"
unfolding op8_def root8_def grep8_def by simp
lemma next'_strict[simp]:
"next'⋅x⋅⊥ = ⊥"
"next'⋅⊥⋅(Node⋅(v :# vs)⋅l⋅r) = ⊥"
by (cases "v :# vs = ⊥"; fixrec_simp; clarsimp)+
lemma root8_op8_grep8_strict[simp]:
"grep8⋅pat⋅l⋅⊥ = ⊥"
"op8⋅pat⋅⊥ = ⊥"
"root8⋅⊥ = ⊥"
by fixrec_simp+
lemma ok8_strict[simp]:
"ok8⋅⊥ = ⊥"
"ok8⋅Null = ⊥"
by fixrec_simp+
text‹
We cannot readily relate @{const ‹next›} and @{const
‹next'›} using worker/wrapper as the obvious abstraction
is not invertible. Conversely the desired result is easily shown by
fixed-point induction.
›
lemma next'_next:
assumes "v ≠ ⊥"
assumes "vs ≠ ⊥"
shows "next'⋅v⋅(tree_map'⋅csnd⋅t) = tree_map'⋅csnd⋅(next⋅(v :# vs)⋅t)"
proof(induct t)
case (Node usvs' l r) with assms show ?case
apply (cases usvs'; clarsimp)
apply (rename_tac us'' vs'')
apply (case_tac vs''; clarsimp simp: If_distr)
done
qed (use assms in simp_all)
lemma r8_7[simplified]:
shows "(Λ (root, op, grep). ( root⋅pat
, op⋅pat⋅(tree_map'⋅csnd⋅t)⋅x
, grep⋅pat⋅(tree_map'⋅csnd⋅l)⋅(csnd⋅usvs)))⋅(root8, op8, grep8)
= (Λ (root, op, grep). ( tree_map'⋅csnd⋅(root⋅pat)
, tree_map'⋅csnd⋅(op⋅pat⋅t⋅x)
, tree_map'⋅csnd⋅(grep⋅pat⋅l⋅usvs)))⋅(root7, op7, grep7)"
unfolding root7_op7_grep7_def root8_op8_grep8_def
proof(induct arbitrary: l t usvs x rule: parallel_fix_ind[case_names adm bottom step])
case (step X Y l t usvs x) then show ?case
apply -
apply (cases X; cases Y)
apply (clarsimp simp: cfun_eq_iff next'_next
match_snil_match_scons_slist_case slist_case_distr
match_Node_mplus_match_Node match_Null_match_Node_tree_case tree_case_distr
cong: slist_case_cong)
apply (cases t; clarsimp simp: If_distr)
apply (rename_tac us vs l r)
apply (case_tac vs; fastforce)
done
qed simp_all
text‹
Top-level equivalence follows from ‹lfp_fusion› specialized to ‹sscanl› (@{thm [source]
"sscanl_lfp_fusion"}), which states that
\begin{center}
@{prop ‹smap⋅g oo sscanl⋅f⋅z = sscanl⋅f'⋅(g⋅z)›}
\end{center}
provided that ‹g› is strict and the following diagram commutes for @{prop ‹x ≠ ⊥›}:
\begin{center}
\begin{tikzcd}[column sep=6em]
a \arrow[r, "‹Λ a. f⋅a⋅x›"] \arrow[d, "‹g›"] & a' \arrow[d, "‹g›"] \\
b \arrow[r, "‹Λ a. f'⋅a⋅x›"] & b'
\end{tikzcd}
\end{center}
›
lemma ok8_ok8: "ok8 oo tree_map'⋅csnd = snull oo csnd oo abs2" (is "?lhs = ?rhs")
proof(rule cfun_eqI)
fix t show "?lhs⋅t = ?rhs⋅t"
by (cases t; clarsimp) (metis ok8.simps ok8_strict(1) snull_strict tree.con_rews(3))
qed
lemma matches8:
shows "matches⋅pat = smap⋅cfst oo sfilter⋅(ok8 oo csnd) oo sscanl⋅(Λ (n, x) y. (n + 1, op8⋅pat⋅x⋅y))⋅(0, root8⋅pat)" (is "?lhs = ?rhs")
proof(cases "pat = ⊥")
case True
then have "?lhs⋅xs = ?rhs⋅xs" for xs by (cases xs; clarsimp)
then show ?thesis by (simp add: cfun_eq_iff)
next
case False
then have *: "matches⋅pat = smap⋅cfst oo sfilter⋅(snull oo csnd oo abs2 oo csnd) oo sscanl⋅(Λ (n, x) y. (n + 1, op7⋅pat⋅x⋅y))⋅(0, root7⋅pat)"
using data_refinement[where 'a='a] r3_2[where 'a='a] r4_3[where 'a='a] r5_4[where 'a='a] r6_5(1)[where pat=pat] r7_6[where pat=pat]
unfolding matches2.unfold by (fastforce simp: oo_assoc cfun_eq_iff csplit_def intro!: cfun_arg_cong sscanl_cong)
from ‹pat ≠ ⊥› show ?thesis
apply -
apply (subst conjunct1[OF r8_7])
apply (subst sscanl_lfp_fusion[where g="ID ** tree_map'⋅csnd" and z="(0, root7⋅pat)", simplified, symmetric])
prefer 2
apply (subst oo_assoc, subst sfilter_smap, simp)
apply (simp add: oo_assoc)
apply (simp add: oo_assoc[symmetric])
apply (subst oo_assoc, subst ok8_ok8)
apply (clarsimp simp: oo_assoc *)
apply (rule refl)
apply (clarsimp simp: r8_7)
done
qed
subsection‹ Step 9: Factor out pat (final version) \label{sec:KMP:final_version} ›
text‹
Finally we factor @{term ‹pat›} from these definitions and arrive
at Bird's cyclic data structure, which when executed using lazy
evaluation actually memoises the computation of @{const ‹grep8›}.
The ‹Letrec› syntax groups recursive bindings with
‹,› and separates these with ‹;›. Its lack
of support for clausal definitions, and that \texttt{HOLCF}
‹case› does not support nested patterns, leads to some
awkwardness.
›
fixrec matchesf :: "[:'a::Eq_def:] → [:'a:] → [:Integer:]" where
[simp del]: "matchesf⋅pat =
(Letrec okf = (Λ (Node⋅vs⋅l⋅r). snull⋅vs);
nextf = (Λ x t. case t of
Null ⇒ Null
| Node⋅vs⋅l⋅r ⇒ (case vs of
[::] ⇒ t
| v :# vs ⇒ If eq⋅x⋅v then l else t));
rootf = grepf⋅Null⋅pat,
opf = (Λ t x. case t of
Null ⇒ rootf
| Node⋅vs⋅l⋅r ⇒ (case vs of
[::] ⇒ opf⋅l⋅x
| v :# vs ⇒ If eq⋅x⋅v then r else opf⋅l⋅x)),
grepf = (Λ l vs. case vs of
[::] ⇒ Node⋅[::]⋅l⋅Null
| v :# vs ⇒ Node⋅(v :# vs)⋅(nextf⋅v⋅l)⋅(grepf⋅(opf⋅l⋅v)⋅vs));
stepf = (Λ (n, t) x. (n + 1, opf⋅t⋅x))
in smap⋅cfst oo sfilter⋅(okf oo csnd) oo sscanl⋅stepf⋅(0, rootf))"
lemma matchesf_ok8: "(Λ (Node⋅vs⋅l⋅r). snull⋅vs) = ok8"
by (clarsimp simp: cfun_eq_iff; rename_tac x; case_tac x; clarsimp)
lemma matchesf_next':
"(Λ x t. case t of Null ⇒ Null | Node⋅vs⋅l⋅r ⇒ (case vs of [::] ⇒ t | v :# vs ⇒ If eq⋅x⋅v then l else t)) = next'"
apply (clarsimp simp: cfun_eq_iff next'.unfold
match_snil_match_scons_slist_case slist_case_distr
match_Node_mplus_match_Node match_Null_match_Node_tree_case tree_case_distr)
apply (simp cong: tree_case_cong)
apply (simp cong: slist_case_cong)
done
lemma matchesf_8:
"fix⋅(Λ (Rootf, Opf, Grepf).
( Grepf⋅Null⋅pat
, Λ t x. case t of Null ⇒ Rootf | Node⋅vs⋅l⋅r ⇒
(case vs of [::] ⇒ Opf⋅l⋅x | v :# vs ⇒ If eq⋅x⋅v then r else Opf⋅l⋅x)
, Λ l vs. case vs of [::] ⇒ Node⋅[::]⋅l⋅Null | v :# vs ⇒ Node⋅(v :# vs)⋅(next'⋅v⋅l)⋅(Grepf⋅(Opf⋅l⋅v)⋅vs)) )
= (Λ (root, op, grep). (root⋅pat, op⋅pat, grep⋅pat))⋅(root8, op8, grep8)"
unfolding root8_op8_grep8_def
by (rule lfp_fusion[symmetric])
(fastforce simp: cfun_eq_iff
match_snil_match_scons_slist_case slist_case_distr
match_Node_mplus_match_Node match_Null_match_Node_tree_case tree_case_distr)+
theorem matches_final:
shows "matches = matchesf"
by (clarsimp simp: cfun_eq_iff fix_const eta_cfun csplit_cfun3 CLetrec_def
matches8 matchesf.unfold matchesf_next' matchesf_ok8 matchesf_8[simplified eta_cfun])
text‹
The final program above is easily syntactically translated into the
Haskell shown in Figure~\ref{fig:haskell-kmp}, and one can expect
GHC's list fusion machinery to compile the top-level driver into an
efficient loop. \<^citet>‹"LochbihlerMaximova:2015"›
have mechanised this optimisation for Isabelle/HOL's code generator
(and see also \<^citet>‹"Huffman:2009"›).
As we lack both pieces of infrastructure we show such a fusion is sound
by hand.
›
lemma fused_driver':
assumes "g⋅⊥ = ⊥"
assumes "p⋅⊥ = ⊥"
shows "smap⋅g oo sfilter⋅p oo sscanl⋅f⋅z
= (μ R. Λ z xxs. case xxs of
[::] ⇒ If p⋅z then [:g⋅z:] else [::]
| x :# xs ⇒ let z' = f⋅z⋅x in If p⋅z then g⋅z :# R⋅z'⋅xs else R⋅z'⋅xs)⋅z"
(is "?lhs = ?rhs")
proof(rule cfun_eqI)
fix xs from assms show "?lhs⋅xs = ?rhs⋅xs"
by (induct xs arbitrary: z) (subst fix_eq; fastforce simp: If_distr Let_def)+
qed
end