Theory KMP

(*<*)
theory KMP
imports
  Theory_Of_Lists
begin

hide_const abs

(*>*)
section‹ Knuth-Morris-Pratt matching according to Bird \label{sec:KMP} ›


subsection‹ Step 1: Specification \label{sec:KMP:specification} ›

text‹

We begin with the specification of string matching given by \<^citet>‹‹Chapter~16› in
"Bird:PearlsofFAD:2010"›. (References to ``Bird'' in the following are to this text.) Note that
we assume @{const ‹eq›} has some nice properties (see \S\ref{sec:equality}) and
use strict lists.

›

fixrec endswith :: "[:'a::Eq_def:] → [:'a:] → tr" where
[simp del]: "endswith⋅pat = selem⋅pat oo stails"

fixrec matches :: "[:'a::Eq_def:] → [:'a:] → [:Integer:]" where
[simp del]: "matches⋅pat = smap⋅slength oo sfilter⋅(endswith⋅pat) oo sinits"

text‹

Bird describes @{term "matches⋅pat⋅xs"} as returning ``a list of integers ‹p› such that ‹pat› is a
suffix of @{term "stake⋅p⋅xs"}.''

The following examples illustrate this behaviour:

›

lemma "matches⋅[::]⋅[::] = [:0:]"
unfolding matches.unfold endswith.unfold by simp

lemma "matches⋅[::]⋅[:10::Integer, 20, 30:] = [:0, 1, 2, 3:]"
unfolding matches.unfold endswith.unfold by simp

lemma "matches⋅[:1::Integer,2,3,1,2:]⋅[:1,2,1,2,3,1,2,3,1,2:] = [:7, 10:]"
unfolding matches.unfold endswith.unfold
by (simp add: sfilter_scons_let del: sfilter_strict_scons sfilter.simps)

lemma endswith_strict[simp]:
  "endswith⋅⊥ = ⊥"
  "endswith⋅pat⋅⊥ = ⊥"
by (fixrec_simp; simp add: cfun_eq_iff)+

lemma matches_strict[simp]:
  "matches⋅⊥ = ⊥"
  "matches⋅pat⋅⊥ = ⊥"
by (fixrec_simp; clarsimp simp: cfun_eq_iff)+

text‹

Bird's strategy for deriving KMP from this specification is encoded in the following lemmas:
if we can rewrite @{const ‹endswith›} as a composition of a predicate with a
@{const ‹sfoldl›}, then we can rewrite @{const ‹matches›} into a @{const ‹sscanl›}.

›

lemma fork_sfoldl:
  shows "sfoldl⋅f1⋅z1 && sfoldl⋅f2⋅z2 = sfoldl⋅(Λ (a, b) z. (f1⋅a⋅z, f2⋅b⋅z))⋅(z1, z2)" (is "?lhs = ?rhs")
proof(rule cfun_eqI)
  fix xs show "?lhs⋅xs = ?rhs⋅xs"
    by (induct xs arbitrary: z1 z2) simp_all
qed

lemma smap_sfilter_split_cfcomp: ―‹ Bird (16.4) ›
  assumes "f⋅⊥ = ⊥"
  assumes "p⋅⊥ = ⊥"
  shows "smap⋅f oo sfilter⋅(p oo g) = smap⋅cfst oo sfilter⋅(p oo csnd) oo smap⋅(f && g)" (is "?lhs = ?rhs")
proof(rule cfun_eqI)
  fix xs show "?lhs⋅xs = ?rhs⋅xs"
    using assms by (induct xs) (simp_all add: If2_def[symmetric] split: If2_splits)
qed

lemma Bird_strategy:
  assumes endswith: "endswith⋅pat = p oo sfoldl⋅op⋅z"
  assumes step: "step = (Λ (n, x) y. (n + 1, op⋅x⋅y))"
  assumes "p⋅⊥ = ⊥" ―‹ We can reasonably expect the predicate to be strict ›
  shows "matches⋅pat = smap⋅cfst oo sfilter⋅(p oo csnd) oo sscanl⋅step⋅(0, z)"
apply (simp add: matches.simps assoc_oo endswith)
apply (subst smap_sfilter_split_cfcomp, fastforce, fact)
apply (subst slength_sfoldl)
apply (subst fork_sfoldl)
apply (simp add: oo_assoc[symmetric])
apply (subst sinits_sscanl)
apply (fold step)
apply (rule refl)
done

text‹

Bird proceeds by reworking @{const ‹endswith›} into the form required by @{thm [source] "Bird_strategy"}.
This is eased by an alternative definition of @{const ‹endswith›}.

›

lemma sfilter_supto:
  assumes "0 ≤ d"
  shows "sfilter⋅(Λ x. le⋅(MkI⋅n - x)⋅(MkI⋅d))⋅(supto⋅(MkI⋅m)⋅(MkI⋅n))
       = supto⋅(MkI⋅(if m ≤ n - d then n - d else m))⋅(MkI⋅n)" (is "?sfilterp⋅?suptomn = _")
proof(cases "m ≤ n - d")
  case True
  moreover
  from True assms have "?sfilterp⋅?suptomn = ?sfilterp⋅(supto⋅(MkI⋅m)⋅(MkI⋅(n - d - 1)) :@ supto⋅(MkI⋅(n - d))⋅(MkI⋅n))"
    using supto_split1 by auto
  moreover from True assms have "?sfilterp⋅(supto⋅(MkI⋅m)⋅(MkI⋅(n - d - 1))) = [::]" by auto
  ultimately show ?thesis by (clarsimp intro!: trans[OF sfilter_cong[OF refl] sfilter_const_TT])
next
  case False then show ?thesis
    by (clarsimp intro!: trans[OF sfilter_cong[OF refl] sfilter_const_TT])
qed

lemma endswith_eq_sdrop: "endswith⋅pat⋅xs = eq⋅pat⋅(sdrop⋅(slength⋅xs - slength⋅pat)⋅xs)"
proof(cases "pat = ⊥" "xs = ⊥" rule: bool.exhaust[case_product bool.exhaust])
  case False_False then show ?thesis
    by (cases "endswith⋅pat⋅xs";
        simp only: endswith.simps cfcomp2 stails_sdrop';
        force simp: zero_Integer_def one_Integer_def elim: slengthE)
qed simp_all

lemma endswith_def2:  ―‹ Bird p127 ›
  shows "endswith⋅pat⋅xs = eq⋅pat⋅(shead⋅(sfilter⋅(Λ x. prefix⋅x⋅pat)⋅(stails⋅xs)))" (is "?lhs = ?rhs")
proof(cases "pat = ⊥" "xs = ⊥" rule: bool.exhaust[case_product bool.exhaust])
  case False_False
  from False_False obtain patl xsl where *: "slength⋅xs = MkI⋅xsl" "0 ≤ xsl" "slength⋅pat = MkI⋅patl" "0 ≤ patl"
    by (meson Integer.exhaust slength_bottom_iff slength_ge_0)
  let ?patl_xsl = "if patl ≤ xsl then xsl - patl else 0"
  have "?rhs = eq⋅pat⋅(shead⋅(sfilter⋅(Λ x. le⋅(slength⋅x)⋅(slength⋅pat) andalso prefix⋅x⋅pat)⋅(stails⋅xs)))"
    by (subst prefix_slength_strengthen) simp
  also have "… = eq⋅pat⋅(shead⋅(sfilter⋅(Λ x. prefix⋅x⋅pat)⋅(sfilter⋅(Λ x. le⋅(slength⋅x)⋅(slength⋅pat))⋅(stails⋅xs))))"
    by (simp add: sfilter_sfilter')
  also have "… = eq⋅pat⋅(shead⋅(smap⋅(Λ k. sdrop⋅k⋅xs)⋅(sfilter⋅(Λ k. prefix⋅(sdrop⋅k⋅xs)⋅pat)⋅(sfilter⋅(Λ k. le⋅(slength⋅(sdrop⋅k⋅xs))⋅(MkI⋅patl))⋅(supto⋅(MkI⋅0)⋅(MkI⋅xsl))))))"
    using ‹slength⋅xs = MkI⋅xsl› ‹slength⋅pat = MkI⋅patl›
    by (simp add: stails_sdrop' sfilter_smap' cfcomp1 zero_Integer_def)
  also have "… = eq⋅pat⋅(shead⋅(smap⋅(Λ k. sdrop⋅k⋅xs)⋅(sfilter⋅(Λ k. prefix⋅(sdrop⋅k⋅xs)⋅pat)⋅(sfilter⋅(Λ k. le⋅(MkI⋅xsl - k)⋅(MkI⋅patl))⋅(supto⋅(MkI⋅0)⋅(MkI⋅xsl))))))"
    using ‹slength⋅xs = MkI⋅xsl›
    by (subst (2) sfilter_cong[where p'="Λ x. le⋅(MkI⋅xsl - x)⋅(MkI⋅patl)"]; fastforce simp: zero_Integer_def)
  also have "… = If prefix⋅(sdrop⋅(MkI⋅?patl_xsl)⋅xs)⋅pat
                  then eq⋅pat⋅(sdrop⋅(MkI⋅?patl_xsl)⋅xs)
                  else eq⋅pat⋅(shead⋅(smap⋅(Λ k. sdrop⋅k⋅xs)⋅(sfilter⋅(Λ x. prefix⋅(sdrop⋅x⋅xs)⋅pat)⋅(supto⋅(MkI⋅(?patl_xsl + 1))⋅(MkI⋅xsl)))))"
    using False_False ‹0 ≤ xsl› ‹0 ≤ patl›
    by (subst sfilter_supto) (auto simp: If_distr one_Integer_def)
  also have "… = ?lhs" (is "If ?c then _ else ?else = _")
  proof(cases ?c)
    case TT with ‹slength⋅xs = MkI⋅xsl› ‹slength⋅pat = MkI⋅patl›
    show ?thesis by (simp add: endswith_eq_sdrop sdrop_neg zero_Integer_def)
  next
    case FF ―‹ Recursive case: the lists generated by ‹supto› are too short ›
    have "?else = shead⋅(smap⋅(Λ x. eq⋅pat⋅(sdrop⋅x⋅xs))⋅(sfilter⋅(Λ x. prefix⋅(sdrop⋅x⋅xs)⋅pat)⋅(supto⋅(MkI⋅(?patl_xsl + 1))⋅(MkI⋅xsl))))"
      using FF by (subst shead_smap_distr[where f="eq⋅pat", symmetric]) (auto simp: cfcomp1)
    also have "… = shead⋅(smap⋅(Λ x. seq⋅x⋅FF)⋅(sfilter⋅(Λ x. prefix⋅(sdrop⋅x⋅xs)⋅pat)⋅(supto⋅(MkI⋅(?patl_xsl + 1))⋅(MkI⋅xsl))))"
      using False_False * by (subst smap_cong[OF refl, where f'="Λ x. seq⋅x⋅FF"]) (auto simp: zero_Integer_def split: if_splits)
    also from * FF have "… = ?lhs"
      apply (auto 0 0 simp: shead_smap_distr seq_conv_if endswith_eq_sdrop zero_Integer_def dest!: prefix_FF_not_snilD)
        apply (metis (full_types) le_MkI_MkI linorder_not_less order_refl prefix_FF_not_snilD sdrop_all zless_imp_add1_zle)
       using FF apply fastforce
      apply (metis add.left_neutral le_MkI_MkI linorder_not_less order_refl prefix_FF_not_snilD sdrop_0(1) sdrop_all zero_Integer_def zless_imp_add1_zle)
      done
    finally show ?thesis using FF by simp
  qed (simp add: False_False)
  finally show ?thesis ..
qed simp_all

text‹

Bird then generalizes @{term ‹sfilter⋅(Λ x. prefix⋅x⋅pat) oo stails›} to @{term ‹split›},
where ``‹split⋅pat⋅xs› splits ‹pat› into two lists ‹us› and ‹vs› so that
@{prop ‹us :@ vs = pat›} and ‹us› is the longest suffix of ‹xs› that is a prefix of ‹pat›.''

›

fixrec split :: "[:'a::Eq_def:] → [:'a:] → [:'a:] × [:'a:]" where ―‹ Bird p128 ›
[simp del]: "split⋅pat⋅xs = If prefix⋅xs⋅pat then (xs, sdrop⋅(slength⋅xs)⋅pat) else split⋅pat⋅(stail⋅xs)"

lemma split_strict[simp]:
  shows "split⋅⊥ = ⊥"
    and "split⋅pat⋅⊥ = ⊥"
by fixrec_simp+

lemma split_bottom_iff[simp]: "(split⋅pat⋅xs = ⊥) ⟷ (pat = ⊥ ∨ xs = ⊥)"
by (cases "pat = ⊥"; clarsimp) (induct xs; subst split.unfold; simp)

lemma split_snil[simp]:
  assumes "pat ≠ ⊥"
  shows "split⋅pat⋅[::] = ([::], pat)"
using assms by fixrec_simp

lemma split_pattern: ―‹ Bird p128, observation ›
  assumes "xs ≠ ⊥"
  assumes "split⋅pat⋅xs = (us, vs)"
  shows "us :@ vs = pat"
using assms
proof(cases "pat = ⊥", simp, induct xs arbitrary: us vs)
  case snil then show ?case by (subst (asm) split.unfold) simp
next
  case (scons x xs) then show ?case
    by (subst (asm) (3) split.unfold)
       (fastforce dest: prefix_sdrop_slength simp: If2_def[symmetric] split: If2_splits)
qed simp

lemma endswith_split: ―‹ Bird p128, after defining ‹split› ›
  shows "endswith⋅pat = snull oo csnd oo split⋅pat"
proof(rule cfun_eqI)
  fix xs show "endswith⋅pat⋅xs = (snull oo csnd oo split⋅pat)⋅xs"
  proof(cases "pat = ⊥", simp, induct xs)
    case (scons x xs) then show ?case
      unfolding endswith_def2
      by (subst split.unfold)
         (fastforce dest: prefix_sdrop_prefix_eq simp: If2_def[symmetric] If_distr snull_eq_snil split: If2_splits)
  qed (simp_all add: snull_eq_snil endswith.simps)
qed

lemma split_length_lt:
  assumes "pat ≠ ⊥"
  assumes "xs ≠ ⊥"
  shows "lt⋅(slength⋅(prod.fst (split⋅pat⋅xs)))⋅(slength⋅xs + 1) = TT"
using assms
proof(induct xs)
  case (scons x xs) then show ?case
    by (subst split.unfold)
       (auto simp: If2_def[symmetric] one_Integer_def split: If2_splits elim!: slengthE elim: lt_trans)
qed simp_all

text‹

The predicate ‹p› required by @{thm [source] "Bird_strategy"} is therefore ‹snull oo csnd›. It
remains to find ‹op› and ‹z› such that:

▪ @{term ‹split⋅pat⋅[::] = z›}
▪ @{term ‹split⋅pat⋅(xs :@ [:x:]) = op⋅(split⋅pat⋅xs)⋅x›}

›
text‹

so that @{term ‹split = sfoldl⋅op⋅z›}.

We obtain @{term ‹z = ([::], pat)›} directly from the definition of @{term ‹split›}.

Bird derives ‹op› on the basis of this crucial observation:

›

lemma split_snoc: ―‹ Bird p128 ›
  shows "split⋅pat⋅(xs :@ [:x:]) = split⋅pat⋅(cfst⋅(split⋅pat⋅xs) :@ [:x:])"
proof(cases "pat = ⊥", simp, cases "x = ⊥", simp, induct xs)
  case (scons x xs) then show ?case
    apply -
    apply (subst (1 3) split.unfold)
    apply (clarsimp simp: If2_def[symmetric] split: If2_splits; intro conjI impI)
      apply (subst split.unfold; fastforce)
     apply (subst split.unfold; fastforce)
    apply (simp add: append_prefixD)
    done
qed simp_all

fixrec ―‹ Bird p129 ›
  op :: "[:'a::Eq_def:] → [:'a:] × [:'a:] → 'a → [:'a:] × [:'a:]"
where
[simp del]:
  "op⋅pat⋅(us, vs)⋅x =
     (     If prefix⋅[:x:]⋅vs then (us :@ [:x:], stail⋅vs)
      else If snull⋅us then ([::], pat)
      else op⋅pat⋅(split⋅pat⋅(stail⋅us))⋅x )"

lemma op_strict[simp]:
  "op⋅pat⋅⊥ = ⊥"
  "op⋅pat⋅(us, ⊥) = ⊥"
  "op⋅pat⋅usvs⋅⊥ = ⊥"
by fixrec_simp+

text‹

Bird demonstrates that @{const ‹op›} is partially correct wrt @{const ‹split›}, i.e.,
@{prop "op⋅pat⋅(split⋅pat⋅xs)⋅x ⊑ split⋅pat⋅(xs :@ [:x:])"}. For total correctness we
essentially prove that @{const ‹op›} terminates on well-defined arguments with an inductive argument.

›

lemma op_induct[case_names step]:
  fixes usvs:: "[:'a:] × 'b"
  assumes step: "⋀usvs. (⋀usvs'. lt⋅(slength⋅(cfst⋅usvs'))⋅(slength⋅(cfst⋅usvs)) = TT ⟹ P usvs') ⟹ P usvs"
  shows "P usvs"
proof(induct usvs rule: wf_induct)
  let ?r = "{ (usvs', usvs) |(usvs :: [:'a:] × 'b) (usvs' :: [:'a:] × 'b). lt⋅(slength⋅(cfst⋅usvs'))⋅(slength⋅(cfst⋅usvs)) = TT }"
  show "wf ?r"
  proof(rule wf_subset[OF wf_inv_image[where f="λ(x, _). slength⋅x", OF wf_subset[OF wf_Integer_ge_less_than[where d=0]]]])
    let ?rslen = "{ (slength⋅us', slength⋅us) |(us :: [:'a:]) (us' :: [:'a:]). lt⋅(slength⋅us')⋅(slength⋅us) = TT }"
    show "?rslen ⊆ Integer_ge_less_than 0"
      apply (clarsimp simp: Integer_ge_less_than_def zero_Integer_def)
      apply (metis Integer.exhaust dist_eq_tr(4) dist_eq_tr(6) lt_Integer_bottom_iff lt_MkI_MkI slength_ge_0)
      done
    show "?r ⊆ inv_image ?rslen (λ(x, _). slength⋅x)" by (auto 0 3)
  qed
  fix usvs
  assume "∀usvs'. (usvs', usvs) ∈ ?r ⟶ P usvs'"
  then show "P usvs"
    by - (rule step; clarsimp; metis eq_fst_iff)
qed

lemma op_induct'[case_names step]:
  assumes step: "⋀us. (⋀us'. lt⋅(slength⋅us')⋅(slength⋅us) = TT ⟹ P us') ⟹ P us"
  shows "P us"
by (rule op_induct[where P="P ∘ prod.fst" and usvs="(us, vs)" for vs::unit, simplified])
   (fastforce intro: step)

lemma split_snoc_op:
  "split⋅pat⋅(xs :@ [:x:]) = op⋅pat⋅(split⋅pat⋅xs)⋅x"
proof(induct "split⋅pat⋅xs" arbitrary: xs rule: op_induct)
  case (step xs) show ?case
  proof(cases "pat = ⊥" "xs = ⊥" "x = ⊥" rule: bool.exhaust[case_product bool.exhaust bool.exhaust])
    case False_False_False
    obtain us vs where *: "split⋅pat⋅xs = (us, vs)" by fastforce
    from False_False_False * have **: "prefix⋅(us :@ [:x:])⋅pat = prefix⋅[:x:]⋅vs"
      using split_pattern same_prefix_prefix sappend_bottom_iff by blast
    from False_False_False * **
    have ***: "sdrop⋅(slength⋅(us :@ [:x:]))⋅pat = stail⋅vs" if "prefix⋅(us :@ [:x:])⋅pat = TT"
      using sdrop_sappend_same[where xs="us :@ [:x:]"] that
      by (cases vs; clarsimp) (fastforce dest!: split_pattern)
    from False_False_False * ** *** show ?thesis
      apply -
      apply (subst split_snoc)
      apply (subst split.unfold)
      apply (subst op.unfold)
      apply (fastforce simp: If2_def[symmetric] snull_FF_conv split: If2_splits intro: step split_length_lt)
      done
  qed simp_all
qed

lemma split_sfoldl_op:
  assumes "pat ≠ ⊥"
  shows "sfoldl⋅(op⋅pat)⋅([::], pat) = split⋅pat" (is "?lhs = ?rhs")
proof -
  have "?lhs⋅xs = ?rhs⋅xs" for xs
    using assms by (induct xs rule: srev_induct) (simp_all add: split_snoc_op)
  then show ?thesis by (simp add: cfun_eq_iff)
qed

lemma matches_op:
  shows "matches⋅pat = smap⋅cfst oo sfilter⋅(snull oo csnd oo csnd)
                                oo sscanl⋅(Λ (n, usvs) x. (n + 1, op⋅pat⋅usvs⋅x))⋅(0, ([::], pat))" (is "?lhs = ?rhs")
proof(cases "pat = ⊥")
  case True
  then have "?lhs⋅xs = ?rhs⋅xs" for xs by (cases xs; clarsimp)
  then show ?thesis by (simp add: cfun_eq_iff)
next
  case False then show ?thesis
    apply (subst (2) oo_assoc)
    apply (rule Bird_strategy)
    apply (simp_all add: endswith_split split_sfoldl_op oo_assoc)
    done
qed

text‹

Using @{thm [source] "split_sfoldl_op"} we can rewrite @{const ‹op›} into a more perspicuous form
that exhibits how KMP handles the failure of the text to continue matching the pattern:

›

fixrec
  op' :: "[:'a::Eq_def:] → [:'a:] × [:'a:] → 'a → [:'a:] × [:'a:]"
where
[simp del]:
  "op'⋅pat⋅(us, vs)⋅x =
     (     If prefix⋅[:x:]⋅vs then (us :@ [:x:], stail⋅vs) ― ‹ continue matching ›
      else If snull⋅us then ([::], pat) ― ‹ fail at the start of the pattern: discard ‹x› ›
      else sfoldl⋅(op'⋅pat)⋅([::], pat)⋅(stail⋅us :@ [:x:]) ― ‹ fail later: discard ‹shead⋅us› and determine where to restart ›
     )"

text‹

Intuitively if ‹x› continues the pattern match then we
extend the @{const ‹split›} of ‹pat›
recorded in ‹us› and ‹vs›.  Otherwise we
need to find a prefix of ‹pat› to continue matching
with. If we have yet to make any progress (i.e., ‹us =
[::]›) we restart with the entire ‹pat› (aka
‹z›) and discard ‹x›.  Otherwise, because a
match cannot begin with @{term ‹us :@ [:x:]›}, we @{const
‹split›} ‹pat› (aka ‹z›) by
iterating @{const ‹op'›} over @{term
‹stail⋅us :@ [:x:]›}.  The remainder of the
development is about memoising this last computation.

This is not yet the full KMP algorithm as it lacks what we call the
`K' optimisation, which we add in \S\ref{sec:KMP:data_refinement}.
Note that a termination proof for @{const "op'"} in HOL is tricky due
to its use of higher-order nested recursion via @{const
‹sfoldl›}.

›

lemma op'_strict[simp]:
  "op'⋅pat⋅⊥ = ⊥"
  "op'⋅pat⋅(us, ⊥) = ⊥"
  "op'⋅pat⋅usvs⋅⊥ = ⊥"
by fixrec_simp+

lemma sfoldl_op'_strict[simp]:
  "op'⋅pat⋅(sfoldl⋅(op'⋅pat)⋅(us, ⊥)⋅xs)⋅x = ⊥"
by (induct xs arbitrary: x rule: srev_induct) simp_all

lemma op'_op:
  shows "op'⋅pat⋅usvs⋅x = op⋅pat⋅usvs⋅x"
proof(cases "pat = ⊥" "x = ⊥" rule: bool.exhaust[case_product bool.exhaust])
  case True_False then show ?thesis
    apply (subst op'.unfold)
    apply (subst op.unfold)
    apply simp
    done
next
  case False_False then show ?thesis
  proof(induct usvs arbitrary: x rule: op_induct)
    case (step usvs x)
    have *: "sfoldl⋅(op'⋅pat)⋅([::], pat)⋅xs = sfoldl⋅(op⋅pat)⋅([::], pat)⋅xs"
         if "lt⋅(slength⋅xs)⋅(slength⋅(cfst⋅usvs)) = TT" for xs
    using that
    proof(induct xs rule: srev_induct)
      case (ssnoc x' xs')
      from ssnoc(1,2,4) have "lt⋅(slength⋅xs')⋅(slength⋅(cfst⋅usvs)) = TT"
        using lt_slength_0(2) lt_trans by auto
      moreover
      from step(2) ssnoc(1,2,4) have "lt⋅(slength⋅(cfst⋅(split⋅pat⋅xs')))⋅(slength⋅(cfst⋅usvs)) = TT"
        using lt_trans split_length_lt by (auto 10 0)
      ultimately show ?case by (simp add: ssnoc.hyps split_sfoldl_op split_snoc_op step)
    qed simp_all
    from step.prems show ?case
      apply (subst op'.unfold)
      apply (subst op.unfold)
      apply (clarsimp simp: If2_def[symmetric] snull_FF_conv split_sfoldl_op[symmetric] * split: If2_splits)
      apply (clarsimp simp: split_sfoldl_op step split_length_lt)
      done
  qed
qed simp_all


subsection‹ Step 2: Data refinement and the `K' optimisation \label{sec:KMP:data_refinement} ›

text‹

Bird memoises the restart computation in @{const ‹op'›} in two steps.  The first reifies
the control structure of @{const ‹op'›} into a non-wellfounded tree, which we discuss here. The
second increases the sharing in this tree; see \S\ref{sec:KMP:increase_sharing}.

Briefly, we cache the @{term ‹sfoldl⋅(op'⋅pat)⋅([::], pat)⋅(stail⋅us :@ [:x:])›}
computation in @{const ‹op'›} by finding a ``representation'' type @{typ "'t"}
for the ``abstract'' type @{typ ‹[:'a::Eq_def:] × [:'a:]›}, a
pair of functions @{term ‹rep :: [:'a::Eq_def:] × [:'a:] → 't›},
@{term ‹abs :: 't → [:'a::Eq_def:] × [:'a:]›} where @{prop ‹abs oo rep = ID›}, and then
finding a derived form of @{const ‹op'›} that works on @{typ "'t"} rather
than @{typ "[:'a::Eq_def:] × [:'a:]"}. We also take the opportunity to add the `K' optimisation in the form of the @{term ‹next›}
function.

As such steps are essentially @{emph ‹deus ex machina›}, we try to provide some intuition
after showing the new definitions.

›

domain 'a tree ―‹ Bird p130 ›
  = Null
  | Node (label :: 'a) (lazy left :: "'a tree") (lazy right :: "'a tree") ―‹ Strict in the label @{typ "'a"} ›

(*<*)

lemma tree_injects'[simp]: ―‹ An unconditional form of @{thm [source] tree.injects}. ›
  "(Node⋅a⋅l⋅r = Node⋅a'⋅l'⋅r') ⟷ (a = a' ∧ (a ≠ ⊥ ⟶ l = l' ∧ r = r'))"
by (cases "a = ⊥"; clarsimp)

lemma match_Null_match_Node_tree_case: "match_Null⋅t⋅k1 +++ match_Node⋅t⋅k2 = tree_case⋅k1⋅k2⋅t"
by (cases t) simp_all

lemma match_Node_mplus_match_Node: "match_Node⋅x⋅k1 +++ match_Node⋅x⋅k2 = match_Node⋅x⋅(Λ v l r. k1⋅v⋅l⋅r +++ k2⋅v⋅l⋅r)"
by (cases x; clarsimp)

lemma tree_case_distr:
  "f⋅⊥ = ⊥ ⟹ f⋅(tree_case⋅g⋅h⋅t) = tree_case⋅(f⋅g)⋅(Λ x l r. f⋅(h⋅x⋅l⋅r))⋅t"
  "(tree_case⋅g'⋅h'⋅t)⋅z = tree_case⋅(g'⋅z)⋅(Λ x l r. h'⋅x⋅l⋅r⋅z)⋅t"
by (case_tac [!] t) simp_all

lemma tree_case_cong:
  assumes "t = t'"
  assumes "t' = Null ⟹ n = n'"
  assumes "⋀v l r. ⟦t' = Node⋅v⋅l⋅r; v ≠ ⊥⟧ ⟹ c v l r = c' v l r"
  assumes "cont (λ(x, y, z). c x y z)"
  assumes "cont (λ(x, y, z). c' x y z)"
  shows "tree_case⋅n⋅(Λ v l r. c v l r)⋅t = tree_case⋅n'⋅(Λ v l r. c' v l r)⋅t'"
using assms by (cases t; cases t'; clarsimp simp: prod_cont_iff)

lemma tree_take_smaller:
  assumes "tree_take i⋅t = tree_take i⋅u"
  assumes "j ≤ i"
  shows "tree_take j⋅t = tree_take j⋅u"
using assms by (metis min.orderE tree.take_take)

fixrec tree_map' :: "('a → 'b) → 'a tree → 'b tree" where
  "tree_map'⋅f⋅Null = Null"
| "a ≠ ⊥ ⟹ tree_map'⋅f⋅(Node⋅a⋅l⋅r) = Node⋅(f⋅a)⋅(tree_map'⋅f⋅l)⋅(tree_map'⋅f⋅r)"

lemma tree_map'_strict[simp]: "tree_map'⋅f⋅⊥ = ⊥"
by fixrec_simp

lemma tree_map'_ID': "tree_map'⋅ID⋅xs = xs"
by (induct xs) simp_all

lemma tree_map'_ID[simp]: "tree_map'⋅ID = ID"
by (clarsimp simp: cfun_eq_iff tree_map'_ID')

lemma tree_map'_strict_scons[simp]:
  assumes "f⋅⊥ = ⊥"
  shows "tree_map'⋅f⋅(Node⋅a⋅l⋅r) = Node⋅(f⋅a)⋅(tree_map'⋅f⋅l)⋅(tree_map'⋅f⋅r)"
using assms by (cases "a = ⊥"; clarsimp)

lemma tree_map'_comp'[simp]:
  assumes "f⋅⊥ = ⊥"
  shows "tree_map'⋅f⋅(tree_map'⋅g⋅t) = tree_map'⋅(f oo g)⋅t"
using assms by (induct t) simp_all

lemma tree_map'_comp[simp]:
  assumes "f⋅⊥ = ⊥"
  shows "tree_map'⋅f oo tree_map'⋅g = tree_map'⋅(f oo g)"
using assms by (clarsimp simp: cfun_eq_iff)

lemma tree_unique: ―‹ Adapted from @{cite [cite_macro=citet] "Matthews:1999"} for \emph{contractive functions}. ›
  fixes x :: "'a tree"
  assumes xfx: "x = f⋅x"
  assumes f: "⋀i t u. tree_take i⋅t = tree_take i⋅u
                      ⟹ tree_take (Suc i)⋅(f⋅t) = tree_take (Suc i)⋅(f⋅u)"
  shows "x = fix⋅f"
proof(rule tree.take_lemma)
  fix i show "tree_take i⋅x = tree_take i⋅(fix⋅f)"
  proof(induct i)
    case (Suc i) from xfx f[OF Suc, folded fix_eq] show ?case by simp
  qed simp
qed

(*>*)

fixrec "next" :: "[:'a::Eq_def:] → ([:'a:] × [:'a:]) tree → ([:'a:] × [:'a:]) tree" where
  "next⋅[::]⋅t = t"
| "⟦x ≠ ⊥; xs ≠ ⊥⟧ ⟹
   next⋅(x :# xs)⋅Null = Null"
| "⟦x ≠ ⊥; xs ≠ ⊥⟧ ⟹
   next⋅(x :# xs)⋅(Node⋅(us, [::])⋅l⋅r) = Node⋅(us, [::])⋅l⋅r"
| "⟦v ≠ ⊥; vs ≠ ⊥; x ≠ ⊥; xs ≠ ⊥⟧ ⟹
   next⋅(x :# xs)⋅(Node⋅(us, v :# vs)⋅l⋅r) = If eq⋅x⋅v then l else Node⋅(us, v :# vs)⋅l⋅r"

fixrec ―‹ Bird p131 ``an even simpler form'', with the `K' optimisation ›
    root2  :: "[:'a::Eq_def:] → ([:'a:] × [:'a:]) tree"
and op2    :: "[:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree"
and rep2   :: "[:'a:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree"
and left2  :: "[:'a:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree"
and right2 :: "[:'a:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree"
where
  [simp del]:
  "root2⋅pat = rep2⋅pat⋅([::], pat)"
| "op2⋅pat⋅Null⋅x = root2⋅pat"
| "usvs ≠ ⊥ ⟹
   op2⋅pat⋅(Node⋅usvs⋅l⋅r)⋅x = If prefix⋅[:x:]⋅(csnd⋅usvs) then r else op2⋅pat⋅l⋅x"
| [simp del]:
  "rep2⋅pat⋅usvs = Node⋅usvs⋅(left2⋅pat⋅usvs)⋅(right2⋅pat⋅usvs)"
| "left2⋅pat⋅([::], vs) = next⋅vs⋅Null"
| "⟦u ≠ ⊥; us ≠ ⊥⟧ ⟹
   left2⋅pat⋅(u :# us, vs) = next⋅vs⋅(sfoldl⋅(op2⋅pat)⋅(root2⋅pat)⋅us)" ―‹ Note the use of @{term ‹op2›} and @{const ‹next›}. ›
| "right2⋅pat⋅(us, [::]) = Null" ―‹ Unreachable ›
| "⟦v ≠ ⊥; vs ≠ ⊥⟧ ⟹
   right2⋅pat⋅(us, v :# vs) = rep2⋅pat⋅(us :@ [:v:], vs)"

fixrec abs2 :: "([:'a:] × [:'a:]) tree → [:'a:] × [:'a:]" where
 "usvs ≠ ⊥ ⟹ abs2⋅(Node⋅usvs⋅l⋅r) = usvs"

fixrec matches2 :: "[:'a::Eq_def:] → [:'a:] → [:Integer:]" where
[simp del]: "matches2⋅pat = smap⋅cfst oo sfilter⋅(snull oo csnd oo abs2 oo csnd)
                                     oo sscanl⋅(Λ (n, x) y. (n + 1, op2⋅pat⋅x⋅y))⋅(0, root2⋅pat)"

text‹

\begin{figure}
  \centering
  \begin{tikzpicture}[
    shorten >=1pt,
    node distance=1.5cm,
    on grid,
    auto,
    initial text=,
    thick,
    accepting/.style = {rectangle,minimum size=0.3cm}
    ]
    \node[state,accepting] (q_0i)                     {};
    \node[state,initial]   (q_0)  [right=of q_0i]     {$q_0$};
    \node[state]           (q_1)  [right=of q_0]      {$q_1$};
    \node[state]           (q_2)  [right=of q_1]      {$q_2$};
    \node[state]           (q_3)  [right=of q_2]      {$q_3$};
    \node[state]           (q_4)  [right=of q_3]      {$q_4$};
    \node[state,double]    (q_5)  [right=of q_4]      {$q_5$};
    \node[state,accepting] (q_5r) [right=of q_5]      {};

    \path[->] (q_0) edge [bend left] node [above]       {0} (q_1)
              (q_1) edge [bend left] node [above]       {1} (q_2)
              (q_2) edge [bend left] node [above]       {0} (q_3)
              (q_3) edge [bend left] node [above]       {0} (q_4)
              (q_4) edge [bend left] node [above]       {1} (q_5)
              (q_5) edge [bend left] node [above]       {*} (q_5r);

    \path[->] (q_0) edge [bend right]               (q_0i)
              (q_1) edge [bend left]                (q_0)
              (q_2) edge [bend left]                (q_0)  % MP
              (q_2) edge [bend left,color=red]      (q_0i) % K opt
              (q_3) edge [bend left]                (q_1)
              (q_4) edge [bend left]                (q_1)  % MP
              (q_4) edge [bend left,color=red]      (q_0)  % K opt
              (q_5) edge [bend left]                (q_2);
  \end{tikzpicture}
  \caption{An example from \<^citet>‹‹\S2.1› in "CrochemoreRytter:2002"›. The MP tree for the
    pattern $01001$ is drawn in black: right transitions are labelled
    with a symbol, whereas left transitions are unlabelled. The two
    `K'-optimised left transitions are shown in red. The boxes denote
    @{const ‹Null›}. The root node is $q_0$.}
  \label{fig:example_tree}
\end{figure}

This tree can be interpreted as a sort of automaton\footnote{\<^citet>‹‹\S3.1› in "Bird:2012"› suggests it can
be thought of as a doubly-linked list, following \<^citet>‹"TakeichiAkama:1990"›.)}, where @{const
‹op2›} goes @{const ‹right›} if the pattern
continues with the next element of the text, and @{const
‹left›} otherwise, to determine how much of a prefix of
the pattern could still be in play.  Figure~\ref{fig:example_tree}
visualises such an automaton for the pattern $01001$, used by \<^citet>‹‹\S2.1› in "CrochemoreRytter:2002"› to
illustrate the difference between Morris-Pratt (MP) and
Knuth-Morris-Pratt (KMP) preprocessing as we discuss below.  Note that
these are not the classical Mealy machines that correspond to regular
expressions, where all outgoing transitions are labelled with symbols.

The following lemma shows how our sample automaton is encoded as a non-wellfounded tree.

›

lemma concrete_tree_KMP:
  shows "root2⋅[:0::Integer, 1, 0, 0, 1:]
      = (μ q0. Node⋅([::], [:0, 1, 0, 0, 1:])
                     ⋅Null
                     ⋅(μ q1. Node⋅([:0:], [:1, 0, 0, 1:])
                          ⋅q0
                          ⋅(μ q2. Node⋅([:0,1:], [:0, 0, 1:])
                               ⋅Null ―‹ K optimisation: MP ‹q0› ›
                               ⋅(Node⋅([:0,1,0:], [:0, 1:])
                                     ⋅q1
                                     ⋅(Node⋅([:0,1,0,0:], [:1:])
                                          ⋅q0 ―‹ K optimisation: MP ‹q1› ›
                                          ⋅(Node⋅([:0,1,0,0,1:], [::])⋅q2⋅Null))))))"
(is "?lhs = fix⋅?F")
proof(rule tree_unique) ✐‹tag invisible›
  note rep2.simps[simp]
  show "?lhs = ?F⋅?lhs"
    apply (subst root2.unfold; simp)
    apply (rule tree_unique; simp)
     apply (intro conjI)
      apply (subst (1) root2.unfold; simp)
      apply (subst (1) root2.unfold; fastforce)
     apply (rule tree_unique; simp)
      apply (intro conjI)
         apply (subst (1) root2.unfold; simp)
         apply (subst (1) root2.unfold; simp)
        apply (subst (1) root2.unfold; simp)
        apply (subst (1) root2.unfold; fastforce)
       apply (subst (1) root2.unfold; simp)
       apply (subst (1) root2.unfold; simp)
       apply (subst (1) root2.unfold; simp)
       apply (subst (1 2) root2.unfold; fastforce)
      apply (subst (1) root2.unfold; simp)
      apply (subst (1) root2.unfold; simp)
      apply (subst (1) root2.unfold; fastforce)
     apply (rename_tac i t u; case_tac i; clarsimp)
     apply (rename_tac t u i; case_tac i; clarsimp)
     apply (rename_tac t u i; case_tac i; clarsimp)
     apply (meson Suc_n_not_le_n linear tree_take_smaller)
    apply (rule parallel_fix_ind; simp)
    apply (rename_tac i t u x y; case_tac i; clarsimp)
    apply (rename_tac i; case_tac i; clarsimp; intro conjI)
     apply (meson Suc_n_not_le_n linear tree_take_smaller)
    apply (rename_tac i; case_tac i; clarsimp)
    apply (rename_tac i; case_tac i; clarsimp)
    apply (meson Suc_n_not_le_n linear tree_take_smaller)
    done
  fix i :: nat
  fix t u :: "([:Integer:] × [:Integer:]) tree"
  assume "tree_take i⋅t = tree_take i⋅u"
  then show "tree_take (Suc i)⋅(?F⋅t) = tree_take (Suc i)⋅(?F⋅u)"
    apply simp
    apply (rule parallel_fix_ind; simp)
    apply (case_tac i; clarsimp; intro conjI)
     apply (meson Suc_n_not_le_n linear tree_take_smaller)
    apply (rule parallel_fix_ind; simp)
    apply (rename_tac j t0 t1; case_tac j; clarsimp)
    apply (rename_tac j; case_tac j; clarsimp; intro conjI)
     apply (meson Suc_n_not_le_n linear tree_take_smaller)
    apply (rename_tac j; case_tac j; clarsimp; intro conjI)
     apply (meson Suc_n_not_le_n linear tree_take_smaller)
    apply (rename_tac j; case_tac j; clarsimp)
    apply (meson Suc_n_not_le_n linear tree_take_smaller)
    done
qed

text‹

The sharing that we expect from a lazy (call-by-need) evaluator is here implied by the use of
nested fixed points.

The KMP preprocessor is expressed by the @{const ‹left2›} function, where @{const ‹op2›} is used
to match the pattern against itself; the use of @{const ‹op2›} in @{const ‹matches2›} (``the driver'')
is responsible for matching the (preprocessed) pattern against the text. This formally cashes in
an observation by \<^citet>‹‹\S5› in "vanderWoude:1989"›, that these two algorithms
are essentially the same, which has eluded other presentations\footnote{For instance, contrast
our shared use of @{const ‹op2›} with the separated \texttt{match}
and \texttt{rematch} functions of \<^citet>‹‹Figure~1› in "AgerDanvyRohde:2006"›.}.

Bird uses @{const ‹Null›} on a left path to signal to the driver that it should discard the
current element of the text and restart matching from the beginning of the pattern (i.e,
@{const ‹root2›}). This is a step towards the removal of @{term ‹us›} in \S\ref{sec:KMP:step8}.

Note that the @{const ‹Null›} at the end of the rightmost path is unreachable: the rightmost
@{const ‹Node›} has @{term "vs = [::]"} and therefore @{const ‹op2›} always takes the left branch.

The `K' optimisation is perhaps best understood by example. Consider
the automaton in Figure~\ref{fig:example_tree}, and a text beginning
with \texttt{011}. Using the MP (black) transitions we take the path
$\rightarrow q_0 \stackrel{{\mathtt{0}}}{\rightarrow} q_1
\stackrel{\mathtt{1}}{\rightarrow} \overbrace{q_2 \rightarrow q_0
\rightarrow \Box}$. Now, due to the failure of the comparison of the
current element of the text (\texttt{1}) at $q_2$, we can predict that
the (identical) comparison at node $q_0$ will fail as well, and
therefore have $q_2$ left-branch directly to $\Box$. This saves a
comparison in the driver at the cost of another in the preprocessor
(in @{const ‹next›}). These optimisations are the red
arrows in the diagram, and can in general save an arbitrary number of
driver comparisons; consider the pattern $\mathtt{1}^n$ for instance.

More formally, @{const ‹next›} ensures that the heads of
the suffixes of the pattern (@{term ‹vs›}) on consecutive
labels on left paths are distinct; see below for a proof of this fact
in our setting, and \<^citet>‹‹\S3.3.4› in
"Gusfield:1997"› for a classical account. Unlike Bird's suggestion
(p134), our @{const ‹next›} function is not recursive.

We note in passing that while MP only allows ‹Null› on
the left of the root node, ‹Null› can be on the left of
any KMP node except for the rightmost
(i.e., the one that signals a complete pattern match) where no optimisation is possible.

We proceed with the formalities of the data refinement.

›

schematic_goal root2_op2_rep2_left2_right2_def: ― ‹ Obtain the definition of these functions as a single fixed point ›
  "( root2  :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
   , op2    :: [:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
   , rep2   :: [:'a:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree
   , left2  :: [:'a:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree
   , right2 :: [:'a:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )
   = fix⋅?F"
unfolding op2_def root2_def rep2_def left2_def right2_def by simp

lemma abs2_strict[simp]:
  "abs2⋅⊥ = ⊥"
  "abs2⋅Null = ⊥"
by fixrec_simp+

lemma next_strict[simp]:
  "next⋅⊥ = ⊥"
  "next⋅xs⋅⊥ = ⊥"
  "next⋅(x :# xs)⋅(Node⋅(us, ⊥)⋅l⋅r) = ⊥"
  apply fixrec_simp
 apply (cases xs; fixrec_simp; simp)
apply (cases "x = ⊥"; cases "xs = ⊥"; cases "us = ⊥"; fixrec_simp)
done

lemma next_Null[simp]:
  assumes "xs ≠ ⊥"
shows "next⋅xs⋅Null = Null"
using assms by (cases xs) simp_all

lemma next_snil[simp]:
  assumes "xs ≠ ⊥"
  shows "next⋅xs⋅(Node⋅(us, [::])⋅l⋅r) = Node⋅(us, [::])⋅l⋅r"
using assms by (cases xs) simp_all

lemma op2_rep2_left2_right2_strict[simp]:
  "op2⋅pat⋅⊥ = ⊥"
  "op2⋅pat⋅(Node⋅(us, ⊥)⋅l⋅r) = ⊥"
  "op2⋅pat⋅(Node⋅usvs⋅l⋅r)⋅⊥ = ⊥"
  "rep2⋅pat⋅⊥ = ⊥"
  "left2⋅pat⋅(⊥, vs) = ⊥"
  "left2⋅pat⋅(us, ⊥) = ⊥"
  "right2⋅pat⋅(us, ⊥) = ⊥"
      apply fixrec_simp
     apply (cases "us = ⊥"; fixrec_simp; simp)
    apply (cases "usvs = ⊥"; fixrec_simp; simp)
   apply fixrec_simp
  apply fixrec_simp
 apply (cases us; fixrec_simp)
apply fixrec_simp
done

lemma snd_abs_root2_bottom[simp]: "prod.snd (abs2⋅(root2⋅⊥)) = ⊥"
by (simp add: root2.unfold rep2.unfold)

lemma abs_rep2_ID'[simp]: "abs2⋅(rep2⋅pat⋅usvs) = usvs"
by (cases "usvs = ⊥"; subst rep2.unfold; clarsimp)

lemma abs_rep2_ID: "abs2 oo rep2⋅pat = ID"
by (clarsimp simp: cfun_eq_iff)

lemma rep2_snoc_right2: ―‹ Bird p131 ›
  assumes "prefix⋅[:x:]⋅vs = TT"
  shows "rep2⋅pat⋅(us :@ [:x:], stail⋅vs) = right2⋅pat⋅(us, vs)"
using assms by (cases "x = ⊥"; cases vs; clarsimp)

lemma not_prefix_op2_next:
  assumes "prefix⋅[:x:]⋅xs = FF"
  shows "op2⋅pat⋅(next⋅xs⋅(rep2⋅pat⋅usvs))⋅x = op2⋅pat⋅(rep2⋅pat⋅usvs)⋅x"
proof -
  obtain us vs where "usvs = (us, vs)" by force
  with assms show ?thesis
    by (cases xs; cases us; clarsimp; cases vs;
        clarsimp simp: rep2.simps prefix_singleton_FF If2_def[symmetric] split: If2_splits)
qed

text‹

Bird's appeal to ‹foldl_fusion› (p130) is too weak to
justify this data refinement as his condition (iii) requires the
worker functions to coincide on all representation values. Concretely
he asks that:

\begin{center}
  @{prop "rep2⋅pat⋅(op⋅pat⋅(abs2⋅t)⋅x) = op2⋅pat⋅t⋅x"} ―‹Bird (17.2)›
\end{center}

where ‹t› is an arbitrary tree. This does not hold for junk representations
such as:

\begin{center}
  @{term ‹t = Node⋅(pat, [::])⋅Null⋅Null›}
\end{center}

Using worker/wrapper fusion \<^citep>‹"GillHutton:2009" and "Gammie:2011"› specialised to @{const
‹sscanl›} (@{thm [source] "sscanl_ww_fusion"}) we only
need to establish this identity for valid representations, i.e., when
‹t› lies under the image of ‹rep2›. In
pictures, we show that this diagram commutes:

\begin{center}
  \begin{tikzcd}[column sep=8em]
    usvs \arrow[r, "‹Λ usvs. op⋅pat⋅usvs⋅x›"] \arrow[d, "‹rep2⋅pat›"] & usvs' \arrow[d, "‹rep2⋅pat›"] \\
    t \arrow[r, "‹Λ usvs. op2⋅pat⋅usvs⋅x ›"]                         & t'
  \end{tikzcd}
\end{center}

Clearly this result self-composes: after an initial @{term
‹rep2⋅pat›} step, we can repeatedly simulate
@{const ‹op›} steps with @{const ‹op2›} steps.

›

lemma op_op2_refinement:
  assumes "pat ≠ ⊥"
  shows "rep2⋅pat⋅(op⋅pat⋅usvs⋅x) = op2⋅pat⋅(rep2⋅pat⋅usvs)⋅x"
proof(cases "x = ⊥" "usvs = ⊥" rule: bool.exhaust[case_product bool.exhaust])
  case False_False
  then have "x ≠ ⊥" "usvs ≠ ⊥" by simp_all
  then show ?thesis
  proof(induct usvs arbitrary: x rule: op_induct)
    case (step usvs)
    obtain us vs where usvs: "usvs = (us, vs)" by fastforce
    have *: "sfoldl⋅(op2⋅pat)⋅(root2⋅pat)⋅xs = rep2⋅pat⋅(split⋅pat⋅xs)" if "lt⋅(slength⋅xs)⋅(slength⋅us) = TT" for xs
    using that
    proof(induct xs rule: srev_induct)
      case (ssnoc x xs)
      from ssnoc(1,2,4) have IH: "sfoldl⋅(op2⋅pat)⋅(root2⋅pat)⋅xs = rep2⋅pat⋅(split⋅pat⋅xs)"
        by - (rule ssnoc; auto intro: lt_trans dest: lt_slength_0)
      obtain us' vs' where us'vs': "split⋅pat⋅xs = (us', vs')" by fastforce
      from ‹pat ≠ ⊥› ssnoc(1,2,4) usvs show ?case
        apply (clarsimp simp: split_sfoldl_op[symmetric] IH)
        apply (rule step(1)[simplified abs_rep2_ID', simplified, symmetric])
        using lt_trans split_length_lt split_sfoldl_op apply fastforce+
        done
    qed (fastforce simp: ‹pat ≠ ⊥› root2.unfold)+
    have **: "If snull⋅us then rep2⋅pat⋅([::], pat) else rep2⋅pat⋅(op⋅pat⋅(split⋅pat⋅(stail⋅us))⋅x)
            = op2⋅pat⋅(left2⋅pat⋅(us, vs))⋅x" if "prefix⋅[:x:]⋅vs = FF"
    proof(cases us)
      case snil with that show ?thesis
        by simp (metis next_Null op2.simps(1) prefix.simps(1) prefix_FF_not_snilD root2.simps)
    next
      case (scons u' us')
      from ‹pat ≠ ⊥› scons have "lt⋅(slength⋅(cfst⋅(split⋅pat⋅us')))⋅(slength⋅us) = TT"
        using split_length_lt by fastforce
      from ‹pat ≠ ⊥› ‹x ≠ ⊥› usvs that scons this show ?thesis
        by (clarsimp simp: * step(1)[simplified abs_rep2_ID'] not_prefix_op2_next)
    qed simp
    from ‹usvs ≠ ⊥› usvs show ?case
      apply (subst (2) rep2.unfold)
      apply (subst op2.unfold)
      apply (subst op.unfold)
      apply (clarsimp simp: If_distr rep2_snoc_right2 ** cong: If_cong)
      done
  qed
qed (simp_all add: rep2.unfold)

text‹

Therefore the result of this data refinement is extensionally equal to
the specification:

›

lemma data_refinement:
  shows "matches = matches2"
proof(intro cfun_eqI)
  fix pat xs :: "[:'a:]" show "matches⋅pat⋅xs = matches2⋅pat⋅xs"
  proof(cases "pat = ⊥")
    case True then show ?thesis by (cases xs; clarsimp simp: matches2.simps)
  next
    case False then show ?thesis
      unfolding matches2.simps
      apply (subst matches_op) ―‹ Continue with previous derivation. ›
      apply (subst sscanl_ww_fusion[where wrap="ID ** abs2" and unwrap="ID ** rep2⋅pat" and f'="Λ (n, x) y. (n + 1, op2⋅pat⋅x⋅y)"])
        apply (simp add: abs_rep2_ID)
       apply (simp add: op_op2_refinement)
      apply (simp add: oo_assoc sfilter_smap root2.unfold)
      apply (simp add: oo_assoc[symmetric])
      done
  qed
qed

text‹

This computation can be thought of as a pair coroutines with a
producer (@{const ‹root2›}/@{const ‹rep2›})
/ consumer (@{const ‹op2›}) structure. It turns out that
laziness is not essential (see \S\ref{sec:implementations}), though it
does depend on being able to traverse incompletely defined trees.

The key difficulty in defining this computation in HOL using present
technology is that @{const ‹op2›} is neither terminating
nor @{emph ‹friendly›} in the terminology of \<^citet>‹"BlanchetteEtAl:2017"›.

While this representation works for automata with this sort of
structure, it is unclear how general it is; in particular it may not
work so well if @{const ‹left›} branches can go forward
as well as back. See also the commentary in \<^citet>‹"HinzeJeuring:2001"›, who observe that sharing is easily lost, and so
it is probably only useful in ``closed'' settings like the present
one, unless the language is extended in unusual ways \<^citep>‹"JeanninEtAl:2017"›.

\label{thm:k_property}

We conclude by proving that @{const ‹rep2›} produces
trees that have the `K' property, viz that labels on consecutive nodes
on a left path do not start with the same symbol. This also
establishes the productivity of @{const ‹root2›}. The
pattern of proof used here -- induction nested in coinduction --
recurs in \S\ref{sec:KMP:increase_sharing}.

›

coinductive K :: "([:'a::Eq:] × [:'a:]) tree ⇒ bool" where
  "K Null"
| "⟦ usvs ≠ ⊥; K l; K r;
     ⋀v vs. csnd⋅usvs = v :# vs ⟹ l = Null ∨ (∃v' vs'. csnd⋅(label⋅l) = v' :# vs' ∧ eq⋅v⋅v' = FF)
   ⟧ ⟹ K (Node⋅usvs⋅l⋅r)"

declare K.intros[intro!, simp]

lemma sfoldl_op2_root2_rep2_split:
  assumes "pat ≠ ⊥"
  shows "sfoldl⋅(op2⋅pat)⋅(root2⋅pat)⋅xs = rep2⋅pat⋅(split⋅pat⋅xs)"
proof(induct xs rule: srev_induct)
  case (ssnoc x xs) with ‹pat ≠ ⊥› ssnoc show ?case by (clarsimp simp: split_sfoldl_op[symmetric] op_op2_refinement)
qed (simp_all add: ‹pat ≠ ⊥› root2.unfold)

lemma K_rep2:
  assumes "pat ≠ ⊥"
  assumes "us :@ vs = pat"
  shows "K (rep2⋅pat⋅(us, vs))"
using assms
proof(coinduction arbitrary: us vs)
  case (K us vs) then show ?case
  proof(induct us arbitrary: vs rule: op_induct')
    case (step us)
    from step.prems have "us ≠ ⊥" "vs ≠ ⊥" by auto
    show ?case
    proof(cases us)
      case bottom with ‹us ≠ ⊥› show ?thesis by simp
    next
      case snil with step.prems show ?thesis by (cases vs; force simp: rep2.simps)
    next
      case (scons u' us')
      from ‹pat ≠ ⊥› scons ‹us ≠ ⊥› ‹vs ≠ ⊥›
      obtain usl vsl where splitl: "split⋅pat⋅us' = (usl, vsl)" "usl ≠ ⊥" "vsl ≠ ⊥" "usl :@ vsl = pat"
        by (metis (no_types, opaque_lifting) Rep_cfun_strict1 prod.collapse sappend_strict sappend_strict2 split_pattern)
      from scons obtain l r where r: "rep2⋅pat⋅(us, vs) = Node⋅(us, vs)⋅l⋅r" by (simp add: rep2.simps)
      moreover
      have "(∃us vs. l = rep2⋅pat⋅(us, vs) ∧ us :@ vs = pat) ∨ K l"
      proof(cases vs)
        case snil with scons splitl r show ?thesis
          by (clarsimp simp: rep2.simps sfoldl_op2_root2_rep2_split)
      next
        case scons
        with ‹pat ≠ ⊥› ‹us = u' :# us'› ‹u' ≠ ⊥› ‹us' ≠ ⊥› ‹vs ≠ ⊥› r splitl show ?thesis
          apply (clarsimp simp: rep2.simps sfoldl_op2_root2_rep2_split)
          apply (cases vsl; cases usl; clarsimp simp: If2_def[symmetric] sfoldl_op2_root2_rep2_split split: If2_splits)
          apply (rename_tac ul' usl')
          apply (cut_tac us'="prod.fst (split⋅pat⋅usl')" and vs="prod.snd (split⋅pat⋅usl')" in step(1); clarsimp simp: split_pattern)
           apply (metis fst_conv lt_trans slength.simps(2) split_length_lt step.prems(1))
          apply (erule disjE; clarsimp simp: sfoldl_op2_root2_rep2_split)
          apply (rename_tac b l r)
          apply (case_tac b; clarsimp simp: rep2.simps)
           apply (auto simp: If2_def[symmetric] rep2.simps dest: split_pattern[rotated] split: If2_splits)
          done
      qed (simp add: ‹vs ≠ ⊥›)
      moreover
      from ‹us :@ vs = pat› ‹us ≠ ⊥› ‹vs ≠ ⊥› r
      have "(∃usr vsr. r = rep2⋅pat⋅(usr, vsr) ∧ usr :@ vsr = pat) ∨ K r"
        by (cases vs; clarsimp simp: rep2.simps)
      moreover
      have "l = Null ∨ (∃v' vs'. csnd⋅(label⋅l) = v' :# vs' ∧ eq⋅v⋅v' = FF)" if "vs = v :# vs'" for v vs'
      proof(cases vsl)
        case snil with ‹us :@ vs = pat› ‹us = u' :# us'› splitl show ?thesis
          using split_length_lt[where pat=pat and xs=us']
          by (force elim: slengthE simp: one_Integer_def split: if_splits)
      next
        case scons
        from splitl have "lt⋅(slength⋅usl)⋅(slength⋅us' + 1) = TT"
          by (metis fst_conv fst_strict split_bottom_iff split_length_lt)
        with scons ‹pat ≠ ⊥› ‹us = u' :# us'› ‹u' ≠ ⊥› ‹us' ≠ ⊥› ‹vs ≠ ⊥› r splitl ‹vs = v :# vs'› show ?thesis
          using step(1)[OF _ ‹pat ≠ ⊥›, where us'="prod.fst (split⋅pat⋅us')" and vs="prod.snd (split⋅pat⋅us')"]
          by (clarsimp simp: rep2.simps sfoldl_op2_root2_rep2_split If2_def[symmetric] split: If2_splits)
      qed (simp add: ‹vsl ≠ ⊥›)
      moreover note ‹pat ≠ ⊥› ‹us ≠ ⊥› ‹vs ≠ ⊥›
      ultimately show ?thesis by auto
    qed
  qed
qed

theorem K_root2:
  assumes "pat ≠ ⊥"
  shows "K (root2⋅pat)"
using assms unfolding root2.unfold by (simp add: K_rep2)

text‹

The remaining steps are as follows:

▪ 3. introduce an accumulating parameter (‹grep›).
▪ 4. inline ‹rep› and simplify.
▪ 5. simplify to Bird's ``simpler forms.''
▪ 6. memoise ‹left›.
▪ 7. simplify, unfold ‹prefix›.
▪ 8. discard ‹us›.
▪ 9. factor out ‹pat›.

›


subsection‹ Step 3: Introduce an accumulating parameter (grep) ›

text‹

Next we prepare for the second memoization step (\S\ref{sec:KMP:increase_sharing})
by introducing an accumulating parameter to @{const ‹rep2›} that supplies the value of the left
subtree.

We retain @{const ‹rep2›} as a wrapper for now, and inline @{const ‹right2›} to speed up
simplification.

›

fixrec ―‹ Bird p131 / p132 ›
    root3  :: "[:'a::Eq_def:] → ([:'a:] × [:'a:]) tree"
and op3    :: "[:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree"
and rep3   :: "[:'a:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree"
and grep3  :: "[:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree"
where
  [simp del]:
  "root3⋅pat = rep3⋅pat⋅([::], pat)"
| "op3⋅pat⋅Null⋅x = root3⋅pat"
| "usvs ≠ ⊥ ⟹
   op3⋅pat⋅(Node⋅usvs⋅l⋅r)⋅x = If prefix⋅[:x:]⋅(csnd⋅usvs) then r else op3⋅pat⋅l⋅x"
| [simp del]: ―‹ Inline @{const ‹left2›}, factor out @{const ‹next›}. ›
  "rep3⋅pat⋅usvs = grep3⋅pat⋅(case cfst⋅usvs of [::] ⇒ Null | u :# us ⇒ sfoldl⋅(op3⋅pat)⋅(root3⋅pat)⋅us)⋅usvs"
| [simp del]:  ―‹ @{const ‹rep2›} with @{const ‹left2›} abstracted, @{const ‹right2›} inlined. ›
  "grep3⋅pat⋅l⋅usvs = Node⋅usvs⋅(next⋅(csnd⋅usvs)⋅l)⋅(case csnd⋅usvs of
          [::] ⇒ Null
        | v :# vs ⇒ rep3⋅pat⋅(cfst⋅usvs :@ [:v:], vs))"

schematic_goal root3_op3_rep3_grep3_def:
  "( root3  :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
   , op3    :: [:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
   , rep3   :: [:'a:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree
   , grep3  :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )
   = fix⋅?F"
unfolding root3_def op3_def rep3_def grep3_def by simp

lemma r3_2:
  "(Λ (root, op, rep, grep). (root, op, rep))⋅
   ( root3  :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
   , op3    :: [:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
   , rep3   :: [:'a:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree
   , grep3  :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )
 = (Λ (root, op, rep, left, right). (root, op, rep))⋅
   ( root2  :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
   , op2    :: [:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
   , rep2   :: [:'a::Eq_def:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree
   , left2  :: [:'a::Eq_def:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree
   , right2 :: [:'a::Eq_def:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )"
unfolding root2_op2_rep2_left2_right2_def root3_op3_rep3_grep3_def
apply (simp add: match_snil_match_scons_slist_case match_Null_match_Node_tree_case slist_case_distr tree_case_distr)
apply (simp add: fix_cprod fix_const) ―‹ Very slow. Sensitive to tuple order due to the asymmetry of ‹fix_cprod›. ›
apply (simp add: slist_case_distr)
done


subsection‹ Step 4: Inline rep ›

text‹

We further simplify by inlining @{const ‹rep3›} into @{const ‹root3›} and @{const ‹grep3›}.

›

fixrec
    root4  :: "[:'a::Eq_def:] → ([:'a:] × [:'a:]) tree"
and op4    :: "[:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree"
and grep4  :: "[:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree"
where
  [simp del]:
  "root4⋅pat = grep4⋅pat⋅Null⋅([::], pat)"
| "op4⋅pat⋅Null⋅x = root4⋅pat"
| "usvs ≠ ⊥ ⟹
   op4⋅pat⋅(Node⋅usvs⋅l⋅r)⋅x = If prefix⋅[:x:]⋅(csnd⋅usvs) then r else op4⋅pat⋅l⋅x"
| [simp del]:
  "grep4⋅pat⋅l⋅usvs = Node⋅usvs⋅(next⋅(csnd⋅usvs)⋅l)⋅(case csnd⋅usvs of
          [::] ⇒ Null
        | v :# vs ⇒ grep4⋅pat⋅(case cfst⋅usvs :@ [:v:] of
              [::] ⇒ Null ―‹ unreachable ›
            | u :# us ⇒ sfoldl⋅(op4⋅pat)⋅(root4⋅pat)⋅us)⋅(cfst⋅usvs :@ [:v:], vs))"

schematic_goal root4_op4_grep4_def:
  "( root4  :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
   , op4    :: [:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
   , grep4  :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )
   = fix⋅?F"
unfolding root4_def op4_def grep4_def by simp

lemma fix_syn4_permute:
  assumes "cont (λ(X1, X2, X3, X4). F1 X1 X2 X3 X4)"
  assumes "cont (λ(X1, X2, X3, X4). F2 X1 X2 X3 X4)"
  assumes "cont (λ(X1, X2, X3, X4). F3 X1 X2 X3 X4)"
  assumes "cont (λ(X1, X2, X3, X4). F4 X1 X2 X3 X4)"
  shows "fix_syn (λ(X1, X2, X3, X4). (F1 X1 X2 X3 X4, F2 X1 X2 X3 X4, F3 X1 X2 X3 X4, F4 X1 X2 X3 X4))
      = (λ(x1, x2, x4, x3). (x1, x2, x3, x4))
            (fix_syn (λ(X1, X2, X4, X3). (F1 X1 X2 X3 X4, F2 X1 X2 X3 X4, F4 X1 X2 X3 X4, F3 X1 X2 X3 X4)))"
by (induct rule: parallel_fix_ind) (use assms in ‹auto simp: prod_cont_iff›)

lemma r4_3:
  "( root4  :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
   , op4    :: [:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
   , grep4  :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )
 = (Λ (root, op, rep, grep). (root, op, grep))⋅
   ( root3  :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
   , op3    :: [:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
   , rep3   :: [:'a:] → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree
   , grep3  :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )"
unfolding root3_op3_rep3_grep3_def root4_op4_grep4_def
apply (clarsimp simp: slist_case_distr match_Null_match_Node_tree_case tree_case_distr eta_cfun)
apply (subst fix_syn4_permute; clarsimp simp: fix_cprod fix_const) ―‹ Slow ›
done


subsection‹ Step 5: Simplify to Bird's ``simpler forms'' ›

text‹

The remainder of @{const ‹left2›} in @{const ‹grep4›} can be simplified by transforming the
@{text "case"} scrutinee from @{term "cfst⋅usvs :@ [:v:]"} into @{term "cfst⋅usvs"}.

›

fixrec
    root5  :: "[:'a::Eq_def:] → ([:'a:] × [:'a:]) tree"
and op5    :: "[:'a::Eq_def:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree"
and grep5  :: "[:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree"
where
  [simp del]:
  "root5⋅pat = grep5⋅pat⋅Null⋅([::], pat)"
| "op5⋅pat⋅Null⋅x = root5⋅pat"
| "usvs ≠ ⊥ ⟹
   op5⋅pat⋅(Node⋅usvs⋅l⋅r)⋅x = If prefix⋅[:x:]⋅(csnd⋅usvs) then r else op5⋅pat⋅l⋅x"
| [simp del]:
  "grep5⋅pat⋅l⋅usvs = Node⋅usvs⋅(next⋅(csnd⋅usvs)⋅l)⋅(case csnd⋅usvs of
          [::] ⇒ Null
        | v :# vs ⇒ grep5⋅pat⋅(case cfst⋅usvs of ― ‹ was @{term ‹cfst⋅usvs :@ [:v:]›} ›
              [::] ⇒ root5⋅pat
            | u :# us ⇒ sfoldl⋅(op5⋅pat)⋅(root5⋅pat)⋅(us :@ [:v:]))⋅(cfst⋅usvs :@ [:v:], vs))"

schematic_goal root5_op5_grep5_def:
  "( root5  :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
   , op5    :: [:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
   , grep5  :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )
   = fix⋅?F"
unfolding root5_def op5_def grep5_def by simp

lemma op5_grep5_strict[simp]:
  "op5⋅pat⋅⊥ = ⊥"
  "op5⋅pat⋅(Node⋅(us, ⊥)⋅l⋅r) = ⊥"
  "op5⋅pat⋅(Node⋅usvs⋅l⋅r)⋅⊥ = ⊥"
  "grep5⋅pat⋅l⋅⊥ = ⊥"
   apply fixrec_simp
  apply (cases "us = ⊥"; fixrec_simp; simp)
 apply (cases "usvs = ⊥"; fixrec_simp; simp)
apply fixrec_simp
done

lemma r5_4:
  "( root5  :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
   , op5    :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
   , grep5  :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )
 = ( root4  :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
   , op4    :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
   , grep4  :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )"
unfolding root4_op4_grep4_def root5_op5_grep5_def
by (clarsimp simp: slist_case_distr slist_case_snoc stail_sappend cong: slist_case_cong)


subsection‹ Step 6: Memoize left \label{sec:KMP:increase_sharing} ›

text‹

The last substantial step is to memoise the computation of the left subtrees by tying the knot.

Note this makes the computation of ‹us› in the tree redundant; we remove it in \S\ref{sec:KMP:step8}.

›

fixrec ―‹ Bird p132 ›
    root6  :: "[:'a::Eq_def:] → ([:'a:] × [:'a:]) tree"
and op6    :: "[:'a::Eq_def:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree"
and grep6  :: "[:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree"
where
  [simp del]:
  "root6⋅pat = grep6⋅pat⋅Null⋅([::], pat)"
| "op6⋅pat⋅Null⋅x = root6⋅pat"
| "usvs ≠ ⊥ ⟹
   op6⋅pat⋅(Node⋅usvs⋅l⋅r)⋅x = If prefix⋅[:x:]⋅(csnd⋅usvs) then r else op6⋅pat⋅l⋅x"
| [simp del]:
  "grep6⋅pat⋅l⋅usvs = Node⋅usvs⋅(next⋅(csnd⋅usvs)⋅l)⋅(case csnd⋅usvs of
          [::] ⇒ Null
        | v :# vs ⇒ grep6⋅pat⋅(op6⋅pat⋅l⋅v)⋅(cfst⋅usvs :@ [:v:], vs))"

schematic_goal root6_op6_grep6_def:
  "( root6  :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
   , op6    :: [:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
   , grep6  :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )
   = fix⋅?F"
unfolding root6_def op6_def grep6_def by simp

lemma op6_grep6_strict[simp]:
  "op6⋅pat⋅⊥ = ⊥"
  "op6⋅pat⋅(Node⋅(us, ⊥)⋅l⋅r) = ⊥"
  "op6⋅pat⋅(Node⋅usvs⋅l⋅r)⋅⊥ = ⊥"
  "grep6⋅pat⋅l⋅⊥ = ⊥"
   apply fixrec_simp
  apply (cases "us = ⊥"; fixrec_simp; simp)
 apply (cases "usvs = ⊥"; fixrec_simp; simp)
apply fixrec_simp
done

text‹

Intuitively this step cashes in the fact that, in the context of
@{term ‹grep6⋅pat⋅l⋅usvs›}, @{term
"sfoldl⋅(op6⋅pat)⋅(root6⋅pat)⋅us"} is
equal to @{term ‹l›}.

Connecting this step with the previous one is not simply a matter of
equational reasoning; we can see this by observing that the right
subtree of @{term ‹grep5⋅pat⋅l⋅usvs›}
does not depend on @{term ‹l›} whereas that of @{term
‹grep6⋅pat⋅l⋅usvs›} does, and therefore
these cannot be extensionally equal. Furthermore the computations of
the corresponding @{term ‹root›}s do not proceed in
lockstep: consider the computation of the left subtree.

For our purposes it is enough to show that the trees @{const
‹root5›} and @{const ‹root6›} are equal,
from which it follows that @{prop "op6 = op5"} by induction on its
tree argument. The equality is established by exhibiting a @{emph
‹tree bisimulation›} (@{const tree_bisim}) that relates
the corresponding ``producer'' ‹grep› functions. Such a
relation ‹R› must satisfy:

▪ ‹R ⊥ ⊥›;
▪ ‹R Null Null›; and
▪ if ‹R (Node⋅x⋅l⋅r) (Node⋅x'⋅l'⋅r')› then ‹x = x'›, ‹R l l'›, and ‹R r r'›.

›
text‹

The following pair of ‹left› functions define suitable left
paths from the corresponding @{term ‹root›}s.

›

fixrec left5 :: "[:'a::Eq_def:] → [:'a:] → ([:'a:] × [:'a:]) tree" where
  "left5⋅pat⋅[::] = Null"
| "⟦u ≠ ⊥; us ≠ ⊥⟧ ⟹
   left5⋅pat⋅(u :# us) = sfoldl⋅(op5⋅pat)⋅(root5⋅pat)⋅us"

fixrec left6 :: "[:'a::Eq_def:] → [:'a:] → ([:'a:] × [:'a:]) tree" where
  "left6⋅pat⋅[::] = Null"
| "⟦u ≠ ⊥; us ≠ ⊥⟧ ⟹
   left6⋅pat⋅(u :# us) = sfoldl⋅(op6⋅pat)⋅(root6⋅pat)⋅us"

inductive ―‹ This relation is not inductive. ›
  root_bisim :: "[:'a::Eq_def:] ⇒ ([:'a:] × [:'a:]) tree ⇒ ([:'a:] × [:'a:]) tree ⇒ bool"
  for pat :: "[:'a:]"
where
  bottom: "root_bisim pat ⊥ ⊥"
| Null: "root_bisim pat Null Null"
| gl: "⟦pat ≠ ⊥; us ≠ ⊥; vs ≠ ⊥⟧
   ⟹ root_bisim pat (grep6⋅pat⋅(left6⋅pat⋅us)⋅(us, vs)) (grep5⋅pat⋅(left5⋅pat⋅us)⋅(us, vs))"

declare root_bisim.intros[intro!, simp]

lemma left6_left5_strict[simp]:
  "left6⋅pat⋅⊥ = ⊥"
  "left5⋅pat⋅⊥ = ⊥"
by fixrec_simp+

lemma op6_left6: "⟦us ≠ ⊥; v ≠ ⊥⟧ ⟹ op6⋅pat⋅(left6⋅pat⋅us)⋅v = left6⋅pat⋅(us :@ [:v:])"
by (cases us) simp_all

lemma op5_left5: "⟦us ≠ ⊥; v ≠ ⊥⟧ ⟹ op5⋅pat⋅(left5⋅pat⋅us)⋅v = left5⋅pat⋅(us :@ [:v:])"
by (cases us) simp_all

lemma root5_left5: "v ≠ ⊥ ⟹ root5⋅pat = left5⋅pat⋅[:v:]"
by simp

lemma op5_sfoldl_left5: "⟦us ≠ ⊥; u ≠ ⊥; v ≠ ⊥⟧ ⟹
                 op5⋅pat⋅(sfoldl⋅(op5⋅pat)⋅(root5⋅pat)⋅us)⋅v = left5⋅pat⋅(u :# us :@ [:v:])"
by simp

lemma root_bisim_root:
  assumes "pat ≠ ⊥"
  shows "root_bisim pat (root6⋅pat) (root5⋅pat)"
unfolding root6.unfold root5.unfold using assms
by simp (metis (no_types, lifting) left5.simps(1) left6.simps(1) root_bisim.simps slist.con_rews(3))

lemma next_grep6_cases[consumes 3, case_names gl nl]:
  assumes "vs ≠ ⊥"
  assumes "xs ≠ ⊥"
  assumes "P (next⋅xs⋅(grep6⋅pat⋅(left6⋅pat⋅us)⋅(us, vs)))"
  obtains (gl) "P (grep6⋅pat⋅(left6⋅pat⋅us)⋅(us, vs))" | (nl) "P (next⋅vs⋅(left6⋅pat⋅us))"
using assms
apply atomize_elim
apply (subst grep6.unfold)
apply (subst (asm) grep6.unfold)
apply (cases xs; clarsimp)
apply (cases vs; clarsimp simp: If2_def[symmetric] split: If2_splits)
done

lemma root_bisim_op_next56:
  assumes "root_bisim pat t6 t5"
  assumes "prefix⋅[:x:]⋅xs = FF"
  shows "op6⋅pat⋅(next⋅xs⋅t6)⋅x = op6⋅pat⋅t6⋅x ∧ op5⋅pat⋅(next⋅xs⋅t5)⋅x = op5⋅pat⋅t5⋅x"
using ‹root_bisim pat t6 t5›
proof cases
  case Null with assms(2) show ?thesis by (cases xs) simp_all
next
  case (gl us vs) with assms(2) show ?thesis
    apply (cases "x = ⊥", simp)
    apply (cases xs; clarsimp)
    apply (subst (1 2) grep6.simps)
    apply (subst (1 2) grep5.simps)
    apply (cases vs; clarsimp simp: If2_def[symmetric] split: If2_splits)
    done
qed simp

text‹

The main part of establishing that @{const ‹root_bisim›}
is a @{const ‹tree_bisim›} is in showing that the left
paths constructed by the ‹grep›s are @{const
‹root_bisim›}-related. We do this by inducting over the
length of the pattern so far matched (‹us›), as we did
when proving that this tree has the `K' property in
\S\ref{thm:k_property}.

›

lemma
  assumes "pat ≠ ⊥"
  shows root_bisim_op: "root_bisim pat t6 t5 ⟹ root_bisim pat (op6⋅pat⋅t6⋅x) (op5⋅pat⋅t5⋅x)" ― ‹ unused ›
    and root_bisim_next_left: "root_bisim pat (next⋅vs⋅(left6⋅pat⋅us)) (next⋅vs⋅(left5⋅pat⋅us))" (is "?rbnl us vs")
proof -
  let ?ogl5 = "λus vs. op5⋅pat⋅(grep5⋅pat⋅(left5⋅pat⋅us)⋅(us, vs))⋅x"
  let ?ogl6 = "λus vs. op6⋅pat⋅(grep6⋅pat⋅(left6⋅pat⋅us)⋅(us, vs))⋅x"
  let ?for5 = "λus. sfoldl⋅(op5⋅pat)⋅(root5⋅pat)⋅us"
  let ?for6 = "λus. sfoldl⋅(op6⋅pat)⋅(root6⋅pat)⋅us"
  have "⟦?ogl6 us vs = Node⋅usvs⋅l⋅r; cfst⋅usvs ≠ ⊥; x ≠ ⊥⟧ ⟹ le⋅(slength⋅(cfst⋅usvs))⋅(slength⋅us + 1) = TT"
   and "⟦op6⋅pat⋅(next⋅xs⋅(grep6⋅pat⋅(left6⋅pat⋅us)⋅(us, vs)))⋅x = Node⋅usvs⋅l⋅r; cfst⋅usvs ≠ ⊥; x ≠ ⊥; us ≠ ⊥; vs ≠ ⊥⟧ ⟹ le⋅(slength⋅(cfst⋅usvs))⋅(slength⋅us + 1) = TT"
   and "⟦?for6 us = Node⋅usvs⋅l⋅r; cfst⋅usvs ≠ ⊥⟧ ⟹ lt⋅(slength⋅(cfst⋅usvs))⋅(slength⋅us + 1) = TT"
   and "⟦us ≠ ⊥; vs ≠ ⊥⟧ ⟹ root_bisim pat (?ogl6 us vs) (?ogl5 us vs)"
   and "root_bisim pat (?for6 us) (?for5 us)"
   and "?rbnl us vs"
   for usvs l r xs us vs
  proof(induct us arbitrary: usvs l r vs x xs rule: op_induct')
    case (step us)
    have rbl: "root_bisim pat (left6⋅pat⋅us) (left5⋅pat⋅us)"
      by (cases us; fastforce intro: step(5) simp: left6.unfold left5.unfold)
    { case (1 usvs l r vs x)
      from rbl
      have L: "le⋅(slength⋅(prod.fst usvs'))⋅(slength⋅us + 1) = TT"
          if "op6⋅pat⋅(next⋅vs⋅(left6⋅pat⋅us))⋅x = Node⋅usvs'⋅l⋅r"
         and "cfst⋅usvs' ≠ ⊥"
         and "vs ≠ ⊥"
         for usvs' l r
      proof cases
        case bottom with that show ?thesis by simp
      next
        case Null with that show ?thesis
          apply simp
          apply (subst (asm) root6.unfold)
          apply (subst (asm) grep6.unfold)
          apply (fastforce intro: le_plus_1)
          done
      next
        case (gl us'' vs'') show ?thesis
        proof(cases us)
          case bottom with that show ?thesis by simp
        next
          case snil with that show ?thesis
            apply simp
            apply (subst (asm) root6.unfold)
            apply (subst (asm) grep6.unfold)
            apply clarsimp
            done
        next
          case (scons ush ust)
          moreover from that gl scons ‹x ≠ ⊥› have "le⋅(slength⋅(cfst⋅usvs'))⋅(slength⋅us'' + 1) = TT"
            apply simp
            apply (subst (asm) (2) grep6.unfold)
            apply (fastforce dest: step(2, 3)[rotated])
            done
          moreover from gl scons have "lt⋅(slength⋅us'')⋅(slength⋅(stail⋅us) + 1) = TT"
            apply simp
            apply (subst (asm) grep6.unfold)
            apply (fastforce dest: step(3)[rotated])
            done
          ultimately show ?thesis
            apply clarsimp
            apply (metis Integer_le_both_plus_1 Ord_linear_class.le_trans le_iff_lt_or_eq)
            done
        qed
      qed
      from 1 show ?case
        apply (subst (asm) grep6.unfold)
        apply (cases vs; clarsimp simp: If2_def[symmetric] split: If2_splits)
          apply (rule L; fastforce)
         apply (metis (no_types, lifting) ab_semigroup_add_class.add_ac(1) fst_conv grep6.simps le_refl_Integer sappend_snil_id_right slength.simps(2) slength_bottom_iff slength_sappend slist.con_rews(3) tree_injects')
        apply (rule L; fastforce)
        done }
    note slength_ogl = this
    { case (2 usvs l r vs x xs)
      from 2 have "xs ≠ ⊥" by clarsimp
      from ‹vs ≠ ⊥› ‹xs ≠ ⊥› 2(1) show ?case
      proof(cases rule: next_grep6_cases)
        case gl with ‹cfst⋅usvs ≠ ⊥› ‹x ≠ ⊥› show ?thesis using slength_ogl by blast
      next
        case nl
        from rbl show ?thesis
        proof cases
          case bottom with nl ‹cfst⋅usvs ≠ ⊥› show ?thesis by simp
        next
          case Null with nl ‹us ≠ ⊥› ‹vs ≠ ⊥› show ?thesis
            apply simp
            apply (subst (asm) root6.unfold)
            apply (subst (asm) grep6.unfold)
            apply (clarsimp simp: zero_Integer_def one_Integer_def elim!: slengthE)
            done
        next
          case (gl us'' vs'') show ?thesis
          proof(cases us)
            case bottom with ‹us ≠ ⊥› show ?thesis by simp
          next
            case snil with gl show ?thesis by (subst (asm) grep6.unfold) simp
          next
            case (scons u' us') with 2 nl gl show ?thesis
              apply clarsimp
              apply (subst (asm) (4) grep6.unfold)
              apply clarsimp
              apply (drule step(3)[rotated]; clarsimp)
              apply (drule step(2)[rotated]; clarsimp)
              apply (clarsimp simp: lt_le)
              apply (metis Integer_le_both_plus_1 Ord_linear_class.le_trans)
              done
          qed
        qed
      qed }
    { case (3 usvs l r) show ?case
      proof(cases us rule: srev_cases)
        case snil with 3 show ?thesis
          apply (subst (asm) root6.unfold)
          apply (subst (asm) grep6.unfold)
          apply fastforce
          done
      next
        case (ssnoc u' us')
        then have "root_bisim pat (?for6 us') (?for5 us')" by (fastforce intro: step(5))
        then show ?thesis
        proof cases
          case bottom with 3 ssnoc show ?thesis by simp
        next
          case Null with 3 ssnoc show ?thesis
            apply simp
            apply (subst (asm) root6.unfold)
            apply (subst (asm) grep6.unfold)
            apply (clarsimp simp: zero_Integer_def one_Integer_def elim!: slengthE)
            done
        next
          case (gl us'' vs'') with 3 ssnoc show ?thesis
            apply clarsimp
            apply (subst (asm) (2) grep6.unfold)
            apply (fastforce simp: zero_Integer_def one_Integer_def split: if_splits dest!: step(1)[rotated] step(3)[rotated] elim!: slengthE)
            done
        qed
      qed (use 3 in simp) }
    { case (4 vs x) show ?case
      proof(cases "prefix⋅[:x:]⋅vs")
        case bottom then show ?thesis
          apply (subst grep6.unfold)
          apply (subst grep5.unfold)
          apply auto
          done
      next
        case TT with ‹pat ≠ ⊥› ‹us ≠ ⊥› show ?thesis
          apply (subst grep6.unfold)
          apply (subst grep5.unfold)
          apply (cases vs; clarsimp simp: op6_left6)
          apply (cases us; clarsimp simp del: left6.simps left5.simps simp add: root5_left5)
          apply (metis (no_types, lifting) op5_sfoldl_left5 root5_left5 root_bisim.simps sappend_bottom_iff slist.con_rews(3) slist.con_rews(4))
          done
      next
        case FF with ‹pat ≠ ⊥› ‹us ≠ ⊥› show ?thesis
          apply (subst grep6.unfold)
          apply (subst grep5.unfold)
          using rbl
          apply (auto simp: root_bisim_op_next56 elim!: root_bisim.cases intro: root_bisim_root)
          apply (subst (asm) grep6.unfold)
          apply (cases us; fastforce dest: step(3)[rotated] intro: step(4))
          done
      qed }
    { case 5 show ?case
      proof(cases us rule: srev_cases)
        case (ssnoc u' us')
        then have "root_bisim pat (?for6 us') (?for5 us')" by (fastforce intro: step(5))
        then show ?thesis
        proof cases
          case (gl us'' vs'') with ssnoc show ?thesis
            apply clarsimp
            apply (subst (asm) grep6.unfold)
            apply (fastforce dest: step(3)[rotated] intro: step(4))
            done
        qed (use ‹pat ≠ ⊥› ssnoc root_bisim_root in auto)
      qed (use ‹pat ≠ ⊥› root_bisim_root in auto) }
    { case (6 vs)
      from rbl ‹pat ≠ ⊥› show rbnl: "?rbnl us vs"
      proof cases
        case bottom then show ?thesis by fastforce
      next
        case Null then show ?thesis by (cases vs) auto
      next
        case (gl us'' vs'') then show ?thesis
          apply clarsimp
          apply (subst grep5.unfold)
          apply (subst grep6.unfold)
          apply (subst (asm) grep5.unfold)
          apply (subst (asm) grep6.unfold)
          apply (cases us; clarsimp; cases vs''; clarsimp)
           apply (metis Rep_cfun_strict1 bottom left5.simps(2) left6.simps(2) next_snil next_strict(1) rbl)
          apply (cases vs; clarsimp)
          using rbl apply (fastforce dest: step(3)[rotated] intro: step(6) simp: If2_def[symmetric] simp del: eq_FF split: If2_splits)+
          done
    qed }
  qed
  from ‹pat ≠ ⊥› this(4) show "root_bisim pat t6 t5 ⟹ root_bisim pat (op6⋅pat⋅t6⋅x) (op5⋅pat⋅t5⋅x)"
    by (auto elim!: root_bisim.cases intro: root_bisim_root)
  show ‹root_bisim pat (next⋅vs⋅(left6⋅pat⋅us)) (next⋅vs⋅(left5⋅pat⋅us))› by fact
qed

text‹

With this result in hand the remainder is technically fiddly but straightforward.

›

lemmas tree_bisimI = iffD2[OF fun_cong[OF tree.bisim_def[unfolded atomize_eq]], rule_format]

lemma tree_bisim_root_bisim:
  shows "tree_bisim (root_bisim pat)"
proof(rule tree_bisimI, erule root_bisim.cases, goal_cases bottom Null Node)
  case (Node x y us vs) then show ?case
    apply (subst (asm) grep5.unfold)
    apply (subst (asm) grep6.unfold)
    apply hypsubst_thin
    apply (clarsimp simp: root_bisim_next_left)
    apply (cases vs; clarsimp)
    apply (cases us; clarsimp simp del: left6.simps left5.simps simp add: op5_sfoldl_left5 op6_left6)
     apply (metis (no_types, lifting) root5_left5 root_bisim.simps slist.con_rews(3,4))
    apply (metis (no_types, lifting) op5_sfoldl_left5 root_bisim.simps sappend_bottom_iff slist.con_rews(3, 4))
    done
qed simp_all

lemma r6_5:
  shows "(root6⋅pat, op6⋅pat) = (root5⋅pat, op5⋅pat)"
proof(cases "pat = ⊥")
  case True
  from True have "root6⋅pat = root5⋅pat"
    apply (subst root6.unfold)
    apply (subst grep6.unfold)
    apply (subst root5.unfold)
    apply (subst grep5.unfold)
    apply simp
    done
  moreover
  from True ‹root6⋅pat = root5⋅pat› have "op6⋅pat⋅t⋅x = op5⋅pat⋅t⋅x" for t x
    by (induct t) simp_all
  ultimately show ?thesis by (simp add: cfun_eq_iff)
next
  case False
  then have root: "root6⋅pat = root5⋅pat"
    by (rule tree.coinduct[OF tree_bisim_root_bisim root_bisim_root])
  moreover
  from root have "op6⋅pat⋅t⋅x = op5⋅pat⋅t⋅x" for t x by (induct t) simp_all
  ultimately show ?thesis by (simp add: cfun_eq_iff)
qed

text‹

We conclude this section by observing that accumulator-introduction is a well known technique
(see, for instance, \<^citet>‹‹\S13.6› in "Hutton:2016"›), but the examples in the
literature assume that the type involved is defined inductively. Bird adopts this strategy without
considering what the mixed inductive/coinductive rule is that justifies the preservation of total
correctness.

The difficulty of this step is why we wired in the `K' opt earlier: it allows us to preserve the
shape of the tree all the way from the data refinement to the final version.

›


subsection‹ Step 7: Simplify, unfold prefix ›

text‹

The next step (Bird, bottom of p132) is to move the case split in @{const ‹grep6›} on ‹vs› above the
‹Node› constructor, which makes @{term ‹grep7›} strict in that parameter and therefore not
extensionally equal to @{const ‹grep6›}. We establish a weaker correspondence using fixed-point induction.

We also unfold @{const ‹prefix›} in @{const ‹op6›}.

›

fixrec
    root7  :: "[:'a::Eq_def:] → ([:'a:] × [:'a:]) tree"
and op7    :: "[:'a::Eq_def:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree"
and grep7  :: "[:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree"
where
  [simp del]:
  "root7⋅pat = grep7⋅pat⋅Null⋅([::], pat)"
| "op7⋅pat⋅Null⋅x = root7⋅pat"
| "op7⋅pat⋅(Node⋅(us, [::])⋅l⋅r)⋅x = op7⋅pat⋅l⋅x" ―‹ Unfold ‹prefix› ›
| "⟦v ≠ ⊥; vs ≠ ⊥⟧ ⟹
   op7⋅pat⋅(Node⋅(us, v :# vs)⋅l⋅r)⋅x = If eq⋅x⋅v then r else op7⋅pat⋅l⋅x"
| [simp del]:
  "grep7⋅pat⋅l⋅(us, [::]) = Node⋅(us, [::])⋅l⋅Null" ―‹ Case split on ‹vs› hoisted above ‹Node›. ›
| "⟦v ≠ ⊥; vs ≠ ⊥⟧ ⟹
   grep7⋅pat⋅l⋅(us, v :# vs) = Node⋅(us, v :# vs)⋅(next⋅(v :# vs)⋅l)⋅(grep7⋅pat⋅(op7⋅pat⋅l⋅v)⋅(us :@ [:v:], vs))"

schematic_goal root7_op7_grep7_def:
  "( root7  :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
   , op7    :: [:'a:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
   , grep7  :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )
   = fix⋅?F"
unfolding root7_def op7_def grep7_def by simp

lemma r7_6_aux:
  assumes "pat ≠ ⊥"
  shows
  "(Λ (root, op, grep). (root⋅pat, seq⋅x⋅(op⋅pat⋅t⋅x), grep⋅pat⋅l⋅(us, vs)))⋅
   ( root7  :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
   , op7    :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
   , grep7  :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )
 = (Λ (root, op, grep). (root⋅pat, seq⋅x⋅(op⋅pat⋅t⋅x), seq⋅vs⋅(grep⋅pat⋅l⋅(us, vs))))⋅
   ( root6  :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree
   , op6    :: [:'a::Eq_def:] → ([:'a:] × [:'a:]) tree → 'a → ([:'a:] × [:'a:]) tree
   , grep6  :: [:'a:] → ([:'a:] × [:'a:]) tree → [:'a:] × [:'a:] → ([:'a:] × [:'a:]) tree )"
unfolding root6_op6_grep6_def root7_op7_grep7_def
proof(induct arbitrary: t x l us vs rule: parallel_fix_ind[case_names adm bottom step])
  case (step X Y t x l us vs) then show ?case
    apply -
    apply (cases X, cases Y)
    apply (rename_tac r10 o10 g10 r9 o9 g9)
    apply (clarsimp simp: cfun_eq_iff assms match_Node_mplus_match_Node match_Null_match_Node_tree_case tree_case_distr match_snil_match_scons_slist_case slist_case_distr If_distr)
    apply (intro allI conjI)
     apply (case_tac t; clarsimp)
     apply (rename_tac us vs l r)
     apply (case_tac "x = ⊥"; clarsimp)
     apply (case_tac vs; clarsimp; fail)
    apply (case_tac vs; clarsimp)
    apply (metis ID1 seq_simps(3))
    done
qed simp_all

lemma r7_6:
  assumes "pat ≠ ⊥"
  shows "root7⋅pat = root6⋅pat"
    and "x ≠ ⊥ ⟹ op7⋅pat⋅t⋅x = op6⋅pat⋅t⋅x"
using r7_6_aux[OF assms] by (force simp: cfun_eq_iff dest: spec[where x=x])+


subsection‹ Step 8: Discard us \label{sec:KMP:step8} ›

text‹

We now discard ‹us› from the tree as it is no longer used. This requires a new
definition of @{const ‹next›}.

This is essentially another data refinement.

›

fixrec next' :: "'a::Eq_def → [:'a:] tree → [:'a:] tree" where
  "next'⋅x⋅Null = Null"
| "next'⋅x⋅(Node⋅[::]⋅l⋅r) = Node⋅[::]⋅l⋅r"
| "⟦v ≠ ⊥; vs ≠ ⊥; x ≠ ⊥⟧ ⟹
   next'⋅x⋅(Node⋅(v :# vs)⋅l⋅r) = If eq⋅x⋅v then l else Node⋅(v :# vs)⋅l⋅r"

fixrec ―‹ Bird p133 ›
    root8 :: "[:'a::Eq_def:] → [:'a:] tree"
and op8   :: "[:'a:] → [:'a:] tree → 'a → [:'a:] tree"
and grep8 :: "[:'a:] → [:'a:] tree → [:'a:] → [:'a:] tree"
where
[simp del]:
  "root8⋅pat = grep8⋅pat⋅Null⋅pat"
| "op8⋅pat⋅Null⋅x = root8⋅pat"
| "op8⋅pat⋅(Node⋅[::]⋅l⋅r)⋅x = op8⋅pat⋅l⋅x"
| "⟦v ≠ ⊥; vs ≠ ⊥⟧ ⟹
   op8⋅pat⋅(Node⋅(v :# vs)⋅l⋅r)⋅x = If eq⋅x⋅v then r else op8⋅pat⋅l⋅x"
| "grep8⋅pat⋅l⋅[::] = Node⋅[::]⋅l⋅Null"
| "⟦v ≠ ⊥; vs ≠ ⊥⟧ ⟹
   grep8⋅pat⋅l⋅(v :# vs) = Node⋅(v :# vs)⋅(next'⋅v⋅l)⋅(grep8⋅pat⋅(op8⋅pat⋅l⋅v)⋅vs)"

fixrec ok8 :: "[:'a:] tree → tr" where
  "vs ≠ ⊥ ⟹ ok8⋅(Node⋅vs⋅l⋅r) = snull⋅vs"

schematic_goal root8_op8_grep8_def:
  "( root8 :: [:'a::Eq_def:] → [:'a:] tree
   , op8   :: [:'a:] → [:'a:] tree → 'a → [:'a:] tree
   , grep8 :: [:'a:] → [:'a:] tree → [:'a:] → [:'a:] tree )
   = fix⋅?F"
unfolding op8_def root8_def grep8_def by simp

lemma next'_strict[simp]:
  "next'⋅x⋅⊥ = ⊥"
  "next'⋅⊥⋅(Node⋅(v :# vs)⋅l⋅r) = ⊥"
by (cases "v :# vs = ⊥"; fixrec_simp; clarsimp)+

lemma root8_op8_grep8_strict[simp]:
  "grep8⋅pat⋅l⋅⊥ = ⊥"
  "op8⋅pat⋅⊥ = ⊥"
  "root8⋅⊥ = ⊥"
by fixrec_simp+

lemma ok8_strict[simp]:
  "ok8⋅⊥ = ⊥"
  "ok8⋅Null = ⊥"
by fixrec_simp+

text‹

We cannot readily relate @{const ‹next›} and @{const
‹next'›} using worker/wrapper as the obvious abstraction
is not invertible.  Conversely the desired result is easily shown by
fixed-point induction.

›

lemma next'_next:
  assumes "v ≠ ⊥"
  assumes "vs ≠ ⊥"
  shows "next'⋅v⋅(tree_map'⋅csnd⋅t) = tree_map'⋅csnd⋅(next⋅(v :# vs)⋅t)"
proof(induct t)
  case (Node usvs' l r) with assms show ?case
    apply (cases usvs'; clarsimp)
    apply (rename_tac us'' vs'')
    apply (case_tac vs''; clarsimp simp: If_distr)
    done
qed (use assms in simp_all)

lemma r8_7[simplified]:
  shows "(Λ (root, op, grep). ( root⋅pat
                              ,  op⋅pat⋅(tree_map'⋅csnd⋅t)⋅x
                              , grep⋅pat⋅(tree_map'⋅csnd⋅l)⋅(csnd⋅usvs)))⋅(root8, op8, grep8)
       = (Λ (root, op, grep). ( tree_map'⋅csnd⋅(root⋅pat)
                              , tree_map'⋅csnd⋅(op⋅pat⋅t⋅x)
                              , tree_map'⋅csnd⋅(grep⋅pat⋅l⋅usvs)))⋅(root7, op7, grep7)"
unfolding root7_op7_grep7_def root8_op8_grep8_def
proof(induct arbitrary: l t usvs x rule: parallel_fix_ind[case_names adm bottom step])
  case (step X Y l t usvs x) then show ?case
    apply -
    apply (cases X; cases Y)
    apply (clarsimp simp: cfun_eq_iff next'_next
                          match_snil_match_scons_slist_case slist_case_distr
                          match_Node_mplus_match_Node match_Null_match_Node_tree_case tree_case_distr
                    cong: slist_case_cong)
    apply (cases t; clarsimp simp: If_distr)
    apply (rename_tac us vs l r)
    apply (case_tac vs; fastforce)
    done
qed simp_all

text‹

Top-level equivalence follows from ‹lfp_fusion› specialized to ‹sscanl› (@{thm [source]
"sscanl_lfp_fusion"}), which states that
\begin{center}
  @{prop ‹smap⋅g oo sscanl⋅f⋅z = sscanl⋅f'⋅(g⋅z)›}
\end{center}
provided that ‹g› is strict and the following diagram commutes for @{prop ‹x ≠ ⊥›}:

\begin{center}
  \begin{tikzcd}[column sep=6em]
    a \arrow[r, "‹Λ a. f⋅a⋅x›"] \arrow[d, "‹g›"] & a' \arrow[d, "‹g›"] \\
    b \arrow[r, "‹Λ a. f'⋅a⋅x›"]                 & b'
  \end{tikzcd}
\end{center}

›

lemma ok8_ok8: "ok8 oo tree_map'⋅csnd = snull oo csnd oo abs2" (is "?lhs = ?rhs")
proof(rule cfun_eqI)
  fix t show "?lhs⋅t = ?rhs⋅t"
    by (cases t; clarsimp) (metis ok8.simps ok8_strict(1) snull_strict tree.con_rews(3))
qed

lemma matches8: ―‹ Bird p133 ›
  shows "matches⋅pat = smap⋅cfst oo sfilter⋅(ok8 oo csnd) oo sscanl⋅(Λ (n, x) y. (n + 1, op8⋅pat⋅x⋅y))⋅(0, root8⋅pat)" (is "?lhs = ?rhs")
proof(cases "pat = ⊥")
  case True
  then have "?lhs⋅xs = ?rhs⋅xs" for xs by (cases xs; clarsimp)
  then show ?thesis by (simp add: cfun_eq_iff)
next
  case False
  then have *: "matches⋅pat = smap⋅cfst oo sfilter⋅(snull oo csnd oo abs2 oo csnd) oo sscanl⋅(Λ (n, x) y. (n + 1, op7⋅pat⋅x⋅y))⋅(0, root7⋅pat)"
    using data_refinement[where 'a='a] r3_2[where 'a='a] r4_3[where 'a='a] r5_4[where 'a='a] r6_5(1)[where pat=pat] r7_6[where pat=pat]
    unfolding matches2.unfold by (fastforce simp: oo_assoc cfun_eq_iff csplit_def intro!: cfun_arg_cong sscanl_cong)
  from ‹pat ≠ ⊥› show ?thesis
    apply -
    apply (subst conjunct1[OF r8_7])
    apply (subst sscanl_lfp_fusion[where g="ID ** tree_map'⋅csnd" and z="(0, root7⋅pat)", simplified, symmetric])
     prefer 2
     apply (subst oo_assoc, subst sfilter_smap, simp)
     apply (simp add: oo_assoc)
     apply (simp add: oo_assoc[symmetric])
     apply (subst oo_assoc, subst ok8_ok8)
     apply (clarsimp simp: oo_assoc *)
     apply (rule refl) (* instantiate schematic *)
    apply (clarsimp simp: r8_7)
    done
qed


subsection‹ Step 9: Factor out pat (final version) \label{sec:KMP:final_version} ›

text‹

Finally we factor @{term ‹pat›} from these definitions and arrive
at Bird's cyclic data structure, which when executed using lazy
evaluation actually memoises the computation of @{const ‹grep8›}.

The ‹Letrec› syntax groups recursive bindings with
‹,› and separates these with ‹;›. Its lack
of support for clausal definitions, and that \texttt{HOLCF}
‹case› does not support nested patterns, leads to some
awkwardness.

›

fixrec matchesf :: "[:'a::Eq_def:] → [:'a:] → [:Integer:]" where
[simp del]: "matchesf⋅pat =
    (Letrec okf = (Λ (Node⋅vs⋅l⋅r). snull⋅vs);
            nextf = (Λ x t. case t of
                   Null ⇒ Null
                 | Node⋅vs⋅l⋅r ⇒ (case vs of
                     [::] ⇒ t
                   | v :# vs ⇒ If eq⋅x⋅v then l else t));
            rootf = grepf⋅Null⋅pat,
            opf = (Λ t x. case t of
                   Null ⇒ rootf
                 | Node⋅vs⋅l⋅r ⇒ (case vs of
                     [::] ⇒ opf⋅l⋅x
                   | v :# vs ⇒ If eq⋅x⋅v then r else opf⋅l⋅x)),
            grepf = (Λ l vs. case vs of
                   [::] ⇒ Node⋅[::]⋅l⋅Null
                 | v :# vs ⇒ Node⋅(v :# vs)⋅(nextf⋅v⋅l)⋅(grepf⋅(opf⋅l⋅v)⋅vs));
            stepf = (Λ (n, t) x. (n + 1, opf⋅t⋅x))
         in smap⋅cfst oo sfilter⋅(okf oo csnd) oo sscanl⋅stepf⋅(0, rootf))"

lemma matchesf_ok8: "(Λ (Node⋅vs⋅l⋅r). snull⋅vs) = ok8"
by (clarsimp simp: cfun_eq_iff; rename_tac x; case_tac x; clarsimp)

lemma matchesf_next':
  "(Λ x t. case t of Null ⇒ Null | Node⋅vs⋅l⋅r ⇒ (case vs of [::] ⇒ t | v :# vs ⇒ If eq⋅x⋅v then l else t)) = next'"
apply (clarsimp simp: cfun_eq_iff next'.unfold
                      match_snil_match_scons_slist_case slist_case_distr
                      match_Node_mplus_match_Node match_Null_match_Node_tree_case tree_case_distr)
apply (simp cong: tree_case_cong)
apply (simp cong: slist_case_cong)
done

lemma matchesf_8:
  "fix⋅(Λ (Rootf, Opf, Grepf).
          ( Grepf⋅Null⋅pat
          , Λ t x. case t of Null ⇒ Rootf | Node⋅vs⋅l⋅r ⇒
                (case vs of [::] ⇒ Opf⋅l⋅x | v :# vs ⇒ If eq⋅x⋅v then r else Opf⋅l⋅x)
          , Λ l vs. case vs of [::] ⇒ Node⋅[::]⋅l⋅Null | v :# vs ⇒ Node⋅(v :# vs)⋅(next'⋅v⋅l)⋅(Grepf⋅(Opf⋅l⋅v)⋅vs)) )
= (Λ (root, op, grep). (root⋅pat, op⋅pat, grep⋅pat))⋅(root8, op8, grep8)"
unfolding root8_op8_grep8_def
by (rule lfp_fusion[symmetric])
   (fastforce simp: cfun_eq_iff
                    match_snil_match_scons_slist_case slist_case_distr
                    match_Node_mplus_match_Node match_Null_match_Node_tree_case tree_case_distr)+

theorem matches_final:
  shows "matches = matchesf"
by (clarsimp simp: cfun_eq_iff fix_const eta_cfun csplit_cfun3 CLetrec_def
                   matches8 matchesf.unfold matchesf_next' matchesf_ok8 matchesf_8[simplified eta_cfun])

text‹

The final program above is easily syntactically translated into the
Haskell shown in Figure~\ref{fig:haskell-kmp}, and one can expect
GHC's list fusion machinery to compile the top-level driver into an
efficient loop.  \<^citet>‹"LochbihlerMaximova:2015"›
have mechanised this optimisation for Isabelle/HOL's code generator
(and see also \<^citet>‹"Huffman:2009"›).

As we lack both pieces of infrastructure we show such a fusion is sound
by hand.

›

lemma fused_driver':
  assumes "g⋅⊥ = ⊥"
  assumes "p⋅⊥ = ⊥"
  shows "smap⋅g oo sfilter⋅p oo sscanl⋅f⋅z
       = (μ R. Λ z xxs. case xxs of
                          [::] ⇒ If p⋅z then [:g⋅z:] else [::]
                        | x :# xs ⇒ let z' = f⋅z⋅x in If p⋅z then g⋅z :# R⋅z'⋅xs else R⋅z'⋅xs)⋅z"
(is "?lhs = ?rhs")
proof(rule cfun_eqI)
  fix xs from assms show "?lhs⋅xs = ?rhs⋅xs"
    by (induct xs arbitrary: z) (subst fix_eq; fastforce simp: If_distr Let_def)+
qed

(*<*)

end
(*>*)