Theory KMP

(*<*)
theory KMP
imports
  Theory_Of_Lists
begin

hide_const abs

(*>*)
section‹ Knuth-Morris-Pratt matching according to Bird \label{sec:KMP} ›


subsection‹ Step 1: Specification \label{sec:KMP:specification} ›

text‹

We begin with the specification of string matching given by citet‹Chapter~16› in
"Bird:PearlsofFAD:2010". (References to ``Bird'' in the following are to this text.) Note that
we assume @{consteq} has some nice properties (see \S\ref{sec:equality}) and
use strict lists.

›

fixrec endswith :: "[:'a::Eq_def:]  [:'a:]  tr" where
[simp del]: "endswithpat = selempat oo stails"

fixrec matches :: "[:'a::Eq_def:]  [:'a:]  [:Integer:]" where
[simp del]: "matchespat = smapslength oo sfilter(endswithpat) oo sinits"

text‹

Bird describes @{term "matchespatxs"} as returning ``a list of integers p› such that pat› is a
suffix of @{term "stakepxs"}.''

The following examples illustrate this behaviour:

›

lemma "matches[::][::] = [:0:]"
unfolding matches.unfold endswith.unfold by simp

lemma "matches[::][:10::Integer, 20, 30:] = [:0, 1, 2, 3:]"
unfolding matches.unfold endswith.unfold by simp

lemma "matches[:1::Integer,2,3,1,2:][:1,2,1,2,3,1,2,3,1,2:] = [:7, 10:]"
unfolding matches.unfold endswith.unfold
by (simp add: sfilter_scons_let del: sfilter_strict_scons sfilter.simps)

lemma endswith_strict[simp]:
  "endswith = "
  "endswithpat = "
by (fixrec_simp; simp add: cfun_eq_iff)+

lemma matches_strict[simp]:
  "matches = "
  "matchespat = "
by (fixrec_simp; clarsimp simp: cfun_eq_iff)+

text‹

Bird's strategy for deriving KMP from this specification is encoded in the following lemmas:
if we can rewrite @{constendswith} as a composition of a predicate with a
@{constsfoldl}, then we can rewrite @{constmatches} into a @{constsscanl}.

›

lemma fork_sfoldl:
  shows "sfoldlf1z1 && sfoldlf2z2 = sfoldl(Λ (a, b) z. (f1az, f2bz))(z1, z2)" (is "?lhs = ?rhs")
proof(rule cfun_eqI)
  fix xs show "?lhsxs = ?rhsxs"
    by (induct xs arbitrary: z1 z2) simp_all
qed

lemma smap_sfilter_split_cfcomp: ―‹ Bird (16.4) ›
  assumes "f = "
  assumes "p = "
  shows "smapf oo sfilter(p oo g) = smapcfst oo sfilter(p oo csnd) oo smap(f && g)" (is "?lhs = ?rhs")
proof(rule cfun_eqI)
  fix xs show "?lhsxs = ?rhsxs"
    using assms by (induct xs) (simp_all add: If2_def[symmetric] split: If2_splits)
qed

lemma Bird_strategy:
  assumes endswith: "endswithpat = p oo sfoldlopz"
  assumes step: "step = (Λ (n, x) y. (n + 1, opxy))"
  assumes "p = " ―‹ We can reasonably expect the predicate to be strict ›
  shows "matchespat = smapcfst oo sfilter(p oo csnd) oo sscanlstep(0, z)"
apply (simp add: matches.simps assoc_oo endswith)
apply (subst smap_sfilter_split_cfcomp, fastforce, fact)
apply (subst slength_sfoldl)
apply (subst fork_sfoldl)
apply (simp add: oo_assoc[symmetric])
apply (subst sinits_sscanl)
apply (fold step)
apply (rule refl)
done

text‹

Bird proceeds by reworking @{constendswith} into the form required by @{thm [source] "Bird_strategy"}.
This is eased by an alternative definition of @{constendswith}.

›

lemma sfilter_supto:
  assumes "0  d"
  shows "sfilter(Λ x. le(MkIn - x)(MkId))(supto(MkIm)(MkIn))
       = supto(MkI(if m  n - d then n - d else m))(MkIn)" (is "?sfilterp?suptomn = _")
proof(cases "m  n - d")
  case True
  moreover
  from True assms have "?sfilterp?suptomn = ?sfilterp(supto(MkIm)(MkI(n - d - 1)) :@ supto(MkI(n - d))(MkIn))"
    using supto_split1 by auto
  moreover from True assms have "?sfilterp(supto(MkIm)(MkI(n - d - 1))) = [::]" by auto
  ultimately show ?thesis by (clarsimp intro!: trans[OF sfilter_cong[OF refl] sfilter_const_TT])
next
  case False then show ?thesis
    by (clarsimp intro!: trans[OF sfilter_cong[OF refl] sfilter_const_TT])
qed

lemma endswith_eq_sdrop: "endswithpatxs = eqpat(sdrop(slengthxs - slengthpat)xs)"
proof(cases "pat = " "xs = " rule: bool.exhaust[case_product bool.exhaust])
  case False_False then show ?thesis
    by (cases "endswithpatxs";
        simp only: endswith.simps cfcomp2 stails_sdrop';
        force simp: zero_Integer_def one_Integer_def elim: slengthE)
qed simp_all

lemma endswith_def2:  ―‹ Bird p127 ›
  shows "endswithpatxs = eqpat(shead(sfilter(Λ x. prefixxpat)(stailsxs)))" (is "?lhs = ?rhs")
proof(cases "pat = " "xs = " rule: bool.exhaust[case_product bool.exhaust])
  case False_False
  from False_False obtain patl xsl where *: "slengthxs = MkIxsl" "0  xsl" "slengthpat = MkIpatl" "0  patl"
    by (meson Integer.exhaust slength_bottom_iff slength_ge_0)
  let ?patl_xsl = "if patl  xsl then xsl - patl else 0"
  have "?rhs = eqpat(shead(sfilter(Λ x. le(slengthx)(slengthpat) andalso prefixxpat)(stailsxs)))"
    by (subst prefix_slength_strengthen) simp
  also have " = eqpat(shead(sfilter(Λ x. prefixxpat)(sfilter(Λ x. le(slengthx)(slengthpat))(stailsxs))))"
    by (simp add: sfilter_sfilter')
  also have " = eqpat(shead(smap(Λ k. sdropkxs)(sfilter(Λ k. prefix(sdropkxs)pat)(sfilter(Λ k. le(slength(sdropkxs))(MkIpatl))(supto(MkI0)(MkIxsl))))))"
    using slengthxs = MkIxsl slengthpat = MkIpatl
    by (simp add: stails_sdrop' sfilter_smap' cfcomp1 zero_Integer_def)
  also have " = eqpat(shead(smap(Λ k. sdropkxs)(sfilter(Λ k. prefix(sdropkxs)pat)(sfilter(Λ k. le(MkIxsl - k)(MkIpatl))(supto(MkI0)(MkIxsl))))))"
    using slengthxs = MkIxsl
    by (subst (2) sfilter_cong[where p'="Λ x. le(MkIxsl - x)(MkIpatl)"]; fastforce simp: zero_Integer_def)
  also have " = If prefix(sdrop(MkI?patl_xsl)xs)pat
                  then eqpat(sdrop(MkI?patl_xsl)xs)
                  else eqpat(shead(smap(Λ k. sdropkxs)(sfilter(Λ x. prefix(sdropxxs)pat)(supto(MkI(?patl_xsl + 1))(MkIxsl)))))"
    using False_False 0  xsl 0  patl
    by (subst sfilter_supto) (auto simp: If_distr one_Integer_def)
  also have " = ?lhs" (is "If ?c then _ else ?else = _")
  proof(cases ?c)
    case TT with slengthxs = MkIxsl slengthpat = MkIpatl
    show ?thesis by (simp add: endswith_eq_sdrop sdrop_neg zero_Integer_def)
  next
    case FF ―‹ Recursive case: the lists generated by supto› are too short ›
    have "?else = shead(smap(Λ x. eqpat(sdropxxs))(sfilter(Λ x. prefix(sdropxxs)pat)(supto(MkI(?patl_xsl + 1))(MkIxsl))))"
      using FF by (subst shead_smap_distr[where f="eqpat", symmetric]) (auto simp: cfcomp1)
    also have " = shead(smap(Λ x. seqxFF)(sfilter(Λ x. prefix(sdropxxs)pat)(supto(MkI(?patl_xsl + 1))(MkIxsl))))"
      using False_False * by (subst smap_cong[OF refl, where f'="Λ x. seqxFF"]) (auto simp: zero_Integer_def split: if_splits)
    also from * FF have " = ?lhs"
      apply (auto 0 0 simp: shead_smap_distr seq_conv_if endswith_eq_sdrop zero_Integer_def dest!: prefix_FF_not_snilD)
        apply (metis (full_types) le_MkI_MkI linorder_not_less order_refl prefix_FF_not_snilD sdrop_all zless_imp_add1_zle)
       using FF apply fastforce
      apply (metis add.left_neutral le_MkI_MkI linorder_not_less order_refl prefix_FF_not_snilD sdrop_0(1) sdrop_all zero_Integer_def zless_imp_add1_zle)
      done
    finally show ?thesis using FF by simp
  qed (simp add: False_False)
  finally show ?thesis ..
qed simp_all

text‹

Bird then generalizes @{term sfilter(Λ x. prefixxpat) oo stails} to @{term split},
where ``split⋅pat⋅xs› splits pat› into two lists us› and vs› so that
@{prop us :@ vs = pat} and us› is the longest suffix of xs› that is a prefix of pat›.''

›

fixrec split :: "[:'a::Eq_def:]  [:'a:]  [:'a:] × [:'a:]" where ―‹ Bird p128 ›
[simp del]: "splitpatxs = If prefixxspat then (xs, sdrop(slengthxs)pat) else splitpat(stailxs)"

lemma split_strict[simp]:
  shows "split = "
    and "splitpat = "
by fixrec_simp+

lemma split_bottom_iff[simp]: "(splitpatxs = )  (pat =   xs = )"
by (cases "pat = "; clarsimp) (induct xs; subst split.unfold; simp)

lemma split_snil[simp]:
  assumes "pat  "
  shows "splitpat[::] = ([::], pat)"
using assms by fixrec_simp

lemma split_pattern: ―‹ Bird p128, observation ›
  assumes "xs  "
  assumes "splitpatxs = (us, vs)"
  shows "us :@ vs = pat"
using assms
proof(cases "pat = ", simp, induct xs arbitrary: us vs)
  case snil then show ?case by (subst (asm) split.unfold) simp
next
  case (scons x xs) then show ?case
    by (subst (asm) (3) split.unfold)
       (fastforce dest: prefix_sdrop_slength simp: If2_def[symmetric] split: If2_splits)
qed simp

lemma endswith_split: ―‹ Bird p128, after defining split›
  shows "endswithpat = snull oo csnd oo splitpat"
proof(rule cfun_eqI)
  fix xs show "endswithpatxs = (snull oo csnd oo splitpat)xs"
  proof(cases "pat = ", simp, induct xs)
    case (scons x xs) then show ?case
      unfolding endswith_def2
      by (subst split.unfold)
         (fastforce dest: prefix_sdrop_prefix_eq simp: If2_def[symmetric] If_distr snull_eq_snil split: If2_splits)
  qed (simp_all add: snull_eq_snil endswith.simps)
qed

lemma split_length_lt:
  assumes "pat  "
  assumes "xs  "
  shows "lt(slength(prod.fst (splitpatxs)))(slengthxs + 1) = TT"
using assms
proof(induct xs)
  case (scons x xs) then show ?case
    by (subst split.unfold)
       (auto simp: If2_def[symmetric] one_Integer_def split: If2_splits elim!: slengthE elim: lt_trans)
qed simp_all

text‹

The predicate p› required by @{thm [source] "Bird_strategy"} is therefore snull oo csnd›. It
remains to find op› and z› such that:

 @{term splitpat[::] = z}
 @{term splitpat(xs :@ [:x:]) = op(splitpatxs)x}
text‹

so that @{term split = sfoldlopz}.

We obtain @{term z = ([::], pat)} directly from the definition of @{term split}.

Bird derives op› on the basis of this crucial observation:

›

lemma split_snoc: ―‹ Bird p128 ›
  shows "splitpat(xs :@ [:x:]) = splitpat(cfst(splitpatxs) :@ [:x:])"
proof(cases "pat = ", simp, cases "x = ", simp, induct xs)
  case (scons x xs) then show ?case
    apply -
    apply (subst (1 3) split.unfold)
    apply (clarsimp simp: If2_def[symmetric] split: If2_splits; intro conjI impI)
      apply (subst split.unfold; fastforce)
     apply (subst split.unfold; fastforce)
    apply (simp add: append_prefixD)
    done
qed simp_all

fixrec ―‹ Bird p129 ›
  op :: "[:'a::Eq_def:]  [:'a:] × [:'a:]  'a  [:'a:] × [:'a:]"
where
[simp del]:
  "oppat(us, vs)x =
     (     If prefix[:x:]vs then (us :@ [:x:], stailvs)
      else If snullus then ([::], pat)
      else oppat(splitpat(stailus))x )"

lemma op_strict[simp]:
  "oppat = "
  "oppat(us, ) = "
  "oppatusvs = "
by fixrec_simp+

text‹

Bird demonstrates that @{constop} is partially correct wrt @{constsplit}, i.e.,
@{prop "oppat(splitpatxs)x  splitpat(xs :@ [:x:])"}. For total correctness we
essentially prove that @{constop} terminates on well-defined arguments with an inductive argument.

›

lemma op_induct[case_names step]:
  fixes usvs:: "[:'a:] × 'b"
  assumes step: "usvs. (usvs'. lt(slength(cfstusvs'))(slength(cfstusvs)) = TT  P usvs')  P usvs"
  shows "P usvs"
proof(induct usvs rule: wf_induct)
  let ?r = "{ (usvs', usvs) |(usvs :: [:'a:] × 'b) (usvs' :: [:'a:] × 'b). lt(slength(cfstusvs'))(slength(cfstusvs)) = TT }"
  show "wf ?r"
  proof(rule wf_subset[OF wf_inv_image[where f="λ(x, _). slengthx", OF wf_subset[OF wf_Integer_ge_less_than[where d=0]]]])
    let ?rslen = "{ (slengthus', slengthus) |(us :: [:'a:]) (us' :: [:'a:]). lt(slengthus')(slengthus) = TT }"
    show "?rslen  Integer_ge_less_than 0"
      apply (clarsimp simp: Integer_ge_less_than_def zero_Integer_def)
      apply (metis Integer.exhaust dist_eq_tr(4) dist_eq_tr(6) lt_Integer_bottom_iff lt_MkI_MkI slength_ge_0)
      done
    show "?r  inv_image ?rslen (λ(x, _). slengthx)" by (auto 0 3)
  qed
  fix usvs
  assume "usvs'. (usvs', usvs)  ?r  P usvs'"
  then show "P usvs"
    by - (rule step; clarsimp; metis eq_fst_iff)
qed

lemma op_induct'[case_names step]:
  assumes step: "us. (us'. lt(slengthus')(slengthus) = TT  P us')  P us"
  shows "P us"
by (rule op_induct[where P="P  prod.fst" and usvs="(us, vs)" for vs::unit, simplified])
   (fastforce intro: step)

lemma split_snoc_op:
  "splitpat(xs :@ [:x:]) = oppat(splitpatxs)x"
proof(induct "splitpatxs" arbitrary: xs rule: op_induct)
  case (step xs) show ?case
  proof(cases "pat = " "xs = " "x = " rule: bool.exhaust[case_product bool.exhaust bool.exhaust])
    case False_False_False
    obtain us vs where *: "splitpatxs = (us, vs)" by fastforce
    from False_False_False * have **: "prefix(us :@ [:x:])pat = prefix[:x:]vs"
      using split_pattern same_prefix_prefix sappend_bottom_iff by blast
    from False_False_False * **
    have ***: "sdrop(slength(us :@ [:x:]))pat = stailvs" if "prefix(us :@ [:x:])pat = TT"
      using sdrop_sappend_same[where xs="us :@ [:x:]"] that
      by (cases vs; clarsimp) (fastforce dest!: split_pattern)
    from False_False_False * ** *** show ?thesis
      apply -
      apply (subst split_snoc)
      apply (subst split.unfold)
      apply (subst op.unfold)
      apply (fastforce simp: If2_def[symmetric] snull_FF_conv split: If2_splits intro: step split_length_lt)
      done
  qed simp_all
qed

lemma split_sfoldl_op:
  assumes "pat  "
  shows "sfoldl(oppat)([::], pat) = splitpat" (is "?lhs = ?rhs")
proof -
  have "?lhsxs = ?rhsxs" for xs
    using assms by (induct xs rule: srev_induct) (simp_all add: split_snoc_op)
  then show ?thesis by (simp add: cfun_eq_iff)
qed

lemma matches_op:
  shows "matchespat = smapcfst oo sfilter(snull oo csnd oo csnd)
                                oo sscanl(Λ (n, usvs) x. (n + 1, oppatusvsx))(0, ([::], pat))" (is "?lhs = ?rhs")
proof(cases "pat = ")
  case True
  then have "?lhsxs = ?rhsxs" for xs by (cases xs; clarsimp)
  then show ?thesis by (simp add: cfun_eq_iff)
next
  case False then show ?thesis
    apply (subst (2) oo_assoc)
    apply (rule Bird_strategy)
    apply (simp_all add: endswith_split split_sfoldl_op oo_assoc)
    done
qed

text‹

Using @{thm [source] "split_sfoldl_op"} we can rewrite @{constop} into a more perspicuous form
that exhibits how KMP handles the failure of the text to continue matching the pattern:

›

fixrec
  op' :: "[:'a::Eq_def:]  [:'a:] × [:'a:]  'a  [:'a:] × [:'a:]"
where
[simp del]:
  "op'pat(us, vs)x =
     (     If prefix[:x:]vs then (us :@ [:x:], stailvs) ― ‹ continue matching ›
      else If snullus then ([::], pat) ― ‹ fail at the start of the pattern: discard x›
      else sfoldl(op'pat)([::], pat)(stailus :@ [:x:]) ― ‹ fail later: discard shead⋅us› and determine where to restart ›
     )"

text‹

Intuitively if x› continues the pattern match then we
extend the @{constsplit} of pat›
recorded in us› and vs›.  Otherwise we
need to find a prefix of pat› to continue matching
with. If we have yet to make any progress (i.e., us =
[::]›) we restart with the entire pat› (aka
z›) and discard x›.  Otherwise, because a
match cannot begin with @{term us :@ [:x:]}, we @{constsplit} pat› (aka z›) by
iterating @{constop'} over @{term
stailus :@ [:x:]}.  The remainder of the
development is about memoising this last computation.

This is not yet the full KMP algorithm as it lacks what we call the
`K' optimisation, which we add in \S\ref{sec:KMP:data_refinement}.
Note that a termination proof for @{const "op'"} in HOL is tricky due
to its use of higher-order nested recursion via @{constsfoldl}.

›

lemma op'_strict[simp]:
  "op'pat = "
  "op'pat(us, ) = "
  "op'patusvs = "
by fixrec_simp+

lemma sfoldl_op'_strict[simp]:
  "op'pat(sfoldl(op'pat)(us, )xs)x = "
by (induct xs arbitrary: x rule: srev_induct) simp_all

lemma op'_op:
  shows "op'patusvsx = oppatusvsx"
proof(cases "pat = " "x = " rule: bool.exhaust[case_product bool.exhaust])
  case True_False then show ?thesis
    apply (subst op'.unfold)
    apply (subst op.unfold)
    apply simp
    done
next
  case False_False then show ?thesis
  proof(induct usvs arbitrary: x rule: op_induct)
    case (step usvs x)
    have *: "sfoldl(op'pat)([::], pat)xs = sfoldl(oppat)([::], pat)xs"
         if "lt(slengthxs)(slength(cfstusvs)) = TT" for xs
    using that
    proof(induct xs rule: srev_induct)
      case (ssnoc x' xs')
      from ssnoc(1,2,4) have "lt(slengthxs')(slength(cfstusvs)) = TT"
        using lt_slength_0(2) lt_trans by auto
      moreover
      from step(2) ssnoc(1,2,4) have "lt(slength(cfst(splitpatxs')))(slength(cfstusvs)) = TT"
        using lt_trans split_length_lt by (auto 10 0)
      ultimately show ?case by (simp add: ssnoc.hyps split_sfoldl_op split_snoc_op step)
    qed simp_all
    from step.prems show ?case
      apply (subst op'.unfold)
      apply (subst op.unfold)
      apply (clarsimp simp: If2_def[symmetric] snull_FF_conv split_sfoldl_op[symmetric] * split: If2_splits)
      apply (clarsimp simp: split_sfoldl_op step split_length_lt)
      done
  qed
qed simp_all


subsection‹ Step 2: Data refinement and the `K' optimisation \label{sec:KMP:data_refinement} ›

text‹

Bird memoises the restart computation in @{constop'} in two steps.  The first reifies
the control structure of @{constop'} into a non-wellfounded tree, which we discuss here. The
second increases the sharing in this tree; see \S\ref{sec:KMP:increase_sharing}.

Briefly, we cache the @{term sfoldl(op'pat)([::], pat)(stailus :@ [:x:])}
computation in @{constop'} by finding a ``representation'' type @{typ "'t"}
for the ``abstract'' type @{typ [:'a::Eq_def:] × [:'a:]}, a
pair of functions @{term rep :: [:'a::Eq_def:] × [:'a:]  't},
@{term abs :: 't  [:'a::Eq_def:] × [:'a:]} where @{prop abs oo rep = ID}, and then
finding a derived form of @{constop'} that works on @{typ "'t"} rather
than @{typ "[:'a::Eq_def:] × [:'a:]"}. We also take the opportunity to add the `K' optimisation in the form of the @{term next}
function.

As such steps are essentially @{emph ‹deus ex machina›}, we try to provide some intuition
after showing the new definitions.

›

domain 'a tree ―‹ Bird p130 ›
  = Null
  | Node (label :: 'a) (lazy left :: "'a tree") (lazy right :: "'a tree") ―‹ Strict in the label @{typ "'a"}

(*<*)

lemma tree_injects'[simp]: ―‹ An unconditional form of @{thm [source] tree.injects}. ›
  "(Nodealr = Nodea'l'r')  (a = a'  (a    l = l'  r = r'))"
by (cases "a = "; clarsimp)

lemma match_Null_match_Node_tree_case: "match_Nulltk1 +++ match_Nodetk2 = tree_casek1k2t"
by (cases t) simp_all

lemma match_Node_mplus_match_Node: "match_Nodexk1 +++ match_Nodexk2 = match_Nodex(Λ v l r. k1vlr +++ k2vlr)"
by (cases x; clarsimp)

lemma tree_case_distr:
  "f =   f(tree_caseght) = tree_case(fg)(Λ x l r. f(hxlr))t"
  "(tree_caseg'h't)z = tree_case(g'z)(Λ x l r. h'xlrz)t"
by (case_tac [!] t) simp_all

lemma tree_case_cong:
  assumes "t = t'"
  assumes "t' = Null  n = n'"
  assumes "v l r. t' = Nodevlr; v    c v l r = c' v l r"
  assumes "cont (λ(x, y, z). c x y z)"
  assumes "cont (λ(x, y, z). c' x y z)"
  shows "tree_casen(Λ v l r. c v l r)t = tree_casen'(Λ v l r. c' v l r)t'"
using assms by (cases t; cases t'; clarsimp simp: prod_cont_iff)

lemma tree_take_smaller:
  assumes "tree_take it = tree_take iu"
  assumes "j  i"
  shows "tree_take jt = tree_take ju"
using assms by (metis min.orderE tree.take_take)

fixrec tree_map' :: "('a  'b)  'a tree  'b tree" where
  "tree_map'fNull = Null"
| "a    tree_map'f(Nodealr) = Node(fa)(tree_map'fl)(tree_map'fr)"

lemma tree_map'_strict[simp]: "tree_map'f = "
by fixrec_simp

lemma tree_map'_ID': "tree_map'IDxs = xs"
by (induct xs) simp_all

lemma tree_map'_ID[simp]: "tree_map'ID = ID"
by (clarsimp simp: cfun_eq_iff tree_map'_ID')

lemma tree_map'_strict_scons[simp]:
  assumes "f = "
  shows "tree_map'f(Nodealr) = Node(fa)(tree_map'fl)(tree_map'fr)"
using assms by (cases "a = "; clarsimp)

lemma tree_map'_comp'[simp]:
  assumes "f = "
  shows "tree_map'f(tree_map'gt) = tree_map'(f oo g)t"
using assms by (induct t) simp_all

lemma tree_map'_comp[simp]:
  assumes "f = "
  shows "tree_map'f oo tree_map'g = tree_map'(f oo g)"
using assms by (clarsimp simp: cfun_eq_iff)

lemma tree_unique: ―‹ Adapted from @{cite [cite_macro=citet] "Matthews:1999"} for \emph{contractive functions}. ›
  fixes x :: "'a tree"
  assumes xfx: "x = fx"
  assumes f: "i t u. tree_take it = tree_take iu
                       tree_take (Suc i)(ft) = tree_take (Suc i)(fu)"
  shows "x = fixf"
proof(rule tree.take_lemma)
  fix i show "tree_take ix = tree_take i(fixf)"
  proof(induct i)
    case (Suc i) from xfx f[OF Suc, folded fix_eq] show ?case by simp
  qed simp
qed

(*>*)

fixrec "next" :: "[:'a::Eq_def:]  ([:'a:] × [:'a:]) tree  ([:'a:] × [:'a:]) tree" where
  "next[::]t = t"
| "x  ; xs   
   next(x :# xs)Null = Null"
| "x  ; xs   
   next(x :# xs)(Node(us, [::])lr) = Node(us, [::])lr"
| "v  ; vs  ; x  ; xs   
   next(x :# xs)(Node(us, v :# vs)lr) = If eqxv then l else Node(us, v :# vs)lr"

fixrec ―‹ Bird p131 ``an even simpler form'', with the `K' optimisation ›
    root2  :: "[:'a::Eq_def:]  ([:'a:] × [:'a:]) tree"
and op2    :: "[:'a:]  ([:'a:] × [:'a:]) tree  'a  ([:'a:] × [:'a:]) tree"
and rep2   :: "[:'a:]  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree"
and left2  :: "[:'a:]  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree"
and right2 :: "[:'a:]  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree"
where
  [simp del]:
  "root2pat = rep2pat([::], pat)"
| "op2patNullx = root2pat"
| "usvs   
   op2pat(Nodeusvslr)x = If prefix[:x:](csndusvs) then r else op2patlx"
| [simp del]:
  "rep2patusvs = Nodeusvs(left2patusvs)(right2patusvs)"
| "left2pat([::], vs) = nextvsNull"
| "u  ; us   
   left2pat(u :# us, vs) = nextvs(sfoldl(op2pat)(root2pat)us)" ―‹ Note the use of @{term op2} and @{constnext}. ›
| "right2pat(us, [::]) = Null" ―‹ Unreachable ›
| "v  ; vs   
   right2pat(us, v :# vs) = rep2pat(us :@ [:v:], vs)"

fixrec abs2 :: "([:'a:] × [:'a:]) tree  [:'a:] × [:'a:]" where
 "usvs    abs2(Nodeusvslr) = usvs"

fixrec matches2 :: "[:'a::Eq_def:]  [:'a:]  [:Integer:]" where
[simp del]: "matches2pat = smapcfst oo sfilter(snull oo csnd oo abs2 oo csnd)
                                     oo sscanl(Λ (n, x) y. (n + 1, op2patxy))(0, root2pat)"

text‹

\begin{figure}
  \centering
  \begin{tikzpicture}[
    shorten >=1pt,
    node distance=1.5cm,
    on grid,
    auto,
    initial text=,
    thick,
    accepting/.style = {rectangle,minimum size=0.3cm}
    ]
    \node[state,accepting] (q_0i)                     {};
    \node[state,initial]   (q_0)  [right=of q_0i]     {$q_0$};
    \node[state]           (q_1)  [right=of q_0]      {$q_1$};
    \node[state]           (q_2)  [right=of q_1]      {$q_2$};
    \node[state]           (q_3)  [right=of q_2]      {$q_3$};
    \node[state]           (q_4)  [right=of q_3]      {$q_4$};
    \node[state,double]    (q_5)  [right=of q_4]      {$q_5$};
    \node[state,accepting] (q_5r) [right=of q_5]      {};

    \path[->] (q_0) edge [bend left] node [above]       {0} (q_1)
              (q_1) edge [bend left] node [above]       {1} (q_2)
              (q_2) edge [bend left] node [above]       {0} (q_3)
              (q_3) edge [bend left] node [above]       {0} (q_4)
              (q_4) edge [bend left] node [above]       {1} (q_5)
              (q_5) edge [bend left] node [above]       {*} (q_5r);

    \path[->] (q_0) edge [bend right]               (q_0i)
              (q_1) edge [bend left]                (q_0)
              (q_2) edge [bend left]                (q_0)  % MP
              (q_2) edge [bend left,color=red]      (q_0i) % K opt
              (q_3) edge [bend left]                (q_1)
              (q_4) edge [bend left]                (q_1)  % MP
              (q_4) edge [bend left,color=red]      (q_0)  % K opt
              (q_5) edge [bend left]                (q_2);
  \end{tikzpicture}
  \caption{An example from citet‹\S2.1› in "CrochemoreRytter:2002". The MP tree for the
    pattern $01001$ is drawn in black: right transitions are labelled
    with a symbol, whereas left transitions are unlabelled. The two
    `K'-optimised left transitions are shown in red. The boxes denote
    @{constNull}. The root node is $q_0$.}
  \label{fig:example_tree}
\end{figure}

This tree can be interpreted as a sort of automaton\footnote{citet‹\S3.1› in "Bird:2012" suggests it can
be thought of as a doubly-linked list, following citet"TakeichiAkama:1990".)}, where @{constop2} goes @{constright} if the pattern
continues with the next element of the text, and @{constleft} otherwise, to determine how much of a prefix of
the pattern could still be in play.  Figure~\ref{fig:example_tree}
visualises such an automaton for the pattern $01001$, used by citet‹\S2.1› in "CrochemoreRytter:2002" to
illustrate the difference between Morris-Pratt (MP) and
Knuth-Morris-Pratt (KMP) preprocessing as we discuss below.  Note that
these are not the classical Mealy machines that correspond to regular
expressions, where all outgoing transitions are labelled with symbols.

The following lemma shows how our sample automaton is encoded as a non-wellfounded tree.

›

lemma concrete_tree_KMP:
  shows "root2[:0::Integer, 1, 0, 0, 1:]
      = (μ q0. Node([::], [:0, 1, 0, 0, 1:])
                     Null
                     (μ q1. Node([:0:], [:1, 0, 0, 1:])
                          q0
                          (μ q2. Node([:0,1:], [:0, 0, 1:])
                               Null ―‹ K optimisation: MP q0›
                               (Node([:0,1,0:], [:0, 1:])
                                     q1
                                     (Node([:0,1,0,0:], [:1:])
                                          q0 ―‹ K optimisation: MP q1›
                                          (Node([:0,1,0,0,1:], [::])q2Null))))))"
(is "?lhs = fix?F")
proof(rule tree_unique) tag invisible›
  note rep2.simps[simp]
  show "?lhs = ?F?lhs"
    apply (subst root2.unfold; simp)
    apply (rule tree_unique; simp)
     apply (intro conjI)
      apply (subst (1) root2.unfold; simp)
      apply (subst (1) root2.unfold; fastforce)
     apply (rule tree_unique; simp)
      apply (intro conjI)
         apply (subst (1) root2.unfold; simp)
         apply (subst (1) root2.unfold; simp)
        apply (subst (1) root2.unfold; simp)
        apply (subst (1) root2.unfold; fastforce)
       apply (subst (1) root2.unfold; simp)
       apply (subst (1) root2.unfold; simp)
       apply (subst (1) root2.unfold; simp)
       apply (subst (1 2) root2.unfold; fastforce)
      apply (subst (1) root2.unfold; simp)
      apply (subst (1) root2.unfold; simp)
      apply (subst (1) root2.unfold; fastforce)
     apply (rename_tac i t u; case_tac i; clarsimp)
     apply (rename_tac t u i; case_tac i; clarsimp)
     apply (rename_tac t u i; case_tac i; clarsimp)
     apply (meson Suc_n_not_le_n linear tree_take_smaller)
    apply (rule parallel_fix_ind; simp)
    apply (rename_tac i t u x y; case_tac i; clarsimp)
    apply (rename_tac i; case_tac i; clarsimp; intro conjI)
     apply (meson Suc_n_not_le_n linear tree_take_smaller)
    apply (rename_tac i; case_tac i; clarsimp)
    apply (rename_tac i; case_tac i; clarsimp)
    apply (meson Suc_n_not_le_n linear tree_take_smaller)
    done
  fix i :: nat
  fix t u :: "([:Integer:] × [:Integer:]) tree"
  assume "tree_take it = tree_take iu"
  then show "tree_take (Suc i)(?Ft) = tree_take (Suc i)(?Fu)"
    apply simp
    apply (rule parallel_fix_ind; simp)
    apply (case_tac i; clarsimp; intro conjI)
     apply (meson Suc_n_not_le_n linear tree_take_smaller)
    apply (rule parallel_fix_ind; simp)
    apply (rename_tac j t0 t1; case_tac j; clarsimp)
    apply (rename_tac j; case_tac j; clarsimp; intro conjI)
     apply (meson Suc_n_not_le_n linear tree_take_smaller)
    apply (rename_tac j; case_tac j; clarsimp; intro conjI)
     apply (meson Suc_n_not_le_n linear tree_take_smaller)
    apply (rename_tac j; case_tac j; clarsimp)
    apply (meson Suc_n_not_le_n linear tree_take_smaller)
    done
qed

text‹

The sharing that we expect from a lazy (call-by-need) evaluator is here implied by the use of
nested fixed points.

The KMP preprocessor is expressed by the @{constleft2} function, where @{constop2} is used
to match the pattern against itself; the use of @{constop2} in @{constmatches2} (``the driver'')
is responsible for matching the (preprocessed) pattern against the text. This formally cashes in
an observation by citet‹\S5› in "vanderWoude:1989", that these two algorithms
are essentially the same, which has eluded other presentations\footnote{For instance, contrast
our shared use of @{constop2} with the separated \texttt{match}
and \texttt{rematch} functions of citet‹Figure~1› in "AgerDanvyRohde:2006".}.

Bird uses @{constNull} on a left path to signal to the driver that it should discard the
current element of the text and restart matching from the beginning of the pattern (i.e,
@{constroot2}). This is a step towards the removal of @{term us} in \S\ref{sec:KMP:step8}.

Note that the @{constNull} at the end of the rightmost path is unreachable: the rightmost
@{constNode} has @{term "vs = [::]"} and therefore @{constop2} always takes the left branch.

The `K' optimisation is perhaps best understood by example. Consider
the automaton in Figure~\ref{fig:example_tree}, and a text beginning
with \texttt{011}. Using the MP (black) transitions we take the path
$\rightarrow q_0 \stackrel{{\mathtt{0}}}{\rightarrow} q_1
\stackrel{\mathtt{1}}{\rightarrow} \overbrace{q_2 \rightarrow q_0
\rightarrow \Box}$. Now, due to the failure of the comparison of the
current element of the text (\texttt{1}) at $q_2$, we can predict that
the (identical) comparison at node $q_0$ will fail as well, and
therefore have $q_2$ left-branch directly to $\Box$. This saves a
comparison in the driver at the cost of another in the preprocessor
(in @{constnext}). These optimisations are the red
arrows in the diagram, and can in general save an arbitrary number of
driver comparisons; consider the pattern $\mathtt{1}^n$ for instance.

More formally, @{constnext} ensures that the heads of
the suffixes of the pattern (@{term vs}) on consecutive
labels on left paths are distinct; see below for a proof of this fact
in our setting, and citet‹\S3.3.4› in
"Gusfield:1997" for a classical account. Unlike Bird's suggestion
(p134), our @{constnext} function is not recursive.

We note in passing that while MP only allows Null› on
the left of the root node, Null› can be on the left of
any KMP node except for the rightmost
(i.e., the one that signals a complete pattern match) where no optimisation is possible.

We proceed with the formalities of the data refinement.

›

schematic_goal root2_op2_rep2_left2_right2_def: ― ‹ Obtain the definition of these functions as a single fixed point ›
  "( root2  :: [:'a::Eq_def:]  ([:'a:] × [:'a:]) tree
   , op2    :: [:'a:]  ([:'a:] × [:'a:]) tree  'a  ([:'a:] × [:'a:]) tree
   , rep2   :: [:'a:]  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree
   , left2  :: [:'a:]  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree
   , right2 :: [:'a:]  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree )
   = fix?F"
unfolding op2_def root2_def rep2_def left2_def right2_def by simp

lemma abs2_strict[simp]:
  "abs2 = "
  "abs2Null = "
by fixrec_simp+

lemma next_strict[simp]:
  "next = "
  "nextxs = "
  "next(x :# xs)(Node(us, )lr) = "
  apply fixrec_simp
 apply (cases xs; fixrec_simp; simp)
apply (cases "x = "; cases "xs = "; cases "us = "; fixrec_simp)
done

lemma next_Null[simp]:
  assumes "xs  "
shows "nextxsNull = Null"
using assms by (cases xs) simp_all

lemma next_snil[simp]:
  assumes "xs  "
  shows "nextxs(Node(us, [::])lr) = Node(us, [::])lr"
using assms by (cases xs) simp_all

lemma op2_rep2_left2_right2_strict[simp]:
  "op2pat = "
  "op2pat(Node(us, )lr) = "
  "op2pat(Nodeusvslr) = "
  "rep2pat = "
  "left2pat(, vs) = "
  "left2pat(us, ) = "
  "right2pat(us, ) = "
      apply fixrec_simp
     apply (cases "us = "; fixrec_simp; simp)
    apply (cases "usvs = "; fixrec_simp; simp)
   apply fixrec_simp
  apply fixrec_simp
 apply (cases us; fixrec_simp)
apply fixrec_simp
done

lemma snd_abs_root2_bottom[simp]: "prod.snd (abs2(root2)) = "
by (simp add: root2.unfold rep2.unfold)

lemma abs_rep2_ID'[simp]: "abs2(rep2patusvs) = usvs"
by (cases "usvs = "; subst rep2.unfold; clarsimp)

lemma abs_rep2_ID: "abs2 oo rep2pat = ID"
by (clarsimp simp: cfun_eq_iff)

lemma rep2_snoc_right2: ―‹ Bird p131 ›
  assumes "prefix[:x:]vs = TT"
  shows "rep2pat(us :@ [:x:], stailvs) = right2pat(us, vs)"
using assms by (cases "x = "; cases vs; clarsimp)

lemma not_prefix_op2_next:
  assumes "prefix[:x:]xs = FF"
  shows "op2pat(nextxs(rep2patusvs))x = op2pat(rep2patusvs)x"
proof -
  obtain us vs where "usvs = (us, vs)" by force
  with assms show ?thesis
    by (cases xs; cases us; clarsimp; cases vs;
        clarsimp simp: rep2.simps prefix_singleton_FF If2_def[symmetric] split: If2_splits)
qed

text‹

Bird's appeal to foldl_fusion› (p130) is too weak to
justify this data refinement as his condition (iii) requires the
worker functions to coincide on all representation values. Concretely
he asks that:

\begin{center}
  @{prop "rep2pat(oppat(abs2t)x) = op2pattx"} ―‹Bird (17.2)›
\end{center}

where t› is an arbitrary tree. This does not hold for junk representations
such as:

\begin{center}
  @{term t = Node(pat, [::])NullNull}
\end{center}

Using worker/wrapper fusion citep"GillHutton:2009" and "Gammie:2011" specialised to @{constsscanl} (@{thm [source] "sscanl_ww_fusion"}) we only
need to establish this identity for valid representations, i.e., when
t› lies under the image of rep2›. In
pictures, we show that this diagram commutes:

\begin{center}
  \begin{tikzcd}[column sep=8em]
    usvs \arrow[r, "Λ usvs. op⋅pat⋅usvs⋅x›"] \arrow[d, "rep2⋅pat›"] & usvs' \arrow[d, "rep2⋅pat›"] \\
    t \arrow[r, "Λ usvs. op2⋅pat⋅usvs⋅x ›"]                         & t'
  \end{tikzcd}
\end{center}

Clearly this result self-composes: after an initial @{term
rep2pat} step, we can repeatedly simulate
@{constop} steps with @{constop2} steps.

›

lemma op_op2_refinement:
  assumes "pat  "
  shows "rep2pat(oppatusvsx) = op2pat(rep2patusvs)x"
proof(cases "x = " "usvs = " rule: bool.exhaust[case_product bool.exhaust])
  case False_False
  then have "x  " "usvs  " by simp_all
  then show ?thesis
  proof(induct usvs arbitrary: x rule: op_induct)
    case (step usvs)
    obtain us vs where usvs: "usvs = (us, vs)" by fastforce
    have *: "sfoldl(op2pat)(root2pat)xs = rep2pat(splitpatxs)" if "lt(slengthxs)(slengthus) = TT" for xs
    using that
    proof(induct xs rule: srev_induct)
      case (ssnoc x xs)
      from ssnoc(1,2,4) have IH: "sfoldl(op2pat)(root2pat)xs = rep2pat(splitpatxs)"
        by - (rule ssnoc; auto intro: lt_trans dest: lt_slength_0)
      obtain us' vs' where us'vs': "splitpatxs = (us', vs')" by fastforce
      from pat   ssnoc(1,2,4) usvs show ?case
        apply (clarsimp simp: split_sfoldl_op[symmetric] IH)
        apply (rule step(1)[simplified abs_rep2_ID', simplified, symmetric])
        using lt_trans split_length_lt split_sfoldl_op apply fastforce+
        done
    qed (fastforce simp: pat   root2.unfold)+
    have **: "If snullus then rep2pat([::], pat) else rep2pat(oppat(splitpat(stailus))x)
            = op2pat(left2pat(us, vs))x" if "prefix[:x:]vs = FF"
    proof(cases us)
      case snil with that show ?thesis
        by simp (metis next_Null op2.simps(1) prefix.simps(1) prefix_FF_not_snilD root2.simps)
    next
      case (scons u' us')
      from pat   scons have "lt(slength(cfst(splitpatus')))(slengthus) = TT"
        using split_length_lt by fastforce
      from pat   x   usvs that scons this show ?thesis
        by (clarsimp simp: * step(1)[simplified abs_rep2_ID'] not_prefix_op2_next)
    qed simp
    from usvs   usvs show ?case
      apply (subst (2) rep2.unfold)
      apply (subst op2.unfold)
      apply (subst op.unfold)
      apply (clarsimp simp: If_distr rep2_snoc_right2 ** cong: If_cong)
      done
  qed
qed (simp_all add: rep2.unfold)

text‹

Therefore the result of this data refinement is extensionally equal to
the specification:

›

lemma data_refinement:
  shows "matches = matches2"
proof(intro cfun_eqI)
  fix pat xs :: "[:'a:]" show "matchespatxs = matches2patxs"
  proof(cases "pat = ")
    case True then show ?thesis by (cases xs; clarsimp simp: matches2.simps)
  next
    case False then show ?thesis
      unfolding matches2.simps
      apply (subst matches_op) ―‹ Continue with previous derivation. ›
      apply (subst sscanl_ww_fusion[where wrap="ID ** abs2" and unwrap="ID ** rep2pat" and f'="Λ (n, x) y. (n + 1, op2patxy)"])
        apply (simp add: abs_rep2_ID)
       apply (simp add: op_op2_refinement)
      apply (simp add: oo_assoc sfilter_smap root2.unfold)
      apply (simp add: oo_assoc[symmetric])
      done
  qed
qed

text‹

This computation can be thought of as a pair coroutines with a
producer (@{constroot2}/@{constrep2})
/ consumer (@{constop2}) structure. It turns out that
laziness is not essential (see \S\ref{sec:implementations}), though it
does depend on being able to traverse incompletely defined trees.

The key difficulty in defining this computation in HOL using present
technology is that @{constop2} is neither terminating
nor @{emph ‹friendly›} in the terminology of citet"BlanchetteEtAl:2017".

While this representation works for automata with this sort of
structure, it is unclear how general it is; in particular it may not
work so well if @{constleft} branches can go forward
as well as back. See also the commentary in citet"HinzeJeuring:2001", who observe that sharing is easily lost, and so
it is probably only useful in ``closed'' settings like the present
one, unless the language is extended in unusual ways citep"JeanninEtAl:2017".

\label{thm:k_property}

We conclude by proving that @{constrep2} produces
trees that have the `K' property, viz that labels on consecutive nodes
on a left path do not start with the same symbol. This also
establishes the productivity of @{constroot2}. The
pattern of proof used here -- induction nested in coinduction --
recurs in \S\ref{sec:KMP:increase_sharing}.

›

coinductive K :: "([:'a::Eq:] × [:'a:]) tree  bool" where
  "K Null"
| " usvs  ; K l; K r;
     v vs. csndusvs = v :# vs  l = Null  (v' vs'. csnd(labell) = v' :# vs'  eqvv' = FF)
     K (Nodeusvslr)"

declare K.intros[intro!, simp]

lemma sfoldl_op2_root2_rep2_split:
  assumes "pat  "
  shows "sfoldl(op2pat)(root2pat)xs = rep2pat(splitpatxs)"
proof(induct xs rule: srev_induct)
  case (ssnoc x xs) with pat   ssnoc show ?case by (clarsimp simp: split_sfoldl_op[symmetric] op_op2_refinement)
qed (simp_all add: pat   root2.unfold)

lemma K_rep2:
  assumes "pat  "
  assumes "us :@ vs = pat"
  shows "K (rep2pat(us, vs))"
using assms
proof(coinduction arbitrary: us vs)
  case (K us vs) then show ?case
  proof(induct us arbitrary: vs rule: op_induct')
    case (step us)
    from step.prems have "us  " "vs  " by auto
    show ?case
    proof(cases us)
      case bottom with us   show ?thesis by simp
    next
      case snil with step.prems show ?thesis by (cases vs; force simp: rep2.simps)
    next
      case (scons u' us')
      from pat   scons us   vs  
      obtain usl vsl where splitl: "splitpatus' = (usl, vsl)" "usl  " "vsl  " "usl :@ vsl = pat"
        by (metis (no_types, opaque_lifting) Rep_cfun_strict1 prod.collapse sappend_strict sappend_strict2 split_pattern)
      from scons obtain l r where r: "rep2pat(us, vs) = Node(us, vs)lr" by (simp add: rep2.simps)
      moreover
      have "(us vs. l = rep2pat(us, vs)  us :@ vs = pat)  K l"
      proof(cases vs)
        case snil with scons splitl r show ?thesis
          by (clarsimp simp: rep2.simps sfoldl_op2_root2_rep2_split)
      next
        case scons
        with pat   us = u' :# us' u'   us'   vs   r splitl show ?thesis
          apply (clarsimp simp: rep2.simps sfoldl_op2_root2_rep2_split)
          apply (cases vsl; cases usl; clarsimp simp: If2_def[symmetric] sfoldl_op2_root2_rep2_split split: If2_splits)
          apply (rename_tac ul' usl')
          apply (cut_tac us'="prod.fst (splitpatusl')" and vs="prod.snd (splitpatusl')" in step(1); clarsimp simp: split_pattern)
           apply (metis fst_conv lt_trans slength.simps(2) split_length_lt step.prems(1))
          apply (erule disjE; clarsimp simp: sfoldl_op2_root2_rep2_split)
          apply (rename_tac b l r)
          apply (case_tac b; clarsimp simp: rep2.simps)
           apply (auto simp: If2_def[symmetric] rep2.simps dest: split_pattern[rotated] split: If2_splits)
          done
      qed (simp add: vs  )
      moreover
      from us :@ vs = pat us   vs   r
      have "(usr vsr. r = rep2pat(usr, vsr)  usr :@ vsr = pat)  K r"
        by (cases vs; clarsimp simp: rep2.simps)
      moreover
      have "l = Null  (v' vs'. csnd(labell) = v' :# vs'  eqvv' = FF)" if "vs = v :# vs'" for v vs'
      proof(cases vsl)
        case snil with us :@ vs = pat us = u' :# us' splitl show ?thesis
          using split_length_lt[where pat=pat and xs=us']
          by (force elim: slengthE simp: one_Integer_def split: if_splits)
      next
        case scons
        from splitl have "lt(slengthusl)(slengthus' + 1) = TT"
          by (metis fst_conv fst_strict split_bottom_iff split_length_lt)
        with scons pat   us = u' :# us' u'   us'   vs   r splitl vs = v :# vs' show ?thesis
          using step(1)[OF _ pat  , where us'="prod.fst (splitpatus')" and vs="prod.snd (splitpatus')"]
          by (clarsimp simp: rep2.simps sfoldl_op2_root2_rep2_split If2_def[symmetric] split: If2_splits)
      qed (simp add: vsl  )
      moreover note pat   us   vs  
      ultimately show ?thesis by auto
    qed
  qed
qed

theorem K_root2:
  assumes "pat  "
  shows "K (root2pat)"
using assms unfolding root2.unfold by (simp add: K_rep2)

text‹

The remaining steps are as follows:

 3. introduce an accumulating parameter (grep›).
 4. inline rep› and simplify.
 5. simplify to Bird's ``simpler forms.''
 6. memoise left›.
 7. simplify, unfold prefix›.
 8. discard us›.
 9. factor out pat›.

›


subsection‹ Step 3: Introduce an accumulating parameter (grep) ›

text‹

Next we prepare for the second memoization step (\S\ref{sec:KMP:increase_sharing})
by introducing an accumulating parameter to @{constrep2} that supplies the value of the left
subtree.

We retain @{constrep2} as a wrapper for now, and inline @{constright2} to speed up
simplification.

›

fixrec ―‹ Bird p131 / p132 ›
    root3  :: "[:'a::Eq_def:]  ([:'a:] × [:'a:]) tree"
and op3    :: "[:'a:]  ([:'a:] × [:'a:]) tree  'a  ([:'a:] × [:'a:]) tree"
and rep3   :: "[:'a:]  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree"
and grep3  :: "[:'a:]  ([:'a:] × [:'a:]) tree  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree"
where
  [simp del]:
  "root3pat = rep3pat([::], pat)"
| "op3patNullx = root3pat"
| "usvs   
   op3pat(Nodeusvslr)x = If prefix[:x:](csndusvs) then r else op3patlx"
| [simp del]: ―‹ Inline @{constleft2}, factor out @{constnext}. ›
  "rep3patusvs = grep3pat(case cfstusvs of [::]  Null | u :# us  sfoldl(op3pat)(root3pat)us)usvs"
| [simp del]:  ―‹ @{constrep2} with @{constleft2} abstracted, @{constright2} inlined. ›
  "grep3patlusvs = Nodeusvs(next(csndusvs)l)(case csndusvs of
          [::]  Null
        | v :# vs  rep3pat(cfstusvs :@ [:v:], vs))"

schematic_goal root3_op3_rep3_grep3_def:
  "( root3  :: [:'a::Eq_def:]  ([:'a:] × [:'a:]) tree
   , op3    :: [:'a:]  ([:'a:] × [:'a:]) tree  'a  ([:'a:] × [:'a:]) tree
   , rep3   :: [:'a:]  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree
   , grep3  :: [:'a:]  ([:'a:] × [:'a:]) tree  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree )
   = fix?F"
unfolding root3_def op3_def rep3_def grep3_def by simp

lemma r3_2:
  "(Λ (root, op, rep, grep). (root, op, rep))
   ( root3  :: [:'a::Eq_def:]  ([:'a:] × [:'a:]) tree
   , op3    :: [:'a:]  ([:'a:] × [:'a:]) tree  'a  ([:'a:] × [:'a:]) tree
   , rep3   :: [:'a:]  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree
   , grep3  :: [:'a:]  ([:'a:] × [:'a:]) tree  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree )
 = (Λ (root, op, rep, left, right). (root, op, rep))
   ( root2  :: [:'a::Eq_def:]  ([:'a:] × [:'a:]) tree
   , op2    :: [:'a:]  ([:'a:] × [:'a:]) tree  'a  ([:'a:] × [:'a:]) tree
   , rep2   :: [:'a::Eq_def:]  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree
   , left2  :: [:'a::Eq_def:]  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree
   , right2 :: [:'a::Eq_def:]  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree )"
unfolding root2_op2_rep2_left2_right2_def root3_op3_rep3_grep3_def
apply (simp add: match_snil_match_scons_slist_case match_Null_match_Node_tree_case slist_case_distr tree_case_distr)
apply (simp add: fix_cprod fix_const) ―‹ Very slow. Sensitive to tuple order due to the asymmetry of fix_cprod›. ›
apply (simp add: slist_case_distr)
done


subsection‹ Step 4: Inline rep ›

text‹

We further simplify by inlining @{constrep3} into @{constroot3} and @{constgrep3}.

›

fixrec
    root4  :: "[:'a::Eq_def:]  ([:'a:] × [:'a:]) tree"
and op4    :: "[:'a:]  ([:'a:] × [:'a:]) tree  'a  ([:'a:] × [:'a:]) tree"
and grep4  :: "[:'a:]  ([:'a:] × [:'a:]) tree  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree"
where
  [simp del]:
  "root4pat = grep4patNull([::], pat)"
| "op4patNullx = root4pat"
| "usvs   
   op4pat(Nodeusvslr)x = If prefix[:x:](csndusvs) then r else op4patlx"
| [simp del]:
  "grep4patlusvs = Nodeusvs(next(csndusvs)l)(case csndusvs of
          [::]  Null
        | v :# vs  grep4pat(case cfstusvs :@ [:v:] of
              [::]  Null ―‹ unreachable ›
            | u :# us  sfoldl(op4pat)(root4pat)us)(cfstusvs :@ [:v:], vs))"

schematic_goal root4_op4_grep4_def:
  "( root4  :: [:'a::Eq_def:]  ([:'a:] × [:'a:]) tree
   , op4    :: [:'a:]  ([:'a:] × [:'a:]) tree  'a  ([:'a:] × [:'a:]) tree
   , grep4  :: [:'a:]  ([:'a:] × [:'a:]) tree  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree )
   = fix?F"
unfolding root4_def op4_def grep4_def by simp

lemma fix_syn4_permute:
  assumes "cont (λ(X1, X2, X3, X4). F1 X1 X2 X3 X4)"
  assumes "cont (λ(X1, X2, X3, X4). F2 X1 X2 X3 X4)"
  assumes "cont (λ(X1, X2, X3, X4). F3 X1 X2 X3 X4)"
  assumes "cont (λ(X1, X2, X3, X4). F4 X1 X2 X3 X4)"
  shows "fix_syn (λ(X1, X2, X3, X4). (F1 X1 X2 X3 X4, F2 X1 X2 X3 X4, F3 X1 X2 X3 X4, F4 X1 X2 X3 X4))
      = (λ(x1, x2, x4, x3). (x1, x2, x3, x4))
            (fix_syn (λ(X1, X2, X4, X3). (F1 X1 X2 X3 X4, F2 X1 X2 X3 X4, F4 X1 X2 X3 X4, F3 X1 X2 X3 X4)))"
by (induct rule: parallel_fix_ind) (use assms in auto simp: prod_cont_iff)

lemma r4_3:
  "( root4  :: [:'a::Eq_def:]  ([:'a:] × [:'a:]) tree
   , op4    :: [:'a:]  ([:'a:] × [:'a:]) tree  'a  ([:'a:] × [:'a:]) tree
   , grep4  :: [:'a:]  ([:'a:] × [:'a:]) tree  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree )
 = (Λ (root, op, rep, grep). (root, op, grep))
   ( root3  :: [:'a::Eq_def:]  ([:'a:] × [:'a:]) tree
   , op3    :: [:'a:]  ([:'a:] × [:'a:]) tree  'a  ([:'a:] × [:'a:]) tree
   , rep3   :: [:'a:]  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree
   , grep3  :: [:'a:]  ([:'a:] × [:'a:]) tree  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree )"
unfolding root3_op3_rep3_grep3_def root4_op4_grep4_def
apply (clarsimp simp: slist_case_distr match_Null_match_Node_tree_case tree_case_distr eta_cfun)
apply (subst fix_syn4_permute; clarsimp simp: fix_cprod fix_const) ―‹ Slow ›
done


subsection‹ Step 5: Simplify to Bird's ``simpler forms'' ›

text‹

The remainder of @{constleft2} in @{constgrep4} can be simplified by transforming the
@{text "case"} scrutinee from @{term "cfstusvs :@ [:v:]"} into @{term "cfstusvs"}.

›

fixrec
    root5  :: "[:'a::Eq_def:]  ([:'a:] × [:'a:]) tree"
and op5    :: "[:'a::Eq_def:]  ([:'a:] × [:'a:]) tree  'a  ([:'a:] × [:'a:]) tree"
and grep5  :: "[:'a:]  ([:'a:] × [:'a:]) tree  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree"
where
  [simp del]:
  "root5pat = grep5patNull([::], pat)"
| "op5patNullx = root5pat"
| "usvs   
   op5pat(Nodeusvslr)x = If prefix[:x:](csndusvs) then r else op5patlx"
| [simp del]:
  "grep5patlusvs = Nodeusvs(next(csndusvs)l)(case csndusvs of
          [::]  Null
        | v :# vs  grep5pat(case cfstusvs of ― ‹ was @{term cfstusvs :@ [:v:]}
              [::]  root5pat
            | u :# us  sfoldl(op5pat)(root5pat)(us :@ [:v:]))(cfstusvs :@ [:v:], vs))"

schematic_goal root5_op5_grep5_def:
  "( root5  :: [:'a::Eq_def:]  ([:'a:] × [:'a:]) tree
   , op5    :: [:'a:]  ([:'a:] × [:'a:]) tree  'a  ([:'a:] × [:'a:]) tree
   , grep5  :: [:'a:]  ([:'a:] × [:'a:]) tree  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree )
   = fix?F"
unfolding root5_def op5_def grep5_def by simp

lemma op5_grep5_strict[simp]:
  "op5pat = "
  "op5pat(Node(us, )lr) = "
  "op5pat(Nodeusvslr) = "
  "grep5patl = "
   apply fixrec_simp
  apply (cases "us = "; fixrec_simp; simp)
 apply (cases "usvs = "; fixrec_simp; simp)
apply fixrec_simp
done

lemma r5_4:
  "( root5  :: [:'a::Eq_def:]  ([:'a:] × [:'a:]) tree
   , op5    :: [:'a::Eq_def:]  ([:'a:] × [:'a:]) tree  'a  ([:'a:] × [:'a:]) tree
   , grep5  :: [:'a:]  ([:'a:] × [:'a:]) tree  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree )
 = ( root4  :: [:'a::Eq_def:]  ([:'a:] × [:'a:]) tree
   , op4    :: [:'a::Eq_def:]  ([:'a:] × [:'a:]) tree  'a  ([:'a:] × [:'a:]) tree
   , grep4  :: [:'a:]  ([:'a:] × [:'a:]) tree  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree )"
unfolding root4_op4_grep4_def root5_op5_grep5_def
by (clarsimp simp: slist_case_distr slist_case_snoc stail_sappend cong: slist_case_cong)


subsection‹ Step 6: Memoize left \label{sec:KMP:increase_sharing} ›

text‹

The last substantial step is to memoise the computation of the left subtrees by tying the knot.

Note this makes the computation of us› in the tree redundant; we remove it in \S\ref{sec:KMP:step8}.

›

fixrec ―‹ Bird p132 ›
    root6  :: "[:'a::Eq_def:]  ([:'a:] × [:'a:]) tree"
and op6    :: "[:'a::Eq_def:]  ([:'a:] × [:'a:]) tree  'a  ([:'a:] × [:'a:]) tree"
and grep6  :: "[:'a:]  ([:'a:] × [:'a:]) tree  [:'a:] × [:'a:]  ([:'a:] × [:'a:]) tree"
where
  [simp del]:
  "root6pat = grep6patNull([::], pat)"
| "op6patNullx = root6pat"
| "usvs   
   op6pat(Nodeusvslr)x = If prefix[:x:](csndusvs) then r else op6patlx"
| [simp del]:
  "grep6patlusvs = Nodeusvs(next(csndusvs)l)(case csndusvs of
          [::]  Null
        | v :# vs  grep6pat(op6patlv)(cfstusvs :@ [:v:], vs))"

schematic_goal root6_op6_grep6_def:
  "( root6  :: [:'a::Eq_def:]  ([:'a:] × [:'a:]) tree
   , op6    :: [:'a:]  ([:'a:] × [:'a:]) tree  'a  ([:'a:] × [:'a:]) tree
   , grep6  :: [:'a:]  ([:'a:] × [:'a:]) tree  [:'a:] × [:'a:]  ([:'a:] × [:'a