Theory KnuthMorrisPratt
section ‹Knuth-Morris-Pratt fast string search algorithm›
text ‹Development based on Filliâtre's verification using Why3›
text ‹Many thanks to Christian Zimmerer for versions of the algorithms
as while loops›
theory KnuthMorrisPratt imports "Collections.Diff_Array" "HOL-Library.While_Combinator"
begin
subsection ‹General definitions›
abbreviation "array ≡ new_array"
abbreviation length_array :: "'a array ⇒ nat" (‹∥_∥›)
where "length_array ≡ array_length"
notation array_get (infixl ‹!!› 100)
notation array_set (‹_[_ ::= _]› [1000,0,0] 900)
definition matches :: "'a array ⇒ nat ⇒ 'a array ⇒ nat ⇒ nat ⇒ bool"
where "matches a i b j n = (i+n ≤ ∥a∥ ∧ j+n ≤ ∥b∥
∧ (∀k<n. a!!(i+k) = b!!(j+k)))"
lemma matches_empty [simp]: "matches a i b j 0 ⟷ i ≤ ∥a∥ ∧ j ≤ ∥b∥"
by (simp add: matches_def)
lemma matches_right_extension:
"⟦matches a i b j n;
Suc (i+n) ≤ ∥a∥;
Suc (j+n) ≤ ∥b∥;
a!!(i+n) = b!!(j+n)⟧ ⟹
matches a i b j (Suc n)"
by (auto simp: matches_def less_Suc_eq)
lemma matches_contradiction_at_first:
"⟦0 < n; a!!i ≠ b!!j⟧ ⟹ ¬ matches a i b j n"
by (auto simp: matches_def)
lemma matches_contradiction_at_i:
"⟦a!!(i+k) ≠ b!!(j+k); k < n⟧ ⟹ ¬ matches a i b j n"
by (auto simp: matches_def)
lemma matches_right_weakening:
"⟦matches a i b j n; n' ≤ n⟧ ⟹ matches a i b j n'"
by (auto simp: matches_def)
lemma matches_left_weakening_add:
assumes "matches a i b j n" "k≤n"
shows "matches a (i+k) b (j+k) (n-k)"
using assms by (auto simp: matches_def less_diff_conv algebra_simps)
lemma matches_left_weakening:
assumes "matches a (i - (n - n')) b (j - (n - n')) n"
and "n' ≤ n"
and "n - n' ≤ i"
and "n - n' ≤ j"
shows "matches a i b j n'"
by (metis assms diff_diff_cancel diff_le_self le_add_diff_inverse2 matches_left_weakening_add)
lemma matches_sym: "matches a i b j n ⟹ matches b j a i n"
by (simp add: matches_def)
lemma matches_trans:
"⟦matches a i b j n; matches b j c k n⟧ ⟹ matches a i c k n"
by (simp add: matches_def)
text ‹Denotes the maximal @{term "n < j"} such that the first @{term n} elements of @{term p}
match the last @{term n} elements of @{term p}[0..@{term "j-1"}]
The first @{term n} characters of the pattern have a copy starting at @{term "j-n"}.›
definition is_next :: "'a array ⇒ nat ⇒ nat ⇒ bool" where
"is_next p j n =
(n < j ∧ matches p (j-n) p 0 n ∧ (∀m. n < m ∧ m < j ⟶ ¬ matches p (j-m) p 0 m))"
lemma next_iteration:
assumes "matches a (i-j) p 0 j" "is_next p j n" "j ≤ i"
shows "matches a (i-n) p 0 n"
proof -
have "matches a (i-n) p (j-n) n"
using assms by (auto simp: algebra_simps is_next_def intro: matches_left_weakening [where n=j])
moreover have "matches p (j-n) p 0 n"
using is_next_def assms by blast
ultimately show ?thesis
using matches_trans by blast
qed
lemma next_is_maximal:
assumes "matches a (i-j) p 0 j" "is_next p j n"
and "j ≤ i" "n < m" "m < j"
shows "¬ matches a (i-m) p 0 m"
proof -
have "matches a (i-m) p (j-m) m"
by (rule matches_left_weakening [where n=j]) (use assms in auto)
with assms show ?thesis
by (meson is_next_def matches_sym matches_trans)
qed
text ‹Filliâtre's version of the lemma above›
corollary next_is_maximal':
assumes match: "matches a (i-j) p 0 j" "is_next p j n"
and more: "j ≤ i" "i-j < k" "k < i-n"
shows "¬ matches a k p 0 ∥p∥"
proof -
have "¬ matches a k p 0 (i-k)"
using next_is_maximal [OF match] more
by (metis add.commute diff_diff_cancel diff_le_self le_trans less_diff_conv less_or_eq_imp_le)
moreover have "i-k < ∥p∥"
using assms by (auto simp: matches_def)
ultimately show ?thesis
using matches_right_weakening nless_le by blast
qed
lemma next_1_0 [simp]: "is_next p 1 0 ⟷ 1 ≤ ∥p∥"
by (auto simp add: is_next_def matches_def)
subsection ‹The Build-table routine›
definition buildtab_step :: "'a array ⇒ nat array ⇒ nat ⇒ nat ⇒ nat array × nat × nat" where
"buildtab_step p nxt i j =
(if p!!i = p!!j then (nxt[Suc i::=Suc j], Suc i, Suc j)
else if j=0 then (nxt[Suc i::=0], Suc i, j)
else (nxt, i, nxt!!j))"
text ‹The conjunction of the invariants given in the Why3 development›
definition buildtab_invariant :: "'a array ⇒ nat array ⇒ nat ⇒ nat ⇒ bool" where
"buildtab_invariant p nxt i j =
(∥nxt∥ = ∥p∥ ∧ i ≤ ∥p∥
∧ j<i ∧ matches p (i-j) p 0 j
∧ (∀k. 0 < k ∧ k ≤ i ⟶ is_next p k (nxt!!k))
∧ (∀k. Suc j < k ∧ k < Suc i ⟶ ¬ matches p (Suc i - k) p 0 k))"
text ‹The invariant trivially holds upon initialisation›
lemma buildtab_invariant_init: "∥p∥ ≥ 2 ⟹ buildtab_invariant p (array 0 ∥p∥) 1 0"
by (auto simp: buildtab_invariant_def is_next_def)
subsubsection ‹The invariant holds after an iteration›
text ‹each conjunct is proved separately›
lemma length_invariant:
shows "let (nxt',i',j') = buildtab_step p nxt i j in ∥nxt'∥ = ∥nxt∥"
by (simp add: buildtab_step_def)
lemma i_invariant:
assumes "Suc i < m"
shows "let (nxt',i',j') = buildtab_step p nxt i j in i' ≤ m"
using assms by (simp add: buildtab_step_def)
lemma ji_invariant:
assumes "buildtab_invariant p nxt i j"
shows "let (nxt',i',j') = buildtab_step p nxt i j in j'<i'"
proof -
have j: "0 < j ⟹ nxt !! j < j"
using assms by (simp add: buildtab_invariant_def is_next_def)
show ?thesis
using assms by (auto simp add: buildtab_invariant_def buildtab_step_def intro: order.strict_trans j)
qed
lemma matches_invariant:
assumes "buildtab_invariant p nxt i j" and "Suc i < ∥p∥"
shows "let (nxt',i',j') = buildtab_step p nxt i j in matches p (i' - j') p 0 j'"
using assms by (auto simp: buildtab_invariant_def buildtab_step_def matches_right_extension intro: next_iteration)
lemma is_next_invariant:
assumes "buildtab_invariant p nxt i j" and "Suc i < ∥p∥"
shows "let (nxt',i',j') = buildtab_step p nxt i j in ∀k. 0 < k ⟶ k ≤ i' ⟶ is_next p k (nxt'!!k)"
proof (cases "p!!i = p!!j")
case True
with assms have "matches p (i-j) p 0 (Suc j)"
by (simp add: buildtab_invariant_def matches_right_extension)
then have "is_next p (Suc i) (Suc j)"
using assms by (auto simp: is_next_def buildtab_invariant_def)
with True assms show ?thesis
by (simp add: buildtab_invariant_def buildtab_step_def array_get_array_set_other le_Suc_eq)
next
case neq: False
show ?thesis
proof (cases "j=0")
case True
then have "¬ matches p (i-j) p 0 (Suc j)"
using matches_contradiction_at_first neq by fastforce
with True assms have "is_next p (Suc i) 0"
unfolding is_next_def buildtab_invariant_def
by (metis Suc_leI diff_Suc_Suc diff_zero matches_empty nat_less_le zero_less_Suc)
with assms neq show ?thesis
by (simp add: buildtab_invariant_def buildtab_step_def array_get_array_set_other le_Suc_eq)
next
case False
with assms neq show ?thesis
by (simp add: buildtab_invariant_def buildtab_step_def)
qed
qed
lemma non_matches_aux:
assumes "Suc (Suc j) < k" "matches p (Suc (Suc i) - k) p 0 k"
shows "matches p (Suc i - (k - Suc 0)) p 0 (k - Suc 0)"
using matches_right_weakening assms by fastforce
lemma non_matches_invariant:
assumes bt: "buildtab_invariant p nxt i j" and "∥p∥ ≥ 2" "Suc i < ∥p∥"
shows "let (nxt',i',j') = buildtab_step p nxt i j in ∀k. Suc j' < k ⟶ k < Suc i' ⟶ ¬ matches p (Suc i' - k) p 0 k"
proof (cases "p!!i = p!!j")
case True
with non_matches_aux bt show ?thesis
by (fastforce simp add: Suc_less_eq2 buildtab_step_def buildtab_invariant_def)
next
case neq: False
have "j < i"
using bt by (auto simp: buildtab_invariant_def)
then have no_match_Sj: "¬ matches p (Suc i - Suc j) p 0 (Suc j)"
using neq by (force simp: matches_def)
show ?thesis
proof (cases "j=0")
case True
have "¬ matches p (Suc (Suc i) - k) p 0 k"
if "Suc 0 < k" and "k < Suc (Suc i)" for k
proof (cases "k = Suc (Suc 0)")
case True
with assms neq that show ?thesis
by (auto simp add: matches_contradiction_at_first ‹j=0›)
next
case False
then have "Suc 0 < k - Suc 0"
using that by linarith
with bt that have "¬ matches p (Suc i - (k - Suc 0)) p 0 (k - Suc 0)"
using True by (force simp add: buildtab_invariant_def)
then show ?thesis
by (metis False Suc_lessI non_matches_aux that(1))
qed
with True show ?thesis
by (auto simp: buildtab_invariant_def buildtab_step_def)
next
case False
then have "0 < j"
by auto
have False if lessK: "Suc (nxt!!j) < k" and "k < Suc i" and contra: "matches p (Suc i - k) p 0 k" for k
proof (cases "Suc j < k")
case True
then show ?thesis
using bt that by (auto simp: buildtab_invariant_def)
next
case False
then have "k ≤ j"
using less_Suc_eq_le no_match_Sj contra by fastforce
obtain k' where k': "k = Suc k'" "k' < i"
using ‹k < Suc i› lessK not0_implies_Suc by fastforce
have "is_next p j (nxt!!j)"
using bt that ‹j>0› by (auto simp: buildtab_invariant_def)
with no_match_Sj k' have "¬ matches p (j - k') p 0 k'"
by (metis Suc_less_eq ‹k ≤ j› is_next_def lessK less_Suc_eq_le)
moreover
have "matches p 0 p (i-j) j"
using bt buildtab_invariant_def by (metis matches_sym)
then have "matches p (j-k') p (i - k') k'"
using ‹j<i› False k' matches_left_weakening
by (smt (verit, best) Nat.diff_diff_eq Suc_leI Suc_le_lessD ‹k ≤ j› diff_diff_cancel diff_is_0_eq lessI nat_less_le)
moreover have "matches p (i - k') p 0 k'"
using contra k' matches_right_weakening by fastforce
ultimately show False
using matches_trans by blast
qed
with assms neq False show ?thesis
by (auto simp: buildtab_invariant_def buildtab_step_def)
qed
qed
lemma buildtab_invariant:
assumes ini: "buildtab_invariant p nxt i j"
and "Suc i < ∥p∥" "(nxt',i',j') = buildtab_step p nxt i j"
shows "buildtab_invariant p nxt' i' j'"
unfolding buildtab_invariant_def
using assms i_invariant [of concl: p nxt i j] length_invariant [of p nxt i j]
ji_invariant [OF ini] matches_invariant [OF ini] non_matches_invariant [OF ini] is_next_invariant [OF ini]
by (simp add: buildtab_invariant_def split: prod.split_asm)
subsubsection ‹The build-table loop and its correctness›
text ‹Declaring a partial recursive function with the tailrec option
relaxes the need for a termination proof, because a tail-recursive recursion
equation can never cause inconsistency.›
subsubsection ‹The build-table loop and its correctness›
partial_function (tailrec) buildtab :: "'a array ⇒ nat array ⇒ nat ⇒ nat ⇒ nat array" where
"buildtab p nxt i j =
(if Suc i < ∥p∥
then let (nxt',i',j') = buildtab_step p nxt i j in buildtab p nxt' i' j'
else nxt)"
declare buildtab.simps[code]
text ‹Nevertheless, termination must eventually be shown:
to use induction to reason about executions. We do so by defining
a well founded relation. Termination proofs are by well-founded induction.›
definition "rel_buildtab m = inv_image (lex_prod (measure (λi. m-i)) (measure id)) snd"
lemma wf_rel_buildtab: "wf (rel_buildtab m)"
unfolding rel_buildtab_def
by (auto intro: wf_same_fst)
lemma buildtab_correct:
assumes k: "0<k ∧ k < ∥p∥" and ini: "buildtab_invariant p nxt i j"
shows "is_next p k (buildtab p nxt i j !! k)"
using ini
proof (induction "(nxt,i,j)" arbitrary: nxt i j rule: wf_induct_rule[OF wf_rel_buildtab [of "∥p∥"]])
case (1 nxt i j)
show ?case
proof (cases "Suc i < ∥p∥")
case True
then obtain nxt' i' j'
where eq: "(nxt', i', j') = buildtab_step p nxt i j" and invar': "buildtab_invariant p nxt' i' j'"
using "1.prems" buildtab_invariant by (metis surj_pair)
then have "j>0 ⟹ nxt'!!j < j"
using "1.prems"
by (auto simp: buildtab_invariant_def is_next_def buildtab_step_def split: if_split_asm)
then have decreasing: "((nxt', i', j'), nxt, i, j) ∈ rel_buildtab ∥p∥"
using eq True by (auto simp: rel_buildtab_def buildtab_step_def split: if_split_asm)
show ?thesis
using "1.hyps" [OF decreasing invar'] "1.prems" eq True
by(auto simp add: buildtab.simps[of p nxt] split: prod.splits)
next
case False
with 1 k show ?thesis
by (auto simp: buildtab_invariant_def buildtab.simps)
qed
qed
text ‹Before building the table, check for the degenerate case›
definition table :: "'a array ⇒ nat array" where
"table p = (if ∥p∥ > 1 then buildtab p (array 0 ∥p∥) 1 0
else array 0 ∥p∥)"
declare table_def[code]
lemma is_next_table:
assumes "0 < j ∧ j < ∥p∥"
shows "is_next p j (table p !!j)"
using buildtab_correct[of _ p] buildtab_invariant_init[of p] assms by (simp add: table_def)
subsubsection ‹Linearity of @{term buildtabW}›
partial_function (tailrec) T_buildtab :: "'a array ⇒ nat array ⇒ nat ⇒ nat ⇒ nat ⇒ nat" where
"T_buildtab p nxt i j t =
(if Suc i < ∥p∥
then let (nxt',i',j') = buildtab_step p nxt i j in T_buildtab p nxt' i' j' (Suc t)
else t)"
lemma T_buildtab_correct:
assumes ini: "buildtab_invariant p nxt i j"
shows "T_buildtab p nxt i j t ≤ 2*∥p∥ - 2*i + j + t"
using ini
proof (induction "(nxt,i,j)" arbitrary: nxt i j t rule: wf_induct_rule[OF wf_rel_buildtab [of "∥p∥"]])
case 1
have *: "Suc (T_buildtab p nxt' i' j' t) ≤ 2*∥p∥ - 2*i + j + t"
if eq: "buildtab_step p nxt i j = (nxt', i', j')" and "Suc i < ∥p∥"
for nxt' i' j' t
proof -
have invar': "buildtab_invariant p nxt' i' j'"
using "1.prems" buildtab_invariant that by fastforce
then have nextj: "j>0 ⟹ nxt'!!j < j"
using eq "1.prems"
by (auto simp: buildtab_invariant_def is_next_def buildtab_step_def split: if_split_asm)
then have decreasing: "((nxt', i', j'), nxt, i, j) ∈ rel_buildtab ∥p∥"
using that by (auto simp: rel_buildtab_def same_fst_def buildtab_step_def split: if_split_asm)
then have "T_buildtab p nxt' i' j' t ≤ 2 * ∥p∥ - 2 * i' + j' + t"
using "1.hyps" invar' by blast
then show ?thesis
using "1.prems" that nextj
by (force simp: T_buildtab.simps [of p nxt' i' j'] buildtab_step_def split: if_split_asm)
qed
show ?case
using * [where t = "Suc t"] by (auto simp add: T_buildtab.simps split: prod.split)
qed
lemma T_buildtab_linear:
assumes "2 ≤ ∥p∥"
shows "T_buildtab p (array 0 ∥p∥) 1 0 0 ≤ 2*(∥p∥ - 1)"
using assms T_buildtab_correct [OF buildtab_invariant_init, of p 0] by auto
subsection ‹The actual string search algorithm›
definition
"KMP_step p nxt a i j =
(if a!!i = p!!j then (Suc i, Suc j)
else if j=0 then (Suc i, 0) else (i, nxt!!j))"
text ‹The conjunction of the invariants given in the Why3 development›
definition KMP_invariant :: "'a array ⇒ 'a array ⇒ nat ⇒ nat ⇒ bool" where
"KMP_invariant p a i j =
(j ≤ ∥p∥ ∧ j≤i ∧ i ≤ ∥a∥ ∧ matches a (i-j) p 0 j
∧ (∀k < i-j. ¬ matches a k p 0 ∥p∥))"
text ‹The invariant trivially holds upon initialisation›
lemma KMP_invariant_init: "KMP_invariant p a 0 0"
by (auto simp: KMP_invariant_def)
text ‹The invariant holds after an iteration›
lemma KMP_invariant:
assumes ini: "KMP_invariant p a i j"
and j: "j < ∥p∥" and i: "i < ∥a∥"
shows "let (i',j') = KMP_step p (table p) a i j in KMP_invariant p a i' j'"
proof (cases "a!!i = p!!j")
case True
then show ?thesis
using assms by (simp add: KMP_invariant_def KMP_step_def matches_right_extension)
next
case neq: False
show ?thesis
proof (cases "j=0")
case True
with neq assms show ?thesis
by (simp add: matches_contradiction_at_first KMP_invariant_def KMP_step_def less_Suc_eq)
next
case False
then have is_nxt: "is_next p j (table p !!j)"
using assms is_next_table j by blast
then have "table p !! j ≤ j"
by (simp add: is_next_def)
moreover have "matches a (i - table p !! j) p 0 (table p !! j)"
by (meson is_nxt KMP_invariant_def ini next_iteration)
moreover
have False if k: "k < i - table p !! j" and ma: "matches a k p 0 ∥p∥" for k
proof -
have "k ≠ i-j"
by (metis KMP_invariant_def add_0 ini j le_add_diff_inverse2 ma matches_contradiction_at_i neq)
then show False
by (meson KMP_invariant_def ini is_nxt k linorder_cases ma next_is_maximal')
qed
ultimately show ?thesis
using neq assms False by (auto simp: KMP_invariant_def KMP_step_def)
qed
qed
text ‹The first three arguments are precomputed so that they are not part of the inner loop.›
partial_function (tailrec) search :: "nat ⇒ nat ⇒ nat array ⇒ 'a array ⇒ 'a array ⇒ nat ⇒ nat ⇒ nat * nat" where
"search m n nxt p a i j =
(if j < m ∧ i < n then let (i',j') = KMP_step p nxt a i j in search m n nxt p a i' j'
else (i,j))"
declare search.simps[code]
definition "rel_KMP n = lex_prod (measure (λi. n-i)) (measure id)"
lemma wf_rel_KMP: "wf (rel_KMP n)"
unfolding rel_KMP_def by (auto intro: wf_same_fst)
text ‹Also expresses the absence of a match, when @{term "r = ∥a∥"}›
definition first_occur :: "'a array ⇒ 'a array ⇒ nat ⇒ bool"
where "first_occur p a r = ((r < ∥a∥ ⟶ matches a r p 0 ∥p∥) ∧ (∀k<r. ¬ matches a k p 0 ∥p∥))"
lemma KMP_correct:
assumes ini: "KMP_invariant p a i j"
defines [simp]: "nxt ≡ table p"
shows "let (i',j') = search ∥p∥ ∥a∥ nxt p a i j in first_occur p a (if j' = ∥p∥ then i' - ∥p∥ else i')"
using ini
proof (induction "(i,j)" arbitrary: i j rule: wf_induct_rule[OF wf_rel_KMP [of "∥a∥"]])
case (1 i j)
then have ij: "j ≤ ∥p∥" "j ≤ i" "i ≤ ∥a∥"
and match: "matches a (i - j) p 0 j"
and nomatch: "(∀k<i - j. ¬ matches a k p 0 ∥p∥)"
by (auto simp: KMP_invariant_def)
show ?case
proof (cases "j < ∥p∥ ∧ i < ∥a∥")
case True
have "first_occur p a (if j'' = ∥p∥ then i'' - ∥p∥ else i'')"
if eq: "KMP_step p (table p) a i j = (i', j')" and eq': "search ∥p∥ ∥a∥ nxt p a i' j' = (i'',j'')"
for i' j' i'' j''
proof -
have decreasing: "((i',j'), i, j) ∈ rel_KMP ∥a∥"
using that is_next_table [of j] True
by (auto simp: rel_KMP_def KMP_step_def is_next_def split: if_split_asm)
show ?thesis
using "1.hyps" [OF decreasing] "1.prems" KMP_invariant that True by fastforce
qed
with True show ?thesis
by (smt (verit, best) case_prodI2 nxt_def prod.case_distrib search.simps)
next
case False
have "False" if "matches a k p 0 ∥p∥" "j < ∥p∥" "i = ∥a∥" for k
proof -
have "∥p∥+k ≤ i"
using that by (simp add: matches_def)
with that nomatch show False by auto
qed
with False ij show ?thesis
apply (simp add: first_occur_def split: prod.split)
by (metis le_less_Suc_eq match nomatch not_less_eq prod.inject search.simps)
qed
qed
definition KMP_search :: "'a array ⇒ 'a array ⇒ nat × nat" where
"KMP_search p a = search ∥p∥ ∥a∥ (table p) p a 0 0"
declare KMP_search_def[code]
lemma KMP_search:
"(i,j) = KMP_search p a ⟹ first_occur p a (if j = ∥p∥ then i - ∥p∥ else i)"
unfolding KMP_search_def
using KMP_correct[OF KMP_invariant_init[of p a]] by auto
subsection ‹Examples›
text ‹Building the table, examples from the KMP paper and from Cormen et al.›
definition "Knuth_pattern = array_of_list [1,2,3,1,2,3,1,3,1,2::nat]"
value "list_of_array (table Knuth_pattern)"
definition "CLR_pattern = array_of_list [1,2,1,2,1,2,1,2,3,1::nat]"
value "list_of_array (table CLR_pattern)"
text ‹Worst-case string searches›
definition bad_list :: "nat ⇒ nat list"
where "bad_list n = replicate n 0 @ [Suc 0]"
definition "bad_pattern = array_of_list (bad_list 1000)"
definition "bad_string = array_of_list (bad_list 2000000)"
definition "worse_string = array_of_list (replicate 2000000 (0::nat))"
definition "lousy_string = array_of_list (concat (replicate 2002 (bad_list 999)))"
value "list_of_array (table bad_pattern)"
text ‹A successful search›
value "KMP_search bad_pattern bad_string"
text ‹The search above from the specification alone, i.e. brute-force›
lemma "matches bad_string (2000001-1001) bad_pattern 0 1001"
by eval
text ‹Unsuccessful searches›
value "KMP_search bad_pattern worse_string"
text ‹The search above from the specification alone, i.e. brute-force›
lemma "∀k<2000000. ¬ matches worse_string k bad_pattern 0 1001"
by eval
value "KMP_search lousy_string bad_string"
lemma "∀k < ∥lousy_string∥. ¬ matches lousy_string k bad_pattern 0 1001"
by eval
subsection ‹Alternative approach, expressing the algorithms as while loops›
definition buildtabW:: "'a array ⇒ nat array ⇒ nat ⇒ nat ⇒ nat array option" where
"buildtabW p nxt i j ≡
map_option fst (while_option (λ(_, i', _). Suc i' < ∥p∥)
(λ(nxt', i', j'). buildtab_step p nxt' i' j')
(nxt, i, j))"
lemma buildtabW_halts:
assumes "buildtab_invariant p nxt i j"
shows "∃y. buildtabW p nxt i j = Some y"
proof -
have "∃y. (λp nxt i j. while_option (λ(_, i', _). Suc i' < ∥p∥)
(λ(nxt', i', j'). buildtab_step p nxt' i' j')
(nxt, i, j)) p nxt i j = Some y"
proof (rule measure_while_option_Some[of "λ(nxt, i, j). buildtab_invariant p nxt i j" _ _
"(λp (nxt, i, j). 2 * ∥p∥ - 2 * i + j) p"],
clarify, rule conjI, goal_cases)
case (2 nxt i j)
then show ?case by (auto simp: buildtab_step_def buildtab_invariant_def matches_def is_next_def)
qed (fastforce simp: assms buildtab_invariant)+
then show ?thesis unfolding buildtabW_def by blast
qed
lemma buildtabW_correct:
assumes k: "0<k ∧ k < ∥p∥" and ini: "buildtab_invariant p nxt i j"
shows "is_next p k (the (buildtabW p nxt i j) !! k)"
proof -
obtain nxt' i' j' where †:
"while_option (λ(_, i', _). Suc i' < ∥p∥) (λ(nxt', i', j'). buildtab_step p nxt' i' j') (nxt, i, j) = Some (nxt', i', j')"
using buildtabW_halts[OF ini] unfolding buildtabW_def by fast
from while_option_rule[OF _ †, of "λ(nxt, i, j). buildtab_invariant p nxt i j"]
have "buildtab_invariant p nxt' i' j'" using buildtab_invariant ini by fastforce
with while_option_stop[OF †] † show ?thesis
using assms k by (auto simp: is_next_def matches_def buildtab_invariant_def buildtabW_def)
qed
subsubsection ‹Linearity of @{term buildtabW}›
definition T_buildtabW :: "'a array ⇒ nat array ⇒ nat ⇒ nat ⇒ nat ⇒ nat option" where
"T_buildtabW p nxt i j t ≡ map_option (λ(_, _, _, r). r)
(while_option (λ(_, i, _, _). Suc i < ∥p∥)
(λ(nxt, i, j, t). let (nxt', i', j') = buildtab_step p nxt i j in (nxt', i', j', Suc t))
(nxt, i, j, t))"
lemma T_buildtabW_halts:
assumes "buildtab_invariant p nxt i j"
shows "∃y. T_buildtabW p nxt i j t = Some y"
proof -
have "∃y. (while_option (λ(_, i, _, _). Suc i < ∥p∥)
(λ(nxt, i, j, t). let (nxt', i', j') = buildtab_step p nxt i j in (nxt', i', j', Suc t))
(nxt, i, j, t)) = Some y"
proof (intro measure_while_option_Some[of "λ(nxt, i, j, t). buildtab_invariant p nxt i j" _ _
"(λp (nxt, i, j, t). 2 * ∥p∥ - 2 * i + j) p"], clarify, rule conjI, goal_cases)
case (2 nxt i j t)
then show ?case by (auto simp: buildtab_step_def buildtab_invariant_def is_next_def)
qed (fastforce simp: assms buildtab_invariant split: prod.splits)+
then show ?thesis unfolding T_buildtabW_def by blast
qed
lemma T_buildtabW_correct:
assumes ini: "buildtab_invariant p nxt i j"
shows "the (T_buildtabW p nxt i j t) ≤ 2*∥p∥ - 2*i + j + t"
proof -
let ?b = "(λ(nxt', i', j', t'). Suc i' < ∥p∥)"
let ?c = "(λ(nxt, i, j, t). let (nxt', i', j') = buildtab_step p nxt i j in (nxt', i', j', Suc t))"
let ?s = "(nxt, i, j, t)"
let ?P1 = "λ(nxt', i', j', t'). buildtab_invariant p nxt' i' j'
∧ (if Suc i' < ∥p∥ then Suc t' else t') ≤ 2 * ∥p∥ - (2 * i' - j') + t'"
let ?P2 = "λ(nxt', i', j', t'). buildtab_invariant p nxt' i' j'
∧ 2 * ∥p∥ - 2 * i' + j' + t' ≤ 2 * ∥p∥ - 2 * i + j + t"
obtain nxt' i' j' t' where †: "(while_option ?b ?c ?s) = Some (nxt', i', j', t')"
using T_buildtabW_halts[OF ini] unfolding T_buildtabW_def by fast
have 1: "(⋀s. ?P1 s ⟹ ?b s ⟹ ?P1 (?c s))" proof (clarify, intro conjI, goal_cases)
case (2 nxt⇩1 i⇩1 j⇩1 t⇩1 nxt⇩2 i⇩2 j⇩2 t⇩2)
then show ?case
by (auto simp: buildtab_step_def split: if_split_asm)
qed (insert buildtab_invariant, fastforce split: prod.splits)
have P1: "?P1 ?s" using ini by auto
from while_option_rule[OF 1 † P1]
have invar1: "buildtab_invariant p nxt' i' j'" and
invar2: "t' ≤ 2 * ∥p∥ - (2 * i' - j') + t'" by blast (simp add: while_option_stop[OF †])
have "?P2 (nxt', i', j', t')" proof (rule while_option_rule[OF _ †], clarify, intro conjI, goal_cases)
case (1 nxt⇩1 i⇩1 j⇩1 t⇩1 nxt⇩2 i⇩2 j⇩2 t⇩2)
with buildtab_invariant[OF 1(3)] show invar: ?case by (auto split: prod.splits)
next
case (2 nxt⇩1 i⇩1 j⇩1 t⇩1 nxt⇩2 i⇩2 j⇩2 t⇩2)
with 2(4) show ?case
by (auto 0 2 simp: buildtab_step_def buildtab_invariant_def is_next_def split: if_split_asm)
qed (use ini in simp)
with invar1 invar2 † have "t' ≤ 2 * ∥p∥ - 2 * i + j + t" by simp
with † show ?thesis by (simp add: T_buildtabW_def)
qed
lemma T_buildtabW_linear:
assumes "2 ≤ ∥p∥"
shows "the (T_buildtabW p (array 0 ∥p∥) 1 0 0) ≤ 2*(∥p∥ - 1)"
using assms T_buildtabW_correct [OF buildtab_invariant_init, of p 0] by linarith
subsubsection ‹The actual string search algorithm›
definition searchW :: "nat ⇒ nat ⇒ nat array ⇒ 'a array ⇒ 'a array ⇒ nat ⇒ nat ⇒ (nat * nat) option" where
"searchW m n nxt p a i j = while_option (λ(i, j). j < m ∧ i < n) (λ(i,j). KMP_step p nxt a i j) (i,j)"
lemma searchW_halts:
assumes "KMP_invariant p a i j"
shows "∃y. searchW ∥p∥ ∥a∥ (table p) p a i j = Some y"
unfolding searchW_def
proof ((intro measure_while_option_Some[of "λ(i,j). KMP_invariant p a i j" _ _
"λ(i, j). 2 * ∥a∥ - 2 * i + j"], rule conjI; clarify), goal_cases)
case (2 i j)
moreover obtain i' j' where †: "(i', j') = KMP_step p (table p) a i j" by (metis surj_pair)
moreover have "KMP_invariant p a i' j'" using † KMP_invariant[OF 2(1) 2(2) 2(3)] by auto
ultimately have "2 * ∥a∥ - 2 * i' + j' < 2 * ∥a∥ - 2 * i + j"
using is_next_table[of j p]
by (auto simp: KMP_invariant_def KMP_step_def matches_def is_next_def split: if_split_asm)
then show ?case using † by (auto split: prod.splits)
qed (use KMP_invariant assms in fastforce)+
lemma KMP_correctW:
assumes ini: "KMP_invariant p a i j"
defines [simp]: "nxt ≡ table p"
shows "let (i',j') = the (searchW ∥p∥ ∥a∥ nxt p a i j) in first_occur p a (if j' = ∥p∥ then i' - ∥p∥ else i')"
proof -
obtain i' j' where †: "while_option (λ(i, j). j < ∥p∥ ∧ i < ∥a∥) (λ(i,j). KMP_step p nxt a i j) (i,j) = Some (i', j')"
using searchW_halts[OF ini] by (auto simp: searchW_def)
have "KMP_invariant p a i' j'"
using while_option_rule[OF _ †, of "λ(i', j'). KMP_invariant p a i' j'"] ini KMP_invariant by fastforce
with † while_option_stop[OF †] show ?thesis
by (auto simp: searchW_def KMP_invariant_def first_occur_def matches_def)
qed
definition KMP_searchW :: "'a array ⇒ 'a array ⇒ nat × nat" where
"KMP_searchW p a = the (searchW ∥p∥ ∥a∥ (table p) p a 0 0)"
declare KMP_searchW_def[code]
lemma KMP_searchW:
"(i,j) = KMP_searchW p a ⟹ first_occur p a (if j = ∥p∥ then i - ∥p∥ else i)"
unfolding KMP_searchW_def
using KMP_correctW[OF KMP_invariant_init[of p a]] by auto
end