# Theory Min_Ed_Dist0

theory Min_Ed_Dist0
imports IArray Product_Lexorder RBT_Mapping State_Main Example_Misc Tracing Ground_Function
```subsection "Minimum Edit Distance"

theory Min_Ed_Dist0
imports
"HOL-Library.IArray"
"HOL-Library.Code_Target_Numeral"
"HOL-Library.Product_Lexorder"
"HOL-Library.RBT_Mapping"
Example_Misc
"../util/Tracing"
"../util/Ground_Function"
begin

subsubsection "Misc"

text "Executable argmin"

fun argmin :: "('a ⇒ 'b::order) ⇒ 'a list ⇒ 'a" where
"argmin f [a] = a" |
"argmin f (a#as) = (let m = argmin f as in if f a ≤ f m then a else m)"
(* end rm *)

(* Ex: Optimization of argmin *)
fun argmin2 :: "('a ⇒ 'b::order) ⇒ 'a list ⇒ 'a * 'b" where
"argmin2 f [a] = (a, f a)" |
"argmin2 f (a#as) = (let fa = f a; (am,m) = argmin2 f as in if fa ≤ m then (a, fa) else (am,m))"

subsubsection "Edit Distance"

datatype 'a ed = Copy | Repl 'a | Ins 'a | Del

fun edit :: "'a ed list ⇒ 'a list ⇒ 'a list" where
"edit (Copy # es) (x # xs) = x # edit es xs" |
"edit (Repl a # es) (x # xs) = a # edit es xs" |
"edit (Ins a # es) xs = a # edit es xs" |
"edit (Del # es) (x # xs) = edit es xs" |
"edit (Copy # es) [] = edit es []" |
"edit (Repl a # es) [] = edit es []" |
"edit (Del # es) [] = edit es []" |
"edit [] xs = xs"

abbreviation cost where
"cost es ≡ length [e <- es. e ≠ Copy]"

subsubsection "Minimum Edit Sequence"

fun min_eds :: "'a list ⇒ 'a list ⇒ 'a ed list" where
"min_eds [] [] = []" |
"min_eds [] (y#ys) = Ins y # min_eds [] ys" |
"min_eds (x#xs) [] = Del # min_eds xs []" |
"min_eds (x#xs) (y#ys) =
argmin cost [Ins y # min_eds (x#xs) ys, Del # min_eds xs (y#ys),
(if x=y then Copy else Repl y) # min_eds xs ys]"

lemma "min_eds ''vintner'' ''writers'' =
[Ins CHR ''w'', Repl CHR ''r'', Copy, Del, Copy, Del, Copy, Copy, Ins CHR ''s'']"
by eval
(*

*)
lemma min_eds_correct: "edit (min_eds xs ys) xs = ys"
by (induction xs ys rule: min_eds.induct) auto

lemma min_eds_same: "min_eds xs xs = replicate (length xs) Copy"
by (induction xs) auto

lemma min_eds_eq_Nil_iff: "min_eds xs ys = [] ⟷ xs = [] ∧ ys = []"
by (induction xs ys rule: min_eds.induct) auto

lemma min_eds_Nil: "min_eds [] ys = map Ins ys"
by (induction ys) auto

lemma min_eds_Nil2: "min_eds xs [] = replicate (length xs) Del"
by (induction xs) auto

lemma if_edit_Nil2: "edit es ([]::'a list) = ys ⟹ length ys ≤ cost es"
apply(induction es "[]::'a list" arbitrary: ys rule: edit.induct)
apply auto
done

lemma if_edit_eq_Nil: "edit es xs = [] ⟹ length xs ≤ cost es"
by (induction es xs rule: edit.induct) auto

lemma min_eds_minimal: "edit es xs = ys ⟹ cost(min_eds xs ys) ≤ cost es"
proof(induction xs ys arbitrary: es rule: min_eds.induct)
case 1 thus ?case by simp
next
case 2 thus ?case by (auto simp add: min_eds_Nil dest: if_edit_Nil2)
next
case 3
thus ?case by(auto simp add: min_eds_Nil2 dest: if_edit_eq_Nil)
next
case 4
show ?case
proof (cases "es")
case Nil then show ?thesis using "4.prems" by (auto simp: min_eds_same)
next
case [simp]: (Cons e es')
show ?thesis
proof (cases e)
case Copy
thus ?thesis using "4.prems" "4.IH"(3)[of es'] by simp
next
case (Repl a)
thus ?thesis using "4.prems" "4.IH"(3)[of es']
using [[simp_depth_limit=1]] by simp
next
case (Ins a)
thus ?thesis using "4.prems" "4.IH"(1)[of es']
using [[simp_depth_limit=1]] by auto
next
case Del
thus ?thesis using "4.prems" "4.IH"(2)[of es']
using [[simp_depth_limit=1]] by auto
qed
qed
qed

subsubsection "Computing the Minimum Edit Distance"

fun min_ed :: "'a list ⇒ 'a list ⇒ nat" where
"min_ed [] [] = 0" |
"min_ed [] (y#ys) = 1 + min_ed [] ys" |
"min_ed (x#xs) [] = 1 + min_ed xs []" |
"min_ed (x#xs) (y#ys) =
Min {1 + min_ed (x#xs) ys, 1 + min_ed xs (y#ys), (if x=y then 0 else 1) + min_ed xs ys}"

lemma min_ed_min_eds: "min_ed xs ys = cost(min_eds xs ys)"
apply(induction xs ys rule: min_ed.induct)
apply (auto split!: if_splits)
done

lemma "min_ed ''madagascar'' ''bananas'' = 6"
by eval
(*
*)

text "Exercise: Optimization of the Copy case"

fun min_eds2 :: "'a list ⇒ 'a list ⇒ 'a ed list" where
"min_eds2 [] [] = []" |
"min_eds2 [] (y#ys) = Ins y # min_eds2 [] ys" |
"min_eds2 (x#xs) [] = Del # min_eds2 xs []" |
"min_eds2 (x#xs) (y#ys) =
(if x=y then Copy # min_eds2 xs ys
else argmin cost
[Ins y # min_eds2 (x#xs) ys, Del # min_eds2 xs (y#ys), Repl y # min_eds2 xs ys])"

lemma cost_Copy_Del: "cost(min_eds xs ys) ≤ cost (min_eds xs (x#ys)) + 1"
apply(induction xs ys rule: min_eds.induct)
apply(auto simp del: filter_True filter_False split!: if_splits)
done

lemma cost_Copy_Ins: "cost(min_eds xs ys) ≤ cost (min_eds (x#xs) ys) + 1"
apply(induction xs ys rule: min_eds.induct)
apply(auto simp del: filter_True filter_False split!: if_splits)
done

lemma "cost(min_eds2 xs ys) = cost(min_eds xs ys)"
proof(induction xs ys rule: min_eds2.induct)
case (4 x xs y ys) thus ?case
apply (auto split!: if_split)
apply (metis (mono_tags, lifting) Suc_eq_plus1 Suc_leI cost_Copy_Del cost_Copy_Ins le_imp_less_Suc le_neq_implies_less not_less)
apply (metis Suc_eq_plus1 cost_Copy_Del le_antisym)
by (metis Suc_eq_plus1 cost_Copy_Ins le_antisym)
qed simp_all

lemma "min_eds2 xs ys = min_eds xs ys"
oops
(* Not proveable because Copy comes last in min_eds but first in min_eds2.
Can reorder, but the proof still requires the same two lemmas cost_*_* above.
*)

subsubsection "Indexing"

text "Indexing lists"

context
fixes xs ys :: "'a list"
fixes m n :: nat
begin

function (sequential)
min_ed_ix' :: "nat * nat ⇒ nat" where
"min_ed_ix' (i,j) =
(if i ≥ m then
if j ≥ n then 0 else 1 + min_ed_ix' (i,j+1) else
if j ≥ n then 1 + min_ed_ix' (i+1, j)
else
Min {1 + min_ed_ix' (i,j+1), 1 + min_ed_ix' (i+1, j),
(if xs!i = ys!j then 0 else 1) + min_ed_ix' (i+1,j+1)})"
by pat_completeness auto
termination by(relation "measure(λ(i,j). (m - i) + (n - j))") auto

declare min_ed_ix'.simps[simp del]

end

lemma min_ed_ix'_min_ed:
"min_ed_ix' xs ys (length xs) (length ys) (i, j) = min_ed (drop i xs) (drop j ys)"
apply(induction "(i,j)" arbitrary: i j rule: min_ed_ix'.induct[of "length xs" "length ys"])
apply(subst min_ed_ix'.simps)
done

text "Indexing functions"

context
fixes xs ys :: "nat ⇒ 'a"
fixes m n :: nat
begin

function (sequential)
min_ed_ix :: "nat × nat ⇒ nat" where
"min_ed_ix (i, j) =
(if i ≥ m then
if j ≥ n then 0 else n-j else
if j ≥ n then m-i
else
min_list [1 + min_ed_ix (i, j+1), 1 + min_ed_ix (i+1, j),
(if xs i = ys j then 0 else 1) + min_ed_ix (i+1, j+1)])"
by pat_completeness auto
termination by(relation "measure(λ(i,j). (m - i) + (n - j))") auto

subsubsection ‹Functional Memoization›

memoize_fun min_ed_ix⇩m: min_ed_ix with_memory dp_consistency_mapping monadifies (state) min_ed_ix.simps
thm min_ed_ix⇩m'.simps

memoize_correct
by memoize_prover
print_theorems

lemmas [code] = min_ed_ix⇩m.memoized_correct

declare min_ed_ix.simps[simp del]

subsubsection ‹Imperative Memoization›

context
fixes mem :: "nat ref × nat ref × nat option array ref × nat option array ref"
assumes mem_is_init: "mem = result_of (init_state (n + 1) m (m + 1)) Heap.empty"
begin

interpretation iterator
"λ (x, y). x ≤ m ∧ y ≤ n ∧ x > 0"
"λ (x, y). if y > 0 then (x, y - 1) else (x - 1, n)"
"λ (x, y). (m - x) * (n + 1) + (n - y)"
by (rule table_iterator_down)

lemma [intro]:
"dp_consistency_heap_array_pair' (n + 1) fst snd id m (m + 1) mem"
by (standard; simp add: mem_is_init injective_def)

lemma [intro]:
"dp_consistency_heap_array_pair_iterator (n + 1) fst snd id m (m + 1) mem
(λ (x, y). if y > 0 then (x, y - 1) else (x - 1, n))
(λ (x, y). (m - x) * (n + 1) + (n - y))
(λ (x, y). x ≤ m ∧ y ≤ n ∧ x > 0)
"
by (standard; simp add: mem_is_init injective_def)

memoize_fun min_ed_ix⇩h: min_ed_ix
with_memory (default_proof) dp_consistency_heap_array_pair_iterator
where size = "n + 1"
and key1="fst :: nat × nat ⇒ nat" and key2="snd :: nat × nat ⇒ nat"
and k1="m :: nat" and k2="m + 1 :: nat"
and to_index = "id :: nat ⇒ nat"
and mem = mem
and cnt = "λ (x, y). x ≤ m ∧ y ≤ n ∧ x > 0"
and nxt = "λ (x::nat, y). if y > 0 then (x, y - 1) else (x - 1, n)"
and sizef = "λ (x, y). (m - x) * (n + 1) + (n - y)"

memoize_correct
by memoize_prover

lemmas memoized_empty =
min_ed_ix⇩h.memoized_empty[OF min_ed_ix⇩h.consistent_DP_iter_and_compute[OF min_ed_ix⇩h.crel]]
lemmas iter_heap_unfold = iter_heap_unfold

end (* Fixed Memory *)

end

subsubsection ‹Test Cases›

abbreviation "slice xs i j ≡ map xs [i..<j]"

lemma min_ed_Nil1: "min_ed [] ys = length ys"
by (induction ys) auto

lemma min_ed_Nil2: "min_ed xs [] = length xs"
by (induction xs) auto

(* prove correctness of min_ed_ix directly ? *)
lemma min_ed_ix_min_ed: "min_ed_ix xs ys m n (i,j) = min_ed (slice xs i m) (slice ys j n)"
apply(induction "(i,j)" arbitrary: i j rule: min_ed_ix.induct[of m n])
apply(simp add: min_ed_ix.simps upt_conv_Cons min_ed_Nil1 min_ed_Nil2 Suc_diff_Suc)
done

text ‹Functional Test Cases›

definition "min_ed_list xs ys = min_ed_ix (λi. xs!i) (λi. ys!i) (length xs) (length ys) (0,0)"

lemma "min_ed_list ''madagascar'' ''bananas'' = 6"
by eval

definition "min_ed_ia xs ys = (let a = IArray xs; b = IArray ys
in min_ed_ix (λi. a!!i) (λi. b!!i) (length xs) (length ys) (0,0))"

lemma "min_ed_ia ''madagascar'' ''bananas'' = 6"
by eval

text ‹Extracting an Executable Constant for the Imperative Implementation›

ground_function min_ed_ix⇩h'_impl: min_ed_ix⇩h'.simps
termination
by(relation "measure(λ(xs, ys, m, n, mem, i, j). (m - i) + (n - j))") auto

lemmas [simp del] = min_ed_ix⇩h'_impl.simps min_ed_ix⇩h'.simps

lemma min_ed_ix⇩h'_impl_def:
fixes m n :: nat
fixes mem :: "nat ref × nat ref × nat option array ref × nat option array ref"
assumes mem_is_init: "mem = result_of (init_state (n + 1) m (m + 1)) Heap.empty"
shows "min_ed_ix⇩h'_impl xs ys m n mem = min_ed_ix⇩h' xs ys m n mem"
proof -
have "min_ed_ix⇩h'_impl xs ys m n mem (i, j) = min_ed_ix⇩h' xs ys m n mem (i, j)" for i j
apply (induction rule: min_ed_ix⇩h'.induct[OF mem_is_init])
apply (subst min_ed_ix⇩h'_impl.simps)
apply (subst min_ed_ix⇩h'.simps[OF mem_is_init])
apply (solve_cong simp)
done
then show ?thesis
by auto
qed

definition
"iter_min_ed_ix xs ys m n mem = iterator_defs.iter_heap
(λ (x, y). x ≤ m ∧ y ≤ n ∧ x > 0)
(λ (x, y). if y > 0 then (x, y - 1) else (x - 1, n))
(min_ed_ix⇩h'_impl xs ys m n mem)
"

lemma iter_min_ed_ix_unfold[code]:
"iter_min_ed_ix xs ys m n mem = (λ (i, j).
(if i > 0 ∧ i ≤ m ∧ j ≤ n
then do {
min_ed_ix⇩h'_impl xs ys m n mem (i, j);
iter_min_ed_ix xs ys m n mem (if j > 0 then (i, j - 1) else (i - 1, n))
}
unfolding iter_min_ed_ix_def by (rule ext) (safe, simp add: iter_heap_unfold)

definition
"min_ed_ix_impl xs ys m n i j = do {
mem ← (init_state (n + 1) (m::nat) (m + 1) ::
(nat ref × nat ref × nat option array ref × nat option array ref) Heap);
iter_min_ed_ix xs ys m n mem (m, n);
min_ed_ix⇩h'_impl xs ys m n mem (i, j)
}"

lemma bf_impl_correct:
"min_ed_ix xs ys m n (i, j) = result_of (min_ed_ix_impl xs ys m n i j) Heap.empty"
using memoized_empty[OF HOL.refl, of xs ys m n "(i, j)" "λ _. (m, n)"]
execute_bind_success[OF succes_init_state] min_ed_ix_impl_def min_ed_ix⇩h'_impl_def
iter_min_ed_ix_def
)

text ‹Imperative Test Case›

definition
"min_ed_ia⇩h xs ys = (let a = IArray xs; b = IArray ys
in min_ed_ix_impl (λi. a!!i) (λi. b!!i) (length xs) (length ys) 0 0)"

definition