imports IArray Product_Lexorder RBT_Mapping State_Main Example_Misc Tracing Ground_Function

subsection "Minimum Edit Distance" theory Min_Ed_Dist0 imports "HOL-Library.IArray" "HOL-Library.Code_Target_Numeral" "HOL-Library.Product_Lexorder" "HOL-Library.RBT_Mapping" "../state_monad/State_Main" "../heap_monad/Heap_Main" Example_Misc "../util/Tracing" "../util/Ground_Function" begin subsubsection "Misc" text "Executable argmin" fun argmin :: "('a ⇒ 'b::order) ⇒ 'a list ⇒ 'a" where "argmin f [a] = a" | "argmin f (a#as) = (let m = argmin f as in if f a ≤ f m then a else m)" (* end rm *) (* Ex: Optimization of argmin *) fun argmin2 :: "('a ⇒ 'b::order) ⇒ 'a list ⇒ 'a * 'b" where "argmin2 f [a] = (a, f a)" | "argmin2 f (a#as) = (let fa = f a; (am,m) = argmin2 f as in if fa ≤ m then (a, fa) else (am,m))" subsubsection "Edit Distance" datatype 'a ed = Copy | Repl 'a | Ins 'a | Del fun edit :: "'a ed list ⇒ 'a list ⇒ 'a list" where "edit (Copy # es) (x # xs) = x # edit es xs" | "edit (Repl a # es) (x # xs) = a # edit es xs" | "edit (Ins a # es) xs = a # edit es xs" | "edit (Del # es) (x # xs) = edit es xs" | "edit (Copy # es) [] = edit es []" | "edit (Repl a # es) [] = edit es []" | "edit (Del # es) [] = edit es []" | "edit [] xs = xs" abbreviation cost where "cost es ≡ length [e <- es. e ≠ Copy]" subsubsection "Minimum Edit Sequence" fun min_eds :: "'a list ⇒ 'a list ⇒ 'a ed list" where "min_eds [] [] = []" | "min_eds [] (y#ys) = Ins y # min_eds [] ys" | "min_eds (x#xs) [] = Del # min_eds xs []" | "min_eds (x#xs) (y#ys) = argmin cost [Ins y # min_eds (x#xs) ys, Del # min_eds xs (y#ys), (if x=y then Copy else Repl y) # min_eds xs ys]" lemma "min_eds ''vintner'' ''writers'' = [Ins CHR ''w'', Repl CHR ''r'', Copy, Del, Copy, Del, Copy, Copy, Ins CHR ''s'']" by eval (* value "min_eds ''madagascar'' ''bananas''" value "min_eds ''madagascaram'' ''banananas''" *) lemma min_eds_correct: "edit (min_eds xs ys) xs = ys" by (induction xs ys rule: min_eds.induct) auto lemma min_eds_same: "min_eds xs xs = replicate (length xs) Copy" by (induction xs) auto lemma min_eds_eq_Nil_iff: "min_eds xs ys = [] ⟷ xs = [] ∧ ys = []" by (induction xs ys rule: min_eds.induct) auto lemma min_eds_Nil: "min_eds [] ys = map Ins ys" by (induction ys) auto lemma min_eds_Nil2: "min_eds xs [] = replicate (length xs) Del" by (induction xs) auto lemma if_edit_Nil2: "edit es ([]::'a list) = ys ⟹ length ys ≤ cost es" apply(induction es "[]::'a list" arbitrary: ys rule: edit.induct) apply auto done lemma if_edit_eq_Nil: "edit es xs = [] ⟹ length xs ≤ cost es" by (induction es xs rule: edit.induct) auto lemma min_eds_minimal: "edit es xs = ys ⟹ cost(min_eds xs ys) ≤ cost es" proof(induction xs ys arbitrary: es rule: min_eds.induct) case 1 thus ?case by simp next case 2 thus ?case by (auto simp add: min_eds_Nil dest: if_edit_Nil2) next case 3 thus ?case by(auto simp add: min_eds_Nil2 dest: if_edit_eq_Nil) next case 4 show ?case proof (cases "es") case Nil then show ?thesis using "4.prems" by (auto simp: min_eds_same) next case [simp]: (Cons e es') show ?thesis proof (cases e) case Copy thus ?thesis using "4.prems" "4.IH"(3)[of es'] by simp next case (Repl a) thus ?thesis using "4.prems" "4.IH"(3)[of es'] using [[simp_depth_limit=1]] by simp next case (Ins a) thus ?thesis using "4.prems" "4.IH"(1)[of es'] using [[simp_depth_limit=1]] by auto next case Del thus ?thesis using "4.prems" "4.IH"(2)[of es'] using [[simp_depth_limit=1]] by auto qed qed qed subsubsection "Computing the Minimum Edit Distance" fun min_ed :: "'a list ⇒ 'a list ⇒ nat" where "min_ed [] [] = 0" | "min_ed [] (y#ys) = 1 + min_ed [] ys" | "min_ed (x#xs) [] = 1 + min_ed xs []" | "min_ed (x#xs) (y#ys) = Min {1 + min_ed (x#xs) ys, 1 + min_ed xs (y#ys), (if x=y then 0 else 1) + min_ed xs ys}" lemma min_ed_min_eds: "min_ed xs ys = cost(min_eds xs ys)" apply(induction xs ys rule: min_ed.induct) apply (auto split!: if_splits) done lemma "min_ed ''madagascar'' ''bananas'' = 6" by eval (* value "min_ed ''madagascaram'' ''banananas''" *) text "Exercise: Optimization of the Copy case" fun min_eds2 :: "'a list ⇒ 'a list ⇒ 'a ed list" where "min_eds2 [] [] = []" | "min_eds2 [] (y#ys) = Ins y # min_eds2 [] ys" | "min_eds2 (x#xs) [] = Del # min_eds2 xs []" | "min_eds2 (x#xs) (y#ys) = (if x=y then Copy # min_eds2 xs ys else argmin cost [Ins y # min_eds2 (x#xs) ys, Del # min_eds2 xs (y#ys), Repl y # min_eds2 xs ys])" value "min_eds2 ''madagascar'' ''bananas''" lemma cost_Copy_Del: "cost(min_eds xs ys) ≤ cost (min_eds xs (x#ys)) + 1" apply(induction xs ys rule: min_eds.induct) apply(auto simp del: filter_True filter_False split!: if_splits) done lemma cost_Copy_Ins: "cost(min_eds xs ys) ≤ cost (min_eds (x#xs) ys) + 1" apply(induction xs ys rule: min_eds.induct) apply(auto simp del: filter_True filter_False split!: if_splits) done lemma "cost(min_eds2 xs ys) = cost(min_eds xs ys)" proof(induction xs ys rule: min_eds2.induct) case (4 x xs y ys) thus ?case apply (auto split!: if_split) apply (metis (mono_tags, lifting) Suc_eq_plus1 Suc_leI cost_Copy_Del cost_Copy_Ins le_imp_less_Suc le_neq_implies_less not_less) apply (metis Suc_eq_plus1 cost_Copy_Del le_antisym) by (metis Suc_eq_plus1 cost_Copy_Ins le_antisym) qed simp_all lemma "min_eds2 xs ys = min_eds xs ys" oops (* Not proveable because Copy comes last in min_eds but first in min_eds2. Can reorder, but the proof still requires the same two lemmas cost_*_* above. *) subsubsection "Indexing" text "Indexing lists" context fixes xs ys :: "'a list" fixes m n :: nat begin function (sequential) min_ed_ix' :: "nat * nat ⇒ nat" where "min_ed_ix' (i,j) = (if i ≥ m then if j ≥ n then 0 else 1 + min_ed_ix' (i,j+1) else if j ≥ n then 1 + min_ed_ix' (i+1, j) else Min {1 + min_ed_ix' (i,j+1), 1 + min_ed_ix' (i+1, j), (if xs!i = ys!j then 0 else 1) + min_ed_ix' (i+1,j+1)})" by pat_completeness auto termination by(relation "measure(λ(i,j). (m - i) + (n - j))") auto declare min_ed_ix'.simps[simp del] end lemma min_ed_ix'_min_ed: "min_ed_ix' xs ys (length xs) (length ys) (i, j) = min_ed (drop i xs) (drop j ys)" apply(induction "(i,j)" arbitrary: i j rule: min_ed_ix'.induct[of "length xs" "length ys"]) apply(subst min_ed_ix'.simps) apply(simp add: Cons_nth_drop_Suc[symmetric]) done text "Indexing functions" context fixes xs ys :: "nat ⇒ 'a" fixes m n :: nat begin function (sequential) min_ed_ix :: "nat × nat ⇒ nat" where "min_ed_ix (i, j) = (if i ≥ m then if j ≥ n then 0 else n-j else if j ≥ n then m-i else min_list [1 + min_ed_ix (i, j+1), 1 + min_ed_ix (i+1, j), (if xs i = ys j then 0 else 1) + min_ed_ix (i+1, j+1)])" by pat_completeness auto termination by(relation "measure(λ(i,j). (m - i) + (n - j))") auto subsubsection ‹Functional Memoization› memoize_fun min_ed_ix⇩_{m}: min_ed_ix with_memory dp_consistency_mapping monadifies (state) min_ed_ix.simps thm min_ed_ix⇩_{m}'.simps memoize_correct by memoize_prover print_theorems lemmas [code] = min_ed_ix⇩_{m}.memoized_correct declare min_ed_ix.simps[simp del] subsubsection ‹Imperative Memoization› context fixes mem :: "nat ref × nat ref × nat option array ref × nat option array ref" assumes mem_is_init: "mem = result_of (init_state (n + 1) m (m + 1)) Heap.empty" begin interpretation iterator "λ (x, y). x ≤ m ∧ y ≤ n ∧ x > 0" "λ (x, y). if y > 0 then (x, y - 1) else (x - 1, n)" "λ (x, y). (m - x) * (n + 1) + (n - y)" by (rule table_iterator_down) lemma [intro]: "dp_consistency_heap_array_pair' (n + 1) fst snd id m (m + 1) mem" by (standard; simp add: mem_is_init injective_def) lemma [intro]: "dp_consistency_heap_array_pair_iterator (n + 1) fst snd id m (m + 1) mem (λ (x, y). if y > 0 then (x, y - 1) else (x - 1, n)) (λ (x, y). (m - x) * (n + 1) + (n - y)) (λ (x, y). x ≤ m ∧ y ≤ n ∧ x > 0) " by (standard; simp add: mem_is_init injective_def) memoize_fun min_ed_ix⇩_{h}: min_ed_ix with_memory (default_proof) dp_consistency_heap_array_pair_iterator where size = "n + 1" and key1="fst :: nat × nat ⇒ nat" and key2="snd :: nat × nat ⇒ nat" and k1="m :: nat" and k2="m + 1 :: nat" and to_index = "id :: nat ⇒ nat" and mem = mem and cnt = "λ (x, y). x ≤ m ∧ y ≤ n ∧ x > 0" and nxt = "λ (x::nat, y). if y > 0 then (x, y - 1) else (x - 1, n)" and sizef = "λ (x, y). (m - x) * (n + 1) + (n - y)" monadifies (heap) min_ed_ix.simps memoize_correct by memoize_prover lemmas memoized_empty = min_ed_ix⇩_{h}.memoized_empty[OF min_ed_ix⇩_{h}.consistent_DP_iter_and_compute[OF min_ed_ix⇩_{h}.crel]] lemmas iter_heap_unfold = iter_heap_unfold end (* Fixed Memory *) end subsubsection ‹Test Cases› abbreviation "slice xs i j ≡ map xs [i..<j]" lemma min_ed_Nil1: "min_ed [] ys = length ys" by (induction ys) auto lemma min_ed_Nil2: "min_ed xs [] = length xs" by (induction xs) auto (* prove correctness of min_ed_ix directly ? *) lemma min_ed_ix_min_ed: "min_ed_ix xs ys m n (i,j) = min_ed (slice xs i m) (slice ys j n)" apply(induction "(i,j)" arbitrary: i j rule: min_ed_ix.induct[of m n]) apply(simp add: min_ed_ix.simps upt_conv_Cons min_ed_Nil1 min_ed_Nil2 Suc_diff_Suc) done text ‹Functional Test Cases› definition "min_ed_list xs ys = min_ed_ix (λi. xs!i) (λi. ys!i) (length xs) (length ys) (0,0)" lemma "min_ed_list ''madagascar'' ''bananas'' = 6" by eval definition "min_ed_ia xs ys = (let a = IArray xs; b = IArray ys in min_ed_ix (λi. a!!i) (λi. b!!i) (length xs) (length ys) (0,0))" lemma "min_ed_ia ''madagascar'' ''bananas'' = 6" by eval text ‹Extracting an Executable Constant for the Imperative Implementation› ground_function min_ed_ix⇩_{h}'_impl: min_ed_ix⇩_{h}'.simps termination by(relation "measure(λ(xs, ys, m, n, mem, i, j). (m - i) + (n - j))") auto lemmas [simp del] = min_ed_ix⇩_{h}'_impl.simps min_ed_ix⇩_{h}'.simps lemma min_ed_ix⇩_{h}'_impl_def: includes heap_monad_syntax fixes m n :: nat fixes mem :: "nat ref × nat ref × nat option array ref × nat option array ref" assumes mem_is_init: "mem = result_of (init_state (n + 1) m (m + 1)) Heap.empty" shows "min_ed_ix⇩_{h}'_impl xs ys m n mem = min_ed_ix⇩_{h}' xs ys m n mem" proof - have "min_ed_ix⇩_{h}'_impl xs ys m n mem (i, j) = min_ed_ix⇩_{h}' xs ys m n mem (i, j)" for i j apply (induction rule: min_ed_ix⇩_{h}'.induct[OF mem_is_init]) apply (subst min_ed_ix⇩_{h}'_impl.simps) apply (subst min_ed_ix⇩_{h}'.simps[OF mem_is_init]) apply (solve_cong simp) done then show ?thesis by auto qed definition "iter_min_ed_ix xs ys m n mem = iterator_defs.iter_heap (λ (x, y). x ≤ m ∧ y ≤ n ∧ x > 0) (λ (x, y). if y > 0 then (x, y - 1) else (x - 1, n)) (min_ed_ix⇩_{h}'_impl xs ys m n mem) " lemma iter_min_ed_ix_unfold[code]: "iter_min_ed_ix xs ys m n mem = (λ (i, j). (if i > 0 ∧ i ≤ m ∧ j ≤ n then do { min_ed_ix⇩_{h}'_impl xs ys m n mem (i, j); iter_min_ed_ix xs ys m n mem (if j > 0 then (i, j - 1) else (i - 1, n)) } else Heap_Monad.return ()))" unfolding iter_min_ed_ix_def by (rule ext) (safe, simp add: iter_heap_unfold) definition "min_ed_ix_impl xs ys m n i j = do { mem ← (init_state (n + 1) (m::nat) (m + 1) :: (nat ref × nat ref × nat option array ref × nat option array ref) Heap); iter_min_ed_ix xs ys m n mem (m, n); min_ed_ix⇩_{h}'_impl xs ys m n mem (i, j) }" lemma bf_impl_correct: "min_ed_ix xs ys m n (i, j) = result_of (min_ed_ix_impl xs ys m n i j) Heap.empty" using memoized_empty[OF HOL.refl, of xs ys m n "(i, j)" "λ _. (m, n)"] by (simp add: execute_bind_success[OF succes_init_state] min_ed_ix_impl_def min_ed_ix⇩_{h}'_impl_def iter_min_ed_ix_def ) text ‹Imperative Test Case› definition "min_ed_ia⇩_{h}xs ys = (let a = IArray xs; b = IArray ys in min_ed_ix_impl (λi. a!!i) (λi. b!!i) (length xs) (length ys) 0 0)" definition "test_case = min_ed_ia⇩_{h}''madagascar'' ''bananas''" export_code min_ed_ix in SML module_name Test code_reflect Test functions test_case text ‹One can see a trace of the calls to the memory in the output› ML ‹Test.test_case ()› end