Theory Longest_Common_Subsequence

theory Longest_Common_Subsequence
imports Sublist IArray Code_Target_Numeral Product_Lexorder RBT_Mapping State_Main
subsection ‹Longest Common Subsequence›

theory Longest_Common_Subsequence
  imports
    "HOL-Library.Sublist"
    "HOL-Library.IArray"
    "HOL-Library.Code_Target_Numeral"
    "HOL-Library.Product_Lexorder"
    "HOL-Library.RBT_Mapping"
    "../state_monad/State_Main"
begin

subsubsection ‹Misc›

(* TODO: Move *)
lemma finite_subseq:
  "finite {xs. subseq xs ys}" (is "finite ?S")
proof -
  have "?S ⊆ {xs. set xs ⊆ set ys ∧ length xs ≤ length ys}"
    by (auto elim: list_emb_set intro: list_emb_length)
  moreover have "finite …"
    by (intro finite_lists_length_le finite_set)
  ultimately show ?thesis
    by (rule finite_subset)
qed

lemma subseq_singleton_right:
  "subseq xs [x] = (xs = [x] ∨ xs = [])"
  by (cases xs; simp add: subseq_append_le_same_iff[of _ "[]", simplified])

lemma subseq_append_single_right:
  "subseq xs (ys @ [x]) = ((∃ xs'. subseq xs' ys ∧ xs = xs' @ [x]) ∨ subseq xs ys)"
  by (auto simp: subseq_append_iff subseq_singleton_right)

(* TODO: Move, generalize *)
lemma Max_nat_plus:
  "Max (((+) n) ` S) = (n :: nat) + Max S" if "finite S" "S ≠ {}"
  using that by (auto intro!: Max_ge Max_in Max_eqI)


subsubsection ‹Definitions›

context
  fixes A B :: "'a list"
begin

fun lcs :: "nat ⇒ nat ⇒ nat" where
  "lcs 0 _ = 0" |
  "lcs _ 0 = 0" |
  "lcs (Suc i) (Suc j) = (if A!i = B!j then 1 + lcs i j else max (lcs i (j + 1)) (lcs (i + 1) j))"

definition "OPT i j = Max {length xs | xs. subseq xs (take i A) ∧ subseq xs (take j B)}"

lemma finite_OPT:
  "finite {xs. subseq xs (take i A) ∧ subseq xs (take j B)}" (is "finite ?S")
proof -
  have "?S ⊆ {xs. subseq xs (take i A)}"
    by auto
  moreover have "finite …"
    by (rule finite_subseq)
  ultimately show ?thesis
    by (rule finite_subset)
qed

subsubsection ‹Correctness Proof›
lemma non_empty_OPT:
  "{xs. subseq xs (take i A) ∧ subseq xs (take j B)} ≠ {}"
  by auto

lemma OPT_0_left:
  "OPT 0 j = 0"
  unfolding OPT_def by (simp add: subseq_append_le_same_iff[of _ "[]", simplified])

lemma OPT_0_right:
  "OPT i 0 = 0"
  unfolding OPT_def by (simp add: subseq_append_le_same_iff[of _ "[]", simplified])

lemma OPT_rec1:
  "OPT (i + 1) (j + 1) = 1 + OPT i j" (is "?l = ?r")
  if "A!i = B!j" "i < length A" "j < length B"
proof -
  let ?S = "{length xs |xs. subseq xs (take (i + 1) A) ∧ subseq xs (take (j + 1) B)}"
  let ?R = "{length xs + 1 |xs. subseq xs (take i A) ∧ subseq xs (take j B)}"
  have "?S = {length xs | xs. subseq xs (take i A) ∧ subseq xs (take j B)}
    ∪ {length xs | xs. ∃ ys. subseq ys (take i A) ∧ subseq ys (take j B) ∧ xs = ys @ [B!i]}
    "
    using that
    apply (simp add: take_Suc_conv_app_nth)
    apply (simp add: subseq_append_single_right)
    apply auto
    apply (metis length_append_singleton list_emb_prefix subseq_append)+
    done
  moreover have "… = {length xs | xs. subseq xs (take i A) ∧ subseq xs (take j B)}
    ∪ {length xs + 1 | xs. subseq xs (take i A) ∧ subseq xs (take j B)}"
    by force
  moreover have "Max … = Max ?R"
    using finite_OPT by - (rule Max_eq_if, auto)
  ultimately show "?l = ?r"
    unfolding OPT_def
    using finite_OPT non_empty_OPT
    by (subst Max_nat_plus[symmetric]) (auto simp: image_def intro: arg_cong[where f = Max])
qed

lemma OPT_rec2:
  "OPT (i + 1) (j + 1) = max (OPT i (j + 1)) (OPT (i + 1) j)" (is "?l = ?r")
  if "A!i ≠ B!j" "i < length A" "j < length B"
proof -
  have "{length xs |xs. subseq xs (take (i + 1) A) ∧ subseq xs (take (j + 1) B)}
    = {length xs |xs. subseq xs (take i A) ∧ subseq xs (take (j + 1) B)}
    ∪ {length xs |xs. subseq xs (take (i + 1) A) ∧ subseq xs (take j B)}"
    using that by (auto simp: subseq_append_single_right take_Suc_conv_app_nth)
  with finite_OPT non_empty_OPT show "?l = ?r"
    unfolding OPT_def by (simp) (rule Max_Un, auto)
qed

lemma lcs_correct':
  "OPT i j = lcs i j" if "i ≤ length A" "j ≤ length B"
  using that OPT_rec1 OPT_rec2 by (induction i j rule: lcs.induct; simp add: OPT_0_left OPT_0_right)

theorem lcs_correct:
  "Max {length xs | xs. subseq xs A ∧ subseq xs B} = lcs (length A) (length B)"
  by (simp add: OPT_def lcs_correct'[symmetric])

end (* Fixed Lists *)


subsubsection ‹Functional Memoization›

context
  fixes A B :: "'a iarray"
begin

fun lcs_ia :: "nat ⇒ nat ⇒ nat" where
  "lcs_ia 0 _ = 0" |
  "lcs_ia _ 0 = 0" |
  "lcs_ia (Suc i) (Suc j) =
    (if A!!i = B!!j then 1 + lcs_ia i j else max (lcs_ia i (j + 1)) (lcs_ia (i + 1) j))"

lemma lcs_lcs_ia:
  "lcs xs ys i j = lcs_ia i j" if "A = IArray xs" "B = IArray ys"
  by (induction i j rule: lcs_ia.induct; simp; simp add: that)

memoize_fun lcsm: lcs_ia with_memory dp_consistency_mapping monadifies (state) lcs_ia.simps

memoize_correct
  by memoize_prover

lemmas [code] = lcsm.memoized_correct

end

subsubsection ‹Test Case›

definition lcsa where
  "lcsa xs ys = (let A = IArray xs; B = IArray ys in lcs_ia A B (length xs) (length ys))"

lemma lcsa_correct:
  "lcs xs ys (length xs) (length ys) = lcsa xs ys"
  unfolding lcsa_def by (simp add: lcs_lcs_ia)

value "lcsa ''ABCDGH'' ''AEDFHR''"

value "lcsa ''AGGTAB'' ''GXTXAYB''"

end (* Theory *)