Theory Natural_Mergesort

(* Author: Christian Sternagel *)

theory Natural_Mergesort
  imports "HOL-Data_Structures.Sorting"
begin

subsection ‹Auxiliary Results›

lemma C_merge_adj':
  "C_merge_adj xss ≤ length (concat xss)"
proof (induct xss rule: C_merge_adj.induct)
  case (3 xs ys zss)
  then show ?case using C_merge_ub [of xs ys] by simp
qed simp_all

lemma length_concat_merge_adj:
  "length (concat (merge_adj xss)) = length (concat xss)"
  by (induct xss rule: merge_adj.induct) (simp_all add: length_merge)

lemma C_merge_all':
  "C_merge_all xss ≤ length (concat xss) * ⌈log 2 (length xss)⌉"
proof (induction xss rule: C_merge_all.induct)
  case (3 xs ys zss)
  let ?xss = "xs # ys # zss"
  let ?m = "length (concat ?xss)"

  have *: "⌈log 2 (real n + 2)⌉ = ⌈log 2 (Suc n div 2 + 1)⌉ + 1" for n :: nat
    using ceiling_log2_div2 [of "n + 2"] by (simp add: algebra_simps)

  have "C_merge_all ?xss = C_merge_adj ?xss + C_merge_all (merge_adj ?xss)" by simp
  also have "… ≤ ?m + C_merge_all (merge_adj ?xss)"
    using C_merge_adj' [of ?xss] by auto
  also have "… ≤ ?m + length (concat (merge_adj ?xss)) * ⌈log 2 (length (merge_adj ?xss))⌉"
    using "3.IH" by simp
  also have "… = ?m + ?m * ⌈log 2 (length (merge_adj ?xss))⌉"
    by (simp only: length_concat_merge_adj)
  also have "… ≤ ?m * ⌈log 2 (length ?xss)⌉"
    by (auto simp: * algebra_simps)
  finally show ?case by simp
qed simp_all


subsection ‹Definition of Natural Mergesort›

text ‹
  Partition input into ascending and descending subsequences.
  (The latter are reverted on the fly.)
›
fun runs :: "('a::linorder) list ⇒ 'a list list" and
    asc :: "'a ⇒ ('a list ⇒ 'a list) ⇒ 'a list ⇒ 'a list list" and
    desc :: "'a ⇒ 'a list ⇒ 'a list ⇒ 'a list list"
  where
    "runs (a # b # xs) = (if a > b then desc b [a] xs else asc b ((#) a) xs)"
  | "runs [x] = [[x]]"
  | "runs [] = []"
  | "asc a as (b # bs) = (if ¬ a > b then asc b (as ∘ (#) a) bs else as [a] # runs (b # bs))"
  | "asc a as [] = [as [a]]"
  | "desc a as (b # bs) = (if a > b then desc b (a # as) bs else (a # as) # runs (b # bs))"
  | "desc a as [] = [a # as]"

definition nmsort :: "('a::linorder) list ⇒ 'a list"
  where
    "nmsort xs = merge_all (runs xs)"


subsection ‹Functional Correctness›

definition "ascP f = (∀xs ys. f (xs @ ys) = f xs @ ys)"

lemma ascP_Cons [simp]: "ascP ((#) x)" by (simp add: ascP_def)

lemma ascP_comp_Cons [simp]: "ascP f ⟹ ascP (f ∘ (#) x)"
  by (auto simp: ascP_def simp flip: append_Cons)

lemma ascP_simp [simp]:
  assumes "ascP f"
  shows "f [x] = f [] @ [x]"
  using assms [unfolded ascP_def, THEN spec, THEN spec, of "[]" "[x]"] by simp

lemma
  shows mset_runs: "∑⇩_# (image_mset mset (mset (runs xs))) = mset xs"
    and mset_asc: "ascP f ⟹ ∑⇩_# (image_mset mset (mset (asc x f ys))) = {#x#} + mset (f []) + mset ys"
    and mset_desc: "∑⇩_# (image_mset mset (mset (desc x xs ys))) = {#x#} + mset xs + mset ys"
  by (induct xs and x f ys and x xs ys rule: runs_asc_desc.induct) auto

lemma mset_nmsort:
  "mset (nmsort xs) = mset xs"
  by (auto simp: mset_merge_all nmsort_def mset_runs)

lemma
  shows sorted_runs: "∀x∈set (runs xs). sorted x"
    and sorted_asc: "ascP f ⟹ sorted (f []) ⟹ ∀x∈set (f []). x ≤ a ⟹ ∀x∈set (asc a f ys). sorted x"
    and sorted_desc: "sorted xs ⟹ ∀x∈set xs. a ≤ x ⟹ ∀x∈set (desc a xs ys). sorted x"
  by (induct xs and a f ys and a xs ys rule: runs_asc_desc.induct)
    (auto simp: sorted_append not_less dest: order_trans, fastforce)

lemma sorted_nmsort:
  "sorted (nmsort xs)"
  by (auto intro: sorted_merge_all simp: nmsort_def sorted_runs)


subsection ‹Running Time Analysis›

fun C_runs :: "('a::linorder) list ⇒ nat" and
    C_asc :: "('a::linorder) ⇒ 'a list ⇒ nat" and
    C_desc :: "('a::linorder) ⇒ 'a list ⇒ nat"
  where
    "C_runs (a # b # xs) = 1 + (if a > b then C_desc b xs else C_asc b xs)"
  | "C_runs xs = 0"
  | "C_asc a (b # bs) = 1 + (if ¬ a > b then C_asc b bs else C_runs (b # bs))"
  | "C_asc a [] = 0"
  | "C_desc a (b # bs) = 1 + (if a > b then C_desc b bs else C_runs (b # bs))"
  | "C_desc a [] = 0"

fun C_nmsort :: "('a::linorder) list ⇒ nat"
  where
    "C_nmsort xs = C_runs xs + C_merge_all (runs xs)"

lemma
  fixes a :: "'a::linorder" and xs ys :: "'a list"
  shows C_runs: "C_runs xs ≤ length xs - 1"
    and C_asc: "C_asc a ys ≤ length ys"
    and C_desc: "C_desc a ys ≤ length ys"
  by (induct xs and a ys and a ys rule: C_runs_C_asc_C_desc.induct) auto

lemma
  shows length_runs: "length (runs xs) ≤ length xs"
    and length_asc: "ascP f ⟹ length (asc a f ys) ≤ 1 + length ys"
    and length_desc: "length (desc a xs ys) ≤ 1 + length ys"
  by (induct xs and a f ys and a xs ys rule: runs_asc_desc.induct) auto

lemma
  shows length_concat_runs [simp]: "length (concat (runs xs)) = length xs"
    and length_concat_asc: "ascP f ⟹ length (concat (asc a f ys)) = 1 + length (f []) + length ys"
    and length_concat_desc: "length (concat (desc a xs ys)) = 1 + length xs + length ys"
  by (induct xs and a f ys and a xs ys rule: runs_asc_desc.induct) auto

lemma log2_mono:
  "x > 0 ⟹ x ≤ y ⟹ log 2 x ≤ log 2 y"
  by auto

lemma
  shows runs_ne: "xs ≠ [] ⟹ runs xs ≠ []"
    and "ascP f ⟹ asc a f ys ≠ []"
    and "desc a xs ys ≠ []"
  by (induct xs and a f ys and a xs ys rule: runs_asc_desc.induct) simp_all

lemma C_nmsort:
  assumes [simp]: "length xs = n"
  shows "C_nmsort xs ≤ n + n * ⌈log 2 n⌉"
proof -
  have [simp]: "xs = [] ⟷ length xs = 0" by blast
  have "int (C_merge_all (runs xs)) ≤ int n * ⌈log 2 (length (runs xs))⌉"
    using C_merge_all' [of "runs xs"] by simp
  also have "… ≤ int n * ⌈log 2 n⌉"
    using length_runs [of xs]
    by (cases n) (auto intro!: runs_ne mult_mono ceiling_mono log2_mono)
  finally have "int (C_merge_all (runs xs)) ≤ int n * ⌈log 2 n⌉" .
  moreover have "C_runs xs ≤ n" using C_runs [of xs] by auto
  ultimately show ?thesis by (auto intro: add_mono)
qed

end