Theory Natural_Mergesort

(* Author: Christian Sternagel *)

theory Natural_Mergesort
  imports "HOL-Data_Structures.Sorting"
begin

subsection ‹Auxiliary Results›

lemma C_merge_adj':
  "C_merge_adj xss  length (concat xss)"
proof (induct xss rule: C_merge_adj.induct)
  case (3 xs ys zss)
  then show ?case using C_merge_ub [of xs ys] by simp
qed simp_all

lemma length_concat_merge_adj:
  "length (concat (merge_adj xss)) = length (concat xss)"
  by (induct xss rule: merge_adj.induct) (simp_all add: length_merge)

lemma C_merge_all':
  "C_merge_all xss  length (concat xss) * log 2 (length xss)"
proof (induction xss rule: C_merge_all.induct)
  case (3 xs ys zss)
  let ?xss = "xs # ys # zss"
  let ?m = "length (concat ?xss)"

  have *: "log 2 (real n + 2) = log 2 (Suc n div 2 + 1) + 1" for n :: nat
    using ceiling_log2_div2 [of "n + 2"] by (simp add: algebra_simps)

  have "C_merge_all ?xss = C_merge_adj ?xss + C_merge_all (merge_adj ?xss)" by simp
  also have "  ?m + C_merge_all (merge_adj ?xss)"
    using C_merge_adj' [of ?xss] by auto
  also have "  ?m + length (concat (merge_adj ?xss)) * log 2 (length (merge_adj ?xss))"
    using "3.IH" by simp
  also have " = ?m + ?m * log 2 (length (merge_adj ?xss))"
    by (simp only: length_concat_merge_adj)
  also have "  ?m * log 2 (length ?xss)"
    by (auto simp: * algebra_simps)
  finally show ?case by simp
qed simp_all


subsection ‹Definition of Natural Mergesort›

text ‹
  Partition input into ascending and descending subsequences.
  (The latter are reverted on the fly.)
›
fun runs :: "('a::linorder) list  'a list list" and
    asc :: "'a  ('a list  'a list)  'a list  'a list list" and
    desc :: "'a  'a list  'a list  'a list list"
  where
    "runs (a # b # xs) = (if a > b then desc b [a] xs else asc b ((#) a) xs)"
  | "runs [x] = [[x]]"
  | "runs [] = []"
  | "asc a as (b # bs) = (if ¬ a > b then asc b (as  (#) a) bs else as [a] # runs (b # bs))"
  | "asc a as [] = [as [a]]"
  | "desc a as (b # bs) = (if a > b then desc b (a # as) bs else (a # as) # runs (b # bs))"
  | "desc a as [] = [a # as]"

definition nmsort :: "('a::linorder) list  'a list"
  where
    "nmsort xs = merge_all (runs xs)"


subsection ‹Functional Correctness›

definition "ascP f = (xs ys. f (xs @ ys) = f xs @ ys)"

lemma ascP_Cons [simp]: "ascP ((#) x)" by (simp add: ascP_def)

lemma ascP_comp_Cons [simp]: "ascP f  ascP (f  (#) x)"
  by (auto simp: ascP_def simp flip: append_Cons)

lemma ascP_simp [simp]:
  assumes "ascP f"
  shows "f [x] = f [] @ [x]"
  using assms [unfolded ascP_def, THEN spec, THEN spec, of "[]" "[x]"] by simp

lemma
  shows mset_runs: "# (image_mset mset (mset (runs xs))) = mset xs"
    and mset_asc: "ascP f  # (image_mset mset (mset (asc x f ys))) = {#x#} + mset (f []) + mset ys"
    and mset_desc: "# (image_mset mset (mset (desc x xs ys))) = {#x#} + mset xs + mset ys"
  by (induct xs and x f ys and x xs ys rule: runs_asc_desc.induct) auto

lemma mset_nmsort:
  "mset (nmsort xs) = mset xs"
  by (auto simp: mset_merge_all nmsort_def mset_runs)

lemma
  shows sorted_runs: "xset (runs xs). sorted x"
    and sorted_asc: "ascP f  sorted (f [])  xset (f []). x  a  xset (asc a f ys). sorted x"
    and sorted_desc: "sorted xs  xset xs. a  x  xset (desc a xs ys). sorted x"
  by (induct xs and a f ys and a xs ys rule: runs_asc_desc.induct)
    (auto simp: sorted_append not_less dest: order_trans, fastforce)

lemma sorted_nmsort:
  "sorted (nmsort xs)"
  by (auto intro: sorted_merge_all simp: nmsort_def sorted_runs)


subsection ‹Running Time Analysis›

fun C_runs :: "('a::linorder) list  nat" and
    C_asc :: "('a::linorder)  'a list  nat" and
    C_desc :: "('a::linorder)  'a list  nat"
  where
    "C_runs (a # b # xs) = 1 + (if a > b then C_desc b xs else C_asc b xs)"
  | "C_runs xs = 0"
  | "C_asc a (b # bs) = 1 + (if ¬ a > b then C_asc b bs else C_runs (b # bs))"
  | "C_asc a [] = 0"
  | "C_desc a (b # bs) = 1 + (if a > b then C_desc b bs else C_runs (b # bs))"
  | "C_desc a [] = 0"

fun C_nmsort :: "('a::linorder) list  nat"
  where
    "C_nmsort xs = C_runs xs + C_merge_all (runs xs)"

lemma
  fixes a :: "'a::linorder" and xs ys :: "'a list"
  shows C_runs: "C_runs xs  length xs - 1"
    and C_asc: "C_asc a ys  length ys"
    and C_desc: "C_desc a ys  length ys"
  by (induct xs and a ys and a ys rule: C_runs_C_asc_C_desc.induct) auto

lemma
  shows length_runs: "length (runs xs)  length xs"
    and length_asc: "ascP f  length (asc a f ys)  1 + length ys"
    and length_desc: "length (desc a xs ys)  1 + length ys"
  by (induct xs and a f ys and a xs ys rule: runs_asc_desc.induct) auto

lemma
  shows length_concat_runs [simp]: "length (concat (runs xs)) = length xs"
    and length_concat_asc: "ascP f  length (concat (asc a f ys)) = 1 + length (f []) + length ys"
    and length_concat_desc: "length (concat (desc a xs ys)) = 1 + length xs + length ys"
  by (induct xs and a f ys and a xs ys rule: runs_asc_desc.induct) auto

lemma log2_mono:
  "x > 0  x  y  log 2 x  log 2 y"
  by auto

lemma
  shows runs_ne: "xs  []  runs xs  []"
    and "ascP f  asc a f ys  []"
    and "desc a xs ys  []"
  by (induct xs and a f ys and a xs ys rule: runs_asc_desc.induct) simp_all

lemma C_nmsort:
  assumes [simp]: "length xs = n"
  shows "C_nmsort xs  n + n * log 2 n"
proof -
  have [simp]: "xs = []  length xs = 0" by blast
  have "int (C_merge_all (runs xs))  int n * log 2 (length (runs xs))"
    using C_merge_all' [of "runs xs"] by simp
  also have "  int n * log 2 n"
    using length_runs [of xs]
    by (cases n) (auto intro!: runs_ne mult_mono ceiling_mono log2_mono)
  finally have "int (C_merge_all (runs xs))  int n * log 2 n" .
  moreover have "C_runs xs  n" using C_runs [of xs] by auto
  ultimately show ?thesis by (auto intro: add_mono)
qed

end