Theory Natural_Mergesort
theory Natural_Mergesort
imports "HOL-Data_Structures.Sorting"
begin
subsection ‹Auxiliary Results›
lemma C_merge_adj':
"C_merge_adj xss ≤ length (concat xss)"
proof (induct xss rule: C_merge_adj.induct)
case (3 xs ys zss)
then show ?case using C_merge_ub [of xs ys] by simp
qed simp_all
lemma length_concat_merge_adj:
"length (concat (merge_adj xss)) = length (concat xss)"
by (induct xss rule: merge_adj.induct) (simp_all add: length_merge)
lemma C_merge_all':
"C_merge_all xss ≤ length (concat xss) * ⌈log 2 (length xss)⌉"
proof (induction xss rule: C_merge_all.induct)
case (3 xs ys zss)
let ?xss = "xs # ys # zss"
let ?m = "length (concat ?xss)"
have *: "⌈log 2 (real n + 2)⌉ = ⌈log 2 (Suc n div 2 + 1)⌉ + 1" for n :: nat
using ceiling_log2_div2 [of "n + 2"] by (simp add: algebra_simps)
have "C_merge_all ?xss = C_merge_adj ?xss + C_merge_all (merge_adj ?xss)" by simp
also have "… ≤ ?m + C_merge_all (merge_adj ?xss)"
using C_merge_adj' [of ?xss] by auto
also have "… ≤ ?m + length (concat (merge_adj ?xss)) * ⌈log 2 (length (merge_adj ?xss))⌉"
using "3.IH" by simp
also have "… = ?m + ?m * ⌈log 2 (length (merge_adj ?xss))⌉"
by (simp only: length_concat_merge_adj)
also have "… ≤ ?m * ⌈log 2 (length ?xss)⌉"
by (auto simp: * algebra_simps)
finally show ?case by simp
qed simp_all
subsection ‹Definition of Natural Mergesort›
text ‹
Partition input into ascending and descending subsequences.
(The latter are reverted on the fly.)
›
fun runs :: "('a::linorder) list ⇒ 'a list list" and
asc :: "'a ⇒ ('a list ⇒ 'a list) ⇒ 'a list ⇒ 'a list list" and
desc :: "'a ⇒ 'a list ⇒ 'a list ⇒ 'a list list"
where
"runs (a # b # xs) = (if a > b then desc b [a] xs else asc b ((#) a) xs)"
| "runs [x] = [[x]]"
| "runs [] = []"
| "asc a as (b # bs) = (if ¬ a > b then asc b (as ∘ (#) a) bs else as [a] # runs (b # bs))"
| "asc a as [] = [as [a]]"
| "desc a as (b # bs) = (if a > b then desc b (a # as) bs else (a # as) # runs (b # bs))"
| "desc a as [] = [a # as]"
definition nmsort :: "('a::linorder) list ⇒ 'a list"
where
"nmsort xs = merge_all (runs xs)"
subsection ‹Functional Correctness›
definition "ascP f = (∀xs ys. f (xs @ ys) = f xs @ ys)"
lemma ascP_Cons [simp]: "ascP ((#) x)" by (simp add: ascP_def)
lemma ascP_comp_Cons [simp]: "ascP f ⟹ ascP (f ∘ (#) x)"
by (auto simp: ascP_def simp flip: append_Cons)
lemma ascP_simp [simp]:
assumes "ascP f"
shows "f [x] = f [] @ [x]"
using assms [unfolded ascP_def, THEN spec, THEN spec, of "[]" "[x]"] by simp
lemma
shows mset_runs: "∑⇩# (image_mset mset (mset (runs xs))) = mset xs"
and mset_asc: "ascP f ⟹ ∑⇩# (image_mset mset (mset (asc x f ys))) = {#x#} + mset (f []) + mset ys"
and mset_desc: "∑⇩# (image_mset mset (mset (desc x xs ys))) = {#x#} + mset xs + mset ys"
by (induct xs and x f ys and x xs ys rule: runs_asc_desc.induct) auto
lemma mset_nmsort:
"mset (nmsort xs) = mset xs"
by (auto simp: mset_merge_all nmsort_def mset_runs)
lemma
shows sorted_runs: "∀x∈set (runs xs). sorted x"
and sorted_asc: "ascP f ⟹ sorted (f []) ⟹ ∀x∈set (f []). x ≤ a ⟹ ∀x∈set (asc a f ys). sorted x"
and sorted_desc: "sorted xs ⟹ ∀x∈set xs. a ≤ x ⟹ ∀x∈set (desc a xs ys). sorted x"
by (induct xs and a f ys and a xs ys rule: runs_asc_desc.induct)
(auto simp: sorted_append not_less dest: order_trans, fastforce)
lemma sorted_nmsort:
"sorted (nmsort xs)"
by (auto intro: sorted_merge_all simp: nmsort_def sorted_runs)
subsection ‹Running Time Analysis›
fun C_runs :: "('a::linorder) list ⇒ nat" and
C_asc :: "('a::linorder) ⇒ 'a list ⇒ nat" and
C_desc :: "('a::linorder) ⇒ 'a list ⇒ nat"
where
"C_runs (a # b # xs) = 1 + (if a > b then C_desc b xs else C_asc b xs)"
| "C_runs xs = 0"
| "C_asc a (b # bs) = 1 + (if ¬ a > b then C_asc b bs else C_runs (b # bs))"
| "C_asc a [] = 0"
| "C_desc a (b # bs) = 1 + (if a > b then C_desc b bs else C_runs (b # bs))"
| "C_desc a [] = 0"
fun C_nmsort :: "('a::linorder) list ⇒ nat"
where
"C_nmsort xs = C_runs xs + C_merge_all (runs xs)"
lemma
fixes a :: "'a::linorder" and xs ys :: "'a list"
shows C_runs: "C_runs xs ≤ length xs - 1"
and C_asc: "C_asc a ys ≤ length ys"
and C_desc: "C_desc a ys ≤ length ys"
by (induct xs and a ys and a ys rule: C_runs_C_asc_C_desc.induct) auto
lemma
shows length_runs: "length (runs xs) ≤ length xs"
and length_asc: "ascP f ⟹ length (asc a f ys) ≤ 1 + length ys"
and length_desc: "length (desc a xs ys) ≤ 1 + length ys"
by (induct xs and a f ys and a xs ys rule: runs_asc_desc.induct) auto
lemma
shows length_concat_runs [simp]: "length (concat (runs xs)) = length xs"
and length_concat_asc: "ascP f ⟹ length (concat (asc a f ys)) = 1 + length (f []) + length ys"
and length_concat_desc: "length (concat (desc a xs ys)) = 1 + length xs + length ys"
by (induct xs and a f ys and a xs ys rule: runs_asc_desc.induct) auto
lemma log2_mono:
"x > 0 ⟹ x ≤ y ⟹ log 2 x ≤ log 2 y"
by auto
lemma
shows runs_ne: "xs ≠ [] ⟹ runs xs ≠ []"
and "ascP f ⟹ asc a f ys ≠ []"
and "desc a xs ys ≠ []"
by (induct xs and a f ys and a xs ys rule: runs_asc_desc.induct) simp_all
lemma C_nmsort:
assumes [simp]: "length xs = n"
shows "C_nmsort xs ≤ n + n * ⌈log 2 n⌉"
proof -
have [simp]: "xs = [] ⟷ length xs = 0" by blast
have "int (C_merge_all (runs xs)) ≤ int n * ⌈log 2 (length (runs xs))⌉"
using C_merge_all' [of "runs xs"] by simp
also have "… ≤ int n * ⌈log 2 n⌉"
using length_runs [of xs]
by (cases n) (auto intro!: runs_ne mult_mono ceiling_mono log2_mono)
finally have "int (C_merge_all (runs xs)) ≤ int n * ⌈log 2 n⌉" .
moreover have "C_runs xs ≤ n" using C_runs [of xs] by auto
ultimately show ?thesis by (auto intro: add_mono)
qed
end