# Theory Frequency_Moments_Preliminary_Results

```section ‹Preliminary Results›

theory Frequency_Moments_Preliminary_Results
imports
"HOL.Transcendental"
"HOL-Computational_Algebra.Primes"
"HOL-Library.Extended_Real"
"HOL-Library.Multiset"
"HOL-Library.Sublist"
Prefix_Free_Code_Combinators.Prefix_Free_Code_Combinators
Bertrands_Postulate.Bertrand
begin

text ‹This section contains various preliminary results.›

lemma card_ordered_pairs:
fixes M :: "('a ::linorder) set"
assumes "finite M"
shows "2 * card {(x,y) ∈ M × M. x < y} = card M * (card M - 1)"
proof -
have a: "finite (M × M)" using assms by simp

have inj_swap: "inj (λx. (snd x, fst x))"
by (rule inj_onI, simp add: prod_eq_iff)

have "2 * card {(x,y) ∈ M × M. x < y} =
card {(x,y) ∈ M × M. x < y} + card ((λx. (snd x, fst x))`{(x,y) ∈ M × M. x < y})"
by (simp add: card_image[OF inj_on_subset[OF inj_swap]])
also have "... = card {(x,y) ∈ M × M. x < y} + card {(x,y) ∈ M × M. y < x}"
by (auto intro: arg_cong[where f="card"] simp add:set_eq_iff image_iff)
also have "... = card ({(x,y) ∈ M × M. x < y} ∪ {(x,y) ∈ M × M. y < x})"
by (intro card_Un_disjoint[symmetric] a finite_subset[where B="M × M"] subsetI) auto
also have "... = card ((M × M) - {(x,y) ∈ M × M. x = y})"
by (auto intro: arg_cong[where f="card"] simp add:set_eq_iff)
also have "... = card (M × M) - card {(x,y) ∈ M × M. x = y}"
by (intro card_Diff_subset a finite_subset[where B="M × M"] subsetI) auto
also have "... = card M ^ 2 - card ((λx. (x,x)) ` M)"
using assms
by (intro arg_cong2[where f="(-)"] arg_cong[where f="card"])
(auto simp:power2_eq_square set_eq_iff image_iff)
also have "... = card M ^ 2 - card M"
by (intro arg_cong2[where f="(-)"] card_image inj_onI, auto)
also have "... = card M * (card M - 1)"
by (cases "card M ≥ 0", auto simp:power2_eq_square algebra_simps)
finally show ?thesis by simp
qed

lemma ereal_mono: "x ≤ y ⟹ ereal x ≤ ereal y"
by simp

lemma log_mono: "a > 1 ⟹ x ≤ y ⟹ 0 < x ⟹ log a x ≤ log a y"
by (subst log_le_cancel_iff, auto)

lemma abs_ge_iff: "((x::real) ≤ abs y) = (x ≤ y ∨ x ≤ -y)"
by linarith

lemma count_list_gr_1:
"(x ∈ set xs) = (count_list xs x ≥ 1)"
by (induction xs, simp, simp)

lemma count_list_append: "count_list (xs@ys) v = count_list xs v + count_list ys v"
by (induction xs, simp, simp)

lemma count_list_lt_suffix:
assumes "suffix a b"
assumes "x ∈ {b ! i| i. i <  length b - length a}"
shows  "count_list a x < count_list b x"
proof -
have "length a ≤ length b" using assms(1)
hence "x ∈ set (nths b {i. i < length b - length a})"
using assms diff_commute by (auto simp add:set_nths)
hence a:"x ∈ set (take (length b - length a) b)"
by (subst (asm) lessThan_def[symmetric], simp)
have "b = (take (length b - length a) b)@drop (length b - length a) b"
by simp
also have "... = (take (length b - length a) b)@a"
using assms(1) suffix_take by auto
finally have b:"b = (take (length b - length a) b)@a" by simp

have "count_list a x < 1 + count_list a x" by simp
also have "... ≤ count_list (take (length b - length a) b) x + count_list a x"
using a count_list_gr_1
also have "... = count_list b x"
using b count_list_append by metis
finally show ?thesis by simp
qed

lemma suffix_drop_drop:
assumes "x ≥ y"
shows "suffix (drop x a) (drop y a)"
proof -
have "drop y a = take (x - y) (drop y a)@drop (x- y) (drop y a)"
by (subst append_take_drop_id, simp)
also have " ... = take (x-y) (drop y a)@drop x a"
using assms by simp
finally have "drop y a = take (x-y) (drop y a)@drop x a" by simp
thus ?thesis
qed

lemma count_list_card: "count_list xs x = card {k. k < length xs ∧ xs ! k = x}"
proof -
have "count_list xs x = length (filter ((=) x) xs)"
by (induction xs, simp, simp)
also have "... = card {k. k < length xs ∧ xs ! k = x}"
by (subst length_filter_conv_card, metis)
finally show ?thesis by simp
qed

lemma card_gr_1_iff:
assumes "finite S"  "x ∈ S"  "y ∈ S"  "x ≠ y"
shows "card S > 1"
using assms card_le_Suc0_iff_eq leI by auto

lemma count_list_ge_2_iff:
assumes "y < z"
assumes "z < length xs"
assumes "xs ! y = xs ! z"
shows "count_list xs (xs ! y) > 1"
proof -
have " 1 < card {k. k < length xs ∧ xs ! k = xs ! y}"
using assms by (intro card_gr_1_iff[where x="y" and y="z"], auto)

thus ?thesis
qed

text ‹Results about multisets and sorting›

text ‹This is a induction scheme over the distinct elements of a multisets:
We can represent each multiset as a sum like:
@{text "replicate_mset n⇩1 x⇩1 + replicate_mset n⇩2 x⇩2 + ... + replicate_mset n⇩k x⇩k"} where the
@{term "x⇩i"} are distinct.›

lemma disj_induct_mset:
assumes "P {#}"
assumes "⋀n M x. P M ⟹ ¬(x ∈# M) ⟹ n > 0 ⟹ P (M + replicate_mset n x)"
shows "P M"
proof (induction "size M" arbitrary: M rule:nat_less_induct)
case 1
show ?case
proof (cases "M = {#}")
case True
then show ?thesis using assms by simp
next
case False
then obtain x where x_def: "x ∈# M" using multiset_nonemptyE by auto
define M1 where "M1 = M - replicate_mset (count M x) x"
then have M_def: "M = M1 + replicate_mset (count M x) x"
have "size M1 < size M"
by (metis M_def x_def count_greater_zero_iff less_add_same_cancel1 size_replicate_mset size_union)
hence "P M1" using 1 by blast
then show "P M"
apply (subst M_def, rule assms(2), simp)
qed
qed

lemma prod_mset_conv:
fixes f :: "'a ⇒ 'b::{comm_monoid_mult}"
shows "prod_mset (image_mset f A) = prod (λx. f x^(count A x)) (set_mset A)"
proof (induction A rule: disj_induct_mset)
case 1
then show ?case by simp
next
case (2 n M x)
moreover have "count M x = 0" using 2 by (simp add: count_eq_zero_iff)
moreover have "⋀y. y ∈ set_mset M ⟹ y ≠ x" using 2 by blast
ultimately show ?case by (simp add:algebra_simps)
qed

text ‹There is a version @{thm [source] sum_list_map_eq_sum_count} but it doesn't work
if the function maps into the reals.›

lemma sum_list_eval:
fixes f :: "'a ⇒ 'b::{ring,semiring_1}"
shows "sum_list (map f xs) = (∑x ∈ set xs. of_nat (count_list xs x) * f x)"
proof -
define M where "M = mset xs"
have "sum_mset (image_mset f M) = (∑x ∈ set_mset M. of_nat (count M x) * f x)"
proof (induction "M" rule:disj_induct_mset)
case 1
then show ?case by simp
next
case (2 n M x)
have a:"⋀y. y ∈ set_mset M ⟹ y ≠ x" using 2(2) by blast
show ?case using 2 by (simp add:a  count_eq_zero_iff[symmetric])
qed
moreover have "⋀x. count_list xs x = count (mset xs) x"
by (induction xs, simp, simp)
ultimately show ?thesis
qed

lemma prod_list_eval:
fixes f :: "'a ⇒ 'b::{ring,semiring_1,comm_monoid_mult}"
shows "prod_list (map f xs) = (∏x ∈ set xs. (f x)^(count_list xs x))"
proof -
define M where "M = mset xs"
have "prod_mset (image_mset f M) = (∏x ∈ set_mset M. f x ^ (count M x))"
proof (induction "M" rule:disj_induct_mset)
case 1
then show ?case by simp
next
case (2 n M x)
have a:"⋀y. y ∈ set_mset M ⟹ y ≠ x" using 2(2) by blast
have b:"count M x = 0" using 2 by (subst  count_eq_zero_iff) blast
show ?case using 2  by (simp add:a b mult.commute)
qed
moreover have "⋀x. count_list xs x = count (mset xs) x"
by (induction xs, simp, simp)
ultimately show ?thesis
qed

lemma sorted_sorted_list_of_multiset: "sorted (sorted_list_of_multiset M)"
by (induction M, auto simp:sorted_insort)

lemma count_mset: "count (mset xs) a = count_list xs a"
by (induction xs, auto)

lemma swap_filter_image: "filter_mset g (image_mset f A) = image_mset f (filter_mset (g ∘ f) A)"
by (induction A, auto)

lemma list_eq_iff:
assumes "mset xs = mset ys"
assumes "sorted xs"
assumes "sorted ys"
shows "xs = ys"
using assms properties_for_sort by blast

lemma sorted_list_of_multiset_image_commute:
assumes "mono f"
shows "sorted_list_of_multiset (image_mset f M) = map f (sorted_list_of_multiset M)"
proof -
have "sorted (sorted_list_of_multiset (image_mset f M))"
moreover have " sorted_wrt (λx y. f x ≤ f y) (sorted_list_of_multiset M)"
by (rule sorted_wrt_mono_rel[where P="λx y. x ≤ y"])
(auto intro: monoD[OF assms] sorted_sorted_list_of_multiset)
hence "sorted (map f (sorted_list_of_multiset M))"
by (subst sorted_wrt_map)
ultimately show ?thesis
by (intro list_eq_iff, auto)
qed

text ‹Results about rounding and floating point numbers›

lemma round_down_ge:
"x ≤ round_down prec x + 2 powr (-prec)"
using round_down_correct by (simp, meson diff_diff_eq diff_eq_diff_less_eq)

lemma truncate_down_ge:
"x ≤ truncate_down prec x + abs x * 2 powr (-prec)"
proof (cases "abs x > 0")
case True
have "x ≤ round_down (int prec - ⌊log 2 ¦x¦⌋) x + 2 powr (-real_of_int(int prec - ⌊log 2 ¦x¦⌋))"
by (rule round_down_ge)
also have "... ≤ truncate_down prec x + 2 powr ( ⌊log 2 ¦x¦⌋) * 2 powr (-real prec)"
also have "... ≤ truncate_down prec x + ¦x¦ * 2 powr (-real prec)"
using True
finally show ?thesis by simp
next
case False
then show ?thesis by simp
qed

lemma truncate_down_pos:
assumes "x ≥ 0"
shows "x * (1 - 2 powr (-prec)) ≤ truncate_down prec x"
(metis truncate_down_ge assms  abs_of_nonneg)

lemma truncate_down_eq:
assumes "truncate_down r x = truncate_down r y"
shows "abs (x-y) ≤ max (abs x) (abs y) * 2 powr (-real r)"
proof -
have "x - y ≤ truncate_down r x + abs x * 2 powr (-real r) - y"
by (rule diff_right_mono, rule truncate_down_ge)
also have "... ≤ y + abs x * 2 powr (-real r) - y"
using truncate_down_le
by (intro diff_right_mono add_mono, subst assms(1), simp_all)
also have "... ≤ abs x * 2 powr (-real r)" by simp
also have "... ≤ max (abs x) (abs y) * 2 powr (-real r)" by simp
finally have a:"x - y ≤ max (abs x) (abs y) * 2 powr (-real r)" by simp

have "y - x ≤ truncate_down r y + abs y * 2 powr (-real r) - x"
by (rule diff_right_mono, rule truncate_down_ge)
also have "... ≤ x + abs y * 2 powr (-real r) - x"
using truncate_down_le
by (intro diff_right_mono add_mono, subst assms(1)[symmetric], auto)
also have "... ≤ abs y * 2 powr (-real r)" by simp
also have "... ≤ max (abs x) (abs y) * 2 powr (-real r)" by simp
finally have b:"y - x ≤ max (abs x) (abs y) * 2 powr (-real r)" by simp

show ?thesis
using abs_le_iff a b by linarith
qed

definition rat_of_float :: "float ⇒ rat" where
"rat_of_float f = of_int (mantissa f) *
(if exponent f ≥ 0 then 2 ^ (nat (exponent f)) else 1 / 2 ^ (nat (-exponent f)))"

lemma real_of_rat_of_float: "real_of_rat (rat_of_float x) = real_of_float x"
proof -
have "real_of_rat (rat_of_float x) = mantissa x * (2 powr (exponent x))"
by (simp add:rat_of_float_def of_rat_mult of_rat_divide of_rat_power powr_realpow[symmetric] powr_minus_divide)
also have "... = real_of_float x"
using mantissa_exponent by simp
finally show ?thesis by simp
qed

lemma log_est: "log 2 (real n + 1) ≤ n"
proof -
have "1 + real n = real (n + 1)"
by simp
also have "... ≤ real (2 ^ n)"
by (intro of_nat_mono suc_n_le_2_pow_n)
also have "... = 2 powr (real n)"
finally have "1 + real n ≤ 2 powr (real n)"
by simp
thus ?thesis
qed

lemma truncate_mantissa_bound:
"abs (⌊x * 2 powr (real r - real_of_int ⌊log 2 ¦x¦⌋)⌋) ≤ 2 ^ (r+1)" (is "?lhs ≤ _")
proof -
define q where "q = ⌊x * 2 powr (real r - real_of_int (⌊log 2 ¦x¦⌋))⌋"

have "abs q ≤ 2 ^ (r + 1)" if a:"x > 0"
proof -
have "abs q = q"
using a by (intro abs_of_nonneg, simp add:q_def)
also have "... ≤ x * 2 powr (real r - real_of_int ⌊log 2 ¦x¦⌋)"
unfolding q_def using of_int_floor_le by blast
also have "... = x * 2 powr real_of_int (int r - ⌊log 2 ¦x¦⌋)"
by auto
also have "... = 2 powr (log 2 x + real_of_int (int r - ⌊log 2 ¦x¦⌋))"
also have "... ≤ 2 powr (real r + 1)"
using a by (intro powr_mono, linarith+)
also have "... = 2 ^ (r+1)"
finally show "abs q ≤ 2 ^ (r+1)"
by (metis of_int_le_iff of_int_numeral of_int_power)
qed

moreover have "abs q ≤ (2 ^ (r + 1))" if a: "x < 0"
proof -
have "-(2 ^ (r+1) + 1) = -(2 powr (real r + 1)+1)"
also have  "... < -(2 powr (log 2 (- x) + (r - ⌊log 2 ¦x¦⌋)) + 1)"
using a by (simp, linarith)
also have "... = x * 2 powr (r - ⌊log 2 ¦x¦⌋) - 1"
also have "... ≤ q"
also have "... = - abs q"
using a
by (subst abs_of_neg, simp_all add: mult_pos_neg2 q_def)
finally have "-(2 ^ (r+1)+1) < - abs q" using of_int_less_iff by fastforce
hence "-(2 ^ (r+1)) ≤ - abs q" by linarith
thus "abs q ≤ 2^(r+1)" by linarith
qed

moreover have "x = 0 ⟹ abs q ≤ 2^(r+1)"
ultimately have "abs q ≤ 2^(r+1)"
by fastforce
thus ?thesis using q_def by blast
qed

lemma truncate_float_bit_count:
"bit_count (F⇩e (float_of (truncate_down r x))) ≤ 10 + 4 * real r + 2*log 2 (2 + ¦log 2 ¦x¦¦)"
(is "?lhs ≤ ?rhs")
proof -
define m where "m = ⌊x * 2 powr (real r - real_of_int ⌊log 2 ¦x¦⌋)⌋"
define e where "e = ⌊log 2 ¦x¦⌋ - int r"

have a: "(real_of_int ⌊log 2 ¦x¦⌋ - real r) = e"
have "abs m + 2 ≤ 2 ^ (r + 1) + 2^1"
using truncate_mantissa_bound
also have "... ≤ 2 ^ (r+2)"
by simp
finally have b:"abs m + 2 ≤ 2 ^ (r+2)" by simp
hence "real_of_int (¦m¦ + 2) ≤ real_of_int (4 * 2 ^ r)"
by (subst of_int_le_iff, simp)
hence "¦real_of_int m¦ + 2 ≤ 4 * 2 ^ r"
by simp
hence c:"log 2 (real_of_int (¦m¦ + 2)) ≤ r+2"

have "real_of_int (abs e + 1) ≤ real_of_int ¦⌊log 2 ¦x¦⌋¦ +  real_of_int r + 1"
also have "... ≤ 1 + abs (log 2 (abs x)) + real_of_int r + 1"
also have "... ≤ (real_of_int r+ 1) * (2 + abs (log 2 (abs x)))"
finally have d:"real_of_int (abs e + 1) ≤ (real_of_int r+ 1) * (2 + abs (log 2 (abs x)))" by simp

have "log 2 (real_of_int (abs e + 1)) ≤ log 2 (real_of_int r + 1) + log 2 (2 + abs (log 2 (abs x)))"
using d by (simp add: log_mult[symmetric])
also have "... ≤ r + log 2 (2 + abs (log 2 (abs x)))"
finally have e: "log 2 (real_of_int (abs e + 1)) ≤ r + log 2 (2 + abs (log 2 (abs x)))" by simp

have "?lhs =  bit_count (F⇩e (float_of (real_of_int m * 2 powr real_of_int e)))"
by (simp add:truncate_down_def round_down_def m_def[symmetric] a)
also have "... ≤ ereal (6 + (2 * log 2 (real_of_int (¦m¦ + 2)) + 2 * log 2 (real_of_int (¦e¦ + 1))))"
using float_bit_count_2 by simp
also have "... ≤ ereal (6 + (2 * real (r+2) + 2 * (r + log 2 (2 + abs (log 2 (abs x))))))"
using c e
by (subst ereal_less_eq, intro add_mono mult_left_mono, linarith+)
also have "... = ?rhs" by simp
finally show ?thesis by simp
qed

definition prime_above :: "nat ⇒ nat"
where "prime_above n = (SOME x. x ∈ {n..(2*n+2)} ∧ prime x)"

text ‹The term @{term"prime_above n"} returns a prime between @{term "n::nat"} and @{term "2*(n::nat)+2"}.
Because of Bertrand's postulate there always is such a value. In a refinement of the algorithms, it may make sense to
replace this with an algorithm, that finds such a prime exactly or approximately.

The definition is intentionally inexact, to allow refinement with various algorithms, without modifying the
high-level mathematical correctness proof.›

lemma ex_subset:
assumes "∃x ∈ A. P x"
assumes "A ⊆ B"
shows "∃x ∈ B. P x"
using assms by auto

lemma
shows prime_above_prime: "prime (prime_above n)"
and prime_above_range: "prime_above n ∈ {n..(2*n+2)}"
proof -
define r where "r = (λx. x ∈ {n..(2*n+2)} ∧ prime x)"
have "∃x. r x"
proof (cases "n>2")
case True
hence "n-1 > 1" by simp
hence "∃x ∈ {(n-1)<..<(2*(n-1))}. prime x"
using bertrand by simp
moreover have "{n - 1<..<2 * (n - 1)} ⊆ {n..2 * n + 2}"
by (intro subsetI, auto)
ultimately have "∃x ∈ {n..(2*n+2)}. prime x"
by (rule ex_subset)
then show ?thesis by (simp add:r_def Bex_def)
next
case False
hence "2 ∈ {n..(2*n+2)}"
by simp
moreover have "prime (2::nat)"
using two_is_prime_nat by blast
ultimately have "r 2"
using r_def by simp
then show ?thesis by (rule exI)
qed
moreover have "prime_above n = (SOME x. r x)"
ultimately have a:"r (prime_above n)"
using someI_ex by metis
show "prime (prime_above n)"
using a unfolding r_def by blast
show "prime_above n ∈ {n..(2*n+2)}"
using a unfolding r_def by blast
qed

lemma prime_above_min:  "prime_above n ≥ 2"
using prime_above_prime