# Theory Median_Method.Median

section ‹Intervals are Borel measurable›

theory Median
imports
"HOL-Probability.Probability"
"HOL-Library.Multiset"
"Universal_Hash_Families.Universal_Hash_Families_More_Independent_Families"
begin

text ‹This section contains a proof that intervals are Borel measurable, where an interval is
defined as a convex subset of linearly ordered space, more precisely, a set is an interval, if
for each triple of points $x < y < z$: If $x$ and $z$ are in the set so is $y$.
This includes ordinary intervals like @{term "{a..b}"}, @{term "{a<..<b}"} but also for example
@{term [show_types] "{(x::rat). x * x < 2}"} which cannot be expressed in the standard notation.

In the @{theory "HOL-Analysis.Borel_Space"} there are proofs for the measurability of each specific
type of interval, but those unfortunately do not help if we want to express the result about the
median bound for arbitrary types of intervals.›

definition interval :: "('a :: linorder) set ⇒ bool" where
"interval I = (∀x y z. x ∈ I ⟶ z ∈ I ⟶ x ≤ y ⟶ y ≤ z ⟶ y ∈ I)"

definition up_ray :: "('a :: linorder) set ⇒ bool" where
"up_ray I = (∀x y. x ∈ I ⟶ x ≤ y ⟶ y ∈ I)"

lemma up_ray_borel:
assumes "up_ray (I :: (('a :: linorder_topology) set))"
shows "I ∈ borel"
proof (cases "closed I")
case True
then show ?thesis using borel_closed by blast
next
case False
hence b:"¬ closed I" by blast

have "open I"
proof (rule Topological_Spaces.openI)
fix x
assume c:"x ∈ I"
show "∃T. open T ∧ x ∈ T ∧ T ⊆ I"
proof (cases "∃y. y < x ∧ y ∈ I")
case True
then obtain y where a:"y < x ∧ y ∈ I" by blast
have "open {y<..}" by simp
moreover have "x ∈ {y<..}" using a by simp
moreover have "{y<..} ⊆ I"
using a assms(1) by (auto simp: up_ray_def)
ultimately show ?thesis by blast
next
case False
hence "I ⊆ {x..}" using linorder_not_less by auto
moreover have "{x..} ⊆ I"
using c assms(1) unfolding up_ray_def by blast
ultimately have "I = {x..}"
by (rule order_antisym)
moreover have "closed {x..}" by simp
ultimately have "False" using b by auto
then show ?thesis by simp
qed
qed
then show ?thesis by simp
qed

definition down_ray :: "('a :: linorder) set ⇒ bool" where
"down_ray I = (∀x y. y ∈ I ⟶ x ≤ y ⟶ x ∈ I)"

lemma down_ray_borel:
assumes "down_ray (I :: (('a :: linorder_topology) set))"
shows "I ∈ borel"
proof -
have "up_ray (-I)" using assms
by (simp add: up_ray_def down_ray_def, blast)
hence "(-I) ∈ borel" using up_ray_borel by blast
thus "I ∈ borel"
by (metis borel_comp double_complement)
qed

text ‹Main result of this section:›

lemma interval_borel:
assumes "interval (I :: (('a :: linorder_topology) set))"
shows "I ∈ borel"
proof (cases "I = {}")
case True
then show ?thesis by simp
next
case False
then obtain x where a:"x ∈ I" by blast
have "⋀y z. y ∈ I ∪ {x..} ⟹ y ≤ z ⟹ z ∈ I ∪ {x..}"
by (metis assms a interval_def  IntE UnE Un_Int_eq(1) Un_Int_eq(2) atLeast_iff nle_le order.trans)
hence "up_ray (I ∪ {x..})"
using up_ray_def by blast
hence b:"I ∪ {x..} ∈ borel"
using up_ray_borel by blast

have "⋀y z. y ∈ I ∪ {..x} ⟹ z ≤ y ⟹ z ∈ I ∪ {..x}"
by (metis assms a interval_def UnE UnI1 UnI2 atMost_iff dual_order.trans linorder_le_cases)
hence "down_ray (I ∪ {..x})"
using down_ray_def by blast
hence c:"I ∪ {..x} ∈ borel"
using down_ray_borel by blast

have "I = (I ∪ {x..}) ∩ (I ∪ {..x})"
using a by fastforce

then show ?thesis using b c
by (metis sets.Int)
qed

section ‹Order statistics are Borel measurable›

text ‹This section contains a proof that order statistics of Borel measurable random variables are
themselves Borel measurable.

The proof relies on the existence of branch-free comparison-sort algorithms. Given a sequence length
these algorithms perform compare-swap operations on predefined pairs of positions. In particular the
result of a comparison does not affect future operations. An example for a branch-free comparison
sort algorithm is shell-sort and also bubble-sort without the early exit.

The advantage of using such a comparison-sort algorithm is that it can be lifted to work on random
variables, where the result of a comparison-swap operation on two random variables @{term"X"} and
@{term"Y"} can be represented as the expressions @{term "λω. min (X ω) (Y ω)"} and
@{term "λω. max (X ω) (Y ω)"}.

Because taking the point-wise minimum (resp. maximum) of two random variables is still
Borel measurable, and because the entire sorting operation can be represented using such
compare-swap operations, we can show that all order statistics are Borel measuable.›

fun sort_primitive where
"sort_primitive i j f k = (if k = i then min (f i) (f j) else (if k = j then max (f i) (f j) else f k))"

fun sort_map where
"sort_map f n = fold id [sort_primitive j i. i <- [0..<n], j <- [0..<i]] f"

lemma sort_map_ind:
"sort_map f (Suc n) = fold id [sort_primitive j n. j <- [0..<n]] (sort_map f n)"
by simp

lemma sort_map_strict_mono:
fixes f :: "nat ⇒ 'b :: linorder"
shows "j < n ⟹ i < j ⟹ sort_map f n i ≤ sort_map f n j"
proof (induction n arbitrary: i j)
case 0
then show ?case by simp
next
case (Suc n)
define g where "g = (λk. fold id [sort_primitive j n. j <- [0..<k]] (sort_map f n))"
define k where "k = n"
have a:"(∀i j. j < n ⟶ i < j ⟶ g k i ≤ g k j) ∧ (∀l. l < k ⟶ g k l ≤ g k n)"
proof (induction k)
case 0
then show ?case using Suc by (simp add:g_def del:sort_map.simps)
next
case (Suc k)
have "g (Suc k) = sort_primitive k n (g k)"
then show ?case using Suc
apply (cases "g k k ≤ g k n")
using less_antisym apply blast
apply (cases "g k n ≤ g k k")
apply (metis less_antisym max.coboundedI2 max.orderE)
by simp
qed

hence "⋀i j. j < Suc n ⟹ i < j ⟹ g n i ≤ g n j"
apply (simp add:k_def) using less_antisym by blast
moreover have "sort_map f (Suc n) = g n"
ultimately show ?case
using Suc by (simp del:sort_map.simps)
qed

lemma sort_map_mono:
fixes f :: "nat ⇒ 'b :: linorder"
shows "j < n ⟹ i ≤ j ⟹ sort_map f n i ≤ sort_map f n j"
by (metis sort_map_strict_mono eq_iff le_imp_less_or_eq)

lemma sort_map_perm:
fixes f :: "nat ⇒ 'b :: linorder"
shows "image_mset (sort_map f n) (mset [0..<n]) = image_mset f (mset [0..<n])"
proof -
define is_swap where "is_swap = (λ(ts :: ((nat ⇒ 'b) ⇒ nat ⇒ 'b)). ∃i < n. ∃j < n. ts = sort_primitive i j)"
define t :: "((nat ⇒ 'b) ⇒ nat ⇒ 'b) list"
where "t = [sort_primitive j i. i <- [0..<n], j <- [0..<i]]"

have a: "⋀x f. is_swap x ⟹ image_mset (x f) (mset_set {0..<n}) = image_mset f (mset_set {0..<n})"
proof -
fix x
fix f :: "nat ⇒ 'b :: linorder"
assume "is_swap x"
then obtain i j where x_def: "x = sort_primitive i j" and i_bound: "i < n" and j_bound:"j < n"
using is_swap_def by blast
define inv where "inv = mset_set {k. k < n ∧ k ≠ i ∧ k ≠ j}"
have b:"{0..<n} = {k. k < n ∧ k ≠ i ∧ k ≠ j} ∪ {i,j}"
apply (rule order_antisym, rule subsetI, simp, blast, rule subsetI, simp)
using i_bound j_bound by meson
have c:"⋀k. k ∈# inv ⟹ (x f) k = f k"
have "image_mset (x f) inv = image_mset f inv"
apply (rule multiset_eqI)
using c multiset.map_cong0 by force
moreover have "image_mset (x f) (mset_set {i,j}) = image_mset f (mset_set {i,j})"
apply (cases "i = j")
moreover have "mset_set {0..<n} = inv + mset_set {i,j}"
by (simp only:inv_def b, rule mset_set_Union, simp, simp, simp)
ultimately show "image_mset (x f) (mset_set {0..<n}) = image_mset f (mset_set {0..<n})"
by simp
qed

have "(∀x ∈ set t. is_swap x) ⟹ image_mset (fold id t f) (mset [0..<n]) = image_mset f (mset [0..<n])"
by (induction t arbitrary:f, simp, simp add:a)
moreover have "⋀x. x ∈ set t ⟹ is_swap x"
by (meson atLeastLessThan_iff imageE less_imp_le less_le_trans)
ultimately have "image_mset (fold id t f) (mset [0..<n]) = image_mset f (mset [0..<n])" by blast
then show ?thesis by (simp add:t_def)
qed

lemma list_eq_iff:
assumes "mset xs = mset ys"
assumes "sorted xs"
assumes "sorted ys"
shows "xs = ys"
using assms properties_for_sort by blast

lemma sort_map_eq_sort:
fixes f :: "nat ⇒ ('b :: linorder)"
shows "map (sort_map f n) [0..<n] = sort (map f [0..<n])" (is "?A = ?B")
proof -
have "mset ?A = mset ?B"
using sort_map_perm[where f="f" and n="n"]
by (simp del:sort_map.simps)
moreover have "sorted ?B"
by simp
moreover have "sorted ?A"
apply (subst sorted_wrt_iff_nth_less)
apply (simp del:sort_map.simps)
by (metis sort_map_mono nat_less_le)
ultimately show "?A = ?B"
using list_eq_iff by blast
qed

lemma order_statistics_measurable_aux:
fixes X :: "nat ⇒ 'a ⇒ ('b :: {linorder_topology, second_countable_topology})"
assumes "n ≥ 1"
assumes "j < n"
assumes "⋀i. i < n ⟹ X i ∈ measurable M borel"
shows "(λx. (sort_map (λi. X i x) n) j) ∈ measurable M borel"
proof -
have n_ge_0:"n > 0" using assms by simp
define is_swap where "is_swap = (λ(ts :: ((nat ⇒ 'b) ⇒ nat ⇒ 'b)). ∃i < n. ∃j < n. ts = sort_primitive i j)"
define t :: "((nat ⇒ 'b) ⇒ nat ⇒ 'b) list"
where "t = [sort_primitive j i. i <- [0..<n], j <- [0..<i]]"

define meas_ptw :: "(nat ⇒ 'a ⇒ 'b) ⇒ bool"
where "meas_ptw = (λf. (∀k. k < n ⟶ f k ∈ borel_measurable M))"

have ind_step:
"⋀x (g :: nat ⇒ 'a ⇒ 'b). meas_ptw g ⟹ is_swap x ⟹ meas_ptw (λk ω. x (λi. g i ω) k)"
proof -
fix x g
assume "meas_ptw g"
hence a:"⋀k. k < n ⟹ g k ∈ borel_measurable M" by (simp add:meas_ptw_def)
assume "is_swap x"
then obtain i j where x_def:"x=sort_primitive i j" and i_le:"i < n" and j_le:"j < n"
have "⋀k. k < n ⟹ (λω. x (λi. g i ω) k) ∈ borel_measurable M"
proof -
fix k
assume "k < n"
thus " (λω. x (λi. g i ω) k) ∈ borel_measurable M"
apply (cases "k = i", simp)
using a i_le j_le borel_measurable_min apply blast
apply (cases "k = j", simp)
using a i_le j_le borel_measurable_max apply blast
using a by simp
qed
thus "meas_ptw (λk ω. x (λi. g i ω) k)"
qed

have "(∀x ∈ set t. is_swap x) ⟹ meas_ptw (λ k ω. (fold id t (λk. X k ω)) k)"
proof (induction t rule:rev_induct)
case Nil
then show ?case using assms by (simp add:meas_ptw_def)
next
case (snoc x xs)
have a:"meas_ptw (λk ω. fold (λa. a) xs (λk. X k ω) k)" using snoc by simp
have b:"is_swap x" using snoc by simp
show ?case using ind_step[OF a b] by simp
qed
moreover have "⋀x. x ∈ set t ⟹ is_swap x"
by (meson atLeastLessThan_iff imageE less_imp_le less_le_trans)
ultimately show ?thesis using assms
qed

text ‹Main results of this section:›

lemma order_statistics_measurable:
fixes X :: "nat ⇒ 'a ⇒ ('b :: {linorder_topology, second_countable_topology})"
assumes "n ≥ 1"
assumes "j < n"
assumes "⋀i. i < n ⟹ X i ∈ measurable M borel"
shows "(λx. (sort (map (λi. X i x) [0..<n])) ! j) ∈ measurable M borel"
apply (subst sort_map_eq_sort[symmetric])
using assms by (simp add:order_statistics_measurable_aux del:sort_map.simps)

definition median :: "nat ⇒ (nat ⇒ ('a :: linorder)) ⇒ 'a" where
"median n f = sort (map f [0..<n]) ! (n div 2)"

lemma median_measurable:
fixes X :: "nat ⇒ 'a ⇒ ('b :: {linorder_topology, second_countable_topology})"
assumes "n ≥ 1"
assumes "⋀i. i < n ⟹ X i ∈ measurable M borel"
shows "(λx. median n (λi. X i x)) ∈ measurable M borel"
apply (rule order_statistics_measurable[OF assms(1) _ assms(2)])
using assms(1) by force+

section ‹The Median Method›

text ‹This section contains the proof for the probability that the median of independent random
variables will be in an interval with high probability if the individual variables are in the
same interval with probability larger than $\frac{1}{2}$.

The proof starts with the elementary observation that the median of a seqeuence with $n$ elements
is in an interval $I$ if at least half of them are in $I$. This works because after sorting the
sequence the elements that will be in the interval must necessarily form a consecutive subsequence,
if its length is larger than $\frac{n}{2}$ the median must be in it.

The remainder follows the proof in \<^cite>‹‹\textsection 2.1› in "alon1999"› using the Hoeffding inequality
to estimate the probability that at least half of the sequence elements will be in the interval $I$.›

lemma interval_rule:
assumes "interval I"
assumes "a ≤ x" "x ≤ b"
assumes "a ∈ I"
assumes "b ∈ I"
shows "x ∈ I"
using assms by blast

lemma sorted_int:
assumes "interval I"
assumes "sorted xs"
assumes "k < length xs" "i ≤ j" "j ≤ k "
assumes "xs ! i ∈ I" "xs ! k ∈ I"
shows "xs ! j ∈ I"
apply (rule interval_rule[where a="xs ! i" and b="xs ! k"])
using assms by (simp add: sorted_nth_mono)+

lemma mid_in_interval:
assumes "2*length (filter (λx. x ∈ I) xs) > length xs"
assumes "interval I"
assumes "sorted xs"
shows "xs ! (length xs div 2) ∈ I"
proof -
have "length (filter (λx. x ∈ I) xs) > 0"  using assms(1) by linarith
then obtain v where v_1: "v < length xs" and v_2: "xs ! v ∈ I"
by (metis filter_False in_set_conv_nth length_greater_0_conv)

define J where "J = {k. k < length xs ∧ xs ! k ∈ I}"

have card_J_min: "2*card J > length xs"
using assms(1) by (simp add:J_def length_filter_conv_card)

consider
(a) "xs ! (length xs div 2) ∈ I" |
(b) "xs ! (length xs div 2) ∉ I ∧ v > (length xs div 2)" |
(c) "xs ! (length xs div 2) ∉ I ∧ v < (length xs div 2)"
by (metis linorder_cases v_2)
thus ?thesis
proof (cases)
case a
then show ?thesis by simp
next
case b
have p:"⋀k. k ≤ length xs div 2 ⟹ xs ! k ∉ I"
using b v_2 sorted_int[OF assms(2) assms(3) v_1, where j="length xs div 2"] apply simp by blast
hence "card J ≤ card {Suc (length xs div 2)..<length xs}"
unfolding J_def using not_less_eq_eq[symmetric] by (intro card_mono subsetI) auto
hence "card J ≤ length xs - (Suc (length xs div 2))"
using card_atLeastLessThan by metis
hence "length xs ≤ 2*( length xs - (Suc (length xs div 2)))"
using card_J_min by linarith
hence "False" using b v_1 by auto
then show ?thesis by simp
next
case c
have "⋀k. k ≥ length xs div 2 ⟹ k < length xs ⟹ xs ! k ∉ I"
using c v_1 v_2 sorted_int[OF assms(2,3), where i ="v" and j="length xs div 2"]
by simp blast
hence "card J ≤ card {0..<(length xs div 2)}"
unfolding J_def using linorder_le_less_linear by (intro card_mono subsetI) auto
hence "card J ≤ (length xs div 2)"
using card_atLeastLessThan by simp
then show ?thesis using card_J_min by linarith
qed
qed

lemma median_est:
assumes "interval I"
assumes "2*card {k. k < n ∧ f k ∈ I} > n"
shows "median n f ∈ I"
proof -
have "{k. k < n ∧ f k ∈ I} = {i. i < n ∧ map f [0..<n] ! i ∈ I}" by auto
thus ?thesis using assms unfolding median_def
by (intro mid_in_interval[OF _ assms(1),where xs="sort (map f [0..<n])", simplified])
qed

lemma median_est_rev:
assumes "interval I"
assumes "median n f ∉ I"
shows "2*card {k. k < n ∧ f k ∉ I} ≥ n"
proof (rule ccontr)
assume a: "¬(2*card {k. k < n ∧ f k ∉ I} ≥ n)"

have "2 * n = 2 * card {k. k < n}" by simp
also have "... = 2 * card ({k. k < n ∧ f k ∈ I} ∪ {k. k < n ∧ f k ∉ I})"
by (intro arg_cong2[where f="(*)"] refl arg_cong[where f="card"]) auto
also have "... = 2 * card {k. k < n ∧ f k ∈ I} + 2 * card {k. k < n ∧ f k ∉ I}"
by (subst card_Un_disjoint) auto
also have "... ≤ n  + 2 * card {k. k < n ∧ f k ∉ I}"
using median_est[OF assms(1)] assms(2) not_less by (intro add_mono) auto
also have "... < n + n"
using a by (intro add_strict_left_mono) auto
finally show "False" by auto
qed

lemma prod_pmf_bernoulli_mono:
assumes "finite I"
assumes "⋀i. i ∈ I ⟹ 0 ≤ f i ∧ f i ≤ g i ∧ g i ≤ 1"
assumes "⋀x y. x ∈ A ⟹ (∀i ∈ I. x i ≤ y i) ⟹ y ∈ A"
shows "measure (Pi_pmf I d (bernoulli_pmf ∘ f)) A ≤ measure (Pi_pmf I d (bernoulli_pmf ∘ g)) A"
(is "?L ≤ ?R")
proof -
define q where "q i = pmf_of_list [(0::nat, f i), (1, g i - f i), (2, 1 - g i)]" for i

have wf:"pmf_of_list_wf [(0::nat, f i), (1, g i - f i), (2, 1 - g i)]" if "i ∈ I" for i
using assms(2)[OF that] by (intro pmf_of_list_wfI) auto

have 0: "bernoulli_pmf (f i) = map_pmf (λx. x = 0) (q i)" (is "?L1 = ?R1")
if "i ∈ I" for i
proof -
have "0 ≤ f i" "f i ≤ 1" using assms(2)[OF that] by auto
hence "pmf ?L1 x = pmf ?R1 x" for x
unfolding q_def pmf_map measure_pmf_of_list[OF wf[OF that]]
thus ?thesis
by (intro pmf_eqI) auto
qed

have 1: "bernoulli_pmf (g i) = map_pmf (λx. x ∈ {0,1}) (q i)" (is "?L1 = ?R1")
if "i ∈ I" for i
proof -
have "0 ≤ g i" "g i ≤ 1" using assms(2)[OF that] by auto
hence "pmf ?L1 x = pmf ?R1 x" for x
unfolding q_def pmf_map measure_pmf_of_list[OF wf[OF that]]
thus ?thesis
by (intro pmf_eqI) auto
qed

have 2: "(λk. x k = 0) ∈ A ⟹ (λk. x k = 0 ∨ x k = Suc 0) ∈ A" for x
by (erule assms(3)) auto

have "?L = measure (Pi_pmf I d (λi. map_pmf (λx. x = 0) (q i))) A"
unfolding comp_def by (simp add:0 cong: Pi_pmf_cong)
also have "... = measure (map_pmf ((∘) (λx. x = 0)) (Pi_pmf I (if d then 0 else 2) q)) A"
by (intro arg_cong2[where f="measure_pmf.prob"] Pi_pmf_map[OF assms(1)]) auto
also have "... = measure (Pi_pmf I (if d then 0 else 2) q) {x. (λk. x k = 0) ∈ A}"
also have "... ≤ measure (Pi_pmf I (if d then 0 else 2) q) {x. (λk. x k ∈ {0,1}) ∈ A}"
using 2 by (intro measure_pmf.finite_measure_mono subsetI) auto
also have "... = measure (map_pmf ((∘) (λx. x ∈ {0,1})) (Pi_pmf I (if d then 0 else 2) q)) A"
also have "... = measure (Pi_pmf I d (λi. map_pmf (λx. x ∈ {0,1}) (q i))) A"
by (intro arg_cong2[where f="measure_pmf.prob"] Pi_pmf_map[OF assms(1), symmetric]) auto
also have "... = ?R"
unfolding comp_def by (simp add:1 cong: Pi_pmf_cong)
finally show ?thesis by simp
qed

lemma discrete_measure_eqI:
assumes "sets M = count_space UNIV"
assumes "sets N = count_space UNIV"
assumes "countable Ω"
assumes "⋀x. x ∈ Ω ⟹ emeasure M {x} = emeasure N {x} ∧ emeasure M {x} ≠ ∞"
assumes "AE x in M. x ∈ Ω"
assumes "AE x in N. x ∈ Ω"
shows "M = N"
proof -
define E where "E = insert {} ((λx. {x})  Ω)"

have 0: "Int_stable E" unfolding E_def by (intro Int_stableI) auto
have 1: "countable E" using assms(3) unfolding E_def by simp

have "E ⊆ Pow Ω" unfolding E_def by auto
have "emeasure M A = emeasure N A" if A_range: "A ∈ E" for A
using that assms(4) unfolding E_def by auto
moreover have "sets M = sets N" using assms(1,2) by simp
moreover have "Ω ∈ sets M" using assms(1) by simp
moreover have "E ≠ {}" unfolding E_def by simp
moreover have "⋃E = Ω" unfolding E_def by simp
moreover have "emeasure M a ≠ ∞" if "a ∈ E" for a
using that assms(4) unfolding E_def by auto
moreover have "sets (restrict_space M Ω) = Pow Ω"
using assms(1) by (simp add:sets_restrict_space range_inter)
moreover have "sets (restrict_space N Ω) = Pow Ω"
using assms(2) by (simp add:sets_restrict_space range_inter)
moreover have "sigma_sets Ω E = Pow Ω"
unfolding E_def by (intro sigma_sets_singletons_and_empty assms(3))
ultimately show ?thesis
by (intro measure_eqI_restrict_generator[OF 0 _ _ _ _ _ _ assms(5,6) 1]) auto
qed

text ‹Main results of this section:›

text ‹The next theorem establishes a bound for the probability of the median of independent random
variables using the binomial distribution. In a follow-up step, we will establish tail bounds
for the binomial distribution and corresponding median bounds.

This two-step strategy was suggested by Yong Kiam Tan. In a previous version, I only had verified
the exponential tail bound (see theorem \verb+median_bound+ below).›

theorem (in prob_space) median_bound_raw:
fixes I :: "('b :: {linorder_topology, second_countable_topology}) set"
assumes "n > 0" "p ≥ 0"
assumes "interval I"
assumes "indep_vars (λ_. borel) X {0..<n}"
assumes "⋀i. i < n ⟹ 𝒫(ω in M. X i ω ∈ I) ≥ p"
shows "𝒫(ω in M. median n (λi. X i ω) ∈ I) ≥ 1 - measure (binomial_pmf n p) {..n div 2}"
(is "?L ≥ ?R")
proof -
let ?pi = "Pi_pmf {..<n} undefined"
define q where "q i = 𝒫(ω in M. X i ω ∈ I)" for i

have n_ge_1: "n ≥ 1" using assms(1) by simp

have 0: "{k. k < n ∧ (k < n ⟶ X k ω ∈ I)} = {k. k < n ∧ X k ω ∈ I}" for ω
by auto

have "countable ({..<n} →⇩E (UNIV ::  bool set))"
by (intro countable_PiE) auto
hence countable_ext: "countable (extensional {..<n} :: (nat ⇒ bool) set)"
unfolding PiE_def by auto

have m0: "I ∈ sets borel"
using interval_borel[OF assms(3)] by simp

have m1: "random_variable borel (λx. X k x)" if "k ∈ {..<n}" for k
using assms(4) that unfolding indep_vars_def by auto

have m2: "(λx. x ∈ I) ∈ borel →⇩M (measure_pmf ((bernoulli_pmf ∘ q) k))"
for k using m0 by measurable
hence m3: "random_variable (measure_pmf ((bernoulli_pmf ∘ q) k)) (λx. X k x ∈ I)"
if "k ∈ {..<n}" for k
by (intro measurable_compose[OF m1] that)

hence m4: "random_variable (PiM {..<n} (bernoulli_pmf ∘ q)) (λω. λk∈{..<n}. X k ω ∈ I)"
by (intro measurable_restrict) auto
moreover have "A ∈ sets (Pi⇩M {..<n} (λx. measure_pmf (bernoulli_pmf (q x))))"
if "A ⊆ extensional {..<n}" for A
proof -
have "A = (⋃a ∈ A. {a})" by auto
also have "... = (⋃a ∈ A. PiE {..<n} (λk. {a k}))"
using that by (intro arg_cong[where f="Union"] image_cong refl PiE_singleton[symmetric]) auto
also have "... ∈ sets (Pi⇩M {..<n} (λx. measure_pmf (bernoulli_pmf (q x))))"
using that countable_ext countable_subset
by (intro sets.countable_Union countable_image image_subsetI sets_PiM_I_finite) auto
finally show ?thesis by simp
qed
hence m5: "id ∈ (PiM {..<n} (bernoulli_pmf ∘ q)) →⇩M (count_space UNIV)"
by (intro measurableI) (simp_all add:vimage_def space_PiM PiE_def)
ultimately have "random_variable (count_space UNIV) (id ∘ (λω. λk∈{..<n}. X k ω ∈ I))"
by (rule measurable_comp)
hence m6: "random_variable (count_space UNIV) (λω. λk∈{..<n}. X k ω ∈ I)" by simp

have indep: "indep_vars (bernoulli_pmf ∘ q) (λi x. X i x ∈ I) {0..<n}"
by (intro indep_vars_compose2[OF assms(4)] m2)

have "measure M {x ∈ space M. (X k x ∈ I) = ω} = measure (bernoulli_pmf (q k)) {ω}"
if "k < n" for ω k
proof (cases "ω")
case True
then show ?thesis unfolding q_def  by (simp add:measure_pmf_single)
next
case False
have "{x ∈ space M. X k x ∈ I} ∈ events"
using that m0 by (intro measurable_sets_Collect[OF m1]) auto
hence "prob {x ∈ space M. X k x ∉ I} = 1 - prob {ω ∈ space M. X k ω ∈ I}"
by (subst prob_neg) auto
thus ?thesis using False unfolding q_def by (simp add:measure_pmf_single)
qed

hence 1: "emeasure M {x ∈ space M. (X k x ∈ I) = ω} = emeasure (bernoulli_pmf (q k)) {ω}"
if "k < n" for ω k
using that unfolding emeasure_eq_measure measure_pmf.emeasure_eq_measure by simp

interpret product_sigma_finite "(bernoulli_pmf ∘ q)"
by standard

have "distr M (count_space UNIV) (λω. (λk∈{..<n} . X k ω ∈ I)) = distr
(distr M (PiM {..<n} (bernoulli_pmf ∘ q)) (λω. λk∈{..<n}. X k ω ∈ I)) (count_space UNIV) id"
by (subst distr_distr[OF m5 m4]) (simp add:comp_def)
also have "... = distr (PiM {..<n} (λi. (distr M ((bernoulli_pmf ∘ q) i) (λω. X i ω ∈ I) )))
(count_space UNIV) id"
using assms(1) indep atLeast0LessThan by (intro arg_cong2[where f="λx y. distr x y id"]
iffD1[OF indep_vars_iff_distr_eq_PiM'] m3) auto
also have "... = distr (PiM {..<n} (bernoulli_pmf ∘ q)) (count_space UNIV) id"
using m3 1 by (intro distr_cong PiM_cong refl discrete_measure_eqI[where Ω="UNIV"])
also have "... = ?pi (bernoulli_pmf ∘ q)"
proof (rule discrete_measure_eqI[where Ω="extensional {..<n}"], goal_cases)
case 1 show ?case by simp
next
case 2 show ?case by simp
next
case 3 show ?case using countable_ext by simp
next
case (4 x)
have "emeasure (Pi⇩M {..<n} (bernoulli_pmf ∘ q)) {x} =
emeasure (Pi⇩M {..<n} (bernoulli_pmf ∘ q)) (PiE {..<n} (λk. {x k}))"
using PiE_singleton[OF 4] by simp
also have "... = (∏i<n. emeasure (measure_pmf (bernoulli_pmf (q i))) {x i})"
by (subst emeasure_PiM) auto
also have "... = emeasure (Pi_pmf {..<n} undefined (bernoulli_pmf∘q))
(PiE_dflt {..<n} undefined (λk. {x k}))"
unfolding measure_pmf.emeasure_eq_measure
also have "... = emeasure (Pi_pmf {..<n} undefined (bernoulli_pmf∘q)) {x}"
using 4 by (intro arg_cong2[where f="emeasure"]) (auto simp add:PiE_dflt_def extensional_def)
finally have "emeasure (Pi⇩M {..<n} (bernoulli_pmf ∘ q)) {x} =
emeasure (Pi_pmf {..<n} undefined (bernoulli_pmf ∘ q)) {x}"
by simp
thus ?case using 4
by (subst (1 2) emeasure_distr[OF m5]) (simp_all add:vimage_def space_PiM PiE_def)
next
case 5
have "AE x in Pi⇩M {..<n} (bernoulli_pmf ∘ q). x ∈ extensional {..<n}"
by (intro AE_I2) (simp add:space_PiM PiE_def)
then show ?case by (subst AE_distr_iff[OF m5]) simp_all
next
case 6
then show ?case by (intro AE_pmfI) (simp add: set_Pi_pmf PiE_dflt_def extensional_def)
qed
finally have 2: "distr M (count_space UNIV) (λω. (λk∈{..<n}. X k ω ∈ I)) = ?pi (bernoulli_pmf∘q)"
by simp

have 3: "n < 2 * card {k. k < n ∧ y k}" if
"n < 2 * card {k. k < n ∧ x k}" "⋀i. i < n ⟹ x i ⟹ y i" for x y
proof -
have "2 * card {k. k < n ∧ x k} ≤ 2 * card {k. k < n ∧ y k}"
using that(2) by (intro mult_left_mono card_mono) auto
thus ?thesis using that(1) by simp
qed

have 4: "0 ≤ p ∧ p ≤ q i ∧ q i ≤ 1" if "i < n" for i
unfolding q_def using assms(2,5) that by auto

have p_range: "p ∈ {0..1}"
using 4[OF assms(1)] by auto

have "?R = 1 - measure_pmf.prob (binomial_pmf n p) {k. 2 * k ≤ n}"
by (intro arg_cong2[where f="(-)"] arg_cong2[where f="measure_pmf.prob"]) auto
also have "... = measure (binomial_pmf n p) {k. n < 2 * k}"
by (subst measure_pmf.prob_compl[symmetric]) (simp_all add:set_diff_eq not_le)
also have "... = measure (?pi (bernoulli_pmf ∘ (λ_. p))) {ω. n < 2 * card {k. k < n ∧ ω k}}"
using p_range by (subst binomial_pmf_altdef'[where A="{..<n}" and dflt="undefined"]) auto
also have "... ≤ measure (?pi (bernoulli_pmf ∘ q)) {ω. n < 2 * card {k. k < n ∧ ω k}}"
using 3 4 by (intro prod_pmf_bernoulli_mono) auto
also have "... =
𝒫(ω in distr M (count_space UNIV) (λω. λk∈{..<n}. X k ω ∈ I). n<2*card {k. k < n ∧ ω k})"
unfolding 2 by simp
also have "... = 𝒫(ω in M. n < 2*card {k. k < n ∧ X k ω ∈ I})"
by (subst measure_distr[OF m6]) (simp_all add:vimage_def Int_def conj_commute 0)
also have "... ≤ ?L"
using median_est[OF assms(3)] m0 m1
by (intro finite_measure_mono measurable_sets_Collect[OF median_measurable[OF n_ge_1]]) auto
finally show "?R ≤ ?L" by simp
qed

text ‹Cumulative distribution of the binomial distribution (contributed by Yong Kiam Tan):›

lemma prob_binomial_pmf_upto:
assumes "0 ≤ p" "p ≤ 1"
shows "measure_pmf.prob (binomial_pmf n p) {..m} =
sum (λi. real (n choose i) * p^i * (1 - p) ^(n-i)) {0..m}"
by (auto simp: pmf_binomial[OF assms] measure_measure_pmf_finite intro!: sum.cong)

text ‹A tail bound for the binomial distribution using Hoeffding's inequality:›

lemma binomial_pmf_tail:
assumes "p ∈ {0..1}" "real k ≤ real n * p"
shows "measure (binomial_pmf n p) {..k} ≤ exp (- 2 * real n * (p - real k / n)^2)"
(is "?L ≤ ?R")
proof (cases "n = 0")
case True then show ?thesis by simp
next
case False
let ?A = "{..<n}"
let ?pi = "Pi_pmf ?A undefined (λ_. bernoulli_pmf p)"

define μ where "μ = (∑i<n. (∫x. (of_bool (x i) :: real) ∂ ?pi))"
define ε :: real where "ε = μ - k" (* eps ≥ 0 <-> k ≤ mu *)

have "μ = (∑i<n. (∫x. (of_bool x :: real) ∂ (map_pmf (λω. ω i) ?pi)))"
unfolding μ_def by simp
also have "... = (∑i<n. (∫x. (of_bool x :: real) ∂ (bernoulli_pmf p)))"
also have "... = real n * p" using assms(1) by simp
finally have μ_alt: "μ = real n * p"
by simp

have ε_ge_0: "ε ≥ 0"
using assms(2) unfolding ε_def μ_alt by auto

have indep: "prob_space.indep_vars ?pi (λ_. borel) (λk ω. of_bool (ω k)) {..<n}"
by (intro prob_space.indep_vars_compose2[OF prob_space_measure_pmf indep_vars_Pi_pmf]) auto
interpret Hoeffding_ineq "?pi" "{..<n}" "λk ω. of_bool (ω k)" "λ_.0" "λ_.1" μ
using indep unfolding μ_def by (unfold_locales) simp_all

have "?L = measure (map_pmf (λf. card {x ∈ ?A. f x}) ?pi) {..k}"
by (intro arg_cong2[where f="measure_pmf.prob"] binomial_pmf_altdef' assms(1)) auto
also have "... = 𝒫(ω in ?pi. (∑i<n. of_bool (ω i)) ≤ μ - ε)"
unfolding ε_def by (simp add:vimage_def Int_def)
also have "... ≤ exp (- 2 * ε⇧2 / (∑i<n. (1 - 0)⇧2))"
using False by (intro Hoeffding_ineq_le ε_ge_0) auto
also have "... = ?R"
unfolding ε_def μ_alt by (simp add:power2_eq_square field_simps)
finally show ?thesis by simp
qed

theorem (in prob_space) median_bound:
fixes n :: nat
fixes I :: "('b :: {linorder_topology, second_countable_topology}) set"
assumes "interval I"
assumes "α > 0"
assumes "ε ∈ {0<..<1}"
assumes "indep_vars (λ_. borel) X {0..<n}"
assumes "n ≥ - ln ε / (2 * α⇧2)"
assumes "⋀i. i < n ⟹ 𝒫(ω in M. X i ω ∈ I) ≥ 1/2+α"
shows "𝒫(ω in M. median n (λi. X i ω) ∈ I) ≥ 1-ε"
proof -
have "0 < -ln ε / (2 * α⇧2)"
using assms by (intro divide_pos_pos) auto
also have "... ≤ real n" using assms by simp
finally have "real n > 0" by simp
hence n_ge_0:"n > 0" by simp

have d0: "real_of_int ⌊real n / 2⌋ * 2 / real n ≤ 1"
using n_ge_0 by simp linarith

hence d1: "real (nat ⌊real n / 2⌋) ≤ real n * (1 / 2)"
also have "... ≤ real n * (1 / 2 + α)"
using assms(2) by (intro mult_left_mono) auto
finally have d1: "real (nat ⌊real n / 2⌋) ≤ real n * (1 / 2 + α)" by simp

have "1/2 + α ≤ 𝒫(ω in M. X 0 ω ∈ I)" using n_ge_0 by (intro assms(6))
also have "... ≤ 1" by simp
finally have d2: "1 / 2 + α ≤ 1" by simp

have d3: "nat ⌊real n / 2⌋ = n div 2" by linarith

have "1 - ε ≤ 1 - exp (- 2 * real n * α⇧2)"
using assms(2,3,5) by (intro diff_mono order.refl iffD1[OF ln_ge_iff]) (auto simp:field_simps)
also have "... ≤ 1 - exp (- 2 * real n * ((1/2+α) - real (nat ⌊real n/2⌋) / real n)⇧2)"
using d0 n_ge_0 assms(2)
by (intro diff_mono order.refl iffD2[OF exp_le_cancel_iff] mult_left_mono_neg power_mono) auto
also have "... ≤ 1 - measure (binomial_pmf n (1/2+α)) {..nat ⌊real n/2⌋}"
using assms(2) d1 d2 by (intro diff_mono order.refl binomial_pmf_tail) auto
also have "... = 1 - measure (binomial_pmf n (1/2+α)) {..n div 2}" by (simp add:d3)
also have "... ≤ 𝒫(ω in M. median n (λi. X i ω) ∈ I)"
using assms(2) by (intro median_bound_raw n_ge_0 assms(1,4,6) add_nonneg_nonneg) auto
finally show ?thesis by simp
qed

text ‹This is a specialization of the above to closed real intervals.›

corollary (in prob_space) median_bound_1:
assumes "α > 0"
assumes "ε ∈ {0<..<1}"
assumes "indep_vars (λ_. borel) X {0..<n}"
assumes "n ≥ - ln ε / (2 * α⇧2)"
assumes "∀i ∈ {0..<n}. 𝒫(ω in M. X i ω ∈ ({a..b} :: real set)) ≥ 1/2+α"
shows "𝒫(ω in M. median n (λi. X i ω) ∈ {a..b}) ≥ 1-ε"
using assms(5) by (intro median_bound[OF _ assms(1,2,3,4)]) (auto simp:interval_def)

text ‹This is a specialization of the above, where $\alpha = \frac{1}{6}$ and the interval is
described using a mid point @{term "μ"} and radius @{term "δ"}. The choice of
$\alpha = \frac{1}{6}$ implies a success probability per random variable of $\frac{2}{3}$. It is a
commonly chosen success probability for Monte-Carlo algorithms
(cf. \<^cite>‹‹\textsection 4› in "baryossef2002"› or \<^cite>‹‹\textsection 1› in "kane2010"›).›

corollary (in prob_space) median_bound_2:
fixes μ δ :: real
assumes "ε ∈ {0<..<1}"
assumes "indep_vars (λ_. borel) X {0..<n}"
assumes "n ≥ -18 * ln ε"
assumes "⋀i. i < n ⟹ 𝒫(ω in M. abs (X i ω - μ) > δ) ≤ 1/3"
shows "𝒫(ω in M. abs (median n (λi. X i ω) - μ) ≤ δ) ≥ 1-ε"
proof -
have b:"space M - {ω ∈ space M. X i ω ∈ {μ-δ..μ+δ}} = {ω ∈ space M. abs (X i ω - μ) > δ}" for i
by auto

have "⋀i. i < n ⟹ 1 - 𝒫(ω in M. X i ω ∈ {μ- δ..μ+δ}) ≤ 1/3"
using assms
apply (subst prob_compl[symmetric])
by (subst b, simp)

hence a:"⋀i. i < n ⟹ 𝒫(ω in M. X i ω ∈ {μ- δ..μ+δ}) ≥ 2/3" by simp

have "1-ε ≤ 𝒫(ω in M. median n (λi. X i ω) ∈ {μ-δ..μ+δ})" using a assms(3)
by (intro median_bound_1[OF _ assms(1,2), where α="1/6"]) (simp_all add:power2_eq_square)
also have "... = 𝒫(ω in M. abs (median n (λi. X i ω) - μ) ≤ δ)"
by (intro arg_cong2[where f="measure"] Collect_cong) auto
finally show ?thesis by simp
qed

lemma sorted_mono_map:
assumes "sorted xs"
assumes "mono f"
shows "sorted (map f xs)"
using assms(2) unfolding sorted_wrt_map
by (intro sorted_wrt_mono_rel[OF _ assms(1)]) (simp add:mono_def)

text ‹This could be added to @{theory "HOL.List"}:›
lemma map_sort:
assumes "mono f"
shows "sort (map f xs) = map f (sort xs)"
using assms by (intro properties_for_sort sorted_mono_map) auto

lemma median_cong:
assumes "⋀i. i < n ⟹ f i = g i"
shows "median n f = median n g"
unfolding median_def using assms
by (intro arg_cong2[where f="(!)"] arg_cong[where f="sort"] map_cong) auto

lemma median_restrict:
"median n (λi ∈ {0..<n}.f i) = median n f"
by (rule median_cong, simp)

lemma median_commute_mono:
assumes "n > 0"
assumes "mono g"
shows "g (median n f) = median n (g ∘ f)"
apply (subst map_map[symmetric])
apply (subst map_sort[OF assms(2)])
apply (subst nth_map, simp) using assms apply fastforce
by simp

lemma median_rat:
assumes "n > 0"
shows "real_of_rat (median n f) = median n (λi. real_of_rat (f i))"
apply (subst (2) comp_def[where g="f", symmetric])
apply (rule median_commute_mono[OF assms(1)])

lemma median_const:
assumes "k > 0"
shows "median k (λi ∈ {0..<k}. a) = a"
proof -
have b: "sorted (map (λ_. a) [0..<k])"
by (subst sorted_wrt_map, simp)
have a: "sort (map (λ_. a) [0..<k]) = map (λ_. a) [0..<k]"
by (subst sorted_sort_id[OF b], simp)
have "median k (λi ∈ {0..<k}. a) = median k (λ_. a)"
by (subst median_restrict, simp)
also have "... = a" using assms by (simp add:median_def a)
finally show ?thesis by simp
qed

end
`