Theory Median
section ‹Intervals are Borel measurable›
theory Median
imports
"HOL-Probability.Probability"
"HOL-Library.Multiset"
"Universal_Hash_Families.Universal_Hash_Families_More_Independent_Families"
begin
text ‹This section contains a proof that intervals are Borel measurable, where an interval is
defined as a convex subset of linearly ordered space, more precisely, a set is an interval, if
for each triple of points $x < y < z$: If $x$ and $z$ are in the set so is $y$.
This includes ordinary intervals like @{term "{a..b}"}, @{term "{a<..<b}"} but also for example
@{term [show_types] "{(x::rat). x * x < 2}"} which cannot be expressed in the standard notation.
In the @{theory "HOL-Analysis.Borel_Space"} there are proofs for the measurability of each specific
type of interval, but those unfortunately do not help if we want to express the result about the
median bound for arbitrary types of intervals.›
definition interval :: "('a :: linorder) set ⇒ bool" where
"interval I = (∀x y z. x ∈ I ⟶ z ∈ I ⟶ x ≤ y ⟶ y ≤ z ⟶ y ∈ I)"
definition up_ray :: "('a :: linorder) set ⇒ bool" where
"up_ray I = (∀x y. x ∈ I ⟶ x ≤ y ⟶ y ∈ I)"
lemma up_ray_borel:
assumes "up_ray (I :: (('a :: linorder_topology) set))"
shows "I ∈ borel"
proof (cases "closed I")
case True
then show ?thesis using borel_closed by blast
next
case False
hence b:"¬ closed I" by blast
have "open I"
proof (rule Topological_Spaces.openI)
fix x
assume c:"x ∈ I"
show "∃T. open T ∧ x ∈ T ∧ T ⊆ I"
proof (cases "∃y. y < x ∧ y ∈ I")
case True
then obtain y where a:"y < x ∧ y ∈ I" by blast
have "open {y<..}" by simp
moreover have "x ∈ {y<..}" using a by simp
moreover have "{y<..} ⊆ I"
using a assms(1) by (auto simp: up_ray_def)
ultimately show ?thesis by blast
next
case False
hence "I ⊆ {x..}" using linorder_not_less by auto
moreover have "{x..} ⊆ I"
using c assms(1) unfolding up_ray_def by blast
ultimately have "I = {x..}"
by (rule order_antisym)
moreover have "closed {x..}" by simp
ultimately have "False" using b by auto
then show ?thesis by simp
qed
qed
then show ?thesis by simp
qed
definition down_ray :: "('a :: linorder) set ⇒ bool" where
"down_ray I = (∀x y. y ∈ I ⟶ x ≤ y ⟶ x ∈ I)"
lemma down_ray_borel:
assumes "down_ray (I :: (('a :: linorder_topology) set))"
shows "I ∈ borel"
proof -
have "up_ray (-I)" using assms
by (simp add: up_ray_def down_ray_def, blast)
hence "(-I) ∈ borel" using up_ray_borel by blast
thus "I ∈ borel"
by (metis borel_comp double_complement)
qed
text ‹Main result of this section:›
lemma interval_borel:
assumes "interval (I :: (('a :: linorder_topology) set))"
shows "I ∈ borel"
proof (cases "I = {}")
case True
then show ?thesis by simp
next
case False
then obtain x where a:"x ∈ I" by blast
have "⋀y z. y ∈ I ∪ {x..} ⟹ y ≤ z ⟹ z ∈ I ∪ {x..}"
by (metis assms a interval_def IntE UnE Un_Int_eq(1) Un_Int_eq(2) atLeast_iff nle_le order.trans)
hence "up_ray (I ∪ {x..})"
using up_ray_def by blast
hence b:"I ∪ {x..} ∈ borel"
using up_ray_borel by blast
have "⋀y z. y ∈ I ∪ {..x} ⟹ z ≤ y ⟹ z ∈ I ∪ {..x}"
by (metis assms a interval_def UnE UnI1 UnI2 atMost_iff dual_order.trans linorder_le_cases)
hence "down_ray (I ∪ {..x})"
using down_ray_def by blast
hence c:"I ∪ {..x} ∈ borel"
using down_ray_borel by blast
have "I = (I ∪ {x..}) ∩ (I ∪ {..x})"
using a by fastforce
then show ?thesis using b c
by (metis sets.Int)
qed
section ‹Order statistics are Borel measurable›
text ‹This section contains a proof that order statistics of Borel measurable random variables are
themselves Borel measurable.
The proof relies on the existence of branch-free comparison-sort algorithms. Given a sequence length
these algorithms perform compare-swap operations on predefined pairs of positions. In particular the
result of a comparison does not affect future operations. An example for a branch-free comparison
sort algorithm is shell-sort and also bubble-sort without the early exit.
The advantage of using such a comparison-sort algorithm is that it can be lifted to work on random
variables, where the result of a comparison-swap operation on two random variables @{term"X"} and
@{term"Y"} can be represented as the expressions @{term "λω. min (X ω) (Y ω)"} and
@{term "λω. max (X ω) (Y ω)"}.
Because taking the point-wise minimum (resp. maximum) of two random variables is still
Borel measurable, and because the entire sorting operation can be represented using such
compare-swap operations, we can show that all order statistics are Borel measuable.›
fun sort_primitive where
"sort_primitive i j f k = (if k = i then min (f i) (f j) else (if k = j then max (f i) (f j) else f k))"
fun sort_map where
"sort_map f n = fold id [sort_primitive j i. i <- [0..<n], j <- [0..<i]] f"
lemma sort_map_ind:
"sort_map f (Suc n) = fold id [sort_primitive j n. j <- [0..<n]] (sort_map f n)"
by simp
lemma sort_map_strict_mono:
fixes f :: "nat ⇒ 'b :: linorder"
shows "j < n ⟹ i < j ⟹ sort_map f n i ≤ sort_map f n j"
proof (induction n arbitrary: i j)
case 0
then show ?case by simp
next
case (Suc n)
define g where "g = (λk. fold id [sort_primitive j n. j <- [0..<k]] (sort_map f n))"
define k where "k = n"
have a:"(∀i j. j < n ⟶ i < j ⟶ g k i ≤ g k j) ∧ (∀l. l < k ⟶ g k l ≤ g k n)"
proof (induction k)
case 0
then show ?case using Suc by (simp add:g_def del:sort_map.simps)
next
case (Suc k)
have "g (Suc k) = sort_primitive k n (g k)"
by (simp add:g_def)
then show ?case using Suc
apply (cases "g k k ≤ g k n")
apply (simp add:min_def max_def)
using less_antisym apply blast
apply (cases "g k n ≤ g k k")
apply (simp add:min_def max_def)
apply (metis less_antisym max.coboundedI2 max.orderE)
by simp
qed
hence "⋀i j. j < Suc n ⟹ i < j ⟹ g n i ≤ g n j"
apply (simp add:k_def) using less_antisym by blast
moreover have "sort_map f (Suc n) = g n"
by (simp add:sort_map_ind g_def del:sort_map.simps)
ultimately show ?case
using Suc by (simp del:sort_map.simps)
qed
lemma sort_map_mono:
fixes f :: "nat ⇒ 'b :: linorder"
shows "j < n ⟹ i ≤ j ⟹ sort_map f n i ≤ sort_map f n j"
by (metis sort_map_strict_mono eq_iff le_imp_less_or_eq)
lemma sort_map_perm:
fixes f :: "nat ⇒ 'b :: linorder"
shows "image_mset (sort_map f n) (mset [0..<n]) = image_mset f (mset [0..<n])"
proof -
define is_swap where "is_swap = (λ(ts :: ((nat ⇒ 'b) ⇒ nat ⇒ 'b)). ∃i < n. ∃j < n. ts = sort_primitive i j)"
define t :: "((nat ⇒ 'b) ⇒ nat ⇒ 'b) list"
where "t = [sort_primitive j i. i <- [0..<n], j <- [0..<i]]"
have a: "⋀x f. is_swap x ⟹ image_mset (x f) (mset_set {0..<n}) = image_mset f (mset_set {0..<n})"
proof -
fix x
fix f :: "nat ⇒ 'b :: linorder"
assume "is_swap x"
then obtain i j where x_def: "x = sort_primitive i j" and i_bound: "i < n" and j_bound:"j < n"
using is_swap_def by blast
define inv where "inv = mset_set {k. k < n ∧ k ≠ i ∧ k ≠ j}"
have b:"{0..<n} = {k. k < n ∧ k ≠ i ∧ k ≠ j} ∪ {i,j}"
apply (rule order_antisym, rule subsetI, simp, blast, rule subsetI, simp)
using i_bound j_bound by meson
have c:"⋀k. k ∈# inv ⟹ (x f) k = f k"
by (simp add:x_def inv_def)
have "image_mset (x f) inv = image_mset f inv"
apply (rule multiset_eqI)
using c multiset.map_cong0 by force
moreover have "image_mset (x f) (mset_set {i,j}) = image_mset f (mset_set {i,j})"
apply (cases "i = j")
by (simp add:x_def max_def min_def)+
moreover have "mset_set {0..<n} = inv + mset_set {i,j}"
by (simp only:inv_def b, rule mset_set_Union, simp, simp, simp)
ultimately show "image_mset (x f) (mset_set {0..<n}) = image_mset f (mset_set {0..<n})"
by simp
qed
have "(∀x ∈ set t. is_swap x) ⟹ image_mset (fold id t f) (mset [0..<n]) = image_mset f (mset [0..<n])"
by (induction t arbitrary:f, simp, simp add:a)
moreover have "⋀x. x ∈ set t ⟹ is_swap x"
apply (simp add:t_def is_swap_def)
by (meson atLeastLessThan_iff imageE less_imp_le less_le_trans)
ultimately have "image_mset (fold id t f) (mset [0..<n]) = image_mset f (mset [0..<n])" by blast
then show ?thesis by (simp add:t_def)
qed
lemma list_eq_iff:
assumes "mset xs = mset ys"
assumes "sorted xs"
assumes "sorted ys"
shows "xs = ys"
using assms properties_for_sort by blast
lemma sort_map_eq_sort:
fixes f :: "nat ⇒ ('b :: linorder)"
shows "map (sort_map f n) [0..<n] = sort (map f [0..<n])" (is "?A = ?B")
proof -
have "mset ?A = mset ?B"
using sort_map_perm[where f="f" and n="n"]
by (simp del:sort_map.simps)
moreover have "sorted ?B"
by simp
moreover have "sorted ?A"
apply (subst sorted_wrt_iff_nth_less)
apply (simp del:sort_map.simps)
by (metis sort_map_mono nat_less_le)
ultimately show "?A = ?B"
using list_eq_iff by blast
qed
lemma order_statistics_measurable_aux:
fixes X :: "nat ⇒ 'a ⇒ ('b :: {linorder_topology, second_countable_topology})"
assumes "n ≥ 1"
assumes "j < n"
assumes "⋀i. i < n ⟹ X i ∈ measurable M borel"
shows "(λx. (sort_map (λi. X i x) n) j) ∈ measurable M borel"
proof -
have n_ge_0:"n > 0" using assms by simp
define is_swap where "is_swap = (λ(ts :: ((nat ⇒ 'b) ⇒ nat ⇒ 'b)). ∃i < n. ∃j < n. ts = sort_primitive i j)"
define t :: "((nat ⇒ 'b) ⇒ nat ⇒ 'b) list"
where "t = [sort_primitive j i. i <- [0..<n], j <- [0..<i]]"
define meas_ptw :: "(nat ⇒ 'a ⇒ 'b) ⇒ bool"
where "meas_ptw = (λf. (∀k. k < n ⟶ f k ∈ borel_measurable M))"
have ind_step:
"⋀x (g :: nat ⇒ 'a ⇒ 'b). meas_ptw g ⟹ is_swap x ⟹ meas_ptw (λk ω. x (λi. g i ω) k)"
proof -
fix x g
assume "meas_ptw g"
hence a:"⋀k. k < n ⟹ g k ∈ borel_measurable M" by (simp add:meas_ptw_def)
assume "is_swap x"
then obtain i j where x_def:"x=sort_primitive i j" and i_le:"i < n" and j_le:"j < n"
by (simp add:is_swap_def, blast)
have "⋀k. k < n ⟹ (λω. x (λi. g i ω) k) ∈ borel_measurable M"
proof -
fix k
assume "k < n"
thus " (λω. x (λi. g i ω) k) ∈ borel_measurable M"
apply (simp add:x_def)
apply (cases "k = i", simp)
using a i_le j_le borel_measurable_min apply blast
apply (cases "k = j", simp)
using a i_le j_le borel_measurable_max apply blast
using a by simp
qed
thus "meas_ptw (λk ω. x (λi. g i ω) k)"
by (simp add:meas_ptw_def)
qed
have "(∀x ∈ set t. is_swap x) ⟹ meas_ptw (λ k ω. (fold id t (λk. X k ω)) k)"
proof (induction t rule:rev_induct)
case Nil
then show ?case using assms by (simp add:meas_ptw_def)
next
case (snoc x xs)
have a:"meas_ptw (λk ω. fold (λa. a) xs (λk. X k ω) k)" using snoc by simp
have b:"is_swap x" using snoc by simp
show ?case using ind_step[OF a b] by simp
qed
moreover have "⋀x. x ∈ set t ⟹ is_swap x"
apply (simp add:t_def is_swap_def)
by (meson atLeastLessThan_iff imageE less_imp_le less_le_trans)
ultimately show ?thesis using assms
by (simp add:t_def[symmetric] meas_ptw_def)
qed
text ‹Main results of this section:›
lemma order_statistics_measurable:
fixes X :: "nat ⇒ 'a ⇒ ('b :: {linorder_topology, second_countable_topology})"
assumes "n ≥ 1"
assumes "j < n"
assumes "⋀i. i < n ⟹ X i ∈ measurable M borel"
shows "(λx. (sort (map (λi. X i x) [0..<n])) ! j) ∈ measurable M borel"
apply (subst sort_map_eq_sort[symmetric])
using assms by (simp add:order_statistics_measurable_aux del:sort_map.simps)
definition median :: "nat ⇒ (nat ⇒ ('a :: linorder)) ⇒ 'a" where
"median n f = sort (map f [0..<n]) ! (n div 2)"
lemma median_measurable:
fixes X :: "nat ⇒ 'a ⇒ ('b :: {linorder_topology, second_countable_topology})"
assumes "n ≥ 1"
assumes "⋀i. i < n ⟹ X i ∈ measurable M borel"
shows "(λx. median n (λi. X i x)) ∈ measurable M borel"
apply (simp add:median_def)
apply (rule order_statistics_measurable[OF assms(1) _ assms(2)])
using assms(1) by force+
section ‹The Median Method›
text ‹This section contains the proof for the probability that the median of independent random
variables will be in an interval with high probability if the individual variables are in the
same interval with probability larger than $\frac{1}{2}$.
The proof starts with the elementary observation that the median of a seqeuence with $n$ elements
is in an interval $I$ if at least half of them are in $I$. This works because after sorting the
sequence the elements that will be in the interval must necessarily form a consecutive subsequence,
if its length is larger than $\frac{n}{2}$ the median must be in it.
The remainder follows the proof in \<^cite>‹‹\textsection 2.1› in "alon1999"› using the Hoeffding inequality
to estimate the probability that at least half of the sequence elements will be in the interval $I$.›
lemma interval_rule:
assumes "interval I"
assumes "a ≤ x" "x ≤ b"
assumes "a ∈ I"
assumes "b ∈ I"
shows "x ∈ I"
using assms(1) apply (simp add:interval_def)
using assms by blast
lemma sorted_int:
assumes "interval I"
assumes "sorted xs"
assumes "k < length xs" "i ≤ j" "j ≤ k "
assumes "xs ! i ∈ I" "xs ! k ∈ I"
shows "xs ! j ∈ I"
apply (rule interval_rule[where a="xs ! i" and b="xs ! k"])
using assms by (simp add: sorted_nth_mono)+
lemma mid_in_interval:
assumes "2*length (filter (λx. x ∈ I) xs) > length xs"
assumes "interval I"
assumes "sorted xs"
shows "xs ! (length xs div 2) ∈ I"
proof -
have "length (filter (λx. x ∈ I) xs) > 0" using assms(1) by linarith
then obtain v where v_1: "v < length xs" and v_2: "xs ! v ∈ I"
by (metis filter_False in_set_conv_nth length_greater_0_conv)
define J where "J = {k. k < length xs ∧ xs ! k ∈ I}"
have card_J_min: "2*card J > length xs"
using assms(1) by (simp add:J_def length_filter_conv_card)
consider
(a) "xs ! (length xs div 2) ∈ I" |
(b) "xs ! (length xs div 2) ∉ I ∧ v > (length xs div 2)" |
(c) "xs ! (length xs div 2) ∉ I ∧ v < (length xs div 2)"
by (metis linorder_cases v_2)
thus ?thesis
proof (cases)
case a
then show ?thesis by simp
next
case b
have p:"⋀k. k ≤ length xs div 2 ⟹ xs ! k ∉ I"
using b v_2 sorted_int[OF assms(2) assms(3) v_1, where j="length xs div 2"] apply simp by blast
hence "card J ≤ card {Suc (length xs div 2)..<length xs}"
unfolding J_def using not_less_eq_eq[symmetric] by (intro card_mono subsetI) auto
hence "card J ≤ length xs - (Suc (length xs div 2))"
using card_atLeastLessThan by metis
hence "length xs ≤ 2*( length xs - (Suc (length xs div 2)))"
using card_J_min by linarith
hence "False" using b v_1 by auto
then show ?thesis by simp
next
case c
have "⋀k. k ≥ length xs div 2 ⟹ k < length xs ⟹ xs ! k ∉ I"
using c v_1 v_2 sorted_int[OF assms(2,3), where i ="v" and j="length xs div 2"]
by simp blast
hence "card J ≤ card {0..<(length xs div 2)}"
unfolding J_def using linorder_le_less_linear by (intro card_mono subsetI) auto
hence "card J ≤ (length xs div 2)"
using card_atLeastLessThan by simp
then show ?thesis using card_J_min by linarith
qed
qed
lemma median_est:
assumes "interval I"
assumes "2*card {k. k < n ∧ f k ∈ I} > n"
shows "median n f ∈ I"
proof -
have "{k. k < n ∧ f k ∈ I} = {i. i < n ∧ map f [0..<n] ! i ∈ I}" by auto
thus ?thesis using assms unfolding median_def
by (intro mid_in_interval[OF _ assms(1),where xs="sort (map f [0..<n])", simplified])
(simp_all add:filter_sort comp_def length_filter_conv_card)
qed
lemma median_est_rev:
assumes "interval I"
assumes "median n f ∉ I"
shows "2*card {k. k < n ∧ f k ∉ I} ≥ n"
proof (rule ccontr)
assume a: "¬(2*card {k. k < n ∧ f k ∉ I} ≥ n)"
have "2 * n = 2 * card {k. k < n}" by simp
also have "... = 2 * card ({k. k < n ∧ f k ∈ I} ∪ {k. k < n ∧ f k ∉ I})"
by (intro arg_cong2[where f="(*)"] refl arg_cong[where f="card"]) auto
also have "... = 2 * card {k. k < n ∧ f k ∈ I} + 2 * card {k. k < n ∧ f k ∉ I}"
by (subst card_Un_disjoint) auto
also have "... ≤ n + 2 * card {k. k < n ∧ f k ∉ I}"
using median_est[OF assms(1)] assms(2) not_less by (intro add_mono) auto
also have "... < n + n"
using a by (intro add_strict_left_mono) auto
finally show "False" by auto
qed
lemma prod_pmf_bernoulli_mono:
assumes "finite I"
assumes "⋀i. i ∈ I ⟹ 0 ≤ f i ∧ f i ≤ g i ∧ g i ≤ 1"
assumes "⋀x y. x ∈ A ⟹ (∀i ∈ I. x i ≤ y i) ⟹ y ∈ A"
shows "measure (Pi_pmf I d (bernoulli_pmf ∘ f)) A ≤ measure (Pi_pmf I d (bernoulli_pmf ∘ g)) A"
(is "?L ≤ ?R")
proof -
define q where "q i = pmf_of_list [(0::nat, f i), (1, g i - f i), (2, 1 - g i)]" for i
have wf:"pmf_of_list_wf [(0::nat, f i), (1, g i - f i), (2, 1 - g i)]" if "i ∈ I" for i
using assms(2)[OF that] by (intro pmf_of_list_wfI) auto
have 0: "bernoulli_pmf (f i) = map_pmf (λx. x = 0) (q i)" (is "?L1 = ?R1")
if "i ∈ I" for i
proof -
have "0 ≤ f i" "f i ≤ 1" using assms(2)[OF that] by auto
hence "pmf ?L1 x = pmf ?R1 x" for x
unfolding q_def pmf_map measure_pmf_of_list[OF wf[OF that]]
by (cases x;simp_all add:vimage_def)
thus ?thesis
by (intro pmf_eqI) auto
qed
have 1: "bernoulli_pmf (g i) = map_pmf (λx. x ∈ {0,1}) (q i)" (is "?L1 = ?R1")
if "i ∈ I" for i
proof -
have "0 ≤ g i" "g i ≤ 1" using assms(2)[OF that] by auto
hence "pmf ?L1 x = pmf ?R1 x" for x
unfolding q_def pmf_map measure_pmf_of_list[OF wf[OF that]]
by (cases x;simp_all add:vimage_def)
thus ?thesis
by (intro pmf_eqI) auto
qed
have 2: "(λk. x k = 0) ∈ A ⟹ (λk. x k = 0 ∨ x k = Suc 0) ∈ A" for x
by (erule assms(3)) auto
have "?L = measure (Pi_pmf I d (λi. map_pmf (λx. x = 0) (q i))) A"
unfolding comp_def by (simp add:0 cong: Pi_pmf_cong)
also have "... = measure (map_pmf ((∘) (λx. x = 0)) (Pi_pmf I (if d then 0 else 2) q)) A"
by (intro arg_cong2[where f="measure_pmf.prob"] Pi_pmf_map[OF assms(1)]) auto
also have "... = measure (Pi_pmf I (if d then 0 else 2) q) {x. (λk. x k = 0) ∈ A}"
by (simp add:comp_def vimage_def)
also have "... ≤ measure (Pi_pmf I (if d then 0 else 2) q) {x. (λk. x k ∈ {0,1}) ∈ A}"
using 2 by (intro measure_pmf.finite_measure_mono subsetI) auto
also have "... = measure (map_pmf ((∘) (λx. x ∈ {0,1})) (Pi_pmf I (if d then 0 else 2) q)) A"
by (simp add:vimage_def comp_def)
also have "... = measure (Pi_pmf I d (λi. map_pmf (λx. x ∈ {0,1}) (q i))) A"
by (intro arg_cong2[where f="measure_pmf.prob"] Pi_pmf_map[OF assms(1), symmetric]) auto
also have "... = ?R"
unfolding comp_def by (simp add:1 cong: Pi_pmf_cong)
finally show ?thesis by simp
qed
lemma discrete_measure_eqI:
assumes "sets M = count_space UNIV"
assumes "sets N = count_space UNIV"
assumes "countable Ω"
assumes "⋀x. x ∈ Ω ⟹ emeasure M {x} = emeasure N {x} ∧ emeasure M {x} ≠ ∞"
assumes "AE x in M. x ∈ Ω"
assumes "AE x in N. x ∈ Ω"
shows "M = N"
proof -
define E where "E = insert {} ((λx. {x}) ` Ω)"
have 0: "Int_stable E" unfolding E_def by (intro Int_stableI) auto
have 1: "countable E" using assms(3) unfolding E_def by simp
have "E ⊆ Pow Ω" unfolding E_def by auto
have "emeasure M A = emeasure N A" if A_range: "A ∈ E" for A
using that assms(4) unfolding E_def by auto
moreover have "sets M = sets N" using assms(1,2) by simp
moreover have "Ω ∈ sets M" using assms(1) by simp
moreover have "E ≠ {}" unfolding E_def by simp
moreover have "⋃E = Ω" unfolding E_def by simp
moreover have "emeasure M a ≠ ∞" if "a ∈ E" for a
using that assms(4) unfolding E_def by auto
moreover have "sets (restrict_space M Ω) = Pow Ω"
using assms(1) by (simp add:sets_restrict_space range_inter)
moreover have "sets (restrict_space N Ω) = Pow Ω"
using assms(2) by (simp add:sets_restrict_space range_inter)
moreover have "sigma_sets Ω E = Pow Ω"
unfolding E_def by (intro sigma_sets_singletons_and_empty assms(3))
ultimately show ?thesis
by (intro measure_eqI_restrict_generator[OF 0 _ _ _ _ _ _ assms(5,6) 1]) auto
qed
text ‹Main results of this section:›
text ‹The next theorem establishes a bound for the probability of the median of independent random
variables using the binomial distribution. In a follow-up step, we will establish tail bounds
for the binomial distribution and corresponding median bounds.
This two-step strategy was suggested by Yong Kiam Tan. In a previous version, I only had verified
the exponential tail bound (see theorem \verb+median_bound+ below).›
theorem (in prob_space) median_bound_raw:
fixes I :: "('b :: {linorder_topology, second_countable_topology}) set"
assumes "n > 0" "p ≥ 0"
assumes "interval I"
assumes "indep_vars (λ_. borel) X {0..<n}"
assumes "⋀i. i < n ⟹ 𝒫(ω in M. X i ω ∈ I) ≥ p"
shows "𝒫(ω in M. median n (λi. X i ω) ∈ I) ≥ 1 - measure (binomial_pmf n p) {..n div 2}"
(is "?L ≥ ?R")
proof -
let ?pi = "Pi_pmf {..<n} undefined"
define q where "q i = 𝒫(ω in M. X i ω ∈ I)" for i
have n_ge_1: "n ≥ 1" using assms(1) by simp
have 0: "{k. k < n ∧ (k < n ⟶ X k ω ∈ I)} = {k. k < n ∧ X k ω ∈ I}" for ω
by auto
have "countable ({..<n} →⇩E (UNIV :: bool set))"
by (intro countable_PiE) auto
hence countable_ext: "countable (extensional {..<n} :: (nat ⇒ bool) set)"
unfolding PiE_def by auto
have m0: "I ∈ sets borel"
using interval_borel[OF assms(3)] by simp
have m1: "random_variable borel (λx. X k x)" if "k ∈ {..<n}" for k
using assms(4) that unfolding indep_vars_def by auto
have m2: "(λx. x ∈ I) ∈ borel →⇩M (measure_pmf ((bernoulli_pmf ∘ q) k))"
for k using m0 by measurable
hence m3: "random_variable (measure_pmf ((bernoulli_pmf ∘ q) k)) (λx. X k x ∈ I)"
if "k ∈ {..<n}" for k
by (intro measurable_compose[OF m1] that)
hence m4: "random_variable (PiM {..<n} (bernoulli_pmf ∘ q)) (λω. λk∈{..<n}. X k ω ∈ I)"
by (intro measurable_restrict) auto
moreover have "A ∈ sets (Pi⇩M {..<n} (λx. measure_pmf (bernoulli_pmf (q x))))"
if "A ⊆ extensional {..<n}" for A
proof -
have "A = (⋃a ∈ A. {a})" by auto
also have "... = (⋃a ∈ A. PiE {..<n} (λk. {a k}))"
using that by (intro arg_cong[where f="Union"] image_cong refl PiE_singleton[symmetric]) auto
also have "... ∈ sets (Pi⇩M {..<n} (λx. measure_pmf (bernoulli_pmf (q x))))"
using that countable_ext countable_subset
by (intro sets.countable_Union countable_image image_subsetI sets_PiM_I_finite) auto
finally show ?thesis by simp
qed
hence m5: "id ∈ (PiM {..<n} (bernoulli_pmf ∘ q)) →⇩M (count_space UNIV)"
by (intro measurableI) (simp_all add:vimage_def space_PiM PiE_def)
ultimately have "random_variable (count_space UNIV) (id ∘ (λω. λk∈{..<n}. X k ω ∈ I))"
by (rule measurable_comp)
hence m6: "random_variable (count_space UNIV) (λω. λk∈{..<n}. X k ω ∈ I)" by simp
have indep: "indep_vars (bernoulli_pmf ∘ q) (λi x. X i x ∈ I) {0..<n}"
by (intro indep_vars_compose2[OF assms(4)] m2)
have "measure M {x ∈ space M. (X k x ∈ I) = ω} = measure (bernoulli_pmf (q k)) {ω}"
if "k < n" for ω k
proof (cases "ω")
case True
then show ?thesis unfolding q_def by (simp add:measure_pmf_single)
next
case False
have "{x ∈ space M. X k x ∈ I} ∈ events"
using that m0 by (intro measurable_sets_Collect[OF m1]) auto
hence "prob {x ∈ space M. X k x ∉ I} = 1 - prob {ω ∈ space M. X k ω ∈ I}"
by (subst prob_neg) auto
thus ?thesis using False unfolding q_def by (simp add:measure_pmf_single)
qed
hence 1: "emeasure M {x ∈ space M. (X k x ∈ I) = ω} = emeasure (bernoulli_pmf (q k)) {ω}"
if "k < n" for ω k
using that unfolding emeasure_eq_measure measure_pmf.emeasure_eq_measure by simp
interpret product_sigma_finite "(bernoulli_pmf ∘ q)"
by standard
have "distr M (count_space UNIV) (λω. (λk∈{..<n} . X k ω ∈ I)) = distr
(distr M (PiM {..<n} (bernoulli_pmf ∘ q)) (λω. λk∈{..<n}. X k ω ∈ I)) (count_space UNIV) id"
by (subst distr_distr[OF m5 m4]) (simp add:comp_def)
also have "... = distr (PiM {..<n} (λi. (distr M ((bernoulli_pmf ∘ q) i) (λω. X i ω ∈ I) )))
(count_space UNIV) id"
using assms(1) indep atLeast0LessThan by (intro arg_cong2[where f="λx y. distr x y id"]
iffD1[OF indep_vars_iff_distr_eq_PiM'] m3) auto
also have "... = distr (PiM {..<n} (bernoulli_pmf ∘ q)) (count_space UNIV) id"
using m3 1 by (intro distr_cong PiM_cong refl discrete_measure_eqI[where Ω="UNIV"])
(simp_all add:emeasure_distr vimage_def Int_def conj_commute)
also have "... = ?pi (bernoulli_pmf ∘ q)"
proof (rule discrete_measure_eqI[where Ω="extensional {..<n}"], goal_cases)
case 1 show ?case by simp
next
case 2 show ?case by simp
next
case 3 show ?case using countable_ext by simp
next
case (4 x)
have "emeasure (Pi⇩M {..<n} (bernoulli_pmf ∘ q)) {x} =
emeasure (Pi⇩M {..<n} (bernoulli_pmf ∘ q)) (PiE {..<n} (λk. {x k}))"
using PiE_singleton[OF 4] by simp
also have "... = (∏i<n. emeasure (measure_pmf (bernoulli_pmf (q i))) {x i})"
by (subst emeasure_PiM) auto
also have "... = emeasure (Pi_pmf {..<n} undefined (bernoulli_pmf∘q))
(PiE_dflt {..<n} undefined (λk. {x k}))"
unfolding measure_pmf.emeasure_eq_measure
by (subst measure_Pi_pmf_PiE_dflt) (simp_all add:prod_ennreal)
also have "... = emeasure (Pi_pmf {..<n} undefined (bernoulli_pmf∘q)) {x}"
using 4 by (intro arg_cong2[where f="emeasure"]) (auto simp add:PiE_dflt_def extensional_def)
finally have "emeasure (Pi⇩M {..<n} (bernoulli_pmf ∘ q)) {x} =
emeasure (Pi_pmf {..<n} undefined (bernoulli_pmf ∘ q)) {x}"
by simp
thus ?case using 4
by (subst (1 2) emeasure_distr[OF m5]) (simp_all add:vimage_def space_PiM PiE_def)
next
case 5
have "AE x in Pi⇩M {..<n} (bernoulli_pmf ∘ q). x ∈ extensional {..<n}"
by (intro AE_I2) (simp add:space_PiM PiE_def)
then show ?case by (subst AE_distr_iff[OF m5]) simp_all
next
case 6
then show ?case by (intro AE_pmfI) (simp add: set_Pi_pmf PiE_dflt_def extensional_def)
qed
finally have 2: "distr M (count_space UNIV) (λω. (λk∈{..<n}. X k ω ∈ I)) = ?pi (bernoulli_pmf∘q)"
by simp
have 3: "n < 2 * card {k. k < n ∧ y k}" if
"n < 2 * card {k. k < n ∧ x k}" "⋀i. i < n ⟹ x i ⟹ y i" for x y
proof -
have "2 * card {k. k < n ∧ x k} ≤ 2 * card {k. k < n ∧ y k}"
using that(2) by (intro mult_left_mono card_mono) auto
thus ?thesis using that(1) by simp
qed
have 4: "0 ≤ p ∧ p ≤ q i ∧ q i ≤ 1" if "i < n" for i
unfolding q_def using assms(2,5) that by auto
have p_range: "p ∈ {0..1}"
using 4[OF assms(1)] by auto
have "?R = 1 - measure_pmf.prob (binomial_pmf n p) {k. 2 * k ≤ n}"
by (intro arg_cong2[where f="(-)"] arg_cong2[where f="measure_pmf.prob"]) auto
also have "... = measure (binomial_pmf n p) {k. n < 2 * k}"
by (subst measure_pmf.prob_compl[symmetric]) (simp_all add:set_diff_eq not_le)
also have "... = measure (?pi (bernoulli_pmf ∘ (λ_. p))) {ω. n < 2 * card {k. k < n ∧ ω k}}"
using p_range by (subst binomial_pmf_altdef'[where A="{..<n}" and dflt="undefined"]) auto
also have "... ≤ measure (?pi (bernoulli_pmf ∘ q)) {ω. n < 2 * card {k. k < n ∧ ω k}}"
using 3 4 by (intro prod_pmf_bernoulli_mono) auto
also have "... =
𝒫(ω in distr M (count_space UNIV) (λω. λk∈{..<n}. X k ω ∈ I). n<2*card {k. k < n ∧ ω k})"
unfolding 2 by simp
also have "... = 𝒫(ω in M. n < 2*card {k. k < n ∧ X k ω ∈ I})"
by (subst measure_distr[OF m6]) (simp_all add:vimage_def Int_def conj_commute 0)
also have "... ≤ ?L"
using median_est[OF assms(3)] m0 m1
by (intro finite_measure_mono measurable_sets_Collect[OF median_measurable[OF n_ge_1]]) auto
finally show "?R ≤ ?L" by simp
qed
text ‹Cumulative distribution of the binomial distribution (contributed by Yong Kiam Tan):›
lemma prob_binomial_pmf_upto:
assumes "0 ≤ p" "p ≤ 1"
shows "measure_pmf.prob (binomial_pmf n p) {..m} =
sum (λi. real (n choose i) * p^i * (1 - p) ^(n-i)) {0..m}"
by (auto simp: pmf_binomial[OF assms] measure_measure_pmf_finite intro!: sum.cong)
text ‹A tail bound for the binomial distribution using Hoeffding's inequality:›
lemma binomial_pmf_tail:
assumes "p ∈ {0..1}" "real k ≤ real n * p"
shows "measure (binomial_pmf n p) {..k} ≤ exp (- 2 * real n * (p - real k / n)^2)"
(is "?L ≤ ?R")
proof (cases "n = 0")
case True then show ?thesis by simp
next
case False
let ?A = "{..<n}"
let ?pi = "Pi_pmf ?A undefined (λ_. bernoulli_pmf p)"
define μ where "μ = (∑i<n. (∫x. (of_bool (x i) :: real) ∂ ?pi))"
define ε :: real where "ε = μ - k"
have "μ = (∑i<n. (∫x. (of_bool x :: real) ∂ (map_pmf (λω. ω i) ?pi)))"
unfolding μ_def by simp
also have "... = (∑i<n. (∫x. (of_bool x :: real) ∂ (bernoulli_pmf p)))"
by (simp add: Pi_pmf_component)
also have "... = real n * p" using assms(1) by simp
finally have μ_alt: "μ = real n * p"
by simp
have ε_ge_0: "ε ≥ 0"
using assms(2) unfolding ε_def μ_alt by auto
have indep: "prob_space.indep_vars ?pi (λ_. borel) (λk ω. of_bool (ω k)) {..<n}"
by (intro prob_space.indep_vars_compose2[OF prob_space_measure_pmf indep_vars_Pi_pmf]) auto
interpret Hoeffding_ineq "?pi" "{..<n}" "λk ω. of_bool (ω k)" "λ_.0" "λ_.1" μ
using indep unfolding μ_def by (unfold_locales) simp_all
have "?L = measure (map_pmf (λf. card {x ∈ ?A. f x}) ?pi) {..k}"
by (intro arg_cong2[where f="measure_pmf.prob"] binomial_pmf_altdef' assms(1)) auto
also have "... = 𝒫(ω in ?pi. (∑i<n. of_bool (ω i)) ≤ μ - ε)"
unfolding ε_def by (simp add:vimage_def Int_def)
also have "... ≤ exp (- 2 * ε⇧2 / (∑i<n. (1 - 0)⇧2))"
using False by (intro Hoeffding_ineq_le ε_ge_0) auto
also have "... = ?R"
unfolding ε_def μ_alt by (simp add:power2_eq_square field_simps)
finally show ?thesis by simp
qed
theorem (in prob_space) median_bound:
fixes n :: nat
fixes I :: "('b :: {linorder_topology, second_countable_topology}) set"
assumes "interval I"
assumes "α > 0"
assumes "ε ∈ {0<..<1}"
assumes "indep_vars (λ_. borel) X {0..<n}"
assumes "n ≥ - ln ε / (2 * α⇧2)"
assumes "⋀i. i < n ⟹ 𝒫(ω in M. X i ω ∈ I) ≥ 1/2+α"
shows "𝒫(ω in M. median n (λi. X i ω) ∈ I) ≥ 1-ε"
proof -
have "0 < -ln ε / (2 * α⇧2)"
using assms by (intro divide_pos_pos) auto
also have "... ≤ real n" using assms by simp
finally have "real n > 0" by simp
hence n_ge_0:"n > 0" by simp
have d0: "real_of_int ⌊real n / 2⌋ * 2 / real n ≤ 1"
using n_ge_0 by simp linarith
hence d1: "real (nat ⌊real n / 2⌋) ≤ real n * (1 / 2)"
using n_ge_0 by (simp add:field_simps)
also have "... ≤ real n * (1 / 2 + α)"
using assms(2) by (intro mult_left_mono) auto
finally have d1: "real (nat ⌊real n / 2⌋) ≤ real n * (1 / 2 + α)" by simp
have "1/2 + α ≤ 𝒫(ω in M. X 0 ω ∈ I)" using n_ge_0 by (intro assms(6))
also have "... ≤ 1" by simp
finally have d2: "1 / 2 + α ≤ 1" by simp
have d3: "nat ⌊real n / 2⌋ = n div 2" by linarith
have "1 - ε ≤ 1 - exp (- 2 * real n * α⇧2)"
using assms(2,3,5) by (intro diff_mono order.refl iffD1[OF ln_ge_iff]) (auto simp:field_simps)
also have "... ≤ 1 - exp (- 2 * real n * ((1/2+α) - real (nat ⌊real n/2⌋) / real n)⇧2)"
using d0 n_ge_0 assms(2)
by (intro diff_mono order.refl iffD2[OF exp_le_cancel_iff] mult_left_mono_neg power_mono) auto
also have "... ≤ 1 - measure (binomial_pmf n (1/2+α)) {..nat ⌊real n/2⌋}"
using assms(2) d1 d2 by (intro diff_mono order.refl binomial_pmf_tail) auto
also have "... = 1 - measure (binomial_pmf n (1/2+α)) {..n div 2}" by (simp add:d3)
also have "... ≤ 𝒫(ω in M. median n (λi. X i ω) ∈ I)"
using assms(2) by (intro median_bound_raw n_ge_0 assms(1,4,6) add_nonneg_nonneg) auto
finally show ?thesis by simp
qed
text ‹This is a specialization of the above to closed real intervals.›
corollary (in prob_space) median_bound_1:
assumes "α > 0"
assumes "ε ∈ {0<..<1}"
assumes "indep_vars (λ_. borel) X {0..<n}"
assumes "n ≥ - ln ε / (2 * α⇧2)"
assumes "∀i ∈ {0..<n}. 𝒫(ω in M. X i ω ∈ ({a..b} :: real set)) ≥ 1/2+α"
shows "𝒫(ω in M. median n (λi. X i ω) ∈ {a..b}) ≥ 1-ε"
using assms(5) by (intro median_bound[OF _ assms(1,2,3,4)]) (auto simp:interval_def)
text ‹This is a specialization of the above, where $\alpha = \frac{1}{6}$ and the interval is
described using a mid point @{term "μ"} and radius @{term "δ"}. The choice of
$\alpha = \frac{1}{6}$ implies a success probability per random variable of $\frac{2}{3}$. It is a
commonly chosen success probability for Monte-Carlo algorithms
(cf. \<^cite>‹‹\textsection 4› in "baryossef2002"› or \<^cite>‹‹\textsection 1› in "kane2010"›).›
corollary (in prob_space) median_bound_2:
fixes μ δ :: real
assumes "ε ∈ {0<..<1}"
assumes "indep_vars (λ_. borel) X {0..<n}"
assumes "n ≥ -18 * ln ε"
assumes "⋀i. i < n ⟹ 𝒫(ω in M. abs (X i ω - μ) > δ) ≤ 1/3"
shows "𝒫(ω in M. abs (median n (λi. X i ω) - μ) ≤ δ) ≥ 1-ε"
proof -
have b:"space M - {ω ∈ space M. X i ω ∈ {μ-δ..μ+δ}} = {ω ∈ space M. abs (X i ω - μ) > δ}" for i
by auto
have "⋀i. i < n ⟹ 1 - 𝒫(ω in M. X i ω ∈ {μ- δ..μ+δ}) ≤ 1/3"
using assms
apply (subst prob_compl[symmetric])
apply (measurable, simp add:indep_vars_def)
by (subst b, simp)
hence a:"⋀i. i < n ⟹ 𝒫(ω in M. X i ω ∈ {μ- δ..μ+δ}) ≥ 2/3" by simp
have "1-ε ≤ 𝒫(ω in M. median n (λi. X i ω) ∈ {μ-δ..μ+δ})" using a assms(3)
by (intro median_bound_1[OF _ assms(1,2), where α="1/6"]) (simp_all add:power2_eq_square)
also have "... = 𝒫(ω in M. abs (median n (λi. X i ω) - μ) ≤ δ)"
by (intro arg_cong2[where f="measure"] Collect_cong) auto
finally show ?thesis by simp
qed
section ‹Some additional results about the median›
lemma sorted_mono_map:
assumes "sorted xs"
assumes "mono f"
shows "sorted (map f xs)"
using assms(2) unfolding sorted_wrt_map
by (intro sorted_wrt_mono_rel[OF _ assms(1)]) (simp add:mono_def)
text ‹This could be added to @{theory "HOL.List"}:›
lemma map_sort:
assumes "mono f"
shows "sort (map f xs) = map f (sort xs)"
using assms by (intro properties_for_sort sorted_mono_map) auto
lemma median_cong:
assumes "⋀i. i < n ⟹ f i = g i"
shows "median n f = median n g"
unfolding median_def using assms
by (intro arg_cong2[where f="(!)"] arg_cong[where f="sort"] map_cong) auto
lemma median_restrict:
"median n (λi ∈ {0..<n}.f i) = median n f"
by (rule median_cong, simp)
lemma median_commute_mono:
assumes "n > 0"
assumes "mono g"
shows "g (median n f) = median n (g ∘ f)"
apply (simp add: median_def del:map_map)
apply (subst map_map[symmetric])
apply (subst map_sort[OF assms(2)])
apply (subst nth_map, simp) using assms apply fastforce
by simp
lemma median_rat:
assumes "n > 0"
shows "real_of_rat (median n f) = median n (λi. real_of_rat (f i))"
apply (subst (2) comp_def[where g="f", symmetric])
apply (rule median_commute_mono[OF assms(1)])
by (simp add: mono_def of_rat_less_eq)
lemma median_const:
assumes "k > 0"
shows "median k (λi ∈ {0..<k}. a) = a"
proof -
have b: "sorted (map (λ_. a) [0..<k])"
by (subst sorted_wrt_map, simp)
have a: "sort (map (λ_. a) [0..<k]) = map (λ_. a) [0..<k]"
by (subst sorted_sort_id[OF b], simp)
have "median k (λi ∈ {0..<k}. a) = median k (λ_. a)"
by (subst median_restrict, simp)
also have "... = a" using assms by (simp add:median_def a)
finally show ?thesis by simp
qed
end