Theory Median_Method.Median

section ‹Intervals are Borel measurable›

theory Median
  imports 
    "HOL-Probability.Probability" 
    "HOL-Library.Multiset" 
    "Universal_Hash_Families.Universal_Hash_Families_More_Independent_Families"
begin

text ‹This section contains a proof that intervals are Borel measurable, where an interval is
defined as a convex subset of linearly ordered space, more precisely, a set is an interval, if 
for each triple of points $x < y < z$: If $x$ and $z$ are in the set so is $y$.
This includes ordinary intervals like @{term "{a..b}"}, @{term "{a<..<b}"} but also for example
@{term [show_types] "{(x::rat). x * x < 2}"} which cannot be expressed in the standard notation.

In the @{theory "HOL-Analysis.Borel_Space"} there are proofs for the measurability of each specific
type of interval, but those unfortunately do not help if we want to express the result about the
median bound for arbitrary types of intervals.›

definition interval :: "('a :: linorder) set ⇒ bool" where
  "interval I = (∀x y z. x ∈ I ⟶ z ∈ I ⟶ x ≤ y ⟶ y ≤ z ⟶ y ∈ I)"

definition up_ray :: "('a :: linorder) set ⇒ bool" where
  "up_ray I = (∀x y. x ∈ I ⟶ x ≤ y ⟶ y ∈ I)"

lemma up_ray_borel:
  assumes "up_ray (I :: (('a :: linorder_topology) set))"
  shows "I ∈ borel"
proof (cases "closed I")
  case True
  then show ?thesis using borel_closed by blast
next
  case False
  hence b:"¬ closed I" by blast

  have "open I"
  proof (rule Topological_Spaces.openI)
    fix x
    assume c:"x ∈ I"
    show "∃T. open T ∧ x ∈ T ∧ T ⊆ I"
    proof (cases "∃y. y < x ∧ y ∈ I")
      case True
      then obtain y where a:"y < x ∧ y ∈ I" by blast
      have "open {y<..}" by simp
      moreover have "x ∈ {y<..}" using a by simp
      moreover have "{y<..} ⊆ I"
        using a assms(1) by (auto simp: up_ray_def) 
      ultimately show ?thesis by blast
    next
      case False
      hence "I ⊆ {x..}" using linorder_not_less by auto
      moreover have "{x..} ⊆ I"
        using c assms(1) unfolding up_ray_def by blast
      ultimately have "I = {x..}" 
        by (rule order_antisym)
      moreover have "closed {x..}" by simp
      ultimately have "False" using b by auto
      then show ?thesis by simp
    qed
  qed
  then show ?thesis by simp
qed

definition down_ray :: "('a :: linorder) set ⇒ bool" where
  "down_ray I = (∀x y. y ∈ I ⟶ x ≤ y ⟶ x ∈ I)"

lemma down_ray_borel:
  assumes "down_ray (I :: (('a :: linorder_topology) set))"
  shows "I ∈ borel"
proof -
  have "up_ray (-I)" using assms 
    by (simp add: up_ray_def down_ray_def, blast)
  hence "(-I) ∈ borel" using up_ray_borel by blast
  thus "I ∈ borel" 
    by (metis borel_comp double_complement)
qed

text ‹Main result of this section:›

lemma interval_borel:
  assumes "interval (I :: (('a :: linorder_topology) set))"
  shows "I ∈ borel"
proof (cases "I = {}")
  case True
  then show ?thesis by simp
next
  case False
  then obtain x where a:"x ∈ I" by blast
  have "⋀y z. y ∈ I ∪ {x..} ⟹ y ≤ z ⟹ z ∈ I ∪ {x..}" 
    by (metis assms a interval_def  IntE UnE Un_Int_eq(1) Un_Int_eq(2) atLeast_iff nle_le order.trans)
  hence "up_ray (I ∪ {x..})"
    using up_ray_def by blast
  hence b:"I ∪ {x..} ∈ borel" 
    using up_ray_borel by blast

  have "⋀y z. y ∈ I ∪ {..x} ⟹ z ≤ y ⟹ z ∈ I ∪ {..x}" 
    by (metis assms a interval_def UnE UnI1 UnI2 atMost_iff dual_order.trans linorder_le_cases)
  hence "down_ray (I ∪ {..x})"
    using down_ray_def by blast
  hence c:"I ∪ {..x} ∈ borel"
    using down_ray_borel by blast

  have "I = (I ∪ {x..}) ∩ (I ∪ {..x})"
    using a by fastforce    

  then show ?thesis using b c
    by (metis sets.Int)
qed

section ‹Order statistics are Borel measurable›

text ‹This section contains a proof that order statistics of Borel measurable random variables are
themselves Borel measurable.

The proof relies on the existence of branch-free comparison-sort algorithms. Given a sequence length
these algorithms perform compare-swap operations on predefined pairs of positions. In particular the
result of a comparison does not affect future operations. An example for a branch-free comparison
sort algorithm is shell-sort and also bubble-sort without the early exit.

The advantage of using such a comparison-sort algorithm is that it can be lifted to work on random
variables, where the result of a comparison-swap operation on two random variables @{term"X"} and
@{term"Y"} can be represented as the expressions @{term "λω. min (X ω) (Y ω)"} and
@{term "λω. max (X ω) (Y ω)"}.

Because taking the point-wise minimum (resp. maximum) of two random variables is still
Borel measurable, and because the entire sorting operation can be represented using such
compare-swap operations, we can show that all order statistics are Borel measuable.›

fun sort_primitive where
  "sort_primitive i j f k = (if k = i then min (f i) (f j) else (if k = j then max (f i) (f j) else f k))"

fun sort_map where
  "sort_map f n = fold id [sort_primitive j i. i <- [0..<n], j <- [0..<i]] f"

lemma sort_map_ind:
  "sort_map f (Suc n) = fold id [sort_primitive j n. j <- [0..<n]] (sort_map f n)"
  by simp

lemma sort_map_strict_mono:
  fixes f :: "nat ⇒ 'b :: linorder"
  shows "j < n ⟹ i < j ⟹ sort_map f n i ≤ sort_map f n j"
proof (induction n arbitrary: i j)
  case 0
  then show ?case by simp
next
  case (Suc n)
  define g where "g = (λk. fold id [sort_primitive j n. j <- [0..<k]] (sort_map f n))"
  define k where "k = n"
  have a:"(∀i j. j < n ⟶ i < j ⟶ g k i ≤ g k j) ∧ (∀l. l < k ⟶ g k l ≤ g k n)"
  proof (induction k)
    case 0
    then show ?case using Suc by (simp add:g_def del:sort_map.simps)
  next
    case (Suc k)
    have "g (Suc k) = sort_primitive k n (g k)" 
      by (simp add:g_def)
    then show ?case using Suc
      apply (cases "g k k ≤ g k n")
       apply (simp add:min_def max_def)
       using less_antisym apply blast
      apply (cases "g k n ≤ g k k")
       apply (simp add:min_def max_def)
       apply (metis less_antisym max.coboundedI2 max.orderE)
      by simp
  qed

  hence "⋀i j. j < Suc n ⟹ i < j ⟹ g n i ≤ g n j"
    apply (simp add:k_def) using less_antisym by blast
  moreover have "sort_map f (Suc n) = g n" 
    by (simp add:sort_map_ind g_def del:sort_map.simps)
  ultimately show ?case
    using Suc by (simp del:sort_map.simps)
qed

lemma sort_map_mono:
  fixes f :: "nat ⇒ 'b :: linorder"
  shows "j < n ⟹ i ≤ j ⟹ sort_map f n i ≤ sort_map f n j"
  by (metis sort_map_strict_mono eq_iff le_imp_less_or_eq)

lemma sort_map_perm:
  fixes f :: "nat ⇒ 'b :: linorder"
  shows "image_mset (sort_map f n) (mset [0..<n]) = image_mset f (mset [0..<n])"
proof -
  define is_swap where "is_swap = (λ(ts :: ((nat ⇒ 'b) ⇒ nat ⇒ 'b)). ∃i < n. ∃j < n. ts = sort_primitive i j)"
  define t :: "((nat ⇒ 'b) ⇒ nat ⇒ 'b) list" 
    where "t = [sort_primitive j i. i <- [0..<n], j <- [0..<i]]"

  have a: "⋀x f. is_swap x ⟹ image_mset (x f) (mset_set {0..<n}) = image_mset f (mset_set {0..<n})"
  proof -
    fix x
    fix f :: "nat ⇒ 'b :: linorder"
    assume "is_swap x"
    then obtain i j where x_def: "x = sort_primitive i j" and i_bound: "i < n" and j_bound:"j < n" 
      using is_swap_def by blast
    define inv where "inv = mset_set {k. k < n ∧ k ≠ i ∧ k ≠ j}"
    have b:"{0..<n} = {k. k < n ∧ k ≠ i ∧ k ≠ j} ∪ {i,j}"
      apply (rule order_antisym, rule subsetI, simp, blast, rule subsetI, simp)
      using i_bound j_bound by meson
    have c:"⋀k. k ∈# inv ⟹ (x f) k = f k" 
      by (simp add:x_def inv_def)
    have "image_mset (x f) inv = image_mset f inv"
      apply (rule multiset_eqI)
      using c multiset.map_cong0 by force
    moreover have "image_mset (x f) (mset_set {i,j}) = image_mset f (mset_set {i,j})"
      apply (cases "i = j")
      by (simp add:x_def max_def min_def)+
    moreover have "mset_set {0..<n} = inv + mset_set {i,j}"
      by (simp only:inv_def b, rule mset_set_Union, simp, simp, simp) 
    ultimately show "image_mset (x f) (mset_set {0..<n}) = image_mset f (mset_set {0..<n})"
      by simp
  qed

  have "(∀x ∈ set t. is_swap x) ⟹ image_mset (fold id t f) (mset [0..<n]) = image_mset f (mset [0..<n])"
    by (induction t arbitrary:f, simp, simp add:a) 
  moreover have "⋀x. x ∈ set t ⟹ is_swap x" 
    apply (simp add:t_def is_swap_def) 
    by (meson atLeastLessThan_iff imageE less_imp_le less_le_trans)  
  ultimately have "image_mset (fold id t f) (mset [0..<n]) = image_mset f (mset [0..<n])" by blast
  then show ?thesis by (simp add:t_def)
qed

lemma list_eq_iff:
  assumes "mset xs = mset ys"
  assumes "sorted xs"
  assumes "sorted ys"
  shows "xs = ys" 
  using assms properties_for_sort by blast

lemma sort_map_eq_sort:
  fixes f :: "nat ⇒ ('b :: linorder)"
  shows "map (sort_map f n) [0..<n] = sort (map f [0..<n])" (is "?A = ?B")
proof -
  have "mset ?A = mset ?B"
    using sort_map_perm[where f="f" and n="n"]
    by (simp del:sort_map.simps)
  moreover have "sorted ?B"
    by simp
  moreover have "sorted ?A"
    apply (subst sorted_wrt_iff_nth_less)
    apply (simp del:sort_map.simps)
    by (metis sort_map_mono nat_less_le)
  ultimately show "?A = ?B" 
    using list_eq_iff by blast
qed

lemma order_statistics_measurable_aux: 
  fixes X :: "nat ⇒ 'a ⇒ ('b :: {linorder_topology, second_countable_topology})"
  assumes "n ≥ 1" 
  assumes "j < n"
  assumes "⋀i. i < n ⟹ X i ∈ measurable M borel"
  shows "(λx. (sort_map (λi. X i x) n) j) ∈ measurable M borel"
proof -
  have n_ge_0:"n > 0" using assms by simp
  define is_swap where "is_swap = (λ(ts :: ((nat ⇒ 'b) ⇒ nat ⇒ 'b)). ∃i < n. ∃j < n. ts = sort_primitive i j)"
  define t :: "((nat ⇒ 'b) ⇒ nat ⇒ 'b) list" 
    where "t = [sort_primitive j i. i <- [0..<n], j <- [0..<i]]"

  define meas_ptw :: "(nat ⇒ 'a ⇒ 'b) ⇒ bool"
    where "meas_ptw = (λf. (∀k. k < n ⟶ f k ∈ borel_measurable M))"

  have ind_step:
    "⋀x (g :: nat ⇒ 'a ⇒ 'b). meas_ptw g ⟹ is_swap x ⟹ meas_ptw (λk ω. x (λi. g i ω) k)"
  proof -
    fix x g
    assume "meas_ptw g"
    hence a:"⋀k. k < n ⟹ g k ∈ borel_measurable M" by (simp add:meas_ptw_def)
    assume "is_swap x"
    then obtain i j where x_def:"x=sort_primitive i j" and i_le:"i < n" and j_le:"j < n"
      by (simp add:is_swap_def, blast)
    have "⋀k. k < n ⟹ (λω. x (λi. g i ω) k) ∈ borel_measurable M"
    proof -
      fix k
      assume "k < n"
      thus " (λω. x (λi. g i ω) k) ∈ borel_measurable M"
        apply (simp add:x_def)
        apply (cases "k = i", simp)
        using a i_le j_le borel_measurable_min apply blast
        apply (cases "k = j", simp)
        using a i_le j_le borel_measurable_max apply blast
        using a by simp
    qed
    thus "meas_ptw (λk ω. x (λi. g i ω) k)" 
      by (simp add:meas_ptw_def)
  qed

  have "(∀x ∈ set t. is_swap x) ⟹ meas_ptw (λ k ω. (fold id t (λk. X k ω)) k)"
  proof (induction t rule:rev_induct)
    case Nil
    then show ?case using assms by (simp add:meas_ptw_def)
  next
    case (snoc x xs)
    have a:"meas_ptw (λk ω. fold (λa. a) xs (λk. X k ω) k)" using snoc by simp
    have b:"is_swap x" using snoc by simp
    show ?case using ind_step[OF a b] by simp
  qed
  moreover have "⋀x. x ∈ set t ⟹ is_swap x" 
    apply (simp add:t_def is_swap_def) 
    by (meson atLeastLessThan_iff imageE less_imp_le less_le_trans)  
  ultimately show ?thesis using assms
    by (simp add:t_def[symmetric] meas_ptw_def)
qed

text ‹Main results of this section:›

lemma order_statistics_measurable:
  fixes X :: "nat ⇒ 'a ⇒ ('b :: {linorder_topology, second_countable_topology})"
  assumes "n ≥ 1" 
  assumes "j < n"
  assumes "⋀i. i < n ⟹ X i ∈ measurable M borel"
  shows "(λx. (sort (map (λi. X i x) [0..<n])) ! j) ∈ measurable M borel"
  apply (subst sort_map_eq_sort[symmetric])
  using assms by (simp add:order_statistics_measurable_aux del:sort_map.simps)

definition median :: "nat ⇒ (nat ⇒ ('a :: linorder)) ⇒ 'a" where
  "median n f = sort (map f [0..<n]) ! (n div 2)"

lemma median_measurable:
  fixes X :: "nat ⇒ 'a ⇒ ('b :: {linorder_topology, second_countable_topology})"
  assumes "n ≥ 1" 
  assumes "⋀i. i < n ⟹ X i ∈ measurable M borel"
  shows "(λx. median n (λi. X i x)) ∈ measurable M borel"
  apply (simp add:median_def)
  apply (rule order_statistics_measurable[OF assms(1) _ assms(2)])
  using assms(1) by force+

section ‹The Median Method›

text ‹This section contains the proof for the probability that the median of independent random
variables will be in an interval with high probability if the individual variables are in the
same interval with probability larger than $\frac{1}{2}$.

The proof starts with the elementary observation that the median of a seqeuence with $n$ elements
is in an interval $I$ if at least half of them are in $I$. This works because after sorting the
sequence the elements that will be in the interval must necessarily form a consecutive subsequence,
if its length is larger than $\frac{n}{2}$ the median must be in it.

The remainder follows the proof in \<^cite>‹‹\textsection 2.1› in "alon1999"› using the Hoeffding inequality
to estimate the probability that at least half of the sequence elements will be in the interval $I$.›

lemma interval_rule:
  assumes "interval I"
  assumes "a ≤ x" "x ≤ b"
  assumes "a ∈ I"
  assumes "b ∈ I"
  shows "x ∈ I"
  using assms(1) apply (simp add:interval_def)
  using assms by blast

lemma sorted_int:
  assumes "interval I"
  assumes "sorted xs"
  assumes "k < length xs" "i ≤ j" "j ≤ k "
  assumes "xs ! i ∈ I" "xs ! k ∈ I"
  shows "xs ! j ∈ I"
  apply (rule interval_rule[where a="xs ! i" and b="xs ! k"])
  using assms by (simp add: sorted_nth_mono)+

lemma mid_in_interval:
  assumes "2*length (filter (λx. x ∈ I) xs) > length xs"
  assumes "interval I"
  assumes "sorted xs"
  shows "xs ! (length xs div 2) ∈ I"
proof -
  have "length (filter (λx. x ∈ I) xs) > 0"  using assms(1) by linarith
  then obtain v where v_1: "v < length xs" and v_2: "xs ! v ∈ I" 
    by (metis filter_False in_set_conv_nth length_greater_0_conv)

  define J where "J = {k. k < length xs ∧ xs ! k ∈ I}"

  have card_J_min: "2*card J > length xs"
    using assms(1) by (simp add:J_def length_filter_conv_card)

  consider
    (a) "xs ! (length xs div 2) ∈ I" |
    (b) "xs ! (length xs div 2) ∉ I ∧ v > (length xs div 2)" |
    (c) "xs ! (length xs div 2) ∉ I ∧ v < (length xs div 2)"
    by (metis linorder_cases v_2)
  thus ?thesis
  proof (cases)
    case a
    then show ?thesis by simp
  next
    case b
    have p:"⋀k. k ≤ length xs div 2 ⟹ xs ! k ∉ I"
      using b v_2 sorted_int[OF assms(2) assms(3) v_1, where j="length xs div 2"] apply simp by blast
    hence "card J ≤ card {Suc (length xs div 2)..<length xs}"
      unfolding J_def using not_less_eq_eq[symmetric] by (intro card_mono subsetI) auto
    hence "card J ≤ length xs - (Suc (length xs div 2))"
      using card_atLeastLessThan by metis
    hence "length xs ≤ 2*( length xs - (Suc (length xs div 2)))"
      using card_J_min by linarith
    hence "False" using b v_1 by auto
    then show ?thesis by simp
  next
    case c
    have "⋀k. k ≥ length xs div 2 ⟹ k < length xs ⟹ xs ! k ∉ I"
      using c v_1 v_2 sorted_int[OF assms(2,3), where i ="v" and j="length xs div 2"]
      by simp blast
    hence "card J ≤ card {0..<(length xs div 2)}"
      unfolding J_def using linorder_le_less_linear by (intro card_mono subsetI) auto
    hence "card J ≤ (length xs div 2)"
      using card_atLeastLessThan by simp
    then show ?thesis using card_J_min by linarith
  qed
qed

lemma median_est:
  assumes "interval I"
  assumes "2*card {k. k < n ∧ f k ∈ I} > n"
  shows "median n f ∈ I"
proof -
  have "{k. k < n ∧ f k ∈ I} = {i. i < n ∧ map f [0..<n] ! i ∈ I}" by auto
  thus ?thesis using assms unfolding median_def
    by (intro mid_in_interval[OF _ assms(1),where xs="sort (map f [0..<n])", simplified])
      (simp_all add:filter_sort comp_def length_filter_conv_card)
qed

lemma median_est_rev:
  assumes "interval I"
  assumes "median n f ∉ I"
  shows "2*card {k. k < n ∧ f k ∉ I} ≥ n"
proof (rule ccontr)
  assume a: "¬(2*card {k. k < n ∧ f k ∉ I} ≥ n)"

  have "2 * n = 2 * card {k. k < n}" by simp
  also have "... = 2 * card ({k. k < n ∧ f k ∈ I} ∪ {k. k < n ∧ f k ∉ I})"
    by (intro arg_cong2[where f="(*)"] refl arg_cong[where f="card"]) auto
  also have "... = 2 * card {k. k < n ∧ f k ∈ I} + 2 * card {k. k < n ∧ f k ∉ I}"
    by (subst card_Un_disjoint) auto
  also have "... ≤ n  + 2 * card {k. k < n ∧ f k ∉ I}"
    using median_est[OF assms(1)] assms(2) not_less by (intro add_mono) auto
  also have "... < n + n"
    using a by (intro add_strict_left_mono) auto
  finally show "False" by auto
qed

lemma prod_pmf_bernoulli_mono:
  assumes "finite I"
  assumes "⋀i. i ∈ I ⟹ 0 ≤ f i ∧ f i ≤ g i ∧ g i ≤ 1"
  assumes "⋀x y. x ∈ A ⟹ (∀i ∈ I. x i ≤ y i) ⟹ y ∈ A"
  shows "measure (Pi_pmf I d (bernoulli_pmf ∘ f)) A ≤ measure (Pi_pmf I d (bernoulli_pmf ∘ g)) A"
    (is "?L ≤ ?R")
proof -
  define q where "q i = pmf_of_list [(0::nat, f i), (1, g i - f i), (2, 1 - g i)]" for i

  have wf:"pmf_of_list_wf [(0::nat, f i), (1, g i - f i), (2, 1 - g i)]" if "i ∈ I" for i
    using assms(2)[OF that] by (intro pmf_of_list_wfI) auto

  have 0: "bernoulli_pmf (f i) = map_pmf (λx. x = 0) (q i)" (is "?L1 = ?R1") 
    if "i ∈ I" for i
  proof -
    have "0 ≤ f i" "f i ≤ 1" using assms(2)[OF that] by auto
    hence "pmf ?L1 x = pmf ?R1 x" for x
      unfolding q_def pmf_map measure_pmf_of_list[OF wf[OF that]] 
      by (cases x;simp_all add:vimage_def)
    thus ?thesis
      by (intro pmf_eqI) auto
  qed

  have 1: "bernoulli_pmf (g i) = map_pmf (λx. x ∈ {0,1}) (q i)" (is "?L1 = ?R1")
    if "i ∈ I" for i
  proof -
    have "0 ≤ g i" "g i ≤ 1" using assms(2)[OF that] by auto
    hence "pmf ?L1 x = pmf ?R1 x" for x
      unfolding q_def pmf_map measure_pmf_of_list[OF wf[OF that]] 
      by (cases x;simp_all add:vimage_def)
    thus ?thesis
      by (intro pmf_eqI) auto
  qed

  have 2: "(λk. x k = 0) ∈ A ⟹ (λk. x k = 0 ∨ x k = Suc 0) ∈ A" for x
    by (erule assms(3)) auto

  have "?L = measure (Pi_pmf I d (λi. map_pmf (λx. x = 0) (q i))) A"
    unfolding comp_def by (simp add:0 cong: Pi_pmf_cong)
  also have "... = measure (map_pmf ((∘) (λx. x = 0)) (Pi_pmf I (if d then 0 else 2) q)) A"
    by (intro arg_cong2[where f="measure_pmf.prob"] Pi_pmf_map[OF assms(1)]) auto 
  also have "... = measure (Pi_pmf I (if d then 0 else 2) q) {x. (λk. x k = 0) ∈ A}"
    by (simp add:comp_def vimage_def)
  also have "... ≤ measure (Pi_pmf I (if d then 0 else 2) q) {x. (λk. x k ∈ {0,1}) ∈ A}"
    using 2 by (intro measure_pmf.finite_measure_mono subsetI) auto 
  also have "... = measure (map_pmf ((∘) (λx. x ∈ {0,1})) (Pi_pmf I (if d then 0 else 2) q)) A"
    by (simp add:vimage_def comp_def)
  also have "... = measure (Pi_pmf I d (λi. map_pmf (λx. x ∈ {0,1}) (q i))) A"
    by (intro arg_cong2[where f="measure_pmf.prob"] Pi_pmf_map[OF assms(1), symmetric]) auto
  also have "... = ?R" 
    unfolding comp_def by (simp add:1 cong: Pi_pmf_cong)
  finally show ?thesis by simp
qed

lemma discrete_measure_eqI:
  assumes "sets M = count_space UNIV"
  assumes "sets N = count_space UNIV" 
  assumes "countable Ω"
  assumes "⋀x. x ∈ Ω ⟹ emeasure M {x} = emeasure N {x} ∧ emeasure M {x} ≠ ∞"
  assumes "AE x in M. x ∈ Ω"
  assumes "AE x in N. x ∈ Ω"
  shows "M = N"
proof -
  define E where "E = insert {} ((λx. {x}) ` Ω)"

  have 0: "Int_stable E" unfolding E_def by (intro Int_stableI) auto
  have 1: "countable E" using assms(3) unfolding E_def by simp

  have "E ⊆ Pow Ω" unfolding E_def by auto
  have "emeasure M A = emeasure N A" if A_range: "A ∈ E" for A
    using that assms(4) unfolding E_def by auto
  moreover have "sets M = sets N" using assms(1,2) by simp
  moreover have "Ω ∈ sets M" using assms(1) by simp
  moreover have "E ≠ {}" unfolding E_def by simp
  moreover have "⋃E = Ω" unfolding E_def by simp
  moreover have "emeasure M a ≠ ∞" if "a ∈ E" for a
    using that assms(4) unfolding E_def by auto
  moreover have "sets (restrict_space M Ω) = Pow Ω"
    using assms(1) by (simp add:sets_restrict_space range_inter)
  moreover have "sets (restrict_space N Ω) = Pow Ω"
    using assms(2) by (simp add:sets_restrict_space range_inter)
  moreover have "sigma_sets Ω E = Pow Ω"
    unfolding E_def by (intro sigma_sets_singletons_and_empty assms(3)) 
  ultimately show ?thesis
    by (intro measure_eqI_restrict_generator[OF 0 _ _ _ _ _ _ assms(5,6) 1]) auto
qed

text ‹Main results of this section:›

text ‹The next theorem establishes a bound for the probability of the median of independent random
variables using the binomial distribution. In a follow-up step, we will establish tail bounds
for the binomial distribution and corresponding median bounds.

This two-step strategy was suggested by Yong Kiam Tan. In a previous version, I only had verified 
the exponential tail bound (see theorem \verb+median_bound+ below).›

theorem (in prob_space) median_bound_raw:
  fixes I :: "('b :: {linorder_topology, second_countable_topology}) set"
  assumes "n > 0" "p ≥ 0"
  assumes "interval I"
  assumes "indep_vars (λ_. borel) X {0..<n}"
  assumes "⋀i. i < n ⟹ 𝒫(ω in M. X i ω ∈ I) ≥ p" 
  shows "𝒫(ω in M. median n (λi. X i ω) ∈ I) ≥ 1 - measure (binomial_pmf n p) {..n div 2}" 
    (is "?L ≥ ?R")
proof -
  let ?pi = "Pi_pmf {..<n} undefined"
  define q where "q i = 𝒫(ω in M. X i ω ∈ I)" for i

  have n_ge_1: "n ≥ 1" using assms(1) by simp

  have 0: "{k. k < n ∧ (k < n ⟶ X k ω ∈ I)} = {k. k < n ∧ X k ω ∈ I}" for ω
    by auto

  have "countable ({..<n} →⇩_E (UNIV ::  bool set))"
    by (intro countable_PiE) auto
  hence countable_ext: "countable (extensional {..<n} :: (nat ⇒ bool) set)"
    unfolding PiE_def by auto

  have m0: "I ∈ sets borel"
    using interval_borel[OF assms(3)] by simp

  have m1: "random_variable borel (λx. X k x)" if "k ∈ {..<n}" for k 
    using assms(4) that unfolding indep_vars_def by auto

  have m2: "(λx. x ∈ I) ∈ borel →⇩_M (measure_pmf ((bernoulli_pmf ∘ q) k))"  
    for k using m0 by measurable
  hence m3: "random_variable (measure_pmf ((bernoulli_pmf ∘ q) k)) (λx. X k x ∈ I)"
    if "k ∈ {..<n}" for k 
    by (intro measurable_compose[OF m1] that)

  hence m4: "random_variable (PiM {..<n} (bernoulli_pmf ∘ q)) (λω. λk∈{..<n}. X k ω ∈ I)" 
    by (intro measurable_restrict) auto
  moreover have "A ∈ sets (Pi⇩_M {..<n} (λx. measure_pmf (bernoulli_pmf (q x))))"
    if "A ⊆ extensional {..<n}" for A 
  proof -
    have "A = (⋃a ∈ A. {a})" by auto
    also have "... = (⋃a ∈ A. PiE {..<n} (λk. {a k}))"
      using that by (intro arg_cong[where f="Union"] image_cong refl PiE_singleton[symmetric]) auto
    also have "... ∈ sets (Pi⇩_M {..<n} (λx. measure_pmf (bernoulli_pmf (q x))))"
      using that countable_ext countable_subset
      by (intro sets.countable_Union countable_image image_subsetI sets_PiM_I_finite) auto
    finally show ?thesis by simp
  qed
  hence m5: "id ∈ (PiM {..<n} (bernoulli_pmf ∘ q)) →⇩_M (count_space UNIV)"
    by (intro measurableI) (simp_all add:vimage_def space_PiM PiE_def)
  ultimately have "random_variable (count_space UNIV) (id ∘ (λω. λk∈{..<n}. X k ω ∈ I))"
    by (rule measurable_comp)
  hence m6: "random_variable (count_space UNIV) (λω. λk∈{..<n}. X k ω ∈ I)" by simp

  have indep: "indep_vars (bernoulli_pmf ∘ q) (λi x. X i x ∈ I) {0..<n}" 
    by (intro indep_vars_compose2[OF assms(4)] m2)

  have "measure M {x ∈ space M. (X k x ∈ I) = ω} = measure (bernoulli_pmf (q k)) {ω}"
    if "k < n" for ω k
  proof (cases "ω")
    case True
    then show ?thesis unfolding q_def  by (simp add:measure_pmf_single)
  next
    case False
    have "{x ∈ space M. X k x ∈ I} ∈ events"
      using that m0 by (intro measurable_sets_Collect[OF m1]) auto
    hence "prob {x ∈ space M. X k x ∉ I} = 1 - prob {ω ∈ space M. X k ω ∈ I}" 
      by (subst prob_neg) auto
    thus ?thesis using False unfolding q_def by (simp add:measure_pmf_single)
  qed

  hence 1: "emeasure M {x ∈ space M. (X k x ∈ I) = ω} = emeasure (bernoulli_pmf (q k)) {ω}"
    if "k < n" for ω k
    using that unfolding emeasure_eq_measure measure_pmf.emeasure_eq_measure by simp

  interpret product_sigma_finite "(bernoulli_pmf ∘ q)"
    by standard

  have "distr M (count_space UNIV) (λω. (λk∈{..<n} . X k ω ∈ I)) = distr 
    (distr M (PiM {..<n} (bernoulli_pmf ∘ q)) (λω. λk∈{..<n}. X k ω ∈ I)) (count_space UNIV) id"
    by (subst distr_distr[OF m5 m4]) (simp add:comp_def) 
  also have "... = distr (PiM {..<n} (λi. (distr M ((bernoulli_pmf ∘ q) i) (λω. X i ω ∈ I) )))
    (count_space UNIV) id" 
    using assms(1) indep atLeast0LessThan by (intro arg_cong2[where f="λx y. distr x y id"] 
        iffD1[OF indep_vars_iff_distr_eq_PiM'] m3) auto 
  also have "... = distr (PiM {..<n} (bernoulli_pmf ∘ q)) (count_space UNIV) id"
    using m3 1 by (intro distr_cong PiM_cong refl discrete_measure_eqI[where Ω="UNIV"])
        (simp_all add:emeasure_distr vimage_def Int_def conj_commute)
  also have "... = ?pi (bernoulli_pmf ∘ q)"
  proof (rule discrete_measure_eqI[where Ω="extensional {..<n}"], goal_cases)
    case 1 show ?case by simp
  next
    case 2 show ?case by simp
  next
    case 3 show ?case using countable_ext by simp
  next
    case (4 x)
    have "emeasure (Pi⇩_M {..<n} (bernoulli_pmf ∘ q)) {x} = 
      emeasure (Pi⇩_M {..<n} (bernoulli_pmf ∘ q)) (PiE {..<n} (λk. {x k}))"
      using PiE_singleton[OF 4] by simp
    also have "... = (∏i<n. emeasure (measure_pmf (bernoulli_pmf (q i))) {x i})"
      by (subst emeasure_PiM) auto
    also have "... = emeasure (Pi_pmf {..<n} undefined (bernoulli_pmf∘q)) 
      (PiE_dflt {..<n} undefined (λk. {x k}))"
      unfolding measure_pmf.emeasure_eq_measure
      by (subst measure_Pi_pmf_PiE_dflt) (simp_all add:prod_ennreal)
    also have "... = emeasure (Pi_pmf {..<n} undefined (bernoulli_pmf∘q)) {x}"
      using 4 by (intro arg_cong2[where f="emeasure"]) (auto simp add:PiE_dflt_def extensional_def)
    finally have "emeasure (Pi⇩_M {..<n} (bernoulli_pmf ∘ q)) {x} = 
      emeasure (Pi_pmf {..<n} undefined (bernoulli_pmf ∘ q)) {x}"
      by simp
    thus ?case using 4 
      by (subst (1 2) emeasure_distr[OF m5]) (simp_all add:vimage_def space_PiM PiE_def)
  next
    case 5
    have "AE x in Pi⇩_M {..<n} (bernoulli_pmf ∘ q). x ∈ extensional {..<n}"
      by (intro AE_I2) (simp add:space_PiM PiE_def)
    then show ?case by (subst AE_distr_iff[OF m5]) simp_all
  next
    case 6
    then show ?case by (intro AE_pmfI) (simp add: set_Pi_pmf PiE_dflt_def extensional_def)
  qed
  finally have 2: "distr M (count_space UNIV) (λω. (λk∈{..<n}. X k ω ∈ I)) = ?pi (bernoulli_pmf∘q)"
    by simp

  have 3: "n < 2 * card {k. k < n ∧ y k}" if 
    "n < 2 * card {k. k < n ∧ x k}" "⋀i. i < n ⟹ x i ⟹ y i" for x y
  proof -
    have "2 * card {k. k < n ∧ x k} ≤ 2 * card {k. k < n ∧ y k}"
      using that(2) by (intro mult_left_mono card_mono) auto
    thus ?thesis using that(1) by simp
  qed

  have 4: "0 ≤ p ∧ p ≤ q i ∧ q i ≤ 1" if "i < n" for i
    unfolding q_def using assms(2,5) that by auto

  have p_range: "p ∈ {0..1}"
    using 4[OF assms(1)] by auto

  have "?R = 1 - measure_pmf.prob (binomial_pmf n p) {k. 2 * k ≤ n}"
    by (intro arg_cong2[where f="(-)"] arg_cong2[where f="measure_pmf.prob"]) auto
  also have "... = measure (binomial_pmf n p) {k. n < 2 * k}"
    by (subst measure_pmf.prob_compl[symmetric]) (simp_all add:set_diff_eq not_le)
  also have "... = measure (?pi (bernoulli_pmf ∘ (λ_. p))) {ω. n < 2 * card {k. k < n ∧ ω k}}"
    using p_range by (subst binomial_pmf_altdef'[where A="{..<n}" and dflt="undefined"]) auto
  also have "... ≤ measure (?pi (bernoulli_pmf ∘ q)) {ω. n < 2 * card {k. k < n ∧ ω k}}"
    using 3 4 by (intro prod_pmf_bernoulli_mono) auto
  also have "... = 
    𝒫(ω in distr M (count_space UNIV) (λω. λk∈{..<n}. X k ω ∈ I). n<2*card {k. k < n ∧ ω k})"
    unfolding 2 by simp
  also have "... = 𝒫(ω in M. n < 2*card {k. k < n ∧ X k ω ∈ I})" 
    by (subst measure_distr[OF m6]) (simp_all add:vimage_def Int_def conj_commute 0)
  also have "... ≤ ?L"
    using median_est[OF assms(3)] m0 m1
    by (intro finite_measure_mono measurable_sets_Collect[OF median_measurable[OF n_ge_1]]) auto 
  finally show "?R ≤ ?L" by simp
qed

text ‹Cumulative distribution of the binomial distribution (contributed by Yong Kiam Tan):›

lemma prob_binomial_pmf_upto:
  assumes "0 ≤ p" "p ≤ 1"
  shows "measure_pmf.prob (binomial_pmf n p) {..m} =
    sum (λi. real (n choose i) * p^i * (1 - p) ^(n-i)) {0..m}"
  by (auto simp: pmf_binomial[OF assms] measure_measure_pmf_finite intro!: sum.cong)

text ‹A tail bound for the binomial distribution using Hoeffding's inequality:›

lemma binomial_pmf_tail:
  assumes "p ∈ {0..1}" "real k ≤ real n * p"
  shows "measure (binomial_pmf n p) {..k} ≤ exp (- 2 * real n * (p - real k / n)^2)" 
    (is "?L ≤ ?R")
proof (cases "n = 0")
  case True then show ?thesis by simp
next
  case False
  let ?A = "{..<n}"
  let ?pi = "Pi_pmf ?A undefined (λ_. bernoulli_pmf p)"

  define μ where "μ = (∑i<n. (∫x. (of_bool (x i) :: real) ∂ ?pi))"
  define ε :: real where "ε = μ - k" (* eps ≥ 0 <-> k ≤ mu *)

  have "μ = (∑i<n. (∫x. (of_bool x :: real) ∂ (map_pmf (λω. ω i) ?pi)))"
    unfolding μ_def by simp
  also have "... = (∑i<n. (∫x. (of_bool x :: real) ∂ (bernoulli_pmf p)))"
    by (simp add: Pi_pmf_component)
  also have "... = real n * p" using assms(1) by simp
  finally have μ_alt: "μ = real n * p"
    by simp

  have ε_ge_0: "ε ≥ 0"
    using assms(2) unfolding ε_def μ_alt by auto

  have indep: "prob_space.indep_vars ?pi (λ_. borel) (λk ω. of_bool (ω k)) {..<n}"
    by (intro prob_space.indep_vars_compose2[OF prob_space_measure_pmf indep_vars_Pi_pmf]) auto
  interpret Hoeffding_ineq "?pi" "{..<n}" "λk ω. of_bool (ω k)" "λ_.0" "λ_.1" μ
    using indep unfolding μ_def by (unfold_locales) simp_all  
  
  have "?L = measure (map_pmf (λf. card {x ∈ ?A. f x}) ?pi) {..k}"
    by (intro arg_cong2[where f="measure_pmf.prob"] binomial_pmf_altdef' assms(1)) auto
  also have "... = 𝒫(ω in ?pi. (∑i<n. of_bool (ω i)) ≤ μ - ε)"
    unfolding ε_def by (simp add:vimage_def Int_def)
  also have "... ≤ exp (- 2 * ε⇧² / (∑i<n. (1 - 0)⇧²))"
    using False by (intro Hoeffding_ineq_le ε_ge_0) auto
  also have "... = ?R"
    unfolding ε_def μ_alt by (simp add:power2_eq_square field_simps) 
  finally show ?thesis by simp
qed

theorem (in prob_space) median_bound:
  fixes n :: nat
  fixes I :: "('b :: {linorder_topology, second_countable_topology}) set"
  assumes "interval I"
  assumes "α > 0"
  assumes "ε ∈ {0<..<1}"
  assumes "indep_vars (λ_. borel) X {0..<n}"
  assumes "n ≥ - ln ε / (2 * α⇧²)"
  assumes "⋀i. i < n ⟹ 𝒫(ω in M. X i ω ∈ I) ≥ 1/2+α" 
  shows "𝒫(ω in M. median n (λi. X i ω) ∈ I) ≥ 1-ε"
proof -
  have "0 < -ln ε / (2 * α⇧²)"  
    using assms by (intro divide_pos_pos) auto
  also have "... ≤ real n" using assms by simp
  finally have "real n > 0" by simp
  hence n_ge_0:"n > 0" by simp

  have d0: "real_of_int ⌊real n / 2⌋ * 2 / real n ≤ 1"
    using n_ge_0 by simp linarith

  hence d1: "real (nat ⌊real n / 2⌋) ≤ real n * (1 / 2)"
    using n_ge_0 by (simp add:field_simps)
  also have "... ≤ real n * (1 / 2 + α)"
    using assms(2) by (intro mult_left_mono) auto
  finally have d1: "real (nat ⌊real n / 2⌋) ≤ real n * (1 / 2 + α)" by simp

  have "1/2 + α ≤ 𝒫(ω in M. X 0 ω ∈ I)" using n_ge_0 by (intro assms(6))
  also have "... ≤ 1" by simp
  finally have d2: "1 / 2 + α ≤ 1" by simp

  have d3: "nat ⌊real n / 2⌋ = n div 2" by linarith

  have "1 - ε ≤ 1 - exp (- 2 * real n * α⇧²)" 
    using assms(2,3,5) by (intro diff_mono order.refl iffD1[OF ln_ge_iff]) (auto simp:field_simps)
  also have "... ≤ 1 - exp (- 2 * real n * ((1/2+α) - real (nat ⌊real n/2⌋) / real n)⇧²)"
    using d0 n_ge_0 assms(2)
    by (intro diff_mono order.refl iffD2[OF exp_le_cancel_iff] mult_left_mono_neg power_mono) auto 
  also have "... ≤ 1 - measure (binomial_pmf n (1/2+α)) {..nat ⌊real n/2⌋}"
    using assms(2) d1 d2 by (intro diff_mono order.refl binomial_pmf_tail) auto
  also have "... = 1 - measure (binomial_pmf n (1/2+α)) {..n div 2}" by (simp add:d3)
  also have "... ≤ 𝒫(ω in M. median n (λi. X i ω) ∈ I)"
    using assms(2) by (intro median_bound_raw n_ge_0 assms(1,4,6) add_nonneg_nonneg) auto
  finally show ?thesis by simp
qed

text ‹This is a specialization of the above to closed real intervals.›

corollary (in prob_space) median_bound_1:
  assumes "α > 0"
  assumes "ε ∈ {0<..<1}"
  assumes "indep_vars (λ_. borel) X {0..<n}"
  assumes "n ≥ - ln ε / (2 * α⇧²)"
  assumes "∀i ∈ {0..<n}. 𝒫(ω in M. X i ω ∈ ({a..b} :: real set)) ≥ 1/2+α" 
  shows "𝒫(ω in M. median n (λi. X i ω) ∈ {a..b}) ≥ 1-ε" 
  using assms(5) by (intro median_bound[OF _ assms(1,2,3,4)]) (auto simp:interval_def)

text ‹This is a specialization of the above, where $\alpha = \frac{1}{6}$ and the interval is 
described using a mid point @{term "μ"} and radius @{term "δ"}. The choice of 
$\alpha = \frac{1}{6}$ implies a success probability per random variable of $\frac{2}{3}$. It is a 
commonly chosen success probability for Monte-Carlo algorithms 
(cf. \<^cite>‹‹\textsection 4› in "baryossef2002"› or \<^cite>‹‹\textsection 1› in "kane2010"›).›

corollary (in prob_space) median_bound_2:
  fixes μ δ :: real
  assumes "ε ∈ {0<..<1}"
  assumes "indep_vars (λ_. borel) X {0..<n}"
  assumes "n ≥ -18 * ln ε"
  assumes "⋀i. i < n ⟹ 𝒫(ω in M. abs (X i ω - μ) > δ) ≤ 1/3" 
  shows "𝒫(ω in M. abs (median n (λi. X i ω) - μ) ≤ δ) ≥ 1-ε"
proof -
  have b:"space M - {ω ∈ space M. X i ω ∈ {μ-δ..μ+δ}} = {ω ∈ space M. abs (X i ω - μ) > δ}" for i
    by auto

  have "⋀i. i < n ⟹ 1 - 𝒫(ω in M. X i ω ∈ {μ- δ..μ+δ}) ≤ 1/3"
    using assms
    apply (subst prob_compl[symmetric])
     apply (measurable, simp add:indep_vars_def)
    by (subst b, simp)

  hence a:"⋀i. i < n ⟹ 𝒫(ω in M. X i ω ∈ {μ- δ..μ+δ}) ≥ 2/3" by simp
  
  have "1-ε ≤ 𝒫(ω in M. median n (λi. X i ω) ∈ {μ-δ..μ+δ})" using a assms(3)
    by (intro median_bound_1[OF _ assms(1,2), where α="1/6"]) (simp_all add:power2_eq_square)
  also have "... = 𝒫(ω in M. abs (median n (λi. X i ω) - μ) ≤ δ)"
    by (intro arg_cong2[where f="measure"] Collect_cong) auto
  finally show ?thesis by simp
qed

section ‹Some additional results about the median›

lemma sorted_mono_map: 
  assumes "sorted xs"
  assumes "mono f"
  shows "sorted (map f xs)"
  using assms(2) unfolding sorted_wrt_map
  by (intro sorted_wrt_mono_rel[OF _ assms(1)]) (simp add:mono_def)

text ‹This could be added to @{theory "HOL.List"}:›
lemma map_sort:
  assumes "mono f"
  shows "sort (map f xs) = map f (sort xs)"
  using assms by (intro properties_for_sort sorted_mono_map) auto

lemma median_cong:
  assumes "⋀i. i < n ⟹ f i = g i"
  shows "median n f = median n g"
  unfolding median_def using assms
  by (intro arg_cong2[where f="(!)"] arg_cong[where f="sort"] map_cong) auto

lemma median_restrict: 
  "median n (λi ∈ {0..<n}.f i) = median n f"
  by (rule median_cong, simp)

lemma median_commute_mono:
  assumes "n > 0"
  assumes "mono g"
  shows "g (median n f) = median n (g ∘ f)"
  apply (simp add: median_def del:map_map)
  apply (subst map_map[symmetric])
  apply (subst map_sort[OF assms(2)])
  apply (subst nth_map, simp) using assms apply fastforce
  by simp

lemma median_rat:
  assumes "n > 0"
  shows "real_of_rat (median n f) = median n (λi. real_of_rat (f i))"
  apply (subst (2) comp_def[where g="f", symmetric])
  apply (rule median_commute_mono[OF assms(1)])
  by (simp add: mono_def of_rat_less_eq)

lemma median_const:
  assumes "k > 0"
  shows "median k (λi ∈ {0..<k}. a) = a"
proof -
  have b: "sorted (map (λ_. a) [0..<k])" 
    by (subst sorted_wrt_map, simp)
  have a: "sort (map (λ_. a) [0..<k]) = map (λ_. a) [0..<k]"
    by (subst sorted_sort_id[OF b], simp)
  have "median k (λi ∈ {0..<k}. a) = median k (λ_. a)"
    by (subst median_restrict, simp)
  also have "... = a" using assms by (simp add:median_def a)
  finally show ?thesis by simp
qed

end