Theory Hoeffding

(*
  File:    Hoeffding.thy
  Author:  Manuel Eberl, TU München
*)
section ‹Hoeffding's Lemma and Hoeffding's Inequality›
theory Hoeffding
  imports Product_PMF Independent_Family
begin

text ‹
  Hoeffding's inequality shows that a sum of bounded independent random variables is concentrated
  around its mean, with an exponential decay of the tail probabilities.
›

subsection ‹Hoeffding's Lemma›

lemma convex_on_exp: 
  fixes l :: real
  assumes "l ≥ 0"
  shows   "convex_on UNIV (λx. exp(l*x))"
  using assms
  by (intro convex_on_realI[where f' = "λx. l * exp (l * x)"])
     (auto intro!: derivative_eq_intros mult_left_mono)

lemma mult_const_minus_self_real_le:
  fixes x :: real
  shows "x * (c - x) ≤ c⇧² / 4"
proof -
  have "x * (c - x) = -(x - c / 2)⇧² + c⇧² / 4"
    by (simp add: field_simps power2_eq_square)
  also have "… ≤ 0 + c⇧² / 4"
    by (intro add_mono) auto
  finally show ?thesis by simp
qed

lemma Hoeffdings_lemma_aux:
  fixes h p :: real
  assumes "h ≥ 0" and "p ≥ 0"
  defines "L ≡ (λh. -h * p + ln (1 + p * (exp h - 1)))"
  shows   "L h ≤ h⇧² / 8"
proof (cases "h = 0")
  case False
  hence h: "h > 0"
    using ‹h ≥ 0› by simp
  define L' where "L' = (λh. -p + p * exp h / (1 + p * (exp h - 1)))"
  define L'' where "L'' = (λh. -(p⇧²) * exp h * exp h / (1 + p * (exp h - 1))⇧² +
                              p * exp h / (1 + p * (exp h - 1)))"
  define Ls where "Ls = (λn. [L, L', L''] ! n)"

  have [simp]: "L 0 = 0" "L' 0 = 0"
    by (auto simp: L_def L'_def)

  have L': "(L has_real_derivative L' x) (at x)" if "x ∈ {0..h}" for x
  proof -
    have "1 + p * (exp x - 1) > 0"
      using ‹p ≥ 0› that by (intro add_pos_nonneg mult_nonneg_nonneg) auto
    thus ?thesis
      unfolding L_def L'_def by (auto intro!: derivative_eq_intros)
  qed

  have L'': "(L' has_real_derivative L'' x) (at x)" if "x ∈ {0..h}" for x
  proof -
    have *: "1 + p * (exp x - 1) > 0"
      using ‹p ≥ 0› that by (intro add_pos_nonneg mult_nonneg_nonneg) auto
    show ?thesis
      unfolding L'_def L''_def
      by (insert *, (rule derivative_eq_intros refl | simp)+) (auto simp: divide_simps; algebra)
  qed

  have diff: "∀m t. m < 2 ∧ 0 ≤ t ∧ t ≤ h ⟶ (Ls m has_real_derivative Ls (Suc m) t) (at t)"
    using L' L'' by (auto simp: Ls_def nth_Cons split: nat.splits)
  from Taylor[of 2 Ls L 0 h 0 h, OF _ _ diff]
    obtain t where t: "t ∈ {0<..<h}" "L h = L'' t * h⇧² / 2"
      using ‹h > 0› by (auto simp: Ls_def lessThan_nat_numeral)
  define u where "u = p * exp t / (1 + p * (exp t - 1))"

  have "L'' t = u * (1 - u)"
    by (simp add: L''_def u_def divide_simps; algebra)
  also have "… ≤ 1 / 4"
    using mult_const_minus_self_real_le[of u 1] by simp
  finally have "L'' t ≤ 1 / 4" .

  note t(2)
  also have "L'' t * h⇧² / 2 ≤ (1 / 4) * h⇧² / 2"
    using ‹L'' t ≤ 1 / 4› by (intro mult_right_mono divide_right_mono) auto
  finally show "L h ≤ h⇧² / 8" by simp
qed (auto simp: L_def)


locale interval_bounded_random_variable = prob_space +
  fixes f :: "'a ⇒ real" and a b :: real
  assumes random_variable [measurable]: "random_variable borel f"
  assumes AE_in_interval: "AE x in M. f x ∈ {a..b}"
begin

lemma integrable [intro]: "integrable M f"
proof (rule integrable_const_bound)
  show "AE x in M. norm (f x) ≤ max ¦a¦ ¦b¦"
    by (intro eventually_mono[OF AE_in_interval]) auto
qed (fact random_variable)

text ‹
  We first show Hoeffding's lemma for distributions whose expectation is 0. The general
  case will easily follow from this later.
›
lemma Hoeffdings_lemma_nn_integral_0:
  assumes "l > 0" and E0: "expectation f = 0"
  shows   "nn_integral M (λx. exp (l * f x)) ≤ ennreal (exp (l⇧² * (b - a)⇧² / 8))"
proof (cases "AE x in M. f x = 0")
  case True
  hence "nn_integral M (λx. exp (l * f x)) = nn_integral M (λx. ennreal 1)"
    by (intro nn_integral_cong_AE) auto
  also have "… = ennreal (expectation (λ_. 1))"
    by (intro nn_integral_eq_integral) auto
  finally show ?thesis by (simp add: prob_space)
next
  case False
  have "a < 0"
  proof (rule ccontr)
    assume a: "¬(a < 0)"
    have "AE x in M. f x = 0"
    proof (subst integral_nonneg_eq_0_iff_AE [symmetric])
      show "AE x in M. f x ≥ 0"
        using AE_in_interval by eventually_elim (use a in auto)
    qed (use E0 in ‹auto simp: id_def integrable›)
    with False show False by contradiction
  qed

  have "b > 0"
  proof (rule ccontr)
    assume b: "¬(b > 0)"
    have "AE x in M. -f x = 0"
    proof (subst integral_nonneg_eq_0_iff_AE [symmetric])
      show "AE x in M. -f x ≥ 0"
        using AE_in_interval by eventually_elim (use b in auto)
    qed (use E0 in ‹auto simp: id_def integrable›)
    with False show False by simp
  qed
    
  have "a < b"
    using ‹a < 0› ‹b > 0› by linarith

  define p where "p = -a / (b - a)"
  define L where "L = (λt. -t* p + ln (1 - p + p * exp t))"
  define z where "z = l * (b - a)"
  have "z > 0"
    unfolding z_def using ‹a < b› ‹l > 0› by auto
  have "p > 0"
    using ‹a < 0› ‹a < b› unfolding p_def by (intro divide_pos_pos) auto

  have "(∫⇧⁺x. exp (l * f x) ∂M) ≤
        (∫⇧⁺x. (b - f x) / (b - a) * exp (l * a) + (f x - a) / (b - a) * exp (l * b) ∂M)"
  proof (intro nn_integral_mono_AE eventually_mono[OF AE_in_interval] ennreal_leI)
    fix x assume x: "f x ∈ {a..b}"
    define y where "y = (b - f x) / (b-a)"
    have y: "y ∈ {0..1}"
      using x ‹a < b› by (auto simp: y_def)
    have conv: "convex_on UNIV (λx. exp(l*x))"
      using ‹l > 0› by (intro convex_on_exp) auto
    have "exp (l * ((1 - y) *⇩_R b + y *⇩_R a)) ≤ (1 - y) * exp (l * b) + y * exp (l * a)"
      using y ‹l > 0› by (intro convex_onD[OF convex_on_exp]) auto
    also have "(1 - y) *⇩_R b + y *⇩_R a = f x"
      using ‹a < b› by (simp add: y_def divide_simps) (simp add: algebra_simps)?
    also have "1 - y = (f x - a) / (b - a)"
      using ‹a < b› by (simp add: field_simps y_def)
    finally show "exp (l * f x) ≤ (b - f x) / (b - a) * exp (l*a) + (f x - a)/(b-a) * exp (l*b)"
      by (simp add: y_def)
  qed
  also have "… = (∫⇧⁺x. ennreal (b - f x) * exp (l * a) / (b - a) +
                        ennreal (f x - a) * exp (l * b) / (b - a) ∂M)"
    using ‹a < 0› ‹b > 0›
    by (intro nn_integral_cong_AE eventually_mono[OF AE_in_interval])
       (simp add: ennreal_plus ennreal_mult flip: divide_ennreal)
  also have "… = ((∫⇧⁺ x. ennreal (b - f x) ∂M) * ennreal (exp (l * a)) +
                   (∫⇧⁺ x. ennreal (f x - a) ∂M) * ennreal (exp (l * b))) / ennreal (b - a)"
    by (simp add: nn_integral_add nn_integral_divide nn_integral_multc add_divide_distrib_ennreal)
  also have "(∫⇧⁺ x. ennreal (b - f x) ∂M) = ennreal (expectation (λx. b - f x))"
    by (intro nn_integral_eq_integral Bochner_Integration.integrable_diff
              eventually_mono[OF AE_in_interval] integrable_const integrable) auto
  also have "expectation (λx. b - f x) = b"
    using assms by (subst Bochner_Integration.integral_diff) (auto simp: prob_space)
  also have "(∫⇧⁺ x. ennreal (f x - a) ∂M) = ennreal (expectation (λx. f x - a))"
    by (intro nn_integral_eq_integral Bochner_Integration.integrable_diff
              eventually_mono[OF AE_in_interval] integrable_const integrable) auto
  also have "expectation (λx. f x - a) = (-a)"
    using assms by (subst Bochner_Integration.integral_diff) (auto simp: prob_space)
  also have "(ennreal b * (exp (l * a)) + ennreal (-a) * (exp (l * b))) / (b - a) =
             ennreal (b * exp (l * a) - a * exp (l * b)) / ennreal (b - a)"
    using ‹a < 0› ‹b > 0›
    by (simp flip: ennreal_mult ennreal_plus add: mult_nonpos_nonneg divide_ennreal mult_mono)
  also have "b * exp (l * a) - a * exp (l * b) = exp (L z) * (b - a)"
  proof -
    have pos: "1 - p + p * exp z > 0"
    proof -
      have "exp z > 1" using ‹l > 0› and ‹a < b›
        by (subst one_less_exp_iff) (auto simp: z_def intro!: mult_pos_pos)
      hence "(exp z - 1) * p ≥ 0"
        unfolding p_def using ‹a < 0› and ‹a < b›
        by (intro mult_nonneg_nonneg divide_nonneg_pos) auto
      thus ?thesis
        by (simp add: algebra_simps)
    qed

    have "exp (L z) * (b - a) = exp (-z * p) * (1 - p + p * exp z) * (b - a)"
      using pos by (simp add: exp_add L_def exp_diff exp_minus divide_simps)
    also have "… = b * exp (l * a) - a * exp (l * b)" using ‹a < b›
      by (simp add: p_def z_def divide_simps) (simp add:  exp_diff algebra_simps)?
    finally show ?thesis by simp
  qed
  also have "ennreal (exp (L z) * (b - a)) / ennreal (b - a) = ennreal (exp (L z))"
    using ‹a < b› by (simp add: divide_ennreal)
  also have "L z = -z * p + ln (1 + p * (exp z - 1))"
    by (simp add: L_def algebra_simps)
  also have "… ≤ z⇧² / 8"
    unfolding L_def by (rule Hoeffdings_lemma_aux[where p = p]) (use ‹z > 0› ‹p > 0› in simp_all)
  hence "ennreal (exp (-z * p + ln (1 + p * (exp z - 1)))) ≤ ennreal (exp (z⇧² / 8))"
    by (intro ennreal_leI) auto
  finally show ?thesis
    by (simp add: z_def power_mult_distrib)
qed

context
begin

interpretation shift: interval_bounded_random_variable M "λx. f x - μ" "a - μ" "b - μ"
  rewrites "b - μ - (a - μ) ≡ b - a"
  by unfold_locales (auto intro!: eventually_mono[OF AE_in_interval])

lemma expectation_shift: "expectation (λx. f x - expectation f) = 0"
  by (subst Bochner_Integration.integral_diff) (auto simp: integrable prob_space)

lemmas Hoeffdings_lemma_nn_integral = shift.Hoeffdings_lemma_nn_integral_0[OF _ expectation_shift]

end

end



subsection ‹Hoeffding's Inequality›

text ‹
  Consider ‹n› independent real random variables $X_1, \ldots, X_n$ that each almost surely lie
  in a compact interval $[a_i, b_i]$. Hoeffding's inequality states that the distribution of the
  sum of the $X_i$ is tightly concentrated around the sum of the expected values: the probability
  of it being above or below the sum of the expected values by more than some ‹ε› decreases
  exponentially with ‹ε›.
›

locale indep_interval_bounded_random_variables = prob_space +
  fixes I :: "'b set" and X :: "'b ⇒ 'a ⇒ real"
  fixes a b :: "'b ⇒ real"
  assumes fin: "finite I"
  assumes indep: "indep_vars (λ_. borel) X I"
  assumes AE_in_interval: "⋀i. i ∈ I ⟹ AE x in M. X i x ∈ {a i..b i}"
begin

lemma random_variable [measurable]:
  assumes i: "i ∈ I"
  shows "random_variable borel (X i)"
  using i indep unfolding indep_vars_def by blast

lemma bounded_random_variable [intro]:
  assumes i: "i ∈ I"
  shows   "interval_bounded_random_variable M (X i) (a i) (b i)"
  by unfold_locales (use AE_in_interval[OF i] i in auto)

end


locale Hoeffding_ineq = indep_interval_bounded_random_variables +
  fixes μ :: real
  defines "μ ≡ (∑i∈I. expectation (X i))"
begin

theorem%important Hoeffding_ineq_ge:
  assumes "ε ≥ 0"
  assumes "(∑i∈I. (b i - a i)⇧²) > 0"
  shows   "prob {x∈space M. (∑i∈I. X i x) ≥ μ + ε} ≤ exp (-2 * ε⇧² / (∑i∈I. (b i - a i)⇧²))"
proof (cases "ε = 0")
  case [simp]: True
  have "prob {x∈space M. (∑i∈I. X i x) ≥ μ + ε} ≤ 1"
    by simp
  thus ?thesis by simp
next
  case False
  with ‹ε ≥ 0› have ε: "ε > 0"
    by auto

  define d where "d = (∑i∈I. (b i - a i)⇧²)"
  define l :: real where "l = 4 * ε / d"
  have d: "d > 0"
    using assms by (simp add: d_def)
  have l: "l > 0"
    using ε d by (simp add: l_def)
  define μ' where "μ' = (λi. expectation (X i))"

  have "{x∈space M. (∑i∈I. X i x) ≥ μ + ε} = {x∈space M. (∑i∈I. X i x) - μ ≥ ε}"
    by (simp add: algebra_simps)
  hence "ennreal (prob {x∈space M. (∑i∈I. X i x) ≥ μ + ε}) = emeasure M …"
    by (simp add: emeasure_eq_measure)
  also have "… ≤ ennreal (exp (-l*ε)) * (∫⇧⁺x∈space M. exp (l * ((∑i∈I. X i x) - μ)) ∂M)"
    by (intro Chernoff_ineq_nn_integral_ge l) auto
  also have "(λx. (∑i∈I. X i x) - μ) = (λx. (∑i∈I. X i x - μ' i))"
    by (simp add: μ_def sum_subtractf μ'_def)
  also have "(∫⇧⁺x∈space M. exp (l * ((∑i∈I. X i x - μ' i))) ∂M) =
             (∫⇧⁺x. (∏i∈I. ennreal (exp (l * (X i x - μ' i)))) ∂M)"
    by (intro nn_integral_cong)
       (simp_all add: sum_distrib_left ring_distribs exp_diff exp_sum fin prod_ennreal)
  also have "… = (∏i∈I. ∫⇧⁺x. ennreal (exp (l * (X i x - μ' i))) ∂M)"
    by (intro indep_vars_nn_integral fin indep_vars_compose2[OF indep]) auto
  also have "ennreal (exp (-l * ε)) * … ≤
               ennreal (exp (-l * ε)) * (∏i∈I. ennreal (exp (l⇧² * (b i - a i)⇧² / 8)))"
  proof (intro mult_left_mono prod_mono_ennreal)
    fix i assume i: "i ∈ I"
    from i interpret interval_bounded_random_variable M "X i" "a i" "b i" ..
    show "(∫⇧⁺x. ennreal (exp (l * (X i x - μ' i))) ∂M) ≤ ennreal (exp (l⇧² * (b i - a i)⇧² / 8))"
      unfolding μ'_def by (rule Hoeffdings_lemma_nn_integral) fact+
  qed auto
  also have "… = ennreal (exp (-l*ε) * (∏i∈I. exp (l⇧² * (b i - a i)⇧² / 8)))"
    by (simp add: prod_ennreal prod_nonneg flip: ennreal_mult)
  also have "exp (-l*ε) * (∏i∈I. exp (l⇧² * (b i - a i)⇧² / 8)) = exp (d * l⇧² / 8 - l * ε)"
    by (simp add: exp_diff exp_minus sum_divide_distrib sum_distrib_left
                  sum_distrib_right exp_sum fin divide_simps mult_ac d_def)
  also have "d * l⇧² / 8 - l * ε = -2 * ε⇧² / d"
    using d by (simp add: l_def field_simps power2_eq_square)
  finally show ?thesis
    by (subst (asm) ennreal_le_iff) (simp_all add: d_def)
qed

corollary Hoeffding_ineq_le:
  assumes ε: "ε ≥ 0"
  assumes "(∑i∈I. (b i - a i)⇧²) > 0"
  shows   "prob {x∈space M. (∑i∈I. X i x) ≤ μ - ε} ≤ exp (-2 * ε⇧² / (∑i∈I. (b i - a i)⇧²))"
proof -
  interpret flip: Hoeffding_ineq M I "λi x. -X i x" "λi. -b i" "λi. -a i" "-μ"
  proof unfold_locales
    fix i assume "i ∈ I"
    then interpret interval_bounded_random_variable M "X i" "a i" "b i" ..
    show "AE x in M. - X i x ∈ {- b i..- a i}"
      by (intro eventually_mono[OF AE_in_interval]) auto
  qed (auto simp: fin μ_def sum_negf intro: indep_vars_compose2[OF indep])

  have "prob {x∈space M. (∑i∈I. X i x) ≤ μ - ε} = prob {x∈space M. (∑i∈I. -X i x) ≥ -μ + ε}"
    by (simp add: sum_negf algebra_simps)
  also have "… ≤ exp (- 2 * ε⇧² / (∑i∈I. (b i - a i)⇧²))"
    using flip.Hoeffding_ineq_ge[OF ε] assms(2) by simp
  finally show ?thesis .
qed

corollary Hoeffding_ineq_abs_ge:
  assumes ε: "ε ≥ 0"
  assumes "(∑i∈I. (b i - a i)⇧²) > 0"
  shows   "prob {x∈space M. ¦(∑i∈I. X i x) - μ¦ ≥ ε} ≤ 2 * exp (-2 * ε⇧² / (∑i∈I. (b i - a i)⇧²))"
proof -
  have "{x∈space M. ¦(∑i∈I. X i x) - μ¦ ≥ ε} =
        {x∈space M. (∑i∈I. X i x) ≥ μ + ε} ∪ {x∈space M. (∑i∈I. X i x) ≤ μ - ε}"
    by auto
  also have "prob … ≤ prob {x∈space M. (∑i∈I. X i x) ≥ μ + ε} +
                       prob {x∈space M. (∑i∈I. X i x) ≤ μ - ε}"
    by (intro measure_Un_le) auto
  also have "… ≤ exp (-2 * ε⇧² / (∑i∈I. (b i - a i)⇧²)) + exp (-2 * ε⇧² / (∑i∈I. (b i - a i)⇧²))"
    by (intro add_mono Hoeffding_ineq_ge Hoeffding_ineq_le assms)
  finally show ?thesis by simp
qed

end


subsection ‹Hoeffding's inequality for i.i.d. bounded random variables›

text ‹
  If we have ‹n› even identically-distributed random variables, the statement of Hoeffding's
  lemma simplifies a bit more: it shows that the probability that the average of the $X_i$
  is more than ‹ε› above the expected value is no greater than $e^{\frac{-2ny^2}{(b-a)^2}}$.

  This essentially gives us a more concrete version of the weak law of large numbers: the law
  states that the probability vanishes for ‹n → ∞› for any ‹ε > 0›. Unlike Hoeffding's inequality,
  it does not assume the variables to have bounded support, but it does not provide concrete bounds.
›

locale iid_interval_bounded_random_variables = prob_space +
  fixes I :: "'b set" and X :: "'b ⇒ 'a ⇒ real" and Y :: "'a ⇒ real"
  fixes a b :: real
  assumes fin: "finite I"
  assumes indep: "indep_vars (λ_. borel) X I"
  assumes distr_X: "i ∈ I ⟹ distr M borel (X i) = distr M borel Y"
  assumes rv_Y [measurable]: "random_variable borel Y"
  assumes AE_in_interval: "AE x in M. Y x ∈ {a..b}"
begin

lemma random_variable [measurable]:
  assumes i: "i ∈ I"
  shows "random_variable borel (X i)"
  using i indep unfolding indep_vars_def by blast

sublocale X: indep_interval_bounded_random_variables M I X "λ_. a" "λ_. b"
proof
  fix i assume i: "i ∈ I"
  have "AE x in M. Y x ∈ {a..b}"
    by (fact AE_in_interval)
  also have "?this ⟷ (AE x in distr M borel Y. x ∈ {a..b})"
    by (subst AE_distr_iff) auto
  also have "distr M borel Y = distr M borel (X i)"
    using i by (simp add: distr_X)
  also have "(AE x in …. x ∈ {a..b}) ⟷ (AE x in M. X i x ∈ {a..b})"
    using i by (subst AE_distr_iff) auto
  finally show "AE x in M. X i x ∈ {a..b}" .
qed (simp_all add: fin indep)

lemma expectation_X [simp]:
  assumes i: "i ∈ I"
  shows "expectation (X i) = expectation Y"
proof -
  have "expectation (X i) = lebesgue_integral (distr M borel (X i)) (λx. x)"
    using i by (intro integral_distr [symmetric]) auto
  also have "distr M borel (X i) = distr M borel Y"
    using i by (rule distr_X)
  also have "lebesgue_integral … (λx. x) = expectation Y"
    by (rule integral_distr) auto
  finally show "expectation (X i) = expectation Y" .
qed

end


locale Hoeffding_ineq_iid = iid_interval_bounded_random_variables +
  fixes μ :: real
  defines "μ ≡ expectation Y"
begin

sublocale X: Hoeffding_ineq M I X "λ_. a" "λ_. b" "real (card I) * μ"
  by unfold_locales (simp_all add: μ_def)

corollary
  assumes ε: "ε ≥ 0"
  assumes "a < b" "I ≠ {}"
  defines "n ≡ card I"
  shows   Hoeffding_ineq_ge:
            "prob {x∈space M. (∑i∈I. X i x) ≥ n * μ + ε} ≤
               exp (-2 * ε⇧² / (n * (b - a)⇧²))" (is ?le)
    and   Hoeffding_ineq_le:
            "prob {x∈space M. (∑i∈I. X i x) ≤ n * μ - ε} ≤
               exp (-2 * ε⇧² / (n * (b - a)⇧²))" (is ?ge)
    and   Hoeffding_ineq_abs_ge:
            "prob {x∈space M. ¦(∑i∈I. X i x) - n * μ¦ ≥ ε} ≤
               2 * exp (-2 * ε⇧² / (n * (b - a)⇧²))" (is ?abs_ge)
proof -
  have pos: "(∑i∈I. (b - a)⇧²) > 0"
    using ‹a < b› ‹I ≠ {}› fin by (intro sum_pos) auto
  show ?le
    using X.Hoeffding_ineq_ge[OF ε pos] by (simp add: n_def)
  show ?ge
    using X.Hoeffding_ineq_le[OF ε pos] by (simp add: n_def)
  show ?abs_ge
    using X.Hoeffding_ineq_abs_ge[OF ε pos] by (simp add: n_def)
qed

lemma 
  assumes ε: "ε ≥ 0"
  assumes "a < b" "I ≠ {}"
  defines "n ≡ card I"
  shows   Hoeffding_ineq_ge':
            "prob {x∈space M. (∑i∈I. X i x) / n ≥ μ + ε} ≤
               exp (-2 * n * ε⇧² / (b - a)⇧²)" (is ?ge)
    and   Hoeffding_ineq_le':
            "prob {x∈space M. (∑i∈I. X i x) / n ≤ μ - ε} ≤
               exp (-2 * n * ε⇧² / (b - a)⇧²)" (is ?le)
    and   Hoeffding_ineq_abs_ge':
            "prob {x∈space M. ¦(∑i∈I. X i x) / n - μ¦ ≥ ε} ≤
               2 * exp (-2 * n * ε⇧² / (b - a)⇧²)" (is ?abs_ge)
proof -
  have "n > 0"
    using assms fin by (auto simp: field_simps)
  have ε': "ε * n ≥ 0"
    using ‹n > 0› ‹ε ≥ 0› by auto
  have eq: "- (2 * (ε * real n)⇧² / (real (card I) * (b - a)⇧²)) =
            - (2 * real n * ε⇧² / (b - a)⇧²)"
    using ‹n > 0› by (simp add: power2_eq_square divide_simps n_def)

  have "{x∈space M. (∑i∈I. X i x) / n ≥ μ + ε} =
        {x∈space M. (∑i∈I. X i x) ≥ μ * n + ε * n}"
    using ‹n > 0› by (intro Collect_cong conj_cong refl) (auto simp: field_simps)
  with Hoeffding_ineq_ge[OF ε' ‹a < b› ‹I ≠ {}›] ‹n > 0› eq show ?ge
    by (simp add: n_def mult_ac)

  have "{x∈space M. (∑i∈I. X i x) / n ≤ μ - ε} =
        {x∈space M. (∑i∈I. X i x) ≤ μ * n - ε * n}"
    using ‹n > 0› by (intro Collect_cong conj_cong refl) (auto simp: field_simps)
  with Hoeffding_ineq_le[OF ε' ‹a < b› ‹I ≠ {}›] ‹n > 0› eq show ?le
    by (simp add: n_def mult_ac)

  have "{x∈space M. ¦(∑i∈I. X i x) / n - μ¦ ≥ ε} =
        {x∈space M. ¦(∑i∈I. X i x) - μ * n¦ ≥ ε * n}"
    using ‹n > 0› by (intro Collect_cong conj_cong refl) (auto simp: field_simps)
  with Hoeffding_ineq_abs_ge[OF ε' ‹a < b› ‹I ≠ {}›] ‹n > 0› eq show ?abs_ge
    by (simp add: n_def mult_ac)
qed

end


subsection ‹Hoeffding's Inequality for the Binomial distribution›

text ‹
  We can now apply Hoeffding's inequality to the Binomial distribution, which can be seen
  as the sum of ‹n› i.i.d. coin flips (the support of each of which is contained in $[0,1]$).
›

locale binomial_distribution =
  fixes n :: nat and p :: real
  assumes p: "p ∈ {0..1}"
begin

context
  fixes coins :: "(nat ⇒ bool) pmf" and μ
  assumes n: "n > 0"
  defines "coins ≡ Pi_pmf {..<n} False (λ_. bernoulli_pmf p)"
begin

lemma coins_component:
  assumes i: "i < n"
  shows   "distr (measure_pmf coins) borel (λf. if f i then 1 else 0) =
             distr (measure_pmf (bernoulli_pmf p)) borel (λb. if b then 1 else 0)"
proof -
  have "distr (measure_pmf coins) borel (λf. if f i then 1 else 0) =
        distr (measure_pmf (map_pmf (λf. f i) coins)) borel (λb. if b then 1 else 0)"
    unfolding map_pmf_rep_eq by (subst distr_distr) (auto simp: o_def)
  also have "map_pmf (λf. f i) coins = bernoulli_pmf p"
    unfolding coins_def using i by (subst Pi_pmf_component) auto
  finally show ?thesis
    unfolding map_pmf_rep_eq .
qed

lemma prob_binomial_pmf_conv_coins:
  "measure_pmf.prob (binomial_pmf n p) {x. P (real x)} = 
   measure_pmf.prob coins {x. P (∑i<n. if x i then 1 else 0)}"
proof -
  have eq1: "(∑i<n. if x i then 1 else 0) = real (card {i∈{..<n}. x i})" for x
  proof -
    have "(∑i<n. if x i then 1 else (0::real)) = (∑i∈{i∈{..<n}. x i}. 1)"
      by (intro sum.mono_neutral_cong_right) auto
    thus ?thesis by simp
  qed
  have eq2: "binomial_pmf n p = map_pmf (λv. card {i∈{..<n}. v i}) coins"
    unfolding coins_def by (rule binomial_pmf_altdef') (use p in auto)
  show ?thesis
    by (subst eq2) (simp_all add: eq1)
qed

interpretation Hoeffding_ineq_iid
  coins "{..<n}" "λi f. if f i then 1 else 0" "λf. if f 0 then 1 else 0" 0 1 p
proof unfold_locales
  show "prob_space.indep_vars (measure_pmf coins) (λ_. borel) (λi f. if f i then 1 else 0) {..<n}"
    unfolding coins_def
    by (intro prob_space.indep_vars_compose2[OF _ indep_vars_Pi_pmf])
       (auto simp: measure_pmf.prob_space_axioms)
next
  have "measure_pmf.expectation coins (λf. if f 0 then 1 else 0 :: real) =
        measure_pmf.expectation (map_pmf (λf. f 0) coins) (λb. if b then 1 else 0 :: real)"
    by (simp add: coins_def)
  also have "map_pmf (λf. f 0) coins = bernoulli_pmf p"
    using n by (simp add: coins_def Pi_pmf_component)
  also have "measure_pmf.expectation … (λb. if b then 1 else 0) = p"
    using p by simp
  finally show "p ≡ measure_pmf.expectation coins (λf. if f 0 then 1 else 0)" by simp
qed (auto simp: coins_component)

corollary
  fixes ε :: real
  assumes ε: "ε ≥ 0"
  shows prob_ge: "measure_pmf.prob (binomial_pmf n p) {x. x ≥ n * p + ε} ≤ exp (-2 * ε⇧² / n)"
    and prob_le: "measure_pmf.prob (binomial_pmf n p) {x. x ≤ n * p - ε} ≤ exp (-2 * ε⇧² / n)"
    and prob_abs_ge:
          "measure_pmf.prob (binomial_pmf n p) {x. ¦x - n * p¦ ≥ ε} ≤ 2 * exp (-2 * ε⇧² / n)"
proof -
  have [simp]: "{..<n} ≠ {}"
    using n by auto
  show "measure_pmf.prob (binomial_pmf n p) {x. x ≥ n * p + ε} ≤ exp (-2 * ε⇧² / n)"
    using Hoeffding_ineq_ge[of ε] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
  show "measure_pmf.prob (binomial_pmf n p) {x. x ≤ n * p - ε} ≤ exp (-2 * ε⇧² / n)"
    using Hoeffding_ineq_le[of ε] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
  show "measure_pmf.prob (binomial_pmf n p) {x. ¦x - n * p¦ ≥ ε} ≤ 2 *  exp (-2 * ε⇧² / n)"
    using Hoeffding_ineq_abs_ge[of ε]
    by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
qed

corollary
  fixes ε :: real
  assumes ε: "ε ≥ 0"
  shows prob_ge': "measure_pmf.prob (binomial_pmf n p) {x. x / n ≥ p + ε} ≤ exp (-2 * n * ε⇧²)"
    and prob_le': "measure_pmf.prob (binomial_pmf n p) {x. x / n ≤ p - ε} ≤ exp (-2 * n * ε⇧²)"
    and prob_abs_ge':
          "measure_pmf.prob (binomial_pmf n p) {x. ¦x / n - p¦ ≥ ε} ≤ 2 * exp (-2 * n * ε⇧²)"
proof -
  have [simp]: "{..<n} ≠ {}"
    using n by auto
  show "measure_pmf.prob (binomial_pmf n p) {x. x / n ≥ p + ε} ≤ exp (-2 * n * ε⇧²)"
    using Hoeffding_ineq_ge'[of ε] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
  show "measure_pmf.prob (binomial_pmf n p) {x. x / n ≤ p - ε} ≤ exp (-2 * n * ε⇧²)"
    using Hoeffding_ineq_le'[of ε] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
  show "measure_pmf.prob (binomial_pmf n p) {x. ¦x / n - p¦ ≥ ε} ≤ 2 * exp (-2 * n * ε⇧²)"
    using Hoeffding_ineq_abs_ge'[of ε]
    by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
qed

end

end


subsection ‹Tail bounds for the negative binomial distribution›

text ‹
  Since the tail probabilities of a negative Binomial distribution are equal to the
  tail probabilities of some Binomial distribution, we can obtain tail bounds for the
  negative Binomial distribution through the Hoeffding tail bounds for the Binomial
  distribution.
›

context
  fixes p q :: real
  assumes p: "p ∈ {0<..<1}"
  defines "q ≡ 1 - p"
begin

lemma prob_neg_binomial_pmf_ge_bound:
  fixes n :: nat and k :: real
  defines "μ ≡ real n * q / p"
  assumes k: "k ≥ 0"
  shows "measure_pmf.prob (neg_binomial_pmf n p) {x. real x ≥ μ + k}
         ≤ exp (- 2 * p ^ 3 * k⇧² / (n + p * k))"
proof -
  consider "n = 0" | "p = 1" | "n > 0" "p ≠ 1"
    by blast
  thus ?thesis
  proof cases
    assume [simp]: "n = 0"
    show ?thesis using k
      by (simp add: indicator_def μ_def)
  next
    assume [simp]: "p = 1"
    show ?thesis using k
      by (auto simp add: indicator_def μ_def q_def)
  next
    assume n: "n > 0" and "p ≠ 1"
    from ‹p ≠ 1› and p have p: "p ∈ {0<..<1}"
      by auto
    from p have q: "q ∈ {0<..<1}"
      by (auto simp: q_def)

    define k1 where "k1 = μ + k"
    have k1: "k1 ≥ μ"
      using k by (simp add: k1_def)
    have "k1 > 0"
      by (rule less_le_trans[OF _ k1]) (use p n in ‹auto simp: q_def μ_def›)
  
    define k1' where "k1' = nat (ceiling k1)"
    have "μ ≥ 0" using p
      by (auto simp: μ_def q_def)
    have "¬(x < k1') ⟷ real x ≥ k1" for x
      unfolding k1'_def by linarith
    hence eq: "UNIV - {..<k1'} = {x. x ≥ k1}"
      by auto
    hence "measure_pmf.prob (neg_binomial_pmf n p) {n. n ≥ k1} =
          1 - measure_pmf.prob (neg_binomial_pmf n p) {..<k1'}"
      using measure_pmf.prob_compl[of "{..<k1'}" "neg_binomial_pmf n p"] by simp
    also have "measure_pmf.prob (neg_binomial_pmf n p) {..<k1'} =
               measure_pmf.prob (binomial_pmf (n + k1' - 1) q) {..<k1'}"
      unfolding q_def using p by (intro prob_neg_binomial_pmf_lessThan) auto
    also have "1 - … = measure_pmf.prob (binomial_pmf (n + k1' - 1) q) {n. n ≥ k1}"
      using measure_pmf.prob_compl[of "{..<k1'}" "binomial_pmf (n + k1' - 1) q"] eq by simp
    also have "{x. real x ≥ k1} = {x. x ≥ real (n + k1' - 1) * q + (k1 - real (n + k1' - 1) * q)}"
      by simp
    also have "measure_pmf.prob (binomial_pmf (n + k1' - 1) q) … ≤
                 exp (-2 * (k1 - real (n + k1' - 1) * q)⇧² / real (n + k1' - 1))"
    proof (rule binomial_distribution.prob_ge)
      show "binomial_distribution q"
        by unfold_locales (use q in auto)
    next
      show "n + k1' - 1 > 0"
        using ‹k1 > 0› n unfolding k1'_def by linarith
    next
      have "real (n + nat ⌈k1⌉ - 1) ≤ real n + k1"
        using ‹k1 > 0› by linarith
      hence "real (n + k1' - 1) * q  ≤ (real n + k1) * q"
        unfolding k1'_def by (intro mult_right_mono) (use p in ‹simp_all add: q_def›)
      also have "… ≤ k1"
        using k1 p by (simp add: q_def field_simps μ_def)
      finally show "0 ≤ k1 - real (n + k1' - 1) * q"
        by simp
    qed
    also have "{x. real (n + k1' - 1) * q + (k1 - real (n + k1' - 1) * q) ≤ real x} = {x. real x ≥ k1}"
      by simp
    also have "exp (-2 * (k1 - real (n + k1' - 1) * q)⇧² / real (n + k1' - 1)) ≤
               exp (-2 * (k1 - (n + k1) * q)⇧² / (n + k1))"
    proof -
      have "real (n + k1' - Suc 0) ≤ real n + k1"
        unfolding k1'_def using ‹k1 > 0› by linarith
      moreover have "(real n + k1) * q ≤ k1"
        using k1 p by (auto simp: q_def field_simps μ_def)
      moreover have "1 < n + k1'"
        using n ‹k1 > 0› unfolding k1'_def by linarith
      ultimately have "2 * (k1 - real (n + k1' - 1) * q)⇧² / real (n + k1' - 1) ≥
                       2 * (k1 - (n + k1) * q)⇧² / (n + k1)"
        by (intro frac_le mult_left_mono power_mono mult_nonneg_nonneg mult_right_mono diff_mono)
           (use q in simp_all)
      thus ?thesis
        by simp
    qed
    also have "… = exp (-2 * (p * k1 - q * n)⇧² / (k1 + n))"
      by (simp add: q_def algebra_simps)
    also have "-2 * (p * k1 - q * n)⇧² = -2 * p⇧² * (k1 - μ)⇧²"
      using p by (auto simp: field_simps μ_def)
    also have "k1 - μ = k"
      by (simp add: k1_def μ_def)
    also note k1_def
    also have "μ + k + real n = real n / p + k"
      using p by (simp add: μ_def q_def field_simps)
    also have "- 2 * p⇧² * k⇧² / (real n / p + k) = - 2 * p ^ 3 * k⇧² / (p * k + n)"
      using p by (simp add: field_simps power3_eq_cube power2_eq_square)
    finally show ?thesis by (simp add: add_ac)
  qed
qed

lemma prob_neg_binomial_pmf_le_bound:
  fixes n :: nat and k :: real
  defines "μ ≡ real n * q / p"
  assumes k: "k ≥ 0"
  shows "measure_pmf.prob (neg_binomial_pmf n p) {x. real x ≤ μ - k}
         ≤ exp (-2 * p ^ 3 * k⇧² / (n - p * k))"
proof -
  consider "n = 0" | "p = 1" | "k > μ" | "n > 0" "p ≠ 1" "k ≤ μ"
    by force
  thus ?thesis
  proof cases
    assume [simp]: "n = 0"
    show ?thesis using k
      by (simp add: indicator_def μ_def)
  next
    assume [simp]: "p = 1"
    show ?thesis using k
      by (auto simp add: indicator_def μ_def q_def)
  next
    assume "k > μ"
    hence "{x. real x ≤ μ - k} = {}"
      by auto
    thus ?thesis by simp
  next
    assume n: "n > 0" and "p ≠ 1" and "k ≤ μ"
    from ‹p ≠ 1› and p have p: "p ∈ {0<..<1}"
      by auto
    from p have q: "q ∈ {0<..<1}"
      by (auto simp: q_def)

    define f :: "real ⇒ real" where "f = (λx. (p * x - q * n)⇧² / (x + n))"
    have f_mono: "f x ≥ f y" if "x ≥ 0" "y ≤ n * q / p" "x ≤ y" for x y :: real
      using that(3)
    proof (rule DERIV_nonpos_imp_nonincreasing)
      fix t assume t: "t ≥ x" "t ≤ y"
      have "x > -n"
        using n ‹x ≥ 0› by linarith
      hence "(f has_field_derivative ((p * t - q * n) * (n * (1 + p) + p * t) / (n + t) ^ 2)) (at t)"
        unfolding f_def using t
        by (auto intro!: derivative_eq_intros simp: algebra_simps q_def power2_eq_square)
      moreover {
        have "p * t ≤ p * y"
          using p by (intro mult_left_mono t) auto
        also have "p * y ≤ q * n"
          using ‹y ≤ n * q / p› p by (simp add: field_simps)
        finally have "p * t ≤ q * n" .
      }
      hence "(p * t - q * n) * (n * (1 + p) + p * t) / (n + t) ^ 2 ≤ 0"
        using p ‹x ≥ 0› t
        by (intro mult_nonpos_nonneg divide_nonpos_nonneg add_nonneg_nonneg mult_nonneg_nonneg) auto
      ultimately show "∃y. (f has_real_derivative y) (at t) ∧ y ≤ 0"
        by blast
    qed

    define k1 where "k1 = μ - k"
    have k1: "k1 ≤ real n * q / p"
      using assms by (simp add: μ_def k1_def)
    have "k1 ≥ 0"
      using k ‹k ≤ μ› by (simp add: μ_def k1_def)
  
    define k1' where "k1' = nat (floor k1)"
    have "μ ≥ 0" using p
      by (auto simp: μ_def q_def)
    have "(x ≤ k1') ⟷ real x ≤ k1" for x
      unfolding k1'_def not_less using ‹k1 ≥ 0› by linarith
    hence eq: "{n. n ≤ k1}  = {..k1'}"
      by auto
    hence "measure_pmf.prob (neg_binomial_pmf n p) {n. n ≤ k1} =
           measure_pmf.prob (neg_binomial_pmf n p) {..k1'}"
      by simp
    also have "measure_pmf.prob (neg_binomial_pmf n p) {..k1'} =
               measure_pmf.prob (binomial_pmf (n + k1') q) {..k1'}"
      unfolding q_def using p by (intro prob_neg_binomial_pmf_atMost) auto
    also note eq [symmetric]
    also have "{x. real x ≤ k1} = {x. x ≤ real (n + k1') * q - (real (n + k1') * q - real k1')}"
      using eq by auto
    also have "measure_pmf.prob (binomial_pmf (n + k1') q) … ≤
                 exp (-2 * (real (n + k1') * q - real k1')⇧² / real (n + k1'))"
    proof (rule binomial_distribution.prob_le)
      show "binomial_distribution q"
        by unfold_locales (use q in auto)
    next
      show "n + k1' > 0"
        using ‹k1 ≥ 0› n unfolding k1'_def by linarith
    next
      have "p * k1' ≤ p * k1"
        using p ‹k1 ≥ 0› by (intro mult_left_mono) (auto simp: k1'_def)
      also have "… ≤ q * n"
        using k1 p by (simp add: field_simps)
      finally show "0 ≤ real (n + k1') * q - real k1'"
        by (simp add: algebra_simps q_def)
    qed
    also have "{x. real x ≤ real (n + k1') * q - (real (n + k1') * q - k1')} = {..k1'}"
      by auto
    also have "real (n + k1') * q - k1' = -(p * k1' - q * n)"
      by (simp add: q_def algebra_simps)
    also have "… ^ 2 = (p * k1' - q * n) ^ 2"
      by algebra
    also have "- 2 * (p * real k1' - q * real n)⇧² / real (n + k1') = -2 * f (real k1')"
      by (simp add: f_def)
    also have "f (real k1') ≥ f k1"
      by (rule f_mono) (use ‹k1 ≥ 0› k1 in ‹auto simp: k1'_def›)
    hence "exp (-2 * f (real k1')) ≤ exp (-2 * f k1)"
      by simp
    also have "… = exp (-2 * (p * k1 - q * n)⇧² / (k1 + n))"
      by (simp add: f_def)

    also have "-2 * (p * k1 - q * n)⇧² = -2 * p⇧² * (k1 - μ)⇧²"
      using p by (auto simp: field_simps μ_def)
    also have "(k1 - μ) ^ 2 = k ^ 2"
      by (simp add: k1_def μ_def)
    also note k1_def
    also have "μ - k + real n = real n / p - k"
      using p by (simp add: μ_def q_def field_simps)
    also have "- 2 * p⇧² * k⇧² / (real n / p - k) = - 2 * p ^ 3 * k⇧² / (n - p * k)"
      using p by (simp add: field_simps power3_eq_cube power2_eq_square)
    also have "{..k1'} = {x. real x ≤ μ - k}"
      using eq by (simp add: k1_def)
    finally show ?thesis .
  qed
qed

text ‹
  Due to the function $exp(-l/x)$ being concave for $x \geq \frac{l}{2}$, the above two
  bounds can be combined into the following one for moderate values of ‹k›.
  (cf. 🌐‹https://math.stackexchange.com/questions/1565559›)
›
lemma prob_neg_binomial_pmf_abs_ge_bound:
  fixes n :: nat and k :: real
  defines "μ ≡ real n * q / p"
  assumes "k ≥ 0" and n_ge: "n ≥ p * k * (p⇧² * k + 1)"
  shows "measure_pmf.prob (neg_binomial_pmf n p) {x. ¦real x - μ¦ ≥ k} ≤
           2 * exp (-2 * p ^ 3 * k ^ 2 / n)"
proof (cases "k = 0")
  case False
  with ‹k ≥ 0› have k: "k > 0"
    by auto
  define l :: real where "l = 2 * p ^ 3 * k ^ 2"
  have l: "l > 0"
    using p k by (auto simp: l_def)
  define f :: "real ⇒ real" where "f = (λx. exp (-l / x))"
  define f' where "f' = (λx. -l * exp (-l / x) / x ^ 2)"

  have f'_mono: "f' x ≤ f' y" if "x ≥ l / 2" "x ≤ y" for x y :: real
    using that(2)
  proof (rule DERIV_nonneg_imp_nondecreasing)
    fix t assume t: "x ≤ t" "t ≤ y"
    have "t > 0"
      using that l t by auto
    have "(f' has_field_derivative (l * (2 * t - l) / (exp (l / t) * t ^ 4))) (at t)"
      unfolding f'_def using t that ‹t > 0›
      by (auto intro!: derivative_eq_intros simp: field_simps exp_minus simp flip: power_Suc)
    moreover have "l * (2 * t - l) / (exp (l / t) * t ^ 4) ≥ 0"
      using that t l by (intro divide_nonneg_pos mult_nonneg_nonneg) auto
    ultimately show "∃y. (f' has_real_derivative y) (at t) ∧ 0 ≤ y" by blast
  qed

  have convex: "convex_on {l/2..} (λx. -f x)" unfolding f_def
  proof (intro convex_on_realI[where f' = f'])
    show "((λx. - exp (- l / x)) has_real_derivative f' x) (at x)" if "x ∈ {l/2..}" for x
      using that l
      by (auto intro!: derivative_eq_intros simp: f'_def power2_eq_square algebra_simps)
  qed (use l in ‹auto intro!: f'_mono›)

  have eq: "{x. ¦real x - μ¦ ≥ k} = {x. real x ≤ μ - k} ∪ {x. real x ≥ μ + k}"
    by auto
  have "measure_pmf.prob (neg_binomial_pmf n p) {x. ¦real x - μ¦ ≥ k} ≤
        measure_pmf.prob (neg_binomial_pmf n p) {x. real x ≤ μ - k} +
        measure_pmf.prob (neg_binomial_pmf n p) {x. real x ≥ μ + k}"
    by (subst eq, rule measure_Un_le) auto
  also have "… ≤ exp (-2 * p ^ 3 * k⇧² / (n - p * k)) + exp (-2 * p ^ 3 * k⇧² / (n + p * k))"
    unfolding μ_def
    by (intro prob_neg_binomial_pmf_le_bound prob_neg_binomial_pmf_ge_bound add_mono ‹k ≥ 0›)
  also have "… = 2 * (1/2 * f (n - p * k) + 1/2 * f (n + p * k))"
    by (simp add: f_def l_def)
  also have "1/2 * f (n - p * k) + 1/2 * f (n + p * k) ≤ f (1/2 * (n - p * k) + 1/2 * (n + p * k))"
  proof -
    let ?x = "n - p * k" and ?y = "n + p * k"
    have le1: "l / 2 ≤ ?x" using n_ge
      by (simp add: l_def power2_eq_square power3_eq_cube algebra_simps)
    also have "… ≤ ?y"
      using p k by simp
    finally have le2: "l / 2 ≤ ?y" .
    have "-f ((1 - 1 / 2) *⇩_R ?x + (1 / 2) *⇩_R ?y) ≤ (1 - 1 / 2) * - f ?x + 1 / 2 * - f ?y"
      using le1 le2 by (intro convex_onD[OF convex]) auto
    thus ?thesis by simp
  qed
  also have "1/2 * (n - p * k) + 1/2 * (n + p * k) = n"
    by (simp add: algebra_simps)
  also have "2 * f n = 2 * exp (-l / n)"
    by (simp add: f_def)
  finally show ?thesis
    by (simp add: l_def)
qed auto

end

end