# Theory Hoeffding

(*
File:    Hoeffding.thy
Author:  Manuel Eberl, TU München
*)
section ‹Hoeffding's Lemma and Hoeffding's Inequality›
theory Hoeffding
imports Product_PMF Independent_Family
begin

text ‹
Hoeffding's inequality shows that a sum of bounded independent random variables is concentrated
around its mean, with an exponential decay of the tail probabilities.
›

subsection ‹Hoeffding's Lemma›

lemma convex_on_exp:
fixes l :: real
assumes "l ≥ 0"
shows   "convex_on UNIV (λx. exp(l*x))"
using assms
by (intro convex_on_realI[where f' = "λx. l * exp (l * x)"])
(auto intro!: derivative_eq_intros mult_left_mono)

lemma mult_const_minus_self_real_le:
fixes x :: real
shows "x * (c - x) ≤ c⇧2 / 4"
proof -
have "x * (c - x) = -(x - c / 2)⇧2 + c⇧2 / 4"
also have "… ≤ 0 + c⇧2 / 4"
finally show ?thesis by simp
qed

lemma Hoeffdings_lemma_aux:
fixes h p :: real
assumes "h ≥ 0" and "p ≥ 0"
defines "L ≡ (λh. -h * p + ln (1 + p * (exp h - 1)))"
shows   "L h ≤ h⇧2 / 8"
proof (cases "h = 0")
case False
hence h: "h > 0"
using ‹h ≥ 0› by simp
define L' where "L' = (λh. -p + p * exp h / (1 + p * (exp h - 1)))"
define L'' where "L'' = (λh. -(p⇧2) * exp h * exp h / (1 + p * (exp h - 1))⇧2 +
p * exp h / (1 + p * (exp h - 1)))"
define Ls where "Ls = (λn. [L, L', L''] ! n)"

have [simp]: "L 0 = 0" "L' 0 = 0"
by (auto simp: L_def L'_def)

have L': "(L has_real_derivative L' x) (at x)" if "x ∈ {0..h}" for x
proof -
have "1 + p * (exp x - 1) > 0"
using ‹p ≥ 0› that by (intro add_pos_nonneg mult_nonneg_nonneg) auto
thus ?thesis
unfolding L_def L'_def by (auto intro!: derivative_eq_intros)
qed

have L'': "(L' has_real_derivative L'' x) (at x)" if "x ∈ {0..h}" for x
proof -
have *: "1 + p * (exp x - 1) > 0"
using ‹p ≥ 0› that by (intro add_pos_nonneg mult_nonneg_nonneg) auto
show ?thesis
unfolding L'_def L''_def
by (insert *, (rule derivative_eq_intros refl | simp)+) (auto simp: divide_simps; algebra)
qed

have diff: "∀m t. m < 2 ∧ 0 ≤ t ∧ t ≤ h ⟶ (Ls m has_real_derivative Ls (Suc m) t) (at t)"
using L' L'' by (auto simp: Ls_def nth_Cons split: nat.splits)
from Taylor[of 2 Ls L 0 h 0 h, OF _ _ diff]
obtain t where t: "t ∈ {0<..<h}" "L h = L'' t * h⇧2 / 2"
using ‹h > 0› by (auto simp: Ls_def lessThan_nat_numeral)
define u where "u = p * exp t / (1 + p * (exp t - 1))"

have "L'' t = u * (1 - u)"
by (simp add: L''_def u_def divide_simps; algebra)
also have "… ≤ 1 / 4"
using mult_const_minus_self_real_le[of u 1] by simp
finally have "L'' t ≤ 1 / 4" .

note t(2)
also have "L'' t * h⇧2 / 2 ≤ (1 / 4) * h⇧2 / 2"
using ‹L'' t ≤ 1 / 4› by (intro mult_right_mono divide_right_mono) auto
finally show "L h ≤ h⇧2 / 8" by simp
qed (auto simp: L_def)

locale interval_bounded_random_variable = prob_space +
fixes f :: "'a ⇒ real" and a b :: real
assumes random_variable [measurable]: "random_variable borel f"
assumes AE_in_interval: "AE x in M. f x ∈ {a..b}"
begin

lemma integrable [intro]: "integrable M f"
proof (rule integrable_const_bound)
show "AE x in M. norm (f x) ≤ max ¦a¦ ¦b¦"
by (intro eventually_mono[OF AE_in_interval]) auto
qed (fact random_variable)

text ‹
We first show Hoeffding's lemma for distributions whose expectation is 0. The general
case will easily follow from this later.
›
lemma Hoeffdings_lemma_nn_integral_0:
assumes "l > 0" and E0: "expectation f = 0"
shows   "nn_integral M (λx. exp (l * f x)) ≤ ennreal (exp (l⇧2 * (b - a)⇧2 / 8))"
proof (cases "AE x in M. f x = 0")
case True
hence "nn_integral M (λx. exp (l * f x)) = nn_integral M (λx. ennreal 1)"
by (intro nn_integral_cong_AE) auto
also have "… = ennreal (expectation (λ_. 1))"
by (intro nn_integral_eq_integral) auto
finally show ?thesis by (simp add: prob_space)
next
case False
have "a < 0"
proof (rule ccontr)
assume a: "¬(a < 0)"
have "AE x in M. f x = 0"
proof (subst integral_nonneg_eq_0_iff_AE [symmetric])
show "AE x in M. f x ≥ 0"
using AE_in_interval by eventually_elim (use a in auto)
qed (use E0 in ‹auto simp: id_def integrable›)
with False show False by contradiction
qed

have "b > 0"
proof (rule ccontr)
assume b: "¬(b > 0)"
have "AE x in M. -f x = 0"
proof (subst integral_nonneg_eq_0_iff_AE [symmetric])
show "AE x in M. -f x ≥ 0"
using AE_in_interval by eventually_elim (use b in auto)
qed (use E0 in ‹auto simp: id_def integrable›)
with False show False by simp
qed

have "a < b"
using ‹a < 0› ‹b > 0› by linarith

define p where "p = -a / (b - a)"
define L where "L = (λt. -t* p + ln (1 - p + p * exp t))"
define z where "z = l * (b - a)"
have "z > 0"
unfolding z_def using ‹a < b› ‹l > 0› by auto
have "p > 0"
using ‹a < 0› ‹a < b› unfolding p_def by (intro divide_pos_pos) auto

have "(∫⇧+x. exp (l * f x) ∂M) ≤
(∫⇧+x. (b - f x) / (b - a) * exp (l * a) + (f x - a) / (b - a) * exp (l * b) ∂M)"
proof (intro nn_integral_mono_AE eventually_mono[OF AE_in_interval] ennreal_leI)
fix x assume x: "f x ∈ {a..b}"
define y where "y = (b - f x) / (b-a)"
have y: "y ∈ {0..1}"
using x ‹a < b› by (auto simp: y_def)
have conv: "convex_on UNIV (λx. exp(l*x))"
using ‹l > 0› by (intro convex_on_exp) auto
have "exp (l * ((1 - y) *⇩R b + y *⇩R a)) ≤ (1 - y) * exp (l * b) + y * exp (l * a)"
using y ‹l > 0› by (intro convex_onD[OF convex_on_exp]) auto
also have "(1 - y) *⇩R b + y *⇩R a = f x"
using ‹a < b› by (simp add: y_def divide_simps) (simp add: algebra_simps)?
also have "1 - y = (f x - a) / (b - a)"
using ‹a < b› by (simp add: field_simps y_def)
finally show "exp (l * f x) ≤ (b - f x) / (b - a) * exp (l*a) + (f x - a)/(b-a) * exp (l*b)"
qed
also have "… = (∫⇧+x. ennreal (b - f x) * exp (l * a) / (b - a) +
ennreal (f x - a) * exp (l * b) / (b - a) ∂M)"
using ‹a < 0› ‹b > 0›
by (intro nn_integral_cong_AE eventually_mono[OF AE_in_interval])
(simp add: ennreal_plus ennreal_mult flip: divide_ennreal)
also have "… = ((∫⇧+ x. ennreal (b - f x) ∂M) * ennreal (exp (l * a)) +
(∫⇧+ x. ennreal (f x - a) ∂M) * ennreal (exp (l * b))) / ennreal (b - a)"
also have "(∫⇧+ x. ennreal (b - f x) ∂M) = ennreal (expectation (λx. b - f x))"
by (intro nn_integral_eq_integral Bochner_Integration.integrable_diff
eventually_mono[OF AE_in_interval] integrable_const integrable) auto
also have "expectation (λx. b - f x) = b"
using assms by (subst Bochner_Integration.integral_diff) (auto simp: prob_space)
also have "(∫⇧+ x. ennreal (f x - a) ∂M) = ennreal (expectation (λx. f x - a))"
by (intro nn_integral_eq_integral Bochner_Integration.integrable_diff
eventually_mono[OF AE_in_interval] integrable_const integrable) auto
also have "expectation (λx. f x - a) = (-a)"
using assms by (subst Bochner_Integration.integral_diff) (auto simp: prob_space)
also have "(ennreal b * (exp (l * a)) + ennreal (-a) * (exp (l * b))) / (b - a) =
ennreal (b * exp (l * a) - a * exp (l * b)) / ennreal (b - a)"
using ‹a < 0› ‹b > 0›
by (simp flip: ennreal_mult ennreal_plus add: mult_nonpos_nonneg divide_ennreal mult_mono)
also have "b * exp (l * a) - a * exp (l * b) = exp (L z) * (b - a)"
proof -
have pos: "1 - p + p * exp z > 0"
proof -
have "exp z > 1" using ‹l > 0› and ‹a < b›
by (subst one_less_exp_iff) (auto simp: z_def intro!: mult_pos_pos)
hence "(exp z - 1) * p ≥ 0"
unfolding p_def using ‹a < 0› and ‹a < b›
by (intro mult_nonneg_nonneg divide_nonneg_pos) auto
thus ?thesis
qed

have "exp (L z) * (b - a) = exp (-z * p) * (1 - p + p * exp z) * (b - a)"
also have "… = b * exp (l * a) - a * exp (l * b)" using ‹a < b›
finally show ?thesis by simp
qed
also have "ennreal (exp (L z) * (b - a)) / ennreal (b - a) = ennreal (exp (L z))"
using ‹a < b› by (simp add: divide_ennreal)
also have "L z = -z * p + ln (1 + p * (exp z - 1))"
also have "… ≤ z⇧2 / 8"
unfolding L_def by (rule Hoeffdings_lemma_aux[where p = p]) (use ‹z > 0› ‹p > 0› in simp_all)
hence "ennreal (exp (-z * p + ln (1 + p * (exp z - 1)))) ≤ ennreal (exp (z⇧2 / 8))"
by (intro ennreal_leI) auto
finally show ?thesis
qed

context
begin

interpretation shift: interval_bounded_random_variable M "λx. f x - μ" "a - μ" "b - μ"
rewrites "b - μ - (a - μ) ≡ b - a"
by unfold_locales (auto intro!: eventually_mono[OF AE_in_interval])

lemma expectation_shift: "expectation (λx. f x - expectation f) = 0"
by (subst Bochner_Integration.integral_diff) (auto simp: integrable prob_space)

lemmas Hoeffdings_lemma_nn_integral = shift.Hoeffdings_lemma_nn_integral_0[OF _ expectation_shift]

end

end

subsection ‹Hoeffding's Inequality›

text ‹
Consider ‹n› independent real random variables $X_1, \ldots, X_n$ that each almost surely lie
in a compact interval $[a_i, b_i]$. Hoeffding's inequality states that the distribution of the
sum of the $X_i$ is tightly concentrated around the sum of the expected values: the probability
of it being above or below the sum of the expected values by more than some ‹ε› decreases
exponentially with ‹ε›.
›

locale indep_interval_bounded_random_variables = prob_space +
fixes I :: "'b set" and X :: "'b ⇒ 'a ⇒ real"
fixes a b :: "'b ⇒ real"
assumes fin: "finite I"
assumes indep: "indep_vars (λ_. borel) X I"
assumes AE_in_interval: "⋀i. i ∈ I ⟹ AE x in M. X i x ∈ {a i..b i}"
begin

lemma random_variable [measurable]:
assumes i: "i ∈ I"
shows "random_variable borel (X i)"
using i indep unfolding indep_vars_def by blast

lemma bounded_random_variable [intro]:
assumes i: "i ∈ I"
shows   "interval_bounded_random_variable M (X i) (a i) (b i)"
by unfold_locales (use AE_in_interval[OF i] i in auto)

end

locale Hoeffding_ineq = indep_interval_bounded_random_variables +
fixes μ :: real
defines "μ ≡ (∑i∈I. expectation (X i))"
begin

theorem%important Hoeffding_ineq_ge:
assumes "ε ≥ 0"
assumes "(∑i∈I. (b i - a i)⇧2) > 0"
shows   "prob {x∈space M. (∑i∈I. X i x) ≥ μ + ε} ≤ exp (-2 * ε⇧2 / (∑i∈I. (b i - a i)⇧2))"
proof (cases "ε = 0")
case [simp]: True
have "prob {x∈space M. (∑i∈I. X i x) ≥ μ + ε} ≤ 1"
by simp
thus ?thesis by simp
next
case False
with ‹ε ≥ 0› have ε: "ε > 0"
by auto

define d where "d = (∑i∈I. (b i - a i)⇧2)"
define l :: real where "l = 4 * ε / d"
have d: "d > 0"
using assms by (simp add: d_def)
have l: "l > 0"
using ε d by (simp add: l_def)
define μ' where "μ' = (λi. expectation (X i))"

have "{x∈space M. (∑i∈I. X i x) ≥ μ + ε} = {x∈space M. (∑i∈I. X i x) - μ ≥ ε}"
hence "ennreal (prob {x∈space M. (∑i∈I. X i x) ≥ μ + ε}) = emeasure M …"
also have "… ≤ ennreal (exp (-l*ε)) * (∫⇧+x∈space M. exp (l * ((∑i∈I. X i x) - μ)) ∂M)"
by (intro Chernoff_ineq_nn_integral_ge l) auto
also have "(λx. (∑i∈I. X i x) - μ) = (λx. (∑i∈I. X i x - μ' i))"
by (simp add: μ_def sum_subtractf μ'_def)
also have "(∫⇧+x∈space M. exp (l * ((∑i∈I. X i x - μ' i))) ∂M) =
(∫⇧+x. (∏i∈I. ennreal (exp (l * (X i x - μ' i)))) ∂M)"
by (intro nn_integral_cong)
(simp_all add: sum_distrib_left ring_distribs exp_diff exp_sum fin prod_ennreal)
also have "… = (∏i∈I. ∫⇧+x. ennreal (exp (l * (X i x - μ' i))) ∂M)"
by (intro indep_vars_nn_integral fin indep_vars_compose2[OF indep]) auto
also have "ennreal (exp (-l * ε)) * … ≤
ennreal (exp (-l * ε)) * (∏i∈I. ennreal (exp (l⇧2 * (b i - a i)⇧2 / 8)))"
proof (intro mult_left_mono prod_mono_ennreal)
fix i assume i: "i ∈ I"
from i interpret interval_bounded_random_variable M "X i" "a i" "b i" ..
show "(∫⇧+x. ennreal (exp (l * (X i x - μ' i))) ∂M) ≤ ennreal (exp (l⇧2 * (b i - a i)⇧2 / 8))"
unfolding μ'_def by (rule Hoeffdings_lemma_nn_integral) fact+
qed auto
also have "… = ennreal (exp (-l*ε) * (∏i∈I. exp (l⇧2 * (b i - a i)⇧2 / 8)))"
by (simp add: prod_ennreal prod_nonneg flip: ennreal_mult)
also have "exp (-l*ε) * (∏i∈I. exp (l⇧2 * (b i - a i)⇧2 / 8)) = exp (d * l⇧2 / 8 - l * ε)"
by (simp add: exp_diff exp_minus sum_divide_distrib sum_distrib_left
sum_distrib_right exp_sum fin divide_simps mult_ac d_def)
also have "d * l⇧2 / 8 - l * ε = -2 * ε⇧2 / d"
using d by (simp add: l_def field_simps power2_eq_square)
finally show ?thesis
by (subst (asm) ennreal_le_iff) (simp_all add: d_def)
qed

corollary Hoeffding_ineq_le:
assumes ε: "ε ≥ 0"
assumes "(∑i∈I. (b i - a i)⇧2) > 0"
shows   "prob {x∈space M. (∑i∈I. X i x) ≤ μ - ε} ≤ exp (-2 * ε⇧2 / (∑i∈I. (b i - a i)⇧2))"
proof -
interpret flip: Hoeffding_ineq M I "λi x. -X i x" "λi. -b i" "λi. -a i" "-μ"
proof unfold_locales
fix i assume "i ∈ I"
then interpret interval_bounded_random_variable M "X i" "a i" "b i" ..
show "AE x in M. - X i x ∈ {- b i..- a i}"
by (intro eventually_mono[OF AE_in_interval]) auto
qed (auto simp: fin μ_def sum_negf intro: indep_vars_compose2[OF indep])

have "prob {x∈space M. (∑i∈I. X i x) ≤ μ - ε} = prob {x∈space M. (∑i∈I. -X i x) ≥ -μ + ε}"
also have "… ≤ exp (- 2 * ε⇧2 / (∑i∈I. (b i - a i)⇧2))"
using flip.Hoeffding_ineq_ge[OF ε] assms(2) by simp
finally show ?thesis .
qed

corollary Hoeffding_ineq_abs_ge:
assumes ε: "ε ≥ 0"
assumes "(∑i∈I. (b i - a i)⇧2) > 0"
shows   "prob {x∈space M. ¦(∑i∈I. X i x) - μ¦ ≥ ε} ≤ 2 * exp (-2 * ε⇧2 / (∑i∈I. (b i - a i)⇧2))"
proof -
have "{x∈space M. ¦(∑i∈I. X i x) - μ¦ ≥ ε} =
{x∈space M. (∑i∈I. X i x) ≥ μ + ε} ∪ {x∈space M. (∑i∈I. X i x) ≤ μ - ε}"
by auto
also have "prob … ≤ prob {x∈space M. (∑i∈I. X i x) ≥ μ + ε} +
prob {x∈space M. (∑i∈I. X i x) ≤ μ - ε}"
by (intro measure_Un_le) auto
also have "… ≤ exp (-2 * ε⇧2 / (∑i∈I. (b i - a i)⇧2)) + exp (-2 * ε⇧2 / (∑i∈I. (b i - a i)⇧2))"
by (intro add_mono Hoeffding_ineq_ge Hoeffding_ineq_le assms)
finally show ?thesis by simp
qed

end

subsection ‹Hoeffding's inequality for i.i.d. bounded random variables›

text ‹
If we have ‹n› even identically-distributed random variables, the statement of Hoeffding's
lemma simplifies a bit more: it shows that the probability that the average of the $X_i$
is more than ‹ε› above the expected value is no greater than $e^{\frac{-2ny^2}{(b-a)^2}}$.

This essentially gives us a more concrete version of the weak law of large numbers: the law
states that the probability vanishes for ‹n → ∞› for any ‹ε > 0›. Unlike Hoeffding's inequality,
it does not assume the variables to have bounded support, but it does not provide concrete bounds.
›

locale iid_interval_bounded_random_variables = prob_space +
fixes I :: "'b set" and X :: "'b ⇒ 'a ⇒ real" and Y :: "'a ⇒ real"
fixes a b :: real
assumes fin: "finite I"
assumes indep: "indep_vars (λ_. borel) X I"
assumes distr_X: "i ∈ I ⟹ distr M borel (X i) = distr M borel Y"
assumes rv_Y [measurable]: "random_variable borel Y"
assumes AE_in_interval: "AE x in M. Y x ∈ {a..b}"
begin

lemma random_variable [measurable]:
assumes i: "i ∈ I"
shows "random_variable borel (X i)"
using i indep unfolding indep_vars_def by blast

sublocale X: indep_interval_bounded_random_variables M I X "λ_. a" "λ_. b"
proof
fix i assume i: "i ∈ I"
have "AE x in M. Y x ∈ {a..b}"
by (fact AE_in_interval)
also have "?this ⟷ (AE x in distr M borel Y. x ∈ {a..b})"
by (subst AE_distr_iff) auto
also have "distr M borel Y = distr M borel (X i)"
using i by (simp add: distr_X)
also have "(AE x in …. x ∈ {a..b}) ⟷ (AE x in M. X i x ∈ {a..b})"
using i by (subst AE_distr_iff) auto
finally show "AE x in M. X i x ∈ {a..b}" .

lemma expectation_X [simp]:
assumes i: "i ∈ I"
shows "expectation (X i) = expectation Y"
proof -
have "expectation (X i) = lebesgue_integral (distr M borel (X i)) (λx. x)"
using i by (intro integral_distr [symmetric]) auto
also have "distr M borel (X i) = distr M borel Y"
using i by (rule distr_X)
also have "lebesgue_integral … (λx. x) = expectation Y"
by (rule integral_distr) auto
finally show "expectation (X i) = expectation Y" .
qed

end

locale Hoeffding_ineq_iid = iid_interval_bounded_random_variables +
fixes μ :: real
defines "μ ≡ expectation Y"
begin

sublocale X: Hoeffding_ineq M I X "λ_. a" "λ_. b" "real (card I) * μ"

corollary
assumes ε: "ε ≥ 0"
assumes "a < b" "I ≠ {}"
defines "n ≡ card I"
shows   Hoeffding_ineq_ge:
"prob {x∈space M. (∑i∈I. X i x) ≥ n * μ + ε} ≤
exp (-2 * ε⇧2 / (n * (b - a)⇧2))" (is ?le)
and   Hoeffding_ineq_le:
"prob {x∈space M. (∑i∈I. X i x) ≤ n * μ - ε} ≤
exp (-2 * ε⇧2 / (n * (b - a)⇧2))" (is ?ge)
and   Hoeffding_ineq_abs_ge:
"prob {x∈space M. ¦(∑i∈I. X i x) - n * μ¦ ≥ ε} ≤
2 * exp (-2 * ε⇧2 / (n * (b - a)⇧2))" (is ?abs_ge)
proof -
have pos: "(∑i∈I. (b - a)⇧2) > 0"
using ‹a < b› ‹I ≠ {}› fin by (intro sum_pos) auto
show ?le
using X.Hoeffding_ineq_ge[OF ε pos] by (simp add: n_def)
show ?ge
using X.Hoeffding_ineq_le[OF ε pos] by (simp add: n_def)
show ?abs_ge
using X.Hoeffding_ineq_abs_ge[OF ε pos] by (simp add: n_def)
qed

lemma
assumes ε: "ε ≥ 0"
assumes "a < b" "I ≠ {}"
defines "n ≡ card I"
shows   Hoeffding_ineq_ge':
"prob {x∈space M. (∑i∈I. X i x) / n ≥ μ + ε} ≤
exp (-2 * n * ε⇧2 / (b - a)⇧2)" (is ?ge)
and   Hoeffding_ineq_le':
"prob {x∈space M. (∑i∈I. X i x) / n ≤ μ - ε} ≤
exp (-2 * n * ε⇧2 / (b - a)⇧2)" (is ?le)
and   Hoeffding_ineq_abs_ge':
"prob {x∈space M. ¦(∑i∈I. X i x) / n - μ¦ ≥ ε} ≤
2 * exp (-2 * n * ε⇧2 / (b - a)⇧2)" (is ?abs_ge)
proof -
have "n > 0"
using assms fin by (auto simp: field_simps)
have ε': "ε * n ≥ 0"
using ‹n > 0› ‹ε ≥ 0› by auto
have eq: "- (2 * (ε * real n)⇧2 / (real (card I) * (b - a)⇧2)) =
- (2 * real n * ε⇧2 / (b - a)⇧2)"
using ‹n > 0› by (simp add: power2_eq_square divide_simps n_def)

have "{x∈space M. (∑i∈I. X i x) / n ≥ μ + ε} =
{x∈space M. (∑i∈I. X i x) ≥ μ * n + ε * n}"
using ‹n > 0› by (intro Collect_cong conj_cong refl) (auto simp: field_simps)
with Hoeffding_ineq_ge[OF ε' ‹a < b› ‹I ≠ {}›] ‹n > 0› eq show ?ge

have "{x∈space M. (∑i∈I. X i x) / n ≤ μ - ε} =
{x∈space M. (∑i∈I. X i x) ≤ μ * n - ε * n}"
using ‹n > 0› by (intro Collect_cong conj_cong refl) (auto simp: field_simps)
with Hoeffding_ineq_le[OF ε' ‹a < b› ‹I ≠ {}›] ‹n > 0› eq show ?le

have "{x∈space M. ¦(∑i∈I. X i x) / n - μ¦ ≥ ε} =
{x∈space M. ¦(∑i∈I. X i x) - μ * n¦ ≥ ε * n}"
using ‹n > 0› by (intro Collect_cong conj_cong refl) (auto simp: field_simps)
with Hoeffding_ineq_abs_ge[OF ε' ‹a < b› ‹I ≠ {}›] ‹n > 0› eq show ?abs_ge
qed

end

subsection ‹Hoeffding's Inequality for the Binomial distribution›

text ‹
We can now apply Hoeffding's inequality to the Binomial distribution, which can be seen
as the sum of ‹n› i.i.d. coin flips (the support of each of which is contained in $[0,1]$).
›

locale binomial_distribution =
fixes n :: nat and p :: real
assumes p: "p ∈ {0..1}"
begin

context
fixes coins :: "(nat ⇒ bool) pmf" and μ
assumes n: "n > 0"
defines "coins ≡ Pi_pmf {..<n} False (λ_. bernoulli_pmf p)"
begin

lemma coins_component:
assumes i: "i < n"
shows   "distr (measure_pmf coins) borel (λf. if f i then 1 else 0) =
distr (measure_pmf (bernoulli_pmf p)) borel (λb. if b then 1 else 0)"
proof -
have "distr (measure_pmf coins) borel (λf. if f i then 1 else 0) =
distr (measure_pmf (map_pmf (λf. f i) coins)) borel (λb. if b then 1 else 0)"
unfolding map_pmf_rep_eq by (subst distr_distr) (auto simp: o_def)
also have "map_pmf (λf. f i) coins = bernoulli_pmf p"
unfolding coins_def using i by (subst Pi_pmf_component) auto
finally show ?thesis
unfolding map_pmf_rep_eq .
qed

lemma prob_binomial_pmf_conv_coins:
"measure_pmf.prob (binomial_pmf n p) {x. P (real x)} =
measure_pmf.prob coins {x. P (∑i<n. if x i then 1 else 0)}"
proof -
have eq1: "(∑i<n. if x i then 1 else 0) = real (card {i∈{..<n}. x i})" for x
proof -
have "(∑i<n. if x i then 1 else (0::real)) = (∑i∈{i∈{..<n}. x i}. 1)"
by (intro sum.mono_neutral_cong_right) auto
thus ?thesis by simp
qed
have eq2: "binomial_pmf n p = map_pmf (λv. card {i∈{..<n}. v i}) coins"
unfolding coins_def by (rule binomial_pmf_altdef') (use p in auto)
show ?thesis
by (subst eq2) (simp_all add: eq1)
qed

interpretation Hoeffding_ineq_iid
coins "{..<n}" "λi f. if f i then 1 else 0" "λf. if f 0 then 1 else 0" 0 1 p
proof unfold_locales
show "prob_space.indep_vars (measure_pmf coins) (λ_. borel) (λi f. if f i then 1 else 0) {..<n}"
unfolding coins_def
by (intro prob_space.indep_vars_compose2[OF _ indep_vars_Pi_pmf])
(auto simp: measure_pmf.prob_space_axioms)
next
have "measure_pmf.expectation coins (λf. if f 0 then 1 else 0 :: real) =
measure_pmf.expectation (map_pmf (λf. f 0) coins) (λb. if b then 1 else 0 :: real)"
also have "map_pmf (λf. f 0) coins = bernoulli_pmf p"
using n by (simp add: coins_def Pi_pmf_component)
also have "measure_pmf.expectation … (λb. if b then 1 else 0) = p"
using p by simp
finally show "p ≡ measure_pmf.expectation coins (λf. if f 0 then 1 else 0)" by simp
qed (auto simp: coins_component)

corollary
fixes ε :: real
assumes ε: "ε ≥ 0"
shows prob_ge: "measure_pmf.prob (binomial_pmf n p) {x. x ≥ n * p + ε} ≤ exp (-2 * ε⇧2 / n)"
and prob_le: "measure_pmf.prob (binomial_pmf n p) {x. x ≤ n * p - ε} ≤ exp (-2 * ε⇧2 / n)"
and prob_abs_ge:
"measure_pmf.prob (binomial_pmf n p) {x. ¦x - n * p¦ ≥ ε} ≤ 2 * exp (-2 * ε⇧2 / n)"
proof -
have [simp]: "{..<n} ≠ {}"
using n by auto
show "measure_pmf.prob (binomial_pmf n p) {x. x ≥ n * p + ε} ≤ exp (-2 * ε⇧2 / n)"
using Hoeffding_ineq_ge[of ε] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
show "measure_pmf.prob (binomial_pmf n p) {x. x ≤ n * p - ε} ≤ exp (-2 * ε⇧2 / n)"
using Hoeffding_ineq_le[of ε] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
show "measure_pmf.prob (binomial_pmf n p) {x. ¦x - n * p¦ ≥ ε} ≤ 2 *  exp (-2 * ε⇧2 / n)"
using Hoeffding_ineq_abs_ge[of ε]
by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
qed

corollary
fixes ε :: real
assumes ε: "ε ≥ 0"
shows prob_ge': "measure_pmf.prob (binomial_pmf n p) {x. x / n ≥ p + ε} ≤ exp (-2 * n * ε⇧2)"
and prob_le': "measure_pmf.prob (binomial_pmf n p) {x. x / n ≤ p - ε} ≤ exp (-2 * n * ε⇧2)"
and prob_abs_ge':
"measure_pmf.prob (binomial_pmf n p) {x. ¦x / n - p¦ ≥ ε} ≤ 2 * exp (-2 * n * ε⇧2)"
proof -
have [simp]: "{..<n} ≠ {}"
using n by auto
show "measure_pmf.prob (binomial_pmf n p) {x. x / n ≥ p + ε} ≤ exp (-2 * n * ε⇧2)"
using Hoeffding_ineq_ge'[of ε] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
show "measure_pmf.prob (binomial_pmf n p) {x. x / n ≤ p - ε} ≤ exp (-2 * n * ε⇧2)"
using Hoeffding_ineq_le'[of ε] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
show "measure_pmf.prob (binomial_pmf n p) {x. ¦x / n - p¦ ≥ ε} ≤ 2 * exp (-2 * n * ε⇧2)"
using Hoeffding_ineq_abs_ge'[of ε]
by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all)
qed

end

end

subsection ‹Tail bounds for the negative binomial distribution›

text ‹
Since the tail probabilities of a negative Binomial distribution are equal to the
tail probabilities of some Binomial distribution, we can obtain tail bounds for the
negative Binomial distribution through the Hoeffding tail bounds for the Binomial
distribution.
›

context
fixes p q :: real
assumes p: "p ∈ {0<..<1}"
defines "q ≡ 1 - p"
begin

lemma prob_neg_binomial_pmf_ge_bound:
fixes n :: nat and k :: real
defines "μ ≡ real n * q / p"
assumes k: "k ≥ 0"
shows "measure_pmf.prob (neg_binomial_pmf n p) {x. real x ≥ μ + k}
≤ exp (- 2 * p ^ 3 * k⇧2 / (n + p * k))"
proof -
consider "n = 0" | "p = 1" | "n > 0" "p ≠ 1"
by blast
thus ?thesis
proof cases
assume [simp]: "n = 0"
show ?thesis using k
next
assume [simp]: "p = 1"
show ?thesis using k
by (auto simp add: indicator_def μ_def q_def)
next
assume n: "n > 0" and "p ≠ 1"
from ‹p ≠ 1› and p have p: "p ∈ {0<..<1}"
by auto
from p have q: "q ∈ {0<..<1}"
by (auto simp: q_def)

define k1 where "k1 = μ + k"
have k1: "k1 ≥ μ"
using k by (simp add: k1_def)
have "k1 > 0"
by (rule less_le_trans[OF _ k1]) (use p n in ‹auto simp: q_def μ_def›)

define k1' where "k1' = nat (ceiling k1)"
have "μ ≥ 0" using p
by (auto simp: μ_def q_def)
have "¬(x < k1') ⟷ real x ≥ k1" for x
unfolding k1'_def by linarith
hence eq: "UNIV - {..<k1'} = {x. x ≥ k1}"
by auto
hence "measure_pmf.prob (neg_binomial_pmf n p) {n. n ≥ k1} =
1 - measure_pmf.prob (neg_binomial_pmf n p) {..<k1'}"
using measure_pmf.prob_compl[of "{..<k1'}" "neg_binomial_pmf n p"] by simp
also have "measure_pmf.prob (neg_binomial_pmf n p) {..<k1'} =
measure_pmf.prob (binomial_pmf (n + k1' - 1) q) {..<k1'}"
unfolding q_def using p by (intro prob_neg_binomial_pmf_lessThan) auto
also have "1 - … = measure_pmf.prob (binomial_pmf (n + k1' - 1) q) {n. n ≥ k1}"
using measure_pmf.prob_compl[of "{..<k1'}" "binomial_pmf (n + k1' - 1) q"] eq by simp
also have "{x. real x ≥ k1} = {x. x ≥ real (n + k1' - 1) * q + (k1 - real (n + k1' - 1) * q)}"
by simp
also have "measure_pmf.prob (binomial_pmf (n + k1' - 1) q) … ≤
exp (-2 * (k1 - real (n + k1' - 1) * q)⇧2 / real (n + k1' - 1))"
proof (rule binomial_distribution.prob_ge)
show "binomial_distribution q"
by unfold_locales (use q in auto)
next
show "n + k1' - 1 > 0"
using ‹k1 > 0› n unfolding k1'_def by linarith
next
have "real (n + nat ⌈k1⌉ - 1) ≤ real n + k1"
using ‹k1 > 0› by linarith
hence "real (n + k1' - 1) * q  ≤ (real n + k1) * q"
unfolding k1'_def by (intro mult_right_mono) (use p in ‹simp_all add: q_def›)
also have "… ≤ k1"
using k1 p by (simp add: q_def field_simps μ_def)
finally show "0 ≤ k1 - real (n + k1' - 1) * q"
by simp
qed
also have "{x. real (n + k1' - 1) * q + (k1 - real (n + k1' - 1) * q) ≤ real x} = {x. real x ≥ k1}"
by simp
also have "exp (-2 * (k1 - real (n + k1' - 1) * q)⇧2 / real (n + k1' - 1)) ≤
exp (-2 * (k1 - (n + k1) * q)⇧2 / (n + k1))"
proof -
have "real (n + k1' - Suc 0) ≤ real n + k1"
unfolding k1'_def using ‹k1 > 0› by linarith
moreover have "(real n + k1) * q ≤ k1"
using k1 p by (auto simp: q_def field_simps μ_def)
moreover have "1 < n + k1'"
using n ‹k1 > 0› unfolding k1'_def by linarith
ultimately have "2 * (k1 - real (n + k1' - 1) * q)⇧2 / real (n + k1' - 1) ≥
2 * (k1 - (n + k1) * q)⇧2 / (n + k1)"
by (intro frac_le mult_left_mono power_mono mult_nonneg_nonneg mult_right_mono diff_mono)
(use q in simp_all)
thus ?thesis
by simp
qed
also have "… = exp (-2 * (p * k1 - q * n)⇧2 / (k1 + n))"
also have "-2 * (p * k1 - q * n)⇧2 = -2 * p⇧2 * (k1 - μ)⇧2"
using p by (auto simp: field_simps μ_def)
also have "k1 - μ = k"
also note k1_def
also have "μ + k + real n = real n / p + k"
using p by (simp add: μ_def q_def field_simps)
also have "- 2 * p⇧2 * k⇧2 / (real n / p + k) = - 2 * p ^ 3 * k⇧2 / (p * k + n)"
using p by (simp add: field_simps power3_eq_cube power2_eq_square)
qed
qed

lemma prob_neg_binomial_pmf_le_bound:
fixes n :: nat and k :: real
defines "μ ≡ real n * q / p"
assumes k: "k ≥ 0"
shows "measure_pmf.prob (neg_binomial_pmf n p) {x. real x ≤ μ - k}
≤ exp (-2 * p ^ 3 * k⇧2 / (n - p * k))"
proof -
consider "n = 0" | "p = 1" | "k > μ" | "n > 0" "p ≠ 1" "k ≤ μ"
by force
thus ?thesis
proof cases
assume [simp]: "n = 0"
show ?thesis using k
next
assume [simp]: "p = 1"
show ?thesis using k
by (auto simp add: indicator_def μ_def q_def)
next
assume "k > μ"
hence "{x. real x ≤ μ - k} = {}"
by auto
thus ?thesis by simp
next
assume n: "n > 0" and "p ≠ 1" and "k ≤ μ"
from ‹p ≠ 1› and p have p: "p ∈ {0<..<1}"
by auto
from p have q: "q ∈ {0<..<1}"
by (auto simp: q_def)

define f :: "real ⇒ real" where "f = (λx. (p * x - q * n)⇧2 / (x + n))"
have f_mono: "f x ≥ f y" if "x ≥ 0" "y ≤ n * q / p" "x ≤ y" for x y :: real
using that(3)
proof (rule DERIV_nonpos_imp_nonincreasing)
fix t assume t: "t ≥ x" "t ≤ y"
have "x > -n"
using n ‹x ≥ 0› by linarith
hence "(f has_field_derivative ((p * t - q * n) * (n * (1 + p) + p * t) / (n + t) ^ 2)) (at t)"
unfolding f_def using t
by (auto intro!: derivative_eq_intros simp: algebra_simps q_def power2_eq_square)
moreover {
have "p * t ≤ p * y"
using p by (intro mult_left_mono t) auto
also have "p * y ≤ q * n"
using ‹y ≤ n * q / p› p by (simp add: field_simps)
finally have "p * t ≤ q * n" .
}
hence "(p * t - q * n) * (n * (1 + p) + p * t) / (n + t) ^ 2 ≤ 0"
using p ‹x ≥ 0› t
by (intro mult_nonpos_nonneg divide_nonpos_nonneg add_nonneg_nonneg mult_nonneg_nonneg) auto
ultimately show "∃y. (f has_real_derivative y) (at t) ∧ y ≤ 0"
by blast
qed

define k1 where "k1 = μ - k"
have k1: "k1 ≤ real n * q / p"
using assms by (simp add: μ_def k1_def)
have "k1 ≥ 0"
using k ‹k ≤ μ› by (simp add: μ_def k1_def)

define k1' where "k1' = nat (floor k1)"
have "μ ≥ 0" using p
by (auto simp: μ_def q_def)
have "(x ≤ k1') ⟷ real x ≤ k1" for x
unfolding k1'_def not_less using ‹k1 ≥ 0› by linarith
hence eq: "{n. n ≤ k1}  = {..k1'}"
by auto
hence "measure_pmf.prob (neg_binomial_pmf n p) {n. n ≤ k1} =
measure_pmf.prob (neg_binomial_pmf n p) {..k1'}"
by simp
also have "measure_pmf.prob (neg_binomial_pmf n p) {..k1'} =
measure_pmf.prob (binomial_pmf (n + k1') q) {..k1'}"
unfolding q_def using p by (intro prob_neg_binomial_pmf_atMost) auto
also note eq [symmetric]
also have "{x. real x ≤ k1} = {x. x ≤ real (n + k1') * q - (real (n + k1') * q - real k1')}"
using eq by auto
also have "measure_pmf.prob (binomial_pmf (n + k1') q) … ≤
exp (-2 * (real (n + k1') * q - real k1')⇧2 / real (n + k1'))"
proof (rule binomial_distribution.prob_le)
show "binomial_distribution q"
by unfold_locales (use q in auto)
next
show "n + k1' > 0"
using ‹k1 ≥ 0› n unfolding k1'_def by linarith
next
have "p * k1' ≤ p * k1"
using p ‹k1 ≥ 0› by (intro mult_left_mono) (auto simp: k1'_def)
also have "… ≤ q * n"
using k1 p by (simp add: field_simps)
finally show "0 ≤ real (n + k1') * q - real k1'"
qed
also have "{x. real x ≤ real (n + k1') * q - (real (n + k1') * q - k1')} = {..k1'}"
by auto
also have "real (n + k1') * q - k1' = -(p * k1' - q * n)"
also have "… ^ 2 = (p * k1' - q * n) ^ 2"
by algebra
also have "- 2 * (p * real k1' - q * real n)⇧2 / real (n + k1') = -2 * f (real k1')"
also have "f (real k1') ≥ f k1"
by (rule f_mono) (use ‹k1 ≥ 0› k1 in ‹auto simp: k1'_def›)
hence "exp (-2 * f (real k1')) ≤ exp (-2 * f k1)"
by simp
also have "… = exp (-2 * (p * k1 - q * n)⇧2 / (k1 + n))"

also have "-2 * (p * k1 - q * n)⇧2 = -2 * p⇧2 * (k1 - μ)⇧2"
using p by (auto simp: field_simps μ_def)
also have "(k1 - μ) ^ 2 = k ^ 2"
also note k1_def
also have "μ - k + real n = real n / p - k"
using p by (simp add: μ_def q_def field_simps)
also have "- 2 * p⇧2 * k⇧2 / (real n / p - k) = - 2 * p ^ 3 * k⇧2 / (n - p * k)"
using p by (simp add: field_simps power3_eq_cube power2_eq_square)
also have "{..k1'} = {x. real x ≤ μ - k}"
using eq by (simp add: k1_def)
finally show ?thesis .
qed
qed

text ‹
Due to the function $exp(-l/x)$ being concave for $x \geq \frac{l}{2}$, the above two
bounds can be combined into the following one for moderate values of ‹k›.
(cf. 🌐‹https://math.stackexchange.com/questions/1565559›)
›
lemma prob_neg_binomial_pmf_abs_ge_bound:
fixes n :: nat and k :: real
defines "μ ≡ real n * q / p"
assumes "k ≥ 0" and n_ge: "n ≥ p * k * (p⇧2 * k + 1)"
shows "measure_pmf.prob (neg_binomial_pmf n p) {x. ¦real x - μ¦ ≥ k} ≤
2 * exp (-2 * p ^ 3 * k ^ 2 / n)"
proof (cases "k = 0")
case False
with ‹k ≥ 0› have k: "k > 0"
by auto
define l :: real where "l = 2 * p ^ 3 * k ^ 2"
have l: "l > 0"
using p k by (auto simp: l_def)
define f :: "real ⇒ real" where "f = (λx. exp (-l / x))"
define f' where "f' = (λx. -l * exp (-l / x) / x ^ 2)"

have f'_mono: "f' x ≤ f' y" if "x ≥ l / 2" "x ≤ y" for x y :: real
using that(2)
proof (rule DERIV_nonneg_imp_nondecreasing)
fix t assume t: "x ≤ t" "t ≤ y"
have "t > 0"
using that l t by auto
have "(f' has_field_derivative (l * (2 * t - l) / (exp (l / t) * t ^ 4))) (at t)"
unfolding f'_def using t that ‹t > 0›
by (auto intro!: derivative_eq_intros simp: field_simps exp_minus simp flip: power_Suc)
moreover have "l * (2 * t - l) / (exp (l / t) * t ^ 4) ≥ 0"
using that t l by (intro divide_nonneg_pos mult_nonneg_nonneg) auto
ultimately show "∃y. (f' has_real_derivative y) (at t) ∧ 0 ≤ y" by blast
qed

have convex: "convex_on {l/2..} (λx. -f x)" unfolding f_def
proof (intro convex_on_realI[where f' = f'])
show "((λx. - exp (- l / x)) has_real_derivative f' x) (at x)" if "x ∈ {l/2..}" for x
using that l
by (auto intro!: derivative_eq_intros simp: f'_def power2_eq_square algebra_simps)
qed (use l in ‹auto intro!: f'_mono›)

have eq: "{x. ¦real x - μ¦ ≥ k} = {x. real x ≤ μ - k} ∪ {x. real x ≥ μ + k}"
by auto
have "measure_pmf.prob (neg_binomial_pmf n p) {x. ¦real x - μ¦ ≥ k} ≤
measure_pmf.prob (neg_binomial_pmf n p) {x. real x ≤ μ - k} +
measure_pmf.prob (neg_binomial_pmf n p) {x. real x ≥ μ + k}"
by (subst eq, rule measure_Un_le) auto
also have "… ≤ exp (-2 * p ^ 3 * k⇧2 / (n - p * k)) + exp (-2 * p ^ 3 * k⇧2 / (n + p * k))"
unfolding μ_def
by (intro prob_neg_binomial_pmf_le_bound prob_neg_binomial_pmf_ge_bound add_mono ‹k ≥ 0›)
also have "… = 2 * (1/2 * f (n - p * k) + 1/2 * f (n + p * k))"
also have "1/2 * f (n - p * k) + 1/2 * f (n + p * k) ≤ f (1/2 * (n - p * k) + 1/2 * (n + p * k))"
proof -
let ?x = "n - p * k" and ?y = "n + p * k"
have le1: "l / 2 ≤ ?x" using n_ge
by (simp add: l_def power2_eq_square power3_eq_cube algebra_simps)
also have "… ≤ ?y"
using p k by simp
finally have le2: "l / 2 ≤ ?y" .
have "-f ((1 - 1 / 2) *⇩R ?x + (1 / 2) *⇩R ?y) ≤ (1 - 1 / 2) * - f ?x + 1 / 2 * - f ?y"
using le1 le2 by (intro convex_onD[OF convex]) auto
thus ?thesis by simp
qed
also have "1/2 * (n - p * k) + 1/2 * (n + p * k) = n"