Theory Bennett_Inequality
section ‹Bennett's Inequality›
text ‹In this section we verify Bennett's inequality~\cite{bennett1962} and a (weak) version of
Bernstein's inequality as a corollary. Both inequalities give concentration bounds for sums of
independent random variables. The statement and proofs follow a summary paper by
Boucheron et al.~\cite{DBLP:conf/ac/BoucheronLB03}.›
theory Bennett_Inequality
imports Concentration_Inequalities_Preliminary
begin
context prob_space
begin
lemma indep_vars_Chernoff_ineq_ge:
assumes I: "finite I"
assumes ind: "indep_vars (λ _. borel) X I"
assumes sge: "s ≥ 0"
assumes int: "⋀i. i ∈ I ⟹ integrable M (λx. exp (s * X i x))"
shows "prob {x ∈ space M. (∑i ∈I. X i x - expectation (X i)) ≥ t} ≤
exp (-s*t) *
(∏i∈I. expectation (λx. exp(s * (X i x - expectation (X i)))))"
proof (cases "s = 0")
case [simp]: True
thus ?thesis
by (simp add: prob_space)
next
case False
then have s: "s > 0" using sge by auto
have [measurable]: "⋀i. i ∈ I ⟹ random_variable borel (X i)"
using ind unfolding indep_vars_def by blast
have indep1: "indep_vars (λ_. borel)
(λi ω. exp (s * (X i ω - expectation (X i)))) I"
apply (intro indep_vars_compose[OF ind, unfolded o_def])
by auto
define S where "S = (λx. (∑i ∈I. X i x - expectation (X i)))"
have int1: "⋀i. i ∈ I ⟹
integrable M (λω. exp (s * (X i ω - expectation (X i))))"
by (auto simp add: algebra_simps exp_diff int)
have eprod: "⋀x. exp (s * S x) = (∏i∈I. exp(s * (X i x - expectation (X i))))"
unfolding S_def
by (simp add: assms(1) exp_sum vector_space_over_itself.scale_sum_right)
from indep_vars_integrable[OF I indep1 int1]
have intS: "integrable M (λx. exp (s * S x))"
unfolding eprod by auto
then have si: "set_integrable M (space M) (λx. exp (s * S x))"
unfolding set_integrable_def
apply (intro integrable_mult_indicator)
by auto
from Chernoff_ineq_ge[OF s si]
have "prob {x ∈ space M. S x ≥ t} ≤ exp (- s * t) * (∫x∈space M. exp (s * S x)∂M)"
by auto
also have "(∫x∈space M. exp (s * S x)∂M) = expectation (λx. exp(s * S x))"
unfolding set_integral_space[OF intS] by auto
also have "... = expectation (λx. ∏i∈I. exp(s * (X i x - expectation (X i))))"
unfolding S_def
by (simp add: assms(1) exp_sum vector_space_over_itself.scale_sum_right)
also have "... = (∏i∈I. expectation (λx. exp(s * (X i x - expectation (X i)))))"
apply (intro indep_vars_lebesgue_integral[OF I indep1 int1]) .
finally show ?thesis
unfolding S_def
by auto
qed
definition bennett_h::"real ⇒ real"
where "bennett_h u = (1 + u) * ln (1 + u) - u"
lemma exp_sub_two_terms_eq:
fixes x :: real
shows "exp x - x - 1 = (∑n. x^(n+2) / fact (n+2))"
"summable (λn. x^(n+2) / fact (n+2))"
proof -
have "(∑i<2. inverse (fact i) * x ^ i) = 1 + x"
by (simp add:numeral_eq_Suc)
thus "exp x - x - 1 = (∑n. x^(n+2) / fact (n+2))"
unfolding exp_def
apply (subst suminf_split_initial_segment[where k = 2])
by (auto simp add: summable_exp divide_inverse_commute)
have "summable (λn. x^n / fact n)"
by (simp add: divide_inverse_commute summable_exp)
then have "summable (λn. x^(Suc (Suc n)) / fact (Suc (Suc n)))"
apply (subst summable_Suc_iff)
apply (subst summable_Suc_iff)
by auto
thus "summable (λn. x^(n+2) / fact (n+2))" by auto
qed
lemma psi_mono:
defines "f ≡ (λx. (exp x - x - 1) - x^2 / 2)"
assumes xy: "a ≤ (b::real)"
shows "f a ≤ f b"
proof -
have 1: "(f has_real_derivative (exp x - x - 1)) (at x)" for x
unfolding f_def
by (auto intro!: derivative_eq_intros)
have 2: "⋀x. x ∈ {a..b} ⟹ 0 ≤ exp x - x - 1"
by (smt (verit) exp_ge_add_one_self)
from deriv_nonneg_imp_mono[OF 1 2 xy]
show ?thesis by auto
qed
lemma psi_inequality:
assumes le: "x ≤ (y::real)" "y ≥ 0"
shows "y^2 * (exp x - x - 1) ≤ x^2 * (exp y - y - 1)"
proof -
have x: "exp x - x - 1 = (∑n. (x^(n+2) / fact (n+2)))"
"summable (λn. x^(n+2) / fact (n+2))"
using exp_sub_two_terms_eq .
have y: "exp y - y - 1 = (∑n. (y^(n+2) / fact (n+2)))"
"summable (λn. y^(n+2) / fact (n+2))"
using exp_sub_two_terms_eq .
have l:"y^2 * (exp x - x - 1) = (∑n. y^2 * (x^(n+2) / fact (n+2)))"
using x
apply (subst suminf_mult)
by auto
have ls: "summable (λn. y^2 * (x^(n+2) / fact (n+2)))"
by (intro summable_mult[OF x(2)])
have r:"x^2 * (exp y - y - 1) = (∑n. x^2 * (y^(n+2) / fact (n+2)))"
using y
apply (subst suminf_mult)
by auto
have rs: "summable (λn. x^2 * (y^(n+2) / fact (n+2)))"
by (intro summable_mult[OF y(2)])
have "¦x¦ ≤ ¦y¦ ∨ ¦y¦ < ¦x¦" by auto
moreover {
assume "¦x¦ ≤ ¦y¦"
then have "x^ n ≤ y ^n" for n
by (smt (verit, ccfv_threshold) bot_nat_0.not_eq_extremum le power_0 real_root_less_mono real_root_power_cancel root_abs_power)
then have "(x^2 * y^2) * x^n ≤ (x^2 * y^2) * y^n" for n
by (simp add: mult_left_mono)
then have "y⇧2 * (x ^ (n + 2)) ≤ x⇧2 * (y ^ (n + 2))" for n
by (metis (full_types) ab_semigroup_mult_class.mult_ac(1) mult.commute power_add)
then have "y⇧2 * (x ^ (n + 2)) / fact (n+2)≤ x⇧2 * (y ^ (n + 2)) / fact (n+2)" for n
by (meson divide_right_mono fact_ge_zero)
then have "(∑n. y^2 * (x^(n+2) / fact (n+2))) ≤ (∑n. x^2 * (y^(n+2) / fact (n+2)))"
apply (intro suminf_le[OF _ ls rs])
by auto
then have "y^2 * (exp x - x - 1) ≤ x^2 * (exp y - y - 1)"
using l r by presburger
}
moreover {
assume ineq: "¦y¦ < ¦x¦"
from psi_mono[OF assms(1)]
have "(exp x - x - 1) - x^2 /2 ≤ (exp y - y - 1) - y^2/2" .
then have "y^2 * ((exp x - x - 1) - x^2 /2) ≤ x^2 * ((exp y - y - 1) - y^2/2)"
by (smt (verit, best) ineq diff_divide_distrib exp_lower_Taylor_quadratic le(1) le(2) mult_nonneg_nonneg one_less_exp_iff power_zero_numeral prob_space.psi_mono prob_space_completion right_diff_distrib zero_le_power2)
then have "y^2 * (exp x - x - 1) ≤ x^2 * (exp y - y - 1)"
by (simp add: mult.commute right_diff_distrib)
}
ultimately show ?thesis by auto
qed
lemma bennett_inequality_1:
assumes I: "finite I"
assumes ind: "indep_vars (λ _. borel) X I"
assumes intsq: "⋀i. i ∈ I ⟹ integrable M (λx. (X i x)^2)"
assumes bnd: "⋀i. i ∈ I ⟹ AE x in M. X i x ≤ 1"
assumes t: "t ≥ 0"
defines "V ≡ (∑i ∈ I. expectation(λx. X i x^2))"
shows "prob {x ∈ space M. (∑i ∈ I. X i x - expectation (X i)) ≥ t} ≤
exp (-V * bennett_h (t / V))"
proof (cases "V = 0")
case True
then show ?thesis
by auto
next
case f: False
have "V ≥ 0"
unfolding V_def
apply (intro sum_nonneg integral_nonneg_AE)
by auto
then have Vpos: "V > 0" using f by auto
define l :: real where "l = ln(1 + t / V)"
then have l: "l ≥ 0"
using t Vpos by auto
have rv[measurable]: "⋀i. i ∈ I ⟹ random_variable borel (X i)"
using ind unfolding indep_vars_def by blast
define ψ where "ψ = (λx::real. exp(x) - x - 1)"
have rw: "exp y = 1 + y + ψ y" for y
unfolding ψ_def by auto
have ebnd: "⋀i. i ∈ I ⟹
AE x in M. exp (l * X i x) ≤ exp l"
apply (drule bnd)
using l by (auto simp add: mult_left_le)
have int: "⋀i. i ∈ I ⟹ integrable M (λx. (X i x))"
using rv intsq square_integrable_imp_integrable by blast
have intl: "⋀i. i ∈ I ⟹ integrable M (λx. (l * X i x))"
using int by blast
have intexpl: "⋀i. i ∈ I ⟹ integrable M (λx. exp (l * X i x))"
apply (intro integrable_const_bound[where B = "exp l"])
using ebnd by auto
have intpsi: "⋀i. i ∈ I ⟹ integrable M (λx. ψ (l * X i x))"
unfolding ψ_def
using intl intexpl by auto
have **: "⋀i. i ∈ I ⟹
expectation (λx. ψ (l * X i x)) ≤ ψ l * expectation (λx. (X i x)^2)"
proof -
fix i assume i: "i ∈ I"
then have "AE x in M. l * X i x ≤ l"
using ebnd by auto
then have "AE x in M. l^2 * ψ (l * X i x) ≤ (l * X i x)^2 * ψ l"
using psi_inequality[OF _ l] unfolding ψ_def
by auto
then have "AE x in M. l^2 * ψ (l * X i x) ≤ l^2 * (ψ l * (X i x)^2)"
by (auto simp add: field_simps)
then have "AE x in M. ψ (l * X i x) ≤ ψ l * (X i x)^2 "
by (smt (verit, best) AE_cong ψ_def exp_eq_one_iff mult_cancel_left mult_eq_0_iff mult_left_mono zero_eq_power2 zero_le_power2)
then have "AE x in M. 0 ≤ ψ l * (X i x)^2 - ψ (l * X i x) "
by auto
then have "expectation (λx. ψ l * (X i x)^2 + (- ψ (l * X i x))) ≥ 0"
by (simp add: integral_nonneg_AE)
also have "expectation (λx. ψ l * (X i x)^2 + (- ψ (l * X i x))) =
ψ l * expectation (λx. (X i x)^2) - expectation (λx. ψ (l * X i x))"
apply (subst Bochner_Integration.integral_add)
using intpsi[OF i] intsq[OF i] by auto
finally show "expectation (λx. ψ (l * X i x)) ≤ ψ l * expectation (λx. (X i x)^2)"
by auto
qed
then have *: "⋀i. i ∈ I ⟹
expectation (λx. exp (l * X i x)) ≤
exp (l * expectation (X i)) * exp (ψ l * expectation (λx. X i x^2))"
proof -
fix i
assume iI: "i ∈ I"
have "expectation (λx. exp (l * X i x)) =
1 + l * expectation (λx. X i x) +
expectation (λx. ψ (l * X i x))"
unfolding rw
apply (subst Bochner_Integration.integral_add)
using iI intl intpsi apply auto[2]
apply (subst Bochner_Integration.integral_add)
using intl iI prob_space by auto
also have "... = l * expectation (X i) + 1 + expectation (λx. ψ (l * X i x))"
by auto
also have "... ≤ 1 + l * expectation (X i) + ψ l * expectation (λx. X i x^2)"
using **[OF iI] by auto
also have "... ≤ exp (l * expectation (X i)) * exp (ψ l * expectation (λx. X i x^2))"
by (simp add: is_num_normalize(1) mult_exp_exp)
finally show "expectation (λx. exp (l * X i x)) ≤
exp (l * expectation (X i)) * exp (ψ l * expectation (λx. X i x^2))" .
qed
have "(∏i∈I. expectation (λx. exp (l * (X i x)))) ≤
(∏i∈I. exp (l * expectation (X i)) * exp (ψ l * expectation (λx. X i x^2)))"
by (auto intro!: prod_mono simp add: *)
also have "... =
(∏i∈I. exp (l * expectation (X i))) * (∏i∈I. exp (ψ l * expectation (λx. X i x^2)))"
by (auto simp add: prod.distrib)
finally have **:
"(∏i∈I. expectation (λx. exp (l * (X i x)))) ≤
(∏i∈I. exp (l * expectation (X i))) * exp (ψ l * V)"
by (simp add: V_def I exp_sum sum_distrib_left)
from indep_vars_Chernoff_ineq_ge[OF I ind l intexpl]
have "prob {x ∈ space M. (∑i ∈ I. X i x - expectation (X i)) ≥ t} ≤
exp (- l * t) *
(∏i∈I. expectation (λx. exp (l * (X i x - expectation (X i)))))"
by auto
also have "(∏i∈I. expectation (λx. exp (l * (X i x - expectation (X i))))) =
(∏i∈I. expectation (λx. exp (l * (X i x))) * exp (- l * expectation (X i)))"
by (auto intro!: prod.cong simp add: field_simps exp_diff exp_minus_inverse)
also have "... =
(∏i∈I. exp (- l * expectation (X i))) * (∏i∈I. expectation (λx. exp (l * (X i x))))"
by (auto simp add: prod.distrib)
also have "... ≤
(∏i∈I. exp (- l * expectation (X i))) * ((∏i∈I. exp (l * expectation (X i))) * exp (ψ l * V))"
apply (intro mult_left_mono[OF **])
by (meson exp_ge_zero prod_nonneg)
also have "... = exp (ψ l * V)"
apply (simp add: prod.distrib [symmetric])
by (smt (verit, ccfv_threshold) exp_minus_inverse prod.not_neutral_contains_not_neutral)
finally have "
prob {x ∈ space M. (∑i ∈ I. X i x - expectation (X i)) ≥ t} ≤
exp (ψ l * V - l * t)"
by (simp add:mult_exp_exp)
also have "ψ l * V - l * t = -V * bennett_h (t / V)"
unfolding ψ_def l_def bennett_h_def
apply (subst exp_ln)
subgoal by (smt (verit) Vpos divide_nonneg_nonneg t)
by (auto simp add: algebra_simps)
finally show ?thesis .
qed
lemma real_AE_le_sum:
assumes "⋀i. i ∈ I ⟹ AE x in M. f i x ≤ (g i x::real)"
shows "AE x in M. (∑i∈I. f i x) ≤ (∑i∈I. g i x)"
proof (cases)
assume "finite I"
with AE_finite_allI[OF this assms] have 0:"AE x in M. (∀i∈I. f i x ≤ g i x)" by auto
show ?thesis by (intro eventually_mono[OF 0] sum_mono) auto
qed simp
lemma real_AE_eq_sum:
assumes "⋀i. i ∈ I ⟹ AE x in M. f i x = (g i x::real)"
shows "AE x in M. (∑i∈I. f i x) = (∑i∈I. g i x)"
proof -
have 1: "AE x in M. (∑i∈I. f i x) ≤ (∑i∈I. g i x)"
apply (intro real_AE_le_sum)
apply (drule assms)
by auto
have 2: "AE x in M. (∑i∈I. g i x) ≤ (∑i∈I. f i x)"
apply (intro real_AE_le_sum)
apply (drule assms)
by auto
show ?thesis
using 1 2
by auto
qed
theorem bennett_inequality:
assumes I: "finite I"
assumes ind: "indep_vars (λ _. borel) X I"
assumes intsq: "⋀i. i ∈ I ⟹ integrable M (λx. (X i x)^2)"
assumes bnd: "⋀i. i ∈ I ⟹ AE x in M. X i x ≤ B"
assumes t: "t ≥ 0"
assumes B: "B > 0"
defines "V ≡ (∑i ∈ I. expectation (λx. X i x^2))"
shows "prob {x ∈ space M. (∑i ∈ I. X i x - expectation (X i)) ≥ t} ≤
exp (- V / B^2 * bennett_h (t * B / V))"
proof -
define Y where "Y = (λi x. X i x / B)"
from indep_vars_compose[OF ind, where Y = "λi x. x/ B"]
have 1: "indep_vars (λ_. borel) Y I"
unfolding Y_def by (auto simp add: o_def)
have 2: "⋀i. i ∈ I ⟹ integrable M (λx. (Y i x)⇧2)"
unfolding Y_def apply (drule intsq)
by (auto simp add: field_simps)
have 3: "⋀i. i ∈ I ⟹ AE x in M. Y i x ≤ 1"
unfolding Y_def apply (drule bnd)
using B by auto
have 4:"0 ≤ t / B" using t B by auto
have rw1: "(∑i∈I. Y i x - expectation (Y i)) =
(∑i∈I. X i x - expectation (X i)) / B" for x
unfolding Y_def
by (auto simp: diff_divide_distrib sum_divide_distrib)
have rw2: "expectation (λx. (Y i x)⇧2) =
expectation (λx. (X i x)⇧2) / B^2" for i
unfolding Y_def
by (simp add: power_divide)
have rw3:"- (∑i∈I. expectation (λx. (X i x)⇧2) / B^2) = - V / B^2"
unfolding V_def
by (auto simp add: sum_divide_distrib)
have "t / B / (∑i∈I. expectation (λx. (X i x)⇧2) / B^2) =
t / B / (V / B^2)"
unfolding V_def
by (auto simp add: sum_divide_distrib)
then have rw4: "t / B / (∑i∈I. expectation (λx. (X i x)⇧2) / B^2) =
t * B / V"
by (simp add: power2_eq_square)
have "prob {x ∈ space M. t ≤ (∑i∈I. X i x - expectation (X i))} =
prob{x ∈ space M. t / B ≤ (∑i∈I. X i x - expectation (X i)) / B}"
by (smt (verit, best) B Collect_cong divide_cancel_right divide_right_mono)
also have "... ≤
exp (- V / B⇧2 *
bennett_h (t * B / V))"
using bennett_inequality_1[OF I 1 2 3 4]
unfolding rw1 rw2 rw3 rw4 .
finally show ?thesis .
qed
lemma bennett_h_bernstein_bound:
assumes "x ≥ 0"
shows "bennett_h x ≥ x^2 / (2 * (1 + x / 3))"
proof -
have eq:"x^2 / (2 * (1 + x / 3)) = 3/2 * x - 9/2 * (x / (x+3))"
using assms
by (sos "(() & ())")
define g where "g = (λx. bennett_h x - (3/2 * x - 9/2 * (x / (x+3))))"
define g' where "g' = (λx::real.
ln(1 + x) + 27 / (2 * (x+3)^2) - 3 / 2)"
define g'' where "g'' = (λx::real.
1 / (1 + x) - 27 / (x+3)^3)"
have "54 / ((2 * x + 6)^2) = 27 / (2 * (x + 3)⇧2)" (is "?L = ?R") for x :: real
proof -
have "?L = 54 / (2^2 * (x + 3)^2)"
unfolding power_mult_distrib[symmetric] by (simp add:algebra_simps)
also have "... = ?R" by simp
finally show ?thesis by simp
qed
hence 1: "x ≥ 0 ⟹ (g has_real_derivative (g' x)) (at x)" for x
unfolding g_def g'_def bennett_h_def by (auto intro!: derivative_eq_intros simp:power2_eq_square)
have 2: "x ≥ 0 ⟹ (g' has_real_derivative (g'' x)) (at x)" for x
unfolding g'_def g''_def
apply (auto intro!: derivative_eq_intros)[1]
by (sos "(() & ())")
have gz: "g 0 = 0"
unfolding g_def bennett_h_def by auto
have g1z: "g' 0 = 0"
unfolding g'_def by auto
have p2: "g'' x ≥ 0" if "x ≥ 0" for x
proof -
have "27 * (1+x) ≤ (x+3)^3"
using that unfolding power3_eq_cube by (auto simp:algebra_simps)
hence " 27 / (x + 3) ^ 3 ≤ 1 / (1+x)"
using that by (subst frac_le_eq) (auto intro!:divide_nonpos_pos)
thus ?thesis unfolding g''_def by simp
qed
from deriv_nonneg_imp_mono[OF 2 p2 _]
have "x ≥ 0 ⟹ g' x ≥ 0" for x using g1z
by (metis atLeastAtMost_iff)
from deriv_nonneg_imp_mono[OF 1 this _]
have "x ≥ 0 ⟹ g x ≥ 0" for x using gz
by (metis atLeastAtMost_iff)
thus ?thesis
using assms eq g_def by force
qed
lemma sum_sq_exp_eq_zero_imp_zero:
assumes "finite I" "i ∈ I"
assumes intsq: "integrable M (λx. (X i x)^2)"
assumes "(∑i ∈ I. expectation (λx. X i x^2)) = 0"
shows "AE x in M. X i x = (0::real)"
proof -
have "(∀i ∈I. expectation (λx. X i x^2) = 0)"
using assms
apply (subst sum_nonneg_eq_0_iff[symmetric])
by auto
then have "expectation (λx. X i x^2) = 0"
using assms(2) by blast
thus ?thesis
using integral_nonneg_eq_0_iff_AE[OF intsq]
by auto
qed
corollary bernstein_inequality:
assumes I: "finite I"
assumes ind: "indep_vars (λ _. borel) X I"
assumes intsq: "⋀i. i ∈ I ⟹ integrable M (λx. (X i x)^2)"
assumes bnd: "⋀i. i ∈ I ⟹ AE x in M. X i x ≤ B"
assumes t: "t ≥ 0"
assumes B: "B > 0"
defines "V ≡ (∑i ∈ I. expectation (λx. X i x^2))"
shows "prob {x ∈ space M. (∑i ∈ I. X i x - expectation (X i)) ≥ t} ≤
exp (- (t^2 / (2 * (V + t * B / 3))))"
proof (cases "V = 0")
case True
then have 1:"⋀i. i ∈ I ⟹ AE x in M. X i x = 0"
unfolding V_def
using sum_sq_exp_eq_zero_imp_zero
by (metis I intsq)
then have 2:"⋀i. i ∈ I ⟹ expectation (X i) = 0"
using integral_eq_zero_AE by blast
have "AE x in M. (∑i ∈ I. X i x - expectation (X i)) = (∑i ∈ I. 0)"
apply (intro real_AE_eq_sum)
using 1 2
by auto
then have *: "AE x in M. (∑i ∈ I. X i x - expectation (X i)) = 0"
by force
moreover {
assume "t > 0"
then have "prob {x ∈ space M. (∑i ∈ I. X i x - expectation (X i)) ≥ t} = 0"
apply (intro prob_eq_0_AE)
using * by auto
then have ?thesis by auto
}
ultimately show ?thesis
apply (cases "t = 0") using t by auto
next
case f: False
have "V ≥ 0"
unfolding V_def
apply (intro sum_nonneg integral_nonneg_AE)
by auto
then have V: "V > 0" using f by auto
have "t * B / V ≥ 0" using t B V by auto
from bennett_h_bernstein_bound[OF this]
have "(t * B / V)⇧2 / (2 * (1 + t * B / V / 3))
≤ bennett_h (t * B / V)" .
then have "(- V / B^2) * bennett_h (t * B / V) ≤
(- V / B^2) * ((t * B / V)⇧2 / (2 * (1 + t * B / V / 3)))"
apply (subst mult_left_mono_neg)
using B V by auto
also have "... =
((- V / B^2) * (t * B / V)⇧2) / (2 * (1 + t * B / V / 3))"
by auto
also have " ((- V / B^2) * (t * B / V)⇧2) = -(t^2) / V"
using V B by (auto simp add: field_simps power2_eq_square)
finally have *: "(- V / B^2) * bennett_h (t * B / V) ≤
-(t^2) / (2 * (V + t * B / 3))"
using V by (auto simp add: field_simps)
from bennett_inequality[OF assms(1-6)]
have "prob {x ∈ space M. (∑i ∈ I. X i x - expectation (X i)) ≥ t} ≤
exp (- V / B^2 * bennett_h (t * B / V))"
using V_def by auto
also have "... ≤ exp (- (t^2/ (2 * (V + t * B / 3))))"
using *
by auto
finally show ?thesis .
qed
end
end