(* File: Hoeffding.thy Author: Manuel Eberl, TU München *) section ‹Hoeffding's Lemma and Hoeffding's Inequality› theory Hoeffding imports Product_PMF Independent_Family begin text ‹ Hoeffding's inequality shows that a sum of bounded independent random variables is concentrated around its mean, with an exponential decay of the tail probabilities. › subsection ‹Hoeffding's Lemma› lemma convex_on_exp: fixes l :: real assumes "l ≥ 0" shows "convex_on UNIV (λx. exp(l*x))" using assms by (intro convex_on_realI[where f' = "λx. l * exp (l * x)"]) (auto intro!: derivative_eq_intros mult_left_mono) lemma mult_const_minus_self_real_le: fixes x :: real shows "x * (c - x) ≤ c⇧^{2}/ 4" proof - have "x * (c - x) = -(x - c / 2)⇧^{2}+ c⇧^{2}/ 4" by (simp add: field_simps power2_eq_square) also have "… ≤ 0 + c⇧^{2}/ 4" by (intro add_mono) auto finally show ?thesis by simp qed lemma Hoeffdings_lemma_aux: fixes h p :: real assumes "h ≥ 0" and "p ≥ 0" defines "L ≡ (λh. -h * p + ln (1 + p * (exp h - 1)))" shows "L h ≤ h⇧^{2}/ 8" proof (cases "h = 0") case False hence h: "h > 0" using ‹h ≥ 0› by simp define L' where "L' = (λh. -p + p * exp h / (1 + p * (exp h - 1)))" define L'' where "L'' = (λh. -(p⇧^{2}) * exp h * exp h / (1 + p * (exp h - 1))⇧^{2}+ p * exp h / (1 + p * (exp h - 1)))" define Ls where "Ls = (λn. [L, L', L''] ! n)" have [simp]: "L 0 = 0" "L' 0 = 0" by (auto simp: L_def L'_def) have L': "(L has_real_derivative L' x) (at x)" if "x ∈ {0..h}" for x proof - have "1 + p * (exp x - 1) > 0" using ‹p ≥ 0› that by (intro add_pos_nonneg mult_nonneg_nonneg) auto thus ?thesis unfolding L_def L'_def by (auto intro!: derivative_eq_intros) qed have L'': "(L' has_real_derivative L'' x) (at x)" if "x ∈ {0..h}" for x proof - have *: "1 + p * (exp x - 1) > 0" using ‹p ≥ 0› that by (intro add_pos_nonneg mult_nonneg_nonneg) auto show ?thesis unfolding L'_def L''_def by (insert *, (rule derivative_eq_intros refl | simp)+) (auto simp: divide_simps; algebra) qed have diff: "∀m t. m < 2 ∧ 0 ≤ t ∧ t ≤ h ⟶ (Ls m has_real_derivative Ls (Suc m) t) (at t)" using L' L'' by (auto simp: Ls_def nth_Cons split: nat.splits) from Taylor[of 2 Ls L 0 h 0 h, OF _ _ diff] obtain t where t: "t ∈ {0<..<h}" "L h = L'' t * h⇧^{2}/ 2" using ‹h > 0› by (auto simp: Ls_def lessThan_nat_numeral) define u where "u = p * exp t / (1 + p * (exp t - 1))" have "L'' t = u * (1 - u)" by (simp add: L''_def u_def divide_simps; algebra) also have "… ≤ 1 / 4" using mult_const_minus_self_real_le[of u 1] by simp finally have "L'' t ≤ 1 / 4" . note t(2) also have "L'' t * h⇧^{2}/ 2 ≤ (1 / 4) * h⇧^{2}/ 2" using ‹L'' t ≤ 1 / 4› by (intro mult_right_mono divide_right_mono) auto finally show "L h ≤ h⇧^{2}/ 8" by simp qed (auto simp: L_def) locale interval_bounded_random_variable = prob_space + fixes f :: "'a ⇒ real" and a b :: real assumes random_variable [measurable]: "random_variable borel f" assumes AE_in_interval: "AE x in M. f x ∈ {a..b}" begin lemma integrable [intro]: "integrable M f" proof (rule integrable_const_bound) show "AE x in M. norm (f x) ≤ max ¦a¦ ¦b¦" by (intro eventually_mono[OF AE_in_interval]) auto qed (fact random_variable) text ‹ We first show Hoeffding's lemma for distributions whose expectation is 0. The general case will easily follow from this later. › lemma Hoeffdings_lemma_nn_integral_0: assumes "l > 0" and E0: "expectation f = 0" shows "nn_integral M (λx. exp (l * f x)) ≤ ennreal (exp (l⇧^{2}* (b - a)⇧^{2}/ 8))" proof (cases "AE x in M. f x = 0") case True hence "nn_integral M (λx. exp (l * f x)) = nn_integral M (λx. ennreal 1)" by (intro nn_integral_cong_AE) auto also have "… = ennreal (expectation (λ_. 1))" by (intro nn_integral_eq_integral) auto finally show ?thesis by (simp add: prob_space) next case False have "a < 0" proof (rule ccontr) assume a: "¬(a < 0)" have "AE x in M. f x = 0" proof (subst integral_nonneg_eq_0_iff_AE [symmetric]) show "AE x in M. f x ≥ 0" using AE_in_interval by eventually_elim (use a in auto) qed (use E0 in ‹auto simp: id_def integrable›) with False show False by contradiction qed have "b > 0" proof (rule ccontr) assume b: "¬(b > 0)" have "AE x in M. -f x = 0" proof (subst integral_nonneg_eq_0_iff_AE [symmetric]) show "AE x in M. -f x ≥ 0" using AE_in_interval by eventually_elim (use b in auto) qed (use E0 in ‹auto simp: id_def integrable›) with False show False by simp qed have "a < b" using ‹a < 0› ‹b > 0› by linarith define p where "p = -a / (b - a)" define L where "L = (λt. -t* p + ln (1 - p + p * exp t))" define z where "z = l * (b - a)" have "z > 0" unfolding z_def using ‹a < b› ‹l > 0› by auto have "p > 0" using ‹a < 0› ‹a < b› unfolding p_def by (intro divide_pos_pos) auto have "(∫⇧^{+}x. exp (l * f x) ∂M) ≤ (∫⇧^{+}x. (b - f x) / (b - a) * exp (l * a) + (f x - a) / (b - a) * exp (l * b) ∂M)" proof (intro nn_integral_mono_AE eventually_mono[OF AE_in_interval] ennreal_leI) fix x assume x: "f x ∈ {a..b}" define y where "y = (b - f x) / (b-a)" have y: "y ∈ {0..1}" using x ‹a < b› by (auto simp: y_def) have conv: "convex_on UNIV (λx. exp(l*x))" using ‹l > 0› by (intro convex_on_exp) auto have "exp (l * ((1 - y) *⇩_{R}b + y *⇩_{R}a)) ≤ (1 - y) * exp (l * b) + y * exp (l * a)" using y ‹l > 0› by (intro convex_onD[OF convex_on_exp]) auto also have "(1 - y) *⇩_{R}b + y *⇩_{R}a = f x" using ‹a < b› by (simp add: y_def divide_simps) (simp add: algebra_simps)? also have "1 - y = (f x - a) / (b - a)" using ‹a < b› by (simp add: field_simps y_def) finally show "exp (l * f x) ≤ (b - f x) / (b - a) * exp (l*a) + (f x - a)/(b-a) * exp (l*b)" by (simp add: y_def) qed also have "… = (∫⇧^{+}x. ennreal (b - f x) * exp (l * a) / (b - a) + ennreal (f x - a) * exp (l * b) / (b - a) ∂M)" using ‹a < 0› ‹b > 0› by (intro nn_integral_cong_AE eventually_mono[OF AE_in_interval]) (simp add: ennreal_plus ennreal_mult flip: divide_ennreal) also have "… = ((∫⇧^{+}x. ennreal (b - f x) ∂M) * ennreal (exp (l * a)) + (∫⇧^{+}x. ennreal (f x - a) ∂M) * ennreal (exp (l * b))) / ennreal (b - a)" by (simp add: nn_integral_add nn_integral_divide nn_integral_multc add_divide_distrib_ennreal) also have "(∫⇧^{+}x. ennreal (b - f x) ∂M) = ennreal (expectation (λx. b - f x))" by (intro nn_integral_eq_integral Bochner_Integration.integrable_diff eventually_mono[OF AE_in_interval] integrable_const integrable) auto also have "expectation (λx. b - f x) = b" using assms by (subst Bochner_Integration.integral_diff) (auto simp: prob_space) also have "(∫⇧^{+}x. ennreal (f x - a) ∂M) = ennreal (expectation (λx. f x - a))" by (intro nn_integral_eq_integral Bochner_Integration.integrable_diff eventually_mono[OF AE_in_interval] integrable_const integrable) auto also have "expectation (λx. f x - a) = (-a)" using assms by (subst Bochner_Integration.integral_diff) (auto simp: prob_space) also have "(ennreal b * (exp (l * a)) + ennreal (-a) * (exp (l * b))) / (b - a) = ennreal (b * exp (l * a) - a * exp (l * b)) / ennreal (b - a)" using ‹a < 0› ‹b > 0› by (simp flip: ennreal_mult ennreal_plus add: mult_nonpos_nonneg divide_ennreal mult_mono) also have "b * exp (l * a) - a * exp (l * b) = exp (L z) * (b - a)" proof - have pos: "1 - p + p * exp z > 0" proof - have "exp z > 1" using ‹l > 0› and ‹a < b› by (subst one_less_exp_iff) (auto simp: z_def intro!: mult_pos_pos) hence "(exp z - 1) * p ≥ 0" unfolding p_def using ‹a < 0› and ‹a < b› by (intro mult_nonneg_nonneg divide_nonneg_pos) auto thus ?thesis by (simp add: algebra_simps) qed have "exp (L z) * (b - a) = exp (-z * p) * (1 - p + p * exp z) * (b - a)" using pos by (simp add: exp_add L_def exp_diff exp_minus divide_simps) also have "… = b * exp (l * a) - a * exp (l * b)" using ‹a < b› by (simp add: p_def z_def divide_simps) (simp add: exp_diff algebra_simps)? finally show ?thesis by simp qed also have "ennreal (exp (L z) * (b - a)) / ennreal (b - a) = ennreal (exp (L z))" using ‹a < b› by (simp add: divide_ennreal) also have "L z = -z * p + ln (1 + p * (exp z - 1))" by (simp add: L_def algebra_simps) also have "… ≤ z⇧^{2}/ 8" unfolding L_def by (rule Hoeffdings_lemma_aux[where p = p]) (use ‹z > 0› ‹p > 0› in simp_all) hence "ennreal (exp (-z * p + ln (1 + p * (exp z - 1)))) ≤ ennreal (exp (z⇧^{2}/ 8))" by (intro ennreal_leI) auto finally show ?thesis by (simp add: z_def power_mult_distrib) qed context begin interpretation shift: interval_bounded_random_variable M "λx. f x - μ" "a - μ" "b - μ" rewrites "b - μ - (a - μ) ≡ b - a" by unfold_locales (auto intro!: eventually_mono[OF AE_in_interval]) lemma expectation_shift: "expectation (λx. f x - expectation f) = 0" by (subst Bochner_Integration.integral_diff) (auto simp: integrable prob_space) lemmas Hoeffdings_lemma_nn_integral = shift.Hoeffdings_lemma_nn_integral_0[OF _ expectation_shift] end end subsection ‹Hoeffding's Inequality› text ‹ Consider ‹n› independent real random variables $X_1, \ldots, X_n$ that each almost surely lie in a compact interval $[a_i, b_i]$. Hoeffding's inequality states that the distribution of the sum of the $X_i$ is tightly concentrated around the sum of the expected values: the probability of it being above or below the sum of the expected values by more than some ‹ε› decreases exponentially with ‹ε›. › locale indep_interval_bounded_random_variables = prob_space + fixes I :: "'b set" and X :: "'b ⇒ 'a ⇒ real" fixes a b :: "'b ⇒ real" assumes fin: "finite I" assumes indep: "indep_vars (λ_. borel) X I" assumes AE_in_interval: "⋀i. i ∈ I ⟹ AE x in M. X i x ∈ {a i..b i}" begin lemma random_variable [measurable]: assumes i: "i ∈ I" shows "random_variable borel (X i)" using i indep unfolding indep_vars_def by blast lemma bounded_random_variable [intro]: assumes i: "i ∈ I" shows "interval_bounded_random_variable M (X i) (a i) (b i)" by unfold_locales (use AE_in_interval[OF i] i in auto) end locale Hoeffding_ineq = indep_interval_bounded_random_variables + fixes μ :: real defines "μ ≡ (∑i∈I. expectation (X i))" begin theorem%important Hoeffding_ineq_ge: assumes "ε ≥ 0" assumes "(∑i∈I. (b i - a i)⇧^{2}) > 0" shows "prob {x∈space M. (∑i∈I. X i x) ≥ μ + ε} ≤ exp (-2 * ε⇧^{2}/ (∑i∈I. (b i - a i)⇧^{2}))" proof (cases "ε = 0") case [simp]: True have "prob {x∈space M. (∑i∈I. X i x) ≥ μ + ε} ≤ 1" by simp thus ?thesis by simp next case False with ‹ε ≥ 0› have ε: "ε > 0" by auto define d where "d = (∑i∈I. (b i - a i)⇧^{2})" define l :: real where "l = 4 * ε / d" have d: "d > 0" using assms by (simp add: d_def) have l: "l > 0" using ε d by (simp add: l_def) define μ' where "μ' = (λi. expectation (X i))" have "{x∈space M. (∑i∈I. X i x) ≥ μ + ε} = {x∈space M. (∑i∈I. X i x) - μ ≥ ε}" by (simp add: algebra_simps) hence "ennreal (prob {x∈space M. (∑i∈I. X i x) ≥ μ + ε}) = emeasure M …" by (simp add: emeasure_eq_measure) also have "… ≤ ennreal (exp (-l*ε)) * (∫⇧^{+}x∈space M. exp (l * ((∑i∈I. X i x) - μ)) ∂M)" by (intro Chernoff_ineq_nn_integral_ge l) auto also have "(λx. (∑i∈I. X i x) - μ) = (λx. (∑i∈I. X i x - μ' i))" by (simp add: μ_def sum_subtractf μ'_def) also have "(∫⇧^{+}x∈space M. exp (l * ((∑i∈I. X i x - μ' i))) ∂M) = (∫⇧^{+}x. (∏i∈I. ennreal (exp (l * (X i x - μ' i)))) ∂M)" by (intro nn_integral_cong) (simp_all add: sum_distrib_left ring_distribs exp_diff exp_sum fin prod_ennreal) also have "… = (∏i∈I. ∫⇧^{+}x. ennreal (exp (l * (X i x - μ' i))) ∂M)" by (intro indep_vars_nn_integral fin indep_vars_compose2[OF indep]) auto also have "ennreal (exp (-l * ε)) * … ≤ ennreal (exp (-l * ε)) * (∏i∈I. ennreal (exp (l⇧^{2}* (b i - a i)⇧^{2}/ 8)))" proof (intro mult_left_mono prod_mono_ennreal) fix i assume i: "i ∈ I" from i interpret interval_bounded_random_variable M "X i" "a i" "b i" .. show "(∫⇧^{+}x. ennreal (exp (l * (X i x - μ' i))) ∂M) ≤ ennreal (exp (l⇧^{2}* (b i - a i)⇧^{2}/ 8))" unfolding μ'_def by (rule Hoeffdings_lemma_nn_integral) fact+ qed auto also have "… = ennreal (exp (-l*ε) * (∏i∈I. exp (l⇧^{2}* (b i - a i)⇧^{2}/ 8)))" by (simp add: prod_ennreal prod_nonneg flip: ennreal_mult) also have "exp (-l*ε) * (∏i∈I. exp (l⇧^{2}* (b i - a i)⇧^{2}/ 8)) = exp (d * l⇧^{2}/ 8 - l * ε)" by (simp add: exp_diff exp_minus sum_divide_distrib sum_distrib_left sum_distrib_right exp_sum fin divide_simps mult_ac d_def) also have "d * l⇧^{2}/ 8 - l * ε = -2 * ε⇧^{2}/ d" using d by (simp add: l_def field_simps power2_eq_square) finally show ?thesis by (subst (asm) ennreal_le_iff) (simp_all add: d_def) qed corollary Hoeffding_ineq_le: assumes ε: "ε ≥ 0" assumes "(∑i∈I. (b i - a i)⇧^{2}) > 0" shows "prob {x∈space M. (∑i∈I. X i x) ≤ μ - ε} ≤ exp (-2 * ε⇧^{2}/ (∑i∈I. (b i - a i)⇧^{2}))" proof - interpret flip: Hoeffding_ineq M I "λi x. -X i x" "λi. -b i" "λi. -a i" "-μ" proof unfold_locales fix i assume "i ∈ I" then interpret interval_bounded_random_variable M "X i" "a i" "b i" .. show "AE x in M. - X i x ∈ {- b i..- a i}" by (intro eventually_mono[OF AE_in_interval]) auto qed (auto simp: fin μ_def sum_negf intro: indep_vars_compose2[OF indep]) have "prob {x∈space M. (∑i∈I. X i x) ≤ μ - ε} = prob {x∈space M. (∑i∈I. -X i x) ≥ -μ + ε}" by (simp add: sum_negf algebra_simps) also have "… ≤ exp (- 2 * ε⇧^{2}/ (∑i∈I. (b i - a i)⇧^{2}))" using flip.Hoeffding_ineq_ge[OF ε] assms(2) by simp finally show ?thesis . qed corollary Hoeffding_ineq_abs_ge: assumes ε: "ε ≥ 0" assumes "(∑i∈I. (b i - a i)⇧^{2}) > 0" shows "prob {x∈space M. ¦(∑i∈I. X i x) - μ¦ ≥ ε} ≤ 2 * exp (-2 * ε⇧^{2}/ (∑i∈I. (b i - a i)⇧^{2}))" proof - have "{x∈space M. ¦(∑i∈I. X i x) - μ¦ ≥ ε} = {x∈space M. (∑i∈I. X i x) ≥ μ + ε} ∪ {x∈space M. (∑i∈I. X i x) ≤ μ - ε}" by auto also have "prob … ≤ prob {x∈space M. (∑i∈I. X i x) ≥ μ + ε} + prob {x∈space M. (∑i∈I. X i x) ≤ μ - ε}" by (intro measure_Un_le) auto also have "… ≤ exp (-2 * ε⇧^{2}/ (∑i∈I. (b i - a i)⇧^{2})) + exp (-2 * ε⇧^{2}/ (∑i∈I. (b i - a i)⇧^{2}))" by (intro add_mono Hoeffding_ineq_ge Hoeffding_ineq_le assms) finally show ?thesis by simp qed end subsection ‹Hoeffding's inequality for i.i.d. bounded random variables› text ‹ If we have ‹n› even identically-distributed random variables, the statement of Hoeffding's lemma simplifies a bit more: it shows that the probability that the average of the $X_i$ is more than ‹ε› above the expected value is no greater than $e^{\frac{-2ny^2}{(b-a)^2}}$. This essentially gives us a more concrete version of the weak law of large numbers: the law states that the probability vanishes for ‹n → ∞› for any ‹ε > 0›. Unlike Hoeffding's inequality, it does not assume the variables to have bounded support, but it does not provide concrete bounds. › locale iid_interval_bounded_random_variables = prob_space + fixes I :: "'b set" and X :: "'b ⇒ 'a ⇒ real" and Y :: "'a ⇒ real" fixes a b :: real assumes fin: "finite I" assumes indep: "indep_vars (λ_. borel) X I" assumes distr_X: "i ∈ I ⟹ distr M borel (X i) = distr M borel Y" assumes rv_Y [measurable]: "random_variable borel Y" assumes AE_in_interval: "AE x in M. Y x ∈ {a..b}" begin lemma random_variable [measurable]: assumes i: "i ∈ I" shows "random_variable borel (X i)" using i indep unfolding indep_vars_def by blast sublocale X: indep_interval_bounded_random_variables M I X "λ_. a" "λ_. b" proof fix i assume i: "i ∈ I" have "AE x in M. Y x ∈ {a..b}" by (fact AE_in_interval) also have "?this ⟷ (AE x in distr M borel Y. x ∈ {a..b})" by (subst AE_distr_iff) auto also have "distr M borel Y = distr M borel (X i)" using i by (simp add: distr_X) also have "(AE x in …. x ∈ {a..b}) ⟷ (AE x in M. X i x ∈ {a..b})" using i by (subst AE_distr_iff) auto finally show "AE x in M. X i x ∈ {a..b}" . qed (simp_all add: fin indep) lemma expectation_X [simp]: assumes i: "i ∈ I" shows "expectation (X i) = expectation Y" proof - have "expectation (X i) = lebesgue_integral (distr M borel (X i)) (λx. x)" using i by (intro integral_distr [symmetric]) auto also have "distr M borel (X i) = distr M borel Y" using i by (rule distr_X) also have "lebesgue_integral … (λx. x) = expectation Y" by (rule integral_distr) auto finally show "expectation (X i) = expectation Y" . qed end locale Hoeffding_ineq_iid = iid_interval_bounded_random_variables + fixes μ :: real defines "μ ≡ expectation Y" begin sublocale X: Hoeffding_ineq M I X "λ_. a" "λ_. b" "real (card I) * μ" by unfold_locales (simp_all add: μ_def) corollary assumes ε: "ε ≥ 0" assumes "a < b" "I ≠ {}" defines "n ≡ card I" shows Hoeffding_ineq_ge: "prob {x∈space M. (∑i∈I. X i x) ≥ n * μ + ε} ≤ exp (-2 * ε⇧^{2}/ (n * (b - a)⇧^{2}))" (is ?le) and Hoeffding_ineq_le: "prob {x∈space M. (∑i∈I. X i x) ≤ n * μ - ε} ≤ exp (-2 * ε⇧^{2}/ (n * (b - a)⇧^{2}))" (is ?ge) and Hoeffding_ineq_abs_ge: "prob {x∈space M. ¦(∑i∈I. X i x) - n * μ¦ ≥ ε} ≤ 2 * exp (-2 * ε⇧^{2}/ (n * (b - a)⇧^{2}))" (is ?abs_ge) proof - have pos: "(∑i∈I. (b - a)⇧^{2}) > 0" using ‹a < b› ‹I ≠ {}› fin by (intro sum_pos) auto show ?le using X.Hoeffding_ineq_ge[OF ε pos] by (simp add: n_def) show ?ge using X.Hoeffding_ineq_le[OF ε pos] by (simp add: n_def) show ?abs_ge using X.Hoeffding_ineq_abs_ge[OF ε pos] by (simp add: n_def) qed lemma assumes ε: "ε ≥ 0" assumes "a < b" "I ≠ {}" defines "n ≡ card I" shows Hoeffding_ineq_ge': "prob {x∈space M. (∑i∈I. X i x) / n ≥ μ + ε} ≤ exp (-2 * n * ε⇧^{2}/ (b - a)⇧^{2})" (is ?ge) and Hoeffding_ineq_le': "prob {x∈space M. (∑i∈I. X i x) / n ≤ μ - ε} ≤ exp (-2 * n * ε⇧^{2}/ (b - a)⇧^{2})" (is ?le) and Hoeffding_ineq_abs_ge': "prob {x∈space M. ¦(∑i∈I. X i x) / n - μ¦ ≥ ε} ≤ 2 * exp (-2 * n * ε⇧^{2}/ (b - a)⇧^{2})" (is ?abs_ge) proof - have "n > 0" using assms fin by (auto simp: field_simps) have ε': "ε * n ≥ 0" using ‹n > 0› ‹ε ≥ 0› by auto have eq: "- (2 * (ε * real n)⇧^{2}/ (real (card I) * (b - a)⇧^{2})) = - (2 * real n * ε⇧^{2}/ (b - a)⇧^{2})" using ‹n > 0› by (simp add: power2_eq_square divide_simps n_def) have "{x∈space M. (∑i∈I. X i x) / n ≥ μ + ε} = {x∈space M. (∑i∈I. X i x) ≥ μ * n + ε * n}" using ‹n > 0› by (intro Collect_cong conj_cong refl) (auto simp: field_simps) with Hoeffding_ineq_ge[OF ε' ‹a < b› ‹I ≠ {}›] ‹n > 0› eq show ?ge by (simp add: n_def mult_ac) have "{x∈space M. (∑i∈I. X i x) / n ≤ μ - ε} = {x∈space M. (∑i∈I. X i x) ≤ μ * n - ε * n}" using ‹n > 0› by (intro Collect_cong conj_cong refl) (auto simp: field_simps) with Hoeffding_ineq_le[OF ε' ‹a < b› ‹I ≠ {}›] ‹n > 0› eq show ?le by (simp add: n_def mult_ac) have "{x∈space M. ¦(∑i∈I. X i x) / n - μ¦ ≥ ε} = {x∈space M. ¦(∑i∈I. X i x) - μ * n¦ ≥ ε * n}" using ‹n > 0› by (intro Collect_cong conj_cong refl) (auto simp: field_simps) with Hoeffding_ineq_abs_ge[OF ε' ‹a < b› ‹I ≠ {}›] ‹n > 0› eq show ?abs_ge by (simp add: n_def mult_ac) qed end subsection ‹Hoeffding's Inequality for the Binomial distribution› text ‹ We can now apply Hoeffding's inequality to the Binomial distribution, which can be seen as the sum of ‹n› i.i.d. coin flips (the support of each of which is contained in $[0,1]$). › locale binomial_distribution = fixes n :: nat and p :: real assumes p: "p ∈ {0..1}" begin context fixes coins :: "(nat ⇒ bool) pmf" and μ assumes n: "n > 0" defines "coins ≡ Pi_pmf {..<n} False (λ_. bernoulli_pmf p)" begin lemma coins_component: assumes i: "i < n" shows "distr (measure_pmf coins) borel (λf. if f i then 1 else 0) = distr (measure_pmf (bernoulli_pmf p)) borel (λb. if b then 1 else 0)" proof - have "distr (measure_pmf coins) borel (λf. if f i then 1 else 0) = distr (measure_pmf (map_pmf (λf. f i) coins)) borel (λb. if b then 1 else 0)" unfolding map_pmf_rep_eq by (subst distr_distr) (auto simp: o_def) also have "map_pmf (λf. f i) coins = bernoulli_pmf p" unfolding coins_def using i by (subst Pi_pmf_component) auto finally show ?thesis unfolding map_pmf_rep_eq . qed lemma prob_binomial_pmf_conv_coins: "measure_pmf.prob (binomial_pmf n p) {x. P (real x)} = measure_pmf.prob coins {x. P (∑i<n. if x i then 1 else 0)}" proof - have eq1: "(∑i<n. if x i then 1 else 0) = real (card {i∈{..<n}. x i})" for x proof - have "(∑i<n. if x i then 1 else (0::real)) = (∑i∈{i∈{..<n}. x i}. 1)" by (intro sum.mono_neutral_cong_right) auto thus ?thesis by simp qed have eq2: "binomial_pmf n p = map_pmf (λv. card {i∈{..<n}. v i}) coins" unfolding coins_def by (rule binomial_pmf_altdef') (use p in auto) show ?thesis by (subst eq2) (simp_all add: eq1) qed interpretation Hoeffding_ineq_iid coins "{..<n}" "λi f. if f i then 1 else 0" "λf. if f 0 then 1 else 0" 0 1 p proof unfold_locales show "prob_space.indep_vars (measure_pmf coins) (λ_. borel) (λi f. if f i then 1 else 0) {..<n}" unfolding coins_def by (intro prob_space.indep_vars_compose2[OF _ indep_vars_Pi_pmf]) (auto simp: measure_pmf.prob_space_axioms) next have "measure_pmf.expectation coins (λf. if f 0 then 1 else 0 :: real) = measure_pmf.expectation (map_pmf (λf. f 0) coins) (λb. if b then 1 else 0 :: real)" by (simp add: coins_def) also have "map_pmf (λf. f 0) coins = bernoulli_pmf p" using n by (simp add: coins_def Pi_pmf_component) also have "measure_pmf.expectation … (λb. if b then 1 else 0) = p" using p by simp finally show "p ≡ measure_pmf.expectation coins (λf. if f 0 then 1 else 0)" by simp qed (auto simp: coins_component) corollary fixes ε :: real assumes ε: "ε ≥ 0" shows prob_ge: "measure_pmf.prob (binomial_pmf n p) {x. x ≥ n * p + ε} ≤ exp (-2 * ε⇧^{2}/ n)" and prob_le: "measure_pmf.prob (binomial_pmf n p) {x. x ≤ n * p - ε} ≤ exp (-2 * ε⇧^{2}/ n)" and prob_abs_ge: "measure_pmf.prob (binomial_pmf n p) {x. ¦x - n * p¦ ≥ ε} ≤ 2 * exp (-2 * ε⇧^{2}/ n)" proof - have [simp]: "{..<n} ≠ {}" using n by auto show "measure_pmf.prob (binomial_pmf n p) {x. x ≥ n * p + ε} ≤ exp (-2 * ε⇧^{2}/ n)" using Hoeffding_ineq_ge[of ε] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all) show "measure_pmf.prob (binomial_pmf n p) {x. x ≤ n * p - ε} ≤ exp (-2 * ε⇧^{2}/ n)" using Hoeffding_ineq_le[of ε] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all) show "measure_pmf.prob (binomial_pmf n p) {x. ¦x - n * p¦ ≥ ε} ≤ 2 * exp (-2 * ε⇧^{2}/ n)" using Hoeffding_ineq_abs_ge[of ε] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all) qed corollary fixes ε :: real assumes ε: "ε ≥ 0" shows prob_ge': "measure_pmf.prob (binomial_pmf n p) {x. x / n ≥ p + ε} ≤ exp (-2 * n * ε⇧^{2})" and prob_le': "measure_pmf.prob (binomial_pmf n p) {x. x / n ≤ p - ε} ≤ exp (-2 * n * ε⇧^{2})" and prob_abs_ge': "measure_pmf.prob (binomial_pmf n p) {x. ¦x / n - p¦ ≥ ε} ≤ 2 * exp (-2 * n * ε⇧^{2})" proof - have [simp]: "{..<n} ≠ {}" using n by auto show "measure_pmf.prob (binomial_pmf n p) {x. x / n ≥ p + ε} ≤ exp (-2 * n * ε⇧^{2})" using Hoeffding_ineq_ge'[of ε] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all) show "measure_pmf.prob (binomial_pmf n p) {x. x / n ≤ p - ε} ≤ exp (-2 * n * ε⇧^{2})" using Hoeffding_ineq_le'[of ε] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all) show "measure_pmf.prob (binomial_pmf n p) {x. ¦x / n - p¦ ≥ ε} ≤ 2 * exp (-2 * n * ε⇧^{2})" using Hoeffding_ineq_abs_ge'[of ε] by (subst prob_binomial_pmf_conv_coins) (use assms in simp_all) qed end end subsection ‹Tail bounds for the negative binomial distribution› text ‹ Since the tail probabilities of a negative Binomial distribution are equal to the tail probabilities of some Binomial distribution, we can obtain tail bounds for the negative Binomial distribution through the Hoeffding tail bounds for the Binomial distribution. › context fixes p q :: real assumes p: "p ∈ {0<..<1}" defines "q ≡ 1 - p" begin lemma prob_neg_binomial_pmf_ge_bound: fixes n :: nat and k :: real defines "μ ≡ real n * q / p" assumes k: "k ≥ 0" shows "measure_pmf.prob (neg_binomial_pmf n p) {x. real x ≥ μ + k} ≤ exp (- 2 * p ^ 3 * k⇧^{2}/ (n + p * k))" proof - consider "n = 0" | "p = 1" | "n > 0" "p ≠ 1" by blast thus ?thesis proof cases assume [simp]: "n = 0" show ?thesis using k by (simp add: indicator_def μ_def) next assume [simp]: "p = 1" show ?thesis using k by (auto simp add: indicator_def μ_def q_def) next assume n: "n > 0" and "p ≠ 1" from ‹p ≠ 1› and p have p: "p ∈ {0<..<1}" by auto from p have q: "q ∈ {0<..<1}" by (auto simp: q_def) define k1 where "k1 = μ + k" have k1: "k1 ≥ μ" using k by (simp add: k1_def) have "k1 > 0" by (rule less_le_trans[OF _ k1]) (use p n in ‹auto simp: q_def μ_def›) define k1' where "k1' = nat (ceiling k1)" have "μ ≥ 0" using p by (auto simp: μ_def q_def) have "¬(x < k1') ⟷ real x ≥ k1" for x unfolding k1'_def by linarith hence eq: "UNIV - {..<k1'} = {x. x ≥ k1}" by auto hence "measure_pmf.prob (neg_binomial_pmf n p) {n. n ≥ k1} = 1 - measure_pmf.prob (neg_binomial_pmf n p) {..<k1'}" using measure_pmf.prob_compl[of "{..<k1'}" "neg_binomial_pmf n p"] by simp also have "measure_pmf.prob (neg_binomial_pmf n p) {..<k1'} = measure_pmf.prob (binomial_pmf (n + k1' - 1) q) {..<k1'}" unfolding q_def using p by (intro prob_neg_binomial_pmf_lessThan) auto also have "1 - … = measure_pmf.prob (binomial_pmf (n + k1' - 1) q) {n. n ≥ k1}" using measure_pmf.prob_compl[of "{..<k1'}" "binomial_pmf (n + k1' - 1) q"] eq by simp also have "{x. real x ≥ k1} = {x. x ≥ real (n + k1' - 1) * q + (k1 - real (n + k1' - 1) * q)}" by simp also have "measure_pmf.prob (binomial_pmf (n + k1' - 1) q) … ≤ exp (-2 * (k1 - real (n + k1' - 1) * q)⇧^{2}/ real (n + k1' - 1))" proof (rule binomial_distribution.prob_ge) show "binomial_distribution q" by unfold_locales (use q in auto) next show "n + k1' - 1 > 0" using ‹k1 > 0› n unfolding k1'_def by linarith next have "real (n + nat ⌈k1⌉ - 1) ≤ real n + k1" using ‹k1 > 0› by linarith hence "real (n + k1' - 1) * q ≤ (real n + k1) * q" unfolding k1'_def by (intro mult_right_mono) (use p in ‹simp_all add: q_def›) also have "… ≤ k1" using k1 p by (simp add: q_def field_simps μ_def) finally show "0 ≤ k1 - real (n + k1' - 1) * q" by simp qed also have "{x. real (n + k1' - 1) * q + (k1 - real (n + k1' - 1) * q) ≤ real x} = {x. real x ≥ k1}" by simp also have "exp (-2 * (k1 - real (n + k1' - 1) * q)⇧^{2}/ real (n + k1' - 1)) ≤ exp (-2 * (k1 - (n + k1) * q)⇧^{2}/ (n + k1))" proof - have "real (n + k1' - Suc 0) ≤ real n + k1" unfolding k1'_def using ‹k1 > 0› by linarith moreover have "(real n + k1) * q ≤ k1" using k1 p by (auto simp: q_def field_simps μ_def) moreover have "1 < n + k1'" using n ‹k1 > 0› unfolding k1'_def by linarith ultimately have "2 * (k1 - real (n + k1' - 1) * q)⇧^{2}/ real (n + k1' - 1) ≥ 2 * (k1 - (n + k1) * q)⇧^{2}/ (n + k1)" by (intro frac_le mult_left_mono power_mono mult_nonneg_nonneg mult_right_mono diff_mono) (use q in simp_all) thus ?thesis by simp qed also have "… = exp (-2 * (p * k1 - q * n)⇧^{2}/ (k1 + n))" by (simp add: q_def algebra_simps) also have "-2 * (p * k1 - q * n)⇧^{2}= -2 * p⇧^{2}* (k1 - μ)⇧^{2}" using p by (auto simp: field_simps μ_def) also have "k1 - μ = k" by (simp add: k1_def μ_def) also note k1_def also have "μ + k + real n = real n / p + k" using p by (simp add: μ_def q_def field_simps) also have "- 2 * p⇧^{2}* k⇧^{2}/ (real n / p + k) = - 2 * p ^ 3 * k⇧^{2}/ (p * k + n)" using p by (simp add: field_simps power3_eq_cube power2_eq_square) finally show ?thesis by (simp add: add_ac) qed qed lemma prob_neg_binomial_pmf_le_bound: fixes n :: nat and k :: real defines "μ ≡ real n * q / p" assumes k: "k ≥ 0" shows "measure_pmf.prob (neg_binomial_pmf n p) {x. real x ≤ μ - k} ≤ exp (-2 * p ^ 3 * k⇧^{2}/ (n - p * k))" proof - consider "n = 0" | "p = 1" | "k > μ" | "n > 0" "p ≠ 1" "k ≤ μ" by force thus ?thesis proof cases assume [simp]: "n = 0" show ?thesis using k by (simp add: indicator_def μ_def) next assume [simp]: "p = 1" show ?thesis using k by (auto simp add: indicator_def μ_def q_def) next assume "k > μ" hence "{x. real x ≤ μ - k} = {}" by auto thus ?thesis by simp next assume n: "n > 0" and "p ≠ 1" and "k ≤ μ" from ‹p ≠ 1› and p have p: "p ∈ {0<..<1}" by auto from p have q: "q ∈ {0<..<1}" by (auto simp: q_def) define f :: "real ⇒ real" where "f = (λx. (p * x - q * n)⇧^{2}/ (x + n))" have f_mono: "f x ≥ f y" if "x ≥ 0" "y ≤ n * q / p" "x ≤ y" for x y :: real using that(3) proof (rule DERIV_nonpos_imp_nonincreasing) fix t assume t: "t ≥ x" "t ≤ y" have "x > -n" using n ‹x ≥ 0› by linarith hence "(f has_field_derivative ((p * t - q * n) * (n * (1 + p) + p * t) / (n + t) ^ 2)) (at t)" unfolding f_def using t by (auto intro!: derivative_eq_intros simp: algebra_simps q_def power2_eq_square) moreover { have "p * t ≤ p * y" using p by (intro mult_left_mono t) auto also have "p * y ≤ q * n" using ‹y ≤ n * q / p› p by (simp add: field_simps) finally have "p * t ≤ q * n" . } hence "(p * t - q * n) * (n * (1 + p) + p * t) / (n + t) ^ 2 ≤ 0" using p ‹x ≥ 0› t by (intro mult_nonpos_nonneg divide_nonpos_nonneg add_nonneg_nonneg mult_nonneg_nonneg) auto ultimately show "∃y. (f has_real_derivative y) (at t) ∧ y ≤ 0" by blast qed define k1 where "k1 = μ - k" have k1: "k1 ≤ real n * q / p" using assms by (simp add: μ_def k1_def) have "k1 ≥ 0" using k ‹k ≤ μ› by (simp add: μ_def k1_def) define k1' where "k1' = nat (floor k1)" have "μ ≥ 0" using p by (auto simp: μ_def q_def) have "(x ≤ k1') ⟷ real x ≤ k1" for x unfolding k1'_def not_less using ‹k1 ≥ 0› by linarith hence eq: "{n. n ≤ k1} = {..k1'}" by auto hence "measure_pmf.prob (neg_binomial_pmf n p) {n. n ≤ k1} = measure_pmf.prob (neg_binomial_pmf n p) {..k1'}" by simp also have "measure_pmf.prob (neg_binomial_pmf n p) {..k1'} = measure_pmf.prob (binomial_pmf (n + k1') q) {..k1'}" unfolding q_def using p by (intro prob_neg_binomial_pmf_atMost) auto also note eq [symmetric] also have "{x. real x ≤ k1} = {x. x ≤ real (n + k1') * q - (real (n + k1') * q - real k1')}" using eq by auto also have "measure_pmf.prob (binomial_pmf (n + k1') q) … ≤ exp (-2 * (real (n + k1') * q - real k1')⇧^{2}/ real (n + k1'))" proof (rule binomial_distribution.prob_le) show "binomial_distribution q" by unfold_locales (use q in auto) next show "n + k1' > 0" using ‹k1 ≥ 0› n unfolding k1'_def by linarith next have "p * k1' ≤ p * k1" using p ‹k1 ≥ 0› by (intro mult_left_mono) (auto simp: k1'_def) also have "… ≤ q * n" using k1 p by (simp add: field_simps) finally show "0 ≤ real (n + k1') * q - real k1'" by (simp add: algebra_simps q_def) qed also have "{x. real x ≤ real (n + k1') * q - (real (n + k1') * q - k1')} = {..k1'}" by auto also have "real (n + k1') * q - k1' = -(p * k1' - q * n)" by (simp add: q_def algebra_simps) also have "… ^ 2 = (p * k1' - q * n) ^ 2" by algebra also have "- 2 * (p * real k1' - q * real n)⇧^{2}/ real (n + k1') = -2 * f (real k1')" by (simp add: f_def) also have "f (real k1') ≥ f k1" by (rule f_mono) (use ‹k1 ≥ 0› k1 in ‹auto simp: k1'_def›) hence "exp (-2 * f (real k1')) ≤ exp (-2 * f k1)" by simp also have "… = exp (-2 * (p * k1 - q * n)⇧^{2}/ (k1 + n))" by (simp add: f_def) also have "-2 * (p * k1 - q * n)⇧^{2}= -2 * p⇧^{2}* (k1 - μ)⇧^{2}" using p by (auto simp: field_simps μ_def) also have "(k1 - μ) ^ 2 = k ^ 2" by (simp add: k1_def μ_def) also note k1_def also have "μ - k + real n = real n / p - k" using p by (simp add: μ_def q_def field_simps) also have "- 2 * p⇧^{2}* k⇧^{2}/ (real n / p - k) = - 2 * p ^ 3 * k⇧^{2}/ (n - p * k)" using p by (simp add: field_simps power3_eq_cube power2_eq_square) also have "{..k1'} = {x. real x ≤ μ - k}" using eq by (simp add: k1_def) finally show ?thesis . qed qed text ‹ Due to the function $exp(-l/x)$ being concave for $x \geq \frac{l}{2}$, the above two bounds can be combined into the following one for moderate values of ‹k›. (cf. 🌐‹https://math.stackexchange.com/questions/1565559›) › lemma prob_neg_binomial_pmf_abs_ge_bound: fixes n :: nat and k :: real defines "μ ≡ real n * q / p" assumes "k ≥ 0" and n_ge: "n ≥ p * k * (p⇧^{2}* k + 1)" shows "measure_pmf.prob (neg_binomial_pmf n p) {x. ¦real x - μ¦ ≥ k} ≤ 2 * exp (-2 * p ^ 3 * k ^ 2 / n)" proof (cases "k = 0") case False with ‹k ≥ 0› have k: "k > 0" by auto define l :: real where "l = 2 * p ^ 3 * k ^ 2" have l: "l > 0" using p k by (auto simp: l_def) define f :: "real ⇒ real" where "f = (λx. exp (-l / x))" define f' where "f' = (λx. -l * exp (-l / x) / x ^ 2)" have f'_mono: "f' x ≤ f' y" if "x ≥ l / 2" "x ≤ y" for x y :: real using that(2) proof (rule DERIV_nonneg_imp_nondecreasing) fix t assume t: "x ≤ t" "t ≤ y" have "t > 0" using that l t by auto have "(f' has_field_derivative (l * (2 * t - l) / (exp (l / t) * t ^ 4))) (at t)" unfolding f'_def using t that ‹t > 0› by (auto intro!: derivative_eq_intros simp: field_simps exp_minus simp flip: power_Suc) moreover have "l * (2 * t - l) / (exp (l / t) * t ^ 4) ≥ 0" using that t l by (intro divide_nonneg_pos mult_nonneg_nonneg) auto ultimately show "∃y. (f' has_real_derivative y) (at t) ∧ 0 ≤ y" by blast qed have convex: "convex_on {l/2..} (λx. -f x)" unfolding f_def proof (intro convex_on_realI[where f' = f']) show "((λx. - exp (- l / x)) has_real_derivative f' x) (at x)" if "x ∈ {l/2..}" for x using that l by (auto intro!: derivative_eq_intros simp: f'_def power2_eq_square algebra_simps) qed (use l in ‹auto intro!: f'_mono›) have eq: "{x. ¦real x - μ¦ ≥ k} = {x. real x ≤ μ - k} ∪ {x. real x ≥ μ + k}" by auto have "measure_pmf.prob (neg_binomial_pmf n p) {x. ¦real x - μ¦ ≥ k} ≤ measure_pmf.prob (neg_binomial_pmf n p) {x. real x ≤ μ - k} + measure_pmf.prob (neg_binomial_pmf n p) {x. real x ≥ μ + k}" by (subst eq, rule measure_Un_le) auto also have "… ≤ exp (-2 * p ^ 3 * k⇧^{2}/ (n - p * k)) + exp (-2 * p ^ 3 * k⇧^{2}/ (n + p * k))" unfolding μ_def by (intro prob_neg_binomial_pmf_le_bound prob_neg_binomial_pmf_ge_bound add_mono ‹k ≥ 0›) also have "… = 2 * (1/2 * f (n - p * k) + 1/2 * f (n + p * k))" by (simp add: f_def l_def) also have "1/2 * f (n - p * k) + 1/2 * f (n + p * k) ≤ f (1/2 * (n - p * k) + 1/2 * (n + p * k))" proof - let ?x = "n - p * k" and ?y = "n + p * k" have le1: "l / 2 ≤ ?x" using n_ge by (simp add: l_def power2_eq_square power3_eq_cube algebra_simps) also have "… ≤ ?y" using p k by simp finally have le2: "l / 2 ≤ ?y" . have "-f ((1 - 1 / 2) *⇩_{R}?x + (1 / 2) *⇩_{R}?y) ≤ (1 - 1 / 2) * - f ?x + 1 / 2 * - f ?y" using le1 le2 by (intro convex_onD[OF convex]) auto thus ?thesis by simp qed also have "1/2 * (n - p * k) + 1/2 * (n + p * k) = n" by (simp add: algebra_simps) also have "2 * f n = 2 * exp (-l / n)" by (simp add: f_def) finally show ?thesis by (simp add: l_def) qed auto end end