Theory Prob_Lemmas
theory Prob_Lemmas
imports
"HOL-Probability.Probability"
Girth_Chromatic.Girth_Chromatic
Ugraph_Misc
begin
section‹Lemmas about probabilities›
text‹In this section, auxiliary lemmas for computing bounds on expectation and probabilites
of random variables are set up.›
subsection‹Indicator variables and valid probability values›
abbreviation rind :: "'a set ⇒ 'a ⇒ real" where
"rind ≡ indicator"
lemma product_indicator:
"rind A x * rind B x = rind (A ∩ B) x"
unfolding indicator_def
by auto
text‹We call a real number `valid' iff it is in the range 0 to 1, inclusively, and additionally
`nonzero' iff it is neither 0 nor 1.›
abbreviation "valid_prob (p :: real) ≡ 0 ≤ p ∧ p ≤ 1"
abbreviation "nonzero_prob (p :: real) ≡ 0 < p ∧ p < 1"
text‹A function @{typ "'a ⇒ real"} is a `valid probability function' iff each value in the image
is valid, and similarly for `nonzero'.›
abbreviation "valid_prob_fun f ≡ (∀n. valid_prob (f n))"
abbreviation "nonzero_prob_fun f ≡ (∀n. nonzero_prob (f n))"
lemma nonzero_fun_is_valid_fun: "nonzero_prob_fun f ⟹ valid_prob_fun f"
by (simp add: less_imp_le)
subsection‹Expectation and variance›
context prob_space
begin
text‹Note that there is already a notion of independent sets (see @{term indep_set}), but we use
the following -- simpler -- definition:›
definition "indep A B ⟷ prob (A ∩ B) = prob A * prob B"
text‹The probability of an indicator variable is equal to its expectation:›
lemma expectation_indicator:
"A ∈ events ⟹ expectation (rind A) = prob A"
by simp
text‹For a non-negative random variable @{term X}, the Markov inequality gives the following
upper bound: \[ \Pr[X \ge a] \le \frac{\Ex[X]}{a} \]›
lemma markov_inequality:
assumes "⋀a. 0 ≤ X a" and "integrable M X" "0 < t"
shows "prob {a ∈ space M. t ≤ X a} ≤ expectation X / t"
proof -
have "(∫⇧+ x. ennreal (X x) ∂M) = (∫x. X x ∂M)"
using assms by (intro nn_integral_eq_integral) auto
thus ?thesis
using assms nn_integral_Markov_inequality[of X "space M" M "1 / t"]
by (auto cong: nn_integral_cong simp: emeasure_eq_measure ennreal_mult[symmetric])
qed
text‹$\Var[X] = \Ex[X^2] - \Ex[X]^2 $›
lemma variance_expectation:
fixes X :: "'a ⇒ real"
assumes "integrable M (λx. (X x)^2)" and "X ∈ borel_measurable M"
shows
"integrable M (λx. (X x - expectation X)^2)" (is ?integrable)
"variance X = expectation (λx. (X x)^2) - (expectation X)^2" (is ?variance)
proof -
have int: "integrable M X"
using integrable_squareD[OF assms] by simp
have "(λx. (X x - expectation X)^2) = (λx. (X x)^2 + (expectation X)^2 - (2 * X x * expectation X))"
by (simp only: power2_diff)
hence
"variance X = expectation (λx. (X x)^2) + (expectation X)^2 + expectation (λx. - (2 * X x * expectation X))"
?integrable
using integral_add by (simp add: int assms prob_space)+
thus ?variance ?integrable
by (simp add: int power2_eq_square)+
qed
text‹A corollary from the Markov inequality is Chebyshev's inequality, which gives an upper
bound for the deviation of a random variable from its expectation:
\[ \Pr[\left| Y - \Ex[Y] \right| \ge s] \le \frac{\Var[X]}{a^2} \]›
lemma chebyshev_inequality:
fixes Y :: "'a ⇒ real"
assumes Y_int: "integrable M (λy. (Y y)^2)"
assumes Y_borel: "Y ∈ borel_measurable M"
fixes s :: "real"
assumes s_pos: "0 < s"
shows "prob {a ∈ space M. s ≤ ¦Y a - expectation Y¦} ≤ variance Y / s^2"
proof -
let ?X = "λa. (Y a - expectation Y)^2"
let ?t = "s^2"
have "0 < ?t"
using s_pos by simp
hence "prob {a ∈ space M. ?t ≤ ?X a} ≤ variance Y / s^2"
using markov_inequality variance_expectation[OF Y_int Y_borel] by (simp add: field_simps)
moreover have "{a ∈ space M. ?t ≤ ?X a} = {a ∈ space M. s ≤ ¦Y a - expectation Y¦}"
using abs_le_square_iff s_pos by force
ultimately show ?thesis
by simp
qed
text‹Hence, we can derive an upper bound for the probability that a random variable is $0$.›
corollary chebyshev_prob_zero:
fixes Y :: "'a ⇒ real"
assumes Y_int: "integrable M (λy. (Y y)^2)"
assumes Y_borel: "Y ∈ borel_measurable M"
assumes μ_pos: "expectation Y > 0"
shows "prob {a ∈ space M. Y a = 0} ≤ expectation (λy. (Y y)^2) / (expectation Y)^2 - 1"
proof -
let ?s = "expectation Y"
have "prob {a ∈ space M. Y a = 0} ≤ prob {a ∈ space M. ?s ≤ ¦Y a - ?s¦}"
using Y_borel by (auto intro!: finite_measure_mono borel_measurable_diff borel_measurable_abs borel_measurable_le)
also have "… ≤ variance Y / ?s^2"
using assms by (fact chebyshev_inequality)
also have "… = (expectation (λy. (Y y)^2) - ?s^2) / ?s^2"
using Y_int Y_borel by (simp add: variance_expectation)
also have "… = expectation (λy. (Y y)^2) / ?s^2 - 1"
using μ_pos by (simp add: field_simps)
finally show ?thesis .
qed
end
subsection‹Sets of indicator variables›
text‹\label{sec:delta}
This section introduces some inequalities about expectation and other values related to the sum of
a set of random indicators.›
locale prob_space_with_indicators = prob_space +
fixes I :: "'i set"
assumes finite_I: "finite I"
fixes A :: "'i ⇒ 'a set"
assumes A: "A ` I ⊆ events"
assumes prob_non_zero: "∃i ∈ I. 0 < prob (A i)"
begin
text‹We call the underlying sets @{term "A i"} for each @{term "i ∈ I"}, and the corresponding
indicator variables @{term "X i"}. The sum is denoted by @{term Y}, and its expectation by
@{term μ}.›
definition "X i = rind (A i)"
definition "Y x = (∑i ∈ I. X i x)"
definition "μ = expectation Y"
text‹In the lecture notes, the following two relations are called $\sim$ and $\nsim$,
respectively. Note that they are not the opposite of each other.›
abbreviation ineq_indep :: "'i ⇒ 'i ⇒ bool" where
"ineq_indep i j ≡ (i ≠ j ∧ indep (A i) (A j))"
abbreviation ineq_dep :: "'i ⇒ 'i ⇒ bool" where
"ineq_dep i j ≡ (i ≠ j ∧ ¬indep (A i) (A j))"
definition "Δ⇩a = (∑i ∈ I. ∑j | j ∈ I ∧ i ≠ j. prob (A i ∩ A j))"
definition "Δ⇩d = (∑i ∈ I. ∑j | j ∈ I ∧ ineq_dep i j. prob (A i ∩ A j))"
lemma Δ_zero:
assumes "⋀i j. i ∈ I ⟹ j ∈ I ⟹ i ≠ j ⟹ indep (A i) (A j)"
shows "Δ⇩d = 0"
proof -
{
fix i
assume "i ∈ I"
hence "{j. j ∈ I ∧ ineq_dep i j} = {}"
using assms by auto
hence "(∑j | j ∈ I ∧ ineq_dep i j. prob (A i ∩ A j)) = 0"
using sum.empty by metis
}
hence "Δ⇩d = (0 :: real) * card I"
unfolding Δ⇩d_def by simp
thus ?thesis
by simp
qed
lemma A_events[measurable]: "i ∈ I ⟹ A i ∈ events"
using A by auto
lemma expectation_X_Y: "μ = (∑i∈I. expectation (X i))"
unfolding μ_def Y_def[abs_def] X_def
by (simp add: less_top[symmetric])
lemma expectation_X_non_zero: "∃i ∈ I. 0 < expectation (X i)"
unfolding X_def using prob_non_zero expectation_indicator by simp
corollary μ_non_zero[simp]: "0 < μ"
unfolding expectation_X_Y
using expectation_X_non_zero
by (auto intro!: sum_lower finite_I
simp add: expectation_indicator X_def)
lemma Δ⇩d_nonneg: "0 ≤ Δ⇩d"
unfolding Δ⇩d_def
by (simp add: sum_nonneg)
corollary μ_sq_non_zero[simp]: "0 < μ^2"
by (rule zero_less_power) simp
lemma Y_square_unfold: "(λx. (Y x)^2) = (λx. ∑i ∈ I. ∑j ∈ I. rind (A i ∩ A j) x)"
unfolding fun_eq_iff Y_def X_def
by (auto simp: sum_square product_indicator)
lemma integrable_Y_sq[simp]: "integrable M (λy. (Y y)^2)"
unfolding Y_square_unfold
by (simp add: sets.Int less_top[symmetric])
lemma measurable_Y[measurable]: "Y ∈ borel_measurable M"
unfolding Y_def[abs_def] X_def by simp
lemma expectation_Y_Δ: "expectation (λx. (Y x)^2) = μ + Δ⇩a"
proof -
let ?ei = "λi j. expectation (rind (A i ∩ A j))"
have "expectation (λx. (Y x)^2) = (∑i ∈ I. ∑j ∈ I. ?ei i j)"
unfolding Y_square_unfold by (simp add: less_top[symmetric])
also have "… = (∑i ∈ I. ∑j ∈ I. if i = j then ?ei i j else ?ei i j)"
by simp
also have "… = (∑i ∈ I. (∑j | j ∈ I ∧ i = j. ?ei i j) + (∑j | j ∈ I ∧ i ≠ j. ?ei i j))"
by (simp only: sum_split[OF finite_I])
also have "… = (∑i ∈ I. ∑j | j ∈ I ∧ i = j. ?ei i j) + (∑i ∈ I. ∑j | j ∈ I ∧ i ≠ j. ?ei i j)" (is "_ = ?lhs + ?rhs")
by (fact sum.distrib)
also have "… = μ + Δ⇩a"
proof -
have "?lhs = μ"
proof -
{
fix i
assume i: "i ∈ I"
have "(∑j | j ∈ I ∧ i = j. ?ei i j) = (∑j | j ∈ I ∧ i = j. ?ei i i)"
by simp
also have "… = (∑j | i = j. ?ei i i)"
using i by metis
also have "… = expectation (rind (A i))"
by auto
finally have "(∑j | j ∈ I ∧ i = j. ?ei i j) = …" .
}
hence "?lhs = (∑i∈I. expectation (rind (A i)))"
by force
also have "… = μ"
unfolding expectation_X_Y X_def ..
finally show "?lhs = μ" .
qed
moreover have "?rhs = Δ⇩a"
proof -
{
fix i j
assume "i ∈ I" "j ∈ I"
with A have "A i ∩ A j ∈ events" by blast
hence "?ei i j = prob (A i ∩ A j)"
by (fact expectation_indicator)
}
thus ?thesis
unfolding Δ⇩a_def by simp
qed
ultimately show "?lhs + ?rhs = μ + Δ⇩a"
by simp
qed
finally show ?thesis .
qed
lemma Δ_expectation_X: "Δ⇩a ≤ μ^2 + Δ⇩d"
proof -
let ?p = "λi j. prob (A i ∩ A j)"
let ?p' = "λi j. prob (A i) * prob (A j)"
let ?ie = "λi j. indep (A i) (A j)"
have "Δ⇩a = (∑i ∈ I. ∑j | j ∈ I ∧ i ≠ j. if ?ie i j then ?p i j else ?p i j)"
unfolding Δ⇩a_def by simp
also have "… = (∑i ∈ I. (∑j | j ∈ I ∧ ineq_indep i j. ?p i j) + (∑j | j ∈ I ∧ ineq_dep i j. ?p i j))"
by (simp only: sum_split2[OF finite_I])
also have "… = (∑i ∈ I. ∑j | j ∈ I ∧ ineq_indep i j. ?p i j) + Δ⇩d" (is "_ = ?lhs + _")
unfolding Δ⇩d_def by (fact sum.distrib)
also have "… ≤ μ^2 + Δ⇩d"
proof (rule add_right_mono)
have "(∑i∈I. ∑j | j ∈ I ∧ ineq_indep i j. ?p i j) = (∑i ∈ I. ∑j | j ∈ I ∧ ineq_indep i j. ?p' i j)"
unfolding indep_def by simp
also have "… ≤ (∑i ∈ I. ∑j ∈ I. ?p' i j)"
proof (rule sum_mono)
fix i
assume "i ∈ I"
show "(∑j | j ∈ I ∧ ineq_indep i j. ?p' i j) ≤ (∑j∈I. ?p' i j)"
by (rule sum_upper[OF finite_I]) (simp add: zero_le_mult_iff)
qed
also have "… = (∑i ∈ I. prob (A i))^2"
by (fact sum_square[symmetric])
also have "… = (∑i ∈ I. expectation (X i))^2"
unfolding X_def using expectation_indicator A by simp
also have "… = μ^2"
using expectation_X_Y[symmetric] by simp
finally show "?lhs ≤ μ^2" .
qed
finally show ?thesis .
qed
lemma prob_μ_Δ⇩a: "prob {a ∈ space M. Y a = 0} ≤ 1 / μ + Δ⇩a / μ^2 - 1"
proof -
have "prob {a ∈ space M. Y a = 0} ≤ expectation (λy. (Y y)^2) / μ^2 - 1"
unfolding μ_def by (rule chebyshev_prob_zero) (simp add: μ_def[symmetric])+
also have "… = (μ + Δ⇩a) / μ^2 - 1"
using expectation_Y_Δ by simp
also have "… = 1 / μ + Δ⇩a / μ^2 - 1"
unfolding power2_eq_square by (simp add: field_simps add_divide_distrib)
finally show ?thesis .
qed
lemma prob_μ_Δ⇩d: "prob {a ∈ space M. Y a = 0} ≤ 1/μ + Δ⇩d/μ^2"
proof -
have "prob {a ∈ space M. Y a = 0} ≤ 1/μ + Δ⇩a/μ^2 - 1"
by (fact prob_μ_Δ⇩a)
also have "… = (1/μ - 1) + Δ⇩a/μ^2"
by simp
also have "… ≤ (1/μ - 1) + (μ^2 + Δ⇩d)/μ^2"
using divide_right_mono[OF Δ_expectation_X] by simp
also have "… = 1/μ + Δ⇩d/μ^2"
using μ_sq_non_zero by (simp add: field_simps)
finally show ?thesis .
qed
end
end