Theory Prob_Lemmas

theory Prob_Lemmas
imports
  "HOL-Probability.Probability"
  Girth_Chromatic.Girth_Chromatic
  Ugraph_Misc
begin

section‹Lemmas about probabilities›

text‹In this section, auxiliary lemmas for computing bounds on expectation and probabilites
of random variables are set up.›

subsection‹Indicator variables and valid probability values›

abbreviation rind :: "'a set ⇒ 'a ⇒ real" where
"rind ≡ indicator"

lemma product_indicator:
  "rind A x * rind B x = rind (A ∩ B) x"
unfolding indicator_def
by auto

text‹We call a real number `valid' iff it is in the range 0 to 1, inclusively, and additionally
`nonzero' iff it is neither 0 nor 1.›

abbreviation "valid_prob (p :: real) ≡ 0 ≤ p ∧ p ≤ 1"
abbreviation "nonzero_prob (p :: real) ≡ 0 < p ∧ p < 1"

text‹A function @{typ "'a ⇒ real"} is a `valid probability function' iff each value in the image
is valid, and similarly for `nonzero'.›

abbreviation "valid_prob_fun f ≡ (∀n. valid_prob (f n))"
abbreviation "nonzero_prob_fun f ≡ (∀n. nonzero_prob (f n))"

lemma nonzero_fun_is_valid_fun: "nonzero_prob_fun f ⟹ valid_prob_fun f"
by (simp add: less_imp_le)

subsection‹Expectation and variance›

context prob_space
begin

text‹Note that there is already a notion of independent sets (see @{term indep_set}), but we use
the following -- simpler -- definition:›

definition "indep A B ⟷ prob (A ∩ B) = prob A * prob B"

text‹The probability of an indicator variable is equal to its expectation:›

lemma expectation_indicator:
  "A ∈ events ⟹ expectation (rind A) = prob A"
  by simp

text‹For a non-negative random variable @{term X}, the Markov inequality gives the following
upper bound: \[ \Pr[X \ge a] \le \frac{\Ex[X]}{a} \]›

lemma markov_inequality:
  assumes "⋀a. 0 ≤ X a" and "integrable M X" "0 < t"
  shows "prob {a ∈ space M. t ≤ X a} ≤ expectation X / t"
proof -
  ― ‹proof adapted from @{thm [source] edge_space.Markov_inequality}, but generalized to arbitrary
       @{term prob_space}s›
  have "(∫⇧⁺ x. ennreal (X x) ∂M) = (∫x. X x ∂M)"
    using assms by (intro nn_integral_eq_integral) auto
  thus ?thesis
    using assms nn_integral_Markov_inequality[of X "space M" M "1 / t"]
    by (auto cong: nn_integral_cong simp: emeasure_eq_measure ennreal_mult[symmetric])
qed

text‹$\Var[X] = \Ex[X^2] - \Ex[X]^2 $›

lemma variance_expectation:
  fixes X :: "'a ⇒ real"
  assumes "integrable M (λx. (X x)^2)" and "X ∈ borel_measurable M"
  shows
    "integrable M (λx. (X x - expectation X)^2)" (is ?integrable)
    "variance X = expectation (λx. (X x)^2) - (expectation X)^2" (is ?variance)
proof -
  have int: "integrable M X"
    using integrable_squareD[OF assms] by simp

  have "(λx. (X x - expectation X)^2) = (λx. (X x)^2 + (expectation X)^2 - (2 * X x * expectation X))"
    by (simp only: power2_diff)
  hence
    "variance X = expectation (λx. (X x)^2) + (expectation X)^2 + expectation (λx. - (2 * X x * expectation X))"
    ?integrable
    using integral_add by (simp add: int assms prob_space)+

  thus ?variance ?integrable
    by (simp add: int power2_eq_square)+
qed

text‹A corollary from the Markov inequality is Chebyshev's inequality, which gives an upper
bound for the deviation of a random variable from its expectation:
\[ \Pr[\left| Y - \Ex[Y] \right| \ge s] \le \frac{\Var[X]}{a^2} \]›

lemma chebyshev_inequality:
  fixes Y :: "'a ⇒ real"
  assumes Y_int: "integrable M (λy. (Y y)^2)"
  assumes Y_borel: "Y ∈ borel_measurable M"
  fixes s :: "real"
  assumes s_pos: "0 < s"
  shows "prob {a ∈ space M. s ≤ ¦Y a - expectation Y¦} ≤ variance Y / s^2"
proof -
  let ?X = "λa. (Y a - expectation Y)^2"
  let ?t = "s^2"

  have "0 < ?t"
    using s_pos by simp
  hence "prob {a ∈ space M. ?t ≤ ?X a} ≤ variance Y / s^2"
    using markov_inequality variance_expectation[OF Y_int Y_borel] by (simp add: field_simps)
  moreover have "{a ∈ space M. ?t ≤ ?X a} = {a ∈ space M. s ≤ ¦Y a - expectation Y¦}"
    using abs_le_square_iff s_pos by force
  ultimately show ?thesis
    by simp
qed

text‹Hence, we can derive an upper bound for the probability that a random variable is $0$.›

corollary chebyshev_prob_zero:
  fixes Y :: "'a ⇒ real"
  assumes Y_int: "integrable M (λy. (Y y)^2)"
  assumes Y_borel: "Y ∈ borel_measurable M"
  assumes μ_pos: "expectation Y > 0"
  shows "prob {a ∈ space M. Y a = 0} ≤ expectation (λy. (Y y)^2) / (expectation Y)^2 - 1"
proof -
  let ?s = "expectation Y"

  have "prob {a ∈ space M. Y a = 0} ≤ prob {a ∈ space M. ?s ≤ ¦Y a - ?s¦}"
    using Y_borel by (auto intro!: finite_measure_mono borel_measurable_diff borel_measurable_abs borel_measurable_le)
  also have "… ≤ variance Y / ?s^2"
    using assms by (fact chebyshev_inequality)
  also have "… = (expectation (λy. (Y y)^2) - ?s^2) / ?s^2"
    using Y_int Y_borel by (simp add: variance_expectation)
  also have "… = expectation (λy. (Y y)^2) / ?s^2 - 1"
    using μ_pos by (simp add: field_simps)
  finally show ?thesis .
qed

end

subsection‹Sets of indicator variables›

text‹\label{sec:delta}
This section introduces some inequalities about expectation and other values related to the sum of
a set of random indicators.›

locale prob_space_with_indicators = prob_space +
  fixes I :: "'i set"
  assumes finite_I: "finite I"

  fixes A :: "'i ⇒ 'a set"
  assumes A: "A ` I ⊆ events"

  assumes prob_non_zero: "∃i ∈ I. 0 < prob (A i)"
begin

text‹We call the underlying sets @{term "A i"} for each @{term "i ∈ I"}, and the corresponding
indicator variables @{term "X i"}. The sum is denoted by @{term Y}, and its expectation by
@{term μ}.›

definition "X i = rind (A i)"
definition "Y x = (∑i ∈ I. X i x)"

definition "μ = expectation Y"

text‹In the lecture notes, the following two relations are called $\sim$ and $\nsim$,
respectively. Note that they are not the opposite of each other.›

abbreviation ineq_indep :: "'i ⇒ 'i ⇒ bool" where
"ineq_indep i j ≡ (i ≠ j ∧ indep (A i) (A j))"

abbreviation ineq_dep :: "'i ⇒ 'i ⇒ bool" where
"ineq_dep i j ≡ (i ≠ j ∧ ¬indep (A i) (A j))"

definition "Δ⇩_a = (∑i ∈ I. ∑j | j ∈ I ∧ i ≠ j. prob (A i ∩ A j))"
definition "Δ⇩_d = (∑i ∈ I. ∑j | j ∈ I ∧ ineq_dep i j. prob (A i ∩ A j))"

lemma Δ_zero:
  assumes "⋀i j. i ∈ I ⟹ j ∈ I ⟹ i ≠ j ⟹ indep (A i) (A j)"
  shows "Δ⇩_d = 0"
proof -
  {
    fix i
    assume "i ∈ I"
    hence "{j. j ∈ I ∧ ineq_dep i j} = {}"
      using assms by auto
    hence "(∑j | j ∈ I ∧ ineq_dep i j. prob (A i ∩ A j)) = 0"
      using sum.empty by metis
  }
  hence "Δ⇩_d = (0 :: real) * card I"
    unfolding Δ⇩_d_def by simp
  thus ?thesis
    by simp
qed

lemma A_events[measurable]: "i ∈ I ⟹ A i ∈ events"
using A by auto

lemma expectation_X_Y: "μ = (∑i∈I. expectation (X i))"
unfolding μ_def Y_def[abs_def] X_def
by (simp add: less_top[symmetric])

lemma expectation_X_non_zero: "∃i ∈ I. 0 < expectation (X i)"
unfolding X_def using prob_non_zero expectation_indicator by simp

corollary μ_non_zero[simp]: "0 < μ"
unfolding expectation_X_Y
using expectation_X_non_zero
by (auto intro!: sum_lower finite_I
         simp add: expectation_indicator X_def)

lemma Δ⇩_d_nonneg: "0 ≤ Δ⇩_d"
unfolding Δ⇩_d_def
by (simp add: sum_nonneg)

corollary μ_sq_non_zero[simp]: "0 < μ^2"
by (rule zero_less_power) simp

lemma Y_square_unfold: "(λx. (Y x)^2) = (λx. ∑i ∈ I. ∑j ∈ I. rind (A i ∩ A j) x)"
unfolding fun_eq_iff Y_def X_def
by (auto simp: sum_square product_indicator)

lemma integrable_Y_sq[simp]: "integrable M (λy. (Y y)^2)"
unfolding Y_square_unfold
by (simp add: sets.Int less_top[symmetric])

lemma measurable_Y[measurable]: "Y ∈ borel_measurable M"
unfolding Y_def[abs_def] X_def by simp

lemma expectation_Y_Δ: "expectation (λx. (Y x)^2) = μ + Δ⇩_a"
proof -
  let ?ei = "λi j. expectation (rind (A i ∩ A j))"

  have "expectation (λx. (Y x)^2) = (∑i ∈ I. ∑j ∈ I. ?ei i j)"
    unfolding Y_square_unfold by (simp add: less_top[symmetric])
  also have "… = (∑i ∈ I. ∑j ∈ I. if i = j then ?ei i j else ?ei i j)"
    by simp
  also have "… = (∑i ∈ I. (∑j | j ∈ I ∧ i = j. ?ei i j) + (∑j | j ∈ I ∧ i ≠ j. ?ei i j))"
    by (simp only: sum_split[OF finite_I])
  also have "… = (∑i ∈ I. ∑j | j ∈ I ∧ i = j. ?ei i j) + (∑i ∈ I. ∑j | j ∈ I ∧ i ≠ j. ?ei i j)" (is "_ = ?lhs + ?rhs")
    by (fact sum.distrib)
  also have "… =  μ + Δ⇩_a"
    proof -
      have "?lhs = μ"
        proof -
          {
            fix i
            assume i: "i ∈ I"
            have "(∑j | j ∈ I ∧ i = j. ?ei i j) = (∑j | j ∈ I ∧ i = j. ?ei i i)"
              by simp
            also have "… = (∑j | i = j. ?ei i i)"
              using i by metis
            also have "… = expectation (rind (A i))"
              by auto
            finally have "(∑j | j ∈ I ∧ i = j. ?ei i j) = …" .
          }
          hence "?lhs = (∑i∈I. expectation (rind (A i)))"
            by force
          also have "… = μ"
            unfolding expectation_X_Y X_def ..
          finally show "?lhs = μ" .
        qed
      moreover have "?rhs = Δ⇩_a"
        proof -
          {
            fix i j
            assume "i ∈ I" "j ∈ I"
            with A have "A i ∩ A j ∈ events" by blast
            hence "?ei i j = prob (A i ∩ A j)"
              by (fact expectation_indicator)
          }
          thus ?thesis
            unfolding Δ⇩_a_def by simp
        qed
      ultimately show "?lhs + ?rhs = μ + Δ⇩_a"
        by simp
    qed
  finally show ?thesis .
qed

lemma Δ_expectation_X: "Δ⇩_a ≤ μ^2 + Δ⇩_d"
proof -
  let ?p = "λi j. prob (A i ∩ A j)"
  let ?p' = "λi j. prob (A i) * prob (A j)"
  let ?ie = "λi j. indep (A i) (A j)"

  have "Δ⇩_a = (∑i ∈ I. ∑j | j ∈ I ∧ i ≠ j. if ?ie i j then ?p i j else ?p i j)"
    unfolding Δ⇩_a_def by simp
  also have "… = (∑i ∈ I. (∑j | j ∈ I ∧ ineq_indep i j. ?p i j) + (∑j | j ∈ I ∧ ineq_dep i j. ?p i j))"
    by (simp only: sum_split2[OF finite_I])
  also have "… = (∑i ∈ I. ∑j | j ∈ I ∧ ineq_indep i j. ?p i j) + Δ⇩_d" (is "_ = ?lhs + _")
    unfolding Δ⇩_d_def by (fact sum.distrib)
  also have "… ≤ μ^2 + Δ⇩_d"
    proof (rule add_right_mono)
      have "(∑i∈I. ∑j | j ∈ I ∧ ineq_indep i j. ?p i j) = (∑i ∈ I. ∑j | j ∈ I ∧ ineq_indep i j. ?p' i j)"
        unfolding indep_def by simp
      also have "… ≤ (∑i ∈ I. ∑j ∈ I. ?p' i j)"
        proof (rule sum_mono)
          fix i
          assume "i ∈ I"
          show "(∑j | j ∈ I ∧ ineq_indep i j. ?p' i j) ≤ (∑j∈I. ?p' i j)"
            by (rule sum_upper[OF finite_I]) (simp add: zero_le_mult_iff)
        qed
      also have "… = (∑i ∈ I. prob (A i))^2"
        by (fact sum_square[symmetric])
      also have "… = (∑i ∈ I. expectation (X i))^2"
        unfolding X_def using expectation_indicator A by simp
      also have "… = μ^2"
        using expectation_X_Y[symmetric] by simp
      finally show "?lhs ≤ μ^2" .
    qed
  finally show ?thesis .
qed

lemma prob_μ_Δ⇩_a: "prob {a ∈ space M. Y a = 0} ≤ 1 / μ + Δ⇩_a / μ^2 - 1"
proof -
  have "prob {a ∈ space M. Y a = 0} ≤ expectation (λy. (Y y)^2) / μ^2 - 1"
    unfolding μ_def by (rule chebyshev_prob_zero) (simp add: μ_def[symmetric])+
  also have "… = (μ + Δ⇩_a) / μ^2 - 1"
    using expectation_Y_Δ by simp
  also have "… = 1 / μ + Δ⇩_a / μ^2 - 1"
    unfolding power2_eq_square by (simp add: field_simps add_divide_distrib)
  finally show ?thesis .
qed

lemma prob_μ_Δ⇩_d: "prob {a ∈ space M. Y a = 0} ≤ 1/μ + Δ⇩_d/μ^2"
proof -
  have "prob {a ∈ space M. Y a = 0} ≤ 1/μ + Δ⇩_a/μ^2 - 1"
    by (fact prob_μ_Δ⇩_a)
  also have "… = (1/μ - 1) + Δ⇩_a/μ^2"
    by simp
  also have "… ≤ (1/μ - 1) + (μ^2 + Δ⇩_d)/μ^2"
    using divide_right_mono[OF Δ_expectation_X] by simp
  also have "… = 1/μ + Δ⇩_d/μ^2"
    using μ_sq_non_zero by (simp add: field_simps)
  finally show ?thesis .
qed

end

end