Theory Bienaymes_Identity

section ‹Bienaym\'e's identity›

text ‹Bienaym\'e's identity~\cite[\S 17]{loeve1977} can be used to deduce the variance of a sum of
random variables, if their co-variance is known. A common use-case of the identity is the
computation of the variance of the mean of pair-wise independent variables.›

theory Bienaymes_Identity
  imports Concentration_Inequalities_Preliminary
begin

context prob_space
begin

lemma variance_divide:
  fixes f :: "'a  real"
  assumes "integrable M f"
  shows "variance (λω. f ω / r) = variance f / r^2"
  using assms
  by (subst Bochner_Integration.integral_divide[OF assms(1)])
    (simp add:diff_divide_distrib[symmetric] power2_eq_square algebra_simps)

definition covariance where
  "covariance f g = expectation (λω. (f ω - expectation f) * (g ω - expectation g))"

lemma covariance_eq:
  fixes f :: "'a  real"
  assumes "f  borel_measurable M" "g  borel_measurable M"
  assumes "integrable M (λω. f ω^2)" "integrable M (λω. g ω^2)"
  shows "covariance f g = expectation (λω. f ω * g ω) - expectation f * expectation g"
proof -
  have "integrable M f" using square_integrable_imp_integrable assms by auto
  moreover have "integrable M g" using square_integrable_imp_integrable assms by auto
  ultimately show ?thesis
    using assms cauchy_schwartz(1)[where M="M"]
    by (simp add:covariance_def algebra_simps prob_space)
qed

lemma covar_integrable:
  fixes f g :: "'a  real"
  assumes "f  borel_measurable M" "g  borel_measurable M"
  assumes "integrable M (λω. f ω^2)" "integrable M (λω. g ω^2)"
  shows "integrable M (λω. (f ω - expectation f) * (g ω - expectation g))"
proof -
  have "integrable M f" using square_integrable_imp_integrable assms by auto
  moreover have "integrable M g" using square_integrable_imp_integrable assms by auto
  ultimately show ?thesis using assms cauchy_schwartz(1)[where M="M"] by (simp add: algebra_simps)
qed

lemma sum_square_int:
  fixes f :: "'b  'a  real"
  assumes "finite I"
  assumes "i. i  I  f i  borel_measurable M"
  assumes "i. i  I  integrable M (λω. f i ω^2)"
  shows "integrable M (λω. (i  I. f i ω)2)"
proof -
  have " integrable M (λω. iI. jI. f j ω * f i ω)"
    using assms
    by (intro Bochner_Integration.integrable_sum cauchy_schwartz(1)[where M="M"], auto)
  thus ?thesis
    by (simp add:power2_eq_square sum_distrib_left sum_distrib_right)
qed

theorem bienaymes_identity:
  fixes f :: "'b  'a  real"
  assumes "finite I"
  assumes "i. i  I  f i  borel_measurable M"
  assumes "i. i  I  integrable M (λω. f i ω^2)"
  shows
    "variance (λω. (i  I. f i ω)) = (i  I. (j  I. covariance (f i) (f j)))"
proof -
  have a:"i j. i  I  j  I 
    integrable M (λω. (f i ω - expectation (f i)) * (f j ω - expectation (f j)))"
    using assms covar_integrable by simp
  have "variance (λω. (i  I. f i ω)) = expectation (λω. (iI. f i ω - expectation (f i))2)"
    using square_integrable_imp_integrable[OF assms(2,3)]
    by (simp add: Bochner_Integration.integral_sum  sum_subtractf)
  also have "... = expectation (λω. (i  I. (j  I.
    (f i ω - expectation (f i)) *  (f j ω - expectation (f j)))))"
    by (simp add: power2_eq_square sum_distrib_right sum_distrib_left mult.commute)
  also have "... = (i  I. (j  I. covariance (f i) (f j)))"
    using a by (simp add: Bochner_Integration.integral_sum covariance_def)
  finally show ?thesis by simp
qed

lemma covar_self_eq:
  fixes f :: "'a  real"
  shows "covariance f f = variance f"
  by (simp add:covariance_def power2_eq_square)

lemma covar_indep_eq_zero:
  fixes f g :: "'a  real"
  assumes "integrable M f"
  assumes "integrable M g"
  assumes "indep_var borel f borel g"
  shows "covariance f g = 0"
proof -
  have a:"indep_var borel ((λt. t - expectation f)  f) borel ((λt. t - expectation g)  g)"
    by (rule indep_var_compose[OF assms(3)], auto)

  have b:"expectation (λω. (f ω - expectation f) * (g ω - expectation g)) = 0"
    using a assms by (subst indep_var_lebesgue_integral, auto simp add:comp_def prob_space)

  thus ?thesis by (simp add:covariance_def)
qed

lemma bienaymes_identity_2:
  fixes f :: "'b  'a  real"
  assumes "finite I"
  assumes "i. i  I  f i  borel_measurable M"
  assumes "i. i  I  integrable M (λω. f i ω^2)"
  shows "variance (λω. (i  I. f i ω)) =
      (i  I. variance (f i)) + (i  I. j  I - {i}. covariance (f i) (f j))"
proof -
  have "variance (λω. (i  I. f i ω)) = (iI. jI. covariance (f i) (f j))"
    by (simp add: bienaymes_identity[OF assms(1,2,3)])
  also have "... = (iI. covariance (f i) (f i) + (jI-{i}. covariance (f i) (f j)))"
    using assms by (subst sum.insert[symmetric], auto simp add:insert_absorb)
  also have "... = (iI. variance (f i)) +  (i  I. (jI-{i}. covariance (f i) (f j)))"
    by (simp add: covar_self_eq[symmetric] sum.distrib)
  finally show ?thesis by simp
qed

theorem bienaymes_identity_pairwise_indep:
  fixes f :: "'b  'a  real"
  assumes "finite I"
  assumes "i. i  I  f i  borel_measurable M"
  assumes "i. i  I  integrable M (λω. f i ω^2)"
  assumes "i j. i  I  j  I  i  j  indep_var borel (f i) borel (f j)"
  shows "variance (λω. (i  I. f i ω)) = (i  I. variance (f i))"
proof -
  have "i j. i  I  j  I - {i}  covariance (f i) (f j) = 0"
    using covar_indep_eq_zero assms(4) square_integrable_imp_integrable[OF assms(2,3)] by auto
  hence a:"(i  I. j  I - {i}. covariance (f i) (f j)) = 0"
    by simp
  thus ?thesis by (simp add: bienaymes_identity_2[OF assms(1,2,3)])
qed

lemma indep_var_from_indep_vars:
  assumes "i  j"
  assumes "indep_vars (λ_. M') f {i, j}"
  shows "indep_var M' (f i) M' (f j)"
proof -
  have a:"inj (case_bool i j)" using assms(1)
    by (simp add: bool.case_eq_if inj_def)
  have b:"range (case_bool i j) = {i,j}"
    by (simp add: UNIV_bool insert_commute)
  have c:"indep_vars (λ_. M') f (range (case_bool i j))" using assms(2) b by simp

  have "True = indep_vars (λx. M') (λx. f (case_bool i j x)) UNIV"
    using indep_vars_reindex[OF a c]
    by (simp add:comp_def)
  also have "... = indep_vars (λx. case_bool M' M' x) (λx. case_bool (f i) (f j) x) UNIV"
    by (rule indep_vars_cong, auto simp:bool.case_distrib bool.case_eq_if)
  also have "... = ?thesis"
    by (simp add: indep_var_def)
  finally show ?thesis by simp
qed

lemma bienaymes_identity_pairwise_indep_2:
  fixes f :: "'b  'a  real"
  assumes "finite I"
  assumes "i. i  I  f i  borel_measurable M"
  assumes "i. i  I  integrable M (λω. f i ω^2)"
  assumes "J. J  I  card J = 2  indep_vars (λ _. borel) f J"
  shows "variance (λω. (i  I. f i ω)) = (i  I. variance (f i))"
  using assms(4)
  by (intro bienaymes_identity_pairwise_indep[OF assms(1,2,3)] indep_var_from_indep_vars, auto)

lemma bienaymes_identity_full_indep:
  fixes f :: "'b  'a  real"
  assumes "finite I"
  assumes "i. i  I  f i  borel_measurable M"
  assumes "i. i  I  integrable M (λω. f i ω^2)"
  assumes "indep_vars (λ _. borel) f I"
  shows "variance (λω. (i  I. f i ω)) = (i  I. variance (f i))"
  by (intro bienaymes_identity_pairwise_indep_2[OF assms(1,2,3)] indep_vars_subset[OF assms(4)])
    auto

end

end