# Theory Probability_Ext

```section ‹Probability Spaces›

theory Probability_Ext
imports
"HOL-Probability.Stream_Space"
Universal_Hash_Families.Carter_Wegman_Hash_Family
Frequency_Moments_Preliminary_Results
begin

text ‹Random variables that depend on disjoint sets of the components of a product space are
independent.›

lemma make_ext:
assumes "⋀x. P x = P (restrict x I)"
shows "(∀x ∈ Pi I A. P x) = (∀x ∈ PiE I A. P x)"
using assms by (simp add:PiE_def Pi_def set_eq_iff, force)

lemma PiE_reindex:
assumes "inj_on f I"
shows "PiE I (A ∘ f) = (λa. restrict (a ∘ f) I) ` PiE (f ` I) A" (is "?lhs = ?g ` ?rhs")
proof -
have "?lhs ⊆ ?g` ?rhs"
proof (rule subsetI)
fix x
assume a:"x ∈ Pi⇩E I (A ∘ f)"
define y where y_def: "y = (λk. if k ∈ f ` I then x (the_inv_into I f k) else undefined)"
have b:"y ∈ PiE (f ` I) A"
using a assms the_inv_into_f_eq[OF assms]
by (simp add: y_def PiE_iff extensional_def)
have c: "x = (λa. restrict (a ∘ f) I) y"
using a assms the_inv_into_f_eq extensional_arb
by (intro ext, simp add:y_def PiE_iff, fastforce)
show "x ∈ ?g ` ?rhs" using b c by blast
qed
moreover have "?g ` ?rhs ⊆ ?lhs"
by (rule image_subsetI, simp add:Pi_def PiE_def)
ultimately show ?thesis by blast
qed

context prob_space
begin

lemma indep_sets_reindex:
assumes "inj_on f I"
shows "indep_sets A (f ` I) = indep_sets (λi. A (f i)) I"
proof -
have a:"⋀J g. J ⊆ I ⟹ (∏j ∈ f ` J. g j) = (∏j ∈ J. g (f j))"
by (metis assms prod.reindex_cong subset_inj_on)

have "J ⊆ I ⟹ (Π⇩E i ∈ J. A (f i)) = (λa. restrict (a ∘ f) J) ` PiE (f ` J) A" for J
using assms inj_on_subset
by (subst PiE_reindex[symmetric]) auto

hence b:"⋀P J. J ⊆ I ⟹  (⋀x. P x = P (restrict x J)) ⟹ (∀A' ∈ Π⇩E i ∈ J. A (f i). P A') =  (∀A'∈PiE (f ` J) A. P (A' ∘ f))"
by simp

have c:"⋀J. J ⊆ I ⟹ finite (f ` J) = finite J"
by (meson assms finite_image_iff inj_on_subset)

show ?thesis
by (simp add:indep_sets_def all_subset_image a c)
qed

lemma indep_vars_cong_AE:
assumes "AE x in M. (∀i ∈ I. X' i x = Y' i x)"
assumes "indep_vars M' X' I"
assumes "⋀i. i ∈ I ⟹ random_variable (M' i) (Y' i)"
shows "indep_vars M' Y' I"
proof (cases "I ≠ {}")
case True

have a: "AE x in M. (λi∈I. Y' i x) = (λi∈I. X' i x)"
by (rule AE_mp[OF assms(1)], rule AE_I2, simp cong:restrict_cong)
have b: "⋀i. i ∈ I ⟹ random_variable (M' i) (X' i)"
have c: "⋀x. x ∈ I ⟹ AE xa in M. X' x xa = Y' x xa"
by (rule AE_mp[OF assms(1)], rule AE_I2, simp)

have "distr M (Pi⇩M I M') (λx. λi∈I. Y' i x) = distr M (Pi⇩M I M') (λx. λi∈I. X' i x)"
by (intro distr_cong_AE measurable_restrict a b assms(3)) auto
also have "... =  Pi⇩M I (λi. distr M (M' i) (X' i))"
using assms True b by (subst indep_vars_iff_distr_eq_PiM'[symmetric]) auto
also have "... =  Pi⇩M I (λi. distr M (M' i) (Y' i))"
by (intro PiM_cong distr_cong_AE c assms(3) b) auto
finally have "distr M (Pi⇩M I M') (λx. λi∈I. Y' i x) = Pi⇩M I (λi. distr M (M' i) (Y' i))"
by simp

thus ?thesis
using True assms(3)
by (subst indep_vars_iff_distr_eq_PiM') auto
next
case False
then show ?thesis
qed

lemma indep_vars_reindex:
assumes "inj_on f I"
assumes "indep_vars M' X' (f ` I)"
shows "indep_vars (M' ∘ f) (λk ω. X' (f k) ω) I"
using assms by (simp add:indep_vars_def2 indep_sets_reindex)

lemma variance_divide:
fixes f :: "'a ⇒ real"
assumes "integrable M f"
shows "variance (λω. f ω / r) = variance f / r^2"
using assms
by (subst Bochner_Integration.integral_divide[OF assms(1)])

lemma pmf_mono:
assumes "M = measure_pmf p"
assumes "⋀x. x ∈ P ⟹ x ∈ set_pmf p ⟹ x ∈ Q"
shows "prob P ≤ prob Q"
proof -
have "prob P = prob (P ∩ (set_pmf p))"
by (rule  measure_pmf_eq[OF assms(1)], blast)
also have "... ≤ prob Q"
using assms by (intro finite_measure.finite_measure_mono, auto)
finally show ?thesis by simp
qed

assumes "M = measure_pmf p"
assumes  "⋀x. x ∈ P ⟹ x ∈ set_pmf p ⟹ x ∈ Q ∨ x ∈ R"
shows "prob P ≤ prob Q + prob R"
proof -
have [simp]:"events = UNIV" by (subst assms(1), simp)
have "prob P ≤ prob (Q ∪ R)"
using assms by (intro pmf_mono[OF assms(1)], blast)
also have "... ≤ prob Q + prob R"
finally show ?thesis by simp
qed

assumes "M = measure_pmf p"
assumes "prob {ω. P ω} ≤ r1"
assumes "prob {ω. Q ω} ≤ r2"
shows "prob {ω. P ω ∨ Q ω} ≤ r1 + r2" (is "?lhs ≤ ?rhs")
proof -
have "?lhs ≤ prob {ω. P ω} + prob {ω. Q ω}"
also have "... ≤ ?rhs"
finally show ?thesis
by simp
qed

definition covariance where
"covariance f g = expectation (λω. (f ω - expectation f) * (g ω - expectation g))"

lemma real_prod_integrable:
fixes f g :: "'a ⇒ real"
assumes [measurable]: "f ∈ borel_measurable M" "g ∈ borel_measurable M"
assumes sq_int: "integrable M (λω. f ω^2)" "integrable M (λω. g ω^2)"
shows "integrable M (λω. f ω * g ω)"
unfolding integrable_iff_bounded
proof
have "(∫⇧+ ω. ennreal (norm (f ω * g ω)) ∂M)⇧2 = (∫⇧+ ω. ennreal ¦f ω¦ * ennreal ¦g ω¦ ∂M)⇧2"
also have "... ≤  (∫⇧+ ω. ennreal ¦f ω¦^2 ∂M) * (∫⇧+ ω. ennreal ¦g ω¦^2 ∂M)"
by (rule Cauchy_Schwarz_nn_integral, auto)
also have "... < ∞"
using sq_int by (auto simp: integrable_iff_bounded ennreal_power ennreal_mult_less_top)
finally have "(∫⇧+ x. ennreal (norm (f x * g x)) ∂M)⇧2 < ∞"
by simp
thus "(∫⇧+ x. ennreal (norm (f x * g x)) ∂M) < ∞"
qed auto

lemma covariance_eq:
fixes f :: "'a ⇒ real"
assumes "f ∈ borel_measurable M" "g ∈ borel_measurable M"
assumes "integrable M (λω. f ω^2)" "integrable M (λω. g ω^2)"
shows "covariance f g = expectation (λω. f ω * g ω) - expectation f * expectation g"
proof -
have "integrable M f" using square_integrable_imp_integrable assms by auto
moreover have "integrable M g" using square_integrable_imp_integrable assms by auto
ultimately show ?thesis
using assms real_prod_integrable
qed

lemma covar_integrable:
fixes f g :: "'a ⇒ real"
assumes "f ∈ borel_measurable M" "g ∈ borel_measurable M"
assumes "integrable M (λω. f ω^2)" "integrable M (λω. g ω^2)"
shows "integrable M (λω. (f ω - expectation f) * (g ω - expectation g))"
proof -
have "integrable M f" using square_integrable_imp_integrable assms by auto
moreover have "integrable M g" using square_integrable_imp_integrable assms by auto
ultimately show ?thesis using assms real_prod_integrable by (simp add: algebra_simps)
qed

lemma sum_square_int:
fixes f :: "'b ⇒ 'a ⇒ real"
assumes "finite I"
assumes "⋀i. i ∈ I ⟹ f i ∈ borel_measurable M"
assumes "⋀i. i ∈ I ⟹ integrable M (λω. f i ω^2)"
shows "integrable M (λω. (∑i ∈ I. f i ω)⇧2)"
proof -
have " integrable M (λω. ∑i∈I. ∑j∈I. f j ω * f i ω)"
using assms
by (intro Bochner_Integration.integrable_sum real_prod_integrable, auto)
thus ?thesis
qed

lemma var_sum_1:
fixes f :: "'b ⇒ 'a ⇒ real"
assumes "finite I"
assumes "⋀i. i ∈ I ⟹ f i ∈ borel_measurable M"
assumes "⋀i. i ∈ I ⟹ integrable M (λω. f i ω^2)"
shows
"variance (λω. (∑i ∈ I. f i ω)) = (∑i ∈ I. (∑j ∈ I. covariance (f i) (f j)))"
proof -
have a:"⋀i j. i ∈ I ⟹ j ∈ I ⟹ integrable M (λω. (f i ω - expectation (f i)) * (f j ω - expectation (f j)))"
using assms covar_integrable by simp
have "variance (λω. (∑i ∈ I. f i ω)) = expectation (λω. (∑i∈I. f i ω - expectation (f i))⇧2)"
using square_integrable_imp_integrable[OF assms(2,3)]
also have "... = expectation (λω. (∑i ∈ I. (∑j ∈ I. (f i ω - expectation (f i)) *  (f j ω - expectation (f j)))))"
by (simp add: power2_eq_square sum_distrib_right sum_distrib_left mult.commute)
also have "... = (∑i ∈ I. (∑j ∈ I. covariance (f i) (f j)))"
using a by (simp add: Bochner_Integration.integral_sum covariance_def)
finally show ?thesis by simp
qed

lemma covar_self_eq:
fixes f :: "'a ⇒ real"
shows "covariance f f = variance f"

lemma covar_indep_eq_zero:
fixes f g :: "'a ⇒ real"
assumes "integrable M f"
assumes "integrable M g"
assumes "indep_var borel f borel g"
shows "covariance f g = 0"
proof -
have a:"indep_var borel ((λt. t - expectation f) ∘ f) borel ((λt. t - expectation g) ∘ g)"
by (rule indep_var_compose[OF assms(3)], auto)

have b:"expectation (λω. (f ω - expectation f) * (g ω - expectation g)) = 0"
using a assms by (subst indep_var_lebesgue_integral, auto simp add:comp_def prob_space)

qed

lemma var_sum_2:
fixes f :: "'b ⇒ 'a ⇒ real"
assumes "finite I"
assumes "⋀i. i ∈ I ⟹ f i ∈ borel_measurable M"
assumes "⋀i. i ∈ I ⟹ integrable M (λω. f i ω^2)"
shows "variance (λω. (∑i ∈ I. f i ω)) =
(∑i ∈ I. variance (f i)) + (∑i ∈ I. ∑j ∈ I - {i}. covariance (f i) (f j))"
proof -
have "variance (λω. (∑i ∈ I. f i ω)) = (∑i∈I. ∑j∈I. covariance (f i) (f j))"
also have "... = (∑i∈I. covariance (f i) (f i) + (∑j∈I-{i}. covariance (f i) (f j)))"
using assms by (subst sum.insert[symmetric], auto simp add:insert_absorb)
also have "... = (∑i∈I. variance (f i)) +  (∑i ∈ I. (∑j∈I-{i}. covariance (f i) (f j)))"
finally show ?thesis by simp
qed

lemma var_sum_pairwise_indep:
fixes f :: "'b ⇒ 'a ⇒ real"
assumes "finite I"
assumes "⋀i. i ∈ I ⟹ f i ∈ borel_measurable M"
assumes "⋀i. i ∈ I ⟹ integrable M (λω. f i ω^2)"
assumes "⋀i j. i ∈ I ⟹ j ∈ I ⟹ i ≠ j ⟹ indep_var borel (f i) borel (f j)"
shows "variance (λω. (∑i ∈ I. f i ω)) = (∑i ∈ I. variance (f i))"
proof -
have "⋀i j. i ∈ I ⟹ j ∈ I - {i} ⟹ covariance (f i) (f j) = 0"
using covar_indep_eq_zero assms(4) square_integrable_imp_integrable[OF assms(2,3)] by auto
hence a:"(∑i ∈ I. ∑j ∈ I - {i}. covariance (f i) (f j)) = 0"
by simp
thus ?thesis by (simp add: var_sum_2[OF assms(1,2,3)])
qed

lemma indep_var_from_indep_vars:
assumes "i ≠ j"
assumes "indep_vars (λ_. M') f {i, j}"
shows "indep_var M' (f i) M' (f j)"
proof -
have a:"inj (case_bool i j)" using assms(1)
have b:"range (case_bool i j) = {i,j}"
have c:"indep_vars (λ_. M') f (range (case_bool i j))" using assms(2) b by simp

have "True = indep_vars (λx. M') (λx. f (case_bool i j x)) UNIV"
using indep_vars_reindex[OF a c]
also have "... = indep_vars (λx. case_bool M' M' x) (λx. case_bool (f i) (f j) x) UNIV"
by (rule indep_vars_cong, auto simp:bool.case_distrib bool.case_eq_if)
also have "... = ?thesis"
finally show ?thesis by simp
qed

lemma var_sum_pairwise_indep_2:
fixes f :: "'b ⇒ 'a ⇒ real"
assumes "finite I"
assumes "⋀i. i ∈ I ⟹ f i ∈ borel_measurable M"
assumes "⋀i. i ∈ I ⟹ integrable M (λω. f i ω^2)"
assumes "⋀J. J ⊆ I ⟹ card J = 2 ⟹ indep_vars (λ _. borel) f J"
shows "variance (λω. (∑i ∈ I. f i ω)) = (∑i ∈ I. variance (f i))"
using assms(4)
by (intro var_sum_pairwise_indep[OF assms(1,2,3)] indep_var_from_indep_vars, auto)

lemma var_sum_all_indep:
fixes f :: "'b ⇒ 'a ⇒ real"
assumes "finite I"
assumes "⋀i. i ∈ I ⟹ f i ∈ borel_measurable M"
assumes "⋀i. i ∈ I ⟹ integrable M (λω. f i ω^2)"
assumes "indep_vars (λ _. borel) f I"
shows "variance (λω. (∑i ∈ I. f i ω)) = (∑i ∈ I. variance (f i))"
by (intro var_sum_pairwise_indep_2[OF assms(1,2,3)] indep_vars_subset[OF assms(4)],  auto)

end

end
```