Theory Distribution_Functions

(*  Title:    HOL/Probability/Distribution_Functions.thy
    Authors:  Jeremy Avigad (CMU) and Luke Serafin (CMU)
*)

section ‹Distribution Functions›

text ‹
Shows that the cumulative distribution function (cdf) of a distribution (a measure on the reals) is
nondecreasing and right continuous, which tends to 0 and 1 in either direction.

Conversely, every such function is the cdf of a unique distribution. This direction defines the
measure in the obvious way on half-open intervals, and then applies the Caratheodory extension
theorem.
›

(* TODO: the locales "finite_borel_measure" and "real_distribution" are defined here, but maybe they
 should be somewhere else. *)

theory Distribution_Functions
  imports Probability_Measure
begin

lemma UN_Ioc_eq_UNIV: "(n. { -real n <.. real n}) = UNIV"
  by auto
     (metis le_less_trans minus_minus neg_less_iff_less not_le real_arch_simple
            of_nat_0_le_iff reals_Archimedean2)

subsection ‹Properties of cdf's›

definition
  cdf :: "real measure  real  real"
where
  "cdf M  λx. measure M {..x}"

lemma cdf_def2: "cdf M x = measure M {..x}"
  by (simp add: cdf_def)

locale finite_borel_measure = finite_measure M for M :: "real measure" +
  assumes M_is_borel: "sets M = sets borel"
begin

lemma sets_M[intro]: "a  sets borel  a  sets M"
  using M_is_borel by auto

lemma cdf_diff_eq:
  assumes "x < y"
  shows "cdf M y - cdf M x = measure M {x<..y}"
proof -
  from assms have *: "{..x}  {x<..y} = {..y}" by auto
  have "measure M {..y} = measure M {..x} + measure M {x<..y}"
    by (subst finite_measure_Union [symmetric], auto simp add: *)
  thus ?thesis
    unfolding cdf_def by auto
qed

lemma cdf_nondecreasing: "x  y  cdf M x  cdf M y"
  unfolding cdf_def by (auto intro!: finite_measure_mono)

lemma borel_UNIV: "space M = UNIV"
 by (metis in_mono sets.sets_into_space space_in_borel top_le M_is_borel)

lemma cdf_nonneg: "cdf M x  0"
  unfolding cdf_def by (rule measure_nonneg)

lemma cdf_bounded: "cdf M x  measure M (space M)"
  unfolding cdf_def by (intro bounded_measure)

lemma cdf_lim_infty:
  "((λi. cdf M (real i))  measure M (space M))"
proof -
  have "(λi. cdf M (real i))  measure M ( i::nat. {..real i})"
    unfolding cdf_def by (rule finite_Lim_measure_incseq) (auto simp: incseq_def)
  also have "( i::nat. {..real i}) = space M"
    by (auto simp: borel_UNIV intro: real_arch_simple)
  finally show ?thesis .
qed

lemma cdf_lim_at_top: "(cdf M  measure M (space M)) at_top"
  by (rule tendsto_at_topI_sequentially_real)
     (simp_all add: mono_def cdf_nondecreasing cdf_lim_infty)

lemma cdf_lim_neg_infty: "((λi. cdf M (- real i))  0)"
proof -
  have "(λi. cdf M (- real i))  measure M ( i::nat. {.. - real i })"
    unfolding cdf_def by (rule finite_Lim_measure_decseq) (auto simp: decseq_def)
  also have "( i::nat. {..- real i}) = {}"
    by auto (metis leD le_minus_iff reals_Archimedean2)
  finally show ?thesis
    by simp
qed

lemma cdf_lim_at_bot: "(cdf M  0) at_bot"
proof -
  have *: "((λx :: real. - cdf M (- x))  0) at_top"
    by (intro tendsto_at_topI_sequentially_real monoI)
       (auto simp: cdf_nondecreasing cdf_lim_neg_infty tendsto_minus_cancel_left[symmetric])
  from filterlim_compose [OF *, OF filterlim_uminus_at_top_at_bot]
  show ?thesis
    unfolding tendsto_minus_cancel_left[symmetric] by simp
qed

lemma cdf_is_right_cont: "continuous (at_right a) (cdf M)"
  unfolding continuous_within
proof (rule tendsto_at_right_sequentially[where b="a + 1"])
  fix f :: "nat  real" and x assume f: "decseq f" "f  a"
  then have "(λn. cdf M (f n))  measure M (i. {.. f i})"
    using decseq f unfolding cdf_def
    by (intro finite_Lim_measure_decseq) (auto simp: decseq_def)
  also have "(i. {.. f i}) = {.. a}"
    using decseq_ge[OF f] by (auto intro: order_trans LIMSEQ_le_const[OF f(2)])
  finally show "(λn. cdf M (f n))  cdf M a"
    by (simp add: cdf_def)
qed simp

lemma cdf_at_left: "(cdf M  measure M {..<a}) (at_left a)"
proof (rule tendsto_at_left_sequentially[of "a - 1"])
  fix f :: "nat  real" and x assume f: "incseq f" "f  a" "x. f x < a" "x. a - 1 < f x"
  then have "(λn. cdf M (f n))  measure M (i. {.. f i})"
    using incseq f unfolding cdf_def
    by (intro finite_Lim_measure_incseq) (auto simp: incseq_def)
  also have "(i. {.. f i}) = {..<a}"
    by (auto dest!: order_tendstoD(1)[OF f(2)] eventually_happens'[OF sequentially_bot]
             intro: less_imp_le le_less_trans f(3))
  finally show "(λn. cdf M (f n))  measure M {..<a}"
    by (simp add: cdf_def)
qed auto

lemma isCont_cdf: "isCont (cdf M) x  measure M {x} = 0"
proof -
  have "isCont (cdf M) x  cdf M x = measure M {..<x}"
    by (auto simp: continuous_at_split cdf_is_right_cont continuous_within[where s="{..< _}"]
                   cdf_at_left tendsto_unique[OF _ cdf_at_left])
  also have "cdf M x = measure M {..<x}  measure M {x} = 0"
    unfolding cdf_def ivl_disj_un(2)[symmetric]
    by (subst finite_measure_Union) auto
  finally show ?thesis .
qed

lemma countable_atoms: "countable {x. measure M {x} > 0}"
  using countable_support unfolding zero_less_measure_iff .

end

locale real_distribution = prob_space M for M :: "real measure" +
  assumes events_eq_borel [simp, measurable_cong]: "sets M = sets borel"
begin

lemma finite_borel_measure_M: "finite_borel_measure M"
  by standard auto

sublocale finite_borel_measure M
  by (rule finite_borel_measure_M)

lemma space_eq_univ [simp]: "space M = UNIV"
  using events_eq_borel[THEN sets_eq_imp_space_eq] by simp

lemma cdf_bounded_prob: "x. cdf M x  1"
  by (subst prob_space [symmetric], rule cdf_bounded)

lemma cdf_lim_infty_prob: "(λi. cdf M (real i))  1"
  by (subst prob_space [symmetric], rule cdf_lim_infty)

lemma cdf_lim_at_top_prob: "(cdf M  1) at_top"
  by (subst prob_space [symmetric], rule cdf_lim_at_top)

lemma measurable_finite_borel [simp]:
  "f  borel_measurable borel  f  borel_measurable M"
  by (rule borel_measurable_subalgebra[where N=borel]) auto

end

lemma (in prob_space) real_distribution_distr [intro, simp]:
  "random_variable borel X  real_distribution (distr M borel X)"
  unfolding real_distribution_def real_distribution_axioms_def by (auto intro!: prob_space_distr)

subsection ‹Uniqueness›

lemma (in finite_borel_measure) emeasure_Ioc:
  assumes "a  b" shows "emeasure M {a <.. b} = cdf M b - cdf M a"
proof -
  have "{a <.. b} = {..b} - {..a}"
    by auto
  moreover have "{..x}  sets M" for x
    using atMost_borel[of x] M_is_borel by auto
  moreover note a  b
  ultimately show ?thesis
    by (simp add: emeasure_eq_measure finite_measure_Diff cdf_def)
qed

lemma cdf_unique':
  fixes M1 M2
  assumes "finite_borel_measure M1" and "finite_borel_measure M2"
  assumes "cdf M1 = cdf M2"
  shows "M1 = M2"
proof (rule measure_eqI_generator_eq[where Ω=UNIV])
  fix X assume "X  range (λ(a, b). {a<..b::real})"
  then obtain a b where Xeq: "X = {a<..b}" by auto
  then show "emeasure M1 X = emeasure M2 X"
    by (cases "a  b")
       (simp_all add: assms(1,2)[THEN finite_borel_measure.emeasure_Ioc] assms(3))
next
  show "(i. {- real (i::nat)<..real i}) = UNIV"
    by (rule UN_Ioc_eq_UNIV)
qed (auto simp: finite_borel_measure.emeasure_Ioc[OF assms(1)]
  assms(1,2)[THEN finite_borel_measure.M_is_borel] borel_sigma_sets_Ioc
  Int_stable_def)

lemma cdf_unique:
  "real_distribution M1  real_distribution M2  cdf M1 = cdf M2  M1 = M2"
  using cdf_unique'[of M1 M2] by (simp add: real_distribution.finite_borel_measure_M)

lemma
  fixes F :: "real  real"
  assumes nondecF : " x y. x  y  F x  F y"
    and right_cont_F : "a. continuous (at_right a) F"
    and lim_F_at_bot : "(F  0) at_bot"
    and lim_F_at_top : "(F  m) at_top"
    and m: "0  m"
  shows interval_measure_UNIV: "emeasure (interval_measure F) UNIV = m"
    and finite_borel_measure_interval_measure: "finite_borel_measure (interval_measure F)"
proof -
  let ?F = "interval_measure F"
  { have "ennreal (m - 0) = (SUP i. ennreal (F (real i) - F (- real i)))"
      by (intro LIMSEQ_unique[OF _ LIMSEQ_SUP] tendsto_ennrealI tendsto_intros
                lim_F_at_bot[THEN filterlim_compose] lim_F_at_top[THEN filterlim_compose]
                lim_F_at_bot[THEN filterlim_compose] filterlim_real_sequentially
                filterlim_uminus_at_top[THEN iffD1])
         (auto simp: incseq_def nondecF intro!: diff_mono)
    also have " = (SUP i. emeasure ?F {- real i<..real i})"
      by (subst emeasure_interval_measure_Ioc) (simp_all add: nondecF right_cont_F)
    also have " = emeasure ?F (i::nat. {- real i<..real i})"
      by (rule SUP_emeasure_incseq) (auto simp: incseq_def)
    also have "(i. {- real (i::nat)<..real i}) = space ?F"
      by (simp add: UN_Ioc_eq_UNIV)
    finally have "emeasure ?F (space ?F) = m"
      by simp }
  note * = this
  then show "emeasure (interval_measure F) UNIV = m"
    by simp

  interpret finite_measure ?F
  proof
    show "emeasure ?F (space ?F)  "
      using * by simp
  qed
  show "finite_borel_measure (interval_measure F)"
    proof qed simp_all
qed

lemma real_distribution_interval_measure:
  fixes F :: "real  real"
  assumes nondecF : " x y. x  y  F x  F y" and
    right_cont_F : "a. continuous (at_right a) F" and
    lim_F_at_bot : "(F  0) at_bot" and
    lim_F_at_top : "(F  1) at_top"
  shows "real_distribution (interval_measure F)"
proof -
  let ?F = "interval_measure F"
  interpret prob_space ?F
    proof qed (use interval_measure_UNIV[OF assms] in simp)
  show ?thesis
    proof qed simp_all
qed

lemma
  fixes F :: "real  real"
  assumes nondecF : " x y. x  y  F x  F y" and
    right_cont_F : "a. continuous (at_right a) F" and
    lim_F_at_bot : "(F  0) at_bot"
  shows emeasure_interval_measure_Iic: "emeasure (interval_measure F) {.. x} = F x"
    and measure_interval_measure_Iic: "measure (interval_measure F) {.. x} = F x"
  unfolding cdf_def
proof -
  have F_nonneg[simp]: "0  F y" for y
    using lim_F_at_bot by (rule tendsto_upperbound) (auto simp: eventually_at_bot_linorder nondecF intro!: exI[of _ y])

  have "emeasure (interval_measure F) (i::nat. {-real i <.. x}) = F x - ennreal 0"
  proof (intro LIMSEQ_unique[OF Lim_emeasure_incseq])
    have "(λi. F x - F (- real i))  F x - 0"
      by (intro tendsto_intros lim_F_at_bot[THEN filterlim_compose] filterlim_real_sequentially
                filterlim_uminus_at_top[THEN iffD1])
    from tendsto_ennrealI[OF this]
    show "(λi. emeasure (interval_measure F) {- real i<..x})  F x - ennreal 0"
      apply (rule filterlim_cong[THEN iffD1, rotated 3])
        apply simp
       apply simp
      apply (rule eventually_sequentiallyI[where c="nat (ceiling (- x))"])
      apply (simp add: emeasure_interval_measure_Ioc right_cont_F nondecF)
      done
  qed (auto simp: incseq_def)
  also have "(i::nat. {-real i <.. x}) = {..x}"
    by auto (metis minus_minus neg_less_iff_less reals_Archimedean2)
  finally show "emeasure (interval_measure F) {..x} = F x"
    by simp
  then show "measure (interval_measure F) {..x} = F x"
    by (simp add: measure_def)
qed

lemma cdf_interval_measure:
  "( x y. x  y  F x  F y)  (a. continuous (at_right a) F)  (F  0) at_bot  cdf (interval_measure F) = F"
  by (simp add: cdf_def fun_eq_iff measure_interval_measure_Iic)

end