Theory Invariants

(*  Author:  Sébastien Gouëzel   sebastien.gouezel@univ-rennes1.fr
    License: BSD
*)

section ‹The invariant sigma-algebra, Birkhoff theorem›

theory Invariants
  imports Recurrence "HOL-Probability.Conditional_Expectation"
begin

subsection ‹The sigma-algebra of invariant subsets›

text ‹The invariant sigma-algebra of a qmpt is made of those sets that are invariant under the
dynamics. When the transformation is ergodic, it is made of sets of zero or full measure. In
general, the Birkhoff theorem is expressed in terms of the conditional expectation of an integrable
function with respect to the invariant sigma-algebra.›

context qmpt begin

text ‹We define the invariant sigma-algebra, as the sigma algebra of sets which are invariant
under the dynamics, i.e., they coincide with their preimage under $T$.›

definition Invariants where "Invariants = sigma (space M) {A  sets M. T-`A  space M = A}"

text ‹For this definition to make sense, we need to check that it really defines a sigma-algebra:
otherwise, the \verb+sigma+ operation would make garbage out of it. This is the content of the next
lemma.›

lemma Invariants_sets: "sets Invariants = {A  sets M. T-`A  space M = A}"
proof -
  have "sigma_algebra (space M) {A  sets M. T-`A  space M = A}"
  proof -
    define I where "I = {A. T-`A  space M = A}"
    have i: "A. A  I  A  space M" unfolding I_def by auto
    have "algebra (space M) I"
    proof (subst algebra_iff_Un)
      have a: "I  Pow (space M)" using i by auto
      have b: "{}  I" unfolding I_def by auto
      {
        fix A assume *: "A  I"
        then have "T-`(space M - A) = T-`(space M) - T-`A" by auto
        then have "T-`(space M - A)  space M = T-`(space M)  (space M) - T-`A  (space M)" by auto
        also have "... = space M - A" using * I_def by (simp add: inf_absorb2 subsetI)
        finally have "space M - A  I" unfolding I_def by simp
      }
      then have c: "(aI. space M - a  I)" by simp
      have d: "(aI. bI. a  b  I)" unfolding I_def by auto
      show "I  Pow (space M)  {}  I  (aI. space M - a  I)  (aI. bI. a  b  I)"
        using a b c d by blast
    qed
    moreover have "(F. range F  I  (i::nat. F i)  I)" unfolding I_def by auto
    ultimately have "sigma_algebra (space M) I" using sigma_algebra_iff by auto
    moreover have "sigma_algebra (space M) (sets M)" using measure_space measure_space_def by auto
    ultimately have "sigma_algebra (space M) (I  (sets M))" using sigma_algebra_intersection by auto
    moreover have "I  sets M = {A  sets M. T-`A  space M = A}" unfolding I_def by auto
    ultimately show ?thesis by simp
  qed
  then show ?thesis unfolding Invariants_def using sigma_algebra.sets_measure_of_eq by blast
qed

text ‹By definition, the invariant subalgebra is a subalgebra of the original algebra. This is
expressed in the following lemmas.›

lemma Invariants_is_subalg: "subalgebra M Invariants"
  unfolding subalgebra_def
  using Invariants_sets Invariants_def by (simp add: space_measure_of_conv)

lemma Invariants_in_sets:
  assumes "A  sets Invariants"
  shows "A  sets M"
using Invariants_sets assms by blast

lemma Invariants_measurable_func:
  assumes "f  measurable Invariants N"
  shows "f  measurable M N"
using Invariants_is_subalg measurable_from_subalg assms by auto

text ‹We give several trivial characterizations of invariant sets or functions.›

lemma Invariants_vrestr:
  assumes "A  sets Invariants"
  shows "T--`A = A"
using assms Invariants_sets Invariants_in_sets[OF assms] by auto

lemma Invariants_points:
  assumes "A  sets Invariants" "x  A"
  shows "T x  A"
using assms Invariants_sets by auto

lemma Invariants_func_is_invariant:
  fixes f::"_  'b::t2_space"
  assumes "f  borel_measurable Invariants" "x  space M"
  shows "f (T x) = f x"
proof -
  have "{f x}  sets borel" by simp
  then have "f-`({f x})  space M  Invariants" using assms(1)
    by (metis (no_types, lifting) Invariants_def measurable_sets space_measure_of_conv)
  moreover have "x  f-`({f x})  space M" using assms(2) by blast
  ultimately have "T x  f-`({f x})  space M" by (rule Invariants_points)
  then show ?thesis by simp
qed

lemma Invariants_func_is_invariant_n:
  fixes f::"_  'b::t2_space"
  assumes "f  borel_measurable Invariants" "x  space M"
  shows "f ((T^^n) x) = f x"
by (induction n, auto simp add: assms Invariants_func_is_invariant)

lemma Invariants_func_charac:
  assumes [measurable]: "f  measurable M N"
      and "x. x  space M  f(T x) = f x"
  shows "f  measurable Invariants N"
proof (rule measurableI)
  fix A assume "A  sets N"
  have "space Invariants = space M" using Invariants_is_subalg subalgebra_def by force
  show "f -` A  space Invariants  sets Invariants"
    apply (subst Invariants_sets)
    apply (auto simp add: assms A  sets N space Invariants = space M)
    using A  sets N assms(1) measurable_sets by blast
next
  fix x assume "x  space Invariants"
  have "space Invariants = space M" using Invariants_is_subalg subalgebra_def by force
  then show "f x  space N" using assms(1) x  space Invariants by (metis measurable_space)
qed

lemma birkhoff_sum_of_invariants:
  fixes f::" _  real"
  assumes "f  borel_measurable Invariants" "x  space M"
  shows "birkhoff_sum f n x = n * f x"
unfolding birkhoff_sum_def using Invariants_func_is_invariant_n[OF assms] by auto

text ‹There are two possible definitions of the invariant sigma-algebra, competing in the
literature: one could also use the sets such that $T^{-1}A$ coincides with $A$ up to
a measure $0$ set. It turns out that this is equivalent to being invariant (in our sense) up
to a measure $0$ set. Therefore, for all interesting purposes, the two definitions would
give the same results.

For the proof, we start from an almost invariant set, and build a genuinely invariant set that
coincides with it by adding or throwing away null parts.
›

proposition Invariants_quasi_Invariants_sets:
  assumes [measurable]: "A  sets M"
  shows "(B  sets Invariants. A Δ B  null_sets M)  (T--`A Δ A  null_sets M)"
proof
  assume "B  sets Invariants. A Δ B  null_sets M"
  then obtain B where "B  sets Invariants" "A Δ B  null_sets M" by auto
  then have [measurable]: "B  sets M" using Invariants_in_sets by simp

  have "B = T--` B" using Invariants_vrestr B  sets Invariants by simp
  then have "T--`A Δ B = T--`(A Δ B)" by simp
  moreover have "T--`(A Δ B)  null_sets M"
    by (rule T_quasi_preserves_null2(1)[OF A Δ B  null_sets M])
  ultimately have "T--`A Δ B  null_sets M" by simp
  then show "T--`A Δ A  null_sets M"
    by (rule null_sym_diff_transitive) (auto simp add: A Δ B  null_sets M Un_commute)
next
  assume H: "T --` A Δ A  null_sets M"
  have [measurable]: "n. (T^^n)--`A  sets M" by simp
  {
    fix K assume [measurable]: "K  sets M" and "T--`K Δ K  null_sets M"
    fix n::nat
    have "(T^^n)--`K Δ K  null_sets M"
    proof (induction n)
      case 0
      have "(T^^0)--` K = K" using T_vrestr_0 by simp
      then show ?case using Diff_cancel sup.idem by (metis null_sets.empty_sets)
    next
      case (Suc n)
      have "T--`((T^^n)--`K Δ K)  null_sets M"
        using Suc.IH T_quasi_preserves_null(1)[of "((T^^n)--`K Δ K)"] by simp
      then have *: "(T^^(Suc n))--`K Δ T--`K  null_sets M" using T_vrestr_composed(2)[OF K  sets M] by simp
      then show ?case
        by (rule null_sym_diff_transitive, simp add: T--`K Δ K  null_sets M K  sets M, measurable)
    qed
  } note * = this

  define C where "C = (n. (T^^n)--`A)"
  have [measurable]: "C  sets M" unfolding C_def by simp
  have "C Δ A  (n. (T^^n)--`A Δ A)" unfolding C_def by auto
  moreover have "(n. (T^^n)--`A Δ A)  null_sets M"
    using * null_sets_UN assms T --` A Δ A  null_sets M by auto
  ultimately have CA: "C Δ A  null_sets M" by (meson C  sets M assms sets.Diff sets.Un null_sets_subset)
  then have "T--`(C Δ A)  null_sets M" by (rule T_quasi_preserves_null2(1))
  then have "T--`C Δ T--`A  null_sets M" by simp
  then have "T--`C Δ A  null_sets M"
    by (rule null_sym_diff_transitive, auto simp add: H)
  then have TCC: "T--`C Δ C  null_sets M"
    apply (rule null_sym_diff_transitive) using CA by (auto simp add: Un_commute)

  have "C  (n{1..}. (T^^n)--`A)" unfolding C_def by auto
  moreover have "T--`C = (n{1..}. (T^^n)--`A)"
    using T_vrestr_composed(2)[OF assms] by (simp add: C_def atLeast_Suc_greaterThan greaterThan_0)
  ultimately have "C  T--`C" by blast
  then have "(T^^0)--`C  (T^^1)--`C" using T_vrestr_0 by auto
  moreover have "(T^^1)--`C  (n{1..}. (T^^n)--`C)" by auto
  ultimately have "(T^^0)--`C  (n{1..}. (T^^n)--`C)" by auto
  then have "(T^^0)--`C  (n{1..}. (T^^n)--`C) = (n{1..}. (T^^n)--`C)" by auto
  moreover have "(n. (T^^n)--`C) = (T^^0)--`C  (n{1..}. (T^^n)--`C)" by (rule union_insert_0)
  ultimately have C2: "(n. (T^^n)--`C) = (n{1..}. (T^^n)--`C)" by simp

  define B where "B = (n. (T^^n)--`C)"
  have [measurable]: "B  sets M" unfolding B_def by simp
  have "B Δ C  (n. (T^^n)--`C Δ C)" unfolding B_def by auto
  moreover have "(n. (T^^n)--`C Δ C)  null_sets M"
    using * null_sets_UN assms TCC by auto
  ultimately have "B Δ C  null_sets M" by (meson B  sets M C  sets M assms sets.Diff sets.Un null_sets_subset)
  then have "B Δ A  null_sets M"
    by (rule null_sym_diff_transitive, auto simp add: CA)
  then have a: "A Δ B  null_sets M" by (simp add: Un_commute)

  have "T--`B = (n{1..}. (T^^n)--`C)"
    using T_vrestr_composed(2)[OF C  sets M] by (simp add: B_def atLeast_Suc_greaterThan greaterThan_0)
  then have "T--`B = B" unfolding B_def using C2 by simp
  then have "B  sets Invariants" using Invariants_sets vimage_restr_def by auto

  then show "B  sets Invariants. A Δ B  null_sets M" using a by blast
qed

text ‹In a conservative setting, it is enough to be included in its image or its preimage to be
almost invariant: otherwise, since the difference set has disjoint preimages, and is therefore
null by conservativity.›

lemma (in conservative) preimage_included_then_almost_invariant:
  assumes [measurable]: "A  sets M" and "T--`A  A"
  shows "A Δ (T--`A)  null_sets M"
proof -
  define B where "B = A - T--`A"
  then have [measurable]: "B  sets M" by simp
  have "(T^^(Suc n))--`A  (T^^n)--`A" for n using T_vrestr_composed(3)[OF assms(1)] vrestr_inclusion[OF assms(2)] by auto
  then have "disjoint_family (λn. (T^^n)--`A - (T^^(Suc n))--`A)" by (rule disjoint_family_Suc2[where ?A = "λn. (T^^n)--`A"])
  moreover have "(T^^n)--`A - (T^^(Suc n))--`A = (T^^n)--`B" for n unfolding B_def Suc_eq_plus1 using T_vrestr_composed(3)[OF assms(1)] by auto
  ultimately have "disjoint_family (λn. (T^^n)--` B)" by simp
  then have "n. n  0  ((T^^n)--`B)  B = {}" unfolding disjoint_family_on_def by (metis UNIV_I T_vrestr_0(1)[OF B  sets M])
  then have "n. n > 0  (T^^n)-`B  B = {}" unfolding vimage_restr_def by (simp add: Int_assoc)
  then have "B  null_sets M" using disjoint_then_null[OF B  sets M] Int_commute by auto
  then show ?thesis unfolding B_def using assms(2) by (simp add: Diff_mono Un_absorb2)
qed

lemma (in conservative) preimage_includes_then_almost_invariant:
  assumes [measurable]: "A  sets M" and "A  T--`A"
  shows "A Δ (T--`A)  null_sets M"
proof -
  define B where "B = T--`A - A"
  then have [measurable]: "B  sets M" by simp
  have "n. (T^^(Suc n))--`A  (T^^n)--`A" using T_vrestr_composed(3)[OF assms(1)] vrestr_inclusion[OF assms(2)] by auto
  then have "disjoint_family (λn. (T^^(Suc n))--`A - (T^^n)--`A)" by (rule disjoint_family_Suc[where ?A = "λn. (T^^n)--`A"])
  moreover have "n. (T^^(Suc n))--`A - (T^^n)--`A = (T^^n)--`B" unfolding B_def Suc_eq_plus1 using T_vrestr_composed(3)[OF assms(1)] by auto
  ultimately have "disjoint_family (λn. (T^^n)--` B)" by simp
  then have "n. n  0  ((T^^n)--`B)  B = {}" unfolding disjoint_family_on_def by (metis UNIV_I T_vrestr_0(1)[OF B  sets M])
  then have "n. n > 0  (T^^n)-`B  B = {}" unfolding vimage_restr_def by (simp add: Int_assoc)
  then have "B  null_sets M" using disjoint_then_null[OF B  sets M] Int_commute by auto
  then show ?thesis unfolding B_def using assms(2) by (simp add: Diff_mono Un_absorb1)
qed

text ‹The above properties for sets are also true for functions: if $f$ and $f \circ T$
coincide almost everywhere, i.e., $f$ is almost invariant, then $f$ coincides almost everywhere
with a true invariant function.

The idea of the proof is straightforward: throw away the orbits on
which $f$ is not really invariant (say this is the complement of the good set),
and replace it by $0$ there. However, this does not work
directly: the good set is not invariant, some points may have a non-constant value of $f$ on their
orbit but reach the good set eventually. One can however define $g$ to be equal to the
eventual value of $f$ along the orbit, if the orbit reaches the good set, and $0$ elsewhere.›

proposition Invariants_quasi_Invariants_functions:
  fixes f::"_  'b::{second_countable_topology, t2_space}"
  assumes f_meas [measurable]: "f  borel_measurable M"
  shows "(g  borel_measurable Invariants. AE x in M. f x = g x)  (AE x in M. f(T x) = f x)"
proof
  assume "gborel_measurable Invariants. AE x in M. f x = g x"
  then obtain g where g:"gborel_measurable Invariants" "AE x in M. f x = g x" by blast
  then have [measurable]: "g  borel_measurable M" using Invariants_measurable_func by auto
  define A where "A = {x  space M. f x = g x}"
  have [measurable]: "A  sets M" unfolding A_def by simp
  define B where "B = space M - A"
  have [measurable]: "B  sets M" unfolding B_def by simp
  moreover have "AE x in M. x  B" unfolding B_def A_def using g(2) by auto
  ultimately have "B  null_sets M" using AE_iff_null_sets by blast
  then have "T--`B  null_sets M" by (rule T_quasi_preserves_null2(1))
  then have "B  T--`B  null_sets M" using B  null_sets M by auto
  then have "AE x in M. x  (B  T--`B)" using AE_iff_null_sets null_setsD2 by blast
  then have i: "AE x in M. x  space M - (B  T--`B)" by auto
  {
    fix x assume *: "x  space M - (B  T--`B)"
    then have "x  A" unfolding B_def by blast
    then have "f x = g x" unfolding A_def by blast
    have "T x  A" using * B_def by auto
    then have "f(T x) = g(T x)" unfolding A_def by blast
    moreover have "g(T x) = g x"
      apply (rule Invariants_func_is_invariant) using * by (auto simp add: assms gborel_measurable Invariants)
    ultimately have "f(T x) = f x" using f x = g x by simp
  }
  then show "AE x in M. f(T x) = f x" using i by auto
next
  assume *: "AE x in M. f (T x) = f x"
  text ‹\verb+good_set+ is the set of points for which $f$ is constant on their orbit. Here, we define
  $g = f$. If a point ever enters \verb+good_set+, then we take $g$ to be the value of $f$ there. Otherwise,
  $g$ takes an arbitrary value, say $y_0$.›
  define good_set where "good_set = {x  space M. n. f((T^^(Suc n)) x) = f((T^^n) x)}"
  define good_time where "good_time = (λx. Inf {n. (T^^n) x  good_set})"
  have "AE x in M. x  good_set" using T_AE_iterates[OF *] by (simp add: good_set_def)
  have [measurable]: "good_set  sets M" unfolding good_set_def by auto
  obtain y0::'b where True by auto
  define g where "g = (λx. if (n. (T^^n) x  good_set) then f((T^^(good_time x)) x) else y0)"
  have [measurable]: "good_time  measurable M (count_space UNIV)" unfolding good_time_def by measurable
  have [measurable]: "g  borel_measurable M" unfolding g_def by measurable

  have "f x = g x" if "x  good_set" for x
  proof -
    have a: "0  {n. (T^^n) x  good_set}" using that by simp
    have "good_time x = 0"
      unfolding good_time_def apply (intro cInf_eq_non_empty) using a by blast+
    moreover have "{n. (T^^n) x  good_set}  {}" using a by blast
    ultimately show "f x = g x" unfolding g_def by auto
  qed
  then have "AE x in M. f x = g x" using AE x in M. x  good_set by auto

  have *: "f((T^^(Suc 0)) x) = f((T^^0) x)" if "x  good_set" for x
    using that unfolding good_set_def by blast
  have good_1: "T x  good_set  f(T x) = f x" if "x  good_set" for x
    using *[OF that] that unfolding good_set_def apply (auto)
    unfolding T_Tn_T_compose by blast
  then have good_k: "x. x  good_set  (T^^k) x  good_set  f((T^^k) x) = f x" for k
      by (induction k, auto)

  have "g(T x) = g x" if "x  space M" for x
  proof (cases)
    assume *: "n. (T^^n) (T x)  good_set"
    define n where "n = Inf {n. (T^^n) (T x)  good_set}"
    have "(T^^n)(T x)  good_set" using * Inf_nat_def1 by (metis empty_iff mem_Collect_eq n_def)
    then have a: "(T^^(n+1)) x  good_set" by (metis Suc_eq_plus1 comp_eq_dest_lhs funpow.simps(2) funpow_swap1)
    then have **: "m. (T^^m) x  good_set" by blast
    define m where "m = Inf {m. (T^^m) x  good_set}"
    then have "(T^^m) x  good_set" using ** Inf_nat_def1 by (metis empty_iff mem_Collect_eq)
    have "n+1  {m. (T^^m) x  good_set}" using a by simp
    then have "m  n+1" using m_def by (simp add: Inf_nat_def Least_le)
    then obtain k where "n+1 = m + k" using le_iff_add by blast
    have "g x = f((T^^m) x)" unfolding g_def good_time_def using ** m_def by simp
    also have "... = f((T^^k) ((T^^m) x))" using (T^^m) x  good_set good_k by simp
    also have "... = f((T^^(n+1))x)" using n+1 = m + k[symmetric] funpow_add by (metis add.commute comp_apply)
    also have "... = f((T^^n) (T x))" using funpow_Suc_right by (metis Suc_eq_plus1 comp_apply)
    also have "... = g(T x)" unfolding g_def good_time_def using * n_def by simp
    finally show "g(T x) = g x" by simp
  next
    assume *: "¬(n. (T^^n) (T x)  good_set)"
    then have "g(T x) = y0" unfolding g_def by simp
    have **: "¬(n. (T^^(Suc n)) x  good_set)" using funpow_Suc_right * by (metis comp_apply)
    have "T x  good_set" using good_k * by blast
    then have "x  good_set" using good_1 by auto
    then have "¬(n. (T^^n) x  good_set)" using ** using good_1 by fastforce
    then have "g x = y0" unfolding g_def by simp
    then show "g(T x) = g x" using g(T x) = y0 by simp
  qed
  then have "g  borel_measurable Invariants" by (rule Invariants_func_charac[OF g  borel_measurable M])
  then show "gborel_measurable Invariants. AE x in M. f x = g x" using AE x in M. f x = g x by blast
qed

text ‹In a conservative setting, it suffices to have an almost everywhere inequality to get
an almost everywhere equality, as the set where there is strict inequality has $0$ measure
as its iterates are disjoint, by conservativity.›

proposition (in conservative) AE_decreasing_then_invariant:
  fixes f::"_  'b::{linorder_topology, second_countable_topology}"
  assumes "AE x in M. f(T x)  f x"
    and [measurable]: "f  borel_measurable M"
  shows "AE x in M. f(T x) = f x"
proof -
  obtain D::"'b set" where D: "countable D" "(x y. x < y  (d  D. x  d  d < y))"
    using countable_separating_set_linorder2 by blast

  define A where "A = {x  space M. f(T x)  f x}"
  then have [measurable]: "A  sets M" by simp
  define B where "B = {x  space M. n. f((T^^(n+1)) x)  f((T^^n)x)}"
  then have [measurable]: "B  sets M" by simp

  have "space M - A  null_sets M" unfolding A_def using assms by (simp add: assms(1) AE_iff_null_sets)
  then have "(n. (T^^n)--`(space M - A))  null_sets M" by (metis null_sets_UN T_quasi_preserves_null2(2))
  moreover have "space M - B = (n. (T^^n)--`(space M - A))"
    unfolding B_def A_def by auto
  ultimately have "space M - B  null_sets M" by simp

  have *: "B = (n. (T^^n)--`A)"
    unfolding B_def A_def by auto
  then have "T--`B = (n. T--` (T^^n)--`A)" by auto
  also have "... = (n. (T^^(n+1))--`A)" using T_vrestr_composed(2)[OF A  sets M] by simp
  also have "...  (n. (T^^n)--`A)" by blast
  finally have B1: "B  T--`B" using * by simp
  have "B  A" using * T_vrestr_0[OF A  sets M] by blast
  then have B2: "x. x  B  f(T x)  f x" unfolding A_def by auto

  define C where "C = (λt. {x  B. f x  t})"
  {
    fix t
    have "C t = B  f-`{..t}  space M" unfolding C_def using sets.sets_into_space[OF B  sets M] by auto
    then have [measurable]: "C t  sets M" using assms(2) by simp
    have "C t  T--`(C t)" using B1 unfolding C_def vimage_restr_def apply auto using B2 order_trans by blast
    then have "T--`(C t) - C t  null_sets M" by (metis Diff_mono Un_absorb1 preimage_includes_then_almost_invariant[OF C t  sets M])
  }
  then have "(dD. T--`(C d) - C d)  null_sets M" using countable D by (simp add: null_sets_UN')
  then have "(space M - B)  (dD. T--`(C d) - C d)  null_sets M" using space M - B  null_sets M by auto
  then have "AE x in M. x  (space M - B)  (dD. T--`(C d) - C d)" using AE_not_in by blast
  moreover
  {
    fix x assume x: "x  space M" "x  (space M - B)  (dD. T--`(C d) - C d)"
    then have "x  B" by simp
    then have "T x  B" using B1 by auto
    have "f(T x) = f x"
    proof (rule ccontr)
      assume "f(T x)  f x"
      then have "f(T x) < f x" using B2[OF x  B] by simp
      then obtain d where d: "d  D" "f(T x)  d  d < f x" using D by auto
      then have "T x  C d" using T x  B unfolding C_def by simp
      then have "x  T--`(C d)" using x  space M by simp
      then have "x  C d" using x d  D by simp
      then have "f x  d" unfolding C_def by simp
      then show False using d by auto
    qed
  }
  ultimately show ?thesis by auto
qed

proposition (in conservative) AE_increasing_then_invariant:
  fixes f::"_  'b::{linorder_topology, second_countable_topology}"
  assumes "AE x in M. f(T x)  f x"
    and [measurable]: "f  borel_measurable M"
  shows "AE x in M. f(T x) = f x"
proof -
  obtain D::"'b set" where D: "countable D" "(x y. x < y  (d  D. x < d  d  y))"
    using countable_separating_set_linorder1 by blast

  define A where "A = {x  space M. f(T x)  f x}"
  then have [measurable]: "A  sets M" by simp
  define B where "B = {x  space M. n. f((T^^(n+1)) x)  f((T^^n)x)}"
  then have [measurable]: "B  sets M" by simp

  have "space M - A  null_sets M" unfolding A_def using assms by (simp add: assms(1) AE_iff_null_sets)
  then have "(n. (T^^n)--`(space M - A))  null_sets M" by (metis null_sets_UN T_quasi_preserves_null2(2))
  moreover have "space M - B = (n. (T^^n)--`(space M - A))"
    unfolding B_def A_def by auto
  ultimately have "space M - B  null_sets M" by simp

  have *: "B = (n. (T^^n)--`A)"
    unfolding B_def A_def by auto
  then have "T--`B = (n. T--` (T^^n)--`A)" by auto
  also have "... = (n. (T^^(n+1))--`A)" using T_vrestr_composed(2)[OF A  sets M] by simp
  also have "...  (n. (T^^n)--`A)" by blast
  finally have B1: "B  T--`B" using * by simp
  have "B  A" using * T_vrestr_0[OF A  sets M] by blast
  then have B2: "x. x  B  f(T x)  f x" unfolding A_def by auto

  define C where "C = (λt. {x  B. f x  t})"
  {
    fix t
    have "C t = B  f-`{t..}  space M" unfolding C_def using sets.sets_into_space[OF B  sets M] by auto
    then have [measurable]: "C t  sets M" using assms(2) by simp
    have "C t  T--`(C t)" using B1 unfolding C_def vimage_restr_def apply auto using B2 order_trans by blast
    then have "T--`(C t) - C t  null_sets M" by (metis Diff_mono Un_absorb1 preimage_includes_then_almost_invariant[OF C t  sets M])
  }
  then have "(dD. T--`(C d) - C d)  null_sets M" using countable D by (simp add: null_sets_UN')
  then have "(space M - B)  (dD. T--`(C d) - C d)  null_sets M" using space M - B  null_sets M by auto
  then have "AE x in M. x  (space M - B)  (dD. T--`(C d) - C d)" using AE_not_in by blast
  moreover
  {
    fix x assume x: "x  space M" "x  (space M - B)  (dD. T--`(C d) - C d)"
    then have "x  B" by simp
    then have "T x  B" using B1 by auto
    have "f(T x) = f x"
    proof (rule ccontr)
      assume "f(T x)  f x"
      then have "f(T x) > f x" using B2[OF x  B] by simp
      then obtain d where d: "d  D" "f(T x)  d  d > f x" using D by auto
      then have "T x  C d" using T x  B unfolding C_def by simp
      then have "x  T--`(C d)" using x  space M by simp
      then have "x  C d" using x d  D by simp
      then have "f x  d" unfolding C_def by simp
      then show False using d by auto
    qed
  }
  ultimately show ?thesis by auto
qed

text ‹For an invertible map, the invariants of $T$ and $T^{-1}$ are the same.›

lemma Invariants_Tinv:
  assumes "invertible_qmpt"
  shows "qmpt.Invariants M Tinv = Invariants"
proof -
  interpret I: qmpt M Tinv using Tinv_qmpt[OF assms] by auto
  have "(T -` A  space M = A)  (Tinv -` A  space M = A)" if "A  sets M" for A
  proof
    assume "T -` A  space M = A"
    then show "Tinv -` A  space M = A"
      using assms that unfolding Tinv_def invertible_qmpt_def
      apply auto
      apply (metis IntE UNIV_I bij_def imageE inv_f_f vimageE)
      apply (metis I.T_spaceM_stable(1) Int_iff Tinv_def bij_inv_eq_iff vimageI)
      done
  next
    assume "Tinv -` A  space M = A"
    then show "T -` A  space M = A"
      using assms that unfolding Tinv_def invertible_qmpt_def
      apply auto
      apply (metis IntE bij_def inv_f_f vimageE)
      apply (metis T_Tinv_of_set T_meas Tinv_def assms qmpt.vrestr_of_set qmpt_axioms vrestr_image(3))
      done
  qed
  then have "{A  sets M. Tinv -` A  space M = A} = {A  sets M. T -` A  space M = A}"
    by blast
  then show ?thesis unfolding Invariants_def I.Invariants_def by auto
qed

end

sublocale fmpt  finite_measure_subalgebra M Invariants
  unfolding finite_measure_subalgebra_def finite_measure_subalgebra_axioms_def
  using Invariants_is_subalg by (simp add: finite_measureI)

context fmpt
begin

text ‹The conditional expectation with respect to the invariant sigma-algebra is the same
for $f$ or $f \circ T$, essentially by definition.›

lemma Invariants_of_foTn:
  fixes f::"'a  real"
  assumes [measurable]: "integrable M f"
  shows "AE x in M. real_cond_exp M Invariants (f o (T^^n)) x = real_cond_exp M Invariants f x"
proof (rule real_cond_exp_charact)
  fix A assume [measurable]: "A  sets Invariants"
  then have [measurable]: "A  sets M" using Invariants_in_sets by blast
  then have ind_meas [measurable]: "((indicator A)::('a  real))  borel_measurable Invariants" by auto

  have "set_lebesgue_integral M A (f  (T^^n)) = (x. indicator A x * f((T^^n) x) M)"
    by (auto simp: comp_def set_lebesgue_integral_def)
  also have "... = (x. indicator A ((T^^n) x) * f ((T^^n) x) M)"
    by (rule Bochner_Integration.integral_cong, auto simp add: Invariants_func_is_invariant_n[OF ind_meas])
  also have "... = (x. indicator A x * f x M)"
    apply (rule Tn_integral_preserving(2)) using integrable_mult_indicator[OF A  sets M assms] by auto
  also have "... = (x. indicator A x * real_cond_exp M Invariants f x M)"
    apply (rule real_cond_exp_intg(2)[symmetric]) using integrable_mult_indicator[OF A  sets M assms] by auto
  also have "... = set_lebesgue_integral M A (real_cond_exp M Invariants f)"
    by (auto simp: set_lebesgue_integral_def)
  finally show "set_lebesgue_integral M A (f  (T^^n)) = set_lebesgue_integral M A (real_cond_exp M Invariants f)"
    by simp
qed (auto simp add: assms real_cond_exp_int Tn_integral_preserving(1)[OF assms] comp_def)

lemma Invariants_of_foT:
  fixes f::"'a  real"
  assumes [measurable]: "integrable M f"
  shows "AE x in M. real_cond_exp M Invariants f x = real_cond_exp M Invariants (f o T) x"
using Invariants_of_foTn[OF assms, where ?n = 1] by auto

lemma birkhoff_sum_Invariants:
  fixes f::"'a  real"
  assumes [measurable]: "integrable M f"
  shows "AE x in M. real_cond_exp M Invariants (birkhoff_sum f n) x = n * real_cond_exp M Invariants f x"
proof -
  define F where "F = (λi. f o (T^^i))"
  have [measurable]: "i. F i  borel_measurable M" unfolding F_def by auto
  have *: "integrable M (F i)" for i unfolding F_def
    by (subst comp_def, rule Tn_integral_preserving(1)[OF assms, of i])

  have "AE x in M. n * real_cond_exp M Invariants f x = (i{..<n}. real_cond_exp M Invariants f x)" by auto
  moreover have "AE x in M. (i{..<n}. real_cond_exp M Invariants f x) = (i{..<n}. real_cond_exp M Invariants (F i) x)"
    apply (rule AE_symmetric[OF AE_equal_sum]) unfolding F_def using Invariants_of_foTn[OF assms] by simp
  moreover have "AE x in M. (i{..<n}. real_cond_exp M Invariants (F i) x) = real_cond_exp M Invariants (λx. i{..<n}. F i x) x"
    by (rule AE_symmetric[OF real_cond_exp_sum [OF *]])
  moreover have "AE x in M. real_cond_exp M Invariants (λx. i{..<n}. F i x) x = real_cond_exp M Invariants (birkhoff_sum f n) x"
    apply (rule real_cond_exp_cong) unfolding F_def using birkhoff_sum_def[symmetric] by auto
  ultimately show ?thesis by auto
qed

end (* of context fmpt *)

subsection ‹Birkhoff theorem›

subsubsection ‹Almost everywhere version of Birkhoff theorem›

text ‹This paragraph is devoted to the proof of Birkhoff theorem, arguably
the most fundamental result of ergodic theory.
This theorem asserts that Birkhoff averages of an integrable function $f$ converge almost surely,
to the conditional expectation of $f$ with respect to the invariant sigma algebra.

This result implies for instance the strong law of large numbers (in probability theory).

There are numerous proofs of this statement, but none is really easy. We follow the very efficient
argument given in Katok-Hasselblatt. To help the reader, here is the same proof informally. The
first part of the proof is formalized in \verb+birkhoff_lemma1+, the second one in
\verb+birkhoff_lemma+, and the conclusion in \verb+birkhoff_theorem+.

Start with an integrable function $g$. let $G_n(x) = \max_{k\leq n} S_k g(x)$. Then $\limsup S_n g/n
\leq 0$ outside of $A$, the set where $G_n$ tends to infinity. Moreover, $G_{n+1} - G_n \circ T$ is
bounded by $g$, and tends to $g$ on $A$. It follows from the dominated convergence theorem that
$\int_A G_{n+1} - G_n \circ T \to \int_A g$. As $\int_A G_{n+1} - G_n \circ T = \int_A G_{n+1} - G_n
\geq 0$, we obtain $\int_A g \geq 0$.

Apply now this result to the function $g = f - E(f | I) - \epsilon$, where $\epsilon>0$ is fixed.
Then $\int_A g = -\epsilon \mu(A)$, then have $\mu(A) = 0$. Thus, almost surely, $\limsup S_n
g/n\leq 0$, i.e., $\limsup S_n f/n \leq E(f|I)+\epsilon$. Letting $\epsilon$ tend to $0$ gives
$\limsup S_n f/n \leq E(f|I)$.

Applying the same result to $-f$ gives $S_n f/n \to E(f|I)$.
›

context fmpt
begin

lemma birkhoff_aux1:
  fixes f::"'a  real"
  assumes [measurable]: "integrable M f"
  defines "A  {x  space M. limsup (λn. ereal(birkhoff_sum f n x)) = }"
    shows "A  sets Invariants" "(x. f x * indicator A x M)  0"
proof -
  let ?bsf = "birkhoff_sum f"
  have [measurable]: "A  sets M" unfolding A_def by simp
  have Ainv: "x  A  T x  A" if "x  space M" for x
  proof -
    have "ereal(?bsf (1 + n) x) = ereal(f x) + ereal(?bsf n (T x))" for n
      unfolding birkhoff_sum_cocycle birkhoff_sum_1 by simp
    moreover have "limsup (λn. ereal(f x) + ereal(?bsf n (T x)))
                    = ereal(f x) + limsup(λn. ereal(?bsf n (T x)))"
      by (rule ereal_limsup_lim_add, auto)
    moreover have "limsup (λn. ereal(?bsf (n+1) x)) = limsup (λn. ereal(?bsf n x))" using limsup_shift by simp
    ultimately have "limsup (λn. ereal(birkhoff_sum f n x)) = ereal(f x) + limsup (λn. ereal(?bsf n (T x)))" by simp
    then have "limsup (λn. ereal(?bsf n x)) =   limsup (λn. ereal(?bsf n (T x))) = " by simp
    then show "x  A  T x  A" using x  space M A_def by simp
  qed
  then show "A  sets Invariants" using assms(2) Invariants_sets by auto

  define F where "F = (λn x. MAX k {0..n}. ?bsf k x)"
  have [measurable]: "n. F n  borel_measurable M" unfolding F_def by measurable
  have intFn: "integrable M (F n)" for n
    unfolding F_def by (rule integrable_MAX, auto simp add: birkhoff_sum_integral(1)[OF assms(1)])

  have Frec: "F (n+1) x - F n (T x) = max (-F n (T x)) (f x)" for n x
  proof -
    have "{0..n+1} = {0}  {1..n+1}" by auto
    then have "(λk. ?bsf k x) ` {0..n+1} = (λk. ?bsf k x) ` {0}  (λk. ?bsf k x) ` {1..n+1}" by blast
    then have *: "(λk. ?bsf k x) ` {0..n+1} = {0}  (λk. ?bsf k x) ` {1..n+1}" using birkhoff_sum_1(1) by simp
    have b: "F (n+1) x = max (Max {0}) (MAX k {1..n+1}. ?bsf k x)"
      by (subst F_def, subst *, rule Max.union, auto)

    have "(λk. ?bsf k x) ` {1..n+1} = (λk. ?bsf (1+k) x) ` {0..n}" using Suc_le_D by fastforce
    also have "... = (λk. f x + ?bsf k (T x)) ` {0..n}"
      by (subst birkhoff_sum_cocycle, subst birkhoff_sum_1(2), auto)
    finally have c: "F (n+1) x = max 0 (MAX k {0..n}. ?bsf k (T x) + f x)" using b by (simp add: add_ac)

    have "{f x + birkhoff_sum f k (T x) |k. k {0..n}} = (+) (f x) ` {birkhoff_sum f k (T x) |k. k {0..n}}" by blast
    have "(MAX k {0..n}. ?bsf k (T x) + f x) = (MAX k {0..n}. ?bsf k (T x)) + f x"
      by (rule Max_add_commute) auto
    also have "... = F n (T x) + f x" unfolding F_def by simp
    finally have "(MAX k {0..n}. ?bsf k (T x) + f x) = f x + F n (T x)" by simp
    then have "F (n+1) x = max 0 (f x + F n (T x))" using c by simp
    then show "F (n+1) x - F n (T x) = max (-F n (T x)) (f x)" by auto
  qed

  have a: "abs((F (n+1) x - F n (T x)) * indicator A x)  abs(f x)" for n x
  proof -
    have "F (n+1) x -F n (T x)  f x" using Frec by simp
    then have *: "F (n+1) x -F n (T x)  - abs(f x)" by simp

    have "F n (T x)  birkhoff_sum f 0 (T x)"
      unfolding F_def apply (rule Max_ge, simp) using atLeastAtMost_iff by blast
    then have "F n (T x)  0" using birkhoff_sum_1(1) by simp
    then have "-F n (T x)  abs (f x)" by simp
    moreover have "f x  abs(f x)" by simp
    ultimately have "F (n+1) x -F n (T x)  abs(f x)" using Frec by simp
    then have "abs(F (n+1) x - F n (T x))  abs(f x)" using * by simp
    then show "abs((F (n+1) x - F n (T x)) * indicator A x)  abs(f x)" unfolding indicator_def by auto
  qed
  have b: "(λn. (F (n+1) x - F n (T x)) * indicator A x)  f x * indicator A x" for x
  proof (rule tendsto_eventually, cases)
    assume "x  A"
    then have "T x  A" using Ainv A_def by auto
    then have "limsup (λn. ereal(birkhoff_sum f n (T x))) > ereal(-f x)" unfolding A_def by simp
    then obtain N where "ereal(?bsf N (T x)) > ereal(-f x)" using Limsup_obtain by blast
    then have *: "?bsf N (T x) > -f x" by simp
    {
      fix n assume "nN"
      then have "?bsf N (T x)  (λk. ?bsf k (T x)) ` {0..n}" by auto
      then have "F n (T x)  ?bsf N (T x)" unfolding F_def by simp
      then have "F n (T x)  -f x" using * by simp
      then have "max (-F n (T x)) (f x) = f x" by simp
      then have "F (n+1) x - F n (T x) = f x" using Frec by simp
      then have "(F (n+1) x - F n (T x)) * indicator A x = f x * indicator A x" by simp
    }
    then show "eventually (λn. (F (n+1) x - F n (T x)) * indicator A x = f x * indicator A x) sequentially"
      using eventually_sequentially by blast
  next
    assume "¬(x  A)"
    then have "indicator A x = (0::real)" by simp
    then show "eventually (λn. (F (n+1) x - F n (T x)) * indicator A x = f x * indicator A x) sequentially" by auto
  qed
  have lim: "(λn. (x. (F (n+1) x - F n (T x)) * indicator A x M))  (x. f x * indicator A x M)"
  proof (rule integral_dominated_convergence[where ?w = "(λx. abs(f x))"])
    show "integrable M (λx. ¦f x¦)" using assms(1) by auto
    show "AE x in M. (λn. (F (n + 1) x - F n (T x)) * indicator A x)  f x * indicator A x" using b by auto
    show "n. AE x in M. norm ((F (n + 1) x - F n (T x)) * indicator A x)  ¦f x¦" using a by auto
  qed (simp_all)

  have "(x. (F (n+1) x - F n (T x)) * indicator A x M)  0" for n
  proof -
    have "(x. F n (T x) * indicator A x M) = (x. (λx. F n x * indicator A x) (T x) M)"
      by (rule Bochner_Integration.integral_cong, auto simp add: Ainv indicator_def)
    also have "... = (x. F n x * indicator A x M)"
      by (rule T_integral_preserving, auto simp add: intFn integrable_real_mult_indicator)
    finally have i: "(x. F n (T x) * indicator A x M) = (x. F n x * indicator A x M)" by simp

    have "(x. (F (n+1) x - F n (T x)) * indicator A x M) = (x. F (n+1) x * indicator A x - F n (T x) * indicator A x M)"
      by (simp add: mult.commute right_diff_distrib)
    also have "... = (x. F (n+1) x * indicator A x M) - (x. F n (T x) * indicator A x M)"
      by (rule Bochner_Integration.integral_diff, auto simp add: intFn integrable_real_mult_indicator T_meas T_integral_preserving(1))
    also have "... = (x. F (n+1) x * indicator A x M) - (x. F n x * indicator A x M)"
      using i by simp
    also have "... = (x. F (n+1) x * indicator A x - F n x * indicator A x M)"
      by (rule Bochner_Integration.integral_diff[symmetric], auto simp add: intFn integrable_real_mult_indicator)
    also have "... = (x. (F (n+1) x - F n x) * indicator A x M)"
      by (simp add: mult.commute right_diff_distrib)
    finally have *: "(x. (F (n+1) x - F n (T x)) * indicator A x M) = (x. (F (n+1) x - F n x) * indicator A x M)"
      by simp

    have "F n x  F (n+1) x" for x unfolding F_def by (rule Max_mono, auto)
    then have "(F (n+1) x - F n x) * indicator A x  0" for x by simp
    then have "integralL M (λx. 0)  integralL M (λx. (F (n+1) x - F n x) * indicator A x)"
      by (auto simp add: intFn integrable_real_mult_indicator intro: integral_mono)
    then have "(x. (F (n+1) x - F n x) * indicator A x M)  0" by simp
    then show "(x. (F (n+1) x - F n (T x)) * indicator A x M)  0" using * by simp
  qed
  then show "(x. f x * indicator A x M)  0" using lim by (simp add: LIMSEQ_le_const)
qed

lemma birkhoff_aux2:
  fixes f::"'a  real"
  assumes [measurable]: "integrable M f"
  shows "AE x in M. limsup (λn. ereal(birkhoff_sum f n x / n))  real_cond_exp M Invariants f x"
proof -
  {
    fix ε assume "ε > (0::real)"
    define g where "g = (λx. f x - real_cond_exp M Invariants f x - ε)"
    then have intg: "integrable M g" using assms real_cond_exp_int(1) assms by auto
    define A where "A = {x  space M. limsup (λn. ereal(birkhoff_sum g n x)) = }"
    have Ag: "A  sets Invariants" "(x. g x * indicator A x M)  0"
      unfolding A_def by (rule birkhoff_aux1[where ?f = g, OF intg])+
    then have [measurable]: "A  sets M" by (simp add: Invariants_in_sets)

    have eq: "(x. indicator A x * real_cond_exp M Invariants f x M) = (x. indicator A x * f x M)"
    proof (rule real_cond_exp_intg[where ?f = "λx. (indicator A x)::real" and ?g = f])
      have "(λx. indicator A x * f x) = (λx. f x * indicator A x)" by auto
      then show "integrable M (λx. indicator A x * f x)"
        using integrable_real_mult_indicator[OF A  sets M assms] by simp
      show "indicator A  borel_measurable Invariants" using A  sets Invariants by measurable
    qed (simp)

    have "0  (x. g x * indicator A x M)" using Ag by simp
    also have "... = (x. f x * indicator A x - real_cond_exp M Invariants f x * indicator A x - ε * indicator A x M)"
      unfolding g_def by (simp add: left_diff_distrib)
    also have "... = (x. f x * indicator A x M) - (x. real_cond_exp M Invariants f x * indicator A x M) - (x. ε * indicator A x M)"
      using assms real_cond_exp_int(1)[OF assms] integrable_real_mult_indicator[OF A  sets M]
      by (auto simp: simp del: integrable_mult_left_iff)
    also have "... = - (x. ε * indicator A x M)"
      by (auto simp add: eq mult.commute)
    also have "... = - ε * measure M A" by auto
    finally have "0  - ε * measure M A" by simp
    then have "measure M A = 0" using ε > 0 by (simp add: measure_le_0_iff mult_le_0_iff)
    then have "A  null_sets M" by (simp add: emeasure_eq_measure null_setsI)
    then have "AE x in M. x  space M - A" by (metis (no_types, lifting) AE_cong Diff_iff AE_not_in)
    moreover
    {
      fix x assume "x  space M - A"
      then have "limsup (λn. ereal(birkhoff_sum g n x)) < " unfolding A_def by auto
      then obtain C where C: "n. birkhoff_sum g n x  C" using limsup_finite_then_bounded by presburger
      {
        fix n::nat assume "n > 0"
        have "birkhoff_sum g n x = birkhoff_sum f n x - birkhoff_sum (real_cond_exp M Invariants f) n x - birkhoff_sum (λx. ε) n x"
          unfolding g_def using birkhoff_sum_add birkhoff_sum_diff by auto
        moreover have "birkhoff_sum (real_cond_exp M Invariants f) n x = n * real_cond_exp M Invariants f x"
          using birkhoff_sum_of_invariants using x  space M - A by auto
        moreover have "birkhoff_sum (λx. ε) n x = n * ε" unfolding birkhoff_sum_def by auto
        ultimately have "birkhoff_sum g n x = birkhoff_sum f n x - n * real_cond_exp M Invariants f x - n * ε"
          by simp
        then have "birkhoff_sum f n x = birkhoff_sum g n x + n * real_cond_exp M Invariants f x + n * ε"
          by simp
        then have "birkhoff_sum f n x / n = birkhoff_sum g n x / n + real_cond_exp M Invariants f x + ε"
          using n > 0 by (simp add: field_simps)
        then have "birkhoff_sum f n x / n  C/n + real_cond_exp M Invariants f x + ε"
          using C[of n] n > 0 by (simp add: divide_right_mono)
        then have "ereal(birkhoff_sum f n x / n)  ereal(C/n + real_cond_exp M Invariants f x + ε)"
          by simp
      }
      then have "eventually (λn. ereal(birkhoff_sum f n x / n)  ereal(C/n + real_cond_exp M Invariants f x + ε)) sequentially"
        by (simp add: eventually_at_top_dense)
      then have b: "limsup (λn. ereal(birkhoff_sum f n x / n))  limsup (λn. ereal(C/n + real_cond_exp M Invariants f x + ε))"
        by (simp add: Limsup_mono)

      have "(λn. ereal(C*(1/real n) + real_cond_exp M Invariants f x + ε))  ereal(C * 0 + real_cond_exp M Invariants f x + ε)"
        by (intro tendsto_intros)
      then have "limsup (λn. ereal(C/real n + real_cond_exp M Invariants f x + ε)) = real_cond_exp M Invariants f x + ε"
        using sequentially_bot tendsto_iff_Liminf_eq_Limsup by force
      then have "limsup (λn. ereal(birkhoff_sum f n x / n))  real_cond_exp M Invariants f x + ε"
        using b by simp
    }
    ultimately have "AE x in M. limsup (λn. ereal(birkhoff_sum f n x / n))  real_cond_exp M Invariants f x + ε"
      by auto
    then have "AE x in M. limsup (λn. ereal(birkhoff_sum f n x / n))  ereal(real_cond_exp M Invariants f x) + ε"
      by auto
  }
  then show ?thesis
    by (rule AE_upper_bound_inf_ereal)
qed

theorem birkhoff_theorem_AE_nonergodic:
  fixes f::"'a  real"
  assumes "integrable M f"
  shows "AE x in M. (λn. birkhoff_sum f n x / n)  real_cond_exp M Invariants f x"
proof -
  {
    fix x assume i: "limsup (λn. ereal(birkhoff_sum f n x /n))  real_cond_exp M Invariants f x"
             and ii: "limsup (λn. ereal(birkhoff_sum (λx. -f x) n x / n))  real_cond_exp M Invariants (λx. -f x) x"
             and iii: "real_cond_exp M Invariants (λx. -f x) x = - real_cond_exp M Invariants f x"
    have "n. birkhoff_sum (λx. -f x) n x = - birkhoff_sum f n x"
      using birkhoff_sum_cmult[where ?c = "-1" and ?f = f] by auto
    then have "n. ereal(birkhoff_sum (λx. -f x) n x / n) = - ereal(birkhoff_sum f n x / n)" by auto
    moreover have "limsup (λn. - ereal(birkhoff_sum f n x / n)) = - liminf (λn. ereal(birkhoff_sum f n x /n))"
      by (rule ereal_Limsup_uminus)
    ultimately have "-liminf (λn. ereal(birkhoff_sum f n x /n)) = limsup (λn. ereal(birkhoff_sum (λx. -f x) n x / n))"
      by simp
    then have "-liminf (λn. ereal(birkhoff_sum f n x /n))  - real_cond_exp M Invariants f x"
      using ii iii by simp
    then have "liminf (λn. ereal(birkhoff_sum f n x /n))  real_cond_exp M Invariants f x"
      by (simp add: ereal_uminus_le_reorder)
    then have "(λn. birkhoff_sum f n x /n)  real_cond_exp M Invariants f x"
      using i by (simp add: limsup_le_liminf_real)
  } note * = this
  moreover have "AE x in M. limsup (λn. ereal(birkhoff_sum f n x /n))  real_cond_exp M Invariants f x"
    using birkhoff_aux2 assms by simp
  moreover have "AE x in M. limsup (λn. ereal(birkhoff_sum (λx. -f x) n x / n))  real_cond_exp M Invariants (λx. -f x) x"
    using birkhoff_aux2 assms by simp
  moreover have "AE x in M. real_cond_exp M Invariants (λx. -f x) x = - real_cond_exp M Invariants f x"
    using real_cond_exp_cmult[where ?c = "-1"] assms by force
  ultimately show ?thesis by auto
qed

text ‹If a function $f$ is integrable, then $E(f\circ T - f | I) = E(f\circ T | I) - E(f|I) = 0$.
Hence, $S_n(f \circ T - f) / n$ converges almost everywhere to $0$, i.e., $f(T^n x)/n \to 0$.
It is remarkable (and sometimes useful) that this holds under the weaker condition that
$f\circ T - f$ is integrable (but not necessarily $f$), where this naive argument fails.

The reason is that the Birkhoff sum of $f \circ T - f$ is $f\circ T^n - f$. If $n$ is such that $x$
and $T^n(x)$ belong to a set where $f$ is bounded, it follows that this Birkhoff sum is also
bounded. Along such a sequence of times, $S_n(f\circ T - f)/n$ tends to $0$.
By Poincare recurrence theorem, there are such times for almost every points. As it also converges
to $E(f \circ T - f | I)$, it follows that this function is almost everywhere $0$. Then
$f (T^n x)/n = S_n(f\circ T^n - f)/n - f/n$ tends almost surely to $E(f\circ T-f |I) = 0$.
›

lemma limit_foTn_over_n:
  fixes f::"'a  real"
  assumes [measurable]: "f  borel_measurable M"
      and "integrable M (λx. f(T x) - f x)"
  shows "AE x in M. real_cond_exp M Invariants (λx. f(T x) - f x) x = 0"
        "AE x in M. (λn. f((T^^n) x) / n)  0"
proof -
  define E::"nat  'a set" where "E k = {x  space M. ¦f x¦  k}" for k
  have [measurable]: "E k  sets M" for k unfolding E_def by auto
  have *: "(k. E k) = space M" unfolding E_def by (auto simp add: real_arch_simple)
  define F::"nat  'a set" where "F k = recurrent_subset_infty (E k)" for k
  have [measurable]: "F k  sets M" for k unfolding F_def by auto
  have **: "E k - F k  null_sets M" for k unfolding F_def using Poincare_recurrence_thm by auto
  have "space M - (k. F k)  null_sets M"
    apply (rule null_sets_subset[of "(k. E k - F k)"]) unfolding *[symmetric] using ** by auto
  with AE_not_in[OF this] have "AE x in M. x  (k. F k)" by auto
  moreover have "AE x in M. (λn. birkhoff_sum (λx. f(T x) - f x) n x / n)
       real_cond_exp M Invariants (λx. f(T x) - f x) x"
    by (rule birkhoff_theorem_AE_nonergodic[OF assms(2)])
  moreover have "real_cond_exp M Invariants (λx. f(T x) - f x) x = 0  (λn. f((T^^n) x) / n)  0"
    if H: "(λn. birkhoff_sum (λx. f(T x) - f x) n x / n)  real_cond_exp M Invariants (λx. f(T x) - f x) x"
          "x  (k. F k)" for x
  proof -
    have "f((T^^n) x) = birkhoff_sum (λx. f(T x) - f x) n x + f x" for n
      unfolding birkhoff_sum_def by (induction n, auto)
    then have "f((T^^n) x) / n = birkhoff_sum (λx. f(T x) - f x) n x / n + f x * (1/n)" for n
      by (auto simp add: divide_simps)
    moreover have "(λn. birkhoff_sum (λx. f(T x) - f x) n x / n + f x * (1/n))  real_cond_exp M Invariants (λx. f(T x) - f x) x + f x * 0"
      by (intro tendsto_intros H(1))
    ultimately have lim: "(λn. f((T^^n) x) / n)  real_cond_exp M Invariants (λx. f(T x) - f x) x"
      by auto

    obtain k where "x  F k" using H(2) by auto
    then have "infinite {n. (T^^n) x  E k}"
      unfolding F_def recurrent_subset_infty_inf_returns by auto
    with infinite_enumerate[OF this] obtain r :: "nat  nat"
      where r: "strict_mono r" "n. r n  {n. (T^^n) x  E k}"
      by auto
    have A: "(λn. k * (1/r n))  real k * 0"
      apply (intro tendsto_intros)
      using LIMSEQ_subseq_LIMSEQ[OF lim_1_over_n strict_mono r] unfolding comp_def by auto
    have B: "¦f((T^^(r n)) x) / r n¦  k / (r n)" for n
      using r(2) unfolding E_def by (auto simp add: divide_simps)
    have "(λn. f((T^^(r n)) x) / r n)  0"
      apply (rule tendsto_rabs_zero_cancel, rule tendsto_sandwich[of "λn. 0" _ _ "λn. k * (1/r n)"])
      using A B by auto
    moreover have "(λn. f((T^^(r n)) x) / r n)  real_cond_exp M Invariants (λx. f(T x) - f x) x"
      using LIMSEQ_subseq_LIMSEQ[OF lim strict_mono r] unfolding comp_def by auto
    ultimately have *: "real_cond_exp M Invariants (λx. f(T x) - f x) x = 0"
      using LIMSEQ_unique by auto
    then have "(λn. f((T^^n) x) / n)  0" using lim by auto
    then show ?thesis using * by auto
  qed
  ultimately show "AE x in M. real_cond_exp M Invariants (λx. f(T x) - f x) x = 0"
                  "AE x in M. (λn. f((T^^n) x) / n)  0"
    by auto
qed

text ‹We specialize the previous statement to the case where $f$ itself is integrable.›

lemma limit_foTn_over_n':
  fixes f::"'a  real"
  assumes [measurable]: "integrable M f"
  shows "AE x in M. (λn. f((T^^n) x) / n)  0"
by (rule limit_foTn_over_n, simp, rule Bochner_Integration.integrable_diff)
   (auto intro: assms T_integral_preserving(1))

text ‹It is often useful to show that a function is cohomologous to a nicer function, i.e., to
prove that a given $f$ can be written as $f = g + u - u \circ T$ where $g$ is nicer than $f$. We
show below that any integrable function is cohomologous to a function which is arbitrarily close to
$E(f|I)$. This is an improved version of Lemma 2.1 in [Benoist-Quint, Annals of maths, 2011]. Note
that the function $g$ to which $f$ is cohomologous is very nice (and, in particular, integrable),
but the transfer function is only measurable in this argument. The fact that the control on
conditional expectation is nevertheless preserved throughout the argument follows from
Lemma~\verb+limit_foTn_over_n+ above.›

text ‹We start with the lemma (and the proof) of [BQ2011]. It shows that, if a function has a
conditional expectation with respect to invariants which is positive, then it is cohomologous to a
nonnegative function. The argument is the clever remark that $g = \max (0, \inf_n S_n f)$ and $u =
\min (0, \inf_n S_n f)$ work (where these expressions are well defined as $S_n f$ tends to infinity
thanks to our assumption).›

lemma cohomologous_approx_cond_exp_aux:
  fixes f::"'a  real"
  assumes [measurable]: "integrable M f"
      and "AE x in M. real_cond_exp M Invariants f x > 0"
    shows "u g. u  borel_measurable M  (integrable M g)  (AE x in M. g x  0  g x  max 0 (f x))  (x. f x = g x + u x - u (T x))"
proof -
  define h::"'a  real" where "h = (λx. (INF n{1..}. birkhoff_sum f n x))"
  define u where "u = (λx. min (h x) 0)"
  define g where "g = (λx. f x - u x + u (T x))"
  have [measurable]: "h  borel_measurable M" "u  borel_measurable M" "g  borel_measurable M"
    unfolding g_def h_def u_def by auto
  have "f x = g x + u x - u (T x)" for x unfolding g_def by auto
  {
    fix x assume H: "real_cond_exp M Invariants f x > 0"
                    "(λn. birkhoff_sum f n x / n)  real_cond_exp M Invariants f x"
    have "eventually (λn. ereal(birkhoff_sum f n x / n) * ereal n = ereal(birkhoff_sum f n x)) sequentially"
      unfolding eventually_sequentially by (rule exI[of _ 1], auto)
    moreover have "(λn. ereal(birkhoff_sum f n x / n) * ereal n)  ereal(real_cond_exp M Invariants f x) * "
      apply (intro tendsto_intros) using H by auto
    ultimately have "(λn. ereal(birkhoff_sum f n x))  ereal(real_cond_exp M Invariants f x) * "
      by (blast intro: Lim_transform_eventually)
    then have "(λn. ereal(birkhoff_sum f n x))  "
      using H by auto
    then have B: "C. n. C  birkhoff_sum f n x"
      by (intro liminf_finite_then_bounded_below, simp add: liminf_PInfty)

    have "h x  f x"
      unfolding h_def apply (rule cInf_lower) using B by force+

    have "{birkhoff_sum f n (T x) |n. n  {1..}} = {birkhoff_sum f (1+n) (x) - f x |n. n  {1..}}"
      unfolding birkhoff_sum_cocycle by auto
    also have "... = {birkhoff_sum f n x - f x |n. n  {2..}}"
      by (metis (no_types, opaque_lifting) Suc_1 Suc_eq_plus1_left Suc_le_D Suc_le_mono atLeast_iff)
    finally have *: "{birkhoff_sum f n (T x) |n. n  {1..}} = (λt. t - (f x))`{birkhoff_sum f n x |n. n  {2..}}"
      by auto

    have "h(T x) = Inf {birkhoff_sum f n (T x) |n. n  {1..}}"
      unfolding h_def by (metis Setcompr_eq_image)
    also have "... =  (t{birkhoff_sum f n x |n. n  {2..}}. t - f x)"
      by (simp only: *)
    also have "... = (λt. t - (f x)) (Inf {birkhoff_sum f n x |n. n  {2..}})"
      using B by (auto intro!: monoI bijI mono_bij_cInf [symmetric])
    finally have I: "Inf {birkhoff_sum f n x |n. n  {2..}} = f x + h (T x)" by auto
    have "max 0 (h x) + u x = h x"
      unfolding u_def by auto
    also have "... = Inf {birkhoff_sum f n x |n. n  {1..}}"
      unfolding h_def by (metis Setcompr_eq_image)
    also have "... = Inf ({birkhoff_sum f n x |n. n  {1}}  {birkhoff_sum f n x |n. n  {2..}})"
      by (auto intro!: arg_cong[of _ _ Inf], metis One_nat_def Suc_1 antisym birkhoff_sum_1(2) not_less_eq_eq, force)
    also have "Inf ({birkhoff_sum f n x |n. n  {1}}  {birkhoff_sum f n x |n. n  {2..}})
      = min (Inf {birkhoff_sum f n x |n. n  {1}}) (Inf {birkhoff_sum f n x |n. n  {2..}})"
      unfolding inf_min[symmetric] apply (intro cInf_union_distrib) using B by auto
    also have "... = min (f x) (f x + h (T x))" using I by auto
    also have "... = f x + u (T x)" unfolding u_def by auto
    finally have "max 0 (h x) = f x + u (T x) - u x" by auto
    then have "g x = max 0 (h x)" unfolding g_def by auto
    then have "g x  0  g x  max 0 (f x)" using h x  f x by auto
  }
  then have *: "AE x in M. g x  0  g x  max 0 (f x)"
    using assms(2) birkhoff_theorem_AE_nonergodic[OF assms(1)] by auto
  moreover have "integrable M g"
    apply (rule Bochner_Integration.integrable_bound[of _ f]) using * by (auto simp add: assms)
  ultimately have "u  borel_measurable M  integrable M g  (AE x in M. 0  g x  g x  max 0 (f x))  (x. f x = g x + u x - u (T x))"
    using x. f x = g x + u x - u (T x) u  borel_measurable M by auto
  then show ?thesis by blast
qed

text ‹To deduce the stronger version that $f$ is cohomologous to an arbitrarily good approximation
of $E(f|I)$, we apply the previous lemma twice, to control successively the negative and the
positive side. The sign control in the conclusion of the previous lemma implies that the second step
does not spoil the first one.›

lemma cohomologous_approx_cond_exp:
  fixes f::"'a  real" and B::"'a  real"
  assumes [measurable]: "integrable M f" "B  borel_measurable M"
      and "AE x in M. B x > 0"
    shows "g u. u  borel_measurable M
               integrable M g
               (x. f x = g x + u x - u (T x))
               (AE x in M. abs(g x - real_cond_exp M Invariants f x)  B x)"
proof -
  define C where "C = (λx. min (B x) 1)"
  have [measurable]: "integrable M C"
    apply (rule Bochner_Integration.integrable_bound[of _ "λ_. (1::real)"], auto)
    unfolding C_def using assms(3) by auto
  have "C x  B x" for x unfolding C_def by auto
  have "AE x in M. C x > 0" unfolding C_def using assms(3) by auto
  have AECI: "AE x in M. real_cond_exp M Invariants C x > 0"
    by (intro real_cond_exp_gr_c integrable M C AE x in M. C x > 0)

  define f1 where "f1 = (λx. f x - real_cond_exp M Invariants f x)"
  have "integrable M f1"
    unfolding f1_def by (intro Bochner_Integration.integrable_diff integrable M f real_cond_exp_int(1))
  have "AE x in M. real_cond_exp M Invariants f1 x = real_cond_exp M Invariants f x - real_cond_exp M Invariants (real_cond_exp M Invariants f) x"
    unfolding f1_def apply (rule real_cond_exp_diff) by (intro Bochner_Integration.integrable_diff
                integrable M f integrable M C real_cond_exp_int(1))+
  moreover have "AE x in M. real_cond_exp M Invariants (real_cond_exp M Invariants f) x = real_cond_exp M Invariants f x"
    by (intro real_cond_exp_nested_subalg subalg integrable M f, auto)
  ultimately have AEf1: "AE x in M. real_cond_exp M Invariants f1 x = 0" by auto

  have A [measurable]: "integrable M (λx. f1 x + C x)"
    by (intro Bochner_Integration.integrable_add integrable M f1 integrable M C)
  have "AE x in M. real_cond_exp M Invariants (λx. f1 x + C x) x = real_cond_exp M Invariants f1 x + real_cond_exp M Invariants C x"
    by (intro real_cond_exp_add integrable M f1 integrable M C)
  then have B: "AE x in M. real_cond_exp M Invariants (λx. f1 x + C x) x > 0"
    using AECI AEf1 by auto

  obtain u2 g2 where H2: "u2  borel_measurable M" "integrable M g2" "AE x in M. g2 x  0  g2 x  max 0 (f1 x + C x)" "x. f1 x + C x = g2 x + u2 x - u2 (T x)"
    using cohomologous_approx_cond_exp_aux[OF A B] by blast

  define f2 where "f2 = (λx. (g2 x - C x))"
  have *: "u2(T x) - u2 x = f2 x -f1 x" for x unfolding f2_def using H2(4)[of x] by auto
  have "AE x in M. f2 x  - C x" using H2(3) unfolding f2_def by auto
  have "integrable M f2"
    unfolding f2_def by (intro Bochner_Integration.integrable_diff integrable M g2 integrable M C)
  have "AE x in M. real_cond_exp M Invariants (λx. u2(T x) - u2 x) x = 0"
  proof (rule limit_foTn_over_n)
    show "integrable M (λx. u2(T x) - u2 x)"
      unfolding * by (intro Bochner_Integration.integrable_diff integrable M f1 integrable M f2)
  qed (simp add: u2  borel_measurable M)
  then have "AE x in M. real_cond_exp M Invariants (λx. f2 x - f1 x) x = 0"
    unfolding * by simp
  moreover have "AE x in M. real_cond_exp M Invariants (λx. f2 x - f1 x) x = real_cond_exp M Invariants f2 x - real_cond_exp M Invariants f1 x"
    by (intro real_cond_exp_diff integrable M f2 integrable M f1)
  ultimately have AEf2: "AE x in M. real_cond_exp M Invariants f2 x = 0"
    using AEf1 by auto

  have A [measurable]: "integrable M (λx. C x - f2 x)"
    by (intro Bochner_Integration.integrable_diff integrable M f2 integrable M C)
  have "AE x in M. real_cond_exp M Invariants (λx. C x - f2 x) x = real_cond_exp M Invariants C x - real_cond_exp M Invariants f2 x"
    by (intro real_cond_exp_diff integrable M f2 integrable M C)
  then have B: "AE x in M. real_cond_exp M Invariants (λx. C x - f2 x) x > 0"
    using AECI AEf2 by auto

  obtain u3 g3 where H3: "u3  borel_measurable M" "integrable M g3" "AE x in M. g3 x  0  g3 x  max 0 (C x - f2 x)" "x. C x - f2 x = g3 x + u3 x - u3 (T x)"
    using cohomologous_approx_cond_exp_aux[OF A B] by blast

  define f3 where "f3 = (λx. C x - g3 x)"
  have "AE x in M. f3 x  min (C x) (f2 x)" unfolding f3_def using H3(3) by auto
  then have "AE x in M. f3 x  -C x" using AE x in M. f2 x  - C x AE x in M. C x > 0 by auto
  moreover have "AE x in M. f3 x  C x" unfolding f3_def using H3(3) by auto
  ultimately have "AE x in M. abs(f3 x)  C x" by auto
  then have *: "AE x in M. abs(f3 x)  B x" using order_trans[OF _ x. C x  B x] by auto

  define g where "g = (λx. f3 x + real_cond_exp M Invariants f x)"
  define u where "u = (λx. u2 x - u3 x)"
  have "AE x in M. abs (g x - real_cond_exp M Invariants f x)  B x"
    unfolding g_def using * by auto
  moreover have "f x = g x + u x - u(T x)" for x
    using H3(4)[of x] H2(4)[of x] unfolding u_def g_def f3_def f2_def f1_def by auto
  moreover have "u  borel_measurable M"
    unfolding u_def using u2  borel_measurable M u3  borel_measurable M by auto
  moreover have "integrable M g"
    unfolding g_def f3_def by (intro Bochner_Integration.integrable_add Bochner_Integration.integrable_diff
    integrable M C integrable M g3 integrable M f real_cond_exp_int(1))
  ultimately show ?thesis by auto
qed


subsubsection ‹$L^1$ version of Birkhoff theorem›

text ‹The $L^1$ convergence in Birkhoff theorem follows from the almost everywhere convergence and
general considerations on $L^1$ convergence (Scheffe's lemma) explained
in \verb+AE_and_int_bound_implies_L1_conv2+.
This argument works neatly for nonnegative functions, the general case reduces to this one by taking
the positive and negative parts of a given function.

One could also prove it by truncation: for bounded functions, the $L^1$ convergence follows
from the boundedness and almost sure convergence. The general case follows by density, but it
is a little bit tedious to write as one need to make sure that the conditional expectation
of the truncation converges to the conditional expectation of the original function. This is true
in $L^1$ as the conditional expectation is a contraction in $L^1$, it follows almost everywhere
after taking a subsequence. All in all, the argument based on Scheffe's lemma seems more
economical.›

lemma birkhoff_lemma_L1:
  fixes f::"'a  real"
  assumes "x. f x  0"
      and [measurable]: "integrable M f"
  shows "(λn. +x. norm(birkhoff_sum f n x / n - real_cond_exp M Invariants f x) M)  0"
proof (rule Scheffe_lemma2)
  show i: "integrable M (real_cond_exp M Invariants f)" using assms by (simp add: real_cond_exp_int(1))
  show "AE x in M. (λn. birkhoff_sum f n x / real n)  real_cond_exp M Invariants f x"
    using birkhoff_theorem_AE_nonergodic assms by simp
  fix n
  have [measurable]: "(λx. ennreal ¦birkhoff_sum f n x¦)  borel_measurable M" by measurable
  show [measurable]: "(λx. birkhoff_sum f n x / real n)  borel_measurable M" by measurable

  have "AE x in M. real_cond_exp M Invariants f x  0" using assms(1) real_cond_exp_pos by simp
  then have *: "AE x in M. norm (real_cond_exp M Invariants f x) = real_cond_exp M Invariants f x" by auto
  have **: "( x. norm (real_cond_exp M Invariants f x) M) = ( x. real_cond_exp M Invariants f x M)"
    apply (rule integral_cong_AE) using * by auto

  have "(+x. ennreal (norm (real_cond_exp M Invariants f x)) M) = ( x. norm (real_cond_exp M Invariants f x) M)"
    by (rule nn_integral_eq_integral) (auto simp add: i)
  also have "... = ( x. real_cond_exp M Invariants f x M)"
    using ** by simp
  also have "... = ( x. f x M)"
    using real_cond_exp_int(2) assms(2) by auto
  also have "... = (x. norm(f x) M)" using assms by auto
  also have "... = (+x. norm(f x) M)"
    by (rule nn_integral_eq_integral[symmetric], auto simp add: assms(2))
  finally have eq: "(+ x. norm (real_cond_exp M Invariants f x) M) = (+ x. norm(f x) M)" by simp

  {
    fix x
    have "norm(birkhoff_sum f n x)  birkhoff_sum (λx. norm(f x)) n x"
      using birkhoff_sum_abs by fastforce
    then have "norm(birkhoff_sum f n x)  birkhoff_sum (λx. ennreal(norm(f x))) n x"
      unfolding birkhoff_sum_def by auto
  }
  then have "(+x. norm(birkhoff_sum f n x) M)  (+x. birkhoff_sum (λx. ennreal(norm(f x))) n x M)"
    by (simp add: nn_integral_mono)
  also have "... = n * (+x. norm(f x) M)"
    by (rule birkhoff_sum_nn_integral, auto)
  also have "... = n * (+ x. norm (real_cond_exp M Invariants f x) M)"
    using eq by simp
  finally have *: "(+x. norm(birkhoff_sum f n x) M)  n * (+ x. norm (real_cond_exp M Invariants f x) M)"
    by simp

  show "(+ x. ennreal (norm (birkhoff_sum f n x / real n)) M)  (+ x. norm (real_cond_exp M Invariants f x) M)"
  proof (cases)
    assume "n = 0"
    then show ?thesis by auto
  next
    assume "¬(n = 0)"
    then have "n > 0" by simp
    then have "1/ennreal(real n)  0" by simp
    have "(+ x. ennreal (norm (birkhoff_sum f n x / real n)) M) = (+ x. ennreal (norm (birkhoff_sum f n x)) / ennreal(real n) M)"
      using n > 0 by (auto simp: divide_ennreal)
    also have "... = (+ x. (1/ennreal(real n)) * ennreal (norm (birkhoff_sum f n x)) M)"
      by (simp add: 0 < n divide_ennreal_def mult.commute)
    also have "... = (1/ennreal(real n) * (+ x. ennreal (norm (birkhoff_sum f n x)) M))"
      by (subst nn_integral_cmult) auto
    also have "...  (1/ennreal(real n)) * (ennreal(real n) * (+ x. norm (real_cond_exp M Invariants f x) M))"
      using * by (intro mult_mono) (auto simp: ennreal_of_nat_eq_real_of_nat)
    also have "... = (+ x. norm (real_cond_exp M Invariants f x) M)"
      using n > 0
      by (auto simp del: ennreal_1 simp add: ennreal_1[symmetric] divide_ennreal ennreal_mult[symmetric] mult.assoc[symmetric])
        simp
    finally show ?thesis by simp
  qed
qed

theorem birkhoff_theorem_L1_nonergodic:
  fixes f::"'a  real"
  assumes [measurable]: "integrable M f"
  shows "(λn. +x. norm(birkhoff_sum f n x / n - real_cond_exp M Invariants f x) M)  0"
proof -
  define g where "g = (λx. max (f x) 0)"
  have g_int [measurable]: "integrable M g" unfolding g_def using assms by auto
  define h where "h = (λx. max (-f x) 0)"
  have h_int [measurable]: "integrable M h" unfolding h_def using assms by auto
  have "f = (λx. g x - h x)" unfolding g_def h_def by auto
  {
    fix n::nat assume "n > 0"
    have "x. birkhoff_sum f n x = birkhoff_sum g n x - birkhoff_sum h n x" using birkhoff_sum_diff f = (λx. g x - h x) by auto
    then have "x. birkhoff_sum f n x / n = birkhoff_sum g n x / n - birkhoff_sum h n x / n" using n > 0 by (simp add: diff_divide_distrib)
    moreover have "AE x in M. real_cond_exp M Invariants g x - real_cond_exp M Invariants h x = real_cond_exp M Invariants f x"
      using AE_symmetric[OF real_cond_exp_diff] g_int h_int f = (λx. g x - h x) by auto
    ultimately have "AE x in M. birkhoff_sum f n x / n - real_cond_exp M Invariants f x =
        (birkhoff_sum g n x / n - real_cond_exp M Invariants g x) - (birkhoff_sum h n x / n - real_cond_exp M Invariants h x)"
      by auto
    then have *: "AE x in M. norm(birkhoff_sum f n x / n - real_cond_exp M Invariants f x) 
      norm(birkhoff_sum g n x / n - real_cond_exp M Invariants g x) + norm(birkhoff_sum h n x / n - real_cond_exp M Invariants h x)"
      by auto
    have "(+ x. norm(birkhoff_sum f n x / n - real_cond_exp M Invariants f x) M) 
      (+ x. ennreal(norm(birkhoff_sum g n x / n - real_cond_exp M Invariants g x)) + norm(birkhoff_sum h n x / n - real_cond_exp M Invariants h x) M)"
      apply (rule nn_integral_mono_AE) using * by (simp add: ennreal_plus[symmetric] del: ennreal_plus)
    also have "... = (+ x. norm(birkhoff_sum g n x / n - real_cond_exp M Invariants g x) M) + (+ x. norm(birkhoff_sum h n x / n - real_cond_exp M Invariants h x) M)"
      apply (rule nn_integral_add) apply auto using real_cond_exp_F_meas borel_measurable_cond_exp2 by measurable
    finally have "(+ x. norm(birkhoff_sum f n x / n - real_cond_exp M Invariants f x) M) 
      (+ x. norm(birkhoff_sum g n x / n - real_cond_exp M Invariants g x) M) + (+ x. norm(birkhoff_sum h n x / n - real_cond_exp M Invariants h x) M)"
      by simp
  }
  then have *: "eventually (λn. (+ x. norm(birkhoff_sum f n x / n - real_cond_exp M Invariants f x) M) 
      (+ x. norm(birkhoff_sum g n x / n - real_cond_exp M Invariants g x) M) + (+ x. norm(birkhoff_sum h n x / n - real_cond_exp M Invariants h x) M))
      sequentially"
    using eventually_at_top_dense by auto
  have **: "eventually (λn. (+ x. norm(birkhoff_sum f n x / n - real_cond_exp M Invariants f x) M)  0) sequentially"
    by simp

  have "(λn. (+ x. norm(birkhoff_sum g n x / n - real_cond_exp M Invariants g x) M))  0"
    apply (rule birkhoff_lemma_L1, auto simp add: g_int) unfolding g_def by auto
  moreover have "(λn. (+ x. norm(birkhoff_sum h n x / n - real_cond_exp M Invariants h x) M))  0"
    apply (rule birkhoff_lemma_L1, auto simp add: h_int) unfolding h_def by auto
  ultimately have "(λn. (+ x. norm(birkhoff_sum g n x / n - real_cond_exp M Invariants g x) M) + (+ x. norm(birkhoff_sum h n x / n - real_cond_exp M Invariants h x) M))  0"
    using tendsto_add[of _ 0 _ _ 0] by auto
  then show ?thesis
    using tendsto_sandwich[OF ** *] by auto
qed


subsubsection ‹Conservativity of skew products›

text ‹The behaviour of skew-products of the form $(x, y) \mapsto (Tx, y + f x)$ is directly related
to Birkhoff theorem, as the iterates involve the Birkhoff sums in the fiber. Birkhoff theorem
implies that such a skew product is conservative when the function $f$ has vanishing conditional
expectation.

To prove the theorem, assume by contradiction that a set $A$ with positive measure does not
intersect its preimages. Replacing $A$ with a smaller set $C$, we can assume that $C$ is bounded in
the $y$-direction, by a constant $N$, and also that all its nonempty vertical fibers, above the
projection $Cx$, have a measure bounded from below. Then, by Birkhoff theorem, for any $r>0$, most
of the first $n$ preimages of $C$ are contained in the set $\{|y| \leq r n+N\}$, of measure $O(r
n)$. Hence, they can not be disjoint if $r < \mu(C)$. To make this argument rigorous, one should
only consider the preimages whose $x$-component belongs to a set $Dx$ where the Birkhoff sums are
small. This condition has a positive measure if $\mu(Cx) + \mu(Dx) > \mu(M)$, which one can ensure
by taking $Dx$ large enough.›

theorem (in fmpt) skew_product_conservative:
  fixes f::"'a  real"
  assumes [measurable]: "integrable M f"
    and "AE x in M. real_cond_exp M Invariants f x = 0"
  shows "conservative_mpt (M M lborel) (λ(x,y). (T x, y + f x))"
proof (rule conservative_mptI)
  let ?TS = "(λ(x,y). (T x, y + f x))"
  let ?MS = "M M (lborel::real measure)"

  have f_meas [measurable]: "f  borel_measurable M" by auto
  have "mpt M T" by (simp add: mpt_axioms)
  with mpt_skew_product_real[OF this f_meas] show "mpt ?MS ?TS" by simp
  then interpret TS: mpt ?MS ?TS by auto

  fix A::"('a × real) set"
  assume A1 [measurable]: "A  sets ?MS" and A2: "emeasure ?MS A > 0"
  have "A = (N::nat. A  {(x,y). abs(y)  N})" by (auto simp add: real_arch_simple)
  then have *: "emeasure ?MS (N::nat. A  {(x,y). abs(y)  N}) > 0"
    using A2 by simp

  have "space ?MS = space M × space (lborel::real measure)" using space_pair_measure by auto
  then have A_inc: "A  space M × space (lborel::real measure)" using sets.sets_into_space[OF A1] by auto

  {
    fix N::nat
    have "{(x, y). abs(y)  real N  x  space M} = space M × {-(real N)..(real N)}" by auto
    then have "{(x, y). ¦y¦  real N  x  space M}  sets ?MS" by auto
    then have "A  {(x, y). ¦y¦  real N  x  space M}  sets ?MS" using A1 by auto
    moreover have "A  {(x,y). abs(y)  real N} = A  {(x, y). ¦y¦  real N  x  space M}"
      using A_inc by blast
    ultimately have "A  {(x,y). abs(y)  real N}  sets ?MS" by auto
  }
  then have [measurable]: "N::nat. A  {(x, y). ¦y¦  real N}  sets (M M borel)" by auto

  have "N::nat. emeasure ?MS (A  {(x,y). abs(y)  N}) > 0"
    apply (rule emeasure_pos_unionE) using * by auto
  then obtain N::nat where N: "emeasure ?MS (A  {(x,y). abs(y)  N}) > 0"
    by auto

  define B where "B = A  {(x,y). abs(y)  N}"
  have B_meas [measurable]: "B  sets (M M lborel)" unfolding B_def by auto
  have "0 < emeasure (M M lborel) B" unfolding B_def using N by auto
  also have "... = (+x. emeasure lborel (Pair x -` B) M)"
    apply (rule sigma_finite_measure.emeasure_pair_measure_alt)
    using B_meas by (auto simp add: lborel.sigma_finite_measure_axioms)
  finally have *: "(+x. emeasure lborel (Pair x -` B) M) > 0" by simp

  have "Cxsets M. e::real>0. emeasure M Cx > 0  (x  Cx. emeasure lborel (Pair x -` B)  e)"
    by (rule not_AE_zero_int_ennreal_E, auto simp add: *)
  then obtain Cx e where [measurable]: "Cx  sets M" and Cxe: "e>(0::real)" "emeasure M Cx > 0" "x. x  Cx  emeasure lborel (Pair x -` B)  e"
    by blast
  define C where "C = B  (Cx × (UNIV::real set))"
  have C_meas [measurable]: "C  sets (M M lborel)" unfolding C_def using B_meas by auto
  have Cx_fibers: "x. x  Cx  emeasure lborel (Pair x -` C)  e" using Cxe(3) C_def by auto

  define c where "c = (measure M Cx)/2"
  have "c > 0" unfolding c_def using Cxe(2) by (simp add: emeasure_eq_measure)

  text ‹We will apply Birkhoff theorem to show that most preimages of $C$ at time $n$ are contained in a cylinder
  of height roughly $r n$, for some suitably small $r$. How small $r$ should be to get a
  contradiction can be determined at the end of the proof. It turns out that the good condition
  is the following one -- this is by no means obvious now.›

  define r where "r = (if measure M (space M) = 0 then 1 else e * c / (4 * measure M (space M)))"
  have "r > 0" using e > 0 c > 0 unfolding r_def
    apply auto using measure_le_0_iff by fastforce
  have pos: "e*c-2*r*measure M (space M) > 0" using e > 0 c > 0 unfolding r_def by auto

  define Bgood where "Bgood = {x  space M. (λn. birkhoff_sum f n x / n)  0}"
  have [measurable]: "Bgood  sets M" unfolding Bgood_def by auto
  have *: "AE x in M. x  Bgood" unfolding Bgood_def using birkhoff_theorem_AE_nonergodic[OF assms(1)] assms(2) by auto
  then have "emeasure M Bgood = emeasure M (space M)"
    by (intro emeasure_eq_AE) auto

  {
    fix x assume "x  Bgood"
    then have "x  space M" unfolding Bgood_def by auto
    have "(λn. birkhoff_sum f n x / n)  0" using x  Bgood unfolding Bgood_def by auto
    moreover have "0  {-r<..<r}" "open {-r<..<r}" using r>0 by auto
    ultimately have "eventually (λn. birkhoff_sum f n x / n  {-r<..<r}) sequentially"
      using topological_tendstoD by blast
    then obtain n0 where n0: "n0>0" "n. n  n0  birkhoff_sum f n x / n  {-r<..<r}"
      using eventually_sequentially by (metis (mono_tags, lifting) le0 le_simps(3) neq0_conv)
    {
      fix n assume "n  n0"
      then have "n>0" using n0>0 by auto
      with n0(2)[OF n  n0] have "abs(birkhoff_sum f n x / n)  r" by auto
      then have "abs(birkhoff_sum f n x)  r * n" using n>0 by (simp add: divide_le_eq)
    }
    then have "x  (n0. {x  space M. n{n0..}. abs(birkhoff_sum f n x)  r * n})" using x  space M by blast
  }
  then have "AE x in M. x  space M - (n0. {x  space M. n{n0..}. abs(birkhoff_sum f n x)  r * n})"
    using * by auto
  then have eqM: "emeasure M (n0. {x  space M. n{n0..}. abs(birkhoff_sum f n x)  r * n}) = emeasure M (space M)"
    by (intro emeasure_eq_AE) auto

  have "(λn0. emeasure M {x  space M. n{n0..}. abs(birkhoff_sum f n x)  r * n} + c)
           emeasure M (n0. {x  space M. n{n0..}. abs(birkhoff_sum f n x)  r * n}) + c"
    by (intro tendsto_intros Lim_emeasure_incseq) (auto simp add: incseq_def)
  moreover have "emeasure M (n0. {x  space M. n{n0..}. abs(birkhoff_sum f n x)  r * n}) + c > emeasure M (space M)"
    using eqM c > 0 emeasure_eq_measure by auto
  ultimately have "eventually (λn0. emeasure M {x  space M. n{n0..}. abs(birkhoff_sum f n x)  r * n} + c > emeasure M (space M)) sequentially"
    unfolding order_tendsto_iff by auto
  then obtain n0 where n0: "emeasure M {x  space M. n{n0..}. abs(birkhoff_sum f n x)  r * n} + c > emeasure M (space M)"
    using eventually_sequentially by auto

  define Dx where "Dx = {x  space M. n{n0..}. abs(birkhoff_sum f n x)  r * n}"
  have Dx_meas [measurable]: "Dx  sets M" unfolding Dx_def by auto
  have "emeasure M Dx + c  emeasure M (space M)" using n0 Dx_def by auto

  obtain n1::nat where n1: "n1 > max n0 ((measure M (space M) * 2 * N + e*c*n0 - e*c) / (e*c-2*r*measure M (space M)))"
    by (metis mult.commute mult.left_neutral numeral_One reals_Archimedean3 zero_less_numeral)
  then have "n1 > n0" by auto
  have n1_ineq: "n1 * (e*c-2*r*measure M (space M)) > (measure M (space M) * 2 * N + e*c*n0 - e*c)"
    using n1 pos by (simp add: pos_divide_less_eq)

  define D where "D = (λn. Dx × {-r*n1-N..r*n1+N}  (?TS^^n)-`C)"
  have Dn_meas [measurable]: "D n  sets (M M lborel)" for n
    unfolding D_def apply (rule TS.T_intersec_meas(2)) using C_meas by auto

  have "emeasure ?MS (D n)  e * c" if "n  {n0..n1}" for n
  proof -
    have "n  n0" "n  n1" using that by auto
    {
      fix x assume [simp]: "x  space M"

      define F where "F = {y  {-r*n1-N..r*n1+N}. y + birkhoff_sum f n x  Pair ((T^^n)x) -`C}"
      have [measurable]: "F  sets lborel" unfolding F_def by measurable
      {
        fix y::real
        have "(?TS^^n)(x,y) = ((T^^n)x, y + birkhoff_sum f n x)"
          using skew_product_real_iterates by simp
        then have "(indicator C ((?TS^^n) (x,y))::ennreal) = indicator Cx ((T^^n)x) * indicator (Pair ((T^^n)x) -`C) (y + birkhoff_sum f n x)"
          using C_def by (simp add: indicator_def)
        moreover have "(indicator (D n) (x, y)::ennreal) = indicator Dx x * indicator {-r*n1-N..r*n1+N} y * indicator C ((?TS^^n) (x,y))"
          unfolding D_def by (simp add: indicator_def)
        ultimately have "(indicator (D n) (x, y)::ennreal) = indicator Dx x * indicator {-r*n1-N..r*n1+N} y
            * indicator Cx ((T^^n)x) * indicator (Pair ((T^^n)x) -`C) (y + birkhoff_sum f n x)"
          by (simp add: mult.assoc)
        then have "(indicator (D n) (x, y)::ennreal) = indicator (Dx  (T^^n)-`Cx) x * indicator F y"
          unfolding F_def by (simp add: indicator_def)
      }
      then have "(+y. indicator (D n) (x, y) lborel) = (+y. indicator (Dx  (T^^n)-`Cx) x * indicator F y lborel)"
        by auto
      also have "... = indicator (Dx  (T^^n)-`Cx) x * (+y. indicator F y lborel)"
        by (rule nn_integral_cmult, auto)
      also have "... = indicator (Dx  (T^^n)-`Cx) x * emeasure lborel F" using F  sets lborel by auto
      finally have A: "(+y. indicator (D n) (x, y) lborel) = indicator (Dx  (T^^n)-`Cx) x * emeasure lborel F"
        by simp

      have "(+y. indicator (D n) (x, y) lborel)  ennreal e * indicator (Dx  (T^^n)-`Cx) x"
      proof (cases)
        assume "indicator (Dx  (T^^n)-`Cx) x = (0::ennreal)"
        then show ?thesis by auto
      next
        assume "¬(indicator (Dx  (T^^n)-`Cx) x = (0::ennreal))"
        then have "x  Dx  (T^^n)-`Cx" by (simp add: indicator_eq_0_iff)
        then have "x  Dx" "(T^^n) x  Cx" by auto
        then have "abs(birkhoff_sum f n x)  r * n" using n  {n0..n1} Dx_def by auto
        then have *: "abs(birkhoff_sum f n x)  r * n1" using n  n1 r>0
          by (meson of_nat_le_iff order_trans mult_le_cancel_left_pos)

        have F_expr: "F = {-r*n1-N..r*n1+N}  (+)(birkhoff_sum f n x) -` (Pair ((T^^n)x) -`C)"
          unfolding F_def by (auto simp add: add.commute)
        have "(Pair ((T^^n)x) -`C)  {real_of_int (- int N)..real N}" unfolding C_def B_def by auto
        then have "((+)(birkhoff_sum f n x)) -` (Pair ((T^^n)x) -`C)  {-N-birkhoff_sum f n x..N-birkhoff_sum f n x}"
          by auto
        also have "...  {-r * n1 - N .. r * n1 + N}" using * by auto
        finally have "F = ((+)(birkhoff_sum f n x)) -` (Pair ((T^^n)x) -`C)" unfolding F_expr by auto

        then have "emeasure lborel F = emeasure lborel ((+)(birkhoff_sum f n x) -` (Pair ((T^^n)x) -`C))" by auto
        also have "... = emeasure lborel (((+)(birkhoff_sum f n x) -` (Pair ((T^^n)x) -`C))  space lborel)" by simp
        also have "... = emeasure (distr lborel borel ((+) (birkhoff_sum f n x))) (Pair ((T^^n)x) -`C)"
          apply (rule emeasure_distr[symmetric]) using C_meas by auto
        also have "... = emeasure lborel (Pair ((T^^n)x) -`C)" using lborel_distr_plus[of "birkhoff_sum f n x"] by simp
        also have "...  e" using Cx_fibers (T^^n) x  Cx by auto
        finally have "emeasure lborel F  e" by auto
        then show ?thesis using A by (simp add: indicator_def)
      qed
    }
    moreover have "emeasure ?MS (D n) = (+x. (+y. indicator (D n) (x, y) lborel) M)"
      using Dn_meas lborel.emeasure_pair_measure by blast
    ultimately have "emeasure ?MS (D n)  (+x. ennreal e * indicator (Dx  (T ^^ n) -` Cx) x M)"
      by (simp add: nn_integral_mono)
    also have "(+x. ennreal e * indicator (Dx  (T ^^ n) -` Cx) x M) = e * (+x. indicator (Dx  (T ^^ n) -` Cx) x M)"
      apply (rule nn_integral_cmult) using e>0 by auto
    also have "... = ennreal e * emeasure M (Dx  (T ^^ n) -` Cx)" by simp
    finally have *: "emeasure ?MS (D n)  ennreal e * emeasure M (Dx  (T ^^ n) -` Cx)" by auto

    have "c + emeasure M (space M)  emeasure M Dx + emeasure M Cx"
      using emeasure M Dx + c  emeasure M (space M) unfolding c_def
      by (auto simp: emeasure_eq_measure ennreal_plus[symmetric] simp del: ennreal_plus)
    also have "... = emeasure M Dx + emeasure M ((T^^n)--`Cx)"
      by (simp add: T_vrestr_same_emeasure(2))
    also have "... = emeasure M (Dx  ((T^^n)--`Cx)) + emeasure M (Dx  ((T^^n)--`Cx))"
      by (rule emeasure_Un_Int, auto)
    also have "...  emeasure M (space M) + emeasure M (Dx  ((T^^n)-`Cx))"
    proof -
      have "emeasure M (Dx  ((T^^n)--`Cx))  emeasure M (space M)"
        by (rule emeasure_mono, auto simp add: sets.sets_into_space)
      moreover have "Dx  ((T^^n)--`Cx) = Dx  ((T^^n)-`Cx)"
        by (simp add: vrestr_intersec_in_space)
      ultimately show ?thesis by (metis add.commute add_left_mono)
    qed
    finally have "emeasure M (Dx  ((T^^n)-`Cx))  c" by (simp add: emeasure_eq_measure)
    then have "ennreal e * emeasure M (Dx  ((T^^n)-`Cx))  ennreal e * c" using e > 0
      using mult_left_mono by fastforce
    with * show "emeasure ?MS (D n)  e * c"
      using 0<c 0<e by (auto simp: ennreal_mult[symmetric])
  qed

  have "¬(disjoint_family_on D {n0..n1})"
  proof
    assume D: "disjoint_family_on D {n0..n1}"
    have "emeasure lborel {-r*n1-N..r*n1+N} = (r * real n1 + real N) - (- r * real n1 - real N)"
      apply (rule emeasure_lborel_Icc) using r>0 by auto
    then have *: "emeasure lborel {-r*n1-N..r*n1+N} = ennreal(2 * r * real n1 + 2 * real N)"
      by (auto simp: ac_simps)

    have "ennreal(e * c) * (real n1 - real n0 + 1) = ennreal(e*c) * card {n0..n1}"
      using n1 > n0 by (auto simp: ennreal_of_nat_eq_real_of_nat Suc_diff_le ac_simps of_nat_diff)
    also have "... = (n{n0..n1}. ennreal(e*c))"
      by (simp add: ac_simps)
    also have "...  (n{n0..n1}. emeasure ?MS (D n))"
      using n. n  {n0..n1}  emeasure ?MS (D n)  e * c by (meson sum_mono)
    also have "... = emeasure ?MS (n{n0..n1}. D n)"
      apply (rule sum_emeasure) using Dn_meas by (auto simp add: D)
    also have "...  emeasure ?MS (space M × {-r*n1-N..r*n1+N})"
      apply (rule emeasure_mono) unfolding D_def using sets.sets_into_space[OF Dx_meas] by auto
    also have "... = emeasure M (space M) * emeasure lborel {-r*n1-N..r*n1+N}"
      by (rule sigma_finite_measure.emeasure_pair_measure_Times, auto simp add: lborel.sigma_finite_measure_axioms)
    also have "... = emeasure M (space M) * ennreal(2 * r * real n1 + 2 * real N)"
      using * by auto
    finally have "ennreal(e * c) * (real n1- real n0+1)  emeasure M (space M) * ennreal(2 * r * real n1 + 2 * real N)" by simp
    then have "e*c * (real n1- real n0 + 1)  measure M (space M) * (2 * r * real n1 + 2 * real N)"
      using 0<r 0<e 0<c n0 < n1 emeasure_eq_measure by (auto simp: ennreal_mult'[symmetric] simp del: ennreal_plus)
    then have "0  measure M (space M) * (2 * r * real n1 + 2 * real N) - e*c * (real n1- real n0 + 1)" by auto
    also have "... = (measure M (space M) * 2 * N + e*c*n0 - e*c) - n1 * (e*c-2*r*measure M (space M))"
      by algebra
    finally have "n1 * (e*c-2*r*measure M (space M))  measure M (space M) * 2 * N + e*c*n0 - e*c"
      by linarith
    then show False using n1_ineq by auto
  qed
  then obtain n m where nm: "n<m" "D m  D n  {}" unfolding disjoint_family_on_def by (metis inf_sup_aci(1) linorder_cases)
  define k where "k = m-n"
  then have "k>0" "D (n+k)  D n  {}" using nm by auto
  then have "((?TS^^(n+k))-`A)  ((?TS^^n)-`A)  {}" unfolding D_def C_def B_def by auto
  moreover have "((?TS^^(n+k))-`A)  ((?TS^^n)-`A) = (?TS^^n)-`(((?TS^^k)-`A)  A)"
    using funpow_add by (simp add: add.commute funpow_add set.compositionality)
  ultimately have "((?TS^^k)-`A)  A  {}" by auto
  then show "k>0. ((?TS^^k)-`A)  A  {}" using k>0 by auto
qed



subsubsection ‹Oscillations around the limit in Birkhoff theorem›

text ‹In this paragraph, we prove that, in Birkhoff theorem with vanishing limit, the Birkhoff sums
are infinitely many times arbitrarily close to $0$, both on the positive and the negative side.

In the ergodic case, this statement implies for instance that if the Birkhoff sums of an integrable
function tend to infinity almost everywhere, then the integral of the function can not vanish, it
has to be strictly positive (while Birkhoff theorem per se does not exclude the convergence to
infinity, at a rate slower than linear). This converts a qualitative information (convergence to
infinity at an unknown rate) to a quantitative information (linear convergence to infinity). This
result (sometimes known as Atkinson's Lemma) has been reinvented many times, for instance by Kesten
and by Guivarch. It plays an important role in the study of random products of matrices.

This is essentially a consequence of the conservativity of the corresponding skew-product, proved in
\verb+skew_product_conservative+. Indeed, this implies that, starting from a small set $X \times
[-e/2, e/2]$, the skew-product comes back infinitely often to itself, which implies that the
Birkhoff sums at these times are bounded by $e$.

To show that the Birkhoff sums come back to $[0,e]$ is a little bit more tricky. Argue by
contradiction, and induce on $A \times [0,e/2]$ where $A$ is the set of points where the Birkhoff
sums don't come back to $[0,e]$. Then the second coordinate decreases strictly when one iterates the
skew product, which is not compatible with conservativity.›

lemma birkhoff_sum_small_asymp_lemma:
  assumes [measurable]: "integrable M f"
    and "AE x in M. real_cond_exp M Invariants f x = 0" "e>(0::real)"
  shows "AE x in M. infinite {n. birkhoff_sum f n x  {0..e}}"
proof -
  have [measurable]: "f  borel_measurable M" by auto
  have [measurable]: "N. {x  space M. N. n{N..}. birkhoff_sum f n x  {0..e}}  sets M" by auto

  {
    fix N assume "N>(0::nat)"
    define Ax where "Ax = {x  space M. n{N..}. birkhoff_sum f n x  {0..e}}"
    then have [measurable]: "Ax  sets M" by auto
    define A where "A = Ax × {0..e/2}"
    then have A_meas [measurable]: "A  sets (M M lborel)" by auto

    define TN where "TN = T^^N"
    interpret TN: fmpt M TN
      unfolding TN_def using fmpt_power by auto
    define fN where "fN = birkhoff_sum f N"
    have "TN.birkhoff_sum fN n x = birkhoff_sum f (n*N) x" for n x
    proof (induction n)
      case 0
      then show ?case by auto
    next
      case (Suc n)
      have "TN.birkhoff_sum fN (Suc n) x = TN.birkhoff_sum fN n x + fN ((TN^^n) x)"
        using TN.birkhoff_sum_cocycle[of fN n 1] by auto
      also have "... = birkhoff_sum f (n*N) x + birkhoff_sum f N ((TN^^n) x)"
        using Suc.IH fN_def by auto
      also have "... = birkhoff_sum f (n*N+N) x" unfolding TN_def
        by (subst funpow_mult, subst mult.commute[of N n], rule birkhoff_sum_cocycle[of f "n*N" N x, symmetric])
      finally show ?case by (simp add: add.commute)
    qed
    then have not0e: "x n. x  Ax  n  0  TN.birkhoff_sum fN n x  {0..e}" unfolding Ax_def by auto

    let ?TS = "(λ(x,y). (T x, y + f x))"
    let ?MS = "M M (lborel::real measure)"
    interpret TS: conservative_mpt ?MS ?TS
      by (rule skew_product_conservative, auto simp add: assms)

    let ?TSN = "(λ(x,y). (TN x, y + fN x))"
    have *:"?TSN = ?TS^^N" unfolding TN_def fN_def using skew_product_real_iterates by auto
    interpret TSN: conservative_mpt ?MS ?TSN
      using * TS.conservative_mpt_power by auto

    define MA TA where "MA = restrict_space ?MS A" and "TA = TSN.induced_map A"
    interpret TA: conservative_mpt MA TA unfolding MA_def TA_def
      by (rule TSN.induced_map_conservative_mpt, measurable)
    have *: " x y. snd (TA (x,y)) = snd (x,y) + TN.birkhoff_sum fN (TSN.return_time_function A (x,y)) x"
      unfolding TA_def TSN.induced_map_def using TN.skew_product_real_iterates Pair_def by auto
    have [measurable]: "snd  borel_measurable ?MS" by auto
    then have [measurable]: "snd  borel_measurable MA" unfolding MA_def using measurable_restrict_space1 by blast

    have "AE z in MA. z  TSN.recurrent_subset A"
      unfolding MA_def using TSN.induced_map_recurrent_typical(1)[OF A_meas].
    moreover
    {
      fix z assume z: "z  TSN.recurrent_subset A"
      define x y where "x = fst z" and "y = snd z"
      then have "z = (x,y)" by simp
      have "z  A" using z "TSN.recurrent_subset_incl"(1) by auto
      then have "x  Ax" "y  {0..e/2}" unfolding A_def x_def y_def by auto
      define y2 where "y2 = y + TN.birkhoff_sum fN (TSN.return_time_function A (x,y)) x"
      have "y2 = snd (TA z)" unfolding y2_def using * z = (x, y) by force
      moreover have "TA z  A" unfolding TA_def using z  A TSN.induced_map_stabilizes_A by auto
      ultimately have "y2  {0..e/2}" unfolding A_def by auto

      have "TSN.return_time_function A (x,y)  0"
        using z = (x,y) z  TSN.recurrent_subset A TSN.return_time0[of A] by fast
      then have "TN.birkhoff_sum fN (TSN.return_time_function A (x,y)) x  {0..e}"
        using not0e[OF x  Ax] by auto
      moreover have "TN.birkhoff_sum fN (TSN.return_time_function A (x,y)) x  {-e..e}"
        using y  {0..e/2} y2  {0..e/2} y2_def by auto
      ultimately have "TN.birkhoff_sum fN (TSN.return_time_function A (x,y)) x  {-e..<0}"
        by auto
      then have "y2 < y" using y2_def by auto
      then have "snd(TA z) < snd z" unfolding y_def using y2 = snd (TA z) by auto
    }
    ultimately have a: "AE z in MA. snd(TA z) < snd z" by auto
    then have "AE z in MA. snd(TA z)  snd z" by auto
    then have "AE z in MA. snd(TA z) = snd z" using TA.AE_decreasing_then_invariant[of snd] by auto
    then have "AE z in MA. False" using a by auto
    then have "space MA  null_sets MA" by (simp add: AE_iff_null_sets)
    then have "emeasure MA A = 0" by (metis A_meas MA_def null_setsD1 space_restrict_space2)
    then have "emeasure ?MS A = 0" unfolding MA_def
      by (metis A_meas emeasure_restrict_space sets.sets_into_space sets.top space_restrict_space space_restrict_space2)
    moreover have "emeasure ?MS A = emeasure M Ax * emeasure lborel {0..e/2}"
      unfolding A_def by (intro lborel.emeasure_pair_measure_Times) auto
    ultimately have "emeasure M {x  space M. n{N..}. birkhoff_sum f n x  {0..e}} = 0" using e>0 Ax_def by simp
    then have "{x  space M. n{N..}. birkhoff_sum f n x  {0..e}}  null_sets M" by auto
  }
  then have "(N{0<..}. {x  space M. n{N..}. birkhoff_sum f n x  {0..e}})  null_sets M" by (auto simp: greaterThan_0)
  moreover have "{x  space M. ¬(infinite {n. birkhoff_sum f n x  {0..e}})}  (N{0<..}. {x  space M. n{N..}. birkhoff_sum f n x  {0..e}})"
  proof
    fix x assume H: "x  {x  space M. ¬(infinite {n. birkhoff_sum f n x  {0..e}})}"
    then have "x  space M" by auto
    have *: "finite {n. birkhoff_sum f n x  {0..e}}" using H by auto
    then obtain N where "n. n  N  n  {n. birkhoff_sum f n x  {0..e}}"
      by (metis finite_nat_set_iff_bounded not_less)
    then have "x  {x  space M. n{N+1..}. birkhoff_sum f n x  {0..e}}" using x  space M by auto
    moreover have "N+1>0" by auto
    ultimately show "x  (N{0<..}. {x  space M. n{N..}. birkhoff_sum f n x  {0..e}})" by auto
  qed
  ultimately show ?thesis unfolding eventually_ae_filter by auto
qed

theorem birkhoff_sum_small_asymp_pos_nonergodic:
  assumes [measurable]: "integrable M f" and "e > (0::real)"
  shows "AE x in M. infinite {n. birkhoff_sum f n x  {n * real_cond_exp M Invariants f x .. n * real_cond_exp M Invariants f x + e}}"
proof -
  define g where "g = (λx. f x - real_cond_exp M Invariants f x)"
  have g_meas [measurable]: "integrable M g" unfolding g_def using real_cond_exp_int(1)[OF assms(1)] assms(1) by auto
  have "AE x in M. real_cond_exp M Invariants (real_cond_exp M Invariants f) x = real_cond_exp M Invariants f x"
    by (rule real_cond_exp_F_meas, auto simp add: real_cond_exp_int(1)[OF assms(1)])
  then have *: "AE x in M. real_cond_exp M Invariants g x = 0"
    unfolding g_def using real_cond_exp_diff[OF assms(1) real_cond_exp_int(1)[OF assms(1)]] by auto
  have "AE x in M. infinite {n. birkhoff_sum g n x  {0..e}}"
    by (rule birkhoff_sum_small_asymp_lemma, auto simp add: e>0 * g_meas)
  moreover
  {
    fix x assume "x  space M" "infinite {n. birkhoff_sum g n x  {0..e}}"
    {
      fix n assume H: "birkhoff_sum g n x  {0..e}"
      have "birkhoff_sum g n x = birkhoff_sum f n x - birkhoff_sum (real_cond_exp M Invariants f) n x"
        unfolding g_def using birkhoff_sum_diff by auto
      also have "... = birkhoff_sum f n x - n * real_cond_exp M Invariants f x"
        using birkhoff_sum_of_invariants x  space M by auto
      finally have "birkhoff_sum f n x  {n * real_cond_exp M Invariants f x .. n * real_cond_exp M Invariants f x + e}" using H by simp
    }
    then have "{n. birkhoff_sum g n x  {0..e}}  {n. birkhoff_sum f n x  {n * real_cond_exp M Invariants f x .. n * real_cond_exp M Invariants f x + e}}"
      by auto
    then have "infinite {n. birkhoff_sum f n x  {n * real_cond_exp M Invariants f x .. n * real_cond_exp M Invariants f x + e}}"
      using infinite {n. birkhoff_sum g n x  {0..e}} finite_subset by blast
  }
  ultimately show ?thesis by auto
qed

theorem birkhoff_sum_small_asymp_neg_nonergodic:
  assumes [measurable]: "integrable M f" and "e > (0::real)"
  shows "AE x in M. infinite {n. birkhoff_sum f n x  {n * real_cond_exp M Invariants f x - e .. n * real_cond_exp M Invariants f x}}"
proof -
  define g where "g = (λx. real_cond_exp M Invariants f x - f x)"
  have g_meas [measurable]: "integrable M g" unfolding g_def using real_cond_exp_int(1)[OF assms(1)] assms(1) by auto
  have "AE x in M. real_cond_exp M Invariants (real_cond_exp M Invariants f) x = real_cond_exp M Invariants f x"
    by (rule real_cond_exp_F_meas, auto simp add: real_cond_exp_int(1)[OF assms(1)])
  then have *: "AE x in M. real_cond_exp M Invariants g x = 0"
    unfolding g_def using real_cond_exp_diff[OF real_cond_exp_int(1)[OF assms(1)] assms(1)] by auto
  have "AE x in M. infinite {n. birkhoff_sum g n x  {0..e}}"
    by (rule birkhoff_sum_small_asymp_lemma, auto simp add: e>0 * g_meas)
  moreover
  {
    fix x assume "x  space M" "infinite {n. birkhoff_sum g n x  {0..e}}"
    {
      fix n assume H: "birkhoff_sum g n x  {0..e}"
      have "birkhoff_sum g n x = birkhoff_sum (real_cond_exp M Invariants f) n x - birkhoff_sum f n x"
        unfolding g_def using birkhoff_sum_diff by auto
      also have "... = n * real_cond_exp M Invariants f x - birkhoff_sum f n x"
        using birkhoff_sum_of_invariants x  space M by auto
      finally have "birkhoff_sum f n x  {n * real_cond_exp M Invariants f x - e .. n * real_cond_exp M Invariants f x}" using H by simp
    }
    then have "{n. birkhoff_sum g n x  {0..e}}  {n. birkhoff_sum f n x  {n * real_cond_exp M Invariants f x - e .. n * real_cond_exp M Invariants f x}}"
      by auto
    then have "infinite {n. birkhoff_sum f n x  {n * real_cond_exp M Invariants f x - e .. n * real_cond_exp M Invariants f x}}"
      using infinite {n. birkhoff_sum g n x  {0..e}} finite_subset by blast
  }
  ultimately show ?thesis by auto
qed


subsubsection ‹Conditional expectation for the induced map›

text ‹Thanks to Birkhoff theorem, one can relate conditional expectations with respect to the invariant
sigma algebra, for a map and for a corresponding induced map, as follows.›

proposition Invariants_cond_exp_induced_map:
  fixes f::"'a  real"
  assumes [measurable]: "A  sets M" "integrable M f"
  defines "MA  restrict_space M A" and "TA  induced_map A" and "fA  induced_function A f"
  shows "AE x in MA. real_cond_exp MA (qmpt.Invariants MA TA) fA x
      = real_cond_exp M Invariants f x * real_cond_exp MA (qmpt.Invariants MA TA) (return_time_function A) x"
proof -
  interpret A: fmpt MA TA unfolding MA_def TA_def by (rule induced_map_fmpt[OF assms(1)])

  have "integrable M fA" unfolding fA_def using induced_function_integral_nonergodic(1) assms by auto
  then have "integrable MA fA" unfolding MA_def
    by (metis assms(1) integrable_mult_indicator integrable_restrict_space sets.Int_space_eq2)
  then have a: "AE x in MA. (λn. A.birkhoff_sum fA n x / n)  real_cond_exp MA A.Invariants fA x"
    using A.birkhoff_theorem_AE_nonergodic by auto

  have "AE x in M. (λn. birkhoff_sum f n x / n)  real_cond_exp M Invariants f x"
    using birkhoff_theorem_AE_nonergodic assms(2) by auto
  then have b: "AE x in MA. (λn. birkhoff_sum f n x / n)  real_cond_exp M Invariants f x"
    unfolding MA_def by (metis (mono_tags, lifting) AE_restrict_space_iff assms(1) eventually_mono sets.Int_space_eq2)

  define phiA where "phiA = (λx. return_time_function A x)"
  have "integrable M phiA" unfolding phiA_def using return_time_integrable by auto
  then have "integrable MA phiA" unfolding MA_def
    by (metis assms(1) integrable_mult_indicator integrable_restrict_space sets.Int_space_eq2)
  then have c: "AE x in MA. (λn. A.birkhoff_sum (λx. real(phiA x)) n x / n)  real_cond_exp MA A.Invariants phiA x"
    using A.birkhoff_theorem_AE_nonergodic by auto

  have "A-recurrent_subset A  null_sets M" using Poincare_recurrence_thm(1)[OF assms(1)] by auto
  then have "A - recurrent_subset A  null_sets MA" unfolding MA_def
    by (metis Diff_subset assms(1) emeasure_restrict_space null_setsD1 null_setsD2 null_setsI sets.Int_space_eq2 sets_restrict_space_iff)
  then have "AE x in MA. x  recurrent_subset A"
    by (simp add: AE_iff_null MA_def null_setsD2 set_diff_eq space_restrict_space2)
  moreover have "x. x  recurrent_subset A  phiA x > 0" unfolding phiA_def using return_time0 by fastforce
  ultimately have *: "AE x in MA. phiA x > 0" by auto
  have d: "AE x in MA. real_cond_exp MA A.Invariants phiA x > 0"
    by (rule A.real_cond_exp_gr_c, auto simp add: * integrable MA phiA)

  {
    fix x
    assume A: "(λn. A.birkhoff_sum fA n x / n)  real_cond_exp MA A.Invariants fA x"
       and B: "(λn. birkhoff_sum f n x / n)  real_cond_exp M Invariants f x"
       and C: "(λn. A.birkhoff_sum (λx. real(phiA x)) n x / n)  real_cond_exp MA A.Invariants phiA x"
       and D: "real_cond_exp MA A.Invariants phiA x > 0"
    define R where "R = (λn. A.birkhoff_sum phiA n x)"

    have D2: "ereal(real_cond_exp MA A.Invariants phiA x) > 0" using D by simp
    have "n. real(R n)/ n = A.birkhoff_sum (λx. real(phiA x)) n x / n" unfolding R_def A.birkhoff_sum_def by auto
    moreover have "(λn. A.birkhoff_sum (λx. real(phiA x)) n x / n)  real_cond_exp MA A.Invariants phiA x" using C by auto
    ultimately have Rnn: "(λn. real(R n)/n)  real_cond_exp MA A.Invariants phiA x" by presburger

    have "n. ereal(real(R n))/ n = ereal(A.birkhoff_sum (λx. real(phiA x)) n x / n)" unfolding R_def A.birkhoff_sum_def by auto
    moreover have "(λn. ereal(A.birkhoff_sum (λx. real(phiA x)) n x / n))  real_cond_exp MA A.Invariants phiA x" using C by auto
    ultimately have i: "(λn. ereal(real(R n))/n)  real_cond_exp MA A.Invariants phiA x" by auto
    have ii: "(λn. real n)  " by (rule id_nat_ereal_tendsto_PInf)
    have iii: "(λn. ereal(real(R n))/n * real n)  " using tendsto_mult_ereal_PInf[OF i D2 ii] by simp
    have "n. n > 0  ereal(real(R n))/n * real n = R n" by auto
    then have "eventually (λn. ereal(real(R n))/n * real n = R n) sequentially" 
      using eventually_at_top_dense by blast
    then have "(λn. real(R n))  " using iii by (simp add: filterlim_cong)
    then have "(λn. birkhoff_sum f (R n) x / (R n))  real_cond_exp M Invariants f x" using limit_along_weak_subseq B by auto
    then have l: "(λn. (birkhoff_sum f (R n) x / (R n)) * ((R n)/n))  real_cond_exp M Invariants f x * real_cond_exp MA A.Invariants phiA x"
      by (rule tendsto_mult, simp add: Rnn)
    obtain N where N: "n. n > N  R n > 0" using (λn. real(R n))  
      by (metis (full_types) eventually_at_top_dense filterlim_iff filterlim_weak_subseq)
    then have "n. n > N  (birkhoff_sum f (R n) x / (R n)) * ((R n)/n) = birkhoff_sum f (R n) x / n"
      by auto
    then have "eventually (λn. (birkhoff_sum f (R n) x / (R n)) * ((R n)/n) = birkhoff_sum f (R n) x / n) sequentially"
      by simp
    with tendsto_cong[OF this] have main_limit: "(λn. birkhoff_sum f (R n) x / n)  real_cond_exp M Invariants f x * real_cond_exp MA A.Invariants phiA x"
      using l by auto
    have "n. birkhoff_sum f (R n) x = A.birkhoff_sum fA n x"
      unfolding R_def fA_def phiA_def TA_def using induced_function_birkhoff_sum[OF assms(1)] by simp
    then have "n. birkhoff_sum f (R n) x /n = A.birkhoff_sum fA n x / n" by simp
    then have "(λn. A.birkhoff_sum fA n x / n)  real_cond_exp M Invariants f x * real_cond_exp MA A.Invariants phiA x"
      using main_limit by presburger
    then have "real_cond_exp MA A.Invariants fA x = real_cond_exp M Invariants f x * real_cond_exp MA A.Invariants phiA x"
      using A LIMSEQ_unique by auto
  }
  then show ?thesis using a b c d unfolding phiA_def by auto
qed

corollary Invariants_cond_exp_induced_map_0:
  fixes f::"'a  real"
  assumes [measurable]: "A  sets M" "integrable M f" and "AE x in M. real_cond_exp M Invariants f x = 0"
  defines "MA  restrict_space M A" and "TA  induced_map A" and "fA  induced_function A f"
  shows "AE x in MA. real_cond_exp MA (qmpt.Invariants MA TA) fA x = 0"
proof -
  have "AE x in MA. real_cond_exp M Invariants f x = 0" unfolding MA_def
    apply (subst AE_restrict_space_iff) using assms(3) by auto
  then show ?thesis unfolding MA_def TA_def fA_def using Invariants_cond_exp_induced_map[OF assms(1) assms(2)]
    by auto
qed

end (*of locale fmpt*)
end (*of Invariants.thy*)