# Theory Normalizing_Sequences

theory Normalizing_Sequences
imports Transfer_Operator Asymptotic_Density
(*  Author:  Sébastien Gouëzel   sebastien.gouezel@univ-rennes1.fr
*)

section ‹Normalizing sequences›

theory Normalizing_Sequences
imports Transfer_Operator Asymptotic_Density
begin

text ‹In this file, we prove the main result in~\cite{gouezel_normalizing_sequences}: in a
conservative system, if a renormalized sequence $S_n f/B_n$ converges in distribution towards
a limit which is not a Dirac mass at $0$, then $B_n$ can not grow exponentially fast. We also prove
the easier result that, in a probability preserving system, normalizing sequences grow at most
polynomially.›

subsection ‹Measure of the preimages of disjoint sets.›

If $A_n$ are disjoint sets, and $P$ is a finite mass measure which is absolutely continuous
with respect to $M$, then $T^{-n}A_n$ is most often small: $P(T^{-n} A_n)$ tends to $0$
in Cesaro average. The proof is written in terms of densities and positive transfer operators,
so we first write it in ennreal.›

theorem (in conservative) disjoint_sets_emeasure_Cesaro_tendsto_zero:
fixes P::"'a measure" and A::"nat ⇒ 'a set"
assumes [measurable]: "⋀n. A n ∈ sets M"
and "disjoint_family A"
"absolutely_continuous M P" "sets P = sets M"
"emeasure P (space M) ≠ ∞"
shows "(λn. (∑i<n. emeasure P (space M ∩ (T^^i)-(A i)))/n) ⇢ 0"
proof (rule order_tendstoI)
fix delta::ennreal assume "delta > 0"

have "∃epsilon. epsilon ≠ 0 ∧ epsilon ≠ ∞ ∧ 4 * epsilon < delta"
apply (cases delta)
apply (rule exI[of _ "delta/5"]) using ‹delta>0› apply (auto simp add: ennreal_divide_eq_top_iff ennreal_divide_numeral numeral_mult_ennreal intro!: ennreal_lessI)
apply (rule exI[of _ 1]) by auto
then obtain epsilon where "epsilon ≠ 0" "epsilon ≠ ∞" "4 * epsilon < delta"
by auto
then have "epsilon > 0" using not_gr_zero by blast

define L::ennreal where "L = (1/epsilon) * (1+ emeasure P (space M))"
have "L ≠ ∞"
unfolding L_def using assms(5) divide_ennreal_def ennreal_mult_eq_top_iff ‹epsilon ≠ 0› by auto
have "L ≠ 0"
unfolding L_def using ‹epsilon ≠ ∞› by (simp add: ennreal_divide_eq_top_iff)
have "emeasure P (space M) ≤ epsilon * L" unfolding L_def
using ‹epsilon ≠ 0› ‹epsilon ≠ ∞› ‹emeasure P (space M) ≠ ∞›
apply (cases epsilon)
by simp
then have "emeasure P (space M) / L ≤ epsilon"
using ‹L ≠ 0› ‹L ≠ ∞› by (metis divide_le_posI_ennreal mult.commute not_gr_zero)
then have "c * (emeasure P (space M)/L) ≤ c * epsilon" for c by (rule mult_left_mono, simp)

text ‹We introduce the density of $P$.›
define f where "f = RN_deriv M P"
have [measurable]: "f ∈ borel_measurable M"
unfolding f_def by auto
have [simp]: "P = density M f"
unfolding f_def apply (rule density_RN_deriv[symmetric]) using assms by auto
have "space P = space M"
by auto
interpret Pc: finite_measure P
apply standard unfolding ‹space P = space M› using assms(5) by auto

have *: "AE x in P. eventually (λn. (∑i<n. (nn_transfer_operator^^i) f x) > L * f x) sequentially"
proof -
have "AE x in M. f x ≠ ∞"
unfolding f_def apply (intro RN_deriv_finite Pc.sigma_finite_measure)
unfolding ‹space P = space M› using assms by auto
moreover have "AE x in M. f x > 0 ⟶ (∑n. (nn_transfer_operator^^n) f x) = ∞"
using recurrence_series_infinite_transfer_operator by auto
ultimately have "AE x in M. f x > 0 ⟶ ((∑n. (nn_transfer_operator^^n) f x) = ∞ ∧ f x ≠ ∞)"
by auto
then have AEP: "AE x in P. (∑n. (nn_transfer_operator^^n) f x) = ∞ ∧ f x ≠ ∞"
unfolding ‹P = density M f› using AE_density[of f M] by auto
moreover have "eventually (λn. (∑i<n. (nn_transfer_operator^^i) f x) > L * f x) sequentially"
if "(∑n. (nn_transfer_operator^^n) f x) = ∞ ∧ f x ≠ ∞" for x
proof -
have "(λn. (∑i<n. (nn_transfer_operator^^i) f x)) ⇢ (∑i. (nn_transfer_operator^^i) f x)"
moreover have "(∑i. (nn_transfer_operator^^i) f x) > L * f x"
using that ‹L ≠ ∞› by (auto simp add: ennreal_mult_less_top top.not_eq_extremum)
ultimately show ?thesis
by (rule order_tendstoD(1))
qed
ultimately show ?thesis
by auto
qed
have "∃U N. U ∈ sets P ∧ (∀n ≥ N. ∀x ∈ U. (∑i<n. (nn_transfer_operator^^i) f x) > L * f x) ∧ emeasure P (space P - U) < epsilon"
apply (rule Pc.Egorov_lemma[OF _ *]) using ‹epsilon≠0› by (auto simp add: zero_less_iff_neq_zero)
then obtain U N1 where [measurable]: "U ∈ sets M" and U: "emeasure P (space M - U) < epsilon"
"⋀n x. n ≥ N1 ⟹ x ∈ U ⟹ L * f x < (∑i<n. (nn_transfer_operator^^i) f x)"
unfolding ‹sets P = sets M› ‹space P = space M› by blast
have "U ⊆ space M" by (rule sets.sets_into_space, simp)

define K where "K = N1 + 1"
have "K ≥ N1" "K ≥ 1" unfolding K_def by auto
have *: "K * emeasure P (space M) / epsilon ≠ ∞"
using ‹emeasure P (space M) ≠ ∞› ‹epsilon ≠ 0› ennreal_divide_eq_top_iff ennreal_mult_eq_top_iff by auto
obtain N2::nat where N2: "N2 ≥ K * emeasure P (space M) / epsilon"
using ennreal_archimedean[OF *] by auto
define N where "N = 2 * K + N2"
have "(∑k∈{..<n}. emeasure P (space M ∩ (T^^k)-(A k))) / n < delta" if "n ≥ N" for n
proof -
have "n ≥ 2 * K" "of_nat n ≥ ((of_nat N2)::ennreal)" using that unfolding N_def by auto
then have "n ≥ K * emeasure P (space M) / epsilon"
using N2 order_trans by blast
then have "K * emeasure P (space M) ≤ n * epsilon"
using ‹epsilon > 0› ‹epsilon ≠ ∞›
by (smt divide_ennreal_def divide_right_mono_ennreal ennreal_mult_divide_eq ennreal_mult_eq_top_iff infinity_ennreal_def mult.commute not_le order_le_less)
have "n ≥ 1" using ‹n ≥ 2 * K› ‹K ≥ 1› by auto

have *: "((∑k∈{K..<n-K}. indicator (A k) ((T^^k) x))::ennreal) ≤ (∑i∈{K..<n}. indicator (A (i-j)) ((T^^(i-j)) x))"
if "j < K" for j x
proof -
have "(∑k ∈ {K..<n-K}. indicator (A k) ((T^^k) x)) ≤ ((∑k∈{K-j..<n-j}. indicator (A k) ((T^^k) x))::ennreal)"
apply (rule sum_mono2) using ‹j < K› by auto
also have "... = (∑i∈{K..<n}. indicator (A (i-j)) ((T^^(i-j)) x))"
apply (rule sum.reindex_bij_betw[symmetric], rule bij_betw_byWitness[of _ "λx. x+j"]) using ‹j < K› by auto
finally show ?thesis by simp
qed

have "L * (∑ k ∈ {K..<n-K}. emeasure P (U ∩ (T^^k)-(A k))) = L * (∑ k ∈ {K..<n-K}.(∫⇧+x. indicator (U ∩ (T^^k)-(A k)) x ∂P))"
by auto
also have "... = (∑ k ∈ {K..<n-K}. (∫⇧+x. L * indicator (U ∩ (T^^k)-(A k)) x ∂P))"
unfolding sum_distrib_left by (intro sum.cong nn_integral_cmult[symmetric], auto)
also have "... = (∑ k ∈ {K..<n-K}. (∫⇧+x. f x * (L * indicator (U ∩ (T^^k)-(A k)) x) ∂M))"
unfolding ‹P = density M f› by (intro sum.cong nn_integral_density, auto)
also have "... = (∑ k ∈ {K..<n-K}. (∫⇧+x. f x * L * indicator U x * indicator (A k) ((T^^k) x) ∂M))"
by (intro sum.cong nn_integral_cong, auto simp add: algebra_simps indicator_def)
also have "... ≤ (∑ k ∈ {K..<n-K}. (∫⇧+x. (∑j ∈ {..<K}. (nn_transfer_operator^^j) f x) * indicator (A k) ((T^^k) x) ∂M))"
apply (intro sum_mono nn_integral_mono)
using U(2)[OF ‹K ≥ N1›] unfolding indicator_def using less_imp_le by (auto simp add: algebra_simps)
also have "... = (∫⇧+x. (∑k∈{K..<n-K}. (∑j ∈ {..<K}. (nn_transfer_operator^^j) f x * indicator (A k) ((T^^k) x))) ∂M)"
apply (subst nn_integral_sum, simp) unfolding sum_distrib_right by auto
also have "... = (∫⇧+x. (∑j ∈ {..<K}. (∑k∈{K..<n-K}. (nn_transfer_operator^^j) f x * indicator (A k) ((T^^k) x))) ∂M)"
by (rule nn_integral_cong, rule sum.swap)
also have "... = (∑j ∈ {..<K}. (∫⇧+x. (nn_transfer_operator^^j) f x * (∑k∈{K..<n-K}. indicator (A k) ((T^^k) x)) ∂M))"
apply (subst nn_integral_sum, simp) unfolding sum_distrib_left by auto
also have "... ≤ (∑j ∈ {..<K}. (∫⇧+x. (nn_transfer_operator^^j) f x * (∑i∈{K..<n}. indicator (A (i-j)) ((T^^(i-j)) x)) ∂M))"
apply (rule sum_mono, rule nn_integral_mono) using * by (auto simp add: mult_left_mono)
also have "... = (∑i∈{K..<n}. (∑j ∈ {..<K}. (∫⇧+x. (nn_transfer_operator^^j) f x * indicator (A (i-j)) ((T^^(i-j)) x) ∂M)))"
unfolding sum_distrib_left using sum.swap by (subst nn_integral_sum, auto)
also have "... = (∑i∈{K..<n}. (∑j ∈ {..<K}. (∫⇧+x. f x * indicator (A (i-j)) ((T^^(i-j)) ((T^^j) x)) ∂M)))"
by (subst nn_transfer_operator_intg_Tn, auto)
also have "... = (∑i∈{K..<n}. (∫⇧+x. f x * (∑j ∈ {..<K}. indicator (A (i-j)) ((T^^(i-j)) ((T^^j) x))) ∂M))"
unfolding sum_distrib_left by (subst nn_integral_sum, auto)
also have "... = (∑i∈{K..<n}. (∫⇧+x. (∑j ∈ {..<K}. indicator (A (i-j)) ((T^^((i-j)+j)) x)) ∂P))"
unfolding ‹P = density M f› funpow_add comp_def apply (rule sum.cong, simp) by (rule nn_integral_density[symmetric], auto)
also have "... = (∑i∈{K..<n}. (∫⇧+x. (∑j ∈ {..<K}. indicator (A (i-j)) ((T^^i) x)) ∂P))"
by auto
also have "... ≤ (∑i∈{K..<n}. (∫⇧+x. (1::ennreal) ∂P))"
apply (rule sum_mono) apply (rule nn_integral_mono) apply (rule disjoint_family_indicator_le_1)
using assms(2) apply (auto simp add: disjoint_family_on_def)
by (metis Int_iff diff_diff_cancel equals0D le_less le_trans)
also have "... ≤ n * emeasure P (space M)"
using assms(4) by (auto intro!: mult_right_mono)
finally have *: "L * (∑ k ∈ {K..<n-K}. emeasure P (U ∩ (T^^k)-(A k))) ≤ n * emeasure P (space M)"
by simp
have Ineq: "(∑ k ∈ {K..<n-K}. emeasure P (U ∩ (T^^k)-(A k))) ≤ n * emeasure P (space M) / L"
using divide_right_mono_ennreal[OF *, of L] ‹L ≠ 0›
by (metis (no_types, lifting) ‹L ≠ ∞› ennreal_mult_divide_eq infinity_ennreal_def mult.commute)

have I: "{..<K} ∪ {K..<n-K} ∪ {n-K..<n} = {..<n}" using ‹n ≥ 2 * K› by auto
have "(∑k∈{..<n}. emeasure P (space M ∩ (T^^k)-(A k))) ≤ (∑k∈{..<n}. emeasure P (U ∩ (T^^k)-(A k)) + epsilon)"
proof (rule sum_mono)
fix k
have "emeasure P (space M ∩ (T^^k)-(A k)) ≤ emeasure P ((U ∩ (T^^k)-(A k)) ∪ (space M - U))"
by (rule emeasure_mono, auto)
also have "... ≤ emeasure P (U ∩ (T^^k)-(A k)) + emeasure P (space M - U)"
also have "... ≤ emeasure P (U ∩ (T^^k)-(A k)) + epsilon"
using U(1) by auto
finally show "emeasure P (space M ∩ (T ^^ k) - A k) ≤ emeasure P (U ∩ (T ^^ k) - A k) + epsilon"
by simp
qed
also have "... = (∑k∈{..<K} ∪ {K..<n-K} ∪ {n-K..<n}. emeasure P (U ∩ (T^^k)-(A k))) + (∑k∈{..<n}. epsilon)"
unfolding sum.distrib I by simp
also have "... = (∑k∈{..<K}. emeasure P (U ∩ (T^^k)-(A k))) + (∑k∈{K..<n-K}. emeasure P (U ∩ (T^^k)-(A k)))
+ (∑k∈{n-K..<n}. emeasure P (U ∩ (T^^k)-(A k))) + n * epsilon"
apply (subst sum.union_disjoint) apply simp apply simp using ‹n ≥ 2 * K›
by (subst sum.union_disjoint, auto)
also have "... ≤ (∑k∈{..<K}. emeasure P (space M)) + n * emeasure P (space M) / L + (∑k∈{n-K..<n}. emeasure P (space M)) + n * epsilon"
apply (intro add_mono sum_mono Ineq emeasure_mono) using ‹U ⊆ space M› by auto
also have "... = K * emeasure P (space M) + n * emeasure P (space M)/L + K * emeasure P (space M) + n * epsilon"
using ‹n ≥ 2 * K› by auto
also have "... ≤ n * epsilon + n * epsilon + n * epsilon + n * epsilon"
using ‹K * emeasure P (space M) ≤ n * epsilon› ‹of_nat n * (emeasure P (space M)/L) ≤ of_nat n * epsilon›
ennreal_times_divide by auto
also have "... = n * (4 * epsilon)"
by (metis (no_types, lifting) add.assoc distrib_right mult.left_commute mult_2 numeral_Bit0)
also have "... < n * delta"
using ‹4 * epsilon < delta› ‹n ≥ 1›
finally show ?thesis
apply (subst divide_less_ennreal)
using ‹n ≥ 1› of_nat_less_top by (auto simp add: mult.commute)
qed
then show "eventually (λn. (∑k∈{..<n}. emeasure P (space M ∩ (T^^k)-(A k))) / n < delta) sequentially"
unfolding eventually_sequentially by auto
qed (simp)

text ‹We state the previous theorem using measures instead of emeasures. This is clearly
equivalent, but one has to play with ennreal carefully to prove it.›

theorem (in conservative) disjoint_sets_measure_Cesaro_tendsto_zero:
fixes P::"'a measure" and A::"nat ⇒ 'a set"
assumes [measurable]: "⋀n. A n ∈ sets M"
and "disjoint_family A"
"absolutely_continuous M P" "sets P = sets M"
"emeasure P (space M) ≠ ∞"
shows "(λn. (∑i<n. measure P (space M ∩ (T^^i)-(A i)))/n) ⇢ 0"
proof -
have "space P = space M"
using assms(4) sets_eq_imp_space_eq by blast
moreover have "emeasure P Q ≤ emeasure P (space P)" for Q
ultimately have [simp]: "emeasure P Q ≠ ⊤" for Q
using ‹emeasure P (space M) ≠ ∞› neq_top_trans by auto
have *: "ennreal ((∑i<n. measure P (space M ∩ (T^^i)-(A i)))/n) = (∑i<n. emeasure P (space M ∩ (T^^i)-(A i)))/n" if "n > 0" for n
apply (subst divide_ennreal[symmetric])
apply (auto intro!: sum_nonneg that simp add: ennreal_of_nat_eq_real_of_nat[symmetric])
apply(subst sum_ennreal[symmetric], simp)
apply (subst emeasure_eq_ennreal_measure) by auto
have "eventually (λn. ennreal ((∑i<n. measure P (space M ∩ (T^^i)-(A i)))/n) = (∑i<n. emeasure P (space M ∩ (T^^i)-(A i)))/n) sequentially"
unfolding eventually_sequentially apply (rule exI[of _ 1]) using * by auto
then have *: "(λn. ennreal ((∑i<n. measure P (space M ∩ (T^^i)-(A i)))/n)) ⇢ ennreal 0"
using disjoint_sets_emeasure_Cesaro_tendsto_zero[OF assms] tendsto_cong by force
show ?thesis
apply (subst tendsto_ennreal_iff[symmetric]) using * apply auto
unfolding eventually_sequentially apply (rule exI[of _ 1])
by (auto simp add: divide_simps intro!: sum_nonneg)
qed

text ‹As convergence to $0$ in Cesaro mean is equivalent to convergence to $0$ along a density
one sequence, we obtain the equivalent formulation of the previous theorem.›

theorem (in conservative) disjoint_sets_measure_density_one_tendsto_zero:
fixes P::"'a measure" and A::"nat ⇒ 'a set"
assumes [measurable]: "⋀n. A n ∈ sets M"
and "disjoint_family A"
"absolutely_continuous M P" "sets P = sets M"
"emeasure P (space M) ≠ ∞"
shows "∃B. lower_asymptotic_density B = 1 ∧ (λn. measure P (space M ∩ (T^^n)-(A n)) * indicator B n) ⇢ 0"
by (rule cesaro_imp_density_one[OF _ disjoint_sets_measure_Cesaro_tendsto_zero[OF assms]], simp)

subsection ‹Normalizing sequences do not grow exponentially in conservative systems›

text ‹We prove the main result in~\cite{gouezel_normalizing_sequences}: in a
conservative system, if a renormalized sequence $S_n f/B_n$ converges in distribution towards
a limit which is not a Dirac mass at $0$, then $B_n$ can not grow exponentially fast. The proof
is expressed in the following locale. The main theorem is Theorem~\verb+subexponential_growth+
below. To prove it, we need several preliminary estimates.›

text ‹We will use the fact that a real random variables which is not the Dirac mass at $0$
gives positive mass to a set separated away from $0$.›

lemma (in real_distribution) not_Dirac_0_imp_positive_mass_away_0:
assumes "prob {0} < 1"
shows "∃a. a > 0 ∧ prob {x. abs(x) > a} > 0"
proof -
have "1 = prob UNIV"
using prob_space by auto
also have "... = prob {0} + prob (UNIV -{0})"
by (subst finite_measure_Union[symmetric], auto)
finally have "0 < prob (UNIV -{0})"
using assms by auto
also have "... ≤ prob (⋃n::nat. {x. abs(x)>(1/2)^n})"
apply (rule finite_measure_mono)
by (auto, meson one_less_numeral_iff reals_power_lt_ex semiring_norm(76) zero_less_abs_iff)
finally have "prob (⋃n::nat. {x. abs(x)>(1/2)^n}) ≠ 0"
by simp
then have "∃n. prob {x. abs(x)>(1/2)^n} ≠ 0"
using measure_countably_zero[of "λn. {x. abs(x)>(1/2)^n}"] by force
then obtain N where N: "prob {x. abs(x) > (1/2)^N} ≠ 0"
by blast
show ?thesis
apply (rule exI[of _ "(1/2)^N"]) using N by (auto simp add: zero_less_measure_iff)
qed

locale conservative_limit =
conservative M + PS: prob_space P + PZ: real_distribution Z
for M::"'a measure" and P::"'a measure" and Z::"real measure" +
fixes f g::"'a ⇒ real" and B::"nat ⇒ real"
assumes PabsM: "absolutely_continuous M P"
and Bpos: "⋀n. B n > 0"
and M [measurable]: "f ∈ borel_measurable M" "g ∈ borel_measurable M" "sets P = sets M"
and non_trivial: "PZ.prob {0} < 1"
and conv: "weak_conv_m (λn. distr P borel (λx. (g x + birkhoff_sum f n x) / B n)) Z"
begin

text ‹For measurability statements, we want every question about $Z$ or $P$ to reduce to a
question about Borel sets of $M$. We add in the next lemma all the statements that are needed
so that this happens automatically.›
lemma PSZ [simp, measurable_cong]:
"space P = space M"
"h ∈ borel_measurable P ⟷ h ∈ borel_measurable M"
"A ∈ sets P ⟷ A ∈ sets M"
using M sets_eq_imp_space_eq real_distribution_def by auto

text ‹The first nontrivial upper bound is the following lemma, asserting that $B_{n+1}$ can not
be much larger than $\max B_i$ for $i \leq n$. This is proved by saying that $S_{n+1} f = f + (S_n f) \circ T$, and we know that $S_n f$ is not too large on a set of very large measure, so
the same goes for $(S_n f) \circ T$ by a non-singularity argument. Excepted that the measure $P$
does not have to be nonsingular for the map $T$, so one has to tweak a little bit this idea,
using transfer operators and conservativity. This is easier to do when the density of $P$ is
bounded by $1$, so we first give the proof under this assumption, and then we reduce to this
case by replacing $M$ with $M+P$ in the second lemma below.›

text ‹First, let us prove the lemma assuming that the density $h$ of $P$ is bounded by $1$.›

lemma upper_bound_C_aux:
assumes "P = density M h" "⋀x. h x ≤ 1"
and [measurable]: "h ∈ borel_measurable M"
shows "∃C≥1. ∀n. B (Suc n) ≤ C * Max {B i|i. i ≤ n}"
proof -
obtain a0 where a0: "a0 > 0" "PZ.prob {x. abs(x) > a0} > 0"
using PZ.not_Dirac_0_imp_positive_mass_away_0[OF non_trivial] by blast
define a where "a = a0/2"
have "a > 0" using ‹a0 > 0› unfolding a_def by auto
define alpha where "alpha = PZ.prob {x. abs (x) > a0}/4"
have "alpha > 0" unfolding alpha_def using a0 by auto
have "PZ.prob {x. abs (x) > 2 * a} > 3 * alpha"
using a0 unfolding a_def alpha_def by auto

text ‹First step: choose $K$ such that, with probability $1-\alpha$, one has
$\sum_{1 \leq k < K} h(T^k x) \geq 1$. This follows directly from conservativity.›
have "∃K. K ≥ 1 ∧ PS.prob {x ∈ space M. (∑i∈{1..<K}. h ((T^^i) x)) ≥ 1} ≥ 1 - alpha"
proof -
have *: "AE x in P. eventually (λn. (∑i<n. h ((T^^i) x)) > 2) sequentially"
proof -
have "AE x in M. h x > 0 ⟶ (∑i. h ((T^^i) x)) = ∞"
using recurrence_series_infinite by auto
then have AEP: "AE x in P. (∑i. h ((T^^i) x)) = ∞"
unfolding ‹P = density M h› using AE_density[of h M] by auto
moreover have "eventually (λn. (∑i<n. h ((T^^i) x)) > 2) sequentially"
if "(∑i. h ((T^^i) x)) = ∞" for x
proof -
have "(λn. (∑i<n. h ((T^^i) x))) ⇢ (∑i. h ((T^^i) x))"
moreover have "(∑i. h ((T^^i) x)) > 2"
using that by auto
ultimately show ?thesis
by (rule order_tendstoD(1))
qed
ultimately show ?thesis
by auto
qed
have "∃U N. U ∈ sets P ∧ (∀n ≥ N. ∀x ∈ U. (∑i<n. h ((T^^i) x)) > 2) ∧ emeasure P (space P - U) < alpha"
apply (rule PS.Egorov_lemma)
apply measurable using M(3) measurable_ident_sets apply blast
using * ‹alpha > 0› by auto
then obtain U N1 where [measurable]: "U ∈ sets M" and U: "emeasure P (space M - U) < alpha"
"⋀n x. n ≥ N1 ⟹ x ∈ U ⟹ 2 < (∑i<n. h ((T^^i) x))"
by auto
have "U ⊆ space M" by (rule sets.sets_into_space, simp)
define K where "K = N1+1"
then have "K ≥ 1" by auto
have Ux: "(∑i∈{1..<K}. h ((T^^i) x)) ≥ 1" if "x ∈ U" for x
proof -
have *: "1 < t" if "2 < 1 + t" for t::ennreal
apply (cases t) using that apply auto

have "2 < (∑i ∈ {..<K}. h ((T^^i) x))"
apply (rule U(2)) unfolding K_def using that by auto
also have "... = (∑i ∈ {0}. h ((T^^i) x)) + (∑i ∈ {1..<K}. h ((T^^i) x))"
apply (subst sum.union_disjoint[symmetric]) apply simp apply simp apply simp
apply (rule sum.cong) using ‹K ≥ 1› by auto
also have "... = h x + (∑i ∈ {1..<K}. h ((T^^i) x))"
by auto
also have "... ≤ 1 + (∑i ∈ {1..<K}. h ((T^^i) x))"
using assms by auto
finally show ?thesis using less_imp_le[OF *] by auto
qed
have "PS.prob {x ∈ space M. (∑i∈{1..<K}. h ((T^^i) x)) ≥ 1} ≥ 1 - alpha"
proof -
have "PS.prob (space P - U) < alpha"
using U(1) by (simp add: PS.emeasure_eq_measure ennreal_less_iff)
then have "1 - alpha < PS.prob U"
using PS.prob_compl by auto
also have "... ≤ PS.prob {x ∈ space M. (∑i∈{1..<K}. h ((T^^i) x)) ≥ 1}"
apply (rule PS.finite_measure_mono) using Ux sets.sets_into_space[OF ‹U ∈ sets M›] by auto
finally show ?thesis by simp
qed
then show ?thesis using ‹K ≥ 1› by auto
qed
then obtain K where K: "K ≥ 1" "PS.prob {x ∈ space M. (∑i∈{1..<K}. h ((T^^i) x)) ≥ 1} ≥ 1 - alpha"
by blast

text ‹Second step: obtain $D$ which controls the tails of the $K$ first Birkhoff sums of $f$.›
have "∃D. PS.prob {x ∈ space M. ∀k < K. abs(g x + birkhoff_sum f k x - g((T^^k) x)) ≤ D} ≥ 1 - alpha"
proof -
have D: "∃D. PS.prob {x ∈ space P. abs(g x + birkhoff_sum f k x - g((T^^k) x)) ≥ D} < alpha/K ∧ D ≥ 1" for k
apply (rule PS.random_variable_small_tails) using ‹K ≥ 1› ‹alpha > 0› by auto
have "∃D. ∀k. PS.prob {x ∈ space P. abs(g x + birkhoff_sum f k x - g((T^^k) x)) ≥ D k} < alpha/K ∧ D k ≥ 1"
apply (rule choice) using D by auto
then obtain D where D: "⋀k. PS.prob {x ∈ space P. abs(g x + birkhoff_sum f k x - g((T^^k) x)) ≥ D k} < alpha/K"
by blast
define D0 where "D0 = Max (D{..K})"
have "PS.prob {x ∈ space M. ∀k < K. abs(g x + birkhoff_sum f k x - g((T^^k) x)) ≤ D0} ≥ 1 - alpha"
proof -
have D1: "PS.prob {x ∈ space M. abs(g x + birkhoff_sum f k x - g((T^^k) x)) ≥ D0} < alpha/K" if "k ≤ K" for k
proof -
have "D k ≤ D0"
unfolding D0_def apply (rule Max_ge) using that by auto
have "PS.prob {x ∈ space M. abs(g x + birkhoff_sum f k x - g((T^^k) x)) ≥ D0}
≤ PS.prob {x ∈ space P. abs(g x + birkhoff_sum f k x - g((T^^k) x)) ≥ D k}"
apply (rule PS.finite_measure_mono) using ‹D k ≤ D0› by auto
then show ?thesis using D[of k] by auto
qed
have "PS.prob (⋃k∈ {..<K}. {x ∈ space M. abs(g x + birkhoff_sum f k x - g((T^^k) x)) ≥ D0}) ≤
(∑k ∈ {..<K}. PS.prob {x ∈ space M. abs(g x + birkhoff_sum f k x - g((T^^k) x)) ≥ D0})"
also have "... ≤ (∑k ∈ {..<K}. alpha/K)"
apply (rule sum_mono) using less_imp_le[OF D1] by auto
also have "... = alpha"
using ‹K ≥ 1› by auto
finally have "PS.prob (⋃k∈ {..<K}. {x ∈ space M. abs(g x + birkhoff_sum f k x - g((T^^k) x)) ≥ D0}) ≤ alpha"
by simp
then have "1 - alpha ≤ 1 - PS.prob (⋃k∈ {..<K}. {x ∈ space M. abs(g x + birkhoff_sum f k x - g((T^^k) x)) ≥ D0})"
by simp
also have "... = PS.prob (space P - (⋃k∈ {..<K}. {x ∈ space M. abs(g x + birkhoff_sum f k x - g((T^^k) x)) ≥ D0}))"
by (rule PS.prob_compl[symmetric], auto)
also have "... ≤ PS.prob {x ∈ space M. ∀k < K. abs(g x + birkhoff_sum f k x - g((T^^k) x)) ≤ D0}"
by (rule PS.finite_measure_mono, auto)
finally show ?thesis by simp
qed
then show ?thesis by blast
qed
then obtain D where D: "PS.prob {x ∈ space M. ∀k < K. abs(g x + birkhoff_sum f k x - g((T^^k) x)) ≤ D} ≥ 1 - alpha"
by blast

text ‹Third step: obtain $\epsilon$ small enough so that, for any set $U$ with probability less
than $\epsilon$ and for any $k\leq K$, one has $\int_U \hat T^k h < \delta$, where $\delta$ is
very small.›
define delta where "delta = alpha/(2 * K)"
then have "delta > 0" using ‹alpha > 0› ‹K ≥ 1› by auto
have "∃epsilon > (0::real). ∀U ∈ sets P. ∀k ≤ K. emeasure P U < epsilon ⟶ (∫⇧+x∈U. ((nn_transfer_operator^^k) h) x ∂P) ≤ delta"
proof -
have *: "∃epsilon>(0::real). ∀U∈sets P. emeasure P U < epsilon ⟶ (∫⇧+x∈U. ((nn_transfer_operator^^k) h) x ∂P) < delta"
for k
proof (rule small_nn_integral_on_small_sets[OF _ ‹0 < delta›])
have "(∫⇧+x. ((nn_transfer_operator^^k) h) x ∂P) = (∫⇧+x. h x * ((nn_transfer_operator^^k) h) x ∂M)"
unfolding ‹P = density M h› by (rule nn_integral_density, auto)
also have "... ≤ (∫⇧+x. 1 * ((nn_transfer_operator^^k) h) x ∂M)"
apply (intro nn_integral_mono mult_right_mono) using assms(2) by auto
also have "... = (∫⇧+x. 1 * h x ∂M)"
by (rule nn_transfer_operator_intTn_g, auto)
also have "... = emeasure P (space M)"
using PS.emeasure_space_1 by (simp add: emeasure_density ‹P = density M h›)
also have "... < ∞"
using PS.emeasure_space_1 by simp
finally show "(∫⇧+x. ((nn_transfer_operator^^k) h) x ∂P) ≠ ∞"
by auto
qed (simp)
have "∃epsilon. ∀k. epsilon k > (0::real) ∧ (∀U∈sets P. emeasure P U < epsilon k ⟶ (∫⇧+x∈U. ((nn_transfer_operator^^k) h) x ∂P) < delta)"
apply (rule choice) using * by blast
then obtain epsilon::"nat ⇒ real" where E: "⋀k. epsilon k > 0"
"⋀k U. U ∈ sets P ⟹ emeasure P U < epsilon k ⟹ (∫⇧+x∈U. ((nn_transfer_operator^^k) h) x ∂P) < delta"
by blast
define epsilon0 where "epsilon0 = Min (epsilon{..K})"
have "epsilon0 ∈ epsilon{..K}" unfolding epsilon0_def by (rule Min_in, auto)
then have "epsilon0 > 0" using E(1) by auto
have small_setint: "(∫⇧+x∈U. ((nn_transfer_operator^^k) h) x ∂P) ≤ delta"
if "k ≤ K" "U ∈ sets P" "emeasure P U < epsilon0" for k U
proof -
have *: "epsilon0 ≤ epsilon k"
unfolding epsilon0_def apply (rule Min_le) using ‹k ≤ K› by auto
show ?thesis
apply (rule less_imp_le[OF E(2)[OF ‹U ∈ sets P›]])
using ennreal_leI[OF *] ‹emeasure P U < epsilon0› by auto
qed
then show ?thesis using ‹epsilon0 > 0› by auto
qed
then obtain epsilon::real where "epsilon > 0" and
small_setint: "⋀k U. k ≤ K ⟹ U ∈ sets P ⟹ emeasure P U < epsilon ⟹ (∫⇧+x∈U. ((nn_transfer_operator^^k) h) x ∂P) ≤ delta"
by blast

text ‹Fourth step: obtain an index after which the convergence in distribution ensures that
the probability to be larger than $2 a$ and to be very large is comparable for $(g+S_n f)/B_n$
and for $Z$.›
obtain C0 where "PZ.prob {x. abs(x) ≥ C0} < epsilon" "C0 ≥ 1"
using PZ.random_variable_small_tails[OF ‹epsilon > 0›, of "λx. x"] by auto
have A: "eventually (λn. measure (distr P borel (λx. (g x + birkhoff_sum f n x) / B n)) {x. abs (x) > 2 * a} > 3 * alpha) sequentially"
apply (rule open_set_weak_conv_lsc[of _ Z])
by (auto simp add: PZ.real_distribution_axioms conv ‹PZ.prob {x. abs (x) > 2 * a} > 3 * alpha›)
have B: "eventually (λn. measure (distr P borel (λx. (g x + birkhoff_sum f n x) / B n)) {x. abs (x) ≥ C0} < epsilon) sequentially"
apply (rule closed_set_weak_conv_usc[of _ Z])
by (auto simp add: PZ.real_distribution_axioms conv ‹PZ.prob {x. abs(x) ≥ C0} < epsilon›)
obtain N where N: "⋀n. n ≥ N ⟹ measure (distr P borel (λx. (g x + birkhoff_sum f n x) / B n)) {x. abs (x) > 2 * a} > 3 * alpha"
"⋀n. n ≥ N ⟹ measure (distr P borel (λx. (g x + birkhoff_sum f n x) / B n)) {x. abs (x) ≥ C0} < epsilon"
using eventually_conj[OF A B] unfolding eventually_sequentially by blast

text ‹Fifth step: obtain a trivial control on $B_n$ for $n$ smaller than $N$.›
define C1 where "C1 = Max {B k/B 0 |k. k ≤ N+K+1}"
define C where "C = max (max C0 C1) (max (D / (a * B 0)) (C0/a))"
have "C ≥ 1" unfolding C_def using ‹C0 ≥ 1› by auto

text ‹Now, we can put everything together. If $n$ is large enough, we prove that
$B_{n+1} \leq C \max_{i\leq n} B_i$, by contradiction.›
have geK: "B (Suc n) ≤ C * Max {B i |i. i ≤ n}" if "n > N + K" for n
proof (rule ccontr)
have "Suc n ≥ N" using that by auto
let ?h = "(λx. (g x + birkhoff_sum f (Suc n) x) / B (Suc n))"
have "measure (distr P borel ?h) {x. abs (x) > 2 * a}
= measure P (?h- {x. abs (x) > 2 * a} ∩ space P)"
by (rule measure_distr, auto)
also have "... = measure P {x ∈ space M. abs(?h x) > 2 * a}"
by (rule HOL.cong[of "measure P"], auto)
finally have A: "PS.prob {x ∈ space M. abs(?h x) > 2 * a} > 3 * alpha"
using N(1)[OF ‹Suc n ≥ N›] by auto

have *: "PS.prob {y ∈ space M. C0 ≤ ¦g y + birkhoff_sum f (Suc n - k) y¦ / ¦B (Suc n - k)¦} < epsilon"
if "k ∈ {1..<K}" for k
proof -
have "Suc n - k ≥ N" using that ‹n > N + K› by auto
let ?h = "(λx. (g x + birkhoff_sum f (Suc n-k) x) / B (Suc n-k))"
have "measure (distr P borel ?h) {x. abs (x) ≥ C0}
= measure P (?h- {x. abs (x) ≥ C0} ∩ space P)"
by (rule measure_distr, auto)
also have "... = measure P {x ∈ space M. abs(?h x) ≥ C0}"
by (rule HOL.cong[of "measure P"], auto)
finally show ?thesis
using N(2)[OF ‹Suc n - k ≥ N›] by auto
qed
have P_le_epsilon: "emeasure P {y ∈ space M. C0 ≤ ¦g y + birkhoff_sum f (Suc n - k) y¦ / ¦B (Suc n - k)¦} < ennreal epsilon"
if "k ∈ {1..<K}" for k
using *[OF that] ‹epsilon > 0› ennreal_lessI unfolding PS.emeasure_eq_measure by auto

assume "¬(B (Suc n) ≤ C * Max {B i |i. i ≤ n})"
then have "C * Max {B i |i. i ≤ n} ≤ B (Suc n)" by simp
moreover have "C * B 0 ≤ C * Max {B i |i. i ≤ n}"
apply (rule mult_left_mono, rule Max_ge) using ‹C ≥ 1› by auto
ultimately have "C * B 0 ≤ B (Suc n)"
by auto

have "(D / (a * B 0)) * B 0 ≤ C * B 0"
apply (rule mult_right_mono) unfolding C_def using Bpos[of 0] by auto
then have "(D / (a * B 0)) * B 0 ≤ B (Suc n)"
using ‹C * B 0 ≤ B (Suc n)› by simp
then have "D ≤ a * B (Suc n)"
using Bpos[of 0] ‹a > 0› by (auto simp add: divide_simps algebra_simps)

define X where "X = {x ∈ space M. abs((g x + birkhoff_sum f (Suc n) x)/B(Suc n)) > 2 * a}
∩ {x ∈ space M. ∀k < K. abs(g x + birkhoff_sum f k x - g((T^^k) x)) ≤ D}
∩ {x ∈ space M. (∑i∈{1..<K}. h ((T^^i) x)) ≥ 1}"
have [measurable]: "X ∈ sets M" unfolding X_def by auto
have "3 * alpha + (1-alpha) + (1-alpha) ≤
PS.prob {x ∈ space M. abs((g x + birkhoff_sum f (Suc n) x)/B(Suc n)) > 2 * a}
+ PS.prob {x ∈ space M. ∀k < K. abs(g x + birkhoff_sum f k x - g((T^^k) x)) ≤ D}
+ PS.prob {x ∈ space M. (∑i∈{1..<K}. h ((T^^i) x)) ≥ 1}"
using A D K(2) by auto
also have "... ≤ 2 + PS.prob X"
unfolding X_def by (rule PS.sum_measure_le_measure_inter3, auto)
finally have "PS.prob X ≥ alpha" by auto

have I: "(λy. abs((g y + birkhoff_sum f (Suc n - k) y)/ B (Suc n - k))) ((T^^k) x) ≥ C0" if "x ∈ X" "k ∈ {1..<K}" for x k
proof -
have "2 * a * B(Suc n) ≤ abs(g x + birkhoff_sum f (Suc n) x)"
using ‹x ∈ X› Bpos[of "Suc n"] unfolding X_def by (auto simp add: divide_simps)
also have "... = abs(g((T^^k) x) + birkhoff_sum f (Suc n -k) ((T^^k) x) + (g x + birkhoff_sum f k x - g((T^^k) x)))"
using ‹n > N+K› ‹k ∈ {1..<K}› birkhoff_sum_cocycle[of f k "Suc n - k" x] by auto
also have "... ≤ abs(g((T^^k) x) + birkhoff_sum f (Suc n -k) ((T^^k) x)) + abs(g x + birkhoff_sum f k x - g((T^^k) x))"
by auto
also have "... ≤ abs(g((T^^k) x) + birkhoff_sum f (Suc n -k) ((T^^k) x)) + D"
using ‹x ∈ X› ‹k ∈ {1..<K}› unfolding X_def by auto
also have "... ≤ abs(g((T^^k) x) + birkhoff_sum f (Suc n -k) ((T^^k) x)) + a * B (Suc n)"
using ‹D ≤ a * B (Suc n)› by simp
finally have *: "a * B (Suc n) ≤ abs(g((T^^k) x) + birkhoff_sum f (Suc n -k) ((T^^k) x))"
by simp
have "(C0/a) * B (Suc n - k) ≤ C * B (Suc n - k)"
apply (rule mult_right_mono) unfolding C_def using less_imp_le[OF Bpos] by auto
also have "... ≤ C * Max {B i |i. i ≤ n}"
apply (rule mult_left_mono, rule Max_ge) using ‹k ∈ {1..<K}› ‹C ≥ 1› by auto
also have "... ≤ B (Suc n)"
by fact
finally have "C0 * B (Suc n - k) ≤ a * B (Suc n)"
using ‹a>0› by (simp add: divide_simps algebra_simps)
then have "C0 * B (Suc n - k) ≤ abs(g((T^^k) x) + birkhoff_sum f (Suc n -k) ((T^^k) x))"
using * by auto
then show ?thesis
using Bpos[of "Suc n - k"] by (simp add: divide_simps)
qed
have J: "1 ≤ (∑k∈{1..<K}. (λy. h y * indicator {y ∈ space M. abs((g y + birkhoff_sum f (Suc n - k) y)/ B (Suc n - k)) ≥ C0} y) ((T^^k) x))"
if "x ∈ X" for x
proof -
have "x ∈ space M"
using ‹x ∈ X› unfolding X_def by auto
have "1 ≤ (∑k ∈ {1..<K}. h ((T^^k) x))"
using ‹x ∈ X› unfolding X_def by auto
also have "... = (∑k ∈ {1..<K}. h ((T^^k) x) * indicator {y ∈ space M. abs((g y + birkhoff_sum f (Suc n - k) y)/ B (Suc n - k)) ≥ C0} ((T^^k) x))"
apply (rule sum.cong)
unfolding indicator_def using I[OF ‹x ∈ X›] T_spaceM_stable(2)[OF ‹x ∈ space M›] by auto
finally show ?thesis by simp
qed
have "ennreal alpha ≤ emeasure P X"
using ‹PS.prob X ≥ alpha› by (simp add: PS.emeasure_eq_measure)
also have "... = (∫⇧+x. indicator X x ∂P)"
by auto
also have "... ≤ (∫⇧+x. (∑k∈{1..<K}. (λy. h y
* indicator {y ∈ space M. abs((g y + birkhoff_sum f (Suc n - k) y)/ B (Suc n - k)) ≥ C0} y) ((T^^k) x)) ∂P)"
apply (rule nn_integral_mono) using J unfolding indicator_def by fastforce
also have "... = (∑k∈{1..<K}. (∫⇧+x. (λy. h y
* indicator {y ∈ space M. abs((g y + birkhoff_sum f (Suc n - k) y)/ B (Suc n - k)) ≥ C0} y) ((T^^k) x) ∂P))"
by (rule nn_integral_sum, auto)
also have "... = (∑k∈{1..<K}. (∫⇧+x. (λy. h y
* indicator {y ∈ space M. abs((g y + birkhoff_sum f (Suc n - k) y)/ B (Suc n - k)) ≥ C0} y) ((T^^k) x) * h x ∂M))"
unfolding ‹P = density M h› by (auto intro!: sum.cong nn_integral_densityR[symmetric])
also have "... = (∑k∈{1..<K}. (∫⇧+x. h x
* indicator {y ∈ space M. abs((g y + birkhoff_sum f (Suc n - k) y)/ B (Suc n - k)) ≥ C0} x * ((nn_transfer_operator^^k) h) x ∂M))"
by (auto intro!: sum.cong nn_transfer_operator_intTn_g[symmetric])
also have "... = (∑k∈{1..<K}. (∫⇧+x.
((nn_transfer_operator^^k) h) x * indicator {y ∈ space M. abs((g y + birkhoff_sum f (Suc n - k) y)/ B (Suc n - k)) ≥ C0} x ∂P))"
unfolding ‹P = density M h› by (subst nn_integral_density, auto intro!: sum.cong simp add: algebra_simps)
also have "... ≤ (∑k∈{1..<K}. ennreal delta)"
by (rule sum_mono, rule small_setint, auto simp add: P_le_epsilon)
also have "... = ennreal (∑k∈{1..<K}. delta)"
using less_imp_le[OF ‹delta > 0›] by (rule sum_ennreal)
finally have "alpha ≤ (∑k∈{1..<K}. delta)"
apply (subst ennreal_le_iff[symmetric]) using ‹delta > 0› by auto
also have "... ≤ K * delta"
using ‹delta > 0› by auto
finally show False
unfolding delta_def using ‹K ≥ 1› ‹alpha > 0› by (auto simp add: divide_simps algebra_simps)
qed
text ‹If $n$ is not large, we get the same bound in a trivial way, as there are only finitely
many cases to consider and we have adjusted the constant $C$ so that it works for all of them.›
have leK: "B (Suc n) ≤ C * Max {B i |i. i ≤ n}" if "n ≤ N+K" for n
proof -
have "B (Suc n)/B 0 ≤ Max {B k/B 0 |k. k ≤ N+K+1}"
apply (rule Max_ge, simp) using ‹n ≤ N+K› by auto
also have "... ≤ C" unfolding C_def C1_def by auto
finally have "B (Suc n) ≤ C * B 0"
using Bpos[of 0] by (simp add: divide_simps)
also have "... ≤ C * Max {B i |i. i ≤ n}"
apply (rule mult_left_mono) apply (rule Max_ge) using ‹C ≥ 1› by auto
finally show ?thesis by simp
qed
have "B (Suc n) ≤ C * Max {B i |i. i ≤ n}" for n
using geK[of n] leK[of n] by force
then show ?thesis
using ‹C ≥ 1› by auto
qed

text ‹Then, we prove the lemma without further assumptions, reducing to the previous case by
replacing $m$ with $m+P$. We do this at the level of densities since the addition of measures
is not defined in the library (and it would be problematic as measures carry their sigma-algebra,
so what should one do when the sigma-algebras do not coincide?)›

lemma upper_bound_C:
"∃C≥1. ∀n. B (Suc n) ≤ C * Max {B i|i. i ≤ n}"
proof -
text ‹We introduce the density of $P$, and show that it is almost everywhere finite.›
define h where "h = RN_deriv M P"
have [measurable]: "h ∈ borel_measurable M"
unfolding h_def by auto
have P [simp]: "P = density M h"
unfolding h_def apply (rule density_RN_deriv[symmetric]) using PabsM by auto
have "space P = space M"
by auto
have *: "AE x in M. h x ≠ ∞"
unfolding h_def apply (rule RN_deriv_finite)
using PS.sigma_finite_measure_axioms PabsM by auto
have **: "null_sets (density M (λx. 1 + h x)) = null_sets M"
by (rule null_sets_density, auto)

text ‹We introduce the new system with invariant measure $M+P$, given by the density $1+h$.›
interpret A: conservative "density M (λx. 1 + h x)" T
apply (rule conservative_density) using * by auto
interpret B: conservative_limit T "density M (λx. 1 + h x)" P Z f g B
apply standard
using conv Bpos non_trivial absolutely_continuousI_density[OF ‹h ∈ borel_measurable M›]
unfolding absolutely_continuous_def ** by auto

text ‹We obtain the result by applying the result above to the new dynamical system.
We have to check the additional assumption that the density of $P$ with respect to the new measure
$M + P$ is bounded by $1$. Since this density if $h/(1+h)$, this is trivial modulo a computation
in ennreal that is not automated (yet?).›
have z: "1 = ennreal 1" by auto
have Trivial: "a = (1+a) * (a/(1+a))" if "a ≠ ⊤" for a::ennreal
apply (cases a) apply auto unfolding z ennreal_plus_if apply (subst divide_ennreal) apply simp apply simp
apply (subst ennreal_mult'[symmetric]) using that by auto
have Trivial2: "a / (1+a) ≤ 1" for a::ennreal
apply (cases a) apply auto unfolding z ennreal_plus_if apply (subst divide_ennreal) by auto
show ?thesis
apply (rule B.upper_bound_C_aux[of "λx. h x/(1 + h x)"])
using * Trivial Trivial2 by (auto simp add: density_density_eq density_unique_iff)
qed

text ‹The second main upper bound is the following. Again, it proves that
$B_{n+1} \leq L \max_{i \leq n} B_i$, for some constant $L$, but with two differences. First,
$L$ only depends on the distribution of $Z$ (which is stronger). Second, this estimate is only
proved along a density $1$ sequence of times (which is weaker). The first point implies that
this lemma will also apply to $T^j$, with the same $L$, which amounts to replacing $L$ by $L^{1/j}$,
making it in practice arbitrarily close to $1$. The second point is problematic at first sight, but
for the exceptional times we will use the bound of the previous lemma so this will not really
create problems.

For the proof, we split the sum $S_{n+1} f$ as $S_n f + f \circ T^n$. If $B_{n+1}$ is much larger
than $B_n$, we deduce that $S_n f$ is much smaller than $S_{n+1}f$ with large probability, which
means that $f \circ T^n$ is larger than anything that has been seen before. Since preimages of
distinct events have a measure that tends to $0$ along a density $1$ subsequence, this can only
happen along a density $0$ subsequence.›

lemma upper_bound_L:
fixes a::real and L::real and alpha::real
assumes "a > 0" "alpha > 0" "L > 3"
"PZ.prob {x. abs (x) > 2 * a} > 3 * alpha"
"PZ.prob {x. abs (x) ≥ (L-1) * a} < alpha"
shows "∃A. lower_asymptotic_density A = 1 ∧ (∀n∈A. B (Suc n) ≤ L * Max {B i|i. i ≤ n})"
proof -
define m where "m = (λn. Max {B i|i. i ≤ n})"
define K where "K = (λn::nat. {x ∈ space M. abs(f x) ∈ {a * L * m n <..< a * L * m (Suc n)}})"
have [measurable]: "K n ∈ sets M" for n
unfolding K_def by auto
have *: "m n ≤ m p" if "n ≤ p" for n p
unfolding m_def K_def using that by (auto intro!: Max_mono)
have "K n ∩ K p = {}" if "n < p" for n p
fix x assume "¦f x¦ < a * L * m (Suc n)" "a * L * m p < ¦f x¦"
moreover have "a * L * m (Suc n) ≤ a * L * m p"
using *[of "Suc n" p] that ‹a > 0› ‹L > 3› by auto
ultimately show False by auto
qed
then have "disjoint_family K"
unfolding disjoint_family_on_def using nat_neq_iff by auto

have "∃A0. lower_asymptotic_density A0 = 1 ∧
(λn. measure P (space M ∩ (T^^n)-(K n)) * indicator A0 n) ⇢ 0"
apply (rule disjoint_sets_measure_density_one_tendsto_zero) apply fact+
using PabsM by auto
then obtain A0 where A0: "lower_asymptotic_density A0 = 1" "(λn. measure P (space M ∩ (T^^n)-(K n)) * indicator A0 n) ⇢ 0"
by blast
obtain N0 where N0: "⋀n. n ≥ N0 ⟹ measure P (space M ∩ (T^^n)-(K n)) * indicator A0 n < alpha"
using order_tendstoD(2)[OF A0(2) ‹alpha > 0›] unfolding eventually_sequentially by blast

have A: "eventually (λn. measure (distr P borel (λx. (g x + birkhoff_sum f n x) / B n)) {x. abs (x) > 2 * a} > 3 * alpha) sequentially"
apply (rule open_set_weak_conv_lsc[of _ Z])
by (auto simp add: PZ.real_distribution_axioms conv assms)
have B: "eventually (λn. measure (distr P borel (λx. (g x + birkhoff_sum f n x) / B n)) {x. abs (x) ≥ (L- 1) * a} < alpha) sequentially"
apply (rule closed_set_weak_conv_usc[of _ Z])
by (auto simp add: PZ.real_distribution_axioms conv assms)
obtain N where N: "⋀n. n ≥ N ⟹ measure (distr P borel (λx. (g x + birkhoff_sum f n x) / B n)) {x. abs (x) > 2 * a} > 3 * alpha"
"⋀n. n ≥ N ⟹ measure (distr P borel (λx. (g x + birkhoff_sum f n x) / B n)) {x. abs (x) ≥ (L-1) * a} < alpha"
using eventually_conj[OF A B] unfolding eventually_sequentially by blast

have I: "PS.prob {x ∈ space M. abs((g x + birkhoff_sum f n x) / B n) < (L-1) * a} > 1 - alpha" if "n ≥ N" for n
proof -
let ?h = "(λx. (g x + birkhoff_sum f n x) / B n)"
have "measure (distr P borel ?h) {x. abs (x) ≥ (L-1) * a}
= measure P (?h- {x. abs (x) ≥ (L-1) * a} ∩ space P)"
by (rule measure_distr, auto)
also have "... = measure P {x ∈ space M. abs(?h x) ≥ (L-1) * a}"
by (rule HOL.cong[of "measure P"], auto)
finally have A: "PS.prob {x ∈ space M. abs(?h x) ≥ (L-1) * a} < alpha"
using N(2)[OF that] by auto
have *: "{x ∈ space M. abs(?h x) < (L-1) * a} = space M - {x ∈ space M. abs(?h x) ≥ (L-1) * a}"
by auto
show ?thesis
unfolding * using A PS.prob_compl by auto
qed

have Main: "PS.prob (space M ∩ (T^^n)-(K n)) > alpha" if "n ≥ N" "B (Suc n) > L * m n" for n
proof -
have "Suc n ≥ N" using that by auto
let ?h = "(λx. (g x + birkhoff_sum f (Suc n) x) / B (Suc n))"
have "measure (distr P borel ?h) {x. abs (x) > 2 * a}
= measure P (?h- {x. abs (x) > 2 * a} ∩ space P)"
by (rule measure_distr, auto)
also have "... = measure P {x ∈ space M. abs(?h x) > 2 * a}"
by (rule HOL.cong[of "measure P"], auto)
finally have A: "PS.prob {x ∈ space M. abs(?h x) > 2 * a} > 3 * alpha"
using N(1)[OF ‹Suc n ≥ N›] by auto

define X where "X = {x ∈ space M. abs((g x + birkhoff_sum f n x) / B n) < (L-1) * a}
∩ {x ∈ space M. abs((g x + birkhoff_sum f (Suc n) x) / B (Suc n)) < (L-1) * a}
∩ {x ∈ space M. abs((g x + birkhoff_sum f (Suc n) x) / B (Suc n)) > 2 * a}"
have "(1 - alpha) + (1 - alpha) + 3 * alpha <
PS.prob {x ∈ space M. abs((g x + birkhoff_sum f n x) / B n) < (L-1) * a}
+ PS.prob {x ∈ space M. abs((g x + birkhoff_sum f (Suc n) x) / B (Suc n)) < (L-1) * a}
+ PS.prob {x ∈ space M. abs((g x + birkhoff_sum f (Suc n) x) / B (Suc n)) > 2 * a}"
using A I[OF ‹n ≥ N›] I[OF ‹Suc n ≥ N›] by auto
also have "... ≤ 2 + PS.prob X"
unfolding X_def by (rule PS.sum_measure_le_measure_inter3, auto)
finally have "PS.prob X > alpha" by auto

have "X ⊆ space M ∩ (T^^n)-(K n)"
proof
have *: "B i ≤ m n" if "i ≤ n" for i
unfolding m_def by (rule Max_ge, auto simp add: that)
have **: "B i ≤ B (Suc n)" if "i ≤ Suc n" for i
proof (cases "i ≤ n")
case True
have "m n ≤ B (Suc n) / L"
using ‹L * m n < B (Suc n)› ‹L > 3› by (simp add: divide_simps algebra_simps)
also have "... ≤ B (Suc n)"
using Bpos[of "Suc n"] ‹L > 3› by (simp add: divide_simps algebra_simps)
finally show ?thesis using *[OF True] by simp
next
case False
then show ?thesis
using ‹i ≤ Suc n› le_SucE by blast
qed
have "m (Suc n) = B (Suc n)"
unfolding m_def by (rule Max_eqI, auto simp add: **)

fix x assume "x ∈ X"
then have "abs (g x + birkhoff_sum f n x) < (L-1) * a * B n"
unfolding X_def using Bpos[of n] by (auto simp add: algebra_simps divide_simps)
also have "... ≤ L * a * m n"
using *[of n] ‹L > 3› ‹a > 0› Bpos[of n] by (auto intro!: mult_mono)
also have "... ≤ a * B (Suc n)"
using ‹B (Suc n) > L * m n› less_imp_le ‹a > 0› by auto
finally have A: "abs (g x + birkhoff_sum f n x) < a * B (Suc n)"
by simp

have B: "abs(g x + birkhoff_sum f (Suc n) x) < (L-1) * a * B (Suc n)"
using ‹x ∈ X› unfolding X_def using Bpos[of "Suc n"] by (auto simp add: algebra_simps divide_simps)
have *: "f((T^^n) x) = (g x + birkhoff_sum f (Suc n) x) - (g x + birkhoff_sum f n x)"
by (metis add.commute birkhoff_sum_1(2) birkhoff_sum_cocycle plus_1_eq_Suc)
have "abs(f((T^^n) x)) ≤ abs (g x + birkhoff_sum f (Suc n) x) + abs(g x + birkhoff_sum f n x)"
unfolding * by simp
also have "... < (L-1) * a * B (Suc n) + a * B (Suc n)"
using A B by auto
also have "... = L * a * m (Suc n)"
using ‹m (Suc n) = B (Suc n)› by (simp add: algebra_simps)
finally have Z1: "abs(f((T^^n) x)) < L * a * m (Suc n)"
by simp

have "2 * a * B (Suc n) < abs (g x + birkhoff_sum f (Suc n) x)"
using ‹x ∈ X› unfolding X_def using Bpos[of "Suc n"] by (auto simp add: algebra_simps divide_simps)
also have "... = abs(f((T^^n) x) + (g x + birkhoff_sum f n x))"
unfolding * by auto
also have "... ≤ abs(f((T^^n) x)) + abs (g x + birkhoff_sum f n x)"
by auto
also have "... < abs(f((T^^n) x)) + a * B (Suc n)"
using A by auto
finally have "abs(f((T^^n) x)) > a * B (Suc n)"
by simp
then have Z2: "abs(f((T^^n) x)) > a * L * m n"
using mult_strict_left_mono[OF ‹B (Suc n) > L * m n› ‹a > 0›] by auto

show "x ∈ space M ∩ (T ^^ n) - K n"
using Z1 Z2 ‹x ∈ X› unfolding K_def X_def by (auto simp add: algebra_simps)
qed
have "PS.prob X ≤ PS.prob (space M ∩ (T^^n)-(K n))"
by (rule PS.finite_measure_mono, fact, auto)
then show "alpha < PS.prob (space M ∩ (T ^^ n) - K n)"
using ‹alpha < PS.prob X› by simp
qed
define A where "A = A0 ∩ {N + N0..}"
have "lower_asymptotic_density A = 1"
unfolding A_def by (rule lower_asymptotic_density_one_intersection, fact, simp)
moreover have "B (Suc n) ≤ L * m n" if "n ∈ A" for n
proof (rule ccontr)
assume "¬(B (Suc n) ≤ L * m n)"
then have "L * m n < B (Suc n)" "n ≥ N" "n ≥ N0"
using ‹n ∈ A› unfolding A_def by auto
then have "PS.prob (space M ∩ (T^^n)-(K n)) > alpha"
using Main by auto
moreover have "PS.prob (space M ∩ (T^^n)-(K n)) * indicator A0 n < alpha"
using N0[OF ‹n ≥ N0›] by simp
moreover have "indicator A0 n = (1::real)"
using ‹n ∈ A› unfolding A_def indicator_def by auto
ultimately show False
by simp
qed
ultimately show ?thesis
unfolding m_def by blast
qed

text ‹Now, we combine the two previous statements to prove the main theorem.›

theorem subexponential_growth:
"(λn. max 0 (ln (B n) /n)) ⇢ 0"
proof -
obtain a0 where a0: "a0 > 0" "PZ.prob {x. abs (x) > a0} > 0"
using PZ.not_Dirac_0_imp_positive_mass_away_0[OF non_trivial] by blast
define a where "a = a0/2"
have "a > 0" using ‹a0 > 0› unfolding a_def by auto
define alpha where "alpha = PZ.prob {x. abs (x) > a0}/4"
have "alpha > 0" unfolding alpha_def using a0 by auto
have "PZ.prob {x. abs (x) > 2 * a} > 3 * alpha"
using a0 unfolding a_def alpha_def by auto

obtain C0 where C0: "PZ.prob {x. abs(x) ≥ C0} < alpha" "C0 ≥ 3 * a"
using PZ.random_variable_small_tails[OF ‹alpha > 0›, of "λx. x"] by auto
define L where "L = C0/a + 1"
have "PZ.prob {x. abs(x) ≥ (L-1) * a} < alpha"
unfolding L_def using C0 ‹a>0› by auto
have "L > 3"
unfolding L_def using C0 ‹a > 0› by (auto simp add: divide_simps)

obtain C where C: "⋀n. B (Suc n) ≤ C * Max {B i|i. i ≤ n}" "C ≥ 1"
using upper_bound_C by blast
have C2: "B n ≤ C * Max {B i|i. i < n}" if "n > 0" for n
proof -
obtain m where m: "n = Suc m"
using ‹0 < n› gr0_implies_Suc by auto
have *: "i ≤ m ⟷ i < Suc m" for i by auto
show ?thesis using C(1)[of m] unfolding m * by auto
qed

have Mainj: "eventually (λn. ln (B n) / n ≤ (1+ln L)/j) sequentially" if "j > 0" for j::nat
proof -
have *: "∃A. lower_asymptotic_density A = 1 ∧ (∀n∈A. B (j * Suc n + k) ≤ L * Max {B (j * i + k) |i. i ≤ n})" for k
proof -
interpret Tj0: conservative M "(T^^j)" using conservative_power[of j] by auto
have *: "g x + birkhoff_sum f k x + Tj0.birkhoff_sum (λx. birkhoff_sum f j ((T ^^ k) x)) n x = g x + birkhoff_sum f (j * n + k) x" for x n
proof -
have "birkhoff_sum f (j * n + k) x = (∑i ∈ {..<k} ∪ {k..<j * n + k}. f ((T ^^ i) x))"
unfolding birkhoff_sum_def by (rule sum.cong, auto)
also have "... = (∑i ∈ {..<k}. f ((T ^^ i) x)) + (∑i ∈ {k..<j * n + k}. f ((T ^^ i) x))"
by (auto intro!: sum.union_disjoint)
also have "... = birkhoff_sum f k x + (∑s<j. ∑i<n. f ((T ^^ (i * j + s)) ((T^^k) x)))"
apply (subst sum_arith_progression)
by (auto simp add: algebra_simps intro!: sum.reindex_bij_betw[symmetric] bij_betw_byWitness[of _ "λa. a-k"])
also have "... = birkhoff_sum f k x + Tj0.birkhoff_sum (λx. birkhoff_sum f j ((T ^^ k) x)) n x"
by (auto simp add: algebra_simps intro!: sum.swap)
finally show ?thesis by simp
qed
interpret Tj: conservative_limit "T^^j" M P Z "λx. birkhoff_sum f j ((T^^k) x)" "λx. g x + birkhoff_sum f k x" "λn. B (j * n + k)"
apply standard
using PabsM Bpos non_trivial conv ‹j>0› unfolding * by (auto intro!: weak_conv_m_subseq strict_monoI)
show ?thesis
apply (rule Tj.upper_bound_L[OF ‹a > 0› ‹alpha > 0›]) by fact+
qed
have "∃A. ∀k. lower_asymptotic_density (A k) = 1 ∧ (∀n∈A k. B (j * Suc n + k) ≤ L * Max {B (j * i + k) |i. i ≤ n})"
apply (rule choice) using * by auto
then obtain A where A: "⋀k. lower_asymptotic_density (A k) = 1" "⋀k n. n ∈ A k ⟹ B (j * Suc n + k) ≤ L * Max {B (j * i + k) |i. i ≤ n}"
by blast
define Aj where "Aj = (⋂k<j. A k)"
have "lower_asymptotic_density Aj = 1"
unfolding Aj_def using A(1) by (simp add: lower_asymptotic_density_one_finite_Intersection)
define Bj where "Bj = UNIV - Aj"
have "upper_asymptotic_density Bj = 0"
using ‹lower_asymptotic_density Aj = 1›
unfolding Bj_def lower_upper_asymptotic_density_complement by simp

define M where "M = (λn. Max {B p |p. p < (n+1) * j})"
have "B 0 ≤ M n" for n
unfolding M_def apply (rule Max_ge, auto, rule exI[of _ 0]) using ‹j > 0› by auto
then have Mpos: "M n > 0" for n
by (metis Bpos not_le not_less_iff_gr_or_eq order.strict_trans)
have M_L: "M (Suc n) ≤ L * M n" if "n ∈ Aj" for n
proof -
have *: "B s ≤ L * M n" if "s < (n+2) * j" for s
proof (cases "s < (n+1) * j")
case True
have "B s ≤ M n"
unfolding M_def apply (rule Max_ge) using True by auto
also have "... ≤ L * M n" using ‹L > 3› ‹M n > 0› by auto
finally show ?thesis by simp
next
case False
then obtain k where "k < j" "s = (n+1) * j + k" using ‹s < (n+2) * j› le_Suc_ex by force
then have "B s = B (j * Suc n + k)" by (auto simp add: algebra_simps)
also have "... ≤ L * Max {B (j * i + k) |i. i ≤ n}"
using A(2)[of n k] ‹n ∈ Aj› unfolding Aj_def using ‹k < j› by auto
also have "... ≤ L * Max {B a|a. a < (n+1) * j}"
apply (rule mult_left_mono, rule Max_mono) using ‹L>3› proof (auto)
fix i assume "i ≤ n" show "∃a. B (j * i + k) = B a ∧ a < j + n * j"
apply (rule exI[of _ "j * i + k"]) using ‹k < j› ‹i ≤ n›
qed
finally show ?thesis unfolding M_def by auto
qed
show ?thesis
unfolding M_def apply (rule Max.boundedI)
using * unfolding M_def using ‹j > 0› by auto
qed
have M_C: "M (Suc n) ≤ C^j * M n" for n
proof -
have I: "Max {B s|s. s < (n+1) * j + k} ≤ C^k * M n" for k
proof (induction k)
case 0
show ?case
apply (rule Max.boundedI) unfolding M_def using ‹j > 0› by auto
next
case (Suc k)
have *: "B s ≤ C * C ^ k * M n" if "s < Suc (j + n * j + k)" for s
proof (cases "s < j + n * j + k")
case True
then have "B s ≤ C^k * M n" using iffD1[OF Max_le_iff, OF _ _ Suc.IH] by auto
also have "... ≤ C * C^k * M n" using ‹C ≥ 1› ‹M n > 0› by auto
finally show ?thesis by simp
next
case False
then have "s = j + n * j + k" using that by auto
then have "B s ≤ C * Max {B s|s. s < (n+1) * j + k}" using C2[of s] using ‹j > 0› by auto
also have "... ≤ C * C^k * M n" using Suc.IH ‹C ≥ 1› by auto
finally show ?thesis by simp
qed
show ?case
apply (rule Max.boundedI) using ‹j > 0› * by auto
qed
show ?thesis using I[of j] unfolding M_def by (auto simp add: algebra_simps)
qed
have I: "ln (M n) ≤ ln (M 0) + n * ln L + card (Bj ∩ {..<n}) * ln (C^j)" for n
proof (induction n)
case 0
show ?case by auto
next
case (Suc n)
show ?case
proof (cases "n ∈ Bj")
case True
then have *: "Bj ∩ {..<Suc n} = Bj ∩ {..<n} ∪ {n}" by auto
have **: "card (Bj ∩ {..<Suc n}) = card (Bj ∩ {..<n}) + card {n}"
unfolding * by (rule card_Un_disjoint, auto)

have "ln (M (Suc n)) ≤ ln (C^j * M n)"
using M_C ‹⋀n. 0 < M n› less_le_trans ln_le_cancel_iff by blast
also have "... = ln (M n) + ln (C^j)"
using ‹C ≥ 1› ‹0 < M n› ln_mult by auto
also have "... ≤ ln (M 0) + n * ln L + card (Bj ∩ {..<n}) * ln (C^j) + ln (C^j)"
using Suc.IH by auto
also have "... = ln (M 0) + n * ln L + card (Bj ∩ {..<Suc n}) * ln (C^j)"
using ** by (auto simp add: algebra_simps)
also have "... ≤ ln (M 0) + (Suc n) * ln L + card (Bj ∩ {..<Suc n}) * ln (C^j)"
using ‹L > 3› by auto
finally show ?thesis by auto
next
case False
have "M (Suc n) ≤ L * M n"
apply (rule M_L) using False unfolding Bj_def by auto
then have "ln (M (Suc n)) ≤ ln (L * M n)"
using ‹⋀n. 0 < M n› less_le_trans ln_le_cancel_iff by blast
also have "... = ln (M n) + ln L"
using ‹L > 3› ‹0 < M n› ln_mult by auto
also have "... ≤ ln (M 0) + Suc n * ln L + card (Bj ∩ {..<n}) * ln (C^j)"
using Suc.IH by (auto simp add: algebra_simps)
also have "... ≤ ln (M 0) + Suc n * ln L + card (Bj ∩ {..<Suc n}) * ln (C^j)"
using ‹C ≥ 1› by (auto intro!: mult_right_mono card_mono)
finally show ?thesis by auto
qed
qed
have "ln (M n)/n ≤ ln (M 0)* (1/n) + ln L + (card (Bj ∩ {..<n})/n) * ln (C^j)" if "n≥1" for n
using that apply (auto simp add: algebra_simps divide_simps)
by (metis (no_types, hide_lams) I add.assoc mult.commute mult_left_mono of_nat_0_le_iff semiring_normalization_rules(34))
then have A: "eventually (λn. ln (M n)/n ≤ ln (M 0)* (1/n) + ln L + (card (Bj ∩ {..<n})/n) * ln (C^j)) sequentially"
unfolding eventually_sequentially by blast

have *: "(λn. ln (M 0)*(1/n) + ln L + (card (Bj ∩ {..<n})/n) * ln (C^j)) ⇢ ln (M 0) * 0 + ln L + 0 *ln (C^j)"
by (intro tendsto_intros upper_asymptotic_density_zero_lim, fact)
have B: "eventually (λn. ln (M 0)*(1/n) + ln L + (card (Bj ∩ {..<n})/n) * ln (C^j) < 1 + ln L) sequentially"
by (rule order_tendstoD[OF *], auto)
have "eventually (λn. ln (M n)/n < 1 + ln L) sequentially"
using eventually_conj[OF A B] by (simp add: eventually_mono)
then obtain N where N: "⋀n. n ≥ N ⟹ ln (M n)/n < 1 + ln L"
unfolding eventually_sequentially by blast
have "ln (B p) / p ≤ (1+ln L) / j" if "p ≥ (N+1) * j" for p
proof -
define n where "n = p div j"
have "n ≥ N+1" unfolding n_def using that
by (metis ‹0 < j› div_le_mono div_mult_self_is_m)
then have "n ≥ N" "n ≥ 1" by auto
have *: "p < (n+1) * j" "n * j ≤ p"
unfolding n_def using ‹j > 0› dividend_less_div_times by auto
have "B p ≤ M n"
unfolding M_def apply (rule Max_ge) using * by auto
then have "ln (B p) ≤ ln (M n)"
using Bpos Mpos ln_le_cancel_iff by blast
also have "... ≤ n * (1+ln L)"
using N[OF ‹n ≥ N›] ‹n ≥ 1› by (auto simp add: divide_simps algebra_simps)
also have "... ≤ (p/j) * (1+ln L)"
apply (rule mult_right_mono) using *(2) ‹j > 0› ‹L > 3›
apply (auto simp add: divide_simps algebra_simps)
using of_nat_mono by fastforce
finally show ?thesis
using ‹j > 0› that by (simp add: algebra_simps divide_simps)
qed
then show "eventually (λp. ln (B p) / p ≤ (1+ln L)/j) sequentially"
unfolding eventually_sequentially by auto
qed
show "(λn. max 0 (ln (B n) / real n)) ⇢ 0"
proof (rule order_tendstoI)
fix e::real assume "e > 0"
have *: "(λj. (1+ln L) * (1/j)) ⇢ (1+ln L) * 0"
by (intro tendsto_intros)
have "eventually (λj. (1+ln L) * (1/j) < e) sequentially"
apply (rule order_tendstoD[OF *]) using ‹e > 0› by auto
then obtain j::nat where j: "j > 0" "(1+ln L) * (1/j) < e"
unfolding eventually_sequentially using add.right_neutral le_eq_less_or_eq by fastforce
show "eventually (λn. max 0 (ln (B n) / real n) < e) sequentially"
using Mainj[OF ‹j > 0›] j(2) ‹e > 0› by (simp add: eventually_mono)
qed

end (*of locale conservative_limit*)

subsection ‹Normalizing sequences grow at most polynomially in probability preserving systems›

text ‹In probability preserving systems, normalizing sequences grow at most polynomially.
The proof, also given in~\cite{gouezel_normalizing_sequences}, is considerably easier than
the conservative case. We prove that $B_{n+1} \leq C B_n$ (more precisely, this only holds if
$B_{n+1}$ is large enough), by arguing that $S_{n+1} f = S_n f + f \circ T^n$, where $f\circ T^n$
is negligible if $B_{n+1}$ is large thanks to the measure preservation. We also prove that
$B_{2n} \leq E B_n$, by writing $S_{2n} f = S_n f+ S_n f \circ T^n$ and arguing that the two terms
on the right have the same distribution. Finally, combining these two estimates, the polynomial

locale pmpt_limit =
pmpt M + PZ: real_distribution Z
for M::"'a measure" and Z::"real measure" +
fixes f::"'a ⇒ real" and B::"nat ⇒ real"
assumes Bpos: "⋀n. B n > 0"
and M [measurable]: "f ∈ borel_measurable M"
and non_trivial: "PZ.prob {0} < 1"
and conv: "weak_conv_m (λn. distr P borel (λx. (birkhoff_sum f n x) / B n)) Z"
begin

text ‹First, we prove that $B_{n+1} \leq C B_n$ if $B_{n+1}$ is large enough.›

lemma upper_bound_CD:
"∃C D. (∀n. B (Suc n) ≤ D ∨ B (Suc n) ≤ C * B n) ∧ C ≥ 1"
proof -
obtain a where a: "a > 0" "PZ.prob {x. abs (x) > a} > 0"
using PZ.not_Dirac_0_imp_positive_mass_away_0[OF non_trivial] by blast
define alpha where "alpha = PZ.prob {x. abs (x) > a}/4"
have "alpha > 0" unfolding alpha_def using a by auto
have A: "PZ.prob {x. abs (x) > a} > 3 * alpha"
using a unfolding alpha_def by auto

obtain C0 where C0: "PZ.prob {x. abs(x) ≥ C0} < alpha" "C0 ≥ a"
using PZ.random_variable_small_tails[OF ‹alpha > 0›, of "λx. x"] by auto

have A: "eventually (λn. measure (distr M borel (λx. (birkhoff_sum f n x) / B n)) {x. abs (x) > a} > 3 * alpha) sequentially"
apply (rule open_set_weak_conv_lsc[of _ Z])
by (auto simp add: PZ.real_distribution_axioms conv A)
have B: "eventually (λn. measure (distr M borel (λx. (birkhoff_sum f n x) / B n)) {x. abs (x) ≥ C0} < alpha) sequentially"
apply (rule closed_set_weak_conv_usc[of _ Z])
by (auto simp add: PZ.real_distribution_axioms conv C0)
obtain N where N: "⋀n. n ≥ N ⟹ measure (distr M borel (λx. (birkhoff_sum f n x) / B n)) {x. abs x > a} > 3 * alpha"
"⋀n. n ≥ N ⟹ measure (distr M borel (λx. (birkhoff_sum f n x) / B n)) {x. abs x ≥ C0} < alpha"
using eventually_conj[OF A B] unfolding eventually_sequentially by blast

obtain Cf where Cf: "prob {x ∈ space M. abs(f x) ≥ Cf} < alpha" "Cf ≥ 1"
using random_variable_small_tails[OF ‹alpha > 0› M] by auto
have Main: "B (Suc n) ≤ (2*C0/a) * B n" if "n ≥ N" "B (Suc n) ≥ 2 * Cf/a" for n
proof -
have "Suc n ≥ N" using that by auto
let ?h = "(λx. (birkhoff_sum f (Suc n) x) / B (Suc n))"
have "measure (distr M borel ?h) {x. abs (x) > a}
= measure M (?h- {x. abs (x) > a} ∩ space M)"
by (rule measure_distr, auto)
also have "... = prob {x ∈ space M. abs(?h x) > a}"
by (rule HOL.cong[of "measure M"], auto)
finally have A: "prob {x ∈ space M. abs(?h x) > a} > 3 * alpha"
using N(1)[OF ‹Suc n ≥ N›] by auto

let ?h = "(λx. (birkhoff_sum f n x) / B n)"
have "measure (distr M borel ?h) {x. abs (x) ≥ C0}
= measure M (?h- {x. abs (x) ≥ C0} ∩ space M)"
by (rule measure_distr, auto)
also have "... = measure M {x ∈ space M. abs(?h x) ≥ C0}"
by (rule HOL.cong[of "measure M"], auto)
finally have B0: "prob {x ∈ space M. abs(?h x) ≥ C0} < alpha"
using N(2)[OF ‹n ≥ N›] by auto
have *: "{x ∈ space M. abs(?h x) < C0} = space M - {x ∈ space M. abs(?h x) ≥ C0}"
by auto
have B: "prob {x ∈ space M. abs(?h x) < C0} > 1- alpha"
unfolding * using B0 prob_compl by auto

have "prob {x ∈ space M. abs(f ((T^^n) x)) ≥ Cf} = prob ((T^^n)-{x ∈ space M. abs(f x) ≥ Cf} ∩ space M)"
by (rule HOL.cong[of "prob"], auto)
also have "... = prob {x ∈ space M. abs(f x) ≥ Cf}"
using T_vrestr_same_measure(2)[of "{x ∈ space M. abs(f x) ≥ Cf}" n]
unfolding vimage_restr_def by auto
finally have C0: "prob {x ∈ space M. abs(f ((T^^n) x)) ≥ Cf} < alpha"
using Cf by simp
have *: "{x ∈ space M. abs(f ((T^^n) x)) < Cf} = space M - {x ∈ space M. abs(f ((T^^n) x)) ≥ Cf}"
by auto
have C: "prob {x ∈ space M. abs(f ((T^^n) x)) < Cf} > 1- alpha"
unfolding * using C0 prob_compl by auto

define X where "X = {x ∈ space M. abs((birkhoff_sum f n x) / B n) < C0}
∩ {x ∈ space M. abs((birkhoff_sum f (Suc n) x) / B (Suc n)) > a}
∩ {x ∈ space M. abs(f ((T^^n) x)) < Cf}"
have "(1 - alpha) + 3 * alpha + (1 - alpha) <
prob {x ∈ space M. abs((birkhoff_sum f n x) / B n) < C0}
+ prob {x ∈ space M. abs((birkhoff_sum f (Suc n) x) / B (Suc n)) > a}
+ prob {x ∈ space M. abs(f ((T^^n) x)) < Cf}"
using A B C by auto
also have "... ≤ 2 + prob X"
unfolding X_def by (rule sum_measure_le_measure_inter3, auto)
finally have "prob X > alpha" by auto
then have "X ≠ {}" using ‹alpha > 0› by auto
then obtain x where "x ∈ X" by auto
have *: "abs(birkhoff_sum f n x) ≤ C0 * B n"
"abs(birkhoff_sum f (Suc n) x) ≥ a * B (Suc n)"
"abs(f((T^^n) x)) ≤ Cf"
using ‹x ∈ X› Bpos[of n] Bpos[of "Suc n"] unfolding X_def by (auto simp add: divide_simps)
have "a * B (Suc n) ≤ abs(birkhoff_sum f (Suc n) x)"
using * by simp
also have "... = abs(birkhoff_sum f n x + f ((T^^n) x))"
by (metis Groups.add_ac(2) One_nat_def birkhoff_sum_1(3) birkhoff_sum_cocycle plus_1_eq_Suc)
also have "... ≤ C0 * B n + Cf"
using * by auto
also have "... ≤ C0 * B n + (a/2) * B (Suc n)"
using ‹B (Suc n) ≥ 2 * Cf/a› ‹a > 0› by (auto simp add: divide_simps algebra_simps)
finally show "B (Suc n) ≤ (2 * C0/a) * B n"
using ‹a > 0› by (auto simp add: divide_simps algebra_simps)
qed
define C1 where "C1 = Max {B(Suc n)/B n |n. n ≤ N}"
have *: "B (Suc n) ≤ max ((2 * C0/a)) C1 * B n" if "B (Suc n) > 2 * Cf/a" for n
proof (cases "n > N")
case True
then show ?thesis
using Main[OF less_imp_le[OF ‹n > N›] less_imp_le[OF that]] Bpos[of n]
by (meson max.cobounded1 order_trans real_mult_le_cancel_iff1)
next
case False
then have "n ≤ N" by simp
have "B(Suc n)/B n ≤ C1"
unfolding C1_def apply (rule Max_ge) using ‹n ≤ N› by auto
then have "B (Suc n) ≤ C1 * B n"
using Bpos[of n] by (simp add: divide_simps)
then show ?thesis
using Bpos[of n] by (meson max.cobounded2 order_trans real_mult_le_cancel_iff1)
qed
show ?thesis
apply (rule exI[of _ "max ((2 * C0/a)) C1"], rule exI[of _ "2 * Cf/a"])
using * linorder_not_less ‹C0 ≥ a› ‹a > 0› by (auto intro!: max.coboundedI1)
qed

text ‹Second, we prove that $B_{2n} \leq E B_n$.›

lemma upper_bound_E:
"∃E. ∀n. B (2 * n) ≤ E * B n"
proof -
obtain a where a: "a > 0" "PZ.prob {x. abs (x) > a} > 0"
using PZ.not_Dirac_0_imp_positive_mass_away_0[OF non_trivial] by blast
define alpha where "alpha = PZ.prob {x. abs (x) > a}/4"
have "alpha > 0" unfolding alpha_def using a by auto
have A: "PZ.prob {x. abs (x) > a} > 3 * alpha"
using a unfolding alpha_def by auto

obtain C0 where C0: "PZ.prob {x. abs(x) ≥ C0} < alpha" "C0 ≥ a"
using PZ.random_variable_small_tails[OF ‹alpha > 0›, of "λx. x"] by auto

have A: "eventually (λn. measure (distr M borel (λx. (birkhoff_sum f n x) / B n)) {x. abs (x) > a} > 3 * alpha) sequentially"
apply (rule open_set_weak_conv_lsc[of _ Z])
by (auto simp add: PZ.real_distribution_axioms conv A)
have B: "eventually (λn. measure (distr M borel (λx. (birkhoff_sum f n x) / B n)) {x. abs (x) ≥ C0} < alpha) sequentially"
apply (rule closed_set_weak_conv_usc[of _ Z])
by (auto simp add: PZ.real_distribution_axioms conv C0)
obtain N where N: "⋀n. n ≥ N ⟹ measure (distr M borel (λx. (birkhoff_sum f n x) / B n)) {x. abs x > a} > 3 * alpha"
"⋀n. n ≥ N ⟹ measure (distr M borel (λx. (birkhoff_sum f n x) / B n)) {x. abs x ≥ C0} < alpha"
using eventually_conj[OF A B] unfolding eventually_sequentially by blast

have Main: "B (2 * n) ≤ (2*C0/a) * B n" if "n ≥ N" for n
proof -
have "2 * n ≥ N" using that by auto
let ?h = "(λx. (birkhoff_sum f (2 * n) x) / B (2 * n))"
have "measure (distr M borel ?h) {x. abs (x) > a}
= measure M (?h- {x. abs (x) > a} ∩ space M)"
by (rule measure_distr, auto)
also have "... = prob {x ∈ space M. abs(?h x) > a}"
by (rule HOL.cong[of "measure M"], auto)
finally have A: "prob {x ∈ space M. abs((birkhoff_sum f (2 * n) x) / B (2 * n)) > a} > 3 * alpha"
using N(1)[OF ‹2 * n ≥ N›] by auto

let ?h = "(λx. (birkhoff_sum f n x) / B n)"
have "measure (distr M borel ?h) {x. abs (x) ≥ C0}
= measure M (?h- {x. abs (x) ≥ C0} ∩ space M)"
by (rule measure_distr, auto)
also have "... = measure M {x ∈ space M. abs(?h x) ≥ C0}"
by (rule HOL.cong[of "measure M"], auto)
finally have B0: "prob {x ∈ space M. abs(?h x) ≥ C0} < alpha"
using N(2)[OF ‹n ≥ N›] by auto
have *: "{x ∈ space M. abs(?h x) < C0} = space M - {x ∈ space M. abs(?h x) ≥ C0}"
by auto
have B: "prob {x ∈ space M. abs((birkhoff_sum f n x) / B n) < C0} > 1- alpha"
unfolding * using B0 prob_compl by auto

have "prob {x ∈ space M. abs(?h ((T^^n) x)) < C0} = prob ((T^^n)-{x ∈ space M. abs(?h x) < C0} ∩ space M)"
by (rule HOL.cong[of "prob"], auto)
also have "... = prob {x ∈ space M. abs(?h x) < C0}"
using T_vrestr_same_measure(2)[of "{x ∈ space M. abs(?h x) < C0}" n]
unfolding vimage_restr_def by auto
finally have C: "prob {x ∈ space M. abs((birkhoff_sum f n ((T^^n) x)) / B n) < C0} > 1- alpha"
using B by simp

define X where "X = {x ∈ space M. abs((birkhoff_sum f n x) / B n) < C0}
∩ {x ∈ space M. abs((birkhoff_sum f (2* n) x) / B (2* n)) > a}
∩ {x ∈ space M. abs((birkhoff_sum f n ((T^^n) x)) / B n) < C0}"
have "(1 - alpha) + 3 * alpha + (1 - alpha) <
prob {x ∈ space M. abs((birkhoff_sum f n x) / B n) < C0}
+ prob {x ∈ space M. abs((birkhoff_sum f (2* n) x) / B (2* n)) > a}
+ prob {x ∈ space M. abs((birkhoff_sum f n ((T^^n) x)) / B n) < C0}"
using A B C by auto
also have "... ≤ 2 + prob X"
unfolding X_def by (rule sum_measure_le_measure_inter3, auto)
finally have "prob X > alpha" by auto
then have "X ≠ {}" using ‹alpha > 0› by auto
then obtain x where "x ∈ X" by auto
have *: "abs(birkhoff_sum f n x) ≤ C0 * B n"
"abs((birkhoff_sum f (2 * n) x)) ≥ a * B (2 * n)"
"abs((birkhoff_sum f n ((T^^n) x))) ≤ C0 * B n"
using ‹x ∈ X› Bpos[of n] Bpos[of "2* n"] unfolding X_def by (auto simp add: divide_simps)
have "a * B (2 * n) ≤ abs(birkhoff_sum f (2 * n) x)"
using * by simp
also have "... = abs(birkhoff_sum f n x + birkhoff_sum f n ((T^^n) x))"
unfolding birkhoff_sum_cocycle[of f n n x, symmetric] by (simp add: mult_2)
also have "... ≤ 2 * C0 * B n"
using * by auto
finally show "B (2 * n) ≤ (2 * C0/a) * B n"
using ‹a > 0› by (auto simp add: divide_simps algebra_simps)
qed
define C1 where "C1 = Max {B(2 * n)/B n |n. n ≤ N}"
have *: "B (2*n) ≤ max ((2 * C0/a)) C1 * B n" for n
proof (cases "n > N")
case True
then show ?thesis
using Main[OF less_imp_le[OF ‹n > N›]] Bpos[of n]
by (meson max.cobounded1 order_trans real_mult_le_cancel_iff1)
next
case False
then have "n ≤ N" by simp
have "B(2*n)/B n ≤ C1"
unfolding C1_def apply (rule Max_ge) using ‹n ≤ N› by auto
then have "B (2*n) ≤ C1 * B n"
using Bpos[of n] by (simp add: divide_simps)
then show ?thesis
using Bpos[of n] by (meson max.cobounded2 order_trans real_mult_le_cancel_iff1)
qed
show ?thesis
apply (rule exI[of _ "max ((2 * C0/a)) C1"])
using * by auto
qed

text ‹Finally, we combine the estimates in the two lemmas above to show that $B_n$ grows
at most polynomially.›

theorem polynomial_growth:
"∃C K. ∀n>0. B n ≤ C * (real n)^K"
proof -
obtain C D where C: "C ≥ 1" "⋀n. B (Suc n) ≤ D ∨ B (Suc n) ≤ C * B n"
using upper_bound_CD by blast
obtain E where E: "⋀n. B (2 * n) ≤ E * B n"
using upper_bound_E by blast
have "E ≥ 1" using E[of 0] Bpos[of 0] by auto

obtain k::nat where "log 2 (max C E) ≤ k"
using real_arch_simple[of "log 2 (max C E)"] by blast
then have "max C E ≤ 2^k"
by (meson less_log_of_power not_less one_less_numeral_iff semiring_norm(76))
then have "C ≤ 2^k" "E ≤ 2^k"
by auto

define P where "P = max D (B 0)"
have "P > 0" unfolding P_def using Bpos[of 0] by auto
have Main: "⋀n. n < 2^r ⟹ B n ≤ P * 2^(2 * k * r)" for r
proof (induction r)
case 0
fix n::nat assume "n < 2^0"
then show "B n ≤ P * 2 ^ (2 * k * 0)"
unfolding P_def by auto
next
case (Suc r)
fix n::nat assume "n < 2^(Suc r)"
consider "even n" | "B n ≤ D" | "odd n ∧ B n > D" by linarith
then show "B n ≤ P * 2 ^ (2 * k * Suc r)"
proof (cases)
case 1
then obtain m where m: "n = 2 * m" by (rule evenE)
have "m < 2^r"
using ‹n < 2^(Suc r)› unfolding m by auto
then have *: "B m ≤ P * 2^(2 * k * r)"
using Suc.IH by auto
have "B n ≤ E * B m"
unfolding m using E by simp
also have "... ≤ 2^k * B m"
apply (rule mult_right_mono[OF _ less_imp_le[OF Bpos[of m]]])
using ‹E ≤ 2^k› by simp
also have "... ≤ 2^k * (P * 2^(2 * k * r))"
apply (rule mult_left_mono[OF *]) by auto
also have "... = P * 2^(2 * k * r + k)"
also have "... ≤ P * 2^(2 * k * Suc r)"
apply (rule mult_left_mono) using ‹P > 0› by auto
finally show ?thesis by simp
next
case 2
have "D ≤ P * 1"
unfolding P_def by auto
also have "... ≤ P * 2^(2 * k * Suc r)"
by (rule mult_left_mono[OF _ less_imp_le[OF ‹P > 0›]], auto)
finally show ?thesis using 2 by simp
next
case 3
then obtain m where m: "n = 2 * m + 1"
using oddE by blast
have "m < 2^r"
using ‹n < 2^(Suc r)› unfolding m by auto
then have *: "B m ≤ P * 2^(2 * k * r)"
using Suc.IH by auto
have "B n > D" using 3 by auto
then have "B n ≤ C * B (2 * m)"
unfolding m using C(2)[of "2 * m"] by auto
also have "... ≤ C * (E * B m)"
apply (rule mult_left_mono) using ‹C ≥ 1› E[of m] by auto
also have "... ≤ 2^k * (2^k * B m)"
apply (intro mult_mono) using ‹C ≤ 2^k› ‹C ≥ 1› ‹E ≥ 1› ‹E ≤ 2^k› Bpos[of m] by auto
also have "... ≤ 2^k * (2^k * (P * 2^(2 * k * r)))"
apply (intro mult_left_mono) using * by auto
also have "... = P * 2^(2 * k * Suc r)"
finally show ?thesis by simp
qed
qed
have I: "B n ≤ (P * 2^(2 * k)) * n^(2 * k)" if "n > 0" for n
proof -
define r::nat where "r = nat(floor(log 2 (real n)))"
have *: "int r = floor(log 2 (real n))"
unfolding r_def using ‹0 < n› by auto
have I: "2^r ≤ n ∧ n < 2^(r+1)"
using floor_log_nat_eq_powr_iff[OF _ ‹n > 0›, of 2 r] * by auto
then have "B n ≤ P * 2^(2 * k * (r+1))"
using Main[of n "r+1"] by auto
also have "... = (P * 2^(2 * k)) * ((2^r)^(2*k))"
`