(* Theory MDP_reward *)

(* Author: Maximilian Schäffeler *)

theory MDP_reward
  imports
    Bounded_Functions
    MDP_reward_Util
    Blinfun_Util
    MDP_disc
begin

section ‹Markov Decision Processes with Rewards›

(* Locale extending discrete_MDP (with parameters A and K) by a real-valued
   reward function r on state-action pairs and a real discount factor l.
   Assumptions: l is non-negative (a simp rule) and the range of r is bounded. *)
locale MDP_reward = discrete_MDP A K
  for
    A and 
    K :: "'s ::countable × 'a ::countable ⇒ 's pmf" +
  fixes
    r :: "('s × 'a) ⇒ real" and
    l :: real
  assumes
    zero_le_disc [simp]: "0 ≤ l" and
    r_bounded: "bounded (range r)"
begin

text ‹
This extension to the basic MDPs is formalized with another locale.
It assumes the existence of a reward function @{term r} which takes a state-action pair to a real 
number. We assume that the function is bounded: @{thm r_bounded}.

Furthermore, we fix a discounting factor @{term l}. This locale only assumes @{term "0 ≤ l"};
the additional bound @{term "l < 1"} is introduced later, in an extension of this locale.
›

subsection ‹Util›
subsubsection ‹Basic Properties of rewards›
(* The reward function r is a member of bfun; derived from the locale assumption r_bounded. *)
lemma r_bfun: "r ∈ bfun"
  using r_bounded
  by auto

(* The image of an arbitrary set X under r is bounded (as a subset of the bounded range of r). *)
lemma r_bounded': "bounded (r ` X)"
  by (auto intro: r_bounded bounded_subset)

(* r⇩_M is the supremum of the absolute reward ¦r sa¦ over all state-action pairs sa. *)
definition "r⇩_M = (⨆sa. ¦r sa¦)"

(* Every absolute reward is bounded above by r⇩_M. *)
lemma abs_r_le_r⇩_M: "¦r sa¦ ≤ r⇩_M"
  using bounded_norm_le_SUP_norm r_bounded r⇩_M_def by fastforce

(* r⇩_M equals its own absolute value (simp rule). *)
lemma abs_r⇩_M_eq_r⇩_M [simp]: "¦r⇩_M¦ = r⇩_M"
  using abs_r_le_r⇩_M by fastforce

(* Consequently r⇩_M is non-negative. *)
lemma r⇩_M_nonneg: "0 ≤ r⇩_M"
  using abs_r⇩_M_eq_r⇩_M by linarith

(* The reward at position i of a trace t is Borel-measurable as a function of t on S
   (S is defined outside this file). Registered as a measurability rule. *)
lemma measurable_r_nth [measurable]: "(λt. r (t !! i)) ∈ borel_measurable S"
  by measurable

(* The reward at position i of a trace is integrable with respect to 𝒯 p s;
   the proof uses the bound abs_r_le_r⇩_M. *)
lemma integrable_r_nth [simp]: "integrable (𝒯 p s) (λt. r (t !! i))"
  by (fastforce simp: bounded_iff intro: abs_r_le_r⇩_M)

(* For a pmf d over actions, the expected absolute reward in state s is at most r⇩_M. *)
lemma expectation_abs_r_le: "measure_pmf.expectation d (λa. ¦r (s, a)¦) ≤ r⇩_M"
  using abs_r_le_r⇩_M
  by (fastforce intro!: measure_pmf.integral_le_const measure_pmf.integrable_const_bound)

(* For a pmf d over state-action pairs, the absolute value of the expected reward is at most r⇩_M. *)
lemma abs_exp_r_le: "¦measure_pmf.expectation d r¦ ≤ r⇩_M"
  using abs_r_le_r⇩_M
  by (fastforce intro!: measure_pmf.integral_le_const order.trans[OF integral_abs_bound] measure_pmf.integrable_const_bound)

subsubsection ‹Infinite discounted sums›
(* The power l ^ i can be pulled out of an absolute value (simp rule). *)
lemma abs_disc_eq[simp]: "¦l ^ i * x¦ = l ^ i * ¦x¦"
  by (auto simp: abs_mult)

(* Analogous statement for the norm of a vector scaled by l^t (simp rule). *)
lemma norm_l_pow_eq[simp]: "norm (l^t *⇩_R F) = l^t * norm F"
  by auto

subsection ‹Total Reward for Single Traces›

(* ν_trace_fin t N: discounted sum of the rewards at the first N positions of the trace t.
   ν_trace t: the corresponding infinite discounted sum. *)
abbreviation "ν_trace_fin t N ≡ ∑i < N. l ^ i * r (t !! i)"
abbreviation "ν_trace t ≡ ∑i. l ^ i * r (t !! i)"

(* The finite discounted trace reward is bounded in absolute value by the
   finite sum of l^i * r⇩_M over i < N. *)
lemma abs_ν_trace_fin_le: "¦ν_trace_fin t N¦ ≤ (∑i < N. l^i * r⇩_M)"
  by (auto intro!: sum_mono order.trans[OF sum_abs] mult_left_mono abs_r_le_r⇩_M)

(* The infinite discounted trace reward is Borel-measurable on S. *)
lemma measurable_suminf_reward[measurable]: "ν_trace ∈ borel_measurable S"
  by measurable

(* The finite discounted trace reward is integrable with respect to 𝒯 p s. *)
lemma integrable_ν_trace_fin: "integrable (𝒯 p s) (λt. ν_trace_fin t N)"
  by (fastforce simp: bounded_iff intro: abs_ν_trace_fin_le)


(* All statements in this context are for an arbitrary but fixed policy p. *)
context 
  fixes p :: "('s, 'a) pol"
begin

subsection ‹Expected Finite-Horizon Discounted Reward›
(* ν_fin n s: integral of the n-step discounted trace reward with respect to 𝒯 p s. *)
definition "ν_fin n s = ∫t. ν_trace_fin t n ∂𝒯 p s"

(* ν_fin N s is bounded in absolute value by the finite sum of l^i * r⇩_M over i < N. *)
lemma abs_ν_fin_le: "¦ν_fin N s¦ ≤ (∑i<N. l^i * r⇩_M)"
  unfolding ν_fin_def
  using abs_ν_trace_fin_le
  by (fastforce intro!: prob_space.integral_le_const order_trans[OF integral_abs_bound])

(* For every horizon N, ν_fin N is a member of bfun (as a function of the state). *)
lemma ν_fin_bfun: "(λs. ν_fin N s) ∈ bfun"
  by (auto intro!: abs_ν_fin_le)

(* ν⇩_b_fin: ν_fin lifted to the type 's ⇒⇩_b real; well-definedness is ν_fin_bfun. *)
lift_definition ν⇩_b_fin :: "nat ⇒ 's ⇒⇩_b real" is ν_fin
  using ν_fin_bfun .

(* Recursion: extending the horizon by one step adds l^n times the integral of the
   reward at position n (simp rule). *)
lemma ν_fin_Suc[simp]: "ν_fin (Suc n) s = ν_fin n s + l ^ n * ∫t.  r (t !! n) ∂𝒯 p s"
  by (simp add: ν_fin_def)

(* Base case: the value for horizon 0 is 0 (simp rule). *)
lemma ν_fin_zero[simp]: "ν_fin 0 s = 0"
  by (simp add: ν_fin_def)

(* ν_fin expressed as a discounted sum of expected rewards under the pmfs Pn' p s i
   (Pn' is defined outside this file). Proved by induction on n. *)
lemma ν_fin_eq_Pn: "ν_fin n s = (∑i<n. l^i * measure_pmf.expectation (Pn' p s i) r)"
  by (induction n) (auto simp: Pn'_eq_𝒯 integral_distr)
end

subsection ‹Expected Total Discounted Reward›

(* ν p s is defined as the limit (lim) of the finite-horizon values ν_fin p n s in n. *)
definition "ν p s = lim (λn. ν_fin p n s)"

(* Alternative name for the defining equation of ν. *)
lemmas ν_eq_lim = ν_def

(* ν expressed as an infinite discounted sum of expected rewards under Pn' p s i. *)
lemma ν_eq_Pn: "ν p s = (∑i. l^i * measure_pmf.expectation (Pn' p s i) r)"
  by (simp add: ν_fin_eq_Pn ν_eq_lim suminf_eq_lim)


subsection ‹Reward of a Decision Rule›
(* All statements in this context are for an arbitrary but fixed decision rule d. *)
context 
  fixes d :: "('s, 'a) dec"
begin
(* r_dec s: the reward r (s, a) integrated over actions a with respect to d s. *)
abbreviation "r_dec s ≡ ∫a. r (s, a) ∂d s"

(* The reward of a decision rule is bounded in absolute value by r⇩_M. *)
lemma abs_r_dec_le: "¦r_dec s¦ ≤ r⇩_M"
  using expectation_abs_r_le integral_abs_bound order_trans by fast

(* r_dec s equals the expectation of r under K0' d s (K0' is defined outside this file). *)
lemma r_dec_eq_r_K0: "r_dec s = measure_pmf.expectation (K0' d s) r"
  by (simp add: K0'_def)

(* r_dec is a member of bfun. *)
lemma r_dec_bfun: "r_dec ∈ bfun"
  using abs_r_dec_le by (auto intro!: bfun_normI)

(* r_dec⇩_b: r_dec lifted to the type 's ⇒⇩_b real; well-definedness is r_dec_bfun. *)
lift_definition r_dec⇩_b :: "'s ⇒⇩_b real" is "r_dec"
  using r_dec_bfun .

(* Make the representation equation of r_dec⇩_b and bfun.Bfun_inverse simp rules. *)
declare r_dec⇩_b.rep_eq[simp] bfun.Bfun_inverse[simp]

(* The norm of r_dec⇩_b is at most r⇩_M. *)
lemma norm_r_dec_le: "norm r_dec⇩_b ≤ r⇩_M"
  by (simp add: abs_r_dec_le norm_bound)
end

(* For the decision rule mk_dec_det d, the reward in state s is r (s, d s) (simp rule). *)
lemma r_dec_det [simp]: "r_dec (mk_dec_det d) s = r (s, d s)"
  unfolding mk_dec_det_def by auto

subsection ‹Transition Probability Matrix for MDPs›

(* All statements in this context are for a fixed sequence p of decision rules,
   indexed by natural numbers. *)
context
  fixes p :: "nat ⇒ ('s, 'a) dec"
begin
(* 𝒫⇩_X n: push_exp applied to the family s ↦ Xn' (mk_markovian p) s n
   (push_exp and Xn' are defined outside this file). *)
definition "𝒫⇩_X n = push_exp (λs. Xn' (mk_markovian p) s n)"

(* At index 0, 𝒫⇩_X is the identity (simp rule). *)
lemma 𝒫⇩_X_0[simp]: "𝒫⇩_X 0 = id"
  by (simp add: 𝒫⇩_X_def)

(* 𝒫⇩_X t is a bounded linear map (simp rule). *)
lemma 𝒫⇩_X_bounded_linear[simp]: "bounded_linear (𝒫⇩_X t)"
  unfolding 𝒫⇩_X_def by simp

(* The operator norm of 𝒫⇩_X t is 1 (simp rule). *)
lemma norm_𝒫⇩_X [simp]: "onorm (𝒫⇩_X t) = 1"
  unfolding 𝒫⇩_X_def by simp

(* Applying 𝒫⇩_X n does not increase the norm (simp rule). *)
lemma norm_𝒫⇩_X_apply[simp]: "norm (𝒫⇩_X n x) ≤ norm x"
  using onorm[OF 𝒫⇩_X_bounded_linear] by simp

(* 𝒫⇩_X t applied to the reward of the decision rule p t has norm at most r⇩_M. *)
lemma 𝒫⇩_X_bound_r: "norm (𝒫⇩_X t (r_dec⇩_b (p t))) ≤ r⇩_M"
  using norm_𝒫⇩_X_apply norm_r_dec_le order.trans by blast

(* Hence the sequence of these values over all t is bounded. *)
lemma 𝒫⇩_X_bounded_r: "bounded (range (λt. (𝒫⇩_X t (r_dec⇩_b (p t)))))"
  using 𝒫⇩_X_bound_r by (auto intro!: boundedI)

end

(* Pointwise: the n-step value of mk_markovian p in state s is the sum over i < n of
   l^i times 𝒫⇩_X p i applied to the reward of decision rule p i, evaluated at s. *)
lemma ν_fin_elem: "ν_fin (mk_markovian p) n s = (∑i<n. l^i * 𝒫⇩_X p i (r_dec⇩_b (p i)) s)"
  unfolding 𝒫⇩_X_def ν_fin_eq_Pn Pn'_markovian_eq_Xn'_bind measure_pmf_bind
  using measure_pmf_in_subprob_algebra abs_r_le_r⇩_M
  by (subst integral_bind) (auto simp: r_dec_eq_r_K0)

(* The same identity for ν⇩_b_fin, as an equation between bounded functions. *)
lemma ν⇩_b_fin_eq_𝒫⇩_X: "ν⇩_b_fin (mk_markovian p) n = (∑i<n. l^i *⇩_R 𝒫⇩_X p i (r_dec⇩_b (p i)))"
  by (auto simp: ν_fin_elem sum_apply_bfun ν⇩_b_fin.rep_eq)

(* The same identity for ν_fin viewed as a function of the state. *)
lemma ν_fin_eq_𝒫⇩_X: "ν_fin (mk_markovian p) n = (∑i<n. l^i *⇩_R 𝒫⇩_X p i (r_dec⇩_b (p i)))"
  by (metis ν⇩_b_fin.rep_eq ν⇩_b_fin_eq_𝒫⇩_X)


text ‹
@{term "𝒫⇩₁ d v"} defines for each state the expected value of @{term v} 
after taking a single step in the MDP according to the decision rule @{term d}.  
›

(* All statements in this context are for an arbitrary but fixed decision rule d. *)
context
  fixes d :: "('s, 'a) dec"
begin
(* 𝒫⇩₁: push_exp (K_st d) lifted to a bounded linear function on 's ⇒⇩_b real;
   well-definedness is push_exp_bounded_linear. *)
lift_definition 𝒫⇩₁ :: "('s ⇒⇩_b real) ⇒⇩_L ('s ⇒⇩_b real)" is "push_exp (K_st d)"
  using push_exp_bounded_linear .

(* 𝒫⇩₁ maps the bounded function 1 to itself (simp rule). *)
lemma 𝒫⇩₁_bfun_one [simp]:"𝒫⇩₁ 1 = 1"
  by (auto simp: 𝒫⇩₁.rep_eq)

(* The same holds for every power of 𝒫⇩₁ (simp rule); by induction on t. *)
lemma 𝒫⇩₁_pow_bfun_one [simp]: "(𝒫⇩₁^^t) 1 = 1"
  by (induction t) auto

(* Applying the n-th power of 𝒫⇩₁ is the n-fold iteration of applying 𝒫⇩₁. *)
lemma 𝒫⇩₁_pow: "blinfun_apply (𝒫⇩₁ ^^ n) = blinfun_apply 𝒫⇩₁ ^^ n"
  by (induction n) auto

(* The norm of 𝒫⇩₁ is 1 (simp rule). *)
lemma norm_𝒫⇩₁ [simp]: "norm 𝒫⇩₁ = 1"
  by (simp add: norm_blinfun.rep_eq 𝒫⇩₁.rep_eq)
end

(* Unfolding 𝒫⇩_X at the front: n+1 steps along p equal one step with p 0 applied
   to the result of n steps along the shifted sequence (λn. p (Suc n)). *)
lemma 𝒫⇩_X_Suc: "𝒫⇩_X p (Suc n) v = 𝒫⇩₁ (p 0) ((𝒫⇩_X (λn. p (Suc n)) n) v)"
  unfolding 𝒫⇩_X_def 𝒫⇩₁.rep_eq
  by (fastforce intro!: abs_le_norm_bfun integral_bind[where K = "count_space UNIV"]
      simp: measure_pmf_in_subprob_algebra measure_pmf_bind Suc_Xn'_markovian)

(* Unfolding 𝒫⇩_X at the back: n+1 steps along p equal n steps along p applied to
   one step with p n. Proved by induction on n for arbitrary p, using 𝒫⇩_X_Suc. *)
lemma 𝒫⇩_X_Suc': "𝒫⇩_X p (Suc n) v = 𝒫⇩_X p n (𝒫⇩₁ (p n) v)"
proof (induction n arbitrary: p)
  case 0
  thus ?case
    by (simp add: 𝒫⇩_X_Suc)
next
  case (Suc n)
  thus ?case 
    by (metis 𝒫⇩_X_Suc)
qed

(* For a constant sequence of decision rules, 𝒫⇩_X at index n is the n-th power of 𝒫⇩₁ d. *)
lemma 𝒫⇩_X_const: "𝒫⇩_X (λ_. d) n = 𝒫⇩₁ d ^^ n"
  by (induction n) (auto simp add: 𝒫⇩₁_pow 𝒫⇩_X_Suc)

(* Same statement as 𝒫⇩_X_const under a second name; proved directly from it. *)
lemma 𝒫⇩_X_sconst: "𝒫⇩_X (λ_. p) n = 𝒫⇩₁ p ^^n"
  using 𝒫⇩_X_const.

(* The operator norm of every power of 𝒫⇩₁ d is 1 (simp rule). *)
lemma norm_P_n[simp]: "onorm (𝒫⇩₁ d ^^ n) = 1"
  using norm_𝒫⇩_X[of "λ_. d"] by (auto simp: 𝒫⇩_X_sconst)

(* The same statement for the norm on bounded linear functions (simp rule). *)
lemma norm_𝒫⇩₁_pow [simp]: "norm (𝒫⇩₁ d ^^ t) = 1"
  by (simp add: norm_blinfun.rep_eq)

(* 𝒫⇩_X_Suc' read from right to left. *)
lemma 𝒫⇩_X_Suc_n_elem: "𝒫⇩_X p n (𝒫⇩₁ (p n) v) = 𝒫⇩_X p (Suc n) v"
  using 𝒫⇩_X_Suc' 𝒫⇩₁.rep_eq by auto

(* Applying 𝒫⇩₁ (p 0) coincides with 𝒫⇩_X p at index 1. *)
lemma 𝒫⇩₁_eq_𝒫⇩_X_one: "blinfun_apply (𝒫⇩₁ (p 0)) = 𝒫⇩_X p 1"
  by (auto simp: 𝒫⇩_X_Suc' 𝒫⇩₁.rep_eq)


(* 𝒫⇩₁ d maps non-negative bounded functions to non-negative bounded functions. *)
lemma 𝒫⇩₁_pos: "0 ≤ u ⟹ 0 ≤ 𝒫⇩₁ d u"
  by (auto simp: 𝒫⇩₁.rep_eq less_eq_bfun_def)

(* The same fact phrased with the predicate nonneg_blinfun. *)
lemma 𝒫⇩₁_nonneg: "nonneg_blinfun (𝒫⇩₁ d)"
  by (simp add: 𝒫⇩₁_pos nonneg_blinfun_def)

(* Every power of 𝒫⇩₁ d preserves non-negativity; by induction on n. *)
lemma 𝒫⇩₁_n_pos: "0 ≤ u ⟹ 0 ≤ (𝒫⇩₁ d ^^ n) u"
  by (induction n) (auto simp: 𝒫⇩₁.rep_eq less_eq_bfun_def)

(* The same fact phrased with nonneg_blinfun. *)
lemma 𝒫⇩₁_n_nonneg: "nonneg_blinfun (𝒫⇩₁ d ^^ n)"
  by (simp add: 𝒫⇩₁_n_pos nonneg_blinfun_def)

(* Scaling the n-th power by l^n still preserves non-negativity. *)
lemma 𝒫⇩₁_n_disc_pos: "0 ≤ u ⟹ 0 ≤ (l^n *⇩_R 𝒫⇩₁ d ^^n) u"
  by (auto simp: 𝒫⇩₁_n_pos scaleR_nonneg_nonneg blinfun.scaleR_left)

(* The partial sums of the discounted powers preserve non-negativity. *)
lemma 𝒫⇩₁_sum_pos: "0 ≤ u ⟹ 0 ≤ (∑t≤n. l^t *⇩_R (𝒫⇩₁ d ^^ t)) u"
  using 𝒫⇩₁_n_pos 𝒫⇩₁_pos
  by (induction n) (auto simp: blinfun.add_left blinfun.scaleR_left scaleR_nonneg_nonneg)

(* For non-negative u, the partial sums of the discounted powers applied to u
   dominate u itself. *)
lemma 𝒫⇩₁_sum_ge: 
  assumes "0 ≤ u" 
  shows "u ≤ (∑t≤n. l^t *⇩_R 𝒫⇩₁ d ^^t) u"
  using 𝒫⇩₁_n_disc_pos[OF assms, of "Suc _"]
  by (induction n) (auto intro: add_increasing2 simp add: blinfun.add_left)


subsection ‹The Bellman Operator›
(* L d v: the reward of decision rule d plus l times 𝒫⇩₁ d applied to v. *)
definition "L d v ≡ r_dec⇩_b d + l *⇩_R 𝒫⇩₁ d v"

(* Norm bound for L d v in terms of r⇩_M and the norm of v. *)
lemma norm_L_le: "norm (L d v) ≤ r⇩_M + l * norm v"
  using norm_blinfun[of "𝒫⇩₁ d"] norm_𝒫⇩₁ norm_r_dec_le
  by (auto intro!: norm_add_rule_thm mult_left_mono simp: L_def)

(* The same bound pointwise, for the absolute value at a state s. *)
lemma abs_L_le: "¦L d v s¦ ≤ r⇩_M + l * norm v"
  using order.trans[OF norm_le_norm_bfun norm_L_le] by auto

subsubsection ‹Bellman Operator for Single Actions›
(* L⇩_a a v s: the reward r (s, a) plus l times the expectation of v under K (s, a). *)
abbreviation "L⇩_a a v s ≡ r (s, a) + l * measure_pmf.expectation (K (s,a)) v"

(* For a bounded function v, L⇩_a is bounded in absolute value by r⇩_M + l * norm v. *)
lemma L⇩_a_le:
  fixes v :: "'s ⇒⇩_b real"
  shows "¦L⇩_a a v s¦ ≤ r⇩_M + l * norm v"
  using abs_r_le_r⇩_M
  by (fastforce intro: order_trans[OF abs_triangle_ineq] order_trans[OF integral_abs_bound]  
      add_mono mult_mono measure_pmf.integral_le_const abs_le_norm_bfun 
      simp: abs_mult)

(* Hence, for fixed v and s, the values of L⇩_a over all actions form a bounded set. *)
lemma L⇩_a_bounded:
  "bounded (range (λa. L⇩_a a (apply_bfun v) s))"
  using L⇩_a_le by (auto intro!: boundedI)

(* Integrating L⇩_a over actions with respect to a pmf d splits into the integral of
   the reward plus l times the iterated integral of v. The proof rewrites with
   integral_add, which requires integrability of both summands. *)
lemma L⇩_a_int: 
  fixes d :: "'a pmf" and v :: "'s ⇒⇩_b real"
  shows "(∫a. L⇩_a a v s ∂d) = (∫a. r (s, a) ∂d) + l * ∫a. ∫s'. v s' ∂K (s, a) ∂d"
proof (subst Bochner_Integration.integral_add)
  show "integrable d (λa. r (s, a))"
    using abs_r_le_r⇩_M by (fastforce intro!: bounded_integrable simp: bounded_iff)
  show "integrable d (λa. l * ∫s'. v s' ∂K (s, a))"
    by (intro bounded_integrable) 
      (auto intro!: mult_mono order_trans[OF integral_abs_bound] boundedI[of _ "l * norm v"]
        measure_pmf.integral_le_const simp: abs_le_norm_bfun abs_mult)
qed auto

(* L d v at state s is the expectation of L⇩_a over actions drawn from d s. *)
lemma L_eq_L⇩_a: "L d v s = measure_pmf.expectation (d s) (λa. L⇩_a a v s)"
  unfolding L⇩_a_int L_def K_st_def 𝒫⇩₁.rep_eq
  by (auto simp: measure_pmf_bind integral_measure_pmf_bind[where B = "norm v"] abs_le_norm_bfun)

(* For the decision rule mk_dec_det d, L reduces to L⇩_a at the action d s. *)
lemma L_eq_L⇩_a_det: "L (mk_dec_det d) v s = L⇩_a (d s) v s"
  by (auto simp: L_eq_L⇩_a mk_dec_det_def)

(* Conversely, the expectation of L⇩_a under a pmf p equals L at s for a decision rule
   that uses p in state s and return_pmf of some action of A t in every other state t. *)
lemma L⇩_a_eq_L: "measure_pmf.expectation p (λa. L⇩_a a (apply_bfun v) s) = 
  L (λt. if t = s then p else return_pmf (SOME a. a ∈ A t)) v s"
  unfolding L_eq_L⇩_a by auto

(* Upper bound for L d v s without the absolute value. *)
lemma L_le: "L d v s ≤ r⇩_M + l * norm v"
  unfolding L_def
  using norm_𝒫⇩₁ norm_blinfun[of "(𝒫⇩₁ d)"] abs_r_dec_le
  by (fastforce intro: order_trans[OF le_norm_bfun] add_mono mult_left_mono dest: abs_le_D1)

(* Upper bound for L⇩_a without the absolute value; obtained from L⇩_a_le. *)
lemma L⇩_a_le': "L⇩_a a (apply_bfun v) s ≤ r⇩_M + l * norm v"
  using L⇩_a_le abs_le_D1 by blast


subsection ‹Optimality Equations›

(* ℒ v s: the supremum of L d v s over all decision rules d in D⇩_R. *)
definition "ℒ (v :: 's ⇒⇩_b real) s = (⨆d ∈ D⇩_R. L d v s)"

(* ℒ v is a member of bfun. *)
lemma ℒ_bfun: "ℒ v ∈ bfun"
  unfolding ℒ_def using abs_L_le ex_dec by (fastforce intro!: cSup_abs_le bfun_normI)

(* ℒ⇩_b: ℒ lifted to an operator on 's ⇒⇩_b real; well-definedness is ℒ_bfun. *)
lift_definition ℒ⇩_b :: "('s ⇒⇩_b real) ⇒ 's ⇒⇩_b real" is ℒ
  using ℒ_bfun .

(* For fixed v and s, the values L p v s over all p form a bounded set. *)
lemma L_bounded[simp, intro]: "bounded (range (λp. L p v s))"
  using abs_L_le by (auto intro!: boundedI)

(* The same for the image of an arbitrary set X. *)
lemma L_bounded'[simp, intro]: "bounded ((λp. L p v s) ` X)"
  by (auto intro: bounded_subset)

(* In particular, such an image is bounded above. *)
lemma L_bdd_above[simp, intro]: "bdd_above ((λp. L p v s) ` X)"
  by (auto intro: bounded_imp_bdd_above)

(* For every d satisfying is_dec, L d v lies below ℒ⇩_b v. *)
lemma L_le_ℒ⇩_b: "is_dec d ⟹ L d v ≤ ℒ⇩_b v"
  by (fastforce simp: ℒ⇩_b.rep_eq ℒ_def intro!: cSUP_upper)

subsubsection ‹Equivalences involving @{const ℒ⇩_b}›

(* ℒ v s can be computed state-locally: it equals the supremum, over all pmfs pa whose
   support is contained in A s, of the integral of L⇩_a with respect to pa.
   Proved by antisymmetry:
   - "≤": every decision rule in D⇩_R yields such a pmf at state s (via cSUP_mono);
   - "≥": every such pmf n is extended to a decision rule ?p that uses n in state s and
     an arbitrary pmf with support in A s' (chosen with SOME) in every other state s'. *)
lemma SUP_step_MR_eq:
  "ℒ v s = (⨆pa ∈ {pa. set_pmf pa ⊆ A s}. (∫a. L⇩_a a v s ∂measure_pmf pa))"
  unfolding ℒ_def
proof (intro antisym)
  show "(⨆d∈D⇩_R. L d v s) ≤ (⨆pa ∈ {pa. set_pmf pa ⊆ A s}. ∫a. L⇩_a a v s ∂measure_pmf pa)"
  proof (rule cSUP_mono)
    show "D⇩_R ≠ {}"
      using D⇩_R_ne .
  next show "bdd_above ((λpa. ∫a. L⇩_a a v s ∂measure_pmf pa) ` {pa. set_pmf pa ⊆ A s})"
      using L⇩_a_bounded L⇩_a_le
      by (auto intro!: order_trans[OF integral_abs_bound] 
          bounded_imp_bdd_above boundedI[where B = "r⇩_M + l * norm v"] 
          measure_pmf.integral_le_const bounded_integrable)
  next show "∃m∈{pa. set_pmf pa ⊆ A s}. L n v s ≤ ∫a. L⇩_a a v s ∂measure_pmf m" if "n ∈ D⇩_R" for n
      using that
      by (fastforce simp: L_eq_L⇩_a L⇩_a_int is_dec_def)
  qed
next
  have aux: "{pa. set_pmf pa ⊆ A s} ≠ {}"
    using D⇩_R_ne is_dec_def by auto
  show "(⨆pa∈{pa. set_pmf pa ⊆ A s}. ∫a. L⇩_a a v s ∂measure_pmf pa) ≤ (⨆d∈D⇩_R. L d v s)"
  proof (intro cSUP_least[OF aux] cSUP_upper2)
    fix n 
    assume h: "n ∈ {pa. set_pmf pa ⊆ A s}"
    let ?p = "(λs'. if s = s' then n else SOME a. set_pmf a ⊆ A s')"
    have aux: "∃a. set_pmf a ⊆ A sa" for sa
      using ex_dec is_dec_def by blast
    show "?p ∈ D⇩_R"
      unfolding is_dec_def using h someI_ex[OF aux] by auto
    thus "(∫a. L⇩_a a v s ∂n) ≤ L ?p v s"
      by (auto simp: L_eq_L⇩_a)
    show "bdd_above ((λd. L d v s) ` D⇩_R)"
      by (fastforce intro!: bounded_imp_bdd_above simp: bounded_def)
  next
  qed
qed

(* SUP_step_MR_eq restated for the lifted operator ℒ⇩_b. *)
lemma ℒ⇩_b_eq_SUP_L⇩_a: "ℒ⇩_b v s = (⨆p ∈ {p. set_pmf p ⊆ A s}. ∫a. L⇩_a a v s ∂measure_pmf p)"
  using SUP_step_MR_eq ℒ⇩_b.rep_eq by presburger

(* The supremum of L over the decision rules mk_dec_det d with d in D⇩_D equals the
   supremum of L⇩_a over the actions in A s. Both directions use cSUP_mono: a rule n is
   matched by its action n s, and an action n by a rule that picks n in state s and
   some action of A s' (chosen with SOME) in every other state s'. *)
lemma SUP_step_det_eq: "(⨆d ∈ D⇩_D. L (mk_dec_det d) v s) = (⨆a ∈ A s. L⇩_a a v s)"
proof (intro antisym cSUP_mono)
  show "bdd_above ((λa. L⇩_a a v s) ` A s)"
    using L⇩_a_bounded by (fastforce intro!: bounded_imp_bdd_above simp: bounded_def)
  show "bdd_above ((λd. L (mk_dec_det d) v s) ` D⇩_D)"
    by (auto intro!: bounded_imp_bdd_above boundedI abs_L_le)
  show "∃m∈A s. L (mk_dec_det n) v s ≤ L⇩_a m v s" if "n ∈ D⇩_D" for n
    using that is_dec_det_def by (auto simp: L_eq_L⇩_a_det intro: bexI[of _ "n s"])
  show "∃m∈D⇩_D. L⇩_a n v s ≤ L (mk_dec_det m) v s" if "n ∈ A s" for n
    using that A_ne
    by (fastforce simp: L_eq_L⇩_a_det is_dec_det_def some_in_eq
        intro!: bexI[of _ "λs'. if s = s' then _ else SOME a. a ∈ A s'"])
qed (auto simp: A_ne)

(* L⇩_a is integrable over actions with respect to any pmf x; shown separately for
   the reward summand and for the expectation summand. *)
lemma integrable_L⇩_a: "integrable (measure_pmf x) (λa. L⇩_a a (apply_bfun v) s)"
proof (intro Bochner_Integration.integrable_add integrable_mult_right)
  show "integrable (measure_pmf x) (λx. r (s, x))"
    using abs_r_le_r⇩_M 
    by (auto intro: measure_pmf.integrable_const_bound[of _ "r⇩_M"])
next
  show "integrable (measure_pmf x) (λx. measure_pmf.expectation (K (s, x)) v)"
    by (auto intro!: bounded_integrable boundedI order.trans[OF integral_abs_bound] 
        measure_pmf.integral_le_const abs_le_norm_bfun)
qed

(* Randomizing over actions does not increase the supremum: the supremum of the expected
   L⇩_a over pmfs with support in A s equals the supremum of L⇩_a over the actions in A s.
   The "≤" direction uses lemma_4_3_1 (defined outside this file); the "≥" direction
   goes through SUP_step_MR_eq and SUP_step_det_eq. *)
lemma SUP_L⇩_a_eq_det:
  fixes v :: "'s ⇒⇩_b real"
  shows "(⨆p∈{p. set_pmf p ⊆ A s}. ∫a. L⇩_a a v s ∂measure_pmf p) = (⨆a∈A s. L⇩_a a v s)"
proof (intro antisym)
  show "(⨆pa∈{pa. set_pmf pa ⊆ A s}. measure_pmf.expectation pa (λa. L⇩_a a v s))
    ≤ (⨆a∈A s. L⇩_a a v s)"
    using ex_dec is_dec_def integrable_L⇩_a A_ne L⇩_a_bounded
    by (fastforce intro: bounded_range_subset intro!: cSUP_least lemma_4_3_1)
  show "(⨆a∈A s. L⇩_a a v s) ≤ (⨆p∈{p. set_pmf p ⊆ A s}. ∫a. L⇩_a a v s ∂measure_pmf p)"
    unfolding SUP_step_MR_eq[symmetric] SUP_step_det_eq[symmetric] ℒ_def
    using ex_dec_det by (fastforce intro!: cSUP_mono)
qed

(* ℒ v s is already attained as the supremum over the decision rules mk_dec_det d
   with d in D⇩_D. *)
lemma ℒ_eq_SUP_det: "ℒ v s = (⨆d ∈ D⇩_D. L (mk_dec_det d) v s)"
  using SUP_step_MR_eq SUP_step_det_eq SUP_L⇩_a_eq_det by auto

(* The same statement for the lifted operator ℒ⇩_b. *)
lemma ℒ⇩_b_eq_SUP_det: "ℒ⇩_b v s = (⨆d ∈ D⇩_D. L (mk_dec_det d) v s)"
  using ℒ_eq_SUP_det unfolding ℒ⇩_b.rep_eq by auto


subsection ‹Monotonicity›

(* 𝒫⇩_X p n is monotone (intro rule). *)
lemma 𝒫⇩_X_mono[intro]: "a ≤ b ⟹ 𝒫⇩_X p n a ≤ 𝒫⇩_X p n b"
  by (fastforce simp: 𝒫⇩_X_def intro: integral_mono)

(* 𝒫⇩₁ p is monotone (intro rule); follows from 𝒫⇩₁_nonneg. *)
lemma 𝒫⇩₁_mono[intro]: "a ≤ b ⟹ 𝒫⇩₁ p a ≤ 𝒫⇩₁ p b"
  using 𝒫⇩₁_nonneg by auto

(* L d is monotone in its value argument (intro rule). *)
lemma L_mono[intro]: "u ≤ v ⟹ L d u ≤ L d v"
  unfolding L_def by (auto intro: scaleR_left_mono)

(* ℒ⇩_b is monotone (intro rule). *)
lemma ℒ⇩_b_mono[intro]: "u ≤ v ⟹ ℒ⇩_b u ≤ ℒ⇩_b v"
  using  ex_dec L_mono[of u v] 
  by (fastforce intro!: cSUP_mono simp: ℒ⇩_b.rep_eq ℒ_def)

(* If ℒ⇩_b v ≤ v, then L d v ≤ v for every decision rule d in D⇩_R. *)
lemma step_mono:
  assumes "ℒ⇩_b v ≤ v" "d ∈ D⇩_R"
  shows "L d v ≤ v"
  using assms L_le_ℒ⇩_b order.trans by blast

(* If v ≤ ℒ⇩_b v and e > 0, there is some d in D⇩_D such that v ≤ L (mk_dec_det d) v + e.
   Proof outline: v s lies below the supremum of L⇩_a over A s, so for each state some
   action comes within e of v s; these per-state actions are assembled into one d. *)
lemma step_mono_elem_det:
  assumes "v ≤ ℒ⇩_b v" "e > 0"
  shows "∃d∈D⇩_D. v ≤ L (mk_dec_det d) v + e *⇩_R 1"
proof -
  have "v s ≤ (⨆a∈A s. L⇩_a a v s)" for s
    using SUP_step_det_eq ℒ⇩_b_eq_SUP_det assms(1) by fastforce
  hence "∃a∈A s. v s - e < L⇩_a a v s" for s
    using A_ne L⇩_a_le'
    by (subst less_cSUP_iff[symmetric]) (fastforce simp: assms add_strict_increasing algebra_simps intro!: bdd_above.I2)+
  hence aux: "∃a∈A s. v s ≤ L⇩_a a v s + e" for s
    by (auto simp: diff_less_eq intro: less_imp_le)
  then obtain d where "is_dec_det d" "v s ≤ L (mk_dec_det d) v s + e" for s
    by (metis L_eq_L⇩_a_det is_dec_det_def)
  thus ?thesis
    by fastforce
qed

(* Variant of step_mono_elem_det with the witness stated as an element of D⇩_R. *)
lemma step_mono_elem:
  assumes "v ≤ ℒ⇩_b v" "e > 0"
  shows "∃d∈D⇩_R. v ≤ L d v + e *⇩_R 1"
  using assms step_mono_elem_det by blast

(* If ℒ⇩_b v ≤ v and p is in Π⇩_M⇩_R, then applying L (p n) before 𝒫⇩_X p n
   gives a value below 𝒫⇩_X p n v. *)
lemma 𝒫⇩_X_L_le:
  assumes "ℒ⇩_b v ≤ v" "p ∈ Π⇩_M⇩_R"
  shows "𝒫⇩_X p n (L (p n) v) ≤ 𝒫⇩_X p n v"
  using assms step_mono by auto

end

(* Locale extending MDP_reward with the additional assumption that the discount
   factor l is strictly less than 1 (a simp rule). *)
locale MDP_reward_disc = MDP_reward A K r l
  for
    A and 
    K :: "'s ::countable × 'a ::countable ⇒ 's pmf" and
    r l +
  assumes
    disc_lt_one [simp]: "l < 1"
begin

(* is_opt_act v s: the predicate is_arg_max for the function a ↦ L⇩_a a v s,
   restricted to the actions in A s.
   opt_acts v s: the set of all actions satisfying is_opt_act v s. *)
definition "is_opt_act v s = is_arg_max (λa. L⇩_a a v s) (λa. a ∈ A s)"
abbreviation "opt_acts v s ≡ {a. is_opt_act v s a}"

(* The series of l^i * x is summable for every constant x (intro and simp rule). *)
lemma summable_disc [intro, simp]: "summable (λi. l ^ i * x)"
  by (simp add: mult.commute)

(* Summability of discounted rewards along an arbitrary sequence sa of state-action
   pairs, in three variants. The variant with the absolute value outside is shown first
   by comparison with summable_disc; the other two are derived from it. *)
lemma summable_r_disc[intro, simp]:
  "summable (λi. ¦l ^ i * r (sa i)¦)"
  "summable (λi. l ^ i * ¦r (sa i)¦)"
  "summable (λi. l ^ i * r (sa i))"
proof -
  show "summable (λi. ¦l ^ i * r (sa i)¦)"
    using abs_r_le_r⇩_M
    by (fastforce intro!: mult_left_mono summable_comparison_test'[OF summable_disc])
  thus "summable (λi. l ^ i * r (sa i))" "summable (λi. l ^ i * ¦r (sa i)¦)"
    by (auto intro: summable_rabs_cancel)
qed

(* Introduction rule: summability of l^t * norm F implies summability of the norms
   of l^t *⇩_R F, for a constant F. *)
lemma summable_norm_disc_I[intro]:
  assumes "summable (λt. (l^t * norm F))"
  shows "summable (λt. norm (l^t *⇩_R F))"
  using assms by auto

(* The same rule for a sequence F t. *)
lemma summable_norm_disc_I'[intro]:
  assumes "summable (λt. (l^t * norm (F t)))"
  shows "summable (λt. norm (l^t *⇩_R F t))"
  using assms by auto

(* For a bounded sequence F, the discounted norms l^t * norm (F t) are summable;
   shown with Abel_lemma from a uniform bound b on the norms. *)
lemma summable_discI [intro]:
  assumes "bounded (range F)"
  shows "summable (λt. l^t * norm (F t))"
proof -
  obtain b where "norm (F x) ≤ b" for x
    using assms by (auto simp: bounded_iff)
  thus ?thesis
    using Abel_lemma[of l 1 F b] by (auto simp: mult.commute)
qed

(* For a bounded sequence F in a Banach space, the discounted series itself is summable. *)
lemma summable_disc_reward [intro]:
  assumes "bounded (range (F :: nat ⇒ 'b :: banach))"
  shows "summable (λt. l^t *⇩_R (F t))"
  using assms by (auto intro: summable_norm_cancel)

(* For a bounded function f on the naturals, the discounted norms of its values
   are summable. *)
lemma summable_norm_bfun_disc: "summable (λt. l^t * norm (apply_bfun f t))"
  using norm_le_norm_bfun
  by (auto simp: mult.commute[of "l^_"] intro!: Abel_lemma[of _ 1 _ "norm f"])

(* Hence the discounted values themselves are summable (simp rule). *)
lemma summable_bfun_disc [simp]: "summable (λt. l^t * (apply_bfun f t))"
proof -
  have "norm (l^t * apply_bfun f t) = l^t * norm (apply_bfun f t)" for t
    by (auto simp: abs_mult)
  hence "summable (λt. norm (l^t * (apply_bfun f t)))"
    by (auto simp only: abs_mult)
  thus ?thesis
    by (auto intro: summable_norm_cancel)
qed

(* If norm f ≤ B, the discounted sum of the norms of f's values is bounded by the
   discounted sum of the constant B. *)
lemma norm_bfun_disc_le: "norm f ≤ B ⟹ (∑x. l^x * norm (apply_bfun f x)) ≤ (∑x. l^x * B)"
  by (fastforce intro!: suminf_le mult_left_mono norm_le_norm_bfun intro: order.trans)

(* The same bound for the discounted sum of the values themselves. *)
lemma norm_bfun_disc_le': "norm f ≤ B ⟹ (∑x. l^x * (apply_bfun f x)) ≤ (∑x. l^x * B)"
  by (auto simp: mult_left_mono intro!: suminf_le order.trans[OF _ norm_bfun_disc_le])

(* Closed form of the discounted sum of a constant B: it equals B / (1 - l). *)
lemma sum_disc_lim_l: "(∑x. l^x * B) = B /(1-l)"
  by (simp add: suminf_mult2[symmetric] summable_geometric suminf_geometric[of l])

(* The discounted sum of the values of f is at most norm f / (1 - l).
   NOTE(review): the proof refers to sum_disc_lim, which is not defined in the visible
   part of this file - presumably it comes from an imported theory; confirm. *)
lemma sum_disc_bound: "(∑x. l^x * apply_bfun f x) ≤ (norm f) /(1-l)"
  using norm_bfun_disc_le' sum_disc_lim  by auto

(* For a sequence f of bounded functions whose norms are all at most B, the norm of the
   discounted series is at most B / (1 - l). The calculation moves the norm inside the
   series, bounds each term by l^x * B, and then uses the closed form of that sum. *)
lemma sum_disc_bound':
  fixes f :: "nat ⇒ 'b ⇒⇩_b real"
  assumes h: "∀n. norm (f n) ≤ B"
  shows "norm (∑x. l^x *⇩_R f x) ≤ B /(1-l)"
proof -
  have "norm (∑x. l^x *⇩_R f x) ≤  (∑x. norm (l^x *⇩_R f x))"
    using h
    by (fastforce intro!: boundedI summable_norm)
  also have "… ≤ (∑x. l^x * B)"
    using h
    by (auto intro!: suminf_le boundedI simp: mult_mono')
  also have "… = B /(1-l)"
    by (simp add: sum_disc_lim)
  finally show "norm (∑x. l^x *⇩_R f x) ≤ B /(1-l)" .
qed


(* The infinite discounted trace reward is bounded in absolute value by the
   discounted sum of r⇩_M. *)
lemma abs_ν_trace_le: "¦ν_trace t¦ ≤ (∑i. l ^ i * r⇩_M)"
  by (auto intro!: abs_r_le_r⇩_M mult_left_mono order_trans[OF summable_rabs] suminf_le)

(* The infinite discounted trace reward is integrable with respect to 𝒯 p s. *)
lemma integrable_ν_trace: "integrable (𝒯 p s) ν_trace"
  by (fastforce simp: bounded_iff intro: abs_ν_trace_le)

(* All statements in this context are for an arbitrary but fixed policy p. *)
context 
  fixes p :: "('s, 'a) pol"
begin

(* ν p s equals the integral of the infinite discounted trace reward with respect to
   𝒯 p s. The finite-horizon values converge to this integral by dominated convergence,
   with the discounted sum of r⇩_M as the dominating constant. *)
lemma ν_eq_ν_trace: "ν p s = ∫t. ν_trace t ∂𝒯 p s"
proof -
  have "(λn. ν_fin p n s) ⇢ ∫t. ν_trace t ∂𝒯 p s"
    unfolding ν_fin_def
  proof(intro integral_dominated_convergence)
    show "AE x in 𝒯 p s. ν_trace_fin x ⇢ ν_trace x"
      using summable_LIMSEQ by blast
  next
    have "(∑i<N. l ^ i * r⇩_M) ≤ (∑N. l ^ N * r⇩_M)" for N
      by (auto intro: sum_le_suminf simp: r⇩_M_nonneg)
    thus "AE x in 𝒯 p s. norm (ν_trace_fin x N) ≤ (∑N. l ^ N * r⇩_M)" for N
      using order_trans[OF abs_ν_trace_fin_le] by fastforce
  qed auto
  thus ?thesis
    using ν_eq_lim limI by fastforce
qed

(* ν p s is bounded in absolute value by the discounted sum of r⇩_M. *)
lemma abs_ν_le: "¦ν p s¦ ≤ (∑i. l^i * r⇩_M)"
  unfolding ν_eq_Pn
  using abs_exp_r_le
  by (fastforce intro!: order.trans[OF summable_rabs] suminf_le summable_comparison_test'[OF summable_disc] mult_left_mono)

(* The same upper bound without the absolute value. *)
lemma ν_le: "ν p s ≤ (∑i. l^i * r⇩_M)"
  by (auto intro: abs_ν_le abs_le_D1)

(* 6.1.2 in Puterman *)
(* ν p is a member of bfun. *)
lemma ν_bfun: "ν p ∈ bfun"
  by (auto intro!: abs_ν_le)

(* ν⇩_b: ν p lifted to the type 's ⇒⇩_b real; well-definedness is ν_bfun. *)
lift_definition ν⇩_b :: "'s ⇒⇩_b real" is "ν p"
  using ν_bfun by blast

(* The norm of ν⇩_b is at most r⇩_M / (1 - l).
   NOTE(review): uses sum_disc_lim, which is not defined in the visible part of this
   file - presumably imported; confirm. *)
lemma norm_ν_le: "norm ν⇩_b ≤ r⇩_M / (1-l)"
  using abs_ν_le sum_disc_lim
  by (auto simp: ν⇩_b.rep_eq norm_bfun_def' intro: cSUP_least)
end

(* At state s, ν p coincides with ν of the policy
   mk_markovian (as_markovian p (return_pmf s)) (as_markovian is defined outside this file). *)
lemma ν_as_markovian: "ν (mk_markovian (as_markovian p (return_pmf s))) s = ν p s"
  by (auto simp: ν_eq_Pn Pn_as_markovian_eq Pn'_def)

(* The same statement for ν⇩_b. *)
lemma ν⇩_b_as_markovian: "ν⇩_b (mk_markovian (as_markovian p (return_pmf s))) s = ν⇩_b p s"
  using ν_as_markovian by (auto simp: ν⇩_b.rep_eq)

subsection ‹Optimal Reward›

(* ν_MD s: supremum of ν (mk_markovian_det p) s over p in Π⇩_M⇩_D.
   ν_opt s: supremum of ν p s over p in Π⇩_H⇩_R. *)
definition "ν_MD s ≡ ⨆p ∈ Π⇩_M⇩_D. ν (mk_markovian_det p) s"
definition "ν_opt s ≡ ⨆p ∈ Π⇩_H⇩_R. ν p s"

(* ν_opt is a member of bfun. *)
lemma ν_opt_bfun: "ν_opt ∈ bfun"
  using abs_ν_le policies_ne 
  by (fastforce simp: ν_opt_def intro!: order_trans[OF cSup_abs_le] bfun_normI)

(* ν⇩_b_opt: ν_opt lifted to the type 's ⇒⇩_b real; well-definedness is ν_opt_bfun. *)
lift_definition ν⇩_b_opt :: "'s ⇒⇩_b real" is ν_opt
  using ν_opt_bfun .

(* ν⇩_b_opt at a state s, written as a supremum of ν⇩_b p s over Π⇩_H⇩_R. *)
lemma ν⇩_b_opt_eq: "ν⇩_b_opt s = (⨆p ∈ Π⇩_H⇩_R. ν⇩_b p s)"
  using ν⇩_b.rep_eq ν⇩_b_opt.rep_eq ν_opt_def by presburger

(* For every p satisfying is_policy, ν p s is at most ν_opt s (intro rule). *)
lemma ν_le_ν_opt [intro]:
  assumes "is_policy p"
  shows "ν p s ≤ ν_opt s"
  unfolding ν_opt_def using abs_ν_le assms
  by (force intro: cSUP_upper intro!: bounded_imp_bdd_above boundedI)

(* The same statement for bounded functions, for p in Π⇩_H⇩_R (intro rule). *)
lemma ν⇩_b_le_opt [intro]: "p ∈ Π⇩_H⇩_R ⟹ ν⇩_b p ≤ ν⇩_b_opt"
  using ν_le by (fastforce simp: ν⇩_b.rep_eq ν⇩_b_opt.rep_eq)

(* Special case: policies mk_markovian_det p with p in Π⇩_M⇩_D (intro rule). *)
lemma ν⇩_b_le_opt_MD [intro]: "p ∈ Π⇩_M⇩_D ⟹ ν⇩_b (mk_markovian_det p) ≤ ν⇩_b_opt"
  by (auto simp: mk_markovian_det_def is_dec_det_def is_dec_def is_policy_def)

(* Special case: policies mk_stationary_det d with is_dec_det d (intro rule). *)
lemma ν⇩_b_le_opt_DD [intro]: "is_dec_det d ⟹ ν⇩_b (mk_stationary_det d) ≤ ν⇩_b_opt"
  by (auto simp add: is_policy_def mk_markovian_def)

(* Special case: policies mk_stationary d with is_dec d (intro rule). *)
lemma ν⇩_b_le_opt_DR [intro]: "is_dec d ⟹ ν⇩_b (mk_stationary d) ≤ ν⇩_b_opt"
  by (auto simp add: is_policy_def mk_markovian_def)

(* ν⇩_b_opt s equals the supremum of ν⇩_b (mk_markovian p) s over p in Π⇩_M⇩_R.
   "≤": each n in Π⇩_H⇩_R is replaced, via ν⇩_b_as_markovian, by a policy of the form
   mk_markovian m with the same value at s. "≥": uses Π⇩_M⇩_R_imp_policies. *)
lemma ν⇩_b_opt_eq_MR: "ν⇩_b_opt s = (⨆p ∈ Π⇩_M⇩_R. ν⇩_b (mk_markovian p) s)"
proof (rule antisym)
  show "ν⇩_b_opt s ≤ (⨆p∈Π⇩_M⇩_R. ν⇩_b (mk_markovian p) s)"
    unfolding ν⇩_b_opt_eq
  proof (rule cSUP_mono)
    show "Π⇩_H⇩_R ≠ {}"
      using policies_ne by simp
    show "bdd_above ((λp. ν⇩_b (mk_markovian p) s) ` Π⇩_M⇩_R)"
      by (auto intro!: boundedI bounded_imp_bdd_above abs_ν_le simp: ν⇩_b.rep_eq) 
    show "n ∈ Π⇩_H⇩_R ⟹ ∃m∈Π⇩_M⇩_R. ν⇩_b n s ≤ ν⇩_b (mk_markovian m) s" for n
      using is_Π⇩_M⇩_R_as_markovian by (subst ν⇩_b_as_markovian[symmetric]) fastforce     
  qed
  show "(⨆p∈Π⇩_M⇩_R. ν⇩_b (mk_markovian p) s) ≤ ν⇩_b_opt s"
    using Π⇩_M⇩_R_ne Π⇩_M⇩_R_imp_policies 
    by (auto intro!: cSUP_mono bounded_imp_bdd_above boundedI abs_ν_le simp: ν⇩_b_opt_eq  ν⇩_b.rep_eq)
qed

(* The discounted norms of 𝒫⇩_X p t applied to the reward of p t are summable (simp rule). *)
lemma summable_norm_disc_reward'[simp]: "summable (λt. l^t * norm (𝒫⇩_X p t (r_dec⇩_b (p t))))"
  using 𝒫⇩_X_bounded_r by auto

(* The discounted series of these values is summable (simp rule). *)
lemma summable_disc_reward_𝒫⇩_X [simp]: "summable (λt. l^t *⇩_R 𝒫⇩_X p t (r_dec⇩_b (p t)))"
  using summable_disc_reward 𝒫⇩_X_bounded_r by blast

(* The partial sums of this series converge to the infinite sum. *)
lemma disc_reward_tendsto:
  "(λn. ∑t<n. l^t *⇩_R 𝒫⇩_X p t (r_dec⇩_b (p t))) ⇢ (∑t. l^t *⇩_R 𝒫⇩_X p t (r_dec⇩_b (p t)))"
  by (simp add: summable_LIMSEQ)

(* The value of the policy mk_markovian p is the infinite discounted sum of 𝒫⇩_X p i
   applied to the reward of decision rule p i. Shown pointwise first, then lifted
   with suminf_apply_bfun. *)
lemma ν_eq_𝒫⇩_X: "ν (mk_markovian p) = (∑i. l^i *⇩_R 𝒫⇩_X p i (r_dec⇩_b (p i)))"
proof -
  have "ν (mk_markovian p) s = (∑i. l^i * 𝒫⇩_X p i (r_dec⇩_b (p i)) s)" for s
    unfolding ν⇩_b.rep_eq 𝒫⇩_X_def ν_eq_Pn Pn'_markovian_eq_Xn'_bind measure_pmf_bind
    using measure_pmf_in_subprob_algebra abs_r_le_r⇩_M
    by (subst integral_bind) (auto simp: r_dec_eq_r_K0)
  thus ?thesis
    by (auto simp: suminf_apply_bfun)
qed

(* The same identity as an equation between bounded functions, for ν⇩_b. *)
lemma ν⇩_b_eq_𝒫⇩_X: "ν⇩_b (mk_markovian p) = (∑i. l^i *⇩_R 𝒫⇩_X p i (r_dec⇩_b (p i)))"
  by (auto simp: ν_eq_𝒫⇩_X ν⇩_b.rep_eq)

(* The finite-horizon values ν⇩_b_fin converge to ν⇩_b for policies mk_markovian p. *)
lemma ν⇩_b_fin_tendsto_ν⇩_b: "(ν⇩_b_fin (mk_markovian p)) ⇢ ν⇩_b (mk_markovian p)"
  using disc_reward_tendsto ν⇩_b_eq_𝒫⇩_X ν⇩_b_fin_eq_𝒫⇩_X
  by presburger

(* The scaled operator l *⇩_R 𝒫⇩₁ d has norm strictly less than 1. *)
lemma norm_𝒫⇩₁_l_less: "norm (l *⇩_R 𝒫⇩₁ d) < 1"
  by auto
(* The partial sums of the discounted powers of 𝒫⇩₁ d converge to the infinite sum. *)
lemma disc_𝒫⇩₁_tendsto: "(λn. (∑t≤n. l^t *⇩_R 𝒫⇩₁ d ^^t)) ⇢ (∑t. l^t *⇩_R 𝒫⇩₁ d ^^t)"
  by (fastforce simp: bounded_iff intro: summable_LIMSEQ')

(* The same fact stated with lim. *)
lemma disc_𝒫⇩₁_lim: "lim (λn. (∑t≤n. l^t *⇩_R 𝒫⇩₁ d ^^ t)) = (∑t. l^t *⇩_R 𝒫⇩₁ d ^^t)"
  using limI disc_𝒫⇩₁_tendsto
  by blast

(* The same fact stated as convergence of the sequence of partial sums. *)
lemma convergent_disc_𝒫⇩₁: "convergent (λn. (∑t≤n. l^t *⇩_R 𝒫⇩₁ d ^^t))"
  using convergentI disc_𝒫⇩₁_tendsto 
  by blast

(* For non-negative u, the infinite discounted sum of powers of 𝒫⇩₁ d applied to u
   dominates u. The bound for the partial sums (𝒫⇩₁_sum_ge) is carried over to the
   pointwise limit. *)
lemma 𝒫⇩₁_suminf_ge: 
  assumes "0 ≤ u" shows "u ≤ (∑t. l^t *⇩_R 𝒫⇩₁ d ^^t) u"
proof -
  have aux: "⋀x. (λn. (∑t≤n. l^t *⇩_R 𝒫⇩₁ d ^^t) u x) ⇢ (∑t. l^t *⇩_R 𝒫⇩₁ d ^^t) u x"
    using bfun_tendsto_apply_bfun disc_𝒫⇩₁_lim lim_blinfun_apply[OF convergent_disc_𝒫⇩₁] 
    by fastforce
  have "⋀n. u ≤ (∑t≤n. l^t *⇩_R 𝒫⇩₁ d ^^t) u"
    using 𝒫⇩₁_sum_ge[OF assms] by auto
  thus ?thesis
    by (auto intro!: LIMSEQ_le_const[OF aux])
qed

(* Consequently, the infinite sum preserves non-negativity. *)
lemma 𝒫⇩₁_suminf_pos: 
  assumes "0 ≤ u" 
  shows "0 ≤ (∑t. l^t *⇩_R 𝒫⇩₁ d ^^t) u"
  using 𝒫⇩₁_suminf_ge[of u] assms order.trans by auto

(* The infinite sum is monotone; obtained by applying 𝒫⇩₁_suminf_pos to u - v. *)
lemma lemma_6_1_2_b:
  assumes "v ≤ u"
  shows "(∑t. l^t *⇩_R 𝒫⇩₁ d ^^t) v ≤ (∑t. l^t *⇩_R 𝒫⇩₁ d ^^t) u"
proof -
  have "0 ≤ (∑n. l ^ n *⇩_R 𝒫⇩₁ d ^^ n) (u - v)"
    using 𝒫⇩₁_suminf_pos assms by simp
  thus ?thesis
    by (simp add: blinfun.diff_right)
qed

(* The value of the policy mk_stationary d is the infinite discounted sum of powers of
   𝒫⇩₁ d, applied to the reward of d. The second step moves the application of the
   operator out of the infinite sum. *)
lemma ν_stationary: "ν⇩_b (mk_stationary d) = (∑t. l^t *⇩_R (𝒫⇩₁ d ^^ t)) (r_dec⇩_b d)"
proof -
  have "ν⇩_b (mk_stationary d) = (∑t. (l ^ t *⇩_R (𝒫⇩₁ d ^^ t)) (r_dec⇩_b d))"
    by (simp add: ν⇩_b_eq_𝒫⇩_X scaleR_blinfun.rep_eq 𝒫⇩_X_sconst)
  also have "...  = (∑t. (l ^ t *⇩_R (𝒫⇩₁ d ^^ t))) (r_dec⇩_b d)"
    by (subst bounded_linear.suminf[where f = "λx. blinfun_apply x (r_dec⇩_b d)"]) 
      (auto intro!: bounded_linear.suminf boundedI)
  finally show ?thesis .
qed

(* The same value written with inv⇩_L applied to id_blinfun - l *⇩_R 𝒫⇩₁ d
   (inv⇩_L is defined outside this file). *)
lemma ν_stationary_inv: "ν⇩_b (mk_stationary d) = inv⇩_L (id_blinfun - l *⇩_R 𝒫⇩₁ d) (r_dec⇩_b d)"
  by (auto simp: ν_stationary inv⇩_L_inf_sum blincomp_scaleR_right)


text ‹The value of a markovian policy can be expressed in terms of @{const L}.›

(* One-step unfolding: the value of mk_markovian p equals L (p 0) applied to the value of
   the policy built from the shifted sequence (λn. p (Suc n)). The proof splits off the
   head of the series, rewrites the tail with 𝒫⇩_X_Suc, and pulls 𝒫⇩₁ (p 0) out of the sum. *)
lemma ν_step: "ν⇩_b (mk_markovian p) = L (p 0) (ν⇩_b (mk_markovian (λn. p (Suc n))))"
proof -
  have s: "summable (λt. l^t *⇩_R (𝒫⇩_X p (Suc t) (r_dec⇩_b (p (Suc t)))))"
    using 𝒫⇩_X_bound_r by (auto intro!: boundedI[of _ r⇩_M])
  have 
    "ν⇩_b (mk_markovian p) = r_dec⇩_b (p 0) + (∑t. l ^ (Suc t) *⇩_R 𝒫⇩_X p (Suc t) (r_dec⇩_b (p (Suc t))))"
    by (subst suminf_split_head) (auto simp: ν⇩_b_eq_𝒫⇩_X)
  also have 
    "… = r_dec⇩_b (p 0) + l *⇩_R (∑t. 𝒫⇩₁ (p 0) (l^t *⇩_R 𝒫⇩_X (λn. p (Suc n)) t (r_dec⇩_b (p (Suc t)))))"
    using suminf_scaleR_right[OF s] by (auto simp: 𝒫⇩_X_Suc blinfun.scaleR_right)
  also have 
    "… = L (p 0) (ν⇩_b (mk_markovian (λn. p (Suc n))))"
    using blinfun.bounded_linear_right bounded_linear.suminf[of "blinfun_apply (𝒫⇩₁ (p 0))"]
    by (fastforce simp add: ν⇩_b_eq_𝒫⇩_X L_def)
  finally show ?thesis .
qed

(* The value of mk_stationary d is a fixed point of L d; obtained directly from ν_step. *)
lemma L_ν_fix: "ν⇩_b (mk_stationary d) = L d (ν⇩_b (mk_stationary d))"
  using ν_step .

(* Every fixed point v of L p is the value of mk_stationary p. The fixed-point equation
   is rearranged to r_dec⇩_b p = (id_blinfun - l *⇩_R 𝒫⇩₁ p) v, which is then solved for v
   as a series using inv_norm_le' (defined outside this file) and norm_𝒫⇩₁_l_less. *)
lemma L_fix_ν:
  assumes "L p v = v"
  shows "v = ν⇩_b (mk_stationary p)"
proof -
  have "r_dec⇩_b p = (id_blinfun - l *⇩_R 𝒫⇩₁ p) v"
    using assms by (auto simp: eq_diff_eq L_def blinfun.diff_left blinfun.scaleR_left)
  hence "v = (∑t. (l *⇩_R 𝒫⇩₁ p)^^t) (r_dec⇩_b p)"
    using inv_norm_le'(2)[OF norm_𝒫⇩₁_l_less] by auto
  thus "v = ν⇩_b (mk_stationary p)"
    by (auto simp: ν_stationary blincomp_scaleR_right)
qed

(* Characterization: v is a fixed point of L d iff v is the value of mk_stationary d. *)
lemma L_ν_fix_iff: "L d v = v ⟷ v = ν⇩_b (mk_stationary d)"
  using L_fix_ν L_ν_fix by auto

subsection ‹Properties of Solutions of the Optimality Equations›

(* 𝒫⇩_d p n v: 𝒫⇩_X p n v scaled by l^n. *)
abbreviation "𝒫⇩_d p n v ≡ l^n *⇩_R 𝒫⇩_X p n v"

(* 𝒫⇩_d p n v tends to 0 as n grows: its norm is bounded by l^n * norm v, which tends
   to 0. *)
lemma 𝒫⇩_d_lim: "(λn. (𝒫⇩_d p n v)) ⇢ 0"
proof -
  have "(λn. l^n * norm v) ⇢ 0"
    by (auto intro!: tendsto_eq_intros)
  moreover have "norm (𝒫⇩_d p n v) ≤ l^n * norm v" for p n
    by (simp add: mult_mono')
  ultimately have "(λn. norm (𝒫⇩_d p n v)) ⇢ 0" for p
    by (auto simp: Lim_transform_bound[where g = "λn. (l^n * norm v)"])
  thus "(λn. (𝒫⇩_d p n v)) ⇢ 0" for p
    using tendsto_norm_zero_cancel by fast
qed



(* 6.2.2 a) in Puterman *)

(* If ℒ⇩_b v ≤ v, then v is an upper bound for ν⇩_b_opt.
   Proof outline: for every p in Π⇩_M⇩_R, induction on n shows
   ν⇩_b_fin ?p n + l^n *⇩_R 𝒫⇩_X p n v ≤ v (fact aux). The left-hand side converges
   pointwise to ν⇩_b ?p s (fact 1, using 𝒫⇩_d_lim), so ν⇩_b ?p ≤ v. The claim then follows
   from ν⇩_b_opt_eq_MR. *)
lemma ℒ_dec_ge_opt:
  assumes "ℒ⇩_b v ≤ v"
  shows "ν⇩_b_opt ≤ v"
proof -
  have "ν⇩_b (mk_markovian p) ≤ v" if "p ∈ Π⇩_M⇩_R" for p
  proof -
    let ?p = "mk_markovian p"
    have aux: "ν⇩_b_fin ?p n + l^n *⇩_R 𝒫⇩_X p n v ≤ v" for n
    proof (induction n)
      case (Suc n)
      have "𝒫⇩_X p n (r_dec⇩_b (p n)) + l *⇩_R (𝒫⇩_X p (Suc n) v) ≤ 𝒫⇩_X p n v"
        using 𝒫⇩_X_L_le assms that by (simp add: 𝒫⇩_X_Suc_n_elem L_def linear_simps)
      hence "ν⇩_b_fin ?p (n + 1) + l^(n + 1) *⇩_R (𝒫⇩_X p (n + 1) v) ≤ ν⇩_b_fin ?p n + l^n *⇩_R (𝒫⇩_X p n v)"
        by (auto simp del: scaleR_scaleR intro: scaleR_left_mono simp: ν⇩_b_fin_eq_𝒫⇩_X 
            mult.commute[of l] scaleR_add_right[symmetric] scaleR_scaleR[symmetric])
      also have "… ≤ v"
        using Suc.IH by (auto simp: ν⇩_b_fin_eq_𝒫⇩_X)
      finally show ?case
        by auto
    qed (auto simp: ν⇩_b_fin_eq_𝒫⇩_X)
    have 1: "(λn. (ν⇩_b_fin ?p n + 𝒫⇩_d p n v) s) ⇢ ν⇩_b ?p s" for s
      using bfun_tendsto_apply_bfun Limits.tendsto_add[OF ν⇩_b_fin_tendsto_ν⇩_b 𝒫⇩_d_lim] by fastforce
    have "ν⇩_b ?p s ≤ v s" for s
      using that aux assms by (fastforce intro!: lim_mono[OF _ 1, of  _ _ "λn. v s"])
    thus ?thesis
      using that by blast
  qed
  thus ?thesis
    using policies_ne by (fastforce simp: is_policy_def ν⇩_b_opt_eq_MR intro!: cSUP_least)
qed

(* If v ≤ ℒ⇩_b v, then v is a lower bound for ν⇩_b_opt.
   Proof outline: for every e > 0, step_mono_elem yields a decision rule d with
   v ≤ L d v + e. Rearranging and applying the monotone series ?Pinf (lemma_6_1_2_b) gives
   v ≤ ν⇩_b (mk_stationary d) + e / (1 - l), hence v s ≤ ν⇩_b_opt s + e / (1 - l)
   (fact le_elem). Instantiating e with e * (1 - l) removes the factor, and
   field_le_epsilon finishes the proof. *)
lemma ℒ_inc_le_opt:
  assumes "v ≤ ℒ⇩_b v"
  shows "v ≤ ν⇩_b_opt"
proof -
  have le_elem: "v s ≤ ν⇩_b_opt s + (e/(1-l))" if "e > 0" for s e
  proof -
    obtain d where "d ∈ D⇩_R" and hd: "v ≤ L d v + e *⇩_R 1"
      using assms step_mono_elem ‹e > 0› by blast
    let ?Pinf = "(∑i. l^i *⇩_R 𝒫⇩₁ d^^i)"
    have "v ≤ r_dec⇩_b d + l *⇩_R (𝒫⇩₁ d) v + e *⇩_R 1"
      using hd L_def by fastforce
    hence "(id_blinfun - l *⇩_R 𝒫⇩₁ d) v ≤ r_dec⇩_b d + e *⇩_R 1"
      by (auto simp: blinfun.diff_left blinfun.scaleR_left algebra_simps)
    hence "?Pinf ((id_blinfun - l *⇩_R 𝒫⇩₁ d) v) ≤ ?Pinf (r_dec⇩_b d + e *⇩_R 1)"
      using lemma_6_1_2_b 𝒫⇩₁_def hd by auto
    hence "v ≤ ?Pinf (r_dec⇩_b d + e *⇩_R 1)"
      using inv_norm_le'(2)[of "l *⇩_R 𝒫⇩₁ d"] by (auto simp: blincomp_scaleR_right)
    also have "… = ν⇩_b (mk_stationary d) + e *⇩_R ?Pinf 1"
      by (simp add: ν_stationary blinfun.add_right blinfun.scaleR_right)
    also have "… = ν⇩_b (mk_stationary d) + e *⇩_R (∑i. (l^i *⇩_R ((𝒫⇩₁ d^^i))) 1)"
      using convergent_disc_𝒫⇩₁ 
      by (auto simp: summable_iff_convergent' bounded_linear.suminf[of "λx. blinfun_apply x 1"])
    also have "… = ν⇩_b (mk_stationary d) + e *⇩_R (∑i. (l^i *⇩_R 1))"
      by (auto simp: scaleR_blinfun.rep_eq)
    also have "… ≤ (ν⇩_b (mk_stationary d) + (e / (1-l)) *⇩_R  1)"
      by (auto simp: bounded_linear.suminf[symmetric, where f = "λx. x *⇩_R 1"] 
          suminf_geometric bounded_linear_scaleR_left summable_geometric)
    finally have "v s ≤ (ν⇩_b (mk_stationary d) + (e/(1-l)) *⇩_R  1) s"
      by auto
    thus "v s ≤ ν⇩_b_opt s + (e/(1-l))"
      using ‹d ∈ D⇩_R› ν⇩_b_le_opt
      by (auto simp: is_policy_def mk_markovian_def less_eq_bfun_def intro: order_trans)
  qed
  have "v s ≤ ν⇩_b_opt s + e" if "e > 0" for s e
  proof -
    have "e * (1 - l) > 0"
      by (simp add: ‹0 < e›)
    thus "v s ≤ ν⇩_b_opt s + e"
      using disc_lt_one that le_elem by (fastforce split: if_splits)
  qed
  thus ?thesis
    by (fastforce intro: field_le_epsilon)
qed    
(* Every fixed point of ℒ⇩_b equals ν⇩_b_opt: the two inequalities come from
   ℒ_dec_ge_opt and ℒ_inc_le_opt and are combined by antisymmetry. *)
lemma ℒ_fix_imp_opt:
  assumes "v = ℒ⇩_b v"
  shows "v = ν⇩_b_opt"
  using assms dual_order.antisym[OF ℒ_dec_ge_opt ℒ_inc_le_opt] by auto

(* The image of an arbitrary set X under 𝒫⇩₁ is bounded; used below to justify
   suprema over decision rules in contraction_ℒ. *)
lemma bounded_P: "bounded (𝒫⇩₁ ` X)"
  by (auto simp: bounded_iff)

subsection ‹Solutions to the Optimality Equation›
subsubsection ‹@{const ℒ⇩_b} and @{const L} are Contraction Mappings›
(* Register the two boundedness facts as introduction rules for the automation used from here on. *)
declare bounded_apply_blinfun[intro] bounded_apply_bfun'[intro]

(* ℒ⇩_b is Lipschitz with constant l: dist (ℒ⇩_b v) (ℒ⇩_b u) ≤ l * dist v u.
   The bound is first shown pointwise for the case ℒ⇩_b u s ≤ ℒ⇩_b v s; the opposite case
   follows by swapping u and v (dist_commute), and dist_bound lifts it to bounded functions. *)
lemma contraction_ℒ: "dist (ℒ⇩_b v) (ℒ⇩_b u) ≤ l * dist v u"
proof -
  have "dist (ℒ⇩_b v s) (ℒ⇩_b u s) ≤ l * dist v u" if "ℒ⇩_b u s ≤ ℒ⇩_b v s" for s v u
  proof -
    (* a difference of suprema is bounded by the supremum of the differences *)
    have "dist (ℒ⇩_b v s) (ℒ⇩_b u s) ≤ (⨆d ∈ D⇩_R. L d v s - L d u s)"
      using ex_dec that by (fastforce intro!: le_SUP_diff' simp: dist_real_def ℒ⇩_b.rep_eq ℒ_def)
    (* unfolding L: the reward terms cancel, only the discounted transition term remains *)
    also have "… = (⨆d ∈ D⇩_R. l * (𝒫⇩₁ d (v - u) s))"
      by (auto simp: L_def right_diff_distrib blinfun.diff_right)
    (* pull the constant factor l out of the supremum *)
    also have "… = l * (⨆d ∈ D⇩_R. 𝒫⇩₁ d (v - u) s)"
      using D⇩_R_ne bounded_P by (fastforce intro: bounded_SUP_mul)
    also have "… ≤ l * norm (⨆d ∈ D⇩_R. 𝒫⇩₁ d (v - u) s)"
      by (simp add: mult_left_mono)
    (* move the norm inside the supremum; boundedness of the family is needed for this *)
    also have "… ≤ l * (⨆d ∈ D⇩_R. norm ((𝒫⇩₁ d (v - u)) s))"
    proof -
      have "bounded ((λx. norm ((𝒫⇩₁ x (v - u)) s)) ` D⇩_R)"
        using bounded_apply_bfun' bounded_P bounded_apply_blinfun bounded_norm_comp by metis
      thus ?thesis
        using D⇩_R_ne ex_dec bounded_norm_comp by (fastforce intro!: mult_left_mono)
    qed
    (* the value at a single state is bounded by the norm of the whole function *)
    also have "… ≤ l * (⨆p ∈ D⇩_R. norm (𝒫⇩₁ p ((v - u))))"
      using D⇩_R_ne abs_le_norm_bfun bounded_P
      by (fastforce simp: bounded_norm_comp intro!: bounded_imp_bdd_above mult_left_mono cSUP_mono)
    (* applying 𝒫⇩₁ p does not increase the norm (norm_push_exp_le_norm) *)
    also have "… ≤ l * (⨆p ∈ D⇩_R. norm ((v - u)))"
      using norm_push_exp_le_norm D⇩_R_ne
      by (fastforce simp: 𝒫⇩₁.rep_eq intro!: mult_left_mono cSUP_mono)
    also have "… = l * dist v u"
      by (auto simp: dist_norm)
    finally show ?thesis .
  qed
  (* both orderings of ℒ⇩_b u s and ℒ⇩_b v s are covered, the second by symmetry *)
  hence "ℒ⇩_b u s ≤ ℒ⇩_b v s ⟹ dist (ℒ⇩_b v s) (ℒ⇩_b u s) ≤ l * dist v u" 
    "ℒ⇩_b v s ≤ ℒ⇩_b u s ⟹ dist (ℒ⇩_b v s) (ℒ⇩_b u s) ≤ l * dist v u" for u v s
    by (fastforce simp: dist_commute)+
  thus ?thesis
    using linear[of "ℒ⇩_b u _"] by (fastforce intro: dist_bound)
qed

(* ℒ⇩_b satisfies is_contraction: contraction_ℒ together with 0 ≤ l (zero_le_disc)
   and disc_lt_one. *)
lemma is_contraction_ℒ: "is_contraction ℒ⇩_b"
  using contraction_ℒ zero_le_disc disc_lt_one unfolding is_contraction_def by blast

(* For a fixed decision rule p, L p is Lipschitz with constant l:
   dist (L p v) (L p u) ≤ l * dist v u.
   aux handles the signed difference when L p u s ≤ L p v s; the general pointwise bound
   follows by a case split, and dist_bound lifts it to bounded functions. *)
lemma contraction_L: "dist (L p v) (L p u) ≤ l * dist v u"
proof -
  have aux: "L p v s - L p u s ≤ l * dist v u" if lea: "L p v s ≥ L p u s" for v s u
  proof -
    (* the reward terms cancel when unfolding L *)
    have "L p v s - L p u s = (l *⇩_R  (𝒫⇩₁ p v - 𝒫⇩₁ p u)) s"
      by (simp add: L_def scale_right_diff_distrib)
    also have "… ≤ l * norm (𝒫⇩₁ p (v - u) s)"
      by (auto simp: blinfun.diff_right intro!: mult_left_mono)
    (* pointwise value bounded by the norm of the function *)
    also have "… ≤ l * norm (𝒫⇩₁ p (v - u))"
      using abs_le_norm_bfun by (auto intro!: mult_left_mono)
    (* applying 𝒫⇩₁ p does not increase the norm (norm_push_exp_le_norm) *)
    also have "… ≤ l * dist v u"
      by (simp add: 𝒫⇩₁.rep_eq mult_left_mono norm_push_exp_le_norm dist_norm)
    finally show ?thesis
      by auto
  qed
  (* use aux in both directions, depending on which of the two values is larger *)
  have "dist (L p v s) (L p u s) ≤ l * dist v u" for v s u
    using aux[of v _ u] aux[of u _ v]
    by (cases "L p v s ≥ L p u s") (auto simp: dist_real_def dist_commute)
  thus "dist (L p v) (L p u) ≤ l * dist v u"
    by (simp add: dist_bound)
qed

(* L p satisfies is_contraction for every p: contraction_L together with
   zero_le_disc and disc_lt_one. *)
lemma is_contraction_L: "is_contraction (L p)"
  unfolding is_contraction_def using contraction_L disc_lt_one zero_le_disc by blast

subsubsection ‹Existence of a Fixpoint of @{const ℒ⇩_b}›
(* Two consequences of banach' applied to the contraction ℒ⇩_b:
   (1) ℒ⇩_b has exactly one fixed point;
   (2) the iterates (ℒ⇩_b ^^ n) v converge to that fixed point for every start value v. *)
lemma ℒ⇩_b_conv:
  "∃!v. ℒ⇩_b v = v" "(λn. (ℒ⇩_b ^^ n) v) ⇢ (THE v. ℒ⇩_b v = v)"
  using banach'[OF is_contraction_ℒ] by auto

(* v is a fixed point of ℒ⇩_b exactly when v = ν⇩_b_opt (uniqueness from banach',
   identification from ℒ_fix_imp_opt). Declared [simp], so fixed-point equations for ℒ⇩_b
   are rewritten to equalities with ν⇩_b_opt from here on. *)
lemma ℒ⇩_b_fix_iff_opt [simp]: "ℒ⇩_b v = v ⟷ v = ν⇩_b_opt"
  using banach'(1) is_contraction_ℒ ℒ_fix_imp_opt by metis

(* ν⇩_b_opt coincides with the definite description of the fixed point of ℒ⇩_b. *)
lemma ν⇩_b_opt_fix: "ν⇩_b_opt = (THE v. ℒ⇩_b v = v)"
  by auto

(* ν⇩_b_opt is itself a fixed point of ℒ⇩_b; available as a simp rule. *)
lemma ℒ⇩_b_opt [simp]: "ℒ⇩_b ν⇩_b_opt = ν⇩_b_opt"
  by auto

(* Iterating ℒ⇩_b from any start value v converges to ν⇩_b_opt
   (ℒ⇩_b_conv(2) with the fixed point rewritten by ν⇩_b_opt_fix). *)
lemma ℒ⇩_b_lim: "(λn. (ℒ⇩_b ^^ n) v) ⇢ ν⇩_b_opt"
  using ℒ⇩_b_conv(2) ν⇩_b_opt_fix by presburger

(* The value ν⇩_b p of a policy p equals ν⇩_b_opt iff ν⇩_b p is a fixed point of ℒ⇩_b.
   NOTE(review): the name presumably follows a textbook theorem numbering - confirm the source. *)
lemma thm_6_2_6: "ν⇩_b p = ν⇩_b_opt ⟷ ℒ⇩_b (ν⇩_b p) = ν⇩_b p"
  by force

(* Variant of thm_6_2_6 with the left-hand side stated for ν and ν_opt instead of
   ν⇩_b and ν⇩_b_opt; transferred via the rep_eq facts. *)
lemma thm_6_2_6': "ν p = ν_opt ⟷ ℒ⇩_b (ν⇩_b p) = ν⇩_b p"
  using thm_6_2_6 ν⇩_b.rep_eq ν⇩_b_opt.rep_eq by fastforce

subsection ‹Existence of Optimal Policies›

(* d is ν_improving for v iff, in every state s, d is an arg-max of d' ↦ L d' v s
   among the decision rules in D⇩_R. *)
definition "ν_improving v d ⟷ (∀s. is_arg_max (λd. (L d v) s) (λd. d ∈ D⇩_R) d)"

(* Unfolded characterisation of ν_improving: d is in D⇩_R and dominates every
   other d' in D⇩_R pointwise under L _ v. *)
lemma ν_improving_iff: "ν_improving v d ⟷ d ∈ D⇩_R ∧ (∀d' ∈ D⇩_R. ∀s. L d' v s ≤ L d v s)"
  by (auto simp: ν_improving_def is_arg_max_linorder)

(* An improving decision rule belongs to D⇩_R; registered as a destruction rule. *)
lemma ν_improving_D_MR[dest]: "ν_improving v d ⟹ d ∈ D⇩_R"
  by (auto simp add: ν_improving_iff)

(* An improving decision rule d dominates any d' in D⇩_R pointwise under L _ v. *)
lemma ν_improving_ge: "ν_improving v d ⟹ d' ∈ D⇩_R ⟹ L d' v s ≤ L d v s"
  by (auto simp: ν_improving_iff)

(* If d is improving for v, the supremum defining ℒ⇩_b v is attained at d,
   i.e. ℒ⇩_b v = L d v (cSup_eq_maximum). *)
lemma ν_improving_imp_ℒ⇩_b: "ν_improving v d ⟹ ℒ⇩_b v = L d v"
  by (fastforce intro!: cSup_eq_maximum simp: ν_improving_iff ℒ⇩_b.rep_eq ℒ_def)

(* Converse of ν_improving_imp_ℒ⇩_b for d in D⇩_R: if L d v attains ℒ⇩_b v then d is
   improving, because every L d' v is bounded by ℒ⇩_b v (L_le_ℒ⇩_b). *)
lemma ℒ⇩_b_imp_ν_improving: 
  assumes "d ∈ D⇩_R" "ℒ⇩_b v = L d v"
  shows "ν_improving v d"
  using assms L_le_ℒ⇩_b by (auto simp: ν_improving_iff assms(2)[symmetric])

(* For d in D⇩_R, being improving for v is equivalent to ℒ⇩_b v = L d v. *)
lemma ν_improving_alt:
  assumes "d ∈ D⇩_R"
  shows "ν_improving v d ⟷ ℒ⇩_b v = L d v"
  using ℒ⇩_b_imp_ν_improving ν_improving_imp_ℒ⇩_b assms by blast

(* A decision rule is ν_conserving iff it is ν_improving for ν⇩_b_opt. *)
definition "ν_conserving d = ν_improving (ν⇩_b_opt) d"

(* Unfolded characterisation of ν_conserving (ν_improving_iff instantiated at ν⇩_b_opt). *)
lemma ν_conserving_iff: "ν_conserving d ⟷ d ∈ D⇩_R ∧ (∀d' ∈ D⇩_R. ∀s. L d' ν⇩_b_opt s ≤ L d ν⇩_b_opt s)"
  by (auto simp: ν_conserving_def ν_improving_iff)

(* A conserving decision rule dominates any d' in D⇩_R pointwise under L _ ν⇩_b_opt. *)
lemma ν_conserving_ge: "ν_conserving d ⟹ d' ∈ D⇩_R ⟹ L d' ν⇩_b_opt s ≤ L d ν⇩_b_opt s"
  by (auto simp: ν_conserving_iff intro: ν_improving_ge)

(* For a conserving decision rule d, ν⇩_b_opt is a fixed point of L d; simp rule. *)
lemma ν_conserving_imp_ℒ⇩_b [simp]: "ν_conserving d ⟹ L d ν⇩_b_opt = ν⇩_b_opt"
  using ν_improving_imp_ℒ⇩_b by (fastforce simp: ν_conserving_def)

(* A rule d in D⇩_R for which L d ν⇩_b_opt attains ℒ⇩_b ν⇩_b_opt is conserving
   (ℒ⇩_b_imp_ν_improving at v = ν⇩_b_opt). *)
lemma ℒ⇩_b_imp_ν_conserving:
  assumes "d ∈ D⇩_R" "ℒ⇩_b ν⇩_b_opt = L d ν⇩_b_opt"
  shows "ν_conserving d"
  using ℒ⇩_b_imp_ν_improving assms by (auto simp: ν_conserving_def)

(* For d in D⇩_R, conserving is equivalent to ℒ⇩_b ν⇩_b_opt = L d ν⇩_b_opt. *)
lemma ν_conserving_alt: 
  assumes "d ∈ D⇩_R"
  shows "ν_conserving d ⟷ ℒ⇩_b ν⇩_b_opt = L d ν⇩_b_opt"
  unfolding ν_conserving_def using ν_improving_alt assms by auto

(* For d in D⇩_R, conserving is equivalent to ν⇩_b_opt being a fixed point of L d. *)
lemma ν_conserving_alt':
  assumes "d ∈ D⇩_R"
  shows "ν_conserving d ⟷ L d ν⇩_b_opt = ν⇩_b_opt"
  using assms ν_conserving_alt by auto

subsubsection ‹Conserving Decision Rules are Optimal›

(* If every v admits a deterministic improving decision rule, then a deterministic
   conserving decision rule exists (instantiate the assumption at ν⇩_b_opt). *)
theorem ex_improving_imp_conserving:
  assumes "⋀v. ∃d. ν_improving v (mk_dec_det d)"
  shows "∃d. ν_conserving (mk_dec_det d)"
  by (simp add: assms ν_conserving_def)

(* The stationary policy built from a deterministic conserving rule d has value ν⇩_b_opt.
   Proof: ν⇩_b_opt is a fixed point of L (mk_dec_det d), and L_ν_fix_iff relates such
   fixed points to the value of the stationary policy. Declared [simp]. *)
theorem conserving_imp_opt[simp]:
  assumes "ν_conserving (mk_dec_det d)"
  shows "ν⇩_b (mk_stationary_det d) = ν⇩_b_opt"
  using L_ν_fix_iff ν_conserving_imp_ℒ⇩_b[OF assms] by simp

(* Existential form of conserving_imp_opt that additionally records d ∈ D⇩_D. *)
lemma conserving_imp_opt':
  assumes "∃d. ν_conserving (mk_dec_det d)"
  shows "∃d ∈ D⇩_D. (ν⇩_b (mk_stationary_det d)) = ν⇩_b_opt"
  using assms by (fastforce simp: ν_conserving_def)

(* If every v admits a deterministic improving decision rule, then ν⇩_b_opt s is the
   supremum over D⇩_D of the values of deterministic stationary policies at s.
   The supremum is in fact a maximum, attained at a conserving rule. *)
theorem improving_att_imp_det_opt:
  assumes "⋀v. ∃d. ν_improving v (mk_dec_det d)"
  shows "ν⇩_b_opt s = (⨆d ∈ D⇩_D. ν⇩_b (mk_stationary_det d) s)"
proof -
  (* a conserving deterministic rule exists and lies in D⇩_D *)
  obtain d where d: "ν_conserving (mk_dec_det d)"
    using assms ex_improving_imp_conserving by auto
  hence "d ∈ D⇩_D"
    using ν_conserving_iff is_dec_mk_dec_det_iff by blast
  (* ν⇩_b_opt s is an upper bound (ν⇩_b_le_opt) that is attained at d (conserving_imp_opt) *)
  thus ?thesis
    using Π⇩_M⇩_R_imp_policies ν⇩_b_le_opt
    by (fastforce intro!: cSup_eq_maximum[where z = "ν⇩_b_opt s", symmetric]
        simp: conserving_imp_opt[OF d] image_iff)
qed


(* If ℒ⇩_b v is attained by some decision rule d in D⇩_R, it is also attained by a
   deterministic decision rule mk_dec_det d' with d' in D⇩_D.
   Proof: in each state s an action a ∈ A s with L d v s = L⇩_a a v s is obtained
   (lemma_4_3_1'); these per-state choices are then collected into d'. *)
lemma ℒ⇩_b_sup_att_dec:
  assumes "d ∈ D⇩_R" "ℒ⇩_b v = L d v"
  shows "∃d' ∈ D⇩_D. ℒ⇩_b v = L (mk_dec_det d') v"
proof -
  (* per state: a single action already achieves the value of d *)
  have "∃a∈ A s. L d v s = L⇩_a a v s" for s
    unfolding L_eq_L⇩_a
    using assms is_dec_def L⇩_a_bounded A_ne ℒ⇩_b.rep_eq ℒ_def
    by (intro lemma_4_3_1') 
      (auto intro: bounded_range_subset simp: assms(2)[symmetric] L_eq_L⇩_a[symmetric] SUP_step_MR_eq)
  (* turn the per-state existence into a choice function d' *)
  then obtain d' where d: "d' s ∈ A s" "L d v s = L⇩_a (d' s) v s" for s
    by metis
  thus ?thesis
    using assms d
    by (fastforce simp: is_dec_det_def mk_dec_det_def L_eq_L⇩_a)
qed

(* Restatement of ℒ⇩_b_sup_att_dec in terms of ν_improving (via ν_improving_alt). *)
lemma ℒ⇩_b_sup_att_dec':
  assumes "d ∈ D⇩_R" "ℒ⇩_b v = L d v"
  shows "∃d' ∈ D⇩_D. ν_improving v (mk_dec_det d')"
  using ℒ⇩_b_sup_att_dec ν_improving_alt assms by force

subsubsection ‹Deterministic Decision Rules are Optimal›

(* If some policy p in Π⇩_H⇩_R has value ν⇩_b_opt, then there is a deterministic decision
   rule d in D⇩_D whose stationary policy also has value ν⇩_b_opt.
   Outline: the first decision of p (p []) is shown to satisfy L (p []) ν⇩_b_opt = ν⇩_b_opt;
   ℒ⇩_b_sup_att_dec then yields a deterministic rule with the same property, which is
   conserving and hence has value ν⇩_b_opt. *)
lemma opt_imp_opt_dec_det:
  assumes "p ∈ Π⇩_H⇩_R" "ν⇩_b p = ν⇩_b_opt" 
  shows "∃d ∈ D⇩_D. ν⇩_b (mk_stationary_det d) = ν⇩_b_opt"
proof -
  (* for each start state s, the first rule of the markovian version of p fixes ν⇩_b_opt at s *)
  have aux: "L (as_markovian p (return_pmf s) 0) ν⇩_b_opt s = ν⇩_b_opt s" for s
  proof -
    let ?ps = "as_markovian p (return_pmf s)"
    (* the policy that follows ?ps from step 1 onwards is bounded by ν⇩_b_opt *)
    have markovian_suc_le: "ν⇩_b (mk_markovian (λn. as_markovian p (return_pmf s) (Suc n))) ≤ ν⇩_b_opt"
      using is_Π⇩_M⇩_R_as_markovian assms by (auto simp: is_policy_def mk_markovian_def)
    have aux_le: "⋀x f g. f ≤ g ⟹ apply_bfun f x ≤ apply_bfun g x"
      unfolding less_eq_bfun_def by auto
    (* direction ≤: decompose the value into one step plus the remainder (ν_step) *)
    have "ν⇩_b_opt s = ν⇩_b (mk_markovian ?ps) s"
      using assms ν⇩_b_as_markovian by metis
    also have "… = L (?ps 0) (ν⇩_b (mk_markovian (λn. ?ps (Suc n)))) s"
      using ν_step by blast
    also have "… ≤ L (?ps 0) (ν⇩_b_opt) s"
      unfolding L_def using markovian_suc_le 𝒫⇩₁_mono by (auto intro!: mult_left_mono)
    finally have "ν⇩_b_opt s ≤ L (?ps 0) (ν⇩_b_opt) s" .
    (* direction ≥: L d ν⇩_b_opt is bounded by ℒ⇩_b ν⇩_b_opt, which simp rewrites to ν⇩_b_opt *)
    have "as_markovian p (return_pmf s) 0 ∈ D⇩_R"
      using is_Π⇩_M⇩_R_as_markovian assms by fast
    have "L (?ps 0) ν⇩_b_opt ≤ ν⇩_b_opt"
      using ‹?ps 0 ∈ D⇩_R› L_le_ℒ⇩_b[of "?ps 0" "ν⇩_b_opt"] by simp
    thus "L (?ps 0) ν⇩_b_opt s = ν⇩_b_opt s"
      using ‹ν⇩_b_opt s ≤ (L (?ps 0) ν⇩_b_opt) s› by (auto intro!: antisym)
  qed
  (* at state s, the first markovian rule and p [] give the same value of L *)
  have "L (p []) v s = L (as_markovian p (return_pmf s) 0) v s" for v s
    by (auto simp: L_def 𝒫⇩₁.rep_eq K_st_def)
  hence "L (p []) ν⇩_b_opt = ν⇩_b_opt"
    using aux by auto
  (* replace the possibly randomized rule p [] by a deterministic one *)
  hence "∃d ∈ D⇩_D. L (mk_dec_det d) ν⇩_b_opt = ν⇩_b_opt"
    using ℒ⇩_b_sup_att_dec assms(1) ℒ⇩_b_opt is_policy_def mem_Collect_eq by metis
  thus ?thesis
    using conserving_imp_opt' ν_conserving_alt' by blast
qed

subsubsection ‹Optimal Decision Rules for Finite Action Spaces›

(* 6.2.10 *)
(* With finite action sets, ℒ⇩_b v s is attained by a single action a ∈ A s.
   After rewriting ℒ⇩_b v s into a supremum over actions (ℒ_eq_SUP_det, SUP_step_det_eq),
   the supremum over the finite non-empty set A s is a maximum, realised by arg_max_on. *)
lemma ex_opt_act: 
assumes "⋀s. finite (A s)"
shows "∃a ∈ A s. L⇩_a a (v :: _ ⇒⇩_b _) s = ℒ⇩_b v s"
      unfolding ℒ⇩_b.rep_eq ℒ_eq_SUP_det SUP_step_det_eq
      using arg_max_on_in[OF assms A_ne]
      by (auto simp: cSup_eq_Sup_fin Sup_fin_Max assms A_ne finite_arg_max_eq_Max[symmetric])

(* With finite action sets there is a deterministic decision rule d in D⇩_D with
   L (mk_dec_det d) v = ℒ⇩_b v. The witness selects, via SOME, in each state an action
   that attains ℒ⇩_b v s; such an action exists by ex_opt_act. *)
lemma ex_opt_dec_det:
assumes "⋀s. finite (A s)"
shows "∃d ∈ D⇩_D. L (mk_dec_det d) (v :: _ ⇒⇩_b _) = ℒ⇩_b v"
  unfolding is_dec_det_def mk_dec_det_def
  using ex_opt_act[OF assms]  someI_ex
  apply (auto intro!: exI[of _ ‹λs. SOME a. a ∈ A s ∧ L⇩_a a v s = ℒ⇩_b v s›] bfun_eqI)
   apply (smt (verit, best) someI_ex)
  apply (subst L_eq_L⇩_a)
  apply (subst expectation_return_pmf)
  by (smt (verit, best) someI_ex)

(* With finite action sets, ν⇩_b_opt is the value of a deterministic stationary policy.
   Combines ex_opt_dec_det at v = ν⇩_b_opt with the fixed-point facts ℒ⇩_b_opt and L_ν_fix_iff. *)
lemma thm_6_2_10:
  assumes "⋀s. finite (A s)"
  shows "∃d ∈ D⇩_D. ν⇩_b_opt = ν⇩_b (mk_stationary_det d)"
  using assms conserving_imp_opt' ℒ⇩_b_opt L_ν_fix_iff ex_opt_dec_det 
  by metis

subsubsection ‹Existence of Epsilon-Optimal Policies›

(* For every e > 0 there is a deterministic decision rule d in D⇩_D that attains ℒ⇩_b v
   up to e: ℒ⇩_b v ≤ L (mk_dec_det d) v + e *⇩_R 1.
   No attainment or finiteness assumption is needed; only the supremum property is used. *)
lemma ex_det_eps:
  assumes "0 < e"
  shows "∃d ∈ D⇩_D. ℒ⇩_b v ≤ L (mk_dec_det d) v + e *⇩_R 1"
proof -
  (* per state: some action comes within e of the supremum over A s *)
  have "∃a ∈ A s. ℒ⇩_b v s ≤ L⇩_a a v s + e" for s
  proof -
    have "bdd_above ((λa. L⇩_a a v s) ` A s)"
      using L⇩_a_le by (auto intro!: boundedI bounded_imp_bdd_above)
    (* anything strictly below a supremum is exceeded by some element (less_cSUP_iff) *)
    hence "∃a ∈ A s. ℒ⇩_b v s - e < L⇩_a a v s"
      unfolding ℒ⇩_b.rep_eq ℒ_eq_SUP_det SUP_step_det_eq
      by (auto simp: less_cSUP_iff[OF A_ne, symmetric] ‹0 < e›)
    thus "∃a ∈ A s. ℒ⇩_b v s ≤ L⇩_a a v s + e"
      by force
  qed
  (* metis assembles the per-state choices into one decision rule *)
  thus ?thesis
    unfolding mk_dec_det_def is_dec_det_def
    by (auto simp: L_def 𝒫⇩₁.rep_eq bind_return_pmf K_st_def less_eq_bfun_def) metis
qed

(* For every eps > 0 there is a deterministic decision rule d in D⇩_D whose stationary
   policy has value within eps of ν⇩_b_opt: ν⇩_b_opt ≤ ν⇩_b (mk_stationary_det d) + eps *⇩_R 1.
   Outline: pick d that attains ℒ⇩_b ν⇩_b_opt up to (1-l) * eps (ex_det_eps), apply the
   series of powers of l *⇩_R 𝒫⇩₁ d to the resulting inequality, and use the geometric
   series so that the slack (1-l) * eps becomes eps. *)
lemma thm_6_2_11:
  assumes "eps > 0"
  shows "∃d ∈ D⇩_D. ν⇩_b_opt ≤ ν⇩_b (mk_stationary_det d) + eps *⇩_R 1"
proof -
  have "(1-l) * eps > 0"
    by (simp add: assms)
  then obtain d where "d ∈ D⇩_D" and d: "ℒ⇩_b ν⇩_b_opt ≤ L (mk_dec_det d) ν⇩_b_opt + ((1-l)*eps) *⇩_R 1"
    using ex_det_eps[of _ ν⇩_b_opt] by auto
  let ?d = "mk_dec_det d"
  let ?lK = "l *⇩_R 𝒫⇩₁ ?d"
  let ?lK_opt = "l *⇩_R 𝒫⇩₁ ?d ν⇩_b_opt"
  (* ℒ⇩_b ν⇩_b_opt is ν⇩_b_opt; unfold L and isolate the reward term on the right *)
  have "ν⇩_b_opt  ≤ r_dec⇩_b ?d + ?lK_opt + ((1-l)*eps) *⇩_R 1"
    using L_def ℒ_fix_imp_opt d by simp
  hence "ν⇩_b_opt - ?lK_opt - ((1-l)*eps) *⇩_R 1 ≤ r_dec⇩_b ?d"
    by (simp add: cancel_ab_semigroup_add_class.diff_right_commute diff_le_eq)
  (* apply the series to both sides; on the right it yields the stationary value (ν_stationary) *)
  hence "(∑i. ?lK ^^ i) (ν⇩_b_opt - ?lK_opt - ((1-l)*eps) *⇩_R 1) ≤ ν⇩_b (mk_stationary ?d)"
    using lemma_6_1_2_b suminf_cong by (simp add: blincomp_scaleR_right ν_stationary)
  hence "((∑i. ?lK ^^ i) o⇩_L (id_blinfun - ?lK)) ν⇩_b_opt - (∑i. ?lK ^^ i) (((1-l)*eps) *⇩_R 1) 
    ≤ (ν⇩_b (mk_stationary ?d))"
    by (simp add: blinfun.diff_right blinfun.diff_left blinfun.scaleR_left)
  (* inv_norm_le' collapses the composition of the series with id_blinfun - ?lK *)
  hence le: "ν⇩_b_opt - (∑i. ?lK ^^ i) (((1-l)*eps) *⇩_R 1) ≤ ν⇩_b (mk_stationary ?d)"
    by (auto simp: inv_norm_le')
  have s: "summable (λi. (l *⇩_R 𝒫⇩₁ ?d)^^i)"
    using convergent_disc_𝒫⇩₁ summable_iff_convergent'
    by (simp add: blincomp_scaleR_right summable_iff_convergent')
  (* evaluate the series on the constant slack: the geometric series turns (1-l) * eps into eps *)
  have "(∑i. ?lK ^^ i) (((1-l)*eps) *⇩_R 1) = eps *⇩_R 1"
  proof -
    have "(∑i. ?lK ^^ i) (((1-l)*eps) *⇩_R 1) = ((1-l)*eps) *⇩_R (∑i. ?lK^^i) 1"
      using blinfun.scaleR_right by blast
    also have "… = ((1-l)*eps) *⇩_R (∑i. (?lK^^i) 1) "
      using s by (auto simp: bounded_linear.suminf[of "λx. blinfun_apply x 1"])
    also have "… = ((1-l)*eps) *⇩_R (∑i. (l ^ i)) *⇩_R 1"
      by (auto simp: blinfun.scaleR_left blincomp_scaleR_right bounded_linear_scaleR_left 
          bounded_linear.suminf[of "λx. x *⇩_R 1"])
    also have "… = ((1-l)*eps) *⇩_R (1 / (1-l)) *⇩_R 1"
      by (simp add: suminf_geometric)
    also have "… = eps *⇩_R 1"
      using disc_lt_one ‹0 < (1 - l) * eps› by auto
    finally show ?thesis .
  qed
  thus ?thesis
    using ‹d ∈ D⇩_D› diff_le_eq le
    by auto
qed

(* Metric version of ex_det_eps: for every e > 0 some deterministic decision rule d
   satisfies dist (ℒ⇩_b v) (L (mk_dec_det d) v) ≤ e.
   The difference ℒ⇩_b v - L (mk_dec_det d) v is squeezed between 0 (L_le_ℒ⇩_b) and e. *)
lemma ex_det_dist_eps:
  assumes "0 < (e :: real)"
  shows "∃d ∈ D⇩_D. dist (ℒ⇩_b v) (L (mk_dec_det d) v) ≤ e"
proof -
  obtain d where "d ∈ D⇩_D" "L (mk_dec_det d) v ≤ (ℒ⇩_b v)" 
    and h2: "ℒ⇩_b v ≤ L (mk_dec_det d) v + e *⇩_R 1"
    using assms ex_det_eps L_le_ℒ⇩_b by blast
  (* lower bound on the difference *)
  hence "0 ≤ ℒ⇩_b v -  L (mk_dec_det d) v"
    by simp
  (* upper bound on the difference *)
  moreover have "ℒ⇩_b v - L (mk_dec_det d) v ≤ e *⇩_R 1"
    using h2 by (simp add: add.commute diff_le_eq)
  ultimately have "∀s. ¦(ℒ⇩_b v) s -  L (mk_dec_det d) v s¦ ≤ e"
    unfolding less_eq_bfun_def by auto
  (* the distance of bounded functions is the supremum of the pointwise distances *)
  hence "dist (ℒ⇩_b v) (L (mk_dec_det d) v) ≤ e"
    unfolding dist_bfun.rep_eq by (auto intro!: cSUP_least simp: dist_real_def)
  thus ?thesis
    using ‹d ∈ D⇩_D› 
    by auto
qed

(* Auxiliary fact on reals: a strict inequality x < y leaves room for some positive eps
   with x + eps ≤ y. *)
lemma less_imp_ex_add_le: "(x :: real) < y ⟹ ∃eps>0. x + eps ≤ y"
  by (meson field_le_epsilon less_le_not_le nle_le)

(* ν⇩_b_opt s is bounded by the supremum over D⇩_D of the values of deterministic stationary
   policies at s. Via le_cSUP_iff it suffices to show that every y < ν⇩_b_opt s is exceeded
   by some deterministic stationary policy; thm_6_2_11 with eps / 2 provides one. *)
lemma ν⇩_b_opt_le_det: "ν⇩_b_opt s ≤ (⨆d ∈ D⇩_D. ν⇩_b (mk_stationary_det d) s)"
proof (subst le_cSUP_iff, safe)
  fix y
  assume "y < ν⇩_b_opt s"
  (* the gap between y and ν⇩_b_opt s is at least some eps > 0 *)
  then obtain eps where 1: "y ≤ ν⇩_b_opt s - eps" and "eps > 0"
    using less_imp_ex_add_le by force
  hence "eps / 2 > 0" by auto
  (* a deterministic stationary policy within eps / 2 of ν⇩_b_opt at s *)
  obtain d where "d ∈ D⇩_D" and "ν⇩_b_opt s ≤ ν⇩_b (mk_stationary_det d) s + eps / 2"
    using thm_6_2_11[OF ‹eps / 2 > 0›] by fastforce
  hence "y < ν⇩_b (mk_stationary_det d) s"
    using ‹eps > 0› by (auto simp: diff_less_eq intro: le_less_trans[OF 1])
  thus "∃i∈D⇩_D. y < ν⇩_b (mk_stationary_det i) s"
    using ‹d ∈ D⇩_D› by blast
next
  (* side conditions of le_cSUP_iff: non-empty index set and bounded image *)
  show "D⇩_D = {} ⟹ False"
    using D_det_ne by blast
  show "bdd_above ((λd. ν⇩_b (mk_stationary_det d) s) ` D⇩_D)"
    by (auto intro!: bounded_imp_bdd_above boundedI abs_ν_le simp: ν⇩_b.rep_eq)
qed

(* ν⇩_b_opt s equals the supremum over D⇩_D of the values of deterministic stationary
   policies at s: ≤ is ν⇩_b_opt_le_det, ≥ follows from ν⇩_b_le_opt_DD via cSUP_least. *)
lemma ν⇩_b_opt_eq_det: "ν⇩_b_opt s = (⨆d ∈ D⇩_D. ν⇩_b (mk_stationary_det d) s)"
  using ν⇩_b_le_opt_DD D_det_ne
  by (fastforce intro!: antisym[OF ν⇩_b_opt_le_det] cSUP_least)

(* unused, delete? *)
(* For v0 in bfun, the iterates of λv. ℒ (Bfun v) started at v0 converge uniformly on UNIV
   to ν_opt. The statement on plain functions is transferred from ℒ⇩_b_lim, the convergence
   of the iterates of ℒ⇩_b on bounded functions. *)
lemma lemma_6_3_1_a:
  assumes "v0 ∈ bfun"
  shows "uniform_limit UNIV (λn. ((λv. ℒ (Bfun v)) ^^ n) v0) ν_opt sequentially"
proof -
  (* iterating on plain functions agrees with iterating ℒ⇩_b on the wrapped start value *)
  have ℒ_Bfun_eq: "v0 ∈ bfun ⟹ ((λv. ℒ (Bfun v))^^n) v0 = (ℒ⇩_b ^^n) (Bfun v0)" for n
    by (induction n) (auto simp: ℒ⇩_b.rep_eq apply_bfun_inverse)
  (* convergence of bounded functions gives uniform convergence *)
  have "uniform_limit UNIV (λn. (ℒ⇩_b ^^ n) (Bfun v0)) ν⇩_b_opt sequentially"
    by (intro tendsto_bfun_uniform_limit[OF ℒ⇩_b_lim])
  hence "uniform_limit UNIV (λn. (ℒ⇩_b ^^ n) (Bfun v0)) ν_opt sequentially"
    by (simp add: ν_opt_bfun ν⇩_b_opt.rep_eq)
  thus ?thesis
    by (auto simp: assms ℒ_Bfun_eq)
qed

(* In a real normed vector space, the distance between consecutive elements of a
   convergent sequence tends to 0. *)
lemma dist_Suc_tendsto_zero:
  assumes "(λn. f n) ⇢ (y::_::real_normed_vector)"
  shows "(λn. dist (f n) (f (Suc n))) ⇢ 0"
  using assms tendsto_diff tendsto_norm LIMSEQ_Suc by (fastforce simp: dist_norm)

(* The distance between consecutive iterates of ℒ⇩_b tends to 0
   (dist_Suc_tendsto_zero applied to ℒ⇩_b_lim). *)
lemma dist_ℒ⇩_b_tendsto: "(λn. dist ((ℒ⇩_b^^n) v) ((ℒ⇩_b^^(Suc n)) v)) ⇢ 0"
  using ℒ⇩_b_lim by (fast intro!: dist_Suc_tendsto_zero)

(* max_L_ex s v holds iff a ↦ L⇩_a a v s has an arg-max on the action set A s. *)
definition "max_L_ex s v ≡ has_arg_max (λa. L⇩_a a v s) (A s)"

(* ν⇩_b_fin at horizon 0 is the zero function, for every policy p; simp rule. *)
lemma ν⇩_b_fin_zero[simp]: "ν⇩_b_fin p 0 = 0"
  by (auto simp: ν⇩_b_fin.rep_eq)

(* Successor step of ν⇩_b_fin for a stationary policy: going from horizon n to Suc n adds
   the n-th power of l *⇩_R 𝒫⇩₁ d applied to r_dec⇩_b d; simp rule. *)
lemma ν⇩_b_fin_Suc[simp]: 
  "ν⇩_b_fin (mk_stationary d) (Suc n) = ν⇩_b_fin (mk_stationary d) n + ((l *⇩_R 𝒫⇩₁ d)^^ n) (r_dec⇩_b d)"
  by (auto simp: 𝒫⇩_X_sconst ν⇩_b_fin.rep_eq ν_fin_eq_𝒫⇩_X blincomp_scaleR_right blinfun.scaleR_left)

(* Closed form of ν⇩_b_fin for a stationary policy: the sum of the first n powers of
   l *⇩_R 𝒫⇩₁ d applied to r_dec⇩_b d. Induction on n using the two simp rules
   ν⇩_b_fin_zero and ν⇩_b_fin_Suc. *)
lemma ν⇩_b_fin_eq: "ν⇩_b_fin (mk_stationary d) n = (∑i < n. ((l *⇩_R 𝒫⇩₁ d)^^ i)) (r_dec⇩_b d)"
  by (induction n) (auto simp add: plus_blinfun.rep_eq)

(* m-fold application of L d to v splits into ν⇩_b_fin of the stationary policy at horizon m
   plus the m-th power of l *⇩_R 𝒫⇩₁ d applied to v. Induction on m with v arbitrary;
   the step peels off the innermost application of L d (funpow_Suc_right). *)
lemma L_iter: "(L d ^^ m) v = ν⇩_b_fin (mk_stationary d) m + ((l *⇩_R 𝒫⇩₁ d)^^ m) v"
proof (induction m arbitrary: v)
  case (Suc m)
  have "(L d ^^ Suc m) v = (L d ^^ m) (L d v)"
    by (simp add: funpow_Suc_right del: funpow.simps)
  (* induction hypothesis instantiated at L d v *)
  also have "… = ν⇩_b_fin (mk_stationary d) m + ((l *⇩_R 𝒫⇩₁ d) ^^ m) (L d v)"
    using Suc by simp
  also have "… = ν⇩_b_fin (mk_stationary d) (Suc m) + ((l *⇩_R 𝒫⇩₁ d) ^^ Suc m) v"
    unfolding L_def 
    by (auto simp: 𝒫⇩₁_pow blinfun.bilinear_simps blincomp_scaleR_right funpow_swap1) 
  finally show ?case .
qed simp

(* For fixed horizon N and state s, the values ν⇩_b_fin (mk_stationary x) N s over any
   set X of decision rules form a bounded set (bound from abs_ν_fin_le). *)
lemma bounded_stationary_ν⇩_b_fin: "bounded ((λx. (ν⇩_b_fin (mk_stationary x) N) s) ` X)"
  using ν⇩_b_fin.rep_eq abs_ν_fin_le by (auto intro!: boundedI)

(* For fixed m, v and s, the values of the m-th power of l *⇩_R 𝒫⇩₁ x applied to v at s are
   bounded over any set X; the explicit bound used is l ^ m * norm v. *)
lemma bounded_disc_𝒫⇩₁: "bounded ((λx. (((l *⇩_R 𝒫⇩₁ x) ^^ m) v) s) ` X)"
  by (auto simp: 𝒫⇩_X_const[symmetric] blinfun.bilinear_simps blincomp_scaleR_right 
      intro!: boundedI[of _  "l ^ m * norm v"] mult_left_mono order.trans[OF abs_le_norm_bfun])

(* Undiscounted variant of bounded_disc_𝒫⇩₁: the values of the m-th power of 𝒫⇩₁ x applied
   to v at s are bounded over any set X; the explicit bound used is norm v. *)
lemma bounded_disc_𝒫⇩₁': "bounded ((λx. ((𝒫⇩₁ x ^^ m) v) s) ` X)"
  by (auto simp: 𝒫⇩_X_const[symmetric] intro!: boundedI[of _  "norm v"] order.trans[OF abs_le_norm_bfun])

(* For a decision rule d, n-fold application of L d stays below n-fold application of ℒ⇩_b.
   Induction on n; each step chains L_mono with L_le_ℒ⇩_b. *)
lemma L_iter_le_ℒ⇩_b: "is_dec d ⟹ (L d ^^ n) v ≤ (ℒ⇩_b ^^ n) v"
  using order_trans[OF L_mono L_le_ℒ⇩_b] by (induction n) auto

end

subsection ‹More Restrictive MDP Locales›
(* discrete_MDP extended by the assumption that every state has a finite action set.
   NOTE(review): the assumption carries no name, so it is presumably only reachable through
   the generated locale axioms fact - confirm whether a name should be added. *)
locale MDP_fin_acts = discrete_MDP +
  assumes "⋀s. finite (A s)"

(* MDP_reward_disc extended by the assumption Sup_att: for every state s and every v,
   a ↦ L⇩_a a v s has an arg-max on A s (max_L_ex). Under this assumption the supremum in
   ℒ⇩_b is attained, which gives deterministic improving decision rules for every v. *)
locale MDP_att_ℒ = MDP_reward_disc A K r l
  for
    A and 
    K :: "'s ::countable × 'a ::countable ⇒ 's pmf" and
    r and l +
  assumes Sup_att: "max_L_ex (s :: 's) v"
begin
(* If a is an arg-max of a' ↦ L⇩_a a' v s over A s, then ℒ⇩_b v s = L⇩_a a v s. *)
theorem ℒ⇩_b_eq_argmax_L⇩_a:
  fixes v :: "'s ⇒⇩_b real"
  assumes "is_arg_max (λa. L⇩_a a v s) (λa. a ∈ A s) a"
  shows "ℒ⇩_b v s = L⇩_a a v s"
  using L⇩_a_le assms A_ne ℒ⇩_b.rep_eq ℒ_eq_SUP_det SUP_step_det_eq
  by (auto intro!: cSUP_upper2 antisym cSUP_least simp: is_arg_max_linorder)

(* The action selected by arg_max_on dominates every other action in A s. *)
lemma L⇩_a_le_arg_max: "a ∈ A s ⟹ L⇩_a a v s ≤ L⇩_a (arg_max_on (λa. L⇩_a a v s) (A s)) v s"
  using Sup_att app_arg_max_ge[OF Sup_att[unfolded max_L_ex_def]]
  by (simp add: arg_max_on_def)

(* If an arg-max exists on Q, arg_max_on picks an element of Q.
   NOTE(review): ex_opt_act uses a fact of the same name with [OF assms A_ne] (finiteness and
   non-emptiness), which does not match this statement - presumably an imported lemma that
   this one shadows inside the locale; confirm the name clash is intended. *)
lemma arg_max_on_in: "has_arg_max f Q ⟹ arg_max_on f Q ∈ Q"
  using has_arg_max_arg_max by (auto simp: arg_max_on_def)

(* ℒ⇩_b v s is the value of L⇩_a at the action selected by arg_max_on. *)
lemma ℒ⇩_b_eq_L⇩_a_max: "ℒ⇩_b v s = L⇩_a (arg_max_on (λa. L⇩_a a v s) (A s)) v s"
  using app_arg_max_eq_SUP[symmetric] Sup_att max_L_ex_def 
  by (auto simp: ℒ⇩_b_eq_SUP_det SUP_step_det_eq)

(* Some deterministic decision rule attains ℒ⇩_b v; the witness picks the arg_max_on
   action in every state. *)
lemma ex_opt_det: "∃d ∈ D⇩_D. ℒ⇩_b v = L (mk_dec_det d) v"
proof -
  define d where "d = (λs. arg_max_on (λa. L⇩_a a v s) (A s))"
  have "ℒ⇩_b v s = L (mk_dec_det d) v s" for s
    by (auto simp: d_def ℒ⇩_b_eq_L⇩_a_max L_eq_L⇩_a_det)
  (* the selected action lies in A s, so d is a deterministic decision rule *)
  moreover have "d ∈ D⇩_D"
    using Sup_att arg_max_on_in by (auto simp: d_def is_dec_det_def max_L_ex_def)
  ultimately show ?thesis
    by auto
qed

(* Every v has a deterministic improving decision rule (ex_opt_det with ν_improving_alt). *)
lemma ex_improving_det: "∃d ∈ D⇩_D. ν_improving v (mk_dec_det d)"
  using ν_improving_alt ex_opt_det by auto
end

(* discrete_MDP extended by a fixed selection function arb_act on action sets:
   applied to any non-empty set X it returns an element of X (arb_act_in, simp rule).
   Nothing is assumed about arb_act on the empty set. *)
locale MDP_act = discrete_MDP A K for A :: "'s::countable ⇒ 'a::countable set" and K +
  fixes arb_act ::  "'a set ⇒ 'a"
  assumes arb_act_in[simp]: "X ≠ {} ⟹ arb_act X ∈ X" 

(* Combination of MDP_act (selection function arb_act) and MDP_att_ℒ (attained suprema).
   Here arb_act applied to opt_acts v s yields a concrete improving decision rule. *)
locale MDP_act_disc = MDP_act A K + MDP_att_ℒ A K r l
  for A :: "'s::countable ⇒ 'a::countable set" and K r l
begin


(* The action selected by arb_act from opt_acts v s satisfies is_opt_act; Sup_att provides
   an arg-max, so arb_act_in applies to the set of arg-max actions. *)
lemma is_opt_act_some: "is_opt_act v s (arb_act (opt_acts v s))"
  using arb_act_in[of "{a. is_arg_max (λa. L⇩_a a v s) (λa. a ∈ A s) a}"] Sup_att has_arg_max_def
  unfolding max_L_ex_def is_opt_act_def by auto

(* The selected action lies in A s. *)
lemma some_opt_acts_in_A: "arb_act (opt_acts v s) ∈ A s"
  using is_opt_act_some unfolding is_opt_act_def is_arg_max_def by auto

(* The deterministic decision rule that picks arb_act (opt_acts (apply_bfun v0) s) in each
   state s is improving for v0. *)
lemma ν_improving_opt_acts: "ν_improving v0 (mk_dec_det (λs. arb_act (opt_acts (apply_bfun v0) s)))"
  using is_opt_act_def is_opt_act_some some_opt_acts_in_A
  by (subst ν_improving_alt) (fastforce simp: L_eq_L⇩_a_det ℒ⇩_b_eq_argmax_L⇩_a is_dec_det_def)+

end

(* MDP_reward_disc specialised to state and action types of sort finite;
   no further assumptions are added. *)
locale MDP_finite_type = MDP_reward_disc A K r l
  for A and K :: "'s :: finite × 'a :: finite ⇒ 's pmf" and r l

end