Theory MDP_reward
theory MDP_reward
imports
Bounded_Functions
MDP_reward_Util
Blinfun_Util
MDP_disc
begin
section ‹Markov Decision Processes with Rewards›
locale MDP_reward = discrete_MDP A K
for
A and
K :: "'s ::countable × 'a ::countable ⇒ 's pmf" +
fixes
r :: "('s × 'a) ⇒ real" and
l :: real
assumes
zero_le_disc [simp]: "0 ≤ l" and
r_bounded: "bounded (range r)"
begin
text ‹
This extension to the basic MDPs is formalized with another locale.
It assumes the existence of a reward function @{term r} which takes a state-action pair to a real
number. We assume that the function is bounded @{prop r_bounded}.
Furthermore, we fix a discounting factor @{term l}, where @{term "0 ≤ l ∧ l < 1"}.
›
subsection ‹Util›
subsubsection ‹Basic Properties of rewards›
lemma r_bfun: "r ∈ bfun"
using r_bounded
by auto
lemma r_bounded': "bounded (r ` X)"
by (auto intro: r_bounded bounded_subset)
definition "r⇩M = (⨆sa. ¦r sa¦)"
lemma abs_r_le_r⇩M: "¦r sa¦ ≤ r⇩M"
using bounded_norm_le_SUP_norm r_bounded r⇩M_def by fastforce
lemma abs_r⇩M_eq_r⇩M [simp]: "¦r⇩M¦ = r⇩M"
using abs_r_le_r⇩M by fastforce
lemma r⇩M_nonneg: "0 ≤ r⇩M"
using abs_r⇩M_eq_r⇩M by linarith
lemma measurable_r_nth [measurable]: "(λt. r (t !! i)) ∈ borel_measurable S"
by measurable
lemma integrable_r_nth [simp]: "integrable (𝒯 p s) (λt. r (t !! i))"
by (fastforce simp: bounded_iff intro: abs_r_le_r⇩M)
lemma expectation_abs_r_le: "measure_pmf.expectation d (λa. ¦r (s, a)¦) ≤ r⇩M"
using abs_r_le_r⇩M
by (fastforce intro!: measure_pmf.integral_le_const measure_pmf.integrable_const_bound)
lemma abs_exp_r_le: "¦measure_pmf.expectation d r¦ ≤ r⇩M"
using abs_r_le_r⇩M
by (fastforce intro!: measure_pmf.integral_le_const order.trans[OF integral_abs_bound] measure_pmf.integrable_const_bound)
subsubsection ‹Infinite disounted sums›
lemma abs_disc_eq[simp]: "¦l ^ i * x¦ = l ^ i * ¦x¦"
by (auto simp: abs_mult)
lemma norm_l_pow_eq[simp]: "norm (l^t *⇩R F) = l^t * norm F"
by auto
subsection ‹Total Reward for Single Traces›
abbreviation "ν_trace_fin t N ≡ ∑i < N. l ^ i * r (t !! i)"
abbreviation "ν_trace t ≡ ∑i. l ^ i * r (t !! i)"
lemma abs_ν_trace_fin_le: "¦ν_trace_fin t N¦ ≤ (∑i < N. l^i * r⇩M)"
by (auto intro!: sum_mono order.trans[OF sum_abs] mult_left_mono abs_r_le_r⇩M)
lemma measurable_suminf_reward[measurable]: "ν_trace ∈ borel_measurable S"
by measurable
lemma integrable_ν_trace_fin: "integrable (𝒯 p s) (λt. ν_trace_fin t N)"
by (fastforce simp: bounded_iff intro: abs_ν_trace_fin_le)
context
fixes p :: "('s, 'a) pol"
begin
subsection ‹Expected Finite-Horizon Discounted Reward›
definition "ν_fin n s = ∫t. ν_trace_fin t n ∂𝒯 p s"
lemma abs_ν_fin_le: "¦ν_fin N s¦ ≤ (∑i<N. l^i * r⇩M)"
unfolding ν_fin_def
using abs_ν_trace_fin_le
by (fastforce intro!: prob_space.integral_le_const order_trans[OF integral_abs_bound])
lemma ν_fin_bfun: "(λs. ν_fin N s) ∈ bfun"
by (auto intro!: abs_ν_fin_le)
lift_definition ν⇩b_fin :: "nat ⇒ 's ⇒⇩b real" is ν_fin
using ν_fin_bfun .
lemma ν_fin_Suc[simp]: "ν_fin (Suc n) s = ν_fin n s + l ^ n * ∫t. r (t !! n) ∂𝒯 p s"
by (simp add: ν_fin_def)
lemma ν_fin_zero[simp]: "ν_fin 0 s = 0"
by (simp add: ν_fin_def)
lemma ν_fin_eq_Pn: "ν_fin n s = (∑i<n. l^i * measure_pmf.expectation (Pn' p s i) r)"
by (induction n) (auto simp: Pn'_eq_𝒯 integral_distr)
end
subsection ‹Expected Total Discounted Reward›
definition "ν p s = lim (λn. ν_fin p n s)"
lemmas ν_eq_lim = ν_def
lemma ν_eq_Pn: "ν p s = (∑i. l^i * measure_pmf.expectation (Pn' p s i) r)"
by (simp add: ν_fin_eq_Pn ν_eq_lim suminf_eq_lim)
subsection ‹Reward of a Decision Rule›
context
fixes d :: "('s, 'a) dec"
begin
abbreviation "r_dec s ≡ ∫a. r (s, a) ∂d s"
lemma abs_r_dec_le: "¦r_dec s¦ ≤ r⇩M"
using expectation_abs_r_le integral_abs_bound order_trans by fast
lemma r_dec_eq_r_K0: "r_dec s = measure_pmf.expectation (K0' d s) r"
by (simp add: K0'_def)
lemma r_dec_bfun: "r_dec ∈ bfun"
using abs_r_dec_le by (auto intro!: bfun_normI)
lift_definition r_dec⇩b :: "'s ⇒⇩b real" is "r_dec"
using r_dec_bfun .
declare r_dec⇩b.rep_eq[simp] bfun.Bfun_inverse[simp]
lemma norm_r_dec_le: "norm r_dec⇩b ≤ r⇩M"
by (simp add: abs_r_dec_le norm_bound)
end
lemma r_dec_det [simp]: "r_dec (mk_dec_det d) s = r (s, d s)"
unfolding mk_dec_det_def by auto
subsection ‹Transition Probability Matrix for MDPs›
context
fixes p :: "nat ⇒ ('s, 'a) dec"
begin
definition "𝒫⇩X n = push_exp (λs. Xn' (mk_markovian p) s n)"
lemma 𝒫⇩X_0[simp]: "𝒫⇩X 0 = id"
by (simp add: 𝒫⇩X_def)
lemma 𝒫⇩X_bounded_linear[simp]: "bounded_linear (𝒫⇩X t)"
unfolding 𝒫⇩X_def by simp
lemma norm_𝒫⇩X [simp]: "onorm (𝒫⇩X t) = 1"
unfolding 𝒫⇩X_def by simp
lemma norm_𝒫⇩X_apply[simp]: "norm (𝒫⇩X n x) ≤ norm x"
using onorm[OF 𝒫⇩X_bounded_linear] by simp
lemma 𝒫⇩X_bound_r: "norm (𝒫⇩X t (r_dec⇩b (p t))) ≤ r⇩M"
using norm_𝒫⇩X_apply norm_r_dec_le order.trans by blast
lemma 𝒫⇩X_bounded_r: "bounded (range (λt. (𝒫⇩X t (r_dec⇩b (p t)))))"
using 𝒫⇩X_bound_r by (auto intro!: boundedI)
end
lemma ν_fin_elem: "ν_fin (mk_markovian p) n s = (∑i<n. l^i * 𝒫⇩X p i (r_dec⇩b (p i)) s)"
unfolding 𝒫⇩X_def ν_fin_eq_Pn Pn'_markovian_eq_Xn'_bind measure_pmf_bind
using measure_pmf_in_subprob_algebra abs_r_le_r⇩M
by (subst integral_bind) (auto simp: r_dec_eq_r_K0)
lemma ν⇩b_fin_eq_𝒫⇩X: "ν⇩b_fin (mk_markovian p) n = (∑i<n. l^i *⇩R 𝒫⇩X p i (r_dec⇩b (p i)))"
by (auto simp: ν_fin_elem sum_apply_bfun ν⇩b_fin.rep_eq)
lemma ν_fin_eq_𝒫⇩X: "ν_fin (mk_markovian p) n = (∑i<n. l^i *⇩R 𝒫⇩X p i (r_dec⇩b (p i)))"
by (metis ν⇩b_fin.rep_eq ν⇩b_fin_eq_𝒫⇩X)
text ‹
@{term "𝒫⇩1 d v"} defines for each state the expected value of @{term v}
after taking a single step in the MDP according to the decision rule @{term d}.
›
context
fixes d :: "('s, 'a) dec"
begin
lift_definition 𝒫⇩1 :: "('s ⇒⇩b real) ⇒⇩L ('s ⇒⇩b real)" is "push_exp (K_st d)"
using push_exp_bounded_linear .
lemma 𝒫⇩1_bfun_one [simp]:"𝒫⇩1 1 = 1"
by (auto simp: 𝒫⇩1.rep_eq)
lemma 𝒫⇩1_pow_bfun_one [simp]: "(𝒫⇩1^^t) 1 = 1"
by (induction t) auto
lemma 𝒫⇩1_pow: "blinfun_apply (𝒫⇩1 ^^ n) = blinfun_apply 𝒫⇩1 ^^ n"
by (induction n) auto
lemma norm_𝒫⇩1 [simp]: "norm 𝒫⇩1 = 1"
by (simp add: norm_blinfun.rep_eq 𝒫⇩1.rep_eq)
end
lemma 𝒫⇩X_Suc: "𝒫⇩X p (Suc n) v = 𝒫⇩1 (p 0) ((𝒫⇩X (λn. p (Suc n)) n) v)"
unfolding 𝒫⇩X_def 𝒫⇩1.rep_eq
by (fastforce intro!: abs_le_norm_bfun integral_bind[where K = "count_space UNIV"]
simp: measure_pmf_in_subprob_algebra measure_pmf_bind Suc_Xn'_markovian)
lemma 𝒫⇩X_Suc': "𝒫⇩X p (Suc n) v = 𝒫⇩X p n (𝒫⇩1 (p n) v)"
proof (induction n arbitrary: p)
case 0
thus ?case
by (simp add: 𝒫⇩X_Suc)
next
case (Suc n)
thus ?case
by (metis 𝒫⇩X_Suc)
qed
lemma 𝒫⇩X_const: "𝒫⇩X (λ_. d) n = 𝒫⇩1 d ^^ n"
by (induction n) (auto simp add: 𝒫⇩1_pow 𝒫⇩X_Suc)
lemma 𝒫⇩X_sconst: "𝒫⇩X (λ_. p) n = 𝒫⇩1 p ^^n"
using 𝒫⇩X_const.
lemma norm_P_n[simp]: "onorm (𝒫⇩1 d ^^ n) = 1"
using norm_𝒫⇩X[of "λ_. d"] by (auto simp: 𝒫⇩X_sconst)
lemma norm_𝒫⇩1_pow [simp]: "norm (𝒫⇩1 d ^^ t) = 1"
by (simp add: norm_blinfun.rep_eq)
lemma 𝒫⇩X_Suc_n_elem: "𝒫⇩X p n (𝒫⇩1 (p n) v) = 𝒫⇩X p (Suc n) v"
using 𝒫⇩X_Suc' 𝒫⇩1.rep_eq by auto
lemma 𝒫⇩1_eq_𝒫⇩X_one: "blinfun_apply (𝒫⇩1 (p 0)) = 𝒫⇩X p 1"
by (auto simp: 𝒫⇩X_Suc' 𝒫⇩1.rep_eq)
lemma 𝒫⇩1_pos: "0 ≤ u ⟹ 0 ≤ 𝒫⇩1 d u"
by (auto simp: 𝒫⇩1.rep_eq less_eq_bfun_def)
lemma 𝒫⇩1_nonneg: "nonneg_blinfun (𝒫⇩1 d)"
by (simp add: 𝒫⇩1_pos nonneg_blinfun_def)
lemma 𝒫⇩1_n_pos: "0 ≤ u ⟹ 0 ≤ (𝒫⇩1 d ^^ n) u"
by (induction n) (auto simp: 𝒫⇩1.rep_eq less_eq_bfun_def)
lemma 𝒫⇩1_n_nonneg: "nonneg_blinfun (𝒫⇩1 d ^^ n)"
by (simp add: 𝒫⇩1_n_pos nonneg_blinfun_def)
lemma 𝒫⇩1_n_disc_pos: "0 ≤ u ⟹ 0 ≤ (l^n *⇩R 𝒫⇩1 d ^^n) u"
by (auto simp: 𝒫⇩1_n_pos scaleR_nonneg_nonneg blinfun.scaleR_left)
lemma 𝒫⇩1_sum_pos: "0 ≤ u ⟹ 0 ≤ (∑t≤n. l^t *⇩R (𝒫⇩1 d ^^ t)) u"
using 𝒫⇩1_n_pos 𝒫⇩1_pos
by (induction n) (auto simp: blinfun.add_left blinfun.scaleR_left scaleR_nonneg_nonneg)
lemma 𝒫⇩1_sum_ge:
assumes "0 ≤ u"
shows "u ≤ (∑t≤n. l^t *⇩R 𝒫⇩1 d ^^t) u"
using 𝒫⇩1_n_disc_pos[OF assms, of "Suc _"]
by (induction n) (auto intro: add_increasing2 simp add: blinfun.add_left)
subsection ‹The Bellman Operator›
definition "L d v ≡ r_dec⇩b d + l *⇩R 𝒫⇩1 d v"
lemma norm_L_le: "norm (L d v) ≤ r⇩M + l * norm v"
using norm_blinfun[of "𝒫⇩1 d"] norm_𝒫⇩1 norm_r_dec_le
by (auto intro!: norm_add_rule_thm mult_left_mono simp: L_def)
lemma abs_L_le: "¦L d v s¦ ≤ r⇩M + l * norm v"
using order.trans[OF norm_le_norm_bfun norm_L_le] by auto
subsubsection ‹Bellman Operator for Single Actions›
abbreviation "L⇩a a v s ≡ r (s, a) + l * measure_pmf.expectation (K (s,a)) v"
lemma L⇩a_le:
fixes v :: "'s ⇒⇩b real"
shows "¦L⇩a a v s¦ ≤ r⇩M + l * norm v"
using abs_r_le_r⇩M
by (fastforce intro: order_trans[OF abs_triangle_ineq] order_trans[OF integral_abs_bound]
add_mono mult_mono measure_pmf.integral_le_const abs_le_norm_bfun
simp: abs_mult)
lemma L⇩a_bounded:
"bounded (range (λa. L⇩a a (apply_bfun v) s))"
using L⇩a_le by (auto intro!: boundedI)
lemma L⇩a_int:
fixes d :: "'a pmf" and v :: "'s ⇒⇩b real"
shows "(∫a. L⇩a a v s ∂d) = (∫a. r (s, a) ∂d) + l * ∫a. ∫s'. v s' ∂K (s, a) ∂d"
proof (subst Bochner_Integration.integral_add)
show "integrable d (λa. r (s, a))"
using abs_r_le_r⇩M by (fastforce intro!: bounded_integrable simp: bounded_iff)
show "integrable d (λa. l * ∫s'. v s' ∂K (s, a))"
by (intro bounded_integrable)
(auto intro!: mult_mono order_trans[OF integral_abs_bound] boundedI[of _ "l * norm v"]
measure_pmf.integral_le_const simp: abs_le_norm_bfun abs_mult)
qed auto
lemma L_eq_L⇩a: "L d v s = measure_pmf.expectation (d s) (λa. L⇩a a v s)"
unfolding L⇩a_int L_def K_st_def 𝒫⇩1.rep_eq
by (auto simp: measure_pmf_bind integral_measure_pmf_bind[where B = "norm v"] abs_le_norm_bfun)
lemma L_eq_L⇩a_det: "L (mk_dec_det d) v s = L⇩a (d s) v s"
by (auto simp: L_eq_L⇩a mk_dec_det_def)
lemma L⇩a_eq_L: "measure_pmf.expectation p (λa. L⇩a a (apply_bfun v) s) =
L (λt. if t = s then p else return_pmf (SOME a. a ∈ A t)) v s"
unfolding L_eq_L⇩a by auto
lemma L_le: "L d v s ≤ r⇩M + l * norm v"
unfolding L_def
using norm_𝒫⇩1 norm_blinfun[of "(𝒫⇩1 d)"] abs_r_dec_le
by (fastforce intro: order_trans[OF le_norm_bfun] add_mono mult_left_mono dest: abs_le_D1)
lemma L⇩a_le': "L⇩a a (apply_bfun v) s ≤ r⇩M + l * norm v"
using L⇩a_le abs_le_D1 by blast
subsection ‹Optimality Equations›
definition "ℒ (v :: 's ⇒⇩b real) s = (⨆d ∈ D⇩R. L d v s)"
lemma ℒ_bfun: "ℒ v ∈ bfun"
unfolding ℒ_def using abs_L_le ex_dec by (fastforce intro!: cSup_abs_le bfun_normI)
lift_definition ℒ⇩b :: "('s ⇒⇩b real) ⇒ 's ⇒⇩b real" is ℒ
using ℒ_bfun .
lemma L_bounded[simp, intro]: "bounded (range (λp. L p v s))"
using abs_L_le by (auto intro!: boundedI)
lemma L_bounded'[simp, intro]: "bounded ((λp. L p v s) ` X)"
by (auto intro: bounded_subset)
lemma L_bdd_above[simp, intro]: "bdd_above ((λp. L p v s) ` X)"
by (auto intro: bounded_imp_bdd_above)
lemma L_le_ℒ⇩b: "is_dec d ⟹ L d v ≤ ℒ⇩b v"
by (fastforce simp: ℒ⇩b.rep_eq ℒ_def intro!: cSUP_upper)
subsubsection ‹Equivalences involving @{const ℒ⇩b}›
lemma SUP_step_MR_eq:
"ℒ v s = (⨆pa ∈ {pa. set_pmf pa ⊆ A s}. (∫a. L⇩a a v s ∂measure_pmf pa))"
unfolding ℒ_def
proof (intro antisym)
show "(⨆d∈D⇩R. L d v s) ≤ (⨆pa ∈ {pa. set_pmf pa ⊆ A s}. ∫a. L⇩a a v s ∂measure_pmf pa)"
proof (rule cSUP_mono)
show "D⇩R ≠ {}"
using D⇩R_ne .
next show "bdd_above ((λpa. ∫a. L⇩a a v s ∂measure_pmf pa) ` {pa. set_pmf pa ⊆ A s})"
using L⇩a_bounded L⇩a_le
by (auto intro!: order_trans[OF integral_abs_bound]
bounded_imp_bdd_above boundedI[where B = "r⇩M + l * norm v"]
measure_pmf.integral_le_const bounded_integrable)
next show "∃m∈{pa. set_pmf pa ⊆ A s}. L n v s ≤ ∫a. L⇩a a v s ∂measure_pmf m" if "n ∈ D⇩R" for n
using that
by (fastforce simp: L_eq_L⇩a L⇩a_int is_dec_def)
qed
next
have aux: "{pa. set_pmf pa ⊆ A s} ≠ {}"
using D⇩R_ne is_dec_def by auto
show "(⨆pa∈{pa. set_pmf pa ⊆ A s}. ∫a. L⇩a a v s ∂measure_pmf pa) ≤ (⨆d∈D⇩R. L d v s)"
proof (intro cSUP_least[OF aux] cSUP_upper2)
fix n
assume h: "n ∈ {pa. set_pmf pa ⊆ A s}"
let ?p = "(λs'. if s = s' then n else SOME a. set_pmf a ⊆ A s')"
have aux: "∃a. set_pmf a ⊆ A sa" for sa
using ex_dec is_dec_def by blast
show "?p ∈ D⇩R"
unfolding is_dec_def using h someI_ex[OF aux] by auto
thus "(∫a. L⇩a a v s ∂n) ≤ L ?p v s"
by (auto simp: L_eq_L⇩a)
show "bdd_above ((λd. L d v s) ` D⇩R)"
by (fastforce intro!: bounded_imp_bdd_above simp: bounded_def)
next
qed
qed
lemma ℒ⇩b_eq_SUP_L⇩a: "ℒ⇩b v s = (⨆p ∈ {p. set_pmf p ⊆ A s}. ∫a. L⇩a a v s ∂measure_pmf p)"
using SUP_step_MR_eq ℒ⇩b.rep_eq by presburger
lemma SUP_step_det_eq: "(⨆d ∈ D⇩D. L (mk_dec_det d) v s) = (⨆a ∈ A s. L⇩a a v s)"
proof (intro antisym cSUP_mono)
show "bdd_above ((λa. L⇩a a v s) ` A s)"
using L⇩a_bounded by (fastforce intro!: bounded_imp_bdd_above simp: bounded_def)
show "bdd_above ((λd. L (mk_dec_det d) v s) ` D⇩D)"
by (auto intro!: bounded_imp_bdd_above boundedI abs_L_le)
show "∃m∈A s. L (mk_dec_det n) v s ≤ L⇩a m v s" if "n ∈ D⇩D" for n
using that is_dec_det_def by (auto simp: L_eq_L⇩a_det intro: bexI[of _ "n s"])
show "∃m∈D⇩D. L⇩a n v s ≤ L (mk_dec_det m) v s" if "n ∈ A s" for n
using that A_ne
by (fastforce simp: L_eq_L⇩a_det is_dec_det_def some_in_eq
intro!: bexI[of _ "λs'. if s = s' then _ else SOME a. a ∈ A s'"])
qed (auto simp: A_ne)
lemma integrable_L⇩a: "integrable (measure_pmf x) (λa. L⇩a a (apply_bfun v) s)"
proof (intro Bochner_Integration.integrable_add integrable_mult_right)
show "integrable (measure_pmf x) (λx. r (s, x))"
using abs_r_le_r⇩M
by (auto intro: measure_pmf.integrable_const_bound[of _ "r⇩M"])
next
show "integrable (measure_pmf x) (λx. measure_pmf.expectation (K (s, x)) v)"
by (auto intro!: bounded_integrable boundedI order.trans[OF integral_abs_bound]
measure_pmf.integral_le_const abs_le_norm_bfun)
qed
lemma SUP_L⇩a_eq_det:
fixes v :: "'s ⇒⇩b real"
shows "(⨆p∈{p. set_pmf p ⊆ A s}. ∫a. L⇩a a v s ∂measure_pmf p) = (⨆a∈A s. L⇩a a v s)"
proof (intro antisym)
show "(⨆pa∈{pa. set_pmf pa ⊆ A s}. measure_pmf.expectation pa (λa. L⇩a a v s))
≤ (⨆a∈A s. L⇩a a v s)"
using ex_dec is_dec_def integrable_L⇩a A_ne L⇩a_bounded
by (fastforce intro: bounded_range_subset intro!: cSUP_least lemma_4_3_1)
show "(⨆a∈A s. L⇩a a v s) ≤ (⨆p∈{p. set_pmf p ⊆ A s}. ∫a. L⇩a a v s ∂measure_pmf p)"
unfolding SUP_step_MR_eq[symmetric] SUP_step_det_eq[symmetric] ℒ_def
using ex_dec_det by (fastforce intro!: cSUP_mono)
qed
lemma ℒ_eq_SUP_det: "ℒ v s = (⨆d ∈ D⇩D. L (mk_dec_det d) v s)"
using SUP_step_MR_eq SUP_step_det_eq SUP_L⇩a_eq_det by auto
lemma ℒ⇩b_eq_SUP_det: "ℒ⇩b v s = (⨆d ∈ D⇩D. L (mk_dec_det d) v s)"
using ℒ_eq_SUP_det unfolding ℒ⇩b.rep_eq by auto
subsection ‹Monotonicity›
lemma 𝒫⇩X_mono[intro]: "a ≤ b ⟹ 𝒫⇩X p n a ≤ 𝒫⇩X p n b"
by (fastforce simp: 𝒫⇩X_def intro: integral_mono)
lemma 𝒫⇩1_mono[intro]: "a ≤ b ⟹ 𝒫⇩1 p a ≤ 𝒫⇩1 p b"
using 𝒫⇩1_nonneg by auto
lemma L_mono[intro]: "u ≤ v ⟹ L d u ≤ L d v"
unfolding L_def by (auto intro: scaleR_left_mono)
lemma ℒ⇩b_mono[intro]: "u ≤ v ⟹ ℒ⇩b u ≤ ℒ⇩b v"
using ex_dec L_mono[of u v]
by (fastforce intro!: cSUP_mono simp: ℒ⇩b.rep_eq ℒ_def)
lemma step_mono:
assumes "ℒ⇩b v ≤ v" "d ∈ D⇩R"
shows "L d v ≤ v"
using assms L_le_ℒ⇩b order.trans by blast
lemma step_mono_elem_det:
assumes "v ≤ ℒ⇩b v" "e > 0"
shows "∃d∈D⇩D. v ≤ L (mk_dec_det d) v + e *⇩R 1"
proof -
have "v s ≤ (⨆a∈A s. L⇩a a v s)" for s
using SUP_step_det_eq ℒ⇩b_eq_SUP_det assms(1) by fastforce
hence "∃a∈A s. v s - e < L⇩a a v s" for s
using A_ne L⇩a_le'
by (subst less_cSUP_iff[symmetric]) (fastforce simp: assms add_strict_increasing algebra_simps intro!: bdd_above.I2)+
hence aux: "∃a∈A s. v s ≤ L⇩a a v s + e" for s
by (auto simp: diff_less_eq intro: less_imp_le)
then obtain d where "is_dec_det d" "v s ≤ L (mk_dec_det d) v s + e" for s
by (metis L_eq_L⇩a_det is_dec_det_def)
thus ?thesis
by fastforce
qed
lemma step_mono_elem:
assumes "v ≤ ℒ⇩b v" "e > 0"
shows "∃d∈D⇩R. v ≤ L d v + e *⇩R 1"
using assms step_mono_elem_det by blast
lemma 𝒫⇩X_L_le:
assumes "ℒ⇩b v ≤ v" "p ∈ Π⇩M⇩R"
shows "𝒫⇩X p n (L (p n) v) ≤ 𝒫⇩X p n v"
using assms step_mono by auto
end
locale MDP_reward_disc = MDP_reward A K r l
for
A and
K :: "'s ::countable × 'a ::countable ⇒ 's pmf" and
r l +
assumes
disc_lt_one [simp]: "l < 1"
begin
definition "is_opt_act v s = is_arg_max (λa. L⇩a a v s) (λa. a ∈ A s)"
abbreviation "opt_acts v s ≡ {a. is_opt_act v s a}"
lemma summable_disc [intro, simp]: "summable (λi. l ^ i * x)"
by (simp add: mult.commute)
lemma summable_r_disc[intro, simp]:
"summable (λi. ¦l ^ i * r (sa i)¦)"
"summable (λi. l ^ i * ¦r (sa i)¦)"
"summable (λi. l ^ i * r (sa i))"
proof -
show "summable (λi. ¦l ^ i * r (sa i)¦)"
using abs_r_le_r⇩M
by (fastforce intro!: mult_left_mono summable_comparison_test'[OF summable_disc])
thus "summable (λi. l ^ i * r (sa i))" "summable (λi. l ^ i * ¦r (sa i)¦)"
by (auto intro: summable_rabs_cancel)
qed
lemma summable_norm_disc_I[intro]:
assumes "summable (λt. (l^t * norm F))"
shows "summable (λt. norm (l^t *⇩R F))"
using assms by auto
lemma summable_norm_disc_I'[intro]:
assumes "summable (λt. (l^t * norm (F t)))"
shows "summable (λt. norm (l^t *⇩R F t))"
using assms by auto
lemma summable_discI [intro]:
assumes "bounded (range F)"
shows "summable (λt. l^t * norm (F t))"
proof -
obtain b where "norm (F x) ≤ b" for x
using assms by (auto simp: bounded_iff)
thus ?thesis
using Abel_lemma[of l 1 F b] by (auto simp: mult.commute)
qed
lemma summable_disc_reward [intro]:
assumes "bounded (range (F :: nat ⇒ 'b :: banach))"
shows "summable (λt. l^t *⇩R (F t))"
using assms by (auto intro: summable_norm_cancel)
lemma summable_norm_bfun_disc: "summable (λt. l^t * norm (apply_bfun f t))"
using norm_le_norm_bfun
by (auto simp: mult.commute[of "l^_"] intro!: Abel_lemma[of _ 1 _ "norm f"])
lemma summable_bfun_disc [simp]: "summable (λt. l^t * (apply_bfun f t))"
proof -
have "norm (l^t * apply_bfun f t) = l^t * norm (apply_bfun f t)" for t
by (auto simp: abs_mult)
hence "summable (λt. norm (l^t * (apply_bfun f t)))"
by (auto simp only: abs_mult)
thus ?thesis
by (auto intro: summable_norm_cancel)
qed
lemma norm_bfun_disc_le: "norm f ≤ B ⟹ (∑x. l^x * norm (apply_bfun f x)) ≤ (∑x. l^x * B)"
by (fastforce intro!: suminf_le mult_left_mono norm_le_norm_bfun intro: order.trans)
lemma norm_bfun_disc_le': "norm f ≤ B ⟹ (∑x. l^x * (apply_bfun f x)) ≤ (∑x. l^x * B)"
by (auto simp: mult_left_mono intro!: suminf_le order.trans[OF _ norm_bfun_disc_le])
lemma sum_disc_lim_l: "(∑x. l^x * B) = B /(1-l)"
by (simp add: suminf_mult2[symmetric] summable_geometric suminf_geometric[of l])
lemma sum_disc_bound: "(∑x. l^x * apply_bfun f x) ≤ (norm f) /(1-l)"
using norm_bfun_disc_le' sum_disc_lim by auto
lemma sum_disc_bound':
fixes f :: "nat ⇒ 'b ⇒⇩b real"
assumes h: "∀n. norm (f n) ≤ B"
shows "norm (∑x. l^x *⇩R f x) ≤ B /(1-l)"
proof -
have "norm (∑x. l^x *⇩R f x) ≤ (∑x. norm (l^x *⇩R f x))"
using h
by (fastforce intro!: boundedI summable_norm)
also have "… ≤ (∑x. l^x * B)"
using h
by (auto intro!: suminf_le boundedI simp: mult_mono')
also have "… = B /(1-l)"
by (simp add: sum_disc_lim)
finally show "norm (∑x. l^x *⇩R f x) ≤ B /(1-l)" .
qed
lemma abs_ν_trace_le: "¦ν_trace t¦ ≤ (∑i. l ^ i * r⇩M)"
by (auto intro!: abs_r_le_r⇩M mult_left_mono order_trans[OF summable_rabs] suminf_le)
lemma integrable_ν_trace: "integrable (𝒯 p s) ν_trace"
by (fastforce simp: bounded_iff intro: abs_ν_trace_le)
context
fixes p :: "('s, 'a) pol"
begin
lemma ν_eq_ν_trace: "ν p s = ∫t. ν_trace t ∂𝒯 p s"
proof -
have "(λn. ν_fin p n s) ⇢ ∫t. ν_trace t ∂𝒯 p s"
unfolding ν_fin_def
proof(intro integral_dominated_convergence)
show "AE x in 𝒯 p s. ν_trace_fin x ⇢ ν_trace x"
using summable_LIMSEQ by blast
next
have "(∑i<N. l ^ i * r⇩M) ≤ (∑N. l ^ N * r⇩M)" for N
by (auto intro: sum_le_suminf simp: r⇩M_nonneg)
thus "AE x in 𝒯 p s. norm (ν_trace_fin x N) ≤ (∑N. l ^ N * r⇩M)" for N
using order_trans[OF abs_ν_trace_fin_le] by fastforce
qed auto
thus ?thesis
using ν_eq_lim limI by fastforce
qed
lemma abs_ν_le: "¦ν p s¦ ≤ (∑i. l^i * r⇩M)"
unfolding ν_eq_Pn
using abs_exp_r_le
by (fastforce intro!: order.trans[OF summable_rabs] suminf_le summable_comparison_test'[OF summable_disc] mult_left_mono)
lemma ν_le: "ν p s ≤ (∑i. l^i * r⇩M)"
by (auto intro: abs_ν_le abs_le_D1)
lemma ν_bfun: "ν p ∈ bfun"
by (auto intro!: abs_ν_le)
lift_definition ν⇩b :: "'s ⇒⇩b real" is "ν p"
using ν_bfun by blast
lemma norm_ν_le: "norm ν⇩b ≤ r⇩M / (1-l)"
using abs_ν_le sum_disc_lim
by (auto simp: ν⇩b.rep_eq norm_bfun_def' intro: cSUP_least)
end
lemma ν_as_markovian: "ν (mk_markovian (as_markovian p (return_pmf s))) s = ν p s"
by (auto simp: ν_eq_Pn Pn_as_markovian_eq Pn'_def)
lemma ν⇩b_as_markovian: "ν⇩b (mk_markovian (as_markovian p (return_pmf s))) s = ν⇩b p s"
using ν_as_markovian by (auto simp: ν⇩b.rep_eq)
subsection ‹Optimal Reward›
definition "ν_MD s ≡ ⨆p ∈ Π⇩M⇩D. ν (mk_markovian_det p) s"
definition "ν_opt s ≡ ⨆p ∈ Π⇩H⇩R. ν p s"
lemma ν_opt_bfun: "ν_opt ∈ bfun"
using abs_ν_le policies_ne
by (fastforce simp: ν_opt_def intro!: order_trans[OF cSup_abs_le] bfun_normI)
lift_definition ν⇩b_opt :: "'s ⇒⇩b real" is ν_opt
using ν_opt_bfun .
lemma ν⇩b_opt_eq: "ν⇩b_opt s = (⨆p ∈ Π⇩H⇩R. ν⇩b p s)"
using ν⇩b.rep_eq ν⇩b_opt.rep_eq ν_opt_def by presburger
lemma ν_le_ν_opt [intro]:
assumes "is_policy p"
shows "ν p s ≤ ν_opt s"
unfolding ν_opt_def using abs_ν_le assms
by (force intro: cSUP_upper intro!: bounded_imp_bdd_above boundedI)
lemma ν⇩b_le_opt [intro]: "p ∈ Π⇩H⇩R ⟹ ν⇩b p ≤ ν⇩b_opt"
using ν_le by (fastforce simp: ν⇩b.rep_eq ν⇩b_opt.rep_eq)
lemma ν⇩b_le_opt_MD [intro]: "p ∈ Π⇩M⇩D ⟹ ν⇩b (mk_markovian_det p) ≤ ν⇩b_opt"
by (auto simp: mk_markovian_det_def is_dec_det_def is_dec_def is_policy_def)
lemma ν⇩b_le_opt_DD [intro]: "is_dec_det d ⟹ ν⇩b (mk_stationary_det d) ≤ ν⇩b_opt"
by (auto simp add: is_policy_def mk_markovian_def)
lemma ν⇩b_le_opt_DR [intro]: "is_dec d ⟹ ν⇩b (mk_stationary d) ≤ ν⇩b_opt"
by (auto simp add: is_policy_def mk_markovian_def)
lemma ν⇩b_opt_eq_MR: "ν⇩b_opt s = (⨆p ∈ Π⇩M⇩R. ν⇩b (mk_markovian p) s)"
proof (rule antisym)
show "ν⇩b_opt s ≤ (⨆p∈Π⇩M⇩R. ν⇩b (mk_markovian p) s)"
unfolding ν⇩b_opt_eq
proof (rule cSUP_mono)
show "Π⇩H⇩R ≠ {}"
using policies_ne by simp
show "bdd_above ((λp. ν⇩b (mk_markovian p) s) ` Π⇩M⇩R)"
by (auto intro!: boundedI bounded_imp_bdd_above abs_ν_le simp: ν⇩b.rep_eq)
show "n ∈ Π⇩H⇩R ⟹ ∃m∈Π⇩M⇩R. ν⇩b n s ≤ ν⇩b (mk_markovian m) s" for n
using is_Π⇩M⇩R_as_markovian by (subst ν⇩b_as_markovian[symmetric]) fastforce
qed
show "(⨆p∈Π⇩M⇩R. ν⇩b (mk_markovian p) s) ≤ ν⇩b_opt s"
using Π⇩M⇩R_ne Π⇩M⇩R_imp_policies
by (auto intro!: cSUP_mono bounded_imp_bdd_above boundedI abs_ν_le simp: ν⇩b_opt_eq ν⇩b.rep_eq)
qed
lemma summable_norm_disc_reward'[simp]: "summable (λt. l^t * norm (𝒫⇩X p t (r_dec⇩b (p t))))"
using 𝒫⇩X_bounded_r by auto
lemma summable_disc_reward_𝒫⇩X [simp]: "summable (λt. l^t *⇩R 𝒫⇩X p t (r_dec⇩b (p t)))"
using summable_disc_reward 𝒫⇩X_bounded_r by blast
lemma disc_reward_tendsto:
"(λn. ∑t<n. l^t *⇩R 𝒫⇩X p t (r_dec⇩b (p t))) ⇢ (∑t. l^t *⇩R 𝒫⇩X p t (r_dec⇩b (p t)))"
by (simp add: summable_LIMSEQ)
lemma ν_eq_𝒫⇩X: "ν (mk_markovian p) = (∑i. l^i *⇩R 𝒫⇩X p i (r_dec⇩b (p i)))"
proof -
have "ν (mk_markovian p) s = (∑i. l^i * 𝒫⇩X p i (r_dec⇩b (p i)) s)" for s
unfolding ν⇩b.rep_eq 𝒫⇩X_def ν_eq_Pn Pn'_markovian_eq_Xn'_bind measure_pmf_bind
using measure_pmf_in_subprob_algebra abs_r_le_r⇩M
by (subst integral_bind) (auto simp: r_dec_eq_r_K0)
thus ?thesis
by (auto simp: suminf_apply_bfun)
qed
lemma ν⇩b_eq_𝒫⇩X: "ν⇩b (mk_markovian p) = (∑i. l^i *⇩R 𝒫⇩X p i (r_dec⇩b (p i)))"
by (auto simp: ν_eq_𝒫⇩X ν⇩b.rep_eq)
lemma ν⇩b_fin_tendsto_ν⇩b: "(ν⇩b_fin (mk_markovian p)) ⇢ ν⇩b (mk_markovian p)"
using disc_reward_tendsto ν⇩b_eq_𝒫⇩X ν⇩b_fin_eq_𝒫⇩X
by presburger
lemma norm_𝒫⇩1_l_less: "norm (l *⇩R 𝒫⇩1 d) < 1"
by auto
lemma disc_𝒫⇩1_tendsto: "(λn. (∑t≤n. l^t *⇩R 𝒫⇩1 d ^^t)) ⇢ (∑t. l^t *⇩R 𝒫⇩1 d ^^t)"
by (fastforce simp: bounded_iff intro: summable_LIMSEQ')
lemma disc_𝒫⇩1_lim: "lim (λn. (∑t≤n. l^t *⇩R 𝒫⇩1 d ^^ t)) = (∑t. l^t *⇩R 𝒫⇩1 d ^^t)"
using limI disc_𝒫⇩1_tendsto
by blast
lemma convergent_disc_𝒫⇩1: "convergent (λn. (∑t≤n. l^t *⇩R 𝒫⇩1 d ^^t))"
using convergentI disc_𝒫⇩1_tendsto
by blast
lemma 𝒫⇩1_suminf_ge:
assumes "0 ≤ u" shows "u ≤ (∑t. l^t *⇩R 𝒫⇩1 d ^^t) u"
proof -
have aux: "⋀x. (λn. (∑t≤n. l^t *⇩R 𝒫⇩1 d ^^t) u x) ⇢ (∑t. l^t *⇩R 𝒫⇩1 d ^^t) u x"
using bfun_tendsto_apply_bfun disc_𝒫⇩1_lim lim_blinfun_apply[OF convergent_disc_𝒫⇩1]
by fastforce
have "⋀n. u ≤ (∑t≤n. l^t *⇩R 𝒫⇩1 d ^^t) u"
using 𝒫⇩1_sum_ge[OF assms] by auto
thus ?thesis
by (auto intro!: LIMSEQ_le_const[OF aux])
qed
lemma 𝒫⇩1_suminf_pos:
assumes "0 ≤ u"
shows "0 ≤ (∑t. l^t *⇩R 𝒫⇩1 d ^^t) u"
using 𝒫⇩1_suminf_ge[of u] assms order.trans by auto
lemma lemma_6_1_2_b:
assumes "v ≤ u"
shows "(∑t. l^t *⇩R 𝒫⇩1 d ^^t) v ≤ (∑t. l^t *⇩R 𝒫⇩1 d ^^t) u"
proof -
have "0 ≤ (∑n. l ^ n *⇩R 𝒫⇩1 d ^^ n) (u - v)"
using 𝒫⇩1_suminf_pos assms by simp
thus ?thesis
by (simp add: blinfun.diff_right)
qed
lemma ν_stationary: "ν⇩b (mk_stationary d) = (∑t. l^t *⇩R (𝒫⇩1 d ^^ t)) (r_dec⇩b d)"
proof -
have "ν⇩b (mk_stationary d) = (∑t. (l ^ t *⇩R (𝒫⇩1 d ^^ t)) (r_dec⇩b d))"
by (simp add: ν⇩b_eq_𝒫⇩X scaleR_blinfun.rep_eq 𝒫⇩X_sconst)
also have "... = (∑t. (l ^ t *⇩R (𝒫⇩1 d ^^ t))) (r_dec⇩b d)"
by (subst bounded_linear.suminf[where f = "λx. blinfun_apply x (r_dec⇩b d)"])
(auto intro!: bounded_linear.suminf boundedI)
finally show ?thesis .
qed
lemma ν_stationary_inv: "ν⇩b (mk_stationary d) = inv⇩L (id_blinfun - l *⇩R 𝒫⇩1 d) (r_dec⇩b d)"
by (auto simp: ν_stationary inv⇩L_inf_sum blincomp_scaleR_right)
text ‹The value of a markovian policy can be expressed in terms of @{const L}.›
lemma ν_step: "ν⇩b (mk_markovian p) = L (p 0) (ν⇩b (mk_markovian (λn. p (Suc n))))"
proof -
have s: "summable (λt. l^t *⇩R (𝒫⇩X p (Suc t) (r_dec⇩b (p (Suc t)))))"
using 𝒫⇩X_bound_r by (auto intro!: boundedI[of _ r⇩M])
have
"ν⇩b (mk_markovian p) = r_dec⇩b (p 0) + (∑t. l ^ (Suc t) *⇩R 𝒫⇩X p (Suc t) (r_dec⇩b (p (Suc t))))"
by (subst suminf_split_head) (auto simp: ν⇩b_eq_𝒫⇩X)
also have
"… = r_dec⇩b (p 0) + l *⇩R (∑t. 𝒫⇩1 (p 0) (l^t *⇩R 𝒫⇩X (λn. p (Suc n)) t (r_dec⇩b (p (Suc t)))))"
using suminf_scaleR_right[OF s] by (auto simp: 𝒫⇩X_Suc blinfun.scaleR_right)
also have
"… = L (p 0) (ν⇩b (mk_markovian (λn. p (Suc n))))"
using blinfun.bounded_linear_right bounded_linear.suminf[of "blinfun_apply (𝒫⇩1 (p 0))"]
by (fastforce simp add: ν⇩b_eq_𝒫⇩X L_def)
finally show ?thesis .
qed
lemma L_ν_fix: "ν⇩b (mk_stationary d) = L d (ν⇩b (mk_stationary d))"
using ν_step .
lemma L_fix_ν:
assumes "L p v = v"
shows "v = ν⇩b (mk_stationary p)"
proof -
have "r_dec⇩b p = (id_blinfun - l *⇩R 𝒫⇩1 p) v"
using assms by (auto simp: eq_diff_eq L_def blinfun.diff_left blinfun.scaleR_left)
hence "v = (∑t. (l *⇩R 𝒫⇩1 p)^^t) (r_dec⇩b p)"
using inv_norm_le'(2)[OF norm_𝒫⇩1_l_less] by auto
thus "v = ν⇩b (mk_stationary p)"
by (auto simp: ν_stationary blincomp_scaleR_right)
qed
lemma L_ν_fix_iff: "L d v = v ⟷ v = ν⇩b (mk_stationary d)"
using L_fix_ν L_ν_fix by auto
subsection ‹Properties of Solutions of the Optimality Equations›
abbreviation "𝒫⇩d p n v ≡ l^n *⇩R 𝒫⇩X p n v"
lemma 𝒫⇩d_lim: "(λn. (𝒫⇩d p n v)) ⇢ 0"
proof -
have "(λn. l^n * norm v) ⇢ 0"
by (auto intro!: tendsto_eq_intros)
moreover have "norm (𝒫⇩d p n v) ≤ l^n * norm v" for p n
by (simp add: mult_mono')
ultimately have "(λn. norm (𝒫⇩d p n v)) ⇢ 0" for p
by (auto simp: Lim_transform_bound[where g = "λn. (l^n * norm v)"])
thus "(λn. (𝒫⇩d p n v)) ⇢ 0" for p
using tendsto_norm_zero_cancel by fast
qed
lemma ℒ_dec_ge_opt:
assumes "ℒ⇩b v ≤ v"
shows "ν⇩b_opt ≤ v"
proof -
have "ν⇩b (mk_markovian p) ≤ v" if "p ∈ Π⇩M⇩R" for p
proof -
let ?p = "mk_markovian p"
have aux: "ν⇩b_fin ?p n + l^n *⇩R 𝒫⇩X p n v ≤ v" for n
proof (induction n)
case (Suc n)
have "𝒫⇩X p n (r_dec⇩b (p n)) + l *⇩R (𝒫⇩X p (Suc n) v) ≤ 𝒫⇩X p n v"
using 𝒫⇩X_L_le assms that by (simp add: 𝒫⇩X_Suc_n_elem L_def linear_simps)
hence "ν⇩b_fin ?p (n + 1) + l^(n + 1) *⇩R (𝒫⇩X p (n + 1) v) ≤ ν⇩b_fin ?p n + l^n *⇩R (𝒫⇩X p n v)"
by (auto simp del: scaleR_scaleR intro: scaleR_left_mono simp: ν⇩b_fin_eq_𝒫⇩X
mult.commute[of l] scaleR_add_right[symmetric] scaleR_scaleR[symmetric])
also have "… ≤ v"
using Suc.IH by (auto simp: ν⇩b_fin_eq_𝒫⇩X)
finally show ?case
by auto
qed (auto simp: ν⇩b_fin_eq_𝒫⇩X)
have 1: "(λn. (ν⇩b_fin ?p n + 𝒫⇩d p n v) s) ⇢ ν⇩b ?p s" for s
using bfun_tendsto_apply_bfun Limits.tendsto_add[OF ν⇩b_fin_tendsto_ν⇩b 𝒫⇩d_lim] by fastforce
have "ν⇩b ?p s ≤ v s" for s
using that aux assms by (fastforce intro!: lim_mono[OF _ 1, of _ _ "λn. v s"])
thus ?thesis
using that by blast
qed
thus ?thesis
using policies_ne by (fastforce simp: is_policy_def ν⇩b_opt_eq_MR intro!: cSUP_least)
qed
lemma ℒ_inc_le_opt:
assumes "v ≤ ℒ⇩b v"
shows "v ≤ ν⇩b_opt"
proof -
have le_elem: "v s ≤ ν⇩b_opt s + (e/(1-l))" if "e > 0" for s e
proof -
obtain d where "d ∈ D⇩R" and hd: "v ≤ L d v + e *⇩R 1"
using assms step_mono_elem ‹e > 0› by blast
let ?Pinf = "(∑i. l^i *⇩R 𝒫⇩1 d^^i)"
have "v ≤ r_dec⇩b d + l *⇩R (𝒫⇩1 d) v + e *⇩R 1"
using hd L_def by fastforce
hence "(id_blinfun - l *⇩R 𝒫⇩1 d) v ≤ r_dec⇩b d + e *⇩R 1"
by (auto simp: blinfun.diff_left blinfun.scaleR_left algebra_simps)
hence "?Pinf ((id_blinfun - l *⇩R 𝒫⇩1 d) v) ≤ ?Pinf (r_dec⇩b d + e *⇩R 1)"
using lemma_6_1_2_b 𝒫⇩1_def hd by auto
hence "v ≤ ?Pinf (r_dec⇩b d + e *⇩R 1)"
using inv_norm_le'(2)[of "l *⇩R 𝒫⇩1 d"] by (auto simp: blincomp_scaleR_right)
also have "… = ν⇩b (mk_stationary d) + e *⇩R ?Pinf 1"
by (simp add: ν_stationary blinfun.add_right blinfun.scaleR_right)
also have "… = ν⇩b (mk_stationary d) + e *⇩R (∑i. (l^i *⇩R ((𝒫⇩1 d^^i))) 1)"
using convergent_disc_𝒫⇩1
by (auto simp: summable_iff_convergent' bounded_linear.suminf[of "λx. blinfun_apply x 1"])
also have "… = ν⇩b (mk_stationary d) + e *⇩R (∑i. (l^i *⇩R 1))"
by (auto simp: scaleR_blinfun.rep_eq)
also have "… ≤ (ν⇩b (mk_stationary d) + (e / (1-l)) *⇩R 1)"
by (auto simp: bounded_linear.suminf[symmetric, where f = "λx. x *⇩R 1"]
suminf_geometric bounded_linear_scaleR_left summable_geometric)
finally have "v s ≤ (ν⇩b (mk_stationary d) + (e/(1-l)) *⇩R 1) s"
by auto
thus "v s ≤ ν⇩b_opt s + (e/(1-l))"
using ‹d ∈ D⇩R› ν⇩b_le_opt
by (auto simp: is_policy_def mk_markovian_def less_eq_bfun_def intro: order_trans)
qed
have "v s ≤ ν⇩b_opt s + e" if "e > 0" for s e
proof -
have "e * (1 - l) > 0"
by (simp add: ‹0 < e›)
thus "v s ≤ ν⇩b_opt s + e"
using disc_lt_one that le_elem by (fastforce split: if_splits)
qed
thus ?thesis
by (fastforce intro: field_le_epsilon)
qed
lemma ℒ_fix_imp_opt:
assumes "v = ℒ⇩b v"
shows "v = ν⇩b_opt"
using assms dual_order.antisym[OF ℒ_dec_ge_opt ℒ_inc_le_opt] by auto
lemma bounded_P: "bounded (𝒫⇩1 ` X)"
by (auto simp: bounded_iff)
subsection ‹Solutions to the Optimality Equation›
subsubsection ‹@{const ℒ⇩b} and @{const L} are Contraction Mappings›
declare bounded_apply_blinfun[intro] bounded_apply_bfun'[intro]
lemma contraction_ℒ: "dist (ℒ⇩b v) (ℒ⇩b u) ≤ l * dist v u"
proof -
have "dist (ℒ⇩b v s) (ℒ⇩b u s) ≤ l * dist v u" if "ℒ⇩b u s ≤ ℒ⇩b v s" for s v u
proof -
have "dist (ℒ⇩b v s) (ℒ⇩b u s) ≤ (⨆d ∈ D⇩R. L d v s - L d u s)"
using ex_dec that by (fastforce intro!: le_SUP_diff' simp: dist_real_def ℒ⇩b.rep_eq ℒ_def)
also have "… = (⨆d ∈ D⇩R. l * (𝒫⇩1 d (v - u) s))"
by (auto simp: L_def right_diff_distrib blinfun.diff_right)
also have "… = l * (⨆d ∈ D⇩R. 𝒫⇩1 d (v - u) s)"
using D⇩R_ne bounded_P by (fastforce intro: bounded_SUP_mul)
also have "… ≤ l * norm (⨆d ∈ D⇩R. 𝒫⇩1 d (v - u) s)"
by (simp add: mult_left_mono)
also have "… ≤ l * (⨆d ∈ D⇩R. norm ((𝒫⇩1 d (v - u)) s))"
proof -
have "bounded ((λx. norm ((𝒫⇩1 x (v - u)) s)) ` D⇩R)"
using bounded_apply_bfun' bounded_P bounded_apply_blinfun bounded_norm_comp by metis
thus ?thesis
using D⇩R_ne ex_dec bounded_norm_comp by (fastforce intro!: mult_left_mono)
qed
also have "… ≤ l * (⨆p ∈ D⇩R. norm (𝒫⇩1 p ((v - u))))"
using D⇩R_ne abs_le_norm_bfun bounded_P
by (fastforce simp: bounded_norm_comp intro!: bounded_imp_bdd_above mult_left_mono cSUP_mono)
also have "… ≤ l * (⨆p ∈ D⇩R. norm ((v - u)))"
using norm_push_exp_le_norm D⇩R_ne
by (fastforce simp: 𝒫⇩1.rep_eq intro!: mult_left_mono cSUP_mono)
also have "… = l * dist v u"
by (auto simp: dist_norm)
finally show ?thesis .
qed
hence "ℒ⇩b u s ≤ ℒ⇩b v s ⟹ dist (ℒ⇩b v s) (ℒ⇩b u s) ≤ l * dist v u"
"ℒ⇩b v s ≤ ℒ⇩b u s ⟹ dist (ℒ⇩b v s) (ℒ⇩b u s) ≤ l * dist v u" for u v s
by (fastforce simp: dist_commute)+
thus ?thesis
using linear[of "ℒ⇩b u _"] by (fastforce intro: dist_bound)
qed
lemma is_contraction_ℒ: "is_contraction ℒ⇩b"
using contraction_ℒ zero_le_disc disc_lt_one unfolding is_contraction_def by blast
lemma contraction_L: "dist (L p v) (L p u) ≤ l * dist v u"
proof -
have aux: "L p v s - L p u s ≤ l * dist v u" if lea: "L p v s ≥ L p u s" for v s u
proof -
have "L p v s - L p u s = (l *⇩R (𝒫⇩1 p v - 𝒫⇩1 p u)) s"
by (simp add: L_def scale_right_diff_distrib)
also have "… ≤ l * norm (𝒫⇩1 p (v - u) s)"
by (auto simp: blinfun.diff_right intro!: mult_left_mono)
also have "… ≤ l * norm (𝒫⇩1 p (v - u))"
using abs_le_norm_bfun by (auto intro!: mult_left_mono)
also have "… ≤ l * dist v u"
by (simp add: 𝒫⇩1.rep_eq mult_left_mono norm_push_exp_le_norm dist_norm)
finally show ?thesis
by auto
qed
have "dist (L p v s) (L p u s) ≤ l * dist v u" for v s u
using aux[of v _ u] aux[of u _ v]
by (cases "L p v s ≥ L p u s") (auto simp: dist_real_def dist_commute)
thus "dist (L p v) (L p u) ≤ l * dist v u"
by (simp add: dist_bound)
qed
lemma is_contraction_L: "is_contraction (L p)"
unfolding is_contraction_def using contraction_L disc_lt_one zero_le_disc by blast
subsubsection ‹Existence of a Fixpoint of @{const ℒ⇩b}›
lemma ℒ⇩b_conv:
"∃!v. ℒ⇩b v = v" "(λn. (ℒ⇩b ^^ n) v) ⇢ (THE v. ℒ⇩b v = v)"
using banach'[OF is_contraction_ℒ] by auto
lemma ℒ⇩b_fix_iff_opt [simp]: "ℒ⇩b v = v ⟷ v = ν⇩b_opt"
using banach'(1) is_contraction_ℒ ℒ_fix_imp_opt by metis
lemma ν⇩b_opt_fix: "ν⇩b_opt = (THE v. ℒ⇩b v = v)"
by auto
lemma ℒ⇩b_opt [simp]: "ℒ⇩b ν⇩b_opt = ν⇩b_opt"
by auto
lemma ℒ⇩b_lim: "(λn. (ℒ⇩b ^^ n) v) ⇢ ν⇩b_opt"
using ℒ⇩b_conv(2) ν⇩b_opt_fix by presburger
lemma thm_6_2_6: "ν⇩b p = ν⇩b_opt ⟷ ℒ⇩b (ν⇩b p) = ν⇩b p"
by force
lemma thm_6_2_6': "ν p = ν_opt ⟷ ℒ⇩b (ν⇩b p) = ν⇩b p"
using thm_6_2_6 ν⇩b.rep_eq ν⇩b_opt.rep_eq by fastforce
subsection ‹Existence of Optimal Policies›
definition "ν_improving v d ⟷ (∀s. is_arg_max (λd. (L d v) s) (λd. d ∈ D⇩R) d)"
lemma ν_improving_iff: "ν_improving v d ⟷ d ∈ D⇩R ∧ (∀d' ∈ D⇩R. ∀s. L d' v s ≤ L d v s)"
by (auto simp: ν_improving_def is_arg_max_linorder)
lemma ν_improving_D_MR[dest]: "ν_improving v d ⟹ d ∈ D⇩R"
by (auto simp add: ν_improving_iff)
lemma ν_improving_ge: "ν_improving v d ⟹ d' ∈ D⇩R ⟹ L d' v s ≤ L d v s"
by (auto simp: ν_improving_iff)
lemma ν_improving_imp_ℒ⇩b: "ν_improving v d ⟹ ℒ⇩b v = L d v"
by (fastforce intro!: cSup_eq_maximum simp: ν_improving_iff ℒ⇩b.rep_eq ℒ_def)
lemma ℒ⇩b_imp_ν_improving:
assumes "d ∈ D⇩R" "ℒ⇩b v = L d v"
shows "ν_improving v d"
using assms L_le_ℒ⇩b by (auto simp: ν_improving_iff assms(2)[symmetric])
lemma ν_improving_alt:
assumes "d ∈ D⇩R"
shows "ν_improving v d ⟷ ℒ⇩b v = L d v"
using ℒ⇩b_imp_ν_improving ν_improving_imp_ℒ⇩b assms by blast
definition "ν_conserving d = ν_improving (ν⇩b_opt) d"
lemma ν_conserving_iff: "ν_conserving d ⟷ d ∈ D⇩R ∧ (∀d' ∈ D⇩R. ∀s. L d' ν⇩b_opt s ≤ L d ν⇩b_opt s)"
by (auto simp: ν_conserving_def ν_improving_iff)
lemma ν_conserving_ge: "ν_conserving d ⟹ d' ∈ D⇩R ⟹ L d' ν⇩b_opt s ≤ L d ν⇩b_opt s"
by (auto simp: ν_conserving_iff intro: ν_improving_ge)
lemma ν_conserving_imp_ℒ⇩b [simp]: "ν_conserving d ⟹ L d ν⇩b_opt = ν⇩b_opt"
using ν_improving_imp_ℒ⇩b by (fastforce simp: ν_conserving_def)
lemma ℒ⇩b_imp_ν_conserving:
assumes "d ∈ D⇩R" "ℒ⇩b ν⇩b_opt = L d ν⇩b_opt"
shows "ν_conserving d"
using ℒ⇩b_imp_ν_improving assms by (auto simp: ν_conserving_def)
lemma ν_conserving_alt:
assumes "d ∈ D⇩R"
shows "ν_conserving d ⟷ ℒ⇩b ν⇩b_opt = L d ν⇩b_opt"
unfolding ν_conserving_def using ν_improving_alt assms by auto
lemma ν_conserving_alt':
assumes "d ∈ D⇩R"
shows "ν_conserving d ⟷ L d ν⇩b_opt = ν⇩b_opt"
using assms ν_conserving_alt by auto
subsubsection ‹Conserving Decision Rules are Optimal›
theorem ex_improving_imp_conserving:
assumes "⋀v. ∃d. ν_improving v (mk_dec_det d)"
shows "∃d. ν_conserving (mk_dec_det d)"
by (simp add: assms ν_conserving_def)
theorem conserving_imp_opt[simp]:
assumes "ν_conserving (mk_dec_det d)"
shows "ν⇩b (mk_stationary_det d) = ν⇩b_opt"
using L_ν_fix_iff ν_conserving_imp_ℒ⇩b[OF assms] by simp
lemma conserving_imp_opt':
assumes "∃d. ν_conserving (mk_dec_det d)"
shows "∃d ∈ D⇩D. (ν⇩b (mk_stationary_det d)) = ν⇩b_opt"
using assms by (fastforce simp: ν_conserving_def)
theorem improving_att_imp_det_opt:
assumes "⋀v. ∃d. ν_improving v (mk_dec_det d)"
shows "ν⇩b_opt s = (⨆d ∈ D⇩D. ν⇩b (mk_stationary_det d) s)"
proof -
obtain d where d: "ν_conserving (mk_dec_det d)"
using assms ex_improving_imp_conserving by auto
hence "d ∈ D⇩D"
using ν_conserving_iff is_dec_mk_dec_det_iff by blast
thus ?thesis
using Π⇩M⇩R_imp_policies ν⇩b_le_opt
by (fastforce intro!: cSup_eq_maximum[where z = "ν⇩b_opt s", symmetric]
simp: conserving_imp_opt[OF d] image_iff)
qed
lemma ℒ⇩b_sup_att_dec:
assumes "d ∈ D⇩R" "ℒ⇩b v = L d v"
shows "∃d' ∈ D⇩D. ℒ⇩b v = L (mk_dec_det d') v"
proof -
have "∃a∈ A s. L d v s = L⇩a a v s" for s
unfolding L_eq_L⇩a
using assms is_dec_def L⇩a_bounded A_ne ℒ⇩b.rep_eq ℒ_def
by (intro lemma_4_3_1')
(auto intro: bounded_range_subset simp: assms(2)[symmetric] L_eq_L⇩a[symmetric] SUP_step_MR_eq)
then obtain d' where d: "d' s ∈ A s" "L d v s = L⇩a (d' s) v s" for s
by metis
thus ?thesis
using assms d
by (fastforce simp: is_dec_det_def mk_dec_det_def L_eq_L⇩a)
qed
lemma ℒ⇩b_sup_att_dec':
assumes "d ∈ D⇩R" "ℒ⇩b v = L d v"
shows "∃d' ∈ D⇩D. ν_improving v (mk_dec_det d')"
using ℒ⇩b_sup_att_dec ν_improving_alt assms by force
subsubsection ‹Deterministic Decision Rules are Optimal›
lemma opt_imp_opt_dec_det:
assumes "p ∈ Π⇩H⇩R" "ν⇩b p = ν⇩b_opt"
shows "∃d ∈ D⇩D. ν⇩b (mk_stationary_det d) = ν⇩b_opt"
proof -
have aux: "L (as_markovian p (return_pmf s) 0) ν⇩b_opt s = ν⇩b_opt s" for s
proof -
let ?ps = "as_markovian p (return_pmf s)"
have markovian_suc_le: "ν⇩b (mk_markovian (λn. as_markovian p (return_pmf s) (Suc n))) ≤ ν⇩b_opt"
using is_Π⇩M⇩R_as_markovian assms by (auto simp: is_policy_def mk_markovian_def)
have aux_le: "⋀x f g. f ≤ g ⟹ apply_bfun f x ≤ apply_bfun g x"
unfolding less_eq_bfun_def by auto
have "ν⇩b_opt s = ν⇩b (mk_markovian ?ps) s"
using assms ν⇩b_as_markovian by metis
also have "… = L (?ps 0) (ν⇩b (mk_markovian (λn. ?ps (Suc n)))) s"
using ν_step by blast
also have "… ≤ L (?ps 0) (ν⇩b_opt) s"
unfolding L_def using markovian_suc_le 𝒫⇩1_mono by (auto intro!: mult_left_mono)
finally have "ν⇩b_opt s ≤ L (?ps 0) (ν⇩b_opt) s" .
have "as_markovian p (return_pmf s) 0 ∈ D⇩R"
using is_Π⇩M⇩R_as_markovian assms by fast
have "L (?ps 0) ν⇩b_opt ≤ ν⇩b_opt"
using ‹?ps 0 ∈ D⇩R› L_le_ℒ⇩b[of "?ps 0" "ν⇩b_opt"] by simp
thus "L (?ps 0) ν⇩b_opt s = ν⇩b_opt s"
using ‹ν⇩b_opt s ≤ (L (?ps 0) ν⇩b_opt) s› by (auto intro!: antisym)
qed
have "L (p []) v s = L (as_markovian p (return_pmf s) 0) v s" for v s
by (auto simp: L_def 𝒫⇩1.rep_eq K_st_def)
hence "L (p []) ν⇩b_opt = ν⇩b_opt"
using aux by auto
hence "∃d ∈ D⇩D. L (mk_dec_det d) ν⇩b_opt = ν⇩b_opt"
using ℒ⇩b_sup_att_dec assms(1) ℒ⇩b_opt is_policy_def mem_Collect_eq by metis
thus ?thesis
using conserving_imp_opt' ν_conserving_alt' by blast
qed
subsubsection ‹Optimal Decision Rules for Finite Action Spaces›
lemma ex_opt_act:
assumes "⋀s. finite (A s)"
shows "∃a ∈ A s. L⇩a a (v :: _ ⇒⇩b _) s = ℒ⇩b v s"
unfolding ℒ⇩b.rep_eq ℒ_eq_SUP_det SUP_step_det_eq
using arg_max_on_in[OF assms A_ne]
by (auto simp: cSup_eq_Sup_fin Sup_fin_Max assms A_ne finite_arg_max_eq_Max[symmetric])
lemma ex_opt_dec_det:
assumes "⋀s. finite (A s)"
shows "∃d ∈ D⇩D. L (mk_dec_det d) (v :: _ ⇒⇩b _) = ℒ⇩b v"
unfolding is_dec_det_def mk_dec_det_def
using ex_opt_act[OF assms] someI_ex
apply (auto intro!: exI[of _ ‹λs. SOME a. a ∈ A s ∧ L⇩a a v s = ℒ⇩b v s›] bfun_eqI)
apply (smt (verit, best) someI_ex)
apply (subst L_eq_L⇩a)
apply (subst expectation_return_pmf)
by (smt (verit, best) someI_ex)
lemma thm_6_2_10:
assumes "⋀s. finite (A s)"
shows "∃d ∈ D⇩D. ν⇩b_opt = ν⇩b (mk_stationary_det d)"
using assms conserving_imp_opt' ℒ⇩b_opt L_ν_fix_iff ex_opt_dec_det
by metis
subsubsection ‹Existence of Epsilon-Optimal Policies›
lemma ex_det_eps:
assumes "0 < e"
shows "∃d ∈ D⇩D. ℒ⇩b v ≤ L (mk_dec_det d) v + e *⇩R 1"
proof -
have "∃a ∈ A s. ℒ⇩b v s ≤ L⇩a a v s + e" for s
proof -
have "bdd_above ((λa. L⇩a a v s) ` A s)"
using L⇩a_le by (auto intro!: boundedI bounded_imp_bdd_above)
hence "∃a ∈ A s. ℒ⇩b v s - e < L⇩a a v s"
unfolding ℒ⇩b.rep_eq ℒ_eq_SUP_det SUP_step_det_eq
by (auto simp: less_cSUP_iff[OF A_ne, symmetric] ‹0 < e›)
thus "∃a ∈ A s. ℒ⇩b v s ≤ L⇩a a v s + e"
by force
qed
thus ?thesis
unfolding mk_dec_det_def is_dec_det_def
by (auto simp: L_def 𝒫⇩1.rep_eq bind_return_pmf K_st_def less_eq_bfun_def) metis
qed
lemma thm_6_2_11:
assumes "eps > 0"
shows "∃d ∈ D⇩D. ν⇩b_opt ≤ ν⇩b (mk_stationary_det d) + eps *⇩R 1"
proof -
have "(1-l) * eps > 0"
by (simp add: assms)
then obtain d where "d ∈ D⇩D" and d: "ℒ⇩b ν⇩b_opt ≤ L (mk_dec_det d) ν⇩b_opt + ((1-l)*eps) *⇩R 1"
using ex_det_eps[of _ ν⇩b_opt] by auto
let ?d = "mk_dec_det d"
let ?lK = "l *⇩R 𝒫⇩1 ?d"
let ?lK_opt = "l *⇩R 𝒫⇩1 ?d ν⇩b_opt"
have "ν⇩b_opt ≤ r_dec⇩b ?d + ?lK_opt + ((1-l)*eps) *⇩R 1"
using L_def ℒ_fix_imp_opt d by simp
hence "ν⇩b_opt - ?lK_opt - ((1-l)*eps) *⇩R 1 ≤ r_dec⇩b ?d"
by (simp add: cancel_ab_semigroup_add_class.diff_right_commute diff_le_eq)
hence "(∑i. ?lK ^^ i) (ν⇩b_opt - ?lK_opt - ((1-l)*eps) *⇩R 1) ≤ ν⇩b (mk_stationary ?d)"
using lemma_6_1_2_b suminf_cong by (simp add: blincomp_scaleR_right ν_stationary)
hence "((∑i. ?lK ^^ i) o⇩L (id_blinfun - ?lK)) ν⇩b_opt - (∑i. ?lK ^^ i) (((1-l)*eps) *⇩R 1)
≤ (ν⇩b (mk_stationary ?d))"
by (simp add: blinfun.diff_right blinfun.diff_left blinfun.scaleR_left)
hence le: "ν⇩b_opt - (∑i. ?lK ^^ i) (((1-l)*eps) *⇩R 1) ≤ ν⇩b (mk_stationary ?d)"
by (auto simp: inv_norm_le')
have s: "summable (λi. (l *⇩R 𝒫⇩1 ?d)^^i)"
using convergent_disc_𝒫⇩1 summable_iff_convergent'
by (simp add: blincomp_scaleR_right summable_iff_convergent')
have "(∑i. ?lK ^^ i) (((1-l)*eps) *⇩R 1) = eps *⇩R 1"
proof -
have "(∑i. ?lK ^^ i) (((1-l)*eps) *⇩R 1) = ((1-l)*eps) *⇩R (∑i. ?lK^^i) 1"
using blinfun.scaleR_right by blast
also have "… = ((1-l)*eps) *⇩R (∑i. (?lK^^i) 1) "
using s by (auto simp: bounded_linear.suminf[of "λx. blinfun_apply x 1"])
also have "… = ((1-l)*eps) *⇩R (∑i. (l ^ i)) *⇩R 1"
by (auto simp: blinfun.scaleR_left blincomp_scaleR_right bounded_linear_scaleR_left
bounded_linear.suminf[of "λx. x *⇩R 1"])
also have "… = ((1-l)*eps) *⇩R (1 / (1-l)) *⇩R 1"
by (simp add: suminf_geometric)
also have "… = eps *⇩R 1"
using disc_lt_one ‹0 < (1 - l) * eps› by auto
finally show ?thesis .
qed
thus ?thesis
using ‹d ∈ D⇩D› diff_le_eq le
by auto
qed
lemma ex_det_dist_eps:
assumes "0 < (e :: real)"
shows "∃d ∈ D⇩D. dist (ℒ⇩b v) (L (mk_dec_det d) v) ≤ e"
proof -
obtain d where "d ∈ D⇩D" "L (mk_dec_det d) v ≤ (ℒ⇩b v)"
and h2: "ℒ⇩b v ≤ L (mk_dec_det d) v + e *⇩R 1"
using assms ex_det_eps L_le_ℒ⇩b by blast
hence "0 ≤ ℒ⇩b v - L (mk_dec_det d) v"
by simp
moreover have "ℒ⇩b v - L (mk_dec_det d) v ≤ e *⇩R 1"
using h2 by (simp add: add.commute diff_le_eq)
ultimately have "∀s. ¦(ℒ⇩b v) s - L (mk_dec_det d) v s¦ ≤ e"
unfolding less_eq_bfun_def by auto
hence "dist (ℒ⇩b v) (L (mk_dec_det d) v) ≤ e"
unfolding dist_bfun.rep_eq by (auto intro!: cSUP_least simp: dist_real_def)
thus ?thesis
using ‹d ∈ D⇩D›
by auto
qed
lemma less_imp_ex_add_le: "(x :: real) < y ⟹ ∃eps>0. x + eps ≤ y"
by (meson field_le_epsilon less_le_not_le nle_le)
lemma ν⇩b_opt_le_det: "ν⇩b_opt s ≤ (⨆d ∈ D⇩D. ν⇩b (mk_stationary_det d) s)"
proof (subst le_cSUP_iff, safe)
fix y
assume "y < ν⇩b_opt s"
then obtain eps where 1: "y ≤ ν⇩b_opt s - eps" and "eps > 0"
using less_imp_ex_add_le by force
hence "eps / 2 > 0" by auto
obtain d where "d ∈ D⇩D" and "ν⇩b_opt s ≤ ν⇩b (mk_stationary_det d) s + eps / 2"
using thm_6_2_11[OF ‹eps / 2 > 0›] by fastforce
hence "y < ν⇩b (mk_stationary_det d) s"
using ‹eps > 0› by (auto simp: diff_less_eq intro: le_less_trans[OF 1])
thus "∃i∈D⇩D. y < ν⇩b (mk_stationary_det i) s"
using ‹d ∈ D⇩D› by blast
next
show "D⇩D = {} ⟹ False"
using D_det_ne by blast
show "bdd_above ((λd. ν⇩b (mk_stationary_det d) s) ` D⇩D)"
by (auto intro!: bounded_imp_bdd_above boundedI abs_ν_le simp: ν⇩b.rep_eq)
qed
lemma ν⇩b_opt_eq_det: "ν⇩b_opt s = (⨆d ∈ D⇩D. ν⇩b (mk_stationary_det d) s)"
using ν⇩b_le_opt_DD D_det_ne
by (fastforce intro!: antisym[OF ν⇩b_opt_le_det] cSUP_least)
lemma lemma_6_3_1_a:
assumes "v0 ∈ bfun"
shows "uniform_limit UNIV (λn. ((λv. ℒ (Bfun v)) ^^ n) v0) ν_opt sequentially"
proof -
have ℒ_Bfun_eq: "v0 ∈ bfun ⟹ ((λv. ℒ (Bfun v))^^n) v0 = (ℒ⇩b ^^n) (Bfun v0)" for n
by (induction n) (auto simp: ℒ⇩b.rep_eq apply_bfun_inverse)
have "uniform_limit UNIV (λn. (ℒ⇩b ^^ n) (Bfun v0)) ν⇩b_opt sequentially"
by (intro tendsto_bfun_uniform_limit[OF ℒ⇩b_lim])
hence "uniform_limit UNIV (λn. (ℒ⇩b ^^ n) (Bfun v0)) ν_opt sequentially"
by (simp add: ν_opt_bfun ν⇩b_opt.rep_eq)
thus ?thesis
by (auto simp: assms ℒ_Bfun_eq)
qed
lemma dist_Suc_tendsto_zero:
assumes "(λn. f n) ⇢ (y::_::real_normed_vector)"
shows "(λn. dist (f n) (f (Suc n))) ⇢ 0"
using assms tendsto_diff tendsto_norm LIMSEQ_Suc by (fastforce simp: dist_norm)
lemma dist_ℒ⇩b_tendsto: "(λn. dist ((ℒ⇩b^^n) v) ((ℒ⇩b^^(Suc n)) v)) ⇢ 0"
using ℒ⇩b_lim by (fast intro!: dist_Suc_tendsto_zero)
definition "max_L_ex s v ≡ has_arg_max (λa. L⇩a a v s) (A s)"
lemma ν⇩b_fin_zero[simp]: "ν⇩b_fin p 0 = 0"
by (auto simp: ν⇩b_fin.rep_eq)
lemma ν⇩b_fin_Suc[simp]:
"ν⇩b_fin (mk_stationary d) (Suc n) = ν⇩b_fin (mk_stationary d) n + ((l *⇩R 𝒫⇩1 d)^^ n) (r_dec⇩b d)"
by (auto simp: 𝒫⇩X_sconst ν⇩b_fin.rep_eq ν_fin_eq_𝒫⇩X blincomp_scaleR_right blinfun.scaleR_left)
lemma ν⇩b_fin_eq: "ν⇩b_fin (mk_stationary d) n = (∑i < n. ((l *⇩R 𝒫⇩1 d)^^ i)) (r_dec⇩b d)"
by (induction n) (auto simp add: plus_blinfun.rep_eq)
lemma L_iter: "(L d ^^ m) v = ν⇩b_fin (mk_stationary d) m + ((l *⇩R 𝒫⇩1 d)^^ m) v"
proof (induction m arbitrary: v)
case (Suc m)
have "(L d ^^ Suc m) v = (L d ^^ m) (L d v)"
by (simp add: funpow_Suc_right del: funpow.simps)
also have "… = ν⇩b_fin (mk_stationary d) m + ((l *⇩R 𝒫⇩1 d) ^^ m) (L d v)"
using Suc by simp
also have "… = ν⇩b_fin (mk_stationary d) (Suc m) + ((l *⇩R 𝒫⇩1 d) ^^ Suc m) v"
unfolding L_def
by (auto simp: 𝒫⇩1_pow blinfun.bilinear_simps blincomp_scaleR_right funpow_swap1)
finally show ?case .
qed simp
lemma bounded_stationary_ν⇩b_fin: "bounded ((λx. (ν⇩b_fin (mk_stationary x) N) s) ` X)"
using ν⇩b_fin.rep_eq abs_ν_fin_le by (auto intro!: boundedI)
lemma bounded_disc_𝒫⇩1: "bounded ((λx. (((l *⇩R 𝒫⇩1 x) ^^ m) v) s) ` X)"
by (auto simp: 𝒫⇩X_const[symmetric] blinfun.bilinear_simps blincomp_scaleR_right
intro!: boundedI[of _ "l ^ m * norm v"] mult_left_mono order.trans[OF abs_le_norm_bfun])
lemma bounded_disc_𝒫⇩1': "bounded ((λx. ((𝒫⇩1 x ^^ m) v) s) ` X)"
by (auto simp: 𝒫⇩X_const[symmetric] intro!: boundedI[of _ "norm v"] order.trans[OF abs_le_norm_bfun])
lemma L_iter_le_ℒ⇩b: "is_dec d ⟹ (L d ^^ n) v ≤ (ℒ⇩b ^^ n) v"
using order_trans[OF L_mono L_le_ℒ⇩b] by (induction n) auto
end
subsection ‹More Restrictive MDP Locales›
locale MDP_fin_acts = discrete_MDP +
assumes "⋀s. finite (A s)"
locale MDP_att_ℒ = MDP_reward_disc A K r l
for
A and
K :: "'s ::countable × 'a ::countable ⇒ 's pmf" and
r and l +
assumes Sup_att: "max_L_ex (s :: 's) v"
begin
theorem ℒ⇩b_eq_argmax_L⇩a:
fixes v :: "'s ⇒⇩b real"
assumes "is_arg_max (λa. L⇩a a v s) (λa. a ∈ A s) a"
shows "ℒ⇩b v s = L⇩a a v s"
using L⇩a_le assms A_ne ℒ⇩b.rep_eq ℒ_eq_SUP_det SUP_step_det_eq
by (auto intro!: cSUP_upper2 antisym cSUP_least simp: is_arg_max_linorder)
lemma L⇩a_le_arg_max: "a ∈ A s ⟹ L⇩a a v s ≤ L⇩a (arg_max_on (λa. L⇩a a v s) (A s)) v s"
using Sup_att app_arg_max_ge[OF Sup_att[unfolded max_L_ex_def]]
by (simp add: arg_max_on_def)
lemma arg_max_on_in: "has_arg_max f Q ⟹ arg_max_on f Q ∈ Q"
using has_arg_max_arg_max by (auto simp: arg_max_on_def)
lemma ℒ⇩b_eq_L⇩a_max: "ℒ⇩b v s = L⇩a (arg_max_on (λa. L⇩a a v s) (A s)) v s"
using app_arg_max_eq_SUP[symmetric] Sup_att max_L_ex_def
by (auto simp: ℒ⇩b_eq_SUP_det SUP_step_det_eq)
lemma ex_opt_det: "∃d ∈ D⇩D. ℒ⇩b v = L (mk_dec_det d) v"
proof -
define d where "d = (λs. arg_max_on (λa. L⇩a a v s) (A s))"
have "ℒ⇩b v s = L (mk_dec_det d) v s" for s
by (auto simp: d_def ℒ⇩b_eq_L⇩a_max L_eq_L⇩a_det)
moreover have "d ∈ D⇩D"
using Sup_att arg_max_on_in by (auto simp: d_def is_dec_det_def max_L_ex_def)
ultimately show ?thesis
by auto
qed
lemma ex_improving_det: "∃d ∈ D⇩D. ν_improving v (mk_dec_det d)"
using ν_improving_alt ex_opt_det by auto
end
locale MDP_act = discrete_MDP A K for A :: "'s::countable ⇒ 'a::countable set" and K +
fixes arb_act :: "'a set ⇒ 'a"
assumes arb_act_in[simp]: "X ≠ {} ⟹ arb_act X ∈ X"
locale MDP_act_disc = MDP_act A K + MDP_att_ℒ A K r l
for A :: "'s::countable ⇒ 'a::countable set" and K r l
begin
lemma is_opt_act_some: "is_opt_act v s (arb_act (opt_acts v s))"
using arb_act_in[of "{a. is_arg_max (λa. L⇩a a v s) (λa. a ∈ A s) a}"] Sup_att has_arg_max_def
unfolding max_L_ex_def is_opt_act_def by auto
lemma some_opt_acts_in_A: "arb_act (opt_acts v s) ∈ A s"
using is_opt_act_some unfolding is_opt_act_def is_arg_max_def by auto
lemma ν_improving_opt_acts: "ν_improving v0 (mk_dec_det (λs. arb_act (opt_acts (apply_bfun v0) s)))"
using is_opt_act_def is_opt_act_some some_opt_acts_in_A
by (subst ν_improving_alt) (fastforce simp: L_eq_L⇩a_det ℒ⇩b_eq_argmax_L⇩a is_dec_det_def)+
end
locale MDP_finite_type = MDP_reward_disc A K r l
for A and K :: "'s :: finite × 'a :: finite ⇒ 's pmf" and r l
end