Theory Backward_Induction
theory Backward_Induction
imports "MDP-Rewards.MDP_reward"
begin
locale MDP_reward_fin = discrete_MDP A K
for
A and
K :: "'s ::countable × 'a ::countable ⇒ 's pmf" +
fixes
r :: "('s × 'a) ⇒ real" and
r_fin :: "'s ⇒ real" and
N :: "nat"
assumes
r_fin_bounded: "bounded (range r_fin)" and
r_bounded_fin: "bounded (range r)"
begin
interpretation MDP_reward A K r 1
rewrites "1 * (x::real) = x" and "⋀x.(1::real)^(x::nat)=1"
using r_bounded_fin
by unfold_locales (auto simp: algebra_simps)
definition "νN p s = (∫t. (∑i<N. r (t !! i)) + (r_fin (fst(t !! N))) ∂𝒯 p s)"
lemma measurable_r_fin_nth [measurable]: "(λt. r_fin ((t !! i))) ∈ borel_measurable S"
by measurable
lemma integrable_r_fin_nth [simp]: "integrable (𝒯 p s) (λt. r_fin (fst(t !! i)))"
using bounded_range_subset[OF r_fin_bounded]
by (auto simp: range_composition[of r_fin])
lemma νN_eq: "νN p s = (∑i < N. measure_pmf.expectation (Pn' p s i) r) + measure_pmf.expectation (Xn' p s N) r_fin"
proof -
have "νN p s = (∫t. (∑i<N. r (t !! i)) ∂𝒯 p s) + (∫t. (r_fin (fst(t !! N))) ∂𝒯 p s)"
unfolding νN_def
by (auto intro: Bochner_Integration.integral_add)
moreover have " (∫t. (∑i<N. r (t !! i)) ∂𝒯 p s) = (∑i < N. measure_pmf.expectation (Pn' p s i) r)"
using ν_fin_Suc ν_fin_eq_Pn by force
moreover have "(∫t. (r_fin (fst(t !! N))) ∂𝒯 p s) = measure_pmf.expectation (Xn' p s N) r_fin"
by (auto simp: Xn'_Pn' Pn'_eq_𝒯 integral_distr)
ultimately show ?thesis by auto
qed
function νN_eval where
"νN_eval p h s = (
if length h = N then r_fin s else
if length h > N then 0 else
measure_pmf.expectation (p h s) (λa. r (s,a) +
measure_pmf.expectation (K (s,a)) (λs'. νN_eval p (h@[(s,a)]) s'))) "
by auto
termination
by (relation "Wellfounded.measure (λ(_,h,s). N - length h)") auto
lemmas abs_disc_eq[simp del]
lemmas νN_eval.simps[simp del]
lemma pmf_bounded_integrable: "bounded (range (f::_ ⇒ real)) ⟹ integrable (measure_pmf p) f"
using bounded_norm_le_SUP_norm[of f]
by (intro measure_pmf.integrable_const_bound[of _ "⨆x. ¦f x¦"]) auto
lemma abs_boundedD[dest]: "(⋀x. ¦f x¦ ≤ (c::real)) ⟹ bounded (range f)"
using bounded_real by auto
lemma abs_integral_le[intro]: "(⋀x. ¦f x¦ ≤ (c::real)) ⟹ abs (measure_pmf.expectation p f) ≤ c"
by (fastforce intro!: pmf_bounded_integrable abs_boundedD measure_pmf.integral_le_const order.trans[OF integral_abs_bound])
lemma abs_νN_eval_le: "¦νN_eval p h s¦ ≤ (N - length h) * r⇩M + (⨆s. ¦r_fin s¦)"
proof (induction "(N - length h)" arbitrary: h s)
case 0
then show ?case
using r_fin_bounded
by (auto simp: νN_eval.simps intro!: bounded_imp_bdd_above cSUP_upper2)
next
case (Suc x)
have "N > length h"
using Suc(2) by linarith
hence Suc_le: "Suc (length h) ≤ N"
by auto
have *: "¦νN_eval p (h @ [(s, a)]) s'¦ ≤ real (N - length h - 1) * r⇩M + (⨆s. ¦r_fin s¦)" for a s'
using Suc.hyps(1)[of "h @[(s,a)]"] Suc.hyps(2)
by (auto simp: of_nat_diff[OF Suc_le] algebra_simps)
hence **: "¦measure_pmf.expectation (p h s) (λa. measure_pmf.expectation (K (s, a)) (νN_eval p (h @ [(s, a)])))¦
≤ real (N - length h - 1) * r⇩M + (⨆s. ¦r_fin s¦)"
using Suc by auto
have "¦measure_pmf.expectation (p h s) (λa. r (s, a) + measure_pmf.expectation (K (s, a)) (νN_eval p (h @ [(s, a)])))¦
≤ ¦measure_pmf.expectation (p h s) (λa. r (s, a)) + measure_pmf.expectation (p h s) (λa. measure_pmf.expectation (K (s, a)) (νN_eval p (h @ [(s, a)])))¦"
using abs_r_le_r⇩M
by (subst Bochner_Integration.integral_add) (auto intro!: abs_boundedD * pmf_bounded_integrable)
also have "… ≤ ¦measure_pmf.expectation (p h s) (λa. r (s, a))¦ + ¦ measure_pmf.expectation (p h s) (λa. measure_pmf.expectation (K (s, a)) (νN_eval p (h @ [(s, a)])))¦"
by auto
also have "… ≤ r⇩M + ¦ measure_pmf.expectation (p h s) (λa. measure_pmf.expectation (K (s, a)) (νN_eval p (h @ [(s, a)])))¦"
using abs_r_le_r⇩M by auto
also have "… ≤ r⇩M + (N - length h - 1) * r⇩M + (⨆s. ¦r_fin s¦)"
using "**" by force
also have "… ≤ (N - length h) * r⇩M + (⨆s. ¦r_fin s¦)"
using Suc Suc_le by (auto simp: of_nat_diff algebra_simps)
finally show ?case
using νN_eval.simps ‹length h < N› by force
qed
lemma abs_νN_eval_le': "¦νN_eval p h s¦ ≤ N * r⇩M + (⨆s. ¦r_fin s¦)"
by (simp add: mult_left_mono r⇩M_nonneg algebra_simps order.trans[OF abs_νN_eval_le[of p h s]])
lemma measure_pmf_expectation_bind:
assumes "bounded (range f)"
shows "measure_pmf.expectation (p ⤜ q) (f::_ ⇒ real) = measure_pmf.expectation p (λx. measure_pmf.expectation (q x) f)"
unfolding measure_pmf_bind
using assms measure_pmf_in_subprob_space
by (fastforce intro!: Giry_Monad.integral_bind[of _ "count_space UNIV" "⨆x. ¦f x¦"] bounded_imp_bdd_above cSUP_upper)+
lemma Pn'_shift: "bounded (range (f :: _ ⇒ real)) ⟹ measure_pmf.expectation (p h s)
(λa. measure_pmf.expectation (K (s, a))
(λs'. measure_pmf.expectation (Pn' (λh'. p ((h @ (s, a)# h'))) s' n) f))
= measure_pmf.expectation (Pn' (λh'. p (h @ h')) s (Suc n)) f"
unfolding PSuc' π_Suc_def K0'_def
by (auto simp: measure_pmf_expectation_bind)
lemma bounded_r_snd': "bounded ((λa. r (s, a)) ` X)"
using r_bounded' image_image
by metis
lemma bounded_r_snd: "bounded (range (λa. r (s, a)))"
using bounded_r_snd'.
lemma νN_eval_eq: "length h ≤ N ⟹ νN_eval p h s =
(∑i ∈{length h..< N}.
measure_pmf.expectation (Pn' (λh'. p (h@h')) s (i - length h)) r) + measure_pmf.expectation (Xn' (λh'. p (h@h')) s (N - length h)) r_fin"
proof (induction "N - length h" arbitrary: h s)
case 0
then show ?case
using νN_eval.simps by auto
next
case (Suc x)
hence "length h < N"
by auto
hence
"νN_eval p h s =
measure_pmf.expectation (p h s) (λa. r (s,a) +
measure_pmf.expectation (K (s,a)) (λs'. νN_eval p (h@[(s,a)]) s'))"
by (auto simp: νN_eval.simps[of p h] split: if_splits)
also have "… =
measure_pmf.expectation (p h s) (λa. r (s,a)) + measure_pmf.expectation (p h s) (λa. measure_pmf.expectation (K (s,a)) (λs'. νN_eval p (h@[(s,a)]) s'))"
using abs_νN_eval_le' bounded_r_snd
by (fastforce simp: bounded_real intro!: Bochner_Integration.integral_add pmf_bounded_integrable abs_integral_le)
also have "… =
(∑i = length h..<N. measure_pmf.expectation (Pn' (λh'. p (h @ h')) s (i - length h)) r) + measure_pmf.expectation (Xn' (λh'. p (h @ h')) s (N - length h)) r_fin"
proof -
have "measure_pmf.expectation (p h s) (λa. measure_pmf.expectation (K (s,a)) (λs'. νN_eval p (h@[(s,a)]) s')) =
measure_pmf.expectation (p h s)
(λa. measure_pmf.expectation (K (s, a))
(λs'. (∑i = length (h @ [(s, a)])..<N. measure_pmf.expectation (Pn' (λh'. p ((h @ [(s, a)]) @ h')) s' (i - length (h @ [(s, a)]))) r) +
measure_pmf.expectation (Xn' (λh'. p ((h @ [(s, a)]) @ h')) s' (N - length (h @ [(s, a)]))) r_fin))"
using Suc ‹length h < N›
by auto
also have "… =
measure_pmf.expectation (p h s)
(λa. measure_pmf.expectation (K (s, a))
(λs'. (∑i = length h + 1..<N. measure_pmf.expectation (Pn' (λh'. p ((h @ [(s, a)]) @ h')) s' (i - length h - 1)) r) +
measure_pmf.expectation (Xn' (λh'. p ((h @ [(s, a)]) @ h')) s' (N - length h - 1)) r_fin))"
using Suc ‹length h < N› K0'_def
by auto
also have "… =
measure_pmf.expectation (p h s)
(λa. measure_pmf.expectation (K (s, a))
(λs'. (∑i = length h + 1..<N. measure_pmf.expectation (Pn' (λh'. p ((h @ [(s, a)]) @ h')) s' (i - length h - 1)) r)) +
measure_pmf.expectation (K (s, a))
(λs'. measure_pmf.expectation (Xn' (λh'. p ((h @ [(s, a)]) @ h')) s' (N - length h - 1)) r_fin))"
using abs_exp_r_le r_fin_bounded
by (fastforce intro!: Bochner_Integration.integral_cong[OF refl] Bochner_Integration.integral_add
pmf_bounded_integrable Bochner_Integration.integrable_sum simp: bounded_real)+
also have "… =
measure_pmf.expectation (p h s)
(λa. measure_pmf.expectation (K (s, a))
(λs'. (∑i = length h + 1..<N. measure_pmf.expectation (Pn' (λh'. p ((h @ [(s, a)]) @ h')) s' (i - length h - 1)) r))) +
measure_pmf.expectation (p h s) (λa. measure_pmf.expectation (K (s, a)) (λs'. measure_pmf.expectation
(Xn' (λh'. p ((h @ [(s, a)]) @ h')) s' (N - length h - 1)) r_fin))"
using abs_r_le_r⇩M r_fin_bounded
by (fastforce intro!:
Bochner_Integration.integral_add Bochner_Integration.integrable_sum pmf_bounded_integrable
abs_integral_le order.trans[OF sum_abs] order.trans[OF sum_bounded_above[of _ _ "r⇩M"]] simp: bounded_real)
also have "… = measure_pmf.expectation (p h s) (λa. (∑i = length h + 1..<N. measure_pmf.expectation (K (s, a))
(λs'. measure_pmf.expectation (Pn' (λh'. p ((h @ [(s, a)]) @ h')) s' (i - length h - 1)) r))) +
measure_pmf.expectation (p h s) (λa. measure_pmf.expectation (K (s, a)) (λs'. measure_pmf.expectation
(Xn' (λh'. p ((h @ [(s, a)]) @ h')) s' (N - length h - 1)) r_fin))"
using abs_r_le_r⇩M
by (subst Bochner_Integration.integral_sum) (auto intro!: pmf_bounded_integrable boundedI[of _ "r⇩M"] abs_integral_le)
also have "… = (∑i = length h + 1..<N. measure_pmf.expectation (p h s) (λa. measure_pmf.expectation (K (s, a))
(λs'. measure_pmf.expectation (Pn' (λh'. p ((h @ [(s, a)]) @ h')) s' (i - length h - 1)) r))) +
measure_pmf.expectation (p h s) (λa. measure_pmf.expectation (K (s, a)) (λs'. measure_pmf.expectation
(Xn' (λh'. p ((h @ [(s, a)]) @ h')) s' (N - length h - 1)) r_fin))"
using abs_r_le_r⇩M
by (subst Bochner_Integration.integral_sum) (auto intro!: pmf_bounded_integrable boundedI[of _ "r⇩M"] abs_integral_le)
also have "… =
(∑i = length h + 1..<N. (measure_pmf.expectation (Pn' (λh'. p (h @ h')) s (i - length h))) r) +
measure_pmf.expectation (Xn' (λh'. p (h @ h')) s (N - length h)) r_fin"
using r_bounded r_fin_bounded ‹length h < N›
by (auto simp add: Pn'_shift Xn'_Pn' Suc_diff_Suc range_composition)
finally show ?thesis
unfolding sum.atLeast_Suc_lessThan[OF ‹length h < N›] r_dec_eq_r_K0
by auto
qed
finally show ?case .
qed
lemma νN_eval_correct: "νN_eval p [] s = νN p s"
using lessThan_atLeast0
by (auto simp: νN_eq νN_eval_eq)
lift_definition νN⇩b :: "('s, 'a) pol ⇒ 's ⇒⇩b real" is νN
using r_fin_bounded
by (intro bfun_normI[of _ "r⇩M * N + (⨆x. ¦r_fin x¦)"])
(auto simp add: νN_eq r⇩M_def r_bounded bounded_abs_range intro!: add_mono
order.trans[OF integral_abs_bound] pmf_bounded_integrable lemma_4_3_1
order.trans[OF sum_abs] order.trans[OF abs_triangle_ineq] order.trans[OF sum_bounded_above[of _ _ r⇩M]])
definition "νN_opt s = (⨆p ∈ Π⇩H⇩R. νN p s)"
definition "νN_eval_opt h s = (⨆p ∈ Π⇩H⇩R. νN_eval p h s)"
function νN_opt_eqn where
"νN_opt_eqn h s = (
if length h = N then r_fin s else
if length h > N then 0 else
⨆a ∈ A s. (r (s,a) +
measure_pmf.expectation (K (s,a)) (λs'. νN_opt_eqn (h@[(s,a)]) s'))) "
by auto
termination
by (relation "Wellfounded.measure (λ(h,s). N - length h)") auto
lemmas νN_opt_eqn.simps[simp del]
lemma abs_νN_opt_eqn_le: "¦νN_opt_eqn h s¦ ≤ (N - length h) * r⇩M + (⨆s. ¦r_fin s¦)"
proof (induction "(N - length h)" arbitrary: h s)
case 0
then show ?case
using r_fin_bounded
by (auto simp: νN_opt_eqn.simps intro!: bounded_imp_bdd_above cSUP_upper2)
next
case (Suc x)
have "N > length h"
using Suc(2) by linarith
have *: "¦νN_opt_eqn (h @ [(s, a)]) s'¦ ≤ real (N - length h - 1) * r⇩M + (⨆s. ¦r_fin s¦)" for a s'
using Suc(1)[of "(h @ [(s, a)])"] Suc(2)
by auto
hence "¦measure_pmf.expectation (K (s, a)) (νN_opt_eqn (h @ [(s, a)]))¦
≤ real (N - length h - 1) * r⇩M + (⨆s. ¦r_fin s¦)" for a
using Suc by auto
hence **: "r⇩M + ¦measure_pmf.expectation (K (s, a)) (νN_opt_eqn (h @ [(s, a)]))¦
≤ real (N - length h) * r⇩M + (⨆s. ¦r_fin s¦)" for a
using Suc
by (auto simp: of_nat_diff algebra_simps)
hence *: "¦r (s, a)¦ + ¦measure_pmf.expectation (K (s, a)) (νN_opt_eqn (h @ [(s, a)]))¦ ≤ real (N - length h) * r⇩M + (⨆s. ¦r_fin s¦)" for a
using abs_r_le_r⇩M
by (meson add_le_cancel_right order.trans)
hence *: "¦r (s, a) + measure_pmf.expectation (K (s, a)) (νN_opt_eqn (h @ [(s, a)]))¦ ≤ real (N - length h) * r⇩M + (⨆s. ¦r_fin s¦)" for a
using order.trans[OF abs_triangle_ineq] by auto
have "¦νN_opt_eqn h s¦ = ¦⨆a∈A s. r (s, a) + measure_pmf.expectation (K (s, a)) (νN_opt_eqn (h @ [(s, a)]))¦"
unfolding νN_opt_eqn.simps[of h] using ‹length h < N›
by auto
also have "… ≤ ¦⨆a∈A s. measure_pmf.expectation (return_pmf a) (λa. r (s, a) + measure_pmf.expectation (K (s, a)) (νN_opt_eqn (h @ [(s, a)])))¦"
by auto
also have "… ≤ (⨆a∈A s. ¦ measure_pmf.expectation (return_pmf a) (λa. r (s, a) + measure_pmf.expectation (K (s, a)) (νN_opt_eqn (h @ [(s, a)])))¦)"
using ‹length h < N› A_ne *
by (auto intro!: boundedI abs_cSUP_le)
also have "… ≤ real (N - length h) * r⇩M + (⨆s. ¦r_fin s¦)"
using * A_ne
by (auto intro!: cSUP_least)
finally show ?case.
qed
lemma abs_νN_opt_eqn_le': "¦νN_opt_eqn h s¦ ≤ N * r⇩M + (⨆s. ¦r_fin s¦)"
by (simp add: mult_left_mono r⇩M_nonneg algebra_simps order.trans[OF abs_νN_opt_eqn_le[of h s]])
lemma abs_νN_eval_opt_le': "¦νN_eval_opt h s¦ ≤ N * r⇩M + (⨆s. ¦r_fin s¦)"
unfolding νN_eval_opt_def
using policies_ne abs_νN_eval_le'
by (auto intro!: order.trans[OF abs_cSUP_le] boundedI cSUP_least)
lemma exp_νN_eval_opt_le: "¦measure_pmf.expectation (K (s, a)) (νN_eval_opt h)¦ ≤ N * r⇩M + (⨆s. ¦r_fin s¦)"
by (metis abs_νN_eval_opt_le' abs_integral_le)
lemma bounded_exp_νN_eval_opt: "(bounded ((λa. measure_pmf.expectation (K (s, a)) (νN_eval_opt (h a))) ` X))"
using exp_νN_eval_opt_le
by (auto intro!: boundedI)
lemma bounded_r_exp_νN_eval_opt: "(bounded ((λa. r (s, a) + measure_pmf.expectation (K (s, a)) (νN_eval_opt (h a))) ` X))"
using bounded_exp_νN_eval_opt r_bounded abs_r_le_r⇩M
by (intro bounded_plus_comp) (auto intro!: boundedI)
lemma integrable_r_exp_νN_eval_opt: "(integrable (measure_pmf q) ((λa. r (s, a) + measure_pmf.expectation (K (s, a)) (νN_eval_opt (h a)))))"
using bounded_r_exp_νN_eval_opt pmf_bounded_integrable by blast
lemma exp_νN_eval_le: "¦measure_pmf.expectation (K (s, a)) (νN_eval p h)¦ ≤ N * r⇩M + (⨆s. ¦r_fin s¦)"
by (metis abs_νN_eval_le' abs_integral_le)
lemma bounded_exp_νN_eval: "(bounded ((λa. measure_pmf.expectation (K (s, a)) (νN_eval p (h a))) ` X))"
using exp_νN_eval_le
by (auto intro!: boundedI)
lemma bounded_r_exp_νN_eval: "(bounded ((λa. r (s, a) + measure_pmf.expectation (K (s, a)) (νN_eval p (h a))) ` X))"
using bounded_exp_νN_eval r_bounded abs_r_le_r⇩M
by (intro bounded_plus_comp) (auto intro!: boundedI)
lemma integrable_r_exp_νN_eval: "(integrable (measure_pmf q) ((λa. r (s, a) + measure_pmf.expectation (K (s, a)) (νN_eval p (h a)))))"
using bounded_r_exp_νN_eval pmf_bounded_integrable by blast
lemma exp_νN_opt_eqn_le: "¦measure_pmf.expectation (K (s, a)) (νN_opt_eqn h)¦ ≤ N * r⇩M + (⨆s. ¦r_fin s¦)"
by (metis abs_νN_opt_eqn_le' abs_integral_le)
lemma bounded_exp_νN_opt_eqn: "(bounded ((λa. measure_pmf.expectation (K (s, a)) (νN_opt_eqn (h a))) ` X))"
using exp_νN_opt_eqn_le
by (auto intro!: boundedI)
lemma bounded_r_exp_νN_opt_eqn: "(bounded ((λa. r (s, a) + measure_pmf.expectation (K (s, a)) (νN_opt_eqn (h a))) ` X))"
using bounded_exp_νN_opt_eqn r_bounded abs_r_le_r⇩M
by (intro bounded_plus_comp) (auto intro!: boundedI)
lemma integrable_r_exp_νN_opt_eqn: "(integrable (measure_pmf q) ((λa. r (s, a) + measure_pmf.expectation (K (s, a)) (νN_opt_eqn (h a)))))"
using bounded_r_exp_νN_opt_eqn pmf_bounded_integrable by blast
lemma νN_eval_le_opt_eqn: "p ∈ Π⇩H⇩R ⟹ νN_eval p h s ≤ νN_opt_eqn h s"
proof (induction p h s rule: νN_eval.induct)
case (1 p h s)
have "νN_eval p (h @ [(s, a)]) s' ≤ νN_opt_eqn (h @[(s,a)]) s'" if "length h < N" for a s'
using that 1 by fastforce
hence *: "r (s, a) + measure_pmf.expectation (K (s, a)) (νN_eval p (h @ [(s, a)])) ≤ r (s, a) + measure_pmf.expectation (K (s, a)) (νN_opt_eqn (h @ [(s, a)]))" if "length h < N" for a
using abs_νN_eval_le' abs_νN_opt_eqn_le' that
by (fastforce intro!: integral_mono pmf_bounded_integrable simp: bounded_real)
have **: "a ∈ set_pmf (p h s) ⟹ a ∈ A s" for a
using 1 is_dec_def is_policy_def by blast
then show ?case
unfolding νN_eval.simps[of p h] νN_opt_eqn.simps[of h]
using integrable_r_exp_νN_eval bounded_r_exp_νN_eval bounded_r_exp_νN_opt_eqn *
by (auto simp: set_pmf_not_empty intro!: order.trans[OF lemma_4_3_1] cSUP_mono bexI bounded_imp_bdd_above)
qed
lemma νN_eval_le_opt: "p∈Π⇩H⇩R ⟹ νN_eval_opt h s ≥ νN_eval p h s"
unfolding νN_eval_opt_def
using bounded_subset_range[OF abs_boundedD[OF abs_νN_eval_le']]
by (force intro!: cSUP_upper abs_boundedD bounded_imp_bdd_above)
lemma νN_opt_eqn_bounded[simp, intro]: "bounded ((νN_opt_eqn h) ` X)"
by (meson Blinfun_Util.bounded_subset abs_νN_opt_eqn_le' abs_boundedD subset_UNIV)
lemma νN_eval_opt_bounded[simp, intro]: "bounded ((νN_eval_opt h) ` X)"
by (meson Blinfun_Util.bounded_subset abs_νN_eval_opt_le' abs_boundedD subset_UNIV)
lemma νN_eval_bounded[simp, intro]: "bounded ((νN_eval p h) ` X)"
by (meson Blinfun_Util.bounded_subset abs_νN_eval_le' abs_boundedD subset_UNIV)
lemma νN_opt_ge: "length h ≤ N ⟹ νN_opt_eqn h s ≥ νN_eval_opt h s"
proof (induction "N - length h" arbitrary: h s)
case 0
then show ?case
unfolding νN_eval_opt_def νN_opt_eqn.simps[of h]
using policies_ne
by (subst νN_eval_eq) auto
next
case (Suc x)
hence "length h < N"
by linarith
{
fix p assume "p ∈ Π⇩H⇩R"
have "νN_eval p h s = measure_pmf.expectation (p h s) (λa. (r (s,a) +
measure_pmf.expectation (K (s,a)) (λs'. νN_eval p (h@[(s,a)]) s')))"
unfolding νN_eval.simps[of p h]
using ‹length h < N›
by auto
also have "… ≤ (⨆a ∈ A s. (r (s,a) +
measure_pmf.expectation (K (s,a)) (λs'. νN_eval p (h@[(s,a)]) s')))"
using ‹p ∈ Π⇩H⇩R› is_dec_def is_policy_def bounded_r_snd' bounded_exp_νN_eval
by (auto intro!: lemma_4_3_1 bounded_plus_comp pmf_bounded_integrable simp: r_bounded')
also have "… ≤ (⨆a ∈ A s. (r (s,a) +
measure_pmf.expectation (K (s,a)) (λs'. νN_opt_eqn (h@[(s,a)]) s')))"
proof -
have "a ∈ A s ⟹
r (s,a) + measure_pmf.expectation (K (s,a)) (νN_eval p (h @ [(s,a)])) ≤
r (s,a) + measure_pmf.expectation (K (s,a)) (νN_eval_opt (h@[(s,a)]))" for a
using abs_boundedD[OF abs_νN_eval_opt_le'] abs_boundedD[OF abs_νN_eval_le']
using νN_eval_le_opt ‹p ∈ Π⇩H⇩R›
by (force intro!: integral_mono pmf_bounded_integrable)
moreover have "a ∈ A s ⟹
r (s,a) + measure_pmf.expectation (K (s,a)) (νN_eval_opt (h@[(s,a)])) ≤
r (s,a) + measure_pmf.expectation (K (s,a)) (νN_opt_eqn (h@[(s,a)]))" for a
using νN_eval_le_opt_eqn policies_ne Suc
by (auto intro!: integral_mono pmf_bounded_integrable cSUP_least)
ultimately show ?thesis
using A_ne bounded_imp_bdd_above bounded_r_exp_νN_opt_eqn
by (fastforce intro!: cSUP_mono)+
qed
also have "… = νN_opt_eqn h s"
unfolding νN_opt_eqn.simps[of h]
using ‹length h < N›
by auto
finally have "νN_opt_eqn h s ≥ νN_eval p h s".
}
then show ?case
unfolding νN_eval_opt_def
using policies_ne
by (auto intro!: cSUP_least)
qed
lemma Sup_wit_ex:
assumes "(d ::real)> 0"
assumes "X ≠ {}"
assumes "bdd_above (f ` X)"
shows "∃x ∈ X. (⨆x ∈ X. f x) < f x + d"
proof -
have "∃x ∈X. (⨆x ∈ X. f x) - d < f x"
using assms
by (auto simp: less_cSUP_iff[symmetric])
thus ?thesis
by force
qed
lemma νN_opt_eqn_markov: "length h ≤ N ⟹ length h = length h' ⟹ νN_opt_eqn h = νN_opt_eqn h'"
proof (induction "N - length h" arbitrary: h h')
case 0
then show ?case
by (auto simp: νN_opt_eqn.simps)
next
case (Suc x)
{
fix s
have "νN_opt_eqn h s = (⨆a ∈ A s. r (s, a) + measure_pmf.expectation (K(s,a)) (νN_opt_eqn (h@[(s,a)])))"
using Suc by (fastforce simp: νN_opt_eqn.simps)
also have "… = (⨆a ∈ A s. r (s, a) + measure_pmf.expectation (K(s,a)) (νN_opt_eqn (h'@[(s,a)])))"
using Suc
by (auto intro!: SUP_cong Bochner_Integration.integral_cong Suc(1)[THEN cong])
also have "… = νN_opt_eqn h' s"
using Suc νN_opt_eqn.simps by fastforce
finally have "νN_opt_eqn h s = νN_opt_eqn h' s ".
}
thus ?case by auto
qed
lemma νN_opt_le:
fixes eps :: real
assumes "eps > 0"
shows "∃p ∈ Π⇩M⇩D. ∀h s. length h ≤ N ⟶ νN_eval (mk_markovian_det p) h s + real (N - length h) * eps ≥ νN_opt_eqn h s"
proof -
define p where "p = (λn s. if n ≥ N then SOME a. a ∈ A s else
SOME a. a ∈ A s ∧
r (s, a) + measure_pmf.expectation (K (s, a)) (νN_opt_eqn (replicate n (s, SOME a. a ∈ A s) @ [(s,a)])) + eps > νN_opt_eqn (replicate n (s,SOME a. a ∈ A s)) s)"
have *: "∃a . a ∈ A s ∧
r (s, a) + measure_pmf.expectation (K (s, a)) (νN_opt_eqn (h@[(s,a)])) + eps > νN_opt_eqn h s"
if "length h < N"
for h s
using that Sup_wit_ex[OF assms A_ne, unfolded Bex_def] bounded_imp_bdd_above bounded_r_exp_νN_opt_eqn
by (auto simp: νN_opt_eqn.simps)
hence **: "∃a . a ∈ A s ∧
r (s, a) + measure_pmf.expectation (K (s, a)) (νN_opt_eqn ((replicate n (s,SOME a. a ∈ A s))@[(s,a)])) + eps > νN_opt_eqn (replicate n (s,SOME a. a ∈ A s)) s"
if "n < N" for n s
using that by simp
have p_prop: "p n s ∈ A s ∧ r (s, p n s) + measure_pmf.expectation (K (s, p n s)) (νN_opt_eqn ((replicate n (s,SOME a. a ∈ A s))@[(s,p n s)])) + eps > νN_opt_eqn ((replicate n (s,SOME a. a ∈ A s))) s"
if "n < N" for n s
using someI_ex[OF **[OF that], of s] that
by (auto simp: p_def)
hence p_prop': "p (length h) s ∈ A s ∧ r (s, p (length h) s) + measure_pmf.expectation (K (s, p (length h) s)) (νN_opt_eqn (h@[(s,p (length h) s)])) + eps > νN_opt_eqn h s"
if "length h < N" for h s
using that
by (auto simp:
νN_opt_eqn_markov[of h "(replicate (length h) (s,SOME a. a ∈ A s))"]
νN_opt_eqn_markov[of "(h@[(s,p (length h) s)])" "(replicate (length h) (s, SOME a. a ∈ A s) @ [(s, p (length h) s)])"])
have "p n s ∈ A s" for n s
using SOME_is_dec_det is_dec_det_def p_def p_prop by auto
hence p:"p ∈ Π⇩M⇩D"
using is_dec_det_def by force
{
fix h s p
assume "p ∈ Π⇩M⇩D"
and
p: "⋀h s. length h < N ⟹ r (s, p (length h) s) + measure_pmf.expectation (K (s, p (length h) s)) (νN_opt_eqn (h@[(s,p (length h) s)])) + eps > νN_opt_eqn h s"
have "length h ≤ N ⟹ νN_eval (mk_markovian_det p) h s + real (N - length h) * eps ≥ νN_opt_eqn h s"
proof (induction "N - length h" arbitrary: h s)
case 0
hence *: "length h = N"
by auto
thus ?case
by (auto simp: νN_opt_eqn.simps νN_eval.simps)
next
case (Suc x)
hence *: "length h < N"
by auto
have "νN_opt_eqn h s - real (N - length h) * eps < r (s, p (length h) s) + measure_pmf.expectation (K (s, p (length h) s)) (νN_opt_eqn (h@[(s,p (length h) s)])) - real (N - length h) * eps + eps"
using p[OF *, of s] by auto
also have "… = r (s, p (length h) s) + measure_pmf.expectation (K (s, p (length h) s)) (νN_opt_eqn (h@[(s,p (length h) s)])) - real (N - length h - 1) * eps"
proof -
have "real (N - length h - 1) = real (N - length h) - 1"
using * by (auto simp: algebra_simps)
thus ?thesis
by algebra
qed
also have "… = r (s, p (length h) s) + measure_pmf.expectation (K (s, p (length h) s)) (λs'. νN_opt_eqn (h@[(s,p (length h) s)]) s' - real (N - length h - 1) * eps)"
by (subst Bochner_Integration.integral_diff) (auto intro: pmf_bounded_integrable)
also have "… ≤ r (s, p (length h) s) + measure_pmf.expectation (K (s, p (length h) s)) (νN_eval (mk_markovian_det p) (h@[(s,p (length h) s)]))"
using Suc(1)[of "h@[_]"] Suc *
by (intro add_mono integral_mono pmf_bounded_integrable bounded_minus_comp) (auto simp: algebra_simps)
also have "… = νN_eval (mk_markovian_det p) h s"
using Suc
by (auto simp: mk_markovian_det_def νN_eval.simps)
finally show ?case
by auto
qed
}
thus ?thesis
using p p_prop' by blast
qed
lemma νN_opt_le':
fixes eps :: real
assumes "eps > 0"
shows "∃p ∈ Π⇩M⇩D. ∀h s. length h ≤ N ⟶ νN_eval (mk_markovian_det p) h s + eps ≥ νN_opt_eqn h s"
proof -
obtain p where "p∈Π⇩M⇩D" and "⋀h s. length h ≤ N ⟹ νN_opt_eqn h s ≤ νN_eval (mk_markovian_det p) h s + real (N - length h) * (eps/N)"
using νN_opt_le[of "eps / N"] νN_opt_le assms
by (cases "N = 0") force+
hence **: "⋀h s. length h ≤ N ⟹ νN_opt_eqn h s ≤ νN_eval (mk_markovian_det p) h s + eps - ((eps * length h) / N)"
using assms
by (cases "N = 0") (auto simp: algebra_simps of_nat_diff intro: add_increasing)
moreover have *:"eps * real (length h) / N ≥ 0" for h
using assms by auto
ultimately have "⋀h s. length h ≤ N ⟹ νN_opt_eqn h s ≤ νN_eval (mk_markovian_det p) h s + eps"
by (auto intro!: order.trans[OF **])
thus ?thesis
using ‹p ∈ Π⇩H⇩D› by blast
qed
lemma mk_det_preserves: "p ∈ Π⇩H⇩D ⟹ (mk_det p) ∈ Π⇩H⇩R"
unfolding is_policy_def mk_det_def
by (auto simp: is_dec_def is_dec_det_def)
lemma mk_markovian_det_preserves: "p ∈ Π⇩M⇩D ⟹ (mk_markovian_det p) ∈ Π⇩H⇩R"
unfolding is_policy_def mk_markovian_det_def
by (auto simp: is_dec_def is_dec_det_def)
lemma νN_opt_eq:
assumes "length h ≤ N"
shows "νN_opt_eqn h s = νN_eval_opt h s"
proof -
{
fix eps :: real
assume "0 < eps"
hence "∃p∈Π⇩H⇩R. ∀h s. length h ≤ N ⟶ νN_opt_eqn h s ≤ νN_eval p h s + eps"
using mk_markovian_det_preserves νN_opt_le'[of eps]
by auto
then obtain p where "p∈Π⇩H⇩R" and **: "length h ≤ N ⟹ νN_opt_eqn h s ≤ νN_eval p h s + eps" for h s
by auto
hence "length h ≤ N ⟹ νN_opt_eqn h s ≤ νN_eval_opt h s + eps" for h s
using νN_eval_le_opt[of p]
by (auto intro: order.trans[OF **])
}
hence "length h ≤ N ⟹ νN_opt_eqn h s ≤ νN_eval_opt h s"
by (meson field_le_epsilon)
thus ?thesis
using νN_opt_ge assms antisym by auto
qed
lemma νN_opt_eqn_correct: "νN_opt s = νN_opt_eqn [] s"
using νN_eval_correct νN_eval_opt_def νN_opt_def νN_opt_eq by force
lemma thm_4_3_4:
assumes "eps ≥ 0" "p ∈ Π⇩M⇩D"
and "⋀h s. length h < N ⟹ r (s, p (length h) s) + measure_pmf.expectation (K (s, p (length h) s)) (νN_opt_eqn (h@[(s, p (length h) s)])) + eps
≥ (⨆a ∈ A s. r (s, a) + measure_pmf.expectation (K (s,a)) (νN_opt_eqn (h@[(s, a)])))"
shows "⋀h s. length h ≤ N ⟹ νN_eval (mk_markovian_det p) h s + (N - length h) * eps ≥ νN_opt_eqn h s"
"⋀s. νN (mk_markovian_det p) s + N * eps ≥ νN_opt s"
proof -
show "νN_eval (mk_markovian_det p) h s + (N - length h) * eps ≥ νN_opt_eqn h s" if "length h ≤ N" for h s
using assms that
proof (induction "N - length h" arbitrary: h s)
case 0
then show ?case
using νN_eval.simps νN_opt_eqn.simps by force
next
case (Suc x)
have "νN_opt_eqn h s = (⨆a ∈ A s. r (s, a) + measure_pmf.expectation (K (s,a)) (νN_opt_eqn (h@[(s, a)])))"
using Suc.hyps(2) νN_opt_eqn.simps by fastforce
also have "… ≤ r (s, p (length h) s) + measure_pmf.expectation (K (s, p (length h) s)) (νN_opt_eqn (h@[(s, p (length h) s)])) + eps"
using Suc.hyps(2) Suc.prems(3)
by simp
also have "… ≤ r (s, p (length h) s) + measure_pmf.expectation (K (s, p (length h) s)) (λs'. νN_eval (mk_markovian_det p) (h@[(s, p (length h) s)]) s' +
(N - length (h@[(s,p (length h) s)])) * eps) + eps"
using Suc(1)[of "(h@[(s,p (length h) s)])"] Suc.hyps(2) assms
by (auto intro!: integral_mono pmf_bounded_integrable bounded_plus_comp)
also have "… = r (s, p (length h) s) + measure_pmf.expectation (K (s, p (length h) s)) (νN_eval (mk_markovian_det p) (h@[(s, p (length h) s)])) + (N - length h) * eps"
using Suc
by (subst Bochner_Integration.integral_add) (auto simp: of_nat_diff left_diff_distrib distrib_right intro!: pmf_bounded_integrable)
also have "… = νN_eval (mk_markovian_det p) h s + (N - length h) * eps"
using Suc
by (auto simp add: νN_eval.simps mk_markovian_det_def)
finally show ?case.
qed
from this[of "[]"] show "νN (mk_markovian_det p) s + N * eps ≥ νN_opt s" for s
using νN_eval_correct νN_opt_eqn_correct
by auto
qed
lemma νN_has_eps_opt_pol:
assumes "eps > 0"
shows "∃p ∈ Π⇩M⇩D. ∀s. νN (mk_markovian_det p) s + eps ≥ νN_opt s"
proof -
obtain p where "p∈Π⇩M⇩D" and
P: "⋀h s. length h ≤ N ⟹ νN_opt_eqn h s ≤ νN_eval (mk_markovian_det p) h s + eps"
using νN_opt_le'[of eps] assms by auto
from P[of "[]"] have "νN_opt_eqn [] s ≤ νN_eval (mk_markovian_det p) [] s + eps" for s
by auto
thus ?thesis
unfolding νN_opt_eqn_correct
using νN_eval_correct ‹p ∈ Π⇩H⇩D› by auto
qed
lemma νN_le_opt: "p ∈ Π⇩H⇩R ⟹ νN p s ≤ νN_opt s"
by (metis νN_eval_correct νN_eval_le_opt_eqn νN_opt_eqn_correct)
lemma νN_has_opt_pol:
assumes "⋀h s.
length h < N
⟹ ∃a ∈ A s. r (s, a) + measure_pmf.expectation (K (s,a)) (νN_opt_eqn (h@[(s,a)]))
= (⨆a ∈ A s. r (s, a) + measure_pmf.expectation (K (s,a)) (νN_opt_eqn (h@[(s,a)])))"
shows "∃p ∈ Π⇩M⇩D. ∀s. νN (mk_markovian_det p) s = νN_opt s"
proof -
define p where "p = (λn s. if n ≥ N then SOME a. a ∈ A s else
SOME a. a ∈ A s ∧
r (s, a) + measure_pmf.expectation (K (s, a)) (νN_opt_eqn (replicate n (s,SOME a. a ∈ A s)@[(s,a)])) = νN_opt_eqn (replicate n (s,SOME a. a ∈ A s)) s
)"
have p_short: "p n s = (
SOME a. a ∈ A s ∧
r (s, a) + measure_pmf.expectation (K (s, a)) (νN_opt_eqn (replicate n (s,SOME a. a ∈ A s)@[(s,a)])) = νN_opt_eqn (replicate n (s,SOME a. a ∈ A s)) s)"
if "n < N" for n s
unfolding p_def using that by auto
have *: "p n s ∈ A s"
"(n < N ⟹ r (s, p n s) + measure_pmf.expectation (K (s,p n s)) (νN_opt_eqn ((replicate n (s,SOME a. a ∈ A s))@[(s,p n s)]))
= (⨆a ∈ A s. r (s, a) + measure_pmf.expectation (K (s,a)) (νN_opt_eqn ((replicate n (s,SOME a. a ∈ A s))@[(s,a)]))))" for n s
using someI_ex[OF assms[unfolded Bex_def]] SOME_is_dec_det
by (auto simp: νN_opt_eqn.simps is_dec_det_def p_def)
have "νN (mk_markovian_det p) s ≥ νN_opt s" for s
proof (intro thm_4_3_4(2)[of 0 p, simplified])
show "∀n. is_dec_det (p n)"
using *
by (auto simp: is_dec_det_def)
next
{
fix h :: "('s × 'a) list" and s
assume "length h < N"
have "(⨆a∈A s. r (s, a) + measure_pmf.expectation (K (s, a)) (νN_opt_eqn (h @ [(s, a)]))) =
(⨆a ∈ A s. r (s, a) + measure_pmf.expectation (K (s,a)) (νN_opt_eqn ((replicate (length h) (s,SOME a. a ∈ A s))@[(s,a)])))"
using ‹length h < N›
by (auto intro!: SUP_cong Bochner_Integration.integral_cong νN_opt_eqn_markov[THEN cong])
also have "… = r (s, p (length h) s) + measure_pmf.expectation (K (s,p (length h) s)) (νN_opt_eqn ((replicate (length h) (s,SOME a. a ∈ A s))@[(s,p (length h) s)]))"
using * ‹length h < N› by presburger
also have "… = r (s, p (length h) s) + measure_pmf.expectation (K (s,p (length h) s)) (νN_opt_eqn (h@[(s,p (length h) s)]))"
using ‹length h < N›
by (auto intro!: Bochner_Integration.integral_cong νN_opt_eqn_markov[THEN cong])
finally show "(⨆a∈A s. r (s, a) + measure_pmf.expectation (K (s, a)) (νN_opt_eqn (h @ [(s, a)])))
≤ r (s, p (length h) s) + measure_pmf.expectation (K (s, p (length h) s)) (νN_opt_eqn (h @ [(s, p (length h) s)]))"
by auto
}
qed
hence "νN (mk_markovian_det p) s = νN_opt s" for s
using νN_le_opt *(1) mk_markovian_det_preserves
by (simp add: is_dec_det_def order_antisym)
thus ?thesis
using *(1)
by (auto simp: is_dec_det_def)
qed
lemma ex_Max: "finite X ⟹ X ≠ {} ⟹ ∃x ∈ X. f x = Max (f ` X)"
by (metis (mono_tags, opaque_lifting) Max_in empty_is_image finite_imageI imageE)
lemma fin_A_imp_opt_pol:
assumes "⋀s. finite (A s)"
shows "∃p∈Π⇩M⇩D. ∀s. νN (mk_markovian_det p) s = νN_opt s"
using A_ne assms νN_has_opt_pol
by (fastforce simp: cSup_eq_Max intro!: ex_Max)
section ‹Backward Induction›
function bw_ind_aux where
"bw_ind_aux n s = (
if n = N then r_fin s else
if n > N then 0 else
⨆a ∈ A s. (r (s,a) +
measure_pmf.expectation (K (s,a)) (λs'. bw_ind_aux (Suc n) s'))) "
by auto
termination
by (relation "Wellfounded.measure (λ(h,s). N - h)") auto
lemmas bw_ind_aux.simps[simp del]
lemma bw_ind_aux_eq: "bw_ind_aux (length h) s = νN_opt_eqn h s"
by (induction h s rule: νN_opt_eqn.induct)
(auto simp: bw_ind_aux.simps νN_opt_eqn.simps split: if_splits intro!: Bochner_Integration.integral_cong SUP_cong)
fun bw_ind_aux' where
"bw_ind_aux' (Suc n) m = (
let m' = (λi s.
if i = n
then (⨆a ∈ A s. (r (s,a) + measure_pmf.expectation (K (s,a)) (m (Suc n))))
else m i s) in
bw_ind_aux' n m')" |
"bw_ind_aux' 0 m = m"
definition "bw_ind = bw_ind_aux' N (λi s. if i = N then r_fin s else 0)"
lemma bw_ind_aux'_const[simp]:
assumes "i ≥ n"
shows "bw_ind_aux' n m i = m i"
using assms
proof (induction n arbitrary: m i)
case 0
then show ?case by (auto simp: bw_ind_aux'.simps)
next
case (Suc n)
then show ?case
by auto
qed
lemma bw_ind_aux'_indep:
assumes "i < n" and
"⋀j. j > i ⟹ m j = m' j"
shows "bw_ind_aux' n m i s = bw_ind_aux' n m' i s"
using assms
proof (induction n arbitrary: m i m')
case 0
then show ?case
by fastforce
next
case (Suc n)
show ?case
proof (cases "i < n")
case True
then show ?thesis
by (auto intro!: Suc(1) ext simp: Suc(2,3))
next
case False
then show ?thesis
using Suc.prems(1) less_Suc_eq
by (auto simp: Suc)
qed
qed
lemma bw_ind_aux'_simps': "i < n ⟹ bw_ind_aux' n m i s = (⨆a ∈ A s. (r (s,a) + measure_pmf.expectation (K (s,a)) (bw_ind_aux' n m (Suc i))))"
proof (induction n arbitrary: m i s)
case 0
then show ?case by auto
next
case (Suc n)
have "bw_ind_aux' (Suc n) m i s = bw_ind_aux' n (λi s. if i = n then ⨆a∈A s. r (s, a) + measure_pmf.expectation (K (s, a)) (m (Suc n)) else m i s) i s"
by auto
also have "… = (⨆a∈A s. r (s, a) + measure_pmf.expectation (K (s, a)) ((bw_ind_aux' (Suc n) m (Suc i))))"
using Suc.prems le_less_Suc_eq
by (cases "n ≤ i") (auto simp: Suc.IH bw_ind_aux'_const)
finally show ?case.
qed
lemma bw_ind_correct: "n ≤ N ⟹ bw_ind n = bw_ind_aux n"
unfolding bw_ind_def
proof (induction "N - n" arbitrary: n)
case 0
show ?case
using 0
by (subst bw_ind_aux.simps) (auto)
next
case (Suc x)
thus ?case
by (auto simp: bw_ind_aux'_simps' bw_ind_aux.simps intro!: ext)
qed
definition "bw_ind_pol_gen (d :: 'a set ⇒ 'a) n s = (
if n ≥ N then d (A s)
else
d ({a . is_arg_max (λa. r (s, a) + measure_pmf.expectation (K (s, a)) (bw_ind_aux (Suc n))) (λa. a ∈ A s) a}))"
lemma bw_ind_pol_is_arg_max:
assumes "⋀X. X ≠ {} ⟹ d X ∈ X" "⋀s. finite (A s)"
shows "is_arg_max (λa. r (s, a) + measure_pmf.expectation (K (s, a)) (bw_ind_aux (Suc n))) (λa. a ∈ A s) (d ({a . is_arg_max (λa. r (s, a) + measure_pmf.expectation (K (s, a)) (bw_ind_aux (Suc n))) (λa. a ∈ A s) a}))"
proof -
let ?s = "{a. is_arg_max (λa. r (s, a) + measure_pmf.expectation (K (s, a)) (bw_ind_aux (Suc n))) (λa. a ∈ A s) a}"
have ‹d ?s ∈ ?s›
using assms(1)[of " {a. is_arg_max (λa. r (s, a) + measure_pmf.expectation (K (s, a)) (bw_ind_aux (Suc n))) (λa. a ∈ A s) a}"]
using finite_is_arg_max A_ne assms
by (auto simp add: finite_is_arg_max)
thus ?thesis
by auto
qed
lemma bw_ind_pol_gen:
assumes "⋀X. X ≠ {} ⟹ d X ∈ X" "⋀s. finite (A s)"
shows "bw_ind_pol_gen d ∈ Π⇩M⇩D"
proof -
have ***:"X ≠ {} ⟹ X ⊆ Y ⟹ d X ∈ Y" for X Y
using assms
by auto
have "∃a. is_arg_max (λa. r (s, a) + measure_pmf.expectation (K (s, a)) (bw_ind_aux (Suc n))) (λa. a ∈ A s) a" for n s
using finite_is_arg_max[OF assms(2) A_ne]
by auto
thus ?thesis
unfolding bw_ind_pol_gen_def is_dec_det_def
by (force intro!: ***)
qed
lemma
assumes "⋀X. X ≠ {} ⟹ d X ∈ X" "⋀s. finite (A s)" "length h ≤ N"
shows "νN_eval (mk_markovian_det (bw_ind_pol_gen d)) h s = νN_eval_opt h s"
proof -
have "(⋀h s. length h < N ⟹
(⨆a∈A s. r (s, a) + measure_pmf.expectation (K (s, a)) (νN_opt_eqn (h @ [(s, a)])))
≤ r (s, bw_ind_pol_gen d (length h) s) +
measure_pmf.expectation (K (s, bw_ind_pol_gen d (length h) s))
(νN_opt_eqn (h @ [(s, bw_ind_pol_gen d (length h) s)])))"
using A_ne bw_ind_pol_is_arg_max[OF assms(1,2)]
unfolding bw_ind_aux_eq[symmetric]
by (auto intro!: cSUP_least simp: bw_ind_pol_gen_def)
hence "length h ≤ N ⟹ νN_opt_eqn h s ≤ νN_eval (mk_markovian_det (bw_ind_pol_gen d)) h s" for h s
using assms bw_ind_pol_gen thm_4_3_4[of 0 "bw_ind_pol_gen d", simplified]
by auto
thus ?thesis
using νN_opt_eq νN_eval_le_opt assms bw_ind_pol_gen mk_markovian_det_preserves
by (auto intro!: antisym)
qed
lemma bw_ind_aux'_eq: "n ≤ N ⟹ bw_ind_aux' N (λi s. if i = N then r_fin s else 0) n = bw_ind_aux n"
using bw_ind_def bw_ind_correct by presburger
end
end