Theory Policy_Iteration_Fin
theory Policy_Iteration_Fin
imports
Policy_Iteration
MDP_fin
Blinfun_To_Matrix
begin
context MDP_nat_disc begin
lemma finite_D⇩D[simp]: "finite D⇩D"
proof -
let ?set = "{d. ∀x :: nat. (x ∈ {0..<states} ⟶ d x ∈ (⋃s∈{0..<states}. A s)) ∧ (x ∉ {0..<states} ⟶ d x = 0)}"
have "finite (⋃s<states. A s)"
using A_fin by auto
hence "finite ?set"
by (intro finite_set_of_finite_funs) auto
moreover have "D⇩D ⊆ ?set"
unfolding is_dec_det_def
using A_outside
by (auto simp: not_less)
ultimately show ?thesis
using finite_subset
by auto
qed
lemma finite_rel: "finite {(u, v). is_dec_det u ∧ is_dec_det v ∧ ν⇩b (mk_stationary_det u) > ν⇩b (mk_stationary_det v)}"
proof-
have aux: "finite {(u, v). is_dec_det u ∧ is_dec_det v}"
by auto
show ?thesis
by (auto intro: finite_subset[OF _ aux])
qed
lemma eval_eq_imp_policy_eq:
assumes "policy_eval d = policy_eval (policy_step d)" "is_dec_det d"
shows "d = policy_step d"
proof -
have "policy_eval d s = policy_eval (policy_step d) s" for s
using assms
by auto
have "policy_eval d = L (mk_dec_det d) (policy_eval (policy_step d))"
unfolding policy_eval_def
using L_ν_fix
by (auto simp: assms(1)[symmetric, unfolded policy_eval_def])
hence "policy_eval d = ℒ⇩b (policy_eval d)"
by (metis L_ν_fix policy_eval_def assms eval_policy_step_L)
hence "L (mk_dec_det d) (policy_eval d) s = ℒ⇩b (policy_eval d) s" for s
using ‹policy_eval d = L (mk_dec_det d) (policy_eval (policy_step d))› assms(1) by auto
hence "is_arg_max (λa. L⇩a a (ν⇩b (mk_stationary (mk_dec_det d))) s) (λa. a ∈ A s) (d s)" for s
unfolding L_eq_L⇩a_det
unfolding policy_eval_def ℒ⇩b.rep_eq ℒ_eq_SUP_det SUP_step_det_eq
using assms(2) is_dec_det_def L⇩a_le
by (auto simp del: ν⇩b.rep_eq simp: ν⇩b.rep_eq[symmetric]
intro!: SUP_is_arg_max boundedI[of _ "r⇩M + l * norm _"] bounded_imp_bdd_above)
thus ?thesis
unfolding policy_eval_def policy_step_def policy_improvement_def
by auto
qed
termination policy_iteration
proof (relation "{(u, v). u ∈ D⇩D ∧ v ∈ D⇩D ∧ ν⇩b (mk_stationary_det u) > ν⇩b (mk_stationary_det v)}")
show "wf {(u, v). u ∈ D⇩D ∧ v ∈ D⇩D ∧ ν⇩b (mk_stationary_det v) < ν⇩b (mk_stationary_det u)}"
using finite_rel
by (auto intro!: finite_acyclic_wf acyclicI_order)
next
fix d x
assume h: "x = policy_step d" "¬ (d = x ∨ ¬ is_dec_det d)"
have "is_dec_det d ⟹ ν⇩b (mk_stationary_det d) ≤ ν⇩b (mk_stationary_det (policy_step d))"
using policy_eval_mon
by (simp add: policy_eval_def)
hence "is_dec_det d ⟹ d ≠ policy_step d ⟹
ν⇩b (mk_stationary_det d) < ν⇩b (mk_stationary_det (policy_step d))"
using eval_eq_imp_policy_eq policy_eval_def
by (force intro!: order.not_eq_order_implies_strict)
thus "(x, d) ∈ {(u, v). u ∈ D⇩D ∧ v ∈ D⇩D ∧ ν⇩b (mk_stationary_det v) < ν⇩b (mk_stationary_det u)}"
using is_dec_det_pi policy_step_def h
by auto
qed
lemma is_dec_det_pi': "d ∈ D⇩D ⟹ is_dec_det (policy_iteration d)"
using is_dec_det_pi
by (induction d rule: policy_iteration.induct) (auto simp: Let_def policy_step_def)
lemma pi_pi[simp]: "d ∈ D⇩D ⟹ policy_step (policy_iteration d) = policy_iteration d"
using is_dec_det_pi
by (induction d rule: policy_iteration.induct) (auto simp: policy_step_def Let_def)
lemma policy_iteration_correct:
"d ∈ D⇩D ⟹ ν⇩b (mk_stationary_det (policy_iteration d)) = ν⇩b_opt"
by (induction d rule: policy_iteration.induct)
(fastforce intro!: policy_step_eq_imp_opt is_dec_det_pi' simp del: policy_iteration.simps)
lemma ν⇩b_zero_notin: "s ≥ states ⟹ ν⇩b p s = 0"
using ν_zero_notin unfolding ν⇩b.rep_eq by auto
lemma r_dec⇩b_zero_notin: "s ≥ states ⟹ r_dec⇩b d s = 0"
using reward_zero_outside
by auto
lemma ν⇩b_eq_inv: "ν⇩b (mk_stationary d) = inv⇩L (id_blinfun - l *⇩R 𝒫⇩1 d) (r_dec⇩b d)"
using ν_stationary_inv.
lemma ν⇩b_eq_bfun_if: "ν⇩b (mk_stationary d) = bfun_if (λi. i < states) (ν⇩b (mk_stationary d)) 0"
using ν⇩b_zero_notin by (auto simp: bfun_if.rep_eq)
lemma ν⇩b_vec_aux: "((1⇩m states) - l ⋅⇩m (blinfun_to_mat states states (𝒫⇩1 d))) *⇩v bfun_to_vec states (ν⇩b (mk_stationary d)) = bfun_to_vec states (r_dec⇩b d)"
proof -
let ?to_mat = "blinfun_to_mat states states"
let ?to_vec = "bfun_to_vec states"
have "((1⇩m states) - l ⋅⇩m (?to_mat (𝒫⇩1 d))) *⇩v ?to_vec (ν⇩b (mk_stationary d)) =
((1⇩m states) - ?to_mat (l *⇩R (𝒫⇩1 d))) *⇩v ?to_vec (ν⇩b (mk_stationary d))"
using blinfun_to_mat_scale by fastforce
also have "… = (?to_mat id_blinfun - ?to_mat (l *⇩R (𝒫⇩1 d))) *⇩v ?to_vec (ν⇩b (mk_stationary d))"
using blinfun_to_mat_id by presburger
also have "… = ?to_mat (id_blinfun - l *⇩R 𝒫⇩1 d) *⇩v ?to_vec (ν⇩b (mk_stationary d))"
using blinfun_to_mat_sub by presburger
also have "… = ?to_vec ((id_blinfun - l *⇩R 𝒫⇩1 d) ((ν⇩b (mk_stationary d))))"
unfolding blinfun_to_mat_mult using ν⇩b_eq_bfun_if by auto
also have "… = ?to_vec (r_dec⇩b d)"
by (metis L_ν_fix_iff L_def blinfun.diff_left blinfun.scaleR_left diff_eq_eq id_blinfun.rep_eq)
finally show ?thesis.
qed
lemma summable_geom_𝒫⇩1: "summable (λk. ((l *⇩R 𝒫⇩1 d)^^k))"
using summable_inv_Q norm_𝒫⇩1
by (metis add_diff_cancel_left' diff_add_cancel norm_𝒫⇩1_l_less)
lemma summable_geom_𝒫⇩1': "summable (λk. ((l *⇩R 𝒫⇩1 d)^^k) v)" for v
using summable_geom_𝒫⇩1 tendsto_blinfun_apply
unfolding summable_def sums_def
by (fastforce simp: blinfun.sum_left)
lemma summable_geom_𝒫⇩1'': "summable (λk. ((l *⇩R 𝒫⇩1 d)^^k) v s)" for v s
using summable_geom_𝒫⇩1' bfun_tendsto_apply_bfun
unfolding summable_def sums_def
by (fastforce simp: sum_apply_bfun)
lemma K_closed': "s<states ⟹ j ∈ set_pmf (K (s, a)) ⟹ j < states"
by (meson K_closed atLeastLessThan_iff basic_trans_rules(31))
lemma 𝒫⇩1_indep:
assumes "(⋀i. i < states ⟹ apply_bfun v i = apply_bfun v' i)" "j < states"
shows "(l *⇩R 𝒫⇩1 d) v j = (l *⇩R 𝒫⇩1 d) v' j"
using assms K_closed'[OF assms(2)]
by (auto simp: blinfun.scaleR_left 𝒫⇩1.rep_eq K_st_def intro!: integral_cong_AE AE_pmfI)
lemma inv⇩L_indep:
assumes "⋀i. i < states ⟹ apply_bfun v i = apply_bfun v' i" "j < states"
shows "((inv⇩L (id_blinfun - l *⇩R 𝒫⇩1 d)) v) j = ((inv⇩L (id_blinfun - l *⇩R 𝒫⇩1 d)) v') j"
proof -
have "((l *⇩R 𝒫⇩1 d) ^^ n) v j = ((l *⇩R 𝒫⇩1 d) ^^ n) v' j" for n
using assms 𝒫⇩1_indep by (induction n arbitrary: j v v') fastforce+
thus ?thesis
using summable_geom_𝒫⇩1 summable_geom_𝒫⇩1'
by (auto simp: inv⇩L_inf_sum blinfun_apply_suminf[symmetric] suminf_apply_bfun)
qed
lemma vec_ν⇩b: "bfun_to_vec states (ν⇩b (mk_stationary d)) =
inverse_mat ((1⇩m states) - l ⋅⇩m (blinfun_to_mat states states (𝒫⇩1 d))) *⇩v (bfun_to_vec states (r_dec⇩b d))"
proof -
have "bfun_to_vec states (ν⇩b (mk_stationary d)) = bfun_to_vec states (inv⇩L (id_blinfun - l *⇩R 𝒫⇩1 d) (r_dec⇩b d))"
using ν⇩b_eq_inv by force
also have "… = bfun_to_vec states (inv⇩L (id_blinfun - l *⇩R 𝒫⇩1 d) (bfun_if (λi. i < states) (r_dec⇩b d) 0))"
using r_dec⇩b_zero_notin
by (subst bfun_if_eq) auto
also have "… = blinfun_to_mat states states (inv⇩L (id_blinfun - l *⇩R 𝒫⇩1 d)) *⇩v (bfun_to_vec states (r_dec⇩b d))"
using blinfun_to_mat_mult..
also have "… = inverse_mat (blinfun_to_mat states states (id_blinfun - l *⇩R 𝒫⇩1 d)) *⇩v (bfun_to_vec states (r_dec⇩b d))"
using inv⇩L_indep 𝒫⇩1_indep
by (fastforce simp add: inverse_blinfun_to_mat invertible⇩L_inf_sum blinfun.diff_left)+
finally show ?thesis
using blinfun_to_mat_id blinfun_to_mat_scale blinfun_to_mat_sub by presburger
qed
lemma invertible_ν⇩b_mat: "invertible_mat ((1⇩m states) - l ⋅⇩m (blinfun_to_mat states states (𝒫⇩1 d)))"
proof -
have "invertible_mat (blinfun_to_mat states states ((id_blinfun - l *⇩R 𝒫⇩1 d)))"
using 𝒫⇩1_indep inv⇩L_indep
by (fastforce simp: invertible⇩L_inf_sum blinfun.diff_left intro!: inverse_blinfun_to_mat(2))+
thus ?thesis
by (auto simp: blinfun_to_mat_id blinfun_to_mat_sub blinfun_to_mat_scale)
qed
lemma mat_cong:
assumes "(⋀i j. i < n ⟹ j < m ⟹ f i j = g i j)"
shows "Matrix.mat n m (λ(i, j). f i j) = Matrix.mat n m (λ(i,j). g i j)"
using assms by auto
lemma 𝒫⇩1_mat: "(Matrix.mat states states (λ(s, s'). pmf (K (s, d s)) s')) = blinfun_to_mat states states (𝒫⇩1 (mk_dec_det d))"
proof -
have "pmf (K (s, d s)) s' = measure_pmf.expectation (K (s, d s)) (λk. if s' = k then 1 else 0)"
if "s < states" "s' < states" for s s'
by (auto simp: integral_measure_pmf_real[of "{s'}"] split: if_splits)
thus ?thesis
by (auto simp: blinfun_to_mat_def 𝒫⇩1.rep_eq K_st_def mk_dec_det_def bind_return_pmf)
qed
lemma vec_ν⇩b': "bfun_to_vec states (ν⇩b (mk_stationary_det d)) =
inverse_mat ((1⇩m states) - l ⋅⇩m (Matrix.mat states states (λ(s, s'). pmf (K (s, d s)) s'))) *⇩v
(vec states (λi. r (i, d i)))"
unfolding vec_ν⇩b using 𝒫⇩1_mat by (auto simp: bfun_to_vec_def)
lemma vec_ν⇩b'':
assumes "s < states"
shows "(ν⇩b (mk_stationary_det d)) s =
(inverse_mat ((1⇩m states) - l ⋅⇩m (Matrix.mat states states (λ(s, s'). pmf (K (s, d s)) s'))) *⇩v
(vec states (λi. r (i, d i)))) $ s"
using vec_ν⇩b' assms unfolding bfun_to_vec_def by (metis index_vec)
lemma invertible_ν⇩b_mat':
"invertible_mat (1⇩m states - l ⋅⇩m Matrix.mat states states (λ(s, y). pmf (K (s, d s)) y))"
using invertible_ν⇩b_mat 𝒫⇩1_mat by presburger
lemma dim_vec_ν⇩b: "dim_vec (inverse_mat ((1⇩m states) -
l ⋅⇩m (Matrix.mat states states (λ(s, s'). pmf (K (s, d s)) s'))) *⇩v
(vec states (λi. r (i, d i)))) = states"
by (simp add: inverse_mat_dims(2) invertible_ν⇩b_mat')
end
end