Theory MDP_fin
theory MDP_fin
imports
"MDP-Rewards.MDP_reward"
begin
locale MDP_on = MDP_act_disc arb_act A K r l
for
A and
K :: "'s ::countable × 'a ::countable ⇒ 's pmf" and r l arb_act +
fixes S :: "'s set"
assumes
fin_states: "finite S" and
fin_actions: "⋀s. finite (A s)" and
K_closed: "set_pmf (K (s,a)) ⊆ S"
begin
lemma ℒ⇩b_indep:
assumes "⋀s. s ∈ S ⟹ apply_bfun v s = apply_bfun v' s"
and "s ∈ S"
shows "ℒ⇩b v s = ℒ⇩b v' s"
proof -
have "measure_pmf.expectation (K (s, a)) (apply_bfun v) = measure_pmf.expectation (K (s, a)) (apply_bfun v')" for a
using assms K_closed subsetD
by (auto intro!: AE_pmfI Bochner_Integration.integral_cong_AE)
thus ?thesis
unfolding ℒ⇩b_eq_SUP_L⇩a L⇩a_int by auto
qed
end
locale MDP_nat_type = MDP_act A K
for A :: "nat ⇒ nat set" and K +
assumes A_fin : "⋀s. finite (A s)"
locale MDP_nat = MDP_nat_type +
fixes states :: nat
assumes K_closed: "∀s < states. set_pmf (K (s,a)) ⊆ {0..<states}"
assumes K_closed_compl: "∀s≥ states. set_pmf (K (s,a)) ⊆ {states..}"
assumes A_outside: "⋀s. s ≥ states ⟹ A s = {0}"
locale MDP_nat_disc = MDP_nat arb_act A K states + MDP_act_disc arb_act A K r l
for A K r l arb_act states +
assumes reward_zero_outside: "∀s≥states. r (s,a) = 0"
begin
lemma ℒ⇩b_eq_L⇩a_max': "ℒ⇩b v s = (MAX a ∈ A s. L⇩a a v s)"
unfolding ℒ⇩b_eq_L⇩a_max
using finite_arg_max_eq_Max[of "A s" "λa. L⇩a a v s"] A_ne A_fin
by auto
abbreviation "state_space ≡ {0..<states}"
lemma set_pmf_Xn': "s ∉ state_space ⟹ set_pmf (Xn' p s i) ⊆ {states..}"
using K_closed_compl
by (induction i arbitrary: p s) (auto dest!: subsetD simp: Suc_Xn' linorder_not_less)
lemma set_pmf_Pn': "s ∉ state_space ⟹ (∀sa ∈ set_pmf (Pn' p s i). fst sa∉ state_space)"
using set_pmf_Xn'[unfolded Xn'_Pn'] by fastforce
lemma reward_Pn'_notin: "s ∉ state_space ⟹ (∀sa ∈ set_pmf (Pn' p s i). r sa = 0)"
using set_pmf_Pn' reward_zero_outside by (fastforce simp: linorder_not_less)
lemma ν_zero_notin:
assumes "s ∉ state_space"
shows "ν p s = 0"
proof -
have "ν_fin p n s = 0" for n
using assms reward_Pn'_notin
by (auto simp: ν_fin_eq_Pn intro!: sum.neutral integral_eq_zero_AE AE_pmfI)
thus ?thesis
unfolding ν_def by auto
qed
lemma ν_opt_zero_notin:
assumes "s ∉ state_space"
shows "ν_opt s = 0"
unfolding ν_opt_def using assms ν_zero_notin policies_ne by auto
end
end