Theory MDP_fin

theory MDP_fin
  imports 
  "MDP-Rewards.MDP_reward"
begin

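(* A countable MDP together with a finite state set S that is closed under the
   transition kernel and has finitely many enabled actions in every state. *)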
locale MDP_on = MDP_act_disc arb_act A K r l 
  for
    A and
    K :: "'s ::countable × 'a ::countable  's pmf" and r l arb_act +
  fixes S :: "'s set"
  assumes 
    fin_states: "finite S" and 
    fin_actions: "s. finite (A s)" and
    K_closed: "set_pmf (K (s,a))  S"
begin

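(* For states in S, b v s depends only on the values of v on S:
   if v and v' agree on S, then so do b v and b v'. *)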
lemma b_indep:
  assumes "s. s  S  apply_bfun v s = apply_bfun v' s"
    and "s  S"
  shows "b v s = b v' s"
proof -
  have "measure_pmf.expectation (K (s, a)) (apply_bfun v) = measure_pmf.expectation (K (s, a)) (apply_bfun v')" for a
    using assms K_closed subsetD
    by (auto intro!: AE_pmfI Bochner_Integration.integral_cong_AE)
  thus ?thesis
    unfolding b_eq_SUP_La La_int by auto
qed

end

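(* MDPs over natural numbers: states and actions are naturals, with finitely
   many actions per state.  MDP_nat additionally fixes a bound `states`:
   transitions from states below the bound stay below it, transitions from
   states at or above it stay at or above it, and outside the bound only
   action 0 is available. *)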
locale MDP_nat_type = MDP_act A K
  for A :: "nat  nat set" and K +
  assumes A_fin : "s. finite (A s)"

locale MDP_nat = MDP_nat_type +
  fixes states :: nat
  assumes K_closed: "s < states. set_pmf (K (s,a))  {0..<states}"
  assumes K_closed_compl: "s states. set_pmf (K (s,a))  {states..}"
  assumes A_outside: "s. s  states  A s = {0}"


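(* Combination with the discounted reward setting (MDP_act_disc):
   rewards vanish outside the first `states` states. *)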
locale MDP_nat_disc = MDP_nat arb_act A K states + MDP_act_disc arb_act A K r l 
  for A K r l arb_act states +
  assumes reward_zero_outside: "∀s ≥ states. r (s,a) = 0"
begin
lemma b_eq_La_max': "b v s = (MAX a ∈ A s. La a v s)"
  unfolding b_eq_La_max
  using finite_arg_max_eq_Max[of "A s" "λa. La a v s"] A_ne A_fin
  by auto

abbreviation "state_space  {0..<states}"

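(* A process started outside state_space never enters it (set_pmf_Xn',
   set_pmf_Pn'), so it collects zero reward and both ν and ν_opt are zero
   outside state_space. *)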
lemma set_pmf_Xn': "s ∉ state_space ⟹ set_pmf (Xn' p s i) ⊆ {states..}"
  using K_closed_compl 
  by (induction i arbitrary: p s) (auto dest!: subsetD simp: Suc_Xn' linorder_not_less)

lemma set_pmf_Pn': "s ∉ state_space ⟹ (∀sa ∈ set_pmf (Pn' p s i). fst sa ∉ state_space)"
  using set_pmf_Xn'[unfolded Xn'_Pn'] by fastforce

lemma reward_Pn'_notin: "s ∉ state_space ⟹ (∀sa ∈ set_pmf (Pn' p s i). r sa = 0)"
  using set_pmf_Pn' reward_zero_outside by (fastforce simp: linorder_not_less)

lemma ν_zero_notin:
  assumes "s  state_space" 
  shows "ν p s = 0"
proof -
  have "ν_fin p n s = 0" for n
    using assms reward_Pn'_notin
    by (auto simp: ν_fin_eq_Pn intro!: sum.neutral integral_eq_zero_AE AE_pmfI)
  thus ?thesis 
    unfolding ν_def by auto
qed

lemma ν_opt_zero_notin:
  assumes "s  state_space" 
  shows "ν_opt s = 0"
  unfolding ν_opt_def using assms ν_zero_notin policies_ne by auto

end

end