Theory MDP_cont
section ‹Discrete-Time Markov Decision Processes with Arbitrary State Spaces›
text ‹
In this file we construct discrete-time Markov decision processes,
e.g. with arbitrary state spaces.
Proofs and definitions are adapted from Markov\_Models.Discrete\_Time\_Markov\_Process.
›
theory MDP_cont
imports "HOL-Probability.Probability"
begin
lemma Ionescu_Tulcea_C_eq:
assumes "⋀i h. h ∈ space (PiM {0..<i} N) ⟹ P i h = P' i h"
assumes h: "Ionescu_Tulcea P N" "Ionescu_Tulcea P' N"
shows "Ionescu_Tulcea.C P N 0 n (λx. undefined) = Ionescu_Tulcea.C P' N 0 n (λx. undefined)"
proof (induction n)
case 0
then show ?case using h by (auto simp: Ionescu_Tulcea.C_def)
next
case (Suc n)
have aux: "space (PiM {0..<0 + n} N) = space (rec_nat (λn. return (Pi⇩M {0..<n} N))
(λm ma n ω. ma n ω ⤜ Ionescu_Tulcea.eP P' N (n + m)) n 0 (λx. undefined))"
using h
by (subst Ionescu_Tulcea.space_C[symmetric, where P = P', where x = "(λx. undefined)"])
(auto simp add: Ionescu_Tulcea.C_def)
have "⋀i h. h ∈ space (PiM {0..<i} N) ⟹ Ionescu_Tulcea.eP P N i h = Ionescu_Tulcea.eP P' N i h"
by (auto simp add: Ionescu_Tulcea.eP_def assms)
then show ?case
using Suc.IH h
using aux[symmetric]
by (auto intro!: bind_cong simp: Ionescu_Tulcea.C_def)
qed
lemma Ionescu_Tulcea_CI_eq:
assumes "⋀i h. h ∈ space (PiM {0..<i} N) ⟹ P i h = P' i h"
assumes h: "Ionescu_Tulcea P N" "Ionescu_Tulcea P' N"
shows "Ionescu_Tulcea.CI P N = Ionescu_Tulcea.CI P' N"
proof -
have "⋀J. Ionescu_Tulcea.CI P N J = Ionescu_Tulcea.CI P' N J"
unfolding Ionescu_Tulcea.CI_def[OF h(1)] Ionescu_Tulcea.CI_def[OF h(2)]
using assms
by (auto intro!: distr_cong Ionescu_Tulcea_C_eq)
thus ?thesis by auto
qed
lemma measure_eqI_PiM_sequence:
fixes M :: "nat ⇒ 'a measure"
assumes *[simp]: "sets P = PiM UNIV M" "sets Q = PiM UNIV M"
assumes eq: "⋀A n. (⋀i. A i ∈ sets (M i)) ⟹
P (prod_emb UNIV M {..n} (Pi⇩E {..n} A)) = Q (prod_emb UNIV M {..n} (Pi⇩E {..n} A))"
assumes A: "finite_measure P"
shows "P = Q"
proof (rule measure_eqI_PiM_infinite[OF * _ A])
fix J :: "nat set" and F'
assume J: "finite J" "⋀i. i ∈ J ⟹ F' i ∈ sets (M i)"
define n where "n = (if J = {} then 0 else Max J)"
define F where "F i = (if i ∈ J then F' i else space (M i))" for i
then have F[simp, measurable]: "F i ∈ sets (M i)" for i
using J by auto
have emb_eq: "prod_emb UNIV M J (Pi⇩E J F') = prod_emb UNIV M {..n} (Pi⇩E {..n} F)"
proof cases
assume "J = {}" then show ?thesis
by (auto simp add: n_def F_def[abs_def] prod_emb_def PiE_def)
next
assume "J ≠ {}" then show ?thesis
by (auto simp: prod_emb_def PiE_iff F_def n_def less_Suc_eq_le ‹finite J› split: if_split_asm)
qed
show "emeasure P (prod_emb UNIV M J (Pi⇩E J F')) = emeasure Q (prod_emb UNIV M J (Pi⇩E J F'))"
unfolding emb_eq by (rule eq) fact
qed
lemma distr_cong_simp:
"M = K ⟹ sets N = sets L ⟹ (⋀x. x ∈ space M =simp=> f x = g x) ⟹ distr M N f = distr K L g"
unfolding simp_implies_def by (rule distr_cong)
subsection ‹Definition and Basic Properties›
locale discrete_MDP =
fixes Ms :: "'s measure"
and Ma :: "'a measure"
and A :: "'s ⇒ 'a set"
and K :: "'s × 'a ⇒ 's measure"
assumes A_s: "⋀s. A s ∈ sets Ma"
assumes A_ne: "⋀s. A s ≠ {}"
assumes ex_pol: "∃δ ∈ Ms →⇩M Ma. ∀s. δ s ∈ A s"
assumes K[measurable]: "K ∈ Ms ⨂⇩M Ma →⇩M prob_algebra Ms"
begin
lemma space_prodI[intro]: "x ∈ space A' ⟹ y ∈ space B ⟹ (x,y) ∈ space (A' ⨂⇩M B)"
by (auto simp add: space_pair_measure)
abbreviation "M ≡ Ms ⨂⇩M Ma"
abbreviation "Ma_A s ≡ restrict_space Ma (A s)"
lemma space_ma[intro]: "s ∈ space Ms ⟹ a ∈ space Ma ⟹ (s,a) ∈ space M"
by (simp add: space_pair_measure)
lemma space_x0[simp]: "x0 ∈ space (prob_algebra Ms) ⟹ space x0 = space Ms"
by (metis (mono_tags, lifting) space_prob_algebra mem_Collect_eq sets_eq_imp_space_eq)
lemma A_subs_Ma: "A s ⊆ space Ma"
by (simp add: A_s sets.sets_into_space)
lemma space_Ma_A_subset: "s ∈ space Ms ⟹ space (Ma_A s) ⊆ A s"
by (simp add: space_restrict_space)
lemma K_restrict [measurable]: "K ∈ (Ms ⨂⇩M Ma_A s) →⇩M prob_algebra Ms"
by measurable (metis measurable_id measurable_pair_iff measurable_restrict_space2_iff)
lemma measurable_K_act[measurable, intro]: "s ∈ space Ms ⟹ (λa. K (s, a)) ∈ Ma →⇩M prob_algebra Ms"
by measurable
lemma measurable_K_st[measurable, intro]: "a ∈ space Ma ⟹ (λs. K (s, a)) ∈ Ms →⇩M prob_algebra Ms"
by measurable
lemma space_K[simp]: "sa ∈ space M ⟹ space (K sa) = space Ms"
using K unfolding prob_algebra_def measurable_restrict_space2_iff
by (auto dest: subprob_measurableD)
lemma space_K2[simp]: "s ∈ space Ms ⟹ a ∈ space Ma ⟹ space (K (s, a)) = space Ms"
by (simp add: space_pair_measure)
lemma space_K_E: "s' ∈ space (K (s,a)) ⟹ s ∈ space Ms ⟹ a ∈ space Ma ⟹ s' ∈ space Ms"
by auto
lemma sets_K: "sa ∈ space M ⟹ sets (K sa) = sets Ms"
using K unfolding prob_algebra_def unfolding measurable_restrict_space2_iff
by (auto dest: subprob_measurableD)
lemma sets_K'[measurable_cong]: "s ∈ space Ms ⟹ a ∈ space Ma ⟹ sets (K (s,a)) = sets Ms"
by (simp add: sets_K space_pair_measure)
lemma prob_space_K[intro]: "sa ∈ space M ⟹ prob_space (K sa)"
using measurable_space[OF K] by (simp add: space_prob_algebra)
lemma prob_space_K2[intro]: "s ∈ space Ms ⟹ a ∈ space Ma ⟹ prob_space (K (s,a))"
using prob_space_K by (simp add: space_pair_measure)
lemma K_in_space [intro]: "m ∈ space M ⟹ K m ∈ space (prob_algebra Ms)"
by (meson K measurable_space)
subsection ‹Policies›
type_synonym ('c, 'd) pol = "nat ⇒ ((nat ⇒ 'c × 'd) × 'c) ⇒ 'd measure"
abbreviation "H i ≡ Pi⇩M {0..<i} (λ_. M)"
abbreviation "Hs i ≡ H i ⨂⇩M Ms"
lemma space_H1: "j < (i :: nat) ⟹ ω ∈ space (H i) ⟹ ω j ∈ space M"
using PiE_def
by (auto simp: space_PiM)
lemma space_case_nat[intro]:
assumes "ω ∈ space (H i)" "s ∈ space Ms"
shows "case_nat s (fst ∘ ω) i ∈ space Ms"
using assms
by (cases i) (auto intro!: space_H1 measurable_space[OF measurable_fst])
lemma undefined_in_H0: "(λ_. undefined) ∈ space (H (0 :: nat))"
by auto
lemma sets_K_Suc[measurable_cong]: "h ∈ space (H (Suc n)) ⟹ sets (K (h n)) = sets Ms"
using sets_K space_H1 by blast
text‹A decision rule is a function from states to distributions over enabled actions.›
definition "is_dec0 d ≡ d ∈ Ms →⇩M prob_algebra Ma "
definition "is_dec (t :: nat) d ≡ (d ∈ Hs t →⇩M prob_algebra Ma) "
lemma "is_dec0 d ⟹ is_dec t (λ(_,s). d s)"
unfolding is_dec0_def is_dec_def by auto
text‹A policy is a function from histories to valid decision rules.›
definition is_policy :: "('s, 'a) pol ⇒ bool" where
"is_policy p ≡ ∀i. is_dec i (p i)"
abbreviation p0 :: "('s, 'a) pol ⇒ 's ⇒ 'a measure" where
"p0 p s ≡ p (0 ::nat) (λ_. undefined, s)"
context
fixes p assumes p[simp]: "is_policy p"
begin
lemma is_policyD[measurable]: "p i ∈ Hs i →⇩M prob_algebra Ma"
using p unfolding is_policy_def is_dec_def by auto
lemma space_policy[simp]: "hs ∈ space (Hs i) ⟹ space (p i hs) = space Ma"
using K is_policyD unfolding prob_algebra_def measurable_restrict_space2_iff
by (auto dest: subprob_measurableD)
lemma space_policy'[simp]: "h ∈ space (H i) ⟹ s ∈ space Ms ⟹ space (p i (h,s)) = space Ma"
using space_policy
by (simp add: space_pair_measure)
lemma space_policyI[intro]:
assumes "s ∈ space Ms" "h ∈ space (H i)" "a ∈ space Ma"
shows "a ∈ space (p i (h,s))"
using space_policy assms
by (auto simp: space_pair_measure)
lemma sets_policy[simp]: "hs ∈ space (Hs i) ⟹ sets (p i hs) = sets Ma"
using p K is_policyD
unfolding prob_algebra_def measurable_restrict_space2_iff
by (auto dest: subprob_measurableD)
lemma sets_policy'[measurable_cong, simp]:
"h ∈ space (H i) ⟹ s ∈ space Ms ⟹ sets (p i (h,s)) = sets Ma"
using sets_policy
by (auto simp: space_pair_measure)
lemma sets_policy''[measurable_cong, simp]:
"h ∈ space ((Pi⇩M {} (λ_. M))) ⟹ s ∈ space Ms ⟹ sets (p 0 (h,s)) = sets Ma"
using sets_policy
by (auto simp: space_pair_measure)
lemma policy_prob_space: "hs ∈ space (Hs i) ⟹ prob_space (p i hs)"
proof -
assume h: "hs ∈ space (Hs i)"
hence "p i hs ∈ space (prob_algebra Ma)"
using p by (auto intro: measurable_space)
thus "prob_space (p i hs)"
unfolding prob_algebra_def by (simp add: space_restrict_space)
qed
lemma policy_prob_space': "h ∈ space (H i) ⟹ s ∈ space Ms ⟹ prob_space (p i (h,s))"
using policy_prob_space by (simp add: space_pair_measure)
lemma prob_space_p0: "x ∈ space Ms ⟹ prob_space (p0 p x)"
by (simp add: policy_prob_space')
lemma p0_sets[measurable_cong]: "x ∈ space Ms ⟹ sets (p 0 (λ_. undefined,x)) = sets Ma"
using subprob_measurableD(2) measurable_prob_algebraD by simp
lemma space_p0[simp]: "s ∈ space Ms ⟹ space (p0 p s) = space Ma"
by auto
lemma return_policy_prob_algebra [measurable]:
"h ∈ space (H n) ⟹ x ∈ space Ms ⟹ (λa. return M (x, a)) ∈ p n (h, x) →⇩M prob_algebra M"
by measurable
end
subsection ‹Successor Policy›
text ‹To shift the policy by one step, we provide a single state-action pair as history›
definition "Suc_policy p sa = (λi (h, s). p (Suc i) (λi'. case_nat sa h i', s))"
lemma p_as_Suc_policy: "p (Suc i) (h, s) = Suc_policy p ((h 0)) i (λi. h (Suc i), s)"
proof -
have *: "case_nat (f 0) (λi. f (Suc i)) = f" for f
by (auto split: nat.splits)
show ?thesis
unfolding Suc_policy_def
by (auto simp: *)
qed
lemma is_policy_Suc_policy[intro]:
assumes s: "sa ∈ space M" and p: "is_policy p"
shows "is_policy (Suc_policy p sa)"
proof -
have *: "(λx. case_nat sa (fst x)) ∈ Pi⇩M {0..<i} (λ_. M) ⨂⇩M Ms →⇩M Pi⇩M {0..<Suc i} (λ_. M)" for i
using s space_H1
by (intro measurable_PiM_single) (fastforce simp: space_PiM space_pair_measure split: nat.splits)+
have "⋀i. p i ∈ Pi⇩M {0..<i} (λ_. M) ⨂⇩M Ms →⇩M prob_algebra Ma"
using is_policyD p by blast
hence "⋀i. Suc_policy p sa i ∈ Pi⇩M {0..<i} (λ_. M) ⨂⇩M Ms →⇩M prob_algebra Ma"
unfolding Suc_policy_def
using *
by measurable
thus ?thesis unfolding is_policy_def is_dec_def
by blast
qed
lemma Suc_policy_measurable_step[measurable]:
assumes "is_policy p"
shows "(λx. Suc_policy p (fst (fst x)) n (snd (fst x), snd x)) ∈
(M ⨂⇩M Pi⇩M {0..<n} (λ_. M)) ⨂⇩M Ms →⇩M prob_algebra Ma"
unfolding Suc_policy_def
using assms
by measurable (auto
intro: measurable_PiM_single'
simp: space_pair_measure PiE_iff space_PiM extensional_def
split: nat.split)
subsection ‹Single-Step Distribution›
text‹@{term "K'"} takes
a policy,
a distribution over 's,
the epoch,
and a history,
produces a distribution over the next state-action pair.›
definition K' :: "('s, 'a) pol ⇒ 's measure ⇒ nat ⇒ (nat ⇒ ('s × 'a)) ⇒ ('s × 'a) measure"
where
"K' p s0 n ω = do {
s ← case_nat s0 (K ∘ ω) n;
a ← p n (ω, s);
return M (s, a)
}"
lemma prob_space_K':
assumes p: "is_policy p" and x: "x0 ∈ space (prob_algebra Ms)" and h: "h ∈ space (H n)"
shows "prob_space (K' p x0 n h)"
unfolding K'_def
proof (intro prob_space.prob_space_bind[where S = M])
show "prob_space (case n of 0 ⇒ x0 | Suc x ⇒ (K ∘ h) x)"
using x h space_H1 by (auto split: nat.splits simp: space_prob_algebra)
next
show "AE x in case n of 0 ⇒ x0 | Suc x ⇒ (K ∘ h) x.
prob_space (p n (h, x) ⤜ (λa. return M (x, a)))"
proof (cases n)
case 0
then have h': "h ∈ space (Pi⇩M {0..<0} (λ_. M))"
using h by auto
show ?thesis
using 0 p h x sets_policy'
by (fastforce intro: prob_space.prob_space_bind[where S=M]
policy_prob_space prob_space_return
cong: measurable_cong_sets)
next
case (Suc nat)
then show ?thesis
proof (intro AE_I2 prob_space.prob_space_bind[of _ _ M], goal_cases)
case (1 x)
then show ?case
using p space_H1 h x
by (fastforce intro!: policy_prob_space)
next
case (2 x a)
then show ?case
using h p space_H1
by (fastforce intro!: prob_space_return)
next
case (3 x)
then show ?case
using p h x space_K space_H1
by (fastforce intro!: measurable_prob_algebraD return_policy_prob_algebra)
qed
qed
next
show "(λs. p n (h, s) ⤜ (λa. return M (s, a))) ∈
(case n of 0 ⇒ x0 | Suc x ⇒ (K ∘ h) x) →⇩M subprob_algebra M"
proof (intro measurable_bind[where N = Ma])
show " (λx. p n (h, x)) ∈ (case n of 0 ⇒ x0 | Suc x ⇒ (K ∘ h) x) →⇩M subprob_algebra Ma"
using p h x
by (auto split: nat.splits intro!: measurable_prob_algebraD simp: space_prob_algebra)
next
show "(λs. return M (fst s, snd s)) ∈
(case n of 0 ⇒ x0 | Suc x ⇒ (K ∘ h) x) ⨂⇩M Ma →⇩M subprob_algebra M"
using h x sets_K_Suc
by (auto split: nat.splits simp: sets_K space_prob_algebra cong: measurable_cong_sets)
qed
qed
lemma measurable_K'[measurable]:
assumes p: "is_policy p" and x: "x ∈ space (prob_algebra Ms)"
shows "K' p x i ∈ H i →⇩M prob_algebra M"
proof -
fix i
show "K' p x i ∈ Pi⇩M {0..<i} (λ_. M) →⇩M prob_algebra M"
unfolding K'_def
proof (intro measurable_bind_prob_space2[where N = Ms])
show "(λa. case i of 0 ⇒ x | Suc x ⇒ (K ∘ a) x) ∈ Pi⇩M {0..<i} (λ_. M) →⇩M prob_algebra Ms"
using x by measurable auto
next
show "(λ(ω, y). p i (ω, y) ⤜ (λa. return M (y, a))) ∈
Pi⇩M {0..<i} (λ_. M) ⨂⇩M Ms →⇩M prob_algebra M"
using x p by auto
qed
qed
subsection ‹Initial State-Action Distribution›
text ‹@{term "K0"} produces the initial state-action distribution from a state distribution
and a policy.›
definition "K0 p s0 = K' p s0 0 (λ_. undefined)"
lemma K0_def':
"K0 p s0 = do {
s ← s0;
a ← p0 p s;
return M (s, a)}"
unfolding K0_def K'_def by auto
lemma K0_prob[measurable]: "is_policy p ⟹ K0 p ∈ prob_algebra Ms →⇩M prob_algebra M"
unfolding K0_def'
by measurable
lemma prob_space_K0: "is_policy p ⟹ x0 ∈ space (prob_algebra Ms) ⟹ prob_space (K0 p x0)"
by (simp add: K0_def prob_space_K')
lemma space_K0[simp]: "is_policy p ⟹ s ∈ space (prob_algebra Ms) ⟹ space (K0 p s) = space M"
by (meson K0_prob measurable_prob_algebraD sets_eq_imp_space_eq sets_kernel)
lemma sets_K0[measurable_cong]:
assumes "is_policy p" "s ∈ space (prob_algebra Ms)"
shows "sets (K0 p s) = sets M"
using assms by (meson K0_prob measurable_prob_algebraD subprob_measurableD(2))
lemma K0_return_eq_p0:
assumes "is_policy p" "s ∈ space Ms"
shows "K0 p (return Ms s) = p0 p s ⤜ (λa. return M (s,a))"
unfolding K0_def'
using assms
by (subst bind_return[where N = M]) (auto intro!: measurable_prob_algebraD)
lemma M_ne_policy[intro]: "is_policy p ⟹ s ∈ space (prob_algebra Ms) ⟹ space M ≠ {}"
using space_K0 prob_space.not_empty prob_space_K0
by force
subsection ‹Sequence Space of the MDP›
text‹We can instantiate @{const Ionescu_Tulcea} with @{const K'}.›
lemma IT_K': "is_policy p ⟹ x ∈ space (prob_algebra Ms) ⟹ Ionescu_Tulcea (K' p x) (λ_. M)"
unfolding Ionescu_Tulcea_def using measurable_K' prob_space_K'
by (fast dest: measurable_prob_algebraD)
definition lim_sequence :: "('s, 'a) pol ⇒ 's measure ⇒ (nat ⇒ ('s × 'a)) measure"
where
"lim_sequence p x = projective_family.lim UNIV (Ionescu_Tulcea.CI (K' p x) (λ_. M)) (λ_. M)"
lemma
assumes x: "x ∈ space (prob_algebra Ms)" and p: "is_policy p"
shows space_lim_sequence: "space (lim_sequence p x) = space (Π⇩M i∈UNIV. M)"
and sets_lim_sequence[measurable_cong]: "sets (lim_sequence p x) = sets (Π⇩M i∈UNIV. M)"
and emeasure_lim_sequence_emb: "⋀J X. finite J ⟹ X ∈ sets (Π⇩M j∈J. M) ⟹
emeasure (lim_sequence p x) (prod_emb UNIV (λ_. M) J X) =
emeasure (Ionescu_Tulcea.CI (K' p x) (λ_. M) J) X"
and emeasure_lim_sequence_emb_I0o: "⋀n X. X ∈ sets (Π⇩M i ∈ {0..<n}. M) ⟹
emeasure (lim_sequence p x) (prod_emb UNIV (λ_. M) {0..<n} X) =
emeasure (Ionescu_Tulcea.C (K' p x) (λ_. M) 0 n (λx. undefined)) X"
proof -
interpret Ionescu_Tulcea "K' p x" "λ_. M"
using IT_K'[OF p x] .
show "space (lim_sequence p x) = space (Π⇩M i∈UNIV. M)"
unfolding lim_sequence_def by simp
show "sets (lim_sequence p x) = sets (Π⇩M i∈UNIV. M)"
unfolding lim_sequence_def by simp
{ fix J :: "nat set" and X assume "finite J" "X ∈ sets (Π⇩M j∈J. M)"
then show "emeasure (lim_sequence p x) (PF.emb UNIV J X) = emeasure (CI J) X"
unfolding lim_sequence_def by (rule lim) }
note emb = this
have up_to_I0o[simp]: "up_to {0..<n} = n" for n
unfolding up_to_def by (rule Least_equality) auto
{ fix n :: nat and X assume "X ∈ sets (Π⇩M j∈{0..<n}. M)"
thus "emeasure (lim_sequence p x) (PF.emb UNIV {0..<n} X) = emeasure (C 0 n (λx. undefined)) X"
by (simp add: space_C emb CI_def space_PiM distr_id2 sets_C cong: distr_cong_simp) }
qed
lemma lim_sequence_prob_space:
assumes "is_policy p" "s ∈ space (prob_algebra Ms)"
shows "prob_space (lim_sequence p s)"
using assms proof -
assume p: "is_policy p"
fix s assume [simp]: "s ∈ space (prob_algebra Ms)"
interpret Ionescu_Tulcea "K' p s" "λ_. M"
using IT_K' p by simp
have sp:
"space (lim_sequence p s) = prod_emb UNIV (λ_. M) {} (Π⇩E j∈{}. space M)"
"space (CI {}) = {} →⇩E space M"
by (auto simp: p space_lim_sequence space_PiM prod_emb_def PF.space_P)
show "prob_space (lim_sequence p s)"
using PF.prob_space_P[THEN prob_space.emeasure_space_1, of "{}"]
by (auto intro!: prob_spaceI simp add: p sp emeasure_lim_sequence_emb simp del: PiE_empty_domain)
qed
subsection ‹Measurablility of the Sequence Space›
lemma lim_sequence[measurable]:
assumes p: "is_policy p"
shows "lim_sequence p ∈ prob_algebra Ms →⇩M prob_algebra (Π⇩M i∈UNIV. M)"
proof (intro measurable_prob_algebra_generated[OF sets_PiM Int_stable_prod_algebra
prod_algebra_sets_into_space])
show "⋀a. a ∈ space (prob_algebra Ms) ⟹ prob_space (lim_sequence p a)"
using lim_sequence_prob_space p by blast
next
fix a assume [simp]: "a ∈ space (prob_algebra Ms)"
show "sets (lim_sequence p a) = sets (Pi⇩M UNIV (λi. M))"
by (simp add: p sets_lim_sequence)
next
fix X :: "(nat ⇒ 's × 'a) set" assume "X ∈ prod_algebra UNIV (λi. M)"
then obtain J :: "nat set" and F where J: "J ≠ {}" "finite J" "F ∈ J → sets M"
and X: "X = prod_emb UNIV (λ_. M) J (Pi⇩E J F)"
unfolding prod_algebra_def by auto
then have Pi_F: "finite J" "Pi⇩E J F ∈ sets (Pi⇩M J (λ_. M))"
by (auto intro: sets_PiM_I_finite)
define n where "n = (LEAST n. ∀i≥n. i ∉ J)"
have J_le_n: "J ⊆ {0..<n}"
proof -
have "⋀x. x ∈ J ⟹ ∀i≥Suc (Max J). i ∉ J"
using not_le Max_less_iff[OF ‹finite J›]
by (auto simp: Suc_le_eq)
moreover have "x ∈ J ⟹ ∀i≥a. i ∉ J ⟹ x < a" for x a
using not_le by auto
ultimately show ?thesis
unfolding n_def
by (fastforce intro!: LeastI2[of "λn. ∀i≥n. i ∉ J" "Suc (Max J)" "λx. _ < x"])
qed
have C: "(λx. Ionescu_Tulcea.C (K' p x) (λ_. M) 0 n (λx. undefined)) ∈ prob_algebra Ms →⇩M subprob_algebra (Pi⇩M {0..<n} (λ_. M))"
proof (induction n)
case 0
thus ?case
by (auto simp: measurable_cong[OF Ionescu_Tulcea.C.simps(1)[OF IT_K', OF p]])
next
case (Suc n)
have "(λw. Ionescu_Tulcea.eP (K' p (fst w)) (λ_. M) n (snd w))
∈ prob_algebra Ms ⨂⇩M Pi⇩M {0..<n} (λ_. M) →⇩M subprob_algebra (Pi⇩M {0..<Suc n} (λ_. M))"
proof (subst measurable_cong)
fix w assume "w ∈ space (prob_algebra Ms ⨂⇩M Pi⇩M {0..<n} (λ_. M))"
then show "Ionescu_Tulcea.eP (K' p (fst w)) (λ_. M) n (snd w) =
distr (K' p (fst w) n (snd w)) (Π⇩M i∈{0..<Suc n}. M) (fun_upd (snd w) n)"
by (auto simp: p space_pair_measure Ionescu_Tulcea.eP_def[OF IT_K'] split: prod.split)
next
show "(λw. distr (K' p (fst w) n (snd w)) (Π⇩M i∈{0..<Suc n}. M) (fun_upd (snd w) n))
∈ prob_algebra Ms ⨂⇩M Pi⇩M {0..<n} (λ_. M) →⇩M subprob_algebra (Pi⇩M {0..<Suc n} (λ_. M))"
proof (rule measurable_distr2[where M = M])
show "(λ(x, y). (snd x)(n := y)) ∈ (prob_algebra Ms ⨂⇩M Pi⇩M {0..<n} (λ_. M)) ⨂⇩M M →⇩M Pi⇩M {0..<Suc n} (λi. M)"
proof (rule measurable_PiM_single')
fix i assume "i ∈ {0..<Suc n}"
then show "(λω. (case ω of (x, y) ⇒ (snd x)(n := y)) i) ∈ (prob_algebra Ms ⨂⇩M Pi⇩M {0..<n} (λ_. M)) ⨂⇩M M →⇩M M"
unfolding split_beta' by (cases "i = n") auto
next
show "(λω. case ω of (x, y) ⇒ (snd x)(n := y)) ∈ space ((prob_algebra Ms ⨂⇩M Pi⇩M {0..<n} (λ_. M)) ⨂⇩M M) → {0..<Suc n} →⇩E space M"
by (auto simp: space_pair_measure space_PiM less_Suc_eq PiE_iff)
qed
next
show "(λx. K' p (fst x) n (snd x)) ∈ prob_algebra Ms ⨂⇩M Pi⇩M {0..<n} (λ_. M) →⇩M subprob_algebra M"
unfolding K'_def comp_def
using p
by (auto intro!: measurable_prob_algebraD)
qed
qed
then show ?case
using Suc.IH
by (subst measurable_cong[OF Ionescu_Tulcea.C.simps(2)[OF IT_K', where p1 = p, OF p]])
(auto intro!: measurable_bind)
qed
have *: "(λx. Ionescu_Tulcea.CI (K' p x) (λ_. M) J) ∈ prob_algebra Ms →⇩M subprob_algebra (Pi⇩M J (λ_. M))"
using measurable_distr[OF measurable_restrict_subset[OF J_le_n], of "(λ_. M)"] C p
by (subst measurable_cong)
(auto simp: Ionescu_Tulcea.up_to_def[OF IT_K'] n_def Ionescu_Tulcea.CI_def[OF IT_K'])
have "(λa. emeasure (lim_sequence p a) X) ∈ borel_measurable (prob_algebra Ms) ⟷
(λa. emeasure (Ionescu_Tulcea.CI (K' p a) (λ_. M) J) (Pi⇩E J F)) ∈
borel_measurable (prob_algebra Ms)"
unfolding X using J Pi_F by (intro p measurable_cong emeasure_lim_sequence_emb) auto
also have "…"
using * measurable_emeasure_subprob_algebra Pi_F(2) by auto
finally show "(λa. emeasure (lim_sequence p a) X) ∈ borel_measurable (prob_algebra Ms)" .
qed
lemma lim_sequence_aux[measurable]:
assumes p: "is_policy p"
assumes f : "⋀x. x ∈ space M ⟹ is_policy (f x)"
assumes f': "⋀n. (λx. f (fst (fst x)) n (snd (fst x), snd x)) ∈
(M ⨂⇩M Pi⇩M {0..<n} (λ_. M)) ⨂⇩M Ms →⇩M prob_algebra Ma"
assumes gm: "g ∈ M →⇩M prob_algebra Ms"
shows "(λx. lim_sequence (f x) (g x)) ∈ M →⇩M prob_algebra (Pi⇩M UNIV (λ_. M))"
proof (intro measurable_prob_algebra_generated[OF sets_PiM Int_stable_prod_algebra prod_algebra_sets_into_space])
fix a assume a[simp]: "a ∈ space M"
have g: "⋀x. x ∈ space M ⟹ g x ∈ space (prob_algebra Ms)"
by (meson gm measurable_space)
interpret Ionescu_Tulcea "K' (f a) (g a)" "λ_. M"
using IT_K' p
using f[OF ‹a ∈ space M›] g by measurable
have p': "is_policy (f a)"
using ‹a ∈ space M› p f by auto
have sp:
"space (lim_sequence (f a) (g a)) = prod_emb UNIV (λ_. M) {} (Π⇩E j∈{}. space M)"
"space (CI {}) = {} →⇩E space M"
using g a p' by (auto simp: space_lim_sequence p' space_PiM prod_emb_def PF.space_P)
have "emeasure (lim_sequence (f a) (g a)) (space (lim_sequence (f a) (g a))) = 1"
unfolding sp
using g a p' sets.top[of "(Pi⇩M {} (λ_. M))", unfolded space_PiM_empty]
PF.prob_space_P[THEN prob_space.emeasure_space_1, of "{}"]
by (subst emeasure_lim_sequence_emb) (auto simp: emeasure_lim_sequence_emb sp)
thus "prob_space (lim_sequence (f a) (g a))"
by (auto intro: prob_spaceI)
show "sets (lim_sequence (f a) (g a)) = sets (Pi⇩M UNIV (λi. M))"
by (simp add: lim_sequence_def)
next
fix X :: "(nat ⇒ 's × 'a) set" assume "X ∈ prod_algebra UNIV (λi. M)"
then obtain J :: "nat set" and F where J: "J ≠ {}" "finite J" "F ∈ J → sets M"
and X: "X = prod_emb UNIV (λ_. M) J (Pi⇩E J F)"
unfolding prod_algebra_def by auto
then have Pi_F: "finite J" "Pi⇩E J F ∈ sets (Pi⇩M J (λ_. M))"
by (auto intro: sets_PiM_I_finite)
define n where "n = (LEAST n. ∀i≥n. i ∉ J)"
have J_le_n: "J ⊆ {0..<n}"
unfolding n_def
by (rule LeastI2[of _ "Suc (Max J)"]) (auto simp: ‹finite J› Suc_le_eq not_le[symmetric])
have g: "⋀x. x ∈ space M ⟹ g x ∈ space (prob_algebra Ms)"
by (meson gm measurable_space)
have C: "(λx. Ionescu_Tulcea.C (K' (f x) (g x)) (λ_. M) 0 n (λx. undefined)) ∈
M →⇩M subprob_algebra (Pi⇩M {0..<n} (λ_. M))"
proof (induction n)
case 0
then show ?case
using g f
by (auto simp: measurable_cong[OF Ionescu_Tulcea.C.simps(1)[OF IT_K']])
next
case (Suc n)
then show ?case
proof (subst measurable_cong[OF Ionescu_Tulcea.C.simps(2)[OF IT_K']])
show "(λw. Ionescu_Tulcea.C (K' (f w) (g w)) (λ_. M) 0 n (λx. undefined) ⤜ Ionescu_Tulcea.eP (K' (f w) (g w)) (λ_. M) (0 + n))
∈ M →⇩M subprob_algebra (Pi⇩M {0..<Suc n} (λ_. M))"
if h: "(λx. Ionescu_Tulcea.C (K' (f x) (g x)) (λ_. M) 0 n (λx. undefined)) ∈ M →⇩M subprob_algebra (Pi⇩M {0..<n} (λ_. M))"
proof (rule measurable_bind'[OF h])
show "(λ(x, y). Ionescu_Tulcea.eP (K' (f x) (g x)) (λ_. M) (0 + n) y) ∈ M ⨂⇩M Pi⇩M {0..<n} (λ_. M) →⇩M subprob_algebra (Pi⇩M {0..<Suc n} (λ_. M))"
proof (subst measurable_cong)
fix n :: nat and w assume "w ∈ space (M ⨂⇩M Pi⇩M {0..<n} (λ_. M))"
then show "(case w of (x, a) ⇒ Ionescu_Tulcea.eP (K' (f x) (g x)) (λ_. M) (0 + n) a) =
(case w of (x, a) ⇒ distr (K' (f x) (g x) n a) (Π⇩M i∈{0..<Suc n}. M) (fun_upd a n))"
by (auto simp: IT_K' Ionescu_Tulcea.eP_def f g space_ma p space_pair_measure
Ionescu_Tulcea.eP_def[OF IT_K'])
next
fix n
have "(λw. distr (K' (f (fst w)) (g (fst w)) n (snd w)) (Pi⇩M {0..<Suc n} (λi. M)) (fun_upd (snd w) n))
∈ M ⨂⇩M Pi⇩M {0..<n} (λ_. M) →⇩M subprob_algebra (Pi⇩M {0..<Suc n} (λ_. M))"
proof (intro measurable_distr2[where M=M] measurable_PiM_single', goal_cases)
case (1 i)
then show ?case
by (cases "i = n") (auto simp: split_beta')
next
case 2
then show ?case
by (auto simp: split_beta' PiE_iff extensional_def Pi_iff space_pair_measure space_PiM)
next
case 3
then show ?case
unfolding K'_def
proof (intro measurable_bind[where N = Ms], goal_cases)
case 1
then show ?case
unfolding measurable_pair_swap_iff[of _ M]
by measurable (auto simp: gm measurable_snd'' intro: measurable_prob_algebraD)
next
case 2
then show ?case
unfolding Suc_policy_def
using f'
by (auto intro!: measurable_bind[where N = Ma] measurable_prob_algebraD)
qed
qed
thus "(λw. case w of (x, a) ⇒ distr ((K' (f x) (g x)) n a) (Pi⇩M {0..<Suc n} (λi. M)) (fun_upd a n)) ∈ M ⨂⇩M Pi⇩M {0..<n} (λ_. M) →⇩M subprob_algebra (Pi⇩M {0..<Suc n} (λ_. M))"
by measurable
qed
qed
qed (auto simp: f g)
qed
have p': "⋀a. a ∈ space M ⟹ is_policy (f a)"
using f by auto
have "(λa. emeasure (lim_sequence (f a) (g a)) X) ∈ borel_measurable M ⟷
(λa. emeasure (Ionescu_Tulcea.CI (K' (f a) (g a)) (λ_. M) J) (Pi⇩E J F)) ∈ borel_measurable M"
unfolding X using J Pi_F
by (fastforce simp add: g f K space_pair_measure intro!: p p' measurable_cong emeasure_lim_sequence_emb)
also have "..."
proof (intro measurable_compose[OF _ measurable_emeasure_subprob_algebra[OF Pi_F(2)]],
subst measurable_cong[where g = "(λw. distr (Ionescu_Tulcea.C (K' (f w) (g w))
(λ_. M) 0 n (λx. undefined)) (Pi⇩M J (λ_. M)) (λf. restrict f J))"], goal_cases)
case (1 w)
then show ?case
unfolding Ionescu_Tulcea.CI_def[OF IT_K'[OF f[OF 1] g[OF 1]]]
using p
by (subst Ionescu_Tulcea.up_to_def[OF IT_K'[of "Suc_policy p w" "K w"]])
(auto simp add: n_def ‹w ∈ space M› prob_space_K sets_K space_prob_algebra)
next
case 2
then show ?case
using measurable_compose measurable_distr[OF measurable_restrict_subset[OF J_le_n]] C
by blast
qed
thus "(λa. emeasure (lim_sequence (f a) (g a)) X) ∈ borel_measurable M"
using calculation by blast
qed
lemma lim_sequence_Suc_return[measurable]:
assumes p: "is_policy p"
assumes s: "s ∈ space Ms"
shows "(λx. lim_sequence (Suc_policy p (s, snd x)) (return Ms (fst x))) ∈
M →⇩M prob_algebra (Pi⇩M UNIV (λ_. M))"
proof (intro lim_sequence_aux[OF p], goal_cases)
case (1 x)
then show ?case
by (meson is_policy_Suc_policy measurable_snd measurable_space p s space_ma)
next
case (2 n)
then show ?case
using p
unfolding Suc_policy_def
by measurable (auto intro: measurable_PiM_single'
simp: s space_pair_measure space_PiM PiE_iff extensional_def split: nat.split)
qed measurable
lemma lim_sequence_Suc_K[measurable]:
assumes "is_policy p"
shows "(λx. lim_sequence (Suc_policy p x) (K x)) ∈ M →⇩M prob_algebra (Pi⇩M UNIV (λ_. M))"
using assms
by (fastforce intro!: lim_sequence_aux)
subsection ‹Iteration Rule›
lemma step_C:
assumes x: "x ∈ space (prob_algebra Ms)" and p: "is_policy p"
shows "Ionescu_Tulcea.C (K' p x) (λ_. M) 0 1 (λ_. undefined) ⤜
Ionescu_Tulcea.C (K' p x) (λ_. M) 1 n =
K0 p x ⤜ (λa. Ionescu_Tulcea.C (K' p x) (λ_. M) 1 n (case_nat a (λ_. undefined)))"
proof -
interpret Ionescu_Tulcea "K' p x" "λ_. M"
using IT_K'[OF p x] .
have [simp]: "space (K0 p x) ≠ {}"
using space_K0[OF p x] x by auto
have [simp]: "((λ_. undefined)(0 := x::('s × 'a))) = case_nat x (λ_. undefined)" for x
by (auto simp: fun_eq_iff split: nat.split)
have "C 0 1 (λ_. undefined) ⤜ C 1 n = eP 0 (λ_. undefined) ⤜ C 1 n"
using measurable_eP[of 0] measurable_C[of 1 n, measurable del]
by (simp add: bind_return[where N="Pi⇩M {0} (λ_. M)"])
also have "… = K0 p x ⤜ (λa. C 1 n (case_nat a (λ_. undefined)))"
unfolding eP_def
proof (subst bind_distr[where K = "Pi⇩M {0..<Suc n} (λ_. M)"], goal_cases)
case 1
then show ?case
using measurable_C[of 1 n, measurable del] x[THEN sets_K0[OF p]] measurable_ident_sets[OF sets_P]
unfolding K0_def
by auto
next
case 2
then show ?case
using measurable_C[of 1 n]
by auto
next
case 3
then show ?case
by (simp add: space_P)
next
case 4
then show ?case
unfolding K0_def
by (auto intro!: bind_cong)
qed
finally show
"C 0 1 (λ_. undefined) ⤜ C 1 n = K0 p x ⤜ (λa. C 1 n (case_nat a (λ_. undefined)))" .
qed
lemma lim_sequence_eq:
assumes x: "x ∈ space (prob_algebra Ms)" assumes p: "is_policy p"
shows "lim_sequence p x =
K0 p x ⤜ (λy. distr (lim_sequence (Suc_policy p y) (K y)) (Π⇩M _∈UNIV. M) (case_nat y))"
(is "_ = ?B p x")
proof (rule measure_eqI_PiM_infinite)
show "sets (lim_sequence p x) = sets (Π⇩M j∈UNIV. M)"
using x p by (rule sets_lim_sequence)
have [simp]: "space (K' p x 0 (λn. undefined)) ≠ {}"
using p
using IT_K' Ionescu_Tulcea.non_empty Ionescu_Tulcea.space_P x by fastforce
show "sets (?B p x) = sets (Pi⇩M UNIV (λj. M))"
using p x M_ne_policy space_K0 by auto
interpret lim_sequence: prob_space "lim_sequence p x"
using lim_sequence x p by (auto simp: measurable_restrict_space2_iff prob_algebra_def)
show "finite_measure (lim_sequence p x)"
by (rule lim_sequence.finite_measure)
interpret Ionescu_Tulcea "K' p x" "λ_. M"
using IT_K'[OF p x] .
let ?U = "λ_::nat. undefined :: ('s × 'a)"
fix J :: "nat set" and F'
assume J: "finite J" "⋀i. i ∈ J ⟹ F' i ∈ sets M"
define n where "n = (if J = {} then 0 else Max J)"
define F where "F i = (if i ∈ J then F' i else space M)" for i
then have F[simp, measurable]: "F i ∈ sets M" for i
using J by auto
have emb_eq: "PF.emb UNIV J (Pi⇩E J F') = PF.emb UNIV {0..<Suc n} (Pi⇩E {0..<Suc n} F)"
proof cases
assume "J = {}" then show ?thesis
by (auto simp add: n_def F_def[abs_def] prod_emb_def PiE_def)
next
assume "J ≠ {}" then show ?thesis
by (auto simp: prod_emb_def PiE_iff F_def n_def less_Suc_eq_le ‹finite J› split: if_split_asm)
qed
have "emeasure (lim_sequence p x) (PF.emb UNIV J (Pi⇩E J F')) =
emeasure (C 0 (Suc n) ?U) (Pi⇩E {0..<Suc n} F)"
using x p unfolding emb_eq
by (rule emeasure_lim_sequence_emb_I0o) (auto intro!: sets_PiM_I_finite)
also have "C 0 (Suc n) ?U = K0 p x ⤜ (λy. C 1 n (case_nat y ?U))"
using split_C[of ?U 0 "Suc 0" n] step_C[OF x p] by simp
also have "emeasure (K0 p x ⤜ (λy. C 1 n (case_nat y ?U))) (Pi⇩E {0..<Suc n} F) =
(∫⇧+y. C 1 n (case_nat y ?U) (Pi⇩E {0..<Suc n} F) ∂K0 p x)"
using measurable_C[of 1 n, measurable del] sets_K0[OF p x] F x p non_empty space_K0
by (intro emeasure_bind[OF _ measurable_compose[OF _ measurable_C]])
(auto cong: measurable_cong_sets intro!: measurable_PiM_single' split: nat.split_asm)
also have "… = (∫⇧+y. distr (lim_sequence (Suc_policy p y) (K y))
(Pi⇩M UNIV (λj. M)) (case_nat y) (PF.emb UNIV J (Pi⇩E J F')) ∂K0 p x)"
proof (intro nn_integral_cong)
fix y assume "y ∈ space (K0 p x)"
then have y: "y ∈ space M"
using x p space_K0 by blast
then interpret y: Ionescu_Tulcea "K' (Suc_policy p y) (K y)" "λ_. M"
using p by (auto intro!: IT_K')
have "fst y ∈ space Ms"
by (meson measurable_fst measurable_space y)
let ?y = "case_nat y"
have [simp]: "?y ?U ∈ space (Pi⇩M {0} (λi. M))"
using y by (auto simp: space_PiM PiE_iff extensional_def split: nat.split)
have yM[measurable]: "?y ∈ Pi⇩M {0..<m} (λ_. M) →⇩M Pi⇩M {0..<Suc m} (λi. M)" for m
using y
by (auto intro: measurable_PiM_single' simp: space_PiM PiE_iff extensional_def split: nat.split)
have y': "?y ?U ∈ space (Pi⇩M {0..<1} (λi. M))"
by (simp add: space_PiM PiE_def y extensional_def split: nat.split)
have eq1: "?y -` Pi⇩E {0..<Suc n} F ∩ space (Pi⇩M {0..<n} (λ_. M)) =
(if y ∈ F 0 then Pi⇩E {0..<n} (F∘Suc) else {})"
unfolding set_eq_iff using y sets.sets_into_space[OF F]
by (auto simp: space_PiM PiE_iff extensional_def Ball_def split: nat.split nat.split_asm)
have eq2: "?y -` PF.emb UNIV {0..<Suc n} (Pi⇩E {0..<Suc n} F) ∩ space (Pi⇩M UNIV (λ_. M)) =
(if y ∈ F 0 then PF.emb UNIV {0..<n} (Pi⇩E {0..<n} (F∘Suc)) else {})"
unfolding set_eq_iff using y sets.sets_into_space[OF F]
by (auto simp: space_PiM PiE_iff prod_emb_def extensional_def Ball_def split: nat.splits)
let ?I = "indicator (F 0) y"
have "fst y ∈ space Ms"
using y by (meson measurable_fst measurable_space)
have "C 1 n (?y ?U) = distr (y.C 0 n ?U) (Π⇩M i∈{0..<Suc n}. M) ?y"
proof (induction n)
case (Suc m)
have "C 1 (Suc m) (?y ?U) = distr (y.C 0 m ?U) (Pi⇩M {0..<Suc m} (λi. M)) ?y ⤜ eP (Suc m)"
using Suc by simp
also have "… = y.C 0 m ?U ⤜ (λx. eP (Suc m) (?y x))"
by (auto intro!: bind_distr[where K="Pi⇩M {0..<Suc (Suc m)} (λ_. M)"] simp: y y.space_C y.sets_C cong: measurable_cong_sets)
also have "… = y.C 0 m ?U ⤜ (λx. distr (y.eP m x) (Pi⇩M {0..<Suc (Suc m)} (λi. M)) ?y)"
proof (intro bind_cong refl)
fix ω' assume ω': "ω' ∈ space (y.C 0 m ?U)"
moreover have "K' p x (Suc m) (?y ω') = K' (Suc_policy p y) (K y) m ω'"
unfolding K'_def Suc_policy_def
by (auto split: nat.splits)
ultimately show "eP (Suc m) (?y ω') = distr (y.eP m ω') (Pi⇩M {0..<Suc (Suc m)} (λi. M)) ?y"
unfolding eP_def y.eP_def
by (subst distr_distr) (auto simp: y.space_C y.sets_P split: nat.split cong: measurable_cong_sets
intro!: distr_cong measurable_fun_upd[where J="{0..<m}"])
qed
also have "… = distr (y.C 0 m ?U ⤜ y.eP m) (Pi⇩M {0..<Suc (Suc m)} (λi. M)) ?y"
by (auto intro!: distr_bind[symmetric, OF _ _ yM] simp: y.space_C y.sets_C cong: measurable_cong_sets)
finally show ?case
by simp
qed (use y in ‹simp add: PiM_empty distr_return›)
then have "C 1 n (case_nat y ?U) (Pi⇩E {0..<Suc n} F) =
(distr (y.C 0 n ?U) (Π⇩M i∈{0..<Suc n}. M) ?y) (Pi⇩E {0..<Suc n} F)" by simp
also have "… = ?I * y.C 0 n ?U (Pi⇩E {0..<n} (F ∘ Suc))"
by (subst emeasure_distr) (auto simp: y.sets_C y.space_C eq1 cong: measurable_cong_sets)
also have
"… = ?I * lim_sequence (Suc_policy p y) (K y) (PF.emb UNIV {0..<n} (Pi⇩E {0..<n} (F ∘ Suc)))"
using y sets_PiM_I_finite
by (subst emeasure_lim_sequence_emb_I0o) (auto simp add: p sets_PiM_I_finite)
also have "… = distr (lim_sequence (Suc_policy p y) (K y)) (Pi⇩M UNIV (λj. M)) ?y
(PF.emb UNIV {0..<Suc n} (Pi⇩E {0..<Suc n} F))"
proof (subst emeasure_distr, goal_cases)
case 1
thus ?case
using y
by measurable (simp add: lim_sequence_def measurable_ident_sets)
case 2
thus ?case
by auto
case 3
thus ?case
using y
by (subst space_lim_sequence[OF _ is_policy_Suc_policy[OF _ p]]) (auto simp: eq2)
qed
finally show "emeasure (C 1 n (case_nat y (λ_. undefined))) (Pi⇩E {0..<Suc n} F) =
emeasure (distr (lim_sequence (Suc_policy p y) (K y)) (Pi⇩M UNIV (λj. M)) (case_nat y))
(y.PF.emb UNIV J (Pi⇩E J F'))"
unfolding emb_eq .
qed
also have "… = emeasure (K0 p x ⤜ (λy. distr (lim_sequence (Suc_policy p y) (K y))
(Pi⇩M UNIV (λj. M)) (case_nat y))) (PF.emb UNIV J (Pi⇩E J F'))"
using J sets_K0[OF ‹is_policy p› ‹x ∈ space (prob_algebra Ms)›] p
by (subst emeasure_bind[where N="PiM UNIV (λ_. M)"]) (auto simp: sets_K x cong: measurable_cong_sets
intro!: measurable_distr2[OF _ measurable_prob_algebraD[OF lim_sequence]]
measurable_prob_algebraD
measurable_distr2[where M = "PiM UNIV (λ_. M)"])
finally show "emeasure (lim_sequence p x) (PF.emb UNIV J (Pi⇩E J F')) =
emeasure (K0 p x ⤜ (λy. distr (lim_sequence (Suc_policy p y) (K y)) (Pi⇩M UNIV (λj. M))
(case_nat y))) (PF.emb UNIV J (Pi⇩E J F'))".
qed
subsection ‹Stream Space of the MDP›
definition lim_stream :: "('s, 'a) pol ⇒ 's measure ⇒ ('s × 'a) stream measure"
where
"lim_stream p x = distr (lim_sequence p x) (stream_space M) to_stream"
lemma space_lim_stream: "space (lim_stream p x) = streams (space M)"
unfolding lim_stream_def by (simp add: space_stream_space)
lemma sets_lim_stream[measurable_cong]: "sets (lim_stream p x) = sets (stream_space M)"
unfolding lim_stream_def by simp
lemma lim_stream[measurable]:
assumes "is_policy p"
shows "lim_stream p ∈ prob_algebra Ms →⇩M prob_algebra (stream_space M)"
unfolding lim_stream_def[abs_def]
using assms
by (auto intro: measurable_distr_prob_space2[OF lim_sequence])
lemma lim_stream_Suc[measurable]:
assumes p: "is_policy p"
shows "(λa. lim_stream (Suc_policy p a) (K a)) ∈ M →⇩M prob_algebra (stream_space M)"
unfolding lim_stream_def[abs_def]
using p
by (auto intro: measurable_distr_prob_space2[OF lim_sequence_Suc_K])
lemma space_stream_space_M_ne: "x ∈ space M ⟹ space (stream_space M) ≠ {}"
using sconst_streams[of x "space M"] by (auto simp: space_stream_space)
lemma prob_space_lim_stream[intro]:
assumes "is_policy p" "x ∈ space (prob_algebra Ms)"
shows "prob_space (lim_stream p x)"
by (metis (no_types, lifting) space_prob_algebra measurable_space assms lim_stream mem_Collect_eq)
lemma prob_space_step:
assumes "is_policy p" "x ∈ space M"
shows "prob_space (lim_stream (Suc_policy p x) (K x))"
by (auto simp: assms K_in_space is_policy_Suc_policy)
lemma lim_stream_eq:
assumes p: "is_policy p"
assumes x: "x ∈ space (prob_algebra Ms)"
shows "lim_stream p x = do {
y ← K0 p x;
ω ← lim_stream (Suc_policy p y) (K y);
return (stream_space M) (y ## ω)
}"
unfolding lim_stream_def lim_sequence_eq[OF x p]
proof (subst distr_bind[OF _ _ measurable_to_stream])
show "(λy. distr (lim_sequence (Suc_policy p y) (K y)) (Pi⇩M UNIV (λj. M)) (case_nat y)) ∈
K0 p x →⇩M subprob_algebra (Pi⇩M UNIV (λi. M))"
proof (intro measurable_prob_algebraD measurable_distr_prob_space2[where M="Pi⇩M UNIV (λj. M)"])
show "(λx. lim_sequence (Suc_policy p x) (K x)) ∈ K0 p x →⇩M prob_algebra (Pi⇩M UNIV (λj. M))"
using lim_sequence_Suc_K[OF p] sets_K0[OF p x] measurable_cong_sets
by blast
next show "(λ(ya, y). case_nat ya y) ∈ K0 p x ⨂⇩M Pi⇩M UNIV (λj. M) →⇩M Pi⇩M UNIV (λj. M)"
using sets_K0[OF p x]
by (subst measurable_cong_sets[of _ "M ⨂⇩M Pi⇩M UNIV (λj. M)"]) auto
qed
next
show "space (K0 p x) ≠ {}"
using x p prob_space.not_empty prob_space_K0
by blast
next
show "K0 p x ⤜ (λx. distr (distr (lim_sequence (Suc_policy p x) (K x)) (Pi⇩M UNIV (λj. M))
(case_nat x)) (stream_space M) to_stream) = K0 p x ⤜ (λy. distr (lim_sequence (Suc_policy p y)
(K y)) (stream_space M) to_stream ⤜ (λω. return (stream_space M) (y ## ω)))"
proof (intro bind_cong refl, subst distr_distr)
show "to_stream ∈ Pi⇩M UNIV (λj. M) →⇩M stream_space M"
by measurable
next
show "⋀a. a ∈ space (K0 p x) ⟹
case_nat a ∈ lim_sequence (Suc_policy p a) (K a) →⇩M Pi⇩M UNIV (λj. M)"
by measurable (auto simp: p x intro!: measurable_ident_sets sets_lim_sequence intro: measurable_space)
next
show "⋀a. a ∈ space (K0 p x) ⟹
distr (lim_sequence (Suc_policy p a) (K a)) (stream_space M) (to_stream ∘ case_nat a) =
distr (lim_sequence (Suc_policy p a) (K a)) (stream_space M) to_stream ⤜
(λω. return (stream_space M) (a ## ω))"
proof (subst bind_return_distr', goal_cases)
case (1 a)
then show ?case by (simp add: p space_stream_space_M_ne x)
next
case (2 a)
then show ?case using p x by (auto simp: sets_lim_sequence cong: measurable_cong_sets intro!: distr_cong)[1]
next
case (3 a)
then show ?case
using p x
by (subst distr_distr) (auto simp: to_stream_nat_case intro!: measurable_compose[OF _ measurable_to_stream]
sets_lim_sequence distr_cong measurable_ident_sets)
qed
qed
qed
end
end