Theory Hidden_Markov_Model

section ‹Hidden Markov Models›

theory Hidden_Markov_Model
  imports
    Markov_Models.Discrete_Time_Markov_Chain Auxiliary
    "HOL-Library.IArray"
begin

subsection ‹Definitions›

text ‹Definition of Markov Kernels that are closed w.r.t. to a set of states.›
locale Closed_Kernel =
  fixes K :: "'s ⇒ 't pmf" and S :: "'t set"
  assumes finite: "finite S"
      and wellformed: "S ≠ {}"
      and closed: "∀ s. K s ⊆ S"

text ‹
  An HMM is parameterized by a Markov kernel for the transition probabilites between internal states,
  a Markov kernel for the output probabilities of observations,
  and a fixed set of observations.
›
locale HMM_defs =
  fixes 𝒦 :: "'s ⇒ 's pmf" and 𝒪 :: "'s ⇒ 't pmf" and 𝒪⇩_s :: "'t set"

locale HMM =
  HMM_defs + O: Closed_Kernel 𝒪 𝒪⇩_s
begin

lemma observations_finite: "finite 𝒪⇩_s"
  and observations_wellformed: "𝒪⇩_s ≠ {}"
  and observations_closed: "∀ s. 𝒪 s ⊆ 𝒪⇩_s"
  using O.finite O.wellformed O.closed by -

end

text ‹Fixed set of internal states.›
locale HMM2_defs = HMM_defs 𝒦 𝒪 for 𝒦 :: "'s ⇒ 's pmf" and 𝒪 :: "'s ⇒ 't pmf" +
  fixes 𝒮 :: "'s set"

locale HMM2 = HMM2_defs + HMM + K: Closed_Kernel 𝒦 𝒮
begin

lemma states_finite: "finite 𝒮"
  and states_wellformed: "𝒮 ≠ {}"
  and states_closed: "∀ s. 𝒦 s ⊆ 𝒮"
  using K.finite K.wellformed K.closed by -

end

text ‹
  The set of internal states is now given as a list to iterate over.
  This is needed for the computations on HMMs.
›
locale HMM3_defs = HMM2_defs 𝒪⇩_s 𝒦 for 𝒪⇩_s :: "'t set" and 𝒦 :: "'s ⇒ 's pmf" +
  fixes state_list :: "'s list"

locale HMM3 = HMM3_defs _ _ 𝒪⇩_s 𝒦 + HMM2 𝒪⇩_s 𝒦 for 𝒪⇩_s :: "'t set" and 𝒦 :: "'s ⇒ 's pmf" +
  assumes state_list_𝒮: "set state_list = 𝒮"

context HMM_defs
begin

no_notation (ASCII) comp  (infixl ‹o› 55)

text ‹The ``default'' observation.›
definition
  "obs ≡ SOME x. x ∈ 𝒪⇩_s"

lemma (in HMM) obs:
  "obs ∈ 𝒪⇩_s"
  unfolding obs_def using observations_wellformed by (auto intro: someI_ex)

text ‹
  The HMM is encoded as a Markov chain over pairs of states and observations.
  This is the Markov chain's defining Markov kernel.
›
definition
  "K ≡ λ (s⇩₁, o⇩₁ :: 't). bind_pmf (𝒦 s⇩₁) (λ s⇩₂. map_pmf (λ o⇩₂. (s⇩₂, o⇩₂)) (𝒪 s⇩₂))"

sublocale MC_syntax K .

text ‹
  Uniform distribution of the pairs ‹(s, o)› for a fixed state ‹s›.
›
definition "I (s :: 's) = map_pmf (λ x. (s, x)) (pmf_of_set 𝒪⇩_s)"

text ‹
  The likelihood of an observation sequence given a starting state ‹s› is defined in terms of
  the trace space of the Markov kernel given the uniform distribution of pairs for ‹s›.
›
definition
  "likelihood s os = T' (I s) {ω ∈ space S. ∃ o⇩₀ xs ω'. ω = (s, o⇩₀) ## xs @- ω' ∧ map snd xs = os}"

abbreviation (input) "L os ω ≡ ∃ xs ω'. ω = xs @- ω' ∧ map snd xs = os"

lemma likelihood_alt_def: "likelihood s os = T' (I s) {(s, o) ## xs @- ω' |o xs ω'. map snd xs = os}"
  unfolding likelihood_def by (simp add: in_S)


subsection ‹Iteration Rule For Likelihood›

lemma L_Nil:
  "L [] ω = True"
  by simp

lemma emeasure_T_observation_Nil:
  "T (s, o⇩₀) {ω ∈ space S. L [] ω} = 1"
  by simp

lemma L_Cons:
  "L (o # os) ω ⟷ snd (shd ω) = o ∧ L os (stl ω)"
  apply (cases ω; cases "shd ω"; safe; clarsimp)
   apply force
  subgoal for x xs ω'
    by (force intro: exI[where x = "(x, o) # xs"])
  done

lemma L_measurable[measurable]:
  "Measurable.pred S (L os)"
  apply (induction os)
   apply (simp; fail)
  subgoal premises that for o os
    by(subst L_Cons)
      (intro Measurable.pred_intros_logic
        measurable_compose[OF measurable_shd] measurable_compose[OF measurable_stl that];
        measurable)
  done

lemma init_measurable[measurable]:
  "Measurable.pred S (λx. ∃o⇩₀ xs ω'. x = (s, o⇩₀) ## xs @- ω' ∧ map snd xs = os)"
  (is "Measurable.pred S ?f")
proof -
  have *: "?f ω ⟷ fst (shd ω) = s ∧ L os (stl ω)" for ω
    by (cases ω) auto
  show ?thesis
    by (subst *)
       (intro Measurable.pred_intros_logic measurable_compose[OF measurable_shd]; measurable)
qed

lemma T_init_observation_eq:
  "T (s, o) {ω ∈ space S. L os ω} = T (s, o') {ω ∈ space S. L os ω}"
  apply (subst emeasure_Collect_T[unfolded space_T], (measurable; fail))
  apply (subst (2) emeasure_Collect_T[unfolded space_T], (measurable; fail))
  apply (simp add: K_def)
  done

text ‹
  Shows that it is equivalent to define likelihood in terms of the trace space starting at a single
  pair of an internal state ‹s› and the default observation @{term obs}.
›
lemma (in HMM) likelihood_init:
  "likelihood s os = T (s, obs) {ω ∈ space S. L os ω}"
proof -
  have *: "(∑o∈𝒪⇩_s. emeasure (T (s, o)) {ω ∈ space S. L os ω}) =
    of_nat (card 𝒪⇩_s) * emeasure (T (s, obs)) {ω ∈ space S. L os ω}"
    by (subst sum_constant[symmetric]) (fastforce intro: sum.cong T_init_observation_eq[simplified])
  show ?thesis
    unfolding likelihood_def
    apply (subst emeasure_T')
    subgoal
      by measurable
    using *
    apply (simp add: I_def in_S observations_finite observations_wellformed nn_integral_pmf_of_set)
    apply (subst mult.commute)
    apply (simp add: observations_finite observations_wellformed mult_divide_eq_ennreal)
    done
qed

lemma emeasure_T_observation_Cons:
  "T (s, o⇩₀) {ω ∈ space S. L (o⇩₁ # os) ω} =
   (∫⇧⁺ t. ennreal (pmf (𝒪 t) o⇩₁) * T (t, o⇩₁) {ω ∈ space S. L os ω} ∂(𝒦 s))" (is "?l = ?r")
proof -
  have *:
    "∫⇧⁺ y. T (s', y) {x ∈ space S. ∃xs. (∃ω'. (s', y) ## x = xs @- ω') ∧ map snd xs = o⇩₁ # os}
       ∂measure_pmf (𝒪 s') =
    ennreal (pmf (𝒪 s') o⇩₁) * T (s', o⇩₁) {ω ∈ space S. ∃xs. (∃ω'. ω = xs @- ω') ∧ map snd xs = os}"
    (is "?L = ?R") for s'
  proof -
    have "?L = ∫⇧⁺ x. ennreal (pmf (𝒪 s') x) *
            T (s', x) {ω ∈ space S. ∃xs. (∃ω'. (s', x) ## ω = xs @- ω') ∧ map snd xs = o⇩₁ # os}
          ∂count_space UNIV"
      by (rule nn_integral_measure_pmf)
    also have "… =
      ∫⇧⁺ o⇩₂. (if o⇩₂ = o⇩₁
              then ennreal (pmf (𝒪 s') o⇩₁) * T (s', o⇩₁) {ω ∈ space S. L os ω}
              else 0)
       ∂count_space UNIV"
      apply (rule nn_integral_cong_AE
          [where v = "λ o⇩₂. if o⇩₂ = o⇩₁
            then ennreal (pmf (𝒪 s') o⇩₁) * T (s', o⇩₁) {ω ∈ space S. L os ω} else 0"]
          )
       apply (rule AE_I2)
       apply (split if_split, safe)
      subgoal
        by (auto intro!: arg_cong2[where f = times, OF HOL.refl] arg_cong2[where f = emeasure];
            metis list.simps(9) shift.simps(2) snd_conv
           )
      subgoal
        by (subst arg_cong2[where f = emeasure and d = "{}", OF HOL.refl]) auto
      done
    also have "… = (∫⇧⁺o⇩₂∈{o⇩₁}. (ennreal (pmf (𝒪 s') o⇩₁) * T (s', o⇩₁) {ω ∈ space S. L os ω})
      ∂count_space UNIV)"
      by (rule nn_integral_cong_AE) auto
    also have "… = ?R"
      by simp
    finally show ?thesis .
  qed
  have "?l = ∫⇧⁺ t. T t {x ∈ space S. ∃xs ω'. t ## x = xs @- ω' ∧ map snd xs = o⇩₁ # os} ∂ (K (s, o⇩₀))"
    by (subst emeasure_Collect_T[unfolded space_T], measurable)
  also have "… = ?r"
    using * by (simp add: K_def)
  finally show ?thesis .
qed


subsection ‹Computation of Likelihood›

fun backward where
  "backward s [] = 1" |
  "backward s (o # os) = (∫⇧⁺ t. ennreal (pmf (𝒪 t) o) * backward t os ∂measure_pmf (𝒦 s))"

lemma emeasure_T_observation_backward:
  "emeasure (T (s, o)) {ω ∈ space S. L os ω} = backward s os"
  using emeasure_T_observation_Cons by (induction os arbitrary: s o; simp)

lemma (in HMM) likelihood_backward:
  "likelihood s os = backward s os"
  unfolding likelihood_init emeasure_T_observation_backward ..

end (* HMM Defs *)

context HMM2
begin

fun (in HMM2_defs) forward where
  "forward s t_end [] = indicator {t_end} s" |
  "forward s t_end (o # os) =
    (∑t ∈ 𝒮. ennreal (pmf (𝒪 t) o) * ennreal (pmf (𝒦 s) t) * forward t t_end os)"

lemma forward_split:
  "forward s t (os1 @ os2) = (∑t' ∈ 𝒮. forward s t' os1 * forward t' t os2)"
  if "s ∈ 𝒮"
  using that
  apply (induction os1 arbitrary: s)
  subgoal for s
    apply (simp add: sum_indicator_mult[OF states_finite])
    apply (subst sum.cong[where B = "{s}"])
    by auto
  subgoal for a os1 s
    apply simp
    apply (subst sum_distrib_right)
    apply (subst sum.swap)
    apply (simp add: sum_distrib_left algebra_simps)
    done
  done

lemma (in -)
  "(∑t ∈ S. f t) = f t" if "finite S" "t ∈ S" "∀ s ∈ S - {t}. f s = 0"
  thm sum.empty sum.insert sum.mono_neutral_right[of S "{t}"]
  apply (subst sum.mono_neutral_right[of S "{t}"])
  using that
     apply auto
  done
(*
  oops
  by (metis add.right_neutral empty_iff finite.intros(1) insert_iff subsetI sum.empty sum.insert sum.mono_neutral_right that)

  using that
  apply auto
*)

lemma forward_backward:
  "(∑t ∈ 𝒮. forward s t os) = backward s os" if "s ∈ 𝒮"
  using ‹s ∈ 𝒮›
  apply (induction os arbitrary: s)
  subgoal for s
    by (subst sum.mono_neutral_right[of 𝒮 "{s}", OF states_finite])
       (auto split: if_split_asm simp: indicator_def)
  subgoal for a os s
    apply (simp add: sum.swap sum_distrib_left[symmetric])
    apply (subst nn_integral_measure_pmf_support[where A = 𝒮])
    using states_finite states_closed by (auto simp: algebra_simps)
  done

theorem likelihood_forward:
  "likelihood s os = (∑t ∈ 𝒮. forward s t os)" if ‹s ∈ 𝒮›
  unfolding likelihood_backward forward_backward[symmetric, OF ‹s ∈ 𝒮›] ..


subsection ‹Definition of Maximum Probabilities›

abbreviation (input) "V os as ω ≡ (∃ ω'. ω = zip as os @- ω')"

definition
  "max_prob s os =
  Max {T' (I s) {ω ∈ space S. ∃o ω'. ω = (s, o) ## zip as os @- ω'}
       | as. length as = length os ∧ set as ⊆ 𝒮}"

fun viterbi_prob where
  "viterbi_prob s t_end [] = indicator {t_end} s" |
  "viterbi_prob s t_end (o # os) =
    (MAX t ∈ 𝒮. ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) * viterbi_prob t t_end os)"

definition
  "is_decoding s os as ≡
    T' (I s) {ω ∈ space S. ∃o ω'. ω = (s, o) ## zip as os @- ω'} = max_prob s os ∧
    length as = length os ∧ set as ⊆ 𝒮"


subsection ‹Iteration Rule For Maximum Probabilities›

lemma emeasure_T_state_Nil:
  "T (s, o⇩₀) {ω ∈ space S. V [] as ω} = 1"
  by simp

lemma max_prob_T_state_Nil:
  "Max {T (s, o) {ω ∈ space S. V [] as ω} | as. length as = length [] ∧ set as ⊆ 𝒮} = 1"
  by (simp add: emeasure_T_state_Nil)

lemma V_Cons: "V (o # os) (a # as) ω ⟷ fst (shd ω) = a ∧ snd (shd ω) = o ∧ V os as (stl ω)"
  by (cases ω) auto

lemma measurable_V[measurable]:
  "Measurable.pred S (λω. V os as ω)"
proof (induction os as rule: list_induct2')
  case (4 x xs y ys)
  then show ?case
    by (subst V_Cons)
       (intro Measurable.pred_intros_logic
          measurable_compose[OF measurable_shd] measurable_compose[OF measurable_stl];
        measurable)
qed simp+

lemma init_V_measurable[measurable]:
  "Measurable.pred S (λx. ∃o ω'. x = (s, o) ## zip as os @- ω')" (is "Measurable.pred S ?f")
proof -
  have *: "?f ω ⟷ fst (shd ω) = s ∧ V os as (stl ω)" for ω
    by (cases ω) auto
  show ?thesis
    by (subst *)
       (intro Measurable.pred_intros_logic measurable_compose[OF measurable_shd]; measurable)
qed

lemma max_prob_Cons':
  "Max {T (s, o⇩₁) {ω ∈ space S. V (o # os) as ω} | as. length as = length (o # os) ∧ set as ⊆ 𝒮} =
  (
    MAX t ∈ 𝒮. ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) *
      (MAX as ∈ {as. length as = length os ∧ set as ⊆ 𝒮}. T (t, o) {ω ∈ space S. V os as ω})
  )" (is "?l = ?r")
  and T_V_Cons:
  "T (s, o⇩₁) {ω ∈ space S. V (o # os) (t # as) ω}
  = ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) * T (t, o) {ω ∈ space S. V os as ω}"
  (is "?l' = ?r'")
  if "length as = length os"
proof -
  let ?S = "λ os. {as. length as = length os ∧ set as ⊆ 𝒮}"
  have S_finite: "finite (?S os)" for os :: "'t list"
    using finite_lists_length_eq[OF states_finite] by (rule finite_subset[rotated]) auto
  have S_nonempty: "?S os ≠ {}" for os :: "'t list"
  proof -
    let ?a = "SOME a. a ∈ 𝒮" let ?as = "replicate (length os) ?a"
    from states_wellformed have "?a ∈ 𝒮"
      by (auto intro: someI_ex)
    then have "?as ∈ ?S os"
      by auto
    then show ?thesis
      by force
  qed
  let ?f = "λt as os. T t {ω ∈ space S. V os as (t ## ω)}"
  let ?g = "λt as os. T t {ω ∈ space S. V os as ω}"
  have *: "?f t as (o # os) = ?g t (tl as) os * indicator {(hd as, o)} t"
    if "length as = Suc n" for t as n
    unfolding indicator_def using that by (cases as) auto
  have **: "K (s, o⇩₁) {(t, o)} = pmf (𝒪 t) o * pmf (𝒦 s) t" for t
    unfolding K_def
    apply (simp add: vimage_def)
    apply (subst arg_cong2[where
          f = nn_integral and d = "λ x. 𝒪 x {xa. xa = o ∧ x = t} * indicator {t} x",
          OF HOL.refl])
    subgoal
      by (auto simp: indicator_def)
    by (simp add: emeasure_pmf_single ennreal_mult')
  have "?l = (MAX as ∈ ?S (o # os). ∫⇧⁺ t. ?f t as (o # os) ∂K (s, o⇩₁))"
    by (subst Max_to_image2; subst emeasure_Collect_T[unfolded space_T]; rule measurable_V HOL.refl)
  also have "… = (MAX as ∈ ?S (o # os). ∫⇧⁺ t. ?g t (tl as) os * indicator {(hd as,o)} t ∂K (s,o⇩₁))"
    by (simp cong: Max_image_cong_simp add: *)
  also have "… = (MAX(t, as)∈ 𝒮 × ?S os. ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) * ?g (t, o) as os)"
  proof ((rule Max_eq_image_if; clarsimp?), goal_cases)
    case 1
    from S_finite[of "o # os"] show ?case
      by simp
  next
    case 2
    from states_finite show ?case
      by (blast intro: S_finite)
  next
    case (3 as)
    then show ?case
      by - (rule bexI[where x = "hd as"]; cases as; auto simp: algebra_simps **)
  next
    case (4 x as)
    then show ?case
      by - (rule exI[where x = "x # as"], simp add: algebra_simps **)
  qed
  also have "… = ?r"
    by (subst Max_image_left_mult[symmetric], fact+)
       (rule sym, rule Max_image_pair, rule states_finite, fact+)
  finally show "?l = ?r" .
  have "?l' = ∫⇧⁺ t'. ?f t' (t # as) (o # os) ∂K (s, o⇩₁)"
    by (rule emeasure_Collect_T[unfolded space_T]; rule measurable_V)
  also from that have "… = ∫⇧⁺ t'. ?g t' as os * indicator {(t,o)} t' ∂K (s,o⇩₁)"
    by (subst *[of _ "length as"]; simp)
  also have "… = ?r'"
    by (simp add: **, simp only: algebra_simps)
  finally show "?l' = ?r'" .
qed

lemmas max_prob_Cons = max_prob_Cons'[OF length_replicate]



subsection ‹Computation of Maximum Probabilities›

lemma T_init_V_eq:
  "T (s, o) {ω ∈ space S. V os as ω} = T (s, o') {ω ∈ space S. V os as ω}"
  apply (subst emeasure_Collect_T[unfolded space_T], (measurable; fail))
  apply (subst (2) emeasure_Collect_T[unfolded space_T], (measurable; fail))
  apply (simp add: K_def)
  done

lemma T'_I_T:
  "T' (I s) {ω ∈ space S. ∃o ω'. ω = (s, o) ## zip as os @- ω'} = T (s,o) {ω ∈ space S. V os as ω}"
proof -
  have "(∑o∈𝒪⇩_s. T (s, o) {ω ∈ space S. V os as ω}) =
    of_nat (card 𝒪⇩_s) * T (s, o) {ω ∈ space S. V os as ω}" for as
    by (subst sum_constant[symmetric]) (fastforce intro: sum.cong T_init_V_eq[simplified])
  then show ?thesis
    unfolding max_prob_def
    apply (subst emeasure_T')
    subgoal
      by measurable
    apply (simp add: I_def in_S observations_finite observations_wellformed nn_integral_pmf_of_set)
    apply (subst mult.commute)
    apply (simp add: observations_finite observations_wellformed mult_divide_eq_ennreal)
    done
qed

lemma max_prob_init:
  "max_prob s os = Max {T (s,o) {ω ∈ space S. V os as ω} | as. length as = length os ∧ set as ⊆ 𝒮}"
  unfolding max_prob_def by (simp add: T'_I_T[symmetric])

lemma max_prob_Nil[simp]:
  "max_prob s [] = 1"
  unfolding max_prob_init[where o = obs] by auto

lemma Max_start:
  "(MAX t∈𝒮. (indicator {t} s :: ennreal)) = 1" if "s ∈ 𝒮"
  using states_finite that by (auto simp: indicator_def intro: Max_eqI)

lemma Max_V_viterbi:
  "(MAX t ∈ 𝒮. viterbi_prob s t os) =
   Max {T (s, o) {ω ∈ space S. V os as ω} | as. length as = length os ∧ set as ⊆ 𝒮}" if "s ∈ 𝒮"
  using that states_finite states_wellformed
  by (induction os arbitrary: s o; simp
        add: Max_start max_prob_Cons[simplified] Max_image_commute Max_image_left_mult Max_to_image2
        cong: Max_image_cong
      )

lemma max_prob_viterbi:
  "(MAX t ∈ 𝒮. viterbi_prob s t os) = max_prob s os" if "s ∈ 𝒮"
  using max_prob_init[of s os] Max_V_viterbi[OF ‹s ∈ 𝒮›, symmetric] by simp

end

subsection ‹Decoding the Most Probable Hidden State Sequence›

context HMM3
begin

fun viterbi where
  "viterbi s t_end [] = ([], indicator {t_end} s)" |
  "viterbi s t_end (o # os) = fst (
    argmax snd (map
      (λt. let (xs, v) = viterbi t t_end os in (t # xs, ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) * v))
    state_list))"

lemma state_list_nonempty:
  "state_list ≠ []"
  using state_list_𝒮 states_wellformed by auto

lemma viterbi_viterbi_prob:
  "snd (viterbi s t_end os) = viterbi_prob s t_end os"
proof (induction os arbitrary: s)
  case Nil
  then show ?case
    by simp
next
  case (Cons o os)
  let ?f =
    "λt. let (xs, v) = viterbi t t_end os in (t # xs, ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) * v)"
  let ?xs = "map ?f state_list"
  from state_list_nonempty have "map ?f state_list ≠ []"
    by simp
  from argmax(2,3)[OF this, of snd] have *:
    "snd (fst (argmax snd ?xs)) = snd (argmax snd ?xs)"
    "snd (argmax snd ?xs) = (MAX x ∈ set ?xs. snd x)" .
  then show ?case
    apply (simp add: state_list_𝒮)
    apply (rule Max_eq_image_if)
       apply (intro finite_imageI states_finite; fail)
      apply (intro finite_imageI states_finite; fail)
    subgoal
      apply clarsimp
      subgoal for x
        using Cons.IH[of x] by (auto split: prod.splits)
      done
    apply clarsimp
    subgoal for x
      using Cons.IH[of x] by (force split: prod.splits)
    done
qed

context
begin

private fun val_of where
  "val_of s [] [] = 1" |
  "val_of s (t # xs) (o # os) = ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) * val_of t xs os"

lemma val_of_T:
  "val_of s as os = T (s, o⇩₁) {ω ∈ space S. V os as ω}" if "length as = length os"
  using that by (induction arbitrary: o⇩₁ rule: val_of.induct; (subst T_V_Cons)?; simp)

lemma viterbi_sequence:
  "snd (viterbi s t_end os) = val_of s (fst (viterbi s t_end os)) os"
  if "snd (viterbi s t_end os) > 0"
  using that
proof (induction os arbitrary: s)
  case Nil
  then show ?case
    by (simp add: indicator_def split: if_split_asm split_of_bool_asm)
next
  case (Cons o os s)
  let ?xs = "map
    (λt. let (xs, v) = viterbi t t_end os in (t # xs, ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) * v))
    state_list"
  from state_list_nonempty have "?xs ≠ []"
    by simp
  from argmax(1)[OF this, of snd] obtain t where
    "t ∈ set state_list"
    "fst (argmax snd ?xs) =
    (t # fst (viterbi t t_end os), ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) * snd (viterbi t t_end os))"
    by (auto split: prod.splits)
  with Cons show ?case
    by (auto simp: ennreal_zero_less_mult_iff)
qed

lemma viterbi_valid_path:
  "length as = length os ∧ set as ⊆ 𝒮" if "viterbi s t_end os = (as, v)"
using that proof (induction os arbitrary: s as v)
  case Nil
  then show ?case
    by simp
next
  case (Cons o os s as v)
  let ?xs = "map
    (λt. let (xs, v) = viterbi t t_end os in (t # xs, ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) * v))
    state_list"
  from state_list_nonempty have "?xs ≠ []"
    by simp
  from argmax(1)[OF this, of snd] obtain t where "t ∈ 𝒮"
    "fst (argmax snd ?xs) =
    (t # fst (viterbi t t_end os), ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) * snd (viterbi t t_end os))"
    by (auto simp: state_list_𝒮 split: prod.splits)
  with Cons.prems show ?case
    by (cases "viterbi t t_end os"; simp add: Cons.IH)
qed

definition
  "viterbi_final s os = fst (argmax snd (map (λ t. viterbi s t os) state_list))"

lemma viterbi_finalE:
  obtains t where
    "t ∈ 𝒮" "viterbi_final s os = viterbi s t os"
    "snd (viterbi s t os) = Max ((λt. snd (viterbi s t os)) ` 𝒮)"
proof -
  from state_list_nonempty have "map (λ t. viterbi s t os) state_list ≠ []"
    by simp
  from argmax[OF this, of snd] show ?thesis
    by (auto simp: state_list_𝒮 image_comp comp_def viterbi_final_def intro: that)
qed

theorem viterbi_final_max_prob:
  assumes "viterbi_final s os = (as, v)" "s ∈ 𝒮"
  shows "v = max_prob s os"
proof -
  obtain t where "t ∈ 𝒮" "viterbi_final s os = viterbi s t os"
    "snd (viterbi s t os) = Max ((λt. snd (viterbi s t os)) ` 𝒮)"
    by (rule viterbi_finalE)
  with assms show ?thesis
    by (simp add: viterbi_viterbi_prob max_prob_viterbi)
qed

theorem viterbi_final_is_decoding:
  assumes "viterbi_final s os = (as, v)" "v > 0" "s ∈ 𝒮"
  shows "is_decoding s os as"
proof -
  from viterbi_valid_path[of s _ os as v] assms have as: "length as = length os" "set as ⊆ 𝒮"
    by - (rule viterbi_finalE[of s os]; simp)+
  obtain t where "t ∈ 𝒮" "viterbi_final s os = viterbi s t os"
    by (rule viterbi_finalE)
  with assms viterbi_sequence[of s t os] have "val_of s as os = v"
    by (cases "viterbi s t os") (auto simp: snd_def split!: prod.splits)
  with val_of_T as have "max_prob s os = T (s, obs) {ω ∈ space S. V os as ω}"
    by (simp add: viterbi_final_max_prob[OF assms(1,3)])
  with as show ?thesis
    unfolding is_decoding_def by (simp only: T'_I_T)
qed

end (* Anonymous context *)

end (* HMM 3 *)

end (* Theory *)