# Theory Hidden_Markov_Model

```section ‹Hidden Markov Models›

theory Hidden_Markov_Model
imports
Markov_Models.Discrete_Time_Markov_Chain Auxiliary
"HOL-Library.IArray"
begin

subsection ‹Definitions›

text ‹Definition of Markov Kernels that are closed w.r.t. to a set of states.›
locale Closed_Kernel =
fixes K :: "'s ⇒ 't pmf" and S :: "'t set"
assumes finite: "finite S"
and wellformed: "S ≠ {}"
and closed: "∀ s. K s ⊆ S"

text ‹
An HMM is parameterized by a Markov kernel for the transition probabilites between internal states,
a Markov kernel for the output probabilities of observations,
and a fixed set of observations.
›
locale HMM_defs =
fixes 𝒦 :: "'s ⇒ 's pmf" and 𝒪 :: "'s ⇒ 't pmf" and 𝒪⇩s :: "'t set"

locale HMM =
HMM_defs + O: Closed_Kernel 𝒪 𝒪⇩s
begin

lemma observations_finite: "finite 𝒪⇩s"
and observations_wellformed: "𝒪⇩s ≠ {}"
and observations_closed: "∀ s. 𝒪 s ⊆ 𝒪⇩s"
using O.finite O.wellformed O.closed by -

end

text ‹Fixed set of internal states.›
locale HMM2_defs = HMM_defs 𝒦 𝒪 for 𝒦 :: "'s ⇒ 's pmf" and 𝒪 :: "'s ⇒ 't pmf" +
fixes 𝒮 :: "'s set"

locale HMM2 = HMM2_defs + HMM + K: Closed_Kernel 𝒦 𝒮
begin

lemma states_finite: "finite 𝒮"
and states_wellformed: "𝒮 ≠ {}"
and states_closed: "∀ s. 𝒦 s ⊆ 𝒮"
using K.finite K.wellformed K.closed by -

end

text ‹
The set of internal states is now given as a list to iterate over.
This is needed for the computations on HMMs.
›
locale HMM3_defs = HMM2_defs 𝒪⇩s 𝒦 for 𝒪⇩s :: "'t set" and 𝒦 :: "'s ⇒ 's pmf" +
fixes state_list :: "'s list"

locale HMM3 = HMM3_defs _ _ 𝒪⇩s 𝒦 + HMM2 𝒪⇩s 𝒦 for 𝒪⇩s :: "'t set" and 𝒦 :: "'s ⇒ 's pmf" +
assumes state_list_𝒮: "set state_list = 𝒮"

context HMM_defs
begin

no_notation (ASCII) comp  (infixl "o" 55)

text ‹The ``default'' observation.›
definition
"obs ≡ SOME x. x ∈ 𝒪⇩s"

lemma (in HMM) obs:
"obs ∈ 𝒪⇩s"
unfolding obs_def using observations_wellformed by (auto intro: someI_ex)

text ‹
The HMM is encoded as a Markov chain over pairs of states and observations.
This is the Markov chain's defining Markov kernel.
›
definition
"K ≡ λ (s⇩1, o⇩1 :: 't). bind_pmf (𝒦 s⇩1) (λ s⇩2. map_pmf (λ o⇩2. (s⇩2, o⇩2)) (𝒪 s⇩2))"

sublocale MC_syntax K .

text ‹
Uniform distribution of the pairs ‹(s, o)› for a fixed state ‹s›.
›
definition "I (s :: 's) = map_pmf (λ x. (s, x)) (pmf_of_set 𝒪⇩s)"

text ‹
The likelihood of an observation sequence given a starting state ‹s› is defined in terms of
the trace space of the Markov kernel given the uniform distribution of pairs for ‹s›.
›
definition
"likelihood s os = T' (I s) {ω ∈ space S. ∃ o⇩0 xs ω'. ω = (s, o⇩0) ## xs @- ω' ∧ map snd xs = os}"

abbreviation (input) "L os ω ≡ ∃ xs ω'. ω = xs @- ω' ∧ map snd xs = os"

lemma likelihood_alt_def: "likelihood s os = T' (I s) {(s, o) ## xs @- ω' |o xs ω'. map snd xs = os}"
unfolding likelihood_def by (simp add: in_S)

subsection ‹Iteration Rule For Likelihood›

lemma L_Nil:
"L [] ω = True"
by simp

lemma emeasure_T_observation_Nil:
"T (s, o⇩0) {ω ∈ space S. L [] ω} = 1"
by simp

lemma L_Cons:
"L (o # os) ω ⟷ snd (shd ω) = o ∧ L os (stl ω)"
apply (cases ω; cases "shd ω"; safe; clarsimp)
apply force
subgoal for x xs ω'
by (force intro: exI[where x = "(x, o) # xs"])
done

lemma L_measurable[measurable]:
"Measurable.pred S (L os)"
apply (induction os)
apply (simp; fail)
subgoal premises that for o os
by(subst L_Cons)
(intro Measurable.pred_intros_logic
measurable_compose[OF measurable_shd] measurable_compose[OF measurable_stl that];
measurable)
done

lemma init_measurable[measurable]:
"Measurable.pred S (λx. ∃o⇩0 xs ω'. x = (s, o⇩0) ## xs @- ω' ∧ map snd xs = os)"
(is "Measurable.pred S ?f")
proof -
have *: "?f ω ⟷ fst (shd ω) = s ∧ L os (stl ω)" for ω
by (cases ω) auto
show ?thesis
by (subst *)
(intro Measurable.pred_intros_logic measurable_compose[OF measurable_shd]; measurable)
qed

lemma T_init_observation_eq:
"T (s, o) {ω ∈ space S. L os ω} = T (s, o') {ω ∈ space S. L os ω}"
apply (subst emeasure_Collect_T[unfolded space_T], (measurable; fail))
apply (subst (2) emeasure_Collect_T[unfolded space_T], (measurable; fail))
done

text ‹
Shows that it is equivalent to define likelihood in terms of the trace space starting at a single
pair of an internal state ‹s› and the default observation @{term obs}.
›
lemma (in HMM) likelihood_init:
"likelihood s os = T (s, obs) {ω ∈ space S. L os ω}"
proof -
have *: "(∑o∈𝒪⇩s. emeasure (T (s, o)) {ω ∈ space S. L os ω}) =
of_nat (card 𝒪⇩s) * emeasure (T (s, obs)) {ω ∈ space S. L os ω}"
by (subst sum_constant[symmetric]) (fastforce intro: sum.cong T_init_observation_eq[simplified])
show ?thesis
unfolding likelihood_def
apply (subst emeasure_T')
subgoal
by measurable
using *
apply (simp add: I_def in_S observations_finite observations_wellformed nn_integral_pmf_of_set)
apply (subst mult.commute)
apply (simp add: observations_finite observations_wellformed mult_divide_eq_ennreal)
done
qed

lemma emeasure_T_observation_Cons:
"T (s, o⇩0) {ω ∈ space S. L (o⇩1 # os) ω} =
(∫⇧+ t. ennreal (pmf (𝒪 t) o⇩1) * T (t, o⇩1) {ω ∈ space S. L os ω} ∂(𝒦 s))" (is "?l = ?r")
proof -
have *:
"∫⇧+ y. T (s', y) {x ∈ space S. ∃xs. (∃ω'. (s', y) ## x = xs @- ω') ∧ map snd xs = o⇩1 # os}
∂measure_pmf (𝒪 s') =
ennreal (pmf (𝒪 s') o⇩1) * T (s', o⇩1) {ω ∈ space S. ∃xs. (∃ω'. ω = xs @- ω') ∧ map snd xs = os}"
(is "?L = ?R") for s'
proof -
have "?L = ∫⇧+ x. ennreal (pmf (𝒪 s') x) *
T (s', x) {ω ∈ space S. ∃xs. (∃ω'. (s', x) ## ω = xs @- ω') ∧ map snd xs = o⇩1 # os}
∂count_space UNIV"
by (rule nn_integral_measure_pmf)
also have "… =
∫⇧+ o⇩2. (if o⇩2 = o⇩1
then ennreal (pmf (𝒪 s') o⇩1) * T (s', o⇩1) {ω ∈ space S. L os ω}
else 0)
∂count_space UNIV"
apply (rule nn_integral_cong_AE
[where v = "λ o⇩2. if o⇩2 = o⇩1
then ennreal (pmf (𝒪 s') o⇩1) * T (s', o⇩1) {ω ∈ space S. L os ω} else 0"]
)
apply (rule AE_I2)
apply (split if_split, safe)
subgoal
by (auto intro!: arg_cong2[where f = times, OF HOL.refl] arg_cong2[where f = emeasure];
metis list.simps(9) shift.simps(2) snd_conv
)
subgoal
by (subst arg_cong2[where f = emeasure and d = "{}", OF HOL.refl]) auto
done
also have "… = ∫⇧+o⇩2∈{o⇩1}.
(ennreal (pmf (𝒪 s') o⇩1) * T (s', o⇩1) {ω ∈ space S. L os ω})
∂count_space UNIV"
by (rule nn_integral_cong_AE) auto
also have "… = ?R"
by simp
finally show ?thesis .
qed
have "?l = ∫⇧+ t. T t {x ∈ space S. ∃xs ω'. t ## x = xs @- ω' ∧ map snd xs = o⇩1 # os} ∂ (K (s, o⇩0))"
by (subst emeasure_Collect_T[unfolded space_T], measurable)
also have "… = ?r"
using * by (simp add: K_def)
finally show ?thesis .
qed

subsection ‹Computation of Likelihood›

fun backward where
"backward s [] = 1" |
"backward s (o # os) = (∫⇧+ t. ennreal (pmf (𝒪 t) o) * backward t os ∂measure_pmf (𝒦 s))"

lemma emeasure_T_observation_backward:
"emeasure (T (s, o)) {ω ∈ space S. L os ω} = backward s os"
using emeasure_T_observation_Cons by (induction os arbitrary: s o; simp)

lemma (in HMM) likelihood_backward:
"likelihood s os = backward s os"
unfolding likelihood_init emeasure_T_observation_backward ..

end (* HMM Defs *)

context HMM2
begin

fun (in HMM2_defs) forward where
"forward s t_end [] = indicator {t_end} s" |
"forward s t_end (o # os) =
(∑t ∈ 𝒮. ennreal (pmf (𝒪 t) o) * ennreal (pmf (𝒦 s) t) * forward t t_end os)"

lemma forward_split:
"forward s t (os1 @ os2) = (∑t' ∈ 𝒮. forward s t' os1 * forward t' t os2)"
if "s ∈ 𝒮"
using that
apply (induction os1 arbitrary: s)
subgoal for s
apply (subst sum.cong[where B = "{s}"])
by auto
subgoal for a os1 s
apply simp
apply (subst sum_distrib_right)
apply (subst sum.swap)
done
done

lemma (in -)
"(∑t ∈ S. f t) = f t" if "finite S" "t ∈ S" "∀ s ∈ S - {t}. f s = 0"
thm sum.empty sum.insert sum.mono_neutral_right[of S "{t}"]
apply (subst sum.mono_neutral_right[of S "{t}"])
using that
apply auto
done
(*
oops
by (metis add.right_neutral empty_iff finite.intros(1) insert_iff subsetI sum.empty sum.insert sum.mono_neutral_right that)

using that
apply auto
*)

lemma forward_backward:
"(∑t ∈ 𝒮. forward s t os) = backward s os" if "s ∈ 𝒮"
using ‹s ∈ 𝒮›
apply (induction os arbitrary: s)
subgoal for s
by (subst sum.mono_neutral_right[of 𝒮 "{s}", OF states_finite])
(auto split: if_split_asm simp: indicator_def)
subgoal for a os s
apply (subst nn_integral_measure_pmf_support[where A = 𝒮])
using states_finite states_closed by (auto simp: algebra_simps)
done

theorem likelihood_forward:
"likelihood s os = (∑t ∈ 𝒮. forward s t os)" if ‹s ∈ 𝒮›
unfolding likelihood_backward forward_backward[symmetric, OF ‹s ∈ 𝒮›] ..

subsection ‹Definition of Maximum Probabilities›

abbreviation (input) "V os as ω ≡ (∃ ω'. ω = zip as os @- ω')"

definition
"max_prob s os =
Max {T' (I s) {ω ∈ space S. ∃o ω'. ω = (s, o) ## zip as os @- ω'}
| as. length as = length os ∧ set as ⊆ 𝒮}"

fun viterbi_prob where
"viterbi_prob s t_end [] = indicator {t_end} s" |
"viterbi_prob s t_end (o # os) =
(MAX t ∈ 𝒮. ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) * viterbi_prob t t_end os)"

definition
"is_decoding s os as ≡
T' (I s) {ω ∈ space S. ∃o ω'. ω = (s, o) ## zip as os @- ω'} = max_prob s os ∧
length as = length os ∧ set as ⊆ 𝒮"

subsection ‹Iteration Rule For Maximum Probabilities›

lemma emeasure_T_state_Nil:
"T (s, o⇩0) {ω ∈ space S. V [] as ω} = 1"
by simp

lemma max_prob_T_state_Nil:
"Max {T (s, o) {ω ∈ space S. V [] as ω} | as. length as = length [] ∧ set as ⊆ 𝒮} = 1"

lemma V_Cons: "V (o # os) (a # as) ω ⟷ fst (shd ω) = a ∧ snd (shd ω) = o ∧ V os as (stl ω)"
by (cases ω) auto

lemma measurable_V[measurable]:
"Measurable.pred S (λω. V os as ω)"
proof (induction os as rule: list_induct2')
case (4 x xs y ys)
then show ?case
by (subst V_Cons)
(intro Measurable.pred_intros_logic
measurable_compose[OF measurable_shd] measurable_compose[OF measurable_stl];
measurable)
qed simp+

lemma init_V_measurable[measurable]:
"Measurable.pred S (λx. ∃o ω'. x = (s, o) ## zip as os @- ω')" (is "Measurable.pred S ?f")
proof -
have *: "?f ω ⟷ fst (shd ω) = s ∧ V os as (stl ω)" for ω
by (cases ω) auto
show ?thesis
by (subst *)
(intro Measurable.pred_intros_logic measurable_compose[OF measurable_shd]; measurable)
qed

lemma max_prob_Cons':
"Max {T (s, o⇩1) {ω ∈ space S. V (o # os) as ω} | as. length as = length (o # os) ∧ set as ⊆ 𝒮} =
(
MAX t ∈ 𝒮. ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) *
(MAX as ∈ {as. length as = length os ∧ set as ⊆ 𝒮}. T (t, o) {ω ∈ space S. V os as ω})
)" (is "?l = ?r")
and T_V_Cons:
"T (s, o⇩1) {ω ∈ space S. V (o # os) (t # as) ω}
= ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) * T (t, o) {ω ∈ space S. V os as ω}"
(is "?l' = ?r'")
if "length as = length os"
proof -
let ?S = "λ os. {as. length as = length os ∧ set as ⊆ 𝒮}"
have S_finite: "finite (?S os)" for os :: "'t list"
using finite_lists_length_eq[OF states_finite] by (rule finite_subset[rotated]) auto
have S_nonempty: "?S os ≠ {}" for os :: "'t list"
proof -
let ?a = "SOME a. a ∈ 𝒮" let ?as = "replicate (length os) ?a"
from states_wellformed have "?a ∈ 𝒮"
by (auto intro: someI_ex)
then have "?as ∈ ?S os"
by auto
then show ?thesis
by force
qed
let ?f = "λt as os. T t {ω ∈ space S. V os as (t ## ω)}"
let ?g = "λt as os. T t {ω ∈ space S. V os as ω}"
have *: "?f t as (o # os) = ?g t (tl as) os * indicator {(hd as, o)} t"
if "length as = Suc n" for t as n
unfolding indicator_def using that by (cases as) auto
have **: "K (s, o⇩1) {(t, o)} = pmf (𝒪 t) o * pmf (𝒦 s) t" for t
unfolding K_def
apply (subst arg_cong2[where
f = nn_integral and d = "λ x. 𝒪 x {xa. xa = o ∧ x = t} * indicator {t} x",
OF HOL.refl])
subgoal
by (auto simp: indicator_def)
have "?l = (MAX as ∈ ?S (o # os). ∫⇧+ t. ?f t as (o # os) ∂K (s, o⇩1))"
by (subst Max_to_image2; subst emeasure_Collect_T[unfolded space_T]; rule measurable_V HOL.refl)
also have "… = (MAX as ∈ ?S (o # os). ∫⇧+ t. ?g t (tl as) os * indicator {(hd as,o)} t ∂K (s,o⇩1))"
by (simp cong: Max_image_cong_simp add: *)
also have "… = (MAX(t, as)∈ 𝒮 × ?S os. ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) * ?g (t, o) as os)"
proof ((rule Max_eq_image_if; clarsimp?), goal_cases)
case 1
from S_finite[of "o # os"] show ?case
by simp
next
case 2
from states_finite show ?case
by (blast intro: S_finite)
next
case (3 as)
then show ?case
by - (rule bexI[where x = "hd as"]; cases as; auto simp: algebra_simps **)
next
case (4 x as)
then show ?case
by - (rule exI[where x = "x # as"], simp add: algebra_simps **)
qed
also have "… = ?r"
by (subst Max_image_left_mult[symmetric], fact+)
(rule sym, rule Max_image_pair, rule states_finite, fact+)
finally show "?l = ?r" .
have "?l' = ∫⇧+ t'. ?f t' (t # as) (o # os) ∂K (s, o⇩1)"
by (rule emeasure_Collect_T[unfolded space_T]; rule measurable_V)
also from that have "… = ∫⇧+ t'. ?g t' as os * indicator {(t,o)} t' ∂K (s,o⇩1)"
by (subst *[of _ "length as"]; simp)
also have "… = ?r'"
by (simp add: **, simp only: algebra_simps)
finally show "?l' = ?r'" .
qed

lemmas max_prob_Cons = max_prob_Cons'[OF length_replicate]

subsection ‹Computation of Maximum Probabilities›

lemma T_init_V_eq:
"T (s, o) {ω ∈ space S. V os as ω} = T (s, o') {ω ∈ space S. V os as ω}"
apply (subst emeasure_Collect_T[unfolded space_T], (measurable; fail))
apply (subst (2) emeasure_Collect_T[unfolded space_T], (measurable; fail))
done

lemma T'_I_T:
"T' (I s) {ω ∈ space S. ∃o ω'. ω = (s, o) ## zip as os @- ω'} = T (s,o) {ω ∈ space S. V os as ω}"
proof -
have "(∑o∈𝒪⇩s. T (s, o) {ω ∈ space S. V os as ω}) =
of_nat (card 𝒪⇩s) * T (s, o) {ω ∈ space S. V os as ω}" for as
by (subst sum_constant[symmetric]) (fastforce intro: sum.cong T_init_V_eq[simplified])
then show ?thesis
unfolding max_prob_def
apply (subst emeasure_T')
subgoal
by measurable
apply (simp add: I_def in_S observations_finite observations_wellformed nn_integral_pmf_of_set)
apply (subst mult.commute)
apply (simp add: observations_finite observations_wellformed mult_divide_eq_ennreal)
done
qed

lemma max_prob_init:
"max_prob s os = Max {T (s,o) {ω ∈ space S. V os as ω} | as. length as = length os ∧ set as ⊆ 𝒮}"
unfolding max_prob_def by (simp add: T'_I_T[symmetric])

lemma max_prob_Nil[simp]:
"max_prob s [] = 1"
unfolding max_prob_init[where o = obs] by auto

lemma Max_start:
"(MAX t∈𝒮. (indicator {t} s :: ennreal)) = 1" if "s ∈ 𝒮"
using states_finite that by (auto simp: indicator_def intro: Max_eqI)

lemma Max_V_viterbi:
"(MAX t ∈ 𝒮. viterbi_prob s t os) =
Max {T (s, o) {ω ∈ space S. V os as ω} | as. length as = length os ∧ set as ⊆ 𝒮}" if "s ∈ 𝒮"
using that states_finite states_wellformed
by (induction os arbitrary: s o; simp
add: Max_start max_prob_Cons[simplified] Max_image_commute Max_image_left_mult Max_to_image2
cong: Max_image_cong
)

lemma max_prob_viterbi:
"(MAX t ∈ 𝒮. viterbi_prob s t os) = max_prob s os" if "s ∈ 𝒮"
using max_prob_init[of s os] Max_V_viterbi[OF ‹s ∈ 𝒮›, symmetric] by simp

end

subsection ‹Decoding the Most Probable Hidden State Sequence›

context HMM3
begin

fun viterbi where
"viterbi s t_end [] = ([], indicator {t_end} s)" |
"viterbi s t_end (o # os) = fst (
argmax snd (map
(λt. let (xs, v) = viterbi t t_end os in (t # xs, ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) * v))
state_list))"

lemma state_list_nonempty:
"state_list ≠ []"
using state_list_𝒮 states_wellformed by auto

lemma viterbi_viterbi_prob:
"snd (viterbi s t_end os) = viterbi_prob s t_end os"
proof (induction os arbitrary: s)
case Nil
then show ?case
by simp
next
case (Cons o os)
let ?f =
"λt. let (xs, v) = viterbi t t_end os in (t # xs, ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) * v)"
let ?xs = "map ?f state_list"
from state_list_nonempty have "map ?f state_list ≠ []"
by simp
from argmax(2,3)[OF this, of snd] have *:
"snd (fst (argmax snd ?xs)) = snd (argmax snd ?xs)"
"snd (argmax snd ?xs) = (MAX x ∈ set ?xs. snd x)" .
then show ?case
apply (rule Max_eq_image_if)
apply (intro finite_imageI states_finite; fail)
apply (intro finite_imageI states_finite; fail)
subgoal
apply clarsimp
subgoal for x
using Cons.IH[of x] by (auto split: prod.splits)
done
apply clarsimp
subgoal for x
using Cons.IH[of x] by (force split: prod.splits)
done
qed

context
begin

private fun val_of where
"val_of s [] [] = 1" |
"val_of s (t # xs) (o # os) = ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) * val_of t xs os"

lemma val_of_T:
"val_of s as os = T (s, o⇩1) {ω ∈ space S. V os as ω}" if "length as = length os"
using that by (induction arbitrary: o⇩1 rule: val_of.induct; (subst T_V_Cons)?; simp)

lemma viterbi_sequence:
"snd (viterbi s t_end os) = val_of s (fst (viterbi s t_end os)) os"
if "snd (viterbi s t_end os) > 0"
using that
proof (induction os arbitrary: s)
case Nil
then show ?case
by (simp add: indicator_def split: if_split_asm split_of_bool_asm)
next
case (Cons o os s)
let ?xs = "map
(λt. let (xs, v) = viterbi t t_end os in (t # xs, ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) * v))
state_list"
from state_list_nonempty have "?xs ≠ []"
by simp
from argmax(1)[OF this, of snd] obtain t where
"t ∈ set state_list"
"fst (argmax snd ?xs) =
(t # fst (viterbi t t_end os), ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) * snd (viterbi t t_end os))"
by (auto split: prod.splits)
with Cons show ?case
by (auto simp: ennreal_zero_less_mult_iff)
qed

lemma viterbi_valid_path:
"length as = length os ∧ set as ⊆ 𝒮" if "viterbi s t_end os = (as, v)"
using that proof (induction os arbitrary: s as v)
case Nil
then show ?case
by simp
next
case (Cons o os s as v)
let ?xs = "map
(λt. let (xs, v) = viterbi t t_end os in (t # xs, ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) * v))
state_list"
from state_list_nonempty have "?xs ≠ []"
by simp
from argmax(1)[OF this, of snd] obtain t where "t ∈ 𝒮"
"fst (argmax snd ?xs) =
(t # fst (viterbi t t_end os), ennreal (pmf (𝒪 t) o * pmf (𝒦 s) t) * snd (viterbi t t_end os))"
by (auto simp: state_list_𝒮 split: prod.splits)
with Cons.prems show ?case
by (cases "viterbi t t_end os"; simp add: Cons.IH)
qed

definition
"viterbi_final s os = fst (argmax snd (map (λ t. viterbi s t os) state_list))"

lemma viterbi_finalE:
obtains t where
"t ∈ 𝒮" "viterbi_final s os = viterbi s t os"
"snd (viterbi s t os) = Max ((λt. snd (viterbi s t os)) ` 𝒮)"
proof -
from state_list_nonempty have "map (λ t. viterbi s t os) state_list ≠ []"
by simp
from argmax[OF this, of snd] show ?thesis
by (auto simp: state_list_𝒮 image_comp comp_def viterbi_final_def intro: that)
qed

theorem viterbi_final_max_prob:
assumes "viterbi_final s os = (as, v)" "s ∈ 𝒮"
shows "v = max_prob s os"
proof -
obtain t where "t ∈ 𝒮" "viterbi_final s os = viterbi s t os"
"snd (viterbi s t os) = Max ((λt. snd (viterbi s t os)) ` 𝒮)"
by (rule viterbi_finalE)
with assms show ?thesis
qed

theorem viterbi_final_is_decoding:
assumes "viterbi_final s os = (as, v)" "v > 0" "s ∈ 𝒮"
shows "is_decoding s os as"
proof -
from viterbi_valid_path[of s _ os as v] assms have as: "length as = length os" "set as ⊆ 𝒮"
by - (rule viterbi_finalE[of s os]; simp)+
obtain t where "t ∈ 𝒮" "viterbi_final s os = viterbi s t os"
by (rule viterbi_finalE)
with assms viterbi_sequence[of s t os] have "val_of s as os = v"
by (cases "viterbi s t os") (auto simp: snd_def split!: prod.splits)
with val_of_T as have "max_prob s os = T (s, obs) {ω ∈ space S. V os as ω}"