(* Theory Lemma_R *)

section ‹Lemma R\label{s:lemma_r}›

theory Lemma_R
imports Inductive_Inference_Basics
begin

text ‹A common technique for constructing a class that cannot be
learned is diagonalization against all strategies (see, for instance,
Section~\ref{s:lim_bc}). Similarly, the typical way of proving that a class
cannot be learned is by assuming there is a strategy and deriving a
contradiction. Both techniques are easier to carry out if one has to consider
only \emph{total} recursive strategies. This is not possible in general,
since after all the definitions of the inference types admit strictly partial
strategies. However, for many inference types one can show that for every
strategy there is a total strategy with at least ``the same learning power''.
Results to that effect are called Lemma~R.

Lemma~R comes in different strengths depending on how general the
construction of the total recursive strategy is. CONS is the only inference
type considered here for which not even a weak form of Lemma~R holds.›

subsection ‹Strong Lemma R for LIM, FIN, and BC›

text ‹In its strong form Lemma~R says that for any strategy $S$, there
is a total strategy $T$ that learns all classes $S$ learns regardless of
hypothesis space. The strategy $T$ can be derived from $S$ by a delayed
simulation of $S$. More precisely, for input $f^n$, $T$ simulates $S$ for
prefixes $f^0, f^1, \ldots, f^n$ for at most $n$ steps. If $S$ halts on none
of the prefixes, $T$ outputs an arbitrary hypothesis. Otherwise let $k \leq n$ be maximal such that $S$ halts on $f^k$ in at most $n$ steps. Then $T$
outputs $S(f^k)$. ›

text ‹We reformulate some lemmas for @{term r_result1} to make it easier
to use them with @{term "φ"}.›

(* If φ_i(x) converges to v, there is a step bound t such that the step-bounded
   simulation r_result1 reports Suc v for all bounds ≥ t and 0 (still running) below t. *)
lemma r_result1_converg_phi:
assumes "φ i x ↓= v"
shows "∃t.
(∀t'≥t. eval r_result1 [t', i, x] ↓= Suc v) ∧
(∀t'<t. eval r_result1 [t', i, x] ↓= 0)"
using assms r_result1_converg' phi_def by simp_all

(* For a converging computation, every step bound yields either the result (Suc v) or 0. *)
lemma r_result1_bivalent':
assumes "eval r_phi [i, x] ↓= v"
shows "eval r_result1 [t, i, x] ↓= Suc v ∨ eval r_result1 [t, i, x] ↓= 0"
using assms r_result1 r_result_bivalent' r_phi'' by simp

(* Same bivalence, restated in terms of φ instead of r_phi. *)
lemma r_result1_bivalent_phi:
assumes "φ i x ↓= v"
shows "eval r_result1 [t, i, x] ↓= Suc v ∨ eval r_result1 [t, i, x] ↓= 0"
using assms r_result1_bivalent' phi_def by simp_all

(* If φ_i(x) diverges, the step-bounded simulation reports 0 for every bound t. *)
lemma r_result1_diverg_phi:
assumes "φ i x ↑"
shows "eval r_result1 [t, i, x] ↓= 0"
using assms phi_def r_result1_diverg' by simp

(* A non-zero simulation result Suc v certifies that φ_i(x) converges to v. *)
lemma r_result1_some_phi:
assumes "eval r_result1 [t, i, x] ↓= Suc v"
shows "φ i x ↓= v"
using assms phi_def r_result1_Some' by simp

(* Once the simulation reports a result, it keeps reporting it for all larger bounds. *)
lemma r_result1_saturating':
assumes "eval r_result1 [t, i, x] ↓= Suc v"
shows "eval r_result1 [t + d, i, x] ↓= Suc v"
using assms r_result1 r_result_saturating r_phi'' by simp

(* Saturation phrased via "the": a positive (halted) result stays positive for larger bounds. *)
lemma r_result1_saturating_the:
assumes "the (eval r_result1 [t, i, x]) > 0" and "t' ≥ t"
shows "the (eval r_result1 [t', i, x]) > 0"
proof -
from assms(1) obtain v where "eval r_result1 [t, i, x] ↓= Suc v"
using r_result1_bivalent_phi r_result1_diverg_phi
by (metis inc_induct le_0_eq not_less_zero option.discI option.expand option.sel)
with assms have "eval r_result1 [t', i, x] ↓= Suc v"
using r_result1_saturating' le_Suc_ex by blast
then show ?thesis by simp
qed

(* Recurrence for the bounded GREATEST: extending the search bound from n to Suc n
   either picks n itself (if P n holds) or keeps the previous greatest witness below n.
   This is the step equation driving the primitive recursion in r_delay_aux. *)
lemma Greatest_bounded_Suc:
fixes P :: "nat ⇒ nat"
shows "(if P n > 0 then Suc n
else if ∃j<n. P j > 0 then Suc (GREATEST j. j < n ∧ P j > 0) else 0) =
(if ∃j<Suc n. P j > 0 then Suc (GREATEST j. j < Suc n ∧ P j > 0) else 0)"
(is "?lhs = ?rhs")
proof (cases "∃j<Suc n. P j > 0")
case 1: True
show ?thesis
proof (cases "P n > 0")
case True
then have "(GREATEST j. j < Suc n ∧ P j > 0) = n"
using Greatest_equality[of "λj. j < Suc n ∧ P j > 0"] by simp
moreover have "?rhs = Suc (GREATEST j. j < Suc n ∧ P j > 0)"
using 1 by simp
ultimately have "?rhs = Suc n" by simp
then show ?thesis using True by simp
next
case False
then have "?lhs = Suc (GREATEST j. j < n ∧ P j > 0)"
using 1 by (metis less_SucE)
moreover have "?rhs = Suc (GREATEST j. j < Suc n ∧ P j > 0)"
using 1 by simp
moreover have "(GREATEST j. j < n ∧ P j > 0) =
(GREATEST j. j < Suc n ∧ P j > 0)"
using 1 False by (metis less_SucI less_Suc_eq)
ultimately show ?thesis by simp
qed
next
case False
then show ?thesis by auto
qed

text ‹For $n$, $i$, $x$, the next function simulates $\varphi_i$ on all
non-empty prefixes of at most length $n$ of the list $x$ for at most $n$
steps. It returns the length of the longest such prefix for which $\varphi_i$
halts, or zero if $\varphi_i$ does not halt for any prefix.›

(* Primitive-recursive core of the delayed simulation: recursion on the first argument n,
   base case constant 0; the step tests r_result1 on the (Suc j)-prefix of x with time
   bound e_length x and records the largest halting prefix length found so far. *)
definition "r_delay_aux ≡
Pr 2 (r_constn 1 0)
(Cn 4 r_ifz
[Cn 4 r_result1
[Cn 4 r_length [Id 4 3], Id 4 2,
Cn 4 r_take [Cn 4 S [Id 4 0], Id 4 3]],
Id 4 1, Cn 4 S [Id 4 0]])"

(* r_delay_aux is a ternary primitive recursive function. *)
lemma r_delay_aux_prim: "prim_recfn 3 r_delay_aux"
unfolding r_delay_aux_def by simp_all

(* Totality is immediate from primitive recursiveness. *)
lemma r_delay_aux_total: "total r_delay_aux"
using  prim_recfn_total[OF r_delay_aux_prim] .

(* Correctness of r_delay_aux: for n ≤ |x| it returns Suc of the greatest j < n such
   that the simulation of φ_i on the (Suc j)-prefix of x halts within |x| steps,
   or 0 if there is no such j.  The proof is by induction on n using the
   Greatest_bounded_Suc recurrence. *)
lemma r_delay_aux:
  assumes "n ≤ e_length x"
  shows "eval r_delay_aux [n, i, x] ↓=
    (if ∃j<n. the (eval r_result1 [e_length x, i, e_take (Suc j) x]) > 0
     then Suc (GREATEST j.
       j < n ∧
       the (eval r_result1 [e_length x, i, e_take (Suc j) x]) > 0)
     else 0)"
proof -
  (* z is the step function's test: simulate φ_i on the (Suc j)-prefix of x. *)
  define z where "z ≡
    Cn 4 r_result1
     [Cn 4 r_length [Id 4 3], Id 4 2, Cn 4 r_take [Cn 4 S [Id 4 0], Id 4 3]]"
  then have z_recfn: "recfn 4 z" by simp
  have z: "eval z [j, r, i, x] = eval r_result1 [e_length x, i, e_take (Suc j) x]"
      if "j < e_length x" for j r i x
    unfolding z_def using that by simp

  (* g is the full recursion step: keep the accumulator r unless prefix j+1 halts. *)
  define g where "g ≡ Cn 4 r_ifz [z, Id 4 1, Cn 4 S [Id 4 0]]"
  then have g: "eval g [j, r, i, x] ↓=
      (if the (eval r_result1 [e_length x, i, e_take (Suc j) x]) > 0 then Suc j else r)"
      if "j < e_length x" for j r i x
    using that z prim_recfn_total z_recfn by simp

  show ?thesis
    using assms
  proof (induction n)
    case 0
    moreover have "eval r_delay_aux [0, i, x] ↓= 0"
      (* NOTE(review): the terminating proof method of this step was lost in extraction;
         "by simp" restored — confirm against the original theory in Isabelle. *)
      using eval_Pr_0 r_delay_aux_def r_delay_aux_prim r_constn by simp
    ultimately show ?case by simp
  next
    case (Suc n)
    let ?P = "λj. the (eval r_result1 [e_length x, i, e_take (Suc j) x])"
    have "eval r_delay_aux [n, i, x] ↓"
      using Suc by simp
    moreover have "eval r_delay_aux [Suc n, i, x] =
        eval (Pr 2 (r_constn 1 0) g) [Suc n, i, x]"
      unfolding r_delay_aux_def g_def z_def by simp
    ultimately have "eval r_delay_aux [Suc n, i, x] =
        eval g [n, the (eval r_delay_aux [n, i, x]), i, x]"
      using r_delay_aux_prim Suc eval_Pr_converg_Suc
      by (simp add: r_delay_aux_def g_def z_def numeral_3_eq_3)
    then have "eval r_delay_aux [Suc n, i, x] ↓=
        (if ?P n > 0 then Suc n
         else if ∃j<n. ?P j > 0 then Suc (GREATEST j. j < n ∧ ?P j > 0) else 0)"
      using g Suc by simp
    then have "eval r_delay_aux [Suc n, i, x] ↓=
        (if ∃j<Suc n. ?P j > 0 then Suc (GREATEST j. j < Suc n ∧ ?P j > 0) else 0)"
      using Greatest_bounded_Suc[where ?P="?P"] by simp
    then show ?case by simp
  qed
qed

text ‹The next function simulates $\varphi_i$ on all non-empty prefixes
of a list $x$ of length $n$ for at most $n$ steps and outputs the length of
the longest prefix for which $\varphi_i$ halts, or zero if $\varphi_i$ does
not halt for any such prefix.›

(* Instantiate r_delay_aux with n = e_length x: search all non-empty prefixes of x. *)
definition "r_delay ≡ Cn 2 r_delay_aux [Cn 2 r_length [Id 2 1], Id 2 0, Id 2 1]"

(* r_delay is a well-formed binary recursive function. *)
lemma r_delay_recfn [simp]: "recfn 2 r_delay"
unfolding r_delay_def by (simp add: r_delay_aux_prim)

(* Correctness of r_delay: Suc of the length of the longest halting prefix, or 0. *)
lemma r_delay:
"eval r_delay [i, x] ↓=
(if ∃j<e_length x. the (eval r_result1 [e_length x, i, e_take (Suc j) x]) > 0
then Suc (GREATEST j.
j < e_length x ∧ the (eval r_result1 [e_length x, i, e_take (Suc j) x]) > 0)
else 0)"
unfolding r_delay_def using r_delay_aux r_delay_aux_prim by simp

(* Mathematical counterpart of r_delay, as a (total) partial2 function. *)
definition "delay i x ≡ Some
(if ∃j<e_length x. the (eval r_result1 [e_length x, i, e_take (Suc j) x]) > 0
then Suc (GREATEST j.
j < e_length x ∧ the (eval r_result1 [e_length x, i, e_take (Suc j) x]) > 0)
else 0)"

(* delay is a total recursive function of two arguments, witnessed by r_delay. *)
lemma delay_in_R2: "delay ∈ ℛ⇧2"
using r_delay totalI2 R2I delay_def r_delay_recfn
by (metis (no_types, lifting) numeral_2_eq_2 option.simps(3))

(* The delay never exceeds the length of the encoded list x. *)
lemma delay_le_length: "the (delay i x) ≤ e_length x"
proof (cases "∃j<e_length x. the (eval r_result1 [e_length x, i, e_take (Suc j) x]) > 0")
case True
let ?P = "λj. j < e_length x ∧ the (eval r_result1 [e_length x, i, e_take (Suc j) x]) > 0"
from True have "∃j. ?P j" by simp
moreover have "⋀y. ?P y ⟹ y ≤ e_length x" by simp
ultimately have "?P (Greatest ?P)"
using GreatestI_ex_nat[where ?P="?P"] by blast
then have "Greatest ?P < e_length x" by simp
moreover have "delay i x ↓= Suc (Greatest ?P)"
using delay_def True by simp
ultimately show ?thesis by auto
next
case False
then show ?thesis using delay_def by auto
qed

(* On an initial segment f ▹ n, taking "delay" many elements yields again an initial
   segment of f (of length delay − 1 in the ▹ convention). *)
lemma e_take_delay_init:
assumes "f ∈ ℛ" and "the (delay i (f ▹ n)) > 0"
shows "e_take (the (delay i (f ▹ n))) (f ▹ n) = f ▹ (the (delay i (f ▹ n)) - 1)"
using assms e_take_init[of f _ n] length_init[of f n] delay_le_length[of i "f ▹ n"]
by (metis One_nat_def Suc_le_lessD Suc_pred)

(* A positive delay certifies that φ_i halts on the selected prefix. *)
lemma delay_gr0_converg:
assumes "the (delay i x) > 0"
shows "φ i (e_take (the (delay i x)) x) ↓"
proof -
let ?P = "λj. j < e_length x ∧ the (eval r_result1 [e_length x, i, e_take (Suc j) x]) > 0"
have "∃j. ?P j"
proof (rule ccontr)
assume "¬ (∃j. ?P j)"
then have "delay i x ↓= 0"
using delay_def by simp
with assms show False by simp
qed
then have d: "the (delay i x) = Suc (Greatest ?P)"
using delay_def by simp
moreover have "⋀y. ?P y ⟹ y ≤ e_length x" by simp
ultimately have "?P (Greatest ?P)"
using ‹∃j. ?P j› GreatestI_ex_nat[where ?P="?P"] by blast
then have "the (eval r_result1 [e_length x, i, e_take (Suc (Greatest ?P)) x]) > 0"
by simp
then have "the (eval r_result1 [e_length x, i, e_take (the (delay i x)) x]) > 0"
using d by simp
then show ?thesis using r_result1_diverg_phi by fastforce
qed

(* If φ_i halts on every prefix of f, the delay grows beyond any bound n:
   choose m ≥ n large enough that the simulation budget suffices for prefix n. *)
lemma delay_unbounded:
fixes n :: nat
assumes "f ∈ ℛ" and "∀n. φ i (f ▹ n) ↓"
shows "∃m. the (delay i (f ▹ m)) > n"
proof -
from assms have "∃t. the (eval r_result1 [t, i, f ▹ n]) > 0"
using r_result1_converg_phi
by (metis le_refl option.exhaust_sel option.sel zero_less_Suc)
then obtain t where t: "the (eval r_result1 [t, i, f ▹ n]) > 0"
by auto
let ?m = "max n t"
have "Suc ?m ≥ t" by simp
have m: "the (eval r_result1 [Suc ?m, i, f ▹ n]) > 0"
proof -
let ?w = "eval r_result1 [t, i, f ▹ n]"
obtain v where v: "?w ↓= Suc v"
using t assms(2) r_result1_bivalent_phi by fastforce
have "eval r_result1 [Suc ?m, i, f ▹ n] = ?w"
using v t r_result1_saturating' ‹Suc ?m ≥ t› le_Suc_ex by fastforce
then show ?thesis using t by simp
qed
let ?x = "f ▹ ?m"
have "the (delay i ?x) > n"
proof -
let ?P = "λj. j < e_length ?x ∧ the (eval r_result1 [e_length ?x, i, e_take (Suc j) ?x]) > 0"
have "e_length ?x = Suc ?m" by simp
moreover have "e_take (Suc n) ?x = f ▹ n"
using assms(1) e_take_init by auto
ultimately have "?P n"
using m by simp
have "⋀y. ?P y ⟹ y ≤ e_length ?x" by simp
with ‹?P n› have "n ≤ (Greatest ?P)"
using Greatest_le_nat[of ?P n "e_length ?x"] by simp
moreover have "the (delay i ?x) = Suc (Greatest ?P)"
using delay_def ‹?P n› by auto
ultimately show ?thesis by simp
qed
then show ?thesis by auto
qed

(* The delay is monotone in the prefix length: a halting prefix found within a shorter
   segment is still found (by saturation of r_result1) within a longer one. *)
lemma delay_monotone:
assumes "f ∈ ℛ" and "n⇩1 ≤ n⇩2"
shows "the (delay i (f ▹ n⇩1)) ≤ the (delay i (f ▹ n⇩2))"
(is "the (delay i ?x1) ≤ the (delay i ?x2)")
proof (cases "the (delay i (f ▹ n⇩1)) = 0")
case True
then show ?thesis by simp
next
case False
let ?P1 = "λj. j < e_length ?x1 ∧ the (eval r_result1 [e_length ?x1, i, e_take (Suc j) ?x1]) > 0"
let ?P2 = "λj. j < e_length ?x2 ∧ the (eval r_result1 [e_length ?x2, i, e_take (Suc j) ?x2]) > 0"
from False have d1: "the (delay i ?x1) = Suc (Greatest ?P1)" "∃j. ?P1 j"
using delay_def option.collapse by fastforce+
moreover have "⋀y. ?P1 y ⟹ y ≤ e_length ?x1" by simp
ultimately have *: "?P1 (Greatest ?P1)" using GreatestI_ex_nat[of ?P1] by blast
let ?j = "Greatest ?P1"
from * have "?j < e_length ?x1" by auto
then have 1: "e_take (Suc ?j) ?x1 = e_take (Suc ?j) ?x2"
using assms e_take_init by auto
from * have 2: "?j < e_length ?x2" using assms(2) by auto
with 1 * have "the (eval r_result1 [e_length ?x1, i, e_take (Suc ?j) ?x2]) > 0"
by simp
moreover have "e_length ?x1 ≤ e_length ?x2"
using assms(2) by auto
ultimately have "the (eval r_result1 [e_length ?x2, i, e_take (Suc ?j) ?x2]) > 0"
using r_result1_saturating_the by simp
with 2 have "?P2 ?j" by simp
then have d2: "the (delay i ?x2) = Suc (Greatest ?P2)"
using delay_def by auto
have "⋀y. ?P2 y ⟹ y ≤ e_length ?x2" by simp
with ‹?P2 ?j› have "?j ≤ (Greatest ?P2)" using Greatest_le_nat[of ?P2] by blast
with d1 d2 show ?thesis by simp
qed

(* Combining unboundedness and monotonicity: from some m⇩0 on, the delay stays above n. *)
lemma delay_unbounded_monotone:
fixes n :: nat
assumes "f ∈ ℛ" and "∀n. φ i (f ▹ n) ↓"
shows "∃m⇩0. ∀m≥m⇩0. the (delay i (f ▹ m)) > n"
proof -
from assms delay_unbounded obtain m⇩0 where "the (delay i (f ▹ m⇩0)) > n"
by blast
then have "∀m≥m⇩0. the (delay i (f ▹ m)) > n"
using assms(1) delay_monotone order.strict_trans2 by blast
then show ?thesis by auto
qed

text ‹Now we can define a function that simulates an arbitrary strategy
$\varphi_i$ in a delayed way. The parameter $d$ is the default hypothesis for
when $\varphi_i$ does not halt within the time bound for any prefix.›

(* The total strategy: if the delay is 0 (no prefix halted in time), output the default
   hypothesis d; otherwise run φ_i on the longest prefix on which it is known to halt. *)
definition r_totalizer :: "nat ⇒ recf" where
"r_totalizer d ≡
Cn 2
(r_lifz
(r_constn 1 d)
(Cn 2 r_phi
[Id 2 0, Cn 2 r_take [Cn 2 r_delay [Id 2 0, Id 2 1], Id 2 1]]))
[Cn 2 r_delay [Id 2 0, Id 2 1], Id 2 0, Id 2 1]"

(* r_totalizer d is a well-formed binary recursive function for every default d. *)
lemma r_totalizer_recfn: "recfn 2 (r_totalizer d)"
unfolding r_totalizer_def by simp

(* Semantics of r_totalizer: default d when delay is 0, else φ_i on the delayed prefix. *)
lemma r_totalizer:
"eval (r_totalizer d) [i, x] =
(if the (delay i x) = 0 then Some d else φ i (e_take (the (delay i x)) x))"
proof -
let ?i = "Cn 2 r_delay [Id 2 0, Id 2 1]"
have "eval ?i [i, x] = eval r_delay [i, x]" for i x
using r_delay_recfn by simp
then have i: "eval ?i [i, x] = delay i x" for i x
using r_delay by (simp add: delay_def)
let ?t = "r_constn 1 d"
have t: "eval ?t [i, x] ↓= d" for i x by simp
let ?e1 = "Cn 2 r_take [?i, Id 2 1]"
let ?e = "Cn 2 r_phi [Id 2 0, ?e1]"
have "eval ?e1 [i, x] = eval r_take [the (delay i x), x]" for i x
using r_delay i delay_def by simp
then have "eval ?e1 [i, x] ↓= e_take (the (delay i x)) x" for i x
using delay_le_length by simp
then have e: "eval ?e [i, x] = φ i (e_take (the (delay i x)) x)"
using phi_def by simp
let ?z = "r_lifz ?t ?e"
have recfn_te: "recfn 2 ?t" "recfn 2 ?e"
by simp_all
then have "eval (r_totalizer d) [i, x] = eval (r_lifz ?t ?e) [the (delay i x), i, x]"
for i x
unfolding r_totalizer_def using i r_totalizer_recfn delay_def by simp
then have "eval (r_totalizer d) [i, x] =
(if the (delay i x) = 0 then eval ?t [i, x] else eval ?e [i, x])"
for i x
using recfn_te by simp
then show ?thesis using t e by simp
qed

(* Totality: positive delay guarantees convergence of the φ branch (delay_gr0_converg). *)
lemma r_totalizer_total: "total (r_totalizer d)"
proof (rule totalI2)
show "recfn 2 (r_totalizer d)" using r_totalizer_recfn by simp
show "⋀x y. eval (r_totalizer d) [x, y] ↓"
using r_totalizer delay_gr0_converg by simp
qed

(* Mathematical counterpart of r_totalizer, as a partial2 function. *)
definition totalizer :: "nat ⇒ partial2" where
"totalizer d i x ≡
if the (delay i x) = 0 then Some d else φ i (e_take (the (delay i x)) x)"

(* On initial segments of a total f, totalizer outputs φ_i on a shorter initial segment. *)
lemma totalizer_init:
assumes "f ∈ ℛ"
shows "totalizer d i (f ▹ n) =
(if the (delay i (f ▹ n)) = 0 then Some d
else φ i (f ▹ (the (delay i (f ▹ n)) - 1)))"
using assms e_take_delay_init by (simp add: totalizer_def)

(* totalizer d is total recursive for every default d. *)
lemma totalizer_in_R2: "totalizer d ∈ ℛ⇧2"
using totalizer_def r_totalizer r_totalizer_total R2I r_totalizer_recfn
by metis

text ‹For LIM, @{term totalizer} works with every default hypothesis
$d$.›

(* Strong Lemma R for LIM: the delayed total strategy learns every class the original
   strategy learns, in any hypothesis space ψ, for any default hypothesis d. *)
lemma lemma_R_for_Lim:
assumes "learn_lim ψ U (φ i)"
shows "learn_lim ψ U (totalizer d i)"
proof (rule learn_limI)
show env: "environment ψ U (totalizer d i)"
using assms learn_limE(1) totalizer_in_R2 by auto
show "∃j. ψ j = f ∧ (∀⇧∞n. totalizer d i (f ▹ n) ↓= j)" if "f ∈ U" for f
proof -
have "f ∈ ℛ"
using assms env that by auto
from assms learn_limE obtain j n⇩0 where
j: "ψ j = f" and
n0: "∀n≥n⇩0. (φ i) (f ▹ n) ↓= j"
using ‹f ∈ U› by metis
obtain m⇩0 where m0: "∀m≥m⇩0. the (delay i (f ▹ m)) > n⇩0"
using delay_unbounded_monotone ‹f ∈ ℛ› ‹f ∈ U› assms learn_limE(1)
by blast
then have "∀m≥m⇩0. totalizer d i (f ▹ m) = φ i (e_take (the (delay i (f ▹ m))) (f ▹ m))"
using totalizer_def by auto
then have "∀m≥m⇩0. totalizer d i (f ▹ m) = φ i (f ▹ (the (delay i (f ▹ m)) - 1))"
using e_take_delay_init m0 ‹f ∈ ℛ› by auto
with m0 n0 have "∀m≥m⇩0. totalizer d i (f ▹ m) ↓= j"
by auto
with j show ?thesis by auto
qed
qed

text ‹The effective version of Lemma~R for LIM states that there is a
total recursive function computing Gödel numbers of total strategies
from those of arbitrary strategies.›

(* Effective Lemma R for LIM: a total recursive g maps any Gödel number i to a Gödel
   number of a total strategy with at least the same LIM learning power. *)
lemma lemma_R_for_Lim_effective:
"∃g∈ℛ. ∀i.
φ (the (g i)) ∈ ℛ ∧
(∀U ψ. learn_lim ψ U (φ i) ⟶ learn_lim ψ U (φ (the (g i))))"
proof -
have "totalizer 0 ∈ 𝒫⇧2" using totalizer_in_R2 by auto
then obtain g where g: "g ∈ ℛ" "∀i. (totalizer 0) i = φ (the (g i))"
using numbering_translation_for_phi by blast
with totalizer_in_R2 have "∀i. φ (the (g i)) ∈ ℛ"
by (metis R2_proj_R1)
moreover from g(2) lemma_R_for_Lim[where ?d=0] have
"∀i U ψ. learn_lim ψ U (φ i) ⟶ learn_lim ψ U (φ (the (g i)))"
by simp
ultimately show ?thesis using g(1) by blast
qed

text ‹In order for us to use the previous lemma, we need a function
that performs the actual computation:›

(* A concrete recf chosen (via Hilbert choice) among the functions whose existence
   lemma_R_for_Lim_effective guarantees. *)
definition "r_limr ≡
SOME g.
recfn 1 g ∧
total g ∧
(∀i. φ (the (eval g [i])) ∈ ℛ ∧
(∀U ψ. learn_lim ψ U (φ i) ⟶ learn_lim ψ U (φ (the (eval g [i])))))"

(* The defining properties of r_limr, discharged by exhibiting a witness for SOME. *)
lemma r_limr_recfn: "recfn 1 r_limr"
and r_limr_total: "total r_limr"
and r_limr:
"φ (the (eval r_limr [i])) ∈ ℛ"
"learn_lim ψ U (φ i) ⟹ learn_lim ψ U (φ (the (eval r_limr [i])))"
proof -
let ?P = "λg.
g ∈ ℛ ∧
(∀i. φ (the (g i)) ∈ ℛ ∧ (∀U ψ. learn_lim ψ U (φ i) ⟶ learn_lim ψ U (φ (the (g i)))))"
let ?Q = "λg.
recfn 1 g ∧
total g ∧
(∀i. φ (the (eval g [i])) ∈ ℛ ∧
(∀U ψ. learn_lim ψ U (φ i) ⟶ learn_lim ψ U (φ (the (eval g [i])))))"
have "∃g. ?P g" using lemma_R_for_Lim_effective by auto
then obtain g where "?P g" by auto
then obtain g' where g': "recfn 1 g'" "total g'" "∀i. eval g' [i] = g i"
by blast
with ‹?P g› have "?Q g'" by simp
with r_limr_def someI_ex[of ?Q] show
"recfn 1 r_limr"
"total r_limr"
"φ (the (eval r_limr [i])) ∈ ℛ"
"learn_lim ψ U (φ i) ⟹ learn_lim ψ U (φ (the (eval r_limr [i])))"
by auto
qed

text ‹For BC, too, @{term totalizer} works with every default
hypothesis $d$.›

(* Strong Lemma R for BC: same delayed-simulation argument as for LIM, any default d. *)
lemma lemma_R_for_BC:
assumes "learn_bc ψ U (φ i)"
shows "learn_bc ψ U (totalizer d i)"
proof (rule learn_bcI)
show env: "environment ψ U (totalizer d i)"
using assms learn_bcE(1) totalizer_in_R2 by auto
show "∃n⇩0. ∀n≥n⇩0. ψ (the (totalizer d i (f ▹ n))) = f" if "f ∈ U" for f
proof -
have "f ∈ ℛ"
using assms env that by auto
obtain n⇩0 where n0: "∀n≥n⇩0. ψ (the ((φ i) (f ▹ n))) = f"
using assms learn_bcE ‹f ∈ U› by metis
obtain m⇩0 where m0: "∀m≥m⇩0. the (delay i (f ▹ m)) > n⇩0"
using delay_unbounded_monotone ‹f ∈ ℛ› ‹f ∈ U› assms learn_bcE(1)
by blast
then have "∀m≥m⇩0. totalizer d i (f ▹ m) = φ i (e_take (the (delay i (f ▹ m))) (f ▹ m))"
using totalizer_def by auto
then have "∀m≥m⇩0. totalizer d i (f ▹ m) = φ i (f ▹ (the (delay i (f ▹ m)) - 1))"
using e_take_delay_init m0 ‹f ∈ ℛ› by auto
with m0 n0 have "∀m≥m⇩0. ψ (the (totalizer d i (f ▹ m))) = f"
by auto
then show ?thesis by auto
qed
qed

(* Simple form: every BC-learnable class is BC-learnable by some total strategy. *)
corollary lemma_R_for_BC_simple:
assumes "learn_bc ψ U s"
shows "∃s'∈ℛ. learn_bc ψ U s'"
using assms lemma_R_for_BC totalizer_in_R2 learn_bcE
by (metis R2_proj_R1 learn_bcE(1) phi_universal)

text ‹For FIN the default hypothesis of @{term totalizer} must be
zero, signalling ``don't know yet''.›

(* Lemma R for FIN: the default hypothesis must be 0 ("don't know yet") so the delayed
   strategy outputs 0 before the original strategy has committed, and Suc j afterwards. *)
lemma lemma_R_for_FIN:
assumes "learn_fin ψ U (φ i)"
shows "learn_fin ψ U (totalizer 0 i)"
proof (rule learn_finI)
show env: "environment ψ U (totalizer 0 i)"
using assms learn_finE(1) totalizer_in_R2 by auto
show "∃j n⇩0. ψ j = f ∧
(∀n<n⇩0. totalizer 0 i (f ▹ n) ↓= 0) ∧
(∀n≥n⇩0. totalizer 0 i (f ▹ n) ↓= Suc j)"
if "f ∈ U" for f
proof -
have "f ∈ ℛ"
using assms env that by auto
from assms learn_finE[of ψ U "φ i"] obtain j where
j: "ψ j = f" and
ex_n0: "∃n⇩0. (∀n<n⇩0. (φ i) (f ▹ n) ↓= 0) ∧ (∀n≥n⇩0. (φ i) (f ▹ n) ↓= Suc j)"
using ‹f ∈ U› by blast
let ?Q = "λn⇩0. (∀n<n⇩0. (φ i) (f ▹ n) ↓= 0) ∧ (∀n≥n⇩0. (φ i) (f ▹ n) ↓= Suc j)"
define n⇩0 where "n⇩0 = Least ?Q"
with ex_n0 have n0: "?Q n⇩0" "∀n<n⇩0. ¬ ?Q n"
using LeastI_ex[of ?Q] not_less_Least[of _ ?Q] by blast+
define m⇩0 where "m⇩0 = (LEAST m⇩0. ∀m≥m⇩0. the (delay i (f ▹ m)) > n⇩0)"
(is "m⇩0 = Least ?P")
moreover have "∃m⇩0. ∀m≥m⇩0. the (delay i (f ▹ m)) > n⇩0"
using delay_unbounded_monotone ‹f∈ℛ› ‹f ∈ U› assms learn_finE(1)
by simp
ultimately have m0: "?P m⇩0" "∀m<m⇩0. ¬ ?P m"
using LeastI_ex[of ?P] not_less_Least[of _ ?P] by blast+
then have "∀m≥m⇩0. totalizer 0 i (f ▹ m) = φ i (e_take (the (delay i (f ▹ m))) (f ▹ m))"
using totalizer_def by auto
then have "∀m≥m⇩0. totalizer 0 i (f ▹ m) = φ i (f ▹ (the (delay i (f ▹ m)) - 1))"
using e_take_delay_init m0 ‹f∈ℛ› by auto
with m0 n0 have "∀m≥m⇩0. totalizer 0 i (f ▹ m) ↓= Suc j"
by auto
moreover have "totalizer 0 i (f ▹ m) ↓= 0" if "m < m⇩0" for m
proof (cases "the (delay i (f ▹ m)) = 0")
case True
then show ?thesis by (simp add: totalizer_def)
next
case False
then have "the (delay i (f ▹ m)) ≤ n⇩0"
using m0 that ‹f ∈ ℛ› delay_monotone by (meson leI order.strict_trans2)
then show ?thesis
using ‹f ∈ ℛ› n0(1) totalizer_init by (simp add: Suc_le_lessD)
qed
ultimately show ?thesis using j by auto
qed
qed

subsection ‹Weaker Lemma R for CP and TOTAL›

text ‹For TOTAL the default hypothesis used by @{term totalizer}
depends on the hypothesis space, because it must refer to a total function in
that space. Consequently the total strategy depends on the hypothesis space,
which makes this form of Lemma~R weaker than the ones in the previous
section.›

(* Weaker Lemma R for TOTAL: the default d must denote a total function in ψ, so the
   total strategy depends on the hypothesis space (but not on the class U). *)
lemma lemma_R_for_TOTAL:
fixes ψ :: partial2
shows "∃d. ∀U. ∀i. learn_total ψ U (φ i) ⟶  learn_total ψ U (totalizer d i)"
proof (cases "∃d. ψ d ∈ ℛ")
case True
then obtain d where "ψ d ∈ ℛ" by auto
have "learn_total ψ U (totalizer d i)" if "learn_total ψ U (φ i)" for U i
proof (rule learn_totalI)
show env: "environment ψ U (totalizer d i)"
using that learn_totalE(1) totalizer_in_R2 by auto
show "⋀f. f ∈ U ⟹ ∃j. ψ j = f ∧ (∀⇧∞n. totalizer d i (f ▹ n) ↓= j)"
using that learn_total_def lemma_R_for_Lim[where ?d=d] learn_limE(2) by metis
show "ψ (the (totalizer d i (f ▹ n))) ∈ ℛ" if "f ∈ U" for f n
proof (cases "the (delay i (f ▹ n)) = 0")
case True
then show ?thesis using totalizer_def ‹ψ d ∈ ℛ› by simp
next
case False
have "f ∈ ℛ"
using that env by auto
then show ?thesis
using False that ‹learn_total ψ U (φ i)› totalizer_init learn_totalE(3)
by simp
qed
qed
then show ?thesis by auto
next
case False
then show ?thesis using learn_total_def lemma_R_for_Lim by auto
qed

(* Simple form: every TOTAL-learnable class is TOTAL-learnable by some total strategy. *)
corollary lemma_R_for_TOTAL_simple:
assumes "learn_total ψ U s"
shows "∃s'∈ℛ. learn_total ψ U s'"
using assms lemma_R_for_TOTAL totalizer_in_R2
by (metis R2_proj_R1 learn_totalE(1) phi_universal)

text ‹For CP the default hypothesis used by @{term totalizer} depends
on both the hypothesis space and the class. Therefore the total strategy
depends on both the the hypothesis space and the class, which makes Lemma~R
for CP even weaker than the one for TOTAL.›

(* Weakest Lemma R, for CP: the default d must be a ψ-index of some function in U,
   so the total strategy depends on both the hypothesis space and the class. *)
lemma lemma_R_for_CP:
fixes ψ :: partial2 and U :: "partial1 set"
assumes "learn_cp ψ U (φ i)"
shows "∃d. learn_cp ψ U (totalizer d i)"
proof (cases "U = {}")
case True
then show ?thesis using assms learn_cp_def lemma_R_for_Lim by auto
next
case False
then obtain f where "f ∈ U" by auto
from ‹f ∈ U› obtain d where "ψ d = f"
using learn_cpE(2)[OF assms] by auto
with ‹f ∈ U› have "ψ d ∈ U" by simp
have "learn_cp ψ U (totalizer d i)"
proof (rule learn_cpI)
show env: "environment ψ U (totalizer d i)"
using assms learn_cpE(1) totalizer_in_R2 by auto
show "⋀f. f ∈ U ⟹ ∃j. ψ j = f ∧ (∀⇧∞n. totalizer d i (f ▹ n) ↓= j)"
using assms learn_cp_def lemma_R_for_Lim[where ?d=d] learn_limE(2) by metis
show "ψ (the (totalizer d i (f ▹ n))) ∈ U" if "f ∈ U" for f n
proof (cases "the (delay i (f ▹ n)) = 0")
case True
then show ?thesis using totalizer_def ‹ψ d ∈ U› by simp
next
case False
then show ?thesis
using that env assms totalizer_init learn_cpE(3) by auto
qed
qed
then show ?thesis by auto
qed

subsection ‹No Lemma R for CONS›

text ‹This section demonstrates that the class $V_{01}$ of all total
recursive functions $f$ where $f(0)$ or $f(1)$ is a Gödel number of $f$ can
be consistently learned in the limit, but not by a total strategy. This implies
that Lemma~R does not hold for CONS.›

(* The class V01: total recursive functions that carry one of their own Gödel numbers
   in their value at argument 0 or 1 ("self-describing" functions). *)
definition V01 :: "partial1 set" ("V⇩0⇩1") where
"V⇩0⇩1 = {f. f ∈ ℛ ∧ (φ (the (f 0)) = f ∨ φ (the (f 1)) = f)}"

subsubsection ‹No total CONS strategy for @{term "V⇩0⇩1"}\label{s:v01_not_total}›

text ‹In order to show that no total strategy can learn @{term
"V⇩0⇩1"} we construct, for each total strategy $S$, one or two
functions in @{term "V⇩0⇩1"} such that $S$ fails for at least one
of them. At the core of this construction is a process that given a total
recursive strategy $S$ and numbers $z, i, j \in \mathbb{N}$ builds a function
$f$ as follows: Set $f(0) = i$ and $f(1) = j$. For $x\geq1$:
\begin{enumerate}
\item[(a)] Check whether $S$ changes its hypothesis when $f^x$ is
extended by 0, that is, if $S(f^x) \neq S(f^x0)$. If so, set $f(x+1) = 0$.
\item[(b)] Otherwise check if $S$ changes its hypothesis when $f^x$ is extended
by $1$, that is, if $S(f^x) \neq S(f^x1)$. If so, set $f(x+1) = 1$.
\item[(c)] If neither happens, set $f(x+1) = z$.
\end{enumerate}
In other words, as long as we can force $S$ to change its hypothesis by
extending the function by 0 or 1, we do just that. Now there are two
cases:
\begin{enumerate}
\item[Case 1.] For all $x\geq1$ either (a) or (b) occurs; then $S$
changes its hypothesis on $f$ all the time and thus does not learn $f$ in
the limit (not to mention consistently). The value of $z$ makes no
difference in this case.
\item[Case 2.] For some minimal $x$, (c) occurs, that is,
there is an $f^x$ such that $h := S(f^x) = S(f^x0) = S(f^x1)$. But the
hypothesis $h$ cannot be consistent with both prefixes $f^x0$ and $f^x1$.
Running the process once with $z = 0$ and once with $z = 1$ yields two
functions starting with $f^x0$ and $f^x1$, respectively, such that $S$
outputs the same hypothesis, $h$, on both prefixes and thus cannot be
consistent for both functions.
\end{enumerate}
This process is computable because $S$ is total. The construction does not
work if we only assume $S$ to be a CONS strategy for $V_{01}$, because we
need to be able to apply $S$ to prefixes not in $V_{01}$.

The parameters $i$ and $j$ provide flexibility to find functions built by the
above process that are actually in $V_{01}$. To this end we will use
Smullyan's double fixed-point theorem.›

(* Local context fixing an arbitrary TOTAL recursive strategy s to diagonalize against. *)
context
fixes s :: partial1
assumes s_in_R1 [simp, intro]: "s ∈ ℛ"
begin

text ‹The function @{term prefixes} constructs prefixes according to the
aforementioned process.›

(* Prefix construction of the diagonalization process: start with [i], append j at
   position 1, and thereafter append whichever of 0, 1 forces s to change its
   hypothesis — or z if neither does. *)
fun prefixes :: "nat ⇒ nat ⇒ nat ⇒ nat ⇒ nat list" where
"prefixes z i j 0 = [i]"
| "prefixes z i j (Suc x) = prefixes z i j x @
[if x = 0 then j
else if s (list_encode (prefixes z i j x @ [0])) ≠ s (list_encode (prefixes z i j x))
then 0
else if s (list_encode (prefixes z i j x @ [1])) ≠ s (list_encode (prefixes z i j x))
then 1
else z]"

(* prefixes z i j x has exactly x + 1 elements. *)
lemma prefixes_length: "length (prefixes z i j x) = Suc x"
by (induction x) simp_all

text ‹The functions @{term[names_short] "adverse z i j"} are the
functions constructed by @{term[names_short] "prefixes"}.›

(* The adverse function: its value at x is the last element of the (x+1)-element prefix. *)
definition adverse :: "nat ⇒ nat ⇒ nat ⇒ nat ⇒ nat option" where
"adverse z i j x ≡ Some (last (prefixes z i j x))"

(* Initial segments of adverse z i j encode exactly the prefixes built by "prefixes". *)
lemma init_adverse_eq_prefixes: "(adverse z i j) ▹ n = list_encode (prefixes z i j n)"
proof -
  have "prefix (adverse z i j) n = prefixes z i j n"
  proof (induction n)
    case 0
    then show ?case using adverse_def prefixes_length prefixI' by fastforce
  next
    case (Suc n)
    (* NOTE(review): the body of this induction step was lost in extraction; the step
       extends the prefix by the last element of prefixes z i j (Suc n) — reconstructed,
       confirm against the original theory in Isabelle. *)
    then show ?case
      using adverse_def prefixes_length prefix_Suc by fastforce
  qed
  then show ?thesis by (simp add: init_def)
qed

(* NOTE(review): the "lemma" header and proof of these two facts were lost in
   extraction; reconstructed (the values follow by evaluating prefixes at 0 and 1) —
   confirm name and proof method against the original theory in Isabelle. *)
lemma adverse_at_01:
  "adverse z i j 0 ↓= i"
  "adverse z i j 1 ↓= j"
  unfolding adverse_def by simp_all

text ‹Had we introduced ternary partial recursive functions, the
@{term[names_short] "adverse z"} functions would be among them.›

(* adverse z is computed by a total ternary recf: build the prefix list by primitive
   recursion (b), reorder arguments (b'), and take the last element (r). *)
lemma adverse_in_R3: "∃r. recfn 3 r ∧ total r ∧ (λi j x. eval r [i, j, x]) = adverse z"
proof -
  obtain rs where rs: "recfn 1 rs" "total rs" "(λx. eval rs [x]) = s"
    using R1E by auto
  have s_total: "⋀x. s x ↓" by simp

  (* Base case of the recursion: the singleton list [i]. *)
  define f where "f = Cn 2 r_singleton_encode [Id 2 0]"
  then have "recfn 2 f" by simp
  have f: "⋀i j. eval f [i, j] ↓= list_encode [i]"
    unfolding f_def by simp

  (* ch1: the "extend by 1 forces a mind change?" test; yields 1 or the default z. *)
  define ch1 where "ch1 = Cn 4 r_ifeq
    [Cn 4 rs [Cn 4 r_snoc [Id 4 1, r_constn 3 1]],
     Cn 4 rs [Id 4 1],
     r_dummy 3 (r_const z),
     r_constn 3 1]"
  then have ch1: "recfn 4 ch1" "total ch1"
    using Cn_total prim_recfn_total rs by auto

  (* ch0: the "extend by 0 forces a mind change?" test; falls through to ch1. *)
  define ch0 where "ch0 = Cn 4 r_ifeq
    [Cn 4 rs [Cn 4 r_snoc [Id 4 1, r_constn 3 0]],
     Cn 4 rs [Id 4 1],
     ch1,
     r_constn 3 0]"
  then have ch0_total: "total ch0" "recfn 4 ch0"
    using Cn_total prim_recfn_total rs ch1 by auto

  have "eval ch1 [l, v, i, j] ↓= (if s (e_snoc v 1) = s v then z else 1)" for l v i j
  proof -
    have "eval ch1 [l, v, i, j] = eval r_ifeq [the (s (e_snoc v 1)), the (s v), z, 1]"
      unfolding ch1_def using rs by auto
    then show ?thesis by (simp add: s_total option.expand)
  qed
  moreover have "eval ch0 [l, v, i, j] ↓=
      (if s (e_snoc v 0) = s v then the (eval ch1 [l, v, i, j]) else 0)" for l v i j
  proof -
    have "eval ch0 [l, v, i, j] =
        eval r_ifeq [the (s (e_snoc v 0)), the (s v), the (eval ch1 [l, v, i, j]), 0]"
      unfolding ch0_def using rs ch1 by auto
    then show ?thesis by (simp add: s_total option.expand)
  qed
  ultimately have ch0: "⋀l v i j. eval ch0 [l, v, i, j] ↓=
      (if s (e_snoc v 0) ≠ s v then 0
       else if s (e_snoc v 1) ≠ s v then 1 else z)"
    by simp

  (* app: the value appended at step l; j at step 0, else the mind-change test. *)
  define app where "app = Cn 4 r_ifz [Id 4 0, Id 4 3, ch0]"
  then have "recfn 4 app" "total app"
    using ch0_total totalI4 by auto
  have "eval app [l, v, i, j] ↓= (if l = 0 then j else the (eval ch0 [l, v, i, j]))" for l v i j
    unfolding app_def using ch0_total by simp
  with ch0 have app: "⋀l v i j. eval app [l, v, i, j] ↓=
      (if l = 0 then j
       else if s (e_snoc v 0) ≠ s v then 0
       else if s (e_snoc v 1) ≠ s v then 1 else z)"
    by simp

  (* g: the full recursion step, appending app's value to the encoded prefix. *)
  define g where "g = Cn 4 r_snoc [Id 4 1, app]"
  with app have g: "⋀l v i j. eval g [l, v, i, j] ↓= e_snoc v
      (if l = 0 then j
       else if s (e_snoc v 0) ≠ s v then 0
       else if s (e_snoc v 1) ≠ s v then 1 else z)"
    using ‹recfn 4 app› by auto
  from g_def have "recfn 4 g" "total g"
    using ‹recfn 4 app› ‹total app› Cn_total Mn_free_imp_total by auto

  (* b: primitive recursion computing the encoded prefix list. *)
  define b where "b = Pr 2 f g"
  then have "recfn 3 b"
    using ‹recfn 2 f› ‹recfn 4 g› by simp
  have b: "eval b [x, i, j] ↓= list_encode (prefixes z i j x)" for x i j
  proof (induction x)
    case 0
    then show ?case
      unfolding b_def using f ‹recfn 2 f› ‹recfn 4 g› by simp
  next
    case (Suc x)
    then have "eval b [Suc x, i, j] = eval g [x, the (eval b [x, i, j]), i, j]"
      using b_def ‹recfn 3 b› by simp
    also have "... ↓=
      (let v = list_encode (prefixes z i j x)
       in e_snoc v
        (if x = 0 then j
         else if s (e_snoc v 0) ≠ s v then 0
         else if s (e_snoc v 1) ≠ s v then 1 else z))"
      using g Suc by simp
    also have "... ↓=
      (let v = list_encode (prefixes z i j x)
       in e_snoc v
        (if x = 0 then j
         else if s (list_encode (prefixes z i j x @ [0])) ≠ s v then 0
         else if s (list_encode (prefixes z i j x @ [1])) ≠ s v then 1 else z))"
      using list_decode_encode by presburger
    finally show ?case by simp
  qed

  (* b': reorder arguments from [x, i, j] to [i, j, x]. *)
  define b' where "b' = Cn 3 b [Id 3 2, Id 3 0, Id 3 1]"
  then have "recfn 3 b'"
    using ‹recfn 3 b› by simp
  with b have b': "⋀i j x. eval b' [i, j, x] ↓= list_encode (prefixes z i j x)"
    using b'_def by simp

  (* r: the last element of the prefix list, i.e. adverse z i j x. *)
  define r where "r = Cn 3 r_last [b']"
  then have "recfn 3 r"
    using ‹recfn 3 b'› by simp
  with b' have "⋀i j x. eval r [i, j, x] ↓= last (prefixes z i j x)"
    using r_def prefixes_length by auto
  moreover from this have "total r"
    using totalI3 ‹recfn 3 r› by simp
  ultimately have "(λi j x. eval r [i, j, x]) = adverse z"
    (* NOTE(review): the proof method of this step was lost in extraction;
       restored — confirm against the original theory in Isabelle. *)
    using adverse_def by auto
  with ‹recfn 3 r› ‹total r› show ?thesis by auto
qed

(* NOTE(review): the statement and "obtain" header of this lemma were lost in
   extraction; reconstructed as the natural consequence of adverse_in_R3 (each
   adverse z i j is total recursive) — confirm name and statement against the
   original theory in Isabelle. *)
lemma adverse_in_R1: "adverse z i j ∈ ℛ"
proof -
  from adverse_in_R3 obtain r where
    r: "recfn 3 r" "total r" "(λi j x. eval r [i, j, x]) = adverse z"
    by blast
  (* Specialize the first two arguments of r to the constants i and j. *)
  define rij where "rij = Cn 1 r [r_const i, r_const j, Id 1 0]"
  then have "recfn 1 rij" "total rij"
    using r(1,2) Cn_total Mn_free_imp_total by auto
  from rij_def have "⋀x. eval rij [x] = eval r [i, j, x]"
    using r(1) by auto
  with r(3) have "⋀x. eval rij [x] = adverse z i j x"
    by metis
  with ‹recfn 1 rij› ‹total rij› show ?thesis by auto
qed

text ‹Next we show that for every $z$ there are $i$, $j$ such that
@{term[names_short] "adverse z i j ∈ V⇩0⇩1"}. The first step is to show that for every
$z$, Gödel numbers for @{term[names_short] "adverse z i j"} can be computed
uniformly from $i$ and $j$.›

(* For every z there is a total recursive f such that f i j is (the option
   wrapping of) a Gödel number of adverse z i j.  The number is computed
   uniformly from i and j via the s-m-n theorem (r_smn).
   NOTE(review): the justification for the 'obtain r' step below appears to
   have been dropped — confirm against the original theory. *)
lemma phi_translate_adverse: "∃f∈ℛ⇧2.∀i j. φ (the (f i j)) = adverse z i j"
proof -
obtain r where r: "recfn 3 r" "total r" "(λi j x. eval r [i, j, x]) = adverse z"
let ?p = "encode r"
(* rf i j applies s-m-n to fix the code ?p and the two arguments i, j *)
define rf where "rf = Cn 2 (r_smn 1 2) [r_dummy 1 (r_const ?p), Id 2 0, Id 2 1]"
then have "recfn 2 rf" and "total rf"
using Mn_free_imp_total by simp_all
define f where "f ≡ λi j. eval rf [i, j]"
with ‹recfn 2 rf› ‹total rf› have "f ∈ ℛ⇧2" by auto
have rf: "eval rf [i, j] = eval (r_smn 1 2) [?p, i, j]" for i j
unfolding rf_def by simp
{
fix i j x
(* chain: φ at the s-m-n index equals the universal machine applied to r *)
have "φ (the (f i j)) x = eval r_phi [the (f i j), x]"
using phi_def by simp
also have "... = eval r_phi [the (eval rf [i, j]), x]"
using f_def by simp
also have "... = eval (r_universal 1) [the (eval (r_smn 1 2) [?p, i, j]), x]"
using rf r_phi_def by simp
also have "... = eval (r_universal (2 + 1)) (?p # [i, j] @ [x])"
using smn_lemma[of 1 "[i, j]" 2 "[x]"] by simp
also have "... = eval (r_universal 3) [?p, i, j, x]"
by simp
also have "... = eval r [i, j, x]"
using r_universal r by force
also have "... = adverse z i j x"
using r(3) by metis
finally have "φ (the (f i j)) x = adverse z i j x" .
}
with ‹f ∈ ℛ⇧2› show ?thesis by blast
qed

text ‹The second, and final, step is to apply Smullyan's double
fixed-point theorem to show the existence of @{term[names_short] adverse}
functions in @{term "V⇩0⇩1"}.›

(* Smullyan's double fixed-point theorem yields m, n with φ m = adverse 0 m n
   and φ n = adverse 1 m n; hence both adverse functions lie in V⇩0⇩1.
   NOTE(review): several justification lines (for the two 'obtain f⇩z' steps
   and the two 'moreover have' steps) appear to have been dropped — confirm
   against the original theory. *)
lemma adverse_in_V01: "∃m n. adverse 0 m n ∈ V⇩0⇩1 ∧ adverse 1 m n ∈ V⇩0⇩1"
proof -
obtain f⇩0 where f0: "f⇩0 ∈ ℛ⇧2" "∀i j. φ (the (f⇩0 i j)) = adverse 0 i j"
obtain f⇩1 where f1: "f⇩1 ∈ ℛ⇧2" "∀i j. φ (the (f⇩1 i j)) = adverse 1 i j"
obtain m n where "φ m = φ (the (f⇩0 m n))" and "φ n = φ (the (f⇩1 m n))"
using smullyan_double_fixed_point[OF f0(1) f1(1)] by blast
with f0(2) f1(2) have "φ m = adverse 0 m n" and "φ n = adverse 1 m n"
by simp_all
moreover have "the (adverse 0 m n 0) = m" and "the (adverse 1 m n 1) = n"
ultimately have
"φ (the (adverse 0 m n 0)) = adverse 0 m n"
"φ (the (adverse 1 m n 1)) = adverse 1 m n"
by simp_all
moreover have "adverse 0 m n ∈ ℛ" and "adverse 1 m n ∈ ℛ"
ultimately show ?thesis using V01_def by auto
qed

text ‹Before we prove the main result of this section we need some
lemmas regarding the shape of the @{term[names_short] adverse} functions and
hypothesis changes of the strategy.›

(* Recurrence for adverse at successor arguments: the appended value is 0 or
   1 if that value forces the strategy s to change its hypothesis on the
   current prefix, and z otherwise (steps (a)/(b)/(c) of the process).
   NOTE(review): the 'lemma <name>:' header line (presumably adverse_Suc,
   referenced below) and the closing steps of the proof before 'qed' appear
   to be missing from this fragment — confirm against the original theory. *)
assumes "x > 0"
shows "adverse z i j (Suc x) ↓=
(if s (e_snoc ((adverse z i j) ▹ x) 0) ≠ s ((adverse z i j) ▹ x)
then 0
else if s (e_snoc ((adverse z i j) ▹ x) 1) ≠ s ((adverse z i j) ▹ x)
then 1 else z)"
proof -
have "adverse z i j (Suc x) ↓=
(if s (list_encode (prefixes z i j x @ [0])) ≠ s (list_encode (prefixes z i j x))
then 0
else if s (list_encode (prefixes z i j x @ [1])) ≠ s (list_encode (prefixes z i j x))
then 1 else z)"
qed

text ‹The process in the proof sketch (page~\pageref{s:v01_not_total})
consists of steps (a), (b), and (c). The next abbreviation is true iff.\ step
(a) or (b) applies.›

(* True iff appending 0 or appending 1 to the length-x prefix of
   adverse z i j makes the strategy s change its hypothesis, i.e. iff step
   (a) or step (b) of the diagonalization process applies at x. *)
abbreviation "hyp_change z i j x ≡
s (e_snoc ((adverse z i j) ▹ x) 0) ≠ s ((adverse z i j) ▹ x) ∨
s (e_snoc ((adverse z i j) ▹ x) 1) ≠ s ((adverse z i j) ▹ x)"

text ‹If step (c) applies, the process appends $z$.›

(* If neither appended value changes the hypothesis (step (c)), the process
   appends z itself.
   NOTE(review): the 'lemma <name>:' header line and the proof appear to be
   missing from this fragment — confirm against the original theory. *)
assumes "x > 0" and "¬ hyp_change z i j x"
shows "adverse z i j (Suc x) ↓= z"

text ‹While (a) or (b) applies, the process appends a value that
forces $S$ to change its hypothesis.›

(* While every positive x ≤ n triggers a hypothesis change, the adverse
   functions for different parameters z and z' agree up to Suc n: in that
   case the appended value (0 or 1) does not depend on z.
   NOTE(review): several steps below lack their justifications ('case 0',
   the 'moreover have' statements, the first 'then have ... ↓= 0') —
   apparently dropped; confirm against the original theory. *)
lemma while_hyp_change:
assumes "∀x≤n. x > 0 ⟶ hyp_change z i j x"
shows "∀x≤Suc n. adverse z i j x = adverse z' i j x"
using assms
proof (induction n)
case 0
next
case (Suc n)
then have "∀x≤n. x > 0 ⟶ hyp_change z i j x" by simp
with Suc have "∀x≤Suc n. x > 0 ⟶ adverse z i j x = adverse z' i j x"
by simp
moreover have "adverse z i j 0 = adverse z' i j 0"
ultimately have zz': "∀x≤Suc n. adverse z i j x = adverse z' i j x"
by auto
moreover have "adverse z i j ∈ ℛ" "adverse z' i j ∈ ℛ"
ultimately have init_zz': "(adverse z i j) ▹ (Suc n) = (adverse z' i j) ▹ (Suc n)"
using init_eqI by blast

(* the step to x = Suc (Suc n): both sides append the same value (0 or 1) *)
have "adverse z i j (Suc (Suc n)) = adverse z' i j (Suc (Suc n))"
proof (cases "s (e_snoc ((adverse z i j) ▹ (Suc n)) 0) ≠ s ((adverse z i j) ▹ (Suc n))")
case True
then have "s (e_snoc ((adverse z' i j) ▹ (Suc n)) 0) ≠ s ((adverse z' i j) ▹ (Suc n))"
using init_zz' by simp
then have "adverse z' i j (Suc (Suc n)) ↓= 0"
moreover have "adverse z i j (Suc (Suc n)) ↓= 0"
ultimately show ?thesis by simp
next
case False
then have "s (e_snoc ((adverse z' i j) ▹ (Suc n)) 0) = s ((adverse z' i j) ▹ (Suc n))"
using init_zz' by simp
then have "adverse z' i j (Suc (Suc n)) ↓= 1"
using init_zz' Suc.prems adverse_Suc by (smt le_refl zero_less_Suc)
moreover have "adverse z i j (Suc (Suc n)) ↓= 1"
using False Suc.prems adverse_Suc by auto
ultimately show ?thesis by simp
qed
with zz' show ?case using le_SucE by blast
qed

text ‹The next result corresponds to Case~1 from the proof sketch.›

(* Case 1 of the proof sketch: if every positive x triggers a hypothesis
   change, then s changes hypotheses infinitely often on adverse z i j and
   hence does not learn it in the limit.
   NOTE(review): the justifications for the two 'then have' steps in each
   case branch appear to have been dropped — confirm against the original
   theory. *)
lemma always_hyp_change_no_lim:
assumes "∀x>0. hyp_change z i j x"
shows "¬ learn_lim φ {adverse z i j} s"
proof (rule infinite_hyp_changes_not_Lim[of "adverse z i j"])
show "adverse z i j ∈ {adverse z i j}" by simp
show "∀n. ∃m⇩1>n. ∃m⇩2>n. s (adverse z i j ▹ m⇩1) ≠ s (adverse z i j ▹ m⇩2)"
proof
fix n
from assms obtain m⇩1 where m1: "m⇩1 > n" "hyp_change z i j m⇩1"
by auto
(* the hypothesis at m⇩1 differs from the one at Suc m⇩1 *)
have "s (adverse z i j ▹ m⇩1) ≠ s (adverse z i j ▹ (Suc m⇩1))"
proof (cases "s (e_snoc ((adverse z i j) ▹ m⇩1) 0) ≠ s ((adverse z i j) ▹ m⇩1)")
case True
then have "adverse z i j (Suc m⇩1) ↓= 0"
then have "(adverse z i j) ▹ (Suc m⇩1) = e_snoc ((adverse z i j) ▹ m⇩1) 0"
with True show ?thesis by simp
next
case False
then have "adverse z i j (Suc m⇩1) ↓= 1"
then have "(adverse z i j) ▹ (Suc m⇩1) = e_snoc ((adverse z i j) ▹ m⇩1) 1"
with False m1(2) show ?thesis by simp
qed
then show "∃m⇩1>n. ∃m⇩2>n. s (adverse z i j ▹ m⇩1) ≠ s (adverse z i j ▹ m⇩2)"
using less_SucI m1(1) by blast
qed
qed

text ‹The next result corresponds to Case~2 from the proof sketch.›

(* Case 2 of the proof sketch: if at some positive x no hypothesis change is
   forced, then at the least such point xmin the prefixes of adverse 0 i j
   and adverse 1 i j differ (they end in 0 resp. 1) while s outputs the same
   hypothesis on both — so s is not consistent on the pair.
   NOTE(review): the justifications after 'init_z0/init_z1' and before
   'a0/a1' appear to have been dropped — confirm against the original
   theory. *)
lemma no_hyp_change_no_cons:
assumes "x > 0" and "¬ hyp_change z i j x"
shows "¬ learn_cons φ {adverse 0 i j, adverse 1 i j} s"
proof -
(* xmin: the least positive point without a hypothesis change *)
let ?P = "λx. x > 0 ∧ ¬ hyp_change z i j x"
define xmin where "xmin = Least ?P"
with assms have xmin:
"?P xmin"
"⋀x. x < xmin ⟹ ¬ ?P x"
using LeastI[of ?P] not_less_Least[of _ ?P] by simp_all
then have "xmin > 0" by simp

(* below xmin every positive point forces a change, so all adverse
   variants agree up to xmin *)
have "∀x≤xmin - 1. x > 0 ⟶ hyp_change z i j x"
using xmin by (metis One_nat_def Suc_pred le_imp_less_Suc)
then have
"∀x≤xmin. adverse z i j x = adverse 0 i j x"
"∀x≤xmin. adverse z i j x = adverse 1 i j x"
using while_hyp_change[of "xmin - 1" z i j 0]
using while_hyp_change[of "xmin - 1" z i j 1]
by simp_all
then have
init_z0: "(adverse z i j) ▹ xmin = (adverse 0 i j) ▹ xmin" and
init_z1: "(adverse z i j) ▹ xmin = (adverse 1 i j) ▹ xmin"
then have
a0: "adverse 0 i j (Suc xmin) ↓= 0" and
a1: "adverse 1 i j (Suc xmin) ↓= 1"
by metis+
then have
i0: "(adverse 0 i j) ▹ (Suc xmin) = e_snoc ((adverse z i j) ▹ xmin) 0" and
i1: "(adverse 1 i j) ▹ (Suc xmin) = e_snoc ((adverse z i j) ▹ xmin) 1"
using init_z0 init_z1 by (simp_all add: init_Suc_snoc)
moreover have
"s (e_snoc ((adverse z i j) ▹ xmin) 0) = s ((adverse z i j) ▹ xmin)"
"s (e_snoc ((adverse z i j) ▹ xmin) 1) = s ((adverse z i j) ▹ xmin)"
using xmin by simp_all
ultimately have
"s ((adverse 0 i j) ▹ (Suc xmin)) = s ((adverse z i j) ▹ xmin)"
"s ((adverse 1 i j) ▹ (Suc xmin)) = s ((adverse z i j) ▹ xmin)"
by simp_all
then have
"s ((adverse 0 i j) ▹ (Suc xmin)) = s ((adverse 1 i j) ▹ (Suc xmin))"
by simp
moreover have "(adverse 0 i j) ▹ (Suc xmin) ≠ (adverse 1 i j) ▹ (Suc xmin)"
using a0 a1 i0 i1 by (metis append1_eq_conv list_decode_encode zero_neq_one)
ultimately show "¬ learn_cons φ {adverse 0 i j, adverse 1 i j} s"
using same_hyp_different_init_not_cons by blast
qed

text ‹Combining the previous two lemmas shows that @{term
"V⇩0⇩1"} cannot be learned consistently in the limit by the total
strategy $S$.›

(* Main result of this part: the total strategy s cannot learn V⇩0⇩1
   consistently in the limit.  Combines Case 1 (always_hyp_change_no_lim)
   and Case 2 (no_hyp_change_no_cons) for the adverse functions from
   adverse_in_V01.
   NOTE(review): the justification for the 'obtain m n' step appears to be
   missing (presumably 'using adverse_in_V01 by auto') — confirm against
   the original theory. *)
lemma V01_not_in_R_cons: "¬ learn_cons φ V⇩0⇩1 s"
proof -
obtain m n where
mn0: "adverse 0 m n ∈ V⇩0⇩1" and
mn1: "adverse 1 m n ∈ V⇩0⇩1"
show "¬ learn_cons φ V⇩0⇩1 s"
proof (cases "∀x>0. hyp_change 0 m n x")
case True
then have "¬ learn_lim φ {adverse 0 m n} s"
using always_hyp_change_no_lim by simp
with mn0 show ?thesis
using learn_cons_def learn_lim_closed_subseteq by auto
next
case False
then obtain x where x: "x > 0" "¬ hyp_change 0 m n x" by auto
then have "¬ learn_cons φ {adverse 0 m n, adverse 1 m n} s"
using no_hyp_change_no_cons[OF x] by simp
with mn0 mn1 show ?thesis using learn_cons_closed_subseteq by auto
qed
qed

end

subsubsection ‹@{term "V⇩0⇩1"} is in CONS›

text ‹At first glance, consistently learning @{term "V⇩0⇩1"} looks fairly
easy. After all every @{term "f ∈ V⇩0⇩1"} provides a Gödel number of itself
either at argument 0 or 1. A strategy only has to figure out which one is
right. However, the strategy $S$ we are going to devise does not always
converge to $f(0)$ or $f(1)$. Instead it uses a technique called
``amalgamation''. The amalgamation of two Gödel numbers $i$ and $j$ is a
function whose value at $x$ is determined by simulating $\varphi_i(x)$ and
$\varphi_j(x)$ in parallel and outputting the value of the first one to halt.
If neither halts the value is undefined. There is a function
$a\in\mathcal{R}^2$ such that $\varphi_{a(i,j)}$ is the amalgamation of $i$
and $j$.

If @{term "f ∈ V⇩0⇩1"} then $\varphi_{a(f(0), f(1))}$ is
total because by definition of @{term "V⇩0⇩1"} we have
$\varphi_{f(0)} = f$ or $\varphi_{f(1)} = f$ and $f$ is total.

Given a prefix $f^n$ of an @{term "f ∈ V⇩0⇩1"} the strategy
$S$ first computes $\varphi_{a(f(0), f(1))}(x)$ for $x = 0, \ldots, n$. For
the resulting prefix $\varphi^n_{a(f(0), f(1))}$ there are two cases:
\begin{enumerate}
\item[Case 1.] It differs from $f^n$, say at minimum index $x$. Then for
either $z = 0$ or $z = 1$ we have $\varphi_{f(z)}(x) \neq f(x)$ by
definition of amalgamation. This
implies $\varphi_{f(z)} \neq f$, and thus $\varphi_{f(1-z)} = f$ by
definition of @{term "V⇩0⇩1"}. We set $S(f^n) = f(1 - z)$. This
hypothesis is correct and hence consistent.
\item[Case 2.] It equals $f^n$. Then we set $S(f^n) = a(f(0), f(1))$. This
hypothesis is consistent by definition of this case.
\end{enumerate}

In both cases the hypothesis is consistent. If Case~1 holds for some $n$, the
same $x$ and $z$ will be found also for all larger values of $n$. Therefore
$S$ converges to the correct hypothesis $f(1 - z)$. If Case~2 holds for all
$n$, then $S$ always outputs the same hypothesis $a(f(0), f(1))$ and thus
also converges.

The above discussion tacitly assumes $n \geq 1$, such that both $f(0)$ and
$f(1)$ are available to $S$. For $n = 0$ the strategy outputs an arbitrary
consistent hypothesis.›

text ‹Amalgamation uses the concurrent simulation of functions.›

(* parallel i j x: concurrent simulation of the functions with codes i and j
   on input x, via the recursive function r_parallel. *)
definition parallel :: "nat ⇒ nat ⇒ nat ⇒ nat option" where
"parallel i j x ≡ eval r_parallel [i, j, x]"

(* Restates the definition in eval form, convenient for rewriting. *)
lemma r_parallel': "eval r_parallel [i, j, x] = parallel i j x"
using parallel_def by simp

(* Behavior of r_parallel in terms of r_phi: it diverges iff both simulated
   computations diverge; otherwise the result pairs a tag (0 for i, 1 for j)
   with the value of a halting computation.  If both halt, the result is one
   of the two tagged values. *)
lemma r_parallel'':
shows "eval r_phi [i, x] ↑ ∧ eval r_phi [j, x] ↑ ⟹ eval r_parallel [i, j, x] ↑"
and "eval r_phi [i, x] ↓ ∧ eval r_phi [j, x] ↑ ⟹
eval r_parallel [i, j, x] ↓= prod_encode (0, the (eval r_phi [i, x]))"
and "eval r_phi [j, x] ↓ ∧ eval r_phi [i, x] ↑ ⟹
eval r_parallel [i, j, x] ↓= prod_encode (1, the (eval r_phi [j, x]))"
and "eval r_phi [i, x] ↓ ∧ eval r_phi [j, x] ↓ ⟹
eval r_parallel [i, j, x] ↓= prod_encode (0, the (eval r_phi [i, x])) ∨
eval r_parallel [i, j, x] ↓= prod_encode (1, the (eval r_phi [j, x]))"
proof -
(* wrap the two codes as unary functions to match r_parallel's premises *)
let ?f = "Cn 1 r_phi [r_const i, Id 1 0]"
let ?g = "Cn 1 r_phi [r_const j, Id 1 0]"
have *: "⋀x. eval r_phi [i, x] = eval ?f [x]" "⋀x. eval r_phi [j, x] = eval ?g [x]"
by simp_all
show "eval r_phi [i, x] ↑ ∧ eval r_phi [j, x] ↑ ⟹ eval r_parallel [i, j, x] ↑"
and "eval r_phi [i, x] ↓ ∧ eval r_phi [j, x] ↑ ⟹
eval r_parallel [i, j, x] ↓= prod_encode (0, the (eval r_phi [i, x]))"
and "eval r_phi [j, x] ↓ ∧ eval r_phi [i, x] ↑ ⟹
eval r_parallel [i, j, x] ↓= prod_encode (1, the (eval r_phi [j, x]))"
and "eval r_phi [i, x] ↓ ∧ eval r_phi [j, x] ↓ ⟹
eval r_parallel [i, j, x] ↓= prod_encode (0, the (eval r_phi [i, x])) ∨
eval r_parallel [i, j, x] ↓= prod_encode (1, the (eval r_phi [j, x]))"
using r_parallel[OF *] by simp_all
qed

(* The same four cases as r_parallel'', phrased directly for φ and
   the parallel function. *)
lemma parallel:
"φ i x ↑ ∧ φ j x ↑ ⟹ parallel i j x ↑"
"φ i x ↓ ∧ φ j x ↑ ⟹ parallel i j x ↓= prod_encode (0, the (φ i x))"
"φ j x ↓ ∧ φ i x ↑ ⟹ parallel i j x ↓= prod_encode (1, the (φ j x))"
"φ i x ↓ ∧ φ j x ↓ ⟹
parallel i j x ↓= prod_encode (0, the (φ i x)) ∨
parallel i j x ↓= prod_encode (1, the (φ j x))"
using phi_def r_parallel'' r_parallel parallel_def by simp_all

(* A converging parallel computation is always tagged with 0 or 1. *)
lemma parallel_converg_pdec1_0_or_1:
assumes "parallel i j x ↓"
shows "pdec1 (the (parallel i j x)) = 0 ∨ pdec1 (the (parallel i j x)) = 1"
using assms parallel[of i x j] parallel(3)[of j x i]
by (metis fst_eqD option.sel prod_encode_inverse)

(* parallel converges iff at least one of the two simulations converges. *)
lemma parallel_converg_either: "(φ i x ↓ ∨ φ j x ↓) = (parallel i j x ↓)"
using parallel by (metis option.simps(3))

(* A result tagged 0 is a value of φ i. *)
lemma parallel_0:
assumes "parallel i j x ↓= prod_encode (0, v)"
shows "φ i x ↓= v"
using parallel assms
by (smt option.collapse option.sel option.simps(3) prod.inject prod_encode_eq zero_neq_one)

(* A result tagged 1 is a value of φ j. *)
lemma parallel_1:
assumes "parallel i j x ↓= prod_encode (1, v)"
shows "φ j x ↓= v"
using parallel assms
by (smt option.collapse option.sel option.simps(3) prod.inject prod_encode_eq zero_neq_one)

(* For f ∈ V⇩0⇩1 one of f(0), f(1) is a code of the total function f,
   so the parallel simulation of these two codes converges everywhere. *)
lemma parallel_converg_V01:
assumes "f ∈ V⇩0⇩1"
shows "parallel (the (f 0)) (the (f 1)) x ↓"
proof -
have "f ∈ ℛ ∧ (φ (the (f 0)) = f ∨ φ (the (f 1)) = f)"
using assms V01_def by auto
then have "φ (the (f 0)) ∈ ℛ ∨ φ (the (f 1)) ∈ ℛ"
by auto
then have "φ (the (f 0)) x ↓ ∨ φ (the (f 1)) x ↓"
using R1_imp_total1 by auto
then show ?thesis using parallel_converg_either by simp
qed

text ‹The amalgamation of two Gödel numbers can then be described
in terms of @{term "parallel"}.›

(* The amalgamation of codes i and j: the value (second component) of the
   first of φ_i, φ_j to halt; undefined when both diverge. *)
definition amalgamation :: "nat ⇒ nat ⇒ partial1" where
"amalgamation i j x ≡
if parallel i j x ↑ then None else Some (pdec2 (the (parallel i j x)))"

(* The amalgamation diverges exactly where both φ_i and φ_j diverge. *)
lemma amalgamation_diverg: "amalgamation i j x ↑ ⟷ φ i x ↑ ∧ φ j x ↑"
using amalgamation_def parallel by (metis option.simps(3))

(* If either φ_i or φ_j is total, so is their amalgamation. *)
lemma amalgamation_total:
assumes "total1 (φ i) ∨ total1 (φ j)"
shows "total1 (amalgamation i j)"
using assms amalgamation_diverg[of i j] total_def by auto

(* For f ∈ V⇩0⇩1 the amalgamation of f(0) and f(1) is total, since one of
   the two codes computes the total function f. *)
lemma amalgamation_V01_total:
assumes "f ∈ V⇩0⇩1"
shows "total1 (amalgamation (the (f 0)) (the (f 1)))"
using assms V01_def amalgamation_total R1_imp_total1 total1_def
by (metis (mono_tags, lifting) mem_Collect_eq)

(* Recursive function computing the amalgamation: project the second
   component (the value) out of the parallel simulation's result. *)
definition "r_amalgamation ≡ Cn 3 r_pdec2 [r_parallel]"

(* Well-formedness: r_amalgamation is a ternary recursive function. *)
lemma r_amalgamation_recfn: "recfn 3 r_amalgamation"
unfolding r_amalgamation_def by simp

(* r_amalgamation computes amalgamation, by case distinction on whether the
   parallel simulation converges.
   NOTE(review): the justifications for the two 'then have' steps that
   transfer (non-)convergence from parallel to r_parallel appear to have
   been dropped (presumably 'by (simp add: r_parallel')') — confirm against
   the original theory. *)
lemma r_amalgamation: "eval r_amalgamation [i, j, x] = amalgamation i j x"
proof (cases "parallel i j x ↑")
case True
then have "eval r_parallel [i, j, x] ↑"
then have "eval r_amalgamation [i, j, x] ↑"
unfolding r_amalgamation_def by simp
moreover from True have "amalgamation i j x ↑"
using amalgamation_def by simp
ultimately show ?thesis by simp
next
case False
then have "eval r_parallel [i, j, x] ↓"
then have "eval r_amalgamation [i, j, x] = eval r_pdec2 [the (eval r_parallel [i, j, x])]"
unfolding r_amalgamation_def by simp
also have "... ↓= pdec2 (the (eval r_parallel [i, j, x]))"
by simp
finally show ?thesis by (simp add: False amalgamation_def r_parallel')
qed

text ‹The function @{term "amalgamate"} computes Gödel numbers of
amalgamations. It corresponds to the function $a$ from the proof sketch.›

(* amalgamate i j: a Gödel number of amalgamation i j, obtained by applying
   the s-m-n function to the code of r_amalgamation.  This is the function
   called a in the proof sketch. *)
definition amalgamate :: "nat ⇒ nat ⇒ nat" where
"amalgamate i j ≡ smn 1 (encode r_amalgamation) [i, j]"

(* Correctness of amalgamate: φ at the amalgamated index is exactly the
   amalgamation function.  Unfolds smn and evaluates via the universal
   function.
   NOTE(review): the justification after the '(is "... = eval r_phi
   [encode ?f, x]")' step appears to have been dropped — confirm against
   the original theory. *)
lemma amalgamate: "φ (amalgamate i j) = amalgamation i j"
proof
fix x
have "φ (amalgamate i j) x = eval r_phi [amalgamate i j, x]"
also have "... = eval r_phi [smn 1 (encode r_amalgamation) [i, j], x]"
using amalgamate_def by simp
also have "... = eval r_phi
[encode (Cn 1 (r_universal 3)
(r_constn 0 (encode r_amalgamation) # map (r_constn 0) [i, j] @ map (Id 1) [0])), x]"
using smn[of 1 "encode r_amalgamation" "[i, j]"] by (simp add: numeral_3_eq_3)
also have "... = eval r_phi
[encode (Cn 1 (r_universal 3)
(r_const (encode r_amalgamation) # [r_const i, r_const j, Id 1 0])), x]"
(is "... = eval r_phi [encode ?f, x]")
finally have "φ (amalgamate i j) x = eval r_phi
[encode (Cn 1 (r_universal 3)
(r_const (encode r_amalgamation) # [r_const i, r_const j, Id 1 0])), x]" .
then have "φ (amalgamate i j) x = eval (r_universal 3) [encode r_amalgamation, i, j, x]"
unfolding r_phi_def using r_universal[of ?f 1] r_amalgamation_recfn by simp
then show "φ (amalgamate i j) x = amalgamation i j x"
using r_amalgamation by (simp add: r_amalgamation_recfn r_universal)
qed

(* Amalgamations are partial recursive, being φ at the amalgamated index. *)
lemma amalgamation_in_P1: "amalgamation i j ∈ 𝒫"
using amalgamate by (metis P2_proj_P1 phi_in_P2)

(* For f ∈ V⇩0⇩1 the amalgamation of f(0) and f(1) is total recursive.
   NOTE(review): the closing proof method (a 'by ...' after the 'using'
   line) appears to be missing — confirm against the original theory. *)
lemma amalgamation_V01_R1:
assumes "f ∈ V⇩0⇩1"
shows "amalgamation (the (f 0)) (the (f 1)) ∈ ℛ"
using assms amalgamation_V01_total amalgamation_in_P1

(* Recursive function computing amalgamate: applies the s-m-n function to
   the (constant) code of r_amalgamation and the two arguments. *)
definition "r_amalgamate ≡
Cn 2 (r_smn 1 2) [r_dummy 1 (r_const (encode r_amalgamation)), Id 2 0, Id 2 1]"

(* Well-formedness: r_amalgamate is a binary recursive function. *)
lemma r_amalgamate_recfn: "recfn 2 r_amalgamate"
unfolding r_amalgamate_def by simp

(* r_amalgamate computes amalgamate, via the correctness of r_smn. *)
lemma r_amalgamate: "eval r_amalgamate [i, j] ↓= amalgamate i j"
proof -
let ?p = "encode r_amalgamation"
have rs21: "eval (r_smn 1 2) [?p, i, j] ↓= smn 1 ?p [i, j]"
using r_smn by simp
moreover have "eval r_amalgamate [i, j] = eval (r_smn 1 2) [?p, i, j]"
unfolding r_amalgamate_def by auto
ultimately have "eval r_amalgamate [i, j] ↓= smn 1 ?p [i, j]"
by simp
then show ?thesis using amalgamate_def by simp
qed

text ‹The strategy $S$ distinguishes the two cases from the proof
sketch with the help of the next function, which checks if a hypothesis
$\varphi_i$ is inconsistent with a prefix $e$. If so, it returns the least $x < |e|$ witnessing the inconsistency; otherwise it returns the length $|e|$.
If $\varphi_i$ diverges for some $x < |e|$, so does the function.›

(* inconsist i e: diverges if φ_i diverges somewhere below |e|; otherwise
   yields the least x < |e| where φ_i disagrees with the prefix e, or |e|
   itself if φ_i is consistent with e. *)
definition inconsist :: partial2 where
"inconsist i e ≡
(if ∃x<e_length e. φ i x ↑ then None
else if ∃x<e_length e. φ i x ↓≠ e_nth e x
then Some (LEAST x. x < e_length e ∧ φ i x ↓≠ e_nth e x)
else Some (e_length e))"

(* When inconsist converges, the first guard of its definition is false:
   φ_i converges below |e|, and the result is given by the remaining
   conditional. *)
lemma inconsist_converg:
assumes "inconsist i e ↓"
shows "inconsist i e =
(if ∃x<e_length e. φ i x ↓≠ e_nth e x
then Some (LEAST x. x < e_length e ∧ φ i x ↓≠ e_nth e x)
else Some (e_length e))"
and "∀x<e_length e. φ i x ↓"
using inconsist_def assms by (presburger, meson)

(* The result of a converging inconsist is at most |e|: either a witness
   position below |e| or |e| itself. *)
lemma inconsist_bounded:
assumes "inconsist i e ↓"
shows "the (inconsist i e) ≤ e_length e"
proof (cases "∃x<e_length e. φ i x ↓≠ e_nth e x")
case True
then show ?thesis
using inconsist_converg[OF assms]
by (smt Least_le dual_order.strict_implies_order dual_order.strict_trans2 option.sel)
next
case False
then show ?thesis using inconsist_converg[OF assms] by auto
qed

(* A converging inconsist returns |e| exactly when φ_i agrees with the
   prefix e everywhere below |e|, i.e. when the hypothesis is consistent. *)
lemma inconsist_consistent:
assumes "inconsist i e ↓"
shows "inconsist i e ↓= e_length e ⟷ (∀x<e_length e. φ i x ↓= e_nth e x)"
proof
show "∀x<e_length e. φ i x ↓= e_nth e x" if "inconsist i e ↓= e_length e"
proof (cases "∃x<e_length e. φ i x ↓≠ e_nth e x")
case True
then show ?thesis
using that inconsist_converg[OF assms]
by (metis (mono_tags, lifting) not_less_Least option.inject)
next
case False
then show ?thesis
using that inconsist_converg[OF assms] by simp
qed
show "∀x<e_length e. φ i x ↓= e_nth e x ⟹ inconsist i e ↓= e_length e"
unfolding inconsist_def using assms by auto
qed

(* One direction of inconsist_consistent, packaged for direct use. *)
lemma inconsist_converg_eq:
assumes "inconsist i e ↓= e_length e"
shows "∀x<e_length e. φ i x ↓= e_nth e x"
using assms inconsist_consistent by auto

(* If a converging inconsist returns a value below |e|, there is an
   inconsistency, and the result is the least witness position. *)
lemma inconsist_converg_less:
assumes "inconsist i e ↓" and "the (inconsist i e) < e_length e"
shows "∃x<e_length e. φ i x ↓≠ e_nth e x"
and "inconsist i e ↓= (LEAST x. x < e_length e ∧ φ i x ↓≠ e_nth e x)"
proof -
show "∃x<e_length e. φ i x ↓≠ e_nth e x"
using assms by (metis (no_types, lifting) inconsist_converg(1) nat_neq_iff option.sel)
then show "inconsist i e ↓= (LEAST x. x < e_length e ∧ φ i x ↓≠ e_nth e x)"
using assms inconsist_converg by presburger
qed

lemma least_bounded_Suc:
assumes "∃x. x < upper ∧ P x"
shows "(LEAST x. x < upper ∧ P x) = (LEAST x. x <</