section ‹Lemma R\label{s:lemma_r}› theory Lemma_R imports Inductive_Inference_Basics begin text ‹A common technique for constructing a class that cannot be learned is diagonalization against all strategies (see, for instance, Section~\ref{s:lim_bc}). Similarly, the typical way of proving that a class cannot be learned is by assuming there is a strategy and deriving a contradiction. Both techniques are easier to carry out if one has to consider only \emph{total} recursive strategies. This is not possible in general, since after all the definitions of the inference types admit strictly partial strategies. However, for many inference types one can show that for every strategy there is a total strategy with at least the same ``learning power''. Results to that effect are called Lemma~R. Lemma~R comes in different strengths depending on how general the construction of the total recursive strategy is. CONS is the only inference type considered here for which not even a weak form of Lemma~R holds.› subsection ‹Strong Lemma R for LIM, FIN, and BC› text ‹In its strong form Lemma~R says that for any strategy $S$, there is a total strategy $T$ that learns all classes $S$ learns regardless of hypothesis space. The strategy $T$ can be derived from $S$ by a delayed simulation of $S$. More precisely, for input $f^n$, $T$ simulates $S$ for prefixes $f^0, f^1, \ldots, f^n$ for at most $n$ steps. If $S$ halts on none of the prefixes, $T$ outputs an arbitrary hypothesis. Otherwise let $k \leq n$ be maximal such that $S$ halts on $f^k$ in at most $n$ steps. Then $T$ outputs $S(f^k)$. › text ‹We reformulate some lemmas for @{term r_result1} to make it easier to use them with @{term "φ"}.› lemma r_result1_converg_phi: assumes "φ i x ↓= v" shows "∃t. (∀t'≥t. eval r_result1 [t', i, x] ↓= Suc v) ∧ (∀t'<t. 
eval r_result1 [t', i, x] ↓= 0)" using assms r_result1_converg' phi_def by simp_all lemma r_result1_bivalent': assumes "eval r_phi [i, x] ↓= v" shows "eval r_result1 [t, i, x] ↓= Suc v ∨ eval r_result1 [t, i, x] ↓= 0" using assms r_result1 r_result_bivalent' r_phi'' by simp lemma r_result1_bivalent_phi: assumes "φ i x ↓= v" shows "eval r_result1 [t, i, x] ↓= Suc v ∨ eval r_result1 [t, i, x] ↓= 0" using assms r_result1_bivalent' phi_def by simp_all lemma r_result1_diverg_phi: assumes "φ i x ↑" shows "eval r_result1 [t, i, x] ↓= 0" using assms phi_def r_result1_diverg' by simp lemma r_result1_some_phi: assumes "eval r_result1 [t, i, x] ↓= Suc v" shows "φ i x ↓= v" using assms phi_def r_result1_Some' by simp lemma r_result1_saturating': assumes "eval r_result1 [t, i, x] ↓= Suc v" shows "eval r_result1 [t + d, i, x] ↓= Suc v" using assms r_result1 r_result_saturating r_phi'' by simp lemma r_result1_saturating_the: assumes "the (eval r_result1 [t, i, x]) > 0" and "t' ≥ t" shows "the (eval r_result1 [t', i, x]) > 0" proof - from assms(1) obtain v where "eval r_result1 [t, i, x] ↓= Suc v" using r_result1_bivalent_phi r_result1_diverg_phi by (metis inc_induct le_0_eq not_less_zero option.discI option.expand option.sel) with assms have "eval r_result1 [t', i, x] ↓= Suc v" using r_result1_saturating' le_Suc_ex by blast then show ?thesis by simp qed lemma Greatest_bounded_Suc: fixes P :: "nat ⇒ nat" shows "(if P n > 0 then Suc n else if ∃j<n. P j > 0 then Suc (GREATEST j. j < n ∧ P j > 0) else 0) = (if ∃j<Suc n. P j > 0 then Suc (GREATEST j. j < Suc n ∧ P j > 0) else 0)" (is "?lhs = ?rhs") proof (cases "∃j<Suc n. P j > 0") case 1: True show ?thesis proof (cases "P n > 0") case True then have "(GREATEST j. j < Suc n ∧ P j > 0) = n" using Greatest_equality[of "λj. j < Suc n ∧ P j > 0"] by simp moreover have "?rhs = Suc (GREATEST j. 
j < Suc n ∧ P j > 0)" using 1 by simp ultimately have "?rhs = Suc n" by simp then show ?thesis using True by simp next case False then have "?lhs = Suc (GREATEST j. j < n ∧ P j > 0)" using 1 by (metis less_SucE) moreover have "?rhs = Suc (GREATEST j. j < Suc n ∧ P j > 0)" using 1 by simp moreover have "(GREATEST j. j < n ∧ P j > 0) = (GREATEST j. j < Suc n ∧ P j > 0)" using 1 False by (metis less_SucI less_Suc_eq) ultimately show ?thesis by simp qed next case False then show ?thesis by auto qed text ‹For $n$, $i$, $x$, the next function simulates $\varphi_i$ on all non-empty prefixes of at most length $n$ of the list $x$ for at most $n$ steps. It returns the length of the longest such prefix for which $\varphi_i$ halts, or zero if $\varphi_i$ does not halt for any prefix.› definition "r_delay_aux ≡ Pr 2 (r_constn 1 0) (Cn 4 r_ifz [Cn 4 r_result1 [Cn 4 r_length [Id 4 3], Id 4 2, Cn 4 r_take [Cn 4 S [Id 4 0], Id 4 3]], Id 4 1, Cn 4 S [Id 4 0]])" lemma r_delay_aux_prim: "prim_recfn 3 r_delay_aux" unfolding r_delay_aux_def by simp_all lemma r_delay_aux_total: "total r_delay_aux" using prim_recfn_total[OF r_delay_aux_prim] . lemma r_delay_aux: assumes "n ≤ e_length x" shows "eval r_delay_aux [n, i, x] ↓= (if ∃j<n. the (eval r_result1 [e_length x, i, e_take (Suc j) x]) > 0 then Suc (GREATEST j. 
j < n ∧ the (eval r_result1 [e_length x, i, e_take (Suc j) x]) > 0) else 0)" proof - define z where "z ≡ Cn 4 r_result1 [Cn 4 r_length [Id 4 3], Id 4 2, Cn 4 r_take [Cn 4 S [Id 4 0], Id 4 3]]" then have z_recfn: "recfn 4 z" by simp have z: "eval z [j, r, i, x] = eval r_result1 [e_length x, i, e_take (Suc j) x]" if "j < e_length x" for j r i x unfolding z_def using that by simp define g where "g ≡ Cn 4 r_ifz [z, Id 4 1, Cn 4 S [Id 4 0]]" then have g: "eval g [j, r, i, x] ↓= (if the (eval r_result1 [e_length x, i, e_take (Suc j) x]) > 0 then Suc j else r)" if "j < e_length x" for j r i x using that z prim_recfn_total z_recfn by simp show ?thesis using assms proof (induction n) case 0 moreover have "eval r_delay_aux [0, i, x] ↓= 0" using eval_Pr_0 r_delay_aux_def r_delay_aux_prim r_constn by (simp add: r_delay_aux_def) ultimately show ?case by simp next case (Suc n) let ?P = "λj. the (eval r_result1 [e_length x, i, e_take (Suc j) x])" have "eval r_delay_aux [n, i, x] ↓" using Suc by simp moreover have "eval r_delay_aux [Suc n, i, x] = eval (Pr 2 (r_constn 1 0) g) [Suc n, i, x]" unfolding r_delay_aux_def g_def z_def by simp ultimately have "eval r_delay_aux [Suc n, i, x] = eval g [n, the (eval r_delay_aux [n, i, x]), i, x]" using r_delay_aux_prim Suc eval_Pr_converg_Suc by (simp add: r_delay_aux_def g_def z_def numeral_3_eq_3) then have "eval r_delay_aux [Suc n, i, x] ↓= (if ?P n > 0 then Suc n else if ∃j<n. ?P j > 0 then Suc (GREATEST j. j < n ∧ ?P j > 0) else 0)" using g Suc by simp then have "eval r_delay_aux [Suc n, i, x] ↓= (if ∃j<Suc n. ?P j > 0 then Suc (GREATEST j. 
j < Suc n ∧ ?P j > 0) else 0)" using Greatest_bounded_Suc[where ?P="?P"] by simp then show ?case by simp qed qed text ‹The next function simulates $\varphi_i$ on all non-empty prefixes of a list $x$ of length $n$ for at most $n$ steps and outputs the length of the longest prefix for which $\varphi_i$ halts, or zero if $\varphi_i$ does not halt for any such prefix.› definition "r_delay ≡ Cn 2 r_delay_aux [Cn 2 r_length [Id 2 1], Id 2 0, Id 2 1]" lemma r_delay_recfn [simp]: "recfn 2 r_delay" unfolding r_delay_def by (simp add: r_delay_aux_prim) lemma r_delay: "eval r_delay [i, x] ↓= (if ∃j<e_length x. the (eval r_result1 [e_length x, i, e_take (Suc j) x]) > 0 then Suc (GREATEST j. j < e_length x ∧ the (eval r_result1 [e_length x, i, e_take (Suc j) x]) > 0) else 0)" unfolding r_delay_def using r_delay_aux r_delay_aux_prim by simp definition "delay i x ≡ Some (if ∃j<e_length x. the (eval r_result1 [e_length x, i, e_take (Suc j) x]) > 0 then Suc (GREATEST j. j < e_length x ∧ the (eval r_result1 [e_length x, i, e_take (Suc j) x]) > 0) else 0)" lemma delay_in_R2: "delay ∈ ℛ⇧^{2}" using r_delay totalI2 R2I delay_def r_delay_recfn by (metis (no_types, lifting) numeral_2_eq_2 option.simps(3)) lemma delay_le_length: "the (delay i x) ≤ e_length x" proof (cases "∃j<e_length x. the (eval r_result1 [e_length x, i, e_take (Suc j) x]) > 0") case True let ?P = "λj. j < e_length x ∧ the (eval r_result1 [e_length x, i, e_take (Suc j) x]) > 0" from True have "∃j. ?P j" by simp moreover have "⋀y. 
?P y ⟹ y ≤ e_length x" by simp ultimately have "?P (Greatest ?P)" using GreatestI_ex_nat[where ?P="?P"] by blast then have "Greatest ?P < e_length x" by simp moreover have "delay i x ↓= Suc (Greatest ?P)" using delay_def True by simp ultimately show ?thesis by auto next case False then show ?thesis using delay_def by auto qed lemma e_take_delay_init: assumes "f ∈ ℛ" and "the (delay i (f ▹ n)) > 0" shows "e_take (the (delay i (f ▹ n))) (f ▹ n) = f ▹ (the (delay i (f ▹ n)) - 1)" using assms e_take_init[of f _ n] length_init[of f n] delay_le_length[of i "f ▹ n"] by (metis One_nat_def Suc_le_lessD Suc_pred) lemma delay_gr0_converg: assumes "the (delay i x) > 0" shows "φ i (e_take (the (delay i x)) x) ↓" proof - let ?P = "λj. j < e_length x ∧ the (eval r_result1 [e_length x, i, e_take (Suc j) x]) > 0" have "∃j. ?P j" proof (rule ccontr) assume "¬ (∃j. ?P j)" then have "delay i x ↓= 0" using delay_def by simp with assms show False by simp qed then have d: "the (delay i x) = Suc (Greatest ?P)" using delay_def by simp moreover have "⋀y. ?P y ⟹ y ≤ e_length x" by simp ultimately have "?P (Greatest ?P)" using ‹∃j. ?P j› GreatestI_ex_nat[where ?P="?P"] by blast then have "the (eval r_result1 [e_length x, i, e_take (Suc (Greatest ?P)) x]) > 0" by simp then have "the (eval r_result1 [e_length x, i, e_take (the (delay i x)) x]) > 0" using d by simp then show ?thesis using r_result1_diverg_phi by fastforce qed lemma delay_unbounded: fixes n :: nat assumes "f ∈ ℛ" and "∀n. φ i (f ▹ n) ↓" shows "∃m. the (delay i (f ▹ m)) > n" proof - from assms have "∃t. 
the (eval r_result1 [t, i, f ▹ n]) > 0" using r_result1_converg_phi by (metis le_refl option.exhaust_sel option.sel zero_less_Suc) then obtain t where t: "the (eval r_result1 [t, i, f ▹ n]) > 0" by auto let ?m = "max n t" have "Suc ?m ≥ t" by simp have m: "the (eval r_result1 [Suc ?m, i, f ▹ n]) > 0" proof - let ?w = "eval r_result1 [t, i, f ▹ n]" obtain v where v: "?w ↓= Suc v" using t assms(2) r_result1_bivalent_phi by fastforce have "eval r_result1 [Suc ?m, i, f ▹ n] = ?w" using v t r_result1_saturating' ‹Suc ?m ≥ t› le_Suc_ex by fastforce then show ?thesis using t by simp qed let ?x = "f ▹ ?m" have "the (delay i ?x) > n" proof - let ?P = "λj. j < e_length ?x ∧ the (eval r_result1 [e_length ?x, i, e_take (Suc j) ?x]) > 0" have "e_length ?x = Suc ?m" by simp moreover have "e_take (Suc n) ?x = f ▹ n" using assms(1) e_take_init by auto ultimately have "?P n" using m by simp have "⋀y. ?P y ⟹ y ≤ e_length ?x" by simp with ‹?P n› have "n ≤ (Greatest ?P)" using Greatest_le_nat[of ?P n "e_length ?x"] by simp moreover have "the (delay i ?x) = Suc (Greatest ?P)" using delay_def ‹?P n› by auto ultimately show ?thesis by simp qed then show ?thesis by auto qed lemma delay_monotone: assumes "f ∈ ℛ" and "n⇩_{1}≤ n⇩_{2}" shows "the (delay i (f ▹ n⇩_{1})) ≤ the (delay i (f ▹ n⇩_{2}))" (is "the (delay i ?x1) ≤ the (delay i ?x2)") proof (cases "the (delay i (f ▹ n⇩_{1})) = 0") case True then show ?thesis by simp next case False let ?P1 = "λj. j < e_length ?x1 ∧ the (eval r_result1 [e_length ?x1, i, e_take (Suc j) ?x1]) > 0" let ?P2 = "λj. j < e_length ?x2 ∧ the (eval r_result1 [e_length ?x2, i, e_take (Suc j) ?x2]) > 0" from False have d1: "the (delay i ?x1) = Suc (Greatest ?P1)" "∃j. ?P1 j" using delay_def option.collapse by fastforce+ moreover have "⋀y. 
?P1 y ⟹ y ≤ e_length ?x1" by simp ultimately have *: "?P1 (Greatest ?P1)" using GreatestI_ex_nat[of ?P1] by blast let ?j = "Greatest ?P1" from * have "?j < e_length ?x1" by auto then have 1: "e_take (Suc ?j) ?x1 = e_take (Suc ?j) ?x2" using assms e_take_init by auto from * have 2: "?j < e_length ?x2" using assms(2) by auto with 1 * have "the (eval r_result1 [e_length ?x1, i, e_take (Suc ?j) ?x2]) > 0" by simp moreover have "e_length ?x1 ≤ e_length ?x2" using assms(2) by auto ultimately have "the (eval r_result1 [e_length ?x2, i, e_take (Suc ?j) ?x2]) > 0" using r_result1_saturating_the by simp with 2 have "?P2 ?j" by simp then have d2: "the (delay i ?x2) = Suc (Greatest ?P2)" using delay_def by auto have "⋀y. ?P2 y ⟹ y ≤ e_length ?x2" by simp with ‹?P2 ?j› have "?j ≤ (Greatest ?P2)" using Greatest_le_nat[of ?P2] by blast with d1 d2 show ?thesis by simp qed lemma delay_unbounded_monotone: fixes n :: nat assumes "f ∈ ℛ" and "∀n. φ i (f ▹ n) ↓" shows "∃m⇩_{0}. ∀m≥m⇩_{0}. the (delay i (f ▹ m)) > n" proof - from assms delay_unbounded obtain m⇩_{0}where "the (delay i (f ▹ m⇩_{0})) > n" by blast then have "∀m≥m⇩_{0}. the (delay i (f ▹ m)) > n" using assms(1) delay_monotone order.strict_trans2 by blast then show ?thesis by auto qed text ‹Now we can define a function that simulates an arbitrary strategy $\varphi_i$ in a delayed way. 
The parameter $d$ is the default hypothesis for when $\varphi_i$ does not halt within the time bound for any prefix.› definition r_totalizer :: "nat ⇒ recf" where "r_totalizer d ≡ Cn 2 (r_lifz (r_constn 1 d) (Cn 2 r_phi [Id 2 0, Cn 2 r_take [Cn 2 r_delay [Id 2 0, Id 2 1], Id 2 1]])) [Cn 2 r_delay [Id 2 0, Id 2 1], Id 2 0, Id 2 1]" lemma r_totalizer_recfn: "recfn 2 (r_totalizer d)" unfolding r_totalizer_def by simp lemma r_totalizer: "eval (r_totalizer d) [i, x] = (if the (delay i x) = 0 then Some d else φ i (e_take (the (delay i x)) x))" proof - let ?i = "Cn 2 r_delay [Id 2 0, Id 2 1]" have "eval ?i [i, x] = eval r_delay [i, x]" for i x using r_delay_recfn by simp then have i: "eval ?i [i, x] = delay i x" for i x using r_delay by (simp add: delay_def) let ?t = "r_constn 1 d" have t: "eval ?t [i, x] ↓= d" for i x by simp let ?e1 = "Cn 2 r_take [?i, Id 2 1]" let ?e = "Cn 2 r_phi [Id 2 0, ?e1]" have "eval ?e1 [i, x] = eval r_take [the (delay i x), x]" for i x using r_delay i delay_def by simp then have "eval ?e1 [i, x] ↓= e_take (the (delay i x)) x" for i x using delay_le_length by simp then have e: "eval ?e [i, x] = φ i (e_take (the (delay i x)) x)" using phi_def by simp let ?z = "r_lifz ?t ?e" have recfn_te: "recfn 2 ?t" "recfn 2 ?e" by simp_all then have "eval (r_totalizer d) [i, x] = eval (r_lifz ?t ?e) [the (delay i x), i, x]" for i x unfolding r_totalizer_def using i r_totalizer_recfn delay_def by simp then have "eval (r_totalizer d) [i, x] = (if the (delay i x) = 0 then eval ?t [i, x] else eval ?e [i, x])" for i x using recfn_te by simp then show ?thesis using t e by simp qed lemma r_totalizer_total: "total (r_totalizer d)" proof (rule totalI2) show "recfn 2 (r_totalizer d)" using r_totalizer_recfn by simp show "⋀x y. 
eval (r_totalizer d) [x, y] ↓" using r_totalizer delay_gr0_converg by simp qed definition totalizer :: "nat ⇒ partial2" where "totalizer d i x ≡ if the (delay i x) = 0 then Some d else φ i (e_take (the (delay i x)) x)" lemma totalizer_init: assumes "f ∈ ℛ" shows "totalizer d i (f ▹ n) = (if the (delay i (f ▹ n)) = 0 then Some d else φ i (f ▹ (the (delay i (f ▹ n)) - 1)))" using assms e_take_delay_init by (simp add: totalizer_def) lemma totalizer_in_R2: "totalizer d ∈ ℛ⇧^{2}" using totalizer_def r_totalizer r_totalizer_total R2I r_totalizer_recfn by metis text ‹For LIM, @{term totalizer} works with every default hypothesis $d$.› lemma lemma_R_for_Lim: assumes "learn_lim ψ U (φ i)" shows "learn_lim ψ U (totalizer d i)" proof (rule learn_limI) show env: "environment ψ U (totalizer d i)" using assms learn_limE(1) totalizer_in_R2 by auto show "∃j. ψ j = f ∧ (∀⇧^{∞}n. totalizer d i (f ▹ n) ↓= j)" if "f ∈ U" for f proof - have "f ∈ ℛ" using assms env that by auto from assms learn_limE obtain j n⇩_{0}where j: "ψ j = f" and n0: "∀n≥n⇩_{0}. (φ i) (f ▹ n) ↓= j" using ‹f ∈ U› by metis obtain m⇩_{0}where m0: "∀m≥m⇩_{0}. the (delay i (f ▹ m)) > n⇩_{0}" using delay_unbounded_monotone ‹f ∈ ℛ› ‹f ∈ U› assms learn_limE(1) by blast then have "∀m≥m⇩_{0}. totalizer d i (f ▹ m) = φ i (e_take (the (delay i (f ▹ m))) (f ▹ m))" using totalizer_def by auto then have "∀m≥m⇩_{0}. totalizer d i (f ▹ m) = φ i (f ▹ (the (delay i (f ▹ m)) - 1))" using e_take_delay_init m0 ‹f ∈ ℛ› by auto with m0 n0 have "∀m≥m⇩_{0}. totalizer d i (f ▹ m) ↓= j" by auto with j show ?thesis by auto qed qed text ‹The effective version of Lemma~R for LIM states that there is a total recursive function computing Gödel numbers of total strategies from those of arbitrary strategies.› lemma lemma_R_for_Lim_effective: "∃g∈ℛ. ∀i. φ (the (g i)) ∈ ℛ ∧ (∀U ψ. learn_lim ψ U (φ i) ⟶ learn_lim ψ U (φ (the (g i))))" proof - have "totalizer 0 ∈ 𝒫⇧^{2}" using totalizer_in_R2 by auto then obtain g where g: "g ∈ ℛ" "∀i. 
(totalizer 0) i = φ (the (g i))" using numbering_translation_for_phi by blast with totalizer_in_R2 have "∀i. φ (the (g i)) ∈ ℛ" by (metis R2_proj_R1) moreover from g(2) lemma_R_for_Lim[where ?d=0] have "∀i U ψ. learn_lim ψ U (φ i) ⟶ learn_lim ψ U (φ (the (g i)))" by simp ultimately show ?thesis using g(1) by blast qed text ‹In order for us to use the previous lemma, we need a function that performs the actual computation:› definition "r_limr ≡ SOME g. recfn 1 g ∧ total g ∧ (∀i. φ (the (eval g [i])) ∈ ℛ ∧ (∀U ψ. learn_lim ψ U (φ i) ⟶ learn_lim ψ U (φ (the (eval g [i])))))" lemma r_limr_recfn: "recfn 1 r_limr" and r_limr_total: "total r_limr" and r_limr: "φ (the (eval r_limr [i])) ∈ ℛ" "learn_lim ψ U (φ i) ⟹ learn_lim ψ U (φ (the (eval r_limr [i])))" proof - let ?P = "λg. g ∈ ℛ ∧ (∀i. φ (the (g i)) ∈ ℛ ∧ (∀U ψ. learn_lim ψ U (φ i) ⟶ learn_lim ψ U (φ (the (g i)))))" let ?Q = "λg. recfn 1 g ∧ total g ∧ (∀i. φ (the (eval g [i])) ∈ ℛ ∧ (∀U ψ. learn_lim ψ U (φ i) ⟶ learn_lim ψ U (φ (the (eval g [i])))))" have "∃g. ?P g" using lemma_R_for_Lim_effective by auto then obtain g where "?P g" by auto then obtain g' where g': "recfn 1 g'" "total g'" "∀i. eval g' [i] = g i" by blast with ‹?P g› have "?Q g'" by simp with r_limr_def someI_ex[of ?Q] show "recfn 1 r_limr" "total r_limr" "φ (the (eval r_limr [i])) ∈ ℛ" "learn_lim ψ U (φ i) ⟹ learn_lim ψ U (φ (the (eval r_limr [i])))" by auto qed text ‹For BC, too, @{term totalizer} works with every default hypothesis $d$.› lemma lemma_R_for_BC: assumes "learn_bc ψ U (φ i)" shows "learn_bc ψ U (totalizer d i)" proof (rule learn_bcI) show env: "environment ψ U (totalizer d i)" using assms learn_bcE(1) totalizer_in_R2 by auto show "∃n⇩_{0}. ∀n≥n⇩_{0}. ψ (the (totalizer d i (f ▹ n))) = f" if "f ∈ U" for f proof - have "f ∈ ℛ" using assms env that by auto obtain n⇩_{0}where n0: "∀n≥n⇩_{0}. ψ (the ((φ i) (f ▹ n))) = f" using assms learn_bcE ‹f ∈ U› by metis obtain m⇩_{0}where m0: "∀m≥m⇩_{0}. 
the (delay i (f ▹ m)) > n⇩_{0}" using delay_unbounded_monotone ‹f ∈ ℛ› ‹f ∈ U› assms learn_bcE(1) by blast then have "∀m≥m⇩_{0}. totalizer d i (f ▹ m) = φ i (e_take (the (delay i (f ▹ m))) (f ▹ m))" using totalizer_def by auto then have "∀m≥m⇩_{0}. totalizer d i (f ▹ m) = φ i (f ▹ (the (delay i (f ▹ m)) - 1))" using e_take_delay_init m0 ‹f ∈ ℛ› by auto with m0 n0 have "∀m≥m⇩_{0}. ψ (the (totalizer d i (f ▹ m))) = f" by auto then show ?thesis by auto qed qed corollary lemma_R_for_BC_simple: assumes "learn_bc ψ U s" shows "∃s'∈ℛ. learn_bc ψ U s'" using assms lemma_R_for_BC totalizer_in_R2 learn_bcE by (metis R2_proj_R1 learn_bcE(1) phi_universal) text ‹For FIN the default hypothesis of @{term totalizer} must be zero, signalling ``don't know yet''.› lemma lemma_R_for_FIN: assumes "learn_fin ψ U (φ i)" shows "learn_fin ψ U (totalizer 0 i)" proof (rule learn_finI) show env: "environment ψ U (totalizer 0 i)" using assms learn_finE(1) totalizer_in_R2 by auto show "∃j n⇩_{0}. ψ j = f ∧ (∀n<n⇩_{0}. totalizer 0 i (f ▹ n) ↓= 0) ∧ (∀n≥n⇩_{0}. totalizer 0 i (f ▹ n) ↓= Suc j)" if "f ∈ U" for f proof - have "f ∈ ℛ" using assms env that by auto from assms learn_finE[of ψ U "φ i"] obtain j where j: "ψ j = f" and ex_n0: "∃n⇩_{0}. (∀n<n⇩_{0}. (φ i) (f ▹ n) ↓= 0) ∧ (∀n≥n⇩_{0}. (φ i) (f ▹ n) ↓= Suc j)" using ‹f ∈ U› by blast let ?Q = "λn⇩_{0}. (∀n<n⇩_{0}. (φ i) (f ▹ n) ↓= 0) ∧ (∀n≥n⇩_{0}. (φ i) (f ▹ n) ↓= Suc j)" define n⇩_{0}where "n⇩_{0}= Least ?Q" with ex_n0 have n0: "?Q n⇩_{0}" "∀n<n⇩_{0}. ¬ ?Q n" using LeastI_ex[of ?Q] not_less_Least[of _ ?Q] by blast+ define m⇩_{0}where "m⇩_{0}= (LEAST m⇩_{0}. ∀m≥m⇩_{0}. the (delay i (f ▹ m)) > n⇩_{0})" (is "m⇩_{0}= Least ?P") moreover have "∃m⇩_{0}. ∀m≥m⇩_{0}. the (delay i (f ▹ m)) > n⇩_{0}" using delay_unbounded_monotone ‹f∈ℛ› ‹f ∈ U› assms learn_finE(1) by simp ultimately have m0: "?P m⇩_{0}" "∀m<m⇩_{0}. ¬ ?P m" using LeastI_ex[of ?P] not_less_Least[of _ ?P] by blast+ then have "∀m≥m⇩_{0}. 
totalizer 0 i (f ▹ m) = φ i (e_take (the (delay i (f ▹ m))) (f ▹ m))" using totalizer_def by auto then have "∀m≥m⇩_{0}. totalizer 0 i (f ▹ m) = φ i (f ▹ (the (delay i (f ▹ m)) - 1))" using e_take_delay_init m0 ‹f∈ℛ› by auto with m0 n0 have "∀m≥m⇩_{0}. totalizer 0 i (f ▹ m) ↓= Suc j" by auto moreover have "totalizer 0 i (f ▹ m) ↓= 0" if "m < m⇩_{0}" for m proof (cases "the (delay i (f ▹ m)) = 0") case True then show ?thesis by (simp add: totalizer_def) next case False then have "the (delay i (f ▹ m)) ≤ n⇩_{0}" using m0 that ‹f ∈ ℛ› delay_monotone by (meson leI order.strict_trans2) then show ?thesis using ‹f ∈ ℛ› n0(1) totalizer_init by (simp add: Suc_le_lessD) qed ultimately show ?thesis using j by auto qed qed subsection ‹Weaker Lemma R for CP and TOTAL› text ‹For TOTAL the default hypothesis used by @{term totalizer} depends on the hypothesis space, because it must refer to a total function in that space. Consequently the total strategy depends on the hypothesis space, which makes this form of Lemma~R weaker than the ones in the previous section.› lemma lemma_R_for_TOTAL: fixes ψ :: partial2 shows "∃d. ∀U. ∀i. learn_total ψ U (φ i) ⟶ learn_total ψ U (totalizer d i)" proof (cases "∃d. ψ d ∈ ℛ") case True then obtain d where "ψ d ∈ ℛ" by auto have "learn_total ψ U (totalizer d i)" if "learn_total ψ U (φ i)" for U i proof (rule learn_totalI) show env: "environment ψ U (totalizer d i)" using that learn_totalE(1) totalizer_in_R2 by auto show "⋀f. f ∈ U ⟹ ∃j. ψ j = f ∧ (∀⇧^{∞}n. 
totalizer d i (f ▹ n) ↓= j)" using that learn_total_def lemma_R_for_Lim[where ?d=d] learn_limE(2) by metis show "ψ (the (totalizer d i (f ▹ n))) ∈ ℛ" if "f ∈ U" for f n proof (cases "the (delay i (f ▹ n)) = 0") case True then show ?thesis using totalizer_def ‹ψ d ∈ ℛ› by simp next case False have "f ∈ ℛ" using that env by auto then show ?thesis using False that ‹learn_total ψ U (φ i)› totalizer_init learn_totalE(3) by simp qed qed then show ?thesis by auto next case False then show ?thesis using learn_total_def lemma_R_for_Lim by auto qed corollary lemma_R_for_TOTAL_simple: assumes "learn_total ψ U s" shows "∃s'∈ℛ. learn_total ψ U s'" using assms lemma_R_for_TOTAL totalizer_in_R2 by (metis R2_proj_R1 learn_totalE(1) phi_universal) text ‹For CP the default hypothesis used by @{term totalizer} depends on both the hypothesis space and the class. Therefore the total strategy depends on both the hypothesis space and the class, which makes Lemma~R for CP even weaker than the one for TOTAL.› lemma lemma_R_for_CP: fixes ψ :: partial2 and U :: "partial1 set" assumes "learn_cp ψ U (φ i)" shows "∃d. learn_cp ψ U (totalizer d i)" proof (cases "U = {}") case True then show ?thesis using assms learn_cp_def lemma_R_for_Lim by auto next case False then obtain f where "f ∈ U" by auto from ‹f ∈ U› obtain d where "ψ d = f" using learn_cpE(2)[OF assms] by auto with ‹f ∈ U› have "ψ d ∈ U" by simp have "learn_cp ψ U (totalizer d i)" proof (rule learn_cpI) show env: "environment ψ U (totalizer d i)" using assms learn_cpE(1) totalizer_in_R2 by auto show "⋀f. f ∈ U ⟹ ∃j. ψ j = f ∧ (∀⇧^{∞}n. 
totalizer d i (f ▹ n) ↓= j)" using assms learn_cp_def lemma_R_for_Lim[where ?d=d] learn_limE(2) by metis show "ψ (the (totalizer d i (f ▹ n))) ∈ U" if "f ∈ U" for f n proof (cases "the (delay i (f ▹ n)) = 0") case True then show ?thesis using totalizer_def ‹ψ d ∈ U› by simp next case False then show ?thesis using that env assms totalizer_init learn_cpE(3) by auto qed qed then show ?thesis by auto qed subsection ‹No Lemma R for CONS› text ‹This section demonstrates that the class $V_{01}$ of all total recursive functions $f$ where $f(0)$ or $f(1)$ is a Gödel number of $f$ can be consistently learned in the limit, but not by a total strategy. This implies that Lemma~R does not hold for CONS.› definition V01 :: "partial1 set" ("V⇩_{0}⇩_{1}") where "V⇩_{0}⇩_{1}= {f. f ∈ ℛ ∧ (φ (the (f 0)) = f ∨ φ (the (f 1)) = f)}" subsubsection ‹No total CONS strategy for @{term "V⇩_{0}⇩_{1}"}\label{s:v01_not_total}› text ‹In order to show that no total strategy can learn @{term "V⇩_{0}⇩_{1}"} we construct, for each total strategy $S$, one or two functions in @{term "V⇩_{0}⇩_{1}"} such that $S$ fails for at least one of them. At the core of this construction is a process that given a total recursive strategy $S$ and numbers $z, i, j \in \mathbb{N}$ builds a function $f$ as follows: Set $f(0) = i$ and $f(1) = j$. For $x\geq1$: \begin{enumerate} \item[(a)] Check whether $S$ changes its hypothesis when $f^x$ is extended by 0, that is, if $S(f^x) \neq S(f^x0)$. If so, set $f(x+1) = 0$. \item[(b)] Otherwise check if $S$ changes its hypothesis when $f^x$ is extended by $1$, that is, if $S(f^x) \neq S(f^x1)$. If so, set $f(x+1) = 1$. \item[(c)] If neither happens, set $f(x+1) = z$. \end{enumerate} In other words, as long as we can force $S$ to change its hypothesis by extending the function by 0 or 1, we do just that. Now there are two cases: \begin{enumerate} \item[Case 1.] 
For all $x\geq1$ either (a) or (b) occurs; then $S$ changes its hypothesis on $f$ all the time and thus does not learn $f$ in the limit (not to mention consistently). The value of $z$ makes no difference in this case. \item[Case 2.] For some minimal $x$, (c) occurs, that is, there is an $f^x$ such that $h := S(f^x) = S(f^x0) = S(f^x1)$. But the hypothesis $h$ cannot be consistent with both prefixes $f^x0$ and $f^x1$. Running the process once with $z = 0$ and once with $z = 1$ yields two functions starting with $f^x0$ and $f^x1$, respectively, such that $S$ outputs the same hypothesis, $h$, on both prefixes and thus cannot be consistent for both functions. \end{enumerate} This process is computable because $S$ is total. The construction does not work if we only assume $S$ to be a CONS strategy for $V_{01}$, because we need to be able to apply $S$ to prefixes not in $V_{01}$. The parameters $i$ and $j$ provide flexibility to find functions built by the above process that are actually in $V_{01}$. 
To this end we will use Smullyan's double fixed-point theorem.› context fixes s :: partial1 assumes s_in_R1 [simp, intro]: "s ∈ ℛ" begin text ‹The function @{term prefixes} constructs prefixes according to the aforementioned process.› fun prefixes :: "nat ⇒ nat ⇒ nat ⇒ nat ⇒ nat list" where "prefixes z i j 0 = [i]" | "prefixes z i j (Suc x) = prefixes z i j x @ [if x = 0 then j else if s (list_encode (prefixes z i j x @ [0])) ≠ s (list_encode (prefixes z i j x)) then 0 else if s (list_encode (prefixes z i j x @ [1])) ≠ s (list_encode (prefixes z i j x)) then 1 else z]" lemma prefixes_length: "length (prefixes z i j x) = Suc x" by (induction x) simp_all text ‹The functions @{term[names_short] "adverse z i j"} are the functions constructed by @{term[names_short] "prefixes"}.› definition adverse :: "nat ⇒ nat ⇒ nat ⇒ nat ⇒ nat option" where "adverse z i j x ≡ Some (last (prefixes z i j x))" lemma init_adverse_eq_prefixes: "(adverse z i j) ▹ n = list_encode (prefixes z i j n)" proof - have "prefix (adverse z i j) n = prefixes z i j n" proof (induction n) case 0 then show ?case using adverse_def prefixes_length prefixI' by fastforce next case (Suc n) then show ?case using adverse_def by (simp add: prefix_Suc) qed then show ?thesis by (simp add: init_def) qed lemma adverse_at_01: "adverse z i j 0 ↓= i" "adverse z i j 1 ↓= j" by (auto simp add: adverse_def) text ‹Had we introduced ternary partial recursive functions, the @{term[names_short] "adverse z"} functions would be among them.› lemma adverse_in_R3: "∃r. recfn 3 r ∧ total r ∧ (λi j x. eval r [i, j, x]) = adverse z" proof - obtain rs where rs: "recfn 1 rs" "total rs" "(λx. eval rs [x]) = s" using R1E by auto have s_total: "⋀x. s x ↓" by simp define f where "f = Cn 2 r_singleton_encode [Id 2 0]" then have "recfn 2 f" by simp have f: "⋀i j. 
eval f [i, j] ↓= list_encode [i]" unfolding f_def by simp define ch1 where "ch1 = Cn 4 r_ifeq [Cn 4 rs [Cn 4 r_snoc [Id 4 1, r_constn 3 1]], Cn 4 rs [Id 4 1], r_dummy 3 (r_const z), r_constn 3 1]" then have ch1: "recfn 4 ch1" "total ch1" using Cn_total prim_recfn_total rs by auto define ch0 where "ch0 = Cn 4 r_ifeq [Cn 4 rs [Cn 4 r_snoc [Id 4 1, r_constn 3 0]], Cn 4 rs [Id 4 1], ch1, r_constn 3 0]" then have ch0_total: "total ch0" "recfn 4 ch0" using Cn_total prim_recfn_total rs ch1 by auto have "eval ch1 [l, v, i, j] ↓= (if s (e_snoc v 1) = s v then z else 1)" for l v i j proof - have "eval ch1 [l, v, i, j] = eval r_ifeq [the (s (e_snoc v 1)), the (s v), z, 1]" unfolding ch1_def using rs by auto then show ?thesis by (simp add: s_total option.expand) qed moreover have "eval ch0 [l, v, i, j] ↓= (if s (e_snoc v 0) = s v then the (eval ch1 [l, v, i, j]) else 0)" for l v i j proof - have "eval ch0 [l, v, i, j] = eval r_ifeq [the (s (e_snoc v 0)), the (s v), the (eval ch1 [l, v, i, j]), 0]" unfolding ch0_def using rs ch1 by auto then show ?thesis by (simp add: s_total option.expand) qed ultimately have ch0: "⋀l v i j. eval ch0 [l, v, i, j] ↓= (if s (e_snoc v 0) ≠ s v then 0 else if s (e_snoc v 1) ≠ s v then 1 else z)" by simp define app where "app = Cn 4 r_ifz [Id 4 0, Id 4 3, ch0]" then have "recfn 4 app" "total app" using ch0_total totalI4 by auto have "eval app [l, v, i, j] ↓= (if l = 0 then j else the (eval ch0 [l, v, i, j]))" for l v i j unfolding app_def using ch0_total by simp with ch0 have app: "⋀l v i j. eval app [l, v, i, j] ↓= (if l = 0 then j else if s (e_snoc v 0) ≠ s v then 0 else if s (e_snoc v 1) ≠ s v then 1 else z)" by simp define g where "g = Cn 4 r_snoc [Id 4 1, app]" with app have g: "⋀l v i j. 
eval g [l, v, i, j] ↓= e_snoc v (if l = 0 then j else if s (e_snoc v 0) ≠ s v then 0 else if s (e_snoc v 1) ≠ s v then 1 else z)" using ‹recfn 4 app› by auto from g_def have "recfn 4 g" "total g" using ‹recfn 4 app› ‹total app› Cn_total Mn_free_imp_total by auto define b where "b = Pr 2 f g" then have "recfn 3 b" using ‹recfn 2 f› ‹recfn 4 g› by simp have b: "eval b [x, i, j] ↓= list_encode (prefixes z i j x)" for x i j proof (induction x) case 0 then show ?case unfolding b_def using f ‹recfn 2 f› ‹recfn 4 g› by simp next case (Suc x) then have "eval b [Suc x, i, j] = eval g [x, the (eval b [x, i, j]), i, j]" using b_def ‹recfn 3 b› by simp also have "... ↓= (let v = list_encode (prefixes z i j x) in e_snoc v (if x = 0 then j else if s (e_snoc v 0) ≠ s v then 0 else if s (e_snoc v 1) ≠ s v then 1 else z))" using g Suc by simp also have "... ↓= (let v = list_encode (prefixes z i j x) in e_snoc v (if x = 0 then j else if s (list_encode (prefixes z i j x @ [0])) ≠ s v then 0 else if s (list_encode (prefixes z i j x @ [1])) ≠ s v then 1 else z))" using list_decode_encode by presburger finally show ?case by simp qed define b' where "b' = Cn 3 b [Id 3 2, Id 3 0, Id 3 1]" then have "recfn 3 b'" using ‹recfn 3 b› by simp with b have b': "⋀i j x. eval b' [i, j, x] ↓= list_encode (prefixes z i j x)" using b'_def by simp define r where "r = Cn 3 r_last [b']" then have "recfn 3 r" using ‹recfn 3 b'› by simp with b' have "⋀i j x. eval r [i, j, x] ↓= last (prefixes z i j x)" using r_def prefixes_length by auto moreover from this have "total r" using totalI3 ‹recfn 3 r› by simp ultimately have "(λi j x. eval r [i, j, x]) = adverse z" unfolding adverse_def by simp with ‹recfn 3 r› ‹total r› show ?thesis by auto qed lemma adverse_in_R1: "adverse z i j ∈ ℛ" proof - from adverse_in_R3 obtain r where r: "recfn 3 r" "total r" "(λi j x. 
eval r [i, j, x]) = adverse z" by blast define rij where "rij = Cn 1 r [r_const i, r_const j, Id 1 0]" then have "recfn 1 rij" "total rij" using r(1,2) Cn_total Mn_free_imp_total by auto from rij_def have "⋀x. eval rij [x] = eval r [i, j, x]" using r(1) by auto with r(3) have "⋀x. eval rij [x] = adverse z i j x" by metis with ‹recfn 1 rij› ‹total rij› show ?thesis by auto qed text ‹Next we show that for every $z$ there are $i$, $j$ such that @{term[names_short] "adverse z i j ∈ V⇩_{0}⇩_{1}"}. The first step is to show that for every $z$, Gödel numbers for @{term[names_short] "adverse z i j"} can be computed uniformly from $i$ and $j$.› lemma phi_translate_adverse: "∃f∈ℛ⇧^{2}.∀i j. φ (the (f i j)) = adverse z i j" proof - obtain r where r: "recfn 3 r" "total r" "(λi j x. eval r [i, j, x]) = adverse z" using adverse_in_R3 by blast let ?p = "encode r" define rf where "rf = Cn 2 (r_smn 1 2) [r_dummy 1 (r_const ?p), Id 2 0, Id 2 1]" then have "recfn 2 rf" and "total rf" using Mn_free_imp_total by simp_all define f where "f ≡ λi j. eval rf [i, j]" with ‹recfn 2 rf› ‹total rf› have "f ∈ ℛ⇧^{2}" by auto have rf: "eval rf [i, j] = eval (r_smn 1 2) [?p, i, j]" for i j unfolding rf_def by simp { fix i j x have "φ (the (f i j)) x = eval r_phi [the (f i j), x]" using phi_def by simp also have "... = eval r_phi [the (eval rf [i, j]), x]" using f_def by simp also have "... = eval (r_universal 1) [the (eval (r_smn 1 2) [?p, i, j]), x]" using rf r_phi_def by simp also have "... = eval (r_universal (2 + 1)) (?p # [i, j] @ [x])" using smn_lemma[of 1 "[i, j]" 2 "[x]"] by simp also have "... = eval (r_universal 3) [?p, i, j, x]" by simp also have "... = eval r [i, j, x]" using r_universal r by force also have "... = adverse z i j x" using r(3) by metis finally have "φ (the (f i j)) x = adverse z i j x" . 
} with ‹f ∈ ℛ⇧^{2}› show ?thesis by blast qed text ‹The second, and final, step is to apply Smullyan's double fixed-point theorem to show the existence of @{term[names_short] adverse} functions in @{term "V⇩_{0}⇩_{1}"}.› (* Smullyan's double fixed-point theorem yields m, n with φ m = adverse 0 m n and φ n = adverse 1 m n; since adverse z m n outputs m at 0 and n at 1 (adverse_at_01), both functions are self-describing and hence in V01. *) lemma adverse_in_V01: "∃m n. adverse 0 m n ∈ V⇩_{0}⇩_{1}∧ adverse 1 m n ∈ V⇩_{0}⇩_{1}" proof - obtain f⇩_{0}where f0: "f⇩_{0}∈ ℛ⇧^{2}" "∀i j. φ (the (f⇩_{0}i j)) = adverse 0 i j" using phi_translate_adverse[of 0] by auto obtain f⇩_{1}where f1: "f⇩_{1}∈ ℛ⇧^{2}" "∀i j. φ (the (f⇩_{1}i j)) = adverse 1 i j" using phi_translate_adverse[of 1] by auto obtain m n where "φ m = φ (the (f⇩_{0}m n))" and "φ n = φ (the (f⇩_{1}m n))" using smullyan_double_fixed_point[OF f0(1) f1(1)] by blast with f0(2) f1(2) have "φ m = adverse 0 m n" and "φ n = adverse 1 m n" by simp_all moreover have "the (adverse 0 m n 0) = m" and "the (adverse 1 m n 1) = n" using adverse_at_01 by simp_all ultimately have "φ (the (adverse 0 m n 0)) = adverse 0 m n" "φ (the (adverse 1 m n 1)) = adverse 1 m n" by simp_all moreover have "adverse 0 m n ∈ ℛ" and "adverse 1 m n ∈ ℛ" using adverse_in_R1 by simp_all ultimately show ?thesis using V01_def by auto qed text ‹Before we prove the main result of this section we need some lemmas regarding the shape of the @{term[names_short] adverse} functions and hypothesis changes of the strategy.› (* Recursive step of adverse, restated with the initial-segment operator ▹ instead of prefixes. *) lemma adverse_Suc: assumes "x > 0" shows "adverse z i j (Suc x) ↓= (if s (e_snoc ((adverse z i j) ▹ x) 0) ≠ s ((adverse z i j) ▹ x) then 0 else if s (e_snoc ((adverse z i j) ▹ x) 1) ≠ s ((adverse z i j) ▹ x) then 1 else z)" proof - have "adverse z i j (Suc x) ↓= (if s (list_encode (prefixes z i j x @ [0])) ≠ s (list_encode (prefixes z i j x)) then 0 else if s (list_encode (prefixes z i j x @ [1])) ≠ s (list_encode (prefixes z i j x)) then 1 else z)" using assms adverse_def by simp then show ?thesis by (simp add: init_adverse_eq_prefixes) qed text ‹The process in the proof sketch (page~\pageref{s:v01_not_total}) consists of steps (a), (b), and (c).
The next abbreviation is true iff.\ step (a) or (b) applies.› abbreviation "hyp_change z i j x ≡ s (e_snoc ((adverse z i j) ▹ x) 0) ≠ s ((adverse z i j) ▹ x) ∨ s (e_snoc ((adverse z i j) ▹ x) 1) ≠ s ((adverse z i j) ▹ x)" text ‹If step (c) applies, the process appends $z$.› lemma adverse_Suc_not_hyp_change: assumes "x > 0" and "¬ hyp_change z i j x" shows "adverse z i j (Suc x) ↓= z" using assms adverse_Suc by simp text ‹While (a) or (b) applies, the process appends a value that forces $S$ to change its hypothesis.› (* While steps (a)/(b) apply, the appended value (0 or 1) does not depend on z, so adverse z and adverse z' agree on the whole prefix up to Suc n. *) lemma while_hyp_change: assumes "∀x≤n. x > 0 ⟶ hyp_change z i j x" shows "∀x≤Suc n. adverse z i j x = adverse z' i j x" using assms proof (induction n) case 0 then show ?case by (simp add: adverse_def le_Suc_eq) next case (Suc n) then have "∀x≤n. x > 0 ⟶ hyp_change z i j x" by simp with Suc have "∀x≤Suc n. x > 0 ⟶ adverse z i j x = adverse z' i j x" by simp moreover have "adverse z i j 0 = adverse z' i j 0" using adverse_at_01 by simp ultimately have zz': "∀x≤Suc n. adverse z i j x = adverse z' i j x" by auto moreover have "adverse z i j ∈ ℛ" "adverse z' i j ∈ ℛ" using adverse_in_R1 by simp_all ultimately have init_zz': "(adverse z i j) ▹ (Suc n) = (adverse z' i j) ▹ (Suc n)" using init_eqI by blast have "adverse z i j (Suc (Suc n)) = adverse z' i j (Suc (Suc n))" proof (cases "s (e_snoc ((adverse z i j) ▹ (Suc n)) 0) ≠ s ((adverse z i j) ▹ (Suc n))") case True then have "s (e_snoc ((adverse z' i j) ▹ (Suc n)) 0) ≠ s ((adverse z' i j) ▹ (Suc n))" using init_zz' by simp then have "adverse z' i j (Suc (Suc n)) ↓= 0" by (simp add: adverse_Suc) moreover have "adverse z i j (Suc (Suc n)) ↓= 0" using True by (simp add: adverse_Suc) ultimately show ?thesis by simp next case False then have "s (e_snoc ((adverse z' i j) ▹ (Suc n)) 0) = s ((adverse z' i j) ▹ (Suc n))" using init_zz' by simp then have "adverse z' i j (Suc (Suc n)) ↓= 1" using init_zz' Suc.prems adverse_Suc by (smt le_refl zero_less_Suc) moreover have "adverse z i j (Suc (Suc n)) ↓= 1" using False
Suc.prems adverse_Suc by auto ultimately show ?thesis by simp qed with zz' show ?case using le_SucE by blast qed text ‹The next result corresponds to Case~1 from the proof sketch.› (* Case 1: if steps (a)/(b) apply forever, s changes its hypothesis infinitely often on adverse z i j and hence does not learn it in the limit. *) lemma always_hyp_change_no_lim: assumes "∀x>0. hyp_change z i j x" shows "¬ learn_lim φ {adverse z i j} s" proof (rule infinite_hyp_changes_not_Lim[of "adverse z i j"]) show "adverse z i j ∈ {adverse z i j}" by simp show "∀n. ∃m⇩_{1}>n. ∃m⇩_{2}>n. s (adverse z i j ▹ m⇩_{1}) ≠ s (adverse z i j ▹ m⇩_{2})" proof fix n from assms obtain m⇩_{1}where m1: "m⇩_{1}> n" "hyp_change z i j m⇩_{1}" by auto have "s (adverse z i j ▹ m⇩_{1}) ≠ s (adverse z i j ▹ (Suc m⇩_{1}))" proof (cases "s (e_snoc ((adverse z i j) ▹ m⇩_{1}) 0) ≠ s ((adverse z i j) ▹ m⇩_{1})") case True then have "adverse z i j (Suc m⇩_{1}) ↓= 0" using m1 adverse_Suc by simp then have "(adverse z i j) ▹ (Suc m⇩_{1}) = e_snoc ((adverse z i j) ▹ m⇩_{1}) 0" by (simp add: init_Suc_snoc) with True show ?thesis by simp next case False then have "adverse z i j (Suc m⇩_{1}) ↓= 1" using m1 adverse_Suc by simp then have "(adverse z i j) ▹ (Suc m⇩_{1}) = e_snoc ((adverse z i j) ▹ m⇩_{1}) 1" by (simp add: init_Suc_snoc) with False m1(2) show ?thesis by simp qed then show "∃m⇩_{1}>n. ∃m⇩_{2}>n. s (adverse z i j ▹ m⇩_{1}) ≠ s (adverse z i j ▹ m⇩_{2})" using less_SucI m1(1) by blast qed qed text ‹The next result corresponds to Case~2 from the proof sketch.› (* Case 2: at the least point xmin where neither (a) nor (b) applies, adverse 0 and adverse 1 share the prefix up to xmin but diverge at Suc xmin, while s outputs the same hypothesis for both — so s is not consistent on both. *) lemma no_hyp_change_no_cons: assumes "x > 0" and "¬ hyp_change z i j x" shows "¬ learn_cons φ {adverse 0 i j, adverse 1 i j} s" proof - let ?P = "λx. x > 0 ∧ ¬ hyp_change z i j x" define xmin where "xmin = Least ?P" with assms have xmin: "?P xmin" "⋀x. x < xmin ⟹ ¬ ?P x" using LeastI[of ?P] not_less_Least[of _ ?P] by simp_all then have "xmin > 0" by simp have "∀x≤xmin - 1. x > 0 ⟶ hyp_change z i j x" using xmin by (metis One_nat_def Suc_pred le_imp_less_Suc) then have "∀x≤xmin. adverse z i j x = adverse 0 i j x" "∀x≤xmin.
adverse z i j x = adverse 1 i j x" using while_hyp_change[of "xmin - 1" z i j 0] using while_hyp_change[of "xmin - 1" z i j 1] by simp_all then have init_z0: "(adverse z i j) ▹ xmin = (adverse 0 i j) ▹ xmin" and init_z1: "(adverse z i j) ▹ xmin = (adverse 1 i j) ▹ xmin" using adverse_in_R1 init_eqI by blast+ then have a0: "adverse 0 i j (Suc xmin) ↓= 0" and a1: "adverse 1 i j (Suc xmin) ↓= 1" using adverse_Suc_not_hyp_change xmin(1) init_z1 by metis+ then have i0: "(adverse 0 i j) ▹ (Suc xmin) = e_snoc ((adverse z i j) ▹ xmin) 0" and i1: "(adverse 1 i j) ▹ (Suc xmin) = e_snoc ((adverse z i j) ▹ xmin) 1" using init_z0 init_z1 by (simp_all add: init_Suc_snoc) moreover have "s (e_snoc ((adverse z i j) ▹ xmin) 0) = s ((adverse z i j) ▹ xmin)" "s (e_snoc ((adverse z i j) ▹ xmin) 1) = s ((adverse z i j) ▹ xmin)" using xmin by simp_all ultimately have "s ((adverse 0 i j) ▹ (Suc xmin)) = s ((adverse z i j) ▹ xmin)" "s ((adverse 1 i j) ▹ (Suc xmin)) = s ((adverse z i j) ▹ xmin)" by simp_all then have "s ((adverse 0 i j) ▹ (Suc xmin)) = s ((adverse 1 i j) ▹ (Suc xmin))" by simp moreover have "(adverse 0 i j) ▹ (Suc xmin) ≠ (adverse 1 i j) ▹ (Suc xmin)" using a0 a1 i0 i1 by (metis append1_eq_conv list_decode_encode zero_neq_one) ultimately show "¬ learn_cons φ {adverse 0 i j, adverse 1 i j} s" using same_hyp_different_init_not_cons by blast qed text ‹Combining the previous two lemmas shows that @{term "V⇩_{0}⇩_{1}"} cannot be learned consistently in the limit by the total strategy $S$.› (* Main result of this subsection: case distinction on whether steps (a)/(b) apply at every x > 0. *) lemma V01_not_in_R_cons: "¬ learn_cons φ V⇩_{0}⇩_{1}s" proof - obtain m n where mn0: "adverse 0 m n ∈ V⇩_{0}⇩_{1}" and mn1: "adverse 1 m n ∈ V⇩_{0}⇩_{1}" using adverse_in_V01 by auto show "¬ learn_cons φ V⇩_{0}⇩_{1}s" proof (cases "∀x>0.
hyp_change 0 m n x") case True then have "¬ learn_lim φ {adverse 0 m n} s" using always_hyp_change_no_lim by simp with mn0 show ?thesis using learn_cons_def learn_lim_closed_subseteq by auto next case False then obtain x where x: "x > 0" "¬ hyp_change 0 m n x" by auto then have "¬ learn_cons φ {adverse 0 m n, adverse 1 m n} s" using no_hyp_change_no_cons[OF x] by simp with mn0 mn1 show ?thesis using learn_cons_closed_subseteq by auto qed qed (* NOTE(review): this "end" closes a block opened before this excerpt — presumably the context fixing the total strategy s; confirm against the full theory. *) end subsubsection ‹@{term "V⇩_{0}⇩_{1}"} is in CONS› text ‹At first glance, consistently learning @{term "V⇩_{0}⇩_{1}"} looks fairly easy. After all every @{term "f ∈ V⇩_{0}⇩_{1}"} provides a Gödel number of itself either at argument 0 or 1. A strategy only has to figure out which one is right. However, the strategy $S$ we are going to devise does not always converge to $f(0)$ or $f(1)$. Instead it uses a technique called ``amalgamation''. The amalgamation of two Gödel numbers $i$ and $j$ is a function whose value at $x$ is determined by simulating $\varphi_i(x)$ and $\varphi_j(x)$ in parallel and outputting the value of the first one to halt. If neither halts the value is undefined. There is a function $a\in\mathcal{R}^2$ such that $\varphi_{a(i,j)}$ is the amalgamation of $i$ and $j$. If @{term "f ∈ V⇩_{0}⇩_{1}"} then $\varphi_{a(f(0), f(1))}$ is total because by definition of @{term "V⇩_{0}⇩_{1}"} we have $\varphi_{f(0)} = f$ or $\varphi_{f(1)} = f$ and $f$ is total. Given a prefix $f^n$ of an @{term "f ∈ V⇩_{0}⇩_{1}"} the strategy $S$ first computes $\varphi_{a(f(0), f(1))}(x)$ for $x = 0, \ldots, n$. For the resulting prefix $\varphi^n_{a(f(0), f(1))}$ there are two cases: \begin{enumerate} \item[Case 1.] It differs from $f^n$, say at minimum index $x$. Then for either $z = 0$ or $z = 1$ we have $\varphi_{f(z)}(x) \neq f(x)$ by definition of amalgamation. This implies $\varphi_{f(z)} \neq f$, and thus $\varphi_{f(1-z)} = f$ by definition of @{term "V⇩_{0}⇩_{1}"}. We set $S(f^n) = f(1 - z)$.
This hypothesis is correct and hence consistent. \item[Case 2.] It equals $f^n$. Then we set $S(f^n) = a(f(0), f(1))$. This hypothesis is consistent by definition of this case. \end{enumerate} In both cases the hypothesis is consistent. If Case~1 holds for some $n$, the same $x$ and $z$ will be found also for all larger values of $n$. Therefore $S$ converges to the correct hypothesis $f(1 - z)$. If Case~2 holds for all $n$, then $S$ always outputs the same hypothesis $a(f(0), f(1))$ and thus also converges. The above discussion tacitly assumes $n \geq 1$, such that both $f(0)$ and $f(1)$ are available to $S$. For $n = 0$ the strategy outputs an arbitrary consistent hypothesis.› text ‹Amalgamation uses the concurrent simulation of functions.› (* parallel i j x runs φ_i(x) and φ_j(x) concurrently; the result encodes which one halted first (first component 0 or 1) together with its value. *) definition parallel :: "nat ⇒ nat ⇒ nat ⇒ nat option" where "parallel i j x ≡ eval r_parallel [i, j, x]" lemma r_parallel': "eval r_parallel [i, j, x] = parallel i j x" using parallel_def by simp (* Behaviour of r_parallel in the four convergence/divergence combinations of φ_i(x) and φ_j(x). *) lemma r_parallel'': shows "eval r_phi [i, x] ↑ ∧ eval r_phi [j, x] ↑ ⟹ eval r_parallel [i, j, x] ↑" and "eval r_phi [i, x] ↓ ∧ eval r_phi [j, x] ↑ ⟹ eval r_parallel [i, j, x] ↓= prod_encode (0, the (eval r_phi [i, x]))" and "eval r_phi [j, x] ↓ ∧ eval r_phi [i, x] ↑ ⟹ eval r_parallel [i, j, x] ↓= prod_encode (1, the (eval r_phi [j, x]))" and "eval r_phi [i, x] ↓ ∧ eval r_phi [j, x] ↓ ⟹ eval r_parallel [i, j, x] ↓= prod_encode (0, the (eval r_phi [i, x])) ∨ eval r_parallel [i, j, x] ↓= prod_encode (1, the (eval r_phi [j, x]))" proof - let ?f = "Cn 1 r_phi [r_const i, Id 1 0]" let ?g = "Cn 1 r_phi [r_const j, Id 1 0]" have *: "⋀x. eval r_phi [i, x] = eval ?f [x]" "⋀x.
eval r_phi [j, x] = eval ?g [x]" by simp_all show "eval r_phi [i, x] ↑ ∧ eval r_phi [j, x] ↑ ⟹ eval r_parallel [i, j, x] ↑" and "eval r_phi [i, x] ↓ ∧ eval r_phi [j, x] ↑ ⟹ eval r_parallel [i, j, x] ↓= prod_encode (0, the (eval r_phi [i, x]))" and "eval r_phi [j, x] ↓ ∧ eval r_phi [i, x] ↑ ⟹ eval r_parallel [i, j, x] ↓= prod_encode (1, the (eval r_phi [j, x]))" and "eval r_phi [i, x] ↓ ∧ eval r_phi [j, x] ↓ ⟹ eval r_parallel [i, j, x] ↓= prod_encode (0, the (eval r_phi [i, x])) ∨ eval r_parallel [i, j, x] ↓= prod_encode (1, the (eval r_phi [j, x]))" using r_parallel[OF *] by simp_all qed (* Restatement of r_parallel'' in terms of φ and the parallel function. *) lemma parallel: "φ i x ↑ ∧ φ j x ↑ ⟹ parallel i j x ↑" "φ i x ↓ ∧ φ j x ↑ ⟹ parallel i j x ↓= prod_encode (0, the (φ i x))" "φ j x ↓ ∧ φ i x ↑ ⟹ parallel i j x ↓= prod_encode (1, the (φ j x))" "φ i x ↓ ∧ φ j x ↓ ⟹ parallel i j x ↓= prod_encode (0, the (φ i x)) ∨ parallel i j x ↓= prod_encode (1, the (φ j x))" using phi_def r_parallel'' r_parallel parallel_def by simp_all lemma parallel_converg_pdec1_0_or_1: assumes "parallel i j x ↓" shows "pdec1 (the (parallel i j x)) = 0 ∨ pdec1 (the (parallel i j x)) = 1" using assms parallel[of i x j] parallel(3)[of j x i] by (metis fst_eqD option.sel prod_encode_inverse) lemma parallel_converg_either: "(φ i x ↓ ∨ φ j x ↓) = (parallel i j x ↓)" using parallel by (metis option.simps(3)) lemma parallel_0: assumes "parallel i j x ↓= prod_encode (0, v)" shows "φ i x ↓= v" using parallel assms by (smt option.collapse option.sel option.simps(3) prod.inject prod_encode_eq zero_neq_one) lemma parallel_1: assumes "parallel i j x ↓= prod_encode (1, v)" shows "φ j x ↓= v" using parallel assms by (smt option.collapse option.sel option.simps(3) prod.inject prod_encode_eq zero_neq_one) (* For f ∈ V01, one of φ(f 0), φ(f 1) equals the total f, so the parallel simulation always converges. *) lemma parallel_converg_V01: assumes "f ∈ V⇩_{0}⇩_{1}" shows "parallel (the (f 0)) (the (f 1)) x ↓" proof - have "f ∈ ℛ ∧ (φ (the (f 0)) = f ∨ φ (the (f 1)) = f)" using assms V01_def by auto then have "φ (the (f 0)) ∈ ℛ ∨ φ (the (f 1)) ∈ ℛ" by auto then have "φ (the (f 0)) x ↓
∨ φ (the (f 1)) x ↓" using R1_imp_total1 by auto then show ?thesis using parallel_converg_either by simp qed text ‹The amalgamation of two Gödel numbers can then be described in terms of @{term "parallel"}.› (* The amalgamation returns the value (second component) of whichever of φ_i, φ_j halts first; undefined iff both diverge. *) definition amalgamation :: "nat ⇒ nat ⇒ partial1" where "amalgamation i j x ≡ if parallel i j x ↑ then None else Some (pdec2 (the (parallel i j x)))" lemma amalgamation_diverg: "amalgamation i j x ↑ ⟷ φ i x ↑ ∧ φ j x ↑" using amalgamation_def parallel by (metis option.simps(3)) lemma amalgamation_total: assumes "total1 (φ i) ∨ total1 (φ j)" shows "total1 (amalgamation i j)" using assms amalgamation_diverg[of i j] total_def by auto lemma amalgamation_V01_total: assumes "f ∈ V⇩_{0}⇩_{1}" shows "total1 (amalgamation (the (f 0)) (the (f 1)))" using assms V01_def amalgamation_total R1_imp_total1 total1_def by (metis (mono_tags, lifting) mem_Collect_eq) (* Partial recursive implementation: project the second component out of r_parallel's result. *) definition "r_amalgamation ≡ Cn 3 r_pdec2 [r_parallel]" lemma r_amalgamation_recfn: "recfn 3 r_amalgamation" unfolding r_amalgamation_def by simp lemma r_amalgamation: "eval r_amalgamation [i, j, x] = amalgamation i j x" proof (cases "parallel i j x ↑") case True then have "eval r_parallel [i, j, x] ↑" by (simp add: r_parallel') then have "eval r_amalgamation [i, j, x] ↑" unfolding r_amalgamation_def by simp moreover from True have "amalgamation i j x ↑" using amalgamation_def by simp ultimately show ?thesis by simp next case False then have "eval r_parallel [i, j, x] ↓" by (simp add: r_parallel') then have "eval r_amalgamation [i, j, x] = eval r_pdec2 [the (eval r_parallel [i, j, x])]" unfolding r_amalgamation_def by simp also have "... ↓= pdec2 (the (eval r_parallel [i, j, x]))" by simp finally show ?thesis by (simp add: False amalgamation_def r_parallel') qed text ‹The function @{term "amalgamate"} computes Gödel numbers of amalgamations.
It corresponds to the function $a$ from the proof sketch.› (* amalgamate i j is a Gödel number of amalgamation i j, obtained from r_amalgamation's index via s-m-n. *) definition amalgamate :: "nat ⇒ nat ⇒ nat" where "amalgamate i j ≡ smn 1 (encode r_amalgamation) [i, j]" lemma amalgamate: "φ (amalgamate i j) = amalgamation i j" proof fix x have "φ (amalgamate i j) x = eval r_phi [amalgamate i j, x]" by (simp add: phi_def) also have "... = eval r_phi [smn 1 (encode r_amalgamation) [i, j], x]" using amalgamate_def by simp also have "... = eval r_phi [encode (Cn 1 (r_universal 3) (r_constn 0 (encode r_amalgamation) # map (r_constn 0) [i, j] @ map (Id 1) [0])), x]" using smn[of 1 "encode r_amalgamation" "[i, j]"] by (simp add: numeral_3_eq_3) also have "... = eval r_phi [encode (Cn 1 (r_universal 3) (r_const (encode r_amalgamation) # [r_const i, r_const j, Id 1 0])), x]" (is "... = eval r_phi [encode ?f, x]") by (simp add: r_constn_def) finally have "φ (amalgamate i j) x = eval r_phi [encode (Cn 1 (r_universal 3) (r_const (encode r_amalgamation) # [r_const i, r_const j, Id 1 0])), x]" . then have "φ (amalgamate i j) x = eval (r_universal 3) [encode r_amalgamation, i, j, x]" unfolding r_phi_def using r_universal[of ?f 1] r_amalgamation_recfn by simp then show "φ (amalgamate i j) x = amalgamation i j x" using r_amalgamation by (simp add: r_amalgamation_recfn r_universal) qed lemma amalgamation_in_P1: "amalgamation i j ∈ 𝒫" using amalgamate by (metis P2_proj_P1 phi_in_P2) lemma amalgamation_V01_R1: assumes "f ∈ V⇩_{0}⇩_{1}" shows "amalgamation (the (f 0)) (the (f 1)) ∈ ℛ" using assms amalgamation_V01_total amalgamation_in_P1 by (simp add: P1_total_imp_R1) (* Recursive counterpart of amalgamate, built from r_smn. *) definition "r_amalgamate ≡ Cn 2 (r_smn 1 2) [r_dummy 1 (r_const (encode r_amalgamation)), Id 2 0, Id 2 1]" lemma r_amalgamate_recfn: "recfn 2 r_amalgamate" unfolding r_amalgamate_def by simp lemma r_amalgamate: "eval r_amalgamate [i, j] ↓= amalgamate i j" proof - let ?p = "encode r_amalgamation" have rs21: "eval (r_smn 1 2) [?p, i, j] ↓= smn 1 ?p [i, j]" using r_smn by simp moreover have "eval r_amalgamate [i, j] = eval (r_smn
1 2) [?p, i, j]" unfolding r_amalgamate_def by auto ultimately have "eval r_amalgamate [i, j] ↓= smn 1 ?p [i, j]" by simp then show ?thesis using amalgamate_def by simp qed text ‹The strategy $S$ distinguishes the two cases from the proof sketch with the help of the next function, which checks if a hypothesis $\varphi_i$ is inconsistent with a prefix $e$. If so, it returns the least $x < |e|$ witnessing the inconsistency; otherwise it returns the length $|e|$. If $\varphi_i$ diverges for some $x < |e|$, so does the function.› (* inconsist i e: None if φ_i diverges below |e|; otherwise the least inconsistency witness, or |e| if φ_i is consistent with e. *) definition inconsist :: partial2 where "inconsist i e ≡ (if ∃x<e_length e. φ i x ↑ then None else if ∃x<e_length e. φ i x ↓≠ e_nth e x then Some (LEAST x. x < e_length e ∧ φ i x ↓≠ e_nth e x) else Some (e_length e))" lemma inconsist_converg: assumes "inconsist i e ↓" shows "inconsist i e = (if ∃x<e_length e. φ i x ↓≠ e_nth e x then Some (LEAST x. x < e_length e ∧ φ i x ↓≠ e_nth e x) else Some (e_length e))" and "∀x<e_length e. φ i x ↓" using inconsist_def assms by (presburger, meson) lemma inconsist_bounded: assumes "inconsist i e ↓" shows "the (inconsist i e) ≤ e_length e" proof (cases "∃x<e_length e. φ i x ↓≠ e_nth e x") case True then show ?thesis using inconsist_converg[OF assms] by (smt Least_le dual_order.strict_implies_order dual_order.strict_trans2 option.sel) next case False then show ?thesis using inconsist_converg[OF assms] by auto qed (* The result equals |e| exactly when φ_i agrees with e everywhere below |e|. *) lemma inconsist_consistent: assumes "inconsist i e ↓" shows "inconsist i e ↓= e_length e ⟷ (∀x<e_length e. φ i x ↓= e_nth e x)" proof show "∀x<e_length e. φ i x ↓= e_nth e x" if "inconsist i e ↓= e_length e" proof (cases "∃x<e_length e. φ i x ↓≠ e_nth e x") case True then show ?thesis using that inconsist_converg[OF assms] by (metis (mono_tags, lifting) not_less_Least option.inject) next case False then show ?thesis using that inconsist_converg[OF assms] by simp qed show "∀x<e_length e.
φ i x ↓= e_nth e x ⟹ inconsist i e ↓= e_length e" unfolding inconsist_def using assms by auto qed lemma inconsist_converg_eq: assumes "inconsist i e ↓= e_length e" shows "∀x<e_length e. φ i x ↓= e_nth e x" using assms inconsist_consistent by auto lemma inconsist_converg_less: assumes "inconsist i e ↓" and "the (inconsist i e) < e_length e" shows "∃x<e_length e. φ i x ↓≠ e_nth e x" and "inconsist i e ↓= (LEAST x. x < e_length e ∧ φ i x ↓≠ e_nth e x)" proof - show "∃x<e_length e. φ i x ↓≠ e_nth e x" using assms by (metis (no_types, lifting) inconsist_converg(1) nat_neq_iff option.sel) then show "inconsist i e ↓= (LEAST x. x < e_length e ∧ φ i x ↓≠ e_nth e x)" using assms inconsist_converg by presburger qed lemma least_bounded_Suc: assumes "∃x. x < upper ∧ P x" shows "(LEAST x. x < upper ∧ P x) = (LEAST x. x <</