Theory CP_FIN_NUM

section ‹FIN is a proper subset of CP\label{s:fin_cp}›

theory CP_FIN_NUM
  imports Inductive_Inference_Basics
begin

text ‹Let $S$ be a FIN strategy for a non-empty class $U$. Let $T$ be a
strategy that hypothesizes an arbitrary function from $U$ while $S$ outputs
``don't know'' and the hypothesis of $S$ otherwise. Then $T$ is a CP strategy
for $U$.›

lemma nonempty_FIN_wrt_impl_CP:
  assumes "U  {}" and "U  FIN_wrt ψ"
  shows "U  CP_wrt ψ"
proof -
  obtain s where "learn_fin ψ U s"
    using assms(2) FIN_wrt_def  by auto
  then have env: "environment ψ U s" and
    fin: "f. f  U 
      i n0. ψ i = f  (n<n0. s (f  n) ↓= 0)  (nn0. s (f  n) ↓= Suc i)"
    using learn_finE by auto
  from assms(1) obtain f0 where "f0  U"
    by auto
  with fin obtain i0 where "ψ i0 = f0"
    by blast
  define t where "t x 
    (if s x  then None else if s x ↓= 0 then Some i0 else Some (the (s x) - 1))"
    for x
  have "t  𝒫"
  proof -
    from env obtain rs where rs: "recfn 1 rs" "x. eval rs [x] = s x"
      by auto
    define rt where "rt = Cn 1 r_ifz [rs, r_const i0, Cn 1 r_dec [rs]]"
    then have "recfn 1 rt"
      using rs(1) by simp
    then have "eval rt [x] ↓= (if s x ↓= 0 then i0 else (the (s x)) - 1)" if "s x " for x
      using rs rt_def that by auto
    moreover have "eval rt [x] " if "eval rs [x] " for x
      using rs rt_def that by simp
    ultimately have "eval rt [x] = t x" for x
      using rs(2) t_def by simp
    with recfn 1 rt show ?thesis by auto
  qed
  have "learn_cp ψ U t"
  proof (rule learn_cpI)
    show "environment ψ U t"
      using env t_def t  𝒫 by simp
    show "i. ψ i = f  (n. t (f  n) ↓= i)" if "f  U" for f
    proof -
      from that fin obtain i n0 where
        i: "ψ i = f" "n<n0. s (f  n) ↓= 0" "nn0. s (f  n) ↓= Suc i"
        by blast
      moreover have "nn0. t (f  n) ↓= i"
        using that t_def i(3) by simp
      ultimately show ?thesis by auto
    qed
    show "ψ (the (t (f  n)))  U" if "f  U" for f n
      using ψ i0 = f0 f0  U t_def fin env that
      by (metis (no_types, lifting) diff_Suc_1 not_less option.sel)
  qed
  then show ?thesis using CP_wrt_def env by auto
qed

lemma FIN_wrt_impl_CP:
  assumes "U  FIN_wrt ψ"
  shows "U  CP_wrt ψ"
proof (cases "U = {}")
  case True
  then have "ψ  𝒫2  U  CP_wrt ψ"
    using CP_wrt_def learn_cpI[of ψ "{}" "λx. Some 0"] const_in_Prim1 by auto
  moreover have "ψ  𝒫2"
    using assms FIN_wrt_def learn_finE by auto
  ultimately show "U  CP_wrt ψ" by simp
next
  case False
  with nonempty_FIN_wrt_impl_CP assms show ?thesis
    by simp
qed

corollary FIN_subseteq_CP: "FIN  CP"
proof
  fix U
  assume "U  FIN"
  then have "ψ. U  FIN_wrt ψ"
    using FIN_def FIN_wrt_def by auto
  then have "ψ. U  CP_wrt ψ"
    using FIN_wrt_impl_CP by auto
  then show "U  CP"
    by (simp add: CP_def CP_wrt_def)
qed

text ‹In order to show the \emph{proper} inclusion, we show @{term
"U0  CP - FIN"}. A CP strategy for @{term "U0"} simply
hypothesizes the function in @{term U0} with the longest prefix of $f^n$ not
ending in zero. For that we define a function computing the index of the
rightmost non-zero value in a list, returning the length of the list if there
is no such value.›

definition findr :: partial1 where
  "findr e 
    if i<e_length e. e_nth e i  0
    then Some (GREATEST i. i < e_length e  e_nth e i  0)
    else Some (e_length e)"

lemma findr_total: "findr e "
  unfolding findr_def by simp

lemma findr_ex:
  assumes "i<e_length e. e_nth e i  0"
  shows "the (findr e) < e_length e"
    and "e_nth e (the (findr e))  0"
    and "i. the (findr e) < i  i < e_length e  e_nth e i = 0"
proof -
  let ?P = "λi. i < e_length e  e_nth e i  0"
  from assms have "i. ?P i" by simp
  then have "?P (Greatest ?P)"
    using GreatestI_ex_nat[of ?P "e_length e"] by fastforce
  moreover have *: "findr e = Some (Greatest ?P)"
    using assms findr_def by simp
  ultimately show "the (findr e) < e_length e" and "e_nth e (the (findr e))  0"
    by fastforce+
  show "i. the (findr e) < i  i < e_length e  e_nth e i = 0"
    using * Greatest_le_nat[of ?P _ "e_length e"] by fastforce
qed

definition "r_findr 
  let g =
    Cn 3 r_ifz
     [Cn 3 r_nth [Id 3 2, Id 3 0],
      Cn 3 r_ifeq [Id 3 0, Id 3 1, Cn 3 S [Id 3 0], Id 3 1],
      Id 3 0]
  in Cn 1 (Pr 1 Z g) [Cn 1 r_length [Id 1 0], Id 1 0]"

lemma r_findr_prim [simp]: "prim_recfn 1 r_findr"
  unfolding r_findr_def by simp

lemma r_findr [simp]: "eval r_findr [e] = findr e"
proof -
  define g where "g =
    Cn 3 r_ifz
     [Cn 3 r_nth [Id 3 2, Id 3 0],
      Cn 3 r_ifeq [Id 3 0, Id 3 1, Cn 3 S [Id 3 0], Id 3 1],
      Id 3 0]"
  then have "recfn 3 g"
    by simp
  with g_def have g: "eval g [j, r, e] ↓=
      (if e_nth e j  0 then j else if j = r then Suc j else r)" for j r e
    by simp
  let ?h = "Pr 1 Z g"
  have "recfn 2 ?h"
    by (simp add: recfn 3 g)
  let ?P = "λe j i. i < j  e_nth e i  0"
  let ?G = "λe j. Greatest (?P e j)"
  have h: "eval ?h [j, e] =
    (if i<j. e_nth e i = 0 then Some j else Some (?G e j))" for j e
  proof (induction j)
    case 0
    then show ?case using recfn 2 ?h by auto
  next
    case (Suc j)
    then have "eval ?h [Suc j, e] = eval g [j, the (eval ?h [j, e]), e]"
      using recfn 2 ?h by auto
    then have "eval ?h [Suc j, e] =
        eval g [j, if i<j. e_nth e i = 0 then j else ?G e j, e]"
      using Suc by auto
    then have *: "eval ?h [Suc j, e] ↓=
      (if e_nth e j  0 then j
       else if j = (if i<j. e_nth e i = 0 then j else ?G e j)
            then Suc j
            else (if i<j. e_nth e i = 0 then j else ?G e j))"
      using g by simp
    show ?case
    proof (cases "i<Suc j. e_nth e i = 0")
      case True
      then show ?thesis using * by simp
    next
      case False
      then have ex: "i<Suc j. e_nth e i  0"
        by auto
      show ?thesis
      proof (cases "e_nth e j = 0")
        case True
        then have ex': "i<j. e_nth e i  0"
          using ex less_Suc_eq by fastforce
        then have "(if i<j. e_nth e i = 0 then j else ?G e j) = ?G e j"
          by metis
        moreover have "?G e j < j"
          using ex' GreatestI_nat[of "?P e j"] less_imp_le_nat by blast
        ultimately have "eval ?h [Suc j, e] ↓= ?G e j"
          using * True by simp
        moreover have "?G e j = ?G e (Suc j)"
          using True by (metis less_SucI less_Suc_eq)
        ultimately show ?thesis using ex by metis
      next
        case False
        then have "eval ?h [Suc j, e] ↓= j"
          using * by simp
        moreover have "?G e (Suc j) = j"
          using ex False Greatest_equality[of "?P e (Suc j)"] by simp
        ultimately show ?thesis using ex by simp
      qed
    qed
  qed
  let ?hh = "Cn 1 ?h [Cn 1 r_length [Id 1 0], Id 1 0]"
  have "recfn 1 ?hh"
    using recfn 2 ?h by simp
  with h have hh: "eval ?hh [e] ↓=
      (if i<e_length e. e_nth e i = 0 then e_length e else ?G e (e_length e))" for e
    by auto
  then have "eval ?hh [e] = findr e" for e
    unfolding findr_def by auto
  moreover have "total ?hh"
    using hh totalI1 recfn 1 ?hh by simp
  ultimately show ?thesis
    using recfn 1 ?hh g_def r_findr_def findr_def by metis
qed

lemma U0_in_CP: "U0  CP"
proof -
  define s where
    "s  λx. if findr x ↓= e_length x then Some 0 else Some (e_take (Suc (the (findr x))) x)"
  have "s  𝒫"
  proof -
    define r where
      "r  Cn 1 r_ifeq [r_findr, r_length, Z, Cn 1 r_take [Cn 1 S [r_findr], Id 1 0]]"
    then have "x. eval r [x] = s x"
      using s_def findr_total by fastforce
    moreover have "recfn 1 r"
      using r_def by simp
    ultimately show ?thesis by auto
  qed
  moreover have "learn_cp prenum U0 s"
  proof (rule learn_cpI)
    show "environment prenum U0 s"
      using s  𝒫 s_def prenum_in_R2 U0_in_NUM by auto
    show "i. prenum i = f  (n. s (f  n) ↓= i)" if "f  U0" for f
    proof (cases "f = (λ_. Some 0)")
      case True
      then have "s (f  n) ↓= 0" for n
        using findr_def s_def by simp
      then have "n0. s (f  n) ↓= 0" by simp
      moreover have "prenum 0 = f"
        using True by auto
      ultimately show ?thesis by auto
    next
      case False
      then obtain ws where ws: "length ws > 0" "last ws  0" "f = ws  0"
        using U0_def f  U0 almost0_canonical by blast
      let ?m = "length ws - 1"
      let ?i = "list_encode ws"
      have "prenum ?i = f"
        using ws by auto
      moreover have "s (f  n) ↓= ?i" if "n  ?m" for n
      proof -
        have "e_nth (f  n) ?m  0"
          using ws that by (simp add: last_conv_nth)
        then have "k<Suc n. e_nth (f  n) k  0"
          using le_imp_less_Suc that by blast
        moreover have
          "(GREATEST k. k < e_length (f  n)  e_nth (f  n) k  0) = ?m"
        proof (rule Greatest_equality)
          show "?m < e_length (f  n)  e_nth (f  n) ?m  0"
            using e_nth (f  n) ?m  0 that by auto
          show "y. y < e_length (f  n)  e_nth (f  n) y  0  y  ?m"
            using ws less_Suc_eq_le by fastforce
        qed
        ultimately have "findr (f  n) ↓= ?m"
          using that findr_def by simp
        moreover have "?m < e_length (f  n)"
          using that by simp
        ultimately have "s (f  n) ↓= e_take (Suc ?m) (f  n)"
          using s_def by simp
        moreover have "e_take (Suc ?m) (f  n) = list_encode ws"
        proof -
          have "take (Suc ?m) (prefix f n) = prefix f ?m"
            using take_prefix[of f ?m n] ws that by (simp add: almost0_in_R1)
          then have "take (Suc ?m) (prefix f n) = ws"
            using ws prefixI by auto
          then show ?thesis by simp
        qed
        ultimately show ?thesis by simp
      qed
      ultimately show ?thesis by auto
    qed
    show "f n. f  U0  prenum (the (s (f  n)))  U0"
      using U0_def by fastforce
  qed
  ultimately show ?thesis using CP_def by blast
qed

text ‹As a bit of an interlude, we can now show that CP is not
closed under the subset relation. This works by removing functions from
@{term "U0"} in a ``noncomputable'' way such that a strategy cannot ensure
that every intermediate hypothesis is in that new class.›

lemma CP_not_closed_subseteq: "V U. V  U  U  CP  V  CP"
proof -
  ― ‹The numbering $g\in\mathcal{R}^2$ enumerates all
  functions $i0^\infty \in U_0$.›
  define g where "g  λi. [i]   0"
  have g_inj: "i = j" if "g i = g j" for i j
  proof -
    have "g i 0 ↓= i" and "g j 0 ↓= j"
      by (simp_all add: g_def)
    with that show "i = j"
      by (metis option.inject)
  qed

  ― ‹Define a class $V$. If the strategy $\varphi_i$ learns
  $g_i$, it outputs a hypothesis for $g_i$ on some shortest prefix $g_i^m$.
  Then the function $g_i^m10^\infty$ is included in the class $V$; otherwise
  $g_i$ is included.›
  define V where "V 
    {if learn_lim φ {g i} (φ i)
     then (prefix (g i) (LEAST n. φ (the (φ i ((g i)  n))) = g i)) @ [1]  0
     else g i |
     i. i  UNIV}"
  have "V  CP_wrt φ"
  proof
    ― ‹Assuming $V \in CP_\varphi$, there is a CP strategy
    $\varphi_i$ for $V$.›
    assume "V  CP_wrt φ"
    then obtain s where s: "s  𝒫" "learn_cp φ V s"
      using CP_wrt_def learn_cpE(1) by auto
    then obtain i where i: "φ i = s"
      using phi_universal by auto

    show False
    proof (cases "learn_lim φ {g i} (φ i)")
      case learn: True
      ― ‹If $\varphi_i$ learns $g_i$, it hypothesizes $g_i$ on
      some shortest prefix $g_i^m$. Thus it hypothesizes $g_i$ on some prefix
      of $g_i^m10^\infty \in V$, too. But $g_i$ is not a class-preserving
      hypothesis because $g_i \notin V$.›
      let ?P = "λn. φ (the (φ i ((g i)   n))) = g i"
      let ?m = "Least ?P"
      have "n. ?P n"
        using i s by (meson learn infinite_hyp_wrong_not_Lim insertI1 lessI)
      then have "?P ?m"
        using LeastI_ex[of ?P] by simp
      define h where "h = (prefix (g i) ?m) @ [1]  0"
      then have "h  V"
        using V_def learn by auto
      have "(g i)   ?m = h   ?m"
      proof -
        have "prefix (g i) ?m = prefix h ?m"
          unfolding h_def by (simp add: prefix_prepend_less)
        then show ?thesis by auto
      qed
      then have "φ (the (φ i (h   ?m))) = g i"
        using ?P ?m by simp
      moreover have "g i  V"
      proof
        assume "g i  V"
        then obtain j where j: "g i =
          (if learn_lim φ {g j} (φ j)
           then (prefix (g j) (LEAST n. φ (the (φ j ((g j)   n))) = g j)) @ [1]  0
           else g j)"
          using V_def by auto
        show False
        proof (cases "learn_lim φ {g j} (φ j)")
          case True
          then have "g i =
              (prefix (g j) (LEAST n. φ (the (φ j ((g j)   n))) = g j)) @ [1]  0"
              (is "g i = ?vs @ [1]  0")
            using j by simp
          moreover have len: "length ?vs > 0" by simp
          ultimately have "g i (length ?vs) ↓= 1"
            by (simp add: prepend_associative)
          moreover have "g i (length ?vs) ↓= 0"
            using g_def len by simp
          ultimately show ?thesis by simp
        next
          case False
          then show ?thesis
            using j g_inj learn by auto
        qed
      qed
      ultimately have "φ (the (φ i (h   ?m)))  V" by simp
      then have "¬ learn_cp φ V (φ i)"
        using h  V learn_cpE(3) by auto
      then show ?thesis by (simp add: i s(2))
    next
      ― ‹If $\varphi_i$ does not learn $g_i$, then $g_i\in V$.
      Hence $\varphi_i$ does not learn $V$.›
      case False
      then have "g i  V"
        using V_def by auto
      with False have "¬ learn_lim φ V (φ i)"
        using learn_lim_closed_subseteq by auto
      then show ?thesis
        using s(2) i by (simp add: learn_cp_def)
    qed
  qed
  then have "V  CP"
    using CP_wrt_phi by simp
  moreover have "V  U0"
    using V_def g_def U0_def by auto
  ultimately show ?thesis using U0_in_CP by auto
qed

text ‹Continuing with the main result of this section, we show that
@{term "U0"} cannot be learned finitely. Any FIN strategy would have
to output a hypothesis for the constant zero function on some prefix. But
@{term "U0"} contains infinitely many other functions starting with
the same prefix, which the strategy then would not learn finitely.›

lemma U0_not_in_FIN: "U0  FIN"
proof
  assume "U0  FIN"
  then obtain ψ s where "learn_fin ψ U0 s"
    using FIN_def by blast
  with learn_finE have cp: "f. f  U0 
      i n0. ψ i = f  (n<n0. s (f  n) ↓= 0)  (nn0. s (f  n) ↓= Suc i)"
    by simp_all

  define z where "z = []  0"
  then have "z  U0"
    using U0_def by auto
  with cp obtain i n0 where i: "ψ i = z" and n0: "nn0. s (z  n) ↓= Suc i"
    by blast

  define w where "w = replicate (Suc n0) 0 @ [1]  0"
  then have "prefix w n0 = replicate (Suc n0) 0"
    by (simp add: prefix_prepend_less)
  moreover have "prefix z n0 = replicate (Suc n0) 0"
    using prefixI[of "replicate (Suc n0) 0" z] less_Suc_eq_0_disj unfolding z_def
    by fastforce
  ultimately have "z   n0 = w  n0"
    by (simp add: init_prefixE)
  with n0 have *: "s (w  n0) ↓= Suc i" by auto

  have "w  U0" using w_def U0_def by auto
  with cp obtain i' n0' where i': "ψ i' = w"
    and n0': "n<n0'. s (w  n) ↓= 0" "nn0'. s (w  n) ↓= Suc i'"
    by blast

  have "i  i'"
  proof
    assume "i = i'"
    then have "w = z"
      using i i' by simp
    have "w (Suc n0) ↓= 1"
      using w_def prepend[of "replicate (Suc n0) 0 @ [1]" "0" "Suc n0"]
      by (metis length_append_singleton length_replicate lessI nth_append_length)
    moreover have "z (Suc n0) ↓= 0"
      using z_def by simp
    ultimately show False
      using w = z by simp
  qed
  then have "s (w  n0) ↓≠ Suc i"
    using n0' by (cases "n0 < n0'") simp_all
  with * show False by simp
qed

theorem FIN_subset_CP: "FIN  CP"
  using U0_in_CP U0_not_in_FIN FIN_subseteq_CP by auto


section ‹NUM and FIN are incomparable\label{s:num_fin}›

text ‹The class $V_0$ of all total recursive functions $f$ where $f(0)$
is a Gödel number of $f$ can be learned finitely by always hypothesizing
$f(0)$. The class is not in NUM and therefore serves to separate NUM and
FIN.›

definition V0 :: "partial1 set" (V0) where
  "V0 = {f. f    φ (the (f 0)) = f}"

lemma V0_altdef: "V0 = {[i]  f| i f. f    φ i = [i]  f}"
  (is "V0 = ?W")
proof
  show "V0  ?W"
  proof
    fix f
    assume "f  V0"
    then have "f  "
      unfolding V0_def by simp
    then obtain i where i: "f 0 ↓= i" by fastforce
    define g where "g = (λx. f (x + 1))"
    then have "g  "
      using skip_R1[OF f  ] by blast
    moreover have "[i]  g = f"
      using g_def i by auto
    moreover have "φ i = f"
      using f  V0 V0_def i by force
    ultimately show "f  ?W" by auto
  qed
  show "?W  V0"
  proof
    fix g
    assume "g  ?W"
    then have "φ (the (g 0)) = g" by auto
    moreover have "g  "
      using prepend_in_R1 g  ?W by auto
    ultimately show "g  V0"
      by (simp add: V0_def)
  qed
qed

lemma V0_in_FIN: "V0  FIN"
proof -
  define s where "s = (λx. Some (Suc (e_hd x)))"
  have "s  𝒫"
  proof -
    define r where "r = Cn 1 S [r_hd]"
    then have "recfn 1 r" by simp
    moreover have "eval r [x] ↓= Suc (e_hd x)" for x
      unfolding r_def by simp
    ultimately show ?thesis
      using s_def by blast
  qed
  have s: "s (f  n) ↓= Suc (the (f 0))" for f n
    unfolding s_def by simp
  have "learn_fin φ V0 s"
  proof (rule learn_finI)
    show "environment φ V0 s"
      using s_def s  𝒫 phi_in_P2 V0_def by auto
    show "i n0. φ i = f  (n<n0. s (f  n) ↓= 0)  (nn0. s (f  n) ↓= Suc i)"
        if "f  V0" for f
      using that V0_def s by auto
  qed
  then show ?thesis using FIN_def by auto
qed

text ‹To every @{term "f  "} a number can be prepended that is
a Gödel number of the resulting function. Such a function is then in $V_0$.

If $V_0$ was in NUM, it would be embedded in a total numbering. Shifting this
numbering to the left, essentially discarding the values at point $0$, would
yield a total numbering for @{term ""}, which contradicts @{thm[source]
R1_not_in_NUM}. This proves @{prop "V0  NUM"}.›

lemma prepend_goedel:
  assumes "f  "
  shows "i. φ i = [i]  f"
proof -
  obtain r where r: "recfn 1 r" "total r" "x. eval r [x] = f x"
    using assms by auto
  define r_psi where "r_psi = Cn 2 r_ifz [Id 2 1, Id 2 0, Cn 2 r [Cn 2 r_dec [Id 2 1]]]"
  then have "recfn 2 r_psi"
    using r(1) by simp
  have "eval r_psi [i, x] = (if x = 0 then Some i else f (x - 1))" for i x
  proof -
    have "eval (Cn 2 r [Cn 2 r_dec [Id 2 1]]) [i, x] = f (x - 1)"
      using r by simp
    then have "eval r_psi [i, x] = eval r_ifz [x, i, the (f (x - 1))]"
      unfolding r_psi_def using recfn 2 r_psi r R1_imp_total1[OF assms] by auto
    then show ?thesis
      using assms by simp
  qed
  with recfn 2 r_psi have "(λi x. if x = 0 then Some i else f (x - 1))  𝒫2"
    by auto
  with kleene_fixed_point obtain i where
    "φ i = (λx. if x = 0 then Some i else f (x - 1))"
    by blast
  then have "φ i = [i]  f" by auto
  then show ?thesis by auto
qed

lemma V0_in_FIN_minus_NUM: "V0  FIN - NUM"
proof -
  have "V0  NUM"
  proof
    assume "V0  NUM"
    then obtain ψ where ψ: "ψ  2" "f. f  V0  i. ψ i = f"
      by auto
    define ψ' where "ψ' i x = ψ i (Suc x)" for i x
    have "ψ'  2"
    proof
      from ψ(1) obtain r_psi where
        r_psi: "recfn 2 r_psi" "total r_psi" "i x. eval r_psi [i, x] = ψ i x"
        by blast
      define r_psi' where "r_psi' = Cn 2 r_psi [Id 2 0, Cn 2 S [Id 2 1]]"
      then have "recfn 2 r_psi'" and "i x. eval r_psi' [i, x] = ψ' i x"
        unfolding r_psi'_def ψ'_def using r_psi by simp_all
      then show "ψ'  𝒫2" by blast
      show "total2 ψ'"
        using ψ'_def ψ(1) by (simp add: total2I)
    qed
    have "i. ψ' i = f" if "f  " for f
    proof -
      from that obtain j where j: "φ j = [j]  f"
        using prepend_goedel by auto
      then have "φ j  V0"
        using that V0_altdef by auto
      with ψ obtain i where "ψ i = φ j" by auto
      then have "ψ' i = f"
        using ψ'_def j by (auto simp add: prepend_at_ge)
      then show ?thesis by auto
    qed
    with ψ'  2 have "  NUM" by auto
    with R1_not_in_NUM show False by simp
  qed
  then show ?thesis
    using V0_in_FIN by auto
qed

corollary FIN_not_subseteq_NUM: "¬ FIN  NUM"
  using V0_in_FIN_minus_NUM by auto


section ‹NUM and CP are incomparable\label{s:num_cp}›

text ‹There are FIN classes outside of NUM, and CP encompasses FIN.
Hence there are CP classes outside of NUM, too.›

theorem CP_not_subseteq_NUM: "¬ CP  NUM"
  using FIN_subseteq_CP FIN_not_subseteq_NUM by blast

text ‹Conversely there is a subclass of @{term "U0"} that
is in NUM but cannot be learned in a class-preserving way. The following
proof is due to Jantke and Beick~cite"jb-cpnii-81". The idea is to
diagonalize against all strategies, that is, all partial recursive
functions.›

theorem NUM_not_subseteq_CP: "¬ NUM  CP"
proof-
  ― ‹Define a family of functions $f_k$.›
  define f where "f  λk. [k]  0"
  then have "f k  " for k
    using almost0_in_R1 by auto

  ― ‹If the strategy $\varphi_k$ learns $f_k$ it hypothesizes
  $f_k$ for some shortest prefix $f_k^{a_k}$. Define functions $f'_k =
  k0^{a_k}10^\infty$.›
  define a where
    "a  λk. LEAST x. (φ (the ((φ k) ((f k)  x)))) = f k"
  define f' where "f'  λk. (k # (replicate (a k) 0) @ [1])  0"
  then have "f' k  " for k
    using almost0_in_R1 by auto

  ― ‹Although $f_k$ and $f'_k$ differ, they share the prefix of length $a_k + 1$.›
  have init_eq: "(f' k)  (a k) = (f k)  (a k)" for k
  proof (rule init_eqI)
    fix x assume "x  a k"
    then show "f' k x = f k x"
      by (cases "x = 0") (simp_all add: nth_append f'_def f_def)
  qed
  have "f k  f' k" for k
  proof -
    have "f k (Suc (a k)) ↓= 0" using f_def by auto
    moreover have "f' k (Suc (a k)) ↓= 1"
      using f'_def prepend[of "(k # (replicate (a k) 0) @ [1])" "0" "Suc (a k)"]
      by (metis length_Cons length_append_singleton length_replicate lessI nth_Cons_Suc
        nth_append_length)
    ultimately show ?thesis by auto
  qed

  ― ‹The separating class $U$ contains $f'_k$ if $\varphi_k$
  learns $f_k$; otherwise it contains $f_k$.›
  define U where
    "U  {if learn_lim φ {f k} (φ k) then f' k else f k |k. k  UNIV}"
  have "U  CP"
  proof
    assume "U  CP"
    have "k. learn_cp φ U (φ k)"
    proof -
      have "ψ s. learn_cp ψ U s"
        using CP_def U  CP by auto
      then obtain s where s: "learn_cp φ U s"
        using learn_cp_wrt_goedel[OF goedel_numbering_phi] by blast
      then obtain k where "φ k = s"
        using phi_universal learn_cp_def learn_lim_def by auto
      then show ?thesis using s by auto
    qed
    then obtain k where k: "learn_cp φ U (φ k)" by auto
    then have learn: "learn_lim φ U (φ k)"
      using learn_cp_def by simp
    ― ‹If $f_k$ was in $U$, $\varphi_k$ would learn it. But then,
    by definition of $U$, $f_k$ would not be in $U$. Hence $f_k \notin U$.›
    have "f k  U"
    proof
      assume "f k  U"
      then obtain m where m: "f k = (if learn_lim φ {f m} (φ m) then f' m else f m)"
        using U_def by auto
      have "f k 0 ↓= m"
        using f_def f'_def m by simp
      moreover have "f k 0 ↓= k" by (simp add: f_def)
      ultimately have "m = k" by simp
      with m have "f k = (if learn_lim φ {f k} (φ k) then f' k else f k)"
        by auto
      moreover have "learn_lim φ {f k} (φ k)"
        using f k  U learn_lim_closed_subseteq[OF learn] by simp
      ultimately have "f k = f' k"
        by simp
      then show False
        using f k  f' k by simp
    qed
    then have "f' k  U" using U_def by fastforce
    then have in_U: "n. φ (the ((φ k) ((f' k)  n)))  U"
      using learn_cpE(3)[OF k] by simp

    ― ‹Since $f'_k \in U$, the strategy $\varphi_k$ learns $f_k$.
    Then $a_k$ is well-defined, $f'^{a_k} = f^{a_k}$, and $\varphi_k$
    hypothesizes $f_k$ on $f'^{a_k}$, which is not a class-preserving
    hypothesis.›
    have "learn_lim φ {f k} (φ k)" using U_def f k  U by fastforce
    then have "i n0. φ i = f k  (nn0. φ k ((f k)  n) ↓= i)"
      using learn_limE(2) by simp
    then obtain i n0 where "φ i = f k  (nn0. φ k ((f k)  n) ↓= i)"
      by auto
    then have "φ (the (φ k ((f k)  (a k)))) = f k"
      using a_def LeastI[of "λx. (φ (the ((φ k) ((f k)  x)))) = f k" n0]
      by simp
    then have "φ (the ((φ k) ((f' k)  (a k)))) = f k"
      using init_eq by simp
    then show False
      using f k  U in_U by metis
  qed
  moreover have "U  NUM"
    using NUM_closed_subseteq[OF U0_in_NUM, of U] f_def f'_def U0_def U_def
    by fastforce
  ultimately show ?thesis by auto
qed


section ‹NUM is a proper subset of TOTAL\label{s:num_total}›

text ‹A NUM class $U$ is embedded in a total numbering @{term ψ}.
The strategy $S$ with $S(f^n) = \min \{i \mid \forall k \le n: \psi_i(k) =
f(k)\}$ for $f \in U$ converges to the least index of $f$ in @{term ψ},
and thus learns $f$ in the limit. Moreover it will be a TOTAL strategy
because @{term ψ} contains only total functions. This shows @{prop "NUM
 TOTAL"}.›

text ‹First we define, for every hypothesis space $\psi$, a
function that tries to determine for a given list $e$ and index $i$ whether
$e$ is a prefix of $\psi_i$. In other words it tries to decide whether $i$ is
a consistent hypothesis for $e$. ``Tries'' refers to the fact that the
function will diverge if $\psi_i(x)\uparrow$ for any $x \le |e|$. We start
with a version that checks the list only up to a given length.›

definition r_consist_upto :: "recf  recf" where
  "r_consist_upto r_psi 
    let g = Cn 4 r_ifeq
      [Cn 4 r_psi [Id 4 2, Id 4 0], Cn 4 r_nth [Id 4 3, Id 4 0], Id 4 1, r_constn 3 1]
    in Pr 2 (r_constn 1 0) g"

lemma r_consist_upto_recfn: "recfn 2 r_psi  recfn 3 (r_consist_upto r_psi)"
  using r_consist_upto_def by simp

lemma r_consist_upto:
  assumes "recfn 2 r_psi"
  shows "k<j. eval r_psi [i, k]  
      eval (r_consist_upto r_psi) [j, i, e] =
        (if k<j. eval r_psi [i, k] ↓= e_nth e k then Some 0 else Some 1)"
    and "¬ (k<j. eval r_psi [i, k] )  eval (r_consist_upto r_psi) [j, i, e] "
proof -
  define g where "g =
    Cn 4 r_ifeq
     [Cn 4 r_psi [Id 4 2, Id 4 0], Cn 4 r_nth [Id 4 3, Id 4 0], Id 4 1, r_constn 3 1]"
  then have "recfn 4 g"
    using assms by simp
  moreover have "eval (Cn 4 r_nth [Id 4 3, Id 4 0]) [j, r, i, e] ↓= e_nth e j" for j r i e
    by simp
  moreover have "eval (r_constn 3 1) [j, r, i, e] ↓= 1" for j r i e
    by simp
  moreover have "eval (Cn 4 r_psi [Id 4 2, Id 4 0]) [j, r, i, e] = eval r_psi [i, j]" for j r i e
    using assms(1) by simp
  ultimately have g: "eval g [j, r, i, e] =
    (if eval r_psi [i, j]  then None
     else if eval r_psi [i, j] ↓= e_nth e j then Some r else Some 1)"
    for j r i e
    using recfn 4 g g_def assms by auto
  have goal1: "k<j. eval r_psi [i, k]  
    eval (r_consist_upto r_psi) [j, i, e] =
      (if k<j. eval r_psi [i, k] ↓= e_nth e k then Some 0 else Some 1)"
    for j i e
  proof (induction j)
    case 0
    then show ?case
      using r_consist_upto_def r_consist_upto_recfn assms eval_Pr_0 by simp
  next
    case (Suc j)
    then have "eval (r_consist_upto r_psi) [Suc j, i, e] =
        eval g [j, the (eval (r_consist_upto r_psi) [j, i, e]), i, e]"
      using assms eval_Pr_converg_Suc g_def r_consist_upto_def r_consist_upto_recfn
      by simp
    also have "... = eval g [j, if k<j. eval r_psi [i, k] ↓= e_nth e k then 0 else 1, i, e]"
      using Suc by auto
    also have "... ↓= (if eval r_psi [i, j] ↓= e_nth e j
        then if k<j. eval r_psi [i, k] ↓= e_nth e k then 0 else 1 else 1)"
      using g by (simp add: Suc.prems)
    also have "... ↓= (if k<Suc j. eval r_psi [i, k] ↓= e_nth e k then 0 else 1)"
      by (simp add: less_Suc_eq)
    finally show ?case by simp
  qed
  then show "k<j. eval r_psi [i, k]  
    eval (r_consist_upto r_psi) [j, i, e] =
    (if k<j. eval r_psi [i, k] ↓= e_nth e k then Some 0 else Some 1)"
    by simp
  show "¬ (k<j. eval r_psi [i, k] )  eval (r_consist_upto r_psi) [j, i, e] "
  proof -
    assume "¬ (k<j. eval r_psi [i, k] )"
    then have "k<j. eval r_psi [i, k] " by simp
    let ?P = "λk. k < j  eval r_psi [i, k] "
    define kmin where "kmin = Least ?P"
    then have "?P kmin"
      using LeastI_ex[of ?P] k<j. eval r_psi [i, k]  by auto
    from kmin_def have "k. k < kmin  ¬ ?P k"
      using kmin_def not_less_Least[of _ ?P] by blast
    then have "k < kmin. eval r_psi [i, k] "
      using ?P kmin by simp
    then have "eval (r_consist_upto r_psi) [kmin, i, e] =
        (if k<kmin. eval r_psi [i, k] ↓= e_nth e k then Some 0 else Some 1)"
      using goal1 by simp
    moreover have "eval r_psi [i, kmin] "
      using ?P kmin by simp
    ultimately have "eval (r_consist_upto r_psi) [Suc kmin, i, e] "
      using r_consist_upto_def g assms by simp
    moreover have "j  kmin"
      using ?P kmin by simp
    ultimately show "eval (r_consist_upto r_psi) [j, i, e] "
      using r_consist_upto_def r_consist_upto_recfn ?P kmin eval_Pr_converg_le assms
      by (metis (full_types) Suc_leI length_Cons list.size(3) numeral_2_eq_2 numeral_3_eq_3)
  qed
qed

text ‹The next function provides the consistency decision functions we
need.›

definition consistent :: "partial2  partial2" where
  "consistent ψ i e 
    if k<e_length e. ψ i k 
    then if k<e_length e. ψ i k ↓= e_nth e k
         then Some 0 else Some 1
    else None"

text ‹Given $i$ and $e$, @{term "consistent ψ"} decides whether $e$
is a prefix of $\psi_i$, provided $\psi_i$ is defined for the length of
$e$.›

definition r_consistent :: "recf  recf" where
  "r_consistent r_psi 
     Cn 2 (r_consist_upto r_psi) [Cn 2 r_length [Id 2 1], Id 2 0, Id 2 1]"

lemma r_consistent_recfn [simp]: "recfn 2 r_psi  recfn 2 (r_consistent r_psi)"
  using r_consistent_def r_consist_upto_recfn by simp

lemma r_consistent_converg:
  assumes "recfn 2 r_psi" and "k<e_length e. eval r_psi [i, k] "
  shows "eval (r_consistent r_psi) [i, e] ↓=
    (if k<e_length e. eval r_psi [i, k] ↓= e_nth e k then 0 else 1)"
proof -
  have "eval (r_consistent r_psi) [i, e] = eval (r_consist_upto r_psi) [e_length e, i, e]"
    using r_consistent_def r_consist_upto_recfn assms(1) by simp
  then show ?thesis using assms r_consist_upto(1) by simp
qed

lemma r_consistent_diverg:
  assumes "recfn 2 r_psi" and "k<e_length e. eval r_psi [i, k] "
  shows "eval (r_consistent r_psi) [i, e] "
  unfolding r_consistent_def
  using r_consist_upto_recfn[OF assms(1)] r_consist_upto[OF assms(1)] assms(2)
  by simp

lemma r_consistent:
  assumes "recfn 2 r_psi" and "x y. eval r_psi [x, y] = ψ x y"
  shows "eval (r_consistent r_psi) [i, e] = consistent ψ i e"
proof (cases "k<e_length e. ψ i k ")
  case True
  then have "k<e_length e. eval r_psi [i, k] "
    using assms by simp
  then show ?thesis
    unfolding consistent_def using True by (simp add: assms r_consistent_converg)
next
  case False
  then have "consistent ψ i e "
    unfolding consistent_def by auto
  moreover have "eval (r_consistent r_psi) [i, e] "
    using r_consistent_diverg[OF assms(1)] assms False by simp
  ultimately show ?thesis by simp
qed

lemma consistent_in_P2:
  assumes "ψ  𝒫2"
  shows "consistent ψ  𝒫2"
  using assms r_consistent P2E[OF assms(1)] P2I r_consistent_recfn by metis

lemma consistent_for_R2:
  assumes "ψ  2"
  shows "consistent ψ i e =
    (if j<e_length e. ψ i j ↓= e_nth e j then Some 0 else Some 1)"
  using assms by (simp add: consistent_def)

lemma consistent_init:
  assumes "ψ  2" and "f  "
  shows "consistent ψ i (f  n) = (if ψ i  n = f  n then Some 0 else Some 1)"
  using consistent_def[of _ _ "init f n"] assms  init_eq_iff_eq_upto by simp

lemma consistent_in_R2:
  assumes "ψ  2"
  shows "consistent ψ  2"
  using total2I consistent_in_P2 consistent_for_R2[OF assms] P2_total_imp_R2 R2_imp_P2 assms
  by (metis option.simps(3))

text ‹For total hypothesis spaces the next function computes the
minimum hypothesis consistent with a given prefix. It diverges if no such
hypothesis exists.›

definition min_cons_hyp :: "partial2  partial1" where
  "min_cons_hyp ψ e 
    if i. consistent ψ i e ↓= 0 then Some (LEAST i. consistent ψ i e ↓= 0) else None"

lemma min_cons_hyp_in_P1:
  assumes "ψ  2"
  shows "min_cons_hyp ψ  𝒫"
proof -
  from assms consistent_in_R2 obtain rc where
    rc: "recfn 2 rc" "total rc" "i e. eval rc [i, e] = consistent ψ i e"
    using R2E[of "consistent ψ"] by metis
  define r where "r = Mn 1 rc"
  then have "recfn 1 r"
    using rc(1) by simp
  moreover from this have "eval r [e] = min_cons_hyp ψ e" for e
    using r_def eval_Mn'[of 1 rc "[e]"] rc min_cons_hyp_def assms
    by (auto simp add: consistent_in_R2)
  ultimately show ?thesis by auto
qed

text ‹The function @{term "min_cons_hyp ψ"} is a strategy for
learning all NUM classes embedded in @{term ψ}. It is an example of an
``identification-by-enumeration'' strategy.›

lemma NUM_imp_learn_total:
  assumes "ψ  2" and "U  NUM_wrt ψ"
  shows "learn_total ψ U (min_cons_hyp ψ)"
proof (rule learn_totalI)
  have ex_psi_i_f: "i. ψ i = f" if "f  U" for f
    using assms that NUM_wrt_def by simp
  moreover have consistent_eq_0: "consistent ψ i ((ψ i)  n) ↓= 0" for i n
    using assms by (simp add: consistent_init)
  ultimately have "f n. f  U  min_cons_hyp ψ (f  n) "
    using min_cons_hyp_def assms(1) by fastforce
  then show env: "environment ψ U (min_cons_hyp ψ)"
    using assms NUM_wrt_def min_cons_hyp_in_P1 NUM_E(1) NUM_I by auto

  show "f n. f  U  ψ (the (min_cons_hyp ψ (f  n)))  "
    using assms by (simp)

  show "i. ψ i = f  (n. min_cons_hyp ψ (f  n) ↓= i)" if "f  U" for f
  proof -
    from that env have "f  " by auto

    let ?P = "λi. ψ i = f"
    define imin where "imin  Least ?P"
    with ex_psi_i_f that have imin: "?P imin" "j. ?P j  j  imin"
      using LeastI_ex[of ?P] Least_le[of ?P] by simp_all
    then have f_neq: "ψ i  f" if "i < imin" for i
      using leD that by auto

    let ?Q = "λi n. ψ i  n  f  n"
    define nu :: "nat  nat" where "nu = (λi. SOME n. ?Q i n)"
    have nu_neq: "ψ i  (nu i)  f  (nu i)" if "i < imin" for i
    proof -
      from assms have "ψ i  " by simp
      moreover from assms imin(1) have "f  " by auto
      moreover have "f  ψ i"
        using that f_neq by auto
      ultimately have "n. f  n  (ψ i)  n"
        using neq_fun_neq_init by simp
      then show "?Q i (nu i)"
        unfolding nu_def using someI_ex[of "λn. ?Q i n"] by metis
    qed

    have "n0. nn0. min_cons_hyp ψ (f  n) ↓= imin"
    proof (cases "imin = 0")
      case True
      then have "n. min_cons_hyp ψ (f  n) ↓= imin"
        using consistent_eq_0 assms(1) imin(1) min_cons_hyp_def by auto
      then show ?thesis by simp
    next
      case False
      define n0 where "n0 = Max (set (map nu [0..<imin]))" (is "_ = Max ?N")
      have "nu i  n0" if "i < imin" for i
      proof -
        have "finite ?N"
          using n0_def by simp
        moreover have "?N  {}"
          using False n0_def by simp
        moreover have "nu i  ?N"
          using that by simp
        ultimately show ?thesis
          using that Max_ge n0_def by blast
      qed
      then have "ψ i  n0  f  n0" if "i < imin" for i
        using nu_neq neq_init_forall_ge that by blast
      then have *: "ψ i  n  f  n" if "i < imin" and "n  n0" for i n
        using nu_neq neq_init_forall_ge that by blast

      have "ψ imin  n = f  n" for n
        using imin(1) by simp
      moreover have "(consistent ψ i (f  n) ↓= 0) = (ψ i  n = f  n)" for i n
        by (simp add: f   assms(1) consistent_init)
      ultimately have "min_cons_hyp ψ (f  n) ↓= (LEAST i. ψ i  n = f  n)" for n
        using min_cons_hyp_def[of ψ "f  n"] by auto
      moreover have "(LEAST i. ψ i  n = f  n) = imin" if "n  n0" for n
      proof (rule Least_equality)
        show "ψ imin  n = f  n"
          using imin(1) by simp
        show "y. ψ y  n = f  n  imin  y"
          using imin * leI that by blast
      qed
      ultimately have "min_cons_hyp ψ (f  n) ↓= imin" if "n  n0" for n
        using that by blast
      then show ?thesis by auto
    qed
    with imin(1) show ?thesis by auto
  qed
qed

corollary NUM_subseteq_TOTAL: "NUM  TOTAL"
proof
  fix U
  assume "U  NUM"
  then have "ψ2. fU. i. ψ i = f" by auto
  then have "ψ2. U  NUM_wrt ψ"
    using NUM_wrt_def by simp
  then have "ψ s. learn_total ψ U s"
    using NUM_imp_learn_total by auto
  then show "U  TOTAL"
    using TOTAL_def by auto
qed

text ‹The class @{term V0} is in @{term "TOTAL - NUM"}. ›

theorem NUM_subset_TOTAL: "NUM  TOTAL"
  using CP_subseteq_TOTAL FIN_not_subseteq_NUM FIN_subseteq_CP NUM_subseteq_TOTAL
  by auto

end