Theory Boolos_Curious_Inference_Automated

section ‹Boolos Curious Proof Problem›

theory Boolos_Curious_Inference_Automated imports Main
begin   (* sledgehammer_params[timeout=60] *)

text‹First declare a non-empty type @{term "i"} of objects (natural numbers in the context of this paper).›
typedecl i


text ‹The signature for BCP consists of four uninterpreted constant symbols.›
consts
 e :: "i" ("e")  ―‹one›
 s :: "ii" ("s_")  ―‹successor function›
 f :: "iii" ("f__")  ―‹binary function; axiomatised below as Ackermann function›
 d :: "ibool" ("d_")  ―‹arbitrary uninterpreted unary predicate›


text ‹Axioms @{term "A1"}-@{term "A3"} model the Ackermann function and Axioms @{term "A4"} and @{term "A5"}
stipulate the properties of predicate @{term "d"}.›
axiomatization where
A1: "n. fne = se"  and   ―‹Axiom 1 for Ackermann function @{term "f"}
A2: "y. fesy = ssfey"  and   ―‹Axiom 2 for Ackermann function @{term "f"}
A3: "x y. fsxsy = fxf(sx)y"  and  ―‹Axiom 3 for Ackermann function @{term "f"}
A4: "de"  and  ―‹@{term "d"} (an arbitrary predicate) holds for one›
A5: "x. dx  dsx"  ―‹if @{term "d"} holds for @{term "x"} it also holds for the successor of @{term "x"}

text ‹Trying to prove automatically with \textit{Sledgehammer} @{cite Sledgehammer} that @{term "d"} holds
for fssssesssse still fails at this point. As Boolos'
explains, a naive first-order proof would require more modus ponens steps (with A5 and A4) than
there are atoms in the
universe.›
lemma "dfssssesssse" ―‹sledgehammer› oops  ―‹no proof found; timeout›

section ‹Automated Proof: Using Two Definitions›

text ‹We interactively provide two shorthand notations @{term "ind"} and @{term "p"}. After their
introduction a proof can be found fully automatically with \textit{Sledgehammer}. @{term "ind X"}
is defined to hold if
and only if  @{term "X"} is `inductive' over  @{term "e"} and  @{term "s"}.
 @{term "pxy"} holds if and only if @{term "pxy"} is in smallest inductive set
over  @{term "e"} and  @{term "s"}.
Note that the symbols @{term "ind"} and @{term "p"} do not occur in the BCP problem
statement.›
definition ind ("ind_") where "ind  λX. Xe  (x. X x  X sx)"
definition p ("p_") where "p  λx y. (λz::i. (X. ind X  X z)) f x y"

text ‹Using these definitions, state-of-art higher-order ATPs integrated with Isabelle/HOL can now fully
automatically prove Boolos' Curious Problem:›
theorem "dfssssesssse"  ―‹@{term sledgehammer} (*
   sledgehammer [z3]  (* z3 found a proof... *)
   sledgehammer [vampire]  (* vampire found a proof... *)
   sledgehammer [remote_leo2]  (* remote_leo2 found a proof... *)
   sledgehammer [e] (* e found a proof ... *)
   sledgehammer [zipperposition]  (* zipperposition found a proof... *)
   sledgehammer [cvc4]  (* No proof found *)
   sledgehammer [verit] (* No proof found *)
   sledgehammer [spass] (* No proof found *)
   sledgehammer [remote_leo3]  (* No proof found *)  *)
 by (metis A1 A2 A3 A4 A5 ind_def p_def)  ―‹metis proof reconstruction succeeds›

text ‹In experiments (using a MacBook Pro (16-inch, 2019), 2,6 GHz 6-Core Intel Core i7,
16 GB 2667 MHz DDR4) running Isabelle2022 automatically found proofs were reported by various theorem
provers, including
@{term "Z3"} @{cite Z3},
@{term "Vampire"} @{cite Vampire},
@{term "Zipperposition"} @{cite Zipperpin},
@{term "E"} @{cite Eprover},
and Leo-II (@{term "remote_leo2"}) @{cite Leo2}.›

section ‹Automated Proof: Using a Single Definition›

definition p' ("p'_") where
  "p'  λx y. (λz::i. (X. (Xe  (x. X x  X sx))  X z)) f x y"

theorem "dfssssesssse"  ―‹@{term "sledgehammer (A1 A2 A3 A4 A5 p'_def)"}  (*
  sledgehammer[remote_leo2]  (A1 A2 A3 A4 A5 p'_def)
 ―‹sledgehammer [z3] (A1 A2 A3 A4 A5 p'\_def)   (* z3 found a proof... *)
   sledgehammer [vampire] (A1 A2 A3 A4 A5 p'\_def)  (* vampire found a proof... *)
   sledgehammer [remote\_leo2] (A1 A2 A3 A4 A5 p'\_def)  (* remote\_leo2 found a proof... *)
   sledgehammer [zipperposition] (A1 A2 A3 A4 A5 p'\_def)  (* zipperposition found a proof... *)
   sledgehammer [e] (A1 A2 A3 A4 A5 p'\_def)  (* e found a proof ... *)
   sledgehammer [cvc4] (A1 A2 A3 A4 A5 p'\_def)  (* No proof found *)
   sledgehammer [verit] (A1 A2 A3 A4 A5 p'\_def)  (* No proof found *)
   sledgehammer [spass] (A1 A2 A3 A4 A5 p'\_def)  (* No proof found *)
   sledgehammer [remote\_leo3] (A1 A2 A3 A4 A5 p'\_def)  (* No proof found *)› *)
  by (smt A1 A2 A3 A4 A5 p'_def)  ―‹smt proof reconstruction succeeds›

text ‹In experiments (using the same environment as above) several provers reported
proofs, including @{term "Z3"} and @{term "E"}.›


section‹Proof Reconstruction: E's Proof from @{cite BCI}
text ‹In this section we reconstruct and verify in Isabelle/HOL the proof argument found by E
as reported in @{cite BCI}. Analysing E's proof we can identify the following five lemmata:›
lemma L1a: "ind d" by (simp add: A4 A5 ind_def)
lemma L1b: "x. pxe" by (simp add: A1 ind_def p_def)
lemma L1c: "ind pe" by (metis A2 L1b ind_def p_def)
lemma L2: "x Y. (ind px  ind psx  ind Y)  Yfsxsssse" by (metis ind_def p_def)
lemma L3: "x. ind px  ind psx" by (metis A3 L1b ind_def p_def)


text ‹Using these lemmata E then constructs the following refutation argument:›
theorem "dfssssesssse"
  proof -
    { assume L4: "¬dfssssesssse"
      have L5: "¬ind psssse  ¬ind pssse" using L1a L1b L1c L2 L4 by blast
      have L6: "¬ind pssse  ¬ind psse  ¬ind pse" using L3 L5 by blast
      have "False" using L1c L3 L6 by auto
    }
  then show ?thesis by blast
  qed

text ‹This refutation argument can alternatively be replaced by:›
theorem "dfssssesssse"
  proof -
    have L7: "ind pse" using L1c L3 by blast
    have L8: "ind psse" using L3 L7 by blast
    have L9: "ind pssse" using L3 L8 by blast
    have L10: "ind psssse" using L3 L9 by blast
    have L11: "(ind pssse  ind psssse  ind d)" using L10 L1a L9 by blast
  then show ?thesis using L2 by blast
qed


text ‹Lemma @{term "L2"} can actually be simplified (this was also hinted at by an anonymous
reviewer of @{cite J63}):›
lemma L2_1: "x Y. (ind px  ind Y)  Yfxe" by (metis ind_def p_def)
lemma L2_2: "x Y. (ind px  ind Y)  Yfxse" by (metis ind_def p_def)
lemma L2_3: "x Y. (ind px  ind Y)  Yfxsse" by (metis ind_def p_def)
lemma L2_4: "x Y. (ind px  ind Y)  Yfxssse" by (metis ind_def p_def)
lemma L2_5: "x Y. (ind px  ind Y)  Yfxsssse" by (metis ind_def p_def)
text ‹etc.›

text ‹The following statement, however, has a countermodel.›
lemma L2_1: "x y Y. (ind px  ind Y)  Yf x y"
  nitpick[user_axioms,expect=genuine] oops ―‹countermodel by nitpick›


  text ‹Instead of using @{term "L2"} we can now use @{term "L2_5"} in our proof of BCP from above:›

theorem "dfssssesssse"
  proof -
    have L7: "ind pse" using L1c L3 by blast
    have L8: "ind psse" using L3 L7 by blast
    have L9: "ind pssse" using L3 L8 by blast
    have L10: "ind psssse" using L3 L9 by blast
    have L11: "(ind pssse  ind psssse  ind d)" using L10 L1a L9 by blast
  then show ?thesis using L2_5 by blast
qed



section‹Conclusion›
text ‹Isabelle/HOL data sources were provided in relation to @{cite J63}, which describes recent progress
and remaining challenges in automating Boolos' Curious Problem.
The interactive introduction of lemmas, as still exercised in @{cite KetlandAFP} and earlier
in @{cite BoolosOmegaMizar}, is no longer necessary, since higher-order theorem provers are
now able to speculate the required lemmas automatically, provided that appropriate shorthand
notations are provided (see the definitions of @{term "ind"} and @{term "p"}).

Since Boolos' example for speeding up proofs is interesting in several respects, it is now being used as
an exercise in lecture courses at several universities (including University of Bamberg, University
of Greifswald, University of Luxembourg, Free University of Berlin); having our source files
permanently maintained in AFP hence makes sense.›

end