Theory Simple_DFS

section ‹\isaheader{Simple DFS Algorithm}›
theory Simple_DFS
imports 
  Collections.Refine_Dflt 
begin



section ‹Graphs Implemented by Successor Function›
subsection ‹Refinement relation›
definition "E_of_succ succ  {(u,v). vsucc u}"
definition [to_relAPP]: "succg_rel R  (R  Rlist_set_rel) O br E_of_succ (λ_. True)"


consts i_graph :: "interface  interface"
  ― ‹Define the conceptual type of graphs.›

lemmas [autoref_rel_intf] = REL_INTFI[of "succg_rel" i_graph]
  ― ‹Declare succg_rel› to be a relator for graphs.›


lemma in_id_succg_rel_iff: "(s,E)Idsuccg_rel  (v. distinct (s v)  set (s v) = E``{v})"  
  ― ‹Simplification in case of identity refinements for nodes›
  unfolding succg_rel_def br_def E_of_succ_def list_set_rel_def
  by (auto; force dest: fun_relD)
  
  
subsection ‹Successor Operation›
definition [simp]: "op_succ E u  E``{u}"
  ― ‹Define the abstract successor operation.›

context begin interpretation autoref_syn .
  lemma [autoref_op_pat]: "E``{v}  op_succ$E$v" by simp
  ― ‹Declare a rewrite rule to operation identification.›
end

lemma refine_succg_succs[autoref_rules]: 
  "(λsuccs v. succs v,op_succ)Rsuccg_relRRlist_set_rel"
  ― ‹Declare implementation of successor function to Autoref.›
  apply (intro fun_relI)
  apply (auto simp add: succg_rel_def br_def E_of_succ_def dest: fun_relD)
  done

section ‹DFS Algorithm›
text ‹
  We define a simple DFS-algorithm, prove a simple correctness
  property, and do data refinement to an efficient implementation.
›

subsection ‹Definition›

text ‹Recursive DFS-Algorithm. 
  E› is the edge relation of the graph, tgt› the node to 
  search for, and src› the start node.
  Already explored nodes are 
  stored in V›.›

context 
  fixes E :: "('v×'v) set" and src :: 'v and tgt :: 'v
begin
  definition dfs :: "bool nres" where
    "dfs  do {
      (_,r)  RECT (λdfs (V,v). 
        if v=tgt then RETURN (V,True)
        else do {
          let V = insert v V;
          FOREACHc (E``{v}) (λ(V,brk). ¬brk) (λv' (V,brk). 
            if v'V then dfs (V,v') else RETURN (V,False)
          ) (V,False)
        }) ({},src);
      RETURN r
    }"

  definition "reachable  {v. (src,v)E*}"

subsection ‹Correctness Proof›  
  lemma dfs_correct:
    assumes FIN: "finite (E*``{src})"
    shows "dfs  SPEC (λr. r  (src,tgt)E*)"
  proof -
    from FIN have [simp, intro!]: "finite reachable"
      unfolding reachable_def by (auto simp: Image_def)

    text ‹We first define the (inductive) pre and postconditions for 
      the recursion, and the loop invariant›  
          
    define rpre where "rpre  λS (V,v). 
        vreachable - V 
       Vreachable
       SV
       tgtV
       E``(V-S)  V"

    define rpost where "rpost  λS (V,v) (V',r). 
          (rtgtreachable) 
         (¬r  
            tgtV'
           VV' 
           vV'
           V'reachable
           E``(V'-S)  V'
          )
      "

    define fe_inv where "fe_inv  λS V v it (V',r).
        (rtgtreachable)
       (¬r  
          tgt  V'
         insert v VV'
         E``{v} - it  V'
         V'reachable
         Sinsert v V
         E``(V'-S)  V'  it 
         E``(V'-insert v S)  V'
        )"

    text ‹Then we prove the verification conditions that our VCG will generate as separate facts.
      Of course, the workflow is to first let run the VCG, and then extract these facts from 
      its output. This way, we can make explicit the ideas of the proof, and present them separately 
      from the mainly technical VC generation.
     ›
        
    have vc_pre_initial: "rpre {} ({}, src)"
      by (auto simp: rpre_def reachable_def)

    {
      (* Case: Found node *)
      fix S V
      assume "rpre S (V,tgt)"
      hence "rpost S (V,tgt) (insert tgt V, True)"
        unfolding rpre_def rpost_def
        by auto
    } note vc_node_found = this

    { (* The set of nodes that the foreach loop iterates over is finite *)
      fix S V v
      assume "rpre S (V, v)"
      hence "finite (E``{v})"
        unfolding rpre_def reachable_def
        using FIN  
        apply auto
        by (meson finite_Image_subset finite_reachable_advance r_le_rtrancl)
    } note vc_foreach_finite = this
  
    {
      (* fe_inv initial *)
      fix S V v
      assume A: "v  V" "v  tgt"
        and PRE: "rpre S (V, v)"
      hence "fe_inv S V v (E``{v}) (insert v V, False)"
        unfolding fe_inv_def rpre_def by auto
    } note vc_enter_foreach = this

    {
      (* fe_inv ensures rpre *)
      fix S V v v' it V'
      assume A: "v'V'" "v'  it" "it  E``{v}"
        and FEI: "fe_inv S V v it (V', False)"
        and PRE: "rpre S (V, v)"

      from A have "(v,v')E" by auto
      moreover from PRE have "v  reachable" by (auto simp: rpre_def)
      hence "E``{v}  reachable" by (auto simp: reachable_def)
      ultimately have [simp]: "v'reachable" by blast

      from A FEI PRE have "rpre (insert v S) (V', v')"
        unfolding rpre_def fe_inv_def by auto
    } note vc_rec_pre = this

    {
      (* rpost implies fe_inv *)
      fix S V V' v v' it Vr''
      assume "fe_inv S V v it (V', False)"
        and "rpost (insert v S) (V', v') Vr''"
      hence "fe_inv S V v (it - {v'}) Vr''"
        unfolding rpre_def rpost_def fe_inv_def by clarsimp (auto; blast)
    } note vc_iterate_foreach = this

    { (* Recursive call in variant relation *)
      fix S V V' v it
      assume "rpre S (V, v)"  
      and "fe_inv S V v it (V', False)"
      hence "(V',V)finite_psupset local.reachable"
        unfolding fe_inv_def rpre_def
        by (auto simp: finite_psupset_def)
      
    } note vc_rec_var = this
      
    {
      (* fe_inv preserved if ignoring visitied node*)
      fix S V V' v v' it Vr''
      assume "fe_inv S V v it (V', False)"
        and "v'V'"
      hence "fe_inv S V v (it - {v'}) (V', False)"
        unfolding fe_inv_def by auto
    } note vc_foreach_ignore = this


    {
      (* fe_inv (completed) implies rpost *)
      fix S V v V'
      assume FEI: "fe_inv S V v {} (V', False)"
      have "rpost S (V, v) (V', False)"
        unfolding rpost_def
        using FEI by (auto simp: fe_inv_def) []
    } note vc_foreach_completed_imp_post = this

    {
      (* fe_inv (interrupted) implies rpost *)
      fix S V v V' it
      assume FEI: "fe_inv S V v it (V', True)"
      hence "rpost S (V, v) (V', True)"
        by (auto simp add: rpost_def fe_inv_def) []
    } note vc_foreach_interrupted_imp_post = this

    { (* The postcondition of the recursion implies our desired specification *)
      fix V r
      assume "rpost {} ({}, src) (V, r)"
      hence "r = ((src, tgt)  E*)"
        by (auto 
          simp: rpost_def reachable_def 
          dest: Image_closed_trancl 
          intro: rev_ImageI)
    } note vc_rpost_imp_spec = this

    text ‹The verification condition generation is technical. We invoke the VCG,
      and discharge the generated VCs. The trivial ones are discharged on the spot, the
      more complicated ones have been extracted to separate facts, that we use in the proof 
      text below.
    ›
    show ?thesis
      unfolding dfs_def
      apply (refine_vcg RECT_rule_arb[
        where 
              pre=rpre and arb="{}"
          and M="λa x. SPEC (rpost a x)"
          and V="inv_image (finite_psupset reachable) fst", 
        THEN order_trans
        ])
      subgoal by blast  (* Well-Foundedness  *)
      subgoal by (rule vc_pre_initial)
      subgoal by (auto simp add: rpre_def rpost_def) (* Found tgt node *)
      
      subgoal for f S _ V v (* Entering inner FOREACH loop *)
        apply (refine_rcg refine_vcg 
          FOREACHc_rule'[where I="fe_inv S V v" (*for S V*)]
          )
        apply clarsimp_all

        subgoal by (simp add: vc_foreach_finite) (* Finiteness of iteration set *)

        subgoal by (rule vc_enter_foreach) (simp add: rpre_def) (* Loop invar holds initially*)
        subgoal (* Loop invar preserved by inner recursion *)
          apply (rule order_trans, rprems)   (* Applying recursion induction hypothesis *)
          apply (erule (4) vc_rec_pre; fail) (* Precondition of inner recursion holds*)
          apply (simp add: vc_rec_var; fail) (* Parameters are smaller wrt termination ordering *)
          apply (auto simp: vc_iterate_foreach; fail) (* Postcondition of inner rec implies loop invar again  *)
          done
          
        subgoal by (rule vc_foreach_ignore; auto)  (* Node already visited: invariant is preserved *)
        subgoal by (auto simp: vc_foreach_completed_imp_post) (* Foreach loop terminated normally: Implies postcondition *)
        subgoal by (auto simp: vc_foreach_interrupted_imp_post) (* Foreach loop interrupted (tgt found): Implies postcondition *)
        done
      subgoal by (auto simp add: vc_rpost_imp_spec) (* Postcondition implies our specification *)
      done
    qed
    
end


subsection ‹Data Refinement and Determinization›

text ‹
  Next, we use automatic data refinement and transfer to generate an
  executable algorithm. We fix the node-type to natural numbers,
  and the successor-function to return a list-set. 
  The implementation of the visited set is left open, and Autoref's heuristics
  will choose one (default for nat set: red-black-trees).
›

text ‹In our first example, we use autoref_monadic›, which combines the 
  Autoref tool and the determinization of the Monadic Refinement Framework.›
  
schematic_goal dfs_impl_refine_aux:
  fixes succi and E :: "('a::linorder × 'a) set" and tgt src :: 'a
  assumes [autoref_rules]: "(succi,E)Idsuccg_rel"
  notes [autoref_rules] = IdI[of src] IdI[of tgt]
  shows "RETURN (?f::?'c)  ?R (dfs E src tgt)"
  unfolding dfs_def by autoref_monadic 

text ‹We define a new constant from the synthesis result›
concrete_definition dfs_impl for succi src tgt uses dfs_impl_refine_aux
text ‹Set up code equations for the recursion combinators›
prepare_code_thms dfs_impl_def
text ‹And export the algorithm to all supported target languages›
export_code dfs_impl in Haskell
export_code dfs_impl checking SML OCaml? Haskell? Scala


text ‹Chaining the refinement theorems, we get correctness arguments that 
  are almost independent of the refinement framework:›

lemma succ_ran_fin:
  assumes R: "(succi,E)  Rvsuccg_rel"
  assumes "vRange Rv"
  shows "finite (E``{v})"
  using assms
  unfolding succg_rel_def br_def E_of_succ_def
  apply clarsimp
  apply (drule (1) fun_relD)
  using list_set_rel_range[of Rv]
  by auto

lemma run_ran_aux:
  assumes R: "(succi,E)  Rvsuccg_rel"
  assumes REACH: "(src,v)E*"
  assumes R0: "(v0i,src)  Rv"
  shows "v  Range Rv"
  using REACH R0
proof (induction arbitrary: v0i rule: converse_rtrancl_induct)
  case base thus ?case by auto
next
  case (step src v')  
  from (src, v')  E have "v'  Range Rv" using R list_set_rel_range[of Rv]
    apply (clarsimp simp: succg_rel_def br_def E_of_succ_def)
    apply (drule fun_relD[OF _ (v0i, src)  Rv])
    by auto
  with step.IH show ?thesis by blast
qed

lemma run_ran_fin: 
  assumes R: "(succi,E)  Rvsuccg_rel"
  assumes R0: "(v0i,src)  Rv"
  shows "v. (src,v)E*  finite (E``{v})"
  using succ_ran_fin run_ran_aux assms by metis

text ‹Correctness theorem presented in the paper:›
theorem dfs_code_correct: 
  assumes SUCCI: "(succi,E)Idsuccg_rel"
  assumes FIN: "finite (E*``{src})"  
  shows "dfs_impl succi src tgt  (src,tgt)E*"
proof -
  note dfs_impl.refine[OF SUCCI, of src tgt]
  also note dfs_correct[OF FIN]
  finally show ?thesis by (auto simp: split: dres.split)
qed

subsubsection ‹Using only Autoref›

text ‹Here we show the result of Autoref, without the determinization phase of
  the Monadic Refinement Framework: ›
schematic_goal 
  fixes succi and E :: "('a::linorder × 'a) set" and tgt src :: 'a
  assumes [autoref_rules]: "(succi,E)Idsuccg_rel"
  notes [autoref_rules] = IdI[of src] IdI[of tgt]
  shows "(?f::?'c, dfs E src tgt)  ?R"
  unfolding dfs_def[abs_def] 
  apply (autoref (trace))
  done

subsubsection ‹Choosing Different Implementations›
text ‹Ad-hoc override of implementation selection heuristics: Using hashset for the visited set›  
schematic_goal dfs_impl_refine_aux2:
  fixes succi and E :: "(('a::hashable) × 'a) set" and tgt src :: 'a
  assumes [autoref_rules]: "(succi,E)Idsuccg_rel"
  notes [autoref_rules] = IdI[of src] IdI[of tgt]
  notes [autoref_tyrel] = ty_REL[where 'a="'a set" and R="Iddflt_ahs_rel"] 
  shows "(?f::?'c, dfs E src tgt)  ?R"
  unfolding dfs_def[abs_def] 
  apply autoref_monadic
  done
  

text ‹We can also leave the type of the nodes and its implementation
  unspecified. However, we need a comparison operator on nodes›
  
(* With linorder typeclass on abstract type *)  
schematic_goal dfs_impl_refine_aux3:
  fixes succi and E :: "('a::linorder × 'a) set" 
    and Rv :: "('ai×'a) set"
  assumes [autoref_rules_raw]: "(cmpk, dflt_cmp (≤) (<))(RvRvId)"
  notes [autoref_tyrel] = ty_REL[where 'a="'a set" and R="Rvdflt_rs_rel"]
  assumes P_REF[autoref_rules]: 
    "(succi,E)Rvsuccg_rel"
    "(vdi,tgt::'a)Rv"
    "(v0i,src)Rv"
  shows "(RETURN (?f::?'c), dfs E src tgt)?R"
  unfolding dfs_def[abs_def]
  by autoref_monadic

(* With arbitrary cmpk' operator on abstract type, not forcing a linorder typeclass instance.
  Useful if there are multiple possible instantiations of a typeclass (eg for product ordering), 
  and one does not want to commit to one.
*)  
schematic_goal dfs_impl_refine_aux3':
  fixes succi and E :: "('a × 'a) set" 
    and Rv :: "('ai×'a) set"
  assumes [autoref_ga_rules]: "eq_linorder cmpk'"
  assumes [autoref_rules_raw]: "(cmpk, cmpk')(RvRvcomp_res_rel)"
  notes [autoref_tyrel] = ty_REL[where 'a="'a set" and R="Rvmap2set_rel (rbt_map_rel (comp2lt cmpk'))"]
  assumes P_REF[autoref_rules]: 
    "(succi,E)Rvsuccg_rel"
    "(vdi,tgt::'a)Rv"
    "(v0i,src)Rv"
  shows "(RETURN (?f::?'c), dfs E src tgt)?R"
  unfolding dfs_def[abs_def]
  by autoref_monadic
  
  
text ‹We also generate code for the alternative implementations›
concrete_definition dfs_impl2 for succi src tgt uses dfs_impl_refine_aux2
concrete_definition dfs_impl3 for cmpk succi v0i vdi uses dfs_impl_refine_aux3
concrete_definition dfs_impl3' for cmpk succi v0i vdi uses dfs_impl_refine_aux3'

prepare_code_thms dfs_impl2_def
prepare_code_thms dfs_impl3_def
prepare_code_thms dfs_impl3'_def

export_code dfs_impl dfs_impl2 dfs_impl3 dfs_impl3' checking SML OCaml? Haskell? Scala

text ‹And we prove the alternative implementations correct ›

theorem dfs_code2_correct: 
  assumes SUCCI: "(succi,E)Idsuccg_rel"
  assumes FIN: "finite (E*``{src})"  
  shows "dfs_impl2 succi src tgt  (src,tgt)E*"
proof -
  note dfs_impl2.refine[OF SUCCI, of src tgt, THEN nres_relD]
  also note dfs_correct[OF FIN]
  finally show ?thesis by (auto simp: split: dres.split)
qed
  
theorem dfs_code3_correct: 
  fixes succi and succ :: "'a::linorder  'a set" 
    and Rv :: "('ai×'a) set"
  assumes V0: "(v0i,src)Rv"
  assumes VD: "(vdi,tgt)Rv"
  assumes SUCCI: "(succi,E)Rvsuccg_rel"
  assumes CMP: "(cmpk, dflt_cmp (≤) (<))(RvRvId)"
  assumes FIN: "finite (E*``{src})"  
  shows "dfs_impl3 cmpk succi v0i vdi  (src,tgt)E*"
proof -
  note dfs_impl3.refine[OF CMP SUCCI VD V0, THEN nres_relD]
  also note dfs_correct[OF FIN]
  finally show ?thesis by (auto simp: split: dres.split)
qed

theorem dfs_code3'_correct: 
  fixes succi and succ :: "'a::linorder  'a set" 
    and Rv :: "('ai×'a) set"
  assumes V0: "(v0i,src)Rv"
  assumes VD: "(vdi,tgt)Rv"
  assumes SUCCI: "(succi,E)Rvsuccg_rel"
  assumes CGA: "eq_linorder cmpk'"
  assumes CMP: "(cmpk, cmpk')  RvRvcomp_res_rel"
  assumes FIN: "finite (E*``{src})"  
  shows "dfs_impl3' cmpk succi v0i vdi  (src,tgt)E*"
proof -
  note dfs_impl3'.refine[OF CGA CMP SUCCI VD V0, THEN nres_relD]
  also note dfs_correct[OF FIN]
  finally show ?thesis by (auto simp: split: dres.split)
qed


(* Reachability *)  

definition [simp]: "op_reachable E u v  (u,v)E*"
context begin interpretation autoref_syn .
  lemma [autoref_op_pat]: "(u,v)E*  op_reachable$E$u$v" by simp


  (* We use a quite general setup here, working with any linearly ordered
    abstract node type, refined by any relation. *)
  theorem dfs_code3_correct_rl[autoref_rules]: 
    fixes succi and succ :: "'a::linorder  'a set" 
      and Rv :: "('ai×'a) set"
    assumes V0: "(v0i,src)Rv"
    assumes VD: "(vdi,tgt)Rv"
    assumes SUCCI: "(succi,E)Rvsuccg_rel"
    assumes CGA: "SIDE_GEN_ALGO (eq_linorder cmpk')"
    assumes CMP: "GEN_OP cmpk cmpk' (RvRvcomp_res_rel)"
    assumes FIN: "SIDE_PRECOND (finite (E*``{src}))"  
    shows "(dfs_impl3' cmpk succi v0i vdi, 
          (OP op_reachable ::: Rvsuccg_rel  Rv  Rv  bool_rel)$E$src$tgt)
           bool_rel"
    using dfs_code3'_correct[OF V0 VD SUCCI, of cmpk' cmpk] CGA CMP FIN
    unfolding autoref_tag_defs by simp
    
end


end