Theory Breadth_First_Search

section ‹Breadth First Search›
theory Breadth_First_Search
imports "../Refine_Monadic"
begin
  
text ‹
  This is a slightly modified version of Task~5 of our submission to
  the VSTTE 2011 verification competition 
  (@{url "https://sites.google.com/site/vstte2012/compet"}). The task was to
  formalize a breadth-first-search algorithm.
›

text ‹
  With Isabelle's locale-construct, we put ourselves into a context where 
  the succ›-function is fixed. 
  We assume finitely branching graphs here, as our
  foreach-construct is only defined for finite sets.
›
locale Graph =
  fixes succ :: "'vertex  'vertex set"
  assumes [simp, intro!]: "finite (succ v)"
begin


  subsection ‹Distances in a Graph›
  text ‹
    We start over by defining the basic notions of paths and shortest paths.
›

  text ‹A path is expressed by the dist›-predicate. Intuitively,
    dist v d v'› means that there is a path of length d›
    between v› and v'›.

    The definition of the dist›-predicate is done inductively, i.e.,
    as the least solution of the following constraints:
›
  inductive dist :: "'vertex  nat  'vertex  bool" where
    dist_z: "dist v 0 v" |
    dist_suc: "dist v d vh; v'  succ vh   dist v (Suc d) v'"

  text ‹
    Next, we define a predicate that expresses that the shortest path between
    v› and v'› has length d›.
    This is the case if there is a path of length d›, but there
    is no shorter path.
›
  definition "min_dist v v' = (LEAST d. dist v d v')"
  definition "conn v v' = (d. dist v d v')"

  subsubsection ‹Properties›
  text ‹
    In this subsection, we prove some properties of paths.
›

  lemma
    shows connI[intro]: "dist v d v'  conn v v'"
      and connI_id[intro]: "conn v v"
      and connI_succ[intro]: "conn v v'  v''  succ v'  conn v v''"
    by (auto simp: conn_def intro: dist_suc dist_z)

  lemma min_distI2: 
    " conn v v' ; d.  dist v d v'; d'. dist v d' v'  d  d'   Q d  
       Q (min_dist v v')"
    unfolding min_dist_def
    by (rule LeastI2_wellorder[where Q=Q and a="SOME d. dist v d v'"])
       (auto simp: conn_def intro: someI)

  lemma min_distI_eq:
    " dist v d v'; d'. dist v d' v'  d  d'   min_dist v v' = d"
    by (force intro: min_distI2 simp: conn_def)

  text ‹Two nodes are connected by a path of length 0›, 
    iff they are equal.›
  lemma dist_z_iff[simp]: "dist v 0 v'  v'=v"
    by (auto elim: dist.cases intro: dist.intros)

  text ‹The same holds for min_dist›, i.e., 
    the shortest path between two nodes has length 0›, 
    iff these nodes are equal.›
  lemma min_dist_z[simp]: "min_dist v v = 0"
    by (rule min_distI2) (auto intro: dist_z)

  lemma min_dist_z_iff[simp]: "conn v v'  min_dist v v' = 0  v'=v" 
    by (rule min_distI2) (auto intro: dist_z)
    
  lemma min_dist_is_dist: "conn v v'  dist v (min_dist v v') v'"
    by (auto intro: min_distI2)
  lemma min_dist_minD: "dist v d v'  min_dist v v'  d"
    by (auto intro: min_distI2)

  text ‹We also provide introduction and destruction rules for the
    pattern min_dist v v' = Suc d›.
›

  lemma min_dist_succ: 
    " conn v v'; v''  succ v'   min_dist v v''  Suc (min_dist v v') "
    by (rule min_distI2[where v'=v'])
       (auto elim: dist.cases intro!: min_dist_minD dist_suc)

  lemma min_dist_suc:
    assumes c: "conn v v'" "min_dist v v' = Suc d"
    shows "v''. conn v v''  v'  succ v''  min_dist v v'' = d"
  proof -
    from min_dist_is_dist[OF c(1)]
    have "min_dist v v' = Suc d  ?thesis"
    proof cases
      case (dist_suc d' v'') then show ?thesis
        using min_dist_succ[of v v'' v'] min_dist_minD[of v d v'']
        by (auto simp: connI)
    qed simp
    with c show ?thesis by simp
  qed

  text ‹
    If there is a node with a shortest path of length d›, 
    then, for any d'<d›, there is also a node with a shortest path
    of length d'›.
›
  lemma min_dist_less:
    assumes "conn src v" "min_dist src v = d" and "d' < d"
    shows "v'. conn src v'  min_dist src v' = d'"
    using assms
  proof (induct d arbitrary: v)
    case (Suc d) with min_dist_suc[of src v] show ?case
      by (cases "d' = d") auto
  qed auto

  text ‹
    Lemma min_dist_less› can be weakened to d'≤d›.
›

  corollary min_dist_le:
    assumes c: "conn src v" and d': "d'  min_dist src v"
    shows "v'. conn src v'  min_dist src v' = d'"
    using min_dist_less[OF c, of "min_dist src v" d'] d' c
    by (auto simp: le_less)
  
  subsection ‹Invariants›
  text ‹
    In our framework, it is convenient to annotate the invariants and 
    auxiliary assertions into the program. Thus, we have to define the
    invariants first.
›

  text ‹
    The invariant for the outer loop is split into two parts:
    The first part already holds before 
    the if C={}› check, the second part only holds again 
    at the end of the loop body.
›

  text ‹
    The first part of the invariant, bfs_invar'›,
    intuitively states the following:
    If the loop is not {\em break}ed, then we have:
    \begin{itemize}
      \item The next-node set N› is a subset of V›, and
        the destination node is not contained into V-(C∪N)›,
      \item all nodes in the current-node set C› have a shortest
        path of length d›,
      \item all nodes in the next-node set N› have a shortest path
        of length d+1›,
      \item all nodes in the visited set V› have a shortest path
        of length at most d+1›,
      \item all nodes with a path shorter than d› are already in 
        V›, and
      \item all nodes with a shortest path of length d+1› are either
        in the next-node set N›, or they are undiscovered successors
        of a node in the current-node set.
    \end{itemize}
    
    If the loop has been {\em break}ed, d› is the distance of the
    shortest path between src› and dst›.
›

  definition "bfs_invar' src dst σ  let (f,V,C,N,d)=σ in
    (¬f  (
      N  V  dst  V - (C  N) 
      (vC. conn src v  min_dist src v = d) 
      (vN. conn src v  min_dist src v = Suc d) 
      (vV. conn src v  min_dist src v  Suc d) 
      (v. conn src v  min_dist src v  d  v  V) 
      (v. conn src v  min_dist src v = Suc d  v  N  (((succ`C)) - V))
    ))  (
    f  conn src dst  min_dist src dst = d
    )"

  text ‹
    The second part of the invariant, empty_assm›, just states 
    that C› can only be empty if N› is also empty.
›
  definition "empty_assm σ  let (f,V,C,N,d)=σ in
    C={}  N={}"

  text ‹
    Finally, we define the invariant of the outer loop, bfs_invar›, 
    as the conjunction of both parts:
›
  definition "bfs_invar src dst σ  bfs_invar' src dst σ  
    empty_assm σ"

  text ‹
    The invariant of the inner foreach-loop states that
    the successors that have already been processed (succ v - it›),
    have been added to V› and have also been added to 
    N'› if they are not in V›.
›
  definition "FE_invar V N v it σ  let (V',N')=σ in 
    V'=V  (succ v - it) 
    N'=N  ((succ v - it) - V)"

  subsection ‹Algorithm›
  text ‹
    The following algorithm is a straightforward transcription of the 
    algorithm given in the assignment to the monadic style featured by our 
    framework.
    We briefly explain the (mainly syntactic) differences:
    \begin{itemize}
      \item The initialization of the variables occur after the loop in
          our formulation. This is just a syntactic difference, as our loop
          construct has the form WHILEI I c f σ0, where σ0
          is the initial state, and I› is the loop invariant;
      \item We translated the textual specification 
        {\em remove one vertex v› from C›} as accurately as
        possible: The statement v ← SPEC (λv. v∈C)› 
        nondeterministically assigns a node from C› to v›, 
        that is then removed in the next statement;
      \item In our monad, we have no notion of loop-breaking (yet). Hence we
        added an additional boolean variable f› that indicates that
        the loop shall terminate. The RETURN›-statements used in our
        program are the return-operator of the monad, and must not be mixed up
        with the return-statement given in the original program, that is 
        modeled by breaking the loop. The if-statement after the loop takes
        care to return the right value;
      \item We added an else-branch to the if-statement that checks
        whether we reached the destination node;
      \item We added an assertion of the first part of the invariant to the
        program text, moreover, we annotated invariants at the loops.
        We also added an assertion w∉N› into the inner loop. This
        is merely an optimization, that will allow us to implement the
        insert operation more efficiently;
      \item Each conditional branch in the loop body ends with a 
        RETURN›-statement. This is required by the monadic style;
      \item Failure is modeled by an option-datatype. The result 
        Some d› means that the integer d› is returned,
        the result None› means that a failure is returned.
    \end{itemize}
›

  text_raw ‹\clearpage›

  definition bfs :: "'vertex  'vertex  
    (nat option nres)"
    where "bfs src dst  do {
    (f,_,_,_,d)  WHILEI (bfs_invar src dst) (λ(f,V,C,N,d). f=False  C{})
      (λ(f,V,C,N,d). do {
        v  SPEC (λv. vC); let C = C-{v};
        if v=dst then RETURN (True,{},{},{},d)
        else do {
          (V,N)  FOREACHi (FE_invar V N v) (succ v) (λw (V,N). 
            if (wV) then do { 
              ASSERT (wN);
              RETURN (insert w V, insert w N) 
            } else RETURN (V,N)
          ) (V,N);
          ASSERT (bfs_invar' src dst (f,V,C,N,d));
          if (C={}) then do {
            let C=N; 
            let N={}; 
            let d=d+1;
            RETURN (f,V,C,N,d)
          } else RETURN (f,V,C,N,d)
        }
      })
      (False,{src},{src},{},0::nat);
    if f then RETURN (Some d) else RETURN None
    }"

  subsection "Verification Tasks"
  text ‹
    In order to make the proof more readable, we have extracted the
    difficult verification conditions and proved them in separate lemmas.
    The other verification conditions are proved automatically by Isabelle/HOL
    during the proof of the main theorem.

    Due to the timing constraints of the competition, the verification 
    conditions are mostly proved in Isabelle's apply-style, that is faster
    to write for the experienced user, but harder to read by a human.

    Exemplarily, we formulated the last proof in the proof language 
    {\em Isar}, that allows one to write human-readable proofs and verify
    them with Isabelle/HOL.
›

  text ‹
    The first part of the invariant is preserved if we take a node from 
    C›, and add its successors that are not in V› to 
    N›.
    This is the verification condition for the assertion after the 
    foreach-loop.
›
  lemma invar_succ_step:
    assumes "bfs_invar' src dst (False, V, C, N, d)"
    assumes "vC"
    assumes "vdst"
    shows "bfs_invar' src dst 
            (False, V  succ v, C - {v}, N  (succ v - V), d)"
    using assms
  proof (simp (no_asm_use) add: bfs_invar'_def, intro conjI, goal_cases)
    case 6 then show ?case
      by (metis Un_iff Diff_iff connI_succ le_SucE min_dist_succ)
  next
    case 7 then show ?case
      by (metis Un_iff connI_succ min_dist_succ)
  qed blast+

  text ‹
    The first part of the invariant is preserved if the 
    if C={}›-statement is executed.
    This is the verification condition for the 
    loop-invariant. Note that preservation of the second part of the 
    invariant is proven easily inside the main proof.
›
  lemma invar_empty_step: 
    assumes "bfs_invar' src dst (False, V, {}, N, d)"
    shows "bfs_invar' src dst (False, V, N, {}, Suc d)"
    using assms
  proof (simp (no_asm_use) add: bfs_invar'_def, intro conjI impI allI, goal_cases)
    case 3 then show ?case
      by (metis le_SucI)
  next
    case 4 then show ?case
      by (metis le_SucE subsetD)
  next
    case (5 v) then show ?case
      using min_dist_suc[of src v "Suc d"] by metis
  next
    case 6 then show ?case
      by (metis Suc_n_not_le_n)
  qed blast+

  text ‹The invariant holds initially.›
  lemma invar_init: "bfs_invar src dst (False, {src}, {src}, {}, 0)"
    by (auto simp: bfs_invar_def bfs_invar'_def empty_assm_def)
       (metis min_dist_suc min_dist_z_iff)

  text ‹The invariant is preserved if we break the loop.›
  lemma invar_break: 
    assumes "bfs_invar src dst (False, V, C, N, d)"
    assumes "dstC"
    shows "bfs_invar src dst (True, {}, {}, {}, d)"
    using assms unfolding bfs_invar_def bfs_invar'_def empty_assm_def
    by auto

  text ‹If we have {\em break}ed the loop, the invariant implies
    that we, indeed, returned the shortest path.›
  lemma invar_final_succeed:
    assumes "bfs_invar' src dst (True, V, C, N, d)"  
    shows "min_dist src dst = d"
    using assms unfolding bfs_invar'_def
    by auto

  text ‹If the loop terminated normally, there is no path between 
    src› and dst›. 

    The lemma is formulated as deriving a contradiction from the fact
    that there is a path and the loop terminated normally.

    Note the proof language {\em Isar} that was used here.
    It allows one to write human-readable proofs in a theorem prover.
›
  lemma invar_final_fail: 
    assumes C: "conn src dst" ― ‹There is a path between 
      @{text "src"} and @{text "dst"}.›
    assumes INV: "bfs_invar' src dst (False, V, {}, {}, d)"
    shows False
  proof -
    txt ‹We make a case-distinction whether d'≤d›:›
    have "min_dist src dst  d  d + 1  min_dist src dst" by auto
    moreover {
      txt ‹Due to the invariant, any node with a path of 
        length at most d› is contained in V›
      from INV have "v. conn src v  min_dist src v  d  vV"
        unfolding bfs_invar'_def by auto
      moreover
      txt ‹This applies in the first case, hence we obtain that 
        dst∈V›
      assume A: "min_dist src dst  d"
      moreover from C have "dist src (min_dist src dst) dst" 
        by (blast intro: min_dist_is_dist)
      ultimately have "dstV" by blast
      txt ‹However, as C› and N› are empty, the invariant
        also implies that dst› is not in V›:›
      moreover from INV have "dstV" unfolding bfs_invar'_def by auto
      txt ‹This yields a contradiction:›
      ultimately have False by blast
    } moreover {
      txt ‹In the case d+1 ≤ d'›, we also obtain a node
        that has a shortest path of length d+1›:›
      assume "d+1  min_dist src dst"
      with min_dist_le[OF C] obtain v' where 
        "conn src v'" "min_dist src v' = d + 1"
        by auto
      txt ‹However, the invariant states that such nodes are either in
        N› or are successors of C›. As N› 
        and C› are both empty, we again get a contradiction.›
      with INV have False unfolding bfs_invar'_def by auto
    } ultimately show ?thesis by blast
  qed

  text ‹
    Finally, we prove our algorithm correct:
    The following theorem solves both verification tasks.

    Note that a proposition of the form S ⊑ SPEC Φ› states partial 
    correctness in our framework, i.e., S› refines the 
    specification Φ›.

    The actual specification that we prove here
    precisely reflects the two verification tasks: {\em If the algorithm fails,
    there is no path between src› and dst›, otherwise
    it returns the length of the shortest path.}

    The proof of this theorem first applies the verification condition 
    generator (apply (intro refine_vcg)›), and then uses the
    lemmas proved beforehand to discharge the verification conditions.
    During the auto›-methods, some trivial verification conditions,
    e.g., those concerning the invariant of the inner loop, are discharged
    automatically.
    During the proof, we gradually unfold the definition of the loop
    invariant.
›
  definition "bfs_spec src dst  SPEC (
    λr. case r of None  ¬ conn src dst
              | Some d  conn src dst  min_dist src dst = d)"
  theorem bfs_correct: "bfs src dst  bfs_spec src dst"
    unfolding bfs_def bfs_spec_def
    apply (intro refine_vcg)

    unfolding FE_invar_def
    apply (auto simp: 
      intro: invar_init invar_break )

    unfolding bfs_invar_def 
    apply (auto simp: 
      intro: invar_succ_step invar_empty_step invar_final_succeed)

    unfolding empty_assm_def
    apply (auto intro: invar_final_fail)

    unfolding bfs_invar'_def
    apply auto
    done

end

end