Theory Breadth_First_Search

section ‹Breadth First Search›
theory Breadth_First_Search
imports "../Refine_Monadic"
begin
  
text ‹
  This is a slightly modified version of Task~5 of our submission to
  the VSTTE 2011 verification competition 
  (@{url "https://sites.google.com/site/vstte2012/compet"}). The task was to
  formalize a breadth-first-search algorithm.
›

text ‹
  With Isabelle's locale-construct, we put ourselves into a context where 
  the ‹succ›-function is fixed. 
  We assume finitely branching graphs here, as our
  foreach-construct is only defined for finite sets.
›
locale Graph =
  fixes succ :: "'vertex ⇒ 'vertex set"
  assumes [simp, intro!]: "finite (succ v)"
begin


  subsection ‹Distances in a Graph›
  text ‹
    We start over by defining the basic notions of paths and shortest paths.
›

  text ‹A path is expressed by the ‹dist›-predicate. Intuitively,
    ‹dist v d v'› means that there is a path of length ‹d›
    between ‹v› and ‹v'›.

    The definition of the ‹dist›-predicate is done inductively, i.e.,
    as the least solution of the following constraints:
›
  inductive dist :: "'vertex ⇒ nat ⇒ 'vertex ⇒ bool" where
    dist_z: "dist v 0 v" |
    dist_suc: "⟦dist v d vh; v' ∈ succ vh ⟧ ⟹ dist v (Suc d) v'"

  text ‹
    Next, we define a predicate that expresses that the shortest path between
    ‹v› and ‹v'› has length ‹d›.
    This is the case if there is a path of length ‹d›, but there
    is no shorter path.
›
  definition "min_dist v v' = (LEAST d. dist v d v')"
  definition "conn v v' = (∃d. dist v d v')"

  subsubsection ‹Properties›
  text ‹
    In this subsection, we prove some properties of paths.
›

  lemma
    shows connI[intro]: "dist v d v' ⟹ conn v v'"
      and connI_id[intro]: "conn v v"
      and connI_succ[intro]: "conn v v' ⟹ v'' ∈ succ v' ⟹ conn v v''"
    by (auto simp: conn_def intro: dist_suc dist_z)

  lemma min_distI2: 
    "⟦ conn v v' ; ⋀d. ⟦ dist v d v'; ⋀d'. dist v d' v' ⟹ d ≤ d' ⟧ ⟹ Q d ⟧ 
      ⟹ Q (min_dist v v')"
    unfolding min_dist_def
    by (rule LeastI2_wellorder[where Q=Q and a="SOME d. dist v d v'"])
       (auto simp: conn_def intro: someI)

  lemma min_distI_eq:
    "⟦ dist v d v'; ⋀d'. dist v d' v' ⟹ d ≤ d' ⟧ ⟹ min_dist v v' = d"
    by (force intro: min_distI2 simp: conn_def)

  text ‹Two nodes are connected by a path of length ‹0›, 
    iff they are equal.›
  lemma dist_z_iff[simp]: "dist v 0 v' ⟷ v'=v"
    by (auto elim: dist.cases intro: dist.intros)

  text ‹The same holds for ‹min_dist›, i.e., 
    the shortest path between two nodes has length ‹0›, 
    iff these nodes are equal.›
  lemma min_dist_z[simp]: "min_dist v v = 0"
    by (rule min_distI2) (auto intro: dist_z)

  lemma min_dist_z_iff[simp]: "conn v v' ⟹ min_dist v v' = 0 ⟷ v'=v" 
    by (rule min_distI2) (auto intro: dist_z)
    
  lemma min_dist_is_dist: "conn v v' ⟹ dist v (min_dist v v') v'"
    by (auto intro: min_distI2)
  lemma min_dist_minD: "dist v d v' ⟹ min_dist v v' ≤ d"
    by (auto intro: min_distI2)

  text ‹We also provide introduction and destruction rules for the
    pattern ‹min_dist v v' = Suc d›.
›

  lemma min_dist_succ: 
    "⟦ conn v v'; v'' ∈ succ v' ⟧ ⟹ min_dist v v'' ≤ Suc (min_dist v v') "
    by (rule min_distI2[where v'=v'])
       (auto elim: dist.cases intro!: min_dist_minD dist_suc)

  lemma min_dist_suc:
    assumes c: "conn v v'" "min_dist v v' = Suc d"
    shows "∃v''. conn v v'' ∧ v' ∈ succ v'' ∧ min_dist v v'' = d"
  proof -
    from min_dist_is_dist[OF c(1)]
    have "min_dist v v' = Suc d ⟶ ?thesis"
    proof cases
      case (dist_suc d' v'') then show ?thesis
        using min_dist_succ[of v v'' v'] min_dist_minD[of v d v'']
        by (auto simp: connI)
    qed simp
    with c show ?thesis by simp
  qed

  text ‹
    If there is a node with a shortest path of length ‹d›, 
    then, for any ‹d'<d›, there is also a node with a shortest path
    of length ‹d'›.
›
  lemma min_dist_less:
    assumes "conn src v" "min_dist src v = d" and "d' < d"
    shows "∃v'. conn src v' ∧ min_dist src v' = d'"
    using assms
  proof (induct d arbitrary: v)
    case (Suc d) with min_dist_suc[of src v] show ?case
      by (cases "d' = d") auto
  qed auto

  text ‹
    Lemma ‹min_dist_less› can be weakened to ‹d'≤d›.
›

  corollary min_dist_le:
    assumes c: "conn src v" and d': "d' ≤ min_dist src v"
    shows "∃v'. conn src v' ∧ min_dist src v' = d'"
    using min_dist_less[OF c, of "min_dist src v" d'] d' c
    by (auto simp: le_less)
  
  subsection ‹Invariants›
  text ‹
    In our framework, it is convenient to annotate the invariants and 
    auxiliary assertions into the program. Thus, we have to define the
    invariants first.
›

  text ‹
    The invariant for the outer loop is split into two parts:
    The first part already holds before 
    the ‹if C={}› check, the second part only holds again 
    at the end of the loop body.
›

  text ‹
    The first part of the invariant, ‹bfs_invar'›,
    intuitively states the following:
    If the loop is not {\em break}ed, then we have:
    \begin{itemize}
      \item The next-node set ‹N› is a subset of ‹V›, and
        the destination node is not contained into ‹V-(C∪N)›,
      \item all nodes in the current-node set ‹C› have a shortest
        path of length ‹d›,
      \item all nodes in the next-node set ‹N› have a shortest path
        of length ‹d+1›,
      \item all nodes in the visited set ‹V› have a shortest path
        of length at most ‹d+1›,
      \item all nodes with a path shorter than ‹d› are already in 
        ‹V›, and
      \item all nodes with a shortest path of length ‹d+1› are either
        in the next-node set ‹N›, or they are undiscovered successors
        of a node in the current-node set.
    \end{itemize}
    
    If the loop has been {\em break}ed, ‹d› is the distance of the
    shortest path between ‹src› and ‹dst›.
›

  definition "bfs_invar' src dst σ ≡ let (f,V,C,N,d)=σ in
    (¬f ⟶ (
      N ⊆ V ∧ dst ∉ V - (C ∪ N) ∧
      (∀v∈C. conn src v ∧ min_dist src v = d) ∧
      (∀v∈N. conn src v ∧ min_dist src v = Suc d) ∧
      (∀v∈V. conn src v ∧ min_dist src v ≤ Suc d) ∧
      (∀v. conn src v ∧ min_dist src v ≤ d ⟶ v ∈ V) ∧
      (∀v. conn src v ∧ min_dist src v = Suc d ⟶ v ∈ N ∪ ((⋃(succ`C)) - V))
    )) ∧ (
    f ⟶ conn src dst ∧ min_dist src dst = d
    )"

  text ‹
    The second part of the invariant, ‹empty_assm›, just states 
    that ‹C› can only be empty if ‹N› is also empty.
›
  definition "empty_assm σ ≡ let (f,V,C,N,d)=σ in
    C={} ⟶ N={}"

  text ‹
    Finally, we define the invariant of the outer loop, ‹bfs_invar›, 
    as the conjunction of both parts:
›
  definition "bfs_invar src dst σ ≡ bfs_invar' src dst σ ∧ 
    empty_assm σ"

  text ‹
    The invariant of the inner foreach-loop states that
    the successors that have already been processed (‹succ v - it›),
    have been added to ‹V› and have also been added to 
    ‹N'› if they are not in ‹V›.
›
  definition "FE_invar V N v it σ ≡ let (V',N')=σ in 
    V'=V ∪ (succ v - it) ∧
    N'=N ∪ ((succ v - it) - V)"

  subsection ‹Algorithm›
  text ‹
    The following algorithm is a straightforward transcription of the 
    algorithm given in the assignment to the monadic style featured by our 
    framework.
    We briefly explain the (mainly syntactic) differences:
    \begin{itemize}
      \item The initialization of the variables occur after the loop in
          our formulation. This is just a syntactic difference, as our loop
          construct has the form ‹WHILEI I c f σ⇩₀›, where ‹σ⇩₀›
          is the initial state, and ‹I› is the loop invariant;
      \item We translated the textual specification 
        {\em remove one vertex ‹v› from ‹C›} as accurately as
        possible: The statement ‹v ← SPEC (λv. v∈C)› 
        nondeterministically assigns a node from ‹C› to ‹v›, 
        that is then removed in the next statement;
      \item In our monad, we have no notion of loop-breaking (yet). Hence we
        added an additional boolean variable ‹f› that indicates that
        the loop shall terminate. The ‹RETURN›-statements used in our
        program are the return-operator of the monad, and must not be mixed up
        with the return-statement given in the original program, that is 
        modeled by breaking the loop. The if-statement after the loop takes
        care to return the right value;
      \item We added an else-branch to the if-statement that checks
        whether we reached the destination node;
      \item We added an assertion of the first part of the invariant to the
        program text, moreover, we annotated invariants at the loops.
        We also added an assertion ‹w∉N› into the inner loop. This
        is merely an optimization, that will allow us to implement the
        insert operation more efficiently;
      \item Each conditional branch in the loop body ends with a 
        ‹RETURN›-statement. This is required by the monadic style;
      \item Failure is modeled by an option-datatype. The result 
        ‹Some d› means that the integer ‹d› is returned,
        the result ‹None› means that a failure is returned.
    \end{itemize}
›

  text_raw ‹\clearpage›

  definition bfs :: "'vertex ⇒ 'vertex ⇒ 
    (nat option nres)"
    where "bfs src dst ≡ do {
    (f,_,_,_,d) ← WHILEI (bfs_invar src dst) (λ(f,V,C,N,d). f=False ∧ C≠{})
      (λ(f,V,C,N,d). do {
        v ← SPEC (λv. v∈C); let C = C-{v};
        if v=dst then RETURN (True,{},{},{},d)
        else do {
          (V,N) ← FOREACHi (FE_invar V N v) (succ v) (λw (V,N). 
            if (w∉V) then do { 
              ASSERT (w∉N);
              RETURN (insert w V, insert w N) 
            } else RETURN (V,N)
          ) (V,N);
          ASSERT (bfs_invar' src dst (f,V,C,N,d));
          if (C={}) then do {
            let C=N; 
            let N={}; 
            let d=d+1;
            RETURN (f,V,C,N,d)
          } else RETURN (f,V,C,N,d)
        }
      })
      (False,{src},{src},{},0::nat);
    if f then RETURN (Some d) else RETURN None
    }"

  subsection "Verification Tasks"
  text ‹
    In order to make the proof more readable, we have extracted the
    difficult verification conditions and proved them in separate lemmas.
    The other verification conditions are proved automatically by Isabelle/HOL
    during the proof of the main theorem.

    Due to the timing constraints of the competition, the verification 
    conditions are mostly proved in Isabelle's apply-style, that is faster
    to write for the experienced user, but harder to read by a human.

    Exemplarily, we formulated the last proof in the proof language 
    {\em Isar}, that allows one to write human-readable proofs and verify
    them with Isabelle/HOL.
›

  text ‹
    The first part of the invariant is preserved if we take a node from 
    ‹C›, and add its successors that are not in ‹V› to 
    ‹N›.
    This is the verification condition for the assertion after the 
    foreach-loop.
›
  lemma invar_succ_step:
    assumes "bfs_invar' src dst (False, V, C, N, d)"
    assumes "v∈C"
    assumes "v≠dst"
    shows "bfs_invar' src dst 
            (False, V ∪ succ v, C - {v}, N ∪ (succ v - V), d)"
    using assms
  proof (simp (no_asm_use) add: bfs_invar'_def, intro conjI, goal_cases)
    case 6 then show ?case
      by (metis Un_iff Diff_iff connI_succ le_SucE min_dist_succ)
  next
    case 7 then show ?case
      by (metis Un_iff connI_succ min_dist_succ)
  qed blast+

  text ‹
    The first part of the invariant is preserved if the 
    ‹if C={}›-statement is executed.
    This is the verification condition for the 
    loop-invariant. Note that preservation of the second part of the 
    invariant is proven easily inside the main proof.
›
  lemma invar_empty_step: 
    assumes "bfs_invar' src dst (False, V, {}, N, d)"
    shows "bfs_invar' src dst (False, V, N, {}, Suc d)"
    using assms
  proof (simp (no_asm_use) add: bfs_invar'_def, intro conjI impI allI, goal_cases)
    case 3 then show ?case
      by (metis le_SucI)
  next
    case 4 then show ?case
      by (metis le_SucE subsetD)
  next
    case (5 v) then show ?case
      using min_dist_suc[of src v "Suc d"] by metis
  next
    case 6 then show ?case
      by (metis Suc_n_not_le_n)
  qed blast+

  text ‹The invariant holds initially.›
  lemma invar_init: "bfs_invar src dst (False, {src}, {src}, {}, 0)"
    by (auto simp: bfs_invar_def bfs_invar'_def empty_assm_def)
       (metis min_dist_suc min_dist_z_iff)

  text ‹The invariant is preserved if we break the loop.›
  lemma invar_break: 
    assumes "bfs_invar src dst (False, V, C, N, d)"
    assumes "dst∈C"
    shows "bfs_invar src dst (True, {}, {}, {}, d)"
    using assms unfolding bfs_invar_def bfs_invar'_def empty_assm_def
    by auto

  text ‹If we have {\em break}ed the loop, the invariant implies
    that we, indeed, returned the shortest path.›
  lemma invar_final_succeed:
    assumes "bfs_invar' src dst (True, V, C, N, d)"  
    shows "min_dist src dst = d"
    using assms unfolding bfs_invar'_def
    by auto

  text ‹If the loop terminated normally, there is no path between 
    ‹src› and ‹dst›. 

    The lemma is formulated as deriving a contradiction from the fact
    that there is a path and the loop terminated normally.

    Note the proof language {\em Isar} that was used here.
    It allows one to write human-readable proofs in a theorem prover.
›
  lemma invar_final_fail: 
    assumes C: "conn src dst" ― ‹There is a path between 
      @{text "src"} and @{text "dst"}.›
    assumes INV: "bfs_invar' src dst (False, V, {}, {}, d)"
    shows False
  proof -
    txt ‹We make a case-distinction whether ‹d'≤d›:›
    have "min_dist src dst ≤ d ∨ d + 1 ≤ min_dist src dst" by auto
    moreover {
      txt ‹Due to the invariant, any node with a path of 
        length at most ‹d› is contained in ‹V›
›
      from INV have "∀v. conn src v ∧ min_dist src v ≤ d ⟶ v∈V"
        unfolding bfs_invar'_def by auto
      moreover
      txt ‹This applies in the first case, hence we obtain that 
        ‹dst∈V››
      assume A: "min_dist src dst ≤ d"
      moreover from C have "dist src (min_dist src dst) dst" 
        by (blast intro: min_dist_is_dist)
      ultimately have "dst∈V" by blast
      txt ‹However, as ‹C› and ‹N› are empty, the invariant
        also implies that ‹dst› is not in ‹V›:›
      moreover from INV have "dst∉V" unfolding bfs_invar'_def by auto
      txt ‹This yields a contradiction:›
      ultimately have False by blast
    } moreover {
      txt ‹In the case ‹d+1 ≤ d'›, we also obtain a node
        that has a shortest path of length ‹d+1›:›
      assume "d+1 ≤ min_dist src dst"
      with min_dist_le[OF C] obtain v' where 
        "conn src v'" "min_dist src v' = d + 1"
        by auto
      txt ‹However, the invariant states that such nodes are either in
        ‹N› or are successors of ‹C›. As ‹N› 
        and ‹C› are both empty, we again get a contradiction.›
      with INV have False unfolding bfs_invar'_def by auto
    } ultimately show ?thesis by blast
  qed

  text ‹
    Finally, we prove our algorithm correct:
    The following theorem solves both verification tasks.

    Note that a proposition of the form ‹S ⊑ SPEC Φ› states partial 
    correctness in our framework, i.e., ‹S› refines the 
    specification ‹Φ›.

    The actual specification that we prove here
    precisely reflects the two verification tasks: {\em If the algorithm fails,
    there is no path between ‹src› and ‹dst›, otherwise
    it returns the length of the shortest path.}

    The proof of this theorem first applies the verification condition 
    generator (‹apply (intro refine_vcg)›), and then uses the
    lemmas proved beforehand to discharge the verification conditions.
    During the ‹auto›-methods, some trivial verification conditions,
    e.g., those concerning the invariant of the inner loop, are discharged
    automatically.
    During the proof, we gradually unfold the definition of the loop
    invariant.
›
  definition "bfs_spec src dst ≡ SPEC (
    λr. case r of None ⇒ ¬ conn src dst
              | Some d ⇒ conn src dst ∧ min_dist src dst = d)"
  theorem bfs_correct: "bfs src dst ≤ bfs_spec src dst"
    unfolding bfs_def bfs_spec_def
    apply (intro refine_vcg)

    unfolding FE_invar_def
    apply (auto simp: 
      intro: invar_init invar_break )

    unfolding bfs_invar_def 
    apply (auto simp: 
      intro: invar_succ_step invar_empty_step invar_final_succeed)

    unfolding empty_assm_def
    apply (auto intro: invar_final_fail)

    unfolding bfs_invar'_def
    apply auto
    done

end

end