Theory Model

(*<*)
(*
 * Copyright 2015, NICTA
 *
 * This software may be distributed and modified according to the terms of
 * the BSD 2-Clause license. Note that NO WARRANTY is provided.
 * See "LICENSE_BSD2.txt" for details.
 *
 * @TAG(NICTA_BSD)
 *)

theory Model
imports
  ConcurrentIMP.CIMP
  "HOL-Library.Sublist"
begin

(* From 40e16c534243 by Makarius. Doesn't seem to have a huge impact on run time now (2021-01-07) *)
declare subst_all [simp del] [[simproc del: defined_all]]

(*>*)
section‹A model of a Schism garbage collector \label{sec:gc-model}›

text ‹

The following formalises Figures~2.8 (mark_object_fn›),
2.9 (load and store but not alloc), and 2.15 (garbage collector) of
citet"Pizlo201xPhd"; see also citet"Pizlo+2010PLDI".

We additionally need to model TSO memory, the handshakes and
compare-and-swap (\texttt{CAS}).  We closely model things where
interference is possible and abstract everything else.

@{bold @{emph ‹NOTE›}: this model is for TSO
\emph{only}. We elide any details irrelevant for that memory
model.›}

We begin by defining the types of the various parts. Our program
locations are labelled with strings for readability. We enumerate the
names of the processes in our system. The safety proof treats an
arbitary (unbounded) number of mutators.

›

type_synonym location = string

datatype 'mut process_name = mutator 'mut | gc | sys

text ‹

The garbage collection process can be in one of the following phases.

›

datatype gc_phase
  = ph_Idle
  | ph_Init
  | ph_Mark
  | ph_Sweep

text ‹

The garbage collector instructs mutators to perform certain actions,
and blocks until the mutators signal these actions are done. The
mutators always respond with their work list (a set of
references). The handshake can be of one of the specified types.

›

datatype hs_type
  = ht_NOOP
  | ht_GetRoots
  | ht_GetWork

text‹

We track how many \texttt{noop} and \texttt{get\_roots} handshakes
each process has participated in as ghost state. See
\S\ref{sec:gc_handshakes}.

›

datatype hs_phase
  = hp_Idle ― ‹done 1 noop›
  | hp_IdleInit
  | hp_InitMark
  | hp_Mark ― ‹done 4 noops›
  | hp_IdleMarkSweep ― ‹done get roots›

definition
  hs_step :: "hs_phase  hs_phase"
where
  "hs_step ph = (case ph of
       hp_Idle           hp_IdleInit
     | hp_IdleInit       hp_InitMark
     | hp_InitMark       hp_Mark
     | hp_Mark           hp_IdleMarkSweep
     | hp_IdleMarkSweep  hp_Idle)"

text ‹

An object consists of a garbage collection mark and two partial
maps. Firstly the types:

 @{typ "'field"} is the abstract type of fields.
 @{typ "'ref"} is the abstract type of object references.
 @{typ "'mut"} is the abstract type of the mutators' names.

The maps:

 obj_fields› maps @{typ "'field"}s to object
  references (or @{const "None"} signifying \texttt{NULL} or type
  error).
 obj_payload› maps a @{typ "'field"} to non-reference
  data. For convenience we similarly allow that to be \texttt{NULL}.

›

type_synonym gc_mark = bool

record ('field, 'payload, 'ref) object =
  obj_mark :: "gc_mark"
  obj_fields :: "'field  'ref"
  obj_payload :: "'field  'payload"

text‹

The TSO store buffers track store actions, represented by ('field, 'ref) mem_store_action›.

›

datatype ('field, 'payload, 'ref) mem_store_action
  = mw_Mark 'ref gc_mark
  | mw_Mutate 'ref 'field "'ref option"
  | mw_Mutate_Payload 'ref 'field "'payload option"
  | mw_fA gc_mark
  | mw_fM gc_mark
  | mw_Phase gc_phase

text‹

An action is a request by a mutator or the garbage collector to the
system.

›

datatype ('field, 'ref) mem_load_action
  = mr_Ref 'ref 'field
  | mr_Payload 'ref 'field
  | mr_Mark 'ref
  | mr_Phase
  | mr_fM
  | mr_fA

datatype ('field, 'mut, 'payload, 'ref) request_op
  = ro_MFENCE
  | ro_Load "('field, 'ref) mem_load_action"
  | ro_Store "('field, 'payload, 'ref) mem_store_action"
  | ro_Lock
  | ro_Unlock
  | ro_Alloc
  | ro_Free 'ref
  | ro_hs_gc_load_pending 'mut
  | ro_hs_gc_store_type hs_type
  | ro_hs_gc_store_pending 'mut
  | ro_hs_gc_load_W
  | ro_hs_mut_load_pending
  | ro_hs_mut_load_type
  | ro_hs_mut_done "'ref set"

abbreviation "LoadfM  ro_Load mr_fM"
abbreviation "LoadMark r  ro_Load (mr_Mark r)"
abbreviation "LoadPayload r f  ro_Load (mr_Payload r f)"
abbreviation "LoadPhase  ro_Load mr_Phase"
abbreviation "LoadRef r f  ro_Load (mr_Ref r f)"

abbreviation "StorefA m  ro_Store (mw_fA m)"
abbreviation "StorefM m  ro_Store (mw_fM m)"
abbreviation "StoreMark r m  ro_Store (mw_Mark r m)"
abbreviation "StorePayload r f pl  ro_Store (mw_Mutate_Payload r f pl)"
abbreviation "StorePhase ph  ro_Store (mw_Phase ph)"
abbreviation "StoreRef r f r'  ro_Store (mw_Mutate r f r')"

type_synonym ('field, 'mut, 'payload, 'ref) request
  = "'mut process_name × ('field, 'mut, 'payload, 'ref) request_op"

datatype ('field, 'payload, 'ref) response
  = mv_Bool "bool"
  | mv_Mark "gc_mark option"
  | mv_Payload "'payload option" ―‹ the requested reference might be invalid ›
  | mv_Phase "gc_phase"
  | mv_Ref "'ref option"
  | mv_Refs "'ref set"
  | mv_Void
  | mv_hs_type hs_type

text‹

The following record is the type of all processes's local states. For
the mutators and the garbage collector, consider these to be local
variables or registers.

The system's fA›, fM›, phase› and heap› variables are subject to the TSO memory model, as are all heap
operations.

›

record ('field, 'mut, 'payload, 'ref) local_state =
  ― ‹System-specific fields›
  heap :: "'ref  ('field, 'payload, 'ref) object"
  ― ‹TSO memory state›
  mem_store_buffers :: "'mut process_name  ('field, 'payload, 'ref) mem_store_action list"
  mem_lock :: "'mut process_name option"
  ― ‹Handshake state›
  hs_pending :: "'mut  bool"
  ― ‹Ghost state›
  ghost_hs_in_sync :: "'mut  bool"
  ghost_hs_phase :: "hs_phase"

  ― ‹Mutator-specific temporaries›
  new_ref :: "'ref option"
  roots :: "'ref set"
  ghost_honorary_root :: "'ref set"
  payload_value :: "'payload option"
  mutator_data :: "'field  'payload"
  mutator_hs_pending :: "bool"

  ― ‹Garbage collector-specific temporaries›
  field_set :: "'field set"
  mut :: "'mut"
  muts :: "'mut set"

  ― ‹Local variables used by multiple processes›
  fA :: "gc_mark"
  fM :: "gc_mark"
  cas_mark :: "gc_mark option"
  field :: "'field"
  mark :: "gc_mark option"
  phase :: "gc_phase"
  tmp_ref :: "'ref"
  ref :: "'ref option"
  refs :: "'ref set"
  W :: "'ref set"
  ― ‹Handshake state›
  hs_type :: "hs_type"
  ― ‹Ghost state›
  ghost_honorary_grey :: "'ref set"

text‹We instantiate CIMP's types as follows:›

type_synonym ('field, 'mut, 'payload, 'ref) gc_com
  = "(('field, 'payload, 'ref) response, location, ('field, 'mut, 'payload, 'ref) request, ('field, 'mut, 'payload, 'ref) local_state) com"
type_synonym ('field, 'mut, 'payload, 'ref) gc_loc_comp
  = "(('field, 'payload, 'ref) response, location, ('field, 'mut, 'payload, 'ref) request, ('field, 'mut, 'payload, 'ref) local_state) loc_comp"
type_synonym ('field, 'mut, 'payload, 'ref) gc_pred
  = "(('field, 'payload, 'ref) response, location, 'mut process_name, ('field, 'mut, 'payload, 'ref) request, ('field, 'mut, 'payload, 'ref) local_state) state_pred"
type_synonym ('field, 'mut, 'payload, 'ref) gc_system
  = "(('field, 'payload, 'ref) response, location, 'mut process_name, ('field, 'mut, 'payload, 'ref) request, ('field, 'mut, 'payload, 'ref) local_state) system"

type_synonym ('field, 'mut, 'payload, 'ref) gc_event
  = "('field, 'mut, 'payload, 'ref) request × ('field, 'payload, 'ref) response"
type_synonym ('field, 'mut, 'payload, 'ref) gc_history
  = "('field, 'mut, 'payload, 'ref) gc_event list"

type_synonym ('field, 'mut, 'payload, 'ref) lst_pred
  = "('field, 'mut, 'payload, 'ref) local_state  bool"

type_synonym ('field, 'mut, 'payload, 'ref) lsts
  = "'mut process_name  ('field, 'mut, 'payload, 'ref) local_state"

type_synonym ('field, 'mut, 'payload, 'ref) lsts_pred
  = "('field, 'mut, 'payload, 'ref) lsts  bool"

text‹

We use one locale per process to define a namespace for definitions
local to these processes. Mutator definitions are parametrised by the
mutator's identifier m›. We never interpret these locales; we
typically use their contents by prefixing identifiers with the locale
name. This might be considered an abuse. The attributes depend on
locale scoping somewhat, which is a mixed blessing.

If we have more than one mutator then we need to show that mutators do
not mutually interfere. To that end we define an extra locale that
contains these proofs.

›

locale mut_m = fixes m :: "'mut"
locale mut_m' = mut_m + fixes m' :: "'mut" assumes mm'[iff]: "m  m'"
locale gc
locale sys


subsection‹Object marking \label{sec:gc-marking}›

text‹

Both the mutators and the garbage collector mark references, which
indicates that a reference is live in the current round of
collection. This operation is defined in citet‹Figure~2.8› in "Pizlo201xPhd". These definitions are
parameterised by the name of the process.

›

context
  fixes p :: "'mut process_name"
begin

abbreviation lock_syn :: "location  ('field, 'mut, 'payload, 'ref) gc_com" where
  "lock_syn l  l Request (λs. (p, ro_Lock)) (λ_ s. {s})"
notation lock_syn (_ lock)

abbreviation unlock_syn :: "location  ('field, 'mut, 'payload, 'ref) gc_com" where
  "unlock_syn l  l Request (λs. (p, ro_Unlock)) (λ_ s. {s})"
notation unlock_syn (_ unlock)

abbreviation
  load_mark_syn :: "location  (('field, 'mut, 'payload, 'ref) local_state  'ref)
               ((gc_mark option  gc_mark option)
                  ('field, 'mut, 'payload, 'ref) local_state
                  ('field, 'mut, 'payload, 'ref) local_state)  ('field, 'mut, 'payload, 'ref) gc_com"
where
  "load_mark_syn l r upd  l Request (λs. (p, LoadMark (r s))) (λmv s. { upd m s |m. mv = mv_Mark m })"
notation load_mark_syn (_ load'_mark)

abbreviation load_fM_syn :: "location  ('field, 'mut, 'payload, 'ref) gc_com" where
  "load_fM_syn l  l Request (λs. (p, ro_Load mr_fM)) (λmv s. { sfM := m |m. mv = mv_Mark (Some m) })"
notation load_fM_syn (_ load'_fM)

abbreviation
  load_phase :: "location  ('field, 'mut, 'payload, 'ref) gc_com"
where
  "load_phase l  l Request (λs. (p, LoadPhase)) (λmv s. { sphase := ph |ph. mv = mv_Phase ph })"
notation load_phase (_ load'_phase)

abbreviation
  store_mark_syn :: "location  (('field, 'mut, 'payload, 'ref) local_state  'ref)  (('field, 'mut, 'payload, 'ref) local_state  bool)  ('field, 'mut, 'payload, 'ref) gc_com"
where
  "store_mark_syn l r fl  l Request (λs. (p, StoreMark (r s) (fl s))) (λ_ s. { s ghost_honorary_grey := {r s}  })"
notation store_mark_syn (_ store'_mark)

abbreviation
  add_to_W_syn :: "location  (('field, 'mut, 'payload, 'ref) local_state  'ref)  ('field, 'mut, 'payload, 'ref) gc_com"
where
  "add_to_W_syn l r  l λs. s W := W s  {r s}, ghost_honorary_grey := {} "
notation add_to_W_syn (_ add'_to'_W)

text‹

The reference we're marking is given in @{const "ref"}. If the current
process wins the \texttt{CAS} race then the reference is marked and
added to the local work list @{const "W"}.

TSO means we cannot avoid having the mark store pending in a store
buffer; in other words, we cannot have objects atomically transition
from white to grey. The following scheme blackens a white object, and
then reverts it to grey. The @{const "ghost_honorary_grey"} variable
is used to track objects undergoing this transition.

As CIMP provides no support for function calls, we prefix each
statement's label with a string from its callsite.

›

definition
  mark_object_fn :: "location  ('field, 'mut, 'payload, 'ref) gc_com"
where
  "mark_object_fn l =
     l @ ''_mo_null'' IF ¬ (NULL ref) THEN
       l @ ''_mo_mark'' load_mark (the  ref) mark_update ;;
       l @ ''_mo_fM'' load_fM ;;
       l @ ''_mo_mtest'' IF mark  Some  fM THEN
         l @ ''_mo_phase'' load_phase ;;
         l @ ''_mo_ptest'' IF phase  ph_Idle THEN
           ― ‹CAS: claim object›
           l @ ''_mo_co_lock'' lock ;;
           l @ ''_mo_co_cmark'' load_mark (the  ref) cas_mark_update ;;
           l @ ''_mo_co_ctest'' IF cas_mark = mark THEN
             l @ ''_mo_co_mark'' store_mark (the  ref) fM
           FI ;;
           l @ ''_mo_co_unlock'' unlock ;;
           l @ ''_mo_co_won'' IF cas_mark = mark THEN
             l @ ''_mo_co_W'' add_to_W (the  ref)
           FI
         FI
       FI
     FI"

end

text‹

The worklists (field @{term "W"}) are not subject to TSO. As we later
show (\S\ref{def:valid_W_inv}), these are disjoint and hence
operations on these are private to each process, with the sole
exception of when the GC requests them from the mutators. We describe
that mechanism next.

›

subsection‹Handshakes \label{sec:gc_handshakes}›

text‹

The garbage collector needs to synchronise with the mutators.
Here we do so by having the GC busy-wait: it sets a pending› flag for each mutator
and then waits for each to respond.

The system side of the interface collects the responses from the
mutators into a single worklist, which acts as a proxy for the garbage
collector's local worklist during get_roots› and get_work› handshakes.
We carefully model the effect these handshakes have on the processes' TSO buffers.

The system and mutators track handshake phases using ghost state; see
\S\ref{sec:phase-invariants}.

The handshake type and handshake pending bit are not subject to TSO as we expect
a realistic implementation of handshakes would involve synchronisation.

›

abbreviation hp_step :: "hs_type  hs_phase  hs_phase" where
  "hp_step ht 
     case ht of
         ht_NOOP  hs_step
       | ht_GetRoots  hs_step
       | ht_GetWork  id"

context sys
begin

definition
  handshake :: "('field, 'mut, 'payload, 'ref) gc_com"
where
  "handshake =
     ''sys_hs_gc_set_type'' Response
        (λreq s. { (s hs_type := ht,
                       ghost_hs_in_sync := False,
                       ghost_hs_phase := hp_step ht (ghost_hs_phase s) ,
                    mv_Void)
                 |ht. req = (gc, ro_hs_gc_store_type ht) })
    ''sys_hs_gc_mut_reqs'' Response
        (λreq s. { (s hs_pending := (hs_pending s)(m := True) , mv_Void)
                 |m. req = (gc, ro_hs_gc_store_pending m) })
    ''sys_hs_gc_done'' Response
        (λreq s. { (s, mv_Bool (¬hs_pending s m))
                 |m. req = (gc, ro_hs_gc_load_pending m) })
    ''sys_hs_gc_load_W'' Response
        (λreq s. { (s W := {} , mv_Refs (W s))
                 |_::unit. req = (gc, ro_hs_gc_load_W) })
    ''sys_hs_mut_pending'' Response
        (λreq s. { (s, mv_Bool (hs_pending s m))
                 |m. req = (mutator m, ro_hs_mut_load_pending) })
    ''sys_hs_mut'' Response
        (λreq s. { (s, mv_hs_type (hs_type s))
                 |m. req = (mutator m, ro_hs_mut_load_type) })
    ''sys_hs_mut_done'' Response
        (λreq s. { (s hs_pending := (hs_pending s)(m := False),
                       W := W s  W',
                       ghost_hs_in_sync := (ghost_hs_in_sync s)(m := True) ,
                    mv_Void)
                 |m W'. req = (mutator m, ro_hs_mut_done W') })"

end

text‹

The mutators' side of the interface. Also updates the ghost state
tracking the handshake state for @{const "ht_NOOP"} and @{const
"ht_GetRoots"} but not @{const "ht_GetWork"}.

Again we could make these subject to TSO, but that would be over specification.

›

context mut_m
begin

abbreviation mark_object_syn :: "location  ('field, 'mut, 'payload, 'ref) gc_com" (_ mark'_object [0] 71) where
  "l mark_object  mark_object_fn (mutator m) l"

abbreviation mfence_syn :: "location  ('field, 'mut, 'payload, 'ref) gc_com" (_ MFENCE [0] 71) where
  "l MFENCE  l Request (λs. (mutator m, ro_MFENCE)) (λ_ s. {s})"

abbreviation hs_load_pending_syn :: "location  ('field, 'mut, 'payload, 'ref) gc_com" (_ hs'_load'_pending'_ [0] 71) where
  "l hs_load_pending_  l Request (λs. (mutator m, ro_hs_mut_load_pending)) (λmv s. { s mutator_hs_pending := b  |b. mv = mv_Bool b })"

abbreviation hs_load_type_syn :: "location  ('field, 'mut, 'payload, 'ref) gc_com" (_ hs'_load'_type [0] 71) where
  "l hs_load_type  l Request (λs. (mutator m, ro_hs_mut_load_type)) (λmv s. { s hs_type := ht  |ht. mv = mv_hs_type ht})"

abbreviation hs_noop_done_syn :: "location  ('field, 'mut, 'payload, 'ref) gc_com" (_ hs'_noop'_done'_) where
  "l hs_noop_done_  l Request (λs. (mutator m, ro_hs_mut_done {}))
                                   (λ_ s. {s ghost_hs_phase := hs_step (ghost_hs_phase s) })"

abbreviation hs_get_roots_done_syn :: "location  (('field, 'mut, 'payload, 'ref) local_state  'ref set)  ('field, 'mut, 'payload, 'ref) gc_com" (_ hs'_get'_roots'_done'_)  where
  "l hs_get_roots_done_ wl  l Request (λs. (mutator m, ro_hs_mut_done (wl s)))
                                           (λ_ s. {s W := {}, ghost_hs_phase := hs_step (ghost_hs_phase s) })"

abbreviation hs_get_work_done_syn :: "location  (('field, 'mut, 'payload, 'ref) local_state  'ref set)  ('field, 'mut, 'payload, 'ref) gc_com" (_ hs'_get'_work'_done)  where
  "l hs_get_work_done wl  l Request (λs. (mutator m, ro_hs_mut_done (wl s)))
                                         (λ_ s. {s W := {} })"

definition
  handshake :: "('field, 'mut, 'payload, 'ref) gc_com"
where
  "handshake =
      ''hs_load_pending'' hs_load_pending_ ;;
      ''hs_pending'' IF mutator_hs_pending
      THEN
        ''hs_mfence'' MFENCE ;;
        ''hs_load_ht'' hs_load_type ;;
        ''hs_noop'' IF hs_type = ht_NOOP
        THEN
          ''hs_noop_done'' hs_noop_done_
        ELSE ''hs_get_roots'' IF hs_type = ht_GetRoots
        THEN
          ''hs_get_roots_refs'' ´refs := ´roots ;;
          ''hs_get_roots_loop'' WHILE ¬EMPTY refs DO
            ''hs_get_roots_loop_choose_ref'' ´ref :∈ Some ` ´refs ;;
            ''hs_get_roots_loop'' mark_object ;;
            ''hs_get_roots_loop_done'' ´refs := (´refs - {the ´ref})
          OD ;;
          ''hs_get_roots_done'' hs_get_roots_done_ W
        ELSE ''hs_get_work'' IF hs_type = ht_GetWork
        THEN
          ''hs_get_work_done'' hs_get_work_done W
        FI FI FI
      FI"

end

text‹

The garbage collector's side of the interface.

›

context gc
begin

abbreviation set_hs_type :: "location  hs_type  ('field, 'mut, 'payload, 'ref) gc_com" (_ set'_hs'_type)  where
  "l set_hs_type ht  l Request (λs. (gc, ro_hs_gc_store_type ht)) (λ_ s. {s})"

abbreviation set_hs_pending :: "location  (('field, 'mut, 'payload, 'ref) local_state  'mut)  ('field, 'mut, 'payload, 'ref) gc_com" (_ set'_hs'_pending)  where
  "l set_hs_pending m  l Request (λs. (gc, ro_hs_gc_store_pending (m s))) (λ_ s. {s})"

abbreviation load_W :: "location  ('field, 'mut, 'payload, 'ref) gc_com" (_ load'_W) where
  "l load_W  l @ ''_load_W'' Request (λs. (gc, ro_hs_gc_load_W))
                                          (λresp s. {sW := W' |W'. resp = mv_Refs W'})"

abbreviation mfence :: "location  ('field, 'mut, 'payload, 'ref) gc_com" (_ MFENCE)  where
  "l MFENCE  l Request (λs. (gc, ro_MFENCE)) (λ_ s. {s})"

definition
  handshake_init :: "location  hs_type  ('field, 'mut, 'payload, 'ref) gc_com" (_ handshake'_init)
where
  "l handshake_init req =
     l @ ''_init_type'' set_hs_type req ;;
     l @ ''_init_muts'' ´muts := UNIV ;;
     l @ ''_init_loop'' WHILE ¬ (EMPTY muts) DO
       l @ ''_init_loop_choose_mut'' ´mut :∈ ´muts ;;
       l @ ''_init_loop_set_pending'' set_hs_pending mut ;;
       l @ ''_init_loop_done'' ´muts := (´muts - {´mut})
     OD"

definition
  handshake_done :: "location  ('field, 'mut, 'payload, 'ref) gc_com" (_ handshake'_done)
where
  "l handshake_done =
     l @ ''_done_muts'' ´muts := UNIV ;;
     l @ ''_done_loop'' WHILE ¬EMPTY muts DO
       l @ ''_done_loop_choose_mut'' ´mut :∈ ´muts ;;
       l @ ''_done_loop_rendezvous'' Request
               (λs. (gc, ro_hs_gc_load_pending (mut s)))
               (λmv s. { s muts := muts s - { mut s |done. mv = mv_Bool done  done } })
     OD"

definition
  handshake_noop :: "location  ('field, 'mut, 'payload, 'ref) gc_com" (_ handshake'_noop)
where
  "l handshake_noop =
         l @ ''_mfence'' MFENCE ;;
         l handshake_init ht_NOOP ;;
         l handshake_done"

definition
  handshake_get_roots :: "location  ('field, 'mut, 'payload, 'ref) gc_com" (_ handshake'_get'_roots)
where
  "l handshake_get_roots =
         l handshake_init ht_GetRoots ;;
         l handshake_done ;;
         l load_W"

definition
  handshake_get_work :: "location  ('field, 'mut, 'payload, 'ref) gc_com" (_ handshake'_get'_work)
where
  "l handshake_get_work =
         l handshake_init ht_GetWork ;;
         l handshake_done ;;
         l load_W"

end


subsection‹The system process›

text ‹

The system process models the environment in which the garbage
collector and mutators execute.  We translate the x86-TSO memory model
due to citet"DBLP:journals/cacm/SewellSONM10"
into a CIMP process. It is a reactive system: it receives requests and
returns values, but initiates no communication itself. It can,
however, autonomously commit a store pending in a TSO store buffer.

The memory bus can be locked by atomic compare-and-swap (\texttt{CAS})
instructions (and others in general). A processor is not blocked
(i.e., it can read from memory) when it holds the lock, or no-one
does.

›

definition
  not_blocked :: "('field, 'mut, 'payload, 'ref) local_state  'mut process_name  bool"
where
  "not_blocked s p = (case mem_lock s of None  True | Some p'  p = p')"

text‹

We compute the view a processor has of memory by applying all its
pending stores.

›

definition
  do_store_action :: "('field, 'payload, 'ref) mem_store_action  ('field, 'mut, 'payload, 'ref) local_state  ('field, 'mut, 'payload, 'ref) local_state"
where
  "do_store_action wact =
     (λs. case wact of
       mw_Mark r gc_mark     sheap := (heap s)(r := map_option (λobj. objobj_mark := gc_mark) (heap s r))
     | mw_Mutate r f new_r   sheap := (heap s)(r := map_option (λobj. objobj_fields := (obj_fields obj)(f := new_r) ) (heap s r))
     | mw_Mutate_Payload r f pl  sheap := (heap s)(r := map_option (λobj. objobj_payload := (obj_payload obj)(f := pl) ) (heap s r))
     | mw_fM gc_mark         sfM := gc_mark
     | mw_fA gc_mark         sfA := gc_mark
     | mw_Phase gc_phase     sphase := gc_phase)"

definition
  fold_stores :: "('field, 'payload, 'ref) mem_store_action list  ('field, 'mut, 'payload, 'ref) local_state  ('field, 'mut, 'payload, 'ref) local_state"
where
  "fold_stores ws = fold (λw. (∘) (do_store_action w)) ws id"

abbreviation
  processors_view_of_memory :: "'mut process_name  ('field, 'mut, 'payload, 'ref) local_state  ('field, 'mut, 'payload, 'ref) local_state"
where
  "processors_view_of_memory p s  fold_stores (mem_store_buffers s p) s"

definition
  do_load_action :: "('field, 'ref) mem_load_action
                    ('field, 'mut, 'payload, 'ref) local_state
                    ('field, 'payload, 'ref) response"
where
  "do_load_action ract =
     (λs. case ract of
       mr_Ref r f  mv_Ref (Option.bind (heap s r) (λobj. obj_fields obj f))
     | mr_Payload r f  mv_Payload (Option.bind (heap s r) (λobj. obj_payload obj f))
     | mr_Mark r   mv_Mark (map_option obj_mark (heap s r))
     | mr_Phase    mv_Phase (phase s)
     | mr_fM       mv_Mark (Some (fM s))
     | mr_fA       mv_Mark (Some (fA s)))"

definition
  sys_load :: "'mut process_name
               ('field, 'ref) mem_load_action
               ('field, 'mut, 'payload, 'ref) local_state
               ('field, 'payload, 'ref) response"
where
  "sys_load p ract = do_load_action ract  processors_view_of_memory p"

context sys
begin

text‹

The semantics of TSO memory following citet‹\S3› in "DBLP:journals/cacm/SewellSONM10". This differs
from the earlier citet"DBLP:conf/tphol/OwensSS09" by allowing the TSO lock to be taken by a
process with a non-empty store buffer. We omit their treatment of
registers; these are handled by the local states of the other
processes. The system can autonomously take the oldest store in the
store buffer for processor p› and commit it to memory,
provided p› either holds the lock or no processor does.

›

definition
  mem_TSO :: "('field, 'mut, 'payload, 'ref) gc_com"
where
  "mem_TSO =
        ''tso_load'' Response (λreq s. { (s, sys_load p mr s)
                                         |p mr. req = (p, ro_Load mr)  not_blocked s p })
       ''tso_store'' Response (λreq s. { (s mem_store_buffers := (mem_store_buffers s)(p := mem_store_buffers s p @ [w]) , mv_Void)
                                          |p w. req = (p, ro_Store w) })
       ''tso_mfence'' Response (λreq s. { (s, mv_Void)
                                           |p. req = (p, ro_MFENCE)  mem_store_buffers s p = [] })
       ''tso_lock'' Response (λreq s. { (s mem_lock := Some p , mv_Void)
                                         |p. req = (p, ro_Lock)  mem_lock s = None })
       ''tso_unlock'' Response (λreq s. { (s mem_lock := None , mv_Void)
                                         |p. req = (p, ro_Unlock)  mem_lock s = Some p  mem_store_buffers s p = [] })
       ''tso_dequeue_store_buffer'' LocalOp (λs. { (do_store_action w s) mem_store_buffers := (mem_store_buffers s)(p := ws) 
                                                    | p w ws. mem_store_buffers s p = w # ws  not_blocked s p  p  sys })"

text‹

We track which references are allocated using the domain of @{const
"heap"}.

\label{sec:sys_alloc}

For now we assume that the system process magically allocates and
deallocates references.

We also arrange for the object to be marked atomically (see
\S\ref{sec:mut_alloc}) which morally should be done by the mutator. In
practice allocation pools enable this kind of atomicity (wrt the sweep
loop in the GC described in \S\ref{sec:gc-model-gc}).

Note that the \texttt{abort} in citet‹Figure~2.9: Alloc› in "Pizlo201xPhd" means the atomic
fails and the mutator can revert to activity outside of
\texttt{Alloc}, avoiding deadlock. We instead signal the exhaustion of
the heap explicitly, i.e., the @{const "ro_Alloc"} action cannot fail.

›

definition
  alloc :: "('field, 'mut, 'payload, 'ref) gc_com"
where
  "alloc = ''alloc'' Response (λreq s.
      if dom (heap s) = UNIV
      then {(s, mv_Ref None) |_::unit. snd req = ro_Alloc }
      else { ( s heap := (heap s)(r := Some  obj_mark = fA s, obj_fields = Map.empty, obj_payload = Map.empty ) , mv_Ref (Some r) )
           |r. r  dom (heap s)  snd req = ro_Alloc })"

text‹

References are freed by removing them from @{const "heap"}.

›

definition
  free :: "('field, 'mut, 'payload, 'ref) gc_com"
where
  "free = ''sys_free'' Response (λreq s.
      { (sheap := (heap s)(r := None), mv_Void) |r. snd req = ro_Free r })"

text‹

The top-level system process.

›

definition
  com :: "('field, 'mut, 'payload, 'ref) gc_com"
where
  "com =
    LOOP DO
        mem_TSO
       alloc
       free
       handshake
    OD"

end


subsection‹Mutators›

text‹

The mutators need to cooperate with the garbage collector. In
particular, when the garbage collector is not idle the mutators use a
\emph{write barrier} (see \S\ref{sec:gc-marking}).

The local state for each mutator tracks a working set of references,
which abstracts from how the process's registers and stack are
traversed to discover roots.

›

context mut_m
begin

text‹

\label{sec:mut_alloc}

Allocation is defined in citet‹Figure~2.9› in "Pizlo201xPhd". See \S\ref{sec:sys_alloc}
for how we abstract it.

›

abbreviation alloc :: "('field, 'mut, 'payload, 'ref) gc_com" where
  "alloc 
    ''alloc'' Request (λs. (mutator m, ro_Alloc))
                        (λmv s. { s roots := roots s  set_option opt_r  |opt_r. mv = mv_Ref opt_r })"

text‹

The mutator can always discard any references it holds.

›

abbreviation discard :: "('field, 'mut, 'payload, 'ref) gc_com" where
  "discard 
    ''discard_refs'' LocalOp (λs. { s roots := roots'  |roots'. roots'  roots s })"

text‹

Load and store are defined in citet‹Figure~2.9› in "Pizlo201xPhd".

Dereferencing a reference can increase the set of mutator roots.

›

abbreviation load :: "('field, 'mut, 'payload, 'ref) gc_com" where
  "load 
    ''mut_load_choose'' LocalOp (λs. { s tmp_ref := r, field := f  |r f. r  roots s }) ;;
    ''mut_load'' Request (λs. (mutator m, LoadRef (tmp_ref s) (field s)))
                           (λmv s. { s roots := roots s  set_option r 
                                   |r. mv = mv_Ref r })"

text‹

\label{sec:write-barriers}

Storing a reference involves marking both the old and new references,
i.e., both \emph{insertion} and \emph{deletion} barriers are
installed. The deletion barrier preserves the \emph{weak tricolour
invariant}, and the insertion barrier preserves the \emph{strong
tricolour invariant}; see \S\ref{sec:strong-tricolour-invariant} for
further discussion.

Note that the the mutator reads the overwritten reference but does not
store it in its roots.

›

abbreviation
  mut_deref :: "location
           (('field, 'mut, 'payload, 'ref) local_state  'ref)
           (('field, 'mut, 'payload, 'ref) local_state  'field)
           (('ref option  'ref option)  ('field, 'mut, 'payload, 'ref) local_state  ('field, 'mut, 'payload, 'ref) local_state)  ('field, 'mut, 'payload, 'ref) gc_com" (_ deref)
where
  "l deref r f upd  l Request (λs. (mutator m, LoadRef (r s) (f s)))
                                   (λmv s. { upd opt_r' (sghost_honorary_root := set_option opt_r') |opt_r'. mv = mv_Ref opt_r' })"

(*
Does not work in local theory mode:

syntax
  "_mut_fassign" :: "location ⇒ idt ⇒ 'ref ⇒ 'field ⇒ ('field, 'mut, 'payload, 'ref) gc_com" ("⦃_⦄ ´_ := ´_ → _" [0, 0, 70] 71)
translations
  "⦃l⦄ ´q := ´r→f"    => "CONST mut_deref l r «f» (_update_name q)"

 ´ref := ´tmp_ref→´field ;;
*)

abbreviation
  store_ref :: "location
               (('field, 'mut, 'payload, 'ref) local_state  'ref)
               (('field, 'mut, 'payload, 'ref) local_state  'field)
               (('field, 'mut, 'payload, 'ref) local_state  'ref option)
               ('field, 'mut, 'payload, 'ref) gc_com" (_ store'_ref)
where
  "l store_ref r f r'  l Request (λs. (mutator m, StoreRef (r s) (f s) (r' s))) (λ_ s. {sghost_honorary_root := {}})"

definition
  store :: "('field, 'mut, 'payload, 'ref) gc_com"
where
  "store =
     ― ‹Choose vars for ref→field := new_ref›
     ''store_choose'' LocalOp (λs. { s tmp_ref := r, field := f, new_ref := r' 
                                     |r f r'. r  roots s  r'  Some ` roots s  {None} }) ;;
     ― ‹Mark the reference we're about to overwrite. Does not update roots.›
     ''deref_del'' deref tmp_ref field ref_update ;;
     ''store_del'' mark_object ;;
     ― ‹Mark the reference we're about to insert.›
     ''lop_store_ins'' ´ref := ´new_ref ;;
     ''store_ins'' mark_object ;;
     ''store_ins'' store_ref tmp_ref field new_ref"

text‹

Load and store payload data.

›

abbreviation load_payload :: "('field, 'mut, 'payload, 'ref) gc_com" where
  "load_payload 
    ''mut_load_payload_choose'' LocalOp (λs. { s tmp_ref := r, field := f  |r f. r  roots s }) ;;
    ''mut_load_payload'' Request (λs. (mutator m, LoadPayload (tmp_ref s) (field s)))
                                   (λmv s. { s mutator_data := (mutator_data s)(var := pl) 
                                           |var pl. mv = mv_Payload pl })"

abbreviation store_payload :: "('field, 'mut, 'payload, 'ref) gc_com" where
  "store_payload 
    ''mut_store_payload_choose'' LocalOp (λs. { s tmp_ref := r, field := f, payload_value := pl s  |r f pl. r  roots s }) ;;
    ''mut_store_payload'' Request (λs. (mutator m, StorePayload (tmp_ref s) (field s) (payload_value s)))
                                   (λmv s. { s mutator_data := (mutator_data s)(f := pl) 
                                           |f pl. mv = mv_Payload pl })"

text‹

A mutator makes a non-deterministic choice amongst its possible
actions. For completeness we allow mutators to issue \texttt{MFENCE}
instructions. We leave \texttt{CAS} (etc) to future work. Neither has
a significant impact on the rest of the development.

›
(*
FIXME add SKIP before alloc, mfence. handshake needs work too: want to
commit to checking for handshakes in a strongly fair way. A SKIP
at the top level of ‹handshake› + a noop transition + appropriate fairness constraints might work.

FIXME is mut local computation strong enough? only works on mutator data; not roots.
*)

definition
  com :: "('field, 'mut, 'payload, 'ref) gc_com"
where
  "com =
    LOOP DO
        ''mut_local_computation'' LocalOp (λs. {smutator_data := f (mutator_data s) |f. True})
       alloc
       discard
       load
       store
       load_payload
       store_payload
       ''mut_mfence'' MFENCE
       handshake
    OD"

end


subsection ‹Garbage collector \label{sec:gc-model-gc}›

text‹

We abstract the primitive actions of the garbage collector thread.

›

abbreviation
  gc_deref :: "location
              (('field, 'mut, 'payload, 'ref) local_state  'ref)
              (('field, 'mut, 'payload, 'ref) local_state  'field)
              (('ref option  'ref option)  ('field, 'mut, 'payload, 'ref) local_state  ('field, 'mut, 'payload, 'ref) local_state)  ('field, 'mut, 'payload, 'ref) gc_com"
where
  "gc_deref l r f upd  l Request (λs. (gc, LoadRef (r s) (f s)))
                                    (λmv s. { upd r' s |r'. mv = mv_Ref r' })"

abbreviation
  gc_load_mark :: "location
                 (('field, 'mut, 'payload, 'ref) local_state  'ref)
                 ((gc_mark option  gc_mark option)  ('field, 'mut, 'payload, 'ref) local_state  ('field, 'mut, 'payload, 'ref) local_state)
                 ('field, 'mut, 'payload, 'ref) gc_com"
where
  "gc_load_mark l r upd  l Request (λs. (gc, LoadMark (r s))) (λmv s. { upd m s |m. mv = mv_Mark m })"

syntax
  "_gc_fassign" :: "location  idt  'ref  'field  ('field, 'mut, 'payload, 'ref) gc_com" (_ ´_ := ´_  _› [0, 0, 0, 70] 71)
  "_gc_massign" :: "location  idt  'ref  ('field, 'mut, 'payload, 'ref) gc_com" (_ ´_ := ´_  flag [0, 0, 0] 71)
syntax_consts
  "_gc_fassign"  gc_deref and
  "_gc_massign"  gc_load_mark
translations
  "l ´q := ´rf"    => "CONST gc_deref l r «f» (_update_name q)"
  "l ´m := ´rflag" => "CONST gc_load_mark l r (_update_name m)"

context gc
begin

abbreviation store_fA_syn :: "location  (('field, 'mut, 'payload, 'ref) local_state  gc_mark)  ('field, 'mut, 'payload, 'ref) gc_com" (_ store'_fA) where
  "l store_fA f  l Request (λs. (gc, StorefA (f s))) (λ_ s. {s})"

abbreviation load_fM_syn :: "location  ('field, 'mut, 'payload, 'ref) gc_com" (_ load'_fM) where
  "l load_fM  l Request (λs. (gc, LoadfM)) (λmv s. { sfM := m |m. mv = mv_Mark (Some m) })"

abbreviation store_fM_syn :: "location  ('field, 'mut, 'payload, 'ref) gc_com" (_ store'_fM) where
  "l store_fM  l Request (λs. (gc, StorefM (fM s))) (λ_ s. {s})"

abbreviation store_phase_syn :: "location  gc_phase  ('field, 'mut, 'payload, 'ref) gc_com" (_ store'_phase) where
  "l store_phase ph  l Request (λs. (gc, StorePhase ph)) (λ_ s. {s phase := ph })"

abbreviation mark_object_syn :: "location  ('field, 'mut, 'payload, 'ref) gc_com" (_ mark'_object) where
  "l mark_object  mark_object_fn gc l"

abbreviation free_syn :: "location  (('field, 'mut, 'payload, 'ref) local_state  'ref)  ('field, 'mut, 'payload, 'ref) gc_com" (_ free) where
  "l free r  l Request (λs. (gc, ro_Free (r s))) (λ_ s. {s})"

text‹

The following CIMP program encodes the garbage collector algorithm
proposed in Figure~2.15 of citet"Pizlo201xPhd".

›

definition (in gc)
  com :: "('field, 'mut, 'payload, 'ref) gc_com"
where
  "com =
     LOOP DO
       ''idle_noop'' handshake_noop ;; ― ‹hp_Idle›

       ''idle_load_fM'' load_fM ;;
       ''idle_invert_fM'' ´fM := (¬ ´fM) ;;
       ''idle_store_fM'' store_fM ;;

       ''idle_flip_noop'' handshake_noop ;; ― ‹hp_IdleInit›

       ''idle_phase_init'' store_phase ph_Init ;;

       ''init_noop'' handshake_noop ;; ― ‹hp_InitMark›

       ''init_phase_mark'' store_phase ph_Mark ;;
       ''mark_load_fM'' load_fM ;;
       ''mark_store_fA'' store_fA fM ;;

       ''mark_noop'' handshake_noop ;; ― ‹hp_Mark›

       ''mark_loop_get_roots'' handshake_get_roots ;; ― ‹hp_IdleMarkSweep›

       ''mark_loop'' WHILE ¬EMPTY W DO
         ''mark_loop_inner'' WHILE ¬EMPTY W DO
           ''mark_loop_choose_ref'' ´tmp_ref :∈ ´W ;;
           ''mark_loop_fields'' ´field_set := UNIV ;;
           ''mark_loop_mark_object_loop'' WHILE ¬EMPTY field_set DO
             ''mark_loop_mark_choose_field'' ´field :∈ ´field_set ;;
             ''mark_loop_mark_deref'' ´ref := ´tmp_ref´field ;;
             ''mark_loop'' mark_object ;;
             ''mark_loop_mark_field_done'' ´field_set := (´field_set - {´field})
           OD ;;
           ''mark_loop_blacken'' ´W := (´W - {´tmp_ref})
         OD ;;
         ''mark_loop_get_work'' handshake_get_work
       OD ;;

       ― ‹sweep›

       ''mark_end'' store_phase ph_Sweep ;;
       ''sweep_load_fM'' load_fM ;;
       ''sweep_refs'' ´refs := UNIV ;;
       ''sweep_loop'' WHILE ¬EMPTY refs DO
         ''sweep_loop_choose_ref'' ´tmp_ref :∈ ´refs ;;
         ''sweep_loop_load_mark'' ´mark := ´tmp_refflag ;;
         ''sweep_loop_check'' IF ¬NULL mark  the  mark  fM THEN
           ''sweep_loop_free'' free tmp_ref
         FI ;;
         ''sweep_loop_ref_done'' ´refs := (´refs - {´tmp_ref})
       OD ;;
       ''sweep_idle'' store_phase ph_Idle
     OD"

end

primrec
  gc_coms :: "'mut process_name  ('field, 'mut, 'payload, 'ref) gc_com"
where
  "gc_coms (mutator m) = mut_m.com m"
| "gc_coms gc = gc.com"
| "gc_coms sys = sys.com"
(*<*)

end
(*>*)