Theory Language

(*File: Language.thy
  Author: L Beringer & M Hofmann, LMU Munich
  Date: 05/12/2008
  Purpose: Syntax and operational semantics of subset of JVML 
*)
(*<*)
theory Language imports AssocLists begin
(*>*)

section‹Language \label{sec:language}›
subsection‹Syntax›

text‹We have syntactic classes of (local) variables, class names,
field names, and method names. Naming restrictions, namespaces, long
Java names etc.~are not modelled.›

typedecl Var
typedecl Class
typedecl Field
typedecl Method

text‹Since arithmetic operations are modelled as unimplemented
functions, we introduce the type of values in this section. The domain
of heap locations is arbitrary.›

typedecl Addr 

text‹A reference is either null or an address.›

datatype Ref = Nullref | Loc Addr

text‹Values are either integer numbers or references.›

datatype Val = RVal Ref | IVal int

text‹The type of (instruction) labels is fixed, since the operational
semantics increments the program counter after each instruction.›

type_synonym Label = int

text‹Regarding the instructions, we support basic operand-stack
manipulations, object creation, field modifications, casts, static
method invocations, conditional and unconditional jumps, and a return
instruction.

For every (Isabelle) function f : Val⇒Val⇒Val› we have an
instruction binop f› whose semantics is to invoke f›
on the two topmost values on the operand stack and replace them with
the result.  Similarly for unop f›.›

datatype Instr =
  const Val
| dup
| pop
| swap
| load Var
| store Var
| binop "Val  Val  Val"
| unop "Val  Val" 
| new Class
| getfield Class Field
| putfield Class Field
| checkcast Class
| invokeS Class Method
| goto Label
| iftrue Label
| vreturn 

text‹Method body declarations contain a list of formal parameters, a
mapping from instruction labels to instructions, and a start
label. The operational semantics assumes that instructions are
labelled consecutively\footnote{In the paper, we slightly abstract
from this by including a successor functions on labels}.›

type_synonym Mbody = "Var list × (Label, Instr) AssList × Label" 

text‹A class definition associates method bodies to method names.›
type_synonym Classdef = "(Method, Mbody) AssList"

text‹Finally, a program consists of classes.›
type_synonym Prog = "(Class, Classdef) AssList"

text‹Taken together, the three types Prog›, Classdef›,
and Mbody› represent an abstract model of the virtual machine
environment. In our opinion, it would be desirable to avoid modelling
this environment at a finer level, at least for the purpose of the
program logic. For example, we prefer not to consider in detail the
representation of the constant pool.›

subsection‹Dynamic semantics›
subsubsection‹Semantic components›

text‹An object consists of the identifier of its dynamic class and a
map from field names to values. Currently, we do not model
type-correctness, nor do we require that all (or indeed any) of the
fields stem from the static definition of the class, or a super-class.
Note, however, that type correctness can be expressed in the logic.›

type_synonym Object = "Class × (Field, Val) AssList"

text‹The heap is represented as a map from addresses to values.  The
JVM specification does not prescribe any particular object layout. The
proposed type reflects this indeterminacy, but allows one to calculate
the byte-correct size of a heap only after a layout scheme has been
supplied. Alternative heap models would be the store-less semantics in
the sense of Jonkers~cite"Jonkers1981" and
Deutsch~cite"Deutsch1992", (where the heap is modelled as a partial
equivalence relation on access paths), or object-based semantics in
the sense of Reddy~cite"Reddy1996", where the heap is represented as
a history of update operations.  H\"ahnle et al.~use a variant of the
latter in their dynamic logic for a {\sc
JavaCard}~~cite"HaehnleM:Cassis2005".›

type_synonym Heap = "(Addr, Object) AssList"

text‹Later, one might extend heaps by a component for static fields.›

text‹The types of the (register) store and the operand stack are as
expected.›

type_synonym Store = "(Var, Val) AssList"
type_synonym OpStack = "Val list"

text‹States contain an operand stack, a store, and a heap.›
type_synonym State = "OpStack × Store × Heap"

definition heap::"State  Heap"
where "heap s = snd(snd s)"

text‹The operational semantics and the program logic are defined
relative to a fixed program P›.  Alternatively, the type of the
operational semantics (and proof judgements) could be extended by a
program component.  We also define the constant value TRUE›,
the representation of which does not matter for the current
formalisation.›

axiomatization P::Prog and TRUE::Val

text‹In order to obtain more readable rules, we define operations
for extracting method bodies and instructions from the program.›

definition mbody_is::"Class  Method  Mbody  bool"
where "mbody_is C m M = ( CD . PC = Some CD  CDm = Some M)"

definition get_ins::"Mbody  Label  Instr option"
where "get_ins M l = (fst(snd M))l"

definition ins_is::"Class  Method  Label  Instr  bool"
where "ins_is C m l ins = ( M . mbody_is C m M  get_ins M l = Some ins)"

text‹The transfer of method arguments from the caller's operand stack
to the formal parameters of an invoked method is modelled by the
predicate›

inductive_set Frame::"(OpStack × (Var list) × Store × OpStack) set"
where
FrameNil: "oo=ops  (ops,[],emp,oo) : Frame"
|
Frame_cons: "(oo,par,S,ops) : Frame; R =S[xv]
             (v # oo, x # par,R,ops):Frame"

(*<*)
lemma Frame_deterministic[rule_format]:
"(ops, par, S, os)  Frame  
( R opsa . (ops, par, R, opsa)  Frame  R=S  opsa = os)"
apply (erule Frame.induct, clarsimp)
apply (erule Frame.cases, clarsimp, clarsimp)
apply (erule thin_rl, clarsimp)
apply (erule Frame.cases, clarsimp, clarsimp)
done
(*>*)

text‹In order to obtain a deterministic semantics, we assume the
existence of a function, with the obvious freshness axiom for this
construction.›

axiomatization nextLoc::"Heap  Addr"
where nextLoc_fresh: "h(nextLoc h) = None"

subsubsection‹Operational judgements› 

text‹Similar to Bannwart-M\"uller~cite"BannwartMueller05", we define
two operational judgements: a one-step relation and a relation that
represents the transitive closure of the former until the end of the
current method invocation. These relations are mutually recursive,
since the method invocation rule contracts the execution of the
invoked method to a single step. The one-step relation associates a
state to its immediate successor state, where the program counter is
interpreted with respect to the current method body. The transitive
closure ignores the bottom part of the operand stack and the store of
the final configuration. It simply returns the heap and the result of
the method invocation, where the latter is given by the topmost value
on the operand stack. In contrast to~cite"BannwartMueller05", we do
not use an explicit return› variable. Both relations take an
additional index of type nat› that monitors the derivation
height. This is useful in the proof of soundness of the program
logic.›

text‹Intuitively, (M,l,s,n,l',s'):Step› means that method
(body) M› evolves in one step from state s› to state
s'›, while statement (M,s,n,h,v):Exec› indicates that
executing from s› in method M› leads eventually to a
state whose final value is h›, where precisely the last step in
this sequence is a vreturn› instruction and the return value is
v›.›

text‹Like Bannwart and M\"uller, we define a "frame-less"
semantics. i.e.~the execution of a method body is modelled by a
transitive closure of the basic step-relation, which results in a
one-step reduction at the invocation site. Arguably, an operational
semantics with an explicit frame stack is closer to the real JVM. It
should not be difficult to verify the operational soundness of the
present system w.r.t.~such a finer model, or to modify the
semantics.›

inductive_set
  Step::"(Mbody × Label × State × nat × Label × State) set"
and
  Exec::"(Mbody × Label × State × nat × Heap × Val) set"
where
Const:"get_ins M l = Some (const v); NEXT = (v # os,s,h); ll=l+1
        (M,l,(os,s,h), 1, ll, NEXT) : Step"
|
Dup:  "get_ins M l = Some dup; NEXT = (v # v # os,s,h); ll =l+1
        (M,l,(v # os,s,h), 1, ll, NEXT) : Step"
|
Pop:  "get_ins M l = Some pop; NEXT = (os,s,h); ll=l+1
        (M,l,(v # os,s,h), 1, ll, NEXT) : Step"
|
Swap: "get_ins M l = Some swap; NEXT = (w # (v # os),s,h); ll= l+1
        (M,l,(v # (w # os),s,h), 1, ll, NEXT) : Step"
|
Load: "get_ins M l = Some (load x); sx = Some v;
         NEXT = (v # os,s,h); ll=l+1
        (M,l,(os,s,h), 1, ll,NEXT) : Step"
|
Store:"get_ins M l = Some (store x); NEXT = (os,s[xv],h); ll= l+1
        (M,l,(v # os,s,h), 1, ll, NEXT) : Step"
|
Binop:"get_ins M l = Some (binop f); NEXT = ((f v w) # os,s,h); ll=l+1
        (M,l,(v # (w # os),s,h), 1, ll,NEXT) : Step"
|
Unop: "get_ins M l = Some (unop f); NEXT = ((f v) # os,s,h);ll=l+1
        (M,l,(v # os,s,h), 1, ll, NEXT) : Step"
|
New:  "get_ins M l = Some (new d); newobj = (d, emp); a=nextLoc h; 
         NEXT = ((RVal (Loc a)) # os,s,h[anewobj]); ll = l+1
        (M,l,(os,s,h), 1, ll,NEXT) : Step"
|
Get:  "get_ins M l = Some (getfield d F); ha = Some (d, Flds);
         FldsF = Some v; NEXT = (v # os,s,h); ll=l+1
        (M,l,((RVal (Loc a)) # os,s,h), 1, ll,NEXT) : Step"
|
Put:  "get_ins M l = Some (putfield d F); ha = Some (d, Flds);
         newobj = (d, Flds[Fv]); NEXT = (os,s,h[anewobj]); ll=l+1
        (M,l,(v # ((RVal (Loc a)) # os),s,h), 1, ll, NEXT) : Step"
|
Cast: "get_ins M l = Some (checkcast d); ha = Some (d, Flds);
         NEXT = ((RVal (Loc a)) # os,s,h); ll=l+1
        (M,l,((RVal (Loc a)) # os,s,h), 1, ll,NEXT) : Step"
|
Goto: "get_ins M l = Some (goto pc)  (M,l,S, 1, pc,S) : Step"
|
IfT:  "get_ins M l = Some (iftrue pc); NEXT = (os,s,h)
        (M,l,(TRUE # os,s,h), 1, pc, NEXT) : Step"
|
IfF:  "get_ins M l = Some (iftrue pc); v  TRUE; NEXT = (os,s,h); ll=l+1
        (M,l,(v # os,s,h), 1, ll, NEXT) : Step"
|
InvS: "get_ins M l = Some (invokeS C m); mbody_is C m (par,code,l0);
         ((par,code,l0),l0,([], S,h), n, hh, v): Exec; 
         (ops,par,S,os) : Frame; NEXT = (v # os,s,hh); ll = l+1
        (M,l,(ops,s,h), Suc n, ll, NEXT) : Step"
|
Vret: "get_ins M l = Some vreturn  (M,l,(v # os,s,h), 1, h, v) : Exec"
|
Run:  "(M,l,s,n,ll,t):Step; (M,ll,t,m,h,v):Exec; k = (max n m) +1 
        (M,l,s,k,h,v) : Exec"

text‹A big-step operational judgement that abstracts from the
derivation height is easily defined.›

definition Opsem::"Mbody  Label  State  Heap  Val  bool"
where "Opsem M l s h v = ( n . (M,l,s,n,h,v):Exec)"

subsection ‹Basic properties›

text ‹We provide elimination lemmas for the inductively defined
relations›

inductive_cases eval_cases: 
 "(M,l,s,n,ll,t) : Step"
 "(M,l,s,n,h,v) : Exec"
(*<*)
lemma no_zero_height_derivsAux[rule_format]: 
"n . ((M,l,s,n,ll,t) : Step  (n=0  False))  ((MM,lll,ss,m,h,v):Exec  (m=0  False))"
by (rule allI, rule Step_Exec.induct, simp_all)

lemma no_zero_height_derivsAux2: "((M,l,s,0,ll,t):Step  False)  ((MM,lll,ss,0,h,v):Exec  False)"
by (insert no_zero_height_derivsAux, fast)
(*>*)
text ‹and observe that no derivations of height 0 exist.›
lemma no_zero_height_Step_derivs: "(M,l,s,0,ll,t):Step  False"
(*<*)by (insert no_zero_height_derivsAux2, fast)(*>*)
(*<*)
lemma no_zero_height_Step_derivs1: "(M,l,(os,S,H),0,ll,t):Step  False"
by (insert no_zero_height_derivsAux2, fast)
(*>*)

lemma no_zero_height_Exec_derivs: "(M,l,s,0,h,v):Exec  False"
(*<*)by (insert no_zero_height_derivsAux2, fast)(*>*)
(*<*)
lemma no_zero_height_Exec_derivs1: "(M,l,(os,S,H),0,h,v):Exec  False"
by (insert no_zero_height_derivsAux2, fast)
(*>*)

(*<*)
(*Elimination rules*)
lemma ConstElim1:"(M, l, (os, S, h), n, ll,t)  Step; get_ins M l = Some (const v) 
                n = Suc 0  t = (v # os, S, h)  ll = l+1"
by (erule eval_cases, simp_all)

lemma DupElim1: "(M, l, (os, S, h), n, ll, t)  Step; get_ins M l =  Some dup 
                 v ops . os = v # ops  n = Suc 0  t = (v # os, S, h)  ll = l+1"
by (erule eval_cases, simp_all)

lemma PopElim1: "(M, l, (os, S, h), n, ll, t)  Step; get_ins M l =  Some pop 
                 v ops . os = v # ops  n = Suc 0  t = (ops, S, h)  ll = l+1"
by (erule eval_cases, simp_all)

lemma SwapElim1: "(M, l, (os, S, h), n, ll, t)  Step; get_ins M l = Some swap
                v w ops . os = v # w # ops  n = Suc 0  t = (w # v # ops, S, h)  ll = l+1"
by (erule eval_cases, simp_all)

lemma LoadElim1: "(M, l, (os, S, h), n, ll, t)  Step; get_ins M l = Some (load x)
                   v . Sx = Some v  n = Suc 0  t = (v # os, S, h)  ll = l+1"
by (erule eval_cases, simp_all)

lemma StoreElim1: "(M, l, (os, S, h), n, ll, t)  Step; get_ins M l = Some (store x)
                    v ops . os = v # ops  n = Suc 0  t = (ops, S[xv], h)  ll = l+1"
by (erule eval_cases, simp_all)

lemma BinopElim1: "(M, l, (os, S, h), n, ll, t)  Step; get_ins M l = Some (binop f)
                    v w ops . os = v # w # ops  n = Suc 0  t = (f v w # ops, S, h)  ll = l+1"
by (erule eval_cases, simp_all) 

lemma UnopElim1: "(M, l, (os, S, h), n, ll, t)  Step; get_ins M l = Some (unop f)
                   v ops . os = v # ops  n = Suc 0  t = (f v # ops, S, h)  ll = l+1"
by (erule eval_cases, simp_all)

lemma NewElim1: "(M, l, (os, S, h), n, ll, t)  Step; get_ins M l = Some (new d)
                 a . a = nextLoc h  n = Suc 0  t = (RVal (Loc a) # os, S, h[a(d, emp)])  ll = l+1"
by (erule eval_cases, simp_all)

lemma GetElim1: "(M, l, (os, S, h), n, ll, t)  Step; get_ins M l = Some (getfield d F)
                 a Flds v ops. ha = Some (d, Flds)  FldsF = Some v  
                                   os = RVal (Loc a) # ops  n = Suc 0  t = (v # ops, S, h)  ll = l+1"
by (erule eval_cases, simp_all)

lemma PutElim1: "(M, l, (os, S, h), n, ll, t)  Step; get_ins M l = Some (putfield d F)
                  a Flds v ops. ha = Some (d, Flds)  os = v # RVal (Loc a) # ops 
                                   n = Suc 0  t = (ops, S, h[a(d, Flds[Fv])])  ll = l+1"
by (erule eval_cases, simp_all)

lemma CastElim1: "(M, l, (os, S, h), n, ll, t)  Step; get_ins M l = Some (checkcast d)
                  a Flds ops . ha = Some (d, Flds)  os = RVal (Loc a) # ops  n = Suc 0  
                                   t = (RVal (Loc a) # ops, S, h)  ll = l+1"
by (erule eval_cases, simp_all)

lemma GotoElim1: "(M, l, (os, S, h), n, ll, t)  Step; get_ins M l = Some (goto pc)
                 n = Suc 0  t = (os, S, h)  ll = pc"
by (erule eval_cases, simp_all)

lemma IfElim1: "(M, l, (os, S, h), n, ll, t)  Step; get_ins M l = Some (iftrue pc)
               ( ops . os = TRUE # ops  n = Suc 0  t = (ops, S, h)  ll = pc)  
                  ( v ops . v  TRUE  os = v # ops  n = Suc 0  t = (ops, S, h)  ll = l+1)"
by (erule eval_cases, simp_all)

lemma InvokeElim1: "(M, l, (os, S, h), n, ll, t)  Step; get_ins M l = Some (invokeS C m)
                     code l0 v ops hh u k R par. 
                           mbody_is C m (par,code, l0)  (os,par,R,ops):Frame  
                           ((par,code,l0), l0, ([], R, h), k, hh, v)  Exec  
                           n = Suc k  t = (v # ops, S, hh)  ll = l+1"
by (erule eval_cases, simp_all, fastforce)
lemma InvokeElim: "(M, l, s, n, ll, t)  Step; get_ins M l = Some (invokeS C m)
                     code l0 v ops hh u k R par. 
                           mbody_is C m (par,code, l0)  (fst s,par,R,ops):Frame  
                           ((par,code,l0), l0, ([], R, snd(snd s)), k, hh, v)  Exec  
                           n = Suc k  t = (v # ops, fst (snd s), hh)  ll = l+1"
by (erule eval_cases, simp_all, fastforce)

lemma RetElim1: "(M, l, (os, S, h), n, ll, t)  Step; get_ins M l = Some (vreturn)  False"
by (erule eval_cases, simp_all)

lemma ExecElim1: "(M,l,(os,S,H),k,h,v):Exec
       (get_ins M l = Some vreturn  ( ops . os = v # ops  k = Suc 0  h=H)) 
          ( n m t ll. (M, l,(os,S,H), n, ll,t)  Step  (M, ll, t, m, h, v)  Exec  k = Suc (max n m))"
apply (erule eval_cases, simp_all)
apply (rule disjI2)
  apply clarsimp
  apply (rule, rule, rule, rule) apply (rule, rule, rule) apply assumption
  apply (rule, assumption) apply simp
done

lemma InstrElimNext:
 "(M, l, s, n, ll, t)  Step;
   get_ins M l = Some I;
   I = const c  I = dup  I = pop  I = swap  I = load x 
   I = store x  I = binop f  I = unop g  I = new d 
   I = getfield d F  I = putfield d F  I = checkcast d
   ll = l+1"
apply (drule eval_cases, simp_all)
apply clarsimp 
apply clarsimp
done
(*>*)

text‹By induction on the derivation system one can show
determinism.›

(*<*)
lemma StepExec_determ_Aux[rule_format]:
"( n1 l M s l1 t . n1  n  (M, l, s, n1, l1, t)  Step 
       ( n2 l2 r. (M,l,s,n2,l2,r):Step  (n1=n2  t=r  l1=l2))) 
 ( n1 l M s h v . n1  n  (M, l, s, n1, h, v)  Exec 
      ( n2 k w . (M,l,s,n2,k,w):Exec  (n1=n2  h=k  v=w)))"
supply [[simproc del: defined_all]]
apply (induct n)
apply clarsimp apply rule apply clarsimp apply (drule no_zero_height_Step_derivs1, simp)
   apply clarsimp apply (drule no_zero_height_Exec_derivs1, simp)
apply clarsimp
apply rule
  apply clarsimp 
apply (erule Step.cases)
  apply clarsimp apply (drule ConstElim1) apply simp apply clarsimp
  apply clarsimp apply (drule DupElim1) apply simp apply clarsimp
  apply clarsimp apply (drule PopElim1) apply simp apply clarsimp
  apply clarsimp apply (drule SwapElim1) apply simp apply clarsimp
  apply clarsimp apply (drule LoadElim1) apply simp apply clarsimp
  apply clarsimp apply (drule StoreElim1) apply simp apply clarsimp
  apply clarsimp apply (drule BinopElim1) apply simp apply clarsimp
  apply clarsimp apply (drule UnopElim1) apply simp apply clarsimp
  apply clarsimp apply (drule NewElim1) apply simp apply clarsimp
  apply clarsimp apply (drule GetElim1) apply fastforce apply clarsimp
  apply clarsimp apply (drule PutElim1) apply fastforce apply clarsimp
  apply clarsimp apply (drule CastElim1) apply simp apply clarsimp
  apply clarsimp apply (drule GotoElim1) apply simp apply clarsimp
  apply clarsimp apply (drule IfElim1) apply simp apply clarsimp
  apply clarsimp apply (drule IfElim1) apply simp apply clarsimp
  apply clarsimp apply (drule InvokeElim1) apply simp apply clarsimp
    apply (erule thin_rl)
    apply (simp add: mbody_is_def, clarsimp)
    apply (drule Frame_deterministic, assumption, clarsimp)
apply clarsimp
  apply (erule Exec.cases)
  apply clarsimp apply (erule Exec.cases)
    apply clarsimp
    apply clarsimp apply (drule RetElim1, simp, simp)
  apply clarsimp apply (drule ExecElim1)
    apply clarsimp
    apply (erule disjE, clarsimp) apply (drule RetElim1, simp, simp)
    apply clarsimp
      apply (erule_tac x=na in allE, rotate_tac -1, clarsimp)
      apply (erule_tac x=la in allE, rotate_tac -1)
      apply (erule_tac x=ad in allE, rotate_tac -1)
      apply (erule_tac x=ae in allE, rotate_tac -1)
      apply (erule_tac x=bb in allE, rotate_tac -1)
      apply (erule_tac x=af in allE, rotate_tac -1)
      apply (erule_tac x=ag in allE, rotate_tac -1)
      apply (erule_tac x=bc in allE, rotate_tac -1)
      apply (erule_tac x=ll in allE, rotate_tac -1)
      apply (erule_tac x=ah in allE, rotate_tac -1)
      apply (erule_tac x=ai in allE, rotate_tac -1)
      apply (erule_tac x=bd in allE, clarsimp)
      apply (rotate_tac -1)
      apply (erule_tac x=nb in allE, rotate_tac -1) 
      apply (erule_tac x=lla in allE, rotate_tac -1)
      apply (erule_tac x=a in allE, rotate_tac -1)
      apply (erule_tac x=aa in allE, rotate_tac -1)
      apply (erule_tac x=b in allE, clarsimp)
      apply (erule_tac x=m in allE, rotate_tac -1, clarsimp)
      apply (erule_tac x=lla in allE, rotate_tac -1)
      apply (erule_tac x=ad in allE, rotate_tac -1)
      apply (erule_tac x=ae in allE, rotate_tac -1)
      apply (erule_tac x=bb in allE, rotate_tac -1)
      apply (erule_tac x=a in allE, rotate_tac -1)
      apply (erule_tac x=aa in allE, rotate_tac -1)
      apply (erule_tac x=b in allE, rotate_tac -1)
      apply (erule_tac x=ha in allE, rotate_tac -1)
      apply (erule_tac x=va in allE, rotate_tac -1, clarsimp)
done
(*>*)

lemma Step_determ:
 "(M,l,s,n,l1,t)  Step; (M,l,s,m,l2,r):Step  n=m  t=r  l1=l2"
(*<*)
apply (insert StepExec_determ_Aux[of n])
apply (erule conjE)
apply (rotate_tac -1, erule thin_rl)
apply fast
done
(*>*)

lemma Exec_determ:
 "(M,l,s,n,h,v)  Exec; (M,l,s,m,k,w):Exec  n=m  h=k  v=w"
(*<*)
apply (insert StepExec_determ_Aux[of n])
apply (erule conjE)
apply (rotate_tac -2, erule thin_rl)
apply fast
done
(*>*)

(*<*)
end
(*>*)