Theory State
section "Concrete state and instructions"
theory State
imports Main Memory
begin
text ‹A state consists of registers, memory, flags and a rip. Some design considerations here:
\begin{itemize}
\item All register values are 256 bits. We could also distinguish 64 bits registers, 128 registers etc.
That would increase complexity in proofs and datastructures. The cost of using 256 everywhere is
that a goal typically will have some casted 64 bits values.
\item The instruction pointer RIP is a special 64-bit register outside of the normal register set.
\item Strings are used for registers and flags. We would prefer an enumerative datatype, however, that would be
extremely slow since there are roughly 100 register names.
\end{itemize}
›
record state =
regs :: "string ⇒ 256word"
mem :: "64 word ⇒ 8 word"
flags :: "string ⇒ bool"
rip :: "64 word"
definition real_reg :: "string ⇒ bool × string × nat × nat"
where "real_reg reg ≡
case reg of
''rip'' ⇒ (True, ''rip'', 0,64)
| ''rax'' ⇒ (True, ''rax'', 0,64)
| ''eax'' ⇒ (True, ''rax'', 0,32)
| ''ax'' ⇒ (False, ''rax'', 0,16)
| ''ah'' ⇒ (False, ''rax'', 8,16)
| ''al'' ⇒ (False, ''rax'', 0,8)
| ''rbx'' ⇒ (True, ''rbx'', 0,64)
| ''ebx'' ⇒ (True, ''rbx'', 0,32)
| ''bx'' ⇒ (False, ''rbx'', 0,16)
| ''bh'' ⇒ (False, ''rbx'', 8,16)
| ''bl'' ⇒ (False, ''rbx'', 0,8)
| ''rcx'' ⇒ (True, ''rcx'', 0,64)
| ''ecx'' ⇒ (True, ''rcx'', 0,32)
| ''cx'' ⇒ (False, ''rcx'', 0,16)
| ''ch'' ⇒ (False, ''rcx'', 8,16)
| ''cl'' ⇒ (False, ''rcx'', 0,8)
| ''rdx'' ⇒ (True, ''rdx'', 0,64)
| ''edx'' ⇒ (True, ''rdx'', 0,32)
| ''dx'' ⇒ (False, ''rdx'', 0,16)
| ''dh'' ⇒ (False, ''rdx'', 8,16)
| ''dl'' ⇒ (False, ''rdx'', 0,8)
| ''rbp'' ⇒ (True, ''rbp'', 0,64)
| ''ebp'' ⇒ (True, ''rbp'', 0,32)
| ''bp'' ⇒ (False, ''rbp'', 0,16)
| ''bpl'' ⇒ (False, ''rbp'', 0,8)
| ''rsp'' ⇒ (True, ''rsp'', 0,64)
| ''esp'' ⇒ (True, ''rsp'', 0,32)
| ''sp'' ⇒ (False, ''rsp'', 0,16)
| ''spl'' ⇒ (False, ''rsp'', 0,8)
| ''rdi'' ⇒ (True, ''rdi'', 0,64)
| ''edi'' ⇒ (True, ''rdi'', 0,32)
| ''di'' ⇒ (False, ''rdi'', 0,16)
| ''dil'' ⇒ (False, ''rdi'', 0,8)
| ''rsi'' ⇒ (True, ''rsi'', 0,64)
| ''esi'' ⇒ (True, ''rsi'', 0,32)
| ''si'' ⇒ (False, ''rsi'', 0,16)
| ''sil'' ⇒ (False, ''rsi'', 0,8)
| ''r15'' ⇒ (True, ''r15'', 0,64)
| ''r15d'' ⇒ (True, ''r15'', 0,32)
| ''r15w'' ⇒ (False, ''r15'', 0,16)
| ''r15b'' ⇒ (False, ''r15'', 0,8)
| ''r14'' ⇒ (True, ''r14'', 0,64)
| ''r14d'' ⇒ (True, ''r14'', 0,32)
| ''r14w'' ⇒ (False, ''r14'', 0,16)
| ''r14b'' ⇒ (False, ''r14'', 0,8)
| ''r13'' ⇒ (True, ''r13'', 0,64)
| ''r13d'' ⇒ (True, ''r13'', 0,32)
| ''r13w'' ⇒ (False, ''r13'', 0,16)
| ''r13b'' ⇒ (False, ''r13'', 0,8)
| ''r12'' ⇒ (True, ''r12'', 0,64)
| ''r12d'' ⇒ (True, ''r12'', 0,32)
| ''r12w'' ⇒ (False, ''r12'', 0,16)
| ''r12b'' ⇒ (False, ''r12'', 0,8)
| ''r11'' ⇒ (True, ''r11'', 0,64)
| ''r11d'' ⇒ (True, ''r11'', 0,32)
| ''r11w'' ⇒ (False, ''r11'', 0,16)
| ''r11b'' ⇒ (False, ''r11'', 0,8)
| ''r10'' ⇒ (True, ''r10'', 0,64)
| ''r10d'' ⇒ (True, ''r10'', 0,32)
| ''r10w'' ⇒ (False, ''r10'', 0,16)
| ''r10b'' ⇒ (False, ''r10'', 0,8)
| ''r9'' ⇒ (True, ''r9'' , 0,64)
| ''r9d'' ⇒ (True, ''r9'' , 0,32)
| ''r9w'' ⇒ (False, ''r9'' , 0,16)
| ''r9b'' ⇒ (False, ''r9'' , 0,8)
| ''r8'' ⇒ (True, ''r8'' , 0,64)
| ''r8d'' ⇒ (True, ''r8'' , 0,32)
| ''r8w'' ⇒ (False, ''r8'' , 0,16)
| ''r8b'' ⇒ (False, ''r8'' , 0,8)
| ''xmm0'' ⇒ (True, ''xmm0'' , 0,128)
| ''xmm1'' ⇒ (True, ''xmm1'' , 0,128)
| ''xmm2'' ⇒ (True, ''xmm2'' , 0,128)
| ''xmm3'' ⇒ (True, ''xmm3'' , 0,128)
| ''xmm4'' ⇒ (True, ''xmm4'' , 0,128)
| ''xmm5'' ⇒ (True, ''xmm5'' , 0,128)
| ''xmm6'' ⇒ (True, ''xmm6'' , 0,128)
| ''xmm7'' ⇒ (True, ''xmm7'' , 0,128)
| ''xmm8'' ⇒ (True, ''xmm8'' , 0,128)
| ''xmm9'' ⇒ (True, ''xmm9'' , 0,128)
| ''xmm10'' ⇒ (True, ''xmm10'' , 0,128)
| ''xmm11'' ⇒ (True, ''xmm11'' , 0,128)
| ''xmm12'' ⇒ (True, ''xmm12'' , 0,128)
| ''xmm13'' ⇒ (True, ''xmm13'' , 0,128)
| ''xmm14'' ⇒ (True, ''xmm14'' , 0,128)
| ''xmm15'' ⇒ (True, ''xmm15'' , 0,128)
"
text ‹x86 has register aliassing. For example, register EAX is the lower 32 bits of register RAX.
This function map register aliasses to the ``real'' register. For example:
@{term "real_reg ''ah'' = (False, ''rax'', 8,16)"}.
This means that register AH is the second byte (bits 8 to 16) of register RAX.
The bool @{const False} indicates that writing to AH does not overwrite the remainder of RAX.
@{term "real_reg ''eax'' = (True, ''rax'', 0,32)"}.
Register EAX is the lower 4 bytes of RAX. Writing to EAX means overwritting the remainder of RAX
with zeroes.
›
definition reg_size :: "string ⇒ nat"
where "reg_size reg ≡ let (_,_,l,h) = real_reg reg in (h - l) div 8"
text‹ We now define functions for reading and writing from state.›
definition reg_read :: "state ⇒ string ⇒ 256 word"
where "reg_read σ reg ≡
if reg = ''rip'' then ucast (rip σ) else
if reg = '''' then 0 else
let (_,r,l,h) = real_reg reg in
⟨l,h⟩(regs σ r)"
primrec fromBool :: "bool ⇒ 'a :: len word"
where
"fromBool True = 1"
| "fromBool False = 0"
definition flag_read :: "state ⇒ string ⇒ 256 word"
where "flag_read σ flag ≡ fromBool (flags σ flag)"
definition mem_read :: "state ⇒ 64 word ⇒ nat ⇒ 256 word"
where "mem_read σ a si ≡ word_rcat (read_bytes (mem σ) a si)"
text ‹Doing state-updates occur through a tiny deeply embedded language of state updates. This allows us
to reason over state updates through theorems.›