Theory Memory

(*  Title:       X86 instruction semantics and basic block symbolic execution
    Authors:     Freek Verbeek, Abhijith Bharadwaj, Joshua Bockenek, Ian Roessle, Timmy Weerwag, Binoy Ravindran
    Year:        2020
    Maintainer:  Freek Verbeek (freek@vt.edu)
*)

section "Memory-related theorems"

theory Memory
  imports BitByte
begin

context
  fixes dummy_type :: "'a::len"
begin

primrec read_bytes :: "('a word  8 word)  'a word  nat  8word list"
  where "read_bytes m a 0   = []"
  | "read_bytes m a (Suc n) = m (a + of_nat n) # read_bytes m a n"

text ‹Read bytes from memory. Memory is represented by a term of @{typ "64 word  8 word"}.
      Given an address @{term [show_types] "a::64 word"} and a size @{term n}, retrieve the bytes in
      the order they are stored in memory.›

definition region_addresses :: "'a word  nat  'a word set"
  where "region_addresses a si  {a' .  i < si . a' = a + of_nat (si - i - 1)}"

text ‹The set of addresses belonging to a region starting at address @{term "a::64 word"} of @{term si} bytes.›

definition region_overflow :: "'a word  nat  bool"
  where "region_overflow a si  unat a + si  2^LENGTH('a)"

text ‹An overflow occurs if the address plus the size is greater equal @{term "2^64"}

definition enclosed :: "'a word  nat  'a word  nat  bool"
  where "enclosed a' si' a si  unat a + si < 2^LENGTH('a)  unat a  unat a'  unat a' + si'  unat a + si"

text ‹A region is enclosed in another if its @{const region_addresses} is a subset of the other.›

definition separate :: "'a word  nat  'a word  nat  bool"
  where "separate a si a' si'  si 0  si'  0  region_addresses a si  region_addresses a' si' = {}"

text ‹A region is separate from another if they do not overlap.›



lemma region_addresses_iff: "a'  region_addresses a si  unat (a' - a) < si"
  apply (auto simp add: region_addresses_def unsigned_of_nat)
   apply (metis diff_Suc_less le_less_trans less_imp_Suc_add take_bit_nat_less_eq_self zero_less_Suc)
  by (metis Suc_diff_Suc add.commute diff_add_cancel diff_diff_cancel diff_less less_imp_Suc_add order_less_imp_le word_unat.Rep_inverse zero_less_Suc)

lemma notin_region_addresses:
  assumes "x  region_addresses a si"
  shows "unat x < unat a  unat a + si  unat x"
  by (metis assms add.commute less_diff_conv2 not_le_imp_less region_addresses_iff unat_sub_if')

lemma notin_region_addresses_sub:
  assumes "x  region_addresses a si"
  shows "unat (x - a') < unat (a - a')  unat (a - a') + si  unat (x - a')"
  using assms notin_region_addresses region_addresses_iff by auto

lemma region_addresses_eq_empty_iff: "region_addresses a si = {}  si = 0"
  by (metis region_addresses_iff add_diff_cancel_left' ex_in_conv neq0_conv not_less0 unsigned_0)

lemma length_read_bytes:
  shows "length (read_bytes m a si) = si"
  by (induct si,auto)

lemma nth_read_bytes:
assumes "n < si"
shows "read_bytes m a si ! n = m (a + of_nat (si - 1 - n))"
  using assms
  apply (induct si arbitrary: n,auto)
  subgoal for si n
    by(cases n,auto)
  done

text ‹Writing to memory occurs via function @{const override_on}.
      In case of enclosure, reading bytes from memory overridden on a set of region addresses can
      be simplified to reading bytes from the overwritten memory only.
      In case of separation, reading bytes from overridden memory can be simplified to reading
      from the original memory.›

lemma read_bytes_override_on_enclosed:
assumes "offset'  offset"
    and "si'  si"
    and "unat offset + si'  si + unat offset'"
  shows "read_bytes (override_on m m' (region_addresses (a - offset) si)) (a - offset') si' = read_bytes m' (a - offset') si'"
proof-
  {
    fix i
    assume 1: "i < si'"
    let ?i = "(si + i + unat offset') - unat offset - si'"

    have "i + unat offset' < si' + unat offset"
      using 1 assms(1)
      by unat_arith
    hence 2: "si + i + unat offset' - (unat offset + si') < si"
      using diff_less[of "(si' + unat offset - i) - unat offset'" si] assms(3)
      by auto
    moreover
    have "of_nat (si' - Suc i) - offset' = of_nat (si - Suc ?i) - offset"
      using assms 1 2 by (auto simp add: of_nat_diff)
    ultimately
    have " i' < si. of_nat (si' - Suc i) - offset' = of_nat (si - Suc i') - offset"
      by auto
  }
  note 1 = this
  show ?thesis
    apply (rule nth_equalityI)
    using 1
    by (auto simp add: length_read_bytes nth_read_bytes override_on_def region_addresses_def)
qed

lemmas read_bytes_override_on = read_bytes_override_on_enclosed[where offset=0 and offset'=0,simplified]

lemma read_bytes_override_on_enclosed_plus:
  assumes "unat offset + si'  si"
      and "si  2^LENGTH('a)"
  shows "read_bytes (override_on m m' (region_addresses a si)) (offset+a) si' = read_bytes m' (offset+a) si'"
proof-
  {
    fix i
    have "i < si'   i' < si. offset + (of_nat (si' - Suc i)::'a word) = of_nat (si - Suc i')"
      apply (rule exI[of _"si - si' + i - unat offset"])
      using assms by (auto simp add: of_nat_diff)
  }
  note 1 = this
  show ?thesis
    apply (rule nth_equalityI)
    using assms 1
    by (auto simp add: override_on_def length_read_bytes nth_read_bytes region_addresses_def)
qed


lemma read_bytes_override_on_separate:
assumes "separate a si a' si'"
  shows "read_bytes (override_on m m' (region_addresses a si)) a' si' = read_bytes m a' si'"
  apply (rule nth_equalityI)
  using assms
  by (auto simp add: length_read_bytes nth_read_bytes override_on_def separate_def region_addresses_def)


text‹Bytes are are written to memory one-by-one, then read by @{const read_bytes} producing a list of bytes.
    That list is concatenated again using @{const word_rcat}. Writing @{term si} bytes of word @{term w} into
    memory, reading the byte-list and concatenating again produces @{term si} bytes of the original word.›


lemma word_rcat_read_bytes_enclosed:
  fixes w :: "'b::len word"
  assumes "LENGTH('b)  2^LENGTH('a)"
    and "unat offset + si  2^LENGTH('a)"
  shows "word_rcat (read_bytes (λa'. take_byte (unat (a' - a)) w) (a + offset) si) = unat offset * 8,(unat offset + si) * 8w"
  apply (rule word_eqI)
  using assms
  apply (auto simp add: test_bit_rcat word_size length_read_bytes rev_nth nth_read_bytes unat_of_nat nth_takebyte unat_word_ariths )
    apply (auto simp add: take_byte_def nth_ucast nth_takebits take_bit_eq_mod split: if_split_asm)[1]
   apply (auto simp add: nth_takebits split: if_split_asm)[1]
  apply (auto simp add: take_byte_def nth_ucast nth_takebits split: if_split_asm)[1]
  by (auto simp add: rev_nth length_read_bytes take_byte_def nth_ucast nth_takebits nth_read_bytes unat_word_ariths unat_of_nat take_bit_eq_mod split: if_split_asm)

lemmas word_rcat_read_bytes = word_rcat_read_bytes_enclosed[where offset=0,simplified]



text ‹The following theorems allow reasoning over enclosure and separation, for example as linear arithmetic.›

lemma enclosed_spec:
  assumes enclosed: "enclosed a' si' a si"
      and x_in: "x  region_addresses a' si'"
    shows "x  region_addresses a si"
proof -
  from x_in have "unat (x - a') < si'"
    using region_addresses_iff by blast
  with enclosed have "unat (x - a) < si"
    unfolding enclosed_def by (auto simp add: unat_sub_if' split: if_split_asm)
  then show ?thesis
    using region_addresses_iff by blast
qed

lemma address_in_enclosed_region_as_linarith:
  assumes "enclosed a' si' a si"
      and "x  region_addresses a' si'"
    shows "a  x  a'  x  x < a' + of_nat si'  x < a + of_nat si"
  using assms
  by (auto simp add: enclosed_def region_addresses_def word_le_nat_alt word_less_nat_alt unat_of_nat unat_word_ariths unat_sub_if' take_bit_eq_mod)


lemma address_of_enclosed_region_ge:
  assumes "enclosed a' si' a si"
    shows "a'  a"
  using assms word_le_nat_alt
  by (auto simp add: enclosed_def)

lemma address_in_enclosed_region:
  assumes "enclosed a' si' a si"
      and "x  region_addresses a' si'"
    shows "unat (x - a)  unat (a' - a)  unat (a' - a) + si' > unat (x - a)  unat (x - a) < si"
  by (smt (verit, ccfv_threshold) Memory.enclosed_def Memory.region_addresses_iff
      address_in_enclosed_region_as_linarith assms(1) assms(2) diff_diff_add
      le_add_diff_inverse mcs(4) nat_add_left_cancel_less no_ulen_sub of_nat_add
      un_ui_le unat_mono unat_of_nat_eq unat_sub unsigned_less word_sub_le_iff)

lemma enclosed_minus_minus:
  fixes a :: "'a word"
  assumes "offset  offset'"
      and "unat offset - si  unat offset' - si'"
      and "unat offset'  si'"
      and "unat offset  si"
      and "a  offset"
    shows "enclosed (a - offset') si' (a - offset) si"
proof-
  have "unat offset'  unat a"
    using assms(1,5)
    by unat_arith
  thus ?thesis
    using assms
    apply (auto simp add: enclosed_def unat_sub_if_size word_size)
    apply unat_arith
    using diff_le_mono2[of "unat offset - si" "unat offset' - si'" "unat a"]
    apply (auto simp add: enclosed_def unat_sub_if_size word_size)
     by unat_arith+
qed

lemma enclosed_plus:
  fixes a :: "'a word"
  assumes "si' < si"
      and "unat a + si < 2^LENGTH('a)"
    shows "enclosed a si' a si"
  using assms
  by (auto simp add: enclosed_def)

lemma separate_symm: "separate a si a' si' = separate a' si' a si"
  by (metis inf.commute separate_def)

lemma separate_iff: "separate a si a' si'  si > 0  si' > 0  unat (a' - a)  si  unat (a - a')  si'"
proof
  assume assm: "separate a si a' si'"
  have "unat (a' - a)  si" if "separate a si a' si'" for a si a' si'
  proof (rule ccontr)
    assume "¬ unat (a' - a)  si"
    then have "a'  region_addresses a si"
      by (simp add: region_addresses_iff)
    moreover from that have "a'  region_addresses a' si'"
      using region_addresses_iff separate_def by auto
    ultimately have "¬separate a si a' si'"
      by (meson disjoint_iff separate_def)
    with that show False
      by blast
  qed
  with assm have "unat (a' - a)  si" and "unat (a - a')  si'"
    using separate_symm by auto
  with assm show "si > 0  si' > 0  unat (a' - a)  si  unat (a - a')  si'"
    using separate_def by auto
next
  assume assm: "si > 0  si' > 0  unat (a' - a)  si  unat (a - a')  si'"
  then have "unat (x - a')  si'" if "unat (x - a) < si" for x
    using that apply (auto simp add: unat_sub_if' split: if_split_asm)
     apply (meson Nat.le_diff_conv2 add_increasing le_less_trans less_imp_le_nat unsigned_greater_eq unsigned_less)
    by (smt (verit) Nat.le_diff_conv2 add_leD2 le_less_trans linorder_not_less nat_le_linear unat_lt2p)
  then have "region_addresses a si  region_addresses a' si' = {}"
    by (simp add: region_addresses_iff disjoint_iff leD)
  with assm show "separate a si a' si'"
    by (simp add: separate_def)
qed

lemma separate_as_linarith:
assumes "¬region_overflow a si"
    and "¬region_overflow a' si'"
  shows "separate a si a' si'  0 < si  0 < si'  (a + of_nat si  a'  a' + of_nat si'  a)"
    (is "?lhs  ?rhs")
proof
  assume ?lhs then show ?rhs
   by (meson separate_iff le_less le_plus not_le_imp_less word_of_nat_le)
next
  have *: "separate a si a' si'"
    if "si > 0" and "si' > 0" and "a + of_nat si  a'"
      and "¬region_overflow a si" and "¬region_overflow a' si'"
    for a si a' si'
  proof -
    from that have "unat a + si < 2^LENGTH('a)" and "unat a' + si' < 2^LENGTH('a)"
      by (meson not_le region_overflow_def)+
    have "x < a + of_nat si" if "x  region_addresses a si" for x
      by (smt (verit) Abs_fnat_hom_add unat a + si < 2 ^ LENGTH('a) add.commute dual_order.trans le_add1 less_diff_conv2 not_less region_addresses_iff that unat_of_nat_len unat_sub_if' word_less_iff_unsigned word_unat.Rep_inverse)
    moreover have "x  a'" if "x  region_addresses a' si'" for x
      using address_in_enclosed_region_as_linarith enclosed_def unat a' + si' < 2 ^ LENGTH('a) that by blast
    ultimately show ?thesis
      using separate_def that by fastforce
  qed
  assume ?rhs then show ?lhs
    by (meson "*" assms separate_symm)
qed

text ‹Compute separation in case the addresses and sizes are immediate values.›
lemmas separate_as_linarith_numeral [simp] =
  separate_as_linarith [of "numeral a::'a word" "numeral si" "numeral a'::'a word" "numeral si'"] for a si a' si'
lemmas separate_as_linarith_numeral_1 [simp] =
  separate_as_linarith [of "numeral a::'a word" "numeral si" "numeral a'::'a word" "Suc 0"] for a si a'
lemmas separate_as_linarith_numeral1_ [simp] =
  separate_as_linarith [of "numeral a::'a word" "Suc 0" "numeral a'::'a word" "numeral si'"] for a a' si'
lemmas separate_as_linarith_numeral11 [simp] =
  separate_as_linarith [of "numeral a::'a word" "Suc 0" "numeral a'::'a word" "Suc 0"] for a a'
lemmas region_overflow_numeral[simp] =
  region_overflow_def [of "numeral a::'a word" "numeral si"] for a si
lemmas region_overflow_numeral1[simp] =
  region_overflow_def [of "numeral a::'a word" "Suc 0"] for a


lemma separate_plus_none:
  assumes "si'  unat offset"
      and "0 < si"
      and "0 < si'"
      and "unat offset + si  2^LENGTH('a)"
    shows "separate (offset + a) si a si'"
  using assms apply (auto simp add: separate_iff)
  by (smt (verit) Nat.diff_diff_right add.commute add_leD1 diff_0 diff_is_0_eq diff_zero not_gr_zero unat_sub_if' unsigned_0)

lemmas unat_minus = unat_sub_if'[of 0,simplified]

lemma separate_minus_minus':
  assumes "si  0"
      and "si'  0"
      and "unat offset  si"
      and "unat offset'  si'"
      and "unat offset - si  unat offset'"
    shows "separate (a - offset) si (a - offset') si'"
  using assms apply (auto simp add: separate_iff)
   apply (metis Nat.le_diff_conv2 add.commute add_leD2 unat_sub_if')
  by (smt (verit) add.commute add_diff_cancel_right' diff_add_cancel diff_le_self diff_less less_le_trans nat_le_linear not_le unat_sub_if')

lemma separate_minus_minus:
  assumes "si  0"
      and "si'  0"
      and "unat offset  si"
      and "unat offset'  si'"
      and "unat offset - si  unat offset'  unat offset' - si'  unat offset"
    shows "separate (a - offset) si (a - offset') si'"
  by (meson assms separate_minus_minus' separate_symm)

lemma separate_minus_none:
  assumes "si  0"
      and "si'  0"
      and "unat offset  si"
      and "si'  2^LENGTH('a) - unat offset"
    shows "separate (a - offset) si a si'"
proof -
  have "0 < si" and "0 < si'"
    using assms(1,2) by blast+
  moreover have "¬ offset  0"
    using assms(1) assms(3) by fastforce
  ultimately show ?thesis
    by (simp add: assms(3) assms(4) local.unat_minus separate_iff unsigned_eq_0_iff)
qed


text ‹The following theorems are used during symbolic execution to determine whether two regions are separate.›

lemmas separate_simps = separate_plus_none separate_minus_none separate_minus_minus

end
end