Theory IP_Address

(*  Title:      IP_Address.thy
    Authors:    Cornelius Diekmann
theory IP_Address

section ‹Modelling IP Adresses›
  text‹An IP address is basically an unsigned integer.
    We model IP addresses of arbitrary lengths.

    We will write @{typ "'i::len word"} for IP addresses of length @{term "LENGTH('i::len)"}.
    We use the convention to write @{typ 'i} whenever we mean IP addresses instead of generic words.
    When we will later have theorems with several polymorphic types in it (e.g. arbitrarily
    extensible packets), this notation makes it easier to spot that type @{typ 'i} is for
    IP addresses.

    The files @{file ‹IPv4.thy›} @{file ‹IPv6.thy›} concrete this for IPv4 and IPv6.›

  text‹The maximum IP address›
  definition max_ip_addr :: "'i::len word" where
    "max_ip_addr  of_nat ((2^(len_of(TYPE('i)))) - 1)"

  lemma max_ip_addr_max_word: "max_ip_addr = - 1"
    by (simp only: max_ip_addr_def of_nat_mask_eq flip: mask_eq_exp_minus_1) simp

  lemma max_ip_addr_max: "a. a  max_ip_addr"
    by(simp add: max_ip_addr_max_word)
  lemma range_0_max_UNIV: "UNIV = {0 .. max_ip_addr}" (*not in the simp set, for a reason*)
    by(simp add: max_ip_addr_max_word) fastforce

  lemma "size (x::'i::len word) = len_of(TYPE('i))" by(simp add:word_size)

subsection‹Sets of IP Addresses›

    includes bit_operations_syntax

  (*Warning, not executable!*)
  text‹Specifying sets with network masks:›
  definition ipset_from_netmask::"'i::len word  'i::len word  'i::len word set" where
    "ipset_from_netmask addr netmask 
        network_prefix = (addr AND netmask)
        {network_prefix .. network_prefix OR (NOT netmask)}"

  text‹Example (pseudo syntax):
    @{const ipset_from_netmask}› =
        { ..}›

  text‹A network mask of all ones (i.e. @{term "(- 1)::'i::len word"}).›
  lemma ipset_from_netmask_minusone:
    "ipset_from_netmask ip (- 1) = {ip}" by (simp add: ipset_from_netmask_def)
  lemma ipset_from_netmask_maxword:
    "ipset_from_netmask ip (- 1) = {ip}" by (simp add: ipset_from_netmask_def)

  lemma ipset_from_netmask_zero:
    "ipset_from_netmask ip 0 = UNIV" by (auto simp add: ipset_from_netmask_def)

  text‹Specifying sets in Classless Inter-domain Routing (CIDR) notation:›
  definition ipset_from_cidr ::"'i::len word  nat  'i::len word set" where
    "ipset_from_cidr addr pflength 
       ipset_from_netmask addr ((mask pflength) << (len_of(TYPE('i)) - pflength))"

  text‹Example (pseudo syntax):
    @{const ipset_from_cidr} 24› = { ..}›

  (*does this simplify stuff?*)
  lemma "(case ipcidr of (base, len)  ipset_from_cidr base len) = uncurry ipset_from_cidr ipcidr"
    by(simp add: uncurry_case_stmt)

  lemma ipset_from_cidr_0: "ipset_from_cidr ip 0 = UNIV"
    by(auto simp add: ipset_from_cidr_def ipset_from_netmask_def Let_def)

  text‹A prefix length of word size gives back the singleton set with the IP address.
       Example: = {}›
  lemma ipset_from_cidr_wordlength:
    fixes ip :: "'i::len word"
    shows "ipset_from_cidr ip (LENGTH('i)) = {ip}"
    by (simp add: ipset_from_cidr_def ipset_from_netmask_def)

  text‹Alternative definition: Considering words as bit lists:›
  lemma ipset_from_cidr_bl:
    fixes addr :: "'i::len word"
    shows "ipset_from_cidr addr pflength 
            ipset_from_netmask addr (of_bl ((replicate pflength True) @
                                            (replicate ((len_of(TYPE('i))) - pflength)) False))"
    by(simp add: ipset_from_cidr_def mask_bl shiftl_of_bl)

  lemma ipset_from_cidr_alt:
    fixes pre :: "'i::len word"
    shows "ipset_from_cidr pre len =
            {pre AND (mask len << LENGTH('i) - len)
             pre OR mask (LENGTH('i) - len)}"
    apply(simp add: ipset_from_cidr_def ipset_from_netmask_def Let_def)
    apply(simp add: word_oa_dist)
    apply(simp add: NOT_mask_shifted_lenword)

  lemma ipset_from_cidr_alt2:
    fixes base ::"'i::len word"
    shows "ipset_from_cidr base len =
           ipset_from_netmask base (NOT (mask (LENGTH('i) - len)))"
    apply(simp add: ipset_from_cidr_def)
    using NOT_mask_shifted_lenword by(metis word_not_not)

  text‹In CIDR notation, we cannot express the empty set.›
  lemma ipset_from_cidr_not_empty: "ipset_from_cidr base len  {}"
    by(simp add: ipset_from_cidr_alt bitmagic_zeroLast_leq_or1Last)

  text‹Though we can write, we say that is well-formed.›
  lemma ipset_from_cidr_base_wellforemd: fixes base:: "'i::len word"
    assumes "mask (LENGTH('i) - l) AND base = 0"
      shows "ipset_from_cidr base l = {base .. base OR mask (LENGTH('i) - l)}"
  proof -
    have maskshift_eq_not_mask_generic:
      "((mask l << LENGTH('i) - l) :: 'i::len word) = NOT (mask (LENGTH('i) - l))"
      using NOT_mask_shifted_lenword by (metis word_not_not)
    have *: "base AND NOT (mask (LENGTH('i) - l)) = base"
      unfolding mask_eq_0_eq_x[symmetric] using assms word_bw_comms(1)[of base] by simp
    hence **: "base AND NOT (mask (LENGTH('i) - l)) OR mask (LENGTH('i) - l) =
                base OR mask (LENGTH('i) - l)" by simp
    have "ipset_from_netmask base (NOT (mask (LENGTH('i) - l))) =
            {base .. base || mask (LENGTH('i) - l)}"
      by(simp add: ipset_from_netmask_def Let_def ** *)
    thus ?thesis by(simp add: ipset_from_cidr_def maskshift_eq_not_mask_generic)

  lemma ipset_from_cidr_large_pfxlen:
    fixes ip:: "'i::len word"
    assumes "n  LENGTH('i)"
    shows "ipset_from_cidr ip n = {ip}"
  proof -
    have obviously: "mask (LENGTH('i) - n) = 0" by (simp add: assms)
    show ?thesis
      apply(subst ipset_from_cidr_base_wellforemd)
       subgoal using assms by simp
      by (simp add: obviously)

  lemma ipset_from_netmask_base_mask_consume:
    fixes base :: "'i::len word"
    shows "ipset_from_netmask (base AND NOT (mask (LENGTH('i) - m)))
                              (NOT (mask (LENGTH('i) - m)))
             ipset_from_netmask base (NOT (mask (LENGTH('i) - m)))"
    unfolding ipset_from_netmask_def by(simp)

  text‹Another definition of CIDR notation:
       All IP address which are equal on the first @{term "len - n"} bits›
  definition ip_cidr_set :: "'i::len word  nat  'i word set" where
    "ip_cidr_set i r 
      {j . i AND NOT (mask (LENGTH('i) - r)) = j AND NOT (mask (LENGTH('i) - r))}"

  text‹The definitions are equal›
  lemma ipset_from_cidr_eq_ip_cidr_set:
    fixes base::"'i::len word"
    shows "ipset_from_cidr base len = ip_cidr_set base len"
  proof -
    have maskshift_eq_not_mask_generic:
      "((mask len << LENGTH('a) - len) :: 'a::len word) = NOT (mask (LENGTH('a) - len))"
      using NOT_mask_shifted_lenword by (metis word_not_not)
    have 1: "mask (len - m) AND base AND NOT (mask (len - m)) = 0"
      for len m and base::"'i::len word"
      by(simp add: word_bw_lcs)
    have 2: "mask (LENGTH('i) - len) AND pfxm_p = 0 
           (a  ipset_from_netmask pfxm_p (NOT (mask (LENGTH('i) - len)))) 
           (pfxm_p = NOT (mask (LENGTH('i) - len)) AND a)" for a::"'i::len word" and pfxm_p
    apply(subst ipset_from_cidr_alt2[symmetric])
    apply(subst zero_base_lsb_imp_set_eq_as_bit_operation)
     apply(simp; fail)
    apply(subst ipset_from_cidr_base_wellforemd)
     apply(simp; fail)
    from 2[OF 1, of _ base] have
      "(x  ipset_from_netmask base (~~ (mask (LENGTH('i) - len)))) 
       (base && ~~ (mask (LENGTH('i) - len)) = x && ~~ (mask (LENGTH('i) - len)))" for x
    apply(simp add: ipset_from_netmask_base_mask_consume)
    unfolding word_bw_comms(1)[of _ " ~~ (mask (LENGTH('i) - len))"] by simp
    then show ?thesis
      unfolding ip_cidr_set_def ipset_from_cidr_def
      by(auto simp add:  maskshift_eq_not_mask_generic)

  lemma ip_cidr_set_change_base: "j  ip_cidr_set i r  ip_cidr_set j r = ip_cidr_set i r"
    by (auto simp: ip_cidr_set_def)

subsection‹IP Addresses as WordIntervals›
  text‹The nice thing is: @{typ "'i wordinterval"}s are executable.›

  definition iprange_single :: "'i::len word  'i wordinterval" where
    "iprange_single ip  WordInterval ip ip"

  fun iprange_interval :: "('i::len word × 'i::len word)  'i wordinterval" where
    "iprange_interval (ip_start, ip_end) = WordInterval ip_start ip_end"
  declare iprange_interval.simps[simp del]

  lemma iprange_interval_uncurry: "iprange_interval ipcidr = uncurry WordInterval ipcidr"
    by(cases ipcidr) (simp add: iprange_interval.simps)

  lemma "wordinterval_to_set (iprange_single ip) = {ip}"
    by(simp add: iprange_single_def)
  lemma "wordinterval_to_set (iprange_interval (ip1, ip2)) = {ip1 .. ip2}"
    by(simp add: iprange_interval.simps)

  text‹Now we can use the set operations on @{typ "'i::len wordinterval"}s›
  term wordinterval_to_set
  term wordinterval_element
  term wordinterval_union
  term wordinterval_empty
  term wordinterval_setminus
  term wordinterval_UNIV
  term wordinterval_invert
  term wordinterval_intersection
  term wordinterval_subset
  term wordinterval_eq

subsection‹IP Addresses in CIDR Notation›
  text‹We want to convert IP addresses in CIDR notation to intervals.
    We already have @{const ipset_from_cidr}, which gives back a non-executable set.
    We want to convert to something we can store in an @{typ "'i wordinterval"}.›

  fun ipcidr_to_interval_start :: "('i::len word × nat)  'i::len word" where
    "ipcidr_to_interval_start (pre, len) = (
      let netmask = (mask len) << (LENGTH('i) - len);
          network_prefix = (pre AND netmask)
      in network_prefix)"
  fun ipcidr_to_interval_end :: "('i::len word × nat)  'i::len word" where
    "ipcidr_to_interval_end (pre, len) = (
      let netmask = (mask len) << (LENGTH('i) - len);
          network_prefix = (pre AND netmask)
      in network_prefix OR (NOT netmask))"
  definition ipcidr_to_interval :: "('i::len word × nat)  ('i word × 'i word)" where
    "ipcidr_to_interval cidr  (ipcidr_to_interval_start cidr, ipcidr_to_interval_end cidr)"

  lemma ipset_from_cidr_ipcidr_to_interval:
    "ipset_from_cidr base len =
      {ipcidr_to_interval_start (base,len) .. ipcidr_to_interval_end (base,len)}"
    by(simp add: Let_def ipcidr_to_interval_def ipset_from_cidr_def ipset_from_netmask_def)
  declare ipcidr_to_interval_start.simps[simp del] ipcidr_to_interval_end.simps[simp del]

  lemma ipcidr_to_interval:
    "ipcidr_to_interval (base, len) = (s,e)  ipset_from_cidr base len = {s .. e}"
    by (simp add: ipcidr_to_interval_def ipset_from_cidr_ipcidr_to_interval)

  definition ipcidr_tuple_to_wordinterval :: "('i::len word × nat)  'i wordinterval" where
    "ipcidr_tuple_to_wordinterval iprng  iprange_interval (ipcidr_to_interval iprng)"

  lemma wordinterval_to_set_ipcidr_tuple_to_wordinterval:
    "wordinterval_to_set (ipcidr_tuple_to_wordinterval (b, m)) = ipset_from_cidr b m"
    unfolding ipcidr_tuple_to_wordinterval_def ipset_from_cidr_ipcidr_to_interval
    by(simp add: iprange_interval.simps)

  lemma wordinterval_to_set_ipcidr_tuple_to_wordinterval_uncurry:
    "wordinterval_to_set (ipcidr_tuple_to_wordinterval ipcidr) = uncurry ipset_from_cidr ipcidr"
    by(cases ipcidr, simp add: wordinterval_to_set_ipcidr_tuple_to_wordinterval)

  definition ipcidr_union_set :: "('i::len word × nat) set  ('i word) set" where
    "ipcidr_union_set ips  (base, len)  ips. ipset_from_cidr base len"

  lemma ipcidr_union_set_uncurry:
    "ipcidr_union_set ips = ( ipcidr  ips. uncurry ipset_from_cidr ipcidr)"
    by(simp add: ipcidr_union_set_def uncurry_case_stmt)

subsection‹Clever Operations on IP Addresses in CIDR Notation›
  text‹Intersecting two intervals may result in a new interval.
    Example: {1..10} ∩ {5..20} = {5..10}›

    Intersecting two IP address ranges represented as CIDR ranges results either in the empty set
    or the smaller of the two ranges. It will never create a new range.
    (*contributed by Lars Noschinski*)
    private lemma less_and_not_mask_eq:
      fixes i :: "('a :: len) word"
      assumes "r2  r1" "i && ~~ (mask r2) = x && ~~ (mask r2)"
      shows "i && ~~ (mask r1) = x && ~~ (mask r1)"
    proof -
      have "i AND NOT (mask r1) = (i && ~~ (mask r2)) && ~~ (mask r1)" (is "_ = ?w && _")
        using r2  r1 by (simp add: and_not_mask_twice max_def)
      also have "?w = x && ~~ (mask r2)" by fact
      also have " && ~~ (mask r1) = x && ~~ (mask r1)"
        using r2  r1 by (simp add: and_not_mask_twice max_def)
      finally show ?thesis .

    lemma ip_cidr_set_less:
      fixes i :: "'i::len word"
      shows "r1  r2  ip_cidr_set i r2  ip_cidr_set i r1"
      unfolding ip_cidr_set_def
      apply auto
      apply (rule less_and_not_mask_eq[where ?r2.0="LENGTH('i) - r2"])
      apply auto

    private lemma ip_cidr_set_intersect_subset_helper:
      fixes i1 r1 i2 r2
      assumes disj: "ip_cidr_set i1 r1  ip_cidr_set i2 r2  {}" and  "r1  r2"
      shows "ip_cidr_set i2 r2  ip_cidr_set i1 r1"
      proof -
      from disj obtain j where "j  ip_cidr_set i1 r1" "j  ip_cidr_set i2 r2" by auto
      with r1  r2 have "j  ip_cidr_set j r1" "j  ip_cidr_set j r1"
        using ip_cidr_set_change_base ip_cidr_set_less by blast+

      show "ip_cidr_set i2 r2  ip_cidr_set i1 r1"
        fix i assume "i  ip_cidr_set i2 r2"
        with j  ip_cidr_set i2 r2 have "i  ip_cidr_set j r2" using ip_cidr_set_change_base by auto
        also have "ip_cidr_set j r2  ip_cidr_set j r1" using r1  r2 ip_cidr_set_less by blast
        also have " = ip_cidr_set i1 r1" using j  ip_cidr_set i1 r1 ip_cidr_set_change_base by blast
        finally show "i  ip_cidr_set i1 r1" .

    lemma ip_cidr_set_notsubset_empty_inter:
      "¬ ip_cidr_set i1 r1  ip_cidr_set i2 r2 
       ¬ ip_cidr_set i2 r2  ip_cidr_set i1 r1 
       ip_cidr_set i1 r1  ip_cidr_set i2 r2 = {}"
      apply(cases "r1  r2")
       subgoal using ip_cidr_set_intersect_subset_helper by blast
      apply(cases "r2  r1")
       subgoal using ip_cidr_set_intersect_subset_helper by blast

  lemma ip_cidr_intersect:
     "¬ ipset_from_cidr b2 m2  ipset_from_cidr b1 m1 
      ¬ ipset_from_cidr b1 m1  ipset_from_cidr b2 m2 
      ipset_from_cidr b1 m1  ipset_from_cidr b2 m2 = {}"
    apply(simp add: ipset_from_cidr_eq_ip_cidr_set)
    using ip_cidr_set_notsubset_empty_inter by blast

  text‹Computing the intersection of two IP address ranges in CIDR notation›
  fun ipcidr_conjunct :: "('i::len word × nat)  ('i word × nat)  ('i word × nat) option" where
    "ipcidr_conjunct (base1, m1) (base2, m2) = (
         ipset_from_cidr base1 m1  ipset_from_cidr base2 m2 = {}
       else if
         ipset_from_cidr base1 m1  ipset_from_cidr base2 m2
         Some (base1, m1)
         Some (base2, m2)

  text‹Intersecting with an address with prefix length zero always yields a non-empty result.›
  lemma ipcidr_conjunct_any: "ipcidr_conjunct a (x,0)  None" "ipcidr_conjunct (y,0) b  None"
     apply(cases a, simp add: ipset_from_cidr_0 ipset_from_cidr_not_empty)
    by(cases b, simp add: ipset_from_cidr_0 ipset_from_cidr_not_empty)

  lemma ipcidr_conjunct_correct: "(case ipcidr_conjunct (b1, m1) (b2, m2)
                                          of Some (bx, mx)  ipset_from_cidr bx mx
                                          |  None  {}) =
                                   (ipset_from_cidr b1 m1)  (ipset_from_cidr b2 m2)"
    apply(simp split: if_split_asm)
    using ip_cidr_intersect by fast
  declare ipcidr_conjunct.simps[simp del]

subsection‹Code Equations›
  text‹Executable definition using word intervals›
  lemma ipcidr_conjunct_word[code_unfold]:
  "ipcidr_conjunct ips1 ips2 = (
      wordinterval_empty (wordinterval_intersection
                            (ipcidr_tuple_to_wordinterval ips1) (ipcidr_tuple_to_wordinterval ips2))
     else if
       wordinterval_subset (ipcidr_tuple_to_wordinterval ips1) (ipcidr_tuple_to_wordinterval ips2)
       Some ips1
       Some ips2
  apply(cases ips1, cases ips2, rename_tac b1 m1 b2 m2, simp)
  apply(auto simp add: wordinterval_to_set_ipcidr_tuple_to_wordinterval ipcidr_conjunct.simps
             split: if_split_asm)

  (*with the code_unfold lemma before, this works!*)
  lemma "ipcidr_conjunct (0::32 word,0) (8,1) = Some (8, 1)" by eval
  export_code ipcidr_conjunct checking SML

  text‹making element check executable›
  lemma addr_in_ipset_from_netmask_code[code_unfold]:
    "addr  (ipset_from_netmask base netmask) 
      (base AND netmask)  addr  addr  (base AND netmask) OR (NOT netmask)"
    by(simp add: ipset_from_netmask_def Let_def)
  lemma addr_in_ipset_from_cidr_code[code_unfold]:
    "(addr::'i::len word)  (ipset_from_cidr pre len) 
       (pre AND ((mask len) << (LENGTH('i) - len)))  addr 
        addr  pre OR (mask (LENGTH('i) - len))"
  unfolding ipset_from_cidr_alt by simp