Theory IEEE

(* Formalization of IEEE-754 Standard for binary floating-point arithmetic *)
(* Author: Lei Yu, University of Cambridge

  Contrib: Peter Lammich: fixed wrong sign handling in fmadd

*)

section ‹Specification of the IEEE standard›

theory IEEE
  imports
    "HOL-Library.Float"
    Word_Lib.Word_Lemmas
begin

typedef (overloaded) ('e::len, 'f::len) float = "UNIV::(1 word × 'e word × 'f word) set"
  by auto

setup_lifting type_definition_float

syntax "_float" :: "type  type  type" ("'(_, _') float")
text ‹parse ('a, 'b) float› as ('a::len, 'b::len) float.›

parse_translation let
    fun float t u = Syntax.const @{type_syntax float} $ t $ u;
    fun len_tr u =
      (case Term_Position.strip_positions u of
        v as Free (x, _) =>
          if Lexicon.is_tid x then
            (Syntax.const @{syntax_const "_ofsort"} $ v $
              Syntax.const @{class_syntax len})
          else u
      | _ => u)
    fun len_float_tr [t, u] =
      float (len_tr t) (len_tr u)
  in
    [(@{syntax_const "_float"}, K len_float_tr)]
  end

subsection ‹Derived parameters for floating point formats›

definition wordlength :: "('e, 'f) float itself  nat"
  where "wordlength x = LENGTH('e) + LENGTH('f) + 1"

definition bias :: "('e, 'f) float itself  nat"
  where "bias x = 2^(LENGTH('e) - 1) - 1"

definition emax :: "('e, 'f) float itself  nat"
  where "emax x = unat (- 1::'e word)"

abbreviation fracwidth::"('e, 'f) float itself  nat" where
  "fracwidth _  LENGTH('f)"

subsection ‹Predicates for the four IEEE formats›

definition is_single :: "('e, 'f) float itself  bool"
  where "is_single x  LENGTH('e) = 8  wordlength x = 32"

definition is_double :: "('e, 'f) float itself  bool"
  where "is_double x  LENGTH('e) = 11  wordlength x = 64"

definition is_single_extended :: "('e, 'f) float itself  bool"
  where "is_single_extended x  LENGTH('e)  11  wordlength x  43"

definition is_double_extended :: "('e, 'f) float itself  bool"
  where "is_double_extended x  LENGTH('e)  15  wordlength x  79"


subsection ‹Extractors for fields›

lift_definition sign::"('e, 'f) float  nat" is
  "λ(s::1 word, _::'e word, _::'f word). unat s" .

lift_definition exponent::"('e, 'f) float  nat" is
  "λ(_, e::'e word, _). unat e" .

lift_definition fraction::"('e, 'f) float  nat" is
  "λ(_, _, f::'f word). unat f" .

abbreviation "real_of_word x  real (unat x)"

lift_definition valof :: "('e, 'f) float  real"
is "λ(s, e, f).
    let x = (TYPE(('e, 'f) float)) in
    (if e = 0
     then (-1::real)^(unat s) * (2 / (2^bias x)) * (real_of_word f/2^(LENGTH('f)))
     else (-1::real)^(unat s) * ((2^(unat e)) / (2^bias x)) * (1 + real_of_word f/2^LENGTH('f)))"
  .

subsection ‹Partition of numbers into disjoint classes›

definition is_nan :: "('e, 'f) float  bool"
  where "is_nan a  exponent a = emax TYPE(('e, 'f)float)  fraction a  0"

definition is_infinity :: "('e, 'f) float  bool"
  where "is_infinity a  exponent a = emax TYPE(('e, 'f)float)  fraction a = 0"

definition is_normal :: "('e, 'f) float  bool"
  where "is_normal a  0 < exponent a  exponent a < emax TYPE(('e, 'f)float)"

definition is_denormal :: "('e, 'f) float  bool"
  where "is_denormal a  exponent a = 0  fraction a  0"

definition is_zero :: "('e, 'f) float  bool"
  where "is_zero a  exponent a = 0  fraction a = 0"

definition is_finite :: "('e, 'f) float  bool"
  where "is_finite a  (is_normal a  is_denormal a  is_zero a)"


subsection ‹Special values›

lift_definition plus_infinity :: "('e, 'f) float" ("") is "(0, - 1, 0)" .

lift_definition topfloat :: "('e, 'f) float" is "(0, - 2, 2^LENGTH('f) - 1)" .

instantiation float::(len, len) zero begin

lift_definition zero_float :: "('e, 'f) float" is "(0, 0, 0)" .

instance proof qed

end

subsection ‹Negation operation on floating point values›

instantiation float::(len, len) uminus begin
lift_definition uminus_float :: "('e, 'f) float  ('e, 'f) float" is  "λ(s, e, f). (1 - s, e, f)" .
instance proof qed
end

abbreviation (input) "minus_zero  - (0::('e, 'f)float)"
abbreviation (input) "minus_infinity  - "
abbreviation (input) "bottomfloat  - topfloat"


subsection ‹Real number valuations›

text ‹The largest value that can be represented in floating point format.›
definition largest :: "('e, 'f) float itself  real"
  where "largest x = (2^(emax x - 1) / 2^bias x) * (2 - 1/(2^fracwidth x))"

text ‹Threshold, used for checking overflow.›
definition threshold :: "('e, 'f) float itself  real"
  where "threshold x = (2^(emax x - 1) / 2^bias x) * (2 - 1/(2^(Suc(fracwidth x))))"

text ‹Unit of least precision.›

lift_definition one_lp::"('e ,'f) float  ('e ,'f) float" is "λ(s, e, f). (0, e::'e word, 1)" .
lift_definition zero_lp::"('e ,'f) float  ('e ,'f) float" is "λ(s, e, f). (0, e::'e word, 0)" .

definition ulp :: "('e, 'f) float  real" where "ulp a = valof (one_lp a) - valof (zero_lp a)"

text ‹Enumerated type for rounding modes.›
datatype roundmode = roundNearestTiesToEven
  | roundNearestTiesToAway
  | roundTowardPositive
  | roundTowardNegative
  | roundTowardZero

abbreviation (input) "RNE  roundNearestTiesToEven"
abbreviation (input) "RNA  roundNearestTiesToAway"
abbreviation (input) "RTP  roundTowardPositive"
abbreviation (input) "RTN  roundTowardNegative"
abbreviation (input) "RTZ  roundTowardZero"


subsection ‹Rounding›

text ‹Characterization of best approximation from a set of abstract values.›
definition "is_closest v s x a  a  s  (b. b  s  ¦v a - x¦  ¦v b - x¦)"

text ‹Best approximation with a deciding preference for multiple possibilities.›
definition "closest v p s x =
  (SOME a. is_closest v s x a  ((b. is_closest v s x b  p b)  p a))"

fun round :: "roundmode  real  ('e ,'f) float"
where
  "round roundNearestTiesToEven y =
   (if y  - threshold TYPE(('e ,'f) float) then minus_infinity
    else if y  threshold TYPE(('e ,'f) float) then plus_infinity
    else closest (valof) (λa. even (fraction a)) {a. is_finite a} y)" (*FIXME: broken, especially if both nearest are odd*)
| "round roundNearestTiesToAway y =
   (if y  - threshold TYPE(('e ,'f) float) then minus_infinity
    else if y  threshold TYPE(('e ,'f) float) then plus_infinity
    else closest (valof) (λa. True) {a. is_finite a  ¦valof a¦  ¦y¦} y)"
| "round roundTowardPositive y =
   (if y < - largest TYPE(('e ,'f) float) then bottomfloat
    else if y > largest TYPE(('e ,'f) float) then plus_infinity
    else closest (valof) (λa. True) {a. is_finite a  valof a  y} y)"
| "round roundTowardNegative y =
   (if y < - largest TYPE(('e ,'f) float) then minus_infinity
    else if y > largest TYPE(('e ,'f) float) then topfloat
    else closest (valof) (λa. True) {a. is_finite a  valof a  y} y)"
| "round roundTowardZero y =
   (if y < - largest TYPE(('e ,'f) float) then bottomfloat
    else if y > largest TYPE(('e ,'f) float) then topfloat
    else closest (valof) (λa. True) {a. is_finite a  ¦valof a¦  ¦y¦} y)"

text ‹Rounding to integer values in floating point format.›

definition is_integral :: "('e ,'f) float  bool"
  where "is_integral a  is_finite a  (n::nat. ¦valof a¦ = real n)"

fun intround :: "roundmode  real  ('e ,'f) float"
where
  "intround roundNearestTiesToEven y =
    (if y  - threshold TYPE(('e ,'f) float) then minus_infinity
     else if y  threshold TYPE(('e ,'f) float) then plus_infinity
     else closest (valof) (λa. (n::nat. even n  ¦valof a¦ = real n)) {a. is_integral a} y)"
| "intround roundNearestTiesToAway y =
    (if y  - threshold TYPE(('e ,'f) float) then minus_infinity
     else if y  threshold TYPE(('e ,'f) float) then plus_infinity
     else closest (valof) (λx. True) {a. is_integral a  ¦valof a¦  ¦y¦} y)"
| "intround roundTowardPositive y =
    (if y < - largest TYPE(('e ,'f) float) then bottomfloat
     else if y > largest TYPE(('e ,'f) float) then plus_infinity
     else closest (valof) (λx. True) {a. is_integral a  valof a  y} y)"
| "intround roundTowardNegative y =
    (if y < - largest TYPE(('e ,'f) float) then minus_infinity
     else if y > largest TYPE(('e ,'f) float) then topfloat
     else closest (valof) (λx. True) {a. is_integral a  valof a  y} y)"
| "intround roundTowardZero y =
    (if y < - largest TYPE(('e ,'f) float) then bottomfloat
     else if y > largest TYPE(('e ,'f) float) then topfloat
     else closest (valof) (λx. True) {a. is_integral a  ¦valof a¦  ¦y¦} y)"

text ‹Round, choosing between -0.0 or +0.0›

definition float_round::"roundmode  bool  real  ('e, 'f) float"
  where "float_round mode toneg r =
      (let x = round mode r in
         if is_zero x
            then if toneg
                    then minus_zero
                 else 0
         else x)"

text ‹Non-standard of NaN.›
definition some_nan :: "('e ,'f) float"
  where "some_nan = (SOME a. is_nan a)"

text ‹Coercion for signs of zero results.›
definition zerosign :: "nat  ('e ,'f) float  ('e ,'f) float"
  where "zerosign s a =
    (if is_zero a then (if s = 0 then 0 else - 0) else a)"

text ‹Remainder operation.›
definition rem :: "real  real  real"
  where "rem x y =
    (let n = closest id (λx. n::nat. even n  ¦x¦ = real n) {x. n :: nat. ¦x¦ = real n} (x / y)
     in x - n * y)"

definition frem :: "roundmode  ('e ,'f) float  ('e ,'f) float  ('e ,'f) float"
  where "frem m a b =
    (if is_nan a  is_nan b  is_infinity a  is_zero b then some_nan
     else zerosign (sign a) (round m (rem (valof a) (valof b))))"


subsection ‹Definitions of the arithmetic operations›

definition fintrnd :: "roundmode  ('e ,'f) float  ('e ,'f) float"
  where "fintrnd m a =
    (if is_nan a then (some_nan)
     else if is_infinity a then a
     else zerosign (sign a) (intround m (valof a)))"

definition fadd :: "roundmode  ('e ,'f) float  ('e ,'f) float  ('e ,'f) float"
  where "fadd m a b =
    (if is_nan a  is_nan b  (is_infinity a  is_infinity b  sign a  sign b)
     then some_nan
     else if (is_infinity a) then a
     else if (is_infinity b) then b
     else
      zerosign
        (if is_zero a  is_zero b  sign a = sign b then sign a
         else if m = roundTowardNegative then 1 else 0)
        (round m (valof a + valof b)))"

definition fsub :: "roundmode  ('e ,'f) float  ('e ,'f) float  ('e ,'f) float"
  where "fsub m a b =
    (if is_nan a  is_nan b  (is_infinity a  is_infinity b  sign a = sign b)
     then some_nan
     else if is_infinity a then a
     else if is_infinity b then - b
     else
      zerosign
        (if is_zero a  is_zero b  sign a  sign b then sign a
         else if m = roundTowardNegative then 1 else 0)
        (round m (valof a - valof b)))"

definition fmul :: "roundmode  ('e ,'f) float  ('e ,'f) float  ('e ,'f) float"
  where "fmul m a b =
    (if is_nan a  is_nan b  (is_zero a  is_infinity b)  (is_infinity a  is_zero b)
     then some_nan
     else if is_infinity a  is_infinity b
     then (if sign a = sign b then plus_infinity else minus_infinity)
     else zerosign (if sign a = sign b then 0 else 1 ) (round m (valof a * valof b)))"

definition fdiv :: "roundmode  ('e ,'f) float  ('e ,'f) float  ('e ,'f) float"
  where "fdiv m a b =
    (if is_nan a  is_nan b  (is_zero a  is_zero b)  (is_infinity a  is_infinity b)
     then some_nan
     else if is_infinity a  is_zero b
     then (if sign a = sign b then plus_infinity else minus_infinity)
     else if is_infinity b
     then (if sign a = sign b then 0 else - 0)
     else zerosign (if sign a = sign b then 0 else 1) (round m (valof a / valof b)))"

definition fsqrt :: "roundmode  ('e ,'f) float  ('e ,'f) float"
  where "fsqrt m a =
    (if is_nan a then some_nan
     else if is_zero a  is_infinity a  sign a = 0 then a
     else if sign a = 1 then some_nan
     else zerosign (sign a) (round m (sqrt (valof a))))"

definition fmul_add :: "roundmode  ('t ,'w) float  ('t ,'w) float  ('t ,'w) float  ('t ,'w) float"
  where "fmul_add mode x y z = (let 
    signP = if sign x = sign y then 0 else 1;
    infP = is_infinity x   is_infinity y
  in
    if is_nan x  is_nan y  is_nan z then some_nan
    else if is_infinity x  is_zero y 
           is_zero x  is_infinity y 
           is_infinity z  infP  signP  sign z 
      then some_nan
    else if is_infinity z  (sign z = 0)  infP  (signP = 0)
      then plus_infinity
    else if is_infinity z  (sign z = 1)  infP  (signP = 1)
      then minus_infinity
    else let 
      r1 = valof x * valof y;
      r2 = valof z;
      r = r1+r2
    in 
      if r=0 then ( ― ‹Exact Zero Case. Same sign rules as for add apply. ›
        if r1=0  r2=0  signP=sign z then zerosign signP 0
        else if mode = roundTowardNegative then -0
        else 0
      ) else ( ― ‹Not exactly zero: Rounding has sign of exact value, even if rounded val is zero›
        zerosign (if r<0 then 1 else 0) (round mode r)
      )
  )"


subsection ‹Comparison operations›

datatype ccode = Gt | Lt | Eq | Und

definition fcompare :: "('e ,'f) float  ('e ,'f) float  ccode"
  where "fcompare a b =
    (if is_nan a  is_nan b then Und
     else if is_infinity a  sign a = 1
     then (if is_infinity b  sign b = 1 then Eq else Lt)
     else if is_infinity a  sign a = 0
     then (if is_infinity b  sign b = 0 then Eq else Gt)
     else if is_infinity b  sign b = 1 then Gt
     else if is_infinity b  sign b = 0 then Lt
     else if valof a < valof b then Lt
     else if valof a = valof b then Eq
     else Gt)"

definition flt :: "('e ,'f) float  ('e ,'f) float  bool"
  where "flt a b  fcompare a b = Lt"

definition fle :: "('e ,'f) float  ('e ,'f) float  bool"
  where "fle a b  fcompare a b = Lt  fcompare a b = Eq"

definition fgt :: "('e ,'f) float  ('e ,'f) float  bool"
  where "fgt a b  fcompare a b = Gt"

definition fge :: "('e ,'f) float  ('e ,'f) float  bool"
  where "fge a b  fcompare a b = Gt  fcompare a b = Eq"

definition feq :: "('e ,'f) float  ('e ,'f) float  bool"
  where "feq a b  fcompare a b = Eq"


section ‹Specify float to be double  precision and round to even›

instantiation float :: (len, len) plus
begin

definition plus_float :: "('a, 'b) float  ('a, 'b) float  ('a, 'b) float"
  where "a + b = fadd RNE a b"

instance ..

end

instantiation float :: (len, len) minus
begin

definition minus_float :: "('a, 'b) float  ('a, 'b) float  ('a, 'b) float"
  where "a - b = fsub RNE a b"

instance ..

end

instantiation float :: (len, len) times
begin

definition times_float :: "('a, 'b) float  ('a, 'b) float  ('a, 'b) float"
  where "a * b = fmul RNE a b"

instance ..

end

instantiation float :: (len, len) one
begin

lift_definition one_float :: "('a, 'b) float" is "(0, 2^(LENGTH('a) - 1) - 1, 0)" .

instance ..

end

instantiation float :: (len, len) inverse
begin

definition divide_float :: "('a, 'b) float  ('a, 'b) float  ('a, 'b) float"
  where "a div b = fdiv RNE a b"

definition inverse_float :: "('a, 'b) float  ('a, 'b) float"
  where "inverse_float a = fdiv RNE 1 a"

instance ..

end

definition float_rem :: "('a, 'b) float  ('a, 'b) float  ('a, 'b) float"
  where "float_rem a b = frem RNE a b"

definition float_sqrt :: "('a, 'b) float  ('a, 'b) float"
  where "float_sqrt a = fsqrt RNE a"

definition ROUNDFLOAT ::"('a, 'b) float  ('a, 'b) float"
  where "ROUNDFLOAT a = fintrnd RNE a"


instantiation float :: (len, len) ord
begin

definition less_float :: "('a, 'b) float  ('a, 'b) float  bool"
  where "a < b  flt a b"

definition less_eq_float :: "('a, 'b) float  ('a, 'b) float  bool"
  where "a  b  fle a b"

instance ..

end

definition float_eq :: "('a, 'b) float  ('a, 'b) float  bool"  (infixl "" 70)
  where "float_eq a b = feq a b"

instantiation float :: (len, len) abs
begin

definition abs_float :: "('a, 'b) float  ('a, 'b) float"
  where "abs_float a = (if sign a = 0 then a else - a)"

instance ..
end

text ‹The 1 + ε› property.›
definition normalizes :: "_ itself  real  bool"
  where "normalizes float_format x =
    (1/ (2::real)^(bias float_format - 1)  ¦x¦  ¦x¦ < threshold float_format)"

end