Theory Prefix_Free_Code_Combinators
section ‹Introduction\label{sec:intro}›
theory Prefix_Free_Code_Combinators
imports
"HOL-Library.Extended_Real"
"HOL-Library.Float"
"HOL-Library.FuncSet"
"HOL-Library.List_Lexorder"
"HOL-Library.Log_Nat"
"HOL-Library.Sublist"
begin
text ‹The encoders are represented as partial prefix-free functions. The advantage of
prefix free codes is that they can be easily combined by concatenation. The approach
of using prefix free codes (on the byte-level) for the representation of complex data
structures is common in many industry encoding libraries (cf. \<^cite>‹"rfc8949"›).
The reason for representing encoders using partial functions, stems from some use-cases
where the objects to be encoded may be in a much smaller sets, as their type may suggest.
For example a natural number may be known to have a given range, or a function may be
encodable because it has a finite domain.
Note: Prefix-free codes can also be automatically derived using Huffmans' algorithm, which was
formalized by Blanchette~\<^cite>‹"Huffman-AFP"›. This is especially useful if it is possible to transmit
a dictionary before the data. On the other hand these standard codes are useful, when the above is
impractical and/or the distribution of the input is unknown or expected to be close to the one's
implied by standard codes.
The following section contains general definitions and results, followed by
Section~\ref{sec:dep_enc} to \ref{sec:float_enc} where encoders for primitive types
and combinators are defined. Each construct is accompanied by lemmas verifying that they
form prefix free codes as well as bounds on the bit count to encode the data.
Section~\ref{sec:examples} concludes with a few examples.›
section ‹Encodings›
fun opt_prefix where
"opt_prefix (Some x) (Some y) = prefix x y" |
"opt_prefix _ _ = False"
definition "opt_comp x y = (opt_prefix x y ∨ opt_prefix y x)"
fun opt_append :: "bool list option ⇒ bool list option ⇒ bool list option"
where
"opt_append (Some x) (Some y) = Some (x@y)" |
"opt_append _ _ = None"
lemma opt_comp_sym: "opt_comp x y = opt_comp y x"
by (simp add:opt_comp_def, blast)
lemma opt_comp_append:
assumes "opt_comp (opt_append x y) z"
shows "opt_comp x z"
proof -
obtain x' y' z' where a: "x = Some x'" "y = Some y'" "z = Some z'"
using assms
by (cases x, case_tac[!] y, case_tac[!] z, auto simp: opt_comp_def)
have "prefix (x'@y') z' ∨ prefix z' (x'@y')"
using a assms by (simp add:opt_comp_def)
hence "prefix x' z' ∨ prefix z' x'"
using prefix_same_cases append_prefixD by blast
thus ?thesis
using a by (simp add:opt_comp_def)
qed
lemma opt_comp_append_2:
assumes "opt_comp x (opt_append y z)"
shows "opt_comp x y"
using opt_comp_append opt_comp_sym assms by blast
lemma opt_comp_append_3:
assumes "opt_comp (opt_append x y) (opt_append x z)"
shows "opt_comp y z"
using assms
by (cases x, case_tac[!] y, case_tac[!] z, auto simp: opt_comp_def)
type_synonym 'a encoding = "'a ⇀ bool list"
definition is_encoding :: "'a encoding ⇒ bool"
where "is_encoding f = (∀x y. opt_prefix (f x) (f y) ⟶ x = y)"
text ‹An encoding function is represented as partial functions into lists of booleans, where
each list element represents a bit. Such a function is defined to be an encoding, if it is
prefix-free on its domain. This is similar to the formalization by Hibon and
Paulson~\<^cite>‹"Source_Coding_Theorem-AFP"› except for the use of partial functions for the
practical reasons described in Section~\ref{sec:intro}.›
lemma is_encodingI:
assumes "⋀x x' y y'. e x = Some x' ⟹ e y = Some y' ⟹
prefix x' y' ⟹ x = y"
shows "is_encoding e"
proof -
have "opt_prefix (e x) (e y) ⟹ x = y" for x y
using assms by (cases "e x", case_tac[!] "e y", auto)
thus ?thesis by (simp add:is_encoding_def)
qed
lemma is_encodingI_2:
assumes "⋀x y . opt_comp (e x) (e y) ⟹ x = y"
shows "is_encoding e"
using assms by (simp add:opt_comp_def is_encoding_def)
lemma encoding_triv: "is_encoding Map.empty"
by (rule is_encodingI_2, simp add:opt_comp_def)
lemma is_encodingD:
assumes "is_encoding e"
assumes "opt_comp (e x) (e y)"
shows "x = y"
using assms by (auto simp add:opt_comp_def is_encoding_def)
lemma encoding_imp_inj:
assumes "is_encoding f"
shows "inj_on f (dom f)"
using assms
by (intro inj_onI, simp add:is_encoding_def, force)
fun bit_count :: "bool list option ⇒ ereal" where
"bit_count None = ∞" |
"bit_count (Some x) = ereal (length x)"
lemma bit_count_finite_imp_dom:
"bit_count (f x) < ∞ ⟹ x ∈ dom f"
by (cases "f x", auto)
lemma bit_count_append:
"bit_count (opt_append x y) = bit_count x + bit_count y"
by (cases x, case_tac[!] "y", simp_all)
section ‹(Dependent) Products\label{sec:dep_enc}›
definition encode_dependent_prod ::
"'a encoding ⇒ ('a ⇒ 'b encoding) ⇒ ('a × 'b) encoding"
(infixr ‹⋈⇩e› 65)
where
"encode_dependent_prod e f x =
opt_append (e (fst x)) (f (fst x) (snd x))"
lemma dependent_encoding:
assumes "is_encoding e1"
assumes "⋀x. x ∈ dom e1 ⟹ is_encoding (e2 x)"
shows "is_encoding (e1 ⋈⇩e e2)"
proof (rule is_encodingI_2)
fix x y
assume a:"opt_comp ((e1 ⋈⇩e e2) x) ((e1 ⋈⇩e e2) y)"
have d:"opt_comp (e1 (fst x)) (e1 (fst y))"
using a unfolding encode_dependent_prod_def
by (metis opt_comp_append opt_comp_append_2)
hence b:"fst x = fst y"
using is_encodingD[OF assms(1)] by simp
hence "opt_comp (e2 (fst x) (snd x)) (e2 (fst x) (snd y))"
using a unfolding encode_dependent_prod_def by (metis opt_comp_append_3)
moreover have "fst x ∈ dom e1" using d b
by (cases "e1 (fst x)", simp_all add:opt_comp_def dom_def)
ultimately have c:"snd x = snd y"
using is_encodingD[OF assms(2)] by simp
show "x = y"
using b c by (simp add: prod_eq_iff)
qed
lemma dependent_bit_count:
"bit_count ((e⇩1 ⋈⇩e e⇩2) (x⇩1,x⇩2)) =
bit_count (e⇩1 x⇩1) + bit_count (e⇩2 x⇩1 x⇩2)"
by (simp add: encode_dependent_prod_def bit_count_append)
lemma dependent_bit_count_2:
"bit_count ((e⇩1 ⋈⇩e e⇩2) x) =
bit_count (e⇩1 (fst x)) + bit_count (e⇩2 (fst x) (snd x))"
by (simp add: encode_dependent_prod_def bit_count_append)
text ‹This abbreviation is for non-dependent products.›
abbreviation encode_prod ::
"'a encoding ⇒ 'b encoding ⇒ ('a × 'b) encoding"
(infixr ‹×⇩e› 65)
where
"encode_prod e1 e2 ≡ e1 ⋈⇩e (λ_. e2)"
section ‹Composition›
lemma encoding_compose:
assumes "is_encoding f"
assumes "inj_on g {x. p x}"
shows "is_encoding (λx. if p x then f (g x) else None)"
using assms by (simp add:comp_def is_encoding_def inj_onD)
lemma encoding_compose_2:
assumes "is_encoding f"
assumes "inj g"
shows "is_encoding (λx. f (g x))"
using assms by (simp add:comp_def is_encoding_def inj_onD)
section ‹Natural Numbers›
fun encode_bounded_nat :: "nat ⇒ nat ⇒ bool list" where
"encode_bounded_nat (Suc l) n =
(let r = n ≥ (2^l) in r#encode_bounded_nat l (n-of_bool r*2^l))" |
"encode_bounded_nat 0 _ = []"
lemma encode_bounded_nat_prefix_free:
fixes u v l :: nat
assumes "u < 2^l"
assumes "v < 2^l"
assumes "prefix (encode_bounded_nat l u) (encode_bounded_nat l v)"
shows "u = v"
using assms
proof (induction l arbitrary: u v)
case 0
then show ?case by simp
next
case (Suc l)
have "prefix (encode_bounded_nat l (u - of_bool (u ≥ 2^l)*2^l))
(encode_bounded_nat l (v - of_bool (v ≥ 2^l)*2^l))"
and a:"(u ≥ 2^l) = (v ≥ 2^l)"
using Suc(4) by (simp_all add: Let_def split: split_of_bool_asm)
moreover have "u - of_bool (u ≥ 2^l)*2^l < 2^l"
using Suc(2) by (cases "u < 2^l", auto simp add:of_bool_def)
moreover have "v - of_bool (v ≥ 2^l)*2^l < 2^l"
using Suc(3) by (cases "v < 2^l", auto simp add:of_bool_def)
ultimately have
"u - of_bool (u ≥ 2^l)*2^l = v - of_bool (v ≥ 2^l)*2^l"
by (intro Suc(1), simp_all)
thus "u = v" using a by (simp split: split_of_bool_asm)
qed
definition Nb⇩e :: "nat ⇒ nat encoding"
where "Nb⇩e l n = (
if n < l
then Some (encode_bounded_nat (floorlog 2 (l-1)) n)
else None)"
text ‹@{term "Nb⇩e l"} is encoding for natural numbers strictly smaller than
@{term "l"} using a fixed length encoding.›
lemma bounded_nat_bit_count:
"bit_count (Nb⇩e l y) = (if y < l then floorlog 2 (l-1) else ∞)"
proof -
have a:"length (encode_bounded_nat h m) = h" for h m
by (induction h arbitrary: m, simp, simp add:Let_def)
show ?thesis
using a by (simp add:Nb⇩e_def)
qed
lemma bounded_nat_bit_count_2:
assumes "y < l"
shows "bit_count (Nb⇩e l y) = floorlog 2 (l-1)"
using assms bounded_nat_bit_count by simp
lemma "dom (Nb⇩e l) = {..<l}"
by (simp add:Nb⇩e_def dom_def lessThan_def)
lemma bounded_nat_encoding: "is_encoding (Nb⇩e l)"
proof -
have "x < l ⟹ x < 2 ^ floorlog 2 (l-1)" for x :: nat
by (intro floorlog_leD floorlog_mono, auto)
thus ?thesis
using encode_bounded_nat_prefix_free
by (intro is_encodingI, simp add:Nb⇩e_def split:if_splits, blast)
qed
fun encode_unary_nat :: "nat ⇒ bool list" where
"encode_unary_nat (Suc l) = False#(encode_unary_nat l)" |
"encode_unary_nat 0 = [True]"
lemma encode_unary_nat_prefix_free:
fixes u v :: nat
assumes "prefix (encode_unary_nat u) (encode_unary_nat v)"
shows "u = v"
using assms
proof (induction u arbitrary: v)
case 0
then show ?case by (cases v, simp_all)
next
case (Suc u)
then show ?case by (cases v, simp_all)
qed
definition Nu⇩e :: "nat encoding"
where "Nu⇩e n = Some (encode_unary_nat n)"
text ‹@{term "Nu⇩e"} is encoding for natural numbers using unary encoding. It is
inefficient except for special cases, where the probability of large numbers decreases
exponentially with its magnitude.›
lemma unary_nat_bit_count:
"bit_count (Nu⇩e n) = Suc n"
unfolding Nu⇩e_def by (induction n, auto)
lemma unary_encoding: "is_encoding Nu⇩e"
using encode_unary_nat_prefix_free
by (intro is_encodingI, simp add:Nu⇩e_def)
text ‹Encoding for positive numbers using Elias-Gamma code.›
definition Ng⇩e :: "nat encoding" where
"Ng⇩e n =
(if n > 0
then (Nu⇩e ⋈⇩e (λr. Nb⇩e (2^r)))
(let r = floorlog 2 n - 1 in (r, n - 2^r))
else None)"
text ‹@{term "Ng⇩e"} is an encoding for positive numbers using Elias-Gamma encoding\<^cite>‹"elias1975"›.›
lemma elias_gamma_bit_count:
"bit_count (Ng⇩e n) = (if n > 0 then 2 * ⌊log 2 n⌋ + 1 else (∞::ereal))"
proof (cases "n > 0")
case True
define r where "r = floorlog 2 n - Suc 0"
have "floorlog 2 n ≠ 0"
using True
by (simp add:floorlog_eq_zero_iff)
hence a:"floorlog 2 n > 0" by simp
have "n < 2^(floorlog 2 n)"
using True floorlog_bounds by simp
also have "... = 2^(r+1)"
using a by (simp add:r_def)
finally have "n < 2^(r+1)" by simp
hence b:"n - 2^r < 2^r" by simp
have "floorlog 2 (2 ^ r - Suc 0) ≤ r"
by (rule floorlog_leI, auto)
moreover have "r ≤ floorlog 2 (2 ^ r - Suc 0)"
by (cases r, simp, auto intro: floorlog_geI)
ultimately have c:"floorlog 2 (2 ^ r - Suc 0) = r"
using order_antisym by blast
have "bit_count (Ng⇩e n) = bit_count (Nu⇩e r) +
bit_count (Nb⇩e (2 ^ r) (n - 2 ^ r))"
using True by (simp add:Ng⇩e_def r_def[symmetric] dependent_bit_count)
also have "... = ereal (r + 1) + ereal (r)"
using b c
by (simp add: unary_nat_bit_count bounded_nat_bit_count)
also have "... = 2 * r + 1" by simp
also have "... = 2 * ⌊log 2 n⌋ + 1"
using True by (simp add:floorlog_def r_def)
finally show ?thesis using True by simp
next
case False
then show ?thesis by (simp add:Ng⇩e_def)
qed
lemma elias_gamma_encoding: "is_encoding Ng⇩e"
proof -
have a: "inj_on (λx. let r = floorlog 2 x - 1 in (r, x - 2 ^ r))
{n. 0 < n}"
proof (rule inj_onI)
fix x y :: nat
assume "x ∈ {n. 0 < n}"
hence x_pos: "0 < x" by simp
assume "y ∈ {n. 0 < n}"
hence y_pos: "0 < y" by simp
define r where "r = floorlog 2 x - Suc 0"
assume b:"(let r = floorlog 2 x - 1 in (r, x - 2 ^ r)) =
(let r = floorlog 2 y - 1 in (r, y - 2 ^ r))"
hence c:"r = floorlog 2 y - Suc 0"
by (simp_all add:Let_def r_def)
have "x - 2^r = y - 2^r" using b
by (simp add:Let_def r_def[symmetric] c[symmetric] prod_eq_iff)
moreover have "x ≥ 2^r"
using r_def x_pos floorlog_bounds by simp
moreover have "y ≥ 2^r"
using c floorlog_bounds y_pos by simp
ultimately show "x = y" using eq_diff_iff by blast
qed
have "is_encoding (λn. Ng⇩e n)"
unfolding Ng⇩e_def using a
by (intro encoding_compose[where f="Nu⇩e ⋈⇩e (λr. Nb⇩e (2^r))"]
dependent_encoding unary_encoding bounded_nat_encoding) auto
thus ?thesis by simp
qed
definition N⇩e :: "nat encoding" where "N⇩e x = Ng⇩e (x+1)"
text ‹@{term "N⇩e"} is an encoding for all natural numbers using exponential Golomb
encoding~\<^cite>‹"teuhola1978"›. Exponential Golomb codes are also used in video compression
applications~\<^cite>‹"richardson2010"›.›
lemma exp_golomb_encoding: "is_encoding N⇩e"
proof -
have "is_encoding (λn. N⇩e n)"
unfolding N⇩e_def
by (intro encoding_compose_2[where g="(λn. n + 1)"] elias_gamma_encoding, auto)
thus ?thesis by simp
qed
lemma exp_golomb_bit_count_exact:
"bit_count (N⇩e n) = 2 * ⌊log 2 (n+1)⌋ + 1"
by (simp add:N⇩e_def elias_gamma_bit_count)
lemma exp_golomb_bit_count:
"bit_count (N⇩e n) ≤ (2 * log 2 (real n+1) + 1)"
by (simp add:exp_golomb_bit_count_exact add.commute)
lemma exp_golomb_bit_count_est:
assumes "n ≤ m "
shows "bit_count (N⇩e n) ≤ (2 * log 2 (real m+1) + 1)"
proof -
have "bit_count (N⇩e n) ≤ (2 * log 2 (real n+1) + 1)"
using exp_golomb_bit_count by simp
also have "... ≤ (2 * log 2 (real m+1) + 1)"
using assms by simp
finally show ?thesis by simp
qed
section ‹Integers›
definition I⇩e :: "int encoding" where
"I⇩e x = N⇩e (nat (if x ≤0 then (-2 * x) else (2*x-1)))"
text ‹@{term "I⇩e"} is an encoding for integers using exponential Golomb codes by embedding
the integers into the natural numbers, specifically the positive numbers are embedded into
the odd-numbers and the negative numbers are embedded into the even numbers. The embedding
has the benefit, that the bit count for an integer only depends on its absolute value.›
lemma int_encoding: "is_encoding I⇩e"
proof -
have "inj (λx. nat (if x ≤ 0 then - 2 * x else 2 * x - 1))"
by (rule inj_onI, auto simp add:eq_nat_nat_iff, presburger)
thus ?thesis
unfolding I⇩e_def
by (intro exp_golomb_encoding encoding_compose_2[where f="N⇩e"])
auto
qed
lemma int_bit_count: "bit_count (I⇩e n) = 2 * ⌊log 2 (2*¦n¦+1)⌋ +1"
proof -
have a:"m > 0 ⟹
⌊log (real 2) (real (2 * m))⌋ = ⌊log (real 2) (real (2 * m + 1))⌋"
for m :: nat by (rule floor_log_eq_if, auto)
have "n > 0 ⟹
⌊log 2 (2 * real_of_int n)⌋ = ⌊log 2 (2 * real_of_int n + 1)⌋"
using a[where m="nat n"] by (simp add:add.commute)
thus ?thesis
by (simp add:I⇩e_def exp_golomb_bit_count_exact floorlog_def)
qed
lemma int_bit_count_1:
assumes "abs n > 0"
shows "bit_count (I⇩e n) = 2 * ⌊log 2 ¦n¦⌋ +3"
proof -
have a:"m > 0 ⟹
⌊log (real 2) (real (2 * m))⌋ = ⌊log (real 2) (real (2 * m + 1))⌋"
for m :: nat by (rule floor_log_eq_if, auto)
have "n < 0 ⟹
⌊log 2 (-2 * real_of_int n)⌋ = ⌊log 2 (1-2 * real_of_int n)⌋"
using a[where m="nat (-n)"] by (simp add:add.commute)
hence "bit_count (I⇩e n) = 2 * ⌊log 2 (2*real_of_int ¦n¦)⌋ +1"
using assms
by (simp add:I⇩e_def exp_golomb_bit_count_exact floorlog_def)
also have "... = 2 * ⌊log 2 ¦n¦⌋ + 3"
using assms by (subst log_mult, auto)
finally show ?thesis by simp
qed
lemma int_bit_count_est_1:
assumes "¦n¦ ≤ r"
shows "bit_count (I⇩e n) ≤ 2 * log 2 (r+1) +3"
proof (cases "abs n > 0")
case True
have "real_of_int ⌊log 2 ¦real_of_int n¦⌋ ≤ log 2 ¦real_of_int n¦"
using of_int_floor_le by blast
also have "... ≤ log 2 (real_of_int r+1)"
using True assms by force
finally have
"real_of_int ⌊log 2 ¦real_of_int n¦⌋ ≤ log 2 (real_of_int r + 1)"
by simp
then show ?thesis
using True assms by (simp add:int_bit_count_1)
next
case False
have "r ≥ 0" using assms by simp
moreover have "n = 0" using False by simp
ultimately show ?thesis by (simp add:I⇩e_def exp_golomb_bit_count_exact)
qed
lemma int_bit_count_est:
assumes "¦n¦ ≤ r"
shows "bit_count (I⇩e n) ≤ 2 * log 2 (2*r+1) +1"
proof -
have "bit_count (I⇩e n) ≤ 2 * log 2 (2*¦n¦+1) +1"
by (simp add:int_bit_count)
also have "... ≤ 2 * log 2 (2* r + 1) + 1"
using assms by simp
finally show ?thesis by simp
qed
section ‹Lists›
definition Lf⇩e where
"Lf⇩e e n xs =
(if length xs = n
then fold (λx y. opt_append y (e x)) xs (Some [])
else None)"
text ‹@{term "Lf⇩e e n"} is an encoding for lists of length @{term"n"},
where the elements are encoding using the encoder @{term "e"}.›
lemma fixed_list_encoding:
assumes "is_encoding e"
shows "is_encoding (Lf⇩e e n)"
proof (induction n)
case 0
then show ?case
by (rule is_encodingI_2, simp_all add:Lf⇩e_def opt_comp_def split:if_splits)
next
case (Suc n)
show ?case
proof (rule is_encodingI_2)
fix x y
assume a:"opt_comp (Lf⇩e e (Suc n) x) (Lf⇩e e (Suc n) y)"
have b:"length x = Suc n" using a
by (cases "length x = Suc n", simp_all add:Lf⇩e_def opt_comp_def)
then obtain x1 x2 where x_def: "x = x1@[x2]" "length x1 = n"
by (metis length_append_singleton lessI nat.inject order.refl
take_all take_hd_drop)
have c:"length y = Suc n" using a
by (cases "length y = Suc n", simp_all add:Lf⇩e_def opt_comp_def)
then obtain y1 y2 where y_def: "y = y1@[y2]" "length y1 = n"
by (metis length_append_singleton lessI nat.inject order.refl
take_all take_hd_drop)
have d: "opt_comp (opt_append (Lf⇩e e n x1) (e x2))
(opt_append (Lf⇩e e n y1) (e y2)) "
using a b c by (simp add:Lf⇩e_def x_def y_def)
hence "opt_comp (Lf⇩e e n x1) (Lf⇩e e n y1)"
using opt_comp_append opt_comp_append_2 by blast
hence e:"x1 = y1"
using is_encodingD[OF Suc] by blast
hence "opt_comp (e x2) (e y2)"
using opt_comp_append_3 d by simp
hence "x2 = y2"
using is_encodingD[OF assms] by blast
thus "x = y" using e x_def y_def by simp
qed
qed
lemma fixed_list_bit_count:
"bit_count (Lf⇩e e n xs) =
(if length xs = n then (∑x ← xs. (bit_count (e x))) else ∞)"
proof (induction n arbitrary: xs)
case 0
then show ?case by (simp add:Lf⇩e_def)
next
case (Suc n)
show ?case
proof (cases "length xs = Suc n")
case True
then obtain x1 x2 where x_def: "xs = x1@[x2]" "length x1 = n"
by (metis length_append_singleton lessI nat.inject order.refl
take_all take_hd_drop)
have "bit_count (Lf⇩e e n x1) = (∑x←x1. bit_count (e x))"
using x_def(2) Suc by simp
then show ?thesis by (simp add:Lf⇩e_def x_def bit_count_append)
next
case False
then show ?thesis by (simp add:Lf⇩e_def)
qed
qed
definition L⇩e
where "L⇩e e xs = (Nu⇩e ⋈⇩e (λn. Lf⇩e e n)) (length xs, xs)"
text ‹@{term "L⇩e e"} is an encoding for arbitrary length lists, where the elements are encoding using the
encoder @{term "e"}.›
lemma list_encoding:
assumes "is_encoding e"
shows "is_encoding (L⇩e e)"
proof -
have "inj (λxs. (length xs, xs))"
by (simp add: inj_on_def)
hence "is_encoding (λxs. L⇩e e xs)"
using assms unfolding L⇩e_def
by (intro encoding_compose_2[where g=" (λx. (length x, x))"]
dependent_encoding unary_encoding fixed_list_encoding) auto
thus ?thesis by simp
qed
lemma sum_list_triv_ereal:
fixes a :: ereal
shows "sum_list (map (λ_. a) xs) = length xs * a"
apply (cases a, simp add:sum_list_triv)
by (induction xs, simp, simp)+
lemma list_bit_count:
"bit_count (L⇩e e xs) = (∑x ← xs. bit_count (e x) + 1) + 1"
proof -
have "bit_count (L⇩e e xs) =
ereal (1 + real (length xs)) + (∑x←xs. bit_count (e x))"
by (simp add: L⇩e_def dependent_bit_count fixed_list_bit_count unary_nat_bit_count)
also have "... = (∑x←xs. bit_count (e x)) + (∑x ← xs. 1) + 1"
by (simp add:ac_simps group_cancel.add1 sum_list_triv_ereal)
also have "... = (∑x ← xs. bit_count (e x) + 1) + 1"
by (simp add:sum_list_addf)
finally show ?thesis by simp
qed
section ‹Functions›
definition encode_fun :: "'a list ⇒ 'b encoding ⇒ ('a ⇒ 'b) encoding"
(infixr ‹→⇩e› 65) where
"encode_fun xs e f =
(if f ∈ extensional (set xs)
then (Lf⇩e e (length xs) (map f xs))
else None)"
text ‹@{term "xs →⇩e e"} is an encoding for functions whose domain is @{term "set xs"},
where the values are encoding using the encoder @{term "e"}.›
lemma fun_encoding:
assumes "is_encoding e"
shows "is_encoding (xs →⇩e e)"
proof -
have a:"inj_on (λx. map x xs) {x. x ∈ extensional (set xs)}"
by (rule inj_onI) (simp add: extensionalityI)
have "is_encoding (λx. (xs →⇩e e) x)"
unfolding encode_fun_def
by (intro encoding_compose[where f="Lf⇩e e (length xs)"]
fixed_list_encoding assms a)
thus ?thesis by simp
qed
lemma fun_bit_count:
"bit_count ((xs →⇩e e) f) =
(if f ∈ extensional (set xs) then (∑x ← xs. bit_count (e (f x))) else ∞)"
by (simp add:encode_fun_def fixed_list_bit_count comp_def)
lemma fun_bit_count_est:
assumes "f ∈ extensional (set xs)"
assumes "⋀x. x ∈ set xs ⟹ bit_count (e (f x)) ≤ a"
shows "bit_count ((xs →⇩e e) f) ≤ ereal (real (length xs)) * a"
proof -
have "bit_count ((xs →⇩e e) f) = (∑x ← xs. bit_count (e (f x)))"
using assms(1) by (simp add:fun_bit_count)
also have "... ≤ (∑x ← xs. a)"
by (intro sum_list_mono assms(2), simp)
also have "... = ereal (real (length xs)) * a"
by (simp add:sum_list_triv_ereal)
finally show ?thesis by simp
qed
section ‹Finite Sets›
definition S⇩e :: "'a encoding ⇒ 'a set encoding" where
"S⇩e e S =
(if finite S ∧ S ⊆ dom e
then (L⇩e e (linorder.sorted_key_list_of_set (≤) (the ∘ e) S))
else None)"
text ‹@{term "S⇩e e"} is an encoding for finite sets whose elements are encoded using the
encoder @{term "e"}.›
lemma set_encoding:
assumes "is_encoding e"
shows "is_encoding (S⇩e e)"
proof -
have a:"inj_on (the ∘ e) (dom e)"
using inj_on_def
by (intro comp_inj_on encoding_imp_inj assms, fastforce)
interpret folding_insort_key "(≤)" "(<)" "(dom e)" "(the ∘ e)"
using a by (unfold_locales) auto
have "is_encoding (λS. S⇩e e S)"
unfolding S⇩e_def using sorted_key_list_of_set_inject
by (intro encoding_compose[where f="L⇩e e"] list_encoding assms(1) inj_onI, simp)
thus ?thesis by simp
qed
lemma set_bit_count:
assumes "is_encoding e"
shows "bit_count (S⇩e e S) = (if finite S then (∑x ∈ S. bit_count (e x)+1)+1 else ∞)"
proof (cases "finite S")
case f:True
have "bit_count (S⇩e e S) = (∑x∈S. bit_count (e x)+1)+1"
proof (cases "S ⊆ dom e")
case True
have a:"inj_on (the ∘ e) (dom e)"
using inj_on_def by (intro comp_inj_on encoding_imp_inj[OF assms], fastforce)
interpret folding_insort_key "(≤)" "(<)" "(dom e)" "(the ∘ e)"
using a by (unfold_locales) auto
have b:"distinct (linorder.sorted_key_list_of_set (≤) (the ∘ e) S)"
(is "distinct ?l") using distinct_sorted_key_list_of_set True
distinct_if_distinct_map by auto
have "bit_count (S⇩e e S) = (∑x←?l. bit_count (e x) + 1) + 1"
using f True by (simp add:S⇩e_def list_bit_count)
also have "... = (∑x∈S. bit_count (e x)+1)+1"
by (simp add: sum_list_distinct_conv_sum_set[OF b]
set_sorted_key_list_of_set[OF True f])
finally show ?thesis by simp
next
case False
hence "∃i∈S. e i = None" by force
hence "∃i∈S. bit_count (e i) = ∞" by force
hence "(∑x∈S. bit_count (e x) + 1) = ∞"
by (simp add:sum_Pinfty f)
then show ?thesis using False by (simp add:S⇩e_def)
qed
thus ?thesis using f by simp
next
case False
then show ?thesis by (simp add:S⇩e_def)
qed
lemma sum_triv_ereal:
fixes a :: ereal
assumes "finite S"
shows "(∑_ ∈ S. a) = card S * a"
proof (cases a)
case (real r)
then show ?thesis by simp
next
case PInf
show ?thesis using assms PInf
by (induction S rule:finite_induct, auto)
next
case MInf
show ?thesis using assms MInf
by (induction S rule:finite_induct, auto)
qed
lemma set_bit_count_est:
assumes "is_encoding f"
assumes "finite S"
assumes "card S ≤ m"
assumes "0 ≤ a"
assumes "⋀x. x ∈ S ⟹ bit_count (f x) ≤ a"
shows "bit_count (S⇩e f S) ≤ ereal (real m) * (a+1) + 1"
proof -
have "bit_count (S⇩e f S) = (∑x∈S. bit_count (f x) + 1) + 1"
using assms by (simp add:set_bit_count)
also have "... ≤ (∑x∈S. a + 1) + 1"
using assms by (intro sum_mono add_mono) auto
also have "... = ereal (real (card S)) * (a + 1) + 1"
by (simp add:sum_triv_ereal[OF assms(2)])
also have "... ≤ ereal (real m) * (a+1) + 1"
using assms(3,4) by (intro add_mono ereal_mult_right_mono) auto
finally show ?thesis by simp
qed
section ‹Floating point numbers\label{sec:float_enc}›
definition F⇩e where "F⇩e f = (I⇩e ×⇩e I⇩e) (mantissa f,exponent f)"
lemma float_encoding:
"is_encoding F⇩e"
proof -
have "inj (λx. (mantissa x, exponent x))" (is "inj ?g")
proof (rule injI)
fix x y
assume "(mantissa x, exponent x) = (mantissa y, exponent y)"
hence "real_of_float x = real_of_float y"
by (simp add:mantissa_exponent)
thus "x = y"
by (metis real_of_float_inverse)
qed
thus "is_encoding (λf. F⇩e f)"
unfolding F⇩e_def
by (intro encoding_compose_2[where g="?g"]
dependent_encoding int_encoding) auto
qed
lemma suc_n_le_2_pow_n:
fixes n :: nat
shows "n + 1 ≤ 2 ^ n"
by (induction n, simp, simp)
lemma float_bit_count_1:
"bit_count (F⇩e f) ≤ 6 + 2 * (log 2 (¦mantissa f¦ + 1) +
log 2 (¦exponent f¦ + 1))" (is "?lhs ≤ ?rhs")
proof -
have "?lhs = bit_count (I⇩e (mantissa f)) +
bit_count (I⇩e (exponent f))"
by (simp add:F⇩e_def dependent_bit_count)
also have "... ≤
ereal (2 * log 2 (real_of_int (¦mantissa f¦ + 1)) + 3) +
ereal (2 * log 2 (real_of_int (¦exponent f¦ + 1)) + 3)"
by (intro int_bit_count_est_1 add_mono) auto
also have "... = ?rhs"
by simp
finally show ?thesis by simp
qed
text ‹The following establishes an estimate for the bit count of a floating
point number in non-normalized representation:›
lemma float_bit_count_2:
fixes m :: int
fixes e :: int
defines "f ≡ float_of (m * 2 powr e)"
shows "bit_count (F⇩e f) ≤
6 + 2 * (log 2 (¦m¦ + 2) + log 2 (¦e¦ + 1))"
proof -
have b:" (r + 1) * int i ≤ r * (2 ^ i - 1) + 1"
if b_assms: "r ≥ 1" for r :: int and i :: nat
proof (cases "i > 0")
case True
have "(r + 1) * int i = r * i + 2 * int ((i-1)+1) - i"
using True by (simp add:algebra_simps)
also have "... ≤ r * i + int (2^1) * int (2^(i-1)) - i"
using b_assms
by (intro add_mono diff_mono mult_mono of_nat_mono suc_n_le_2_pow_n)
simp_all
also have "... = r * i + 2^i - i"
using True
by (subst of_nat_mult[symmetric], subst power_add[symmetric])
simp
also have "... = r *i + 1 * (2 ^ i - int i - 1) + 1" by simp
also have "... ≤ r *i + r * (2 ^ i - int i - 1) + 1"
using b_assms
by (intro add_mono mult_mono, simp_all)
also have "... = r * (2 ^ i - 1) + 1"
by (simp add:algebra_simps)
finally show ?thesis by simp
next
case False
hence "i = 0" by simp
then show ?thesis by simp
qed
have a:"log 2 (¦mantissa f¦ + 1) + log 2 (¦exponent f¦ + 1) ≤
log 2 (¦m¦+2) + log 2 (¦e¦+1)"
proof (cases "f=0")
case True then show ?thesis by simp
next
case False
moreover have "f = Float m e"
by (simp add:f_def Float.abs_eq)
ultimately obtain i :: nat
where m_def: "m = mantissa f * 2 ^ i"
and e_def: "e = exponent f - i"
using denormalize_shift by blast
have mantissa_ge_1: "1 ≤ ¦mantissa f¦"
using False mantissa_noteq_0 by fastforce
have "(¦mantissa f¦ + 1) * (¦exponent f¦ + 1) =
(¦mantissa f¦ + 1) * (¦e+i¦+1)"
by (simp add:e_def)
also have "... ≤ (¦mantissa f¦ + 1) * ((¦e¦+¦i¦)+1)"
by (intro mult_mono add_mono, simp_all)
also have "... = (¦mantissa f¦ + 1) * ((¦e¦+1)+i)"
by simp
also have "... = (¦mantissa f¦ + 1) * (¦e¦+1) + (¦mantissa f¦+1)*i"
by (simp add:algebra_simps)
also have "... ≤ (¦mantissa f¦ + 1) * (¦e¦+1) + (¦mantissa f¦ * (2^i-1)+1)"
by (intro add_mono b mantissa_ge_1, simp)
also have "... = (¦mantissa f¦ + 1) * (¦e¦+1) + (¦mantissa f¦ * (2^i-1)+1)*(1)"
by simp
also have
"... ≤ (¦mantissa f¦ + 1) * (¦e¦+1) + (¦mantissa f¦* (2^i-1)+1)*(¦e¦+1)"
by (intro add_mono mult_left_mono, simp_all)
also have "... = ((¦mantissa f¦ + 1)+(¦mantissa f¦* (2^i-1)+1))*(¦e¦+1)"
by (simp add:algebra_simps)
also have "... = (¦mantissa f¦*2^i+2)*(¦e¦+1)"
by (simp add:algebra_simps)
also have "... = (¦m¦+2)*(¦e¦+1)"
by (simp add:m_def abs_mult)
finally have "(¦mantissa f¦ + 1) * (¦exponent f¦ + 1) ≤ (¦m¦+2)*(¦e¦+1)"
by simp
hence "(¦real_of_int (mantissa f)¦ + 1) * (¦of_int (exponent f)¦ + 1) ≤
(¦of_int m¦+2)*(¦of_int e¦+1)"
by (simp flip:of_int_abs) (metis (mono_tags, opaque_lifting) numeral_One
of_int_add of_int_le_iff of_int_mult of_int_numeral)
then show ?thesis by (simp flip: log_mult_pos)
qed
have "bit_count (F⇩e f) ≤
6 + 2 * (log 2 (¦mantissa f¦ + 1) + log 2 (¦exponent f¦ + 1))"
using float_bit_count_1 by simp
also have "... ≤ 6 + 2 * (log 2 (¦m¦ + 2) + log 2 (¦e¦ + 1))"
using a by simp
finally show ?thesis by simp
qed
lemma float_bit_count_zero:
"bit_count (F⇩e (float_of 0)) = 2"
by (simp add:F⇩e_def dependent_bit_count int_bit_count
zero_float.abs_eq[symmetric])
end