Theory Frequency_Moment_2
section ‹Frequency Moment $2$›
theory Frequency_Moment_2
imports
Universal_Hash_Families.Carter_Wegman_Hash_Family
Equivalence_Relation_Enumeration.Equivalence_Relation_Enumeration
Landau_Ext
Median_Method.Median
Probability_Ext
Universal_Hash_Families.Universal_Hash_Families_More_Product_PMF
Frequency_Moments
begin
hide_const (open) Discrete_Topology.discrete
hide_const (open) Isolated.discrete
text ‹This section contains a formalization of the algorithm for the second frequency moment.
It is based on the algorithm described in \<^cite>‹‹\textsection 2.2› in "alon1999"›.
The only difference is that the algorithm is adapted to work with prime field of odd order, which
greatly reduces the implementation complexity.›
fun f2_hash where
"f2_hash p h k = (if even (ring.hash (ring_of (mod_ring p)) k h) then int p - 1 else - int p - 1)"
type_synonym f2_state = "nat × nat × nat × (nat × nat ⇒ nat list) × (nat × nat ⇒ int)"
fun f2_init :: "rat ⇒ rat ⇒ nat ⇒ f2_state pmf" where
"f2_init δ ε n =
do {
let s⇩1 = nat ⌈6 / δ⇧2⌉;
let s⇩2 = nat ⌈-(18 * ln (real_of_rat ε))⌉;
let p = prime_above (max n 3);
h ← prod_pmf ({..<s⇩1}×{..<s⇩2}) (λ_. pmf_of_set (bounded_degree_polynomials (ring_of (mod_ring p)) 4));
return_pmf (s⇩1, s⇩2, p, h, (λ_ ∈ {..<s⇩1} × {..<s⇩2}. (0 :: int)))
}"
fun f2_update :: "nat ⇒ f2_state ⇒ f2_state pmf" where
"f2_update x (s⇩1, s⇩2, p, h, sketch) =
return_pmf (s⇩1, s⇩2, p, h, λi ∈ {..<s⇩1} × {..<s⇩2}. f2_hash p (h i) x + sketch i)"
fun f2_result :: "f2_state ⇒ rat pmf" where
"f2_result (s⇩1, s⇩2, p, h, sketch) =
return_pmf (median s⇩2 (λi⇩2 ∈ {..<s⇩2}.
(∑i⇩1∈{..<s⇩1} . (rat_of_int (sketch (i⇩1, i⇩2)))⇧2) / (((rat_of_nat p)⇧2-1) * rat_of_nat s⇩1)))"
fun f2_space_usage :: "(nat × nat × rat × rat) ⇒ real" where
"f2_space_usage (n, m, ε, δ) = (
let s⇩1 = nat ⌈6 / δ⇧2 ⌉ in
let s⇩2 = nat ⌈-(18 * ln (real_of_rat ε))⌉ in
3 +
2 * log 2 (s⇩1 + 1) +
2 * log 2 (s⇩2 + 1) +
2 * log 2 (9 + 2 * real n) +
s⇩1 * s⇩2 * (5 + 4*log 2 (8 + 2 * real n) + 2 * log 2 (real m * (18 + 4 * real n) + 1 )))"
definition encode_f2_state :: "f2_state ⇒ bool list option" where
"encode_f2_state =
N⇩e ⋈⇩e (λs⇩1.
N⇩e ⋈⇩e (λs⇩2.
N⇩e ⋈⇩e (λp.
(List.product [0..<s⇩1] [0..<s⇩2] →⇩e P⇩e p 4) ×⇩e
(List.product [0..<s⇩1] [0..<s⇩2] →⇩e I⇩e))))"
lemma "inj_on encode_f2_state (dom encode_f2_state)"
proof -
have " is_encoding encode_f2_state"
unfolding encode_f2_state_def
by (intro dependent_encoding exp_golomb_encoding fun_encoding list_encoding int_encoding poly_encoding)
thus ?thesis
by (rule encoding_imp_inj)
qed
context
fixes ε δ :: rat
fixes n :: nat
fixes as :: "nat list"
fixes result
assumes ε_range: "ε ∈ {0<..<1}"
assumes δ_range: "δ > 0"
assumes as_range: "set as ⊆ {..<n}"
defines "result ≡ fold (λa state. state ⤜ f2_update a) as (f2_init δ ε n) ⤜ f2_result"
begin
private definition s⇩1 where "s⇩1 = nat ⌈6 / δ⇧2⌉"
lemma s1_gt_0: "s⇩1 > 0"
using δ_range by (simp add:s⇩1_def)
private definition s⇩2 where "s⇩2 = nat ⌈-(18* ln (real_of_rat ε))⌉"
lemma s2_gt_0: "s⇩2 > 0"
using ε_range by (simp add:s⇩2_def)
private definition p where "p = prime_above (max n 3)"
lemma p_prime: "Factorial_Ring.prime p"
unfolding p_def using prime_above_prime by blast
lemma p_ge_3: "p ≥ 3"
unfolding p_def by (meson max.boundedE prime_above_lower_bound)
lemma p_gt_0: "p > 0" using p_ge_3 by linarith
lemma p_gt_1: "p > 1" using p_ge_3 by simp
lemma p_ge_n: "p ≥ n" unfolding p_def
by (meson max.boundedE prime_above_lower_bound )
interpretation carter_wegman_hash_family "ring_of (mod_ring p)" 4
using carter_wegman_hash_familyI[OF mod_ring_is_field mod_ring_finite]
using p_prime by auto
definition sketch where "sketch = fold (λa state. state ⤜ f2_update a) as (f2_init δ ε n)"
private definition Ω where"Ω = prod_pmf ({..<s⇩1} × {..<s⇩2}) (λ_. pmf_of_set space)"
private definition Ω⇩p where"Ω⇩p = measure_pmf Ω"
private definition sketch_rv where "sketch_rv ω = of_int (sum_list (map (f2_hash p ω) as))^2"
private definition mean_rv where "mean_rv ω = (λi⇩2. (∑i⇩1 = 0..<s⇩1. sketch_rv (ω (i⇩1, i⇩2))) / (((of_nat p)⇧2 - 1) * of_nat s⇩1))"
private definition result_rv where "result_rv ω = median s⇩2 (λi⇩2∈{..<s⇩2}. mean_rv ω i⇩2)"
lemma mean_rv_alg_sketch:
"sketch = Ω ⤜ (λω. return_pmf (s⇩1, s⇩2, p, ω, λi ∈ {..<s⇩1} × {..<s⇩2}. sum_list (map (f2_hash p (ω i)) as)))"
proof -
have "sketch = fold (λa state. state ⤜ f2_update a) as (f2_init δ ε n)"
by (simp add:sketch_def)
also have "... = Ω ⤜ (λω. return_pmf (s⇩1, s⇩2, p, ω,
λi ∈ {..<s⇩1} × {..<s⇩2}. sum_list (map (f2_hash p (ω i)) as)))"
proof (induction as rule:rev_induct)
case Nil
then show ?case
by (simp add:s⇩1_def s⇩2_def space_def p_def[symmetric] Ω_def restrict_def Let_def)
next
case (snoc a as)
have "fold (λa state. state ⤜ f2_update a) (as @ [a]) (f2_init δ ε n) = Ω ⤜
(λω. return_pmf (s⇩1, s⇩2, p, ω, λs ∈ {..<s⇩1} × {..<s⇩2}. (∑x ← as. f2_hash p (ω s) x)) ⤜ f2_update a)"
using snoc by (simp add: bind_assoc_pmf restrict_def del:f2_hash.simps f2_init.simps)
also have "... = Ω ⤜ (λω. return_pmf (s⇩1, s⇩2, p, ω, λi ∈ {..<s⇩1} × {..<s⇩2}. (∑x ← as@[a]. f2_hash p (ω i) x)))"
by (subst bind_return_pmf) (simp add: add.commute del:f2_hash.simps cong:restrict_cong)
finally show ?case by blast
qed
finally show ?thesis by auto
qed
lemma distr: "result = map_pmf result_rv Ω"
proof -
have "result = sketch ⤜ f2_result"
by (simp add:result_def sketch_def)
also have "... = Ω ⤜ (λx. f2_result (s⇩1, s⇩2, p, x, λi∈{..<s⇩1} × {..<s⇩2}. sum_list (map (f2_hash p (x i)) as)))"
by (simp add: mean_rv_alg_sketch bind_assoc_pmf bind_return_pmf)
also have "... = map_pmf result_rv Ω"
by (simp add:map_pmf_def result_rv_def mean_rv_def sketch_rv_def lessThan_atLeast0 cong:restrict_cong)
finally show ?thesis by simp
qed
private lemma f2_hash_pow_exp:
assumes "k < p"
shows
"expectation (λω. real_of_int (f2_hash p ω k) ^m) =
((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p)"
proof -
have "odd p" using p_prime p_ge_3 prime_odd_nat assms by simp
then obtain t where t_def: "p=2*t+1"
using oddE by blast
have "Collect even ∩ {..<2 * t + 1} ⊆ (*) 2 ` {..<t + 1}"
by (rule in_image_by_witness[where g="λx. x div 2"], simp, linarith)
moreover have " (*) 2 ` {..<t + 1} ⊆ Collect even ∩ {..<2 * t + 1}"
by (rule image_subsetI, simp)
ultimately have "card ({k. even k} ∩ {..<p}) = card ((λx. 2*x) ` {..<t+1})"
unfolding t_def using order_antisym by metis
also have "... = card {..<t+1}"
by (rule card_image, simp add: inj_on_mult)
also have "... = t+1" by simp
finally have card_even: "card ({k. even k} ∩ {..<p}) = t+1" by simp
hence "card ({k. even k} ∩ {..<p}) * 2 = (p+1)" by (simp add:t_def)
hence prob_even: "prob {ω. hash k ω ∈ Collect even} = (real p + 1)/(2*real p)"
using assms
by (subst prob_range, auto simp:frac_eq_eq p_gt_0 mod_ring_def ring_of_def lessThan_def)
have "p = card {..<p}" by simp
also have "... = card (({k. odd k} ∩ {..<p}) ∪ ({k. even k} ∩ {..<p}))"
by (rule arg_cong[where f="card"], auto)
also have "... = card ({k. odd k} ∩ {..<p}) + card ({k. even k} ∩ {..<p})"
by (rule card_Un_disjoint, simp, simp, blast)
also have "... = card ({k. odd k} ∩ {..<p}) + t+1"
by (simp add:card_even)
finally have "p = card ({k. odd k} ∩ {..<p}) + t+1"
by simp
hence "card ({k. odd k} ∩ {..<p}) * 2 = (p-1)"
by (simp add:t_def)
hence prob_odd: "prob {ω. hash k ω ∈ Collect odd} = (real p - 1)/(2*real p)"
using assms
by (subst prob_range, auto simp add: frac_eq_eq mod_ring_def ring_of_def lessThan_def)
have "expectation (λx. real_of_int (f2_hash p x k) ^ m) =
expectation (λω. indicator {ω. even (hash k ω)} ω * (real p - 1)^m +
indicator {ω. odd (hash k ω)} ω * (-real p - 1)^m)"
by (rule Bochner_Integration.integral_cong, simp, simp)
also have "... =
prob {ω. hash k ω ∈ Collect even} * (real p - 1) ^ m +
prob {ω. hash k ω ∈ Collect odd} * (-real p - 1) ^ m "
by (simp, simp add:M_def)
also have "... = (real p + 1) * (real p - 1) ^ m / (2 * real p) + (real p - 1) * (- real p - 1) ^ m / (2 * real p)"
by (subst prob_even, subst prob_odd, simp)
also have "... =
((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p)"
by (simp add:add_divide_distrib ac_simps)
finally show "expectation (λx. real_of_int (f2_hash p x k) ^ m) =
((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p)" by simp
qed
lemma
shows var_sketch_rv:"variance sketch_rv ≤ 2*(real_of_rat (F 2 as)^2) * ((real p)⇧2-1)⇧2" (is "?A")
and exp_sketch_rv:"expectation sketch_rv = real_of_rat (F 2 as) * ((real p)⇧2-1)" (is "?B")
proof -
define h where "h = (λω x. real_of_int (f2_hash p ω x))"
define c where "c = (λx. real (count_list as x))"
define r where "r = (λ(m::nat). ((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p))"
define h_prod where "h_prod = (λas ω. prod_list (map (h ω) as))"
define exp_h_prod :: "nat list ⇒ real" where "exp_h_prod = (λas. (∏i ∈ set as. r (count_list as i)))"
have f_eq: "sketch_rv = (λω. (∑x ∈ set as. c x * h ω x)^2)"
by (rule ext, simp add:sketch_rv_def c_def h_def sum_list_eval del:f2_hash.simps)
have r_one: "r (Suc 0) = 0"
by (simp add:r_def algebra_simps)
have r_two: "r 2 = (real p^2-1)"
using p_gt_0 unfolding r_def power2_eq_square
by (simp add:nonzero_divide_eq_eq, simp add:algebra_simps)
have"(real p)^2 ≥ 2^2"
by (rule power_mono, use p_gt_1 in linarith, simp)
hence p_square_ge_4: "(real p)⇧2 ≥ 4" by simp
have "r 4 = (real p)^4+2*(real p)⇧2 - 3"
using p_gt_0 unfolding r_def
by (subst nonzero_divide_eq_eq, auto simp:power4_eq_xxxx power2_eq_square algebra_simps)
also have "... ≤ (real p)^4+2*(real p)⇧2 + 3"
by simp
also have "... ≤ 3 * r 2 * r 2"
using p_square_ge_4
by (simp add:r_two power4_eq_xxxx power2_eq_square algebra_simps mult_left_mono)
finally have r_four_est: "r 4 ≤ 3 * r 2 * r 2" by simp
have exp_h_prod_elim: "exp_h_prod = (λas. prod_list (map (r ∘ count_list as) (remdups as)))"
by (simp add:exp_h_prod_def prod.set_conv_list[symmetric])
have exp_h_prod: "⋀x. set x ⊆ set as ⟹ length x ≤ 4 ⟹ expectation (h_prod x) = exp_h_prod x"
proof -
fix x
assume "set x ⊆ set as"
hence x_sub_p: "set x ⊆ {..<p}" using as_range p_ge_n by auto
hence x_le_p: "⋀k. k ∈ set x ⟹ k < p" by auto
assume "length x ≤ 4"
hence card_x: "card (set x) ≤ 4" using card_length dual_order.trans by blast
have "set x ⊆ carrier (ring_of (mod_ring p))"
using x_sub_p by (simp add:mod_ring_def ring_of_def lessThan_def)
hence h_indep: "indep_vars (λ_. borel) (λi ω. h ω i ^ count_list x i) (set x)"
using k_wise_indep_vars_subset[OF k_wise_indep] card_x as_range h_def
by (auto intro:indep_vars_compose2[where X="hash" and M'=" (λ_. discrete)"])
have "expectation (h_prod x) = expectation (λω. ∏ i ∈ set x. h ω i^(count_list x i))"
by (simp add:h_prod_def prod_list_eval)
also have "... = (∏i ∈ set x. expectation (λω. h ω i^(count_list x i)))"
by (simp add: indep_vars_lebesgue_integral[OF _ h_indep])
also have "... = (∏i ∈ set x. r (count_list x i))"
using f2_hash_pow_exp x_le_p
by (simp add:h_def r_def M_def[symmetric] del:f2_hash.simps)
also have "... = exp_h_prod x"
by (simp add:exp_h_prod_def)
finally show "expectation (h_prod x) = exp_h_prod x" by simp
qed
have "⋀x y. kernel_of x = kernel_of y ⟹ exp_h_prod x = exp_h_prod y"
proof -
fix x y :: "nat list"
assume a:"kernel_of x = kernel_of y"
then obtain f where b:"bij_betw f (set x) (set y)" and c:"⋀z. z ∈ set x ⟹ count_list x z = count_list y (f z)"
using kernel_of_eq_imp_bij by blast
have "exp_h_prod x = prod ( (λi. r(count_list y i)) ∘ f) (set x)"
by (simp add:exp_h_prod_def c)
also have "... = (∏i ∈ f ` (set x). r(count_list y i))"
by (metis b bij_betw_def prod.reindex)
also have "... = exp_h_prod y"
unfolding exp_h_prod_def
by (rule prod.cong, metis b bij_betw_def) simp
finally show "exp_h_prod x = exp_h_prod y" by simp
qed
hence exp_h_prod_cong: "⋀p x. of_bool (kernel_of x = kernel_of p) * exp_h_prod p =
of_bool (kernel_of x = kernel_of p) * exp_h_prod x"
by (metis (full_types) of_bool_eq_0_iff vector_space_over_itself.scale_zero_left)
have c:"(∑p←enum_rgfs n. of_bool (kernel_of xs = kernel_of p) * r) = r"
if a:"length xs = n" for xs :: "nat list" and n and r :: real
proof -
have "(∑p←enum_rgfs n. of_bool (kernel_of xs = kernel_of p) * 1) = (1::real)"
using equiv_rels_2[OF a[symmetric]] by (simp add:equiv_rels_def comp_def)
thus "(∑p←enum_rgfs n. of_bool (kernel_of xs = kernel_of p) * r) = (r::real)"
by (simp add:sum_list_mult_const)
qed
have "expectation sketch_rv = (∑i∈set as. (∑j∈set as. c i * c j * expectation (h_prod [i,j])))"
by (simp add:f_eq h_prod_def power2_eq_square sum_distrib_left sum_distrib_right Bochner_Integration.integral_sum algebra_simps)
also have "... = (∑i∈set as. (∑j∈set as. c i * c j * exp_h_prod [i,j]))"
by (simp add:exp_h_prod)
also have "... = (∑i ∈ set as. (∑j ∈ set as.
c i * c j * (sum_list (map (λp. of_bool (kernel_of [i,j] = kernel_of p) * exp_h_prod p) (enum_rgfs 2)))))"
by (subst exp_h_prod_cong, simp add:c)
also have "... = (∑i∈set as. c i * c i * r 2)"
by (simp add: numeral_eq_Suc kernel_of_eq All_less_Suc exp_h_prod_elim r_one distrib_left sum.distrib sum_collapse)
also have "... = real_of_rat (F 2 as) * ((real p)^2-1)"
by (simp add: sum_distrib_right[symmetric] c_def F_def power2_eq_square of_rat_sum of_rat_mult r_two)
finally show b:?B by simp
have "expectation (λx. (sketch_rv x)⇧2) = (∑i1 ∈ set as. (∑i2 ∈ set as. (∑i3 ∈ set as. (∑i4 ∈ set as.
c i1 * c i2 * c i3 * c i4 * expectation (h_prod [i1, i2, i3, i4])))))"
by (simp add:f_eq h_prod_def power4_eq_xxxx sum_distrib_left sum_distrib_right Bochner_Integration.integral_sum algebra_simps)
also have "... = (∑i1 ∈ set as. (∑i2 ∈ set as. (∑i3 ∈ set as. (∑i4 ∈ set as.
c i1 * c i2 * c i3 * c i4 * exp_h_prod [i1,i2,i3,i4]))))"
by (simp add:exp_h_prod)
also have "... = (∑i1 ∈ set as. (∑i2 ∈ set as. (∑i3 ∈ set as. (∑i4 ∈ set as.
c i1 * c i2 * c i3 * c i4 *
(sum_list (map (λp. of_bool (kernel_of [i1,i2,i3,i4] = kernel_of p) * exp_h_prod p) (enum_rgfs 4)))))))"
by (subst exp_h_prod_cong, simp add:c)
also have "... =
3 * (∑i ∈ set as. (∑j ∈ set as. c i^2 * c j^2 * r 2 * r 2)) + ((∑ i ∈ set as. c i^4 * r 4) - 3 * (∑ i ∈ set as. c i ^ 4 * r 2 * r 2))"
apply (simp add: numeral_eq_Suc exp_h_prod_elim r_one)
apply (simp add: kernel_of_eq All_less_Suc numeral_eq_Suc distrib_left sum.distrib sum_collapse neq_commute of_bool_not_iff)
apply (simp add: algebra_simps sum_subtractf sum_collapse)
apply (simp add: sum_distrib_left algebra_simps)
done
also have "... = 3 * (∑i ∈ set as. c i^2 * r 2)^2 + (∑ i ∈ set as. c i ^ 4 * (r 4 - 3 * r 2 * r 2))"
by (simp add:power2_eq_square sum_distrib_left algebra_simps sum_subtractf)
also have "... = 3 * (∑i ∈ set as. c i^2)^2 * (r 2)^2 + (∑i ∈ set as. c i ^ 4 * (r 4 - 3 * r 2 * r 2))"
by (simp add:power_mult_distrib sum_distrib_right[symmetric])
also have "... ≤ 3 * (∑i ∈ set as. c i^2)^2 * (r 2)^2 + (∑i ∈ set as. c i ^ 4 * 0)"
using r_four_est
by (auto intro!: sum_nonpos simp add:mult_nonneg_nonpos)
also have "... = 3 * (real_of_rat (F 2 as)^2) * ((real p)⇧2-1)⇧2"
by (simp add:c_def r_two F_def of_rat_sum of_rat_power)
finally have "expectation (λx. (sketch_rv x)⇧2) ≤ 3 * (real_of_rat (F 2 as)^2) * ((real p)⇧2-1)⇧2"
by simp
thus "variance sketch_rv ≤ 2*(real_of_rat (F 2 as)^2) * ((real p)⇧2-1)⇧2"
by (simp add: variance_eq, simp add:power_mult_distrib b)
qed
lemma space_omega_1 [simp]: "Sigma_Algebra.space Ω⇩p = UNIV"
by (simp add:Ω⇩p_def)
interpretation Ω: prob_space "Ω⇩p"
by (simp add:Ω⇩p_def prob_space_measure_pmf)
lemma integrable_Ω:
fixes f :: "((nat × nat) ⇒ (nat list)) ⇒ real"
shows "integrable Ω⇩p f"
unfolding Ω⇩p_def Ω_def
by (rule integrable_measure_pmf_finite, auto intro:finite_PiE simp:set_prod_pmf)
lemma sketch_rv_exp:
assumes "i⇩2 < s⇩2"
assumes "i⇩1 ∈ {0..<s⇩1}"
shows "Ω.expectation (λω. sketch_rv (ω (i⇩1, i⇩2))) = real_of_rat (F 2 as) * ((real p)⇧2 - 1)"
proof -
have "Ω.expectation (λω. (sketch_rv (ω (i⇩1, i⇩2))) :: real) = expectation sketch_rv"
using integrable_Ω integrable_M assms
unfolding Ω_def Ω⇩p_def M_def
by (subst expectation_Pi_pmf_slice, auto)
also have "... = (real_of_rat (F 2 as)) * ((real p)⇧2 - 1)"
using exp_sketch_rv by simp
finally show ?thesis by simp
qed
lemma sketch_rv_var:
assumes "i⇩2 < s⇩2"
assumes "i⇩1 ∈ {0..<s⇩1}"
shows "Ω.variance (λω. sketch_rv (ω (i⇩1, i⇩2))) ≤ 2 * (real_of_rat (F 2 as))⇧2 * ((real p)⇧2 - 1)⇧2"
proof -
have "Ω.variance (λω. (sketch_rv (ω (i⇩1, i⇩2)) :: real)) = variance sketch_rv"
using integrable_Ω integrable_M assms
unfolding Ω_def Ω⇩p_def M_def
by (subst variance_prod_pmf_slice, auto)
also have "... ≤ 2 * (real_of_rat (F 2 as))⇧2 * ((real p)⇧2 - 1)⇧2"
using var_sketch_rv by simp
finally show ?thesis by simp
qed
lemma mean_rv_exp:
assumes "i < s⇩2"
shows "Ω.expectation (λω. mean_rv ω i) = real_of_rat (F 2 as)"
proof -
have a:"(real p)⇧2 > 1" using p_gt_1 by simp
have "Ω.expectation (λω. mean_rv ω i) = (∑i⇩1 = 0..<s⇩1. Ω.expectation (λω. sketch_rv (ω (i⇩1, i)))) / (((real p)⇧2 - 1) * real s⇩1)"
using assms integrable_Ω by (simp add:mean_rv_def)
also have "... = (∑i⇩1 = 0..<s⇩1. real_of_rat (F 2 as) * ((real p)⇧2 - 1)) / (((real p)⇧2 - 1) * real s⇩1)"
using sketch_rv_exp[OF assms] by simp
also have "... = real_of_rat (F 2 as)"
using s1_gt_0 a by simp
finally show ?thesis by simp
qed
lemma mean_rv_var:
assumes "i < s⇩2"
shows "Ω.variance (λω. mean_rv ω i) ≤ (real_of_rat (δ * F 2 as))⇧2 / 3"
proof -
have a: "Ω.indep_vars (λ_. borel) (λi⇩1 x. sketch_rv (x (i⇩1, i))) {0..<s⇩1}"
using assms
unfolding Ω⇩p_def Ω_def
by (intro indep_vars_restrict_intro'[where f="fst"])
(auto simp add: restrict_dfl_def case_prod_beta lessThan_atLeast0)
have p_sq_ne_1: "(real p)^2 ≠ 1"
by (metis p_gt_1 less_numeral_extra(4) of_nat_power one_less_power pos2 semiring_char_0_class.of_nat_eq_1_iff)
have s1_bound: " 6 / (real_of_rat δ)⇧2 ≤ real s⇩1"
unfolding s⇩1_def
by (metis (mono_tags, opaque_lifting) of_rat_ceiling of_rat_divide of_rat_numeral_eq of_rat_power real_nat_ceiling_ge)
have "Ω.variance (λω. mean_rv ω i) = Ω.variance (λω. ∑i⇩1 = 0..<s⇩1. sketch_rv (ω (i⇩1, i))) / (((real p)⇧2 - 1) * real s⇩1)⇧2"
unfolding mean_rv_def by (subst Ω.variance_divide[OF integrable_Ω], simp)
also have "... = (∑i⇩1 = 0..<s⇩1. Ω.variance (λω. sketch_rv (ω (i⇩1, i)))) / (((real p)⇧2 - 1) * real s⇩1)⇧2"
by (subst Ω.bienaymes_identity_full_indep[OF _ _ integrable_Ω a]) (auto simp: Ω_def Ω⇩p_def)
also have "... ≤ (∑i⇩1 = 0..<s⇩1. 2*(real_of_rat (F 2 as)^2) * ((real p)⇧2-1)⇧2) / (((real p)⇧2 - 1) * real s⇩1)⇧2"
by (rule divide_right_mono, rule sum_mono[OF sketch_rv_var[OF assms]], auto)
also have "... = 2 * (real_of_rat (F 2 as)^2) / real s⇩1"
using p_sq_ne_1 s1_gt_0 by (subst frac_eq_eq, auto simp:power2_eq_square)
also have "... ≤ 2 * (real_of_rat (F 2 as)^2) / (6 / (real_of_rat δ)⇧2)"
using s1_gt_0 δ_range by (intro divide_left_mono mult_pos_pos s1_bound) auto
also have "... = (real_of_rat (δ * F 2 as))⇧2 / 3"
by (simp add:of_rat_mult algebra_simps)
finally show ?thesis by simp
qed
lemma mean_rv_bounds:
assumes "i < s⇩2"
shows "Ω.prob {ω. real_of_rat δ * real_of_rat (F 2 as) < ¦mean_rv ω i - real_of_rat (F 2 as)¦} ≤ 1/3"
proof (cases "as = []")
case True
then show ?thesis
using assms by (subst mean_rv_def, subst sketch_rv_def, simp add:F_def)
next
case False
hence "F 2 as > 0" using F_gr_0 by auto
hence a: "0 < real_of_rat (δ * F 2 as)"
using δ_range by simp
have [simp]: "(λω. mean_rv ω i) ∈ borel_measurable Ω⇩p"
by (simp add:Ω_def Ω⇩p_def)
have "Ω.prob {ω. real_of_rat δ * real_of_rat (F 2 as) < ¦mean_rv ω i - real_of_rat (F 2 as)¦} ≤
Ω.prob {ω. real_of_rat (δ * F 2 as) ≤ ¦mean_rv ω i - real_of_rat (F 2 as)¦}"
by (rule Ω.pmf_mono[OF Ω⇩p_def], simp add:of_rat_mult)
also have "... ≤ Ω.variance (λω. mean_rv ω i) / (real_of_rat (δ * F 2 as))⇧2"
using Ω.Chebyshev_inequality[where a="real_of_rat (δ * F 2 as)" and f="λω. mean_rv ω i",simplified]
a prob_space_measure_pmf[where p="Ω"] mean_rv_exp[OF assms] integrable_Ω by simp
also have "... ≤ ((real_of_rat (δ * F 2 as))⇧2/3) / (real_of_rat (δ * F 2 as))⇧2"
by (rule divide_right_mono, rule mean_rv_var[OF assms], simp)
also have "... = 1/3" using a by force
finally show ?thesis by blast
qed
lemma f2_alg_correct':
"𝒫(ω in measure_pmf result. ¦ω - F 2 as¦ ≤ δ * F 2 as) ≥ 1-of_rat ε"
proof -
have a: "Ω.indep_vars (λ_. borel) (λi ω. mean_rv ω i) {0..<s⇩2}"
using s1_gt_0 unfolding Ω⇩p_def Ω_def
by (intro indep_vars_restrict_intro'[where f="snd"])
(auto simp: Ω⇩p_def Ω_def mean_rv_def restrict_dfl_def)
have b: "- 18 * ln (real_of_rat ε) ≤ real s⇩2"
unfolding s⇩2_def using of_nat_ceiling by auto
have "1 - of_rat ε ≤ Ω.prob {ω. ¦median s⇩2 (mean_rv ω) - real_of_rat (F 2 as) ¦ ≤ of_rat δ * of_rat (F 2 as)}"
using ε_range Ω.median_bound_2[OF _ a b, where δ="real_of_rat δ * real_of_rat (F 2 as)"
and μ="real_of_rat (F 2 as)"] mean_rv_bounds
by simp
also have "... = Ω.prob {ω. ¦real_of_rat (result_rv ω) - of_rat (F 2 as) ¦ ≤ of_rat δ * of_rat (F 2 as)}"
by (simp add:result_rv_def median_restrict lessThan_atLeast0 median_rat[OF s2_gt_0]
mean_rv_def sketch_rv_def of_rat_divide of_rat_sum of_rat_mult of_rat_diff of_rat_power)
also have "... = Ω.prob {ω. ¦result_rv ω - F 2 as¦ ≤ δ * F 2 as} "
by (simp add:of_rat_less_eq of_rat_mult[symmetric] of_rat_diff[symmetric] set_eq_iff)
finally have "Ω.prob {y. ¦result_rv y - F 2 as¦ ≤ δ * F 2 as} ≥ 1-of_rat ε " by simp
thus ?thesis by (simp add: distr Ω⇩p_def)
qed
lemma f2_exact_space_usage':
"AE ω in sketch . bit_count (encode_f2_state ω) ≤ f2_space_usage (n, length as, ε, δ)"
proof -
have "p ≤ 2 * max n 3 + 2"
by (subst p_def, rule prime_above_upper_bound)
also have "... ≤ 2 * n + 8"
by (cases "n ≤ 2", simp_all)
finally have p_bound: "p ≤ 2 * n + 8"
by simp
have "bit_count (N⇩e p) ≤ ereal (2 * log 2 (real p + 1) + 1)"
by (rule exp_golomb_bit_count)
also have "... ≤ ereal (2 * log 2 (2 * real n + 9) + 1)"
using p_bound by simp
finally have p_bit_count: "bit_count (N⇩e p) ≤ ereal (2 * log 2 (2 * real n + 9) + 1)"
by simp
have a: "bit_count (encode_f2_state (s⇩1, s⇩2, p, y, λi∈{..<s⇩1} × {..<s⇩2}.
sum_list (map (f2_hash p (y i)) as))) ≤ ereal (f2_space_usage (n, length as, ε, δ))"
if a:"y∈{..<s⇩1} × {..<s⇩2} →⇩E bounded_degree_polynomials (ring_of (mod_ring p)) 4" for y
proof -
have "y ∈ extensional ({..<s⇩1} × {..<s⇩2})" using a PiE_iff by blast
hence y_ext: "y ∈ extensional (set (List.product [0..<s⇩1] [0..<s⇩2]))"
by (simp add:lessThan_atLeast0)
have h_bit_count_aux: "bit_count (P⇩e p 4 (y x)) ≤ ereal (4 + 4 * log 2 (8 + 2 * real n))"
if b:"x ∈ set (List.product [0..<s⇩1] [0..<s⇩2])" for x
proof -
have "y x ∈ bounded_degree_polynomials (ring_of (mod_ring p)) 4"
using b a by force
hence "bit_count (P⇩e p 4 (y x)) ≤ ereal (real 4 * (log 2 (real p) + 1))"
by (rule bounded_degree_polynomial_bit_count[OF p_gt_1] )
also have "... ≤ ereal (real 4 * (log 2 (8 + 2 * real n) + 1) )"
using p_gt_0 p_bound by simp
also have "... ≤ ereal (4 + 4 * log 2 (8 + 2 * real n))"
by simp
finally show ?thesis
by blast
qed
have h_bit_count:
"bit_count ((List.product [0..<s⇩1] [0..<s⇩2] →⇩e P⇩e p 4) y) ≤ ereal (real s⇩1 * real s⇩2 * (4 + 4 * log 2 (8 + 2 * real n)))"
using fun_bit_count_est[where e="P⇩e p 4", OF y_ext h_bit_count_aux]
by simp
have sketch_bit_count_aux:
"bit_count (I⇩e (sum_list (map (f2_hash p (y x)) as))) ≤ ereal (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1))" (is "?lhs ≤ ?rhs")
if " x ∈ {0..<s⇩1} × {0..<s⇩2}" for x
proof -
have "¦sum_list (map (f2_hash p (y x)) as)¦ ≤ sum_list (map (abs ∘ (f2_hash p (y x))) as)"
by (subst map_map[symmetric]) (rule sum_list_abs)
also have "... ≤ sum_list (map (λ_. (int p+1)) as)"
by (rule sum_list_mono) (simp add:p_gt_0)
also have "... = int (length as) * (int p+1)"
by (simp add: sum_list_triv)
also have "... ≤ int (length as) * (9+2*(int n))"
using p_bound by (intro mult_mono, auto)
finally have "¦sum_list (map (f2_hash p (y x)) as)¦ ≤ int (length as) * (9 + 2 * int n)" by simp
hence "?lhs ≤ ereal (2 * log 2 (real_of_int (2* (int (length as) * (9 + 2 * int n)) + 1)) + 1)"
by (rule int_bit_count_est)
also have "... = ?rhs" by (simp add:algebra_simps)
finally show "?thesis" by simp
qed
have
"bit_count ((List.product [0..<s⇩1] [0..<s⇩2] →⇩e I⇩e) (λi∈{..<s⇩1} × {..<s⇩2}. sum_list (map (f2_hash p (y i)) as)))
≤ ereal (real (length (List.product [0..<s⇩1] [0..<s⇩2]))) * (ereal (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1)))"
by (intro fun_bit_count_est)
(simp_all add:extensional_def lessThan_atLeast0 sketch_bit_count_aux del:f2_hash.simps)
also have "... = ereal (real s⇩1 * real s⇩2 * (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1)))"
by simp
finally have sketch_bit_count:
"bit_count ((List.product [0..<s⇩1] [0..<s⇩2] →⇩e I⇩e) (λi∈{..<s⇩1} × {..<s⇩2}. sum_list (map (f2_hash p (y i)) as))) ≤
ereal (real s⇩1 * real s⇩2 * (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1)))" by simp
have "bit_count (encode_f2_state (s⇩1, s⇩2, p, y, λi∈{..<s⇩1} × {..<s⇩2}. sum_list (map (f2_hash p (y i)) as))) ≤
bit_count (N⇩e s⇩1) + bit_count (N⇩e s⇩2) +bit_count (N⇩e p) +
bit_count ((List.product [0..<s⇩1] [0..<s⇩2] →⇩e P⇩e p 4) y) +
bit_count ((List.product [0..<s⇩1] [0..<s⇩2] →⇩e I⇩e) (λi∈{..<s⇩1} × {..<s⇩2}. sum_list (map (f2_hash p (y i)) as)))"
by (simp add:Let_def s⇩1_def s⇩2_def encode_f2_state_def dependent_bit_count add.assoc)
also have "... ≤ ereal (2 * log 2 (real s⇩1 + 1) + 1) + ereal (2 * log 2 (real s⇩2 + 1) + 1) + ereal (2 * log 2 (2 * real n + 9) + 1) +
(ereal (real s⇩1 * real s⇩2) * (4 + 4 * log 2 (8 + 2 * real n))) +
(ereal (real s⇩1 * real s⇩2) * (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1) ))"
by (intro add_mono exp_golomb_bit_count p_bit_count, auto intro: h_bit_count sketch_bit_count)
also have "... = ereal (f2_space_usage (n, length as, ε, δ))"
by (simp add:distrib_left add.commute s⇩1_def[symmetric] s⇩2_def[symmetric] Let_def)
finally show "bit_count (encode_f2_state (s⇩1, s⇩2, p, y, λi∈{..<s⇩1} × {..<s⇩2}. sum_list (map (f2_hash p (y i)) as))) ≤
ereal (f2_space_usage (n, length as, ε, δ))"
by simp
qed
have "set_pmf Ω = {..<s⇩1} × {..<s⇩2} →⇩E bounded_degree_polynomials (ring_of (mod_ring p)) 4"
by (simp add: Ω_def set_prod_pmf) (simp add: space_def)
thus ?thesis
by (simp add:mean_rv_alg_sketch AE_measure_pmf_iff del:f2_space_usage.simps, metis a)
qed
end
text ‹Main results of this section:›
theorem f2_alg_correct:
assumes "ε ∈ {0<..<1}"
assumes "δ > 0"
assumes "set as ⊆ {..<n}"
defines "Ω ≡ fold (λa state. state ⤜ f2_update a) as (f2_init δ ε n) ⤜ f2_result"
shows "𝒫(ω in measure_pmf Ω. ¦ω - F 2 as¦ ≤ δ * F 2 as) ≥ 1-of_rat ε"
using f2_alg_correct'[OF assms(1,2,3)] Ω_def by auto
theorem f2_exact_space_usage:
assumes "ε ∈ {0<..<1}"
assumes "δ > 0"
assumes "set as ⊆ {..<n}"
defines "M ≡ fold (λa state. state ⤜ f2_update a) as (f2_init δ ε n)"
shows "AE ω in M. bit_count (encode_f2_state ω) ≤ f2_space_usage (n, length as, ε, δ)"
using f2_exact_space_usage'[OF assms(1,2,3)]
by (subst (asm) sketch_def[OF assms(1,2,3)], subst M_def, simp)
theorem f2_asymptotic_space_complexity:
"f2_space_usage ∈ O[at_top ×⇩F at_top ×⇩F at_right 0 ×⇩F at_right 0](λ (n, m, ε, δ).
(ln (1 / of_rat ε)) / (of_rat δ)⇧2 * (ln (real n) + ln (real m)))"
(is "_ ∈ O[?F](?rhs)")
proof -
define n_of :: "nat × nat × rat × rat ⇒ nat" where "n_of = (λ(n, m, ε, δ). n)"
define m_of :: "nat × nat × rat × rat ⇒ nat" where "m_of = (λ(n, m, ε, δ). m)"
define ε_of :: "nat × nat × rat × rat ⇒ rat" where "ε_of = (λ(n, m, ε, δ). ε)"
define δ_of :: "nat × nat × rat × rat ⇒ rat" where "δ_of = (λ(n, m, ε, δ). δ)"
define g where "g = (λx. (1/ (of_rat (δ_of x))⇧2) * (ln (1 / of_rat (ε_of x))) * (ln (real (n_of x)) + ln (real (m_of x))))"
have evt: "(⋀x.
0 < real_of_rat (δ_of x) ∧ 0 < real_of_rat (ε_of x) ∧
1/real_of_rat (δ_of x) ≥ δ ∧ 1/real_of_rat (ε_of x) ≥ ε ∧
real (n_of x) ≥ n ∧ real (m_of x) ≥ m⟹ P x)
⟹ eventually P ?F" (is "(⋀x. ?prem x ⟹ _) ⟹ _")
for δ ε n m P
apply (rule eventually_mono[where P="?prem" and Q="P"])
apply (simp add:ε_of_def case_prod_beta' δ_of_def n_of_def m_of_def)
apply (intro eventually_conj eventually_prod1' eventually_prod2'
sequentially_inf eventually_at_right_less inv_at_right_0_inf)
by (auto simp add:prod_filter_eq_bot)
have unit_1: "(λ_. 1) ∈ O[?F](λx. 1 / (real_of_rat (δ_of x))⇧2)"
using one_le_power
by (intro landau_o.big_mono evt[where δ="1"], auto simp add:power_one_over[symmetric])
have unit_2: "(λ_. 1) ∈ O[?F](λx. ln (1 / real_of_rat (ε_of x)))"
by (intro landau_o.big_mono evt[where ε="exp 1"])
(auto intro!:iffD2[OF ln_ge_iff] simp add:abs_ge_iff)
have unit_3: "(λ_. 1) ∈ O[?F](λx. real (n_of x))"
using of_nat_le_iff by (intro landau_o.big_mono evt; fastforce)
have unit_4: "(λ_. 1) ∈ O[?F](λx. real (m_of x))"
using of_nat_le_iff by (intro landau_o.big_mono evt; fastforce)
have unit_5: "(λ_. 1) ∈ O[?F](λx. ln (real (n_of x)))"
by (auto intro!: landau_o.big_mono evt[where n="exp 1"])
(metis abs_ge_self linorder_not_le ln_ge_iff not_exp_le_zero order.trans)
have unit_6: "(λ_. 1) ∈ O[?F](λx. ln (real (n_of x)) + ln (real (m_of x)))"
by (intro landau_sum_1 evt[where m=1 and n=1] unit_5 iffD2[OF ln_ge_iff]) auto
have unit_7: "(λ_. 1) ∈ O[?F](λx. 1 / real_of_rat (ε_of x))"
by (intro landau_o.big_mono evt[where ε="1"], auto)
have unit_8: "(λ_. 1) ∈ O[?F](g)"
unfolding g_def by (intro landau_o.big_mult_1 unit_1 unit_2 unit_6)
have unit_9: "(λ_. 1) ∈ O[?F](λx. real (n_of x) * real (m_of x))"
by (intro landau_o.big_mult_1 unit_3 unit_4)
have " (λx. 6 * (1 / (real_of_rat (δ_of x))⇧2)) ∈ O[?F](λx. 1 / (real_of_rat (δ_of x))⇧2)"
by (subst landau_o.big.cmult_in_iff, simp_all)
hence l1: "(λx. real (nat ⌈6 / (δ_of x)⇧2⌉)) ∈ O[?F](λx. 1 / (real_of_rat (δ_of x))⇧2)"
by (intro landau_real_nat landau_rat_ceil[OF unit_1]) (simp_all add:of_rat_divide of_rat_power)
have "(λx. - ( ln (real_of_rat (ε_of x)))) ∈ O[?F](λx. ln (1 / real_of_rat (ε_of x)))"
by (intro landau_o.big_mono evt) (subst ln_div, auto)
hence l2: "(λx. real (nat ⌈- (18 * ln (real_of_rat (ε_of x)))⌉)) ∈ O[?F](λx. ln (1 / real_of_rat (ε_of x)))"
by (intro landau_real_nat landau_ceil[OF unit_2], simp)
have l3_aux: " (λx. real (m_of x) * (18 + 4 * real (n_of x)) + 1) ∈ O[?F](λx. real (n_of x) * real (m_of x))"
by (rule sum_in_bigo[OF _unit_9], subst mult.commute)
(intro landau_o.mult sum_in_bigo, auto simp:unit_3)
note of_nat_int_ceiling [simp del]
have "(λx. ln (real (m_of x) * (18 + 4 * real (n_of x)) + 1)) ∈ O[?F](λx. ln (real (n_of x) * real (m_of x)))"
apply (rule landau_ln_2[where a="2"], simp, simp)
apply (rule evt[where m="2" and n="1"])
apply (metis dual_order.trans mult_left_mono mult_of_nat_commute of_nat_0_le_iff verit_prod_simplify(1))
using l3_aux by simp
also have "(λx. ln (real (n_of x) * real (m_of x))) ∈ O[?F](λx. ln (real (n_of x)) + ln(real (m_of x)))"
by (intro landau_o.big_mono evt[where m=1 and n=1], auto simp add:ln_mult)
finally have l3: "(λx. ln (real (m_of x) * (18 + 4 * real (n_of x)) + 1)) ∈ O[?F](λx. ln (real (n_of x)) + ln (real (m_of x)))"
using landau_o.big_trans by simp
have §: "(λx. q + 2 * real (n_of x))
∈ O[sequentially ×⇩F sequentially ×⇩F at_right 0 ×⇩F at_right 0](λx. real (n_of x))"
if "q>0" for q
using that
by (auto intro!: sum_in_bigo simp add:unit_3)
have l4: "(λx. ln (8 + 2 * real (n_of x))) ∈ O[?F](λx. ln (real (n_of x)) + ln (real (m_of x)))"
by (intro § landau_sum_1 evt[where m=1 and n="2"] landau_ln_2[where a="2"] iffD2[OF ln_ge_iff]) auto
have l5: "(λx. ln (9 + 2 * real (n_of x))) ∈ O[?F](λx. ln (real (n_of x)) + ln (real (m_of x)))"
by (intro § landau_sum_1 evt[where m=1 and n="2"] landau_ln_2[where a="2"] iffD2[OF ln_ge_iff]) auto
have l6: "(λx. ln (real (nat ⌈6 / (δ_of x)⇧2⌉) + 1)) ∈ O[?F](g)"
unfolding g_def
by (intro landau_o.big_mult_1 landau_ln_3 sum_in_bigo unit_6 unit_2 l1 unit_1, simp)
have l7: "(λx. ln (9 + 2 * real (n_of x))) ∈ O[?F](g)"
unfolding g_def
by (intro landau_o.big_mult_1' unit_1 unit_2 l5)
have l8: "(λx. ln (real (nat ⌈- (18 * ln (real_of_rat (ε_of x)))⌉) + 1) ) ∈ O[?F](g)"
unfolding g_def
by (intro landau_o.big_mult_1 unit_6 landau_o.big_mult_1' unit_1 landau_ln_3 sum_in_bigo l2 unit_2) simp
have l9: "(λx. 5 + 4 * ln (8 + 2 * real (n_of x)) / ln 2 + 2 * ln (real (m_of x) * (18 + 4 * real (n_of x)) + 1) / ln 2)
∈ O[?F](λx. ln (real (n_of x)) + ln (real (m_of x)))"
by (intro sum_in_bigo, auto simp: l3 l4 unit_6)
have l10: "(λx. real (nat ⌈6 / (δ_of x)⇧2⌉) * real (nat ⌈- (18 * ln (real_of_rat (ε_of x)))⌉) *
(5 + 4 * ln (8 + 2 * real (n_of x)) / ln 2 + 2 * ln(real (m_of x) * (18 + 4 * real (n_of x)) + 1) / ln 2))
∈ O[?F](g)"
unfolding g_def by (intro landau_o.mult, auto simp: l1 l2 l9)
have "f2_space_usage = (λx. f2_space_usage (n_of x, m_of x, ε_of x, δ_of x))"
by (simp add:case_prod_beta' n_of_def ε_of_def δ_of_def m_of_def)
also have "... ∈ O[?F](g)"
by (auto intro!:sum_in_bigo simp:Let_def log_def l6 l7 l8 l10 unit_8)
also have "... = O[?F](?rhs)"
by (simp add:case_prod_beta' g_def n_of_def ε_of_def δ_of_def m_of_def)
finally show ?thesis by simp
qed
end