# Theory Frequency_Moment_2

```section ‹Frequency Moment \$2\$›

theory Frequency_Moment_2
imports
Universal_Hash_Families.Carter_Wegman_Hash_Family
Equivalence_Relation_Enumeration.Equivalence_Relation_Enumeration
Landau_Ext
Median_Method.Median
Probability_Ext
Universal_Hash_Families.Universal_Hash_Families_More_Product_PMF
Frequency_Moments
begin

hide_const (open) Discrete_Topology.discrete
hide_const (open) Isolated.discrete

text ‹This section contains a formalization of the algorithm for the second frequency moment.
It is based on the algorithm described in \<^cite>‹‹\textsection 2.2› in "alon1999"›.
The only difference is that the algorithm is adapted to work with prime field of odd order, which
greatly reduces the implementation complexity.›

fun f2_hash where
"f2_hash p h k = (if even (ring.hash (ring_of (mod_ring p)) k h) then int p - 1 else - int p - 1)"

type_synonym f2_state = "nat × nat × nat × (nat × nat ⇒ nat list) × (nat × nat ⇒ int)"

fun f2_init :: "rat ⇒ rat ⇒ nat ⇒ f2_state pmf" where
"f2_init δ ε n =
do {
let s⇩1 = nat ⌈6 / δ⇧2⌉;
let s⇩2 = nat ⌈-(18 * ln (real_of_rat ε))⌉;
let p = prime_above (max n 3);
h ← prod_pmf ({..<s⇩1}×{..<s⇩2}) (λ_. pmf_of_set (bounded_degree_polynomials (ring_of (mod_ring p)) 4));
return_pmf (s⇩1, s⇩2, p, h, (λ_ ∈ {..<s⇩1} × {..<s⇩2}. (0 :: int)))
}"

fun f2_update :: "nat ⇒ f2_state ⇒ f2_state pmf" where
"f2_update x (s⇩1, s⇩2, p, h, sketch) =
return_pmf (s⇩1, s⇩2, p, h, λi ∈ {..<s⇩1} × {..<s⇩2}. f2_hash p (h i) x + sketch i)"

fun f2_result :: "f2_state ⇒ rat pmf" where
"f2_result (s⇩1, s⇩2, p, h, sketch) =
return_pmf (median s⇩2 (λi⇩2 ∈ {..<s⇩2}.
(∑i⇩1∈{..<s⇩1} . (rat_of_int (sketch (i⇩1, i⇩2)))⇧2) / (((rat_of_nat p)⇧2-1) * rat_of_nat s⇩1)))"

fun f2_space_usage :: "(nat × nat × rat × rat) ⇒ real" where
"f2_space_usage (n, m, ε, δ) = (
let s⇩1 = nat ⌈6 / δ⇧2 ⌉ in
let s⇩2 = nat ⌈-(18 * ln (real_of_rat ε))⌉ in
3 +
2 * log 2 (s⇩1 + 1) +
2 * log 2 (s⇩2 + 1) +
2 * log 2 (9 + 2 * real n) +
s⇩1 * s⇩2 * (5 + 4*log 2 (8 + 2 * real n) + 2 * log 2 (real m * (18 + 4 * real n) + 1 )))"

definition encode_f2_state :: "f2_state ⇒ bool list option" where
"encode_f2_state =
N⇩e ⋈⇩e (λs⇩1.
N⇩e ⋈⇩e (λs⇩2.
N⇩e ⋈⇩e (λp.
(List.product [0..<s⇩1] [0..<s⇩2] →⇩e P⇩e p 4) ×⇩e
(List.product [0..<s⇩1] [0..<s⇩2] →⇩e I⇩e))))"

lemma "inj_on encode_f2_state (dom encode_f2_state)"
proof -
have " is_encoding encode_f2_state"
unfolding encode_f2_state_def
by (intro dependent_encoding exp_golomb_encoding fun_encoding list_encoding int_encoding poly_encoding)

thus ?thesis
by (rule encoding_imp_inj)
qed

context
fixes ε δ :: rat
fixes n :: nat
fixes as :: "nat list"
fixes result
assumes ε_range: "ε ∈ {0<..<1}"
assumes δ_range: "δ > 0"
assumes as_range: "set as ⊆ {..<n}"
defines "result ≡ fold (λa state. state ⤜ f2_update a) as (f2_init δ ε n) ⤜ f2_result"
begin

private definition s⇩1 where "s⇩1 = nat ⌈6 / δ⇧2⌉"

lemma s1_gt_0: "s⇩1 > 0"

private definition s⇩2 where "s⇩2 = nat ⌈-(18* ln (real_of_rat ε))⌉"

lemma s2_gt_0: "s⇩2 > 0"

private definition p where "p = prime_above (max n 3)"

lemma p_prime: "Factorial_Ring.prime p"
unfolding p_def using prime_above_prime by blast

lemma p_ge_3: "p ≥ 3"
unfolding p_def by (meson max.boundedE prime_above_lower_bound)

lemma p_gt_0: "p > 0" using p_ge_3 by linarith

lemma p_gt_1: "p > 1" using p_ge_3 by simp

lemma p_ge_n: "p ≥ n" unfolding p_def
by (meson max.boundedE prime_above_lower_bound )

interpretation carter_wegman_hash_family "ring_of (mod_ring p)" 4
using carter_wegman_hash_familyI[OF mod_ring_is_field mod_ring_finite]
using p_prime by auto

definition sketch where "sketch = fold (λa state. state ⤜ f2_update a) as (f2_init δ ε n)"
private definition Ω where"Ω = prod_pmf ({..<s⇩1} × {..<s⇩2}) (λ_. pmf_of_set space)"
private definition Ω⇩p where"Ω⇩p = measure_pmf Ω"
private definition sketch_rv where "sketch_rv ω = of_int (sum_list (map (f2_hash p ω) as))^2"
private definition mean_rv where "mean_rv ω = (λi⇩2. (∑i⇩1 = 0..<s⇩1. sketch_rv (ω (i⇩1, i⇩2))) / (((of_nat p)⇧2 - 1) * of_nat s⇩1))"
private definition result_rv where "result_rv ω = median s⇩2 (λi⇩2∈{..<s⇩2}. mean_rv ω i⇩2)"

lemma mean_rv_alg_sketch:
"sketch = Ω ⤜ (λω. return_pmf (s⇩1, s⇩2, p, ω, λi ∈ {..<s⇩1} × {..<s⇩2}. sum_list (map (f2_hash p (ω i)) as)))"
proof -
have "sketch =  fold (λa state. state ⤜ f2_update a) as (f2_init δ ε n)"
also have "... = Ω ⤜ (λω. return_pmf (s⇩1, s⇩2, p, ω,
λi ∈ {..<s⇩1} × {..<s⇩2}. sum_list (map (f2_hash p (ω i)) as)))"
proof (induction as rule:rev_induct)
case Nil
then show ?case
by (simp add:s⇩1_def s⇩2_def space_def p_def[symmetric] Ω_def restrict_def Let_def)
next
case (snoc a as)
have "fold (λa state. state ⤜ f2_update a) (as @ [a]) (f2_init δ ε n) = Ω ⤜
(λω. return_pmf (s⇩1, s⇩2, p, ω, λs ∈ {..<s⇩1} × {..<s⇩2}. (∑x ← as.  f2_hash p (ω s) x)) ⤜ f2_update a)"
using snoc by (simp add: bind_assoc_pmf restrict_def del:f2_hash.simps f2_init.simps)
also have "... =  Ω ⤜ (λω. return_pmf (s⇩1, s⇩2, p, ω, λi ∈ {..<s⇩1} × {..<s⇩2}. (∑x ← as@[a].  f2_hash p (ω i) x)))"
finally show ?case by blast
qed
finally show ?thesis by auto
qed

lemma distr:  "result = map_pmf result_rv Ω"
proof -
have "result = sketch ⤜ f2_result"
also have "... = Ω ⤜ (λx. f2_result (s⇩1, s⇩2, p, x, λi∈{..<s⇩1} × {..<s⇩2}. sum_list (map (f2_hash p (x i)) as)))"
by (simp add: mean_rv_alg_sketch  bind_assoc_pmf bind_return_pmf)
also have "... = map_pmf result_rv Ω"
by (simp add:map_pmf_def result_rv_def mean_rv_def sketch_rv_def lessThan_atLeast0 cong:restrict_cong)
finally show ?thesis by simp
qed

private lemma f2_hash_pow_exp:
assumes "k < p"
shows
"expectation (λω. real_of_int (f2_hash p ω k) ^m) =
((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p)"
proof -

have "odd p" using p_prime p_ge_3 prime_odd_nat assms by simp
then obtain t where t_def: "p=2*t+1"
using oddE by blast

have "Collect even ∩ {..<2 * t + 1} ⊆ (*) 2 ` {..<t + 1}"
by (rule in_image_by_witness[where g="λx. x div 2"], simp, linarith)
moreover have " (*) 2 ` {..<t + 1} ⊆ Collect even ∩ {..<2 * t + 1}"
by (rule image_subsetI, simp)
ultimately have "card ({k. even k} ∩ {..<p}) = card ((λx. 2*x) ` {..<t+1})"
unfolding t_def using order_antisym by metis
also have "... = card {..<t+1}"
by (rule card_image, simp add: inj_on_mult)
also have "... =  t+1" by simp
finally have card_even: "card ({k. even k} ∩ {..<p}) = t+1" by simp
hence "card ({k. even k} ∩ {..<p}) * 2 = (p+1)" by (simp add:t_def)
hence prob_even: "prob {ω. hash k ω ∈ Collect even} = (real p + 1)/(2*real p)"
using assms
by (subst prob_range, auto simp:frac_eq_eq p_gt_0 mod_ring_def ring_of_def lessThan_def)

have "p = card {..<p}" by simp
also have "... = card (({k. odd k} ∩ {..<p}) ∪ ({k. even k} ∩ {..<p}))"
by (rule arg_cong[where f="card"], auto)
also have "... = card ({k. odd k} ∩ {..<p}) +  card ({k. even k} ∩ {..<p})"
by (rule card_Un_disjoint, simp, simp, blast)
also have "... = card ({k. odd k} ∩ {..<p}) + t+1"
finally have "p = card ({k. odd k} ∩ {..<p}) + t+1"
by simp
hence "card ({k. odd k} ∩ {..<p}) * 2 = (p-1)"
hence prob_odd: "prob {ω. hash k ω ∈ Collect odd} = (real p - 1)/(2*real p)"
using assms
by (subst prob_range, auto simp add: frac_eq_eq mod_ring_def ring_of_def lessThan_def)

have "expectation (λx. real_of_int (f2_hash p x k) ^ m) =
expectation (λω. indicator {ω. even (hash k ω)} ω * (real p - 1)^m +
indicator {ω. odd (hash k ω)} ω * (-real p - 1)^m)"
by (rule Bochner_Integration.integral_cong, simp, simp)
also have "... =
prob {ω. hash  k ω ∈ Collect even}  * (real p - 1) ^ m  +
prob {ω. hash  k ω ∈ Collect odd}  * (-real p - 1) ^ m "
also have "... = (real p + 1) * (real p - 1) ^ m / (2 * real p) + (real p - 1) * (- real p - 1) ^ m / (2 * real p)"
by (subst prob_even, subst prob_odd, simp)
also have "... =
((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p)"
finally show "expectation (λx. real_of_int (f2_hash p x k) ^ m) =
((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p)" by simp
qed

lemma
shows var_sketch_rv:"variance sketch_rv ≤ 2*(real_of_rat (F 2 as)^2) * ((real p)⇧2-1)⇧2" (is "?A")
and exp_sketch_rv:"expectation sketch_rv = real_of_rat (F 2 as) * ((real p)⇧2-1)" (is "?B")
proof -
define h where "h = (λω x. real_of_int (f2_hash p ω x))"
define c where "c = (λx. real (count_list as x))"
define r where "r = (λ(m::nat). ((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p))"
define h_prod where "h_prod = (λas ω. prod_list (map (h ω) as))"

define exp_h_prod :: "nat list ⇒ real" where "exp_h_prod = (λas. (∏i ∈ set as. r (count_list as i)))"

have f_eq: "sketch_rv = (λω. (∑x ∈ set as. c x * h ω x)^2)"
by (rule ext, simp add:sketch_rv_def c_def h_def sum_list_eval del:f2_hash.simps)

have r_one: "r (Suc 0) = 0"

have r_two: "r 2 = (real p^2-1)"
using p_gt_0 unfolding r_def power2_eq_square

have"(real p)^2 ≥ 2^2"
by (rule power_mono, use p_gt_1 in linarith, simp)
hence p_square_ge_4: "(real p)⇧2 ≥ 4" by simp

have "r 4 = (real p)^4+2*(real p)⇧2 - 3"
using p_gt_0 unfolding r_def
by (subst nonzero_divide_eq_eq, auto simp:power4_eq_xxxx power2_eq_square algebra_simps)
also have "... ≤ (real p)^4+2*(real p)⇧2 + 3"
by simp
also have "... ≤ 3 * r 2 * r 2"
using p_square_ge_4
by (simp add:r_two power4_eq_xxxx power2_eq_square algebra_simps mult_left_mono)
finally have r_four_est: "r 4 ≤ 3 * r 2 * r 2"  by simp

have exp_h_prod_elim: "exp_h_prod = (λas. prod_list (map (r ∘ count_list as) (remdups as)))"

have exp_h_prod: "⋀x. set x ⊆ set as ⟹ length x ≤ 4 ⟹ expectation (h_prod x) = exp_h_prod x"
proof -
fix x
assume "set x ⊆ set as"
hence x_sub_p: "set x ⊆ {..<p}" using as_range p_ge_n by auto
hence x_le_p: "⋀k. k ∈ set x ⟹ k < p" by auto
assume "length x ≤ 4"
hence card_x: "card (set x) ≤ 4" using card_length dual_order.trans by blast

have "set x ⊆ carrier (ring_of (mod_ring p))"
using x_sub_p by (simp add:mod_ring_def ring_of_def lessThan_def)

hence h_indep: "indep_vars (λ_. borel) (λi ω. h ω i ^ count_list x i) (set x)"
using k_wise_indep_vars_subset[OF k_wise_indep] card_x as_range h_def
by (auto intro:indep_vars_compose2[where X="hash" and M'=" (λ_. discrete)"])

have "expectation (h_prod x) = expectation (λω. ∏ i ∈ set x. h ω i^(count_list x i))"
also have "... = (∏i ∈ set x. expectation (λω. h ω i^(count_list x i)))"
by (simp add: indep_vars_lebesgue_integral[OF _ h_indep])
also have "... = (∏i ∈ set x. r (count_list x i))"
using f2_hash_pow_exp x_le_p
by (simp add:h_def r_def M_def[symmetric] del:f2_hash.simps)
also have "... = exp_h_prod x"
finally show "expectation (h_prod x) = exp_h_prod x" by simp
qed

have "⋀x y. kernel_of x = kernel_of y ⟹ exp_h_prod x = exp_h_prod y"
proof -
fix x y :: "nat list"
assume a:"kernel_of x = kernel_of y"
then obtain f where b:"bij_betw f (set x) (set y)" and c:"⋀z. z ∈ set x ⟹ count_list x z = count_list y (f z)"
using kernel_of_eq_imp_bij by blast
have "exp_h_prod x = prod ( (λi. r(count_list y i)) ∘ f) (set x)"
also have "... = (∏i ∈ f ` (set x). r(count_list y i))"
by (metis b bij_betw_def prod.reindex)
also have "... = exp_h_prod y"
unfolding exp_h_prod_def
by (rule prod.cong, metis b bij_betw_def) simp
finally show "exp_h_prod x = exp_h_prod y" by simp
qed

hence exp_h_prod_cong: "⋀p x. of_bool (kernel_of x = kernel_of p) * exp_h_prod p =
of_bool (kernel_of x = kernel_of p) * exp_h_prod x"
by (metis (full_types) of_bool_eq_0_iff vector_space_over_itself.scale_zero_left)

have c:"(∑p←enum_rgfs n. of_bool (kernel_of xs = kernel_of p) * r) = r"
if a:"length xs = n" for xs :: "nat list" and n and r :: real
proof -
have "(∑p←enum_rgfs n. of_bool (kernel_of xs = kernel_of p) * 1) = (1::real)"
using equiv_rels_2[OF a[symmetric]] by (simp add:equiv_rels_def comp_def)
thus "(∑p←enum_rgfs n. of_bool (kernel_of xs = kernel_of p) * r) = (r::real)"
qed

have "expectation sketch_rv = (∑i∈set as. (∑j∈set as. c i * c j * expectation (h_prod [i,j])))"
by (simp add:f_eq h_prod_def power2_eq_square sum_distrib_left sum_distrib_right Bochner_Integration.integral_sum algebra_simps)
also have "... = (∑i∈set as. (∑j∈set as. c i * c j * exp_h_prod [i,j]))"
also have "... = (∑i ∈ set as. (∑j ∈ set as.
c i * c j * (sum_list (map (λp. of_bool (kernel_of [i,j] = kernel_of p) * exp_h_prod p) (enum_rgfs 2)))))"
also have "... = (∑i∈set as. c i * c i * r 2)"
by (simp add: numeral_eq_Suc kernel_of_eq All_less_Suc exp_h_prod_elim r_one distrib_left sum.distrib sum_collapse)
also have "... = real_of_rat (F 2 as) * ((real p)^2-1)"
by (simp add: sum_distrib_right[symmetric] c_def F_def power2_eq_square of_rat_sum of_rat_mult r_two)
finally show b:?B by simp

have "expectation (λx. (sketch_rv x)⇧2) = (∑i1 ∈ set as. (∑i2 ∈ set as. (∑i3 ∈ set as. (∑i4 ∈ set as.
c i1 * c i2 * c i3 * c i4 * expectation (h_prod [i1, i2, i3, i4])))))"
by (simp add:f_eq h_prod_def power4_eq_xxxx sum_distrib_left sum_distrib_right Bochner_Integration.integral_sum algebra_simps)
also have "... = (∑i1 ∈ set as. (∑i2 ∈ set as. (∑i3 ∈ set as. (∑i4 ∈ set as.
c i1 * c i2 * c i3 * c i4 * exp_h_prod [i1,i2,i3,i4]))))"
also have "... = (∑i1 ∈ set as. (∑i2 ∈ set as. (∑i3 ∈ set as. (∑i4 ∈ set as.
c i1 * c i2 * c i3 * c i4 *
(sum_list (map (λp. of_bool (kernel_of [i1,i2,i3,i4] = kernel_of p) * exp_h_prod p) (enum_rgfs 4)))))))"
also have "... =
3 * (∑i ∈ set as. (∑j ∈ set as. c i^2 * c j^2 * r 2 * r 2)) + ((∑ i ∈ set as. c i^4 * r 4) - 3 *  (∑ i ∈ set as. c i ^ 4 * r 2 * r 2))"
apply (simp add: numeral_eq_Suc exp_h_prod_elim r_one) (* large intermediate terms *)
apply (simp add: kernel_of_eq All_less_Suc numeral_eq_Suc distrib_left sum.distrib sum_collapse neq_commute of_bool_not_iff)
apply (simp add: algebra_simps sum_subtractf sum_collapse)
done
also have "... = 3 * (∑i ∈ set as. c i^2 * r 2)^2 + (∑ i ∈ set as. c i ^ 4 * (r 4 - 3 * r 2 * r 2))"
by (simp add:power2_eq_square sum_distrib_left algebra_simps sum_subtractf)
also have "... = 3 * (∑i ∈ set as. c i^2)^2 * (r 2)^2 + (∑i ∈ set as. c i ^ 4 * (r 4 - 3 * r 2 * r 2))"
also have "... ≤ 3 * (∑i ∈ set as. c i^2)^2 * (r 2)^2 + (∑i ∈ set as. c i ^ 4 * 0)"
using r_four_est
by (auto intro!: sum_nonpos simp add:mult_nonneg_nonpos)
also have "... = 3 * (real_of_rat (F 2 as)^2) * ((real p)⇧2-1)⇧2"
by (simp add:c_def r_two F_def of_rat_sum of_rat_power)
finally have "expectation (λx. (sketch_rv x)⇧2) ≤ 3 * (real_of_rat (F 2 as)^2) * ((real p)⇧2-1)⇧2"
by simp

thus "variance sketch_rv ≤ 2*(real_of_rat (F 2 as)^2) * ((real p)⇧2-1)⇧2"
qed

lemma space_omega_1 [simp]: "Sigma_Algebra.space Ω⇩p = UNIV"

interpretation Ω: prob_space "Ω⇩p"

lemma integrable_Ω:
fixes f :: "((nat × nat) ⇒ (nat list)) ⇒ real"
shows "integrable Ω⇩p f"
unfolding Ω⇩p_def Ω_def
by (rule integrable_measure_pmf_finite, auto intro:finite_PiE simp:set_prod_pmf)

lemma sketch_rv_exp:
assumes "i⇩2 < s⇩2"
assumes "i⇩1 ∈ {0..<s⇩1}"
shows "Ω.expectation (λω. sketch_rv (ω (i⇩1, i⇩2))) = real_of_rat (F 2 as) * ((real p)⇧2 - 1)"
proof -
have "Ω.expectation (λω.  (sketch_rv (ω (i⇩1, i⇩2))) :: real) = expectation sketch_rv"
using integrable_Ω integrable_M assms
unfolding Ω_def Ω⇩p_def M_def
by (subst expectation_Pi_pmf_slice, auto)
also have "... = (real_of_rat (F 2 as)) * ((real p)⇧2 - 1)"
using exp_sketch_rv by simp
finally show ?thesis by simp
qed

lemma sketch_rv_var:
assumes "i⇩2 < s⇩2"
assumes "i⇩1 ∈ {0..<s⇩1}"
shows "Ω.variance (λω. sketch_rv (ω (i⇩1, i⇩2))) ≤ 2 * (real_of_rat (F 2 as))⇧2 * ((real p)⇧2 - 1)⇧2"
proof -
have "Ω.variance (λω. (sketch_rv (ω (i⇩1, i⇩2)) :: real)) = variance sketch_rv"
using integrable_Ω integrable_M assms
unfolding Ω_def Ω⇩p_def M_def
by (subst variance_prod_pmf_slice, auto)
also have "... ≤  2 * (real_of_rat (F 2 as))⇧2 * ((real p)⇧2 - 1)⇧2"
using var_sketch_rv by simp
finally show ?thesis by simp
qed

lemma mean_rv_exp:
assumes "i < s⇩2"
shows "Ω.expectation (λω. mean_rv ω i) = real_of_rat (F 2 as)"
proof -
have a:"(real p)⇧2 > 1" using p_gt_1 by simp

have "Ω.expectation (λω. mean_rv ω i) = (∑i⇩1 = 0..<s⇩1. Ω.expectation (λω. sketch_rv (ω (i⇩1, i)))) / (((real p)⇧2 - 1) * real s⇩1)"
using assms integrable_Ω by (simp add:mean_rv_def)
also have "... = (∑i⇩1 = 0..<s⇩1. real_of_rat (F 2 as) * ((real p)⇧2 - 1)) / (((real p)⇧2 - 1) * real s⇩1)"
using sketch_rv_exp[OF assms] by simp
also have "... = real_of_rat (F 2 as)"
using s1_gt_0 a by simp
finally show ?thesis by simp
qed

lemma mean_rv_var:
assumes "i < s⇩2"
shows "Ω.variance (λω. mean_rv ω i) ≤ (real_of_rat (δ * F 2 as))⇧2 / 3"
proof -
have a: "Ω.indep_vars (λ_. borel) (λi⇩1 x. sketch_rv (x (i⇩1, i))) {0..<s⇩1}"
using assms
unfolding Ω⇩p_def Ω_def
by (intro indep_vars_restrict_intro'[where f="fst"])
(auto simp add: restrict_dfl_def case_prod_beta lessThan_atLeast0)

have p_sq_ne_1: "(real p)^2 ≠ 1"
by (metis p_gt_1 less_numeral_extra(4) of_nat_power one_less_power pos2 semiring_char_0_class.of_nat_eq_1_iff)

have s1_bound: " 6 / (real_of_rat δ)⇧2 ≤ real s⇩1"
unfolding s⇩1_def
by  (metis (mono_tags, opaque_lifting) of_rat_ceiling of_rat_divide of_rat_numeral_eq of_rat_power real_nat_ceiling_ge)

have "Ω.variance (λω. mean_rv ω i) = Ω.variance (λω. ∑i⇩1 = 0..<s⇩1. sketch_rv (ω (i⇩1, i))) / (((real p)⇧2 - 1) * real s⇩1)⇧2"
unfolding mean_rv_def by (subst Ω.variance_divide[OF integrable_Ω], simp)
also have "... = (∑i⇩1 = 0..<s⇩1. Ω.variance (λω. sketch_rv (ω (i⇩1, i)))) / (((real p)⇧2 - 1) * real s⇩1)⇧2"
by (subst Ω.bienaymes_identity_full_indep[OF _ _ integrable_Ω a]) (auto simp: Ω_def Ω⇩p_def)
also have "... ≤  (∑i⇩1 = 0..<s⇩1. 2*(real_of_rat (F 2 as)^2) * ((real p)⇧2-1)⇧2)  / (((real p)⇧2 - 1) * real s⇩1)⇧2"
by (rule divide_right_mono, rule sum_mono[OF sketch_rv_var[OF assms]], auto)
also have "... = 2 * (real_of_rat (F 2 as)^2) / real s⇩1"
using p_sq_ne_1 s1_gt_0 by (subst frac_eq_eq, auto simp:power2_eq_square)
also have "... ≤ 2 * (real_of_rat (F 2 as)^2) / (6 / (real_of_rat δ)⇧2)"
using  s1_gt_0 δ_range by (intro divide_left_mono mult_pos_pos s1_bound) auto
also have "... = (real_of_rat (δ * F 2 as))⇧2 / 3"
finally show ?thesis by simp
qed

lemma mean_rv_bounds:
assumes "i < s⇩2"
shows "Ω.prob {ω. real_of_rat δ * real_of_rat (F 2 as) < ¦mean_rv ω i - real_of_rat (F 2 as)¦} ≤ 1/3"
proof (cases "as = []")
case True
then show ?thesis
using assms by (subst mean_rv_def, subst sketch_rv_def, simp add:F_def)
next
case False
hence "F 2 as > 0" using F_gr_0 by auto

hence a: "0 < real_of_rat (δ * F 2 as)"
using δ_range by simp
have [simp]: "(λω. mean_rv ω i) ∈ borel_measurable Ω⇩p"
have "Ω.prob {ω. real_of_rat δ * real_of_rat (F 2 as) < ¦mean_rv ω i - real_of_rat (F 2 as)¦} ≤
Ω.prob {ω. real_of_rat (δ * F 2 as) ≤ ¦mean_rv ω i - real_of_rat (F 2 as)¦}"
by (rule Ω.pmf_mono[OF Ω⇩p_def], simp add:of_rat_mult)
also have "... ≤  Ω.variance (λω. mean_rv ω i) / (real_of_rat (δ * F 2 as))⇧2"
using Ω.Chebyshev_inequality[where a="real_of_rat (δ * F 2 as)" and f="λω. mean_rv ω i",simplified]
a prob_space_measure_pmf[where p="Ω"] mean_rv_exp[OF assms] integrable_Ω by simp
also have "... ≤ ((real_of_rat (δ * F 2 as))⇧2/3) / (real_of_rat (δ * F 2 as))⇧2"
by (rule divide_right_mono, rule mean_rv_var[OF assms], simp)
also  have "... = 1/3" using a by force
finally show ?thesis by blast
qed

lemma f2_alg_correct':
"𝒫(ω in measure_pmf result. ¦ω - F 2 as¦ ≤ δ * F 2 as) ≥ 1-of_rat ε"
proof -
have a: "Ω.indep_vars (λ_. borel) (λi ω. mean_rv ω i) {0..<s⇩2}"
using s1_gt_0 unfolding Ω⇩p_def Ω_def
by (intro indep_vars_restrict_intro'[where f="snd"])
(auto simp: Ω⇩p_def Ω_def mean_rv_def restrict_dfl_def)

have b: "- 18 * ln (real_of_rat ε) ≤ real s⇩2"
unfolding  s⇩2_def using of_nat_ceiling by auto

have "1 - of_rat ε ≤ Ω.prob {ω.  ¦median s⇩2 (mean_rv ω) -  real_of_rat (F 2 as) ¦ ≤ of_rat δ * of_rat (F 2 as)}"
using ε_range Ω.median_bound_2[OF _ a b, where δ="real_of_rat δ * real_of_rat (F 2 as)"
and μ="real_of_rat (F 2 as)"] mean_rv_bounds
by simp
also have "... = Ω.prob {ω.  ¦real_of_rat (result_rv ω) - of_rat (F 2 as) ¦ ≤ of_rat δ * of_rat (F 2 as)}"
by (simp add:result_rv_def median_restrict lessThan_atLeast0 median_rat[OF s2_gt_0]
mean_rv_def sketch_rv_def of_rat_divide of_rat_sum of_rat_mult of_rat_diff of_rat_power)
also have "... = Ω.prob {ω. ¦result_rv ω - F 2 as¦ ≤ δ * F 2 as} "
by (simp add:of_rat_less_eq of_rat_mult[symmetric]  of_rat_diff[symmetric] set_eq_iff)
finally have "Ω.prob {y. ¦result_rv y - F 2 as¦ ≤ δ * F 2 as} ≥ 1-of_rat ε " by simp
thus ?thesis by (simp add: distr Ω⇩p_def)
qed

lemma f2_exact_space_usage':
"AE ω in sketch . bit_count (encode_f2_state ω) ≤ f2_space_usage (n, length as, ε, δ)"
proof -
have "p ≤ 2 * max n 3 + 2"
by (subst p_def, rule prime_above_upper_bound)
also have "... ≤ 2 * n + 8"
by (cases "n ≤ 2", simp_all)
finally have p_bound: "p ≤ 2 * n + 8"
by simp
have "bit_count (N⇩e p) ≤ ereal (2 * log 2 (real p + 1) + 1)"
by (rule exp_golomb_bit_count)
also have "... ≤ ereal (2 * log 2 (2 * real n + 9) + 1)"
using p_bound by simp
finally have p_bit_count: "bit_count (N⇩e p) ≤ ereal (2 * log 2 (2 * real n + 9) + 1)"
by simp

have a: "bit_count (encode_f2_state (s⇩1, s⇩2, p, y, λi∈{..<s⇩1} × {..<s⇩2}.
sum_list (map (f2_hash p (y i)) as))) ≤ ereal (f2_space_usage (n, length as, ε, δ))"
if a:"y∈{..<s⇩1} × {..<s⇩2} →⇩E bounded_degree_polynomials (ring_of (mod_ring p)) 4" for y
proof -
have "y ∈ extensional ({..<s⇩1} × {..<s⇩2})" using a PiE_iff by blast
hence y_ext: "y ∈ extensional (set (List.product [0..<s⇩1] [0..<s⇩2]))"

have h_bit_count_aux: "bit_count (P⇩e p 4 (y x)) ≤ ereal (4 + 4 * log 2 (8 + 2 * real n))"
if b:"x ∈  set (List.product [0..<s⇩1] [0..<s⇩2])" for x
proof -
have "y x ∈ bounded_degree_polynomials (ring_of (mod_ring p)) 4"
using b a by force
hence "bit_count (P⇩e p 4 (y x)) ≤ ereal (real 4 * (log 2 (real p) + 1))"
by (rule bounded_degree_polynomial_bit_count[OF p_gt_1] )
also have "... ≤ ereal (real 4 * (log 2 (8 + 2 * real n) + 1) )"
using p_gt_0 p_bound by simp
also have "... ≤ ereal (4 + 4 * log 2 (8 + 2 * real n))"
by simp
finally show ?thesis
by blast
qed

have h_bit_count:
"bit_count ((List.product [0..<s⇩1] [0..<s⇩2] →⇩e P⇩e p 4) y) ≤ ereal (real s⇩1 * real s⇩2 * (4 + 4 * log 2 (8 + 2 * real n)))"
using fun_bit_count_est[where e="P⇩e p 4", OF y_ext h_bit_count_aux]
by simp

have sketch_bit_count_aux:
"bit_count (I⇩e (sum_list (map (f2_hash p (y x)) as))) ≤ ereal (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1))" (is "?lhs ≤ ?rhs")
if " x ∈ {0..<s⇩1} × {0..<s⇩2}" for x
proof -
have "¦sum_list (map (f2_hash p (y x)) as)¦ ≤ sum_list (map (abs ∘ (f2_hash p (y x))) as)"
by (subst map_map[symmetric])  (rule sum_list_abs)
also have "... ≤  sum_list (map (λ_. (int p+1)) as)"
also have "... = int (length as) * (int p+1)"
also have "... ≤ int (length as) * (9+2*(int n))"
using p_bound by (intro mult_mono, auto)
finally  have "¦sum_list (map (f2_hash p (y x)) as)¦ ≤ int (length as) * (9 + 2 * int n)" by simp
hence "?lhs ≤ ereal (2 * log 2 (real_of_int (2* (int (length as) * (9 + 2 * int n)) + 1)) + 1)"
by (rule int_bit_count_est)
also have "... = ?rhs" by (simp add:algebra_simps)
finally show "?thesis" by simp
qed

have
"bit_count ((List.product [0..<s⇩1] [0..<s⇩2] →⇩e I⇩e) (λi∈{..<s⇩1} × {..<s⇩2}. sum_list (map (f2_hash p (y i)) as)))
≤ ereal (real (length (List.product [0..<s⇩1] [0..<s⇩2]))) * (ereal (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1)))"
by (intro fun_bit_count_est)
also have "... = ereal (real s⇩1 * real s⇩2 * (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1)))"
by simp
finally have sketch_bit_count:
"bit_count ((List.product [0..<s⇩1] [0..<s⇩2] →⇩e I⇩e) (λi∈{..<s⇩1} × {..<s⇩2}. sum_list (map (f2_hash p (y i)) as))) ≤
ereal (real s⇩1 * real s⇩2 * (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1)))" by simp

have "bit_count (encode_f2_state (s⇩1, s⇩2, p, y, λi∈{..<s⇩1} × {..<s⇩2}. sum_list (map (f2_hash p (y i)) as))) ≤
bit_count (N⇩e s⇩1) + bit_count (N⇩e s⇩2) +bit_count (N⇩e p) +
bit_count ((List.product [0..<s⇩1] [0..<s⇩2] →⇩e P⇩e p 4) y) +
bit_count ((List.product [0..<s⇩1] [0..<s⇩2] →⇩e I⇩e) (λi∈{..<s⇩1} × {..<s⇩2}. sum_list (map (f2_hash p (y i)) as)))"
also have "... ≤ ereal (2 * log 2 (real s⇩1 + 1) + 1) + ereal (2 * log 2 (real s⇩2 + 1) + 1) + ereal (2 * log 2 (2 * real n + 9) + 1) +
(ereal (real s⇩1 * real s⇩2) * (4 + 4 * log 2 (8 + 2 * real n))) +
(ereal (real s⇩1 * real s⇩2) * (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1) ))"
by (intro add_mono exp_golomb_bit_count p_bit_count, auto intro: h_bit_count sketch_bit_count)
also have "... = ereal (f2_space_usage (n, length as, ε, δ))"
finally show "bit_count (encode_f2_state (s⇩1, s⇩2, p, y, λi∈{..<s⇩1} × {..<s⇩2}. sum_list (map (f2_hash p (y i)) as))) ≤
ereal (f2_space_usage (n, length as, ε, δ))"
by simp
qed

have "set_pmf Ω = {..<s⇩1} × {..<s⇩2} →⇩E bounded_degree_polynomials (ring_of (mod_ring p)) 4"
thus ?thesis
by (simp  add:mean_rv_alg_sketch AE_measure_pmf_iff del:f2_space_usage.simps, metis a)
qed

end

text ‹Main results of this section:›

theorem f2_alg_correct:
assumes "ε ∈ {0<..<1}"
assumes "δ > 0"
assumes "set as ⊆ {..<n}"
defines "Ω ≡ fold (λa state. state ⤜ f2_update a) as (f2_init δ ε n) ⤜ f2_result"
shows "𝒫(ω in measure_pmf Ω. ¦ω - F 2 as¦ ≤ δ * F 2 as) ≥ 1-of_rat ε"
using f2_alg_correct'[OF assms(1,2,3)] Ω_def by auto

theorem f2_exact_space_usage:
assumes "ε ∈ {0<..<1}"
assumes "δ > 0"
assumes "set as ⊆ {..<n}"
defines "M ≡ fold (λa state. state ⤜ f2_update a) as (f2_init δ ε n)"
shows "AE ω in M. bit_count (encode_f2_state ω) ≤ f2_space_usage (n, length as, ε, δ)"
using f2_exact_space_usage'[OF assms(1,2,3)]
by (subst (asm) sketch_def[OF assms(1,2,3)], subst M_def, simp)

theorem f2_asymptotic_space_complexity:
"f2_space_usage ∈ O[at_top ×⇩F at_top ×⇩F at_right 0 ×⇩F at_right 0](λ (n, m, ε, δ).
(ln (1 / of_rat ε)) / (of_rat δ)⇧2 * (ln (real n) + ln (real m)))"
(is "_ ∈ O[?F](?rhs)")
proof -
define n_of :: "nat × nat × rat × rat ⇒ nat" where "n_of = (λ(n, m, ε, δ). n)"
define m_of :: "nat × nat × rat × rat ⇒ nat" where "m_of = (λ(n, m, ε, δ). m)"
define ε_of :: "nat × nat × rat × rat ⇒ rat" where "ε_of = (λ(n, m, ε, δ). ε)"
define δ_of :: "nat × nat × rat × rat ⇒ rat" where "δ_of = (λ(n, m, ε, δ). δ)"

define g where "g = (λx. (1/ (of_rat (δ_of x))⇧2) * (ln (1 / of_rat (ε_of x))) * (ln (real (n_of x)) + ln (real (m_of x))))"

have evt: "(⋀x.
0 < real_of_rat (δ_of x) ∧ 0 < real_of_rat (ε_of x) ∧
1/real_of_rat (δ_of x) ≥ δ ∧ 1/real_of_rat (ε_of x) ≥ ε ∧
real (n_of x) ≥ n ∧ real (m_of x) ≥ m⟹ P x)
⟹ eventually P ?F"  (is "(⋀x. ?prem x ⟹ _) ⟹ _")
for δ ε n m P
apply (rule eventually_mono[where P="?prem" and Q="P"])
apply (simp add:ε_of_def case_prod_beta' δ_of_def n_of_def m_of_def)
apply (intro eventually_conj eventually_prod1' eventually_prod2'
sequentially_inf eventually_at_right_less inv_at_right_0_inf)

have unit_1: "(λ_. 1) ∈ O[?F](λx. 1 / (real_of_rat (δ_of x))⇧2)"
using one_le_power
by (intro landau_o.big_mono evt[where δ="1"], auto simp add:power_one_over[symmetric])

have unit_2: "(λ_. 1) ∈ O[?F](λx. ln (1 / real_of_rat (ε_of x)))"
by (intro landau_o.big_mono  evt[where ε="exp 1"])

have unit_3: "(λ_. 1) ∈ O[?F](λx. real (n_of x))"
by (intro landau_o.big_mono evt, auto)

have unit_4: "(λ_. 1) ∈ O[?F](λx. real (m_of x))"
by (intro landau_o.big_mono evt, auto)

have unit_5: "(λ_. 1) ∈ O[?F](λx. ln (real (n_of x)))"
by (auto intro!: landau_o.big_mono evt[where n="exp 1"])
(metis abs_ge_self linorder_not_le ln_ge_iff not_exp_le_zero order.trans)

have unit_6: "(λ_. 1) ∈ O[?F](λx. ln (real (n_of x)) + ln (real (m_of x)))"
by (intro landau_sum_1 evt unit_5 iffD2[OF ln_ge_iff], auto)

have unit_7: "(λ_. 1) ∈ O[?F](λx. 1 / real_of_rat (ε_of x))"
by (intro landau_o.big_mono  evt[where ε="1"], auto)

have unit_8: "(λ_. 1) ∈ O[?F](g)"
unfolding g_def by (intro landau_o.big_mult_1 unit_1 unit_2 unit_6)

have unit_9: "(λ_. 1) ∈ O[?F](λx. real (n_of x) * real (m_of x))"
by (intro landau_o.big_mult_1 unit_3 unit_4)

have " (λx. 6 * (1 / (real_of_rat (δ_of x))⇧2)) ∈ O[?F](λx. 1 / (real_of_rat (δ_of x))⇧2)"
by (subst landau_o.big.cmult_in_iff, simp_all)
hence l1: "(λx. real (nat ⌈6 / (δ_of x)⇧2⌉)) ∈ O[?F](λx. 1 / (real_of_rat (δ_of x))⇧2)"
by (intro landau_real_nat  landau_rat_ceil[OF unit_1]) (simp_all add:of_rat_divide of_rat_power)

have "(λx. - ( ln (real_of_rat (ε_of x)))) ∈ O[?F](λx. ln (1 / real_of_rat (ε_of x)))"
by (intro landau_o.big_mono evt) (subst ln_div, auto)
hence l2: "(λx. real (nat ⌈- (18 * ln (real_of_rat (ε_of x)))⌉)) ∈ O[?F](λx. ln (1 / real_of_rat (ε_of x)))"
by (intro landau_real_nat landau_ceil[OF unit_2], simp)

have l3_aux: " (λx. real (m_of x) * (18 + 4 * real (n_of x)) + 1) ∈ O[?F](λx. real (n_of x) * real (m_of x))"
by (rule sum_in_bigo[OF _unit_9], subst mult.commute)
(intro landau_o.mult sum_in_bigo, auto simp:unit_3)

note of_nat_int_ceiling [simp del]

have "(λx. ln (real (m_of x) * (18 + 4 * real (n_of x)) + 1)) ∈ O[?F](λx. ln (real (n_of x) * real (m_of x)))"
apply (rule landau_ln_2[where a="2"], simp, simp)
apply (rule evt[where m="2" and n="1"])
apply (metis dual_order.trans mult_left_mono mult_of_nat_commute of_nat_0_le_iff verit_prod_simplify(1))
using l3_aux by simp
also have "(λx. ln (real (n_of x) * real (m_of x))) ∈ O[?F](λx. ln (real (n_of x)) + ln(real (m_of x)))"
by (intro landau_o.big_mono evt[where m="1" and n="1"], auto simp add:ln_mult)
finally have l3: "(λx. ln (real (m_of x) * (18 + 4 * real (n_of x)) + 1)) ∈ O[?F](λx. ln (real (n_of x)) + ln (real (m_of x)))"
using  landau_o.big_trans by simp

have l4: "(λx. ln (8 + 2 * real (n_of x))) ∈ O[?F](λx. ln (real (n_of x)) + ln (real (m_of x)))"
by (intro landau_sum_1  evt[where n="2"] landau_ln_2[where a="2"] iffD2[OF ln_ge_iff])

have l5: "(λx. ln (9 + 2 * real (n_of x))) ∈ O[?F](λx. ln (real (n_of x)) + ln (real (m_of x)))"
by (intro landau_sum_1  evt[where n="2"] landau_ln_2[where a="2"] iffD2[OF ln_ge_iff])

have l6: "(λx. ln (real (nat ⌈6 / (δ_of x)⇧2⌉) + 1)) ∈ O[?F](g)"
unfolding g_def
by (intro landau_o.big_mult_1 landau_ln_3 sum_in_bigo unit_6 unit_2 l1 unit_1, simp)

have l7: "(λx. ln (9 + 2 * real (n_of x))) ∈ O[?F](g)"
unfolding g_def
by (intro landau_o.big_mult_1' unit_1 unit_2 l5)

have l8: "(λx. ln (real (nat ⌈- (18 * ln (real_of_rat (ε_of x)))⌉) + 1) ) ∈ O[?F](g)"
unfolding g_def
by (intro landau_o.big_mult_1 unit_6 landau_o.big_mult_1' unit_1 landau_ln_3 sum_in_bigo l2 unit_2) simp

have l9: "(λx. 5 + 4 * ln (8 + 2 * real (n_of x)) / ln 2 + 2 * ln (real (m_of x) * (18 + 4 * real (n_of x)) + 1) / ln 2)
∈ O[?F](λx. ln (real (n_of x)) + ln (real (m_of x)))"
by (intro sum_in_bigo, auto simp: l3 l4 unit_6)

have l10: "(λx. real (nat ⌈6 / (δ_of x)⇧2⌉) * real (nat ⌈- (18 * ln (real_of_rat (ε_of x)))⌉) *
(5 + 4 * ln (8 + 2 * real (n_of x)) / ln 2 + 2 * ln(real (m_of x) * (18 + 4 * real (n_of x)) + 1) / ln 2))
∈ O[?F](g)"
unfolding g_def by (intro landau_o.mult, auto simp: l1 l2 l9)

have "f2_space_usage = (λx. f2_space_usage (n_of x, m_of x, ε_of x, δ_of x))"
by (simp add:case_prod_beta' n_of_def ε_of_def δ_of_def m_of_def)
also have "... ∈ O[?F](g)"
by (auto intro!:sum_in_bigo simp:Let_def log_def l6 l7 l8 l10 unit_8)
also have "... = O[?F](?rhs)"
by (simp add:case_prod_beta' g_def n_of_def ε_of_def δ_of_def m_of_def)
finally show ?thesis by simp
qed

end
```