Theory Frequency_Moment_2

section ‹Frequency Moment $2$›

theory Frequency_Moment_2
  imports
    Universal_Hash_Families.Carter_Wegman_Hash_Family
    Universal_Hash_Families.Universal_Hash_Families_More_Finite_Fields
    Equivalence_Relation_Enumeration.Equivalence_Relation_Enumeration
    Landau_Ext
    Median_Method.Median
    Probability_Ext
    Product_PMF_Ext
    Frequency_Moments
begin

hide_const (open) Discrete_Topology.discrete
hide_const (open) Isolated.discrete

text ‹This section contains a formalization of the algorithm for the second frequency moment.
It is based on the algorithm described in cite‹\textsection 2.2› in "alon1999".
The only difference is that the algorithm is adapted to work with prime field of odd order, which
greatly reduces the implementation complexity.›

fun f2_hash where
  "f2_hash p h k = (if even (ring.hash (mod_ring p) k h) then int p - 1 else - int p - 1)"

type_synonym f2_state = "nat × nat × nat × (nat × nat  nat list) × (nat × nat  int)"

fun f2_init :: "rat  rat  nat  f2_state pmf" where
  "f2_init δ ε n =
    do {
      let s1 = nat 6 / δ2;
      let s2 = nat -(18 * ln (real_of_rat ε));
      let p = prime_above (max n 3);
      h  prod_pmf ({..<s1} × {..<s2}) (λ_. pmf_of_set (bounded_degree_polynomials (mod_ring p) 4));
      return_pmf (s1, s2, p, h, (λ_  {..<s1} × {..<s2}. (0 :: int)))
    }"

fun f2_update :: "nat  f2_state  f2_state pmf" where
  "f2_update x (s1, s2, p, h, sketch) =
    return_pmf (s1, s2, p, h, λi  {..<s1} × {..<s2}. f2_hash p (h i) x + sketch i)"

fun f2_result :: "f2_state  rat pmf" where
  "f2_result (s1, s2, p, h, sketch) =
    return_pmf (median s2 (λi2  {..<s2}.
      (i1{..<s1} . (rat_of_int (sketch (i1, i2)))2) / (((rat_of_nat p)2-1) * rat_of_nat s1)))"

fun f2_space_usage :: "(nat × nat × rat × rat)  real" where
  "f2_space_usage (n, m, ε, δ) = (
    let s1 = nat 6 / δ2  in
    let s2 = nat -(18 * ln (real_of_rat ε)) in
    3 +
    2 * log 2 (s1 + 1) +
    2 * log 2 (s2 + 1) +
    2 * log 2 (9 + 2 * real n) +
    s1 * s2 * (5 + 4*log 2 (8 + 2 * real n) + 2 * log 2 (real m * (18 + 4 * real n) + 1 )))"

definition encode_f2_state :: "f2_state  bool list option" where
  "encode_f2_state =
    Ne e (λs1.
    Ne e (λs2.
    Ne e (λp.
    (List.product [0..<s1] [0..<s2] e Pe p 4) ×e
    (List.product [0..<s1] [0..<s2] e Ie))))"

lemma "inj_on encode_f2_state (dom encode_f2_state)"
proof -
  have " is_encoding encode_f2_state"
    unfolding encode_f2_state_def
    by (intro dependent_encoding exp_golomb_encoding fun_encoding list_encoding int_encoding poly_encoding)

  thus ?thesis
    by (rule encoding_imp_inj)
qed

context
  fixes ε δ :: rat
  fixes n :: nat
  fixes as :: "nat list"
  fixes result
  assumes ε_range: "ε  {0<..<1}"
  assumes δ_range: "δ > 0"
  assumes as_range: "set as  {..<n}"
  defines "result  fold (λa state. state  f2_update a) as (f2_init δ ε n)  f2_result"
begin

private definition s1 where "s1 = nat 6 / δ2"

lemma s1_gt_0: "s1 > 0"
    using δ_range by (simp add:s1_def)

private definition s2 where "s2 = nat -(18* ln (real_of_rat ε))"

lemma s2_gt_0: "s2 > 0"
    using ε_range by (simp add:s2_def)

private definition p where "p = prime_above (max n 3)"

lemma p_prime: "Factorial_Ring.prime p"
  unfolding p_def using prime_above_prime by blast

lemma p_ge_3: "p  3"
    unfolding p_def by (meson max.boundedE prime_above_lower_bound)

lemma p_gt_0: "p > 0" using p_ge_3 by linarith

lemma p_gt_1: "p > 1" using p_ge_3 by simp

lemma p_ge_n: "p  n" unfolding p_def
  by (meson max.boundedE prime_above_lower_bound )

interpretation carter_wegman_hash_family "mod_ring p" 4
  using carter_wegman_hash_familyI[OF mod_ring_is_field mod_ring_finite]
  using p_prime by auto

definition sketch where "sketch = fold (λa state. state  f2_update a) as (f2_init δ ε n)"
private definition Ω where"Ω = prod_pmf ({..<s1} × {..<s2}) (λ_. pmf_of_set space)"
private definition Ωp where"Ωp = measure_pmf Ω"
private definition sketch_rv where "sketch_rv ω = of_int (sum_list (map (f2_hash p ω) as))^2"
private definition mean_rv where "mean_rv ω = (λi2. (i1 = 0..<s1. sketch_rv (ω (i1, i2))) / (((of_nat p)2 - 1) * of_nat s1))"
private definition result_rv where "result_rv ω = median s2 (λi2{..<s2}. mean_rv ω i2)"

lemma mean_rv_alg_sketch:
  "sketch = Ω  (λω. return_pmf (s1, s2, p, ω, λi  {..<s1} × {..<s2}. sum_list (map (f2_hash p (ω i)) as)))"
proof -
  have "sketch =  fold (λa state. state  f2_update a) as (f2_init δ ε n)"
    by (simp add:sketch_def)
  also have "... = Ω  (λω. return_pmf (s1, s2, p, ω,
      λi  {..<s1} × {..<s2}. sum_list (map (f2_hash p (ω i)) as)))"
  proof (induction as rule:rev_induct)
    case Nil
    then show ?case
      by (simp add:s1_def s2_def space_def p_def[symmetric] Ω_def restrict_def Let_def)
  next
    case (snoc a as)
    have "fold (λa state. state  f2_update a) (as @ [a]) (f2_init δ ε n) = Ω 
      (λω. return_pmf (s1, s2, p, ω, λs  {..<s1} × {..<s2}. (x  as.  f2_hash p (ω s) x))  f2_update a)"
      using snoc by (simp add: bind_assoc_pmf restrict_def del:f2_hash.simps f2_init.simps)
    also have "... =  Ω  (λω. return_pmf (s1, s2, p, ω, λi  {..<s1} × {..<s2}. (x  as@[a].  f2_hash p (ω i) x)))"
      by (subst bind_return_pmf) (simp add: add.commute del:f2_hash.simps cong:restrict_cong)
    finally show ?case by blast
  qed
  finally show ?thesis by auto
qed

lemma distr:  "result = map_pmf result_rv Ω"
proof -
  have "result = sketch  f2_result"
    by (simp add:result_def sketch_def)
  also have "... = Ω  (λx. f2_result (s1, s2, p, x, λi{..<s1} × {..<s2}. sum_list (map (f2_hash p (x i)) as)))"
    by (simp add: mean_rv_alg_sketch  bind_assoc_pmf bind_return_pmf)
  also have "... = map_pmf result_rv Ω"
    by (simp add:map_pmf_def result_rv_def mean_rv_def sketch_rv_def lessThan_atLeast0 cong:restrict_cong)
  finally show ?thesis by simp
qed

private lemma f2_hash_pow_exp:
  assumes "k < p"
  shows
    "expectation (λω. real_of_int (f2_hash p ω k) ^m) =
     ((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p)"
proof -

  have "odd p" using p_prime p_ge_3 prime_odd_nat assms by simp
  then obtain t where t_def: "p=2*t+1"
    using oddE by blast

  have "Collect even  {..<2 * t + 1}  (*) 2 ` {..<t + 1}"
    by (rule in_image_by_witness[where g="λx. x div 2"], simp, linarith)
  moreover have " (*) 2 ` {..<t + 1}  Collect even  {..<2 * t + 1}"
    by (rule image_subsetI, simp)
  ultimately have "card ({k. even k}  {..<p}) = card ((λx. 2*x) ` {..<t+1})"
    unfolding t_def using order_antisym by metis
  also have "... = card {..<t+1}"
    by (rule card_image, simp add: inj_on_mult)
  also have "... =  t+1" by simp
  finally have card_even: "card ({k. even k}  {..<p}) = t+1" by simp
  hence "card ({k. even k}  {..<p}) * 2 = (p+1)" by (simp add:t_def)
  hence prob_even: "prob {ω. hash k ω  Collect even} = (real p + 1)/(2*real p)"
    using assms by (subst prob_range, auto simp:frac_eq_eq p_gt_0 mod_ring_def)

  have "p = card {..<p}" by simp
  also have "... = card (({k. odd k}  {..<p})  ({k. even k}  {..<p}))"
    by (rule arg_cong[where f="card"], auto)
  also have "... = card ({k. odd k}  {..<p}) +  card ({k. even k}  {..<p})"
    by (rule card_Un_disjoint, simp, simp, blast)
  also have "... = card ({k. odd k}  {..<p}) + t+1"
    by (simp add:card_even)
  finally have "p = card ({k. odd k}  {..<p}) + t+1"
    by simp
  hence "card ({k. odd k}  {..<p}) * 2 = (p-1)"
    by (simp add:t_def)
  hence prob_odd: "prob {ω. hash k ω  Collect odd} = (real p - 1)/(2*real p)"
    using assms by (subst prob_range, auto simp add: frac_eq_eq mod_ring_def)

  have "expectation (λx. real_of_int (f2_hash p x k) ^ m) =
    expectation (λω. indicator {ω. even (hash k ω)} ω * (real p - 1)^m +
      indicator {ω. odd (hash k ω)} ω * (-real p - 1)^m)"
    by (rule Bochner_Integration.integral_cong, simp, simp)
  also have "... =
     prob {ω. hash  k ω  Collect even}  * (real p - 1) ^ m  +
     prob {ω. hash  k ω  Collect odd}  * (-real p - 1) ^ m "
    by (simp, simp add:M_def)
  also have "... = (real p + 1) * (real p - 1) ^ m / (2 * real p) + (real p - 1) * (- real p - 1) ^ m / (2 * real p)"
    by (subst prob_even, subst prob_odd, simp)
  also have "... =
    ((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p)"
    by (simp add:add_divide_distrib ac_simps)
  finally show "expectation (λx. real_of_int (f2_hash p x k) ^ m) =
    ((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p)" by simp
qed

lemma
  shows var_sketch_rv:"variance sketch_rv  2*(real_of_rat (F 2 as)^2) * ((real p)2-1)2" (is "?A")
  and exp_sketch_rv:"expectation sketch_rv = real_of_rat (F 2 as) * ((real p)2-1)" (is "?B")
proof -
  define h where "h = (λω x. real_of_int (f2_hash p ω x))"
  define c where "c = (λx. real (count_list as x))"
  define r where "r = (λ(m::nat). ((real p - 1) ^ m * (real p + 1) + (- real p - 1) ^ m * (real p - 1)) / (2 * real p))"
  define h_prod where "h_prod = (λas ω. prod_list (map (h ω) as))"

  define exp_h_prod :: "nat list  real" where "exp_h_prod = (λas. (i  set as. r (count_list as i)))"

  have f_eq: "sketch_rv = (λω. (x  set as. c x * h ω x)^2)"
    by (rule ext, simp add:sketch_rv_def c_def h_def sum_list_eval del:f2_hash.simps)

  have r_one: "r (Suc 0) = 0"
    by (simp add:r_def algebra_simps)

  have r_two: "r 2 = (real p^2-1)"
    using p_gt_0 unfolding r_def power2_eq_square
    by (simp add:nonzero_divide_eq_eq, simp add:algebra_simps)

  have"(real p)^2  2^2"
    by (rule power_mono, use p_gt_1 in linarith, simp)
  hence p_square_ge_4: "(real p)2  4" by simp

  have "r 4 = (real p)^4+2*(real p)2 - 3"
    using p_gt_0 unfolding r_def
    by (subst nonzero_divide_eq_eq, auto simp:power4_eq_xxxx power2_eq_square algebra_simps)
  also have "...  (real p)^4+2*(real p)2 + 3"
    by simp
  also have "...  3 * r 2 * r 2"
    using p_square_ge_4
    by (simp add:r_two power4_eq_xxxx power2_eq_square algebra_simps mult_left_mono)
  finally have r_four_est: "r 4  3 * r 2 * r 2"  by simp

  have exp_h_prod_elim: "exp_h_prod = (λas. prod_list (map (r  count_list as) (remdups as)))"
    by (simp add:exp_h_prod_def prod.set_conv_list[symmetric])

  have exp_h_prod: "x. set x  set as  length x  4  expectation (h_prod x) = exp_h_prod x"
  proof -
    fix x
    assume "set x  set as"
    hence x_sub_p: "set x  {..<p}" using as_range p_ge_n by auto
    hence x_le_p: "k. k  set x  k < p" by auto
    assume "length x  4"
    hence card_x: "card (set x)  4" using card_length dual_order.trans by blast

    have "set x  carrier (mod_ring p) "
      using x_sub_p by (simp add:mod_ring_def)

    hence h_indep: "indep_vars (λ_. borel) (λi ω. h ω i ^ count_list x i) (set x)"
      using k_wise_indep_vars_subset[OF k_wise_indep] card_x as_range h_def
      by (auto intro:indep_vars_compose2[where X="hash" and M'=" (λ_. discrete)"])

    have "expectation (h_prod x) = expectation (λω.  i  set x. h ω i^(count_list x i))"
      by (simp add:h_prod_def prod_list_eval)
    also have "... = (i  set x. expectation (λω. h ω i^(count_list x i)))"
      by (simp add: indep_vars_lebesgue_integral[OF _ h_indep])
    also have "... = (i  set x. r (count_list x i))"
      using f2_hash_pow_exp x_le_p
      by (simp add:h_def r_def M_def[symmetric] del:f2_hash.simps)
    also have "... = exp_h_prod x"
      by (simp add:exp_h_prod_def)
    finally show "expectation (h_prod x) = exp_h_prod x" by simp
  qed

  have "x y. kernel_of x = kernel_of y  exp_h_prod x = exp_h_prod y"
  proof -
    fix x y :: "nat list"
    assume a:"kernel_of x = kernel_of y"
    then obtain f where b:"bij_betw f (set x) (set y)" and c:"z. z  set x  count_list x z = count_list y (f z)"
      using kernel_of_eq_imp_bij by blast
    have "exp_h_prod x = prod ( (λi. r(count_list y i))  f) (set x)"
      by (simp add:exp_h_prod_def c)
    also have "... = (i  f ` (set x). r(count_list y i))"
      by (metis b bij_betw_def prod.reindex)
    also have "... = exp_h_prod y"
      unfolding exp_h_prod_def
      by (rule prod.cong, metis b bij_betw_def) simp
    finally show "exp_h_prod x = exp_h_prod y" by simp
  qed

  hence exp_h_prod_cong: "p x. of_bool (kernel_of x = kernel_of p) * exp_h_prod p =
    of_bool (kernel_of x = kernel_of p) * exp_h_prod x"
    by (metis (full_types) of_bool_eq_0_iff vector_space_over_itself.scale_zero_left)

  have c:"(penum_rgfs n. of_bool (kernel_of xs = kernel_of p) * r) = r"
    if a:"length xs = n" for xs :: "nat list" and n and r :: real
  proof -
    have "(penum_rgfs n. of_bool (kernel_of xs = kernel_of p) * 1) = (1::real)"
      using equiv_rels_2[OF a[symmetric]] by (simp add:equiv_rels_def comp_def)
    thus "(penum_rgfs n. of_bool (kernel_of xs = kernel_of p) * r) = (r::real)"
      by (simp add:sum_list_mult_const)
  qed

  have "expectation sketch_rv = (iset as. (jset as. c i * c j * expectation (h_prod [i,j])))"
    by (simp add:f_eq h_prod_def power2_eq_square sum_distrib_left sum_distrib_right Bochner_Integration.integral_sum algebra_simps)
  also have "... = (iset as. (jset as. c i * c j * exp_h_prod [i,j]))"
    by (simp add:exp_h_prod)
  also have "... = (i  set as. (j  set as.
    c i * c j * (sum_list (map (λp. of_bool (kernel_of [i,j] = kernel_of p) * exp_h_prod p) (enum_rgfs 2)))))"
    by (subst exp_h_prod_cong, simp add:c)
  also have "... = (iset as. c i * c i * r 2)"
    by (simp add: numeral_eq_Suc kernel_of_eq All_less_Suc exp_h_prod_elim r_one distrib_left sum.distrib sum_collapse)
  also have "... = real_of_rat (F 2 as) * ((real p)^2-1)"
    by (simp add: sum_distrib_right[symmetric] c_def F_def power2_eq_square of_rat_sum of_rat_mult r_two)
  finally show b:?B by simp

  have "expectation (λx. (sketch_rv x)2) = (i1  set as. (i2  set as. (i3  set as. (i4  set as.
    c i1 * c i2 * c i3 * c i4 * expectation (h_prod [i1, i2, i3, i4])))))"
    by (simp add:f_eq h_prod_def power4_eq_xxxx sum_distrib_left sum_distrib_right Bochner_Integration.integral_sum algebra_simps)
  also have "... = (i1  set as. (i2  set as. (i3  set as. (i4  set as.
    c i1 * c i2 * c i3 * c i4 * exp_h_prod [i1,i2,i3,i4]))))"
    by (simp add:exp_h_prod)
  also have "... = (i1  set as. (i2  set as. (i3  set as. (i4  set as.
    c i1 * c i2 * c i3 * c i4 *
    (sum_list (map (λp. of_bool (kernel_of [i1,i2,i3,i4] = kernel_of p) * exp_h_prod p) (enum_rgfs 4)))))))"
    by (subst exp_h_prod_cong, simp add:c)
  also have "... =
    3 * (i  set as. (j  set as. c i^2 * c j^2 * r 2 * r 2)) + (( i  set as. c i^4 * r 4) - 3 *  ( i  set as. c i ^ 4 * r 2 * r 2))"
    apply (simp add: numeral_eq_Suc exp_h_prod_elim r_one) (* large intermediate terms *)
    apply (simp add: kernel_of_eq All_less_Suc numeral_eq_Suc distrib_left sum.distrib sum_collapse neq_commute of_bool_not_iff)
    apply (simp add: algebra_simps sum_subtractf sum_collapse)
    apply (simp add: sum_distrib_left algebra_simps)
    done
  also have "... = 3 * (i  set as. c i^2 * r 2)^2 + ( i  set as. c i ^ 4 * (r 4 - 3 * r 2 * r 2))"
    by (simp add:power2_eq_square sum_distrib_left algebra_simps sum_subtractf)
  also have "... = 3 * (i  set as. c i^2)^2 * (r 2)^2 + (i  set as. c i ^ 4 * (r 4 - 3 * r 2 * r 2))"
    by (simp add:power_mult_distrib sum_distrib_right[symmetric])
  also have "...  3 * (i  set as. c i^2)^2 * (r 2)^2 + (i  set as. c i ^ 4 * 0)"
    using r_four_est
    by (auto intro!: sum_nonpos simp add:mult_nonneg_nonpos)
  also have "... = 3 * (real_of_rat (F 2 as)^2) * ((real p)2-1)2"
    by (simp add:c_def r_two F_def of_rat_sum of_rat_power)
  finally have "expectation (λx. (sketch_rv x)2)  3 * (real_of_rat (F 2 as)^2) * ((real p)2-1)2"
    by simp

  thus "variance sketch_rv  2*(real_of_rat (F 2 as)^2) * ((real p)2-1)2"
     by (simp add: variance_eq, simp add:power_mult_distrib b)
qed

lemma space_omega_1 [simp]: "Sigma_Algebra.space Ωp = UNIV"
    by (simp add:Ωp_def)

interpretation Ω: prob_space "Ωp"
  by (simp add:Ωp_def prob_space_measure_pmf)

lemma integrable_Ω:
  fixes f :: "((nat × nat)  (nat list))  real"
  shows "integrable Ωp f"
  unfolding Ωp_def Ω_def
  by (rule integrable_measure_pmf_finite, auto intro:finite_PiE simp:set_prod_pmf)

lemma sketch_rv_exp:
  assumes "i2 < s2"
  assumes "i1  {0..<s1}"
  shows "Ω.expectation (λω. sketch_rv (ω (i1, i2))) = real_of_rat (F 2 as) * ((real p)2 - 1)"
proof -
  have "Ω.expectation (λω.  (sketch_rv (ω (i1, i2))) :: real) = expectation sketch_rv"
    using integrable_Ω integrable_M assms
    unfolding Ω_def Ωp_def M_def
    by (subst expectation_Pi_pmf_slice, auto)
  also have "... = (real_of_rat (F 2 as)) * ((real p)2 - 1)"
    using exp_sketch_rv by simp
  finally show ?thesis by simp
qed

lemma sketch_rv_var:
  assumes "i2 < s2"
  assumes "i1  {0..<s1}"
  shows "Ω.variance (λω. sketch_rv (ω (i1, i2)))  2 * (real_of_rat (F 2 as))2 * ((real p)2 - 1)2"
proof -
  have "Ω.variance (λω. (sketch_rv (ω (i1, i2)) :: real)) = variance sketch_rv"
    using integrable_Ω integrable_M assms
    unfolding Ω_def Ωp_def M_def
    by (subst variance_prod_pmf_slice, auto)
  also have "...   2 * (real_of_rat (F 2 as))2 * ((real p)2 - 1)2"
    using var_sketch_rv by simp
  finally show ?thesis by simp
qed

lemma mean_rv_exp:
  assumes "i < s2"
  shows "Ω.expectation (λω. mean_rv ω i) = real_of_rat (F 2 as)"
proof -
  have a:"(real p)2 > 1" using p_gt_1 by simp

  have "Ω.expectation (λω. mean_rv ω i) = (i1 = 0..<s1. Ω.expectation (λω. sketch_rv (ω (i1, i)))) / (((real p)2 - 1) * real s1)"
    using assms integrable_Ω by (simp add:mean_rv_def)
  also have "... = (i1 = 0..<s1. real_of_rat (F 2 as) * ((real p)2 - 1)) / (((real p)2 - 1) * real s1)"
    using sketch_rv_exp[OF assms] by simp
  also have "... = real_of_rat (F 2 as)"
    using s1_gt_0 a by simp
  finally show ?thesis by simp
qed

lemma mean_rv_var:
  assumes "i < s2"
  shows "Ω.variance (λω. mean_rv ω i)  (real_of_rat (δ * F 2 as))2 / 3"
proof -
  have a: "Ω.indep_vars (λ_. borel) (λi1 x. sketch_rv (x (i1, i))) {0..<s1}"
    using assms
    unfolding Ωp_def Ω_def
    by (intro indep_vars_restrict_intro'[where f="fst"])
     (auto simp add: restrict_dfl_def case_prod_beta lessThan_atLeast0)

  have p_sq_ne_1: "(real p)^2  1"
    by (metis p_gt_1 less_numeral_extra(4) of_nat_power one_less_power pos2 semiring_char_0_class.of_nat_eq_1_iff)

  have s1_bound: " 6 / (real_of_rat δ)2  real s1"
    unfolding s1_def
    by  (metis (mono_tags, opaque_lifting) of_rat_ceiling of_rat_divide of_rat_numeral_eq of_rat_power real_nat_ceiling_ge)

  have "Ω.variance (λω. mean_rv ω i) = Ω.variance (λω. i1 = 0..<s1. sketch_rv (ω (i1, i))) / (((real p)2 - 1) * real s1)2"
    unfolding mean_rv_def by (subst Ω.variance_divide[OF integrable_Ω], simp)
  also have "... = (i1 = 0..<s1. Ω.variance (λω. sketch_rv (ω (i1, i)))) / (((real p)2 - 1) * real s1)2"
    by (subst Ω.bienaymes_identity_full_indep[OF _ _ integrable_Ω a]) (auto simp: Ω_def Ωp_def)
  also have "...   (i1 = 0..<s1. 2*(real_of_rat (F 2 as)^2) * ((real p)2-1)2)  / (((real p)2 - 1) * real s1)2"
    by (rule divide_right_mono, rule sum_mono[OF sketch_rv_var[OF assms]], auto)
  also have "... = 2 * (real_of_rat (F 2 as)^2) / real s1"
    using p_sq_ne_1 s1_gt_0 by (subst frac_eq_eq, auto simp:power2_eq_square)
  also have "...  2 * (real_of_rat (F 2 as)^2) / (6 / (real_of_rat δ)2)"
    using  s1_gt_0 δ_range by (intro divide_left_mono mult_pos_pos s1_bound) auto
  also have "... = (real_of_rat (δ * F 2 as))2 / 3"
    by (simp add:of_rat_mult algebra_simps)
  finally show ?thesis by simp
qed

lemma mean_rv_bounds:
  assumes "i < s2"
  shows "Ω.prob {ω. real_of_rat δ * real_of_rat (F 2 as) < ¦mean_rv ω i - real_of_rat (F 2 as)¦}  1/3"
proof (cases "as = []")
  case True
  then show ?thesis
    using assms by (subst mean_rv_def, subst sketch_rv_def, simp add:F_def)
next
  case False
  hence "F 2 as > 0" using F_gr_0 by auto

  hence a: "0 < real_of_rat (δ * F 2 as)"
    using δ_range by simp
  have [simp]: "(λω. mean_rv ω i)  borel_measurable Ωp"
    by (simp add:Ω_def Ωp_def)
  have "Ω.prob {ω. real_of_rat δ * real_of_rat (F 2 as) < ¦mean_rv ω i - real_of_rat (F 2 as)¦} 
      Ω.prob {ω. real_of_rat (δ * F 2 as)  ¦mean_rv ω i - real_of_rat (F 2 as)¦}"
    by (rule Ω.pmf_mono[OF Ωp_def], simp add:of_rat_mult)
  also have "...   Ω.variance (λω. mean_rv ω i) / (real_of_rat (δ * F 2 as))2"
    using Ω.Chebyshev_inequality[where a="real_of_rat (δ * F 2 as)" and f="λω. mean_rv ω i",simplified]
      a prob_space_measure_pmf[where p="Ω"] mean_rv_exp[OF assms] integrable_Ω by simp
  also have "...  ((real_of_rat (δ * F 2 as))2/3) / (real_of_rat (δ * F 2 as))2"
    by (rule divide_right_mono, rule mean_rv_var[OF assms], simp)
  also  have "... = 1/3" using a by force
  finally show ?thesis by blast
qed

lemma f2_alg_correct':
   "𝒫(ω in measure_pmf result. ¦ω - F 2 as¦  δ * F 2 as)  1-of_rat ε"
proof -
  have a: "Ω.indep_vars (λ_. borel) (λi ω. mean_rv ω i) {0..<s2}"
    using s1_gt_0 unfolding Ωp_def Ω_def
    by (intro indep_vars_restrict_intro'[where f="snd"])
      (auto simp: Ωp_def Ω_def mean_rv_def restrict_dfl_def)

  have b: "- 18 * ln (real_of_rat ε)  real s2"
    unfolding  s2_def using of_nat_ceiling by auto

  have "1 - of_rat ε  Ω.prob {ω.  ¦median s2 (mean_rv ω) -  real_of_rat (F 2 as) ¦  of_rat δ * of_rat (F 2 as)}"
    using ε_range Ω.median_bound_2[OF _ a b, where δ="real_of_rat δ * real_of_rat (F 2 as)"
        and μ="real_of_rat (F 2 as)"] mean_rv_bounds
    by simp
  also have "... = Ω.prob {ω.  ¦real_of_rat (result_rv ω) - of_rat (F 2 as) ¦  of_rat δ * of_rat (F 2 as)}"
    by (simp add:result_rv_def median_restrict lessThan_atLeast0 median_rat[OF s2_gt_0]
         mean_rv_def sketch_rv_def of_rat_divide of_rat_sum of_rat_mult of_rat_diff of_rat_power)
  also have "... = Ω.prob {ω. ¦result_rv ω - F 2 as¦  δ * F 2 as} "
    by (simp add:of_rat_less_eq of_rat_mult[symmetric]  of_rat_diff[symmetric] set_eq_iff)
  finally have "Ω.prob {y. ¦result_rv y - F 2 as¦  δ * F 2 as}  1-of_rat ε " by simp
  thus ?thesis by (simp add: distr Ωp_def)
qed

lemma f2_exact_space_usage':
   "AE ω in sketch . bit_count (encode_f2_state ω)  f2_space_usage (n, length as, ε, δ)"
proof -
  have "p  2 * max n 3 + 2"
    by (subst p_def, rule prime_above_upper_bound)
  also have "...  2 * n + 8"
    by (cases "n  2", simp_all)
  finally have p_bound: "p  2 * n + 8"
    by simp
  have "bit_count (Ne p)  ereal (2 * log 2 (real p + 1) + 1)"
    by (rule exp_golomb_bit_count)
  also have "...  ereal (2 * log 2 (2 * real n + 9) + 1)"
    using p_bound by simp
  finally have p_bit_count: "bit_count (Ne p)  ereal (2 * log 2 (2 * real n + 9) + 1)"
    by simp

  have a: "bit_count (encode_f2_state (s1, s2, p, y, λi{..<s1} × {..<s2}.
      sum_list (map (f2_hash p (y i)) as)))  ereal (f2_space_usage (n, length as, ε, δ))"
    if a:"y{..<s1} × {..<s2} E bounded_degree_polynomials (mod_ring p) 4" for y
  proof -
    have "y  extensional ({..<s1} × {..<s2})" using a PiE_iff by blast
    hence y_ext: "y  extensional (set (List.product [0..<s1] [0..<s2]))"
      by (simp add:lessThan_atLeast0)

    have h_bit_count_aux: "bit_count (Pe p 4 (y x))  ereal (4 + 4 * log 2 (8 + 2 * real n))"
      if b:"x   set (List.product [0..<s1] [0..<s2])" for x
    proof -
      have "y x  bounded_degree_polynomials (mod_ring p) 4"
        using b a by force
      hence "bit_count (Pe p 4 (y x))  ereal (real 4 * (log 2 (real p) + 1))"
        by (rule bounded_degree_polynomial_bit_count[OF p_gt_1] )
      also have "...  ereal (real 4 * (log 2 (8 + 2 * real n) + 1) )"
        using p_gt_0 p_bound by simp
      also have "...  ereal (4 + 4 * log 2 (8 + 2 * real n))"
        by simp
      finally show ?thesis
        by blast
    qed

    have h_bit_count:
      "bit_count ((List.product [0..<s1] [0..<s2] e Pe p 4) y)  ereal (real s1 * real s2 * (4 + 4 * log 2 (8 + 2 * real n)))"
      using fun_bit_count_est[where e="Pe p 4", OF y_ext h_bit_count_aux]
      by simp

    have sketch_bit_count_aux:
      "bit_count (Ie (sum_list (map (f2_hash p (y x)) as)))  ereal (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1))" (is "?lhs  ?rhs")
      if " x  {0..<s1} × {0..<s2}" for x
    proof -
      have "¦sum_list (map (f2_hash p (y x)) as)¦  sum_list (map (abs  (f2_hash p (y x))) as)"
        by (subst map_map[symmetric])  (rule sum_list_abs)
      also have "...   sum_list (map (λ_. (int p+1)) as)"
        by (rule sum_list_mono) (simp add:p_gt_0)
      also have "... = int (length as) * (int p+1)"
        by (simp add: sum_list_triv)
      also have "...  int (length as) * (9+2*(int n))"
        using p_bound by (intro mult_mono, auto)
      finally  have "¦sum_list (map (f2_hash p (y x)) as)¦  int (length as) * (9 + 2 * int n)" by simp
      hence "?lhs  ereal (2 * log 2 (real_of_int (2* (int (length as) * (9 + 2 * int n)) + 1)) + 1)"
        by (rule int_bit_count_est)
      also have "... = ?rhs" by (simp add:algebra_simps)
      finally show "?thesis" by simp
    qed

    have
      "bit_count ((List.product [0..<s1] [0..<s2] e Ie) (λi{..<s1} × {..<s2}. sum_list (map (f2_hash p (y i)) as)))
       ereal (real (length (List.product [0..<s1] [0..<s2]))) * (ereal (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1)))"
      by (intro fun_bit_count_est)
       (simp_all add:extensional_def lessThan_atLeast0 sketch_bit_count_aux del:f2_hash.simps)
    also have "... = ereal (real s1 * real s2 * (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1)))"
      by simp
    finally have sketch_bit_count:
       "bit_count ((List.product [0..<s1] [0..<s2] e Ie) (λi{..<s1} × {..<s2}. sum_list (map (f2_hash p (y i)) as))) 
      ereal (real s1 * real s2 * (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1)))" by simp

    have "bit_count (encode_f2_state (s1, s2, p, y, λi{..<s1} × {..<s2}. sum_list (map (f2_hash p (y i)) as))) 
      bit_count (Ne s1) + bit_count (Ne s2) +bit_count (Ne p) +
      bit_count ((List.product [0..<s1] [0..<s2] e Pe p 4) y) +
      bit_count ((List.product [0..<s1] [0..<s2] e Ie) (λi{..<s1} × {..<s2}. sum_list (map (f2_hash p (y i)) as)))"
      by (simp add:Let_def s1_def s2_def encode_f2_state_def dependent_bit_count add.assoc)
    also have "...  ereal (2 * log 2 (real s1 + 1) + 1) + ereal (2 * log 2 (real s2 + 1) + 1) + ereal (2 * log 2 (2 * real n + 9) + 1) +
      (ereal (real s1 * real s2) * (4 + 4 * log 2 (8 + 2 * real n))) +
      (ereal (real s1 * real s2) * (1 + 2 * log 2 (real (length as) * (18 + 4 * real n) + 1) ))"
      by (intro add_mono exp_golomb_bit_count p_bit_count, auto intro: h_bit_count sketch_bit_count)
    also have "... = ereal (f2_space_usage (n, length as, ε, δ))"
      by (simp add:distrib_left add.commute s1_def[symmetric] s2_def[symmetric] Let_def)
    finally show "bit_count (encode_f2_state (s1, s2, p, y, λi{..<s1} × {..<s2}. sum_list (map (f2_hash p (y i)) as))) 
      ereal (f2_space_usage (n, length as, ε, δ))"
      by simp
  qed

  have "set_pmf Ω = {..<s1} × {..<s2} E bounded_degree_polynomials (mod_ring p) 4"
    by (simp add: Ω_def set_prod_pmf) (simp add: space_def)
  thus ?thesis
    by (simp  add:mean_rv_alg_sketch AE_measure_pmf_iff del:f2_space_usage.simps, metis a)
qed

end


text ‹Main results of this section:›

theorem f2_alg_correct:
  assumes "ε  {0<..<1}"
  assumes "δ > 0"
  assumes "set as  {..<n}"
  defines "Ω  fold (λa state. state  f2_update a) as (f2_init δ ε n)  f2_result"
  shows "𝒫(ω in measure_pmf Ω. ¦ω - F 2 as¦  δ * F 2 as)  1-of_rat ε"
  using f2_alg_correct'[OF assms(1,2,3)] Ω_def by auto

theorem f2_exact_space_usage:
  assumes "ε  {0<..<1}"
  assumes "δ > 0"
  assumes "set as  {..<n}"
  defines "M  fold (λa state. state  f2_update a) as (f2_init δ ε n)"
  shows "AE ω in M. bit_count (encode_f2_state ω)  f2_space_usage (n, length as, ε, δ)"
  using f2_exact_space_usage'[OF assms(1,2,3)]
  by (subst (asm) sketch_def[OF assms(1,2,3)], subst M_def, simp)

theorem f2_asymptotic_space_complexity:
  "f2_space_usage  O[at_top ×F at_top ×F at_right 0 ×F at_right 0](λ (n, m, ε, δ).
  (ln (1 / of_rat ε)) / (of_rat δ)2 * (ln (real n) + ln (real m)))"
  (is "_  O[?F](?rhs)")
proof -
  define n_of :: "nat × nat × rat × rat  nat" where "n_of = (λ(n, m, ε, δ). n)"
  define m_of :: "nat × nat × rat × rat  nat" where "m_of = (λ(n, m, ε, δ). m)"
  define ε_of :: "nat × nat × rat × rat  rat" where "ε_of = (λ(n, m, ε, δ). ε)"
  define δ_of :: "nat × nat × rat × rat  rat" where "δ_of = (λ(n, m, ε, δ). δ)"

  define g where "g = (λx. (1/ (of_rat (δ_of x))2) * (ln (1 / of_rat (ε_of x))) * (ln (real (n_of x)) + ln (real (m_of x))))"

  have evt: "(x.
    0 < real_of_rat (δ_of x)  0 < real_of_rat (ε_of x) 
    1/real_of_rat (δ_of x)  δ  1/real_of_rat (ε_of x)  ε 
    real (n_of x)  n  real (m_of x)  m P x)
     eventually P ?F"  (is "(x. ?prem x  _)  _")
    for δ ε n m P
    apply (rule eventually_mono[where P="?prem" and Q="P"])
    apply (simp add:ε_of_def case_prod_beta' δ_of_def n_of_def m_of_def)
     apply (intro eventually_conj eventually_prod1' eventually_prod2'
        sequentially_inf eventually_at_right_less inv_at_right_0_inf)
    by (auto simp add:prod_filter_eq_bot)

  have unit_1: "(λ_. 1)  O[?F](λx. 1 / (real_of_rat (δ_of x))2)"
    using one_le_power
    by (intro landau_o.big_mono evt[where δ="1"], auto simp add:power_one_over[symmetric])

  have unit_2: "(λ_. 1)  O[?F](λx. ln (1 / real_of_rat (ε_of x)))"
    by (intro landau_o.big_mono  evt[where ε="exp 1"])
     (auto intro!:iffD2[OF ln_ge_iff] simp add:abs_ge_iff)

  have unit_3: "(λ_. 1)  O[?F](λx. real (n_of x))"
    by (intro landau_o.big_mono evt, auto)

  have unit_4: "(λ_. 1)  O[?F](λx. real (m_of x))"
    by (intro landau_o.big_mono evt, auto)

  have unit_5: "(λ_. 1)  O[?F](λx. ln (real (n_of x)))"
    by (auto intro!: landau_o.big_mono evt[where n="exp 1"])
      (metis abs_ge_self linorder_not_le ln_ge_iff not_exp_le_zero order.trans)

  have unit_6: "(λ_. 1)  O[?F](λx. ln (real (n_of x)) + ln (real (m_of x)))"
    by (intro landau_sum_1 evt unit_5 iffD2[OF ln_ge_iff], auto)

  have unit_7: "(λ_. 1)  O[?F](λx. 1 / real_of_rat (ε_of x))"
    by (intro landau_o.big_mono  evt[where ε="1"], auto)

  have unit_8: "(λ_. 1)  O[?F](g)"
    unfolding g_def by (intro landau_o.big_mult_1 unit_1 unit_2 unit_6)

  have unit_9: "(λ_. 1)  O[?F](λx. real (n_of x) * real (m_of x))"
    by (intro landau_o.big_mult_1 unit_3 unit_4)

  have " (λx. 6 * (1 / (real_of_rat (δ_of x))2))  O[?F](λx. 1 / (real_of_rat (δ_of x))2)"
    by (subst landau_o.big.cmult_in_iff, simp_all)
  hence l1: "(λx. real (nat 6 / (δ_of x)2))  O[?F](λx. 1 / (real_of_rat (δ_of x))2)"
    by (intro landau_real_nat  landau_rat_ceil[OF unit_1]) (simp_all add:of_rat_divide of_rat_power)

  have "(λx. - ( ln (real_of_rat (ε_of x))))  O[?F](λx. ln (1 / real_of_rat (ε_of x)))"
    by (intro landau_o.big_mono evt) (subst ln_div, auto)
  hence l2: "(λx. real (nat - (18 * ln (real_of_rat (ε_of x)))))  O[?F](λx. ln (1 / real_of_rat (ε_of x)))"
    by (intro landau_real_nat landau_ceil[OF unit_2], simp)

  have l3_aux: " (λx. real (m_of x) * (18 + 4 * real (n_of x)) + 1)  O[?F](λx. real (n_of x) * real (m_of x))"
    by (rule sum_in_bigo[OF _unit_9], subst mult.commute)
      (intro landau_o.mult sum_in_bigo, auto simp:unit_3)

  note of_nat_int_ceiling [simp del]

  have "(λx. ln (real (m_of x) * (18 + 4 * real (n_of x)) + 1))  O[?F](λx. ln (real (n_of x) * real (m_of x)))"
     apply (rule landau_ln_2[where a="2"], simp, simp)
      apply (rule evt[where m="2" and n="1"])
     apply (metis dual_order.trans mult_left_mono mult_of_nat_commute of_nat_0_le_iff verit_prod_simplify(1))
    using l3_aux by simp
  also have "(λx. ln (real (n_of x) * real (m_of x)))  O[?F](λx. ln (real (n_of x)) + ln(real (m_of x)))"
    by (intro landau_o.big_mono evt[where m="1" and n="1"], auto simp add:ln_mult)
  finally have l3: "(λx. ln (real (m_of x) * (18 + 4 * real (n_of x)) + 1))  O[?F](λx. ln (real (n_of x)) + ln (real (m_of x)))"
    using  landau_o.big_trans by simp

  have l4: "(λx. ln (8 + 2 * real (n_of x)))  O[?F](λx. ln (real (n_of x)) + ln (real (m_of x)))"
    by (intro landau_sum_1  evt[where n="2"] landau_ln_2[where a="2"] iffD2[OF ln_ge_iff])
     (auto intro!: sum_in_bigo simp add:unit_3)

  have l5: "(λx. ln (9 + 2 * real (n_of x)))  O[?F](λx. ln (real (n_of x)) + ln (real (m_of x)))"
    by (intro landau_sum_1  evt[where n="2"] landau_ln_2[where a="2"] iffD2[OF ln_ge_iff])
     (auto intro!: sum_in_bigo simp add:unit_3)

  have l6: "(λx. ln (real (nat 6 / (δ_of x)2) + 1))  O[?F](g)"
    unfolding g_def
    by (intro landau_o.big_mult_1 landau_ln_3 sum_in_bigo unit_6 unit_2 l1 unit_1, simp)

  have l7: "(λx. ln (9 + 2 * real (n_of x)))  O[?F](g)"
    unfolding g_def
    by (intro landau_o.big_mult_1' unit_1 unit_2 l5)

  have l8: "(λx. ln (real (nat - (18 * ln (real_of_rat (ε_of x)))) + 1) )  O[?F](g)"
    unfolding g_def
    by (intro landau_o.big_mult_1 unit_6 landau_o.big_mult_1' unit_1 landau_ln_3 sum_in_bigo l2 unit_2) simp

  have l9: "(λx. 5 + 4 * ln (8 + 2 * real (n_of x)) / ln 2 + 2 * ln (real (m_of x) * (18 + 4 * real (n_of x)) + 1) / ln 2)
       O[?F](λx. ln (real (n_of x)) + ln (real (m_of x)))"
    by (intro sum_in_bigo, auto simp: l3 l4 unit_6)

  have l10: "(λx. real (nat 6 / (δ_of x)2) * real (nat - (18 * ln (real_of_rat (ε_of x)))) *
      (5 + 4 * ln (8 + 2 * real (n_of x)) / ln 2 + 2 * ln(real (m_of x) * (18 + 4 * real (n_of x)) + 1) / ln 2))
       O[?F](g)"
    unfolding g_def by (intro landau_o.mult, auto simp: l1 l2 l9)

  have "f2_space_usage = (λx. f2_space_usage (n_of x, m_of x, ε_of x, δ_of x))"
    by (simp add:case_prod_beta' n_of_def ε_of_def δ_of_def m_of_def)
  also have "...  O[?F](g)"
    by (auto intro!:sum_in_bigo simp:Let_def log_def l6 l7 l8 l10 unit_8)
  also have "... = O[?F](?rhs)"
    by (simp add:case_prod_beta' g_def n_of_def ε_of_def δ_of_def m_of_def)
  finally show ?thesis by simp
qed

end