# Theory Frequency_Moment_0

section ‹Frequency Moment $0$\label{sec:f0}›

theory Frequency_Moment_0
imports
Frequency_Moments_Preliminary_Results
Median_Method.Median
K_Smallest
Universal_Hash_Families.Carter_Wegman_Hash_Family
Frequency_Moments
Landau_Ext
Product_PMF_Ext
Universal_Hash_Families.Field
begin

text ‹This section contains a formalization of a new algorithm for the zero-th frequency moment
inspired by ideas described in \<^cite>‹"baryossef2002"›.
It is a KMV-type ($k$-minimum value) algorithm with a rounding method and matches the space complexity
of the best algorithm described in \<^cite>‹"baryossef2002"›.

In addition to the Isabelle proof here, there is also an informal hand-written proof in
Appendix~\ref{sec:f0_proof}.›

type_synonym f0_state = "nat × nat × nat × nat × (nat ⇒ nat list) × (nat ⇒ float set)"

definition hash where "hash p = ring.hash (mod_ring p)"

fun f0_init :: "rat ⇒ rat ⇒ nat ⇒ f0_state pmf" where
"f0_init δ ε n =
do {
let s = nat ⌈-18 * ln (real_of_rat ε)⌉;
let t = nat ⌈80 / (real_of_rat δ)⇧2⌉;
let p = prime_above (max n 19);
let r = nat (4 * ⌈log 2 (1 / real_of_rat δ)⌉ + 23);
h ← prod_pmf {..<s} (λ_. pmf_of_set (bounded_degree_polynomials (mod_ring p) 2));
return_pmf (s, t, p, r, h, (λ_ ∈ {0..<s}. {}))
}"

fun f0_update :: "nat ⇒ f0_state ⇒ f0_state pmf" where
"f0_update x (s, t, p, r, h, sketch) =
return_pmf (s, t, p, r, h, λi ∈ {..<s}.
least t (insert (float_of (truncate_down r (hash p x (h i)))) (sketch i)))"

fun f0_result :: "f0_state ⇒ rat pmf" where
"f0_result (s, t, p, r, h, sketch) = return_pmf (median s (λi ∈ {..<s}.
(if card (sketch i) < t then of_nat (card (sketch i)) else
rat_of_nat t* rat_of_nat p / rat_of_float (Max (sketch i)))
))"

fun f0_space_usage :: "(nat × rat × rat) ⇒ real" where
"f0_space_usage (n, ε, δ) = (
let s = nat ⌈-18 * ln (real_of_rat ε)⌉ in
let r = nat (4 * ⌈log 2 (1 / real_of_rat δ)⌉ + 23) in
let t = nat ⌈80 / (real_of_rat δ)⇧2 ⌉ in
6 +
2 * log 2 (real s + 1) +
2 * log 2 (real t + 1) +
2 * log 2 (real n + 21) +
2 * log 2 (real r + 1) +
real s * (5 + 2 * log 2 (21 + real n) +
real t * (13 + 4 * r + 2 * log 2 (log 2 (real n + 13)))))"

definition encode_f0_state :: "f0_state ⇒ bool list option" where
"encode_f0_state =
N⇩e ⋈⇩e (λs.
N⇩e ×⇩e (
N⇩e ⋈⇩e (λp.
N⇩e ×⇩e (
([0..<s] →⇩e (P⇩e p 2)) ×⇩e
([0..<s] →⇩e (S⇩e F⇩e))))))"

lemma "inj_on encode_f0_state (dom encode_f0_state)"
proof -
have "is_encoding encode_f0_state"
unfolding encode_f0_state_def
by (intro dependent_encoding exp_golomb_encoding poly_encoding fun_encoding set_encoding float_encoding)
thus ?thesis  by (rule encoding_imp_inj)
qed

context
fixes ε δ :: rat
fixes n :: nat
fixes as :: "nat list"
fixes result
assumes ε_range: "ε ∈ {0<..<1}"
assumes δ_range: "δ ∈ {0<..<1}"
assumes as_range: "set as ⊆ {..<n}"
defines "result ≡ fold (λa state. state ⤜ f0_update a) as (f0_init δ ε n) ⤜ f0_result"
begin

private definition t where "t = nat ⌈80 / (real_of_rat δ)⇧2⌉"
private lemma t_gt_0: "t > 0" using δ_range by (simp add:t_def)

private definition s where "s = nat ⌈-(18 * ln (real_of_rat ε))⌉"
private lemma s_gt_0: "s > 0" using ε_range by (simp add:s_def)

private definition p where "p = prime_above (max n 19)"

private lemma p_prime:"Factorial_Ring.prime p"
using p_def prime_above_prime by presburger

private lemma p_ge_18: "p ≥ 18"
proof -
have "p ≥ 19"
by (metis p_def prime_above_lower_bound max.bounded_iff)
thus ?thesis by simp
qed

private lemma p_gt_0: "p > 0" using p_ge_18 by simp
private lemma p_gt_1: "p > 1" using p_ge_18 by simp

private lemma n_le_p: "n ≤ p"
proof -
have "n ≤ max n 19" by simp
also have "... ≤ p"
unfolding p_def by (rule prime_above_lower_bound)
finally show ?thesis by simp
qed

private lemma p_le_n: "p ≤ 2*n + 40"
proof -
have "p ≤ 2 * (max n 19) + 2"
by (subst p_def, rule prime_above_upper_bound)
also have "... ≤ 2 * n + 40"
by (cases "n ≥ 19", auto)
finally show ?thesis by simp
qed

private lemma as_lt_p: "⋀x. x ∈ set as ⟹ x < p"
using as_range atLeastLessThan_iff
by (intro order_less_le_trans[OF _ n_le_p]) blast

private lemma as_subset_p: "set as ⊆ {..<p}"
using as_lt_p  by (simp add: subset_iff)

private definition r where "r = nat (4 * ⌈log 2 (1 / real_of_rat δ)⌉ + 23)"

private lemma r_bound: "4 * log 2 (1 / real_of_rat δ) + 23 ≤ r"
proof -
have "0 ≤ log 2 (1 / real_of_rat δ)" using δ_range by simp
hence "0 ≤ ⌈log 2 (1 / real_of_rat δ)⌉" by simp
hence "0 ≤ 4 * ⌈log 2 (1 / real_of_rat δ)⌉ + 23"
qed

private lemma r_ge_23: "r ≥ 23"
proof -
have "(23::real) = 0 + 23" by simp
also have "... ≤ 4 * log 2 (1 / real_of_rat δ) + 23"
using δ_range by (intro add_mono mult_nonneg_nonneg, auto)
also have "... ≤ r" using r_bound by simp
finally show "23 ≤ r" by simp
qed

private lemma two_pow_r_le_1: "0 < 1 - 2 powr - real r"
proof -
have a: "2 powr (0::real) = 1"
by simp
show ?thesis using r_ge_23
by (simp, subst a[symmetric], intro powr_less_mono, auto)
qed

interpretation carter_wegman_hash_family "mod_ring p" 2
rewrites "ring.hash (mod_ring p) = Frequency_Moment_0.hash p"
using carter_wegman_hash_familyI[OF mod_ring_is_field mod_ring_finite]
using hash_def p_prime by auto

private definition tr_hash where "tr_hash x ω = truncate_down r (hash x ω)"

private definition sketch_rv where
"sketch_rv ω = least t ((λx. float_of (tr_hash x ω))  set as)"

private definition estimate
where "estimate S = (if card S < t then of_nat (card S) else of_nat t * of_nat p / rat_of_float (Max S))"

private definition sketch_rv' where "sketch_rv' ω = least t ((λx. tr_hash x ω)  set as)"
private definition estimate' where "estimate' S = (if card S < t then real (card S) else real t * real p / Max S)"

private definition Ω⇩0 where "Ω⇩0 = prod_pmf {..<s} (λ_. pmf_of_set space)"

private lemma f0_alg_sketch:
defines "sketch ≡ fold (λa state. state ⤜ f0_update a) as (f0_init δ ε n)"
shows "sketch = map_pmf (λx. (s,t,p,r, x, λi ∈ {..<s}. sketch_rv (x i))) Ω⇩0"
unfolding sketch_rv_def
proof (subst sketch_def, induction as rule:rev_induct)
case Nil
then show ?case
by (simp add:s_def p_def[symmetric] map_pmf_def t_def r_def Let_def least_def restrict_def space_def Ω⇩0_def)
next
case (snoc x xs)
let ?sketch = "λω xs. least t ((λa. float_of (tr_hash a ω))  set xs)"
have "fold (λa state. state ⤜ f0_update a) (xs @ [x]) (f0_init δ ε n) =
(map_pmf (λω. (s, t, p, r, ω, λi ∈ {..<s}. ?sketch (ω i) xs)) Ω⇩0) ⤜ f0_update x"
by (simp add: restrict_def snoc del:f0_init.simps)
also have "... = Ω⇩0 ⤜ (λω. f0_update x (s, t, p, r, ω, λi∈{..<s}. ?sketch (ω i) xs)) "
by (simp add:map_pmf_def bind_assoc_pmf bind_return_pmf del:f0_update.simps)
also have "... = map_pmf (λω. (s, t, p, r, ω, λi∈{..<s}. ?sketch (ω i) (xs@[x]))) Ω⇩0"
by (simp add:least_insert map_pmf_def tr_hash_def cong:restrict_cong)
finally show ?case by blast
qed

private lemma card_nat_in_ball:
fixes x :: nat
fixes q :: real
assumes "q ≥ 0"
defines "A ≡ {k. abs (real x - real k) ≤ q ∧ k ≠ x}"
shows "real (card A) ≤ 2 * q" and "finite A"
proof -
have a: "of_nat x ∈ {⌈real x-q⌉..⌊real x+q⌋}"
using assms

have "card A = card (int  A)"
by (rule card_image[symmetric], simp)
also have "... ≤ card ({⌈real x-q⌉..⌊real x+q⌋} - {of_nat x})"
by (intro card_mono image_subsetI, simp_all add:A_def abs_le_iff, linarith)
also have "... = card {⌈real x-q⌉..⌊real x+q⌋} - 1"
by (rule card_Diff_singleton, rule a)
also have "... = int (card {⌈real x-q⌉..⌊real x+q⌋}) - int 1"
by (intro of_nat_diff)
(metis a card_0_eq empty_iff finite_atLeastAtMost_int less_one linorder_not_le)
also have "... ≤ ⌊q+real x⌋+1 -⌈real x-q⌉ - 1"
using assms by (simp, linarith)
also have "... ≤ 2*q"
by linarith
finally show "card A ≤ 2 * q"
by simp

have "A ⊆ {..x + nat ⌈q⌉}"
by (rule subsetI, simp add:A_def abs_le_iff, linarith)
thus "finite A"
by (rule finite_subset, simp)
qed

private lemma prob_degree_lt_1:
"prob {ω. degree ω < 1} ≤ 1/real p"
proof -
have "space ∩ {ω. length ω ≤ Suc 0} = bounded_degree_polynomials (mod_ring p) 1"
by (auto simp:set_eq_iff bounded_degree_polynomials_def space_def)
moreover have "field_size = p" by (simp add:mod_ring_def)
hence "real (card (bounded_degree_polynomials (mod_ring p) (Suc 0))) / real (card space) = 1 / real p"
ultimately show ?thesis
qed

private lemma collision_prob:
assumes "c ≥ 1"
shows "prob {ω. ∃x ∈ set as. ∃y ∈ set as. x ≠ y ∧ tr_hash x ω ≤ c ∧ tr_hash x ω = tr_hash y ω} ≤
(5/2) * (real (card (set as)))⇧2 * c⇧2 * 2 powr -(real r) / (real p)⇧2 + 1/real p" (is "prob {ω. ?l ω} ≤ ?r1 + ?r2")
proof -
define ρ :: real where "ρ = 9/8"

have rho_c_ge_0: "ρ * c ≥ 0" unfolding ρ_def using assms by simp

have c_ge_0: "c≥0" using assms by simp

have "degree ω ≥ 1 ⟹ ω ∈ space ⟹ degree ω = 1" for ω
(metis One_nat_def Suc_1 le_less_Suc_eq less_imp_diff_less list.size(3) pos2)

hence a: "⋀ω x y. x < p ⟹ y < p ⟹  x ≠ y ⟹ degree ω ≥ 1 ⟹ ω ∈ space ⟹  hash x ω ≠ hash y ω"
using inj_onD[OF inj_if_degree_1]  mod_ring_carr by blast

have b: "prob {ω. degree ω ≥ 1 ∧ tr_hash x ω ≤ c ∧ tr_hash x ω = tr_hash y ω} ≤ 5 * c⇧2 * 2 powr (-real r) /(real p)⇧2"
if b_assms: "x ∈ set as"  "y ∈ set as"  "x < y" for x y
proof -
have c: "real u ≤ ρ * c ∧ ¦real u - real v¦ ≤ ρ * c * 2 powr (-real r)"
if c_assms:"truncate_down r (real u) ≤ c" "truncate_down r (real u) = truncate_down r (real v)" for u v
proof -
have "9 * 2 powr - real r ≤ 9 * 2 powr (- real 23)"
using r_ge_23 by (intro mult_left_mono powr_mono, auto)

also have "... ≤ 1" by simp

finally have "9 * 2 powr - real r ≤ 1" by simp

hence "1 ≤ ρ * (1 - 2 powr (- real r))"

hence d: "(c*1) / (1 - 2 powr (-real r)) ≤ c * ρ"
using assms two_pow_r_le_1 by (simp add: pos_divide_le_eq)

have "⋀x. truncate_down r (real x) ≤ c ⟹ real x * (1 - 2 powr - real r) ≤ c * 1"
using  truncate_down_pos[OF of_nat_0_le_iff] order_trans by (simp, blast)

hence "⋀x. truncate_down r (real x) ≤  c  ⟹ real x ≤ c * ρ"
using two_pow_r_le_1 by (intro order_trans[OF _ d], simp add: pos_le_divide_eq)

hence e: "real u ≤ c * ρ" "real v ≤ c * ρ"
using c_assms by auto

have " ¦real u - real v¦ ≤ (max ¦real u¦ ¦real v¦) * 2 powr (-real r)"
using c_assms by (intro truncate_down_eq, simp)

also have "... ≤ (c * ρ) * 2 powr (-real r)"
using e by (intro mult_right_mono, auto)

finally have "¦real u - real v¦ ≤ ρ * c * 2 powr (-real r)"

thus ?thesis using e by (simp add:algebra_simps)
qed

have "prob {ω. degree ω ≥ 1 ∧ tr_hash x ω ≤ c ∧ tr_hash x ω = tr_hash y ω} ≤
prob (⋃ i ∈ {(u,v) ∈ {..<p} × {..<p}. u ≠ v ∧ truncate_down r u ≤ c ∧ truncate_down r u = truncate_down r v}.
{ω.  hash x ω = fst i ∧ hash y ω = snd i})"
using a by (intro pmf_mono[OF M_def], simp add:tr_hash_def)
(metis hash_range mod_ring_carr b_assms as_subset_p lessThan_iff nat_neq_iff subset_eq)

also have "... ≤ (∑ i∈ {(u,v) ∈ {..<p} × {..<p}. u ≠ v ∧
truncate_down r u ≤ c ∧ truncate_down r u = truncate_down r v}.
prob {ω. hash x ω = fst i ∧ hash  y ω = snd i})"
by (intro measure_UNION_le finite_cartesian_product finite_subset[where B="{0..<p} × {0..<p}"])

also have "... ≤ (∑ i∈ {(u,v) ∈ {..<p} × {..<p}. u ≠ v ∧
truncate_down r u ≤ c ∧ truncate_down r u = truncate_down r v}.
prob {ω. (∀u ∈ {x,y}. hash u ω = (if u = x then (fst i) else (snd i)))})"
by (intro sum_mono  pmf_mono[OF M_def]) force

also have "... ≤ (∑ i∈ {(u,v) ∈ {..<p} × {..<p}. u ≠ v ∧
truncate_down r u ≤ c ∧ truncate_down r u = truncate_down r v}. 1/(real p)⇧2)"
using assms as_subset_p b_assms
by (intro sum_mono, subst hash_prob)  (auto simp add: mod_ring_def power2_eq_square)

also have "... = 1/(real p)⇧2 *
card {(u,v) ∈ {0..<p} × {0..<p}. u ≠ v ∧ truncate_down r u ≤ c ∧ truncate_down r u = truncate_down r v}"
by simp

also have "... ≤ 1/(real p)⇧2 *
card {(u,v) ∈ {..<p} × {..<p}. u ≠ v ∧ real u ≤ ρ * c ∧ abs (real u - real v) ≤ ρ * c * 2 powr (-real r)}"
using c
by (intro mult_mono of_nat_mono card_mono finite_cartesian_product finite_subset[where B="{..<p}×{..<p}"])
auto

also have "... ≤ 1/(real p)⇧2 * card (⋃u' ∈ {u. u < p ∧ real u ≤ ρ * c}.
{(u::nat,v::nat). u = u' ∧ abs (real u - real v) ≤ ρ * c * 2 powr (-real r) ∧ v < p ∧ v ≠ u'})"
by (intro mult_left_mono of_nat_mono card_mono finite_cartesian_product finite_subset[where B="{..<p}×{..<p}"])
auto

also have "... ≤ 1/(real p)⇧2 * (∑ u' ∈ {u. u < p ∧ real u ≤ ρ * c}.
card  {(u,v). u = u' ∧ abs (real u - real v) ≤ ρ * c * 2 powr (-real r) ∧ v < p ∧ v ≠ u'})"
by (intro mult_left_mono of_nat_mono card_UN_le, auto)

also have "... = 1/(real p)⇧2 * (∑ u' ∈ {u. u < p ∧  real u ≤ ρ * c}.
card ((λx. (u' ,x))  {v. abs (real u' - real v) ≤ ρ * c * 2 powr (-real r) ∧ v < p ∧ v ≠ u'}))"
by (intro arg_cong2[where f="(*)"] arg_cong[where f="real"] sum.cong arg_cong[where f="card"])

also have "... ≤ 1/(real p)⇧2 * (∑ u' ∈ {u. u < p ∧ real u ≤ ρ * c}.
card {v. abs (real u' - real v) ≤ ρ * c * 2 powr (-real r) ∧ v < p ∧ v ≠ u'})"
by (intro mult_left_mono of_nat_mono sum_mono card_image_le, auto)

also have "... ≤ 1/(real p)⇧2 * (∑ u' ∈ {u. u < p ∧ real u ≤ ρ * c}.
card {v. abs (real u' - real v) ≤ ρ * c * 2 powr (-real r) ∧ v ≠ u'})"
by (intro mult_left_mono sum_mono of_nat_mono card_mono card_nat_in_ball subsetI)  auto

also have "... ≤ 1/(real p)⇧2 * (∑ u' ∈ {u. u < p ∧ real u ≤ ρ * c}.
real (card {v. abs (real u' - real v) ≤ ρ * c * 2 powr (-real r) ∧ v ≠ u'}))"
by simp

also have "... ≤ 1/(real p)⇧2 * (∑ u' ∈ {u. u < p ∧ real u ≤ ρ * c}. 2 * (ρ * c * 2 powr (-real r)))"
by (intro mult_left_mono sum_mono card_nat_in_ball(1), auto)

also have "... =  1/(real p)⇧2 * (real (card {u. u < p ∧ real u ≤ ρ * c}) * (2 * (ρ * c * 2 powr (-real r))))"
by simp

also have "... ≤  1/(real p)⇧2 * (real (card {u. u ≤ nat (⌊ρ * c ⌋)}) * (2 * (ρ * c * 2 powr (-real r))))"
using rho_c_ge_0 le_nat_floor
by (intro mult_left_mono mult_right_mono of_nat_mono card_mono subsetI) auto

also have "... ≤  1/(real p)⇧2 * ((1+ρ * c) * (2 * (ρ * c * 2 powr (-real r))))"
using rho_c_ge_0 by (intro mult_left_mono mult_right_mono, auto)

also have "... ≤  1/(real p)⇧2 * (((1+ρ) * c) * (2 * (ρ * c * 2 powr (-real r))))"
using assms by (intro mult_mono, auto simp add:distrib_left distrib_right ρ_def)

also have "... = (ρ * (2 + ρ * 2)) * c⇧2 * 2 powr (-real r) /(real p)⇧2"

also have "... ≤ 5 * c⇧2 *  2 powr (-real r) /(real p)⇧2"
by (intro divide_right_mono mult_right_mono) (auto simp add:ρ_def)

finally show ?thesis by simp
qed

have "prob {ω. ?l ω ∧ degree ω ≥ 1} ≤
prob (⋃ i ∈ {(x,y) ∈ (set as) × (set as). x < y}. {ω. degree ω ≥ 1 ∧ tr_hash (fst i) ω ≤ c ∧
tr_hash (fst i) ω = tr_hash (snd i) ω})"
by (rule pmf_mono[OF M_def], simp, metis linorder_neqE_nat)

also have "... ≤ (∑ i ∈ {(x,y) ∈ (set as) × (set as). x < y}. prob
{ω. degree ω ≥ 1 ∧ tr_hash  (fst i) ω ≤ c ∧ tr_hash (fst i) ω = tr_hash (snd i) ω})"
unfolding M_def
by (intro measure_UNION_le finite_cartesian_product finite_subset[where B="(set as) × (set as)"])
auto

also have "... ≤ (∑ i ∈ {(x,y) ∈ (set as) × (set as). x < y}. 5  * c⇧2 * 2 powr (-real r) /(real p)⇧2)"
using b by (intro sum_mono, simp add:case_prod_beta)

also have "... =  ((5/2) * c⇧2  * 2 powr (-real r) /(real p)⇧2) * (2 * card  {(x,y) ∈ (set as) × (set as). x < y})"
by simp

also have "... =  ((5/2) * c⇧2  * 2 powr (-real r) /(real p)⇧2) * (card (set as) * (card (set as) - 1))"
by (subst card_ordered_pairs, auto)

also have "... ≤ ((5/2) * c⇧2 * 2 powr (-real r) /(real p)⇧2) * (real (card (set as)))⇧2"
by (intro mult_left_mono) (auto simp add:power2_eq_square mult_left_mono)

also have "... = (5/2) * (real (card (set as)))⇧2 * c⇧2 * 2 powr (-real r) /(real p)⇧2"

finally have f:"prob {ω. ?l ω ∧ degree ω ≥ 1} ≤ ?r1" by simp

have "prob {ω. ?l ω} ≤ prob {ω. ?l ω ∧ degree ω ≥ 1} + prob {ω. degree ω < 1}"
also have "... ≤ ?r1 + ?r2"
finally show ?thesis by simp
qed

private lemma of_bool_square: "(of_bool x)⇧2 = ((of_bool x)::real)"
by (cases x, auto)

private definition Q where "Q y ω = card {x ∈ set as. int (hash x ω) < y}"

private definition m where "m = card (set as)"

private lemma
assumes "a ≥ 0"
assumes "a ≤ int p"
shows exp_Q: "expectation (λω. real (Q a ω)) = real m * (of_int a) / p"
and var_Q: "variance (λω. real (Q a ω)) ≤ real m * (of_int a) / p"
proof -
have exp_single: "expectation (λω. of_bool (int (hash x ω) < a)) = real_of_int a /real p"
if a:"x ∈ set as" for x
proof -
have x_le_p: "x < p" using a as_lt_p by simp
have "expectation (λω. of_bool (int (hash x ω) < a)) = expectation (indicat_real {ω. int (Frequency_Moment_0.hash p x ω) < a})"
by (intro arg_cong2[where f="integral⇧L"] ext, simp_all)
also have "... = prob {ω. hash x ω ∈ {k. int k < a}}"
also have "... = card ({k. int k < a} ∩ {..<p}) / real p"
by (subst prob_range, simp_all add: x_le_p mod_ring_def)
also have "... = card {..<nat a} / real p"
using assms by (intro arg_cong2[where f="(/)"] arg_cong[where f="real"] arg_cong[where f="card"])
also have "... =  real_of_int a/real p"
using assms by simp
finally show "expectation (λω. of_bool (int (hash x ω) < a)) = real_of_int a /real p"
by simp
qed

have "expectation(λω. real (Q a ω)) = expectation (λω. (∑x ∈ set as. of_bool (int (hash x ω) < a)))"
also have "... =  (∑x ∈ set as. expectation (λω. of_bool (int (hash x ω) < a)))"
by (rule Bochner_Integration.integral_sum, simp)
also have "... = (∑ x ∈ set as. a /real p)"
by (rule sum.cong, simp, subst exp_single, simp, simp)
also have "... = real m *  real_of_int a / real p"
finally show "expectation (λω. real (Q a ω)) = real m * real_of_int a / p" by simp

have indep: "J ⊆ set as ⟹ card J = 2 ⟹ indep_vars (λ_. borel) (λi x. of_bool (int (hash i x) < a)) J" for J
using as_subset_p mod_ring_carr
by (intro indep_vars_compose2[where Y="λi x. of_bool (int x < a)" and M'="λ_. discrete"]
k_wise_indep_vars_subset[OF k_wise_indep] finite_subset[OF _ finite_set]) auto

have rv: "⋀x. x ∈ set as ⟹ random_variable borel (λω. of_bool (int (hash x ω) < a))"

have "variance (λω. real (Q a ω)) = variance (λω. (∑x ∈ set as. of_bool (int (hash x ω) < a)))"
also have "... = (∑x ∈ set as. variance (λω. of_bool (int (hash x ω) < a)))"
by (intro var_sum_pairwise_indep_2 indep rv) auto
also have "... ≤ (∑ x ∈ set as. a / real p)"
also have "... = real m * real_of_int a /real p"
finally show "variance (λω. real (Q a ω)) ≤ real m * real_of_int a / p"
by simp
qed

private lemma t_bound: "t ≤ 81 / (real_of_rat δ)⇧2"
proof -
have "t ≤ 80 / (real_of_rat δ)⇧2 + 1" using t_def t_gt_0 by linarith
also have "... ≤ 80 / (real_of_rat δ)⇧2 + 1 /  (real_of_rat δ)⇧2"
also have "... = 81 / (real_of_rat δ)⇧2" by simp
finally show ?thesis by simp
qed

private lemma t_r_bound:
"18 * 40 * (real t)⇧2 * 2 powr (-real r) ≤ 1"
proof -
have "720 * (real t)⇧2 * 2 powr (-real r) ≤ 720 * (81 / (real_of_rat δ)⇧2)⇧2 * 2 powr (-4 * log 2 (1 / real_of_rat δ) - 23)"
using r_bound t_bound by (intro mult_left_mono mult_mono power_mono powr_mono, auto)

also have "... ≤ 720 * (81 / (real_of_rat δ)⇧2)⇧2 * (2 powr (-4 * log 2 (1 / real_of_rat δ)) * 2 powr (-23))"
using δ_range by (intro mult_left_mono mult_mono power_mono add_mono)

also have "... = 720 * (81⇧2 / (real_of_rat δ)^4) * (2 powr (log 2 ((real_of_rat δ)^4))  * 2 powr (-23))"
using δ_range by (intro arg_cong2[where f="(*)"])

also have "... = 720 * 81⇧2 * 2 powr (-23)" using δ_range by simp

also have "... ≤ 1" by simp

finally show ?thesis by simp
qed

private lemma m_eq_F_0: "real m = of_rat (F 0 as)"

private lemma estimate'_bounds:
"prob {ω. of_rat δ * real_of_rat (F 0 as) < ¦estimate' (sketch_rv' ω) - of_rat (F 0 as)¦} ≤ 1/3"
proof (cases "card (set as) ≥ t")
case True
define δ' where "δ' = 3 * real_of_rat δ / 4"
define u where "u = ⌈real t * p / (m * (1+δ'))⌉"
define v where "v = ⌊real t * p / (m * (1-δ'))⌋"

define has_no_collision where
"has_no_collision = (λω. ∀x∈ set as. ∀y ∈ set as. (tr_hash x ω = tr_hash y ω ⟶ x = y) ∨ tr_hash x ω > v)"

have "2 powr (-real r) ≤ 2 powr (-(4 * log 2 (1 / real_of_rat δ) + 23))"
using r_bound by (intro powr_mono, linarith, simp)
also have "... = 2 powr (-4 * log 2 (1 /real_of_rat δ) -23)"
by (rule arg_cong2[where f="(powr)"], auto simp add:algebra_simps)
also have "... ≤ 2 powr ( -1 * log 2 (1 /real_of_rat δ) -4)"
using δ_range by (intro powr_mono diff_mono, auto)
also have "... = 2 powr ( -1 * log 2 (1 /real_of_rat δ)) /  16"
also have "... = real_of_rat δ / 16"
also have "... < real_of_rat δ / 8"
using δ_range by (subst pos_divide_less_eq, auto)
finally have r_le_δ: "2 powr (-real r) < real_of_rat δ / 8"
by simp

have δ'_gt_0: "δ' > 0" using δ_range by (simp add:δ'_def)
have "δ' < 3/4" using δ_range by (simp add:δ'_def)+
also have "... < 1" by simp
finally have δ'_lt_1: "δ' < 1" by simp

have "t ≤ 81 / (real_of_rat δ)⇧2"
using t_bound by simp
also have "... = (81*9/16) / (δ')⇧2"
also have "... ≤ 46 / δ'⇧2"
by (intro divide_right_mono, simp, simp)
finally have t_le_δ': "t ≤ 46/ δ'⇧2" by simp

have "80 ≤ (real_of_rat δ)⇧2 * (80 / (real_of_rat δ)⇧2)" using δ_range by simp
also have "... ≤ (real_of_rat δ)⇧2 * t"
by (intro mult_left_mono, simp add:t_def of_nat_ceiling, simp)
finally have "80 ≤ (real_of_rat δ)⇧2 * t" by simp
hence t_ge_δ': "45 ≤ t * δ' * δ'" by (simp add:δ'_def power2_eq_square)

have "m ≤ card {..<n}" unfolding m_def using as_range by (intro card_mono, auto)
also have "... ≤ p" using n_le_p by simp
finally have m_le_p: "m ≤ p" by simp

hence t_le_m: "t ≤ card (set as)" using True by simp
have m_ge_0: "real m > 0" using m_def True t_gt_0 by simp

have "v ≤ real t * real p / (real m * (1 - δ'))" by (simp add:v_def)

also have "... ≤ real t * real p / (real m * (1/4))"
using δ'_lt_1 m_ge_0 δ_range
by (intro divide_left_mono mult_left_mono mult_nonneg_nonneg mult_pos_pos, simp_all add:δ'_def)

finally have v_ubound: "v ≤ 4 * real t * real p / real m" by (simp add:algebra_simps)

have a_ge_1: "u ≥ 1" using δ'_gt_0 p_gt_0 m_ge_0 t_gt_0
by (auto intro!:mult_pos_pos divide_pos_pos simp add:u_def)
hence a_ge_0: "u ≥ 0" by simp
have "real m * (1 - δ') < real m" using δ'_gt_0 m_ge_0 by simp
also have "... ≤ 1 * real p" using m_le_p by simp
also have "... ≤ real t * real p" using t_gt_0 by (intro mult_right_mono, auto)
finally have " real m * (1 - δ') < real t * real p" by simp
hence v_gt_0: "v > 0" using mult_pos_pos m_ge_0 δ'_lt_1 by (simp add:v_def)
hence v_ge_1: "real_of_int v ≥ 1" by linarith

have "real t ≤ real m" using True m_def by linarith
also have "... < (1 + δ') * real m" using δ'_gt_0 m_ge_0 by force
finally have a_le_p_aux: "real t < (1 + δ') * real m"  by simp

have "u ≤ real t * real p / (real m * (1 + δ'))+1" by (simp add:u_def)
also have "... < real p + 1"
using m_ge_0 δ'_gt_0 a_le_p_aux  a_le_p_aux p_gt_0
finally have "u ≤ real p"
by (metis int_less_real_le not_less of_int_le_iff of_int_of_nat_eq)
hence u_le_p: "u ≤ int p" by linarith

have "prob {ω. Q u ω ≥ t} ≤ prob {ω ∈ Sigma_Algebra.space M. abs (real (Q u ω) -
expectation (λω. real (Q u ω))) ≥ 3 * sqrt (m * real_of_int u / p)}"
proof (rule pmf_mono[OF M_def])
fix ω
assume "ω ∈ {ω. t ≤ Q u ω}"
hence t_le: "t ≤ Q u ω" by simp
have "real m * real_of_int u / real p ≤ real m * (real t * real p / (real m * (1 + δ'))+1) / real p"
using m_ge_0 p_gt_0 by (intro divide_right_mono mult_left_mono, simp_all add: u_def)
also have "... = real m * real t * real p / (real m * (1+δ') * real p) + real m / real p"
also have "... = real t / (1+δ') + real m / real p"
using p_gt_0 m_ge_0 by simp
also have "... ≤ real t / (1+δ') + 1"
using m_le_p p_gt_0 by (intro add_mono, auto)
finally have "real m * real_of_int u / real p ≤ real t / (1 + δ') + 1"
by simp

hence "3 * sqrt (real m * of_int u / real p) + real m * of_int u / real p ≤
3 * sqrt (t / (1+δ')+1)+(t/(1+δ')+1)"
by (intro add_mono mult_left_mono real_sqrt_le_mono, auto)
also have "... ≤ 3 * sqrt (real t+1) + ((t * (1 - δ' / (1+δ'))) + 1)"
using δ'_gt_0 t_gt_0 by (intro add_mono mult_left_mono real_sqrt_le_mono)
also have "... = 3 * sqrt (real t+1) + (t - δ' * t / (1+δ')) + 1" by (simp add:algebra_simps)
also have "... ≤ 3 * sqrt (46 / δ'⇧2 + 1 / δ'⇧2) + (t - δ' * t/2) + 1 / δ'"
using δ'_gt_0 t_gt_0 δ'_lt_1 add_pos_pos  t_le_δ'
also have "... ≤ (21 / δ' + (t - 45 / (2*δ'))) + 1 / δ'"
using δ'_gt_0 t_ge_δ' by (intro add_mono)
(simp_all add:real_sqrt_divide divide_le_cancel real_le_lsqrt pos_divide_le_eq ac_simps)
also have "... ≤ t" using δ'_gt_0 by simp
also have "... ≤ Q u ω" using t_le by simp
finally have "3 * sqrt (real m * of_int u / real p) + real m * of_int u / real p ≤ Q u ω"
by simp
hence " 3 * sqrt (real m * real_of_int u / real p) ≤ ¦real (Q u ω) - expectation (λω. real (Q u ω))¦"
using a_ge_0 u_le_p  True by (simp add:exp_Q abs_ge_iff)

thus "ω ∈ {ω ∈ Sigma_Algebra.space M. 3 * sqrt (real m * real_of_int u / real p) ≤
¦real (Q u ω) - expectation (λω. real (Q u ω))¦}"
qed
also have "... ≤ variance  (λω. real (Q u ω)) / (3 * sqrt (real m * of_int u / real p))⇧2"
using a_ge_1 p_gt_0 m_ge_0
by (intro Chebyshev_inequality, simp add:M_def, auto)

also have "... ≤ (real m * real_of_int u / real p) / (3 * sqrt (real m * of_int u / real p))⇧2"
using a_ge_0 u_le_p by (intro divide_right_mono var_Q, auto)

also have "... ≤ 1/9" using a_ge_0 by simp

finally have case_1: "prob {ω. Q u ω ≥ t} ≤ 1/9" by simp

have case_2: "prob {ω. Q v ω < t} ≤ 1/9"
proof (cases "v ≤ p")
case True
have "prob {ω. Q v ω < t} ≤ prob {ω ∈ Sigma_Algebra.space M. abs (real (Q v ω) - expectation (λω. real (Q v ω)))
≥ 3 * sqrt (m * real_of_int v / p)}"
proof (rule pmf_mono[OF M_def])
fix ω
assume "ω ∈ set_pmf (pmf_of_set space)"
have "(real t + 3 * sqrt (real t / (1 - δ') )) * (1 - δ') = real t - δ' * t + 3 * ((1-δ') * sqrt( real t / (1-δ') ))"

also have "... = real t - δ' * t + 3 * sqrt (  (1-δ')⇧2 * (real t /  (1-δ')))"
using δ'_lt_1 by (subst real_sqrt_mult, simp)

also have "... = real t - δ' * t + 3 * sqrt ( real t * (1- δ'))"

also have "... ≤ real t - 45/ δ' + 3 * sqrt ( real t )"
using δ'_gt_0 t_ge_δ' δ'_lt_1 by (intro add_mono mult_left_mono real_sqrt_le_mono)

also have "... ≤ real t - 45/ δ' + 3 * sqrt ( 46 / δ'⇧2)"
using  t_le_δ' δ'_lt_1 δ'_gt_0

also have "... = real t + (3 * sqrt(46) - 45)/ δ'"
using δ'_gt_0 by (simp add:real_sqrt_divide diff_divide_distrib)

also have "... ≤ t"
using δ'_gt_0 by (simp add:pos_divide_le_eq real_le_lsqrt)

finally have aux: "(real t + 3 * sqrt (real t / (1 - δ'))) * (1 - δ') ≤ real t "
by simp

assume "ω ∈ {ω. Q v ω < t}"
hence "Q v ω < t" by simp

hence "real (Q v ω) + 3 * sqrt (real m * real_of_int v / real p)
≤ real t - 1 + 3 * sqrt (real m * real_of_int v / real p)"

also have "... ≤ (real t-1) + 3 * sqrt (real m * (real t * real p / (real m * (1- δ'))) / real p)"
by (intro add_mono mult_left_mono real_sqrt_le_mono divide_right_mono)

also have "... ≤ real t + 3 * sqrt(real t / (1-δ')) - 1"
using m_ge_0 p_gt_0 by simp

also have "... ≤ real t / (1-δ')-1"
using δ'_lt_1 aux by (simp add: pos_le_divide_eq)
also have "... ≤ real m * (real t * real p / (real m * (1-δ'))) / real p - 1"
using p_gt_0 m_ge_0 by simp
also have "... ≤ real m * (real t * real p / (real m * (1-δ'))) / real p - real m / real p"
using m_le_p p_gt_0
by (intro diff_mono, auto)
also have "... = real m * (real t * real p / (real m * (1-δ'))-1) / real p"
by (simp add: left_diff_distrib right_diff_distrib diff_divide_distrib)
also have "... ≤  real m * real_of_int v / real p"
by (intro divide_right_mono mult_left_mono, simp_all add:v_def)

finally have "real (Q v ω) + 3 * sqrt (real m * real_of_int v / real p)
≤ real m * real_of_int v / real p" by simp

hence " 3 * sqrt (real m * real_of_int v / real p) ≤ ¦real (Q v ω) -expectation (λω. real (Q v ω))¦"
using v_gt_0 True by (simp add: exp_Q abs_ge_iff)

thus "ω ∈ {ω∈ Sigma_Algebra.space M. 3 * sqrt (real m * real_of_int v / real p) ≤
¦real (Q v ω) - expectation (λω. real (Q v ω))¦}"
qed
also have "... ≤ variance (λω. real (Q v ω)) / (3 * sqrt (real m * real_of_int v / real p))⇧2"
using v_gt_0 p_gt_0 m_ge_0
by (intro Chebyshev_inequality, simp add:M_def, auto)

also have "... ≤ (real m * real_of_int v / real p) / (3 * sqrt (real m * real_of_int v / real p))⇧2"
using  v_gt_0 True  by (intro divide_right_mono var_Q, auto)

also have "... = 1/9"
using p_gt_0 v_gt_0 m_ge_0 by (simp add:power2_eq_square)

finally show ?thesis by simp
next
case False
have "prob {ω. Q v ω < t} ≤ prob {ω. False}"
proof (rule pmf_mono[OF M_def])
fix ω
assume a:"ω ∈ {ω. Q v ω < t}"
assume "ω ∈ set_pmf (pmf_of_set space)"
hence b:"⋀x. x < p ⟹ hash x ω < p"
using hash_range mod_ring_carr by (simp add:M_def measure_pmf_inverse)
have "t ≤ card (set as)" using True by simp
also have "... ≤ Q v ω"
unfolding Q_def  using b False as_lt_p by (intro card_mono subsetI, simp, force)
also have "... < t" using a by simp
finally have "False" by auto
thus "ω ∈ {ω. False}" by simp
qed
also have "... = 0" by auto
finally show ?thesis by simp
qed

have "prob {ω. ¬has_no_collision ω} ≤
prob {ω. ∃x ∈ set as. ∃y ∈ set as. x ≠ y ∧ tr_hash x ω ≤ real_of_int v ∧ tr_hash x ω = tr_hash y ω}"
by (rule pmf_mono[OF M_def]) (simp add:has_no_collision_def M_def, force)

also have "... ≤ (5/2) * (real (card (set as)))⇧2 * (real_of_int v)⇧2 * 2 powr - real r / (real p)⇧2 + 1 / real p"
using collision_prob v_ge_1 by blast

also have "... ≤ (5/2) * (real m)⇧2 * (real_of_int v)⇧2 * 2 powr - real r / (real p)⇧2 + 1 / real p"

also have "... ≤ (5/2) * (real m)⇧2 * (4 * real t * real p / real m)⇧2 * (2 powr - real r) / (real p)⇧2 + 1 / real p"
using v_def v_ge_1 v_ubound
by (intro add_mono divide_right_mono  mult_right_mono  mult_left_mono, auto)

also have "... = 40 * (real t)⇧2 * (2 powr -real r) + 1 / real p"
using p_gt_0 m_ge_0 t_gt_0 by (simp add:algebra_simps power2_eq_square)

also have "... ≤ 1/18 + 1/18"

also have "... = 1/9" by simp

finally have case_3: "prob {ω. ¬has_no_collision ω} ≤ 1/9" by simp

have "prob {ω. real_of_rat δ * of_rat (F 0 as) < ¦estimate' (sketch_rv' ω) - of_rat (F 0 as)¦} ≤
prob {ω. Q u ω ≥ t ∨ Q v ω < t ∨ ¬(has_no_collision ω)}"
proof (rule pmf_mono[OF M_def], rule ccontr)
fix ω
assume "ω ∈ set_pmf (pmf_of_set space)"
assume "ω ∈ {ω. real_of_rat δ * real_of_rat (F 0 as) < ¦estimate' (sketch_rv' ω) - real_of_rat (F 0 as)¦}"
hence est: "real_of_rat δ * real_of_rat (F 0 as) < ¦estimate' (sketch_rv' ω) - real_of_rat (F 0 as)¦" by simp
assume "ω ∉ {ω. t ≤ Q u ω ∨ Q v ω < t ∨ ¬ has_no_collision ω}"
hence "¬( t ≤ Q u ω ∨ Q v ω < t ∨ ¬ has_no_collision ω)" by simp
hence lb: "Q u ω < t" and ub: "Q v ω ≥ t" and no_col: "has_no_collision ω" by simp+

define y where "y =  nth_mset (t-1) {#int (hash x ω). x ∈# mset_set (set as)#}"
define y' where "y' = nth_mset (t-1) {#tr_hash x ω. x ∈# mset_set (set as)#}"

have rank_t_lb: "u ≤ y"
unfolding y_def using True t_gt_0 lb
by (intro nth_mset_bound_left, simp_all add:count_less_def swap_filter_image Q_def)

have rank_t_ub: "y ≤ v - 1"
unfolding y_def using True t_gt_0 ub
by (intro nth_mset_bound_right, simp_all add:Q_def swap_filter_image count_le_def)

have y_ge_0: "real_of_int y ≥ 0" using rank_t_lb a_ge_0 by linarith

have "mono (λx. truncate_down r (real_of_int x))"
by (metis truncate_down_mono mono_def of_int_le_iff)
hence y'_eq: "y' = truncate_down r y"
unfolding y_def y'_def  using True t_gt_0
by (subst nth_mset_commute_mono[where f="(λx. truncate_down r (of_int x))"])

have "real_of_int u * (1 - 2 powr -real r) ≤ real_of_int y * (1 - 2 powr (-real r))"
using rank_t_lb of_int_le_iff two_pow_r_le_1
by (intro mult_right_mono, auto)
also have "... ≤ y'"
using y'_eq truncate_down_pos[OF y_ge_0] by simp
finally have rank_t_lb': "u * (1 - 2 powr -real r) ≤ y'" by simp

have "y' ≤ real_of_int y"
by (subst y'_eq, rule truncate_down_le, simp)
also have "... ≤ real_of_int (v-1)"
using rank_t_ub of_int_le_iff by blast
finally have rank_t_ub': "y' ≤ v-1"
by simp

have "0 < u * (1-2 powr -real r)"
using a_ge_1 two_pow_r_le_1 by (intro mult_pos_pos, auto)
hence y'_pos: "y' > 0" using rank_t_lb' by linarith

have no_col': "⋀x. x ≤ y' ⟹ count {#tr_hash x ω. x ∈# mset_set (set as)#} x ≤ 1"
using  rank_t_ub' no_col
by (simp add:vimage_def card_le_Suc0_iff_eq count_image_mset has_no_collision_def) force

have h_1: "Max (sketch_rv' ω) = y'"
using True t_gt_0 no_col'

have "card (sketch_rv' ω) = card (least ((t-1)+1) (set_mset {#tr_hash x ω. x ∈# mset_set (set as)#}))"
also have "... = (t-1) +1"
using True t_gt_0 no_col' by (intro nth_mset_max(2), simp_all add:y'_def)
also have "... = t" using t_gt_0 by simp
finally have "card (sketch_rv' ω) = t" by simp
hence h_3: "estimate' (sketch_rv' ω) = real t * real p / y'"

have "(real t) * real p ≤  (1 + δ') * real m * ((real t) * real p / (real m * (1 + δ')))"
using δ'_lt_1 m_def True t_gt_0 δ'_gt_0 by auto
also have "... ≤ (1+δ') * m * u"
using δ'_gt_0 by (intro mult_left_mono, simp_all add:u_def)
also have "... < ((1 + real_of_rat δ)*(1-real_of_rat δ/8)) * m * u"
using True m_def t_gt_0 a_ge_1 δ_range
by (intro mult_strict_right_mono, auto simp add:δ'_def right_diff_distrib)
also have "... ≤ ((1 + real_of_rat δ)*(1-2 powr (-r))) * m * u"
using r_le_δ δ_range a_ge_0 by (intro mult_right_mono mult_left_mono, auto)
also have "... = (1 + real_of_rat δ) * m * (u * (1-2 powr -real r))"
by simp
also have "... ≤ (1 + real_of_rat δ) * m * y'"
using δ_range by (intro mult_left_mono rank_t_lb', simp)
finally have "real t * real p < (1 + real_of_rat δ) * m * y'" by simp
hence f_1: "estimate' (sketch_rv' ω) < (1 + real_of_rat δ) * m"
using y'_pos by (simp add: h_3 pos_divide_less_eq)

have "(1 - real_of_rat δ) * m * y' ≤ (1 - real_of_rat δ) * m * v"
using δ_range rank_t_ub' y'_pos by (intro mult_mono rank_t_ub', simp_all)
also have "... = (1-real_of_rat δ) * (real m * v)"
by simp
also have "... < (1-δ') * (real m * v)"
using δ_range m_ge_0 v_ge_1
by (intro mult_strict_right_mono mult_pos_pos, simp_all add:δ'_def)
also have "... ≤ (1-δ') * (real m * (real t * real p / (real m * (1-δ'))))"
using δ'_gt_0 δ'_lt_1 by (intro mult_left_mono, auto simp add:v_def)
also have "... = real t * real p"
using δ'_gt_0 δ'_lt_1 t_gt_0 p_gt_0 m_ge_0 by auto
finally have "(1 - real_of_rat δ) * m * y' < real t * real p" by simp
hence f_2: "estimate' (sketch_rv' ω) > (1 - real_of_rat δ) * m"
using y'_pos by (simp add: h_3 pos_less_divide_eq)

have "abs (estimate' (sketch_rv' ω) - real_of_rat (F 0 as)) < real_of_rat δ * (real_of_rat (F 0 as))"
using f_1 f_2 by (simp add:abs_less_iff algebra_simps m_eq_F_0)
thus "False" using est by linarith
qed
also have "... ≤ 1/9 + (1/9 + 1/9)"
by (intro pmf_add_2[OF M_def] case_1 case_2 case_3)
also have "... = 1/3" by simp
finally show ?thesis by simp
next
case False
have "prob {ω. real_of_rat δ * of_rat (F 0 as) < ¦estimate' (sketch_rv' ω) - of_rat (F 0 as)¦} ≤
prob {ω. ∃x ∈ set as. ∃y ∈ set as. x ≠ y ∧ tr_hash x ω ≤ real p ∧ tr_hash x ω = tr_hash y ω}"
proof (rule pmf_mono[OF M_def])
fix ω
assume a:"ω ∈ {ω. real_of_rat δ * real_of_rat (F 0 as) < ¦estimate' (sketch_rv' ω) - real_of_rat (F 0 as)¦}"
assume b:"ω ∈ set_pmf (pmf_of_set space)"
have c: "card (set as) < t" using False by auto
hence "card ((λx. tr_hash x ω)  set as) < t"
using card_image_le order_le_less_trans by blast
hence d:"card (sketch_rv' ω) = card ((λx. tr_hash x ω)  (set as))"
have "card (sketch_rv' ω) < t"
by (metis List.finite_set  c d card_image_le  order_le_less_trans)
hence "estimate' (sketch_rv' ω) = card (sketch_rv' ω)" by (simp add:estimate'_def)
hence "card (sketch_rv' ω) ≠ real_of_rat (F 0 as)"
using a δ_range by simp
(metis abs_zero cancel_comm_monoid_add_class.diff_cancel of_nat_less_0_iff pos_prod_lt zero_less_of_rat_iff)
hence "card (sketch_rv' ω) ≠ card (set as)"
using m_def m_eq_F_0 by linarith
hence "¬inj_on (λx. tr_hash x ω) (set as)"
using card_image d by auto
moreover have "tr_hash x ω ≤ real p" if a:"x ∈ set as" for x
proof -
have "hash x ω < p"
using hash_range as_lt_p a b by (simp add:mod_ring_carr M_def)
thus "tr_hash x ω ≤ real p" using truncate_down_le by (simp add:tr_hash_def)
qed
ultimately show "ω ∈ {ω. ∃x ∈ set as. ∃y ∈ set as. x ≠ y ∧ tr_hash x ω ≤ real p ∧ tr_hash x ω = tr_hash y ω}"
qed
also have "... ≤ (5/2) * (real (card (set as)))⇧2 * (real p)⇧2 * 2 powr - real r / (real p)⇧2 + 1 / real p"
using p_gt_0 by (intro collision_prob, auto)
also have "... = (5/2) * (real (card (set as)))⇧2 * 2 powr (- real r) + 1 / real p"
using p_gt_0 by (simp add:ac_simps power2_eq_square)
also have "... ≤ (5/2) * (real t)⇧2 * 2 powr (-real r) + 1 / real p"
using False by (intro add_mono mult_right_mono mult_left_mono power_mono, auto)
also have "... ≤ 1/6 + 1/6"
using t_r_bound p_ge_18 by (intro add_mono, simp_all)
also have "... ≤ 1/3" by simp
finally show ?thesis by simp
qed

private lemma median_bounds:
"𝒫(ω in measure_pmf Ω⇩0. ¦median s (λi. estimate (sketch_rv (ω i))) - F 0 as¦ ≤ δ * F 0 as) ≥ 1 - real_of_rat ε"
proof -
have "strict_mono_on A real_of_float" for A by (meson less_float.rep_eq strict_mono_onI)
hence real_g_2: "⋀ω.  sketch_rv' ω = real_of_float  sketch_rv ω"
by (simp add: sketch_rv'_def sketch_rv_def tr_hash_def least_mono_commute image_comp)

moreover have "inj_on real_of_float A" for A
ultimately have card_eq: "⋀ω. card (sketch_rv ω) = card (sketch_rv' ω)"
using real_g_2 by (auto intro!: card_image[symmetric])

have "Max (sketch_rv' ω) = real_of_float (Max (sketch_rv ω))" if a:"card (sketch_rv' ω) ≥ t" for ω
proof -
have "mono real_of_float"
using less_eq_float.rep_eq mono_def by blast
moreover have "finite (sketch_rv ω)"
moreover have " sketch_rv ω ≠ {}"
using card_eq[symmetric] card_gt_0_iff t_gt_0 a by (simp, force)
ultimately show ?thesis
by (subst mono_Max_commute[where f="real_of_float"], simp_all add:real_g_2)
qed
hence real_g: "⋀ω. estimate' (sketch_rv' ω) = real_of_rat (estimate (sketch_rv ω))"

have indep: "prob_space.indep_vars (measure_pmf Ω⇩0) (λ_. borel) (λi ω. estimate' (sketch_rv' (ω i))) {0..<s}"
unfolding Ω⇩0_def
by (rule indep_vars_restrict_intro', auto simp add:restrict_dfl_def lessThan_atLeast0)

moreover have "- (18 * ln (real_of_rat ε)) ≤ real s"
using of_nat_ceiling by (simp add:s_def) blast

moreover have "i < s ⟹ measure Ω⇩0 {ω. of_rat δ * of_rat (F 0 as) < ¦estimate' (sketch_rv' (ω i)) - of_rat (F 0 as)¦} ≤ 1/3"
for i
using estimate'_bounds unfolding Ω⇩0_def M_def
by (subst prob_prod_pmf_slice, simp_all)

ultimately have "1-real_of_rat ε ≤ 𝒫(ω in measure_pmf Ω⇩0.
¦median s (λi. estimate' (sketch_rv' (ω i))) - real_of_rat (F 0 as)¦ ≤  real_of_rat δ * real_of_rat (F 0 as))"
using ε_range prob_space_measure_pmf
by (intro prob_space.median_bound_2) auto
also have "... = 𝒫(ω in measure_pmf Ω⇩0.