Theory Distributed_Distinct_Elements_Outer_Algorithm

section ‹Outer Algorithm\label{sec:outer_algorithm}›

text ‹This section introduces the final solution with optimal size space usage. Internally it relies
on the inner algorithm described in Section~\ref{sec:inner_algorithm}, dependending on the
paramaters $n$, $\varepsilon$ and $\delta$ it either uses the inner algorithm directly or if
$\varepsilon^{-1}$ is larger than $\ln n$ it runs $\frac{\varepsilon^{-1}}{\ln \ln n}$ copies of the
inner algorithm (with the modified failure probability $\frac{1}{\ln n}$) using an expander to
select its seeds. The theorems below verify that the probability that the relative
accuracy of the median of the copies is too large is below $\varepsilon$.›

theory Distributed_Distinct_Elements_Outer_Algorithm
  imports
    Distributed_Distinct_Elements_Accuracy
    Prefix_Free_Code_Combinators.Prefix_Free_Code_Combinators
    Frequency_Moments.Landau_Ext
    Landau_Symbols.Landau_More
begin

unbundle intro_cong_syntax

text ‹The following are non-asymptotic hard bounds on the space usage for the sketches and seeds
repsectively. The end of this section contains a proof that the sum is asymptotically in
$\mathcal O(\ln( \varepsilon^{-1}) \delta^{-1} + \ln n)$.›

definition "state_space_usage = (λ(n,ε,δ). 2^40 * (ln(1/δ)+1)/ ε^2 + log 2 (log 2 n + 3))"
definition "seed_space_usage = (λ(n,ε,δ). 2^30+2^23*ln n+48*(log 2(1/ε)+16)2+336*ln (1/δ))"

locale outer_algorithm =
  fixes n :: nat
  fixes δ :: real
  fixes ε :: real
  assumes n_gt_0: "n > 0"
  assumes δ_gt_0: "δ > 0" and δ_lt_1: "δ < 1"
  assumes ε_gt_0: "ε > 0" and ε_lt_1: "ε < 1"
begin

definition n0 where "n0 = max (real n) (exp (exp 5))"
definition stage_two where "stage_two = (δ < (1/ln n0))"
definition δi :: real where "δi = (if stage_two then (1/ln n0) else δ)"
definition m :: nat where "m = (if stage_two then nat 4 * ln (1/ δ)/ln (ln n0) else 1)"
definition α where "α = (if stage_two then (1/ln n0) else 1)"

lemma m_lbound:
  assumes "stage_two"
  shows "m  4 * ln (1/ δ)/ln(ln n0)"
proof -
  have "m = real (nat 4 * ln (1 / δ) / ln (ln n0))"
    using assms unfolding m_def by simp
  also have "...  4 * ln (1 / δ) / ln (ln n0)"
    by linarith
  finally show ?thesis by simp
qed

lemma n_lbound:
  "n0  exp (exp 5)" "ln n0  exp 5" "5  ln (ln n0)" "ln n0 > 1" "n0 > 1"
proof -
  show 0:"n0  exp (exp 5)"
    unfolding n0_def by simp
  have "(1::real)  exp (exp 5)"
    by (approximation 5)
  hence "n0  1"
    using 0 by argo
  thus 1:"ln n0  exp 5"
    using 0 by (intro iffD2[OF ln_ge_iff]) auto
  moreover have "1 < exp (5::real)"
    by (approximation 5)
  ultimately show 2:"ln n0 > 1"
    by argo
  show "5  ln (ln n0)"
    using 1 2 by (subst ln_ge_iff) simp
  have "(1::real) < exp (exp 5)"
    by (approximation 5)
  thus "n0 > 1"
    using 0 by argo
qed

lemma δ1_gt_0: "0 < δi"
  using n_lbound(4) δ_gt_0 unfolding δi_def
  by (cases "stage_two") simp_all

lemma δ1_lt_1: "δi < 1"
  using n_lbound(4) δ_lt_1 unfolding δi_def
  by (cases "stage_two") simp_all

lemma m_gt_0_aux:
  assumes "stage_two"
  shows "1  ln (1 / δ) / ln (ln n0)"
proof -
  have "ln n0  1 / δ"
    using n_lbound(4)
    using assms unfolding pos_le_divide_eq[OF δ_gt_0] stage_two_def
    by (simp add:divide_simps ac_simps)
  hence "ln (ln n0)  ln (1 / δ)"
    using n_lbound(4) δ_gt_0 by (intro iffD2[OF ln_le_cancel_iff] divide_pos_pos) auto
  thus "1  ln (1 / δ) / ln (ln n0)"
    using n_lbound(3)
    by (subst pos_le_divide_eq) auto
qed

lemma m_gt_0: "m > 0"
proof (cases "stage_two")
  case True
  have "0 < 4 * ln (1/ δ)/ln(ln n0)"
    using m_gt_0_aux[OF True] by simp
  also have "...  m"
    using m_lbound[OF True] by simp
  finally have "0 < real m"
    by simp
  then show ?thesis by simp
next
  case False
  then show ?thesis unfolding m_def by simp
qed

lemma α_gt_0: "α > 0"
  using n_lbound(4) unfolding α_def
  by (cases "stage_two") auto

lemma α_le_1: "α  1"
  using n_lbound(4) unfolding α_def
  by (cases "stage_two") simp_all

sublocale I: inner_algorithm "n" "δi" "ε"
  unfolding inner_algorithm_def using n_gt_0 ε_gt_0 ε_lt_1 δ1_gt_0 δ1_lt_1 by auto

abbreviation Θ where "Θ   m α I.Ω"

lemma Θ: "m > 0" "α > 0" using α_gt_0 m_gt_0 by auto

type_synonym state = "inner_algorithm.state list"

fun single :: "nat  nat  state" where
  "single θ x = map (λj. I.single (pro_select Θ θ j) x) [0..<m]"

fun merge :: "state  state  state" where
  "merge x y = map (λ(x,y). I.merge x y) (zip x y)"

fun estimate :: "state  real" where
  "estimate x = median m (λi. I.estimate (x ! i))"

definition ν :: "nat  nat set  state"
  where "ν θ A = map (λi. I.τ (pro_select Θ θ i) A) [0..<m]"

text ‹The following three theorems verify the correctness of the algorithm. The term @{term "τ"}
is a mathematical description of the sketch for a given subset, while @{term "single"},
@{term "merge"} are the actual functions that compute the sketches.›

theorem merge_result: "merge (ν ω A) (ν ω B) = ν ω (A  B)" (is "?L = ?R")
proof -
  have 0: "zip [0..<m] [0..<m] = map (λx. (x,x)) [0..<m]" for m
    by (induction m, auto)

  have "?L = map (λx. I.merge (I.τ (pro_select Θ ω x) A) (I.τ (pro_select Θ ω x) B)) [0..<m]"
    unfolding ν_def
    by (simp add:zip_map_map 0 comp_def case_prod_beta)
  also have "... = map (λx.  I.τ (pro_select Θ ω x) (A  B)) [0..<m]"
    by (intro map_cong refl I.merge_result expander_pro_range[OF Θ])
  also have "... = ?R"
    unfolding ν_def by simp
  finally show ?thesis by simp
qed

theorem single_result: "single ω x = ν ω {x}" (is "?L = ?R")
proof -
  have "?L = map (λj. I.single (pro_select Θ ω j) x) [0..<m]"
    by (simp del:I.single.simps)
  also have "... = ?R"
    unfolding ν_def by (intro map_cong I.single_result expander_pro_range[OF Θ]) auto
  finally show ?thesis by simp
qed

theorem estimate_result:
  assumes "A  {..<n}" "A  {}"
  defines "p  (pmf_of_set {..<pro_size Θ})"
  shows "measure p {ω. ¦estimate (ν ω A)- real (card A)¦ > ε * real (card A)}  δ" (is "?L  ?R")
proof (cases "stage_two")
  case True
  define I where "I = {x. ¦x - real (card A)¦  ε * real (card A)}"
  have int_I: "interval I"
    unfolding interval_def I_def by auto

  define μ where "μ = measure I.Ω {ω. I.estimate (I.τ ω A)  I}"

  have 0:"μ + α > 0"
    unfolding μ_def
    by (intro add_nonneg_pos α_gt_0) auto

  have "μ  δi"
    unfolding μ_def I_def using I.estimate_result[OF assms(1,2)]
    by (simp add: not_le del:I.estimate.simps)
  also have "... = 1/ln n0"
    using True unfolding δi_def by simp
  finally have "μ  1/ln n0" by simp
  hence "μ + α  1/ln n0 + 1/ln n0"
    unfolding α_def using True by (intro add_mono) auto
  also have "... = 2/ln n0"
    by simp
  finally have 1:"μ + α  2 / ln n0"
    by simp
  hence 2:"ln n0  2 / (μ + α)"
    using 0 n_lbound by (simp add:field_simps)

  have "μ + α  2/ln n0"
    by (intro 1)
  also have "...  2/exp 5"
    using n_lbound by (intro divide_left_mono) simp_all
  also have "...  1/2"
    by (approximation 5)
  finally have 3:"μ + α  1/2" by simp

  have 4: "2 * ln 2 + 8 * exp (- 1)  (5::real)"
    by (approximation 5)

  have "?L = measure p {ω. median m (λi. I.estimate (ν ω A  ! i))  I}"
    unfolding I_def by (simp add:not_le)
  also have "... 
    measure p {θ. real (card {i  {..<m}. I.estimate (I.τ (pro_select Θ θ i) A)  I}) real m/2}"
  proof (rule pmf_mono)
    fix θ assume "θ  set_pmf p"
    assume a:"θ  {ω. median m (λi. I.estimate (ν ω A ! i))  I}"
    hence "real m  real (2*card {i. i < m  I.estimate (ν θ A ! i)  I})"
      by (intro of_nat_mono median_est_rev int_I) auto
    also have "... = 2 * real (card {i{..<m}. I.estimate (ν θ A ! i)  I})"
      by simp
    also have "... = 2 * real (card {i  {..<m}. I.estimate (I.τ (pro_select Θ θ i) A)  I})"
      unfolding ν_def by (intro_cong "[σ2 (*), σ1 of_nat, σ1 card]" more:restr_Collect_cong)
       (simp del:I.estimate.simps)
    finally have "real m  2 * real (card {i  {..<m}. I.estimate (I.τ (pro_select Θ θ i) A)  I})"
      by simp
    thus "θ  {θ. real m / 2  real (card {i  {..<m}. I.estimate (I.τ (pro_select Θ θ i) A)  I})}"
      by simp
  qed
  also have "...=measure Θ{θ. real(card {i  {..<m}. I.estimate (I.τ (θ i) A)  I})(1/2)*real m}"
    unfolding sample_pro_alt p_def by (simp del:I.estimate.simps)
  also have "...  exp (-real m * ((1/2) * ln (1/ (μ + α)) - 2*exp (-1)))"
    using 3 m_gt_0 α_gt_0 unfolding μ_def by (intro walk_tail_bound) force+
  also have "...  exp (-real m * ((1/2) * ln (ln n0 / 2) - 2*exp (-1)))"
    using 0 2 3 n_lbound
    by (intro iffD2[OF exp_le_cancel_iff] mult_right_mono mult_left_mono_neg[where c="-real m"]
        diff_mono mult_left_mono iffD2[OF ln_le_cancel_iff]) (simp_all)
  also have "... = exp (-real m * (ln (ln n0) / 2 - (ln 2/2 + 2*exp (-1))))"
    using n_lbound by (subst ln_div) (simp_all add:algebra_simps)
  also have "...  exp (-real m * (ln (ln n0) / 2 - (ln (ln (exp(exp 5))) / 4)))"
    using 4
    by (intro iffD2[OF exp_le_cancel_iff] mult_left_mono_neg[where c="-real m"] diff_mono) simp_all
  also have "...  exp (-real m * (ln (ln n0) / 2 - (ln (ln n0) / 4)))"
    using n_lbound
    by (intro iffD2[OF exp_le_cancel_iff] mult_left_mono_neg[where c="-real m"] diff_mono) simp_all
  also have "... = exp (- real m * (ln (ln n0)/ 4) )"
    by (simp add:algebra_simps)
  also have "...  exp (- (4 * ln (1/ δ)/ln(ln n0)) * (ln (ln n0)/4))"
    using m_lbound[OF True] n_lbound
    by (intro iffD2[OF exp_le_cancel_iff] mult_right_mono divide_nonneg_pos) simp_all
  also have "... = exp (- ln (1/ δ))"
    using n_lbound by simp
  also have "... = δ"
    using δ_gt_0 by (subst ln_inverse[symmetric]) auto
  finally show ?thesis by simp
next
  case False
  have m_eq: "m = 1"
    unfolding m_def using False by simp
  hence "?L = measure p {ω. ε * real (card A) < ¦I.estimate (ν ω A ! 0) - real (card A)¦}"
    unfolding estimate.simps m_eq median_def by simp
  also have "... =  measure p {ω. ε*card A<¦I.estimate (I.τ (pro_select Θ ω 0) A)-real(card A)¦}"
    unfolding ν_def m_eq by (simp del: I.estimate.simps)
  also have "... = measure Θ {ω. ε*real(card A) < ¦I.estimate (I.τ (ω 0) A)-real(card A)¦}"
    unfolding sample_pro_alt p_def by (simp del:I.estimate.simps)
  also have "...=
    measure (map_pmf (λθ. θ 0) Θ) {ω. ε*real(card A) < ¦I.estimate (I.τ ω A)-real(card A)¦}"
    by simp
  also have "... = measure I.Ω {ω. ε*real(card A) < ¦I.estimate (I.τ ω A)-real(card A)¦}"
    using m_eq by (subst expander_uniform_property[OF Θ]) auto
  also have "...  δi"
    by (intro I.estimate_result[OF assms(1,2)])
  also have "... = ?R"
    unfolding δi_def using False by simp
  finally show ?thesis
    by simp
qed

text ‹The function @{term "encode_state"} can represent states as bit strings.
This enables verification of the space usage.›

definition encode_state
  where "encode_state = Lfe I.encode_state m"

lemma encode_state: "is_encoding encode_state"
  unfolding encode_state_def
  by (intro fixed_list_encoding I.encode_state)

lemma state_bit_count:
  "bit_count (encode_state (ν ω A))  state_space_usage (real n, ε, δ)"
    (is "?L  ?R")
proof -
  have 0: "length (ν ω A) = m"
    unfolding ν_def by simp
  have "?L = (xν ω A. bit_count (I.encode_state x))"
    using 0 unfolding encode_state_def fixed_list_bit_count by simp
  also have "... = (x[0..<m]. bit_count (I.encode_state (I.τ (pro_select Θ ω x) A)))"
    unfolding ν_def by (simp add:comp_def)
  also have "...  (x[0..<m]. ereal (2^36 *(ln (1/δi)+ 1)/ε2 + log 2 (log 2 (real n) + 3)))"
    using I.state_bit_count by (intro sum_list_mono I.state_bit_count expander_pro_range[OF Θ])
  also have "... = ereal ( real m * (2^36 *(ln (1/δi)+ 1)/ε2 + log 2 (log 2 (real n) + 3)))"
    unfolding sum_list_triv_ereal by simp
  also have "...  2^40 * (ln(1/δ)+1)/ ε^2 + log 2 (log 2 n + 3)" (is "?L1  ?R1")
  proof (cases "stage_two")
    case True
    have "4*ln (1/δ)/ln(ln n0)  4*ln (1/δ)/ln(ln n0) + 1"
      by simp
    also have "...  4*ln (1/δ)/ln(ln n0) + ln (1/δ)/ln(ln n0)"
      using m_gt_0_aux[OF True] by (intro add_mono) auto
    also have "... = 5 * ln (1/δ)/ln(ln n0)" by simp
    finally have 3: "4*ln (1/δ)/ln(ln n0)  5 * ln (1/δ)/ln(ln n0)"
      by simp

    have 4: "0  log 2 (log 2 (real n) + 3)"
      using n_gt_0
      by (intro iffD2[OF zero_le_log_cancel_iff] add_nonneg_pos) auto

    have 5: "1 / ln 2 + 3 / exp 5  exp (1::real)"  "1.2 / ln 2  (2::real)"
      by (approximation 5)+

    have "log 2(log 2 (real n)+3)  log 2 (log 2 n0 + 3)"
      using n_gt_0 by (intro iffD2[OF log_le_cancel_iff] add_mono add_nonneg_pos
          iffD2[OF zero_le_log_cancel_iff]) (simp_all add:n0_def)
    also have "... = ln (ln n0 / ln 2 + 3) / ln 2"
      unfolding log_def by simp
    also have "...  ln (ln n0/ln 2 + (3 / exp 5) * ln n0) / ln 2"
      using n_lbound by (intro divide_right_mono iffD2[OF ln_le_cancel_iff] add_mono add_nonneg_pos)
       (simp_all add:divide_simps)
    also have "... = ln ( ln n0 * (1 /ln 2 + 3/exp 5)) / ln 2"
      by (simp add:algebra_simps)
    also have "...  ln ( ln n0 * exp 1) / ln 2"
      using n_lbound by (intro divide_right_mono iffD2[OF ln_le_cancel_iff] add_mono
          mult_left_mono 5 Rings.mult_pos_pos add_pos_nonneg) auto
    also have "... = (ln (ln n0) + 1) / ln 2"
      using n_lbound by (subst ln_mult) simp_all
    also have "...  (ln (ln n0) + 0.2 * ln (ln n0)) / ln 2"
      using n_lbound by (intro divide_right_mono add_mono) auto
    also have "... = (1.2/ ln 2) * ln (ln n0)"
      by simp
    also have "...  2 * ln (ln n0)"
      using n_lbound by (intro mult_right_mono 5) simp
    finally have "log 2(log 2 (real n)+3)  2 * ln (ln n0)"
      by simp
    hence 6: "log 2(log 2 (real n)+3)/ln(ln n0)  2"
      using n_lbound by (subst pos_divide_le_eq) simp_all

    have "?L1 = real(nat 4*ln (1/δ)/ln(ln n0))*(2^36*(ln (ln n0)+1)/ε^2+log 2(log 2 (real n)+3))"
      using True unfolding m_def δi_def by simp
    also have "... = 4*ln (1/δ)/ln(ln n0)*(2^36*(ln (ln n0)+1)/ε^2+log 2(log 2 (real n)+3))"
      using m_gt_0_aux[OF True] by (subst of_nat_nat) simp_all
    also have "...  (5*ln (1/δ)/ln(ln n0)) *(2^36*(ln (ln n0)+1)/ε^2+log 2(log 2 (real n)+3))"
      using n_lbound(3) ε_gt_0  4 by (intro ereal_mono mult_right_mono
          add_nonneg_nonneg divide_nonneg_pos mult_nonneg_nonneg 3) simp_all
    also have "...  (5 * ln (1/δ)/ln(ln n0))*((2^36+2^36)*ln (ln n0)/ε^2+log 2(log 2 (real n)+3))"
      using n_lbound δ_gt_0 δ_lt_1
      by (intro ereal_mono mult_left_mono add_mono divide_right_mono divide_nonneg_pos) auto
    also have "... = 5*(2^37)* ln (1/δ)/ ε^2 + (5*ln (1/δ)) * (log 2(log 2 (real n)+3)/ln(ln n0))"
      using n_lbound by (simp add:algebra_simps)
    also have "...  5*(2^37)* ln (1/δ)/ ε^2 + (5*ln(1/ δ)) * 2"
      using δ_gt_0 δ_lt_1 by (intro add_mono ereal_mono order.refl mult_left_mono 6) auto
    also have "... = 5*(2^37)* ln (1/δ)/ ε^2 + 5*2*ln(1/ δ) / 1"
      by simp
    also have "...  5*(2^37)* ln (1/δ)/ ε^2 + 5*2*ln(1/ δ) / ε^2"
      using ε_gt_0 ε_lt_1 δ_gt_0 δ_lt_1
      by (intro add_mono ereal_mono divide_left_mono Rings.mult_pos_pos power_le_one) auto
    also have "... = (5*(2^37+2))* (ln (1/δ)+0)/ ε^2 + 0"
      by (simp add:algebra_simps)
    also have "...  2^40 * (ln (1 / δ)+1) / ε^2 +  log 2 (log 2 (real n) + 3)"
      using ε_gt_0 ε_lt_1 δ_gt_0 δ_lt_1 n_gt_0 by (intro add_mono ereal_mono divide_right_mono
          mult_right_mono iffD2[OF zero_le_log_cancel_iff] add_nonneg_pos) auto
    finally show ?thesis by simp
  next
    case False
    have "?L1 = 2^36 *(ln (1/δ)+ 1)/ε2 + log 2 (log 2 (real n) + 3)"
      using False unfolding δi_def m_def by simp
    also have "...  ?R1"
      using ε_gt_0 ε_lt_1 δ_gt_0 δ_lt_1
      by (intro ereal_mono add_mono divide_right_mono mult_right_mono add_nonneg_nonneg) auto
    finally show ?thesis by simp
  qed
  finally show ?thesis
    unfolding state_space_usage_def by simp
qed

text ‹Encoding function for the seeds which are just natural numbers smaller than
@{term "pro_size Θ"}.›

definition encode_seed
  where "encode_seed = Nbe (pro_size Θ)"

lemma encode_seed:
  "is_encoding encode_seed"
  unfolding encode_seed_def by (intro bounded_nat_encoding)

lemma random_bit_count:
  assumes "ω < pro_size Θ"
  shows "bit_count (encode_seed ω)  seed_space_usage (real n, ε, δ)"
    (is "?L  ?R")
proof -
  have 0: "pro_size Θ > 0" by (intro pro_size_gt_0)
  have 1: "pro_size I.Ω > 0" by (intro pro_size_gt_0)

  have "(55+60*ln (ln n0))^3  (180+60*ln (ln n0))^3"
    using n_lbound by (intro power_mono add_mono) auto
  also have "... = 180^3 * (1+ln (ln n0)/real 3)^3"
    unfolding power_mult_distrib[symmetric] by simp
  also have "...  180^3 * exp (ln (ln n0))"
    using n_lbound by (intro mult_left_mono exp_ge_one_plus_x_over_n_power_n) auto
  also have "... = 180^3 * ln n0"
    using n_lbound by (subst exp_ln) auto
  also have "...  180^3 * max (ln n) (ln (exp (exp 5)))"
    using n_gt_0 unfolding n0_def by (subst ln_max_swap) auto
  also have "...  180^3 * (ln n + exp 5)"
    using n_gt_0 unfolding ln_exp by (intro mult_left_mono) auto
  finally have 2:"(55+60*ln (ln n0))^3  180^3 * ln n + 180^3*exp 5"
    by simp

  have 3:"(1::real)+180^3*exp 5  2^30" "(4::real)/ln 2 + 180^3  2^23"
    by (approximation 10)+

  have "?L = ereal (real (floorlog 2 (pro_size Θ - 1)))"
    using assms unfolding encode_seed_def bounded_nat_bit_count by simp
  also have "...  ereal (real (floorlog 2 (pro_size Θ)))"
    by (intro ereal_mono Nat.of_nat_mono floorlog_mono) auto
  also have "... = ereal (1 + of_int log 2 (real (pro_size Θ)))"
    using 0 unfolding floorlog_def by simp
  also have "...  ereal (1 + log 2 (real (pro_size Θ)))"
    by (intro add_mono ereal_mono) auto
  also have "... = 1 + log 2 (real (pro_size I.Ω) * (2^4) ^ ((m - 1) * nat ln α / ln 0.95))"
    unfolding expander_pro_size[OF Θ] by simp
  also have "... = 1 + log 2 (real (pro_size I.Ω) * 2^ (4 * (m - 1) * nat ln α / ln 0.95))"
    unfolding power_mult by simp
  also have "... = 1 + log 2 (real (pro_size I.Ω)) + (4*(m-1)* natln α / ln 0.95)"
    using 1 by (subst log_mult) simp_all
  also have "...  1+log 2(2 powr (4*log 2 n + 48 * (log 2 (1/ε)+16)2+ (55+60*ln (1/δi))^3))+
    (4*(m-1)* natln α / ln 0.95)"
    using 1 by (intro ereal_mono add_mono iffD2[OF log_le_cancel_iff] I.random_bit_count) auto
  also have "...=1+4*log 2 n+48*(log 2(1/ε)+16)2+(55+60*ln (1/δi))^3+(4*(m-1)*natln α/ln 0.95)"
    by (subst log_powr_cancel) auto
  also have "...  2^30 + 2^23*ln n+48*(log 2(1/ε)+16)2 + 336*ln (1/δ)" (is "?L1  ?R1")
  proof (cases "stage_two")
    case True

    have "-1 < (0::real)" by simp
    also have "...  ln α / ln 0.95"
      using α_gt_0 α_le_1 by (intro divide_nonpos_neg) auto
    finally have 4: "- 1 < ln α / ln 0.95" by simp

    have 5: "- 1 / ln 0.95  (20::real)"
      by (approximation 10)

    have "(4*(m-1)*natln α/ln 0.95) = 4 * (real m-1) * of_int ln α/ln 0.95"
      using 4 m_gt_0 unfolding of_nat_mult by (subst of_nat_nat) auto
    also have "...  4 * (real m-1) * (ln α/ln 0.95 + 1)"
      using m_gt_0 by (intro mult_left_mono) auto
    also have "... = 4 * (real m-1) * (-ln (ln n0)/ln 0.95 + 1)"
      using n_lbound True unfolding α_def
      by (subst ln_inverse[symmetric]) (simp_all add:inverse_eq_divide)
    also have "... = 4 * (real m - 1 ) * (ln (ln n0) * (-1/ln 0.95) + 1)"
      by simp
    also have "...  4 * (real m - 1 ) * (ln (ln n0) * 20 + 1)"
      using n_lbound m_gt_0 by (intro mult_left_mono add_mono 5) auto
    also have "... = 4 * (real (nat 4 * ln (1 / δ) / ln (ln n0))-1) *  (ln (ln n0) * 20 + 1)"
      using True unfolding m_def by simp
    also have "... = 4 * (real_of_int 4 * ln (1 / δ) / ln (ln n0)-1) *  (ln (ln n0) * 20 + 1)"
      using m_gt_0_aux[OF True] by (subst of_nat_nat) simp_all
    also have "...  4 * (4 * ln (1 / δ) / ln (ln n0)) * (ln (ln n0) * 20 + 1)"
      using n_lbound by (intro mult_left_mono mult_right_mono) auto
    also have "...  4 * (4 * ln (1 / δ) / ln (ln n0)) * (ln (ln n0) * 20 + ln (ln n0))"
      using δ_gt_0 δ_lt_1 n_lbound
      by (intro mult_left_mono mult_right_mono add_mono divide_nonneg_pos Rings.mult_nonneg_nonneg)
       simp_all
    also have "... = 336 * ln (1  / δ)"
      using n_lbound by simp
    finally have 6: "4 * (m-1) * nat ln α/ln 0.95  336 * ln (1/δ)"
      by simp

    have "?L1 =1+4*log 2 n+48*(log 2(1/ε)+16)2+(55+60*ln (ln n0))^3+(4*(m-1)*natln α/ln 0.95)"
      using True unfolding δi_def by simp
    also have "...  1+4*log 2 n+48*(log 2(1/ε)+16)2+(180^3 * ln n + 180^3*exp 5) + 336 * ln (1/δ)"
      by (intro add_mono 6 2 ereal_mono order.refl)
    also have "... = (1+180^3*exp 5)+ (4/ln 2 + 180^3)*ln n+48*(log 2(1/ε)+16)2+ 336 * ln (1/δ)"
      by (simp add:log_def algebra_simps)
    also have "...  2^30 + 2^23*ln n+48*(log 2(1/ε)+16)2+ 336 * ln (1/δ)"
      using n_gt_0 by (intro add_mono ereal_mono 3 order.refl mult_right_mono) auto
    finally show ?thesis by simp
  next
    case False
    hence "1 / δ  ln n0"
      using δ_gt_0 n_lbound
      unfolding stage_two_def not_less by (simp add:divide_simps ac_simps)
    hence 7: "ln (1 / δ)  ln (ln n0)"
      using n_lbound δ_gt_0 δ_lt_1
      by (intro iffD2[OF ln_le_cancel_iff]) auto

    have 8: "0  336*ln (1/δ) "
      using δ_gt_0 δ_lt_1 by auto

    have "?L1 = 1 + 4 * log 2 (real n) + 48 * (log 2 (1 / ε) + 16)2 + (55 + 60 * ln (1 / δ)) ^ 3"
      using False unfolding δi_def m_def by simp
    also have "...  1 + 4 * log 2 (real n) + 48 * (log 2 (1 / ε) + 16)2 + (55 + 60 * ln (ln n0))^3"
      using δ_gt_0 δ_lt_1
      by (intro add_mono order.refl ereal_mono power_mono mult_left_mono add_nonneg_nonneg 7) auto
    also have "...  1+4*log 2(real n)+48*(log 2 (1 / ε)+16)2+(180^3*ln (real n) + 180 ^ 3 * exp 5)"
      by (intro add_mono ereal_mono 2 order.refl)
    also have "... = (1+180^3*exp 5)+ (4/ln 2 + 180^3)*ln n+48*(log 2(1/ε)+16)2+ 0"
      by (simp add:log_def algebra_simps)
    also have "...  2^30 + 2^23*ln n+48*(log 2(1/ε)+16)2 + 336*ln (1/δ)"
      using n_gt_0 by (intro add_mono ereal_mono 3 order.refl mult_right_mono 8) auto
    finally show ?thesis by simp
  qed
  also have "... = seed_space_usage (real n, ε, δ)"
    unfolding seed_space_usage_def by simp
  finally show ?thesis by simp
qed

text ‹The following is an alternative form expressing the correctness and space usage theorems.
If @{term "x"} is expression formed by @{term "single"} and @{term "merge"} operations. Then
@{term "x"} requires @{term "state_space_usage (real n, ε, δ)"} bits to encode and
@{term "estimate x"} approximates the count of the distinct universe elements in the expression.

For example:

@{term "estimate (merge (single ω 1) (merge (single ω 5) (single ω 1)))"} approximates the
cardinality of @{term "{1::nat, 5, 1}"} i.e. $2$.›

datatype sketch_tree = Single nat | Merge sketch_tree sketch_tree

fun eval :: "nat  sketch_tree  state"
  where
    "eval ω (Single x) = single ω x" |
    "eval ω (Merge x y) = merge (eval ω x) (eval ω y)"

fun sketch_tree_set :: "sketch_tree  nat set"
  where
    "sketch_tree_set (Single x) = {x}" |
    "sketch_tree_set (Merge x y) = sketch_tree_set x  sketch_tree_set y"

theorem correctness:
  fixes X
  assumes "sketch_tree_set t  {..<n}"
  defines "p  pmf_of_set {..<pro_size Θ}"
  defines "X  real (card (sketch_tree_set t))"
  shows "measure p {ω. ¦estimate (eval ω t) - X¦ > ε * X}  δ" (is "?L  ?R")
proof -
  define A where "A = sketch_tree_set t"
  have X_eq: "X = real (card A)"
    unfolding X_def A_def by simp

  have 0:"eval ω t = ν ω A" for ω
    unfolding A_def using single_result merge_result
    by (induction t) (auto simp del:merge.simps single.simps)

  have 1: "A  {..<n}"
    using assms(1) unfolding A_def by blast

  have 2: "A  {}"
    unfolding A_def by (induction t) auto

  show ?thesis
    unfolding 0 X_eq p_def by (intro estimate_result 1 2)
qed

theorem space_usage:
  assumes "ω < pro_size Θ"
  shows
    "bit_count (encode_state (eval ω t))  state_space_usage (real n, ε, δ)" (is "?A")
    "bit_count (encode_seed ω)  seed_space_usage (real n, ε, δ)" (is "?B")
proof-
  define A where "A = sketch_tree_set t"

  have 0:"eval ω t = ν ω A" for ω
    unfolding A_def using single_result merge_result
    by (induction t) (auto simp del:merge.simps single.simps)

  show ?A
    unfolding 0 by (intro state_bit_count)
  show ?B
    using random_bit_count[OF assms] by simp
qed

end

text ‹The functions @{term "state_space_usage"} and @{term "seed_space_usage"} are exact bounds
on the space usage for the state and the seed. The following establishes asymptotic bounds
with respect to the limit $n, \delta^{-1}, \varepsilon^{-1} \rightarrow \infty$.›

context
begin

text ‹Some local notation to ease proofs about the asymptotic space usage of the algorithm:›

private definition n_of :: "real × real × real  real" where "n_of = (λ(n, ε, δ). n)"
private definition δ_of :: "real × real × real  real" where "δ_of = (λ(n, ε, δ). δ)"
private definition ε_of :: "real × real × real  real" where "ε_of = (λ(n, ε, δ). ε)"

private abbreviation F :: "(real × real × real) filter"
  where "F  (at_top ×F at_right 0 ×F at_right 0)"

private lemma var_simps:
  "n_of = fst"
  "ε_of = (λx. fst (snd x))"
  "δ_of = (λx. snd (snd x))"
  unfolding n_of_def ε_of_def δ_of_def by (auto simp add:case_prod_beta)

private lemma evt_n: "eventually (λx. n_of x  n) F"
  unfolding var_simps by (intro eventually_prod1' eventually_prod2' eventually_ge_at_top)
      (simp add:prod_filter_eq_bot)

private lemma evt_n_1: "F x in F. 0  ln (n_of x)"
  by (intro eventually_mono[OF evt_n[of "1"]] ln_ge_zero) simp

private lemma evt_n_2: "F x in F. 0  ln (ln (n_of x))"
  using order_less_le_trans[OF exp_gt_zero]
  by (intro eventually_mono[OF evt_n[of "exp 1"]] ln_ge_zero iffD2[OF ln_ge_iff]) auto

private lemma evt_ε: "eventually (λx. 1/ε_of x  ε  ε_of x > 0) F"
  unfolding var_simps by (intro eventually_prod1' eventually_prod2' eventually_conj
      real_inv_at_right_0_inf eventually_at_right_less) (simp_all add:prod_filter_eq_bot)

private lemma evt_δ: "eventually (λx. 1/δ_of x  δ  δ_of x > 0) F"
  unfolding var_simps by (intro eventually_prod1' eventually_prod2' eventually_conj
      real_inv_at_right_0_inf eventually_at_right_less) (simp_all add:prod_filter_eq_bot)

private lemma evt_δ_1: "F x in F. 0  ln (1 / δ_of x)"
  by (intro eventually_mono[OF evt_δ[of "1"]] ln_ge_zero) simp

theorem asymptotic_state_space_complexity:
  "state_space_usage  O[F](λ(n, ε, δ). ln (1/δ)/ε^2 + ln (ln n))"
  (is "_  O[?F](?rhs)")
proof -
  have 0:"(λx. 1)  O[?F](λx. ln (1 / δ_of x))"
    using order_less_le_trans[OF exp_gt_zero]
    by (intro landau_o.big_mono eventually_mono[OF evt_δ[of "exp 1"]])
      (auto intro!: iffD2[OF ln_ge_iff] simp add:abs_ge_iff)

  have 1:"(λx. 1)  O[?F](λx. ln (n_of x))"
    using order_less_le_trans[OF exp_gt_zero]
    by (intro landau_o.big_mono eventually_mono[OF evt_n[of "exp 1"]])
      (auto intro!:iffD2[OF ln_ge_iff] simp add:abs_ge_iff)

  have "(λx. ((ln (1/δ_of x)+1)* (1/ε_of x)2)) O[?F](λx. ln(1/δ_of x)* (1/ε_of x)2)"
    by (intro landau_o.mult sum_in_bigo 0) simp_all
  hence 2: "(λx. 2^40*((ln (1/δ_of x)+1)* (1/ε_of x)2)) O[?F](λx. ln(1/δ_of x)* (1/ε_of x)2)"
    unfolding cmult_in_bigo_iff by simp

  have 3: "(1::real)  exp 2"
    by (approximation 5)

  have "(λx. ln (n_of x) / ln 2 + 3)  O[?F](λx. ln (n_of x))"
    using 1 by (intro sum_in_bigo) simp_all
  hence "(λx. ln (ln (n_of x) / ln 2 + 3))  O[?F](λx. ln (ln (n_of x)))"
    using order_less_le_trans[OF exp_gt_zero] order_trans[OF 3]
    by (intro landau_ln_2[where a="2"] eventually_mono[OF evt_n[of "exp 2"]])
     (auto intro!:iffD2[OF ln_ge_iff] add_nonneg_nonneg divide_nonneg_pos)
  hence 4: "(λx. log 2 (log 2 (n_of x) + 3)) O[?F](λx. ln(ln(n_of x)))"
    unfolding log_def by simp

  have 5: "F x in ?F. 0  ln (1 / δ_of x) * (1 / ε_of x)2"
    by (intro eventually_mono[OF eventually_conj[OF evt_δ_1 evt_ε[of "1"]]]) auto

  have "state_space_usage = (λx. state_space_usage (n_of x, ε_of x, δ_of x))"
    by (simp add:case_prod_beta' n_of_def δ_of_def ε_of_def)
  also have "... = (λx. 2 ^ 40 * ((ln (1 / (δ_of x)) + 1)* (1/ε_of x)2) + log 2 (log 2 (n_of x)+3))"
    unfolding state_space_usage_def by (simp add:divide_simps)
  also have "...  O[?F](λx. ln (1/δ_of x)* (1/ε_of x)2 + ln (ln (n_of x)))"
    by (intro landau_sum 2 4 5 evt_n_2)
  also have "... = O[?F](?rhs)"
    by (simp add:case_prod_beta' n_of_def δ_of_def ε_of_def divide_simps)
  finally show ?thesis by simp
qed

theorem asymptotic_seed_space_complexity:
  "seed_space_usage  O[F](λ(n, ε, δ). ln (1/δ)+ln (1/ε)^2 + ln n)"
  (is "_  O[?F](?rhs)")
proof -
  have 0: "F x in ?F. 0  (ln (1 / ε_of x))2"
    by simp

  have 1: "F x in ?F. 0  ln (1 / δ_of x) + (ln (1 / ε_of x))2"
    by (intro eventually_mono[OF eventually_conj[OF evt_δ_1 0]] add_nonneg_nonneg) auto

  have 2: "(λx. 1)  O[?F](λx. ln (1 / ε_of x))"
    using order_less_le_trans[OF exp_gt_zero]
    by (intro landau_o.big_mono eventually_mono[OF evt_ε[of "exp 1"]])
      (auto intro!:iffD2[OF ln_ge_iff] simp add:abs_ge_iff)

  have "(λx. 1)  O[at_top ×F at_right 0 ×F at_right 0](λx. ln (n_of x))"
    using order_less_le_trans[OF exp_gt_zero]
    by (intro landau_o.big_mono eventually_mono[OF evt_n[of "exp 1"]])
      (auto intro!:iffD2[OF ln_ge_iff] simp add:abs_ge_iff)
  hence 3: "(λx. 1)  O[?F](λx. ln (1 / δ_of x) + (ln (1 / ε_of x))2 + ln (n_of x))"
    by (intro landau_sum_2 1 evt_n_1 0 evt_δ_1) simp
  have 4: "(λx. ln (n_of x))  O[?F](λx. ln (1 / δ_of x) + (ln (1 / ε_of x))2 + ln (n_of x))"
    by (intro landau_sum_2 1 evt_n_1) simp
  have "(λx. log 2 (1 / ε_of x) + 16)  O[?F](λx. ln (1 / ε_of x))"
    using 2 unfolding log_def by (intro sum_in_bigo) simp_all
  hence 5: "(λx. (log 2 (1 / ε_of x) + 16)2)  O[?F](λx. ln (1/δ_of x)+(ln (1/ε_of x))2)"
    using 0 unfolding power2_eq_square by (intro landau_sum_2 landau_o.mult evt_δ_1) simp_all
  have 6: "(λx. (log 2 (1 / ε_of x) + 16)2)  O[?F](λx. ln (1/δ_of x)+(ln (1/ε_of x))2+ln (n_of x))"
    by (intro landau_sum_1[OF _ _ 5] 1 evt_n_1)
  have 7: "(λx. ln (1/δ_of x))  O[?F](λx. ln (1/δ_of x)+(ln (1/ε_of x))2+ln (n_of x))"
    by (intro landau_sum_1 1 evt_δ_1 0 evt_n_1) simp

  have "seed_space_usage = (λx. seed_space_usage (n_of x, ε_of x, δ_of x))"
    by (simp add:case_prod_beta' n_of_def δ_of_def ε_of_def)
  also have "... = (λx. 2^30+2^23*ln (n_of x)+48*(log 2 (1/(ε_of x))+16)2 + 336 * ln (1 / δ_of x))"
    unfolding seed_space_usage_def by (simp add:divide_simps)
  also have "...  O[?F](λx. ln (1/δ_of x)+ln (1/ε_of x)^2 + ln (n_of x))"
    using 3 4 6 7 by (intro sum_in_bigo) simp_all
  also have "... = O[?F](?rhs)"
    by (simp add:case_prod_beta' n_of_def δ_of_def ε_of_def)
  finally show ?thesis by simp
qed

definition "space_usage x = state_space_usage x + seed_space_usage x"

theorem asymptotic_space_complexity:
  "space_usage  O[at_top ×F at_right 0 ×F at_right 0](λ(n, ε, δ). ln (1/δ)/ε^2 + ln n)"
proof -
  let ?f1 = "(λx. ln (1/δ_of x)*(1/ε_of x^2)+ln (ln (n_of x)))"
  let ?f2 = "(λx. ln(1/δ_of x)+ln(1/ε_of x)^2+ln (n_of x))"

  have 0: "F x in F. 0  (1 / (ε_of x)2)"
    unfolding var_simps by (intro eventually_prod1' eventually_prod2' eventually_inv)
      (simp_all add:prod_filter_eq_bot eventually_nonzero_simps)

  have 1: "F x in F. 0  ln (1 / δ_of x) * (1 / (ε_of x)2)"
    by (intro eventually_mono[OF eventually_conj[OF evt_δ_1 0]] mult_nonneg_nonneg) auto

  have 2: "F x in F. 0  ln (1 / δ_of x) * (1 / (ε_of x)2) + ln (ln (n_of x))"
    by (intro eventually_mono[OF eventually_conj[OF 1 evt_n_2]] add_nonneg_nonneg) auto

  have 3: "F x in F. 0  ln (1 / (ε_of x)2)"
    unfolding power_one_over[symmetric]
    by (intro eventually_mono[OF evt_ε[of "1"]] ln_ge_zero) simp

  have 4: "F x in F. 0  ln (1 / δ_of x) + (ln (1 / ε_of x))2 + ln (n_of x)"
    by (intro eventually_mono[OF eventually_conj[OF evt_n_1 eventually_conj[OF evt_δ_1 3]]]
        add_nonneg_nonneg) auto

  have 5: "(λ_. 1)  O[F](λx. 1 / (ε_of x)2)"
    unfolding var_simps by (intro bigo_prod_1 bigo_prod_2 bigo_inv)
      (simp_all add:power_divide prod_filter_eq_bot)

  have 6: "(λ_. 1)  O[F](λx. ln (1 / δ_of x))"
    unfolding var_simps
    by (intro bigo_prod_1 bigo_prod_2 bigo_inv) (simp_all add:prod_filter_eq_bot)

  have 7: "state_space_usage  O[F](λx. ln (1 / δ_of x) * (1 / (ε_of x)2) + ln (ln (n_of x)))"
    using asymptotic_state_space_complexity unfolding δ_of_def ε_of_def n_of_def
    by (simp add:case_prod_beta')

  have 8: "seed_space_usage  O[F](λx. ln (1 / δ_of x) + (ln (1 / ε_of x))2 + ln (n_of x))"
    using asymptotic_seed_space_complexity unfolding δ_of_def ε_of_def n_of_def
    by (simp add:case_prod_beta')

  have 9: "(λx. ln (n_of x))  O[F](λx. ln (1 / δ_of x) * (1 / (ε_of x)2) + ln (n_of x))"
    by (intro landau_sum_2 evt_n_1 1) simp

  have "(λx. (ln (1 / ε_of x))2)  O[F](λx. 1 / ε_of x^2)"
    unfolding var_simps
    by (intro bigo_prod_1 bigo_prod_2 bigo_inv) (simp_all add:power_divide prod_filter_eq_bot)
  hence 10: "(λx. (ln (1 / ε_of x))2)  O[F](λx. ln (1 / δ_of x) * (1 / ε_of x^2) + ln (n_of x))"
    by (intro landau_sum_1 evt_n_1 1 landau_o.big_mult_1' 6)
  have 11: "(λx. ln (1 / δ_of x))  O[F](λx. ln (1 / δ_of x) * (1 / ε_of x^2) + ln (n_of x))"
    by (intro landau_sum_1 evt_n_1 1 landau_o.big_mult_1 5) simp
  have 12: "(λx. ln (1/δ_of x) * (1/ε_of x^2))  O[F](λx. ln (1/δ_of x)*(1/ε_of x^2)+ln (n_of x))"
    by (intro landau_sum_1 1 evt_n_1) simp

  have "(λx. ln (ln (n_of x)))  O[F](λx. ln (n_of x))"
    unfolding var_simps by (intro bigo_prod_1 bigo_prod_2) (simp_all add:prod_filter_eq_bot)
  hence 13: "(λx. ln (ln (n_of x)))  O[F](λx. ln (1 / δ_of x) * (1 / ε_of x^2) + ln (n_of x))"
    by (intro landau_sum_2 evt_n_1 1)

  have "space_usage = (λx. state_space_usage x + seed_space_usage x)"
    unfolding space_usage_def by simp
  also have "...  O[F](λx. ?f1 x + ?f2 x)"
    by (intro landau_sum 2 4 7 8)
  also have "...  O[F](λx. ln (1 / δ_of x) * (1/ε_of x^2) + ln (n_of x))"
    by (intro landau_o.big.subsetI sum_in_bigo 9 10 11 12 13)
  also have "... = O[F](λ(n, ε, δ). ln (1/δ)/ε^2 + ln n)"
    unfolding δ_of_def ε_of_def n_of_def
    by (simp add:case_prod_beta')
  finally show ?thesis by simp
qed

end

unbundle no intro_cong_syntax

end