Theory Distributed_Distinct_Elements_Outer_Algorithm
section ‹Outer Algorithm\label{sec:outer_algorithm}›
text ‹This section introduces the final solution with optimal size space usage. Internally it relies
on the inner algorithm described in Section~\ref{sec:inner_algorithm}, dependending on the
paramaters $n$, $\varepsilon$ and $\delta$ it either uses the inner algorithm directly or if
$\varepsilon^{-1}$ is larger than $\ln n$ it runs $\frac{\varepsilon^{-1}}{\ln \ln n}$ copies of the
inner algorithm (with the modified failure probability $\frac{1}{\ln n}$) using an expander to
select its seeds. The theorems below verify that the probability that the relative
accuracy of the median of the copies is too large is below $\varepsilon$.›
theory Distributed_Distinct_Elements_Outer_Algorithm
imports
Distributed_Distinct_Elements_Accuracy
Prefix_Free_Code_Combinators.Prefix_Free_Code_Combinators
Frequency_Moments.Landau_Ext
Landau_Symbols.Landau_More
begin
unbundle intro_cong_syntax
text ‹The following are non-asymptotic hard bounds on the space usage for the sketches and seeds
repsectively. The end of this section contains a proof that the sum is asymptotically in
$\mathcal O(\ln( \varepsilon^{-1}) \delta^{-1} + \ln n)$.›
definition "state_space_usage = (λ(n,ε,δ). 2^40 * (ln(1/δ)+1)/ ε^2 + log 2 (log 2 n + 3))"
definition "seed_space_usage = (λ(n,ε,δ). 2^30+2^23*ln n+48*(log 2(1/ε)+16)⇧2+336*ln (1/δ))"
locale outer_algorithm =
fixes n :: nat
fixes δ :: real
fixes ε :: real
assumes n_gt_0: "n > 0"
assumes δ_gt_0: "δ > 0" and δ_lt_1: "δ < 1"
assumes ε_gt_0: "ε > 0" and ε_lt_1: "ε < 1"
begin
definition n⇩0 where "n⇩0 = max (real n) (exp (exp 5))"
definition stage_two where "stage_two = (δ < (1/ln n⇩0))"
definition δ⇩i :: real where "δ⇩i = (if stage_two then (1/ln n⇩0) else δ)"
definition m :: nat where "m = (if stage_two then nat ⌈4 * ln (1/ δ)/ln (ln n⇩0)⌉ else 1)"
definition α where "α = (if stage_two then (1/ln n⇩0) else 1)"
lemma m_lbound:
assumes "stage_two"
shows "m ≥ 4 * ln (1/ δ)/ln(ln n⇩0)"
proof -
have "m = real (nat ⌈4 * ln (1 / δ) / ln (ln n⇩0)⌉)"
using assms unfolding m_def by simp
also have "... ≥ 4 * ln (1 / δ) / ln (ln n⇩0)"
by linarith
finally show ?thesis by simp
qed
lemma n_lbound:
"n⇩0 ≥ exp (exp 5)" "ln n⇩0 ≥ exp 5" "5 ≤ ln (ln n⇩0)" "ln n⇩0 > 1" "n⇩0 > 1"
proof -
show 0:"n⇩0 ≥ exp (exp 5)"
unfolding n⇩0_def by simp
have "(1::real) ≤ exp (exp 5)"
by (approximation 5)
hence "n⇩0 ≥ 1"
using 0 by argo
thus 1:"ln n⇩0 ≥ exp 5"
using 0 by (intro iffD2[OF ln_ge_iff]) auto
moreover have "1 < exp (5::real)"
by (approximation 5)
ultimately show 2:"ln n⇩0 > 1"
by argo
show "5 ≤ ln (ln n⇩0)"
using 1 2 by (subst ln_ge_iff) simp
have "(1::real) < exp (exp 5)"
by (approximation 5)
thus "n⇩0 > 1"
using 0 by argo
qed
lemma δ1_gt_0: "0 < δ⇩i"
using n_lbound(4) δ_gt_0 unfolding δ⇩i_def
by (cases "stage_two") simp_all
lemma δ1_lt_1: "δ⇩i < 1"
using n_lbound(4) δ_lt_1 unfolding δ⇩i_def
by (cases "stage_two") simp_all
lemma m_gt_0_aux:
assumes "stage_two"
shows "1 ≤ ln (1 / δ) / ln (ln n⇩0)"
proof -
have "ln n⇩0 ≤ 1 / δ"
using n_lbound(4)
using assms unfolding pos_le_divide_eq[OF δ_gt_0] stage_two_def
by (simp add:divide_simps ac_simps)
hence "ln (ln n⇩0) ≤ ln (1 / δ)"
using n_lbound(4) δ_gt_0 by (intro iffD2[OF ln_le_cancel_iff] divide_pos_pos) auto
thus "1 ≤ ln (1 / δ) / ln (ln n⇩0)"
using n_lbound(3)
by (subst pos_le_divide_eq) auto
qed
lemma m_gt_0: "m > 0"
proof (cases "stage_two")
case True
have "0 < 4 * ln (1/ δ)/ln(ln n⇩0)"
using m_gt_0_aux[OF True] by simp
also have "... ≤ m"
using m_lbound[OF True] by simp
finally have "0 < real m"
by simp
then show ?thesis by simp
next
case False
then show ?thesis unfolding m_def by simp
qed
lemma α_gt_0: "α > 0"
using n_lbound(4) unfolding α_def
by (cases "stage_two") auto
lemma α_le_1: "α ≤ 1"
using n_lbound(4) unfolding α_def
by (cases "stage_two") simp_all
sublocale I: inner_algorithm "n" "δ⇩i" "ε"
unfolding inner_algorithm_def using n_gt_0 ε_gt_0 ε_lt_1 δ1_gt_0 δ1_lt_1 by auto
abbreviation Θ where "Θ ≡ ℰ m α I.Ω"
lemma Θ: "m > 0" "α > 0" using α_gt_0 m_gt_0 by auto
type_synonym state = "inner_algorithm.state list"
fun single :: "nat ⇒ nat ⇒ state" where
"single θ x = map (λj. I.single (pro_select Θ θ j) x) [0..<m]"
fun merge :: "state ⇒ state ⇒ state" where
"merge x y = map (λ(x,y). I.merge x y) (zip x y)"
fun estimate :: "state ⇒ real" where
"estimate x = median m (λi. I.estimate (x ! i))"
definition ν :: "nat ⇒ nat set ⇒ state"
where "ν θ A = map (λi. I.τ (pro_select Θ θ i) A) [0..<m]"
text ‹The following three theorems verify the correctness of the algorithm. The term @{term "τ"}
is a mathematical description of the sketch for a given subset, while @{term "single"},
@{term "merge"} are the actual functions that compute the sketches.›
theorem merge_result: "merge (ν ω A) (ν ω B) = ν ω (A ∪ B)" (is "?L = ?R")
proof -
have 0: "zip [0..<m] [0..<m] = map (λx. (x,x)) [0..<m]" for m
by (induction m, auto)
have "?L = map (λx. I.merge (I.τ (pro_select Θ ω x) A) (I.τ (pro_select Θ ω x) B)) [0..<m]"
unfolding ν_def
by (simp add:zip_map_map 0 comp_def case_prod_beta)
also have "... = map (λx. I.τ (pro_select Θ ω x) (A ∪ B)) [0..<m]"
by (intro map_cong refl I.merge_result expander_pro_range[OF Θ])
also have "... = ?R"
unfolding ν_def by simp
finally show ?thesis by simp
qed
theorem single_result: "single ω x = ν ω {x}" (is "?L = ?R")
proof -
have "?L = map (λj. I.single (pro_select Θ ω j) x) [0..<m]"
by (simp del:I.single.simps)
also have "... = ?R"
unfolding ν_def by (intro map_cong I.single_result expander_pro_range[OF Θ]) auto
finally show ?thesis by simp
qed
theorem estimate_result:
assumes "A ⊆ {..<n}" "A ≠ {}"
defines "p ≡ (pmf_of_set {..<pro_size Θ})"
shows "measure p {ω. ¦estimate (ν ω A)- real (card A)¦ > ε * real (card A)} ≤ δ" (is "?L ≤ ?R")
proof (cases "stage_two")
case True
define I where "I = {x. ¦x - real (card A)¦ ≤ ε * real (card A)}"
have int_I: "interval I"
unfolding interval_def I_def by auto
define μ where "μ = measure I.Ω {ω. I.estimate (I.τ ω A) ∉ I}"
have 0:"μ + α > 0"
unfolding μ_def
by (intro add_nonneg_pos α_gt_0) auto
have "μ ≤ δ⇩i"
unfolding μ_def I_def using I.estimate_result[OF assms(1,2)]
by (simp add: not_le del:I.estimate.simps)
also have "... = 1/ln n⇩0"
using True unfolding δ⇩i_def by simp
finally have "μ ≤ 1/ln n⇩0" by simp
hence "μ + α ≤ 1/ln n⇩0 + 1/ln n⇩0"
unfolding α_def using True by (intro add_mono) auto
also have "... = 2/ln n⇩0"
by simp
finally have 1:"μ + α ≤ 2 / ln n⇩0"
by simp
hence 2:"ln n⇩0 ≤ 2 / (μ + α)"
using 0 n_lbound by (simp add:field_simps)
have "μ + α ≤ 2/ln n⇩0"
by (intro 1)
also have "... ≤ 2/exp 5"
using n_lbound by (intro divide_left_mono) simp_all
also have "... ≤ 1/2"
by (approximation 5)
finally have 3:"μ + α ≤ 1/2" by simp
have 4: "2 * ln 2 + 8 * exp (- 1) ≤ (5::real)"
by (approximation 5)
have "?L = measure p {ω. median m (λi. I.estimate (ν ω A ! i)) ∉ I}"
unfolding I_def by (simp add:not_le)
also have "... ≤
measure p {θ. real (card {i ∈ {..<m}. I.estimate (I.τ (pro_select Θ θ i) A) ∉ I})≥ real m/2}"
proof (rule pmf_mono)
fix θ assume "θ ∈ set_pmf p"
assume a:"θ ∈ {ω. median m (λi. I.estimate (ν ω A ! i)) ∉ I}"
hence "real m ≤ real (2*card {i. i < m ∧ I.estimate (ν θ A ! i) ∉ I})"
by (intro of_nat_mono median_est_rev int_I) auto
also have "... = 2 * real (card {i∈{..<m}. I.estimate (ν θ A ! i) ∉ I})"
by simp
also have "... = 2 * real (card {i ∈ {..<m}. I.estimate (I.τ (pro_select Θ θ i) A) ∉ I})"
unfolding ν_def by (intro_cong "[σ⇩2 (*), σ⇩1 of_nat, σ⇩1 card]" more:restr_Collect_cong)
(simp del:I.estimate.simps)
finally have "real m ≤ 2 * real (card {i ∈ {..<m}. I.estimate (I.τ (pro_select Θ θ i) A) ∉ I})"
by simp
thus "θ ∈ {θ. real m / 2 ≤ real (card {i ∈ {..<m}. I.estimate (I.τ (pro_select Θ θ i) A) ∉ I})}"
by simp
qed
also have "...=measure Θ{θ. real(card {i ∈ {..<m}. I.estimate (I.τ (θ i) A) ∉ I})≥(1/2)*real m}"
unfolding sample_pro_alt p_def by (simp del:I.estimate.simps)
also have "... ≤ exp (-real m * ((1/2) * ln (1/ (μ + α)) - 2*exp (-1)))"
using 3 m_gt_0 α_gt_0 unfolding μ_def by (intro walk_tail_bound) force+
also have "... ≤ exp (-real m * ((1/2) * ln (ln n⇩0 / 2) - 2*exp (-1)))"
using 0 2 3 n_lbound
by (intro iffD2[OF exp_le_cancel_iff] mult_right_mono mult_left_mono_neg[where c="-real m"]
diff_mono mult_left_mono iffD2[OF ln_le_cancel_iff]) (simp_all)
also have "... = exp (-real m * (ln (ln n⇩0) / 2 - (ln 2/2 + 2*exp (-1))))"
using n_lbound by (subst ln_div) (simp_all add:algebra_simps)
also have "... ≤ exp (-real m * (ln (ln n⇩0) / 2 - (ln (ln (exp(exp 5))) / 4)))"
using 4
by (intro iffD2[OF exp_le_cancel_iff] mult_left_mono_neg[where c="-real m"] diff_mono) simp_all
also have "... ≤ exp (-real m * (ln (ln n⇩0) / 2 - (ln (ln n⇩0) / 4)))"
using n_lbound
by (intro iffD2[OF exp_le_cancel_iff] mult_left_mono_neg[where c="-real m"] diff_mono) simp_all
also have "... = exp (- real m * (ln (ln n⇩0)/ 4) )"
by (simp add:algebra_simps)
also have "... ≤ exp (- (4 * ln (1/ δ)/ln(ln n⇩0)) * (ln (ln n⇩0)/4))"
using m_lbound[OF True] n_lbound
by (intro iffD2[OF exp_le_cancel_iff] mult_right_mono divide_nonneg_pos) simp_all
also have "... = exp (- ln (1/ δ))"
using n_lbound by simp
also have "... = δ"
using δ_gt_0 by (subst ln_inverse[symmetric]) auto
finally show ?thesis by simp
next
case False
have m_eq: "m = 1"
unfolding m_def using False by simp
hence "?L = measure p {ω. ε * real (card A) < ¦I.estimate (ν ω A ! 0) - real (card A)¦}"
unfolding estimate.simps m_eq median_def by simp
also have "... = measure p {ω. ε*card A<¦I.estimate (I.τ (pro_select Θ ω 0) A)-real(card A)¦}"
unfolding ν_def m_eq by (simp del: I.estimate.simps)
also have "... = measure Θ {ω. ε*real(card A) < ¦I.estimate (I.τ (ω 0) A)-real(card A)¦}"
unfolding sample_pro_alt p_def by (simp del:I.estimate.simps)
also have "...=
measure (map_pmf (λθ. θ 0) Θ) {ω. ε*real(card A) < ¦I.estimate (I.τ ω A)-real(card A)¦}"
by simp
also have "... = measure I.Ω {ω. ε*real(card A) < ¦I.estimate (I.τ ω A)-real(card A)¦}"
using m_eq by (subst expander_uniform_property[OF Θ]) auto
also have "... ≤ δ⇩i"
by (intro I.estimate_result[OF assms(1,2)])
also have "... = ?R"
unfolding δ⇩i_def using False by simp
finally show ?thesis
by simp
qed
text ‹The function @{term "encode_state"} can represent states as bit strings.
This enables verification of the space usage.›
definition encode_state
where "encode_state = Lf⇩e I.encode_state m"
lemma encode_state: "is_encoding encode_state"
unfolding encode_state_def
by (intro fixed_list_encoding I.encode_state)
lemma state_bit_count:
"bit_count (encode_state (ν ω A)) ≤ state_space_usage (real n, ε, δ)"
(is "?L ≤ ?R")
proof -
have 0: "length (ν ω A) = m"
unfolding ν_def by simp
have "?L = (∑x←ν ω A. bit_count (I.encode_state x))"
using 0 unfolding encode_state_def fixed_list_bit_count by simp
also have "... = (∑x←[0..<m]. bit_count (I.encode_state (I.τ (pro_select Θ ω x) A)))"
unfolding ν_def by (simp add:comp_def)
also have "... ≤ (∑x←[0..<m]. ereal (2^36 *(ln (1/δ⇩i)+ 1)/ε⇧2 + log 2 (log 2 (real n) + 3)))"
using I.state_bit_count by (intro sum_list_mono I.state_bit_count expander_pro_range[OF Θ])
also have "... = ereal ( real m * (2^36 *(ln (1/δ⇩i)+ 1)/ε⇧2 + log 2 (log 2 (real n) + 3)))"
unfolding sum_list_triv_ereal by simp
also have "... ≤ 2^40 * (ln(1/δ)+1)/ ε^2 + log 2 (log 2 n + 3)" (is "?L1 ≤ ?R1")
proof (cases "stage_two")
case True
have "⌈4*ln (1/δ)/ln(ln n⇩0)⌉ ≤ 4*ln (1/δ)/ln(ln n⇩0) + 1"
by simp
also have "... ≤ 4*ln (1/δ)/ln(ln n⇩0) + ln (1/δ)/ln(ln n⇩0)"
using m_gt_0_aux[OF True] by (intro add_mono) auto
also have "... = 5 * ln (1/δ)/ln(ln n⇩0)" by simp
finally have 3: "⌈4*ln (1/δ)/ln(ln n⇩0)⌉ ≤ 5 * ln (1/δ)/ln(ln n⇩0)"
by simp
have 4: "0 ≤ log 2 (log 2 (real n) + 3)"
using n_gt_0
by (intro iffD2[OF zero_le_log_cancel_iff] add_nonneg_pos) auto
have 5: "1 / ln 2 + 3 / exp 5 ≤ exp (1::real)" "1.2 / ln 2 ≤ (2::real)"
by (approximation 5)+
have "log 2(log 2 (real n)+3) ≤ log 2 (log 2 n⇩0 + 3)"
using n_gt_0 by (intro iffD2[OF log_le_cancel_iff] add_mono add_nonneg_pos
iffD2[OF zero_le_log_cancel_iff]) (simp_all add:n⇩0_def)
also have "... = ln (ln n⇩0 / ln 2 + 3) / ln 2"
unfolding log_def by simp
also have "... ≤ ln (ln n⇩0/ln 2 + (3 / exp 5) * ln n⇩0) / ln 2"
using n_lbound by (intro divide_right_mono iffD2[OF ln_le_cancel_iff] add_mono add_nonneg_pos)
(simp_all add:divide_simps)
also have "... = ln ( ln n⇩0 * (1 /ln 2 + 3/exp 5)) / ln 2"
by (simp add:algebra_simps)
also have "... ≤ ln ( ln n⇩0 * exp 1) / ln 2"
using n_lbound by (intro divide_right_mono iffD2[OF ln_le_cancel_iff] add_mono
mult_left_mono 5 Rings.mult_pos_pos add_pos_nonneg) auto
also have "... = (ln (ln n⇩0) + 1) / ln 2"
using n_lbound by (subst ln_mult) simp_all
also have "... ≤ (ln (ln n⇩0) + 0.2 * ln (ln n⇩0)) / ln 2"
using n_lbound by (intro divide_right_mono add_mono) auto
also have "... = (1.2/ ln 2) * ln (ln n⇩0)"
by simp
also have "... ≤ 2 * ln (ln n⇩0)"
using n_lbound by (intro mult_right_mono 5) simp
finally have "log 2(log 2 (real n)+3) ≤ 2 * ln (ln n⇩0)"
by simp
hence 6: "log 2(log 2 (real n)+3)/ln(ln n⇩0) ≤ 2"
using n_lbound by (subst pos_divide_le_eq) simp_all
have "?L1 = real(nat ⌈4*ln (1/δ)/ln(ln n⇩0)⌉)*(2^36*(ln (ln n⇩0)+1)/ε^2+log 2(log 2 (real n)+3))"
using True unfolding m_def δ⇩i_def by simp
also have "... = ⌈4*ln (1/δ)/ln(ln n⇩0)⌉*(2^36*(ln (ln n⇩0)+1)/ε^2+log 2(log 2 (real n)+3))"
using m_gt_0_aux[OF True] by (subst of_nat_nat) simp_all
also have "... ≤ (5*ln (1/δ)/ln(ln n⇩0)) *(2^36*(ln (ln n⇩0)+1)/ε^2+log 2(log 2 (real n)+3))"
using n_lbound(3) ε_gt_0 4 by (intro ereal_mono mult_right_mono
add_nonneg_nonneg divide_nonneg_pos mult_nonneg_nonneg 3) simp_all
also have "... ≤ (5 * ln (1/δ)/ln(ln n⇩0))*((2^36+2^36)*ln (ln n⇩0)/ε^2+log 2(log 2 (real n)+3))"
using n_lbound δ_gt_0 δ_lt_1
by (intro ereal_mono mult_left_mono add_mono divide_right_mono divide_nonneg_pos) auto
also have "... = 5*(2^37)* ln (1/δ)/ ε^2 + (5*ln (1/δ)) * (log 2(log 2 (real n)+3)/ln(ln n⇩0))"
using n_lbound by (simp add:algebra_simps)
also have "... ≤ 5*(2^37)* ln (1/δ)/ ε^2 + (5*ln(1/ δ)) * 2"
using δ_gt_0 δ_lt_1 by (intro add_mono ereal_mono order.refl mult_left_mono 6) auto
also have "... = 5*(2^37)* ln (1/δ)/ ε^2 + 5*2*ln(1/ δ) / 1"
by simp
also have "... ≤ 5*(2^37)* ln (1/δ)/ ε^2 + 5*2*ln(1/ δ) / ε^2"
using ε_gt_0 ε_lt_1 δ_gt_0 δ_lt_1
by (intro add_mono ereal_mono divide_left_mono Rings.mult_pos_pos power_le_one) auto
also have "... = (5*(2^37+2))* (ln (1/δ)+0)/ ε^2 + 0"
by (simp add:algebra_simps)
also have "... ≤ 2^40 * (ln (1 / δ)+1) / ε^2 + log 2 (log 2 (real n) + 3)"
using ε_gt_0 ε_lt_1 δ_gt_0 δ_lt_1 n_gt_0 by (intro add_mono ereal_mono divide_right_mono
mult_right_mono iffD2[OF zero_le_log_cancel_iff] add_nonneg_pos) auto
finally show ?thesis by simp
next
case False
have "?L1 = 2^36 *(ln (1/δ)+ 1)/ε⇧2 + log 2 (log 2 (real n) + 3)"
using False unfolding δ⇩i_def m_def by simp
also have "... ≤ ?R1"
using ε_gt_0 ε_lt_1 δ_gt_0 δ_lt_1
by (intro ereal_mono add_mono divide_right_mono mult_right_mono add_nonneg_nonneg) auto
finally show ?thesis by simp
qed
finally show ?thesis
unfolding state_space_usage_def by simp
qed
text ‹Encoding function for the seeds which are just natural numbers smaller than
@{term "pro_size Θ"}.›
definition encode_seed
where "encode_seed = Nb⇩e (pro_size Θ)"
lemma encode_seed:
"is_encoding encode_seed"
unfolding encode_seed_def by (intro bounded_nat_encoding)
lemma random_bit_count:
assumes "ω < pro_size Θ"
shows "bit_count (encode_seed ω) ≤ seed_space_usage (real n, ε, δ)"
(is "?L ≤ ?R")
proof -
have 0: "pro_size Θ > 0" by (intro pro_size_gt_0)
have 1: "pro_size I.Ω > 0" by (intro pro_size_gt_0)
have "(55+60*ln (ln n⇩0))^3 ≤ (180+60*ln (ln n⇩0))^3"
using n_lbound by (intro power_mono add_mono) auto
also have "... = 180^3 * (1+ln (ln n⇩0)/real 3)^3"
unfolding power_mult_distrib[symmetric] by simp
also have "... ≤ 180^3 * exp (ln (ln n⇩0))"
using n_lbound by (intro mult_left_mono exp_ge_one_plus_x_over_n_power_n) auto
also have "... = 180^3 * ln n⇩0"
using n_lbound by (subst exp_ln) auto
also have "... ≤ 180^3 * max (ln n) (ln (exp (exp 5)))"
using n_gt_0 unfolding n⇩0_def by (subst ln_max_swap) auto
also have "... ≤ 180^3 * (ln n + exp 5)"
using n_gt_0 unfolding ln_exp by (intro mult_left_mono) auto
finally have 2:"(55+60*ln (ln n⇩0))^3 ≤ 180^3 * ln n + 180^3*exp 5"
by simp
have 3:"(1::real)+180^3*exp 5 ≤ 2^30" "(4::real)/ln 2 + 180^3 ≤ 2^23"
by (approximation 10)+
have "?L = ereal (real (floorlog 2 (pro_size Θ - 1)))"
using assms unfolding encode_seed_def bounded_nat_bit_count by simp
also have "... ≤ ereal (real (floorlog 2 (pro_size Θ)))"
by (intro ereal_mono Nat.of_nat_mono floorlog_mono) auto
also have "... = ereal (1 + of_int ⌊log 2 (real (pro_size Θ))⌋)"
using 0 unfolding floorlog_def by simp
also have "... ≤ ereal (1 + log 2 (real (pro_size Θ)))"
by (intro add_mono ereal_mono) auto
also have "... = 1 + log 2 (real (pro_size I.Ω) * (2^4) ^ ((m - 1) * nat ⌈ln α / ln 0.95⌉))"
unfolding expander_pro_size[OF Θ] by simp
also have "... = 1 + log 2 (real (pro_size I.Ω) * 2^ (4 * (m - 1) * nat ⌈ln α / ln 0.95⌉))"
unfolding power_mult by simp
also have "... = 1 + log 2 (real (pro_size I.Ω)) + (4*(m-1)* nat⌈ln α / ln 0.95⌉)"
using 1 by (subst log_mult) simp_all
also have "... ≤ 1+log 2(2 powr (4*log 2 n + 48 * (log 2 (1/ε)+16)⇧2+ (55+60*ln (1/δ⇩i))^3))+
(4*(m-1)* nat⌈ln α / ln 0.95⌉)"
using 1 by (intro ereal_mono add_mono iffD2[OF log_le_cancel_iff] I.random_bit_count) auto
also have "...=1+4*log 2 n+48*(log 2(1/ε)+16)⇧2+(55+60*ln (1/δ⇩i))^3+(4*(m-1)*nat⌈ln α/ln 0.95⌉)"
by (subst log_powr_cancel) auto
also have "... ≤ 2^30 + 2^23*ln n+48*(log 2(1/ε)+16)⇧2 + 336*ln (1/δ)" (is "?L1 ≤ ?R1")
proof (cases "stage_two")
case True
have "-1 < (0::real)" by simp
also have "... ≤ ln α / ln 0.95"
using α_gt_0 α_le_1 by (intro divide_nonpos_neg) auto
finally have 4: "- 1 < ln α / ln 0.95" by simp
have 5: "- 1 / ln 0.95 ≤ (20::real)"
by (approximation 10)
have "(4*(m-1)*nat⌈ln α/ln 0.95⌉) = 4 * (real m-1) * of_int ⌈ln α/ln 0.95⌉"
using 4 m_gt_0 unfolding of_nat_mult by (subst of_nat_nat) auto
also have "... ≤ 4 * (real m-1) * (ln α/ln 0.95 + 1)"
using m_gt_0 by (intro mult_left_mono) auto
also have "... = 4 * (real m-1) * (-ln (ln n⇩0)/ln 0.95 + 1)"
using n_lbound True unfolding α_def
by (subst ln_inverse[symmetric]) (simp_all add:inverse_eq_divide)
also have "... = 4 * (real m - 1 ) * (ln (ln n⇩0) * (-1/ln 0.95) + 1)"
by simp
also have "... ≤ 4 * (real m - 1 ) * (ln (ln n⇩0) * 20 + 1)"
using n_lbound m_gt_0 by (intro mult_left_mono add_mono 5) auto
also have "... = 4 * (real (nat ⌈4 * ln (1 / δ) / ln (ln n⇩0)⌉)-1) * (ln (ln n⇩0) * 20 + 1)"
using True unfolding m_def by simp
also have "... = 4 * (real_of_int ⌈4 * ln (1 / δ) / ln (ln n⇩0)⌉-1) * (ln (ln n⇩0) * 20 + 1)"
using m_gt_0_aux[OF True] by (subst of_nat_nat) simp_all
also have "... ≤ 4 * (4 * ln (1 / δ) / ln (ln n⇩0)) * (ln (ln n⇩0) * 20 + 1)"
using n_lbound by (intro mult_left_mono mult_right_mono) auto
also have "... ≤ 4 * (4 * ln (1 / δ) / ln (ln n⇩0)) * (ln (ln n⇩0) * 20 + ln (ln n⇩0))"
using δ_gt_0 δ_lt_1 n_lbound
by (intro mult_left_mono mult_right_mono add_mono divide_nonneg_pos Rings.mult_nonneg_nonneg)
simp_all
also have "... = 336 * ln (1 / δ)"
using n_lbound by simp
finally have 6: "4 * (m-1) * nat ⌈ln α/ln 0.95⌉ ≤ 336 * ln (1/δ)"
by simp
have "?L1 =1+4*log 2 n+48*(log 2(1/ε)+16)⇧2+(55+60*ln (ln n⇩0))^3+(4*(m-1)*nat⌈ln α/ln 0.95⌉)"
using True unfolding δ⇩i_def by simp
also have "... ≤ 1+4*log 2 n+48*(log 2(1/ε)+16)⇧2+(180^3 * ln n + 180^3*exp 5) + 336 * ln (1/δ)"
by (intro add_mono 6 2 ereal_mono order.refl)
also have "... = (1+180^3*exp 5)+ (4/ln 2 + 180^3)*ln n+48*(log 2(1/ε)+16)⇧2+ 336 * ln (1/δ)"
by (simp add:log_def algebra_simps)
also have "... ≤ 2^30 + 2^23*ln n+48*(log 2(1/ε)+16)⇧2+ 336 * ln (1/δ)"
using n_gt_0 by (intro add_mono ereal_mono 3 order.refl mult_right_mono) auto
finally show ?thesis by simp
next
case False
hence "1 / δ ≤ ln n⇩0"
using δ_gt_0 n_lbound
unfolding stage_two_def not_less by (simp add:divide_simps ac_simps)
hence 7: "ln (1 / δ) ≤ ln (ln n⇩0)"
using n_lbound δ_gt_0 δ_lt_1
by (intro iffD2[OF ln_le_cancel_iff]) auto
have 8: "0 ≤ 336*ln (1/δ) "
using δ_gt_0 δ_lt_1 by auto
have "?L1 = 1 + 4 * log 2 (real n) + 48 * (log 2 (1 / ε) + 16)⇧2 + (55 + 60 * ln (1 / δ)) ^ 3"
using False unfolding δ⇩i_def m_def by simp
also have "... ≤ 1 + 4 * log 2 (real n) + 48 * (log 2 (1 / ε) + 16)⇧2 + (55 + 60 * ln (ln n⇩0))^3"
using δ_gt_0 δ_lt_1
by (intro add_mono order.refl ereal_mono power_mono mult_left_mono add_nonneg_nonneg 7) auto
also have "... ≤ 1+4*log 2(real n)+48*(log 2 (1 / ε)+16)⇧2+(180^3*ln (real n) + 180 ^ 3 * exp 5)"
by (intro add_mono ereal_mono 2 order.refl)
also have "... = (1+180^3*exp 5)+ (4/ln 2 + 180^3)*ln n+48*(log 2(1/ε)+16)⇧2+ 0"
by (simp add:log_def algebra_simps)
also have "... ≤ 2^30 + 2^23*ln n+48*(log 2(1/ε)+16)⇧2 + 336*ln (1/δ)"
using n_gt_0 by (intro add_mono ereal_mono 3 order.refl mult_right_mono 8) auto
finally show ?thesis by simp
qed
also have "... = seed_space_usage (real n, ε, δ)"
unfolding seed_space_usage_def by simp
finally show ?thesis by simp
qed
text ‹The following is an alternative form expressing the correctness and space usage theorems.
If @{term "x"} is expression formed by @{term "single"} and @{term "merge"} operations. Then
@{term "x"} requires @{term "state_space_usage (real n, ε, δ)"} bits to encode and
@{term "estimate x"} approximates the count of the distinct universe elements in the expression.
For example:
@{term "estimate (merge (single ω 1) (merge (single ω 5) (single ω 1)))"} approximates the
cardinality of @{term "{1::nat, 5, 1}"} i.e. $2$.›
datatype sketch_tree = Single nat | Merge sketch_tree sketch_tree
fun eval :: "nat ⇒ sketch_tree ⇒ state"
where
"eval ω (Single x) = single ω x" |
"eval ω (Merge x y) = merge (eval ω x) (eval ω y)"
fun sketch_tree_set :: "sketch_tree ⇒ nat set"
where
"sketch_tree_set (Single x) = {x}" |
"sketch_tree_set (Merge x y) = sketch_tree_set x ∪ sketch_tree_set y"
theorem correctness:
fixes X
assumes "sketch_tree_set t ⊆ {..<n}"
defines "p ≡ pmf_of_set {..<pro_size Θ}"
defines "X ≡ real (card (sketch_tree_set t))"
shows "measure p {ω. ¦estimate (eval ω t) - X¦ > ε * X} ≤ δ" (is "?L ≤ ?R")
proof -
define A where "A = sketch_tree_set t"
have X_eq: "X = real (card A)"
unfolding X_def A_def by simp
have 0:"eval ω t = ν ω A" for ω
unfolding A_def using single_result merge_result
by (induction t) (auto simp del:merge.simps single.simps)
have 1: "A ⊆ {..<n}"
using assms(1) unfolding A_def by blast
have 2: "A ≠ {}"
unfolding A_def by (induction t) auto
show ?thesis
unfolding 0 X_eq p_def by (intro estimate_result 1 2)
qed
theorem space_usage:
assumes "ω < pro_size Θ"
shows
"bit_count (encode_state (eval ω t)) ≤ state_space_usage (real n, ε, δ)" (is "?A")
"bit_count (encode_seed ω) ≤ seed_space_usage (real n, ε, δ)" (is "?B")
proof-
define A where "A = sketch_tree_set t"
have 0:"eval ω t = ν ω A" for ω
unfolding A_def using single_result merge_result
by (induction t) (auto simp del:merge.simps single.simps)
show ?A
unfolding 0 by (intro state_bit_count)
show ?B
using random_bit_count[OF assms] by simp
qed
end
text ‹The functions @{term "state_space_usage"} and @{term "seed_space_usage"} are exact bounds
on the space usage for the state and the seed. The following establishes asymptotic bounds
with respect to the limit $n, \delta^{-1}, \varepsilon^{-1} \rightarrow \infty$.›
context
begin
text ‹Some local notation to ease proofs about the asymptotic space usage of the algorithm:›
private definition n_of :: "real × real × real ⇒ real" where "n_of = (λ(n, ε, δ). n)"
private definition δ_of :: "real × real × real ⇒ real" where "δ_of = (λ(n, ε, δ). δ)"
private definition ε_of :: "real × real × real ⇒ real" where "ε_of = (λ(n, ε, δ). ε)"
private abbreviation F :: "(real × real × real) filter"
where "F ≡ (at_top ×⇩F at_right 0 ×⇩F at_right 0)"
private lemma var_simps:
"n_of = fst"
"ε_of = (λx. fst (snd x))"
"δ_of = (λx. snd (snd x))"
unfolding n_of_def ε_of_def δ_of_def by (auto simp add:case_prod_beta)
private lemma evt_n: "eventually (λx. n_of x ≥ n) F"
unfolding var_simps by (intro eventually_prod1' eventually_prod2' eventually_ge_at_top)
(simp add:prod_filter_eq_bot)
private lemma evt_n_1: "∀⇩F x in F. 0 ≤ ln (n_of x)"
by (intro eventually_mono[OF evt_n[of "1"]] ln_ge_zero) simp
private lemma evt_n_2: "∀⇩F x in F. 0 ≤ ln (ln (n_of x))"
using order_less_le_trans[OF exp_gt_zero]
by (intro eventually_mono[OF evt_n[of "exp 1"]] ln_ge_zero iffD2[OF ln_ge_iff]) auto
private lemma evt_ε: "eventually (λx. 1/ε_of x ≥ ε ∧ ε_of x > 0) F"
unfolding var_simps by (intro eventually_prod1' eventually_prod2' eventually_conj
real_inv_at_right_0_inf eventually_at_right_less) (simp_all add:prod_filter_eq_bot)
private lemma evt_δ: "eventually (λx. 1/δ_of x ≥ δ ∧ δ_of x > 0) F"
unfolding var_simps by (intro eventually_prod1' eventually_prod2' eventually_conj
real_inv_at_right_0_inf eventually_at_right_less) (simp_all add:prod_filter_eq_bot)
private lemma evt_δ_1: "∀⇩F x in F. 0 ≤ ln (1 / δ_of x)"
by (intro eventually_mono[OF evt_δ[of "1"]] ln_ge_zero) simp
theorem asymptotic_state_space_complexity:
"state_space_usage ∈ O[F](λ(n, ε, δ). ln (1/δ)/ε^2 + ln (ln n))"
(is "_ ∈ O[?F](?rhs)")
proof -
have 0:"(λx. 1) ∈ O[?F](λx. ln (1 / δ_of x))"
using order_less_le_trans[OF exp_gt_zero]
by (intro landau_o.big_mono eventually_mono[OF evt_δ[of "exp 1"]])
(auto intro!: iffD2[OF ln_ge_iff] simp add:abs_ge_iff)
have 1:"(λx. 1) ∈ O[?F](λx. ln (n_of x))"
using order_less_le_trans[OF exp_gt_zero]
by (intro landau_o.big_mono eventually_mono[OF evt_n[of "exp 1"]])
(auto intro!:iffD2[OF ln_ge_iff] simp add:abs_ge_iff)
have "(λx. ((ln (1/δ_of x)+1)* (1/ε_of x)⇧2))∈ O[?F](λx. ln(1/δ_of x)* (1/ε_of x)⇧2)"
by (intro landau_o.mult sum_in_bigo 0) simp_all
hence 2: "(λx. 2^40*((ln (1/δ_of x)+1)* (1/ε_of x)⇧2))∈ O[?F](λx. ln(1/δ_of x)* (1/ε_of x)⇧2)"
unfolding cmult_in_bigo_iff by simp
have 3: "(1::real) ≤ exp 2"
by (approximation 5)
have "(λx. ln (n_of x) / ln 2 + 3) ∈ O[?F](λx. ln (n_of x))"
using 1 by (intro sum_in_bigo) simp_all
hence "(λx. ln (ln (n_of x) / ln 2 + 3)) ∈ O[?F](λx. ln (ln (n_of x)))"
using order_less_le_trans[OF exp_gt_zero] order_trans[OF 3]
by (intro landau_ln_2[where a="2"] eventually_mono[OF evt_n[of "exp 2"]])
(auto intro!:iffD2[OF ln_ge_iff] add_nonneg_nonneg divide_nonneg_pos)
hence 4: "(λx. log 2 (log 2 (n_of x) + 3))∈ O[?F](λx. ln(ln(n_of x)))"
unfolding log_def by simp
have 5: "∀⇩F x in ?F. 0 ≤ ln (1 / δ_of x) * (1 / ε_of x)⇧2"
by (intro eventually_mono[OF eventually_conj[OF evt_δ_1 evt_ε[of "1"]]]) auto
have "state_space_usage = (λx. state_space_usage (n_of x, ε_of x, δ_of x))"
by (simp add:case_prod_beta' n_of_def δ_of_def ε_of_def)
also have "... = (λx. 2 ^ 40 * ((ln (1 / (δ_of x)) + 1)* (1/ε_of x)⇧2) + log 2 (log 2 (n_of x)+3))"
unfolding state_space_usage_def by (simp add:divide_simps)
also have "... ∈ O[?F](λx. ln (1/δ_of x)* (1/ε_of x)⇧2 + ln (ln (n_of x)))"
by (intro landau_sum 2 4 5 evt_n_2)
also have "... = O[?F](?rhs)"
by (simp add:case_prod_beta' n_of_def δ_of_def ε_of_def divide_simps)
finally show ?thesis by simp
qed
theorem asymptotic_seed_space_complexity:
"seed_space_usage ∈ O[F](λ(n, ε, δ). ln (1/δ)+ln (1/ε)^2 + ln n)"
(is "_ ∈ O[?F](?rhs)")
proof -
have 0: "∀⇩F x in ?F. 0 ≤ (ln (1 / ε_of x))⇧2"
by simp
have 1: "∀⇩F x in ?F. 0 ≤ ln (1 / δ_of x) + (ln (1 / ε_of x))⇧2"
by (intro eventually_mono[OF eventually_conj[OF evt_δ_1 0]] add_nonneg_nonneg) auto
have 2: "(λx. 1) ∈ O[?F](λx. ln (1 / ε_of x))"
using order_less_le_trans[OF exp_gt_zero]
by (intro landau_o.big_mono eventually_mono[OF evt_ε[of "exp 1"]])
(auto intro!:iffD2[OF ln_ge_iff] simp add:abs_ge_iff)
have "(λx. 1) ∈ O[at_top ×⇩F at_right 0 ×⇩F at_right 0](λx. ln (n_of x))"
using order_less_le_trans[OF exp_gt_zero]
by (intro landau_o.big_mono eventually_mono[OF evt_n[of "exp 1"]])
(auto intro!:iffD2[OF ln_ge_iff] simp add:abs_ge_iff)
hence 3: "(λx. 1) ∈ O[?F](λx. ln (1 / δ_of x) + (ln (1 / ε_of x))⇧2 + ln (n_of x))"
by (intro landau_sum_2 1 evt_n_1 0 evt_δ_1) simp
have 4: "(λx. ln (n_of x)) ∈ O[?F](λx. ln (1 / δ_of x) + (ln (1 / ε_of x))⇧2 + ln (n_of x))"
by (intro landau_sum_2 1 evt_n_1) simp
have "(λx. log 2 (1 / ε_of x) + 16) ∈ O[?F](λx. ln (1 / ε_of x))"
using 2 unfolding log_def by (intro sum_in_bigo) simp_all
hence 5: "(λx. (log 2 (1 / ε_of x) + 16)⇧2) ∈ O[?F](λx. ln (1/δ_of x)+(ln (1/ε_of x))⇧2)"
using 0 unfolding power2_eq_square by (intro landau_sum_2 landau_o.mult evt_δ_1) simp_all
have 6: "(λx. (log 2 (1 / ε_of x) + 16)⇧2) ∈ O[?F](λx. ln (1/δ_of x)+(ln (1/ε_of x))⇧2+ln (n_of x))"
by (intro landau_sum_1[OF _ _ 5] 1 evt_n_1)
have 7: "(λx. ln (1/δ_of x)) ∈ O[?F](λx. ln (1/δ_of x)+(ln (1/ε_of x))⇧2+ln (n_of x))"
by (intro landau_sum_1 1 evt_δ_1 0 evt_n_1) simp
have "seed_space_usage = (λx. seed_space_usage (n_of x, ε_of x, δ_of x))"
by (simp add:case_prod_beta' n_of_def δ_of_def ε_of_def)
also have "... = (λx. 2^30+2^23*ln (n_of x)+48*(log 2 (1/(ε_of x))+16)⇧2 + 336 * ln (1 / δ_of x))"
unfolding seed_space_usage_def by (simp add:divide_simps)
also have "... ∈ O[?F](λx. ln (1/δ_of x)+ln (1/ε_of x)^2 + ln (n_of x))"
using 3 4 6 7 by (intro sum_in_bigo) simp_all
also have "... = O[?F](?rhs)"
by (simp add:case_prod_beta' n_of_def δ_of_def ε_of_def)
finally show ?thesis by simp
qed
definition "space_usage x = state_space_usage x + seed_space_usage x"
theorem asymptotic_space_complexity:
"space_usage ∈ O[at_top ×⇩F at_right 0 ×⇩F at_right 0](λ(n, ε, δ). ln (1/δ)/ε^2 + ln n)"
proof -
let ?f1 = "(λx. ln (1/δ_of x)*(1/ε_of x^2)+ln (ln (n_of x)))"
let ?f2 = "(λx. ln(1/δ_of x)+ln(1/ε_of x)^2+ln (n_of x))"
have 0: "∀⇩F x in F. 0 ≤ (1 / (ε_of x)⇧2)"
unfolding var_simps by (intro eventually_prod1' eventually_prod2' eventually_inv)
(simp_all add:prod_filter_eq_bot eventually_nonzero_simps)
have 1: "∀⇩F x in F. 0 ≤ ln (1 / δ_of x) * (1 / (ε_of x)⇧2)"
by (intro eventually_mono[OF eventually_conj[OF evt_δ_1 0]] mult_nonneg_nonneg) auto
have 2: "∀⇩F x in F. 0 ≤ ln (1 / δ_of x) * (1 / (ε_of x)⇧2) + ln (ln (n_of x))"
by (intro eventually_mono[OF eventually_conj[OF 1 evt_n_2]] add_nonneg_nonneg) auto
have 3: "∀⇩F x in F. 0 ≤ ln (1 / (ε_of x)⇧2)"
unfolding power_one_over[symmetric]
by (intro eventually_mono[OF evt_ε[of "1"]] ln_ge_zero) simp
have 4: "∀⇩F x in F. 0 ≤ ln (1 / δ_of x) + (ln (1 / ε_of x))⇧2 + ln (n_of x)"
by (intro eventually_mono[OF eventually_conj[OF evt_n_1 eventually_conj[OF evt_δ_1 3]]]
add_nonneg_nonneg) auto
have 5: "(λ_. 1) ∈ O[F](λx. 1 / (ε_of x)⇧2)"
unfolding var_simps by (intro bigo_prod_1 bigo_prod_2 bigo_inv)
(simp_all add:power_divide prod_filter_eq_bot)
have 6: "(λ_. 1) ∈ O[F](λx. ln (1 / δ_of x))"
unfolding var_simps
by (intro bigo_prod_1 bigo_prod_2 bigo_inv) (simp_all add:prod_filter_eq_bot)
have 7: "state_space_usage ∈ O[F](λx. ln (1 / δ_of x) * (1 / (ε_of x)⇧2) + ln (ln (n_of x)))"
using asymptotic_state_space_complexity unfolding δ_of_def ε_of_def n_of_def
by (simp add:case_prod_beta')
have 8: "seed_space_usage ∈ O[F](λx. ln (1 / δ_of x) + (ln (1 / ε_of x))⇧2 + ln (n_of x))"
using asymptotic_seed_space_complexity unfolding δ_of_def ε_of_def n_of_def
by (simp add:case_prod_beta')
have 9: "(λx. ln (n_of x)) ∈ O[F](λx. ln (1 / δ_of x) * (1 / (ε_of x)⇧2) + ln (n_of x))"
by (intro landau_sum_2 evt_n_1 1) simp
have "(λx. (ln (1 / ε_of x))⇧2) ∈ O[F](λx. 1 / ε_of x^2)"
unfolding var_simps
by (intro bigo_prod_1 bigo_prod_2 bigo_inv) (simp_all add:power_divide prod_filter_eq_bot)
hence 10: "(λx. (ln (1 / ε_of x))⇧2) ∈ O[F](λx. ln (1 / δ_of x) * (1 / ε_of x^2) + ln (n_of x))"
by (intro landau_sum_1 evt_n_1 1 landau_o.big_mult_1' 6)
have 11: "(λx. ln (1 / δ_of x)) ∈ O[F](λx. ln (1 / δ_of x) * (1 / ε_of x^2) + ln (n_of x))"
by (intro landau_sum_1 evt_n_1 1 landau_o.big_mult_1 5) simp
have 12: "(λx. ln (1/δ_of x) * (1/ε_of x^2)) ∈ O[F](λx. ln (1/δ_of x)*(1/ε_of x^2)+ln (n_of x))"
by (intro landau_sum_1 1 evt_n_1) simp
have "(λx. ln (ln (n_of x))) ∈ O[F](λx. ln (n_of x))"
unfolding var_simps by (intro bigo_prod_1 bigo_prod_2) (simp_all add:prod_filter_eq_bot)
hence 13: "(λx. ln (ln (n_of x))) ∈ O[F](λx. ln (1 / δ_of x) * (1 / ε_of x^2) + ln (n_of x))"
by (intro landau_sum_2 evt_n_1 1)
have "space_usage = (λx. state_space_usage x + seed_space_usage x)"
unfolding space_usage_def by simp
also have "... ∈ O[F](λx. ?f1 x + ?f2 x)"
by (intro landau_sum 2 4 7 8)
also have "... ⊆ O[F](λx. ln (1 / δ_of x) * (1/ε_of x^2) + ln (n_of x))"
by (intro landau_o.big.subsetI sum_in_bigo 9 10 11 12 13)
also have "... = O[F](λ(n, ε, δ). ln (1/δ)/ε^2 + ln n)"
unfolding δ_of_def ε_of_def n_of_def
by (simp add:case_prod_beta')
finally show ?thesis by simp
qed
end
unbundle no intro_cong_syntax
end