Theory Tutorial_Pseudorandom_Objects

section ‹Tutorial on the use of Pseudorandom-Objects›

theory Tutorial_Pseudorandom_Objects
  imports
    Universal_Hash_Families.Pseudorandom_Objects_Hash_Families
    Expander_Graphs.Pseudorandom_Objects_Expander_Walks
    Equivalence_Relation_Enumeration.Equivalence_Relation_Enumeration
    Median_Method.Median
    Concentration_Inequalities.Bienaymes_Identity
    Frequency_Moments.Frequency_Moments
begin

text ‹This section is a tutorial for the use of pseudorandom objects. Starting from the
approximation algorithm for the second frequency moment by Alon et al.~\cite{alon1999}, we will
improve the solution until we achieve a space complexity of
$\mathcal O(\ln n + \varepsilon^{-2} \ln(\delta^{-1}) \ln m)$, where $n$ denotes the range of the
stream elements, $m$ denotes the length of the stream, $\varepsilon$ denotes the desired accuracy
and $\delta$ denotes the desired failure probability.

The construction relies on a combination of pseudorandom object, in particular an expander walk
and two chained hash families.›

hide_const (open) topological_space_class.discrete
hide_const (open) Abstract_Rewriting.restrict
hide_fact (open) Abstract_Rewriting.restrict_def
hide_fact (open) Henstock_Kurzweil_Integration.integral_cong
hide_fact (open) Henstock_Kurzweil_Integration.integral_mult_right
hide_fact (open) Henstock_Kurzweil_Integration.integral_diff

text ‹The following lemmas show a one-side and two-sided Chernoff-bound for $\{0,1\}$-valued
independent identically distributed random variables. This to show the similarity with expander
walks, for which similar bounds can be established:
@{thm [source] expander_chernoff_bound_one_sided} and @{thm [source] expander_chernoff_bound}.›

lemma classic_chernoff_bound_one_sided:
  fixes l :: nat
  assumes "AE x in measure_pmf p. f x  {0,1::real}"
  assumes "(x. f x p)  μ" "l > 0" "γ  0"
  shows "measure (prod_pmf {0..<l} (λ_. p)) {w. (i<l. f (w i))/l-μγ}  exp (- 2 * real l * γ^2)"
    (is "?L  ?R")
proof -
  define ν  where "ν = real l*(x. f x p)"
  let ?p = "prod_pmf {0..<l} (λ_. p)"

  have 1: "prob_space.indep_vars (measure_pmf ?p) (λ_. borel) (λi x. f (x i)) {0..<l}"
    by (intro prob_space.indep_vars_compose2[OF _ indep_vars_Pi_pmf] prob_space_measure_pmf) auto

  have "f (y i)  {0..1}" if "y  {0..<l} E set_pmf p" "i  {0..<l}" for y i
  proof -
    have "y i  set_pmf p" using that by auto
    thus ?thesis using assms(1) unfolding AE_measure_pmf_iff by auto
  qed
  hence 2: "AE x in measure_pmf ?p. f (x i)  {0..1}"
    if "i  {0..<l}" for i
    using that by (intro AE_pmfI) (auto simp: set_prod_pmf)

  have "(i=0..<l. (x. f (x i) ?p)) = (i<l. (x. f x map_pmf (λx. x i) ?p))"
    by (auto simp:atLeast0LessThan)
  also have "... = (i<l. (x. f x p))" by (subst Pi_pmf_component) auto
  also have "... = ν" unfolding ν_def by simp
  finally have 3: "(i=0..<l. (x. f (x i) prod_pmf {0..<l} (λ_. p))) = ν" by simp

  have 4: "ν  real l * μ" unfolding ν_def using assms(2) by (simp add: mult_le_cancel_left)

  interpret Hoeffding_ineq "measure_pmf ?p" "{0..<l}" "λi x. f (x i)" "(λ_. 0)" "(λ_. 1)" ν
    using 1 2 unfolding 3 by unfold_locales auto

  have "?L  measure ?p {x. (i=0..<l. f (x i))  real l*μ + real l*γ}"
    using assms(3) by (intro pmf_mono) (auto simp:field_simps atLeast0LessThan)
  also have "...  measure ?p {x  space ?p. (i=0..<l. f (x i))  ν + real l*γ}"
    using 4 by (intro pmf_mono) auto
  also have "...  exp (- 2 * (real l * γ)^2 / (i=0..<l. (1 - 0)2))"
    using assms(3,4) by (intro Hoeffding_ineq_ge) auto
  also have "... = ?R" using assms(3) by (simp add:power2_eq_square)
  finally show ?thesis by simp
qed

lemma classic_chernoff_bound:
  assumes "AE x in measure_pmf p. f x  {0,1::real}" "l > (0::nat)" "γ  0"
  defines "μ  (x. f x p)"
  shows "measure (prod_pmf {0..<l} (λ_. p)) {w. ¦(i<l. f (w i))/l-μ¦γ}  2*exp (-2*real l*γ^2)"
    (is "?L  ?R")
proof -
  have [simp]:"integrable p f" using assms(1) unfolding AE_measure_pmf_iff
    by (intro integrable_bounded_pmf boundedI[where B="1"]) auto
  let ?w = "prod_pmf {0..<l} (λ_. p)"
  have "?L  measure ?w {w. (i<l. f (w i))/l-μγ} + measure ?w {w. (i<l. f (w i))/l-μ-(γ)}"
    by (intro pmf_add) auto
  also have "...  exp (-2*real l*γ^2) + measure ?w {w. -((i<l. f (w i))/l-μ)γ}"
    using assms by (intro add_mono classic_chernoff_bound_one_sided) (auto simp:algebra_simps)
  also have "...  exp (-2*real l*γ^2) + measure ?w {w. ((i<l. 1-f (w i))/l-(1-μ))γ}"
    using assms(2) by (auto simp: sum_subtractf field_simps)
  also have "...  exp (-2*real l*γ^2) + exp (-2*real l*γ^2)"
    using assms by (intro add_mono classic_chernoff_bound_one_sided) auto
  also have "... = ?R" by simp
  finally show ?thesis by simp
qed

text ‹Definition of the second frequency moment of a stream.›

definition F2 :: "'a list  real" where
  "F2 xs = ( x  set xs. (of_nat (count_list xs x)^2))"

lemma prime_power_ls: "is_prime_power (pro_size ( [- 1, 1]))"
proof -
  have "is_prime_power ((2::nat)^1)" by (intro is_prime_powerI) auto
  thus "is_prime_power (pro_size ( [- 1, 1]))" by (auto simp:list_pro_size numeral_eq_Suc)
qed

lemma prime_power_h2: "is_prime_power (pro_size ( 4 n ( [- 1, 1::real])))"
  by (intro hash_pro_size_prime_power prime_power_ls) auto

abbreviation Ψ where "Ψ  pmf_of_set {-1,1::real}"

lemma f2_exp:
  assumes "finite (set_pmf p)"
  assumes "I. I  {0..<n}  card I  4  map_pmf (λx. (λiI. x i)) p = prod_pmf I (λ_. Ψ)"
  assumes "set xs  {0..<n::nat}"
  shows "(h. (x  xs. h x)^2 p) = F2 xs" (is "?L = ?R")
proof -
  let ?c = "(λx. real (count_list xs x))"

  have [simp]: "integrable (measure_pmf p) f" for f :: "_  real"
    by (intro integrable_measure_pmf_finite assms)

  have 0:"(h. h x * h y p) = of_bool (x = y)"
    (is "?L1 = ?R1") if "x  set xs" "y  set xs" for x y
  proof -
    have xy_lt_n: "x < n" "y < n"  using assms that by auto
    have card_xy: "card {x,y}  4" by (cases "x = y") auto

    have "?L1 = (h. (h x * h y) map_pmf (λf. restrict f {x,y}) p)"
      by simp
    also have "... = (h. (h x * h y) prod_pmf {x,y} (λ_. Ψ))"
      using xy_lt_n card_xy by (intro integral_cong assms(2) arg_cong[where f="measure_pmf"]) auto
    also have "... = of_bool (x = y)" (is "?L2 = ?R2")
    proof (cases "x = y")
      case True
      hence "?L2 = (h. (h x ^2) prod_pmf {x} (λ_. pmf_of_set {-1,1}))"
        unfolding power2_eq_square by simp
      also have "... = (x. x^2 pmf_of_set {-1,1})"
        unfolding Pi_pmf_singleton by simp
      also have "... = 1" by (subst integral_pmf_of_set) auto
      also have "... = ?R2" using True by simp
      finally show ?thesis by simp
    next
      case False
      hence "?L2 = (h. (i{x,y}. h i) prod_pmf {x,y} (λ_. pmf_of_set {-1,1}))" by simp
      also have "... = (i{x,y}. (x. x pmf_of_set {-1,1}))"
        by (intro expectation_prod_Pi_pmf integrable_measure_pmf_finite) auto
      also have "... = 0" using False by (subst integral_pmf_of_set) auto
      also have "... = ?R2" using False by simp
      finally show ?thesis by simp
    qed
    finally show ?thesis by simp
  qed

  have "?L = (h. (x  set xs. real (count_list xs x) * h x)^2 p)"
    unfolding sum_list_eval by simp
  also have "... = (h. (x  set xs. (y  set xs. (?c x * ?c y) * h x * h y)) p)"
    unfolding power2_eq_square sum_distrib_left sum_distrib_right by (simp add:ac_simps)
  also have "... = (x  set xs. (y  set xs. (h. (?c x * ?c y) * h x * h y p)))" by simp
  also have "... = (x  set xs. (y  set xs. ?c x * ?c y * (h. h x * h y p)))"
    by (subst integral_mult_right[symmetric]) (simp_all add:ac_simps)
  also have "... = (x  set xs. (y  set xs. ?c x * ?c y * of_bool (x = y)))"
    by (intro sum.cong refl) (simp add: 0)
  also have "... = (x  set xs. ?c x^2) "
    unfolding of_bool_def by (simp add:if_distrib if_distribR sum.If_cases power2_eq_square)
  also have "... = F2 xs" unfolding F2_def by simp
  finally show ?thesis by simp
qed

lemma f2_exp_sq:
  assumes "finite (set_pmf p)"
  assumes "I. I  {0..<n}  card I  4  map_pmf (λx. (λiI. x i)) p = prod_pmf I (λ_. Ψ)"
  assumes "set xs  {0..<n::nat}"
  shows "(h. ((xxs. h x)^2)^2 p)  3 * F2 xs^2" (is "?L  ?R")
proof -
  let ?c = "(λx. real (count_list xs x))"

  have [simp]: "integrable (measure_pmf p) f" for f :: "_  real"
    by (intro integrable_measure_pmf_finite assms)

  define S where "S = set xs"

  have a: "finite S" unfolding S_def by simp

  define Q :: "nat  nat  nat  nat  real"
    where "Q a b c d =
      of_bool(a=bc=dac) + of_bool(a=cb=dab) +
      of_bool(a=db=cab) + of_bool(a=bb=cc=d)" for a b c d

  have cases: "(h. h a*h b*h c*h d p) = Q a b c d" (is "?L1 = ?R1")
    if "a  S" "bS" "c  S" "d  S" for a b c d
  proof -
    have "card {a,b,c,d} = card (set [a,b,c,d])" by (intro arg_cong[where f="card"]) auto
    also have "...  length [a,b,c,d]" by (intro card_length)
    finally have card: "card {a, b, c, d}  4" by simp

    have "?L1 = (h. h a*h b*h c*h d map_pmf (λf. restrict f {a,b,c,d}) p)" by simp
    also have "... = (h. h a*h b*h c*h d prod_pmf {a,b,c,d} (λ_. Ψ))" using that assms(3)
      by (intro integral_cong arg_cong[where f="measure_pmf"] assms(2) card) (auto simp:S_def)
    also have "... = (h. (i [a,b,c,d]. h i) prod_pmf {a,b,c,d} (λ_. Ψ))" by (simp add:ac_simps)
    also have "... = (h. (i {a,b,c,d}. h i^count_list [a,b,c,d] i) prod_pmf {a,b,c,d} (λ_. Ψ))"
      by (subst prod_list_eval) auto
    also have "... = (i  {a,b,c,d}. (x. x^count_list [a,b,c,d] i Ψ))"
      by (intro expectation_prod_Pi_pmf integrable_measure_pmf_finite) auto
    also have "... = (i  {a,b,c,d}. of_bool (even (count_list [a,b,c,d] i)))"
      by (intro prod.cong refl) (auto simp:integral_pmf_of_set)
    also have "... = (i  set (remdups [a,b,c,d]). of_bool (even (count_list [a,b,c,d] i)))"
      by (intro prod.cong refl) auto
    also have "... = (i  remdups [a,b,c,d]. of_bool (even (count_list [a,b,c,d] i)))"
      by (intro prod.distinct_set_conv_list) auto
    also have "... = Q a b c d" unfolding Q_def by simp
    finally show ?thesis by simp
  qed

  have "?L = (h. (x  S. real (count_list xs x) * h x)^4 p)"
    unfolding S_def sum_list_eval by simp
  also have "... = (h. (aS. (bS.(cS.(dS.(?c a*?c b*?c c*?c d)*h a*h b*h c*h d)))) p)"
    unfolding power4_eq_xxxx sum_distrib_left sum_distrib_right by (simp add:ac_simps)
  also have "... = (aS.(bS.(cS.(dS.(h. (?c a*?c b*?c c*?c d)*h a*h b*h c*h d p)))))"
    by simp
  also have "... = (aS.(bS.(cS.(dS. (?c a*?c b*?c c*?c d)*(h. h a*h b*h c*h d p)))))"
    by (subst integral_mult_right[symmetric]) (simp_all add:ac_simps)
  also have "... = (aS.(bS.(cS.(dS. (?c a*?c b*?c c*?c d)*(Q a b c d)))))"
    by (intro sum.cong refl) (simp add:cases)
  also have "... = 1*(aS. ?c a^4) + 3*(aS. (bS. ?c a^2 * ?c b^2 * of_bool(ab)))"
    unfolding Q_def
    by (simp add: sum.distrib distrib_left sum_collapse[OF a] ac_simps sum_distrib_left[symmetric]
        power2_eq_square power4_eq_xxxx)
  also have "...  3*(aS. ?c a^4) + 3*(aS. (bS. ?c a^2 * ?c b^2 * of_bool(ab)))"
    by (intro add_mono mult_right_mono sum_nonneg) auto
  also have "... = 3*(aS. (bS. ?c a^2 * ?c b^2 * (of_bool (a=b) + of_bool(ab))))"
    using a by (simp add: sum.distrib distrib_left)
  also have "... = 3*(aS. (bS. ?c a^2 * ?c b^2 * 1))"
    by (intro sum.cong arg_cong2[where f="(*)"] refl) auto
  also have "... = 3 * F2 xs^2" unfolding F2_def power2_eq_square
    by (simp add: S_def sum_distrib_left sum_distrib_right ac_simps)
  finally show "?L  3 * F2 xs^2" by simp
qed

lemma f2_var:
  assumes "finite (set_pmf p)"
  assumes "I. I  {0..<n}  card I  4  map_pmf (λx. (λiI. x i)) p = prod_pmf I (λ_. Ψ)"
  assumes "set xs  {0..<n::nat}"
  shows "measure_pmf.variance p (λh. (xxs. h x)^2)  2* F2 xs^2"
    (is "?L  ?R")
proof -
  have [simp]: "integrable (measure_pmf p) f" for f :: "_  real"
    by (intro integrable_measure_pmf_finite assms)

  have "?L = (h. ((xxs. h x)^2)^2 p) - F2 xs^2"
    by (subst measure_pmf.variance_eq) (simp_all add:f2_exp[OF assms(1-3)])
  also have "...  3 * F2 xs^2 - F2 xs^2"
    by (intro diff_mono f2_exp_sq[OF assms]) auto
  finally show ?thesis by simp
qed

lemma
  assumes "s  set_pmf (P 4 n ( [-1,1]))"
  assumes "set xs  {0..<n}"
  shows f2_exp_hp: "(h. (x  xs. h x)^2 sample_pro s) = F2 xs" (is "?T1")
    and f2_exp_sq_hp: "(h. ((x  xs. h x)^2)^2 sample_pro s)  3* F2 xs^2" (is "?T2")
    and f2_var_hp: "measure_pmf.variance s (λh. (x  xs. h x)^2)  2* F2 xs^2"  (is "?T3")
proof -
  have 0:"map_pmf (λx. restrict x I) (sample_pro s) = prod_pmf I (λ_. Ψ)" (is "?L = _")
    if "I  {0..<n}" "card I  4" for I
  proof -
    have "?L = prod_pmf I (λ_. sample_pro ( [- 1, 1]))"
      using that by (intro hash_pro_pmf_distr[OF _ assms(1)] prime_power_ls) auto
    also have "... = prod_pmf I (λ_. Ψ)" by (subst list_pro_2) auto
    finally show ?thesis by simp
  qed

  show ?T1 by (intro f2_exp[OF _ _ assms(2)] finite_pro_set 0) simp
  show ?T2 by (intro f2_exp_sq[OF _ _ assms(2)] finite_pro_set 0) simp
  show ?T3 by (intro f2_var[OF _ _ assms(2)] finite_pro_set 0) simp
qed

lemmas f2_exp_h = f2_exp_hp[OF hash_pro_in_hash_pro_pmf[OF prime_power_ls]]
lemmas f2_var_h = f2_var_hp[OF hash_pro_in_hash_pro_pmf[OF prime_power_ls]]

lemma F2_definite:
  assumes "xs  []"
  shows "F2 xs > 0"
proof -
  have "0 < real (card (set xs))" using assms by (simp add: card_gt_0_iff)
  also have "... = (x  set xs. 1)" by simp
  also have "...  F2 xs" using count_list_gr_1 unfolding F2_def by (intro sum_mono) force
  finally show ?thesis by simp
qed

text ‹The following algorithm uses a completely random function, accordingly it requires
a lot of space: $\mathcal O(n + \ln m)$.›

fun example_1 :: "nat  nat list  real pmf"
  where "example_1 n xs =
    do {
      h  prod_pmf {0..<n} (λ_. pmf_of_set {-1,1::real});
      return_pmf ((x  xs. h x)^2)
    }"

lemma example_1_correct:
  assumes "set xs  {0..<n}"
  shows
    "measure_pmf.expectation (example_1 n xs) id = F2 xs" (is "?L1 = ?R1")
    "measure_pmf.variance (example_1 n xs) id  2 * F2 xs^2" (is "?L2  ?R2")
proof -
  have "?L1 = (h. (x  xs. h x)^2 prod_pmf {0..<n} (λ_. Ψ))"
    by (simp add:map_pmf_def[symmetric])
  also have "... = ?R1" using assms by (intro f2_exp)
      (auto intro: Pi_pmf_subset[symmetric] simp add:restrict_def set_Pi_pmf)
  finally show "?L1 = ?R1" by simp

  have "?L2 = measure_pmf.variance (prod_pmf {0..<n} (λ_. Ψ)) (λh. (x  xs. h x)^2)"
    by (simp add:map_pmf_def[symmetric] atLeast0LessThan)
  also have "...  ?R2"
    using assms by (intro f2_var)
      (auto intro: Pi_pmf_subset[symmetric] simp add:restrict_def set_Pi_pmf)
  finally show "?L2  ?R2" by simp
qed

text ‹This version replaces a the use of completely random function with a pseudorandom object,
it requires a lot less space: $\mathcal O(\ln n + \ln m)$.›

fun example_2 :: "nat  nat list  real pmf"
  where "example_2 n xs =
    do {
      h  sample_pro ( 4 n ( [-1,1]));
      return_pmf ((x  xs. h x)^2)
    }"

lemma example_2_correct:
  assumes "set xs  {0..<n}"
  shows
    "measure_pmf.expectation (example_2 n xs) id = F2 xs" (is "?L1 = ?R1")
    "measure_pmf.variance (example_2 n xs) id  2 * F2 xs^2" (is "?L2  ?R2")
proof -
  have "?L1 = (h. (x  xs. h x)^2 sample_pro ( 4 n ( [-1,1])))"
    by (simp add:map_pmf_def[symmetric])
  also have "... = ?R1"
    using assms by (intro f2_exp_h) auto
  finally show "?L1 = ?R1" by simp

  have "?L2 = measure_pmf.variance (sample_pro ( 4 n ( [-1,1]))) (λh. (x  xs. h x)^2)"
    by (simp add:map_pmf_def[symmetric])
  also have "...  ?R2"
    using assms by (intro f2_var_h) auto
  finally show "?L2  ?R2" by simp
qed

text ‹The following version replaces the deterministic construction of the pseudorandom object
with a randomized one. This algorithm is much faster, but the correctness proof is more difficult.›

fun example_3 :: "nat  nat list  real pmf"
  where "example_3 n xs =
    do {
      h  sample_pro =<< P 4 n ( [-1,1]);
      return_pmf ((x  xs. h x)^2)
    }"

lemma
  assumes "set xs  {0..<n}"
  shows
    "measure_pmf.expectation (example_3 n xs) id = F2 xs" (is "?L1 = ?R1")
    "measure_pmf.variance (example_3 n xs) id  2 * F2 xs^2" (is "?L2  ?R2")
proof -
  let ?p = "P 4 n ( [-1,1::real])"
  let ?q = "bind_pmf ?p sample_pro"

  have "¦h x¦  1" if that1: "M  set_pmf ?p" "h  pro_set M" "x  set xs" for h M x
  proof -
    obtain i where 1:"h = pro_select M i"
      using that1(2) unfolding set_sample_pro[of "M"] by auto
    have "h x  pro_set ( [-1,1::real])"
      unfolding 1 using that(1) by (intro hash_pro_pmf_range[OF prime_power_ls])  auto
    thus ?thesis by (auto simp: list_pro_set)
  qed

  hence 0: "bounded ((λxa. xa x) ` set_pmf ?q)" if "x  set xs" for x
    using that by (intro boundedI[where B="1"]) auto

  have "(h. (x  xs. h x)^2 ?q) = (s. (h. (x  xs. h x)^2 sample_pro s) ?p)"
    by (intro integral_bind_pmf bounded_pow bounded_sum_list 0)
  also have "... = (s. F2 xs ?p)"
    by (intro integral_cong_AE AE_pmfI f2_exp_hp[OF _ assms]) simp_all
  also have "... = ?R1" by simp
  finally have a:"(h. (x  xs. h x)^2 ?q) = ?R1" by simp
  thus "?L1 = ?R1" by (simp add:map_pmf_def[symmetric])

  have "?L2 = measure_pmf.variance ?q (λh. (x  xs. h x)^2)"
    by (simp add:map_pmf_def[symmetric])
  also have "... = (h. ((x  xs. h x)^2)^2 ?q) -  (h. (x  xs. h x)^2 ?q)^2"
    by (intro measure_pmf.variance_eq integrable_bounded_pmf bounded_pow bounded_sum_list 0)
  also have "... = (s. (h. ((x  xs. h x)^2)^2 sample_pro s) ?p)- (F2 xs)2"
    unfolding a
    by (intro arg_cong2[where f="(-)"] integral_bind_pmf refl bounded_pow bounded_sum_list 0)
  also have "...  (s. 3*F2 xs^2  ?p) - (F2 xs)^2"
    by (intro diff_mono integral_mono_AE' AE_pmfI f2_exp_sq_hp[OF _ assms]) simp_all
  also have "... = ?R2" by simp
  finally show "?L2  ?R2" by simp
qed

context
  fixes ε δ :: real
  assumes ε_gt_0: "ε > 0"
  assumes δ_range: "δ  {0<..<1}"
begin

text ‹By using the mean of many independent parallel estimates the following algorithm
achieves a relative accuracy of $\varepsilon$, with probability $\frac{3}{4}$.
It requires $\mathcal O(\varepsilon^{-2} (\ln n + \ln m))$ bits of space.›

fun example_4 :: "nat  nat list  real pmf"
  where "example_4 n xs =
    do {
      let s = nat 8 / ε^2;
      h  prod_pmf {0..<s} (λ_. sample_pro ( 4 n ( [-1,1])));
      return_pmf ((j < s. (x  xs. h j x)^2)/s)
    }"

lemma example_4_correct_aux:
  assumes "set xs  {0..<n}"
  defines "s  nat 8 / ε^2"
  defines "R  (λh :: nat  nat  real. (j<s. (xxs. h j x)^2)/real s)"
  assumes fin: "finite (set_pmf p)"
  assumes indep: "prob_space.k_wise_indep_vars (measure_pmf p) 2 (λ_. discrete) (λi x. x i) {..<s}"
  assumes comp: "i. i < s  map_pmf (λx. x i) p = sample_pro ( 4 n ( [-1,1]))"
  shows "measure p {h. ¦R h - F2 xs¦ > ε * F2 xs}  1/4" (is "?L  ?R")
proof (cases "xs = []")
  case True thus ?thesis by (simp add:R_def F2_def)
next
  case False
  note f2_gt_0 = F2_definite[OF False]
  let ?p = "sample_pro ( 4 n ( [-1,1::real]))"

  have [simp]: "integrable (measure_pmf p) f" for f :: "_  real"
    by (intro integrable_measure_pmf_finite fin)

  have "8 / ε2 > 0" using ε_gt_0 by (intro divide_pos_pos) auto
  hence 0:"8 / ε2 > 0" by simp
  hence 1: "s > 0" unfolding s_def by simp

  have "(h. R h p) = (j<s. (h. (xxs. h j x)^2 p))/real s" unfolding R_def by simp
  also have "... = (j<s. (h. (xxs. h x)^2 (map_pmf(λh. h j)p)))/real s" by simp
  also have "... = (j<s. (h. (xxs. h x)^2 ?p))/real s"
    by (intro sum.cong arg_cong2[where f="(/)"] refl) (simp add: comp)
  also have "... = F2 xs" using 1 unfolding f2_exp_h[OF assms(1)] by simp
  finally have exp_R: "(h. R h p) = F2 xs" by simp

  have "measure_pmf.variance p R = measure_pmf.variance p (λh. (j<s. (xxs. h j x)^2))/s^2"
    unfolding R_def by (subst measure_pmf.variance_divide) simp_all
  also have "... = (j<s. measure_pmf.variance p (λh. (xxs. h j x)^2))/real s^2"
    by (intro arg_cong2[where f="(/)"] refl measure_pmf.bienaymes_identity_pairwise_indep_2
        prob_space.indep_vars_compose2[OF _ prob_space.k_wise_indep_vars_subset[OF _ indep]]
        prob_space_measure_pmf) (auto intro:finite_subset)
  also have "... = (j<s. measure_pmf.variance(map_pmf(λh. h j)p)(λh. (xxs. h x)^2))/real s^2"
    by simp
  also have "... = (j<s. measure_pmf.variance ?p (λh. (xxs. h x)^2))/ real s^2"
    by (intro sum.cong arg_cong2[where f="(/)"] refl) (simp add: comp)
  also have "...  (j<s. 2 * F2 xs^2)/real s^2"
    by (intro divide_right_mono sum_mono f2_var_h[OF assms(1)]) simp
  also have "... = 2 * F2 xs^2/real s" by (simp add:power2_eq_square divide_simps)
  also have "... = 2 * F2 xs^2/ 8/ε^2"
    using less_imp_le[OF 0] unfolding s_def by (subst of_nat_nat) auto
  also have "...  2 * F2 xs^2 / (8/ ε^2)"
    using ε_gt_0 by (intro divide_left_mono mult_pos_pos) simp_all
  also have "... = ε^2 * F2 xs^2/4" by simp
  finally have var_R: "measure_pmf.variance p R  ε^2 * F2 xs^2/4" by simp

  have "(h. R h p) = (j<s. (h. (xxs. h j x)^2 p))/real s" unfolding R_def by simp
  also have "... = (j<s. (h. (xxs. h x)^2 (map_pmf(λh. h j)p)))/real s" by simp
  also have "... = (j<s. (h. (xxs. h x)^2 ?p))/real s"
    by (intro sum.cong arg_cong2[where f="(/)"] refl) (simp add:comp)
  also have "... = F2 xs" using 1 unfolding f2_exp_h[OF assms(1)] by simp
  finally have exp_R: "(h. R h p) = F2 xs" by simp

  have "?L  measure p {h. ¦R h - F2 xs¦  ε * F2 xs}" by (intro pmf_mono) auto
  also have "...  𝒫(h in p. ¦R h - (h. R h p)¦  ε * F2 xs)" unfolding exp_R by simp
  also have "...  measure_pmf.variance p R / (ε * F2 xs)^2"
    using f2_gt_0 ε_gt_0 by (intro measure_pmf.Chebyshev_inequality) simp_all
  also have "...  (ε^2 * F2 xs^2/4) /  (ε * F2 xs)^2"
    by (intro divide_right_mono var_R) simp
  also have "... = 1/4" using ε_gt_0 f2_gt_0 by (simp add:divide_simps)
  finally show ?thesis by simp
qed

lemma example_4_correct:
  assumes "set xs  {0..<n}"
  shows "𝒫(ω in example_4 n xs. ¦ω - F2 xs¦ > ε * F2 xs)  1/4" (is "?L  ?R")
proof -
  define s :: nat where "s = nat 8 / ε^2"
  define R where "R h = (j<s. (xxs. h j x)^2)/s" for h :: "nat  nat  real"

  let ?p = "sample_pro ( 4 n ( [-1,1::real]))"
  let ?q = "prod_pmf {..<s} (λ_. ?p)"

  have "?L = (h. indicator {h. ¦R h - F2 xs¦ > ε * F2 xs} h  ?q)"
    by (simp add:Let_def measure_bind_pmf R_def s_def indicator_def atLeast0LessThan)
  also have "... = measure ?q {h. ¦R h - F2 xs¦ > ε * F2 xs}" by simp
  also have "...  ?R" unfolding R_def s_def
    by (intro example_4_correct_aux[OF assms] prob_space.k_wise_indep_vars_triv
        prob_space_measure_pmf indep_vars_Pi_pmf)
     (auto intro: finite_pro_set simp add:Pi_pmf_component set_Pi_pmf)
  finally show ?thesis by simp
qed

text ‹Instead of independent samples, we can choose the seeds using a second
pair-wise independent pseudorandom object. This algorithm requires only
$\mathcal O(\ln n + \varepsilon^{-2} \ln m)$ bits of space.›

fun example_5 :: "nat  nat list  real pmf"
  where "example_5 n xs =
    do {
      let s = nat 8 / ε^2;
      h  sample_pro ( 2 s ( 4 n ( [-1,1])));
      return_pmf ((j < s. (x  xs. h j x)^2)/s)
    }"

lemma example_5_correct_aux:
  assumes "set xs  {0..<n}"
  defines "s  nat 8 / ε^2"
  defines "R  (λh :: nat  nat  real. (j<s. (xxs. h j x)^2)/real s)"
  shows "measure (sample_pro ( 2 s ( 4 n ( [-1,1])))) {h. ¦R h - F2 xs¦ > ε * F2 xs}  1/4"
proof -
  let ?p = "sample_pro ( 2 s ( 4 n ( [-1,1::real])))"

  have "prob_space.k_wise_indep_vars ?p 2 (λ_. discrete) (λi x. x i) {..<s}"
    using hash_pro_indep[OF prime_power_h2]
    by (simp add: prob_space.k_wise_indep_vars_def[OF prob_space_measure_pmf])

  thus ?thesis unfolding R_def s_def
    by (intro example_4_correct_aux[OF assms(1)] finite_pro_set)
      (simp_all add:hash_pro_component[OF prime_power_h2])
qed

lemma example_5_correct:
  assumes "set xs  {0..<n}"
  shows "𝒫(ω in example_5 n xs. ¦ω - F2 xs¦ > ε * F2 xs)  1/4" (is "?L  ?R")
proof -
  define s :: nat where "s = nat 8 / ε^2"
  define R where "R h = (j<s. (xxs. h j x)^2)/s" for h :: "nat  nat  real"

  let ?p = "sample_pro ( 2 s ( 4 n ( [-1,1::real])))"

  have "?L = (h. indicator {h. ¦R h - F2 xs¦ > ε * F2 xs} h  ?p)"
    by (simp add:Let_def measure_bind_pmf R_def s_def indicator_def)
  also have "... = measure ?p {h. ¦R h - F2 xs¦ > ε * F2 xs}" by simp
  also have "...  ?R" unfolding R_def s_def by (intro example_5_correct_aux[OF assms])
  finally show ?thesis by simp
qed

text ‹The following algorithm improves on the previous one, by achieving a success probability
of $\delta$. This works by taking the median of $\mathcal O(\ln(\delta^{-1}))$ parallel independent
samples. It requires $\mathcal O(\ln(\delta^{-1}) (\ln n + \varepsilon^{-2} \ln m))$ bits of space.›

fun example_6 :: "nat  nat list  real pmf"
  where "example_6 n xs =
    do {
      let s = nat 8 / ε^2; let t = nat 8 * ln (1/δ);
      h  prod_pmf {0..<t} (λ_. sample_pro ( 2 s ( 4 n ( [-1,1]))));
      return_pmf (median t (λi. ((j < s. (x  xs. h i j x)^2)/ s)))
    }"

lemma example_6_correct:
  assumes "set xs  {0..<n}"
  shows "𝒫(ω in example_6 n xs. ¦ω - F2 xs¦ > ε * F2 xs)  δ" (is "?L  ?R")
proof -
  define s where "s = nat 8 / ε^2"
  define t where "t = nat 8 * ln(1/δ)"
  define R where "R h = (j<s. (xxs. h j x)^2)/s" for h :: "nat  nat  real"
  define I where "I = {w. ¦w - F2 xs¦  ε*F2 xs}"

  have "8 * ln (1 / δ) > 0" using δ_range by (intro mult_pos_pos ln_gt_zero) auto
  hence t_gt_0: "t > 0" unfolding t_def by simp
  have int_I: "interval I" unfolding interval_def I_def by auto

  let ?p = "sample_pro ( 2 s ( 4 n ( [-1,1::real])))"
  let ?q = "prod_pmf {0..<t} (λ_. ?p)"

  have "(h. (of_bool (R h  I)::real) ?p) = (h. indicator {h. R h  I} h ?p)"
    unfolding of_bool_def indicator_def by simp
  also have "... = measure ?p {h. R h  I}" by simp
  also have "...  1/4"
    using example_5_correct_aux[OF assms] unfolding R_def s_def I_def by (simp add:not_le)
  finally have 0: "(h. (of_bool (R h  I)::real) ?p)  1/4" by simp

  have "?L =  (h. indicator {h. ¦median t (λi. R (h i)) - F2 xs¦ > ε * F2 xs} h  ?q)"
    by (simp add:Let_def measure_bind_pmf R_def s_def indicator_def t_def)
  also have "... = measure ?q {h. median t (λi. R (h i))  I}"
    unfolding I_def by (simp add:not_le)
  also have "...  measure ?q {h. t  2 * card {k. k < t  R (h k)  I}}"
    using median_est_rev[OF int_I] by (intro pmf_mono) auto
  also have "... = measure ?q {h. (k<t. of_bool(R (h k)  I))/real t - 1/4  (1/4)}"
    using t_gt_0 by (intro arg_cong2[where f="measure"]) (auto simp:Int_def divide_simps)
  also have "...  exp ( - 2 * real t * (1/4)^2)"
    by (intro classic_chernoff_bound_one_sided t_gt_0 AE_pmfI 0) auto
  also have "... = exp (- (real t / 8))" using t_gt_0 by (simp add:power2_eq_square)
  also have "...  exp (- of_int 8 * ln (1 / δ) / 8)" unfolding t_def
    by (intro iffD2[OF exp_le_cancel_iff] divide_right_mono iffD2[OF neg_le_iff_le]) auto
  also have "...  exp (- (8 * ln (1 / δ)) / 8)"
    by (intro iffD2[OF exp_le_cancel_iff] divide_right_mono iffD2[OF neg_le_iff_le]) auto
  also have "... = exp (- ln (1 / δ))" by simp
  also have "... = δ" using δ_range by (subst ln_div) auto
  finally show ?thesis by simp
qed

text ‹The following algorithm uses an expander random walk, instead of independent samples.
It requires only $\mathcal O(\ln n + \ln(\delta^{-1}) \varepsilon^{-2} \ln m)$ bits of space.›

fun example_7 :: "nat  nat list  real pmf"
  where "example_7 n xs =
    do {
      let s = nat 8 / ε^2; let t = nat 32 * ln (1/δ);
      h  sample_pro ( t (1/8) ( 2 s ( 4 n ( [-1,1]))));
      return_pmf (median t (λi. ((j < s. (x  xs. h i j x)^2)/ s)))
    }"

lemma example_7_correct:
  assumes "set xs  {0..<n}"
  shows "𝒫(ω in example_7 n xs. ¦ω - F2 xs¦ > ε * F2 xs)  δ" (is "?L  ?R")
proof -
  define s t where s_def: "s = nat 8 / ε^2" and t_def: "t = nat 32 * ln(1/δ)"
  define R where "R h = (j<s. (xxs. h j x)^2)/s" for h :: "nat  nat  real"
  define I where "I = {w. ¦w - F2 xs¦  ε*F2 xs}"

  have "8 * ln (1 / δ) > 0" using δ_range by (intro mult_pos_pos ln_gt_zero) auto
  hence t_gt_0: "t > 0" unfolding t_def by simp
  have int_I: "interval I" unfolding interval_def I_def by auto

  let ?p = "sample_pro ( 2 s ( 4 n ( [-1,1::real])))"
  let ?q = "sample_pro ( t (1/8) ( 2 s ( 4 n ( [-1,1]))))"

  have "(h. (of_bool (R h  I)::real) ?p) = (h. indicator {h. R h  I} h ?p)"
    by (simp add:of_bool_def indicator_def)
  also have "... = measure ?p {h. R h  I}" by simp
  also have "...  1/4"
    using example_5_correct_aux[OF assms] unfolding R_def s_def I_def by (simp add:not_le)
  finally have *: "(h. (of_bool (R h  I)::real) ?p)  1/4" by simp

  have "?L =  (h. indicator {h. ¦median t (λi. R (h i)) - F2 xs¦ > ε * F2 xs} h  ?q)"
    by (simp add:Let_def measure_bind_pmf R_def s_def indicator_def t_def)
  also have "... = measure ?q {h. median t (λi. R (h i))  I}"
    unfolding I_def by (simp add:not_le)
  also have "...  measure ?q {h. t  2 * card {k. k < t  R (h k)  I}}"
    using median_est_rev[OF int_I] by (intro pmf_mono) auto
  also have "... = measure ?q {h. 1/8 + 1/8  (k<t. of_bool(R (h k)  I))/real t - 1/4}"
    using t_gt_0 by (intro arg_cong2[where f="measure"] Collect_cong refl)
     (auto simp add:of_bool_def sum.If_cases Int_def field_simps)
  also have "...  exp (- 2 * real t * (1/8)2)"
    by (intro expander_chernoff_bound_one_sided t_gt_0 *) auto
  also have "... = exp (- (real t / 32))" using t_gt_0 by (simp add:power2_eq_square)
  also have "...  exp (- of_int 32 * ln (1 / δ) / 32)" unfolding t_def
    by (intro iffD2[OF exp_le_cancel_iff] divide_right_mono iffD2[OF neg_le_iff_le]) auto
  also have "...  exp (- (32 * ln (1 / δ)) / 32)"
    by (intro iffD2[OF exp_le_cancel_iff] divide_right_mono iffD2[OF neg_le_iff_le]) auto
  also have "... = exp (- ln (1 / δ))" by simp
  also have "... = δ" using δ_range by (subst ln_div) auto
  finally show ?thesis by simp
qed

end

end