Theory Randomized_Closest_Pair_Growth
section ‹Growth of Close Points›
text ‹This section verifies a result similar to (but more general than)
Lemma~2 by Rabin~\cite{rabin1976}. Let $N(d)$ denote the number of pairs from the point sequence
$p_1,\ldots,p_n$, with distance less than $d$:
\[
N(d) := | \{ (i,j) | d(p_i, p_j) < d \wedge 1 \leq i,j \leq n \} |
\]
Obviously, $N(d)$ is monotone. It is possible to show that the growth of
$N(d)$ is bounded.
In particular:
\[
N(ad) \leq (2a \sqrt{2}+3)^2 N(d)
\]
for all $a > 0$, $d > 0$. As far as we can tell the proof below is new.
\emph{Proof:} Consider a 2D-grid with size $\alpha := \frac{d}{\sqrt{2}}$ and let us denote by $G(x,y)$
the number of points that fall in the cell $(x,y) \in \mathbb Z \times \mathbb Z$, i.e.:
\[
G(x,y) := \left| \left\{ i \middle | \left\lfloor \frac{p_{i,1}}{\alpha} \right\rfloor = x \wedge
\left\lfloor \frac{p_{i,2}}{\alpha} \right\rfloor = x \right\} \right| \textrm{,}
\]
where $p_{i,1}$ (resp. $p_{i,2}$) denote the first (resp. second) component of point p.
Let also $s := \lceil a \sqrt{2} \rceil$.
Then we can observe that
\begin{eqnarray*}
N(ad) & \leq & \sum_{(x,y)\in\mathbb Z\times\mathbb Z} \sum_{i=-s}^{s} \sum_{j=-s}^{s} G(x,y) G(x+i,y+j) \\
& = & \sum_{i=-s}^{s} \sum_{j=-s}^{s} \sum_{(x,y)\in\mathbb Z\times\mathbb Z} G(x,y) G(x+i,y+j)\\
& \leq & \sum_{i=-s}^{s} \sum_{j=-s}^{s} \left(\left(\sum_{(x,y)\in\mathbb Z\times\mathbb Z} G(x,y)^2 \right) \left(\sum_{(x,y)\in\mathbb Z\times\mathbb Z} G(x+i,y+j)^2\right) \right)^{1/2}\\
& \leq & \sum_{i=-s}^{s} \sum_{j=-s}^{s} \left(\left(\sum_{(x,y)\in\mathbb Z\times\mathbb Z} G(x,y)^2 \right) \left(\sum_{(x,y)\in\mathbb Z\times\mathbb Z} G(x,y)^2\right) \right)^{1/2}\\
& \leq & (2s+1)^2 \sum_{(x,y)\in\mathbb Z\times\mathbb Z} G(x,y)^2 \\
& \leq & (2a \sqrt(2)+3)^2 \sum_{(x,y)\in\mathbb Z\times\mathbb Z} G(x,y)^2 \\
& \leq & (2a \sqrt(2)+3)^2 N(d)
\end{eqnarray*}
The first inequality follows from the fact that if two points are $ad$ close,
their x-coordinates and y-coordinates will differ by at most $ad$. I.e. their grid coordinates will
differ at most by $s$. This means the pair will be accounted for in the right hand side of the
inequality.
The third inequality is an application of the Cauchy--Schwarz inequality.
The last inequality follows from the fact that the largest possible distance of two points
in the same grid cell is $d$. \qed›
theory Randomized_Closest_Pair_Growth
imports
"HOL-Library.Sublist"
Randomized_Closest_Pair_Correct
begin
lemma inj_translate:
fixes a b :: int
shows "inj (λx. (fst x + a, snd x + b))"
proof -
have 0:"(λx. (fst x + a, snd x + b)) = (λx. x + (a,b))" by auto
show ?thesis unfolding 0 by simp
qed
lemma of_nat_sum':
"(of_nat (sum' f S) :: ('a :: {semiring_char_0})) = sum' (λx. of_nat (f x)) S"
unfolding sum.G_def by simp
lemma sum'_nonneg:
fixes f :: "'a ⇒ 'b :: {ordered_comm_monoid_add}"
assumes "⋀x. x ∈ S ⟹ f x ≥ 0"
shows "sum' f S ≥ 0"
proof -
have "0 ≤ sum f {x ∈ S. f x ≠ 0}" using assms by (intro sum_nonneg) auto
thus ?thesis unfolding sum.G_def by simp
qed
lemma sum'_mono:
fixes f :: "'a ⇒ 'b :: {ordered_comm_monoid_add}"
assumes "⋀x. x ∈ S ⟹ f x ≤ g x"
assumes "finite {x ∈ S. f x ≠ 0}"
assumes "finite {x ∈ S. g x ≠ 0}"
shows "sum' f S ≤ sum' g S" (is "?L ≤ ?R")
proof -
let ?S = "{i ∈ S. f i ≠ 0} ∪ {i ∈ S. g i ≠ 0}"
have "?L = sum' f ?S" by (intro sum.mono_neutral_right') auto
also have "... = (∑i ∈ ?S. f i)" using assms by (intro sum.eq_sum) auto
also have "... ≤ (∑i ∈ ?S. g i)" using assms by (intro sum_mono) auto
also have "... = sum' g ?S" using assms by (intro sum.eq_sum[symmetric]) auto
also have "... = ?R" by (intro sum.mono_neutral_left') auto
finally show ?thesis by simp
qed
lemma cauchy_schwarz':
assumes "finite {i ∈ S. f i ≠ 0}"
assumes "finite {i ∈ S. g i ≠ 0}"
shows "sum' (λi. f i * g i) S ≤ sqrt (sum' (λi. f i^2) S) * sqrt (sum' (λi. g i^2) S)"
(is "?L ≤ ?R")
proof -
let ?S = "{i ∈ S. f i ≠ 0} ∪ {i ∈ S. g i ≠ 0}"
have "?L = sum' (λi. f i * g i) ?S" by (intro sum.mono_neutral_right') auto
also have "... = (∑i ∈ ?S. f i * g i)" using assms by (intro sum.eq_sum) auto
also have "... ≤ (∑i ∈ ?S. ¦f i¦ * ¦g i¦)" by (intro sum_mono) (metis abs_ge_self abs_mult)
also have "... ≤ L2_set f ?S * L2_set g ?S" by (rule L2_set_mult_ineq)
also have "... = sqrt (sum' (λi. f i^2) ?S) * sqrt (sum' (λi. g i^2) ?S)"
unfolding L2_set_def using assms sum.eq_sum by simp
also have "... = ?R"
by (intro arg_cong2[where f="(λx y. sqrt x * sqrt y)"] sum.mono_neutral_left') auto
finally show ?thesis by simp
qed
context comm_monoid_set
begin
lemma reindex_bij_betw':
assumes "bij_betw h S T"
shows "G (λx. g (h x)) S = G g T"
proof -
have "h ` {x ∈ S. g (h x) ≠ ❙1} = {x ∈ T. g x ≠ ❙1}"
using bij_betw_imp_surj_on[OF assms] by auto
hence 0: "bij_betw h {x ∈ S. g (h x) ≠ ❙1} {x ∈ T. g x ≠ ❙1}"
by (intro bij_betw_subset[OF assms]) auto
hence "finite {x ∈ S. g (h x) ≠ ❙1} = finite {x ∈ T. g x ≠ ❙1}"
using bij_betw_finite by auto
thus ?thesis unfolding G_def using reindex_bij_betw[OF 0] by simp
qed
end
definition "close_point_size xs d = length (filter (λ(p,q). dist p q < d) (List.product xs xs))"
lemma grid_dist_upper:
fixes p q :: point
assumes "d > 0"
shows "dist p q < sqrt (∑i∈UNIV.(d*(¦⌊p$i/d⌋-⌊q$i/d⌋¦+1))^2)"
(is "?L < ?R")
proof -
have a:"¦x - y¦ < ¦d * real_of_int (¦⌊x/d⌋ - ⌊y/d⌋¦ + 1)¦" for x y :: real
proof -
have "¦x - y¦ = d * ¦x/d - y/d¦"
using assms by (simp add: abs_mult_pos' right_diff_distrib)
also have "... < d * real_of_int (¦⌊x/d⌋ - ⌊y/d⌋¦ + 1)"
by (intro mult_strict_left_mono assms) linarith
also have "... = ¦d * real_of_int (¦⌊x/d⌋ - ⌊y/d⌋¦ + 1)¦"
using assms by simp
finally show ?thesis by simp
qed
have "?L = sqrt (∑i∈UNIV. (p $ i - q $ i)⇧2)"
unfolding dist_vec_def dist_real_def L2_set_def by simp
also have "... < ?R"
using assms by (intro real_sqrt_less_mono sum_strict_mono power2_strict_mono a) auto
finally show ?thesis by simp
qed
lemma grid_dist_upperI:
fixes p q :: point
fixes d :: real
assumes "d > 0"
assumes "⋀k. ¦⌊p$k/d⌋-⌊q$k/d⌋¦ ≤ s"
shows "dist p q < d * (s+1) * sqrt 2"
proof -
have s_ge_0: "s ≥ 0" using assms(2)[where k="0"] by simp
have "dist p q < sqrt (∑i ∈ UNIV. (d*(¦⌊p$i/d⌋-⌊q$i/d⌋¦+1))^2)"
by (intro grid_dist_upper assms)
also have "... ≤ sqrt (∑i ∈ (UNIV::2 set). (d*(s+1))^2)"
using assms
by (intro real_sqrt_le_mono sum_mono power_mono mult_left_mono iffD2[OF of_int_le_iff]) auto
also have "... = sqrt (2 * (d*(s+1))^2)" by simp
also have "... = sqrt 2 * sqrt ((d*(s+1))^2)" by (simp add:real_sqrt_mult)
also have "... = sqrt 2 * (d * (s+1))" using assms s_ge_0 by simp
also have "... = d * (s+1) * sqrt 2" by simp
finally show ?thesis by simp
qed
lemma close_point_approx_upper:
fixes xs :: "point list"
fixes G :: "int × int ⇒ real"
assumes "d > 0" "e > 0"
defines "s ≡ ⌈d / e⌉"
defines "G ≡ (λx. real (length (filter (λp. to_grid e p = x) xs)))"
shows "close_point_size xs d ≤ (∑i ∈ {-s..s}×{-s..s}. sum' (λx. G x * G (x+i)) UNIV)"
(is "?L ≤ ?R")
proof -
let ?f = "to_grid e"
let ?pairs = "mset (List.product xs xs)"
define T where "T = {-s..s} × {-s..s}"
have "s ≥ 1" unfolding s_def using assms by simp
hence s_ge_0: "s ≥ 0" by simp
have 0: "finite T" unfolding T_def by simp
have a: "size {#p ∈# ?pairs. ?f (fst p)-?f (snd p) = i #} = sum' (λx. G x * G (x+i)) UNIV"
(is "?L1 = ?R1") for i
proof -
have "?L1 = size {#p ∈# ?pairs. (?f (fst p),?f (snd p)) ∈ {(x,y). x - y = i} #}"
by simp
also have "... = sum' (λq. size {# p ∈# ?pairs. (?f (fst p), ?f (snd p))= q #} ) {(x,y). x-y=i}"
unfolding size_filter_mset_decompose' by simp
also have "... = sum' (λq. size {# p ∈# ?pairs. (?f (fst p), ?f (snd p)) = (q+i,q) #}) UNIV"
by (intro arg_cong[where f="real"] sum.reindex_bij_betw'[symmetric] bij_betwI[where g="snd"])
auto
also have "... =
sum' (λq. length (filter (λp. ?f (fst p) = q+i ∧ ?f (snd p) = q) (List.product xs xs))) UNIV"
by (simp flip: size_mset mset_filter conj_commute)
also have "... = sum' (λx. G (x+i) * G x) UNIV"
by (subst filter_product)
(simp add:G_def build_grid_def of_nat_sum' case_prod_beta' prod_eq_iff)
finally show ?thesis by (simp add:algebra_simps)
qed
have b:"f (?f p) - f (?f q) ∈ {-s..s}" if "f = fst ∨ f = snd" "dist p q < d" for p q f
proof -
have "¦f (?f p) - f (?f q)¦ ≤ ⌈dist p q/e⌉"
using grid_dist[OF assms(2), where p="p" and q="q"] that(1) unfolding to_grid_def by auto
also have "... ≤ s"
unfolding s_def using that(2) assms(1,2)
by (simp add: ceiling_mono divide_le_cancel)
finally have "¦f (?f p) - f (?f q)¦ ≤ s" by simp
thus ?thesis using s_ge_0 by auto
qed
have c:"?f p - ?f q ∈ T" if "dist p q < d" for p q
unfolding T_def using b[OF _ that] unfolding mem_Times_iff by simp
have "?L = size (filter_mset (λ(p,q). dist p q < d) ?pairs)"
unfolding close_point_size_def by (metis mset_filter size_mset)
also have "... ≤ size (filter_mset (λp. ?f (fst p) - ?f (snd p) ∈ T) ?pairs)"
using c by (intro size_mset_mono of_nat_mono multiset_filter_mono_2) auto
also have "... = (∑i ∈ T. size (filter_mset (λp. ?f (fst p) - ?f (snd p) = i) ?pairs))"
by (intro size_filter_mset_decompose arg_cong[where f="of_nat"] 0)
also have "... = (∑i ∈ T. sum' (λx. G x * G (x+i)) UNIV)"
unfolding of_nat_sum by (intro sum.cong a refl)
also have "... = ?R" unfolding T_def by simp
finally show ?thesis by simp
qed
lemma close_point_approx_lower:
fixes xs :: "point list"
fixes G :: "int × int ⇒ real"
fixes d :: real
assumes "d > 0"
defines "G ≡ (λx. real (length (filter (λp. to_grid d p = x) xs)))"
shows "sum' (λx. G x ^ 2) UNIV ≤ close_point_size xs (d * sqrt 2)"
(is "?L ≤ ?R")
proof -
let ?f = "to_grid d"
let ?pairs = "mset (List.product xs xs)"
have "?L = sum' (λx. length (filter (λp. ?f p = x) xs)^2) UNIV "
unfolding build_grid_def G_def by (simp add:of_nat_sum' prod_eq_iff case_prod_beta')
also have "... = sum'(λx. length(List.product (filter(λp. ?f p=x)xs) (filter(λp. ?f p=x)xs)))UNIV"
unfolding length_product by (simp add:power2_eq_square)
also have "... = sum' (λx. length (filter(λp. ?f(fst p)=x∧?f(snd p)=x)(List.product xs xs))) UNIV"
by (subst filter_product) simp
also have "... = sum' (λx. size {# p ∈# ?pairs. ?f (fst p) = x ∧ ?f (snd p) = x #}) UNIV"
by (intro arg_cong2[where f="sum'"] arg_cong[where f="real"] refl ext)
(metis (no_types, lifting) mset_filter size_mset)
also have "... = sum' (λx. size {# p ∈#{# p∈#?pairs. ?f(fst p)=?f(snd p) #}. ?f(fst p)=x #}) UNIV"
unfolding filter_filter_mset
by (intro sum.cong' arg_cong[where f="real"] arg_cong[where f="size"] filter_mset_cong) auto
also have "... = size {# p ∈# {# p ∈# ?pairs. ?f (fst p) = ?f (snd p) #}. ?f (fst p) ∈ UNIV #}"
by (intro arg_cong[where f="real"] size_filter_mset_decompose'[symmetric])
also have "... ≤ size {# p ∈# ?pairs. ?f (fst p) = ?f (snd p) #}" by simp
also have "... = size {# p ∈# ?pairs. ∀k. ⌊fst p $ k/d⌋ = ⌊snd p $ k/d⌋ #}"
unfolding to_grid_def prod.inject
by (intro arg_cong[where f="size"] arg_cong[where f="of_nat"] filter_mset_cong refl)
(metis (full_types) exhaust_2 one_neq_zero)
also have "... ≤ size {# p ∈# ?pairs. dist (fst p) (snd p) < d * of_int (0+1) * sqrt 2 #}"
by (intro of_nat_mono size_mset_mono multiset_filter_mono_2 grid_dist_upperI[OF assms(1)]) simp
also have "... = ?R" unfolding close_point_size_def
by (simp add:case_prod_beta') (metis (no_types, lifting) mset_filter size_mset)
finally show ?thesis by simp
qed
lemma build_grid_finite:
assumes "inj f"
shows "finite {x. filter (λp. to_grid d p = f x) xs ≠ []}"
proof -
have 0:"finite (to_grid d ` set xs)" by (intro finite_imageI) auto
have "finite {x. filter (λp. to_grid d p = x) xs ≠ []}"
unfolding filter_empty_conv by (intro finite_subset[OF _ 0]) blast
hence "finite (f -` {x. filter (λp. to_grid d p = x) xs ≠ []})" by (intro finite_vimageI assms)
thus ?thesis by (simp add:vimage_def)
qed
text ‹Main result of this section:›
lemma growth_lemma:
fixes xs :: "point list"
assumes "a > 0" "d > 0"
shows "close_point_size xs (a * d) ≤ (2 * sqrt 2 * a + 3)^2 * close_point_size xs d"
(is "?L ≤ ?R")
proof -
let ?s = "⌈a * sqrt 2⌉"
let ?G = "(λx. real (length (filter (λp. to_grid (d/sqrt 2) p = x) xs)))"
let ?I = "{-?s..?s}×{-?s..?s}"
have "?s ≥ 1" using assms by auto
hence s_ge_0: "?s ≥ 0" by simp
have a: "?s = ⌈a * d / (d / sqrt 2)⌉" using assms by simp
have "?L ≤ (∑i∈{-?s..?s}×{-?s..?s}. sum' (λx. ?G x * ?G (x+i)) UNIV)"
using assms unfolding a by (intro close_point_approx_upper) auto
also have "... ≤ (∑i∈?I. sqrt (sum' (λx. ?G x^2) UNIV) * sqrt (sum' (λx. ?G (x+i)^2) UNIV))"
by (intro sum_mono cauchy_schwarz') (auto intro: inj_translate build_grid_finite)
also have "... = (∑i∈?I. sqrt (sum' (λx. ?G x^2) UNIV) * sqrt (sum' (λx. ?G x^2) UNIV))"
by (intro arg_cong2[where f="(λx y. sqrt x * sqrt y)"] sum.cong refl
sum.reindex_bij_betw' bij_plus_right)
also have "... = (∑i∈?I. ¦sum' (λx. ?G x^2) UNIV¦)" by simp
also have "... = (2* ?s + 1)^2 * ¦sum' (λx. ?G x^2) UNIV¦"
using s_ge_0 by (auto simp: power2_eq_square)
also have "... = (2* ?s + 1)^2 * sum' (λx. ?G x^2) UNIV"
by (intro arg_cong2[where f="(*)"] refl abs_of_nonneg sum'_nonneg) auto
also have "... ≤ (2*?s+1)^2 * real (close_point_size xs ((d/sqrt 2)* sqrt 2))"
using assms by (intro mult_left_mono close_point_approx_lower) auto
also have "... = (2 * of_int ?s+1)^2 * real (close_point_size xs d)" by simp
also have "... ≤ (2 * (a * sqrt 2 + 1) + 1)^2 * real (close_point_size xs d)"
using s_ge_0 by (intro mult_right_mono power_mono add_mono mult_left_mono) auto
also have "... = ?R" by (auto simp:algebra_simps)
finally show ?thesis by simp
qed
end