Theory Randomized_Closest_Pair_Correct

section ‹Correctness›

text ‹This section verifies that the algorithm always returns the correct result.

Because the algorithm checks every pair of points in the same or in neighboring cells. It is
enough to establish that the grid distance is at least the distance of the closest pair.

The latter is true by construction, because the grid distance is chosen as a minimum of actually
occurring point distances.›

theory Randomized_Closest_Pair_Correct
  imports Randomized_Closest_Pair
begin

definition min_dist :: "('a::metric_space) list ⇒ real"
  where "min_dist xs = Min {dist x y|x y. {# x, y#} ⊆# mset xs}"

text ‹For a list with length at least two, the result is the minimum distance between the points
of any two elements of the list. This means that @{term "min_dist xs = 0"}, if and only if the same
point occurs twice in the list.

Note that this means, we won't assume the distinctness of the input list, and show the correctness
of the algorithm in the above sense.›

lemma image_conv_2: "{f x y|x y. p x y} = (case_prod f) ` {(x,y). p x y}" by auto

lemma min_dist_set_fin: "finite {dist x y|x y. {#x, y#} ⊆# mset xs}"
proof -
  have a:"finite (set xs × set xs)" by simp
  have "x ∈# mset xs ∧ y ∈# mset xs" if "{#x, y#} ⊆# mset xs" for x y
    using that by (meson insert_union_subset_iff mset_subset_eq_insertD)
  thus ?thesis unfolding image_conv_2 by (intro finite_imageI finite_subset[OF _ a]) auto
qed

lemma min_dist_ne:  "length xs ≥ 2 ⟷ {dist x y|x y. {# x,y#} ⊆# mset xs} ≠ {}" (is "?L ⟷ ?R")
proof
  assume ?L
  then obtain xh1 xh2 xt where xs:"xs=xh1#xh2#xt" by (metis Suc_le_length_iff numerals(2))
  hence "{#xh1,xh2#} ⊆# mset xs" unfolding xs by simp
  thus ?R by auto
next
  assume ?R
  then obtain x y where xy: "{#x,y#} ⊆# mset xs" by auto
  have "2 ≤  size {#x, y#}" by simp
  also have "... ≤ size (mset xs)" by (intro size_mset_mono xy)
  finally have "2 ≤ size (mset xs)" by simp
  thus ?L by simp
qed
lemmas min_dist_neI = iffD1[OF min_dist_ne]

lemma min_dist_nonneg:
  assumes "length xs ≥ 2"
  shows "min_dist xs ≥ 0"
  unfolding min_dist_def by (intro Min.boundedI min_dist_set_fin assms iffD1[OF min_dist_ne]) auto

lemma min_dist_pos_iff:
  assumes "length xs ≥ 2"
  shows "distinct xs ⟷ 0 < min_dist xs"
proof -
  have "¬(distinct xs) ⟷ (∃x. count (mset xs) x ≠ of_bool (x ∈ set xs))"
    unfolding of_bool_def distinct_count_atmost_1 by fastforce
  also have "... ⟷ (∃x. count (mset xs) x ∉ {0,1})"
    using count_mset_0_iff by (intro ex_cong1) simp
  also have "... ⟷ (∃x. count (mset xs) x ≥ count {#x, x#} x)"
    by (intro ex_cong1) (simp add:numeral_eq_Suc Suc_le_eq dual_order.strict_iff_order)
  also have "... ⟷ (∃x. {#x, x#} ⊆# mset xs)" by (intro ex_cong1) (simp add: subseteq_mset_def)
  also have "... ⟷ 0 ∈ {dist x y |x y. {#x, y#} ⊆# mset xs}" by auto
  also have "... ⟷ min_dist xs = 0" (is "?L ⟷ ?R")
  proof
    assume "?L"
    hence "min_dist xs ≤ 0" unfolding min_dist_def by (intro Min_le min_dist_set_fin)
    thus "min_dist xs = 0" using min_dist_nonneg[OF assms] by auto
  next
    assume "?R"
    thus "0 ∈ {dist x y |x y. {#x, y#} ⊆# mset xs}"
      unfolding min_dist_def using Min_in[OF min_dist_set_fin min_dist_neI[OF assms]] by simp
  qed
  finally  have "¬(distinct xs) ⟷ min_dist xs = 0" by simp
  thus ?thesis using min_dist_nonneg[OF assms] by auto
qed

lemma multiset_filter_mono_2:
  assumes "⋀x. x ∈ set_mset xs ⟹ P x ⟹ Q x"
  shows "filter_mset P xs ⊆# filter_mset Q xs" (is "?L ⊆# ?R")
proof -
  have "?L = filter_mset (λx. Q x ∧ P x) xs" using assms by (intro filter_mset_cong) auto
  also have "... = filter_mset P (filter_mset Q xs)" by (simp add:filter_filter_mset)
  also have "... ⊆# ?R" by simp
  finally show ?thesis by simp
qed

lemma filter_mset_disj:
  "filter_mset (λx. p x ∨ q x) xs = filter_mset (λx. p x ∧ ¬ q x) xs + filter_mset q xs"
  by (induction xs) auto

lemma size_filter_mset_decompose:
  assumes "finite T"
  shows "size (filter_mset (λx. f x ∈ T) xs) = (∑t ∈ T. size (filter_mset (λx. f x = t) xs))"
  using assms
proof (induction T)
  case empty thus ?case by simp
next
  case (insert x F) thus ?case by (simp add:filter_mset_disj) metis
qed

lemma size_filter_mset_decompose':
  "size (filter_mset (λx. f x ∈ T) xs) = sum' (λt. size (filter_mset (λx. f x = t) xs)) T"
  (is "?L = ?R")
proof -
  let ?T = "f ` set_mset xs ∩ T"
  have "?L = size (filter_mset (λx. f x ∈ ?T) xs)"
    by (intro arg_cong[where f="size"] filter_mset_cong) auto
  also have "... = (∑t ∈ ?T. size (filter_mset (λx. f x = t) xs))"
    by (intro size_filter_mset_decompose) auto
  also have "... = sum' (λt. size (filter_mset (λx. f x = t) xs)) ?T"
    by (intro sum.eq_sum[symmetric]) auto
  also have "... = ?R" by (intro sum.mono_neutral_left') auto
  finally show ?thesis by simp
qed


lemma filter_product:
  "filter (λx. P (fst x)∧Q (snd x)) (List.product xs ys) = List.product (filter P xs) (filter Q ys)"
proof (induction xs)
  case Nil thus ?case by simp
next
  case (Cons xh xt) thus ?case by (simp add:filter_map comp_def)
qed

lemma floor_diff_bound: "¦⌊x⌋-⌊y⌋¦ ≤ ⌈¦x - (y::real)¦⌉" by linarith

lemma power2_strict_mono:
  fixes x y :: "'a :: linordered_idom"
  assumes "¦x¦ < ¦y¦"
  shows "x^2 < y^2"
  using assms unfolding power2_eq_square
  by (metis abs_mult_less abs_mult_self_eq)

definition "grid ps d = ⦇ g_dist = d, g_lookup = (λq. map_tm return (filter (λx. to_grid d x = q) ps)) ⦈"

lemma build_grid_val: "val (build_grid ps d) = grid ps d"
  unfolding build_grid_def grid_def by simp

lemma lookup_neighborhood:
  "mset (val (lookup_neighborhood (grid ps d) p)) =
  filter_mset (λx. to_grid d x - to_grid d p ∈ {(0,0),(0,1),(1,-1),(1,0),(1,1)}) (mset ps) - {#p#}"
proof -
  define ls where  "ls = [(0::int,0::int),(0,1),(1,-1),(1,0),(1,1)]"
  define g where "g = grid ps d"
  define cs where "cs = map ((+) (to_grid (g_dist g) p)) ([(0,0),(0,1),(1,-1),(1,0),(1,1)])"

  have distinct_ls: "distinct ls" unfolding ls_def by (simp add: upto.simps)

  have "mset (concat (map (λx. val (g_lookup g (x + to_grid (g_dist g) p))) ls)) =
    mset (concat (map (λx. filter (λq. to_grid d q - to_grid d p = x) ps) ls))"
    by (simp add:grid_def filter_eq_val_filter_tm cs_def comp_def algebra_simps ls_def g_def)
  also have "... = {# q ∈# mset ps. to_grid d q - to_grid d p ∈ set ls #}"
    using distinct_ls by (induction ls) (simp_all add:filter_mset_disj, metis)
  also have "... = {#x ∈# mset ps. to_grid d x - to_grid d p ∈ {(0,0),(0,1),(1,-1),(1,0),(1,1)}#}"
    unfolding ls_def by simp
  finally have a:
    "mset (concat (map (λx. val (g_lookup g (x + to_grid (g_dist g) p))) ls)) =
    {#x ∈# mset ps. to_grid d x - to_grid d p ∈ {(0,0),(0,1),(1,-1),(1,0),(1,1)}#}" by simp

  thus ?thesis
    unfolding g_def[symmetric] lookup_neighborhood_def ls_def[symmetric]
    by (simp add:val_remove1 comp_def)
qed

lemma fin_nat_pairs: "finite {(i, j). i < j ∧ j < (n::nat)}"
  by (rule finite_subset[where B="{..<n }×{..<n}"]) auto

lemma mset_list_subset:
  assumes "distinct ys" "set ys ⊆ {..<length xs}"
  shows  "mset (map ((!) xs) ys) ⊆# mset xs" (is "?L ⊆# ?R")
proof -
  have "mset ys ⊆# mset [0..<length xs]" using assms
    by (metis finite_lessThan mset_set_set mset_set_upto_eq_mset_upto subset_imp_msubset_mset_set)
  hence "image_mset ((!) xs) (mset ys) ⊆# image_mset ((!) xs) (mset ([0..<length xs]))"
    by (intro image_mset_subseteq_mono)
  moreover have "image_mset ((!) xs) (mset ([0..<length xs])) = mset xs" by (metis map_nth mset_map)
  ultimately show ?thesis by simp
qed

lemma sample_distance:
  assumes "length ps ≥ 2"
  shows "AE d in map_pmf val (sample_distance ps). min_dist ps ≤ d"
proof -
  let ?S = "{i. fst i < snd i ∧ snd i < length ps}"
  let ?p = "pmf_of_set ?S"

  have "(0,1) ∈ ?S" using assms by auto
  hence a:"finite ?S" "?S ≠ {}"
    using fin_nat_pairs[where n="length ps"] by (auto simp:case_prod_beta')

  have "min_dist ps ≤ dist (ps ! (fst x)) (ps ! (snd x))" if "x ∈ ?S" for x
  proof -
    have "mset (map ((!) ps) [fst x, snd x]) ⊆# mset ps"
      using that by (intro mset_list_subset) auto
    hence "{#ps ! fst x, ps ! snd x#} ⊆# mset ps" by simp
    hence "(λ(x, y). dist x y) (ps ! (fst x), ps ! (snd x)) ∈ {dist x y |x y. {#x, y#} ⊆# mset ps}"
      unfolding image_conv_2 by (intro imageI) simp
    thus ?thesis unfolding min_dist_def by (intro Min_le min_dist_set_fin) simp
  qed
  thus ?thesis
    using a unfolding sample_distance_def map_pmf_def[symmetric] val_tpmf_simps
    by (intro AE_pmfI) (auto)
qed

lemma first_phase:
  assumes "length ps ≥ 2"
  shows "AE d in map_pmf val (first_phase ps). min_dist ps ≤ d"
proof -
  have "min_dist ps ≤ val (min_list_tm ds)"
    if ds_range:"set ds⊆set_pmf(map_pmf val (sample_distance ps))" and "length ds=length ps" for ds
  proof -
    have ds_ne: "ds ≠ []" using assms that(2) by auto

    have "min_dist ps ≤ a" if "a ∈ set ds" for a
    proof -
      have "a ∈ set_pmf (map_pmf val (sample_distance ps))" using ds_range that by auto
      thus ?thesis using sample_distance[OF assms] by (auto simp add: AE_measure_pmf_iff)
    qed
    hence "min_dist ps ≤ Min (set ds)" using ds_ne by (intro Min.boundedI) auto
    also have "... = min_list ds" unfolding min_list_Min[OF ds_ne] by simp
    also have "... = val (min_list_tm ds)" by (intro val_min_list[symmetric] ds_ne)
    finally show ?thesis by simp
  qed

  thus ?thesis
    unfolding first_phase_def val_tpmf_simps val_replicate_tpmf
    by (intro AE_pmfI) (auto simp:set_replicate_pmf)
qed

definition grid_lex_ord :: "int * int ⇒ int * int ⇒ bool"
  where "grid_lex_ord x y = (fst x < fst y ∨ (fst x = fst y ∧ snd x ≤ snd y))"

lemma grid_lex_order_antisym: "grid_lex_ord x y ∨ grid_lex_ord y x"
  unfolding grid_lex_ord_def by auto

lemma grid_dist:
  fixes p q :: point
  assumes "d > 0"
  shows  "¦⌊p $ k/d⌋ - ⌊q $ k/d⌋¦ ≤ ⌈dist p q/d⌉"
proof -
  have "¦p$k - q$k¦ = sqrt ((p$k - q$k)^2)" by simp
  also have "... = sqrt (∑j∈UNIV. of_bool(j=k)*(p$j - q$j)^2)" by simp
  also have "... ≤ dist p q" unfolding dist_vec_def L2_set_def
    by (intro real_sqrt_le_mono sum_mono) (auto simp:dist_real_def)
  finally have "¦p$k - q$k¦ ≤ dist p q" by simp
  hence 0:"¦p$k /d - q$k /d¦≤ dist p q /d" using assms by (simp add:field_simps)
  have "¦⌊p$k/d⌋ - ⌊q$k/d⌋¦ ≤ ⌈¦p$k /d - q$k /d¦⌉" by (intro floor_diff_bound)
  also have "... ≤ ⌈dist p q/d⌉" by (intro ceiling_mono 0)
  finally show ?thesis by simp
qed

lemma grid_dist_2:
  fixes p q :: point
  assumes "d > 0"
  assumes "⌈dist p q/d⌉ ≤ s"
  shows  "to_grid d p - to_grid d q ∈ {-s..s}×{-s..s}"
proof -
  have "f (to_grid d p) - f (to_grid d q) ∈ {-s..s}" if "f = fst ∨ f = snd" for f
  proof -
    have "¦f (to_grid d p) - f (to_grid d q)¦ ≤ ⌈dist p q/d⌉"
      using that grid_dist[OF assms(1)] unfolding to_grid_def by auto
    also have "... ≤ s" by (intro assms(2))
    finally have "¦f (to_grid d p) - f (to_grid d q)¦ ≤ s" by simp
    thus ?thesis by auto
  qed
  thus ?thesis by (simp add:mem_Times_iff)
qed

lemma grid_dist_3:
  fixes p q :: point
  assumes "d > 0"
  assumes "⌈dist q p/d⌉ ≤ 1" "grid_lex_ord (to_grid d p) (to_grid d q)"
  shows  "to_grid d q - to_grid d p ∈ {(0,0),(0,1),(1,-1),(1,0),(1,1)}"
proof -
  have a:"{-1..1} = {-1,0,1::int}" by auto
  let ?r = "to_grid d q - to_grid d p"
  have "?r ∈ {-1..1}×{-1..1}" by (intro grid_dist_2 assms(1-2))
  moreover have "?r ∉ {(-1,0),(-1,-1),(-1,1),(0,-1)}" using assms(3)
    unfolding grid_lex_ord_def insert_iff de_Morgan_disj
    by (intro conjI notI) (simp_all add:algebra_simps)
  ultimately show ?thesis unfolding a by simp
qed

lemma second_phase_aux:
  assumes "d > 0" "min_dist ps ≤ d" "length ps ≥ 2"
  obtains u v where
    "min_dist ps = dist u v"
    "{#u, v#} ⊆# mset ps"
    "grid_lex_ord (to_grid d u) (to_grid d v)"
    "u ∈ set ps" "v ∈ set (val (lookup_neighborhood (grid ps d) u))"
proof -
  have "∃u v. min_dist ps = dist u v ∧ {#u, v#} ⊆# mset ps"
    unfolding min_dist_def using Min_in[OF min_dist_set_fin min_dist_neI[OF assms(3)]] by auto

  then obtain u v where uv:
    "min_dist ps = dist u v" "{#u, v#} ⊆# mset ps"
    "grid_lex_ord (to_grid d u) (to_grid d v)"
    using add_mset_commute dist_commute grid_lex_order_antisym by (metis (no_types, lifting))

  have u_range: "u ∈ set ps" using uv(2) set_mset_mono by fastforce

  have "to_grid d v - to_grid d u ∈ {(0,0),(0,1),(1,-1),(1,0),(1,1)}"
    using assms(1,2) uv(1,3) by (intro grid_dist_3) (simp_all add:dist_commute)

  hence "v ∈# mset (val (lookup_neighborhood (grid ps d) u))"
    using uv(2) unfolding lookup_neighborhood by (simp add: in_diff_count insert_subset_eq_iff)

  thus ?thesis using that u_range uv by simp
qed

lemma second_phase:
  assumes "d > 0" "min_dist ps ≤ d" "length ps ≥ 2"
  shows "val (second_phase d ps) = min_dist ps" (is "?L = ?R")
proof -
  let ?g = "grid ps d"

  have "∃u v. min_dist ps = dist u v ∧ {#u, v#} ⊆# mset ps"
    unfolding min_dist_def using Min_in[OF min_dist_set_fin min_dist_neI[OF assms(3)]] by auto

  then obtain u v where uv:
    "min_dist ps = dist u v" "{#u, v#} ⊆# mset ps"
    "grid_lex_ord (to_grid d u) (to_grid d v)"
    and u_range: "u ∈ set ps"
    and v_range: "v ∈ set (val (lookup_neighborhood (grid ps d) u))"
    using second_phase_aux[OF assms] by auto

  hence a: "val (lookup_neighborhood (grid ps d) u) ≠ []" by auto

  have "∃x∈set ps. min_dist ps ∈ dist x ` set (val (lookup_neighborhood (grid ps d) x))"
    using v_range uv(1) by (intro bexI[where x="u"] u_range) simp

  hence b: "Min (⋃x∈set ps. dist x ` set (val (lookup_neighborhood (grid ps d) x))) ≤ min_dist ps"
    by (intro Min.coboundedI finite_UN_I) simp_all

  have "{# x, y#} ⊆# mset ps"
    if "x ∈ set ps" "y ∈ set (val (lookup_neighborhood (grid ps d) x))" for x y
  proof -
    have "y ∈# mset (val (lookup_neighborhood (grid ps d) x))" using that by simp
    moreover have "mset (val (lookup_neighborhood (grid ps d) x)) ⊆#  mset ps - {#x#}"
      using that(1) unfolding lookup_neighborhood subset_eq_diff_conv by simp
    ultimately have "y ∈# mset ps - {#x#}" by (metis mset_subset_eqD)
    moreover have "x ∈# mset ps" using that(1) by simp
    ultimately show "{#x, y#} ⊆# mset ps" by (simp add: insert_subset_eq_iff)
  qed
  hence c: "min_dist ps ≤ Min (⋃x∈set ps. dist x ` set (val (lookup_neighborhood (grid ps d) x)))"
    unfolding min_dist_def using a u_range by (intro Min_antimono min_dist_set_fin) auto

  have "?L = val (min_list_tm (concat (map (λx. map (dist x) (val (lookup_neighborhood ?g x))) ps)))"
    unfolding second_phase_def by (simp add:calc_dists_neighborhood_def build_grid_val)
  also have "... = min_list (concat (map (λx. map (dist x) (val (lookup_neighborhood ?g x))) ps))"
    using assms(3) a u_range by (intro val_min_list) auto
  also have "... = Min (⋃x∈set ps. dist x ` set (val (lookup_neighborhood ?g x)))"
    using a u_range by (subst min_list_Min) auto
  also have "... = min_dist ps" using b c by simp
  finally show ?thesis by simp
qed

text ‹Main result of this section:›

theorem closest_pair_correct:
  assumes "length ps ≥ 2"
  shows "AE r in map_pmf val (closest_pair ps). r = min_dist ps"
proof -
  define fp where "fp = map_pmf val (first_phase ps)"

  have "r = min_dist ps" if
    "d ∈ fp"
    "r = (if d = 0 then 0 else val (second_phase d ps))" for r d
  proof -
    have d_ge: "d ≥ min_dist ps"
      using that(1) first_phase[OF assms] unfolding AE_measure_pmf_iff fp_def[symmetric] by simp
    show ?thesis
    proof (cases "d > 0")
      case True
      thus ?thesis using second_phase[OF True d_ge assms] that(2)
        by (simp add: AE_measure_pmf_iff)
    next
      case False
      hence "d = 0" "min_dist ps = 0" using d_ge min_dist_nonneg[OF assms] by auto
      then show ?thesis using that(2) by auto
    qed
  qed
  thus ?thesis unfolding closest_pair_def val_tpmf_simps fp_def[symmetric] if_distrib
    by (intro AE_pmfI) (auto simp:if_distrib)
qed

end