Theory Sauer_Shelah_Lemma

(*  Title:      Sauer_Shelah_Lemma.thy
    Author:     Ata Keskin, TU München
*)

section "Sauer-Shelah Lemma"

theory Sauer_Shelah_Lemma
  imports Shattering Card_Lemmas Binomial_Lemmas
begin                                     

subsection ‹Generalized Sauer-Shelah Lemma›

text ‹To prove the Sauer-Shelah Lemma, we will first prove a slightly stronger fact that every family
      @{term "F"} shatters at least as many sets as @{term "card F"}. We first fix an element @{term "x  ( F)"}
      and consider the subfamily @{term F0} of sets in the family not containing it. By induction, @{term F0} 
      shatters at least as many elements of @{term F} as @{term "card F0"}. 
      Next, we consider the subfamily @{term F1} of sets in the family that contain @{term x}.
      Again, by induction, @{term F1} shatters as many elements of @{term F} as its cardinality. 
      The number of elements of @{term F} shattered by @{term F0} and @{term F1} sum up to at least 
      @{term "card F0 + card F1 = card F"}. When a set @{term "S  F"} is shattered by only one of the two subfamilies, say @{term F0}, 
      it contributes one unit to the set @{term "shattered_by F0"} and to @{term "shattered_by F"}. However, when the set is shattered by 
      both subfamilies, both @{term S} and @{term "S  {x}"} are in @{term "shattered_by F"}, so @{term S} contributes two units
      to @{term "shattered_by F0  shattered_by F1"}. Therefore, the cardinality of @{term "shattered_by F"} 
      is at least equal to the cardinality of @{term "shattered_by F0  shattered_by F1"}, which is at least @{term "card F"}.›

lemma sauer_shelah_0:
  fixes F :: "'a set set"
  shows "finite ( F)  card F  card (shattered_by F)"
proof (induction F rule: measure_induct_rule[of "card"])
  case (less F)
  note finite_F = finite_UnionD[OF less(2)]
  note finite_shF = finite_shattered_by[OF less(2)]
  show ?case
  proof (cases "2  card F")
    case True
    from obtain_difference_element[OF True] 
    obtain x :: 'a where x_in_Union_F: "x  F" 
                     and x_not_in_Int_F: "x  F" by blast

    text ‹Define F0 as the subfamily of F containing sets that don't contain @{term x}.›
    let ?F0 = "{S  F. x  S}"
    from x_in_Union_F have F0_psubset_F: "?F0  F" by blast
    from F0_psubset_F have F0_in_F: "?F0  F" by blast
    from subset_shattered_by[OF F0_in_F] have shF0_subset_shF: "shattered_by ?F0  shattered_by F" .
    from F0_in_F have Un_F0_in_Un_F:" ?F0   F" by blast

    text ‹F0 shatters at least as many sets as @{term "card F0"} by the induction hypothesis.›
    note IH_F0 = less(1)[OF psubset_card_mono[OF finite_F F0_psubset_F] rev_finite_subset[OF less(2) Un_F0_in_Un_F]]

    text ‹Define F1 as the subfamily of F containing sets that contain @{term x}.›
    let ?F1 = "{S  F. x  S}"
    from x_not_in_Int_F have F1_psubset_F: "?F1  F" by blast
    from F1_psubset_F have F1_in_F: "?F1  F" by blast
    from subset_shattered_by[OF F1_in_F] have shF1_subset_shF: "shattered_by ?F1  shattered_by F" .
    from F1_in_F have Un_F1_in_Un_F:" ?F1   F" by blast

    text ‹F1 shatters at least as many sets as @{term "card F1"} by the induction hypothesis.›
    note IH_F1 = less(1)[OF psubset_card_mono[OF finite_F F1_psubset_F] rev_finite_subset[OF less(2) Un_F1_in_Un_F]]

    from shF0_subset_shF shF1_subset_shF 
    have shattered_subset: "(shattered_by ?F0)  (shattered_by ?F1)  shattered_by F" by simp

    text ‹There is a set with the same cardinality as the intersection of 
        @{term "shattered_by F0"} and @{term "shattered_by F1"} which is disjoint from their union and is also contained in @{term "shattered_by F"}.›
    have f_copies_the_intersection:
      "f. inj_on f (shattered_by ?F0  shattered_by ?F1) 
       (shattered_by ?F0  shattered_by ?F1)  (f ` (shattered_by ?F0  shattered_by ?F1)) = {} 
       f ` (shattered_by ?F0  shattered_by ?F1)  shattered_by F"
    proof
      have x_not_in_shattered: "S(shattered_by ?F0)  (shattered_by ?F1). x  S" unfolding shattered_by_def by blast
     
      text ‹This set is precisely the image of the intersection under @{term "insert x"}.›
      let ?f = "insert x"
      have 0: "inj_on ?f (shattered_by ?F0  shattered_by ?F1)"
      proof
        fix X Y
        assume x0: "X  (shattered_by ?F0  shattered_by ?F1)" and y0: "Y  (shattered_by ?F0  shattered_by ?F1)"
               and 0: "?f X = ?f Y"
        from x_not_in_shattered x0 have "X = ?f X - {x}" by blast
        also from 0 have "... = ?f Y - {x}" by argo
        also from x_not_in_shattered y0 have "... = Y" by blast
        finally show "X = Y" .
      qed

      text ‹The set is disjoint from the union.›
      have 1: "(shattered_by ?F0  shattered_by ?F1)  ?f ` (shattered_by ?F0  shattered_by ?F1) = {}"
      proof (rule ccontr)
        assume "(shattered_by ?F0  shattered_by ?F1)  ?f ` (shattered_by ?F0  shattered_by ?F1)  {}"
        then obtain S where 10: "S  (shattered_by ?F0  shattered_by ?F1)" 
                        and 11: "S  ?f ` (shattered_by ?F0  shattered_by ?F1)" by auto
        from 10 x_not_in_shattered have "x  S" by blast
        with 11 show "False" by blast
      qed

      text ‹This set is also in @{term "shattered_by F"}.›
      have 2: "?f ` (shattered_by ?F0  shattered_by ?F1)  shattered_by F"
      proof 
        fix S_x
        assume "S_x  ?f ` (shattered_by ?F0  shattered_by ?F1)"
        then obtain S where 20: "S  shattered_by ?F0" 
                        and 21: "S  shattered_by ?F1" 
                        and 22: "S_x = ?f S" by blast
        from x_not_in_shattered 20 have x_not_in_S: "x  S" by blast

        from 22 Pow_insert[of x S] have "Pow S_x = Pow S  ?f ` Pow S" by fast
        also from 20 have "... = (?F0 ∩* S)  (?f ` Pow S)" unfolding shattered_by_def by blast
        also from 21 have "... = (?F0 ∩* S)  (?f ` (?F1 ∩* S))" unfolding shattered_by_def by force
        also from insert_IntF[of x S ?F1] have "... = (?F0 ∩* S)  (?f ` ?F1 ∩* (?f S))" by argo
        also from 22 have "... = (?F0 ∩* S)  (?F1 ∩* S_x)" by blast
        also from 22 have "... = (?F0 ∩* S_x)  (?F1 ∩* S_x)" by blast
        also from subset_IntF[OF F0_in_F, of S_x] subset_IntF[OF F1_in_F, of S_x] have "...  (F ∩* S_x)" by blast
        finally have "Pow S_x  (F ∩* S_x)" .
        thus "S_x  shattered_by F" unfolding shattered_by_def by blast
      qed

      from 0 1 2 show "inj_on ?f (shattered_by ?F0  shattered_by ?F1) 
        (shattered_by ?F0  shattered_by ?F1)  (?f ` (shattered_by ?F0  shattered_by ?F1)) = {} 
        ?f ` (shattered_by ?F0  shattered_by ?F1)  shattered_by F" by blast
    qed

    have F0_union_F1_is_F: "?F0  ?F1 = F" by fastforce
    from finite_F have finite_F0: "finite ?F0" and finite_F1: "finite ?F1" by fastforce+
    have disjoint_F0_F1: "?F0  ?F1 = {}" by fastforce

    text ‹We have the following lower bound on the cardinality of @{term "shattered_by F"}:›
    from F0_union_F1_is_F card_Un_disjoint[OF finite_F0 finite_F1 disjoint_F0_F1] 
    have "card F = card ?F0 + card ?F1" by argo
    also from IH_F0
    have "...  card (shattered_by ?F0) + card ?F1" by linarith
    also from IH_F1
    have "...  card (shattered_by ?F0) + card (shattered_by ?F1)" by linarith
    also from card_Int_copy[OF finite_shF shattered_subset f_copies_the_intersection]
    have "...  card (shattered_by F)" by argo
    finally show ?thesis .
  next
    text ‹If @{term F} contains less than 2 sets, the statement follows trivially.›
    case False
    hence "card F = 0  card F = 1" by force
    thus ?thesis
    proof
      assume "card F = 0"
      thus ?thesis by auto
    next
      assume asm: "card F = 1"
      hence F_not_empty: "F  {}" by fastforce
      from shatters_empty[OF F_not_empty] have "{{}}  shattered_by F" unfolding shattered_by_def by fastforce
      from card_mono[OF finite_shF this] asm show ?thesis by fastforce
    qed
  qed
qed

subsection ‹Sauer-Shelah Lemma›

text ‹The generalized version immediately implies the Sauer-Shelah Lemma,
      because only @{text "(∑i≤k. n choose i)"} of the subsets of an @{term n}-item universe have cardinality less than @{term "k + (1::nat)"}.
      Thus, when @{text "(∑i≤k. n choose i) < card F"}, there are not enough sets to be shattered, 
      so one of the shattered sets must have cardinality at least @{term "k + (1::nat)"}.›

corollary sauer_shelah:
  fixes F :: "'a set set"
  assumes "finite (F)" and "(ik. card (F) choose i) < card F"
  shows "S. (F shatters S  card S = k + 1)"
proof -
  let ?K = "{S. S  F  card S  k}"
  from finite_Pow_iff[of F] assms(1) have finite_Pow_Un: "finite (Pow (F))" by fast

  from sauer_shelah_0[OF assms(1)] assms(2) have "(ik. card (F) choose i) < card (shattered_by F)" by linarith
  with choose_row_sum_set[OF assms(1), of k] have "card ?K < card (shattered_by F)" by presburger

  from finite_diff_not_empty[OF finite_subset[OF _ finite_Pow_Un] this] 
  obtain S where "S  shattered_by F - ?K" by blast
  then have F_shatters_S: "F shatters S" and "S  F" and "¬(S  F  card S  k)" unfolding shattered_by_def by blast+
  then have card_S_ge_Suc_k: "k + 1  card S" by simp
  from obtain_subset_with_card_n[OF card_S_ge_Suc_k] obtain S' where "card S' = k + 1" and "S'  S" by blast
  from this(1) supset_shatters[OF this(2) F_shatters_S] show ?thesis by blast
qed

subsection ‹Sauer-Shelah Lemma for hypergraphs›

text ‹If we designate X to be the set of hyperedges and S the set of vertices, we can also formulate the Sauer-Shelah Lemma in terms of hypergraphs. 
      In this form, the statement provides a sufficient condition for the existence of an hyperedge of a given cardinality which is shattered by the set of edges.›

corollary sauer_shelah_2:
  fixes X :: "'a set set" and S :: "'a set"
  assumes "finite S" and "X  Pow S" and "(ik. card S choose i) < card X"
  shows "Y. (X shatters Y  card Y = k + 1)"
proof -
  from assms(2) have 0: "X  S" by blast
  then have "(ik. card (X) choose i)  (ik. card S choose i)"
    by (simp add: assms(1) card_mono choose_mono sum_mono)
  then show ?thesis
    using "0" assms finite_subset sauer_shelah by fastforce
qed

subsection ‹Alternative statement of the Sauer-Shelah Lemma›

text ‹We can also state the Sauer-Shelah Lemma in terms of the @{term VC_dim}. If the VC-dimension of @{term F} is @{term k} then @{term F}
      can consist at most of @{text "(∑i≤k. card (⋃F) choose i)"} sets which is in @{text "𝒪(card (⋃F)^k)"}.›

corollary sauer_shelah_alt:
  assumes "finite (F)" and "VC_dim F = k"
  shows "card F  (ik. card (F) choose i)"
proof (rule ccontr)
  assume "¬ card F  (ik. card (F) choose i)" hence "(ik. card (F) choose i) < card F" by linarith
  then obtain S where "F shatters S" and "card S = k + 1"
    by (meson assms(1) sauer_shelah)
  then have §: "k + 1  {card S | S. F shatters S}"
    by simp metis
  have "finite {A. F shatters A}"
    by (metis finite ( F) finite_shattered_by shattered_by_def)
  then have "bdd_above {card A |A. F shatters A}"
    by simp
  then have "k + 1  Sup {card A |A. F shatters A}"
    by (smt (verit, best) "§" cSup_upper)
  then have "k + 1  VC_dim F"
    by (simp add: VC_dim_def)
  then show False
    using assms(2) by auto
qed

end