Theory Glued_Codes

(*  Title:      Glued Codes
    File:       Combinatorics_Words_Graph_Lemma.Glued_Codes
    Author:     Martin Raška, Charles University

Part of Combinatorics on Words Formalized. See https://gitlab.com/formalcow/combinatorics-on-words-formalized/
*)

theory Glued_Codes
  imports Combinatorics_Words.Submonoids
begin

chapter "Glued codes"

section ‹Lists that do not end with a fixed letter›

lemma append_last_neq:
  "us = ε  last us  w  vs = ε  last vs  w  us  vs = ε  last (us  vs)  w"
  by (auto simp only: last_append split: if_split)

lemma last_neq_induct [consumes 1, case_names emp hd_eq hd_neq]:
  assumes invariant: "us = ε  last us  w"
      and emp: "P ε"
      and hd_eq: "us. us  ε  last us  w  P us  P (w # us)"
      and hd_neq: "u us. u  w  us = ε  last us  w  P us  P (u # us)"
  shows "P us"
using invariant proof (induction us)
  case (Cons u us)
    have inv: "us = ε  last us  w"
      using Cons.prems by (intro disjI) simp
    show "P (u # us)"
    proof (cases)
      assume "u = w"
      have *: "us  ε" and "last us  w"
        using Cons.prems unfolding u = w by auto
      then show "P (u # us)" unfolding u = w using Cons.IH[OF inv] by (fact hd_eq)
    qed (use inv Cons.IH[OF inv] in fact hd_neq)
qed (rule P ε)

lemma last_neq_blockE:
  assumes last_neq: "us  ε" and "last us  w"
  obtains k u us' where "u  w" and "us' = ε  last us'  w" and "[w] @ k  u # us' = us"
using disjI2[OF last us  w] us  ε proof (induction us rule: last_neq_induct)
  case (hd_eq us)
    from us  ε show ?case
      by (rule hd_eq.IH[rotated]) (intro hd_eq.prems(1)[of _ _ "Suc _"], assumption+, simp)
next
  case (hd_neq u us)
    from hd_neq.hyps show ?case
     by (rule hd_neq.prems(1)[of _ _ 0]) simp
qed blast

lemma last_neq_block_induct [consumes 1, case_names emp block]:
  assumes last_neq: "us = ε  last us  w"
      and emp: "P ε"
      and block: "k u us. u  w  us = ε  last us  w  P us  P ([w] @ k  (u # us))"
  shows "P us"
using last_neq proof (induction us rule: ssuf_induct)
  case (ssuf us)
    show ?case proof (cases "us = ε")
      assume "us  ε"
      obtain k u us' where "u  w" and "us' = ε  last us'  w" and "[w] @ k  u # us' = us"
        using us  ε us = ε  last us  w by (elim last_neq_blockE) (simp add: us  ε)
      have "us' <s us" and "us' = ε  last us'  w"
        using us = ε  last us  w by (auto simp flip: [w] @ k  u # us' = us)
      from u  w us' = ε  last us'  w ssuf.IH[OF this]
      show "P us" unfolding [w] @ k  u # us' = us[symmetric] by (fact block)
    qed (simp only: emp)
qed

section ‹Glue a list element with its successors/predecessors›

function glue :: "'a list  'a list list  'a list list" where
  glue_emp:  "glue w ε = ε" |
  glue_Cons: "glue w (u # us) =
    (let glue_tl = glue w us in
      if u = w then (u  hd glue_tl) # tl glue_tl
      else u # glue_tl)"
  unfolding prod_eq_iff prod.sel by (cases rule: list.exhaust[of "snd _"]) blast+
  termination by (relation "measure (length  snd)") simp_all

lemma no_gluing: "w  set us  glue w us = us"
  by (induction us) auto

lemma glue_nemp [simp, intro!]: "us  ε  glue w us  ε"
  by (elim hd_tlE) (auto simp only: glue.simps Let_def split!: if_split)

lemma glue_is_emp_iff [simp]: "glue w us = ε  us = ε"
  using glue_nemp glue_emp by blast

lemma len_glue: "us = ε  last us  w  |glue w us| + count_list us w = |us|"
  by (induction rule: last_neq_induct) (auto simp add: Let_def)

lemma len_glue_le: assumes "us = ε  last us  w" shows "|glue w us|  |us|"
  using len_glue[OF assms] unfolding nat_le_iff_add eq_commute[of "|us|"] by blast

lemma len_glue_less []: "us = ε  last us  w  w  set us  |glue w us| < |us|"
  by (simp add: count_list_gr_0_iff flip: len_glue[of us])

lemma assumes "us = ε  last us  w" and "ε  set us"
  shows emp_not_in_glue: "ε  set (glue w us)"
    and glued_not_in_glue: "w  set (glue w us)"
  unfolding atomize_conj using assms by (induction us rule: last_neq_induct)
    (auto simp: Let_def dest!: tl_set lists_hd_in_set[OF glue_nemp[of _ w]])

lemma glue_glue: "us = ε  last us  w  ε  set us  glue w (glue w us) = glue w us"
  using no_gluing[OF glued_not_in_glue].

lemma glue_block_append: assumes "u  w"
  shows "glue w ([w] @ k  (u # us)) = (w @ k  u) # glue w us"
  by (induction k) (simp_all add: u  w)

lemma concat_glue [simp]: "us = ε  last us  w  concat (glue w us) = concat us"
  by (induction us rule: last_neq_block_induct) (simp_all add: glue_block_append)

lemma glue_append:
  "us = ε  last us  w  glue w (us  vs) = glue w us  glue w vs"
  by (induction us rule: last_neq_block_induct) (simp_all add: glue_block_append)

lemma glue_pow:
  assumes "us = ε  last us  w"
  shows "glue w (us @ k) = (glue w us) @ k"
  by (induction k) (simp_all add: assms glue_append)

lemma glue_in_lists_hull [intro]:
  "us = ε  last us  w  us  lists G  glue w us  lists G"
  by (induction rule: last_neq_induct) (simp_all add: Let_def tl_in_lists prod_cl gen_in)

― ‹Gluing from the right (gluing a letter with its predecessor)›
function gluer :: "'a list  'a list list  'a list list" where
  gluer_emp:  "gluer w ε = ε" |
  gluer_Cons: "gluer w (u # us) =
    (let gluer_butlast = gluer w (butlast (u # us)) in
      if last (u # us) = w then (butlast gluer_butlast)  [last gluer_butlast  last (u # us)]
      else gluer_butlast  [last (u # us)])"
  unfolding prod_eq_iff prod.sel by (cases rule: list.exhaust[of "snd _"]) blast+
  termination by (relation "measure (length  snd)") simp_all

lemma gluer_nemp_def: assumes "us  ε"
  shows "gluer w us =
    (let gluer_butlast = gluer w (butlast us) in
      if last us = w then (butlast gluer_butlast)  [last gluer_butlast  last us]
      else gluer_butlast  [last us])"
  using gluer_Cons[of w "hd us" "tl us"] unfolding hd_Cons_tl[OF us  ε].

lemma gluer_nemp: assumes "us  ε" shows "gluer w us  ε"
  unfolding gluer_nemp_def[OF us  ε]
  by (simp only: Let_def split!: if_split)

lemma hd_neq_induct [consumes 1, case_names emp snoc_eq snoc_neq]:
  assumes invariant: "us = ε  hd us  w"
      and emp: "P ε"
      and snoc_eq: "us. us  ε  hd us  w  P us  P (us  [w])"
      and snoc_neq: "u us. u  w  us = ε  hd us  w  P us  P (us  [u])"
  shows "P us"
using last_neq_induct[where P="λx. P (rev x)" for P, reversed, unfolded rev_rev_ident, OF assms].

lemma gluer_rev [reversal_rule]: assumes "us = ε  last us  w"
  shows "gluer (rev w) (rev (map rev us)) =  rev (map rev (glue w us))"
  using assms by (induction us rule: last_neq_induct)
    (simp_all add: gluer_nemp_def Let_def map_tl last_rev hd_map)

lemma glue_rev [reversal_rule]: assumes "us = ε  hd us  w"
  shows "glue (rev w) (rev (map rev us)) =  rev (map rev (gluer w us))"
  using assms by (induction us rule: hd_neq_induct)
    (simp_all add: gluer_nemp_def Let_def map_tl last_rev hd_map)

section ‹Generators with glued element›

text ‹The following set will turn out to be the generating set of all words whose
 decomposition into a generating code does not end with w›

inductive_set glued_gens :: "'a list  'a list set  'a list set"
  for w G where
    other_gen: "g  G  g  w  g  glued_gens w G"
  | glued [intro!]: "u  glued_gens w G   w  u  glued_gens w G"

lemma in_glued_gensI: assumes "g  G" "g  w"
  shows "w @ k  g = u  u  glued_gens w G"
  by (induction k arbitrary: u) (auto simp: other_gen[OF g  G g  w])

lemma in_glued_gensE:
  assumes "u  glued_gens w G"
  obtains k g where "g  G" and "g  w" and "w @ k  g = u"
using assms proof (induction)
  case (glued u)
    show ?case by (auto intro!: glued.IH[OF glued.prems[of _ "Suc _"]])
qed (use pow_zero in blast)

lemma glued_gens_alt_def: "glued_gens w C = {w @ k  g | k g. g  C  g  w}"
  by (blast elim!: in_glued_gensE intro: in_glued_gensI)

lemma glued_hull_sub_hull [simp, intro!]: "w  G  glued_gens w G  G"
  by (rule hull_mono') (auto elim!: in_glued_gensE)

lemma glued_hull_sub_hull': "w  G  u  glued_gens w G  u  G"
  using set_mp[OF glued_hull_sub_hull].

lemma in_glued_hullE:
  assumes "w  G" and "u  glued_gens w G"
  obtains us where "concat us = u" and "us  lists G" and "us = ε  last us  w"
using u  glued_gens w G proof (induction arbitrary: thesis)
  case (prod_cl v u)
    obtain k g where "g  G" and "g  w" and "concat ([w] @ k  [g]) = v"
      using v  glued_gens w G by simp (elim in_glued_gensE)
    obtain us where u: "concat us = u" and "us  lists G" and "(us = ε  last us  w)" by fact
    have "concat ([w] @ k  [g]  us) = v  u"
      by (simp flip: concat ([w] @ k  [g]) = v concat us = u)
    with (us = ε  last us  w) show thesis
      by (elim prod_cl.prems, intro lists.intros
          append_in_lists pow_in_lists w  G g  G us  lists G)
         (auto simp: g  w)
qed (use concat.simps(1) in blast)

lemma glue_in_lists [simp, intro!]:
  assumes "us = ε  last us  w"
  shows "us  lists G  glue w us  lists (glued_gens w G)"
  using assms by (induction rule: last_neq_block_induct)
    (auto simp: glue_block_append intro: in_glued_gensI)

lemma concat_in_glued_hull[intro]:
  "us  lists G  us = ε  last us  w  concat us  glued_gens w G"
  unfolding concat_glue[symmetric] by (intro concat_in_hull' glue_in_lists)

lemma glued_hull_conv: assumes "w  G"
  shows "glued_gens w G = {concat us | us. us  lists G  (us = ε  last us  w)}"
  by (blast elim!: in_glued_hullE[OF w  G])

section ‹Bounded gluing›

lemma bounded_glue_in_lists:
  assumes "us = ε  last us  w" and "¬ [w] @ n ≤f us"
  shows "us  lists G  glue w us  lists {w @ k  g | k g. g  G  g  w  k < n}"
using assms proof (induction us rule: last_neq_block_induct)
  case (block k u us)
    have "k < n" and "¬ [w] @ n ≤f us"
      using ¬ [w] @ n ≤f [w] @ k  u # us
      by (blast intro!: not_le_imp_less le_exps_pref, blast intro!: fac_ext_pref fac_ext_hd)
    then show ?case
      using [w] @ k  u # us  lists G u  w unfolding glue_block_append[OF u  w]
      by (blast intro!: block.IH del: in_listsD in_listsI)
qed simp

subsection ‹Gluing on binary alphabet›

lemma bounded_bin_glue_in_lists: ― ‹meaning: a binary code›
  assumes "us = ε  last us  x"
      and "¬ [x] @ n ≤f us"
      and "us  lists {x, y}"
  shows "glue x us  lists {x @ k  y | k. k < n}"
using bounded_glue_in_lists[OF assms] by blast

lemma single_bin_glue_in_lists: ― ‹meaning: a single occurrence›
  assumes "us = ε  last us  x"
      and "¬ [x,x] ≤f us"
      and "us  lists {x, y}"
  shows "glue x us  lists {x  y, y}"
  using bounded_bin_glue_in_lists[of _ _ 2, simplified, OF assms] unfolding numeral_nat
  by (auto elim!: sub_lists_mono[rotated] less_SucE)

lemma count_list_single_bin_glue:
  assumes "x  ε" and "x  y"
      and "us = ε  last us  x"
      and "us  lists {x,y}"
      and "¬ [x,x] ≤f us"
  shows "count_list (glue x us) (x  y) = count_list us x"
    and "count_list (glue x us) y + count_list us x = count_list us y"
using assms(3-5) unfolding atomize_conj pow_Suc[symmetric]
proof (induction us rule: last_neq_block_induct)
  case (block k u us)
    have "u = y" using [x] @ k  u # us  lists {x, y} u  x by simp
    have IH: "count_list (glue x us) (x  y) = count_list us x 
              count_list (glue x us) y + count_list us x = count_list us y"
      using block.prems by (intro block.IH) (simp, blast intro!: fac_ext_pref fac_ext_hd)
    have "¬ [x] @ Suc (Suc 0) ≤f [x] @ k  u # us"
      using block.prems(2) by auto
    then have "k < Suc (Suc 0)"
      by (blast intro!: not_le_imp_less le_exps_pref)
    then show ?case unfolding u = y glue_block_append[OF x  y[symmetric]]
      by (elim less_SucE less_zeroE) (simp_all add: x  y x  y[symmetric] x  ε IH)
qed simp

section ‹Code with glued element›

context code
begin

text ‹If the original generating set is a code, then also the glued generators form a code›

lemma glued_hull_last_dec: assumes "w  𝒞" and  "u  glued_gens w 𝒞" and "u  ε"
  shows "last (Dec 𝒞 u)  w"
  using u  glued_gens w 𝒞
  by (elim in_glued_hullE[OF w  𝒞]) (auto simp: code_unique_dec u  ε)

lemma in_glued_hullI [intro]:
  assumes "u  𝒞" and "(u = ε  last (Dec 𝒞 u)  w)"
  shows "u  glued_gens w 𝒞"
  using concat_in_glued_hull[OF dec_in_lists[OF u  𝒞], of w]
  by (simp add: u  𝒞 u = ε  last (Dec 𝒞 u)  w)

lemma code_glued_hull_conv: assumes "w  𝒞"
  shows "glued_gens w 𝒞 = {u  𝒞. u = ε  last (Dec 𝒞 u)  w}"
proof
  show "glued_gens w 𝒞  {u  𝒞. u = ε  last (Dec 𝒞 u)  w}"
    using glued_hull_sub_hull'[OF w  𝒞] glued_hull_last_dec[OF w  𝒞] by blast
  show "{u  𝒞. u = ε  last (Dec 𝒞 u)  w}  glued_gens w 𝒞"
    using in_glued_hullI by blast
qed

lemma in_glued_hull_iff:
  assumes "w  𝒞" and "u  𝒞"
  shows "u  glued_gens w 𝒞  u = ε  last (Dec 𝒞 u)  w"
  by (simp add: w  𝒞 u  𝒞 code_glued_hull_conv)

lemma glued_not_in_glued_hull: "w  𝒞  w  glued_gens w 𝒞"
  unfolding in_glued_hull_iff[OF _ gen_in] code_el_dec
  by (simp add: nemp)

lemma glued_gens_nemp: assumes "u  glued_gens w 𝒞" shows "u  ε"
  using assms by (induction) (auto simp add: nemp)

lemma glued_gens_code: assumes "w  𝒞" shows "code (glued_gens w 𝒞)"
proof
  show "us = vs" if "us  lists (glued_gens w 𝒞)" and "vs  lists (glued_gens w 𝒞)"
    and "concat us = concat vs" for us vs
  using that proof (induction rule: list_induct2')
    case (4 u us v vs)
      have *: "us  lists (glued_gens w 𝒞)  us  lists 𝒞" for us
        using sub_lists_mono[OF subset_trans[OF genset_sub glued_hull_sub_hull[OF w  𝒞]]].
      obtain k u' l v'
        where "u'  𝒞" "u'  w" "w @ k  u' = u"
          and "v'  𝒞" "v'  w" "w @ l  v' = v"
        using "4.prems"(1-2) by simp (elim conjE in_glued_gensE)
      from this(3, 6) "4.prems" w  𝒞
      have "concat (([w] @ k  [u'])  (Ref 𝒞 us)) = concat (([w] @ l  [v'])  (Ref 𝒞 vs))"
        by (simp add: concat_ref * lassoc)
      with w  𝒞 u'  𝒞 v'  𝒞 "4.prems"(1-2)
      have "[w] @ k  [u']  [w] @ l  [v']"
        by (elim eqd_comp[OF is_code, rotated 2])
        (simp_all add: "*" pow_in_lists ref_in')
      with u'  w v'  w w @ k  u' = u w @ l  v' = v
      have "u = v"
        by (elim sing_pref_comp_mismatch[rotated 2, elim_format]) blast+
      then show "u # us = v # vs"
        using "4.IH" "4.prems"(1-3) by simp
  qed (auto dest!: glued_gens_nemp)
qed

text ‹A crucial lemma showing the relation between gluing and the decomposition into generators›

lemma dec_glued_gens: assumes "w  𝒞" and "u  glued_gens w 𝒞"
  shows "Dec (glued_gens w 𝒞) u = glue w (Dec 𝒞 u)"
  using u  glued_gens w 𝒞 glued_hull_sub_hull'[OF w  𝒞 u  glued_gens w 𝒞]
  by (intro code.code_unique_dec glued_gens_code)
     (simp_all add: in_glued_hull_iff w  𝒞)

lemma ref_glue: "us = ε  last us  w  us  lists 𝒞  Ref 𝒞 (glue w us) = us"
  by (intro refI glue_in_lists_hull) simp_all

end
theorem glued_code:
  assumes "code C" and "w  C"
  shows "code {w @ k  u |k u. u  C  u  w}"
  using code.glued_gens_code[OF code C w  C] unfolding glued_gens_alt_def.

section ‹Gluing is primitivity preserving›

text ‹It is easy to obtain that gluing lists of code elements preserves primitivity.
We provide the result under weaker condition where glue blocks of the list
have unique concatenation.›

lemma (in code) code_prim_glue:
  assumes last_neq: "us = ε  last us  w"
      and "us  lists 𝒞"
  shows "primitive us  primitive (glue w us)"
  using prim_map_prim[OF prim_concat_prim, of "decompose 𝒞" "glue w us"]
  unfolding refine_def[symmetric] ref_glue[OF assms].

― ‹In the context of code the inverse to the glue function is the @{const refine} function,
i.e. @{term "λvs. concat (map (decompose 𝒞) vs)"}, see @{thm code.ref_glue}.
The role of the @{const decompose} function outside the code context supply the 'unglue' function,
which maps glued blocks to its unique preimages (see below).›

definition glue_block :: "'a list 'a list list  'a list list  bool"
  where "glue_block w us bs =
    (ps k u ss. (ps = ε  last ps  w)  u  w  ps  [w] @ k  u # ss = us  [w] @ k  [u] = bs)"

lemma glue_blockI [intro]:
  "ps = ε  last ps  w  u  w  ps  [w] @ k  u # ss = us  [w] @ k  [u] = bs
     glue_block w us bs"
  unfolding glue_block_def by (intro exI conjI)

lemma glue_blockE:
  assumes "glue_block w us bs"
  obtains ps k u ss where "ps = ε  last ps  w" and "u  w" "ps  [w] @ k  u # ss = us"
      and "[w] @ k  [u] = bs"
  using assms unfolding glue_block_def by (elim exE conjE)

lemma assumes "glue_block w us bs"
  shows glue_block_of_appendL: "glue_block w (us  vs) bs"
    and glue_block_of_appendR: "vs = ε  last vs  w  glue_block w (vs  us) bs"
  using glue_block w us bs by (elim glue_blockE, use nothing in intro glue_blockI[of _ w _ _ "_  vs" "us  vs" bs]
          glue_blockI[OF append_last_neq, of "vs"  w _ _ _ _ "vs  us" bs],
    simp_all only: eq_commute[of _ us] rassoc append_Cons refl not_False_eq_True)+

lemma glue_block_of_block_append:
  "u  w  glue_block w us bs  glue_block w ([w] @ k  u # us) bs"
  by (simp only: hd_word[of _ us] lassoc) (elim glue_block_of_appendR, simp_all)

lemma in_set_glueE:
  assumes last_neq: "us = ε  last us  w"
      and "b  set (glue w us)"
  obtains bs where "glue_block w us bs" and "concat bs = b"
using assms proof (induction us rule: last_neq_block_induct)
  case (block k u us)
    show thesis using b  set (glue w ([w] @ k  u # us))
      proof (auto simp add: glue_block_append u  w)
        show "b = w @ k  u  thesis"
          by (auto  intro!: block.prems(1) glue_blockI[OF _ u  w _ refl])
        show "b  set (glue w us)  thesis"
          by (auto intro!: block.IH[OF block.prems(1)] glue_block_of_block_append u  w)
      qed
qed simp

definition unglue :: "'a list  'a list list  'a list  'a list list"
  where "unglue w us b = (THE bs. glue_block w us bs  concat bs = b)"

lemma unglueI:
  assumes unique_blocks: "bs1 bs2. glue_block w us bs1  glue_block w us bs2
             concat bs1 = concat bs2  bs1 = bs2"
  shows "glue_block w us bs  concat bs = b  unglue w us b = bs"
  unfolding unglue_def by (blast intro: unique_blocks)

lemma concat_map_unglue_glue:
  assumes last_neq: "us = ε  last us  w"
      and unique_blocks: "vs1 vs2. glue_block w us vs1  glue_block w us vs2
             concat vs1 = concat vs2  vs1 = vs2"
  shows "concat (map (unglue w us) (glue w us)) = us"
using assms proof (induction us rule: last_neq_block_induct)
  case (block k u us)
    have IH: "concat (map (unglue w us) (glue w us)) = us"
      using block.IH[OF block.prems] by (blast intro!: glue_block_of_block_append u  w)
    have *: "map (unglue w ([w] @ k  u # us)) (glue w us) = map (unglue w us) (glue w us)"
      by (auto simp only: map_eq_conv unglue_def del: the_equality
          elim!: in_set_glueE[OF us = ε  last us  w], intro the_equality)
         (simp_all only: the_equality block.prems glue_block_of_block_append[OF u  w])
    show "concat (map (unglue w ([w] @ k  u # us)) (glue w ([w] @ k  u # us))) = [w] @ k  u # us"
      by (auto simp add: glue_block_append[OF u  w] * IH
          intro!: unglueI intro: glue_blockI[OF _ u  w] block.prems)
qed simp

lemma prim_glue:
  assumes last_neq: "us = ε  last us  w"
      and unique_blocks: "bs1 bs2. glue_block w us bs1  glue_block w us bs2
             concat bs1 = concat bs2  bs1 = bs2"
  shows "primitive us  primitive (glue w us)"
  using prim_map_prim[OF prim_concat_prim, of "unglue w us" "glue w us"]
  by (simp only: concat_map_unglue_glue assms)

subsection ‹Gluing on binary alphabet›

lemma bin_glue_blockE:
  assumes "us  lists {x, y}"
      and "glue_block x us bs"
  obtains k where "[x] @ k  [y] = bs"
  using assms by (auto simp only: glue_block_def del: in_listsD)

lemma unique_bin_glue_blocks:
  assumes "us  lists {x, y}" and "x  ε"
  shows "glue_block x us bs1  glue_block x us bs2  concat bs1 = concat bs2  bs1 = bs2"
  by (auto simp: eq_pow_exp[OF x  ε] elim!: bin_glue_blockE[OF us  lists {x, y}])

lemma prim_bin_glue:
  assumes "us  lists {x, y}" and "x  ε"
      and "us = ε  last us  x"
  shows "primitive us  primitive (glue x us)"
  using prim_glue[OF us = ε  last us  x unique_bin_glue_blocks[OF assms(1-2)]].

end