Theory Lyndon

(*  Title:      Combinatorics_Words_Lyndon.Lyndon
    Author:     Štěpán Holub, Charles University
    Author:     Štěpán Starosta, CTU in Prague

Part of Combinatorics on Words Formalized. See https://gitlab.com/formalcow/combinatorics-on-words-formalized/
*)
theory Lyndon
  imports Combinatorics_Words.CoWBasic
begin

chapter "Lyndon words"

text‹A Lyndon word is a non-empty word that is lexicographically
strictly smaller than any other word in its conjugacy class, i.e., than any its rotations.
They are named after R. Lyndon who introduced them in @{cite Lyndon54} as ``standard'' sequences.

We present elementary results on Lyndon words, mostly covered by results in @{cite ‹Chapter 5› Lo83} and @{cite Duval80 and Duval83}.

This definition assumes a linear order on letters given by the context.
›


section "Definition and elementary properties"

subsection "Underlying order"

lemma (in linorder) lexordp_mid_pref: "ord_class.lexordp u v  ord_class.lexordp v (us) 
   u ≤p v"
  by (induct rule: lexordp_induct, simp_all)

lemma (in linorder) lexordp_ext: "ord_class.lexordp u v   ¬ u ≤p v 
  ord_class.lexordp (uw) (vz)"
  by (induct rule: lexordp_induct, simp_all)

context linorder
begin

abbreviation Lyndon_less :: "'a list  'a list  bool" (infixl "<lex" 50)
  where "Lyndon_less xs ys  ord_class.lexordp xs ys"

abbreviation Lyndon_le :: "'a list  'a list  bool" (infixl "≤lex" 50)
  where "Lyndon_le xs ys  ord_class.lexordp_eq xs ys"

interpretation rlex: linorder "(≤lex)" "(<lex)"
  using lexordp_linorder.

interpretation dual_rlex: linorder "λ x y. y ≤lex x" "λ x y. y <lex x"
  using rlex.dual_linorder.

lemma sorted_dual_rev_iff: "dual_rlex.sorted ws  rlex.sorted (rev ws)"
  unfolding rlex.sorted_rev_iff_nth_mono dual_rlex.sorted_iff_nth_mono by blast

text ‹Several useful lemmas that are formulated for relations, interpreted for the default linear order.›

lemmas lexord_suf_linorder = lexord_sufE[of _ _ _ _ "{(x, y). x < y}", folded lexordp_conv_lexord]
  and lexord_append_leftI_linorder = lexord_append_leftI[of  _ _  "{(x, y). x < y}" _, folded lexordp_conv_lexord]
  and lexord_app_right_linorder = lexord_sufI[of  _ _  "{(x, y). x < y}" _, folded lexordp_conv_lexord]
  and lexord_take_index_conv_linorder = lexord_take_index_conv[of  _ _  "{(x, y). x < y}", folded lexordp_conv_lexord]
  and mismatch_lexord_linorder = mismatch_lexord[of  _ _  "{(x, y). x < y}", folded lexordp_conv_lexord]
  and lexord_cancel_right_linorder = lexord_cancel_right[of _ _ _ _ "{(a,b). a < b}", folded lexordp_conv_lexord]

subsection "Lyndon word definition"

fun Lyndon :: "'a list  bool"
  where "Lyndon w = (w  ε  (n. 0 < n  n < |w|  w <lex rotate n w))"

lemma LyndonD: "Lyndon w  0 < n  n < |w|  w <lex rotate n w"
  unfolding Lyndon.simps by blast

lemma LyndonD_nemp: "Lyndon w  w  ε"
  unfolding Lyndon.simps by blast

lemma LyndonI: "w  ε   n. 0 < n  n < |w|  w <lex rotate n w   Lyndon w"
  unfolding  Lyndon.simps by blast

lemma Lyndon_sing: "Lyndon [a]"
  unfolding Lyndon.simps by auto

lemma Lyndon_prim: assumes "Lyndon w"
  shows "primitive w"
proof-
  have "0 < n  n < |w|  rotate n w  w" for n
    using LyndonD[OF Lyndon w, of n] rlex.less_irrefl[of w] by argo
  from no_rotate_prim[OF LyndonD_nemp[OF Lyndon w]] this
  show ?thesis by blast
qed

lemma Lyndon_conj_greater: "Lyndon (uv)  u  ε  v  ε  uv <lex vu"
  using LyndonD[of "uv" "|u|", unfolded rotate_append[of u v]]
  by force

subsection "Code equations for Lyndon words"

primrec Lyndon_rec :: "'a list  nat  bool" where
    "Lyndon_rec w 0 = True" |
    "Lyndon_rec w (Suc n) = (if w <lex rotate (Suc n) w then Lyndon_rec w n else False)"

lemma Lyndon_rec_all: assumes "Lyndon_rec (a # w) (|w|)"
  shows "n < |a#w|  0 < n  Lyndon_rec (a#w) n"
proof(induction n rule: strict_inc_induct)
  case (base i)
  then show ?case
    using assms by auto
next
  case (step i)
  then show ?case
    by (meson Lyndon_rec.simps(2) zero_less_Suc)
qed

lemma Lyndon_Lyndon_rec: assumes "Lyndon w"
  shows "0 < n  n < |w|  Lyndon_rec w n"
proof(induction n, simp)
  case (Suc n)
  have "w <lex rotate (Suc n) w"
    using LyndonD Suc.prems(2) assms by blast
  then show ?case
    using Suc.IH[OF _ Suc_lessD[OF Suc n < |w|], folded neq0_conv] Lyndon_rec.simps(1)[of w]
    unfolding Lyndon_rec.simps(2)
    by metis
qed

lemma Lyndon_code [code]:
    "Lyndon Nil = False"
    "Lyndon (a # w) = Lyndon_rec (a # w) (|w|)"
proof-
  show "Lyndon Nil = False" by simp
  have "a # w  ε"
    by simp
  have ax: "0 < n  Lyndon_rec (a#w) n  (a#w) <lex rotate n (a#w)" for n
    using Lyndon_rec.simps(2)[of "a#w"] gr0_implies_Suc[of n] by metis
  have bx: "Lyndon_rec (a # w) (|w|) = (n. n < |a#w|  0 < n  Lyndon_rec (a#w) n)"
  proof(cases "w = ε", simp)
    assume "w  ε"
    from this[folded length_greater_0_conv]
    show ?thesis
      using Lyndon_rec_all[of a w] length_Cons[of a w] lessI[of "|w|"]
      by fastforce
  qed
  show "Lyndon (a # w) = Lyndon_rec (a # w) |w|"
    unfolding bx Lyndon.simps
    using ax  LyndonI[OF a # w  ε]Lyndon_Lyndon_rec by blast
qed

subsection "Properties of Lyndon words"

subsubsection "Lyndon words are unbordered"

theorem Lyndon_unbordered: assumes "Lyndon w" shows "¬ bordered w"
proof
  assume "bordered w"
  from bordered_dec[OF this]
  obtain u v where "uvu = w" and "u  ε".
  hence "v  u  ε" and "u  v  ε" by blast+
  note lyn = Lyndon w[folded  uvu = w]
  have "uvu <lex vuu"
    using Lyndon_conj_greater[of u "vu", OF lyn u  ε v  u  ε, unfolded rassoc].
  from this[unfolded lassoc]
  have "u  v  v  u"
    by force
  from lexord_suf_linorder[OF _ this, of u u]
  have "uv <lex vu"
    using  uvu <lex vuu  by simp
  from lexord_append_leftI_linorder[of  "uv" "vu", unfolded lassoc, OF this, unfolded rassoc]
  have "uuv <lex uvu".
  from this Lyndon_conj_greater[of "uv" u, unfolded rassoc, OF lyn u  v  ε  u  ε]
  show False
    by simp
qed

subsubsection "Each conjugacy class contains a Lyndon word"

lemma conjug_Lyndon_ex: assumes "primitive w"
  obtains n where "Lyndon (rotate n w)"
proof-
  have "w  ε"
    using  prim_nemp[OF primitive w].

  let ?ConClass = "{rotate n w | n. 0  n  n < |w|}"

  have "?ConClass  {}"
    using w  ε by blast
  have "finite ?ConClass"
    by force
  have "w  ?ConClass"
    by (rule CollectI)
    (use  le0[of 0] nemp_pos_len[OF w  ε] id_apply[of w, folded rotate0] in metis)
  have all_rot: "rotate m w  ?ConClass" for m
    using rotate_conv_mod[of _ w] mod_less_divisor[of "|w|"] w  ε
    by blast

  obtain w' n where "w'  ?ConClass" and all_b: " b  ?ConClass. b ≤lex w'  w' = b" and w': "w' = rotate n w"
    using rlex.finite_has_minimal[OF finite ?ConClass ?ConClass  {}] by auto

  have "rotate n w <lex rotate na (rotate n w)" if  "0 < na" and "na < |w|" for na
  proof-
    from prim_no_rotate[OF assms[unfolded prim_rotate_conv[of w n]], of na] na < |w| 0 < na
    have "rotate na (rotate n w)  rotate n w" by force
    hence "¬ rotate na (rotate n w) ≤lex rotate n w"
      using all_b[rule_format, OF all_rot[of "na + n", folded rotate_rotate[of na n w]]] unfolding w'
      by auto
    from rlex.not_le_imp_less[OF this]
    show "rotate n w <lex rotate na (rotate n w)".
  qed
  hence "Lyndon (rotate n w)"
    using w  ε by auto
  from that[OF this] show thesis.
qed

lemma conjug_Lyndon_ex': assumes "primitive w"
  obtains v where "w  v" and "Lyndon v"
  unfolding conjug_rotate_iff
  using conjug_Lyndon_ex[OF primitive w]
  by metis

section "Characterization by suffixes"

lemma Lyndon_suf_less: assumes "Lyndon w" "s ≤ns w" "s  w"
  shows "w <lex s"
proof-
  define p where "p = take |s| w"
  have "|s|  |w|"
    using nsD[OF s ≤ns w]
    by (simp add: suffix_length_le)
  have "p ≤p w" and "|p|  = |s|"
    unfolding p_def
    using take_is_prefix |s|  |w| take_len by blast+
  hence "p  s"
    using Lyndon_unbordered[OF Lyndon w] s ≤ns w s  w assms
    by auto
  define p' s' where "p' = drop |s| w" and "s' = take |p'| w"
  have "p  p' = w"
    unfolding p'_def p_def s'_def by simp
  have "s'  s = w"
    unfolding p'_def p_def s'_def
     using suf_len[OF nsD[OF s ≤ns w]] nsD[OF s ≤ns w]
     length_drop suffix_take by metis
  have "|p'| = |s'|"
    using s'_def pp' = w by auto
  have "w <lex ss'"
    using Lyndon_conj_greater[of s' s, unfolded s'  s = w, OF Lyndon w] p  s
    unfolding s'  s = w p_def using s'  s = w assms(3) by fastforce
  from lexord_suf_linorder[OF _ p  s |p| = |s| |p'| = |s'|, OF this[folded p  p' = w]]
  have "p <lex s".
  from lexord_app_right_linorder[OF this, of p' ε, unfolded p  p' = w]  |p| = |s|
  show "w <lex s"
    by simp
qed

lemma Lyndon_pref_suf_less:  assumes "Lyndon w" "p ≤p w" "s ≤ns w" "s  w"
  shows "p <lex s"
proof(cases "p = w", simp add: Lyndon_suf_less[OF assms(1) assms(3) assms(4)])
  assume "p  w"
  show "p <lex s"
  proof(rule rlex.less_trans)
    show "p <lex w"
      using p  w assms(2) lexordp_append_rightI
      by (fastforce simp add: prefix_def)
    show "w <lex s"
      using Lyndon_suf_less assms(1) assms(3) assms(4) by blast
  qed
qed

lemma suf_less_Lyndon: assumes "w  ε" and "s. (s ≤ns w  s  w  w <lex s)"
  shows "Lyndon w"
proof (cases "primitive w")
  assume "¬ primitive w"
  obtain q k where "q  ε" "1 < k" "q@k=w" "wq" ― ‹the exact match of @{thm non_prim} fastens the proof considerably›
    using non_prim[OF ¬ primitive w w  ε] by blast
  hence "q ≤ns w"
    unfolding nonempty_suffix_def pow_eq_if_list[of q k] pow_comm[symmetric]
    using sufI[of "q @ (k - 1)" q w]
    by presburger

  have "q <p w"
    using 1 < k  q @ k = w
    unfolding pow_eq_if_list[of q k] pow_eq_if_list[of q "k-1"]
    using w  ε  by auto
  from lexordp_append_rightI[of "q¯>w" q,
      unfolded lq_pref[OF sprefD1[OF this]], OF lq_spref[OF this]]
  have "q <lex w".
  thus  "Lyndon w"
    unfolding Lyndon.simps using q ≤ns w w  ε assms(2) rlex.order.strict_trans by blast
next
  assume "primitive w"
  have "w <lex rotate l w" if assms_l: "0 < l" "l < |w|" for l
  proof-
    have "take l w ≤np w" and "|take l w| = l"
      using assms_l take_is_prefix  l < |w| by auto
    have "drop l w ≤ns w"
      using l < |w| suffix_drop by auto
    have "drop l w  w"
      using append_take_drop_id[of l w] npD'[OF take l w ≤np w] by force
    have "drop l w  take l w = rotate l w"
      using rotate_append[of "take l w" "drop l w", symmetric, unfolded |take l w| = l,
          unfolded append_take_drop_id].

    have "w <lex drop l w"
      using drop l w ≤ns w drop l w  w assms(2) by blast
    from lexord_app_right_linorder[OF this suffix_length_le[OF conjunct2[OF drop l w ≤ns w[unfolded nonempty_suffix_def]]], of ε "take l w", unfolded  append.right_neutral]
    have "w <lex drop l w  take l w".
    thus "w <lex rotate l w"
      by (simp add: drop l w  take l w = rotate l w)
  qed
  thus "Lyndon w"
    by (simp add: w  ε local.LyndonI)
qed

corollary Lyndon_suf_char: "w  ε  Lyndon w  (s. s ≤ns w  s  w  w <lex s)"
  using Lyndon_suf_less suf_less_Lyndon by blast

lemma Lyndon_suf_le: "Lyndon w  s ≤ns w  w ≤lex s"
  using Lyndon_suf_less rlex.not_less rlex.order.asym by blast

section "Unbordered prefix of a Lyndon word is Lyndon"

lemma unbordered_pref_Lyndon: "Lyndon (uv)  u  ε  ¬ bordered u  Lyndon u"
  unfolding Lyndon_suf_char
proof(standard+)
  fix s
  assume "Lyndon (u  v)" and  "u  ε" and "¬ bordered u" and "s ≤ns u" and "s  u"
    hence "u  v <lex s  v"
      using  Lyndon_suf_less[OF Lyndon (u  v), of "s  v"] by auto
    have "¬ s ≤p u"
      using ¬ bordered u s ≤ns u s  u by auto
    moreover have "¬ u ≤p s"
      using suf_pref_eq[OF nsD[OFs ≤ns u]] s  u by blast
    ultimately show "u <lex s"
      using lexord_cancel_right_linorder[OF u  v <lex s  v] by blast
qed

section "Concatenation of Lyndon words"

theorem Lyndon_concat: assumes  "Lyndon u" and "Lyndon v" and "u <lex v"
  shows "Lyndon (uv)"
proof-
  have "uv <lex v"
  proof(cases "u ≤p v")
    assume "u ≤p v"
    obtain z where "uz = v" and "z ≤ns v"
      using  lq_pref[OF u ≤p v] nsI' rlex.less_imp_neq[OF u <lex v] self_append_conv by metis
    from Lyndon_suf_less[OF Lyndon v this(2), THEN lexord_append_leftI_linorder, of u]
      LyndonD_nemp[OF Lyndon u] this(1)
    show ?thesis
      by blast
  next
    assume "¬ u ≤p v"
    then show ?thesis
      using local.lexordp_linear[of v "uv"]
        local.lexordp_mid_pref[OF u <lex v,of v]
        prefixI[of v u v]
      by argo
  qed

  { fix z
    assume "z ≤ns (uv)" "z  uv"
    have "uv <lex z"
    proof(cases "z ≤ns v")
      assume "z ≤ns v"
      from Lyndon_suf_less[OF Lyndon v this]
      have "z  v  v <lex z"
        by blast
      thus "uv <lex z"
        using u  v <lex v rlex.less_trans
        by fast
    next
      assume "¬ z ≤ns v"
      then obtain z' where "z' ≤ns u" "z'  u" "z'v = z"
        using z ≤ns u  v z  u  v suffix_append[of z u v]
        unfolding nonempty_suffix_def
        by force
      from Lyndon_suf_less[OF Lyndon u this(1) this(2)]
      have "u <lex z'".
      thus "uv <lex z"
        using  z' ≤ns u lexord_app_right_linorder[of u z' v v] suffix_length_le[of z' u]
        unfolding nonempty_suffix_def z'  v = z
        by blast
    qed
  }
  thus ?thesis
    using suf_nemp[OF LyndonD_nemp[OF Lyndon v], of u, THEN suf_less_Lyndon]
    by blast
qed

section "Longest Lyndon suffix"

fun longest_Lyndon_suffix:: "'a list  'a list" ("LynSuf") where
  "longest_Lyndon_suffix ε = ε" |
  "longest_Lyndon_suffix (a#w) = (if Lyndon (a#w) then a#w else longest_Lyndon_suffix w)"

lemma longest_Lyndon_suf_ext: "¬ Lyndon (a # w)  LynSuf w = LynSuf (a # w)"
  using longest_Lyndon_suffix.simps(2) by presburger

lemma longest_Lyndon_suf_suf: "w  ε  LynSuf w ≤s w"
proof(induction w rule: longest_Lyndon_suffix.induct)
  case 1
  then show ?case by simp
next
  case (2 a w)
  show ?case
  proof(cases "Lyndon (a#w)")
    case True
    then show ?thesis by auto
  next
    case False
    from "2.IH"[OF this, unfolded longest_Lyndon_suf_ext[OF this], THEN suffix_ConsI, of a]
      Lyndon_sing False
    show ?thesis by blast
  qed
qed

lemma longest_Lyndon_suf_max:
  "v ≤s w   Lyndon v  v ≤s (LynSuf w)"
proof(induction w arbitrary: v rule: longest_Lyndon_suffix.induct)
  case 1
  then show ?case
    using longest_Lyndon_suffix.simps(1) by presburger
next
  case (2 a w)
  show ?case
  proof(cases "Lyndon (a#w)")
    case True
    then show ?thesis
      using "2.prems"(1) longest_Lyndon_suffix.simps(2) by presburger
  next
    case False
    have "v  a # w"
      using "2.prems"(2) False by blast
    from "2.IH"[OF False _ "2.prems"(2), unfolded longest_Lyndon_suf_ext[OF False]]
       "2.prems"(1)[unfolded suffix_Cons] this
    show ?thesis by fast
  qed
qed

lemma longest_Lyndon_suf_Lyndon_id: assumes "Lyndon w"
  shows "LynSuf w = w"
proof(cases "w = ε", simp)
  case False
  from longest_Lyndon_suf_suf[OF this]
    suffix_order.order_refl[THEN longest_Lyndon_suf_max[OF _ assms]]
    suffix_order.antisym
  show ?thesis by blast
qed

lemma longest_Lyndon_suf_longest: "w  ε  v' ≤s w  Lyndon v'  |v'|  |(LynSuf w)|"
  using longest_Lyndon_suf_max suffix_length_le by blast

lemma longest_Lyndon_suf_sing: "LynSuf [a] = [a]"
  using Lyndon_sing longest_Lyndon_suf_Lyndon_id by blast

lemma longest_Lyndon_suf_Lyndon: "w  ε  Lyndon (LynSuf w)"
proof(induction w rule: longest_Lyndon_suffix.induct, blast)
  case (2 a w)
  show ?case
  proof(cases "Lyndon (a#w)")
    case True
    then show ?thesis
      using longest_Lyndon_suf_Lyndon_id by presburger
  next
    case False
    from "2.IH"[OF this, unfolded longest_Lyndon_suf_ext[OF this]] Lyndon_sing
    show ?thesis by fastforce
  qed
qed

lemma longest_Lyndon_suf_nemp: "w  ε  LynSuf w  ε"
  using longest_Lyndon_suf_Lyndon[THEN LyndonD_nemp].

lemma longest_Lyndon_sufI:
  assumes "q ≤s w" and "Lyndon q" and all_s: "( s. (s ≤s w  Lyndon s)  s ≤s q)"
  shows "LynSuf w = q"
proof(cases "w = ε")
  case True
  then show ?thesis
    using assms(1) longest_Lyndon_suffix.simps(1) suffix_bot.extremum_uniqueI by blast
next
  case False
  from all_s longest_Lyndon_suf_Lyndon[OF this] longest_Lyndon_suf_max[OF assms(1) assms(2)]
    longest_Lyndon_suf_suf[OF this] suffix_order.eq_iff
  show ?thesis by blast
qed

corollary longest_Lyndon_sufI':
  assumes "q ≤s w" and "Lyndon q" and all_s: " s. (s ≤s w  Lyndon s)  |s|  |q|"
  shows "LynSuf w = q"
  using longest_Lyndon_sufI[OF q ≤s w Lyndon q] suf_ruler_le all_s q ≤s w by blast

text‹The next lemma is fabricated to suit the upcoming definition of longest Lyndon factorization.›

lemma longest_Lyndon_suf_shorter: assumes "w  ε"
  shows "|w<¯(LynSuf w)| < |w|"
  using nemp_len[OF longest_Lyndon_suf_nemp[OF w  ε]] arg_cong[OF rq_suf[OF longest_Lyndon_suf_suf[OF w  ε]], of length]
  unfolding lenmorph by linarith

section "Lyndon factorizations"

function Lyndon_fac::"'a list   'a list list" ("LynFac")
  where "Lyndon_fac w  = (if w  ε then ((Lyndon_fac (w <¯(LynSuf w) ))  [LynSuf w]) else ε)"
  using longest_Lyndon_suffix.cases by blast+
termination
proof(relation "measure length", simp)
  show "w. w  ε  (w<¯LynSuf w, w)  measure length"
    unfolding measure_def inv_image_def using longest_Lyndon_suf_shorter by blast
qed

text‹The factorization @{term "Lyndon_fac w"} obtained by taking always the longest Lyndon suffix is well defined,
and called ``Lyndon factorization (of $w$)''.›


lemma Lyndon_fac_simp: "w  ε  Lyndon_fac w =  Lyndon_fac (w<¯LynSuf w)  (LynSuf w # ε)"
  using Lyndon_fac.simps[of w] by meson

lemma Lyndon_fac_emp: "Lyndon_fac ε = ε"
  by simp

text‹Note that the Lyndon factorization of a Lyndon word is trivial.›

lemma Lyndon_fac_longest_Lyndon_id: "Lyndon w  Lyndon_fac w = [w]"
  by (simp add: longest_Lyndon_suf_Lyndon_id)

text‹Lyndon factorization is composed of Lyndon words ...›

lemma Lyndon_fac_set: "z  set (Lyndon_fac w)  Lyndon z"
proof(induction w rule:  Lyndon_fac.induct)
  case (1 w)
  then show "Lyndon z"
  proof (cases "w = ε")
    assume "w  ε"
    have "Lyndon_fac w  = (Lyndon_fac (w <¯(LynSuf w) ))  [LynSuf w]"
      using Lyndon_fac_simp[OF w  ε].
    from set_ConsD[OF "1.prems"(1)[unfolded rotate1.simps(2)[of "LynSuf w" "Lyndon_fac (w <¯(LynSuf w) )", folded this, symmetric], unfolded set_rotate1]]
    have "z = LynSuf w  z  set (Lyndon_fac (w <¯(LynSuf w) ))".
    thus "Lyndon z"
      using "1.IH"[OF w  ε] longest_Lyndon_suf_Lyndon[OF w  ε]
      by blast
  next
    assume "w = ε"
    thus "Lyndon z"
      using "1.prems"
      unfolding Lyndon_fac_emp[folded w = ε] list.set(1) empty_iff
      by blast
  qed
qed

text‹...and it indeed is a factorization of the argument.›

lemma Lyndon_fac_longest_dec: "concat (Lyndon_fac w) = w"
proof(induction w rule: Lyndon_fac.induct)
  case (1 w)
  thus "concat (LynFac w) = w"
  proof (cases "w = ε", simp)
  assume "w  ε"
    have eq: "concat (Lyndon_fac w) = concat ( (Lyndon_fac (w <¯(LynSuf w) )) )  concat ([LynSuf w])"
      unfolding Lyndon_fac_simp[OF w  ε] concat_morph..
    from this[unfolded "1.IH"[OF w  ε] concat_sing' rq_suf[OF longest_Lyndon_suf_suf[OF w  ε]]]
    show ?case.
  qed
qed

text‹The following lemma makes explicit the inductive character of the definition of @{term Lyndon_fac}.›

lemma Lyndon_fac_longest_pref: "us ≤p Lyndon_fac w  Lyndon_fac (concat us) = us"
proof(induction w arbitrary: us rule:  Lyndon_fac.induct)
  case (1 w)
  thus "LynFac (concat us) = us"
  proof (cases "w = ε", simp)
    assume "w  ε"
    have step: "Lyndon_fac w = (Lyndon_fac (w <¯(LynSuf w)))  [LynSuf w]"
      using Lyndon_fac_simp[OF w  ε].

    consider (neq) "us  Lyndon_fac w" | (eq) "us = Lyndon_fac w"
      using "1.prems" le_neq_implies_less by blast
    then show "LynFac (concat us) = us"
    proof(cases)
      case neq
      hence "us ≤p Lyndon_fac (w<¯LynSuf w)"
        using "1.prems" last_no_split[of us "Lyndon_fac (w<¯LynSuf w)" "LynSuf w"]
        unfolding step[symmetric] by blast
      thus "LynFac (concat us) = us"
        using "1.IH" w  ε by blast
    next
      case eq
      show "LynFac (concat us) = us"
        using Lyndon_fac_longest_dec[of w, folded eq] eq by simp
    qed
  qed
qed

text‹We give name to an important predicate: monotone (nonincreasing) list of Lyndon words.›

definition Lyndon_mono :: "'a list list   bool" where
  "Lyndon_mono ws  ( u  set ws. Lyndon u)  (rlex.sorted (rev ws))"

lemma Lyndon_mono_set: "Lyndon_mono ws  u  set ws  Lyndon u"
  unfolding Lyndon_mono_def by blast

lemma Lyndon_mono_sorted: "Lyndon_mono ws  rlex.sorted (rev ws)"
  unfolding Lyndon_mono_def by blast

lemma Lyndon_mono_nth: "Lyndon_mono ws  i  j  j < |ws|  ws!j ≤lex ws!i"
  unfolding Lyndon_mono_def using  rlex.sorted_rev_nth_mono by blast

lemma Lyndon_mono_empty[simp]: "Lyndon_mono ε"
  unfolding Lyndon_mono_def by auto

lemma Lyndon_mono_sing: "Lyndon u  Lyndon_mono [u]"
  unfolding Lyndon_mono_def by auto

lemma Lyndon_mono_fac_Lyndon_mono:
  assumes "ps ≤f ws" and "Lyndon_mono ws" shows "Lyndon_mono ps"
unfolding Lyndon_mono_def
proof
  show "x  (set ps). Lyndon x"
    using Lyndon_mono ws[unfolded Lyndon_mono_def] set_mono_sublist[OF ps ≤f ws] by blast
  show "linorder.sorted (≤lex) (rev ps)"
    using rlex.sorted_append Lyndon_mono ws[unfolded Lyndon_mono_def] ps ≤f ws[unfolded sublist_def] by fastforce
qed

text‹Lyndon factorization is monotone!
Altogether, we have shown that the Lyndon factorization is a monotone factorization
into Lyndon words.›

theorem fac_Lyndon_mono: "Lyndon_mono (Lyndon_fac w)"
proof (induct "Lyndon_fac w" arbitrary: w rule: rev_induct, simp)
  case (snoc x xs)
  have "Lyndon x"
    using Lyndon_fac_set[of x, unfolded in_set_conv_decomp, of w, folded snoc.hyps(2)] by fast
  have "concat (xs  [x]) = w"
    using Lyndon_fac_longest_dec[of w, folded snoc.hyps(2)] by auto
  then show "Lyndon_mono (LynFac w)"
  proof (cases "xs = ε")
    assume "xs = ε"
    show "Lyndon_mono (LynFac w)"
      unfolding Lyndon_mono_def xs  [x] = LynFac w[symmetric] xs = ε append.left_neutral
                rlex.sorted1[of x]
      using Lyndon x by force
  next
    assume "xs  ε"
    have "concat (xs  [x])  ε" and "w  ε"
      using Lyndon_fac_longest_dec snoc.hyps(2) by auto
    have "x = LynSuf w" and "xs = LynFac (w<¯LynSuf w )"
      using Lyndon_fac.simps[of w, folded snoc.hyps(2)] w  ε
        Lyndon_fac_longest_dec append1_eq_conv[of xs x "LynFac (w<¯LynSuf w )" "LynSuf w"] by presburger+
    from Lyndon_mono_sorted[OF snoc.hyps(1)[OF xs = LynFac (w<¯LynSuf w )], folded this]
    have "dual_rlex.sorted  xs"
      unfolding sorted_dual_rev_iff.
    have "Lyndon (last xs)"
      using Lyndon_fac_set[of "last xs" "w<¯LynSuf w", folded xs = LynFac (w<¯LynSuf w), OF last_in_set[OF xs  ε]].
    have "x ≤lex last xs"
    proof(rule ccontr)
      assume "¬ x ≤lex last xs" hence "last xs <lex x" by auto
      from Lyndon_concat[OF  Lyndon (last xs) Lyndon x this]
      have "Lyndon ((last xs)  x)".
      have "(last xs)  x ≤s concat (xs  [x])"
        using xs  ε concat_last_suf by auto
      from longest_Lyndon_suf_longest[OF concat (xs  [x])  ε  this Lyndon ((last xs)  x),
          unfolded concat (xs  [x]) = w, folded x = LynSuf w]
      show False
        using Lyndon (last xs) by simp
    qed
    have "dual_rlex.sorted (butlast xs  [last xs])"
      by (simp add: linorder.sorted (λx y. y ≤lex x) xs xs  ε)
    from this x ≤lex last xs
    have "dual_rlex.sorted (butlast xs  [last xs,x])"
      using dual_rlex.sorted_append by auto
    from this[unfolded hd_word[of "last xs" "[x]"] lassoc append_butlast_last_id[OF xs  ε]]
    have "rlex.sorted (rev (LynFac w))"
      unfolding  xs  [x] = LynFac w[symmetric] sorted_dual_rev_iff[symmetric].
    thus "Lyndon_mono (LynFac w)"
      unfolding Lyndon_mono_def using Lyndon_fac_set by blast
  qed
qed

text‹Now we want to show the converse: any monotone factorization into Lyndon words is the Lyndon factorization›

text‹The last element in the Lyndon factorization is the smallest suffix.›

lemma Lyndon_mono_last_smallest: "Lyndon_mono ws s ≤ns (concat ws)  last ws ≤lex s"
proof(induct ws arbitrary: s rule: rev_induct, fastforce)
  case (snoc a ws)
  have "ws[a]  ε"
    by blast
  have "last (ws[a]) = a"
    by simp
  from last_in_set[OF ws[a]  ε, unfolded this] Lyndon_mono (ws  [a])[unfolded Lyndon_mono_def]
  have "Lyndon a"
    by blast
  show ?case
  proof(cases "s ≤ns a")
    case True
    from Lyndon_suf_le[OF Lyndon a] this
    show ?thesis
      by simp
  next
    case False
    hence "ws  ε"
      using snoc.prems(2) by force
    obtain s' where "s = s'a"
      using False snoc.prems(2)[unfolded concat_append[of ws "[a]", unfolded concat_sing']] suffix_append[of s "concat ws" a]
      unfolding nonempty_suffix_def
      by blast
    hence "s' ≤ns concat ws"
      using False snoc.prems(2) by fastforce
    have "Lyndon_mono ws"
      using Lyndon_mono (ws[a]) Lyndon_mono_fac_Lyndon_mono
      by blast
    from snoc.hyps[OF this s' ≤ns concat ws]
    have "last ws ≤lex s'"
      by auto
    hence "last ws ≤lex s'a"
      using local.lexordp_eq_trans ord.lexordp_eq_pref by blast
    have "a ≤lex last ws"
      using Lyndon_mono (ws[a])
      unfolding Lyndon_mono_def
      by (simp add: ws  ε last_ConsL)
    from dual_rlex.order_trans[OF last ws ≤lex s'  a this, folded s = s'  a]
    show ?thesis
      unfolding last (ws[a]) = a
      by blast
  qed
qed


text‹A monotone list, if seen as a factorization,
must end with the longest suffix›

lemma Lyndon_mono_last_longest: assumes "ws  ε" and "Lyndon_mono ws"
  shows "LynSuf (concat ws) = last ws"
proof-
  have "Lyndon (last ws)"
    using Lyndon_mono_set assms(1) assms(2) last_in_set by blast
  hence "last ws  ε"
    using LyndonD_nemp by blast
  hence "last ws ≤ns LynSuf (concat ws)"
    using longest_Lyndon_suf_max[OF  concat_last_suf[OF assms(1)] Lyndon (last ws)]
    unfolding nonempty_suffix_def
    by blast

  have "concat ws  ε"
    using Lyndon.simps assms(2)[unfolded Lyndon_mono_def] set_nemp_concat_nemp[OF assms(1)]
    by blast
  from longest_Lyndon_suf_nemp[OF this] longest_Lyndon_suf_suf[OF this]
  have "LynSuf (concat ws) ≤ns concat ws"
    unfolding nonempty_suffix_def
    by simp

  show ?thesis
    using Lyndon_mono_last_smallest[OF Lyndon_mono ws LynSuf (concat ws) ≤ns concat ws]
      Lyndon_suf_le[OF longest_Lyndon_suf_Lyndon[OF concat ws  ε], OF last ws ≤ns LynSuf (concat ws)]
      eq_iff
    by simp
qed

text‹Therefore, by construction,
any monotone list is the Lyndon factorization of its concatenation›

lemma Lyndon_mono_fac:
  "Lyndon_mono ws  ws = Lyndon_fac (concat ws)"
proof (induct ws rule: rev_induct, simp)
  case (snoc x xs)
  have "Lyndon_mono xs"
    using Lyndon_mono (xs  [x])
    unfolding Lyndon_mono_def
    by simp
  from snoc.hyps[OF this]
  have "xs = LynFac (concat xs)".
  have "x = LynSuf (concat (xs  [x]))"
    using Lyndon_mono_last_longest[OF _ Lyndon_mono (xs  [x]), unfolded last_snoc] by simp
  have "concat (xs  [x])<¯x = concat xs"
    by simp
  have "concat (xs  [x])  ε"
    using Lyndon_mono_set snoc.prems by auto
  from this
  show ?case
    using Lyndon_fac.simps[of "concat (xs  [x])", folded x = LynSuf (concat (xs  [x])),
        unfolded concat (xs  [x])<¯x = concat xs, folded xs = LynFac (concat xs)]
    by presburger
qed


text‹This implies that the Lyndon factorization can be characterized
in two equivalent ways: as the (unique) monotone factorization (into Lyndon words) or as
the "suffix greedy" factorization (into Lyndon words).
›

corollary Lyndon_mono_fac_iff: "Lyndon_mono ws  ws = LynFac (concat ws)"
  using Lyndon_mono_fac fac_Lyndon_mono[of "concat ws"] by fastforce

corollary Lyndon_mono_unique: assumes "Lyndon_mono ws" and "Lyndon_mono zs" and "concat ws = concat zs"
  shows "ws = zs"
  using Lyndon_mono_fac[OF Lyndon_mono ws] Lyndon_mono_fac[OF Lyndon_mono zs]
  unfolding concat ws = concat zs  by simp


subsection "Standard factorization"

lemma Lyndon_std: assumes "Lyndon w" "1 < |w|"
  obtains l m where "w = lm" and "Lyndon l" and "Lyndon m" and "l <lex m"
proof-
  have "w  ε" "tl w  ε"
    using 1 < |w| long_list_tl by auto
  define m where  "m = LynSuf (tl w)"
  hence "Lyndon m"
    using tl w  ε local.longest_Lyndon_suf_Lyndon by blast
  have "m ≤s w"
    unfolding m_def
    using suffix_order.trans[OF longest_Lyndon_suf_suf[OF tl w  ε] suffix_tl[of w]].
  moreover have "m  w"
    unfolding m_def using  hd_tl[OF w  ε] longest_Lyndon_suf_suf[OF tl w  ε] same_suffix_nil
    not_Cons_self2 by metis
  ultimately obtain l where "w = lm" and "l  ε"
   by (auto simp add: suffix_def)

  have "Lyndon l"
  proof (rule unbordered_pref_Lyndon[OF Lyndon w[unfolded w = lm] l  ε], rule)
    assume "bordered l"
    from unbordered_border[OF this, unfolded border_def]
    obtain s where "s  ε" and "s  l" and "s ≤p l" and "s ≤s l" and "¬ bordered s"
      by blast
    have "Lyndon s"
      using unbordered_pref_Lyndon[OF _ s  ε ¬ bordered s, of "s¯>lm", unfolded lassoc lq_pref[OF s ≤p l]]
      Lyndon w[unfolded w = l  m] by blast
    have "s <lex m"
      using Lyndon_pref_suf_less[OF Lyndon w _ nsI[OF LyndonD_nemp[OF Lyndon m] m ≤s w]
         m  w, of s] Lyndon.elims(2)[OF Lyndon m]
         s ≤p l prefix_append[of s l m, folded w = l  m]
      by presburger
    from Lyndon_concat[OF Lyndon s Lyndon m this]
    have "Lyndon (sm)".
    moreover have "sm ≤s tl w"
      unfolding w = l  m using s  l s ≤s l list.collapse[OF w  ε, unfolded w = l  m]
      by (auto simp add: suffix_def)
    ultimately show False
      using m_def s  ε longest_Lyndon_suf_max same_suffix_nil by blast
  qed

  have "l <lex m"
    using Lyndon_pref_suf_less[OF Lyndon w prefI[OF w = l  m[symmetric]]
        nsI[OF longest_Lyndon_suf_nemp[OF tl w  ε, folded  m_def] m ≤s w] m  w].
  from that[OF w = l  m Lyndon l  Lyndon m this]
  show thesis.
qed

corollary Lyndon_std_iff:
  "Lyndon w  (|w| = 1  ( l m. w = lm  Lyndon l  Lyndon m  l <lex m))"
  (is "?L  ?R")
proof
  assume ?L
  show ?R
    using Lyndon_std[OF Lyndon w]
      nemp_le_len[OF LyndonD_nemp[OF Lyndon w], unfolded le_eq_less_or_eq]
    by metis
next
  assume ?R
  thus ?L
  proof(rule disjE, fastforce)
    show l m. w = l  m  Lyndon l  Lyndon m  l <lex m  Lyndon w
      using Lyndon_concat by blast
  qed
qed

end  ― ‹end context linorder›

end