Theory Lyndon

(*  Title:      Combinatorics_Words_Lyndon.Lyndon
    Author:     Štěpán Holub, Charles University
    Author:     Štěpán Starosta, CTU in Prague

Part of Combinatorics on Words Formalized. See https://gitlab.com/formalcow/combinatorics-on-words-formalized/

*)

theory Lyndon
  imports Combinatorics_Words.CoWBasic
begin

chapter "Lyndon words"

text‹A Lyndon word is a non-empty word that is lexicographically
strictly smaller than any other word in its conjugacy class, i.e., than any its rotations.
They are named after R. Lyndon who introduced them in \cite{Lyndon54} as ``standard'' sequences.

We present elementary results on Lyndon words, mostly covered by results in \cite[Chapter 5]{Lo83} and \cite{Duval80,Duval83}.

This definition assumes a linear order on letters given by the context.
›


section "Definition and elementary properties"

subsection "Underlying order"

lemma (in linorder) lexordp_mid_pref: "ord_class.lexordp u v  ord_class.lexordp v (us) 
   u ≤p v"
  by (induct rule: lexordp_induct, simp_all)

lemma (in linorder) lexordp_ext: "ord_class.lexordp u v   ¬ u ≤p v 
  ord_class.lexordp (uw) (vz)"
  by (induct rule: lexordp_induct, simp_all)

context linorder
begin

abbreviation Lyndon_less :: "'a list  'a list  bool" (infixl "<lex" 50)
  where "Lyndon_less xs ys  ord_class.lexordp xs ys"

abbreviation Lyndon_le :: "'a list  'a list  bool" (infixl "≤lex" 50)
  where "Lyndon_le xs ys  ord_class.lexordp_eq xs ys"

interpretation rlex: linorder "(≤lex)" "(<lex)"
  using lexordp_linorder.

interpretation dual_rlex: linorder "λ x y. y ≤lex x" "λ x y. y <lex x"
  using rlex.dual_linorder.

lemma sorted_dual_rev_iff: "dual_rlex.sorted ws  rlex.sorted (rev ws)"
  unfolding rlex.sorted_rev_iff_nth_mono dual_rlex.sorted_iff_nth_mono by blast

text ‹Several useful lemmas that are formulated for relations, interpreted for the default linear order.›

lemmas lexord_suf_linorder = lexord_sufE[of _ _ _ _ "{(x, y). x < y}", folded lexordp_conv_lexord]
  and lexord_append_leftI_linorder = lexord_append_leftI[of  _ _  "{(x, y). x < y}" _, folded lexordp_conv_lexord]
  and lexord_app_right_linorder = lexord_sufI[of  _ _  "{(x, y). x < y}" _, folded lexordp_conv_lexord]
  and lexord_take_index_conv_linorder = lexord_take_index_conv[of  _ _  "{(x, y). x < y}", folded lexordp_conv_lexord]
  and mismatch_lexord_linorder = mismatch_lexord[of  _ _  "{(x, y). x < y}", folded lexordp_conv_lexord]
  and lexord_cancel_right_linorder = lexord_cancel_right[of _ _ _ _ "{(a,b). a < b}", folded lexordp_conv_lexord]

subsection "Lyndon word definition"

fun Lyndon :: "'a list  bool"
  where "Lyndon w = (w  ε  (n. 0 < n  n < |w|  w <lex rotate n w))"

lemma LyndonD: "Lyndon w  0 < n  n < |w|  w <lex rotate n w"
  unfolding Lyndon.simps by blast

lemma LyndonD_nemp: "Lyndon w  w  ε"
  unfolding Lyndon.simps by blast

lemma LyndonI: "w  ε   n. 0 < n  n < |w|  w <lex rotate n w   Lyndon w"
  unfolding  Lyndon.simps by blast

lemma Lyndon_sing: "Lyndon [a]"
  unfolding Lyndon.simps by auto

lemma Lyndon_prim: assumes "Lyndon w"
  shows "primitive w"
proof-
  have "0 < n  n < |w|  rotate n w  w" for n
    using LyndonD[OF Lyndon w, of n] rlex.less_irrefl[of w] by argo
  from no_rotate_prim[OF LyndonD_nemp[OF Lyndon w]] this
  show ?thesis by blast
qed

lemma Lyndon_conj_greater: "Lyndon (uv)  u  ε  v  ε  uv <lex vu"
  using LyndonD[of "uv" "|u|", unfolded rotate_append[of u v]]
  by force

subsection "Code equations for Lyndon words"

primrec Lyndon_rec :: "'a list  nat  bool" where
    "Lyndon_rec w 0 = True" |
    "Lyndon_rec w (Suc n) = (if w <lex rotate (Suc n) w then Lyndon_rec w n else False)"

lemma Lyndon_rec_all: assumes "Lyndon_rec (a # w) (|w|)"
  shows "n < |a#w|  Lyndon_rec (a#w) n"
proof(induction n rule: strict_inc_induct)
  case (base i)
  then show ?case
    using assms by auto
next
  case (step i)
  then show ?case
    by (meson Lyndon_rec.simps(2) zero_less_Suc)
qed

lemma Lyndon_Lyndon_rec: assumes "Lyndon w"
  shows "n < |w|  Lyndon_rec w n"
proof(induction n, simp)
  case (Suc n)
  have "w <lex rotate (Suc n) w"
    using LyndonD Suc.prems(1) assms by blast
  then show ?case
    using Suc.IH[OF Suc_lessD[OF Suc n < |w|], folded neq0_conv] Lyndon_rec.simps(1)[of w]
    unfolding Lyndon_rec.simps(2)
    by metis
qed

lemma Lyndon_code [code]:
    "Lyndon Nil = False"
    "Lyndon (a # w) = Lyndon_rec (a # w) (|w|)"
proof-
  show "Lyndon Nil = False" by simp
  have "a # w  ε"
    by simp
  have ax: "0 < n  Lyndon_rec (a#w) n  (a#w) <lex rotate n (a#w)" for n
    using Lyndon_rec.simps(2)[of "a#w"] gr0_implies_Suc[of n] by metis
  have bx: "Lyndon_rec (a # w) (|w|) = (n. n < |a#w|  Lyndon_rec (a#w) n)"
    using Lyndon_rec_all by auto
  show "Lyndon (a # w) = Lyndon_rec (a # w) |w|"
    unfolding bx Lyndon.simps
    using ax LyndonI[OF a # w  ε]Lyndon_Lyndon_rec by blast
qed

subsection "Properties of Lyndon words"

subsubsection "Lyndon words are unbordered"

theorem Lyndon_unbordered: assumes "Lyndon w" shows "¬ bordered w"
proof
  assume "bordered w"
  from bordered_dec[OF this]
  obtain u v where "uvu = w" and "u  ε".
  hence "v  u  ε" and "u  v  ε" by blast+
  note lyn = Lyndon w[folded  uvu = w]
  have "uvu <lex vuu"
    using Lyndon_conj_greater[of u "vu", OF lyn u  ε v  u  ε, unfolded rassoc].
  from this[unfolded lassoc]
  have "u  v  v  u"
    by force
  from lexord_suf_linorder[OF _ this, of u u]
  have "uv <lex vu"
    using  uvu <lex vuu  by simp
  from lexord_append_leftI_linorder[of  "uv" "vu", unfolded lassoc, OF this, unfolded rassoc]
  have "uuv <lex uvu".
  from this Lyndon_conj_greater[of "uv" u, unfolded rassoc, OF lyn u  v  ε  u  ε]
  show False
    by simp
qed

subsubsection "Each conjugacy class contains a Lyndon word"

lemma conjug_Lyndon_ex: assumes "primitive w"
  obtains n where "Lyndon (rotate n w)"
proof-
  have "w  ε"
    using  prim_nemp[OF primitive w].

  let ?ConClass = "{rotate n w | n. 0  n  n < |w|}"

  have "?ConClass  {}"
    using w  ε by blast
  have "finite ?ConClass"
    by force
  have "w  ?ConClass"
    by (rule CollectI)
    (use  le0[of 0] nemp_len[OF w  ε] id_apply[of w, folded rotate0] in metis)
  have all_rot: "rotate m w  ?ConClass" for m
    using rotate_conv_mod[of _ w] mod_less_divisor[of "|w|"] w  ε
    by blast

  obtain w' n where "w'  ?ConClass" and all_b: " b  ?ConClass. b ≤lex w'  w' = b" and w': "w' = rotate n w"
    using rlex.finite_has_minimal[OF finite ?ConClass ?ConClass  {}] by auto

  have "rotate n w <lex rotate na (rotate n w)" if  "0 < na" and "na < |w|" for na
  proof-
    from prim_no_rotate[OF assms[unfolded prim_rotate_conv[of w n]], of na] na < |w| 0 < na
    have "rotate na (rotate n w)  rotate n w" by force
    hence "¬ rotate na (rotate n w) ≤lex rotate n w"
      using all_b[rule_format, OF all_rot[of "na + n", folded rotate_rotate[of na n w]]] unfolding w'
      by auto
    from rlex.not_le_imp_less[OF this]
    show "rotate n w <lex rotate na (rotate n w)".
  qed
  hence "Lyndon (rotate n w)"
    using w  ε by auto
  from that[OF this] show thesis.
qed

lemma conjug_Lyndon_ex': assumes "primitive w"
  obtains v where "w  v" and "Lyndon v"
  unfolding conjug_rotate_iff
  using conjug_Lyndon_ex[OF primitive w]
  by metis

section "Characterization by suffixes"

lemma Lyndon_suf_less: assumes "Lyndon w" "s ≤s w" "s  ε" "s  w"
  shows "w <lex s"
proof-
  define p where "p = take |s| w"
  have "|s|  |w|"
    using s ≤s w
    by (simp add: suffix_length_le)
  have "p ≤p w" and "|p|  = |s|"
    unfolding p_def
    using take_is_prefix |s|  |w| take_len by blast+
  hence "p  s"
    using Lyndon_unbordered[OF Lyndon w] s ≤s w s  w s  ε
    by auto
  define p' s' where "p' = drop |s| w" and "s' = take |p'| w"
  have "p  p' = w"
    unfolding p'_def p_def s'_def by simp
  have "s'  s = w"
    unfolding p'_def p_def s'_def
     using suf_len[OF s ≤s w] s ≤s w
     length_drop suffix_take by metis
  have "|p'| = |s'|"
    using s'_def pp' = w by auto
  have "w <lex ss'"
    using Lyndon_conj_greater[of s' s, unfolded s'  s = w, OF Lyndon w] p  s
    unfolding s'  s = w p_def using s'  s = w assms(3) by fastforce
  from lexord_suf_linorder[OF _ p  s |p| = |s| |p'| = |s'|, OF this[folded p  p' = w]]
  have "p <lex s".
  from lexord_app_right_linorder[OF this, of p' ε, unfolded p  p' = w]  |p| = |s|
  show "w <lex s"
    by simp
qed

lemma Lyndon_pref_suf_less:  assumes "Lyndon w" "p ≤p w" "s ≤s w" "s  ε" "s  w"
  shows "p <lex s"
proof(cases "p = w", simp add: Lyndon_suf_less[OF assms(1) assms(3-5)])
  assume "p  w"
  show "p <lex s"
  proof(rule rlex.less_trans)
    show "p <lex w"
      using p  w lexordp_append_rightI[of "p¯>w" p]
      lq_pref[OF p ≤p w] by force
    show "w <lex s"
      using Lyndon_suf_less assms by blast
  qed
qed

lemma suf_less_Lyndon: assumes "w  ε" and "s. (s ≤s w  s  ε  s  w  w <lex s)"
  shows "Lyndon w"
proof-
  have "primitive w"
  proof (rule ccontr)
  assume "¬ primitive w"
  obtain q k where "q  ε" "1 < k" "q@k=w" "wq" ― ‹the exact match of @{thm non_prim} fastens the proof considerably›
    using non_prim[OF ¬ primitive w w  ε] by blast
  hence "q ≤s w" "q  ε" "q  w"
    unfolding pow_list_eq_if[of k q] pow_comm[symmetric]
    using sufI[of "q @ (k - 1)" q w]
    by presburger+
  hence "w <lex q"
    using assms(2) by blast
  have "q <p w"
    using 1 < k  q @ k = w
    unfolding pow_list_eq_if[of k q] pow_list_eq_if[of "k-1" q]
    using w  ε  by auto
  from lexordp_append_rightI[of "q¯>w" q,
      unfolded lq_spref[OF this], OF lq_spref_nemp[OF this]]
  have "q <lex w".
  thus False
    using w <lex q by simp
qed
  have "w <lex rotate l w" if assms_l: "0 < l" "l < |w|" for l
  proof-
    have "take l w ≤p w" and "take l w  ε"  and "|take l w| = l"
      using assms_l take_is_prefix  l < |w| by auto
    have "drop l w ≤s w" "drop l w  ε"
      using l < |w| suffix_drop by auto
    have "drop l w  w"
      using append_take_drop_id[of l w] take l w  ε by force
    have "drop l w  take l w = rotate l w"
      using rotate_append[of "take l w" "drop l w", symmetric, unfolded |take l w| = l,
          unfolded append_take_drop_id].

    have "w <lex drop l w"
      using drop l w ≤s w  drop l w  w drop l w  ε  assms(2) by blast
    from lexord_app_right_linorder[OF this suffix_length_le[OF drop l w ≤s w], of ε "take l w",
         unfolded  append.right_neutral]
    have "w <lex drop l w  take l w".
    thus "w <lex rotate l w"
      by (simp add: drop l w  take l w = rotate l w)
  qed
  thus "Lyndon w"
    by (simp add: w  ε local.LyndonI)
qed

corollary Lyndon_suf_char: "w  ε  Lyndon w  (s. s ≤s w  s  ε  s  w  w <lex s)"
  using Lyndon_suf_less suf_less_Lyndon by blast

lemma Lyndon_suf_le: "Lyndon w  s ≤s w  s  ε  w ≤lex s"
  using Lyndon_suf_less rlex.not_less rlex.order.asym by metis

section "Unbordered prefix of a Lyndon word is Lyndon"

lemma unbordered_pref_Lyndon: "Lyndon (uv)  u  ε  ¬ bordered u  Lyndon u"
  unfolding Lyndon_suf_char
proof(standard+)
  fix s
  assume "Lyndon (u  v)" and  "u  ε" and "¬ bordered u" and "s ≤s u" and "s  ε" and "s  u"
    hence "u  v <lex s  v"
      using  Lyndon_suf_less[OF Lyndon (u  v), of "s  v"] by auto
    have "¬ s ≤p u"
      using ¬ bordered u s ≤s u s  ε s  u by auto
    moreover have "¬ u ≤p s"
      using suf_pref_eq[OF s ≤s u] s  u by blast
    ultimately show "u <lex s"
      using lexord_cancel_right_linorder[OF u  v <lex s  v] by blast
qed

section "Concatenation of Lyndon words"

theorem Lyndon_concat: assumes  "Lyndon u" and "Lyndon v" and "u <lex v"
  shows "Lyndon (uv)"
proof-
  have "uv <lex v"
  proof(cases "u ≤p v")
    assume "u ≤p v"
    obtain z where "uz = v" and "z ≤s v" and "z  ε"
      using  lq_pref[OF u ≤p v] rlex.less_imp_neq[OF u <lex v] self_append_conv by force
    from Lyndon_suf_less[OF Lyndon v this(2-3), THEN lexord_append_leftI_linorder, of u]
      LyndonD_nemp[OF Lyndon u] this(1)
    show ?thesis
      by blast
  next
    assume "¬ u ≤p v"
    then show ?thesis
      using local.lexordp_linear[of v "uv"]
        local.lexordp_mid_pref[OF u <lex v,of v]
        prefixI[of v u v]
      by argo
  qed

  { fix z
    assume "z ≤s (uv)" "z  ε" "z  uv"
    have "uv <lex z"
    proof(cases "z ≤s v  z  ε")
      assume "z ≤s v  z  ε"
      from Lyndon_suf_less[OF Lyndon v conjunct1[OF this] conjunct2[OF this]]
      have "z  v  v <lex z"
        by blast
      thus "uv <lex z"
        using u  v <lex v rlex.less_trans
        by fast
    next
      assume "¬ (z ≤s v  z  ε)"
      then obtain z' where "z' ≤s u" "z'  ε" "z'  u" "z'v = z"
        using z ≤s u  v z  ε z  u  v suffix_append[of z u v]
        by force
      from Lyndon_suf_less[OF Lyndon u this(1-3)]
      have "u <lex z'".
      thus "uv <lex z"
        using  z' ≤s u lexord_app_right_linorder[of u z' v v] suffix_length_le[of z' u]
        unfolding  z'  v = z
        by blast
    qed
  }
  thus ?thesis
    using suf_nemp[OF LyndonD_nemp[OF Lyndon v], of u, THEN suf_less_Lyndon]
    by blast
qed

section "Longest Lyndon suffix"

fun longest_Lyndon_suffix:: "'a list  'a list" ("LynSuf") where
  "longest_Lyndon_suffix ε = ε" |
  "longest_Lyndon_suffix (a#w) = (if Lyndon (a#w) then a#w else longest_Lyndon_suffix w)"

lemma longest_Lyndon_suf_ext: "¬ Lyndon (a # w)  LynSuf w = LynSuf (a # w)"
  using longest_Lyndon_suffix.simps(2) by presburger

lemma longest_Lyndon_suf_suf: "w  ε  LynSuf w ≤s w"
proof(induction w rule: longest_Lyndon_suffix.induct)
  case 1
  then show ?case by simp
next
  case (2 a w)
  show ?case
  proof(cases "Lyndon (a#w)")
    case True
    then show ?thesis by auto
  next
    case False
    from "2.IH"[OF this, unfolded longest_Lyndon_suf_ext[OF this], THEN suffix_ConsI, of a]
      Lyndon_sing False
    show ?thesis by blast
  qed
qed

lemma longest_Lyndon_suf_max:
  "v ≤s w   Lyndon v  v ≤s (LynSuf w)"
proof(induction w arbitrary: v rule: longest_Lyndon_suffix.induct)
  case 1
  then show ?case
    using longest_Lyndon_suffix.simps(1) by presburger
next
  case (2 a w)
  show ?case
  proof(cases "Lyndon (a#w)")
    case True
    then show ?thesis
      using "2.prems"(1) longest_Lyndon_suffix.simps(2) by presburger
  next
    case False
    have "v  a # w"
      using "2.prems"(2) False by blast
    from "2.IH"[OF False _ "2.prems"(2), unfolded longest_Lyndon_suf_ext[OF False]]
       "2.prems"(1)[unfolded suffix_Cons] this
    show ?thesis by fast
  qed
qed

lemma longest_Lyndon_suf_Lyndon_id: assumes "Lyndon w"
  shows "LynSuf w = w"
proof(cases "w = ε", simp)
  case False
  from longest_Lyndon_suf_suf[OF this]
    suffix_order.order_refl[THEN longest_Lyndon_suf_max[OF _ assms]]
    suffix_order.antisym
  show ?thesis by blast
qed

lemma longest_Lyndon_suf_longest: "w  ε  v' ≤s w  Lyndon v'  |v'|  |(LynSuf w)|"
  using longest_Lyndon_suf_max suffix_length_le by blast

lemma longest_Lyndon_suf_sing: "LynSuf [a] = [a]"
  using Lyndon_sing longest_Lyndon_suf_Lyndon_id by blast

lemma longest_Lyndon_suf_Lyndon: "w  ε  Lyndon (LynSuf w)"
proof(induction w rule: longest_Lyndon_suffix.induct, blast)
  case (2 a w)
  show ?case
  proof(cases "Lyndon (a#w)")
    case True
    then show ?thesis
      using longest_Lyndon_suf_Lyndon_id by presburger
  next
    case False
    from "2.IH"[OF this, unfolded longest_Lyndon_suf_ext[OF this]] Lyndon_sing
    show ?thesis by fastforce
  qed
qed

lemma longest_Lyndon_suf_nemp: "w  ε  LynSuf w  ε"
  using longest_Lyndon_suf_Lyndon[THEN LyndonD_nemp].

lemma longest_Lyndon_sufI:
  assumes "q ≤s w" and "Lyndon q" and all_s: "( s. (s ≤s w  Lyndon s)  s ≤s q)"
  shows "LynSuf w = q"
proof(cases "w = ε")
  case True
  then show ?thesis
    using assms(1) longest_Lyndon_suffix.simps(1) suffix_bot.extremum_uniqueI by blast
next
  case False
  from all_s longest_Lyndon_suf_Lyndon[OF this] longest_Lyndon_suf_max[OF assms(1) assms(2)]
    longest_Lyndon_suf_suf[OF this] suffix_order.eq_iff
  show ?thesis by blast
qed

corollary longest_Lyndon_sufI':
  assumes "q ≤s w" and "Lyndon q" and all_s: " s. (s ≤s w  Lyndon s)  |s|  |q|"
  shows "LynSuf w = q"
  using longest_Lyndon_sufI[OF q ≤s w Lyndon q] suf_ruler_le all_s q ≤s w by blast

text‹The next lemma is fabricated to suit the upcoming definition of longest Lyndon factorization.›

lemma longest_Lyndon_suf_shorter: assumes "w  ε"
  shows "|w<¯(LynSuf w)| < |w|"
  using nemp_len[OF longest_Lyndon_suf_nemp[OF w  ε]] arg_cong[OF rq_suf[OF longest_Lyndon_suf_suf[OF w  ε]], of length]
  unfolding lenmorph by linarith

section "Lyndon factorizations"

function Lyndon_fac::"'a list   'a list list" ("LynFac")
  where "Lyndon_fac w  = (if w  ε then ((Lyndon_fac (w <¯(LynSuf w) ))  [LynSuf w]) else ε)"
  using longest_Lyndon_suffix.cases by blast+
termination
proof(relation "measure length", simp)
  show "w. w  ε  (w<¯LynSuf w, w)  measure length"
    unfolding measure_def inv_image_def using longest_Lyndon_suf_shorter by blast
qed

text‹The factorization @{term "Lyndon_fac w"} obtained by taking always the longest Lyndon suffix is well defined,
and called ``Lyndon factorization (of $w$)''.›


lemma Lyndon_fac_simp: "w  ε  Lyndon_fac w =  Lyndon_fac (w<¯LynSuf w)  (LynSuf w # ε)"
  using Lyndon_fac.simps[of w] by meson

lemma Lyndon_fac_emp: "Lyndon_fac ε = ε"
  by simp

text‹Note that the Lyndon factorization of a Lyndon word is trivial.›

lemma Lyndon_fac_longest_Lyndon_id: "Lyndon w  Lyndon_fac w = [w]"
  by (simp add: longest_Lyndon_suf_Lyndon_id)

text‹Lyndon factorization is composed of Lyndon words ...›

lemma Lyndon_fac_set: "z  set (Lyndon_fac w)  Lyndon z"
proof(induction w rule:  Lyndon_fac.induct)
  case (1 w)
  then show "Lyndon z"
  proof (cases "w = ε")
    assume "w  ε"
    have "Lyndon_fac w  = (Lyndon_fac (w <¯(LynSuf w) ))  [LynSuf w]"
      using Lyndon_fac_simp[OF w  ε].
    from set_ConsD[OF "1.prems"(1)[unfolded rotate1.simps(2)[of "LynSuf w" "Lyndon_fac (w <¯(LynSuf w) )", folded this, symmetric], unfolded set_rotate1]]
    have "z = LynSuf w  z  set (Lyndon_fac (w <¯(LynSuf w) ))".
    thus "Lyndon z"
      using "1.IH"[OF w  ε] longest_Lyndon_suf_Lyndon[OF w  ε]
      by blast
  next
    assume "w = ε"
    thus "Lyndon z"
      using "1.prems"
      unfolding Lyndon_fac_emp[folded w = ε] list.set(1) empty_iff
      by blast
  qed
qed

text‹...and it indeed is a factorization of the argument.›

lemma Lyndon_fac_longest_dec: "concat (Lyndon_fac w) = w"
proof(induction w rule: Lyndon_fac.induct)
  case (1 w)
  thus "concat (LynFac w) = w"
  proof (cases "w = ε", simp)
  assume "w  ε"
    have eq: "concat (Lyndon_fac w) = concat ( (Lyndon_fac (w <¯(LynSuf w) )) )  concat ([LynSuf w])"
      unfolding Lyndon_fac_simp[OF w  ε] concat_morph..
    from this[unfolded "1.IH"[OF w  ε] concat_sing' rq_suf[OF longest_Lyndon_suf_suf[OF w  ε]]]
    show ?case.
  qed
qed

text‹The following lemma makes explicit the inductive character of the definition of @{term Lyndon_fac}.›

lemma Lyndon_fac_longest_pref: "us ≤p Lyndon_fac w  Lyndon_fac (concat us) = us"
proof(induction w arbitrary: us rule:  Lyndon_fac.induct)
  case (1 w)
  thus "LynFac (concat us) = us"
  proof (cases "w = ε", simp)
    assume "w  ε"
    have step: "Lyndon_fac w = (Lyndon_fac (w <¯(LynSuf w)))  [LynSuf w]"
      using Lyndon_fac_simp[OF w  ε].

    consider (neq) "us  Lyndon_fac w" | (eq) "us = Lyndon_fac w"
      using "1.prems" le_neq_implies_less by blast
    then show "LynFac (concat us) = us"
    proof(cases)
      case neq
      hence "us ≤p Lyndon_fac (w<¯LynSuf w)"
        using "1.prems" last_no_split[of us "Lyndon_fac (w<¯LynSuf w)" "LynSuf w"]
        unfolding step[symmetric] by blast
      thus "LynFac (concat us) = us"
        using "1.IH" w  ε by blast
    next
      case eq
      show "LynFac (concat us) = us"
        using Lyndon_fac_longest_dec[of w, folded eq] eq by simp
    qed
  qed
qed

text‹We give name to an important predicate: monotone (nonincreasing) list of Lyndon words.›

definition Lyndon_mono :: "'a list list   bool" where
  "Lyndon_mono ws  ( u  set ws. Lyndon u)  (rlex.sorted (rev ws))"

lemma Lyndon_mono_set: "Lyndon_mono ws  u  set ws  Lyndon u"
  unfolding Lyndon_mono_def by blast

lemma Lyndon_mono_sorted: "Lyndon_mono ws  rlex.sorted (rev ws)"
  unfolding Lyndon_mono_def by blast

lemma Lyndon_mono_nth: "Lyndon_mono ws  i  j  j < |ws|  ws!j ≤lex ws!i"
  unfolding Lyndon_mono_def using  rlex.sorted_rev_nth_mono by blast

lemma Lyndon_mono_empty[simp]: "Lyndon_mono ε"
  unfolding Lyndon_mono_def by auto

lemma Lyndon_mono_sing: "Lyndon u  Lyndon_mono [u]"
  unfolding Lyndon_mono_def by auto

lemma Lyndon_mono_fac_Lyndon_mono:
  assumes "ps ≤f ws" and "Lyndon_mono ws" shows "Lyndon_mono ps"
unfolding Lyndon_mono_def
proof
  show "x  (set ps). Lyndon x"
    using Lyndon_mono ws[unfolded Lyndon_mono_def] set_mono_sublist[OF ps ≤f ws] by blast
  show "linorder.sorted (≤lex) (rev ps)"
    using rlex.sorted_append Lyndon_mono ws[unfolded Lyndon_mono_def] ps ≤f ws[unfolded sublist_def] by fastforce
qed

text‹Lyndon factorization is monotone!
Altogether, we have shown that the Lyndon factorization is a monotone factorization
into Lyndon words.›

theorem fac_Lyndon_mono: "Lyndon_mono (Lyndon_fac w)"
proof (induct "Lyndon_fac w" arbitrary: w rule: rev_induct, simp)
  case (snoc x xs)
  have "Lyndon x"
    using Lyndon_fac_set[of x, unfolded in_set_conv_decomp, of w, folded snoc.hyps(2)] by fast
  have "concat (xs  [x]) = w"
    using Lyndon_fac_longest_dec[of w, folded snoc.hyps(2)] by auto
  then show "Lyndon_mono (LynFac w)"
  proof (cases "xs = ε")
    assume "xs = ε"
    show "Lyndon_mono (LynFac w)"
      unfolding Lyndon_mono_def xs  [x] = LynFac w[symmetric] xs = ε append.left_neutral
                rlex.sorted1[of x]
      using Lyndon x by force
  next
    assume "xs  ε"
    have "concat (xs  [x])  ε" and "w  ε"
      using Lyndon_fac_longest_dec snoc.hyps(2) by auto
    have "x = LynSuf w" and "xs = LynFac (w<¯LynSuf w )"
      using Lyndon_fac.simps[of w, folded snoc.hyps(2)] w  ε
        Lyndon_fac_longest_dec append1_eq_conv[of xs x "LynFac (w<¯LynSuf w )" "LynSuf w"] by presburger+
    from Lyndon_mono_sorted[OF snoc.hyps(1)[OF xs = LynFac (w<¯LynSuf w )], folded this]
    have "dual_rlex.sorted  xs"
      unfolding sorted_dual_rev_iff.
    have "Lyndon (last xs)"
      using Lyndon_fac_set[of "last xs" "w<¯LynSuf w", folded xs = LynFac (w<¯LynSuf w), OF last_in_set[OF xs  ε]].
    have "x ≤lex last xs"
    proof(rule ccontr)
      assume "¬ x ≤lex last xs" hence "last xs <lex x" by auto
      from Lyndon_concat[OF  Lyndon (last xs) Lyndon x this]
      have "Lyndon ((last xs)  x)".
      have "(last xs)  x ≤s concat (xs  [x])"
        using xs  ε concat_last_suf by auto
      from longest_Lyndon_suf_longest[OF concat (xs  [x])  ε  this Lyndon ((last xs)  x),
          unfolded concat (xs  [x]) = w, folded x = LynSuf w]
      show False
        using Lyndon (last xs) by simp
    qed
    have "dual_rlex.sorted (butlast xs  [last xs])"
      by (simp add: linorder.sorted (λx y. y ≤lex x) xs xs  ε)
    from this x ≤lex last xs
    have "dual_rlex.sorted (butlast xs  [last xs,x])"
      using dual_rlex.sorted_append by auto
    from this[unfolded hd_word[of "last xs" "[x]"] lassoc append_butlast_last_id[OF xs  ε]]
    have "rlex.sorted (rev (LynFac w))"
      unfolding  xs  [x] = LynFac w[symmetric] sorted_dual_rev_iff[symmetric].
    thus "Lyndon_mono (LynFac w)"
      unfolding Lyndon_mono_def using Lyndon_fac_set by blast
  qed
qed

text‹Now we want to show the converse: any monotone factorization into Lyndon words is the Lyndon factorization›

text‹The last element in the Lyndon factorization is the smallest suffix.›

lemma Lyndon_mono_last_smallest: "Lyndon_mono ws  s ≤s (concat ws)  s  ε  last ws ≤lex s"
proof(induct ws arbitrary: s rule: rev_induct, fastforce)
  case (snoc a ws)
  have "ws[a]  ε"
    by blast
  have "last (ws[a]) = a"
    by simp
  from last_in_set[OF ws[a]  ε, unfolded this] Lyndon_mono (ws  [a])[unfolded Lyndon_mono_def]
  have "Lyndon a"
    by blast
  show ?case
  proof(cases "s ≤s a  s  ε")
    case True
    from Lyndon_suf_le[OF Lyndon a] this
    show ?thesis
      by simp
  next
    case False
    hence "ws  ε"
      using snoc.prems(2-3) by force
    obtain s' where "s = s'a"
      using False snoc.prems(2-3)[unfolded concat_append[of ws "[a]", unfolded concat_sing']] suffix_append[of s "concat ws" a]
      by blast
    hence "s' ≤s concat ws"
      using False snoc.prems(2) by fastforce
    have "Lyndon_mono ws"
      using Lyndon_mono (ws[a]) Lyndon_mono_fac_Lyndon_mono
      by blast
    from snoc.hyps[OF this s' ≤s concat ws]
    have "last ws ≤lex s'"
      using False s = s'  a same_suffix_nil snoc.prems(3) by blast
    hence "last ws ≤lex s'a"
      using local.lexordp_eq_trans ord.lexordp_eq_pref by blast
    have "a ≤lex last ws"
      using Lyndon_mono (ws[a])
      unfolding Lyndon_mono_def
      by (simp add: ws  ε last_ConsL)
    from dual_rlex.order_trans[OF last ws ≤lex s'  a this, folded s = s'  a]
    show ?thesis
      unfolding last (ws[a]) = a
      by blast
  qed
qed


text‹A monotone list, if seen as a factorization,
must end with the longest suffix›

lemma Lyndon_mono_last_longest: assumes "ws  ε" and "Lyndon_mono ws"
  shows "LynSuf (concat ws) = last ws"
proof-
  have "Lyndon (last ws)"
    using Lyndon_mono_set assms(1) assms(2) last_in_set by blast
  hence "last ws  ε"
    using LyndonD_nemp by blast
  hence "last ws ≤s LynSuf (concat ws)"
    using longest_Lyndon_suf_max[OF  concat_last_suf[OF assms(1)] Lyndon (last ws)]
    by blast

  have "concat ws  ε"
    using Lyndon.simps assms(2)[unfolded Lyndon_mono_def] set_nemp_concat_nemp[OF assms(1)]
    by blast
  from longest_Lyndon_suf_nemp[OF this] longest_Lyndon_suf_suf[OF this]
  have "LynSuf (concat ws) ≤s concat ws"
    by simp

  show ?thesis
    using Lyndon_mono_last_smallest[OF Lyndon_mono ws LynSuf (concat ws) ≤s concat ws]
      Lyndon_suf_le[OF longest_Lyndon_suf_Lyndon[OF concat ws  ε], OF last ws ≤s LynSuf (concat ws)]
      eq_iff LynSuf (concat ws)  ε last ws  ε
    by simp
qed

text‹Therefore, by construction,
any monotone list is the Lyndon factorization of its concatenation›

lemma Lyndon_mono_fac:
  "Lyndon_mono ws  ws = Lyndon_fac (concat ws)"
proof (induct ws rule: rev_induct, simp)
  case (snoc x xs)
  have "Lyndon_mono xs"
    using Lyndon_mono (xs  [x])
    unfolding Lyndon_mono_def
    by simp
  from snoc.hyps[OF this]
  have "xs = LynFac (concat xs)".
  have "x = LynSuf (concat (xs  [x]))"
    using Lyndon_mono_last_longest[OF _ Lyndon_mono (xs  [x]), unfolded last_snoc] by simp
  have "concat (xs  [x])<¯x = concat xs"
    by simp
  have "concat (xs  [x])  ε"
    using Lyndon_mono_set snoc.prems by auto
  from this
  show ?case
    using Lyndon_fac.simps[of "concat (xs  [x])", folded x = LynSuf (concat (xs  [x])),
        unfolded concat (xs  [x])<¯x = concat xs, folded xs = LynFac (concat xs)]
    by presburger
qed


text‹This implies that the Lyndon factorization can be characterized
in two equivalent ways: as the (unique) monotone factorization (into Lyndon words) or as
the "suffix greedy" factorization (into Lyndon words).
›

corollary Lyndon_mono_fac_iff: "Lyndon_mono ws  ws = LynFac (concat ws)"
  using Lyndon_mono_fac fac_Lyndon_mono[of "concat ws"] by fastforce

corollary Lyndon_mono_unique: assumes "Lyndon_mono ws" and "Lyndon_mono zs" and "concat ws = concat zs"
  shows "ws = zs"
  using Lyndon_mono_fac[OF Lyndon_mono ws] Lyndon_mono_fac[OF Lyndon_mono zs]
  unfolding concat ws = concat zs  by simp


subsection "Standard factorization"

lemma Lyndon_std: assumes "Lyndon w" "1 < |w|"
  obtains l m where "w = lm" and "Lyndon l" and "Lyndon m" and "l <lex m"
proof-
  have "w  ε" "tl w  ε"
    using 1 < |w| long_list_tl by auto
  define m where  "m = LynSuf (tl w)"
  hence "Lyndon m"
    using tl w  ε local.longest_Lyndon_suf_Lyndon by blast
  have "m ≤s w"
    unfolding m_def
    using suffix_order.trans[OF longest_Lyndon_suf_suf[OF tl w  ε] suffix_tl[of w]].
  moreover have "m  w"
    unfolding m_def using  hd_tl[OF w  ε] longest_Lyndon_suf_suf[OF tl w  ε] same_suffix_nil
    not_Cons_self2 by metis
  ultimately obtain l where "w = lm" and "l  ε"
   by (auto simp add: suffix_def)

  have "Lyndon l"
  proof (rule unbordered_pref_Lyndon[OF Lyndon w[unfolded w = lm] l  ε], rule)
    assume "bordered l"
    from unbordered_border[OF this, unfolded border_def]
    obtain s where "s  ε" and "s  l" and "s ≤p l" and "s ≤s l" and "¬ bordered s"
      by blast
    have "Lyndon s"
      using unbordered_pref_Lyndon[OF _ s  ε ¬ bordered s, of "s¯>lm", unfolded lassoc lq_pref[OF s ≤p l]]
      Lyndon w[unfolded w = l  m] by blast
    have "s <lex m"
      using Lyndon_pref_suf_less[OF Lyndon w _  m ≤s w LyndonD_nemp[OF Lyndon m]
         m  w, of s] Lyndon.elims(2)[OF Lyndon m]
         s ≤p l prefix_append[of s l m, folded w = l  m] by argo

    from Lyndon_concat[OF Lyndon s Lyndon m this]
    have "Lyndon (sm)".
    moreover have "sm ≤s tl w"
      unfolding w = l  m using s  l s ≤s l list.collapse[OF w  ε, unfolded w = l  m]
      by (auto simp add: suffix_def)
    ultimately show False
      using m_def s  ε longest_Lyndon_suf_max same_suffix_nil by blast
  qed

  have "l <lex m"
    using Lyndon_pref_suf_less[OF Lyndon w prefI[OF w = l  m[symmetric]] m ≤s w
        longest_Lyndon_suf_nemp[OF tl w  ε, folded  m_def] m  w].
  from that[OF w = l  m Lyndon l  Lyndon m this]
  show thesis.
qed

corollary Lyndon_std_iff:
  "Lyndon w  (|w| = 1  ( l m. w = lm  Lyndon l  Lyndon m  l <lex m))"
  (is "?L  ?R")
proof
  assume ?L
  show ?R
    using Lyndon_std[OF Lyndon w]
      nemp_le_len[OF LyndonD_nemp[OF Lyndon w], unfolded le_eq_less_or_eq]
    by metis
next
  assume ?R
  thus ?L
  proof(rule disjE, fastforce)
    show l m. w = l  m  Lyndon l  Lyndon m  l <lex m  Lyndon w
      using Lyndon_concat by blast
  qed
qed

end  ― ‹end context linorder›

end