Theory Lyndon

(*  Title:      Combinatorics_Words_Lyndon.Lyndon
    Author:     Štěpán Holub, Charles University
    Author:     Štěpán Starosta, CTU in Prague

Part of Combinatorics on Words Formalized. See https://gitlab.com/formalcow/combinatorics-on-words-formalized/
*)
theory Lyndon
  imports Combinatorics_Words.CoWBasic
begin

chapter "Lyndon words"

text‹A Lyndon word is a non-empty word that is lexicographically
strictly smaller than any other word in its conjugacy class, i.e., than any its rotations.
They are named after R. Lyndon who introduced them in @{cite Lyndon54} as ``standard'' sequences.

We present elementary results on Lyndon words, mostly covered by results in @{cite ‹Chapter 5› Lo83} and @{cite Duval80 and Duval83}.

This definition assumes a linear order on letters given by the context.
›


section "Definition and elementary properties"

subsection "Underlying order"

lemma (in linorder) lexordp_mid_pref: "ord_class.lexordp u v ⟹ ord_class.lexordp v (u⋅s) ⟹
   u ≤p v"
  by (induct rule: lexordp_induct, simp_all)

lemma (in linorder) lexordp_ext: "ord_class.lexordp u v ⟹  ¬ u ≤p v ⟹
  ord_class.lexordp (u⋅w) (v⋅z)"
  by (induct rule: lexordp_induct, simp_all)

context linorder
begin

abbreviation Lyndon_less :: "'a list ⇒ 'a list ⇒ bool" (infixl ‹<lex› 50)
  where "Lyndon_less xs ys ≡ ord_class.lexordp xs ys"

abbreviation Lyndon_le :: "'a list ⇒ 'a list ⇒ bool" (infixl ‹≤lex› 50)
  where "Lyndon_le xs ys ≡ ord_class.lexordp_eq xs ys"

interpretation rlex: linorder "(≤lex)" "(<lex)"
  using lexordp_linorder.

interpretation dual_rlex: linorder "λ x y. y ≤lex x" "λ x y. y <lex x"
  using rlex.dual_linorder.

lemma sorted_dual_rev_iff: "dual_rlex.sorted ws ⟷ rlex.sorted (rev ws)"
  unfolding rlex.sorted_rev_iff_nth_mono dual_rlex.sorted_iff_nth_mono by blast

text ‹Several useful lemmas that are formulated for relations, interpreted for the default linear order.›

lemmas lexord_suf_linorder = lexord_sufE[of _ _ _ _ "{(x, y). x < y}", folded lexordp_conv_lexord]
  and lexord_append_leftI_linorder = lexord_append_leftI[of  _ _  "{(x, y). x < y}" _, folded lexordp_conv_lexord]
  and lexord_app_right_linorder = lexord_sufI[of  _ _  "{(x, y). x < y}" _, folded lexordp_conv_lexord]
  and lexord_take_index_conv_linorder = lexord_take_index_conv[of  _ _  "{(x, y). x < y}", folded lexordp_conv_lexord]
  and mismatch_lexord_linorder = mismatch_lexord[of  _ _  "{(x, y). x < y}", folded lexordp_conv_lexord]
  and lexord_cancel_right_linorder = lexord_cancel_right[of _ _ _ _ "{(a,b). a < b}", folded lexordp_conv_lexord]

subsection "Lyndon word definition"

fun Lyndon :: "'a list ⇒ bool"
  where "Lyndon w = (w ≠ ε ∧ (∀n. 0 < n ∧ n < ❙|w❙| ⟶ w <lex rotate n w))"

lemma LyndonD: "Lyndon w ⟹ 0 < n ⟹ n < ❙|w❙| ⟹ w <lex rotate n w"
  unfolding Lyndon.simps by blast

lemma LyndonD_nemp: "Lyndon w ⟹ w ≠ ε"
  unfolding Lyndon.simps by blast

lemma LyndonI: "w ≠ ε ⟹ ∀ n. 0 < n ∧ n < ❙|w❙| ⟶ w <lex rotate n w ⟹  Lyndon w"
  unfolding  Lyndon.simps by blast

lemma Lyndon_sing: "Lyndon [a]"
  unfolding Lyndon.simps by auto

lemma Lyndon_prim: assumes "Lyndon w"
  shows "primitive w"
proof-
  have "0 < n ⟹ n < ❙|w❙| ⟹ rotate n w ≠ w" for n
    using LyndonD[OF ‹Lyndon w›, of n] rlex.less_irrefl[of w] by argo
  from no_rotate_prim[OF LyndonD_nemp[OF ‹Lyndon w›]] this
  show ?thesis by blast
qed

lemma Lyndon_conj_greater: "Lyndon (u⋅v) ⟹ u ≠ ε ⟹ v ≠ ε ⟹ u⋅v <lex v⋅u"
  using LyndonD[of "u⋅v" "❙|u❙|", unfolded rotate_append[of u v]]
  by force

subsection "Code equations for Lyndon words"

primrec Lyndon_rec :: "'a list ⇒ nat ⇒ bool" where
    "Lyndon_rec w 0 = True" |
    "Lyndon_rec w (Suc n) = (if w <lex rotate (Suc n) w then Lyndon_rec w n else False)"

lemma Lyndon_rec_all: assumes "Lyndon_rec (a # w) (❙|w❙|)"
  shows "n < ❙|a#w❙| ⟹ 0 < n ⟹ Lyndon_rec (a#w) n"
proof(induction n rule: strict_inc_induct)
  case (base i)
  then show ?case
    using assms by auto
next
  case (step i)
  then show ?case
    by (meson Lyndon_rec.simps(2) zero_less_Suc)
qed

lemma Lyndon_Lyndon_rec: assumes "Lyndon w"
  shows "0 < n ⟹ n < ❙|w❙| ⟹ Lyndon_rec w n"
proof(induction n, simp)
  case (Suc n)
  have "w <lex rotate (Suc n) w"
    using LyndonD Suc.prems(2) assms by blast
  then show ?case
    using Suc.IH[OF _ Suc_lessD[OF ‹Suc n < ❙|w❙|›], folded neq0_conv] Lyndon_rec.simps(1)[of w]
    unfolding Lyndon_rec.simps(2)
    by metis
qed

lemma Lyndon_code [code]:
    "Lyndon Nil = False"
    "Lyndon (a # w) = Lyndon_rec (a # w) (❙|w❙|)"
proof-
  show "Lyndon Nil = False" by simp
  have "a # w ≠ ε"
    by simp
  have ax: "0 < n ⟹ Lyndon_rec (a#w) n ⟹ (a#w) <lex rotate n (a#w)" for n
    using Lyndon_rec.simps(2)[of "a#w"] gr0_implies_Suc[of n] by metis
  have bx: "Lyndon_rec (a # w) (❙|w❙|) = (∀n. n < ❙|a#w❙| ∧ 0 < n ⟶ Lyndon_rec (a#w) n)"
  proof(cases "w = ε", simp)
    assume "w ≠ ε"
    from this[folded length_greater_0_conv]
    show ?thesis
      using Lyndon_rec_all[of a w] length_Cons[of a w] lessI[of "❙|w❙|"]
      by fastforce
  qed
  show "Lyndon (a # w) = Lyndon_rec (a # w) ❙|w❙|"
    unfolding bx Lyndon.simps
    using ax  LyndonI[OF ‹a # w ≠ ε›]Lyndon_Lyndon_rec by blast
qed

subsection "Properties of Lyndon words"

subsubsection "Lyndon words are unbordered"

theorem Lyndon_unbordered: assumes "Lyndon w" shows "¬ bordered w"
proof
  assume "bordered w"
  from bordered_dec[OF this]
  obtain u v where "u⋅v⋅u = w" and "u ≠ ε".
  hence "v ⋅ u ≠ ε" and "u ⋅ v ≠ ε" by blast+
  note lyn = ‹Lyndon w›[folded  ‹u⋅v⋅u = w›]
  have "u⋅v⋅u <lex v⋅u⋅u"
    using Lyndon_conj_greater[of u "v⋅u", OF lyn ‹u ≠ ε› ‹v ⋅ u ≠ ε›, unfolded rassoc].
  from this[unfolded lassoc]
  have "u ⋅ v ≠ v ⋅ u"
    by force
  from lexord_suf_linorder[OF _ this, of u u]
  have "u⋅v <lex v⋅u"
    using  ‹u⋅v⋅u <lex v⋅u⋅u›  by simp
  from lexord_append_leftI_linorder[of  "u⋅v" "v⋅u", unfolded lassoc, OF this, unfolded rassoc]
  have "u⋅u⋅v <lex u⋅v⋅u".
  from this Lyndon_conj_greater[of "u⋅v" u, unfolded rassoc, OF lyn ‹u ⋅ v ≠ ε›  ‹u ≠ ε›]
  show False
    by simp
qed

subsubsection "Each conjugacy class contains a Lyndon word"

lemma conjug_Lyndon_ex: assumes "primitive w"
  obtains n where "Lyndon (rotate n w)"
proof-
  have "w ≠ ε"
    using  prim_nemp[OF ‹primitive w›].

  let ?ConClass = "{rotate n w | n. 0 ≤ n ∧ n < ❙|w❙|}"

  have "?ConClass ≠ {}"
    using ‹w ≠ ε› by blast
  have "finite ?ConClass"
    by force
  have "w ∈ ?ConClass"
    by (rule CollectI)
    (use  le0[of 0] nemp_pos_len[OF ‹w ≠ ε›] id_apply[of w, folded rotate0] in metis)
  have all_rot: "rotate m w ∈ ?ConClass" for m
    using rotate_conv_mod[of _ w] mod_less_divisor[of "❙|w❙|"] ‹w ≠ ε›
    by blast

  obtain w' n where "w' ∈ ?ConClass" and all_b: "∀ b ∈ ?ConClass. b ≤lex w' ⟶ w' = b" and w': "w' = rotate n w"
    using rlex.finite_has_minimal[OF ‹finite ?ConClass› ‹?ConClass ≠ {}›] by auto

  have "rotate n w <lex rotate na (rotate n w)" if  "0 < na" and "na < ❙|w❙|" for na
  proof-
    from prim_no_rotate[OF assms[unfolded prim_rotate_conv[of w n]], of na] ‹na < ❙|w❙|› ‹0 < na›
    have "rotate na (rotate n w) ≠ rotate n w" by force
    hence "¬ rotate na (rotate n w) ≤lex rotate n w"
      using all_b[rule_format, OF all_rot[of "na + n", folded rotate_rotate[of na n w]]] unfolding w'
      by auto
    from rlex.not_le_imp_less[OF this]
    show "rotate n w <lex rotate na (rotate n w)".
  qed
  hence "Lyndon (rotate n w)"
    using ‹w ≠ ε› by auto
  from that[OF this] show thesis.
qed

lemma conjug_Lyndon_ex': assumes "primitive w"
  obtains v where "w ∼ v" and "Lyndon v"
  unfolding conjug_rotate_iff
  using conjug_Lyndon_ex[OF ‹primitive w›]
  by metis

section "Characterization by suffixes"

lemma Lyndon_suf_less: assumes "Lyndon w" "s ≤ns w" "s ≠ w"
  shows "w <lex s"
proof-
  define p where "p = take ❙|s❙| w"
  have "❙|s❙| ≤ ❙|w❙|"
    using nsD[OF ‹s ≤ns w›]
    by (simp add: suffix_length_le)
  have "p ≤p w" and "❙|p❙|  = ❙|s❙|"
    unfolding p_def
    using take_is_prefix ‹❙|s❙| ≤ ❙|w❙|› take_len by blast+
  hence "p ≠ s"
    using Lyndon_unbordered[OF ‹Lyndon w›] ‹s ≤ns w› ‹s ≠ w› assms
    by auto
  define p' s' where "p' = drop ❙|s❙| w" and "s' = take ❙|p'❙| w"
  have "p ⋅ p' = w"
    unfolding p'_def p_def s'_def by simp
  have "s' ⋅ s = w"
    unfolding p'_def p_def s'_def
     using suf_len[OF nsD[OF ‹s ≤ns w›]] nsD[OF ‹s ≤ns w›]
     length_drop suffix_take by metis
  have "❙|p'❙| = ❙|s'❙|"
    using s'_def ‹p⋅p' = w› by auto
  have "w <lex s⋅s'"
    using Lyndon_conj_greater[of s' s, unfolded ‹s' ⋅ s = w›, OF ‹Lyndon w›] ‹p ≠ s›
    unfolding ‹s' ⋅ s = w› p_def using ‹s' ⋅ s = w› assms(3) by fastforce
  from lexord_suf_linorder[OF _ ‹p ≠ s› ‹❙|p❙| = ❙|s❙|› ‹❙|p'❙| = ❙|s'❙|›, OF this[folded ‹p ⋅ p' = w›]]
  have "p <lex s".
  from lexord_app_right_linorder[OF this, of p' ε, unfolded ‹p ⋅ p' = w›]  ‹❙|p❙| = ❙|s❙|›
  show "w <lex s"
    by simp
qed

lemma Lyndon_pref_suf_less:  assumes "Lyndon w" "p ≤p w" "s ≤ns w" "s ≠ w"
  shows "p <lex s"
proof(cases "p = w", simp add: Lyndon_suf_less[OF assms(1) assms(3) assms(4)])
  assume "p ≠ w"
  show "p <lex s"
  proof(rule rlex.less_trans)
    show "p <lex w"
      using ‹p ≠ w› assms(2) lexordp_append_rightI
      by (fastforce simp add: prefix_def)
    show "w <lex s"
      using Lyndon_suf_less assms(1) assms(3) assms(4) by blast
  qed
qed

lemma suf_less_Lyndon: assumes "w ≠ ε" and "∀s. (s ≤ns w ⟶ s ≠ w ⟶ w <lex s)"
  shows "Lyndon w"
proof (cases "primitive w")
  assume "¬ primitive w"
  obtain q k where "q ≠ ε" "1 < k" "q⇧^@k=w" "w≠q" ― ‹the exact match of @{thm non_prim} fastens the proof considerably›
    using non_prim[OF ‹¬ primitive w› ‹w ≠ ε›] by blast
  hence "q ≤ns w"
    unfolding nonempty_suffix_def pow_eq_if_list[of q k] pow_comm[symmetric]
    using sufI[of "q ⇧^@ (k - 1)" q w]
    by presburger

  have "q <p w"
    using ‹1 < k›  ‹q ⇧^@ k = w›
    unfolding pow_eq_if_list[of q k] pow_eq_if_list[of q "k-1"]
    using ‹w ≠ ε›  by auto
  from lexordp_append_rightI[of "q¯⇧^>w" q,
      unfolded lq_pref[OF sprefD1[OF this]], OF lq_spref[OF this]]
  have "q <lex w".
  thus  "Lyndon w"
    unfolding Lyndon.simps using ‹q ≤ns w› ‹w ≠ ε› assms(2) rlex.order.strict_trans by blast
next
  assume "primitive w"
  have "w <lex rotate l w" if assms_l: "0 < l" "l < ❙|w❙|" for l
  proof-
    have "take l w ≤np w" and "❙|take l w❙| = l"
      using assms_l take_is_prefix  ‹l < ❙|w❙|› by auto
    have "drop l w ≤ns w"
      using ‹l < ❙|w❙|› suffix_drop by auto
    have "drop l w ≠ w"
      using append_take_drop_id[of l w] npD'[OF ‹take l w ≤np w›] by force
    have "drop l w ⋅ take l w = rotate l w"
      using rotate_append[of "take l w" "drop l w", symmetric, unfolded ‹❙|take l w❙| = l›,
          unfolded append_take_drop_id].

    have "w <lex drop l w"
      using ‹drop l w ≤ns w› ‹drop l w ≠ w› assms(2) by blast
    from lexord_app_right_linorder[OF this suffix_length_le[OF conjunct2[OF ‹drop l w ≤ns w›[unfolded nonempty_suffix_def]]], of ε "take l w", unfolded  append.right_neutral]
    have "w <lex drop l w ⋅ take l w".
    thus "w <lex rotate l w"
      by (simp add: ‹drop l w ⋅ take l w = rotate l w›)
  qed
  thus "Lyndon w"
    by (simp add: ‹w ≠ ε› local.LyndonI)
qed

corollary Lyndon_suf_char: "w ≠ ε ⟹ Lyndon w ⟷ (∀s. s ≤ns w ⟶ s ≠ w ⟶ w <lex s)"
  using Lyndon_suf_less suf_less_Lyndon by blast

lemma Lyndon_suf_le: "Lyndon w ⟹ s ≤ns w ⟹ w ≤lex s"
  using Lyndon_suf_less rlex.not_less rlex.order.asym by blast

section "Unbordered prefix of a Lyndon word is Lyndon"

lemma unbordered_pref_Lyndon: "Lyndon (u⋅v) ⟹ u ≠ ε ⟹ ¬ bordered u ⟹ Lyndon u"
  unfolding Lyndon_suf_char
proof(standard+)
  fix s
  assume "Lyndon (u ⋅ v)" and  "u ≠ ε" and "¬ bordered u" and "s ≤ns u" and "s ≠ u"
    hence "u ⋅ v <lex s ⋅ v"
      using  Lyndon_suf_less[OF ‹Lyndon (u ⋅ v)›, of "s ⋅ v"] by auto
    have "¬ s ≤p u"
      using ‹¬ bordered u› ‹s ≤ns u› ‹s ≠ u› by auto
    moreover have "¬ u ≤p s"
      using suf_pref_eq[OF nsD[OF‹s ≤ns u›]] ‹s ≠ u› by blast
    ultimately show "u <lex s"
      using lexord_cancel_right_linorder[OF ‹u ⋅ v <lex s ⋅ v›] by blast
qed

section "Concatenation of Lyndon words"

theorem Lyndon_concat: assumes  "Lyndon u" and "Lyndon v" and "u <lex v"
  shows "Lyndon (u⋅v)"
proof-
  have "u⋅v <lex v"
  proof(cases "u ≤p v")
    assume "u ≤p v"
    obtain z where "u⋅z = v" and "z ≤ns v"
      using  lq_pref[OF ‹u ≤p v›] nsI' rlex.less_imp_neq[OF ‹u <lex v›] self_append_conv by metis
    from Lyndon_suf_less[OF ‹Lyndon v› this(2), THEN lexord_append_leftI_linorder, of u]
      LyndonD_nemp[OF ‹Lyndon u›] this(1)
    show ?thesis
      by blast
  next
    assume "¬ u ≤p v"
    then show ?thesis
      using local.lexordp_linear[of v "u⋅v"]
        local.lexordp_mid_pref[OF ‹u <lex v›,of v]
        prefixI[of v u v]
      by argo
  qed

  { fix z
    assume "z ≤ns (u⋅v)" "z ≠ u⋅v"
    have "u⋅v <lex z"
    proof(cases "z ≤ns v")
      assume "z ≤ns v"
      from Lyndon_suf_less[OF ‹Lyndon v› this]
      have "z ≠ v ⟹ v <lex z"
        by blast
      thus "u⋅v <lex z"
        using ‹u ⋅ v <lex v› rlex.less_trans
        by fast
    next
      assume "¬ z ≤ns v"
      then obtain z' where "z' ≤ns u" "z' ≠ u" "z'⋅v = z"
        using ‹z ≤ns u ⋅ v› ‹z ≠ u ⋅ v› suffix_append[of z u v]
        unfolding nonempty_suffix_def
        by force
      from Lyndon_suf_less[OF ‹Lyndon u› this(1) this(2)]
      have "u <lex z'".
      thus "u⋅v <lex z"
        using  ‹z' ≤ns u› lexord_app_right_linorder[of u z' v v] suffix_length_le[of z' u]
        unfolding nonempty_suffix_def ‹z' ⋅ v = z›
        by blast
    qed
  }
  thus ?thesis
    using suf_nemp[OF LyndonD_nemp[OF ‹Lyndon v›], of u, THEN suf_less_Lyndon]
    by blast
qed

section "Longest Lyndon suffix"

fun longest_Lyndon_suffix:: "'a list ⇒ 'a list" (‹LynSuf›) where
  "longest_Lyndon_suffix ε = ε" |
  "longest_Lyndon_suffix (a#w) = (if Lyndon (a#w) then a#w else longest_Lyndon_suffix w)"

lemma longest_Lyndon_suf_ext: "¬ Lyndon (a # w) ⟹ LynSuf w = LynSuf (a # w)"
  using longest_Lyndon_suffix.simps(2) by presburger

lemma longest_Lyndon_suf_suf: "w ≠ ε ⟹ LynSuf w ≤s w"
proof(induction w rule: longest_Lyndon_suffix.induct)
  case 1
  then show ?case by simp
next
  case (2 a w)
  show ?case
  proof(cases "Lyndon (a#w)")
    case True
    then show ?thesis by auto
  next
    case False
    from "2.IH"[OF this, unfolded longest_Lyndon_suf_ext[OF this], THEN suffix_ConsI, of a]
      Lyndon_sing False
    show ?thesis by blast
  qed
qed

lemma longest_Lyndon_suf_max:
  "v ≤s w  ⟹ Lyndon v ⟹ v ≤s (LynSuf w)"
proof(induction w arbitrary: v rule: longest_Lyndon_suffix.induct)
  case 1
  then show ?case
    using longest_Lyndon_suffix.simps(1) by presburger
next
  case (2 a w)
  show ?case
  proof(cases "Lyndon (a#w)")
    case True
    then show ?thesis
      using "2.prems"(1) longest_Lyndon_suffix.simps(2) by presburger
  next
    case False
    have "v ≠ a # w"
      using "2.prems"(2) False by blast
    from "2.IH"[OF False _ "2.prems"(2), unfolded longest_Lyndon_suf_ext[OF False]]
       "2.prems"(1)[unfolded suffix_Cons] this
    show ?thesis by fast
  qed
qed

lemma longest_Lyndon_suf_Lyndon_id: assumes "Lyndon w"
  shows "LynSuf w = w"
proof(cases "w = ε", simp)
  case False
  from longest_Lyndon_suf_suf[OF this]
    suffix_order.order_refl[THEN longest_Lyndon_suf_max[OF _ assms]]
    suffix_order.antisym
  show ?thesis by blast
qed

lemma longest_Lyndon_suf_longest: "w ≠ ε ⟹ v' ≤s w ⟹ Lyndon v' ⟹ ❙|v'❙| ≤ ❙|(LynSuf w)❙|"
  using longest_Lyndon_suf_max suffix_length_le by blast

lemma longest_Lyndon_suf_sing: "LynSuf [a] = [a]"
  using Lyndon_sing longest_Lyndon_suf_Lyndon_id by blast

lemma longest_Lyndon_suf_Lyndon: "w ≠ ε ⟹ Lyndon (LynSuf w)"
proof(induction w rule: longest_Lyndon_suffix.induct, blast)
  case (2 a w)
  show ?case
  proof(cases "Lyndon (a#w)")
    case True
    then show ?thesis
      using longest_Lyndon_suf_Lyndon_id by presburger
  next
    case False
    from "2.IH"[OF this, unfolded longest_Lyndon_suf_ext[OF this]] Lyndon_sing
    show ?thesis by fastforce
  qed
qed

lemma longest_Lyndon_suf_nemp: "w ≠ ε ⟹ LynSuf w ≠ ε"
  using longest_Lyndon_suf_Lyndon[THEN LyndonD_nemp].

lemma longest_Lyndon_sufI:
  assumes "q ≤s w" and "Lyndon q" and all_s: "(∀ s. (s ≤s w ∧ Lyndon s) ⟶ s ≤s q)"
  shows "LynSuf w = q"
proof(cases "w = ε")
  case True
  then show ?thesis
    using assms(1) longest_Lyndon_suffix.simps(1) suffix_bot.extremum_uniqueI by blast
next
  case False
  from all_s longest_Lyndon_suf_Lyndon[OF this] longest_Lyndon_suf_max[OF assms(1) assms(2)]
    longest_Lyndon_suf_suf[OF this] suffix_order.eq_iff
  show ?thesis by blast
qed

corollary longest_Lyndon_sufI':
  assumes "q ≤s w" and "Lyndon q" and all_s: "∀ s. (s ≤s w ∧ Lyndon s) ⟶ ❙|s❙| ≤ ❙|q❙|"
  shows "LynSuf w = q"
  using longest_Lyndon_sufI[OF ‹q ≤s w› ‹Lyndon q›] suf_ruler_le all_s ‹q ≤s w› by blast

text‹The next lemma is fabricated to suit the upcoming definition of longest Lyndon factorization.›

lemma longest_Lyndon_suf_shorter: assumes "w ≠ ε"
  shows "❙|w⇧^<¯(LynSuf w)❙| < ❙|w❙|"
  using nemp_len[OF longest_Lyndon_suf_nemp[OF ‹w ≠ ε›]] arg_cong[OF rq_suf[OF longest_Lyndon_suf_suf[OF ‹w ≠ ε›]], of length]
  unfolding lenmorph by linarith

section "Lyndon factorizations"

function Lyndon_fac::"'a list  ⇒ 'a list list" (‹LynFac›)
  where "Lyndon_fac w  = (if w ≠ ε then ((Lyndon_fac (w ⇧^<¯(LynSuf w) )) ⋅ [LynSuf w]) else ε)"
  using longest_Lyndon_suffix.cases by blast+
termination
proof(relation "measure length", simp)
  show "⋀w. w ≠ ε ⟹ (w⇧^<¯LynSuf w, w) ∈ measure length"
    unfolding measure_def inv_image_def using longest_Lyndon_suf_shorter by blast
qed

text‹The factorization @{term "Lyndon_fac w"} obtained by taking always the longest Lyndon suffix is well defined,
and called ``Lyndon factorization (of $w$)''.›


lemma Lyndon_fac_simp: "w ≠ ε ⟹ Lyndon_fac w =  Lyndon_fac (w⇧^<¯LynSuf w) ⋅ (LynSuf w # ε)"
  using Lyndon_fac.simps[of w] by meson

lemma Lyndon_fac_emp: "Lyndon_fac ε = ε"
  by simp

text‹Note that the Lyndon factorization of a Lyndon word is trivial.›

lemma Lyndon_fac_longest_Lyndon_id: "Lyndon w ⟹ Lyndon_fac w = [w]"
  by (simp add: longest_Lyndon_suf_Lyndon_id)

text‹Lyndon factorization is composed of Lyndon words ...›

lemma Lyndon_fac_set: "z ∈ set (Lyndon_fac w) ⟹ Lyndon z"
proof(induction w rule:  Lyndon_fac.induct)
  case (1 w)
  then show "Lyndon z"
  proof (cases "w = ε")
    assume "w ≠ ε"
    have "Lyndon_fac w  = (Lyndon_fac (w ⇧^<¯(LynSuf w) )) ⋅ [LynSuf w]"
      using Lyndon_fac_simp[OF ‹w ≠ ε›].
    from set_ConsD[OF "1.prems"(1)[unfolded rotate1.simps(2)[of "LynSuf w" "Lyndon_fac (w ⇧^<¯(LynSuf w) )", folded this, symmetric], unfolded set_rotate1]]
    have "z = LynSuf w ∨ z ∈ set (Lyndon_fac (w ⇧^<¯(LynSuf w) ))".
    thus "Lyndon z"
      using "1.IH"[OF ‹w ≠ ε›] longest_Lyndon_suf_Lyndon[OF ‹w ≠ ε›]
      by blast
  next
    assume "w = ε"
    thus "Lyndon z"
      using "1.prems"
      unfolding Lyndon_fac_emp[folded ‹w = ε›] list.set(1) empty_iff
      by blast
  qed
qed

text‹...and it indeed is a factorization of the argument.›

lemma Lyndon_fac_longest_dec: "concat (Lyndon_fac w) = w"
proof(induction w rule: Lyndon_fac.induct)
  case (1 w)
  thus "concat (LynFac w) = w"
  proof (cases "w = ε", simp)
  assume "w ≠ ε"
    have eq: "concat (Lyndon_fac w) = concat ( (Lyndon_fac (w ⇧^<¯(LynSuf w) )) ) ⋅ concat ([LynSuf w])"
      unfolding Lyndon_fac_simp[OF ‹w ≠ ε›] concat_morph..
    from this[unfolded "1.IH"[OF ‹w ≠ ε›] concat_sing' rq_suf[OF longest_Lyndon_suf_suf[OF ‹w ≠ ε›]]]
    show ?case.
  qed
qed

text‹The following lemma makes explicit the inductive character of the definition of @{term Lyndon_fac}.›

lemma Lyndon_fac_longest_pref: "us ≤p Lyndon_fac w ⟹ Lyndon_fac (concat us) = us"
proof(induction w arbitrary: us rule:  Lyndon_fac.induct)
  case (1 w)
  thus "LynFac (concat us) = us"
  proof (cases "w = ε", simp)
    assume "w ≠ ε"
    have step: "Lyndon_fac w = (Lyndon_fac (w ⇧^<¯(LynSuf w))) ⋅ [LynSuf w]"
      using Lyndon_fac_simp[OF ‹w ≠ ε›].

    consider (neq) "us ≠ Lyndon_fac w" | (eq) "us = Lyndon_fac w"
      using "1.prems" le_neq_implies_less by blast
    then show "LynFac (concat us) = us"
    proof(cases)
      case neq
      hence "us ≤p Lyndon_fac (w⇧^<¯LynSuf w)"
        using "1.prems" last_no_split[of us "Lyndon_fac (w⇧^<¯LynSuf w)" "LynSuf w"]
        unfolding step[symmetric] by blast
      thus "LynFac (concat us) = us"
        using "1.IH" ‹w ≠ ε› by blast
    next
      case eq
      show "LynFac (concat us) = us"
        using Lyndon_fac_longest_dec[of w, folded eq] eq by simp
    qed
  qed
qed

text‹We give name to an important predicate: monotone (nonincreasing) list of Lyndon words.›

definition Lyndon_mono :: "'a list list ⇒  bool" where
  "Lyndon_mono ws ⟷ (∀ u ∈ set ws. Lyndon u) ∧ (rlex.sorted (rev ws))"

lemma Lyndon_mono_set: "Lyndon_mono ws ⟹ u ∈ set ws ⟹ Lyndon u"
  unfolding Lyndon_mono_def by blast

lemma Lyndon_mono_sorted: "Lyndon_mono ws ⟹ rlex.sorted (rev ws)"
  unfolding Lyndon_mono_def by blast

lemma Lyndon_mono_nth: "Lyndon_mono ws ⟹ i ≤ j ⟹ j < ❙|ws❙| ⟹ ws!j ≤lex ws!i"
  unfolding Lyndon_mono_def using  rlex.sorted_rev_nth_mono by blast

lemma Lyndon_mono_empty[simp]: "Lyndon_mono ε"
  unfolding Lyndon_mono_def by auto

lemma Lyndon_mono_sing: "Lyndon u ⟹ Lyndon_mono [u]"
  unfolding Lyndon_mono_def by auto

lemma Lyndon_mono_fac_Lyndon_mono:
  assumes "ps ≤f ws" and "Lyndon_mono ws" shows "Lyndon_mono ps"
unfolding Lyndon_mono_def
proof
  show "∀x ∈ (set ps). Lyndon x"
    using ‹Lyndon_mono ws›[unfolded Lyndon_mono_def] set_mono_sublist[OF ‹ps ≤f ws›] by blast
  show "linorder.sorted (≤lex) (rev ps)"
    using rlex.sorted_append ‹Lyndon_mono ws›[unfolded Lyndon_mono_def] ‹ps ≤f ws›[unfolded sublist_def] by fastforce
qed

text‹Lyndon factorization is monotone!
Altogether, we have shown that the Lyndon factorization is a monotone factorization
into Lyndon words.›

theorem fac_Lyndon_mono: "Lyndon_mono (Lyndon_fac w)"
proof (induct "Lyndon_fac w" arbitrary: w rule: rev_induct, simp)
  case (snoc x xs)
  have "Lyndon x"
    using Lyndon_fac_set[of x, unfolded in_set_conv_decomp, of w, folded snoc.hyps(2)] by fast
  have "concat (xs ⋅ [x]) = w"
    using Lyndon_fac_longest_dec[of w, folded snoc.hyps(2)] by auto
  then show "Lyndon_mono (LynFac w)"
  proof (cases "xs = ε")
    assume "xs = ε"
    show "Lyndon_mono (LynFac w)"
      unfolding Lyndon_mono_def ‹xs ⋅ [x] = LynFac w›[symmetric] ‹xs = ε› append.left_neutral
                rlex.sorted1[of x]
      using ‹Lyndon x› by force
  next
    assume "xs ≠ ε"
    have "concat (xs ⋅ [x]) ≠ ε" and "w ≠ ε"
      using Lyndon_fac_longest_dec snoc.hyps(2) by auto
    have "x = LynSuf w" and "xs = LynFac (w⇧^<¯LynSuf w )"
      using Lyndon_fac.simps[of w, folded snoc.hyps(2)] ‹w ≠ ε›
        Lyndon_fac_longest_dec append1_eq_conv[of xs x "LynFac (w⇧^<¯LynSuf w )" "LynSuf w"] by presburger+
    from Lyndon_mono_sorted[OF snoc.hyps(1)[OF ‹xs = LynFac (w⇧^<¯LynSuf w )›], folded this]
    have "dual_rlex.sorted  xs"
      unfolding sorted_dual_rev_iff.
    have "Lyndon (last xs)"
      using Lyndon_fac_set[of "last xs" "w⇧^<¯LynSuf w", folded ‹xs = LynFac (w⇧^<¯LynSuf w)›, OF last_in_set[OF ‹xs ≠ ε›]].
    have "x ≤lex last xs"
    proof(rule ccontr)
      assume "¬ x ≤lex last xs" hence "last xs <lex x" by auto
      from Lyndon_concat[OF  ‹Lyndon (last xs)› ‹Lyndon x› this]
      have "Lyndon ((last xs) ⋅ x)".
      have "(last xs) ⋅ x ≤s concat (xs ⋅ [x])"
        using ‹xs ≠ ε› concat_last_suf by auto
      from longest_Lyndon_suf_longest[OF ‹concat (xs ⋅ [x]) ≠ ε›  this ‹Lyndon ((last xs) ⋅ x)›,
          unfolded ‹concat (xs ⋅ [x]) = w›, folded ‹x = LynSuf w›]
      show False
        using ‹Lyndon (last xs)› by simp
    qed
    have "dual_rlex.sorted (butlast xs ⋅ [last xs])"
      by (simp add: ‹linorder.sorted (λx y. y ≤lex x) xs› ‹xs ≠ ε›)
    from this ‹x ≤lex last xs›
    have "dual_rlex.sorted (butlast xs ⋅ [last xs,x])"
      using dual_rlex.sorted_append by auto
    from this[unfolded hd_word[of "last xs" "[x]"] lassoc append_butlast_last_id[OF ‹xs ≠ ε›]]
    have "rlex.sorted (rev (LynFac w))"
      unfolding  ‹xs ⋅ [x] = LynFac w›[symmetric] sorted_dual_rev_iff[symmetric].
    thus "Lyndon_mono (LynFac w)"
      unfolding Lyndon_mono_def using Lyndon_fac_set by blast
  qed
qed

text‹Now we want to show the converse: any monotone factorization into Lyndon words is the Lyndon factorization›

text‹The last element in the Lyndon factorization is the smallest suffix.›

lemma Lyndon_mono_last_smallest: "Lyndon_mono ws ⟹s ≤ns (concat ws) ⟹ last ws ≤lex s"
proof(induct ws arbitrary: s rule: rev_induct, fastforce)
  case (snoc a ws)
  have "ws⋅[a] ≠ ε"
    by blast
  have "last (ws⋅[a]) = a"
    by simp
  from last_in_set[OF ‹ws⋅[a] ≠ ε›, unfolded this] ‹Lyndon_mono (ws ⋅ [a])›[unfolded Lyndon_mono_def]
  have "Lyndon a"
    by blast
  show ?case
  proof(cases "s ≤ns a")
    case True
    from Lyndon_suf_le[OF ‹Lyndon a›] this
    show ?thesis
      by simp
  next
    case False
    hence "ws ≠ ε"
      using snoc.prems(2) by force
    obtain s' where "s = s'⋅a"
      using False snoc.prems(2)[unfolded concat_append[of ws "[a]", unfolded concat_sing']] suffix_append[of s "concat ws" a]
      unfolding nonempty_suffix_def
      by blast
    hence "s' ≤ns concat ws"
      using False snoc.prems(2) by fastforce
    have "Lyndon_mono ws"
      using ‹Lyndon_mono (ws⋅[a])› Lyndon_mono_fac_Lyndon_mono
      by blast
    from snoc.hyps[OF this ‹s' ≤ns concat ws›]
    have "last ws ≤lex s'"
      by auto
    hence "last ws ≤lex s'⋅a"
      using local.lexordp_eq_trans ord.lexordp_eq_pref by blast
    have "a ≤lex last ws"
      using ‹Lyndon_mono (ws⋅[a])›
      unfolding Lyndon_mono_def
      by (simp add: ‹ws ≠ ε› last_ConsL)
    from dual_rlex.order_trans[OF ‹last ws ≤lex s' ⋅ a› this, folded ‹s = s' ⋅ a›]
    show ?thesis
      unfolding ‹last (ws⋅[a]) = a›
      by blast
  qed
qed


text‹A monotone list, if seen as a factorization,
must end with the longest suffix›

lemma Lyndon_mono_last_longest: assumes "ws ≠ ε" and "Lyndon_mono ws"
  shows "LynSuf (concat ws) = last ws"
proof-
  have "Lyndon (last ws)"
    using Lyndon_mono_set assms(1) assms(2) last_in_set by blast
  hence "last ws ≠ ε"
    using LyndonD_nemp by blast
  hence "last ws ≤ns LynSuf (concat ws)"
    using longest_Lyndon_suf_max[OF  concat_last_suf[OF assms(1)] ‹Lyndon (last ws)›]
    unfolding nonempty_suffix_def
    by blast

  have "concat ws ≠ ε"
    using Lyndon.simps assms(2)[unfolded Lyndon_mono_def] set_nemp_concat_nemp[OF assms(1)]
    by blast
  from longest_Lyndon_suf_nemp[OF this] longest_Lyndon_suf_suf[OF this]
  have "LynSuf (concat ws) ≤ns concat ws"
    unfolding nonempty_suffix_def
    by simp

  show ?thesis
    using Lyndon_mono_last_smallest[OF ‹Lyndon_mono ws› ‹LynSuf (concat ws) ≤ns concat ws›]
      Lyndon_suf_le[OF longest_Lyndon_suf_Lyndon[OF ‹concat ws ≠ ε›], OF ‹last ws ≤ns LynSuf (concat ws)›]
      eq_iff
    by simp
qed

text‹Therefore, by construction,
any monotone list is the Lyndon factorization of its concatenation›

lemma Lyndon_mono_fac:
  "Lyndon_mono ws ⟹ ws = Lyndon_fac (concat ws)"
proof (induct ws rule: rev_induct, simp)
  case (snoc x xs)
  have "Lyndon_mono xs"
    using ‹Lyndon_mono (xs ⋅ [x])›
    unfolding Lyndon_mono_def
    by simp
  from snoc.hyps[OF this]
  have "xs = LynFac (concat xs)".
  have "x = LynSuf (concat (xs ⋅ [x]))"
    using Lyndon_mono_last_longest[OF _ ‹Lyndon_mono (xs ⋅ [x])›, unfolded last_snoc] by simp
  have "concat (xs ⋅ [x])⇧^<¯x = concat xs"
    by simp
  have "concat (xs ⋅ [x]) ≠ ε"
    using Lyndon_mono_set snoc.prems by auto
  from this
  show ?case
    using Lyndon_fac.simps[of "concat (xs ⋅ [x])", folded ‹x = LynSuf (concat (xs ⋅ [x]))›,
        unfolded ‹concat (xs ⋅ [x])⇧^<¯x = concat xs›, folded ‹xs = LynFac (concat xs)›]
    by presburger
qed


text‹This implies that the Lyndon factorization can be characterized
in two equivalent ways: as the (unique) monotone factorization (into Lyndon words) or as
the "suffix greedy" factorization (into Lyndon words).
›

corollary Lyndon_mono_fac_iff: "Lyndon_mono ws ⟷ ws = LynFac (concat ws)"
  using Lyndon_mono_fac fac_Lyndon_mono[of "concat ws"] by fastforce

corollary Lyndon_mono_unique: assumes "Lyndon_mono ws" and "Lyndon_mono zs" and "concat ws = concat zs"
  shows "ws = zs"
  using Lyndon_mono_fac[OF ‹Lyndon_mono ws›] Lyndon_mono_fac[OF ‹Lyndon_mono zs›]
  unfolding ‹concat ws = concat zs›  by simp


subsection "Standard factorization"

lemma Lyndon_std: assumes "Lyndon w" "1 < ❙|w❙|"
  obtains l m where "w = l⋅m" and "Lyndon l" and "Lyndon m" and "l <lex m"
proof-
  have "w ≠ ε" "tl w ≠ ε"
    using ‹1 < ❙|w❙|› long_list_tl by auto
  define m where  "m = LynSuf (tl w)"
  hence "Lyndon m"
    using ‹tl w ≠ ε› local.longest_Lyndon_suf_Lyndon by blast
  have "m ≤s w"
    unfolding m_def
    using suffix_order.trans[OF longest_Lyndon_suf_suf[OF ‹tl w ≠ ε›] suffix_tl[of w]].
  moreover have "m ≠ w"
    unfolding m_def using  hd_tl[OF ‹w ≠ ε›] longest_Lyndon_suf_suf[OF ‹tl w ≠ ε›] same_suffix_nil
    not_Cons_self2 by metis
  ultimately obtain l where "w = l⋅m" and "l ≠ ε"
   by (auto simp add: suffix_def)

  have "Lyndon l"
  proof (rule unbordered_pref_Lyndon[OF ‹Lyndon w›[unfolded ‹w = l⋅m›] ‹l ≠ ε›], rule)
    assume "bordered l"
    from unbordered_border[OF this, unfolded border_def]
    obtain s where "s ≠ ε" and "s ≠ l" and "s ≤p l" and "s ≤s l" and "¬ bordered s"
      by blast
    have "Lyndon s"
      using unbordered_pref_Lyndon[OF _ ‹s ≠ ε› ‹¬ bordered s›, of "s¯⇧^>l⋅m", unfolded lassoc lq_pref[OF ‹s ≤p l›]]
      ‹Lyndon w›[unfolded ‹w = l ⋅ m›] by blast
    have "s <lex m"
      using Lyndon_pref_suf_less[OF ‹Lyndon w› _ nsI[OF LyndonD_nemp[OF ‹Lyndon m›] ‹m ≤s w›]
         ‹m ≠ w›, of s] Lyndon.elims(2)[OF ‹Lyndon m›]
         ‹s ≤p l› prefix_append[of s l m, folded ‹w = l ⋅ m›]
      by presburger
    from Lyndon_concat[OF ‹Lyndon s› ‹Lyndon m› this]
    have "Lyndon (s⋅m)".
    moreover have "s⋅m ≤s tl w"
      unfolding ‹w = l ⋅ m› using ‹s ≠ l› ‹s ≤s l› list.collapse[OF ‹w ≠ ε›, unfolded ‹w = l ⋅ m›]
      by (auto simp add: suffix_def)
    ultimately show False
      using m_def ‹s ≠ ε› longest_Lyndon_suf_max same_suffix_nil by blast
  qed

  have "l <lex m"
    using Lyndon_pref_suf_less[OF ‹Lyndon w› prefI[OF ‹w = l ⋅ m›[symmetric]]
        nsI[OF longest_Lyndon_suf_nemp[OF ‹tl w ≠ ε›, folded  m_def] ‹m ≤s w›] ‹m ≠ w›].
  from that[OF ‹w = l ⋅ m› ‹Lyndon l›  ‹Lyndon m› this]
  show thesis.
qed

corollary Lyndon_std_iff:
  "Lyndon w ⟷ (❙|w❙| = 1 ∨ (∃ l m. w = l⋅m ∧ Lyndon l ∧ Lyndon m ∧ l <lex m))"
  (is "?L ⟷ ?R")
proof
  assume ?L
  show ?R
    using Lyndon_std[OF ‹Lyndon w›]
      nemp_le_len[OF LyndonD_nemp[OF ‹Lyndon w›], unfolded le_eq_less_or_eq]
    by metis
next
  assume ?R
  thus ?L
  proof(rule disjE, fastforce)
    show ‹∃l m. w = l ⋅ m ∧ Lyndon l ∧ Lyndon m ∧ l <lex m ⟹ Lyndon w›
      using Lyndon_concat by blast
  qed
qed

end  ― ‹end context linorder›

end