Theory Lyndon
theory Lyndon
imports Combinatorics_Words.CoWBasic
begin
chapter "Lyndon words"
text‹A Lyndon word is a non-empty word that is lexicographically
strictly smaller than any other word in its conjugacy class, i.e., than any its rotations.
They are named after R. Lyndon who introduced them in @{cite Lyndon54} as ``standard'' sequences.
We present elementary results on Lyndon words, mostly covered by results in @{cite ‹Chapter 5› Lo83} and @{cite Duval80 and Duval83}.
This definition assumes a linear order on letters given by the context.
›
section "Definition and elementary properties"
subsection "Underlying order"
lemma (in linorder) lexordp_mid_pref: "ord_class.lexordp u v ⟹ ord_class.lexordp v (u⋅s) ⟹
u ≤p v"
by (induct rule: lexordp_induct, simp_all)
lemma (in linorder) lexordp_ext: "ord_class.lexordp u v ⟹ ¬ u ≤p v ⟹
ord_class.lexordp (u⋅w) (v⋅z)"
by (induct rule: lexordp_induct, simp_all)
context linorder
begin
abbreviation Lyndon_less :: "'a list ⇒ 'a list ⇒ bool" (infixl ‹<lex› 50)
where "Lyndon_less xs ys ≡ ord_class.lexordp xs ys"
abbreviation Lyndon_le :: "'a list ⇒ 'a list ⇒ bool" (infixl ‹≤lex› 50)
where "Lyndon_le xs ys ≡ ord_class.lexordp_eq xs ys"
interpretation rlex: linorder "(≤lex)" "(<lex)"
using lexordp_linorder.
interpretation dual_rlex: linorder "λ x y. y ≤lex x" "λ x y. y <lex x"
using rlex.dual_linorder.
lemma sorted_dual_rev_iff: "dual_rlex.sorted ws ⟷ rlex.sorted (rev ws)"
unfolding rlex.sorted_rev_iff_nth_mono dual_rlex.sorted_iff_nth_mono by blast
text ‹Several useful lemmas that are formulated for relations, interpreted for the default linear order.›
lemmas lexord_suf_linorder = lexord_sufE[of _ _ _ _ "{(x, y). x < y}", folded lexordp_conv_lexord]
and lexord_append_leftI_linorder = lexord_append_leftI[of _ _ "{(x, y). x < y}" _, folded lexordp_conv_lexord]
and lexord_app_right_linorder = lexord_sufI[of _ _ "{(x, y). x < y}" _, folded lexordp_conv_lexord]
and lexord_take_index_conv_linorder = lexord_take_index_conv[of _ _ "{(x, y). x < y}", folded lexordp_conv_lexord]
and mismatch_lexord_linorder = mismatch_lexord[of _ _ "{(x, y). x < y}", folded lexordp_conv_lexord]
and lexord_cancel_right_linorder = lexord_cancel_right[of _ _ _ _ "{(a,b). a < b}", folded lexordp_conv_lexord]
subsection "Lyndon word definition"
fun Lyndon :: "'a list ⇒ bool"
where "Lyndon w = (w ≠ ε ∧ (∀n. 0 < n ∧ n < ❙|w❙| ⟶ w <lex rotate n w))"
lemma LyndonD: "Lyndon w ⟹ 0 < n ⟹ n < ❙|w❙| ⟹ w <lex rotate n w"
unfolding Lyndon.simps by blast
lemma LyndonD_nemp: "Lyndon w ⟹ w ≠ ε"
unfolding Lyndon.simps by blast
lemma LyndonI: "w ≠ ε ⟹ ∀ n. 0 < n ∧ n < ❙|w❙| ⟶ w <lex rotate n w ⟹ Lyndon w"
unfolding Lyndon.simps by blast
lemma Lyndon_sing: "Lyndon [a]"
unfolding Lyndon.simps by auto
lemma Lyndon_prim: assumes "Lyndon w"
shows "primitive w"
proof-
have "0 < n ⟹ n < ❙|w❙| ⟹ rotate n w ≠ w" for n
using LyndonD[OF ‹Lyndon w›, of n] rlex.less_irrefl[of w] by argo
from no_rotate_prim[OF LyndonD_nemp[OF ‹Lyndon w›]] this
show ?thesis by blast
qed
lemma Lyndon_conj_greater: "Lyndon (u⋅v) ⟹ u ≠ ε ⟹ v ≠ ε ⟹ u⋅v <lex v⋅u"
using LyndonD[of "u⋅v" "❙|u❙|", unfolded rotate_append[of u v]]
by force
subsection "Code equations for Lyndon words"
primrec Lyndon_rec :: "'a list ⇒ nat ⇒ bool" where
"Lyndon_rec w 0 = True" |
"Lyndon_rec w (Suc n) = (if w <lex rotate (Suc n) w then Lyndon_rec w n else False)"
lemma Lyndon_rec_all: assumes "Lyndon_rec (a # w) (❙|w❙|)"
shows "n < ❙|a#w❙| ⟹ 0 < n ⟹ Lyndon_rec (a#w) n"
proof(induction n rule: strict_inc_induct)
case (base i)
then show ?case
using assms by auto
next
case (step i)
then show ?case
by (meson Lyndon_rec.simps(2) zero_less_Suc)
qed
lemma Lyndon_Lyndon_rec: assumes "Lyndon w"
shows "0 < n ⟹ n < ❙|w❙| ⟹ Lyndon_rec w n"
proof(induction n, simp)
case (Suc n)
have "w <lex rotate (Suc n) w"
using LyndonD Suc.prems(2) assms by blast
then show ?case
using Suc.IH[OF _ Suc_lessD[OF ‹Suc n < ❙|w❙|›], folded neq0_conv] Lyndon_rec.simps(1)[of w]
unfolding Lyndon_rec.simps(2)
by metis
qed
lemma Lyndon_code [code]:
"Lyndon Nil = False"
"Lyndon (a # w) = Lyndon_rec (a # w) (❙|w❙|)"
proof-
show "Lyndon Nil = False" by simp
have "a # w ≠ ε"
by simp
have ax: "0 < n ⟹ Lyndon_rec (a#w) n ⟹ (a#w) <lex rotate n (a#w)" for n
using Lyndon_rec.simps(2)[of "a#w"] gr0_implies_Suc[of n] by metis
have bx: "Lyndon_rec (a # w) (❙|w❙|) = (∀n. n < ❙|a#w❙| ∧ 0 < n ⟶ Lyndon_rec (a#w) n)"
proof(cases "w = ε", simp)
assume "w ≠ ε"
from this[folded length_greater_0_conv]
show ?thesis
using Lyndon_rec_all[of a w] length_Cons[of a w] lessI[of "❙|w❙|"]
by fastforce
qed
show "Lyndon (a # w) = Lyndon_rec (a # w) ❙|w❙|"
unfolding bx Lyndon.simps
using ax LyndonI[OF ‹a # w ≠ ε›]Lyndon_Lyndon_rec by blast
qed
subsection "Properties of Lyndon words"
subsubsection "Lyndon words are unbordered"
theorem Lyndon_unbordered: assumes "Lyndon w" shows "¬ bordered w"
proof
assume "bordered w"
from bordered_dec[OF this]
obtain u v where "u⋅v⋅u = w" and "u ≠ ε".
hence "v ⋅ u ≠ ε" and "u ⋅ v ≠ ε" by blast+
note lyn = ‹Lyndon w›[folded ‹u⋅v⋅u = w›]
have "u⋅v⋅u <lex v⋅u⋅u"
using Lyndon_conj_greater[of u "v⋅u", OF lyn ‹u ≠ ε› ‹v ⋅ u ≠ ε›, unfolded rassoc].
from this[unfolded lassoc]
have "u ⋅ v ≠ v ⋅ u"
by force
from lexord_suf_linorder[OF _ this, of u u]
have "u⋅v <lex v⋅u"
using ‹u⋅v⋅u <lex v⋅u⋅u› by simp
from lexord_append_leftI_linorder[of "u⋅v" "v⋅u", unfolded lassoc, OF this, unfolded rassoc]
have "u⋅u⋅v <lex u⋅v⋅u".
from this Lyndon_conj_greater[of "u⋅v" u, unfolded rassoc, OF lyn ‹u ⋅ v ≠ ε› ‹u ≠ ε›]
show False
by simp
qed
subsubsection "Each conjugacy class contains a Lyndon word"
lemma conjug_Lyndon_ex: assumes "primitive w"
obtains n where "Lyndon (rotate n w)"
proof-
have "w ≠ ε"
using prim_nemp[OF ‹primitive w›].
let ?ConClass = "{rotate n w | n. 0 ≤ n ∧ n < ❙|w❙|}"
have "?ConClass ≠ {}"
using ‹w ≠ ε› by blast
have "finite ?ConClass"
by force
have "w ∈ ?ConClass"
by (rule CollectI)
(use le0[of 0] nemp_pos_len[OF ‹w ≠ ε›] id_apply[of w, folded rotate0] in metis)
have all_rot: "rotate m w ∈ ?ConClass" for m
using rotate_conv_mod[of _ w] mod_less_divisor[of "❙|w❙|"] ‹w ≠ ε›
by blast
obtain w' n where "w' ∈ ?ConClass" and all_b: "∀ b ∈ ?ConClass. b ≤lex w' ⟶ w' = b" and w': "w' = rotate n w"
using rlex.finite_has_minimal[OF ‹finite ?ConClass› ‹?ConClass ≠ {}›] by auto
have "rotate n w <lex rotate na (rotate n w)" if "0 < na" and "na < ❙|w❙|" for na
proof-
from prim_no_rotate[OF assms[unfolded prim_rotate_conv[of w n]], of na] ‹na < ❙|w❙|› ‹0 < na›
have "rotate na (rotate n w) ≠ rotate n w" by force
hence "¬ rotate na (rotate n w) ≤lex rotate n w"
using all_b[rule_format, OF all_rot[of "na + n", folded rotate_rotate[of na n w]]] unfolding w'
by auto
from rlex.not_le_imp_less[OF this]
show "rotate n w <lex rotate na (rotate n w)".
qed
hence "Lyndon (rotate n w)"
using ‹w ≠ ε› by auto
from that[OF this] show thesis.
qed
lemma conjug_Lyndon_ex': assumes "primitive w"
obtains v where "w ∼ v" and "Lyndon v"
unfolding conjug_rotate_iff
using conjug_Lyndon_ex[OF ‹primitive w›]
by metis
section "Characterization by suffixes"
lemma Lyndon_suf_less: assumes "Lyndon w" "s ≤ns w" "s ≠ w"
shows "w <lex s"
proof-
define p where "p = take ❙|s❙| w"
have "❙|s❙| ≤ ❙|w❙|"
using nsD[OF ‹s ≤ns w›]
by (simp add: suffix_length_le)
have "p ≤p w" and "❙|p❙| = ❙|s❙|"
unfolding p_def
using take_is_prefix ‹❙|s❙| ≤ ❙|w❙|› take_len by blast+
hence "p ≠ s"
using Lyndon_unbordered[OF ‹Lyndon w›] ‹s ≤ns w› ‹s ≠ w› assms
by auto
define p' s' where "p' = drop ❙|s❙| w" and "s' = take ❙|p'❙| w"
have "p ⋅ p' = w"
unfolding p'_def p_def s'_def by simp
have "s' ⋅ s = w"
unfolding p'_def p_def s'_def
using suf_len[OF nsD[OF ‹s ≤ns w›]] nsD[OF ‹s ≤ns w›]
length_drop suffix_take by metis
have "❙|p'❙| = ❙|s'❙|"
using s'_def ‹p⋅p' = w› by auto
have "w <lex s⋅s'"
using Lyndon_conj_greater[of s' s, unfolded ‹s' ⋅ s = w›, OF ‹Lyndon w›] ‹p ≠ s›
unfolding ‹s' ⋅ s = w› p_def using ‹s' ⋅ s = w› assms(3) by fastforce
from lexord_suf_linorder[OF _ ‹p ≠ s› ‹❙|p❙| = ❙|s❙|› ‹❙|p'❙| = ❙|s'❙|›, OF this[folded ‹p ⋅ p' = w›]]
have "p <lex s".
from lexord_app_right_linorder[OF this, of p' ε, unfolded ‹p ⋅ p' = w›] ‹❙|p❙| = ❙|s❙|›
show "w <lex s"
by simp
qed
lemma Lyndon_pref_suf_less: assumes "Lyndon w" "p ≤p w" "s ≤ns w" "s ≠ w"
shows "p <lex s"
proof(cases "p = w", simp add: Lyndon_suf_less[OF assms(1) assms(3) assms(4)])
assume "p ≠ w"
show "p <lex s"
proof(rule rlex.less_trans)
show "p <lex w"
using ‹p ≠ w› assms(2) lexordp_append_rightI
by (fastforce simp add: prefix_def)
show "w <lex s"
using Lyndon_suf_less assms(1) assms(3) assms(4) by blast
qed
qed
lemma suf_less_Lyndon: assumes "w ≠ ε" and "∀s. (s ≤ns w ⟶ s ≠ w ⟶ w <lex s)"
shows "Lyndon w"
proof (cases "primitive w")
assume "¬ primitive w"
obtain q k where "q ≠ ε" "1 < k" "q⇧@k=w" "w≠q"
using non_prim[OF ‹¬ primitive w› ‹w ≠ ε›] by blast
hence "q ≤ns w"
unfolding nonempty_suffix_def pow_eq_if_list[of q k] pow_comm[symmetric]
using sufI[of "q ⇧@ (k - 1)" q w]
by presburger
have "q <p w"
using ‹1 < k› ‹q ⇧@ k = w›
unfolding pow_eq_if_list[of q k] pow_eq_if_list[of q "k-1"]
using ‹w ≠ ε› by auto
from lexordp_append_rightI[of "q¯⇧>w" q,
unfolded lq_pref[OF sprefD1[OF this]], OF lq_spref[OF this]]
have "q <lex w".
thus "Lyndon w"
unfolding Lyndon.simps using ‹q ≤ns w› ‹w ≠ ε› assms(2) rlex.order.strict_trans by blast
next
assume "primitive w"
have "w <lex rotate l w" if assms_l: "0 < l" "l < ❙|w❙|" for l
proof-
have "take l w ≤np w" and "❙|take l w❙| = l"
using assms_l take_is_prefix ‹l < ❙|w❙|› by auto
have "drop l w ≤ns w"
using ‹l < ❙|w❙|› suffix_drop by auto
have "drop l w ≠ w"
using append_take_drop_id[of l w] npD'[OF ‹take l w ≤np w›] by force
have "drop l w ⋅ take l w = rotate l w"
using rotate_append[of "take l w" "drop l w", symmetric, unfolded ‹❙|take l w❙| = l›,
unfolded append_take_drop_id].
have "w <lex drop l w"
using ‹drop l w ≤ns w› ‹drop l w ≠ w› assms(2) by blast
from lexord_app_right_linorder[OF this suffix_length_le[OF conjunct2[OF ‹drop l w ≤ns w›[unfolded nonempty_suffix_def]]], of ε "take l w", unfolded append.right_neutral]
have "w <lex drop l w ⋅ take l w".
thus "w <lex rotate l w"
by (simp add: ‹drop l w ⋅ take l w = rotate l w›)
qed
thus "Lyndon w"
by (simp add: ‹w ≠ ε› local.LyndonI)
qed
corollary Lyndon_suf_char: "w ≠ ε ⟹ Lyndon w ⟷ (∀s. s ≤ns w ⟶ s ≠ w ⟶ w <lex s)"
using Lyndon_suf_less suf_less_Lyndon by blast
lemma Lyndon_suf_le: "Lyndon w ⟹ s ≤ns w ⟹ w ≤lex s"
using Lyndon_suf_less rlex.not_less rlex.order.asym by blast
section "Unbordered prefix of a Lyndon word is Lyndon"
lemma unbordered_pref_Lyndon: "Lyndon (u⋅v) ⟹ u ≠ ε ⟹ ¬ bordered u ⟹ Lyndon u"
unfolding Lyndon_suf_char
proof(standard+)
fix s
assume "Lyndon (u ⋅ v)" and "u ≠ ε" and "¬ bordered u" and "s ≤ns u" and "s ≠ u"
hence "u ⋅ v <lex s ⋅ v"
using Lyndon_suf_less[OF ‹Lyndon (u ⋅ v)›, of "s ⋅ v"] by auto
have "¬ s ≤p u"
using ‹¬ bordered u› ‹s ≤ns u› ‹s ≠ u› by auto
moreover have "¬ u ≤p s"
using suf_pref_eq[OF nsD[OF‹s ≤ns u›]] ‹s ≠ u› by blast
ultimately show "u <lex s"
using lexord_cancel_right_linorder[OF ‹u ⋅ v <lex s ⋅ v›] by blast
qed
section "Concatenation of Lyndon words"
theorem Lyndon_concat: assumes "Lyndon u" and "Lyndon v" and "u <lex v"
shows "Lyndon (u⋅v)"
proof-
have "u⋅v <lex v"
proof(cases "u ≤p v")
assume "u ≤p v"
obtain z where "u⋅z = v" and "z ≤ns v"
using lq_pref[OF ‹u ≤p v›] nsI' rlex.less_imp_neq[OF ‹u <lex v›] self_append_conv by metis
from Lyndon_suf_less[OF ‹Lyndon v› this(2), THEN lexord_append_leftI_linorder, of u]
LyndonD_nemp[OF ‹Lyndon u›] this(1)
show ?thesis
by blast
next
assume "¬ u ≤p v"
then show ?thesis
using local.lexordp_linear[of v "u⋅v"]
local.lexordp_mid_pref[OF ‹u <lex v›,of v]
prefixI[of v u v]
by argo
qed
{ fix z
assume "z ≤ns (u⋅v)" "z ≠ u⋅v"
have "u⋅v <lex z"
proof(cases "z ≤ns v")
assume "z ≤ns v"
from Lyndon_suf_less[OF ‹Lyndon v› this]
have "z ≠ v ⟹ v <lex z"
by blast
thus "u⋅v <lex z"
using ‹u ⋅ v <lex v› rlex.less_trans
by fast
next
assume "¬ z ≤ns v"
then obtain z' where "z' ≤ns u" "z' ≠ u" "z'⋅v = z"
using ‹z ≤ns u ⋅ v› ‹z ≠ u ⋅ v› suffix_append[of z u v]
unfolding nonempty_suffix_def
by force
from Lyndon_suf_less[OF ‹Lyndon u› this(1) this(2)]
have "u <lex z'".
thus "u⋅v <lex z"
using ‹z' ≤ns u› lexord_app_right_linorder[of u z' v v] suffix_length_le[of z' u]
unfolding nonempty_suffix_def ‹z' ⋅ v = z›
by blast
qed
}
thus ?thesis
using suf_nemp[OF LyndonD_nemp[OF ‹Lyndon v›], of u, THEN suf_less_Lyndon]
by blast
qed
section "Longest Lyndon suffix"
fun longest_Lyndon_suffix:: "'a list ⇒ 'a list" (‹LynSuf›) where
"longest_Lyndon_suffix ε = ε" |
"longest_Lyndon_suffix (a#w) = (if Lyndon (a#w) then a#w else longest_Lyndon_suffix w)"
lemma longest_Lyndon_suf_ext: "¬ Lyndon (a # w) ⟹ LynSuf w = LynSuf (a # w)"
using longest_Lyndon_suffix.simps(2) by presburger
lemma longest_Lyndon_suf_suf: "w ≠ ε ⟹ LynSuf w ≤s w"
proof(induction w rule: longest_Lyndon_suffix.induct)
case 1
then show ?case by simp
next
case (2 a w)
show ?case
proof(cases "Lyndon (a#w)")
case True
then show ?thesis by auto
next
case False
from "2.IH"[OF this, unfolded longest_Lyndon_suf_ext[OF this], THEN suffix_ConsI, of a]
Lyndon_sing False
show ?thesis by blast
qed
qed
lemma longest_Lyndon_suf_max:
"v ≤s w ⟹ Lyndon v ⟹ v ≤s (LynSuf w)"
proof(induction w arbitrary: v rule: longest_Lyndon_suffix.induct)
case 1
then show ?case
using longest_Lyndon_suffix.simps(1) by presburger
next
case (2 a w)
show ?case
proof(cases "Lyndon (a#w)")
case True
then show ?thesis
using "2.prems"(1) longest_Lyndon_suffix.simps(2) by presburger
next
case False
have "v ≠ a # w"
using "2.prems"(2) False by blast
from "2.IH"[OF False _ "2.prems"(2), unfolded longest_Lyndon_suf_ext[OF False]]
"2.prems"(1)[unfolded suffix_Cons] this
show ?thesis by fast
qed
qed
lemma longest_Lyndon_suf_Lyndon_id: assumes "Lyndon w"
shows "LynSuf w = w"
proof(cases "w = ε", simp)
case False
from longest_Lyndon_suf_suf[OF this]
suffix_order.order_refl[THEN longest_Lyndon_suf_max[OF _ assms]]
suffix_order.antisym
show ?thesis by blast
qed
lemma longest_Lyndon_suf_longest: "w ≠ ε ⟹ v' ≤s w ⟹ Lyndon v' ⟹ ❙|v'❙| ≤ ❙|(LynSuf w)❙|"
using longest_Lyndon_suf_max suffix_length_le by blast
lemma longest_Lyndon_suf_sing: "LynSuf [a] = [a]"
using Lyndon_sing longest_Lyndon_suf_Lyndon_id by blast
lemma longest_Lyndon_suf_Lyndon: "w ≠ ε ⟹ Lyndon (LynSuf w)"
proof(induction w rule: longest_Lyndon_suffix.induct, blast)
case (2 a w)
show ?case
proof(cases "Lyndon (a#w)")
case True
then show ?thesis
using longest_Lyndon_suf_Lyndon_id by presburger
next
case False
from "2.IH"[OF this, unfolded longest_Lyndon_suf_ext[OF this]] Lyndon_sing
show ?thesis by fastforce
qed
qed
lemma longest_Lyndon_suf_nemp: "w ≠ ε ⟹ LynSuf w ≠ ε"
using longest_Lyndon_suf_Lyndon[THEN LyndonD_nemp].
lemma longest_Lyndon_sufI:
assumes "q ≤s w" and "Lyndon q" and all_s: "(∀ s. (s ≤s w ∧ Lyndon s) ⟶ s ≤s q)"
shows "LynSuf w = q"
proof(cases "w = ε")
case True
then show ?thesis
using assms(1) longest_Lyndon_suffix.simps(1) suffix_bot.extremum_uniqueI by blast
next
case False
from all_s longest_Lyndon_suf_Lyndon[OF this] longest_Lyndon_suf_max[OF assms(1) assms(2)]
longest_Lyndon_suf_suf[OF this] suffix_order.eq_iff
show ?thesis by blast
qed
corollary longest_Lyndon_sufI':
assumes "q ≤s w" and "Lyndon q" and all_s: "∀ s. (s ≤s w ∧ Lyndon s) ⟶ ❙|s❙| ≤ ❙|q❙|"
shows "LynSuf w = q"
using longest_Lyndon_sufI[OF ‹q ≤s w› ‹Lyndon q›] suf_ruler_le all_s ‹q ≤s w› by blast
text‹The next lemma is fabricated to suit the upcoming definition of longest Lyndon factorization.›
lemma longest_Lyndon_suf_shorter: assumes "w ≠ ε"
shows "❙|w⇧<¯(LynSuf w)❙| < ❙|w❙|"
using nemp_len[OF longest_Lyndon_suf_nemp[OF ‹w ≠ ε›]] arg_cong[OF rq_suf[OF longest_Lyndon_suf_suf[OF ‹w ≠ ε›]], of length]
unfolding lenmorph by linarith
section "Lyndon factorizations"
function Lyndon_fac::"'a list ⇒ 'a list list" (‹LynFac›)
where "Lyndon_fac w = (if w ≠ ε then ((Lyndon_fac (w ⇧<¯(LynSuf w) )) ⋅ [LynSuf w]) else ε)"
using longest_Lyndon_suffix.cases by blast+
termination
proof(relation "measure length", simp)
show "⋀w. w ≠ ε ⟹ (w⇧<¯LynSuf w, w) ∈ measure length"
unfolding measure_def inv_image_def using longest_Lyndon_suf_shorter by blast
qed
text‹The factorization @{term "Lyndon_fac w"} obtained by taking always the longest Lyndon suffix is well defined,
and called ``Lyndon factorization (of $w$)''.›
lemma Lyndon_fac_simp: "w ≠ ε ⟹ Lyndon_fac w = Lyndon_fac (w⇧<¯LynSuf w) ⋅ (LynSuf w # ε)"
using Lyndon_fac.simps[of w] by meson
lemma Lyndon_fac_emp: "Lyndon_fac ε = ε"
by simp
text‹Note that the Lyndon factorization of a Lyndon word is trivial.›
lemma Lyndon_fac_longest_Lyndon_id: "Lyndon w ⟹ Lyndon_fac w = [w]"
by (simp add: longest_Lyndon_suf_Lyndon_id)
text‹Lyndon factorization is composed of Lyndon words ...›
lemma Lyndon_fac_set: "z ∈ set (Lyndon_fac w) ⟹ Lyndon z"
proof(induction w rule: Lyndon_fac.induct)
case (1 w)
then show "Lyndon z"
proof (cases "w = ε")
assume "w ≠ ε"
have "Lyndon_fac w = (Lyndon_fac (w ⇧<¯(LynSuf w) )) ⋅ [LynSuf w]"
using Lyndon_fac_simp[OF ‹w ≠ ε›].
from set_ConsD[OF "1.prems"(1)[unfolded rotate1.simps(2)[of "LynSuf w" "Lyndon_fac (w ⇧<¯(LynSuf w) )", folded this, symmetric], unfolded set_rotate1]]
have "z = LynSuf w ∨ z ∈ set (Lyndon_fac (w ⇧<¯(LynSuf w) ))".
thus "Lyndon z"
using "1.IH"[OF ‹w ≠ ε›] longest_Lyndon_suf_Lyndon[OF ‹w ≠ ε›]
by blast
next
assume "w = ε"
thus "Lyndon z"
using "1.prems"
unfolding Lyndon_fac_emp[folded ‹w = ε›] list.set(1) empty_iff
by blast
qed
qed
text‹...and it indeed is a factorization of the argument.›
lemma Lyndon_fac_longest_dec: "concat (Lyndon_fac w) = w"
proof(induction w rule: Lyndon_fac.induct)
case (1 w)
thus "concat (LynFac w) = w"
proof (cases "w = ε", simp)
assume "w ≠ ε"
have eq: "concat (Lyndon_fac w) = concat ( (Lyndon_fac (w ⇧<¯(LynSuf w) )) ) ⋅ concat ([LynSuf w])"
unfolding Lyndon_fac_simp[OF ‹w ≠ ε›] concat_morph..
from this[unfolded "1.IH"[OF ‹w ≠ ε›] concat_sing' rq_suf[OF longest_Lyndon_suf_suf[OF ‹w ≠ ε›]]]
show ?case.
qed
qed
text‹The following lemma makes explicit the inductive character of the definition of @{term Lyndon_fac}.›
lemma Lyndon_fac_longest_pref: "us ≤p Lyndon_fac w ⟹ Lyndon_fac (concat us) = us"
proof(induction w arbitrary: us rule: Lyndon_fac.induct)
case (1 w)
thus "LynFac (concat us) = us"
proof (cases "w = ε", simp)
assume "w ≠ ε"
have step: "Lyndon_fac w = (Lyndon_fac (w ⇧<¯(LynSuf w))) ⋅ [LynSuf w]"
using Lyndon_fac_simp[OF ‹w ≠ ε›].
consider (neq) "us ≠ Lyndon_fac w" | (eq) "us = Lyndon_fac w"
using "1.prems" le_neq_implies_less by blast
then show "LynFac (concat us) = us"
proof(cases)
case neq
hence "us ≤p Lyndon_fac (w⇧<¯LynSuf w)"
using "1.prems" last_no_split[of us "Lyndon_fac (w⇧<¯LynSuf w)" "LynSuf w"]
unfolding step[symmetric] by blast
thus "LynFac (concat us) = us"
using "1.IH" ‹w ≠ ε› by blast
next
case eq
show "LynFac (concat us) = us"
using Lyndon_fac_longest_dec[of w, folded eq] eq by simp
qed
qed
qed
text‹We give name to an important predicate: monotone (nonincreasing) list of Lyndon words.›
definition Lyndon_mono :: "'a list list ⇒ bool" where
"Lyndon_mono ws ⟷ (∀ u ∈ set ws. Lyndon u) ∧ (rlex.sorted (rev ws))"
lemma Lyndon_mono_set: "Lyndon_mono ws ⟹ u ∈ set ws ⟹ Lyndon u"
unfolding Lyndon_mono_def by blast
lemma Lyndon_mono_sorted: "Lyndon_mono ws ⟹ rlex.sorted (rev ws)"
unfolding Lyndon_mono_def by blast
lemma Lyndon_mono_nth: "Lyndon_mono ws ⟹ i ≤ j ⟹ j < ❙|ws❙| ⟹ ws!j ≤lex ws!i"
unfolding Lyndon_mono_def using rlex.sorted_rev_nth_mono by blast
lemma Lyndon_mono_empty[simp]: "Lyndon_mono ε"
unfolding Lyndon_mono_def by auto
lemma Lyndon_mono_sing: "Lyndon u ⟹ Lyndon_mono [u]"
unfolding Lyndon_mono_def by auto
lemma Lyndon_mono_fac_Lyndon_mono:
assumes "ps ≤f ws" and "Lyndon_mono ws" shows "Lyndon_mono ps"
unfolding Lyndon_mono_def
proof
show "∀x ∈ (set ps). Lyndon x"
using ‹Lyndon_mono ws›[unfolded Lyndon_mono_def] set_mono_sublist[OF ‹ps ≤f ws›] by blast
show "linorder.sorted (≤lex) (rev ps)"
using rlex.sorted_append ‹Lyndon_mono ws›[unfolded Lyndon_mono_def] ‹ps ≤f ws›[unfolded sublist_def] by fastforce
qed
text‹Lyndon factorization is monotone!
Altogether, we have shown that the Lyndon factorization is a monotone factorization
into Lyndon words.›
theorem fac_Lyndon_mono: "Lyndon_mono (Lyndon_fac w)"
proof (induct "Lyndon_fac w" arbitrary: w rule: rev_induct, simp)
case (snoc x xs)
have "Lyndon x"
using Lyndon_fac_set[of x, unfolded in_set_conv_decomp, of w, folded snoc.hyps(2)] by fast
have "concat (xs ⋅ [x]) = w"
using Lyndon_fac_longest_dec[of w, folded snoc.hyps(2)] by auto
then show "Lyndon_mono (LynFac w)"
proof (cases "xs = ε")
assume "xs = ε"
show "Lyndon_mono (LynFac w)"
unfolding Lyndon_mono_def ‹xs ⋅ [x] = LynFac w›[symmetric] ‹xs = ε› append.left_neutral
rlex.sorted1[of x]
using ‹Lyndon x› by force
next
assume "xs ≠ ε"
have "concat (xs ⋅ [x]) ≠ ε" and "w ≠ ε"
using Lyndon_fac_longest_dec snoc.hyps(2) by auto
have "x = LynSuf w" and "xs = LynFac (w⇧<¯LynSuf w )"
using Lyndon_fac.simps[of w, folded snoc.hyps(2)] ‹w ≠ ε›
Lyndon_fac_longest_dec append1_eq_conv[of xs x "LynFac (w⇧<¯LynSuf w )" "LynSuf w"] by presburger+
from Lyndon_mono_sorted[OF snoc.hyps(1)[OF ‹xs = LynFac (w⇧<¯LynSuf w )›], folded this]
have "dual_rlex.sorted xs"
unfolding sorted_dual_rev_iff.
have "Lyndon (last xs)"
using Lyndon_fac_set[of "last xs" "w⇧<¯LynSuf w", folded ‹xs = LynFac (w⇧<¯LynSuf w)›, OF last_in_set[OF ‹xs ≠ ε›]].
have "x ≤lex last xs"
proof(rule ccontr)
assume "¬ x ≤lex last xs" hence "last xs <lex x" by auto
from Lyndon_concat[OF ‹Lyndon (last xs)› ‹Lyndon x› this]
have "Lyndon ((last xs) ⋅ x)".
have "(last xs) ⋅ x ≤s concat (xs ⋅ [x])"
using ‹xs ≠ ε› concat_last_suf by auto
from longest_Lyndon_suf_longest[OF ‹concat (xs ⋅ [x]) ≠ ε› this ‹Lyndon ((last xs) ⋅ x)›,
unfolded ‹concat (xs ⋅ [x]) = w›, folded ‹x = LynSuf w›]
show False
using ‹Lyndon (last xs)› by simp
qed
have "dual_rlex.sorted (butlast xs ⋅ [last xs])"
by (simp add: ‹linorder.sorted (λx y. y ≤lex x) xs› ‹xs ≠ ε›)
from this ‹x ≤lex last xs›
have "dual_rlex.sorted (butlast xs ⋅ [last xs,x])"
using dual_rlex.sorted_append by auto
from this[unfolded hd_word[of "last xs" "[x]"] lassoc append_butlast_last_id[OF ‹xs ≠ ε›]]
have "rlex.sorted (rev (LynFac w))"
unfolding ‹xs ⋅ [x] = LynFac w›[symmetric] sorted_dual_rev_iff[symmetric].
thus "Lyndon_mono (LynFac w)"
unfolding Lyndon_mono_def using Lyndon_fac_set by blast
qed
qed
text‹Now we want to show the converse: any monotone factorization into Lyndon words is the Lyndon factorization›
text‹The last element in the Lyndon factorization is the smallest suffix.›
lemma Lyndon_mono_last_smallest: "Lyndon_mono ws ⟹s ≤ns (concat ws) ⟹ last ws ≤lex s"
proof(induct ws arbitrary: s rule: rev_induct, fastforce)
case (snoc a ws)
have "ws⋅[a] ≠ ε"
by blast
have "last (ws⋅[a]) = a"
by simp
from last_in_set[OF ‹ws⋅[a] ≠ ε›, unfolded this] ‹Lyndon_mono (ws ⋅ [a])›[unfolded Lyndon_mono_def]
have "Lyndon a"
by blast
show ?case
proof(cases "s ≤ns a")
case True
from Lyndon_suf_le[OF ‹Lyndon a›] this
show ?thesis
by simp
next
case False
hence "ws ≠ ε"
using snoc.prems(2) by force
obtain s' where "s = s'⋅a"
using False snoc.prems(2)[unfolded concat_append[of ws "[a]", unfolded concat_sing']] suffix_append[of s "concat ws" a]
unfolding nonempty_suffix_def
by blast
hence "s' ≤ns concat ws"
using False snoc.prems(2) by fastforce
have "Lyndon_mono ws"
using ‹Lyndon_mono (ws⋅[a])› Lyndon_mono_fac_Lyndon_mono
by blast
from snoc.hyps[OF this ‹s' ≤ns concat ws›]
have "last ws ≤lex s'"
by auto
hence "last ws ≤lex s'⋅a"
using local.lexordp_eq_trans ord.lexordp_eq_pref by blast
have "a ≤lex last ws"
using ‹Lyndon_mono (ws⋅[a])›
unfolding Lyndon_mono_def
by (simp add: ‹ws ≠ ε› last_ConsL)
from dual_rlex.order_trans[OF ‹last ws ≤lex s' ⋅ a› this, folded ‹s = s' ⋅ a›]
show ?thesis
unfolding ‹last (ws⋅[a]) = a›
by blast
qed
qed
text‹A monotone list, if seen as a factorization,
must end with the longest suffix›
lemma Lyndon_mono_last_longest: assumes "ws ≠ ε" and "Lyndon_mono ws"
shows "LynSuf (concat ws) = last ws"
proof-
have "Lyndon (last ws)"
using Lyndon_mono_set assms(1) assms(2) last_in_set by blast
hence "last ws ≠ ε"
using LyndonD_nemp by blast
hence "last ws ≤ns LynSuf (concat ws)"
using longest_Lyndon_suf_max[OF concat_last_suf[OF assms(1)] ‹Lyndon (last ws)›]
unfolding nonempty_suffix_def
by blast
have "concat ws ≠ ε"
using Lyndon.simps assms(2)[unfolded Lyndon_mono_def] set_nemp_concat_nemp[OF assms(1)]
by blast
from longest_Lyndon_suf_nemp[OF this] longest_Lyndon_suf_suf[OF this]
have "LynSuf (concat ws) ≤ns concat ws"
unfolding nonempty_suffix_def
by simp
show ?thesis
using Lyndon_mono_last_smallest[OF ‹Lyndon_mono ws› ‹LynSuf (concat ws) ≤ns concat ws›]
Lyndon_suf_le[OF longest_Lyndon_suf_Lyndon[OF ‹concat ws ≠ ε›], OF ‹last ws ≤ns LynSuf (concat ws)›]
eq_iff
by simp
qed
text‹Therefore, by construction,
any monotone list is the Lyndon factorization of its concatenation›
lemma Lyndon_mono_fac:
"Lyndon_mono ws ⟹ ws = Lyndon_fac (concat ws)"
proof (induct ws rule: rev_induct, simp)
case (snoc x xs)
have "Lyndon_mono xs"
using ‹Lyndon_mono (xs ⋅ [x])›
unfolding Lyndon_mono_def
by simp
from snoc.hyps[OF this]
have "xs = LynFac (concat xs)".
have "x = LynSuf (concat (xs ⋅ [x]))"
using Lyndon_mono_last_longest[OF _ ‹Lyndon_mono (xs ⋅ [x])›, unfolded last_snoc] by simp
have "concat (xs ⋅ [x])⇧<¯x = concat xs"
by simp
have "concat (xs ⋅ [x]) ≠ ε"
using Lyndon_mono_set snoc.prems by auto
from this
show ?case
using Lyndon_fac.simps[of "concat (xs ⋅ [x])", folded ‹x = LynSuf (concat (xs ⋅ [x]))›,
unfolded ‹concat (xs ⋅ [x])⇧<¯x = concat xs›, folded ‹xs = LynFac (concat xs)›]
by presburger
qed
text‹This implies that the Lyndon factorization can be characterized
in two equivalent ways: as the (unique) monotone factorization (into Lyndon words) or as
the "suffix greedy" factorization (into Lyndon words).
›
corollary Lyndon_mono_fac_iff: "Lyndon_mono ws ⟷ ws = LynFac (concat ws)"
using Lyndon_mono_fac fac_Lyndon_mono[of "concat ws"] by fastforce
corollary Lyndon_mono_unique: assumes "Lyndon_mono ws" and "Lyndon_mono zs" and "concat ws = concat zs"
shows "ws = zs"
using Lyndon_mono_fac[OF ‹Lyndon_mono ws›] Lyndon_mono_fac[OF ‹Lyndon_mono zs›]
unfolding ‹concat ws = concat zs› by simp
subsection "Standard factorization"
lemma Lyndon_std: assumes "Lyndon w" "1 < ❙|w❙|"
obtains l m where "w = l⋅m" and "Lyndon l" and "Lyndon m" and "l <lex m"
proof-
have "w ≠ ε" "tl w ≠ ε"
using ‹1 < ❙|w❙|› long_list_tl by auto
define m where "m = LynSuf (tl w)"
hence "Lyndon m"
using ‹tl w ≠ ε› local.longest_Lyndon_suf_Lyndon by blast
have "m ≤s w"
unfolding m_def
using suffix_order.trans[OF longest_Lyndon_suf_suf[OF ‹tl w ≠ ε›] suffix_tl[of w]].
moreover have "m ≠ w"
unfolding m_def using hd_tl[OF ‹w ≠ ε›] longest_Lyndon_suf_suf[OF ‹tl w ≠ ε›] same_suffix_nil
not_Cons_self2 by metis
ultimately obtain l where "w = l⋅m" and "l ≠ ε"
by (auto simp add: suffix_def)
have "Lyndon l"
proof (rule unbordered_pref_Lyndon[OF ‹Lyndon w›[unfolded ‹w = l⋅m›] ‹l ≠ ε›], rule)
assume "bordered l"
from unbordered_border[OF this, unfolded border_def]
obtain s where "s ≠ ε" and "s ≠ l" and "s ≤p l" and "s ≤s l" and "¬ bordered s"
by blast
have "Lyndon s"
using unbordered_pref_Lyndon[OF _ ‹s ≠ ε› ‹¬ bordered s›, of "s¯⇧>l⋅m", unfolded lassoc lq_pref[OF ‹s ≤p l›]]
‹Lyndon w›[unfolded ‹w = l ⋅ m›] by blast
have "s <lex m"
using Lyndon_pref_suf_less[OF ‹Lyndon w› _ nsI[OF LyndonD_nemp[OF ‹Lyndon m›] ‹m ≤s w›]
‹m ≠ w›, of s] Lyndon.elims(2)[OF ‹Lyndon m›]
‹s ≤p l› prefix_append[of s l m, folded ‹w = l ⋅ m›]
by presburger
from Lyndon_concat[OF ‹Lyndon s› ‹Lyndon m› this]
have "Lyndon (s⋅m)".
moreover have "s⋅m ≤s tl w"
unfolding ‹w = l ⋅ m› using ‹s ≠ l› ‹s ≤s l› list.collapse[OF ‹w ≠ ε›, unfolded ‹w = l ⋅ m›]
by (auto simp add: suffix_def)
ultimately show False
using m_def ‹s ≠ ε› longest_Lyndon_suf_max same_suffix_nil by blast
qed
have "l <lex m"
using Lyndon_pref_suf_less[OF ‹Lyndon w› prefI[OF ‹w = l ⋅ m›[symmetric]]
nsI[OF longest_Lyndon_suf_nemp[OF ‹tl w ≠ ε›, folded m_def] ‹m ≤s w›] ‹m ≠ w›].
from that[OF ‹w = l ⋅ m› ‹Lyndon l› ‹Lyndon m› this]
show thesis.
qed
corollary Lyndon_std_iff:
"Lyndon w ⟷ (❙|w❙| = 1 ∨ (∃ l m. w = l⋅m ∧ Lyndon l ∧ Lyndon m ∧ l <lex m))"
(is "?L ⟷ ?R")
proof
assume ?L
show ?R
using Lyndon_std[OF ‹Lyndon w›]
nemp_le_len[OF LyndonD_nemp[OF ‹Lyndon w›], unfolded le_eq_less_or_eq]
by metis
next
assume ?R
thus ?L
proof(rule disjE, fastforce)
show ‹∃l m. w = l ⋅ m ∧ Lyndon l ∧ Lyndon m ∧ l <lex m ⟹ Lyndon w›
using Lyndon_concat by blast
qed
qed
end
end