Theory Lyndon
theory Lyndon
imports Combinatorics_Words.CoWBasic
begin
chapter "Lyndon words"
text‹A Lyndon word is a non-empty word that is lexicographically
strictly smaller than any other word in its conjugacy class, i.e., than any its rotations.
They are named after R. Lyndon who introduced them in \cite{Lyndon54} as ``standard'' sequences.
We present elementary results on Lyndon words, mostly covered by results in \cite[Chapter 5]{Lo83} and \cite{Duval80,Duval83}.
This definition assumes a linear order on letters given by the context.
›
section "Definition and elementary properties"
subsection "Underlying order"
lemma (in linorder) lexordp_mid_pref: "ord_class.lexordp u v ⟹ ord_class.lexordp v (u⋅s) ⟹
u ≤p v"
by (induct rule: lexordp_induct, simp_all)
lemma (in linorder) lexordp_ext: "ord_class.lexordp u v ⟹ ¬ u ≤p v ⟹
ord_class.lexordp (u⋅w) (v⋅z)"
by (induct rule: lexordp_induct, simp_all)
context linorder
begin
abbreviation Lyndon_less :: "'a list ⇒ 'a list ⇒ bool" (infixl "<lex" 50)
where "Lyndon_less xs ys ≡ ord_class.lexordp xs ys"
abbreviation Lyndon_le :: "'a list ⇒ 'a list ⇒ bool" (infixl "≤lex" 50)
where "Lyndon_le xs ys ≡ ord_class.lexordp_eq xs ys"
interpretation rlex: linorder "(≤lex)" "(<lex)"
using lexordp_linorder.
interpretation dual_rlex: linorder "λ x y. y ≤lex x" "λ x y. y <lex x"
using rlex.dual_linorder.
lemma sorted_dual_rev_iff: "dual_rlex.sorted ws ⟷ rlex.sorted (rev ws)"
unfolding rlex.sorted_rev_iff_nth_mono dual_rlex.sorted_iff_nth_mono by blast
text ‹Several useful lemmas that are formulated for relations, interpreted for the default linear order.›
lemmas lexord_suf_linorder = lexord_sufE[of _ _ _ _ "{(x, y). x < y}", folded lexordp_conv_lexord]
and lexord_append_leftI_linorder = lexord_append_leftI[of _ _ "{(x, y). x < y}" _, folded lexordp_conv_lexord]
and lexord_app_right_linorder = lexord_sufI[of _ _ "{(x, y). x < y}" _, folded lexordp_conv_lexord]
and lexord_take_index_conv_linorder = lexord_take_index_conv[of _ _ "{(x, y). x < y}", folded lexordp_conv_lexord]
and mismatch_lexord_linorder = mismatch_lexord[of _ _ "{(x, y). x < y}", folded lexordp_conv_lexord]
and lexord_cancel_right_linorder = lexord_cancel_right[of _ _ _ _ "{(a,b). a < b}", folded lexordp_conv_lexord]
subsection "Lyndon word definition"
fun Lyndon :: "'a list ⇒ bool"
where "Lyndon w = (w ≠ ε ∧ (∀n. 0 < n ∧ n < ❙|w❙| ⟶ w <lex rotate n w))"
lemma LyndonD: "Lyndon w ⟹ 0 < n ⟹ n < ❙|w❙| ⟹ w <lex rotate n w"
unfolding Lyndon.simps by blast
lemma LyndonD_nemp: "Lyndon w ⟹ w ≠ ε"
unfolding Lyndon.simps by blast
lemma LyndonI: "w ≠ ε ⟹ ∀ n. 0 < n ∧ n < ❙|w❙| ⟶ w <lex rotate n w ⟹ Lyndon w"
unfolding Lyndon.simps by blast
lemma Lyndon_sing: "Lyndon [a]"
unfolding Lyndon.simps by auto
lemma Lyndon_prim: assumes "Lyndon w"
shows "primitive w"
proof-
have "0 < n ⟹ n < ❙|w❙| ⟹ rotate n w ≠ w" for n
using LyndonD[OF ‹Lyndon w›, of n] rlex.less_irrefl[of w] by argo
from no_rotate_prim[OF LyndonD_nemp[OF ‹Lyndon w›]] this
show ?thesis by blast
qed
lemma Lyndon_conj_greater: "Lyndon (u⋅v) ⟹ u ≠ ε ⟹ v ≠ ε ⟹ u⋅v <lex v⋅u"
using LyndonD[of "u⋅v" "❙|u❙|", unfolded rotate_append[of u v]]
by force
subsection "Code equations for Lyndon words"
primrec Lyndon_rec :: "'a list ⇒ nat ⇒ bool" where
"Lyndon_rec w 0 = True" |
"Lyndon_rec w (Suc n) = (if w <lex rotate (Suc n) w then Lyndon_rec w n else False)"
lemma Lyndon_rec_all: assumes "Lyndon_rec (a # w) (❙|w❙|)"
shows "n < ❙|a#w❙| ⟹ Lyndon_rec (a#w) n"
proof(induction n rule: strict_inc_induct)
case (base i)
then show ?case
using assms by auto
next
case (step i)
then show ?case
by (meson Lyndon_rec.simps(2) zero_less_Suc)
qed
lemma Lyndon_Lyndon_rec: assumes "Lyndon w"
shows "n < ❙|w❙| ⟹ Lyndon_rec w n"
proof(induction n, simp)
case (Suc n)
have "w <lex rotate (Suc n) w"
using LyndonD Suc.prems(1) assms by blast
then show ?case
using Suc.IH[OF Suc_lessD[OF ‹Suc n < ❙|w❙|›], folded neq0_conv] Lyndon_rec.simps(1)[of w]
unfolding Lyndon_rec.simps(2)
by metis
qed
lemma Lyndon_code [code]:
"Lyndon Nil = False"
"Lyndon (a # w) = Lyndon_rec (a # w) (❙|w❙|)"
proof-
show "Lyndon Nil = False" by simp
have "a # w ≠ ε"
by simp
have ax: "0 < n ⟹ Lyndon_rec (a#w) n ⟹ (a#w) <lex rotate n (a#w)" for n
using Lyndon_rec.simps(2)[of "a#w"] gr0_implies_Suc[of n] by metis
have bx: "Lyndon_rec (a # w) (❙|w❙|) = (∀n. n < ❙|a#w❙| ⟶ Lyndon_rec (a#w) n)"
using Lyndon_rec_all by auto
show "Lyndon (a # w) = Lyndon_rec (a # w) ❙|w❙|"
unfolding bx Lyndon.simps
using ax LyndonI[OF ‹a # w ≠ ε›]Lyndon_Lyndon_rec by blast
qed
subsection "Properties of Lyndon words"
subsubsection "Lyndon words are unbordered"
theorem Lyndon_unbordered: assumes "Lyndon w" shows "¬ bordered w"
proof
assume "bordered w"
from bordered_dec[OF this]
obtain u v where "u⋅v⋅u = w" and "u ≠ ε".
hence "v ⋅ u ≠ ε" and "u ⋅ v ≠ ε" by blast+
note lyn = ‹Lyndon w›[folded ‹u⋅v⋅u = w›]
have "u⋅v⋅u <lex v⋅u⋅u"
using Lyndon_conj_greater[of u "v⋅u", OF lyn ‹u ≠ ε› ‹v ⋅ u ≠ ε›, unfolded rassoc].
from this[unfolded lassoc]
have "u ⋅ v ≠ v ⋅ u"
by force
from lexord_suf_linorder[OF _ this, of u u]
have "u⋅v <lex v⋅u"
using ‹u⋅v⋅u <lex v⋅u⋅u› by simp
from lexord_append_leftI_linorder[of "u⋅v" "v⋅u", unfolded lassoc, OF this, unfolded rassoc]
have "u⋅u⋅v <lex u⋅v⋅u".
from this Lyndon_conj_greater[of "u⋅v" u, unfolded rassoc, OF lyn ‹u ⋅ v ≠ ε› ‹u ≠ ε›]
show False
by simp
qed
subsubsection "Each conjugacy class contains a Lyndon word"
lemma conjug_Lyndon_ex: assumes "primitive w"
obtains n where "Lyndon (rotate n w)"
proof-
have "w ≠ ε"
using prim_nemp[OF ‹primitive w›].
let ?ConClass = "{rotate n w | n. 0 ≤ n ∧ n < ❙|w❙|}"
have "?ConClass ≠ {}"
using ‹w ≠ ε› by blast
have "finite ?ConClass"
by force
have "w ∈ ?ConClass"
by (rule CollectI)
(use le0[of 0] nemp_len[OF ‹w ≠ ε›] id_apply[of w, folded rotate0] in metis)
have all_rot: "rotate m w ∈ ?ConClass" for m
using rotate_conv_mod[of _ w] mod_less_divisor[of "❙|w❙|"] ‹w ≠ ε›
by blast
obtain w' n where "w' ∈ ?ConClass" and all_b: "∀ b ∈ ?ConClass. b ≤lex w' ⟶ w' = b" and w': "w' = rotate n w"
using rlex.finite_has_minimal[OF ‹finite ?ConClass› ‹?ConClass ≠ {}›] by auto
have "rotate n w <lex rotate na (rotate n w)" if "0 < na" and "na < ❙|w❙|" for na
proof-
from prim_no_rotate[OF assms[unfolded prim_rotate_conv[of w n]], of na] ‹na < ❙|w❙|› ‹0 < na›
have "rotate na (rotate n w) ≠ rotate n w" by force
hence "¬ rotate na (rotate n w) ≤lex rotate n w"
using all_b[rule_format, OF all_rot[of "na + n", folded rotate_rotate[of na n w]]] unfolding w'
by auto
from rlex.not_le_imp_less[OF this]
show "rotate n w <lex rotate na (rotate n w)".
qed
hence "Lyndon (rotate n w)"
using ‹w ≠ ε› by auto
from that[OF this] show thesis.
qed
lemma conjug_Lyndon_ex': assumes "primitive w"
obtains v where "w ∼ v" and "Lyndon v"
unfolding conjug_rotate_iff
using conjug_Lyndon_ex[OF ‹primitive w›]
by metis
section "Characterization by suffixes"
lemma Lyndon_suf_less: assumes "Lyndon w" "s ≤s w" "s ≠ ε" "s ≠ w"
shows "w <lex s"
proof-
define p where "p = take ❙|s❙| w"
have "❙|s❙| ≤ ❙|w❙|"
using ‹s ≤s w›
by (simp add: suffix_length_le)
have "p ≤p w" and "❙|p❙| = ❙|s❙|"
unfolding p_def
using take_is_prefix ‹❙|s❙| ≤ ❙|w❙|› take_len by blast+
hence "p ≠ s"
using Lyndon_unbordered[OF ‹Lyndon w›] ‹s ≤s w› ‹s ≠ w› ‹s ≠ ε›
by auto
define p' s' where "p' = drop ❙|s❙| w" and "s' = take ❙|p'❙| w"
have "p ⋅ p' = w"
unfolding p'_def p_def s'_def by simp
have "s' ⋅ s = w"
unfolding p'_def p_def s'_def
using suf_len[OF ‹s ≤s w›] ‹s ≤s w›
length_drop suffix_take by metis
have "❙|p'❙| = ❙|s'❙|"
using s'_def ‹p⋅p' = w› by auto
have "w <lex s⋅s'"
using Lyndon_conj_greater[of s' s, unfolded ‹s' ⋅ s = w›, OF ‹Lyndon w›] ‹p ≠ s›
unfolding ‹s' ⋅ s = w› p_def using ‹s' ⋅ s = w› assms(3) by fastforce
from lexord_suf_linorder[OF _ ‹p ≠ s› ‹❙|p❙| = ❙|s❙|› ‹❙|p'❙| = ❙|s'❙|›, OF this[folded ‹p ⋅ p' = w›]]
have "p <lex s".
from lexord_app_right_linorder[OF this, of p' ε, unfolded ‹p ⋅ p' = w›] ‹❙|p❙| = ❙|s❙|›
show "w <lex s"
by simp
qed
lemma Lyndon_pref_suf_less: assumes "Lyndon w" "p ≤p w" "s ≤s w" "s ≠ ε" "s ≠ w"
shows "p <lex s"
proof(cases "p = w", simp add: Lyndon_suf_less[OF assms(1) assms(3-5)])
assume "p ≠ w"
show "p <lex s"
proof(rule rlex.less_trans)
show "p <lex w"
using ‹p ≠ w› lexordp_append_rightI[of "p¯⇧>w" p]
lq_pref[OF ‹p ≤p w›] by force
show "w <lex s"
using Lyndon_suf_less assms by blast
qed
qed
lemma suf_less_Lyndon: assumes "w ≠ ε" and "∀s. (s ≤s w ⟶ s ≠ ε ⟶ s ≠ w ⟶ w <lex s)"
shows "Lyndon w"
proof-
have "primitive w"
proof (rule ccontr)
assume "¬ primitive w"
obtain q k where "q ≠ ε" "1 < k" "q⇧@k=w" "w≠q"
using non_prim[OF ‹¬ primitive w› ‹w ≠ ε›] by blast
hence "q ≤s w" "q ≠ ε" "q ≠ w"
unfolding pow_list_eq_if[of k q] pow_comm[symmetric]
using sufI[of "q ⇧@ (k - 1)" q w]
by presburger+
hence "w <lex q"
using assms(2) by blast
have "q <p w"
using ‹1 < k› ‹q ⇧@ k = w›
unfolding pow_list_eq_if[of k q] pow_list_eq_if[of "k-1" q]
using ‹w ≠ ε› by auto
from lexordp_append_rightI[of "q¯⇧>w" q,
unfolded lq_spref[OF this], OF lq_spref_nemp[OF this]]
have "q <lex w".
thus False
using ‹w <lex q› by simp
qed
have "w <lex rotate l w" if assms_l: "0 < l" "l < ❙|w❙|" for l
proof-
have "take l w ≤p w" and "take l w ≠ ε" and "❙|take l w❙| = l"
using assms_l take_is_prefix ‹l < ❙|w❙|› by auto
have "drop l w ≤s w" "drop l w ≠ ε"
using ‹l < ❙|w❙|› suffix_drop by auto
have "drop l w ≠ w"
using append_take_drop_id[of l w] ‹take l w ≠ ε› by force
have "drop l w ⋅ take l w = rotate l w"
using rotate_append[of "take l w" "drop l w", symmetric, unfolded ‹❙|take l w❙| = l›,
unfolded append_take_drop_id].
have "w <lex drop l w"
using ‹drop l w ≤s w› ‹drop l w ≠ w› ‹drop l w ≠ ε› assms(2) by blast
from lexord_app_right_linorder[OF this suffix_length_le[OF ‹drop l w ≤s w›], of ε "take l w",
unfolded append.right_neutral]
have "w <lex drop l w ⋅ take l w".
thus "w <lex rotate l w"
by (simp add: ‹drop l w ⋅ take l w = rotate l w›)
qed
thus "Lyndon w"
by (simp add: ‹w ≠ ε› local.LyndonI)
qed
corollary Lyndon_suf_char: "w ≠ ε ⟹ Lyndon w ⟷ (∀s. s ≤s w ⟶ s ≠ ε ⟶ s ≠ w ⟶ w <lex s)"
using Lyndon_suf_less suf_less_Lyndon by blast
lemma Lyndon_suf_le: "Lyndon w ⟹ s ≤s w ⟹ s ≠ ε ⟹ w ≤lex s"
using Lyndon_suf_less rlex.not_less rlex.order.asym by metis
section "Unbordered prefix of a Lyndon word is Lyndon"
lemma unbordered_pref_Lyndon: "Lyndon (u⋅v) ⟹ u ≠ ε ⟹ ¬ bordered u ⟹ Lyndon u"
unfolding Lyndon_suf_char
proof(standard+)
fix s
assume "Lyndon (u ⋅ v)" and "u ≠ ε" and "¬ bordered u" and "s ≤s u" and "s ≠ ε" and "s ≠ u"
hence "u ⋅ v <lex s ⋅ v"
using Lyndon_suf_less[OF ‹Lyndon (u ⋅ v)›, of "s ⋅ v"] by auto
have "¬ s ≤p u"
using ‹¬ bordered u› ‹s ≤s u› ‹s ≠ ε› ‹s ≠ u› by auto
moreover have "¬ u ≤p s"
using suf_pref_eq[OF ‹s ≤s u›] ‹s ≠ u› by blast
ultimately show "u <lex s"
using lexord_cancel_right_linorder[OF ‹u ⋅ v <lex s ⋅ v›] by blast
qed
section "Concatenation of Lyndon words"
theorem Lyndon_concat: assumes "Lyndon u" and "Lyndon v" and "u <lex v"
shows "Lyndon (u⋅v)"
proof-
have "u⋅v <lex v"
proof(cases "u ≤p v")
assume "u ≤p v"
obtain z where "u⋅z = v" and "z ≤s v" and "z ≠ ε"
using lq_pref[OF ‹u ≤p v›] rlex.less_imp_neq[OF ‹u <lex v›] self_append_conv by force
from Lyndon_suf_less[OF ‹Lyndon v› this(2-3), THEN lexord_append_leftI_linorder, of u]
LyndonD_nemp[OF ‹Lyndon u›] this(1)
show ?thesis
by blast
next
assume "¬ u ≤p v"
then show ?thesis
using local.lexordp_linear[of v "u⋅v"]
local.lexordp_mid_pref[OF ‹u <lex v›,of v]
prefixI[of v u v]
by argo
qed
{ fix z
assume "z ≤s (u⋅v)" "z ≠ ε" "z ≠ u⋅v"
have "u⋅v <lex z"
proof(cases "z ≤s v ∧ z ≠ ε")
assume "z ≤s v ∧ z ≠ ε"
from Lyndon_suf_less[OF ‹Lyndon v› conjunct1[OF this] conjunct2[OF this]]
have "z ≠ v ⟹ v <lex z"
by blast
thus "u⋅v <lex z"
using ‹u ⋅ v <lex v› rlex.less_trans
by fast
next
assume "¬ (z ≤s v ∧ z ≠ ε)"
then obtain z' where "z' ≤s u" "z' ≠ ε" "z' ≠ u" "z'⋅v = z"
using ‹z ≤s u ⋅ v› ‹z ≠ ε› ‹z ≠ u ⋅ v› suffix_append[of z u v]
by force
from Lyndon_suf_less[OF ‹Lyndon u› this(1-3)]
have "u <lex z'".
thus "u⋅v <lex z"
using ‹z' ≤s u› lexord_app_right_linorder[of u z' v v] suffix_length_le[of z' u]
unfolding ‹z' ⋅ v = z›
by blast
qed
}
thus ?thesis
using suf_nemp[OF LyndonD_nemp[OF ‹Lyndon v›], of u, THEN suf_less_Lyndon]
by blast
qed
section "Longest Lyndon suffix"
fun longest_Lyndon_suffix:: "'a list ⇒ 'a list" ("LynSuf") where
"longest_Lyndon_suffix ε = ε" |
"longest_Lyndon_suffix (a#w) = (if Lyndon (a#w) then a#w else longest_Lyndon_suffix w)"
lemma longest_Lyndon_suf_ext: "¬ Lyndon (a # w) ⟹ LynSuf w = LynSuf (a # w)"
using longest_Lyndon_suffix.simps(2) by presburger
lemma longest_Lyndon_suf_suf: "w ≠ ε ⟹ LynSuf w ≤s w"
proof(induction w rule: longest_Lyndon_suffix.induct)
case 1
then show ?case by simp
next
case (2 a w)
show ?case
proof(cases "Lyndon (a#w)")
case True
then show ?thesis by auto
next
case False
from "2.IH"[OF this, unfolded longest_Lyndon_suf_ext[OF this], THEN suffix_ConsI, of a]
Lyndon_sing False
show ?thesis by blast
qed
qed
lemma longest_Lyndon_suf_max:
"v ≤s w ⟹ Lyndon v ⟹ v ≤s (LynSuf w)"
proof(induction w arbitrary: v rule: longest_Lyndon_suffix.induct)
case 1
then show ?case
using longest_Lyndon_suffix.simps(1) by presburger
next
case (2 a w)
show ?case
proof(cases "Lyndon (a#w)")
case True
then show ?thesis
using "2.prems"(1) longest_Lyndon_suffix.simps(2) by presburger
next
case False
have "v ≠ a # w"
using "2.prems"(2) False by blast
from "2.IH"[OF False _ "2.prems"(2), unfolded longest_Lyndon_suf_ext[OF False]]
"2.prems"(1)[unfolded suffix_Cons] this
show ?thesis by fast
qed
qed
lemma longest_Lyndon_suf_Lyndon_id: assumes "Lyndon w"
shows "LynSuf w = w"
proof(cases "w = ε", simp)
case False
from longest_Lyndon_suf_suf[OF this]
suffix_order.order_refl[THEN longest_Lyndon_suf_max[OF _ assms]]
suffix_order.antisym
show ?thesis by blast
qed
lemma longest_Lyndon_suf_longest: "w ≠ ε ⟹ v' ≤s w ⟹ Lyndon v' ⟹ ❙|v'❙| ≤ ❙|(LynSuf w)❙|"
using longest_Lyndon_suf_max suffix_length_le by blast
lemma longest_Lyndon_suf_sing: "LynSuf [a] = [a]"
using Lyndon_sing longest_Lyndon_suf_Lyndon_id by blast
lemma longest_Lyndon_suf_Lyndon: "w ≠ ε ⟹ Lyndon (LynSuf w)"
proof(induction w rule: longest_Lyndon_suffix.induct, blast)
case (2 a w)
show ?case
proof(cases "Lyndon (a#w)")
case True
then show ?thesis
using longest_Lyndon_suf_Lyndon_id by presburger
next
case False
from "2.IH"[OF this, unfolded longest_Lyndon_suf_ext[OF this]] Lyndon_sing
show ?thesis by fastforce
qed
qed
lemma longest_Lyndon_suf_nemp: "w ≠ ε ⟹ LynSuf w ≠ ε"
using longest_Lyndon_suf_Lyndon[THEN LyndonD_nemp].
lemma longest_Lyndon_sufI:
assumes "q ≤s w" and "Lyndon q" and all_s: "(∀ s. (s ≤s w ∧ Lyndon s) ⟶ s ≤s q)"
shows "LynSuf w = q"
proof(cases "w = ε")
case True
then show ?thesis
using assms(1) longest_Lyndon_suffix.simps(1) suffix_bot.extremum_uniqueI by blast
next
case False
from all_s longest_Lyndon_suf_Lyndon[OF this] longest_Lyndon_suf_max[OF assms(1) assms(2)]
longest_Lyndon_suf_suf[OF this] suffix_order.eq_iff
show ?thesis by blast
qed
corollary longest_Lyndon_sufI':
assumes "q ≤s w" and "Lyndon q" and all_s: "∀ s. (s ≤s w ∧ Lyndon s) ⟶ ❙|s❙| ≤ ❙|q❙|"
shows "LynSuf w = q"
using longest_Lyndon_sufI[OF ‹q ≤s w› ‹Lyndon q›] suf_ruler_le all_s ‹q ≤s w› by blast
text‹The next lemma is fabricated to suit the upcoming definition of longest Lyndon factorization.›
lemma longest_Lyndon_suf_shorter: assumes "w ≠ ε"
shows "❙|w⇧<¯(LynSuf w)❙| < ❙|w❙|"
using nemp_len[OF longest_Lyndon_suf_nemp[OF ‹w ≠ ε›]] arg_cong[OF rq_suf[OF longest_Lyndon_suf_suf[OF ‹w ≠ ε›]], of length]
unfolding lenmorph by linarith
section "Lyndon factorizations"
function Lyndon_fac::"'a list ⇒ 'a list list" ("LynFac")
where "Lyndon_fac w = (if w ≠ ε then ((Lyndon_fac (w ⇧<¯(LynSuf w) )) ⋅ [LynSuf w]) else ε)"
using longest_Lyndon_suffix.cases by blast+
termination
proof(relation "measure length", simp)
show "⋀w. w ≠ ε ⟹ (w⇧<¯LynSuf w, w) ∈ measure length"
unfolding measure_def inv_image_def using longest_Lyndon_suf_shorter by blast
qed
text‹The factorization @{term "Lyndon_fac w"} obtained by taking always the longest Lyndon suffix is well defined,
and called ``Lyndon factorization (of $w$)''.›
lemma Lyndon_fac_simp: "w ≠ ε ⟹ Lyndon_fac w = Lyndon_fac (w⇧<¯LynSuf w) ⋅ (LynSuf w # ε)"
using Lyndon_fac.simps[of w] by meson
lemma Lyndon_fac_emp: "Lyndon_fac ε = ε"
by simp
text‹Note that the Lyndon factorization of a Lyndon word is trivial.›
lemma Lyndon_fac_longest_Lyndon_id: "Lyndon w ⟹ Lyndon_fac w = [w]"
by (simp add: longest_Lyndon_suf_Lyndon_id)
text‹Lyndon factorization is composed of Lyndon words ...›
lemma Lyndon_fac_set: "z ∈ set (Lyndon_fac w) ⟹ Lyndon z"
proof(induction w rule: Lyndon_fac.induct)
case (1 w)
then show "Lyndon z"
proof (cases "w = ε")
assume "w ≠ ε"
have "Lyndon_fac w = (Lyndon_fac (w ⇧<¯(LynSuf w) )) ⋅ [LynSuf w]"
using Lyndon_fac_simp[OF ‹w ≠ ε›].
from set_ConsD[OF "1.prems"(1)[unfolded rotate1.simps(2)[of "LynSuf w" "Lyndon_fac (w ⇧<¯(LynSuf w) )", folded this, symmetric], unfolded set_rotate1]]
have "z = LynSuf w ∨ z ∈ set (Lyndon_fac (w ⇧<¯(LynSuf w) ))".
thus "Lyndon z"
using "1.IH"[OF ‹w ≠ ε›] longest_Lyndon_suf_Lyndon[OF ‹w ≠ ε›]
by blast
next
assume "w = ε"
thus "Lyndon z"
using "1.prems"
unfolding Lyndon_fac_emp[folded ‹w = ε›] list.set(1) empty_iff
by blast
qed
qed
text‹...and it indeed is a factorization of the argument.›
lemma Lyndon_fac_longest_dec: "concat (Lyndon_fac w) = w"
proof(induction w rule: Lyndon_fac.induct)
case (1 w)
thus "concat (LynFac w) = w"
proof (cases "w = ε", simp)
assume "w ≠ ε"
have eq: "concat (Lyndon_fac w) = concat ( (Lyndon_fac (w ⇧<¯(LynSuf w) )) ) ⋅ concat ([LynSuf w])"
unfolding Lyndon_fac_simp[OF ‹w ≠ ε›] concat_morph..
from this[unfolded "1.IH"[OF ‹w ≠ ε›] concat_sing' rq_suf[OF longest_Lyndon_suf_suf[OF ‹w ≠ ε›]]]
show ?case.
qed
qed
text‹The following lemma makes explicit the inductive character of the definition of @{term Lyndon_fac}.›
lemma Lyndon_fac_longest_pref: "us ≤p Lyndon_fac w ⟹ Lyndon_fac (concat us) = us"
proof(induction w arbitrary: us rule: Lyndon_fac.induct)
case (1 w)
thus "LynFac (concat us) = us"
proof (cases "w = ε", simp)
assume "w ≠ ε"
have step: "Lyndon_fac w = (Lyndon_fac (w ⇧<¯(LynSuf w))) ⋅ [LynSuf w]"
using Lyndon_fac_simp[OF ‹w ≠ ε›].
consider (neq) "us ≠ Lyndon_fac w" | (eq) "us = Lyndon_fac w"
using "1.prems" le_neq_implies_less by blast
then show "LynFac (concat us) = us"
proof(cases)
case neq
hence "us ≤p Lyndon_fac (w⇧<¯LynSuf w)"
using "1.prems" last_no_split[of us "Lyndon_fac (w⇧<¯LynSuf w)" "LynSuf w"]
unfolding step[symmetric] by blast
thus "LynFac (concat us) = us"
using "1.IH" ‹w ≠ ε› by blast
next
case eq
show "LynFac (concat us) = us"
using Lyndon_fac_longest_dec[of w, folded eq] eq by simp
qed
qed
qed
text‹We give name to an important predicate: monotone (nonincreasing) list of Lyndon words.›
definition Lyndon_mono :: "'a list list ⇒ bool" where
"Lyndon_mono ws ⟷ (∀ u ∈ set ws. Lyndon u) ∧ (rlex.sorted (rev ws))"
lemma Lyndon_mono_set: "Lyndon_mono ws ⟹ u ∈ set ws ⟹ Lyndon u"
unfolding Lyndon_mono_def by blast
lemma Lyndon_mono_sorted: "Lyndon_mono ws ⟹ rlex.sorted (rev ws)"
unfolding Lyndon_mono_def by blast
lemma Lyndon_mono_nth: "Lyndon_mono ws ⟹ i ≤ j ⟹ j < ❙|ws❙| ⟹ ws!j ≤lex ws!i"
unfolding Lyndon_mono_def using rlex.sorted_rev_nth_mono by blast
lemma Lyndon_mono_empty[simp]: "Lyndon_mono ε"
unfolding Lyndon_mono_def by auto
lemma Lyndon_mono_sing: "Lyndon u ⟹ Lyndon_mono [u]"
unfolding Lyndon_mono_def by auto
lemma Lyndon_mono_fac_Lyndon_mono:
assumes "ps ≤f ws" and "Lyndon_mono ws" shows "Lyndon_mono ps"
unfolding Lyndon_mono_def
proof
show "∀x ∈ (set ps). Lyndon x"
using ‹Lyndon_mono ws›[unfolded Lyndon_mono_def] set_mono_sublist[OF ‹ps ≤f ws›] by blast
show "linorder.sorted (≤lex) (rev ps)"
using rlex.sorted_append ‹Lyndon_mono ws›[unfolded Lyndon_mono_def] ‹ps ≤f ws›[unfolded sublist_def] by fastforce
qed
text‹Lyndon factorization is monotone!
Altogether, we have shown that the Lyndon factorization is a monotone factorization
into Lyndon words.›
theorem fac_Lyndon_mono: "Lyndon_mono (Lyndon_fac w)"
proof (induct "Lyndon_fac w" arbitrary: w rule: rev_induct, simp)
case (snoc x xs)
have "Lyndon x"
using Lyndon_fac_set[of x, unfolded in_set_conv_decomp, of w, folded snoc.hyps(2)] by fast
have "concat (xs ⋅ [x]) = w"
using Lyndon_fac_longest_dec[of w, folded snoc.hyps(2)] by auto
then show "Lyndon_mono (LynFac w)"
proof (cases "xs = ε")
assume "xs = ε"
show "Lyndon_mono (LynFac w)"
unfolding Lyndon_mono_def ‹xs ⋅ [x] = LynFac w›[symmetric] ‹xs = ε› append.left_neutral
rlex.sorted1[of x]
using ‹Lyndon x› by force
next
assume "xs ≠ ε"
have "concat (xs ⋅ [x]) ≠ ε" and "w ≠ ε"
using Lyndon_fac_longest_dec snoc.hyps(2) by auto
have "x = LynSuf w" and "xs = LynFac (w⇧<¯LynSuf w )"
using Lyndon_fac.simps[of w, folded snoc.hyps(2)] ‹w ≠ ε›
Lyndon_fac_longest_dec append1_eq_conv[of xs x "LynFac (w⇧<¯LynSuf w )" "LynSuf w"] by presburger+
from Lyndon_mono_sorted[OF snoc.hyps(1)[OF ‹xs = LynFac (w⇧<¯LynSuf w )›], folded this]
have "dual_rlex.sorted xs"
unfolding sorted_dual_rev_iff.
have "Lyndon (last xs)"
using Lyndon_fac_set[of "last xs" "w⇧<¯LynSuf w", folded ‹xs = LynFac (w⇧<¯LynSuf w)›, OF last_in_set[OF ‹xs ≠ ε›]].
have "x ≤lex last xs"
proof(rule ccontr)
assume "¬ x ≤lex last xs" hence "last xs <lex x" by auto
from Lyndon_concat[OF ‹Lyndon (last xs)› ‹Lyndon x› this]
have "Lyndon ((last xs) ⋅ x)".
have "(last xs) ⋅ x ≤s concat (xs ⋅ [x])"
using ‹xs ≠ ε› concat_last_suf by auto
from longest_Lyndon_suf_longest[OF ‹concat (xs ⋅ [x]) ≠ ε› this ‹Lyndon ((last xs) ⋅ x)›,
unfolded ‹concat (xs ⋅ [x]) = w›, folded ‹x = LynSuf w›]
show False
using ‹Lyndon (last xs)› by simp
qed
have "dual_rlex.sorted (butlast xs ⋅ [last xs])"
by (simp add: ‹linorder.sorted (λx y. y ≤lex x) xs› ‹xs ≠ ε›)
from this ‹x ≤lex last xs›
have "dual_rlex.sorted (butlast xs ⋅ [last xs,x])"
using dual_rlex.sorted_append by auto
from this[unfolded hd_word[of "last xs" "[x]"] lassoc append_butlast_last_id[OF ‹xs ≠ ε›]]
have "rlex.sorted (rev (LynFac w))"
unfolding ‹xs ⋅ [x] = LynFac w›[symmetric] sorted_dual_rev_iff[symmetric].
thus "Lyndon_mono (LynFac w)"
unfolding Lyndon_mono_def using Lyndon_fac_set by blast
qed
qed
text‹Now we want to show the converse: any monotone factorization into Lyndon words is the Lyndon factorization›
text‹The last element in the Lyndon factorization is the smallest suffix.›
lemma Lyndon_mono_last_smallest: "Lyndon_mono ws ⟹ s ≤s (concat ws) ⟹ s ≠ ε ⟹ last ws ≤lex s"
proof(induct ws arbitrary: s rule: rev_induct, fastforce)
case (snoc a ws)
have "ws⋅[a] ≠ ε"
by blast
have "last (ws⋅[a]) = a"
by simp
from last_in_set[OF ‹ws⋅[a] ≠ ε›, unfolded this] ‹Lyndon_mono (ws ⋅ [a])›[unfolded Lyndon_mono_def]
have "Lyndon a"
by blast
show ?case
proof(cases "s ≤s a ∧ s ≠ ε")
case True
from Lyndon_suf_le[OF ‹Lyndon a›] this
show ?thesis
by simp
next
case False
hence "ws ≠ ε"
using snoc.prems(2-3) by force
obtain s' where "s = s'⋅a"
using False snoc.prems(2-3)[unfolded concat_append[of ws "[a]", unfolded concat_sing']] suffix_append[of s "concat ws" a]
by blast
hence "s' ≤s concat ws"
using False snoc.prems(2) by fastforce
have "Lyndon_mono ws"
using ‹Lyndon_mono (ws⋅[a])› Lyndon_mono_fac_Lyndon_mono
by blast
from snoc.hyps[OF this ‹s' ≤s concat ws›]
have "last ws ≤lex s'"
using False ‹s = s' ⋅ a› same_suffix_nil snoc.prems(3) by blast
hence "last ws ≤lex s'⋅a"
using local.lexordp_eq_trans ord.lexordp_eq_pref by blast
have "a ≤lex last ws"
using ‹Lyndon_mono (ws⋅[a])›
unfolding Lyndon_mono_def
by (simp add: ‹ws ≠ ε› last_ConsL)
from dual_rlex.order_trans[OF ‹last ws ≤lex s' ⋅ a› this, folded ‹s = s' ⋅ a›]
show ?thesis
unfolding ‹last (ws⋅[a]) = a›
by blast
qed
qed
text‹A monotone list, if seen as a factorization,
must end with the longest suffix›
lemma Lyndon_mono_last_longest: assumes "ws ≠ ε" and "Lyndon_mono ws"
shows "LynSuf (concat ws) = last ws"
proof-
have "Lyndon (last ws)"
using Lyndon_mono_set assms(1) assms(2) last_in_set by blast
hence "last ws ≠ ε"
using LyndonD_nemp by blast
hence "last ws ≤s LynSuf (concat ws)"
using longest_Lyndon_suf_max[OF concat_last_suf[OF assms(1)] ‹Lyndon (last ws)›]
by blast
have "concat ws ≠ ε"
using Lyndon.simps assms(2)[unfolded Lyndon_mono_def] set_nemp_concat_nemp[OF assms(1)]
by blast
from longest_Lyndon_suf_nemp[OF this] longest_Lyndon_suf_suf[OF this]
have "LynSuf (concat ws) ≤s concat ws"
by simp
show ?thesis
using Lyndon_mono_last_smallest[OF ‹Lyndon_mono ws› ‹LynSuf (concat ws) ≤s concat ws›]
Lyndon_suf_le[OF longest_Lyndon_suf_Lyndon[OF ‹concat ws ≠ ε›], OF ‹last ws ≤s LynSuf (concat ws)›]
eq_iff ‹LynSuf (concat ws) ≠ ε› ‹last ws ≠ ε›
by simp
qed
text‹Therefore, by construction,
any monotone list is the Lyndon factorization of its concatenation›
lemma Lyndon_mono_fac:
"Lyndon_mono ws ⟹ ws = Lyndon_fac (concat ws)"
proof (induct ws rule: rev_induct, simp)
case (snoc x xs)
have "Lyndon_mono xs"
using ‹Lyndon_mono (xs ⋅ [x])›
unfolding Lyndon_mono_def
by simp
from snoc.hyps[OF this]
have "xs = LynFac (concat xs)".
have "x = LynSuf (concat (xs ⋅ [x]))"
using Lyndon_mono_last_longest[OF _ ‹Lyndon_mono (xs ⋅ [x])›, unfolded last_snoc] by simp
have "concat (xs ⋅ [x])⇧<¯x = concat xs"
by simp
have "concat (xs ⋅ [x]) ≠ ε"
using Lyndon_mono_set snoc.prems by auto
from this
show ?case
using Lyndon_fac.simps[of "concat (xs ⋅ [x])", folded ‹x = LynSuf (concat (xs ⋅ [x]))›,
unfolded ‹concat (xs ⋅ [x])⇧<¯x = concat xs›, folded ‹xs = LynFac (concat xs)›]
by presburger
qed
text‹This implies that the Lyndon factorization can be characterized
in two equivalent ways: as the (unique) monotone factorization (into Lyndon words) or as
the "suffix greedy" factorization (into Lyndon words).
›
corollary Lyndon_mono_fac_iff: "Lyndon_mono ws ⟷ ws = LynFac (concat ws)"
using Lyndon_mono_fac fac_Lyndon_mono[of "concat ws"] by fastforce
corollary Lyndon_mono_unique: assumes "Lyndon_mono ws" and "Lyndon_mono zs" and "concat ws = concat zs"
shows "ws = zs"
using Lyndon_mono_fac[OF ‹Lyndon_mono ws›] Lyndon_mono_fac[OF ‹Lyndon_mono zs›]
unfolding ‹concat ws = concat zs› by simp
subsection "Standard factorization"
lemma Lyndon_std: assumes "Lyndon w" "1 < ❙|w❙|"
obtains l m where "w = l⋅m" and "Lyndon l" and "Lyndon m" and "l <lex m"
proof-
have "w ≠ ε" "tl w ≠ ε"
using ‹1 < ❙|w❙|› long_list_tl by auto
define m where "m = LynSuf (tl w)"
hence "Lyndon m"
using ‹tl w ≠ ε› local.longest_Lyndon_suf_Lyndon by blast
have "m ≤s w"
unfolding m_def
using suffix_order.trans[OF longest_Lyndon_suf_suf[OF ‹tl w ≠ ε›] suffix_tl[of w]].
moreover have "m ≠ w"
unfolding m_def using hd_tl[OF ‹w ≠ ε›] longest_Lyndon_suf_suf[OF ‹tl w ≠ ε›] same_suffix_nil
not_Cons_self2 by metis
ultimately obtain l where "w = l⋅m" and "l ≠ ε"
by (auto simp add: suffix_def)
have "Lyndon l"
proof (rule unbordered_pref_Lyndon[OF ‹Lyndon w›[unfolded ‹w = l⋅m›] ‹l ≠ ε›], rule)
assume "bordered l"
from unbordered_border[OF this, unfolded border_def]
obtain s where "s ≠ ε" and "s ≠ l" and "s ≤p l" and "s ≤s l" and "¬ bordered s"
by blast
have "Lyndon s"
using unbordered_pref_Lyndon[OF _ ‹s ≠ ε› ‹¬ bordered s›, of "s¯⇧>l⋅m", unfolded lassoc lq_pref[OF ‹s ≤p l›]]
‹Lyndon w›[unfolded ‹w = l ⋅ m›] by blast
have "s <lex m"
using Lyndon_pref_suf_less[OF ‹Lyndon w› _ ‹m ≤s w› LyndonD_nemp[OF ‹Lyndon m›]
‹m ≠ w›, of s] Lyndon.elims(2)[OF ‹Lyndon m›]
‹s ≤p l› prefix_append[of s l m, folded ‹w = l ⋅ m›] by argo
from Lyndon_concat[OF ‹Lyndon s› ‹Lyndon m› this]
have "Lyndon (s⋅m)".
moreover have "s⋅m ≤s tl w"
unfolding ‹w = l ⋅ m› using ‹s ≠ l› ‹s ≤s l› list.collapse[OF ‹w ≠ ε›, unfolded ‹w = l ⋅ m›]
by (auto simp add: suffix_def)
ultimately show False
using m_def ‹s ≠ ε› longest_Lyndon_suf_max same_suffix_nil by blast
qed
have "l <lex m"
using Lyndon_pref_suf_less[OF ‹Lyndon w› prefI[OF ‹w = l ⋅ m›[symmetric]] ‹m ≤s w›
longest_Lyndon_suf_nemp[OF ‹tl w ≠ ε›, folded m_def] ‹m ≠ w›].
from that[OF ‹w = l ⋅ m› ‹Lyndon l› ‹Lyndon m› this]
show thesis.
qed
corollary Lyndon_std_iff:
"Lyndon w ⟷ (❙|w❙| = 1 ∨ (∃ l m. w = l⋅m ∧ Lyndon l ∧ Lyndon m ∧ l <lex m))"
(is "?L ⟷ ?R")
proof
assume ?L
show ?R
using Lyndon_std[OF ‹Lyndon w›]
nemp_le_len[OF LyndonD_nemp[OF ‹Lyndon w›], unfolded le_eq_less_or_eq]
by metis
next
assume ?R
thus ?L
proof(rule disjE, fastforce)
show ‹∃l m. w = l ⋅ m ∧ Lyndon l ∧ Lyndon m ∧ l <lex m ⟹ Lyndon w›
using Lyndon_concat by blast
qed
qed
end
end