Theory Ta

(*  Title:       Tree Automata
    Author:      Peter Lammich <peter dot lammich at uni-muenster.de>
    Maintainer:  Peter Lammich <peter dot lammich at uni-muenster.de>
*)
section "Tree Automata"
theory Ta
imports Main Automatic_Refinement.Misc Tree
begin
text_raw ‹\label{sec:ta}›


text ‹
  This theory defines tree automata, tree regular languages and 
  specifies basic algorithms.

  Nondeterministic and deterministic (bottom-up) tree automata are defined.

  For non-deterministic tree automata, basic algorithms for membership, union,
  intersection, forward and backward reduction, and emptiness check are 
  specified.
  Moreover, a (brute-force) determinization algorithm is specified.
  
  For deterministic tree automata, we specify algorithms for complement 
  and completion. 

  Finally, the class of regular languages over a given ranked alphabet is defined
  and its standard closure properties are proved.

  The specification of the algorithms in this theory is very high-level, and the
  specifications are not executable. A bit more specific algorithms are defined 
  in Section~\ref{sec:absalgo}, and a refinement to executable definitions is 
  done in Section~\ref{sec:taimpl}.
›

subsection "Basic Definitions"

subsubsection "Tree Automata"

text ‹
  A tree automata consists of a (finite) set of initial states
  and a (finite) set of rules. 

  A rule has the form q → l q1…qn›, 
  with the meaning that one can derive 
  l(q1…qn)› from the state q›.
›

(* Workaround for bug in Haskell-code generator: Type variables have to be 
  lower-case *)
(* datatype ('Q,'L) ta_rule = RULE 'Q 'L "'Q list" ("_ → _ _") *)
datatype ('q,'l) ta_rule = RULE 'q 'l "'q list" (‹_  _ _›)

record ('Q,'L) tree_automaton_rec =
  ta_initial :: "'Q set"
  ta_rules :: "('Q,'L) ta_rule set"

  ― ‹Rule deconstruction›
fun lhs where "lhs (q  l qs) = q"
fun rhsq where "rhsq (q  l qs) = qs"
fun rhsl where "rhsl (q  l qs) = l"
  ― ‹States in a rule›
fun rule_states where "rule_states (q  l qs) = insert q (set qs)"
  ― ‹States in a set of rules›
definition "δ_states δ == (rule_states ` δ)"
  ― ‹States in a tree automaton›
definition "ta_rstates TA = ta_initial TA  δ_states (ta_rules TA)"
  ― ‹Symbols occurring in rules›
definition "δ_symbols δ == rhsl`δ"

  ― ‹Nondeterministic, finite tree automaton (NFTA)›
locale tree_automaton = 
  fixes TA :: "('Q,'L) tree_automaton_rec"
  assumes finite_rules[simp, intro!]: "finite (ta_rules TA)"
  assumes finite_initial[simp, intro!]: "finite (ta_initial TA)"
begin
  abbreviation "Qi == ta_initial TA"
  abbreviation "δ == ta_rules TA"
  abbreviation "Q == ta_rstates TA"
end

subsubsection "Acceptance"
text ‹
  The predicate accs δ t q› is true, iff the tree t› is accepted
  in state q› w.r.t. the rules in δ›.
  
  A tree is accepted in state $q$, if it can be produced from $q$ using the 
  rules.
›
inductive accs :: "('Q,'L) ta_rule set  'L tree  'Q  bool"
where
  "
     (q  f qs)  δ; length ts = length qs; 
     !!i. i<length qs  accs δ (ts ! i) (qs ! i) 
     accs δ (NODE f ts) q"


― ‹Characterization of @{const accs} using @{const list_all_zip}
inductive accs_laz :: "('Q,'L) ta_rule set  'L tree  'Q  bool"
where
  "
     (q  f qs)  δ; 
     list_all_zip (accs_laz δ) ts qs
     accs_laz δ (NODE f ts) q"

lemma accs_laz: "accs = accs_laz"
  apply (intro ext)
  apply (rule iffI)
  apply (erule accs.induct)
  apply (auto intro: accs_laz.intros[simplified list_all_zip_alt]) 
  apply (erule accs_laz.induct)
  apply (auto intro: accs.intros simp add: list_all_zip_alt)
  done


subsubsection "Language"
text ‹
  The language of a tree automaton is the set of all trees that are accepted
  in an initial state.
›
definition "ta_lang TA == { t . qta_initial TA. accs (ta_rules TA) t q }"

subsection "Basic Properties"

lemma rule_states_simp: 
  "rule_states x = (case x of (q  l qs)  insert q (set qs))"
  by (case_tac x) auto

lemma rule_states_lhs[simp]: "lhs r  rule_states r" 
  by (auto split: ta_rule.split simp add: rule_states_simp)

lemma rule_states_rhsq: "set (rhsq r)  rule_states r"
  by (auto split: ta_rule.split simp add: rule_states_simp)

lemma rule_states_finite[simp, intro!]: "finite (rule_states r)"
  by (simp add: rule_states_simp split: ta_rule.split)

lemma δ_statesI: 
  assumes A: "(q  l qs)δ"
  shows "qδ_states δ"
        "set qs  δ_states δ"
  using A
  apply (unfold δ_states_def)
  by (auto split: ta_rule.split simp add: rule_states_simp)

lemma δ_statesI': "(q  l qs)δ; qiset qs  qiδ_states δ"
  using δ_statesI(2) by fast

lemma δ_states_accsI: "accs δ n q  qδ_states δ"
  by (auto elim: accs.cases intro: δ_statesI)

lemma δ_states_union[simp]: "δ_states (δδ') = δ_states δ  δ_states δ'"
  by (auto simp add: δ_states_def)

lemma δ_states_insert[simp]: 
  "δ_states (insert r δ) = (rule_states r  δ_states δ)"
  by (unfold δ_states_def) auto

lemma δ_states_mono: "δ  δ'  δ_states δ  δ_states δ'"
  by (unfold δ_states_def) auto

lemma δ_states_finite[simp, intro]: "finite δ  finite (δ_states δ)"
  by (unfold δ_states_def) auto

lemma δ_statesE: "qδ_states Δ;
    !!f qs.  (q  f qs)Δ   P;
    !!ql f qs.  (ql  f qs)Δ; qset qs   P
    P"
  apply (unfold δ_states_def)
  apply (auto)
  apply (auto simp add: rule_states_simp split: ta_rule.split_asm)
  done

lemma δ_symbolsI: "(q  f qs)δ  fδ_symbols δ" 
  by (force simp add: δ_symbols_def)

lemma δ_symbolsE: 
  assumes A: "fδ_symbols δ"
  obtains q qs where "(q  f qs)  δ"
  using A
  apply (simp add: δ_symbols_def)
  apply (erule imageE)
  apply (case_tac x)
  apply simp
  done

lemma δ_symbols_simps[simp]:
  "δ_symbols {} = {}"
  "δ_symbols (insert r δ) = insert (rhsl r) (δ_symbols δ)"
  "δ_symbols (δδ') = δ_symbols δ  δ_symbols δ'"
  by (auto simp add: δ_symbols_def)

lemma δ_symbols_finite[simp, intro!]:
  "finite δ  finite (δ_symbols δ)"
  by (auto simp add: δ_symbols_def)

lemma accs_mono: "accs δ n q; δδ'  accs δ' n q"
proof (induct rule: accs.induct[case_names step])
  case (step q l qs δ n)
  hence R': "(q  l qs)  δ'" by auto
  from accs.intros[OF R' step.hyps(2)] 
       step.hyps(4)[OF _ step.prems] 
  show ?case .
qed

context tree_automaton
begin
  lemma initial_subset: "ta_initial TA  ta_rstates TA" 
    by (unfold ta_rstates_def) auto
  lemma states_subset: "δ_states (ta_rules TA)  ta_rstates TA" 
    by (unfold ta_rstates_def) auto
  
  lemma finite_states[simp, intro!]: "finite (ta_rstates TA)"
    by (auto simp add: ta_rstates_def δ_states_def 
             intro: finite_rules finite_UN_I)

  lemma finite_symbols[simp, intro!]: "finite (δ_symbols (ta_rules TA))"
    by simp

  lemmas is_subset = rev_subsetD[OF _ initial_subset] 
                     rev_subsetD[OF _ states_subset]
end

subsection "Other Classes of Tree Automata"

subsubsection "Automata over Ranked Alphabets"
  ― ‹All trees over ranked alphabet›
inductive_set ranked_trees :: "('L  nat)  'L tree set"
  for A where
  " tset ts. tranked_trees A; A f = Some (length ts)  
     NODE f ts  ranked_trees A"

locale finite_alphabet =
  fixes A :: "('L  nat)"
  assumes A_finite[simp, intro!]: "finite (dom A)"
begin
  abbreviation "F == dom A"
end

context finite_alphabet
begin

  definition "legal_rules Q == { (q  f qs) | q f qs.
    q  Q
     qs  lists Q
     A f = Some (length qs)}"

  lemma legal_rulesI: 
    " 
      rδ; 
      rule_states r  Q; 
      A (rhsl r) = Some (length (rhsq r)) 
      rlegal_rules Q"
    apply (unfold legal_rules_def)
    apply (cases r)
    apply (auto)
    done

  lemma legal_rules_finite[simp, intro!]:
    fixes Q::"'Q set"
    assumes [simp, intro!]: "finite Q"
    shows "finite (legal_rules Q)"
  proof -
    define possible_rules_f
      where "possible_rules_f = (λ(Q::'Q set) f. 
      (λ(q,qs). (q  f qs)) ` (Q × (lists Q  {qs. A f = Some (length qs)})))"
    
    have "legal_rules Q = (possible_rules_f Q`F)"
      by (auto simp add: legal_rules_def possible_rules_f_def)
    moreover have "!!f. finite (possible_rules_f Q f)" 
      apply (unfold possible_rules_f_def)
      apply (rule finite_imageI)
      apply (rule finite_SigmaI)
      apply simp
      apply (case_tac "A f")
      apply simp
      apply (simp add: lists_of_len_fin)
      done
    ultimately show ?thesis by auto
  qed
end

  ― ‹Finite tree automata with ranked alphabet›
locale ranked_tree_automaton = 
  tree_automaton TA +
  finite_alphabet A
  for TA :: "('Q,'L) tree_automaton_rec" 
  and A :: "'L  nat" +
  assumes ranked: "(q  f qs)δ  A f = Some (length qs)"
begin

  lemma rules_legal: "rδ  rlegal_rules Q"
    apply (rule legal_rulesI)
    apply assumption
    apply (auto simp add: ta_rstates_def δ_states_def) [1]
    apply (case_tac r)
    apply (auto intro: ranked)
    done

    ― ‹Only well-ranked trees are accepted›
  lemma accs_is_ranked: "accs δ t q  tranked_trees A"
    apply (induct δδ t q rule: accs.induct)
    apply (rule ranked_trees.intros)
    apply (auto simp add: set_conv_nth ranked)
    done

    ― ‹The language consists of well-ranked trees›
  theorem lang_is_ranked: "ta_lang TA  ranked_trees A"
    using accs_is_ranked by (auto simp add: ta_lang_def)

end

subsubsection "Deterministic Tree Automata"

  ― ‹Deterministic, (bottom-up) finite tree automaton (DFTA)›
locale det_tree_automaton = ranked_tree_automaton TA A
  for TA :: "('Q,'L) tree_automaton_rec" and A +
  assumes deterministic: " (q  f qs)δ; (q'  f qs)δ   q=q'"
begin
  theorem accs_unique: " accs δ t q; accs δ t q'   q=q'"
    unfolding accs_laz
  proof (induct δδ t q arbitrary: q' rule: accs_laz.induct[case_names step])
    case (step q f qs ts q')
    hence I: 
      "(q  f qs)  δ"
      "list_all_zip (accs_laz δ) ts qs"
      "list_all_zip (λt q. (q'. accs_laz δ t q'  q=q')) ts qs"
      "accs_laz δ (NODE f ts) q'"
      by auto
    from I(4) obtain qs' where A':
      "(q'  f qs')  δ"
      "list_all_zip (accs_laz δ) ts qs'"
      by (auto elim!: accs_laz.cases)

    from I(2,3) A'(2) have "list_all_zip (=) qs qs'"
      by (auto simp add: list_all_zip_alt)
    hence "qs=qs'" by (auto simp add: laz_eq)
    with deterministic[OF I(1), of q'] A'(1) show "q=q'" by simp
  qed
    
end

subsubsection "Complete Tree Automata"

locale complete_tree_automaton = det_tree_automaton TA A 
  for TA :: "('Q,'L) tree_automaton_rec" and A
  +
  assumes complete: 
  " qslists Q; A f = Some (length qs)   q. (q  f qs)δ"
begin

    ― ‹In a complete DFTA, all trees can be labeled by some state›
  theorem label_all: "tranked_trees A  qQ. accs δ t q"
  proof (induct rule: ranked_trees.induct[case_names constr])
    case (constr ts f)
    obtain qs where QS:
      "qslists Q"
      "list_all_zip (accs δ) ts qs" 
      and [simp]: "length qs = length ts"
    proof -
      from constr(1) have "i<length ts. q. qQ  accs δ (ts!i) q" 
        by (auto)
      thus ?thesis
        apply (erule_tac obtain_list_from_elements)
        apply (rule_tac that)
        apply (auto simp add: list_all_zip_alt set_conv_nth)
        done
    qed
    moreover from complete[OF QS(1), simplified, OF constr(2)] obtain q 
      where "(q  f qs) δ" ..
    ultimately show ?case 
      by (auto simp add: accs_laz ta_rstates_def 
               intro: accs_laz.intros δ_statesI)
  qed

end


subsection "Algorithms"
text ‹
  In this section, basic algorithms on tree-automata are specified.
  The specification is a high-level, non-executable specification, intended
  to be refined to more low-level specifications, as done in 
  Sections~\ref{sec:absalgo} and \ref{sec:taimpl}.
›

subsubsection "Empty Automaton"
definition "ta_empty ==  ta_initial = {}, ta_rules = {}"

theorem ta_empty_lang[simp]: "ta_lang ta_empty = {}"
  by (auto simp add: ta_empty_def ta_lang_def)

theorem ta_empty_ta[simp, intro!]: "tree_automaton ta_empty"
  apply (unfold_locales)
  apply (unfold ta_empty_def)
  apply auto
  done

theorem (in finite_alphabet) ta_empty_rta[simp, intro!]: 
  "ranked_tree_automaton ta_empty A"
  apply (unfold_locales)
  apply (unfold ta_empty_def)
  apply auto
  done

theorem (in finite_alphabet) ta_empty_dta[simp, intro!]: 
  "det_tree_automaton ta_empty A"
  apply (unfold_locales)
  apply (unfold ta_empty_def)
  apply (auto)
  done

subsubsection "Remapping of States"

fun remap_rule where "remap_rule f (q  l qs) = ((f q)  l (map f qs))"
definition 
  "ta_remap f TA ==  ta_initial = f ` ta_initial TA, 
                      ta_rules = remap_rule f ` ta_rules TA 
                    "

lemma δ_states_remap[simp]: "δ_states (remap_rule f ` δ) = f` δ_states δ"
  apply (auto simp add: δ_states_def)
  apply (case_tac a)
  apply force
  apply (case_tac xb)
  apply force
  done

lemma remap_accs1: "accs δ n q  accs (remap_rule f ` δ) n (f q)"
proof (induct rule: accs.induct[case_names step])
  case (step q l qs δ ts)
  from step.hyps(1) have 1: "((f q)  l (map f qs))  remap_rule f ` δ" 
    by (drule_tac f="remap_rule f" in imageI) simp
  show ?case proof (rule accs.intros[OF 1])
    fix i assume "i<length (map f qs)"
    with step.hyps(4) show "accs (remap_rule f ` δ) (ts ! i) (map f qs ! i)" 
      by auto
  qed (auto simp add: step.hyps(2))
qed

lemma remap_lang1: "tta_lang TA  tta_lang (ta_remap f TA)"
  by (unfold ta_lang_def ta_remap_def) (auto dest: remap_accs1)

lemma remap_accs2: " 
    accs δ' n q'; 
    δ'=(remap_rule f ` δ); 
    q'=f q; 
    inj_on f Q; 
    qQ; 
    δ_states δ  Q 
    accs δ n q"
proof (induct arbitrary: δ q rule: accs.induct[case_names step])
  case (step q' l qs δ' ts δ q)
  note [simp] = step.prems(1,2)
  from step.hyps(1)[simplified] step.prems(3,4,5) have 
    R: "(q  l (map (inv_on f Q) qs))δ"
    apply (erule_tac imageE)
    apply (case_tac x)
    apply (auto simp del:map_map)
    apply (subst inj_on_map_inv_f)
    apply (auto dest: δ_statesI) [2]
    apply (subgoal_tac "qδ_states δ")
    apply (unfold inj_on_def) [1]
    apply (metis δ_statesI(1) contra_subsetD)
    apply (fastforce intro: δ_statesI(1) dest: inj_onD)
    done
  show ?case proof (rule accs.intros[OF R])
    fix i 
    assume "i < length (map (inv_on f Q) qs)"
    hence L: "i<length qs" by simp

    from step.hyps(1)[simplified] step.prems(5) have 
      IR: "!!i. i<length qs  qs!i  f ` Q"
      apply auto
      apply (case_tac x)
      apply (auto)
      apply (rename_tac list)
      apply (subgoal_tac "list!i  δ_states δ")
      apply blast
      apply (auto dest!: δ_statesI(2))
      done

    show "accs δ (ts ! i) (map (inv_on f Q) qs ! i)"
      apply (rule step.hyps(4)[OF L, simplified])
      apply (simp_all add: f_inv_on_f[OF IR[OF L]] 
                      inv_on_f_range[OF IR[OF L]] 
                      L step.prems(3,5))
      done
  qed (auto simp add: step.hyps(2))
qed

lemma (in tree_automaton) remap_lang2: 
  assumes I: "inj_on f (ta_rstates TA)" 
  shows "tta_lang (ta_remap f TA)  tta_lang TA"
  apply (unfold ta_lang_def ta_remap_def) 
  apply auto
  apply (rule_tac x=x in bexI)
  apply (drule remap_accs2[OF _ _ _ I])
  apply (auto dest: is_subset)
  done

theorem (in tree_automaton) remap_lang: 
  "inj_on f (ta_rstates TA)  ta_lang (ta_remap f TA) = ta_lang TA"
  by (auto intro: remap_lang1 remap_lang2)

lemma (in tree_automaton) remap_ta[intro!, simp]: 
  "tree_automaton (ta_remap f TA)"
  using initial_subset states_subset finite_states finite_rules
  by (unfold_locales) (auto simp add: ta_remap_def ta_rstates_def)

lemma (in ranked_tree_automaton) remap_rta[intro!, simp]:
  "ranked_tree_automaton (ta_remap f TA) A"
proof -
  interpret ta: tree_automaton "(ta_remap f TA)" by simp
  show ?thesis
    apply (unfold_locales)
    apply (auto simp add: ta_remap_def)
    apply (case_tac x)
    apply (auto simp add: ta_remap_def intro: ranked)
    done
qed

lemma (in det_tree_automaton) remap_dta[intro, simp]:
  assumes INJ: "inj_on f Q"
  shows "det_tree_automaton (ta_remap f TA) A"
proof -
  interpret ta: ranked_tree_automaton "(ta_remap f TA)" A by simp
  show ?thesis 
  proof
    fix q q' l qs
    assume A: 
      "(q  l qs) ta_rules (ta_remap f TA)"
      "(q'  l qs) ta_rules (ta_remap f TA)"
    then obtain qo qo' qso qso' where RO:
      "(qo  l qso)  δ"
      "(qo'  l qso')  δ"
      and [simp]:
      "q=f qo"
      "q'=f qo'"
      "qs = map f qso"
      "map f qso = map f qso'"
      apply (auto simp add: ta_remap_def)
      apply (case_tac x, case_tac xa)
      apply auto
      done
    from RO have OQ: "qoQ" "qo'Q" "set qso  Q" "set qso'  Q"
      by (unfold ta_rstates_def)
         (auto dest: δ_statesI)
    
    from OQ(3,4) have INJQSO: "inj_on f (set qso  set qso')"
      by (auto intro: subset_inj_on[OF INJ])

    from inj_on_map_eq_map[OF INJQSO] have "qso=qso'" by simp
    with deterministic[OF RO(1)] RO(2) have "qo=qo'" by simp
    thus "q=q'" by simp
  qed
qed

  
lemma (in complete_tree_automaton) remap_cta[intro, simp]:
  assumes INJ: "inj_on f Q"
  shows "complete_tree_automaton (ta_remap f TA) A"
proof -
  interpret ta: det_tree_automaton "(ta_remap f TA)" A by (simp add: INJ)
  show ?thesis
  proof
    fix qs l
    assume A:
      "qs  lists (ta_rstates (ta_remap f TA))" 
      "A l = Some (length qs)"
    from A(1) have "qslists (f`Q)"
      by (auto simp add: ta_rstates_def ta_remap_def)
    then obtain qso where QSO:
      "qsolists Q"
      "qs = map f qso"
      by (blast elim!: lists_image_witness)
    hence [simp]: "length qso = length qs" by simp

    from complete[OF QSO(1)] A(2) obtain qo where "(qo  l qso)  δ"
      by auto
    
    with QSO(2) have "((f qo)  l qs)ta_rules (ta_remap f TA)" 
      by (force simp add: ta_remap_def)
    thus "q. q  l qs  ta_rules (ta_remap f TA)" ..
  qed
qed

subsubsection "Union"

definition "ta_union TA TA' == 
   ta_initial = ta_initial TA  ta_initial TA', 
    ta_rules = ta_rules TA  ta_rules TA' 
  "

― ‹Given two disjoint sets of states, where no rule contains states from
  both sets, then any accepted tree is also accepted when only using one of the 
  subsets of states and rules. 
  This lemma and its corollaries capture the basic idea of 
  the union-algorithm.›
lemma accs_exclusive_aux: 
  " accs δn n q; δn=δδ'; δ_states δ  δ_states δ' = {}; qδ_states δ  
    accs δ n q"
proof (induct arbitrary: δ δ' rule: accs.induct[case_names step])
  case (step q l qs δn ts δ δ')
  note [simp] = step.prems(1)
  note [simp] = step.hyps(2)[symmetric] step.hyps(3)
  from step.prems have "qδ_states δ'" by blast
  with step.hyps(1) have "set qs  δ_states δ" and R: "(q  l qs)δ" 
    by (auto dest: δ_statesI)
  hence "!!i. i<length qs  accs δ (ts ! i) (qs ! i)" 
    by (force intro: step.hyps(4)[OF _ step.prems(1,2)])
  with accs.intros[OF R step.hyps(2)] show ?case .
qed

corollary accs_exclusive1: 
  " accs (δδ') n q; δ_states δ  δ_states δ' = {}; qδ_states δ  
    accs δ n q"
using accs_exclusive_aux[of _ n q δ δ'] by blast

corollary accs_exclusive2: 
  " accs (δδ') n q; δ_states δ  δ_states δ' = {}; qδ_states δ'  
    accs δ' n q"
using accs_exclusive_aux[of _ n q δ' δ] by blast

lemma ta_union_correct_aux1: 
  fixes TA TA'
  assumes TA: "tree_automaton TA"
  assumes TA': "tree_automaton TA'"
  assumes DJ: "ta_rstates TA  ta_rstates TA' = {}" 
  shows "ta_lang (ta_union TA TA') = ta_lang TA  ta_lang TA'"
proof (safe)
  interpret ta: tree_automaton TA using TA .
  interpret ta': tree_automaton TA' using TA' .

  from DJ ta.states_subset ta'.states_subset have 
    DJ': "δ_states (ta_rules TA)  δ_states (ta_rules TA') = {}" 
    by blast

  fix n
  assume A: "n  ta_lang (ta_union TA TA')" "n  ta_lang TA'"
  from A(1) obtain q where 
    B: "qta_initial TA  ta_initial TA'" 
       "accs (ta_rules TA  ta_rules TA') n q"
    by (auto simp add: ta_lang_def ta_union_def)
  from δ_states_accsI[OF B(2), simplified] show "nta_lang TA" proof
    assume C: "qδ_states (ta_rules TA)"
    with accs_exclusive1[OF B(2) DJ'] have "accs (ta_rules TA) n q" .
    moreover from DJ C ta'.initial_subset ta.states_subset B(1) have 
      "qta_initial TA" 
      by auto
    ultimately show ?thesis by (unfold ta_lang_def) auto
  next
    assume C: "qδ_states (ta_rules TA')"
    with accs_exclusive2[OF B(2) DJ'] have "accs (ta_rules TA') n q" .
    moreover from DJ C ta.initial_subset B(1) ta'.states_subset have 
      "qta_initial TA'" 
      by auto
    ultimately have False using A(2) by (unfold ta_lang_def) auto
    thus ?thesis ..
  qed
qed (unfold ta_lang_def ta_union_def, auto intro: accs_mono)

lemma ta_union_correct_aux2: 
  fixes TA TA'
  assumes TA: "tree_automaton TA"
  assumes TA': "tree_automaton TA'"
  shows "tree_automaton (ta_union TA TA')"
proof -
  interpret ta: tree_automaton TA using TA .
  interpret ta': tree_automaton TA' using TA' .

  show ?thesis
    apply (unfold_locales)
    apply (unfold ta_union_def)
    apply auto
    done
qed

  ― ‹If the sets of states are disjoint, the language of the union-automaton
    is the union of the languages of the original automata.›
theorem ta_union_correct:
  fixes TA TA'
  assumes TA: "tree_automaton TA"
  assumes TA': "tree_automaton TA'"
  assumes DJ: "ta_rstates TA  ta_rstates TA' = {}" 
  shows "ta_lang (ta_union TA TA') = ta_lang TA  ta_lang TA'"
        "tree_automaton (ta_union TA TA')"
  using ta_union_correct_aux1[OF TA TA' DJ]
        ta_union_correct_aux2[OF TA TA']
  by auto

lemma ta_union_rta: 
  fixes TA TA'
  assumes TA: "ranked_tree_automaton TA A"
  assumes TA': "ranked_tree_automaton TA' A"
  shows "ranked_tree_automaton (ta_union TA TA') A"
proof -
  interpret ta: ranked_tree_automaton TA A using TA .
  interpret ta': ranked_tree_automaton TA' A using TA' .

  show ?thesis
    apply (unfold_locales)
    apply (unfold ta_union_def)
    apply (auto intro: ta.ranked ta'.ranked)
    done
qed

text "The union-algorithm may wrap the states of the first and second automaton 
      in order to make them disjoint"
datatype ('q1,'q2) ustate_wrapper = USW1 'q1 | USW2 'q2 

lemma usw_disjoint[simp]: 
  "USW1 ` X  USW2 ` Y = {}"
  "remap_rule USW1 ` X  remap_rule USW2 ` Y = {}"
  apply auto
  apply (case_tac xa, case_tac xb)
  apply auto
  done
  
lemma states_usw_disjoint[simp]: 
  "ta_rstates (ta_remap USW1 X)  ta_rstates (ta_remap USW2 Y) = {}"
  by (auto simp add: ta_remap_def ta_rstates_def)

lemma usw_inj_on[simp, intro!]:
  "inj_on USW1 X" 
  "inj_on USW2 X" 
  by (auto intro: inj_onI)

definition "ta_union_wrap TA TA' = 
  ta_union (ta_remap USW1 TA) (ta_remap USW2 TA')"

lemma ta_union_wrap_correct:
  fixes TA :: "('Q1,'L) tree_automaton_rec"
  fixes TA' :: "('Q2,'L) tree_automaton_rec"
  assumes TA: "tree_automaton TA"
  assumes TA': "tree_automaton TA'"
  shows "ta_lang (ta_union_wrap TA TA') = ta_lang TA  ta_lang TA'" (is ?T1)
        "tree_automaton (ta_union_wrap TA TA')" (is ?T2)
proof -
  interpret a1: tree_automaton TA by fact
  interpret a2: tree_automaton TA' by fact

  show ?T1 ?T2
    by (unfold ta_union_wrap_def)
       (simp_all add: ta_union_correct a1.remap_lang a2.remap_lang)
qed

lemma ta_union_wrap_rta:
  fixes TA TA'
  assumes TA: "ranked_tree_automaton TA A"
  assumes TA': "ranked_tree_automaton TA' A"
  shows "ranked_tree_automaton (ta_union_wrap TA TA') A"
proof -
  interpret ta: ranked_tree_automaton TA A using TA .
  interpret ta': ranked_tree_automaton TA' A using TA' .

  show ?thesis
    by (unfold ta_union_wrap_def)
       (simp add: ta_union_rta)

qed


subsubsection "Reduction"

definition "reduce_rules δ P == δ  { r. rule_states r  P }"

lemma reduce_rulesI: "rδ; rule_states r  P  rreduce_rules δ P"
  by (unfold reduce_rules_def) auto

lemma reduce_rulesD: 
  " rreduce_rules δ P   rδ"
  " rreduce_rules δ P; qrule_states r  qP"
  by (unfold reduce_rules_def) auto

lemma reduce_rules_subset: "reduce_rules δ P  δ"
  by (unfold reduce_rules_def) auto

lemma reduce_rules_mono: "P  P'  reduce_rules δ P  reduce_rules δ P'"
  by (unfold reduce_rules_def) auto

lemma δ_states_reduce_subset: 
  shows "δ_states (reduce_rules δ Q)  δ_states δ  Q"
  by (unfold δ_states_def reduce_rules_def)
    auto

lemmas δ_states_reduce_subsetI = rev_subsetD[OF _ δ_states_reduce_subset]

definition ta_reduce 
  :: "('Q,'L) tree_automaton_rec  ('Q set)  ('Q,'L) tree_automaton_rec"
  where "ta_reduce TA P ==
     ta_initial = ta_initial TA  P,
      ta_rules = reduce_rules (ta_rules TA) P "

― ‹Reducing a tree automaton preserves the tree automata invariants›
theorem ta_reduce_inv: assumes A: "tree_automaton TA" 
  shows "tree_automaton (ta_reduce TA P)"
proof -
  interpret tree_automaton TA using A .
  show ?thesis proof
    show "finite (ta_rules (ta_reduce TA P))" 
         "finite (ta_initial (ta_reduce TA P))"
      using finite_states finite_rules finite_subset[OF reduce_rules_subset]
      by (unfold ta_reduce_def) (auto simp add: Let_def)
  qed
qed
 
lemma reduce_δ_states_rules[simp]: 
  "(ta_rules (ta_reduce TA (δ_states (ta_rules TA)))) = ta_rules TA"
  by (auto simp add: ta_reduce_def δ_states_def reduce_rules_def)

― ‹Reducing a tree automaton to the states that occur in its rules does 
      not change its language›
lemma ta_reduce_δ_states: 
  "ta_lang (ta_reduce TA (δ_states (ta_rules TA))) = ta_lang TA"
  apply (auto simp add: ta_lang_def)
  apply (frule δ_states_accsI)
  apply (auto simp add: ta_reduce_def δ_states_def reduce_rules_def) [1]
  apply (frule δ_states_accsI)
  apply (auto simp add: ta_reduce_def δ_states_def reduce_rules_def) [1]
done

text_raw ‹\paragraph{Forward Reduction}›
text ‹
  We characterize the set of forward accessible states by the reflexive,
  transitive closure of a forward-successor (f_succ ⊆ Q×Q›) relation 
  applied to the initial states.
  
  The forward-successors of a state $q$ are those states $q'$ such that there is
  a rule $q \leftarrow f(\ldots q' \ldots)$.
›

  ― ‹Forward successors›
inductive_set f_succ for δ where
  "(q  l qs)δ; q'set qs  (q,q')  f_succ δ"

  ― ‹Alternative characterization of forward successors›
lemma f_succ_alt: "f_succ δ = {(q,q'). l qs. (q  l qs)δ  q'set qs}"
  by (auto intro: f_succ.intros elim!: f_succ.cases)

  ― ‹Forward accessible states›
definition "f_accessible δ Q0 == ((f_succ δ)*) `` Q0"

  ― ‹Alternative characterization of forward accessible states.
      The initial states are forward accessible, and if there is a rule
      whose lhs-state is forward-accessible, all rhs-states of that rule
      are forward-accessible, too.›
inductive_set f_accessible_alt :: "('Q,'L) ta_rule set  'Q set  'Q set"
for δ Q0
where
  fa_refl: "q0Q0  q0  f_accessible_alt δ Q0" |
  fa_step: " qf_accessible_alt δ Q0; (q  l qs)δ; q'set qs  
             q'  f_accessible_alt δ Q0"

lemma f_accessible_alt: "f_accessible δ Q0 = f_accessible_alt δ Q0"
  apply (unfold f_accessible_def f_succ_alt)
  apply auto
proof goal_cases
  case 1 thus ?case
    apply (induct rule: rtrancl_induct)
    apply (auto intro: f_accessible_alt.intros)
    done
next
  case 2 thus ?case
    apply (induct rule: f_accessible_alt.induct)
    apply (auto simp add: Image_def intro: rtrancl.intros)
    done
qed

lemmas f_accessibleI = f_accessible_alt.intros[folded f_accessible_alt]
lemmas f_accessibleE = f_accessible_alt.cases[folded f_accessible_alt]

lemma f_succ_finite[simp, intro]: "finite δ  finite (f_succ δ)"
  apply (unfold f_succ_alt)
  apply (rule_tac B="δ_states δ × δ_states δ" in finite_subset)
  apply (auto dest: δ_statesI simp add: δ_states_finite)
  done

lemma f_accessible_mono: "QQ'  xf_accessible δ Q  xf_accessible δ Q'"
  by (auto simp add: f_accessible_def)
  
lemma f_accessible_prepend: 
  " (q  l qs)  δ; q'set qs; xf_accessible δ {q'}  
     xf_accessible δ {q}"
  by (auto dest: f_succ.intros simp add: f_accessible_def)

lemma f_accessible_subset: "qf_accessible δ Q  qQ  δ_states δ"
  apply (unfold f_accessible_alt)
  apply (induct rule: f_accessible_alt.induct)
  apply (force simp add: δ_states_def split: ta_rule.split_asm)+
  done

lemma (in tree_automaton) f_accessible_in_states: 
  "qf_accessible (ta_rules TA) (ta_initial TA)  qta_rstates TA"
  using initial_subset states_subset
  by (drule_tac f_accessible_subset) (auto)

lemma f_accessible_refl_inter_simp[simp]: "Q  f_accessible r Q = Q"
  by (unfold f_accessible_alt) (auto intro: fa_refl)

  ― ‹A tree remains accepted by a state @{text q} if the rules are reduced to 
        the states that are forward-accessible from @{text q}
lemma accs_reduce_f_acc: 
  "accs δ t q  accs (reduce_rules δ (f_accessible δ {q})) t q"
proof (induct rule: accs.induct[case_names step])
  case (step q l qs δ n) 
  show ?case proof (rule accs.intros[of q l qs])
    show "(q  l qs)  reduce_rules δ (f_accessible δ {q})"
      using step(1)
      by (fastforce 
        intro!: reduce_rulesI 
        intro: f_succ.intros 
        simp add: f_accessible_def)
  next
    fix i 
    assume A: "i<length qs"

    have B: "f_accessible δ {q}  f_accessible δ {qs!i}" using step.hyps(1)
      by (force 
        simp add: A f_accessible_def 
        intro: converse_rtrancl_into_rtrancl f_succ.intros[where q'="qs!i"])
    show "accs (reduce_rules δ (f_accessible δ {q})) (n ! i) (qs ! i)"
      using accs_mono[OF step.hyps(4)[OF A] reduce_rules_mono[OF B]] .
  qed (simp_all add: step.hyps(2,3))
qed

  ― ‹Short-hand notation for forward-reducing a tree-automaton›
abbreviation "ta_fwd_reduce TA == 
  (ta_reduce TA (f_accessible (ta_rules TA) (ta_initial TA)))"

― ‹Forward-reducing a tree automaton does not change its language›
theorem ta_reduce_f_acc[simp]: "ta_lang (ta_fwd_reduce TA) = ta_lang TA"
  apply (rule sym)
  apply (unfold ta_reduce_def ta_lang_def)
  apply (auto simp add: Let_def)
  apply (rule_tac x=q in bexI)
  apply (drule accs_reduce_f_acc)
  apply (rule_tac 
    P1="(f_accessible (ta_rules TA) {q})" 
    in accs_mono[OF _ reduce_rules_mono])
  apply (auto simp add: f_accessible_def)
  apply (rule_tac x=q in bexI)
  apply (blast intro: accs_mono[OF _ reduce_rules_subset])
  .

text_raw ‹\paragraph{Backward Reduction}›

text ‹
  A state is backward accessible, iff at least one tree is accepted in it.

  Inductively, backward accessible states can be characterized as follows:
  A state is backward accessible, if it occurs on the left hand side of a 
  rule, and all states on this rule's right hand side are backward accessible.
›
inductive_set b_accessible :: "('Q,'L) ta_rule set  'Q set" 
  for δ
  where
  " (q  l qs)δ; !!x. xset qs  xb_accessible δ   qb_accessible δ"

lemma b_accessibleI: 
  "(q  l qs)δ; set qs  b_accessible δ  qb_accessible δ"
  by (auto intro: b_accessible.intros)

― ‹States that accept a tree are backward accessible›
lemma accs_is_b_accessible: "accs δ t q  qb_accessible δ"
  apply (induct rule: accs.induct)
  apply (rule b_accessible.intros)
  apply assumption
  apply (fastforce simp add: in_set_conv_nth)
  done

lemma b_acc_subset_δ_statesI: "xb_accessible δ  xδ_states δ"
  apply (erule b_accessible.cases)
  apply (auto intro: δ_statesI)
  done

lemma b_acc_subset_δ_states: "b_accessible δ  δ_states δ"
  by (auto simp add: b_acc_subset_δ_statesI)

lemma b_acc_finite[simp, intro!]: "finite δ  finite (b_accessible δ)"
  apply (rule finite_subset[OF b_acc_subset_δ_states])
  apply auto
  done

  ― ‹Backward accessible states accept at least one tree›
lemma b_accessible_is_accs: 
  " qb_accessible (ta_rules TA); 
     !!t. accs (ta_rules TA) t q  P
     P"
proof (induct arbitrary: P rule: b_accessible.induct[case_names IH])
  case (IH q l qs) 

  obtain ts where 
    A: "i<length qs. accs (ta_rules TA) (ts!i) (qs!i)" 
       "length ts = length qs"
  proof -
    from IH(3) have "xset qs. t. accs (ta_rules TA) t x" by auto
    hence "ts. (i<length qs. accs (ta_rules TA) (ts!i) (qs!i)) 
                 length ts = length qs"
    proof (induct qs)
      case Nil thus ?case by simp
    next
      case (Cons q qs) then obtain ts where 
        IHAPP: "i<length qs. accs (ta_rules TA) (ts ! i) (qs ! i)" and 
        L: "length ts = length qs" 
        by auto
      moreover from Cons obtain t where "accs (ta_rules TA) t q" by auto
      ultimately have 
        "i<length (q#qs). accs (ta_rules TA) ((t#ts) ! i) ((q#qs) ! i)"
        apply auto
        apply (case_tac i)
        apply auto
        done
      thus ?case using L by auto
    qed
    thus thesis by (blast intro: that)
  qed

  from A show ?case 
    apply (rule_tac IH(4)[OF accs.intros[OF IH(1)]])
    apply auto
    done
qed

  ― ‹All trees remain accepted when reducing the rules to 
      backward-accessible states›
lemma accs_reduce_b_acc: 
  "accs δ t q  accs (reduce_rules δ (b_accessible δ)) t q"
  apply (induct rule: accs.induct)
  apply (rule accs.intros)
  apply (rule reduce_rulesI)
  apply assumption
  apply (auto)
  apply (rule_tac t="NODE f ts" in accs_is_b_accessible)
  apply (rule_tac accs.intros)
  apply auto
  apply (simp only: in_set_conv_nth)
  apply (erule_tac exE)
  apply (rule_tac t="ts ! i" in accs_is_b_accessible)
  apply auto
  done

  ― ‹Shorthand notation for backward-reduction of a tree automaton›
abbreviation "ta_bwd_reduce TA == (ta_reduce TA (b_accessible (ta_rules TA)))"

― ‹Backwards-reducing a tree automaton does not change its language›
theorem ta_reduce_b_acc[simp]: "ta_lang (ta_bwd_reduce TA) = ta_lang TA"
  apply (rule sym)
  apply (unfold ta_reduce_def ta_lang_def)
  apply (auto simp add: Let_def)
  apply (rule_tac x=q in bexI)
  apply (blast intro: accs_reduce_b_acc)
  apply (blast dest: accs_is_b_accessible)
  apply (rule_tac x=q in bexI)
  apply (blast intro: accs_mono[OF _ reduce_rules_subset])
  .

  ― ‹Emptiness check by backward reduction. The language of a tree automaton 
    is empty, if and only if no initial state is backwards-accessible.›
theorem empty_if_no_b_accessible: 
  "ta_lang TA = {}  ta_initial TA  b_accessible (ta_rules TA) = {}"
  by (auto 
    simp add: ta_lang_def 
    intro: accs_is_b_accessible b_accessible_is_accs)

subsubsection "Product Automaton"
text ‹
  The product automaton of two tree automata accepts the intersection 
  of the languages of the two automata.
›

  ― ‹Product rule›
fun r_prod where
  "r_prod (q1  l1 qs1) (q2  l2 qs2) = ((q1,q2)  l1 (zip qs1 qs2))"

  ― ‹Product rules›
definition "δ_prod δ1 δ2 == {
  r_prod (q1  l qs1) (q2  l qs2) | q1 q2 l qs1 qs2.
    length qs1 = length qs2  
    (q1  l qs1)δ1  
    (q2  l qs2)δ2
}"

lemma δ_prodI: " 
    length qs1 = length qs2;
    (q1  l qs1)δ1;
    (q2  l qs2)δ2   ((q1,q2)  l (zip qs1 qs2))  δ_prod δ1 δ2"
  by (auto simp add: δ_prod_def)

lemma δ_prodE: 
  " 
    rδ_prod δ1 δ2; 
    !!q1 q2 l qs1 qs2.  length qs1 = length qs2;
                         (q1  l qs1)δ1;
                         (q2  l qs2)δ2;
                         r = ((q1,q2)  l (zip qs1 qs2)) 
                         P 
     P"
  by (auto simp add: δ_prod_def)

  ― ‹With the product rules, only trees can be constructed that can also be 
      constructed with the two original sets of rules›
lemma δ_prod_sound: 
  assumes A: "accs (δ_prod δ1 δ2) t (q1,q2)" 
  shows "accs δ1 t q1" "accs δ2 t q2"
proof -
  {
    fix δ q
    assume "accs δ t q" "δ = (δ_prod δ1 δ2)" "q=(q1,q2)"
    hence "accs δ1 t q1  accs δ2 t q2"
      by (induct arbitrary: δ1 δ2 q1 q2 rule: accs.induct)
         (auto intro: accs.intros simp add: δ_prod_def)
  } with A show "accs δ1 t q1" "accs δ2 t q2" by auto
qed

  ― ‹Any tree that can be constructed with both original sets of rules can also
      be constructed with the product rules›
lemma δ_prod_precise: 
  " accs δ1 t q1; accs δ2 t q2   accs (δ_prod δ1 δ2) t (q1,q2)"
proof (induct arbitrary: δ2 q2 rule: accs.induct[case_names step])
  case (step q1 l qs1 δ1 ts δ2 q2)
  note [simp] = step.hyps(2,3)
  from step.hyps(2) obtain qs2 where 
    I2: "(q2  l qs2)δ2" 
        "!!i. i<length qs2  accs δ2 (ts ! i) (qs2 ! i)" and 
    [simp]: "length qs2 = length ts"
    by (rule_tac accs.cases[OF step.prems]) fastforce
  show ?case 
  proof (rule accs.intros)
    from step.hyps(1) I2(1) show 
      "((q1,q2)  l (zip qs1 qs2))δ_prod δ1 δ2" and 
      [simp]: "length ts = length (zip qs1 qs2)"
      by (unfold δ_prod_def) force+
  next
    fix i
    assume L: "i<length (zip qs1 qs2)"
    with step.hyps(4)[OF _ I2(2), of i] have 
      "accs (δ_prod δ1 δ2) (ts ! i) (qs1 ! i, qs2 ! i)" 
      by simp
    also have "(qs1 ! i, qs2 ! i) = zip qs1 qs2 ! i" using L by auto
    finally show "accs (δ_prod δ1 δ2) (ts ! i) (zip qs1 qs2 ! i)" .
  qed
qed

lemma δ_prod_empty[simp]: 
  "δ_prod {} δ = {}"
  "δ_prod δ {} = {}"
  by (unfold δ_prod_def) auto

lemma δ_prod_2sng[simp]: 
  " rhsl r1  rhsl r2   δ_prod {r1} {r2} = {}"
  " length (rhsq r1)  length (rhsq r2)   δ_prod {r1} {r2} = {}"
  " rhsl r1 = rhsl r2; length (rhsq r1) = length (rhsq r2)  
     δ_prod {r1} {r2} = {r_prod r1 r2}"
  apply (unfold δ_prod_def)
  apply (cases r1, cases r2, auto)+
  done

lemma δ_prod_Un[simp]: 
  "δ_prod (δ1δ1') δ2 = δ_prod δ1 δ2  δ_prod δ1' δ2"
  "δ_prod δ1 (δ2δ2') = δ_prod δ1 δ2  δ_prod δ1 δ2'"
  by (auto elim: δ_prodE intro: δ_prodI)

text ‹The next two definitions are solely for technical reasons.
  They are required to allow simplification of expressions of the form
  @{term "δ_prod (insert r δ1) δ2"} or @{term "δ_prod δ1 (insert r δ2)"}, 
  without making the simplifier loop.
›
definition "δ_prod_sng1 r δ2 == 
  case r of (q1  l qs1)  
    { r_prod r (q2  l qs2) | 
         q2 qs2. length qs1 = length qs2  (q2  l qs2)δ2 
    }"
definition "δ_prod_sng2 δ1 r == 
  case r of (q2  l qs2)  
    { r_prod (q1  l qs1) r | 
         q1 qs1. length qs1 = length qs2  (q1  l qs1)δ1 
    }"

lemma δ_prod_sng_alt:
  "δ_prod_sng1 r δ2 = δ_prod {r} δ2"
  "δ_prod_sng2 δ1 r = δ_prod δ1 {r}"
  apply (unfold δ_prod_def δ_prod_sng1_def δ_prod_sng2_def)
  apply (auto split: ta_rule.split)
  done
  
lemmas δ_prod_insert = 
  δ_prod_Un(1)[where ?δ1.0="{x}", simplified, folded δ_prod_sng_alt]
  δ_prod_Un(2)[where ?δ2.0="{x}", simplified, folded δ_prod_sng_alt]
  for x

  ― ‹Product automaton›
definition "ta_prod TA1 TA2 == 
   ta_initial = ta_initial TA1 × ta_initial TA2, 
    ta_rules = δ_prod (ta_rules TA1) (ta_rules TA2) 
  "
   
lemma ta_prod_correct_aux1: 
  "ta_lang (ta_prod TA1 TA2) = ta_lang TA1  ta_lang TA2"
  by (unfold ta_lang_def ta_prod_def) (auto dest: δ_prod_sound δ_prod_precise)

lemma δ_states_cart: 
  "q  δ_states (δ_prod δ1 δ2)  q  δ_states δ1 × δ_states δ2"
  by (unfold δ_states_def δ_prod_def) 
     (force split: ta_rule.split simp add: set_zip)

lemma δ_prod_finite [simp, intro]: 
  "finite δ1  finite δ2  finite (δ_prod δ1 δ2)"
proof -
  have 
    "δ_prod δ1 δ2 
     (λ(r1,r2). case r1 of (q1  l1 qs1)  
                  case r2 of (q2  l2 qs2)  
                    ((q1,q2)  l1 (zip qs1 qs2))) 
       ` (δ1 × δ2)"
    by (unfold δ_prod_def) force
  moreover assume "finite δ1" "finite δ2"
  ultimately show ?thesis 
    by (metis finite_imageI finite_cartesian_product finite_subset)
qed

lemma ta_prod_correct_aux2: 
  assumes TA: "tree_automaton TA1" "tree_automaton TA2" 
  shows "tree_automaton (ta_prod TA1 TA2)"
proof -
  interpret ta1: tree_automaton TA1 using TA by blast
  interpret ta2: tree_automaton TA2 using TA by blast
  show ?thesis 
    apply unfold_locales
    apply (unfold ta_prod_def)
    apply (auto 
      intro: ta1.is_subset ta2.is_subset δ_prod_finite 
      dest: δ_states_cart 
      simp add: ta1.finite_states ta2.finite_states 
                ta1.finite_rules ta2.finite_rules)
    done
qed

  ― ‹The language of the product automaton is the intersection of the languages
      of the two original automata›
theorem ta_prod_correct:
  assumes TA: "tree_automaton TA1" "tree_automaton TA2" 
  shows 
    "ta_lang (ta_prod TA1 TA2) = ta_lang TA1  ta_lang TA2"
    "tree_automaton (ta_prod TA1 TA2)"
  using ta_prod_correct_aux1 
        ta_prod_correct_aux2[OF TA] 
  by auto


lemma ta_prod_rta: 
  assumes TA: "ranked_tree_automaton TA1 A" "ranked_tree_automaton TA2 A" 
  shows "ranked_tree_automaton (ta_prod TA1 TA2) A"
proof -
  interpret ta1: ranked_tree_automaton TA1 A using TA by blast
  interpret ta2: ranked_tree_automaton TA2 A using TA by blast

  interpret tap: tree_automaton "(ta_prod TA1 TA2)"
    apply (rule ta_prod_correct_aux2)
    by unfold_locales

  show ?thesis 
    apply unfold_locales
    apply (unfold ta_prod_def δ_prod_def)
    apply (auto intro: ta1.ranked ta2.ranked)
    done
qed

subsubsection "Determinization"
text ‹
  We only formalize the brute-force subset construction without reduction. 

  The basic idea of this construction is to construct an automaton where the
  states are sets of original states, and the lhs of a rule consists of all
  states that a term with given rhs and function symbol may be labeled by.
›

context ranked_tree_automaton
begin
  ― ‹Left-hand side of subset rule for given symbol and rhs›
  definition "δss_lhs f ss == 
    { q | q qs. (q  f qs)δ  list_all_zip (∈) qs ss }"

  ― ‹Subset construction›
  inductive_set δss :: "('Q set,'L) ta_rule set" where
    " A f = Some (length ss); 
       ss  lists {s. s  ta_rstates TA}; 
       s = δss_lhs f ss
       (s  f ss)  δss"

  lemma δssI: 
    assumes A: "A f = Some (length ss)"
               "ss  lists {s. s  ta_rstates TA}"
    shows 
      "( (δss_lhs f ss)  f ss)  δss"
    using δss.intros[where s="(δss_lhs f ss)"] A
    by auto

  lemma δss_subset[simp, intro!]: "δss_lhs f ss  Q"
    by (unfold ta_rstates_def δss_lhs_def) (auto intro: δ_statesI)

  lemma δss_finite[simp, intro!]: "finite δss"
  proof -
    have "δss  ((λf. (λ(s,ss). (s  f ss))
                     `({s. sQ} 
                       × (lists {s. sQ}  {l. length l = the (A f)}))
                  ) ` F)" 
      (is "_((λf. ?tr f ` ?prod f)`F)")
    proof (intro equalityI subsetI)
      fix r
      assume "rδss"
      then obtain f s ss where 
        U: "r=(s  f ss)" 
           "A f = Some (length ss)" 
           "sslists {s. sQ}" 
           "s=δss_lhs f ss"
        by (force elim!: δss.cases)
      from U(4) have "sQ" by simp 
      moreover from U(2) have "length ss = the (A f)" by simp
      ultimately have "(s,ss)?prod f" using U(3) by auto
      hence "(s  f ss)?tr f ` ?prod f" by auto
      moreover from U(2) have "fF" by auto
      ultimately show "r((λf. ?tr f ` ?prod f)`F)" using U(1) by auto
    qed
    moreover have "finite "
      by (auto intro!: finite_imageI finite_SigmaI lists_of_len_fin)
    ultimately show ?thesis by (blast intro: finite_subset)
  qed

  lemma δss_det: " (q  f qs)  δss; (q'  f qs) δss   q=q'"
    by (auto elim!: δss.cases)

  lemma δss_accs_sound: 
    assumes A: "accs δ t q"  
    obtains s where
    "sQ"
    "qs"
    "accs δss t s"
  proof -
    have "sQ. qs  accs_laz δss t s" using A[unfolded accs_laz]
    proof (induct δδ t q rule: accs_laz.induct[case_names step])
      case (step q f qs ts)
      hence I:
        "(q  f qs)δ"
        "list_all_zip (accs_laz δ) ts qs"
        "list_all_zip (λt q. s. sQ  qs  accs_laz δss t s) ts qs"
        by simp_all
      from I(3) obtain ss where SS: 
        "ss  lists {s. sQ}"
        "list_all_zip (∈) qs ss"
        "list_all_zip (accs_laz δss) ts ss"
        by (erule_tac laz_swap_ex) auto
      from I(2) SS(2) have 
        LEN[simp]: "length qs = length ts" "length ss = length ts"
        by (auto simp add: list_all_zip_alt)
      from ranked[OF I(1)] have AF: "A f = Some (length ts)" by simp

      from δssI[of f ss, OF _ SS(1)] AF have 
        RULE_S: "((δss_lhs f ss)  f ss)  δss"
        by simp
      
      from accs_laz.intros[OF RULE_S SS(3)] have 
        G1: "accs_laz δss (NODE f ts) (δss_lhs f ss)" .
      from I(1) SS(2) have "q(δss_lhs f ss)" by (auto simp add: δss_lhs_def)
      thus ?case using G1 by auto
    qed
    thus ?thesis 
      apply (elim exE conjE)
      apply (rule_tac that)
      apply assumption
      apply (auto simp add: accs_laz)
      done
  qed


  lemma δss_accs_precise:
    assumes A: "accs δss t s" "qs"  
    shows "accs δ t q"
    using A
    unfolding accs_laz
  proof (induct δδss t s 
                arbitrary: q 
                rule: accs_laz.induct[case_names step])
    case (step s f ss ts)
    hence I:
      "(s  f ss)  δss"
      "list_all_zip (accs_laz δss) ts ss"
      "list_all_zip (λt s. qs. accs_laz δ t q) ts ss"
      "qs"
      by (auto simp add: Ball_def)
      
    from I(2) have [simp]: "length ss = length ts" 
      by (simp add: list_all_zip_alt)

    from I(1) have SS: 
      "A f = Some (length ts)"
      "ss  lists {s. sQ}"
      "s=δss_lhs f ss"
      by (force elim!: δss.cases)+
      
    from I(4) SS(3) obtain qs where
      RULE: "(q  f qs)  δ" and
      QSISS: "list_all_zip (∈) qs ss"
      by (auto simp add: δss_lhs_def)
    from I(3) QSISS have CA: "list_all_zip (accs_laz δ) ts qs"
      by (auto simp add: list_all_zip_alt)
    from accs_laz.intros[OF RULE CA] show ?case .
  qed
    

  ― ‹Determinization›
  definition "detTA ==  ta_initial = { s. sQ  sQi  {} }, 
                         ta_rules = δss "
      
  theorem detTA_is_ta[simp, intro]:
    "det_tree_automaton detTA A"
    apply (unfold_locales)
    apply (auto simp add: detTA_def elim: δss.cases)
    done
    

  theorem detTA_lang[simp]:
    "ta_lang (detTA) = ta_lang TA"
    apply (unfold ta_lang_def detTA_def)
    apply safe
    apply simp_all
  proof -
    fix t s
    assume A: 
      "sQ  sQi  {}"
      "accs δss t s"
    from A(1) obtain qi where QI: "qis" "qiQi" by auto

    from δss_accs_precise[OF A(2) QI(1)] have "accs δ t qi" .
    with QI(2) show "qiQi. accs δ t qi" by blast
  next
    fix t qi
    assume A: 
      "qiQi" 
      "accs δ t qi"
    from δss_accs_sound[OF A(2)] obtain s where SS: 
      "sQ" 
      "qis" 
      "accs δss t s" .
    with A(1) show "sQ. s  Qi  {}  accs δss t s" by blast
  qed
    
  lemmas detTA_correct = detTA_is_ta detTA_lang
end

subsubsection "Completion"
text ‹
  To each deterministic tree automaton, rules and states can be added to make
  it complete, without changing its language.
›

context det_tree_automaton
begin
  ― ‹States of the complete automaton›
  definition "Qcomplete == insert None (Some`Q)"

  lemma Qcomplete_finite[simp, intro!]: "finite Qcomplete"
    by (auto simp add: Qcomplete_def)

  ― ‹Rules of the complete automaton›
  definition δcomplete :: "('Q option, 'L) ta_rule set" where
    "δcomplete == (remap_rule Some ` δ) 
                   { (None  f qs) | f qs. 
                         A f = Some (length qs) 
                          qslists Qcomplete 
                          ¬(qo qso. (qo  f qso)δ  qs=map Some qso ) }"


  lemma δ_states_complete: "qδ_states δcomplete  qQcomplete"
    apply (erule δ_statesE)
    apply (unfold δcomplete_def Qcomplete_def)
    apply auto
    apply (case_tac x)
    apply (auto simp add: ta_rstates_def intro: δ_statesI) [1]
    apply (case_tac x)
    apply (auto simp add: ta_rstates_def dest: δ_statesI)
    done
    

  definition 
    "completeTA ==  ta_initial = Some`Qi, ta_rules = δcomplete "

  lemma δcomplete_finite[simp, intro]: "finite δcomplete"
  proof -
    have "δcomplete  legal_rules Qcomplete"
      apply (rule)
      apply (rule legal_rulesI)
      apply assumption
      apply (case_tac x)
      apply (unfold δcomplete_def Qcomplete_def ta_rstates_def) [1]
      apply auto
      apply (case_tac xa)
      apply (auto dest: δ_statesI)
      apply (case_tac xa)
      apply (auto dest: δ_statesI)
      apply (unfold δcomplete_def Qcomplete_def ta_rstates_def) [1]
      apply (auto)
      apply (case_tac xa)
      apply (auto intro: ranked)
      done
    thus ?thesis by (auto intro: finite_subset)
  qed

  theorem completeTA_is_ta: "complete_tree_automaton completeTA A"
  proof (standard, goal_cases)
    case 1 thus ?case by (simp add: completeTA_def)
  next
    case 2 thus ?case by (simp add: completeTA_def)
  next
    case 3 thus ?case
      apply (auto simp add: completeTA_def δcomplete_def)
      apply (case_tac x)
      apply (auto intro: ranked)
      done
  next
    case 4 thus ?case
      apply (auto simp add: completeTA_def δcomplete_def)
      apply (case_tac x, case_tac xa)
      apply (auto intro: deterministic) [1]
      apply (case_tac x)
      apply auto [1]
      apply (case_tac x)
      apply auto [1]
      done
  next
    case prems: (5 qs f)
    {
      fix qo qso
      assume R: "(qo  f qso)δ" and [simp]: "qs=map Some qso"
      hence "((Some qo)  f qs)  remap_rule Some ` δ" by force
      hence ?case by (simp add: completeTA_def δcomplete_def) blast
    } moreover {
      assume A: "¬(qo qso. (qo  f qso)δ  qs=map Some qso)"

      have "(Some ` Qi  δ_states δcomplete)  Qcomplete"
        apply (auto intro: δ_states_complete)
        apply (simp add: Qcomplete_def ta_rstates_def)
        done

      with prems have B: "qslists Qcomplete"
        by (auto simp add: completeTA_def ta_rstates_def)

      from A B prems(2) have ?case
        apply (rule_tac x=None in exI)
        apply (simp add: completeTA_def δcomplete_def)
        done
    } ultimately show ?case by blast
  qed

  theorem completeTA_lang: "ta_lang completeTA = ta_lang TA"
  proof (intro equalityI subsetI)
    ― ‹This direction is done by a monotonicity argument›
    fix t
    assume "tta_lang TA"
    then obtain qi where "qiQi" "accs δ t qi" by (auto simp add: ta_lang_def)
    hence 
      QI: "Some qi  Some`Qi" and
      ACCS: "accs (remap_rule Some`δ) t (Some qi)"
      by (auto intro: remap_accs1)
    have "(remap_rule Some`δ)  δcomplete" by (unfold δcomplete_def) auto
    with ACCS have "accs δcomplete t (Some qi)" by (blast dest: accs_mono)
    thus "tta_lang completeTA" using QI 
      by (auto simp add: ta_lang_def completeTA_def)
  next
    fix t
    assume A: "tta_lang completeTA"
    then obtain qi where 
      QI: "qiQi" and
      ACCS: "accs δcomplete t (Some qi)" 
      by (auto simp add: ta_lang_def completeTA_def)
    moreover
    {
      fix qi
      have " accs δcomplete t (Some qi)   accs δ t qi"
        unfolding accs_laz
      proof (induct δδcomplete t q"Some qi"
                    arbitrary: qi
                    rule: accs_laz.induct[case_names step])
        case (step f qs ts qi)
        hence I:
          "((Some qi)  f qs)  δcomplete"
          "list_all_zip (accs_laz δcomplete) ts qs"
          "list_all_zip (λt q. (qo. q=Some qo  accs_laz δ t qo)) ts qs"
          by auto
        from I(1) have "((Some qi)  f qs)  remap_rule Some`δ"
          by (unfold δcomplete_def) auto
        then obtain qso where 
          RULE: "(qi  f qso)δ" and
          QSF: "qs=map Some qso"
          apply (auto)
          apply (case_tac x)
          apply auto
          done
        from I(3) QSF have ACCS: "list_all_zip (accs_laz δ) ts qso"
          by (auto simp add: list_all_zip_alt)
        from accs_laz.intros[OF RULE ACCS] show ?case .
      qed
    }
    ultimately have "accs δ t qi" by blast
    thus "tta_lang TA" using QI by (auto simp add: ta_lang_def)
  qed
        
  lemmas completeTA_correct = completeTA_is_ta completeTA_lang
end

subsubsection "Complement"
text ‹
  A deterministic, complete tree automaton can be transformed into an automaton
  accepting the complement language by complementing its initial states.
›

context complete_tree_automaton
begin

    ― ‹Complement automaton, i.e. that accepts exactly the 
        trees not accepted by this automaton›
  definition "complementTA == 
    ta_initial = Q - Qi,
    ta_rules = δ "

    
  lemma cta_rules[simp]: "ta_rules complementTA = δ" 
    by (auto simp add: complementTA_def)

  theorem complementTA_correct:
    "ta_lang complementTA = ranked_trees A - ta_lang TA" (is ?T1)
    "complete_tree_automaton complementTA A" (is ?T2)
  proof -
    show ?T1
      apply (unfold ta_lang_def complementTA_def)
      apply (force intro: accs_is_ranked dest: accs_unique label_all)
      done

    have QSS: "!!q. qta_rstates complementTA  qQ"
      by (auto simp add: complementTA_def ta_rstates_def)

    show ?T2
      apply (unfold_locales)
      apply (unfold complementTA_def)[4]
      apply (auto simp add: deterministic ranked 
                  intro: complete QSS)
      done
  qed

end


subsection "Regular Tree Languages"
subsubsection "Definitions"

  ― ‹Regular languages over alphabet @{text A}›
definition regular_languages :: "('L  nat)  'L tree set set" 
  where "regular_languages A == 
    { ta_lang TA | (TA::(nat,'L) tree_automaton_rec). 
                          ranked_tree_automaton TA A }"


lemma rtlE:
  fixes L :: "'L tree set"
  assumes A: "Lregular_languages A"
  obtains TA::"(nat,'L) tree_automaton_rec" where 
    "L=ta_lang TA"
    "ranked_tree_automaton TA A"
  using A that
  by (unfold regular_languages_def) blast

context ranked_tree_automaton
begin

  lemma (in ranked_tree_automaton) rtlI[simp]:
    shows "ta_lang TA  regular_languages A"
  proof -
    ― ‹Obtain injective mapping from the finite set of states to the 
        natural numbers›
    from finite_imp_inj_to_nat_seg[OF finite_states] obtain f :: "'Q  nat" 
      where INJMAP: "inj_on f (ta_rstates TA)" by blast
    ― ‹Remap automaton. The language remains the same.›
    from remap_lang[OF INJMAP] have LE: "ta_lang (ta_remap f TA) = ta_lang TA" .
    moreover have "ranked_tree_automaton (ta_remap f TA) A" ..
    ultimately show ?thesis by (auto simp add: regular_languages_def)
  qed

  text ‹
    It is sometimes more handy to obtain a complete, deterministic tree automaton
    accepting a given regular language.
›
  theorem obtain_complete:
    obtains TAC::"('Q set option,'L) tree_automaton_rec" where
    "ta_lang TAC = ta_lang TA"
    "complete_tree_automaton TAC A"
  proof -
    from detTA_correct have 
      DT: "det_tree_automaton detTA A" and
      [simp]: "ta_lang detTA = ta_lang TA"
      by simp_all
    
    interpret dt: det_tree_automaton detTA A using DT .
    
    from dt.completeTA_correct have G: 
      "ta_lang (det_tree_automaton.completeTA detTA A) = ta_lang TA"
      "complete_tree_automaton (det_tree_automaton.completeTA detTA A) A"
      by simp_all
    thus ?thesis by (blast intro: that)
  qed

end


lemma rtlE_complete:
  fixes L :: "'L tree set"
  assumes A: "Lregular_languages A"
  obtains TA::"(nat,'L) tree_automaton_rec" where 
    "L=ta_lang TA"
    "complete_tree_automaton TA A"
proof -
  from rtlE[OF A] obtain TA :: "(nat,'L) tree_automaton_rec" where
    [simp, symmetric]: "L = ta_lang TA" and
        RT: "ranked_tree_automaton TA A" .

  interpret ta: ranked_tree_automaton TA A using RT .

  obtain TAC :: "(nat set option,'L) tree_automaton_rec" 
    where [simp]: "ta_lang TAC = L" and CT: "complete_tree_automaton TAC A"
    by (rule_tac ta.obtain_complete) auto
  
  interpret tac: complete_tree_automaton TAC A using CT .

  ― ‹Obtain injective mapping from the finite set of states to the 
      natural numbers›
  from finite_imp_inj_to_nat_seg[OF tac.finite_states] 
  obtain f :: "nat set option  nat" where
    INJMAP: "inj_on f (ta_rstates TAC)" by blast
  ― ‹Remap automaton. The language remains the same.›
  from tac.remap_lang[OF INJMAP] have LE: "ta_lang (ta_remap f TAC) = L" 
    by simp
  have "complete_tree_automaton (ta_remap f TAC) A" 
    using tac.remap_cta[OF INJMAP] .
  thus ?thesis by (rule_tac that[OF LE[symmetric]])
qed

subsubsection "Closure Properties"
text ‹
  In this section, we derive the standard closure properties of regular languages,
  i.e. that regular languages are closed under union, intersection, complement, 
  and difference, as well as that the empty and the universal language are
  regular.
  
  Note that we do not formalize homomorphisms or tree transducers here.
›
  
theorem (in finite_alphabet) rtl_empty[simp, intro!]: "{}  regular_languages A"
  by (rule ranked_tree_automaton.rtlI[OF ta_empty_rta, simplified])


theorem rtl_union_closed: 
  " L1regular_languages A; L2regular_languages A  
     L1L2  regular_languages A"
proof (elim rtlE)
  fix TA1 TA2
  assume TA[simp]: "ranked_tree_automaton TA1 A" "ranked_tree_automaton TA2 A"
    and [simp]: "L1=ta_lang TA1" "L2=ta_lang TA2"


  interpret ta1: ranked_tree_automaton TA1 A by simp
  interpret ta2: ranked_tree_automaton TA2 A by simp

  have "ta_lang (ta_union_wrap TA1 TA2) = ta_lang TA1  ta_lang TA2" 
    apply (rule ta_union_wrap_correct)
    by unfold_locales
  with ranked_tree_automaton.rtlI[OF ta_union_wrap_rta[OF TA]] show ?thesis
    by (simp)

qed
   
theorem rtl_inter_closed: 
  "L1regular_languages A; L2regular_languages A  
    L1L2  regular_languages A" 
proof (elim rtlE, goal_cases)
  case (1 TA1 TA2)
  with ta_prod_correct[of TA1 TA2] ta_prod_rta[of TA1 A TA2] have 
     L: "ta_lang (ta_prod TA1 TA2) = L1L2"  and
     A: "ranked_tree_automaton (ta_prod TA1 TA2) A"
    by (simp_all add: ranked_tree_automaton.axioms)
  show ?thesis using ranked_tree_automaton.rtlI[OF A]
    by (simp add: L)
qed

theorem rtl_complement_closed:
  "Lregular_languages A  ranked_trees A - L  regular_languages A"
proof (elim rtlE_complete, goal_cases)
  case prems: (1 TA)
  then interpret ta: complete_tree_automaton TA A by simp
  
  from ta.complementTA_correct have
    [simp]: "ta_lang (ta.complementTA) = ranked_trees A - ta_lang TA" and
    CTA: "complete_tree_automaton ta.complementTA A" by auto
  interpret cta: complete_tree_automaton ta.complementTA A using CTA .
  
  from cta.rtlI prems(1) show ?case by simp
qed

theorem (in finite_alphabet) rtl_univ: 
  "ranked_trees A  regular_languages A"
  using rtl_complement_closed[OF rtl_empty] 
  by simp

theorem rtl_diff_closed:
  fixes L1 :: "'L tree set"
  assumes A[simp]: "L1  regular_languages A" "L2regular_languages A" 
  shows "L1-L2  regular_languages A"
proof -
  from rtlE[OF A(1)] obtain TA1::"(nat,'L) tree_automaton_rec" where
    L1: "L1=ta_lang TA1" and
    RT1: "ranked_tree_automaton TA1 A"
    .
  from rtlE[OF A(2)] obtain TA2::"(nat,'L) tree_automaton_rec" where
    L2: "L2=ta_lang TA2" and
    RT2: "ranked_tree_automaton TA2 A"
    .

  interpret ta1: ranked_tree_automaton TA1 A using RT1 .
  interpret ta2: ranked_tree_automaton TA2 A using RT2 .

  from ta1.lang_is_ranked have ALT: "L1-L2 = L1  (ranked_trees A - L2)"
    by (auto simp add: L1 L2)

  show ?thesis
    unfolding ALT
    by (simp add: rtl_complement_closed rtl_inter_closed)
qed


lemmas rtl_closed = finite_alphabet.rtl_empty finite_alphabet.rtl_univ 
  rtl_complement_closed
  rtl_inter_closed rtl_union_closed rtl_diff_closed


end