Theory Tseytin

subsubsection‹Tseytin transformation›

theory Tseytin
imports Formulas CNF_Formulas
begin

text‹The @{const cnf} transformation clearly has exponential complexity.
If the intention is to use Resolution to decide validity of a formula,
that is clearly a deal-breaker for any practical implementation,
since validity can be decided by brute force in exponential time.
This theory pair shows the Tseytin transformation, a way to transform a formula
while preserving validity.
The @{const cnf} of the transformed formula will have clauses with maximally 3 atoms,
and an amount of clauses linear in the size of the formula,
at the cost of introducing one new atom for each subformula of ‹F› (i.e. @{term "size F"} many).
›
  
definition "pair_fun_upd f p ≡ (case p of (k,v) ⇒ fun_upd f k v)"

lemma fold_pair_upd_triv: "A ∉ fst ` set U ⟹ foldl pair_fun_upd F U A = F A"
  by(induction U arbitrary: F; simp)
    (metis fun_upd_apply pair_fun_upd_def prod.simps(2) surjective_pairing)

lemma distinct_pair_update_one: "(k,v) ∈ set U ⟹ distinct (map fst U) ⟹ foldl pair_fun_upd F U k = v"
  by(induction U arbitrary: F; clarsimp simp add: pair_fun_upd_def fold_pair_upd_triv split: prod.splits) 
    (insert fold_pair_upd_triv, fastforce)
    
lemma distinct_zipunzip: "distinct xs ⟹ distinct (map fst (zip xs ys))" by (simp add: distinct_conv_nth)
    
(* hm, this foldl pair_fun_upd business might have been a bad idea. *)
lemma foldl_pair_fun_upd_map_of: "distinct (map fst U) ⟹ foldl pair_fun_upd F U = (λk. case map_of U k of Some v ⇒ v | None ⇒ F k)"
  by(unfold fun_eq_iff; induction U arbitrary: F; clarsimp split: option.splits simp: pair_fun_upd_def rev_image_eqI)

lemma map_of_map_apsnd: "map_of (map (apsnd t) M) = map_option t ∘ (map_of M)"
  by(unfold fun_eq_iff comp_def; induction M; simp)
    

definition biimp (infix ‹❙↔› 67) where "F ❙↔ G ≡ (F ❙→ G) ❙∧ (G ❙→ F)"
lemma atoms_biimp[simp]: "atoms (F ❙↔ G) = atoms F ∪ atoms G"
  unfolding biimp_def by auto
lemma biimp_size[simp]: "size (F ❙↔ G) = (2 * (size F + size G)) + 3"
  by(simp add: biimp_def)
  
locale freshstuff =
  fixes fresh :: "'a set ⇒ 'a"
  assumes isfresh: "finite S ⟹ fresh S ∉ S"
begin
  
primrec nfresh where
"nfresh S 0 = []" |
"nfresh S (Suc n) = (let f = fresh S in f # nfresh (f ▹ S) n)"

lemma length_nfresh: "length (nfresh S n) = n"
  by(induction n arbitrary: S; simp add: Let_def)
    
lemma nfresh_isfresh: "finite S ⟹ set (nfresh S n) ∩ S = {}"
  by(induction n arbitrary: S; auto simp add: Let_def isfresh)
    
lemma nfresh_distinct: "finite S ⟹ distinct (nfresh S n)"
  by(induction n arbitrary: S; simp add: Let_def; meson disjoint_iff_not_equal finite_insert insertI1 nfresh_isfresh)

definition "tseytin_assmt F ≡ let SF = remdups (subformulae F) in zip (nfresh (atoms F) (length SF)) SF"

lemma tseytin_assmt_distinct: "distinct (map fst (tseytin_assmt F))"
  unfolding tseytin_assmt_def using nfresh_distinct by (simp add: Let_def length_nfresh)
    
lemma tseytin_assmt_has: "G ∈ set (subformulae F) ⟹ ∃n. (n,G) ∈ set (tseytin_assmt F)"
proof -
  assume "G ∈ set (subformulae F)"
  then have "∃n. G = subformulae F ! n ∧ n < length (subformulae F)"
    by (simp add: set_conv_nth)
  then have "∃a. (a, G) ∈ set (zip (nfresh (atoms F) (length (subformulae F))) (subformulae F))"
    by (metis (no_types) in_set_zip length_nfresh prod.sel(1) prod.sel(2))
  thus ?thesis by(simp add: tseytin_assmt_def Let_def) (metis fst_conv in_set_conv_nth in_set_zip length_nfresh set_remdups snd_conv)
qed

lemma tseytin_assmt_new_atoms: "(k,l) ∈ set (tseytin_assmt F) ⟹ k ∉ atoms F"
  unfolding tseytin_assmt_def Let_def using nfresh_isfresh by (fastforce dest: set_zip_leftD)

primrec tseytin_tran1 where
"tseytin_tran1 S (Atom k) = [Atom k ❙↔ S (Atom k)]" |
"tseytin_tran1 S ⊥ = [⊥ ❙↔ S ⊥]" |
"tseytin_tran1 S (Not F) = [S (Not F) ❙↔ Not (S F)] @ tseytin_tran1 S F" |
"tseytin_tran1 S (And F G) = [S (And F G) ❙↔ And (S F) (S G)] @ tseytin_tran1 S F @ tseytin_tran1 S G" |
"tseytin_tran1 S (Or F G) = [S (Or F G) ❙↔ Or (S F) (S G)] @ tseytin_tran1 S F @ tseytin_tran1 S G" |
"tseytin_tran1 S (Imp F G) = [S (Imp F G) ❙↔ Imp (S F) (S G)] @ tseytin_tran1 S F @ tseytin_tran1 S G"
definition "tseytin_toatom F ≡ Atom ∘ the ∘ map_of (map (λ(a,b). (b,a)) (tseytin_assmt F))"
definition "tseytin_tran F ≡ ❙⋀(let S = tseytin_toatom F in S F # tseytin_tran1 S F)"
  
lemma distinct_snd_tseytin_assmt: "distinct (map snd (tseytin_assmt F))" 
  unfolding tseytin_assmt_def by(simp add: Let_def length_nfresh)

lemma tseytin_assmt_backlookup: assumes "J ∈ set (subformulae F)" 
  shows "(the (map_of (map (λ(a, b). (b, a)) (tseytin_assmt F)) J), J) ∈ set (tseytin_assmt F)"
proof -
  have 1: "distinct (map snd M) ⟹ J ∈ snd ` set M ⟹ (the (map_of (map (λ(a, b). (b, a)) M) J), J) ∈ set M" for J M
    by(induction M; clarsimp split: prod.splits)
  have 2: "J ∈ set (subformulae F) ⟹ J ∈ snd ` set (tseytin_assmt F)" for J using image_iff tseytin_assmt_has by fastforce
  from 1[OF distinct_snd_tseytin_assmt 2, OF assms] show ?thesis .
qed

lemma tseytin_tran_small_clauses: "∀C ∈ cnf (nnf (tseytin_tran F)). card C ≤ 3"
proof -
  have 3: "card S ≤ 2 ⟹ card (a ▹ S) ≤ 3" for a S 
    by(cases "finite S"; simp add: card_insert_le_m1)
  have 2: "card S ≤ 1 ⟹ card (a ▹ S) ≤ 2" for a S 
    by(cases "finite S"; simp add: card_insert_le_m1)
  have 1: "card S ≤ 0 ⟹ card (a ▹ S) ≤ 1" for a S 
    by(cases "finite S"; simp add: card_insert_le_m1)
  have *: "⟦G ∈ set (tseytin_tran1 (Atom ∘ S) F); C ∈ cnf (nnf G)⟧ ⟹ card C ≤ 3" for G C S
    by(induction F arbitrary: G; simp add: biimp_def; (elim disjE exE conjE | intro 1 2 3 | simp)+)
  show ?thesis
    unfolding tseytin_tran_def  tseytin_toatom_def Let_def
    by(clarsimp simp add: cnf_BigAnd nnf_BigAnd comp_assoc *)
qed
  
lemma tseytin_tran_few_clauses: "card (cnf (nnf (tseytin_tran F))) ≤ 3 * size F + 1"
proof -
  have "size Bot = 1" by simp  (* just a thing to keep in mind. *)
  have ws: "{c ▹ D |D. D = {c1} ∨ D = {c2}} = {{c,c1},{c,c2}}" for c1 c2 c by auto
  have grr: "Suc (card S) ≤ c ⟹ card (a ▹ S) ≤ c" for a S c
    by(cases "finite S"; simp add: card_insert_le_m1)
  have *: "card (⋃a∈set (tseytin_tran1 (Atom ∘ S) F). cnf (nnf a)) ≤ 3 * size F" for S
    by(induction F; simp add: biimp_def; ((intro grr card_Un_le[THEN le_trans] | simp add: ws)+)?) 
  show ?thesis
    unfolding tseytin_tran_def tseytin_toatom_def Let_def
    by(clarsimp simp: nnf_BigAnd cnf_BigAnd; intro grr; simp add: comp_assoc *)
qed
  
lemma tseytin_tran_new_atom_count: "card (atoms (tseytin_tran F)) ≤ size F + card (atoms F)"
proof -
  have tseytin_tran1_atoms: "H ∈ set (tseytin_tran1 (tseytin_toatom F) G) ⟹ G ∈ set (subformulae F) ⟹
    atoms H ⊆ atoms F ∪ (⋃I ∈ set (subformulae F). atoms (tseytin_toatom F I))" for G H
  proof(induction G arbitrary: H)
    case (Atom k)
    hence "k ∈ atoms F"
      by simp (meson formula.set_intros(1) rev_subsetD subformula_atoms)
    with Atom show ?case by simp blast
  next
    case Bot then show ?case by simp blast
  next
    case (Not G)
    show ?case by(insert Not.prems(1,2); 
         frule subformulas_in_subformulas; simp; elim disjE; (elim Not.IH | force))
  next
    case (And G1 G2)
    show ?case by(insert And.prems(1,2); 
         frule subformulas_in_subformulas; simp; elim disjE; (elim And.IH; simp | force))
  next
    case (Or G1 G2)
    show ?case by(insert Or.prems(1,2); 
         frule subformulas_in_subformulas; simp; elim disjE; (elim Or.IH; simp | force))
  next
    case (Imp G1 G2)
    show ?case by(insert Imp.prems(1,2); 
         frule subformulas_in_subformulas; simp; elim disjE; (elim Imp.IH; simp | force))
  qed
  have tseytin_tran1_atoms: "(⋃G∈set (tseytin_tran1 (tseytin_toatom F) F). atoms G) ⊆
      atoms F ∪ (⋃I∈set (subformulae F). atoms (tseytin_toatom F I))"
    using tseytin_tran1_atoms[OF _ subformulae_self] by blast
  have 1: "card (atoms (tseytin_tran F)) ≤
  card (atoms (tseytin_toatom F F) ∪ (⋃x∈set (tseytin_tran1 (tseytin_toatom F) F). atoms x))" 
    unfolding tseytin_tran_def by(simp add: Let_def tseytin_tran1_atoms)
  have 2: "atoms (tseytin_toatom F F) ∪ (⋃x∈set (tseytin_tran1 (tseytin_toatom F) F). atoms x) ⊆
     (atoms F ∪ (⋃I∈set (subformulae F). atoms (tseytin_toatom F I)))"
    using tseytin_tran1_atoms by blast
  have twofin: "finite (atoms F ∪ (⋃I∈set (subformulae F). atoms (tseytin_toatom F I)))" by simp
  have card_subformulae: "card (set (subformulae F)) ≤ size F" using length_subformulae by (metis card_length)
  have card_singleton_union: "finite S ⟹ card (⋃x∈S. {f x}) ≤ card S" for f S 
    by(induction S rule: finite_induct; simp add: card_insert_if)
  have 3: "card (⋃I∈set (subformulae F). atoms (tseytin_toatom F I)) ≤ size F"
    unfolding tseytin_toatom_def using le_trans[OF card_singleton_union card_subformulae]
    by simp fast
  have 4: "card (atoms (tseytin_tran F)) ≤ card (atoms F) + card (⋃f∈set (subformulae F). atoms (tseytin_toatom F f))"
    using le_trans[OF 1 card_mono[OF twofin 2]] card_Un_le le_trans by blast
  show ?thesis using 3 4 by linarith
qed
    
end

definition "freshnat S ≡  Suc (Max (0 ▹ S))"
primrec nfresh_natcode where
"nfresh_natcode S 0 = []" |
"nfresh_natcode S (Suc n) = (let f = freshnat S in f # nfresh_natcode (f ▹ S) n)"
interpretation freshnats: freshstuff freshnat unfolding freshnat_def by standard (meson Max_ge Suc_n_not_le_n finite_insert insertCI)
(* interpreting the locale is easy. making it executable… not so. *)
lemma [code_unfold]: "freshnats.nfresh = nfresh_natcode"
proof -
  have "freshnats.nfresh S n  = nfresh_natcode S n" for S n by(induction n arbitrary: S; simp)
  thus ?thesis by auto
qed
lemmas freshnats_code[code_unfold] = freshnats.tseytin_tran_def freshnats.tseytin_toatom_def freshnats.tseytin_assmt_def freshnats.nfresh.simps
  
lemma "freshnats.tseytin_tran (Atom 0 ❙→ (❙¬ (Atom 1))) = ❙⋀[
  Atom 2,
  Atom 2 ❙↔ Atom 3 ❙→ Atom 4,
  Atom 0 ❙↔ Atom 3,
  Atom 4 ❙↔ ❙¬ (Atom 5),
  Atom 1 ❙↔ Atom 5
]" (is "?l = ?r")
proof -
  have "cnf (nnf ?r) =
    {{Pos 2},
     {Neg 4, Pos 2}, {Pos 3, Pos 2}, {Neg 2, Neg 3, Pos 4},
     {Neg 3, Pos 0}, {Neg 0, Pos 3},
     {Pos 5, Pos 4}, {Neg 4, Neg 5},
     {Neg 5, Pos 1}, {Neg 1, Pos 5}}" by eval
  have ?thesis by eval
  show ?thesis by code_simp
qed
    
end