Theory Earley

theory Earley
  imports
    Derivations
begin

section ‹Slices›

fun slice :: "'a list  nat  nat  'a list" where
  "slice [] _ _ = []"
| "slice (x#xs) _ 0 = []"
| "slice (x#xs) 0 (Suc b) = x # slice xs 0 b"
| "slice (x#xs) (Suc a) (Suc b) = slice xs a b"

syntax
  slice :: "'a list  nat  nat  'a list" (‹__'/_ [1000,0,0] 1000)

notation (latex output)
  "slice" (‹__'/_ [1000,0,0] 1000)

lemma slice_drop_take:
  "slice xs a b = drop a (take b xs)"
  by (induction xs a b rule: slice.induct) auto

lemma slice_append_aux:
  "Suc b  c  slice (x#xs) (Suc b) c = slice xs b (c-1)"
  using Suc_le_D by fastforce

lemma slice_concat:
  "a  b  b  c  slice xs a b @ slice xs b c = slice xs a c"
proof (induction xs a b arbitrary: c rule: slice.induct)
  case (3 b x xs)
  then show ?case
      using Suc_le_D by(fastforce simp: slice_append_aux)
qed (auto simp: slice_append_aux)

lemma slice_concat_Ex:
  "a  c  slice xs a c = ys @ zs  b. ys = slice xs a b  zs = slice xs b c  a  b  b  c"
proof (induction xs a c arbitrary: ys zs rule: slice.induct)
  case (3 x xs b)
  show ?case
  proof (cases ys)
    case Nil
    then obtain zs' where "x # slice xs 0 b = x # zs'" "x # zs' = zs"
      using "3.prems"(2) by auto
    thus ?thesis
      using Nil by force
  next
    case (Cons y ys')
    then obtain ys' where "x # slice xs 0 b = x # ys' @ zs" "x # ys' = ys"
      using "3.prems"(2) by auto
    thus ?thesis
      using "3.IH"[of ys' zs] by force
  qed
next
  case (4 a b x xs)
  thus ?case
    by (auto, metis slice.simps(4) Suc_le_mono)
qed auto

lemma slice_nth:
  "a < length xs  slice xs a (a+1) = [xs!a]"
  unfolding slice_drop_take
  by (metis Cons_nth_drop_Suc One_nat_def diff_add_inverse drop_take take_Suc_Cons take_eq_Nil)

lemma slice_append_nth:
  "a  b  b < length xs  slice xs a b @ [xs!b] = slice xs a (b+1)"
  by (metis le_add1 slice_concat slice_nth)

lemma slice_empty:
  "b  a  slice xs a b = []"
  by (simp add: slice_drop_take)

lemma slice_id[simp]:
  "slice xs 0 (length xs) = xs"
  by (simp add: slice_drop_take)

lemma slice_singleton:
  "b  length xs  [x] = slice xs a b  b = a + 1"
  by (induction xs a b rule: slice.induct) (auto simp: slice_drop_take)


section ‹Earley recognizer›

subsection ‹Earley items›

definition lhs_rule :: "'a rule  'a" where
  "lhs_rule  fst"

definition rhs_rule :: "'a rule  'a list" where
  "rhs_rule  snd"

datatype 'a item = 
  Item (rule_item: "'a rule") (dot_item : nat) (start_item : nat) (end_item : nat)

definition lhs_item :: "'a item  'a" where
  "lhs_item x  lhs_rule (rule_item x)"

definition rhs_item :: "'a item  'a list" where
  "rhs_item x  rhs_rule (rule_item x)"

definition α_item :: "'a item  'a list" where
  "α_item x  take (dot_item x) (rhs_item x)"

definition β_item :: "'a item  'a list" where 
  "β_item x  drop (dot_item x) (rhs_item x)"

definition is_complete :: "'a item  bool" where
  "is_complete x  dot_item x  length (rhs_item x)"

definition next_symbol :: "'a item  'a option" where
  "next_symbol x  if is_complete x then None else Some (rhs_item x ! dot_item x)"

lemmas item_defs = lhs_item_def rhs_item_def α_item_def β_item_def lhs_rule_def rhs_rule_def

definition is_finished :: "'a cfg  'a list  'a item  bool" where
  "is_finished 𝒢 ω x 
    lhs_item x = 𝔖 𝒢 
    start_item x = 0 
    end_item x = length ω  
    is_complete x"

definition recognizing :: "'a item set  'a cfg  'a list  bool" where
  "recognizing I 𝒢 ω  x  I. is_finished 𝒢 ω x"

inductive_set Earley :: "'a cfg  'a list  'a item set"
  for 𝒢 :: "'a cfg" and ω :: "'a list" where
    Init: "r  set ( 𝒢)  fst r = 𝔖 𝒢 
      Item r 0 0 0  Earley 𝒢 ω"
  | Scan: "x = Item r b i j  x  Earley 𝒢 ω 
    ω!j = a  j < length ω  next_symbol x = Some a 
      Item r (b + 1) i (j + 1)  Earley 𝒢 ω"
  | Predict: "x = Item r b i j  x  Earley 𝒢 ω 
    r'  set ( 𝒢)  next_symbol x = Some (lhs_rule r') 
      Item r' 0 j j  Earley 𝒢 ω"
  | Complete: "x = Item rx bx i j  x  Earley 𝒢 ω  y = Item ry by j k  y  Earley 𝒢 ω 
      is_complete y  next_symbol x = Some (lhs_item y) 
        Item rx (bx + 1) i k  Earley 𝒢 ω"


subsection ‹Well-formedness›

definition wf_item :: "'a cfg  'a list => 'a item  bool" where 
  "wf_item 𝒢 ω x 
    rule_item x  set ( 𝒢)  
    dot_item x  length (rhs_item x) 
    start_item x  end_item x  
    end_item x  length ω"

lemma wf_Init:
  assumes "r  set ( 𝒢)" "fst r = 𝔖 𝒢"
  shows "wf_item 𝒢 ω (Item r 0 0 0)"
  using assms unfolding wf_item_def by simp

lemma wf_Scan:
  assumes "x = Item r b i j" "wf_item 𝒢 ω x" "ω!j = a" "j < length ω" "next_symbol x = Some a"
  shows "wf_item 𝒢 ω (Item r (b + 1) i (j+1))"
  using assms unfolding wf_item_def by (auto simp: item_defs is_complete_def next_symbol_def split: if_splits)

lemma wf_Predict:
  assumes "x = Item r b i j" "wf_item 𝒢 ω x" "r'  set ( 𝒢)" "next_symbol x = Some (lhs_rule r')"
  shows "wf_item 𝒢 ω (Item r' 0 j j)"
  using assms unfolding wf_item_def by simp

lemma wf_Complete:
  assumes "x = Item rx bx i j" "wf_item 𝒢 ω x" "y = Item ry by j k" "wf_item 𝒢 ω y"
  assumes "is_complete y" "next_symbol x = Some (lhs_item y)"
  shows "wf_item 𝒢 ω (Item rx (bx + 1) i k)"
  using assms unfolding wf_item_def is_complete_def next_symbol_def rhs_item_def
  by (auto split: if_splits)

lemma wf_Earley:
  assumes "x  Earley 𝒢 ω"
  shows "wf_item 𝒢 ω x"
  using assms wf_Init wf_Scan wf_Predict wf_Complete
  by (induction rule: Earley.induct) fast+


subsection ‹Soundness›

definition sound_item :: "'a cfg  'a list  'a item  bool" where
  "sound_item 𝒢 ω x  𝒢  [lhs_item x] * (slice ω (start_item x) (end_item x) @ β_item x)"

lemma sound_Init:
  assumes "r  set ( 𝒢)" "fst r = 𝔖 𝒢"
  shows "sound_item 𝒢 ω (Item r 0 0 0)"
proof -
  let ?x = "Item r 0 0 0"
  have "(lhs_item ?x, β_item ?x)  set ( 𝒢)"
    using assms(1) by (simp add: item_defs)
  hence "derives 𝒢 [lhs_item ?x] (β_item ?x)"
    using derives_if_valid_rule by metis
  thus "sound_item 𝒢 ω ?x"
    unfolding sound_item_def by (simp add: slice_empty)
qed

lemma sound_Scan:
  assumes "x = Item r b i j" "wf_item 𝒢 ω x" "sound_item 𝒢 ω x"
  assumes "ω!j = a" "j < length ω" "next_symbol x = Some a"
  shows "sound_item 𝒢 ω (Item r (b+1) i (j+1))"
proof -
  define x' where [simp]: "x' = Item r (b+1) i (j+1)"
  obtain β_item' where *: "β_item x = a # β_item'" "β_item x' = β_item'"
    using assms(1,6) apply (auto simp: item_defs next_symbol_def is_complete_def split: if_splits)
    by (metis Cons_nth_drop_Suc leI)
  have "slice ω i j @ β_item x = slice ω i (j+1) @ β_item'"
    using * assms(1,2,4,5) by (auto simp: slice_append_nth wf_item_def)
  moreover have "derives 𝒢 [lhs_item x] (slice ω i j @ β_item x)"
    using assms(1,3) sound_item_def by force
  ultimately show ?thesis
    using assms(1) * by (auto simp: item_defs sound_item_def)
qed

lemma sound_Predict:
  assumes "x = Item r b i j" "wf_item 𝒢 ω x" "sound_item 𝒢 ω x"
  assumes "r'  set ( 𝒢)" "next_symbol x = Some (lhs_rule r')"
  shows "sound_item 𝒢 ω (Item r' 0 j j)"
  using assms by (auto simp: sound_item_def derives_if_valid_rule slice_empty item_defs)

lemma sound_Complete:
  assumes "x = Item rx bx i j" "wf_item 𝒢 ω x" "sound_item 𝒢 ω x"
  assumes "y = Item ry by j k" "wf_item 𝒢 ω y" "sound_item 𝒢 ω y"
  assumes "is_complete y" "next_symbol x = Some (lhs_item y)"
  shows "sound_item 𝒢 ω (Item rx (bx + 1) i k)"
proof -
  have "derives 𝒢 [lhs_item y] (slice ω j k)"
    using assms(4,6,7) by (auto simp: sound_item_def is_complete_def item_defs)
  then obtain E where E: "Derivation 𝒢 [lhs_item y] E (slice ω j k)"
    using derives_implies_Derivation by blast
  have "derives 𝒢 [lhs_item x] (slice ω i j @ β_item x)"
    using assms(1,3,4) by (auto simp: sound_item_def)
  moreover have 0: "β_item x = (lhs_item y) # tl (β_item x)"
    using assms(8) apply (auto simp: next_symbol_def is_complete_def item_defs split: if_splits)
    by (metis drop_eq_Nil hd_drop_conv_nth leI list.collapse)
  ultimately obtain D where D: 
    "Derivation 𝒢 [lhs_item x] D (slice ω i j @ [lhs_item y] @ (tl (β_item x)))"
    using derives_implies_Derivation by (metis append_Cons append_Nil)
  obtain F where F:
    "Derivation 𝒢 [lhs_item x] F (slice ω i j @ slice ω j k @ tl (β_item x))"
    using Derivation_append_rewrite D E
    by metis
  moreover have "i  j"
    using assms(1,2) wf_item_def by force
  moreover have "j  k"
    using assms(4,5) wf_item_def by force
  ultimately have "derives 𝒢 [lhs_item x] (slice ω i k @ tl (β_item x))"
    by (metis Derivation_implies_derives append.assoc slice_concat)
  thus "sound_item 𝒢 ω (Item rx (bx + 1) i k)"
    using assms(1,4) by (auto simp: sound_item_def item_defs drop_Suc tl_drop)
qed

lemma sound_Earley:
  assumes "x  Earley 𝒢 ω" "wf_item 𝒢 ω x"
  shows "sound_item 𝒢 ω x"
  using assms
proof (induction rule: Earley.induct)
  case (Init r)
  thus ?case
    using sound_Init by blast
next
  case (Scan x r b i j a)
  thus ?case
    using wf_Earley sound_Scan by fast
next
  case (Predict x r b i j r')
  thus ?case
    using wf_Earley sound_Predict by blast
next
  case (Complete x rx bx i j y ry by k)
  thus ?case
    using wf_Earley sound_Complete by metis
qed

theorem soundness_Earley:
  assumes "recognizing (Earley 𝒢 ω) 𝒢 ω"
  shows "𝒢  [𝔖 𝒢] * ω"
proof -
  obtain x where x: "x  Earley 𝒢 ω" "is_finished 𝒢 ω x"
    using assms recognizing_def by blast
  hence "sound_item 𝒢 ω x"
    using wf_Earley sound_Earley by blast
  thus ?thesis
    unfolding sound_item_def using x by (auto simp: is_finished_def is_complete_def item_defs)
qed


subsection ‹Completeness›

definition partially_completed :: "nat  'a cfg  'a list  'a item set  ('a derivation  bool)  bool" where
  "partially_completed k 𝒢 ω I P  r b i' i j x a D.
    i  j  j  k  k  length ω 
    x = Item r b i' i  x  I  next_symbol x = Some a 
    Derivation 𝒢 [a] D (slice ω i j)  P D 
    Item r (b+1) i' j  I"

lemma partially_completed_upto:
  assumes "j  k" "k  length ω"
  assumes "x = Item (N,α) d i j" "x  I" "x  I. wf_item 𝒢 ω x"
  assumes "Derivation 𝒢 (β_item x) D (slice ω j k)"
  assumes "partially_completed k 𝒢 ω I (λD'. length D'  length D)"
  shows "Item (N,α) (length α) i k  I"
  using assms
proof (induction "β_item x" arbitrary: d i j k N α x D)
  case Nil
  have "α_item x = α"
    using Nil(1,4) unfolding α_item_def β_item_def rhs_item_def rhs_rule_def by simp
  hence "x = Item (N,α) (length α) i j"
    using Nil.hyps Nil.prems(3-5) unfolding wf_item_def item_defs by auto
  have "Derivation 𝒢 [] D (slice ω j k)"
    using Nil.hyps Nil.prems(6) by auto
  hence "slice ω j k = []"
    using Derivation_from_empty by blast
  hence "j = k"
    unfolding slice_drop_take using Nil.prems(1,2) by simp
  thus ?case
    using x = Item (N, α) (length α) i j Nil.prems(4) by blast
next
  case (Cons b bs)
  obtain j' E F where *: 
    "Derivation 𝒢 [b] E (slice ω j j')"
    "Derivation 𝒢 bs F (slice ω j' k)"
    "j  j'" "j'  k" "length E  length D" "length F  length D"
    using Derivation_concat_split[of 𝒢 "[b]" bs D "slice ω j k"] slice_concat_Ex
    using Cons.hyps(2) Cons.prems(1,6)
    by (smt (verit, ccfv_threshold) Cons_eq_appendI append_self_conv2)
  have "next_symbol x = Some b"
    using Cons.hyps(2) unfolding item_defs(4) next_symbol_def is_complete_def by (auto, metis nth_via_drop)
  hence "Item (N, α) (d+1) i j'  I"
    using Cons.prems(7) unfolding partially_completed_def
    using Cons.prems(2,3,4) *(1,3-5) by blast
  moreover have "partially_completed k 𝒢 ω I (λD'. length D'  length F)"
    using Cons.prems(7) *(6) unfolding partially_completed_def by fastforce
  moreover have "bs = β_item (Item (N,α) (d+1) i j')"
    using Cons.hyps(2) Cons.prems(3) unfolding item_defs(4) rhs_item_def 
    by (auto, metis List.list.sel(3) drop_Suc drop_tl)
  ultimately show ?case
    using Cons.hyps(1) *(2,4) Cons.prems(2,3,5) wf_item_def by blast
qed

lemma partially_completed_Earley:
  "partially_completed k 𝒢 ω (Earley 𝒢 ω) (λ_. True)"
unfolding partially_completed_def
proof (intro allI impI)
  fix r b i' i j x a D
  assume
    "i  j  j  k  k  length ω 
     x = Item r b i' i  x  Earley 𝒢 ω 
     next_symbol x = Some a 
     Derivation 𝒢 [a] D (slice ω i j)  True"
  thus "Item r (b + 1) i' j  Earley 𝒢 ω"
  proof (induction "length D" arbitrary: r b i' i j x a D rule: nat_less_induct)
    case 1
    show ?case
    proof cases
      assume "D = []"
      hence "[a] = slice ω i j"
        using "1.prems" by force
      moreover have "j  length ω"
        using le_trans "1.prems" by blast
      ultimately have "j = i+1"
        using slice_singleton by metis
      hence "i < length ω"
        using j  length ω by simp
      hence "ω!i = a"
        using slice_nth [a] = slice ω i j j = i + 1 by fastforce
      hence "Item r (b + 1) i' j  Earley 𝒢 ω"
        using Earley.Scan "1.prems" i < length ω j = i + 1 by metis
      thus ?thesis
        by (simp add: j = i + 1)
    next
      assume "¬ D = []"
      then obtain d D' where "D = d # D'"
        by (meson List.list.exhaust)
      then obtain α where *: "Derives1 𝒢 [a] (fst d) (snd d) α" "Derivation 𝒢 α D' (slice ω i j)"
        using "1.prems" by auto
      hence rule: "(a, α)  set ( 𝒢)" "fst d = 0" "snd d = (a ,α)"
        using *(1) unfolding Derives1_def by (simp add: Cons_eq_append_conv)+
        define y where y_def: "y = Item (a ,α) 0 i i"
        have "length D' < length D"
          using D = d # D' by fastforce
        hence "partially_completed k 𝒢 ω (Earley 𝒢 ω) (λE. length E  length D')"
          unfolding partially_completed_def using "1.hyps" order_le_less_trans by (smt (verit, best))
        hence "partially_completed j 𝒢 ω (Earley 𝒢 ω) (λE. length E  length D')"
          unfolding partially_completed_def using "1.prems" by force
        moreover have "Derivation 𝒢 (β_item y) D' (slice ω i j)"
          using *(2) by (auto simp: item_defs y_def)
        moreover have "y  Earley 𝒢 ω"
          using y_def "1.prems" rule by (auto simp: item_defs Earley.Predict)
        moreover have "j  length ω"
          using "1.prems" by simp
        ultimately have "Item (a,α) (length α) i j  Earley 𝒢 ω"
          using partially_completed_upto "1.prems" wf_Earley y_def by metis
        moreover have x: "x = Item r b i' i" "x  Earley 𝒢 ω"
          using "1.prems" by blast+
        moreover have "next_symbol x = Some a"
          using "1.prems" by linarith
        ultimately show ?thesis
          using Earley.Complete[OF x] by (auto simp: is_complete_def item_defs)
      qed
    qed
  qed

theorem completeness_Earley:
  assumes "𝒢  [𝔖 𝒢] * ω" "is_word 𝒢 ω"
  shows "recognizing (Earley 𝒢 ω) 𝒢 ω"
proof -
  obtain α D where *: "(𝔖 𝒢 ,α)  set ( 𝒢)" "Derivation 𝒢 α D ω"
    using Derivation_𝔖1 assms derives_implies_Derivation by metis
  define x where x_def: "x = Item (𝔖 𝒢, α) 0 0 0"
  have "partially_completed (length ω) 𝒢 ω (Earley 𝒢 ω) (λ_. True)"
    using assms(2) partially_completed_Earley by blast
  hence 0: "partially_completed (length ω) 𝒢 ω (Earley 𝒢 ω) (λD'. length D'  length D)"
    unfolding partially_completed_def by blast
  have 1: "x  Earley 𝒢 ω"
    using x_def Earley.Init *(1) by fastforce
  have 2: "Derivation 𝒢 (β_item x) D (slice ω 0 (length ω))"
    using *(2) x_def by (simp add: item_defs)
  have "Item (𝔖 𝒢,α) (length α) 0 (length ω)  Earley 𝒢 ω"
    using partially_completed_upto[OF _ _ _ _ _ 2 0] wf_Earley 1 x_def by auto
  then show ?thesis
    unfolding recognizing_def is_finished_def by (auto simp: is_complete_def item_defs, force)
qed


subsection ‹Correctness›

theorem correctness_Earley:
  assumes "is_word 𝒢 ω"
  shows "recognizing (Earley 𝒢 ω) 𝒢 ω  𝒢  [𝔖 𝒢] * ω"
  using assms soundness_Earley completeness_Earley by blast


subsection ‹Finiteness›

lemma finiteness_empty:
  "set ( 𝒢) = {}  finite { x | x. wf_item 𝒢 ω x }"
  unfolding wf_item_def by simp

fun item_intro :: "'a rule × nat × nat × nat  'a item" where
  "item_intro (rule, dot, origin, ends) = Item rule dot origin ends" 

lemma finiteness_nonempty:
  assumes "set ( 𝒢)  {}"
  shows "finite { x. wf_item 𝒢 ω x }"
proof -
  define M where "M = Max { length (rhs_rule r) | r. r  set ( 𝒢) }"
  define Top where "Top = (set ( 𝒢) × {0..M} × {0..length ω} × {0..length ω})"
  hence "finite Top"
    using finite_cartesian_product finite by blast
  have "inj_on item_intro Top"
    unfolding Top_def inj_on_def by simp
  hence "finite (item_intro ` Top)"
    using finite_image_iff finite Top by auto
  have "{ x | x. wf_item 𝒢 ω x }  item_intro ` Top"
  proof standard
    fix x
    assume "x  { x | x. wf_item 𝒢 ω x }"
    then obtain rule dot origin endp where *: "x = Item rule dot origin endp"
      "rule  set ( 𝒢)" "dot  length (rhs_item x)" "origin  length ω" "endp  length ω"
      unfolding wf_item_def using item.exhaust_sel le_trans by blast
    hence "length (rhs_rule rule)  { length (rhs_rule r) | r. r  set ( 𝒢) }"
      using *(1,2) rhs_item_def by blast
    moreover have "finite { length (rhs_rule r) | r. r  set ( 𝒢) }"
      using finite finite_image_set[of "λx. x  set ( 𝒢)"] by fastforce
    ultimately have "M  length (rhs_rule rule)"
      unfolding M_def by simp
    hence "dot  M"
      using *(1,3) rhs_item_def by (metis item.sel(1) le_trans)
    hence "(rule, dot, origin, endp)  Top"
      using *(2,4,5) unfolding Top_def by simp
    thus "x  item_intro ` Top"
      using *(1) by force
  qed
  thus ?thesis
    using finite (item_intro ` Top) rev_finite_subset by auto
qed

lemma finiteness_UNIV_wf_item:
  "finite { x. wf_item 𝒢 ω x }"
  using finiteness_empty finiteness_nonempty by fastforce

theorem finiteness_Earley:
  "finite (Earley 𝒢 ω)"
  using finiteness_UNIV_wf_item wf_Earley rev_finite_subset by (metis mem_Collect_eq subsetI)

end