Theory LLEarleyParsing

theory LLEarleyParsing 
imports LocalLexing
begin

datatype item = 
  Item 
    (item_rule: rule) 
    (item_dot : nat) 
    (item_origin : nat)
    (item_end : nat)

type_synonym items = "item set"

definition item_nonterminal :: "item  symbol"
where
  "item_nonterminal x = fst (item_rule x)"

definition item_rhs :: "item  sentence"
where
  "item_rhs x = snd (item_rule x)"

definition item_α :: "item  sentence"
where
  "item_α x = take (item_dot x) (item_rhs x)"

definition item_β :: "item  sentence"
where 
  "item_β x = drop (item_dot x) (item_rhs x)"

definition init_item :: "rule  nat  item"
where
  "init_item r k = Item r 0 k k"

definition is_complete :: "item  bool"
where
  "is_complete x = (item_dot x  length (item_rhs x))"

definition next_symbol :: "item  symbol option"
where
  "next_symbol x = (if is_complete x then None else Some ((item_rhs x) ! (item_dot x)))"

definition inc_item :: "item  nat  item"
where
  "inc_item x k = Item (item_rule x) (item_dot x + 1) (item_origin x) k"

definition bin :: "items  nat  items"
where
  "bin I k = { x . x  I  item_end x = k }"

context LocalLexing begin

definition Init :: "items"
where
  "Init = { init_item r 0 | r. r    fst r = 𝔖 }"

definition Predict :: "nat  items  items"
where
  "Predict k I = I   
     { init_item r k | r x. r    x  bin I k  
       next_symbol x = Some(fst r) }"

definition Complete :: "nat  items  items"
where
  "Complete k I = I  { inc_item x k | x y. 
     x  bin I (item_origin y)  y  bin I k  is_complete y  
     next_symbol x = Some (item_nonterminal y) }"

definition TokensAt :: "nat  items  token set"
where
  "TokensAt k I = { (t, s) | t s x l. x  bin I k  
     next_symbol x = Some t  is_terminal t  
     l  Lex t Doc k  s = take l (drop k Doc) }"

definition Tokens :: "nat  token set  items  token set"
where
  "Tokens k T I = Sel T (TokensAt k I)"

definition Scan :: "token set  nat  items  items"
where
  "Scan T k I = I 
     { inc_item x (k + length c) | x t c. x  bin I k  (t, c)  T 
       next_symbol x = Some t }"
      
definition π :: "nat  token set  items  items"
where
  "π k T I = 
     limit (λ I. Scan T k (Complete k (Predict k I))) I"

fun 𝒥 :: "nat  nat  items"
and  :: "nat  items"
and 𝒯 :: "nat  nat  token set"
where
  "𝒥 0 0 = π 0 {} Init"
| "𝒥 k (Suc u) = π k (𝒯 k (Suc u)) (𝒥 k u)"
| "𝒥 (Suc k) 0 = π (Suc k) {} ( k)"
| "𝒯 k 0 = {}"
| "𝒯 k (Suc u) = Tokens k (𝒯 k u) (𝒥 k u)"
| " k = natUnion (𝒥 k)"

definition  :: "items"
where
  " =  (length Doc)"

definition is_finished :: "item  bool" where
  "is_finished x = (item_nonterminal x = 𝔖  item_origin x = 0  item_end x = length Doc  
    is_complete x)"

definition earley_recognised :: "bool"
where
  "earley_recognised = ( x  . is_finished x)"

end

end