Theory Fresh_String

section ‹Fresh identifier generation for strings›

theory Fresh_String
  imports Fresh
begin

subsection ‹A partial order on strings›

text
‹The first criterion is the length, and the second the encoding of last character. ›

definition ordst :: "string  string  bool" where
"ordst X Y 
 (length X  length Y  X  []  Y  []  of_char (last X) < (of_char(last Y) :: nat))
  (length X < length Y)"

definition ordstNS :: "string  string  bool" where
"ordstNS X Y  X = Y  ordst X Y"

lemma ordst_antirefl: "¬ ordst X X"
by(auto simp add: ordst_def)

lemma ordst_trans:
assumes As1: "ordst X Y" and As2: "ordst Y Z"
shows "ordst X Z"
proof(cases "(length X < length Y)  (length Y < length Z)")
  assume "(length X < length Y)  (length Y < length Z)"
  moreover
  {assume "length X < length Y"
   moreover have "length Y  length Z"
   using As2 ordst_def by force
   ultimately have "length X < length Z" by force
   hence ?thesis using ordst_def by force}
  moreover
  {assume "length Y < length Z"
   moreover have "length X  length Y"
   using As1 ordst_def by force
   ultimately have "length X < length Z" by force
   hence ?thesis using ordst_def by force}
  ultimately show ?thesis by force
next
  assume "¬ (length X < length Y  length Y < length Z)"
  hence Ft: "¬ length X < length Y   ¬ length Y < length Z" by force
  hence "(of_char(last X) :: nat) < of_char(last Y) 
         (of_char(last Y) :: nat) < of_char(last Z) 
         length X  length Y  length Y  length Z"
  using As1 As2 ordst_def by force
  hence "(of_char(last X) :: nat) < of_char(last Z) 
         length X  length Z" by force
  moreover have "X  []  Z  []"
  using As1 As2 Ft ordst_def by force
  ultimately show ?thesis using ordst_def[of X Z] by force
qed

lemma ordstNS_refl: "ordstNS X X"
by(simp add: ordstNS_def)

lemma ordstNS_trans:
"ordstNS X Y  ordstNS Y Z  ordstNS X Z"
by (metis ordstNS_def ordst_trans)

lemma ordst_ordstNS_trans:
"ordst X Y  ordstNS Y Z  ordst X Z"
by (metis ordstNS_def ordst_trans)

lemma ordstNS_ordst_trans:
"ordstNS X Y  ordst Y Z  ordst X Z"
by (metis ordstNS_def ordst_trans)


subsection ‹Incrementing a string ›

text ‹If the last character is $\geq$ 'a' and $<$ 'z',
   then upChar› increments this last character;
   otherwise upChar› appends an 'a'.  ›

fun upChar :: "string  string" where
"upChar Y =
 (if (Y  []  of_char(last Y)  (97 :: nat) 
               of_char(last Y) < (122 :: nat))
    then (butlast Y) @
         [char_of(of_char(last Y) + (1 :: nat))]
    else Y @ ''a''
 )"

lemma upChar_ordst: "ordst Y (upChar Y)"
proof-
  {assume "¬(Y  []  of_char(last Y)  (97 :: nat)
                        of_char(last Y) < (122 :: nat))"
   hence "upChar Y = Y @ ''a''" by force
   hence ?thesis using ordst_def by force
  }
  moreover
  {assume As: "Y  []  of_char(last Y)  (97 :: nat)
                        of_char(last Y) < (122 :: nat)"
   hence Ft: "upChar Y = (butlast Y) @
                     [char_of(of_char(last Y) + (1 :: nat))]"
   by force
   hence Ft': "last (upChar Y) = char_of(of_char(last Y) + (1 :: nat))"
   by force
   hence "of_char(last (upChar Y)) mod (256 :: nat)  =
          (of_char(last Y) + 1) mod 256"
   by force
   moreover
   have "of_char(last(upChar Y))  < (256 :: nat) 
         of_char(last Y) + 1 < (256 :: nat)"
   using As Ft' by force
   ultimately
   have "of_char (last Y) < (of_char (last(upChar Y)) :: nat)" by force
   moreover
   from Ft have "length Y  length (upChar Y)" by force
   ultimately have ?thesis using ordst_def by force}
  ultimately show ?thesis by force
qed


subsection ‹The fresh-identifier operator›

text fresh Xs Y› changes Y› as little as possible so that
   it becomes disjoint from all strings in Xs›. ›

function fresh_string :: "string set  string  string"
where
Up: "Y  Xs  finite Xs  fresh_string Xs Y = fresh_string (Xs - {Y}) (upChar Y)"
|
Fresh: "Y  Xs  infinite Xs  fresh_string Xs Y = Y"
by auto
termination
  apply(relation "measure (λ(Xs,Y). card Xs)", simp_all)
  by (metis card_gt_0_iff diff_Suc_less empty_iff)

lemma fresh_string_ordstNS: "ordstNS Y (fresh_string Xs Y)"
proof (induction Xs Y rule: fresh_string.induct[case_names Up Fresh])
  case (Up Y Xs)
  hence "ordst Y (fresh_string (Xs - {Y}) (upChar Y))"
    using upChar_ordst[of Y] ordst_ordstNS_trans by force
  hence "ordstNS Y (fresh_string (Xs - {Y}) (upChar Y))"
    using ordstNS_def by auto
  thus ?case
    using Up.hyps by auto
next
  case (Fresh Y Xs)
  then show ?case
    by (auto intro: ordstNS_refl)
qed

lemma fresh_string_set: "finite Xs  fresh_string Xs Y  Xs"
proof (induction Xs Y rule: fresh_string.induct[case_names Up Fresh])
  case (Up Y Xs)
  show ?case
  proof
    assume "fresh_string Xs Y  Xs"
    then have "fresh_string (Xs - {Y}) (upChar Y)  Xs"
      using Up.hyps by force
    then have "fresh_string (Xs - {Y}) (upChar Y) = Y"
      using Up.IH finite Xs by blast
    moreover have "ordst Y (fresh_string (Xs - {Y}) (upChar Y))"
      using upChar_ordst[of Y] fresh_string_ordstNS ordst_ordstNS_trans by auto
    ultimately show False
      using ordst_antirefl by auto
  qed
qed auto

text ‹Code generation:›

lemma fresh_string_if:
  "fresh_string Xs Y = (
     if Y  Xs  finite Xs then fresh_string (Xs - {Y}) (upChar Y)
     else Y)"
  by simp

lemmas fresh_string_list[code] = fresh_string_if[where Xs = "set Xs" for Xs, simplified]

text ‹Some tests: ›

value "[fresh_string {} ''Abc'',
        fresh_string {''X'', ''Abc''} ''Abd'',
        fresh_string {''X'', ''Y''} ''Y'',
        fresh_string {''X'', ''Yaa'', ''Ya'', ''Yaa''} ''Ya'',
        fresh_string {''X'', ''Yaa'', ''Yz'', ''Yza''} ''Yz'',
        fresh_string {''X'', ''Y'', ''Yab'', ''Y''} ''Y'']"

text ‹Here we do locale interpretation rather than class instantiation,
since typstring is a type synonym for typchar list.›

interpretation fresh_string: fresh where fresh = fresh_string
  by standard (use fresh_string_set in auto)

subsection ‹Lifting to string literals›

abbreviation "is_ascii str  (c  set str. ¬digit7 c)"

lemma map_ascii_of_idem:
  "is_ascii str  map String.ascii_of str = str"
  by (induction str) (auto simp: String.ascii_of_idem)

lemma is_ascii_butlast:
  "is_ascii str  is_ascii (butlast str)"
  by (auto dest: in_set_butlastD)

lemma ascii_char_of:
  fixes c :: nat
  assumes "c < 128"
  shows "¬digit7 (char_of c)"
  using assms
  by (auto simp: char_of_def bit_iff_odd)

lemmas ascii_of_char_of_idem = ascii_char_of[THEN String.ascii_of_idem]

lemma is_ascii_upChar:
  "is_ascii str  is_ascii (upChar str)"
  by (auto simp: ascii_char_of is_ascii_butlast)

lemma is_ascii_fresh_string:
  "is_ascii Y  is_ascii (fresh_string Xs Y)"
proof (induction Xs Y rule: fresh_string.induct[case_names Up Fresh])
  case (Up Y Xs)
  show ?case
    using Up.IH[OF is_ascii_upChar[OF is_ascii Y]] Up.hyps
    by auto
qed auto

text ‹For string literals we can properly instantiate the class.›

instantiation String.literal :: fresh
begin

context
  includes literal.lifting
begin

lift_definition fresh_literal :: "String.literal set  String.literal  String.literal"
  is fresh_string
  using is_ascii_fresh_string
  by blast

instance by (standard; transfer) (use fresh_string_set in auto)

end

end

text ‹Code generation:›

context
  includes literal.lifting
begin

lift_definition upChar_literal :: "String.literal  String.literal" is upChar
  using is_ascii_upChar
  by blast

lemma upChar_literal_upChar[code]:
  "upChar_literal s = String.implode (upChar (String.explode s))"
  by transfer (auto simp: map_ascii_of_idem is_ascii_butlast ascii_of_char_of_idem)

lemma fresh_literal_if:
  "fresh xs y = (if y  xs  finite xs then fresh (xs - {y}) (upChar_literal y) else y)"
  by transfer (intro fresh_string_if)

lemmas fresh_literal_list[code] = fresh_literal_if[where xs = "set xs" for xs, simplified]

end

text ‹Some tests: ›

value "[fresh {} (STR ''Abc''),
        fresh {STR ''X'', STR ''Abc''} (STR ''Abd''),
        fresh {STR ''X'', STR ''Y''} (STR  ''Y''),
        fresh {STR ''X'', STR ''Yaa'', STR ''Ya'', STR ''Yaa''} (STR ''Ya''),
        fresh {STR ''X'', STR ''Yaa'', STR ''Yz'', STR ''Yza''} (STR ''Yz''),
        fresh {STR ''X'', STR ''Y'', STR ''Yab'', STR ''Y''} (STR ''Y'')]"

end