# Theory GroupF

```section‹Group by Function›
theory GroupF
imports Main
begin

text‹Grouping elements of a list according to a function.›

fun groupF ::  "('a ⇒ 'b) ⇒ 'a list ⇒ 'a list list"  where
"groupF f [] = []" |
"groupF f (x#xs) = (x#(filter (λy. f x = f y) xs))#(groupF f (filter (λy. f x ≠ f y) xs))"

text‹trying a more efficient implementation of @{term groupF}›
context
begin
private fun select_p_tuple :: "('a ⇒ bool) ⇒ 'a ⇒ ('a list × 'a list) ⇒ ('a list × 'a list)"
where
"select_p_tuple p x (ts,fs) = (if p x then (x#ts, fs) else (ts, x#fs))"

private definition partition_tailrec :: "('a ⇒ bool) ⇒ 'a list ⇒ ('a list × 'a list)"
where
"partition_tailrec p xs = foldr (select_p_tuple p) xs ([],[])"

private lemma partition_tailrec: "partition_tailrec f as =  (filter f as,  filter (λx. ¬f x) as)"
proof -
{fix ts_accu fs_accu
have "foldr (select_p_tuple f) as (ts_accu, fs_accu) =
(filter f as @ ts_accu,  filter (λx. ¬f x) as @ fs_accu)"
by(induction as arbitrary: ts_accu fs_accu) simp_all
} thus ?thesis unfolding partition_tailrec_def by simp
qed

private lemma
"groupF f (x#xs) = (let (ts, fs) = partition_tailrec (λy. f x = f y) xs in (x#ts)#(groupF f fs))"

(*is this more efficient?*)
private function groupF_code ::  "('a ⇒ 'b) ⇒ 'a list ⇒ 'a list list"  where
"groupF_code f [] = []" |
"groupF_code f (x#xs) = (let
(ts, fs) = partition_tailrec (λy. f x = f y) xs
in
(x#ts)#(groupF_code f fs))"
by(pat_completeness) auto

private termination groupF_code
apply(relation "measure (λ(f,as). length (filter (λx. (λy. f x = f y) x) as))")
apply(simp; fail)
using le_imp_less_Suc length_filter_le by blast

lemma groupF_code[code]: "groupF f as = groupF_code f as"
by(induction f as rule: groupF_code.induct) (simp_all add: partition_tailrec)

export_code groupF checking SML
end

lemma groupF_concat_set: "set (concat (groupF f xs)) = set xs"
proof(induction f xs rule: groupF.induct)
case 2 thus ?case by (simp) blast
qed(simp)

lemma groupF_Union_set: "(⋃x ∈ set (groupF f xs). set x) = set xs"
proof(induction f xs rule: groupF.induct)
case 2 thus ?case by (simp) blast
qed(simp)

lemma groupF_set: "∀X ∈ set (groupF f xs). ∀x ∈ set X. x ∈ set xs"
using groupF_concat_set by fastforce

lemma groupF_equality:
defines "same f A ≡ ∀a1 ∈ set A. ∀a2 ∈ set A. f a1 = f a2"
shows "∀A ∈ set (groupF f xs). same f A"
proof(induction f xs rule: groupF.induct)
case 1 thus ?case by simp
next
case (2 f x xs)
have groupF_fst:
"groupF f (x # xs) = (x # [y←xs . f x = f y]) # groupF f [y←xs . f x ≠ f y]" by force
have step: " ∀A∈set [x # [y←xs . f x = f y]]. same f A" unfolding same_def by fastforce
with 2 show ?case unfolding groupF_fst by fastforce
qed

lemma groupF_nequality: "A ∈ set (groupF f xs) ⟹ B ∈ set (groupF f xs) ⟹ A ≠ B ⟹
∀a ∈ set A. ∀b ∈ set B. f a ≠ f b"
proof(induction f xs rule: groupF.induct)
case 1 thus ?case by simp
next
case 2 thus ?case
apply -
apply(subst (asm) groupF.simps)+
using groupF_set by fastforce (*1s*)
qed

lemma groupF_cong: fixes xs::"'a list" and f1::"'a ⇒ 'b" and f2::"'a ⇒ 'c"
assumes "∀x ∈ set xs. ∀y ∈ set xs. (f1 x = f1 y ⟷ f2 x = f2 y)"
shows "groupF f1 xs = groupF f2 xs"
using assms proof(induction f1 xs rule: groupF.induct)
case (2 f x xs) thus ?case using filter_cong[of xs xs "λy. f x = f y" "λy. f2 x = f2 y"]
filter_cong[of xs xs "λy. f x ≠ f y" "λy. f2 x ≠ f2 y"] by auto
qed (simp)

lemma groupF_empty: "groupF f xs ≠ [] ⟷ xs ≠ []"
by(induction f xs rule: groupF.induct) auto
lemma groupF_empty_elem: "x ∈ set (groupF f xs) ⟹ x ≠ []"
by(induction f xs rule: groupF.induct) auto

lemma groupF_distinct: "distinct xs ⟹ distinct (concat (groupF f xs))"
by (induction f xs rule: groupF.induct) (auto simp add: groupF_Union_set)

text‹It is possible to use
@{term "map (map fst) (groupF snd (map (λx. (x, f x)) P))"}
@{term "groupF f P"}
for the following reasons:
@{const groupF} executes its compare function (first parameter) very often;
it always tests for @{term "(f x = f y)"}.
The function @{term f} may be really expensive.
At least polyML does not share the result of @{term f} but (probably) always recomputes (part of) it.
The optimization pre-computes @{term f} and tells @{const groupF} to use
a really cheap function (@{const snd}) to compare.
The following lemma tells that those are equal.›
(* is this also faster for Haskell?*)

lemma groupF_tuple: "groupF f xs = map (map fst) (groupF snd (map (λx. (x, f x)) xs))"
proof(induction f xs rule: groupF.induct)
case (1 f) thus ?case by simp
next
case (2 f x xs)
have g1: "[y←xs . f x = f y] = map fst [y←map (λx. (x, f x)) xs . f x = snd y]"
proof(induction xs arbitrary: f x)
case Cons thus ?case by fastforce
qed(simp)
have g2: "(map (λx. (x, f x)) [y←xs . f x ≠ f y]) = [y←map (λx. (x, f x)) xs . f x ≠ snd y]"
proof(induction xs)
case Cons thus ?case by fastforce
qed(simp)
from 2 g1 g2 show ?case by simp
qed
end
```