section‹Group by Function› theory GroupF imports Main begin text‹Grouping elements of a list according to a function.› fun groupF :: "('a ⇒ 'b) ⇒ 'a list ⇒ 'a list list" where "groupF f [] = []" | "groupF f (x#xs) = (x#(filter (λy. f x = f y) xs))#(groupF f (filter (λy. f x ≠ f y) xs))" text‹trying a more efficient implementation of @{term groupF}› context begin private fun select_p_tuple :: "('a ⇒ bool) ⇒ 'a ⇒ ('a list × 'a list) ⇒ ('a list × 'a list)" where "select_p_tuple p x (ts,fs) = (if p x then (x#ts, fs) else (ts, x#fs))" private definition partition_tailrec :: "('a ⇒ bool) ⇒ 'a list ⇒ ('a list × 'a list)" where "partition_tailrec p xs = foldr (select_p_tuple p) xs ([],[])" private lemma partition_tailrec: "partition_tailrec f as = (filter f as, filter (λx. ¬f x) as)" proof - {fix ts_accu fs_accu have "foldr (select_p_tuple f) as (ts_accu, fs_accu) = (filter f as @ ts_accu, filter (λx. ¬f x) as @ fs_accu)" by(induction as arbitrary: ts_accu fs_accu) simp_all } thus ?thesis unfolding partition_tailrec_def by simp qed private lemma "groupF f (x#xs) = (let (ts, fs) = partition_tailrec (λy. f x = f y) xs in (x#ts)#(groupF f fs))" by(simp add: partition_tailrec) (*is this more efficient?*) private function groupF_code :: "('a ⇒ 'b) ⇒ 'a list ⇒ 'a list list" where "groupF_code f [] = []" | "groupF_code f (x#xs) = (let (ts, fs) = partition_tailrec (λy. f x = f y) xs in (x#ts)#(groupF_code f fs))" by(pat_completeness) auto private termination groupF_code apply(relation "measure (λ(f,as). length (filter (λx. (λy. f x = f y) x) as))") apply(simp; fail) apply(simp add: partition_tailrec) using le_imp_less_Suc length_filter_le by blast lemma groupF_code[code]: "groupF f as = groupF_code f as" by(induction f as rule: groupF_code.induct) (simp_all add: partition_tailrec) export_code groupF checking SML end lemma groupF_concat_set: "set (concat (groupF f xs)) = set xs" proof(induction f xs rule: groupF.induct) case 2 thus ?case by (simp) blast qed(simp) lemma groupF_Union_set: "(⋃x ∈ set (groupF f xs). set x) = set xs" proof(induction f xs rule: groupF.induct) case 2 thus ?case by (simp) blast qed(simp) lemma groupF_set: "∀X ∈ set (groupF f xs). ∀x ∈ set X. x ∈ set xs" using groupF_concat_set by fastforce lemma groupF_equality: defines "same f A ≡ ∀a1 ∈ set A. ∀a2 ∈ set A. f a1 = f a2" shows "∀A ∈ set (groupF f xs). same f A" proof(induction f xs rule: groupF.induct) case 1 thus ?case by simp next case (2 f x xs) have groupF_fst: "groupF f (x # xs) = (x # [y←xs . f x = f y]) # groupF f [y←xs . f x ≠ f y]" by force have step: " ∀A∈set [x # [y←xs . f x = f y]]. same f A" unfolding same_def by fastforce with 2 show ?case unfolding groupF_fst by fastforce qed lemma groupF_nequality: "A ∈ set (groupF f xs) ⟹ B ∈ set (groupF f xs) ⟹ A ≠ B ⟹ ∀a ∈ set A. ∀b ∈ set B. f a ≠ f b" proof(induction f xs rule: groupF.induct) case 1 thus ?case by simp next case 2 thus ?case apply - apply(subst (asm) groupF.simps)+ using groupF_set by fastforce (*1s*) qed lemma groupF_cong: fixes xs::"'a list" and f1::"'a ⇒ 'b" and f2::"'a ⇒ 'c" assumes "∀x ∈ set xs. ∀y ∈ set xs. (f1 x = f1 y ⟷ f2 x = f2 y)" shows "groupF f1 xs = groupF f2 xs" using assms proof(induction f1 xs rule: groupF.induct) case (2 f x xs) thus ?case using filter_cong[of xs xs "λy. f x = f y" "λy. f2 x = f2 y"] filter_cong[of xs xs "λy. f x ≠ f y" "λy. f2 x ≠ f2 y"] by auto qed (simp) lemma groupF_empty: "groupF f xs ≠ [] ⟷ xs ≠ []" by(induction f xs rule: groupF.induct) auto lemma groupF_empty_elem: "x ∈ set (groupF f xs) ⟹ x ≠ []" by(induction f xs rule: groupF.induct) auto lemma groupF_distinct: "distinct xs ⟹ distinct (concat (groupF f xs))" by (induction f xs rule: groupF.induct) (auto simp add: groupF_Union_set) text‹It is possible to use @{term "map (map fst) (groupF snd (map (λx. (x, f x)) P))"} instead of @{term "groupF f P"} for the following reasons: @{const groupF} executes its compare function (first parameter) very often; it always tests for @{term "(f x = f y)"}. The function @{term f} may be really expensive. At least polyML does not share the result of @{term f} but (probably) always recomputes (part of) it. The optimization pre-computes @{term f} and tells @{const groupF} to use a really cheap function (@{const snd}) to compare. The following lemma tells that those are equal.› (* is this also faster for Haskell?*) lemma groupF_tuple: "groupF f xs = map (map fst) (groupF snd (map (λx. (x, f x)) xs))" proof(induction f xs rule: groupF.induct) case (1 f) thus ?case by simp next case (2 f x xs) have g1: "[y←xs . f x = f y] = map fst [y←map (λx. (x, f x)) xs . f x = snd y]" proof(induction xs arbitrary: f x) case Cons thus ?case by fastforce qed(simp) have g2: "(map (λx. (x, f x)) [y←xs . f x ≠ f y]) = [y←map (λx. (x, f x)) xs . f x ≠ snd y]" proof(induction xs) case Cons thus ?case by fastforce qed(simp) from 2 g1 g2 show ?case by simp qed end