From 595b2c96507b8a4c31b3597c09094eebccd1d5c0 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Fri, 24 Mar 2017 18:26:12 +0100 Subject: [PATCH] fix purity --- geval.cabal | 2 +- src/GEval/{Purity.hs => ClusteringMetrics.hs} | 10 +++++----- test/Spec.hs | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) rename src/GEval/{Purity.hs => ClusteringMetrics.hs} (79%) diff --git a/geval.cabal b/geval.cabal index c7b6408..5ab3709 100644 --- a/geval.cabal +++ b/geval.cabal @@ -21,7 +21,7 @@ library , GEval.BLEU , GEval.ClippEU , GEval.PrecisionRecall - , GEval.Purity + , GEval.ClusteringMetrics , GEval.Common build-depends: base >= 4.7 && < 5 , cond diff --git a/src/GEval/Purity.hs b/src/GEval/ClusteringMetrics.hs similarity index 79% rename from src/GEval/Purity.hs rename to src/GEval/ClusteringMetrics.hs index d12fa35..9dd2fa0 100644 --- a/src/GEval/Purity.hs +++ b/src/GEval/ClusteringMetrics.hs @@ -1,4 +1,4 @@ -module GEval.Purity +module GEval.ClusteringMetrics (purity, purityFromConfusionMap, updateConfusionMap) where @@ -12,17 +12,17 @@ purity :: (Hashable a, Eq a, Hashable b, Eq b) => [(a, b)] -> Double purity pL = purityFromConfusionMap cM where cM = confusionMap pL -purityFromConfusionMap :: (Hashable a, Eq a, Hashable b, Eq b) => M.HashMap a (M.HashMap b Int) -> Double +purityFromConfusionMap :: (Hashable a, Eq a, Hashable b, Eq b) => M.HashMap b (M.HashMap a Int) -> Double purityFromConfusionMap cM = numberOfMajorityItems /. numberOfItems where numberOfItems = sum $ map fst classCounts numberOfMajorityItems = sum $ map snd classCounts classCounts = map getClassCount $ M.toList cM getClassCount (_, sh) = foldl' (\(s, m) (_, c) -> (s + c, max m c)) (0, 0) $ M.toList sh -confusionMap :: (Hashable a, Eq a, Hashable b, Eq b) => [(a, b)] -> M.HashMap a (M.HashMap b Int) +confusionMap :: (Hashable a, Eq a, Hashable b, Eq b) => [(a, b)] -> M.HashMap b (M.HashMap a Int) confusionMap = foldl' updateConfusionMap M.empty -updateConfusionMap :: (Hashable a, Eq a, Hashable b, Eq b) => M.HashMap a (M.HashMap b Int) -> (a, b) -> M.HashMap a (M.HashMap b Int) -updateConfusionMap h (e, g) = M.insertWith updateSubHash e (unitHash g) h +updateConfusionMap :: (Hashable a, Eq a, Hashable b, Eq b) => M.HashMap b (M.HashMap a Int) -> (a, b) -> M.HashMap b (M.HashMap a Int) +updateConfusionMap h (e, g) = M.insertWith updateSubHash g (unitHash e) h where unitHash k = M.singleton k 1 updateSubHash uh sh = M.unionWith (+) uh sh diff --git a/test/Spec.hs b/test/Spec.hs index 664b6ab..5ac7abd 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -7,7 +7,7 @@ import GEval.OptionsParser import GEval.BLEU import GEval.ClippEU import GEval.PrecisionRecall -import GEval.Purity +import GEval.ClusteringMetrics import Data.Attoparsec.Text import Options.Applicative import Data.Text @@ -53,10 +53,10 @@ main = hspec $ do precisionCount [["foo", "baz"], ["bar"], ["baz", "xyz"]] ["foo", "bar", "foo"] `shouldBe` 2 describe "purity (in flat clustering)" $ do it "the example from Information Retrieval Book" $ do - purity [(2, "o") :: (Int, String), (2, "o"), (2, "d"), (3, "x"), (3, "d"), - (1, "x"), (1, "o"), (1, "x"), (1, "x"), (1, "x"), (1, "x"), - (2, "x"), (2, "o"), (2, "o"), - (3, "x"), (3, "d"), (3, "d")] `shouldBeAlmost` 0.70588 + purity [("o", 2) :: (String, Int), ("o", 2), ("d", 2), ("x", 3), ("d", 3), + ("x", 1), ("o", 1), ("x", 1), ( "x", 1), ("x", 1), ("x", 1), + ("x", 2), ("o", 2), ("o", 2), + ("x", 3), ("d", 3), ("d", 3)] `shouldBeAlmost` 0.70588 describe "reading options" $ do it "can get the metric" $ do