fix purity

This commit is contained in:
Filip Gralinski 2017-03-24 18:26:12 +01:00 committed by Filip Gralinski
parent 065a3ce9cd
commit 595b2c9650
3 changed files with 11 additions and 11 deletions

View File

@ -21,7 +21,7 @@ library
, GEval.BLEU
, GEval.ClippEU
, GEval.PrecisionRecall
, GEval.Purity
, GEval.ClusteringMetrics
, GEval.Common
build-depends: base >= 4.7 && < 5
, cond

View File

@ -1,4 +1,4 @@
module GEval.Purity
module GEval.ClusteringMetrics
(purity, purityFromConfusionMap, updateConfusionMap)
where
@ -12,17 +12,17 @@ purity :: (Hashable a, Eq a, Hashable b, Eq b) => [(a, b)] -> Double
purity pL = purityFromConfusionMap cM
where cM = confusionMap pL
purityFromConfusionMap :: (Hashable a, Eq a, Hashable b, Eq b) => M.HashMap a (M.HashMap b Int) -> Double
purityFromConfusionMap :: (Hashable a, Eq a, Hashable b, Eq b) => M.HashMap b (M.HashMap a Int) -> Double
purityFromConfusionMap cM = numberOfMajorityItems /. numberOfItems
where numberOfItems = sum $ map fst classCounts
numberOfMajorityItems = sum $ map snd classCounts
classCounts = map getClassCount $ M.toList cM
getClassCount (_, sh) = foldl' (\(s, m) (_, c) -> (s + c, max m c)) (0, 0) $ M.toList sh
confusionMap :: (Hashable a, Eq a, Hashable b, Eq b) => [(a, b)] -> M.HashMap a (M.HashMap b Int)
confusionMap :: (Hashable a, Eq a, Hashable b, Eq b) => [(a, b)] -> M.HashMap b (M.HashMap a Int)
confusionMap = foldl' updateConfusionMap M.empty
updateConfusionMap :: (Hashable a, Eq a, Hashable b, Eq b) => M.HashMap a (M.HashMap b Int) -> (a, b) -> M.HashMap a (M.HashMap b Int)
updateConfusionMap h (e, g) = M.insertWith updateSubHash e (unitHash g) h
updateConfusionMap :: (Hashable a, Eq a, Hashable b, Eq b) => M.HashMap b (M.HashMap a Int) -> (a, b) -> M.HashMap b (M.HashMap a Int)
updateConfusionMap h (e, g) = M.insertWith updateSubHash g (unitHash e) h
where unitHash k = M.singleton k 1
updateSubHash uh sh = M.unionWith (+) uh sh

View File

@ -7,7 +7,7 @@ import GEval.OptionsParser
import GEval.BLEU
import GEval.ClippEU
import GEval.PrecisionRecall
import GEval.Purity
import GEval.ClusteringMetrics
import Data.Attoparsec.Text
import Options.Applicative
import Data.Text
@ -53,10 +53,10 @@ main = hspec $ do
precisionCount [["foo", "baz"], ["bar"], ["baz", "xyz"]] ["foo", "bar", "foo"] `shouldBe` 2
describe "purity (in flat clustering)" $ do
it "the example from Information Retrieval Book" $ do
purity [(2, "o") :: (Int, String), (2, "o"), (2, "d"), (3, "x"), (3, "d"),
(1, "x"), (1, "o"), (1, "x"), (1, "x"), (1, "x"), (1, "x"),
(2, "x"), (2, "o"), (2, "o"),
(3, "x"), (3, "d"), (3, "d")] `shouldBeAlmost` 0.70588
purity [("o", 2) :: (String, Int), ("o", 2), ("d", 2), ("x", 3), ("d", 3),
("x", 1), ("o", 1), ("x", 1), ( "x", 1), ("x", 1), ("x", 1),
("x", 2), ("o", 2), ("o", 2),
("x", 3), ("d", 3), ("d", 3)] `shouldBeAlmost` 0.70588
describe "reading options" $ do
it "can get the metric" $ do