geval/src/GEval/ClusteringMetrics.hs

29 lines
1.2 KiB
Haskell
Raw Normal View History

2017-03-24 18:26:12 +01:00
module GEval.ClusteringMetrics
(purity, purityFromConfusionMap, updateConfusionMap)
where
import GEval.Common
import qualified Data.HashMap.Strict as M
import Data.Hashable
import Data.List
purity :: (Hashable a, Eq a, Hashable b, Eq b) => [(a, b)] -> Double
purity pL = purityFromConfusionMap cM
where cM = confusionMap pL
2017-03-24 18:26:12 +01:00
purityFromConfusionMap :: (Hashable a, Eq a, Hashable b, Eq b) => M.HashMap b (M.HashMap a Int) -> Double
purityFromConfusionMap cM = numberOfMajorityItems /. numberOfItems
where numberOfItems = sum $ map fst classCounts
numberOfMajorityItems = sum $ map snd classCounts
classCounts = map getClassCount $ M.toList cM
getClassCount (_, sh) = foldl' (\(s, m) (_, c) -> (s + c, max m c)) (0, 0) $ M.toList sh
2017-03-24 18:26:12 +01:00
confusionMap :: (Hashable a, Eq a, Hashable b, Eq b) => [(a, b)] -> M.HashMap b (M.HashMap a Int)
confusionMap = foldl' updateConfusionMap M.empty
2017-03-24 18:26:12 +01:00
updateConfusionMap :: (Hashable a, Eq a, Hashable b, Eq b) => M.HashMap b (M.HashMap a Int) -> (a, b) -> M.HashMap b (M.HashMap a Int)
updateConfusionMap h (e, g) = M.insertWith updateSubHash g (unitHash e) h
where unitHash k = M.singleton k 1
updateSubHash uh sh = M.unionWith (+) uh sh