add NMI
This commit is contained in:
parent
595b2c9650
commit
6f428d6496
@ -1,5 +1,8 @@
|
|||||||
module GEval.ClusteringMetrics
|
module GEval.ClusteringMetrics
|
||||||
(purity, purityFromConfusionMap, updateConfusionMap)
|
(purity, purityFromConfusionMap, updateConfusionMap,
|
||||||
|
normalizedMutualInformation,
|
||||||
|
normalizedMutualInformationFromConfusionMatrix,
|
||||||
|
updateConfusionMatrix)
|
||||||
where
|
where
|
||||||
|
|
||||||
import GEval.Common
|
import GEval.Common
|
||||||
@ -26,3 +29,31 @@ updateConfusionMap :: (Hashable a, Eq a, Hashable b, Eq b) => M.HashMap b (M.Has
|
|||||||
updateConfusionMap h (e, g) = M.insertWith updateSubHash g (unitHash e) h
|
updateConfusionMap h (e, g) = M.insertWith updateSubHash g (unitHash e) h
|
||||||
where unitHash k = M.singleton k 1
|
where unitHash k = M.singleton k 1
|
||||||
updateSubHash uh sh = M.unionWith (+) uh sh
|
updateSubHash uh sh = M.unionWith (+) uh sh
|
||||||
|
|
||||||
|
|
||||||
|
normalizedMutualInformation :: (Hashable a, Eq a, Hashable b, Eq b) => [(a, b)] -> Double
|
||||||
|
normalizedMutualInformation pL = normalizedMutualInformationFromConfusionMatrix cM
|
||||||
|
where cM = confusionMatrix pL
|
||||||
|
|
||||||
|
|
||||||
|
normalizedMutualInformationFromConfusionMatrix :: (Hashable a, Eq a, Hashable b, Eq b) => M.HashMap (a, b) Int -> Double
|
||||||
|
normalizedMutualInformationFromConfusionMatrix cM = 2.0 * mutualInformation / (classEntropy + clusterEntropy)
|
||||||
|
where mutualInformation = sum $ map pairMutualInformation $ M.toList cM
|
||||||
|
pairMutualInformation ((klass, cluster), count) =
|
||||||
|
(count /. total) * (log2 ((total /. (classDistribution M.! klass)) * (count /. (clusterDistribution M.! cluster))))
|
||||||
|
total = sum $ map snd $ M.toList cM
|
||||||
|
|
||||||
|
classEntropy = entropyWithTotalGiven total $ map snd $ M.toList classDistribution
|
||||||
|
clusterEntropy = entropyWithTotalGiven total $ map snd $ M.toList clusterDistribution
|
||||||
|
|
||||||
|
classDistribution = getDistribution fst cM
|
||||||
|
clusterDistribution = getDistribution snd cM
|
||||||
|
|
||||||
|
getDistribution fun cM = M.foldlWithKey' (\m kv count -> M.insertWith (+) (fun kv) count m) M.empty cM
|
||||||
|
|
||||||
|
|
||||||
|
confusionMatrix :: (Hashable a, Eq a, Hashable b, Eq b) => [(a, b)] -> M.HashMap (a, b) Int
|
||||||
|
confusionMatrix = foldl' updateConfusionMatrix M.empty
|
||||||
|
|
||||||
|
updateConfusionMatrix :: (Hashable a, Eq a, Hashable b, Eq b) => M.HashMap (a, b) Int -> (a, b) -> M.HashMap (a, b) Int
|
||||||
|
updateConfusionMatrix m p = M.insertWith (+) p 1 m
|
||||||
|
@ -8,3 +8,12 @@ x /. y = (fromIntegral x) / (fromIntegral y)
|
|||||||
safeDoubleDiv :: Double -> Double -> Double
|
safeDoubleDiv :: Double -> Double -> Double
|
||||||
safeDoubleDiv _ 0.0 = 0.0
|
safeDoubleDiv _ 0.0 = 0.0
|
||||||
safeDoubleDiv x y = x / y
|
safeDoubleDiv x y = x / y
|
||||||
|
|
||||||
|
log2 :: Double -> Double
|
||||||
|
log2 x = (log x) / (log 2.0)
|
||||||
|
|
||||||
|
entropyWithTotalGiven total distribution = - (sum $ map (entropyCount total) distribution)
|
||||||
|
|
||||||
|
entropyCount :: Int -> Int -> Double
|
||||||
|
entropyCount total count = prob * (log2 prob)
|
||||||
|
where prob = count /. total
|
||||||
|
33
test/Spec.hs
33
test/Spec.hs
@ -13,6 +13,24 @@ import Options.Applicative
|
|||||||
import Data.Text
|
import Data.Text
|
||||||
import qualified Test.HUnit as HU
|
import qualified Test.HUnit as HU
|
||||||
|
|
||||||
|
informationRetrievalBookExample :: [(String, Int)]
|
||||||
|
informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3),
|
||||||
|
("x", 1), ("o", 1), ("x", 1), ( "x", 1), ("x", 1), ("x", 1),
|
||||||
|
("x", 2), ("o", 2), ("o", 2),
|
||||||
|
("x", 3), ("d", 3), ("d", 3)]
|
||||||
|
|
||||||
|
perfectClustering :: [(Int, Char)]
|
||||||
|
perfectClustering = [(0, 'a'), (2, 'b'), (3, 'c'), (2, 'b'), (2, 'b'), (1, 'd'), (0, 'a')]
|
||||||
|
|
||||||
|
stupidClusteringOneBigCluster :: [(Int, Int)]
|
||||||
|
stupidClusteringOneBigCluster = [(0, 2), (2, 2), (1, 2), (2, 2), (0, 2), (0, 2), (0, 2), (0, 2), (1, 2), (1, 2)]
|
||||||
|
|
||||||
|
stupidClusteringManySmallClusters :: [(Int, Int)]
|
||||||
|
stupidClusteringManySmallClusters = [(0, 0), (2, 1), (1, 2), (2, 3), (0, 4), (0, 5), (0, 6), (0, 7), (1, 8), (1, 9)]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main = hspec $ do
|
main = hspec $ do
|
||||||
describe "root mean square error" $ do
|
describe "root mean square error" $ do
|
||||||
@ -53,11 +71,16 @@ main = hspec $ do
|
|||||||
precisionCount [["foo", "baz"], ["bar"], ["baz", "xyz"]] ["foo", "bar", "foo"] `shouldBe` 2
|
precisionCount [["foo", "baz"], ["bar"], ["baz", "xyz"]] ["foo", "bar", "foo"] `shouldBe` 2
|
||||||
describe "purity (in flat clustering)" $ do
|
describe "purity (in flat clustering)" $ do
|
||||||
it "the example from Information Retrieval Book" $ do
|
it "the example from Information Retrieval Book" $ do
|
||||||
purity [("o", 2) :: (String, Int), ("o", 2), ("d", 2), ("x", 3), ("d", 3),
|
purity informationRetrievalBookExample `shouldBeAlmost` 0.70588
|
||||||
("x", 1), ("o", 1), ("x", 1), ( "x", 1), ("x", 1), ("x", 1),
|
describe "NMI (in flat clustering)" $ do
|
||||||
("x", 2), ("o", 2), ("o", 2),
|
it "the example from Information Retrieval Book" $ do
|
||||||
("x", 3), ("d", 3), ("d", 3)] `shouldBeAlmost` 0.70588
|
normalizedMutualInformation informationRetrievalBookExample `shouldBeAlmost` 0.36456
|
||||||
|
it "perfect clustering" $ do
|
||||||
|
normalizedMutualInformation perfectClustering `shouldBeAlmost` 1.0
|
||||||
|
it "stupid clustering with one big cluster" $ do
|
||||||
|
normalizedMutualInformation stupidClusteringOneBigCluster `shouldBeAlmost` 0.0
|
||||||
|
it "stupid clustering with many small clusters" $ do
|
||||||
|
normalizedMutualInformation stupidClusteringManySmallClusters `shouldBeAlmost` 0.61799
|
||||||
describe "reading options" $ do
|
describe "reading options" $ do
|
||||||
it "can get the metric" $ do
|
it "can get the metric" $ do
|
||||||
extractMetric "bleu-complex" `shouldReturn` (Just BLEU)
|
extractMetric "bleu-complex" `shouldReturn` (Just BLEU)
|
||||||
|
Loading…
Reference in New Issue
Block a user