add auxiliary function for calculating purity
This commit is contained in:
parent
422d7c63d9
commit
065a3ce9cd
@ -1,5 +1,5 @@
|
|||||||
name: geval
|
name: geval
|
||||||
version: 0.2.6.0
|
version: 0.3.0.0
|
||||||
synopsis: Machine learning evaluation tools
|
synopsis: Machine learning evaluation tools
|
||||||
description: Please see README.md
|
description: Please see README.md
|
||||||
homepage: http://github.com/name/project
|
homepage: http://github.com/name/project
|
||||||
@ -21,6 +21,7 @@ library
|
|||||||
, GEval.BLEU
|
, GEval.BLEU
|
||||||
, GEval.ClippEU
|
, GEval.ClippEU
|
||||||
, GEval.PrecisionRecall
|
, GEval.PrecisionRecall
|
||||||
|
, GEval.Purity
|
||||||
, GEval.Common
|
, GEval.Common
|
||||||
build-depends: base >= 4.7 && < 5
|
build-depends: base >= 4.7 && < 5
|
||||||
, cond
|
, cond
|
||||||
@ -38,6 +39,8 @@ library
|
|||||||
, unix
|
, unix
|
||||||
, fgl
|
, fgl
|
||||||
, attoparsec
|
, attoparsec
|
||||||
|
, unordered-containers
|
||||||
|
, hashable
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
executable geval
|
executable geval
|
||||||
|
28
src/GEval/Purity.hs
Normal file
28
src/GEval/Purity.hs
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
module GEval.Purity
|
||||||
|
(purity, purityFromConfusionMap, updateConfusionMap)
|
||||||
|
where
|
||||||
|
|
||||||
|
import GEval.Common
|
||||||
|
|
||||||
|
import qualified Data.HashMap.Strict as M
|
||||||
|
import Data.Hashable
|
||||||
|
import Data.List
|
||||||
|
|
||||||
|
purity :: (Hashable a, Eq a, Hashable b, Eq b) => [(a, b)] -> Double
|
||||||
|
purity pL = purityFromConfusionMap cM
|
||||||
|
where cM = confusionMap pL
|
||||||
|
|
||||||
|
purityFromConfusionMap :: (Hashable a, Eq a, Hashable b, Eq b) => M.HashMap a (M.HashMap b Int) -> Double
|
||||||
|
purityFromConfusionMap cM = numberOfMajorityItems /. numberOfItems
|
||||||
|
where numberOfItems = sum $ map fst classCounts
|
||||||
|
numberOfMajorityItems = sum $ map snd classCounts
|
||||||
|
classCounts = map getClassCount $ M.toList cM
|
||||||
|
getClassCount (_, sh) = foldl' (\(s, m) (_, c) -> (s + c, max m c)) (0, 0) $ M.toList sh
|
||||||
|
|
||||||
|
confusionMap :: (Hashable a, Eq a, Hashable b, Eq b) => [(a, b)] -> M.HashMap a (M.HashMap b Int)
|
||||||
|
confusionMap = foldl' updateConfusionMap M.empty
|
||||||
|
|
||||||
|
updateConfusionMap :: (Hashable a, Eq a, Hashable b, Eq b) => M.HashMap a (M.HashMap b Int) -> (a, b) -> M.HashMap a (M.HashMap b Int)
|
||||||
|
updateConfusionMap h (e, g) = M.insertWith updateSubHash e (unitHash g) h
|
||||||
|
where unitHash k = M.singleton k 1
|
||||||
|
updateSubHash uh sh = M.unionWith (+) uh sh
|
@ -7,6 +7,7 @@ import GEval.OptionsParser
|
|||||||
import GEval.BLEU
|
import GEval.BLEU
|
||||||
import GEval.ClippEU
|
import GEval.ClippEU
|
||||||
import GEval.PrecisionRecall
|
import GEval.PrecisionRecall
|
||||||
|
import GEval.Purity
|
||||||
import Data.Attoparsec.Text
|
import Data.Attoparsec.Text
|
||||||
import Options.Applicative
|
import Options.Applicative
|
||||||
import Data.Text
|
import Data.Text
|
||||||
@ -50,6 +51,13 @@ main = hspec $ do
|
|||||||
precisionCount [["bar", "bar", "bar", "bar", "foo", "xyz", "foo"]] ["foo", "bar", "foo", "baz", "bar", "foo"] `shouldBe` 4
|
precisionCount [["bar", "bar", "bar", "bar", "foo", "xyz", "foo"]] ["foo", "bar", "foo", "baz", "bar", "foo"] `shouldBe` 4
|
||||||
it "multiple refs" $ do
|
it "multiple refs" $ do
|
||||||
precisionCount [["foo", "baz"], ["bar"], ["baz", "xyz"]] ["foo", "bar", "foo"] `shouldBe` 2
|
precisionCount [["foo", "baz"], ["bar"], ["baz", "xyz"]] ["foo", "bar", "foo"] `shouldBe` 2
|
||||||
|
describe "purity (in flat clustering)" $ do
|
||||||
|
it "the example from Information Retrieval Book" $ do
|
||||||
|
purity [(2, "o") :: (Int, String), (2, "o"), (2, "d"), (3, "x"), (3, "d"),
|
||||||
|
(1, "x"), (1, "o"), (1, "x"), (1, "x"), (1, "x"), (1, "x"),
|
||||||
|
(2, "x"), (2, "o"), (2, "o"),
|
||||||
|
(3, "x"), (3, "d"), (3, "d")] `shouldBeAlmost` 0.70588
|
||||||
|
|
||||||
describe "reading options" $ do
|
describe "reading options" $ do
|
||||||
it "can get the metric" $ do
|
it "can get the metric" $ do
|
||||||
extractMetric "bleu-complex" `shouldReturn` (Just BLEU)
|
extractMetric "bleu-complex" `shouldReturn` (Just BLEU)
|
||||||
|
Loading…
Reference in New Issue
Block a user