NMI (normalized mutual information) implemented as a GEval metric

This commit is contained in:
Filip Gralinski 2017-03-26 08:01:19 +02:00 committed by Filip Gralinski
parent 6f428d6496
commit 37c31e6075
6 changed files with 48 additions and 2 deletions

View File

@ -41,10 +41,13 @@ import GEval.BLEU
import GEval.Common import GEval.Common
import GEval.ClippEU import GEval.ClippEU
import GEval.PrecisionRecall import GEval.PrecisionRecall
import GEval.ClusteringMetrics
import qualified Data.HashMap.Strict as M
type MetricValue = Double type MetricValue = Double
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI
deriving (Eq) deriving (Eq)
instance Show Metric where instance Show Metric where
@ -54,6 +57,7 @@ instance Show Metric where
show Accuracy = "Accuracy" show Accuracy = "Accuracy"
show ClippEU = "ClippEU" show ClippEU = "ClippEU"
show (FMeasure beta) = "F" ++ (show beta) show (FMeasure beta) = "F" ++ (show beta)
show NMI = "NMI"
instance Read Metric where instance Read Metric where
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)] readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
@ -61,6 +65,7 @@ instance Read Metric where
readsPrec _ ('B':'L':'E':'U':theRest) = [(BLEU, theRest)] readsPrec _ ('B':'L':'E':'U':theRest) = [(BLEU, theRest)]
readsPrec _ ('A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(Accuracy, theRest)] readsPrec _ ('A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(Accuracy, theRest)]
readsPrec _ ('C':'l':'i':'p':'p':'E':'U':theRest) = [(ClippEU, theRest)] readsPrec _ ('C':'l':'i':'p':'p':'E':'U':theRest) = [(ClippEU, theRest)]
readsPrec _ ('N':'M':'I':theRest) = [(NMI, theRest)]
readsPrec p ('F':theRest) = case readsPrec p theRest of readsPrec p ('F':theRest) = case readsPrec p theRest of
[(beta, theRest)] -> [(FMeasure beta, theRest)] [(beta, theRest)] -> [(FMeasure beta, theRest)]
_ -> [] _ -> []
@ -75,6 +80,7 @@ getMetricOrdering BLEU = TheHigherTheBetter
getMetricOrdering Accuracy = TheHigherTheBetter getMetricOrdering Accuracy = TheHigherTheBetter
getMetricOrdering ClippEU = TheHigherTheBetter getMetricOrdering ClippEU = TheHigherTheBetter
getMetricOrdering (FMeasure _) = TheHigherTheBetter getMetricOrdering (FMeasure _) = TheHigherTheBetter
getMetricOrdering NMI = TheHigherTheBetter
defaultOutDirectory = "." defaultOutDirectory = "."
defaultTestName = "test-A" defaultTestName = "test-A"
@ -221,6 +227,8 @@ gevalCore' ClippEU = gevalCore'' parseClippingSpecs parseClippings matchStep cli
clippeuAgg = CC.foldl countFolder (0, 0, 0) clippeuAgg = CC.foldl countFolder (0, 0, 0)
finalStep counts = f2MeasureOnCounts counts finalStep counts = f2MeasureOnCounts counts
-- NMI clause: both line parsers and the per-pair step are `id`, so each pair of
-- raw Text lines reaches the sink unchanged. The sink folds the pairs with
-- `updateConfusionMatrix`, starting from an empty HashMap (`M.empty`), and the
-- accumulated result is turned into the final Double by
-- `normalizedMutualInformationFromConfusionMatrix`.
-- NOTE(review): which element of the pair is the expected label and which is
-- the system output is decided inside gevalCore'' — confirm there.
gevalCore' NMI = gevalCore'' id id id (CC.foldl updateConfusionMatrix M.empty) normalizedMutualInformationFromConfusionMatrix
data SourceItem a = Got a | Done data SourceItem a = Got a | Done
gevalCore'' :: (Text -> a) -> (Text -> b) -> ((a, b) -> c) -> (Sink c (ResourceT IO) d) -> (d -> Double) -> String -> String -> IO (MetricValue) gevalCore'' :: (Text -> a) -> (Text -> b) -> ((a, b) -> c) -> (Sink c (ResourceT IO) d) -> (d -> Double) -> String -> String -> IO (MetricValue)

View File

@ -74,7 +74,7 @@ metricReader = option auto
<> value defaultMetric <> value defaultMetric
<> showDefault <> showDefault
<> metavar "METRIC" <> metavar "METRIC"
<> help "Metric to be used - RMSE, MSE, Accuracy, F-measure (specify as F1, F2, F0.25, etc.), BLEU or ClippEU" ) <> help "Metric to be used - RMSE, MSE, Accuracy, F-measure (specify as F1, F2, F0.25, etc.), BLEU, NMI or ClippEU" )
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue)) runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
runGEval args = do runGEval args = do

View File

@ -81,6 +81,9 @@ main = hspec $ do
normalizedMutualInformation stupidClusteringOneBigCluster `shouldBeAlmost` 0.0 normalizedMutualInformation stupidClusteringOneBigCluster `shouldBeAlmost` 0.0
it "stupid clustering with many small clusters" $ do it "stupid clustering with many small clusters" $ do
normalizedMutualInformation stupidClusteringManySmallClusters `shouldBeAlmost` 0.61799 normalizedMutualInformation stupidClusteringManySmallClusters `shouldBeAlmost` 0.61799
describe "NMI challenge" $ do
it "complex test" $ do
runGEvalTest "nmi-complex" `shouldReturnAlmost` 0.36456
describe "reading options" $ do describe "reading options" $ do
it "can get the metric" $ do it "can get the metric" $ do
extractMetric "bleu-complex" `shouldReturn` (Just BLEU) extractMetric "bleu-complex" `shouldReturn` (Just BLEU)

View File

@ -0,0 +1,17 @@
3
1
1
1
1
1
1
2
2
2
2
2
2
3
3
3
3
1 3
2 1
3 1
4 1
5 1
6 1
7 1
8 2
9 2
10 2
11 2
12 2
13 2
14 3
15 3
16 3
17 3

View File

@ -0,0 +1 @@
--metric NMI

View File

@ -0,0 +1,17 @@
x
x
x
o
x
x
x
x
o
o
o
o
d
x
d
d
d
1 x
2 x
3 x
4 o
5 x
6 x
7 x
8 x
9 o
10 o
11 o
12 o
13 d
14 x
15 d
16 d
17 d