From 37c31e607558316073e7a96f2c3a86e5814c890b Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Sun, 26 Mar 2017 08:01:19 +0200 Subject: [PATCH] NMI implemented as geval metric --- src/GEval/Core.hs | 10 +++++++++- src/GEval/OptionsParser.hs | 2 +- test/Spec.hs | 3 +++ .../nmi-complex-solution/test-A/out.tsv | 17 +++++++++++++++++ test/nmi-complex/nmi-complex/config.txt | 1 + .../nmi-complex/nmi-complex/test-A/expected.tsv | 17 +++++++++++++++++ 6 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 test/nmi-complex/nmi-complex-solution/test-A/out.tsv create mode 100644 test/nmi-complex/nmi-complex/config.txt create mode 100644 test/nmi-complex/nmi-complex/test-A/expected.tsv diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index dd5ca2b..470c9ad 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -41,10 +41,13 @@ import GEval.BLEU import GEval.Common import GEval.ClippEU import GEval.PrecisionRecall +import GEval.ClusteringMetrics + +import qualified Data.HashMap.Strict as M type MetricValue = Double -data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double +data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI deriving (Eq) instance Show Metric where @@ -54,6 +57,7 @@ instance Show Metric where show Accuracy = "Accuracy" show ClippEU = "ClippEU" show (FMeasure beta) = "F" ++ (show beta) + show NMI = "NMI" instance Read Metric where readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)] @@ -61,6 +65,7 @@ instance Read Metric where readsPrec _ ('B':'L':'E':'U':theRest) = [(BLEU, theRest)] readsPrec _ ('A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(Accuracy, theRest)] readsPrec _ ('C':'l':'i':'p':'p':'E':'U':theRest) = [(ClippEU, theRest)] + readsPrec _ ('N':'M':'I':theRest) = [(NMI, theRest)] readsPrec p ('F':theRest) = case readsPrec p theRest of [(beta, theRest)] -> [(FMeasure beta, theRest)] _ -> [] @@ -75,6 +80,7 @@ getMetricOrdering BLEU = TheHigherTheBetter getMetricOrdering Accuracy = TheHigherTheBetter getMetricOrdering ClippEU = TheHigherTheBetter getMetricOrdering (FMeasure _) = TheHigherTheBetter +getMetricOrdering NMI = TheHigherTheBetter defaultOutDirectory = "." defaultTestName = "test-A" @@ -221,6 +227,8 @@ gevalCore' ClippEU = gevalCore'' parseClippingSpecs parseClippings matchStep cli clippeuAgg = CC.foldl countFolder (0, 0, 0) finalStep counts = f2MeasureOnCounts counts +gevalCore' NMI = gevalCore'' id id id (CC.foldl updateConfusionMatrix M.empty) normalizedMutualInformationFromConfusionMatrix + data SourceItem a = Got a | Done gevalCore'' :: (Text -> a) -> (Text -> b) -> ((a, b) -> c) -> (Sink c (ResourceT IO) d) -> (d -> Double) -> String -> String -> IO (MetricValue) diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index afd0b2e..5d0c74f 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -74,7 +74,7 @@ metricReader = option auto <> value defaultMetric <> showDefault <> metavar "METRIC" - <> help "Metric to be used - RMSE, MSE, Accuracy, F-measure (specify as F1, F2, F0.25, etc.), BLEU or ClippEU" ) + <> help "Metric to be used - RMSE, MSE, Accuracy, F-measure (specify as F1, F2, F0.25, etc.), BLEU, NMI or ClippEU" ) runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue)) runGEval args = do diff --git a/test/Spec.hs b/test/Spec.hs index 884a366..8bdcc5a 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -81,6 +81,9 @@ main = hspec $ do normalizedMutualInformation stupidClusteringOneBigCluster `shouldBeAlmost` 0.0 it "stupid clustering with many small clusters" $ do normalizedMutualInformation stupidClusteringManySmallClusters `shouldBeAlmost` 0.61799 + describe "NMI challenge" $ do + it "complex test" $ do + runGEvalTest "nmi-complex" `shouldReturnAlmost` 0.36456 describe "reading options" $ do it "can get the metric" $ do extractMetric "bleu-complex" `shouldReturn` (Just BLEU) diff --git a/test/nmi-complex/nmi-complex-solution/test-A/out.tsv b/test/nmi-complex/nmi-complex-solution/test-A/out.tsv new file mode 100644 index 0000000..2f8d3b6 --- /dev/null +++ b/test/nmi-complex/nmi-complex-solution/test-A/out.tsv @@ -0,0 +1,17 @@ +3 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +3 +3 +3 +3 diff --git a/test/nmi-complex/nmi-complex/config.txt b/test/nmi-complex/nmi-complex/config.txt new file mode 100644 index 0000000..23793ee --- /dev/null +++ b/test/nmi-complex/nmi-complex/config.txt @@ -0,0 +1 @@ +--metric NMI diff --git a/test/nmi-complex/nmi-complex/test-A/expected.tsv b/test/nmi-complex/nmi-complex/test-A/expected.tsv new file mode 100644 index 0000000..cf98d57 --- /dev/null +++ b/test/nmi-complex/nmi-complex/test-A/expected.tsv @@ -0,0 +1,17 @@ +x +x +x +o +x +x +x +x +o +o +o +o +d +x +d +d +d