Implement Probabilistic-MultiLabel-F1

2019-09-07 14:16:06 +02:00 · 2019-09-07 14:16:06 +02:00 · b540cba7da
commit b540cba7da
parent c011ba3962
10 changed files with 57 additions and 21 deletions
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -629,25 +629,14 @@ gevalCore' (SoftFMeasure beta) _ = gevalCoreWithoutInput parseAnnotations
                                                             Prelude.length expected,
                                                             Prelude.length got)

-gevalCore' (ProbabilisticSoftFMeasure beta) _ = gevalCoreWithoutInput parseAnnotations
-                                                                      parseObtainedAnnotations
-                                                                      getProbabilisticCounts
-                                                                      probabilisticSoftAgg
-                                                                      (fMeasureOnProbabilisticCounts beta)
-                                                                      loessGraph
-  where probabilisticSoftAgg :: Monad m => ConduitM ([Double], [Double], Double, Int) o m ([Double], [Double], Double, Int)
-        probabilisticSoftAgg = CC.foldl probabilisticSoftFolder ([], [], fromInteger 0, 0)
-        probabilisticSoftFolder (r1, p1, g1, e1) (r2, p2, g2, e2) = (r1 ++ r2, p1 ++ p2, g1 + g2, e1 + e2)
-        loessGraph :: ([Double], [Double], Double, Int) -> Maybe GraphSeries
-        loessGraph (results, probs, _, _) = Just $ GraphSeries $ Prelude.map (\x -> (x, clippedLoess probs' results' x)) $ Prelude.filter (\p -> p > lowest && p < highest) $ Prelude.map (\d -> 0.01 * (fromIntegral d)) [1..99]
-           where results' = DVU.fromList results
-                 probs' = DVU.fromList probs
-                 lowest = Data.List.minimum probs
-                 highest = Data.List.maximum probs
-        fMeasureOnProbabilisticCounts :: Double -> ([Double], [Double], Double, Int) -> Double
-        fMeasureOnProbabilisticCounts beta (results, probs, got, nbExpected) = weightedHarmonicMean beta calibrationMeasure recall
-           where calibrationMeasure = softCalibration results probs
-                 recall = got /. nbExpected
+gevalCore' (ProbabilisticMultiLabelFMeasure beta) _ = generalizedProbabilisticFMeasure beta
+                                                                                       intoWords
+                                                                                       (Right . (\(ProbList es) -> es) . parseIntoProbList)
+  where intoWords = Right . Data.Text.words
+
+gevalCore' (ProbabilisticSoftFMeasure beta) _ = generalizedProbabilisticFMeasure beta
+                                                                                 parseAnnotations
+                                                                                 parseObtainedAnnotations

 gevalCore' (Soft2DFMeasure beta) _ = gevalCoreWithoutInput parseLabeledClippings
                                                           parseLabeledClippings
@@ -751,6 +740,27 @@ gevalCore' MultiLabelLogLoss _ = gevalCoreWithoutInput intoWords
    where
      intoWords = Right . Data.Text.words

+generalizedProbabilisticFMeasure beta parseBareEntities parseEntities = gevalCoreWithoutInput parseBareEntities -- shared core called by the ProbabilisticMultiLabelFMeasure and ProbabilisticSoftFMeasure clauses of gevalCore'; presumably this first parser handles the expected side (bare entities) - TODO confirm against gevalCoreWithoutInput
+                                                                                              parseEntities -- second parser; presumably for the output side (entities carrying probabilities) - TODO confirm
+                                                                                              getProbabilisticCounts -- per-item step, defined outside this hunk
+                                                                                              probabilisticSoftAgg -- aggregator defined below
+                                                                                              (fMeasureOnProbabilisticCounts beta) -- final score computed from the aggregated tuple
+                                                                                              loessGraph -- optional graph series built from the same aggregated tuple
+  where probabilisticSoftAgg :: Monad m => ConduitM ([Double], [Double], Double, Int) o m ([Double], [Double], Double, Int) -- tuple components are named (results, probs, got, nbExpected) further down
+        probabilisticSoftAgg = CC.foldl probabilisticSoftFolder ([], [], fromInteger 0, 0) -- fold starting from two empty lists and two zero counters
+        probabilisticSoftFolder (r1, p1, g1, e1) (r2, p2, g2, e2) = (r1 ++ r2, p1 ++ p2, g1 + g2, e1 + e2) -- concatenate both lists, add up both counters
+        loessGraph :: ([Double], [Double], Double, Int) -> Maybe GraphSeries -- always Just: pairs (x, clippedLoess probs' results' x) for x = 0.01, 0.02 .. 0.99, keeping only x strictly between the lowest and highest probability
+        loessGraph (results, probs, _, _) = Just $ GraphSeries $ Prelude.map (\x -> (x, clippedLoess probs' results' x)) $ Prelude.filter (\p -> p > lowest && p < highest) $ Prelude.map (\d -> 0.01 * (fromIntegral d)) [1..99]
+           where results' = DVU.fromList results
+                 probs' = DVU.fromList probs
+                 lowest = Data.List.minimum probs -- NOTE(review): minimum is partial; an empty probs list raises an error once this value is forced
+                 highest = Data.List.maximum probs -- NOTE(review): same partiality concern as for lowest
+        fMeasureOnProbabilisticCounts :: Double -> ([Double], [Double], Double, Int) -> Double -- combines the calibration measure and recall through weightedHarmonicMean with weight beta
+        fMeasureOnProbabilisticCounts beta (results, probs, got, nbExpected) = weightedHarmonicMean beta calibrationMeasure recall
+           where calibrationMeasure = softCalibration results probs -- takes the place where an F-measure usually has precision
+                 recall = got /. nbExpected -- got over nbExpected, using the project's (/.) operator
+
+
 countAgg :: (Num n, Num v, Monad m) => ConduitM (n, v, v) o m (n, v, v)
 countAgg = CC.foldl countFolder (fromInteger 0, fromInteger 0, fromInteger 0)

--- a/src/GEval/Metric.hs
+++ b/src/GEval/Metric.hs
@@ -28,7 +28,7 @@ data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | C
              | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
              | BIOF1 | BIOF1Labels | TokenAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | MultiLabelFMeasure Double
              | MultiLabelLogLoss | MultiLabelLikelihood
-              | SoftFMeasure Double | ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double
+              | SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double | ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double
              deriving (Eq)

 instance Show Metric where
@@ -44,6 +44,7 @@ instance Show Metric where
  show (FMeasure beta) = "F" ++ (show beta)
  show (MacroFMeasure beta) = "Macro-F" ++ (show beta)
  show (SoftFMeasure beta) = "Soft-F" ++ (show beta)
+  show (ProbabilisticMultiLabelFMeasure beta) = "Probabilistic-MultiLabel-F" ++ (show beta)
  show (ProbabilisticSoftFMeasure beta) = "Probabilistic-Soft-F" ++ (show beta)
  show (Soft2DFMeasure beta) = "Soft2D-F" ++ (show beta)
  show NMI = "NMI"
@@ -98,6 +99,9 @@ instance Read Metric where
  readsPrec p ('S':'o':'f':'t':'-':'F':theRest) = case readsPrec p theRest of
    [(beta, theRest)] -> [(SoftFMeasure beta, theRest)]
    _ -> []
+  readsPrec p ('P':'r':'o':'b':'a':'b':'i':'l':'i':'s':'t':'i':'c':'-':'M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':theRest) = case readsPrec p theRest of
+    [(beta, theRest)] -> [(ProbabilisticMultiLabelFMeasure beta, theRest)]
+    _ -> []
  readsPrec p ('P':'r':'o':'b':'a':'b':'i':'l':'i':'s':'t':'i':'c':'-':'S':'o':'f':'t':'-':'F':theRest) = case readsPrec p theRest of
    [(beta, theRest)] -> [(ProbabilisticSoftFMeasure beta, theRest)]
    _ -> []
@@ -137,6 +141,7 @@ getMetricOrdering ClippEU  = TheHigherTheBetter
 getMetricOrdering (FMeasure _) = TheHigherTheBetter
 getMetricOrdering (MacroFMeasure _) = TheHigherTheBetter
 getMetricOrdering (SoftFMeasure _) = TheHigherTheBetter
+getMetricOrdering (ProbabilisticMultiLabelFMeasure _) = TheHigherTheBetter
 getMetricOrdering (ProbabilisticSoftFMeasure _) = TheHigherTheBetter
 getMetricOrdering (Soft2DFMeasure _) = TheHigherTheBetter
 getMetricOrdering NMI = TheHigherTheBetter
--- a/src/GEval/ProbList.hs
+++ b/src/GEval/ProbList.hs
@@ -2,7 +2,7 @@
 {-# LANGUAGE TypeFamilies #-}

 module GEval.ProbList
-       (parseIntoProbList, selectByStandardThreshold, countLogLossOnProbList)
+       (parseIntoProbList, selectByStandardThreshold, countLogLossOnProbList, ProbList(..))
       where

 import qualified Data.Text as T
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -264,6 +264,11 @@ main = hspec $ do
      read "F2" `shouldBe` (FMeasure 2.0)
      read "F1" `shouldBe` (FMeasure 1.0)
      read "F0.5" `shouldBe` (FMeasure 0.5)
+  describe "Probabilistic-F1" $ do
+    it "simple test" $ do
+      runGEvalTest "probabilistic-f1-simple" `shouldReturnAlmost` 0.5
+    it "with probs" $ do
+      runGEvalTest "probabilistic-f1-probs" `shouldReturnAlmost` 0.5451223333805993
  describe "Soft-F1" $ do
    it "simple test" $ do
      runGEvalTest "soft-f1-simple" `shouldReturnAlmost` 0.33333333333333
--- a/test/probabilistic-f1-probs/probabilistic-f1-probs-solution/test-A/out.tsv
+++ b/test/probabilistic-f1-probs/probabilistic-f1-probs-solution/test-A/out.tsv
@@ -0,0 +1,4 @@
+foo bar:0.7
+baz:0.2 foo:0.5
+
+foo:0.7 foo:0.8 baq:0.8
--- a/test/probabilistic-f1-probs/probabilistic-f1-probs/config.txt
+++ b/test/probabilistic-f1-probs/probabilistic-f1-probs/config.txt
@@ -0,0 +1 @@
+--metric Probabilistic-MultiLabel-F1
--- a/test/probabilistic-f1-probs/probabilistic-f1-probs/test-A/expected.tsv
+++ b/test/probabilistic-f1-probs/probabilistic-f1-probs/test-A/expected.tsv
@@ -0,0 +1,4 @@
+foo bar
+baz
+
+baq foo foo
--- a/test/probabilistic-f1-simple/probabilistic-f1-simple-solution/test-A/out.tsv
+++ b/test/probabilistic-f1-simple/probabilistic-f1-simple-solution/test-A/out.tsv
@@ -0,0 +1,3 @@
+bar:1.0
+baz:1.0
+foo baz:1.0 bar:1.0 foo:1.0 foo
--- a/test/probabilistic-f1-simple/probabilistic-f1-simple/config.txt
+++ b/test/probabilistic-f1-simple/probabilistic-f1-simple/config.txt
@@ -0,0 +1 @@
+--metric Probabilistic-MultiLabel-F1
--- a/test/probabilistic-f1-simple/probabilistic-f1-simple/test-A/expected.tsv
+++ b/test/probabilistic-f1-simple/probabilistic-f1-simple/test-A/expected.tsv
@@ -0,0 +1,3 @@
+foo
+
+bar baz baz foo