implement MultiLabel-LogLoss and MultiLabel-Likelihood

2018-08-09 16:00:19 +02:00 · 2018-08-09 16:00:19 +02:00 · efcceae26a
commit efcceae26a
parent bd2bfde287
6 changed files with 53 additions and 1 deletions
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@ -98,6 +98,7 @@ defaultLogLossHashedSize = 10
 -- | Evaluation metrics known to GEval. Some constructors carry a parameter
 -- (a 'Double' for the F-measure variants, a 'Word32' for the hashed variants).
 -- Only 'Eq' is derived; the 'Show' and 'Read' instances are written by hand
 -- so that metrics have custom textual names.
 data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI
              | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
              | BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double
              | MultiLabelLogLoss | MultiLabelLikelihood
              deriving (Eq)
 instance Show Metric where
@ -128,6 +129,8 @@ instance Show Metric where
  show BIOF1Labels = "BIO-F1-Labels"
  show MAE = "MAE"
  show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta)
  -- The spelling must match what the Read instance accepts
  -- ("MultiLabel-LogLoss", capital second L), otherwise a shown metric
  -- name cannot be read back.
  show MultiLabelLogLoss = "MultiLabel-LogLoss"
  show MultiLabelLikelihood = "MultiLabel-Likelihood"
 instance Read Metric where
  readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
@ -155,6 +158,10 @@ instance Read Metric where
  readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
  readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
  readsPrec _ ('M':'A':'E':theRest) = [(MAE, theRest)]
  readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'o':'g':'L':'o':'s':'s':theRest) = [(MultiLabelLogLoss, theRest)]
  readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(MultiLabelLikelihood, theRest)]
 data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
@ -177,6 +184,9 @@ getMetricOrdering BIOF1 = TheHigherTheBetter
 getMetricOrdering BIOF1Labels = TheHigherTheBetter
 getMetricOrdering MAE = TheLowerTheBetter
 getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter
 getMetricOrdering MultiLabelLogLoss = TheLowerTheBetter
 getMetricOrdering MultiLabelLikelihood = TheHigherTheBetter
 isInputNeeded :: Metric -> Bool
 isInputNeeded CharMatch = True
@ -448,6 +458,10 @@ gevalCoreOnSources (LikelihoodHashed b) inputLineSource expectedLineSource outLi
  logLoss <- gevalCoreOnSources (LogLossHashed b) inputLineSource expectedLineSource outLineSource
  return $ logLossToLikehood logLoss
 -- MultiLabel-Likelihood is derived from MultiLabel-LogLoss: the log-loss is
 -- evaluated on the same sources and then converted with 'logLossToLikehood'.
 gevalCoreOnSources MultiLabelLikelihood inputLineSource expectedLineSource outLineSource =
   logLossToLikehood
     <$> gevalCoreOnSources MultiLabelLogLoss inputLineSource expectedLineSource outLineSource
 -- Fallback clause: any metric not matched by an earlier clause is passed
 -- straight to gevalCore' with the same three line sources.
 gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource =
   gevalCore' metric inputLineSource expectedLineSource outLineSource
@ -581,6 +595,13 @@ gevalCore' (MultiLabelFMeasure beta) _ = gevalCoreWithoutInput intoWords
      getWords = Right . (Prelude.map unpack) . selectByStandardThreshold . parseIntoProbList
      intoWords = Right . (Prelude.map unpack) . Data.Text.words
 -- MultiLabel-LogLoss: one side of each line pair is split into plain words,
 -- the other is parsed into a probability list; the per-line loss comes from
 -- 'countLogLossOnProbList' and the per-line values are combined with
 -- 'averageC'. The final step is 'id', i.e. the average is returned as is.
 gevalCore' MultiLabelLogLoss _ =
   gevalCoreWithoutInput parseWords parseProbs lineLoss averageC id
   where
     parseWords = Right . Data.Text.words
     parseProbs = Right . parseIntoProbList
     lineLoss = uncurry countLogLossOnProbList
 -- | Folds the incoming stream of 'Int' triples into a single triple using
 -- 'countFolder', starting from @(0, 0, 0)@.
 countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int)
 countAgg = CC.foldl countFolder (0, 0, 0)
--- a/src/GEval/ProbList.hs
+++ b/src/GEval/ProbList.hs
@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 module GEval.ProbList
-       (parseIntoProbList, selectByStandardThreshold)
+       (parseIntoProbList, selectByStandardThreshold, countLogLossOnProbList)
       where
 import qualified Data.Text as T
@ -23,6 +23,9 @@ mkProbability p
 -- | The probability built from @1.0@ via 'mkProbability'.
 probabilityOne :: Probability
 probabilityOne = mkProbability 1.0
 -- | The probability built from @0.0@ via 'mkProbability'; 'findProb' returns
 -- it for a word that does not occur in a 'ProbList'.
 probabilityZero :: Probability
 probabilityZero = mkProbability 0.0
 -- | A list of 'WordWithProb' entries, i.e. words each paired with a
 -- probability.
 data ProbList = ProbList [WordWithProb]
  deriving (Show)
@ -63,3 +66,19 @@ standardThreshold = 0.5
 -- | Applies 'selectByThreshold' to the given probability list, using
 -- 'standardThreshold' (wrapped with 'mkProbability') as the cut-off.
 selectByStandardThreshold :: ProbList -> [T.Text]
 selectByStandardThreshold probList =
   selectByThreshold cutoff probList
   where
     cutoff = mkProbability standardThreshold
 -- | Looks up the probability attached to @target@ in the list.
 --
 -- Returns the probability of the first entry whose word equals @target@,
 -- or 'probabilityZero' when no entry matches.
 findProb :: ProbList -> T.Text -> Probability
 findProb (ProbList entries) target = search entries
   where
     search [] = probabilityZero
     search (WordWithProb word prob : rest)
       | word == target = prob
       | otherwise = search rest
 -- | Log-loss of a single multi-label prediction.
 --
 -- Sums @log p@ over every expected label (with @p@ looked up through
 -- 'findProb', so a label missing from the list counts as 'probabilityZero')
 -- and @log (1 - p)@ over every listed label that is not among the expected
 -- ones, then negates the total.
 countLogLossOnProbList :: [T.Text] -> ProbList -> Double
 countLogLossOnProbList expected probList@(ProbList entries) =
   negate (expectedLogSum + unexpectedLogSum)
   where
     expectedLogSum = sum (map logOfExpected expected)
     unexpectedLogSum = sum (map logOfUnexpected (filter isUnexpected entries))
     -- Contribution of a label that should have been predicted.
     logOfExpected label = log (getP (findProb probList label))
     -- Contribution of a predicted label that should not be there.
     logOfUnexpected (WordWithProb _ prob) = log (1.0 - getP prob)
     isUnexpected (WordWithProb label _) = label `notElem` expected
--- a/test/Spec.hs
+++ b/test/Spec.hs
@ -224,6 +224,9 @@ main = hspec $ do
      runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615
    it "labels given with probs and numbers" $ do
      runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666
  describe "MultiLabel-Likelihood" $ do
    it "simple" $ do
      runGEvalTest "multilabel-likelihood-simple" `shouldReturnAlmost` 0.115829218528827
  describe "evaluating single lines" $ do
    it "RMSE" $ do
      gevalCoreOnSingleLines RMSE (LineInFile (FilePathSpec "stub1") 1 "blabla")
--- a/test/multilabel-likelihood-simple/multilabel-likelihood-simple-solution/test-A/out.tsv
+++ b/test/multilabel-likelihood-simple/multilabel-likelihood-simple-solution/test-A/out.tsv
@ -0,0 +1,4 @@
 foo:0.3 bar
 foo:0.9
 baz:1.0
 bar:0.8 baz:0.3 foo:0.1
--- a/test/multilabel-likelihood-simple/multilabel-likelihood-simple/config.txt
+++ b/test/multilabel-likelihood-simple/multilabel-likelihood-simple/config.txt
@ -0,0 +1 @@
 --metric MultiLabel-Likelihood
--- a/test/multilabel-likelihood-simple/multilabel-likelihood-simple/test-A/expected.tsv
+++ b/test/multilabel-likelihood-simple/multilabel-likelihood-simple/test-A/expected.tsv
@ -0,0 +1,4 @@
 foo bar
 baz
 foo baz