implement MultiLabel-LogLoss and MultiLabel-Likelihood

This commit is contained in:
Filip Graliński 2018-08-09 16:00:19 +02:00
parent bd2bfde287
commit efcceae26a
6 changed files with 53 additions and 1 deletions

View File

@ -98,6 +98,7 @@ defaultLogLossHashedSize = 10
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
| BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double | BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double
| MultiLabelLogLoss | MultiLabelLikelihood
deriving (Eq) deriving (Eq)
instance Show Metric where instance Show Metric where
@ -128,6 +129,8 @@ instance Show Metric where
show BIOF1Labels = "BIO-F1-Labels" show BIOF1Labels = "BIO-F1-Labels"
show MAE = "MAE" show MAE = "MAE"
show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta) show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta)
-- The rendered names must be spelled exactly like the prefixes accepted by
-- the Read instance ("MultiLabel-LogLoss" / "MultiLabel-Likelihood"), so that
-- a shown metric can be parsed back.
show MultiLabelLogLoss = "MultiLabel-LogLoss"
show MultiLabelLikelihood = "MultiLabel-Likelihood"
instance Read Metric where instance Read Metric where
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)] readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
@ -155,6 +158,10 @@ instance Read Metric where
readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)] readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)] readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
readsPrec _ ('M':'A':'E':theRest) = [(MAE, theRest)] readsPrec _ ('M':'A':'E':theRest) = [(MAE, theRest)]
-- Multi-label probabilistic metrics: the name is matched character by
-- character (case-sensitively) as a prefix of the input, and whatever follows
-- the name is handed back unparsed as the remainder.
readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'o':'g':'L':'o':'s':'s':theRest) = [(MultiLabelLogLoss, theRest)]
readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(MultiLabelLikelihood, theRest)]
data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
@ -177,6 +184,9 @@ getMetricOrdering BIOF1 = TheHigherTheBetter
getMetricOrdering BIOF1Labels = TheHigherTheBetter getMetricOrdering BIOF1Labels = TheHigherTheBetter
getMetricOrdering MAE = TheLowerTheBetter getMetricOrdering MAE = TheLowerTheBetter
getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter
-- The log loss is a loss (smaller values rank better), whereas the likelihood
-- variant of the same metric ranks larger values as better.
getMetricOrdering MultiLabelLogLoss = TheLowerTheBetter
getMetricOrdering MultiLabelLikelihood = TheHigherTheBetter
isInputNeeded :: Metric -> Bool isInputNeeded :: Metric -> Bool
isInputNeeded CharMatch = True isInputNeeded CharMatch = True
@ -448,6 +458,10 @@ gevalCoreOnSources (LikelihoodHashed b) inputLineSource expectedLineSource outLi
logLoss <- gevalCoreOnSources (LogLossHashed b) inputLineSource expectedLineSource outLineSource logLoss <- gevalCoreOnSources (LogLossHashed b) inputLineSource expectedLineSource outLineSource
return $ logLossToLikehood logLoss return $ logLossToLikehood logLoss
-- MultiLabel-Likelihood has no aggregation of its own: the MultiLabel-LogLoss
-- evaluation is run on the very same sources and its result is then mapped
-- through logLossToLikehood.
gevalCoreOnSources MultiLabelLikelihood inputLineSource expectedLineSource outLineSource =
  logLossToLikehood
    <$> gevalCoreOnSources MultiLabelLogLoss inputLineSource expectedLineSource outLineSource
gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do
gevalCore' metric inputLineSource expectedLineSource outLineSource gevalCore' metric inputLineSource expectedLineSource outLineSource
@ -581,6 +595,13 @@ gevalCore' (MultiLabelFMeasure beta) _ = gevalCoreWithoutInput intoWords
getWords = Right . (Prelude.map unpack) . selectByStandardThreshold . parseIntoProbList getWords = Right . (Prelude.map unpack) . selectByStandardThreshold . parseIntoProbList
intoWords = Right . (Prelude.map unpack) . Data.Text.words intoWords = Right . (Prelude.map unpack) . Data.Text.words
-- MultiLabel-LogLoss, evaluated without looking at the input file.
-- The arguments handed to gevalCoreWithoutInput are, in order:
--   * the parser for an expected line (split into whitespace-separated labels),
--   * the parser for an output line (turned into a ProbList),
--   * the per-line scorer, applied to the (expected, output) pair,
--   * the aggregator combining the per-line scores (averageC),
--   * the final transformation of the aggregate (none, hence id).
gevalCore' MultiLabelLogLoss _ =
  gevalCoreWithoutInput
    (Right . Data.Text.words)
    (Right . parseIntoProbList)
    (uncurry countLogLossOnProbList)
    averageC
    id
countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int) countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int)
countAgg = CC.foldl countFolder (0, 0, 0) countAgg = CC.foldl countFolder (0, 0, 0)

View File

@ -1,7 +1,7 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module GEval.ProbList module GEval.ProbList
(parseIntoProbList, selectByStandardThreshold) (parseIntoProbList, selectByStandardThreshold, countLogLossOnProbList)
where where
import qualified Data.Text as T import qualified Data.Text as T
@ -23,6 +23,9 @@ mkProbability p
probabilityOne :: Probability probabilityOne :: Probability
probabilityOne = mkProbability 1.0 probabilityOne = mkProbability 1.0
-- | The probability 0.0, built with 'mkProbability'. 'findProb' returns it
-- for a word that has no entry in a 'ProbList'.
probabilityZero :: Probability
probabilityZero = mkProbability 0.0
data ProbList = ProbList [WordWithProb] data ProbList = ProbList [WordWithProb]
deriving (Show) deriving (Show)
@ -63,3 +66,19 @@ standardThreshold = 0.5
selectByStandardThreshold :: ProbList -> [T.Text] selectByStandardThreshold :: ProbList -> [T.Text]
selectByStandardThreshold = selectByThreshold (mkProbability standardThreshold) selectByStandardThreshold = selectByThreshold (mkProbability standardThreshold)
-- | Look up the probability attached to a word in a 'ProbList'.
--
-- The entries are scanned in order and the first one whose word equals the
-- target decides the result; when no entry matches, 'probabilityZero' is
-- returned.
findProb :: ProbList -> T.Text -> Probability
findProb (ProbList entries) target = foldr pick probabilityZero entries
  where
    -- Lazy right fold: once a matching entry is found, the remaining
    -- entries are never inspected.
    pick (WordWithProb word prob) rest
      | word == target = prob
      | otherwise = rest
-- | Log loss of a single multi-label prediction.
--
-- Every expected label contributes @log p@, where @p@ is the probability the
-- prediction assigns to it ('findProb', i.e. zero when the label is not
-- listed). Every predicted label that is /not/ expected contributes
-- @log (1 - p)@. The result is the negated sum of all these terms.
countLogLossOnProbList :: [T.Text] -> ProbList -> Double
countLogLossOnProbList expected probList@(ProbList entries) =
  negate (expectedTerms + unexpectedTerms)
  where
    -- Log-probabilities assigned to the labels that should be present.
    expectedTerms =
      sum [ log (getP (findProb probList label)) | label <- expected ]
    -- Log of the complementary probability for predicted labels that
    -- should be absent.
    unexpectedTerms =
      sum [ log (1.0 - getP prob)
          | WordWithProb label prob <- entries
          , label `notElem` expected
          ]

View File

@ -224,6 +224,9 @@ main = hspec $ do
runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615 runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615
it "labels given with probs and numbers" $ do it "labels given with probs and numbers" $ do
runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666 runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666
describe "MultiLabel-Likelihood" $ do
it "simple" $ do
runGEvalTest "multilabel-likelihood-simple" `shouldReturnAlmost` 0.115829218528827
describe "evaluating single lines" $ do describe "evaluating single lines" $ do
it "RMSE" $ do it "RMSE" $ do
gevalCoreOnSingleLines RMSE (LineInFile (FilePathSpec "stub1") 1 "blabla") gevalCoreOnSingleLines RMSE (LineInFile (FilePathSpec "stub1") 1 "blabla")

View File

@ -0,0 +1,4 @@
foo:0.3 bar
foo:0.9
baz:1.0
bar:0.8 baz:0.3 foo:0.1
1 foo:0.3 bar
2 foo:0.9
3 baz:1.0
4 bar:0.8 baz:0.3 foo:0.1

View File

@ -0,0 +1 @@
--metric MultiLabel-Likelihood

View File

@ -0,0 +1,4 @@
foo bar
baz
foo baz
1 foo bar
2 baz
3 foo baz