implement MultiLabel-LogLoss and MultiLabel-Likelihood
This commit is contained in:
parent
bd2bfde287
commit
efcceae26a
@ -98,6 +98,7 @@ defaultLogLossHashedSize = 10
|
||||
-- | Evaluation metrics that can be selected.  Constructors that carry an
-- argument are parameterised variants of a metric (e.g. the @beta@ of
-- 'MultiLabelFMeasure', which the 'Show' instance appends to the name).
data Metric
  = RMSE
  | MSE
  | BLEU
  | Accuracy
  | ClippEU
  | FMeasure Double
  | NMI
  | LogLossHashed Word32
  | CharMatch
  | MAP
  | LogLoss
  | Likelihood
  | BIOF1
  | BIOF1Labels
  | LikelihoodHashed Word32
  | MAE
  | MultiLabelFMeasure Double
  | MultiLabelLogLoss
  | MultiLabelLikelihood
  deriving (Eq)
|
||||
|
||||
instance Show Metric where
|
||||
@ -128,6 +129,8 @@ instance Show Metric where
|
||||
show BIOF1Labels = "BIO-F1-Labels"
|
||||
show MAE = "MAE"
|
||||
show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta)
|
||||
-- Spelled "LogLoss" (capital L twice) so that the printed name is exactly
-- the one the Read instance for Metric accepts ("MultiLabel-LogLoss");
-- the previous "MultiLabel-Logloss" could not be read back.
show MultiLabelLogLoss = "MultiLabel-LogLoss"
|
||||
show MultiLabelLikelihood = "MultiLabel-Likelihood"
|
||||
|
||||
instance Read Metric where
|
||||
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
|
||||
@ -155,6 +158,10 @@ instance Read Metric where
|
||||
readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
|
||||
readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
|
||||
readsPrec _ ('M':'A':'E':theRest) = [(MAE, theRest)]
|
||||
readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'o':'g':'L':'o':'s':'s':theRest) = [(MultiLabelLogLoss, theRest)]
|
||||
readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(MultiLabelLikelihood, theRest)]
|
||||
|
||||
|
||||
|
||||
-- | Direction in which a metric's value improves, as reported per metric
-- by 'getMetricOrdering'.
data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
|
||||
|
||||
@ -177,6 +184,9 @@ getMetricOrdering BIOF1 = TheHigherTheBetter
|
||||
getMetricOrdering BIOF1Labels = TheHigherTheBetter
|
||||
getMetricOrdering MAE = TheLowerTheBetter
|
||||
getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter
|
||||
getMetricOrdering MultiLabelLogLoss = TheLowerTheBetter
|
||||
getMetricOrdering MultiLabelLikelihood = TheHigherTheBetter
|
||||
|
||||
|
||||
isInputNeeded :: Metric -> Bool
|
||||
isInputNeeded CharMatch = True
|
||||
@ -448,6 +458,10 @@ gevalCoreOnSources (LikelihoodHashed b) inputLineSource expectedLineSource outLi
|
||||
logLoss <- gevalCoreOnSources (LogLossHashed b) inputLineSource expectedLineSource outLineSource
|
||||
return $ logLossToLikehood logLoss
|
||||
|
||||
-- MultiLabel-Likelihood is not computed directly: MultiLabel-LogLoss is
-- evaluated on the same three sources and its result is converted with
-- 'logLossToLikehood'.
gevalCoreOnSources MultiLabelLikelihood inputLineSource expectedLineSource outLineSource =
  fmap logLossToLikehood $
    gevalCoreOnSources MultiLabelLogLoss inputLineSource expectedLineSource outLineSource
|
||||
|
||||
gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do
|
||||
gevalCore' metric inputLineSource expectedLineSource outLineSource
|
||||
|
||||
@ -581,6 +595,13 @@ gevalCore' (MultiLabelFMeasure beta) _ = gevalCoreWithoutInput intoWords
|
||||
getWords = Right . (Prelude.map unpack) . selectByStandardThreshold . parseIntoProbList
|
||||
intoWords = Right . (Prelude.map unpack) . Data.Text.words
|
||||
|
||||
-- MultiLabel-LogLoss: one side of each line pair is split into
-- whitespace-separated labels, the other is parsed with 'parseIntoProbList';
-- each (labels, prob-list) pair is scored by 'countLogLossOnProbList' and the
-- per-line scores are aggregated by 'averageC'.  The aggregate is returned
-- unchanged ('id').
-- NOTE(review): the labels side is presumably the expected file, matching the
-- first argument of 'countLogLossOnProbList' -- confirm against
-- 'gevalCoreWithoutInput'.
gevalCore' MultiLabelLogLoss _ =
  gevalCoreWithoutInput
    splitIntoLabels
    parseProbs
    scoreLine
    averageC
    id
  where
    splitIntoLabels line = Right (Data.Text.words line)
    parseProbs line = Right (parseIntoProbList line)
    scoreLine = uncurry countLogLossOnProbList
|
||||
|
||||
-- | Consume a stream of 'Int' triples, folding them with 'countFolder'
-- starting from an all-zero triple, and yield the final triple.
countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int)
countAgg = CC.foldl countFolder zeroCounts
  where
    zeroCounts = (0, 0, 0)
|
||||
|
@ -1,7 +1,7 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module GEval.ProbList
|
||||
(parseIntoProbList, selectByStandardThreshold)
|
||||
(parseIntoProbList, selectByStandardThreshold, countLogLossOnProbList)
|
||||
where
|
||||
|
||||
import qualified Data.Text as T
|
||||
@ -23,6 +23,9 @@ mkProbability p
|
||||
-- | The 'Probability' built from 1.0 via 'mkProbability'.
probabilityOne :: Probability
|
||||
probabilityOne = mkProbability 1.0
|
||||
|
||||
-- | The 'Probability' built from 0.0 via 'mkProbability'; 'findProb' returns
-- it for words that do not occur in a 'ProbList'.
probabilityZero :: Probability
|
||||
probabilityZero = mkProbability 0.0
|
||||
|
||||
-- | A list of words, each paired with a probability ('WordWithProb').
data ProbList = ProbList [WordWithProb]
|
||||
deriving (Show)
|
||||
|
||||
@ -63,3 +66,19 @@ standardThreshold = 0.5
|
||||
|
||||
-- | 'selectByThreshold' applied with 'standardThreshold' (0.5) as the cut-off.
-- NOTE(review): presumably keeps the words whose probability reaches the
-- threshold -- confirm against 'selectByThreshold'.
selectByStandardThreshold :: ProbList -> [T.Text]
|
||||
selectByStandardThreshold = selectByThreshold (mkProbability standardThreshold)
|
||||
|
||||
-- | Look up the probability attached to a word in a 'ProbList'.
--
-- The first entry whose word equals the target wins; a word that does not
-- occur in the list gets 'probabilityZero'.
findProb :: ProbList -> T.Text -> Probability
findProb (ProbList entries) target =
  case [prob | WordWithProb word prob <- entries, word == target] of
    (prob : _) -> prob
    []         -> probabilityZero
|
||||
|
||||
-- | Log loss of a probability list against the expected labels.
--
-- The result is the negated sum of two parts:
--
-- * for every expected label, @log p@, where @p@ is the probability found
--   for it by 'findProb' ('probabilityZero' when the label is not listed);
--
-- * for every listed word that is not among the expected labels,
--   @log (1 - p)@.
--
-- No clamping is applied, so a zero probability for an expected label (or a
-- probability of one for an unexpected word) makes the corresponding
-- logarithm, and hence the result, infinite.
countLogLossOnProbList :: [T.Text] -> ProbList -> Double
countLogLossOnProbList expected probList@(ProbList entries) =
  negate (expectedPart + unexpectedPart)
  where
    -- Contribution of the labels that should have been predicted.
    expectedPart =
      sum [log (getP (findProb probList label)) | label <- expected]
    -- Contribution of the predicted words that are not expected.
    unexpectedPart =
      sum
        [ log (1.0 - getP prob)
        | WordWithProb word prob <- entries
        , word `notElem` expected
        ]
|
||||
|
@ -224,6 +224,9 @@ main = hspec $ do
|
||||
runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615
|
||||
it "labels given with probs and numbers" $ do
|
||||
runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666
|
||||
describe "MultiLabel-Likelihood" $ do
|
||||
it "simple" $ do
|
||||
runGEvalTest "multilabel-likelihood-simple" `shouldReturnAlmost` 0.115829218528827
|
||||
describe "evaluating single lines" $ do
|
||||
it "RMSE" $ do
|
||||
gevalCoreOnSingleLines RMSE (LineInFile (FilePathSpec "stub1") 1 "blabla")
|
||||
|
@ -0,0 +1,4 @@
|
||||
foo:0.3 bar
|
||||
foo:0.9
|
||||
baz:1.0
|
||||
bar:0.8 baz:0.3 foo:0.1
|
|
@ -0,0 +1 @@
|
||||
--metric MultiLabel-Likelihood
|
@ -0,0 +1,4 @@
|
||||
foo bar
|
||||
|
||||
baz
|
||||
foo baz
|
|
Loading…
Reference in New Issue
Block a user