Add likelihood as an evaluation metric

This commit is contained in:
Filip Graliński 2018-05-17 15:21:03 +02:00
parent 1caec3de35
commit 192d531969
10 changed files with 67 additions and 2 deletions

View File

@ -83,7 +83,7 @@ defaultLogLossHashedSize :: Word32
defaultLogLossHashedSize = 10 defaultLogLossHashedSize = 10
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch
| MAP | LogLoss | BIOF1 | MAP | LogLoss | Likelihood | BIOF1 | LikelihoodHashed Word32
deriving (Eq) deriving (Eq)
instance Show Metric where instance Show Metric where
@ -100,9 +100,16 @@ instance Show Metric where
"" ""
else else
(show nbOfBits)) (show nbOfBits))
-- Renders as "LikelihoodHashed"; the bit count is appended only when it
-- differs from defaultLogLossHashedSize.
show (LikelihoodHashed nbOfBits)
    | nbOfBits == defaultLogLossHashedSize = "LikelihoodHashed"
    | otherwise = "LikelihoodHashed" ++ show nbOfBits
show CharMatch = "CharMatch" show CharMatch = "CharMatch"
show MAP = "MAP" show MAP = "MAP"
show LogLoss = "LogLoss" show LogLoss = "LogLoss"
-- The plain likelihood metric is rendered by its bare name.
show Likelihood = "Likelihood"
show BIOF1 = "BIO-F1" show BIOF1 = "BIO-F1"
instance Read Metric where instance Read Metric where
@ -118,7 +125,11 @@ instance Read Metric where
readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
[(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)] [(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
_ -> [(LogLossHashed defaultLogLossHashedSize, theRest)] _ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
-- Parses "LikelihoodHashed" optionally followed by the number of bits.
-- When the suffix does not yield exactly one parse, the metric falls back to
-- defaultLogLossHashedSize and the suffix is returned unconsumed.
readsPrec p ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':'H':'a':'s':'h':'e':'d':suffix) =
    case readsPrec p suffix of
        [(bits, remaining)] -> [(LikelihoodHashed bits, remaining)]
        _ -> [(LikelihoodHashed defaultLogLossHashedSize, suffix)]
readsPrec _ ('L':'o':'g':'L':'o':'s':'s':theRest) = [(LogLoss, theRest)] readsPrec _ ('L':'o':'g':'L':'o':'s':'s':theRest) = [(LogLoss, theRest)]
-- Parses the plain "Likelihood" metric name.
-- Must stay below the "LikelihoodHashed" clause: this shorter prefix also
-- matches input starting with "LikelihoodHashed", and clauses are tried in order.
readsPrec _ ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(Likelihood, theRest)]
readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)] readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)] readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)] readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
@ -134,9 +145,11 @@ getMetricOrdering ClippEU = TheHigherTheBetter
getMetricOrdering (FMeasure _) = TheHigherTheBetter getMetricOrdering (FMeasure _) = TheHigherTheBetter
getMetricOrdering NMI = TheHigherTheBetter getMetricOrdering NMI = TheHigherTheBetter
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
-- Hashed likelihood is ranked with larger values first, whatever the bit count.
getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
getMetricOrdering CharMatch = TheHigherTheBetter getMetricOrdering CharMatch = TheHigherTheBetter
getMetricOrdering MAP = TheHigherTheBetter getMetricOrdering MAP = TheHigherTheBetter
getMetricOrdering LogLoss = TheLowerTheBetter getMetricOrdering LogLoss = TheLowerTheBetter
-- Likelihood is ranked with larger values first.
getMetricOrdering Likelihood = TheHigherTheBetter
getMetricOrdering BIOF1 = TheHigherTheBetter getMetricOrdering BIOF1 = TheHigherTheBetter
defaultOutDirectory = "." defaultOutDirectory = "."
@ -308,6 +321,8 @@ gevalCore metric inputFilePath expectedFilePath outFilePath = do
(fileAsLineSource expectedFilePath) (fileAsLineSource expectedFilePath)
(fileAsLineSource outFilePath) (fileAsLineSource outFilePath)
-- | Convert a log-loss value into a likelihood by computing @exp (-logLoss)@.
--
-- A log loss of 0 maps to 1; larger losses map to values closer to 0.
-- The spelling \"Likehood\" is kept as-is because other definitions call this
-- function by that name.
logLossToLikehood :: Floating a => a -> a
logLossToLikehood logLoss = exp (-logLoss)
gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric
-> LineSource (ResourceT m) -> LineSource (ResourceT m)
-> LineSource (ResourceT m) -> LineSource (ResourceT m)
@ -317,6 +332,14 @@ gevalCoreOnSources RMSE inputLineSource expectedLineSource outLineSource = do
mse <- gevalCoreOnSources MSE inputLineSource expectedLineSource outLineSource mse <- gevalCoreOnSources MSE inputLineSource expectedLineSource outLineSource
return $ mse ** 0.5 return $ mse ** 0.5
-- Likelihood is derived from LogLoss: evaluate LogLoss on the same three
-- sources and map the result through logLossToLikehood.
gevalCoreOnSources Likelihood inputLineSource expectedLineSource outLineSource =
    logLossToLikehood
        <$> gevalCoreOnSources LogLoss inputLineSource expectedLineSource outLineSource
-- LikelihoodHashed is derived from LogLossHashed with the same bit count:
-- evaluate it on the same three sources and map the result through
-- logLossToLikehood.
gevalCoreOnSources (LikelihoodHashed b) inputLineSource expectedLineSource outLineSource =
    logLossToLikehood
        <$> gevalCoreOnSources (LogLossHashed b) inputLineSource expectedLineSource outLineSource
gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do
gevalCore' metric inputLineSource expectedLineSource outLineSource gevalCore' metric inputLineSource expectedLineSource outLineSource

View File

@ -101,6 +101,8 @@ Cluster proverbs for languages.
This is a sample challenge for flat clustering (unsupervised learning challenge). This is a sample challenge for flat clustering (unsupervised learning challenge).
|] ++ (commonReadmeMDContents testName) |] ++ (commonReadmeMDContents testName)
-- The LikelihoodHashed sample challenge reuses the LogLossHashed README
-- (same bit count, same test name).
readmeMDContents (LikelihoodHashed b) testname = readmeMDContents (LogLossHashed b) testname
readmeMDContents (LogLossHashed _) testName = [i| readmeMDContents (LogLossHashed _) testName = [i|
GEval sample challenge language model evaluation GEval sample challenge language model evaluation
================================================== ==================================================
@ -203,6 +205,16 @@ This a sample challenge for the log-loss metric.
|] ++ (commonReadmeMDContents testName) |] ++ (commonReadmeMDContents testName)
-- README text for the sample Likelihood challenge: the challenge-specific
-- heading and description, followed by the common README section for testName.
readmeMDContents Likelihood testName = [i|
Give the probability of a positive sentiment
============================================
Give the probability that a sentence expresses a positive sentiment.
This is a sample challenge for the likelihood metric.
|] ++ (commonReadmeMDContents testName)
readmeMDContents BIOF1 testName = [i| readmeMDContents BIOF1 testName = [i|
Tag and normalize names Tag and normalize names
======================= =======================
@ -284,6 +296,7 @@ trainContents NMI = [hereLit|pl Kto pod kim dołki kopie, ten sam w nie wpada.
en The pen is mightier than the sword. en The pen is mightier than the sword.
pl Baba z wozu, koniom lżej. pl Baba z wozu, koniom lżej.
|] |]
-- LikelihoodHashed reuses the LogLossHashed sample training data.
trainContents (LikelihoodHashed b) = trainContents (LogLossHashed b)
trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota
Basia ma psa Basia ma psa
Nie kupujemy kota w worku Nie kupujemy kota w worku
@ -299,6 +312,7 @@ honour GB honor
titbit GB smakołyk titbit GB smakołyk
tidbit US smakołyk tidbit US smakołyk
|] |]
-- Likelihood reuses the LogLoss sample training data.
trainContents Likelihood = trainContents LogLoss
trainContents LogLoss = [hereLit|0.0 Hell, no!!! trainContents LogLoss = [hereLit|0.0 Hell, no!!!
0.0 I hate this stuff 0.0 I hate this stuff
1.0 Lekker!!! 1.0 Lekker!!!
@ -328,6 +342,7 @@ When the going gets tough, the tough get going.
devInContents (FMeasure _) = [hereLit|b b W 29520 779 -28 -32 a 0 0 0 0 0 0 0 0 0 0 devInContents (FMeasure _) = [hereLit|b b W 29520 779 -28 -32 a 0 0 0 0 0 0 0 0 0 0
b b W 55200 1259 35 9 a 1 0 1 0 0 0 0 0 4000 4000 b b W 55200 1259 35 9 a 1 0 1 0 0 0 0 0 4000 4000
|] |]
-- LikelihoodHashed reuses the LogLossHashed sample dev input.
devInContents (LikelihoodHashed b) = devInContents (LogLossHashed b)
devInContents (LogLossHashed _) = [hereLit|Nie kupuj w worku devInContents (LogLossHashed _) = [hereLit|Nie kupuj w worku
Ona psa Ona psa
|] |]
@ -339,6 +354,7 @@ devInContents MAP = [hereLit|US noc
GB wózek dziecięcy GB wózek dziecięcy
GB wizualizować GB wizualizować
|] |]
-- Likelihood reuses the LogLoss sample dev input.
devInContents Likelihood = devInContents LogLoss
devInContents LogLoss = [hereLit|Great stuff! devInContents LogLoss = [hereLit|Great stuff!
Boring stuff Boring stuff
That's good That's good
@ -364,6 +380,7 @@ devExpectedContents NMI = [hereLit|en
pl pl
en en
|] |]
-- LikelihoodHashed reuses the LogLossHashed sample dev expected output.
devExpectedContents (LikelihoodHashed b) = devExpectedContents (LogLossHashed b)
devExpectedContents (LogLossHashed _) = [hereLit|kota devExpectedContents (LogLossHashed _) = [hereLit|kota
ma ma
|] |]
@ -375,6 +392,7 @@ devExpectedContents MAP = [hereLit|night nite
pram pram
visualise visualise
|] |]
-- Likelihood reuses the LogLoss sample dev expected output.
devExpectedContents Likelihood = devExpectedContents LogLoss
devExpectedContents LogLoss = [hereLit|1.0 devExpectedContents LogLoss = [hereLit|1.0
0.0 0.0
1.0 1.0
@ -402,6 +420,7 @@ W marcu, jak w garncu.
A cada necio agrada su porrada. A cada necio agrada su porrada.
Kwiecień plecień, bo przeplata trochę zimy, trochę lata. Kwiecień plecień, bo przeplata trochę zimy, trochę lata.
|] |]
-- LikelihoodHashed reuses the LogLossHashed sample test input.
testInContents (LikelihoodHashed b) = testInContents (LogLossHashed b)
testInContents (LogLossHashed _) = [hereLit|Ala ma testInContents (LogLossHashed _) = [hereLit|Ala ma
Ona ma kota worku Ona ma kota worku
|] |]
@ -413,6 +432,7 @@ testInContents MAP = [hereLit|US wózek dziecięcy
GB słoń GB słoń
US słoń US słoń
|] |]
-- Likelihood reuses the LogLoss sample test input.
testInContents Likelihood = testInContents LogLoss
testInContents LogLoss = [hereLit|That's great, ha, ha, I love it! testInContents LogLoss = [hereLit|That's great, ha, ha, I love it!
Super-duper!! Super-duper!!
That is incredibly boring. That is incredibly boring.
@ -440,6 +460,7 @@ pl
es es
pl pl
|] |]
-- LikelihoodHashed reuses the LogLossHashed sample test expected output.
testExpectedContents (LikelihoodHashed b) = testExpectedContents (LogLossHashed b)
testExpectedContents (LogLossHashed _) = [hereLit|ma testExpectedContents (LogLossHashed _) = [hereLit|ma
w w
|] |]
@ -451,6 +472,7 @@ testExpectedContents MAP = [hereLit|trolley
elephant elephant
elephant elephant
|] |]
-- Likelihood reuses the LogLoss sample test expected output.
testExpectedContents Likelihood = testExpectedContents LogLoss
testExpectedContents LogLoss = [hereLit|1.0 testExpectedContents LogLoss = [hereLit|1.0
1.0 1.0
0.0 0.0

View File

@ -100,7 +100,7 @@ metricReader = option auto
<> value defaultMetric <> value defaultMetric
<> showDefault <> showDefault
<> metavar "METRIC" <> metavar "METRIC"
<> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, BIO-F1 or CharMatch" ) <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1 or CharMatch" )
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue)) runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
runGEval args = do runGEval args = do

View File

@ -102,6 +102,9 @@ main = hspec $ do
runGEvalTest "log-loss-hashed-probs-normalized" `shouldReturnAlmost` 1.55537749098853 runGEvalTest "log-loss-hashed-probs-normalized" `shouldReturnAlmost` 1.55537749098853
it "with log probs whose probs are summing up to less than 1.0" $ do it "with log probs whose probs are summing up to less than 1.0" $ do
runGEvalTest "log-loss-hashed-normalization" `shouldReturnAlmost` 5.16395069238851 runGEvalTest "log-loss-hashed-normalization" `shouldReturnAlmost` 5.16395069238851
-- Runs the "likelihood-hashed-not-normalized" fixture and compares the result
-- with 0.351043364110715 (shouldReturnAlmost is presumably an approximate
-- comparison; confirm against its definition).
describe "LikelihoodHashed challenge" $ do
it "example with unnormalized values" $ do
runGEvalTest "likelihood-hashed-not-normalized" `shouldReturnAlmost` 0.351043364110715
describe "reading options" $ do describe "reading options" $ do
it "can get the metric" $ do it "can get the metric" $ do
@ -190,6 +193,9 @@ main = hspec $ do
runGEvalTest "logloss-simple" `shouldReturnAlmost` 0.31824 runGEvalTest "logloss-simple" `shouldReturnAlmost` 0.31824
it "perfect" $ do it "perfect" $ do
runGEvalTest "logloss-perfect" `shouldReturnAlmost` 0.0 runGEvalTest "logloss-perfect" `shouldReturnAlmost` 0.0
-- Runs the "likelihood-simple" fixture and compares the result with
-- 0.72742818469866 (shouldReturnAlmost is presumably an approximate
-- comparison; confirm against its definition).
describe "Likelihood" $ do
it "simple" $ do
runGEvalTest "likelihood-simple" `shouldReturnAlmost` 0.72742818469866
describe "evaluating single lines" $ do describe "evaluating single lines" $ do
it "RMSE" $ do it "RMSE" $ do
gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla") gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla")

View File

@ -0,0 +1,2 @@
tak:10 nie:8.9
niebieski:0 żółty:1.5 czerwony:-0.5
1 tak:10 nie:8.9
2 niebieski:0 żółty:1.5 czerwony:-0.5

View File

@ -0,0 +1 @@
--metric LikelihoodHashed8

View File

@ -0,0 +1,2 @@
tak
niebieski
1 tak
2 niebieski

View File

@ -0,0 +1,4 @@
0.7
0
0.0
0.6
1 0.7
2 0
3 0.0
4 0.6

View File

@ -0,0 +1 @@
--metric Likelihood

View File

@ -0,0 +1,4 @@
1
0
0
0
1 1
2 0
3 0
4 0