Add likelihood as an evaluation metric

This commit is contained in:
Filip Graliński 2018-05-17 15:21:03 +02:00
parent 1caec3de35
commit 192d531969
10 changed files with 67 additions and 2 deletions

View File

@ -83,7 +83,7 @@ defaultLogLossHashedSize :: Word32
defaultLogLossHashedSize = 10
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch
| MAP | LogLoss | BIOF1
| MAP | LogLoss | Likelihood | BIOF1 | LikelihoodHashed Word32
deriving (Eq)
instance Show Metric where
@ -100,9 +100,16 @@ instance Show Metric where
""
else
(show nbOfBits))
-- Render LikelihoodHashed. The bit count is appended only when it differs
-- from defaultLogLossHashedSize; otherwise the bare metric name is shown.
show (LikelihoodHashed nbOfBits)
  | nbOfBits == defaultLogLossHashedSize = "LikelihoodHashed"
  | otherwise = "LikelihoodHashed" ++ show nbOfBits
show CharMatch = "CharMatch"
show MAP = "MAP"
show LogLoss = "LogLoss"
show Likelihood = "Likelihood"
show BIOF1 = "BIO-F1"
instance Read Metric where
@ -118,7 +125,11 @@ instance Read Metric where
readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
[(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
_ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
-- Parse "LikelihoodHashed", optionally followed by a bit count. When exactly
-- one parse of the trailing count succeeds it is used; in every other case
-- the metric falls back to defaultLogLossHashedSize and the text after the
-- name is left unconsumed.
readsPrec p ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':'H':'a':'s':'h':'e':'d':afterName) =
  case readsPrec p afterName of
    [(nbOfBits, remainder)] -> [(LikelihoodHashed nbOfBits, remainder)]
    _ -> [(LikelihoodHashed defaultLogLossHashedSize, afterName)]
readsPrec _ ('L':'o':'g':'L':'o':'s':'s':theRest) = [(LogLoss, theRest)]
readsPrec _ ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(Likelihood, theRest)]
readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
@ -134,9 +145,11 @@ getMetricOrdering ClippEU = TheHigherTheBetter
getMetricOrdering (FMeasure _) = TheHigherTheBetter
getMetricOrdering NMI = TheHigherTheBetter
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
getMetricOrdering CharMatch = TheHigherTheBetter
getMetricOrdering MAP = TheHigherTheBetter
getMetricOrdering LogLoss = TheLowerTheBetter
getMetricOrdering Likelihood = TheHigherTheBetter
getMetricOrdering BIOF1 = TheHigherTheBetter
defaultOutDirectory = "."
@ -308,6 +321,8 @@ gevalCore metric inputFilePath expectedFilePath outFilePath = do
(fileAsLineSource expectedFilePath)
(fileAsLineSource outFilePath)
-- | Convert a log-loss value into a likelihood by computing @exp (-logLoss)@.
--
-- The identifier's spelling is kept as-is because the Likelihood and
-- LikelihoodHashed clauses of gevalCoreOnSources call it by this name.
logLossToLikehood :: Floating a => a -> a
logLossToLikehood = exp . negate
gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric
-> LineSource (ResourceT m)
-> LineSource (ResourceT m)
@ -317,6 +332,14 @@ gevalCoreOnSources RMSE inputLineSource expectedLineSource outLineSource = do
mse <- gevalCoreOnSources MSE inputLineSource expectedLineSource outLineSource
return $ mse ** 0.5
-- Likelihood is derived from LogLoss: evaluate LogLoss on the same three line
-- sources and map the result through logLossToLikehood.
gevalCoreOnSources Likelihood inputLineSource expectedLineSource outLineSource =
  logLossToLikehood
    <$> gevalCoreOnSources LogLoss inputLineSource expectedLineSource outLineSource
-- LikelihoodHashed is derived from LogLossHashed with the same hash-size
-- argument: evaluate LogLossHashed on the same three line sources and map the
-- result through logLossToLikehood.
gevalCoreOnSources (LikelihoodHashed nbOfBits) inputLineSource expectedLineSource outLineSource =
  logLossToLikehood
    <$> gevalCoreOnSources (LogLossHashed nbOfBits) inputLineSource expectedLineSource outLineSource
-- Fallback clause: any metric not matched by an earlier clause is passed
-- straight to gevalCore' together with the three line sources.
gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource =
  gevalCore' metric inputLineSource expectedLineSource outLineSource

View File

@ -101,6 +101,8 @@ Cluster proverbs for languages.
This is a sample challenge for flat clustering (unsupervised learning challenge).
|] ++ (commonReadmeMDContents testName)
-- The LikelihoodHashed sample challenge reuses the LogLossHashed README,
-- passing along the same hash-size argument and test name.
readmeMDContents (LikelihoodHashed nbOfBits) testName = readmeMDContents (LogLossHashed nbOfBits) testName
readmeMDContents (LogLossHashed _) testName = [i|
GEval sample challenge language model evaluation
==================================================
@ -203,6 +205,16 @@ This a sample challenge for the log-loss metric.
|] ++ (commonReadmeMDContents testName)
-- README for the sample Likelihood challenge: a fixed quasi-quoted description
-- followed by the common README contents for the given test name.
readmeMDContents Likelihood testName = [i|
Give the probability of a positive sentiment
============================================
Give the probability that a sentence expresses a positive sentiment.
This is a sample challenge for the likelihood metric.
|] ++ (commonReadmeMDContents testName)
readmeMDContents BIOF1 testName = [i|
Tag and normalize names
=======================
@ -284,6 +296,7 @@ trainContents NMI = [hereLit|pl Kto pod kim dołki kopie, ten sam w nie wpada.
en The pen is mightier than the sword.
pl Baba z wozu, koniom lżej.
|]
-- LikelihoodHashed reuses the LogLossHashed sample training data.
trainContents (LikelihoodHashed nbOfBits) = trainContents (LogLossHashed nbOfBits)
trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota
Basia ma psa
Nie kupujemy kota w worku
@ -299,6 +312,7 @@ honour GB honor
titbit GB smakołyk
tidbit US smakołyk
|]
-- Likelihood reuses the LogLoss sample training data.
trainContents Likelihood = trainContents LogLoss
trainContents LogLoss = [hereLit|0.0 Hell, no!!!
0.0 I hate this stuff
1.0 Lekker!!!
@ -328,6 +342,7 @@ When the going gets tough, the tough get going.
devInContents (FMeasure _) = [hereLit|b b W 29520 779 -28 -32 a 0 0 0 0 0 0 0 0 0 0
b b W 55200 1259 35 9 a 1 0 1 0 0 0 0 0 4000 4000
|]
-- LikelihoodHashed reuses the LogLossHashed sample dev-set input.
devInContents (LikelihoodHashed nbOfBits) = devInContents (LogLossHashed nbOfBits)
devInContents (LogLossHashed _) = [hereLit|Nie kupuj w worku
Ona psa
|]
@ -339,6 +354,7 @@ devInContents MAP = [hereLit|US noc
GB wózek dziecięcy
GB wizualizować
|]
-- Likelihood reuses the LogLoss sample dev-set input.
devInContents Likelihood = devInContents LogLoss
devInContents LogLoss = [hereLit|Great stuff!
Boring stuff
That's good
@ -364,6 +380,7 @@ devExpectedContents NMI = [hereLit|en
pl
en
|]
-- LikelihoodHashed reuses the LogLossHashed sample dev-set expected output.
devExpectedContents (LikelihoodHashed nbOfBits) = devExpectedContents (LogLossHashed nbOfBits)
devExpectedContents (LogLossHashed _) = [hereLit|kota
ma
|]
@ -375,6 +392,7 @@ devExpectedContents MAP = [hereLit|night nite
pram
visualise
|]
-- Likelihood reuses the LogLoss sample dev-set expected output.
devExpectedContents Likelihood = devExpectedContents LogLoss
devExpectedContents LogLoss = [hereLit|1.0
0.0
1.0
@ -402,6 +420,7 @@ W marcu, jak w garncu.
A cada necio agrada su porrada.
Kwiecień plecień, bo przeplata trochę zimy, trochę lata.
|]
-- LikelihoodHashed reuses the LogLossHashed sample test-set input.
testInContents (LikelihoodHashed nbOfBits) = testInContents (LogLossHashed nbOfBits)
testInContents (LogLossHashed _) = [hereLit|Ala ma
Ona ma kota worku
|]
@ -413,6 +432,7 @@ testInContents MAP = [hereLit|US wózek dziecięcy
GB słoń
US słoń
|]
-- Likelihood reuses the LogLoss sample test-set input.
testInContents Likelihood = testInContents LogLoss
testInContents LogLoss = [hereLit|That's great, ha, ha, I love it!
Super-duper!!
That is incredibly boring.
@ -440,6 +460,7 @@ pl
es
pl
|]
-- LikelihoodHashed reuses the LogLossHashed sample test-set expected output.
testExpectedContents (LikelihoodHashed nbOfBits) = testExpectedContents (LogLossHashed nbOfBits)
testExpectedContents (LogLossHashed _) = [hereLit|ma
w
|]
@ -451,6 +472,7 @@ testExpectedContents MAP = [hereLit|trolley
elephant
elephant
|]
-- Likelihood reuses the LogLoss sample test-set expected output.
testExpectedContents Likelihood = testExpectedContents LogLoss
testExpectedContents LogLoss = [hereLit|1.0
1.0
0.0

View File

@ -100,7 +100,7 @@ metricReader = option auto
<> value defaultMetric
<> showDefault
<> metavar "METRIC"
<> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, BIO-F1 or CharMatch" )
<> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1 or CharMatch" )
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
runGEval args = do

View File

@ -102,6 +102,9 @@ main = hspec $ do
runGEvalTest "log-loss-hashed-probs-normalized" `shouldReturnAlmost` 1.55537749098853
it "with log probs whose probs are summing up to less than 1.0" $ do
runGEvalTest "log-loss-hashed-normalization" `shouldReturnAlmost` 5.16395069238851
describe "LikelihoodHashed challenge" $ do
it "example with unnormalized values" $ do
runGEvalTest "likelihood-hashed-not-normalized" `shouldReturnAlmost` 0.351043364110715
describe "reading options" $ do
it "can get the metric" $ do
@ -190,6 +193,9 @@ main = hspec $ do
runGEvalTest "logloss-simple" `shouldReturnAlmost` 0.31824
it "perfect" $ do
runGEvalTest "logloss-perfect" `shouldReturnAlmost` 0.0
describe "Likelihood" $ do
it "simple" $ do
runGEvalTest "likelihood-simple" `shouldReturnAlmost` 0.72742818469866
describe "evaluating single lines" $ do
it "RMSE" $ do
gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla")

View File

@ -0,0 +1,2 @@
tak:10 nie:8.9
niebieski:0 żółty:1.5 czerwony:-0.5
1 tak:10 nie:8.9
2 niebieski:0 żółty:1.5 czerwony:-0.5

View File

@ -0,0 +1 @@
--metric LikelihoodHashed8

View File

@ -0,0 +1,2 @@
tak
niebieski
1 tak
2 niebieski

View File

@ -0,0 +1,4 @@
0.7
0
0.0
0.6
1 0.7
2 0
3 0.0
4 0.6

View File

@ -0,0 +1 @@
--metric Likelihood

View File

@ -0,0 +1,4 @@
1
0
0
0
1 1
2 0
3 0
4 0