Add Likelihood and LikelihoodHashed as evaluation metrics
This commit is contained in:
parent
1caec3de35
commit
192d531969
@ -83,7 +83,7 @@ defaultLogLossHashedSize :: Word32
|
||||
-- Default number of bits for the hashed metrics: 'show' omits the numeric
-- suffix when a metric carries this value, and 'readsPrec' falls back to it
-- when "LogLossHashed" / "LikelihoodHashed" is not followed by a number.
defaultLogLossHashedSize = 10
|
||||
|
||||
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch
|
||||
| MAP | LogLoss | BIOF1
|
||||
| MAP | LogLoss | Likelihood | BIOF1 | LikelihoodHashed Word32
|
||||
deriving (Eq)
|
||||
|
||||
instance Show Metric where
|
||||
@ -100,9 +100,16 @@ instance Show Metric where
|
||||
""
|
||||
else
|
||||
(show nbOfBits))
|
||||
-- Render the metric name; the number of bits is appended only when it
-- differs from 'defaultLogLossHashedSize'.
show (LikelihoodHashed nbOfBits) = "LikelihoodHashed" ++ suffix
  where
    suffix
      | nbOfBits == defaultLogLossHashedSize = ""
      | otherwise = show nbOfBits
|
||||
show CharMatch = "CharMatch"
|
||||
show MAP = "MAP"
|
||||
show LogLoss = "LogLoss"
|
||||
show Likelihood = "Likelihood"
|
||||
show BIOF1 = "BIO-F1"
|
||||
|
||||
instance Read Metric where
|
||||
@ -118,7 +125,11 @@ instance Read Metric where
|
||||
readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
|
||||
[(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
|
||||
_ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
|
||||
-- "LikelihoodHashed", optionally followed by the number of bits. Unless the
-- text after the name yields exactly one parse, the default size is used and
-- that text is returned unconsumed.
readsPrec p ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':'H':'a':'s':'h':'e':'d':theRest) =
  case readsPrec p theRest of
    [(nbOfBits, remaining)] -> [(LikelihoodHashed nbOfBits, remaining)]
    _ -> [(LikelihoodHashed defaultLogLossHashedSize, theRest)]
|
||||
readsPrec _ ('L':'o':'g':'L':'o':'s':'s':theRest) = [(LogLoss, theRest)]
|
||||
readsPrec _ ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(Likelihood, theRest)]
|
||||
readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
|
||||
readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
|
||||
readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
|
||||
@ -134,9 +145,11 @@ getMetricOrdering ClippEU = TheHigherTheBetter
|
||||
getMetricOrdering (FMeasure _) = TheHigherTheBetter
|
||||
getMetricOrdering NMI = TheHigherTheBetter
|
||||
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
|
||||
getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
|
||||
getMetricOrdering CharMatch = TheHigherTheBetter
|
||||
getMetricOrdering MAP = TheHigherTheBetter
|
||||
getMetricOrdering LogLoss = TheLowerTheBetter
|
||||
getMetricOrdering Likelihood = TheHigherTheBetter
|
||||
getMetricOrdering BIOF1 = TheHigherTheBetter
|
||||
|
||||
-- Default out directory: "." (a relative path, i.e. the current directory).
defaultOutDirectory = "."
|
||||
@ -308,6 +321,8 @@ gevalCore metric inputFilePath expectedFilePath outFilePath = do
|
||||
(fileAsLineSource expectedFilePath)
|
||||
(fileAsLineSource outFilePath)
|
||||
|
||||
-- | Convert a log-loss value into a likelihood by computing @exp (-logLoss)@.
--
-- A log loss of 0 maps to a likelihood of 1; larger log-loss values map to
-- smaller likelihoods.
--
-- The identifier keeps its existing spelling ("Likehood") because the
-- 'Likelihood' and 'LikelihoodHashed' equations of 'gevalCoreOnSources'
-- call it under this name.
logLossToLikehood :: Floating a => a -> a
logLossToLikehood logLoss = exp (-logLoss)
|
||||
|
||||
gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric
|
||||
-> LineSource (ResourceT m)
|
||||
-> LineSource (ResourceT m)
|
||||
@ -317,6 +332,14 @@ gevalCoreOnSources RMSE inputLineSource expectedLineSource outLineSource = do
|
||||
mse <- gevalCoreOnSources MSE inputLineSource expectedLineSource outLineSource
|
||||
return $ mse ** 0.5
|
||||
|
||||
-- Likelihood: evaluate LogLoss on the same three sources and convert the
-- result with 'logLossToLikehood'.
gevalCoreOnSources Likelihood inputLineSource expectedLineSource outLineSource =
  fmap
    logLossToLikehood
    (gevalCoreOnSources LogLoss inputLineSource expectedLineSource outLineSource)
|
||||
|
||||
-- LikelihoodHashed: evaluate LogLossHashed with the same number of bits on
-- the same three sources and convert the result with 'logLossToLikehood'.
gevalCoreOnSources (LikelihoodHashed nbOfBits) inputLineSource expectedLineSource outLineSource =
  fmap
    logLossToLikehood
    (gevalCoreOnSources (LogLossHashed nbOfBits) inputLineSource expectedLineSource outLineSource)
|
||||
|
||||
-- Fallback equation: the metric and the three sources are handed to
-- gevalCore' unchanged.
gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource =
  gevalCore' metric inputLineSource expectedLineSource outLineSource
|
||||
|
||||
|
@ -101,6 +101,8 @@ Cluster proverbs for languages.
|
||||
This is a sample challenge for flat clustering (unsupervised learning challenge).
|
||||
|] ++ (commonReadmeMDContents testName)
|
||||
|
||||
-- LikelihoodHashed reuses the README produced for LogLossHashed.
readmeMDContents (LikelihoodHashed nbOfBits) testName = readmeMDContents (LogLossHashed nbOfBits) testName
|
||||
|
||||
readmeMDContents (LogLossHashed _) testName = [i|
|
||||
GEval sample challenge — language model evaluation
|
||||
==================================================
|
||||
@ -203,6 +205,16 @@ This a sample challenge for the log-loss metric.
|
||||
|
||||
|] ++ (commonReadmeMDContents testName)
|
||||
|
||||
-- README text for the Likelihood sample challenge, followed by the common
-- README section built from the test name.
readmeMDContents Likelihood testName = [i|
Give the probability of a positive sentiment
============================================

Give the probability that a sentence expresses a positive sentiment.

This is a sample challenge for the likelihood metric.

|] ++ (commonReadmeMDContents testName)
|
||||
|
||||
readmeMDContents BIOF1 testName = [i|
|
||||
Tag and normalize names
|
||||
=======================
|
||||
@ -284,6 +296,7 @@ trainContents NMI = [hereLit|pl Kto pod kim dołki kopie, ten sam w nie wpada.
|
||||
en The pen is mightier than the sword.
|
||||
pl Baba z wozu, koniom lżej.
|
||||
|]
|
||||
-- LikelihoodHashed reuses the train contents defined for LogLossHashed.
trainContents (LikelihoodHashed nbOfBits) = trainContents (LogLossHashed nbOfBits)
|
||||
trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota
|
||||
Basia ma psa
|
||||
Nie kupujemy kota w worku
|
||||
@ -299,6 +312,7 @@ honour GB honor
|
||||
titbit GB smakołyk
|
||||
tidbit US smakołyk
|
||||
|]
|
||||
-- Likelihood reuses the train contents defined for LogLoss.
trainContents Likelihood = trainContents LogLoss
|
||||
trainContents LogLoss = [hereLit|0.0 Hell, no!!!
|
||||
0.0 I hate this stuff
|
||||
1.0 Lekker!!!
|
||||
@ -328,6 +342,7 @@ When the going gets tough, the tough get going.
|
||||
devInContents (FMeasure _) = [hereLit|b b W 29520 779 -28 -32 a 0 0 0 0 0 0 0 0 0 0
|
||||
b b W 55200 1259 35 9 a 1 0 1 0 0 0 0 0 4000 4000
|
||||
|]
|
||||
-- LikelihoodHashed reuses the dev input contents defined for LogLossHashed.
devInContents (LikelihoodHashed nbOfBits) = devInContents (LogLossHashed nbOfBits)
|
||||
devInContents (LogLossHashed _) = [hereLit|Nie kupuj w worku
|
||||
Ona psa
|
||||
|]
|
||||
@ -339,6 +354,7 @@ devInContents MAP = [hereLit|US noc
|
||||
GB wózek dziecięcy
|
||||
GB wizualizować
|
||||
|]
|
||||
-- Likelihood reuses the dev input contents defined for LogLoss.
devInContents Likelihood = devInContents LogLoss
|
||||
devInContents LogLoss = [hereLit|Great stuff!
|
||||
Boring stuff
|
||||
That's good
|
||||
@ -364,6 +380,7 @@ devExpectedContents NMI = [hereLit|en
|
||||
pl
|
||||
en
|
||||
|]
|
||||
-- LikelihoodHashed reuses the dev expected contents defined for LogLossHashed.
devExpectedContents (LikelihoodHashed nbOfBits) = devExpectedContents (LogLossHashed nbOfBits)
|
||||
devExpectedContents (LogLossHashed _) = [hereLit|kota
|
||||
ma
|
||||
|]
|
||||
@ -375,6 +392,7 @@ devExpectedContents MAP = [hereLit|night nite
|
||||
pram
|
||||
visualise
|
||||
|]
|
||||
-- Likelihood reuses the dev expected contents defined for LogLoss.
devExpectedContents Likelihood = devExpectedContents LogLoss
|
||||
devExpectedContents LogLoss = [hereLit|1.0
|
||||
0.0
|
||||
1.0
|
||||
@ -402,6 +420,7 @@ W marcu, jak w garncu.
|
||||
A cada necio agrada su porrada.
|
||||
Kwiecień plecień, bo przeplata trochę zimy, trochę lata.
|
||||
|]
|
||||
-- LikelihoodHashed reuses the test input contents defined for LogLossHashed.
testInContents (LikelihoodHashed nbOfBits) = testInContents (LogLossHashed nbOfBits)
|
||||
testInContents (LogLossHashed _) = [hereLit|Ala ma
|
||||
Ona ma kota worku
|
||||
|]
|
||||
@ -413,6 +432,7 @@ testInContents MAP = [hereLit|US wózek dziecięcy
|
||||
GB słoń
|
||||
US słoń
|
||||
|]
|
||||
-- Likelihood reuses the test input contents defined for LogLoss.
testInContents Likelihood = testInContents LogLoss
|
||||
testInContents LogLoss = [hereLit|That's great, ha, ha, I love it!
|
||||
Super-duper!!
|
||||
That is incredibly boring.
|
||||
@ -440,6 +460,7 @@ pl
|
||||
es
|
||||
pl
|
||||
|]
|
||||
-- LikelihoodHashed reuses the test expected contents defined for LogLossHashed.
testExpectedContents (LikelihoodHashed nbOfBits) = testExpectedContents (LogLossHashed nbOfBits)
|
||||
testExpectedContents (LogLossHashed _) = [hereLit|ma
|
||||
w
|
||||
|]
|
||||
@ -451,6 +472,7 @@ testExpectedContents MAP = [hereLit|trolley
|
||||
elephant
|
||||
elephant
|
||||
|]
|
||||
-- Likelihood reuses the test expected contents defined for LogLoss.
testExpectedContents Likelihood = testExpectedContents LogLoss
|
||||
testExpectedContents LogLoss = [hereLit|1.0
|
||||
1.0
|
||||
0.0
|
||||
|
@ -100,7 +100,7 @@ metricReader = option auto
|
||||
<> value defaultMetric
|
||||
<> showDefault
|
||||
<> metavar "METRIC"
|
||||
<> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, BIO-F1 or CharMatch" )
|
||||
<> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1 or CharMatch" )
|
||||
|
||||
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
|
||||
runGEval args = do
|
||||
|
@ -102,6 +102,9 @@ main = hspec $ do
|
||||
runGEvalTest "log-loss-hashed-probs-normalized" `shouldReturnAlmost` 1.55537749098853
|
||||
it "with log probs whose probs are summing up to less than 1.0" $ do
|
||||
runGEvalTest "log-loss-hashed-normalization" `shouldReturnAlmost` 5.16395069238851
|
||||
describe "LikelihoodHashed challenge" $ do
|
||||
it "example with unnormalized values" $ do
|
||||
runGEvalTest "likelihood-hashed-not-normalized" `shouldReturnAlmost` 0.351043364110715
|
||||
|
||||
describe "reading options" $ do
|
||||
it "can get the metric" $ do
|
||||
@ -190,6 +193,9 @@ main = hspec $ do
|
||||
runGEvalTest "logloss-simple" `shouldReturnAlmost` 0.31824
|
||||
it "perfect" $ do
|
||||
runGEvalTest "logloss-perfect" `shouldReturnAlmost` 0.0
|
||||
describe "Likelihood" $ do
|
||||
it "simple" $ do
|
||||
runGEvalTest "likelihood-simple" `shouldReturnAlmost` 0.72742818469866
|
||||
describe "evaluating single lines" $ do
|
||||
it "RMSE" $ do
|
||||
gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla")
|
||||
|
@ -0,0 +1,2 @@
|
||||
tak:10 nie:8.9
|
||||
niebieski:0 żółty:1.5 czerwony:-0.5
|
|
@ -0,0 +1 @@
|
||||
--metric LikelihoodHashed8
|
@ -0,0 +1,2 @@
|
||||
tak
|
||||
niebieski
|
|
@ -0,0 +1,4 @@
|
||||
0.7
|
||||
0
|
||||
0.0
|
||||
0.6
|
|
1
test/likelihood-simple/likelihood-simple/config.txt
Normal file
1
test/likelihood-simple/likelihood-simple/config.txt
Normal file
@ -0,0 +1 @@
|
||||
--metric Likelihood
|
@ -0,0 +1,4 @@
|
||||
1
|
||||
0
|
||||
0
|
||||
0
|
|
Loading…
Reference in New Issue
Block a user