Merge branch 'master' of ssh://gonito.net/geval
This commit is contained in:
commit
dac486c8e4
@ -83,7 +83,7 @@ defaultLogLossHashedSize :: Word32
|
|||||||
defaultLogLossHashedSize = 10
|
defaultLogLossHashedSize = 10
|
||||||
|
|
||||||
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch
|
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch
|
||||||
| MAP | LogLoss | BIOF1
|
| MAP | LogLoss | Likelihood | BIOF1 | LikelihoodHashed Word32
|
||||||
deriving (Eq)
|
deriving (Eq)
|
||||||
|
|
||||||
instance Show Metric where
|
instance Show Metric where
|
||||||
@ -100,9 +100,16 @@ instance Show Metric where
|
|||||||
""
|
""
|
||||||
else
|
else
|
||||||
(show nbOfBits))
|
(show nbOfBits))
|
||||||
|
-- Render LikelihoodHashed; the bit count is appended only when it differs
-- from defaultLogLossHashedSize.
show (LikelihoodHashed nbOfBits) = "LikelihoodHashed" ++ bitsSuffix
  where
    bitsSuffix
      | nbOfBits == defaultLogLossHashedSize = ""
      | otherwise = show nbOfBits
|
||||||
show CharMatch = "CharMatch"
|
show CharMatch = "CharMatch"
|
||||||
show MAP = "MAP"
|
show MAP = "MAP"
|
||||||
show LogLoss = "LogLoss"
|
show LogLoss = "LogLoss"
|
||||||
|
show Likelihood = "Likelihood"
|
||||||
show BIOF1 = "BIO-F1"
|
show BIOF1 = "BIO-F1"
|
||||||
|
|
||||||
instance Read Metric where
|
instance Read Metric where
|
||||||
@ -118,7 +125,11 @@ instance Read Metric where
|
|||||||
readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
|
readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
|
||||||
[(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
|
[(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
|
||||||
_ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
|
_ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
|
||||||
|
-- Parse "LikelihoodHashed" optionally followed by a bit count; when no single
-- unambiguous number follows, fall back to defaultLogLossHashedSize and leave
-- the remaining input untouched.
readsPrec p ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':'H':'a':'s':'h':'e':'d':theRest) =
  case readsPrec p theRest of
    [(nbOfBits, afterBits)] -> [(LikelihoodHashed nbOfBits, afterBits)]
    _ -> [(LikelihoodHashed defaultLogLossHashedSize, theRest)]
|
||||||
readsPrec _ ('L':'o':'g':'L':'o':'s':'s':theRest) = [(LogLoss, theRest)]
|
readsPrec _ ('L':'o':'g':'L':'o':'s':'s':theRest) = [(LogLoss, theRest)]
|
||||||
|
-- Plain "Likelihood". Equations are tried top to bottom, so this must stay
-- below the "LikelihoodHashed" clause, whose name starts with the same prefix.
readsPrec _ ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(Likelihood, theRest)]
|
||||||
readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
|
readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
|
||||||
readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
|
readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
|
||||||
readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
|
readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
|
||||||
@ -134,9 +145,11 @@ getMetricOrdering ClippEU = TheHigherTheBetter
|
|||||||
getMetricOrdering (FMeasure _) = TheHigherTheBetter
|
getMetricOrdering (FMeasure _) = TheHigherTheBetter
|
||||||
getMetricOrdering NMI = TheHigherTheBetter
|
getMetricOrdering NMI = TheHigherTheBetter
|
||||||
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
|
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
|
||||||
|
-- LikelihoodHashed is computed as exp (-logLoss) of LogLossHashed, so unlike
-- the underlying log-loss a higher value is better.
getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
|
||||||
getMetricOrdering CharMatch = TheHigherTheBetter
|
getMetricOrdering CharMatch = TheHigherTheBetter
|
||||||
getMetricOrdering MAP = TheHigherTheBetter
|
getMetricOrdering MAP = TheHigherTheBetter
|
||||||
getMetricOrdering LogLoss = TheLowerTheBetter
|
getMetricOrdering LogLoss = TheLowerTheBetter
|
||||||
|
-- Likelihood is computed as exp (-logLoss), so unlike LogLoss a higher value
-- is better.
getMetricOrdering Likelihood = TheHigherTheBetter
|
||||||
getMetricOrdering BIOF1 = TheHigherTheBetter
|
getMetricOrdering BIOF1 = TheHigherTheBetter
|
||||||
|
|
||||||
defaultOutDirectory = "."
|
defaultOutDirectory = "."
|
||||||
@ -308,6 +321,8 @@ gevalCore metric inputFilePath expectedFilePath outFilePath = do
|
|||||||
(fileAsLineSource expectedFilePath)
|
(fileAsLineSource expectedFilePath)
|
||||||
(fileAsLineSource outFilePath)
|
(fileAsLineSource outFilePath)
|
||||||
|
|
||||||
|
-- | Convert a log-loss value into a likelihood by taking @exp (-logLoss)@.
--
-- A log-loss of 0 maps to a likelihood of 1; larger log-loss values map to
-- likelihoods closer to 0.
logLossToLikehood :: Floating a => a -> a
logLossToLikehood logLoss = exp (-logLoss)
|
||||||
|
|
||||||
gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric
|
gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric
|
||||||
-> LineSource (ResourceT m)
|
-> LineSource (ResourceT m)
|
||||||
-> LineSource (ResourceT m)
|
-> LineSource (ResourceT m)
|
||||||
@ -317,6 +332,14 @@ gevalCoreOnSources RMSE inputLineSource expectedLineSource outLineSource = do
|
|||||||
mse <- gevalCoreOnSources MSE inputLineSource expectedLineSource outLineSource
|
mse <- gevalCoreOnSources MSE inputLineSource expectedLineSource outLineSource
|
||||||
return $ mse ** 0.5
|
return $ mse ** 0.5
|
||||||
|
|
||||||
|
-- Likelihood is not evaluated directly: the LogLoss result for the same
-- sources is computed and then mapped through logLossToLikehood.
gevalCoreOnSources Likelihood inputLineSource expectedLineSource outLineSource =
  fmap logLossToLikehood
       (gevalCoreOnSources LogLoss inputLineSource expectedLineSource outLineSource)
|
||||||
|
|
||||||
|
-- LikelihoodHashed is not evaluated directly: the LogLossHashed result (same
-- bit count, same sources) is computed and then mapped through
-- logLossToLikehood.
gevalCoreOnSources (LikelihoodHashed nbOfBits) inputLineSource expectedLineSource outLineSource =
  fmap logLossToLikehood
       (gevalCoreOnSources (LogLossHashed nbOfBits) inputLineSource expectedLineSource outLineSource)
|
||||||
|
|
||||||
gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do
|
gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do
|
||||||
gevalCore' metric inputLineSource expectedLineSource outLineSource
|
gevalCore' metric inputLineSource expectedLineSource outLineSource
|
||||||
|
|
||||||
|
@ -101,6 +101,8 @@ Cluster proverbs for languages.
|
|||||||
This is a sample challenge for flat clustering (unsupervised learning challenge).
|
This is a sample challenge for flat clustering (unsupervised learning challenge).
|
||||||
|] ++ (commonReadmeMDContents testName)
|
|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
|
-- The LikelihoodHashed sample challenge reuses the LogLossHashed README as is.
readmeMDContents (LikelihoodHashed nbOfBits) testName = readmeMDContents (LogLossHashed nbOfBits) testName
|
||||||
|
|
||||||
readmeMDContents (LogLossHashed _) testName = [i|
|
readmeMDContents (LogLossHashed _) testName = [i|
|
||||||
GEval sample challenge — language model evaluation
|
GEval sample challenge — language model evaluation
|
||||||
==================================================
|
==================================================
|
||||||
@ -203,6 +205,16 @@ This a sample challenge for the log-loss metric.
|
|||||||
|
|
||||||
|] ++ (commonReadmeMDContents testName)
|
|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
|
-- README.md text for the sample Likelihood challenge (sentiment probability),
-- followed by the contents shared by all sample challenges.
readmeMDContents Likelihood testName = [i|
Give the probability of a positive sentiment
============================================

Give the probability that a sentence expresses a positive sentiment.

This is a sample challenge for the likelihood metric.

|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
readmeMDContents BIOF1 testName = [i|
|
readmeMDContents BIOF1 testName = [i|
|
||||||
Tag and normalize names
|
Tag and normalize names
|
||||||
=======================
|
=======================
|
||||||
@ -284,6 +296,7 @@ trainContents NMI = [hereLit|pl Kto pod kim dołki kopie, ten sam w nie wpada.
|
|||||||
en The pen is mightier than the sword.
|
en The pen is mightier than the sword.
|
||||||
pl Baba z wozu, koniom lżej.
|
pl Baba z wozu, koniom lżej.
|
||||||
|]
|
|]
|
||||||
|
-- LikelihoodHashed reuses the LogLossHashed sample training data unchanged.
trainContents (LikelihoodHashed nbOfBits) = trainContents (LogLossHashed nbOfBits)
|
||||||
trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota
|
trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota
|
||||||
Basia ma psa
|
Basia ma psa
|
||||||
Nie kupujemy kota w worku
|
Nie kupujemy kota w worku
|
||||||
@ -299,6 +312,7 @@ honour GB honor
|
|||||||
titbit GB smakołyk
|
titbit GB smakołyk
|
||||||
tidbit US smakołyk
|
tidbit US smakołyk
|
||||||
|]
|
|]
|
||||||
|
-- Likelihood reuses the LogLoss sample training data unchanged.
trainContents Likelihood = trainContents LogLoss
|
||||||
trainContents LogLoss = [hereLit|0.0 Hell, no!!!
|
trainContents LogLoss = [hereLit|0.0 Hell, no!!!
|
||||||
0.0 I hate this stuff
|
0.0 I hate this stuff
|
||||||
1.0 Lekker!!!
|
1.0 Lekker!!!
|
||||||
@ -328,6 +342,7 @@ When the going gets tough, the tough get going.
|
|||||||
devInContents (FMeasure _) = [hereLit|b b W 29520 779 -28 -32 a 0 0 0 0 0 0 0 0 0 0
|
devInContents (FMeasure _) = [hereLit|b b W 29520 779 -28 -32 a 0 0 0 0 0 0 0 0 0 0
|
||||||
b b W 55200 1259 35 9 a 1 0 1 0 0 0 0 0 4000 4000
|
b b W 55200 1259 35 9 a 1 0 1 0 0 0 0 0 4000 4000
|
||||||
|]
|
|]
|
||||||
|
-- LikelihoodHashed reuses the LogLossHashed sample dev input unchanged.
devInContents (LikelihoodHashed nbOfBits) = devInContents (LogLossHashed nbOfBits)
|
||||||
devInContents (LogLossHashed _) = [hereLit|Nie kupuj w worku
|
devInContents (LogLossHashed _) = [hereLit|Nie kupuj w worku
|
||||||
Ona psa
|
Ona psa
|
||||||
|]
|
|]
|
||||||
@ -339,6 +354,7 @@ devInContents MAP = [hereLit|US noc
|
|||||||
GB wózek dziecięcy
|
GB wózek dziecięcy
|
||||||
GB wizualizować
|
GB wizualizować
|
||||||
|]
|
|]
|
||||||
|
-- Likelihood reuses the LogLoss sample dev input unchanged.
devInContents Likelihood = devInContents LogLoss
|
||||||
devInContents LogLoss = [hereLit|Great stuff!
|
devInContents LogLoss = [hereLit|Great stuff!
|
||||||
Boring stuff
|
Boring stuff
|
||||||
That's good
|
That's good
|
||||||
@ -364,6 +380,7 @@ devExpectedContents NMI = [hereLit|en
|
|||||||
pl
|
pl
|
||||||
en
|
en
|
||||||
|]
|
|]
|
||||||
|
-- LikelihoodHashed reuses the LogLossHashed expected dev output unchanged.
devExpectedContents (LikelihoodHashed nbOfBits) = devExpectedContents (LogLossHashed nbOfBits)
|
||||||
devExpectedContents (LogLossHashed _) = [hereLit|kota
|
devExpectedContents (LogLossHashed _) = [hereLit|kota
|
||||||
ma
|
ma
|
||||||
|]
|
|]
|
||||||
@ -375,6 +392,7 @@ devExpectedContents MAP = [hereLit|night nite
|
|||||||
pram
|
pram
|
||||||
visualise
|
visualise
|
||||||
|]
|
|]
|
||||||
|
-- Likelihood reuses the LogLoss expected dev output unchanged.
devExpectedContents Likelihood = devExpectedContents LogLoss
|
||||||
devExpectedContents LogLoss = [hereLit|1.0
|
devExpectedContents LogLoss = [hereLit|1.0
|
||||||
0.0
|
0.0
|
||||||
1.0
|
1.0
|
||||||
@ -402,6 +420,7 @@ W marcu, jak w garncu.
|
|||||||
A cada necio agrada su porrada.
|
A cada necio agrada su porrada.
|
||||||
Kwiecień plecień, bo przeplata trochę zimy, trochę lata.
|
Kwiecień plecień, bo przeplata trochę zimy, trochę lata.
|
||||||
|]
|
|]
|
||||||
|
-- LikelihoodHashed reuses the LogLossHashed sample test input unchanged.
testInContents (LikelihoodHashed nbOfBits) = testInContents (LogLossHashed nbOfBits)
|
||||||
testInContents (LogLossHashed _) = [hereLit|Ala ma
|
testInContents (LogLossHashed _) = [hereLit|Ala ma
|
||||||
Ona ma kota worku
|
Ona ma kota worku
|
||||||
|]
|
|]
|
||||||
@ -413,6 +432,7 @@ testInContents MAP = [hereLit|US wózek dziecięcy
|
|||||||
GB słoń
|
GB słoń
|
||||||
US słoń
|
US słoń
|
||||||
|]
|
|]
|
||||||
|
-- Likelihood reuses the LogLoss sample test input unchanged.
testInContents Likelihood = testInContents LogLoss
|
||||||
testInContents LogLoss = [hereLit|That's great, ha, ha, I love it!
|
testInContents LogLoss = [hereLit|That's great, ha, ha, I love it!
|
||||||
Super-duper!!
|
Super-duper!!
|
||||||
That is incredibly boring.
|
That is incredibly boring.
|
||||||
@ -440,6 +460,7 @@ pl
|
|||||||
es
|
es
|
||||||
pl
|
pl
|
||||||
|]
|
|]
|
||||||
|
-- LikelihoodHashed reuses the LogLossHashed expected test output unchanged.
testExpectedContents (LikelihoodHashed nbOfBits) = testExpectedContents (LogLossHashed nbOfBits)
|
||||||
testExpectedContents (LogLossHashed _) = [hereLit|ma
|
testExpectedContents (LogLossHashed _) = [hereLit|ma
|
||||||
w
|
w
|
||||||
|]
|
|]
|
||||||
@ -451,6 +472,7 @@ testExpectedContents MAP = [hereLit|trolley
|
|||||||
elephant
|
elephant
|
||||||
elephant
|
elephant
|
||||||
|]
|
|]
|
||||||
|
-- Likelihood reuses the LogLoss expected test output unchanged.
testExpectedContents Likelihood = testExpectedContents LogLoss
|
||||||
testExpectedContents LogLoss = [hereLit|1.0
|
testExpectedContents LogLoss = [hereLit|1.0
|
||||||
1.0
|
1.0
|
||||||
0.0
|
0.0
|
||||||
|
@ -100,7 +100,7 @@ metricReader = option auto
|
|||||||
<> value defaultMetric
|
<> value defaultMetric
|
||||||
<> showDefault
|
<> showDefault
|
||||||
<> metavar "METRIC"
|
<> metavar "METRIC"
|
||||||
<> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, BIO-F1 or CharMatch" )
|
<> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1 or CharMatch" )
|
||||||
|
|
||||||
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
|
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
|
||||||
runGEval args = do
|
runGEval args = do
|
||||||
|
@ -102,6 +102,9 @@ main = hspec $ do
|
|||||||
runGEvalTest "log-loss-hashed-probs-normalized" `shouldReturnAlmost` 1.55537749098853
|
runGEvalTest "log-loss-hashed-probs-normalized" `shouldReturnAlmost` 1.55537749098853
|
||||||
it "with log probs whose probs are summing up to less than 1.0" $ do
|
it "with log probs whose probs are summing up to less than 1.0" $ do
|
||||||
runGEvalTest "log-loss-hashed-normalization" `shouldReturnAlmost` 5.16395069238851
|
runGEvalTest "log-loss-hashed-normalization" `shouldReturnAlmost` 5.16395069238851
|
||||||
|
describe "LikelihoodHashed challenge" $ do
|
||||||
|
it "example with unnormalized values" $ do
|
||||||
|
runGEvalTest "likelihood-hashed-not-normalized" `shouldReturnAlmost` 0.351043364110715
|
||||||
|
|
||||||
describe "reading options" $ do
|
describe "reading options" $ do
|
||||||
it "can get the metric" $ do
|
it "can get the metric" $ do
|
||||||
@ -190,6 +193,9 @@ main = hspec $ do
|
|||||||
runGEvalTest "logloss-simple" `shouldReturnAlmost` 0.31824
|
runGEvalTest "logloss-simple" `shouldReturnAlmost` 0.31824
|
||||||
it "perfect" $ do
|
it "perfect" $ do
|
||||||
runGEvalTest "logloss-perfect" `shouldReturnAlmost` 0.0
|
runGEvalTest "logloss-perfect" `shouldReturnAlmost` 0.0
|
||||||
|
describe "Likelihood" $ do
|
||||||
|
it "simple" $ do
|
||||||
|
runGEvalTest "likelihood-simple" `shouldReturnAlmost` 0.72742818469866
|
||||||
describe "evaluating single lines" $ do
|
describe "evaluating single lines" $ do
|
||||||
it "RMSE" $ do
|
it "RMSE" $ do
|
||||||
gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla")
|
gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla")
|
||||||
|
@ -0,0 +1,2 @@
|
|||||||
|
tak:10 nie:8.9
|
||||||
|
niebieski:0 żółty:1.5 czerwony:-0.5
|
|
@ -0,0 +1 @@
|
|||||||
|
--metric LikelihoodHashed8
|
@ -0,0 +1,2 @@
|
|||||||
|
tak
|
||||||
|
niebieski
|
|
@ -0,0 +1,4 @@
|
|||||||
|
0.7
|
||||||
|
0
|
||||||
|
0.0
|
||||||
|
0.6
|
|
1
test/likelihood-simple/likelihood-simple/config.txt
Normal file
1
test/likelihood-simple/likelihood-simple/config.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
--metric Likelihood
|
@ -0,0 +1,4 @@
|
|||||||
|
1
|
||||||
|
0
|
||||||
|
0
|
||||||
|
0
|
|
Loading…
Reference in New Issue
Block a user