diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index c5a7194..bed64b7 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -83,7 +83,7 @@ defaultLogLossHashedSize :: Word32 defaultLogLossHashedSize = 10 data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch - | MAP | LogLoss | BIOF1 + | MAP | LogLoss | Likelihood | BIOF1 | LikelihoodHashed Word32 deriving (Eq) instance Show Metric where @@ -100,9 +100,16 @@ instance Show Metric where "" else (show nbOfBits)) + show (LikelihoodHashed nbOfBits) = "LikelihoodHashed" ++ (if + nbOfBits == defaultLogLossHashedSize + then + "" + else + (show nbOfBits)) show CharMatch = "CharMatch" show MAP = "MAP" show LogLoss = "LogLoss" + show Likelihood = "Likelihood" show BIOF1 = "BIO-F1" instance Read Metric where @@ -118,7 +125,11 @@ instance Read Metric where readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of [(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)] _ -> [(LogLossHashed defaultLogLossHashedSize, theRest)] + readsPrec p ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of + [(nbOfBits, theRest)] -> [(LikelihoodHashed nbOfBits, theRest)] + _ -> [(LikelihoodHashed defaultLogLossHashedSize, theRest)] readsPrec _ ('L':'o':'g':'L':'o':'s':'s':theRest) = [(LogLoss, theRest)] + readsPrec _ ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(Likelihood, theRest)] readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)] readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)] readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)] @@ -134,9 +145,11 @@ getMetricOrdering ClippEU = TheHigherTheBetter getMetricOrdering (FMeasure _) = TheHigherTheBetter getMetricOrdering NMI = TheHigherTheBetter getMetricOrdering (LogLossHashed _) = TheLowerTheBetter +getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter 
getMetricOrdering CharMatch = TheHigherTheBetter getMetricOrdering MAP = TheHigherTheBetter getMetricOrdering LogLoss = TheLowerTheBetter +getMetricOrdering Likelihood = TheHigherTheBetter getMetricOrdering BIOF1 = TheHigherTheBetter defaultOutDirectory = "." @@ -308,6 +321,10 @@ gevalCore metric inputFilePath expectedFilePath outFilePath = do (fileAsLineSource expectedFilePath) (fileAsLineSource outFilePath) +-- | Convert a log-loss value into the corresponding likelihood, i.e. @exp (-logLoss)@. +logLossToLikelihood :: Floating a => a -> a +logLossToLikelihood logLoss = exp (-logLoss) + gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) @@ -317,6 +334,14 @@ gevalCoreOnSources RMSE inputLineSource expectedLineSource outLineSource = do mse <- gevalCoreOnSources MSE inputLineSource expectedLineSource outLineSource return $ mse ** 0.5 +gevalCoreOnSources Likelihood inputLineSource expectedLineSource outLineSource = do + logLoss <- gevalCoreOnSources LogLoss inputLineSource expectedLineSource outLineSource + return $ logLossToLikelihood logLoss + +gevalCoreOnSources (LikelihoodHashed b) inputLineSource expectedLineSource outLineSource = do + logLoss <- gevalCoreOnSources (LogLossHashed b) inputLineSource expectedLineSource outLineSource + return $ logLossToLikelihood logLoss + gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do gevalCore' metric inputLineSource expectedLineSource outLineSource diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs index 3e00993..0d141d5 100644 --- a/src/GEval/CreateChallenge.hs +++ b/src/GEval/CreateChallenge.hs @@ -101,6 +101,8 @@ Cluster proverbs for languages. This is a sample challenge for flat clustering (unsupervised learning challenge). 
|] ++ (commonReadmeMDContents testName) +readmeMDContents (LikelihoodHashed b) testName = readmeMDContents (LogLossHashed b) testName + readmeMDContents (LogLossHashed _) testName = [i| GEval sample challenge — language model evaluation ================================================== @@ -203,6 +205,16 @@ This a sample challenge for the log-loss metric. |] ++ (commonReadmeMDContents testName) +readmeMDContents Likelihood testName = [i| +Give the probability of a positive sentiment +============================================ + +Give the probability that a sentence expresses a positive sentiment. + +This is a sample challenge for the likelihood metric. + +|] ++ (commonReadmeMDContents testName) + readmeMDContents BIOF1 testName = [i| Tag and normalize names ======================= @@ -284,6 +296,7 @@ trainContents NMI = [hereLit|pl Kto pod kim dołki kopie, ten sam w nie wpada. en The pen is mightier than the sword. pl Baba z wozu, koniom lżej. |] +trainContents (LikelihoodHashed b) = trainContents (LogLossHashed b) trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota Basia ma psa Nie kupujemy kota w worku @@ -299,6 +312,7 @@ honour GB honor titbit GB smakołyk tidbit US smakołyk |] +trainContents Likelihood = trainContents LogLoss trainContents LogLoss = [hereLit|0.0 Hell, no!!! 0.0 I hate this stuff 1.0 Lekker!!! @@ -328,6 +342,7 @@ When the going gets tough, the tough get going. devInContents (FMeasure _) = [hereLit|b b W 29520 779 -28 -32 a 0 0 0 0 0 0 0 0 0 0 b b W 55200 1259 35 9 a 1 0 1 0 0 0 0 0 4000 4000 |] +devInContents (LikelihoodHashed b) = devInContents (LogLossHashed b) devInContents (LogLossHashed _) = [hereLit|Nie kupuj w worku Ona psa |] @@ -339,6 +354,7 @@ devInContents MAP = [hereLit|US noc GB wózek dziecięcy GB wizualizować |] +devInContents Likelihood = devInContents LogLoss devInContents LogLoss = [hereLit|Great stuff! 
Boring stuff That's good @@ -364,6 +380,7 @@ devExpectedContents NMI = [hereLit|en pl en |] +devExpectedContents (LikelihoodHashed b) = devExpectedContents (LogLossHashed b) devExpectedContents (LogLossHashed _) = [hereLit|kota ma |] @@ -375,6 +392,7 @@ devExpectedContents MAP = [hereLit|night nite pram visualise |] +devExpectedContents Likelihood = devExpectedContents LogLoss devExpectedContents LogLoss = [hereLit|1.0 0.0 1.0 @@ -402,6 +420,7 @@ W marcu, jak w garncu. A cada necio agrada su porrada. Kwiecień plecień, bo przeplata trochę zimy, trochę lata. |] +testInContents (LikelihoodHashed b) = testInContents (LogLossHashed b) testInContents (LogLossHashed _) = [hereLit|Ala ma Ona ma kota worku |] @@ -413,6 +432,7 @@ testInContents MAP = [hereLit|US wózek dziecięcy GB słoń US słoń |] +testInContents Likelihood = testInContents LogLoss testInContents LogLoss = [hereLit|That's great, ha, ha, I love it! Super-duper!! That is incredibly boring. @@ -440,6 +460,7 @@ pl es pl |] +testExpectedContents (LikelihoodHashed b) = testExpectedContents (LogLossHashed b) testExpectedContents (LogLossHashed _) = [hereLit|ma w |] @@ -451,6 +472,7 @@ testExpectedContents MAP = [hereLit|trolley elephant elephant |] +testExpectedContents Likelihood = testExpectedContents LogLoss testExpectedContents LogLoss = [hereLit|1.0 1.0 0.0 diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index eaabd31..d8ce4e7 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -100,7 +100,7 @@ metricReader = option auto <> value defaultMetric <> showDefault <> metavar "METRIC" - <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, BIO-F1 or CharMatch" ) + <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1 or CharMatch" ) runGEval :: [String] -> IO 
(Either (ParserResult GEvalOptions) (Maybe MetricValue)) runGEval args = do diff --git a/test/Spec.hs b/test/Spec.hs index 5784042..68f595c 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -102,6 +102,9 @@ main = hspec $ do runGEvalTest "log-loss-hashed-probs-normalized" `shouldReturnAlmost` 1.55537749098853 it "with log probs whose probs are summing up to less than 1.0" $ do runGEvalTest "log-loss-hashed-normalization" `shouldReturnAlmost` 5.16395069238851 + describe "LikelihoodHashed challenge" $ do + it "example with unnormalized values" $ do + runGEvalTest "likelihood-hashed-not-normalized" `shouldReturnAlmost` 0.351043364110715 describe "reading options" $ do it "can get the metric" $ do @@ -190,6 +193,9 @@ main = hspec $ do runGEvalTest "logloss-simple" `shouldReturnAlmost` 0.31824 it "perfect" $ do runGEvalTest "logloss-perfect" `shouldReturnAlmost` 0.0 + describe "Likelihood" $ do + it "simple" $ do + runGEvalTest "likelihood-simple" `shouldReturnAlmost` 0.72742818469866 describe "evaluating single lines" $ do it "RMSE" $ do gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla") diff --git a/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized-solution/test-A/out.tsv b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized-solution/test-A/out.tsv new file mode 100644 index 0000000..02c35e7 --- /dev/null +++ b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized-solution/test-A/out.tsv @@ -0,0 +1,2 @@ +tak:10 nie:8.9 +niebieski:0 żółty:1.5 czerwony:-0.5 diff --git a/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/config.txt b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/config.txt new file mode 100644 index 0000000..00b7f18 --- /dev/null +++ b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/config.txt @@ -0,0 +1 @@ +--metric LikelihoodHashed8 diff --git 
a/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/test-A/expected.tsv b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/test-A/expected.tsv new file mode 100644 index 0000000..70d9d14 --- /dev/null +++ b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/test-A/expected.tsv @@ -0,0 +1,2 @@ +tak +niebieski diff --git a/test/likelihood-simple/likelihood-simple-solution/test-A/out.tsv b/test/likelihood-simple/likelihood-simple-solution/test-A/out.tsv new file mode 100644 index 0000000..6978be2 --- /dev/null +++ b/test/likelihood-simple/likelihood-simple-solution/test-A/out.tsv @@ -0,0 +1,4 @@ +0.7 +0 +0.0 +0.6 diff --git a/test/likelihood-simple/likelihood-simple/config.txt b/test/likelihood-simple/likelihood-simple/config.txt new file mode 100644 index 0000000..34e7898 --- /dev/null +++ b/test/likelihood-simple/likelihood-simple/config.txt @@ -0,0 +1 @@ +--metric Likelihood diff --git a/test/likelihood-simple/likelihood-simple/test-A/expected.tsv b/test/likelihood-simple/likelihood-simple/test-A/expected.tsv new file mode 100644 index 0000000..968ac3e --- /dev/null +++ b/test/likelihood-simple/likelihood-simple/test-A/expected.tsv @@ -0,0 +1,4 @@ +1 +0 +0 +0