diff --git a/geval.cabal b/geval.cabal index 0bf6348..52bd563 100644 --- a/geval.cabal +++ b/geval.cabal @@ -1,5 +1,5 @@ name: geval -version: 0.5.2.0 +version: 0.5.3.0 synopsis: Machine learning evaluation tools description: Please see README.md homepage: http://github.com/name/project diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index ba8d734..219db1e 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -79,7 +79,7 @@ defaultLogLossHashedSize :: Word32 defaultLogLossHashedSize = 10 data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch - | MAP + | MAP | LogLoss deriving (Eq) instance Show Metric where @@ -98,6 +98,7 @@ instance Show Metric where (show nbOfBits)) show CharMatch = "CharMatch" show MAP = "MAP" + show LogLoss = "LogLoss" instance Read Metric where readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)] @@ -112,6 +113,7 @@ instance Read Metric where readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of [(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)] _ -> [(LogLossHashed defaultLogLossHashedSize, theRest)] + readsPrec _ ('L':'o':'g':'L':'o':'s':'s':theRest) = [(LogLoss, theRest)] readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)] readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)] @@ -128,6 +130,7 @@ getMetricOrdering NMI = TheHigherTheBetter getMetricOrdering (LogLossHashed _) = TheLowerTheBetter getMetricOrdering CharMatch = TheHigherTheBetter getMetricOrdering MAP = TheHigherTheBetter +getMetricOrdering LogLoss = TheLowerTheBetter defaultOutDirectory = "." defaultTestName = "test-A" @@ -295,6 +298,9 @@ gevalCore' :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric -> Line gevalCore' MSE _ = gevalCoreWithoutInput outParser outParser itemError averageC id where outParser = getValue . TR.double +gevalCore' LogLoss _ = gevalCoreWithoutInput outParser outParser itemLogLossError averageC id + where outParser = getValue . TR.double + gevalCore' BLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . DLS.splitOn "\t" . unpack) (Right . Prelude.words . unpack) bleuCombine bleuAgg bleuFinal where bleuFinal (p1, p2, p3, p4, rl, l1, l2, l3, l4) = ((p1 /. l1) * (p2 /. l2) * (p3 /. l3) * (p4 /. l4)) ** 0.25 * (brevityPenalty l1 rl) bleuCombine (refs, sen) = bleuStep refs sen @@ -490,6 +496,17 @@ items (LineSource lineSource _ _) parser = itemError :: (Double, Double) -> Double itemError (exp, out) = (exp-out)**2 +itemLogLossError :: (Double, Double) -> Double +itemLogLossError (exp, out) + | exp' > 0.5 = - (log out') + | otherwise = - (log (1 - out')) + where exp' = normalizeAsProb exp + out' = normalizeAsProb out + normalizeAsProb v + | v >= 1.0 = 1.0 + | v <= 0.0 = 0.0 + | otherwise = v + getValue :: Num a => Either String (a, Text) -> Either String a getValue (Right (x, reminder)) = if Data.Text.null reminder || Data.Text.head reminder == '\t' diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs index 411f272..b0e53a5 100644 --- a/src/GEval/CreateChallenge.hs +++ b/src/GEval/CreateChallenge.hs @@ -160,6 +160,15 @@ See Christopher D. Manning, Prabhakar Raghavan and Hinrich Schütze, more discussion of the metric. |] ++ (commonReadmeMDContents testName) +readmeMDContents LogLoss testName = [i| +Give the probability of a positive sentiment +============================================ + +Give the probability that a sentence expresses a positive sentiment. + +This a sample challenge for the log-loss metric. + +|] ++ (commonReadmeMDContents testName) readmeMDContents _ testName = [i| GEval sample challenge @@ -245,6 +254,11 @@ honour GB honor titbit GB smakołyk tidbit US smakołyk |] +trainContents LogLoss = [hereLit|0.0 Hell, no!!! +0.0 I hate this stuff +1.0 Lekker!!! +0.0 Boring, boring, boring +|] trainContents _ = [hereLit|0.06 0.39 0 0.206 1.00 1.00 1 0.017 317.8 5.20 67 0.048 @@ -276,6 +290,10 @@ devInContents MAP = [hereLit|US noc GB wózek dziecięcy GB wizualizować |] +devInContents LogLoss = [hereLit|Great stuff! +Boring stuff +That's good +|] devInContents _ = [hereLit|0.72 0 0.007 9.54 62 0.054 |] @@ -305,6 +323,10 @@ devExpectedContents MAP = [hereLit|night nite pram visualise |] +devExpectedContents LogLoss = [hereLit|1.0 +0.0 +1.0 +|] devExpectedContents _ = [hereLit|0.82 95.2 |] @@ -336,6 +358,10 @@ testInContents MAP = [hereLit|US wózek dziecięcy GB słoń US słoń |] +testInContents LogLoss = [hereLit|That's great, ha, ha, I love it! +Super-duper!! +That is incredibly boring. +|] testInContents _ = [hereLit|1.52 2 0.093 30.06 14 0.009 |] @@ -367,6 +393,10 @@ testExpectedContents MAP = [hereLit|trolley elephant elephant |] +testExpectedContents LogLoss = [hereLit|1.0 +1.0 +0.0 +|] testExpectedContents _ = [hereLit|0.11 17.2 |] diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index 617d718..d5fe2f8 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -100,7 +100,7 @@ metricReader = option auto <> value defaultMetric <> showDefault <> metavar "METRIC" - <> help "Metric to be used - RMSE, MSE, Accuracy, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed or CharMatch" ) + <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed or CharMatch" ) runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue)) runGEval args = do diff --git a/test/Spec.hs b/test/Spec.hs index e522718..c190b4d 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -175,6 +175,11 @@ main = hspec $ do ["one", "one"]) `shouldBeAlmost` 0.5 it "simple test" $ do runGEvalTest "map-simple" `shouldReturnAlmost` 0.444444444 + describe "LogLoss" $ do + it "simple" $ do + runGEvalTest "logloss-simple" `shouldReturnAlmost` 0.31824 + it "perfect" $ do + runGEvalTest "logloss-perfect" `shouldReturnAlmost` 0.0 describe "evaluating single lines" $ do it "RMSE" $ do gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla") diff --git a/test/logloss-perfect/logloss-perfect-solution/test-A/out.tsv b/test/logloss-perfect/logloss-perfect-solution/test-A/out.tsv new file mode 100644 index 0000000..56c3026 --- /dev/null +++ b/test/logloss-perfect/logloss-perfect-solution/test-A/out.tsv @@ -0,0 +1,5 @@ +1.0 +0 +0.0 +1 +0 diff --git a/test/logloss-perfect/logloss-perfect/config.txt b/test/logloss-perfect/logloss-perfect/config.txt new file mode 100644 index 0000000..84fee6c --- /dev/null +++ b/test/logloss-perfect/logloss-perfect/config.txt @@ -0,0 +1 @@ +--metric LogLoss diff --git a/test/logloss-perfect/logloss-perfect/test-A/expected.tsv b/test/logloss-perfect/logloss-perfect/test-A/expected.tsv new file mode 100644 index 0000000..d69e70b --- /dev/null +++ b/test/logloss-perfect/logloss-perfect/test-A/expected.tsv @@ -0,0 +1,5 @@ +1 +0 +0 +1 +0 diff --git a/test/logloss-simple/logloss-simple-solution/test-A/out.tsv b/test/logloss-simple/logloss-simple-solution/test-A/out.tsv new file mode 100644 index 0000000..6978be2 --- /dev/null +++ b/test/logloss-simple/logloss-simple-solution/test-A/out.tsv @@ -0,0 +1,4 @@ +0.7 +0 +0.0 +0.6 diff --git a/test/logloss-simple/logloss-simple/config.txt b/test/logloss-simple/logloss-simple/config.txt new file mode 100644 index 0000000..84fee6c --- /dev/null +++ b/test/logloss-simple/logloss-simple/config.txt @@ -0,0 +1 @@ +--metric LogLoss diff --git a/test/logloss-simple/logloss-simple/test-A/expected.tsv b/test/logloss-simple/logloss-simple/test-A/expected.tsv new file mode 100644 index 0000000..968ac3e --- /dev/null +++ b/test/logloss-simple/logloss-simple/test-A/expected.tsv @@ -0,0 +1,4 @@ +1 +0 +0 +0