diff --git a/geval.cabal b/geval.cabal index 7cea59f..c5950cb 100644 --- a/geval.cabal +++ b/geval.cabal @@ -34,6 +34,7 @@ library , Data.Conduit.SmartSource , Data.Conduit.Rank , GEval.FeatureExtractor + , GEval.WER , Text.Tokenizer , Paths_geval build-depends: base >= 4.7 && < 5 @@ -74,6 +75,7 @@ library , process , uri-encode , MissingH + , array default-language: Haskell2010 executable geval diff --git a/src/GEval/BLEU.hs b/src/GEval/BLEU.hs index 3bb0e42..3d7650f 100644 --- a/src/GEval/BLEU.hs +++ b/src/GEval/BLEU.hs @@ -1,5 +1,3 @@ -{-# LANGUAGE BangPatterns #-} - module GEval.BLEU (precisionCount, bleuStep, gleuStep) where diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index b5fa607..8dab4b7 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -80,6 +80,7 @@ import GEval.LogLossHashed import GEval.CharMatch import GEval.BIO import GEval.ProbList +import GEval.WER import Data.Conduit.AutoDecompress import Text.Tokenizer @@ -97,7 +98,7 @@ defaultLogLossHashedSize :: Word32 defaultLogLossHashedSize = 10 -- | evaluation metric -data Metric = RMSE | MSE | BLEU | GLEU | Accuracy | ClippEU | FMeasure Double | NMI +data Metric = RMSE | MSE | BLEU | GLEU | WER | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood | BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double | MultiLabelLogLoss | MultiLabelLikelihood @@ -108,6 +109,7 @@ instance Show Metric where show MSE = "MSE" show BLEU = "BLEU" show GLEU = "GLEU" + show WER = "WER" show Accuracy = "Accuracy" show ClippEU = "ClippEU" show (FMeasure beta) = "F" ++ (show beta) @@ -140,6 +142,7 @@ instance Read Metric where readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)] readsPrec _ ('B':'L':'E':'U':theRest) = [(BLEU, theRest)] readsPrec _ ('G':'L':'E':'U':theRest) = [(GLEU, theRest)] + readsPrec _ ('W':'E':'R':theRest) = [(WER, theRest)] readsPrec _ ('A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(Accuracy, theRest)] readsPrec _ ('C':'l':'i':'p':'p':'E':'U':theRest) = [(ClippEU, theRest)] readsPrec _ ('N':'M':'I':theRest) = [(NMI, theRest)] @@ -175,6 +178,7 @@ getMetricOrdering RMSE = TheLowerTheBetter getMetricOrdering MSE = TheLowerTheBetter getMetricOrdering BLEU = TheHigherTheBetter getMetricOrdering GLEU = TheHigherTheBetter +getMetricOrdering WER = TheLowerTheBetter getMetricOrdering Accuracy = TheHigherTheBetter getMetricOrdering ClippEU = TheHigherTheBetter getMetricOrdering (FMeasure _) = TheHigherTheBetter @@ -525,6 +529,8 @@ gevalCore' GLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . D gleuAgg = CC.foldl gleuFuse (0, 0) gleuFuse (a1, a2) (b1, b2) = (a1+b1, a2+b2) +gevalCore' WER _ = gevalCoreWithoutInput (Right . Prelude.words . unpack) (Right . Prelude.words . unpack) (uncurry werStep) averageC id + gevalCore' Accuracy _ = gevalCoreWithoutInput (Right . strip) (Right . strip) hitOrMiss averageC id where hitOrMiss (exp, got) = -- first try to parse what we got as a probability distribution diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index 9f563eb..b5d12ff 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -169,7 +169,7 @@ metricReader = many $ option auto -- actually `some` should be used inst ( long "metric" -- --metric might be in the config.txt file... <> short 'm' <> metavar "METRIC" - <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels or CharMatch" ) + <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), WER, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels or CharMatch" ) altMetricReader :: Parser (Maybe Metric) altMetricReader = optional $ option auto diff --git a/src/GEval/WER.hs b/src/GEval/WER.hs new file mode 100644 index 0000000..0767592 --- /dev/null +++ b/src/GEval/WER.hs @@ -0,0 +1,26 @@ +module GEval.WER + (werStep) + where + +import Data.Array +import GEval.Common + +werStep :: Eq a => [a] -> [a] -> Double +werStep expected got = (fromIntegral $ distance expected got) `safeDoubleDiv` (fromIntegral $ length expected) + +-- see https://stackoverflow.com/questions/6718787/levenshtein-distance-cost +distance u v = memo ! (m, n) + where memo = listArray ((0, 0), (m, n)) [dist i j | i <- [0..m], j <- [0..n]] + + dist 0 j = j + dist i 0 = i + dist i j = minimum [ + 1 + memo ! (i, j-1), + 1 + memo ! (i-1, j), + fromEnum (u' ! (i-1) /= v' ! (j-1)) + memo ! (i-1, j-1) ] + + u' = listArray (0, m-1) u + v' = listArray (0, n-1) v + + m = length u + n = length v diff --git a/test/Spec.hs b/test/Spec.hs index c0e0414..6dc5721 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -88,6 +88,9 @@ main = hspec $ do runGEvalTest "gleu-empty" `shouldReturnAlmost` 0.0 it "perfect translation" $ runGEvalTest "gleu-perfect" `shouldReturnAlmost` 1.0 + describe "WER" $ do + it "simple example" $ + runGEvalTest "wer-simple" `shouldReturnAlmost` 0.5555555555 describe "Accuracy" $ do it "simple example" $ runGEvalTest "accuracy-simple" `shouldReturnAlmost` 0.6 diff --git a/test/wer-simple/wer-simple-solution/test-A/out.tsv b/test/wer-simple/wer-simple-solution/test-A/out.tsv new file mode 100644 index 0000000..d961166 --- /dev/null +++ b/test/wer-simple/wer-simple-solution/test-A/out.tsv @@ -0,0 +1,3 @@ +It was a very very tall tree +No +A perfect match . diff --git a/test/wer-simple/wer-simple/config.txt b/test/wer-simple/wer-simple/config.txt new file mode 100644 index 0000000..7b39834 --- /dev/null +++ b/test/wer-simple/wer-simple/config.txt @@ -0,0 +1 @@ +--metric WER diff --git a/test/wer-simple/wer-simple/test-A/expected.tsv b/test/wer-simple/wer-simple/test-A/expected.tsv new file mode 100644 index 0000000..edd035d --- /dev/null +++ b/test/wer-simple/wer-simple/test-A/expected.tsv @@ -0,0 +1,3 @@ +It is a tall tree . +Yes +A perfect match .