Add GLEU
This commit is contained in:
parent
f147438a04
commit
5cff29cf06
@ -1,9 +1,12 @@
|
|||||||
module GEval.BLEU
|
module GEval.BLEU
|
||||||
(precisionCount, bleuStep)
|
(precisionCount, bleuStep, gleuStep)
|
||||||
where
|
where
|
||||||
|
|
||||||
|
import GEval.Common
|
||||||
|
import GEval.PrecisionRecall
|
||||||
|
|
||||||
import qualified Data.MultiSet as MS
|
import qualified Data.MultiSet as MS
|
||||||
import Data.List (minimumBy, zip, zip3, zip4)
|
import Data.List (minimumBy, maximumBy, zip, zip3, zip4)
|
||||||
|
|
||||||
bleuStep :: Ord a => [[a]] -> [a] -> (Int, Int, Int, Int, Int, Int, Int, Int, Int)
|
bleuStep :: Ord a => [[a]] -> [a] -> (Int, Int, Int, Int, Int, Int, Int, Int, Int)
|
||||||
bleuStep refs trans = (prec1, prec2, prec3, prec4, closestLen, len1, len2, len3, len4)
|
bleuStep refs trans = (prec1, prec2, prec3, prec4, closestLen, len1, len2, len3, len4)
|
||||||
@ -21,18 +24,13 @@ bleuStep refs trans = (prec1, prec2, prec3, prec4, closestLen, len1, len2, len3
|
|||||||
len2 = max 0 (len1 - 1)
|
len2 = max 0 (len1 - 1)
|
||||||
len3 = max 0 (len1 - 2)
|
len3 = max 0 (len1 - 2)
|
||||||
len4 = max 0 (len1 - 3)
|
len4 = max 0 (len1 - 3)
|
||||||
bigrams [] = []
|
|
||||||
bigrams [_] = []
|
gleuStep :: Ord a => [[a]] -> [a] -> (Int, Int)
|
||||||
bigrams u = zip u $ tail u
|
gleuStep refs trans = maximumBy (\(g1, t1) (g2, t2) -> (g1 /. t1) `compare` (g2 /. t2)) $ map getBetterCounts refs
|
||||||
trigrams [] = []
|
where getBetterCounts ref = let (matched, expected, got) = getCounts (==) (upToTetragrams ref, transNgrams)
|
||||||
trigrams [_] = []
|
total = max expected got
|
||||||
trigrams [_, _] = []
|
in (matched, total)
|
||||||
trigrams u = zip3 u (tail u) (tail $ tail u)
|
transNgrams = upToTetragrams trans
|
||||||
tetragrams [] = []
|
|
||||||
tetragrams [_] = []
|
|
||||||
tetragrams [_, _] = []
|
|
||||||
tetragrams [_, _, _] = []
|
|
||||||
tetragrams u = zip4 u (tail u) (tail $ tail u) (tail $ tail $ tail u)
|
|
||||||
|
|
||||||
precisionCount :: Ord a => [[a]] -> [a] -> Int
|
precisionCount :: Ord a => [[a]] -> [a] -> Int
|
||||||
precisionCount refs = sum . map (lookFor refs) . MS.toOccurList . MS.fromList
|
precisionCount refs = sum . map (lookFor refs) . MS.toOccurList . MS.fromList
|
||||||
@ -40,3 +38,30 @@ precisionCount refs = sum . map (lookFor refs) . MS.toOccurList . MS.fromList
|
|||||||
findE e freq m = min freq (MS.occur e m)
|
findE e freq m = min freq (MS.occur e m)
|
||||||
minimumOrZero [] = 0
|
minimumOrZero [] = 0
|
||||||
minimumOrZero l = minimum l
|
minimumOrZero l = minimum l
|
||||||
|
|
||||||
|
data Ngram a = Unigram a | Bigram (a, a) | Trigram (a, a, a) | Tetragram (a, a, a, a)
|
||||||
|
deriving (Eq, Show)
|
||||||
|
|
||||||
|
upToTetragrams :: [a] -> [Ngram a]
|
||||||
|
upToTetragrams l = (map Unigram l)
|
||||||
|
++ (map Bigram $ bigrams l)
|
||||||
|
++ (map Trigram $ trigrams l)
|
||||||
|
++ (map Tetragram $ tetragrams l)
|
||||||
|
|
||||||
|
bigrams :: [a] -> [(a, a)]
|
||||||
|
bigrams [] = []
|
||||||
|
bigrams [_] = []
|
||||||
|
bigrams u = zip u $ tail u
|
||||||
|
|
||||||
|
trigrams :: [a] -> [(a, a, a)]
|
||||||
|
trigrams [] = []
|
||||||
|
trigrams [_] = []
|
||||||
|
trigrams [_, _] = []
|
||||||
|
trigrams u = zip3 u (tail u) (tail $ tail u)
|
||||||
|
|
||||||
|
tetragrams :: [a] -> [(a, a, a, a)]
|
||||||
|
tetragrams [] = []
|
||||||
|
tetragrams [_] = []
|
||||||
|
tetragrams [_, _] = []
|
||||||
|
tetragrams [_, _, _] = []
|
||||||
|
tetragrams u = zip4 u (tail u) (tail $ tail u) (tail $ tail $ tail u)
|
||||||
|
@ -97,7 +97,7 @@ defaultLogLossHashedSize :: Word32
|
|||||||
defaultLogLossHashedSize = 10
|
defaultLogLossHashedSize = 10
|
||||||
|
|
||||||
-- | evaluation metric
|
-- | evaluation metric
|
||||||
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI
|
data Metric = RMSE | MSE | BLEU | GLEU | Accuracy | ClippEU | FMeasure Double | NMI
|
||||||
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
||||||
| BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double
|
| BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double
|
||||||
| MultiLabelLogLoss | MultiLabelLikelihood
|
| MultiLabelLogLoss | MultiLabelLikelihood
|
||||||
@ -107,6 +107,7 @@ instance Show Metric where
|
|||||||
show RMSE = "RMSE"
|
show RMSE = "RMSE"
|
||||||
show MSE = "MSE"
|
show MSE = "MSE"
|
||||||
show BLEU = "BLEU"
|
show BLEU = "BLEU"
|
||||||
|
show GLEU = "GLEU"
|
||||||
show Accuracy = "Accuracy"
|
show Accuracy = "Accuracy"
|
||||||
show ClippEU = "ClippEU"
|
show ClippEU = "ClippEU"
|
||||||
show (FMeasure beta) = "F" ++ (show beta)
|
show (FMeasure beta) = "F" ++ (show beta)
|
||||||
@ -138,6 +139,7 @@ instance Read Metric where
|
|||||||
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
|
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
|
||||||
readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)]
|
readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)]
|
||||||
readsPrec _ ('B':'L':'E':'U':theRest) = [(BLEU, theRest)]
|
readsPrec _ ('B':'L':'E':'U':theRest) = [(BLEU, theRest)]
|
||||||
|
readsPrec _ ('G':'L':'E':'U':theRest) = [(GLEU, theRest)]
|
||||||
readsPrec _ ('A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(Accuracy, theRest)]
|
readsPrec _ ('A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(Accuracy, theRest)]
|
||||||
readsPrec _ ('C':'l':'i':'p':'p':'E':'U':theRest) = [(ClippEU, theRest)]
|
readsPrec _ ('C':'l':'i':'p':'p':'E':'U':theRest) = [(ClippEU, theRest)]
|
||||||
readsPrec _ ('N':'M':'I':theRest) = [(NMI, theRest)]
|
readsPrec _ ('N':'M':'I':theRest) = [(NMI, theRest)]
|
||||||
@ -172,6 +174,7 @@ getMetricOrdering :: Metric -> MetricOrdering
|
|||||||
getMetricOrdering RMSE = TheLowerTheBetter
|
getMetricOrdering RMSE = TheLowerTheBetter
|
||||||
getMetricOrdering MSE = TheLowerTheBetter
|
getMetricOrdering MSE = TheLowerTheBetter
|
||||||
getMetricOrdering BLEU = TheHigherTheBetter
|
getMetricOrdering BLEU = TheHigherTheBetter
|
||||||
|
getMetricOrdering GLEU = TheHigherTheBetter
|
||||||
getMetricOrdering Accuracy = TheHigherTheBetter
|
getMetricOrdering Accuracy = TheHigherTheBetter
|
||||||
getMetricOrdering ClippEU = TheHigherTheBetter
|
getMetricOrdering ClippEU = TheHigherTheBetter
|
||||||
getMetricOrdering (FMeasure _) = TheHigherTheBetter
|
getMetricOrdering (FMeasure _) = TheHigherTheBetter
|
||||||
@ -516,6 +519,12 @@ gevalCore' BLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . D
|
|||||||
| c == 0 && r > 0 = 0.0
|
| c == 0 && r > 0 = 0.0
|
||||||
| otherwise = exp (1.0 - (r /. c))
|
| otherwise = exp (1.0 - (r /. c))
|
||||||
|
|
||||||
|
gevalCore' GLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . DLS.splitOn "\t" . unpack) (Right . Prelude.words . unpack) gleuCombine gleuAgg gleuFinal
|
||||||
|
where gleuFinal (m, t) = m /. t
|
||||||
|
gleuCombine (refs, sen) = gleuStep refs sen
|
||||||
|
gleuAgg = CC.foldl gleuFuse (0, 0)
|
||||||
|
gleuFuse (a1, a2) (b1, b2) = (a1+b1, a2+b2)
|
||||||
|
|
||||||
gevalCore' Accuracy _ = gevalCoreWithoutInput (Right . strip) (Right . strip) hitOrMiss averageC id
|
gevalCore' Accuracy _ = gevalCoreWithoutInput (Right . strip) (Right . strip) hitOrMiss averageC id
|
||||||
where hitOrMiss (exp, got) =
|
where hitOrMiss (exp, got) =
|
||||||
-- first try to parse what we got as a probability distribution
|
-- first try to parse what we got as a probability distribution
|
||||||
|
@ -52,6 +52,7 @@ createFile filePath contents = do
|
|||||||
writeFile filePath contents
|
writeFile filePath contents
|
||||||
|
|
||||||
readmeMDContents :: Metric -> String -> String
|
readmeMDContents :: Metric -> String -> String
|
||||||
|
readmeMDContents GLEU testName = readmeMDContents BLEU testName
|
||||||
readmeMDContents BLEU testName = [i|
|
readmeMDContents BLEU testName = [i|
|
||||||
GEval sample machine translation challenge
|
GEval sample machine translation challenge
|
||||||
==========================================
|
==========================================
|
||||||
@ -313,6 +314,7 @@ configContents metrics precision testName = unwords (Prelude.map (\metric -> ("-
|
|||||||
precisionOpt (Just p) = " --precision " ++ (show p)
|
precisionOpt (Just p) = " --precision " ++ (show p)
|
||||||
|
|
||||||
trainContents :: Metric -> String
|
trainContents :: Metric -> String
|
||||||
|
trainContents GLEU = trainContents BLEU
|
||||||
trainContents BLEU = [hereLit|alussa loi jumala taivaan ja maan he mea hanga na te atua i te timatanga te rangi me te whenua
|
trainContents BLEU = [hereLit|alussa loi jumala taivaan ja maan he mea hanga na te atua i te timatanga te rangi me te whenua
|
||||||
ja maa oli autio ja tyhjä , ja pimeys oli syvyyden päällä a kahore he ahua o te whenua , i takoto kau ; he pouri ano a runga i te mata o te hohonu
|
ja maa oli autio ja tyhjä , ja pimeys oli syvyyden päällä a kahore he ahua o te whenua , i takoto kau ; he pouri ano a runga i te mata o te hohonu
|
||||||
ja jumalan henki liikkui vetten päällä na ka whakapaho te wairua o te atua i runga i te kare o nga wai
|
ja jumalan henki liikkui vetten päällä na ka whakapaho te wairua o te atua i runga i te kare o nga wai
|
||||||
@ -384,6 +386,7 @@ trainContents _ = [hereLit|0.06 0.39 0 0.206
|
|||||||
|]
|
|]
|
||||||
|
|
||||||
devInContents :: Metric -> String
|
devInContents :: Metric -> String
|
||||||
|
devInContents GLEU = devInContents BLEU
|
||||||
devInContents BLEU = [hereLit|ja jumala sanoi : " tulkoon valkeus " , ja valkeus tuli
|
devInContents BLEU = [hereLit|ja jumala sanoi : " tulkoon valkeus " , ja valkeus tuli
|
||||||
ja jumala näki , että valkeus oli hyvä ; ja jumala erotti valkeuden pimeydestä
|
ja jumala näki , että valkeus oli hyvä ; ja jumala erotti valkeuden pimeydestä
|
||||||
|]
|
|]
|
||||||
@ -431,6 +434,7 @@ devInContents _ = [hereLit|0.72 0 0.007
|
|||||||
|]
|
|]
|
||||||
|
|
||||||
devExpectedContents :: Metric -> String
|
devExpectedContents :: Metric -> String
|
||||||
|
devExpectedContents GLEU = devExpectedContents BLEU
|
||||||
devExpectedContents BLEU = [hereLit|a ka ki te atua , kia marama : na ka marama
|
devExpectedContents BLEU = [hereLit|a ka ki te atua , kia marama : na ka marama
|
||||||
a ka kite te atua i te marama , he pai : a ka wehea e te atua te marama i te pouri
|
a ka kite te atua i te marama , he pai : a ka wehea e te atua te marama i te pouri
|
||||||
|]
|
|]
|
||||||
@ -478,6 +482,7 @@ devExpectedContents _ = [hereLit|0.82
|
|||||||
|]
|
|]
|
||||||
|
|
||||||
testInContents :: Metric -> String
|
testInContents :: Metric -> String
|
||||||
|
testInContents GLEU = testInContents BLEU
|
||||||
testInContents BLEU = [hereLit|ja jumala kutsui valkeuden päiväksi , ja pimeyden hän kutsui yöksi
|
testInContents BLEU = [hereLit|ja jumala kutsui valkeuden päiväksi , ja pimeyden hän kutsui yöksi
|
||||||
ja tuli ehtoo , ja tuli aamu , ensimmäinen päivä
|
ja tuli ehtoo , ja tuli aamu , ensimmäinen päivä
|
||||||
|]
|
|]
|
||||||
@ -524,6 +529,7 @@ I hate
|
|||||||
|]
|
|]
|
||||||
|
|
||||||
testExpectedContents :: Metric -> String
|
testExpectedContents :: Metric -> String
|
||||||
|
testExpectedContents GLEU = testExpectedContents BLEU
|
||||||
testExpectedContents BLEU = [hereLit|na ka huaina e te atua te marama ko te awatea , a ko te pouri i huaina e ia ko te po
|
testExpectedContents BLEU = [hereLit|na ka huaina e te atua te marama ko te awatea , a ko te pouri i huaina e ia ko te po
|
||||||
a ko te ahiahi , ko te ata , he ra kotahi
|
a ko te ahiahi , ko te ata , he ra kotahi
|
||||||
|]
|
|]
|
||||||
|
@ -169,7 +169,7 @@ metricReader = many $ option auto -- actually `some` should be used inst
|
|||||||
( long "metric" -- --metric might be in the config.txt file...
|
( long "metric" -- --metric might be in the config.txt file...
|
||||||
<> short 'm'
|
<> short 'm'
|
||||||
<> metavar "METRIC"
|
<> metavar "METRIC"
|
||||||
<> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels or CharMatch" )
|
<> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels or CharMatch" )
|
||||||
|
|
||||||
altMetricReader :: Parser (Maybe Metric)
|
altMetricReader :: Parser (Maybe Metric)
|
||||||
altMetricReader = optional $ option auto
|
altMetricReader = optional $ option auto
|
||||||
|
@ -81,6 +81,9 @@ main = hspec $ do
|
|||||||
runGEvalTest "bleu-empty" `shouldReturnAlmost` 0.0000
|
runGEvalTest "bleu-empty" `shouldReturnAlmost` 0.0000
|
||||||
it "with tokenization" $
|
it "with tokenization" $
|
||||||
runGEvalTest "bleu-with-tokenization" `shouldReturnAlmost` 0.6501914150070065
|
runGEvalTest "bleu-with-tokenization" `shouldReturnAlmost` 0.6501914150070065
|
||||||
|
describe "GLEU" $ do
|
||||||
|
it "simple example" $
|
||||||
|
runGEvalTest "gleu-simple" `shouldReturnAlmost` 0.462962962962963
|
||||||
describe "Accuracy" $ do
|
describe "Accuracy" $ do
|
||||||
it "simple example" $
|
it "simple example" $
|
||||||
runGEvalTest "accuracy-simple" `shouldReturnAlmost` 0.6
|
runGEvalTest "accuracy-simple" `shouldReturnAlmost` 0.6
|
||||||
|
3
test/gleu-simple/gleu-simple-solution/test-A/out.tsv
Normal file
3
test/gleu-simple/gleu-simple-solution/test-A/out.tsv
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
I can sing the rainbow in a book .
|
||||||
|
The
|
||||||
|
This is a sample sentence .
|
|
1
test/gleu-simple/gleu-simple/config.txt
Normal file
1
test/gleu-simple/gleu-simple/config.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
--metric GLEU
|
3
test/gleu-simple/gleu-simple/test-A/expected.tsv
Normal file
3
test/gleu-simple/gleu-simple/test-A/expected.tsv
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
I can see a rainbow .
|
||||||
|
The dog .
|
||||||
|
This is a sample sentence .
|
|
Loading…
Reference in New Issue
Block a user