Speed up GLEU
This commit is contained in:
parent
680bc80f40
commit
4f09a1802f
@ -1,3 +1,5 @@
|
|||||||
|
{-# LANGUAGE BangPatterns #-}
|
||||||
|
|
||||||
module GEval.BLEU
|
module GEval.BLEU
|
||||||
(precisionCount, bleuStep, gleuStep)
|
(precisionCount, bleuStep, gleuStep)
|
||||||
where
|
where
|
||||||
@ -27,10 +29,13 @@ bleuStep refs trans = (prec1, prec2, prec3, prec4, closestLen, len1, len2, len3
|
|||||||
|
|
||||||
gleuStep :: Ord a => [[a]] -> [a] -> (Int, Int)
|
gleuStep :: Ord a => [[a]] -> [a] -> (Int, Int)
|
||||||
gleuStep refs trans = maximumBy (\(g1, t1) (g2, t2) -> (g1 /. t1) `compare` (g2 /. t2)) $ map getBetterCounts refs
|
gleuStep refs trans = maximumBy (\(g1, t1) (g2, t2) -> (g1 /. t1) `compare` (g2 /. t2)) $ map getBetterCounts refs
|
||||||
where getBetterCounts ref = let (matched, expected, got) = getCounts (==) (upToTetragrams ref, transNgrams)
|
where getBetterCounts ref = let (matched1, expected1, got1) = getCounts (==) (ref, trans1grams)
|
||||||
total = max expected got
|
(matched2, expected2, got2) = getCounts (==) (bigrams ref, trans2grams)
|
||||||
in (matched, total)
|
(matched3, expected3, got3) = getCounts (==) (trigrams ref, trans3grams)
|
||||||
transNgrams = upToTetragrams trans
|
(matched4, expected4, got4) = getCounts (==) (tetragrams ref, trans4grams)
|
||||||
|
total = max (expected1 + expected2 + expected3 + expected4) (got1 + got2 + got3 + got4)
|
||||||
|
in (matched1 + matched2 + matched3 + matched4, total)
|
||||||
|
(trans1grams, trans2grams, trans3grams, trans4grams) = upToTetragrams trans
|
||||||
|
|
||||||
precisionCount :: Ord a => [[a]] -> [a] -> Int
|
precisionCount :: Ord a => [[a]] -> [a] -> Int
|
||||||
precisionCount refs = sum . map (lookFor refs) . MS.toOccurList . MS.fromList
|
precisionCount refs = sum . map (lookFor refs) . MS.toOccurList . MS.fromList
|
||||||
@ -39,14 +44,8 @@ precisionCount refs = sum . map (lookFor refs) . MS.toOccurList . MS.fromList
|
|||||||
minimumOrZero [] = 0
|
minimumOrZero [] = 0
|
||||||
minimumOrZero l = minimum l
|
minimumOrZero l = minimum l
|
||||||
|
|
||||||
data Ngram a = Unigram a | Bigram (a, a) | Trigram (a, a, a) | Tetragram (a, a, a, a)
|
upToTetragrams :: [a] -> ([a], [(a, a)], [(a, a, a)], [(a, a, a, a)])
|
||||||
deriving (Eq, Show)
|
upToTetragrams l = (l, bigrams l, trigrams l, tetragrams l)
|
||||||
|
|
||||||
upToTetragrams :: [a] -> [Ngram a]
|
|
||||||
upToTetragrams l = (map Unigram l)
|
|
||||||
++ (map Bigram $ bigrams l)
|
|
||||||
++ (map Trigram $ trigrams l)
|
|
||||||
++ (map Tetragram $ tetragrams l)
|
|
||||||
|
|
||||||
bigrams :: [a] -> [(a, a)]
|
bigrams :: [a] -> [(a, a)]
|
||||||
bigrams [] = []
|
bigrams [] = []
|
||||||
|
Loading…
Reference in New Issue
Block a user