43 lines
1.7 KiB
Haskell
43 lines
1.7 KiB
Haskell
|
module GEval.BLEU
|
||
|
(precisionCount)
|
||
|
where
|
||
|
|
||
|
import qualified Data.MultiSet as MS
|
||
|
import Data.List (minimumBy, zip, zip3, zip4)
|
||
|
|
||
|
bleuStep :: Ord a => [[a]] -> [a] -> (Int, Int, Int, Int, Int, Int, Int, Int, Int)
|
||
|
bleuStep refs trans = (prec1, prec2, prec3, prec4, closestLen, len1, len2, len3, len4)
|
||
|
where prec1 = precisionCountForNgrams id
|
||
|
prec2 = precisionCountForNgrams bigrams
|
||
|
prec3 = precisionCountForNgrams trigrams
|
||
|
prec4 = precisionCountForNgrams tetragrams
|
||
|
precisionCountForNgrams fun = precisionCount (map fun refs) (fun trans)
|
||
|
closestLen = minimumBy closestCmp $ map length refs
|
||
|
closestCmp x y
|
||
|
| ((abs (x - len1)) < (abs (y - len1))) = LT
|
||
|
| ((abs (x - len1)) > (abs (y - len1))) = GT
|
||
|
| ((abs (x - len1)) == (abs (y - len1))) = x `compare` y
|
||
|
len1 = length trans
|
||
|
len2 = max 0 (len1 - 1)
|
||
|
len3 = max 0 (len1 - 2)
|
||
|
len4 = max 0 (len1 - 3)
|
||
|
bigrams [] = []
|
||
|
bigrams [_] = []
|
||
|
bigrams u = zip u $ tail u
|
||
|
trigrams [] = []
|
||
|
trigrams [_] = []
|
||
|
trigrams [_, _] = []
|
||
|
trigrams u = zip3 u (tail u) (tail $ tail u)
|
||
|
tetragrams [] = []
|
||
|
tetragrams [_] = []
|
||
|
tetragrams [_, _] = []
|
||
|
tetragrams [_, _, _] = []
|
||
|
tetragrams u = zip4 u (tail u) (tail $ tail u) (tail $ tail $ tail u)
|
||
|
|
||
|
precisionCount :: Ord a => [[a]] -> [a] -> Int
|
||
|
precisionCount refs = sum . map (lookFor refs) . MS.toOccurList . MS.fromList
|
||
|
where lookFor refs (e, freq) = minimumOrZero $ filter (> 0) $ map (findE e freq) $ map MS.fromList refs
|
||
|
findE e freq m = min freq (MS.occur e m)
|
||
|
minimumOrZero [] = 0
|
||
|
minimumOrZero l = minimum l
|