geval/src/GEval/BLEU.hs

47 lines
1.7 KiB
Haskell
Raw Normal View History

2015-08-24 22:23:35 +02:00
module GEval.BLEU
2015-08-24 23:40:40 +02:00
(precisionCount, bleuStep)
2015-08-24 22:23:35 +02:00
where
import qualified Data.MultiSet as MS
import Data.List (minimumBy, zip, zip3, zip4)
2015-08-25 16:10:20 +02:00
import Debug.Trace
2015-08-24 22:23:35 +02:00
bleuStep :: Ord a => [[a]] -> [a] -> (Int, Int, Int, Int, Int, Int, Int, Int, Int)
bleuStep refs trans = (prec1, prec2, prec3, prec4, closestLen, len1, len2, len3, len4)
where prec1 = precisionCountForNgrams id
prec2 = precisionCountForNgrams bigrams
prec3 = precisionCountForNgrams trigrams
prec4 = precisionCountForNgrams tetragrams
precisionCountForNgrams fun = precisionCount (map fun refs) (fun trans)
closestLen = minimumBy closestCmp $ map length refs
closestCmp x y
| ((abs (x - len1)) < (abs (y - len1))) = LT
| ((abs (x - len1)) > (abs (y - len1))) = GT
| ((abs (x - len1)) == (abs (y - len1))) = x `compare` y
len1 = length trans
len2 = max 0 (len1 - 1)
len3 = max 0 (len1 - 2)
len4 = max 0 (len1 - 3)
bigrams [] = []
bigrams [_] = []
bigrams u = zip u $ tail u
trigrams [] = []
trigrams [_] = []
trigrams [_, _] = []
trigrams u = zip3 u (tail u) (tail $ tail u)
tetragrams [] = []
tetragrams [_] = []
tetragrams [_, _] = []
tetragrams [_, _, _] = []
tetragrams u = zip4 u (tail u) (tail $ tail u) (tail $ tail $ tail u)
precisionCount :: Ord a => [[a]] -> [a] -> Int
precisionCount refs = sum . map (lookFor refs) . MS.toOccurList . MS.fromList
where lookFor refs (e, freq) = minimumOrZero $ filter (> 0) $ map (findE e freq) $ map MS.fromList refs
findE e freq m = min freq (MS.occur e m)
minimumOrZero [] = 0
minimumOrZero l = minimum l