add MAP metric

This commit is contained in:
Filip Gralinski 2017-12-12 07:54:21 +01:00 committed by Filip Gralinski
parent 7eef53832d
commit 9643719193
8 changed files with 80 additions and 4 deletions

View File

@ -58,6 +58,7 @@ defaultLogLossHashedSize :: Word32
defaultLogLossHashedSize = 10 defaultLogLossHashedSize = 10
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch
| MAP
deriving (Eq) deriving (Eq)
instance Show Metric where instance Show Metric where
@ -75,6 +76,7 @@ instance Show Metric where
else else
(show nbOfBits)) (show nbOfBits))
show CharMatch = "CharMatch" show CharMatch = "CharMatch"
show MAP = "MAP"
instance Read Metric where instance Read Metric where
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)] readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
@ -90,6 +92,7 @@ instance Read Metric where
[(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)] [(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
_ -> [(LogLossHashed defaultLogLossHashedSize, theRest)] _ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)] readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
@ -103,6 +106,7 @@ getMetricOrdering (FMeasure _) = TheHigherTheBetter
getMetricOrdering NMI = TheHigherTheBetter getMetricOrdering NMI = TheHigherTheBetter
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
getMetricOrdering CharMatch = TheHigherTheBetter getMetricOrdering CharMatch = TheHigherTheBetter
getMetricOrdering MAP = TheHigherTheBetter
defaultOutDirectory = "." defaultOutDirectory = "."
defaultTestName = "test-A" defaultTestName = "test-A"
@ -262,6 +266,12 @@ gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings m
gevalCore' NMI _ = gevalCoreWithoutInput id id id (CC.foldl updateConfusionMatrix M.empty) normalizedMutualInformationFromConfusionMatrix gevalCore' NMI _ = gevalCoreWithoutInput id id id (CC.foldl updateConfusionMatrix M.empty) normalizedMutualInformationFromConfusionMatrix
-- MAP (Mean Average Precision) clause: two of the arguments unpack a line and
-- split it on TAB characters into a list of items, the pair of lists is then
-- scored with calculateMAPForOneResult, the scores are combined with averageC,
-- and the trailing `id` leaves the combined value unchanged.
-- NOTE(review): the roles of the gevalCoreWithoutInput arguments (expected-line
-- parser, output-line parser, item scorer, aggregator, finalizer) are inferred
-- from this call and the neighbouring NMI clause -- confirm against the
-- definition of gevalCoreWithoutInput.
gevalCore' MAP _ = gevalCoreWithoutInput (DLS.splitOn "\t" . unpack)
(DLS.splitOn "\t" . unpack)
(\(e,g) -> calculateMAPForOneResult e g)
averageC
id
gevalCore' (LogLossHashed nbOfBits) _ = helper nbOfBits gevalCore' (LogLossHashed nbOfBits) _ = helper nbOfBits
-- for LogLossHashed we "salt" each hash with the line number -- for LogLossHashed we "salt" each hash with the line number
where helper nbOfBits expectedFilePath outFilePath = where helper nbOfBits expectedFilePath outFilePath =

View File

@ -142,6 +142,24 @@ Directory structure
* `${testName}/expected.tsv` American reference text for the test set * `${testName}/expected.tsv` American reference text for the test set
|] |]
readmeMDContents MAP testName = [i|
English word for a Polish word
================================================
Give a (British or American) English equivalent of a Polish word.
This is a sample challenge for MAP evaluation metric. MAP (Mean Average Precision)
is used, mostly in information retrieval, for evaluation of ranked retrieval results.
The relevant items are separated by TABs (could be just one item) and returned items
should be separated by TABs.
See Christopher D. Manning, Prabhakar Raghavan and Hinrich Schütze,
"Introduction to Information Retrieval", Cambridge University Press, 2008 for
more discussion of the metric.
|] ++ (commonReadmeMDContents testName)
readmeMDContents _ testName = [i| readmeMDContents _ testName = [i|
GEval sample challenge GEval sample challenge
====================== ======================
@ -209,7 +227,6 @@ trainContents NMI = [hereLit|pl Kto pod kim dołki kopie, ten sam w nie wpada.
en The pen is mightier than the sword. en The pen is mightier than the sword.
pl Baba z wozu, koniom lżej. pl Baba z wozu, koniom lżej.
|] |]
trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota
Basia ma psa Basia ma psa
Nie kupujemy kota w worku Nie kupujemy kota w worku
@ -220,6 +237,11 @@ Camptown race-track five miles long, Oh, doo-dah day!
I come down dah wid my hat caved in, Doo-dah! doo-dah! I come down dah wid my hat caved in, Doo-dah! doo-dah!
I go back home wid a pocket full of tin, Oh, doo-dah day! I go back home wid a pocket full of tin, Oh, doo-dah day!
|] |]
trainContents MAP = [hereLit|honor US honor
honour GB honor
titbit GB smakołyk
tidbit US smakołyk
|]
trainContents _ = [hereLit|0.06 0.39 0 0.206 trainContents _ = [hereLit|0.06 0.39 0 0.206
1.00 1.00 1 0.017 1.00 1.00 1 0.017
317.8 5.20 67 0.048 317.8 5.20 67 0.048
@ -247,6 +269,10 @@ devInContents CharMatch = [hereLit|honour to organise
nothing to change nothing to change
time traveller time traveller
|] |]
devInContents MAP = [hereLit|US noc
GB wózek dziecięcy
GB wizualizować
|]
devInContents _ = [hereLit|0.72 0 0.007 devInContents _ = [hereLit|0.72 0 0.007
9.54 62 0.054 9.54 62 0.054
|] |]
@ -272,6 +298,10 @@ devExpectedContents CharMatch = [hereLit|honor to organize
nothing to change nothing to change
time traveler time traveler
|] |]
devExpectedContents MAP = [hereLit|night nite
pram
visualise
|]
devExpectedContents _ = [hereLit|0.82 devExpectedContents _ = [hereLit|0.82
95.2 95.2
|] |]
@ -299,6 +329,10 @@ testInContents CharMatch = [hereLit|paralysed by practise
recognise recognise
nothing nothing
|] |]
testInContents MAP = [hereLit|US wózek dziecięcy
GB słoń
US słoń
|]
testInContents _ = [hereLit|1.52 2 0.093 testInContents _ = [hereLit|1.52 2 0.093
30.06 14 0.009 30.06 14 0.009
|] |]
@ -326,6 +360,10 @@ testExpectedContents CharMatch = [hereLit|paralyzed by practice
recognize recognize
nothing nothing
|] |]
testExpectedContents MAP = [hereLit|trolley
elephant
elephant
|]
testExpectedContents _ = [hereLit|0.11 testExpectedContents _ = [hereLit|0.11
17.2 17.2
|] |]

View File

@ -82,7 +82,7 @@ metricReader = option auto
<> value defaultMetric <> value defaultMetric
<> showDefault <> showDefault
<> metavar "METRIC" <> metavar "METRIC"
<> help "Metric to be used - RMSE, MSE, Accuracy, F-measure (specify as F1, F2, F0.25, etc.), BLEU, NMI, ClippEU, LogLossHashed or CharMatch" ) <> help "Metric to be used - RMSE, MSE, Accuracy, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed or CharMatch" )
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue)) runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
runGEval args = do runGEval args = do

View File

@ -1,6 +1,7 @@
{-# LANGUAGE PartialTypeSignatures #-} {-# LANGUAGE PartialTypeSignatures #-}
module GEval.PrecisionRecall(fMeasure, f1Measure, f2Measure, precision, recall, module GEval.PrecisionRecall(calculateMAPForOneResult,
fMeasure, f1Measure, f2Measure, precision, recall,
fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder, fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder,
precisionAndRecall, precisionAndRecallFromCounts, maxMatch) precisionAndRecall, precisionAndRecallFromCounts, maxMatch)
where where
@ -10,6 +11,18 @@ import GEval.Common
import Data.Graph.Inductive import Data.Graph.Inductive
import Data.Graph.Inductive.Query.MaxFlow import Data.Graph.Inductive.Query.MaxFlow
import Data.List (nub, foldl')
-- | Average precision of one ranked result list.
--
-- @expected@ holds the relevant items and @got@ the returned items in rank
-- order. Duplicates in @got@ are removed first (the first occurrence is kept),
-- so repeating a hit cannot inflate the score. The precisions measured at each
-- hit are summed and divided by the number of expected items.
--
-- An empty @expected@ list yields 0.0: the plain formula would compute
-- @0 / 0@ there and leak a NaN into whatever average is taken afterwards.
calculateMAPForOneResult :: (Eq a) => [a] -> [a] -> Double
calculateMAPForOneResult [] _ = 0.0
calculateMAPForOneResult expected got = precisionSum / fromIntegral (length expected)
  where
    (_, _, precisionSum) = calculateMAPForOneResultCore expected (nub got)

-- | Walk the (already de-duplicated) returned items in rank order, producing
-- @(hits so far, items seen so far, sum of precisions at each hit)@.
calculateMAPForOneResultCore :: (Eq a) => [a] -> [a] -> (Int, Int, Double)
calculateMAPForOneResultCore expected = foldl' (oneMAPStep expected) (0, 0, 0.0)

-- | Process one returned item: the seen-counter always advances; on a hit the
-- hit-counter advances too and the precision at this rank is added to the sum.
oneMAPStep :: (Eq a) => [a] -> (Int, Int, Double) -> a -> (Int, Int, Double)
oneMAPStep expected (gotCount, allCount, precisionSum) gotItem
  | gotItem `elem` expected = (newGotCount, newAllCount, precisionSum + precisionHere)
  | otherwise = (gotCount, newAllCount, precisionSum)
  where
    newGotCount = gotCount + 1
    newAllCount = allCount + 1
    -- newAllCount is at least 1 here, so this division is always well defined.
    precisionHere = fromIntegral newGotCount / fromIntegral newAllCount
f2Measure :: (a -> b -> Bool) -> [a] -> [b] -> Double f2Measure :: (a -> b -> Bool) -> [a] -> [b] -> Double
f2Measure = fMeasure 2.0 f2Measure = fMeasure 2.0

View File

@ -166,7 +166,15 @@ main = hspec $ do
runGEvalTest "charmatch-complex" `shouldReturnAlmost` 0.1923076923076923 runGEvalTest "charmatch-complex" `shouldReturnAlmost` 0.1923076923076923
it "broken test without input" $ do it "broken test without input" $ do
runGEvalTest "charmatch-no-input" `shouldThrow` (== NoInputFile "test/charmatch-no-input/charmatch-no-input/test-A/in.tsv") runGEvalTest "charmatch-no-input" `shouldThrow` (== NoInputFile "test/charmatch-no-input/charmatch-no-input/test-A/in.tsv")
describe "MAP" $ do
it "one result" $ do
(calculateMAPForOneResult ["Berlin", "London", "Warsaw"]
["Warsaw", "Moscow", "Berlin", "Prague"]) `shouldBeAlmost` 0.55555555
it "check whether you cannot cheat with duplicated results" $ do
(calculateMAPForOneResult ["one", "two"]
["one", "one"]) `shouldBeAlmost` 0.5
it "simple test" $ do
runGEvalTest "map-simple" `shouldReturnAlmost` 0.444444444
neverMatch :: Char -> Int -> Bool neverMatch :: Char -> Int -> Bool
neverMatch _ _ = False neverMatch _ _ = False

View File

@ -0,0 +1,3 @@
pink blue
red yellow rose black
white gray
Can't render this file because it has a wrong number of fields in line 2.

View File

@ -0,0 +1 @@
--metric MAP

View File

@ -0,0 +1,3 @@
blue
red rose
green
Can't render this file because it has a wrong number of fields in line 2.