add MAP metric

This commit is contained in:
Filip Gralinski 2017-12-12 07:54:21 +01:00 committed by Filip Gralinski
parent 7eef53832d
commit 9643719193
8 changed files with 80 additions and 4 deletions

View File

@ -58,6 +58,7 @@ defaultLogLossHashedSize :: Word32
defaultLogLossHashedSize = 10
-- | Evaluation metrics that can be selected for a run (e.g. @--metric MAP@).
--
-- 'FMeasure' carries a 'Double' parameter (presumably the beta weight of the
-- F-measure, cf. the F1 / F2 / F0.25 spellings in the help text — confirm).
-- 'LogLossHashed' carries a 'Word32' that the Show/Read instances refer to
-- as the number of bits. 'MAP' stands for Mean Average Precision.
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch
| MAP
deriving (Eq)
instance Show Metric where
@ -75,6 +76,7 @@ instance Show Metric where
else
(show nbOfBits))
show CharMatch = "CharMatch"
show MAP = "MAP"
instance Read Metric where
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
@ -90,6 +92,7 @@ instance Read Metric where
[(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
_ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
@ -103,6 +106,7 @@ getMetricOrdering (FMeasure _) = TheHigherTheBetter
getMetricOrdering NMI = TheHigherTheBetter
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
getMetricOrdering CharMatch = TheHigherTheBetter
getMetricOrdering MAP = TheHigherTheBetter
defaultOutDirectory = "."
defaultTestName = "test-A"
@ -262,6 +266,12 @@ gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings m
gevalCore' NMI _ = gevalCoreWithoutInput id id id (CC.foldl updateConfusionMatrix M.empty) normalizedMutualInformationFromConfusionMatrix
-- MAP: every expected line and every output line is split on TABs into a
-- list of items; each (expected, got) pair is scored with
-- calculateMAPForOneResult and the per-line scores are aggregated by
-- averageC. The final value is passed through unchanged (id).
gevalCore' MAP _ = gevalCoreWithoutInput splitOnTabs
                                         splitOnTabs
                                         (uncurry calculateMAPForOneResult)
                                         averageC
                                         id
  where splitOnTabs line = DLS.splitOn "\t" (unpack line)
gevalCore' (LogLossHashed nbOfBits) _ = helper nbOfBits
-- for LogLossHashed we "salt" each hash with the line number
where helper nbOfBits expectedFilePath outFilePath =

View File

@ -142,6 +142,24 @@ Directory structure
* `${testName}/expected.tsv` American reference text for the test set
|]
readmeMDContents MAP testName = [i|
English word for a Polish word
================================================
Give a (British or American) English equivalent of a Polish word.
This is a sample challenge for MAP evaluation metric. MAP (Mean Average Precision)
is used, mostly in information retrieval, for evaluation of ranked retrieval results.
The relevant items are separated by TABs (could be just one item) and returned items
should be separated by TABs.
See Christopher D. Manning, Prabhakar Raghavan and Hinrich Schütze,
"Introduction to Information Retrieval", Cambridge University Press, 2008 for
more discussion of the metric.
|] ++ (commonReadmeMDContents testName)
readmeMDContents _ testName = [i|
GEval sample challenge
======================
@ -209,7 +227,6 @@ trainContents NMI = [hereLit|pl Kto pod kim dołki kopie, ten sam w nie wpada.
en The pen is mightier than the sword.
pl Baba z wozu, koniom lżej.
|]
trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota
Basia ma psa
Nie kupujemy kota w worku
@ -220,6 +237,11 @@ Camptown race-track five miles long, Oh, doo-dah day!
I come down dah wid my hat caved in, Doo-dah! doo-dah!
I go back home wid a pocket full of tin, Oh, doo-dah day!
|]
trainContents MAP = [hereLit|honor US honor
honour GB honor
titbit GB smakołyk
tidbit US smakołyk
|]
trainContents _ = [hereLit|0.06 0.39 0 0.206
1.00 1.00 1 0.017
317.8 5.20 67 0.048
@ -247,6 +269,10 @@ devInContents CharMatch = [hereLit|honour to organise
nothing to change
time traveller
|]
devInContents MAP = [hereLit|US noc
GB wózek dziecięcy
GB wizualizować
|]
devInContents _ = [hereLit|0.72 0 0.007
9.54 62 0.054
|]
@ -272,6 +298,10 @@ devExpectedContents CharMatch = [hereLit|honor to organize
nothing to change
time traveler
|]
devExpectedContents MAP = [hereLit|night nite
pram
visualise
|]
devExpectedContents _ = [hereLit|0.82
95.2
|]
@ -299,6 +329,10 @@ testInContents CharMatch = [hereLit|paralysed by practise
recognise
nothing
|]
testInContents MAP = [hereLit|US wózek dziecięcy
GB słoń
US słoń
|]
testInContents _ = [hereLit|1.52 2 0.093
30.06 14 0.009
|]
@ -326,6 +360,10 @@ testExpectedContents CharMatch = [hereLit|paralyzed by practice
recognize
nothing
|]
testExpectedContents MAP = [hereLit|trolley
elephant
elephant
|]
testExpectedContents _ = [hereLit|0.11
17.2
|]

View File

@ -82,7 +82,7 @@ metricReader = option auto
<> value defaultMetric
<> showDefault
<> metavar "METRIC"
<> help "Metric to be used - RMSE, MSE, Accuracy, F-measure (specify as F1, F2, F0.25, etc.), BLEU, NMI, ClippEU, LogLossHashed or CharMatch" )
<> help "Metric to be used - RMSE, MSE, Accuracy, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed or CharMatch" )
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
runGEval args = do

View File

@ -1,6 +1,7 @@
{-# LANGUAGE PartialTypeSignatures #-}
module GEval.PrecisionRecall(fMeasure, f1Measure, f2Measure, precision, recall,
module GEval.PrecisionRecall(calculateMAPForOneResult,
fMeasure, f1Measure, f2Measure, precision, recall,
fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder,
precisionAndRecall, precisionAndRecallFromCounts, maxMatch)
where
@ -10,6 +11,18 @@ import GEval.Common
import Data.Graph.Inductive
import Data.Graph.Inductive.Query.MaxFlow
import Data.List (nub, foldl')
-- | Average precision of one ranked result list, i.e. the per-item
-- component that MAP (Mean Average Precision) averages.
--
-- The first argument is the list of relevant (expected) items, the second
-- the returned items in rank order. Repeated returned items are collapsed
-- to their first occurrence with 'nub', so duplicating a hit cannot inflate
-- the score. The precision at every rank that holds a relevant item is
-- summed and the sum is divided by the number of expected items.
--
-- An empty list of expected items yields 0.0: the plain division would be
-- 0/0 = NaN, which would then poison any average taken over many results.
--
-- Example: expected @[Berlin, London, Warsaw]@ and returned
-- @[Warsaw, Moscow, Berlin, Prague]@ give @(1/1 + 2/3) / 3@, about 0.5556.
calculateMAPForOneResult :: (Eq a) => [a] -> [a] -> Double
calculateMAPForOneResult [] _ = 0.0
calculateMAPForOneResult expected got = precisionSum / fromIntegral (length expected)
  where (_, _, precisionSum) = calculateMAPForOneResultCore expected (nub got)

-- | Run 'oneMAPStep' over the (already de-duplicated) returned items.
-- The result is (number of hits, number of items seen, sum of the
-- precisions measured at each hit).
calculateMAPForOneResultCore :: (Eq a) => [a] -> [a] -> (Int, Int, Double)
calculateMAPForOneResultCore expected got = foldl' (oneMAPStep expected) (0, 0, 0.0) got

-- | Consume one returned item: the rank counter always advances; on a hit
-- the hit counter advances too and the precision at this rank
-- (hits so far / items seen so far) is added to the running sum.
oneMAPStep :: (Eq a) => [a] -> (Int, Int, Double) -> a -> (Int, Int, Double)
oneMAPStep expected (gotCount, allCount, precisionSum) gotItem
  | gotItem `elem` expected = (newGotCount, newAllCount, precisionSum + precisionHere)
  | otherwise = (gotCount, newAllCount, precisionSum)
  where newGotCount = gotCount + 1
        newAllCount = allCount + 1
        -- newAllCount is at least 1 here, so the division is always defined
        precisionHere = fromIntegral newGotCount / fromIntegral newAllCount
-- | 'fMeasure' with its first argument fixed at 2.0 (presumably the beta
-- weight of the F-measure — confirm against 'fMeasure'). The remaining
-- arguments — a matching predicate and the two lists it relates — are
-- handed over unchanged.
f2Measure :: (a -> b -> Bool) -> [a] -> [b] -> Double
f2Measure matchFun xs ys = fMeasure 2.0 matchFun xs ys

View File

@ -166,7 +166,15 @@ main = hspec $ do
runGEvalTest "charmatch-complex" `shouldReturnAlmost` 0.1923076923076923
it "broken test without input" $ do
runGEvalTest "charmatch-no-input" `shouldThrow` (== NoInputFile "test/charmatch-no-input/charmatch-no-input/test-A/in.tsv")
describe "MAP" $ do
it "one result" $ do
(calculateMAPForOneResult ["Berlin", "London", "Warsaw"]
["Warsaw", "Moscow", "Berlin", "Prague"]) `shouldBeAlmost` 0.55555555
it "check whether you cannot cheat with duplicated results" $ do
(calculateMAPForOneResult ["one", "two"]
["one", "one"]) `shouldBeAlmost` 0.5
it "simple test" $ do
runGEvalTest "map-simple" `shouldReturnAlmost` 0.444444444
-- | Matching predicate that rejects every pair, whatever the arguments are.
neverMatch :: Char -> Int -> Bool
neverMatch = const (const False)

View File

@ -0,0 +1,3 @@
pink blue
red yellow rose black
white gray
Can't render this file because it has a wrong number of fields in line 2.

View File

@ -0,0 +1 @@
--metric MAP

View File

@ -0,0 +1,3 @@
blue
red rose
green
Can't render this file because it has a wrong number of fields in line 2.