add MAP metric
This commit is contained in:
parent
7eef53832d
commit
9643719193
@ -58,6 +58,7 @@ defaultLogLossHashedSize :: Word32
|
|||||||
defaultLogLossHashedSize = 10
|
defaultLogLossHashedSize = 10
|
||||||
|
|
||||||
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch
|
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch
|
||||||
|
| MAP
|
||||||
deriving (Eq)
|
deriving (Eq)
|
||||||
|
|
||||||
instance Show Metric where
|
instance Show Metric where
|
||||||
@ -75,6 +76,7 @@ instance Show Metric where
|
|||||||
else
|
else
|
||||||
(show nbOfBits))
|
(show nbOfBits))
|
||||||
show CharMatch = "CharMatch"
|
show CharMatch = "CharMatch"
|
||||||
|
show MAP = "MAP"
|
||||||
|
|
||||||
instance Read Metric where
|
instance Read Metric where
|
||||||
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
|
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
|
||||||
@ -90,6 +92,7 @@ instance Read Metric where
|
|||||||
[(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
|
[(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
|
||||||
_ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
|
_ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
|
||||||
readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
|
readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
|
||||||
|
readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
|
||||||
|
|
||||||
data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
|
data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
|
||||||
|
|
||||||
@ -103,6 +106,7 @@ getMetricOrdering (FMeasure _) = TheHigherTheBetter
|
|||||||
getMetricOrdering NMI = TheHigherTheBetter
|
getMetricOrdering NMI = TheHigherTheBetter
|
||||||
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
|
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
|
||||||
getMetricOrdering CharMatch = TheHigherTheBetter
|
getMetricOrdering CharMatch = TheHigherTheBetter
|
||||||
|
getMetricOrdering MAP = TheHigherTheBetter
|
||||||
|
|
||||||
defaultOutDirectory = "."
|
defaultOutDirectory = "."
|
||||||
defaultTestName = "test-A"
|
defaultTestName = "test-A"
|
||||||
@ -262,6 +266,12 @@ gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings m
|
|||||||
|
|
||||||
gevalCore' NMI _ = gevalCoreWithoutInput id id id (CC.foldl updateConfusionMatrix M.empty) normalizedMutualInformationFromConfusionMatrix
|
gevalCore' NMI _ = gevalCoreWithoutInput id id id (CC.foldl updateConfusionMatrix M.empty) normalizedMutualInformationFromConfusionMatrix
|
||||||
|
|
||||||
|
gevalCore' MAP _ = gevalCoreWithoutInput (DLS.splitOn "\t" . unpack)
|
||||||
|
(DLS.splitOn "\t" . unpack)
|
||||||
|
(\(e,g) -> calculateMAPForOneResult e g)
|
||||||
|
averageC
|
||||||
|
id
|
||||||
|
|
||||||
gevalCore' (LogLossHashed nbOfBits) _ = helper nbOfBits
|
gevalCore' (LogLossHashed nbOfBits) _ = helper nbOfBits
|
||||||
-- for LogLossHashed we "salt" each hash with the line number
|
-- for LogLossHashed we "salt" each hash with the line number
|
||||||
where helper nbOfBits expectedFilePath outFilePath =
|
where helper nbOfBits expectedFilePath outFilePath =
|
||||||
|
@ -142,6 +142,24 @@ Directory structure
|
|||||||
* `${testName}/expected.tsv` — American reference text for the test set
|
* `${testName}/expected.tsv` — American reference text for the test set
|
||||||
|]
|
|]
|
||||||
|
|
||||||
|
readmeMDContents MAP testName = [i|
|
||||||
|
English word for a Polish word
|
||||||
|
================================================
|
||||||
|
|
||||||
|
Give a (British or American) English equivalent of a Polish word.
|
||||||
|
|
||||||
|
This is a sample challenge for MAP evaluation metric. MAP (Mean Average Precision)
|
||||||
|
is used, mostly in information retrieval, for evaluation of ranked retrieval results.
|
||||||
|
|
||||||
|
The relevant items are separated by TABs (could be just one item) and returned items
|
||||||
|
should be separated by TABs.
|
||||||
|
|
||||||
|
See Christopher D. Manning, Prabhakar Raghavan and Hinrich Schütze,
|
||||||
|
"Introduction to Information Retrieval", Cambridge University Press, 2008 for
|
||||||
|
more discussion of the metric.
|
||||||
|
|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
|
|
||||||
readmeMDContents _ testName = [i|
|
readmeMDContents _ testName = [i|
|
||||||
GEval sample challenge
|
GEval sample challenge
|
||||||
======================
|
======================
|
||||||
@ -209,7 +227,6 @@ trainContents NMI = [hereLit|pl Kto pod kim dołki kopie, ten sam w nie wpada.
|
|||||||
en The pen is mightier than the sword.
|
en The pen is mightier than the sword.
|
||||||
pl Baba z wozu, koniom lżej.
|
pl Baba z wozu, koniom lżej.
|
||||||
|]
|
|]
|
||||||
|
|
||||||
trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota
|
trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota
|
||||||
Basia ma psa
|
Basia ma psa
|
||||||
Nie kupujemy kota w worku
|
Nie kupujemy kota w worku
|
||||||
@ -220,6 +237,11 @@ Camptown race-track five miles long, Oh, doo-dah day!
|
|||||||
I come down dah wid my hat caved in, Doo-dah! doo-dah!
|
I come down dah wid my hat caved in, Doo-dah! doo-dah!
|
||||||
I go back home wid a pocket full of tin, Oh, doo-dah day!
|
I go back home wid a pocket full of tin, Oh, doo-dah day!
|
||||||
|]
|
|]
|
||||||
|
trainContents MAP = [hereLit|honor US honor
|
||||||
|
honour GB honor
|
||||||
|
titbit GB smakołyk
|
||||||
|
tidbit US smakołyk
|
||||||
|
|]
|
||||||
trainContents _ = [hereLit|0.06 0.39 0 0.206
|
trainContents _ = [hereLit|0.06 0.39 0 0.206
|
||||||
1.00 1.00 1 0.017
|
1.00 1.00 1 0.017
|
||||||
317.8 5.20 67 0.048
|
317.8 5.20 67 0.048
|
||||||
@ -247,6 +269,10 @@ devInContents CharMatch = [hereLit|honour to organise
|
|||||||
nothing to change
|
nothing to change
|
||||||
time traveller
|
time traveller
|
||||||
|]
|
|]
|
||||||
|
devInContents MAP = [hereLit|US noc
|
||||||
|
GB wózek dziecięcy
|
||||||
|
GB wizualizować
|
||||||
|
|]
|
||||||
devInContents _ = [hereLit|0.72 0 0.007
|
devInContents _ = [hereLit|0.72 0 0.007
|
||||||
9.54 62 0.054
|
9.54 62 0.054
|
||||||
|]
|
|]
|
||||||
@ -272,6 +298,10 @@ devExpectedContents CharMatch = [hereLit|honor to organize
|
|||||||
nothing to change
|
nothing to change
|
||||||
time traveler
|
time traveler
|
||||||
|]
|
|]
|
||||||
|
devExpectedContents MAP = [hereLit|night nite
|
||||||
|
pram
|
||||||
|
visualise
|
||||||
|
|]
|
||||||
devExpectedContents _ = [hereLit|0.82
|
devExpectedContents _ = [hereLit|0.82
|
||||||
95.2
|
95.2
|
||||||
|]
|
|]
|
||||||
@ -299,6 +329,10 @@ testInContents CharMatch = [hereLit|paralysed by practise
|
|||||||
recognise
|
recognise
|
||||||
nothing
|
nothing
|
||||||
|]
|
|]
|
||||||
|
testInContents MAP = [hereLit|US wózek dziecięcy
|
||||||
|
GB słoń
|
||||||
|
US słoń
|
||||||
|
|]
|
||||||
testInContents _ = [hereLit|1.52 2 0.093
|
testInContents _ = [hereLit|1.52 2 0.093
|
||||||
30.06 14 0.009
|
30.06 14 0.009
|
||||||
|]
|
|]
|
||||||
@ -326,6 +360,10 @@ testExpectedContents CharMatch = [hereLit|paralyzed by practice
|
|||||||
recognize
|
recognize
|
||||||
nothing
|
nothing
|
||||||
|]
|
|]
|
||||||
|
testExpectedContents MAP = [hereLit|trolley
|
||||||
|
elephant
|
||||||
|
elephant
|
||||||
|
|]
|
||||||
testExpectedContents _ = [hereLit|0.11
|
testExpectedContents _ = [hereLit|0.11
|
||||||
17.2
|
17.2
|
||||||
|]
|
|]
|
||||||
|
@ -82,7 +82,7 @@ metricReader = option auto
|
|||||||
<> value defaultMetric
|
<> value defaultMetric
|
||||||
<> showDefault
|
<> showDefault
|
||||||
<> metavar "METRIC"
|
<> metavar "METRIC"
|
||||||
<> help "Metric to be used - RMSE, MSE, Accuracy, F-measure (specify as F1, F2, F0.25, etc.), BLEU, NMI, ClippEU, LogLossHashed or CharMatch" )
|
<> help "Metric to be used - RMSE, MSE, Accuracy, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed or CharMatch" )
|
||||||
|
|
||||||
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
|
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
|
||||||
runGEval args = do
|
runGEval args = do
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
{-# LANGUAGE PartialTypeSignatures #-}
|
{-# LANGUAGE PartialTypeSignatures #-}
|
||||||
|
|
||||||
module GEval.PrecisionRecall(fMeasure, f1Measure, f2Measure, precision, recall,
|
module GEval.PrecisionRecall(calculateMAPForOneResult,
|
||||||
|
fMeasure, f1Measure, f2Measure, precision, recall,
|
||||||
fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder,
|
fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder,
|
||||||
precisionAndRecall, precisionAndRecallFromCounts, maxMatch)
|
precisionAndRecall, precisionAndRecallFromCounts, maxMatch)
|
||||||
where
|
where
|
||||||
@ -10,6 +11,18 @@ import GEval.Common
|
|||||||
import Data.Graph.Inductive
|
import Data.Graph.Inductive
|
||||||
import Data.Graph.Inductive.Query.MaxFlow
|
import Data.Graph.Inductive.Query.MaxFlow
|
||||||
|
|
||||||
|
import Data.List (nub, foldl')
|
||||||
|
|
||||||
|
-- | Average precision of a single ranked result list.
--
-- @expected@ is the collection of relevant items (order is irrelevant),
-- @got@ is the ranked list of returned items, best first.
--
-- Both lists are de-duplicated before scoring:
--
-- * repeated items in @got@ must not be rewarded more than once;
-- * repeated items in @expected@ must not inflate the denominator,
--   otherwise a perfect ranking could never reach 1.0.
--
-- When there are no relevant items at all the average precision is
-- undefined (0/0); 0.0 is returned instead of NaN so that a single
-- degenerate line cannot poison an average taken over many results.
calculateMAPForOneResult :: (Eq a) => [a] -> [a] -> Double
calculateMAPForOneResult expected got
  | null relevant = 0.0
  | otherwise     = precisionSum / fromIntegral (length relevant)
  where relevant = nub expected
        (_, _, precisionSum) = calculateMAPForOneResultCore relevant (nub got)

-- | Fold over the ranked items, accumulating
-- (number of hits so far, number of items seen so far, sum of precisions
-- taken at each rank where a hit occurred).
calculateMAPForOneResultCore :: (Eq a) => [a] -> [a] -> (Int, Int, Double)
calculateMAPForOneResultCore expected got = foldl' (oneMAPStep expected) (0, 0, 0.0) got

-- | Process one returned item: always advance the rank counter; on a hit
-- also advance the hit counter and add precision-at-this-rank to the sum.
oneMAPStep :: (Eq a) => [a] -> (Int, Int, Double) -> a -> (Int, Int, Double)
oneMAPStep expected (gotCount, allCount, precisionSum) gotItem
  | gotItem `elem` expected = (newGotCount, newAllCount, precisionSum + precisionAtRank)
  | otherwise = (gotCount, newAllCount, precisionSum)
  where newGotCount = gotCount + 1
        newAllCount = allCount + 1
        -- newAllCount is always >= 1 here, so plain division is safe.
        precisionAtRank = fromIntegral newGotCount / fromIntegral newAllCount
|
||||||
|
|
||||||
f2Measure :: (a -> b -> Bool) -> [a] -> [b] -> Double
|
f2Measure :: (a -> b -> Bool) -> [a] -> [b] -> Double
|
||||||
f2Measure = fMeasure 2.0
|
f2Measure = fMeasure 2.0
|
||||||
|
|
||||||
|
10
test/Spec.hs
10
test/Spec.hs
@ -166,7 +166,15 @@ main = hspec $ do
|
|||||||
runGEvalTest "charmatch-complex" `shouldReturnAlmost` 0.1923076923076923
|
runGEvalTest "charmatch-complex" `shouldReturnAlmost` 0.1923076923076923
|
||||||
it "broken test without input" $ do
|
it "broken test without input" $ do
|
||||||
runGEvalTest "charmatch-no-input" `shouldThrow` (== NoInputFile "test/charmatch-no-input/charmatch-no-input/test-A/in.tsv")
|
runGEvalTest "charmatch-no-input" `shouldThrow` (== NoInputFile "test/charmatch-no-input/charmatch-no-input/test-A/in.tsv")
|
||||||
|
describe "MAP" $ do
|
||||||
|
it "one result" $ do
|
||||||
|
(calculateMAPForOneResult ["Berlin", "London", "Warsaw"]
|
||||||
|
["Warsaw", "Moscow", "Berlin", "Prague"]) `shouldBeAlmost` 0.55555555
|
||||||
|
it "check whether you cannot cheat with duplicated results" $ do
|
||||||
|
(calculateMAPForOneResult ["one", "two"]
|
||||||
|
["one", "one"]) `shouldBeAlmost` 0.5
|
||||||
|
it "simple test" $ do
|
||||||
|
runGEvalTest "map-simple" `shouldReturnAlmost` 0.444444444
|
||||||
|
|
||||||
neverMatch :: Char -> Int -> Bool
|
neverMatch :: Char -> Int -> Bool
|
||||||
neverMatch _ _ = False
|
neverMatch _ _ = False
|
||||||
|
3
test/map-simple/map-simple-solution/test-A/out.tsv
Normal file
3
test/map-simple/map-simple-solution/test-A/out.tsv
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
pink blue
|
||||||
|
red yellow rose black
|
||||||
|
white gray
|
Can't render this file because it has a wrong number of fields in line 2.
|
1
test/map-simple/map-simple/config.txt
Normal file
1
test/map-simple/map-simple/config.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
--metric MAP
|
3
test/map-simple/map-simple/test-A/expected.tsv
Normal file
3
test/map-simple/map-simple/test-A/expected.tsv
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
blue
|
||||||
|
red rose
|
||||||
|
green
|
Can't render this file because it has a wrong number of fields in line 2.
|
Loading…
Reference in New Issue
Block a user