Add Macro-F1 metric
parent 4bf54ed1c5
commit 782c556f8c
@@ -98,7 +98,7 @@ defaultLogLossHashedSize :: Word32
 defaultLogLossHashedSize = 10
 
 -- | evaluation metric
-data Metric = RMSE | MSE | BLEU | GLEU | WER | Accuracy | ClippEU | FMeasure Double | NMI
+data Metric = RMSE | MSE | BLEU | GLEU | WER | Accuracy | ClippEU | FMeasure Double | MacroFMeasure Double | NMI
               | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
               | BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double
               | MultiLabelLogLoss | MultiLabelLikelihood
@@ -113,6 +113,7 @@ instance Show Metric where
   show Accuracy = "Accuracy"
   show ClippEU = "ClippEU"
   show (FMeasure beta) = "F" ++ (show beta)
+  show (MacroFMeasure beta) = "Macro-F" ++ (show beta)
   show NMI = "NMI"
   show (LogLossHashed nbOfBits) = "LogLossHashed" ++ (if
       nbOfBits == defaultLogLossHashedSize
@@ -149,6 +150,9 @@ instance Read Metric where
   readsPrec p ('F':theRest) = case readsPrec p theRest of
     [(beta, theRest)] -> [(FMeasure beta, theRest)]
     _ -> []
+  readsPrec p ('M':'a':'c':'r':'o':'-':'F':theRest) = case readsPrec p theRest of
+    [(beta, theRest)] -> [(MacroFMeasure beta, theRest)]
+    _ -> []
   readsPrec p ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':theRest) = case readsPrec p theRest of
     [(beta, theRest)] -> [(MultiLabelFMeasure beta, theRest)]
    _ -> []
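A quick sanity check of the new Show/Read clauses (an editor's GHCi sketch, not part of the commit, assuming the module defining Metric is loaded):

    >>> read "Macro-F1" :: Metric      -- the new readsPrec clause reads the beta as a Double
    Macro-F1.0
    >>> read "Macro-F2.5" :: Metric
    Macro-F2.5

"Macro-F1" parses to MacroFMeasure 1.0, which the new Show clause renders back as "Macro-F1.0"; the config files added below only exercise the Read side ("--metric Macro-F1", "--metric Macro-F2.5").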
@@ -182,6 +186,7 @@ getMetricOrdering WER = TheLowerTheBetter
 getMetricOrdering Accuracy = TheHigherTheBetter
 getMetricOrdering ClippEU = TheHigherTheBetter
 getMetricOrdering (FMeasure _) = TheHigherTheBetter
+getMetricOrdering (MacroFMeasure _) = TheHigherTheBetter
 getMetricOrdering NMI = TheHigherTheBetter
 getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
 getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
@@ -573,6 +578,35 @@ gevalCore' (FMeasure beta) _ = gevalCoreWithoutInput outParser outParser getCoun
         getCount (False, True) = (0, 0, 1)
         getCount (False, False) = (0, 0, 0)
 
+gevalCore' (MacroFMeasure beta) _ = gevalCoreWithoutInput (Right . Just . strip) (Right . predicted . strip) getClassesInvolved gatherClassC macroAverageOnCounts
+  where predicted got =
+          -- first try to parse what we got as a probability distribution
+          -- (like the one used for the Likelihood/LogLossHashed metric)
+          case parseWordSpecs got of
+            Right wordSpecs -> if Prelude.null pairs
+                               then Nothing
+                               else Just $ snd $ Prelude.maximum pairs
+              where pairs = catMaybes $ Prelude.map wordSpecToPair wordSpecs
+            Left _ -> Just got
+        getClassesInvolved (Just a, Nothing) = (Nothing, Just a, Nothing)
+        getClassesInvolved (Nothing, Just b) = (Nothing, Nothing, Just b) -- should not occur, for completeness
+        getClassesInvolved (Just a, Just b) = if a == b
+                                              then (Just a, Just a, Just a)
+                                              else (Nothing, Just a, Just b)
+        gatherClassC = CC.foldl gatherClassCombiner (M.empty, M.empty, M.empty)
+        gatherClassCombiner (tpMap, expectedMap, gotMap) (tp, expected, got) =
+          (insertMaybeToMap tp tpMap,
+           insertMaybeToMap expected expectedMap,
+           insertMaybeToMap got gotMap)
+        insertMaybeToMap Nothing m = m
+        insertMaybeToMap (Just c) m = M.insertWith (+) c 1 m
+        macroAverageOnCounts (tpMap, expectedMap, gotMap) =
+          (Prelude.sum
+           $ Prelude.map (\c -> fMeasureOnCounts beta (M.lookupDefault 0 c tpMap,
+                                                       M.lookupDefault 0 c expectedMap,
+                                                       M.lookupDefault 0 c gotMap))
+           $ M.keys expectedMap) / (fromIntegral $ Prelude.length $ M.keys expectedMap)
+
 gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep
   where
     parseClippings = controlledParse lineClippingsParser
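To make the new aggregation step easier to follow, here is a minimal, self-contained sketch of the same macro-averaging idea over plain (expected, predicted) pairs. It is an editor's illustration under stated assumptions, not GEval code: the clause above streams per-line class triples through a conduit and reuses GEval's fMeasureOnCounts, while this standalone version uses Data.Map and a local fBeta helper.

    import qualified Data.Map.Strict as M

    -- F-beta from raw counts; defined as 0 when there are no true positives.
    fBeta :: Double -> Int -> Int -> Int -> Double
    fBeta beta tp expectedCount gotCount
      | tp == 0   = 0.0
      | otherwise = (1 + beta * beta) * p * r / (beta * beta * p + r)
      where p = fromIntegral tp / fromIntegral gotCount
            r = fromIntegral tp / fromIntegral expectedCount

    -- Macro-averaged F-beta over (expected, predicted) label pairs:
    -- per-class F-scores averaged over the classes that occur in the
    -- expected data (assumes at least one expected label).
    macroFBeta :: Ord c => Double -> [(c, c)] -> Double
    macroFBeta beta pairs = sum scores / fromIntegral (length scores)
      where scores      = [ fBeta beta (count c tpMap) (count c expectedMap) (count c gotMap)
                          | c <- M.keys expectedMap ]
            count c m   = M.findWithDefault 0 c m
            tpMap       = histogram [ e | (e, g) <- pairs, e == g ]
            expectedMap = histogram (map fst pairs)
            gotMap      = histogram (map snd pairs)
            histogram   = M.fromListWith (+) . map (\c -> (c, 1 :: Int))

For the macro-f1-simple fixture added at the bottom of this commit, macroFBeta 1 (zip expected predicted) reproduces the 0.266666 asserted in the test suite (worked numbers follow the fixture below).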
@@ -103,6 +103,13 @@ The output value could a probability where value greater than or equal to 0.5 is
 as 1.
 |] ++ (commonReadmeMDContents testName)
 
+readmeMDContents (MacroFMeasure _) testName = [i|
+GEval sample challenge — guess the language of a first name
+===========================================================
+
+This is a sample/toy classification challenge for Gonito framework with Macro-F-measure as the metric.
+|] ++ (commonReadmeMDContents testName)
+
 readmeMDContents NMI testName = [i|
 Cluster proverbs
 ================
@@ -342,6 +349,16 @@ trainContents (FMeasure _) = [hereLit|0 b b W 289580 1986 -38 2 a 2 0 1 1 0 0 0
 1 a a W 268170 1352 -41 -35 a 1 1 0 0 0 0 0 0 400 400
 |]
 
+trainContents (MacroFMeasure _) = [hereLit|pl Stanisław
+en John
+de Hans
+pl Wacław
+pl Jan
+pl Kazimierz
+en Matthew
+en Richard
+|]
+
 trainContents NMI = [hereLit|pl Kto pod kim dołki kopie, ten sam w nie wpada.
 en The pen is mightier than the sword.
 pl Baba z wozu, koniom lżej.
@@ -403,6 +420,10 @@ When the going gets tough, the tough get going.
 devInContents (FMeasure _) = [hereLit|b b W 29520 779 -28 -32 a 0 0 0 0 0 0 0 0 0 0
 b b W 55200 1259 35 9 a 1 0 1 0 0 0 0 0 4000 4000
 |]
+devInContents (MacroFMeasure _) = [hereLit|Władysław
+Steven
+Helmut
+|]
 devInContents (LikelihoodHashed b) = devInContents (LogLossHashed b)
 devInContents (LogLossHashed _) = [hereLit|Nie kupuj w worku
 Ona psa
@@ -447,6 +468,10 @@ Y
 devExpectedContents (FMeasure _) = [hereLit|0
 1
 |]
+devExpectedContents (MacroFMeasure _) = [hereLit|pl
+en
+de
+|]
 devExpectedContents NMI = [hereLit|en
 pl
 en
@@ -495,6 +520,10 @@ testInContents Accuracy = [hereLit|2 mild yes
 testInContents (FMeasure _) = [hereLit|b b W 15210 527 -64 -56 a 0 0 0 0 0 0 0 0 0 0
 b b N 38060 486 357 189 b 0 0 0 0 0 0 0 0 0 0
 |]
+testInContents (MacroFMeasure _) = [hereLit|Arkadiusz
+Heinrich
+Henry
+|]
 testInContents NMI = [hereLit|Fortune favors the bold.
 People who live in glass houses should not throw stones.
 W marcu, jak w garncu.
@@ -542,6 +571,10 @@ Y
 testExpectedContents (FMeasure _) = [hereLit|0
 0
 |]
+testExpectedContents (MacroFMeasure _) = [hereLit|pl
+de
+en
+|]
 testExpectedContents NMI = [hereLit|en
 en
 pl
@@ -107,6 +107,11 @@ main = hspec $ do
       runGEvalTest "f-measure-all-false" `shouldReturnAlmost` 1.0
     it "F2-measure" $
       runGEvalTest "f2-simple" `shouldReturnAlmost` 0.714285714
+  describe "Macro-F-measure" $ do
+    it "simple example" $
+      runGEvalTest "macro-f1-simple" `shouldReturnAlmost` 0.266666
+    it "perfect solution" $
+      runGEvalTest "macro-f-measure-perfect" `shouldReturnAlmost` 1.00000
   describe "precision count" $ do
     it "simple test" $ do
       precisionCount [["Alice", "has", "a", "cat" ]] ["Ala", "has", "cat"] `shouldBe` 2
@@ -0,0 +1,4 @@
+B:0.4 A:0.6
+A:1.0
+B:0.4 C:0.3 A:0.3
+C:0.8 A:0.1 B:0.1
@@ -0,0 +1 @@
+--metric Macro-F2.5
@@ -0,0 +1,4 @@
+A
+A
+B
+C
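A note on the unnamed new-file hunks above (this view omits their paths, so the pairing is an inference from the "macro-f-measure-perfect" test): the four probability-distribution rows appear to be the submitted output matching the expected labels A, A, B, C. The `predicted` helper in the new gevalCore' clause first tries to parse each output line as a label distribution and, when that succeeds, keeps the label with the largest weight, so the rows reduce to A (0.6), A (1.0), B (0.4) and C (0.8). That matches the expected labels exactly, which is consistent with the perfect-solution test asserting 1.0.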
@@ -0,0 +1,6 @@
+0
+2
+1
+0
+0
+1
test/macro-f1-simple/macro-f1-simple/config.txt (new file)
@@ -0,0 +1 @@
+--metric Macro-F1
test/macro-f1-simple/macro-f1-simple/test-A/expected.tsv (new file)
@@ -0,0 +1,6 @@
+0
+1
+2
+0
+1
+2
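For reference, working the macro-f1-simple fixture through by hand (expected labels 0 1 2 0 1 2 above; the six predictions 0 2 1 0 0 1 appear in an earlier unnamed new-file hunk, presumably this test's output):

    class 0: 2 true positives, 2 expected, 3 predicted  ->  P = 2/3, R = 1, F1 = 0.8
    class 1: 0 true positives                           ->  F1 = 0
    class 2: 0 true positives                           ->  F1 = 0

    Macro-F1 = (0.8 + 0 + 0) / 3 = 0.2666...

which matches the 0.266666 asserted in the new test-suite entry.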