Add Macro-F1 metric
This commit is contained in:
parent
4bf54ed1c5
commit
782c556f8c
@ -98,7 +98,7 @@ defaultLogLossHashedSize :: Word32
|
|||||||
defaultLogLossHashedSize = 10
|
defaultLogLossHashedSize = 10
|
||||||
|
|
||||||
-- | evaluation metric
|
-- | evaluation metric
|
||||||
data Metric = RMSE | MSE | BLEU | GLEU | WER | Accuracy | ClippEU | FMeasure Double | NMI
|
data Metric = RMSE | MSE | BLEU | GLEU | WER | Accuracy | ClippEU | FMeasure Double | MacroFMeasure Double | NMI
|
||||||
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
||||||
| BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double
|
| BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double
|
||||||
| MultiLabelLogLoss | MultiLabelLikelihood
|
| MultiLabelLogLoss | MultiLabelLikelihood
|
||||||
@ -113,6 +113,7 @@ instance Show Metric where
|
|||||||
show Accuracy = "Accuracy"
|
show Accuracy = "Accuracy"
|
||||||
show ClippEU = "ClippEU"
|
show ClippEU = "ClippEU"
|
||||||
show (FMeasure beta) = "F" ++ (show beta)
|
show (FMeasure beta) = "F" ++ (show beta)
|
||||||
|
show (MacroFMeasure beta) = "Macro-F" ++ (show beta)
|
||||||
show NMI = "NMI"
|
show NMI = "NMI"
|
||||||
show (LogLossHashed nbOfBits) = "LogLossHashed" ++ (if
|
show (LogLossHashed nbOfBits) = "LogLossHashed" ++ (if
|
||||||
nbOfBits == defaultLogLossHashedSize
|
nbOfBits == defaultLogLossHashedSize
|
||||||
@ -149,6 +150,9 @@ instance Read Metric where
|
|||||||
readsPrec p ('F':theRest) = case readsPrec p theRest of
|
readsPrec p ('F':theRest) = case readsPrec p theRest of
|
||||||
[(beta, theRest)] -> [(FMeasure beta, theRest)]
|
[(beta, theRest)] -> [(FMeasure beta, theRest)]
|
||||||
_ -> []
|
_ -> []
|
||||||
|
readsPrec p ('M':'a':'c':'r':'o':'-':'F':theRest) = case readsPrec p theRest of
|
||||||
|
[(beta, theRest)] -> [(MacroFMeasure beta, theRest)]
|
||||||
|
_ -> []
|
||||||
readsPrec p ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':theRest) = case readsPrec p theRest of
|
readsPrec p ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':theRest) = case readsPrec p theRest of
|
||||||
[(beta, theRest)] -> [(MultiLabelFMeasure beta, theRest)]
|
[(beta, theRest)] -> [(MultiLabelFMeasure beta, theRest)]
|
||||||
_ -> []
|
_ -> []
|
||||||
@ -182,6 +186,7 @@ getMetricOrdering WER = TheLowerTheBetter
|
|||||||
getMetricOrdering Accuracy = TheHigherTheBetter
|
getMetricOrdering Accuracy = TheHigherTheBetter
|
||||||
getMetricOrdering ClippEU = TheHigherTheBetter
|
getMetricOrdering ClippEU = TheHigherTheBetter
|
||||||
getMetricOrdering (FMeasure _) = TheHigherTheBetter
|
getMetricOrdering (FMeasure _) = TheHigherTheBetter
|
||||||
|
getMetricOrdering (MacroFMeasure _) = TheHigherTheBetter
|
||||||
getMetricOrdering NMI = TheHigherTheBetter
|
getMetricOrdering NMI = TheHigherTheBetter
|
||||||
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
|
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
|
||||||
getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
|
getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
|
||||||
@ -573,6 +578,35 @@ gevalCore' (FMeasure beta) _ = gevalCoreWithoutInput outParser outParser getCoun
|
|||||||
getCount (False, True) = (0, 0, 1)
|
getCount (False, True) = (0, 0, 1)
|
||||||
getCount (False, False) = (0, 0, 0)
|
getCount (False, False) = (0, 0, 0)
|
||||||
|
|
||||||
|
gevalCore' (MacroFMeasure beta) _ = gevalCoreWithoutInput (Right . Just . strip) (Right . predicted . strip) getClassesInvolved gatherClassC macroAverageOnCounts
|
||||||
|
where predicted got =
|
||||||
|
-- first try to parse what we got as a probability distribution
|
||||||
|
-- (like the one used for Likelihood/LogLossHashed metric)
|
||||||
|
case parseWordSpecs got of
|
||||||
|
Right wordSpecs -> if Prelude.null pairs
|
||||||
|
then Nothing
|
||||||
|
else Just $ snd $ Prelude.maximum pairs
|
||||||
|
where pairs = catMaybes $ Prelude.map wordSpecToPair wordSpecs
|
||||||
|
Left _ -> Just got
|
||||||
|
getClassesInvolved (Just a, Nothing) = (Nothing, Just a, Nothing)
|
||||||
|
getClassesInvolved (Nothing, Just b) = (Nothing, Nothing, Just b) -- should not occur, for completeness
|
||||||
|
getClassesInvolved (Just a, Just b) = if a == b
|
||||||
|
then (Just a, Just a, Just a)
|
||||||
|
else (Nothing, Just a, Just b)
|
||||||
|
gatherClassC = CC.foldl gatherClassCombiner (M.empty, M.empty, M.empty)
|
||||||
|
gatherClassCombiner (tpMap, expectedMap, gotMap) (tp, expected, got) =
|
||||||
|
(insertMaybeToMap tp tpMap,
|
||||||
|
insertMaybeToMap expected expectedMap,
|
||||||
|
insertMaybeToMap got gotMap)
|
||||||
|
insertMaybeToMap Nothing m = m
|
||||||
|
insertMaybeToMap (Just c) m = M.insertWith (+) c 1 m
|
||||||
|
macroAverageOnCounts (tpMap, expectedMap, gotMap) =
|
||||||
|
(Prelude.sum
|
||||||
|
$ Prelude.map (\c -> fMeasureOnCounts beta (M.lookupDefault 0 c tpMap,
|
||||||
|
M.lookupDefault 0 c expectedMap,
|
||||||
|
M.lookupDefault 0 c gotMap))
|
||||||
|
$ M.keys expectedMap) / (fromIntegral $ Prelude.length $ M.keys expectedMap)
|
||||||
|
|
||||||
gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep
|
gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep
|
||||||
where
|
where
|
||||||
parseClippings = controlledParse lineClippingsParser
|
parseClippings = controlledParse lineClippingsParser
|
||||||
|
@ -103,6 +103,13 @@ The output value could a probability where value greater than or equal to 0.5 is
|
|||||||
as 1.
|
as 1.
|
||||||
|] ++ (commonReadmeMDContents testName)
|
|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
|
readmeMDContents (MacroFMeasure _) testName = [i|
|
||||||
|
GEval sample challenge — guess the language of a first name
|
||||||
|
===========================================================
|
||||||
|
|
||||||
|
This is a sample/toy classification challenge for Gonito framework with Macro-F-measure as the metric.
|
||||||
|
|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
readmeMDContents NMI testName = [i|
|
readmeMDContents NMI testName = [i|
|
||||||
Cluster proverbs
|
Cluster proverbs
|
||||||
================
|
================
|
||||||
@ -342,6 +349,16 @@ trainContents (FMeasure _) = [hereLit|0 b b W 289580 1986 -38 2 a 2 0 1 1 0 0 0
|
|||||||
1 a a W 268170 1352 -41 -35 a 1 1 0 0 0 0 0 0 400 400
|
1 a a W 268170 1352 -41 -35 a 1 1 0 0 0 0 0 0 400 400
|
||||||
|]
|
|]
|
||||||
|
|
||||||
|
trainContents (MacroFMeasure _) = [hereLit|pl Stanisław
|
||||||
|
en John
|
||||||
|
de Hans
|
||||||
|
pl Wacław
|
||||||
|
pl Jan
|
||||||
|
pl Kazimierz
|
||||||
|
en Matthew
|
||||||
|
en Richard
|
||||||
|
|]
|
||||||
|
|
||||||
trainContents NMI = [hereLit|pl Kto pod kim dołki kopie, ten sam w nie wpada.
|
trainContents NMI = [hereLit|pl Kto pod kim dołki kopie, ten sam w nie wpada.
|
||||||
en The pen is mightier than the sword.
|
en The pen is mightier than the sword.
|
||||||
pl Baba z wozu, koniom lżej.
|
pl Baba z wozu, koniom lżej.
|
||||||
@ -403,6 +420,10 @@ When the going gets tough, the tough get going.
|
|||||||
devInContents (FMeasure _) = [hereLit|b b W 29520 779 -28 -32 a 0 0 0 0 0 0 0 0 0 0
|
devInContents (FMeasure _) = [hereLit|b b W 29520 779 -28 -32 a 0 0 0 0 0 0 0 0 0 0
|
||||||
b b W 55200 1259 35 9 a 1 0 1 0 0 0 0 0 4000 4000
|
b b W 55200 1259 35 9 a 1 0 1 0 0 0 0 0 4000 4000
|
||||||
|]
|
|]
|
||||||
|
devInContents (MacroFMeasure _) = [hereLit|Władysław
|
||||||
|
Steven
|
||||||
|
Helmut
|
||||||
|
|]
|
||||||
devInContents (LikelihoodHashed b) = devInContents (LogLossHashed b)
|
devInContents (LikelihoodHashed b) = devInContents (LogLossHashed b)
|
||||||
devInContents (LogLossHashed _) = [hereLit|Nie kupuj w worku
|
devInContents (LogLossHashed _) = [hereLit|Nie kupuj w worku
|
||||||
Ona psa
|
Ona psa
|
||||||
@ -447,6 +468,10 @@ Y
|
|||||||
devExpectedContents (FMeasure _) = [hereLit|0
|
devExpectedContents (FMeasure _) = [hereLit|0
|
||||||
1
|
1
|
||||||
|]
|
|]
|
||||||
|
devExpectedContents (MacroFMeasure _) = [hereLit|pl
|
||||||
|
en
|
||||||
|
de
|
||||||
|
|]
|
||||||
devExpectedContents NMI = [hereLit|en
|
devExpectedContents NMI = [hereLit|en
|
||||||
pl
|
pl
|
||||||
en
|
en
|
||||||
@ -495,6 +520,10 @@ testInContents Accuracy = [hereLit|2 mild yes
|
|||||||
testInContents (FMeasure _) = [hereLit|b b W 15210 527 -64 -56 a 0 0 0 0 0 0 0 0 0 0
|
testInContents (FMeasure _) = [hereLit|b b W 15210 527 -64 -56 a 0 0 0 0 0 0 0 0 0 0
|
||||||
b b N 38060 486 357 189 b 0 0 0 0 0 0 0 0 0 0
|
b b N 38060 486 357 189 b 0 0 0 0 0 0 0 0 0 0
|
||||||
|]
|
|]
|
||||||
|
testInContents (MacroFMeasure _) = [hereLit|Arkadiusz
|
||||||
|
Heinrich
|
||||||
|
Henry
|
||||||
|
|]
|
||||||
testInContents NMI = [hereLit|Fortune favors the bold.
|
testInContents NMI = [hereLit|Fortune favors the bold.
|
||||||
People who live in glass houses should not throw stones.
|
People who live in glass houses should not throw stones.
|
||||||
W marcu, jak w garncu.
|
W marcu, jak w garncu.
|
||||||
@ -542,6 +571,10 @@ Y
|
|||||||
testExpectedContents (FMeasure _) = [hereLit|0
|
testExpectedContents (FMeasure _) = [hereLit|0
|
||||||
0
|
0
|
||||||
|]
|
|]
|
||||||
|
testExpectedContents (MacroFMeasure _) = [hereLit|pl
|
||||||
|
de
|
||||||
|
en
|
||||||
|
|]
|
||||||
testExpectedContents NMI = [hereLit|en
|
testExpectedContents NMI = [hereLit|en
|
||||||
en
|
en
|
||||||
pl
|
pl
|
||||||
|
@ -107,6 +107,11 @@ main = hspec $ do
|
|||||||
runGEvalTest "f-measure-all-false" `shouldReturnAlmost` 1.0
|
runGEvalTest "f-measure-all-false" `shouldReturnAlmost` 1.0
|
||||||
it "F2-measure" $
|
it "F2-measure" $
|
||||||
runGEvalTest "f2-simple" `shouldReturnAlmost` 0.714285714
|
runGEvalTest "f2-simple" `shouldReturnAlmost` 0.714285714
|
||||||
|
describe "Macro-F-measure" $ do
|
||||||
|
it "simple example" $
|
||||||
|
runGEvalTest "macro-f1-simple" `shouldReturnAlmost` 0.266666
|
||||||
|
it "perfect solution" $
|
||||||
|
runGEvalTest "macro-f-measure-perfect" `shouldReturnAlmost` 1.00000
|
||||||
describe "precision count" $ do
|
describe "precision count" $ do
|
||||||
it "simple test" $ do
|
it "simple test" $ do
|
||||||
precisionCount [["Alice", "has", "a", "cat" ]] ["Ala", "has", "cat"] `shouldBe` 2
|
precisionCount [["Alice", "has", "a", "cat" ]] ["Ala", "has", "cat"] `shouldBe` 2
|
||||||
|
@ -0,0 +1,4 @@
|
|||||||
|
B:0.4 A:0.6
|
||||||
|
A:1.0
|
||||||
|
B:0.4 C:0.3 A:0.3
|
||||||
|
C:0.8 A:0.1 B:0.1
|
|
@ -0,0 +1 @@
|
|||||||
|
--metric Macro-F2.5
|
@ -0,0 +1,4 @@
|
|||||||
|
A
|
||||||
|
A
|
||||||
|
B
|
||||||
|
C
|
|
@ -0,0 +1,6 @@
|
|||||||
|
0
|
||||||
|
2
|
||||||
|
1
|
||||||
|
0
|
||||||
|
0
|
||||||
|
1
|
|
1
test/macro-f1-simple/macro-f1-simple/config.txt
Normal file
1
test/macro-f1-simple/macro-f1-simple/config.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
--metric Macro-F1
|
6
test/macro-f1-simple/macro-f1-simple/test-A/expected.tsv
Normal file
6
test/macro-f1-simple/macro-f1-simple/test-A/expected.tsv
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
0
|
||||||
|
1
|
||||||
|
2
|
||||||
|
0
|
||||||
|
1
|
||||||
|
2
|
|
Loading…
Reference in New Issue
Block a user