Add Macro-F1 metric

2018-09-27 16:33:35 +02:00 · 2018-09-27 16:33:35 +02:00 · 782c556f8c
commit 782c556f8c
parent 4bf54ed1c5
9 changed files with 95 additions and 1 deletions
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@ -98,7 +98,7 @@ defaultLogLossHashedSize :: Word32
 defaultLogLossHashedSize = 10
 -- | evaluation metric
-data Metric = RMSE | MSE | BLEU | GLEU | WER | Accuracy | ClippEU | FMeasure Double | NMI
+data Metric = RMSE | MSE | BLEU | GLEU | WER | Accuracy | ClippEU | FMeasure Double | MacroFMeasure Double | NMI
              | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
              | BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double
              | MultiLabelLogLoss | MultiLabelLikelihood
@ -113,6 +113,7 @@ instance Show Metric where
  show Accuracy = "Accuracy"
  show ClippEU = "ClippEU"
  show (FMeasure beta) = "F" ++ (show beta)
  show (MacroFMeasure beta) = "Macro-F" ++ (show beta)
  show NMI = "NMI"
  show (LogLossHashed nbOfBits) = "LogLossHashed" ++ (if
                                                       nbOfBits == defaultLogLossHashedSize
@ -149,6 +150,9 @@ instance Read Metric where
  readsPrec p ('F':theRest) = case readsPrec p theRest of
    [(beta, theRest)] -> [(FMeasure beta, theRest)]
    _ -> []
  readsPrec p ('M':'a':'c':'r':'o':'-':'F':theRest) = case readsPrec p theRest of
    [(beta, theRest)] -> [(MacroFMeasure beta, theRest)]
    _ -> []
  readsPrec p ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':theRest) = case readsPrec p theRest of
    [(beta, theRest)] -> [(MultiLabelFMeasure beta, theRest)]
    _ -> []
@ -182,6 +186,7 @@ getMetricOrdering WER      = TheLowerTheBetter
 getMetricOrdering Accuracy = TheHigherTheBetter
 getMetricOrdering ClippEU  = TheHigherTheBetter
 getMetricOrdering (FMeasure _) = TheHigherTheBetter
 getMetricOrdering (MacroFMeasure _) = TheHigherTheBetter
 getMetricOrdering NMI = TheHigherTheBetter
 getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
 getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
@ -573,6 +578,35 @@ gevalCore' (FMeasure beta) _ = gevalCoreWithoutInput outParser outParser getCoun
        getCount (False, True)  = (0, 0, 1)
        getCount (False, False) = (0, 0, 0)
 -- Macro-averaged F-measure.
 -- Each expected line is taken (after stripping) as a class label; each output
 -- line is either a bare label or a word-spec list from which a single class
 -- is picked. An F-measure is computed per class and the results are averaged
 -- with equal weight over the classes that occur in the expected data.
 gevalCore' (MacroFMeasure beta) _ = gevalCoreWithoutInput (Right . Just . strip) (Right . predicted . strip) getClassesInvolved gatherClassC macroAverageOnCounts
                      where predicted got =
                              -- first try to parse what we got as a probability distribution
                              -- (like the one used for Likelihood/LogLossHashed metric)
                              -- NOTE(review): the class is the second component of the largest
                              -- pair from wordSpecToPair, presumably (probability, class) — confirm
                              case parseWordSpecs got of
                                Right wordSpecs -> if Prelude.null pairs
                                                   then Nothing
                                                   else Just $ snd $ Prelude.maximum pairs
                                                 where pairs = catMaybes $ Prelude.map wordSpecToPair wordSpecs
                                Left _ -> Just got
                            -- For one (expected, got) item, yield the classes to be counted as
                            -- (true positive, expected, got).
                            getClassesInvolved (Just a, Nothing) = (Nothing, Just a, Nothing)
                            getClassesInvolved (Nothing, Just b) = (Nothing, Nothing, Just b) -- should not occur, for completeness
                            getClassesInvolved (Just a, Just b) = if a == b
                                                                     then (Just a, Just a, Just a)
                                                                     else (Nothing, Just a, Just b)
                            getClassesInvolved (Nothing, Nothing) = (Nothing, Nothing, Nothing) -- should not occur either; keeps the match total
                            -- Fold all items into three per-class counters:
                            -- true positives, expected occurrences, predicted occurrences.
                            gatherClassC = CC.foldl gatherClassCombiner (M.empty, M.empty, M.empty)
                            gatherClassCombiner (tpMap, expectedMap, gotMap) (tp, expected, got) =
                              (insertMaybeToMap tp tpMap,
                               insertMaybeToMap expected expectedMap,
                               insertMaybeToMap got gotMap)
                            -- Increment the counter of the given class, if there is one.
                            insertMaybeToMap Nothing m = m
                            insertMaybeToMap (Just c) m = M.insertWith (+) c 1 m
                            -- Sum the per-class F-measures over the classes seen in the expected
                            -- data and divide by the number of such classes; a class that only
                            -- ever shows up in the output gets no term of its own.
                            macroAverageOnCounts (tpMap, expectedMap, gotMap) =
                              (Prelude.sum
                               $ Prelude.map (\c -> fMeasureOnCounts beta (M.lookupDefault 0 c tpMap,
                                                                         M.lookupDefault 0 c expectedMap,
                                                                         M.lookupDefault 0 c gotMap))
                               $ M.keys expectedMap) / (fromIntegral $ Prelude.length $ M.keys expectedMap)
 gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep
  where
    parseClippings = controlledParse lineClippingsParser
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@ -103,6 +103,13 @@ The output value could a probability where value greater than or equal to 0.5 is
 as 1.
 |] ++ (commonReadmeMDContents testName)
 readmeMDContents (MacroFMeasure _) testName = [i|
 GEval sample challenge — guess the language of a first name
 ===========================================================
 This is a sample/toy classification challenge for Gonito framework with Macro-F-measure as the metric.
 |] ++ (commonReadmeMDContents testName)
 readmeMDContents NMI testName = [i|
 Cluster proverbs
 ================
@ -342,6 +349,16 @@ trainContents (FMeasure _) = [hereLit|0	b	b	W	289580	1986	-38	2	a	2	0	1	1	0	0	0
 1	a	a	W	268170	1352	-41	-35	a	1	1	0	0	0	0	0	0	400	400
 |]
 trainContents (MacroFMeasure _) = [hereLit|pl	Stanisław
 en	John
 de	Hans
 pl	Wacław
 pl	Jan
 pl	Kazimierz
 en	Matthew
 en	Richard
 |]
 trainContents NMI = [hereLit|pl	Kto pod kim dołki kopie, ten sam w nie wpada.
 en	The pen is mightier than the sword.
 pl	Baba z wozu, koniom lżej.
@ -403,6 +420,10 @@ When the going gets tough, the tough get going.
 devInContents (FMeasure _) = [hereLit|b	b	W	29520	779	-28	-32	a	0	0	0	0	0	0	0	0	0	0
 b	b	W	55200	1259	35	9	a	1	0	1	0	0	0	0	0	4000	4000
 |]
 devInContents (MacroFMeasure _) = [hereLit|Władysław
 Steven
 Helmut
 |]
 devInContents (LikelihoodHashed b) = devInContents (LogLossHashed b)
 devInContents (LogLossHashed _) = [hereLit|Nie kupuj	w worku
 Ona	psa
@ -447,6 +468,10 @@ Y
 devExpectedContents (FMeasure _) = [hereLit|0
 1
 |]
 devExpectedContents (MacroFMeasure _) = [hereLit|pl
 en
 de
 |]
 devExpectedContents NMI = [hereLit|en
 pl
 en
@ -495,6 +520,10 @@ testInContents Accuracy = [hereLit|2	mild	yes
 testInContents (FMeasure _) = [hereLit|b	b	W	15210	527	-64	-56	a	0	0	0	0	0	0	0	0	0	0
 b	b	N	38060	486	357	189	b	0	0	0	0	0	0	0	0	0	0
 |]
 testInContents (MacroFMeasure _) = [hereLit|Arkadiusz
 Heinrich
 Henry
 |]
 testInContents NMI = [hereLit|Fortune favors the bold.
 People who live in glass houses should not throw stones.
 W marcu, jak w garncu.
@ -542,6 +571,10 @@ Y
 testExpectedContents (FMeasure _) = [hereLit|0
 0
 |]
 testExpectedContents (MacroFMeasure _) = [hereLit|pl
 de
 en
 |]
 testExpectedContents NMI = [hereLit|en
 en
 pl
--- a/test/Spec.hs
+++ b/test/Spec.hs
@ -107,6 +107,11 @@ main = hspec $ do
      runGEvalTest "f-measure-all-false" `shouldReturnAlmost` 1.0
    it "F2-measure" $
      runGEvalTest "f2-simple" `shouldReturnAlmost` 0.714285714
  -- Macro-F-measure: the metric selected with "--metric Macro-F<beta>".
  describe "Macro-F-measure" $ do
    -- In the macro-f1-simple fixture only class "0" is ever predicted correctly
    -- (2 hits, 2 expected, 3 predicted, so F1 = 0.8); classes "1" and "2" score 0,
    -- giving 0.8 / 3.
    it "simple example" $
      runGEvalTest "macro-f1-simple" `shouldReturnAlmost` 0.266666
    -- Here the outputs are written as "label:probability" lists (config uses
    -- Macro-F2.5); the expected score is a perfect 1.0.
    it "perfect solution" $
      runGEvalTest "macro-f-measure-perfect" `shouldReturnAlmost` 1.00000
  describe "precision count" $ do
    it "simple test" $ do
      precisionCount [["Alice", "has", "a", "cat" ]] ["Ala", "has", "cat"] `shouldBe` 2
--- a/test/macro-f-measure-perfect/macro-f-measure-perfect-solution/test-A/out.tsv
+++ b/test/macro-f-measure-perfect/macro-f-measure-perfect-solution/test-A/out.tsv
@ -0,0 +1,4 @@
 B:0.4 A:0.6
 A:1.0
 B:0.4 C:0.3 A:0.3
 C:0.8 A:0.1 B:0.1
--- a/test/macro-f-measure-perfect/macro-f-measure-perfect/config.txt
+++ b/test/macro-f-measure-perfect/macro-f-measure-perfect/config.txt
@ -0,0 +1 @@
 --metric Macro-F2.5
--- a/test/macro-f-measure-perfect/macro-f-measure-perfect/test-A/expected.tsv
+++ b/test/macro-f-measure-perfect/macro-f-measure-perfect/test-A/expected.tsv
@ -0,0 +1,4 @@
 A
 A
 B
 C
--- a/test/macro-f1-simple/macro-f1-simple-solution/test-A/out.tsv
+++ b/test/macro-f1-simple/macro-f1-simple-solution/test-A/out.tsv
@ -0,0 +1,6 @@
 0
 2
 1
 0
 0
 1
--- a/test/macro-f1-simple/macro-f1-simple/config.txt
+++ b/test/macro-f1-simple/macro-f1-simple/config.txt
@ -0,0 +1 @@
 --metric Macro-F1
--- a/test/macro-f1-simple/macro-f1-simple/test-A/expected.tsv
+++ b/test/macro-f1-simple/macro-f1-simple/test-A/expected.tsv
@ -0,0 +1,6 @@
 0
 1
 2
 0
 1
 2