diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 8dab4b7..380332f 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -98,7 +98,7 @@ defaultLogLossHashedSize :: Word32 defaultLogLossHashedSize = 10 -- | evaluation metric -data Metric = RMSE | MSE | BLEU | GLEU | WER | Accuracy | ClippEU | FMeasure Double | NMI +data Metric = RMSE | MSE | BLEU | GLEU | WER | Accuracy | ClippEU | FMeasure Double | MacroFMeasure Double | NMI | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood | BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double | MultiLabelLogLoss | MultiLabelLikelihood @@ -113,6 +113,7 @@ instance Show Metric where show Accuracy = "Accuracy" show ClippEU = "ClippEU" show (FMeasure beta) = "F" ++ (show beta) + show (MacroFMeasure beta) = "Macro-F" ++ (show beta) show NMI = "NMI" show (LogLossHashed nbOfBits) = "LogLossHashed" ++ (if nbOfBits == defaultLogLossHashedSize @@ -149,6 +150,9 @@ instance Read Metric where readsPrec p ('F':theRest) = case readsPrec p theRest of [(beta, theRest)] -> [(FMeasure beta, theRest)] _ -> [] + readsPrec p ('M':'a':'c':'r':'o':'-':'F':theRest) = case readsPrec p theRest of + [(beta, theRest)] -> [(MacroFMeasure beta, theRest)] + _ -> [] readsPrec p ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':theRest) = case readsPrec p theRest of [(beta, theRest)] -> [(MultiLabelFMeasure beta, theRest)] _ -> [] @@ -182,6 +186,7 @@ getMetricOrdering WER = TheLowerTheBetter getMetricOrdering Accuracy = TheHigherTheBetter getMetricOrdering ClippEU = TheHigherTheBetter getMetricOrdering (FMeasure _) = TheHigherTheBetter +getMetricOrdering (MacroFMeasure _) = TheHigherTheBetter getMetricOrdering NMI = TheHigherTheBetter getMetricOrdering (LogLossHashed _) = TheLowerTheBetter getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter @@ -573,6 +578,35 @@ gevalCore' (FMeasure beta) _ = gevalCoreWithoutInput outParser outParser getCoun getCount (False, True) = (0, 0, 1) getCount (False, False) = (0, 
0, 0)
+-- Macro-averaged F-measure: an F-measure (with the given beta) is computed
+-- separately for every class occurring in the expected data and the
+-- per-class scores are averaged with equal weight for each class.
+--
+-- Both the expected and the output lines are stripped before use. An output
+-- line is either a bare class label or a list of word specs (a probability
+-- distribution such as the one used for the Likelihood/LogLossHashed
+-- metrics); in the latter case a single class is picked from it.
+gevalCore' (MacroFMeasure beta) _ = gevalCoreWithoutInput (Right . Just . strip)
+                                                          (Right . predicted . strip)
+                                                          getClassesInvolved
+                                                          gatherClassC
+                                                          macroAverageOnCounts
+  where
+    -- Turn one output line into the predicted class (if any).
+    predicted got =
+      -- first try to parse what we got as a probability distribution
+      -- (like the one used for Likelihood/LogLossHashed metric)
+      case parseWordSpecs got of
+        Right wordSpecs ->
+          -- Matching on the list keeps `Prelude.maximum` away from the
+          -- empty case; the greatest pair wins and its second component
+          -- is the class (presumably pairs are (probability, class)).
+          case catMaybes $ Prelude.map wordSpecToPair wordSpecs of
+            [] -> Nothing
+            pairs -> Just $ snd $ Prelude.maximum pairs
+        -- not a distribution: the whole (stripped) line is the class label
+        Left _ -> Just got
+    -- For a single item, yield the classes to be counted as
+    -- (true positive, expected, got).
+    getClassesInvolved (Just a, Nothing) = (Nothing, Just a, Nothing)
+    getClassesInvolved (Nothing, Just b) = (Nothing, Nothing, Just b) -- should not occur, for completeness
+    getClassesInvolved (Nothing, Nothing) = (Nothing, Nothing, Nothing) -- should not occur, keeps the function total
+    getClassesInvolved (Just a, Just b)
+      | a == b = (Just a, Just a, Just a)
+      | otherwise = (Nothing, Just a, Just b)
+    -- Accumulate three per-class counters over all items: true positives,
+    -- expected occurrences and predicted occurrences.
+    gatherClassC = CC.foldl gatherClassCombiner (M.empty, M.empty, M.empty)
+    gatherClassCombiner (tpMap, expectedMap, gotMap) (tp, expected, got) =
+      (insertMaybeToMap tp tpMap,
+       insertMaybeToMap expected expectedMap,
+       insertMaybeToMap got gotMap)
+    -- Increase the counter of the given class by one; `Nothing` leaves the
+    -- map untouched.
+    insertMaybeToMap Nothing m = m
+    insertMaybeToMap (Just c) m = M.insertWith (+) c 1 m
+    -- Average the per-class F-measures over the classes seen in the expected
+    -- data. Classes that were only predicted get no term of their own.
+    macroAverageOnCounts (tpMap, expectedMap, gotMap)
+      -- No expected classes at all: return 0 instead of the NaN that
+      -- dividing 0 by 0 would produce.
+      | Prelude.null classes = 0.0
+      | otherwise = Prelude.sum (Prelude.map classFMeasure classes)
+                    / fromIntegral (Prelude.length classes)
+      where
+        classes = M.keys expectedMap
+        classFMeasure c = fMeasureOnCounts beta (M.lookupDefault 0 c tpMap,
+                                                 M.lookupDefault 0 c expectedMap,
+                                                 M.lookupDefault 0 c gotMap)
+
 gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep where parseClippings = controlledParse lineClippingsParser diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs index de13af9..dbd114d 100644 --- a/src/GEval/CreateChallenge.hs +++ b/src/GEval/CreateChallenge.hs @@ -103,6 +103,13 @@ The output value could a probability where value greater than or equal to 0.5 is as 1.
|] ++ (commonReadmeMDContents testName) +readmeMDContents (MacroFMeasure _) testName = [i| +GEval sample challenge — guess the language of a first name +=========================================================== + +This is a sample/toy classification challenge for Gonito framework with Macro-F-measure as the metric. +|] ++ (commonReadmeMDContents testName) + readmeMDContents NMI testName = [i| Cluster proverbs ================ @@ -342,6 +349,16 @@ trainContents (FMeasure _) = [hereLit|0 b b W 289580 1986 -38 2 a 2 0 1 1 0 0 0 1 a a W 268170 1352 -41 -35 a 1 1 0 0 0 0 0 0 400 400 |] +trainContents (MacroFMeasure _) = [hereLit|pl Stanisław +en John +de Hans +pl Wacław +pl Jan +pl Kazimierz +en Matthew +en Richard +|] + trainContents NMI = [hereLit|pl Kto pod kim dołki kopie, ten sam w nie wpada. en The pen is mightier than the sword. pl Baba z wozu, koniom lżej. @@ -403,6 +420,10 @@ When the going gets tough, the tough get going. devInContents (FMeasure _) = [hereLit|b b W 29520 779 -28 -32 a 0 0 0 0 0 0 0 0 0 0 b b W 55200 1259 35 9 a 1 0 1 0 0 0 0 0 4000 4000 |] +devInContents (MacroFMeasure _) = [hereLit|Władysław +Steven +Helmut +|] devInContents (LikelihoodHashed b) = devInContents (LogLossHashed b) devInContents (LogLossHashed _) = [hereLit|Nie kupuj w worku Ona psa @@ -447,6 +468,10 @@ Y devExpectedContents (FMeasure _) = [hereLit|0 1 |] +devExpectedContents (MacroFMeasure _) = [hereLit|pl +en +de +|] devExpectedContents NMI = [hereLit|en pl en @@ -495,6 +520,10 @@ testInContents Accuracy = [hereLit|2 mild yes testInContents (FMeasure _) = [hereLit|b b W 15210 527 -64 -56 a 0 0 0 0 0 0 0 0 0 0 b b N 38060 486 357 189 b 0 0 0 0 0 0 0 0 0 0 |] +testInContents (MacroFMeasure _) = [hereLit|Arkadiusz +Heinrich +Henry +|] testInContents NMI = [hereLit|Fortune favors the bold. People who live in glass houses should not throw stones. W marcu, jak w garncu. 
@@ -542,6 +571,10 @@ Y testExpectedContents (FMeasure _) = [hereLit|0 0 |] +testExpectedContents (MacroFMeasure _) = [hereLit|pl +de +en +|] testExpectedContents NMI = [hereLit|en en pl diff --git a/test/Spec.hs b/test/Spec.hs index 6dc5721..f4a3bfb 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -107,6 +107,11 @@ main = hspec $ do runGEvalTest "f-measure-all-false" `shouldReturnAlmost` 1.0 it "F2-measure" $ runGEvalTest "f2-simple" `shouldReturnAlmost` 0.714285714 + describe "Macro-F-measure" $ do + it "simple example" $ + runGEvalTest "macro-f1-simple" `shouldReturnAlmost` 0.266666 + it "perfect soltion" $ + runGEvalTest "macro-f-measure-perfect" `shouldReturnAlmost` 1.00000 describe "precision count" $ do it "simple test" $ do precisionCount [["Alice", "has", "a", "cat" ]] ["Ala", "has", "cat"] `shouldBe` 2 diff --git a/test/macro-f-measure-perfect/macro-f-measure-perfect-solution/test-A/out.tsv b/test/macro-f-measure-perfect/macro-f-measure-perfect-solution/test-A/out.tsv new file mode 100644 index 0000000..06fc404 --- /dev/null +++ b/test/macro-f-measure-perfect/macro-f-measure-perfect-solution/test-A/out.tsv @@ -0,0 +1,4 @@ +B:0.4 A:0.6 +A:1.0 +B:0.4 C:0.3 A:0.3 +C:0.8 A:0.1 B:0.1 diff --git a/test/macro-f-measure-perfect/macro-f-measure-perfect/config.txt b/test/macro-f-measure-perfect/macro-f-measure-perfect/config.txt new file mode 100644 index 0000000..9a6e51d --- /dev/null +++ b/test/macro-f-measure-perfect/macro-f-measure-perfect/config.txt @@ -0,0 +1 @@ +--metric Macro-F2.5 diff --git a/test/macro-f-measure-perfect/macro-f-measure-perfect/test-A/expected.tsv b/test/macro-f-measure-perfect/macro-f-measure-perfect/test-A/expected.tsv new file mode 100644 index 0000000..5e5eaae --- /dev/null +++ b/test/macro-f-measure-perfect/macro-f-measure-perfect/test-A/expected.tsv @@ -0,0 +1,4 @@ +A +A +B +C diff --git a/test/macro-f1-simple/macro-f1-simple-solution/test-A/out.tsv b/test/macro-f1-simple/macro-f1-simple-solution/test-A/out.tsv new file mode 100644 
index 0000000..18680a6 --- /dev/null +++ b/test/macro-f1-simple/macro-f1-simple-solution/test-A/out.tsv @@ -0,0 +1,6 @@ +0 +2 +1 +0 +0 +1 diff --git a/test/macro-f1-simple/macro-f1-simple/config.txt b/test/macro-f1-simple/macro-f1-simple/config.txt new file mode 100644 index 0000000..a08dfdf --- /dev/null +++ b/test/macro-f1-simple/macro-f1-simple/config.txt @@ -0,0 +1 @@ +--metric Macro-F1 diff --git a/test/macro-f1-simple/macro-f1-simple/test-A/expected.tsv b/test/macro-f1-simple/macro-f1-simple/test-A/expected.tsv new file mode 100644 index 0000000..df8d594 --- /dev/null +++ b/test/macro-f1-simple/macro-f1-simple/test-A/expected.tsv @@ -0,0 +1,6 @@ +0 +1 +2 +0 +1 +2