add F-measure

This commit is contained in:
Filip Gralinski 2016-12-03 09:18:04 +01:00 committed by Filip Gralinski
parent bc910c3a9b
commit 8e87e97f2d
21 changed files with 167 additions and 10 deletions

View File

@ -2,7 +2,7 @@ module GEval.Common
where
(/.) :: (Eq a, Integral a) => a -> a -> Double
x /. 0 = 0.0
x /. 0 = 1.0
x /. y = (fromIntegral x) / (fromIntegral y)
safeDoubleDiv :: Double -> Double -> Double

View File

@ -44,8 +44,27 @@ import GEval.PrecisionRecall
type MetricValue = Double
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU
deriving (Show, Read, Eq)
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double
deriving (Eq)
instance Show Metric where
show RMSE = "RMSE"
show MSE = "MSE"
show BLEU = "BLEU"
show Accuracy = "Accuracy"
show ClippEU = "ClippEU"
show (FMeasure beta) = "F" ++ (show beta)
instance Read Metric where
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)]
readsPrec _ ('B':'L':'E':'U':theRest) = [(BLEU, theRest)]
readsPrec _ ('A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(Accuracy, theRest)]
readsPrec _ ('C':'l':'i':'p':'p':'E':'U':theRest) = [(ClippEU, theRest)]
readsPrec p ('F':theRest) = case readsPrec p theRest of
[(beta, theRest)] -> [(FMeasure beta, theRest)]
_ -> []
data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
@ -55,6 +74,7 @@ getMetricOrdering MSE = TheLowerTheBetter
getMetricOrdering BLEU = TheHigherTheBetter
getMetricOrdering Accuracy = TheHigherTheBetter
getMetricOrdering ClippEU = TheHigherTheBetter
getMetricOrdering (FMeasure _) = TheHigherTheBetter
defaultOutDirectory = "."
defaultTestName = "test-A"
@ -173,6 +193,24 @@ gevalCore' BLEU = gevalCore'' (Prelude.map Prelude.words . DLS.splitOn "\t" . un
gevalCore' Accuracy = gevalCore'' strip strip hitOrMiss averageC id
where hitOrMiss (x,y) = if x == y then 1.0 else 0.0
gevalCore' (FMeasure beta) = gevalCore'' outParser outParser getCount countAgg (fMeasureOnCounts beta)
where outParser = detected . getValue . TR.double
expParser = expected . getValue . TR.decimal
expected 1 = True
expected 0 = False
expected _ = throw $ UnexpectedData "expected 0 or 1"
-- output value could be a probability (for compatibility with other measures)
detected prob
| prob >= 0.0 && prob < detectionThreshold = False
| prob >= detectionThreshold && prob <= 1.0 = True
| otherwise = throw $ UnexpectedData "expected probability"
detectionThreshold = 0.5
getCount (True, True) = (1, 1, 1)
getCount (True, False) = (0, 1, 0)
getCount (False, True) = (0, 0, 1)
getCount (False, False) = (0, 0, 0)
countAgg = CC.foldl countFolder (0, 0, 0)
gevalCore' ClippEU = gevalCore'' parseClippingSpecs parseClippings matchStep clippeuAgg finalStep
where
parseClippings = controlledParse lineClippingsParser
@ -180,8 +218,7 @@ gevalCore' ClippEU = gevalCore'' parseClippingSpecs parseClippings matchStep cli
matchStep (clippingSpecs, clippings) = (maxMatch matchClippingToSpec clippingSpecs clippings,
Prelude.length clippingSpecs,
Prelude.length clippings)
clippeuAgg = CC.foldl clippeuFuse (0, 0, 0)
clippeuFuse (a1, a2, a3) (b1, b2, b3) = (a1+b1, a2+b2, a3+b3)
clippeuAgg = CC.foldl countFolder (0, 0, 0)
finalStep counts = f2MeasureOnCounts counts
data SourceItem a = Got a | Done
@ -220,7 +257,7 @@ items filePath parser =
itemError :: (Double, Double) -> Double
itemError (exp, out) = (exp-out)**2
getValue :: Either String (Double, Text) -> Double
getValue :: Num a => Either String (a, Text) -> a
getValue (Right (x, reminder)) =
if Data.Text.null reminder || Data.Text.head reminder == '\t'
then x

View File

@ -76,6 +76,17 @@ This is a sample/toy classification challenge for Gonito framework. Replace it w
the description of your challenge.
|] ++ (commonReadmeMDContents testName)
readmeMDContents (FMeasure _) testName = [i|
GEval sample challenge forecast high energy seismic bumps
===========================================================
Based on data set provided by M. Sikora and L. Wróbel, see
https://archive.ics.uci.edu/ml/machine-learning-databases/00266/seismic-bumps.arff
This is a sample/toy classification challenge for Gonito framework with F-measure as the metric.
Replace it with the description of your challenge.
|] ++ (commonReadmeMDContents testName)
readmeMDContents _ testName = [i|
GEval sample challenge
======================
@ -130,6 +141,15 @@ N -6 mild no
N -6 none no
|]
trainContents (FMeasure _) = [hereLit|0 b b W 289580 1986 -38 2 a 2 0 1 1 0 0 0 0 54000 50000
1 b a W 577770 2765 27 38 a 1 0 1 0 0 0 0 0 2000 2000
0 b a W 347400 1684 -28 -22 a 2 0 1 1 0 0 0 0 31000 30000
0 b a N 72370 581 -79 -70 a 0 0 0 0 0 0 0 0 0 0
0 b a N 59210 440 -82 -76 a 1 0 1 0 0 0 0 0 2000 2000
0 a a N 42560 379 -73 -57 a 1 0 1 0 0 0 0 0 4000 4000
1 a a W 268170 1352 -41 -35 a 1 1 0 0 0 0 0 0 400 400
|]
trainContents _ = [hereLit|0.06 0.39 0 0.206
1.00 1.00 1 0.017
317.8 5.20 67 0.048
@ -143,6 +163,10 @@ ja jumala näki , että valkeus oli hyvä ; ja jumala erotti valkeuden pimeydest
devInContents Accuracy = [hereLit|-8 none no
1 mild no
|]
devInContents (FMeasure _) = [hereLit|b b W 29520 779 -28 -32 a 0 0 0 0 0 0 0 0 0 0
b b W 55200 1259 35 9 a 1 0 1 0 0 0 0 0 4000 4000
|]
devInContents _ = [hereLit|0.72 0 0.007
9.54 62 0.054
|]
@ -154,6 +178,9 @@ a ka kite te atua i te marama , he pai : a ka wehea e te atua te marama i te pou
devExpectedContents Accuracy = [hereLit|N
Y
|]
devExpectedContents (FMeasure _) = [hereLit|0
1
|]
devExpectedContents _ = [hereLit|0.82
95.2
|]
@ -162,11 +189,12 @@ testInContents :: Metric -> String
testInContents BLEU = [hereLit|ja jumala kutsui valkeuden päiväksi , ja pimeyden hän kutsui yöksi
ja tuli ehtoo , ja tuli aamu , ensimmäinen päivä
|]
testInContents Accuracy = [hereLit|2 mild yes
-5 mild no
|]
testInContents (FMeasure _) = [hereLit|b b W 15210 527 -64 -56 a 0 0 0 0 0 0 0 0 0 0
b b N 38060 486 357 189 b 0 0 0 0 0 0 0 0 0 0
|]
testInContents _ = [hereLit|1.52 2 0.093
30.06 14 0.009
|]
@ -178,6 +206,9 @@ a ko te ahiahi , ko te ata , he ra kotahi
testExpectedContents Accuracy = [hereLit|N
Y
|]
testExpectedContents (FMeasure _) = [hereLit|0
0
|]
testExpectedContents _ = [hereLit|0.11
17.2
|]

View File

@ -74,7 +74,7 @@ metricReader = option auto
<> value defaultMetric
<> showDefault
<> metavar "METRIC"
<> help "Metric to be used - RMSE, MSE, Accuracy, BLEU or ClippEU" )
<> help "Metric to be used - RMSE, MSE, Accuracy, F-measure (specify as F1, F2, F0.25, etc.), BLEU or ClippEU" )
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
runGEval args = do

View File

@ -1,7 +1,7 @@
{-# LANGUAGE PartialTypeSignatures #-}
module GEval.PrecisionRecall(fMeasure, f1Measure, f2Measure, precision, recall,
fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts,
fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder,
precisionAndRecall, precisionAndRecallFromCounts, maxMatch)
where
@ -34,6 +34,9 @@ fMeasureOnCounts beta (tp, nbExpected, nbGot) =
where betaSquared = beta ^ 2
(p, r) = precisionAndRecallFromCounts (tp, nbExpected, nbGot)
countFolder :: (Int, Int, Int) -> (Int, Int, Int) -> (Int, Int, Int)
countFolder (a1, a2, a3) (b1, b2, b3) = (a1+b1, a2+b2, a3+b3)
precisionAndRecall :: (a -> b -> Bool) -> [a] -> [b] -> (Double, Double)
precisionAndRecall matchFun expected got
= precisionAndRecallFromCounts (tp, length expected, length got)

View File

@ -30,6 +30,17 @@ main = hspec $ do
describe "Accuracy" $ do
it "simple example" $
runGEvalTest "accuracy-simple" `shouldReturnAlmost` 0.6
describe "F-measure" $ do
it "simple example" $
runGEvalTest "f-measure-simple" `shouldReturnAlmost` 0.57142857
it "perfect classifier" $
runGEvalTest "f-measure-perfect" `shouldReturnAlmost` 1.0
it "stupid classifier" $
runGEvalTest "f-measure-stupid" `shouldReturnAlmost` 0.0
it "all false" $
runGEvalTest "f-measure-all-false" `shouldReturnAlmost` 1.0
it "F2-measure" $
runGEvalTest "f2-simple" `shouldReturnAlmost` 0.714285714
describe "precision count" $ do
it "simple test" $ do
precisionCount [["Alice", "has", "a", "cat" ]] ["Ala", "has", "cat"] `shouldBe` 2
@ -90,6 +101,16 @@ main = hspec $ do
(Rectangle (Point 10 20) (Point 50 60))]
it "full test" $ do
runGEvalTest "clippeu-simple" `shouldReturnAlmost` 0.399999999999
describe "evaluation metric specification is parsed" $ do
it "for simple names" $ do
let metrics = [RMSE, MSE, BLEU, Accuracy, ClippEU]
let parsedMetrics = Prelude.map (read . show) metrics
metrics `shouldBe` parsedMetrics
it "for F-Measure" $ do
read "F2" `shouldBe` (FMeasure 2.0)
read "F1" `shouldBe` (FMeasure 1.0)
read "F0.5" `shouldBe` (FMeasure 0.5)
neverMatch :: Char -> Int -> Bool
neverMatch _ _ = False

View File

@ -0,0 +1,5 @@
0
0
0
0
0
1 0
2 0
3 0
4 0
5 0

View File

@ -0,0 +1 @@
--metric F1

View File

@ -0,0 +1,5 @@
0
0
0
0
0
1 0
2 0
3 0
4 0
5 0

View File

@ -0,0 +1,5 @@
1
1
0
0
1
1 1
2 1
3 0
4 0
5 1

View File

@ -0,0 +1 @@
--metric F1

View File

@ -0,0 +1,5 @@
1
1
0
0
1
1 1
2 1
3 0
4 0
5 1

View File

@ -0,0 +1,5 @@
1
1
1
1
0
1 1
2 1
3 1
4 1
5 0

View File

@ -0,0 +1 @@
--metric F1

View File

@ -0,0 +1,5 @@
1
1
0
0
1
1 1
2 1
3 0
4 0
5 1

View File

@ -0,0 +1,5 @@
0
0
0
0
0
1 0
2 0
3 0
4 0
5 0

View File

@ -0,0 +1 @@
--metric F1

View File

@ -0,0 +1,5 @@
1
1
0
0
1
1 1
2 1
3 0
4 0
5 1

View File

@ -0,0 +1,10 @@
1
1
0
1
0
1
0
0
0
1
1 1
2 1
3 0
4 1
5 0
6 1
7 0
8 0
9 0
10 1

View File

@ -0,0 +1 @@
--metric F2

View File

@ -0,0 +1,10 @@
0
0
0
1
0
1
0
1
0
1
1 0
2 0
3 0
4 1
5 0
6 1
7 0
8 1
9 0
10 1