add F-measure
This commit is contained in:
parent
bc910c3a9b
commit
8e87e97f2d
@ -2,7 +2,7 @@ module GEval.Common
|
||||
where
|
||||
|
||||
(/.) :: (Eq a, Integral a) => a -> a -> Double
|
||||
x /. 0 = 0.0
|
||||
x /. 0 = 1.0
|
||||
x /. y = (fromIntegral x) / (fromIntegral y)
|
||||
|
||||
safeDoubleDiv :: Double -> Double -> Double
|
||||
|
@ -44,8 +44,27 @@ import GEval.PrecisionRecall
|
||||
|
||||
type MetricValue = Double
|
||||
|
||||
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU
|
||||
deriving (Show, Read, Eq)
|
||||
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double
|
||||
deriving (Eq)
|
||||
|
||||
instance Show Metric where
|
||||
show RMSE = "RMSE"
|
||||
show MSE = "MSE"
|
||||
show BLEU = "BLEU"
|
||||
show Accuracy = "Accuracy"
|
||||
show ClippEU = "ClippEU"
|
||||
show (FMeasure beta) = "F" ++ (show beta)
|
||||
|
||||
instance Read Metric where
|
||||
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
|
||||
readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)]
|
||||
readsPrec _ ('B':'L':'E':'U':theRest) = [(BLEU, theRest)]
|
||||
readsPrec _ ('A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(Accuracy, theRest)]
|
||||
readsPrec _ ('C':'l':'i':'p':'p':'E':'U':theRest) = [(ClippEU, theRest)]
|
||||
readsPrec p ('F':theRest) = case readsPrec p theRest of
|
||||
[(beta, theRest)] -> [(FMeasure beta, theRest)]
|
||||
_ -> []
|
||||
|
||||
|
||||
data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
|
||||
|
||||
@ -55,6 +74,7 @@ getMetricOrdering MSE = TheLowerTheBetter
|
||||
getMetricOrdering BLEU = TheHigherTheBetter
|
||||
getMetricOrdering Accuracy = TheHigherTheBetter
|
||||
getMetricOrdering ClippEU = TheHigherTheBetter
|
||||
getMetricOrdering (FMeasure _) = TheHigherTheBetter
|
||||
|
||||
defaultOutDirectory = "."
|
||||
defaultTestName = "test-A"
|
||||
@ -173,6 +193,24 @@ gevalCore' BLEU = gevalCore'' (Prelude.map Prelude.words . DLS.splitOn "\t" . un
|
||||
gevalCore' Accuracy = gevalCore'' strip strip hitOrMiss averageC id
|
||||
where hitOrMiss (x,y) = if x == y then 1.0 else 0.0
|
||||
|
||||
gevalCore' (FMeasure beta) = gevalCore'' outParser outParser getCount countAgg (fMeasureOnCounts beta)
|
||||
where outParser = detected . getValue . TR.double
|
||||
expParser = expected . getValue . TR.decimal
|
||||
expected 1 = True
|
||||
expected 0 = False
|
||||
expected _ = throw $ UnexpectedData "expected 0 or 1"
|
||||
-- output value could be a probability (for compatibility with other measures)
|
||||
detected prob
|
||||
| prob >= 0.0 && prob < detectionThreshold = False
|
||||
| prob >= detectionThreshold && prob <= 1.0 = True
|
||||
| otherwise = throw $ UnexpectedData "expected probability"
|
||||
detectionThreshold = 0.5
|
||||
getCount (True, True) = (1, 1, 1)
|
||||
getCount (True, False) = (0, 1, 0)
|
||||
getCount (False, True) = (0, 0, 1)
|
||||
getCount (False, False) = (0, 0, 0)
|
||||
countAgg = CC.foldl countFolder (0, 0, 0)
|
||||
|
||||
gevalCore' ClippEU = gevalCore'' parseClippingSpecs parseClippings matchStep clippeuAgg finalStep
|
||||
where
|
||||
parseClippings = controlledParse lineClippingsParser
|
||||
@ -180,8 +218,7 @@ gevalCore' ClippEU = gevalCore'' parseClippingSpecs parseClippings matchStep cli
|
||||
matchStep (clippingSpecs, clippings) = (maxMatch matchClippingToSpec clippingSpecs clippings,
|
||||
Prelude.length clippingSpecs,
|
||||
Prelude.length clippings)
|
||||
clippeuAgg = CC.foldl clippeuFuse (0, 0, 0)
|
||||
clippeuFuse (a1, a2, a3) (b1, b2, b3) = (a1+b1, a2+b2, a3+b3)
|
||||
clippeuAgg = CC.foldl countFolder (0, 0, 0)
|
||||
finalStep counts = f2MeasureOnCounts counts
|
||||
|
||||
data SourceItem a = Got a | Done
|
||||
@ -220,7 +257,7 @@ items filePath parser =
|
||||
itemError :: (Double, Double) -> Double
|
||||
itemError (exp, out) = (exp-out)**2
|
||||
|
||||
getValue :: Either String (Double, Text) -> Double
|
||||
getValue :: Num a => Either String (a, Text) -> a
|
||||
getValue (Right (x, reminder)) =
|
||||
if Data.Text.null reminder || Data.Text.head reminder == '\t'
|
||||
then x
|
||||
|
@ -76,6 +76,17 @@ This is a sample/toy classification challenge for Gonito framework. Replace it w
|
||||
the description of your challenge.
|
||||
|] ++ (commonReadmeMDContents testName)
|
||||
|
||||
readmeMDContents (FMeasure _) testName = [i|
|
||||
GEval sample challenge — forecast high energy seismic bumps
|
||||
===========================================================
|
||||
|
||||
Based on data set provided by M. Sikora and L. Wróbel, see
|
||||
https://archive.ics.uci.edu/ml/machine-learning-databases/00266/seismic-bumps.arff
|
||||
|
||||
This is a sample/toy classification challenge for Gonito framework with F-measure as the metric.
|
||||
Replace it with the description of your challenge.
|
||||
|] ++ (commonReadmeMDContents testName)
|
||||
|
||||
readmeMDContents _ testName = [i|
|
||||
GEval sample challenge
|
||||
======================
|
||||
@ -130,6 +141,15 @@ N -6 mild no
|
||||
N -6 none no
|
||||
|]
|
||||
|
||||
trainContents (FMeasure _) = [hereLit|0 b b W 289580 1986 -38 2 a 2 0 1 1 0 0 0 0 54000 50000
|
||||
1 b a W 577770 2765 27 38 a 1 0 1 0 0 0 0 0 2000 2000
|
||||
0 b a W 347400 1684 -28 -22 a 2 0 1 1 0 0 0 0 31000 30000
|
||||
0 b a N 72370 581 -79 -70 a 0 0 0 0 0 0 0 0 0 0
|
||||
0 b a N 59210 440 -82 -76 a 1 0 1 0 0 0 0 0 2000 2000
|
||||
0 a a N 42560 379 -73 -57 a 1 0 1 0 0 0 0 0 4000 4000
|
||||
1 a a W 268170 1352 -41 -35 a 1 1 0 0 0 0 0 0 400 400
|
||||
|]
|
||||
|
||||
trainContents _ = [hereLit|0.06 0.39 0 0.206
|
||||
1.00 1.00 1 0.017
|
||||
317.8 5.20 67 0.048
|
||||
@ -143,6 +163,10 @@ ja jumala näki , että valkeus oli hyvä ; ja jumala erotti valkeuden pimeydest
|
||||
devInContents Accuracy = [hereLit|-8 none no
|
||||
1 mild no
|
||||
|]
|
||||
devInContents (FMeasure _) = [hereLit|b b W 29520 779 -28 -32 a 0 0 0 0 0 0 0 0 0 0
|
||||
b b W 55200 1259 35 9 a 1 0 1 0 0 0 0 0 4000 4000
|
||||
|]
|
||||
|
||||
devInContents _ = [hereLit|0.72 0 0.007
|
||||
9.54 62 0.054
|
||||
|]
|
||||
@ -154,6 +178,9 @@ a ka kite te atua i te marama , he pai : a ka wehea e te atua te marama i te pou
|
||||
devExpectedContents Accuracy = [hereLit|N
|
||||
Y
|
||||
|]
|
||||
devExpectedContents (FMeasure _) = [hereLit|0
|
||||
1
|
||||
|]
|
||||
devExpectedContents _ = [hereLit|0.82
|
||||
95.2
|
||||
|]
|
||||
@ -162,11 +189,12 @@ testInContents :: Metric -> String
|
||||
testInContents BLEU = [hereLit|ja jumala kutsui valkeuden päiväksi , ja pimeyden hän kutsui yöksi
|
||||
ja tuli ehtoo , ja tuli aamu , ensimmäinen päivä
|
||||
|]
|
||||
|
||||
testInContents Accuracy = [hereLit|2 mild yes
|
||||
-5 mild no
|
||||
|]
|
||||
|
||||
testInContents (FMeasure _) = [hereLit|b b W 15210 527 -64 -56 a 0 0 0 0 0 0 0 0 0 0
|
||||
b b N 38060 486 357 189 b 0 0 0 0 0 0 0 0 0 0
|
||||
|]
|
||||
testInContents _ = [hereLit|1.52 2 0.093
|
||||
30.06 14 0.009
|
||||
|]
|
||||
@ -178,6 +206,9 @@ a ko te ahiahi , ko te ata , he ra kotahi
|
||||
testExpectedContents Accuracy = [hereLit|N
|
||||
Y
|
||||
|]
|
||||
testExpectedContents (FMeasure _) = [hereLit|0
|
||||
0
|
||||
|]
|
||||
testExpectedContents _ = [hereLit|0.11
|
||||
17.2
|
||||
|]
|
||||
|
@ -74,7 +74,7 @@ metricReader = option auto
|
||||
<> value defaultMetric
|
||||
<> showDefault
|
||||
<> metavar "METRIC"
|
||||
<> help "Metric to be used - RMSE, MSE, Accuracy, BLEU or ClippEU" )
|
||||
<> help "Metric to be used - RMSE, MSE, Accuracy, F-measure (specify as F1, F2, F0.25, etc.), BLEU or ClippEU" )
|
||||
|
||||
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
|
||||
runGEval args = do
|
||||
|
@ -1,7 +1,7 @@
|
||||
{-# LANGUAGE PartialTypeSignatures #-}
|
||||
|
||||
module GEval.PrecisionRecall(fMeasure, f1Measure, f2Measure, precision, recall,
|
||||
fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts,
|
||||
fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder,
|
||||
precisionAndRecall, precisionAndRecallFromCounts, maxMatch)
|
||||
where
|
||||
|
||||
@ -34,6 +34,9 @@ fMeasureOnCounts beta (tp, nbExpected, nbGot) =
|
||||
where betaSquared = beta ^ 2
|
||||
(p, r) = precisionAndRecallFromCounts (tp, nbExpected, nbGot)
|
||||
|
||||
countFolder :: (Int, Int, Int) -> (Int, Int, Int) -> (Int, Int, Int)
|
||||
countFolder (a1, a2, a3) (b1, b2, b3) = (a1+b1, a2+b2, a3+b3)
|
||||
|
||||
precisionAndRecall :: (a -> b -> Bool) -> [a] -> [b] -> (Double, Double)
|
||||
precisionAndRecall matchFun expected got
|
||||
= precisionAndRecallFromCounts (tp, length expected, length got)
|
||||
|
21
test/Spec.hs
21
test/Spec.hs
@ -30,6 +30,17 @@ main = hspec $ do
|
||||
describe "Accuracy" $ do
|
||||
it "simple example" $
|
||||
runGEvalTest "accuracy-simple" `shouldReturnAlmost` 0.6
|
||||
describe "F-measure" $ do
|
||||
it "simple example" $
|
||||
runGEvalTest "f-measure-simple" `shouldReturnAlmost` 0.57142857
|
||||
it "perfect classifier" $
|
||||
runGEvalTest "f-measure-perfect" `shouldReturnAlmost` 1.0
|
||||
it "stupid classifier" $
|
||||
runGEvalTest "f-measure-stupid" `shouldReturnAlmost` 0.0
|
||||
it "all false" $
|
||||
runGEvalTest "f-measure-all-false" `shouldReturnAlmost` 1.0
|
||||
it "F2-measure" $
|
||||
runGEvalTest "f2-simple" `shouldReturnAlmost` 0.714285714
|
||||
describe "precision count" $ do
|
||||
it "simple test" $ do
|
||||
precisionCount [["Alice", "has", "a", "cat" ]] ["Ala", "has", "cat"] `shouldBe` 2
|
||||
@ -90,6 +101,16 @@ main = hspec $ do
|
||||
(Rectangle (Point 10 20) (Point 50 60))]
|
||||
it "full test" $ do
|
||||
runGEvalTest "clippeu-simple" `shouldReturnAlmost` 0.399999999999
|
||||
describe "evaluation metric specification is parsed" $ do
|
||||
it "for simple names" $ do
|
||||
let metrics = [RMSE, MSE, BLEU, Accuracy, ClippEU]
|
||||
let parsedMetrics = Prelude.map (read . show) metrics
|
||||
metrics `shouldBe` parsedMetrics
|
||||
it "for F-Measure" $ do
|
||||
read "F2" `shouldBe` (FMeasure 2.0)
|
||||
read "F1" `shouldBe` (FMeasure 1.0)
|
||||
read "F0.5" `shouldBe` (FMeasure 0.5)
|
||||
|
||||
|
||||
neverMatch :: Char -> Int -> Bool
|
||||
neverMatch _ _ = False
|
||||
|
@ -0,0 +1,5 @@
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
|
1
test/f-measure-all-false/f-measure-all-false/config.txt
Normal file
1
test/f-measure-all-false/f-measure-all-false/config.txt
Normal file
@ -0,0 +1 @@
|
||||
--metric F1
|
@ -0,0 +1,5 @@
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
|
@ -0,0 +1,5 @@
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
||||
1
|
|
1
test/f-measure-perfect/f-measure-perfect/config.txt
Normal file
1
test/f-measure-perfect/f-measure-perfect/config.txt
Normal file
@ -0,0 +1 @@
|
||||
--metric F1
|
@ -0,0 +1,5 @@
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
||||
1
|
|
@ -0,0 +1,5 @@
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
|
1
test/f-measure-simple/f-measure-simple/config.txt
Normal file
1
test/f-measure-simple/f-measure-simple/config.txt
Normal file
@ -0,0 +1 @@
|
||||
--metric F1
|
@ -0,0 +1,5 @@
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
||||
1
|
|
@ -0,0 +1,5 @@
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
||||
0
|
|
1
test/f-measure-stupid/f-measure-stupid/config.txt
Normal file
1
test/f-measure-stupid/f-measure-stupid/config.txt
Normal file
@ -0,0 +1 @@
|
||||
--metric F1
|
@ -0,0 +1,5 @@
|
||||
1
|
||||
1
|
||||
0
|
||||
0
|
||||
1
|
|
10
test/f2-simple/f2-simple-solution/test-A/out.tsv
Normal file
10
test/f2-simple/f2-simple-solution/test-A/out.tsv
Normal file
@ -0,0 +1,10 @@
|
||||
1
|
||||
1
|
||||
0
|
||||
1
|
||||
0
|
||||
1
|
||||
0
|
||||
0
|
||||
0
|
||||
1
|
|
1
test/f2-simple/f2-simple/config.txt
Normal file
1
test/f2-simple/f2-simple/config.txt
Normal file
@ -0,0 +1 @@
|
||||
--metric F2
|
10
test/f2-simple/f2-simple/test-A/expected.tsv
Normal file
10
test/f2-simple/f2-simple/test-A/expected.tsv
Normal file
@ -0,0 +1,10 @@
|
||||
0
|
||||
0
|
||||
0
|
||||
1
|
||||
0
|
||||
1
|
||||
0
|
||||
1
|
||||
0
|
||||
1
|
|
Loading…
Reference in New Issue
Block a user