add F-measure
This commit is contained in:
parent
bc910c3a9b
commit
8e87e97f2d
@ -2,7 +2,7 @@ module GEval.Common
|
|||||||
where
|
where
|
||||||
|
|
||||||
(/.) :: (Eq a, Integral a) => a -> a -> Double
|
(/.) :: (Eq a, Integral a) => a -> a -> Double
|
||||||
x /. 0 = 0.0
|
x /. 0 = 1.0
|
||||||
x /. y = (fromIntegral x) / (fromIntegral y)
|
x /. y = (fromIntegral x) / (fromIntegral y)
|
||||||
|
|
||||||
safeDoubleDiv :: Double -> Double -> Double
|
safeDoubleDiv :: Double -> Double -> Double
|
||||||
|
@ -44,8 +44,27 @@ import GEval.PrecisionRecall
|
|||||||
|
|
||||||
type MetricValue = Double
|
type MetricValue = Double
|
||||||
|
|
||||||
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU
|
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double
|
||||||
deriving (Show, Read, Eq)
|
deriving (Eq)
|
||||||
|
|
||||||
|
-- | Renders a metric under its textual name. An F-measure is written as
-- @F@ followed by its beta as printed by 'show' for 'Double' (e.g. @F2.0@).
instance Show Metric where
  show metric = case metric of
    RMSE          -> "RMSE"
    MSE           -> "MSE"
    BLEU          -> "BLEU"
    Accuracy      -> "Accuracy"
    ClippEU       -> "ClippEU"
    FMeasure beta -> 'F' : show beta
|
||||||
|
|
||||||
|
-- | Hand-written parser for metric names. The fixed names are matched
-- literally at the start of the input; an F-measure is the letter @F@
-- immediately followed by its beta (a 'Double'), e.g. @F1@, @F2@ or @F0.25@.
--
-- Input that matches no known metric yields the empty parse list, so that
-- 'read' reports an ordinary parse failure instead of raising a
-- non-exhaustive pattern-match exception.
instance Read Metric where
  readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
  readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)]
  readsPrec _ ('B':'L':'E':'U':theRest) = [(BLEU, theRest)]
  readsPrec _ ('A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(Accuracy, theRest)]
  readsPrec _ ('C':'l':'i':'p':'p':'E':'U':theRest) = [(ClippEU, theRest)]
  -- Whatever follows the F must parse as the beta parameter; the leftover
  -- input is bound to a fresh name so the outer 'theRest' is not shadowed.
  readsPrec p ('F':theRest) = case readsPrec p theRest of
    [(beta, remainder)] -> [(FMeasure beta, remainder)]
    _ -> []
  -- Unknown metric name: no parse (previously a pattern-match failure).
  readsPrec _ _ = []
|
||||||
|
|
||||||
|
|
||||||
data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
|
data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
|
||||||
|
|
||||||
@ -55,6 +74,7 @@ getMetricOrdering MSE = TheLowerTheBetter
|
|||||||
getMetricOrdering BLEU = TheHigherTheBetter
|
getMetricOrdering BLEU = TheHigherTheBetter
|
||||||
getMetricOrdering Accuracy = TheHigherTheBetter
|
getMetricOrdering Accuracy = TheHigherTheBetter
|
||||||
getMetricOrdering ClippEU = TheHigherTheBetter
|
getMetricOrdering ClippEU = TheHigherTheBetter
|
||||||
|
getMetricOrdering (FMeasure _) = TheHigherTheBetter
|
||||||
|
|
||||||
defaultOutDirectory = "."
|
defaultOutDirectory = "."
|
||||||
defaultTestName = "test-A"
|
defaultTestName = "test-A"
|
||||||
@ -173,6 +193,24 @@ gevalCore' BLEU = gevalCore'' (Prelude.map Prelude.words . DLS.splitOn "\t" . un
|
|||||||
gevalCore' Accuracy = gevalCore'' strip strip hitOrMiss averageC id
|
gevalCore' Accuracy = gevalCore'' strip strip hitOrMiss averageC id
|
||||||
where hitOrMiss (x,y) = if x == y then 1.0 else 0.0
|
where hitOrMiss (x,y) = if x == y then 1.0 else 0.0
|
||||||
|
|
||||||
|
-- F-measure for binary classification: every (expected, output) line pair is
-- reduced to a (true positives, expected positives, detected positives)
-- triple, the triples are summed and the F-measure for the given beta is
-- computed from the totals.
gevalCore' (FMeasure beta) = gevalCore'' expParser outParser getCount countAgg (fMeasureOnCounts beta)
  where outParser = detected . getValue . TR.double
        -- the expected side goes through the strict 0/1 parser, so anything
        -- other than 0 or 1 in the gold data is rejected rather than
        -- thresholded like a probability
        expParser = expected . getValue . TR.decimal
        expected 1 = True
        expected 0 = False
        expected _ = throw $ UnexpectedData "expected 0 or 1"
        -- output value could be a probability (for compatibility with other measures)
        detected prob
          | prob >= 0.0 && prob < detectionThreshold = False
          | prob >= detectionThreshold && prob <= 1.0 = True
          | otherwise = throw $ UnexpectedData "expected probability"
        detectionThreshold = 0.5
        -- (expected, detected) -> (true positive, expected positive, detected positive)
        getCount (True, True) = (1, 1, 1)
        getCount (True, False) = (0, 1, 0)
        getCount (False, True) = (0, 0, 1)
        getCount (False, False) = (0, 0, 0)
        countAgg = CC.foldl countFolder (0, 0, 0)
|
||||||
|
|
||||||
gevalCore' ClippEU = gevalCore'' parseClippingSpecs parseClippings matchStep clippeuAgg finalStep
|
gevalCore' ClippEU = gevalCore'' parseClippingSpecs parseClippings matchStep clippeuAgg finalStep
|
||||||
where
|
where
|
||||||
parseClippings = controlledParse lineClippingsParser
|
parseClippings = controlledParse lineClippingsParser
|
||||||
@ -180,8 +218,7 @@ gevalCore' ClippEU = gevalCore'' parseClippingSpecs parseClippings matchStep cli
|
|||||||
matchStep (clippingSpecs, clippings) = (maxMatch matchClippingToSpec clippingSpecs clippings,
|
matchStep (clippingSpecs, clippings) = (maxMatch matchClippingToSpec clippingSpecs clippings,
|
||||||
Prelude.length clippingSpecs,
|
Prelude.length clippingSpecs,
|
||||||
Prelude.length clippings)
|
Prelude.length clippings)
|
||||||
clippeuAgg = CC.foldl clippeuFuse (0, 0, 0)
|
clippeuAgg = CC.foldl countFolder (0, 0, 0)
|
||||||
clippeuFuse (a1, a2, a3) (b1, b2, b3) = (a1+b1, a2+b2, a3+b3)
|
|
||||||
finalStep counts = f2MeasureOnCounts counts
|
finalStep counts = f2MeasureOnCounts counts
|
||||||
|
|
||||||
data SourceItem a = Got a | Done
|
data SourceItem a = Got a | Done
|
||||||
@ -220,7 +257,7 @@ items filePath parser =
|
|||||||
itemError :: (Double, Double) -> Double
|
itemError :: (Double, Double) -> Double
|
||||||
itemError (exp, out) = (exp-out)**2
|
itemError (exp, out) = (exp-out)**2
|
||||||
|
|
||||||
getValue :: Either String (Double, Text) -> Double
|
getValue :: Num a => Either String (a, Text) -> a
|
||||||
getValue (Right (x, reminder)) =
|
getValue (Right (x, reminder)) =
|
||||||
if Data.Text.null reminder || Data.Text.head reminder == '\t'
|
if Data.Text.null reminder || Data.Text.head reminder == '\t'
|
||||||
then x
|
then x
|
||||||
|
@ -76,6 +76,17 @@ This is a sample/toy classification challenge for Gonito framework. Replace it w
|
|||||||
the description of your challenge.
|
the description of your challenge.
|
||||||
|] ++ (commonReadmeMDContents testName)
|
|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
|
readmeMDContents (FMeasure _) testName = [i|
|
||||||
|
GEval sample challenge — forecast high energy seismic bumps
|
||||||
|
===========================================================
|
||||||
|
|
||||||
|
Based on data set provided by M. Sikora and L. Wróbel, see
|
||||||
|
https://archive.ics.uci.edu/ml/machine-learning-databases/00266/seismic-bumps.arff
|
||||||
|
|
||||||
|
This is a sample/toy classification challenge for Gonito framework with F-measure as the metric.
|
||||||
|
Replace it with the description of your challenge.
|
||||||
|
|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
readmeMDContents _ testName = [i|
|
readmeMDContents _ testName = [i|
|
||||||
GEval sample challenge
|
GEval sample challenge
|
||||||
======================
|
======================
|
||||||
@ -130,6 +141,15 @@ N -6 mild no
|
|||||||
N -6 none no
|
N -6 none no
|
||||||
|]
|
|]
|
||||||
|
|
||||||
|
trainContents (FMeasure _) = [hereLit|0 b b W 289580 1986 -38 2 a 2 0 1 1 0 0 0 0 54000 50000
|
||||||
|
1 b a W 577770 2765 27 38 a 1 0 1 0 0 0 0 0 2000 2000
|
||||||
|
0 b a W 347400 1684 -28 -22 a 2 0 1 1 0 0 0 0 31000 30000
|
||||||
|
0 b a N 72370 581 -79 -70 a 0 0 0 0 0 0 0 0 0 0
|
||||||
|
0 b a N 59210 440 -82 -76 a 1 0 1 0 0 0 0 0 2000 2000
|
||||||
|
0 a a N 42560 379 -73 -57 a 1 0 1 0 0 0 0 0 4000 4000
|
||||||
|
1 a a W 268170 1352 -41 -35 a 1 1 0 0 0 0 0 0 400 400
|
||||||
|
|]
|
||||||
|
|
||||||
trainContents _ = [hereLit|0.06 0.39 0 0.206
|
trainContents _ = [hereLit|0.06 0.39 0 0.206
|
||||||
1.00 1.00 1 0.017
|
1.00 1.00 1 0.017
|
||||||
317.8 5.20 67 0.048
|
317.8 5.20 67 0.048
|
||||||
@ -143,6 +163,10 @@ ja jumala näki , että valkeus oli hyvä ; ja jumala erotti valkeuden pimeydest
|
|||||||
devInContents Accuracy = [hereLit|-8 none no
|
devInContents Accuracy = [hereLit|-8 none no
|
||||||
1 mild no
|
1 mild no
|
||||||
|]
|
|]
|
||||||
|
devInContents (FMeasure _) = [hereLit|b b W 29520 779 -28 -32 a 0 0 0 0 0 0 0 0 0 0
|
||||||
|
b b W 55200 1259 35 9 a 1 0 1 0 0 0 0 0 4000 4000
|
||||||
|
|]
|
||||||
|
|
||||||
devInContents _ = [hereLit|0.72 0 0.007
|
devInContents _ = [hereLit|0.72 0 0.007
|
||||||
9.54 62 0.054
|
9.54 62 0.054
|
||||||
|]
|
|]
|
||||||
@ -154,6 +178,9 @@ a ka kite te atua i te marama , he pai : a ka wehea e te atua te marama i te pou
|
|||||||
devExpectedContents Accuracy = [hereLit|N
|
devExpectedContents Accuracy = [hereLit|N
|
||||||
Y
|
Y
|
||||||
|]
|
|]
|
||||||
|
devExpectedContents (FMeasure _) = [hereLit|0
|
||||||
|
1
|
||||||
|
|]
|
||||||
devExpectedContents _ = [hereLit|0.82
|
devExpectedContents _ = [hereLit|0.82
|
||||||
95.2
|
95.2
|
||||||
|]
|
|]
|
||||||
@ -162,11 +189,12 @@ testInContents :: Metric -> String
|
|||||||
testInContents BLEU = [hereLit|ja jumala kutsui valkeuden päiväksi , ja pimeyden hän kutsui yöksi
|
testInContents BLEU = [hereLit|ja jumala kutsui valkeuden päiväksi , ja pimeyden hän kutsui yöksi
|
||||||
ja tuli ehtoo , ja tuli aamu , ensimmäinen päivä
|
ja tuli ehtoo , ja tuli aamu , ensimmäinen päivä
|
||||||
|]
|
|]
|
||||||
|
|
||||||
testInContents Accuracy = [hereLit|2 mild yes
|
testInContents Accuracy = [hereLit|2 mild yes
|
||||||
-5 mild no
|
-5 mild no
|
||||||
|]
|
|]
|
||||||
|
testInContents (FMeasure _) = [hereLit|b b W 15210 527 -64 -56 a 0 0 0 0 0 0 0 0 0 0
|
||||||
|
b b N 38060 486 357 189 b 0 0 0 0 0 0 0 0 0 0
|
||||||
|
|]
|
||||||
testInContents _ = [hereLit|1.52 2 0.093
|
testInContents _ = [hereLit|1.52 2 0.093
|
||||||
30.06 14 0.009
|
30.06 14 0.009
|
||||||
|]
|
|]
|
||||||
@ -178,6 +206,9 @@ a ko te ahiahi , ko te ata , he ra kotahi
|
|||||||
testExpectedContents Accuracy = [hereLit|N
|
testExpectedContents Accuracy = [hereLit|N
|
||||||
Y
|
Y
|
||||||
|]
|
|]
|
||||||
|
testExpectedContents (FMeasure _) = [hereLit|0
|
||||||
|
0
|
||||||
|
|]
|
||||||
testExpectedContents _ = [hereLit|0.11
|
testExpectedContents _ = [hereLit|0.11
|
||||||
17.2
|
17.2
|
||||||
|]
|
|]
|
||||||
|
@ -74,7 +74,7 @@ metricReader = option auto
|
|||||||
<> value defaultMetric
|
<> value defaultMetric
|
||||||
<> showDefault
|
<> showDefault
|
||||||
<> metavar "METRIC"
|
<> metavar "METRIC"
|
||||||
<> help "Metric to be used - RMSE, MSE, Accuracy, BLEU or ClippEU" )
|
<> help "Metric to be used - RMSE, MSE, Accuracy, F-measure (specify as F1, F2, F0.25, etc.), BLEU or ClippEU" )
|
||||||
|
|
||||||
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
|
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
|
||||||
runGEval args = do
|
runGEval args = do
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
{-# LANGUAGE PartialTypeSignatures #-}
|
{-# LANGUAGE PartialTypeSignatures #-}
|
||||||
|
|
||||||
module GEval.PrecisionRecall(fMeasure, f1Measure, f2Measure, precision, recall,
|
module GEval.PrecisionRecall(fMeasure, f1Measure, f2Measure, precision, recall,
|
||||||
fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts,
|
fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder,
|
||||||
precisionAndRecall, precisionAndRecallFromCounts, maxMatch)
|
precisionAndRecall, precisionAndRecallFromCounts, maxMatch)
|
||||||
where
|
where
|
||||||
|
|
||||||
@ -34,6 +34,9 @@ fMeasureOnCounts beta (tp, nbExpected, nbGot) =
|
|||||||
where betaSquared = beta ^ 2
|
where betaSquared = beta ^ 2
|
||||||
(p, r) = precisionAndRecallFromCounts (tp, nbExpected, nbGot)
|
(p, r) = precisionAndRecallFromCounts (tp, nbExpected, nbGot)
|
||||||
|
|
||||||
|
-- | Component-wise sum of two count triples; used as the step function
-- when folding per-item counts into running totals.
countFolder :: (Int, Int, Int) -> (Int, Int, Int) -> (Int, Int, Int)
countFolder (accFirst, accSecond, accThird) (first, second, third) =
  ( accFirst + first
  , accSecond + second
  , accThird + third
  )
|
||||||
|
|
||||||
precisionAndRecall :: (a -> b -> Bool) -> [a] -> [b] -> (Double, Double)
|
precisionAndRecall :: (a -> b -> Bool) -> [a] -> [b] -> (Double, Double)
|
||||||
precisionAndRecall matchFun expected got
|
precisionAndRecall matchFun expected got
|
||||||
= precisionAndRecallFromCounts (tp, length expected, length got)
|
= precisionAndRecallFromCounts (tp, length expected, length got)
|
||||||
|
21
test/Spec.hs
21
test/Spec.hs
@ -30,6 +30,17 @@ main = hspec $ do
|
|||||||
describe "Accuracy" $ do
|
describe "Accuracy" $ do
|
||||||
it "simple example" $
|
it "simple example" $
|
||||||
runGEvalTest "accuracy-simple" `shouldReturnAlmost` 0.6
|
runGEvalTest "accuracy-simple" `shouldReturnAlmost` 0.6
|
||||||
|
describe "F-measure" $ do
|
||||||
|
it "simple example" $
|
||||||
|
runGEvalTest "f-measure-simple" `shouldReturnAlmost` 0.57142857
|
||||||
|
it "perfect classifier" $
|
||||||
|
runGEvalTest "f-measure-perfect" `shouldReturnAlmost` 1.0
|
||||||
|
it "stupid classifier" $
|
||||||
|
runGEvalTest "f-measure-stupid" `shouldReturnAlmost` 0.0
|
||||||
|
it "all false" $
|
||||||
|
runGEvalTest "f-measure-all-false" `shouldReturnAlmost` 1.0
|
||||||
|
it "F2-measure" $
|
||||||
|
runGEvalTest "f2-simple" `shouldReturnAlmost` 0.714285714
|
||||||
describe "precision count" $ do
|
describe "precision count" $ do
|
||||||
it "simple test" $ do
|
it "simple test" $ do
|
||||||
precisionCount [["Alice", "has", "a", "cat" ]] ["Ala", "has", "cat"] `shouldBe` 2
|
precisionCount [["Alice", "has", "a", "cat" ]] ["Ala", "has", "cat"] `shouldBe` 2
|
||||||
@ -90,6 +101,16 @@ main = hspec $ do
|
|||||||
(Rectangle (Point 10 20) (Point 50 60))]
|
(Rectangle (Point 10 20) (Point 50 60))]
|
||||||
it "full test" $ do
|
it "full test" $ do
|
||||||
runGEvalTest "clippeu-simple" `shouldReturnAlmost` 0.399999999999
|
runGEvalTest "clippeu-simple" `shouldReturnAlmost` 0.399999999999
|
||||||
|
describe "evaluation metric specification is parsed" $ do
|
||||||
|
it "for simple names" $ do
|
||||||
|
let metrics = [RMSE, MSE, BLEU, Accuracy, ClippEU]
|
||||||
|
let parsedMetrics = Prelude.map (read . show) metrics
|
||||||
|
metrics `shouldBe` parsedMetrics
|
||||||
|
it "for F-Measure" $ do
|
||||||
|
read "F2" `shouldBe` (FMeasure 2.0)
|
||||||
|
read "F1" `shouldBe` (FMeasure 1.0)
|
||||||
|
read "F0.5" `shouldBe` (FMeasure 0.5)
|
||||||
|
|
||||||
|
|
||||||
neverMatch :: Char -> Int -> Bool
|
neverMatch :: Char -> Int -> Bool
|
||||||
neverMatch _ _ = False
|
neverMatch _ _ = False
|
||||||
|
@ -0,0 +1,5 @@
|
|||||||
|
0
|
||||||
|
0
|
||||||
|
0
|
||||||
|
0
|
||||||
|
0
|
|
1
test/f-measure-all-false/f-measure-all-false/config.txt
Normal file
1
test/f-measure-all-false/f-measure-all-false/config.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
--metric F1
|
@ -0,0 +1,5 @@
|
|||||||
|
0
|
||||||
|
0
|
||||||
|
0
|
||||||
|
0
|
||||||
|
0
|
|
@ -0,0 +1,5 @@
|
|||||||
|
1
|
||||||
|
1
|
||||||
|
0
|
||||||
|
0
|
||||||
|
1
|
|
1
test/f-measure-perfect/f-measure-perfect/config.txt
Normal file
1
test/f-measure-perfect/f-measure-perfect/config.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
--metric F1
|
@ -0,0 +1,5 @@
|
|||||||
|
1
|
||||||
|
1
|
||||||
|
0
|
||||||
|
0
|
||||||
|
1
|
|
@ -0,0 +1,5 @@
|
|||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
||||||
|
0
|
|
1
test/f-measure-simple/f-measure-simple/config.txt
Normal file
1
test/f-measure-simple/f-measure-simple/config.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
--metric F1
|
@ -0,0 +1,5 @@
|
|||||||
|
1
|
||||||
|
1
|
||||||
|
0
|
||||||
|
0
|
||||||
|
1
|
|
@ -0,0 +1,5 @@
|
|||||||
|
0
|
||||||
|
0
|
||||||
|
0
|
||||||
|
0
|
||||||
|
0
|
|
1
test/f-measure-stupid/f-measure-stupid/config.txt
Normal file
1
test/f-measure-stupid/f-measure-stupid/config.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
--metric F1
|
@ -0,0 +1,5 @@
|
|||||||
|
1
|
||||||
|
1
|
||||||
|
0
|
||||||
|
0
|
||||||
|
1
|
|
10
test/f2-simple/f2-simple-solution/test-A/out.tsv
Normal file
10
test/f2-simple/f2-simple-solution/test-A/out.tsv
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
1
|
||||||
|
1
|
||||||
|
0
|
||||||
|
1
|
||||||
|
0
|
||||||
|
1
|
||||||
|
0
|
||||||
|
0
|
||||||
|
0
|
||||||
|
1
|
|
1
test/f2-simple/f2-simple/config.txt
Normal file
1
test/f2-simple/f2-simple/config.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
--metric F2
|
10
test/f2-simple/f2-simple/test-A/expected.tsv
Normal file
10
test/f2-simple/f2-simple/test-A/expected.tsv
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
0
|
||||||
|
0
|
||||||
|
0
|
||||||
|
1
|
||||||
|
0
|
||||||
|
1
|
||||||
|
0
|
||||||
|
1
|
||||||
|
0
|
||||||
|
1
|
|
Loading…
Reference in New Issue
Block a user