implement MultiLabel-F metric
This commit is contained in:
parent
c0fd359590
commit
4b3a4fa665
@ -1,5 +1,5 @@
|
|||||||
name: geval
|
name: geval
|
||||||
version: 1.1.2.0
|
version: 1.2.0.0
|
||||||
synopsis: Machine learning evaluation tools
|
synopsis: Machine learning evaluation tools
|
||||||
description: Please see README.md
|
description: Please see README.md
|
||||||
homepage: http://github.com/name/project
|
homepage: http://github.com/name/project
|
||||||
|
@ -96,7 +96,7 @@ defaultLogLossHashedSize = 10
|
|||||||
-- | evaluation metric
|
-- | evaluation metric
|
||||||
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI
|
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI
|
||||||
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
||||||
| BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE
|
| BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double
|
||||||
deriving (Eq)
|
deriving (Eq)
|
||||||
|
|
||||||
instance Show Metric where
|
instance Show Metric where
|
||||||
@ -126,8 +126,7 @@ instance Show Metric where
|
|||||||
show BIOF1 = "BIO-F1"
|
show BIOF1 = "BIO-F1"
|
||||||
show BIOF1Labels = "BIO-F1-Labels"
|
show BIOF1Labels = "BIO-F1-Labels"
|
||||||
show MAE = "MAE"
|
show MAE = "MAE"
|
||||||
|
show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta)
|
||||||
|
|
||||||
|
|
||||||
instance Read Metric where
|
instance Read Metric where
|
||||||
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
|
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
|
||||||
@ -139,6 +138,9 @@ instance Read Metric where
|
|||||||
readsPrec p ('F':theRest) = case readsPrec p theRest of
|
readsPrec p ('F':theRest) = case readsPrec p theRest of
|
||||||
[(beta, theRest)] -> [(FMeasure beta, theRest)]
|
[(beta, theRest)] -> [(FMeasure beta, theRest)]
|
||||||
_ -> []
|
_ -> []
|
||||||
|
readsPrec p ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':theRest) = case readsPrec p theRest of
|
||||||
|
[(beta, theRest)] -> [(MultiLabelFMeasure beta, theRest)]
|
||||||
|
_ -> []
|
||||||
readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
|
readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
|
||||||
[(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
|
[(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
|
||||||
_ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
|
_ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
|
||||||
@ -173,6 +175,7 @@ getMetricOrdering Likelihood = TheHigherTheBetter
|
|||||||
getMetricOrdering BIOF1 = TheHigherTheBetter
|
getMetricOrdering BIOF1 = TheHigherTheBetter
|
||||||
getMetricOrdering BIOF1Labels = TheHigherTheBetter
|
getMetricOrdering BIOF1Labels = TheHigherTheBetter
|
||||||
getMetricOrdering MAE = TheLowerTheBetter
|
getMetricOrdering MAE = TheLowerTheBetter
|
||||||
|
getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter
|
||||||
|
|
||||||
isInputNeeded :: Metric -> Bool
|
isInputNeeded :: Metric -> Bool
|
||||||
isInputNeeded CharMatch = True
|
isInputNeeded CharMatch = True
|
||||||
@ -521,7 +524,6 @@ gevalCore' (FMeasure beta) _ = gevalCoreWithoutInput outParser outParser getCoun
|
|||||||
getCount (True, False) = (0, 1, 0)
|
getCount (True, False) = (0, 1, 0)
|
||||||
getCount (False, True) = (0, 0, 1)
|
getCount (False, True) = (0, 0, 1)
|
||||||
getCount (False, False) = (0, 0, 0)
|
getCount (False, False) = (0, 0, 0)
|
||||||
countAgg = CC.foldl countFolder (0, 0, 0)
|
|
||||||
|
|
||||||
gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep
|
gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep
|
||||||
where
|
where
|
||||||
@ -566,6 +568,13 @@ gevalCore' BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWit
|
|||||||
entities <- parseBioSequenceIntoEntities s
|
entities <- parseBioSequenceIntoEntities s
|
||||||
return $ Prelude.map eraseNormalisation entities
|
return $ Prelude.map eraseNormalisation entities
|
||||||
|
|
||||||
|
gevalCore' (MultiLabelFMeasure beta) _ = gevalCoreWithoutInput intoWords
|
||||||
|
intoWords
|
||||||
|
(getCounts (==))
|
||||||
|
countAgg
|
||||||
|
(fMeasureOnCounts beta)
|
||||||
|
where intoWords = Right . (Prelude.map unpack) . Data.Text.words
|
||||||
|
|
||||||
countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int)
|
countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int)
|
||||||
countAgg = CC.foldl countFolder (0, 0, 0)
|
countAgg = CC.foldl countFolder (0, 0, 0)
|
||||||
|
|
||||||
|
@ -229,6 +229,22 @@ The output should be given in the BIO format with the normalized forms given aft
|
|||||||
The metric is F1 counted on entities (not labels).
|
The metric is F1 counted on entities (not labels).
|
||||||
|] ++ (commonReadmeMDContents testName)
|
|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
|
readmeMDContents (MultiLabelFMeasure beta) testName = [i|
|
||||||
|
Tag names and their components
|
||||||
|
==============================
|
||||||
|
|
||||||
|
Tag names and their components (first name/surname) in a text.
|
||||||
|
|
||||||
|
Tags:
|
||||||
|
* person
|
||||||
|
* surname
|
||||||
|
* first-name
|
||||||
|
|
||||||
|
For each tag a sequence of token IDs separated with commas should be given (after a colon).
|
||||||
|
|
||||||
|
The metric is F1 on labels.
|
||||||
|
|] ++ (commonReadmeMDContents testName)
|
||||||
|
|
||||||
readmeMDContents _ testName = [i|
|
readmeMDContents _ testName = [i|
|
||||||
GEval sample challenge
|
GEval sample challenge
|
||||||
======================
|
======================
|
||||||
@ -324,6 +340,10 @@ trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surnam
|
|||||||
O O O O O There is no name here
|
O O O O O There is no name here
|
||||||
B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman
|
B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman
|
||||||
|]
|
|]
|
||||||
|
trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person:3,4,5 first-name:4 surname:5
|
||||||
|
Steven bloody Brown person:1,3 first-name:1 surname:3
|
||||||
|
James and James first-name:1 first-name:3
|
||||||
|
|]
|
||||||
trainContents _ = [hereLit|0.06 0.39 0 0.206
|
trainContents _ = [hereLit|0.06 0.39 0 0.206
|
||||||
1.00 1.00 1 0.017
|
1.00 1.00 1 0.017
|
||||||
317.8 5.20 67 0.048
|
317.8 5.20 67 0.048
|
||||||
@ -365,6 +385,10 @@ devInContents BIOF1Labels = devInContents BIOF1
|
|||||||
devInContents BIOF1 = [hereLit|Adam and Eve
|
devInContents BIOF1 = [hereLit|Adam and Eve
|
||||||
Mr Jan Kowalski
|
Mr Jan Kowalski
|
||||||
|]
|
|]
|
||||||
|
devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here
|
||||||
|
I see him
|
||||||
|
Barbara
|
||||||
|
|]
|
||||||
devInContents _ = [hereLit|0.72 0 0.007
|
devInContents _ = [hereLit|0.72 0 0.007
|
||||||
9.54 62 0.054
|
9.54 62 0.054
|
||||||
|]
|
|]
|
||||||
@ -404,6 +428,10 @@ devExpectedContents BIOF1Labels = devExpectedContents BIOF1
|
|||||||
devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE
|
devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE
|
||||||
O B-firstname/JAN B-surname/KOWALSKI
|
O B-firstname/JAN B-surname/KOWALSKI
|
||||||
|]
|
|]
|
||||||
|
devExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,2 first-name:1 surname:2
|
||||||
|
|
||||||
|
first-name:1
|
||||||
|
|]
|
||||||
devExpectedContents _ = [hereLit|0.82
|
devExpectedContents _ = [hereLit|0.82
|
||||||
95.2
|
95.2
|
||||||
|]
|
|]
|
||||||
@ -445,6 +473,10 @@ testInContents BIOF1Labels = testInContents BIOF1
|
|||||||
testInContents BIOF1 = [hereLit|Alan Tring
|
testInContents BIOF1 = [hereLit|Alan Tring
|
||||||
No name here
|
No name here
|
||||||
|]
|
|]
|
||||||
|
testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith
|
||||||
|
Nobody is there
|
||||||
|
I saw Marketa
|
||||||
|
|]
|
||||||
testInContents _ = [hereLit|1.52 2 0.093
|
testInContents _ = [hereLit|1.52 2 0.093
|
||||||
30.06 14 0.009
|
30.06 14 0.009
|
||||||
|]
|
|]
|
||||||
@ -486,6 +518,10 @@ testExpectedContents BIOF1Labels = testExpectedContents BIOF1
|
|||||||
testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING
|
testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING
|
||||||
O O O
|
O O O
|
||||||
|]
|
|]
|
||||||
|
testExpectedContents (MultiLabelFMeasure _) = [hereLit|person:1,3 first-name:1 surname:3
|
||||||
|
|
||||||
|
first-name:3
|
||||||
|
|]
|
||||||
testExpectedContents _ = [hereLit|0.11
|
testExpectedContents _ = [hereLit|0.11
|
||||||
17.2
|
17.2
|
||||||
|]
|
|]
|
||||||
|
@ -3,7 +3,8 @@
|
|||||||
module GEval.PrecisionRecall(calculateMAPForOneResult,
|
module GEval.PrecisionRecall(calculateMAPForOneResult,
|
||||||
fMeasure, f1Measure, f2Measure, precision, recall,
|
fMeasure, f1Measure, f2Measure, precision, recall,
|
||||||
fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder,
|
fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder,
|
||||||
precisionAndRecall, precisionAndRecallFromCounts, maxMatch, maxMatchOnOrdered)
|
precisionAndRecall, precisionAndRecallFromCounts,
|
||||||
|
maxMatch, maxMatchOnOrdered, getCounts)
|
||||||
where
|
where
|
||||||
|
|
||||||
import GEval.Common
|
import GEval.Common
|
||||||
@ -30,8 +31,8 @@ f1Measure :: (a -> b -> Bool) -> [a] -> [b] -> Double
|
|||||||
f1Measure = fMeasure 1.0
|
f1Measure = fMeasure 1.0
|
||||||
|
|
||||||
-- | Calculates the (generalized) F-measure
|
-- | Calculates the (generalized) F-measure
|
||||||
fMeasure :: Double
|
fMeasure :: Double -- ^ beta parameter
|
||||||
-> (a -> b -> Bool) -- ^ beta parameter
|
-> (a -> b -> Bool) -- ^ function to check whether there is a match
|
||||||
-> [a] -- ^ the ground truth
|
-> [a] -- ^ the ground truth
|
||||||
-> [b] -- ^ what we got
|
-> [b] -- ^ what we got
|
||||||
-> Double -- ^ f-Measure
|
-> Double -- ^ f-Measure
|
||||||
@ -55,6 +56,11 @@ fMeasureOnCounts beta (tp, nbExpected, nbGot) =
|
|||||||
countFolder :: (Int, Int, Int) -> (Int, Int, Int) -> (Int, Int, Int)
|
countFolder :: (Int, Int, Int) -> (Int, Int, Int) -> (Int, Int, Int)
|
||||||
countFolder (a1, a2, a3) (b1, b2, b3) = (a1+b1, a2+b2, a3+b3)
|
countFolder (a1, a2, a3) (b1, b2, b3) = (a1+b1, a2+b2, a3+b3)
|
||||||
|
|
||||||
|
-- | Turns one (expected, got) pair of item lists into a triple of counts:
-- the value 'maxMatch' returns for the two lists under the given matching
-- function, the number of expected items, and the number of items got.
getCounts :: (a -> b -> Bool) -> ([a], [b]) -> (Int, Int, Int)
getCounts matchingFun (expected, got) = (nbMatched, nbExpected, nbGot)
  where nbMatched  = maxMatch matchingFun expected got
        nbExpected = length expected
        nbGot      = length got
|
||||||
|
|
||||||
-- | Calculates both precision and recall.
|
-- | Calculates both precision and recall.
|
||||||
--
|
--
|
||||||
-- (See https://en.wikipedia.org/wiki/Precision_and_recall)
|
-- (See https://en.wikipedia.org/wiki/Precision_and_recall)
|
||||||
|
@ -212,6 +212,11 @@ main = hspec $ do
|
|||||||
describe "Likelihood" $ do
|
describe "Likelihood" $ do
|
||||||
it "simple" $ do
|
it "simple" $ do
|
||||||
runGEvalTest "likelihood-simple" `shouldReturnAlmost` 0.72742818469866
|
runGEvalTest "likelihood-simple" `shouldReturnAlmost` 0.72742818469866
|
||||||
|
describe "MultiLabel-F" $ do
|
||||||
|
it "simple" $ do
|
||||||
|
runGEvalTest "multilabel-f1-simple" `shouldReturnAlmost` 0.66666666666
|
||||||
|
it "simple F2" $ do
|
||||||
|
runGEvalTest "multilabel-f2-simple" `shouldReturnAlmost` 0.441176470588235
|
||||||
describe "evaluating single lines" $ do
|
describe "evaluating single lines" $ do
|
||||||
it "RMSE" $ do
|
it "RMSE" $ do
|
||||||
gevalCoreOnSingleLines RMSE (LineInFile (FilePathSpec "stub1") 1 "blabla")
|
gevalCoreOnSingleLines RMSE (LineInFile (FilePathSpec "stub1") 1 "blabla")
|
||||||
|
@ -0,0 +1,4 @@
|
|||||||
|
foo bar baz
|
||||||
|
uuu
|
||||||
|
foo bar baz
|
||||||
|
qqq aaa
|
|
@ -0,0 +1 @@
|
|||||||
|
--metric MultiLabel-F1
|
@ -0,0 +1,4 @@
|
|||||||
|
foo bar baz
|
||||||
|
|
||||||
|
foo
|
||||||
|
qqq qqq
|
|
@ -0,0 +1,4 @@
|
|||||||
|
bar:2
|
||||||
|
other:6
|
||||||
|
xyz:12
|
||||||
|
t:6 t:8 t:9
|
|
@ -0,0 +1 @@
|
|||||||
|
--metric MultiLabel-F2
|
@ -0,0 +1,4 @@
|
|||||||
|
foo:1 bar:2 baz:3
|
||||||
|
other:5
|
||||||
|
|
||||||
|
t:6 t:7 t:8
|
|
Loading…
Reference in New Issue
Block a user