MultiLabel-F1 works on labels given with probs now
This commit is contained in:
parent
82bdf70031
commit
bd2bfde287
@ -28,6 +28,7 @@ library
|
|||||||
, GEval.LineByLine
|
, GEval.LineByLine
|
||||||
, GEval.BIO
|
, GEval.BIO
|
||||||
, GEval.ParseParams
|
, GEval.ParseParams
|
||||||
|
, GEval.ProbList
|
||||||
, Data.Conduit.AutoDecompress
|
, Data.Conduit.AutoDecompress
|
||||||
, Data.Conduit.SmartSource
|
, Data.Conduit.SmartSource
|
||||||
, Data.Conduit.Rank
|
, Data.Conduit.Rank
|
||||||
|
@ -78,6 +78,7 @@ import GEval.ClusteringMetrics
|
|||||||
import GEval.LogLossHashed
|
import GEval.LogLossHashed
|
||||||
import GEval.CharMatch
|
import GEval.CharMatch
|
||||||
import GEval.BIO
|
import GEval.BIO
|
||||||
|
import GEval.ProbList
|
||||||
import Data.Conduit.AutoDecompress
|
import Data.Conduit.AutoDecompress
|
||||||
|
|
||||||
import qualified Data.HashMap.Strict as M
|
import qualified Data.HashMap.Strict as M
|
||||||
@ -572,11 +573,14 @@ gevalCore' BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWit
|
|||||||
return $ Prelude.map eraseNormalisation entities
|
return $ Prelude.map eraseNormalisation entities
|
||||||
|
|
||||||
gevalCore' (MultiLabelFMeasure beta) _ = gevalCoreWithoutInput intoWords
|
gevalCore' (MultiLabelFMeasure beta) _ = gevalCoreWithoutInput intoWords
|
||||||
intoWords
|
getWords
|
||||||
(getCounts (==))
|
(getCounts (==))
|
||||||
countAgg
|
countAgg
|
||||||
(fMeasureOnCounts beta)
|
(fMeasureOnCounts beta)
|
||||||
where intoWords = Right . (Prelude.map unpack) . Data.Text.words
|
where
|
||||||
|
getWords = Right . (Prelude.map unpack) . selectByStandardThreshold . parseIntoProbList
|
||||||
|
intoWords = Right . (Prelude.map unpack) . Data.Text.words
|
||||||
|
|
||||||
|
|
||||||
countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int)
|
countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int)
|
||||||
countAgg = CC.foldl countFolder (0, 0, 0)
|
countAgg = CC.foldl countFolder (0, 0, 0)
|
||||||
|
65
src/GEval/ProbList.hs
Normal file
65
src/GEval/ProbList.hs
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
|
module GEval.ProbList
|
||||||
|
(parseIntoProbList, selectByStandardThreshold)
|
||||||
|
where
|
||||||
|
|
||||||
|
import qualified Data.Text as T
|
||||||
|
|
||||||
|
import GEval.Common
|
||||||
|
|
||||||
|
|
||||||
|
newtype Probability = P { getP :: Double }
|
||||||
|
deriving (Eq,Ord,Show)
|
||||||
|
|
||||||
|
isProbability :: Double -> Bool
|
||||||
|
isProbability p = 0.0 <= p && p <= 1.0
|
||||||
|
|
||||||
|
mkProbability :: Double -> Probability
|
||||||
|
mkProbability p
|
||||||
|
| isProbability p = P p
|
||||||
|
| otherwise = error $ show p ++ " is not in [0, 1]"
|
||||||
|
|
||||||
|
probabilityOne :: Probability
|
||||||
|
probabilityOne = mkProbability 1.0
|
||||||
|
|
||||||
|
data ProbList = ProbList [WordWithProb]
|
||||||
|
deriving (Show)
|
||||||
|
|
||||||
|
data WordWithProb = WordWithProb T.Text Probability
|
||||||
|
deriving (Show)
|
||||||
|
|
||||||
|
parseIntoWordWithProb :: T.Text -> WordWithProb
|
||||||
|
parseIntoWordWithProb t =
|
||||||
|
-- If anything is wrong the whole word is treated as a label,
|
||||||
|
-- even if it contains colons, digits, dots, etc.
|
||||||
|
if T.null wordSpecPart
|
||||||
|
then wordWithoutProb
|
||||||
|
else
|
||||||
|
if "." `T.isInfixOf` numberPart
|
||||||
|
then case textToDouble numberPart of
|
||||||
|
Right p -> if isProbability p
|
||||||
|
then WordWithProb wordSpecPart (mkProbability p)
|
||||||
|
else wordWithoutProb
|
||||||
|
Left _ -> wordWithoutProb
|
||||||
|
else wordWithoutProb
|
||||||
|
where
|
||||||
|
(wordSpecPart', numberPart) = T.breakOnEnd ":" t
|
||||||
|
wordWithoutProb = WordWithProb t probabilityOne
|
||||||
|
wordSpecPart = if T.null wordSpecPart'
|
||||||
|
then wordSpecPart'
|
||||||
|
else T.init wordSpecPart'
|
||||||
|
|
||||||
|
parseIntoProbList :: T.Text -> ProbList
|
||||||
|
parseIntoProbList = ProbList . map parseIntoWordWithProb . T.words
|
||||||
|
|
||||||
|
selectByThreshold :: Probability -> ProbList -> [T.Text]
|
||||||
|
selectByThreshold threshold (ProbList l) =
|
||||||
|
map (\(WordWithProb w _) -> w)
|
||||||
|
$ filter (\(WordWithProb _ p) -> p >= threshold) l
|
||||||
|
|
||||||
|
standardThreshold :: Double
|
||||||
|
standardThreshold = 0.5
|
||||||
|
|
||||||
|
selectByStandardThreshold :: ProbList -> [T.Text]
|
||||||
|
selectByStandardThreshold = selectByThreshold (mkProbability standardThreshold)
|
@ -220,6 +220,10 @@ main = hspec $ do
|
|||||||
runGEvalTest "multilabel-f1-simple" `shouldReturnAlmost` 0.66666666666
|
runGEvalTest "multilabel-f1-simple" `shouldReturnAlmost` 0.66666666666
|
||||||
it "simple F2" $ do
|
it "simple F2" $ do
|
||||||
runGEvalTest "multilabel-f2-simple" `shouldReturnAlmost` 0.441176470588235
|
runGEvalTest "multilabel-f2-simple" `shouldReturnAlmost` 0.441176470588235
|
||||||
|
it "labels given with probs" $ do
|
||||||
|
runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615
|
||||||
|
it "labels given with probs and numbers" $ do
|
||||||
|
runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666
|
||||||
describe "evaluating single lines" $ do
|
describe "evaluating single lines" $ do
|
||||||
it "RMSE" $ do
|
it "RMSE" $ do
|
||||||
gevalCoreOnSingleLines RMSE (LineInFile (FilePathSpec "stub1") 1 "blabla")
|
gevalCoreOnSingleLines RMSE (LineInFile (FilePathSpec "stub1") 1 "blabla")
|
||||||
|
@ -0,0 +1,3 @@
|
|||||||
|
foo:1 bar:2:0.7
|
||||||
|
|
||||||
|
foo
|
|
@ -0,0 +1 @@
|
|||||||
|
--metric MultiLabel-F1
|
@ -0,0 +1,3 @@
|
|||||||
|
foo:1 bar:2
|
||||||
|
|
||||||
|
foo:0
|
|
@ -0,0 +1,5 @@
|
|||||||
|
foo 90:0.751 bar:0.3
|
||||||
|
91:0.4 90:0.6
|
||||||
|
90:0.99
|
||||||
|
nic:0.3 baz:0.6
|
||||||
|
foo:0.87 baz:0.8111
|
|
@ -0,0 +1 @@
|
|||||||
|
--metric MultiLabel-F1
|
@ -0,0 +1,5 @@
|
|||||||
|
foo 90
|
||||||
|
91
|
||||||
|
90 bar
|
||||||
|
|
||||||
|
foo
|
|
Loading…
Reference in New Issue
Block a user