MultiLabel-F1 works on labels given with probs now
This commit is contained in:
parent
82bdf70031
commit
bd2bfde287
@ -28,6 +28,7 @@ library
|
||||
, GEval.LineByLine
|
||||
, GEval.BIO
|
||||
, GEval.ParseParams
|
||||
, GEval.ProbList
|
||||
, Data.Conduit.AutoDecompress
|
||||
, Data.Conduit.SmartSource
|
||||
, Data.Conduit.Rank
|
||||
|
@ -78,6 +78,7 @@ import GEval.ClusteringMetrics
|
||||
import GEval.LogLossHashed
|
||||
import GEval.CharMatch
|
||||
import GEval.BIO
|
||||
import GEval.ProbList
|
||||
import Data.Conduit.AutoDecompress
|
||||
|
||||
import qualified Data.HashMap.Strict as M
|
||||
@ -572,11 +573,14 @@ gevalCore' BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWit
|
||||
return $ Prelude.map eraseNormalisation entities
|
||||
|
||||
gevalCore' (MultiLabelFMeasure beta) _ = gevalCoreWithoutInput intoWords
|
||||
intoWords
|
||||
getWords
|
||||
(getCounts (==))
|
||||
countAgg
|
||||
(fMeasureOnCounts beta)
|
||||
where intoWords = Right . (Prelude.map unpack) . Data.Text.words
|
||||
where
|
||||
getWords = Right . (Prelude.map unpack) . selectByStandardThreshold . parseIntoProbList
|
||||
intoWords = Right . (Prelude.map unpack) . Data.Text.words
|
||||
|
||||
|
||||
countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int)
|
||||
countAgg = CC.foldl countFolder (0, 0, 0)
|
||||
|
65
src/GEval/ProbList.hs
Normal file
65
src/GEval/ProbList.hs
Normal file
@ -0,0 +1,65 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module GEval.ProbList
|
||||
(parseIntoProbList, selectByStandardThreshold)
|
||||
where
|
||||
|
||||
import qualified Data.Text as T
|
||||
|
||||
import GEval.Common
|
||||
|
||||
|
||||
newtype Probability = P { getP :: Double }
|
||||
deriving (Eq,Ord,Show)
|
||||
|
||||
isProbability :: Double -> Bool
|
||||
isProbability p = 0.0 <= p && p <= 1.0
|
||||
|
||||
mkProbability :: Double -> Probability
|
||||
mkProbability p
|
||||
| isProbability p = P p
|
||||
| otherwise = error $ show p ++ " is not in [0, 1]"
|
||||
|
||||
probabilityOne :: Probability
|
||||
probabilityOne = mkProbability 1.0
|
||||
|
||||
data ProbList = ProbList [WordWithProb]
|
||||
deriving (Show)
|
||||
|
||||
data WordWithProb = WordWithProb T.Text Probability
|
||||
deriving (Show)
|
||||
|
||||
parseIntoWordWithProb :: T.Text -> WordWithProb
|
||||
parseIntoWordWithProb t =
|
||||
-- If anything is wrong the whole word is treated as a label,
|
||||
-- even if it contains colons, digits, dots, etc.
|
||||
if T.null wordSpecPart
|
||||
then wordWithoutProb
|
||||
else
|
||||
if "." `T.isInfixOf` numberPart
|
||||
then case textToDouble numberPart of
|
||||
Right p -> if isProbability p
|
||||
then WordWithProb wordSpecPart (mkProbability p)
|
||||
else wordWithoutProb
|
||||
Left _ -> wordWithoutProb
|
||||
else wordWithoutProb
|
||||
where
|
||||
(wordSpecPart', numberPart) = T.breakOnEnd ":" t
|
||||
wordWithoutProb = WordWithProb t probabilityOne
|
||||
wordSpecPart = if T.null wordSpecPart'
|
||||
then wordSpecPart'
|
||||
else T.init wordSpecPart'
|
||||
|
||||
parseIntoProbList :: T.Text -> ProbList
|
||||
parseIntoProbList = ProbList . map parseIntoWordWithProb . T.words
|
||||
|
||||
selectByThreshold :: Probability -> ProbList -> [T.Text]
|
||||
selectByThreshold threshold (ProbList l) =
|
||||
map (\(WordWithProb w _) -> w)
|
||||
$ filter (\(WordWithProb _ p) -> p >= threshold) l
|
||||
|
||||
standardThreshold :: Double
|
||||
standardThreshold = 0.5
|
||||
|
||||
selectByStandardThreshold :: ProbList -> [T.Text]
|
||||
selectByStandardThreshold = selectByThreshold (mkProbability standardThreshold)
|
@ -220,6 +220,10 @@ main = hspec $ do
|
||||
runGEvalTest "multilabel-f1-simple" `shouldReturnAlmost` 0.66666666666
|
||||
it "simple F2" $ do
|
||||
runGEvalTest "multilabel-f2-simple" `shouldReturnAlmost` 0.441176470588235
|
||||
it "labels given with probs" $ do
|
||||
runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615
|
||||
it "labels given with probs and numbers" $ do
|
||||
runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666
|
||||
describe "evaluating single lines" $ do
|
||||
it "RMSE" $ do
|
||||
gevalCoreOnSingleLines RMSE (LineInFile (FilePathSpec "stub1") 1 "blabla")
|
||||
|
@ -0,0 +1,3 @@
|
||||
foo:1 bar:2:0.7
|
||||
|
||||
foo
|
|
@ -0,0 +1 @@
|
||||
--metric MultiLabel-F1
|
@ -0,0 +1,3 @@
|
||||
foo:1 bar:2
|
||||
|
||||
foo:0
|
|
@ -0,0 +1,5 @@
|
||||
foo 90:0.751 bar:0.3
|
||||
91:0.4 90:0.6
|
||||
90:0.99
|
||||
nic:0.3 baz:0.6
|
||||
foo:0.87 baz:0.8111
|
|
@ -0,0 +1 @@
|
||||
--metric MultiLabel-F1
|
@ -0,0 +1,5 @@
|
||||
foo 90
|
||||
91
|
||||
90 bar
|
||||
|
||||
foo
|
|
Loading…
Reference in New Issue
Block a user