Implement soft f-score

This commit is contained in:
Filip Graliński 2018-10-17 17:52:43 +02:00 committed by Filip Gralinski
parent 26ba5ae92a
commit 8735610745
15 changed files with 173 additions and 16 deletions

View File

@ -36,6 +36,7 @@ library
, GEval.FeatureExtractor
, GEval.WER
, Text.Tokenizer
, GEval.Annotation
, Paths_geval
build-depends: base >= 4.7 && < 5
, cond
@ -76,6 +77,7 @@ library
, uri-encode
, MissingH
, array
, Munkres
default-language: Haskell2010
executable geval

46
src/GEval/Annotation.hs Normal file
View File

@ -0,0 +1,46 @@
{-# LANGUAGE OverloadedStrings #-}
module GEval.Annotation
(parseAnnotations, Annotation(..), matchScore)
where
import qualified Data.IntSet as IS
import qualified Data.Text as T
import Data.Attoparsec.Text
import Data.Attoparsec.Combinator
import Control.Applicative
import GEval.Common (sepByWhitespaces, (/.))
import Data.Char
data Annotation = Annotation T.Text IS.IntSet
deriving (Eq, Show)
parseAnnotations :: T.Text -> Either String [Annotation]
parseAnnotations t = parseOnly (annotationsParser <* endOfInput) t
annotationsParser :: Parser [Annotation]
annotationsParser = sepByWhitespaces annotationParser
annotationParser :: Parser Annotation
annotationParser = do
label <- takeWhile1 (\c -> not (isSpace c) && c /= ':')
string ":"
intSet <- intSetParser
return $ Annotation label intSet
intSetParser :: Parser IS.IntSet
intSetParser = IS.unions <$> intervalParser `sepBy` (string ",")
intervalParser :: Parser IS.IntSet
intervalParser = do
startIx <- decimal
endIx <- (string "-" *> decimal <|> pure startIx)
pure $ IS.fromList [startIx..endIx]
matchScore :: Annotation -> Annotation -> Double
matchScore (Annotation labelA intSetA) (Annotation labelB intSetB)
| labelA == labelB = (intSetLength intersect) /. (intSetLength $ intSetA `IS.union` intSetB)
| otherwise = 0.0
where intSetLength = Prelude.length . IS.toList
intersect = intSetA `IS.intersection` intSetB

View File

@ -6,9 +6,20 @@ import Data.Text.Read as TR
import Data.Attoparsec.Text
(/.) :: (Eq a, Integral a) => a -> a -> Double
-- some operations can be "hard" (on ints) or "soft" (on doubles),
-- introduce a typeclass so that we could generalise easily
class ConvertibleToDouble n where
toDouble :: n -> Double
instance ConvertibleToDouble Double where
toDouble = id
instance ConvertibleToDouble Int where
toDouble = fromIntegral
(/.) :: (ConvertibleToDouble f, Integral a) => f -> a -> Double
x /. 0 = 1.0
x /. y = (fromIntegral x) / (fromIntegral y)
x /. y = (toDouble x) / (fromIntegral y)
safeDoubleDiv :: Double -> Double -> Double
safeDoubleDiv _ 0.0 = 0.0

View File

@ -71,6 +71,8 @@ import Data.Attoparsec.Text (parseOnly)
import Data.Conduit.SmartSource
import qualified Data.IntSet as IS
import GEval.BLEU
import GEval.Common
import GEval.ClippEU
@ -83,6 +85,7 @@ import GEval.ProbList
import GEval.WER
import Data.Conduit.AutoDecompress
import Text.Tokenizer
import GEval.Annotation
import qualified Data.HashMap.Strict as M
import qualified Data.Vector as V
@ -107,6 +110,7 @@ data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | C
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
| BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double
| MultiLabelLogLoss | MultiLabelLikelihood
| SoftFMeasure Double
deriving (Eq)
instance Show Metric where
@ -121,6 +125,7 @@ instance Show Metric where
show ClippEU = "ClippEU"
show (FMeasure beta) = "F" ++ (show beta)
show (MacroFMeasure beta) = "Macro-F" ++ (show beta)
show (SoftFMeasure beta) = "Soft-F" ++ (show beta)
show NMI = "NMI"
show (LogLossHashed nbOfBits) = "LogLossHashed" ++ (if
nbOfBits == defaultLogLossHashedSize
@ -165,6 +170,9 @@ instance Read Metric where
readsPrec p ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':theRest) = case readsPrec p theRest of
[(beta, theRest)] -> [(MultiLabelFMeasure beta, theRest)]
_ -> []
readsPrec p ('S':'o':'f':'t':'-':'F':theRest) = case readsPrec p theRest of
[(beta, theRest)] -> [(SoftFMeasure beta, theRest)]
_ -> []
readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
[(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
_ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
@ -198,6 +206,7 @@ getMetricOrdering Accuracy = TheHigherTheBetter
getMetricOrdering ClippEU = TheHigherTheBetter
getMetricOrdering (FMeasure _) = TheHigherTheBetter
getMetricOrdering (MacroFMeasure _) = TheHigherTheBetter
getMetricOrdering (SoftFMeasure _) = TheHigherTheBetter
getMetricOrdering NMI = TheHigherTheBetter
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
@ -586,6 +595,7 @@ gevalCore' (FMeasure beta) _ = gevalCoreWithoutInput outParser outParser getCoun
| prob >= detectionThreshold && prob <= 1.0 = Right True
| otherwise = Left "expected probability"
detectionThreshold = 0.5
getCount :: (Bool, Bool) -> (Int, Int, Int)
getCount (True, True) = (1, 1, 1)
getCount (True, False) = (0, 1, 0)
getCount (False, True) = (0, 0, 1)
@ -615,11 +625,20 @@ gevalCore' (MacroFMeasure beta) _ = gevalCoreWithoutInput (Right . Just . strip)
insertMaybeToMap (Just c) m = M.insertWith (+) c 1 m
macroAverageOnCounts (tpMap, expectedMap, gotMap) =
(Prelude.sum
$ Prelude.map (\c -> fMeasureOnCounts beta (M.lookupDefault 0 c tpMap,
$ Prelude.map (\c -> fMeasureOnCounts beta (M.lookupDefault (0::Int) c tpMap,
M.lookupDefault 0 c expectedMap,
M.lookupDefault 0 c gotMap))
$ M.keys expectedMap) / (fromIntegral $ Prelude.length $ M.keys expectedMap)
gevalCore' (SoftFMeasure beta) _ = gevalCoreWithoutInput parseAnnotations
parseAnnotations
getSoftCounts
countAgg
(fMeasureOnCounts beta)
where getSoftCounts (expected, got) = (weightedMaxMatch matchScore expected got,
Prelude.length expected,
Prelude.length got)
gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep
where
parseClippings = controlledParse lineClippingsParser
@ -680,8 +699,8 @@ gevalCore' MultiLabelLogLoss _ = gevalCoreWithoutInput intoWords
where
intoWords = Right . Data.Text.words
countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int)
countAgg = CC.foldl countFolder (0, 0, 0)
countAgg :: (Num n, Monad m) => ConduitM (n, Int, Int) o m (n, Int, Int)
countAgg = CC.foldl countFolder (fromInteger 0, 0, 0)
gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
(V.Vector (Double, Double) -> Double) -> -- ^ correlation function
@ -707,7 +726,7 @@ skipLineNumber fun = fun . snd
-- | A helper function to run evaluation when the input is not needed to calculate the metric value.
gevalCoreWithoutInput :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
(Text -> Either String a) -> -- ^ parser for values in the expected output
(Text -> Either String b) -> -- ^ parser for values in the output
(Text -> Either String b) -> -- ^ parser for values in the actual output
((a, b) -> c) -> -- ^ function which combines parsed values into a single value
-- (will be launched for each item, e.g. an error/cost function
-- could be calculated here)

View File

@ -110,6 +110,13 @@ GEval sample challenge — guess the language of a first name
This is a sample/toy classification challenge for Gonito framework with Macro-F-measure as the metric.
|] ++ (commonReadmeMDContents testName)
readmeMDContents (SoftFMeasure _) testName = [i|
GEval sample challenge mark numbers
=====================================
This is a sample/toy classification challenge for Gonito framework with Soft-F-measure as the metric.
|] ++ (commonReadmeMDContents testName)
readmeMDContents NMI testName = [i|
Cluster proverbs
================
@ -358,7 +365,10 @@ pl Kazimierz
en Matthew
en Richard
|]
trainContents (SoftFMeasure _) = [hereLit|indigits:8 I have 3 daughters
indigits:1-2 indigits:9-12 12 July 1812
inwords:11-13 I can see two dogs
|]
trainContents NMI = [hereLit|pl Kto pod kim dołki kopie, ten sam w nie wpada.
en The pen is mightier than the sword.
pl Baba z wozu, koniom lżej.
@ -424,6 +434,9 @@ devInContents (MacroFMeasure _) = [hereLit|Władysław
Steven
Helmut
|]
devInContents (SoftFMeasure _) = [hereLit|I have two kids
7 April 2003
|]
devInContents (LikelihoodHashed b) = devInContents (LogLossHashed b)
devInContents (LogLossHashed _) = [hereLit|Nie kupuj w worku
Ona psa
@ -472,6 +485,9 @@ devExpectedContents (MacroFMeasure _) = [hereLit|pl
en
de
|]
devExpectedContents (SoftFMeasure _) = [hereLit|inwords:8-10
indigits:1 indigits:9-12
|]
devExpectedContents NMI = [hereLit|en
pl
en
@ -524,6 +540,9 @@ testInContents (MacroFMeasure _) = [hereLit|Arkadiusz
Heinrich
Henry
|]
testInContents (SoftFMeasure _) = [hereLit|Nothing
Four sides
|]
testInContents NMI = [hereLit|Fortune favors the bold.
People who live in glass houses should not throw stones.
W marcu, jak w garncu.
@ -575,6 +594,9 @@ testExpectedContents (MacroFMeasure _) = [hereLit|pl
de
en
|]
testExpectedContents (SoftFMeasure _) = [hereLit|
inwords:1-4
|]
testExpectedContents NMI = [hereLit|en
en
pl

View File

@ -169,7 +169,7 @@ metricReader = many $ option auto -- actually `some` should be used inst
( long "metric" -- --metric might be in the config.txt file...
<> short 'm'
<> metavar "METRIC"
<> help "Metric to be used - RMSE, MSE, Pearson, Spearman, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), macro F-measure (specify as Macro-F1, Macro-F2, Macro-F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), WER, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels or CharMatch" )
<> help "Metric to be used - RMSE, MSE, Pearson, Spearman, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), macro F-measure (specify as Macro-F1, Macro-F2, Macro-F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), WER, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels, soft F-measure (specify as Soft-F1, Soft-F2, Soft-F0.25) or CharMatch" )
altMetricReader :: Parser (Maybe Metric)
altMetricReader = optional $ option auto

View File

@ -1,10 +1,11 @@
{-# LANGUAGE PartialTypeSignatures #-}
{-# LANGUAGE ScopedTypeVariables #-}
module GEval.PrecisionRecall(calculateMAPForOneResult,
fMeasure, f1Measure, f2Measure, precision, recall,
fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder,
precisionAndRecall, precisionAndRecallFromCounts,
maxMatch, maxMatchOnOrdered, getCounts)
maxMatch, maxMatchOnOrdered, getCounts, weightedMaxMatch)
where
import GEval.Common
@ -14,9 +15,13 @@ import Data.Graph.Inductive.Query.MaxFlow
import Data.List (nub, foldl')
import Data.Algorithm.Munkres
import qualified Data.Array.IArray as DAI
calculateMAPForOneResult :: (Eq a) => [a] -> [a] -> Double
calculateMAPForOneResult expected got = precisionSum / fromIntegral (length expected)
where (_, _, precisionSum) = calculateMAPForOneResultCore expected (nub got)
where (_::Int, _, precisionSum) = calculateMAPForOneResultCore expected (nub got)
calculateMAPForOneResultCore expected got = foldl' (oneMAPStep expected) (0, 0, 0.0) got
oneMAPStep expected (gotCount, allCount, precisionSum) gotItem
| gotItem `elem` expected = (newGotCount, newAllCount, precisionSum + (newGotCount /. newAllCount))
@ -41,19 +46,19 @@ fMeasure beta matchingFun expected got =
where betaSquared = beta ^ 2
(p, r) = precisionAndRecall matchingFun expected got
f2MeasureOnCounts :: (Int, Int, Int) -> Double
f2MeasureOnCounts :: ConvertibleToDouble n => (n, Int, Int) -> Double
f2MeasureOnCounts = fMeasureOnCounts 2.0
f1MeasureOnCounts :: (Int, Int, Int) -> Double
f1MeasureOnCounts :: ConvertibleToDouble n => (n, Int, Int) -> Double
f1MeasureOnCounts = fMeasureOnCounts 1.0
fMeasureOnCounts :: Double -> (Int, Int, Int) -> Double
fMeasureOnCounts :: ConvertibleToDouble n => Double -> (n, Int, Int) -> Double
fMeasureOnCounts beta (tp, nbExpected, nbGot) =
(1 + betaSquared) * p * r `safeDoubleDiv` (betaSquared * p + r)
where betaSquared = beta ^ 2
(p, r) = precisionAndRecallFromCounts (tp, nbExpected, nbGot)
countFolder :: (Int, Int, Int) -> (Int, Int, Int) -> (Int, Int, Int)
countFolder :: Num n => (n, Int, Int) -> (n, Int, Int) -> (n, Int, Int)
countFolder (a1, a2, a3) (b1, b2, b3) = (a1+b1, a2+b2, a3+b3)
getCounts :: (a -> b -> Bool) -> ([a], [b]) -> (Int, Int, Int)
@ -72,7 +77,7 @@ precisionAndRecall matchFun expected got
= precisionAndRecallFromCounts (tp, length expected, length got)
where tp = maxMatch matchFun expected got
precisionAndRecallFromCounts :: (Int, Int, Int) -> (Double, Double)
precisionAndRecallFromCounts :: ConvertibleToDouble n => (n, Int, Int) -> (Double, Double)
precisionAndRecallFromCounts (tp, nbExpected, nbGot) =
(tp /. nbGot, tp /. nbExpected)
@ -116,3 +121,16 @@ buildGraph matchFun expected got = (b, e, g)
return (b,e)
where expectedIxs = [2..1+(length expected)]
gotIxs = [2+(length expected)..1+(length expected)+(length got)]
-- the weight are assumed to be between 0.0 and 1.0
weightedMaxMatch :: (a -> b -> Double) -> [a] -> [b] -> Double
weightedMaxMatch matchFun expected got = (fromIntegral $ length matching) - score
where (matching, score) = hungarianMethodDouble complementWeightArray
-- unfortunately `hungarianMethodDouble` looks
-- for minimal bipartite matching
-- rather than the maximal one
complementWeightArray = DAI.array ((1, 1), (m, n)) weightList
m = length expected
n = length got
weightList = [((i, j), 1.0 - (matchFun x y)) | (i, x) <- zip [1..m] expected,
(j, y) <- zip [1..n] got]

View File

@ -1,5 +1,5 @@
flags: {}
packages:
- '.'
extra-deps: [murmur3-1.0.3,naturalcomp-0.0.3]
extra-deps: [murmur3-1.0.3,naturalcomp-0.0.3,Munkres-0.1]
resolver: lts-11.9

View File

@ -18,6 +18,7 @@ import Data.Attoparsec.Text
import Options.Applicative
import Data.Text
import Text.EditDistance
import GEval.Annotation
import Data.Map.Strict
@ -32,6 +33,8 @@ import System.IO.Silently
import qualified Test.HUnit as HU
import qualified Data.IntSet as IS
import Data.Conduit.SmartSource
import Data.Conduit.Rank
import qualified Data.Conduit.Text as CT
@ -185,6 +188,10 @@ main = hspec $ do
precision alwaysMatch ['a', 'b', 'c'] [0, 1, 2, 3, 4, 5] `shouldBeAlmost` 0.5
recall alwaysMatch ['a', 'b', 'c'] [0, 1, 2, 3, 4, 5] `shouldBeAlmost` 1.0
f1Measure alwaysMatch ['a', 'b', 'c'] [0, 1, 2, 3 , 4, 5] `shouldBeAlmost` 0.66666666666666
describe "max match" $ do
it "simple" $ do
maxMatch (==) [1,2,2] [3,2] `shouldBe` 1
maxMatch (==) [3,2] [1,2,2] `shouldBe` 1
describe "ClippEU" $ do
it "parsing rectangles" $ do
let (Right r) = parseOnly (lineClippingsParser <* endOfInput) "2/0,0,2,3 10/20,30,40,50 18/0,1,500,3 "
@ -214,6 +221,11 @@ main = hspec $ do
read "F2" `shouldBe` (FMeasure 2.0)
read "F1" `shouldBe` (FMeasure 1.0)
read "F0.5" `shouldBe` (FMeasure 0.5)
describe "Soft-F1" $ do
it "simple test" $ do
runGEvalTest "soft-f1-simple" `shouldReturnAlmost` 0.33333333333333
it "perfect test" $ do
runGEvalTest "soft-f1-perfect" `shouldReturnAlmost` 1.0
describe "test edit-distance library" $ do
it "for handling UTF8" $ do
levenshteinDistance defaultEditCosts "źdźbło" "źd好bło" `shouldBe` 1
@ -261,6 +273,19 @@ main = hspec $ do
gevalCoreOnSingleLines RMSE id (LineInFile (FilePathSpec "stub1") 1 "blabla")
(LineInFile (FilePathSpec "stub2") 1 "3.4")
(LineInFile (FilePathSpec "stub3") 1 "2.6") `shouldReturnAlmost` 0.8
describe "Annotation format" $ do
it "just parse" $ do
parseAnnotations "foo:3,7-10 baz:4-6" `shouldBe` Right [Annotation "foo" (IS.fromList [3,7,8,9,10]),
Annotation "baz" (IS.fromList [4,5,6])]
it "empty" $ do
parseAnnotations "" `shouldBe` Right []
it "empty (just spaces)" $ do
parseAnnotations " " `shouldBe` Right []
it "match score" $ do
matchScore (Annotation "x" (IS.fromList [3..6])) (Annotation "y" (IS.fromList [3..6])) `shouldBeAlmost` 0.0
matchScore (Annotation "x" (IS.fromList [3..6])) (Annotation "x" (IS.fromList [3..6])) `shouldBeAlmost` 1.0
matchScore (Annotation "x" (IS.fromList [123..140])) (Annotation "x" (IS.fromList [125..130])) `shouldBeAlmost` 0.33333
matchScore (Annotation "x" (IS.fromList [3..4])) (Annotation "x" (IS.fromList [2..13])) `shouldBeAlmost` 0.1666666
describe "BIO format" $ do
it "just parse" $ do
let (Right r) = parseOnly (bioSequenceParser <* endOfInput) "O B-city/NEW_YORK I-city B-city/KALISZ I-city O B-name"

View File

@ -0,0 +1,3 @@
foo:3-4 bar:5-11 foo:12
baz:10-16 xyz:123-140
1 foo:3-4 bar:5-11 foo:12
2 baz:10-16 xyz:123-140

View File

@ -0,0 +1 @@
--metric Soft-F1

View File

@ -0,0 +1,3 @@
foo:3-4 bar:5-11 foo:12
baz:10-16 xyz:123-140
1 foo:3-4 bar:5-11 foo:12
2 baz:10-16 xyz:123-140

View File

@ -0,0 +1,3 @@
foo:2-13
bar:3-7
baz:10-16 foo:123-140
1 foo:2-13
2 bar:3-7
3 baz:10-16 foo:123-140

View File

@ -0,0 +1 @@
--metric Soft-F1

View File

@ -0,0 +1,3 @@
foo:3-4 baz:3-7 foo:12
baz:10-16 foo:125-130
1 foo:3-4 baz:3-7 foo:12
2 baz:10-16 foo:125-130