Implement soft f-score

2018-10-17 17:52:43 +02:00 · 2018-10-17 17:52:43 +02:00 · 8735610745
commit 8735610745
parent 26ba5ae92a
15 changed files with 173 additions and 16 deletions
--- a/geval.cabal
+++ b/geval.cabal
@ -36,6 +36,7 @@ library
                     , GEval.FeatureExtractor
                     , GEval.WER
                     , Text.Tokenizer
+                     , GEval.Annotation
                     , Paths_geval
  build-depends:       base >= 4.7 && < 5
                     , cond
@ -76,6 +77,7 @@ library
                     , uri-encode
                     , MissingH
                     , array
+                     , Munkres
  default-language:    Haskell2010

 executable geval
--- a/src/GEval/Annotation.hs
+++ b/src/GEval/Annotation.hs
@ -0,0 +1,46 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module GEval.Annotation
+       (parseAnnotations, Annotation(..), matchScore)
+       where
+
+import qualified Data.IntSet as IS
+import qualified Data.Text as T
+
+import Data.Attoparsec.Text
+import Data.Attoparsec.Combinator
+import Control.Applicative
+import GEval.Common (sepByWhitespaces, (/.))
+import Data.Char
+
+data Annotation = Annotation T.Text IS.IntSet
+                  deriving (Eq, Show)
+
+parseAnnotations :: T.Text -> Either String [Annotation]
+parseAnnotations t = parseOnly (annotationsParser <* endOfInput) t
+
+annotationsParser :: Parser [Annotation]
+annotationsParser = sepByWhitespaces annotationParser
+
+annotationParser :: Parser Annotation
+annotationParser = do
+  label <- takeWhile1 (\c -> not (isSpace c) && c /= ':')
+  string ":"
+  intSet <- intSetParser
+  return $ Annotation label intSet
+
+intSetParser :: Parser IS.IntSet
+intSetParser = IS.unions <$> intervalParser `sepBy` (string ",")
+
+intervalParser :: Parser IS.IntSet
+intervalParser = do
+  startIx <- decimal
+  endIx <- (string "-" *> decimal <|> pure startIx)
+  pure $ IS.fromList [startIx..endIx]
+
+matchScore :: Annotation -> Annotation -> Double
+matchScore (Annotation labelA intSetA) (Annotation labelB intSetB)
+  | labelA == labelB = (intSetLength intersect) /. (intSetLength $ intSetA `IS.union` intSetB)
+  | otherwise = 0.0
+  where intSetLength = Prelude.length . IS.toList
+        intersect = intSetA `IS.intersection` intSetB
--- a/src/GEval/Common.hs
+++ b/src/GEval/Common.hs
@ -6,9 +6,20 @@ import Data.Text.Read as TR

 import Data.Attoparsec.Text

-(/.) :: (Eq a, Integral a) => a -> a -> Double
+-- some operations can be "hard" (on ints) or "soft" (on doubles),
+-- introduce a typeclass so that we could generalise easily
+class ConvertibleToDouble n where
+  toDouble :: n -> Double
+
+instance ConvertibleToDouble Double where
+  toDouble = id
+
+instance ConvertibleToDouble Int where
+  toDouble = fromIntegral
+
+(/.) :: (ConvertibleToDouble f, Integral a) => f -> a -> Double
 x /. 0 = 1.0
-x /. y = (fromIntegral x) / (fromIntegral y)
+x /. y = (toDouble x) / (fromIntegral y)

 safeDoubleDiv :: Double -> Double -> Double
 safeDoubleDiv _ 0.0 = 0.0
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@ -71,6 +71,8 @@ import Data.Attoparsec.Text (parseOnly)

 import Data.Conduit.SmartSource

+import qualified Data.IntSet as IS
+
 import GEval.BLEU
 import GEval.Common
 import GEval.ClippEU
@ -83,6 +85,7 @@ import GEval.ProbList
 import GEval.WER
 import Data.Conduit.AutoDecompress
 import Text.Tokenizer
+import GEval.Annotation

 import qualified Data.HashMap.Strict as M
 import qualified Data.Vector as V
@ -107,6 +110,7 @@ data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | C
              | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
              | BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double
              | MultiLabelLogLoss | MultiLabelLikelihood
+              | SoftFMeasure Double
              deriving (Eq)

 instance Show Metric where
@ -121,6 +125,7 @@ instance Show Metric where
  show ClippEU = "ClippEU"
  show (FMeasure beta) = "F" ++ (show beta)
  show (MacroFMeasure beta) = "Macro-F" ++ (show beta)
+  show (SoftFMeasure beta) = "Soft-F" ++ (show beta)
  show NMI = "NMI"
  show (LogLossHashed nbOfBits) = "LogLossHashed" ++ (if
                                                       nbOfBits == defaultLogLossHashedSize
@ -165,6 +170,9 @@ instance Read Metric where
  readsPrec p ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':theRest) = case readsPrec p theRest of
    [(beta, theRest)] -> [(MultiLabelFMeasure beta, theRest)]
    _ -> []
+  readsPrec p ('S':'o':'f':'t':'-':'F':theRest) = case readsPrec p theRest of
+    [(beta, theRest)] -> [(SoftFMeasure beta, theRest)]
+    _ -> []
  readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
    [(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
    _ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
@ -198,6 +206,7 @@ getMetricOrdering Accuracy = TheHigherTheBetter
 getMetricOrdering ClippEU  = TheHigherTheBetter
 getMetricOrdering (FMeasure _) = TheHigherTheBetter
 getMetricOrdering (MacroFMeasure _) = TheHigherTheBetter
+getMetricOrdering (SoftFMeasure _) = TheHigherTheBetter
 getMetricOrdering NMI = TheHigherTheBetter
 getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
 getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
@ -586,6 +595,7 @@ gevalCore' (FMeasure beta) _ = gevalCoreWithoutInput outParser outParser getCoun
          | prob >= detectionThreshold && prob <= 1.0 = Right True
          | otherwise = Left "expected probability"
        detectionThreshold = 0.5
+        getCount :: (Bool, Bool) -> (Int, Int, Int)
        getCount (True, True)   = (1, 1, 1)
        getCount (True, False)  = (0, 1, 0)
        getCount (False, True)  = (0, 0, 1)
@ -615,11 +625,20 @@ gevalCore' (MacroFMeasure beta) _ = gevalCoreWithoutInput (Right . Just . strip)
                            insertMaybeToMap (Just c) m = M.insertWith (+) c 1 m
                            macroAverageOnCounts (tpMap, expectedMap, gotMap) =
                              (Prelude.sum
-                               $ Prelude.map (\c -> fMeasureOnCounts beta (M.lookupDefault 0 c tpMap,
+                               $ Prelude.map (\c -> fMeasureOnCounts beta (M.lookupDefault (0::Int) c tpMap,
                                                                         M.lookupDefault 0 c expectedMap,
                                                                         M.lookupDefault 0 c gotMap))
                               $ M.keys expectedMap) / (fromIntegral $ Prelude.length $ M.keys expectedMap)

+gevalCore' (SoftFMeasure beta) _ = gevalCoreWithoutInput parseAnnotations
+                                                         parseAnnotations
+                                                         getSoftCounts
+                                                         countAgg
+                                                         (fMeasureOnCounts beta)
+                      where getSoftCounts (expected, got) = (weightedMaxMatch matchScore expected got,
+                                                             Prelude.length expected,
+                                                             Prelude.length got)
+
 gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep
  where
    parseClippings = controlledParse lineClippingsParser
@ -680,8 +699,8 @@ gevalCore' MultiLabelLogLoss _ = gevalCoreWithoutInput intoWords
    where
      intoWords = Right . Data.Text.words

-countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int)
-countAgg = CC.foldl countFolder (0, 0, 0)
+countAgg :: (Num n, Monad m) => ConduitM (n, Int, Int) o m (n, Int, Int)
+countAgg = CC.foldl countFolder (fromInteger 0, 0, 0)

 gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
                                (V.Vector (Double, Double) -> Double) -> -- ^ correlation function
@ -707,7 +726,7 @@ skipLineNumber fun = fun . snd
 -- | A helper function to run evaluation when the input is not needed to calculate the metric value.
 gevalCoreWithoutInput :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
                      (Text -> Either String a) ->  -- ^ parser for values in the expected output
-                      (Text -> Either String b) ->  -- ^ parser for values in the output
+                      (Text -> Either String b) ->  -- ^ parser for values in the actual output
                      ((a, b) -> c) ->              -- ^ function which combines parsed values into a single value
                                                  -- (will be launched for each item, e.g. an error/cost function
                                                  -- could be calculated here)
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@ -110,6 +110,13 @@ GEval sample challenge — guess the language of a first name
 This is a sample/toy classification challenge for Gonito framework with Macro-F-measure as the metric.
 |] ++ (commonReadmeMDContents testName)

+readmeMDContents (SoftFMeasure _) testName = [i|
+GEval sample challenge — mark numbers
+=====================================
+
+This is a sample/toy classification challenge for Gonito framework with Soft-F-measure as the metric.
+|] ++ (commonReadmeMDContents testName)
+
 readmeMDContents NMI testName = [i|
 Cluster proverbs
 ================
@ -358,7 +365,10 @@ pl	Kazimierz
 en	Matthew
 en	Richard
 |]
-
+trainContents (SoftFMeasure _) = [hereLit|indigits:8	I have 3 daughters
+indigits:1-2 indigits:9-12	12 July 1812
+inwords:11-13	I can see two dogs
+|]
 trainContents NMI = [hereLit|pl	Kto pod kim dołki kopie, ten sam w nie wpada.
 en	The pen is mightier than the sword.
 pl	Baba z wozu, koniom lżej.
@ -424,6 +434,9 @@ devInContents (MacroFMeasure _) = [hereLit|Władysław
 Steven
 Helmut
 |]
+devInContents (SoftFMeasure _) = [hereLit|I have two kids
+7 April 2003
+|]
 devInContents (LikelihoodHashed b) = devInContents (LogLossHashed b)
 devInContents (LogLossHashed _) = [hereLit|Nie kupuj	w worku
 Ona	psa
@ -472,6 +485,9 @@ devExpectedContents (MacroFMeasure _) = [hereLit|pl
 en
 de
 |]
+devExpectedContents (SoftFMeasure _) = [hereLit|inwords:8-10
+indigits:1 indigits:9-12
+|]
 devExpectedContents NMI = [hereLit|en
 pl
 en
@ -524,6 +540,9 @@ testInContents (MacroFMeasure _) = [hereLit|Arkadiusz
 Heinrich
 Henry
 |]
+testInContents (SoftFMeasure _) = [hereLit|Nothing
+Four sides
+|]
 testInContents NMI = [hereLit|Fortune favors the bold.
 People who live in glass houses should not throw stones.
 W marcu, jak w garncu.
@ -575,6 +594,9 @@ testExpectedContents (MacroFMeasure _) = [hereLit|pl
 de
 en
 |]
+testExpectedContents (SoftFMeasure _) = [hereLit|
+inwords:1-4
+|]
 testExpectedContents NMI = [hereLit|en
 en
 pl
--- a/src/GEval/OptionsParser.hs
+++ b/src/GEval/OptionsParser.hs
@ -169,7 +169,7 @@ metricReader = many $ option auto         -- actually `some` should be used inst
               ( long "metric"            -- --metric might be in the config.txt file...
                 <> short 'm'
                 <> metavar "METRIC"
-                 <> help "Metric to be used - RMSE, MSE, Pearson, Spearman, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), macro F-measure (specify as Macro-F1, Macro-F2, Macro-F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), WER, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels or CharMatch" )
+                 <> help "Metric to be used - RMSE, MSE, Pearson, Spearman, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), macro F-measure (specify as Macro-F1, Macro-F2, Macro-F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), WER, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels, soft F-measure (specify as Soft-F1, Soft-F2, Soft-F0.25) or CharMatch" )

 altMetricReader :: Parser (Maybe Metric)
 altMetricReader = optional $ option auto
--- a/src/GEval/PrecisionRecall.hs
+++ b/src/GEval/PrecisionRecall.hs
@ -1,10 +1,11 @@
 {-# LANGUAGE PartialTypeSignatures #-}
+{-# LANGUAGE ScopedTypeVariables #-}

 module GEval.PrecisionRecall(calculateMAPForOneResult,
                             fMeasure, f1Measure, f2Measure, precision, recall,
                             fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder,
                             precisionAndRecall, precisionAndRecallFromCounts,
-                             maxMatch, maxMatchOnOrdered, getCounts)
+                             maxMatch, maxMatchOnOrdered, getCounts, weightedMaxMatch)
       where

 import GEval.Common
@ -14,9 +15,13 @@ import Data.Graph.Inductive.Query.MaxFlow

 import Data.List (nub, foldl')

+import Data.Algorithm.Munkres
+import qualified Data.Array.IArray as DAI
+
+
 calculateMAPForOneResult :: (Eq a) => [a] -> [a] -> Double
 calculateMAPForOneResult expected got = precisionSum / fromIntegral (length expected)
-  where (_, _, precisionSum) = calculateMAPForOneResultCore expected (nub got)
+  where (_::Int, _, precisionSum) = calculateMAPForOneResultCore expected (nub got)
        calculateMAPForOneResultCore expected got = foldl' (oneMAPStep expected) (0, 0, 0.0) got
        oneMAPStep expected (gotCount, allCount, precisionSum) gotItem
          | gotItem `elem` expected = (newGotCount, newAllCount, precisionSum + (newGotCount /. newAllCount))
@ -41,19 +46,19 @@ fMeasure beta matchingFun expected got =
  where betaSquared = beta ^ 2
        (p, r) = precisionAndRecall matchingFun expected got

-f2MeasureOnCounts :: (Int, Int, Int) -> Double
+f2MeasureOnCounts :: ConvertibleToDouble n => (n, Int, Int) -> Double
 f2MeasureOnCounts = fMeasureOnCounts 2.0

-f1MeasureOnCounts :: (Int, Int, Int) -> Double
+f1MeasureOnCounts :: ConvertibleToDouble n => (n, Int, Int) -> Double
 f1MeasureOnCounts = fMeasureOnCounts 1.0

-fMeasureOnCounts :: Double -> (Int, Int, Int) -> Double
+fMeasureOnCounts :: ConvertibleToDouble n => Double -> (n, Int, Int) -> Double
 fMeasureOnCounts beta (tp, nbExpected, nbGot) =
  (1 + betaSquared) * p * r `safeDoubleDiv` (betaSquared * p + r)
  where betaSquared = beta ^ 2
        (p, r) = precisionAndRecallFromCounts (tp, nbExpected, nbGot)

-countFolder :: (Int, Int, Int) -> (Int, Int, Int) -> (Int, Int, Int)
+countFolder :: Num n => (n, Int, Int) -> (n, Int, Int) -> (n, Int, Int)
 countFolder (a1, a2, a3) (b1, b2, b3) = (a1+b1, a2+b2, a3+b3)

 getCounts :: (a -> b -> Bool) -> ([a], [b]) -> (Int, Int, Int)
@ -72,7 +77,7 @@ precisionAndRecall matchFun expected got
  = precisionAndRecallFromCounts (tp, length expected, length got)
    where tp = maxMatch matchFun expected got

-precisionAndRecallFromCounts :: (Int, Int, Int) -> (Double, Double)
+precisionAndRecallFromCounts :: ConvertibleToDouble n => (n, Int, Int) -> (Double, Double)
 precisionAndRecallFromCounts (tp, nbExpected, nbGot) =
  (tp /. nbGot, tp /. nbExpected)

@ -116,3 +121,16 @@ buildGraph matchFun expected got = (b, e, g)
                return (b,e)
                where expectedIxs = [2..1+(length expected)]
                      gotIxs = [2+(length expected)..1+(length expected)+(length got)]
+
+-- the weight are assumed to be between 0.0 and 1.0
+weightedMaxMatch :: (a -> b -> Double) -> [a] -> [b] -> Double
+weightedMaxMatch matchFun expected got = (fromIntegral $ length matching) - score
+   where (matching, score) = hungarianMethodDouble complementWeightArray
+                             -- unfortunately `hungarianMethodDouble` looks
+                             -- for minimal bipartite matching
+                             -- rather than the maximal one
+         complementWeightArray = DAI.array ((1, 1), (m, n)) weightList
+         m = length expected
+         n = length got
+         weightList = [((i, j), 1.0 - (matchFun x y)) | (i, x) <- zip [1..m] expected,
+                                                        (j, y) <- zip [1..n] got]
--- a/stack.yaml
+++ b/stack.yaml
@ -1,5 +1,5 @@
 flags: {}
 packages:
 - '.'
-extra-deps: [murmur3-1.0.3,naturalcomp-0.0.3]
+extra-deps: [murmur3-1.0.3,naturalcomp-0.0.3,Munkres-0.1]
 resolver: lts-11.9
--- a/test/Spec.hs
+++ b/test/Spec.hs
@ -18,6 +18,7 @@ import Data.Attoparsec.Text
 import Options.Applicative
 import Data.Text
 import Text.EditDistance
+import GEval.Annotation

 import Data.Map.Strict

@ -32,6 +33,8 @@ import System.IO.Silently

 import qualified Test.HUnit as HU

+import qualified Data.IntSet as IS
+
 import Data.Conduit.SmartSource
 import Data.Conduit.Rank
 import qualified Data.Conduit.Text as CT
@ -185,6 +188,10 @@ main = hspec $ do
      precision alwaysMatch ['a', 'b', 'c'] [0, 1, 2, 3, 4, 5] `shouldBeAlmost` 0.5
      recall alwaysMatch ['a', 'b', 'c'] [0, 1, 2, 3, 4, 5] `shouldBeAlmost` 1.0
      f1Measure alwaysMatch ['a', 'b', 'c'] [0, 1, 2, 3 , 4, 5] `shouldBeAlmost` 0.66666666666666
+  describe "max match" $ do
+    it "simple" $ do
+      maxMatch (==) [1,2,2] [3,2] `shouldBe` 1
+      maxMatch (==) [3,2] [1,2,2] `shouldBe` 1
  describe "ClippEU" $ do
    it "parsing rectangles" $ do
      let (Right r) = parseOnly (lineClippingsParser <* endOfInput) "2/0,0,2,3 10/20,30,40,50 18/0,1,500,3 "
@ -214,6 +221,11 @@ main = hspec $ do
      read "F2" `shouldBe` (FMeasure 2.0)
      read "F1" `shouldBe` (FMeasure 1.0)
      read "F0.5" `shouldBe` (FMeasure 0.5)
+  describe "Soft-F1" $ do
+    it "simple test" $ do
+      runGEvalTest "soft-f1-simple" `shouldReturnAlmost` 0.33333333333333
+    it "perfect test" $ do
+      runGEvalTest "soft-f1-perfect" `shouldReturnAlmost` 1.0
  describe "test edit-distance library" $ do
    it "for handling UTF8" $ do
      levenshteinDistance defaultEditCosts "źdźbło" "źd好bło" `shouldBe` 1
@ -261,6 +273,19 @@ main = hspec $ do
      gevalCoreOnSingleLines RMSE id (LineInFile (FilePathSpec "stub1") 1 "blabla")
                                     (LineInFile (FilePathSpec "stub2") 1 "3.4")
                                     (LineInFile (FilePathSpec "stub3") 1 "2.6") `shouldReturnAlmost` 0.8
+  describe "Annotation format" $ do
+    it "just parse" $ do
+      parseAnnotations "foo:3,7-10 baz:4-6" `shouldBe` Right [Annotation "foo" (IS.fromList [3,7,8,9,10]),
+                                                               Annotation "baz" (IS.fromList [4,5,6])]
+    it "empty" $ do
+      parseAnnotations "" `shouldBe` Right []
+    it "empty (just spaces)" $ do
+      parseAnnotations "   " `shouldBe` Right []
+    it "match score" $ do
+      matchScore (Annotation "x" (IS.fromList [3..6])) (Annotation "y" (IS.fromList [3..6])) `shouldBeAlmost` 0.0
+      matchScore (Annotation "x" (IS.fromList [3..6])) (Annotation "x" (IS.fromList [3..6])) `shouldBeAlmost` 1.0
+      matchScore (Annotation "x" (IS.fromList [123..140])) (Annotation "x" (IS.fromList [125..130])) `shouldBeAlmost` 0.33333
+      matchScore (Annotation "x" (IS.fromList [3..4])) (Annotation "x" (IS.fromList [2..13])) `shouldBeAlmost` 0.1666666
  describe "BIO format" $ do
    it "just parse" $ do
      let (Right r) = parseOnly (bioSequenceParser <* endOfInput) "O B-city/NEW_YORK I-city B-city/KALISZ I-city O B-name"
--- a/test/soft-f1-perfect/soft-f1-perfect-solution/test-A/out.tsv
+++ b/test/soft-f1-perfect/soft-f1-perfect-solution/test-A/out.tsv
@ -0,0 +1,3 @@
+foo:3-4 bar:5-11 foo:12
+
+baz:10-16 xyz:123-140
--- a/test/soft-f1-perfect/soft-f1-perfect/config.txt
+++ b/test/soft-f1-perfect/soft-f1-perfect/config.txt
@ -0,0 +1 @@
+--metric Soft-F1
--- a/test/soft-f1-perfect/soft-f1-perfect/test-A/expected.tsv
+++ b/test/soft-f1-perfect/soft-f1-perfect/test-A/expected.tsv
@ -0,0 +1,3 @@
+foo:3-4 bar:5-11 foo:12
+
+baz:10-16 xyz:123-140
--- a/test/soft-f1-simple/soft-f1-simple-solution/test-A/out.tsv
+++ b/test/soft-f1-simple/soft-f1-simple-solution/test-A/out.tsv
@ -0,0 +1,3 @@
+foo:2-13
+bar:3-7
+baz:10-16 foo:123-140
--- a/test/soft-f1-simple/soft-f1-simple/config.txt
+++ b/test/soft-f1-simple/soft-f1-simple/config.txt
@ -0,0 +1 @@
+--metric Soft-F1
--- a/test/soft-f1-simple/soft-f1-simple/test-A/expected.tsv
+++ b/test/soft-f1-simple/soft-f1-simple/test-A/expected.tsv
@ -0,0 +1,3 @@
+foo:3-4 baz:3-7 foo:12
+
+baz:10-16 foo:125-130