Fix bug with inconsistent handling of probs in MultiLabel-F1

This commit is contained in:
Filip Gralinski 2021-07-23 17:26:41 +02:00
parent 4b80625cc2
commit c71bba81f3
8 changed files with 67 additions and 48 deletions

View File

@ -680,60 +680,60 @@ gevalCoreOnSources (LikelihoodHashed nbOfBits) = helperLogLossHashed nbOfBits lo
gevalCoreOnSources (Mean (MultiLabelFMeasure beta matchingSpec)) gevalCoreOnSources (Mean (MultiLabelFMeasure beta matchingSpec))
= gevalCoreWithoutInputOnItemTargets (Right . intoWords) = gevalCoreWithoutInputOnItemTargets intoWords
(Right . getWords) getWords
((fMeasureOnCounts beta) . (getWeightedCounts (getMatchingFunctionForString matchingSpec))) ((fMeasureOnCounts beta) . (getWeightedCounts (getMatchingFunctionForText matchingSpec)))
averageC averageC
id id
noGraph noGraph
where where
-- repeated as below, as it will be refactored into dependent types soon anyway -- repeated as below, as it will be refactored into dependent types soon anyway
getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t getWords (RawItemTarget t) = outputParser (SAMultiLabelFMeasure SExactMatch) t
getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts getWords (PartiallyParsedItemTarget ts) = Right ts
intoWords (RawItemTarget t) = Prelude.map unpack $ Data.Text.words t intoWords (RawItemTarget t) = expectedParser (SAMultiLabelFMeasure SExactMatch) t
intoWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts intoWords (PartiallyParsedItemTarget ts) = Right ts
gevalCoreOnSources (Mean WER) gevalCoreOnSources (Mean WER)
= gevalCoreWithoutInputOnItemTargets (Right . intoWords) = gevalCoreWithoutInputOnItemTargets intoWords
(Right . getWords) getWords
((uncurry (/.)) . (uncurry werStep)) ((uncurry (/.)) . (uncurry werStep))
averageC averageC
id id
noGraph noGraph
where where
-- repeated as below, as it will be refactored into dependent types soon anyway -- repeated as below, as it will be refactored into dependent types soon anyway
getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t getWords (RawItemTarget t) = outputParser SAWER t
getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts getWords (PartiallyParsedItemTarget ts) = Right $ Prelude.map unpack ts
intoWords (RawItemTarget t) = Prelude.map unpack $ Data.Text.words t intoWords (RawItemTarget t) = expectedParser SAWER t
intoWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts intoWords (PartiallyParsedItemTarget ts) = Right $ Prelude.map unpack ts
gevalCoreOnSources (Mean CER) gevalCoreOnSources (Mean CER)
= gevalCoreWithoutInputOnItemTargets (Right . getString) = gevalCoreWithoutInputOnItemTargets getString
(Right . getString) getString
((uncurry (/.)) . (uncurry werStep)) ((uncurry (/.)) . (uncurry werStep))
averageC averageC
id id
noGraph noGraph
where where
-- repeated as below, as it will be refactored into dependent types soon anyway -- repeated as below, as it will be refactored into dependent types soon anyway
getString (RawItemTarget t) = unpack t getString (RawItemTarget t) = expectedParser SACER t
getString (PartiallyParsedItemTarget ts) = Prelude.unwords $ Prelude.map unpack ts getString (PartiallyParsedItemTarget ts) = Right $ Prelude.unwords $ Prelude.map unpack ts
gevalCoreOnSources (Mean _) = error $ "Mean/ meta-metric defined only for MultiLabel-F1, WER and CER for the time being" gevalCoreOnSources (Mean _) = error $ "Mean/ meta-metric defined only for MultiLabel-F1, WER and CER for the time being"
-- only MultiLabel-F1 handled for JSONs for the time being... -- only MultiLabel-F1 handled for JSONs for the time being...
gevalCoreOnSources (MultiLabelFMeasure beta matchingSpec) = gevalCoreOnSources (MultiLabelFMeasure beta matchingSpec) =
gevalCoreWithoutInputOnItemTargets (Right . intoWords) gevalCoreWithoutInputOnItemTargets intoWords
(Right . getWords) getWords
(getWeightedCounts (getMatchingFunctionForString matchingSpec)) (getWeightedCounts (getMatchingFunctionForText matchingSpec))
countAgg countAgg
(fMeasureOnCounts beta) (fMeasureOnCounts beta)
noGraph noGraph
where where
getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t getWords (RawItemTarget t) = outputParser (SAMultiLabelFMeasure SExactMatch) t
getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts getWords (PartiallyParsedItemTarget ts) = Right ts
intoWords (RawItemTarget t) = Prelude.map unpack $ Data.Text.words t intoWords (RawItemTarget t) = expectedParser (SAMultiLabelFMeasure SExactMatch) t
intoWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts intoWords (PartiallyParsedItemTarget ts) = Right ts
gevalCoreOnSources Pearson = gevalCoreByCorrelationMeasure pearson gevalCoreOnSources Pearson = gevalCoreByCorrelationMeasure pearson
gevalCoreOnSources Spearman = gevalCoreByCorrelationMeasure spearman gevalCoreOnSources Spearman = gevalCoreByCorrelationMeasure spearman

View File

@ -13,7 +13,7 @@ module GEval.Metric
where where
import Data.Word import Data.Word
import Data.Text import Data.Text hiding (map)
import Data.Monoid ((<>)) import Data.Monoid ((<>))
import GEval.Common import GEval.Common
@ -262,10 +262,11 @@ fixedNumberOfColumnsInInput (ProbabilisticSoftFMeasure _) = False
fixedNumberOfColumnsInInput (Soft2DFMeasure _) = False fixedNumberOfColumnsInInput (Soft2DFMeasure _) = False
fixedNumberOfColumnsInInput _ = True fixedNumberOfColumnsInInput _ = True
perfectOutLineFromExpectedLine :: Metric -> Text -> Text perfectOutLineFromExpectedLine :: Metric -> Text -> Text
perfectOutLineFromExpectedLine (Mean metric) t = perfectOutLineFromExpectedLine metric t perfectOutLineFromExpectedLine (Mean metric) t = perfectOutLineFromExpectedLine metric t
perfectOutLineFromExpectedLine (LogLossHashed _) t = t <> ":1.0" perfectOutLineFromExpectedLine (LogLossHashed _) t = addProbOne t
perfectOutLineFromExpectedLine (LikelihoodHashed _) t = t <> ":1.0" perfectOutLineFromExpectedLine (LikelihoodHashed _) t = addProbOne t
perfectOutLineFromExpectedLine BLEU t = getFirstColumn t perfectOutLineFromExpectedLine BLEU t = getFirstColumn t
perfectOutLineFromExpectedLine GLEU t = getFirstColumn t perfectOutLineFromExpectedLine GLEU t = getFirstColumn t
perfectOutLineFromExpectedLine ClippEU t = cleanMarginFromClippEU t perfectOutLineFromExpectedLine ClippEU t = cleanMarginFromClippEU t
@ -273,6 +274,9 @@ perfectOutLineFromExpectedLine (Accuracy ExactMatch) t = t
perfectOutLineFromExpectedLine (Accuracy _) t = getFirstColumn t perfectOutLineFromExpectedLine (Accuracy _) t = getFirstColumn t
perfectOutLineFromExpectedLine _ t = t perfectOutLineFromExpectedLine _ t = t
addProbOne :: Text -> Text
addProbOne = (<> ":1.0")
getFirstColumn :: Text -> Text getFirstColumn :: Text -> Text
getFirstColumn t = case splitOn "\t" t of getFirstColumn t = case splitOn "\t" t of
[] -> "" [] -> ""
@ -280,7 +284,7 @@ getFirstColumn t = case splitOn "\t" t of
cleanMarginFromClippEU :: Text -> Text cleanMarginFromClippEU :: Text -> Text
cleanMarginFromClippEU t = Data.Text.unwords outs cleanMarginFromClippEU t = Data.Text.unwords outs
where outs = Prelude.map toOut specs where outs = map toOut specs
(Right specs) = parseOnly lineClippingSpecsParser t (Right specs) = parseOnly lineClippingSpecsParser t
toOut (ClippingSpec (PageNumber pageNumber) (Rectangle (Point x0 y0) (Point x1 y1)) _) = toOut (ClippingSpec (PageNumber pageNumber) (Rectangle (Point x0 y0) (Point x1 y1)) _) =
pack ((show pageNumber) ++ "/" ++ (show x0) ++ "," ++ (show y0) ++ "," ++ (show x1) ++ "," ++ (show y1)) pack ((show pageNumber) ++ "/" ++ (show x0) ++ "," ++ (show y0) ++ "," ++ (show x1) ++ "," ++ (show y1))

View File

@ -27,7 +27,7 @@ import GEval.PrecisionRecall (weightedMaxMatch, fMeasureOnCounts, calculateMAPFo
import Control.Exception import Control.Exception
import Data.Text import Data.Text hiding (map, maximum, zip)
import Data.Text.Read as TR import Data.Text.Read as TR
import qualified Data.List.Split as DLS import qualified Data.List.Split as DLS
import Data.Attoparsec.Text (parseOnly) import Data.Attoparsec.Text (parseOnly)
@ -41,7 +41,8 @@ import GEval.Annotation (Annotation, ObtainedAnnotation,
import GEval.Clippings (Clipping, ClippingSpec, LabeledClipping, lineClippingsParser, lineClippingSpecsParser, lineLabeledClippingsParser) import GEval.Clippings (Clipping, ClippingSpec, LabeledClipping, lineClippingsParser, lineClippingSpecsParser, lineLabeledClippingsParser)
import GEval.BIO (TaggedEntity, parseBioSequenceIntoEntities, parseBioSequenceIntoEntitiesWithoutNormalization) import GEval.BIO (TaggedEntity, parseBioSequenceIntoEntities, parseBioSequenceIntoEntitiesWithoutNormalization)
import GEval.LogLossHashed (parseWordSpecs, wordSpecToPair) import GEval.LogLossHashed (parseWordSpecs, wordSpecToPair)
import GEval.ProbList (ProbList(..), parseIntoProbList, WordWithProb(..), countLogLossOnProbList) import GEval.ProbList (ProbList(..), WordWithProb(..),
parseIntoProbList, countLogLossOnProbList, selectByStandardThreshold)
import GEval.MatchingSpecification import GEval.MatchingSpecification
import GEval.Haversine import GEval.Haversine
@ -222,7 +223,7 @@ outputParser SATokenAccuracy = intoWords
outputParser SASegmentAccuracy = parseSegmentAnnotations outputParser SASegmentAccuracy = parseSegmentAnnotations
outputParser SAMAE = doubleParser outputParser SAMAE = doubleParser
outputParser SASMAPE = doubleParser outputParser SASMAPE = doubleParser
outputParser (SAMultiLabelFMeasure _) = intoWords outputParser (SAMultiLabelFMeasure _) = Right . selectByStandardThreshold . parseIntoProbList
outputParser SAMultiLabelLogLoss = Right . parseIntoProbList outputParser SAMultiLabelLogLoss = Right . parseIntoProbList
outputParser SAMultiLabelLikelihood = Right . parseIntoProbList outputParser SAMultiLabelLikelihood = Right . parseIntoProbList
outputParser SAHaversine = parseSpherePoints outputParser SAHaversine = parseSpherePoints
@ -258,7 +259,7 @@ type family ItemIntermediateRepresentationType (t :: AMetric) :: * where
ItemIntermediateRepresentationType t = Double ItemIntermediateRepresentationType t = Double
findBest :: (Text -> Text -> Double) -> (Text -> Text -> Double) findBest :: (Text -> Text -> Double) -> (Text -> Text -> Double)
findBest fun expected got = Prelude.maximum $ Prelude.map (fun got) expectedVals findBest fun expected got = maximum $ map (fun got) expectedVals
where expectedVals = case splitOn "\t" expected of where expectedVals = case splitOn "\t" expected of
[] -> [""] [] -> [""]
l -> l l -> l
@ -310,7 +311,7 @@ intoWords = Right . Data.Text.words
intoStringWords = Right . Prelude.words . unpack intoStringWords = Right . Prelude.words . unpack
alternativeSentencesParser = Right . Prelude.map Prelude.words . DLS.splitOn "\t" . unpack alternativeSentencesParser = Right . map Prelude.words . DLS.splitOn "\t" . unpack
onlyStrip = Right . strip onlyStrip = Right . strip
@ -322,8 +323,8 @@ predictedParser got =
case parseWordSpecs got of case parseWordSpecs got of
Right wordSpecs -> if Prelude.null pairs Right wordSpecs -> if Prelude.null pairs
then Nothing then Nothing
else Just $ snd $ Prelude.maximum pairs else Just $ snd $ maximum pairs
where pairs = catMaybes $ Prelude.map wordSpecToPair wordSpecs where pairs = catMaybes $ map wordSpecToPair wordSpecs
Left _ -> Just got Left _ -> Just got
splitByTabs = Right . DLS.splitOn "\t" . unpack splitByTabs = Right . DLS.splitOn "\t" . unpack
@ -361,8 +362,8 @@ hitOrMiss (exp, got) =
case parseWordSpecs got of case parseWordSpecs got of
Right wordSpecs -> if Prelude.null pairs Right wordSpecs -> if Prelude.null pairs
then 0.0 then 0.0
else indicator (exp == (snd $ Prelude.maximum pairs)) else indicator (exp == (snd $ maximum pairs))
where pairs = catMaybes $ Prelude.map wordSpecToPair wordSpecs where pairs = catMaybes $ map wordSpecToPair wordSpecs
Left _ -> indicator ((normalizeProbForAccuracy exp got) == exp) Left _ -> indicator ((normalizeProbForAccuracy exp got) == exp)
-- if the expected value is 0 or 1 treat values -- if the expected value is 0 or 1 treat values
-- between 0.0 and 1.0 as probabilities -- between 0.0 and 1.0 as probabilities
@ -405,7 +406,7 @@ getSoft2DCounts (expected, got) = (tpArea, expArea, gotArea)
getFragCounts :: CoverableEntityWithProbability e => ([BareEntity e], [e]) -> (Double, Double, Int, Int) getFragCounts :: CoverableEntityWithProbability e => ([BareEntity e], [e]) -> (Double, Double, Int, Int)
getFragCounts (expected, got) getFragCounts (expected, got)
| allDisjoint (Prelude.map getBareEntity got) = ( | allDisjoint (map getBareEntity got) = (
recallScoreTotal expected got, recallScoreTotal expected got,
precisionScoreTotal got expected, precisionScoreTotal got expected,
Prelude.length expected, Prelude.length expected,
@ -418,7 +419,7 @@ countHitsAndTotals (es, os) =
then throw $ OtherException "wrong number of tokens" then throw $ OtherException "wrong number of tokens"
else Prelude.foldl matchFun else Prelude.foldl matchFun
(0, 0) (0, 0)
(Prelude.zip es os) (zip es os)
where matchFun :: (Int, Int) -> (Text, Text) -> (Int, Int) where matchFun :: (Int, Int) -> (Text, Text) -> (Int, Int)
matchFun (h, t) (e, o) matchFun (h, t) (e, o)
| e == (pack "*") = (h, t) | e == (pack "*") = (h, t)

View File

@ -126,7 +126,7 @@ Mean/Multilabel-F1.
|] |]
getMetricDescription (SoftFMeasure _) = getMetricDescription (SoftFMeasure _) =
[i|"Soft" F-measure on intervals, i.e. partial "hits" are considered. For instance, [i|"Soft" F-measure on intervals, i.e. partial "hits" are considered. For instance,
if a label `foo` is expected for the span 2-9 and this label is returned but with if a label `foo` is expected for the span 2-9 and this label is returned, but with
the span 8-12, it is counted as 2/8=0.25 instead of 0 or 1 when precision/recall counts the span 8-12, it is counted as 2/8=0.25 instead of 0 or 1 when precision/recall counts
are gathered. are gathered.
|] |]
@ -183,9 +183,9 @@ getMetricDescription BIOWeightedF1 =
|] |]
outContents :: Metric -> String outContents :: Metric -> String
outContents (MultiLabelFMeasure _ _) = [hereLit|person/1,3 first-name/1 first-name/3 outContents (MultiLabelFMeasure _ _) = [hereLit|person/1,3 first-name/1:0.8 first-name/3:0.75
surname/2 surname/2 county/1:0.33
first-name/3 first-name/3:0.52
|] |]
outContents (SoftFMeasure _) = [hereLit|inwords:1-4 outContents (SoftFMeasure _) = [hereLit|inwords:1-4
inwords:1-3 indigits:5 inwords:1-3 indigits:5
@ -279,8 +279,12 @@ formatEvaluationSchemeDescription scheme@(EvaluationScheme metric _) = show sche
IF YOU WANT TO HAVE IT DESCRIBED|] IF YOU WANT TO HAVE IT DESCRIBED|]
formatDescription :: Metric -> String formatDescription :: Metric -> String
formatDescription (MultiLabelFMeasure _ ExactMatch) = [hereLit|Any label separated by spaces can be used. They are formatDescription (MultiLabelFMeasure _ ExactMatch) = [hereLit|Any label separated by spaces can be used. Labels are not
not intepreted in any way when the metric is calculated. interpreted except that they can be accompanied by probabilities
(after a colon): only labels with probabilities >= 0.5 are considered.
This is for compatibility with probabilistic metrics. By default, 1.0 is
assumed as the probability, but it is recommended to add probabilities
explicitly.
|] |]
formatDescription (SoftFMeasure _) = [hereLit|Each line is a sequence of entities separated by spaces, each entity is of formatDescription (SoftFMeasure _) = [hereLit|Each line is a sequence of entities separated by spaces, each entity is of
the form LABEL:SPAN, where LABEL is any label and SPAN is defined using single integers, intervals or such the form LABEL:SPAN, where LABEL is any label and SPAN is defined using single integers, intervals or such
@ -316,9 +320,10 @@ B-tags and I-tags can be accompanied by an extra label after a slash.
scoreExplanation :: EvaluationScheme -> Maybe String scoreExplanation :: EvaluationScheme -> Maybe String
scoreExplanation (EvaluationScheme (MultiLabelFMeasure _ ExactMatch) []) scoreExplanation (EvaluationScheme (MultiLabelFMeasure _ ExactMatch) [])
= Just [hereLit|Out of the total 5 labels in the output, 3 are correct (person/1,3, first-name/1 and = Just [hereLit|Out of the total 5 labels in the output, 3 are correct (person/1,3, first-name/1 and
first-name/3), hence precision is 3/5=0.6, whereas out of the 4 labels in gold standard, first-name/3, only labels with probabilities >= 0.5 are considered, otherwise the probabilities are just
again 3 were retrieved, so recall is 3/4=0.75. The harmonic mean of precision and recall discarded), hence precision is 3/5=0.6, whereas out of the 4 labels in gold standard, again 3 were
is 2/(4/3 + 5/3) = 2/3 = 0.6666|] retrieved, so recall is 3/4=0.75. The harmonic mean of precision and recall is 2/(4/3 + 5/3) =
2/3 = 0.6666|]
scoreExplanation (EvaluationScheme (SoftFMeasure _) []) scoreExplanation (EvaluationScheme (SoftFMeasure _) [])
= Just [hereLit|We have a partial (0.75) success for the entity `inwords:1-4`, hence Recall = 0.75/1 = 0.75, = Just [hereLit|We have a partial (0.75) success for the entity `inwords:1-4`, hence Recall = 0.75/1 = 0.75,
Precision = (0 + 0.75 + 0) / 3 = 0.25, so F-score = 0.375|] Precision = (0 + 0.75 + 0) / 3 = 0.25, so F-score = 0.375|]

View File

@ -368,6 +368,8 @@ main = hspec $ do
runGEvalTest "multilabel-f1-ie-fuzzy-smart" `shouldReturnAlmost` 0.598444 runGEvalTest "multilabel-f1-ie-fuzzy-smart" `shouldReturnAlmost` 0.598444
it "information extraction with smart fuzzy matching hardened" $ do it "information extraction with smart fuzzy matching hardened" $ do
runGEvalTest "multilabel-f1-ie-fuzzy-harden" `shouldReturnAlmost` 0.555555555 runGEvalTest "multilabel-f1-ie-fuzzy-harden" `shouldReturnAlmost` 0.555555555
it "information extraction" $ do
runGEvalTest "multilabel-f1-ie-probs" `shouldReturnAlmost` 0.1111111111
describe "Mean/MultiLabel-F" $ do describe "Mean/MultiLabel-F" $ do
it "simple" $ do it "simple" $ do
runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5 runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5

View File

@ -0,0 +1,3 @@
important-person=JOHN_BROWN:0.52 important-person=JOHN_SMITH company-name=Axaxaxaxas_Mlo profit=12031
company-name=Foo_Bar profit=1220:0.8223 unwanted=none:0.49
company-name=Whatever important-person=PIERRE_MENARD
1 important-person=JOHN_BROWN:0.52 important-person=JOHN_SMITH company-name=Axaxaxaxas_Mlo profit=12031
2 company-name=Foo_Bar profit=1220:0.8223 unwanted=none:0.49
3 company-name=Whatever important-person=PIERRE_MENARD

View File

@ -0,0 +1 @@
--metric MultiLabel-F1

View File

@ -0,0 +1,3 @@
company-name=Axaxaxas_Mlö profit=12031 important-person=John_Smith important-person=James_Brown
company-name=Orbis_Tertius profit=1020 important-person=Anna_Smith
company-name=Whatever_Inc profit=5600 important-person=Pierre_Menard
1 company-name=Axaxaxas_Mlö profit=12031 important-person=John_Smith important-person=James_Brown
2 company-name=Orbis_Tertius profit=1020 important-person=Anna_Smith
3 company-name=Whatever_Inc profit=5600 important-person=Pierre_Menard