Add BIOWeightedF1 metric

This commit is contained in:
Filip Gralinski 2021-06-09 22:16:13 +02:00
parent e26275eff2
commit 0afa1fe0ba
11 changed files with 97 additions and 5 deletions

View File

@ -3,7 +3,7 @@
module GEval.BIO
(BIOLabel(..), bioSequenceParser, parseBioSequenceIntoEntities,
parseBioSequenceIntoEntitiesWithoutNormalization,
TaggedSpan(..), TaggedEntity(..), gatherCountsForBIO,
TaggedSpan(..), TaggedEntity(..), gatherCountsForBIO, gatherSeparatedCountsForBIO,
eraseNormalisation)
where
@ -16,9 +16,12 @@ import Data.Attoparsec.Combinator
import Control.Applicative
import Data.Char
import Data.Maybe (catMaybes)
import Data.List (groupBy, sortBy)
import GEval.Common
import qualified Data.HashMap.Strict as M
-- | One tag of a BIO-encoded sequence.
--
-- NOTE(review): presumably 'Outside' stands for the @O@ tag, while
-- 'Beginning' and 'Inside' stand for @B-...@ and @I-...@ tags carrying the
-- tag name and an optional extra part given after a slash — confirm against
-- the parser.
data BIOLabel
  = Outside
  | Beginning T.Text (Maybe T.Text)
  | Inside T.Text (Maybe T.Text)
  deriving (Eq, Show)
@ -43,6 +46,23 @@ gatherCountsForBIO expected got = (maxMatchOnOrdered laterThan expected got, len
where
laterThan (TaggedEntity (TaggedSpan a _) _ _) (TaggedEntity (TaggedSpan b _) _ _) = a > b
-- | Order two entities by their label only (the second constructor field);
-- the remaining fields are ignored.
compareByLabel :: TaggedEntity -> TaggedEntity -> Ordering
compareByLabel entityA entityB = compare (labelOf entityA) (labelOf entityB)
  where
    labelOf (TaggedEntity _ lab _) = lab
-- | Check whether two entities carry the same label (the second constructor
-- field); the remaining fields are ignored.
equalLabel :: TaggedEntity -> TaggedEntity -> Bool
equalLabel entityA entityB = labelOf entityA == labelOf entityB
  where
    labelOf (TaggedEntity _ lab _) = lab
-- | Gather counts for one item separately for every entity label.
--
-- The result maps each label to the triple returned by 'gatherCountsForBIO'
-- when it is given only the expected and only the predicted entities carrying
-- that label.
--
-- Labels are collected from BOTH arguments. A label that was predicted but
-- does not occur among the expected entities of this item is compared against
-- an empty expected list instead of being dropped; otherwise such spurious
-- predictions would be missing when the per-item maps are later summed
-- label by label.
--
-- NOTE(review): values computed with the earlier behaviour (labels taken from
-- the expected side only), including stored regression scores, change with
-- this fix and have to be recomputed.
gatherSeparatedCountsForBIO :: [TaggedEntity] -> [TaggedEntity] -> M.HashMap T.Text (Int, Int, Int)
gatherSeparatedCountsForBIO expected got = M.mapWithKey process allLabels
  where
    expectedMapped = groupEntitiesByLabel expected
    gotMapped = groupEntitiesByLabel got
    -- Only the key set of this map is used; the values are looked up again
    -- in 'process'.
    allLabels = M.union expectedMapped gotMapped
    -- 'sortBy' is stable, so entities keep their original relative order
    -- inside each group. The comprehension pattern is total: 'groupBy' never
    -- yields an empty group, and one would simply be skipped if it did.
    groupEntitiesByLabel entities =
      M.fromList
        [ (lab, grp)
        | grp@(TaggedEntity _ lab _ : _) <- groupBy equalLabel (sortBy compareByLabel entities)
        ]
    process lab _ =
      gatherCountsForBIO
        (M.lookupDefault [] lab expectedMapped)
        (M.lookupDefault [] lab gotMapped)
-- | Parse a complete text with 'bioSequenceParser' (the whole input must be
-- consumed) and pass the parsed result on to 'labelsIntoEntities'.
-- A 'Left' from either step is returned unchanged.
parseBioSequenceIntoEntities :: T.Text -> Either String [TaggedEntity]
parseBioSequenceIntoEntities t =
  parseOnly (bioSequenceParser <* endOfInput) t >>= labelsIntoEntities

View File

@ -177,6 +177,7 @@ isPreprocessable LogLoss = False
isPreprocessable Likelihood = False
isPreprocessable BIOF1 = False
isPreprocessable BIOF1Labels = False
isPreprocessable BIOWeightedF1 = False
isPreprocessable TokenAccuracy = True
isPreprocessable SegmentAccuracy = True
isPreprocessable MAE = False
@ -788,6 +789,11 @@ generalizedProbabilisticFMeasure beta metric = gevalCoreWithoutInput metric
-- | Sink that adds up all incoming triples component-wise with 'countFolder',
-- starting from an all-zero triple.
countAgg :: (Num n, Num v, Monad m) => ConduitM (n, v, v) o m (n, v, v)
countAgg = CC.foldl countFolder (0, 0, 0)
-- | Sink that merges all incoming per-key count maps into one map, starting
-- from the empty map; triples stored under the same key are combined with
-- 'countFolder'.
separatedCountAgg :: Monad m => ConduitM (M.HashMap Text (Int, Int, Int)) o m (M.HashMap Text (Int, Int, Int))
separatedCountAgg = CC.foldl (M.unionWith countFolder) M.empty
-- | Sink that folds all incoming 4-tuples with 'countFragFolder', starting
-- from an all-zero 4-tuple.
countFragAgg :: (Num n, Num v, Monad m) => ConduitM (n, n, v, v) o m (n, n, v, v)
countFragAgg = CC.foldl countFragFolder (0, 0, 0, 0)
@ -990,6 +996,8 @@ continueGEvalCalculations SABIOF1 BIOF1 = defineContinuation countAgg f1MeasureO
continueGEvalCalculations SABIOF1Labels BIOF1Labels = defineContinuation countAgg f1MeasureOnCounts noGraph
continueGEvalCalculations SABIOWeightedF1 BIOWeightedF1 = defineContinuation separatedCountAgg f1MeasureOnSeparatedCounts noGraph
continueGEvalCalculations SASegmentAccuracy SegmentAccuracy = defineContinuation averageC id noGraph
continueGEvalCalculations SATokenAccuracy TokenAccuracy = defineContinuation hitsAndTotalsAgg

View File

@ -316,6 +316,7 @@ This a sample challenge for the likelihood metric.
|] ++ (commonReadmeMDContents testName)
readmeMDContents BIOF1Labels testName = readmeMDContents BIOF1 testName
readmeMDContents BIOWeightedF1 testName = readmeMDContents BIOF1 testName
readmeMDContents BIOF1 testName = [i|
Tag and normalize names
=======================
@ -568,6 +569,7 @@ trainContents LogLoss = [hereLit|0.0 Hell, no!!!
0.0 Boring, boring, boring
|]
trainContents BIOF1Labels = trainContents BIOF1
trainContents BIOWeightedF1 = trainContents BIOF1
trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surname/BOND My name is Bond , James Bond
O O O O O There is no name here
B-firstname/JOHN B-surname/VON I-surname/NEUMANN John von Nueman
@ -646,6 +648,7 @@ Boring stuff
That's good
|]
devInContents BIOF1Labels = devInContents BIOF1
devInContents BIOWeightedF1 = devInContents BIOF1
devInContents BIOF1 = [hereLit|Adam and Eve
Mr Jan Kowalski
|]
@ -720,6 +723,7 @@ devExpectedContents LogLoss = [hereLit|1.0
1.0
|]
devExpectedContents BIOF1Labels = devExpectedContents BIOF1
devExpectedContents BIOWeightedF1 = devExpectedContents BIOF1
devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE
O B-firstname/JAN B-surname/KOWALSKI
|]
@ -799,8 +803,10 @@ Super-duper!!
That is incredibly boring.
|]
testInContents BIOF1Labels = testInContents BIOF1
testInContents BIOWeightedF1 = testInContents BIOF1
testInContents BIOF1 = [hereLit|Alan Tring
No name here
Tarski is NOT here
|]
testInContents TokenAccuracy = [hereLit|I have cats
I know
@ -875,8 +881,10 @@ testExpectedContents LogLoss = [hereLit|1.0
0.0
|]
testExpectedContents BIOF1Labels = testExpectedContents BIOF1
testExpectedContents BIOWeightedF1 = testExpectedContents BIOF1
testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING
O O O
B-surname/TARSKI O O O
|]
testExpectedContents TokenAccuracy = [hereLit|* V N
* V
@ -945,6 +953,7 @@ inHeaderContents MAP = Just ["Dialect", "PolishPhrase"]
inHeaderContents Likelihood = inHeaderContents LogLoss
inHeaderContents LogLoss = Just ["Text"]
inHeaderContents BIOF1Labels = inHeaderContents BIOF1
inHeaderContents BIOWeightedF1 = inHeaderContents BIOF1
inHeaderContents BIOF1 = Just ["Text"]
inHeaderContents TokenAccuracy = Just ["TokenizedText"]
inHeaderContents SegmentAccuracy = Just ["Segment"]
@ -976,6 +985,7 @@ outHeaderContents MAP = Nothing
outHeaderContents Likelihood = outHeaderContents LogLoss
outHeaderContents LogLoss = Just ["Probability"]
outHeaderContents BIOF1Labels = outHeaderContents BIOF1
outHeaderContents BIOWeightedF1 = outHeaderContents BIOF1
outHeaderContents BIOF1 = Just ["BIOOutput"]
outHeaderContents TokenAccuracy = Just ["PartsOfSpeech"]
outHeaderContents SegmentAccuracy = Just ["PartsOfSpeech"]

View File

@ -28,7 +28,7 @@ import Data.Attoparsec.Text (parseOnly)
data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | CER | Accuracy | ClippEU
| FMeasure Double | MacroFMeasure Double | NMI
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
| BIOF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE
| BIOF1 | BIOWeightedF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE
| MultiLabelFMeasure Double MatchingSpecification
| MultiLabelLogLoss | MultiLabelLikelihood
| SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double
@ -78,6 +78,7 @@ instance Show Metric where
show Likelihood = "Likelihood"
show BIOF1 = "BIO-F1"
show BIOF1Labels = "BIO-F1-Labels"
show BIOWeightedF1 = "BIO-Weighted-F1"
show TokenAccuracy = "TokenAccuracy"
show SegmentAccuracy = "SegmentAccuracy"
show MAE = "MAE"
@ -161,6 +162,7 @@ instance Read Metric where
readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
readsPrec _ ('B':'I':'O':'-':'W':'e':'i':'g':'h':'t':'e':'d':'-':'F':'1': theRest) = [(BIOWeightedF1, theRest)]
readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
readsPrec _ ('T':'o':'k':'e':'n':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(TokenAccuracy, theRest)]
readsPrec _ ('S':'e':'g':'m':'e':'n':'t':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(SegmentAccuracy, theRest)]
@ -201,6 +203,7 @@ getMetricOrdering MAP = TheHigherTheBetter
getMetricOrdering LogLoss = TheLowerTheBetter
getMetricOrdering Likelihood = TheHigherTheBetter
getMetricOrdering BIOF1 = TheHigherTheBetter
getMetricOrdering BIOWeightedF1 = TheHigherTheBetter
getMetricOrdering BIOF1Labels = TheHigherTheBetter
getMetricOrdering TokenAccuracy = TheHigherTheBetter
getMetricOrdering SegmentAccuracy = TheHigherTheBetter

View File

@ -20,7 +20,7 @@ import GEval.Common
import GEval.BLEU (bleuStep, gleuStep)
import GEval.WER (werStep)
import GEval.Clippings (totalArea, coveredBy, clippEUMatchStep)
import GEval.BIO (gatherCountsForBIO)
import GEval.BIO (gatherCountsForBIO, gatherSeparatedCountsForBIO)
import GEval.Probability
import GEval.PrecisionRecall (weightedMaxMatch, fMeasureOnCounts, calculateMAPForOneResult, getProbabilisticCounts, getCounts)
@ -45,13 +45,15 @@ import GEval.ProbList (ProbList(..), parseIntoProbList, WordWithProb(..), countL
import GEval.MatchingSpecification
import GEval.Haversine
import qualified Data.HashMap.Strict as M
-- | Helper type so that singletons can be used.
-- (The problem is that some metrics are parametrized by Double or
-- Word32 and this is not handled by the singletons library.)
singletons [d|data AMetric = ARMSE | AMSE | APearson | ASpearman | ABLEU | AGLEU | AWER | ACER | AAccuracy | AClippEU
| AFMeasure | AMacroFMeasure | ANMI
| ALogLossHashed | ACharMatch | AMAP | ALogLoss | ALikelihood
| ABIOF1 | ABIOF1Labels | ATokenAccuracy | ASegmentAccuracy | ALikelihoodHashed | AMAE | ASMAPE | AMultiLabelFMeasure MatchingSpecification
| ABIOF1 | ABIOWeightedF1 | ABIOF1Labels | ATokenAccuracy | ASegmentAccuracy | ALikelihoodHashed | AMAE | ASMAPE | AMultiLabelFMeasure MatchingSpecification
| AMultiLabelLogLoss | AMultiLabelLikelihood
| ASoftFMeasure | AProbabilisticMultiLabelFMeasure | AProbabilisticSoftFMeasure | ASoft2DFMeasure
| AFLCFMeasure | AHaversine
@ -79,6 +81,7 @@ toHelper MAP = AMAP
toHelper LogLoss = ALogLoss
toHelper Likelihood = ALikelihood
toHelper BIOF1 = ABIOF1
toHelper BIOWeightedF1 = ABIOWeightedF1
toHelper BIOF1Labels = ABIOF1Labels
toHelper TokenAccuracy = ATokenAccuracy
toHelper SegmentAccuracy = ASegmentAccuracy
@ -125,6 +128,7 @@ type family ParsedExpectedType (t :: AMetric) :: * where
ParsedExpectedType ALogLoss = Double
ParsedExpectedType ALikelihood = Double
ParsedExpectedType ABIOF1 = [TaggedEntity]
ParsedExpectedType ABIOWeightedF1 = [TaggedEntity]
ParsedExpectedType ABIOF1Labels = [TaggedEntity]
ParsedExpectedType ATokenAccuracy = [Text]
ParsedExpectedType ASegmentAccuracy = [Annotation]
@ -161,6 +165,7 @@ expectedParser SAMAP = splitByTabs
expectedParser SALogLoss = doubleParser
expectedParser SALikelihood = doubleParser
expectedParser SABIOF1 = parseBioSequenceIntoEntities
expectedParser SABIOWeightedF1 = parseBioSequenceIntoEntities
expectedParser SABIOF1Labels = parseBioSequenceIntoEntitiesWithoutNormalization
expectedParser SATokenAccuracy = intoWords
expectedParser SASegmentAccuracy = parseSegmentAnnotations
@ -211,6 +216,7 @@ outputParser SAMAP = splitByTabs
outputParser SALogLoss = doubleParser
outputParser SALikelihood = doubleParser
outputParser SABIOF1 = parseBioSequenceIntoEntities
outputParser SABIOWeightedF1 = parseBioSequenceIntoEntities
outputParser SABIOF1Labels = parseBioSequenceIntoEntitiesWithoutNormalization
outputParser SATokenAccuracy = intoWords
outputParser SASegmentAccuracy = parseSegmentAnnotations
@ -232,6 +238,7 @@ type family ItemIntermediateRepresentationType (t :: AMetric) :: * where
ItemIntermediateRepresentationType AClippEU = (Int, Int, Int)
ItemIntermediateRepresentationType ANMI = (Text, Text)
ItemIntermediateRepresentationType ABIOF1 = (Int, Int, Int)
ItemIntermediateRepresentationType ABIOWeightedF1 = M.HashMap Text (Int, Int, Int)
ItemIntermediateRepresentationType ABIOF1Labels = (Int, Int, Int)
ItemIntermediateRepresentationType ATokenAccuracy = (Int, Int)
ItemIntermediateRepresentationType AProbabilisticMultiLabelFMeasure = ([Double], [Double], Double, Int)
@ -277,6 +284,7 @@ itemStep SAMAP = uncurry calculateMAPForOneResult
itemStep SALogLoss = itemLogLossError
itemStep SALikelihood = itemLogLossError
itemStep SABIOF1 = uncurry gatherCountsForBIO
itemStep SABIOWeightedF1 = uncurry gatherSeparatedCountsForBIO
itemStep SABIOF1Labels = uncurry gatherCountsForBIO
itemStep SATokenAccuracy = countHitsAndTotals
itemStep SASegmentAccuracy = uncurry segmentAccuracy

View File

@ -64,6 +64,7 @@ listOfAvailableMetrics = [RMSE,
LogLossHashed defaultLogLossHashedSize,
LikelihoodHashed defaultLogLossHashedSize,
BIOF1,
BIOWeightedF1,
BIOF1Labels,
TokenAccuracy,
SegmentAccuracy,
@ -105,6 +106,7 @@ isMetricDescribed WER = True
isMetricDescribed CER = True
isMetricDescribed SegmentAccuracy = True
isMetricDescribed Haversine = True
isMetricDescribed BIOWeightedF1 = True
isMetricDescribed _ = False
getEvaluationSchemeDescription :: EvaluationScheme -> String
@ -176,6 +178,9 @@ getMetricDescription Haversine =
[i|The haversine formula determines the great-circle distance between
two points on a sphere given their longitudes and latitudes (in degrees).
|]
getMetricDescription BIOWeightedF1 =
[i|Weighted-average F1-score calculated on output expressed in the BIO format.
|]
outContents :: Metric -> String
outContents (MultiLabelFMeasure _ _) = [hereLit|person/1,3 first-name/1 first-name/3
@ -206,6 +211,10 @@ N:1-4 V:6-7 A:9-13
outContents Haversine = [hereLit|39.575264 -56.995928
29.949932 -90.070116
|]
outContents BIOWeightedF1 = [hereLit|B-firstname/ALAN B-surname/TURING
O O O
B-surname/TARSKI O B-surname/NOT O
|]
expectedScore :: EvaluationScheme -> MetricValue
expectedScore (EvaluationScheme (MultiLabelFMeasure 1.0 ExactMatch) []) = 0.6666
@ -231,6 +240,8 @@ expectedScore (EvaluationScheme CER [])
= 0.14814
expectedScore (EvaluationScheme Haversine [])
= 1044.2633358563135
expectedScore (EvaluationScheme BIOWeightedF1 [])
= 0.86666666
helpMetricParameterMetricsList :: String
helpMetricParameterMetricsList = intercalate ", " $ map (\s -> (show s) ++ (case extraInfo s of
@ -297,6 +308,10 @@ formatDescription CER = [hereLit|Any text, whitespace and punctuation marks are
formatDescription Haversine = [hereLit|Each line is a latitude and longitude of sphere separated by tabulation,
e.g. "41.558153 -73.051497".
|]
-- Description of the expected line format for the BIO-Weighted-F1 metric.
formatDescription BIOWeightedF1 = [hereLit|Each line is a sequence of tags encoded in the BIO format, i.e. O, B-tag, I-tag;
B-tags and I-tags can be accompanied by an extra label after a slash.
|]
scoreExplanation :: EvaluationScheme -> Maybe String
scoreExplanation (EvaluationScheme (MultiLabelFMeasure _ ExactMatch) [])
@ -332,6 +347,11 @@ scoreExplanation (EvaluationScheme CER [])
= Just [hereLit|The total length of expected output (in characters) is 27. There are 4 errors
(1 word substituted, 1 inserted, 1 deleted) in the actual output. Hence,
CER = (2+1+1) / 27 = 4 / 27 = 0.14814.|]
scoreExplanation (EvaluationScheme Haversine []) = Nothing
scoreExplanation (EvaluationScheme BIOWeightedF1 [])
= Just [hereLit|There are two labels (firstname and surname, O is not considered). Firstname was
predicted in the perfect way, hence F1=1, whereas for surname recall is 1, precision - 2/3 and F1 - 4/5.
The weighted average is (1 * 1 + 2 * 4/5) / 3 = 13/15 = 0.8667.|]
-- | Put two strings on one line: the first one left-aligned and padded with
-- spaces to at least 35 characters, then a single space, the second string
-- and a newline.
pasteLines :: String -> String -> String
pasteLines = printf "%-35s %s\n"

View File

@ -7,9 +7,11 @@ module GEval.PrecisionRecall(calculateMAPForOneResult,
precisionAndRecall, precisionAndRecallFromCounts,
maxMatch, maxMatchOnOrdered, getCounts, weightedMaxMatch, weightedMaxMatching,
getProbabilisticCounts,
countFragFolder)
countFragFolder, fMeasureOnSeparatedCounts, f1MeasureOnSeparatedCounts)
where
import Debug.Trace
import GEval.Common
import GEval.Probability
@ -21,6 +23,7 @@ import Data.List (find, foldl', nub)
import Data.Algorithm.Munkres
import qualified Data.Array.IArray as DAI
import qualified Data.HashMap.Strict as M
calculateMAPForOneResult :: (Eq a) => [a] -> [a] -> Double
calculateMAPForOneResult expected got = precisionSum / fromIntegral (length expected)
@ -69,6 +72,15 @@ fMeasureOnFragCounts beta (rC, pC, nbExpected, nbGot) =
where r = rC /. nbExpected
p = pC /. nbGot
-- | 'fMeasureOnSeparatedCounts' with beta fixed to 1.0.
f1MeasureOnSeparatedCounts :: M.HashMap a (Int, Int, Int) -> Double
f1MeasureOnSeparatedCounts = fMeasureOnSeparatedCounts 1.0
-- | Weighted average of the per-key F-measures stored in a map of count
-- triples.
--
-- For every triple, 'fMeasureOnCounts' (with the given beta) is multiplied by
-- the second component of the triple; the sum of these products is divided,
-- using '/.', by the sum of all second components. The keys themselves are
-- not used.
-- NOTE(review): the second component is presumably the number of expected
-- items for the key — confirm against the code producing the triples.
fMeasureOnSeparatedCounts :: Double -> M.HashMap a (Int, Int, Int) -> Double
fMeasureOnSeparatedCounts beta m = weightedSum /. totalWeight
  where
    triples = M.elems m
    weightedSum =
      sum [fromIntegral weight * fMeasureOnCounts beta triple | triple@(_, weight, _) <- triples]
    totalWeight = sum [weight | (_, weight, _) <- triples]
-- | Component-wise sum of two triples.
countFolder :: (Num n, Num v) => (n, v, v) -> (n, v, v) -> (n, v, v)
countFolder (x1, y1, z1) (x2, y2, z2) = (xs, ys, zs)
  where
    xs = x1 + x2
    ys = y1 + y2
    zs = z1 + z2

View File

@ -536,6 +536,8 @@ main = hspec $ do
runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0
it "check inconsistent input" $ do
runGEvalTest "bio-f1-error" `shouldThrow` (== UnexpectedData 2 "inconsistent label sequence `B-NAME/JOHN I-FOO/SMITH I-FOO/X`")
it "weighted F1" $ do
runGEvalTest "bio-weighted-f1-simple" `shouldReturnAlmost` 0.82539682
describe "automatic decompression" $ do
it "more complex test" $ do
runGEvalTest "charmatch-complex-compressed" `shouldReturnAlmost` 0.1923076923076923

View File

@ -0,0 +1,4 @@
O B-FOO O O O B-FOO I-FOO I-FOO B-BAR
B-BAR O B-XYZ
B-BAZ O B-BAZ O B-BAR
O B-BAZ I-BAZ B-FOO
1 O B-FOO O O O B-FOO I-FOO I-FOO B-BAR
2 B-BAR O B-XYZ
3 B-BAZ O B-BAZ O B-BAR
4 O B-BAZ I-BAZ B-FOO

View File

@ -0,0 +1 @@
--metric BIO-Weighted-F1

View File

@ -0,0 +1,4 @@
O B-FOO O O O B-FOO I-FOO O B-BAR
B-BAR O O
B-BAZ B-BAZ B-BAZ O O
O B-BAZ I-BAZ B-FOO
1 O B-FOO O O O B-FOO I-FOO O B-BAR
2 B-BAR O O
3 B-BAZ B-BAZ B-BAZ O O
4 O B-BAZ I-BAZ B-FOO