Add BIOWeightedF1 metric

This commit is contained in:
Filip Gralinski 2021-06-09 22:16:13 +02:00
parent e26275eff2
commit 0afa1fe0ba
11 changed files with 97 additions and 5 deletions

View File

@ -3,7 +3,7 @@
module GEval.BIO module GEval.BIO
(BIOLabel(..), bioSequenceParser, parseBioSequenceIntoEntities, (BIOLabel(..), bioSequenceParser, parseBioSequenceIntoEntities,
parseBioSequenceIntoEntitiesWithoutNormalization, parseBioSequenceIntoEntitiesWithoutNormalization,
TaggedSpan(..), TaggedEntity(..), gatherCountsForBIO, TaggedSpan(..), TaggedEntity(..), gatherCountsForBIO, gatherSeparatedCountsForBIO,
eraseNormalisation) eraseNormalisation)
where where
@ -16,9 +16,12 @@ import Data.Attoparsec.Combinator
import Control.Applicative import Control.Applicative
import Data.Char import Data.Char
import Data.Maybe (catMaybes) import Data.Maybe (catMaybes)
import Data.List (groupBy, sortBy)
import GEval.Common import GEval.Common
import qualified Data.HashMap.Strict as M
data BIOLabel = Outside | Beginning T.Text (Maybe T.Text) | Inside T.Text (Maybe T.Text) data BIOLabel = Outside | Beginning T.Text (Maybe T.Text) | Inside T.Text (Maybe T.Text)
deriving (Eq, Show) deriving (Eq, Show)
@ -43,6 +46,23 @@ gatherCountsForBIO expected got = (maxMatchOnOrdered laterThan expected got, len
where where
laterThan (TaggedEntity (TaggedSpan a _) _ _) (TaggedEntity (TaggedSpan b _) _ _) = a > b laterThan (TaggedEntity (TaggedSpan a _) _ _) (TaggedEntity (TaggedSpan b _) _ _) = a > b
-- | Order two entities by their label (the second constructor field) only;
-- the remaining fields do not take part in the comparison.
compareByLabel :: TaggedEntity -> TaggedEntity -> Ordering
compareByLabel left right = compare (labelOf left) (labelOf right)
  where labelOf (TaggedEntity _ lab _) = lab
-- | Check whether two entities carry the same label (the second constructor
-- field); the remaining fields are ignored.
equalLabel :: TaggedEntity -> TaggedEntity -> Bool
equalLabel left right = labelOf left == labelOf right
  where labelOf (TaggedEntity _ lab _) = lab
-- | Per-label counts for one pair of expected/got entity lists.
--
-- Both lists are grouped by label and 'gatherCountsForBIO' is run on each
-- label's pair of groups. The resulting map has an entry for every label that
-- occurs in EITHER list: a label found only in @got@ is paired with an empty
-- expected group (and vice versa), so spurious predictions are not lost when
-- the per-item maps are later summed label by label.
--
-- Keying the result on the expected labels only would silently drop
-- predictions of a label on items where that label is not expected, which
-- inflates the precision of that label.
--
-- NOTE(review): fixture scores computed with expected-only keying (e.g. the
-- "bio-weighted-f1-simple" spec) need to be recomputed after this change.
gatherSeparatedCountsForBIO :: [TaggedEntity] -> [TaggedEntity] -> M.HashMap T.Text (Int, Int, Int)
gatherSeparatedCountsForBIO expected got = M.map (uncurry gatherCountsForBIO) paired
  where
    -- label -> (expected entities with that label, got entities with that label)
    paired =
      M.unionWith mergeSides
        (M.map (\grp -> (grp, [])) (groupEntitiesByLabel expected))
        (M.map (\grp -> ([], grp)) (groupEntitiesByLabel got))
    -- Left argument comes from the expected map, right one from the got map.
    mergeSides (expectedGroup, _) (_, gotGroup) = (expectedGroup, gotGroup)
    -- 'sortBy' is stable, so entities keep their original relative order
    -- inside each group. The comprehension pattern is total: an empty group
    -- (which 'groupBy' never produces) would simply be skipped.
    groupEntitiesByLabel entities =
      M.fromList
        [ (lab, grp)
        | grp@(TaggedEntity _ lab _ : _) <- groupBy equalLabel (sortBy compareByLabel entities)
        ]
parseBioSequenceIntoEntities :: T.Text -> Either String [TaggedEntity] parseBioSequenceIntoEntities :: T.Text -> Either String [TaggedEntity]
parseBioSequenceIntoEntities t = labelsIntoEntities =<< (parseOnly (bioSequenceParser <* endOfInput) t) parseBioSequenceIntoEntities t = labelsIntoEntities =<< (parseOnly (bioSequenceParser <* endOfInput) t)

View File

@ -177,6 +177,7 @@ isPreprocessable LogLoss = False
isPreprocessable Likelihood = False isPreprocessable Likelihood = False
isPreprocessable BIOF1 = False isPreprocessable BIOF1 = False
isPreprocessable BIOF1Labels = False isPreprocessable BIOF1Labels = False
isPreprocessable BIOWeightedF1 = False
isPreprocessable TokenAccuracy = True isPreprocessable TokenAccuracy = True
isPreprocessable SegmentAccuracy = True isPreprocessable SegmentAccuracy = True
isPreprocessable MAE = False isPreprocessable MAE = False
@ -788,6 +789,11 @@ generalizedProbabilisticFMeasure beta metric = gevalCoreWithoutInput metric
countAgg :: (Num n, Num v, Monad m) => ConduitM (n, v, v) o m (n, v, v) countAgg :: (Num n, Num v, Monad m) => ConduitM (n, v, v) o m (n, v, v)
countAgg = CC.foldl countFolder (fromInteger 0, fromInteger 0, fromInteger 0) countAgg = CC.foldl countFolder (fromInteger 0, fromInteger 0, fromInteger 0)
-- | Fold a stream of per-item label maps into a single map, starting from the
-- empty map. When a label occurs in several items its count triples are
-- combined with 'countFolder'.
separatedCountAgg :: Monad m => ConduitM (M.HashMap Text (Int, Int, Int)) o m (M.HashMap Text (Int, Int, Int))
separatedCountAgg = CC.foldl mergeCounts M.empty
  where mergeCounts soFar itemCounts = M.unionWith countFolder soFar itemCounts
countFragAgg :: (Num n, Num v, Monad m) => ConduitM (n, n, v, v) o m (n, n, v, v) countFragAgg :: (Num n, Num v, Monad m) => ConduitM (n, n, v, v) o m (n, n, v, v)
countFragAgg = CC.foldl countFragFolder (fromInteger 0, fromInteger 0, fromInteger 0, fromInteger 0) countFragAgg = CC.foldl countFragFolder (fromInteger 0, fromInteger 0, fromInteger 0, fromInteger 0)
@ -990,6 +996,8 @@ continueGEvalCalculations SABIOF1 BIOF1 = defineContinuation countAgg f1MeasureO
continueGEvalCalculations SABIOF1Labels BIOF1Labels = defineContinuation countAgg f1MeasureOnCounts noGraph continueGEvalCalculations SABIOF1Labels BIOF1Labels = defineContinuation countAgg f1MeasureOnCounts noGraph
continueGEvalCalculations SABIOWeightedF1 BIOWeightedF1 = defineContinuation separatedCountAgg f1MeasureOnSeparatedCounts noGraph
continueGEvalCalculations SASegmentAccuracy SegmentAccuracy = defineContinuation averageC id noGraph continueGEvalCalculations SASegmentAccuracy SegmentAccuracy = defineContinuation averageC id noGraph
continueGEvalCalculations SATokenAccuracy TokenAccuracy = defineContinuation hitsAndTotalsAgg continueGEvalCalculations SATokenAccuracy TokenAccuracy = defineContinuation hitsAndTotalsAgg

View File

@ -316,6 +316,7 @@ This a sample challenge for the likelihood metric.
|] ++ (commonReadmeMDContents testName) |] ++ (commonReadmeMDContents testName)
readmeMDContents BIOF1Labels testName = readmeMDContents BIOF1 testName readmeMDContents BIOF1Labels testName = readmeMDContents BIOF1 testName
readmeMDContents BIOWeightedF1 testName = readmeMDContents BIOF1 testName
readmeMDContents BIOF1 testName = [i| readmeMDContents BIOF1 testName = [i|
Tag and normalize names Tag and normalize names
======================= =======================
@ -568,6 +569,7 @@ trainContents LogLoss = [hereLit|0.0 Hell, no!!!
0.0 Boring, boring, boring 0.0 Boring, boring, boring
|] |]
trainContents BIOF1Labels = trainContents BIOF1 trainContents BIOF1Labels = trainContents BIOF1
trainContents BIOWeightedF1 = trainContents BIOF1
trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surname/BOND My name is Bond , James Bond trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surname/BOND My name is Bond , James Bond
O O O O O There is no name here O O O O O There is no name here
B-firstname/JOHN B-surname/VON I-surname/NEUMANN John von Nueman B-firstname/JOHN B-surname/VON I-surname/NEUMANN John von Nueman
@ -646,6 +648,7 @@ Boring stuff
That's good That's good
|] |]
devInContents BIOF1Labels = devInContents BIOF1 devInContents BIOF1Labels = devInContents BIOF1
devInContents BIOWeightedF1 = devInContents BIOF1
devInContents BIOF1 = [hereLit|Adam and Eve devInContents BIOF1 = [hereLit|Adam and Eve
Mr Jan Kowalski Mr Jan Kowalski
|] |]
@ -720,6 +723,7 @@ devExpectedContents LogLoss = [hereLit|1.0
1.0 1.0
|] |]
devExpectedContents BIOF1Labels = devExpectedContents BIOF1 devExpectedContents BIOF1Labels = devExpectedContents BIOF1
devExpectedContents BIOWeightedF1 = devExpectedContents BIOF1
devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE
O B-firstname/JAN B-surname/KOWALSKI O B-firstname/JAN B-surname/KOWALSKI
|] |]
@ -799,8 +803,10 @@ Super-duper!!
That is incredibly boring. That is incredibly boring.
|] |]
testInContents BIOF1Labels = testInContents BIOF1 testInContents BIOF1Labels = testInContents BIOF1
testInContents BIOWeightedF1 = testInContents BIOF1
testInContents BIOF1 = [hereLit|Alan Tring testInContents BIOF1 = [hereLit|Alan Tring
No name here No name here
Tarski is NOT here
|] |]
testInContents TokenAccuracy = [hereLit|I have cats testInContents TokenAccuracy = [hereLit|I have cats
I know I know
@ -875,8 +881,10 @@ testExpectedContents LogLoss = [hereLit|1.0
0.0 0.0
|] |]
testExpectedContents BIOF1Labels = testExpectedContents BIOF1 testExpectedContents BIOF1Labels = testExpectedContents BIOF1
testExpectedContents BIOWeightedF1 = testExpectedContents BIOF1
testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING
O O O O O O
B-surname/TARSKI O O O
|] |]
testExpectedContents TokenAccuracy = [hereLit|* V N testExpectedContents TokenAccuracy = [hereLit|* V N
* V * V
@ -945,6 +953,7 @@ inHeaderContents MAP = Just ["Dialect", "PolishPhrase"]
inHeaderContents Likelihood = inHeaderContents LogLoss inHeaderContents Likelihood = inHeaderContents LogLoss
inHeaderContents LogLoss = Just ["Text"] inHeaderContents LogLoss = Just ["Text"]
inHeaderContents BIOF1Labels = inHeaderContents BIOF1 inHeaderContents BIOF1Labels = inHeaderContents BIOF1
inHeaderContents BIOWeightedF1 = inHeaderContents BIOF1
inHeaderContents BIOF1 = Just ["Text"] inHeaderContents BIOF1 = Just ["Text"]
inHeaderContents TokenAccuracy = Just ["TokenizedText"] inHeaderContents TokenAccuracy = Just ["TokenizedText"]
inHeaderContents SegmentAccuracy = Just ["Segment"] inHeaderContents SegmentAccuracy = Just ["Segment"]
@ -976,6 +985,7 @@ outHeaderContents MAP = Nothing
outHeaderContents Likelihood = outHeaderContents LogLoss outHeaderContents Likelihood = outHeaderContents LogLoss
outHeaderContents LogLoss = Just ["Probability"] outHeaderContents LogLoss = Just ["Probability"]
outHeaderContents BIOF1Labels = outHeaderContents BIOF1 outHeaderContents BIOF1Labels = outHeaderContents BIOF1
outHeaderContents BIOWeightedF1 = outHeaderContents BIOF1
outHeaderContents BIOF1 = Just ["BIOOutput"] outHeaderContents BIOF1 = Just ["BIOOutput"]
outHeaderContents TokenAccuracy = Just ["PartsOfSpeech"] outHeaderContents TokenAccuracy = Just ["PartsOfSpeech"]
outHeaderContents SegmentAccuracy = Just ["PartsOfSpeech"] outHeaderContents SegmentAccuracy = Just ["PartsOfSpeech"]

View File

@ -28,7 +28,7 @@ import Data.Attoparsec.Text (parseOnly)
data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | CER | Accuracy | ClippEU data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | CER | Accuracy | ClippEU
| FMeasure Double | MacroFMeasure Double | NMI | FMeasure Double | MacroFMeasure Double | NMI
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
| BIOF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | BIOF1 | BIOWeightedF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE
| MultiLabelFMeasure Double MatchingSpecification | MultiLabelFMeasure Double MatchingSpecification
| MultiLabelLogLoss | MultiLabelLikelihood | MultiLabelLogLoss | MultiLabelLikelihood
| SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double | SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double
@ -78,6 +78,7 @@ instance Show Metric where
show Likelihood = "Likelihood" show Likelihood = "Likelihood"
show BIOF1 = "BIO-F1" show BIOF1 = "BIO-F1"
show BIOF1Labels = "BIO-F1-Labels" show BIOF1Labels = "BIO-F1-Labels"
show BIOWeightedF1 = "BIO-Weighted-F1"
show TokenAccuracy = "TokenAccuracy" show TokenAccuracy = "TokenAccuracy"
show SegmentAccuracy = "SegmentAccuracy" show SegmentAccuracy = "SegmentAccuracy"
show MAE = "MAE" show MAE = "MAE"
@ -161,6 +162,7 @@ instance Read Metric where
readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)] readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)] readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)] readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
readsPrec _ ('B':'I':'O':'-':'W':'e':'i':'g':'h':'t':'e':'d':'-':'F':'1': theRest) = [(BIOWeightedF1, theRest)]
readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)] readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
readsPrec _ ('T':'o':'k':'e':'n':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(TokenAccuracy, theRest)] readsPrec _ ('T':'o':'k':'e':'n':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(TokenAccuracy, theRest)]
readsPrec _ ('S':'e':'g':'m':'e':'n':'t':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(SegmentAccuracy, theRest)] readsPrec _ ('S':'e':'g':'m':'e':'n':'t':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(SegmentAccuracy, theRest)]
@ -201,6 +203,7 @@ getMetricOrdering MAP = TheHigherTheBetter
getMetricOrdering LogLoss = TheLowerTheBetter getMetricOrdering LogLoss = TheLowerTheBetter
getMetricOrdering Likelihood = TheHigherTheBetter getMetricOrdering Likelihood = TheHigherTheBetter
getMetricOrdering BIOF1 = TheHigherTheBetter getMetricOrdering BIOF1 = TheHigherTheBetter
getMetricOrdering BIOWeightedF1 = TheHigherTheBetter
getMetricOrdering BIOF1Labels = TheHigherTheBetter getMetricOrdering BIOF1Labels = TheHigherTheBetter
getMetricOrdering TokenAccuracy = TheHigherTheBetter getMetricOrdering TokenAccuracy = TheHigherTheBetter
getMetricOrdering SegmentAccuracy = TheHigherTheBetter getMetricOrdering SegmentAccuracy = TheHigherTheBetter

View File

@ -20,7 +20,7 @@ import GEval.Common
import GEval.BLEU (bleuStep, gleuStep) import GEval.BLEU (bleuStep, gleuStep)
import GEval.WER (werStep) import GEval.WER (werStep)
import GEval.Clippings (totalArea, coveredBy, clippEUMatchStep) import GEval.Clippings (totalArea, coveredBy, clippEUMatchStep)
import GEval.BIO (gatherCountsForBIO) import GEval.BIO (gatherCountsForBIO, gatherSeparatedCountsForBIO)
import GEval.Probability import GEval.Probability
import GEval.PrecisionRecall (weightedMaxMatch, fMeasureOnCounts, calculateMAPForOneResult, getProbabilisticCounts, getCounts) import GEval.PrecisionRecall (weightedMaxMatch, fMeasureOnCounts, calculateMAPForOneResult, getProbabilisticCounts, getCounts)
@ -45,13 +45,15 @@ import GEval.ProbList (ProbList(..), parseIntoProbList, WordWithProb(..), countL
import GEval.MatchingSpecification import GEval.MatchingSpecification
import GEval.Haversine import GEval.Haversine
import qualified Data.HashMap.Strict as M
-- | Helper type so that singleton can be used. -- | Helper type so that singleton can be used.
-- | (The problem is that some metrics are parametrized by Double -- | (The problem is that some metrics are parametrized by Double
-- | Word32 and this is not handled by the singleton library.) -- | Word32 and this is not handled by the singleton library.)
singletons [d|data AMetric = ARMSE | AMSE | APearson | ASpearman | ABLEU | AGLEU | AWER | ACER | AAccuracy | AClippEU singletons [d|data AMetric = ARMSE | AMSE | APearson | ASpearman | ABLEU | AGLEU | AWER | ACER | AAccuracy | AClippEU
| AFMeasure | AMacroFMeasure | ANMI | AFMeasure | AMacroFMeasure | ANMI
| ALogLossHashed | ACharMatch | AMAP | ALogLoss | ALikelihood | ALogLossHashed | ACharMatch | AMAP | ALogLoss | ALikelihood
| ABIOF1 | ABIOF1Labels | ATokenAccuracy | ASegmentAccuracy | ALikelihoodHashed | AMAE | ASMAPE | AMultiLabelFMeasure MatchingSpecification | ABIOF1 | ABIOWeightedF1 | ABIOF1Labels | ATokenAccuracy | ASegmentAccuracy | ALikelihoodHashed | AMAE | ASMAPE | AMultiLabelFMeasure MatchingSpecification
| AMultiLabelLogLoss | AMultiLabelLikelihood | AMultiLabelLogLoss | AMultiLabelLikelihood
| ASoftFMeasure | AProbabilisticMultiLabelFMeasure | AProbabilisticSoftFMeasure | ASoft2DFMeasure | ASoftFMeasure | AProbabilisticMultiLabelFMeasure | AProbabilisticSoftFMeasure | ASoft2DFMeasure
| AFLCFMeasure | AHaversine | AFLCFMeasure | AHaversine
@ -79,6 +81,7 @@ toHelper MAP = AMAP
toHelper LogLoss = ALogLoss toHelper LogLoss = ALogLoss
toHelper Likelihood = ALikelihood toHelper Likelihood = ALikelihood
toHelper BIOF1 = ABIOF1 toHelper BIOF1 = ABIOF1
toHelper BIOWeightedF1 = ABIOWeightedF1
toHelper BIOF1Labels = ABIOF1Labels toHelper BIOF1Labels = ABIOF1Labels
toHelper TokenAccuracy = ATokenAccuracy toHelper TokenAccuracy = ATokenAccuracy
toHelper SegmentAccuracy = ASegmentAccuracy toHelper SegmentAccuracy = ASegmentAccuracy
@ -125,6 +128,7 @@ type family ParsedExpectedType (t :: AMetric) :: * where
ParsedExpectedType ALogLoss = Double ParsedExpectedType ALogLoss = Double
ParsedExpectedType ALikelihood = Double ParsedExpectedType ALikelihood = Double
ParsedExpectedType ABIOF1 = [TaggedEntity] ParsedExpectedType ABIOF1 = [TaggedEntity]
ParsedExpectedType ABIOWeightedF1 = [TaggedEntity]
ParsedExpectedType ABIOF1Labels = [TaggedEntity] ParsedExpectedType ABIOF1Labels = [TaggedEntity]
ParsedExpectedType ATokenAccuracy = [Text] ParsedExpectedType ATokenAccuracy = [Text]
ParsedExpectedType ASegmentAccuracy = [Annotation] ParsedExpectedType ASegmentAccuracy = [Annotation]
@ -161,6 +165,7 @@ expectedParser SAMAP = splitByTabs
expectedParser SALogLoss = doubleParser expectedParser SALogLoss = doubleParser
expectedParser SALikelihood = doubleParser expectedParser SALikelihood = doubleParser
expectedParser SABIOF1 = parseBioSequenceIntoEntities expectedParser SABIOF1 = parseBioSequenceIntoEntities
expectedParser SABIOWeightedF1 = parseBioSequenceIntoEntities
expectedParser SABIOF1Labels = parseBioSequenceIntoEntitiesWithoutNormalization expectedParser SABIOF1Labels = parseBioSequenceIntoEntitiesWithoutNormalization
expectedParser SATokenAccuracy = intoWords expectedParser SATokenAccuracy = intoWords
expectedParser SASegmentAccuracy = parseSegmentAnnotations expectedParser SASegmentAccuracy = parseSegmentAnnotations
@ -211,6 +216,7 @@ outputParser SAMAP = splitByTabs
outputParser SALogLoss = doubleParser outputParser SALogLoss = doubleParser
outputParser SALikelihood = doubleParser outputParser SALikelihood = doubleParser
outputParser SABIOF1 = parseBioSequenceIntoEntities outputParser SABIOF1 = parseBioSequenceIntoEntities
outputParser SABIOWeightedF1 = parseBioSequenceIntoEntities
outputParser SABIOF1Labels = parseBioSequenceIntoEntitiesWithoutNormalization outputParser SABIOF1Labels = parseBioSequenceIntoEntitiesWithoutNormalization
outputParser SATokenAccuracy = intoWords outputParser SATokenAccuracy = intoWords
outputParser SASegmentAccuracy = parseSegmentAnnotations outputParser SASegmentAccuracy = parseSegmentAnnotations
@ -232,6 +238,7 @@ type family ItemIntermediateRepresentationType (t :: AMetric) :: * where
ItemIntermediateRepresentationType AClippEU = (Int, Int, Int) ItemIntermediateRepresentationType AClippEU = (Int, Int, Int)
ItemIntermediateRepresentationType ANMI = (Text, Text) ItemIntermediateRepresentationType ANMI = (Text, Text)
ItemIntermediateRepresentationType ABIOF1 = (Int, Int, Int) ItemIntermediateRepresentationType ABIOF1 = (Int, Int, Int)
ItemIntermediateRepresentationType ABIOWeightedF1 = M.HashMap Text (Int, Int, Int)
ItemIntermediateRepresentationType ABIOF1Labels = (Int, Int, Int) ItemIntermediateRepresentationType ABIOF1Labels = (Int, Int, Int)
ItemIntermediateRepresentationType ATokenAccuracy = (Int, Int) ItemIntermediateRepresentationType ATokenAccuracy = (Int, Int)
ItemIntermediateRepresentationType AProbabilisticMultiLabelFMeasure = ([Double], [Double], Double, Int) ItemIntermediateRepresentationType AProbabilisticMultiLabelFMeasure = ([Double], [Double], Double, Int)
@ -277,6 +284,7 @@ itemStep SAMAP = uncurry calculateMAPForOneResult
itemStep SALogLoss = itemLogLossError itemStep SALogLoss = itemLogLossError
itemStep SALikelihood = itemLogLossError itemStep SALikelihood = itemLogLossError
itemStep SABIOF1 = uncurry gatherCountsForBIO itemStep SABIOF1 = uncurry gatherCountsForBIO
itemStep SABIOWeightedF1 = uncurry gatherSeparatedCountsForBIO
itemStep SABIOF1Labels = uncurry gatherCountsForBIO itemStep SABIOF1Labels = uncurry gatherCountsForBIO
itemStep SATokenAccuracy = countHitsAndTotals itemStep SATokenAccuracy = countHitsAndTotals
itemStep SASegmentAccuracy = uncurry segmentAccuracy itemStep SASegmentAccuracy = uncurry segmentAccuracy

View File

@ -64,6 +64,7 @@ listOfAvailableMetrics = [RMSE,
LogLossHashed defaultLogLossHashedSize, LogLossHashed defaultLogLossHashedSize,
LikelihoodHashed defaultLogLossHashedSize, LikelihoodHashed defaultLogLossHashedSize,
BIOF1, BIOF1,
BIOWeightedF1,
BIOF1Labels, BIOF1Labels,
TokenAccuracy, TokenAccuracy,
SegmentAccuracy, SegmentAccuracy,
@ -105,6 +106,7 @@ isMetricDescribed WER = True
isMetricDescribed CER = True isMetricDescribed CER = True
isMetricDescribed SegmentAccuracy = True isMetricDescribed SegmentAccuracy = True
isMetricDescribed Haversine = True isMetricDescribed Haversine = True
isMetricDescribed BIOWeightedF1 = True
isMetricDescribed _ = False isMetricDescribed _ = False
getEvaluationSchemeDescription :: EvaluationScheme -> String getEvaluationSchemeDescription :: EvaluationScheme -> String
@ -176,6 +178,9 @@ getMetricDescription Haversine =
[i|The haversine formula determines the great-circle distance between [i|The haversine formula determines the great-circle distance between
two points on a sphere given their longitudes and latitudes (in degrees). two points on a sphere given their longitudes and latitudes (in degrees).
|] |]
getMetricDescription BIOWeightedF1 =
[i|Weighted-average F1-score calculated on output expressed in the BIO format.
|]
outContents :: Metric -> String outContents :: Metric -> String
outContents (MultiLabelFMeasure _ _) = [hereLit|person/1,3 first-name/1 first-name/3 outContents (MultiLabelFMeasure _ _) = [hereLit|person/1,3 first-name/1 first-name/3
@ -206,6 +211,10 @@ N:1-4 V:6-7 A:9-13
outContents Haversine = [hereLit|39.575264 -56.995928 outContents Haversine = [hereLit|39.575264 -56.995928
29.949932 -90.070116 29.949932 -90.070116
|] |]
outContents BIOWeightedF1 = [hereLit|B-firstname/ALAN B-surname/TURING
O O O
B-surname/TARSKI O B-surname/NOT O
|]
expectedScore :: EvaluationScheme -> MetricValue expectedScore :: EvaluationScheme -> MetricValue
expectedScore (EvaluationScheme (MultiLabelFMeasure 1.0 ExactMatch) []) = 0.6666 expectedScore (EvaluationScheme (MultiLabelFMeasure 1.0 ExactMatch) []) = 0.6666
@ -231,6 +240,8 @@ expectedScore (EvaluationScheme CER [])
= 0.14814 = 0.14814
expectedScore (EvaluationScheme Haversine []) expectedScore (EvaluationScheme Haversine [])
= 1044.2633358563135 = 1044.2633358563135
expectedScore (EvaluationScheme BIOWeightedF1 [])
= 0.86666666
helpMetricParameterMetricsList :: String helpMetricParameterMetricsList :: String
helpMetricParameterMetricsList = intercalate ", " $ map (\s -> (show s) ++ (case extraInfo s of helpMetricParameterMetricsList = intercalate ", " $ map (\s -> (show s) ++ (case extraInfo s of
@ -297,6 +308,10 @@ formatDescription CER = [hereLit|Any text, whitespace and punctuation marks are
formatDescription Haversine = [hereLit|Each line is a latitude and longitude of sphere separated by tabulation, formatDescription Haversine = [hereLit|Each line is a latitude and longitude of sphere separated by tabulation,
e.g. "41.558153 -73.051497". e.g. "41.558153 -73.051497".
|] |]
-- Format help shown for the BIO-Weighted-F1 metric.
formatDescription BIOWeightedF1 = [hereLit|Each line is a sequence of tags encoded in the BIO format, i.e. O, B-tag, I-tag;
B-tags and I-tags can be accompanied by an extra label after a slash.
|]
scoreExplanation :: EvaluationScheme -> Maybe String scoreExplanation :: EvaluationScheme -> Maybe String
scoreExplanation (EvaluationScheme (MultiLabelFMeasure _ ExactMatch) []) scoreExplanation (EvaluationScheme (MultiLabelFMeasure _ ExactMatch) [])
@ -332,6 +347,11 @@ scoreExplanation (EvaluationScheme CER [])
= Just [hereLit|The total length of expected output (in characters) is 27. There are 4 errors = Just [hereLit|The total length of expected output (in characters) is 27. There are 4 errors
(1 word substituted, 1 inserted, 1 deleted) in the actual output. Hence, (1 word substituted, 1 inserted, 1 deleted) in the actual output. Hence,
CER = (2+1+1) / 27 = 4 / 27 = 0.14814.|] CER = (2+1+1) / 27 = 4 / 27 = 0.14814.|]
scoreExplanation (EvaluationScheme Haversine []) = Nothing
scoreExplanation (EvaluationScheme BIOWeightedF1 [])
= Just [hereLit|There are two labels (firstname and surname, O is not considered). Firstname was
predicted in the perfect way, hence F1=1, whereas for surname recall is 1, precision - 2/3 and F1 - 4/5.
The weighted average is (1 * 1 + 2 * 4/5) / 3 = 13/15 = 0.8667.|]
pasteLines :: String -> String -> String pasteLines :: String -> String -> String
pasteLines a b = printf "%-35s %s\n" a b pasteLines a b = printf "%-35s %s\n" a b

View File

@ -7,9 +7,11 @@ module GEval.PrecisionRecall(calculateMAPForOneResult,
precisionAndRecall, precisionAndRecallFromCounts, precisionAndRecall, precisionAndRecallFromCounts,
maxMatch, maxMatchOnOrdered, getCounts, weightedMaxMatch, weightedMaxMatching, maxMatch, maxMatchOnOrdered, getCounts, weightedMaxMatch, weightedMaxMatching,
getProbabilisticCounts, getProbabilisticCounts,
countFragFolder) countFragFolder, fMeasureOnSeparatedCounts, f1MeasureOnSeparatedCounts)
where where
import Debug.Trace
import GEval.Common import GEval.Common
import GEval.Probability import GEval.Probability
@ -21,6 +23,7 @@ import Data.List (find, foldl', nub)
import Data.Algorithm.Munkres import Data.Algorithm.Munkres
import qualified Data.Array.IArray as DAI import qualified Data.Array.IArray as DAI
import qualified Data.HashMap.Strict as M
calculateMAPForOneResult :: (Eq a) => [a] -> [a] -> Double calculateMAPForOneResult :: (Eq a) => [a] -> [a] -> Double
calculateMAPForOneResult expected got = precisionSum / fromIntegral (length expected) calculateMAPForOneResult expected got = precisionSum / fromIntegral (length expected)
@ -69,6 +72,15 @@ fMeasureOnFragCounts beta (rC, pC, nbExpected, nbGot) =
where r = rC /. nbExpected where r = rC /. nbExpected
p = pC /. nbGot p = pC /. nbGot
-- | 'fMeasureOnSeparatedCounts' specialised to beta = 1.0.
f1MeasureOnSeparatedCounts :: M.HashMap a (Int, Int, Int) -> Double
f1MeasureOnSeparatedCounts = fMeasureOnSeparatedCounts 1.0
-- | Weighted average of per-key F-measures.
--
-- For every count triple in the map, 'fMeasureOnCounts' is computed with the
-- given beta and multiplied by the triple's second component; the sum of
-- these products is divided (with '/.') by the sum of the second components.
-- The map keys themselves are not used.
--
-- NOTE(review): the second component is presumably the number of expected
-- items for the key -- confirm against the producer of the triples.
fMeasureOnSeparatedCounts :: Double -> M.HashMap a (Int, Int, Int) -> Double
fMeasureOnSeparatedCounts beta m = weightedSum /. totalWeight
  where
    perKey = M.elems m
    weightOf (_, w, _) = w
    weightedSum = sum [fromIntegral (weightOf c) * fMeasureOnCounts beta c | c <- perKey]
    totalWeight = sum (map weightOf perKey)
countFolder :: (Num n, Num v) => (n, v, v) -> (n, v, v) -> (n, v, v) countFolder :: (Num n, Num v) => (n, v, v) -> (n, v, v) -> (n, v, v)
countFolder (a1, a2, a3) (b1, b2, b3) = (a1+b1, a2+b2, a3+b3) countFolder (a1, a2, a3) (b1, b2, b3) = (a1+b1, a2+b2, a3+b3)

View File

@ -536,6 +536,8 @@ main = hspec $ do
runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0 runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0
it "check inconsistent input" $ do it "check inconsistent input" $ do
runGEvalTest "bio-f1-error" `shouldThrow` (== UnexpectedData 2 "inconsistent label sequence `B-NAME/JOHN I-FOO/SMITH I-FOO/X`") runGEvalTest "bio-f1-error" `shouldThrow` (== UnexpectedData 2 "inconsistent label sequence `B-NAME/JOHN I-FOO/SMITH I-FOO/X`")
it "weighted F1" $ do
runGEvalTest "bio-weighted-f1-simple" `shouldReturnAlmost` 0.82539682
describe "automatic decompression" $ do describe "automatic decompression" $ do
it "more complex test" $ do it "more complex test" $ do
runGEvalTest "charmatch-complex-compressed" `shouldReturnAlmost` 0.1923076923076923 runGEvalTest "charmatch-complex-compressed" `shouldReturnAlmost` 0.1923076923076923

View File

@ -0,0 +1,4 @@
O B-FOO O O O B-FOO I-FOO I-FOO B-BAR
B-BAR O B-XYZ
B-BAZ O B-BAZ O B-BAR
O B-BAZ I-BAZ B-FOO
1 O B-FOO O O O B-FOO I-FOO I-FOO B-BAR
2 B-BAR O B-XYZ
3 B-BAZ O B-BAZ O B-BAR
4 O B-BAZ I-BAZ B-FOO

View File

@ -0,0 +1 @@
--metric BIO-Weighted-F1

View File

@ -0,0 +1,4 @@
O B-FOO O O O B-FOO I-FOO O B-BAR
B-BAR O O
B-BAZ B-BAZ B-BAZ O O
O B-BAZ I-BAZ B-FOO
1 O B-FOO O O O B-FOO I-FOO O B-BAR
2 B-BAR O O
3 B-BAZ B-BAZ B-BAZ O O
4 O B-BAZ I-BAZ B-FOO