From 0afa1fe0bab3e5c39e568f40e5a4d44932dc2fa2 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Wed, 9 Jun 2021 22:16:13 +0200 Subject: [PATCH] Add BIOWeightedF1 metric --- src/GEval/BIO.hs | 22 ++++++++++++++++++- src/GEval/Core.hs | 8 +++++++ src/GEval/CreateChallenge.hs | 10 +++++++++ src/GEval/Metric.hs | 5 ++++- src/GEval/MetricsMechanics.hs | 12 ++++++++-- src/GEval/MetricsMeta.hs | 20 +++++++++++++++++ src/GEval/PrecisionRecall.hs | 14 +++++++++++- test/Spec.hs | 2 ++ .../test-A/out.tsv | 4 ++++ .../bio-weighted-f1-simple/config.txt | 1 + .../test-A/expected.tsv | 4 ++++ 11 files changed, 97 insertions(+), 5 deletions(-) create mode 100644 test/bio-weighted-f1-simple/bio-weighted-f1-simple-solution/test-A/out.tsv create mode 100644 test/bio-weighted-f1-simple/bio-weighted-f1-simple/config.txt create mode 100644 test/bio-weighted-f1-simple/bio-weighted-f1-simple/test-A/expected.tsv diff --git a/src/GEval/BIO.hs b/src/GEval/BIO.hs index f909dd3..acffa80 100644 --- a/src/GEval/BIO.hs +++ b/src/GEval/BIO.hs @@ -3,7 +3,7 @@ module GEval.BIO (BIOLabel(..), bioSequenceParser, parseBioSequenceIntoEntities, parseBioSequenceIntoEntitiesWithoutNormalization, - TaggedSpan(..), TaggedEntity(..), gatherCountsForBIO, + TaggedSpan(..), TaggedEntity(..), gatherCountsForBIO, gatherSeparatedCountsForBIO, eraseNormalisation) where @@ -16,9 +16,12 @@ import Data.Attoparsec.Combinator import Control.Applicative import Data.Char import Data.Maybe (catMaybes) +import Data.List (groupBy, sortBy) import GEval.Common +import qualified Data.HashMap.Strict as M + data BIOLabel = Outside | Beginning T.Text (Maybe T.Text) | Inside T.Text (Maybe T.Text) deriving (Eq, Show) @@ -43,6 +46,23 @@ gatherCountsForBIO expected got = (maxMatchOnOrdered laterThan expected got, len where laterThan (TaggedEntity (TaggedSpan a _) _ _) (TaggedEntity (TaggedSpan b _) _ _) = a > b +compareByLabel :: TaggedEntity -> TaggedEntity -> Ordering +compareByLabel (TaggedEntity _ labelA _) (TaggedEntity _ labelB _) = labelA `compare` labelB + +equalLabel :: TaggedEntity -> TaggedEntity -> Bool +equalLabel (TaggedEntity _ labelA _) (TaggedEntity _ labelB _) = labelA == labelB + +gatherSeparatedCountsForBIO :: [TaggedEntity] -> [TaggedEntity] -> M.HashMap T.Text (Int, Int, Int) +gatherSeparatedCountsForBIO expected got = M.mapWithKey process expectedMapped + where expectedMapped = groupEntitiesByLabel expected + gotMapped = groupEntitiesByLabel got + groupEntitiesByLabel = + M.fromList + . map (\l@((TaggedEntity _ lab _):_) -> (lab, l)) + . groupBy equalLabel + . sortBy compareByLabel + process lab expectedGroup = gatherCountsForBIO expectedGroup (M.lookupDefault [] lab gotMapped) + parseBioSequenceIntoEntities :: T.Text -> Either String [TaggedEntity] parseBioSequenceIntoEntities t = labelsIntoEntities =<< (parseOnly (bioSequenceParser <* endOfInput) t) diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index ad4b90e..00d7361 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -177,6 +177,7 @@ isPreprocessable LogLoss = False isPreprocessable Likelihood = False isPreprocessable BIOF1 = False isPreprocessable BIOF1Labels = False +isPreprocessable BIOWeightedF1 = False isPreprocessable TokenAccuracy = True isPreprocessable SegmentAccuracy = True isPreprocessable MAE = False @@ -788,6 +789,11 @@ generalizedProbabilisticFMeasure beta metric = gevalCoreWithoutInput metric countAgg :: (Num n, Num v, Monad m) => ConduitM (n, v, v) o m (n, v, v) countAgg = CC.foldl countFolder (fromInteger 0, fromInteger 0, fromInteger 0) +separatedCountAgg :: Monad m => ConduitM (M.HashMap Text (Int, Int, Int)) o m (M.HashMap Text (Int, Int, Int)) +separatedCountAgg = CC.foldl separatedCountFolder M.empty + where separatedCountFolder = M.unionWith countFolder + + countFragAgg :: (Num n, Num v, Monad m) => ConduitM (n, n, v, v) o m (n, n, v, v) countFragAgg = CC.foldl countFragFolder (fromInteger 0, fromInteger 0, fromInteger 0, fromInteger 0) @@ -990,6 +996,8 @@ continueGEvalCalculations SABIOF1 BIOF1 = defineContinuation countAgg f1MeasureO continueGEvalCalculations SABIOF1Labels BIOF1Labels = defineContinuation countAgg f1MeasureOnCounts noGraph +continueGEvalCalculations SABIOWeightedF1 BIOWeightedF1 = defineContinuation separatedCountAgg f1MeasureOnSeparatedCounts noGraph + continueGEvalCalculations SASegmentAccuracy SegmentAccuracy = defineContinuation averageC id noGraph continueGEvalCalculations SATokenAccuracy TokenAccuracy = defineContinuation hitsAndTotalsAgg diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs index a52c663..41129e6 100644 --- a/src/GEval/CreateChallenge.hs +++ b/src/GEval/CreateChallenge.hs @@ -316,6 +316,7 @@ This a sample challenge for the likelihood metric. |] ++ (commonReadmeMDContents testName) readmeMDContents BIOF1Labels testName = readmeMDContents BIOF1 testName +readmeMDContents BIOWeightedF1 testName = readmeMDContents BIOF1 testName readmeMDContents BIOF1 testName = [i| Tag and normalize names ======================= @@ -568,6 +569,7 @@ trainContents LogLoss = [hereLit|0.0 Hell, no!!! 0.0 Boring, boring, boring |] trainContents BIOF1Labels = trainContents BIOF1 +trainContents BIOWeightedF1 = trainContents BIOF1 trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surname/BOND My name is Bond , James Bond O O O O O There is no name here B-firstname/JOHN B-surname/VON I-surname/NEUMANN John von Nueman @@ -646,6 +648,7 @@ Boring stuff That's good |] devInContents BIOF1Labels = devInContents BIOF1 +devInContents BIOWeightedF1 = devInContents BIOF1 devInContents BIOF1 = [hereLit|Adam and Eve Mr Jan Kowalski |] @@ -720,6 +723,7 @@ devExpectedContents LogLoss = [hereLit|1.0 1.0 |] devExpectedContents BIOF1Labels = devExpectedContents BIOF1 +devExpectedContents BIOWeightedF1 = devExpectedContents BIOF1 devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE O B-firstname/JAN B-surname/KOWALSKI |] @@ -799,8 +803,10 @@ Super-duper!! That is incredibly boring. |] testInContents BIOF1Labels = testInContents BIOF1 +testInContents BIOWeightedF1 = testInContents BIOF1 testInContents BIOF1 = [hereLit|Alan Tring No name here +Tarski is NOT here |] testInContents TokenAccuracy = [hereLit|I have cats I know @@ -875,8 +881,10 @@ testExpectedContents LogLoss = [hereLit|1.0 0.0 |] testExpectedContents BIOF1Labels = testExpectedContents BIOF1 +testExpectedContents BIOWeightedF1 = testExpectedContents BIOF1 testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING O O O +B-surname/TARSKI O O O |] testExpectedContents TokenAccuracy = [hereLit|* V N * V @@ -945,6 +953,7 @@ inHeaderContents MAP = Just ["Dialect", "PolishPhrase"] inHeaderContents Likelihood = inHeaderContents LogLoss inHeaderContents LogLoss = Just ["Text"] inHeaderContents BIOF1Labels = inHeaderContents BIOF1 +inHeaderContents BIOWeightedF1 = inHeaderContents BIOF1 inHeaderContents BIOF1 = Just ["Text"] inHeaderContents TokenAccuracy = Just ["TokenizedText"] inHeaderContents SegmentAccuracy = Just ["Segment"] @@ -976,6 +985,7 @@ outHeaderContents MAP = Nothing outHeaderContents Likelihood = outHeaderContents LogLoss outHeaderContents LogLoss = Just ["Probability"] outHeaderContents BIOF1Labels = outHeaderContents BIOF1 +outHeaderContents BIOWeightedF1 = outHeaderContents BIOF1 outHeaderContents BIOF1 = Just ["BIOOutput"] outHeaderContents TokenAccuracy = Just ["PartsOfSpeech"] outHeaderContents SegmentAccuracy = Just ["PartsOfSpeech"] diff --git a/src/GEval/Metric.hs b/src/GEval/Metric.hs index 8597600..2b04067 100644 --- a/src/GEval/Metric.hs +++ b/src/GEval/Metric.hs @@ -28,7 +28,7 @@ import Data.Attoparsec.Text (parseOnly) data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | CER | Accuracy | ClippEU | FMeasure Double | MacroFMeasure Double | NMI | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood - | BIOF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE + | BIOF1 | BIOWeightedF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | MultiLabelFMeasure Double MatchingSpecification | MultiLabelLogLoss | MultiLabelLikelihood | SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double @@ -78,6 +78,7 @@ instance Show Metric where show Likelihood = "Likelihood" show BIOF1 = "BIO-F1" show BIOF1Labels = "BIO-F1-Labels" + show BIOWeightedF1 = "BIO-Weighted-F1" show TokenAccuracy = "TokenAccuracy" show SegmentAccuracy = "SegmentAccuracy" show MAE = "MAE" @@ -161,6 +162,7 @@ instance Read Metric where readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)] readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)] readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)] + readsPrec _ ('B':'I':'O':'-':'W':'e':'i':'g':'h':'t':'e':'d':'-':'F':'1': theRest) = [(BIOWeightedF1, theRest)] readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)] readsPrec _ ('T':'o':'k':'e':'n':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(TokenAccuracy, theRest)] readsPrec _ ('S':'e':'g':'m':'e':'n':'t':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(SegmentAccuracy, theRest)] @@ -201,6 +203,7 @@ getMetricOrdering MAP = TheHigherTheBetter getMetricOrdering LogLoss = TheLowerTheBetter getMetricOrdering Likelihood = TheHigherTheBetter getMetricOrdering BIOF1 = TheHigherTheBetter +getMetricOrdering BIOWeightedF1 = TheHigherTheBetter getMetricOrdering BIOF1Labels = TheHigherTheBetter getMetricOrdering TokenAccuracy = TheHigherTheBetter getMetricOrdering SegmentAccuracy = TheHigherTheBetter diff --git a/src/GEval/MetricsMechanics.hs b/src/GEval/MetricsMechanics.hs index 6e66336..ff4be79 100644 --- a/src/GEval/MetricsMechanics.hs +++ b/src/GEval/MetricsMechanics.hs @@ -20,7 +20,7 @@ import GEval.Common import GEval.BLEU (bleuStep, gleuStep) import GEval.WER (werStep) import GEval.Clippings (totalArea, coveredBy, clippEUMatchStep) -import GEval.BIO (gatherCountsForBIO) +import GEval.BIO (gatherCountsForBIO, gatherSeparatedCountsForBIO) import GEval.Probability import GEval.PrecisionRecall (weightedMaxMatch, fMeasureOnCounts, calculateMAPForOneResult, getProbabilisticCounts, getCounts) @@ -45,13 +45,15 @@ import GEval.ProbList (ProbList(..), parseIntoProbList, WordWithProb(..), countL import GEval.MatchingSpecification import GEval.Haversine +import qualified Data.HashMap.Strict as M + -- | Helper type so that singleton can be used. -- | (The problem is that some metrics are parametrized by Double -- | Word32 and this is not handled by the singleton libary.) singletons [d|data AMetric = ARMSE | AMSE | APearson | ASpearman | ABLEU | AGLEU | AWER | ACER | AAccuracy | AClippEU | AFMeasure | AMacroFMeasure | ANMI | ALogLossHashed | ACharMatch | AMAP | ALogLoss | ALikelihood - | ABIOF1 | ABIOF1Labels | ATokenAccuracy | ASegmentAccuracy | ALikelihoodHashed | AMAE | ASMAPE | AMultiLabelFMeasure MatchingSpecification + | ABIOF1 | ABIOWeightedF1 | ABIOF1Labels | ATokenAccuracy | ASegmentAccuracy | ALikelihoodHashed | AMAE | ASMAPE | AMultiLabelFMeasure MatchingSpecification | AMultiLabelLogLoss | AMultiLabelLikelihood | ASoftFMeasure | AProbabilisticMultiLabelFMeasure | AProbabilisticSoftFMeasure | ASoft2DFMeasure | AFLCFMeasure | AHaversine @@ -79,6 +81,7 @@ toHelper MAP = AMAP toHelper LogLoss = ALogLoss toHelper Likelihood = ALikelihood toHelper BIOF1 = ABIOF1 +toHelper BIOWeightedF1 = ABIOWeightedF1 toHelper BIOF1Labels = ABIOF1Labels toHelper TokenAccuracy = ATokenAccuracy toHelper SegmentAccuracy = ASegmentAccuracy @@ -125,6 +128,7 @@ type family ParsedExpectedType (t :: AMetric) :: * where ParsedExpectedType ALogLoss = Double ParsedExpectedType ALikelihood = Double ParsedExpectedType ABIOF1 = [TaggedEntity] + ParsedExpectedType ABIOWeightedF1 = [TaggedEntity] ParsedExpectedType ABIOF1Labels = [TaggedEntity] ParsedExpectedType ATokenAccuracy = [Text] ParsedExpectedType ASegmentAccuracy = [Annotation] @@ -161,6 +165,7 @@ expectedParser SAMAP = splitByTabs expectedParser SALogLoss = doubleParser expectedParser SALikelihood = doubleParser expectedParser SABIOF1 = parseBioSequenceIntoEntities +expectedParser SABIOWeightedF1 = parseBioSequenceIntoEntities expectedParser SABIOF1Labels = parseBioSequenceIntoEntitiesWithoutNormalization expectedParser SATokenAccuracy = intoWords expectedParser SASegmentAccuracy = parseSegmentAnnotations @@ -211,6 +216,7 @@ outputParser SAMAP = splitByTabs outputParser SALogLoss = doubleParser outputParser SALikelihood = doubleParser outputParser SABIOF1 = parseBioSequenceIntoEntities +outputParser SABIOWeightedF1 = parseBioSequenceIntoEntities outputParser SABIOF1Labels = parseBioSequenceIntoEntitiesWithoutNormalization outputParser SATokenAccuracy = intoWords outputParser SASegmentAccuracy = parseSegmentAnnotations @@ -232,6 +238,7 @@ type family ItemIntermediateRepresentationType (t :: AMetric) :: * where ItemIntermediateRepresentationType AClippEU = (Int, Int, Int) ItemIntermediateRepresentationType ANMI = (Text, Text) ItemIntermediateRepresentationType ABIOF1 = (Int, Int, Int) + ItemIntermediateRepresentationType ABIOWeightedF1 = M.HashMap Text (Int, Int, Int) ItemIntermediateRepresentationType ABIOF1Labels = (Int, Int, Int) ItemIntermediateRepresentationType ATokenAccuracy = (Int, Int) ItemIntermediateRepresentationType AProbabilisticMultiLabelFMeasure = ([Double], [Double], Double, Int) @@ -277,6 +284,7 @@ itemStep SAMAP = uncurry calculateMAPForOneResult itemStep SALogLoss = itemLogLossError itemStep SALikelihood = itemLogLossError itemStep SABIOF1 = uncurry gatherCountsForBIO +itemStep SABIOWeightedF1 = uncurry gatherSeparatedCountsForBIO itemStep SABIOF1Labels = uncurry gatherCountsForBIO itemStep SATokenAccuracy = countHitsAndTotals itemStep SASegmentAccuracy = uncurry segmentAccuracy diff --git a/src/GEval/MetricsMeta.hs b/src/GEval/MetricsMeta.hs index 6f62d5d..8fba402 100644 --- a/src/GEval/MetricsMeta.hs +++ b/src/GEval/MetricsMeta.hs @@ -64,6 +64,7 @@ listOfAvailableMetrics = [RMSE, LogLossHashed defaultLogLossHashedSize, LikelihoodHashed defaultLogLossHashedSize, BIOF1, + BIOWeightedF1, BIOF1Labels, TokenAccuracy, SegmentAccuracy, @@ -105,6 +106,7 @@ isMetricDescribed WER = True isMetricDescribed CER = True isMetricDescribed SegmentAccuracy = True isMetricDescribed Haversine = True +isMetricDescribed BIOWeightedF1 = True isMetricDescribed _ = False getEvaluationSchemeDescription :: EvaluationScheme -> String @@ -176,6 +178,9 @@ getMetricDescription Haversine = [i|The haversine formula determines the great-circle distance between two points on a sphere given their longitudes and latitudes (in degrees). |] +getMetricDescription BIOWeightedF1 = + [i|Weighted-average F1-score calculated on output expressed in the BIO format. +|] outContents :: Metric -> String outContents (MultiLabelFMeasure _ _) = [hereLit|person/1,3 first-name/1 first-name/3 @@ -206,6 +211,10 @@ N:1-4 V:6-7 A:9-13 outContents Haversine = [hereLit|39.575264 -56.995928 29.949932 -90.070116 |] +outContents BIOWeightedF1 = [hereLit|B-firstname/ALAN B-surname/TURING +O O O +B-surname/TARSKI O B-surname/NOT O +|] expectedScore :: EvaluationScheme -> MetricValue expectedScore (EvaluationScheme (MultiLabelFMeasure 1.0 ExactMatch) []) = 0.6666 @@ -231,6 +240,8 @@ expectedScore (EvaluationScheme CER []) = 0.14814 expectedScore (EvaluationScheme Haversine []) = 1044.2633358563135 +expectedScore (EvaluationScheme BIOWeightedF1 []) + = 0.86666666 helpMetricParameterMetricsList :: String helpMetricParameterMetricsList = intercalate ", " $ map (\s -> (show s) ++ (case extraInfo s of @@ -297,6 +308,10 @@ formatDescription CER = [hereLit|Any text, whitespace and punctuation marks are formatDescription Haversine = [hereLit|Each line is a latitude and longitude of sphere separated by tabulation, e.g. "41.558153 -73.051497". |] +formatDescription BIOWeightedF1 = [hereLit|Each line is a sequence of tags encoded in the BIO format, i.e. O, B-tag, I-tag; +B-tags and I-tags can accompanied by an extra label after a slash. +|] + scoreExplanation :: EvaluationScheme -> Maybe String scoreExplanation (EvaluationScheme (MultiLabelFMeasure _ ExactMatch) []) @@ -332,6 +347,11 @@ scoreExplanation (EvaluationScheme CER []) = Just [hereLit|The total length of expected output (in characters) is 27. There are 4 errors (1 word substituted, 1 inserted, 1 deleted) in the actual output. Hence, CER = (2+1+1) / 27 = 4 / 27 = 0.14814.|] +scoreExplanation (EvaluationScheme Haversine []) = Nothing +scoreExplanation (EvaluationScheme BIOWeightedF1 []) + = Just [hereLit|There are two labels (firstname and surname, O is not considered). Firstname was +predicted in the perfect way, hence F1=1, whereas for surname recall is 1, precision - 2/3 and F1 - 4/5. +The weighted average is (1 * 1 + 2 * 4/5) / 3 = 13/15 = 0.8667.|] pasteLines :: String -> String -> String pasteLines a b = printf "%-35s %s\n" a b diff --git a/src/GEval/PrecisionRecall.hs b/src/GEval/PrecisionRecall.hs index 15dd33e..a4e889e 100644 --- a/src/GEval/PrecisionRecall.hs +++ b/src/GEval/PrecisionRecall.hs @@ -7,9 +7,11 @@ module GEval.PrecisionRecall(calculateMAPForOneResult, precisionAndRecall, precisionAndRecallFromCounts, maxMatch, maxMatchOnOrdered, getCounts, weightedMaxMatch, weightedMaxMatching, getProbabilisticCounts, - countFragFolder) + countFragFolder, fMeasureOnSeparatedCounts, f1MeasureOnSeparatedCounts) where +import Debug.Trace + import GEval.Common import GEval.Probability @@ -21,6 +23,7 @@ import Data.List (find, foldl', nub) import Data.Algorithm.Munkres import qualified Data.Array.IArray as DAI +import qualified Data.HashMap.Strict as M calculateMAPForOneResult :: (Eq a) => [a] -> [a] -> Double calculateMAPForOneResult expected got = precisionSum / fromIntegral (length expected) @@ -69,6 +72,15 @@ fMeasureOnFragCounts beta (rC, pC, nbExpected, nbGot) = where r = rC /. nbExpected p = pC /. nbGot +f1MeasureOnSeparatedCounts :: M.HashMap a (Int, Int, Int) -> Double +f1MeasureOnSeparatedCounts m = fMeasureOnSeparatedCounts 1.0 m + +fMeasureOnSeparatedCounts :: Double -> M.HashMap a (Int, Int, Int) -> Double +fMeasureOnSeparatedCounts beta m = (sum $ map (\c@(_, t, _) -> (fromIntegral t) * (fMeasureOnCounts beta c)) mAsList) /. total + where mAsList = M.elems m + total = sum $ map (\(_, t, _) -> t) mAsList + + countFolder :: (Num n, Num v) => (n, v, v) -> (n, v, v) -> (n, v, v) countFolder (a1, a2, a3) (b1, b2, b3) = (a1+b1, a2+b2, a3+b3) diff --git a/test/Spec.hs b/test/Spec.hs index 22834fc..eefc80a 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -536,6 +536,8 @@ main = hspec $ do runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0 it "check inconsistent input" $ do runGEvalTest "bio-f1-error" `shouldThrow` (== UnexpectedData 2 "inconsistent label sequence `B-NAME/JOHN I-FOO/SMITH I-FOO/X`") + it "weighted F1" $ do + runGEvalTest "bio-weighted-f1-simple" `shouldReturnAlmost` 0.82539682 describe "automatic decompression" $ do it "more complex test" $ do runGEvalTest "charmatch-complex-compressed" `shouldReturnAlmost` 0.1923076923076923 diff --git a/test/bio-weighted-f1-simple/bio-weighted-f1-simple-solution/test-A/out.tsv b/test/bio-weighted-f1-simple/bio-weighted-f1-simple-solution/test-A/out.tsv new file mode 100644 index 0000000..8c2aa2f --- /dev/null +++ b/test/bio-weighted-f1-simple/bio-weighted-f1-simple-solution/test-A/out.tsv @@ -0,0 +1,4 @@ +O B-FOO O O O B-FOO I-FOO I-FOO B-BAR +B-BAR O B-XYZ +B-BAZ O B-BAZ O B-BAR +O B-BAZ I-BAZ B-FOO diff --git a/test/bio-weighted-f1-simple/bio-weighted-f1-simple/config.txt b/test/bio-weighted-f1-simple/bio-weighted-f1-simple/config.txt new file mode 100644 index 0000000..80706b6 --- /dev/null +++ b/test/bio-weighted-f1-simple/bio-weighted-f1-simple/config.txt @@ -0,0 +1 @@ +--metric BIO-Weighted-F1 diff --git a/test/bio-weighted-f1-simple/bio-weighted-f1-simple/test-A/expected.tsv b/test/bio-weighted-f1-simple/bio-weighted-f1-simple/test-A/expected.tsv new file mode 100644 index 0000000..3c5ccb0 --- /dev/null +++ b/test/bio-weighted-f1-simple/bio-weighted-f1-simple/test-A/expected.tsv @@ -0,0 +1,4 @@ +O B-FOO O O O B-FOO I-FOO O B-BAR +B-BAR O O +B-BAZ B-BAZ B-BAZ O O +O B-BAZ I-BAZ B-FOO