Implement soft f-score
This commit is contained in:
parent
26ba5ae92a
commit
8735610745
@ -36,6 +36,7 @@ library
|
||||
, GEval.FeatureExtractor
|
||||
, GEval.WER
|
||||
, Text.Tokenizer
|
||||
, GEval.Annotation
|
||||
, Paths_geval
|
||||
build-depends: base >= 4.7 && < 5
|
||||
, cond
|
||||
@ -76,6 +77,7 @@ library
|
||||
, uri-encode
|
||||
, MissingH
|
||||
, array
|
||||
, Munkres
|
||||
default-language: Haskell2010
|
||||
|
||||
executable geval
|
||||
|
46
src/GEval/Annotation.hs
Normal file
46
src/GEval/Annotation.hs
Normal file
@ -0,0 +1,46 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module GEval.Annotation
|
||||
(parseAnnotations, Annotation(..), matchScore)
|
||||
where
|
||||
|
||||
import qualified Data.IntSet as IS
|
||||
import qualified Data.Text as T
|
||||
|
||||
import Data.Attoparsec.Text
|
||||
import Data.Attoparsec.Combinator
|
||||
import Control.Applicative
|
||||
import GEval.Common (sepByWhitespaces, (/.))
|
||||
import Data.Char
|
||||
|
||||
data Annotation = Annotation T.Text IS.IntSet
|
||||
deriving (Eq, Show)
|
||||
|
||||
parseAnnotations :: T.Text -> Either String [Annotation]
|
||||
parseAnnotations t = parseOnly (annotationsParser <* endOfInput) t
|
||||
|
||||
annotationsParser :: Parser [Annotation]
|
||||
annotationsParser = sepByWhitespaces annotationParser
|
||||
|
||||
annotationParser :: Parser Annotation
|
||||
annotationParser = do
|
||||
label <- takeWhile1 (\c -> not (isSpace c) && c /= ':')
|
||||
string ":"
|
||||
intSet <- intSetParser
|
||||
return $ Annotation label intSet
|
||||
|
||||
intSetParser :: Parser IS.IntSet
|
||||
intSetParser = IS.unions <$> intervalParser `sepBy` (string ",")
|
||||
|
||||
intervalParser :: Parser IS.IntSet
|
||||
intervalParser = do
|
||||
startIx <- decimal
|
||||
endIx <- (string "-" *> decimal <|> pure startIx)
|
||||
pure $ IS.fromList [startIx..endIx]
|
||||
|
||||
matchScore :: Annotation -> Annotation -> Double
|
||||
matchScore (Annotation labelA intSetA) (Annotation labelB intSetB)
|
||||
| labelA == labelB = (intSetLength intersect) /. (intSetLength $ intSetA `IS.union` intSetB)
|
||||
| otherwise = 0.0
|
||||
where intSetLength = Prelude.length . IS.toList
|
||||
intersect = intSetA `IS.intersection` intSetB
|
@ -6,9 +6,20 @@ import Data.Text.Read as TR
|
||||
|
||||
import Data.Attoparsec.Text
|
||||
|
||||
(/.) :: (Eq a, Integral a) => a -> a -> Double
|
||||
-- some operations can be "hard" (on ints) or "soft" (on doubles),
|
||||
-- introduce a typeclass so that we could generalise easily
|
||||
class ConvertibleToDouble n where
|
||||
toDouble :: n -> Double
|
||||
|
||||
instance ConvertibleToDouble Double where
|
||||
toDouble = id
|
||||
|
||||
instance ConvertibleToDouble Int where
|
||||
toDouble = fromIntegral
|
||||
|
||||
(/.) :: (ConvertibleToDouble f, Integral a) => f -> a -> Double
|
||||
x /. 0 = 1.0
|
||||
x /. y = (fromIntegral x) / (fromIntegral y)
|
||||
x /. y = (toDouble x) / (fromIntegral y)
|
||||
|
||||
safeDoubleDiv :: Double -> Double -> Double
|
||||
safeDoubleDiv _ 0.0 = 0.0
|
||||
|
@ -71,6 +71,8 @@ import Data.Attoparsec.Text (parseOnly)
|
||||
|
||||
import Data.Conduit.SmartSource
|
||||
|
||||
import qualified Data.IntSet as IS
|
||||
|
||||
import GEval.BLEU
|
||||
import GEval.Common
|
||||
import GEval.ClippEU
|
||||
@ -83,6 +85,7 @@ import GEval.ProbList
|
||||
import GEval.WER
|
||||
import Data.Conduit.AutoDecompress
|
||||
import Text.Tokenizer
|
||||
import GEval.Annotation
|
||||
|
||||
import qualified Data.HashMap.Strict as M
|
||||
import qualified Data.Vector as V
|
||||
@ -107,6 +110,7 @@ data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | C
|
||||
| LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
|
||||
| BIOF1 | BIOF1Labels | LikelihoodHashed Word32 | MAE | MultiLabelFMeasure Double
|
||||
| MultiLabelLogLoss | MultiLabelLikelihood
|
||||
| SoftFMeasure Double
|
||||
deriving (Eq)
|
||||
|
||||
instance Show Metric where
|
||||
@ -121,6 +125,7 @@ instance Show Metric where
|
||||
show ClippEU = "ClippEU"
|
||||
show (FMeasure beta) = "F" ++ (show beta)
|
||||
show (MacroFMeasure beta) = "Macro-F" ++ (show beta)
|
||||
show (SoftFMeasure beta) = "Soft-F" ++ (show beta)
|
||||
show NMI = "NMI"
|
||||
show (LogLossHashed nbOfBits) = "LogLossHashed" ++ (if
|
||||
nbOfBits == defaultLogLossHashedSize
|
||||
@ -165,6 +170,9 @@ instance Read Metric where
|
||||
readsPrec p ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'F':theRest) = case readsPrec p theRest of
|
||||
[(beta, theRest)] -> [(MultiLabelFMeasure beta, theRest)]
|
||||
_ -> []
|
||||
readsPrec p ('S':'o':'f':'t':'-':'F':theRest) = case readsPrec p theRest of
|
||||
[(beta, theRest)] -> [(SoftFMeasure beta, theRest)]
|
||||
_ -> []
|
||||
readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
|
||||
[(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
|
||||
_ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
|
||||
@ -198,6 +206,7 @@ getMetricOrdering Accuracy = TheHigherTheBetter
|
||||
getMetricOrdering ClippEU = TheHigherTheBetter
|
||||
getMetricOrdering (FMeasure _) = TheHigherTheBetter
|
||||
getMetricOrdering (MacroFMeasure _) = TheHigherTheBetter
|
||||
getMetricOrdering (SoftFMeasure _) = TheHigherTheBetter
|
||||
getMetricOrdering NMI = TheHigherTheBetter
|
||||
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
|
||||
getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
|
||||
@ -586,6 +595,7 @@ gevalCore' (FMeasure beta) _ = gevalCoreWithoutInput outParser outParser getCoun
|
||||
| prob >= detectionThreshold && prob <= 1.0 = Right True
|
||||
| otherwise = Left "expected probability"
|
||||
detectionThreshold = 0.5
|
||||
getCount :: (Bool, Bool) -> (Int, Int, Int)
|
||||
getCount (True, True) = (1, 1, 1)
|
||||
getCount (True, False) = (0, 1, 0)
|
||||
getCount (False, True) = (0, 0, 1)
|
||||
@ -615,11 +625,20 @@ gevalCore' (MacroFMeasure beta) _ = gevalCoreWithoutInput (Right . Just . strip)
|
||||
insertMaybeToMap (Just c) m = M.insertWith (+) c 1 m
|
||||
macroAverageOnCounts (tpMap, expectedMap, gotMap) =
|
||||
(Prelude.sum
|
||||
$ Prelude.map (\c -> fMeasureOnCounts beta (M.lookupDefault 0 c tpMap,
|
||||
$ Prelude.map (\c -> fMeasureOnCounts beta (M.lookupDefault (0::Int) c tpMap,
|
||||
M.lookupDefault 0 c expectedMap,
|
||||
M.lookupDefault 0 c gotMap))
|
||||
$ M.keys expectedMap) / (fromIntegral $ Prelude.length $ M.keys expectedMap)
|
||||
|
||||
gevalCore' (SoftFMeasure beta) _ = gevalCoreWithoutInput parseAnnotations
|
||||
parseAnnotations
|
||||
getSoftCounts
|
||||
countAgg
|
||||
(fMeasureOnCounts beta)
|
||||
where getSoftCounts (expected, got) = (weightedMaxMatch matchScore expected got,
|
||||
Prelude.length expected,
|
||||
Prelude.length got)
|
||||
|
||||
gevalCore' ClippEU _ = gevalCoreWithoutInput parseClippingSpecs parseClippings matchStep clippeuAgg finalStep
|
||||
where
|
||||
parseClippings = controlledParse lineClippingsParser
|
||||
@ -680,8 +699,8 @@ gevalCore' MultiLabelLogLoss _ = gevalCoreWithoutInput intoWords
|
||||
where
|
||||
intoWords = Right . Data.Text.words
|
||||
|
||||
countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int)
|
||||
countAgg = CC.foldl countFolder (0, 0, 0)
|
||||
countAgg :: (Num n, Monad m) => ConduitM (n, Int, Int) o m (n, Int, Int)
|
||||
countAgg = CC.foldl countFolder (fromInteger 0, 0, 0)
|
||||
|
||||
gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
|
||||
(V.Vector (Double, Double) -> Double) -> -- ^ correlation function
|
||||
@ -707,7 +726,7 @@ skipLineNumber fun = fun . snd
|
||||
-- | A helper function to run evaluation when the input is not needed to calculate the metric value.
|
||||
gevalCoreWithoutInput :: (MonadUnliftIO m, MonadThrow m, MonadIO m) =>
|
||||
(Text -> Either String a) -> -- ^ parser for values in the expected output
|
||||
(Text -> Either String b) -> -- ^ parser for values in the output
|
||||
(Text -> Either String b) -> -- ^ parser for values in the actual output
|
||||
((a, b) -> c) -> -- ^ function which combines parsed values into a single value
|
||||
-- (will be launched for each item, e.g. an error/cost function
|
||||
-- could be calculated here)
|
||||
|
@ -110,6 +110,13 @@ GEval sample challenge — guess the language of a first name
|
||||
This is a sample/toy classification challenge for Gonito framework with Macro-F-measure as the metric.
|
||||
|] ++ (commonReadmeMDContents testName)
|
||||
|
||||
readmeMDContents (SoftFMeasure _) testName = [i|
|
||||
GEval sample challenge — mark numbers
|
||||
=====================================
|
||||
|
||||
This is a sample/toy classification challenge for Gonito framework with Soft-F-measure as the metric.
|
||||
|] ++ (commonReadmeMDContents testName)
|
||||
|
||||
readmeMDContents NMI testName = [i|
|
||||
Cluster proverbs
|
||||
================
|
||||
@ -358,7 +365,10 @@ pl Kazimierz
|
||||
en Matthew
|
||||
en Richard
|
||||
|]
|
||||
|
||||
trainContents (SoftFMeasure _) = [hereLit|indigits:8 I have 3 daughters
|
||||
indigits:1-2 indigits:9-12 12 July 1812
|
||||
inwords:11-13 I can see two dogs
|
||||
|]
|
||||
trainContents NMI = [hereLit|pl Kto pod kim dołki kopie, ten sam w nie wpada.
|
||||
en The pen is mightier than the sword.
|
||||
pl Baba z wozu, koniom lżej.
|
||||
@ -424,6 +434,9 @@ devInContents (MacroFMeasure _) = [hereLit|Władysław
|
||||
Steven
|
||||
Helmut
|
||||
|]
|
||||
devInContents (SoftFMeasure _) = [hereLit|I have two kids
|
||||
7 April 2003
|
||||
|]
|
||||
devInContents (LikelihoodHashed b) = devInContents (LogLossHashed b)
|
||||
devInContents (LogLossHashed _) = [hereLit|Nie kupuj w worku
|
||||
Ona psa
|
||||
@ -472,6 +485,9 @@ devExpectedContents (MacroFMeasure _) = [hereLit|pl
|
||||
en
|
||||
de
|
||||
|]
|
||||
devExpectedContents (SoftFMeasure _) = [hereLit|inwords:8-10
|
||||
indigits:1 indigits:9-12
|
||||
|]
|
||||
devExpectedContents NMI = [hereLit|en
|
||||
pl
|
||||
en
|
||||
@ -524,6 +540,9 @@ testInContents (MacroFMeasure _) = [hereLit|Arkadiusz
|
||||
Heinrich
|
||||
Henry
|
||||
|]
|
||||
testInContents (SoftFMeasure _) = [hereLit|Nothing
|
||||
Four sides
|
||||
|]
|
||||
testInContents NMI = [hereLit|Fortune favors the bold.
|
||||
People who live in glass houses should not throw stones.
|
||||
W marcu, jak w garncu.
|
||||
@ -575,6 +594,9 @@ testExpectedContents (MacroFMeasure _) = [hereLit|pl
|
||||
de
|
||||
en
|
||||
|]
|
||||
testExpectedContents (SoftFMeasure _) = [hereLit|
|
||||
inwords:1-4
|
||||
|]
|
||||
testExpectedContents NMI = [hereLit|en
|
||||
en
|
||||
pl
|
||||
|
@ -169,7 +169,7 @@ metricReader = many $ option auto -- actually `some` should be used inst
|
||||
( long "metric" -- --metric might be in the config.txt file...
|
||||
<> short 'm'
|
||||
<> metavar "METRIC"
|
||||
<> help "Metric to be used - RMSE, MSE, Pearson, Spearman, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), macro F-measure (specify as Macro-F1, Macro-F2, Macro-F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), WER, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels or CharMatch" )
|
||||
<> help "Metric to be used - RMSE, MSE, Pearson, Spearman, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), macro F-measure (specify as Macro-F1, Macro-F2, Macro-F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), WER, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels, soft F-measure (specify as Soft-F1, Soft-F2, Soft-F0.25) or CharMatch" )
|
||||
|
||||
altMetricReader :: Parser (Maybe Metric)
|
||||
altMetricReader = optional $ option auto
|
||||
|
@ -1,10 +1,11 @@
|
||||
{-# LANGUAGE PartialTypeSignatures #-}
|
||||
{-# LANGUAGE ScopedTypeVariables #-}
|
||||
|
||||
module GEval.PrecisionRecall(calculateMAPForOneResult,
|
||||
fMeasure, f1Measure, f2Measure, precision, recall,
|
||||
fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder,
|
||||
precisionAndRecall, precisionAndRecallFromCounts,
|
||||
maxMatch, maxMatchOnOrdered, getCounts)
|
||||
maxMatch, maxMatchOnOrdered, getCounts, weightedMaxMatch)
|
||||
where
|
||||
|
||||
import GEval.Common
|
||||
@ -14,9 +15,13 @@ import Data.Graph.Inductive.Query.MaxFlow
|
||||
|
||||
import Data.List (nub, foldl')
|
||||
|
||||
import Data.Algorithm.Munkres
|
||||
import qualified Data.Array.IArray as DAI
|
||||
|
||||
|
||||
calculateMAPForOneResult :: (Eq a) => [a] -> [a] -> Double
|
||||
calculateMAPForOneResult expected got = precisionSum / fromIntegral (length expected)
|
||||
where (_, _, precisionSum) = calculateMAPForOneResultCore expected (nub got)
|
||||
where (_::Int, _, precisionSum) = calculateMAPForOneResultCore expected (nub got)
|
||||
calculateMAPForOneResultCore expected got = foldl' (oneMAPStep expected) (0, 0, 0.0) got
|
||||
oneMAPStep expected (gotCount, allCount, precisionSum) gotItem
|
||||
| gotItem `elem` expected = (newGotCount, newAllCount, precisionSum + (newGotCount /. newAllCount))
|
||||
@ -41,19 +46,19 @@ fMeasure beta matchingFun expected got =
|
||||
where betaSquared = beta ^ 2
|
||||
(p, r) = precisionAndRecall matchingFun expected got
|
||||
|
||||
f2MeasureOnCounts :: (Int, Int, Int) -> Double
|
||||
f2MeasureOnCounts :: ConvertibleToDouble n => (n, Int, Int) -> Double
|
||||
f2MeasureOnCounts = fMeasureOnCounts 2.0
|
||||
|
||||
f1MeasureOnCounts :: (Int, Int, Int) -> Double
|
||||
f1MeasureOnCounts :: ConvertibleToDouble n => (n, Int, Int) -> Double
|
||||
f1MeasureOnCounts = fMeasureOnCounts 1.0
|
||||
|
||||
fMeasureOnCounts :: Double -> (Int, Int, Int) -> Double
|
||||
fMeasureOnCounts :: ConvertibleToDouble n => Double -> (n, Int, Int) -> Double
|
||||
fMeasureOnCounts beta (tp, nbExpected, nbGot) =
|
||||
(1 + betaSquared) * p * r `safeDoubleDiv` (betaSquared * p + r)
|
||||
where betaSquared = beta ^ 2
|
||||
(p, r) = precisionAndRecallFromCounts (tp, nbExpected, nbGot)
|
||||
|
||||
countFolder :: (Int, Int, Int) -> (Int, Int, Int) -> (Int, Int, Int)
|
||||
countFolder :: Num n => (n, Int, Int) -> (n, Int, Int) -> (n, Int, Int)
|
||||
countFolder (a1, a2, a3) (b1, b2, b3) = (a1+b1, a2+b2, a3+b3)
|
||||
|
||||
getCounts :: (a -> b -> Bool) -> ([a], [b]) -> (Int, Int, Int)
|
||||
@ -72,7 +77,7 @@ precisionAndRecall matchFun expected got
|
||||
= precisionAndRecallFromCounts (tp, length expected, length got)
|
||||
where tp = maxMatch matchFun expected got
|
||||
|
||||
precisionAndRecallFromCounts :: (Int, Int, Int) -> (Double, Double)
|
||||
precisionAndRecallFromCounts :: ConvertibleToDouble n => (n, Int, Int) -> (Double, Double)
|
||||
precisionAndRecallFromCounts (tp, nbExpected, nbGot) =
|
||||
(tp /. nbGot, tp /. nbExpected)
|
||||
|
||||
@ -116,3 +121,16 @@ buildGraph matchFun expected got = (b, e, g)
|
||||
return (b,e)
|
||||
where expectedIxs = [2..1+(length expected)]
|
||||
gotIxs = [2+(length expected)..1+(length expected)+(length got)]
|
||||
|
||||
-- the weight are assumed to be between 0.0 and 1.0
|
||||
weightedMaxMatch :: (a -> b -> Double) -> [a] -> [b] -> Double
|
||||
weightedMaxMatch matchFun expected got = (fromIntegral $ length matching) - score
|
||||
where (matching, score) = hungarianMethodDouble complementWeightArray
|
||||
-- unfortunately `hungarianMethodDouble` looks
|
||||
-- for minimal bipartite matching
|
||||
-- rather than the maximal one
|
||||
complementWeightArray = DAI.array ((1, 1), (m, n)) weightList
|
||||
m = length expected
|
||||
n = length got
|
||||
weightList = [((i, j), 1.0 - (matchFun x y)) | (i, x) <- zip [1..m] expected,
|
||||
(j, y) <- zip [1..n] got]
|
||||
|
@ -1,5 +1,5 @@
|
||||
flags: {}
|
||||
packages:
|
||||
- '.'
|
||||
extra-deps: [murmur3-1.0.3,naturalcomp-0.0.3]
|
||||
extra-deps: [murmur3-1.0.3,naturalcomp-0.0.3,Munkres-0.1]
|
||||
resolver: lts-11.9
|
||||
|
25
test/Spec.hs
25
test/Spec.hs
@ -18,6 +18,7 @@ import Data.Attoparsec.Text
|
||||
import Options.Applicative
|
||||
import Data.Text
|
||||
import Text.EditDistance
|
||||
import GEval.Annotation
|
||||
|
||||
import Data.Map.Strict
|
||||
|
||||
@ -32,6 +33,8 @@ import System.IO.Silently
|
||||
|
||||
import qualified Test.HUnit as HU
|
||||
|
||||
import qualified Data.IntSet as IS
|
||||
|
||||
import Data.Conduit.SmartSource
|
||||
import Data.Conduit.Rank
|
||||
import qualified Data.Conduit.Text as CT
|
||||
@ -185,6 +188,10 @@ main = hspec $ do
|
||||
precision alwaysMatch ['a', 'b', 'c'] [0, 1, 2, 3, 4, 5] `shouldBeAlmost` 0.5
|
||||
recall alwaysMatch ['a', 'b', 'c'] [0, 1, 2, 3, 4, 5] `shouldBeAlmost` 1.0
|
||||
f1Measure alwaysMatch ['a', 'b', 'c'] [0, 1, 2, 3 , 4, 5] `shouldBeAlmost` 0.66666666666666
|
||||
describe "max match" $ do
|
||||
it "simple" $ do
|
||||
maxMatch (==) [1,2,2] [3,2] `shouldBe` 1
|
||||
maxMatch (==) [3,2] [1,2,2] `shouldBe` 1
|
||||
describe "ClippEU" $ do
|
||||
it "parsing rectangles" $ do
|
||||
let (Right r) = parseOnly (lineClippingsParser <* endOfInput) "2/0,0,2,3 10/20,30,40,50 18/0,1,500,3 "
|
||||
@ -214,6 +221,11 @@ main = hspec $ do
|
||||
read "F2" `shouldBe` (FMeasure 2.0)
|
||||
read "F1" `shouldBe` (FMeasure 1.0)
|
||||
read "F0.5" `shouldBe` (FMeasure 0.5)
|
||||
describe "Soft-F1" $ do
|
||||
it "simple test" $ do
|
||||
runGEvalTest "soft-f1-simple" `shouldReturnAlmost` 0.33333333333333
|
||||
it "perfect test" $ do
|
||||
runGEvalTest "soft-f1-perfect" `shouldReturnAlmost` 1.0
|
||||
describe "test edit-distance library" $ do
|
||||
it "for handling UTF8" $ do
|
||||
levenshteinDistance defaultEditCosts "źdźbło" "źd好bło" `shouldBe` 1
|
||||
@ -261,6 +273,19 @@ main = hspec $ do
|
||||
gevalCoreOnSingleLines RMSE id (LineInFile (FilePathSpec "stub1") 1 "blabla")
|
||||
(LineInFile (FilePathSpec "stub2") 1 "3.4")
|
||||
(LineInFile (FilePathSpec "stub3") 1 "2.6") `shouldReturnAlmost` 0.8
|
||||
describe "Annotation format" $ do
|
||||
it "just parse" $ do
|
||||
parseAnnotations "foo:3,7-10 baz:4-6" `shouldBe` Right [Annotation "foo" (IS.fromList [3,7,8,9,10]),
|
||||
Annotation "baz" (IS.fromList [4,5,6])]
|
||||
it "empty" $ do
|
||||
parseAnnotations "" `shouldBe` Right []
|
||||
it "empty (just spaces)" $ do
|
||||
parseAnnotations " " `shouldBe` Right []
|
||||
it "match score" $ do
|
||||
matchScore (Annotation "x" (IS.fromList [3..6])) (Annotation "y" (IS.fromList [3..6])) `shouldBeAlmost` 0.0
|
||||
matchScore (Annotation "x" (IS.fromList [3..6])) (Annotation "x" (IS.fromList [3..6])) `shouldBeAlmost` 1.0
|
||||
matchScore (Annotation "x" (IS.fromList [123..140])) (Annotation "x" (IS.fromList [125..130])) `shouldBeAlmost` 0.33333
|
||||
matchScore (Annotation "x" (IS.fromList [3..4])) (Annotation "x" (IS.fromList [2..13])) `shouldBeAlmost` 0.1666666
|
||||
describe "BIO format" $ do
|
||||
it "just parse" $ do
|
||||
let (Right r) = parseOnly (bioSequenceParser <* endOfInput) "O B-city/NEW_YORK I-city B-city/KALISZ I-city O B-name"
|
||||
|
@ -0,0 +1,3 @@
|
||||
foo:3-4 bar:5-11 foo:12
|
||||
|
||||
baz:10-16 xyz:123-140
|
|
1
test/soft-f1-perfect/soft-f1-perfect/config.txt
Normal file
1
test/soft-f1-perfect/soft-f1-perfect/config.txt
Normal file
@ -0,0 +1 @@
|
||||
--metric Soft-F1
|
3
test/soft-f1-perfect/soft-f1-perfect/test-A/expected.tsv
Normal file
3
test/soft-f1-perfect/soft-f1-perfect/test-A/expected.tsv
Normal file
@ -0,0 +1,3 @@
|
||||
foo:3-4 bar:5-11 foo:12
|
||||
|
||||
baz:10-16 xyz:123-140
|
|
@ -0,0 +1,3 @@
|
||||
foo:2-13
|
||||
bar:3-7
|
||||
baz:10-16 foo:123-140
|
|
1
test/soft-f1-simple/soft-f1-simple/config.txt
Normal file
1
test/soft-f1-simple/soft-f1-simple/config.txt
Normal file
@ -0,0 +1 @@
|
||||
--metric Soft-F1
|
3
test/soft-f1-simple/soft-f1-simple/test-A/expected.tsv
Normal file
3
test/soft-f1-simple/soft-f1-simple/test-A/expected.tsv
Normal file
@ -0,0 +1,3 @@
|
||||
foo:3-4 baz:3-7 foo:12
|
||||
|
||||
baz:10-16 foo:125-130
|
|
Loading…
Reference in New Issue
Block a user