Merge branch 'master' into smart-conduit
This commit is contained in:
commit
18ed47322e
@ -1,12 +1,15 @@
|
||||
# GEval
|
||||
|
||||
GEval is a Haskell library (and a stand-alone tool) for evaluating the
|
||||
GEval is a Haskell library and a stand-alone tool for evaluating the
|
||||
results of solutions to machine learning challenges as defined on the
|
||||
[Gonito](http://gonito.net) platform.
|
||||
|
||||
Note that GEval is only about machine learning evaluation. No actual
|
||||
machine learning algorithms are available here.
|
||||
|
||||
The official repository is `git://gonito.net/geval`, browsable at
|
||||
<https://gonito.net/gitlist/geval.git/>.
|
||||
|
||||
## Installing
|
||||
|
||||
You need [Haskell Stack](https://github.com/commercialhaskell/stack).
|
||||
|
12
geval.cabal
12
geval.cabal
@ -1,5 +1,5 @@
|
||||
name: geval
|
||||
version: 0.5.4.0
|
||||
version: 0.7.0.0
|
||||
synopsis: Machine learning evaluation tools
|
||||
description: Please see README.md
|
||||
homepage: http://github.com/name/project
|
||||
@ -26,10 +26,12 @@ library
|
||||
, GEval.LogLossHashed
|
||||
, GEval.CharMatch
|
||||
, GEval.LineByLine
|
||||
, GEval.BIO
|
||||
, Data.Conduit.AutoDecompress
|
||||
, Data.Conduit.SmartSource
|
||||
build-depends: base >= 4.7 && < 5
|
||||
, cond
|
||||
, conduit
|
||||
, conduit >= 1.3.0
|
||||
, conduit-combinators
|
||||
, conduit-extra
|
||||
, directory
|
||||
@ -52,6 +54,11 @@ library
|
||||
, bytestring
|
||||
, http-conduit
|
||||
, transformers
|
||||
, word8
|
||||
, primitive
|
||||
, transformers-base
|
||||
, bzlib-conduit
|
||||
, lzma-conduit
|
||||
default-language: Haskell2010
|
||||
|
||||
executable geval
|
||||
@ -79,6 +86,7 @@ test-suite geval-test
|
||||
, resourcet
|
||||
, conduit
|
||||
, conduit-extra
|
||||
, conduit
|
||||
ghc-options: -threaded -rtsopts -with-rtsopts=-N
|
||||
default-language: Haskell2010
|
||||
|
||||
|
45
src/Data/Conduit/AutoDecompress.hs
Normal file
45
src/Data/Conduit/AutoDecompress.hs
Normal file
@ -0,0 +1,45 @@
|
||||
{-# LANGUAGE AllowAmbiguousTypes #-}
|
||||
|
||||
module Data.Conduit.AutoDecompress
|
||||
(autoDecompress,
|
||||
doNothing)
|
||||
where
|
||||
|
||||
import Data.Conduit
|
||||
import Data.Conduit.Combinators
|
||||
import Data.ByteString
|
||||
import Data.Conduit.Zlib
|
||||
import Data.Word8
|
||||
import Control.Monad.Trans.Resource (MonadThrow, MonadResource)
|
||||
import Control.Monad.Primitive (PrimMonad)
|
||||
import Control.Monad.Base (MonadBase)
|
||||
import qualified Data.Conduit.Lzma as XZ
|
||||
import qualified Data.Conduit.BZlib as BZ
|
||||
|
||||
autoDecompress :: (MonadResource m, MonadThrow m, PrimMonad m) => ConduitM ByteString ByteString m ()
|
||||
autoDecompress = do
|
||||
f <- await
|
||||
case f of
|
||||
Just chunk -> if Data.ByteString.length chunk > 1
|
||||
then
|
||||
do
|
||||
let firstByte = Data.ByteString.head chunk
|
||||
let secondByte = Data.ByteString.index chunk 1
|
||||
leftover chunk
|
||||
lookAtMagicNumbers (firstByte, secondByte)
|
||||
else
|
||||
do
|
||||
leftover chunk
|
||||
doNothing
|
||||
|
||||
Nothing -> return ()
|
||||
|
||||
|
||||
lookAtMagicNumbers :: (MonadResource m, MonadThrow m, PrimMonad m) => (Word8, Word8) -> ConduitT ByteString ByteString m ()
|
||||
lookAtMagicNumbers (31, 139) = ungzip
|
||||
lookAtMagicNumbers (66, 90) = BZ.bunzip2
|
||||
lookAtMagicNumbers (253, 55) = XZ.decompress Nothing
|
||||
lookAtMagicNumbers _ = doNothing
|
||||
|
||||
doNothing :: Monad m => ConduitT a a m ()
|
||||
doNothing = Data.Conduit.Combinators.filter (const True)
|
@ -35,17 +35,17 @@ pureSmartSource _ (FileNameSpec fileName) = sourceFile fileName
|
||||
pureSmartSource _ (FilePathSpec fileName) = sourceFile fileName
|
||||
pureSmartSource [] (PossiblyGitSpec spec) = sourceFile spec
|
||||
pureSmartSource (firstDir:_) (PossiblyGitSpec spec) = sourceFile (firstDir </> spec)
|
||||
pureSmartSource _ (Https url) = httpSource url
|
||||
pureSmartSource _ (Http url) = httpSource url
|
||||
--pureSmartSource _ (Https url) = httpSource url
|
||||
--pureSmartSource _ (Http url) = httpSource url
|
||||
|
||||
httpSource :: MonadResource m => String -> ConduitM () S.ByteString m ()
|
||||
httpSource url = do
|
||||
request <- liftIO $ parseRequest url
|
||||
manager <- liftIO $ newManager tlsManagerSettings
|
||||
response <- lift $ http request manager
|
||||
(httpsource, finalizer) <- lift $ unwrapResumable (responseBody response)
|
||||
httpsource
|
||||
lift finalizer
|
||||
-- httpSource :: MonadResource m => String -> ConduitM () S.ByteString m ()
|
||||
-- httpSource url = do
|
||||
-- request <- liftIO $ parseRequest url
|
||||
-- manager <- liftIO $ newManager tlsManagerSettings
|
||||
-- response <- lift $ http request manager
|
||||
-- (httpsource, finalizer) <- lift $ unwrapResumable (responseBody response)
|
||||
-- httpsource
|
||||
-- lift finalizer
|
||||
|
||||
parseSmartSpec :: FilePath -> SmartSpec
|
||||
parseSmartSpec "" = NoSpec
|
||||
|
111
src/GEval/BIO.hs
Normal file
111
src/GEval/BIO.hs
Normal file
@ -0,0 +1,111 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module GEval.BIO
|
||||
(BIOLabel(..), bioSequenceParser, parseBioSequenceIntoEntities,
|
||||
TaggedSpan(..), TaggedEntity(..), gatherCountsForBIO,
|
||||
eraseNormalisation)
|
||||
where
|
||||
|
||||
import GEval.PrecisionRecall
|
||||
|
||||
import qualified Data.Text as T
|
||||
|
||||
import Data.Attoparsec.Text
|
||||
import Data.Attoparsec.Combinator
|
||||
import Control.Applicative
|
||||
import Data.Char
|
||||
import Data.Maybe (catMaybes)
|
||||
|
||||
import GEval.Common
|
||||
|
||||
data BIOLabel = Outside | Beginning T.Text (Maybe T.Text) | Inside T.Text (Maybe T.Text)
|
||||
deriving (Eq, Show)
|
||||
|
||||
formatBioLabel :: BIOLabel -> T.Text
|
||||
formatBioLabel Outside = "O"
|
||||
formatBioLabel (Beginning label Nothing) = T.concat ["B-", label]
|
||||
formatBioLabel (Beginning label (Just normalized)) = T.concat ["B-", label, "/", normalized]
|
||||
formatBioLabel (Inside label Nothing) = T.concat ["I-", label]
|
||||
formatBioLabel (Inside label (Just normalized)) = T.concat ["I-", label, "/", normalized]
|
||||
|
||||
data TaggedSpan = TaggedSpan Int Int
|
||||
deriving (Eq, Show)
|
||||
|
||||
data TaggedEntity = TaggedEntity TaggedSpan T.Text (Maybe T.Text)
|
||||
deriving (Eq, Show)
|
||||
|
||||
eraseNormalisation :: TaggedEntity -> TaggedEntity
|
||||
eraseNormalisation (TaggedEntity span label normalized) = (TaggedEntity span label Nothing)
|
||||
|
||||
gatherCountsForBIO :: [TaggedEntity] -> [TaggedEntity] -> (Int, Int, Int)
|
||||
gatherCountsForBIO expected got = (maxMatchOnOrdered laterThan expected got, length expected, length got)
|
||||
where
|
||||
laterThan (TaggedEntity (TaggedSpan a _) _ _) (TaggedEntity (TaggedSpan b _) _ _) = a > b
|
||||
|
||||
parseBioSequenceIntoEntities :: T.Text -> Either String [TaggedEntity]
|
||||
parseBioSequenceIntoEntities t = labelsIntoEntities =<< (parseOnly (bioSequenceParser <* endOfInput) t)
|
||||
|
||||
labelsIntoEntities :: [BIOLabel] -> Either String [TaggedEntity]
|
||||
labelsIntoEntities labels = labelsIntoEntities' $ zip labels [1..]
|
||||
|
||||
labelsIntoEntities' :: [(BIOLabel, Int)] -> Either String [TaggedEntity]
|
||||
labelsIntoEntities' labelsWithPositions = mapM labelSplitToEntity labelsGathered
|
||||
where labelsGathered = splitLabels labelsWithPositions
|
||||
|
||||
labelSplitToEntity :: [(BIOLabel, Int)] -> Either String TaggedEntity
|
||||
labelSplitToEntity labs@(h@(_,begIx):t) = if isBeginning h && all (\tp -> isInside tp && tt tp == btp) t
|
||||
then
|
||||
Right $ TaggedEntity (TaggedSpan begIx lastItemIx) btp mNormalized
|
||||
else
|
||||
Left $ "inconsistent label sequence `" ++ (T.unpack $ T.intercalate " " $ map (formatBioLabel . fst) labs) ++ "`"
|
||||
where isBeginning (Beginning _ _, _) = True
|
||||
isBeginning _ = False
|
||||
isInside (Inside _ _, _) = True
|
||||
isInside _ = False
|
||||
tt (Beginning t _, _) = t
|
||||
tt (Inside t _, _) = t
|
||||
btp = tt h
|
||||
lastItemIx = case t of
|
||||
[] -> begIx
|
||||
_ -> let (_, ix) = last t
|
||||
in ix
|
||||
normalized (Beginning _ n, _) = n
|
||||
normalized (Inside _ n, _) = n
|
||||
mNormalized = if all (\tp -> normalized tp == Nothing) labs
|
||||
then
|
||||
Nothing
|
||||
else
|
||||
Just $ T.intercalate "_" $ catMaybes $ map normalized labs
|
||||
|
||||
splitLabels :: [(BIOLabel, Int)] -> [[(BIOLabel, Int)]]
|
||||
splitLabels [] = []
|
||||
splitLabels ((Outside, _):r) = splitLabels r
|
||||
splitLabels (e@(_, ix):r) =
|
||||
case splitLabels r of
|
||||
l@(((Beginning _ _, _):_):_) -> ([e]:l)
|
||||
(s@((Inside _ _, ix'):_):l) -> if ix' == ix + 1
|
||||
then
|
||||
((e:s):l)
|
||||
else
|
||||
([e]:(s:l))
|
||||
[] -> [[e]]
|
||||
|
||||
bioSequenceParser :: Parser [BIOLabel]
|
||||
bioSequenceParser = sepByWhitespaces bioLabelParser
|
||||
|
||||
bioLabelParser :: Parser BIOLabel
|
||||
bioLabelParser =
|
||||
(string "O" *> pure Outside) <|>
|
||||
(do
|
||||
labelType <- bioMarkerParser
|
||||
(string "-" <|> string "_")
|
||||
label <- takeWhile1 (\c -> not (isSpace c) && c /= '/')
|
||||
normalized <- (do
|
||||
string "/"
|
||||
normalized <- takeWhile1 (not . isSpace)
|
||||
return $ Just normalized) <|> pure Nothing
|
||||
return $ labelType label normalized)
|
||||
|
||||
bioMarkerParser :: Parser (T.Text -> Maybe T.Text -> BIOLabel)
|
||||
bioMarkerParser =
|
||||
(string "B" *> pure Beginning) <|> (string "I" *> pure Inside)
|
@ -38,3 +38,7 @@ sepByWhitespaces parser = possibleWhitespace *> parser `sepBy` whitespace <* pos
|
||||
possibleWhitespace = many' (satisfy isHorizontalSpace)
|
||||
|
||||
whitespace = many1 (satisfy isHorizontalSpace)
|
||||
|
||||
indicator :: Bool -> Double
|
||||
indicator True = 1.0
|
||||
indicator False = 0.0
|
||||
|
@ -16,6 +16,7 @@ module GEval.Core
|
||||
MetricValue,
|
||||
GEvalSpecialCommand(..),
|
||||
GEvalSpecification(..),
|
||||
ResultOrdering(..),
|
||||
GEvalOptions(..),
|
||||
GEvalException(..),
|
||||
defaultGEvalSpecification,
|
||||
@ -53,10 +54,11 @@ import qualified System.Directory as D
|
||||
import System.Posix
|
||||
import System.FilePath
|
||||
import Data.Maybe
|
||||
import Data.Tuple
|
||||
import qualified Data.List.Split as DLS
|
||||
|
||||
import Control.Monad.IO.Class
|
||||
import Control.Monad ((<=<))
|
||||
import Control.Monad ((<=<), filterM)
|
||||
|
||||
import Data.Attoparsec.Text (parseOnly)
|
||||
|
||||
@ -69,6 +71,8 @@ import GEval.PrecisionRecall
|
||||
import GEval.ClusteringMetrics
|
||||
import GEval.LogLossHashed
|
||||
import GEval.CharMatch
|
||||
import GEval.BIO
|
||||
import Data.Conduit.AutoDecompress
|
||||
|
||||
import qualified Data.HashMap.Strict as M
|
||||
|
||||
@ -82,7 +86,7 @@ defaultLogLossHashedSize :: Word32
|
||||
defaultLogLossHashedSize = 10
|
||||
|
||||
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch
|
||||
| MAP | LogLoss
|
||||
| MAP | LogLoss | Likelihood | BIOF1 | BIOF1Labels | LikelihoodHashed Word32
|
||||
deriving (Eq)
|
||||
|
||||
instance Show Metric where
|
||||
@ -99,9 +103,18 @@ instance Show Metric where
|
||||
""
|
||||
else
|
||||
(show nbOfBits))
|
||||
show (LikelihoodHashed nbOfBits) = "LikelihoodHashed" ++ (if
|
||||
nbOfBits == defaultLogLossHashedSize
|
||||
then
|
||||
""
|
||||
else
|
||||
(show nbOfBits))
|
||||
show CharMatch = "CharMatch"
|
||||
show MAP = "MAP"
|
||||
show LogLoss = "LogLoss"
|
||||
show Likelihood = "Likelihood"
|
||||
show BIOF1 = "BIO-F1"
|
||||
show BIOF1Labels = "BIO-F1-Labels"
|
||||
|
||||
instance Read Metric where
|
||||
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
|
||||
@ -116,9 +129,15 @@ instance Read Metric where
|
||||
readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
|
||||
[(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
|
||||
_ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
|
||||
readsPrec p ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
|
||||
[(nbOfBits, theRest)] -> [(LikelihoodHashed nbOfBits, theRest)]
|
||||
_ -> [(LikelihoodHashed defaultLogLossHashedSize, theRest)]
|
||||
readsPrec _ ('L':'o':'g':'L':'o':'s':'s':theRest) = [(LogLoss, theRest)]
|
||||
readsPrec _ ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(Likelihood, theRest)]
|
||||
readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
|
||||
readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
|
||||
readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
|
||||
readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
|
||||
|
||||
data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
|
||||
|
||||
@ -131,9 +150,13 @@ getMetricOrdering ClippEU = TheHigherTheBetter
|
||||
getMetricOrdering (FMeasure _) = TheHigherTheBetter
|
||||
getMetricOrdering NMI = TheHigherTheBetter
|
||||
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
|
||||
getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
|
||||
getMetricOrdering CharMatch = TheHigherTheBetter
|
||||
getMetricOrdering MAP = TheHigherTheBetter
|
||||
getMetricOrdering LogLoss = TheLowerTheBetter
|
||||
getMetricOrdering Likelihood = TheHigherTheBetter
|
||||
getMetricOrdering BIOF1 = TheHigherTheBetter
|
||||
getMetricOrdering BIOF1Labels = TheHigherTheBetter
|
||||
|
||||
defaultOutDirectory = "."
|
||||
defaultTestName = "test-A"
|
||||
@ -163,11 +186,13 @@ getExpectedDirectory spec = fromMaybe outDirectory $ gesExpectedDirectory spec
|
||||
|
||||
data GEvalSpecialCommand = Init | LineByLine | Diff FilePath
|
||||
|
||||
data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest
|
||||
|
||||
data GEvalOptions = GEvalOptions
|
||||
{ geoSpecialCommand :: Maybe GEvalSpecialCommand,
|
||||
geoResultOrdering :: ResultOrdering,
|
||||
geoSpec :: GEvalSpecification }
|
||||
|
||||
|
||||
data GEvalException = NoExpectedFile FilePath
|
||||
| NoOutFile FilePath
|
||||
| NoExpectedDirectory FilePath
|
||||
@ -236,11 +261,14 @@ checkAndGetFiles gevalSpec = do
|
||||
unlessM (D.doesDirectoryExist expectedDirectory) $ throwM $ NoExpectedDirectory expectedDirectory
|
||||
unlessM (D.doesDirectoryExist outTestDirectory) $ throwM $ NoOutTestDirectory outTestDirectory
|
||||
unlessM (D.doesDirectoryExist expectedTestDirectory) $ throwM $ NoExpectedTestDirectory expectedTestDirectory
|
||||
inputFilePath <- lookForCompressedFiles inputFilePath'
|
||||
expectedFilePath <- lookForCompressedFiles expectedFilePath'
|
||||
outFilePath <- lookForCompressedFiles outFilePath'
|
||||
checkInputFileIfNeeded metric inputFilePath
|
||||
return (inputFilePath, expectedFilePath, outFilePath)
|
||||
where expectedFilePath = expectedTestDirectory </> (gesExpectedFile gevalSpec)
|
||||
outFilePath = getOutFile gevalSpec (gesOutFile gevalSpec)
|
||||
inputFilePath = expectedTestDirectory </> (gesInputFile gevalSpec)
|
||||
where expectedFilePath' = expectedTestDirectory </> (gesExpectedFile gevalSpec)
|
||||
outFilePath' = getOutFile gevalSpec (gesOutFile gevalSpec)
|
||||
inputFilePath' = expectedTestDirectory </> (gesInputFile gevalSpec)
|
||||
expectedTestDirectory = expectedDirectory </> testName
|
||||
outTestDirectory = outDirectory </> testName
|
||||
expectedDirectory = getExpectedDirectory gevalSpec
|
||||
@ -248,6 +276,24 @@ checkAndGetFiles gevalSpec = do
|
||||
testName = gesTestName gevalSpec
|
||||
metric = gesMetric gevalSpec
|
||||
|
||||
lookForCompressedFiles :: FilePath -> IO FilePath
|
||||
lookForCompressedFiles = lookForAlternativeFiles [".gz", ".xz", ".bz2"]
|
||||
|
||||
lookForAlternativeFiles :: [String] -> FilePath -> IO FilePath
|
||||
lookForAlternativeFiles suffixes filePath
|
||||
| takeExtension filePath `Prelude.elem` suffixes = return filePath
|
||||
| otherwise = do
|
||||
fileIsThere <- D.doesFileExist filePath
|
||||
if fileIsThere
|
||||
then
|
||||
return filePath
|
||||
else
|
||||
do
|
||||
found <- Control.Monad.filterM D.doesFileExist $ Prelude.map (filePath <.>) suffixes
|
||||
return $ case found of
|
||||
[fp] -> fp
|
||||
_ -> filePath
|
||||
|
||||
getOutFile :: GEvalSpecification -> FilePath -> FilePath
|
||||
getOutFile gevalSpec out = outDirectory </> testName </> out
|
||||
where outDirectory = gesOutDirectory gevalSpec
|
||||
@ -261,7 +307,7 @@ checkInputFileIfNeeded _ _ = return ()
|
||||
|
||||
fileAsLineSource :: FilePath -> LineSource (ResourceT IO)
|
||||
fileAsLineSource filePath =
|
||||
LineSource (smartSource [] Nothing (parseSmartSpec filePath) $= CT.decodeUtf8Lenient =$= CT.lines) filePath 1
|
||||
LineSource (smartSource [] Nothing (parseSmartSpec filePath) $= autoDecompress $= CT.decodeUtf8Lenient =$= CT.lines) filePath 1
|
||||
|
||||
gevalCoreOnSingleLines :: Metric -> LineInFile -> LineInFile -> LineInFile -> IO (MetricValue)
|
||||
gevalCoreOnSingleLines metric inpLine expLine outLine =
|
||||
@ -283,7 +329,9 @@ gevalCore metric inputFilePath expectedFilePath outFilePath = do
|
||||
(fileAsLineSource expectedFilePath)
|
||||
(fileAsLineSource outFilePath)
|
||||
|
||||
gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric
|
||||
logLossToLikehood logLoss = exp (-logLoss)
|
||||
|
||||
gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => Metric
|
||||
-> LineSource (ResourceT m)
|
||||
-> LineSource (ResourceT m)
|
||||
-> LineSource (ResourceT m)
|
||||
@ -292,12 +340,20 @@ gevalCoreOnSources RMSE inputLineSource expectedLineSource outLineSource = do
|
||||
mse <- gevalCoreOnSources MSE inputLineSource expectedLineSource outLineSource
|
||||
return $ mse ** 0.5
|
||||
|
||||
gevalCoreOnSources Likelihood inputLineSource expectedLineSource outLineSource = do
|
||||
logLoss <- gevalCoreOnSources LogLoss inputLineSource expectedLineSource outLineSource
|
||||
return $ logLossToLikehood logLoss
|
||||
|
||||
gevalCoreOnSources (LikelihoodHashed b) inputLineSource expectedLineSource outLineSource = do
|
||||
logLoss <- gevalCoreOnSources (LogLossHashed b) inputLineSource expectedLineSource outLineSource
|
||||
return $ logLossToLikehood logLoss
|
||||
|
||||
gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do
|
||||
gevalCore' metric inputLineSource expectedLineSource outLineSource
|
||||
|
||||
data LineInFile = LineInFile FilePath Word32 Text
|
||||
|
||||
gevalCore' :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
|
||||
gevalCore' :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
|
||||
gevalCore' MSE _ = gevalCoreWithoutInput outParser outParser itemError averageC id
|
||||
where outParser = getValue . TR.double
|
||||
|
||||
@ -315,9 +371,18 @@ gevalCore' BLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . D
|
||||
| otherwise = exp (1.0 - (r /. c))
|
||||
|
||||
gevalCore' Accuracy _ = gevalCoreWithoutInput (Right . strip) (Right . strip) hitOrMiss averageC id
|
||||
where hitOrMiss (exp,got) = if (normalizeProbForAccuracy exp got) == exp then 1.0 else 0.0
|
||||
-- if the expected value is 0 or 1 treat values between 0.0 and 1.0 as probabilities
|
||||
-- for the positive outcome
|
||||
where hitOrMiss (exp, got) =
|
||||
-- first try to parse what we got as a probability distribution
|
||||
-- (like the one used for Likelikehood/LogLossHashed metric)
|
||||
case parseWordSpecs got of
|
||||
Right wordSpecs -> if Prelude.null pairs
|
||||
then 0.0
|
||||
else indicator (exp == (snd $ Prelude.maximum pairs))
|
||||
where pairs = catMaybes $ Prelude.map wordSpecToPair wordSpecs
|
||||
Left _ -> indicator ((normalizeProbForAccuracy exp got) == exp)
|
||||
-- if the expected value is 0 or 1 treat values
|
||||
-- between 0.0 and 1.0 as probabilities
|
||||
-- for the positive outcome
|
||||
normalizeProbForAccuracy :: Text -> Text -> Text
|
||||
normalizeProbForAccuracy exp got
|
||||
| exp == (pack "1") = case tryReadingAsFloat got of
|
||||
@ -383,7 +448,16 @@ gevalCore' CharMatch inputLineSource = helper inputLineSource
|
||||
helper inputLineSource expectedLineSource outputLineSource = do
|
||||
gevalCoreGeneralized (ParserSpecWithInput (Right . unpack) (Right . unpack) (Right . unpack)) step countAgg (fMeasureOnCounts charMatchBeta) (WithInput inputLineSource expectedLineSource outputLineSource)
|
||||
step (ParsedRecordWithInput inp exp out) = getCharMatchCount inp exp out
|
||||
countAgg = CC.foldl countFolder (0, 0, 0)
|
||||
|
||||
gevalCore' BIOF1 _ = gevalCoreWithoutInput parseBioSequenceIntoEntities parseBioSequenceIntoEntities (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts
|
||||
|
||||
gevalCore' BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWithoutNormalization parseBioSequenceIntoEntitiesWithoutNormalization (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts
|
||||
where parseBioSequenceIntoEntitiesWithoutNormalization s = do
|
||||
entities <- parseBioSequenceIntoEntities s
|
||||
return $ Prelude.map eraseNormalisation entities
|
||||
|
||||
countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int)
|
||||
countAgg = CC.foldl countFolder (0, 0, 0)
|
||||
|
||||
parseDistributionWrapper :: Word32 -> Word32 -> Text -> HashedDistribution
|
||||
parseDistributionWrapper nbOfBits seed distroSpec = case parseDistribution nbOfBits seed distroSpec of
|
||||
@ -395,25 +469,25 @@ data SourceItem a = Got a | Wrong String | Done
|
||||
skipLineNumber :: (x -> c) -> ((Word32, x) -> c)
|
||||
skipLineNumber fun = fun . snd
|
||||
|
||||
gevalCoreWithoutInput :: (MonadBaseControl IO m, MonadThrow m, MonadIO m) => (Text -> Either String a) -> (Text -> Either String b) -> ((a, b) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
|
||||
gevalCoreWithoutInput :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => (Text -> Either String a) -> (Text -> Either String b) -> ((a, b) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
|
||||
gevalCoreWithoutInput expParser outParser itemStep aggregator finalStep expectedLineStream outLineStream =
|
||||
gevalCoreGeneralized (ParserSpecWithoutInput expParser outParser) (trans itemStep) aggregator finalStep (WithoutInput expectedLineStream outLineStream)
|
||||
where
|
||||
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
|
||||
trans step (ParsedRecordWithoutInput x y) = step (x, y)
|
||||
|
||||
gevalCore''' :: (MonadBaseControl IO m, MonadThrow m, MonadIO m) => ParserSpec (WithoutInput m a b) -> ((Word32, (a, b)) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> WithoutInput m a b -> m (MetricValue)
|
||||
gevalCore''' :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec (WithoutInput m a b) -> ((Word32, (a, b)) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> WithoutInput m a b -> m (MetricValue)
|
||||
gevalCore''' parserSpec itemStep aggregator finalStep context =
|
||||
gevalCoreGeneralized' parserSpec (trans itemStep) aggregator finalStep context
|
||||
where
|
||||
trans :: ((Word32, (a, b)) -> c) -> (Word32, ParsedRecord (WithoutInput m a b)) -> c
|
||||
trans step (n, ParsedRecordWithoutInput x y) = step (n, (x, y))
|
||||
|
||||
gevalCoreGeneralized :: (EvaluationContext ctxt m, MonadBaseControl IO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> (ParsedRecord ctxt -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue)
|
||||
gevalCoreGeneralized :: (EvaluationContext ctxt m, MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> (ParsedRecord ctxt -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue)
|
||||
gevalCoreGeneralized parserSpec itemStep aggregator finalStep context =
|
||||
gevalCoreGeneralized' parserSpec (skipLineNumber itemStep) aggregator finalStep context
|
||||
|
||||
gevalCoreGeneralized' :: forall m ctxt c d . (EvaluationContext ctxt m, MonadBaseControl IO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> ((Word32, ParsedRecord ctxt) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue)
|
||||
gevalCoreGeneralized' :: forall m ctxt c d . (EvaluationContext ctxt m, MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> ((Word32, ParsedRecord ctxt) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue)
|
||||
gevalCoreGeneralized' parserSpec itemStep aggregator finalStep context = do
|
||||
v <- runResourceT $
|
||||
(((getZipSource $ (,)
|
||||
@ -434,7 +508,7 @@ class EvaluationContext ctxt m where
|
||||
|
||||
data WithoutInput m e o = WithoutInput (LineSource (ResourceT m)) (LineSource (ResourceT m))
|
||||
|
||||
instance (MonadBaseControl IO m, MonadIO m, MonadThrow m) => EvaluationContext (WithoutInput m e o) m where
|
||||
instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithoutInput m e o) m where
|
||||
data ParserSpec (WithoutInput m e o) = ParserSpecWithoutInput (Text -> Either String e) (Text -> Either String o)
|
||||
data WrappedParsedRecord (WithoutInput m e o) = WrappedParsedRecordWithoutInput (SourceItem e) (SourceItem o)
|
||||
data ParsedRecord (WithoutInput m e o) = ParsedRecordWithoutInput e o
|
||||
@ -463,7 +537,7 @@ data WithInput m i e o = WithInput (LineSource (ResourceT m)) (LineSource (Resou
|
||||
|
||||
getInputFilePath (WithInput (LineSource _ inputFilePath _) _ _) = inputFilePath
|
||||
|
||||
instance (MonadBaseControl IO m, MonadIO m, MonadThrow m) => EvaluationContext (WithInput m i e o) m where
|
||||
instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithInput m i e o) m where
|
||||
data ParserSpec (WithInput m i e o) = ParserSpecWithInput (Text -> Either String i) (Text -> Either String e) (Text -> Either String o)
|
||||
data WrappedParsedRecord (WithInput m i e o) = WrappedParsedRecordWithInput (SourceItem i) (SourceItem e) (SourceItem o)
|
||||
data ParsedRecord (WithInput m i e o) = ParsedRecordWithInput i e o
|
||||
|
@ -101,6 +101,8 @@ Cluster proverbs for languages.
|
||||
This is a sample challenge for flat clustering (unsupervised learning challenge).
|
||||
|] ++ (commonReadmeMDContents testName)
|
||||
|
||||
readmeMDContents (LikelihoodHashed b) testname = readmeMDContents (LogLossHashed b) testname
|
||||
|
||||
readmeMDContents (LogLossHashed _) testName = [i|
|
||||
GEval sample challenge — language model evaluation
|
||||
==================================================
|
||||
@ -113,6 +115,39 @@ The metric is average log-loss calculated for 10-bit hashes.
|
||||
Train file is a just text file (one utterance per line).
|
||||
In an input file, left and right contexts (TAB-separated) are given.
|
||||
In an expected file, the word to be guessed is given.
|
||||
|
||||
Format of the output files
|
||||
--------------------------
|
||||
|
||||
For each input line, a probability distribution for words in a gap
|
||||
must be given:
|
||||
|
||||
word1:logprob1 word2:logprob2 ... wordN:logprobN :logprob0
|
||||
|
||||
where *logprobi* is the logarithm of the probability for *wordi* and
|
||||
*logprob0* is the logarithm of the probability mass for all the other
|
||||
words (it will be spread between all 1024 fingerprint values). If the
|
||||
respective probabilities do not sum up to 1:
|
||||
|
||||
* if the sum is larger than 0.0 and smaller than 1.0, and no logprob0
|
||||
is given, log of the remaining probablity mass will be assigned to logprob0,
|
||||
* otherwise they will be normalised with.
|
||||
softmax
|
||||
|
||||
Note: the separator here is space, not TAB!
|
||||
|
||||
### Probs
|
||||
|
||||
Probabilities could be given (instead of logprobs):
|
||||
|
||||
* if **all** values look as probs and **at least value** is positive, we treat
|
||||
the values as probs rather then logprobs (single value 0.0 is treated
|
||||
as a logprob, i.e. probability 1.0!);
|
||||
* if their sum is greater than 1.0, then we normalize simply by dividing by the sum;
|
||||
* if the sum is smaller than 1.0 and there is no entry for all the other words,
|
||||
we add such an entry for the missing probability mass;
|
||||
* if the sum is smaller than 1.0 and there is an entry for all the other words,
|
||||
we normalize by dividing by the sum.
|
||||
|] ++ (commonReadmeMDContents testName)
|
||||
|
||||
readmeMDContents CharMatch testName = [i|
|
||||
@ -170,6 +205,29 @@ This a sample challenge for the log-loss metric.
|
||||
|
||||
|] ++ (commonReadmeMDContents testName)
|
||||
|
||||
readmeMDContents Likelihood testName = [i|
|
||||
Give the probability of a positive sentiment
|
||||
============================================
|
||||
|
||||
Give the probability that a sentence expresses a positive sentiment.
|
||||
|
||||
This a sample challenge for the likelihood metric.
|
||||
|
||||
|] ++ (commonReadmeMDContents testName)
|
||||
|
||||
readmeMDContents BIOF1Labels testName = readmeMDContents BIOF1 testName
|
||||
readmeMDContents BIOF1 testName = [i|
|
||||
Tag and normalize names
|
||||
=======================
|
||||
|
||||
Tag names in the tokenized text and normalized them.
|
||||
|
||||
The output should be given in the BIO format with the normalized forms given after slashes (see
|
||||
`dev-0/expected.tsv` for an example).
|
||||
|
||||
The metric is F1 counted on entities (not labels).
|
||||
|] ++ (commonReadmeMDContents testName)
|
||||
|
||||
readmeMDContents _ testName = [i|
|
||||
GEval sample challenge
|
||||
======================
|
||||
@ -239,6 +297,7 @@ trainContents NMI = [hereLit|pl Kto pod kim dołki kopie, ten sam w nie wpada.
|
||||
en The pen is mightier than the sword.
|
||||
pl Baba z wozu, koniom lżej.
|
||||
|]
|
||||
trainContents (LikelihoodHashed b) = trainContents (LogLossHashed b)
|
||||
trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota
|
||||
Basia ma psa
|
||||
Nie kupujemy kota w worku
|
||||
@ -254,11 +313,17 @@ honour GB honor
|
||||
titbit GB smakołyk
|
||||
tidbit US smakołyk
|
||||
|]
|
||||
trainContents Likelihood = trainContents LogLoss
|
||||
trainContents LogLoss = [hereLit|0.0 Hell, no!!!
|
||||
0.0 I hate this stuff
|
||||
1.0 Lekker!!!
|
||||
0.0 Boring, boring, boring
|
||||
|]
|
||||
trainContents BIOF1Labels = trainContents BIOF1
|
||||
trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surname/BOND My name is Bond , James Bond
|
||||
O O O O O There is no name here
|
||||
B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman
|
||||
|]
|
||||
trainContents _ = [hereLit|0.06 0.39 0 0.206
|
||||
1.00 1.00 1 0.017
|
||||
317.8 5.20 67 0.048
|
||||
@ -279,6 +344,7 @@ When the going gets tough, the tough get going.
|
||||
devInContents (FMeasure _) = [hereLit|b b W 29520 779 -28 -32 a 0 0 0 0 0 0 0 0 0 0
|
||||
b b W 55200 1259 35 9 a 1 0 1 0 0 0 0 0 4000 4000
|
||||
|]
|
||||
devInContents (LikelihoodHashed b) = devInContents (LogLossHashed b)
|
||||
devInContents (LogLossHashed _) = [hereLit|Nie kupuj w worku
|
||||
Ona psa
|
||||
|]
|
||||
@ -290,10 +356,15 @@ devInContents MAP = [hereLit|US noc
|
||||
GB wózek dziecięcy
|
||||
GB wizualizować
|
||||
|]
|
||||
devInContents Likelihood = devInContents LogLoss
|
||||
devInContents LogLoss = [hereLit|Great stuff!
|
||||
Boring stuff
|
||||
That's good
|
||||
|]
|
||||
devInContents BIOF1Labels = devInContents BIOF1
|
||||
devInContents BIOF1 = [hereLit|Adam and Eve
|
||||
Mr Jan Kowalski
|
||||
|]
|
||||
devInContents _ = [hereLit|0.72 0 0.007
|
||||
9.54 62 0.054
|
||||
|]
|
||||
@ -312,6 +383,7 @@ devExpectedContents NMI = [hereLit|en
|
||||
pl
|
||||
en
|
||||
|]
|
||||
devExpectedContents (LikelihoodHashed b) = devExpectedContents (LogLossHashed b)
|
||||
devExpectedContents (LogLossHashed _) = [hereLit|kota
|
||||
ma
|
||||
|]
|
||||
@ -323,10 +395,15 @@ devExpectedContents MAP = [hereLit|night nite
|
||||
pram
|
||||
visualise
|
||||
|]
|
||||
devExpectedContents Likelihood = devExpectedContents LogLoss
|
||||
devExpectedContents LogLoss = [hereLit|1.0
|
||||
0.0
|
||||
1.0
|
||||
|]
|
||||
devExpectedContents BIOF1Labels = devExpectedContents BIOF1
|
||||
devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE
|
||||
O B-firstname/JAN B-surname/KOWALSKI
|
||||
|]
|
||||
devExpectedContents _ = [hereLit|0.82
|
||||
95.2
|
||||
|]
|
||||
@ -347,6 +424,7 @@ W marcu, jak w garncu.
|
||||
A cada necio agrada su porrada.
|
||||
Kwiecień plecień, bo przeplata trochę zimy, trochę lata.
|
||||
|]
|
||||
testInContents (LikelihoodHashed b) = testInContents (LogLossHashed b)
|
||||
testInContents (LogLossHashed _) = [hereLit|Ala ma
|
||||
Ona ma kota worku
|
||||
|]
|
||||
@ -358,10 +436,15 @@ testInContents MAP = [hereLit|US wózek dziecięcy
|
||||
GB słoń
|
||||
US słoń
|
||||
|]
|
||||
testInContents Likelihood = testInContents LogLoss
|
||||
testInContents LogLoss = [hereLit|That's great, ha, ha, I love it!
|
||||
Super-duper!!
|
||||
That is incredibly boring.
|
||||
|]
|
||||
testInContents BIOF1Labels = testInContents BIOF1
|
||||
testInContents BIOF1 = [hereLit|Alan Tring
|
||||
No name here
|
||||
|]
|
||||
testInContents _ = [hereLit|1.52 2 0.093
|
||||
30.06 14 0.009
|
||||
|]
|
||||
@ -382,6 +465,7 @@ pl
|
||||
es
|
||||
pl
|
||||
|]
|
||||
testExpectedContents (LikelihoodHashed b) = testExpectedContents (LogLossHashed b)
|
||||
testExpectedContents (LogLossHashed _) = [hereLit|ma
|
||||
w
|
||||
|]
|
||||
@ -393,10 +477,15 @@ testExpectedContents MAP = [hereLit|trolley
|
||||
elephant
|
||||
elephant
|
||||
|]
|
||||
testExpectedContents Likelihood = testExpectedContents LogLoss
|
||||
testExpectedContents LogLoss = [hereLit|1.0
|
||||
1.0
|
||||
0.0
|
||||
|]
|
||||
testExpectedContents BIOF1Labels = testExpectedContents BIOF1
|
||||
testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING
|
||||
O O O
|
||||
|]
|
||||
testExpectedContents _ = [hereLit|0.11
|
||||
17.2
|
||||
|]
|
||||
|
@ -9,17 +9,25 @@
|
||||
|
||||
module GEval.LineByLine
|
||||
(runLineByLine,
|
||||
runDiff
|
||||
runLineByLineGeneralized,
|
||||
runDiff,
|
||||
runDiffGeneralized,
|
||||
LineRecord(..),
|
||||
ResultOrdering(..)
|
||||
) where
|
||||
|
||||
import GEval.Core
|
||||
|
||||
import Data.Conduit.AutoDecompress (doNothing)
|
||||
|
||||
import Data.Conduit
|
||||
import qualified Data.Conduit.List as CL
|
||||
import qualified Data.Conduit.Combinators as CC
|
||||
import Data.Text
|
||||
import Data.Text.Encoding
|
||||
|
||||
import Data.List (sortBy, sort)
|
||||
|
||||
import Control.Monad.IO.Class
|
||||
import Control.Monad.Trans.Resource
|
||||
|
||||
@ -30,13 +38,10 @@ import Text.Printf
|
||||
data LineRecord = LineRecord Text Text Text Word32 MetricValue
|
||||
deriving (Eq, Show)
|
||||
|
||||
runLineByLine :: GEvalSpecification -> IO ()
|
||||
runLineByLine spec = do
|
||||
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
|
||||
gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum
|
||||
where metric = gesMetric spec
|
||||
consum :: Consumer LineRecord (ResourceT IO) ()
|
||||
consum = (CL.map (encodeUtf8 . formatOutput) =$= CC.unlinesAscii =$= CC.stdout)
|
||||
runLineByLine :: ResultOrdering -> GEvalSpecification -> IO ()
|
||||
runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum
|
||||
where consum :: ConduitT LineRecord Void (ResourceT IO) ()
|
||||
consum = (CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
|
||||
formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
|
||||
formatScore score,
|
||||
escapeTabs inp,
|
||||
@ -45,19 +50,27 @@ runLineByLine spec = do
|
||||
formatScore :: MetricValue -> Text
|
||||
formatScore = Data.Text.pack . printf "%f"
|
||||
|
||||
runDiff :: FilePath -> GEvalSpecification -> IO ()
|
||||
runDiff otherOut spec = do
|
||||
let otherOutFilePath = getOutFile spec otherOut
|
||||
runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
|
||||
runLineByLineGeneralized ordering spec consum = do
|
||||
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
|
||||
let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath
|
||||
let sourceB = gevalLineByLineSource metric inputFilePath expectedFilePath otherOutFilePath
|
||||
runResourceT $
|
||||
((getZipSource $ (,)
|
||||
<$> ZipSource sourceA
|
||||
<*> ZipSource sourceB) $$ consum)
|
||||
gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath (sorter ordering .| consum)
|
||||
where metric = gesMetric spec
|
||||
consum :: Consumer (LineRecord, LineRecord) (ResourceT IO) ()
|
||||
consum = (CL.filter shouldBeShown =$= CL.map (encodeUtf8 . formatOutput) =$= CC.unlinesAscii =$= CC.stdout)
|
||||
sorter KeepTheOriginalOrder = doNothing
|
||||
sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
|
||||
sortOrder FirstTheWorst TheHigherTheBetter = compareScores
|
||||
sortOrder FirstTheBest TheLowerTheBetter = compareScores
|
||||
sortOrder _ _ = flip compareScores
|
||||
compareScores (LineRecord _ _ _ _ s1) (LineRecord _ _ _ _ s2) = s1 `compare` s2
|
||||
|
||||
gobbleAndDo :: Monad m => ([a] -> [b]) -> ConduitT a b m ()
|
||||
gobbleAndDo fun = do
|
||||
l <- CC.sinkList
|
||||
CC.yieldMany $ fun l
|
||||
|
||||
runDiff :: ResultOrdering -> FilePath -> GEvalSpecification -> IO ()
|
||||
runDiff ordering otherOut spec = runDiffGeneralized ordering otherOut spec consum
|
||||
where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
|
||||
consum = (CL.filter shouldBeShown .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
|
||||
shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) =
|
||||
outA /= outB && scoreA /= scoreB
|
||||
formatOutput (LineRecord inp exp outA _ scoreA, LineRecord _ _ outB _ scoreB) = Data.Text.intercalate "\t" [
|
||||
@ -69,19 +82,40 @@ runDiff otherOut spec = do
|
||||
formatScoreDiff :: Double -> Text
|
||||
formatScoreDiff = Data.Text.pack . printf "%f"
|
||||
|
||||
runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a
|
||||
runDiffGeneralized ordering otherOut spec consum = do
|
||||
let otherOutFilePath = getOutFile spec otherOut
|
||||
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
|
||||
let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath
|
||||
let sourceB = gevalLineByLineSource metric inputFilePath expectedFilePath otherOutFilePath
|
||||
runResourceT $ runConduit $
|
||||
((getZipSource $ (,)
|
||||
<$> ZipSource sourceA
|
||||
<*> ZipSource sourceB) .| sorter ordering .| consum)
|
||||
where metric = gesMetric spec
|
||||
sorter KeepTheOriginalOrder = doNothing
|
||||
sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
|
||||
sortOrder FirstTheWorst TheHigherTheBetter = compareScores
|
||||
sortOrder FirstTheBest TheLowerTheBetter = compareScores
|
||||
sortOrder _ _ = flip compareScores
|
||||
compareScores ((LineRecord _ _ _ _ o1), (LineRecord _ _ _ _ n1))
|
||||
((LineRecord _ _ _ _ o2), (LineRecord _ _ _ _ n2))
|
||||
= (n1 - o1) `compare` (n2 - o2)
|
||||
|
||||
|
||||
escapeTabs :: Text -> Text
|
||||
escapeTabs = Data.Text.replace "\t" "<tab>"
|
||||
|
||||
gevalLineByLineCore :: Metric -> FilePath -> FilePath -> FilePath -> Sink LineRecord (ResourceT IO) () -> IO ()
|
||||
gevalLineByLineCore :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
|
||||
gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum =
|
||||
runResourceT $
|
||||
((gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath) $$ consum)
|
||||
runResourceT $ runConduit $
|
||||
((gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath) .| consum)
|
||||
|
||||
gevalLineByLineSource :: Metric -> FilePath -> FilePath -> FilePath -> Source (ResourceT IO) LineRecord
|
||||
gevalLineByLineSource :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT () LineRecord (ResourceT IO) ()
|
||||
gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath =
|
||||
(getZipSource $ (,)
|
||||
<$> ZipSource (CL.sourceList [1..])
|
||||
<*> (ZipSource $ recordSource context parserSpec)) =$= CL.mapM (checkStepM evaluateLine) =$= CL.catMaybes
|
||||
<*> (ZipSource $ recordSource context parserSpec)) .| CL.mapM (checkStepM evaluateLine) .| CL.catMaybes
|
||||
where parserSpec = (ParserSpecWithInput (Right . id) (Right . id) (Right . id))
|
||||
context = (WithInput inputLineSource expectedLineSource outputLineSource)
|
||||
inputLineSource = fileAsLineSource inputFilePath
|
||||
|
@ -1,7 +1,7 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module GEval.LogLossHashed
|
||||
(HashedDistribution, parseDistribution, calculateLogLoss)
|
||||
(HashedDistribution, parseDistribution, calculateLogLoss, parseWordSpecs, wordSpecToPair)
|
||||
where
|
||||
|
||||
import qualified Data.Vector as V
|
||||
@ -36,24 +36,41 @@ parseDistribution nbOfBits seed distroSpec =
|
||||
-- a direct list of 2^nbOfBits log probs
|
||||
else parseDistributionFromLogProbList nbOfBits distroSpec
|
||||
|
||||
isProbTotalIncorrect :: Double -> Bool
|
||||
isProbTotalIncorrect probTotal = probTotal > 1.0 || probTotal < (1.0 - epsilon)
|
||||
where epsilon = 0.00000001
|
||||
|
||||
normalizeDistribution :: HashedDistribution -> HashedDistribution
|
||||
normalizeDistribution distro =
|
||||
-- we do softmax (if needed)
|
||||
if probSum > 1.0 || probSum < (1.0 - epsilon)
|
||||
if isProbTotalIncorrect probSum
|
||||
then normalized
|
||||
else distro
|
||||
where probSum = V.foldl' (\s l -> (s + exp l)) 0.0 distro
|
||||
normalized = V.map (\v -> log ((exp v) / probSum)) distro
|
||||
epsilon = 0.00000001
|
||||
|
||||
type DistroMonad s = ReaderT (VM.MVector s Double) (ST s)
|
||||
|
||||
data WordSpec = AnyWord | SpecificWord T.Text
|
||||
deriving (Eq, Show)
|
||||
|
||||
isAnyWord AnyWord = True
|
||||
isAnyWord _ = False
|
||||
|
||||
data WordSpecWithLogProb = WordSpecWithLogProb WordSpec Double
|
||||
|
||||
wordSpecToPair :: WordSpecWithLogProb -> Maybe (Double, T.Text)
|
||||
wordSpecToPair (WordSpecWithLogProb AnyWord _) = Nothing
|
||||
wordSpecToPair (WordSpecWithLogProb (SpecificWord w) lp) = Just (lp, w)
|
||||
|
||||
parseDistributionFromWordList :: Word32 -> Word32 -> T.Text -> Either String HashedDistribution
|
||||
parseDistributionFromWordList nbOfBits seed distroSpec = (parseDistributionFromWordList' nbOfBits seed) =<< (
|
||||
processEithers $ map getWordSpecWithLogProb $ T.splitOn " " distroSpec)
|
||||
parseDistributionFromWordList nbOfBits seed distroSpec = (parseDistributionFromWordList' nbOfBits seed) =<<
|
||||
normalizeLogProbs =<<
|
||||
lookForProbs =<<
|
||||
(parseWordSpecs distroSpec)
|
||||
|
||||
parseWordSpecs :: T.Text -> Either String [WordSpecWithLogProb]
|
||||
parseWordSpecs distroSpec = processEithers $ map getWordSpecWithLogProb $ T.splitOn " " distroSpec
|
||||
|
||||
getWordSpecWithLogProb :: T.Text -> Either String WordSpecWithLogProb
|
||||
getWordSpecWithLogProb t =
|
||||
@ -77,6 +94,42 @@ parseDistributionFromWordList' nbOfBits seed specs = runST $ do
|
||||
frozen <- V.freeze emp
|
||||
return $ Right frozen
|
||||
|
||||
lookForProbs :: [WordSpecWithLogProb] -> Either String [WordSpecWithLogProb]
|
||||
lookForProbs specs
|
||||
| areProbs specs = Right $ toLogProbs $ normalizeProbs specs
|
||||
| otherwise = Right $ specs
|
||||
|
||||
areProbs :: [WordSpecWithLogProb] -> Bool
|
||||
areProbs specs = all isProb specs && any isPositiveProb specs
|
||||
where isProb (WordSpecWithLogProb _ v) = v >= 0.0 && v <= 1.0
|
||||
isPositiveProb (WordSpecWithLogProb _ p) = p > 0.0 && p <= 1.0
|
||||
|
||||
toLogProbs :: [WordSpecWithLogProb] -> [WordSpecWithLogProb]
|
||||
toLogProbs = map (\(WordSpecWithLogProb w p) -> (WordSpecWithLogProb w (log p)))
|
||||
|
||||
normalizeLogProbs :: [WordSpecWithLogProb] -> Either String [WordSpecWithLogProb]
|
||||
normalizeLogProbs specs = if isProbTotalIncorrect probTotal
|
||||
&& probTotal < 1.0 && probTotal > 0.0
|
||||
&& not (any (\(WordSpecWithLogProb w _) -> isAnyWord w) specs)
|
||||
&& all (\(WordSpecWithLogProb _ lp) -> lp <= 0) specs
|
||||
then
|
||||
Right ((WordSpecWithLogProb AnyWord (log (1-probTotal))):specs)
|
||||
else
|
||||
Right specs
|
||||
where probTotal = sum $ map (\(WordSpecWithLogProb _ logp) -> exp logp) specs
|
||||
|
||||
normalizeProbs :: [WordSpecWithLogProb] -> [WordSpecWithLogProb]
|
||||
normalizeProbs specs = if isProbTotalIncorrect probTotal
|
||||
then
|
||||
if probTotal > 1.0 || any (\(WordSpecWithLogProb w _) -> isAnyWord w) specs
|
||||
then
|
||||
map (\(WordSpecWithLogProb w p) -> WordSpecWithLogProb w (p / probTotal)) specs
|
||||
else
|
||||
((WordSpecWithLogProb AnyWord (1-probTotal)):specs)
|
||||
else
|
||||
specs
|
||||
where probTotal = sum $ map (\(WordSpecWithLogProb _ p) -> p) specs
|
||||
|
||||
addSpecs :: Word32 -> Word32 -> [WordSpecWithLogProb] -> DistroMonad s ()
|
||||
addSpecs nbOfBits seed = mapM_ (updateDistro nbOfBits seed)
|
||||
|
||||
|
@ -41,6 +41,16 @@ optionsParser = GEvalOptions
|
||||
<> short 'd'
|
||||
<> metavar "OTHER-OUT"
|
||||
<> help "compare results")))
|
||||
<*> ((flag' FirstTheWorst
|
||||
(long "sort"
|
||||
<> short 's'
|
||||
<> help "When in line-by-line or diff mode, sort the results from the worst to the best"))
|
||||
<|>
|
||||
(flag' FirstTheBest
|
||||
(long "reverse-sort"
|
||||
<> short 'r'
|
||||
<> help "When in line-by-line or diff mode, sort the results from the best to the worst"))
|
||||
<|> pure KeepTheOriginalOrder)
|
||||
<*> specParser
|
||||
|
||||
precisionArgParser :: Parser Int
|
||||
@ -90,9 +100,13 @@ specParser = GEvalSpecification
|
||||
<> showDefault
|
||||
<> metavar "INPUT"
|
||||
<> help "The name of the file with the input (applicable only for some metrics)" )
|
||||
<*> metricReader
|
||||
<*> ((flip fromMaybe) <$> altMetricReader <*> metricReader)
|
||||
<*> optional precisionArgParser
|
||||
|
||||
sel :: Maybe Metric -> Metric -> Metric
|
||||
sel Nothing m = m
|
||||
sel (Just m) _ = m
|
||||
|
||||
metricReader :: Parser Metric
|
||||
metricReader = option auto
|
||||
( long "metric"
|
||||
@ -100,7 +114,14 @@ metricReader = option auto
|
||||
<> value defaultMetric
|
||||
<> showDefault
|
||||
<> metavar "METRIC"
|
||||
<> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed or CharMatch" )
|
||||
<> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels or CharMatch" )
|
||||
|
||||
altMetricReader :: Parser (Maybe Metric)
|
||||
altMetricReader = optional $ option auto
|
||||
( long "alt-metric"
|
||||
<> short 'a'
|
||||
<> metavar "METRIC"
|
||||
<> help "Alternative metric (overrides --metric option)" )
|
||||
|
||||
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
|
||||
runGEval args = do
|
||||
@ -145,20 +166,20 @@ attemptToReadOptsFromConfigFile args opts = do
|
||||
|
||||
|
||||
runGEval'' :: GEvalOptions -> IO (Maybe MetricValue)
|
||||
runGEval'' opts = runGEval''' (geoSpecialCommand opts) (geoSpec opts)
|
||||
runGEval'' opts = runGEval''' (geoSpecialCommand opts) (geoResultOrdering opts) (geoSpec opts)
|
||||
|
||||
runGEval''' :: Maybe GEvalSpecialCommand -> GEvalSpecification -> IO (Maybe MetricValue)
|
||||
runGEval''' Nothing spec = do
|
||||
runGEval''' :: Maybe GEvalSpecialCommand -> ResultOrdering -> GEvalSpecification -> IO (Maybe MetricValue)
|
||||
runGEval''' Nothing _ spec = do
|
||||
val <- geval spec
|
||||
return $ Just val
|
||||
runGEval''' (Just Init) spec = do
|
||||
runGEval''' (Just Init) _ spec = do
|
||||
initChallenge spec
|
||||
return Nothing
|
||||
runGEval''' (Just LineByLine) spec = do
|
||||
runLineByLine spec
|
||||
runGEval''' (Just LineByLine) ordering spec = do
|
||||
runLineByLine ordering spec
|
||||
return Nothing
|
||||
runGEval''' (Just (Diff otherOut)) spec = do
|
||||
runDiff otherOut spec
|
||||
runGEval''' (Just (Diff otherOut)) ordering spec = do
|
||||
runDiff ordering otherOut spec
|
||||
return Nothing
|
||||
|
||||
initChallenge :: GEvalSpecification -> IO ()
|
||||
@ -169,11 +190,9 @@ initChallenge spec = case gesExpectedDirectory spec of
|
||||
showInitInstructions = do
|
||||
putStrLn [here|
|
||||
Run:
|
||||
geval --init --expected-directory CHALLENGE
|
||||
geval --init --expected-directory CHALLENGE --metric METRIC-NAME --precision NUMBER-OF-DIGITS
|
||||
to create a directory CHALLENGE representing a Gonito challenge.
|
||||
|
||||
You can specify a metric with `--metric METRIC-NAME` option.
|
||||
|
||||
Note that `--out-directory` option is not taken into account with `--init` option.
|
||||
(Note that `--out-directory` option is not taken into account with `--init` option.)
|
||||
|]
|
||||
exitFailure
|
||||
|
@ -3,7 +3,7 @@
|
||||
module GEval.PrecisionRecall(calculateMAPForOneResult,
|
||||
fMeasure, f1Measure, f2Measure, precision, recall,
|
||||
fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder,
|
||||
precisionAndRecall, precisionAndRecallFromCounts, maxMatch)
|
||||
precisionAndRecall, precisionAndRecallFromCounts, maxMatch, maxMatchOnOrdered)
|
||||
where
|
||||
|
||||
import GEval.Common
|
||||
@ -65,6 +65,17 @@ precision matchFun expected got = fst $ precisionAndRecall matchFun expected got
|
||||
recall :: (a -> b -> Bool) -> [a] -> [b] -> Double
|
||||
recall matchFun expected got = snd $ precisionAndRecall matchFun expected got
|
||||
|
||||
|
||||
maxMatchOnOrdered :: Eq a => (a -> a -> Bool) -> [a] -> [a] -> Int
|
||||
maxMatchOnOrdered laterThan expected got =
|
||||
let (matched, _) = foldl' step (0, expected) got
|
||||
in matched
|
||||
where step (matched, l@(h:t)) g
|
||||
| h == g = (matched+1, t)
|
||||
| h `laterThan` g = (matched, l)
|
||||
| otherwise = step (matched, t) g
|
||||
step (matched, []) g = (matched, [])
|
||||
|
||||
-- counting maximum match with maximum bipartite matching
|
||||
-- (we build an auxiliary graph and do a max-flow on this)
|
||||
maxMatch :: (a -> b -> Bool) -> [a] -> [b] -> Int
|
||||
@ -72,7 +83,6 @@ maxMatch matchFun expected got = mf
|
||||
where (b, e, g) = buildGraph matchFun expected got
|
||||
mf = maxFlow g (fst b) (fst e)
|
||||
|
||||
|
||||
buildGraph :: (a -> b -> Bool) -> [a] -> [b] -> (LNode Int, LNode Int, Gr Int Int)
|
||||
buildGraph matchFun expected got = (b, e, g)
|
||||
where ((b, e), (_, g)) = buildGraph' matchFun expected got
|
||||
|
@ -1,5 +1,5 @@
|
||||
flags: {}
|
||||
packages:
|
||||
- '.'
|
||||
extra-deps: [cond-0.4.1.1,murmur3-1.0.3]
|
||||
resolver: lts-9.14
|
||||
extra-deps: [murmur3-1.0.3]
|
||||
resolver: lts-11.9
|
||||
|
118
test/Spec.hs
118
test/Spec.hs
@ -8,11 +8,15 @@ import GEval.BLEU
|
||||
import GEval.ClippEU
|
||||
import GEval.PrecisionRecall
|
||||
import GEval.ClusteringMetrics
|
||||
import GEval.BIO
|
||||
import GEval.LineByLine
|
||||
import Data.Attoparsec.Text
|
||||
import Options.Applicative
|
||||
import Data.Text
|
||||
import Text.EditDistance
|
||||
|
||||
import Data.Conduit.List (consume)
|
||||
|
||||
import qualified Test.HUnit as HU
|
||||
|
||||
import Data.Conduit.SmartSource
|
||||
@ -101,6 +105,16 @@ main = hspec $ do
|
||||
runGEvalTest "log-loss-hashed-simple" `shouldReturnAlmost` 2.398479083333333
|
||||
it "example with unnormalized values" $ do
|
||||
runGEvalTest "log-loss-hashed-not-normalized" `shouldReturnAlmost` 1.0468455186722887
|
||||
it "with probs instead of log probs" $ do
|
||||
runGEvalTest "log-loss-hashed-probs" `shouldReturnAlmost` 4.11631293099392
|
||||
it "with probs instead of log probs (with normalization)" $ do
|
||||
runGEvalTest "log-loss-hashed-probs-normalized" `shouldReturnAlmost` 1.55537749098853
|
||||
it "with log probs whose probs are summing up to less than 1.0" $ do
|
||||
runGEvalTest "log-loss-hashed-normalization" `shouldReturnAlmost` 5.16395069238851
|
||||
describe "LikelihoodHashed challenge" $ do
|
||||
it "example with unnormalized values" $ do
|
||||
runGEvalTest "likelihood-hashed-not-normalized" `shouldReturnAlmost` 0.351043364110715
|
||||
|
||||
describe "reading options" $ do
|
||||
it "can get the metric" $ do
|
||||
extractMetric "bleu-complex" `shouldReturn` (Just BLEU)
|
||||
@ -188,11 +202,105 @@ main = hspec $ do
|
||||
runGEvalTest "logloss-simple" `shouldReturnAlmost` 0.31824
|
||||
it "perfect" $ do
|
||||
runGEvalTest "logloss-perfect" `shouldReturnAlmost` 0.0
|
||||
describe "Likelihood" $ do
|
||||
it "simple" $ do
|
||||
runGEvalTest "likelihood-simple" `shouldReturnAlmost` 0.72742818469866
|
||||
describe "evaluating single lines" $ do
|
||||
it "RMSE" $ do
|
||||
gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla")
|
||||
(LineInFile "stub2" 1 "3.4")
|
||||
(LineInFile "stub3" 1 "2.6") `shouldReturnAlmost` 0.8
|
||||
describe "BIO format" $ do
|
||||
it "just parse" $ do
|
||||
let (Right r) = parseOnly (bioSequenceParser <* endOfInput) "O B-city/NEW_YORK I-city B-city/KALISZ I-city O B-name"
|
||||
r `shouldBe` [Outside,
|
||||
Beginning "city" (Just "NEW_YORK"),
|
||||
Inside "city" Nothing,
|
||||
Beginning "city" (Just "KALISZ"),
|
||||
Inside "city" Nothing,
|
||||
Outside,
|
||||
Beginning "name" Nothing]
|
||||
it "simplest entity" $ do
|
||||
let (Right ents) = parseBioSequenceIntoEntities "B-city"
|
||||
ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "city" Nothing]
|
||||
it "multi-word entity" $ do
|
||||
let (Right ents) = parseBioSequenceIntoEntities "B-date I-date"
|
||||
ents `shouldBe` [TaggedEntity (TaggedSpan 1 2) "date" Nothing]
|
||||
it "multi-word entity with normalized text" $ do
|
||||
let (Right ents) = parseBioSequenceIntoEntities "B-date/FOO I-date/BAR"
|
||||
ents `shouldBe` [TaggedEntity (TaggedSpan 1 2) "date" (Just "FOO_BAR")]
|
||||
it "simplest entity with something outside" $ do
|
||||
let (Right ents) = parseBioSequenceIntoEntities "O B-city"
|
||||
ents `shouldBe` [TaggedEntity (TaggedSpan 2 2) "city" Nothing]
|
||||
it "another simple case" $ do
|
||||
let (Right ents) = parseBioSequenceIntoEntities "B-city B-city"
|
||||
ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "city" Nothing,
|
||||
TaggedEntity (TaggedSpan 2 2) "city" Nothing]
|
||||
it "just parse into entities" $ do
|
||||
let (Right ents) = parseBioSequenceIntoEntities "O O B-city/LOS_ANGELES I-city B-city/KLUCZBORK O B-name O B-person/JOHN I-person/VON I-person/NEUMANN"
|
||||
ents `shouldBe` [TaggedEntity (TaggedSpan 3 4) "city" (Just "LOS_ANGELES"),
|
||||
TaggedEntity (TaggedSpan 5 5) "city" (Just "KLUCZBORK"),
|
||||
TaggedEntity (TaggedSpan 7 7) "name" (Nothing),
|
||||
TaggedEntity (TaggedSpan 9 11) "person" (Just "JOHN_VON_NEUMANN")]
|
||||
it "another entity parse" $ do
|
||||
let (Right ents) = parseBioSequenceIntoEntities "B-month/JULY B-month/JULY O O B-foo/bar"
|
||||
ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "month" (Just "JULY"),
|
||||
TaggedEntity (TaggedSpan 2 2) "month" (Just "JULY"),
|
||||
TaggedEntity (TaggedSpan 5 5) "foo" (Just "bar")]
|
||||
it "another entity parse" $ do
|
||||
let (Right ents) = parseBioSequenceIntoEntities "B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK"
|
||||
ents `shouldBe` [TaggedEntity (TaggedSpan 1 2) "city" (Just "LOS_ANGELES"),
|
||||
TaggedEntity (TaggedSpan 4 5) "city" (Just "NEW_YORK")]
|
||||
it "parse entity" $ do
|
||||
let (Right ents) = parseBioSequenceIntoEntities "B-surname/BROWN B-surname/SMITH"
|
||||
ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "surname" (Just "BROWN"),
|
||||
TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")]
|
||||
it "parse entity" $ do
|
||||
let (Right ents) = parseBioSequenceIntoEntities "O B-surname/SMITH"
|
||||
ents `shouldBe` [TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")]
|
||||
it "check counting" $ do
|
||||
gatherCountsForBIO [TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")] [TaggedEntity (TaggedSpan 1 1) "surname" (Just "BROWN"),
|
||||
TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")] `shouldBe` (1, 1, 2)
|
||||
it "check F1 on a more complicated example" $ do
|
||||
runGEvalTest "bio-f1-complex" `shouldReturnAlmost` 0.625
|
||||
it "check F1 on labels only" $ do
|
||||
runGEvalTest "bio-f1-complex-labels" `shouldReturnAlmost` 0.6666666666
|
||||
it "calculate F1" $ do
|
||||
runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5
|
||||
it "calculate F1 with underscores rather than minus signs" $ do
|
||||
runGEvalTest "bio-f1-simple-underscores" `shouldReturnAlmost` 0.5
|
||||
it "check perfect score" $ do
|
||||
runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0
|
||||
it "check inconsistent input" $ do
|
||||
runGEvalTest "bio-f1-error" `shouldThrow` (== UnexpectedData 2 "inconsistent label sequence `B-NAME/JOHN I-FOO/SMITH I-FOO/X`")
|
||||
describe "automatic decompression" $ do
|
||||
it "more complex test" $ do
|
||||
runGEvalTest "charmatch-complex-compressed" `shouldReturnAlmost` 0.1923076923076923
|
||||
describe "line by line mode" $ do
|
||||
let sampleChallenge =
|
||||
GEvalSpecification
|
||||
{ gesOutDirectory = "test/likelihood-simple/likelihood-simple-solution",
|
||||
gesExpectedDirectory = Just "test/likelihood-simple/likelihood-simple",
|
||||
gesTestName = "test-A",
|
||||
gesOutFile = "out.tsv",
|
||||
gesExpectedFile = "expected.tsv",
|
||||
gesInputFile = "in.tsv",
|
||||
gesMetric = Likelihood,
|
||||
gesPrecision = Nothing }
|
||||
it "simple test" $ do
|
||||
results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge Data.Conduit.List.consume
|
||||
Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo",
|
||||
"bar",
|
||||
"baz",
|
||||
"baq"]
|
||||
it "test sorting" $ do
|
||||
results <- runLineByLineGeneralized FirstTheWorst sampleChallenge Data.Conduit.List.consume
|
||||
Prelude.head (Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results) `shouldBe` "baq"
|
||||
describe "handle --alt-metric option" $ do
|
||||
it "accuracy instead of likelihood" $ do
|
||||
runGEvalTestExtraOptions ["--alt-metric", "Accuracy"] "likelihood-simple" `shouldReturnAlmost` 0.75
|
||||
it "accuracy instead of log loss" $ do
|
||||
runGEvalTestExtraOptions ["--alt-metric", "Accuracy"] "log-loss-hashed-probs" `shouldReturnAlmost` 0.4
|
||||
describe "smart sources" $ do
|
||||
it "smart specs are parsed" $ do
|
||||
parseSmartSpec "" `shouldBe` NoSpec
|
||||
@ -217,8 +325,8 @@ main = hspec $ do
|
||||
parseSmartSpecInContext ["foo/bar"] Nothing "" `shouldBe` Nothing
|
||||
it "sources are accessed" $ do
|
||||
readFromSmartSource [] Nothing "test/files/foo.txt" `shouldReturn` ["foo\n"]
|
||||
readFromSmartSource [] Nothing "https://httpbin.org/robots.txt" `shouldReturn`
|
||||
["User-agent: *\nDisallow: /deny\n"]
|
||||
-- readFromSmartSource [] Nothing "https://httpbin.org/robots.txt" `shouldReturn`
|
||||
-- ["User-agent: *\nDisallow: /deny\n"]
|
||||
|
||||
readFromSmartSource :: [FilePath] -> Maybe FilePath -> String -> IO [String]
|
||||
readFromSmartSource defaultDirs defaultFile specS = do
|
||||
@ -244,11 +352,13 @@ testMatchFun _ _ = False
|
||||
extractVal :: (Either (ParserResult GEvalOptions) (Maybe MetricValue)) -> IO MetricValue
|
||||
extractVal (Right (Just val)) = return val
|
||||
|
||||
runGEvalTest testName = (runGEval [
|
||||
runGEvalTest = runGEvalTestExtraOptions []
|
||||
|
||||
runGEvalTestExtraOptions extraOptions testName = (runGEval ([
|
||||
"--expected-directory",
|
||||
"test/" ++ testName ++ "/" ++ testName,
|
||||
"--out-directory",
|
||||
"test/" ++ testName ++ "/" ++ testName ++ "-solution"]) >>= extractVal
|
||||
"test/" ++ testName ++ "/" ++ testName ++ "-solution"] ++ extraOptions)) >>= extractVal
|
||||
|
||||
extractMetric :: String -> IO (Maybe Metric)
|
||||
extractMetric testName = do
|
||||
|
@ -0,0 +1,6 @@
|
||||
B-wrong
|
||||
B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK_CITY
|
||||
B-surname/BROWN B-surname/SMIT
|
||||
B-month B-month O O B-foo/bar
|
||||
O B-class I-class I-class
|
||||
O O
|
|
@ -0,0 +1 @@
|
||||
--metric BIO-F1-Labels
|
@ -0,0 +1,6 @@
|
||||
O
|
||||
B-city/LOS I-city/ANGELES O B-city/NEW_YORK O
|
||||
O B-surname/SMITH
|
||||
B-month/JULY O O O B-foo/bar
|
||||
O B-class I-class I-class
|
||||
O O
|
|
@ -0,0 +1,6 @@
|
||||
B-wrong
|
||||
B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK
|
||||
B-surname/BROWN B-surname/SMITH
|
||||
B-month/JULY B-month/JULY O O B-foo/bar
|
||||
O B-class I-class I-class
|
||||
O B-wrong
|
|
1
test/bio-f1-complex/bio-f1-complex/config.txt
Normal file
1
test/bio-f1-complex/bio-f1-complex/config.txt
Normal file
@ -0,0 +1 @@
|
||||
--metric BIO-F1
|
6
test/bio-f1-complex/bio-f1-complex/test-A/expected.tsv
Normal file
6
test/bio-f1-complex/bio-f1-complex/test-A/expected.tsv
Normal file
@ -0,0 +1,6 @@
|
||||
O
|
||||
B-city/LOS I-city/ANGELES O B-city/NEW_YORK O
|
||||
O B-surname/SMITH
|
||||
B-month/JULY O O O B-foo/bar
|
||||
O B-class I-class I-class
|
||||
O O
|
|
2
test/bio-f1-error/bio-f1-error-solution/test-A/out.tsv
Normal file
2
test/bio-f1-error/bio-f1-error-solution/test-A/out.tsv
Normal file
@ -0,0 +1,2 @@
|
||||
O B-CITY/WARSZAWA I-CITY/WARSZAWA
|
||||
O B-NAME/JOHN I-FOO/SMITH I-FOO/X O
|
|
1
test/bio-f1-error/bio-f1-error/config.txt
Normal file
1
test/bio-f1-error/bio-f1-error/config.txt
Normal file
@ -0,0 +1 @@
|
||||
--metric BIO-F1
|
2
test/bio-f1-error/bio-f1-error/test-A/expected.tsv
Normal file
2
test/bio-f1-error/bio-f1-error/test-A/expected.tsv
Normal file
@ -0,0 +1,2 @@
|
||||
O B-CITY/WARSZAWA I-CITY/WARSZAWA
|
||||
O B-NAME/JOHN I-NAME/SMITH O O
|
|
@ -0,0 +1,4 @@
|
||||
O O O
|
||||
O B-city/NEW I-city/YORK I-city/CITY O B-month/July
|
||||
B-surname/SMITH
|
||||
B-city/LONDON B-city/PARIS
|
|
1
test/bio-f1-perfect/bio-f1-perfect/config.txt
Normal file
1
test/bio-f1-perfect/bio-f1-perfect/config.txt
Normal file
@ -0,0 +1 @@
|
||||
--metric BIO-F1
|
4
test/bio-f1-perfect/bio-f1-perfect/test-A/expected.tsv
Normal file
4
test/bio-f1-perfect/bio-f1-perfect/test-A/expected.tsv
Normal file
@ -0,0 +1,4 @@
|
||||
O O O
|
||||
O B-city/NEW I-city/YORK I-city/CITY O B-month/July
|
||||
B-surname/SMITH
|
||||
B-city/LONDON B-city/PARIS
|
|
@ -0,0 +1,3 @@
|
||||
O O B_city/POZNAŃ O O B_date/MARCH I_date/12
|
||||
B_city/BUK O O O
|
||||
B_name/FOO O B_surname/KOWALSKI
|
|
@ -0,0 +1 @@
|
||||
--metric BIO-F1
|
@ -0,0 +1,3 @@
|
||||
O O B_city/POZNAŃ O O B_date/MARCH I_date/12
|
||||
O O O O
|
||||
O B_city/KONIN O
|
|
3
test/bio-f1-simple/bio-f1-simple-solution/test-A/out.tsv
Normal file
3
test/bio-f1-simple/bio-f1-simple-solution/test-A/out.tsv
Normal file
@ -0,0 +1,3 @@
|
||||
O O B-city/POZNAŃ O O B-date/MARCH I-date/12
|
||||
B-city/BUK O O O
|
||||
B-name/FOO O B-surname/KOWALSKI
|
|
1
test/bio-f1-simple/bio-f1-simple/config.txt
Normal file
1
test/bio-f1-simple/bio-f1-simple/config.txt
Normal file
@ -0,0 +1 @@
|
||||
--metric BIO-F1
|
3
test/bio-f1-simple/bio-f1-simple/test-A/expected.tsv
Normal file
3
test/bio-f1-simple/bio-f1-simple/test-A/expected.tsv
Normal file
@ -0,0 +1,3 @@
|
||||
O O B-city/POZNAŃ O O B-date/MARCH I-date/12
|
||||
O O O O
|
||||
O B-city/KONIN O
|
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
--metric CharMatch
|
Binary file not shown.
Binary file not shown.
@ -0,0 +1,2 @@
|
||||
tak:10 nie:8.9
|
||||
niebieski:0 żółty:1.5 czerwony:-0.5
|
|
@ -0,0 +1 @@
|
||||
--metric LikelihoodHashed8
|
@ -0,0 +1,2 @@
|
||||
tak
|
||||
niebieski
|
|
@ -0,0 +1,4 @@
|
||||
0.7
|
||||
0
|
||||
0.0
|
||||
0.6
|
|
1
test/likelihood-simple/likelihood-simple/config.txt
Normal file
1
test/likelihood-simple/likelihood-simple/config.txt
Normal file
@ -0,0 +1 @@
|
||||
--metric Likelihood
|
@ -0,0 +1,4 @@
|
||||
1
|
||||
0
|
||||
0
|
||||
0
|
|
4
test/likelihood-simple/likelihood-simple/test-A/in.tsv
Normal file
4
test/likelihood-simple/likelihood-simple/test-A/in.tsv
Normal file
@ -0,0 +1,4 @@
|
||||
foo
|
||||
bar
|
||||
baz
|
||||
baq
|
|
@ -0,0 +1,3 @@
|
||||
B:-1.20397280432594 A:-0.916290731874155
|
||||
A:-2.3025850929940 C:-1.6094379124341
|
||||
A:-2.3025850929940 C:-1.6094379124341 :-0.356674943938732
|
|
@ -0,0 +1 @@
|
||||
--metric LogLossHashed10
|
@ -0,0 +1,3 @@
|
||||
A
|
||||
B
|
||||
B
|
|
@ -0,0 +1,3 @@
|
||||
1:0.5 2:0.6 3:1.0 4:1.0 5:0.9
|
||||
1:0.3 2:0.2 3:0.3
|
||||
1:0.2 :0.6
|
|
@ -0,0 +1 @@
|
||||
--metric LogLossHashed10
|
@ -0,0 +1,3 @@
|
||||
1
|
||||
3
|
||||
1
|
|
@ -0,0 +1,5 @@
|
||||
A:0.6 B:0.4
|
||||
C:0.2 A:0.1
|
||||
A:0.4 C:0.4
|
||||
D:1.0
|
||||
C:0.4 B:0.5 :0.1
|
|
@ -0,0 +1 @@
|
||||
--metric LogLossHashed10
|
@ -0,0 +1,5 @@
|
||||
A
|
||||
A
|
||||
B
|
||||
D
|
||||
A
|
|
Loading…
Reference in New Issue
Block a user