Merge branch 'master' into smart-conduit

This commit is contained in:
Filip Gralinski 2018-06-02 16:31:36 +02:00
commit 18ed47322e
52 changed files with 741 additions and 83 deletions

View File

@ -1,12 +1,15 @@
# GEval # GEval
GEval is a Haskell library (and a stand-alone tool) for evaluating the GEval is a Haskell library and a stand-alone tool for evaluating the
results of solutions to machine learning challenges as defined on the results of solutions to machine learning challenges as defined on the
[Gonito](http://gonito.net) platform. [Gonito](http://gonito.net) platform.
Note that GEval is only about machine learning evaluation. No actual Note that GEval is only about machine learning evaluation. No actual
machine learning algorithms are available here. machine learning algorithms are available here.
The official repository is `git://gonito.net/geval`, browsable at
<https://gonito.net/gitlist/geval.git/>.
## Installing ## Installing
You need [Haskell Stack](https://github.com/commercialhaskell/stack). You need [Haskell Stack](https://github.com/commercialhaskell/stack).

View File

@ -1,5 +1,5 @@
name: geval name: geval
version: 0.5.4.0 version: 0.7.0.0
synopsis: Machine learning evaluation tools synopsis: Machine learning evaluation tools
description: Please see README.md description: Please see README.md
homepage: http://github.com/name/project homepage: http://github.com/name/project
@ -26,10 +26,12 @@ library
, GEval.LogLossHashed , GEval.LogLossHashed
, GEval.CharMatch , GEval.CharMatch
, GEval.LineByLine , GEval.LineByLine
, GEval.BIO
, Data.Conduit.AutoDecompress
, Data.Conduit.SmartSource , Data.Conduit.SmartSource
build-depends: base >= 4.7 && < 5 build-depends: base >= 4.7 && < 5
, cond , cond
, conduit , conduit >= 1.3.0
, conduit-combinators , conduit-combinators
, conduit-extra , conduit-extra
, directory , directory
@ -52,6 +54,11 @@ library
, bytestring , bytestring
, http-conduit , http-conduit
, transformers , transformers
, word8
, primitive
, transformers-base
, bzlib-conduit
, lzma-conduit
default-language: Haskell2010 default-language: Haskell2010
executable geval executable geval
@ -79,6 +86,7 @@ test-suite geval-test
, resourcet , resourcet
, conduit , conduit
, conduit-extra , conduit-extra
, conduit
ghc-options: -threaded -rtsopts -with-rtsopts=-N ghc-options: -threaded -rtsopts -with-rtsopts=-N
default-language: Haskell2010 default-language: Haskell2010

View File

@ -0,0 +1,45 @@
{-# LANGUAGE AllowAmbiguousTypes #-}
module Data.Conduit.AutoDecompress
(autoDecompress,
doNothing)
where
import Data.Conduit
import Data.Conduit.Combinators
import Data.ByteString
import Data.Conduit.Zlib
import Data.Word8
import Control.Monad.Trans.Resource (MonadThrow, MonadResource)
import Control.Monad.Primitive (PrimMonad)
import Control.Monad.Base (MonadBase)
import qualified Data.Conduit.Lzma as XZ
import qualified Data.Conduit.BZlib as BZ
-- | Peek at the first two bytes of the stream and splice in the matching
-- decompression conduit ('lookAtMagicNumbers'); pass the stream through
-- unchanged when no known magic number is found.
--
-- Fix: the original gave up whenever the *first chunk* happened to be
-- shorter than two bytes, even though more input might follow (chunk
-- boundaries are arbitrary).  We now accumulate chunks until at least two
-- bytes are available or the stream ends.
autoDecompress :: (MonadResource m, MonadThrow m, PrimMonad m) => ConduitM ByteString ByteString m ()
autoDecompress = go Data.ByteString.empty
  where
    go acc = do
      f <- await
      case f of
        Just chunk ->
          let acc' = Data.ByteString.append acc chunk
          in if Data.ByteString.length acc' > 1
             then
               do
                 let firstByte = Data.ByteString.head acc'
                 let secondByte = Data.ByteString.index acc' 1
                 -- push the inspected bytes back so the chosen
                 -- decompressor sees the whole stream
                 leftover acc'
                 lookAtMagicNumbers (firstByte, secondByte)
             else
               -- not enough bytes yet; keep accumulating
               go acc'
        Nothing -> do
          -- stream ended with fewer than two bytes: emit what we have as-is
          if Data.ByteString.null acc
            then return ()
            else leftover acc
          doNothing
-- | Choose a decompression conduit from the first two bytes of the stream:
-- gzip (0x1f 0x8b), bzip2 ("BZ") or xz (0xfd '7'); anything else is passed
-- through untouched.
lookAtMagicNumbers :: (MonadResource m, MonadThrow m, PrimMonad m) => (Word8, Word8) -> ConduitT ByteString ByteString m ()
lookAtMagicNumbers magic
  | magic == (31, 139) = ungzip
  | magic == (66, 90)  = BZ.bunzip2
  | magic == (253, 55) = XZ.decompress Nothing
  | otherwise          = doNothing
-- | Identity conduit: re-emit every element unchanged.
doNothing :: Monad m => ConduitT a a m ()
doNothing = awaitForever yield

View File

@ -35,17 +35,17 @@ pureSmartSource _ (FileNameSpec fileName) = sourceFile fileName
pureSmartSource _ (FilePathSpec fileName) = sourceFile fileName pureSmartSource _ (FilePathSpec fileName) = sourceFile fileName
pureSmartSource [] (PossiblyGitSpec spec) = sourceFile spec pureSmartSource [] (PossiblyGitSpec spec) = sourceFile spec
pureSmartSource (firstDir:_) (PossiblyGitSpec spec) = sourceFile (firstDir </> spec) pureSmartSource (firstDir:_) (PossiblyGitSpec spec) = sourceFile (firstDir </> spec)
pureSmartSource _ (Https url) = httpSource url --pureSmartSource _ (Https url) = httpSource url
pureSmartSource _ (Http url) = httpSource url --pureSmartSource _ (Http url) = httpSource url
httpSource :: MonadResource m => String -> ConduitM () S.ByteString m () -- httpSource :: MonadResource m => String -> ConduitM () S.ByteString m ()
httpSource url = do -- httpSource url = do
request <- liftIO $ parseRequest url -- request <- liftIO $ parseRequest url
manager <- liftIO $ newManager tlsManagerSettings -- manager <- liftIO $ newManager tlsManagerSettings
response <- lift $ http request manager -- response <- lift $ http request manager
(httpsource, finalizer) <- lift $ unwrapResumable (responseBody response) -- (httpsource, finalizer) <- lift $ unwrapResumable (responseBody response)
httpsource -- httpsource
lift finalizer -- lift finalizer
parseSmartSpec :: FilePath -> SmartSpec parseSmartSpec :: FilePath -> SmartSpec
parseSmartSpec "" = NoSpec parseSmartSpec "" = NoSpec

111
src/GEval/BIO.hs Normal file
View File

@ -0,0 +1,111 @@
{-# LANGUAGE OverloadedStrings #-}
module GEval.BIO
(BIOLabel(..), bioSequenceParser, parseBioSequenceIntoEntities,
TaggedSpan(..), TaggedEntity(..), gatherCountsForBIO,
eraseNormalisation)
where
import GEval.PrecisionRecall
import qualified Data.Text as T
import Data.Attoparsec.Text
import Data.Attoparsec.Combinator
import Control.Applicative
import Data.Char
import Data.Maybe (catMaybes)
import GEval.Common
-- | One token-level BIO tag: outside any entity, or the beginning /
-- inside of an entity of a given type, optionally carrying a
-- normalised form (the part after the slash in e.g. @B-surname/BOND@).
data BIOLabel = Outside | Beginning T.Text (Maybe T.Text) | Inside T.Text (Maybe T.Text)
  deriving (Eq, Show)
-- | Render a BIO label back to its textual form (inverse of
-- 'bioLabelParser' for well-formed labels).
formatBioLabel :: BIOLabel -> T.Text
formatBioLabel lbl =
  case lbl of
    Outside                -> "O"
    Beginning label mNorm  -> render "B-" label mNorm
    Inside label mNorm     -> render "I-" label mNorm
  where
    render prefix label mNorm =
      T.concat ([prefix, label] ++ maybe [] (\n -> ["/", n]) mNorm)
-- | Token-position span of an entity: first and last (1-based) positions.
data TaggedSpan = TaggedSpan Int Int
  deriving (Eq, Show)
-- | A recognised entity: its span, its type label, and an optional
-- normalised form.
data TaggedEntity = TaggedEntity TaggedSpan T.Text (Maybe T.Text)
  deriving (Eq, Show)
-- | Drop the normalised form from an entity, keeping span and label
-- (used by the BIO-F1-Labels metric, which ignores normalisation).
--
-- Fix: the original bound (and shadowed Prelude's 'span' with) names it
-- never used, triggering -Wall warnings; wildcards make the intent clear.
eraseNormalisation :: TaggedEntity -> TaggedEntity
eraseNormalisation (TaggedEntity sp label _) = TaggedEntity sp label Nothing
-- | Produce (matched, expected-count, got-count) for the two entity
-- lists, feeding a downstream F-measure computation.  Matching is
-- delegated to 'maxMatchOnOrdered' (from GEval.PrecisionRecall) with a
-- "starts later than" ordering on entity spans.
gatherCountsForBIO :: [TaggedEntity] -> [TaggedEntity] -> (Int, Int, Int)
gatherCountsForBIO expected got =
  (numberOfMatches, length expected, length got)
  where
    numberOfMatches = maxMatchOnOrdered startsAfter expected got
    startsAfter (TaggedEntity (TaggedSpan b1 _) _ _) (TaggedEntity (TaggedSpan b2 _) _ _) = b1 > b2
-- | Parse a whole line of BIO tags and gather them into entities;
-- returns a parse or consistency error message on failure.
parseBioSequenceIntoEntities :: T.Text -> Either String [TaggedEntity]
parseBioSequenceIntoEntities t =
  case parseOnly (bioSequenceParser <* endOfInput) t of
    Left err     -> Left err
    Right labels -> labelsIntoEntities labels
-- | Number the labels with their (1-based) token positions and convert
-- the runs into entities.
labelsIntoEntities :: [BIOLabel] -> Either String [TaggedEntity]
labelsIntoEntities labels = labelsIntoEntities' (zip labels [1..])
-- | Group position-annotated labels into contiguous entity runs and
-- convert each run, failing on the first inconsistent one.
labelsIntoEntities' :: [(BIOLabel, Int)] -> Either String [TaggedEntity]
labelsIntoEntities' numberedLabels =
  traverse labelSplitToEntity (splitLabels numberedLabels)
-- | Turn one gathered label run into a tagged entity.  A run is
-- well-formed when it starts with a B- label and continues only with
-- I- labels of the same type; anything else is reported as an
-- inconsistent sequence.  The normalised form is the underscore-join of
-- all per-token normalisations, or Nothing when no token carries one.
--
-- NOTE(review): intentionally partial — only ever called with the
-- non-empty runs produced by 'splitLabels'.
labelSplitToEntity :: [(BIOLabel, Int)] -> Either String TaggedEntity
labelSplitToEntity labs@(firstLab@(_, begIx):rest)
  | wellFormed = Right $ TaggedEntity (TaggedSpan begIx endIx) entityType mNormalized
  | otherwise  = Left $ "inconsistent label sequence `" ++ rendered ++ "`"
  where
    wellFormed = isBeginning firstLab
                 && all (\tp -> isInside tp && labelType tp == entityType) rest
    rendered = T.unpack $ T.intercalate " " $ map (formatBioLabel . fst) labs
    isBeginning (Beginning _ _, _) = True
    isBeginning _ = False
    isInside (Inside _ _, _) = True
    isInside _ = False
    labelType (Beginning t _, _) = t
    labelType (Inside t _, _) = t
    entityType = labelType firstLab
    -- span ends at the last token of the run (or at the first, for a
    -- single-token entity)
    endIx = case rest of
              [] -> begIx
              _  -> snd (last rest)
    normalized (Beginning _ n, _) = n
    normalized (Inside _ n, _) = n
    mNormalized = if all (\tp -> normalized tp == Nothing) labs
                    then
                      Nothing
                    else
                      Just $ T.intercalate "_" $ catMaybes $ map normalized labs
-- | Split position-annotated labels into runs, dropping Outside tags.
-- An Inside label joins the following run only when its position is
-- exactly one before the run's first position (i.e. the tokens are
-- adjacent); otherwise it opens a run of its own.  (Processes the list
-- right-to-left via the recursive call.)
splitLabels :: [(BIOLabel, Int)] -> [[(BIOLabel, Int)]]
splitLabels [] = []
splitLabels ((Outside, _):rest) = splitLabels rest
splitLabels (entry@(_, pos):rest) =
  case splitLabels rest of
    groups@(((Beginning _ _, _):_):_) -> [entry] : groups
    (grp@((Inside _ _, pos'):_):groups)
      | pos' == pos + 1 -> (entry : grp) : groups
      | otherwise       -> [entry] : grp : groups
    [] -> [[entry]]
-- | Parse a whitespace-separated sequence of BIO labels
-- (via 'sepByWhitespaces' from GEval.Common).
bioSequenceParser :: Parser [BIOLabel]
bioSequenceParser = sepByWhitespaces bioLabelParser
-- | Parse one BIO label: either a bare @O@, or a @B@/@I@ marker, a
-- @-@ or @_@ separator, a label (no whitespace, no slash), and an
-- optional @/normalised-form@ suffix.
bioLabelParser :: Parser BIOLabel
bioLabelParser = outsideParser <|> taggedParser
  where
    outsideParser = string "O" *> pure Outside
    taggedParser = do
      constructor <- bioMarkerParser
      _ <- string "-" <|> string "_"
      label <- takeWhile1 (\c -> not (isSpace c) && c /= '/')
      mNorm <- normalizationParser <|> pure Nothing
      return $ constructor label mNorm
    normalizationParser = do
      _ <- string "/"
      norm <- takeWhile1 (not . isSpace)
      return (Just norm)
-- | Parse the B/I marker, yielding the matching 'BIOLabel' constructor.
bioMarkerParser :: Parser (T.Text -> Maybe T.Text -> BIOLabel)
bioMarkerParser = (Beginning <$ string "B") <|> (Inside <$ string "I")

View File

@ -38,3 +38,7 @@ sepByWhitespaces parser = possibleWhitespace *> parser `sepBy` whitespace <* pos
possibleWhitespace = many' (satisfy isHorizontalSpace) possibleWhitespace = many' (satisfy isHorizontalSpace)
whitespace = many1 (satisfy isHorizontalSpace) whitespace = many1 (satisfy isHorizontalSpace)
indicator :: Bool -> Double
indicator True = 1.0
indicator False = 0.0

View File

@ -16,6 +16,7 @@ module GEval.Core
MetricValue, MetricValue,
GEvalSpecialCommand(..), GEvalSpecialCommand(..),
GEvalSpecification(..), GEvalSpecification(..),
ResultOrdering(..),
GEvalOptions(..), GEvalOptions(..),
GEvalException(..), GEvalException(..),
defaultGEvalSpecification, defaultGEvalSpecification,
@ -53,10 +54,11 @@ import qualified System.Directory as D
import System.Posix import System.Posix
import System.FilePath import System.FilePath
import Data.Maybe import Data.Maybe
import Data.Tuple
import qualified Data.List.Split as DLS import qualified Data.List.Split as DLS
import Control.Monad.IO.Class import Control.Monad.IO.Class
import Control.Monad ((<=<)) import Control.Monad ((<=<), filterM)
import Data.Attoparsec.Text (parseOnly) import Data.Attoparsec.Text (parseOnly)
@ -69,6 +71,8 @@ import GEval.PrecisionRecall
import GEval.ClusteringMetrics import GEval.ClusteringMetrics
import GEval.LogLossHashed import GEval.LogLossHashed
import GEval.CharMatch import GEval.CharMatch
import GEval.BIO
import Data.Conduit.AutoDecompress
import qualified Data.HashMap.Strict as M import qualified Data.HashMap.Strict as M
@ -82,7 +86,7 @@ defaultLogLossHashedSize :: Word32
defaultLogLossHashedSize = 10 defaultLogLossHashedSize = 10
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch
| MAP | LogLoss | MAP | LogLoss | Likelihood | BIOF1 | BIOF1Labels | LikelihoodHashed Word32
deriving (Eq) deriving (Eq)
instance Show Metric where instance Show Metric where
@ -99,9 +103,18 @@ instance Show Metric where
"" ""
else else
(show nbOfBits)) (show nbOfBits))
show (LikelihoodHashed nbOfBits) = "LikelihoodHashed" ++ (if
nbOfBits == defaultLogLossHashedSize
then
""
else
(show nbOfBits))
show CharMatch = "CharMatch" show CharMatch = "CharMatch"
show MAP = "MAP" show MAP = "MAP"
show LogLoss = "LogLoss" show LogLoss = "LogLoss"
show Likelihood = "Likelihood"
show BIOF1 = "BIO-F1"
show BIOF1Labels = "BIO-F1-Labels"
instance Read Metric where instance Read Metric where
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)] readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
@ -116,9 +129,15 @@ instance Read Metric where
readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
[(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)] [(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
_ -> [(LogLossHashed defaultLogLossHashedSize, theRest)] _ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
readsPrec p ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
[(nbOfBits, theRest)] -> [(LikelihoodHashed nbOfBits, theRest)]
_ -> [(LikelihoodHashed defaultLogLossHashedSize, theRest)]
readsPrec _ ('L':'o':'g':'L':'o':'s':'s':theRest) = [(LogLoss, theRest)] readsPrec _ ('L':'o':'g':'L':'o':'s':'s':theRest) = [(LogLoss, theRest)]
readsPrec _ ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(Likelihood, theRest)]
readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)] readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)] readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
@ -131,9 +150,13 @@ getMetricOrdering ClippEU = TheHigherTheBetter
getMetricOrdering (FMeasure _) = TheHigherTheBetter getMetricOrdering (FMeasure _) = TheHigherTheBetter
getMetricOrdering NMI = TheHigherTheBetter getMetricOrdering NMI = TheHigherTheBetter
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
getMetricOrdering CharMatch = TheHigherTheBetter getMetricOrdering CharMatch = TheHigherTheBetter
getMetricOrdering MAP = TheHigherTheBetter getMetricOrdering MAP = TheHigherTheBetter
getMetricOrdering LogLoss = TheLowerTheBetter getMetricOrdering LogLoss = TheLowerTheBetter
getMetricOrdering Likelihood = TheHigherTheBetter
getMetricOrdering BIOF1 = TheHigherTheBetter
getMetricOrdering BIOF1Labels = TheHigherTheBetter
defaultOutDirectory = "." defaultOutDirectory = "."
defaultTestName = "test-A" defaultTestName = "test-A"
@ -163,11 +186,13 @@ getExpectedDirectory spec = fromMaybe outDirectory $ gesExpectedDirectory spec
data GEvalSpecialCommand = Init | LineByLine | Diff FilePath data GEvalSpecialCommand = Init | LineByLine | Diff FilePath
data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest
data GEvalOptions = GEvalOptions data GEvalOptions = GEvalOptions
{ geoSpecialCommand :: Maybe GEvalSpecialCommand, { geoSpecialCommand :: Maybe GEvalSpecialCommand,
geoResultOrdering :: ResultOrdering,
geoSpec :: GEvalSpecification } geoSpec :: GEvalSpecification }
data GEvalException = NoExpectedFile FilePath data GEvalException = NoExpectedFile FilePath
| NoOutFile FilePath | NoOutFile FilePath
| NoExpectedDirectory FilePath | NoExpectedDirectory FilePath
@ -236,11 +261,14 @@ checkAndGetFiles gevalSpec = do
unlessM (D.doesDirectoryExist expectedDirectory) $ throwM $ NoExpectedDirectory expectedDirectory unlessM (D.doesDirectoryExist expectedDirectory) $ throwM $ NoExpectedDirectory expectedDirectory
unlessM (D.doesDirectoryExist outTestDirectory) $ throwM $ NoOutTestDirectory outTestDirectory unlessM (D.doesDirectoryExist outTestDirectory) $ throwM $ NoOutTestDirectory outTestDirectory
unlessM (D.doesDirectoryExist expectedTestDirectory) $ throwM $ NoExpectedTestDirectory expectedTestDirectory unlessM (D.doesDirectoryExist expectedTestDirectory) $ throwM $ NoExpectedTestDirectory expectedTestDirectory
inputFilePath <- lookForCompressedFiles inputFilePath'
expectedFilePath <- lookForCompressedFiles expectedFilePath'
outFilePath <- lookForCompressedFiles outFilePath'
checkInputFileIfNeeded metric inputFilePath checkInputFileIfNeeded metric inputFilePath
return (inputFilePath, expectedFilePath, outFilePath) return (inputFilePath, expectedFilePath, outFilePath)
where expectedFilePath = expectedTestDirectory </> (gesExpectedFile gevalSpec) where expectedFilePath' = expectedTestDirectory </> (gesExpectedFile gevalSpec)
outFilePath = getOutFile gevalSpec (gesOutFile gevalSpec) outFilePath' = getOutFile gevalSpec (gesOutFile gevalSpec)
inputFilePath = expectedTestDirectory </> (gesInputFile gevalSpec) inputFilePath' = expectedTestDirectory </> (gesInputFile gevalSpec)
expectedTestDirectory = expectedDirectory </> testName expectedTestDirectory = expectedDirectory </> testName
outTestDirectory = outDirectory </> testName outTestDirectory = outDirectory </> testName
expectedDirectory = getExpectedDirectory gevalSpec expectedDirectory = getExpectedDirectory gevalSpec
@ -248,6 +276,24 @@ checkAndGetFiles gevalSpec = do
testName = gesTestName gevalSpec testName = gesTestName gevalSpec
metric = gesMetric gevalSpec metric = gesMetric gevalSpec
lookForCompressedFiles :: FilePath -> IO FilePath
lookForCompressedFiles = lookForAlternativeFiles [".gz", ".xz", ".bz2"]
lookForAlternativeFiles :: [String] -> FilePath -> IO FilePath
lookForAlternativeFiles suffixes filePath
| takeExtension filePath `Prelude.elem` suffixes = return filePath
| otherwise = do
fileIsThere <- D.doesFileExist filePath
if fileIsThere
then
return filePath
else
do
found <- Control.Monad.filterM D.doesFileExist $ Prelude.map (filePath <.>) suffixes
return $ case found of
[fp] -> fp
_ -> filePath
getOutFile :: GEvalSpecification -> FilePath -> FilePath getOutFile :: GEvalSpecification -> FilePath -> FilePath
getOutFile gevalSpec out = outDirectory </> testName </> out getOutFile gevalSpec out = outDirectory </> testName </> out
where outDirectory = gesOutDirectory gevalSpec where outDirectory = gesOutDirectory gevalSpec
@ -261,7 +307,7 @@ checkInputFileIfNeeded _ _ = return ()
fileAsLineSource :: FilePath -> LineSource (ResourceT IO) fileAsLineSource :: FilePath -> LineSource (ResourceT IO)
fileAsLineSource filePath = fileAsLineSource filePath =
LineSource (smartSource [] Nothing (parseSmartSpec filePath) $= CT.decodeUtf8Lenient =$= CT.lines) filePath 1 LineSource (smartSource [] Nothing (parseSmartSpec filePath) $= autoDecompress $= CT.decodeUtf8Lenient =$= CT.lines) filePath 1
gevalCoreOnSingleLines :: Metric -> LineInFile -> LineInFile -> LineInFile -> IO (MetricValue) gevalCoreOnSingleLines :: Metric -> LineInFile -> LineInFile -> LineInFile -> IO (MetricValue)
gevalCoreOnSingleLines metric inpLine expLine outLine = gevalCoreOnSingleLines metric inpLine expLine outLine =
@ -283,7 +329,9 @@ gevalCore metric inputFilePath expectedFilePath outFilePath = do
(fileAsLineSource expectedFilePath) (fileAsLineSource expectedFilePath)
(fileAsLineSource outFilePath) (fileAsLineSource outFilePath)
gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric logLossToLikehood logLoss = exp (-logLoss)
gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => Metric
-> LineSource (ResourceT m) -> LineSource (ResourceT m)
-> LineSource (ResourceT m) -> LineSource (ResourceT m)
-> LineSource (ResourceT m) -> LineSource (ResourceT m)
@ -292,12 +340,20 @@ gevalCoreOnSources RMSE inputLineSource expectedLineSource outLineSource = do
mse <- gevalCoreOnSources MSE inputLineSource expectedLineSource outLineSource mse <- gevalCoreOnSources MSE inputLineSource expectedLineSource outLineSource
return $ mse ** 0.5 return $ mse ** 0.5
gevalCoreOnSources Likelihood inputLineSource expectedLineSource outLineSource = do
logLoss <- gevalCoreOnSources LogLoss inputLineSource expectedLineSource outLineSource
return $ logLossToLikehood logLoss
gevalCoreOnSources (LikelihoodHashed b) inputLineSource expectedLineSource outLineSource = do
logLoss <- gevalCoreOnSources (LogLossHashed b) inputLineSource expectedLineSource outLineSource
return $ logLossToLikehood logLoss
gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do
gevalCore' metric inputLineSource expectedLineSource outLineSource gevalCore' metric inputLineSource expectedLineSource outLineSource
data LineInFile = LineInFile FilePath Word32 Text data LineInFile = LineInFile FilePath Word32 Text
gevalCore' :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue) gevalCore' :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
gevalCore' MSE _ = gevalCoreWithoutInput outParser outParser itemError averageC id gevalCore' MSE _ = gevalCoreWithoutInput outParser outParser itemError averageC id
where outParser = getValue . TR.double where outParser = getValue . TR.double
@ -315,8 +371,17 @@ gevalCore' BLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . D
| otherwise = exp (1.0 - (r /. c)) | otherwise = exp (1.0 - (r /. c))
gevalCore' Accuracy _ = gevalCoreWithoutInput (Right . strip) (Right . strip) hitOrMiss averageC id gevalCore' Accuracy _ = gevalCoreWithoutInput (Right . strip) (Right . strip) hitOrMiss averageC id
where hitOrMiss (exp,got) = if (normalizeProbForAccuracy exp got) == exp then 1.0 else 0.0 where hitOrMiss (exp, got) =
-- if the expected value is 0 or 1 treat values between 0.0 and 1.0 as probabilities -- first try to parse what we got as a probability distribution
-- (like the one used for Likelikehood/LogLossHashed metric)
case parseWordSpecs got of
Right wordSpecs -> if Prelude.null pairs
then 0.0
else indicator (exp == (snd $ Prelude.maximum pairs))
where pairs = catMaybes $ Prelude.map wordSpecToPair wordSpecs
Left _ -> indicator ((normalizeProbForAccuracy exp got) == exp)
-- if the expected value is 0 or 1 treat values
-- between 0.0 and 1.0 as probabilities
-- for the positive outcome -- for the positive outcome
normalizeProbForAccuracy :: Text -> Text -> Text normalizeProbForAccuracy :: Text -> Text -> Text
normalizeProbForAccuracy exp got normalizeProbForAccuracy exp got
@ -383,6 +448,15 @@ gevalCore' CharMatch inputLineSource = helper inputLineSource
helper inputLineSource expectedLineSource outputLineSource = do helper inputLineSource expectedLineSource outputLineSource = do
gevalCoreGeneralized (ParserSpecWithInput (Right . unpack) (Right . unpack) (Right . unpack)) step countAgg (fMeasureOnCounts charMatchBeta) (WithInput inputLineSource expectedLineSource outputLineSource) gevalCoreGeneralized (ParserSpecWithInput (Right . unpack) (Right . unpack) (Right . unpack)) step countAgg (fMeasureOnCounts charMatchBeta) (WithInput inputLineSource expectedLineSource outputLineSource)
step (ParsedRecordWithInput inp exp out) = getCharMatchCount inp exp out step (ParsedRecordWithInput inp exp out) = getCharMatchCount inp exp out
gevalCore' BIOF1 _ = gevalCoreWithoutInput parseBioSequenceIntoEntities parseBioSequenceIntoEntities (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts
gevalCore' BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWithoutNormalization parseBioSequenceIntoEntitiesWithoutNormalization (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts
where parseBioSequenceIntoEntitiesWithoutNormalization s = do
entities <- parseBioSequenceIntoEntities s
return $ Prelude.map eraseNormalisation entities
countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int)
countAgg = CC.foldl countFolder (0, 0, 0) countAgg = CC.foldl countFolder (0, 0, 0)
parseDistributionWrapper :: Word32 -> Word32 -> Text -> HashedDistribution parseDistributionWrapper :: Word32 -> Word32 -> Text -> HashedDistribution
@ -395,25 +469,25 @@ data SourceItem a = Got a | Wrong String | Done
skipLineNumber :: (x -> c) -> ((Word32, x) -> c) skipLineNumber :: (x -> c) -> ((Word32, x) -> c)
skipLineNumber fun = fun . snd skipLineNumber fun = fun . snd
gevalCoreWithoutInput :: (MonadBaseControl IO m, MonadThrow m, MonadIO m) => (Text -> Either String a) -> (Text -> Either String b) -> ((a, b) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue) gevalCoreWithoutInput :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => (Text -> Either String a) -> (Text -> Either String b) -> ((a, b) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
gevalCoreWithoutInput expParser outParser itemStep aggregator finalStep expectedLineStream outLineStream = gevalCoreWithoutInput expParser outParser itemStep aggregator finalStep expectedLineStream outLineStream =
gevalCoreGeneralized (ParserSpecWithoutInput expParser outParser) (trans itemStep) aggregator finalStep (WithoutInput expectedLineStream outLineStream) gevalCoreGeneralized (ParserSpecWithoutInput expParser outParser) (trans itemStep) aggregator finalStep (WithoutInput expectedLineStream outLineStream)
where where
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
trans step (ParsedRecordWithoutInput x y) = step (x, y) trans step (ParsedRecordWithoutInput x y) = step (x, y)
gevalCore''' :: (MonadBaseControl IO m, MonadThrow m, MonadIO m) => ParserSpec (WithoutInput m a b) -> ((Word32, (a, b)) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> WithoutInput m a b -> m (MetricValue) gevalCore''' :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec (WithoutInput m a b) -> ((Word32, (a, b)) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> WithoutInput m a b -> m (MetricValue)
gevalCore''' parserSpec itemStep aggregator finalStep context = gevalCore''' parserSpec itemStep aggregator finalStep context =
gevalCoreGeneralized' parserSpec (trans itemStep) aggregator finalStep context gevalCoreGeneralized' parserSpec (trans itemStep) aggregator finalStep context
where where
trans :: ((Word32, (a, b)) -> c) -> (Word32, ParsedRecord (WithoutInput m a b)) -> c trans :: ((Word32, (a, b)) -> c) -> (Word32, ParsedRecord (WithoutInput m a b)) -> c
trans step (n, ParsedRecordWithoutInput x y) = step (n, (x, y)) trans step (n, ParsedRecordWithoutInput x y) = step (n, (x, y))
gevalCoreGeneralized :: (EvaluationContext ctxt m, MonadBaseControl IO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> (ParsedRecord ctxt -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue) gevalCoreGeneralized :: (EvaluationContext ctxt m, MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> (ParsedRecord ctxt -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue)
gevalCoreGeneralized parserSpec itemStep aggregator finalStep context = gevalCoreGeneralized parserSpec itemStep aggregator finalStep context =
gevalCoreGeneralized' parserSpec (skipLineNumber itemStep) aggregator finalStep context gevalCoreGeneralized' parserSpec (skipLineNumber itemStep) aggregator finalStep context
gevalCoreGeneralized' :: forall m ctxt c d . (EvaluationContext ctxt m, MonadBaseControl IO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> ((Word32, ParsedRecord ctxt) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue) gevalCoreGeneralized' :: forall m ctxt c d . (EvaluationContext ctxt m, MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> ((Word32, ParsedRecord ctxt) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue)
gevalCoreGeneralized' parserSpec itemStep aggregator finalStep context = do gevalCoreGeneralized' parserSpec itemStep aggregator finalStep context = do
v <- runResourceT $ v <- runResourceT $
(((getZipSource $ (,) (((getZipSource $ (,)
@ -434,7 +508,7 @@ class EvaluationContext ctxt m where
data WithoutInput m e o = WithoutInput (LineSource (ResourceT m)) (LineSource (ResourceT m)) data WithoutInput m e o = WithoutInput (LineSource (ResourceT m)) (LineSource (ResourceT m))
instance (MonadBaseControl IO m, MonadIO m, MonadThrow m) => EvaluationContext (WithoutInput m e o) m where instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithoutInput m e o) m where
data ParserSpec (WithoutInput m e o) = ParserSpecWithoutInput (Text -> Either String e) (Text -> Either String o) data ParserSpec (WithoutInput m e o) = ParserSpecWithoutInput (Text -> Either String e) (Text -> Either String o)
data WrappedParsedRecord (WithoutInput m e o) = WrappedParsedRecordWithoutInput (SourceItem e) (SourceItem o) data WrappedParsedRecord (WithoutInput m e o) = WrappedParsedRecordWithoutInput (SourceItem e) (SourceItem o)
data ParsedRecord (WithoutInput m e o) = ParsedRecordWithoutInput e o data ParsedRecord (WithoutInput m e o) = ParsedRecordWithoutInput e o
@ -463,7 +537,7 @@ data WithInput m i e o = WithInput (LineSource (ResourceT m)) (LineSource (Resou
getInputFilePath (WithInput (LineSource _ inputFilePath _) _ _) = inputFilePath getInputFilePath (WithInput (LineSource _ inputFilePath _) _ _) = inputFilePath
instance (MonadBaseControl IO m, MonadIO m, MonadThrow m) => EvaluationContext (WithInput m i e o) m where instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithInput m i e o) m where
data ParserSpec (WithInput m i e o) = ParserSpecWithInput (Text -> Either String i) (Text -> Either String e) (Text -> Either String o) data ParserSpec (WithInput m i e o) = ParserSpecWithInput (Text -> Either String i) (Text -> Either String e) (Text -> Either String o)
data WrappedParsedRecord (WithInput m i e o) = WrappedParsedRecordWithInput (SourceItem i) (SourceItem e) (SourceItem o) data WrappedParsedRecord (WithInput m i e o) = WrappedParsedRecordWithInput (SourceItem i) (SourceItem e) (SourceItem o)
data ParsedRecord (WithInput m i e o) = ParsedRecordWithInput i e o data ParsedRecord (WithInput m i e o) = ParsedRecordWithInput i e o

View File

@ -101,6 +101,8 @@ Cluster proverbs for languages.
This is a sample challenge for flat clustering (unsupervised learning challenge). This is a sample challenge for flat clustering (unsupervised learning challenge).
|] ++ (commonReadmeMDContents testName) |] ++ (commonReadmeMDContents testName)
readmeMDContents (LikelihoodHashed b) testname = readmeMDContents (LogLossHashed b) testname
readmeMDContents (LogLossHashed _) testName = [i| readmeMDContents (LogLossHashed _) testName = [i|
GEval sample challenge language model evaluation GEval sample challenge language model evaluation
================================================== ==================================================
@ -113,6 +115,39 @@ The metric is average log-loss calculated for 10-bit hashes.
Train file is just a text file (one utterance per line). Train file is just a text file (one utterance per line).
In an input file, left and right contexts (TAB-separated) are given. In an input file, left and right contexts (TAB-separated) are given.
In an expected file, the word to be guessed is given. In an expected file, the word to be guessed is given.
Format of the output files
--------------------------
For each input line, a probability distribution for words in a gap
must be given:
word1:logprob1 word2:logprob2 ... wordN:logprobN :logprob0
where *logprobi* is the logarithm of the probability for *wordi* and
*logprob0* is the logarithm of the probability mass for all the other
words (it will be spread between all 1024 fingerprint values). If the
respective probabilities do not sum up to 1:
* if the sum is larger than 0.0 and smaller than 1.0, and no logprob0
is given, log of the remaining probability mass will be assigned to logprob0,
* otherwise they will be normalised with softmax.
Note: the separator here is space, not TAB!
### Probs
Probabilities could be given (instead of logprobs):
* if **all** values look like probs and **at least one value** is positive, we treat
the values as probs rather than logprobs (single value 0.0 is treated
as a logprob, i.e. probability 1.0!);
* if their sum is greater than 1.0, then we normalize simply by dividing by the sum;
* if the sum is smaller than 1.0 and there is no entry for all the other words,
we add such an entry for the missing probability mass;
* if the sum is smaller than 1.0 and there is an entry for all the other words,
we normalize by dividing by the sum.
|] ++ (commonReadmeMDContents testName) |] ++ (commonReadmeMDContents testName)
readmeMDContents CharMatch testName = [i| readmeMDContents CharMatch testName = [i|
@ -170,6 +205,29 @@ This a sample challenge for the log-loss metric.
|] ++ (commonReadmeMDContents testName) |] ++ (commonReadmeMDContents testName)
readmeMDContents Likelihood testName = [i|
Give the probability of a positive sentiment
============================================
Give the probability that a sentence expresses a positive sentiment.
This a sample challenge for the likelihood metric.
|] ++ (commonReadmeMDContents testName)
readmeMDContents BIOF1Labels testName = readmeMDContents BIOF1 testName
readmeMDContents BIOF1 testName = [i|
Tag and normalize names
=======================
Tag names in the tokenized text and normalized them.
The output should be given in the BIO format with the normalized forms given after slashes (see
`dev-0/expected.tsv` for an example).
The metric is F1 counted on entities (not labels).
|] ++ (commonReadmeMDContents testName)
readmeMDContents _ testName = [i| readmeMDContents _ testName = [i|
GEval sample challenge GEval sample challenge
====================== ======================
@ -239,6 +297,7 @@ trainContents NMI = [hereLit|pl Kto pod kim dołki kopie, ten sam w nie wpada.
en The pen is mightier than the sword. en The pen is mightier than the sword.
pl Baba z wozu, koniom lżej. pl Baba z wozu, koniom lżej.
|] |]
trainContents (LikelihoodHashed b) = trainContents (LogLossHashed b)
trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota
Basia ma psa Basia ma psa
Nie kupujemy kota w worku Nie kupujemy kota w worku
@ -254,11 +313,17 @@ honour GB honor
titbit GB smakołyk titbit GB smakołyk
tidbit US smakołyk tidbit US smakołyk
|] |]
trainContents Likelihood = trainContents LogLoss
trainContents LogLoss = [hereLit|0.0 Hell, no!!! trainContents LogLoss = [hereLit|0.0 Hell, no!!!
0.0 I hate this stuff 0.0 I hate this stuff
1.0 Lekker!!! 1.0 Lekker!!!
0.0 Boring, boring, boring 0.0 Boring, boring, boring
|] |]
trainContents BIOF1Labels = trainContents BIOF1
trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surname/BOND My name is Bond , James Bond
O O O O O There is no name here
B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman
|]
trainContents _ = [hereLit|0.06 0.39 0 0.206 trainContents _ = [hereLit|0.06 0.39 0 0.206
1.00 1.00 1 0.017 1.00 1.00 1 0.017
317.8 5.20 67 0.048 317.8 5.20 67 0.048
@ -279,6 +344,7 @@ When the going gets tough, the tough get going.
devInContents (FMeasure _) = [hereLit|b b W 29520 779 -28 -32 a 0 0 0 0 0 0 0 0 0 0 devInContents (FMeasure _) = [hereLit|b b W 29520 779 -28 -32 a 0 0 0 0 0 0 0 0 0 0
b b W 55200 1259 35 9 a 1 0 1 0 0 0 0 0 4000 4000 b b W 55200 1259 35 9 a 1 0 1 0 0 0 0 0 4000 4000
|] |]
devInContents (LikelihoodHashed b) = devInContents (LogLossHashed b)
devInContents (LogLossHashed _) = [hereLit|Nie kupuj w worku devInContents (LogLossHashed _) = [hereLit|Nie kupuj w worku
Ona psa Ona psa
|] |]
@ -290,10 +356,15 @@ devInContents MAP = [hereLit|US noc
GB wózek dziecięcy GB wózek dziecięcy
GB wizualizować GB wizualizować
|] |]
devInContents Likelihood = devInContents LogLoss
devInContents LogLoss = [hereLit|Great stuff! devInContents LogLoss = [hereLit|Great stuff!
Boring stuff Boring stuff
That's good That's good
|] |]
devInContents BIOF1Labels = devInContents BIOF1
devInContents BIOF1 = [hereLit|Adam and Eve
Mr Jan Kowalski
|]
devInContents _ = [hereLit|0.72 0 0.007 devInContents _ = [hereLit|0.72 0 0.007
9.54 62 0.054 9.54 62 0.054
|] |]
@ -312,6 +383,7 @@ devExpectedContents NMI = [hereLit|en
pl pl
en en
|] |]
devExpectedContents (LikelihoodHashed b) = devExpectedContents (LogLossHashed b)
devExpectedContents (LogLossHashed _) = [hereLit|kota devExpectedContents (LogLossHashed _) = [hereLit|kota
ma ma
|] |]
@ -323,10 +395,15 @@ devExpectedContents MAP = [hereLit|night nite
pram pram
visualise visualise
|] |]
devExpectedContents Likelihood = devExpectedContents LogLoss
devExpectedContents LogLoss = [hereLit|1.0 devExpectedContents LogLoss = [hereLit|1.0
0.0 0.0
1.0 1.0
|] |]
devExpectedContents BIOF1Labels = devExpectedContents BIOF1
devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE
O B-firstname/JAN B-surname/KOWALSKI
|]
devExpectedContents _ = [hereLit|0.82 devExpectedContents _ = [hereLit|0.82
95.2 95.2
|] |]
@ -347,6 +424,7 @@ W marcu, jak w garncu.
A cada necio agrada su porrada. A cada necio agrada su porrada.
Kwiecień plecień, bo przeplata trochę zimy, trochę lata. Kwiecień plecień, bo przeplata trochę zimy, trochę lata.
|] |]
testInContents (LikelihoodHashed b) = testInContents (LogLossHashed b)
testInContents (LogLossHashed _) = [hereLit|Ala ma testInContents (LogLossHashed _) = [hereLit|Ala ma
Ona ma kota worku Ona ma kota worku
|] |]
@ -358,10 +436,15 @@ testInContents MAP = [hereLit|US wózek dziecięcy
GB słoń GB słoń
US słoń US słoń
|] |]
testInContents Likelihood = testInContents LogLoss
testInContents LogLoss = [hereLit|That's great, ha, ha, I love it! testInContents LogLoss = [hereLit|That's great, ha, ha, I love it!
Super-duper!! Super-duper!!
That is incredibly boring. That is incredibly boring.
|] |]
testInContents BIOF1Labels = testInContents BIOF1
testInContents BIOF1 = [hereLit|Alan Tring
No name here
|]
testInContents _ = [hereLit|1.52 2 0.093 testInContents _ = [hereLit|1.52 2 0.093
30.06 14 0.009 30.06 14 0.009
|] |]
@ -382,6 +465,7 @@ pl
es es
pl pl
|] |]
testExpectedContents (LikelihoodHashed b) = testExpectedContents (LogLossHashed b)
testExpectedContents (LogLossHashed _) = [hereLit|ma testExpectedContents (LogLossHashed _) = [hereLit|ma
w w
|] |]
@ -393,10 +477,15 @@ testExpectedContents MAP = [hereLit|trolley
elephant elephant
elephant elephant
|] |]
testExpectedContents Likelihood = testExpectedContents LogLoss
testExpectedContents LogLoss = [hereLit|1.0 testExpectedContents LogLoss = [hereLit|1.0
1.0 1.0
0.0 0.0
|] |]
testExpectedContents BIOF1Labels = testExpectedContents BIOF1
testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING
O O O
|]
testExpectedContents _ = [hereLit|0.11 testExpectedContents _ = [hereLit|0.11
17.2 17.2
|] |]

View File

@ -9,17 +9,25 @@
module GEval.LineByLine module GEval.LineByLine
(runLineByLine, (runLineByLine,
runDiff runLineByLineGeneralized,
runDiff,
runDiffGeneralized,
LineRecord(..),
ResultOrdering(..)
) where ) where
import GEval.Core import GEval.Core
import Data.Conduit.AutoDecompress (doNothing)
import Data.Conduit import Data.Conduit
import qualified Data.Conduit.List as CL import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Combinators as CC import qualified Data.Conduit.Combinators as CC
import Data.Text import Data.Text
import Data.Text.Encoding import Data.Text.Encoding
import Data.List (sortBy, sort)
import Control.Monad.IO.Class import Control.Monad.IO.Class
import Control.Monad.Trans.Resource import Control.Monad.Trans.Resource
@ -30,13 +38,10 @@ import Text.Printf
data LineRecord = LineRecord Text Text Text Word32 MetricValue data LineRecord = LineRecord Text Text Text Word32 MetricValue
deriving (Eq, Show) deriving (Eq, Show)
runLineByLine :: GEvalSpecification -> IO () runLineByLine :: ResultOrdering -> GEvalSpecification -> IO ()
runLineByLine spec = do runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec where consum :: ConduitT LineRecord Void (ResourceT IO) ()
gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum consum = (CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
where metric = gesMetric spec
consum :: Consumer LineRecord (ResourceT IO) ()
consum = (CL.map (encodeUtf8 . formatOutput) =$= CC.unlinesAscii =$= CC.stdout)
formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [ formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
formatScore score, formatScore score,
escapeTabs inp, escapeTabs inp,
@ -45,19 +50,27 @@ runLineByLine spec = do
formatScore :: MetricValue -> Text formatScore :: MetricValue -> Text
formatScore = Data.Text.pack . printf "%f" formatScore = Data.Text.pack . printf "%f"
runDiff :: FilePath -> GEvalSpecification -> IO () runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
runDiff otherOut spec = do runLineByLineGeneralized ordering spec consum = do
let otherOutFilePath = getOutFile spec otherOut
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath (sorter ordering .| consum)
let sourceB = gevalLineByLineSource metric inputFilePath expectedFilePath otherOutFilePath
runResourceT $
((getZipSource $ (,)
<$> ZipSource sourceA
<*> ZipSource sourceB) $$ consum)
where metric = gesMetric spec where metric = gesMetric spec
consum :: Consumer (LineRecord, LineRecord) (ResourceT IO) () sorter KeepTheOriginalOrder = doNothing
consum = (CL.filter shouldBeShown =$= CL.map (encodeUtf8 . formatOutput) =$= CC.unlinesAscii =$= CC.stdout) sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
sortOrder FirstTheWorst TheHigherTheBetter = compareScores
sortOrder FirstTheBest TheLowerTheBetter = compareScores
sortOrder _ _ = flip compareScores
compareScores (LineRecord _ _ _ _ s1) (LineRecord _ _ _ _ s2) = s1 `compare` s2
gobbleAndDo :: Monad m => ([a] -> [b]) -> ConduitT a b m ()
gobbleAndDo fun = do
l <- CC.sinkList
CC.yieldMany $ fun l
runDiff :: ResultOrdering -> FilePath -> GEvalSpecification -> IO ()
runDiff ordering otherOut spec = runDiffGeneralized ordering otherOut spec consum
where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
consum = (CL.filter shouldBeShown .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) = shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) =
outA /= outB && scoreA /= scoreB outA /= outB && scoreA /= scoreB
formatOutput (LineRecord inp exp outA _ scoreA, LineRecord _ _ outB _ scoreB) = Data.Text.intercalate "\t" [ formatOutput (LineRecord inp exp outA _ scoreA, LineRecord _ _ outB _ scoreB) = Data.Text.intercalate "\t" [
@ -69,19 +82,40 @@ runDiff otherOut spec = do
formatScoreDiff :: Double -> Text formatScoreDiff :: Double -> Text
formatScoreDiff = Data.Text.pack . printf "%f" formatScoreDiff = Data.Text.pack . printf "%f"
runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a
runDiffGeneralized ordering otherOut spec consum = do
let otherOutFilePath = getOutFile spec otherOut
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath
let sourceB = gevalLineByLineSource metric inputFilePath expectedFilePath otherOutFilePath
runResourceT $ runConduit $
((getZipSource $ (,)
<$> ZipSource sourceA
<*> ZipSource sourceB) .| sorter ordering .| consum)
where metric = gesMetric spec
sorter KeepTheOriginalOrder = doNothing
sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
sortOrder FirstTheWorst TheHigherTheBetter = compareScores
sortOrder FirstTheBest TheLowerTheBetter = compareScores
sortOrder _ _ = flip compareScores
compareScores ((LineRecord _ _ _ _ o1), (LineRecord _ _ _ _ n1))
((LineRecord _ _ _ _ o2), (LineRecord _ _ _ _ n2))
= (n1 - o1) `compare` (n2 - o2)
escapeTabs :: Text -> Text escapeTabs :: Text -> Text
escapeTabs = Data.Text.replace "\t" "<tab>" escapeTabs = Data.Text.replace "\t" "<tab>"
gevalLineByLineCore :: Metric -> FilePath -> FilePath -> FilePath -> Sink LineRecord (ResourceT IO) () -> IO () gevalLineByLineCore :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum = gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum =
runResourceT $ runResourceT $ runConduit $
((gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath) $$ consum) ((gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath) .| consum)
gevalLineByLineSource :: Metric -> FilePath -> FilePath -> FilePath -> Source (ResourceT IO) LineRecord gevalLineByLineSource :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT () LineRecord (ResourceT IO) ()
gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath =
(getZipSource $ (,) (getZipSource $ (,)
<$> ZipSource (CL.sourceList [1..]) <$> ZipSource (CL.sourceList [1..])
<*> (ZipSource $ recordSource context parserSpec)) =$= CL.mapM (checkStepM evaluateLine) =$= CL.catMaybes <*> (ZipSource $ recordSource context parserSpec)) .| CL.mapM (checkStepM evaluateLine) .| CL.catMaybes
where parserSpec = (ParserSpecWithInput (Right . id) (Right . id) (Right . id)) where parserSpec = (ParserSpecWithInput (Right . id) (Right . id) (Right . id))
context = (WithInput inputLineSource expectedLineSource outputLineSource) context = (WithInput inputLineSource expectedLineSource outputLineSource)
inputLineSource = fileAsLineSource inputFilePath inputLineSource = fileAsLineSource inputFilePath

View File

@ -1,7 +1,7 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module GEval.LogLossHashed module GEval.LogLossHashed
(HashedDistribution, parseDistribution, calculateLogLoss) (HashedDistribution, parseDistribution, calculateLogLoss, parseWordSpecs, wordSpecToPair)
where where
import qualified Data.Vector as V import qualified Data.Vector as V
@ -36,24 +36,41 @@ parseDistribution nbOfBits seed distroSpec =
-- a direct list of 2^nbOfBits log probs -- a direct list of 2^nbOfBits log probs
else parseDistributionFromLogProbList nbOfBits distroSpec else parseDistributionFromLogProbList nbOfBits distroSpec
isProbTotalIncorrect :: Double -> Bool
isProbTotalIncorrect probTotal = probTotal > 1.0 || probTotal < (1.0 - epsilon)
where epsilon = 0.00000001
normalizeDistribution :: HashedDistribution -> HashedDistribution normalizeDistribution :: HashedDistribution -> HashedDistribution
normalizeDistribution distro = normalizeDistribution distro =
-- we do softmax (if needed) -- we do softmax (if needed)
if probSum > 1.0 || probSum < (1.0 - epsilon) if isProbTotalIncorrect probSum
then normalized then normalized
else distro else distro
where probSum = V.foldl' (\s l -> (s + exp l)) 0.0 distro where probSum = V.foldl' (\s l -> (s + exp l)) 0.0 distro
normalized = V.map (\v -> log ((exp v) / probSum)) distro normalized = V.map (\v -> log ((exp v) / probSum)) distro
epsilon = 0.00000001
type DistroMonad s = ReaderT (VM.MVector s Double) (ST s) type DistroMonad s = ReaderT (VM.MVector s Double) (ST s)
data WordSpec = AnyWord | SpecificWord T.Text data WordSpec = AnyWord | SpecificWord T.Text
deriving (Eq, Show)
isAnyWord AnyWord = True
isAnyWord _ = False
data WordSpecWithLogProb = WordSpecWithLogProb WordSpec Double data WordSpecWithLogProb = WordSpecWithLogProb WordSpec Double
wordSpecToPair :: WordSpecWithLogProb -> Maybe (Double, T.Text)
wordSpecToPair (WordSpecWithLogProb AnyWord _) = Nothing
wordSpecToPair (WordSpecWithLogProb (SpecificWord w) lp) = Just (lp, w)
parseDistributionFromWordList :: Word32 -> Word32 -> T.Text -> Either String HashedDistribution parseDistributionFromWordList :: Word32 -> Word32 -> T.Text -> Either String HashedDistribution
parseDistributionFromWordList nbOfBits seed distroSpec = (parseDistributionFromWordList' nbOfBits seed) =<< ( parseDistributionFromWordList nbOfBits seed distroSpec = (parseDistributionFromWordList' nbOfBits seed) =<<
processEithers $ map getWordSpecWithLogProb $ T.splitOn " " distroSpec) normalizeLogProbs =<<
lookForProbs =<<
(parseWordSpecs distroSpec)
parseWordSpecs :: T.Text -> Either String [WordSpecWithLogProb]
parseWordSpecs distroSpec = processEithers $ map getWordSpecWithLogProb $ T.splitOn " " distroSpec
getWordSpecWithLogProb :: T.Text -> Either String WordSpecWithLogProb getWordSpecWithLogProb :: T.Text -> Either String WordSpecWithLogProb
getWordSpecWithLogProb t = getWordSpecWithLogProb t =
@ -77,6 +94,42 @@ parseDistributionFromWordList' nbOfBits seed specs = runST $ do
frozen <- V.freeze emp frozen <- V.freeze emp
return $ Right frozen return $ Right frozen
lookForProbs :: [WordSpecWithLogProb] -> Either String [WordSpecWithLogProb]
lookForProbs specs
| areProbs specs = Right $ toLogProbs $ normalizeProbs specs
| otherwise = Right $ specs
areProbs :: [WordSpecWithLogProb] -> Bool
areProbs specs = all isProb specs && any isPositiveProb specs
where isProb (WordSpecWithLogProb _ v) = v >= 0.0 && v <= 1.0
isPositiveProb (WordSpecWithLogProb _ p) = p > 0.0 && p <= 1.0
toLogProbs :: [WordSpecWithLogProb] -> [WordSpecWithLogProb]
toLogProbs = map (\(WordSpecWithLogProb w p) -> (WordSpecWithLogProb w (log p)))
normalizeLogProbs :: [WordSpecWithLogProb] -> Either String [WordSpecWithLogProb]
normalizeLogProbs specs = if isProbTotalIncorrect probTotal
&& probTotal < 1.0 && probTotal > 0.0
&& not (any (\(WordSpecWithLogProb w _) -> isAnyWord w) specs)
&& all (\(WordSpecWithLogProb _ lp) -> lp <= 0) specs
then
Right ((WordSpecWithLogProb AnyWord (log (1-probTotal))):specs)
else
Right specs
where probTotal = sum $ map (\(WordSpecWithLogProb _ logp) -> exp logp) specs
normalizeProbs :: [WordSpecWithLogProb] -> [WordSpecWithLogProb]
normalizeProbs specs = if isProbTotalIncorrect probTotal
then
if probTotal > 1.0 || any (\(WordSpecWithLogProb w _) -> isAnyWord w) specs
then
map (\(WordSpecWithLogProb w p) -> WordSpecWithLogProb w (p / probTotal)) specs
else
((WordSpecWithLogProb AnyWord (1-probTotal)):specs)
else
specs
where probTotal = sum $ map (\(WordSpecWithLogProb _ p) -> p) specs
addSpecs :: Word32 -> Word32 -> [WordSpecWithLogProb] -> DistroMonad s () addSpecs :: Word32 -> Word32 -> [WordSpecWithLogProb] -> DistroMonad s ()
addSpecs nbOfBits seed = mapM_ (updateDistro nbOfBits seed) addSpecs nbOfBits seed = mapM_ (updateDistro nbOfBits seed)

View File

@ -41,6 +41,16 @@ optionsParser = GEvalOptions
<> short 'd' <> short 'd'
<> metavar "OTHER-OUT" <> metavar "OTHER-OUT"
<> help "compare results"))) <> help "compare results")))
<*> ((flag' FirstTheWorst
(long "sort"
<> short 's'
<> help "When in line-by-line or diff mode, sort the results from the worst to the best"))
<|>
(flag' FirstTheBest
(long "reverse-sort"
<> short 'r'
<> help "When in line-by-line or diff mode, sort the results from the best to the worst"))
<|> pure KeepTheOriginalOrder)
<*> specParser <*> specParser
precisionArgParser :: Parser Int precisionArgParser :: Parser Int
@ -90,9 +100,13 @@ specParser = GEvalSpecification
<> showDefault <> showDefault
<> metavar "INPUT" <> metavar "INPUT"
<> help "The name of the file with the input (applicable only for some metrics)" ) <> help "The name of the file with the input (applicable only for some metrics)" )
<*> metricReader <*> ((flip fromMaybe) <$> altMetricReader <*> metricReader)
<*> optional precisionArgParser <*> optional precisionArgParser
sel :: Maybe Metric -> Metric -> Metric
sel Nothing m = m
sel (Just m) _ = m
metricReader :: Parser Metric metricReader :: Parser Metric
metricReader = option auto metricReader = option auto
( long "metric" ( long "metric"
@ -100,7 +114,14 @@ metricReader = option auto
<> value defaultMetric <> value defaultMetric
<> showDefault <> showDefault
<> metavar "METRIC" <> metavar "METRIC"
<> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed or CharMatch" ) <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels or CharMatch" )
altMetricReader :: Parser (Maybe Metric)
altMetricReader = optional $ option auto
( long "alt-metric"
<> short 'a'
<> metavar "METRIC"
<> help "Alternative metric (overrides --metric option)" )
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue)) runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
runGEval args = do runGEval args = do
@ -145,20 +166,20 @@ attemptToReadOptsFromConfigFile args opts = do
runGEval'' :: GEvalOptions -> IO (Maybe MetricValue) runGEval'' :: GEvalOptions -> IO (Maybe MetricValue)
runGEval'' opts = runGEval''' (geoSpecialCommand opts) (geoSpec opts) runGEval'' opts = runGEval''' (geoSpecialCommand opts) (geoResultOrdering opts) (geoSpec opts)
runGEval''' :: Maybe GEvalSpecialCommand -> GEvalSpecification -> IO (Maybe MetricValue) runGEval''' :: Maybe GEvalSpecialCommand -> ResultOrdering -> GEvalSpecification -> IO (Maybe MetricValue)
runGEval''' Nothing spec = do runGEval''' Nothing _ spec = do
val <- geval spec val <- geval spec
return $ Just val return $ Just val
runGEval''' (Just Init) spec = do runGEval''' (Just Init) _ spec = do
initChallenge spec initChallenge spec
return Nothing return Nothing
runGEval''' (Just LineByLine) spec = do runGEval''' (Just LineByLine) ordering spec = do
runLineByLine spec runLineByLine ordering spec
return Nothing return Nothing
runGEval''' (Just (Diff otherOut)) spec = do runGEval''' (Just (Diff otherOut)) ordering spec = do
runDiff otherOut spec runDiff ordering otherOut spec
return Nothing return Nothing
initChallenge :: GEvalSpecification -> IO () initChallenge :: GEvalSpecification -> IO ()
@ -169,11 +190,9 @@ initChallenge spec = case gesExpectedDirectory spec of
showInitInstructions = do showInitInstructions = do
putStrLn [here| putStrLn [here|
Run: Run:
geval --init --expected-directory CHALLENGE geval --init --expected-directory CHALLENGE --metric METRIC-NAME --precision NUMBER-OF-DIGITS
to create a directory CHALLENGE representing a Gonito challenge. to create a directory CHALLENGE representing a Gonito challenge.
You can specify a metric with `--metric METRIC-NAME` option. (Note that `--out-directory` option is not taken into account with `--init` option.)
Note that `--out-directory` option is not taken into account with `--init` option.
|] |]
exitFailure exitFailure

View File

@ -3,7 +3,7 @@
module GEval.PrecisionRecall(calculateMAPForOneResult, module GEval.PrecisionRecall(calculateMAPForOneResult,
fMeasure, f1Measure, f2Measure, precision, recall, fMeasure, f1Measure, f2Measure, precision, recall,
fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder, fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder,
precisionAndRecall, precisionAndRecallFromCounts, maxMatch) precisionAndRecall, precisionAndRecallFromCounts, maxMatch, maxMatchOnOrdered)
where where
import GEval.Common import GEval.Common
@ -65,6 +65,17 @@ precision matchFun expected got = fst $ precisionAndRecall matchFun expected got
recall :: (a -> b -> Bool) -> [a] -> [b] -> Double recall :: (a -> b -> Bool) -> [a] -> [b] -> Double
recall matchFun expected got = snd $ precisionAndRecall matchFun expected got recall matchFun expected got = snd $ precisionAndRecall matchFun expected got
maxMatchOnOrdered :: Eq a => (a -> a -> Bool) -> [a] -> [a] -> Int
maxMatchOnOrdered laterThan expected got =
let (matched, _) = foldl' step (0, expected) got
in matched
where step (matched, l@(h:t)) g
| h == g = (matched+1, t)
| h `laterThan` g = (matched, l)
| otherwise = step (matched, t) g
step (matched, []) g = (matched, [])
-- counting maximum match with maximum bipartite matching -- counting maximum match with maximum bipartite matching
-- (we build an auxiliary graph and do a max-flow on this) -- (we build an auxiliary graph and do a max-flow on this)
maxMatch :: (a -> b -> Bool) -> [a] -> [b] -> Int maxMatch :: (a -> b -> Bool) -> [a] -> [b] -> Int
@ -72,7 +83,6 @@ maxMatch matchFun expected got = mf
where (b, e, g) = buildGraph matchFun expected got where (b, e, g) = buildGraph matchFun expected got
mf = maxFlow g (fst b) (fst e) mf = maxFlow g (fst b) (fst e)
buildGraph :: (a -> b -> Bool) -> [a] -> [b] -> (LNode Int, LNode Int, Gr Int Int) buildGraph :: (a -> b -> Bool) -> [a] -> [b] -> (LNode Int, LNode Int, Gr Int Int)
buildGraph matchFun expected got = (b, e, g) buildGraph matchFun expected got = (b, e, g)
where ((b, e), (_, g)) = buildGraph' matchFun expected got where ((b, e), (_, g)) = buildGraph' matchFun expected got

View File

@ -1,5 +1,5 @@
flags: {} flags: {}
packages: packages:
- '.' - '.'
extra-deps: [cond-0.4.1.1,murmur3-1.0.3] extra-deps: [murmur3-1.0.3]
resolver: lts-9.14 resolver: lts-11.9

View File

@ -8,11 +8,15 @@ import GEval.BLEU
import GEval.ClippEU import GEval.ClippEU
import GEval.PrecisionRecall import GEval.PrecisionRecall
import GEval.ClusteringMetrics import GEval.ClusteringMetrics
import GEval.BIO
import GEval.LineByLine
import Data.Attoparsec.Text import Data.Attoparsec.Text
import Options.Applicative import Options.Applicative
import Data.Text import Data.Text
import Text.EditDistance import Text.EditDistance
import Data.Conduit.List (consume)
import qualified Test.HUnit as HU import qualified Test.HUnit as HU
import Data.Conduit.SmartSource import Data.Conduit.SmartSource
@ -101,6 +105,16 @@ main = hspec $ do
runGEvalTest "log-loss-hashed-simple" `shouldReturnAlmost` 2.398479083333333 runGEvalTest "log-loss-hashed-simple" `shouldReturnAlmost` 2.398479083333333
it "example with unnormalized values" $ do it "example with unnormalized values" $ do
runGEvalTest "log-loss-hashed-not-normalized" `shouldReturnAlmost` 1.0468455186722887 runGEvalTest "log-loss-hashed-not-normalized" `shouldReturnAlmost` 1.0468455186722887
it "with probs instead of log probs" $ do
runGEvalTest "log-loss-hashed-probs" `shouldReturnAlmost` 4.11631293099392
it "with probs instead of log probs (with normalization)" $ do
runGEvalTest "log-loss-hashed-probs-normalized" `shouldReturnAlmost` 1.55537749098853
it "with log probs whose probs are summing up to less than 1.0" $ do
runGEvalTest "log-loss-hashed-normalization" `shouldReturnAlmost` 5.16395069238851
describe "LikelihoodHashed challenge" $ do
it "example with unnormalized values" $ do
runGEvalTest "likelihood-hashed-not-normalized" `shouldReturnAlmost` 0.351043364110715
describe "reading options" $ do describe "reading options" $ do
it "can get the metric" $ do it "can get the metric" $ do
extractMetric "bleu-complex" `shouldReturn` (Just BLEU) extractMetric "bleu-complex" `shouldReturn` (Just BLEU)
@ -188,11 +202,105 @@ main = hspec $ do
runGEvalTest "logloss-simple" `shouldReturnAlmost` 0.31824 runGEvalTest "logloss-simple" `shouldReturnAlmost` 0.31824
it "perfect" $ do it "perfect" $ do
runGEvalTest "logloss-perfect" `shouldReturnAlmost` 0.0 runGEvalTest "logloss-perfect" `shouldReturnAlmost` 0.0
describe "Likelihood" $ do
it "simple" $ do
runGEvalTest "likelihood-simple" `shouldReturnAlmost` 0.72742818469866
describe "evaluating single lines" $ do describe "evaluating single lines" $ do
it "RMSE" $ do it "RMSE" $ do
gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla") gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla")
(LineInFile "stub2" 1 "3.4") (LineInFile "stub2" 1 "3.4")
(LineInFile "stub3" 1 "2.6") `shouldReturnAlmost` 0.8 (LineInFile "stub3" 1 "2.6") `shouldReturnAlmost` 0.8
describe "BIO format" $ do
it "just parse" $ do
let (Right r) = parseOnly (bioSequenceParser <* endOfInput) "O B-city/NEW_YORK I-city B-city/KALISZ I-city O B-name"
r `shouldBe` [Outside,
Beginning "city" (Just "NEW_YORK"),
Inside "city" Nothing,
Beginning "city" (Just "KALISZ"),
Inside "city" Nothing,
Outside,
Beginning "name" Nothing]
it "simplest entity" $ do
let (Right ents) = parseBioSequenceIntoEntities "B-city"
ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "city" Nothing]
it "multi-word entity" $ do
let (Right ents) = parseBioSequenceIntoEntities "B-date I-date"
ents `shouldBe` [TaggedEntity (TaggedSpan 1 2) "date" Nothing]
it "multi-word entity with normalized text" $ do
let (Right ents) = parseBioSequenceIntoEntities "B-date/FOO I-date/BAR"
ents `shouldBe` [TaggedEntity (TaggedSpan 1 2) "date" (Just "FOO_BAR")]
it "simplest entity with something outside" $ do
let (Right ents) = parseBioSequenceIntoEntities "O B-city"
ents `shouldBe` [TaggedEntity (TaggedSpan 2 2) "city" Nothing]
it "another simple case" $ do
let (Right ents) = parseBioSequenceIntoEntities "B-city B-city"
ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "city" Nothing,
TaggedEntity (TaggedSpan 2 2) "city" Nothing]
it "just parse into entities" $ do
let (Right ents) = parseBioSequenceIntoEntities "O O B-city/LOS_ANGELES I-city B-city/KLUCZBORK O B-name O B-person/JOHN I-person/VON I-person/NEUMANN"
ents `shouldBe` [TaggedEntity (TaggedSpan 3 4) "city" (Just "LOS_ANGELES"),
TaggedEntity (TaggedSpan 5 5) "city" (Just "KLUCZBORK"),
TaggedEntity (TaggedSpan 7 7) "name" (Nothing),
TaggedEntity (TaggedSpan 9 11) "person" (Just "JOHN_VON_NEUMANN")]
it "another entity parse" $ do
let (Right ents) = parseBioSequenceIntoEntities "B-month/JULY B-month/JULY O O B-foo/bar"
ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "month" (Just "JULY"),
TaggedEntity (TaggedSpan 2 2) "month" (Just "JULY"),
TaggedEntity (TaggedSpan 5 5) "foo" (Just "bar")]
it "another entity parse" $ do
let (Right ents) = parseBioSequenceIntoEntities "B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK"
ents `shouldBe` [TaggedEntity (TaggedSpan 1 2) "city" (Just "LOS_ANGELES"),
TaggedEntity (TaggedSpan 4 5) "city" (Just "NEW_YORK")]
it "parse entity" $ do
let (Right ents) = parseBioSequenceIntoEntities "B-surname/BROWN B-surname/SMITH"
ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "surname" (Just "BROWN"),
TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")]
it "parse entity" $ do
let (Right ents) = parseBioSequenceIntoEntities "O B-surname/SMITH"
ents `shouldBe` [TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")]
it "check counting" $ do
gatherCountsForBIO [TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")] [TaggedEntity (TaggedSpan 1 1) "surname" (Just "BROWN"),
TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")] `shouldBe` (1, 1, 2)
it "check F1 on a more complicated example" $ do
runGEvalTest "bio-f1-complex" `shouldReturnAlmost` 0.625
it "check F1 on labels only" $ do
runGEvalTest "bio-f1-complex-labels" `shouldReturnAlmost` 0.6666666666
it "calculate F1" $ do
runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5
it "calculate F1 with underscores rather than minus signs" $ do
runGEvalTest "bio-f1-simple-underscores" `shouldReturnAlmost` 0.5
it "check perfect score" $ do
runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0
it "check inconsistent input" $ do
runGEvalTest "bio-f1-error" `shouldThrow` (== UnexpectedData 2 "inconsistent label sequence `B-NAME/JOHN I-FOO/SMITH I-FOO/X`")
describe "automatic decompression" $ do
it "more complex test" $ do
runGEvalTest "charmatch-complex-compressed" `shouldReturnAlmost` 0.1923076923076923
describe "line by line mode" $ do
let sampleChallenge =
GEvalSpecification
{ gesOutDirectory = "test/likelihood-simple/likelihood-simple-solution",
gesExpectedDirectory = Just "test/likelihood-simple/likelihood-simple",
gesTestName = "test-A",
gesOutFile = "out.tsv",
gesExpectedFile = "expected.tsv",
gesInputFile = "in.tsv",
gesMetric = Likelihood,
gesPrecision = Nothing }
it "simple test" $ do
results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge Data.Conduit.List.consume
Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo",
"bar",
"baz",
"baq"]
it "test sorting" $ do
results <- runLineByLineGeneralized FirstTheWorst sampleChallenge Data.Conduit.List.consume
Prelude.head (Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results) `shouldBe` "baq"
describe "handle --alt-metric option" $ do
it "accuracy instead of likelihood" $ do
runGEvalTestExtraOptions ["--alt-metric", "Accuracy"] "likelihood-simple" `shouldReturnAlmost` 0.75
it "accuracy instead of log loss" $ do
runGEvalTestExtraOptions ["--alt-metric", "Accuracy"] "log-loss-hashed-probs" `shouldReturnAlmost` 0.4
describe "smart sources" $ do describe "smart sources" $ do
it "smart specs are parsed" $ do it "smart specs are parsed" $ do
parseSmartSpec "" `shouldBe` NoSpec parseSmartSpec "" `shouldBe` NoSpec
@ -217,8 +325,8 @@ main = hspec $ do
parseSmartSpecInContext ["foo/bar"] Nothing "" `shouldBe` Nothing
it "sources are accessed" $ do
readFromSmartSource [] Nothing "test/files/foo.txt" `shouldReturn` ["foo\n"]
-- readFromSmartSource [] Nothing "https://httpbin.org/robots.txt" `shouldReturn`
--    ["User-agent: *\nDisallow: /deny\n"]
readFromSmartSource :: [FilePath] -> Maybe FilePath -> String -> IO [String]
readFromSmartSource defaultDirs defaultFile specS = do
@ -244,11 +352,13 @@ testMatchFun _ _ = False
extractVal :: (Either (ParserResult GEvalOptions) (Maybe MetricValue)) -> IO MetricValue
extractVal (Right (Just val)) = return val
runGEvalTest = runGEvalTestExtraOptions []

runGEvalTestExtraOptions extraOptions testName = (runGEval ([
  "--expected-directory",
  "test/" ++ testName ++ "/" ++ testName,
  "--out-directory",
  "test/" ++ testName ++ "/" ++ testName ++ "-solution"] ++ extraOptions)) >>= extractVal
extractMetric :: String -> IO (Maybe Metric)
extractMetric testName = do

View File

@ -0,0 +1,6 @@
B-wrong
B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK_CITY
B-surname/BROWN B-surname/SMIT
B-month B-month O O B-foo/bar
O B-class I-class I-class
O O

View File

@ -0,0 +1 @@
--metric BIO-F1-Labels

View File

@ -0,0 +1,6 @@
O
B-city/LOS I-city/ANGELES O B-city/NEW_YORK O
O B-surname/SMITH
B-month/JULY O O O B-foo/bar
O B-class I-class I-class
O O

View File

@ -0,0 +1,6 @@
B-wrong
B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK
B-surname/BROWN B-surname/SMITH
B-month/JULY B-month/JULY O O B-foo/bar
O B-class I-class I-class
O B-wrong

View File

@ -0,0 +1 @@
--metric BIO-F1

View File

@ -0,0 +1,6 @@
O
B-city/LOS I-city/ANGELES O B-city/NEW_YORK O
O B-surname/SMITH
B-month/JULY O O O B-foo/bar
O B-class I-class I-class
O O

View File

@ -0,0 +1,2 @@
O B-CITY/WARSZAWA I-CITY/WARSZAWA
O B-NAME/JOHN I-FOO/SMITH I-FOO/X O

View File

@ -0,0 +1 @@
--metric BIO-F1

View File

@ -0,0 +1,2 @@
O B-CITY/WARSZAWA I-CITY/WARSZAWA
O B-NAME/JOHN I-NAME/SMITH O O

View File

@ -0,0 +1,4 @@
O O O
O B-city/NEW I-city/YORK I-city/CITY O B-month/July
B-surname/SMITH
B-city/LONDON B-city/PARIS

View File

@ -0,0 +1 @@
--metric BIO-F1

View File

@ -0,0 +1,4 @@
O O O
O B-city/NEW I-city/YORK I-city/CITY O B-month/July
B-surname/SMITH
B-city/LONDON B-city/PARIS

View File

@ -0,0 +1,3 @@
O O B_city/POZNAŃ O O B_date/MARCH I_date/12
B_city/BUK O O O
B_name/FOO O B_surname/KOWALSKI

View File

@ -0,0 +1 @@
--metric BIO-F1

View File

@ -0,0 +1,3 @@
O O B_city/POZNAŃ O O B_date/MARCH I_date/12
O O O O
O B_city/KONIN O

View File

@ -0,0 +1,3 @@
O O B-city/POZNAŃ O O B-date/MARCH I-date/12
B-city/BUK O O O
B-name/FOO O B-surname/KOWALSKI

View File

@ -0,0 +1 @@
--metric BIO-F1

View File

@ -0,0 +1,3 @@
O O B-city/POZNAŃ O O B-date/MARCH I-date/12
O O O O
O B-city/KONIN O

View File

@ -0,0 +1 @@
--metric CharMatch

View File

@ -0,0 +1,2 @@
tak:10 nie:8.9
niebieski:0 żółty:1.5 czerwony:-0.5

View File

@ -0,0 +1 @@
--metric LikelihoodHashed8

View File

@ -0,0 +1,2 @@
tak
niebieski

View File

@ -0,0 +1,4 @@
0.7
0
0.0
0.6

View File

@ -0,0 +1 @@
--metric Likelihood

View File

@ -0,0 +1,4 @@
1
0
0
0

View File

@ -0,0 +1,4 @@
foo
bar
baz
baq

View File

@ -0,0 +1,3 @@
B:-1.20397280432594 A:-0.916290731874155
A:-2.3025850929940 C:-1.6094379124341
A:-2.3025850929940 C:-1.6094379124341 :-0.356674943938732

View File

@ -0,0 +1 @@
--metric LogLossHashed10

View File

@ -0,0 +1,3 @@
A
B
B

View File

@ -0,0 +1,3 @@
1:0.5 2:0.6 3:1.0 4:1.0 5:0.9
1:0.3 2:0.2 3:0.3
1:0.2 :0.6

View File

@ -0,0 +1 @@
--metric LogLossHashed10

View File

@ -0,0 +1,3 @@
1
3
1

View File

@ -0,0 +1,5 @@
A:0.6 B:0.4
C:0.2 A:0.1
A:0.4 C:0.4
D:1.0
C:0.4 B:0.5 :0.1

View File

@ -0,0 +1 @@
--metric LogLossHashed10

View File

@ -0,0 +1,5 @@
A
A
B
D
A