Merge branch 'master' into smart-conduit

This commit is contained in:
Filip Gralinski 2018-06-02 16:31:36 +02:00
commit 18ed47322e
52 changed files with 741 additions and 83 deletions

View File

@ -1,12 +1,15 @@
# GEval
GEval is a Haskell library (and a stand-alone tool) for evaluating the
GEval is a Haskell library and a stand-alone tool for evaluating the
results of solutions to machine learning challenges as defined on the
[Gonito](http://gonito.net) platform.
Note that GEval is only about machine learning evaluation. No actual
machine learning algorithms are available here.
The official repository is `git://gonito.net/geval`, browsable at
<https://gonito.net/gitlist/geval.git/>.
## Installing
You need [Haskell Stack](https://github.com/commercialhaskell/stack).

View File

@ -1,5 +1,5 @@
name: geval
version: 0.5.4.0
version: 0.7.0.0
synopsis: Machine learning evaluation tools
description: Please see README.md
homepage: http://github.com/name/project
@ -26,10 +26,12 @@ library
, GEval.LogLossHashed
, GEval.CharMatch
, GEval.LineByLine
, GEval.BIO
, Data.Conduit.AutoDecompress
, Data.Conduit.SmartSource
build-depends: base >= 4.7 && < 5
, cond
, conduit
, conduit >= 1.3.0
, conduit-combinators
, conduit-extra
, directory
@ -52,6 +54,11 @@ library
, bytestring
, http-conduit
, transformers
, word8
, primitive
, transformers-base
, bzlib-conduit
, lzma-conduit
default-language: Haskell2010
executable geval
@ -79,6 +86,7 @@ test-suite geval-test
, resourcet
, conduit
, conduit-extra
, conduit
ghc-options: -threaded -rtsopts -with-rtsopts=-N
default-language: Haskell2010

View File

@ -0,0 +1,45 @@
{-# LANGUAGE AllowAmbiguousTypes #-}
module Data.Conduit.AutoDecompress
(autoDecompress,
doNothing)
where
import Data.Conduit
import Data.Conduit.Combinators
import Data.ByteString
import Data.Conduit.Zlib
import Data.Word8
import Control.Monad.Trans.Resource (MonadThrow, MonadResource)
import Control.Monad.Primitive (PrimMonad)
import Control.Monad.Base (MonadBase)
import qualified Data.Conduit.Lzma as XZ
import qualified Data.Conduit.BZlib as BZ
autoDecompress :: (MonadResource m, MonadThrow m, PrimMonad m) => ConduitM ByteString ByteString m ()
autoDecompress = do
f <- await
case f of
Just chunk -> if Data.ByteString.length chunk > 1
then
do
let firstByte = Data.ByteString.head chunk
let secondByte = Data.ByteString.index chunk 1
leftover chunk
lookAtMagicNumbers (firstByte, secondByte)
else
do
leftover chunk
doNothing
Nothing -> return ()
lookAtMagicNumbers :: (MonadResource m, MonadThrow m, PrimMonad m) => (Word8, Word8) -> ConduitT ByteString ByteString m ()
lookAtMagicNumbers (31, 139) = ungzip
lookAtMagicNumbers (66, 90) = BZ.bunzip2
lookAtMagicNumbers (253, 55) = XZ.decompress Nothing
lookAtMagicNumbers _ = doNothing
doNothing :: Monad m => ConduitT a a m ()
doNothing = Data.Conduit.Combinators.filter (const True)

View File

@ -35,17 +35,17 @@ pureSmartSource _ (FileNameSpec fileName) = sourceFile fileName
pureSmartSource _ (FilePathSpec fileName) = sourceFile fileName
pureSmartSource [] (PossiblyGitSpec spec) = sourceFile spec
pureSmartSource (firstDir:_) (PossiblyGitSpec spec) = sourceFile (firstDir </> spec)
pureSmartSource _ (Https url) = httpSource url
pureSmartSource _ (Http url) = httpSource url
--pureSmartSource _ (Https url) = httpSource url
--pureSmartSource _ (Http url) = httpSource url
httpSource :: MonadResource m => String -> ConduitM () S.ByteString m ()
httpSource url = do
request <- liftIO $ parseRequest url
manager <- liftIO $ newManager tlsManagerSettings
response <- lift $ http request manager
(httpsource, finalizer) <- lift $ unwrapResumable (responseBody response)
httpsource
lift finalizer
-- httpSource :: MonadResource m => String -> ConduitM () S.ByteString m ()
-- httpSource url = do
-- request <- liftIO $ parseRequest url
-- manager <- liftIO $ newManager tlsManagerSettings
-- response <- lift $ http request manager
-- (httpsource, finalizer) <- lift $ unwrapResumable (responseBody response)
-- httpsource
-- lift finalizer
parseSmartSpec :: FilePath -> SmartSpec
parseSmartSpec "" = NoSpec

111
src/GEval/BIO.hs Normal file
View File

@ -0,0 +1,111 @@
{-# LANGUAGE OverloadedStrings #-}
module GEval.BIO
(BIOLabel(..), bioSequenceParser, parseBioSequenceIntoEntities,
TaggedSpan(..), TaggedEntity(..), gatherCountsForBIO,
eraseNormalisation)
where
import GEval.PrecisionRecall
import qualified Data.Text as T
import Data.Attoparsec.Text
import Data.Attoparsec.Combinator
import Control.Applicative
import Data.Char
import Data.Maybe (catMaybes)
import GEval.Common
data BIOLabel = Outside | Beginning T.Text (Maybe T.Text) | Inside T.Text (Maybe T.Text)
deriving (Eq, Show)
formatBioLabel :: BIOLabel -> T.Text
formatBioLabel Outside = "O"
formatBioLabel (Beginning label Nothing) = T.concat ["B-", label]
formatBioLabel (Beginning label (Just normalized)) = T.concat ["B-", label, "/", normalized]
formatBioLabel (Inside label Nothing) = T.concat ["I-", label]
formatBioLabel (Inside label (Just normalized)) = T.concat ["I-", label, "/", normalized]
data TaggedSpan = TaggedSpan Int Int
deriving (Eq, Show)
data TaggedEntity = TaggedEntity TaggedSpan T.Text (Maybe T.Text)
deriving (Eq, Show)
eraseNormalisation :: TaggedEntity -> TaggedEntity
eraseNormalisation (TaggedEntity span label normalized) = (TaggedEntity span label Nothing)
gatherCountsForBIO :: [TaggedEntity] -> [TaggedEntity] -> (Int, Int, Int)
gatherCountsForBIO expected got = (maxMatchOnOrdered laterThan expected got, length expected, length got)
where
laterThan (TaggedEntity (TaggedSpan a _) _ _) (TaggedEntity (TaggedSpan b _) _ _) = a > b
parseBioSequenceIntoEntities :: T.Text -> Either String [TaggedEntity]
parseBioSequenceIntoEntities t = labelsIntoEntities =<< (parseOnly (bioSequenceParser <* endOfInput) t)
labelsIntoEntities :: [BIOLabel] -> Either String [TaggedEntity]
labelsIntoEntities labels = labelsIntoEntities' $ zip labels [1..]
labelsIntoEntities' :: [(BIOLabel, Int)] -> Either String [TaggedEntity]
labelsIntoEntities' labelsWithPositions = mapM labelSplitToEntity labelsGathered
where labelsGathered = splitLabels labelsWithPositions
labelSplitToEntity :: [(BIOLabel, Int)] -> Either String TaggedEntity
labelSplitToEntity labs@(h@(_,begIx):t) = if isBeginning h && all (\tp -> isInside tp && tt tp == btp) t
then
Right $ TaggedEntity (TaggedSpan begIx lastItemIx) btp mNormalized
else
Left $ "inconsistent label sequence `" ++ (T.unpack $ T.intercalate " " $ map (formatBioLabel . fst) labs) ++ "`"
where isBeginning (Beginning _ _, _) = True
isBeginning _ = False
isInside (Inside _ _, _) = True
isInside _ = False
tt (Beginning t _, _) = t
tt (Inside t _, _) = t
btp = tt h
lastItemIx = case t of
[] -> begIx
_ -> let (_, ix) = last t
in ix
normalized (Beginning _ n, _) = n
normalized (Inside _ n, _) = n
mNormalized = if all (\tp -> normalized tp == Nothing) labs
then
Nothing
else
Just $ T.intercalate "_" $ catMaybes $ map normalized labs
splitLabels :: [(BIOLabel, Int)] -> [[(BIOLabel, Int)]]
splitLabels [] = []
splitLabels ((Outside, _):r) = splitLabels r
splitLabels (e@(_, ix):r) =
case splitLabels r of
l@(((Beginning _ _, _):_):_) -> ([e]:l)
(s@((Inside _ _, ix'):_):l) -> if ix' == ix + 1
then
((e:s):l)
else
([e]:(s:l))
[] -> [[e]]
bioSequenceParser :: Parser [BIOLabel]
bioSequenceParser = sepByWhitespaces bioLabelParser
bioLabelParser :: Parser BIOLabel
bioLabelParser =
(string "O" *> pure Outside) <|>
(do
labelType <- bioMarkerParser
(string "-" <|> string "_")
label <- takeWhile1 (\c -> not (isSpace c) && c /= '/')
normalized <- (do
string "/"
normalized <- takeWhile1 (not . isSpace)
return $ Just normalized) <|> pure Nothing
return $ labelType label normalized)
bioMarkerParser :: Parser (T.Text -> Maybe T.Text -> BIOLabel)
bioMarkerParser =
(string "B" *> pure Beginning) <|> (string "I" *> pure Inside)

View File

@ -38,3 +38,7 @@ sepByWhitespaces parser = possibleWhitespace *> parser `sepBy` whitespace <* pos
possibleWhitespace = many' (satisfy isHorizontalSpace)
whitespace = many1 (satisfy isHorizontalSpace)
indicator :: Bool -> Double
indicator True = 1.0
indicator False = 0.0

View File

@ -16,6 +16,7 @@ module GEval.Core
MetricValue,
GEvalSpecialCommand(..),
GEvalSpecification(..),
ResultOrdering(..),
GEvalOptions(..),
GEvalException(..),
defaultGEvalSpecification,
@ -53,10 +54,11 @@ import qualified System.Directory as D
import System.Posix
import System.FilePath
import Data.Maybe
import Data.Tuple
import qualified Data.List.Split as DLS
import Control.Monad.IO.Class
import Control.Monad ((<=<))
import Control.Monad ((<=<), filterM)
import Data.Attoparsec.Text (parseOnly)
@ -69,6 +71,8 @@ import GEval.PrecisionRecall
import GEval.ClusteringMetrics
import GEval.LogLossHashed
import GEval.CharMatch
import GEval.BIO
import Data.Conduit.AutoDecompress
import qualified Data.HashMap.Strict as M
@ -82,7 +86,7 @@ defaultLogLossHashedSize :: Word32
defaultLogLossHashedSize = 10
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch
| MAP | LogLoss
| MAP | LogLoss | Likelihood | BIOF1 | BIOF1Labels | LikelihoodHashed Word32
deriving (Eq)
instance Show Metric where
@ -99,9 +103,18 @@ instance Show Metric where
""
else
(show nbOfBits))
show (LikelihoodHashed nbOfBits) = "LikelihoodHashed" ++ (if
nbOfBits == defaultLogLossHashedSize
then
""
else
(show nbOfBits))
show CharMatch = "CharMatch"
show MAP = "MAP"
show LogLoss = "LogLoss"
show Likelihood = "Likelihood"
show BIOF1 = "BIO-F1"
show BIOF1Labels = "BIO-F1-Labels"
instance Read Metric where
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
@ -116,9 +129,15 @@ instance Read Metric where
readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
[(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
_ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
readsPrec p ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
[(nbOfBits, theRest)] -> [(LikelihoodHashed nbOfBits, theRest)]
_ -> [(LikelihoodHashed defaultLogLossHashedSize, theRest)]
readsPrec _ ('L':'o':'g':'L':'o':'s':'s':theRest) = [(LogLoss, theRest)]
readsPrec _ ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(Likelihood, theRest)]
readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
@ -131,9 +150,13 @@ getMetricOrdering ClippEU = TheHigherTheBetter
getMetricOrdering (FMeasure _) = TheHigherTheBetter
getMetricOrdering NMI = TheHigherTheBetter
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
getMetricOrdering CharMatch = TheHigherTheBetter
getMetricOrdering MAP = TheHigherTheBetter
getMetricOrdering LogLoss = TheLowerTheBetter
getMetricOrdering Likelihood = TheHigherTheBetter
getMetricOrdering BIOF1 = TheHigherTheBetter
getMetricOrdering BIOF1Labels = TheHigherTheBetter
defaultOutDirectory = "."
defaultTestName = "test-A"
@ -163,11 +186,13 @@ getExpectedDirectory spec = fromMaybe outDirectory $ gesExpectedDirectory spec
data GEvalSpecialCommand = Init | LineByLine | Diff FilePath
data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest
data GEvalOptions = GEvalOptions
{ geoSpecialCommand :: Maybe GEvalSpecialCommand,
geoResultOrdering :: ResultOrdering,
geoSpec :: GEvalSpecification }
data GEvalException = NoExpectedFile FilePath
| NoOutFile FilePath
| NoExpectedDirectory FilePath
@ -236,11 +261,14 @@ checkAndGetFiles gevalSpec = do
unlessM (D.doesDirectoryExist expectedDirectory) $ throwM $ NoExpectedDirectory expectedDirectory
unlessM (D.doesDirectoryExist outTestDirectory) $ throwM $ NoOutTestDirectory outTestDirectory
unlessM (D.doesDirectoryExist expectedTestDirectory) $ throwM $ NoExpectedTestDirectory expectedTestDirectory
inputFilePath <- lookForCompressedFiles inputFilePath'
expectedFilePath <- lookForCompressedFiles expectedFilePath'
outFilePath <- lookForCompressedFiles outFilePath'
checkInputFileIfNeeded metric inputFilePath
return (inputFilePath, expectedFilePath, outFilePath)
where expectedFilePath = expectedTestDirectory </> (gesExpectedFile gevalSpec)
outFilePath = getOutFile gevalSpec (gesOutFile gevalSpec)
inputFilePath = expectedTestDirectory </> (gesInputFile gevalSpec)
where expectedFilePath' = expectedTestDirectory </> (gesExpectedFile gevalSpec)
outFilePath' = getOutFile gevalSpec (gesOutFile gevalSpec)
inputFilePath' = expectedTestDirectory </> (gesInputFile gevalSpec)
expectedTestDirectory = expectedDirectory </> testName
outTestDirectory = outDirectory </> testName
expectedDirectory = getExpectedDirectory gevalSpec
@ -248,6 +276,24 @@ checkAndGetFiles gevalSpec = do
testName = gesTestName gevalSpec
metric = gesMetric gevalSpec
lookForCompressedFiles :: FilePath -> IO FilePath
lookForCompressedFiles = lookForAlternativeFiles [".gz", ".xz", ".bz2"]
lookForAlternativeFiles :: [String] -> FilePath -> IO FilePath
lookForAlternativeFiles suffixes filePath
| takeExtension filePath `Prelude.elem` suffixes = return filePath
| otherwise = do
fileIsThere <- D.doesFileExist filePath
if fileIsThere
then
return filePath
else
do
found <- Control.Monad.filterM D.doesFileExist $ Prelude.map (filePath <.>) suffixes
return $ case found of
[fp] -> fp
_ -> filePath
getOutFile :: GEvalSpecification -> FilePath -> FilePath
getOutFile gevalSpec out = outDirectory </> testName </> out
where outDirectory = gesOutDirectory gevalSpec
@ -261,7 +307,7 @@ checkInputFileIfNeeded _ _ = return ()
fileAsLineSource :: FilePath -> LineSource (ResourceT IO)
fileAsLineSource filePath =
LineSource (smartSource [] Nothing (parseSmartSpec filePath) $= CT.decodeUtf8Lenient =$= CT.lines) filePath 1
LineSource (smartSource [] Nothing (parseSmartSpec filePath) $= autoDecompress $= CT.decodeUtf8Lenient =$= CT.lines) filePath 1
gevalCoreOnSingleLines :: Metric -> LineInFile -> LineInFile -> LineInFile -> IO (MetricValue)
gevalCoreOnSingleLines metric inpLine expLine outLine =
@ -283,7 +329,9 @@ gevalCore metric inputFilePath expectedFilePath outFilePath = do
(fileAsLineSource expectedFilePath)
(fileAsLineSource outFilePath)
gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric
logLossToLikehood logLoss = exp (-logLoss)
gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => Metric
-> LineSource (ResourceT m)
-> LineSource (ResourceT m)
-> LineSource (ResourceT m)
@ -292,12 +340,20 @@ gevalCoreOnSources RMSE inputLineSource expectedLineSource outLineSource = do
mse <- gevalCoreOnSources MSE inputLineSource expectedLineSource outLineSource
return $ mse ** 0.5
gevalCoreOnSources Likelihood inputLineSource expectedLineSource outLineSource = do
logLoss <- gevalCoreOnSources LogLoss inputLineSource expectedLineSource outLineSource
return $ logLossToLikehood logLoss
gevalCoreOnSources (LikelihoodHashed b) inputLineSource expectedLineSource outLineSource = do
logLoss <- gevalCoreOnSources (LogLossHashed b) inputLineSource expectedLineSource outLineSource
return $ logLossToLikehood logLoss
gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do
gevalCore' metric inputLineSource expectedLineSource outLineSource
data LineInFile = LineInFile FilePath Word32 Text
gevalCore' :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
gevalCore' :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
gevalCore' MSE _ = gevalCoreWithoutInput outParser outParser itemError averageC id
where outParser = getValue . TR.double
@ -315,9 +371,18 @@ gevalCore' BLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . D
| otherwise = exp (1.0 - (r /. c))
gevalCore' Accuracy _ = gevalCoreWithoutInput (Right . strip) (Right . strip) hitOrMiss averageC id
where hitOrMiss (exp,got) = if (normalizeProbForAccuracy exp got) == exp then 1.0 else 0.0
-- if the expected value is 0 or 1 treat values between 0.0 and 1.0 as probabilities
-- for the positive outcome
where hitOrMiss (exp, got) =
-- first try to parse what we got as a probability distribution
-- (like the one used for Likelikehood/LogLossHashed metric)
case parseWordSpecs got of
Right wordSpecs -> if Prelude.null pairs
then 0.0
else indicator (exp == (snd $ Prelude.maximum pairs))
where pairs = catMaybes $ Prelude.map wordSpecToPair wordSpecs
Left _ -> indicator ((normalizeProbForAccuracy exp got) == exp)
-- if the expected value is 0 or 1 treat values
-- between 0.0 and 1.0 as probabilities
-- for the positive outcome
normalizeProbForAccuracy :: Text -> Text -> Text
normalizeProbForAccuracy exp got
| exp == (pack "1") = case tryReadingAsFloat got of
@ -383,7 +448,16 @@ gevalCore' CharMatch inputLineSource = helper inputLineSource
helper inputLineSource expectedLineSource outputLineSource = do
gevalCoreGeneralized (ParserSpecWithInput (Right . unpack) (Right . unpack) (Right . unpack)) step countAgg (fMeasureOnCounts charMatchBeta) (WithInput inputLineSource expectedLineSource outputLineSource)
step (ParsedRecordWithInput inp exp out) = getCharMatchCount inp exp out
countAgg = CC.foldl countFolder (0, 0, 0)
gevalCore' BIOF1 _ = gevalCoreWithoutInput parseBioSequenceIntoEntities parseBioSequenceIntoEntities (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts
gevalCore' BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWithoutNormalization parseBioSequenceIntoEntitiesWithoutNormalization (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts
where parseBioSequenceIntoEntitiesWithoutNormalization s = do
entities <- parseBioSequenceIntoEntities s
return $ Prelude.map eraseNormalisation entities
countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int)
countAgg = CC.foldl countFolder (0, 0, 0)
parseDistributionWrapper :: Word32 -> Word32 -> Text -> HashedDistribution
parseDistributionWrapper nbOfBits seed distroSpec = case parseDistribution nbOfBits seed distroSpec of
@ -395,25 +469,25 @@ data SourceItem a = Got a | Wrong String | Done
skipLineNumber :: (x -> c) -> ((Word32, x) -> c)
skipLineNumber fun = fun . snd
gevalCoreWithoutInput :: (MonadBaseControl IO m, MonadThrow m, MonadIO m) => (Text -> Either String a) -> (Text -> Either String b) -> ((a, b) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
gevalCoreWithoutInput :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => (Text -> Either String a) -> (Text -> Either String b) -> ((a, b) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
gevalCoreWithoutInput expParser outParser itemStep aggregator finalStep expectedLineStream outLineStream =
gevalCoreGeneralized (ParserSpecWithoutInput expParser outParser) (trans itemStep) aggregator finalStep (WithoutInput expectedLineStream outLineStream)
where
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
trans step (ParsedRecordWithoutInput x y) = step (x, y)
gevalCore''' :: (MonadBaseControl IO m, MonadThrow m, MonadIO m) => ParserSpec (WithoutInput m a b) -> ((Word32, (a, b)) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> WithoutInput m a b -> m (MetricValue)
gevalCore''' :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec (WithoutInput m a b) -> ((Word32, (a, b)) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> WithoutInput m a b -> m (MetricValue)
gevalCore''' parserSpec itemStep aggregator finalStep context =
gevalCoreGeneralized' parserSpec (trans itemStep) aggregator finalStep context
where
trans :: ((Word32, (a, b)) -> c) -> (Word32, ParsedRecord (WithoutInput m a b)) -> c
trans step (n, ParsedRecordWithoutInput x y) = step (n, (x, y))
gevalCoreGeneralized :: (EvaluationContext ctxt m, MonadBaseControl IO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> (ParsedRecord ctxt -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue)
gevalCoreGeneralized :: (EvaluationContext ctxt m, MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> (ParsedRecord ctxt -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue)
gevalCoreGeneralized parserSpec itemStep aggregator finalStep context =
gevalCoreGeneralized' parserSpec (skipLineNumber itemStep) aggregator finalStep context
gevalCoreGeneralized' :: forall m ctxt c d . (EvaluationContext ctxt m, MonadBaseControl IO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> ((Word32, ParsedRecord ctxt) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue)
gevalCoreGeneralized' :: forall m ctxt c d . (EvaluationContext ctxt m, MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> ((Word32, ParsedRecord ctxt) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue)
gevalCoreGeneralized' parserSpec itemStep aggregator finalStep context = do
v <- runResourceT $
(((getZipSource $ (,)
@ -434,7 +508,7 @@ class EvaluationContext ctxt m where
data WithoutInput m e o = WithoutInput (LineSource (ResourceT m)) (LineSource (ResourceT m))
instance (MonadBaseControl IO m, MonadIO m, MonadThrow m) => EvaluationContext (WithoutInput m e o) m where
instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithoutInput m e o) m where
data ParserSpec (WithoutInput m e o) = ParserSpecWithoutInput (Text -> Either String e) (Text -> Either String o)
data WrappedParsedRecord (WithoutInput m e o) = WrappedParsedRecordWithoutInput (SourceItem e) (SourceItem o)
data ParsedRecord (WithoutInput m e o) = ParsedRecordWithoutInput e o
@ -463,7 +537,7 @@ data WithInput m i e o = WithInput (LineSource (ResourceT m)) (LineSource (Resou
getInputFilePath (WithInput (LineSource _ inputFilePath _) _ _) = inputFilePath
instance (MonadBaseControl IO m, MonadIO m, MonadThrow m) => EvaluationContext (WithInput m i e o) m where
instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithInput m i e o) m where
data ParserSpec (WithInput m i e o) = ParserSpecWithInput (Text -> Either String i) (Text -> Either String e) (Text -> Either String o)
data WrappedParsedRecord (WithInput m i e o) = WrappedParsedRecordWithInput (SourceItem i) (SourceItem e) (SourceItem o)
data ParsedRecord (WithInput m i e o) = ParsedRecordWithInput i e o

View File

@ -101,6 +101,8 @@ Cluster proverbs for languages.
This is a sample challenge for flat clustering (unsupervised learning challenge).
|] ++ (commonReadmeMDContents testName)
readmeMDContents (LikelihoodHashed b) testname = readmeMDContents (LogLossHashed b) testname
readmeMDContents (LogLossHashed _) testName = [i|
GEval sample challenge language model evaluation
==================================================
@ -113,6 +115,39 @@ The metric is average log-loss calculated for 10-bit hashes.
Train file is a just text file (one utterance per line).
In an input file, left and right contexts (TAB-separated) are given.
In an expected file, the word to be guessed is given.
Format of the output files
--------------------------
For each input line, a probability distribution for words in a gap
must be given:
word1:logprob1 word2:logprob2 ... wordN:logprobN :logprob0
where *logprobi* is the logarithm of the probability for *wordi* and
*logprob0* is the logarithm of the probability mass for all the other
words (it will be spread between all 1024 fingerprint values). If the
respective probabilities do not sum up to 1:
* if the sum is larger than 0.0 and smaller than 1.0, and no logprob0
is given, log of the remaining probablity mass will be assigned to logprob0,
* otherwise they will be normalised with.
softmax
Note: the separator here is space, not TAB!
### Probs
Probabilities could be given (instead of logprobs):
* if **all** values look as probs and **at least value** is positive, we treat
the values as probs rather then logprobs (single value 0.0 is treated
as a logprob, i.e. probability 1.0!);
* if their sum is greater than 1.0, then we normalize simply by dividing by the sum;
* if the sum is smaller than 1.0 and there is no entry for all the other words,
we add such an entry for the missing probability mass;
* if the sum is smaller than 1.0 and there is an entry for all the other words,
we normalize by dividing by the sum.
|] ++ (commonReadmeMDContents testName)
readmeMDContents CharMatch testName = [i|
@ -170,6 +205,29 @@ This a sample challenge for the log-loss metric.
|] ++ (commonReadmeMDContents testName)
readmeMDContents Likelihood testName = [i|
Give the probability of a positive sentiment
============================================
Give the probability that a sentence expresses a positive sentiment.
This a sample challenge for the likelihood metric.
|] ++ (commonReadmeMDContents testName)
readmeMDContents BIOF1Labels testName = readmeMDContents BIOF1 testName
readmeMDContents BIOF1 testName = [i|
Tag and normalize names
=======================
Tag names in the tokenized text and normalized them.
The output should be given in the BIO format with the normalized forms given after slashes (see
`dev-0/expected.tsv` for an example).
The metric is F1 counted on entities (not labels).
|] ++ (commonReadmeMDContents testName)
readmeMDContents _ testName = [i|
GEval sample challenge
======================
@ -239,6 +297,7 @@ trainContents NMI = [hereLit|pl Kto pod kim dołki kopie, ten sam w nie wpada.
en The pen is mightier than the sword.
pl Baba z wozu, koniom lżej.
|]
trainContents (LikelihoodHashed b) = trainContents (LogLossHashed b)
trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota
Basia ma psa
Nie kupujemy kota w worku
@ -254,11 +313,17 @@ honour GB honor
titbit GB smakołyk
tidbit US smakołyk
|]
trainContents Likelihood = trainContents LogLoss
trainContents LogLoss = [hereLit|0.0 Hell, no!!!
0.0 I hate this stuff
1.0 Lekker!!!
0.0 Boring, boring, boring
|]
trainContents BIOF1Labels = trainContents BIOF1
trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surname/BOND My name is Bond , James Bond
O O O O O There is no name here
B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman
|]
trainContents _ = [hereLit|0.06 0.39 0 0.206
1.00 1.00 1 0.017
317.8 5.20 67 0.048
@ -279,6 +344,7 @@ When the going gets tough, the tough get going.
devInContents (FMeasure _) = [hereLit|b b W 29520 779 -28 -32 a 0 0 0 0 0 0 0 0 0 0
b b W 55200 1259 35 9 a 1 0 1 0 0 0 0 0 4000 4000
|]
devInContents (LikelihoodHashed b) = devInContents (LogLossHashed b)
devInContents (LogLossHashed _) = [hereLit|Nie kupuj w worku
Ona psa
|]
@ -290,10 +356,15 @@ devInContents MAP = [hereLit|US noc
GB wózek dziecięcy
GB wizualizować
|]
devInContents Likelihood = devInContents LogLoss
devInContents LogLoss = [hereLit|Great stuff!
Boring stuff
That's good
|]
devInContents BIOF1Labels = devInContents BIOF1
devInContents BIOF1 = [hereLit|Adam and Eve
Mr Jan Kowalski
|]
devInContents _ = [hereLit|0.72 0 0.007
9.54 62 0.054
|]
@ -312,6 +383,7 @@ devExpectedContents NMI = [hereLit|en
pl
en
|]
devExpectedContents (LikelihoodHashed b) = devExpectedContents (LogLossHashed b)
devExpectedContents (LogLossHashed _) = [hereLit|kota
ma
|]
@ -323,10 +395,15 @@ devExpectedContents MAP = [hereLit|night nite
pram
visualise
|]
devExpectedContents Likelihood = devExpectedContents LogLoss
devExpectedContents LogLoss = [hereLit|1.0
0.0
1.0
|]
devExpectedContents BIOF1Labels = devExpectedContents BIOF1
devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE
O B-firstname/JAN B-surname/KOWALSKI
|]
devExpectedContents _ = [hereLit|0.82
95.2
|]
@ -347,6 +424,7 @@ W marcu, jak w garncu.
A cada necio agrada su porrada.
Kwiecień plecień, bo przeplata trochę zimy, trochę lata.
|]
testInContents (LikelihoodHashed b) = testInContents (LogLossHashed b)
testInContents (LogLossHashed _) = [hereLit|Ala ma
Ona ma kota worku
|]
@ -358,10 +436,15 @@ testInContents MAP = [hereLit|US wózek dziecięcy
GB słoń
US słoń
|]
testInContents Likelihood = testInContents LogLoss
testInContents LogLoss = [hereLit|That's great, ha, ha, I love it!
Super-duper!!
That is incredibly boring.
|]
testInContents BIOF1Labels = testInContents BIOF1
testInContents BIOF1 = [hereLit|Alan Tring
No name here
|]
testInContents _ = [hereLit|1.52 2 0.093
30.06 14 0.009
|]
@ -382,6 +465,7 @@ pl
es
pl
|]
testExpectedContents (LikelihoodHashed b) = testExpectedContents (LogLossHashed b)
testExpectedContents (LogLossHashed _) = [hereLit|ma
w
|]
@ -393,10 +477,15 @@ testExpectedContents MAP = [hereLit|trolley
elephant
elephant
|]
testExpectedContents Likelihood = testExpectedContents LogLoss
testExpectedContents LogLoss = [hereLit|1.0
1.0
0.0
|]
testExpectedContents BIOF1Labels = testExpectedContents BIOF1
testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING
O O O
|]
testExpectedContents _ = [hereLit|0.11
17.2
|]

View File

@ -9,17 +9,25 @@
module GEval.LineByLine
(runLineByLine,
runDiff
runLineByLineGeneralized,
runDiff,
runDiffGeneralized,
LineRecord(..),
ResultOrdering(..)
) where
import GEval.Core
import Data.Conduit.AutoDecompress (doNothing)
import Data.Conduit
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Combinators as CC
import Data.Text
import Data.Text.Encoding
import Data.List (sortBy, sort)
import Control.Monad.IO.Class
import Control.Monad.Trans.Resource
@ -30,13 +38,10 @@ import Text.Printf
data LineRecord = LineRecord Text Text Text Word32 MetricValue
deriving (Eq, Show)
runLineByLine :: GEvalSpecification -> IO ()
runLineByLine spec = do
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum
where metric = gesMetric spec
consum :: Consumer LineRecord (ResourceT IO) ()
consum = (CL.map (encodeUtf8 . formatOutput) =$= CC.unlinesAscii =$= CC.stdout)
runLineByLine :: ResultOrdering -> GEvalSpecification -> IO ()
runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum
where consum :: ConduitT LineRecord Void (ResourceT IO) ()
consum = (CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
formatScore score,
escapeTabs inp,
@ -45,19 +50,27 @@ runLineByLine spec = do
formatScore :: MetricValue -> Text
formatScore = Data.Text.pack . printf "%f"
runDiff :: FilePath -> GEvalSpecification -> IO ()
runDiff otherOut spec = do
let otherOutFilePath = getOutFile spec otherOut
runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
runLineByLineGeneralized ordering spec consum = do
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath
let sourceB = gevalLineByLineSource metric inputFilePath expectedFilePath otherOutFilePath
runResourceT $
((getZipSource $ (,)
<$> ZipSource sourceA
<*> ZipSource sourceB) $$ consum)
gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath (sorter ordering .| consum)
where metric = gesMetric spec
consum :: Consumer (LineRecord, LineRecord) (ResourceT IO) ()
consum = (CL.filter shouldBeShown =$= CL.map (encodeUtf8 . formatOutput) =$= CC.unlinesAscii =$= CC.stdout)
sorter KeepTheOriginalOrder = doNothing
sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
sortOrder FirstTheWorst TheHigherTheBetter = compareScores
sortOrder FirstTheBest TheLowerTheBetter = compareScores
sortOrder _ _ = flip compareScores
compareScores (LineRecord _ _ _ _ s1) (LineRecord _ _ _ _ s2) = s1 `compare` s2
gobbleAndDo :: Monad m => ([a] -> [b]) -> ConduitT a b m ()
gobbleAndDo fun = do
l <- CC.sinkList
CC.yieldMany $ fun l
runDiff :: ResultOrdering -> FilePath -> GEvalSpecification -> IO ()
runDiff ordering otherOut spec = runDiffGeneralized ordering otherOut spec consum
where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
consum = (CL.filter shouldBeShown .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) =
outA /= outB && scoreA /= scoreB
formatOutput (LineRecord inp exp outA _ scoreA, LineRecord _ _ outB _ scoreB) = Data.Text.intercalate "\t" [
@ -69,19 +82,40 @@ runDiff otherOut spec = do
formatScoreDiff :: Double -> Text
formatScoreDiff = Data.Text.pack . printf "%f"
runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a
runDiffGeneralized ordering otherOut spec consum = do
let otherOutFilePath = getOutFile spec otherOut
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath
let sourceB = gevalLineByLineSource metric inputFilePath expectedFilePath otherOutFilePath
runResourceT $ runConduit $
((getZipSource $ (,)
<$> ZipSource sourceA
<*> ZipSource sourceB) .| sorter ordering .| consum)
where metric = gesMetric spec
sorter KeepTheOriginalOrder = doNothing
sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
sortOrder FirstTheWorst TheHigherTheBetter = compareScores
sortOrder FirstTheBest TheLowerTheBetter = compareScores
sortOrder _ _ = flip compareScores
compareScores ((LineRecord _ _ _ _ o1), (LineRecord _ _ _ _ n1))
((LineRecord _ _ _ _ o2), (LineRecord _ _ _ _ n2))
= (n1 - o1) `compare` (n2 - o2)
escapeTabs :: Text -> Text
escapeTabs = Data.Text.replace "\t" "<tab>"
gevalLineByLineCore :: Metric -> FilePath -> FilePath -> FilePath -> Sink LineRecord (ResourceT IO) () -> IO ()
gevalLineByLineCore :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum =
runResourceT $
((gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath) $$ consum)
runResourceT $ runConduit $
((gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath) .| consum)
gevalLineByLineSource :: Metric -> FilePath -> FilePath -> FilePath -> Source (ResourceT IO) LineRecord
gevalLineByLineSource :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT () LineRecord (ResourceT IO) ()
gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath =
(getZipSource $ (,)
<$> ZipSource (CL.sourceList [1..])
<*> (ZipSource $ recordSource context parserSpec)) =$= CL.mapM (checkStepM evaluateLine) =$= CL.catMaybes
<*> (ZipSource $ recordSource context parserSpec)) .| CL.mapM (checkStepM evaluateLine) .| CL.catMaybes
where parserSpec = (ParserSpecWithInput (Right . id) (Right . id) (Right . id))
context = (WithInput inputLineSource expectedLineSource outputLineSource)
inputLineSource = fileAsLineSource inputFilePath

View File

@ -1,7 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
module GEval.LogLossHashed
(HashedDistribution, parseDistribution, calculateLogLoss)
(HashedDistribution, parseDistribution, calculateLogLoss, parseWordSpecs, wordSpecToPair)
where
import qualified Data.Vector as V
@ -36,24 +36,41 @@ parseDistribution nbOfBits seed distroSpec =
-- a direct list of 2^nbOfBits log probs
else parseDistributionFromLogProbList nbOfBits distroSpec
isProbTotalIncorrect :: Double -> Bool
isProbTotalIncorrect probTotal = probTotal > 1.0 || probTotal < (1.0 - epsilon)
where epsilon = 0.00000001
normalizeDistribution :: HashedDistribution -> HashedDistribution
normalizeDistribution distro =
-- we do softmax (if needed)
if probSum > 1.0 || probSum < (1.0 - epsilon)
if isProbTotalIncorrect probSum
then normalized
else distro
where probSum = V.foldl' (\s l -> (s + exp l)) 0.0 distro
normalized = V.map (\v -> log ((exp v) / probSum)) distro
epsilon = 0.00000001
type DistroMonad s = ReaderT (VM.MVector s Double) (ST s)
data WordSpec = AnyWord | SpecificWord T.Text
deriving (Eq, Show)
isAnyWord AnyWord = True
isAnyWord _ = False
data WordSpecWithLogProb = WordSpecWithLogProb WordSpec Double
wordSpecToPair :: WordSpecWithLogProb -> Maybe (Double, T.Text)
wordSpecToPair (WordSpecWithLogProb AnyWord _) = Nothing
wordSpecToPair (WordSpecWithLogProb (SpecificWord w) lp) = Just (lp, w)
parseDistributionFromWordList :: Word32 -> Word32 -> T.Text -> Either String HashedDistribution
parseDistributionFromWordList nbOfBits seed distroSpec = (parseDistributionFromWordList' nbOfBits seed) =<< (
processEithers $ map getWordSpecWithLogProb $ T.splitOn " " distroSpec)
parseDistributionFromWordList nbOfBits seed distroSpec = (parseDistributionFromWordList' nbOfBits seed) =<<
normalizeLogProbs =<<
lookForProbs =<<
(parseWordSpecs distroSpec)
parseWordSpecs :: T.Text -> Either String [WordSpecWithLogProb]
parseWordSpecs distroSpec = processEithers $ map getWordSpecWithLogProb $ T.splitOn " " distroSpec
getWordSpecWithLogProb :: T.Text -> Either String WordSpecWithLogProb
getWordSpecWithLogProb t =
@ -77,6 +94,42 @@ parseDistributionFromWordList' nbOfBits seed specs = runST $ do
frozen <- V.freeze emp
return $ Right frozen
lookForProbs :: [WordSpecWithLogProb] -> Either String [WordSpecWithLogProb]
lookForProbs specs
| areProbs specs = Right $ toLogProbs $ normalizeProbs specs
| otherwise = Right $ specs
areProbs :: [WordSpecWithLogProb] -> Bool
areProbs specs = all isProb specs && any isPositiveProb specs
where isProb (WordSpecWithLogProb _ v) = v >= 0.0 && v <= 1.0
isPositiveProb (WordSpecWithLogProb _ p) = p > 0.0 && p <= 1.0
toLogProbs :: [WordSpecWithLogProb] -> [WordSpecWithLogProb]
toLogProbs = map (\(WordSpecWithLogProb w p) -> (WordSpecWithLogProb w (log p)))
normalizeLogProbs :: [WordSpecWithLogProb] -> Either String [WordSpecWithLogProb]
normalizeLogProbs specs = if isProbTotalIncorrect probTotal
&& probTotal < 1.0 && probTotal > 0.0
&& not (any (\(WordSpecWithLogProb w _) -> isAnyWord w) specs)
&& all (\(WordSpecWithLogProb _ lp) -> lp <= 0) specs
then
Right ((WordSpecWithLogProb AnyWord (log (1-probTotal))):specs)
else
Right specs
where probTotal = sum $ map (\(WordSpecWithLogProb _ logp) -> exp logp) specs
normalizeProbs :: [WordSpecWithLogProb] -> [WordSpecWithLogProb]
normalizeProbs specs = if isProbTotalIncorrect probTotal
then
if probTotal > 1.0 || any (\(WordSpecWithLogProb w _) -> isAnyWord w) specs
then
map (\(WordSpecWithLogProb w p) -> WordSpecWithLogProb w (p / probTotal)) specs
else
((WordSpecWithLogProb AnyWord (1-probTotal)):specs)
else
specs
where probTotal = sum $ map (\(WordSpecWithLogProb _ p) -> p) specs
addSpecs :: Word32 -> Word32 -> [WordSpecWithLogProb] -> DistroMonad s ()
addSpecs nbOfBits seed = mapM_ (updateDistro nbOfBits seed)

View File

@ -41,6 +41,16 @@ optionsParser = GEvalOptions
<> short 'd'
<> metavar "OTHER-OUT"
<> help "compare results")))
<*> ((flag' FirstTheWorst
(long "sort"
<> short 's'
<> help "When in line-by-line or diff mode, sort the results from the worst to the best"))
<|>
(flag' FirstTheBest
(long "reverse-sort"
<> short 'r'
<> help "When in line-by-line or diff mode, sort the results from the best to the worst"))
<|> pure KeepTheOriginalOrder)
<*> specParser
precisionArgParser :: Parser Int
@ -90,9 +100,13 @@ specParser = GEvalSpecification
<> showDefault
<> metavar "INPUT"
<> help "The name of the file with the input (applicable only for some metrics)" )
<*> metricReader
<*> ((flip fromMaybe) <$> altMetricReader <*> metricReader)
<*> optional precisionArgParser
sel :: Maybe Metric -> Metric -> Metric
sel Nothing m = m
sel (Just m) _ = m
metricReader :: Parser Metric
metricReader = option auto
( long "metric"
@ -100,7 +114,14 @@ metricReader = option auto
<> value defaultMetric
<> showDefault
<> metavar "METRIC"
<> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed or CharMatch" )
<> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels or CharMatch" )
altMetricReader :: Parser (Maybe Metric)
altMetricReader = optional $ option auto
( long "alt-metric"
<> short 'a'
<> metavar "METRIC"
<> help "Alternative metric (overrides --metric option)" )
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
runGEval args = do
@ -145,20 +166,20 @@ attemptToReadOptsFromConfigFile args opts = do
runGEval'' :: GEvalOptions -> IO (Maybe MetricValue)
runGEval'' opts = runGEval''' (geoSpecialCommand opts) (geoSpec opts)
runGEval'' opts = runGEval''' (geoSpecialCommand opts) (geoResultOrdering opts) (geoSpec opts)
runGEval''' :: Maybe GEvalSpecialCommand -> GEvalSpecification -> IO (Maybe MetricValue)
runGEval''' Nothing spec = do
runGEval''' :: Maybe GEvalSpecialCommand -> ResultOrdering -> GEvalSpecification -> IO (Maybe MetricValue)
runGEval''' Nothing _ spec = do
val <- geval spec
return $ Just val
runGEval''' (Just Init) spec = do
runGEval''' (Just Init) _ spec = do
initChallenge spec
return Nothing
runGEval''' (Just LineByLine) spec = do
runLineByLine spec
runGEval''' (Just LineByLine) ordering spec = do
runLineByLine ordering spec
return Nothing
runGEval''' (Just (Diff otherOut)) spec = do
runDiff otherOut spec
runGEval''' (Just (Diff otherOut)) ordering spec = do
runDiff ordering otherOut spec
return Nothing
initChallenge :: GEvalSpecification -> IO ()
@ -169,11 +190,9 @@ initChallenge spec = case gesExpectedDirectory spec of
showInitInstructions = do
putStrLn [here|
Run:
geval --init --expected-directory CHALLENGE
geval --init --expected-directory CHALLENGE --metric METRIC-NAME --precision NUMBER-OF-DIGITS
to create a directory CHALLENGE representing a Gonito challenge.
You can specify a metric with `--metric METRIC-NAME` option.
Note that `--out-directory` option is not taken into account with `--init` option.
(Note that `--out-directory` option is not taken into account with `--init` option.)
|]
exitFailure

View File

@ -3,7 +3,7 @@
module GEval.PrecisionRecall(calculateMAPForOneResult,
fMeasure, f1Measure, f2Measure, precision, recall,
fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder,
precisionAndRecall, precisionAndRecallFromCounts, maxMatch)
precisionAndRecall, precisionAndRecallFromCounts, maxMatch, maxMatchOnOrdered)
where
import GEval.Common
@ -65,6 +65,17 @@ precision matchFun expected got = fst $ precisionAndRecall matchFun expected got
recall :: (a -> b -> Bool) -> [a] -> [b] -> Double
recall matchFun expected got = snd $ precisionAndRecall matchFun expected got
maxMatchOnOrdered :: Eq a => (a -> a -> Bool) -> [a] -> [a] -> Int
maxMatchOnOrdered laterThan expected got =
let (matched, _) = foldl' step (0, expected) got
in matched
where step (matched, l@(h:t)) g
| h == g = (matched+1, t)
| h `laterThan` g = (matched, l)
| otherwise = step (matched, t) g
step (matched, []) g = (matched, [])
-- counting maximum match with maximum bipartite matching
-- (we build an auxiliary graph and do a max-flow on this)
maxMatch :: (a -> b -> Bool) -> [a] -> [b] -> Int
@ -72,7 +83,6 @@ maxMatch matchFun expected got = mf
where (b, e, g) = buildGraph matchFun expected got
mf = maxFlow g (fst b) (fst e)
buildGraph :: (a -> b -> Bool) -> [a] -> [b] -> (LNode Int, LNode Int, Gr Int Int)
buildGraph matchFun expected got = (b, e, g)
where ((b, e), (_, g)) = buildGraph' matchFun expected got

View File

@ -1,5 +1,5 @@
flags: {}
packages:
- '.'
extra-deps: [cond-0.4.1.1,murmur3-1.0.3]
resolver: lts-9.14
extra-deps: [murmur3-1.0.3]
resolver: lts-11.9

View File

@ -8,11 +8,15 @@ import GEval.BLEU
import GEval.ClippEU
import GEval.PrecisionRecall
import GEval.ClusteringMetrics
import GEval.BIO
import GEval.LineByLine
import Data.Attoparsec.Text
import Options.Applicative
import Data.Text
import Text.EditDistance
import Data.Conduit.List (consume)
import qualified Test.HUnit as HU
import Data.Conduit.SmartSource
@ -101,6 +105,16 @@ main = hspec $ do
runGEvalTest "log-loss-hashed-simple" `shouldReturnAlmost` 2.398479083333333
it "example with unnormalized values" $ do
runGEvalTest "log-loss-hashed-not-normalized" `shouldReturnAlmost` 1.0468455186722887
it "with probs instead of log probs" $ do
runGEvalTest "log-loss-hashed-probs" `shouldReturnAlmost` 4.11631293099392
it "with probs instead of log probs (with normalization)" $ do
runGEvalTest "log-loss-hashed-probs-normalized" `shouldReturnAlmost` 1.55537749098853
it "with log probs whose probs are summing up to less than 1.0" $ do
runGEvalTest "log-loss-hashed-normalization" `shouldReturnAlmost` 5.16395069238851
describe "LikelihoodHashed challenge" $ do
it "example with unnormalized values" $ do
runGEvalTest "likelihood-hashed-not-normalized" `shouldReturnAlmost` 0.351043364110715
describe "reading options" $ do
it "can get the metric" $ do
extractMetric "bleu-complex" `shouldReturn` (Just BLEU)
@ -188,11 +202,105 @@ main = hspec $ do
runGEvalTest "logloss-simple" `shouldReturnAlmost` 0.31824
it "perfect" $ do
runGEvalTest "logloss-perfect" `shouldReturnAlmost` 0.0
describe "Likelihood" $ do
it "simple" $ do
runGEvalTest "likelihood-simple" `shouldReturnAlmost` 0.72742818469866
describe "evaluating single lines" $ do
it "RMSE" $ do
gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla")
(LineInFile "stub2" 1 "3.4")
(LineInFile "stub3" 1 "2.6") `shouldReturnAlmost` 0.8
describe "BIO format" $ do
it "just parse" $ do
let (Right r) = parseOnly (bioSequenceParser <* endOfInput) "O B-city/NEW_YORK I-city B-city/KALISZ I-city O B-name"
r `shouldBe` [Outside,
Beginning "city" (Just "NEW_YORK"),
Inside "city" Nothing,
Beginning "city" (Just "KALISZ"),
Inside "city" Nothing,
Outside,
Beginning "name" Nothing]
it "simplest entity" $ do
let (Right ents) = parseBioSequenceIntoEntities "B-city"
ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "city" Nothing]
it "multi-word entity" $ do
let (Right ents) = parseBioSequenceIntoEntities "B-date I-date"
ents `shouldBe` [TaggedEntity (TaggedSpan 1 2) "date" Nothing]
it "multi-word entity with normalized text" $ do
let (Right ents) = parseBioSequenceIntoEntities "B-date/FOO I-date/BAR"
ents `shouldBe` [TaggedEntity (TaggedSpan 1 2) "date" (Just "FOO_BAR")]
it "simplest entity with something outside" $ do
let (Right ents) = parseBioSequenceIntoEntities "O B-city"
ents `shouldBe` [TaggedEntity (TaggedSpan 2 2) "city" Nothing]
it "another simple case" $ do
let (Right ents) = parseBioSequenceIntoEntities "B-city B-city"
ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "city" Nothing,
TaggedEntity (TaggedSpan 2 2) "city" Nothing]
it "just parse into entities" $ do
let (Right ents) = parseBioSequenceIntoEntities "O O B-city/LOS_ANGELES I-city B-city/KLUCZBORK O B-name O B-person/JOHN I-person/VON I-person/NEUMANN"
ents `shouldBe` [TaggedEntity (TaggedSpan 3 4) "city" (Just "LOS_ANGELES"),
TaggedEntity (TaggedSpan 5 5) "city" (Just "KLUCZBORK"),
TaggedEntity (TaggedSpan 7 7) "name" (Nothing),
TaggedEntity (TaggedSpan 9 11) "person" (Just "JOHN_VON_NEUMANN")]
it "another entity parse" $ do
let (Right ents) = parseBioSequenceIntoEntities "B-month/JULY B-month/JULY O O B-foo/bar"
ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "month" (Just "JULY"),
TaggedEntity (TaggedSpan 2 2) "month" (Just "JULY"),
TaggedEntity (TaggedSpan 5 5) "foo" (Just "bar")]
it "another entity parse" $ do
let (Right ents) = parseBioSequenceIntoEntities "B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK"
ents `shouldBe` [TaggedEntity (TaggedSpan 1 2) "city" (Just "LOS_ANGELES"),
TaggedEntity (TaggedSpan 4 5) "city" (Just "NEW_YORK")]
it "parse entity" $ do
let (Right ents) = parseBioSequenceIntoEntities "B-surname/BROWN B-surname/SMITH"
ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "surname" (Just "BROWN"),
TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")]
it "parse entity" $ do
let (Right ents) = parseBioSequenceIntoEntities "O B-surname/SMITH"
ents `shouldBe` [TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")]
it "check counting" $ do
gatherCountsForBIO [TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")] [TaggedEntity (TaggedSpan 1 1) "surname" (Just "BROWN"),
TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")] `shouldBe` (1, 1, 2)
it "check F1 on a more complicated example" $ do
runGEvalTest "bio-f1-complex" `shouldReturnAlmost` 0.625
it "check F1 on labels only" $ do
runGEvalTest "bio-f1-complex-labels" `shouldReturnAlmost` 0.6666666666
it "calculate F1" $ do
runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5
it "calculate F1 with underscores rather than minus signs" $ do
runGEvalTest "bio-f1-simple-underscores" `shouldReturnAlmost` 0.5
it "check perfect score" $ do
runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0
it "check inconsistent input" $ do
runGEvalTest "bio-f1-error" `shouldThrow` (== UnexpectedData 2 "inconsistent label sequence `B-NAME/JOHN I-FOO/SMITH I-FOO/X`")
describe "automatic decompression" $ do
it "more complex test" $ do
runGEvalTest "charmatch-complex-compressed" `shouldReturnAlmost` 0.1923076923076923
describe "line by line mode" $ do
let sampleChallenge =
GEvalSpecification
{ gesOutDirectory = "test/likelihood-simple/likelihood-simple-solution",
gesExpectedDirectory = Just "test/likelihood-simple/likelihood-simple",
gesTestName = "test-A",
gesOutFile = "out.tsv",
gesExpectedFile = "expected.tsv",
gesInputFile = "in.tsv",
gesMetric = Likelihood,
gesPrecision = Nothing }
it "simple test" $ do
results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge Data.Conduit.List.consume
Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo",
"bar",
"baz",
"baq"]
it "test sorting" $ do
results <- runLineByLineGeneralized FirstTheWorst sampleChallenge Data.Conduit.List.consume
Prelude.head (Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results) `shouldBe` "baq"
describe "handle --alt-metric option" $ do
it "accuracy instead of likelihood" $ do
runGEvalTestExtraOptions ["--alt-metric", "Accuracy"] "likelihood-simple" `shouldReturnAlmost` 0.75
it "accuracy instead of log loss" $ do
runGEvalTestExtraOptions ["--alt-metric", "Accuracy"] "log-loss-hashed-probs" `shouldReturnAlmost` 0.4
describe "smart sources" $ do
it "smart specs are parsed" $ do
parseSmartSpec "" `shouldBe` NoSpec
@ -217,8 +325,8 @@ main = hspec $ do
parseSmartSpecInContext ["foo/bar"] Nothing "" `shouldBe` Nothing
it "sources are accessed" $ do
readFromSmartSource [] Nothing "test/files/foo.txt" `shouldReturn` ["foo\n"]
readFromSmartSource [] Nothing "https://httpbin.org/robots.txt" `shouldReturn`
["User-agent: *\nDisallow: /deny\n"]
-- readFromSmartSource [] Nothing "https://httpbin.org/robots.txt" `shouldReturn`
-- ["User-agent: *\nDisallow: /deny\n"]
readFromSmartSource :: [FilePath] -> Maybe FilePath -> String -> IO [String]
readFromSmartSource defaultDirs defaultFile specS = do
@ -244,11 +352,13 @@ testMatchFun _ _ = False
extractVal :: (Either (ParserResult GEvalOptions) (Maybe MetricValue)) -> IO MetricValue
extractVal (Right (Just val)) = return val
runGEvalTest testName = (runGEval [
runGEvalTest = runGEvalTestExtraOptions []
runGEvalTestExtraOptions extraOptions testName = (runGEval ([
"--expected-directory",
"test/" ++ testName ++ "/" ++ testName,
"--out-directory",
"test/" ++ testName ++ "/" ++ testName ++ "-solution"]) >>= extractVal
"test/" ++ testName ++ "/" ++ testName ++ "-solution"] ++ extraOptions)) >>= extractVal
extractMetric :: String -> IO (Maybe Metric)
extractMetric testName = do

View File

@ -0,0 +1,6 @@
B-wrong
B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK_CITY
B-surname/BROWN B-surname/SMIT
B-month B-month O O B-foo/bar
O B-class I-class I-class
O O
1 B-wrong
2 B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK_CITY
3 B-surname/BROWN B-surname/SMIT
4 B-month B-month O O B-foo/bar
5 O B-class I-class I-class
6 O O

View File

@ -0,0 +1 @@
--metric BIO-F1-Labels

View File

@ -0,0 +1,6 @@
O
B-city/LOS I-city/ANGELES O B-city/NEW_YORK O
O B-surname/SMITH
B-month/JULY O O O B-foo/bar
O B-class I-class I-class
O O
1 O
2 B-city/LOS I-city/ANGELES O B-city/NEW_YORK O
3 O B-surname/SMITH
4 B-month/JULY O O O B-foo/bar
5 O B-class I-class I-class
6 O O

View File

@ -0,0 +1,6 @@
B-wrong
B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK
B-surname/BROWN B-surname/SMITH
B-month/JULY B-month/JULY O O B-foo/bar
O B-class I-class I-class
O B-wrong
1 B-wrong
2 B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK
3 B-surname/BROWN B-surname/SMITH
4 B-month/JULY B-month/JULY O O B-foo/bar
5 O B-class I-class I-class
6 O B-wrong

View File

@ -0,0 +1 @@
--metric BIO-F1

View File

@ -0,0 +1,6 @@
O
B-city/LOS I-city/ANGELES O B-city/NEW_YORK O
O B-surname/SMITH
B-month/JULY O O O B-foo/bar
O B-class I-class I-class
O O
1 O
2 B-city/LOS I-city/ANGELES O B-city/NEW_YORK O
3 O B-surname/SMITH
4 B-month/JULY O O O B-foo/bar
5 O B-class I-class I-class
6 O O

View File

@ -0,0 +1,2 @@
O B-CITY/WARSZAWA I-CITY/WARSZAWA
O B-NAME/JOHN I-FOO/SMITH I-FOO/X O
1 O B-CITY/WARSZAWA I-CITY/WARSZAWA
2 O B-NAME/JOHN I-FOO/SMITH I-FOO/X O

View File

@ -0,0 +1 @@
--metric BIO-F1

View File

@ -0,0 +1,2 @@
O B-CITY/WARSZAWA I-CITY/WARSZAWA
O B-NAME/JOHN I-NAME/SMITH O O
1 O B-CITY/WARSZAWA I-CITY/WARSZAWA
2 O B-NAME/JOHN I-NAME/SMITH O O

View File

@ -0,0 +1,4 @@
O O O
O B-city/NEW I-city/YORK I-city/CITY O B-month/July
B-surname/SMITH
B-city/LONDON B-city/PARIS
1 O O O
2 O B-city/NEW I-city/YORK I-city/CITY O B-month/July
3 B-surname/SMITH
4 B-city/LONDON B-city/PARIS

View File

@ -0,0 +1 @@
--metric BIO-F1

View File

@ -0,0 +1,4 @@
O O O
O B-city/NEW I-city/YORK I-city/CITY O B-month/July
B-surname/SMITH
B-city/LONDON B-city/PARIS
1 O O O
2 O B-city/NEW I-city/YORK I-city/CITY O B-month/July
3 B-surname/SMITH
4 B-city/LONDON B-city/PARIS

View File

@ -0,0 +1,3 @@
O O B_city/POZNAŃ O O B_date/MARCH I_date/12
B_city/BUK O O O
B_name/FOO O B_surname/KOWALSKI
1 O O B_city/POZNAŃ O O B_date/MARCH I_date/12
2 B_city/BUK O O O
3 B_name/FOO O B_surname/KOWALSKI

View File

@ -0,0 +1 @@
--metric BIO-F1

View File

@ -0,0 +1,3 @@
O O B_city/POZNAŃ O O B_date/MARCH I_date/12
O O O O
O B_city/KONIN O
1 O O B_city/POZNAŃ O O B_date/MARCH I_date/12
2 O O O O
3 O B_city/KONIN O

View File

@ -0,0 +1,3 @@
O O B-city/POZNAŃ O O B-date/MARCH I-date/12
B-city/BUK O O O
B-name/FOO O B-surname/KOWALSKI
1 O O B-city/POZNAŃ O O B-date/MARCH I-date/12
2 B-city/BUK O O O
3 B-name/FOO O B-surname/KOWALSKI

View File

@ -0,0 +1 @@
--metric BIO-F1

View File

@ -0,0 +1,3 @@
O O B-city/POZNAŃ O O B-date/MARCH I-date/12
O O O O
O B-city/KONIN O
1 O O B-city/POZNAŃ O O B-date/MARCH I-date/12
2 O O O O
3 O B-city/KONIN O

View File

@ -0,0 +1 @@
--metric CharMatch

View File

@ -0,0 +1,2 @@
tak:10 nie:8.9
niebieski:0 żółty:1.5 czerwony:-0.5
1 tak:10 nie:8.9
2 niebieski:0 żółty:1.5 czerwony:-0.5

View File

@ -0,0 +1 @@
--metric LikelihoodHashed8

View File

@ -0,0 +1,2 @@
tak
niebieski
1 tak
2 niebieski

View File

@ -0,0 +1,4 @@
0.7
0
0.0
0.6
1 0.7
2 0
3 0.0
4 0.6

View File

@ -0,0 +1 @@
--metric Likelihood

View File

@ -0,0 +1,4 @@
1
0
0
0
1 1
2 0
3 0
4 0

View File

@ -0,0 +1,4 @@
foo
bar
baz
baq
1 foo
2 bar
3 baz
4 baq

View File

@ -0,0 +1,3 @@
B:-1.20397280432594 A:-0.916290731874155
A:-2.3025850929940 C:-1.6094379124341
A:-2.3025850929940 C:-1.6094379124341 :-0.356674943938732
1 B:-1.20397280432594 A:-0.916290731874155
2 A:-2.3025850929940 C:-1.6094379124341
3 A:-2.3025850929940 C:-1.6094379124341 :-0.356674943938732

View File

@ -0,0 +1 @@
--metric LogLossHashed10

View File

@ -0,0 +1,3 @@
A
B
B
1 A
2 B
3 B

View File

@ -0,0 +1,3 @@
1:0.5 2:0.6 3:1.0 4:1.0 5:0.9
1:0.3 2:0.2 3:0.3
1:0.2 :0.6
1 1:0.5 2:0.6 3:1.0 4:1.0 5:0.9
2 1:0.3 2:0.2 3:0.3
3 1:0.2 :0.6

View File

@ -0,0 +1 @@
--metric LogLossHashed10

View File

@ -0,0 +1,3 @@
1
3
1
1 1
2 3
3 1

View File

@ -0,0 +1,5 @@
A:0.6 B:0.4
C:0.2 A:0.1
A:0.4 C:0.4
D:1.0
C:0.4 B:0.5 :0.1
1 A:0.6 B:0.4
2 C:0.2 A:0.1
3 A:0.4 C:0.4
4 D:1.0
5 C:0.4 B:0.5 :0.1

View File

@ -0,0 +1 @@
--metric LogLossHashed10

View File

@ -0,0 +1,5 @@
A
A
B
D
A
1 A
2 A
3 B
4 D
5 A