diff --git a/README.md b/README.md
index 5e89c28..411f81c 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,15 @@
# GEval
-GEval is a Haskell library (and a stand-alone tool) for evaluating the
+GEval is a Haskell library and a stand-alone tool for evaluating the
results of solutions to machine learning challenges as defined on the
[Gonito](http://gonito.net) platform.
Note that GEval is only about machine learning evaluation. No actual
machine learning algorithms are available here.
+The official repository is `git://gonito.net/geval`, browsable at
+.
+
## Installing
You need [Haskell Stack](https://github.com/commercialhaskell/stack).
diff --git a/geval.cabal b/geval.cabal
index 9b683ca..17486ed 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -1,5 +1,5 @@
name: geval
-version: 0.5.4.0
+version: 0.7.0.0
synopsis: Machine learning evaluation tools
description: Please see README.md
homepage: http://github.com/name/project
@@ -26,10 +26,12 @@ library
, GEval.LogLossHashed
, GEval.CharMatch
, GEval.LineByLine
+ , GEval.BIO
+ , Data.Conduit.AutoDecompress
, Data.Conduit.SmartSource
build-depends: base >= 4.7 && < 5
, cond
- , conduit
+ , conduit >= 1.3.0
, conduit-combinators
, conduit-extra
, directory
@@ -52,6 +54,11 @@ library
, bytestring
, http-conduit
, transformers
+ , word8
+ , primitive
+ , transformers-base
+ , bzlib-conduit
+ , lzma-conduit
default-language: Haskell2010
executable geval
@@ -79,6 +86,7 @@ test-suite geval-test
, resourcet
, conduit
, conduit-extra
+ , conduit
ghc-options: -threaded -rtsopts -with-rtsopts=-N
default-language: Haskell2010
diff --git a/src/Data/Conduit/AutoDecompress.hs b/src/Data/Conduit/AutoDecompress.hs
new file mode 100644
index 0000000..b959d6f
--- /dev/null
+++ b/src/Data/Conduit/AutoDecompress.hs
@@ -0,0 +1,45 @@
+{-# LANGUAGE AllowAmbiguousTypes #-}
+
+module Data.Conduit.AutoDecompress
+ (autoDecompress,
+ doNothing)
+ where
+
+import Data.Conduit
+import Data.Conduit.Combinators
+import Data.ByteString
+import Data.Conduit.Zlib
+import Data.Word8
+import Control.Monad.Trans.Resource (MonadThrow, MonadResource)
+import Control.Monad.Primitive (PrimMonad)
+import Control.Monad.Base (MonadBase)
+import qualified Data.Conduit.Lzma as XZ
+import qualified Data.Conduit.BZlib as BZ
+
+autoDecompress :: (MonadResource m, MonadThrow m, PrimMonad m) => ConduitM ByteString ByteString m ()
+autoDecompress = do
+ f <- await
+ case f of
+ Just chunk -> if Data.ByteString.length chunk > 1
+ then
+ do
+ let firstByte = Data.ByteString.head chunk
+ let secondByte = Data.ByteString.index chunk 1
+ leftover chunk
+ lookAtMagicNumbers (firstByte, secondByte)
+ else
+ do
+ leftover chunk
+ doNothing
+
+ Nothing -> return ()
+
+
+lookAtMagicNumbers :: (MonadResource m, MonadThrow m, PrimMonad m) => (Word8, Word8) -> ConduitT ByteString ByteString m ()
+lookAtMagicNumbers (31, 139) = ungzip
+lookAtMagicNumbers (66, 90) = BZ.bunzip2
+lookAtMagicNumbers (253, 55) = XZ.decompress Nothing
+lookAtMagicNumbers _ = doNothing
+
+doNothing :: Monad m => ConduitT a a m ()
+doNothing = Data.Conduit.Combinators.filter (const True)
diff --git a/src/Data/Conduit/SmartSource.hs b/src/Data/Conduit/SmartSource.hs
index 52d9365..e75449b 100644
--- a/src/Data/Conduit/SmartSource.hs
+++ b/src/Data/Conduit/SmartSource.hs
@@ -35,17 +35,17 @@ pureSmartSource _ (FileNameSpec fileName) = sourceFile fileName
pureSmartSource _ (FilePathSpec fileName) = sourceFile fileName
pureSmartSource [] (PossiblyGitSpec spec) = sourceFile spec
pureSmartSource (firstDir:_) (PossiblyGitSpec spec) = sourceFile (firstDir > spec)
-pureSmartSource _ (Https url) = httpSource url
-pureSmartSource _ (Http url) = httpSource url
+--pureSmartSource _ (Https url) = httpSource url
+--pureSmartSource _ (Http url) = httpSource url
-httpSource :: MonadResource m => String -> ConduitM () S.ByteString m ()
-httpSource url = do
- request <- liftIO $ parseRequest url
- manager <- liftIO $ newManager tlsManagerSettings
- response <- lift $ http request manager
- (httpsource, finalizer) <- lift $ unwrapResumable (responseBody response)
- httpsource
- lift finalizer
+-- httpSource :: MonadResource m => String -> ConduitM () S.ByteString m ()
+-- httpSource url = do
+-- request <- liftIO $ parseRequest url
+-- manager <- liftIO $ newManager tlsManagerSettings
+-- response <- lift $ http request manager
+-- (httpsource, finalizer) <- lift $ unwrapResumable (responseBody response)
+-- httpsource
+-- lift finalizer
parseSmartSpec :: FilePath -> SmartSpec
parseSmartSpec "" = NoSpec
diff --git a/src/GEval/BIO.hs b/src/GEval/BIO.hs
new file mode 100644
index 0000000..1f4b75b
--- /dev/null
+++ b/src/GEval/BIO.hs
@@ -0,0 +1,111 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module GEval.BIO
+ (BIOLabel(..), bioSequenceParser, parseBioSequenceIntoEntities,
+ TaggedSpan(..), TaggedEntity(..), gatherCountsForBIO,
+ eraseNormalisation)
+ where
+
+import GEval.PrecisionRecall
+
+import qualified Data.Text as T
+
+import Data.Attoparsec.Text
+import Data.Attoparsec.Combinator
+import Control.Applicative
+import Data.Char
+import Data.Maybe (catMaybes)
+
+import GEval.Common
+
+data BIOLabel = Outside | Beginning T.Text (Maybe T.Text) | Inside T.Text (Maybe T.Text)
+ deriving (Eq, Show)
+
+formatBioLabel :: BIOLabel -> T.Text
+formatBioLabel Outside = "O"
+formatBioLabel (Beginning label Nothing) = T.concat ["B-", label]
+formatBioLabel (Beginning label (Just normalized)) = T.concat ["B-", label, "/", normalized]
+formatBioLabel (Inside label Nothing) = T.concat ["I-", label]
+formatBioLabel (Inside label (Just normalized)) = T.concat ["I-", label, "/", normalized]
+
+data TaggedSpan = TaggedSpan Int Int
+ deriving (Eq, Show)
+
+data TaggedEntity = TaggedEntity TaggedSpan T.Text (Maybe T.Text)
+ deriving (Eq, Show)
+
+eraseNormalisation :: TaggedEntity -> TaggedEntity
+eraseNormalisation (TaggedEntity span label normalized) = (TaggedEntity span label Nothing)
+
+gatherCountsForBIO :: [TaggedEntity] -> [TaggedEntity] -> (Int, Int, Int)
+gatherCountsForBIO expected got = (maxMatchOnOrdered laterThan expected got, length expected, length got)
+ where
+ laterThan (TaggedEntity (TaggedSpan a _) _ _) (TaggedEntity (TaggedSpan b _) _ _) = a > b
+
+parseBioSequenceIntoEntities :: T.Text -> Either String [TaggedEntity]
+parseBioSequenceIntoEntities t = labelsIntoEntities =<< (parseOnly (bioSequenceParser <* endOfInput) t)
+
+labelsIntoEntities :: [BIOLabel] -> Either String [TaggedEntity]
+labelsIntoEntities labels = labelsIntoEntities' $ zip labels [1..]
+
+labelsIntoEntities' :: [(BIOLabel, Int)] -> Either String [TaggedEntity]
+labelsIntoEntities' labelsWithPositions = mapM labelSplitToEntity labelsGathered
+ where labelsGathered = splitLabels labelsWithPositions
+
+labelSplitToEntity :: [(BIOLabel, Int)] -> Either String TaggedEntity
+labelSplitToEntity labs@(h@(_,begIx):t) = if isBeginning h && all (\tp -> isInside tp && tt tp == btp) t
+ then
+ Right $ TaggedEntity (TaggedSpan begIx lastItemIx) btp mNormalized
+ else
+ Left $ "inconsistent label sequence `" ++ (T.unpack $ T.intercalate " " $ map (formatBioLabel . fst) labs) ++ "`"
+ where isBeginning (Beginning _ _, _) = True
+ isBeginning _ = False
+ isInside (Inside _ _, _) = True
+ isInside _ = False
+ tt (Beginning t _, _) = t
+ tt (Inside t _, _) = t
+ btp = tt h
+ lastItemIx = case t of
+ [] -> begIx
+ _ -> let (_, ix) = last t
+ in ix
+ normalized (Beginning _ n, _) = n
+ normalized (Inside _ n, _) = n
+ mNormalized = if all (\tp -> normalized tp == Nothing) labs
+ then
+ Nothing
+ else
+ Just $ T.intercalate "_" $ catMaybes $ map normalized labs
+
+splitLabels :: [(BIOLabel, Int)] -> [[(BIOLabel, Int)]]
+splitLabels [] = []
+splitLabels ((Outside, _):r) = splitLabels r
+splitLabels (e@(_, ix):r) =
+ case splitLabels r of
+ l@(((Beginning _ _, _):_):_) -> ([e]:l)
+ (s@((Inside _ _, ix'):_):l) -> if ix' == ix + 1
+ then
+ ((e:s):l)
+ else
+ ([e]:(s:l))
+ [] -> [[e]]
+
+bioSequenceParser :: Parser [BIOLabel]
+bioSequenceParser = sepByWhitespaces bioLabelParser
+
+bioLabelParser :: Parser BIOLabel
+bioLabelParser =
+ (string "O" *> pure Outside) <|>
+ (do
+ labelType <- bioMarkerParser
+ (string "-" <|> string "_")
+ label <- takeWhile1 (\c -> not (isSpace c) && c /= '/')
+ normalized <- (do
+ string "/"
+ normalized <- takeWhile1 (not . isSpace)
+ return $ Just normalized) <|> pure Nothing
+ return $ labelType label normalized)
+
+bioMarkerParser :: Parser (T.Text -> Maybe T.Text -> BIOLabel)
+bioMarkerParser =
+ (string "B" *> pure Beginning) <|> (string "I" *> pure Inside)
diff --git a/src/GEval/Common.hs b/src/GEval/Common.hs
index ad89c79..38828ab 100644
--- a/src/GEval/Common.hs
+++ b/src/GEval/Common.hs
@@ -38,3 +38,7 @@ sepByWhitespaces parser = possibleWhitespace *> parser `sepBy` whitespace <* pos
possibleWhitespace = many' (satisfy isHorizontalSpace)
whitespace = many1 (satisfy isHorizontalSpace)
+
+indicator :: Bool -> Double
+indicator True = 1.0
+indicator False = 0.0
diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs
index 09a422c..5d4719c 100644
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -16,6 +16,7 @@ module GEval.Core
MetricValue,
GEvalSpecialCommand(..),
GEvalSpecification(..),
+ ResultOrdering(..),
GEvalOptions(..),
GEvalException(..),
defaultGEvalSpecification,
@@ -53,10 +54,11 @@ import qualified System.Directory as D
import System.Posix
import System.FilePath
import Data.Maybe
+import Data.Tuple
import qualified Data.List.Split as DLS
import Control.Monad.IO.Class
-import Control.Monad ((<=<))
+import Control.Monad ((<=<), filterM)
import Data.Attoparsec.Text (parseOnly)
@@ -69,6 +71,8 @@ import GEval.PrecisionRecall
import GEval.ClusteringMetrics
import GEval.LogLossHashed
import GEval.CharMatch
+import GEval.BIO
+import Data.Conduit.AutoDecompress
import qualified Data.HashMap.Strict as M
@@ -82,7 +86,7 @@ defaultLogLossHashedSize :: Word32
defaultLogLossHashedSize = 10
data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch
- | MAP | LogLoss
+ | MAP | LogLoss | Likelihood | BIOF1 | BIOF1Labels | LikelihoodHashed Word32
deriving (Eq)
instance Show Metric where
@@ -99,9 +103,18 @@ instance Show Metric where
""
else
(show nbOfBits))
+ show (LikelihoodHashed nbOfBits) = "LikelihoodHashed" ++ (if
+ nbOfBits == defaultLogLossHashedSize
+ then
+ ""
+ else
+ (show nbOfBits))
show CharMatch = "CharMatch"
show MAP = "MAP"
show LogLoss = "LogLoss"
+ show Likelihood = "Likelihood"
+ show BIOF1 = "BIO-F1"
+ show BIOF1Labels = "BIO-F1-Labels"
instance Read Metric where
readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
@@ -116,9 +129,15 @@ instance Read Metric where
readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
[(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
_ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
+ readsPrec p ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
+ [(nbOfBits, theRest)] -> [(LikelihoodHashed nbOfBits, theRest)]
+ _ -> [(LikelihoodHashed defaultLogLossHashedSize, theRest)]
readsPrec _ ('L':'o':'g':'L':'o':'s':'s':theRest) = [(LogLoss, theRest)]
+ readsPrec _ ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(Likelihood, theRest)]
readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
+ readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
+ readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
@@ -131,9 +150,13 @@ getMetricOrdering ClippEU = TheHigherTheBetter
getMetricOrdering (FMeasure _) = TheHigherTheBetter
getMetricOrdering NMI = TheHigherTheBetter
getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
+getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
getMetricOrdering CharMatch = TheHigherTheBetter
getMetricOrdering MAP = TheHigherTheBetter
getMetricOrdering LogLoss = TheLowerTheBetter
+getMetricOrdering Likelihood = TheHigherTheBetter
+getMetricOrdering BIOF1 = TheHigherTheBetter
+getMetricOrdering BIOF1Labels = TheHigherTheBetter
defaultOutDirectory = "."
defaultTestName = "test-A"
@@ -163,11 +186,13 @@ getExpectedDirectory spec = fromMaybe outDirectory $ gesExpectedDirectory spec
data GEvalSpecialCommand = Init | LineByLine | Diff FilePath
+data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest
+
data GEvalOptions = GEvalOptions
{ geoSpecialCommand :: Maybe GEvalSpecialCommand,
+ geoResultOrdering :: ResultOrdering,
geoSpec :: GEvalSpecification }
-
data GEvalException = NoExpectedFile FilePath
| NoOutFile FilePath
| NoExpectedDirectory FilePath
@@ -236,11 +261,14 @@ checkAndGetFiles gevalSpec = do
unlessM (D.doesDirectoryExist expectedDirectory) $ throwM $ NoExpectedDirectory expectedDirectory
unlessM (D.doesDirectoryExist outTestDirectory) $ throwM $ NoOutTestDirectory outTestDirectory
unlessM (D.doesDirectoryExist expectedTestDirectory) $ throwM $ NoExpectedTestDirectory expectedTestDirectory
+ inputFilePath <- lookForCompressedFiles inputFilePath'
+ expectedFilePath <- lookForCompressedFiles expectedFilePath'
+ outFilePath <- lookForCompressedFiles outFilePath'
checkInputFileIfNeeded metric inputFilePath
return (inputFilePath, expectedFilePath, outFilePath)
- where expectedFilePath = expectedTestDirectory > (gesExpectedFile gevalSpec)
- outFilePath = getOutFile gevalSpec (gesOutFile gevalSpec)
- inputFilePath = expectedTestDirectory > (gesInputFile gevalSpec)
+ where expectedFilePath' = expectedTestDirectory > (gesExpectedFile gevalSpec)
+ outFilePath' = getOutFile gevalSpec (gesOutFile gevalSpec)
+ inputFilePath' = expectedTestDirectory > (gesInputFile gevalSpec)
expectedTestDirectory = expectedDirectory > testName
outTestDirectory = outDirectory > testName
expectedDirectory = getExpectedDirectory gevalSpec
@@ -248,6 +276,24 @@ checkAndGetFiles gevalSpec = do
testName = gesTestName gevalSpec
metric = gesMetric gevalSpec
+lookForCompressedFiles :: FilePath -> IO FilePath
+lookForCompressedFiles = lookForAlternativeFiles [".gz", ".xz", ".bz2"]
+
+lookForAlternativeFiles :: [String] -> FilePath -> IO FilePath
+lookForAlternativeFiles suffixes filePath
+ | takeExtension filePath `Prelude.elem` suffixes = return filePath
+ | otherwise = do
+ fileIsThere <- D.doesFileExist filePath
+ if fileIsThere
+ then
+ return filePath
+ else
+ do
+ found <- Control.Monad.filterM D.doesFileExist $ Prelude.map (filePath <.>) suffixes
+ return $ case found of
+ [fp] -> fp
+ _ -> filePath
+
getOutFile :: GEvalSpecification -> FilePath -> FilePath
getOutFile gevalSpec out = outDirectory > testName > out
where outDirectory = gesOutDirectory gevalSpec
@@ -261,7 +307,7 @@ checkInputFileIfNeeded _ _ = return ()
fileAsLineSource :: FilePath -> LineSource (ResourceT IO)
fileAsLineSource filePath =
- LineSource (smartSource [] Nothing (parseSmartSpec filePath) $= CT.decodeUtf8Lenient =$= CT.lines) filePath 1
+ LineSource (smartSource [] Nothing (parseSmartSpec filePath) $= autoDecompress $= CT.decodeUtf8Lenient =$= CT.lines) filePath 1
gevalCoreOnSingleLines :: Metric -> LineInFile -> LineInFile -> LineInFile -> IO (MetricValue)
gevalCoreOnSingleLines metric inpLine expLine outLine =
@@ -283,7 +329,9 @@ gevalCore metric inputFilePath expectedFilePath outFilePath = do
(fileAsLineSource expectedFilePath)
(fileAsLineSource outFilePath)
-gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric
+logLossToLikehood logLoss = exp (-logLoss)
+
+gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => Metric
-> LineSource (ResourceT m)
-> LineSource (ResourceT m)
-> LineSource (ResourceT m)
@@ -292,12 +340,20 @@ gevalCoreOnSources RMSE inputLineSource expectedLineSource outLineSource = do
mse <- gevalCoreOnSources MSE inputLineSource expectedLineSource outLineSource
return $ mse ** 0.5
+gevalCoreOnSources Likelihood inputLineSource expectedLineSource outLineSource = do
+ logLoss <- gevalCoreOnSources LogLoss inputLineSource expectedLineSource outLineSource
+ return $ logLossToLikehood logLoss
+
+gevalCoreOnSources (LikelihoodHashed b) inputLineSource expectedLineSource outLineSource = do
+ logLoss <- gevalCoreOnSources (LogLossHashed b) inputLineSource expectedLineSource outLineSource
+ return $ logLossToLikehood logLoss
+
gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do
gevalCore' metric inputLineSource expectedLineSource outLineSource
data LineInFile = LineInFile FilePath Word32 Text
-gevalCore' :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
+gevalCore' :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
gevalCore' MSE _ = gevalCoreWithoutInput outParser outParser itemError averageC id
where outParser = getValue . TR.double
@@ -315,9 +371,18 @@ gevalCore' BLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . D
| otherwise = exp (1.0 - (r /. c))
gevalCore' Accuracy _ = gevalCoreWithoutInput (Right . strip) (Right . strip) hitOrMiss averageC id
- where hitOrMiss (exp,got) = if (normalizeProbForAccuracy exp got) == exp then 1.0 else 0.0
- -- if the expected value is 0 or 1 treat values between 0.0 and 1.0 as probabilities
- -- for the positive outcome
+ where hitOrMiss (exp, got) =
+ -- first try to parse what we got as a probability distribution
+ -- (like the one used for Likelikehood/LogLossHashed metric)
+ case parseWordSpecs got of
+ Right wordSpecs -> if Prelude.null pairs
+ then 0.0
+ else indicator (exp == (snd $ Prelude.maximum pairs))
+ where pairs = catMaybes $ Prelude.map wordSpecToPair wordSpecs
+ Left _ -> indicator ((normalizeProbForAccuracy exp got) == exp)
+ -- if the expected value is 0 or 1 treat values
+ -- between 0.0 and 1.0 as probabilities
+ -- for the positive outcome
normalizeProbForAccuracy :: Text -> Text -> Text
normalizeProbForAccuracy exp got
| exp == (pack "1") = case tryReadingAsFloat got of
@@ -383,7 +448,16 @@ gevalCore' CharMatch inputLineSource = helper inputLineSource
helper inputLineSource expectedLineSource outputLineSource = do
gevalCoreGeneralized (ParserSpecWithInput (Right . unpack) (Right . unpack) (Right . unpack)) step countAgg (fMeasureOnCounts charMatchBeta) (WithInput inputLineSource expectedLineSource outputLineSource)
step (ParsedRecordWithInput inp exp out) = getCharMatchCount inp exp out
- countAgg = CC.foldl countFolder (0, 0, 0)
+
+gevalCore' BIOF1 _ = gevalCoreWithoutInput parseBioSequenceIntoEntities parseBioSequenceIntoEntities (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts
+
+gevalCore' BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWithoutNormalization parseBioSequenceIntoEntitiesWithoutNormalization (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts
+ where parseBioSequenceIntoEntitiesWithoutNormalization s = do
+ entities <- parseBioSequenceIntoEntities s
+ return $ Prelude.map eraseNormalisation entities
+
+countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int)
+countAgg = CC.foldl countFolder (0, 0, 0)
parseDistributionWrapper :: Word32 -> Word32 -> Text -> HashedDistribution
parseDistributionWrapper nbOfBits seed distroSpec = case parseDistribution nbOfBits seed distroSpec of
@@ -395,25 +469,25 @@ data SourceItem a = Got a | Wrong String | Done
skipLineNumber :: (x -> c) -> ((Word32, x) -> c)
skipLineNumber fun = fun . snd
-gevalCoreWithoutInput :: (MonadBaseControl IO m, MonadThrow m, MonadIO m) => (Text -> Either String a) -> (Text -> Either String b) -> ((a, b) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
+gevalCoreWithoutInput :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => (Text -> Either String a) -> (Text -> Either String b) -> ((a, b) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
gevalCoreWithoutInput expParser outParser itemStep aggregator finalStep expectedLineStream outLineStream =
gevalCoreGeneralized (ParserSpecWithoutInput expParser outParser) (trans itemStep) aggregator finalStep (WithoutInput expectedLineStream outLineStream)
where
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
trans step (ParsedRecordWithoutInput x y) = step (x, y)
-gevalCore''' :: (MonadBaseControl IO m, MonadThrow m, MonadIO m) => ParserSpec (WithoutInput m a b) -> ((Word32, (a, b)) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> WithoutInput m a b -> m (MetricValue)
+gevalCore''' :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec (WithoutInput m a b) -> ((Word32, (a, b)) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> WithoutInput m a b -> m (MetricValue)
gevalCore''' parserSpec itemStep aggregator finalStep context =
gevalCoreGeneralized' parserSpec (trans itemStep) aggregator finalStep context
where
trans :: ((Word32, (a, b)) -> c) -> (Word32, ParsedRecord (WithoutInput m a b)) -> c
trans step (n, ParsedRecordWithoutInput x y) = step (n, (x, y))
-gevalCoreGeneralized :: (EvaluationContext ctxt m, MonadBaseControl IO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> (ParsedRecord ctxt -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue)
+gevalCoreGeneralized :: (EvaluationContext ctxt m, MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> (ParsedRecord ctxt -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue)
gevalCoreGeneralized parserSpec itemStep aggregator finalStep context =
gevalCoreGeneralized' parserSpec (skipLineNumber itemStep) aggregator finalStep context
-gevalCoreGeneralized' :: forall m ctxt c d . (EvaluationContext ctxt m, MonadBaseControl IO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> ((Word32, ParsedRecord ctxt) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue)
+gevalCoreGeneralized' :: forall m ctxt c d . (EvaluationContext ctxt m, MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> ((Word32, ParsedRecord ctxt) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue)
gevalCoreGeneralized' parserSpec itemStep aggregator finalStep context = do
v <- runResourceT $
(((getZipSource $ (,)
@@ -434,7 +508,7 @@ class EvaluationContext ctxt m where
data WithoutInput m e o = WithoutInput (LineSource (ResourceT m)) (LineSource (ResourceT m))
-instance (MonadBaseControl IO m, MonadIO m, MonadThrow m) => EvaluationContext (WithoutInput m e o) m where
+instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithoutInput m e o) m where
data ParserSpec (WithoutInput m e o) = ParserSpecWithoutInput (Text -> Either String e) (Text -> Either String o)
data WrappedParsedRecord (WithoutInput m e o) = WrappedParsedRecordWithoutInput (SourceItem e) (SourceItem o)
data ParsedRecord (WithoutInput m e o) = ParsedRecordWithoutInput e o
@@ -463,7 +537,7 @@ data WithInput m i e o = WithInput (LineSource (ResourceT m)) (LineSource (Resou
getInputFilePath (WithInput (LineSource _ inputFilePath _) _ _) = inputFilePath
-instance (MonadBaseControl IO m, MonadIO m, MonadThrow m) => EvaluationContext (WithInput m i e o) m where
+instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithInput m i e o) m where
data ParserSpec (WithInput m i e o) = ParserSpecWithInput (Text -> Either String i) (Text -> Either String e) (Text -> Either String o)
data WrappedParsedRecord (WithInput m i e o) = WrappedParsedRecordWithInput (SourceItem i) (SourceItem e) (SourceItem o)
data ParsedRecord (WithInput m i e o) = ParsedRecordWithInput i e o
diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs
index b0e53a5..8880afa 100644
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@@ -101,6 +101,8 @@ Cluster proverbs for languages.
This is a sample challenge for flat clustering (unsupervised learning challenge).
|] ++ (commonReadmeMDContents testName)
+readmeMDContents (LikelihoodHashed b) testname = readmeMDContents (LogLossHashed b) testname
+
readmeMDContents (LogLossHashed _) testName = [i|
GEval sample challenge — language model evaluation
==================================================
@@ -113,6 +115,39 @@ The metric is average log-loss calculated for 10-bit hashes.
Train file is a just text file (one utterance per line).
In an input file, left and right contexts (TAB-separated) are given.
In an expected file, the word to be guessed is given.
+
+Format of the output files
+--------------------------
+
+For each input line, a probability distribution for words in a gap
+must be given:
+
+ word1:logprob1 word2:logprob2 ... wordN:logprobN :logprob0
+
+where *logprobi* is the logarithm of the probability for *wordi* and
+*logprob0* is the logarithm of the probability mass for all the other
+words (it will be spread between all 1024 fingerprint values). If the
+respective probabilities do not sum up to 1:
+
+ * if the sum is larger than 0.0 and smaller than 1.0, and no logprob0
+ is given, log of the remaining probablity mass will be assigned to logprob0,
+ * otherwise they will be normalised with.
+softmax
+
+Note: the separator here is space, not TAB!
+
+### Probs
+
+Probabilities could be given (instead of logprobs):
+
+ * if **all** values look as probs and **at least value** is positive, we treat
+ the values as probs rather then logprobs (single value 0.0 is treated
+ as a logprob, i.e. probability 1.0!);
+ * if their sum is greater than 1.0, then we normalize simply by dividing by the sum;
+ * if the sum is smaller than 1.0 and there is no entry for all the other words,
+ we add such an entry for the missing probability mass;
+ * if the sum is smaller than 1.0 and there is an entry for all the other words,
+ we normalize by dividing by the sum.
|] ++ (commonReadmeMDContents testName)
readmeMDContents CharMatch testName = [i|
@@ -170,6 +205,29 @@ This a sample challenge for the log-loss metric.
|] ++ (commonReadmeMDContents testName)
+readmeMDContents Likelihood testName = [i|
+Give the probability of a positive sentiment
+============================================
+
+Give the probability that a sentence expresses a positive sentiment.
+
+This a sample challenge for the likelihood metric.
+
+|] ++ (commonReadmeMDContents testName)
+
+readmeMDContents BIOF1Labels testName = readmeMDContents BIOF1 testName
+readmeMDContents BIOF1 testName = [i|
+Tag and normalize names
+=======================
+
+Tag names in the tokenized text and normalized them.
+
+The output should be given in the BIO format with the normalized forms given after slashes (see
+`dev-0/expected.tsv` for an example).
+
+The metric is F1 counted on entities (not labels).
+|] ++ (commonReadmeMDContents testName)
+
readmeMDContents _ testName = [i|
GEval sample challenge
======================
@@ -239,6 +297,7 @@ trainContents NMI = [hereLit|pl Kto pod kim dołki kopie, ten sam w nie wpada.
en The pen is mightier than the sword.
pl Baba z wozu, koniom lżej.
|]
+trainContents (LikelihoodHashed b) = trainContents (LogLossHashed b)
trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota
Basia ma psa
Nie kupujemy kota w worku
@@ -254,11 +313,17 @@ honour GB honor
titbit GB smakołyk
tidbit US smakołyk
|]
+trainContents Likelihood = trainContents LogLoss
trainContents LogLoss = [hereLit|0.0 Hell, no!!!
0.0 I hate this stuff
1.0 Lekker!!!
0.0 Boring, boring, boring
|]
+trainContents BIOF1Labels = trainContents BIOF1
+trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surname/BOND My name is Bond , James Bond
+O O O O O There is no name here
+B-firstname/JOHN I-surname/VON I-surname/NEUMANN John von Nueman
+|]
trainContents _ = [hereLit|0.06 0.39 0 0.206
1.00 1.00 1 0.017
317.8 5.20 67 0.048
@@ -279,6 +344,7 @@ When the going gets tough, the tough get going.
devInContents (FMeasure _) = [hereLit|b b W 29520 779 -28 -32 a 0 0 0 0 0 0 0 0 0 0
b b W 55200 1259 35 9 a 1 0 1 0 0 0 0 0 4000 4000
|]
+devInContents (LikelihoodHashed b) = devInContents (LogLossHashed b)
devInContents (LogLossHashed _) = [hereLit|Nie kupuj w worku
Ona psa
|]
@@ -290,10 +356,15 @@ devInContents MAP = [hereLit|US noc
GB wózek dziecięcy
GB wizualizować
|]
+devInContents Likelihood = devInContents LogLoss
devInContents LogLoss = [hereLit|Great stuff!
Boring stuff
That's good
|]
+devInContents BIOF1Labels = devInContents BIOF1
+devInContents BIOF1 = [hereLit|Adam and Eve
+Mr Jan Kowalski
+|]
devInContents _ = [hereLit|0.72 0 0.007
9.54 62 0.054
|]
@@ -312,6 +383,7 @@ devExpectedContents NMI = [hereLit|en
pl
en
|]
+devExpectedContents (LikelihoodHashed b) = devExpectedContents (LogLossHashed b)
devExpectedContents (LogLossHashed _) = [hereLit|kota
ma
|]
@@ -323,10 +395,15 @@ devExpectedContents MAP = [hereLit|night nite
pram
visualise
|]
+devExpectedContents Likelihood = devExpectedContents LogLoss
devExpectedContents LogLoss = [hereLit|1.0
0.0
1.0
|]
+devExpectedContents BIOF1Labels = devExpectedContents BIOF1
+devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE
+O B-firstname/JAN B-surname/KOWALSKI
+|]
devExpectedContents _ = [hereLit|0.82
95.2
|]
@@ -347,6 +424,7 @@ W marcu, jak w garncu.
A cada necio agrada su porrada.
Kwiecień plecień, bo przeplata trochę zimy, trochę lata.
|]
+testInContents (LikelihoodHashed b) = testInContents (LogLossHashed b)
testInContents (LogLossHashed _) = [hereLit|Ala ma
Ona ma kota worku
|]
@@ -358,10 +436,15 @@ testInContents MAP = [hereLit|US wózek dziecięcy
GB słoń
US słoń
|]
+testInContents Likelihood = testInContents LogLoss
testInContents LogLoss = [hereLit|That's great, ha, ha, I love it!
Super-duper!!
That is incredibly boring.
|]
+testInContents BIOF1Labels = testInContents BIOF1
+testInContents BIOF1 = [hereLit|Alan Tring
+No name here
+|]
testInContents _ = [hereLit|1.52 2 0.093
30.06 14 0.009
|]
@@ -382,6 +465,7 @@ pl
es
pl
|]
+testExpectedContents (LikelihoodHashed b) = testExpectedContents (LogLossHashed b)
testExpectedContents (LogLossHashed _) = [hereLit|ma
w
|]
@@ -393,10 +477,15 @@ testExpectedContents MAP = [hereLit|trolley
elephant
elephant
|]
+testExpectedContents Likelihood = testExpectedContents LogLoss
testExpectedContents LogLoss = [hereLit|1.0
1.0
0.0
|]
+testExpectedContents BIOF1Labels = testExpectedContents BIOF1
+testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING
+O O O
+|]
testExpectedContents _ = [hereLit|0.11
17.2
|]
diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs
index 9afdf83..74f053b 100644
--- a/src/GEval/LineByLine.hs
+++ b/src/GEval/LineByLine.hs
@@ -9,17 +9,25 @@
module GEval.LineByLine
(runLineByLine,
- runDiff
+ runLineByLineGeneralized,
+ runDiff,
+ runDiffGeneralized,
+ LineRecord(..),
+ ResultOrdering(..)
) where
import GEval.Core
+import Data.Conduit.AutoDecompress (doNothing)
+
import Data.Conduit
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Combinators as CC
import Data.Text
import Data.Text.Encoding
+import Data.List (sortBy, sort)
+
import Control.Monad.IO.Class
import Control.Monad.Trans.Resource
@@ -30,13 +38,10 @@ import Text.Printf
data LineRecord = LineRecord Text Text Text Word32 MetricValue
deriving (Eq, Show)
-runLineByLine :: GEvalSpecification -> IO ()
-runLineByLine spec = do
- (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
- gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum
- where metric = gesMetric spec
- consum :: Consumer LineRecord (ResourceT IO) ()
- consum = (CL.map (encodeUtf8 . formatOutput) =$= CC.unlinesAscii =$= CC.stdout)
+runLineByLine :: ResultOrdering -> GEvalSpecification -> IO ()
+runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum
+ where consum :: ConduitT LineRecord Void (ResourceT IO) ()
+ consum = (CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
formatScore score,
escapeTabs inp,
@@ -45,19 +50,27 @@ runLineByLine spec = do
formatScore :: MetricValue -> Text
formatScore = Data.Text.pack . printf "%f"
-runDiff :: FilePath -> GEvalSpecification -> IO ()
-runDiff otherOut spec = do
- let otherOutFilePath = getOutFile spec otherOut
+runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
+runLineByLineGeneralized ordering spec consum = do
(inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
- let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath
- let sourceB = gevalLineByLineSource metric inputFilePath expectedFilePath otherOutFilePath
- runResourceT $
- ((getZipSource $ (,)
- <$> ZipSource sourceA
- <*> ZipSource sourceB) $$ consum)
+ gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath (sorter ordering .| consum)
where metric = gesMetric spec
- consum :: Consumer (LineRecord, LineRecord) (ResourceT IO) ()
- consum = (CL.filter shouldBeShown =$= CL.map (encodeUtf8 . formatOutput) =$= CC.unlinesAscii =$= CC.stdout)
+ sorter KeepTheOriginalOrder = doNothing
+ sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
+ sortOrder FirstTheWorst TheHigherTheBetter = compareScores
+ sortOrder FirstTheBest TheLowerTheBetter = compareScores
+ sortOrder _ _ = flip compareScores
+ compareScores (LineRecord _ _ _ _ s1) (LineRecord _ _ _ _ s2) = s1 `compare` s2
+
+gobbleAndDo :: Monad m => ([a] -> [b]) -> ConduitT a b m ()
+gobbleAndDo fun = do
+ l <- CC.sinkList
+ CC.yieldMany $ fun l
+
+runDiff :: ResultOrdering -> FilePath -> GEvalSpecification -> IO ()
+runDiff ordering otherOut spec = runDiffGeneralized ordering otherOut spec consum
+ where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
+ consum = (CL.filter shouldBeShown .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) =
outA /= outB && scoreA /= scoreB
formatOutput (LineRecord inp exp outA _ scoreA, LineRecord _ _ outB _ scoreB) = Data.Text.intercalate "\t" [
@@ -69,19 +82,40 @@ runDiff otherOut spec = do
formatScoreDiff :: Double -> Text
formatScoreDiff = Data.Text.pack . printf "%f"
+runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a
+runDiffGeneralized ordering otherOut spec consum = do
+ let otherOutFilePath = getOutFile spec otherOut
+ (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
+ let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath
+ let sourceB = gevalLineByLineSource metric inputFilePath expectedFilePath otherOutFilePath
+ runResourceT $ runConduit $
+ ((getZipSource $ (,)
+ <$> ZipSource sourceA
+ <*> ZipSource sourceB) .| sorter ordering .| consum)
+ where metric = gesMetric spec
+ sorter KeepTheOriginalOrder = doNothing
+ sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
+ sortOrder FirstTheWorst TheHigherTheBetter = compareScores
+ sortOrder FirstTheBest TheLowerTheBetter = compareScores
+ sortOrder _ _ = flip compareScores
+ compareScores ((LineRecord _ _ _ _ o1), (LineRecord _ _ _ _ n1))
+ ((LineRecord _ _ _ _ o2), (LineRecord _ _ _ _ n2))
+ = (n1 - o1) `compare` (n2 - o2)
+
+
escapeTabs :: Text -> Text
escapeTabs = Data.Text.replace "\t" ""
-gevalLineByLineCore :: Metric -> FilePath -> FilePath -> FilePath -> Sink LineRecord (ResourceT IO) () -> IO ()
+gevalLineByLineCore :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum =
- runResourceT $
- ((gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath) $$ consum)
+ runResourceT $ runConduit $
+ ((gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath) .| consum)
-gevalLineByLineSource :: Metric -> FilePath -> FilePath -> FilePath -> Source (ResourceT IO) LineRecord
+gevalLineByLineSource :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT () LineRecord (ResourceT IO) ()
gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath =
(getZipSource $ (,)
<$> ZipSource (CL.sourceList [1..])
- <*> (ZipSource $ recordSource context parserSpec)) =$= CL.mapM (checkStepM evaluateLine) =$= CL.catMaybes
+ <*> (ZipSource $ recordSource context parserSpec)) .| CL.mapM (checkStepM evaluateLine) .| CL.catMaybes
where parserSpec = (ParserSpecWithInput (Right . id) (Right . id) (Right . id))
context = (WithInput inputLineSource expectedLineSource outputLineSource)
inputLineSource = fileAsLineSource inputFilePath
diff --git a/src/GEval/LogLossHashed.hs b/src/GEval/LogLossHashed.hs
index 3e092ab..5b3dfcc 100644
--- a/src/GEval/LogLossHashed.hs
+++ b/src/GEval/LogLossHashed.hs
@@ -1,7 +1,7 @@
{-# LANGUAGE OverloadedStrings #-}
module GEval.LogLossHashed
- (HashedDistribution, parseDistribution, calculateLogLoss)
+ (HashedDistribution, parseDistribution, calculateLogLoss, parseWordSpecs, wordSpecToPair)
where
import qualified Data.Vector as V
@@ -36,24 +36,41 @@ parseDistribution nbOfBits seed distroSpec =
-- a direct list of 2^nbOfBits log probs
else parseDistributionFromLogProbList nbOfBits distroSpec
+isProbTotalIncorrect :: Double -> Bool
+isProbTotalIncorrect probTotal = probTotal > 1.0 || probTotal < (1.0 - epsilon)
+ where epsilon = 0.00000001
+
normalizeDistribution :: HashedDistribution -> HashedDistribution
normalizeDistribution distro =
-- we do softmax (if needed)
- if probSum > 1.0 || probSum < (1.0 - epsilon)
+ if isProbTotalIncorrect probSum
then normalized
else distro
where probSum = V.foldl' (\s l -> (s + exp l)) 0.0 distro
normalized = V.map (\v -> log ((exp v) / probSum)) distro
- epsilon = 0.00000001
type DistroMonad s = ReaderT (VM.MVector s Double) (ST s)
data WordSpec = AnyWord | SpecificWord T.Text
+ deriving (Eq, Show)
+
+isAnyWord AnyWord = True
+isAnyWord _ = False
+
data WordSpecWithLogProb = WordSpecWithLogProb WordSpec Double
+wordSpecToPair :: WordSpecWithLogProb -> Maybe (Double, T.Text)
+wordSpecToPair (WordSpecWithLogProb AnyWord _) = Nothing
+wordSpecToPair (WordSpecWithLogProb (SpecificWord w) lp) = Just (lp, w)
+
parseDistributionFromWordList :: Word32 -> Word32 -> T.Text -> Either String HashedDistribution
-parseDistributionFromWordList nbOfBits seed distroSpec = (parseDistributionFromWordList' nbOfBits seed) =<< (
- processEithers $ map getWordSpecWithLogProb $ T.splitOn " " distroSpec)
+parseDistributionFromWordList nbOfBits seed distroSpec = (parseDistributionFromWordList' nbOfBits seed) =<<
+ normalizeLogProbs =<<
+ lookForProbs =<<
+ (parseWordSpecs distroSpec)
+
+parseWordSpecs :: T.Text -> Either String [WordSpecWithLogProb]
+parseWordSpecs distroSpec = processEithers $ map getWordSpecWithLogProb $ T.splitOn " " distroSpec
getWordSpecWithLogProb :: T.Text -> Either String WordSpecWithLogProb
getWordSpecWithLogProb t =
@@ -77,6 +94,42 @@ parseDistributionFromWordList' nbOfBits seed specs = runST $ do
frozen <- V.freeze emp
return $ Right frozen
+lookForProbs :: [WordSpecWithLogProb] -> Either String [WordSpecWithLogProb]
+lookForProbs specs
+ | areProbs specs = Right $ toLogProbs $ normalizeProbs specs
+ | otherwise = Right $ specs
+
+areProbs :: [WordSpecWithLogProb] -> Bool
+areProbs specs = all isProb specs && any isPositiveProb specs
+ where isProb (WordSpecWithLogProb _ v) = v >= 0.0 && v <= 1.0
+ isPositiveProb (WordSpecWithLogProb _ p) = p > 0.0 && p <= 1.0
+
+toLogProbs :: [WordSpecWithLogProb] -> [WordSpecWithLogProb]
+toLogProbs = map (\(WordSpecWithLogProb w p) -> (WordSpecWithLogProb w (log p)))
+
+normalizeLogProbs :: [WordSpecWithLogProb] -> Either String [WordSpecWithLogProb]
+normalizeLogProbs specs = if isProbTotalIncorrect probTotal
+ && probTotal < 1.0 && probTotal > 0.0
+ && not (any (\(WordSpecWithLogProb w _) -> isAnyWord w) specs)
+ && all (\(WordSpecWithLogProb _ lp) -> lp <= 0) specs
+ then
+ Right ((WordSpecWithLogProb AnyWord (log (1-probTotal))):specs)
+ else
+ Right specs
+ where probTotal = sum $ map (\(WordSpecWithLogProb _ logp) -> exp logp) specs
+
+normalizeProbs :: [WordSpecWithLogProb] -> [WordSpecWithLogProb]
+normalizeProbs specs = if isProbTotalIncorrect probTotal
+ then
+ if probTotal > 1.0 || any (\(WordSpecWithLogProb w _) -> isAnyWord w) specs
+ then
+ map (\(WordSpecWithLogProb w p) -> WordSpecWithLogProb w (p / probTotal)) specs
+ else
+ ((WordSpecWithLogProb AnyWord (1-probTotal)):specs)
+ else
+ specs
+ where probTotal = sum $ map (\(WordSpecWithLogProb _ p) -> p) specs
+
addSpecs :: Word32 -> Word32 -> [WordSpecWithLogProb] -> DistroMonad s ()
addSpecs nbOfBits seed = mapM_ (updateDistro nbOfBits seed)
diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs
index d5fe2f8..e46b033 100644
--- a/src/GEval/OptionsParser.hs
+++ b/src/GEval/OptionsParser.hs
@@ -41,6 +41,16 @@ optionsParser = GEvalOptions
<> short 'd'
<> metavar "OTHER-OUT"
<> help "compare results")))
+ <*> ((flag' FirstTheWorst
+ (long "sort"
+ <> short 's'
+ <> help "When in line-by-line or diff mode, sort the results from the worst to the best"))
+ <|>
+ (flag' FirstTheBest
+ (long "reverse-sort"
+ <> short 'r'
+ <> help "When in line-by-line or diff mode, sort the results from the best to the worst"))
+ <|> pure KeepTheOriginalOrder)
<*> specParser
precisionArgParser :: Parser Int
@@ -90,9 +100,13 @@ specParser = GEvalSpecification
<> showDefault
<> metavar "INPUT"
<> help "The name of the file with the input (applicable only for some metrics)" )
- <*> metricReader
+ <*> ((flip fromMaybe) <$> altMetricReader <*> metricReader)
<*> optional precisionArgParser
+sel :: Maybe Metric -> Metric -> Metric
+sel Nothing m = m
+sel (Just m) _ = m
+
metricReader :: Parser Metric
metricReader = option auto
( long "metric"
@@ -100,7 +114,14 @@ metricReader = option auto
<> value defaultMetric
<> showDefault
<> metavar "METRIC"
- <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed or CharMatch" )
+ <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels or CharMatch" )
+
+altMetricReader :: Parser (Maybe Metric)
+altMetricReader = optional $ option auto
+ ( long "alt-metric"
+ <> short 'a'
+ <> metavar "METRIC"
+ <> help "Alternative metric (overrides --metric option)" )
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
runGEval args = do
@@ -145,20 +166,20 @@ attemptToReadOptsFromConfigFile args opts = do
runGEval'' :: GEvalOptions -> IO (Maybe MetricValue)
-runGEval'' opts = runGEval''' (geoSpecialCommand opts) (geoSpec opts)
+runGEval'' opts = runGEval''' (geoSpecialCommand opts) (geoResultOrdering opts) (geoSpec opts)
-runGEval''' :: Maybe GEvalSpecialCommand -> GEvalSpecification -> IO (Maybe MetricValue)
-runGEval''' Nothing spec = do
+runGEval''' :: Maybe GEvalSpecialCommand -> ResultOrdering -> GEvalSpecification -> IO (Maybe MetricValue)
+runGEval''' Nothing _ spec = do
val <- geval spec
return $ Just val
-runGEval''' (Just Init) spec = do
+runGEval''' (Just Init) _ spec = do
initChallenge spec
return Nothing
-runGEval''' (Just LineByLine) spec = do
- runLineByLine spec
+runGEval''' (Just LineByLine) ordering spec = do
+ runLineByLine ordering spec
return Nothing
-runGEval''' (Just (Diff otherOut)) spec = do
- runDiff otherOut spec
+runGEval''' (Just (Diff otherOut)) ordering spec = do
+ runDiff ordering otherOut spec
return Nothing
initChallenge :: GEvalSpecification -> IO ()
@@ -169,11 +190,9 @@ initChallenge spec = case gesExpectedDirectory spec of
showInitInstructions = do
putStrLn [here|
Run:
- geval --init --expected-directory CHALLENGE
+ geval --init --expected-directory CHALLENGE --metric METRIC-NAME --precision NUMBER-OF-DIGITS
to create a directory CHALLENGE representing a Gonito challenge.
-You can specify a metric with `--metric METRIC-NAME` option.
-
-Note that `--out-directory` option is not taken into account with `--init` option.
+(Note that `--out-directory` option is not taken into account with `--init` option.)
|]
exitFailure
diff --git a/src/GEval/PrecisionRecall.hs b/src/GEval/PrecisionRecall.hs
index 8781914..16879b2 100644
--- a/src/GEval/PrecisionRecall.hs
+++ b/src/GEval/PrecisionRecall.hs
@@ -3,7 +3,7 @@
module GEval.PrecisionRecall(calculateMAPForOneResult,
fMeasure, f1Measure, f2Measure, precision, recall,
fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder,
- precisionAndRecall, precisionAndRecallFromCounts, maxMatch)
+ precisionAndRecall, precisionAndRecallFromCounts, maxMatch, maxMatchOnOrdered)
where
import GEval.Common
@@ -65,6 +65,17 @@ precision matchFun expected got = fst $ precisionAndRecall matchFun expected got
recall :: (a -> b -> Bool) -> [a] -> [b] -> Double
recall matchFun expected got = snd $ precisionAndRecall matchFun expected got
+
+maxMatchOnOrdered :: Eq a => (a -> a -> Bool) -> [a] -> [a] -> Int
+maxMatchOnOrdered laterThan expected got =
+ let (matched, _) = foldl' step (0, expected) got
+ in matched
+ where step (matched, l@(h:t)) g
+ | h == g = (matched+1, t)
+ | h `laterThan` g = (matched, l)
+ | otherwise = step (matched, t) g
+ step (matched, []) g = (matched, [])
+
-- counting maximum match with maximum bipartite matching
-- (we build an auxiliary graph and do a max-flow on this)
maxMatch :: (a -> b -> Bool) -> [a] -> [b] -> Int
@@ -72,7 +83,6 @@ maxMatch matchFun expected got = mf
where (b, e, g) = buildGraph matchFun expected got
mf = maxFlow g (fst b) (fst e)
-
buildGraph :: (a -> b -> Bool) -> [a] -> [b] -> (LNode Int, LNode Int, Gr Int Int)
buildGraph matchFun expected got = (b, e, g)
where ((b, e), (_, g)) = buildGraph' matchFun expected got
diff --git a/stack.yaml b/stack.yaml
index 1d9e84d..86aa566 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -1,5 +1,5 @@
flags: {}
packages:
- '.'
-extra-deps: [cond-0.4.1.1,murmur3-1.0.3]
-resolver: lts-9.14
+extra-deps: [murmur3-1.0.3]
+resolver: lts-11.9
diff --git a/test/Spec.hs b/test/Spec.hs
index 1f486f2..6ddf120 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -8,11 +8,15 @@ import GEval.BLEU
import GEval.ClippEU
import GEval.PrecisionRecall
import GEval.ClusteringMetrics
+import GEval.BIO
+import GEval.LineByLine
import Data.Attoparsec.Text
import Options.Applicative
import Data.Text
import Text.EditDistance
+import Data.Conduit.List (consume)
+
import qualified Test.HUnit as HU
import Data.Conduit.SmartSource
@@ -101,6 +105,16 @@ main = hspec $ do
runGEvalTest "log-loss-hashed-simple" `shouldReturnAlmost` 2.398479083333333
it "example with unnormalized values" $ do
runGEvalTest "log-loss-hashed-not-normalized" `shouldReturnAlmost` 1.0468455186722887
+ it "with probs instead of log probs" $ do
+ runGEvalTest "log-loss-hashed-probs" `shouldReturnAlmost` 4.11631293099392
+ it "with probs instead of log probs (with normalization)" $ do
+ runGEvalTest "log-loss-hashed-probs-normalized" `shouldReturnAlmost` 1.55537749098853
+ it "with log probs whose probs are summing up to less than 1.0" $ do
+ runGEvalTest "log-loss-hashed-normalization" `shouldReturnAlmost` 5.16395069238851
+ describe "LikelihoodHashed challenge" $ do
+ it "example with unnormalized values" $ do
+ runGEvalTest "likelihood-hashed-not-normalized" `shouldReturnAlmost` 0.351043364110715
+
describe "reading options" $ do
it "can get the metric" $ do
extractMetric "bleu-complex" `shouldReturn` (Just BLEU)
@@ -188,11 +202,105 @@ main = hspec $ do
runGEvalTest "logloss-simple" `shouldReturnAlmost` 0.31824
it "perfect" $ do
runGEvalTest "logloss-perfect" `shouldReturnAlmost` 0.0
+ describe "Likelihood" $ do
+ it "simple" $ do
+ runGEvalTest "likelihood-simple" `shouldReturnAlmost` 0.72742818469866
describe "evaluating single lines" $ do
it "RMSE" $ do
gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla")
(LineInFile "stub2" 1 "3.4")
(LineInFile "stub3" 1 "2.6") `shouldReturnAlmost` 0.8
+ describe "BIO format" $ do
+ it "just parse" $ do
+ let (Right r) = parseOnly (bioSequenceParser <* endOfInput) "O B-city/NEW_YORK I-city B-city/KALISZ I-city O B-name"
+ r `shouldBe` [Outside,
+ Beginning "city" (Just "NEW_YORK"),
+ Inside "city" Nothing,
+ Beginning "city" (Just "KALISZ"),
+ Inside "city" Nothing,
+ Outside,
+ Beginning "name" Nothing]
+ it "simplest entity" $ do
+ let (Right ents) = parseBioSequenceIntoEntities "B-city"
+ ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "city" Nothing]
+ it "multi-word entity" $ do
+ let (Right ents) = parseBioSequenceIntoEntities "B-date I-date"
+ ents `shouldBe` [TaggedEntity (TaggedSpan 1 2) "date" Nothing]
+ it "multi-word entity with normalized text" $ do
+ let (Right ents) = parseBioSequenceIntoEntities "B-date/FOO I-date/BAR"
+ ents `shouldBe` [TaggedEntity (TaggedSpan 1 2) "date" (Just "FOO_BAR")]
+ it "simplest entity with something outside" $ do
+ let (Right ents) = parseBioSequenceIntoEntities "O B-city"
+ ents `shouldBe` [TaggedEntity (TaggedSpan 2 2) "city" Nothing]
+ it "another simple case" $ do
+ let (Right ents) = parseBioSequenceIntoEntities "B-city B-city"
+ ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "city" Nothing,
+ TaggedEntity (TaggedSpan 2 2) "city" Nothing]
+ it "just parse into entities" $ do
+ let (Right ents) = parseBioSequenceIntoEntities "O O B-city/LOS_ANGELES I-city B-city/KLUCZBORK O B-name O B-person/JOHN I-person/VON I-person/NEUMANN"
+ ents `shouldBe` [TaggedEntity (TaggedSpan 3 4) "city" (Just "LOS_ANGELES"),
+ TaggedEntity (TaggedSpan 5 5) "city" (Just "KLUCZBORK"),
+ TaggedEntity (TaggedSpan 7 7) "name" (Nothing),
+ TaggedEntity (TaggedSpan 9 11) "person" (Just "JOHN_VON_NEUMANN")]
+ it "another entity parse" $ do
+ let (Right ents) = parseBioSequenceIntoEntities "B-month/JULY B-month/JULY O O B-foo/bar"
+ ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "month" (Just "JULY"),
+ TaggedEntity (TaggedSpan 2 2) "month" (Just "JULY"),
+ TaggedEntity (TaggedSpan 5 5) "foo" (Just "bar")]
+ it "another entity parse" $ do
+ let (Right ents) = parseBioSequenceIntoEntities "B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK"
+ ents `shouldBe` [TaggedEntity (TaggedSpan 1 2) "city" (Just "LOS_ANGELES"),
+ TaggedEntity (TaggedSpan 4 5) "city" (Just "NEW_YORK")]
+ it "parse entity" $ do
+ let (Right ents) = parseBioSequenceIntoEntities "B-surname/BROWN B-surname/SMITH"
+ ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "surname" (Just "BROWN"),
+ TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")]
+ it "parse entity" $ do
+ let (Right ents) = parseBioSequenceIntoEntities "O B-surname/SMITH"
+ ents `shouldBe` [TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")]
+ it "check counting" $ do
+ gatherCountsForBIO [TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")] [TaggedEntity (TaggedSpan 1 1) "surname" (Just "BROWN"),
+ TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")] `shouldBe` (1, 1, 2)
+ it "check F1 on a more complicated example" $ do
+ runGEvalTest "bio-f1-complex" `shouldReturnAlmost` 0.625
+ it "check F1 on labels only" $ do
+ runGEvalTest "bio-f1-complex-labels" `shouldReturnAlmost` 0.6666666666
+ it "calculate F1" $ do
+ runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5
+ it "calculate F1 with underscores rather than minus signs" $ do
+ runGEvalTest "bio-f1-simple-underscores" `shouldReturnAlmost` 0.5
+ it "check perfect score" $ do
+ runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0
+ it "check inconsistent input" $ do
+ runGEvalTest "bio-f1-error" `shouldThrow` (== UnexpectedData 2 "inconsistent label sequence `B-NAME/JOHN I-FOO/SMITH I-FOO/X`")
+ describe "automatic decompression" $ do
+ it "more complex test" $ do
+ runGEvalTest "charmatch-complex-compressed" `shouldReturnAlmost` 0.1923076923076923
+ describe "line by line mode" $ do
+ let sampleChallenge =
+ GEvalSpecification
+ { gesOutDirectory = "test/likelihood-simple/likelihood-simple-solution",
+ gesExpectedDirectory = Just "test/likelihood-simple/likelihood-simple",
+ gesTestName = "test-A",
+ gesOutFile = "out.tsv",
+ gesExpectedFile = "expected.tsv",
+ gesInputFile = "in.tsv",
+ gesMetric = Likelihood,
+ gesPrecision = Nothing }
+ it "simple test" $ do
+ results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge Data.Conduit.List.consume
+ Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo",
+ "bar",
+ "baz",
+ "baq"]
+ it "test sorting" $ do
+ results <- runLineByLineGeneralized FirstTheWorst sampleChallenge Data.Conduit.List.consume
+ Prelude.head (Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results) `shouldBe` "baq"
+ describe "handle --alt-metric option" $ do
+ it "accuracy instead of likelihood" $ do
+ runGEvalTestExtraOptions ["--alt-metric", "Accuracy"] "likelihood-simple" `shouldReturnAlmost` 0.75
+ it "accuracy instead of log loss" $ do
+ runGEvalTestExtraOptions ["--alt-metric", "Accuracy"] "log-loss-hashed-probs" `shouldReturnAlmost` 0.4
describe "smart sources" $ do
it "smart specs are parsed" $ do
parseSmartSpec "" `shouldBe` NoSpec
@@ -217,8 +325,8 @@ main = hspec $ do
parseSmartSpecInContext ["foo/bar"] Nothing "" `shouldBe` Nothing
it "sources are accessed" $ do
readFromSmartSource [] Nothing "test/files/foo.txt" `shouldReturn` ["foo\n"]
- readFromSmartSource [] Nothing "https://httpbin.org/robots.txt" `shouldReturn`
- ["User-agent: *\nDisallow: /deny\n"]
+-- readFromSmartSource [] Nothing "https://httpbin.org/robots.txt" `shouldReturn`
+-- ["User-agent: *\nDisallow: /deny\n"]
readFromSmartSource :: [FilePath] -> Maybe FilePath -> String -> IO [String]
readFromSmartSource defaultDirs defaultFile specS = do
@@ -244,11 +352,13 @@ testMatchFun _ _ = False
extractVal :: (Either (ParserResult GEvalOptions) (Maybe MetricValue)) -> IO MetricValue
extractVal (Right (Just val)) = return val
-runGEvalTest testName = (runGEval [
+runGEvalTest = runGEvalTestExtraOptions []
+
+runGEvalTestExtraOptions extraOptions testName = (runGEval ([
"--expected-directory",
"test/" ++ testName ++ "/" ++ testName,
"--out-directory",
- "test/" ++ testName ++ "/" ++ testName ++ "-solution"]) >>= extractVal
+ "test/" ++ testName ++ "/" ++ testName ++ "-solution"] ++ extraOptions)) >>= extractVal
extractMetric :: String -> IO (Maybe Metric)
extractMetric testName = do
diff --git a/test/bio-f1-complex-labels/bio-f1-complex-labels-solution/test-A/out.tsv b/test/bio-f1-complex-labels/bio-f1-complex-labels-solution/test-A/out.tsv
new file mode 100644
index 0000000..0e8b8a1
--- /dev/null
+++ b/test/bio-f1-complex-labels/bio-f1-complex-labels-solution/test-A/out.tsv
@@ -0,0 +1,6 @@
+B-wrong
+B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK_CITY
+B-surname/BROWN B-surname/SMIT
+B-month B-month O O B-foo/bar
+O B-class I-class I-class
+O O
diff --git a/test/bio-f1-complex-labels/bio-f1-complex-labels/config.txt b/test/bio-f1-complex-labels/bio-f1-complex-labels/config.txt
new file mode 100644
index 0000000..eee37bb
--- /dev/null
+++ b/test/bio-f1-complex-labels/bio-f1-complex-labels/config.txt
@@ -0,0 +1 @@
+--metric BIO-F1-Labels
diff --git a/test/bio-f1-complex-labels/bio-f1-complex-labels/test-A/expected.tsv b/test/bio-f1-complex-labels/bio-f1-complex-labels/test-A/expected.tsv
new file mode 100644
index 0000000..c6e719d
--- /dev/null
+++ b/test/bio-f1-complex-labels/bio-f1-complex-labels/test-A/expected.tsv
@@ -0,0 +1,6 @@
+O
+B-city/LOS I-city/ANGELES O B-city/NEW_YORK O
+O B-surname/SMITH
+B-month/JULY O O O B-foo/bar
+O B-class I-class I-class
+O O
diff --git a/test/bio-f1-complex/bio-f1-complex-solution/test-A/out.tsv b/test/bio-f1-complex/bio-f1-complex-solution/test-A/out.tsv
new file mode 100644
index 0000000..f84fa1d
--- /dev/null
+++ b/test/bio-f1-complex/bio-f1-complex-solution/test-A/out.tsv
@@ -0,0 +1,6 @@
+B-wrong
+B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK
+B-surname/BROWN B-surname/SMITH
+B-month/JULY B-month/JULY O O B-foo/bar
+O B-class I-class I-class
+O B-wrong
diff --git a/test/bio-f1-complex/bio-f1-complex/config.txt b/test/bio-f1-complex/bio-f1-complex/config.txt
new file mode 100644
index 0000000..70977e1
--- /dev/null
+++ b/test/bio-f1-complex/bio-f1-complex/config.txt
@@ -0,0 +1 @@
+--metric BIO-F1
diff --git a/test/bio-f1-complex/bio-f1-complex/test-A/expected.tsv b/test/bio-f1-complex/bio-f1-complex/test-A/expected.tsv
new file mode 100644
index 0000000..c6e719d
--- /dev/null
+++ b/test/bio-f1-complex/bio-f1-complex/test-A/expected.tsv
@@ -0,0 +1,6 @@
+O
+B-city/LOS I-city/ANGELES O B-city/NEW_YORK O
+O B-surname/SMITH
+B-month/JULY O O O B-foo/bar
+O B-class I-class I-class
+O O
diff --git a/test/bio-f1-error/bio-f1-error-solution/test-A/out.tsv b/test/bio-f1-error/bio-f1-error-solution/test-A/out.tsv
new file mode 100644
index 0000000..5b95d34
--- /dev/null
+++ b/test/bio-f1-error/bio-f1-error-solution/test-A/out.tsv
@@ -0,0 +1,2 @@
+O B-CITY/WARSZAWA I-CITY/WARSZAWA
+O B-NAME/JOHN I-FOO/SMITH I-FOO/X O
diff --git a/test/bio-f1-error/bio-f1-error/config.txt b/test/bio-f1-error/bio-f1-error/config.txt
new file mode 100644
index 0000000..70977e1
--- /dev/null
+++ b/test/bio-f1-error/bio-f1-error/config.txt
@@ -0,0 +1 @@
+--metric BIO-F1
diff --git a/test/bio-f1-error/bio-f1-error/test-A/expected.tsv b/test/bio-f1-error/bio-f1-error/test-A/expected.tsv
new file mode 100644
index 0000000..f6465b5
--- /dev/null
+++ b/test/bio-f1-error/bio-f1-error/test-A/expected.tsv
@@ -0,0 +1,2 @@
+O B-CITY/WARSZAWA I-CITY/WARSZAWA
+O B-NAME/JOHN I-NAME/SMITH O O
diff --git a/test/bio-f1-perfect/bio-f1-perfect-solution/test-A/out.tsv b/test/bio-f1-perfect/bio-f1-perfect-solution/test-A/out.tsv
new file mode 100644
index 0000000..e0fa64b
--- /dev/null
+++ b/test/bio-f1-perfect/bio-f1-perfect-solution/test-A/out.tsv
@@ -0,0 +1,4 @@
+O O O
+O B-city/NEW I-city/YORK I-city/CITY O B-month/July
+B-surname/SMITH
+B-city/LONDON B-city/PARIS
diff --git a/test/bio-f1-perfect/bio-f1-perfect/config.txt b/test/bio-f1-perfect/bio-f1-perfect/config.txt
new file mode 100644
index 0000000..70977e1
--- /dev/null
+++ b/test/bio-f1-perfect/bio-f1-perfect/config.txt
@@ -0,0 +1 @@
+--metric BIO-F1
diff --git a/test/bio-f1-perfect/bio-f1-perfect/test-A/expected.tsv b/test/bio-f1-perfect/bio-f1-perfect/test-A/expected.tsv
new file mode 100644
index 0000000..e0fa64b
--- /dev/null
+++ b/test/bio-f1-perfect/bio-f1-perfect/test-A/expected.tsv
@@ -0,0 +1,4 @@
+O O O
+O B-city/NEW I-city/YORK I-city/CITY O B-month/July
+B-surname/SMITH
+B-city/LONDON B-city/PARIS
diff --git a/test/bio-f1-simple-underscores/bio-f1-simple-underscores-solution/test-A/out.tsv b/test/bio-f1-simple-underscores/bio-f1-simple-underscores-solution/test-A/out.tsv
new file mode 100644
index 0000000..4380412
--- /dev/null
+++ b/test/bio-f1-simple-underscores/bio-f1-simple-underscores-solution/test-A/out.tsv
@@ -0,0 +1,3 @@
+O O B_city/POZNAŃ O O B_date/MARCH I_date/12
+B_city/BUK O O O
+B_name/FOO O B_surname/KOWALSKI
diff --git a/test/bio-f1-simple-underscores/bio-f1-simple-underscores/config.txt b/test/bio-f1-simple-underscores/bio-f1-simple-underscores/config.txt
new file mode 100644
index 0000000..70977e1
--- /dev/null
+++ b/test/bio-f1-simple-underscores/bio-f1-simple-underscores/config.txt
@@ -0,0 +1 @@
+--metric BIO-F1
diff --git a/test/bio-f1-simple-underscores/bio-f1-simple-underscores/test-A/expected.tsv b/test/bio-f1-simple-underscores/bio-f1-simple-underscores/test-A/expected.tsv
new file mode 100644
index 0000000..31de6dc
--- /dev/null
+++ b/test/bio-f1-simple-underscores/bio-f1-simple-underscores/test-A/expected.tsv
@@ -0,0 +1,3 @@
+O O B_city/POZNAŃ O O B_date/MARCH I_date/12
+O O O O
+O B_city/KONIN O
diff --git a/test/bio-f1-simple/bio-f1-simple-solution/test-A/out.tsv b/test/bio-f1-simple/bio-f1-simple-solution/test-A/out.tsv
new file mode 100644
index 0000000..0666d27
--- /dev/null
+++ b/test/bio-f1-simple/bio-f1-simple-solution/test-A/out.tsv
@@ -0,0 +1,3 @@
+O O B-city/POZNAŃ O O B-date/MARCH I-date/12
+B-city/BUK O O O
+B-name/FOO O B-surname/KOWALSKI
diff --git a/test/bio-f1-simple/bio-f1-simple/config.txt b/test/bio-f1-simple/bio-f1-simple/config.txt
new file mode 100644
index 0000000..70977e1
--- /dev/null
+++ b/test/bio-f1-simple/bio-f1-simple/config.txt
@@ -0,0 +1 @@
+--metric BIO-F1
diff --git a/test/bio-f1-simple/bio-f1-simple/test-A/expected.tsv b/test/bio-f1-simple/bio-f1-simple/test-A/expected.tsv
new file mode 100644
index 0000000..659986b
--- /dev/null
+++ b/test/bio-f1-simple/bio-f1-simple/test-A/expected.tsv
@@ -0,0 +1,3 @@
+O O B-city/POZNAŃ O O B-date/MARCH I-date/12
+O O O O
+O B-city/KONIN O
diff --git a/test/charmatch-complex-compressed/charmatch-complex-compressed-solution/test-A/out.tsv.bz2 b/test/charmatch-complex-compressed/charmatch-complex-compressed-solution/test-A/out.tsv.bz2
new file mode 100644
index 0000000..f74ec2e
Binary files /dev/null and b/test/charmatch-complex-compressed/charmatch-complex-compressed-solution/test-A/out.tsv.bz2 differ
diff --git a/test/charmatch-complex-compressed/charmatch-complex-compressed/config.txt b/test/charmatch-complex-compressed/charmatch-complex-compressed/config.txt
new file mode 100644
index 0000000..ceb23f3
--- /dev/null
+++ b/test/charmatch-complex-compressed/charmatch-complex-compressed/config.txt
@@ -0,0 +1 @@
+--metric CharMatch
diff --git a/test/charmatch-complex-compressed/charmatch-complex-compressed/test-A/expected.tsv.gz b/test/charmatch-complex-compressed/charmatch-complex-compressed/test-A/expected.tsv.gz
new file mode 100644
index 0000000..6457aec
Binary files /dev/null and b/test/charmatch-complex-compressed/charmatch-complex-compressed/test-A/expected.tsv.gz differ
diff --git a/test/charmatch-complex-compressed/charmatch-complex-compressed/test-A/in.tsv.xz b/test/charmatch-complex-compressed/charmatch-complex-compressed/test-A/in.tsv.xz
new file mode 100644
index 0000000..34c5c1e
Binary files /dev/null and b/test/charmatch-complex-compressed/charmatch-complex-compressed/test-A/in.tsv.xz differ
diff --git a/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized-solution/test-A/out.tsv b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized-solution/test-A/out.tsv
new file mode 100644
index 0000000..02c35e7
--- /dev/null
+++ b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized-solution/test-A/out.tsv
@@ -0,0 +1,2 @@
+tak:10 nie:8.9
+niebieski:0 żółty:1.5 czerwony:-0.5
diff --git a/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/config.txt b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/config.txt
new file mode 100644
index 0000000..00b7f18
--- /dev/null
+++ b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/config.txt
@@ -0,0 +1 @@
+--metric LikelihoodHashed8
diff --git a/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/test-A/expected.tsv b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/test-A/expected.tsv
new file mode 100644
index 0000000..70d9d14
--- /dev/null
+++ b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/test-A/expected.tsv
@@ -0,0 +1,2 @@
+tak
+niebieski
diff --git a/test/likelihood-simple/likelihood-simple-solution/test-A/out.tsv b/test/likelihood-simple/likelihood-simple-solution/test-A/out.tsv
new file mode 100644
index 0000000..6978be2
--- /dev/null
+++ b/test/likelihood-simple/likelihood-simple-solution/test-A/out.tsv
@@ -0,0 +1,4 @@
+0.7
+0
+0.0
+0.6
diff --git a/test/likelihood-simple/likelihood-simple/config.txt b/test/likelihood-simple/likelihood-simple/config.txt
new file mode 100644
index 0000000..34e7898
--- /dev/null
+++ b/test/likelihood-simple/likelihood-simple/config.txt
@@ -0,0 +1 @@
+--metric Likelihood
diff --git a/test/likelihood-simple/likelihood-simple/test-A/expected.tsv b/test/likelihood-simple/likelihood-simple/test-A/expected.tsv
new file mode 100644
index 0000000..968ac3e
--- /dev/null
+++ b/test/likelihood-simple/likelihood-simple/test-A/expected.tsv
@@ -0,0 +1,4 @@
+1
+0
+0
+0
diff --git a/test/likelihood-simple/likelihood-simple/test-A/in.tsv b/test/likelihood-simple/likelihood-simple/test-A/in.tsv
new file mode 100644
index 0000000..655a059
--- /dev/null
+++ b/test/likelihood-simple/likelihood-simple/test-A/in.tsv
@@ -0,0 +1,4 @@
+foo
+bar
+baz
+baq
diff --git a/test/log-loss-hashed-normalization/log-loss-hashed-normalization-solution/test-A/out.tsv b/test/log-loss-hashed-normalization/log-loss-hashed-normalization-solution/test-A/out.tsv
new file mode 100644
index 0000000..22e39a7
--- /dev/null
+++ b/test/log-loss-hashed-normalization/log-loss-hashed-normalization-solution/test-A/out.tsv
@@ -0,0 +1,3 @@
+B:-1.20397280432594 A:-0.916290731874155
+A:-2.3025850929940 C:-1.6094379124341
+A:-2.3025850929940 C:-1.6094379124341 :-0.356674943938732
diff --git a/test/log-loss-hashed-normalization/log-loss-hashed-normalization/config.txt b/test/log-loss-hashed-normalization/log-loss-hashed-normalization/config.txt
new file mode 100644
index 0000000..8121298
--- /dev/null
+++ b/test/log-loss-hashed-normalization/log-loss-hashed-normalization/config.txt
@@ -0,0 +1 @@
+--metric LogLossHashed10
diff --git a/test/log-loss-hashed-normalization/log-loss-hashed-normalization/test-A/expected.tsv b/test/log-loss-hashed-normalization/log-loss-hashed-normalization/test-A/expected.tsv
new file mode 100644
index 0000000..3042490
--- /dev/null
+++ b/test/log-loss-hashed-normalization/log-loss-hashed-normalization/test-A/expected.tsv
@@ -0,0 +1,3 @@
+A
+B
+B
diff --git a/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized-solution/test-A/out.tsv b/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized-solution/test-A/out.tsv
new file mode 100644
index 0000000..d1dbc6f
--- /dev/null
+++ b/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized-solution/test-A/out.tsv
@@ -0,0 +1,3 @@
+1:0.5 2:0.6 3:1.0 4:1.0 5:0.9
+1:0.3 2:0.2 3:0.3
+1:0.2 :0.6
diff --git a/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/config.txt b/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/config.txt
new file mode 100644
index 0000000..8121298
--- /dev/null
+++ b/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/config.txt
@@ -0,0 +1 @@
+--metric LogLossHashed10
diff --git a/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/test-A/expected.tsv b/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/test-A/expected.tsv
new file mode 100644
index 0000000..39a8593
--- /dev/null
+++ b/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/test-A/expected.tsv
@@ -0,0 +1,3 @@
+1
+3
+1
diff --git a/test/log-loss-hashed-probs/log-loss-hashed-probs-solution/test-A/out.tsv b/test/log-loss-hashed-probs/log-loss-hashed-probs-solution/test-A/out.tsv
new file mode 100644
index 0000000..2658337
--- /dev/null
+++ b/test/log-loss-hashed-probs/log-loss-hashed-probs-solution/test-A/out.tsv
@@ -0,0 +1,5 @@
+A:0.6 B:0.4
+C:0.2 A:0.1
+A:0.4 C:0.4
+D:1.0
+C:0.4 B:0.5 :0.1
diff --git a/test/log-loss-hashed-probs/log-loss-hashed-probs/config.txt b/test/log-loss-hashed-probs/log-loss-hashed-probs/config.txt
new file mode 100644
index 0000000..8121298
--- /dev/null
+++ b/test/log-loss-hashed-probs/log-loss-hashed-probs/config.txt
@@ -0,0 +1 @@
+--metric LogLossHashed10
diff --git a/test/log-loss-hashed-probs/log-loss-hashed-probs/test-A/expected.tsv b/test/log-loss-hashed-probs/log-loss-hashed-probs/test-A/expected.tsv
new file mode 100644
index 0000000..b8ccfd3
--- /dev/null
+++ b/test/log-loss-hashed-probs/log-loss-hashed-probs/test-A/expected.tsv
@@ -0,0 +1,5 @@
+A
+A
+B
+D
+A