From 06fd09334965a4ae2d6c05817265fa8f93535299 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Tue, 15 May 2018 08:07:47 +0200
Subject: [PATCH 01/24] probs can be given for LogLossHashed

---
 src/GEval/LogLossHashed.hs                    | 42 +++++++++++++++++--
 test/Spec.hs                                  |  4 ++
 .../test-A/out.tsv                            |  3 ++
 .../config.txt                                |  1 +
 .../test-A/expected.tsv                       |  3 ++
 .../test-A/out.tsv                            |  5 +++
 .../log-loss-hashed-probs/config.txt          |  1 +
 .../log-loss-hashed-probs/test-A/expected.tsv |  5 +++
 8 files changed, 60 insertions(+), 4 deletions(-)
 create mode 100644 test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized-solution/test-A/out.tsv
 create mode 100644 test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/config.txt
 create mode 100644 test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/test-A/expected.tsv
 create mode 100644 test/log-loss-hashed-probs/log-loss-hashed-probs-solution/test-A/out.tsv
 create mode 100644 test/log-loss-hashed-probs/log-loss-hashed-probs/config.txt
 create mode 100644 test/log-loss-hashed-probs/log-loss-hashed-probs/test-A/expected.tsv

diff --git a/src/GEval/LogLossHashed.hs b/src/GEval/LogLossHashed.hs
index 3e092ab..bed3c7b 100644
--- a/src/GEval/LogLossHashed.hs
+++ b/src/GEval/LogLossHashed.hs
@@ -36,24 +36,33 @@ parseDistribution nbOfBits seed distroSpec =
                             -- a direct list of 2^nbOfBits log probs
                             else parseDistributionFromLogProbList nbOfBits distroSpec
 
+isProbTotalIncorrect :: Double -> Bool
+isProbTotalIncorrect probTotal = probTotal > 1.0 || probTotal < (1.0 - epsilon)
+   where epsilon = 0.00000001
+
 normalizeDistribution :: HashedDistribution -> HashedDistribution
 normalizeDistribution distro =
   -- we do softmax (if needed)
-  if probSum > 1.0 || probSum < (1.0 - epsilon)
+  if isProbTotalIncorrect probSum
   then normalized
   else distro
   where probSum = V.foldl' (\s l -> (s + exp l)) 0.0 distro
         normalized = V.map (\v -> log ((exp v) / probSum)) distro
-        epsilon = 0.00000001
 
 type DistroMonad s = ReaderT (VM.MVector s Double) (ST s)
 
 data WordSpec = AnyWord | SpecificWord T.Text
+                deriving (Eq, Show)
+
+isAnyWord AnyWord = True
+isAnyWord _ = False
+
 data WordSpecWithLogProb = WordSpecWithLogProb WordSpec Double
 
 parseDistributionFromWordList :: Word32 -> Word32 -> T.Text -> Either String HashedDistribution
-parseDistributionFromWordList nbOfBits seed distroSpec = (parseDistributionFromWordList' nbOfBits seed) =<< (
-  processEithers $ map getWordSpecWithLogProb $ T.splitOn " " distroSpec)
+parseDistributionFromWordList nbOfBits seed distroSpec = (parseDistributionFromWordList' nbOfBits seed) =<<
+  lookForProbs =<<
+  (processEithers $ map getWordSpecWithLogProb $ T.splitOn " " distroSpec)
 
 getWordSpecWithLogProb :: T.Text -> Either String WordSpecWithLogProb
 getWordSpecWithLogProb t =
@@ -77,6 +86,31 @@ parseDistributionFromWordList' nbOfBits seed specs = runST $ do
   frozen <- V.freeze emp
   return $ Right frozen
 
+lookForProbs :: [WordSpecWithLogProb] -> Either String [WordSpecWithLogProb]
+lookForProbs specs
+  | areProbs specs = Right $ toLogProbs $ normalizeProbs specs
+  | otherwise = Right $ specs
+
+areProbs :: [WordSpecWithLogProb] -> Bool
+areProbs specs = all isProb specs && any isPositiveProb specs
+  where isProb (WordSpecWithLogProb _ v) = v >= 0.0 && v <= 1.0
+        isPositiveProb (WordSpecWithLogProb _ p) = p > 0.0 && p <= 1.0
+
+toLogProbs :: [WordSpecWithLogProb] -> [WordSpecWithLogProb]
+toLogProbs = map (\(WordSpecWithLogProb w p) -> (WordSpecWithLogProb w (log p)))
+
+normalizeProbs :: [WordSpecWithLogProb] -> [WordSpecWithLogProb]
+normalizeProbs specs = if isProbTotalIncorrect probTotal
+                         then
+                           if probTotal > 1.0 || any (\(WordSpecWithLogProb w _) -> isAnyWord w) specs
+                             then
+                               map (\(WordSpecWithLogProb w p) -> WordSpecWithLogProb w (p / probTotal)) specs
+                             else
+                               ((WordSpecWithLogProb AnyWord (1-probTotal)):specs)
+                         else
+                           specs
+  where probTotal = sum $ map (\(WordSpecWithLogProb _ p) -> p) specs
+
 addSpecs :: Word32 -> Word32 -> [WordSpecWithLogProb] -> DistroMonad s ()
 addSpecs nbOfBits seed = mapM_ (updateDistro nbOfBits seed)
 
diff --git a/test/Spec.hs b/test/Spec.hs
index 9be9045..27b6653 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -95,6 +95,10 @@ main = hspec $ do
       runGEvalTest "log-loss-hashed-simple" `shouldReturnAlmost` 2.398479083333333
     it "example with unnormalized values" $ do
       runGEvalTest "log-loss-hashed-not-normalized" `shouldReturnAlmost` 1.0468455186722887
+    it "with probs instead of log probs" $ do
+      runGEvalTest "log-loss-hashed-probs" `shouldReturnAlmost` 4.11631293099392
+    it "with probs instead of log probs (with normalization)" $ do
+      runGEvalTest "log-loss-hashed-probs-normalized" `shouldReturnAlmost` 1.55537749098853
   describe "reading options" $ do
     it "can get the metric" $ do
       extractMetric "bleu-complex" `shouldReturn` (Just BLEU)
diff --git a/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized-solution/test-A/out.tsv b/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized-solution/test-A/out.tsv
new file mode 100644
index 0000000..d1dbc6f
--- /dev/null
+++ b/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized-solution/test-A/out.tsv
@@ -0,0 +1,3 @@
+1:0.5 2:0.6 3:1.0 4:1.0 5:0.9
+1:0.3 2:0.2 3:0.3
+1:0.2 :0.6
diff --git a/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/config.txt b/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/config.txt
new file mode 100644
index 0000000..8121298
--- /dev/null
+++ b/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/config.txt
@@ -0,0 +1 @@
+--metric LogLossHashed10
diff --git a/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/test-A/expected.tsv b/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/test-A/expected.tsv
new file mode 100644
index 0000000..39a8593
--- /dev/null
+++ b/test/log-loss-hashed-probs-normalized/log-loss-hashed-probs-normalized/test-A/expected.tsv
@@ -0,0 +1,3 @@
+1
+3
+1
diff --git a/test/log-loss-hashed-probs/log-loss-hashed-probs-solution/test-A/out.tsv b/test/log-loss-hashed-probs/log-loss-hashed-probs-solution/test-A/out.tsv
new file mode 100644
index 0000000..2658337
--- /dev/null
+++ b/test/log-loss-hashed-probs/log-loss-hashed-probs-solution/test-A/out.tsv
@@ -0,0 +1,5 @@
+A:0.6 B:0.4
+C:0.2 A:0.1
+A:0.4 C:0.4
+D:1.0
+C:0.4 B:0.5 :0.1
diff --git a/test/log-loss-hashed-probs/log-loss-hashed-probs/config.txt b/test/log-loss-hashed-probs/log-loss-hashed-probs/config.txt
new file mode 100644
index 0000000..8121298
--- /dev/null
+++ b/test/log-loss-hashed-probs/log-loss-hashed-probs/config.txt
@@ -0,0 +1 @@
+--metric LogLossHashed10
diff --git a/test/log-loss-hashed-probs/log-loss-hashed-probs/test-A/expected.tsv b/test/log-loss-hashed-probs/log-loss-hashed-probs/test-A/expected.tsv
new file mode 100644
index 0000000..b8ccfd3
--- /dev/null
+++ b/test/log-loss-hashed-probs/log-loss-hashed-probs/test-A/expected.tsv
@@ -0,0 +1,5 @@
+A
+A
+B
+D
+A

From 9fc4beaba169eb9d443f17ea245861dbf1d06565 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Tue, 15 May 2018 08:14:52 +0200
Subject: [PATCH 02/24] improve sample challenge for LogLossHashed

---
 src/GEval/CreateChallenge.hs | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs
index b0e53a5..3b31b3f 100644
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@@ -113,6 +113,35 @@ The metric is average log-loss calculated for 10-bit hashes.
 Train file is a just text file (one utterance per line).
 In an input file, left and right contexts (TAB-separated) are given.
 In an expected file, the word to be guessed is given.
+
+Format of the output files
+--------------------------
+
+For each input line, a probability distribution for words in a gap
+must be given:
+
+    word1:logprob1 word2:logprob2 ... wordN:logprobN :logprob0
+
+where *logprobi* is the logarithm of the probability for *wordi* and
+*logprob0* is the logarithm of the probability mass for all the other
+words (it will be spread between all 1024 fingerprint values). If the
+respective probabilities do not sum up to 1, they will be normalised with
+softmax.
+
+Note: the separator here is space, not TAB!
+
+### Probs
+
+Probabilities could be given (instead of logprobs):
+
+  * if **all** values look as probs and **at least value** is positive, we treat
+    the values as probs rather then logprobs (single value 0.0 is treated
+    as a logprob, i.e. probability 1.0!);
+  * if their sum is greater than 1.0, then we normalize simply by dividing by the sum;
+  * if the sum is smaller than 1.0 and there is no entry for all the other words,
+    we add such an entry for the missing probability mass;
+  * if the sum is smaller than 1.0 and there is an entry for all the other words,
+    we normalize by dividing by the sum.
 |] ++ (commonReadmeMDContents testName)
 
 readmeMDContents CharMatch testName = [i|

From a1c357948e8dd7ae615ffab2bd10f25ad7328ea1 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Tue, 15 May 2018 08:16:51 +0200
Subject: [PATCH 03/24] bump up version

---
 geval.cabal | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/geval.cabal b/geval.cabal
index 42531ec..8cfc841 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -1,5 +1,5 @@
 name:                geval
-version:             0.5.4.0
+version:             0.5.5.0
 synopsis:            Machine learning evaluation tools
 description:         Please see README.md
 homepage:            http://github.com/name/project

From 82e794ae3c53024116c2a5fca3a18fa6378d25e6 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Tue, 15 May 2018 09:38:13 +0200
Subject: [PATCH 04/24] implement BIO-F1

---
 geval.cabal                                   |  1 +
 src/GEval/BIO.hs                              | 99 +++++++++++++++++++
 src/GEval/Core.hs                             | 13 ++-
 src/GEval/CreateChallenge.hs                  | 28 ++++++
 src/GEval/OptionsParser.hs                    |  2 +-
 src/GEval/PrecisionRecall.hs                  | 14 ++-
 test/Spec.hs                                  | 59 ++++++++++-
 .../bio-f1-complex-solution/test-A/out.tsv    |  6 ++
 test/bio-f1-complex/bio-f1-complex/config.txt |  1 +
 .../bio-f1-complex/test-A/expected.tsv        |  6 ++
 .../bio-f1-perfect-solution/test-A/out.tsv    |  4 +
 test/bio-f1-perfect/bio-f1-perfect/config.txt |  1 +
 .../bio-f1-perfect/test-A/expected.tsv        |  4 +
 .../bio-f1-simple-solution/test-A/out.tsv     |  3 +
 test/bio-f1-simple/bio-f1-simple/config.txt   |  1 +
 .../bio-f1-simple/test-A/expected.tsv         |  3 +
 16 files changed, 239 insertions(+), 6 deletions(-)
 create mode 100644 src/GEval/BIO.hs
 create mode 100644 test/bio-f1-complex/bio-f1-complex-solution/test-A/out.tsv
 create mode 100644 test/bio-f1-complex/bio-f1-complex/config.txt
 create mode 100644 test/bio-f1-complex/bio-f1-complex/test-A/expected.tsv
 create mode 100644 test/bio-f1-perfect/bio-f1-perfect-solution/test-A/out.tsv
 create mode 100644 test/bio-f1-perfect/bio-f1-perfect/config.txt
 create mode 100644 test/bio-f1-perfect/bio-f1-perfect/test-A/expected.tsv
 create mode 100644 test/bio-f1-simple/bio-f1-simple-solution/test-A/out.tsv
 create mode 100644 test/bio-f1-simple/bio-f1-simple/config.txt
 create mode 100644 test/bio-f1-simple/bio-f1-simple/test-A/expected.tsv

diff --git a/geval.cabal b/geval.cabal
index 8cfc841..23e3672 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -26,6 +26,7 @@ library
                      , GEval.LogLossHashed
                      , GEval.CharMatch
                      , GEval.LineByLine
+                     , GEval.BIO
   build-depends:       base >= 4.7 && < 5
                      , cond
                      , conduit
diff --git a/src/GEval/BIO.hs b/src/GEval/BIO.hs
new file mode 100644
index 0000000..a3e8819
--- /dev/null
+++ b/src/GEval/BIO.hs
@@ -0,0 +1,99 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module GEval.BIO
+       (BIOLabel(..), bioSequenceParser, parseBioSequenceIntoEntities, TaggedSpan(..), TaggedEntity(..), gatherCountsForBIO)
+       where
+
+import GEval.PrecisionRecall
+
+import qualified Data.Text as T
+
+import Data.Attoparsec.Text
+import Data.Attoparsec.Combinator
+import Control.Applicative
+import Data.Char
+import Data.Maybe (catMaybes)
+
+import GEval.Common
+
+data BIOLabel = Outside | Beginning T.Text (Maybe T.Text) | Inside T.Text (Maybe T.Text)
+                deriving (Eq, Show)
+
+data TaggedSpan = TaggedSpan Int Int
+                  deriving (Eq, Show)
+
+data TaggedEntity = TaggedEntity TaggedSpan T.Text (Maybe T.Text)
+                    deriving (Eq, Show)
+
+gatherCountsForBIO :: [TaggedEntity] -> [TaggedEntity] -> (Int, Int, Int)
+gatherCountsForBIO expected got = (maxMatchOnOrdered laterThan expected got, length expected, length got)
+  where
+    laterThan (TaggedEntity (TaggedSpan a _) _ _) (TaggedEntity (TaggedSpan b _) _ _) = a > b
+
+parseBioSequenceIntoEntities :: T.Text -> Either String [TaggedEntity]
+parseBioSequenceIntoEntities t = labelsIntoEntities =<< (parseOnly (bioSequenceParser <* endOfInput) t)
+
+labelsIntoEntities :: [BIOLabel] -> Either String [TaggedEntity]
+labelsIntoEntities labels = labelsIntoEntities' $ zip labels [1..]
+
+labelsIntoEntities' :: [(BIOLabel, Int)] -> Either String [TaggedEntity]
+labelsIntoEntities' labelsWithPositions = mapM labelSplitToEntity labelsGathered
+  where labelsGathered = splitLabels labelsWithPositions
+
+labelSplitToEntity :: [(BIOLabel, Int)] -> Either String TaggedEntity
+labelSplitToEntity labs@(h@(_,begIx):t) = if isBeginning h && all (\tp -> isInside tp && tt tp == btp) t
+                                     then
+                                       Right $ TaggedEntity (TaggedSpan begIx lastItemIx) btp mNormalized
+                                     else
+                                       Left "something wrong with label sequence"
+  where isBeginning (Beginning _ _, _) = True
+        isBeginning _ = False
+        isInside (Inside _ _, _) = True
+        isInside _ = False
+        tt (Beginning t _, _) = t
+        tt (Inside t _, _) = t
+        btp = tt h
+        lastItemIx = case t of
+          [] -> begIx
+          _ -> let (_, ix) = last t
+              in ix
+        normalized (Beginning _ n, _) = n
+        normalized (Inside _ n, _) = n
+        mNormalized = if all (\tp -> normalized tp == Nothing) labs
+                        then
+                          Nothing
+                        else
+                          Just $ T.intercalate "_" $ catMaybes $ map normalized labs
+
+splitLabels :: [(BIOLabel, Int)] -> [[(BIOLabel, Int)]]
+splitLabels [] = []
+splitLabels ((Outside, _):r) = splitLabels r
+splitLabels (e@(_, ix):r) =
+  case splitLabels r of
+    l@(((Beginning _ _, _):_):_) -> ([e]:l)
+    (s@((Inside _ _, ix'):_):l) -> if ix' == ix + 1
+                                    then
+                                      ((e:s):l)
+                                    else
+                                      ([e]:(s:l))
+    [] -> [[e]]
+
+bioSequenceParser :: Parser [BIOLabel]
+bioSequenceParser = sepByWhitespaces bioLabelParser
+
+bioLabelParser :: Parser BIOLabel
+bioLabelParser =
+  (string "O" *> pure Outside) <|>
+  (do
+      labelType <- bioMarkerParser
+      string "-"
+      label <- takeWhile1 (\c -> not (isSpace c) && c /= '/')
+      normalized <- (do
+                       string "/"
+                       normalized <- takeWhile1 (not . isSpace)
+                       return $ Just normalized) <|> pure Nothing
+      return $ labelType label normalized)
+
+bioMarkerParser :: Parser (T.Text -> Maybe T.Text -> BIOLabel)
+bioMarkerParser =
+  (string "B" *> pure Beginning) <|> (string "I" *> pure Inside)
diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs
index 6b29f7e..2ba9b27 100644
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -53,6 +53,7 @@ import qualified System.Directory as D
 import System.Posix
 import System.FilePath
 import Data.Maybe
+import Data.Tuple
 import qualified Data.List.Split as DLS
 
 import Control.Monad.IO.Class
@@ -67,6 +68,7 @@ import GEval.PrecisionRecall
 import GEval.ClusteringMetrics
 import GEval.LogLossHashed
 import GEval.CharMatch
+import GEval.BIO
 
 import qualified Data.HashMap.Strict as M
 
@@ -80,7 +82,7 @@ defaultLogLossHashedSize :: Word32
 defaultLogLossHashedSize = 10
 
 data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch
-              | MAP | LogLoss
+              | MAP | LogLoss | BIOF1
               deriving (Eq)
 
 instance Show Metric where
@@ -100,6 +102,7 @@ instance Show Metric where
   show CharMatch = "CharMatch"
   show MAP = "MAP"
   show LogLoss = "LogLoss"
+  show BIOF1 = "BIO-F1"
 
 instance Read Metric where
   readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
@@ -117,6 +120,7 @@ instance Read Metric where
   readsPrec _ ('L':'o':'g':'L':'o':'s':'s':theRest) = [(LogLoss, theRest)]
   readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
   readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
+  readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
 
 data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
 
@@ -132,6 +136,7 @@ getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
 getMetricOrdering CharMatch = TheHigherTheBetter
 getMetricOrdering MAP = TheHigherTheBetter
 getMetricOrdering LogLoss = TheLowerTheBetter
+getMetricOrdering BIOF1 = TheHigherTheBetter
 
 defaultOutDirectory = "."
 defaultTestName = "test-A"
@@ -381,7 +386,11 @@ gevalCore' CharMatch inputLineSource = helper inputLineSource
    helper inputLineSource expectedLineSource outputLineSource = do
      gevalCoreGeneralized (ParserSpecWithInput (Right . unpack) (Right . unpack) (Right . unpack)) step countAgg (fMeasureOnCounts charMatchBeta) (WithInput inputLineSource expectedLineSource outputLineSource)
    step (ParsedRecordWithInput inp exp out) = getCharMatchCount inp exp out
-   countAgg = CC.foldl countFolder (0, 0, 0)
+
+gevalCore' BIOF1 _ = gevalCoreWithoutInput parseBioSequenceIntoEntities parseBioSequenceIntoEntities (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts
+
+countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int)
+countAgg = CC.foldl countFolder (0, 0, 0)
 
 parseDistributionWrapper :: Word32 -> Word32 -> Text -> HashedDistribution
 parseDistributionWrapper nbOfBits seed distroSpec = case parseDistribution nbOfBits seed distroSpec of
diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs
index 3b31b3f..09a85e4 100644
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@@ -199,6 +199,18 @@ This a sample challenge for the log-loss metric.
 
 |] ++ (commonReadmeMDContents testName)
 
+readmeMDContents BIOF1 testName = [i|
+Tag and normalize names
+=======================
+
+Tag names in the tokenized text and normalized them.
+
+The output should be given in the BIO format with the normalized forms given after slashes (see
+`dev-0/expected.tsv` for an example).
+
+The metric is F1 counted on entities (not labels).
+|] ++ (commonReadmeMDContents testName)
+
 readmeMDContents _ testName = [i|
 GEval sample challenge
 ======================
@@ -288,6 +300,10 @@ trainContents LogLoss = [hereLit|0.0	Hell, no!!!
 1.0	Lekker!!!
 0.0	Boring, boring, boring
 |]
+trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surname/BOND	My name is Bond , James Bond
+O O O O O	There is no name here
+B-firstname/JOHN I-surname/VON I-surname/NEUMANN	John von Nueman
+|]
 trainContents _ = [hereLit|0.06	0.39	0	0.206
 1.00	1.00	1	0.017
 317.8	5.20	67	0.048
@@ -323,6 +339,9 @@ devInContents LogLoss = [hereLit|Great stuff!
 Boring stuff
 That's good
 |]
+devInContents BIOF1 = [hereLit|Adam and Eve
+Mr Jan Kowalski
+|]
 devInContents _ = [hereLit|0.72	0	0.007
 9.54	62	0.054
 |]
@@ -356,6 +375,9 @@ devExpectedContents LogLoss = [hereLit|1.0
 0.0
 1.0
 |]
+devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE
+O B-firstname/JAN B-surname/KOWALSKI
+|]
 devExpectedContents _ = [hereLit|0.82
 95.2
 |]
@@ -391,6 +413,9 @@ testInContents LogLoss = [hereLit|That's great, ha, ha, I love it!
 Super-duper!!
 That is incredibly boring.
 |]
+testInContents BIOF1 = [hereLit|Alan Tring
+No name here
+|]
 testInContents _ = [hereLit|1.52	2	0.093
 30.06	14	0.009
 |]
@@ -426,6 +451,9 @@ testExpectedContents LogLoss = [hereLit|1.0
 1.0
 0.0
 |]
+testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING
+O O O
+|]
 testExpectedContents _ = [hereLit|0.11
 17.2
 |]
diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs
index d5fe2f8..bad86fe 100644
--- a/src/GEval/OptionsParser.hs
+++ b/src/GEval/OptionsParser.hs
@@ -100,7 +100,7 @@ metricReader = option auto
                  <> value defaultMetric
                  <> showDefault
                  <> metavar "METRIC"
-                 <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed or CharMatch" )
+                 <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, BIO-F1 or CharMatch" )
 
 runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
 runGEval args = do
diff --git a/src/GEval/PrecisionRecall.hs b/src/GEval/PrecisionRecall.hs
index 8781914..16879b2 100644
--- a/src/GEval/PrecisionRecall.hs
+++ b/src/GEval/PrecisionRecall.hs
@@ -3,7 +3,7 @@
 module GEval.PrecisionRecall(calculateMAPForOneResult,
                              fMeasure, f1Measure, f2Measure, precision, recall,
                              fMeasureOnCounts, f1MeasureOnCounts, f2MeasureOnCounts, countFolder,
-                             precisionAndRecall, precisionAndRecallFromCounts, maxMatch)
+                             precisionAndRecall, precisionAndRecallFromCounts, maxMatch, maxMatchOnOrdered)
        where
 
 import GEval.Common
@@ -65,6 +65,17 @@ precision matchFun expected got = fst $ precisionAndRecall matchFun expected got
 recall :: (a -> b -> Bool) -> [a] -> [b] -> Double
 recall matchFun expected got = snd $ precisionAndRecall matchFun expected got
 
+
+maxMatchOnOrdered :: Eq a => (a -> a -> Bool) -> [a] -> [a] -> Int
+maxMatchOnOrdered laterThan expected got =
+   let (matched, _) = foldl' step (0, expected) got
+   in matched
+         where step (matched, l@(h:t)) g
+                | h == g = (matched+1, t)
+                | h `laterThan` g  = (matched, l)
+                | otherwise = step (matched, t) g
+               step (matched, []) g = (matched, [])
+
 -- counting maximum match with maximum bipartite matching
 -- (we build an auxiliary graph and do a max-flow on this)
 maxMatch :: (a -> b -> Bool) -> [a] -> [b] -> Int
@@ -72,7 +83,6 @@ maxMatch matchFun expected got = mf
    where (b, e, g) = buildGraph matchFun expected got
          mf = maxFlow g (fst b) (fst e)
 
-
 buildGraph :: (a -> b -> Bool) -> [a] -> [b] -> (LNode Int, LNode Int, Gr Int Int)
 buildGraph matchFun expected got = (b, e, g)
    where ((b, e), (_, g)) = buildGraph' matchFun expected got
diff --git a/test/Spec.hs b/test/Spec.hs
index 27b6653..8cd27bf 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -8,6 +8,7 @@ import GEval.BLEU
 import GEval.ClippEU
 import GEval.PrecisionRecall
 import GEval.ClusteringMetrics
+import GEval.BIO
 import Data.Attoparsec.Text
 import Options.Applicative
 import Data.Text
@@ -191,7 +192,63 @@ main = hspec $ do
       gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla")
                                   (LineInFile "stub2" 1 "3.4")
                                   (LineInFile "stub3" 1 "2.6") `shouldReturnAlmost` 0.8
-
+  describe "BIO format" $ do
+    it "just parse" $ do
+      let (Right r) = parseOnly (bioSequenceParser <* endOfInput) "O B-city/NEW_YORK I-city B-city/KALISZ I-city O B-name"
+      r `shouldBe` [Outside,
+                    Beginning "city" (Just "NEW_YORK"),
+                    Inside "city" Nothing,
+                    Beginning "city" (Just "KALISZ"),
+                    Inside "city" Nothing,
+                    Outside,
+                    Beginning "name" Nothing]
+    it "simplest entity" $ do
+      let (Right ents) = parseBioSequenceIntoEntities "B-city"
+      ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "city" Nothing]
+    it "multi-word entity" $ do
+      let (Right ents) = parseBioSequenceIntoEntities "B-date I-date"
+      ents `shouldBe` [TaggedEntity (TaggedSpan 1 2) "date" Nothing]
+    it "multi-word entity with normalized text" $ do
+      let (Right ents) = parseBioSequenceIntoEntities "B-date/FOO I-date/BAR"
+      ents `shouldBe` [TaggedEntity (TaggedSpan 1 2) "date" (Just "FOO_BAR")]
+    it "simplest entity with something outside" $ do
+      let (Right ents) = parseBioSequenceIntoEntities "O B-city"
+      ents `shouldBe` [TaggedEntity (TaggedSpan 2 2) "city" Nothing]
+    it "another simple case" $ do
+      let (Right ents) = parseBioSequenceIntoEntities "B-city B-city"
+      ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "city" Nothing,
+                       TaggedEntity (TaggedSpan 2 2) "city" Nothing]
+    it "just parse into entities" $ do
+      let (Right ents) = parseBioSequenceIntoEntities "O O B-city/LOS_ANGELES I-city B-city/KLUCZBORK O B-name O B-person/JOHN I-person/VON I-person/NEUMANN"
+      ents `shouldBe` [TaggedEntity (TaggedSpan 3 4) "city" (Just "LOS_ANGELES"),
+                       TaggedEntity (TaggedSpan 5 5) "city" (Just "KLUCZBORK"),
+                       TaggedEntity (TaggedSpan 7 7) "name" (Nothing),
+                       TaggedEntity (TaggedSpan 9 11) "person" (Just "JOHN_VON_NEUMANN")]
+    it "another entity parse" $ do
+      let (Right ents) = parseBioSequenceIntoEntities "B-month/JULY B-month/JULY O O B-foo/bar"
+      ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "month" (Just "JULY"),
+                       TaggedEntity (TaggedSpan 2 2) "month" (Just "JULY"),
+                       TaggedEntity (TaggedSpan 5 5) "foo" (Just "bar")]
+    it "another entity parse" $ do
+      let (Right ents) = parseBioSequenceIntoEntities "B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK"
+      ents `shouldBe` [TaggedEntity (TaggedSpan 1 2) "city" (Just "LOS_ANGELES"),
+                       TaggedEntity (TaggedSpan 4 5) "city" (Just "NEW_YORK")]
+    it "parse entity" $ do
+      let (Right ents) = parseBioSequenceIntoEntities "B-surname/BROWN B-surname/SMITH"
+      ents `shouldBe` [TaggedEntity (TaggedSpan 1 1) "surname" (Just "BROWN"),
+                       TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")]
+    it "parse entity" $ do
+      let (Right ents) = parseBioSequenceIntoEntities "O B-surname/SMITH"
+      ents `shouldBe` [TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")]
+    it "check counting" $ do
+      gatherCountsForBIO [TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")] [TaggedEntity (TaggedSpan 1 1) "surname" (Just "BROWN"),
+                                                                                   TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")] `shouldBe` (1, 1, 2)
+    it "check F1 on a more complicated example" $ do
+      runGEvalTest "bio-f1-complex" `shouldReturnAlmost` 0.625
+    it "calculate F1" $ do
+      runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5
+    it "check perfect score" $ do
+      runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0
 
 neverMatch :: Char -> Int -> Bool
 neverMatch _ _ = False
diff --git a/test/bio-f1-complex/bio-f1-complex-solution/test-A/out.tsv b/test/bio-f1-complex/bio-f1-complex-solution/test-A/out.tsv
new file mode 100644
index 0000000..f84fa1d
--- /dev/null
+++ b/test/bio-f1-complex/bio-f1-complex-solution/test-A/out.tsv
@@ -0,0 +1,6 @@
+B-wrong
+B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK
+B-surname/BROWN B-surname/SMITH
+B-month/JULY B-month/JULY O O B-foo/bar
+O B-class I-class I-class
+O B-wrong
diff --git a/test/bio-f1-complex/bio-f1-complex/config.txt b/test/bio-f1-complex/bio-f1-complex/config.txt
new file mode 100644
index 0000000..70977e1
--- /dev/null
+++ b/test/bio-f1-complex/bio-f1-complex/config.txt
@@ -0,0 +1 @@
+--metric BIO-F1
diff --git a/test/bio-f1-complex/bio-f1-complex/test-A/expected.tsv b/test/bio-f1-complex/bio-f1-complex/test-A/expected.tsv
new file mode 100644
index 0000000..c6e719d
--- /dev/null
+++ b/test/bio-f1-complex/bio-f1-complex/test-A/expected.tsv
@@ -0,0 +1,6 @@
+O
+B-city/LOS I-city/ANGELES O B-city/NEW_YORK O
+O B-surname/SMITH
+B-month/JULY O O O B-foo/bar
+O B-class I-class I-class
+O O
diff --git a/test/bio-f1-perfect/bio-f1-perfect-solution/test-A/out.tsv b/test/bio-f1-perfect/bio-f1-perfect-solution/test-A/out.tsv
new file mode 100644
index 0000000..e0fa64b
--- /dev/null
+++ b/test/bio-f1-perfect/bio-f1-perfect-solution/test-A/out.tsv
@@ -0,0 +1,4 @@
+O O O
+O B-city/NEW I-city/YORK I-city/CITY O B-month/July
+B-surname/SMITH
+B-city/LONDON B-city/PARIS
diff --git a/test/bio-f1-perfect/bio-f1-perfect/config.txt b/test/bio-f1-perfect/bio-f1-perfect/config.txt
new file mode 100644
index 0000000..70977e1
--- /dev/null
+++ b/test/bio-f1-perfect/bio-f1-perfect/config.txt
@@ -0,0 +1 @@
+--metric BIO-F1
diff --git a/test/bio-f1-perfect/bio-f1-perfect/test-A/expected.tsv b/test/bio-f1-perfect/bio-f1-perfect/test-A/expected.tsv
new file mode 100644
index 0000000..e0fa64b
--- /dev/null
+++ b/test/bio-f1-perfect/bio-f1-perfect/test-A/expected.tsv
@@ -0,0 +1,4 @@
+O O O
+O B-city/NEW I-city/YORK I-city/CITY O B-month/July
+B-surname/SMITH
+B-city/LONDON B-city/PARIS
diff --git a/test/bio-f1-simple/bio-f1-simple-solution/test-A/out.tsv b/test/bio-f1-simple/bio-f1-simple-solution/test-A/out.tsv
new file mode 100644
index 0000000..0666d27
--- /dev/null
+++ b/test/bio-f1-simple/bio-f1-simple-solution/test-A/out.tsv
@@ -0,0 +1,3 @@
+O O B-city/POZNAŃ O O B-date/MARCH I-date/12
+B-city/BUK O O O
+B-name/FOO O B-surname/KOWALSKI
diff --git a/test/bio-f1-simple/bio-f1-simple/config.txt b/test/bio-f1-simple/bio-f1-simple/config.txt
new file mode 100644
index 0000000..70977e1
--- /dev/null
+++ b/test/bio-f1-simple/bio-f1-simple/config.txt
@@ -0,0 +1 @@
+--metric BIO-F1
diff --git a/test/bio-f1-simple/bio-f1-simple/test-A/expected.tsv b/test/bio-f1-simple/bio-f1-simple/test-A/expected.tsv
new file mode 100644
index 0000000..659986b
--- /dev/null
+++ b/test/bio-f1-simple/bio-f1-simple/test-A/expected.tsv
@@ -0,0 +1,3 @@
+O O B-city/POZNAŃ O O B-date/MARCH I-date/12
+O O O O
+O B-city/KONIN O

From b01f9439b73ba772c9b81ea8ff1f297857ad9c3e Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Wed, 16 May 2018 20:59:40 +0200
Subject: [PATCH 05/24] log probs

---
 src/GEval/CreateChallenge.hs                         |  8 ++++++--
 src/GEval/LogLossHashed.hs                           | 12 ++++++++++++
 test/Spec.hs                                         |  3 +++
 .../test-A/out.tsv                                   |  3 +++
 .../log-loss-hashed-normalization/config.txt         |  1 +
 .../test-A/expected.tsv                              |  3 +++
 6 files changed, 28 insertions(+), 2 deletions(-)
 create mode 100644 test/log-loss-hashed-normalization/log-loss-hashed-normalization-solution/test-A/out.tsv
 create mode 100644 test/log-loss-hashed-normalization/log-loss-hashed-normalization/config.txt
 create mode 100644 test/log-loss-hashed-normalization/log-loss-hashed-normalization/test-A/expected.tsv

diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs
index 09a85e4..3e00993 100644
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@@ -125,8 +125,12 @@ must be given:
 where *logprobi* is the logarithm of the probability for *wordi* and
 *logprob0* is the logarithm of the probability mass for all the other
 words (it will be spread between all 1024 fingerprint values). If the
-respective probabilities do not sum up to 1, they will be normalised with
-softmax.
+respective probabilities do not sum up to 1:
+
+  * if the sum is larger than 0.0 and smaller than 1.0, and no logprob0
+    is given, log of the remaining probablity mass will be assigned to logprob0,
+  * otherwise they will be normalised with.
+softmax
 
 Note: the separator here is space, not TAB!
 
diff --git a/src/GEval/LogLossHashed.hs b/src/GEval/LogLossHashed.hs
index bed3c7b..a4388ee 100644
--- a/src/GEval/LogLossHashed.hs
+++ b/src/GEval/LogLossHashed.hs
@@ -61,6 +61,7 @@ data WordSpecWithLogProb = WordSpecWithLogProb WordSpec Double
 
 parseDistributionFromWordList :: Word32 -> Word32 -> T.Text -> Either String HashedDistribution
 parseDistributionFromWordList nbOfBits seed distroSpec = (parseDistributionFromWordList' nbOfBits seed) =<<
+  normalizeLogProbs =<<
   lookForProbs =<<
   (processEithers $ map getWordSpecWithLogProb $ T.splitOn " " distroSpec)
 
@@ -99,6 +100,17 @@ areProbs specs = all isProb specs && any isPositiveProb specs
 toLogProbs :: [WordSpecWithLogProb] -> [WordSpecWithLogProb]
 toLogProbs = map (\(WordSpecWithLogProb w p) -> (WordSpecWithLogProb w (log p)))
 
+normalizeLogProbs :: [WordSpecWithLogProb] -> Either String [WordSpecWithLogProb]
+normalizeLogProbs specs = if isProbTotalIncorrect probTotal
+                             && probTotal < 1.0 && probTotal > 0.0
+                             && not (any (\(WordSpecWithLogProb w _) -> isAnyWord w) specs)
+                             && all (\(WordSpecWithLogProb _ lp) -> lp <= 0) specs
+                          then
+                            Right ((WordSpecWithLogProb AnyWord (log (1-probTotal))):specs)
+                          else
+                            Right specs
+  where probTotal = sum $ map (\(WordSpecWithLogProb _ logp) -> exp logp) specs
+
 normalizeProbs :: [WordSpecWithLogProb] -> [WordSpecWithLogProb]
 normalizeProbs specs = if isProbTotalIncorrect probTotal
                          then
diff --git a/test/Spec.hs b/test/Spec.hs
index 8cd27bf..831d6b5 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -100,6 +100,9 @@ main = hspec $ do
       runGEvalTest "log-loss-hashed-probs" `shouldReturnAlmost` 4.11631293099392
     it "with probs instead of log probs (with normalization)" $ do
       runGEvalTest "log-loss-hashed-probs-normalized" `shouldReturnAlmost` 1.55537749098853
+    it "with log probs whose probs are summing up to less than 1.0" $ do
+      runGEvalTest "log-loss-hashed-normalization" `shouldReturnAlmost` 5.16395069238851
+
   describe "reading options" $ do
     it "can get the metric" $ do
       extractMetric "bleu-complex" `shouldReturn` (Just BLEU)
diff --git a/test/log-loss-hashed-normalization/log-loss-hashed-normalization-solution/test-A/out.tsv b/test/log-loss-hashed-normalization/log-loss-hashed-normalization-solution/test-A/out.tsv
new file mode 100644
index 0000000..22e39a7
--- /dev/null
+++ b/test/log-loss-hashed-normalization/log-loss-hashed-normalization-solution/test-A/out.tsv
@@ -0,0 +1,3 @@
+B:-1.20397280432594 A:-0.916290731874155
+A:-2.3025850929940 C:-1.6094379124341
+A:-2.3025850929940 C:-1.6094379124341 :-0.356674943938732
diff --git a/test/log-loss-hashed-normalization/log-loss-hashed-normalization/config.txt b/test/log-loss-hashed-normalization/log-loss-hashed-normalization/config.txt
new file mode 100644
index 0000000..8121298
--- /dev/null
+++ b/test/log-loss-hashed-normalization/log-loss-hashed-normalization/config.txt
@@ -0,0 +1 @@
+--metric LogLossHashed10
diff --git a/test/log-loss-hashed-normalization/log-loss-hashed-normalization/test-A/expected.tsv b/test/log-loss-hashed-normalization/log-loss-hashed-normalization/test-A/expected.tsv
new file mode 100644
index 0000000..3042490
--- /dev/null
+++ b/test/log-loss-hashed-normalization/log-loss-hashed-normalization/test-A/expected.tsv
@@ -0,0 +1,3 @@
+A
+B
+B

From 01b93dd2437ee3fa76395e0df94045359ebe3ebd Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Wed, 16 May 2018 21:00:45 +0200
Subject: [PATCH 06/24] improve help for geval --init

---
 src/GEval/OptionsParser.hs | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs
index bad86fe..eaabd31 100644
--- a/src/GEval/OptionsParser.hs
+++ b/src/GEval/OptionsParser.hs
@@ -169,11 +169,9 @@ initChallenge spec = case gesExpectedDirectory spec of
 showInitInstructions = do
   putStrLn [here|
 Run:
-    geval --init --expected-directory CHALLENGE
+    geval --init --expected-directory CHALLENGE --metric METRIC-NAME --precision NUMBER-OF-DIGITS
 to create a directory CHALLENGE representing a Gonito challenge.
 
-You can specify a metric with `--metric METRIC-NAME` option.
-
-Note that `--out-directory` option is not taken into account with `--init` option.
+(Note that `--out-directory` option is not taken into account with `--init` option.)
 |]
   exitFailure

From bab4f7d94c0ec078395dd42ed4583c1337cd3921 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Wed, 16 May 2018 21:01:16 +0200
Subject: [PATCH 07/24] bump up version number

---
 geval.cabal | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/geval.cabal b/geval.cabal
index 23e3672..a5611cb 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -1,5 +1,5 @@
 name:                geval
-version:             0.5.5.0
+version:             0.5.6.0
 synopsis:            Machine learning evaluation tools
 description:         Please see README.md
 homepage:            http://github.com/name/project

From 438f0139141674e5ec05c1b39b014f3c6937d5fa Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Thu, 17 May 2018 08:26:57 +0200
Subject: [PATCH 08/24] automatic decompression

---
 geval.cabal                                   |   7 +++
 src/Data/Conduit/AutoDecompress.hs            |  44 ++++++++++++++++++
 src/GEval/Core.hs                             |  32 +++++++++++--
 test/Spec.hs                                  |   4 ++
 .../test-A/out.tsv.bz2                        | Bin 0 -> 60 bytes
 .../charmatch-complex-compressed/config.txt   |   1 +
 .../test-A/expected.tsv.gz                    | Bin 0 -> 59 bytes
 .../test-A/in.tsv.xz                          | Bin 0 -> 84 bytes
 8 files changed, 83 insertions(+), 5 deletions(-)
 create mode 100644 src/Data/Conduit/AutoDecompress.hs
 create mode 100644 test/charmatch-complex-compressed/charmatch-complex-compressed-solution/test-A/out.tsv.bz2
 create mode 100644 test/charmatch-complex-compressed/charmatch-complex-compressed/config.txt
 create mode 100644 test/charmatch-complex-compressed/charmatch-complex-compressed/test-A/expected.tsv.gz
 create mode 100644 test/charmatch-complex-compressed/charmatch-complex-compressed/test-A/in.tsv.xz

diff --git a/geval.cabal b/geval.cabal
index a5611cb..4a2046d 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -27,6 +27,7 @@ library
                      , GEval.CharMatch
                      , GEval.LineByLine
                      , GEval.BIO
+                     , Data.Conduit.AutoDecompress
   build-depends:       base >= 4.7 && < 5
                      , cond
                      , conduit
@@ -49,6 +50,12 @@ library
                      , vector
                      , mtl
                      , edit-distance
+                     , bytestring
+                     , word8
+                     , primitive
+                     , transformers-base
+                     , bzlib-conduit
+                     , lzma-conduit
   default-language:    Haskell2010
 
 executable geval
diff --git a/src/Data/Conduit/AutoDecompress.hs b/src/Data/Conduit/AutoDecompress.hs
new file mode 100644
index 0000000..eb5a51f
--- /dev/null
+++ b/src/Data/Conduit/AutoDecompress.hs
@@ -0,0 +1,44 @@
+{-# LANGUAGE AllowAmbiguousTypes #-}
+
+module Data.Conduit.AutoDecompress
+       (autoDecompress)
+       where
+
+import Data.Conduit
+import Data.Conduit.Combinators
+import Data.ByteString
+import Data.Conduit.Zlib
+import Data.Word8
+import Control.Monad.Trans.Resource (MonadThrow, MonadResource)
+import Control.Monad.Primitive (PrimMonad)
+import Control.Monad.Base (MonadBase)
+import qualified Data.Conduit.Lzma as XZ
+import qualified Data.Conduit.BZlib as BZ
+
+autoDecompress :: (MonadResource m, MonadBase base m, MonadThrow m, PrimMonad base) => ConduitM ByteString ByteString m ()
+autoDecompress = do
+  f <- await
+  case f of
+    Just chunk -> if Data.ByteString.length chunk > 1
+                   then
+                     do
+                       let firstByte = Data.ByteString.head chunk
+                       let secondByte = Data.ByteString.index chunk 1
+                       leftover chunk
+                       lookAtMagicNumbers (firstByte, secondByte)
+                   else
+                     do
+                      leftover chunk
+                      doNothing
+
+    Nothing -> return ()
+
+
+lookAtMagicNumbers :: (MonadResource m, MonadBase base m, MonadThrow m, PrimMonad base) => (Word8, Word8) -> Conduit ByteString m ByteString
+lookAtMagicNumbers (31, 139) = ungzip
+lookAtMagicNumbers (66, 90) = BZ.bunzip2
+lookAtMagicNumbers (253, 55) = XZ.decompress Nothing
+lookAtMagicNumbers _ = doNothing
+
+doNothing :: Monad m => Conduit ByteString m ByteString
+doNothing = Data.Conduit.Combinators.filter (const True)
diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs
index 2ba9b27..c5a7194 100644
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -57,7 +57,7 @@ import Data.Tuple
 import qualified Data.List.Split as DLS
 
 import Control.Monad.IO.Class
-import Control.Monad ((<=<))
+import Control.Monad ((<=<), filterM)
 
 import Data.Attoparsec.Text (parseOnly)
 
@@ -69,6 +69,7 @@ import GEval.ClusteringMetrics
 import GEval.LogLossHashed
 import GEval.CharMatch
 import GEval.BIO
+import Data.Conduit.AutoDecompress
 
 import qualified Data.HashMap.Strict as M
 
@@ -239,11 +240,14 @@ checkAndGetFiles gevalSpec = do
   unlessM (D.doesDirectoryExist expectedDirectory) $ throwM $ NoExpectedDirectory expectedDirectory
   unlessM (D.doesDirectoryExist outTestDirectory) $ throwM $ NoOutTestDirectory outTestDirectory
   unlessM (D.doesDirectoryExist expectedTestDirectory) $ throwM $ NoExpectedTestDirectory expectedTestDirectory
+  inputFilePath <- lookForCompressedFiles inputFilePath'
+  expectedFilePath <- lookForCompressedFiles expectedFilePath'
+  outFilePath <- lookForCompressedFiles outFilePath'
   checkInputFileIfNeeded metric inputFilePath
   return (inputFilePath, expectedFilePath, outFilePath)
-   where expectedFilePath = expectedTestDirectory </> (gesExpectedFile gevalSpec)
-         outFilePath = getOutFile gevalSpec (gesOutFile gevalSpec)
-         inputFilePath = expectedTestDirectory </> (gesInputFile gevalSpec)
+   where expectedFilePath' = expectedTestDirectory </> (gesExpectedFile gevalSpec)
+         outFilePath' = getOutFile gevalSpec (gesOutFile gevalSpec)
+         inputFilePath' = expectedTestDirectory </> (gesInputFile gevalSpec)
          expectedTestDirectory = expectedDirectory </> testName
          outTestDirectory = outDirectory </> testName
          expectedDirectory = getExpectedDirectory gevalSpec
@@ -251,6 +255,24 @@ checkAndGetFiles gevalSpec = do
          testName = gesTestName gevalSpec
          metric = gesMetric gevalSpec
 
+lookForCompressedFiles :: FilePath -> IO FilePath
+lookForCompressedFiles = lookForAlternativeFiles [".gz", ".xz", ".bz2"]
+
+lookForAlternativeFiles :: [String] -> FilePath -> IO FilePath
+lookForAlternativeFiles suffixes filePath
+   | takeExtension filePath `Prelude.elem` suffixes = return filePath
+   | otherwise = do
+       fileIsThere <- D.doesFileExist filePath
+       if fileIsThere
+         then
+           return filePath
+         else
+           do
+             found <- Control.Monad.filterM D.doesFileExist $ Prelude.map (filePath <.>) suffixes
+             return $ case found of
+                        [fp] -> fp
+                        _ -> filePath
+
 getOutFile :: GEvalSpecification -> FilePath -> FilePath
 getOutFile gevalSpec out = outDirectory </> testName </> out
   where outDirectory = gesOutDirectory gevalSpec
@@ -264,7 +286,7 @@ checkInputFileIfNeeded _ _ = return ()
 
 fileAsLineSource :: FilePath -> LineSource (ResourceT IO)
 fileAsLineSource filePath =
-  LineSource (CB.sourceFile filePath $= CT.decodeUtf8Lenient =$= CT.lines) filePath 1
+  LineSource (CB.sourceFile filePath $= autoDecompress =$= CT.decodeUtf8Lenient =$= CT.lines) filePath 1
 
 gevalCoreOnSingleLines :: Metric -> LineInFile -> LineInFile -> LineInFile -> IO (MetricValue)
 gevalCoreOnSingleLines metric inpLine expLine outLine =
diff --git a/test/Spec.hs b/test/Spec.hs
index 831d6b5..5784042 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -252,6 +252,10 @@ main = hspec $ do
       runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5
     it "check perfect score" $ do
       runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0
+  describe "automatic decompression" $ do
+    it "more complex test" $ do
+      runGEvalTest "charmatch-complex-compressed" `shouldReturnAlmost` 0.1923076923076923
+
 
 neverMatch :: Char -> Int -> Bool
 neverMatch _ _ = False
diff --git a/test/charmatch-complex-compressed/charmatch-complex-compressed-solution/test-A/out.tsv.bz2 b/test/charmatch-complex-compressed/charmatch-complex-compressed-solution/test-A/out.tsv.bz2
new file mode 100644
index 0000000000000000000000000000000000000000..f74ec2e50d093e4d690edbcb2850a1a60a5a61c7
GIT binary patch
literal 60
zcmV-C0K@-6T4*^jL0KkKSp`@L_5c6|#ee`100J%=&>$cHF#ym3Pf4TEsp$wo*ZVR{
S<3X?~V7rnj!i0b-z)!F!h83m&

literal 0
HcmV?d00001

diff --git a/test/charmatch-complex-compressed/charmatch-complex-compressed/config.txt b/test/charmatch-complex-compressed/charmatch-complex-compressed/config.txt
new file mode 100644
index 0000000..ceb23f3
--- /dev/null
+++ b/test/charmatch-complex-compressed/charmatch-complex-compressed/config.txt
@@ -0,0 +1 @@
+--metric CharMatch
diff --git a/test/charmatch-complex-compressed/charmatch-complex-compressed/test-A/expected.tsv.gz b/test/charmatch-complex-compressed/charmatch-complex-compressed/test-A/expected.tsv.gz
new file mode 100644
index 0000000000000000000000000000000000000000..6457aec741699222c58087eafebbd5a161ded1eb
GIT binary patch
literal 59
zcmb2|=HSrt<d0%tPOT_NO)g1I(JLt~V>o^8ysn0>r|u&!&$DOG>v{Q{^Lp~hSLc+^
P6DEd*-XG+p7#J7;aC;T?

literal 0
HcmV?d00001

diff --git a/test/charmatch-complex-compressed/charmatch-complex-compressed/test-A/in.tsv.xz b/test/charmatch-complex-compressed/charmatch-complex-compressed/test-A/in.tsv.xz
new file mode 100644
index 0000000000000000000000000000000000000000..34c5c1eb48ee631df5e893b91e0e66e1902cbea2
GIT binary patch
literal 84
zcmexsUKJ6=z`*kC+7>q^21Q0O1_p)_{ill=86>mwa|=pKQj54!QgidOixQJ76S;g7
mi?R~AfNEG&`0S#d9lFZEXe6Z|F~vZB+vi$Fpg03dWE22b;TX#R

literal 0
HcmV?d00001


From 1caec3de357be94e4e88100254b5beac8e1c330b Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Thu, 17 May 2018 08:27:15 +0200
Subject: [PATCH 09/24] bump up version

---
 geval.cabal | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/geval.cabal b/geval.cabal
index 4a2046d..0533593 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -1,5 +1,5 @@
 name:                geval
-version:             0.5.6.0
+version:             0.5.7.0
 synopsis:            Machine learning evaluation tools
 description:         Please see README.md
 homepage:            http://github.com/name/project

From 192d531969bc886ae2c636a82d45695aba3908eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Filip=20Grali=C5=84ski?= <filipg@amu.edu.pl>
Date: Thu, 17 May 2018 15:21:03 +0200
Subject: [PATCH 10/24] add likelihood as evaluation metrics

---
 src/GEval/Core.hs                             | 25 ++++++++++++++++++-
 src/GEval/CreateChallenge.hs                  | 22 ++++++++++++++++
 src/GEval/OptionsParser.hs                    |  2 +-
 test/Spec.hs                                  |  6 +++++
 .../test-A/out.tsv                            |  2 ++
 .../config.txt                                |  1 +
 .../test-A/expected.tsv                       |  2 ++
 .../likelihood-simple-solution/test-A/out.tsv |  4 +++
 .../likelihood-simple/config.txt              |  1 +
 .../likelihood-simple/test-A/expected.tsv     |  4 +++
 10 files changed, 67 insertions(+), 2 deletions(-)
 create mode 100644 test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized-solution/test-A/out.tsv
 create mode 100644 test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/config.txt
 create mode 100644 test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/test-A/expected.tsv
 create mode 100644 test/likelihood-simple/likelihood-simple-solution/test-A/out.tsv
 create mode 100644 test/likelihood-simple/likelihood-simple/config.txt
 create mode 100644 test/likelihood-simple/likelihood-simple/test-A/expected.tsv

diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs
index c5a7194..bed64b7 100644
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -83,7 +83,7 @@ defaultLogLossHashedSize :: Word32
 defaultLogLossHashedSize = 10
 
 data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch
-              | MAP | LogLoss | BIOF1
+              | MAP | LogLoss | Likelihood | BIOF1 | LikelihoodHashed Word32
               deriving (Eq)
 
 instance Show Metric where
@@ -100,9 +100,16 @@ instance Show Metric where
                                                        ""
                                                       else
                                                        (show nbOfBits))
+  show (LikelihoodHashed nbOfBits) = "LikelihoodHashed" ++ (if
+                                                               nbOfBits == defaultLogLossHashedSize
+                                                            then
+                                                              ""
+                                                            else
+                                                              (show nbOfBits))
   show CharMatch = "CharMatch"
   show MAP = "MAP"
   show LogLoss = "LogLoss"
+  show Likelihood = "Likelihood"
   show BIOF1 = "BIO-F1"
 
 instance Read Metric where
@@ -118,7 +125,11 @@ instance Read Metric where
   readsPrec p ('L':'o':'g':'L':'o':'s':'s':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
     [(nbOfBits, theRest)] -> [(LogLossHashed nbOfBits, theRest)]
     _ -> [(LogLossHashed defaultLogLossHashedSize, theRest)]
+  readsPrec p ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':'H':'a':'s':'h':'e':'d':theRest) = case readsPrec p theRest of
+    [(nbOfBits, theRest)] -> [(LikelihoodHashed nbOfBits, theRest)]
+    _ -> [(LikelihoodHashed defaultLogLossHashedSize, theRest)]
   readsPrec _ ('L':'o':'g':'L':'o':'s':'s':theRest) = [(LogLoss, theRest)]
+  readsPrec _ ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(Likelihood, theRest)]
   readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
   readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
   readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
@@ -134,9 +145,11 @@ getMetricOrdering ClippEU  = TheHigherTheBetter
 getMetricOrdering (FMeasure _) = TheHigherTheBetter
 getMetricOrdering NMI = TheHigherTheBetter
 getMetricOrdering (LogLossHashed _) = TheLowerTheBetter
+getMetricOrdering (LikelihoodHashed _) = TheHigherTheBetter
 getMetricOrdering CharMatch = TheHigherTheBetter
 getMetricOrdering MAP = TheHigherTheBetter
 getMetricOrdering LogLoss = TheLowerTheBetter
+getMetricOrdering Likelihood = TheHigherTheBetter
 getMetricOrdering BIOF1 = TheHigherTheBetter
 
 defaultOutDirectory = "."
@@ -308,6 +321,8 @@ gevalCore metric inputFilePath expectedFilePath outFilePath = do
                      (fileAsLineSource expectedFilePath)
                      (fileAsLineSource outFilePath)
 
+logLossToLikehood logLoss = exp (-logLoss)
+
 gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric
                      -> LineSource (ResourceT m)
                      -> LineSource (ResourceT m)
@@ -317,6 +332,14 @@ gevalCoreOnSources RMSE inputLineSource expectedLineSource outLineSource = do
   mse <- gevalCoreOnSources MSE inputLineSource expectedLineSource outLineSource
   return $ mse ** 0.5
 
+gevalCoreOnSources Likelihood inputLineSource expectedLineSource outLineSource = do
+  logLoss <- gevalCoreOnSources LogLoss inputLineSource expectedLineSource outLineSource
+  return $ logLossToLikehood logLoss
+
+gevalCoreOnSources (LikelihoodHashed b) inputLineSource expectedLineSource outLineSource = do
+  logLoss <- gevalCoreOnSources (LogLossHashed b) inputLineSource expectedLineSource outLineSource
+  return $ logLossToLikehood logLoss
+
 gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do
   gevalCore' metric inputLineSource expectedLineSource outLineSource
 
diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs
index 3e00993..0d141d5 100644
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@@ -101,6 +101,8 @@ Cluster proverbs for languages.
 This is a sample challenge for flat clustering (unsupervised learning challenge).
 |] ++ (commonReadmeMDContents testName)
 
+readmeMDContents (LikelihoodHashed b) testname = readmeMDContents (LogLossHashed b) testname
+
 readmeMDContents (LogLossHashed _) testName = [i|
 GEval sample challenge — language model evaluation
 ==================================================
@@ -203,6 +205,16 @@ This a sample challenge for the log-loss metric.
 
 |] ++ (commonReadmeMDContents testName)
 
+readmeMDContents Likelihood testName = [i|
+Give the probability of a positive sentiment
+============================================
+
+Give the probability that a sentence expresses a positive sentiment.
+
+This a sample challenge for the likelihood metric.
+
+|] ++ (commonReadmeMDContents testName)
+
 readmeMDContents BIOF1 testName = [i|
 Tag and normalize names
 =======================
@@ -284,6 +296,7 @@ trainContents NMI = [hereLit|pl	Kto pod kim dołki kopie, ten sam w nie wpada.
 en	The pen is mightier than the sword.
 pl	Baba z wozu, koniom lżej.
 |]
+trainContents (LikelihoodHashed b) = trainContents (LogLossHashed b)
 trainContents (LogLossHashed _) = [hereLit|Ala ma psa i kota
 Basia ma psa
 Nie kupujemy kota w worku
@@ -299,6 +312,7 @@ honour	GB	honor
 titbit	GB	smakołyk
 tidbit	US	smakołyk
 |]
+trainContents Likelihood = trainContents LogLoss
 trainContents LogLoss = [hereLit|0.0	Hell, no!!!
 0.0	I hate this stuff
 1.0	Lekker!!!
@@ -328,6 +342,7 @@ When the going gets tough, the tough get going.
 devInContents (FMeasure _) = [hereLit|b	b	W	29520	779	-28	-32	a	0	0	0	0	0	0	0	0	0	0
 b	b	W	55200	1259	35	9	a	1	0	1	0	0	0	0	0	4000	4000
 |]
+devInContents (LikelihoodHashed b) = devInContents (LogLossHashed b)
 devInContents (LogLossHashed _) = [hereLit|Nie kupuj	w worku
 Ona	psa
 |]
@@ -339,6 +354,7 @@ devInContents MAP = [hereLit|US	noc
 GB	wózek dziecięcy
 GB	wizualizować
 |]
+devInContents Likelihood = devInContents LogLoss
 devInContents LogLoss = [hereLit|Great stuff!
 Boring stuff
 That's good
@@ -364,6 +380,7 @@ devExpectedContents NMI = [hereLit|en
 pl
 en
 |]
+devExpectedContents (LikelihoodHashed b) = devExpectedContents (LogLossHashed b)
 devExpectedContents (LogLossHashed _) = [hereLit|kota
 ma
 |]
@@ -375,6 +392,7 @@ devExpectedContents MAP = [hereLit|night	nite
 pram
 visualise
 |]
+devExpectedContents Likelihood = devExpectedContents LogLoss
 devExpectedContents LogLoss = [hereLit|1.0
 0.0
 1.0
@@ -402,6 +420,7 @@ W marcu, jak w garncu.
 A cada necio agrada su porrada.
 Kwiecień plecień, bo przeplata trochę zimy, trochę lata.
 |]
+testInContents (LikelihoodHashed b) = testInContents (LogLossHashed b)
 testInContents (LogLossHashed _) = [hereLit|Ala	ma
 Ona ma kota	worku
 |]
@@ -413,6 +432,7 @@ testInContents MAP = [hereLit|US	wózek dziecięcy
 GB	słoń
 US	słoń
 |]
+testInContents Likelihood = testInContents LogLoss
 testInContents LogLoss = [hereLit|That's great, ha, ha, I love it!
 Super-duper!!
 That is incredibly boring.
@@ -440,6 +460,7 @@ pl
 es
 pl
 |]
+testExpectedContents (LikelihoodHashed b) = testExpectedContents (LogLossHashed b)
 testExpectedContents (LogLossHashed _) = [hereLit|ma
 w
 |]
@@ -451,6 +472,7 @@ testExpectedContents MAP = [hereLit|trolley
 elephant
 elephant
 |]
+testExpectedContents Likelihood = testExpectedContents LogLoss
 testExpectedContents LogLoss = [hereLit|1.0
 1.0
 0.0
diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs
index eaabd31..d8ce4e7 100644
--- a/src/GEval/OptionsParser.hs
+++ b/src/GEval/OptionsParser.hs
@@ -100,7 +100,7 @@ metricReader = option auto
                  <> value defaultMetric
                  <> showDefault
                  <> metavar "METRIC"
-                 <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, BIO-F1 or CharMatch" )
+                 <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1 or CharMatch" )
 
 runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
 runGEval args = do
diff --git a/test/Spec.hs b/test/Spec.hs
index 5784042..68f595c 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -102,6 +102,9 @@ main = hspec $ do
       runGEvalTest "log-loss-hashed-probs-normalized" `shouldReturnAlmost` 1.55537749098853
     it "with log probs whose probs are summing up to less than 1.0" $ do
       runGEvalTest "log-loss-hashed-normalization" `shouldReturnAlmost` 5.16395069238851
+  describe "LikelihoodHashed challenge" $ do
+    it "example with unnormalized values" $ do
+      runGEvalTest "likelihood-hashed-not-normalized" `shouldReturnAlmost` 0.351043364110715
 
   describe "reading options" $ do
     it "can get the metric" $ do
@@ -190,6 +193,9 @@ main = hspec $ do
       runGEvalTest "logloss-simple" `shouldReturnAlmost` 0.31824
     it "perfect" $ do
       runGEvalTest "logloss-perfect" `shouldReturnAlmost` 0.0
+  describe "Likelihood" $ do
+    it "simple" $ do
+      runGEvalTest "likelihood-simple" `shouldReturnAlmost` 0.72742818469866
   describe "evaluating single lines" $ do
     it "RMSE" $ do
       gevalCoreOnSingleLines RMSE (LineInFile "stub1" 1 "blabla")
diff --git a/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized-solution/test-A/out.tsv b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized-solution/test-A/out.tsv
new file mode 100644
index 0000000..02c35e7
--- /dev/null
+++ b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized-solution/test-A/out.tsv
@@ -0,0 +1,2 @@
+tak:10 nie:8.9
+niebieski:0 żółty:1.5 czerwony:-0.5
diff --git a/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/config.txt b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/config.txt
new file mode 100644
index 0000000..00b7f18
--- /dev/null
+++ b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/config.txt
@@ -0,0 +1 @@
+--metric LikelihoodHashed8
diff --git a/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/test-A/expected.tsv b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/test-A/expected.tsv
new file mode 100644
index 0000000..70d9d14
--- /dev/null
+++ b/test/likelihood-hashed-not-normalized/likelihood-hashed-not-normalized/test-A/expected.tsv
@@ -0,0 +1,2 @@
+tak
+niebieski
diff --git a/test/likelihood-simple/likelihood-simple-solution/test-A/out.tsv b/test/likelihood-simple/likelihood-simple-solution/test-A/out.tsv
new file mode 100644
index 0000000..6978be2
--- /dev/null
+++ b/test/likelihood-simple/likelihood-simple-solution/test-A/out.tsv
@@ -0,0 +1,4 @@
+0.7
+0
+0.0
+0.6
diff --git a/test/likelihood-simple/likelihood-simple/config.txt b/test/likelihood-simple/likelihood-simple/config.txt
new file mode 100644
index 0000000..34e7898
--- /dev/null
+++ b/test/likelihood-simple/likelihood-simple/config.txt
@@ -0,0 +1 @@
+--metric Likelihood
diff --git a/test/likelihood-simple/likelihood-simple/test-A/expected.tsv b/test/likelihood-simple/likelihood-simple/test-A/expected.tsv
new file mode 100644
index 0000000..968ac3e
--- /dev/null
+++ b/test/likelihood-simple/likelihood-simple/test-A/expected.tsv
@@ -0,0 +1,4 @@
+1
+0
+0
+0

From 862ca9fd3ba9a9008f1c3ab9ad86a39bc6339417 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Thu, 17 May 2018 21:17:57 +0200
Subject: [PATCH 11/24] use newer stack

---
 stack.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stack.yaml b/stack.yaml
index 1d9e84d..042d516 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -2,4 +2,4 @@ flags: {}
 packages:
 - '.'
 extra-deps: [cond-0.4.1.1,murmur3-1.0.3]
-resolver: lts-9.14
+resolver: lts-9.21

From 3e201d11ef9b339797ba5f1a92ded05ef5f31a28 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 19 May 2018 13:49:53 +0200
Subject: [PATCH 12/24] update for Stack LTS 11.9

---
 geval.cabal                        |  2 +-
 src/Data/Conduit/AutoDecompress.hs |  4 ++--
 src/GEval/Core.hs                  | 16 ++++++++--------
 stack.yaml                         |  4 ++--
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/geval.cabal b/geval.cabal
index 0533593..97c5847 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -30,7 +30,7 @@ library
                      , Data.Conduit.AutoDecompress
   build-depends:       base >= 4.7 && < 5
                      , cond
-                     , conduit
+                     , conduit >= 1.3.0
                      , conduit-combinators
                      , conduit-extra
                      , directory
diff --git a/src/Data/Conduit/AutoDecompress.hs b/src/Data/Conduit/AutoDecompress.hs
index eb5a51f..2e92402 100644
--- a/src/Data/Conduit/AutoDecompress.hs
+++ b/src/Data/Conduit/AutoDecompress.hs
@@ -15,7 +15,7 @@ import Control.Monad.Base (MonadBase)
 import qualified Data.Conduit.Lzma as XZ
 import qualified Data.Conduit.BZlib as BZ
 
-autoDecompress :: (MonadResource m, MonadBase base m, MonadThrow m, PrimMonad base) => ConduitM ByteString ByteString m ()
+autoDecompress :: (MonadResource m, MonadThrow m, PrimMonad m) => ConduitM ByteString ByteString m ()
 autoDecompress = do
   f <- await
   case f of
@@ -34,7 +34,7 @@ autoDecompress = do
     Nothing -> return ()
 
 
-lookAtMagicNumbers :: (MonadResource m, MonadBase base m, MonadThrow m, PrimMonad base) => (Word8, Word8) -> Conduit ByteString m ByteString
+lookAtMagicNumbers :: (MonadResource m, MonadThrow m, PrimMonad m) => (Word8, Word8) -> Conduit ByteString m ByteString
 lookAtMagicNumbers (31, 139) = ungzip
 lookAtMagicNumbers (66, 90) = BZ.bunzip2
 lookAtMagicNumbers (253, 55) = XZ.decompress Nothing
diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs
index bed64b7..e0a110c 100644
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -323,7 +323,7 @@ gevalCore metric inputFilePath expectedFilePath outFilePath = do
 
 logLossToLikehood logLoss = exp (-logLoss)
 
-gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric
+gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => Metric
                      -> LineSource (ResourceT m)
                      -> LineSource (ResourceT m)
                      -> LineSource (ResourceT m)
@@ -345,7 +345,7 @@ gevalCoreOnSources metric inputLineSource expectedLineSource outLineSource = do
 
 data LineInFile = LineInFile FilePath Word32 Text
 
-gevalCore' :: (MonadIO m, MonadThrow m, MonadBaseControl IO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
+gevalCore' :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => Metric -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
 gevalCore' MSE _ = gevalCoreWithoutInput outParser outParser itemError averageC id
   where outParser = getValue . TR.double
 
@@ -447,25 +447,25 @@ data SourceItem a = Got a | Wrong String | Done
 skipLineNumber :: (x -> c) -> ((Word32, x) -> c)
 skipLineNumber fun = fun . snd
 
-gevalCoreWithoutInput :: (MonadBaseControl IO m, MonadThrow m, MonadIO m) => (Text -> Either String a) -> (Text -> Either String b) -> ((a, b) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
+gevalCoreWithoutInput :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => (Text -> Either String a) -> (Text -> Either String b) -> ((a, b) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> LineSource (ResourceT m) -> LineSource (ResourceT m) -> m (MetricValue)
 gevalCoreWithoutInput expParser outParser itemStep aggregator finalStep expectedLineStream outLineStream =
   gevalCoreGeneralized (ParserSpecWithoutInput expParser outParser) (trans itemStep) aggregator finalStep (WithoutInput expectedLineStream outLineStream)
  where
    trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
    trans step (ParsedRecordWithoutInput x y) = step (x, y)
 
-gevalCore''' :: (MonadBaseControl IO m, MonadThrow m, MonadIO m) => ParserSpec (WithoutInput m a b) -> ((Word32, (a, b)) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> WithoutInput m a b -> m (MetricValue)
+gevalCore''' :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec (WithoutInput m a b) -> ((Word32, (a, b)) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> WithoutInput m a b -> m (MetricValue)
 gevalCore''' parserSpec itemStep aggregator finalStep context =
   gevalCoreGeneralized' parserSpec (trans itemStep) aggregator finalStep context
  where
    trans :: ((Word32, (a, b)) -> c) -> (Word32, ParsedRecord (WithoutInput m a b)) -> c
    trans step (n, ParsedRecordWithoutInput x y) = step (n, (x, y))
 
-gevalCoreGeneralized :: (EvaluationContext ctxt m, MonadBaseControl IO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> (ParsedRecord ctxt -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue)
+gevalCoreGeneralized :: (EvaluationContext ctxt m, MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> (ParsedRecord ctxt -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue)
 gevalCoreGeneralized parserSpec itemStep aggregator finalStep context =
   gevalCoreGeneralized' parserSpec (skipLineNumber itemStep) aggregator finalStep context
 
-gevalCoreGeneralized' :: forall m ctxt c d . (EvaluationContext ctxt m, MonadBaseControl IO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> ((Word32, ParsedRecord ctxt) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue)
+gevalCoreGeneralized' :: forall m ctxt c d . (EvaluationContext ctxt m, MonadUnliftIO m, MonadThrow m, MonadIO m) => ParserSpec ctxt -> ((Word32, ParsedRecord ctxt) -> c) -> (Sink c (ResourceT m) d) -> (d -> Double) -> ctxt -> m (MetricValue)
 gevalCoreGeneralized' parserSpec itemStep aggregator finalStep context = do
    v <- runResourceT $
      (((getZipSource $ (,)
@@ -486,7 +486,7 @@ class EvaluationContext ctxt m where
 
 data WithoutInput m e o = WithoutInput (LineSource (ResourceT m)) (LineSource (ResourceT m))
 
-instance (MonadBaseControl IO m, MonadIO m, MonadThrow m) => EvaluationContext (WithoutInput m e o) m where
+instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithoutInput m e o) m where
   data ParserSpec (WithoutInput m e o) = ParserSpecWithoutInput (Text -> Either String e) (Text -> Either String o)
   data WrappedParsedRecord (WithoutInput m e o) = WrappedParsedRecordWithoutInput (SourceItem e) (SourceItem o)
   data ParsedRecord (WithoutInput m e o) = ParsedRecordWithoutInput e o
@@ -515,7 +515,7 @@ data WithInput m i e o = WithInput (LineSource (ResourceT m)) (LineSource (Resou
 
 getInputFilePath (WithInput (LineSource _ inputFilePath _) _ _) = inputFilePath
 
-instance (MonadBaseControl IO m, MonadIO m, MonadThrow m) => EvaluationContext (WithInput m i e o) m where
+instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithInput m i e o) m where
   data ParserSpec (WithInput m i e o) = ParserSpecWithInput (Text -> Either String i) (Text -> Either String e) (Text -> Either String o)
   data WrappedParsedRecord (WithInput m i e o) = WrappedParsedRecordWithInput (SourceItem i) (SourceItem e) (SourceItem o)
   data ParsedRecord (WithInput m i e o) = ParsedRecordWithInput i e o
diff --git a/stack.yaml b/stack.yaml
index 042d516..86aa566 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -1,5 +1,5 @@
 flags: {}
 packages:
 - '.'
-extra-deps: [cond-0.4.1.1,murmur3-1.0.3]
-resolver: lts-9.21
+extra-deps: [murmur3-1.0.3]
+resolver: lts-11.9

From 881a77e2399f0f4057ea22cc9c87cb77ce9bb4ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Filip=20Grali=C5=84ski?= <filipg@amu.edu.pl>
Date: Fri, 25 May 2018 14:44:19 +0200
Subject: [PATCH 13/24] better diagnostic messages for BIO

---
 src/GEval/BIO.hs                                       | 9 ++++++++-
 test/Spec.hs                                           | 2 ++
 test/bio-f1-error/bio-f1-error-solution/test-A/out.tsv | 2 ++
 test/bio-f1-error/bio-f1-error/config.txt              | 1 +
 test/bio-f1-error/bio-f1-error/test-A/expected.tsv     | 2 ++
 5 files changed, 15 insertions(+), 1 deletion(-)
 create mode 100644 test/bio-f1-error/bio-f1-error-solution/test-A/out.tsv
 create mode 100644 test/bio-f1-error/bio-f1-error/config.txt
 create mode 100644 test/bio-f1-error/bio-f1-error/test-A/expected.tsv

diff --git a/src/GEval/BIO.hs b/src/GEval/BIO.hs
index a3e8819..f213ab4 100644
--- a/src/GEval/BIO.hs
+++ b/src/GEval/BIO.hs
@@ -19,6 +19,13 @@ import GEval.Common
 data BIOLabel = Outside | Beginning T.Text (Maybe T.Text) | Inside T.Text (Maybe T.Text)
                 deriving (Eq, Show)
 
+formatBioLabel :: BIOLabel -> T.Text
+formatBioLabel Outside = "O"
+formatBioLabel (Beginning label Nothing) = T.concat ["B-", label]
+formatBioLabel (Beginning label (Just normalized)) = T.concat ["B-", label, "/", normalized]
+formatBioLabel (Inside label Nothing) = T.concat ["I-", label]
+formatBioLabel (Inside label (Just normalized)) = T.concat ["I-", label, "/", normalized]
+
 data TaggedSpan = TaggedSpan Int Int
                   deriving (Eq, Show)
 
@@ -45,7 +52,7 @@ labelSplitToEntity labs@(h@(_,begIx):t) = if isBeginning h && all (\tp -> isInsi
                                      then
                                        Right $ TaggedEntity (TaggedSpan begIx lastItemIx) btp mNormalized
                                      else
-                                       Left "something wrong with label sequence"
+                                       Left $ "inconsistent label sequence `" ++ (T.unpack $ T.intercalate " " $ map (formatBioLabel . fst) labs)  ++ "`"
   where isBeginning (Beginning _ _, _) = True
         isBeginning _ = False
         isInside (Inside _ _, _) = True
diff --git a/test/Spec.hs b/test/Spec.hs
index 68f595c..d183bc0 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -258,6 +258,8 @@ main = hspec $ do
       runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5
     it "check perfect score" $ do
       runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0
+    it "check inconsistent input" $ do
+      runGEvalTest "bio-f1-error" `shouldThrow` (== UnexpectedData 2 "inconsistent label sequence `B-NAME/JOHN I-FOO/SMITH I-FOO/X`")
   describe "automatic decompression" $ do
     it "more complex test" $ do
       runGEvalTest "charmatch-complex-compressed" `shouldReturnAlmost` 0.1923076923076923
diff --git a/test/bio-f1-error/bio-f1-error-solution/test-A/out.tsv b/test/bio-f1-error/bio-f1-error-solution/test-A/out.tsv
new file mode 100644
index 0000000..5b95d34
--- /dev/null
+++ b/test/bio-f1-error/bio-f1-error-solution/test-A/out.tsv
@@ -0,0 +1,2 @@
+O B-CITY/WARSZAWA I-CITY/WARSZAWA
+O B-NAME/JOHN I-FOO/SMITH I-FOO/X O
diff --git a/test/bio-f1-error/bio-f1-error/config.txt b/test/bio-f1-error/bio-f1-error/config.txt
new file mode 100644
index 0000000..70977e1
--- /dev/null
+++ b/test/bio-f1-error/bio-f1-error/config.txt
@@ -0,0 +1 @@
+--metric BIO-F1
diff --git a/test/bio-f1-error/bio-f1-error/test-A/expected.tsv b/test/bio-f1-error/bio-f1-error/test-A/expected.tsv
new file mode 100644
index 0000000..f6465b5
--- /dev/null
+++ b/test/bio-f1-error/bio-f1-error/test-A/expected.tsv
@@ -0,0 +1,2 @@
+O B-CITY/WARSZAWA I-CITY/WARSZAWA
+O B-NAME/JOHN I-NAME/SMITH O O

From c71c7a019d7feb711687ebbec5e87925b42c28ac Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 26 May 2018 13:09:06 +0200
Subject: [PATCH 14/24] remove warning in LineByLine.hs

---
 src/GEval/LineByLine.hs | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs
index 9afdf83..d44e9d4 100644
--- a/src/GEval/LineByLine.hs
+++ b/src/GEval/LineByLine.hs
@@ -35,8 +35,8 @@ runLineByLine spec = do
   (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
   gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum
    where metric = gesMetric spec
-         consum :: Consumer LineRecord (ResourceT IO) ()
-         consum = (CL.map (encodeUtf8 . formatOutput) =$= CC.unlinesAscii =$= CC.stdout)
+         consum :: ConduitT LineRecord Void (ResourceT IO) ()
+         consum = (CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
          formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
            formatScore score,
            escapeTabs inp,
@@ -51,13 +51,13 @@ runDiff otherOut spec = do
   (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
   let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath
   let sourceB = gevalLineByLineSource metric inputFilePath expectedFilePath otherOutFilePath
-  runResourceT $
+  runResourceT $ runConduit $
      ((getZipSource $ (,)
        <$> ZipSource sourceA
-       <*> ZipSource sourceB) $$ consum)
+       <*> ZipSource sourceB) .| consum)
   where metric = gesMetric spec
-        consum :: Consumer (LineRecord, LineRecord) (ResourceT IO) ()
-        consum = (CL.filter shouldBeShown =$= CL.map (encodeUtf8 . formatOutput) =$= CC.unlinesAscii =$= CC.stdout)
+        consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
+        consum = (CL.filter shouldBeShown .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
         shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) =
           outA /= outB && scoreA /= scoreB
         formatOutput (LineRecord inp exp outA _ scoreA, LineRecord _ _ outB _ scoreB) = Data.Text.intercalate "\t" [
@@ -72,16 +72,16 @@ runDiff otherOut spec = do
 escapeTabs :: Text -> Text
 escapeTabs = Data.Text.replace "\t" "<tab>"
 
-gevalLineByLineCore :: Metric -> FilePath -> FilePath -> FilePath -> Sink LineRecord (ResourceT IO) () -> IO ()
+gevalLineByLineCore :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT LineRecord Void (ResourceT IO) () -> IO ()
 gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum =
-  runResourceT $
-     ((gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath) $$ consum)
+  runResourceT $ runConduit $
+     ((gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath) .| consum)
 
-gevalLineByLineSource :: Metric -> FilePath -> FilePath -> FilePath -> Source (ResourceT IO) LineRecord
+gevalLineByLineSource :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT () LineRecord (ResourceT IO) ()
 gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath =
   (getZipSource $ (,)
        <$> ZipSource (CL.sourceList [1..])
-       <*> (ZipSource $ recordSource context parserSpec)) =$= CL.mapM (checkStepM evaluateLine) =$= CL.catMaybes
+       <*> (ZipSource $ recordSource context parserSpec)) .| CL.mapM (checkStepM evaluateLine) .| CL.catMaybes
   where parserSpec = (ParserSpecWithInput (Right . id) (Right . id) (Right . id))
         context = (WithInput inputLineSource expectedLineSource outputLineSource)
         inputLineSource = fileAsLineSource inputFilePath

From cb655cd2ae0fb4cb7dd7cf9ebcf9a57e2d33ee2e Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 26 May 2018 14:40:26 +0200
Subject: [PATCH 15/24] refactor LineByLine

---
 src/GEval/LineByLine.hs | 42 ++++++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs
index d44e9d4..b2ee4b3 100644
--- a/src/GEval/LineByLine.hs
+++ b/src/GEval/LineByLine.hs
@@ -9,7 +9,9 @@
 
 module GEval.LineByLine
        (runLineByLine,
-        runDiff
+        runLineByLineGeneralized,
+        runDiff,
+        runDiffGeneralized
        ) where
 
 import GEval.Core
@@ -31,11 +33,8 @@ data LineRecord = LineRecord Text Text Text Word32 MetricValue
                   deriving (Eq, Show)
 
 runLineByLine :: GEvalSpecification -> IO ()
-runLineByLine spec = do
-  (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
-  gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum
-   where metric = gesMetric spec
-         consum :: ConduitT LineRecord Void (ResourceT IO) ()
+runLineByLine spec = runLineByLineGeneralized spec consum
+   where consum :: ConduitT LineRecord Void (ResourceT IO) ()
          consum = (CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
          formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
            formatScore score,
@@ -45,18 +44,15 @@ runLineByLine spec = do
          formatScore :: MetricValue -> Text
          formatScore = Data.Text.pack . printf "%f"
 
-runDiff :: FilePath -> GEvalSpecification -> IO ()
-runDiff otherOut spec = do
-  let otherOutFilePath = getOutFile spec otherOut
+runLineByLineGeneralized :: GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
+runLineByLineGeneralized spec consum = do
   (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
-  let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath
-  let sourceB = gevalLineByLineSource metric inputFilePath expectedFilePath otherOutFilePath
-  runResourceT $ runConduit $
-     ((getZipSource $ (,)
-       <$> ZipSource sourceA
-       <*> ZipSource sourceB) .| consum)
+  gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum
   where metric = gesMetric spec
-        consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
+
+runDiff :: FilePath -> GEvalSpecification -> IO ()
+runDiff otherOut spec = runDiffGeneralized otherOut spec consum
+  where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
         consum = (CL.filter shouldBeShown .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
         shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) =
           outA /= outB && scoreA /= scoreB
@@ -69,10 +65,22 @@ runDiff otherOut spec = do
         formatScoreDiff :: Double -> Text
         formatScoreDiff = Data.Text.pack . printf "%f"
 
+runDiffGeneralized :: FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a
+runDiffGeneralized otherOut spec consum = do
+  let otherOutFilePath = getOutFile spec otherOut
+  (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
+  let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath
+  let sourceB = gevalLineByLineSource metric inputFilePath expectedFilePath otherOutFilePath
+  runResourceT $ runConduit $
+     ((getZipSource $ (,)
+       <$> ZipSource sourceA
+       <*> ZipSource sourceB) .| consum)
+  where metric = gesMetric spec
+
 escapeTabs :: Text -> Text
 escapeTabs = Data.Text.replace "\t" "<tab>"
 
-gevalLineByLineCore :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT LineRecord Void (ResourceT IO) () -> IO ()
+gevalLineByLineCore :: Metric -> FilePath -> FilePath -> FilePath -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
 gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum =
   runResourceT $ runConduit $
      ((gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath) .| consum)

From f68223409e32a7313d69a8263923880fd2610ab9 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 26 May 2018 21:10:22 +0200
Subject: [PATCH 16/24] add test for the line-by-line mode

---
 geval.cabal                                   |  1 +
 src/GEval/LineByLine.hs                       |  3 ++-
 test/Spec.hs                                  | 21 ++++++++++++++++++-
 .../likelihood-simple/test-A/in.tsv           |  4 ++++
 4 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 test/likelihood-simple/likelihood-simple/test-A/in.tsv

diff --git a/geval.cabal b/geval.cabal
index 97c5847..08ce371 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -80,6 +80,7 @@ test-suite geval-test
                      , text
                      , attoparsec
                      , edit-distance
+                     , conduit
   ghc-options:         -threaded -rtsopts -with-rtsopts=-N
   default-language:    Haskell2010
 
diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs
index b2ee4b3..fe6aa30 100644
--- a/src/GEval/LineByLine.hs
+++ b/src/GEval/LineByLine.hs
@@ -11,7 +11,8 @@ module GEval.LineByLine
        (runLineByLine,
         runLineByLineGeneralized,
         runDiff,
-        runDiffGeneralized
+        runDiffGeneralized,
+        LineRecord(..)
        ) where
 
 import GEval.Core
diff --git a/test/Spec.hs b/test/Spec.hs
index d183bc0..c1b24c8 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -9,11 +9,14 @@ import GEval.ClippEU
 import GEval.PrecisionRecall
 import GEval.ClusteringMetrics
 import GEval.BIO
+import GEval.LineByLine
 import Data.Attoparsec.Text
 import Options.Applicative
 import Data.Text
 import Text.EditDistance
 
+import Data.Conduit.List (consume)
+
 import qualified Test.HUnit as HU
 
 informationRetrievalBookExample :: [(String, Int)]
@@ -263,7 +266,23 @@ main = hspec $ do
   describe "automatic decompression" $ do
     it "more complex test" $ do
       runGEvalTest "charmatch-complex-compressed" `shouldReturnAlmost` 0.1923076923076923
-
+  describe "line by line mode" $ do
+    let sampleChallenge =
+          GEvalSpecification
+          { gesOutDirectory = "test/likelihood-simple/likelihood-simple-solution",
+            gesExpectedDirectory = Just "test/likelihood-simple/likelihood-simple",
+            gesTestName = "test-A",
+            gesOutFile = "out.tsv",
+            gesExpectedFile = "expected.tsv",
+            gesInputFile = "in.tsv",
+            gesMetric = Likelihood,
+            gesPrecision = Nothing }
+    it "simple test" $ do
+      results <- runLineByLineGeneralized sampleChallenge Data.Conduit.List.consume
+      Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo",
+                                                                        "bar",
+                                                                        "baz",
+                                                                        "baq"]
 
 neverMatch :: Char -> Int -> Bool
 neverMatch _ _ = False
diff --git a/test/likelihood-simple/likelihood-simple/test-A/in.tsv b/test/likelihood-simple/likelihood-simple/test-A/in.tsv
new file mode 100644
index 0000000..655a059
--- /dev/null
+++ b/test/likelihood-simple/likelihood-simple/test-A/in.tsv
@@ -0,0 +1,4 @@
+foo
+bar
+baz
+baq

From ab1056301eccfd0bf4459297544eda6a8426a43e Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Mon, 28 May 2018 09:45:08 +0200
Subject: [PATCH 17/24] add sorting for --line-by-line internally

---
 src/Data/Conduit/AutoDecompress.hs |  7 +++---
 src/GEval/LineByLine.hs            | 38 ++++++++++++++++++++++--------
 src/GEval/OptionsParser.hs         |  4 ++--
 test/Spec.hs                       |  6 ++++-
 4 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/src/Data/Conduit/AutoDecompress.hs b/src/Data/Conduit/AutoDecompress.hs
index 2e92402..b959d6f 100644
--- a/src/Data/Conduit/AutoDecompress.hs
+++ b/src/Data/Conduit/AutoDecompress.hs
@@ -1,7 +1,8 @@
 {-# LANGUAGE AllowAmbiguousTypes #-}
 
 module Data.Conduit.AutoDecompress
-       (autoDecompress)
+       (autoDecompress,
+        doNothing)
        where
 
 import Data.Conduit
@@ -34,11 +35,11 @@ autoDecompress = do
     Nothing -> return ()
 
 
-lookAtMagicNumbers :: (MonadResource m, MonadThrow m, PrimMonad m) => (Word8, Word8) -> Conduit ByteString m ByteString
+lookAtMagicNumbers :: (MonadResource m, MonadThrow m, PrimMonad m) => (Word8, Word8) -> ConduitT ByteString ByteString m ()
 lookAtMagicNumbers (31, 139) = ungzip
 lookAtMagicNumbers (66, 90) = BZ.bunzip2
 lookAtMagicNumbers (253, 55) = XZ.decompress Nothing
 lookAtMagicNumbers _ = doNothing
 
-doNothing :: Monad m => Conduit ByteString m ByteString
+doNothing :: Monad m => ConduitT a a m ()
 doNothing = Data.Conduit.Combinators.filter (const True)
diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs
index fe6aa30..91848db 100644
--- a/src/GEval/LineByLine.hs
+++ b/src/GEval/LineByLine.hs
@@ -12,17 +12,22 @@ module GEval.LineByLine
         runLineByLineGeneralized,
         runDiff,
         runDiffGeneralized,
-        LineRecord(..)
+        LineRecord(..),
+        ResultOrdering(..)
        ) where
 
 import GEval.Core
 
+import Data.Conduit.AutoDecompress (doNothing)
+
 import Data.Conduit
 import qualified Data.Conduit.List as CL
 import qualified Data.Conduit.Combinators as CC
 import Data.Text
 import Data.Text.Encoding
 
+import Data.List (sortBy, sort)
+
 import Control.Monad.IO.Class
 import Control.Monad.Trans.Resource
 
@@ -33,8 +38,10 @@ import Text.Printf
 data LineRecord = LineRecord Text Text Text Word32 MetricValue
                   deriving (Eq, Show)
 
-runLineByLine :: GEvalSpecification -> IO ()
-runLineByLine spec = runLineByLineGeneralized spec consum
+data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest
+
+runLineByLine :: ResultOrdering -> GEvalSpecification -> IO ()
+runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum
    where consum :: ConduitT LineRecord Void (ResourceT IO) ()
          consum = (CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
          formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
@@ -45,14 +52,25 @@ runLineByLine spec = runLineByLineGeneralized spec consum
          formatScore :: MetricValue -> Text
          formatScore = Data.Text.pack . printf "%f"
 
-runLineByLineGeneralized :: GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
-runLineByLineGeneralized spec consum = do
+runLineByLineGeneralized :: ResultOrdering -> GEvalSpecification -> ConduitT LineRecord Void (ResourceT IO) a -> IO a
+runLineByLineGeneralized ordering spec consum = do
   (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
-  gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath consum
+  gevalLineByLineCore metric inputFilePath expectedFilePath outFilePath (sorter ordering .| consum)
   where metric = gesMetric spec
+        sorter KeepTheOriginalOrder = doNothing
+        sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
+        sortOrder FirstTheWorst TheHigherTheBetter = compareScores
+        sortOrder FirstTheBest TheLowerTheBetter = compareScores
+        sortOrder _ _ = flip compareScores
+        compareScores (LineRecord _ _ _ _ s1) (LineRecord _ _ _ _ s2) = s1 `compare` s2
 
-runDiff :: FilePath -> GEvalSpecification -> IO ()
-runDiff otherOut spec = runDiffGeneralized otherOut spec consum
+gobbleAndDo :: Monad m => ([a] -> [b]) -> ConduitT a b m ()
+gobbleAndDo fun = do
+  l <- CC.sinkList
+  CC.yieldMany $ fun l
+
+runDiff :: ResultOrdering -> FilePath -> GEvalSpecification -> IO ()
+runDiff ordering otherOut spec = runDiffGeneralized ordering otherOut spec consum
   where consum :: ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
         consum = (CL.filter shouldBeShown .| CL.map (encodeUtf8 . formatOutput) .| CC.unlinesAscii .| CC.stdout)
         shouldBeShown (LineRecord _ _ outA _ scoreA, LineRecord _ _ outB _ scoreB) =
@@ -66,8 +84,8 @@ runDiff otherOut spec = runDiffGeneralized otherOut spec consum
         formatScoreDiff :: Double -> Text
         formatScoreDiff = Data.Text.pack . printf "%f"
 
-runDiffGeneralized :: FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a
-runDiffGeneralized otherOut spec consum = do
+runDiffGeneralized :: ResultOrdering -> FilePath -> GEvalSpecification -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) a -> IO a
+runDiffGeneralized ordering otherOut spec consum = do
   let otherOutFilePath = getOutFile spec otherOut
   (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFiles spec
   let sourceA = gevalLineByLineSource metric inputFilePath expectedFilePath outFilePath
diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs
index d8ce4e7..76d6808 100644
--- a/src/GEval/OptionsParser.hs
+++ b/src/GEval/OptionsParser.hs
@@ -155,10 +155,10 @@ runGEval''' (Just Init) spec = do
   initChallenge spec
   return Nothing
 runGEval''' (Just LineByLine) spec = do
-  runLineByLine spec
+  runLineByLine KeepTheOriginalOrder spec
   return Nothing
 runGEval''' (Just (Diff otherOut)) spec = do
-  runDiff otherOut spec
+  runDiff KeepTheOriginalOrder otherOut spec
   return Nothing
 
 initChallenge :: GEvalSpecification -> IO ()
diff --git a/test/Spec.hs b/test/Spec.hs
index c1b24c8..b8dac23 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -278,11 +278,15 @@ main = hspec $ do
             gesMetric = Likelihood,
             gesPrecision = Nothing }
     it "simple test" $ do
-      results <- runLineByLineGeneralized sampleChallenge Data.Conduit.List.consume
+      results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge Data.Conduit.List.consume
       Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo",
                                                                         "bar",
                                                                         "baz",
                                                                         "baq"]
+    it "test sorting" $ do
+      results <- runLineByLineGeneralized FirstTheWorst sampleChallenge Data.Conduit.List.consume
+      Prelude.head (Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results) `shouldBe` "baq"
+
 
 neverMatch :: Char -> Int -> Bool
 neverMatch _ _ = False

From 3f7384f46748f6e900b422ac8513bbf239f64df9 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Mon, 28 May 2018 10:04:27 +0200
Subject: [PATCH 18/24] add --sort and --reverse-sort options

---
 geval.cabal                |  2 +-
 src/GEval/Core.hs          |  5 ++++-
 src/GEval/LineByLine.hs    | 13 ++++++++++---
 src/GEval/OptionsParser.hs | 26 ++++++++++++++++++--------
 4 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/geval.cabal b/geval.cabal
index 08ce371..e51ad70 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -1,5 +1,5 @@
 name:                geval
-version:             0.5.7.0
+version:             0.6.0.0
 synopsis:            Machine learning evaluation tools
 description:         Please see README.md
 homepage:            http://github.com/name/project
diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs
index e0a110c..37cb237 100644
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -16,6 +16,7 @@ module GEval.Core
       MetricValue,
       GEvalSpecialCommand(..),
       GEvalSpecification(..),
+      ResultOrdering(..),
       GEvalOptions(..),
       GEvalException(..),
       defaultGEvalSpecification,
@@ -180,11 +181,13 @@ getExpectedDirectory spec = fromMaybe outDirectory $ gesExpectedDirectory spec
 
 data GEvalSpecialCommand = Init | LineByLine | Diff FilePath
 
+data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest
+
 data GEvalOptions = GEvalOptions
                     { geoSpecialCommand :: Maybe GEvalSpecialCommand,
+                      geoResultOrdering :: ResultOrdering,
                       geoSpec :: GEvalSpecification }
 
-
 data GEvalException = NoExpectedFile FilePath
                       | NoOutFile FilePath
                       | NoExpectedDirectory FilePath
diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs
index 91848db..74f053b 100644
--- a/src/GEval/LineByLine.hs
+++ b/src/GEval/LineByLine.hs
@@ -38,8 +38,6 @@ import Text.Printf
 data LineRecord = LineRecord Text Text Text Word32 MetricValue
                   deriving (Eq, Show)
 
-data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest
-
 runLineByLine :: ResultOrdering -> GEvalSpecification -> IO ()
 runLineByLine ordering spec = runLineByLineGeneralized ordering spec consum
    where consum :: ConduitT LineRecord Void (ResourceT IO) ()
@@ -93,8 +91,17 @@ runDiffGeneralized ordering otherOut spec consum = do
   runResourceT $ runConduit $
      ((getZipSource $ (,)
        <$> ZipSource sourceA
-       <*> ZipSource sourceB) .| consum)
+       <*> ZipSource sourceB) .| sorter ordering .| consum)
   where metric = gesMetric spec
+        sorter KeepTheOriginalOrder = doNothing
+        sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
+        sortOrder FirstTheWorst TheHigherTheBetter = compareScores
+        sortOrder FirstTheBest TheLowerTheBetter = compareScores
+        sortOrder _ _ = flip compareScores
+        compareScores ((LineRecord _ _ _ _ o1), (LineRecord _ _ _ _ n1))
+                      ((LineRecord _ _ _ _ o2), (LineRecord _ _ _ _ n2))
+          = (n1 - o1) `compare` (n2 - o2)
+
 
 escapeTabs :: Text -> Text
 escapeTabs = Data.Text.replace "\t" "<tab>"
diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs
index 76d6808..dbdd6d6 100644
--- a/src/GEval/OptionsParser.hs
+++ b/src/GEval/OptionsParser.hs
@@ -41,6 +41,16 @@ optionsParser = GEvalOptions
                       <> short 'd'
                       <> metavar "OTHER-OUT"
                       <> help "compare results")))
+   <*> ((flag' FirstTheWorst
+         (long "sort"
+          <> short 's'
+          <> help "When in line-by-line or diff mode, sort the results from the worst to the best"))
+        <|>
+        (flag' FirstTheBest
+         (long "reverse-sort"
+          <> short 'r'
+          <> help "When in line-by-line or diff mode, sort the results from the best to the worst"))
+        <|> pure KeepTheOriginalOrder)
    <*> specParser
 
 precisionArgParser :: Parser Int
@@ -145,20 +155,20 @@ attemptToReadOptsFromConfigFile args opts = do
 
 
 runGEval'' :: GEvalOptions -> IO (Maybe MetricValue)
-runGEval'' opts = runGEval''' (geoSpecialCommand opts) (geoSpec opts)
+runGEval'' opts = runGEval''' (geoSpecialCommand opts) (geoResultOrdering opts) (geoSpec opts)
 
-runGEval''' :: Maybe GEvalSpecialCommand -> GEvalSpecification -> IO (Maybe MetricValue)
-runGEval''' Nothing spec = do
+runGEval''' :: Maybe GEvalSpecialCommand -> ResultOrdering -> GEvalSpecification -> IO (Maybe MetricValue)
+runGEval''' Nothing _ spec = do
   val <- geval spec
   return $ Just val
-runGEval''' (Just Init) spec = do
+runGEval''' (Just Init) _ spec = do
   initChallenge spec
   return Nothing
-runGEval''' (Just LineByLine) spec = do
-  runLineByLine KeepTheOriginalOrder spec
+runGEval''' (Just LineByLine) ordering spec = do
+  runLineByLine ordering spec
   return Nothing
-runGEval''' (Just (Diff otherOut)) spec = do
-  runDiff KeepTheOriginalOrder otherOut spec
+runGEval''' (Just (Diff otherOut)) ordering spec = do
+  runDiff ordering otherOut spec
   return Nothing
 
 initChallenge :: GEvalSpecification -> IO ()

From 65e8d2562e31c4305f52cf699c778abe99a44ef2 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Tue, 29 May 2018 20:59:00 +0200
Subject: [PATCH 19/24] underscores can be used in the BIO format

---
 src/GEval/BIO.hs                                               | 2 +-
 test/Spec.hs                                                   | 2 ++
 .../bio-f1-simple-underscores-solution/test-A/out.tsv          | 3 +++
 .../bio-f1-simple-underscores/config.txt                       | 1 +
 .../bio-f1-simple-underscores/test-A/expected.tsv              | 3 +++
 5 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 test/bio-f1-simple-underscores/bio-f1-simple-underscores-solution/test-A/out.tsv
 create mode 100644 test/bio-f1-simple-underscores/bio-f1-simple-underscores/config.txt
 create mode 100644 test/bio-f1-simple-underscores/bio-f1-simple-underscores/test-A/expected.tsv

diff --git a/src/GEval/BIO.hs b/src/GEval/BIO.hs
index f213ab4..7b7b601 100644
--- a/src/GEval/BIO.hs
+++ b/src/GEval/BIO.hs
@@ -93,7 +93,7 @@ bioLabelParser =
   (string "O" *> pure Outside) <|>
   (do
       labelType <- bioMarkerParser
-      string "-"
+      (string "-" <|> string "_")
       label <- takeWhile1 (\c -> not (isSpace c) && c /= '/')
       normalized <- (do
                        string "/"
diff --git a/test/Spec.hs b/test/Spec.hs
index b8dac23..83572e6 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -259,6 +259,8 @@ main = hspec $ do
       runGEvalTest "bio-f1-complex" `shouldReturnAlmost` 0.625
     it "calculate F1" $ do
       runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5
+    it "calculate F1 with underscores rather than minus signs" $ do
+      runGEvalTest "bio-f1-simple-underscores" `shouldReturnAlmost` 0.5
     it "check perfect score" $ do
       runGEvalTest "bio-f1-perfect" `shouldReturnAlmost` 1.0
     it "check inconsistent input" $ do
diff --git a/test/bio-f1-simple-underscores/bio-f1-simple-underscores-solution/test-A/out.tsv b/test/bio-f1-simple-underscores/bio-f1-simple-underscores-solution/test-A/out.tsv
new file mode 100644
index 0000000..4380412
--- /dev/null
+++ b/test/bio-f1-simple-underscores/bio-f1-simple-underscores-solution/test-A/out.tsv
@@ -0,0 +1,3 @@
+O O B_city/POZNAŃ O O B_date/MARCH I_date/12
+B_city/BUK O O O
+B_name/FOO O B_surname/KOWALSKI
diff --git a/test/bio-f1-simple-underscores/bio-f1-simple-underscores/config.txt b/test/bio-f1-simple-underscores/bio-f1-simple-underscores/config.txt
new file mode 100644
index 0000000..70977e1
--- /dev/null
+++ b/test/bio-f1-simple-underscores/bio-f1-simple-underscores/config.txt
@@ -0,0 +1 @@
+--metric BIO-F1
diff --git a/test/bio-f1-simple-underscores/bio-f1-simple-underscores/test-A/expected.tsv b/test/bio-f1-simple-underscores/bio-f1-simple-underscores/test-A/expected.tsv
new file mode 100644
index 0000000..31de6dc
--- /dev/null
+++ b/test/bio-f1-simple-underscores/bio-f1-simple-underscores/test-A/expected.tsv
@@ -0,0 +1,3 @@
+O O B_city/POZNAŃ O O B_date/MARCH I_date/12
+O O O O
+O B_city/KONIN O

From 4768931221f23ef83997104048a63f5282ef84e3 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Tue, 29 May 2018 22:04:19 +0200
Subject: [PATCH 20/24] add BIO-F1-Labels metric

---
 src/GEval/BIO.hs                                       |  7 ++++++-
 src/GEval/Core.hs                                      | 10 +++++++++-
 src/GEval/CreateChallenge.hs                           |  6 ++++++
 src/GEval/OptionsParser.hs                             |  2 +-
 test/Spec.hs                                           |  2 ++
 .../bio-f1-complex-labels-solution/test-A/out.tsv      |  6 ++++++
 .../bio-f1-complex-labels/config.txt                   |  1 +
 .../bio-f1-complex-labels/test-A/expected.tsv          |  6 ++++++
 8 files changed, 37 insertions(+), 3 deletions(-)
 create mode 100644 test/bio-f1-complex-labels/bio-f1-complex-labels-solution/test-A/out.tsv
 create mode 100644 test/bio-f1-complex-labels/bio-f1-complex-labels/config.txt
 create mode 100644 test/bio-f1-complex-labels/bio-f1-complex-labels/test-A/expected.tsv

diff --git a/src/GEval/BIO.hs b/src/GEval/BIO.hs
index 7b7b601..1f4b75b 100644
--- a/src/GEval/BIO.hs
+++ b/src/GEval/BIO.hs
@@ -1,7 +1,9 @@
 {-# LANGUAGE OverloadedStrings #-}
 
 module GEval.BIO
-       (BIOLabel(..), bioSequenceParser, parseBioSequenceIntoEntities, TaggedSpan(..), TaggedEntity(..), gatherCountsForBIO)
+       (BIOLabel(..), bioSequenceParser, parseBioSequenceIntoEntities,
+        TaggedSpan(..), TaggedEntity(..), gatherCountsForBIO,
+        eraseNormalisation)
        where
 
 import GEval.PrecisionRecall
@@ -32,6 +34,9 @@ data TaggedSpan = TaggedSpan Int Int
 data TaggedEntity = TaggedEntity TaggedSpan T.Text (Maybe T.Text)
                     deriving (Eq, Show)
 
+eraseNormalisation :: TaggedEntity -> TaggedEntity
+eraseNormalisation (TaggedEntity span label normalized) = (TaggedEntity span label Nothing)
+
 gatherCountsForBIO :: [TaggedEntity] -> [TaggedEntity] -> (Int, Int, Int)
 gatherCountsForBIO expected got = (maxMatchOnOrdered laterThan expected got, length expected, length got)
   where
diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs
index 37cb237..1353aab 100644
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -84,7 +84,7 @@ defaultLogLossHashedSize :: Word32
 defaultLogLossHashedSize = 10
 
 data Metric = RMSE | MSE | BLEU | Accuracy | ClippEU | FMeasure Double | NMI | LogLossHashed Word32 | CharMatch
-              | MAP | LogLoss | Likelihood | BIOF1 | LikelihoodHashed Word32
+              | MAP | LogLoss | Likelihood | BIOF1 | BIOF1Labels | LikelihoodHashed Word32
               deriving (Eq)
 
 instance Show Metric where
@@ -112,6 +112,7 @@ instance Show Metric where
   show LogLoss = "LogLoss"
   show Likelihood = "Likelihood"
   show BIOF1 = "BIO-F1"
+  show BIOF1Labels = "BIO-F1-Labels"
 
 instance Read Metric where
   readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
@@ -133,6 +134,7 @@ instance Read Metric where
   readsPrec _ ('L':'i':'k':'e':'l':'i':'h':'o':'o':'d':theRest) = [(Likelihood, theRest)]
   readsPrec p ('C':'h':'a':'r':'M':'a':'t':'c':'h':theRest) = [(CharMatch, theRest)]
   readsPrec _ ('M':'A':'P':theRest) = [(MAP, theRest)]
+  readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
   readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
 
 data MetricOrdering = TheLowerTheBetter | TheHigherTheBetter
@@ -152,6 +154,7 @@ getMetricOrdering MAP = TheHigherTheBetter
 getMetricOrdering LogLoss = TheLowerTheBetter
 getMetricOrdering Likelihood = TheHigherTheBetter
 getMetricOrdering BIOF1 = TheHigherTheBetter
+getMetricOrdering BIOF1Labels = TheHigherTheBetter
 
 defaultOutDirectory = "."
 defaultTestName = "test-A"
@@ -437,6 +440,11 @@ gevalCore' CharMatch inputLineSource = helper inputLineSource
 
 gevalCore' BIOF1 _ = gevalCoreWithoutInput parseBioSequenceIntoEntities parseBioSequenceIntoEntities (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts
 
+gevalCore' BIOF1Labels _ = gevalCoreWithoutInput parseBioSequenceIntoEntitiesWithoutNormalization parseBioSequenceIntoEntitiesWithoutNormalization (uncurry gatherCountsForBIO) countAgg f1MeasureOnCounts
+   where parseBioSequenceIntoEntitiesWithoutNormalization s = do
+           entities <- parseBioSequenceIntoEntities s
+           return $ Prelude.map eraseNormalisation entities
+
 countAgg :: Monad m => ConduitM (Int, Int, Int) o m (Int, Int, Int)
 countAgg = CC.foldl countFolder (0, 0, 0)
 
diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs
index 0d141d5..8880afa 100644
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@@ -215,6 +215,7 @@ This a sample challenge for the likelihood metric.
 
 |] ++ (commonReadmeMDContents testName)
 
+readmeMDContents BIOF1Labels testName = readmeMDContents BIOF1 testName
 readmeMDContents BIOF1 testName = [i|
 Tag and normalize names
 =======================
@@ -318,6 +319,7 @@ trainContents LogLoss = [hereLit|0.0	Hell, no!!!
 1.0	Lekker!!!
 0.0	Boring, boring, boring
 |]
+trainContents BIOF1Labels = trainContents BIOF1
 trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surname/BOND	My name is Bond , James Bond
 O O O O O	There is no name here
 B-firstname/JOHN I-surname/VON I-surname/NEUMANN	John von Nueman
@@ -359,6 +361,7 @@ devInContents LogLoss = [hereLit|Great stuff!
 Boring stuff
 That's good
 |]
+devInContents BIOF1Labels = devInContents BIOF1
 devInContents BIOF1 = [hereLit|Adam and Eve
 Mr Jan Kowalski
 |]
@@ -397,6 +400,7 @@ devExpectedContents LogLoss = [hereLit|1.0
 0.0
 1.0
 |]
+devExpectedContents BIOF1Labels = devExpectedContents BIOF1
 devExpectedContents BIOF1 = [hereLit|B-firstname/ADAM O B-firstname/EVE
 O B-firstname/JAN B-surname/KOWALSKI
 |]
@@ -437,6 +441,7 @@ testInContents LogLoss = [hereLit|That's great, ha, ha, I love it!
 Super-duper!!
 That is incredibly boring.
 |]
+testInContents BIOF1Labels = testInContents BIOF1
 testInContents BIOF1 = [hereLit|Alan Tring
 No name here
 |]
@@ -477,6 +482,7 @@ testExpectedContents LogLoss = [hereLit|1.0
 1.0
 0.0
 |]
+testExpectedContents BIOF1Labels = testExpectedContents BIOF1
 testExpectedContents BIOF1 = [hereLit|B-firstname/ALAN B-surname/TURING
 O O O
 |]
diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs
index dbdd6d6..13ac6ec 100644
--- a/src/GEval/OptionsParser.hs
+++ b/src/GEval/OptionsParser.hs
@@ -110,7 +110,7 @@ metricReader = option auto
                  <> value defaultMetric
                  <> showDefault
                  <> metavar "METRIC"
-                 <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1 or CharMatch" )
+                 <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels or CharMatch" )
 
 runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
 runGEval args = do
diff --git a/test/Spec.hs b/test/Spec.hs
index 83572e6..644e8e1 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -257,6 +257,8 @@ main = hspec $ do
                                                                                    TaggedEntity (TaggedSpan 2 2) "surname" (Just "SMITH")] `shouldBe` (1, 1, 2)
     it "check F1 on a more complicated example" $ do
       runGEvalTest "bio-f1-complex" `shouldReturnAlmost` 0.625
+    it "check F1 on labels only" $ do
+      runGEvalTest "bio-f1-complex-labels" `shouldReturnAlmost` 0.6666666666
     it "calculate F1" $ do
       runGEvalTest "bio-f1-simple" `shouldReturnAlmost` 0.5
     it "calculate F1 with underscores rather than minus signs" $ do
diff --git a/test/bio-f1-complex-labels/bio-f1-complex-labels-solution/test-A/out.tsv b/test/bio-f1-complex-labels/bio-f1-complex-labels-solution/test-A/out.tsv
new file mode 100644
index 0000000..0e8b8a1
--- /dev/null
+++ b/test/bio-f1-complex-labels/bio-f1-complex-labels-solution/test-A/out.tsv
@@ -0,0 +1,6 @@
+B-wrong
+B-city/LOS I-city/ANGELES O B-city/NEW I-city/YORK_CITY
+B-surname/BROWN B-surname/SMIT
+B-month B-month O O B-foo/bar
+O B-class I-class I-class
+O O
diff --git a/test/bio-f1-complex-labels/bio-f1-complex-labels/config.txt b/test/bio-f1-complex-labels/bio-f1-complex-labels/config.txt
new file mode 100644
index 0000000..eee37bb
--- /dev/null
+++ b/test/bio-f1-complex-labels/bio-f1-complex-labels/config.txt
@@ -0,0 +1 @@
+--metric BIO-F1-Labels
diff --git a/test/bio-f1-complex-labels/bio-f1-complex-labels/test-A/expected.tsv b/test/bio-f1-complex-labels/bio-f1-complex-labels/test-A/expected.tsv
new file mode 100644
index 0000000..c6e719d
--- /dev/null
+++ b/test/bio-f1-complex-labels/bio-f1-complex-labels/test-A/expected.tsv
@@ -0,0 +1,6 @@
+O
+B-city/LOS I-city/ANGELES O B-city/NEW_YORK O
+O B-surname/SMITH
+B-month/JULY O O O B-foo/bar
+O B-class I-class I-class
+O O

From d370e375a0c5c06871a37662090c1e00bb440bd3 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 2 Jun 2018 11:29:54 +0200
Subject: [PATCH 21/24] add --alt-metric option

---
 src/GEval/OptionsParser.hs | 13 ++++++++++++-
 test/Spec.hs               |  9 +++++++--
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs
index 13ac6ec..e46b033 100644
--- a/src/GEval/OptionsParser.hs
+++ b/src/GEval/OptionsParser.hs
@@ -100,9 +100,13 @@ specParser = GEvalSpecification
     <> showDefault
     <> metavar "INPUT"
     <> help "The name of the file with the input (applicable only for some metrics)" )
-  <*> metricReader
+  <*> ((flip fromMaybe) <$> altMetricReader <*> metricReader)
   <*> optional precisionArgParser
 
+sel :: Maybe Metric -> Metric -> Metric
+sel Nothing m = m
+sel (Just m) _ = m
+
 metricReader :: Parser Metric
 metricReader = option auto
                ( long "metric"
@@ -112,6 +116,13 @@ metricReader = option auto
                  <> metavar "METRIC"
                  <> help "Metric to be used - RMSE, MSE, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), MAP, BLEU, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels or CharMatch" )
 
+altMetricReader :: Parser (Maybe Metric)
+altMetricReader = optional $ option auto
+               ( long "alt-metric"
+                 <> short 'a'
+                 <> metavar "METRIC"
+                 <> help "Alternative metric (overrides --metric option)" )
+
 runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe MetricValue))
 runGEval args = do
   ret <- runGEvalGetOptions args
diff --git a/test/Spec.hs b/test/Spec.hs
index 644e8e1..0474ae2 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -290,6 +290,9 @@ main = hspec $ do
     it "test sorting" $ do
       results <- runLineByLineGeneralized FirstTheWorst sampleChallenge Data.Conduit.List.consume
       Prelude.head (Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results) `shouldBe` "baq"
+  describe "handle --alt-metric option" $ do
+    it "accuracy instead of likelihood" $ do
+      runGEvalTestExtraOptions ["--alt-metric", "Accuracy"] "likelihood-simple" `shouldReturnAlmost` 0.75
 
 
 neverMatch :: Char -> Int -> Bool
@@ -309,11 +312,13 @@ testMatchFun _ _ = False
 extractVal :: (Either (ParserResult GEvalOptions) (Maybe MetricValue)) -> IO MetricValue
 extractVal (Right (Just val)) = return val
 
-runGEvalTest testName = (runGEval [
+runGEvalTest = runGEvalTestExtraOptions []
+
+runGEvalTestExtraOptions extraOptions testName = (runGEval ([
   "--expected-directory",
   "test/" ++ testName ++ "/" ++ testName,
   "--out-directory",
-  "test/" ++ testName ++ "/" ++ testName ++ "-solution"]) >>= extractVal
+  "test/" ++ testName ++ "/" ++ testName ++ "-solution"] ++ extraOptions)) >>= extractVal
 
 extractMetric :: String -> IO (Maybe Metric)
 extractMetric testName = do

From f9dfbc14661b6a3675031f088e1ee01ecf3f75a6 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 2 Jun 2018 12:24:14 +0200
Subject: [PATCH 22/24] accuracy can work on probablity distributions now

---
 src/GEval/Common.hs        |  4 ++++
 src/GEval/Core.hs          | 15 ++++++++++++---
 src/GEval/LogLossHashed.hs | 11 +++++++++--
 test/Spec.hs               |  3 ++-
 4 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/src/GEval/Common.hs b/src/GEval/Common.hs
index ad89c79..38828ab 100644
--- a/src/GEval/Common.hs
+++ b/src/GEval/Common.hs
@@ -38,3 +38,7 @@ sepByWhitespaces parser = possibleWhitespace *> parser `sepBy` whitespace <* pos
 possibleWhitespace = many' (satisfy isHorizontalSpace)
 
 whitespace = many1 (satisfy isHorizontalSpace)
+
+indicator :: Bool -> Double
+indicator True = 1.0
+indicator False = 0.0
diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs
index 1353aab..180cc41 100644
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -369,9 +369,18 @@ gevalCore' BLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.words . D
           | otherwise = exp (1.0 - (r /. c))
 
 gevalCore' Accuracy _ = gevalCoreWithoutInput (Right . strip) (Right . strip) hitOrMiss averageC id
-                      where hitOrMiss (exp,got) = if (normalizeProbForAccuracy exp got) == exp then 1.0 else 0.0
-                            -- if the expected value is 0 or 1 treat values between 0.0 and 1.0 as probabilities
-                            -- for the positive outcome
+                      where hitOrMiss (exp, got) =
+                              -- first try to parse what we got as a probability distribution
+                              -- (like the one used for Likelikehood/LogLossHashed metric)
+                              case parseWordSpecs got of
+                                Right wordSpecs -> if Prelude.null pairs
+                                                   then 0.0
+                                                   else indicator (exp == (snd $ Prelude.maximum pairs))
+                                                 where pairs = catMaybes $ Prelude.map wordSpecToPair wordSpecs
+                                Left _ ->  indicator ((normalizeProbForAccuracy exp got) == exp)
+                                          -- if the expected value is 0 or 1 treat values
+                                          -- between 0.0 and 1.0 as probabilities
+                                          -- for the positive outcome
                             normalizeProbForAccuracy :: Text -> Text -> Text
                             normalizeProbForAccuracy exp got
                               | exp == (pack "1") = case tryReadingAsFloat got of
diff --git a/src/GEval/LogLossHashed.hs b/src/GEval/LogLossHashed.hs
index a4388ee..5b3dfcc 100644
--- a/src/GEval/LogLossHashed.hs
+++ b/src/GEval/LogLossHashed.hs
@@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 
 module GEval.LogLossHashed
-       (HashedDistribution, parseDistribution, calculateLogLoss)
+       (HashedDistribution, parseDistribution, calculateLogLoss, parseWordSpecs, wordSpecToPair)
        where
 
 import qualified Data.Vector as V
@@ -59,11 +59,18 @@ isAnyWord _ = False
 
 data WordSpecWithLogProb = WordSpecWithLogProb WordSpec Double
 
+wordSpecToPair :: WordSpecWithLogProb -> Maybe (Double, T.Text)
+wordSpecToPair (WordSpecWithLogProb AnyWord _) = Nothing
+wordSpecToPair (WordSpecWithLogProb (SpecificWord w) lp) = Just (lp, w)
+
 parseDistributionFromWordList :: Word32 -> Word32 -> T.Text -> Either String HashedDistribution
 parseDistributionFromWordList nbOfBits seed distroSpec = (parseDistributionFromWordList' nbOfBits seed) =<<
   normalizeLogProbs =<<
   lookForProbs =<<
-  (processEithers $ map getWordSpecWithLogProb $ T.splitOn " " distroSpec)
+  (parseWordSpecs distroSpec)
+
+parseWordSpecs :: T.Text -> Either String [WordSpecWithLogProb]
+parseWordSpecs distroSpec = processEithers $ map getWordSpecWithLogProb $ T.splitOn " " distroSpec
 
 getWordSpecWithLogProb :: T.Text -> Either String WordSpecWithLogProb
 getWordSpecWithLogProb t =
diff --git a/test/Spec.hs b/test/Spec.hs
index 0474ae2..90039bb 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -293,7 +293,8 @@ main = hspec $ do
   describe "handle --alt-metric option" $ do
     it "accuracy instead of likelihood" $ do
       runGEvalTestExtraOptions ["--alt-metric", "Accuracy"] "likelihood-simple" `shouldReturnAlmost` 0.75
-
+    it "accuracy instead of log loss" $ do
+      runGEvalTestExtraOptions ["--alt-metric", "Accuracy"] "log-loss-hashed-probs" `shouldReturnAlmost` 0.4
 
 neverMatch :: Char -> Int -> Bool
 neverMatch _ _ = False

From 546f2a4e24eb6fc3e145bd9cb019ad8caeb06268 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 2 Jun 2018 12:24:49 +0200
Subject: [PATCH 23/24] bump up version number

---
 geval.cabal | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/geval.cabal b/geval.cabal
index e51ad70..395267a 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -1,5 +1,5 @@
 name:                geval
-version:             0.6.0.0
+version:             0.7.0.0
 synopsis:            Machine learning evaluation tools
 description:         Please see README.md
 homepage:            http://github.com/name/project

From 04ffaeeb9e07ab80a24bcb24a87dda825b09d616 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 2 Jun 2018 13:02:18 +0200
Subject: [PATCH 24/24] minor changes to README

---
 README.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 5e89c28..411f81c 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,15 @@
 # GEval
 
-GEval is a Haskell library (and a stand-alone tool) for evaluating the
+GEval is a Haskell library and a stand-alone tool for evaluating the
 results of solutions to machine learning challenges as defined on the
 [Gonito](http://gonito.net) platform.
 
 Note that GEval is only about machine learning evaluation. No actual
 machine learning algorithms are available here.
 
+The official repository is `git://gonito.net/geval`, browsable at
+<https://gonito.net/gitlist/geval.git/>.
+
 ## Installing
 
 You need [Haskell Stack](https://github.com/commercialhaskell/stack).