From cb4efe1d6b469a166e1970902a3b9b39f5246285 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Mon, 25 Nov 2019 21:31:17 +0100
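Subject: [PATCH 01/23] Introduce :S flag (sorting words within a line)

Also add the "Mean/" meta-metric (for the time being working only with
MultiLabel-F-measure), together with tests for both features.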

---
 src/GEval/Core.hs                               | 17 +++++++++++++++++
 src/GEval/CreateChallenge.hs                    |  6 ++++++
 src/GEval/EvaluationScheme.hs                   | 10 +++++++---
 src/GEval/Metric.hs                             | 15 ++++++++++++++-
 src/GEval/MetricsMeta.hs                        |  1 +
 test/Spec.hs                                    |  5 +++++
 .../accuracy-on-sorted-solution/test-A/out.tsv  |  4 ++++
 .../accuracy-on-sorted/config.txt               |  1 +
 .../accuracy-on-sorted/test-A/expected.tsv      |  4 ++++
 .../test-A/out.tsv                              |  4 ++++
 .../mean-multilabel-f1-simple/config.txt        |  1 +
 .../test-A/expected.tsv                         |  4 ++++
 12 files changed, 68 insertions(+), 4 deletions(-)
 create mode 100644 test/accuracy-on-sorted/accuracy-on-sorted-solution/test-A/out.tsv
 create mode 100644 test/accuracy-on-sorted/accuracy-on-sorted/config.txt
 create mode 100644 test/accuracy-on-sorted/accuracy-on-sorted/test-A/expected.tsv
 create mode 100644 test/mean-multilabel-f1-simple/mean-multilabel-f1-simple-solution/test-A/out.tsv
 create mode 100644 test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/config.txt
 create mode 100644 test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/test-A/expected.tsv

diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs
index 4611671..3aa5d46 100644
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -492,6 +492,23 @@ gevalCoreOnSources CharMatch inputLineSource = helper inputLineSource
 gevalCoreOnSources (LogLossHashed nbOfBits) _ = helperLogLossHashed nbOfBits id
 gevalCoreOnSources (LikelihoodHashed nbOfBits) _ = helperLogLossHashed nbOfBits logLossToLikehood
 
+
+gevalCoreOnSources (Mean (MultiLabelFMeasure beta)) _
+  = gevalCoreWithoutInputOnItemTargets (Right . intoWords)
+                                       (Right . getWords)
+                                       ((fMeasureOnCounts beta) . (getCounts (==)))
+                                       averageC
+                                       id
+                                       noGraph
+    where
+      -- repeated as below, as it will be refactored into dependent types soon anyway
+      getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t
+      getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
+      intoWords (RawItemTarget t) = Prelude.map unpack $ Data.Text.words t
+      intoWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
+
+gevalCoreOnSources (Mean _) _ = error $ "Mean/ meta-metric defined only for MultiLabel-F1 for the time being"
+
 -- only MultiLabel-F1 handled for JSONs for the time being...
 gevalCoreOnSources (MultiLabelFMeasure beta) _ = gevalCoreWithoutInputOnItemTargets (Right . intoWords)
                                                                             (Right . getWords)
diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs
index 3a915e5..325aed4 100644
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@@ -55,6 +55,7 @@ createFile filePath contents = do
   writeFile filePath contents
 
 readmeMDContents :: Metric -> String -> String
+readmeMDContents (Mean metric) testName = readmeMDContents metric testName
 readmeMDContents GLEU testName = readmeMDContents BLEU testName
 readmeMDContents BLEU testName = [i|
 GEval sample machine translation challenge
@@ -413,6 +414,7 @@ configContents schemes precision testName = unwords (Prelude.map (\scheme -> ("-
           precisionOpt (Just p) = " --precision " ++ (show p)
 
 trainContents :: Metric -> String
+trainContents (Mean metric) = trainContents metric
 trainContents GLEU = trainContents BLEU
 trainContents BLEU = [hereLit|alussa loi jumala taivaan ja maan	he mea hanga na te atua i te timatanga te rangi me te whenua
 ja maa oli autio ja tyhjä , ja pimeys oli syvyyden päällä	a kahore he ahua o te whenua , i takoto kau ; he pouri ano a runga i te mata o te hohonu
@@ -510,6 +512,7 @@ trainContents _ = [hereLit|0.06        0.39    0       0.206
 |]
 
 devInContents :: Metric -> String
+devInContents (Mean metric) = devInContents metric
 devInContents GLEU = devInContents BLEU
 devInContents BLEU = [hereLit|ja jumala sanoi : " tulkoon valkeus " , ja valkeus tuli
 ja jumala näki , että valkeus oli hyvä ; ja jumala erotti valkeuden pimeydestä
@@ -577,6 +580,7 @@ devInContents _ = [hereLit|0.72	0	0.007
 |]
 
 devExpectedContents :: Metric -> String
+devExpectedContents (Mean metric) = devExpectedContents metric
 devExpectedContents GLEU = devExpectedContents BLEU
 devExpectedContents BLEU = [hereLit|a ka ki te atua , kia marama : na ka marama
 a ka kite te atua i te marama , he pai : a ka wehea e te atua te marama i te pouri
@@ -646,6 +650,7 @@ devExpectedContents _ = [hereLit|0.82
 |]
 
 testInContents :: Metric -> String
+testInContents (Mean metric) = testInContents metric
 testInContents GLEU = [hereLit|Alice has a black
 |]
 testInContents BLEU = [hereLit|ja jumala kutsui valkeuden päiväksi , ja pimeyden hän kutsui yöksi
@@ -716,6 +721,7 @@ testInContents _ = [hereLit|0.72	0	0.007
 |]
 
 testExpectedContents :: Metric -> String
+testExpectedContents (Mean metric) = testExpectedContents metric
 testExpectedContents BLEU = [hereLit|na ka huaina e te atua te marama ko te awatea , a ko te pouri i huaina e ia ko te po
 a ko te ahiahi , ko te ata , he ra kotahi
 |]
diff --git a/src/GEval/EvaluationScheme.hs b/src/GEval/EvaluationScheme.hs
index 29840c7..a464d6c 100644
--- a/src/GEval/EvaluationScheme.hs
+++ b/src/GEval/EvaluationScheme.hs
@@ -6,8 +6,8 @@ import GEval.Metric
 
 import Text.Regex.PCRE.Heavy
 import Text.Regex.PCRE.Light.Base (Regex(..))
-import Data.Text (Text(..), concat, toLower, toUpper, pack, unpack)
-import Data.List (intercalate, break)
+import Data.Text (Text(..), concat, toLower, toUpper, pack, unpack, words, unwords)
+import Data.List (intercalate, break, sort)
 import Data.Either
 import Data.Maybe (fromMaybe)
 import qualified Data.ByteString.UTF8 as BSU
@@ -16,7 +16,7 @@ import qualified Data.ByteString.UTF8 as BSU
 data EvaluationScheme = EvaluationScheme Metric [PreprocessingOperation]
   deriving (Eq)
 
-data PreprocessingOperation = RegexpMatch Regex | LowerCasing | UpperCasing | SetName Text
+data PreprocessingOperation = RegexpMatch Regex | LowerCasing | UpperCasing | Sorting | SetName Text
   deriving (Eq)
 
 leftParameterBracket :: Char
@@ -39,6 +39,8 @@ readOps ('l':theRest) = (LowerCasing:ops, theRest')
 readOps ('u':theRest) = (UpperCasing:ops, theRest')
     where (ops, theRest') = readOps theRest
 readOps ('m':theRest) = handleParametrizedOp (RegexpMatch . (fromRight undefined) . ((flip compileM) []) . BSU.fromString) theRest
+readOps ('S':theRest) = (Sorting:ops, theRest')
+    where (ops, theRest') = readOps theRest
 readOps ('N':theRest) = handleParametrizedOp (SetName . pack) theRest
 readOps s = ([], s)
 
@@ -70,6 +72,7 @@ instance Show PreprocessingOperation where
   show (RegexpMatch (Regex _ regexp)) = parametrizedOperation "m" (BSU.toString regexp)
   show LowerCasing = "l"
   show UpperCasing = "u"
+  show Sorting = "S"
   show (SetName t) = parametrizedOperation "N" (unpack t)
 
 parametrizedOperation :: String -> String -> String
@@ -82,4 +85,5 @@ applyPreprocessingOperation :: PreprocessingOperation -> Text -> Text
 applyPreprocessingOperation (RegexpMatch regex) = Data.Text.concat . (map fst) . (scan regex)
 applyPreprocessingOperation LowerCasing = toLower
 applyPreprocessingOperation UpperCasing = toUpper
+applyPreprocessingOperation Sorting = Data.Text.unwords . sort . Data.Text.words
 applyPreprocessingOperation (SetName _) = id
diff --git a/src/GEval/Metric.hs b/src/GEval/Metric.hs
index b87c599..a508997 100644
--- a/src/GEval/Metric.hs
+++ b/src/GEval/Metric.hs
@@ -28,7 +28,12 @@ data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | C
               | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
               | BIOF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | MultiLabelFMeasure Double
               | MultiLabelLogLoss | MultiLabelLikelihood
-              | SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double | ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double
+              | SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double
+              | ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double
+              -- it would be better to avoid infinite recursion here
+              -- `Mean (Mean BLEU)` is not useful, but as it would mean
+              -- a larger refactor, we will postpone this
+              | Mean Metric
               deriving (Eq)
 
 instance Show Metric where
@@ -73,8 +78,12 @@ instance Show Metric where
   show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta)
   show MultiLabelLogLoss = "MultiLabel-Logloss"
   show MultiLabelLikelihood = "MultiLabel-Likelihood"
+  show (Mean metric) = "Mean/" ++ (show metric)
 
 instance Read Metric where
+  readsPrec p ('M':'e':'a':'n':'/':theRest) = case readsPrec p theRest of
+    [(metric, theRest)] -> [(Mean metric, theRest)]
+    _ -> []
   readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
   readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)]
   readsPrec _ ('P':'e':'a':'r':'s':'o':'n':theRest) = [(Pearson, theRest)]
@@ -162,6 +171,7 @@ getMetricOrdering SMAPE = TheLowerTheBetter
 getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter
 getMetricOrdering MultiLabelLogLoss = TheLowerTheBetter
 getMetricOrdering MultiLabelLikelihood = TheHigherTheBetter
+getMetricOrdering (Mean metric) = getMetricOrdering metric
 
 bestPossibleValue :: Metric -> MetricValue
 bestPossibleValue metric = case getMetricOrdering metric of
@@ -169,18 +179,21 @@ bestPossibleValue metric = case getMetricOrdering metric of
   TheHigherTheBetter -> 1.0
 
 fixedNumberOfColumnsInExpected :: Metric -> Bool
+fixedNumberOfColumnsInExpected (Mean metric) = fixedNumberOfColumnsInExpected metric
 fixedNumberOfColumnsInExpected MAP = False
 fixedNumberOfColumnsInExpected BLEU = False
 fixedNumberOfColumnsInExpected GLEU = False
 fixedNumberOfColumnsInExpected _ = True
 
 fixedNumberOfColumnsInInput :: Metric -> Bool
+fixedNumberOfColumnsInInput (Mean metric) = fixedNumberOfColumnsInInput metric
 fixedNumberOfColumnsInInput (SoftFMeasure _) = False
 fixedNumberOfColumnsInInput (ProbabilisticSoftFMeasure _) = False
 fixedNumberOfColumnsInInput (Soft2DFMeasure _) = False
 fixedNumberOfColumnsInInput _ = True
 
 perfectOutLineFromExpectedLine :: Metric -> Text -> Text
+perfectOutLineFromExpectedLine (Mean metric) t = perfectOutLineFromExpectedLine metric t
 perfectOutLineFromExpectedLine (LogLossHashed _) t = t <> ":1.0"
 perfectOutLineFromExpectedLine (LikelihoodHashed _) t = t <> ":1.0"
 perfectOutLineFromExpectedLine BLEU t = getFirstColumn t
diff --git a/src/GEval/MetricsMeta.hs b/src/GEval/MetricsMeta.hs
index 21659ab..8fad9c6 100644
--- a/src/GEval/MetricsMeta.hs
+++ b/src/GEval/MetricsMeta.hs
@@ -48,6 +48,7 @@ listOfAvailableMetrics = [RMSE,
                           MultiLabelFMeasure 1.0,
                           MultiLabelFMeasure 2.0,
                           MultiLabelFMeasure 0.25,
+                          Mean (MultiLabelFMeasure 1.0),
                           ProbabilisticMultiLabelFMeasure 1.0,
                           ProbabilisticMultiLabelFMeasure 2.0,
                           ProbabilisticMultiLabelFMeasure 0.25,
diff --git a/test/Spec.hs b/test/Spec.hs
index dc68beb..c350775 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -127,6 +127,8 @@ main = hspec $ do
       runGEvalTest "accuracy-simple" `shouldReturnAlmost` 0.6
     it "with probs" $
       runGEvalTest "accuracy-probs" `shouldReturnAlmost` 0.4
+    it "sorted" $
+      runGEvalTest "accuracy-on-sorted" `shouldReturnAlmost` 0.75
   describe "F-measure" $ do
     it "simple example" $
       runGEvalTest "f-measure-simple" `shouldReturnAlmost` 0.57142857
@@ -326,6 +328,9 @@ main = hspec $ do
       runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615
     it "labels given with probs and numbers" $ do
       runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666
+  describe "Mean/MultiLabel-F" $ do
+    it "simple" $ do
+      runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5
   describe "MultiLabel-Likelihood" $ do
     it "simple" $ do
       runGEvalTest "multilabel-likelihood-simple" `shouldReturnAlmost` 0.115829218528827
diff --git a/test/accuracy-on-sorted/accuracy-on-sorted-solution/test-A/out.tsv b/test/accuracy-on-sorted/accuracy-on-sorted-solution/test-A/out.tsv
new file mode 100644
index 0000000..b9c8997
--- /dev/null
+++ b/test/accuracy-on-sorted/accuracy-on-sorted-solution/test-A/out.tsv
@@ -0,0 +1,4 @@
+foo baz bar
+
+xyz aaa
+2 a:1 3
diff --git a/test/accuracy-on-sorted/accuracy-on-sorted/config.txt b/test/accuracy-on-sorted/accuracy-on-sorted/config.txt
new file mode 100644
index 0000000..0de8e69
--- /dev/null
+++ b/test/accuracy-on-sorted/accuracy-on-sorted/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:S
diff --git a/test/accuracy-on-sorted/accuracy-on-sorted/test-A/expected.tsv b/test/accuracy-on-sorted/accuracy-on-sorted/test-A/expected.tsv
new file mode 100644
index 0000000..7ec7ae4
--- /dev/null
+++ b/test/accuracy-on-sorted/accuracy-on-sorted/test-A/expected.tsv
@@ -0,0 +1,4 @@
+bar baz foo
+
+xyz
+a:1 2 3
diff --git a/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple-solution/test-A/out.tsv b/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple-solution/test-A/out.tsv
new file mode 100644
index 0000000..6a8bd3a
--- /dev/null
+++ b/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple-solution/test-A/out.tsv
@@ -0,0 +1,4 @@
+foo bar baz
+uuu
+foo bar baz
+qqq aaa
diff --git a/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/config.txt b/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/config.txt
new file mode 100644
index 0000000..885d505
--- /dev/null
+++ b/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/config.txt
@@ -0,0 +1 @@
+--metric Mean/MultiLabel-F1
diff --git a/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/test-A/expected.tsv b/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/test-A/expected.tsv
new file mode 100644
index 0000000..64612c3
--- /dev/null
+++ b/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/test-A/expected.tsv
@@ -0,0 +1,4 @@
+foo bar baz
+
+foo
+qqq qqq

From ef8945af11a06ad009c070abeab9268e3ad6fc90 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Mon, 25 Nov 2019 21:35:22 +0100
Subject: [PATCH 02/23] Bump up version number

---
 CHANGELOG.md | 5 +++++
 geval.cabal  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 63513ed..a8e4b00 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,9 @@
 
+## 1.22.1.0
+
+* Add "Mean/" meta-metric (for the time being working only with MultiLabel-F-measure)
+* Add :S flag
+
 ## 1.22.0.0
 
 * Add SegmentAccuracy
diff --git a/geval.cabal b/geval.cabal
index 3103b53..5b425ce 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -1,5 +1,5 @@
 name:                geval
-version:             1.22.0.0
+version:             1.22.1.0
 synopsis:            Machine learning evaluation tools
 description:         Please see README.md
 homepage:            http://github.com/name/project

From 74d999d4bfd4c06355364a6e7b39da714c90fa7d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Filip=20Grali=C5=84ski?= <filipg@amu.edu.pl>
Date: Tue, 26 Nov 2019 16:41:33 +0100
Subject: [PATCH 03/23] Towards the new style of train files

---
 src/GEval/CreateChallenge.hs | 32 +++++++++++++++++++++++++-------
 src/GEval/Validation.hs      | 13 ++++++-------
 2 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs
index 325aed4..410d657 100644
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@@ -18,6 +18,9 @@ import Control.Exception
 import Control.Monad.Trans.Resource
 import Data.String.Here
 
+import Data.List (intercalate)
+import Data.List.Split (splitOn)
+
 createChallenge :: Bool -> FilePath -> GEvalSpecification -> IO ()
 createChallenge withDataFiles expectedDirectory spec = do
   D.createDirectoryIfMissing False expectedDirectory
@@ -30,7 +33,8 @@ createChallenge withDataFiles expectedDirectory spec = do
   if withDataFiles
     then
      do
-      createFile (trainDirectory </> "train.tsv") $ trainContents metric
+      createFile (trainDirectory </> "in.tsv") $ trainInContents metric
+      createFile (trainDirectory </> expectedFile) $ trainExpectedContents metric
 
       createFile (devDirectory </> "in.tsv") $ devInContents metric
       createFile (devDirectory </> expectedFile) $ devExpectedContents metric
@@ -413,6 +417,20 @@ configContents schemes precision testName = unwords (Prelude.map (\scheme -> ("-
     where precisionOpt Nothing = ""
           precisionOpt (Just p) = " --precision " ++ (show p)
 
+-- Originally train content was in one file, to avoid large changes
+-- for the time being we are using the original function.
+
+trainInContents :: Metric -> String
+trainInContents metric = unlines
+                         $ map (intercalate "\t")
+                         $ map tail
+                         $ map (splitOn "\t")
+                         $ lines
+                         $ trainContents metric
+
+trainExpectedContents :: Metric -> String
+trainExpectedContents metric = unlines $ map head $ map (splitOn "\t") $ lines $ trainContents metric
+
 trainContents :: Metric -> String
 trainContents (Mean metric) = trainContents metric
 trainContents GLEU = trainContents BLEU
@@ -502,13 +520,13 @@ I am sad	SADNESS
 I am so sad and hateful	SADNESS HATE
 |]
 trainContents (Soft2DFMeasure _) = trainContents ClippEU
-trainContents ClippEU = [hereLit|2/0,0,10,150	foo.djvu
-1/30,40,100,1000	bar.djvu
+trainContents ClippEU = [hereLit|2/0,0,10,150/10	foo.djvu
+1/30,40,100,1000/10	bar.djvu
 |]
-trainContents _ = [hereLit|0.06        0.39    0       0.206
-1.00   1.00    1       0.017
-317.8  5.20    67      0.048
-14.6   19.22   27      0.047
+trainContents _ = [hereLit|0.06	0.39	0	0.206
+1.00	1.00	1	0.017
+317.8	5.20	67	0.048
+14.6	19.22	27	0.047
 |]
 
 devInContents :: Metric -> String
diff --git a/src/GEval/Validation.hs b/src/GEval/Validation.hs
index ef2e981..235d356 100644
--- a/src/GEval/Validation.hs
+++ b/src/GEval/Validation.hs
@@ -45,6 +45,7 @@ data ValidationException = NoChallengeDirectory FilePath
                          | SpaceSuffixDetect FilePath
                          | VaryingNumberOfColumns FilePath
                          | BestPossibleValueNotObtainedWithExpectedData MetricValue MetricValue
+                         | OldStyleTrainFile
 
 instance Exception ValidationException
 
@@ -65,6 +66,7 @@ instance Show ValidationException where
   show (SpaceSuffixDetect filePaths) = somethingWrongWithFilesMessage "Found space at the end of line" filePaths
   show (VaryingNumberOfColumns filePaths) = somethingWrongWithFilesMessage "The file contains varying number of columns" filePaths
   show (BestPossibleValueNotObtainedWithExpectedData expected got) = "The best possible value was not obtained with the expected data, expected: " ++ (show expected) ++ " , obtained: " ++ (show got)
+  show OldStyleTrainFile = "Found old-style train file `train.tsv`, whereas the same convention as in test directories should be used (`in.tsv` and `expected.tsv`)"
 
 validationChallenge :: FilePath -> GEvalSpecification -> IO ()
 validationChallenge challengeDirectory spec = do
@@ -147,7 +149,9 @@ never :: FindClause Bool
 never = depth ==? 0
 
 testDirFilter :: FindClause Bool
-testDirFilter = (SFF.fileType ==? Directory) &&? (SFF.fileName ~~? "dev-*" ||? SFF.fileName ~~? "test-*")
+testDirFilter = (SFF.fileType ==? Directory) &&? (SFF.fileName ~~? "dev-*"
+                                                  ||? SFF.fileName ~~? "test-*"
+                                                  ||? SFF.fileName ==? "train")
 
 fileFilter :: String -> FindClause Bool
 fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileName ||? SFF.fileName ~~? fileName ++ exts)
@@ -189,12 +193,7 @@ checkTrainDirectory metric challengeDirectory = do
   let trainDirectory = challengeDirectory </> "train"
   whenM (doesDirectoryExist trainDirectory) $ do
     trainFiles <- findTrainFiles trainDirectory
-    when (null trainFiles) $ throw $ NoInputFile "train.tsv"
-    when (length trainFiles > 1) $ throw $ TooManyTrainFiles trainFiles
-    let [trainFile] = trainFiles
-    checkCorrectFile trainFile
-    when (fixedNumberOfColumnsInInput metric && fixedNumberOfColumnsInExpected metric) $ do
-      checkColumns trainFile
+    when (not $ null trainFiles) $ throw $ OldStyleTrainFile
 
 checkColumns :: FilePath -> IO ()
 checkColumns filePath = do

From 66e2350b1a040bbec00f7647a8ca77495c95c015 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Wed, 4 Dec 2019 20:00:45 +0100
Subject: [PATCH 04/23] Remove commented out code

---
 src/GEval/OptionsParser.hs | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs
index d0b46a9..61c2952 100644
--- a/src/GEval/OptionsParser.hs
+++ b/src/GEval/OptionsParser.hs
@@ -249,9 +249,6 @@ metricReader = many $ option auto         -- actually `some` should be used inst
                  <> metavar "METRIC"
                  <> help ("Metric to be used, e.g.:" ++ helpMetricParameterMetricsList))
 
-
---                   RMSE, MSE, MAE, SMAPE, Pearson, Spearman, Accuracy, LogLoss, Likelihood, F-measure (specify as F1, F2, F0.25, etc.), macro F-measure (specify as Macro-F1, Macro-F2, Macro-F0.25, etc.), multi-label F-measure (specify as MultiLabel-F1, MultiLabel-F2, MultiLabel-F0.25, etc.), MultiLabel-Likelihood, MAP, BLEU, GLEU (\"Google GLEU\" not the grammar correction metric), WER, NMI, ClippEU, LogLossHashed, LikelihoodHashed, BIO-F1, BIO-F1-Labels, TokenAccuracy, soft F-measure (specify as Soft-F1, Soft-F2, Soft-F0.25), probabilistic soft F-measure (specify as Probabilistic-Soft-F1, Probabilistic-Soft-F2, Probabilistic-Soft-F0.25) or CharMatch" )
-
 altMetricReader :: Parser (Maybe EvaluationScheme)
 altMetricReader = optional $ option auto
                ( long "alt-metric"

From 6d95dee27558372775ceaf5b24b2f359fdfc00ba Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Wed, 4 Dec 2019 20:41:07 +0100
Subject: [PATCH 05/23] More fixes

---
 src/GEval/CreateChallenge.hs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs
index 410d657..0630adb 100644
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@@ -501,7 +501,7 @@ trainContents LogLoss = [hereLit|0.0	Hell, no!!!
 trainContents BIOF1Labels = trainContents BIOF1
 trainContents BIOF1 = [hereLit|O O O B-surname/BOND O B-firstname/JAMES B-surname/BOND	My name is Bond , James Bond
 O O O O O	There is no name here
-B-firstname/JOHN I-surname/VON I-surname/NEUMANN	John von Nueman
+B-firstname/JOHN B-surname/VON I-surname/NEUMANN	John von Nueman
 |]
 trainContents TokenAccuracy = [hereLit|* V N	I like cats
 * * V * N	I can see the rainbow
@@ -519,7 +519,9 @@ Love and hate	LOVE HATE
 I am sad	SADNESS
 I am so sad and hateful	SADNESS HATE
 |]
-trainContents (Soft2DFMeasure _) = trainContents ClippEU
+trainContents (Soft2DFMeasure _) = [hereLit|2/0,0,10,150	foo.djvu
+1/30,40,100,1000	bar.djvu
+|]
 trainContents ClippEU = [hereLit|2/0,0,10,150/10	foo.djvu
 1/30,40,100,1000/10	bar.djvu
 |]

From 26f20ba466c2ede5ac66336afbc54f9f7b801620 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Wed, 4 Dec 2019 21:15:44 +0100
Subject: [PATCH 06/23] Fixes

---
 src/GEval/CreateChallenge.hs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs
index 0630adb..44095ea 100644
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@@ -522,8 +522,8 @@ I am so sad and hateful	SADNESS HATE
 trainContents (Soft2DFMeasure _) = [hereLit|2/0,0,10,150	foo.djvu
 1/30,40,100,1000	bar.djvu
 |]
-trainContents ClippEU = [hereLit|2/0,0,10,150/10	foo.djvu
-1/30,40,100,1000/10	bar.djvu
+trainContents ClippEU = [hereLit|1/30,40,100,1000/10	bar.djvu
+2/30,40,500,600/10	foo.djvu
 |]
 trainContents _ = [hereLit|0.06	0.39	0	0.206
 1.00	1.00	1	0.017

From 0826d457b2544e198ffa551f14b723ef132dbe2b Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Fri, 13 Dec 2019 20:31:40 +0100
Subject: [PATCH 07/23] Complete move to the new style of train files

---
 src/GEval/CreateChallenge.hs | 13 ++++++++++--
 src/GEval/Validation.hs      | 39 +++++++++++++++++++++---------------
 2 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs
index 44095ea..de087af 100644
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@@ -33,8 +33,7 @@ createChallenge withDataFiles expectedDirectory spec = do
   if withDataFiles
     then
      do
-      createFile (trainDirectory </> "in.tsv") $ trainInContents metric
-      createFile (trainDirectory </> expectedFile) $ trainExpectedContents metric
+      createTrainFiles metric trainDirectory expectedFile
 
       createFile (devDirectory </> "in.tsv") $ devInContents metric
       createFile (devDirectory </> expectedFile) $ devExpectedContents metric
@@ -53,6 +52,16 @@ createChallenge withDataFiles expectedDirectory spec = do
         testDirectory = expectedDirectory </> testName
         expectedFile = gesExpectedFile spec
 
+createTrainFiles :: Metric -> FilePath -> FilePath -> IO ()
+createTrainFiles metric@(LogLossHashed _) trainDirectory _ = createSingleTrainFile metric trainDirectory
+createTrainFiles metric@(LikelihoodHashed _) trainDirectory _ = createSingleTrainFile metric trainDirectory
+createTrainFiles metric trainDirectory expectedFile = do
+  createFile (trainDirectory </> "in.tsv") $ trainInContents metric
+  createFile (trainDirectory </> expectedFile) $ trainExpectedContents metric
+
+createSingleTrainFile metric trainDirectory =
+  createFile (trainDirectory </> "train.tsv") $ trainContents metric
+
 createFile :: FilePath -> String -> IO ()
 createFile filePath contents = do
   whenM (D.doesFileExist filePath) $ throwM $ FileAlreadyThere filePath
diff --git a/src/GEval/Validation.hs b/src/GEval/Validation.hs
index 235d356..91f9de1 100644
--- a/src/GEval/Validation.hs
+++ b/src/GEval/Validation.hs
@@ -45,7 +45,6 @@ data ValidationException = NoChallengeDirectory FilePath
                          | SpaceSuffixDetect FilePath
                          | VaryingNumberOfColumns FilePath
                          | BestPossibleValueNotObtainedWithExpectedData MetricValue MetricValue
-                         | OldStyleTrainFile
 
 instance Exception ValidationException
 
@@ -66,7 +65,6 @@ instance Show ValidationException where
   show (SpaceSuffixDetect filePaths) = somethingWrongWithFilesMessage "Found space at the end of line" filePaths
   show (VaryingNumberOfColumns filePaths) = somethingWrongWithFilesMessage "The file contains varying number of columns" filePaths
   show (BestPossibleValueNotObtainedWithExpectedData expected got) = "The best possible value was not obtained with the expected data, expected: " ++ (show expected) ++ " , obtained: " ++ (show got)
-  show OldStyleTrainFile = "Found old-style train file `train.tsv`, whereas the same convention as in test directories should be used (`in.tsv` and `expected.tsv`)"
 
 validationChallenge :: FilePath -> GEvalSpecification -> IO ()
 validationChallenge challengeDirectory spec = do
@@ -78,16 +76,13 @@ validationChallenge challengeDirectory spec = do
   checkCorrectFile gitignoreFile
   checkCorrectFile readmeFile
   testDirectories <- findTestDirs challengeDirectory
-  checkTestDirectories mainMetric testDirectories
-  checkTrainDirectory mainMetric challengeDirectory
-
-  mapM_ (runOnTest spec) testDirectories
+  checkTestDirectories spec testDirectories
+  checkTrainDirectory spec challengeDirectory
 
   where
     configFile = challengeDirectory </> "config.txt"
     gitignoreFile = challengeDirectory </> ".gitignore"
     readmeFile = challengeDirectory </> "README.md"
-    mainMetric = evaluationSchemeMetric $ head $ gesMetrics spec
 
 checkCorrectFile :: FilePath -> IO ()
 checkCorrectFile filePath = do
@@ -150,8 +145,7 @@ never = depth ==? 0
 
 testDirFilter :: FindClause Bool
 testDirFilter = (SFF.fileType ==? Directory) &&? (SFF.fileName ~~? "dev-*"
-                                                  ||? SFF.fileName ~~? "test-*"
-                                                  ||? SFF.fileName ==? "train")
+                                                  ||? SFF.fileName ~~? "test-*")
 
 fileFilter :: String -> FindClause Bool
 fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileName ||? SFF.fileName ~~? fileName ++ exts)
@@ -159,12 +153,12 @@ fileFilter fileName = (SFF.fileType ==? RegularFile) &&? (SFF.fileName ~~? fileN
     exts = Prelude.concat [ "(", intercalate "|" compressedFilesHandled, ")" ]
 
 
-checkTestDirectories :: Metric -> [FilePath] -> IO ()
+checkTestDirectories :: GEvalSpecification -> [FilePath] -> IO ()
 checkTestDirectories _ [] = throwM NoTestDirectories
-checkTestDirectories metric directories = mapM_ (checkTestDirectory metric) directories
+checkTestDirectories spec directories = mapM_ (checkTestDirectory spec) directories
 
-checkTestDirectory :: Metric -> FilePath -> IO ()
-checkTestDirectory metric directoryPath = do
+checkTestDirectory :: GEvalSpecification -> FilePath -> IO ()
+checkTestDirectory spec directoryPath = do
   inputFiles <- findInputFiles directoryPath
   when (null inputFiles) $ throw $ NoInputFile inputFile
   when (length inputFiles > 1) $ throw $ TooManyInputFiles inputFiles
@@ -184,16 +178,29 @@ checkTestDirectory metric directoryPath = do
 
   outputFiles <- findOutputFiles directoryPath
   unless (null outputFiles) $ throw $ OutputFileDetected outputFiles
+
+  runOnTest spec directoryPath
+
   where
+    metric = evaluationSchemeMetric $ head $ gesMetrics spec
     inputFile = directoryPath </> defaultInputFile
+
     expectedFile = directoryPath </> defaultExpectedFile
 
-checkTrainDirectory :: Metric -> FilePath -> IO ()
-checkTrainDirectory metric challengeDirectory = do
+checkTrainDirectory :: GEvalSpecification -> FilePath -> IO ()
+checkTrainDirectory spec challengeDirectory = do
   let trainDirectory = challengeDirectory </> "train"
   whenM (doesDirectoryExist trainDirectory) $ do
     trainFiles <- findTrainFiles trainDirectory
-    when (not $ null trainFiles) $ throw $ OldStyleTrainFile
+    if (not $ null trainFiles)
+    then
+     do
+      putStrLn "WARNING: Found old-style train file `train.tsv`, whereas the same convention as in"
+      putStrLn "WARNING: test directories if preferred (`in.tsv` and `expected.tsv`)."
+      putStrLn "WARNING: (Though, there might still be some cases when `train.tsv` is needed, e.g. for training LMs.)"
+    else
+     do
+      runOnTest spec trainDirectory
 
 checkColumns :: FilePath -> IO ()
 checkColumns filePath = do

From 059f81a79742b4d6a40d31bb12e8b8a49e9fd227 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Fri, 13 Dec 2019 20:38:08 +0100
Subject: [PATCH 08/23] Bump up version number

---
 CHANGELOG.md | 7 +++++++
 geval.cabal  | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a8e4b00..e979e47 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,11 @@
 
+## 1.23.0.0
+
+* New style of train data is preferred
+  - `in.tsv` and `expected.tsv` instead of `train.tsv`
+  - though this is not required as sometimes training data look different than test data
+  - `--validate` option was changed accordingly
+
 ## 1.22.1.0
 
 * Add "Mean/" meta-metric (for the time being working only with MultiLabel-F-measure)
diff --git a/geval.cabal b/geval.cabal
index 5b425ce..c76d561 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -1,5 +1,5 @@
 name:                geval
-version:             1.22.1.0
+version:             1.23.0.0
 synopsis:            Machine learning evaluation tools
 description:         Please see README.md
 homepage:            http://github.com/name/project

From 5f532c71c785dc71ceb892604060b2da5e98208c Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 14 Dec 2019 19:58:02 +0100
Subject: [PATCH 09/23] Add setting priorities, names can be set multiple times

If more than one name is given for a metric, the names are concatenated
(with spaces).
---
 src/GEval/EvaluationScheme.hs | 36 +++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/src/GEval/EvaluationScheme.hs b/src/GEval/EvaluationScheme.hs
index a464d6c..97235d6 100644
--- a/src/GEval/EvaluationScheme.hs
+++ b/src/GEval/EvaluationScheme.hs
@@ -1,5 +1,10 @@
 module GEval.EvaluationScheme
-  (EvaluationScheme(..), evaluationSchemeMetric, applyPreprocessingOperations, evaluationSchemeName, PreprocessingOperation(..))
+  (EvaluationScheme(..),
+   evaluationSchemeMetric,
+   applyPreprocessingOperations,
+   evaluationSchemeName,
+   evaluationSchemePriority,
+   PreprocessingOperation(..))
   where
 
 import GEval.Metric
@@ -9,14 +14,19 @@ import Text.Regex.PCRE.Light.Base (Regex(..))
 import Data.Text (Text(..), concat, toLower, toUpper, pack, unpack, words, unwords)
 import Data.List (intercalate, break, sort)
 import Data.Either
-import Data.Maybe (fromMaybe)
+import Data.Maybe (fromMaybe, catMaybes)
 import qualified Data.ByteString.UTF8 as BSU
 
 
 data EvaluationScheme = EvaluationScheme Metric [PreprocessingOperation]
   deriving (Eq)
 
-data PreprocessingOperation = RegexpMatch Regex | LowerCasing | UpperCasing | Sorting | SetName Text
+data PreprocessingOperation = RegexpMatch Regex
+                              | LowerCasing
+                              | UpperCasing
+                              | Sorting
+                              | SetName Text
+                              | SetPriority Int
   deriving (Eq)
 
 leftParameterBracket :: Char
@@ -42,6 +52,7 @@ readOps ('m':theRest) = handleParametrizedOp (RegexpMatch . (fromRight undefined
 readOps ('S':theRest) = (Sorting:ops, theRest')
     where (ops, theRest') = readOps theRest
 readOps ('N':theRest) = handleParametrizedOp (SetName . pack) theRest
+readOps ('P':theRest) = handleParametrizedOp (SetPriority . read) theRest
 readOps s = ([], s)
 
 handleParametrizedOp :: (String -> PreprocessingOperation) -> String -> ([PreprocessingOperation], String)
@@ -60,10 +71,21 @@ instance Show EvaluationScheme where
 evaluationSchemeName :: EvaluationScheme -> String
 evaluationSchemeName scheme@(EvaluationScheme metric operations) = fromMaybe (show scheme) (findNameSet operations)
 
+evaluationSchemePriority :: EvaluationScheme -> Int  -- first SetPriority among the scheme's operations, or the default when none is set
+evaluationSchemePriority (EvaluationScheme _ operations) = fromMaybe defaultPriority (findPrioritySet operations) where defaultPriority = 1
+
 findNameSet :: [PreprocessingOperation] -> Maybe String
-findNameSet [] = Nothing
-findNameSet ((SetName name):_) = Just (unpack name)
-findNameSet (_:ops) = findNameSet ops
+findNameSet ops = case names of
+  [] -> Nothing
+  _ -> Just $ intercalate " " names
+  where names = catMaybes $ map extractName ops
+        extractName (SetName n) = Just (unpack n)
+        extractName _ = Nothing
+
+findPrioritySet :: [PreprocessingOperation] -> Maybe Int
+findPrioritySet [] = Nothing
+findPrioritySet ((SetPriority p):_) = Just p
+findPrioritySet (_:ops) = findPrioritySet ops
 
 evaluationSchemeMetric :: EvaluationScheme -> Metric
 evaluationSchemeMetric (EvaluationScheme metric _) = metric
@@ -74,6 +96,7 @@ instance Show PreprocessingOperation where
   show UpperCasing = "u"
   show Sorting = "S"
   show (SetName t) = parametrizedOperation "N" (unpack t)
+  show (SetPriority p) = parametrizedOperation "P" (show p)
 
 parametrizedOperation :: String -> String -> String
 parametrizedOperation opCode opArg = opCode ++ [leftParameterBracket] ++ opArg ++ [rightParameterBracket]
@@ -87,3 +110,4 @@ applyPreprocessingOperation LowerCasing = toLower
 applyPreprocessingOperation UpperCasing = toUpper
 applyPreprocessingOperation Sorting = Data.Text.unwords . sort . Data.Text.words
 applyPreprocessingOperation (SetName _) = id
+applyPreprocessingOperation (SetPriority _) = id

From 2234efa1079690cbe418738a5f94ac697a363f32 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 14 Dec 2019 20:59:00 +0100
Subject: [PATCH 10/23] Multiple metrics can be packed via "Cartesian" strings

---
 geval.cabal                  |  1 +
 src/Data/CartesianStrings.hs | 42 ++++++++++++++++++++++++++++++++++++
 src/GEval/OptionsParser.hs   | 12 ++++++-----
 test/Spec.hs                 | 16 ++++++++++++++
 4 files changed, 66 insertions(+), 5 deletions(-)
 create mode 100644 src/Data/CartesianStrings.hs

diff --git a/geval.cabal b/geval.cabal
index c76d561..ab9d753 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -48,6 +48,7 @@ library
                      , GEval.Selector
                      , Data.Statistics.Loess
                      , Data.Statistics.Calibration
+                     , Data.CartesianStrings
                      , Paths_geval
   build-depends:       base >= 4.7 && < 5
                      , cond
diff --git a/src/Data/CartesianStrings.hs b/src/Data/CartesianStrings.hs
new file mode 100644
index 0000000..70f2418
--- /dev/null
+++ b/src/Data/CartesianStrings.hs
@@ -0,0 +1,42 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module Data.CartesianStrings
+       (parseCartesianString,
+        concatCartesianStrings,
+        CartesianStrings(..))
+       where
+
+import Data.List (findIndex)
+import Data.List.Split (splitOn)
+
+-- A helper library for parsing strings representing sets of strings
+-- obtained via a Cartesian product, e.g.:
+-- - "foo" represents just ["foo"]
+-- - "a-{foo,bar,baz}-b" represents ["a-foo-b", "a-bar-b", "a-baz-b"]
+-- - "{foo,bar,baz}-{x,y}-{0,1,2}" represents a set containing 18 strings
+
+cartProd :: [a] -> [b] -> [(a, b)]
+cartProd xs ys = [(x,y) | x <- xs, y <- ys]
+
+parseCartesianString :: String -> [String]  -- expands each {x,y,...} group into all combinations
+parseCartesianString s =
+  case findIndex (=='{') s of
+    Just begIx ->
+      let pref = take begIx s      -- literal text before the first '{'
+          c = drop (begIx + 1) s   -- everything after that '{'
+      in case findIndex (=='}') c of
+           Just endIx ->
+             let current = splitOn "," $ take endIx c  -- alternatives listed between the braces
+                 rest = parseCartesianString $ drop (endIx + 1) c  -- expansions of what follows '}'
+             in map (uncurry (++)) $ cartProd (map (pref++) current) rest
+           Nothing -> [s]  -- unmatched '{': keep the string unchanged instead of failing on a missing case
+    Nothing -> [s]
+
+data CartesianStrings a = CartesianStrings [a]
+  deriving (Eq)
+
+instance Read a => Read (CartesianStrings a) where
+  readsPrec _ s = [(CartesianStrings (map read $ parseCartesianString s), "")]
+
+concatCartesianStrings :: [CartesianStrings a] -> [a]
+concatCartesianStrings = concat . map (\(CartesianStrings ss) -> ss)
diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs
index 61c2952..d0767d8 100644
--- a/src/GEval/OptionsParser.hs
+++ b/src/GEval/OptionsParser.hs
@@ -39,6 +39,7 @@ import GEval.Validation
 import Data.List (intercalate)
 
 import Data.Conduit.SmartSource
+import Data.CartesianStrings
 
 fullOptionsParser = info (helper <*> optionsParser)
        (fullDesc
@@ -243,11 +244,12 @@ sel (Just m) _ = m
 
 
 metricReader :: Parser [EvaluationScheme]
-metricReader = many $ option auto         -- actually `some` should be used instead of `many`, the problem is that
-               ( long "metric"            -- --metric might be in the config.txt file...
-                 <> short 'm'
-                 <> metavar "METRIC"
-                 <> help ("Metric to be used, e.g.:" ++ helpMetricParameterMetricsList))
+metricReader = concatCartesianStrings <$>
+  (many $ option auto         -- actually `some` should be used instead of `many`, the problem is that
+    ( long "metric"           -- --metric might be in the config.txt file...
+      <> short 'm'
+      <> metavar "METRIC"
+      <> help ("Metric to be used, e.g.:" ++ helpMetricParameterMetricsList)))
 
 altMetricReader :: Parser (Maybe EvaluationScheme)
 altMetricReader = optional $ option auto
diff --git a/test/Spec.hs b/test/Spec.hs
index c350775..3348a14 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -64,6 +64,7 @@ import qualified Data.Vector.Unboxed as DVU
 import qualified Statistics.Matrix.Types as SMT
 import Data.Statistics.Loess (loess)
 import Data.Statistics.Calibration (calibration)
+import Data.CartesianStrings (parseCartesianString)
 
 informationRetrievalBookExample :: [(String, Int)]
 informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3),
@@ -670,6 +671,21 @@ main = hspec $ do
       calibration [True, False] [0.0, 1.0] `shouldBeAlmost` 0.0
       calibration [True, False, False, True, False] [0.0, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0
       calibration [False, True, True, True, True, False, False, True, False] [0.25, 0.25, 0.0, 0.25, 0.25, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0
+  describe "Cartesian strings" $ do
+    it "singleton" $ do
+      (parseCartesianString "foo") `shouldBe` ["foo"]
+    it "simple" $ do
+      parseCartesianString "a-{foo,bar,baz}-b" `shouldBe` ["a-foo-b", "a-bar-b", "a-baz-b"]
+    it "3x2" $ do
+      parseCartesianString "a-{foo,bar,baz}-{b,c}" `shouldBe` ["a-foo-b", "a-foo-c", "a-bar-b",
+                                                               "a-bar-c", "a-baz-b", "a-baz-c" ]
+    it "3x2x3" $ do
+      parseCartesianString "{foo,bar,ba}-{b,c}-{0,1,2}x" `shouldBe` ["foo-b-0x", "foo-b-1x", "foo-b-2x",
+                                                                      "foo-c-0x", "foo-c-1x", "foo-c-2x",
+                                                                      "bar-b-0x", "bar-b-1x", "bar-b-2x",
+                                                                      "bar-c-0x", "bar-c-1x", "bar-c-2x",
+                                                                      "ba-b-0x", "ba-b-1x", "ba-b-2x",
+                                                                      "ba-c-0x", "ba-c-1x", "ba-c-2x" ]
 
 checkConduitPure conduit inList expList = do
   let outList = runConduitPure $ CC.yieldMany inList .| conduit .| CC.sinkList

From d95e2878a667fa331af94a3c38942a9f144e5dc8 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 14 Dec 2019 21:10:40 +0100
Subject: [PATCH 11/23] Refactor line-by-line mode

---
 src/GEval/LineByLine.hs | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs
index 1766638..bb5108e 100644
--- a/src/GEval/LineByLine.hs
+++ b/src/GEval/LineByLine.hs
@@ -16,7 +16,8 @@ module GEval.LineByLine
         runDiffGeneralized,
         LineRecord(..),
         ResultOrdering(..),
-        justTokenize
+        justTokenize,
+        worstFeaturesPipeline
        ) where
 
 import GEval.Core
@@ -107,28 +108,26 @@ runFeatureFilter (Just feature) spec bbdo mReferences = CC.map (\l -> (fakeRank,
         checkFeature feature (_, LineWithFactors _ _ fs) = feature `elem` (Prelude.map show fs)
 
 runWorstFeatures :: ResultOrdering -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
-runWorstFeatures ordering spec bbdo = runLineByLineGeneralized ordering' spec (\mReferences -> worstFeaturesPipeline False spec bbdo mReferences)
+runWorstFeatures ordering spec bbdo = runLineByLineGeneralized ordering' spec (\mReferences -> worstFeaturesPipeline False spec bbdo mReferences consumFeatures)
   where ordering' = forceSomeOrdering ordering
 
+consumFeatures = CL.map (encodeUtf8 . formatFeatureWithPValue)
+                 .| CC.unlinesAscii
+                 .| CC.stdout
 
-
-worstFeaturesPipeline :: Bool -> GEvalSpecification -> BlackBoxDebuggingOptions -> Maybe References -> ConduitT LineRecord Void (ResourceT IO) ()
-worstFeaturesPipeline reversed spec bbdo mReferences = rank (lessByMetric reversed $ gesMainMetric spec)
+worstFeaturesPipeline :: Bool
+                        -> GEvalSpecification
+                        -> BlackBoxDebuggingOptions
+                        -> Maybe References
+                        -> ConduitT FeatureWithPValue Void (ResourceT IO) ()
+                        -> ConduitT LineRecord Void (ResourceT IO) ()
+worstFeaturesPipeline reversed spec bbdo mReferences consum = rank (lessByMetric reversed $ gesMainMetric spec)
                                       .| evalStateC 0 (extractFeaturesAndPValues spec bbdo mReferences)
                                       .| CC.filter (\(FeatureWithPValue _ p _ _) -> not $ isNaN p) -- NaN values would poison sorting
                                       .| gobbleAndDo (sortBy featureOrder)
                                       .| filtreCartesian (bbdoCartesian bbdo)
-                                      .| CL.map (encodeUtf8 . formatFeatureWithPValue)
-                                      .| CC.unlinesAscii
-                                      .| CC.stdout
-  where  formatOutput (LineRecord inp exp out _ score) = Data.Text.intercalate "\t" [
-           formatScore score,
-           escapeTabs inp,
-           escapeTabs exp,
-           escapeTabs out]
-         formatScore :: MetricValue -> Text
-         formatScore = Data.Text.pack . printf "%f"
-         featureOrder (FeatureWithPValue _ p1 _ _) (FeatureWithPValue _ p2 _ _) =
+                                      .| consum
+  where  featureOrder (FeatureWithPValue _ p1 _ _) (FeatureWithPValue _ p2 _ _) =
            p1 `compare` p2
 
 -- for commands like --worst-features we need some ordering (KeepTheOriginalOrder
@@ -402,7 +401,7 @@ runMostWorseningFeatures ordering otherOut spec bbdo  = runDiffGeneralized order
           FirstTheBest -> True
         consum :: Maybe References -> ConduitT (LineRecord, LineRecord) Void (ResourceT IO) ()
         consum = \mReferences -> CC.map prepareFakeLineRecord
-                                .| (worstFeaturesPipeline reversed spec bbdo mReferences)
+                                .| (worstFeaturesPipeline reversed spec bbdo mReferences consumFeatures)
         prepareFakeLineRecord :: (LineRecord, LineRecord) -> LineRecord
         prepareFakeLineRecord (LineRecord _ _ _ _ scorePrev, LineRecord inp exp out c score) =
           LineRecord inp exp out c (score - scorePrev)

From 1e74174c0b0c52eb316150a7231c0a17ae43a288 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 14 Dec 2019 21:12:46 +0100
Subject: [PATCH 12/23] Bump up version

---
 CHANGELOG.md | 5 +++++
 geval.cabal  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e979e47..44bfa58 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,9 @@
 
+## 1.24.0.0
+
+* Introduce metric priorities
+* Use "Cartesian" strings in metrics
+
 ## 1.23.0.0
 
 * New style of train data is preferred
diff --git a/geval.cabal b/geval.cabal
index ab9d753..4b355be 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -1,5 +1,5 @@
 name:                geval
-version:             1.23.0.0
+version:             1.24.0.0
 synopsis:            Machine learning evaluation tools
 description:         Please see README.md
 homepage:            http://github.com/name/project

From 9a3a28a813141a26e95e5617721f61762eddd597 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Filip=20Grali=C5=84ski?= <filipg@amu.edu.pl>
Date: Mon, 16 Dec 2019 11:17:22 +0100
Subject: [PATCH 13/23] Add --oracle-item-based option

---
 src/GEval/Core.hs                             |  3 ++
 src/GEval/LineByLine.hs                       | 31 +++++++++++++++++--
 src/GEval/Metric.hs                           |  8 ++++-
 src/GEval/OptionsParser.hs                    | 11 +++++++
 test/Spec.hs                                  |  1 +
 .../test-A/out-X.tsv                          |  4 +++
 .../test-A/out-Y.tsv                          |  4 +++
 .../oracle-item-based-solution/test-A/out.tsv |  4 +++
 .../oracle-item-based/config.txt              |  1 +
 .../oracle-item-based/test-A/expected.tsv     |  4 +++
 .../oracle-item-based/test-A/in.tsv           |  4 +++
 .../oracle-item-based/test-A/out-O.tsv        |  4 +++
 12 files changed, 75 insertions(+), 4 deletions(-)
 create mode 100644 test/oracle-item-based/oracle-item-based-solution/test-A/out-X.tsv
 create mode 100644 test/oracle-item-based/oracle-item-based-solution/test-A/out-Y.tsv
 create mode 100644 test/oracle-item-based/oracle-item-based-solution/test-A/out.tsv
 create mode 100644 test/oracle-item-based/oracle-item-based/config.txt
 create mode 100644 test/oracle-item-based/oracle-item-based/test-A/expected.tsv
 create mode 100644 test/oracle-item-based/oracle-item-based/test-A/in.tsv
 create mode 100644 test/oracle-item-based/oracle-item-based/test-A/out-O.tsv

diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs
index 3aa5d46..f7e8b04 100644
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -160,6 +160,7 @@ data GEvalSpecification = GEvalSpecification
                             gesTestName :: String,
                             gesSelector :: Maybe Selector,
                             gesOutFile :: String,
+                            gesAltOutFiles :: Maybe [String],
                             gesExpectedFile :: String,
                             gesInputFile :: String,
                             gesMetrics :: [EvaluationScheme],
@@ -190,6 +191,7 @@ data GEvalSpecialCommand = Init
                            | Diff FilePath | MostWorseningFeatures FilePath
                            | PrintVersion | JustTokenize | Submit
                            | Validate | ListMetrics
+                           | OracleItemBased
 
 data ResultOrdering = KeepTheOriginalOrder | FirstTheWorst | FirstTheBest
 
@@ -249,6 +251,7 @@ defaultGEvalSpecification = GEvalSpecification {
   gesTestName = defaultTestName,
   gesSelector = Nothing,
   gesOutFile = defaultOutFile,
+  gesAltOutFiles = Nothing,
   gesExpectedFile = defaultExpectedFile,
   gesInputFile = defaultInputFile,
   gesMetrics = [EvaluationScheme defaultMetric []],
diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs
index bb5108e..41c9283 100644
--- a/src/GEval/LineByLine.hs
+++ b/src/GEval/LineByLine.hs
@@ -17,7 +17,8 @@ module GEval.LineByLine
         LineRecord(..),
         ResultOrdering(..),
         justTokenize,
-        worstFeaturesPipeline
+        worstFeaturesPipeline,
+        runOracleItemBased
        ) where
 
 import GEval.Core
@@ -34,10 +35,11 @@ import Data.Text
 import Data.Text.Encoding
 import Data.Conduit.Rank
 import Data.Maybe (fromMaybe)
+import Data.Either (rights)
 
 import qualified Data.Vector as V
 
-import Data.List (sortBy, sortOn, sort, concat)
+import Data.List (sortBy, sortOn, sort, concat, maximumBy)
 
 import Control.Monad.IO.Class
 import Control.Monad.Trans.Resource
@@ -84,7 +86,6 @@ parseReferenceEntry :: Text -> (Integer, Text)
 parseReferenceEntry line = (read $ unpack refId, t)
   where [refId, t] = splitOn "\t" line
 
-
 runLineByLine :: ResultOrdering -> Maybe String -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
 runLineByLine ordering featureFilter spec bbdo = runLineByLineGeneralized ordering spec consum
    where consum :: Maybe References -> ConduitT LineRecord Void (ResourceT IO) ()
@@ -392,6 +393,30 @@ runDiff ordering featureFilter otherOut spec bbdo = runDiffGeneralized ordering
         formatScoreDiff :: Double -> Text
         formatScoreDiff = Data.Text.pack . printf "%f"
 
+runOracleItemBased :: GEvalSpecification -> IO ()
+runOracleItemBased spec = runMultiOutputGeneralized spec consum
+  where consum = CL.map picker .| format
+        picker = maximumBy (\(LineRecord _ _ _ _ scoreA) (LineRecord _ _ _ _ scoreB) -> metricCompare metric scoreA scoreB)
+        format = CL.map (encodeUtf8 . formatOutput)
+                 .| CC.unlinesAscii
+                 .| CC.stdout
+        formatOutput (LineRecord _ _ out _ _) = out
+        metric = gesMainMetric spec
+
+runMultiOutputGeneralized :: GEvalSpecification -> ConduitT [LineRecord] Void (ResourceT IO) () -> IO ()  -- feeds `consum` one list of LineRecords per item, one record per output file
+runMultiOutputGeneralized spec consum = do
+  (inputSource, expectedSource, outSource) <- checkAndGetFilesSingleOut True spec
+  let altOuts = fromMaybe [] (gesAltOutFiles spec)  -- no --alt-out-file given: fall back to the main output only
+  altSourceSpecs' <- mapM (getSmartSourceSpec ((gesOutDirectory spec) </> (gesTestName spec)) "out.tsv") altOuts
+  let altSourceSpecs = rights altSourceSpecs'  -- NOTE(review): alternative outputs that come back as Left are silently dropped here
+  let sourceSpecs = (outSource:altSourceSpecs)  -- main output first, so the list is never empty
+  let sources = Prelude.map (gevalLineByLineSource metric mSelector preprocess inputSource expectedSource) sourceSpecs
+  runResourceT $ runConduit $
+    (sequenceSources sources .| consum)
+  where metric = gesMainMetric spec
+        preprocess = gesPreprocess spec
+        mSelector = gesSelector spec
+
 runMostWorseningFeatures :: ResultOrdering -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()
 runMostWorseningFeatures ordering otherOut spec bbdo  = runDiffGeneralized ordering' otherOut spec consum
   where ordering' = forceSomeOrdering ordering
diff --git a/src/GEval/Metric.hs b/src/GEval/Metric.hs
index a508997..c5ac7ba 100644
--- a/src/GEval/Metric.hs
+++ b/src/GEval/Metric.hs
@@ -8,7 +8,8 @@ module GEval.Metric
    bestPossibleValue,
    perfectOutLineFromExpectedLine,
    fixedNumberOfColumnsInExpected,
-   fixedNumberOfColumnsInInput)
+   fixedNumberOfColumnsInInput,
+   metricCompare)
   where
 
 import Data.Word
@@ -173,6 +174,11 @@ getMetricOrdering MultiLabelLogLoss = TheLowerTheBetter
 getMetricOrdering MultiLabelLikelihood = TheHigherTheBetter
 getMetricOrdering (Mean metric) = getMetricOrdering metric
 
+metricCompare :: Metric -> MetricValue -> MetricValue -> Ordering
+metricCompare metric a b = metricCompare' (getMetricOrdering metric) a b
+  where metricCompare' TheHigherTheBetter a b = a `compare` b
+        metricCompare' TheLowerTheBetter a b = b `compare` a
+
 bestPossibleValue :: Metric -> MetricValue
 bestPossibleValue metric = case getMetricOrdering metric of
   TheLowerTheBetter -> 0.0
diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs
index d0767d8..39d4900 100644
--- a/src/GEval/OptionsParser.hs
+++ b/src/GEval/OptionsParser.hs
@@ -95,6 +95,10 @@ optionsParser = GEvalOptions
                 (flag' ListMetrics
                     ( long "list-metrics"
                       <> help "List all metrics with their descriptions"))
+                <|>
+                (flag' OracleItemBased
+                    ( long "oracle-item-based"
+                      <> help "Generate the best possible output considering outputs given by --out-file and --alt-out-file options (and peeking into the expected file)."))
                 )
 
    <*> ((flag' FirstTheWorst
@@ -152,6 +156,10 @@ specParser = GEvalSpecification
     <> showDefault
     <> metavar "OUT"
     <> help "The name of the file to be evaluated" )
+  <*> (optional $ some $ strOption
+        ( long "alt-out-file"
+          <> metavar "OUT"
+          <> help "Alternative output file, makes sense only for some options, e.g. --oracle-item-based"))
   <*> strOption
   ( long "expected-file"
     <> short 'e'
@@ -355,6 +363,9 @@ runGEval''' (Just Validate) _ _ spec _ _ = do
 runGEval''' (Just ListMetrics) _ _ _ _ _ = do
   listMetrics
   return Nothing
+runGEval''' (Just OracleItemBased) _ _ spec _ _ = do
+  runOracleItemBased spec
+  return Nothing
 
 getGraphFilename :: Int -> FilePath -> FilePath
 getGraphFilename 0 fp = fp
diff --git a/test/Spec.hs b/test/Spec.hs
index 3348a14..e10ebb4 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -445,6 +445,7 @@ main = hspec $ do
             gesTestName = "test-A",
             gesSelector = Nothing,
             gesOutFile = "out.tsv",
+            gesAltOutFiles = Nothing,
             gesExpectedFile = "expected.tsv",
             gesInputFile = "in.tsv",
             gesMetrics = [EvaluationScheme Likelihood []],
diff --git a/test/oracle-item-based/oracle-item-based-solution/test-A/out-X.tsv b/test/oracle-item-based/oracle-item-based-solution/test-A/out-X.tsv
new file mode 100644
index 0000000..94972cc
--- /dev/null
+++ b/test/oracle-item-based/oracle-item-based-solution/test-A/out-X.tsv
@@ -0,0 +1,4 @@
+A
+C
+D
+D
diff --git a/test/oracle-item-based/oracle-item-based-solution/test-A/out-Y.tsv b/test/oracle-item-based/oracle-item-based-solution/test-A/out-Y.tsv
new file mode 100644
index 0000000..031d052
--- /dev/null
+++ b/test/oracle-item-based/oracle-item-based-solution/test-A/out-Y.tsv
@@ -0,0 +1,4 @@
+D
+C
+B
+A
diff --git a/test/oracle-item-based/oracle-item-based-solution/test-A/out.tsv b/test/oracle-item-based/oracle-item-based-solution/test-A/out.tsv
new file mode 100644
index 0000000..e930626
--- /dev/null
+++ b/test/oracle-item-based/oracle-item-based-solution/test-A/out.tsv
@@ -0,0 +1,4 @@
+B
+A
+C
+A
diff --git a/test/oracle-item-based/oracle-item-based/config.txt b/test/oracle-item-based/oracle-item-based/config.txt
new file mode 100644
index 0000000..337a0cc
--- /dev/null
+++ b/test/oracle-item-based/oracle-item-based/config.txt
@@ -0,0 +1 @@
+--metric Accuracy
diff --git a/test/oracle-item-based/oracle-item-based/test-A/expected.tsv b/test/oracle-item-based/oracle-item-based/test-A/expected.tsv
new file mode 100644
index 0000000..8422d40
--- /dev/null
+++ b/test/oracle-item-based/oracle-item-based/test-A/expected.tsv
@@ -0,0 +1,4 @@
+A
+B
+C
+D
diff --git a/test/oracle-item-based/oracle-item-based/test-A/in.tsv b/test/oracle-item-based/oracle-item-based/test-A/in.tsv
new file mode 100644
index 0000000..94ebaf9
--- /dev/null
+++ b/test/oracle-item-based/oracle-item-based/test-A/in.tsv
@@ -0,0 +1,4 @@
+1
+2
+3
+4
diff --git a/test/oracle-item-based/oracle-item-based/test-A/out-O.tsv b/test/oracle-item-based/oracle-item-based/test-A/out-O.tsv
new file mode 100644
index 0000000..c0dcbfe
--- /dev/null
+++ b/test/oracle-item-based/oracle-item-based/test-A/out-O.tsv
@@ -0,0 +1,4 @@
+A
+C
+C
+D

From ad30bb9384d1f12818be658d87ac9c652234bbdb Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Mon, 16 Dec 2019 12:47:35 +0100
Subject: [PATCH 14/23] Fix bug with preprocessing ops not handled in
 --line-by-line mode

---
 src/GEval/Core.hs       | 6 ++++++
 src/GEval/LineByLine.hs | 7 +++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs
index f7e8b04..e5ff9b1 100644
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -40,6 +40,7 @@ module GEval.Core
       checkMultipleOuts,
       checkMultipleOutsCore,
       gesMainMetric,
+      gesMainScheme,
       gesPreprocess,
       getDataDecoder,
       threeLineSource,
@@ -177,6 +178,11 @@ gesMainMetric spec = case gesMetrics spec of
   (scheme:_) -> evaluationSchemeMetric scheme
   otherwise -> error "no metric given"
 
+gesMainScheme :: GEvalSpecification -> EvaluationScheme
+gesMainScheme spec = case gesMetrics spec of
+  (scheme:_) -> scheme
+  otherwise -> error "no metric given"
+
 gesPreprocess :: GEvalSpecification -> (Text -> Text)
 gesPreprocess spec = tokenizeTabSeparatedWithSpaces (gesTokenizer spec)
 
diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs
index 41c9283..549de1e 100644
--- a/src/GEval/LineByLine.hs
+++ b/src/GEval/LineByLine.hs
@@ -23,6 +23,7 @@ module GEval.LineByLine
 
 import GEval.Core
 import GEval.Common
+import GEval.EvaluationScheme
 import Text.Tokenizer
 
 import Data.Conduit.AutoDecompress (doNothing)
@@ -359,8 +360,9 @@ runLineByLineGeneralized ordering spec consum = do
   (inputFilePath, expectedFilePath, outFilePath) <- checkAndGetFilesSingleOut True spec
   gevalLineByLineCore metric mSelector preprocess inputFilePath expectedFilePath outFilePath (sorter ordering .| consum mReferences)
   where metric = gesMainMetric spec
+        scheme = gesMainScheme spec
         mSelector = gesSelector spec
-        preprocess = gesPreprocess spec
+        preprocess = (gesPreprocess spec) . (applyPreprocessingOperations scheme)
         sorter KeepTheOriginalOrder = doNothing
         sorter ordering = gobbleAndDo $ sortBy (sortOrder ordering (getMetricOrdering metric))
         sortOrder FirstTheWorst TheHigherTheBetter = compareScores
@@ -414,7 +416,8 @@ runMultiOutputGeneralized spec consum = do
   runResourceT $ runConduit $
     (sequenceSources sources .| consum)
   where metric = gesMainMetric spec
-        preprocess = gesPreprocess spec
+        scheme = gesMainScheme spec
+        preprocess = (gesPreprocess spec) . (applyPreprocessingOperations scheme)
         mSelector = gesSelector spec
 
 runMostWorseningFeatures :: ResultOrdering -> FilePath -> GEvalSpecification -> BlackBoxDebuggingOptions -> IO ()

From 170cc417cacffe3b5916cc3841c73a901c302118 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Mon, 16 Dec 2019 12:49:18 +0100
Subject: [PATCH 15/23] Bump up version

---
 CHANGELOG.md | 4 ++++
 geval.cabal  | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 44bfa58..f5f7e51 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,8 @@
 
+## 1.25.0.0
+
+* Add --oracle-item-based
+
 ## 1.24.0.0
 
 * Introduce metric priorities
diff --git a/geval.cabal b/geval.cabal
index 4b355be..b4f4071 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -1,5 +1,5 @@
 name:                geval
-version:             1.24.0.0
+version:             1.25.0.0
 synopsis:            Machine learning evaluation tools
 description:         Please see README.md
 homepage:            http://github.com/name/project

From 01486d23aae9b5b30ad957a25e23000d7500d861 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 21 Dec 2019 16:03:52 +0100
Subject: [PATCH 16/23] Change the meaning of WER

---
 src/GEval/Core.hs                     | 28 ++++++++++++++++++++++++---
 src/GEval/WER.hs                      |  4 ++--
 test/wer-simple/wer-simple/config.txt |  2 +-
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs
index e5ff9b1..3aebc93 100644
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -516,7 +516,21 @@ gevalCoreOnSources (Mean (MultiLabelFMeasure beta)) _
       intoWords (RawItemTarget t) = Prelude.map unpack $ Data.Text.words t
       intoWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
 
-gevalCoreOnSources (Mean _) _ = error $ "Mean/ meta-metric defined only for MultiLabel-F1 for the time being"
+gevalCoreOnSources (Mean WER) _
+  = gevalCoreWithoutInputOnItemTargets (Right . intoWords)
+                                       (Right . getWords)
+                                       ((uncurry (/.)) . (uncurry werStep))
+                                       averageC
+                                       id
+                                       noGraph
+    where
+      -- repeated as below, as it will be refactored into dependent types soon anyway
+      getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t
+      getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
+      intoWords (RawItemTarget t) = Prelude.map unpack $ Data.Text.words t
+      intoWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
+
+gevalCoreOnSources (Mean _) _ = error $ "Mean/ meta-metric defined only for MultiLabel-F1 and WER for the time being"
 
 -- only MultiLabel-F1 handled for JSONs for the time being...
 gevalCoreOnSources (MultiLabelFMeasure beta) _ = gevalCoreWithoutInputOnItemTargets (Right . intoWords)
@@ -586,9 +600,17 @@ gevalCoreOnSources GLEU _ = gevalCoreWithoutInput (Right . Prelude.map Prelude.w
   where gleuFinal (m, t) = m /. t
         gleuCombine (refs, sen) = gleuStep refs sen
         gleuAgg = CC.foldl gleuFuse (0, 0)
-        gleuFuse (a1, a2) (b1, b2) = (a1+b1, a2+b2)
+        gleuFuse (a1, a2) (b1, b2) = (a1 + b1, a2 + b2)
 
-gevalCoreOnSources WER _ = gevalCoreWithoutInput (Right . Prelude.words . unpack) (Right . Prelude.words . unpack) (uncurry werStep) averageC id noGraph
+gevalCoreOnSources WER _ = gevalCoreWithoutInput (Right . Prelude.words . unpack)
+                                                 (Right . Prelude.words . unpack)
+                                                 (uncurry werStep)
+                                                 werAgg
+                                                 werFinal
+                                                 noGraph
+  where werAgg = CC.foldl werFuse (0, 0)
+        werFuse (a1, a2) (b1, b2) = (a1 + b1, a2 + b2)
+        werFinal (errors, ref) = errors /. ref
 
 gevalCoreOnSources Accuracy _ = gevalCoreWithoutInput (Right . strip) (Right . strip) hitOrMiss averageC id noGraph
                       where hitOrMiss (exp, got) =
diff --git a/src/GEval/WER.hs b/src/GEval/WER.hs
index 0767592..c545efd 100644
--- a/src/GEval/WER.hs
+++ b/src/GEval/WER.hs
@@ -5,8 +5,8 @@ module GEval.WER
 import Data.Array
 import GEval.Common
 
-werStep :: Eq a => [a] -> [a] -> Double
-werStep expected got = (fromIntegral $ distance expected got) `safeDoubleDiv` (fromIntegral $ length expected)
+werStep :: Eq a => [a] -> [a] -> (Int, Int)
+werStep expected got = (distance expected got, length expected)
 
 -- see https://stackoverflow.com/questions/6718787/levenshtein-distance-cost
 distance u v = memo ! (m, n)
diff --git a/test/wer-simple/wer-simple/config.txt b/test/wer-simple/wer-simple/config.txt
index 7b39834..b933671 100644
--- a/test/wer-simple/wer-simple/config.txt
+++ b/test/wer-simple/wer-simple/config.txt
@@ -1 +1 @@
---metric WER
+--metric Mean/WER

From 06a0b1148d206ae0483560285bc7921fbbe15ddc Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 21 Dec 2019 16:05:51 +0100
Subject: [PATCH 17/23] Bump up version number

---
 CHANGELOG.md | 6 ++++++
 geval.cabal  | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f5f7e51..b2bb09e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,10 @@
 
+## 1.26.0.0
+
+* Change the meaning of WER (WER is calculated for the whole set now
+  - similar to the way BLEU is calculated)
+* Use `Mean/WER` if you want the old meaning (average of per-item results)
+
 ## 1.25.0.0
 
 * Add --oracle-item-based
diff --git a/geval.cabal b/geval.cabal
index b4f4071..170d37e 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -1,5 +1,5 @@
 name:                geval
-version:             1.25.0.0
+version:             1.26.0.0
 synopsis:            Machine learning evaluation tools
 description:         Please see README.md
 homepage:            http://github.com/name/project

From 4ba61b6e6ea82b053a788ca8c68be46900890c54 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Mon, 16 Dec 2019 13:01:41 +0100
Subject: [PATCH 18/23] Prepare helper functions for cross-tabs

---
 geval.cabal                    |   2 +
 src/Data/SplitIntoCrossTabs.hs | 119 +++++++++++++++++++++++++++++++++
 stack.yaml                     |   2 +-
 test/Spec.hs                   |  21 ++++++
 4 files changed, 143 insertions(+), 1 deletion(-)
 create mode 100644 src/Data/SplitIntoCrossTabs.hs

diff --git a/geval.cabal b/geval.cabal
index 170d37e..28fd65a 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -49,6 +49,7 @@ library
                      , Data.Statistics.Loess
                      , Data.Statistics.Calibration
                      , Data.CartesianStrings
+                     , Data.SplitIntoCrossTabs
                      , Paths_geval
   build-depends:       base >= 4.7 && < 5
                      , cond
@@ -101,6 +102,7 @@ library
                      , filemanip
                      , temporary
                      , utf8-string
+                     , ordered-containers
   default-language:    Haskell2010
 
 executable geval
diff --git a/src/Data/SplitIntoCrossTabs.hs b/src/Data/SplitIntoCrossTabs.hs
new file mode 100644
index 0000000..1088d5f
--- /dev/null
+++ b/src/Data/SplitIntoCrossTabs.hs
@@ -0,0 +1,119 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module Data.SplitIntoCrossTabs
+       (splitIntoCrossTabs,
+        CrossTab(..),
+        TextFrag(..))
+       where
+
+import qualified Data.Text as T
+import Data.Text (Text)
+import qualified Data.Set.Ordered as OS
+import qualified Data.Map.Ordered as OM
+import qualified Data.Set as S
+import qualified Data.Foldable as F
+import qualified Data.Map as M
+
+import Debug.Trace
+
+import Data.List (unfoldr, sortBy, maximumBy, minimumBy)
+
+data CrossTab = SingleItem Text | CrossTab [TextFrag] [TextFrag]
+  deriving (Show, Eq)
+
+data TextFrag = Prefix Text | Suffix Text
+  deriving (Show, Eq, Ord)
+
+
+splitIntoCrossTabs :: [Text] -> [CrossTab]
+splitIntoCrossTabs inputs =
+  map preferVertical
+  $ map snd
+  $ sortBy (\(r1,_) (r2, _) -> r1 `compare` r2)
+  $ map (getRank inputRanks)
+  $ unfoldr extractTheBestCrossTab inputs
+  where inputRanks = M.fromList $ zip inputs [1..]
+
+preferVertical :: CrossTab -> CrossTab
+preferVertical s@(SingleItem _) = s
+preferVertical c@(CrossTab rowNames columnNames)
+  | length rowNames < length columnNames = CrossTab columnNames rowNames
+  | otherwise = c
+
+getRank :: M.Map Text Int -> CrossTab -> (Int, CrossTab)
+getRank ranks c = (bestRank, c)
+  where bestRank = minimum
+                   $ map (ranks M.!)
+                   $ S.toList
+                   $ toSet c
+
+extractTheBestCrossTab :: [Text] -> Maybe (CrossTab, [Text])
+extractTheBestCrossTab [] = Nothing
+extractTheBestCrossTab ts = Just (theBestCrossTab, rest)
+  where theBestCrossTab = findTheBestCrossTab ts
+        rest = filter (`S.notMember` (toSet theBestCrossTab)) ts
+
+findTheBestCrossTab :: [Text] -> CrossTab
+findTheBestCrossTab ts = case orderedEntries of
+  [] -> SingleItem defaultSingleton
+  _ -> maximumBy (\t1 t2 -> crossTabSize t1 `compare` crossTabSize t2)
+      $ map (findTheBestCrossTabForTextPart (SingleItem defaultSingleton) orderedEntries)
+      $ map snd orderedEntries
+  where mapping = gatherTextParts ts
+        orderedEntries = sortBy entryComparator
+                         $ filter (\(_, (_, tset)) -> OS.size tset >= 2)
+                         $ zip [1..] (OM.assocs mapping)
+        (defaultSingleton:_) = ts
+
+thenCmp :: Ordering -> Ordering -> Ordering
+thenCmp EQ o2 = o2
+thenCmp o1 _  = o1
+
+entryComparator (r1, (_, s1)) (r2, (_, s2)) = (OS.size s2 `compare` OS.size s1)
+                                                      `thenCmp`
+                                                    (r1 `compare` r2)
+
+findTheBestCrossTabForTextPart :: CrossTab -> [(Int, (TextFrag, OS.OSet TextFrag))] -> (TextFrag, OS.OSet TextFrag) -> CrossTab
+findTheBestCrossTabForTextPart defaultCrossTab entries chosenEntry@(t, tset) = if crossTabSize bestCrossTabFound > 1
+                                                                               then bestCrossTabFound
+                                                                               else defaultCrossTab
+  where bestCrossTabFound = foldr step (CrossTab [] (F.toList tset)) entriesOrderedByIntersection
+        entriesOrderedByIntersection =
+          sortBy entryComparator
+          $ filter (\(_, (_, tset')) -> OS.size tset' >= 2)
+          $ map (\(r, (t', tset')) -> (r, (t', tset' OS.|/\ tset))) entries
+        step (_, (t', tset')) currentTab@(CrossTab frags common) = selectedTab
+            where newTab = CrossTab newT (F.toList newCommon)
+                  newT = t':frags
+                  newCommon = (OS.fromList common) OS.|/\ tset'
+                  selectedTab = if crossTabSize newTab >= crossTabSize currentTab
+                                then newTab
+                                else currentTab
+
+crossTabSize :: CrossTab -> Int
+crossTabSize (SingleItem _) = 1
+crossTabSize (CrossTab [] _) = 0
+crossTabSize (CrossTab _ []) = 0
+-- tables really start from 2x2
+crossTabSize (CrossTab [_] _) = 0
+crossTabSize (CrossTab _ [_]) = 0
+crossTabSize (CrossTab rows columns) = length rows * length columns
+
+toSet :: CrossTab -> S.Set Text
+toSet (SingleItem t) = S.singleton t
+toSet (CrossTab rowNames columnNames) = S.fromList [rName `combineFrags` cName | rName <- rowNames, cName <- columnNames]
+
+combineFrags :: TextFrag -> TextFrag -> Text
+combineFrags (Prefix prefix) (Suffix suffix) = prefix <> suffix
+combineFrags (Suffix suffix) (Prefix prefix) = prefix <> suffix
+combineFrags _ _ = error $ "incompatible text fragments"
+
+getTextParts :: Text -> [(TextFrag, TextFrag)]
+getTextParts t = [(Prefix (T.take ix t), Suffix (T.drop ix t)) | ix <- [1..(T.length t)-1]]
+
+gatherTextParts :: [Text] -> OM.OMap TextFrag (OS.OSet TextFrag)
+gatherTextParts = (gather OS.singleton (OS.|<>)) . concat . (map getTextParts)
+
+gather :: Ord a => (b -> c) -> (c -> c -> c) -> [(a, b)] -> OM.OMap a c
+gather createEntry combine = foldr extend OM.empty
+  where extend (k, v) m = OM.unionWithL (\_ v1 v2 -> combine v1 v2) (OM.singleton (k, (createEntry v))) m
diff --git a/stack.yaml b/stack.yaml
index b6e0361..3805ab6 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -1,5 +1,5 @@
 flags: {}
 packages:
 - '.'
-extra-deps: [murmur3-1.0.3,naturalcomp-0.0.3,Munkres-0.1,numeric-tools-0.2.0.1,Chart-1.9.1,Chart-cairo-1.9.1,multiset-0.3.4.1]
+extra-deps: [murmur3-1.0.3,naturalcomp-0.0.3,Munkres-0.1,numeric-tools-0.2.0.1,Chart-1.9.1,Chart-cairo-1.9.1,multiset-0.3.4.1,'ordered-containers-0.2.2@sha256:ebf2be3f592d9cf148ea6b8375f8af97148d44f82d8d04476899285e965afdbf,810']
 resolver: lts-12.26
diff --git a/test/Spec.hs b/test/Spec.hs
index e10ebb4..b4d3511 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -65,6 +65,7 @@ import qualified Statistics.Matrix.Types as SMT
 import Data.Statistics.Loess (loess)
 import Data.Statistics.Calibration (calibration)
 import Data.CartesianStrings (parseCartesianString)
+import Data.SplitIntoCrossTabs (splitIntoCrossTabs, CrossTab(..), TextFrag(..))
 
 informationRetrievalBookExample :: [(String, Int)]
 informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3),
@@ -687,6 +688,26 @@ main = hspec $ do
                                                                       "bar-c-0x", "bar-c-1x", "bar-c-2x",
                                                                       "ba-b-0x", "ba-b-1x", "ba-b-2x",
                                                                       "ba-c-0x", "ba-c-1x", "ba-c-2x" ]
+  describe "cross-tabs" $ do
+    it "singleton" $ do
+      splitIntoCrossTabs ["abababab"] `shouldBe` [SingleItem "abababab"]
+    it "too small" $ do
+      splitIntoCrossTabs ["aabb", "aacc"] `shouldBe` [SingleItem "aabb", SingleItem "aacc"]
+    it "two tables" $ do
+      splitIntoCrossTabs ["yABC", "xx00", "yABD", "ZC", "xx11", "yy00", "yy11", "ZD"] `shouldBe` [
+                                         CrossTab [Prefix "yAB", Prefix "Z"] [Suffix "C", Suffix "D"],
+                                         CrossTab [Prefix "xx", Prefix "yy"] [Suffix "00", Suffix "11"]]
+    it "simple" $ do
+      splitIntoCrossTabs ["aabsolutely",
+                          "aaafoo",
+                          "other",
+                          "aaabaz",
+                          "aaabaq",
+                          "bbbfoo",
+                          "bbbbaz",
+                          "bbbbaq"] `shouldBe` [SingleItem "aabsolutely",
+                                                CrossTab [Suffix "foo", Suffix "baz", Suffix "baq"] [Prefix "aaa", Prefix "bbb"],
+                                                SingleItem "other"]
 
 checkConduitPure conduit inList expList = do
   let outList = runConduitPure $ CC.yieldMany inList .| conduit .| CC.sinkList

From 5171cf0ac64a5142a5c0824cc7161cfb1bcfb88e Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 4 Jan 2020 20:48:36 +0100
Subject: [PATCH 19/23] Results are presented as cross tables (if possible)

---
 app/Main.hs                    | 25 ++++++++++++++++++++++++-
 src/Data/SplitIntoCrossTabs.hs | 29 +++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/app/Main.hs b/app/Main.hs
index 53ef2aa..d825428 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -16,6 +16,8 @@ import System.Exit
 
 import Data.Conduit.SmartSource
 
+import Data.SplitIntoCrossTabs
+
 import System.FilePath
 
 import Data.List (intercalate, sort)
@@ -23,6 +25,7 @@ import Data.List (intercalate, sort)
 import qualified Data.Text as T
 
 import Data.Map.Strict as M
+import qualified Data.Map.Lazy as LM
 import Data.Set as S
 
 main :: IO ()
@@ -79,7 +82,27 @@ showTheResult' opts [val] = putStrLn $ formatTheResult (gesPrecision $ geoSpec o
 showTheResult' opts [] = do
   hPutStrLn stderr "no metric given, use --metric option"
   exitFailure
-showTheResult' opts vals =  mapM_ putStrLn $ Prelude.map (formatTheMetricAndResult (gesPrecision $ geoSpec opts)) $ zip (gesMetrics $ geoSpec opts) vals
+showTheResult' opts vals =  mapM_ putStrLn
+                            $ intercalate [""]
+                            $ Prelude.map (formatCrossTable (gesPrecision $ geoSpec opts))
+                            $ splitIntoTablesWithValues (T.pack "metric") (T.pack "value") mapping metricLabels
+  where mapping = LM.fromList $ zip metricLabels vals
+        metricLabels = Prelude.map T.pack $ Prelude.map evaluationSchemeName $ gesMetrics $ geoSpec opts
+
+formatCrossTable :: Maybe Int -> TableWithValues MetricValue -> [String]
+formatCrossTable mPrecision (TableWithValues [_, _] body) =
+  -- actually we won't print metric/value header
+  -- (1) to keep backward-compatible with the previous version
+  -- (2) to be concise
+  Prelude.map (formatCrossTableLine mPrecision) body
+formatCrossTable mPrecision (TableWithValues header body) =
+  (intercalate "\t" $ Prelude.map T.unpack header) : Prelude.map (formatCrossTableLine mPrecision) body
+
+
+
+formatCrossTableLine :: Maybe Int -> (T.Text, [MetricValue]) -> String
+formatCrossTableLine mPrecision (rowName, values) =
+  intercalate "\t" ((T.unpack rowName):Prelude.map (formatTheResult mPrecision) values)
 
 formatSourceSpec :: SourceSpec -> String
 formatSourceSpec (FilePathSpec fp) = dropExtensions $ takeFileName fp
diff --git a/src/Data/SplitIntoCrossTabs.hs b/src/Data/SplitIntoCrossTabs.hs
index 1088d5f..a843a8c 100644
--- a/src/Data/SplitIntoCrossTabs.hs
+++ b/src/Data/SplitIntoCrossTabs.hs
@@ -2,7 +2,9 @@
 
 module Data.SplitIntoCrossTabs
        (splitIntoCrossTabs,
+        splitIntoTablesWithValues,
         CrossTab(..),
+        TableWithValues(..),
         TextFrag(..))
        where
 
@@ -13,17 +15,40 @@ import qualified Data.Map.Ordered as OM
 import qualified Data.Set as S
 import qualified Data.Foldable as F
 import qualified Data.Map as M
+import qualified Data.Map.Lazy as LM
 
 import Debug.Trace
 
 import Data.List (unfoldr, sortBy, maximumBy, minimumBy)
 
+data TableWithValues a = TableWithValues [Text] [(Text, [a])]
+
 data CrossTab = SingleItem Text | CrossTab [TextFrag] [TextFrag]
   deriving (Show, Eq)
 
 data TextFrag = Prefix Text | Suffix Text
   deriving (Show, Eq, Ord)
 
+splitIntoTablesWithValues :: Text
+                            -> Text
+                            -> LM.Map Text a -- ^ map from which values will be taken,
+                                            -- deliberately a lazy map so that
+                                            -- values could be shown one by one
+                            -> [Text]
+                            -> [TableWithValues a]
+splitIntoTablesWithValues defaultMainHeader defaultSecondaryHeader mapping =
+  joinSingleItems . map (convertIntoTableWithValues defaultMainHeader defaultSecondaryHeader mapping) . splitIntoCrossTabs
+  where joinSingleItems (TableWithValues h@[_, _] arows : TableWithValues [_, _] brows : rest) =
+          joinSingleItems (TableWithValues h (arows ++ brows) : rest)
+        joinSingleItems (e : rest) = e : joinSingleItems rest
+        joinSingleItems [] = []
+
+convertIntoTableWithValues :: Text -> Text -> LM.Map Text a -> CrossTab -> TableWithValues a
+convertIntoTableWithValues defaultMainHeader defaultSecondaryHeader mapping (SingleItem t) =
+  TableWithValues [defaultMainHeader, defaultSecondaryHeader] [(t, [mapping LM.! t])]
+convertIntoTableWithValues defaultMainHeader defaultSecondaryHeader mapping (CrossTab rowNames columnNames) =
+  TableWithValues (T.empty : (map toText columnNames)) (map processRow rowNames)
+  where processRow rowName = (toText rowName, map (\colName -> mapping LM.! (combineFrags rowName colName)) columnNames)
 
 splitIntoCrossTabs :: [Text] -> [CrossTab]
 splitIntoCrossTabs inputs =
@@ -103,6 +128,10 @@ toSet :: CrossTab -> S.Set Text
 toSet (SingleItem t) = S.singleton t
 toSet (CrossTab rowNames columnNames) = S.fromList [rName `combineFrags` cName | rName <- rowNames, cName <- columnNames]
 
+toText :: TextFrag -> Text
+toText (Prefix prefix) = T.stripEnd prefix
+toText (Suffix prefix) = T.stripStart prefix
+
 combineFrags :: TextFrag -> TextFrag -> Text
 combineFrags (Prefix prefix) (Suffix suffix) = prefix <> suffix
 combineFrags (Suffix suffix) (Prefix prefix) = prefix <> suffix

From 4f38ecada949b2cf1e6a745f70ed53a05aa0c242 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 4 Jan 2020 21:58:32 +0100
Subject: [PATCH 20/23] Fix nix script

(Because a new extra dependency was added.)
---
 default.nix | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/default.nix b/default.nix
index 99f7992..666a32a 100644
--- a/default.nix
+++ b/default.nix
@@ -26,7 +26,7 @@ let
   stack2nix-script = import "${static-haskell-nix}/static-stack2nix-builder/stack2nix-script.nix" {
     inherit pkgs;
     stack-project-dir = toString ./.; # where stack.yaml is
-    hackageSnapshot = "2019-05-08T00:00:00Z"; # pins e.g. extra-deps without hashes or revisions
+    hackageSnapshot = "2020-01-03T00:00:00Z"; # pins e.g. extra-deps without hashes or revisions
   };
 
   static-stack2nix-builder = import "${static-haskell-nix}/static-stack2nix-builder/default.nix" {

From 8f87b881b8b970422fb7da4606a8785cf3bf163b Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 4 Jan 2020 22:01:56 +0100
Subject: [PATCH 21/23] Bump up version

---
 CHANGELOG.md | 4 ++++
 geval.cabal  | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b2bb09e..96a8094 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,8 @@
 
+## 1.27.0.0
+
+* Results are formatted in cross-tables (if possible)
+
 ## 1.26.0.0
 
 * Change the meaning of WER (WER is calculated for the whole set now
diff --git a/geval.cabal b/geval.cabal
index 28fd65a..7737a40 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -1,5 +1,5 @@
 name:                geval
-version:             1.26.0.0
+version:             1.27.0.0
 synopsis:            Machine learning evaluation tools
 description:         Please see README.md
 homepage:            http://github.com/name/project

From e170c378648ce87e413a6d39fb85ac3e4ab025d1 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 11 Jan 2020 17:02:49 +0100
Subject: [PATCH 22/23] Add substitution operation

---
 src/GEval/EvaluationScheme.hs                 | 51 ++++++++++++++++---
 test/Spec.hs                                  |  2 +
 .../test-A/out.tsv                            |  5 ++
 .../accuracy-with-flags/config.txt            |  1 +
 .../accuracy-with-flags/test-A/expected.tsv   |  5 ++
 5 files changed, 58 insertions(+), 6 deletions(-)
 create mode 100644 test/accuracy-with-flags/accuracy-with-flags-solution/test-A/out.tsv
 create mode 100644 test/accuracy-with-flags/accuracy-with-flags/config.txt
 create mode 100644 test/accuracy-with-flags/accuracy-with-flags/test-A/expected.tsv

diff --git a/src/GEval/EvaluationScheme.hs b/src/GEval/EvaluationScheme.hs
index 97235d6..d138f06 100644
--- a/src/GEval/EvaluationScheme.hs
+++ b/src/GEval/EvaluationScheme.hs
@@ -27,6 +27,7 @@ data PreprocessingOperation = RegexpMatch Regex
                               | Sorting
                               | SetName Text
                               | SetPriority Int
+                              | RegexpSubstition Regex Text
   deriving (Eq)
 
 leftParameterBracket :: Char
@@ -53,15 +54,33 @@ readOps ('S':theRest) = (Sorting:ops, theRest')
     where (ops, theRest') = readOps theRest
 readOps ('N':theRest) = handleParametrizedOp (SetName . pack) theRest
 readOps ('P':theRest) = handleParametrizedOp (SetPriority . read) theRest
+readOps ('s':theRest) = handleParametrizedBinaryOp (\a b -> RegexpSubstition (either (\e -> error ("invalid regexp in the s flag: " ++ e)) id $ compileM (BSU.fromString a) []) (pack b)) theRest -- report the regexp compiler's message instead of a bare undefined
 readOps s = ([], s)
 
 handleParametrizedOp :: (String -> PreprocessingOperation) -> String -> ([PreprocessingOperation], String)
-handleParametrizedOp constructor (leftParameterBracket:theRest) =
+handleParametrizedOp constructor theRest =
+  case parseParameter theRest of
+    (Nothing, s) -> ([], s)
+    (Just param, theRest') -> let (ops, theRest'') = readOps theRest'
+                             in ((constructor param):ops, theRest'')
+
+handleParametrizedBinaryOp :: (String -> String -> PreprocessingOperation) -> String -> ([PreprocessingOperation], String)
+handleParametrizedBinaryOp constructor theRest =
+  case parseParameter theRest of
+    (Nothing, s) -> ([], s)
+    (Just paramA, theRest') ->
+      case parseParameter theRest' of
+        (Nothing, s) -> ([], s)
+        (Just paramB, theRest'') -> let (ops, theRest''') = readOps theRest''
+                                   in ((constructor paramA paramB):ops, theRest''')
+
+parseParameter :: String -> (Maybe String, String)
+parseParameter (c:theRest) | c == leftParameterBracket = -- explicit guard: a bare name in a pattern binds a new variable and would accept any first character
   case break (== rightParameterBracket) theRest of
-    (s, []) -> ([], s)
-    (param, (_:theRest')) -> let (ops, theRest'') = readOps theRest'
-                            in ((constructor param):ops, theRest'')
-handleParametrizedOp _ s = ([], s)
+    (s, []) -> (Nothing, s)
+    (param, (_:theRest')) -> (Just param, theRest')
+parseParameter s = (Nothing, s)
+
 
 instance Show EvaluationScheme where
   show (EvaluationScheme metric operations) = (show metric) ++ (if null operations
@@ -97,9 +116,28 @@ instance Show PreprocessingOperation where
   show Sorting = "S"
   show (SetName t) = parametrizedOperation "N" (unpack t)
   show (SetPriority p) = parametrizedOperation "P" (show p)
+  show (RegexpSubstition (Regex _ regexp) s) = "s" ++ (formatParameter $ BSU.toString regexp) ++ (formatParameter $ unpack s)
+
+applySubstitution :: Regex -> Text -> Text -> Text
+applySubstitution r substitution t =
+  gsub r (handleRefs substitution) t
+
+handleRefs :: Text -> Text -> [Text] -> Text
+handleRefs substitution mainMatch subMatches = gsub refRegexp handleRef substitution
+  where Right refRegexp = compileM (BSU.fromString "\\\\\\d+") []
+        indexables = mainMatch : subMatches
+        handleRef :: Text -> Text
+        handleRef ref =
+          let ix = (read $ tail $ unpack ref)
+          in if ix >= length indexables
+             then (pack "")
+             else indexables !! ix
 
 parametrizedOperation :: String -> String -> String
-parametrizedOperation opCode opArg = opCode ++ [leftParameterBracket] ++ opArg ++ [rightParameterBracket]
+parametrizedOperation opCode opArg = opCode ++ (formatParameter opArg)
+
+formatParameter :: String -> String
+formatParameter p = [leftParameterBracket] ++ p ++ [rightParameterBracket]
 
 applyPreprocessingOperations :: EvaluationScheme -> Text -> Text
 applyPreprocessingOperations (EvaluationScheme _ operations) t = foldl (flip applyPreprocessingOperation) t operations
@@ -111,3 +149,4 @@ applyPreprocessingOperation UpperCasing = toUpper
 applyPreprocessingOperation Sorting = Data.Text.unwords . sort . Data.Text.words
 applyPreprocessingOperation (SetName _) = id
 applyPreprocessingOperation (SetPriority _) = id
+applyPreprocessingOperation (RegexpSubstition regex substition) = applySubstitution regex substition
diff --git a/test/Spec.hs b/test/Spec.hs
index b4d3511..ff8334b 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -339,6 +339,8 @@ main = hspec $ do
   describe "Preprocessing operations" $ do
     it "F1 with preprocessing" $ do
       runGEvalTest "f1-with-preprocessing" `shouldReturnAlmost` 0.57142857142857
+    it "Regexp substition" $ do
+      runGEvalTest "accuracy-with-flags" `shouldReturnAlmost` 0.8
   describe "evaluating single lines" $ do
     it "RMSE" $ do
       (MetricOutput v _) <- gevalCoreOnSingleLines RMSE id RawItemTarget
diff --git a/test/accuracy-with-flags/accuracy-with-flags-solution/test-A/out.tsv b/test/accuracy-with-flags/accuracy-with-flags-solution/test-A/out.tsv
new file mode 100644
index 0000000..f4cf94b
--- /dev/null
+++ b/test/accuracy-with-flags/accuracy-with-flags-solution/test-A/out.tsv
@@ -0,0 +1,5 @@
+b88 b901
+a100
+a93
+t34
+y23
diff --git a/test/accuracy-with-flags/accuracy-with-flags/config.txt b/test/accuracy-with-flags/accuracy-with-flags/config.txt
new file mode 100644
index 0000000..0013dd6
--- /dev/null
+++ b/test/accuracy-with-flags/accuracy-with-flags/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:s<[abc](\d+)><!\1>
diff --git a/test/accuracy-with-flags/accuracy-with-flags/test-A/expected.tsv b/test/accuracy-with-flags/accuracy-with-flags/test-A/expected.tsv
new file mode 100644
index 0000000..16a810e
--- /dev/null
+++ b/test/accuracy-with-flags/accuracy-with-flags/test-A/expected.tsv
@@ -0,0 +1,5 @@
+a88 b901
+c100
+b93
+t34
+z23

From 68dc2fad0c987dd1ef2d72275a0da54c1dcd6a0a Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 11 Jan 2020 17:03:57 +0100
Subject: [PATCH 23/23] Bump up version

---
 CHANGELOG.md | 4 ++++
 geval.cabal  | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 96a8094..f63fa8e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,8 @@
 
+## 1.28.0.0
+
+* Add `s` flag for substitution
+
 ## 1.27.0.0
 
 * Results are formatted in cross-tables (if possible)
diff --git a/geval.cabal b/geval.cabal
index 7737a40..dfd1057 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -1,5 +1,5 @@
 name:                geval
-version:             1.27.0.0
+version:             1.28.0.0
 synopsis:            Machine learning evaluation tools
 description:         Please see README.md
 homepage:            http://github.com/name/project