From 3ebe158e55c4bdb96b3709cdfef673867778a8d9 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 1 Aug 2020 21:27:04 +0200
Subject: [PATCH 1/4] Describe flags, add "c" and "t" flags.

Also add tests for flags
---
 README.md                                     | 259 +++++++++++++++++-
 src/GEval/EvaluationScheme.hs                 |  11 +-
 test/Spec.hs                                  |  21 ++
 .../flags-case-fold-solution/test-A/out.tsv   |  10 +
 .../flags-case-fold/config.txt                |   1 +
 .../flags-case-fold/test-A/expected.tsv       |  10 +
 .../flags-filtering-solution/test-A/out.tsv   |  10 +
 .../flags-filtering/config.txt                |   1 +
 .../flags-filtering/test-A/expected.tsv       |  10 +
 .../flags-filtering/test-A/in.tsv             |  10 +
 .../flags-lowercase-solution/test-A/out.tsv   |  10 +
 .../flags-lowercase/config.txt                |   1 +
 .../flags-lowercase/test-A/expected.tsv       |  10 +
 .../flags-none-solution/test-A/out.tsv        |  10 +
 test/flags-none/flags-none/config.txt         |   1 +
 .../flags-none/flags-none/test-A/expected.tsv |  10 +
 .../test-A/out.tsv                            |  10 +
 .../flags-regexp-matching-anchor/config.txt   |   1 +
 .../test-A/expected.tsv                       |  10 +
 .../test-A/out.tsv                            |  10 +
 .../flags-regexp-matching/config.txt          |   1 +
 .../flags-regexp-matching/test-A/expected.tsv |  10 +
 .../test-A/out.tsv                            |  10 +
 .../flags-regexp-substitution-ref/config.txt  |   1 +
 .../test-A/expected.tsv                       |  10 +
 .../test-A/out.tsv                            |  10 +
 .../flags-regexp-substitution/config.txt      |   1 +
 .../test-A/expected.tsv                       |  10 +
 .../test-A/out.tsv                            |  10 +
 .../config.txt                                |   1 +
 .../test-A/expected.tsv                       |  10 +
 .../test-A/out.tsv                            |  10 +
 .../flags-regexp-token-matching/config.txt    |   1 +
 .../test-A/expected.tsv                       |  10 +
 .../flags-uppercase-solution/test-A/out.tsv   |  10 +
 .../flags-uppercase/config.txt                |   1 +
 .../flags-uppercase/test-A/expected.tsv       |  10 +
 37 files changed, 530 insertions(+), 2 deletions(-)
 create mode 100644 test/flags-case-fold/flags-case-fold-solution/test-A/out.tsv
 create mode 100644 test/flags-case-fold/flags-case-fold/config.txt
 create mode 100644 test/flags-case-fold/flags-case-fold/test-A/expected.tsv
 create mode 100644 test/flags-filtering/flags-filtering-solution/test-A/out.tsv
 create mode 100644 test/flags-filtering/flags-filtering/config.txt
 create mode 100644 test/flags-filtering/flags-filtering/test-A/expected.tsv
 create mode 100644 test/flags-filtering/flags-filtering/test-A/in.tsv
 create mode 100644 test/flags-lowercase/flags-lowercase-solution/test-A/out.tsv
 create mode 100644 test/flags-lowercase/flags-lowercase/config.txt
 create mode 100644 test/flags-lowercase/flags-lowercase/test-A/expected.tsv
 create mode 100644 test/flags-none/flags-none-solution/test-A/out.tsv
 create mode 100644 test/flags-none/flags-none/config.txt
 create mode 100644 test/flags-none/flags-none/test-A/expected.tsv
 create mode 100644 test/flags-regexp-matching-anchor/flags-regexp-matching-anchor-solution/test-A/out.tsv
 create mode 100644 test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/config.txt
 create mode 100644 test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/test-A/expected.tsv
 create mode 100644 test/flags-regexp-matching/flags-regexp-matching-solution/test-A/out.tsv
 create mode 100644 test/flags-regexp-matching/flags-regexp-matching/config.txt
 create mode 100644 test/flags-regexp-matching/flags-regexp-matching/test-A/expected.tsv
 create mode 100644 test/flags-regexp-substitution-ref/flags-regexp-substitution-ref-solution/test-A/out.tsv
 create mode 100644 test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/config.txt
 create mode 100644 test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/test-A/expected.tsv
 create mode 100644 test/flags-regexp-substitution/flags-regexp-substitution-solution/test-A/out.tsv
 create mode 100644 test/flags-regexp-substitution/flags-regexp-substitution/config.txt
 create mode 100644 test/flags-regexp-substitution/flags-regexp-substitution/test-A/expected.tsv
 create mode 100644 test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor-solution/test-A/out.tsv
 create mode 100644 test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/config.txt
 create mode 100644 test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/test-A/expected.tsv
 create mode 100644 test/flags-regexp-token-matching/flags-regexp-token-matching-solution/test-A/out.tsv
 create mode 100644 test/flags-regexp-token-matching/flags-regexp-token-matching/config.txt
 create mode 100644 test/flags-regexp-token-matching/flags-regexp-token-matching/test-A/expected.tsv
 create mode 100644 test/flags-uppercase/flags-uppercase-solution/test-A/out.tsv
 create mode 100644 test/flags-uppercase/flags-uppercase/config.txt
 create mode 100644 test/flags-uppercase/flags-uppercase/test-A/expected.tsv

diff --git a/README.md b/README.md
index 3ed16b9..e410d12 100644
--- a/README.md
+++ b/README.md
@@ -88,7 +88,7 @@ to happen on macOS, as these packages are usually installed out of the box on Li
 In case the `lzma` package is not installed on your Linux, you need to run (assuming Debian/Ubuntu):
 
     sudo apt-get install pkg-config liblzma-dev libpq-dev libpcre3-dev libcairo2-dev libbz2-dev
-    
+
 #### Windows issues
 
 If you see this message on Windows during executing `stack test` command:
@@ -480,6 +480,263 @@ So now you can see that the accuracy is over 78% and the likelihood
     in<1>:Brytania	2	0.53333333	0.01357876718525224600
     in<1>:rewolucja	2	0.53333333	0.01357876718525224600
 
+## Metric flags
+
+GEval offers a number of *flags* to modify the way an evaluation
+metric is calculated or presented. For instance, if you use `BLEU:u`
+instead of `BLEU`, the BLEU metric (a standard metric for machine
+translation) will be evaluated on the actual and expected outputs
+upper-cased. In other words, flags can be used to _normalize_ the text
+before running the actual evaluation metric.
+
+Flags are given after a colon (`:`) and can be combined. Some flags
+can have arguments, they should be given in angle brackets (`<...>`).
+
+The following files will be used in example calculations, `expected.tsv`:
+
+    foo 123 bar
+    29008 Straße
+    xyz
+    aaa 3 4 bbb
+    qwerty 100
+    WWW WWW
+    test
+    104
+    BAR Foo baz
+    OK 7777
+
+`out.tsv`:
+
+    foo 999 BAR
+    29008 STRASSE
+    xyz
+    aaa BBB 34
+    qwerty 1000
+    WWW WWW WWW WWW WWW WWW WWW WWW
+    testtttttt
+    104
+    Foo baz BAR
+    Ok 7777
+
+`in.tsv`:
+
+
+
+Without any flags, the `Accuracy` metric is:
+
+    $ geval -o out.tsv -e expected.tsv --metric Accuracy
+    0.2
+
+(As only two items are correct: `xyz` and `104`.)
+
+### Case change
+
+#### `l` — lower-case
+
+    $ geval -o out.tsv -e expected.tsv --metric Accuracy:l
+    0.3
+
+#### `u` — upper-case
+
+    $ geval -o out.tsv -e expected.tsv --metric Accuracy:l
+    0.4
+
+Why the result is differnt for lower-casing and upper-casing? Some
+characters, e.g. German _ß_, are tricky. If you upper-case _Straße_
+you've got _STRASSE_, but if you lower-case it, you obtain _straße_,
+not _strasse_! For this reason, when you want to disregard case when
+evaluating your metric, it is better to use _case folding_ rather
+than lower- or upper-casing:
+
+#### `c` — case fold
+
+    $ geval -o out.tsv -e expected.tsv --metric Accuracy:c
+    0.4
+
+### Manipulations with regular expressions
+
+#### `m<REGEXP>` matching a given PCRE regexp
+
+The evaluation metric will be calculated only on the parts of the
+outputs matching a given regular expression. This can be used when you
+want to focus on some specific parts of a text. For instance, we could
+calculate Accuracy only considering digits (disregarding all other
+characters, including spaces).
+
+    $ geval -o out.tsv -e expected.tsv --metric 'Accuracy:m<\d+>'
+    0.8
+
+(Note that apostrophes are due to using Bash here, if you put it into
+the `config.txt` file you should omit apostrophes: `--metric Accuracy:m<\d+>`.)
+
+All matches are considered and concatenated, if no match is found, an empty string is assumed
+(hence, e.g., `testtttttt` is considered a hit for `test` after this normalization).
+Note that both  `aaa 3 4 bbb` and `aaa BBB 34` will be normalized to `34` here.
+
+You can use regexp anchoring operators (`^` or `$`). This will refer
+to the beginning or end of the whole *line*. You could use it to
+calculate the accuracy considering only the first two characters of output lines:
+
+    $ geval -o out.tsv -e expected.tsv --metric 'Accuracy:m<^..>'
+    0.8
+
+#### `t<REGEXP>` — filtering tokens using a PCRE regexp
+
+This applies a regexp for each token separately (tokens are separated
+by spaces, you can use a non-standard tokenizer with the `--tokenizer` option if needed).
+All the tokens not matching the regexp are filtered out (but spaces are recovered).
+
+    $ geval -o out.tsv -e expected.tsv --metric 'Accuracy:t<\d+>'
+    0.7
+
+Now, the anchoring operators refer to the beginning or end of a
+*token*. For instance, let's consider only tokens starting with _b_:
+
+    $ geval -o out.tsv -e expected.tsv --metric 'Accuracy:t<^b>'
+    0.8
+
+With `m` or `t` flags you can only select parts of output lines. What
+if you want to do some replacements, e.g. collapse some
+characters/strings into a standard form? You should use the `s` flag for this:
+
+#### `s<REGEXP><REPLACEMENT>` — replace parts of output lines matching a regexp
+
+This will substitute all occurrences of strings matching REGEXP with
+REPLACEMENT. For instance, we could replace all numbers with a special token NUMBER.
+All the other parts of a line are left intact.
+
+    $ geval -o out.tsv -e expected.tsv --metric 'Accuracy:s<\d+><NUMBER>'
+    0.3
+
+You can use special operators `\0`, `\1`, `\2` to refer to parts matched by the regexp.
+
+    $ geval -o out.tsv -e expected.tsv --metric 'Accuracy:s<([A-Za-z])\S+><WORD-WITH-FIRST-LETTER-\1>'
+    0.5
+
+### Other normalizations
+
+#### `S` — sort all tokens
+
+This will sort all tokens, e.g. `foo bar baz` will be treated as `bar baz foo`.
+
+### Filtering
+
+#### `f<FEATURE>` — filtering
+
+Flags such as `u`, `m<...>`, `s<...><...>` etc. work within a line
+(item), they won't change the number of items being evaluated. To
+consider only a subset of items, use the `f<FEATURE>` flag — only the
+lines containing the feature FEATURE will be considered during metric
+calculation. Features are the same as listed by the `--worst-features`
+option, e.g. `exp:foo` would accept only lines with the expected
+output containing the token `foo`, `in[2]:bar` — lines with the second
+column of input containing the token `bar` (contrary to
+`--worst-features` square brackets should be used instead of angle ones for indexing).
+
+You *MUST* supply an input file when you use the `f<...>` flag. Assume
+the following `in.tsv` file:
+
+    12	this aaa
+    32	this bbb
+    32	this ccc
+    12	that aaa
+    12	that aaa
+    10	that aaa
+    11	that
+    11	that
+    17	this
+    12	that
+
+    $ geval -o out.tsv -e expected.tsv -i in.tsv --metric 'Accuracy:f<in[2]:this>'
+    0.25
+
+### Presentation
+
+Some flags are used not for modifying the result, but rather changing
+the way it is presented by GEval (or the associated
+[Gonito](https://gonito.net) Web application).
+
+#### `N<NAME>` — use an alternative name
+
+Sometimes, the metric name gets complicated, you can use the `N<...>`
+flag to get a more human-readable name.
+
+This will be used:
+
+* by GEval when presenting results from more than one metric (when
+  only one metric is calculated, its name is not given anyway),
+* by Gonito, e.g. in table headers.
+
+    $ geval -o out.tsv -e expected.tsv --metric Accuracy --metric 'MultiLabel-F1:N<F-score>' --metric 'MultiLabel-F0:N<Precision>' --metric 'MultiLabel-F9999:N<Recall>'
+    Accuracy	0.200
+    F-score	0.511
+    Precision	0.462
+    Recall	0.571
+
+(GEval does not have separate Precision/Recall metrics, but they can
+be easily obtained by setting the parameter of the F-score to,
+respectively, 0 and a large number.)
+
+More than one name can be given. In such a case, the names will be concatenated with spaces.
+
+    $ geval --precision 3 -o out.tsv -e expected.tsv --metric 'Accuracy' --metric 'MultiLabel-F1:N<F-score>N<on>N<tokens>'
+    Accuracy	0.200
+    F-score on tokens	0.511
+
+This is handy, when combined with the `{...}` operator (see below).
+
+#### `P<priority>` — set the priority (within the Gonito platform)
+
+This sets the priority level, considered when the results are displayed in the Gonito platform.
+It has no effect in GEval as such (it is simply disregarded in GEval).
+
+    $ geval --precision 3 -o out.tsv -e expected.tsv --metric 'Accuracy:P<1>' --metric 'MultiLabel-F1:P<3>'               Accuracy:P<1>	0.200
+    MultiLabel-F1.0:P<3>	0.511
+
+The priority is interpreted by Gonito in the following way:
+
+  * 1 — show everywhere, including the main leaderboard table
+  * 2 — show on the secondary leaderboard table and in detailed information for a submission
+  * 3 — show only in detailed information for a submission
+
+Although you can specify `P<...>` more than once, only the first value
+will be considered for a given metric (this might be important when combined with the `{...}` operator).
+
+### Combining flags
+
+Flags can be combined, just by concatenation (`:` should be given only once):
+
+    $ geval -o out.tsv -e expected.tsv -i in.tsv --metric Accuracy --metric 'Accuracy:f<in[2]:this>cs<\d><X>N<MyWeirdMetric>'
+    Accuracy	0.2
+    MyWeirdMetric	0.75
+
+Note that the order of flags might be sometimes significant, in
+general, they are considered from left to right.
+
+### Cartesian operator `{...}`
+
+Sometimes, you need to define a large number of similar metrics. Then
+you can use the special `{...}` operator interpreted by GEval (not
+Bash!). For instance `{foo,bar}xyz{aaa,bbb,ccc}` will be internally
+considered as the Cartesian product (i.e. you'll get all the
+combinations): `fooxyzaaa`, `fooxyzbbb`, `fooxyzccc`, `barxyzaaa`,
+`barxyzbbb`, `barxyzccc`.
+
+For example, let's assume that we want accuracy, F-score, precision
+and recall in both case-sensitive and case-insensitive versions.
+Here's the way to calculate all these 8 metrics in a concise manner:
+
+    $ geval --precision 3 -o out.tsv -e expected.tsv -i in.tsv --metric '{Accuracy:N<Acc>,MultiLabel-F1:N<F1>,MultiLabel-F0:N<P>,MultiLabel-F9999:N<R>}N<case>{N<sensitive>,cN<non-sensitive>}'
+        sensitive	non-sensitive
+    Acc case	0.200	0.400
+    F1 case	0.511	0.681
+    P case	0.462	0.615
+    R case	0.571	0.762
+
+Note that GEval automagically puts the results in a table! (Well,
+_case_ probably should be written in headers, but, well, it generates
+the table totally on its own.)
+
 ## Handling headers
 
 When dealing with TSV files, you often face a dilemma whether to add a
diff --git a/src/GEval/EvaluationScheme.hs b/src/GEval/EvaluationScheme.hs
index 135f9d8..b335087 100644
--- a/src/GEval/EvaluationScheme.hs
+++ b/src/GEval/EvaluationScheme.hs
@@ -12,7 +12,7 @@ import GEval.Metric
 
 import Text.Regex.PCRE.Heavy
 import Text.Regex.PCRE.Light.Base (Regex(..))
-import Data.Text (Text(..), concat, toLower, toUpper, pack, unpack, words, unwords)
+import Data.Text (Text(..), concat, toCaseFold, toLower, toUpper, pack, unpack, words, unwords)
 import Data.List (intercalate, break, sort)
 import Data.Either
 import Data.Maybe (fromMaybe, catMaybes)
@@ -23,8 +23,10 @@ data EvaluationScheme = EvaluationScheme Metric [PreprocessingOperation]
   deriving (Eq)
 
 data PreprocessingOperation = RegexpMatch Regex
+                              | RegexpTokenMatch Regex
                               | LowerCasing
                               | UpperCasing
+                              | CaseFolding
                               | Sorting
                               | SetName Text
                               | SetPriority Int
@@ -51,7 +53,10 @@ readOps ('l':theRest) = (LowerCasing:ops, theRest')
     where (ops, theRest') = readOps theRest
 readOps ('u':theRest) = (UpperCasing:ops, theRest')
     where (ops, theRest') = readOps theRest
+readOps ('c':theRest) = (CaseFolding:ops, theRest')
+    where (ops, theRest') = readOps theRest
 readOps ('m':theRest) = handleParametrizedOp (RegexpMatch . (fromRight undefined) . ((flip compileM) []) . BSU.fromString) theRest
+readOps ('t':theRest) = handleParametrizedOp (RegexpTokenMatch . (fromRight undefined) . ((flip compileM) []) . BSU.fromString) theRest
 readOps ('S':theRest) = (Sorting:ops, theRest')
     where (ops, theRest') = readOps theRest
 readOps ('N':theRest) = handleParametrizedOp (SetName . pack) theRest
@@ -120,8 +125,10 @@ evaluationSchemeMetric (EvaluationScheme metric _) = metric
 
 instance Show PreprocessingOperation where
   show (RegexpMatch (Regex _ regexp)) = parametrizedOperation "m" (BSU.toString regexp)
+  show (RegexpTokenMatch (Regex _ regexp)) = parametrizedOperation "t" (BSU.toString regexp)
   show LowerCasing = "l"
   show UpperCasing = "u"
+  show CaseFolding = "c"
   show Sorting = "S"
   show (SetName t) = parametrizedOperation "N" (unpack t)
   show (SetPriority p) = parametrizedOperation "P" (show p)
@@ -154,8 +161,10 @@ applyPreprocessingOperations (EvaluationScheme _ operations) t = foldl (flip app
 
 applyPreprocessingOperation :: PreprocessingOperation -> Text -> Text
 applyPreprocessingOperation (RegexpMatch regex) = Data.Text.concat . (map fst) . (scan regex)
+applyPreprocessingOperation (RegexpTokenMatch regex) = Data.Text.unwords . (filter (≈ regex)) . Data.Text.words
 applyPreprocessingOperation LowerCasing = toLower
 applyPreprocessingOperation UpperCasing = toUpper
+applyPreprocessingOperation CaseFolding = toCaseFold
 applyPreprocessingOperation Sorting = Data.Text.unwords . sort . Data.Text.words
 applyPreprocessingOperation (SetName _) = id
 applyPreprocessingOperation (SetPriority _) = id
diff --git a/test/Spec.hs b/test/Spec.hs
index 43b8a3d..2730046 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -364,6 +364,27 @@ main = hspec $ do
       runGEvalTest "f1-with-preprocessing" `shouldReturnAlmost` 0.57142857142857
     it "Regexp substition" $ do
       runGEvalTest "accuracy-with-flags" `shouldReturnAlmost` 0.8
+  describe "Flag examples" $ do
+    it "none" $ do
+      runGEvalTest "flags-none" `shouldReturnAlmost` 0.2
+    it "lower-case" $ do
+      runGEvalTest "flags-lowercase" `shouldReturnAlmost` 0.3
+    it "upper-case" $ do
+      runGEvalTest "flags-uppercase" `shouldReturnAlmost` 0.4
+    it "regexp-matching" $ do
+      runGEvalTest "flags-regexp-matching" `shouldReturnAlmost` 0.8
+    it "regexp-matching-anchor" $ do
+      runGEvalTest "flags-regexp-matching-anchor" `shouldReturnAlmost` 0.8
+    it "regexp-token-matching" $ do
+      runGEvalTest "flags-regexp-token-matching" `shouldReturnAlmost` 0.7
+    it "regexp-token-matching-anchor" $ do
+      runGEvalTest "flags-regexp-token-matching-anchor" `shouldReturnAlmost` 0.8
+    it "regexp-substitution" $ do
+      runGEvalTest "flags-regexp-substitution" `shouldReturnAlmost` 0.3
+    it "regexp-substitution-ref" $ do
+      runGEvalTest "flags-regexp-substitution-ref" `shouldReturnAlmost` 0.5
+    it "filtering" $ do
+      runGEvalTest "flags-filtering" `shouldReturnAlmost` 0.25
   describe "evaluating single lines" $ do
     it "RMSE" $ do
       (MetricOutput (SimpleRun v) _) <- gevalCoreOnSingleLines RMSE id RawItemTarget
diff --git a/test/flags-case-fold/flags-case-fold-solution/test-A/out.tsv b/test/flags-case-fold/flags-case-fold-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-case-fold/flags-case-fold-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-case-fold/flags-case-fold/config.txt b/test/flags-case-fold/flags-case-fold/config.txt
new file mode 100644
index 0000000..82c1775
--- /dev/null
+++ b/test/flags-case-fold/flags-case-fold/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:c
diff --git a/test/flags-case-fold/flags-case-fold/test-A/expected.tsv b/test/flags-case-fold/flags-case-fold/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-case-fold/flags-case-fold/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777
diff --git a/test/flags-filtering/flags-filtering-solution/test-A/out.tsv b/test/flags-filtering/flags-filtering-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-filtering/flags-filtering-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-filtering/flags-filtering/config.txt b/test/flags-filtering/flags-filtering/config.txt
new file mode 100644
index 0000000..dfa6b66
--- /dev/null
+++ b/test/flags-filtering/flags-filtering/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:f<in[2]:this>
diff --git a/test/flags-filtering/flags-filtering/test-A/expected.tsv b/test/flags-filtering/flags-filtering/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-filtering/flags-filtering/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777
diff --git a/test/flags-filtering/flags-filtering/test-A/in.tsv b/test/flags-filtering/flags-filtering/test-A/in.tsv
new file mode 100644
index 0000000..72292ef
--- /dev/null
+++ b/test/flags-filtering/flags-filtering/test-A/in.tsv
@@ -0,0 +1,10 @@
+12	this aaa
+32	this bbb
+32	this ccc
+12	that aaa
+12	that aaa
+10	that aaa
+11	that
+11	that
+17	this
+12	that
diff --git a/test/flags-lowercase/flags-lowercase-solution/test-A/out.tsv b/test/flags-lowercase/flags-lowercase-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-lowercase/flags-lowercase-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-lowercase/flags-lowercase/config.txt b/test/flags-lowercase/flags-lowercase/config.txt
new file mode 100644
index 0000000..e3c75cd
--- /dev/null
+++ b/test/flags-lowercase/flags-lowercase/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:l
diff --git a/test/flags-lowercase/flags-lowercase/test-A/expected.tsv b/test/flags-lowercase/flags-lowercase/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-lowercase/flags-lowercase/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777
diff --git a/test/flags-none/flags-none-solution/test-A/out.tsv b/test/flags-none/flags-none-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-none/flags-none-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-none/flags-none/config.txt b/test/flags-none/flags-none/config.txt
new file mode 100644
index 0000000..337a0cc
--- /dev/null
+++ b/test/flags-none/flags-none/config.txt
@@ -0,0 +1 @@
+--metric Accuracy
diff --git a/test/flags-none/flags-none/test-A/expected.tsv b/test/flags-none/flags-none/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-none/flags-none/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777
diff --git a/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor-solution/test-A/out.tsv b/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/config.txt b/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/config.txt
new file mode 100644
index 0000000..f420c60
--- /dev/null
+++ b/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:m<^..>
diff --git a/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/test-A/expected.tsv b/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777
diff --git a/test/flags-regexp-matching/flags-regexp-matching-solution/test-A/out.tsv b/test/flags-regexp-matching/flags-regexp-matching-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-regexp-matching/flags-regexp-matching-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-regexp-matching/flags-regexp-matching/config.txt b/test/flags-regexp-matching/flags-regexp-matching/config.txt
new file mode 100644
index 0000000..ce11160
--- /dev/null
+++ b/test/flags-regexp-matching/flags-regexp-matching/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:m<\d+>
diff --git a/test/flags-regexp-matching/flags-regexp-matching/test-A/expected.tsv b/test/flags-regexp-matching/flags-regexp-matching/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-regexp-matching/flags-regexp-matching/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777
diff --git a/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref-solution/test-A/out.tsv b/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/config.txt b/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/config.txt
new file mode 100644
index 0000000..2fca6f8
--- /dev/null
+++ b/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:s<([A-Za-z])\S+><WORD-WITH-FIRST-LETTER-\1>
diff --git a/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/test-A/expected.tsv b/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777
diff --git a/test/flags-regexp-substitution/flags-regexp-substitution-solution/test-A/out.tsv b/test/flags-regexp-substitution/flags-regexp-substitution-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-regexp-substitution/flags-regexp-substitution-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-regexp-substitution/flags-regexp-substitution/config.txt b/test/flags-regexp-substitution/flags-regexp-substitution/config.txt
new file mode 100644
index 0000000..3d1a792
--- /dev/null
+++ b/test/flags-regexp-substitution/flags-regexp-substitution/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:s<\d+><NUMBER>
diff --git a/test/flags-regexp-substitution/flags-regexp-substitution/test-A/expected.tsv b/test/flags-regexp-substitution/flags-regexp-substitution/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-regexp-substitution/flags-regexp-substitution/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777
diff --git a/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor-solution/test-A/out.tsv b/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/config.txt b/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/config.txt
new file mode 100644
index 0000000..8a33294
--- /dev/null
+++ b/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:t<^b>
diff --git a/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/test-A/expected.tsv b/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777
diff --git a/test/flags-regexp-token-matching/flags-regexp-token-matching-solution/test-A/out.tsv b/test/flags-regexp-token-matching/flags-regexp-token-matching-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-regexp-token-matching/flags-regexp-token-matching-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-regexp-token-matching/flags-regexp-token-matching/config.txt b/test/flags-regexp-token-matching/flags-regexp-token-matching/config.txt
new file mode 100644
index 0000000..3aa87e0
--- /dev/null
+++ b/test/flags-regexp-token-matching/flags-regexp-token-matching/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:t<\d+>
diff --git a/test/flags-regexp-token-matching/flags-regexp-token-matching/test-A/expected.tsv b/test/flags-regexp-token-matching/flags-regexp-token-matching/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-regexp-token-matching/flags-regexp-token-matching/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777
diff --git a/test/flags-uppercase/flags-uppercase-solution/test-A/out.tsv b/test/flags-uppercase/flags-uppercase-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-uppercase/flags-uppercase-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-uppercase/flags-uppercase/config.txt b/test/flags-uppercase/flags-uppercase/config.txt
new file mode 100644
index 0000000..a94cfa3
--- /dev/null
+++ b/test/flags-uppercase/flags-uppercase/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:u
diff --git a/test/flags-uppercase/flags-uppercase/test-A/expected.tsv b/test/flags-uppercase/flags-uppercase/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-uppercase/flags-uppercase/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777

From 23acb0133872ae0af20d40eb076d40067fbc3cb8 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 1 Aug 2020 21:37:48 +0200
Subject: [PATCH 2/4] Fixes in README (description of flags)

---
 README.md                                     | 28 ++++++++++---------
 test/Spec.hs                                  |  2 ++
 .../flags-sort-solution/test-A/out.tsv        | 10 +++++++
 test/flags-sort/flags-sort/config.txt         |  1 +
 .../flags-sort/flags-sort/test-A/expected.tsv | 10 +++++++
 5 files changed, 38 insertions(+), 13 deletions(-)
 create mode 100644 test/flags-sort/flags-sort-solution/test-A/out.tsv
 create mode 100644 test/flags-sort/flags-sort/config.txt
 create mode 100644 test/flags-sort/flags-sort/test-A/expected.tsv

diff --git a/README.md b/README.md
index e410d12..7609006 100644
--- a/README.md
+++ b/README.md
@@ -518,10 +518,6 @@ The following files will be used in example calculations, `expected.tsv`:
     Foo baz BAR
     Ok 7777
 
-`in.tsv`:
-
-
-
 Without any flags, the `Accuracy` metric is:
 
     $ geval -o out.tsv -e expected.tsv --metric Accuracy
@@ -541,7 +537,7 @@ Without any flags, the `Accuracy` metric is:
     $ geval -o out.tsv -e expected.tsv --metric Accuracy:l
     0.4
 
-Why the result is differnt for lower-casing and upper-casing? Some
+Why the result is different for lower-casing and upper-casing? Some
 characters, e.g. German _ß_, are tricky. If you upper-case _Straße_
 you've got _STRASSE_, but if you lower-case it, you obtain _straße_,
 not _strasse_! For this reason, when you want to disregard case when
@@ -555,12 +551,12 @@ than lower- or upper-casing:
 
 ### Manipulations with regular expressions
 
-#### `m<REGEXP>` matching a given PCRE regexp
+#### `m<REGEXP>` — matching a given PCRE regexp
 
 The evaluation metric will be calculated only on the parts of the
 outputs matching a given regular expression. This can be used when you
 want to focus on some specific parts of a text. For instance, we could
-calculate Accuracy only considering (disregarding all other
+calculate Accuracy only considering numbers (disregarding all other
 characters, including spaces).
 
     $ geval -o out.tsv -e expected.tsv --metric 'Accuracy:m<\d+>'
@@ -569,9 +565,11 @@ characters, including spaces).
 (Note that apostrophes are due to using Bash here, if you put it into
 the `config.txt` file you should omit apostrophes: `--metric Accuracy:m<\d+>`.)
 
-All matches are considered and concatenated, if no match is found, an empty string is assumed
-(hence, e.g., `testtttttt` is considered a hit for `test` after this normalization).
-Note that both  `aaa 3 4 bbb` and `aaa BBB 34` will be normalized to `34` here.
+All matches are considered and concatenated; if no match is found, an
+empty string is assumed (hence, e.g., `testtttttt` is considered a hit
+for `test` after this normalization, as both will be transformed into
+the empty string). Note that both `aaa 3 4 bbb` and `aaa BBB 34` will
+be normalized to `34` here.
 
 You can use regexp anchoring operators (`^` or `$`). This will refer
 to the beginning or end of the whole *line*. You could use it to
@@ -619,6 +617,9 @@ You can use special operators `\0`, `\1`, `\2` to refer to parts matched by the
 
 This will sort all tokens, e.g. `foo bar baz` will be treated as `bar baz foo`.
 
+    $ geval -o out.tsv -e expected.tsv --metric 'Accuracy:S'
+    0.3
+
 ### Filtering
 
 #### `f<FEATURE>` — filtering
@@ -626,12 +627,12 @@ This will sort all tokens, e.g. `foo bar baz` will be treated as `bar baz foo`.
 Flags such as `u`, `m<...>`, `s<...><...>` etc. work within a line
 (item), they won't change the number items being evaluated. To
 consider only a subset of items, use the `f<FEATURE>` flag — only the
-lines containing the feature FEATURE will be considered during metric
+lines containing the feature FEATURE will be taken into account during metric
 calculation. Features are the same as listed by the `--worst-features`
 option, e.g. `exp:foo` would accept only lines with the expected
 output containing the token `foo`, `in[2]:bar` — lines with the second
 columns of input contaning the token `bar` (contrary to
-`--worst-features` square brackets should be used be instead of angle ones for indexing).
+`--worst-features`, square brackets should be used instead of angle brackets for indexing).
 
 You *MUST* supply an input file when you use the `f<...>` flag. Assume
 the following `in.txt` file:
@@ -690,7 +691,8 @@ This is handy, when combined with the `{...}` operator (see below).
 This sets the priority level, considered when the results are displayed in the Gonito platform.
 It has no effect in GEval as such (it is simply disregarded in GEval).
 
-    $ geval --precision 3 -o out.tsv -e expected.tsv --metric 'Accuracy:P<1>' --metric 'MultiLabel-F1:P<3>'               Accuracy:P<1>	0.200
+    $ geval --precision 3 -o out.tsv -e expected.tsv --metric 'Accuracy:P<1>' --metric 'MultiLabel-F1:P<3>'
+    Accuracy:P<1>	0.200
     MultiLabel-F1.0:P<3>	0.511
 
 The priority is interpreted by Gonito in the following way:
diff --git a/test/Spec.hs b/test/Spec.hs
index 2730046..d54bd3d 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -383,6 +383,8 @@ main = hspec $ do
       runGEvalTest "flags-regexp-substitution" `shouldReturnAlmost` 0.3
     it "regexp-substitution-ref" $ do
       runGEvalTest "flags-regexp-substitution-ref" `shouldReturnAlmost` 0.5
+    it "sort" $ do
+      runGEvalTest "flags-sort" `shouldReturnAlmost` 0.3
     it "filtering" $ do
       runGEvalTest "flags-filtering" `shouldReturnAlmost` 0.25
   describe "evaluating single lines" $ do
diff --git a/test/flags-sort/flags-sort-solution/test-A/out.tsv b/test/flags-sort/flags-sort-solution/test-A/out.tsv
new file mode 100644
index 0000000..4be9eae
--- /dev/null
+++ b/test/flags-sort/flags-sort-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW WWW WWW WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-sort/flags-sort/config.txt b/test/flags-sort/flags-sort/config.txt
new file mode 100644
index 0000000..0de8e69
--- /dev/null
+++ b/test/flags-sort/flags-sort/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:S
diff --git a/test/flags-sort/flags-sort/test-A/expected.tsv b/test/flags-sort/flags-sort/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-sort/flags-sort/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777

From b719d3190d287d37e4a5a936f1ff0a3ca6fd6e93 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 1 Aug 2020 21:44:27 +0200
Subject: [PATCH 3/4] Minor fix

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 7609006..e0fbc7a 100644
--- a/README.md
+++ b/README.md
@@ -541,7 +541,7 @@ Why the result is different for lower-casing and upper-casing? Some
 characters, e.g. German _ß_, are tricky. If you upper-case _Straße_
 you've got _STRASSE_, but if you lower-case it, you obtain _straße_,
 not _strasse_! For this reason, when you want to disregard case when
-evaluating your metric, it is better to case _case folding_ rather
+evaluating your metric, it is better to use _case folding_ rather
 than lower- or upper-casing:
 
 #### `c` — case fold

From 198aa1f080d3d09493b9cf11869ccdc55bba4bd4 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 1 Aug 2020 21:46:59 +0200
Subject: [PATCH 4/4] Bump up version number

---
 CHANGELOG.md | 4 ++++
 geval.cabal  | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4f8cfaf..23eff90 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,8 @@
 
+## 1.36.1.0
+
+* Add "c" and "t" flags
+
 ## 1.36.0.0
 
 * Add fuzzy matching for MultiLabel-F1
diff --git a/geval.cabal b/geval.cabal
index 8009c99..8b88a10 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -1,5 +1,5 @@
 name:                geval
-version:             1.36.0.0
+version:             1.36.1.0
 synopsis:            Machine learning evaluation tools
 description:         Please see README.md
 homepage:            http://github.com/name/project