From 3ebe158e55c4bdb96b3709cdfef673867778a8d9 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 1 Aug 2020 21:27:04 +0200
Subject: [PATCH] Describe flags, add "c" and "t" flags.

Also add tests for flags
---
 README.md                                     | 259 +++++++++++++++++-
 src/GEval/EvaluationScheme.hs                 |  11 +-
 test/Spec.hs                                  |  21 ++
 .../flags-case-fold-solution/test-A/out.tsv   |  10 +
 .../flags-case-fold/config.txt                |   1 +
 .../flags-case-fold/test-A/expected.tsv       |  10 +
 .../flags-filtering-solution/test-A/out.tsv   |  10 +
 .../flags-filtering/config.txt                |   1 +
 .../flags-filtering/test-A/expected.tsv       |  10 +
 .../flags-filtering/test-A/in.tsv             |  10 +
 .../flags-lowercase-solution/test-A/out.tsv   |  10 +
 .../flags-lowercase/config.txt                |   1 +
 .../flags-lowercase/test-A/expected.tsv       |  10 +
 .../flags-none-solution/test-A/out.tsv        |  10 +
 test/flags-none/flags-none/config.txt         |   1 +
 .../flags-none/flags-none/test-A/expected.tsv |  10 +
 .../test-A/out.tsv                            |  10 +
 .../flags-regexp-matching-anchor/config.txt   |   1 +
 .../test-A/expected.tsv                       |  10 +
 .../test-A/out.tsv                            |  10 +
 .../flags-regexp-matching/config.txt          |   1 +
 .../flags-regexp-matching/test-A/expected.tsv |  10 +
 .../test-A/out.tsv                            |  10 +
 .../flags-regexp-substitution-ref/config.txt  |   1 +
 .../test-A/expected.tsv                       |  10 +
 .../test-A/out.tsv                            |  10 +
 .../flags-regexp-substitution/config.txt      |   1 +
 .../test-A/expected.tsv                       |  10 +
 .../test-A/out.tsv                            |  10 +
 .../config.txt                                |   1 +
 .../test-A/expected.tsv                       |  10 +
 .../test-A/out.tsv                            |  10 +
 .../flags-regexp-token-matching/config.txt    |   1 +
 .../test-A/expected.tsv                       |  10 +
 .../flags-uppercase-solution/test-A/out.tsv   |  10 +
 .../flags-uppercase/config.txt                |   1 +
 .../flags-uppercase/test-A/expected.tsv       |  10 +
 37 files changed, 530 insertions(+), 2 deletions(-)
 create mode 100644 test/flags-case-fold/flags-case-fold-solution/test-A/out.tsv
 create mode 100644 test/flags-case-fold/flags-case-fold/config.txt
 create mode 100644 test/flags-case-fold/flags-case-fold/test-A/expected.tsv
 create mode 100644 test/flags-filtering/flags-filtering-solution/test-A/out.tsv
 create mode 100644 test/flags-filtering/flags-filtering/config.txt
 create mode 100644 test/flags-filtering/flags-filtering/test-A/expected.tsv
 create mode 100644 test/flags-filtering/flags-filtering/test-A/in.tsv
 create mode 100644 test/flags-lowercase/flags-lowercase-solution/test-A/out.tsv
 create mode 100644 test/flags-lowercase/flags-lowercase/config.txt
 create mode 100644 test/flags-lowercase/flags-lowercase/test-A/expected.tsv
 create mode 100644 test/flags-none/flags-none-solution/test-A/out.tsv
 create mode 100644 test/flags-none/flags-none/config.txt
 create mode 100644 test/flags-none/flags-none/test-A/expected.tsv
 create mode 100644 test/flags-regexp-matching-anchor/flags-regexp-matching-anchor-solution/test-A/out.tsv
 create mode 100644 test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/config.txt
 create mode 100644 test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/test-A/expected.tsv
 create mode 100644 test/flags-regexp-matching/flags-regexp-matching-solution/test-A/out.tsv
 create mode 100644 test/flags-regexp-matching/flags-regexp-matching/config.txt
 create mode 100644 test/flags-regexp-matching/flags-regexp-matching/test-A/expected.tsv
 create mode 100644 test/flags-regexp-substitution-ref/flags-regexp-substitution-ref-solution/test-A/out.tsv
 create mode 100644 test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/config.txt
 create mode 100644 test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/test-A/expected.tsv
 create mode 100644 test/flags-regexp-substitution/flags-regexp-substitution-solution/test-A/out.tsv
 create mode 100644 test/flags-regexp-substitution/flags-regexp-substitution/config.txt
 create mode 100644 test/flags-regexp-substitution/flags-regexp-substitution/test-A/expected.tsv
 create mode 100644 test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor-solution/test-A/out.tsv
 create mode 100644 test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/config.txt
 create mode 100644 test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/test-A/expected.tsv
 create mode 100644 test/flags-regexp-token-matching/flags-regexp-token-matching-solution/test-A/out.tsv
 create mode 100644 test/flags-regexp-token-matching/flags-regexp-token-matching/config.txt
 create mode 100644 test/flags-regexp-token-matching/flags-regexp-token-matching/test-A/expected.tsv
 create mode 100644 test/flags-uppercase/flags-uppercase-solution/test-A/out.tsv
 create mode 100644 test/flags-uppercase/flags-uppercase/config.txt
 create mode 100644 test/flags-uppercase/flags-uppercase/test-A/expected.tsv

diff --git a/README.md b/README.md
index 3ed16b9..e410d12 100644
--- a/README.md
+++ b/README.md
@@ -88,7 +88,7 @@ to happen on macOS, as these packages are usually installed out of the box on Li
 In case the `lzma` package is not installed on your Linux, you need to run (assuming Debian/Ubuntu):
 
     sudo apt-get install pkg-config liblzma-dev libpq-dev libpcre3-dev libcairo2-dev libbz2-dev
-    
+
 #### Windows issues
 
 If you see this message on Windows during executing `stack test` command:
@@ -480,6 +480,263 @@ So now you can see that the accuracy is over 78% and the likelihood
     in<1>:Brytania	2	0.53333333	0.01357876718525224600
     in<1>:rewolucja	2	0.53333333	0.01357876718525224600
 
+## Metric flags
+
+GEval offers a number of *flags* to modify the way an evaluation
+metric is calculated or presented. For instance, if you use `BLEU:u`
+instead of `BLEU`, the BLEU metric (a standard metric for machine
+translation) will be evaluated on the actual and expected outputs
+upper-cased. In other words, flags can be used to _normalize_ the text
+before running the actual evaluation metric.
+
+Flags are given after a colon (`:`) and can be combined. Some flags
+can have arguments, they should be given in angle brackets (`<...>`).
+
+The following files will be used in example calculations, `expected.tsv`:
+
+    foo 123 bar
+    29008 Straße
+    xyz
+    aaa 3 4 bbb
+    qwerty 100
+    WWW WWW
+    test
+    104
+    BAR Foo baz
+    OK 7777
+
+`out.tsv`:
+
+    foo 999 BAR
+    29008 STRASSE
+    xyz
+    aaa BBB 34
+    qwerty 1000
+    WWW WWW WWW WWW WWW WWW WWW WWW
+    testtttttt
+    104
+    Foo baz BAR
+    Ok 7777
+
+`in.tsv`:
+
+
+
+Without any flags, the `Accuracy` metric is:
+
+    $ geval -o out.tsv -e expected.tsv --metric Accuracy
+    0.2
+
+(As only two items are correct: `xyz` and `104`.)
+
+### Case change
+
+#### `l` — lower-case
+
+    $ geval -o out.tsv -e expected.tsv --metric Accuracy:l
+    0.3
+
+#### `u` — upper-case
+
+    $ geval -o out.tsv -e expected.tsv --metric Accuracy:u
+    0.4
+
+Why is the result different for lower-casing and upper-casing? Some
+characters, e.g. German _ß_, are tricky. If you upper-case _Straße_
+you've got _STRASSE_, but if you lower-case it, you obtain _straße_,
+not _strasse_! For this reason, when you want to disregard case when
+evaluating your metric, it is better to use _case folding_ rather
+than lower- or upper-casing:
+
+#### `c` — case fold
+
+    $ geval -o out.tsv -e expected.tsv --metric Accuracy:c
+    0.4
+
+### Manipulations with regular expressions
+
+#### `m<REGEXP>` — matching a given PCRE regexp
+
+The evaluation metric will be calculated only on the parts of the
+outputs matching a given regular expression. This can be used when you
+want to focus on some specific parts of a text. For instance, we could
+calculate Accuracy considering only digits (disregarding all other
+characters, including spaces).
+
+    $ geval -o out.tsv -e expected.tsv --metric 'Accuracy:m<\d+>'
+    0.8
+
+(Note that apostrophes are due to using Bash here, if you put it into
+the `config.txt` file you should omit apostrophes: `--metric Accuracy:m<\d+>`.)
+
+All matches are considered and concatenated, if no match is found, an empty string is assumed
+(hence, e.g., `testtttttt` is considered a hit for `test` after this normalization).
+Note that both  `aaa 3 4 bbb` and `aaa BBB 34` will be normalized to `34` here.
+
+You can use regexp anchoring operators (`^` or `$`). This will refer
+to the beginning or end of the whole *line*. You could use it to
+calculate the accuracy considering only the first two characters of output lines:
+
+    $ geval -o out.tsv -e expected.tsv --metric 'Accuracy:m<^..>'
+    0.8
+
+#### `t<REGEXP>` — filtering tokens using a PCRE regexp
+
+This applies a regexp for each token separately (tokens are separated
+by spaces, you can use a non-standard tokenizer with the `--tokenizer` option if needed).
+All the tokens not matching the regexp are filtered out (but spaces are recovered).
+
+    $ geval -o out.tsv -e expected.tsv --metric 'Accuracy:t<\d+>'
+    0.7
+
+Now, the anchoring operators refer to the beginning or end of a
+*token*. For instance, let's consider only tokens starting with _b_:
+
+    $ geval -o out.tsv -e expected.tsv --metric 'Accuracy:t<^b>'
+    0.8
+
+With `m` or `t` flags you can only select parts of output lines. What
+if you want to do some replacements, e.g. collapse some
+characters/strings into a standard form? You should use the `s` flag for this:
+
+#### `s<REGEXP><REPLACEMENT>` — replace parts of output lines matching a regexp
+
+This will substitute all occurrences of strings matching REGEXP with
+REPLACEMENT. For instance, we could replace all numbers with a special token NUMBER.
+All the other parts of a line are left intact.
+
+    $ geval -o out.tsv -e expected.tsv --metric 'Accuracy:s<\d+><NUMBER>'
+    0.3
+
+You can use special operators `\0`, `\1`, `\2` to refer to parts matched by the regexp.
+
+    $ geval -o out.tsv -e expected.tsv --metric 'Accuracy:s<([A-Za-z])\S+><WORD-WITH-FIRST-LETTER-\1>'
+    0.5
+
+### Other normalizations
+
+#### `S` — sort all tokens
+
+This will sort all tokens, e.g. `foo bar baz` will be treated as `bar baz foo`.
+
+### Filtering
+
+#### `f<FEATURE>` — filtering
+
+Flags such as `u`, `m<...>`, `s<...><...>` etc. work within a line
+(item), they won't change the number of items being evaluated. To
+consider only a subset of items, use the `f<FEATURE>` flag — only the
+lines containing the feature FEATURE will be considered during metric
+calculation. Features are the same as listed by the `--worst-features`
+option, e.g. `exp:foo` would accept only lines with the expected
+output containing the token `foo`, `in[2]:bar` — lines with the second
+column of input containing the token `bar` (contrary to
+`--worst-features`, square brackets should be used instead of angle ones for indexing).
+
+You *MUST* supply an input file when you use the `f<...>` flag. Assume
+the following `in.tsv` file:
+
+    12	this aaa
+    32	this bbb
+    32	this ccc
+    12	that aaa
+    12	that aaa
+    10	that aaa
+    11	that
+    11	that
+    17	this
+    12	that
+
+    $ geval -o out.tsv -e expected.tsv -i in.tsv --metric 'Accuracy:f<in[2]:this>'
+    0.25
+
+### Presentation
+
+Some flags are used not for modifying the result, but rather changing
+the way it is presented by GEval (or the associated
+[Gonito](https://gonito.net) Web application).
+
+#### `N<NAME>` — use an alternative name
+
+Sometimes, the metric name gets complicated, you can use the `N<...>`
+flag to give it a more human-readable name.
+
+This will be used:
+
+* by GEval when presenting results from more than one metric (when
+  only one metric is calculated, its name is not given anyway),
+* by Gonito, e.g. in table headers.
+
+    $ geval --precision 3 -o out.tsv -e expected.tsv --metric Accuracy --metric 'MultiLabel-F1:N<F-score>' --metric 'MultiLabel-F0:N<Precision>' --metric 'MultiLabel-F9999:N<Recall>'
+    Accuracy	0.200
+    F-score	0.511
+    Precision	0.462
+    Recall	0.571
+
+(GEval does not have separate Precision/Recall metrics, but they can
+be easily obtained by setting the parameter of the F-score to,
+respectively, 0 and a large number.)
+
+More than one name can be given. In such a case, the names will be concatenated with spaces.
+
+    $ geval --precision 3 -o out.tsv -e expected.tsv --metric 'Accuracy' --metric 'MultiLabel-F1:N<F-score>N<on>N<tokens>'
+    Accuracy	0.200
+    F-score on tokens	0.511
+
+This is handy, when combined with the `{...}` operator (see below).
+
+#### `P<priority>` — set the priority (within the Gonito platform)
+
+This sets the priority level, considered when the results are displayed in the Gonito platform. It has no effect in GEval as such (it is simply disregarded in GEval).
+
+    $ geval --precision 3 -o out.tsv -e expected.tsv --metric 'Accuracy:P<1>' --metric 'MultiLabel-F1:P<3>'
+    Accuracy:P<1>	0.200
+    MultiLabel-F1.0:P<3>	0.511
+
+The priority is interpreted by Gonito in the following way:
+
+  * 1 — show everywhere, including the main leaderboard table
+  * 2 — show on the secondary leaderboard table and in detailed information for a submission
+  * 3 — show only in detailed information for a submission
+
+Although you can specify `P<...>` more than once, only the first value
+will be considered for a given metric (this might be important when combined with the `{...}` operator).
+
+### Combining flags
+
+Flags can be combined, just by concatenation (`:` should be given only once):
+
+    $ geval -o out.tsv -e expected.tsv -i in.tsv --metric Accuracy --metric 'Accuracy:f<in[2]:this>cs<\d><X>N<MyWeirdMetric>'
+    Accuracy	0.2
+    MyWeirdMetric	0.75
+
+Note that the order of flags might be sometimes significant, in
+general, they are considered from left to right.
+
+### Cartesian operator `{...}`
+
+Sometimes, you need to define a large number of similar metrics. Then
+you can use the special `{...}` operator interpreted by GEval (not
+Bash!). For instance `{foo,bar}xyz{aaa,bbb,ccc}` will be internally
+considered as the Cartesian product (i.e. you'll get all the
+combinations): `fooxyzaaa`, `fooxyzbbb`, `fooxyzccc`, `barxyzaaa`,
+`barxyzbbb`, `barxyzccc`.
+
+For example, let's assume that we want accuracy, F-score, precision
+and recall in both case-sensitive and case-insensitive versions.
+Here's the way to calculate all these 8 metrics in a concise manner:
+
+    $ geval --precision 3 -o out.tsv -e expected.tsv -i in.tsv --metric '{Accuracy:N<Acc>,MultiLabel-F1:N<F1>,MultiLabel-F0:N<P>,MultiLabel-F9999:N<R>}N<case>{N<sensitive>,cN<non-sensitive>}'
+        sensitive	non-sensitive
+    Acc case	0.200	0.400
+    F1 case	0.511	0.681
+    P case	0.462	0.615
+    R case	0.571	0.762
+
+Note that GEval automagically puts the results in a table! (Well,
+_case_ probably should be written in headers, but, well, it generates
+the table totally on its own.)
+
 ## Handling headers
 
 When dealing with TSV files, you often face a dilemma whether to add a
diff --git a/src/GEval/EvaluationScheme.hs b/src/GEval/EvaluationScheme.hs
index 135f9d8..b335087 100644
--- a/src/GEval/EvaluationScheme.hs
+++ b/src/GEval/EvaluationScheme.hs
@@ -12,7 +12,7 @@ import GEval.Metric
 
 import Text.Regex.PCRE.Heavy
 import Text.Regex.PCRE.Light.Base (Regex(..))
-import Data.Text (Text(..), concat, toLower, toUpper, pack, unpack, words, unwords)
+import Data.Text (Text(..), concat, toCaseFold, toLower, toUpper, pack, unpack, words, unwords)
 import Data.List (intercalate, break, sort)
 import Data.Either
 import Data.Maybe (fromMaybe, catMaybes)
@@ -23,8 +23,10 @@ data EvaluationScheme = EvaluationScheme Metric [PreprocessingOperation]
   deriving (Eq)
 
 data PreprocessingOperation = RegexpMatch Regex
+                              | RegexpTokenMatch Regex
                               | LowerCasing
                               | UpperCasing
+                              | CaseFolding
                               | Sorting
                               | SetName Text
                               | SetPriority Int
@@ -51,7 +53,10 @@ readOps ('l':theRest) = (LowerCasing:ops, theRest')
     where (ops, theRest') = readOps theRest
 readOps ('u':theRest) = (UpperCasing:ops, theRest')
     where (ops, theRest') = readOps theRest
+readOps ('c':theRest) = (CaseFolding:ops, theRest')
+    where (ops, theRest') = readOps theRest
 readOps ('m':theRest) = handleParametrizedOp (RegexpMatch . (fromRight undefined) . ((flip compileM) []) . BSU.fromString) theRest
+readOps ('t':theRest) = handleParametrizedOp (RegexpTokenMatch . (fromRight undefined) . ((flip compileM) []) . BSU.fromString) theRest
 readOps ('S':theRest) = (Sorting:ops, theRest')
     where (ops, theRest') = readOps theRest
 readOps ('N':theRest) = handleParametrizedOp (SetName . pack) theRest
@@ -120,8 +125,10 @@ evaluationSchemeMetric (EvaluationScheme metric _) = metric
 
 instance Show PreprocessingOperation where
   show (RegexpMatch (Regex _ regexp)) = parametrizedOperation "m" (BSU.toString regexp)
+  show (RegexpTokenMatch (Regex _ regexp)) = parametrizedOperation "t" (BSU.toString regexp)
   show LowerCasing = "l"
   show UpperCasing = "u"
+  show CaseFolding = "c"
   show Sorting = "S"
   show (SetName t) = parametrizedOperation "N" (unpack t)
   show (SetPriority p) = parametrizedOperation "P" (show p)
@@ -154,8 +161,10 @@ applyPreprocessingOperations (EvaluationScheme _ operations) t = foldl (flip app
 
 applyPreprocessingOperation :: PreprocessingOperation -> Text -> Text
 applyPreprocessingOperation (RegexpMatch regex) = Data.Text.concat . (map fst) . (scan regex)
+applyPreprocessingOperation (RegexpTokenMatch regex) = Data.Text.unwords . (filter (≈ regex)) . Data.Text.words
 applyPreprocessingOperation LowerCasing = toLower
 applyPreprocessingOperation UpperCasing = toUpper
+applyPreprocessingOperation CaseFolding = toCaseFold
 applyPreprocessingOperation Sorting = Data.Text.unwords . sort . Data.Text.words
 applyPreprocessingOperation (SetName _) = id
 applyPreprocessingOperation (SetPriority _) = id
diff --git a/test/Spec.hs b/test/Spec.hs
index 43b8a3d..2730046 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -364,6 +364,27 @@ main = hspec $ do
       runGEvalTest "f1-with-preprocessing" `shouldReturnAlmost` 0.57142857142857
     it "Regexp substition" $ do
       runGEvalTest "accuracy-with-flags" `shouldReturnAlmost` 0.8
+  describe "Flag examples" $ do
+    it "none" $ do
+      runGEvalTest "flags-none" `shouldReturnAlmost` 0.2
+    it "lower-case" $ do
+      runGEvalTest "flags-lowercase" `shouldReturnAlmost` 0.3
+    it "upper-case" $ do
+      runGEvalTest "flags-uppercase" `shouldReturnAlmost` 0.4
+    it "regexp-matching" $ do
+      runGEvalTest "flags-regexp-matching" `shouldReturnAlmost` 0.8
+    it "regexp-matching-anchor" $ do
+      runGEvalTest "flags-regexp-matching-anchor" `shouldReturnAlmost` 0.8
+    it "regexp-token-matching" $ do
+      runGEvalTest "flags-regexp-token-matching" `shouldReturnAlmost` 0.7
+    it "regexp-token-matching-anchor" $ do
+      runGEvalTest "flags-regexp-token-matching-anchor" `shouldReturnAlmost` 0.8
+    it "regexp-substitution" $ do
+      runGEvalTest "flags-regexp-substitution" `shouldReturnAlmost` 0.3
+    it "regexp-substitution-ref" $ do
+      runGEvalTest "flags-regexp-substitution-ref" `shouldReturnAlmost` 0.5
+    it "filtering" $ do
+      runGEvalTest "flags-filtering" `shouldReturnAlmost` 0.25
   describe "evaluating single lines" $ do
     it "RMSE" $ do
       (MetricOutput (SimpleRun v) _) <- gevalCoreOnSingleLines RMSE id RawItemTarget
diff --git a/test/flags-case-fold/flags-case-fold-solution/test-A/out.tsv b/test/flags-case-fold/flags-case-fold-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-case-fold/flags-case-fold-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-case-fold/flags-case-fold/config.txt b/test/flags-case-fold/flags-case-fold/config.txt
new file mode 100644
index 0000000..82c1775
--- /dev/null
+++ b/test/flags-case-fold/flags-case-fold/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:c
diff --git a/test/flags-case-fold/flags-case-fold/test-A/expected.tsv b/test/flags-case-fold/flags-case-fold/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-case-fold/flags-case-fold/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777
diff --git a/test/flags-filtering/flags-filtering-solution/test-A/out.tsv b/test/flags-filtering/flags-filtering-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-filtering/flags-filtering-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-filtering/flags-filtering/config.txt b/test/flags-filtering/flags-filtering/config.txt
new file mode 100644
index 0000000..dfa6b66
--- /dev/null
+++ b/test/flags-filtering/flags-filtering/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:f<in[2]:this>
diff --git a/test/flags-filtering/flags-filtering/test-A/expected.tsv b/test/flags-filtering/flags-filtering/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-filtering/flags-filtering/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777
diff --git a/test/flags-filtering/flags-filtering/test-A/in.tsv b/test/flags-filtering/flags-filtering/test-A/in.tsv
new file mode 100644
index 0000000..72292ef
--- /dev/null
+++ b/test/flags-filtering/flags-filtering/test-A/in.tsv
@@ -0,0 +1,10 @@
+12	this aaa
+32	this bbb
+32	this ccc
+12	that aaa
+12	that aaa
+10	that aaa
+11	that
+11	that
+17	this
+12	that
diff --git a/test/flags-lowercase/flags-lowercase-solution/test-A/out.tsv b/test/flags-lowercase/flags-lowercase-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-lowercase/flags-lowercase-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-lowercase/flags-lowercase/config.txt b/test/flags-lowercase/flags-lowercase/config.txt
new file mode 100644
index 0000000..e3c75cd
--- /dev/null
+++ b/test/flags-lowercase/flags-lowercase/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:l
diff --git a/test/flags-lowercase/flags-lowercase/test-A/expected.tsv b/test/flags-lowercase/flags-lowercase/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-lowercase/flags-lowercase/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777
diff --git a/test/flags-none/flags-none-solution/test-A/out.tsv b/test/flags-none/flags-none-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-none/flags-none-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-none/flags-none/config.txt b/test/flags-none/flags-none/config.txt
new file mode 100644
index 0000000..337a0cc
--- /dev/null
+++ b/test/flags-none/flags-none/config.txt
@@ -0,0 +1 @@
+--metric Accuracy
diff --git a/test/flags-none/flags-none/test-A/expected.tsv b/test/flags-none/flags-none/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-none/flags-none/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777
diff --git a/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor-solution/test-A/out.tsv b/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/config.txt b/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/config.txt
new file mode 100644
index 0000000..f420c60
--- /dev/null
+++ b/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:m<^..>
diff --git a/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/test-A/expected.tsv b/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777
diff --git a/test/flags-regexp-matching/flags-regexp-matching-solution/test-A/out.tsv b/test/flags-regexp-matching/flags-regexp-matching-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-regexp-matching/flags-regexp-matching-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-regexp-matching/flags-regexp-matching/config.txt b/test/flags-regexp-matching/flags-regexp-matching/config.txt
new file mode 100644
index 0000000..ce11160
--- /dev/null
+++ b/test/flags-regexp-matching/flags-regexp-matching/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:m<\d+>
diff --git a/test/flags-regexp-matching/flags-regexp-matching/test-A/expected.tsv b/test/flags-regexp-matching/flags-regexp-matching/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-regexp-matching/flags-regexp-matching/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777
diff --git a/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref-solution/test-A/out.tsv b/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/config.txt b/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/config.txt
new file mode 100644
index 0000000..2fca6f8
--- /dev/null
+++ b/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:s<([A-Za-z])\S+><WORD-WITH-FIRST-LETTER-\1>
diff --git a/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/test-A/expected.tsv b/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777
diff --git a/test/flags-regexp-substitution/flags-regexp-substitution-solution/test-A/out.tsv b/test/flags-regexp-substitution/flags-regexp-substitution-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-regexp-substitution/flags-regexp-substitution-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-regexp-substitution/flags-regexp-substitution/config.txt b/test/flags-regexp-substitution/flags-regexp-substitution/config.txt
new file mode 100644
index 0000000..3d1a792
--- /dev/null
+++ b/test/flags-regexp-substitution/flags-regexp-substitution/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:s<\d+><NUMBER>
diff --git a/test/flags-regexp-substitution/flags-regexp-substitution/test-A/expected.tsv b/test/flags-regexp-substitution/flags-regexp-substitution/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-regexp-substitution/flags-regexp-substitution/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777
diff --git a/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor-solution/test-A/out.tsv b/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/config.txt b/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/config.txt
new file mode 100644
index 0000000..8a33294
--- /dev/null
+++ b/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:t<^b>
diff --git a/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/test-A/expected.tsv b/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777
diff --git a/test/flags-regexp-token-matching/flags-regexp-token-matching-solution/test-A/out.tsv b/test/flags-regexp-token-matching/flags-regexp-token-matching-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-regexp-token-matching/flags-regexp-token-matching-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-regexp-token-matching/flags-regexp-token-matching/config.txt b/test/flags-regexp-token-matching/flags-regexp-token-matching/config.txt
new file mode 100644
index 0000000..3aa87e0
--- /dev/null
+++ b/test/flags-regexp-token-matching/flags-regexp-token-matching/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:t<\d+>
diff --git a/test/flags-regexp-token-matching/flags-regexp-token-matching/test-A/expected.tsv b/test/flags-regexp-token-matching/flags-regexp-token-matching/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-regexp-token-matching/flags-regexp-token-matching/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777
diff --git a/test/flags-uppercase/flags-uppercase-solution/test-A/out.tsv b/test/flags-uppercase/flags-uppercase-solution/test-A/out.tsv
new file mode 100644
index 0000000..d94143b
--- /dev/null
+++ b/test/flags-uppercase/flags-uppercase-solution/test-A/out.tsv
@@ -0,0 +1,10 @@
+foo 999 BAR
+29008 STRASSE
+xyz
+aaa BBB 34
+qwerty 1000
+WWW WWW WWW
+testtttttt
+104
+Foo baz BAR
+Ok 7777
diff --git a/test/flags-uppercase/flags-uppercase/config.txt b/test/flags-uppercase/flags-uppercase/config.txt
new file mode 100644
index 0000000..a94cfa3
--- /dev/null
+++ b/test/flags-uppercase/flags-uppercase/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:u
diff --git a/test/flags-uppercase/flags-uppercase/test-A/expected.tsv b/test/flags-uppercase/flags-uppercase/test-A/expected.tsv
new file mode 100644
index 0000000..a95a323
--- /dev/null
+++ b/test/flags-uppercase/flags-uppercase/test-A/expected.tsv
@@ -0,0 +1,10 @@
+foo 123 bar
+29008 Straße
+xyz
+aaa 3 4 bbb
+qwerty 100
+WWW WWW
+test
+104
+BAR Foo baz
+OK 7777