From 3ebe158e55c4bdb96b3709cdfef673867778a8d9 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Sat, 1 Aug 2020 21:27:04 +0200 Subject: [PATCH] Describe flags, add "c" and "t" flags. Also add tests for flags --- README.md | 259 +++++++++++++++++- src/GEval/EvaluationScheme.hs | 11 +- test/Spec.hs | 21 ++ .../flags-case-fold-solution/test-A/out.tsv | 10 + .../flags-case-fold/config.txt | 1 + .../flags-case-fold/test-A/expected.tsv | 10 + .../flags-filtering-solution/test-A/out.tsv | 10 + .../flags-filtering/config.txt | 1 + .../flags-filtering/test-A/expected.tsv | 10 + .../flags-filtering/test-A/in.tsv | 10 + .../flags-lowercase-solution/test-A/out.tsv | 10 + .../flags-lowercase/config.txt | 1 + .../flags-lowercase/test-A/expected.tsv | 10 + .../flags-none-solution/test-A/out.tsv | 10 + test/flags-none/flags-none/config.txt | 1 + .../flags-none/flags-none/test-A/expected.tsv | 10 + .../test-A/out.tsv | 10 + .../flags-regexp-matching-anchor/config.txt | 1 + .../test-A/expected.tsv | 10 + .../test-A/out.tsv | 10 + .../flags-regexp-matching/config.txt | 1 + .../flags-regexp-matching/test-A/expected.tsv | 10 + .../test-A/out.tsv | 10 + .../flags-regexp-substitution-ref/config.txt | 1 + .../test-A/expected.tsv | 10 + .../test-A/out.tsv | 10 + .../flags-regexp-substitution/config.txt | 1 + .../test-A/expected.tsv | 10 + .../test-A/out.tsv | 10 + .../config.txt | 1 + .../test-A/expected.tsv | 10 + .../test-A/out.tsv | 10 + .../flags-regexp-token-matching/config.txt | 1 + .../test-A/expected.tsv | 10 + .../flags-uppercase-solution/test-A/out.tsv | 10 + .../flags-uppercase/config.txt | 1 + .../flags-uppercase/test-A/expected.tsv | 10 + 37 files changed, 530 insertions(+), 2 deletions(-) create mode 100644 test/flags-case-fold/flags-case-fold-solution/test-A/out.tsv create mode 100644 test/flags-case-fold/flags-case-fold/config.txt create mode 100644 test/flags-case-fold/flags-case-fold/test-A/expected.tsv create mode 100644 
test/flags-filtering/flags-filtering-solution/test-A/out.tsv create mode 100644 test/flags-filtering/flags-filtering/config.txt create mode 100644 test/flags-filtering/flags-filtering/test-A/expected.tsv create mode 100644 test/flags-filtering/flags-filtering/test-A/in.tsv create mode 100644 test/flags-lowercase/flags-lowercase-solution/test-A/out.tsv create mode 100644 test/flags-lowercase/flags-lowercase/config.txt create mode 100644 test/flags-lowercase/flags-lowercase/test-A/expected.tsv create mode 100644 test/flags-none/flags-none-solution/test-A/out.tsv create mode 100644 test/flags-none/flags-none/config.txt create mode 100644 test/flags-none/flags-none/test-A/expected.tsv create mode 100644 test/flags-regexp-matching-anchor/flags-regexp-matching-anchor-solution/test-A/out.tsv create mode 100644 test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/config.txt create mode 100644 test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/test-A/expected.tsv create mode 100644 test/flags-regexp-matching/flags-regexp-matching-solution/test-A/out.tsv create mode 100644 test/flags-regexp-matching/flags-regexp-matching/config.txt create mode 100644 test/flags-regexp-matching/flags-regexp-matching/test-A/expected.tsv create mode 100644 test/flags-regexp-substitution-ref/flags-regexp-substitution-ref-solution/test-A/out.tsv create mode 100644 test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/config.txt create mode 100644 test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/test-A/expected.tsv create mode 100644 test/flags-regexp-substitution/flags-regexp-substitution-solution/test-A/out.tsv create mode 100644 test/flags-regexp-substitution/flags-regexp-substitution/config.txt create mode 100644 test/flags-regexp-substitution/flags-regexp-substitution/test-A/expected.tsv create mode 100644 test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor-solution/test-A/out.tsv create mode 100644 
test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/config.txt create mode 100644 test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/test-A/expected.tsv create mode 100644 test/flags-regexp-token-matching/flags-regexp-token-matching-solution/test-A/out.tsv create mode 100644 test/flags-regexp-token-matching/flags-regexp-token-matching/config.txt create mode 100644 test/flags-regexp-token-matching/flags-regexp-token-matching/test-A/expected.tsv create mode 100644 test/flags-uppercase/flags-uppercase-solution/test-A/out.tsv create mode 100644 test/flags-uppercase/flags-uppercase/config.txt create mode 100644 test/flags-uppercase/flags-uppercase/test-A/expected.tsv diff --git a/README.md b/README.md index 3ed16b9..e410d12 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ to happen on macOS, as these packages are usually installed out of the box on Li In case the `lzma` package is not installed on your Linux, you need to run (assuming Debian/Ubuntu): sudo apt-get install pkg-config liblzma-dev libpq-dev libpcre3-dev libcairo2-dev libbz2-dev - + #### Windows issues If you see this message on Windows during executing `stack test` command: @@ -480,6 +480,263 @@ So now you can see that the accuracy is over 78% and the likelihood in<1>:Brytania 2 0.53333333 0.01357876718525224600 in<1>:rewolucja 2 0.53333333 0.01357876718525224600 +## Metric flags + +GEval offers a number of *flags* to modify the way an evaluation +metric is calculated or presented. For instance, if you use `BLEU:u` +instead of `BLEU`, the BLEU metric (a standard metric for machine +translation) will be evaluated on the actual and expected outputs +upper-cased. In other words, flags can be used to _normalize_ the text +before running the actual evaluation metric. + +Flags are given after a colon (`:`) and can be combined. Some flags +can have arguments, they should be given in angle brackets (`<...>`). 
+ +The following files will be used in example calculations, `expected.tsv`: + + foo 123 bar + 29008 Straße + xyz + aaa 3 4 bbb + qwerty 100 + WWW WWW + test + 104 + BAR Foo baz + OK 7777 + +`out.tsv`: + + foo 999 BAR + 29008 STRASSE + xyz + aaa BBB 34 + qwerty 1000 + WWW WWW WWW WWW WWW WWW WWW WWW + testtttttt + 104 + Foo baz BAR + Ok 7777 + +`in.tsv`: + + + +Without any flags, the `Accuracy` metric is: + + $ geval -o out.tsv -e expected.tsv --metric Accuracy + 0.2 + +(As only two items are correct: `xyz` and `104`.) + +### Case change + +#### `l` — lower-case + + $ geval -o out.tsv -e expected.tsv --metric Accuracy:l + 0.3 + +#### `u` — upper-case + + $ geval -o out.tsv -e expected.tsv --metric Accuracy:u + 0.4 + +Why is the result different for lower-casing and upper-casing? Some +characters, e.g. German _ß_, are tricky. If you upper-case _Straße_ +you get _STRASSE_, but if you lower-case it, you obtain _straße_, +not _strasse_! For this reason, when you want to disregard case when +evaluating your metric, it is better to use _case folding_ rather +than lower- or upper-casing: + +#### `c` — case fold + + $ geval -o out.tsv -e expected.tsv --metric Accuracy:c + 0.4 + +### Manipulations with regular expressions + +#### `m<REGEXP>` — matching a given PCRE regexp + +The evaluation metric will be calculated only on the parts of the +outputs matching a given regular expression. This can be used when you +want to focus on some specific parts of a text. For instance, we could +calculate Accuracy only considering digits (disregarding all other +characters, including spaces). + + $ geval -o out.tsv -e expected.tsv --metric 'Accuracy:m<\d+>' + 0.8 + +(Note that apostrophes are due to using Bash here, if you put it into +the `config.txt` file you should omit apostrophes: `--metric Accuracy:m<\d+>`.) + +All matches are considered and concatenated; if no match is found, an empty string is assumed +(hence, e.g., `testtttttt` is considered a hit for `test` after this normalization).
+Note that both `aaa 3 4 bbb` and `aaa BBB 34` will be normalized to `34` here. + +You can use regexp anchoring operators (`^` or `$`). This will refer +to the beginning or end of the whole *line*. You could use it to +calculate the accuracy considering only the first two characters of output lines: + + $ geval -o out.tsv -e expected.tsv --metric 'Accuracy:m<^..>' + 0.8 + +#### `t<REGEXP>` — filtering tokens using a PCRE regexp + +This applies a regexp for each token separately (tokens are separated +by spaces, you can use a non-standard tokenizer with the `--tokenizer` option if needed). +All the tokens not matching the regexp are filtered out (but spaces are recovered). + + $ geval -o out.tsv -e expected.tsv --metric 'Accuracy:t<\d+>' + 0.7 + +Now, the anchoring operators refer to the beginning or end of a +*token*. For instance, let's consider only tokens starting with _b_: + + $ geval -o out.tsv -e expected.tsv --metric 'Accuracy:t<^b>' + 0.8 + +With `m` or `t` flags you can only select parts of output lines. What +if you want to do some replacements, e.g. collapse some +characters/strings into a standard form? You should use the `s` flag for this: + +#### `s<REGEXP><REPLACEMENT>` — replace parts of output lines matching a regexp + +This will substitute all occurrences of strings matching REGEXP with +REPLACEMENT. For instance, we could replace all numbers with a special token NUMBER. +All the other parts of a line are left intact. + + $ geval -o out.tsv -e expected.tsv --metric 'Accuracy:s<\d+><NUMBER>' + 0.3 + +You can use special operators `\0`, `\1`, `\2` to refer to parts matched by the regexp. + + $ geval -o out.tsv -e expected.tsv --metric 'Accuracy:s<([A-Za-z])\S+><\1>' + 0.5 + +### Other normalizations + +#### `S` — sort all tokens + +This will sort all tokens, e.g. `foo bar baz` will be treated as `bar baz foo`. + +### Filtering + +#### `f<FEATURE>` — filtering + +Flags such as `u`, `m<...>`, `s<...><...>` etc. work within a line +(item), they won't change the number of items being evaluated.
To +consider only a subset of items, use the `f<FEATURE>` flag — only the +lines containing the feature FEATURE will be considered during metric +calculation. Features are the same as listed by the `--worst-features` +option, e.g. `exp:foo` would accept only lines with the expected +output containing the token `foo`, `in[2]:bar` — lines with the second +column of input containing the token `bar` (contrary to +`--worst-features`, square brackets should be used instead of angle ones for indexing). + +You *MUST* supply an input file when you use the `f<...>` flag. Assume +the following `in.tsv` file: + + 12 this aaa + 32 this bbb + 32 this ccc + 12 that aaa + 12 that aaa + 10 that aaa + 11 that + 11 that + 17 this + 12 that + + $ geval -o out.tsv -e expected.tsv -i in.tsv --metric 'Accuracy:f<in[2]:this>' + 0.25 + +### Presentation + +Some flags are used not for modifying the result, but rather changing +the way it is presented by GEval (or the associated +[Gonito](https://gonito.net) Web application). + +#### `N` — use an alternative name + +Sometimes, the metric name gets complicated; you can use the `N<...>` +flag to give it a more human-readable name. + +This will be used: + +* by GEval when presenting results from more than one metric (when + only one metric is calculated, its name is not given anyway), +* by Gonito, e.g. in table headers. + + $ geval -o out.tsv -e expected.tsv --metric Accuracy --metric 'MultiLabel-F1:N<F-score>' --metric 'MultiLabel-F0:N<Precision>' --metric 'MultiLabel-F9999:N<Recall>' + Accuracy 0.200 + F-score 0.511 + Precision 0.462 + Recall 0.571 + +(GEval does not have separate Precision/Recall metrics, but they can +be easily obtained by setting the parameter of the F-score to, +respectively, 0 and a large number.) + +More than one name can be given. In such a case, the names will be concatenated with spaces.
+ + $ geval --precision 3 -o out.tsv -e expected.tsv --metric 'Accuracy' --metric 'MultiLabel-F1:N<F-score>N<on>N<tokens>' + Accuracy 0.200 + F-score on tokens 0.511 + +This is handy, when combined with the `{...}` operator (see below). + +#### `P` — set the priority (within the Gonito platform) + +This sets the priority level, considered when the results are displayed in the Gonito platform. +It has no effect in GEval as such (it is simply disregarded in GEval). + + $ geval --precision 3 -o out.tsv -e expected.tsv --metric 'Accuracy:P<1>' --metric 'MultiLabel-F1:P<3>' Accuracy:P<1> 0.200 + MultiLabel-F1.0:P<3> 0.511 + +The priority is interpreted by Gonito in the following way: + + * 1 — show everywhere, including the main leaderboard table + * 2 — show on the secondary leaderboard table and in detailed information for a submission + * 3 — show only in detailed information for a submission + +Although you can specify `P<...>` more than once, only the first value +will be considered for a given metric (this might be important when combined with the `{...}` operator). + +### Combining flags + +Flags can be combined, just by concatenation (`:` should be given only once): + + $ geval -o out.tsv -e expected.tsv -i in.tsv --metric Accuracy --metric 'Accuracy:f<in[2]:this>cs<\d><X>N<MyWeirdMetric>' + Accuracy 0.2 + MyWeirdMetric 0.75 + +Note that the order of flags might sometimes be significant; in +general, they are considered from left to right. + +### Cartesian operator `{...}` + +Sometimes, you need to define a large number of similar metrics. Then +you can use the special `{...}` operator interpreted by GEval (not +Bash!). For instance `{foo,bar}xyz{aaa,bbb,ccc}` will be internally +considered as the Cartesian product (i.e. you'll get all the +combinations): `fooxyzaaa`, `fooxyzbbb`, `fooxyzccc`, `barxyzaaa`, +`barxyzbbb`, `barxyzccc`. + +For example, let's assume that we want accuracy, F-score, precision +and recall in both case-sensitive and case-insensitive versions.
+Here's the way to calculate all these 8 metrics in a concise manner: + + $ geval --precision 3 -o out.tsv -e expected.tsv -i in.tsv --metric '{Accuracy:N<Acc>,MultiLabel-F1:N<F1>,MultiLabel-F0:N<P>,MultiLabel-F9999:N<R>}N<case>{N<sensitive>,cN<non-sensitive>}' + sensitive non-sensitive + Acc case 0.200 0.400 + F1 case 0.511 0.681 + P case 0.462 0.615 + R case 0.571 0.762 + +Note that GEval automagically puts the results in a table! (Well, +_case_ probably should be written in headers, but, well, it generates +the table totally on its own.) + ## Handling headers When dealing with TSV files, you often face a dilemma whether to add a diff --git a/src/GEval/EvaluationScheme.hs b/src/GEval/EvaluationScheme.hs index 135f9d8..b335087 100644 --- a/src/GEval/EvaluationScheme.hs +++ b/src/GEval/EvaluationScheme.hs @@ -12,7 +12,7 @@ import GEval.Metric import Text.Regex.PCRE.Heavy import Text.Regex.PCRE.Light.Base (Regex(..)) -import Data.Text (Text(..), concat, toLower, toUpper, pack, unpack, words, unwords) +import Data.Text (Text(..), concat, toCaseFold, toLower, toUpper, pack, unpack, words, unwords) import Data.List (intercalate, break, sort) import Data.Either import Data.Maybe (fromMaybe, catMaybes) @@ -23,8 +23,10 @@ data EvaluationScheme = EvaluationScheme Metric [PreprocessingOperation] deriving (Eq) data PreprocessingOperation = RegexpMatch Regex + | RegexpTokenMatch Regex | LowerCasing | UpperCasing + | CaseFolding | Sorting | SetName Text | SetPriority Int @@ -51,7 +53,10 @@ readOps ('l':theRest) = (LowerCasing:ops, theRest') where (ops, theRest') = readOps theRest readOps ('u':theRest) = (UpperCasing:ops, theRest') where (ops, theRest') = readOps theRest +readOps ('c':theRest) = (CaseFolding:ops, theRest') + where (ops, theRest') = readOps theRest readOps ('m':theRest) = handleParametrizedOp (RegexpMatch . (fromRight undefined) . ((flip compileM) []) . BSU.fromString) theRest +readOps ('t':theRest) = handleParametrizedOp (RegexpTokenMatch . (fromRight undefined) . ((flip compileM) []) . BSU.fromString) theRest readOps ('S':theRest) = (Sorting:ops, theRest') where (ops, theRest') = readOps theRest readOps ('N':theRest) = handleParametrizedOp (SetName . 
pack) theRest @@ -120,8 +125,10 @@ evaluationSchemeMetric (EvaluationScheme metric _) = metric instance Show PreprocessingOperation where show (RegexpMatch (Regex _ regexp)) = parametrizedOperation "m" (BSU.toString regexp) + show (RegexpTokenMatch (Regex _ regexp)) = parametrizedOperation "t" (BSU.toString regexp) show LowerCasing = "l" show UpperCasing = "u" + show CaseFolding = "c" show Sorting = "S" show (SetName t) = parametrizedOperation "N" (unpack t) show (SetPriority p) = parametrizedOperation "P" (show p) @@ -154,8 +161,10 @@ applyPreprocessingOperations (EvaluationScheme _ operations) t = foldl (flip app applyPreprocessingOperation :: PreprocessingOperation -> Text -> Text applyPreprocessingOperation (RegexpMatch regex) = Data.Text.concat . (map fst) . (scan regex) +applyPreprocessingOperation (RegexpTokenMatch regex) = Data.Text.unwords . (filter (≈ regex)) . Data.Text.words applyPreprocessingOperation LowerCasing = toLower applyPreprocessingOperation UpperCasing = toUpper +applyPreprocessingOperation CaseFolding = toCaseFold applyPreprocessingOperation Sorting = Data.Text.unwords . sort . 
Data.Text.words applyPreprocessingOperation (SetName _) = id applyPreprocessingOperation (SetPriority _) = id diff --git a/test/Spec.hs b/test/Spec.hs index 43b8a3d..2730046 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -364,6 +364,27 @@ main = hspec $ do runGEvalTest "f1-with-preprocessing" `shouldReturnAlmost` 0.57142857142857 it "Regexp substition" $ do runGEvalTest "accuracy-with-flags" `shouldReturnAlmost` 0.8 + describe "Flag examples" $ do + it "none" $ do + runGEvalTest "flags-none" `shouldReturnAlmost` 0.2 + it "lower-case" $ do + runGEvalTest "flags-lowercase" `shouldReturnAlmost` 0.3 + it "upper-case" $ do + runGEvalTest "flags-uppercase" `shouldReturnAlmost` 0.4 + it "regexp-matching" $ do + runGEvalTest "flags-regexp-matching" `shouldReturnAlmost` 0.8 + it "regexp-matching-anchor" $ do + runGEvalTest "flags-regexp-matching-anchor" `shouldReturnAlmost` 0.8 + it "regexp-token-matching" $ do + runGEvalTest "flags-regexp-token-matching" `shouldReturnAlmost` 0.7 + it "regexp-token-matching-anchor" $ do + runGEvalTest "flags-regexp-token-matching-anchor" `shouldReturnAlmost` 0.8 + it "regexp-substitution" $ do + runGEvalTest "flags-regexp-substitution" `shouldReturnAlmost` 0.3 + it "regexp-substitution-ref" $ do + runGEvalTest "flags-regexp-substitution-ref" `shouldReturnAlmost` 0.5 + it "filtering" $ do + runGEvalTest "flags-filtering" `shouldReturnAlmost` 0.25 describe "evaluating single lines" $ do it "RMSE" $ do (MetricOutput (SimpleRun v) _) <- gevalCoreOnSingleLines RMSE id RawItemTarget diff --git a/test/flags-case-fold/flags-case-fold-solution/test-A/out.tsv b/test/flags-case-fold/flags-case-fold-solution/test-A/out.tsv new file mode 100644 index 0000000..d94143b --- /dev/null +++ b/test/flags-case-fold/flags-case-fold-solution/test-A/out.tsv @@ -0,0 +1,10 @@ +foo 999 BAR +29008 STRASSE +xyz +aaa BBB 34 +qwerty 1000 +WWW WWW WWW +testtttttt +104 +Foo baz BAR +Ok 7777 diff --git a/test/flags-case-fold/flags-case-fold/config.txt 
b/test/flags-case-fold/flags-case-fold/config.txt new file mode 100644 index 0000000..82c1775 --- /dev/null +++ b/test/flags-case-fold/flags-case-fold/config.txt @@ -0,0 +1 @@ +--metric Accuracy:c diff --git a/test/flags-case-fold/flags-case-fold/test-A/expected.tsv b/test/flags-case-fold/flags-case-fold/test-A/expected.tsv new file mode 100644 index 0000000..a95a323 --- /dev/null +++ b/test/flags-case-fold/flags-case-fold/test-A/expected.tsv @@ -0,0 +1,10 @@ +foo 123 bar +29008 Straße +xyz +aaa 3 4 bbb +qwerty 100 +WWW WWW +test +104 +BAR Foo baz +OK 7777 diff --git a/test/flags-filtering/flags-filtering-solution/test-A/out.tsv b/test/flags-filtering/flags-filtering-solution/test-A/out.tsv new file mode 100644 index 0000000..d94143b --- /dev/null +++ b/test/flags-filtering/flags-filtering-solution/test-A/out.tsv @@ -0,0 +1,10 @@ +foo 999 BAR +29008 STRASSE +xyz +aaa BBB 34 +qwerty 1000 +WWW WWW WWW +testtttttt +104 +Foo baz BAR +Ok 7777 diff --git a/test/flags-filtering/flags-filtering/config.txt b/test/flags-filtering/flags-filtering/config.txt new file mode 100644 index 0000000..dfa6b66 --- /dev/null +++ b/test/flags-filtering/flags-filtering/config.txt @@ -0,0 +1 @@ +--metric Accuracy:f diff --git a/test/flags-filtering/flags-filtering/test-A/expected.tsv b/test/flags-filtering/flags-filtering/test-A/expected.tsv new file mode 100644 index 0000000..a95a323 --- /dev/null +++ b/test/flags-filtering/flags-filtering/test-A/expected.tsv @@ -0,0 +1,10 @@ +foo 123 bar +29008 Straße +xyz +aaa 3 4 bbb +qwerty 100 +WWW WWW +test +104 +BAR Foo baz +OK 7777 diff --git a/test/flags-filtering/flags-filtering/test-A/in.tsv b/test/flags-filtering/flags-filtering/test-A/in.tsv new file mode 100644 index 0000000..72292ef --- /dev/null +++ b/test/flags-filtering/flags-filtering/test-A/in.tsv @@ -0,0 +1,10 @@ +12 this aaa +32 this bbb +32 this ccc +12 that aaa +12 that aaa +10 that aaa +11 that +11 that +17 this +12 that diff --git 
a/test/flags-lowercase/flags-lowercase-solution/test-A/out.tsv b/test/flags-lowercase/flags-lowercase-solution/test-A/out.tsv new file mode 100644 index 0000000..d94143b --- /dev/null +++ b/test/flags-lowercase/flags-lowercase-solution/test-A/out.tsv @@ -0,0 +1,10 @@ +foo 999 BAR +29008 STRASSE +xyz +aaa BBB 34 +qwerty 1000 +WWW WWW WWW +testtttttt +104 +Foo baz BAR +Ok 7777 diff --git a/test/flags-lowercase/flags-lowercase/config.txt b/test/flags-lowercase/flags-lowercase/config.txt new file mode 100644 index 0000000..e3c75cd --- /dev/null +++ b/test/flags-lowercase/flags-lowercase/config.txt @@ -0,0 +1 @@ +--metric Accuracy:l diff --git a/test/flags-lowercase/flags-lowercase/test-A/expected.tsv b/test/flags-lowercase/flags-lowercase/test-A/expected.tsv new file mode 100644 index 0000000..a95a323 --- /dev/null +++ b/test/flags-lowercase/flags-lowercase/test-A/expected.tsv @@ -0,0 +1,10 @@ +foo 123 bar +29008 Straße +xyz +aaa 3 4 bbb +qwerty 100 +WWW WWW +test +104 +BAR Foo baz +OK 7777 diff --git a/test/flags-none/flags-none-solution/test-A/out.tsv b/test/flags-none/flags-none-solution/test-A/out.tsv new file mode 100644 index 0000000..d94143b --- /dev/null +++ b/test/flags-none/flags-none-solution/test-A/out.tsv @@ -0,0 +1,10 @@ +foo 999 BAR +29008 STRASSE +xyz +aaa BBB 34 +qwerty 1000 +WWW WWW WWW +testtttttt +104 +Foo baz BAR +Ok 7777 diff --git a/test/flags-none/flags-none/config.txt b/test/flags-none/flags-none/config.txt new file mode 100644 index 0000000..337a0cc --- /dev/null +++ b/test/flags-none/flags-none/config.txt @@ -0,0 +1 @@ +--metric Accuracy diff --git a/test/flags-none/flags-none/test-A/expected.tsv b/test/flags-none/flags-none/test-A/expected.tsv new file mode 100644 index 0000000..a95a323 --- /dev/null +++ b/test/flags-none/flags-none/test-A/expected.tsv @@ -0,0 +1,10 @@ +foo 123 bar +29008 Straße +xyz +aaa 3 4 bbb +qwerty 100 +WWW WWW +test +104 +BAR Foo baz +OK 7777 diff --git 
a/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor-solution/test-A/out.tsv b/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor-solution/test-A/out.tsv new file mode 100644 index 0000000..d94143b --- /dev/null +++ b/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor-solution/test-A/out.tsv @@ -0,0 +1,10 @@ +foo 999 BAR +29008 STRASSE +xyz +aaa BBB 34 +qwerty 1000 +WWW WWW WWW +testtttttt +104 +Foo baz BAR +Ok 7777 diff --git a/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/config.txt b/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/config.txt new file mode 100644 index 0000000..f420c60 --- /dev/null +++ b/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/config.txt @@ -0,0 +1 @@ +--metric Accuracy:m<^..> diff --git a/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/test-A/expected.tsv b/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/test-A/expected.tsv new file mode 100644 index 0000000..a95a323 --- /dev/null +++ b/test/flags-regexp-matching-anchor/flags-regexp-matching-anchor/test-A/expected.tsv @@ -0,0 +1,10 @@ +foo 123 bar +29008 Straße +xyz +aaa 3 4 bbb +qwerty 100 +WWW WWW +test +104 +BAR Foo baz +OK 7777 diff --git a/test/flags-regexp-matching/flags-regexp-matching-solution/test-A/out.tsv b/test/flags-regexp-matching/flags-regexp-matching-solution/test-A/out.tsv new file mode 100644 index 0000000..d94143b --- /dev/null +++ b/test/flags-regexp-matching/flags-regexp-matching-solution/test-A/out.tsv @@ -0,0 +1,10 @@ +foo 999 BAR +29008 STRASSE +xyz +aaa BBB 34 +qwerty 1000 +WWW WWW WWW +testtttttt +104 +Foo baz BAR +Ok 7777 diff --git a/test/flags-regexp-matching/flags-regexp-matching/config.txt b/test/flags-regexp-matching/flags-regexp-matching/config.txt new file mode 100644 index 0000000..ce11160 --- /dev/null +++ b/test/flags-regexp-matching/flags-regexp-matching/config.txt @@ -0,0 +1 @@ +--metric Accuracy:m<\d+> diff --git 
a/test/flags-regexp-matching/flags-regexp-matching/test-A/expected.tsv b/test/flags-regexp-matching/flags-regexp-matching/test-A/expected.tsv new file mode 100644 index 0000000..a95a323 --- /dev/null +++ b/test/flags-regexp-matching/flags-regexp-matching/test-A/expected.tsv @@ -0,0 +1,10 @@ +foo 123 bar +29008 Straße +xyz +aaa 3 4 bbb +qwerty 100 +WWW WWW +test +104 +BAR Foo baz +OK 7777 diff --git a/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref-solution/test-A/out.tsv b/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref-solution/test-A/out.tsv new file mode 100644 index 0000000..d94143b --- /dev/null +++ b/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref-solution/test-A/out.tsv @@ -0,0 +1,10 @@ +foo 999 BAR +29008 STRASSE +xyz +aaa BBB 34 +qwerty 1000 +WWW WWW WWW +testtttttt +104 +Foo baz BAR +Ok 7777 diff --git a/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/config.txt b/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/config.txt new file mode 100644 index 0000000..2fca6f8 --- /dev/null +++ b/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/config.txt @@ -0,0 +1 @@ +--metric Accuracy:s<([A-Za-z])\S+> diff --git a/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/test-A/expected.tsv b/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/test-A/expected.tsv new file mode 100644 index 0000000..a95a323 --- /dev/null +++ b/test/flags-regexp-substitution-ref/flags-regexp-substitution-ref/test-A/expected.tsv @@ -0,0 +1,10 @@ +foo 123 bar +29008 Straße +xyz +aaa 3 4 bbb +qwerty 100 +WWW WWW +test +104 +BAR Foo baz +OK 7777 diff --git a/test/flags-regexp-substitution/flags-regexp-substitution-solution/test-A/out.tsv b/test/flags-regexp-substitution/flags-regexp-substitution-solution/test-A/out.tsv new file mode 100644 index 0000000..d94143b --- /dev/null +++ b/test/flags-regexp-substitution/flags-regexp-substitution-solution/test-A/out.tsv @@ 
-0,0 +1,10 @@ +foo 999 BAR +29008 STRASSE +xyz +aaa BBB 34 +qwerty 1000 +WWW WWW WWW +testtttttt +104 +Foo baz BAR +Ok 7777 diff --git a/test/flags-regexp-substitution/flags-regexp-substitution/config.txt b/test/flags-regexp-substitution/flags-regexp-substitution/config.txt new file mode 100644 index 0000000..3d1a792 --- /dev/null +++ b/test/flags-regexp-substitution/flags-regexp-substitution/config.txt @@ -0,0 +1 @@ +--metric Accuracy:s<\d+> diff --git a/test/flags-regexp-substitution/flags-regexp-substitution/test-A/expected.tsv b/test/flags-regexp-substitution/flags-regexp-substitution/test-A/expected.tsv new file mode 100644 index 0000000..a95a323 --- /dev/null +++ b/test/flags-regexp-substitution/flags-regexp-substitution/test-A/expected.tsv @@ -0,0 +1,10 @@ +foo 123 bar +29008 Straße +xyz +aaa 3 4 bbb +qwerty 100 +WWW WWW +test +104 +BAR Foo baz +OK 7777 diff --git a/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor-solution/test-A/out.tsv b/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor-solution/test-A/out.tsv new file mode 100644 index 0000000..d94143b --- /dev/null +++ b/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor-solution/test-A/out.tsv @@ -0,0 +1,10 @@ +foo 999 BAR +29008 STRASSE +xyz +aaa BBB 34 +qwerty 1000 +WWW WWW WWW +testtttttt +104 +Foo baz BAR +Ok 7777 diff --git a/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/config.txt b/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/config.txt new file mode 100644 index 0000000..8a33294 --- /dev/null +++ b/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/config.txt @@ -0,0 +1 @@ +--metric Accuracy:t<^b> diff --git a/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/test-A/expected.tsv b/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/test-A/expected.tsv new file mode 100644 index 
0000000..a95a323 --- /dev/null +++ b/test/flags-regexp-token-matching-anchor/flags-regexp-token-matching-anchor/test-A/expected.tsv @@ -0,0 +1,10 @@ +foo 123 bar +29008 Straße +xyz +aaa 3 4 bbb +qwerty 100 +WWW WWW +test +104 +BAR Foo baz +OK 7777 diff --git a/test/flags-regexp-token-matching/flags-regexp-token-matching-solution/test-A/out.tsv b/test/flags-regexp-token-matching/flags-regexp-token-matching-solution/test-A/out.tsv new file mode 100644 index 0000000..d94143b --- /dev/null +++ b/test/flags-regexp-token-matching/flags-regexp-token-matching-solution/test-A/out.tsv @@ -0,0 +1,10 @@ +foo 999 BAR +29008 STRASSE +xyz +aaa BBB 34 +qwerty 1000 +WWW WWW WWW +testtttttt +104 +Foo baz BAR +Ok 7777 diff --git a/test/flags-regexp-token-matching/flags-regexp-token-matching/config.txt b/test/flags-regexp-token-matching/flags-regexp-token-matching/config.txt new file mode 100644 index 0000000..3aa87e0 --- /dev/null +++ b/test/flags-regexp-token-matching/flags-regexp-token-matching/config.txt @@ -0,0 +1 @@ +--metric Accuracy:t<\d+> diff --git a/test/flags-regexp-token-matching/flags-regexp-token-matching/test-A/expected.tsv b/test/flags-regexp-token-matching/flags-regexp-token-matching/test-A/expected.tsv new file mode 100644 index 0000000..a95a323 --- /dev/null +++ b/test/flags-regexp-token-matching/flags-regexp-token-matching/test-A/expected.tsv @@ -0,0 +1,10 @@ +foo 123 bar +29008 Straße +xyz +aaa 3 4 bbb +qwerty 100 +WWW WWW +test +104 +BAR Foo baz +OK 7777 diff --git a/test/flags-uppercase/flags-uppercase-solution/test-A/out.tsv b/test/flags-uppercase/flags-uppercase-solution/test-A/out.tsv new file mode 100644 index 0000000..d94143b --- /dev/null +++ b/test/flags-uppercase/flags-uppercase-solution/test-A/out.tsv @@ -0,0 +1,10 @@ +foo 999 BAR +29008 STRASSE +xyz +aaa BBB 34 +qwerty 1000 +WWW WWW WWW +testtttttt +104 +Foo baz BAR +Ok 7777 diff --git a/test/flags-uppercase/flags-uppercase/config.txt b/test/flags-uppercase/flags-uppercase/config.txt new file mode 
100644 index 0000000..a94cfa3 --- /dev/null +++ b/test/flags-uppercase/flags-uppercase/config.txt @@ -0,0 +1 @@ +--metric Accuracy:u diff --git a/test/flags-uppercase/flags-uppercase/test-A/expected.tsv b/test/flags-uppercase/flags-uppercase/test-A/expected.tsv new file mode 100644 index 0000000..a95a323 --- /dev/null +++ b/test/flags-uppercase/flags-uppercase/test-A/expected.tsv @@ -0,0 +1,10 @@ +foo 123 bar +29008 Straße +xyz +aaa 3 4 bbb +qwerty 100 +WWW WWW +test +104 +BAR Foo baz +OK 7777