From 3ed1b98e28c7e66ed9267a8e90166d11ff47438e Mon Sep 17 00:00:00 2001
From: Dawid Majsnerowski
Date: Sat, 19 Oct 2019 22:29:05 +0200
Subject: [PATCH 1/7] Windows install description

---
 README.md | 39 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 1d60508..93c0a27 100644
--- a/README.md
+++ b/README.md
@@ -35,6 +35,8 @@ order to run `geval` you need to either add `$HOME/.local/bin` to
 `$PATH` in your configuration or to type:

     PATH="$HOME/.local/bin" geval ...
+
+On Windows, you should add a new global environment variable named 'geval' whose value is the same path as above.

 ### Troubleshooting

@@ -63,6 +65,31 @@ to happen on macOS, as these packages are usually installed out of the box on Linux.

 In case the lzma package is not installed on your Linux, you need to run (assuming Debian/Ubuntu):

     sudo apt-get install pkg-config liblzma-dev libpq-dev libpcre3-dev
+
+If you see this message on Windows when executing the `stack test` command:
+
+    In the dependencies for geval-1.21.1.0:
+        unix needed, but the stack configuration has no specified version
+    In the dependencies for lzma-0.0.0.3:
+        lzma-clib needed, but the stack configuration has no specified version
+
+you should replace `unix` with `unix-compat` in the `geval.cabal` file,
+because the `unix` package is not supported on Windows.
+
+You should also add `lzma-clib-5.2.2` and `unix-compat-0.5.2` to the `extra-deps` section in the `stack.yaml` file.
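+The relevant fragment of `stack.yaml` should then look roughly like this
+(a sketch, using the version numbers given above):
+
+    extra-deps:
+      - lzma-clib-5.2.2
+      - unix-compat-0.5.2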
+
+If you see a message about missing pkg-config on Windows, you should download two packages from the site:
+http://ftp.gnome.org/pub/gnome/binaries/win32/dependencies/
+These packages are:
+ - pkg-config (newest version)
+ - gettext-runtime (newest version)
+Extract the `pkg-config.exe` file to a directory on the Windows PATH,
+and extract the `intl.dll` file from gettext-runtime.
+
+You should also download the glib package from
+http://ftp.gnome.org/pub/gnome/binaries/win32/glib/2.28
+and extract the `libglib-2.0-0.dll` file.
+
+You can put all these files in, for example, the `C:\MinGW\bin` directory.

 ### Plan B — just download the GEval binary

@@ -72,7 +99,17 @@ In case the lzma package is not installed on your Linux, you need to run (assuming Debian/Ubuntu):

     wget https://gonito.net/get/bin/geval
     chmod u+x geval
     ./geval --help

-This is a fully static binary, it should work on any 64-bit Linux.
+
+On Windows, you should use Windows PowerShell:
+
+    wget https://gonito.net/get/bin/geval
+
+Next, go to the folder where you downloaded `geval`, right-click the `geval` file,
+open `Properties` and, in the `Security` section, grant yourself full access to the file.
+
+Alternatively, you can use `icacls "path\to\geval" /grant USER:F`.
+
+This is a fully static binary; it should work on any 64-bit Linux or 64-bit Windows.

 ## Quick tour

From 3001803c567b4e7a7a1217e03f7fa93f365a641c Mon Sep 17 00:00:00 2001
From: huntekah
Date: Tue, 29 Oct 2019 19:12:26 +0000
Subject: [PATCH 2/7] Add GLEU metric description #29

---
 src/GEval/CreateChallenge.hs |  7 +++++--
 src/GEval/MetricsMeta.hs     | 32 ++++++++++++++++++++++++++++++--
 2 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs
index 85430eb..8b501a3 100644
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@@ -624,7 +624,8 @@ devExpectedContents _ = [hereLit|0.82
 |]

 testInContents :: Metric -> String
-testInContents GLEU = testInContents BLEU
+testInContents GLEU = [hereLit|Alice has a black
+|]
 testInContents BLEU = [hereLit|ja jumala kutsui valkeuden päiväksi , ja pimeyden hän kutsui yöksi
 ja tuli ehtoo , ja tuli aamu , ensimmäinen päivä
 |]
@@ -690,7 +691,6 @@ testInContents _ = [hereLit|0.72
 0 0.007
 |]

 testExpectedContents :: Metric -> String
-testExpectedContents GLEU = testExpectedContents BLEU
 testExpectedContents BLEU = [hereLit|na ka huaina e te atua te marama ko te awatea , a ko te pouri i huaina e ia ko te po
 a ko te ahiahi , ko te ata , he ra kotahi
 |]
@@ -753,10 +753,13 @@ bar:1/50,50,1000,1000
 testExpectedContents ClippEU = [hereLit|3/0,0,100,100/10
 1/10,10,1000,1000/10
 |]
+testExpectedContents GLEU = [hereLit|Alice has a black cat
+|]
 testExpectedContents _ = [hereLit|0.11
 17.2
 |]

+
 gitignoreContents :: String
 gitignoreContents = [hereLit|
 *~
diff --git a/src/GEval/MetricsMeta.hs b/src/GEval/MetricsMeta.hs
index 2158ba9..8747bd9 100644
--- a/src/GEval/MetricsMeta.hs
+++ b/src/GEval/MetricsMeta.hs
@@ -93,6 +93,7 @@ isMetricDescribed :: Metric -> Bool
 isMetricDescribed (SoftFMeasure _) = True
 isMetricDescribed (Soft2DFMeasure _) = True
 isMetricDescribed (ProbabilisticMultiLabelFMeasure _) = True
+isMetricDescribed GLEU = True
 isMetricDescribed _ = False

 getEvaluationSchemeDescription :: EvaluationScheme -> String
@@ -118,7 +119,21 @@ where calibration measures the quality of probabilities (how well they are calibrated), e.g.
 if we have 10 items with probability 0.5 and 5 of them are correct, then the calibration
 is perfect.
 |]
-
+getMetricDescription GLEU =
+  [i|For the GLEU score, we record all sub-sequences of
+1, 2, 3 or 4 tokens in the output and in the target sequence (n-grams). We then
+compute the recall, which is the ratio of the number of matching n-grams
+to the total number of n-grams in the target (ground-truth) sequence,
+and the precision, which is the ratio of the number of matching n-grams
+to the total number of n-grams in the generated output sequence. The
+GLEU score is then simply the minimum of recall and precision. The GLEU
+score always ranges between 0 (no matches) and 1 (everything matches),
+and it is symmetrical when the output and the target are swapped.
+According to the article, the GLEU score correlates quite well with the
+BLEU metric at the corpus level, but does not have BLEU's drawbacks when
+used as a per-sentence reward objective.
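+Equivalently, in terms of counts: GLEU = min(precision, recall)
+= min(tp / (tp + fp), tp / (tp + fn)), where tp, fp and fn are
+counted over all n-grams of order 1 to 4 (see the worked example below).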
+see: https://arxiv.org/pdf/1609.08144.pdf
+|]

 outContents :: Metric -> String
@@ -132,6 +147,8 @@ outContents (ProbabilisticMultiLabelFMeasure _) = [hereLit|first-name/1:0.8 surname/1:0.4
 surname/1:0.4 first-name/3:0.9
 |]
+outContents GLEU = [hereLit|Alice has a black
+|]

 expectedScore :: EvaluationScheme -> MetricValue
 expectedScore (EvaluationScheme (SoftFMeasure beta) [])
@@ -146,6 +163,8 @@ expectedScore (EvaluationScheme (ProbabilisticMultiLabelFMeasure beta) [])
   = let precision = 0.6569596940847289
         recall = 0.675
     in weightedHarmonicMean beta precision recall
+expectedScore (EvaluationScheme GLEU [])
+  = 0.7142857142857143

 helpMetricParameterMetricsList :: String
 helpMetricParameterMetricsList = intercalate ", " $ map (\s -> (show s) ++ (case extraInfo s of
@@ -194,7 +213,8 @@ the form LABEL:PAGE/X0,Y0,X1,Y1 where LABEL is any label, page is the page number
 formatDescription (ProbabilisticMultiLabelFMeasure _) = [hereLit|In each line a number of labels (entities) can be given. A label probability can be
 provided with a colon (e.g. "foo:0.7"). By default, 1.0 is assumed.
 |]
-
+formatDescription GLEU = [hereLit|In each line there is a space-separated sentence of words.
+|]

 scoreExplanation :: EvaluationScheme -> Maybe String
 scoreExplanation (EvaluationScheme (SoftFMeasure _) [])
@@ -206,6 +226,14 @@ As far as the second item is concerned, the total area covered by the output
 Hence, recall is 247500/902500=0.274 and precision - 247500/(20000+912000+240000)=0.211. Therefore,
 the F-score for the second item is 0.238 and the F-score for the whole set is (0 + 0.238)/2 = 0.119.|]
 scoreExplanation (EvaluationScheme (ProbabilisticMultiLabelFMeasure _) []) = Nothing
+scoreExplanation (EvaluationScheme GLEU [])
+  = Just [hereLit|To find the GLEU score, we first count the numbers of tp (true positives),
+fp (false positives) and fn (false negatives). We have 4 matching unigrams
+("Alice", "has", "a", "black"), 3 bigrams ("Alice has", "has a", "a black"),
+2 trigrams ("Alice has a", "has a black") and 1 tetragram ("Alice has a black"),
+so tp=10. We have no fp, therefore fp=0. There are 4 fn: "cat", "black cat",
+"a black cat" and "has a black cat". Now we have to calculate precision and recall:
+precision is tp / (tp+fp) = 10/(10+0) = 1,
+recall is tp / (tp+fn) = 10/(10+4) = 10/14 =~ 0.71428...
+The GLEU score is min(precision, recall) = 0.71428|]

 pasteLines :: String -> String -> String
 pasteLines a b = printf "%-35s %s\n" a b

From fb74f568bbfdfe32d38785bcd70577ba7a14a777 Mon Sep 17 00:00:00 2001
From: Filip Gralinski
Date: Sat, 16 Nov 2019 10:27:03 +0100
Subject: [PATCH 3/7] Improvements in README

---
 README.md | 63 ++++++++++++++++++++++++++++++------------------------
 1 file changed, 34 insertions(+), 29 deletions(-)

diff --git a/README.md b/README.md
index 93c0a27..026ec26 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@

 GEval is a Haskell library and a stand-alone tool for evaluating the
 results of solutions to machine learning challenges as defined in the
-[Gonito](https://gonito.net) platform. Also could be used outside the
+[Gonito](https://gonito.net) platform. Also, it can be used outside the
 context of Gonito.net challenges, assuming the test data is given in
 simple TSV (tab-separated values) files.

@@ -14,6 +14,29 @@ The official repository is `git://gonito.net/geval`, browsable at

 ## Installing

+### The easy way: just download the fully static GEval binary
+
+(Assuming you have a 64-bit Linux.)
+
+    wget https://gonito.net/get/bin/geval
+    chmod u+x geval
+    ./geval --help
+
+#### On Windows
+
+Use Windows PowerShell:
+
+    wget https://gonito.net/get/bin/geval
+
+Next, go to the folder where you downloaded `geval`, right-click the `geval` file,
+open `Properties` and, in the `Security` section, grant yourself full access to the file.
+
+Alternatively, you can use `icacls "path\to\geval" /grant USER:F`.
+
+This is a fully static binary; it should work on any 64-bit Linux or 64-bit Windows.
+
+### Build from scratch
+
 You need [Haskell Stack](https://github.com/commercialhaskell/stack).
 You could install Stack with your package manager or with:

@@ -35,7 +58,7 @@ order to run `geval` you need to either add `$HOME/.local/bin` to
 `$PATH` in your configuration or to type:

     PATH="$HOME/.local/bin" geval ...
-
+
 On Windows, you should add a new global environment variable named 'geval' whose value is the same path as above.

 ### Troubleshooting

@@ -65,7 +88,9 @@ to happen on macOS, as these packages are usually installed out of the box on Linux.

 In case the lzma package is not installed on your Linux, you need to run (assuming Debian/Ubuntu):

     sudo apt-get install pkg-config liblzma-dev libpq-dev libpcre3-dev
-
+
+#### Windows issues
+
 If you see this message on Windows when executing the `stack test` command:

@@ -81,8 +106,8 @@ You should also add `lzma-clib-5.2.2` and `unix-compat-0.5.2` to the `extra-deps` section

 If you see a message about missing pkg-config on Windows, you should download two packages from the site:
 http://ftp.gnome.org/pub/gnome/binaries/win32/dependencies/
 These packages are:
- - pkg-config (newest version)
- - gettext-runtime (newest version)
+ - pkg-config (the newest version)
+ - gettext-runtime (the newest version)
 Extract the `pkg-config.exe` file to a directory on the Windows PATH,
 and extract the `intl.dll` file from gettext-runtime.

@@ -91,26 +116,6 @@ and extract the `libglib-2.0-0.dll` file.

 You can put all these files in, for example, the `C:\MinGW\bin` directory.

-### Plan B — just download the GEval binary
-
-(Assuming you have a 64-bit Linux.)
-
-    wget https://gonito.net/get/bin/geval
-    chmod u+x geval
-    ./geval --help
-
-On Windows, you should use Windows PowerShell:
-
-    wget https://gonito.net/get/bin/geval
-
-Next, go to the folder where you downloaded `geval`, right-click the `geval` file,
-open `Properties` and, in the `Security` section, grant yourself full access to the file.
-
-Alternatively, you can use `icacls "path\to\geval" /grant USER:F`.
-
-This is a fully static binary; it should work on any 64-bit Linux or 64-bit Windows.
-
 ## Quick tour

 Let's use GEval to evaluate machine translation (MT) systems (but keep
@@ -226,7 +231,7 @@ But why were double quotes so problematic in German-English translation?!

 Well, look at the second-worst feature — `''` in the _output_! Oops, it seems
 like a very stupid mistake with post-processing was done and no double
 quote was correctly generated,
-which decreased the score a little bit for each sentence in which the
+which decreased the score a little for each sentence in which the
 quote was expected.

 When I fixed this simple bug, the BLEU metric increased from 0.27358
@@ -539,9 +544,9 @@ submitted. The suggested way to do this is as follows:

    `test-A/expected.tsv` added. This branch should be accessible by the
    Gonito platform, but should be kept “hidden” for regular users (or at
    least they should be kindly asked not to peek there).
 It is
-   recommended (though not obligatory) that this branch contain all
+   recommended (though not obligatory) that this branch contains all
    the source codes and data used to generate the train/dev/test sets.
-   (Use [git-annex](https://git-annex.branchable.com/) if you have really big files there.)
+   (Use [git-annex](https://git-annex.branchable.com/) if you have huge files there.)

    Branch (1) should be the parent of the branch (2), for instance,
    the repo (for the toy “planets” challenge) could be created as follows:
@@ -604,7 +609,7 @@ be nice and commit also your source codes.

     git push mine master

 Then let Gonito pull them and evaluate your results, either manually clicking
-"submit" at the Gonito web site or using `--submit` option (see below).
+"submit" at the Gonito website or using the `--submit` option (see below).

 ### Submitting a solution to a Gonito platform with GEval

From 03aacdef98ed566074e21c49ca942fbfe1b12c8f Mon Sep 17 00:00:00 2001
From: Filip Gralinski
Date: Sun, 17 Nov 2019 21:59:20 +0100
Subject: [PATCH 4/7] Add SegmentAccuracy metric

---
 src/GEval/Annotation.hs                    | 40 ++++++++++++++++++-
 src/GEval/Core.hs                          |  7 ++++
 src/GEval/CreateChallenge.hs               | 28 +++++++++++++
 src/GEval/Metric.hs                        |  5 ++-
 src/GEval/MetricsMeta.hs                   | 33 ++++++++++++---
 test/Spec.hs                               |  8 ++++
 .../test-A/out.tsv                         |  3 ++
 .../segment-accuracy-simple/config.txt     |  1 +
 .../test-A/expected.tsv                    |  3 ++
 9 files changed, 119 insertions(+), 9 deletions(-)
 create mode 100644 test/segment-accuracy-simple/segment-accuracy-simple-solution/test-A/out.tsv
 create mode 100644 test/segment-accuracy-simple/segment-accuracy-simple/config.txt
 create mode 100644 test/segment-accuracy-simple/segment-accuracy-simple/test-A/expected.tsv

diff --git a/src/GEval/Annotation.hs b/src/GEval/Annotation.hs
index abc8b59..950c93d 100644
--- a/src/GEval/Annotation.hs
+++ b/src/GEval/Annotation.hs
@@ -4,11 +4,12 @@ module GEval.Annotation
        (parseAnnotations, Annotation(..),
         parseObtainedAnnotations, ObtainedAnnotation(..),
-        matchScore, intSetParser)
+        matchScore, intSetParser, segmentAccuracy, parseSegmentAnnotations)
        where

 import qualified Data.IntSet as IS
 import qualified Data.Text as T
+import Data.Set (intersection, fromList)

 import Data.Attoparsec.Text
 import Data.Attoparsec.Combinator
@@ -17,11 +18,12 @@ import GEval.Common (sepByWhitespaces, (/.))
 import GEval.Probability
 import Data.Char
 import Data.Maybe (fromMaybe)
+import Data.Either (partitionEithers)

 import GEval.PrecisionRecall(weightedMaxMatching)

 data Annotation = Annotation T.Text IS.IntSet
-                  deriving (Eq, Show)
+                  deriving (Eq, Show, Ord)

 data ObtainedAnnotation = ObtainedAnnotation Annotation Double
                           deriving (Eq, Show)
@@ -52,6 +54,36 @@ obtainedAnnotationParser = do

 parseAnnotations :: T.Text -> Either String [Annotation]
 parseAnnotations t = parseOnly (annotationsParser <* endOfInput) t

+parseSegmentAnnotations :: T.Text -> Either String [Annotation]
+parseSegmentAnnotations t = case parseAnnotationsWithColons t of
+  Left m -> Left m
+  Right annotations -> if areSegmentsDisjoint annotations
+                       then (Right annotations)
+                       else (Left "Overlapping segments")
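+
+-- For instance (illustrative; these are the cases checked in the specs below):
+--   parseSegmentAnnotations "foo:x:3,7-10 baz:4-6"
+--     == Right [Annotation "foo:x" (IS.fromList [3,7,8,9,10]),
+--               Annotation "baz" (IS.fromList [4,5,6])]
+--   parseSegmentAnnotations "foo:x:3,7-10 baz:2-6"
+--     == Left "Overlapping segments"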
+
+areSegmentsDisjoint :: [Annotation] -> Bool
+areSegmentsDisjoint = areIntSetsDisjoint . map (\(Annotation _ s) -> s)
+
+areIntSetsDisjoint :: [IS.IntSet] -> Bool
+areIntSetsDisjoint ss = snd $ foldr step (IS.empty, True) ss
+  where step _ w@(_, False) = w
+        step s (u, True) = (s `IS.union` u, s `IS.disjoint` u)
+
+-- unfortunately, attoparsec does not seem to back-track properly,
+-- so we need a special function if labels can contain colons
+parseAnnotationsWithColons :: T.Text -> Either String [Annotation]
+parseAnnotationsWithColons t = case partitionEithers (map parseAnnotationWithColons $ T.words t) of
+  ([], annotations) -> Right annotations
+  ((firstProblem:_), _) -> Left firstProblem
+
+parseAnnotationWithColons :: T.Text -> Either String Annotation
+parseAnnotationWithColons t = if T.null label
+                              then Left "Colon expected"
+                              else case parseOnly (intSetParser <* endOfInput) position of
+                                     Left m -> Left m
+                                     Right s -> Right (Annotation (T.init label) s)
+  where (label, position) = T.breakOnEnd ":" t
+
 annotationsParser :: Parser [Annotation]
 annotationsParser = sepByWhitespaces annotationParser

@@ -70,3 +102,7 @@ intervalParser = do
   startIx <- decimal
   endIx <- (string "-" *> decimal <|> pure startIx)
   pure $ IS.fromList [startIx..endIx]
+
+segmentAccuracy :: [Annotation] -> [Annotation] -> Double
+segmentAccuracy expected output = (fromIntegral $ length matched) / (fromIntegral $ length expected)
+  where matched = (fromList expected) `intersection` (fromList output)
diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs
index 1897fb7..4611671 100644
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -706,6 +706,13 @@ gevalCoreOnSources TokenAccuracy _ = gevalCoreWithoutInput intoTokens
                  | otherwise = (h, t + 1)
         hitsAndTotalsAgg = CC.foldl (\(h1, t1) (h2, t2) -> (h1 + h2, t1 + t2)) (0, 0)

+gevalCoreOnSources SegmentAccuracy _ = gevalCoreWithoutInput parseSegmentAnnotations
+                                                             parseSegmentAnnotations
+                                                             (uncurry segmentAccuracy)
+                                                             averageC
+                                                             id
+                                                             noGraph
+
 gevalCoreOnSources MultiLabelLogLoss _ = gevalCoreWithoutInput intoWords
                                                                (Right . parseIntoProbList)
                                                                (uncurry countLogLossOnProbList)
diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs
index 8b501a3..3a915e5 100644
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@@ -297,6 +297,19 @@ in the expected file (but not in the output file).

 |] ++ (commonReadmeMDContents testName)

+readmeMDContents SegmentAccuracy testName = [i|
+Segment a sentence and tag it with POS tags
+===========================================
+
+This is a sample, toy challenge for SegmentAccuracy.
+
+For each sentence, give a sequence of POS tags, each one with
+its position (1-indexed). For instance, `N:1-10` means a noun
+starting from the beginning (the first character) up to the tenth
+character (inclusively).
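+
+For example, for the sentence `The student's smart`, the expected
+output is `Art:1-3 N:5-11 V:12-13 A:15-19` (this matches the training
+data below).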
+
+|] ++ (commonReadmeMDContents testName)
+
 readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta) testName
 readmeMDContents (MultiLabelFMeasure beta) testName = [i|
 Tag names and their component
@@ -473,6 +486,9 @@ B-firstname/JOHN I-surname/VON I-surname/NEUMANN	John von Nueman
 trainContents TokenAccuracy = [hereLit|* V N	I like cats
 * * V * N	I can see the rainbow
 |]
+trainContents SegmentAccuracy = [hereLit|Art:1-3 N:5-11 V:12-13 A:15-19	The student's smart
+N:1-6 N:8-10 V:12-13 A:15-18	Mary's dog is nice
+|]
 trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta)
 trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith	person/3,4,5 first-name/4 surname/5
 Steven bloody Brown	person/1,3 first-name/1 surname/3
@@ -540,6 +556,9 @@ Mr Jan Kowalski
 devInContents TokenAccuracy = [hereLit|The cats on the mat
 Ala has a cat
 |]
+devInContents SegmentAccuracy = [hereLit|John is smart
+Mary's intelligent
+|]
 devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta)
 devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here
 I see him
@@ -604,6 +623,9 @@ O B-firstname/JAN B-surname/KOWALSKI
 devExpectedContents TokenAccuracy = [hereLit|* N * * N
 N V * N
 |]
+devExpectedContents SegmentAccuracy = [hereLit|N:1-4 V:6-7 A:9-13
+N:1-4 V:6-7 A:9-19
+|]
 devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta)
 devExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,2 first-name/1 surname/2
@@ -673,6 +695,9 @@ No name here
 testInContents TokenAccuracy = [hereLit|I have cats
 I know
 |]
+testInContents SegmentAccuracy = [hereLit|Mary's cat is old
+John is young
+|]
 testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta)
 testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith
 Nobody is there
@@ -738,6 +763,9 @@ O O O
 testExpectedContents TokenAccuracy = [hereLit|* V N
 * V
 |]
+testExpectedContents SegmentAccuracy = [hereLit|N:1-6 N:8-10 V:12-13 A:15-17
+N:1-4 V:6-7 A:9-13
+|]
 testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta)
 testExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,3 first-name/1 surname/3
diff --git a/src/GEval/Metric.hs b/src/GEval/Metric.hs
index 0a53a61..b87c599 100644
--- a/src/GEval/Metric.hs
+++ b/src/GEval/Metric.hs
@@ -26,7 +26,7 @@ import Data.Attoparsec.Text (parseOnly)

 data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | ClippEU
              | FMeasure Double | MacroFMeasure Double | NMI
              | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
-             | BIOF1 | BIOF1Labels | TokenAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | MultiLabelFMeasure Double
+             | BIOF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | MultiLabelFMeasure Double
              | MultiLabelLogLoss | MultiLabelLikelihood
              | SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double | ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double
               deriving (Eq)
@@ -67,6 +67,7 @@ instance Show Metric where
   show BIOF1 = "BIO-F1"
   show BIOF1Labels = "BIO-F1-Labels"
   show TokenAccuracy = "TokenAccuracy"
+  show SegmentAccuracy = "SegmentAccuracy"
   show MAE = "MAE"
   show SMAPE = "SMAPE"
   show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta)
@@ -118,6 +119,7 @@ instance Read Metric where
   readsPrec _ ('B':'I':'O':'-':'F':'1':'-':'L':'a':'b':'e':'l':'s':theRest) = [(BIOF1Labels, theRest)]
   readsPrec _ ('B':'I':'O':'-':'F':'1':theRest) = [(BIOF1, theRest)]
   readsPrec _ ('T':'o':'k':'e':'n':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(TokenAccuracy, theRest)]
+  readsPrec _ ('S':'e':'g':'m':'e':'n':'t':'A':'c':'c':'u':'r':'a':'c':'y':theRest) = [(SegmentAccuracy, theRest)]
   readsPrec _ ('M':'A':'E':theRest) = [(MAE, theRest)]
   readsPrec _ ('S':'M':'A':'P':'E':theRest) = [(SMAPE, theRest)]
   readsPrec _ ('M':'u':'l':'t':'i':'L':'a':'b':'e':'l':'-':'L':'o':'g':'L':'o':'s':'s':theRest) = [(MultiLabelLogLoss, theRest)]
@@ -154,6 +156,7 @@ getMetricOrdering Likelihood = TheHigherTheBetter
 getMetricOrdering BIOF1 = TheHigherTheBetter
 getMetricOrdering BIOF1Labels = TheHigherTheBetter
 getMetricOrdering TokenAccuracy = TheHigherTheBetter
+getMetricOrdering SegmentAccuracy = TheHigherTheBetter
 getMetricOrdering MAE = TheLowerTheBetter
 getMetricOrdering SMAPE = TheLowerTheBetter
 getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter
diff --git a/src/GEval/MetricsMeta.hs b/src/GEval/MetricsMeta.hs
index 8747bd9..21659ab 100644
--- a/src/GEval/MetricsMeta.hs
+++ b/src/GEval/MetricsMeta.hs
@@ -63,6 +63,7 @@ listOfAvailableMetrics = [RMSE,
                           BIOF1,
                           BIOF1Labels,
                           TokenAccuracy,
+                          SegmentAccuracy,
                           SoftFMeasure 1.0,
                           SoftFMeasure 2.0,
                           SoftFMeasure 0.25,
@@ -94,6 +95,7 @@ isMetricDescribed (SoftFMeasure _) = True
 isMetricDescribed (Soft2DFMeasure _) = True
 isMetricDescribed (ProbabilisticMultiLabelFMeasure _) = True
 isMetricDescribed GLEU = True
+isMetricDescribed SegmentAccuracy = True
 isMetricDescribed _ = False

 getEvaluationSchemeDescription :: EvaluationScheme -> String
@@ -134,7 +136,11 @@ BLEU metric at the corpus level, but does not have BLEU's drawbacks when
 used as a per-sentence reward objective.
 see: https://arxiv.org/pdf/1609.08144.pdf
 |]
+getMetricDescription SegmentAccuracy =
+  [i|Accuracy counted for segments, i.e. labels with positions.
+The returned value is the percentage of the segments in the ground truth
+that were retrieved in the actual output. Accuracy is calculated
+separately for each item and then averaged.
+|]

 outContents :: Metric -> String
 outContents (SoftFMeasure _) = [hereLit|inwords:1-4
@@ -147,7 +153,10 @@ outContents (ProbabilisticMultiLabelFMeasure _) = [hereLit|first-name/1:0.8 surname/1:0.4
 surname/1:0.4 first-name/3:0.9
 |]
 outContents GLEU = [hereLit|Alice has a black
 |]
+outContents SegmentAccuracy = [hereLit|N:1-4 V:5-6 N:8-10 V:12-13 A:15-17
+N:1-4 V:6-7 A:9-13
+|]

 expectedScore :: EvaluationScheme -> MetricValue
@@ -165,6 +174,8 @@ expectedScore (EvaluationScheme (ProbabilisticMultiLabelFMeasure beta) [])
     in weightedHarmonicMean beta precision recall
 expectedScore (EvaluationScheme GLEU [])
   = 0.7142857142857143
+expectedScore (EvaluationScheme SegmentAccuracy [])
+  = 0.875

 helpMetricParameterMetricsList :: String
 helpMetricParameterMetricsList = intercalate ", " $ map (\s -> (show s) ++ (case extraInfo s of
@@ -213,7 +224,14 @@ the form LABEL:PAGE/X0,Y0,X1,Y1 where LABEL is any label, page is the page number
 formatDescription (ProbabilisticMultiLabelFMeasure _) = [hereLit|In each line a number of labels (entities) can be given. A label probability can be
 provided with a colon (e.g. "foo:0.7"). By default, 1.0 is assumed.
 |]
 formatDescription GLEU = [hereLit|In each line there is a space-separated sentence of words.
 |]
+formatDescription SegmentAccuracy = [hereLit|Each line is a sequence of segments of the form LABEL:POSITIONS.
+Labels can be any strings (without spaces), whereas the POSITIONS part is a list of
+1-based indexes or spans separated by commas (spans are inclusive
+ranges, e.g. "10-14"). For instance, "foo:bar:2,4-7,10" is a
+label "foo:bar" for positions 2, 4, 5, 6, 7 and 10. Note that no
+overlapping segments can be returned (evaluation will fail in
+such a case).
+|]

 scoreExplanation :: EvaluationScheme -> Maybe String
 scoreExplanation (EvaluationScheme (SoftFMeasure _) [])
@@ -227,13 +245,16 @@ Hence, recall is 247500/902500=0.274 and precision - 247500/(20000+912000+240000)=0.211. Therefore, the F-score
 for the second item is 0.238 and the F-score for the whole set is (0 + 0.238)/2 = 0.119.|]
 scoreExplanation (EvaluationScheme (ProbabilisticMultiLabelFMeasure _) []) = Nothing
 scoreExplanation (EvaluationScheme GLEU [])
   = Just [hereLit|To find the GLEU score, we first count the numbers of tp (true positives),
 fp (false positives) and fn (false negatives). We have 4 matching unigrams
 ("Alice", "has", "a", "black"), 3 bigrams ("Alice has", "has a", "a black"),
 2 trigrams ("Alice has a", "has a black") and 1 tetragram ("Alice has a black"),
 so tp=10. We have no fp, therefore fp=0. There are 4 fn: "cat", "black cat",
 "a black cat" and "has a black cat". Now we have to calculate precision and recall:
 precision is tp / (tp+fp) = 10/(10+0) = 1,
 recall is tp / (tp+fn) = 10/(10+4) = 10/14 =~ 0.71428...
 The GLEU score is min(precision, recall) = 0.71428|]
+scoreExplanation (EvaluationScheme SegmentAccuracy [])
+  = Just [hereLit|Out of the 4 segments in the expected output for the first item, 3 were retrieved correctly (accuracy is 3/4=0.75).
+The second item was retrieved perfectly (accuracy is 1.0).
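+(In the first item, the only miss is the first segment: `N:1-6` was
+expected, but the output contains `N:1-4`.)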
+Hence, the average is (0.75+1.0)/2=0.875.|]

 pasteLines :: String -> String -> String
 pasteLines a b = printf "%-35s %s\n" a b
diff --git a/test/Spec.hs b/test/Spec.hs
index 1fccc45..dc68beb 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -146,6 +146,9 @@ main = hspec $ do
     describe "TokenAccuracy" $ do
       it "simple example" $ do
         runGEvalTest "token-accuracy-simple" `shouldReturnAlmost` 0.5
+    describe "SegmentAccuracy" $ do
+      it "simple test" $ do
+        runGEvalTest "segment-accuracy-simple" `shouldReturnAlmost` 0.4444444
     describe "precision count" $ do
       it "simple test" $ do
         precisionCount [["Alice", "has", "a", "cat"]] ["Ala", "has", "cat"] `shouldBe` 2
@@ -342,6 +345,11 @@
       it "just parse" $ do
         parseAnnotations "foo:3,7-10 baz:4-6" `shouldBe` Right [Annotation "foo" (IS.fromList [3,7,8,9,10]),
                                                                 Annotation "baz" (IS.fromList [4,5,6])]
+      it "just parse with colons" $ do
+        parseSegmentAnnotations "foo:x:3,7-10 baz:4-6" `shouldBe` Right [Annotation "foo:x" (IS.fromList [3,7,8,9,10]),
+                                                                         Annotation "baz" (IS.fromList [4,5,6])]
+      it "detect overlapping segments" $ do
+        parseSegmentAnnotations "foo:x:3,7-10 baz:2-6" `shouldBe` Left "Overlapping segments"
       it "just parse 2" $ do
         parseAnnotations "inwords:1-3 indigits:5" `shouldBe` Right [Annotation "inwords" (IS.fromList [1,2,3]),
                                                                     Annotation "indigits" (IS.fromList [5])]
diff --git a/test/segment-accuracy-simple/segment-accuracy-simple-solution/test-A/out.tsv b/test/segment-accuracy-simple/segment-accuracy-simple-solution/test-A/out.tsv
new file mode 100644
index 0000000..4af8b51
--- /dev/null
+++ b/test/segment-accuracy-simple/segment-accuracy-simple-solution/test-A/out.tsv
@@ -0,0 +1,3 @@
+foo:0 baq:1-2 baz:3
+aaa:0-1
+xyz:0 bbb:x:1
diff --git a/test/segment-accuracy-simple/segment-accuracy-simple/config.txt b/test/segment-accuracy-simple/segment-accuracy-simple/config.txt
new file mode 100644
index 0000000..2f838f0
--- /dev/null
+++ b/test/segment-accuracy-simple/segment-accuracy-simple/config.txt
@@ -0,0 +1 @@
+--metric SegmentAccuracy
diff --git a/test/segment-accuracy-simple/segment-accuracy-simple/test-A/expected.tsv b/test/segment-accuracy-simple/segment-accuracy-simple/test-A/expected.tsv
new file mode 100644
index 0000000..bc95bcb
--- /dev/null
+++ b/test/segment-accuracy-simple/segment-accuracy-simple/test-A/expected.tsv
@@ -0,0 +1,3 @@
+foo:0 bar:1-2 baz:3
+aaa:0-2
+xyz:0 bbb:x:1 ccc:x:2

From db7a1bb03e3d7ba940cb1c52f77e009e4b955b2e Mon Sep 17 00:00:00 2001
From: Filip Gralinski
Date: Mon, 18 Nov 2019 21:53:40 +0100
Subject: [PATCH 5/7] Bump up version number

---
 CHANGELOG.md | 4 ++++
 geval.cabal  | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7c857bf..63513ed 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,8 @@

+## 1.22.0.0
+
+* Add SegmentAccuracy
+
 ## 1.21.0.0

 * Add Probabilistic-MultiLabel-F-measure
diff --git a/geval.cabal b/geval.cabal
index e48552b..3103b53 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -1,5 +1,5 @@
 name:                geval
-version:             1.21.1.0
+version:             1.22.0.0
 synopsis:            Machine learning evaluation tools
 description:         Please see README.md
 homepage:            http://github.com/name/project

From cb4efe1d6b469a166e1970902a3b9b39f5246285 Mon Sep 17 00:00:00 2001
From: Filip Gralinski
Date: Mon, 25 Nov 2019 21:31:17 +0100
Subject: [PATCH 6/7] Introduce :S flag (sorting words within a line)

---
 src/GEval/Core.hs                                  | 17 +++++++++++++++++
 src/GEval/CreateChallenge.hs                       |  6 ++++++
 src/GEval/EvaluationScheme.hs                      | 10 +++++++---
 src/GEval/Metric.hs                                | 15 ++++++++++++++-
 src/GEval/MetricsMeta.hs                           |  1 +
 test/Spec.hs                                       |  5 +++++
 .../accuracy-on-sorted-solution/test-A/out.tsv     |  4 ++++
 .../accuracy-on-sorted/config.txt                  |  1 +
 .../accuracy-on-sorted/test-A/expected.tsv         |  4 ++++
 .../test-A/out.tsv                                 |  4 ++++
 .../mean-multilabel-f1-simple/config.txt           |  1 +
 .../test-A/expected.tsv                            |  4 ++++
 12 files changed, 68 insertions(+), 4 deletions(-)
 create mode 100644 test/accuracy-on-sorted/accuracy-on-sorted-solution/test-A/out.tsv
 create mode 100644 test/accuracy-on-sorted/accuracy-on-sorted/config.txt
 create mode 100644 test/accuracy-on-sorted/accuracy-on-sorted/test-A/expected.tsv
 create mode 100644 test/mean-multilabel-f1-simple/mean-multilabel-f1-simple-solution/test-A/out.tsv
 create mode 100644 test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/config.txt
 create mode 100644 test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/test-A/expected.tsv

diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs
index 4611671..3aa5d46 100644
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -492,6 +492,23 @@ gevalCoreOnSources CharMatch inputLineSource = helper inputLineSource

 gevalCoreOnSources (LogLossHashed nbOfBits) _ = helperLogLossHashed nbOfBits id
 gevalCoreOnSources (LikelihoodHashed nbOfBits) _ = helperLogLossHashed nbOfBits logLossToLikehood

+gevalCoreOnSources (Mean (MultiLabelFMeasure beta)) _
+  = gevalCoreWithoutInputOnItemTargets (Right . intoWords)
+                                       (Right . getWords)
+                                       ((fMeasureOnCounts beta) . (getCounts (==)))
+                                       averageC
+                                       id
+                                       noGraph
+  where
+    -- repeated as below, as it will be refactored into dependent types soon anyway
+    getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t
+    getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
+    intoWords (RawItemTarget t) = Prelude.map unpack $ Data.Text.words t
+    intoWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts
+
+gevalCoreOnSources (Mean _) _ = error $ "Mean/ meta-metric defined only for MultiLabel-F1 for the time being"
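+
+-- An illustrative note: Mean/MultiLabel-F1 computes the F-score separately
+-- for each item and then averages the scores, whereas plain MultiLabel-F1
+-- aggregates the tp/fp/fn counts over the whole set first. E.g. for the
+-- expected items ["foo bar baz", "", "foo", "qqq qqq"] and the outputs
+-- ["foo bar baz", "uuu", "foo bar baz", "qqq aaa"], the per-item F1 scores
+-- are 1.0, 0.0, 0.5 and 0.5, so Mean/MultiLabel-F1 is 0.5
+-- (cf. the mean-multilabel-f1-simple test added below).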
+
 -- only MultiLabel-F1 handled for JSONs for the time being...
 gevalCoreOnSources (MultiLabelFMeasure beta) _ = gevalCoreWithoutInputOnItemTargets (Right . intoWords)
                                                                                     (Right . getWords)
diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs
index 3a915e5..325aed4 100644
--- a/src/GEval/CreateChallenge.hs
+++ b/src/GEval/CreateChallenge.hs
@@ -55,6 +55,7 @@ createFile filePath contents = do
   writeFile filePath contents

 readmeMDContents :: Metric -> String -> String
+readmeMDContents (Mean metric) testName = readmeMDContents metric testName
 readmeMDContents GLEU testName = readmeMDContents BLEU testName
 readmeMDContents BLEU testName = [i|
 GEval sample machine translation challenge
@@ -413,6 +414,7 @@ configContents schemes precision testName = unwords (Prelude.map (\scheme -> ("--metric "
     precisionOpt (Just p) = " --precision " ++ (show p)

 trainContents :: Metric -> String
+trainContents (Mean metric) = trainContents metric
 trainContents GLEU = trainContents BLEU
 trainContents BLEU = [hereLit|alussa loi jumala taivaan ja maan	he mea hanga na te atua i te timatanga te rangi me te whenua
 ja maa oli autio ja tyhjä , ja pimeys oli syvyyden päällä	a kahore he ahua o te whenua , i takoto kau ; he pouri ano a runga i te mata o te hohonu
@@ -510,6 +512,7 @@ trainContents _ = [hereLit|0.06	0.39	0	0.206
 |]

 devInContents :: Metric -> String
+devInContents (Mean metric) = devInContents metric
 devInContents GLEU = devInContents BLEU
 devInContents BLEU = [hereLit|ja jumala sanoi : " tulkoon valkeus " , ja valkeus tuli
 ja jumala näki , että valkeus oli hyvä ; ja jumala erotti valkeuden pimeydestä
@@ -577,6 +580,7 @@ devInContents _ = [hereLit|0.72	0	0.007
 |]

 devExpectedContents :: Metric -> String
+devExpectedContents (Mean metric) = devExpectedContents metric
 devExpectedContents GLEU = devExpectedContents BLEU
 devExpectedContents BLEU = [hereLit|a ka ki te atua , kia marama : na ka marama
 a ka kite te atua i te marama , he pai : a ka wehea e te atua te marama i te pouri
@@ -646,6 +650,7 @@ devExpectedContents _ = [hereLit|0.82
 |]

 testInContents :: Metric -> String
+testInContents (Mean metric) = testInContents metric
 testInContents GLEU = [hereLit|Alice has a black
 |]
 testInContents BLEU = [hereLit|ja jumala kutsui valkeuden päiväksi , ja pimeyden hän kutsui yöksi
@@ -716,6 +721,7 @@ testInContents _ = [hereLit|0.72	0	0.007
 |]

 testExpectedContents :: Metric -> String
+testExpectedContents (Mean metric) = testExpectedContents metric
 testExpectedContents BLEU = [hereLit|na ka huaina e te atua te marama ko te awatea , a ko te pouri i huaina e ia ko te po
 a ko te ahiahi , ko te ata , he ra kotahi
 |]
diff --git a/src/GEval/EvaluationScheme.hs b/src/GEval/EvaluationScheme.hs
index 29840c7..a464d6c 100644
--- a/src/GEval/EvaluationScheme.hs
+++ b/src/GEval/EvaluationScheme.hs
@@ -6,8 +6,8 @@ import GEval.Metric

 import Text.Regex.PCRE.Heavy
 import Text.Regex.PCRE.Light.Base (Regex(..))
-import Data.Text (Text(..), concat, toLower, toUpper, pack, unpack)
-import Data.List (intercalate, break)
+import Data.Text (Text(..), concat, toLower, toUpper, pack, unpack, words, unwords)
+import Data.List (intercalate, break, sort)
 import Data.Either
 import Data.Maybe (fromMaybe)
 import qualified Data.ByteString.UTF8 as BSU
@@ -16,7 +16,7 @@

 data EvaluationScheme = EvaluationScheme Metric [PreprocessingOperation]
   deriving (Eq)

-data PreprocessingOperation = RegexpMatch Regex | LowerCasing | UpperCasing | SetName Text
+data PreprocessingOperation = RegexpMatch Regex | LowerCasing | UpperCasing | Sorting | SetName Text
   deriving (Eq)

 leftParameterBracket :: Char
@@ -39,6 +39,8 @@ readOps ('l':theRest) = (LowerCasing:ops, theRest')
   where (ops, theRest') = readOps theRest
 readOps ('u':theRest) = (UpperCasing:ops, theRest')
   where (ops, theRest') = readOps theRest
 readOps ('m':theRest) = handleParametrizedOp (RegexpMatch . (fromRight undefined) . ((flip compileM) []) . BSU.fromString) theRest
+readOps ('S':theRest) = (Sorting:ops, theRest')
+  where (ops, theRest') = readOps theRest
 readOps ('N':theRest) = handleParametrizedOp (SetName . pack) theRest
 readOps s = ([], s)

@@ -70,6 +72,7 @@ instance Show PreprocessingOperation where
   show (RegexpMatch (Regex _ regexp)) = parametrizedOperation "m" (BSU.toString regexp)
   show LowerCasing = "l"
   show UpperCasing = "u"
+  show Sorting = "S"
   show (SetName t) = parametrizedOperation "N" (unpack t)

 parametrizedOperation :: String -> String -> String
@@ -82,4 +85,5 @@ applyPreprocessingOperation :: PreprocessingOperation -> Text -> Text
 applyPreprocessingOperation (RegexpMatch regex) = Data.Text.concat . (map fst) . (scan regex)
 applyPreprocessingOperation LowerCasing = toLower
 applyPreprocessingOperation UpperCasing = toUpper
+applyPreprocessingOperation Sorting = Data.Text.unwords . sort . Data.Text.words
 applyPreprocessingOperation (SetName _) = id
diff --git a/src/GEval/Metric.hs b/src/GEval/Metric.hs
index b87c599..a508997 100644
--- a/src/GEval/Metric.hs
+++ b/src/GEval/Metric.hs
@@ -28,7 +28,12 @@ data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | ClippEU
              | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood
              | BIOF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | MultiLabelFMeasure Double
              | MultiLabelLogLoss | MultiLabelLikelihood
-             | SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double | ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double
+             | SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double
+             | ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double
+             -- it would be better to avoid infinite recursion here;
+             -- `Mean (Mean BLEU)` is not useful, but as it would mean
+             -- a larger refactor, we will postpone this
+             | Mean Metric
               deriving (Eq)

 instance Show Metric where
@@ -73,8 +78,12 @@ instance Show Metric where
   show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta)
   show MultiLabelLogLoss = "MultiLabel-Logloss"
   show MultiLabelLikelihood = "MultiLabel-Likelihood"
+  show (Mean metric) = "Mean/" ++ (show metric)

 instance Read Metric where
+  readsPrec p ('M':'e':'a':'n':'/':theRest) = case readsPrec p theRest of
+    [(metric, theRest)] -> [(Mean metric, theRest)]
+    _ -> []
   readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)]
   readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)]
   readsPrec _ ('P':'e':'a':'r':'s':'o':'n':theRest) = [(Pearson, theRest)]
@@ -162,6 +171,7 @@ getMetricOrdering SMAPE = TheLowerTheBetter
 getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter
 getMetricOrdering MultiLabelLogLoss = TheLowerTheBetter
 getMetricOrdering MultiLabelLikelihood = TheHigherTheBetter
+getMetricOrdering (Mean metric) = getMetricOrdering metric

 bestPossibleValue :: Metric -> MetricValue
 bestPossibleValue metric = case getMetricOrdering metric of
@@ -169,18 +179,21 @@ bestPossibleValue metric = case getMetricOrdering metric of
   TheLowerTheBetter -> 0.0
   TheHigherTheBetter -> 1.0

 fixedNumberOfColumnsInExpected :: Metric -> Bool
+fixedNumberOfColumnsInExpected (Mean metric) = fixedNumberOfColumnsInExpected metric
 fixedNumberOfColumnsInExpected MAP = False
 fixedNumberOfColumnsInExpected BLEU = False
 fixedNumberOfColumnsInExpected GLEU = False
 fixedNumberOfColumnsInExpected _ = True

 fixedNumberOfColumnsInInput :: Metric -> Bool
+fixedNumberOfColumnsInInput (Mean metric) = fixedNumberOfColumnsInInput metric
 fixedNumberOfColumnsInInput (SoftFMeasure _) = False
 fixedNumberOfColumnsInInput (ProbabilisticSoftFMeasure _) = False
 fixedNumberOfColumnsInInput (Soft2DFMeasure _) = False
 fixedNumberOfColumnsInInput _ = True

 perfectOutLineFromExpectedLine :: Metric -> Text -> Text
+perfectOutLineFromExpectedLine (Mean metric) t = perfectOutLineFromExpectedLine metric t
 perfectOutLineFromExpectedLine (LogLossHashed _) t = t <> ":1.0"
 perfectOutLineFromExpectedLine (LikelihoodHashed _) t = t <> ":1.0"
 perfectOutLineFromExpectedLine BLEU t = getFirstColumn t
diff --git a/src/GEval/MetricsMeta.hs b/src/GEval/MetricsMeta.hs
index 21659ab..8fad9c6 100644
--- a/src/GEval/MetricsMeta.hs
+++ b/src/GEval/MetricsMeta.hs
@@ -48,6 +48,7 @@ listOfAvailableMetrics = [RMSE,
                           MultiLabelFMeasure 1.0,
                           MultiLabelFMeasure 2.0,
                           MultiLabelFMeasure 0.25,
+                          Mean (MultiLabelFMeasure 1.0),
                           ProbabilisticMultiLabelFMeasure 1.0,
                           ProbabilisticMultiLabelFMeasure 2.0,
                           ProbabilisticMultiLabelFMeasure 0.25,
diff --git a/test/Spec.hs b/test/Spec.hs
index dc68beb..c350775 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -127,6 +127,8 @@ main = hspec $ do
         runGEvalTest "accuracy-simple" `shouldReturnAlmost` 0.6
       it "with probs" $
         runGEvalTest "accuracy-probs" `shouldReturnAlmost` 0.4
+      it "sorted" $
+        runGEvalTest "accuracy-on-sorted" `shouldReturnAlmost` 0.75
     describe "F-measure" $ do
       it "simple example" $
         runGEvalTest "f-measure-simple" `shouldReturnAlmost` 0.57142857
@@ -326,6 +328,9 @@
         runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615
       it "labels given with probs and numbers" $ do
         runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666
+    describe "Mean/MultiLabel-F" $ do
+      it "simple" $ do
+        runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5
     describe "MultiLabel-Likelihood" $ do
       it "simple" $ do
         runGEvalTest "multilabel-likelihood-simple" `shouldReturnAlmost` 0.115829218528827
diff --git a/test/accuracy-on-sorted/accuracy-on-sorted-solution/test-A/out.tsv b/test/accuracy-on-sorted/accuracy-on-sorted-solution/test-A/out.tsv
new file mode 100644
index 0000000..b9c8997
--- /dev/null
+++ b/test/accuracy-on-sorted/accuracy-on-sorted-solution/test-A/out.tsv
@@ -0,0 +1,4 @@
+foo baz bar
+
+xyz aaa
+2 a:1 3
diff --git a/test/accuracy-on-sorted/accuracy-on-sorted/config.txt b/test/accuracy-on-sorted/accuracy-on-sorted/config.txt
new file mode 100644
index 0000000..0de8e69
--- /dev/null
+++ b/test/accuracy-on-sorted/accuracy-on-sorted/config.txt
@@ -0,0 +1 @@
+--metric Accuracy:S
diff --git a/test/accuracy-on-sorted/accuracy-on-sorted/test-A/expected.tsv b/test/accuracy-on-sorted/accuracy-on-sorted/test-A/expected.tsv
new file mode 100644
index 0000000..7ec7ae4
--- /dev/null
+++ b/test/accuracy-on-sorted/accuracy-on-sorted/test-A/expected.tsv
@@ -0,0 +1,4 @@
+bar baz foo
+
+xyz
+a:1 2 3
diff --git a/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple-solution/test-A/out.tsv b/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple-solution/test-A/out.tsv
new file mode 100644
index 0000000..6a8bd3a
--- /dev/null
+++ b/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple-solution/test-A/out.tsv
@@ -0,0 +1,4 @@
+foo bar baz
+uuu
+foo bar baz
+qqq aaa
diff --git a/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/config.txt b/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/config.txt
new file mode 100644
index 0000000..885d505
--- /dev/null
+++ b/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/config.txt
@@ -0,0 +1 @@
+--metric Mean/MultiLabel-F1
diff --git a/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/test-A/expected.tsv b/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/test-A/expected.tsv
new file mode 100644
index 0000000..64612c3
--- /dev/null
+++ b/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/test-A/expected.tsv
@@ -0,0 +1,4 @@
+foo bar baz
+
+foo
+qqq qqq

From ef8945af11a06ad009c070abeab9268e3ad6fc90 Mon Sep 17 00:00:00 2001
From: Filip Gralinski
Date: Mon, 25 Nov 2019 21:35:22 +0100
Subject: [PATCH 7/7] Bump up version number

---
 CHANGELOG.md | 5 +++++
 geval.cabal  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 63513ed..a8e4b00 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,9 @@

+## 1.22.1.0
+
+* Add the "Mean/" meta-metric (for the time being, working only with MultiLabel-F-measure)
+* Add the :S flag (sorting words within a line)
+
 ## 1.22.0.0

 * Add SegmentAccuracy
diff --git a/geval.cabal b/geval.cabal
index 3103b53..5b425ce 100644
--- a/geval.cabal
+++ b/geval.cabal
@@ -1,5 +1,5 @@
 name:                geval
-version:             1.22.0.0
+version:             1.22.1.0
 synopsis:            Machine learning evaluation tools
 description:         Please see README.md
 homepage:            http://github.com/name/project
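A closing usage sketch for the two features introduced in patch 6
(hypothetical challenge configs; both metric specifications appear
literally in the test challenges added above). In a challenge's
`config.txt`:

    --metric Accuracy:S

makes the per-line comparison insensitive to word order (words within
each line are sorted before comparing), while

    --metric Mean/MultiLabel-F1

computes MultiLabel-F1 separately for each item and averages the
per-item scores instead of aggregating the counts over the whole set.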