From cb4efe1d6b469a166e1970902a3b9b39f5246285 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Mon, 25 Nov 2019 21:31:17 +0100 Subject: [PATCH] Introduce :S flag (sorting words within a line) --- src/GEval/Core.hs | 17 +++++++++++++++++ src/GEval/CreateChallenge.hs | 6 ++++++ src/GEval/EvaluationScheme.hs | 10 +++++++--- src/GEval/Metric.hs | 15 ++++++++++++++- src/GEval/MetricsMeta.hs | 1 + test/Spec.hs | 5 +++++ .../accuracy-on-sorted-solution/test-A/out.tsv | 4 ++++ .../accuracy-on-sorted/config.txt | 1 + .../accuracy-on-sorted/test-A/expected.tsv | 4 ++++ .../test-A/out.tsv | 4 ++++ .../mean-multilabel-f1-simple/config.txt | 1 + .../test-A/expected.tsv | 4 ++++ 12 files changed, 68 insertions(+), 4 deletions(-) create mode 100644 test/accuracy-on-sorted/accuracy-on-sorted-solution/test-A/out.tsv create mode 100644 test/accuracy-on-sorted/accuracy-on-sorted/config.txt create mode 100644 test/accuracy-on-sorted/accuracy-on-sorted/test-A/expected.tsv create mode 100644 test/mean-multilabel-f1-simple/mean-multilabel-f1-simple-solution/test-A/out.tsv create mode 100644 test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/config.txt create mode 100644 test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/test-A/expected.tsv diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 4611671..3aa5d46 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -492,6 +492,23 @@ gevalCoreOnSources CharMatch inputLineSource = helper inputLineSource gevalCoreOnSources (LogLossHashed nbOfBits) _ = helperLogLossHashed nbOfBits id gevalCoreOnSources (LikelihoodHashed nbOfBits) _ = helperLogLossHashed nbOfBits logLossToLikehood + +gevalCoreOnSources (Mean (MultiLabelFMeasure beta)) _ + = gevalCoreWithoutInputOnItemTargets (Right . intoWords) + (Right . getWords) + ((fMeasureOnCounts beta) . (getCounts (==))) + averageC + id + noGraph + where + -- repeated as below, as it will be refactored into dependent types soon anyway + getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t + getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts + intoWords (RawItemTarget t) = Prelude.map unpack $ Data.Text.words t + intoWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts + +gevalCoreOnSources (Mean _) _ = error $ "Mean/ meta-metric defined only for MultiLabel-F1 for the time being" + -- only MultiLabel-F1 handled for JSONs for the time being... gevalCoreOnSources (MultiLabelFMeasure beta) _ = gevalCoreWithoutInputOnItemTargets (Right . intoWords) (Right . getWords) diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs index 3a915e5..325aed4 100644 --- a/src/GEval/CreateChallenge.hs +++ b/src/GEval/CreateChallenge.hs @@ -55,6 +55,7 @@ createFile filePath contents = do writeFile filePath contents readmeMDContents :: Metric -> String -> String +readmeMDContents (Mean metric) testName = readmeMDContents metric testName readmeMDContents GLEU testName = readmeMDContents BLEU testName readmeMDContents BLEU testName = [i| GEval sample machine translation challenge @@ -413,6 +414,7 @@ configContents schemes precision testName = unwords (Prelude.map (\scheme -> ("- precisionOpt (Just p) = " --precision " ++ (show p) trainContents :: Metric -> String +trainContents (Mean metric) = trainContents metric trainContents GLEU = trainContents BLEU trainContents BLEU = [hereLit|alussa loi jumala taivaan ja maan he mea hanga na te atua i te timatanga te rangi me te whenua ja maa oli autio ja tyhjä , ja pimeys oli syvyyden päällä a kahore he ahua o te whenua , i takoto kau ; he pouri ano a runga i te mata o te hohonu @@ -510,6 +512,7 @@ trainContents _ = [hereLit|0.06 0.39 0 0.206 |] devInContents :: Metric -> String +devInContents (Mean metric) = devInContents metric devInContents GLEU = devInContents BLEU devInContents BLEU = [hereLit|ja jumala sanoi : " tulkoon valkeus " , ja valkeus tuli ja jumala näki , että valkeus oli hyvä ; ja jumala erotti valkeuden pimeydestä @@ -577,6 +580,7 @@ devInContents _ = [hereLit|0.72 0 0.007 |] devExpectedContents :: Metric -> String +devExpectedContents (Mean metric) = devExpectedContents metric devExpectedContents GLEU = devExpectedContents BLEU devExpectedContents BLEU = [hereLit|a ka ki te atua , kia marama : na ka marama a ka kite te atua i te marama , he pai : a ka wehea e te atua te marama i te pouri @@ -646,6 +650,7 @@ devExpectedContents _ = [hereLit|0.82 |] testInContents :: Metric -> String +testInContents (Mean metric) = testInContents metric testInContents GLEU = [hereLit|Alice has a black |] testInContents BLEU = [hereLit|ja jumala kutsui valkeuden päiväksi , ja pimeyden hän kutsui yöksi @@ -716,6 +721,7 @@ testInContents _ = [hereLit|0.72 0 0.007 |] testExpectedContents :: Metric -> String +testExpectedContents (Mean metric) = testExpectedContents metric testExpectedContents BLEU = [hereLit|na ka huaina e te atua te marama ko te awatea , a ko te pouri i huaina e ia ko te po a ko te ahiahi , ko te ata , he ra kotahi |] diff --git a/src/GEval/EvaluationScheme.hs b/src/GEval/EvaluationScheme.hs index 29840c7..a464d6c 100644 --- a/src/GEval/EvaluationScheme.hs +++ b/src/GEval/EvaluationScheme.hs @@ -6,8 +6,8 @@ import GEval.Metric import Text.Regex.PCRE.Heavy import Text.Regex.PCRE.Light.Base (Regex(..)) -import Data.Text (Text(..), concat, toLower, toUpper, pack, unpack) -import Data.List (intercalate, break) +import Data.Text (Text(..), concat, toLower, toUpper, pack, unpack, words, unwords) +import Data.List (intercalate, break, sort) import Data.Either import Data.Maybe (fromMaybe) import qualified Data.ByteString.UTF8 as BSU @@ -16,7 +16,7 @@ import qualified Data.ByteString.UTF8 as BSU data EvaluationScheme = EvaluationScheme Metric [PreprocessingOperation] deriving (Eq) -data PreprocessingOperation = RegexpMatch Regex | LowerCasing | UpperCasing | SetName Text +data PreprocessingOperation = RegexpMatch Regex | LowerCasing | UpperCasing | Sorting | SetName Text deriving (Eq) leftParameterBracket :: Char @@ -39,6 +39,8 @@ readOps ('l':theRest) = (LowerCasing:ops, theRest') readOps ('u':theRest) = (UpperCasing:ops, theRest') where (ops, theRest') = readOps theRest readOps ('m':theRest) = handleParametrizedOp (RegexpMatch . (fromRight undefined) . ((flip compileM) []) . BSU.fromString) theRest +readOps ('S':theRest) = (Sorting:ops, theRest') + where (ops, theRest') = readOps theRest readOps ('N':theRest) = handleParametrizedOp (SetName . pack) theRest readOps s = ([], s) @@ -70,6 +72,7 @@ instance Show PreprocessingOperation where show (RegexpMatch (Regex _ regexp)) = parametrizedOperation "m" (BSU.toString regexp) show LowerCasing = "l" show UpperCasing = "u" + show Sorting = "S" show (SetName t) = parametrizedOperation "N" (unpack t) parametrizedOperation :: String -> String -> String @@ -82,4 +85,5 @@ applyPreprocessingOperation :: PreprocessingOperation -> Text -> Text applyPreprocessingOperation (RegexpMatch regex) = Data.Text.concat . (map fst) . (scan regex) applyPreprocessingOperation LowerCasing = toLower applyPreprocessingOperation UpperCasing = toUpper +applyPreprocessingOperation Sorting = Data.Text.unwords . sort . Data.Text.words applyPreprocessingOperation (SetName _) = id diff --git a/src/GEval/Metric.hs b/src/GEval/Metric.hs index b87c599..a508997 100644 --- a/src/GEval/Metric.hs +++ b/src/GEval/Metric.hs @@ -28,7 +28,12 @@ data Metric = RMSE | MSE | Pearson | Spearman | BLEU | GLEU | WER | Accuracy | C | LogLossHashed Word32 | CharMatch | MAP | LogLoss | Likelihood | BIOF1 | BIOF1Labels | TokenAccuracy | SegmentAccuracy | LikelihoodHashed Word32 | MAE | SMAPE | MultiLabelFMeasure Double | MultiLabelLogLoss | MultiLabelLikelihood - | SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double | ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double + | SoftFMeasure Double | ProbabilisticMultiLabelFMeasure Double + | ProbabilisticSoftFMeasure Double | Soft2DFMeasure Double + -- it would be better to avoid infinite recursion here + -- `Mean (Mean BLEU)` is not useful, but as it would mean + -- a larger refactor, we will postpone this + | Mean Metric deriving (Eq) instance Show Metric where @@ -73,8 +78,12 @@ instance Show Metric where show (MultiLabelFMeasure beta) = "MultiLabel-F" ++ (show beta) show MultiLabelLogLoss = "MultiLabel-Logloss" show MultiLabelLikelihood = "MultiLabel-Likelihood" + show (Mean metric) = "Mean/" ++ (show metric) instance Read Metric where + readsPrec p ('M':'e':'a':'n':'/':theRest) = case readsPrec p theRest of + [(metric, theRest)] -> [(Mean metric, theRest)] + _ -> [] readsPrec _ ('R':'M':'S':'E':theRest) = [(RMSE, theRest)] readsPrec _ ('M':'S':'E':theRest) = [(MSE, theRest)] readsPrec _ ('P':'e':'a':'r':'s':'o':'n':theRest) = [(Pearson, theRest)] @@ -162,6 +171,7 @@ getMetricOrdering SMAPE = TheLowerTheBetter getMetricOrdering (MultiLabelFMeasure _) = TheHigherTheBetter getMetricOrdering MultiLabelLogLoss = TheLowerTheBetter getMetricOrdering MultiLabelLikelihood = TheHigherTheBetter +getMetricOrdering (Mean metric) = getMetricOrdering metric bestPossibleValue :: Metric -> MetricValue bestPossibleValue metric = case getMetricOrdering metric of @@ -169,18 +179,21 @@ bestPossibleValue metric = case getMetricOrdering metric of TheHigherTheBetter -> 1.0 fixedNumberOfColumnsInExpected :: Metric -> Bool +fixedNumberOfColumnsInExpected (Mean metric) = fixedNumberOfColumnsInExpected metric fixedNumberOfColumnsInExpected MAP = False fixedNumberOfColumnsInExpected BLEU = False fixedNumberOfColumnsInExpected GLEU = False fixedNumberOfColumnsInExpected _ = True fixedNumberOfColumnsInInput :: Metric -> Bool +fixedNumberOfColumnsInInput (Mean metric) = fixedNumberOfColumnsInInput metric fixedNumberOfColumnsInInput (SoftFMeasure _) = False fixedNumberOfColumnsInInput (ProbabilisticSoftFMeasure _) = False fixedNumberOfColumnsInInput (Soft2DFMeasure _) = False fixedNumberOfColumnsInInput _ = True perfectOutLineFromExpectedLine :: Metric -> Text -> Text +perfectOutLineFromExpectedLine (Mean metric) t = perfectOutLineFromExpectedLine metric t perfectOutLineFromExpectedLine (LogLossHashed _) t = t <> ":1.0" perfectOutLineFromExpectedLine (LikelihoodHashed _) t = t <> ":1.0" perfectOutLineFromExpectedLine BLEU t = getFirstColumn t diff --git a/src/GEval/MetricsMeta.hs b/src/GEval/MetricsMeta.hs index 21659ab..8fad9c6 100644 --- a/src/GEval/MetricsMeta.hs +++ b/src/GEval/MetricsMeta.hs @@ -48,6 +48,7 @@ listOfAvailableMetrics = [RMSE, MultiLabelFMeasure 1.0, MultiLabelFMeasure 2.0, MultiLabelFMeasure 0.25, + Mean (MultiLabelFMeasure 1.0), ProbabilisticMultiLabelFMeasure 1.0, ProbabilisticMultiLabelFMeasure 2.0, ProbabilisticMultiLabelFMeasure 0.25, diff --git a/test/Spec.hs b/test/Spec.hs index dc68beb..c350775 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -127,6 +127,8 @@ main = hspec $ do runGEvalTest "accuracy-simple" `shouldReturnAlmost` 0.6 it "with probs" $ runGEvalTest "accuracy-probs" `shouldReturnAlmost` 0.4 + it "sorted" $ + runGEvalTest "accuracy-on-sorted" `shouldReturnAlmost` 0.75 describe "F-measure" $ do it "simple example" $ runGEvalTest "f-measure-simple" `shouldReturnAlmost` 0.57142857 @@ -326,6 +328,9 @@ main = hspec $ do runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615 it "labels given with probs and numbers" $ do runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666 + describe "Mean/MultiLabel-F" $ do + it "simple" $ do + runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5 describe "MultiLabel-Likelihood" $ do it "simple" $ do runGEvalTest "multilabel-likelihood-simple" `shouldReturnAlmost` 0.115829218528827 diff --git a/test/accuracy-on-sorted/accuracy-on-sorted-solution/test-A/out.tsv b/test/accuracy-on-sorted/accuracy-on-sorted-solution/test-A/out.tsv new file mode 100644 index 0000000..b9c8997 --- /dev/null +++ b/test/accuracy-on-sorted/accuracy-on-sorted-solution/test-A/out.tsv @@ -0,0 +1,4 @@ +foo baz bar + +xyz aaa +2 a:1 3 diff --git a/test/accuracy-on-sorted/accuracy-on-sorted/config.txt b/test/accuracy-on-sorted/accuracy-on-sorted/config.txt new file mode 100644 index 0000000..0de8e69 --- /dev/null +++ b/test/accuracy-on-sorted/accuracy-on-sorted/config.txt @@ -0,0 +1 @@ +--metric Accuracy:S diff --git a/test/accuracy-on-sorted/accuracy-on-sorted/test-A/expected.tsv b/test/accuracy-on-sorted/accuracy-on-sorted/test-A/expected.tsv new file mode 100644 index 0000000..7ec7ae4 --- /dev/null +++ b/test/accuracy-on-sorted/accuracy-on-sorted/test-A/expected.tsv @@ -0,0 +1,4 @@ +bar baz foo + +xyz +a:1 2 3 diff --git a/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple-solution/test-A/out.tsv b/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple-solution/test-A/out.tsv new file mode 100644 index 0000000..6a8bd3a --- /dev/null +++ b/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple-solution/test-A/out.tsv @@ -0,0 +1,4 @@ +foo bar baz +uuu +foo bar baz +qqq aaa diff --git a/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/config.txt b/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/config.txt new file mode 100644 index 0000000..885d505 --- /dev/null +++ b/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/config.txt @@ -0,0 +1 @@ +--metric Mean/MultiLabel-F1 diff --git a/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/test-A/expected.tsv b/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/test-A/expected.tsv new file mode 100644 index 0000000..64612c3 --- /dev/null +++ b/test/mean-multilabel-f1-simple/mean-multilabel-f1-simple/test-A/expected.tsv @@ -0,0 +1,4 @@ +foo bar baz + +foo +qqq qqq