From 32290d371559f4f02a3bbdc6bbc40edc7913f879 Mon Sep 17 00:00:00 2001 From: welp Date: Mon, 13 Jul 2020 16:20:36 +0200 Subject: [PATCH] pure percantage format feature commit --- src/GEval/Common.hs | 5 ++++ src/GEval/Core.hs | 49 ++++++++++++++++------------------ src/GEval/CreateChallenge.hs | 51 ++++++++++++++++++------------------ src/GEval/Formatting.hs | 34 ++++++++++++++---------- src/GEval/OptionsParser.hs | 15 ++++++----- test/Spec.hs | 14 ++-------- 6 files changed, 85 insertions(+), 83 deletions(-) diff --git a/src/GEval/Common.hs b/src/GEval/Common.hs index 4a9cc4e..c8b581d 100644 --- a/src/GEval/Common.hs +++ b/src/GEval/Common.hs @@ -15,6 +15,11 @@ type MetricValue = Double data GraphSeries = GraphSeries [(Double, Double)] +data FormattingOptions = FormattingOptions { + decimalPlaces :: Maybe Int, + asPercentage :: Bool + } + data MetricResult = SimpleRun MetricValue | BootstrapResampling [MetricValue] instance Show MetricResult where diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 01b0a82..ebe7b8d 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -6,6 +6,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE PackageImports #-} + module GEval.Core ( geval, gevalCore, @@ -112,7 +113,6 @@ import GEval.Annotation import GEval.BlackBoxDebugging import Data.Conduit.Bootstrap import GEval.DataSource -import GEval.MatchingSpecification import qualified Data.HashMap.Strict as M import qualified Data.Vector as V @@ -182,7 +182,7 @@ data GEvalSpecification = GEvalSpecification gesExpectedFile :: String, gesInputFile :: String, gesMetrics :: [EvaluationScheme], - gesPrecision :: Maybe Int, + gesFormatting :: FormattingOptions, gesTokenizer :: Maybe Tokenizer, gesGonitoHost :: Maybe String, gesToken :: Maybe String, @@ -253,7 +253,7 @@ defaultGEvalSpecification = GEvalSpecification { gesExpectedFile = defaultExpectedFile, gesInputFile = defaultInputFile, gesMetrics = [EvaluationScheme defaultMetric []], - gesPrecision = Nothing, + gesFormatting = FormattingOptions Nothing False, gesTokenizer = Nothing, gesGonitoHost = Nothing, gesToken = Nothing, @@ -522,7 +522,7 @@ singleLineAsLineSource (LineInFile sourceSpec lineNo line) itemDecoder preproces -- some metrics are handled by Bootstrap due to legacy issues, -- fix on the way handleBootstrap :: Metric -> Bool -handleBootstrap (Mean (MultiLabelFMeasure _ _)) = True +handleBootstrap (Mean (MultiLabelFMeasure _)) = True handleBootstrap (Mean _) = False handleBootstrap CharMatch = False handleBootstrap (LogLossHashed _) = False @@ -567,16 +567,13 @@ gevalBootstrapOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => -> m (MetricOutput) -- ^ metric values for the output against the expected output -- for the time being hardcoded -gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta matchingSpec)) lsSpec = do +gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta)) lsSpec = do gevalRunPipeline parserSpec (trans step) finalPipeline context where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser)) context = fromSpecificationToWithoutInput lsSpec - step = case toSing matchingSpec of - SomeSing s -> itemStep (SAMultiLabelFMeasure s) - expParser = case toSing matchingSpec of - SomeSing s -> expectedParser (SAMultiLabelFMeasure s) - outParser = case toSing matchingSpec of - SomeSing s -> outputParser (SAMultiLabelFMeasure s) + step = itemStep SAMultiLabelFMeasure + expParser = expectedParser SAMultiLabelFMeasure + outParser = outputParser SAMultiLabelFMeasure finalPipeline = fixer ( CL.map (fMeasureOnCounts beta) .| (bootstrapC numberOfSamples @@ -633,10 +630,10 @@ gevalCoreOnSources (LogLossHashed nbOfBits) = helperLogLossHashed nbOfBits id gevalCoreOnSources (LikelihoodHashed nbOfBits) = helperLogLossHashed nbOfBits logLossToLikehood -gevalCoreOnSources (Mean (MultiLabelFMeasure beta matchingSpec)) +gevalCoreOnSources (Mean (MultiLabelFMeasure beta)) = gevalCoreWithoutInputOnItemTargets (Right . intoWords) (Right . getWords) - ((fMeasureOnCounts beta) . (getWeightedCounts (getMatchingFunctionForString matchingSpec))) + ((fMeasureOnCounts beta) . (getCounts (==))) averageC id noGraph @@ -664,13 +661,12 @@ gevalCoreOnSources (Mean WER) gevalCoreOnSources (Mean _) = error $ "Mean/ meta-metric defined only for MultiLabel-F1 and WER for the time being" -- only MultiLabel-F1 handled for JSONs for the time being... -gevalCoreOnSources (MultiLabelFMeasure beta matchingSpec) = - gevalCoreWithoutInputOnItemTargets (Right . intoWords) - (Right . getWords) - (getWeightedCounts (getMatchingFunctionForString matchingSpec)) - countAgg - (fMeasureOnCounts beta) - noGraph +gevalCoreOnSources (MultiLabelFMeasure beta) = gevalCoreWithoutInputOnItemTargets (Right . intoWords) + (Right . getWords) + (getCounts (==)) + countAgg + (fMeasureOnCounts beta) + noGraph where getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts @@ -748,9 +744,9 @@ countFragAgg :: (Num n, Num v, Monad m) => ConduitM (n, n, v, v) o m (n, n, v, v countFragAgg = CC.foldl countFragFolder (fromInteger 0, fromInteger 0, fromInteger 0, fromInteger 0) gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => - (V.Vector (Double, Double) -> Double) -> -- ^ correlation function - LineSourcesSpecification (ResourceT m) -> - m (MetricOutput) -- ^ metric values for the output against the expected output + (V.Vector (Double, Double) -> Double) -- ^ correlation function + -> LineSourcesSpecification (ResourceT m) + -> m (MetricOutput) -- ^ metric values for the output against the expected output gevalCoreByCorrelationMeasure correlationFunction = gevalCoreWithoutInput SAPearson correlationC finalStep noGraph where correlationC = CC.foldl (flip (:)) [] @@ -850,13 +846,14 @@ gevalRunPipeline' parserSpec itemStep finalPipeline context = do <$> ZipSource (CL.sourceList [(getFirstLineNo (Proxy :: Proxy m) context)..]) <*> (ZipSource $ recordSource context parserSpec)) .| CL.map (checkStep (Proxy :: Proxy m) itemStep)) .| CL.catMaybes .| finalPipeline) -continueGEvalCalculations :: forall m t . (MonadIO m) => + + +continueGEvalCalculations :: (MonadIO m) => SAMetric t -> Metric -> ConduitT (ItemIntermediateRepresentationType t) Void (ResourceT m) MetricOutput -continueGEvalCalculations (SAMultiLabelFMeasure matchingSpec) (MultiLabelFMeasure beta matchingSpec') - = defineContinuation countAgg (fMeasureOnCounts beta) noGraph +continueGEvalCalculations SAMultiLabelFMeasure (MultiLabelFMeasure beta) = defineContinuation countAgg (fMeasureOnCounts beta) noGraph continueGEvalCalculations SALikelihood Likelihood = defineContinuation averageC logLossToLikehood noGraph diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs index 69e1150..6649e80 100644 --- a/src/GEval/CreateChallenge.hs +++ b/src/GEval/CreateChallenge.hs @@ -7,10 +7,9 @@ module GEval.CreateChallenge import GEval.Metric import GEval.EvaluationScheme -import GEval.Common (GEvalException(..)) +import GEval.Common (GEvalException(..), FormattingOptions(..)) import GEval.Core (GEvalSpecification(..), configFileName, gesMainMetric, defaultTestName) import GEval.Submit (tokenFileName) -import GEval.MatchingSpecification (MatchingSpecification(ExactMatch)) import qualified System.Directory as D import Control.Conditional (whenM) @@ -22,6 +21,9 @@ import Data.String.Here import Data.List (intercalate) import Data.List.Split (splitOn) +import Data.Bool + +import Text.Printf createChallenge :: Bool -> FilePath -> GEvalSpecification -> IO () createChallenge withDataFiles expectedDirectory spec = do @@ -31,7 +33,7 @@ createChallenge withDataFiles expectedDirectory spec = do D.createDirectoryIfMissing False testDirectory createFile (expectedDirectory ".gitignore") $ gitignoreContents createFile (expectedDirectory "README.md") $ readmeMDContents metric testName - createFile (expectedDirectory configFileName) $ configContents metrics precision testName + createFile (expectedDirectory configFileName) $ configContents metrics format testName createHeaderFile expectedDirectory "in-header.tsv" $ inHeaderContents metric createHeaderFile expectedDirectory "out-header.tsv" $ outHeaderContents metric if withDataFiles @@ -49,7 +51,7 @@ createChallenge withDataFiles expectedDirectory spec = do return () where metric = gesMainMetric spec metrics = gesMetrics spec - precision = gesPrecision spec + format = gesFormatting spec testName = gesTestName spec trainDirectory = expectedDirectory "train" devDirectory = expectedDirectory "dev-0" @@ -332,8 +334,8 @@ character (inclusively). |] ++ (commonReadmeMDContents testName) -readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta ExactMatch) testName -readmeMDContents (MultiLabelFMeasure beta _) testName = [i| +readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta) testName +readmeMDContents (MultiLabelFMeasure beta) testName = [i| Tag names and their component ============================= @@ -423,18 +425,17 @@ Directory structure |] -configContents :: [EvaluationScheme] -> Maybe Int -> String -> String -configContents schemes precision testName = unwords (Prelude.map (\scheme -> ("--metric " ++ (show scheme))) schemes) ++ +configContents :: [EvaluationScheme] -> FormattingOptions -> String -> String +configContents schemes format testName = unwords (Prelude.map (\scheme -> ("--metric " ++ (show scheme))) schemes) ++ (if testName /= defaultTestName then " --test-name " ++ testName else "") ++ - (precisionOpt precision) ++ + (precisionOpt format) ++ inHeaderOpts ++ outHeaderOpts - where precisionOpt Nothing = "" - precisionOpt (Just p) = " --precision " ++ (show p) + where precisionOpt (FormattingOptions m b) = maybe "" (printf "--precision %d ") m ++ bool "" "--show-as-percentage" b ((EvaluationScheme mainMetric _):_) = schemes inHeaderOpts = getHeaderOpts "in-header" inHeaderContents outHeaderOpts = getHeaderOpts "out-header" outHeaderContents @@ -534,8 +535,8 @@ trainContents TokenAccuracy = [hereLit|* V N I like cats trainContents SegmentAccuracy = [hereLit|Art:1-3 N:5-11 V:12-13 A:15-19 The student's smart N:1-6 N:8-10 V:12-13 A:15-18 Mary's dog is nice |] -trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta ExactMatch) -trainContents (MultiLabelFMeasure _ _) = [hereLit|I know Mr John Smith person/3,4,5 first-name/4 surname/5 +trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta) +trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person/3,4,5 first-name/4 surname/5 Steven bloody Brown person/1,3 first-name/1 surname/3 James and James first-name/1 firstname/3 |] @@ -607,8 +608,8 @@ Ala has a cat devInContents SegmentAccuracy = [hereLit|John is smart Mary's intelligent |] -devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta ExactMatch) -devInContents (MultiLabelFMeasure _ _) = [hereLit|Jan Kowalski is here +devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta) +devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here I see him Barbara |] @@ -675,8 +676,8 @@ N V * N devExpectedContents SegmentAccuracy = [hereLit|N:1-4 V:6-7 A:9-13 N:1-4 V:6-7 A:9-19 |] -devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta ExactMatch) -devExpectedContents (MultiLabelFMeasure _ _) = [hereLit|person/1,2 first-name/1 surname/2 +devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta) +devExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,2 first-name/1 surname/2 first-name/1 |] @@ -748,8 +749,8 @@ I know testInContents SegmentAccuracy = [hereLit|Mary's cat is old John is young |] -testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta ExactMatch) -testInContents (MultiLabelFMeasure _ _) = [hereLit|John bloody Smith +testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta) +testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith Nobody is there I saw Marketa |] @@ -817,8 +818,8 @@ testExpectedContents TokenAccuracy = [hereLit|* V N testExpectedContents SegmentAccuracy = [hereLit|N:1-6 N:8-10 V:12-13 A:15-17 N:1-4 V:6-7 A:9-13 |] -testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta ExactMatch) -testExpectedContents (MultiLabelFMeasure _ _) = [hereLit|person/1,3 first-name/1 surname/3 +testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta) +testExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,3 first-name/1 surname/3 first-name/3 |] @@ -876,8 +877,8 @@ inHeaderContents BIOF1Labels = inHeaderContents BIOF1 inHeaderContents BIOF1 = Just ["Text"] inHeaderContents TokenAccuracy = Just ["TokenizedText"] inHeaderContents SegmentAccuracy = Just ["Segment"] -inHeaderContents (ProbabilisticMultiLabelFMeasure beta) = inHeaderContents (MultiLabelFMeasure beta ExactMatch) -inHeaderContents (MultiLabelFMeasure _ _) = Just ["Text"] +inHeaderContents (ProbabilisticMultiLabelFMeasure beta) = inHeaderContents (MultiLabelFMeasure beta) +inHeaderContents (MultiLabelFMeasure _) = Just ["Text"] inHeaderContents MultiLabelLikelihood = inHeaderContents MultiLabelLogLoss inHeaderContents MultiLabelLogLoss = Just ["Utterance"] inHeaderContents (Soft2DFMeasure _) = inHeaderContents ClippEU @@ -904,8 +905,8 @@ outHeaderContents BIOF1Labels = outHeaderContents BIOF1 outHeaderContents BIOF1 = Just ["BIOOutput"] outHeaderContents TokenAccuracy = Just ["PartsOfSpeech"] outHeaderContents SegmentAccuracy = Just ["PartsOfSpeech"] -outHeaderContents (ProbabilisticMultiLabelFMeasure beta) = outHeaderContents (MultiLabelFMeasure beta ExactMatch) -outHeaderContents (MultiLabelFMeasure _ _) = Just ["Entities"] +outHeaderContents (ProbabilisticMultiLabelFMeasure beta) = outHeaderContents (MultiLabelFMeasure beta) +outHeaderContents (MultiLabelFMeasure _) = Just ["Entities"] outHeaderContents MultiLabelLikelihood = outHeaderContents MultiLabelLogLoss outHeaderContents MultiLabelLogLoss = Just ["Emotion"] outHeaderContents (Soft2DFMeasure _) = Just ["Rectangle"] diff --git a/src/GEval/Formatting.hs b/src/GEval/Formatting.hs index 149c1b9..39a9799 100644 --- a/src/GEval/Formatting.hs +++ b/src/GEval/Formatting.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE LambdaCase #-} + module GEval.Formatting (formatTheResult, formatSimpleResult, formatTheResultWithErrorBounds) where @@ -7,25 +9,29 @@ import Data.Conduit.Bootstrap import Text.Printf -formatTheResult :: Maybe Int -> MetricResult -> String -formatTheResult mPrecision (SimpleRun val) = formatSimpleResult mPrecision val -formatTheResult mPrecision (BootstrapResampling vals) = formatTheResultWithErrorBounds mPrecision pointEstimate (Just errorBound) +formatTheResult :: FormattingOptions -> MetricResult -> String +formatTheResult format (SimpleRun val) = formatSimpleResult format val +formatTheResult format (BootstrapResampling vals) = formatTheResultWithErrorBounds format pointEstimate (Just errorBound) where pointEstimate = (upperBound + lowerBound) / 2.0 errorBound = (upperBound - lowerBound) / 2.0 (lowerBound, upperBound) = getConfidenceBounds defaultConfidenceLevel vals -formatTheResultWithErrorBounds :: Maybe Int -> MetricValue -> Maybe MetricValue -> String -formatTheResultWithErrorBounds mPrecision pointEstimate Nothing = formatSimpleResult mPrecision pointEstimate -formatTheResultWithErrorBounds mPrecision pointEstimate (Just errorBound) = (formatSimpleResult correctedPrecision pointEstimate) +formatTheResultWithErrorBounds :: FormattingOptions -> MetricValue -> Maybe MetricValue -> String +formatTheResultWithErrorBounds format pointEstimate Nothing = formatSimpleResult format pointEstimate +formatTheResultWithErrorBounds format pointEstimate (Just errorBound) = (formatSimpleResult formatWithCorrectedPrecision pointEstimate) ++ "±" - ++ (formatSimpleResult correctedPrecision errorBound) + ++ (formatSimpleResult formatWithCorrectedPrecision errorBound) where errorBoundMagnitude = (floor (logBase 10.0 errorBound)) - 1 - correctedPrecision = Just $ selectLowerPrecision (max (-errorBoundMagnitude) 0) mPrecision + formatWithCorrectedPrecision = selectLowerPrecision (max (-errorBoundMagnitude) 0) format -formatSimpleResult :: Maybe Int -> MetricValue -> String -formatSimpleResult Nothing = show -formatSimpleResult (Just prec) = printf "%0.*f" prec +formatSimpleResult :: FormattingOptions -> MetricValue -> String +formatSimpleResult = \case + FormattingOptions (Just prec) True -> printf "%.*f" (prec-2) . (*100) + FormattingOptions (Just prec) _ -> printf "0.*f" prec + _ -> show -selectLowerPrecision :: Int -> Maybe Int -> Int -selectLowerPrecision p Nothing = p -selectLowerPrecision p (Just p') = min p p' +selectLowerPrecision :: Int -> FormattingOptions -> FormattingOptions +selectLowerPrecision p = \case + a@(FormattingOptions _ True) -> a + FormattingOptions (Just prec) _ -> FormattingOptions (Just $ min prec p) False + _ -> FormattingOptions (Just p) False diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index fdddad9..0290083 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -6,7 +6,7 @@ module GEval.OptionsParser runGEvalGetOptions, getOptions, metricReader, - precisionArgParser + formatParser ) where import Paths_geval (version) @@ -138,12 +138,15 @@ optionsParser = GEvalOptions <> help "Mark worst features when in the line-by-line mode") -precisionArgParser :: Parser Int -precisionArgParser = option auto - ( long "precision" +formatParser :: Parser FormattingOptions +formatParser = FormattingOptions + <$> (optional $ option auto ( long "precision" <> short 'p' <> metavar "NUMBER-OF-FRACTIONAL-DIGITS" - <> help "Arithmetic precision, i.e. the number of fractional digits to be shown" ) + <> help "Arithmetic precision, i.e. the number of fractional digits to be shown" )) + <*> switch ( long "show-as-percentage" + <> short '%' + <> help "Returns the result as a percentage (i.e. maximum value of 100 instead of 1)" ) specParser :: Parser GEvalSpecification specParser = GEvalSpecification @@ -191,7 +194,7 @@ specParser = GEvalSpecification <> metavar "INPUT" <> help "The name of the file with the input (applicable only for some metrics)" ) <*> ((flip fromMaybe) <$> (singletonMaybe <$> altMetricReader) <*> metricReader) - <*> optional precisionArgParser + <*> formatParser <*> (optional $ option auto ( long "tokenizer" <> short 'T' diff --git a/test/Spec.hs b/test/Spec.hs index 43b8a3d..d208fc6 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -343,16 +343,6 @@ main = hspec $ do runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615 it "labels given with probs and numbers" $ do runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666 - it "information extraction" $ do - runGEvalTest "multilabel-f1-ie" `shouldReturnAlmost` 0.1111111111 - it "information extraction with flags" $ do - runGEvalTest "multilabel-f1-ie-flags" `shouldReturnAlmost` 0.444444444444 - it "information extraction with fuzzy matching" $ do - runGEvalTest "multilabel-f1-ie-fuzzy" `shouldReturnAlmost` 0.681777777777 - it "information extraction with smart fuzzy matching" $ do - runGEvalTest "multilabel-f1-ie-fuzzy-smart" `shouldReturnAlmost` 0.598444 - it "information extraction with smart fuzzy matching hardened" $ do - runGEvalTest "multilabel-f1-ie-fuzzy-harden" `shouldReturnAlmost` 0.555555555 describe "Mean/MultiLabel-F" $ do it "simple" $ do runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5 @@ -478,7 +468,7 @@ main = hspec $ do gesExpectedFile = "expected.tsv", gesInputFile = "in.tsv", gesMetrics = [EvaluationScheme Likelihood []], - gesPrecision = Nothing, + gesFormatting = FormattingOptions Nothing False, gesTokenizer = Nothing, gesGonitoHost = Nothing, gesToken = Nothing, @@ -606,7 +596,7 @@ main = hspec $ do let spec = defaultGEvalSpecification { gesExpectedDirectory = Just tempDir, gesMetrics = [scheme], - gesPrecision = Just 4 } + gesFormatting = FormattingOptions (Just 4) False } createChallenge True tempDir spec validationChallenge tempDir spec describe "test sample outputs" $ do