From 32290d371559f4f02a3bbdc6bbc40edc7913f879 Mon Sep 17 00:00:00 2001 From: welp Date: Mon, 13 Jul 2020 16:20:36 +0200 Subject: [PATCH 1/7] pure percantage format feature commit --- src/GEval/Common.hs | 5 ++++ src/GEval/Core.hs | 49 ++++++++++++++++------------------ src/GEval/CreateChallenge.hs | 51 ++++++++++++++++++------------------ src/GEval/Formatting.hs | 34 ++++++++++++++---------- src/GEval/OptionsParser.hs | 15 ++++++----- test/Spec.hs | 14 ++-------- 6 files changed, 85 insertions(+), 83 deletions(-) diff --git a/src/GEval/Common.hs b/src/GEval/Common.hs index 4a9cc4e..c8b581d 100644 --- a/src/GEval/Common.hs +++ b/src/GEval/Common.hs @@ -15,6 +15,11 @@ type MetricValue = Double data GraphSeries = GraphSeries [(Double, Double)] +data FormattingOptions = FormattingOptions { + decimalPlaces :: Maybe Int, + asPercentage :: Bool + } + data MetricResult = SimpleRun MetricValue | BootstrapResampling [MetricValue] instance Show MetricResult where diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 01b0a82..ebe7b8d 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -6,6 +6,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE PackageImports #-} + module GEval.Core ( geval, gevalCore, @@ -112,7 +113,6 @@ import GEval.Annotation import GEval.BlackBoxDebugging import Data.Conduit.Bootstrap import GEval.DataSource -import GEval.MatchingSpecification import qualified Data.HashMap.Strict as M import qualified Data.Vector as V @@ -182,7 +182,7 @@ data GEvalSpecification = GEvalSpecification gesExpectedFile :: String, gesInputFile :: String, gesMetrics :: [EvaluationScheme], - gesPrecision :: Maybe Int, + gesFormatting :: FormattingOptions, gesTokenizer :: Maybe Tokenizer, gesGonitoHost :: Maybe String, gesToken :: Maybe String, @@ -253,7 +253,7 @@ defaultGEvalSpecification = GEvalSpecification { gesExpectedFile = defaultExpectedFile, gesInputFile = defaultInputFile, gesMetrics = [EvaluationScheme defaultMetric []], - gesPrecision = Nothing, + gesFormatting = FormattingOptions Nothing False, gesTokenizer = Nothing, gesGonitoHost = Nothing, gesToken = Nothing, @@ -522,7 +522,7 @@ singleLineAsLineSource (LineInFile sourceSpec lineNo line) itemDecoder preproces -- some metrics are handled by Bootstrap due to legacy issues, -- fix on the way handleBootstrap :: Metric -> Bool -handleBootstrap (Mean (MultiLabelFMeasure _ _)) = True +handleBootstrap (Mean (MultiLabelFMeasure _)) = True handleBootstrap (Mean _) = False handleBootstrap CharMatch = False handleBootstrap (LogLossHashed _) = False @@ -567,16 +567,13 @@ gevalBootstrapOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => -> m (MetricOutput) -- ^ metric values for the output against the expected output -- for the time being hardcoded -gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta matchingSpec)) lsSpec = do +gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta)) lsSpec = do gevalRunPipeline parserSpec (trans step) finalPipeline context where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser)) context = fromSpecificationToWithoutInput lsSpec - step = case toSing matchingSpec of - SomeSing s -> itemStep (SAMultiLabelFMeasure s) - expParser = case toSing matchingSpec of - SomeSing s -> expectedParser (SAMultiLabelFMeasure s) - outParser = case toSing matchingSpec of - SomeSing s -> outputParser (SAMultiLabelFMeasure s) + step = itemStep SAMultiLabelFMeasure + expParser = expectedParser SAMultiLabelFMeasure + outParser = outputParser SAMultiLabelFMeasure finalPipeline = fixer ( CL.map (fMeasureOnCounts beta) .| (bootstrapC numberOfSamples @@ -633,10 +630,10 @@ gevalCoreOnSources (LogLossHashed nbOfBits) = helperLogLossHashed nbOfBits id gevalCoreOnSources (LikelihoodHashed nbOfBits) = helperLogLossHashed nbOfBits logLossToLikehood -gevalCoreOnSources (Mean (MultiLabelFMeasure beta matchingSpec)) +gevalCoreOnSources (Mean (MultiLabelFMeasure beta)) = gevalCoreWithoutInputOnItemTargets (Right . intoWords) (Right . getWords) - ((fMeasureOnCounts beta) . (getWeightedCounts (getMatchingFunctionForString matchingSpec))) + ((fMeasureOnCounts beta) . (getCounts (==))) averageC id noGraph @@ -664,13 +661,12 @@ gevalCoreOnSources (Mean WER) gevalCoreOnSources (Mean _) = error $ "Mean/ meta-metric defined only for MultiLabel-F1 and WER for the time being" -- only MultiLabel-F1 handled for JSONs for the time being... -gevalCoreOnSources (MultiLabelFMeasure beta matchingSpec) = - gevalCoreWithoutInputOnItemTargets (Right . intoWords) - (Right . getWords) - (getWeightedCounts (getMatchingFunctionForString matchingSpec)) - countAgg - (fMeasureOnCounts beta) - noGraph +gevalCoreOnSources (MultiLabelFMeasure beta) = gevalCoreWithoutInputOnItemTargets (Right . intoWords) + (Right . getWords) + (getCounts (==)) + countAgg + (fMeasureOnCounts beta) + noGraph where getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts @@ -748,9 +744,9 @@ countFragAgg :: (Num n, Num v, Monad m) => ConduitM (n, n, v, v) o m (n, n, v, v countFragAgg = CC.foldl countFragFolder (fromInteger 0, fromInteger 0, fromInteger 0, fromInteger 0) gevalCoreByCorrelationMeasure :: (MonadUnliftIO m, MonadThrow m, MonadIO m) => - (V.Vector (Double, Double) -> Double) -> -- ^ correlation function - LineSourcesSpecification (ResourceT m) -> - m (MetricOutput) -- ^ metric values for the output against the expected output + (V.Vector (Double, Double) -> Double) -- ^ correlation function + -> LineSourcesSpecification (ResourceT m) + -> m (MetricOutput) -- ^ metric values for the output against the expected output gevalCoreByCorrelationMeasure correlationFunction = gevalCoreWithoutInput SAPearson correlationC finalStep noGraph where correlationC = CC.foldl (flip (:)) [] @@ -850,13 +846,14 @@ gevalRunPipeline' parserSpec itemStep finalPipeline context = do <$> ZipSource (CL.sourceList [(getFirstLineNo (Proxy :: Proxy m) context)..]) <*> (ZipSource $ recordSource context parserSpec)) .| CL.map (checkStep (Proxy :: Proxy m) itemStep)) .| CL.catMaybes .| finalPipeline) -continueGEvalCalculations :: forall m t . (MonadIO m) => + + +continueGEvalCalculations :: (MonadIO m) => SAMetric t -> Metric -> ConduitT (ItemIntermediateRepresentationType t) Void (ResourceT m) MetricOutput -continueGEvalCalculations (SAMultiLabelFMeasure matchingSpec) (MultiLabelFMeasure beta matchingSpec') - = defineContinuation countAgg (fMeasureOnCounts beta) noGraph +continueGEvalCalculations SAMultiLabelFMeasure (MultiLabelFMeasure beta) = defineContinuation countAgg (fMeasureOnCounts beta) noGraph continueGEvalCalculations SALikelihood Likelihood = defineContinuation averageC logLossToLikehood noGraph diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs index 69e1150..6649e80 100644 --- a/src/GEval/CreateChallenge.hs +++ b/src/GEval/CreateChallenge.hs @@ -7,10 +7,9 @@ module GEval.CreateChallenge import GEval.Metric import GEval.EvaluationScheme -import GEval.Common (GEvalException(..)) +import GEval.Common (GEvalException(..), FormattingOptions(..)) import GEval.Core (GEvalSpecification(..), configFileName, gesMainMetric, defaultTestName) import GEval.Submit (tokenFileName) -import GEval.MatchingSpecification (MatchingSpecification(ExactMatch)) import qualified System.Directory as D import Control.Conditional (whenM) @@ -22,6 +21,9 @@ import Data.String.Here import Data.List (intercalate) import Data.List.Split (splitOn) +import Data.Bool + +import Text.Printf createChallenge :: Bool -> FilePath -> GEvalSpecification -> IO () createChallenge withDataFiles expectedDirectory spec = do @@ -31,7 +33,7 @@ createChallenge withDataFiles expectedDirectory spec = do D.createDirectoryIfMissing False testDirectory createFile (expectedDirectory ".gitignore") $ gitignoreContents createFile (expectedDirectory "README.md") $ readmeMDContents metric testName - createFile (expectedDirectory configFileName) $ configContents metrics precision testName + createFile (expectedDirectory configFileName) $ configContents metrics format testName createHeaderFile expectedDirectory "in-header.tsv" $ inHeaderContents metric createHeaderFile expectedDirectory "out-header.tsv" $ outHeaderContents metric if withDataFiles @@ -49,7 +51,7 @@ createChallenge withDataFiles expectedDirectory spec = do return () where metric = gesMainMetric spec metrics = gesMetrics spec - precision = gesPrecision spec + format = gesFormatting spec testName = gesTestName spec trainDirectory = expectedDirectory "train" devDirectory = expectedDirectory "dev-0" @@ -332,8 +334,8 @@ character (inclusively). |] ++ (commonReadmeMDContents testName) -readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta ExactMatch) testName -readmeMDContents (MultiLabelFMeasure beta _) testName = [i| +readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta) testName +readmeMDContents (MultiLabelFMeasure beta) testName = [i| Tag names and their component ============================= @@ -423,18 +425,17 @@ Directory structure |] -configContents :: [EvaluationScheme] -> Maybe Int -> String -> String -configContents schemes precision testName = unwords (Prelude.map (\scheme -> ("--metric " ++ (show scheme))) schemes) ++ +configContents :: [EvaluationScheme] -> FormattingOptions -> String -> String +configContents schemes format testName = unwords (Prelude.map (\scheme -> ("--metric " ++ (show scheme))) schemes) ++ (if testName /= defaultTestName then " --test-name " ++ testName else "") ++ - (precisionOpt precision) ++ + (precisionOpt format) ++ inHeaderOpts ++ outHeaderOpts - where precisionOpt Nothing = "" - precisionOpt (Just p) = " --precision " ++ (show p) + where precisionOpt (FormattingOptions m b) = maybe "" (printf "--precision %d ") m ++ bool "" "--show-as-percentage" b ((EvaluationScheme mainMetric _):_) = schemes inHeaderOpts = getHeaderOpts "in-header" inHeaderContents outHeaderOpts = getHeaderOpts "out-header" outHeaderContents @@ -534,8 +535,8 @@ trainContents TokenAccuracy = [hereLit|* V N I like cats trainContents SegmentAccuracy = [hereLit|Art:1-3 N:5-11 V:12-13 A:15-19 The student's smart N:1-6 N:8-10 V:12-13 A:15-18 Mary's dog is nice |] -trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta ExactMatch) -trainContents (MultiLabelFMeasure _ _) = [hereLit|I know Mr John Smith person/3,4,5 first-name/4 surname/5 +trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta) +trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person/3,4,5 first-name/4 surname/5 Steven bloody Brown person/1,3 first-name/1 surname/3 James and James first-name/1 firstname/3 |] @@ -607,8 +608,8 @@ Ala has a cat devInContents SegmentAccuracy = [hereLit|John is smart Mary's intelligent |] -devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta ExactMatch) -devInContents (MultiLabelFMeasure _ _) = [hereLit|Jan Kowalski is here +devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta) +devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here I see him Barbara |] @@ -675,8 +676,8 @@ N V * N devExpectedContents SegmentAccuracy = [hereLit|N:1-4 V:6-7 A:9-13 N:1-4 V:6-7 A:9-19 |] -devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta ExactMatch) -devExpectedContents (MultiLabelFMeasure _ _) = [hereLit|person/1,2 first-name/1 surname/2 +devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta) +devExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,2 first-name/1 surname/2 first-name/1 |] @@ -748,8 +749,8 @@ I know testInContents SegmentAccuracy = [hereLit|Mary's cat is old John is young |] -testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta ExactMatch) -testInContents (MultiLabelFMeasure _ _) = [hereLit|John bloody Smith +testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta) +testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith Nobody is there I saw Marketa |] @@ -817,8 +818,8 @@ testExpectedContents TokenAccuracy = [hereLit|* V N testExpectedContents SegmentAccuracy = [hereLit|N:1-6 N:8-10 V:12-13 A:15-17 N:1-4 V:6-7 A:9-13 |] -testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta ExactMatch) -testExpectedContents (MultiLabelFMeasure _ _) = [hereLit|person/1,3 first-name/1 surname/3 +testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta) +testExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,3 first-name/1 surname/3 first-name/3 |] @@ -876,8 +877,8 @@ inHeaderContents BIOF1Labels = inHeaderContents BIOF1 inHeaderContents BIOF1 = Just ["Text"] inHeaderContents TokenAccuracy = Just ["TokenizedText"] inHeaderContents SegmentAccuracy = Just ["Segment"] -inHeaderContents (ProbabilisticMultiLabelFMeasure beta) = inHeaderContents (MultiLabelFMeasure beta ExactMatch) -inHeaderContents (MultiLabelFMeasure _ _) = Just ["Text"] +inHeaderContents (ProbabilisticMultiLabelFMeasure beta) = inHeaderContents (MultiLabelFMeasure beta) +inHeaderContents (MultiLabelFMeasure _) = Just ["Text"] inHeaderContents MultiLabelLikelihood = inHeaderContents MultiLabelLogLoss inHeaderContents MultiLabelLogLoss = Just ["Utterance"] inHeaderContents (Soft2DFMeasure _) = inHeaderContents ClippEU @@ -904,8 +905,8 @@ outHeaderContents BIOF1Labels = outHeaderContents BIOF1 outHeaderContents BIOF1 = Just ["BIOOutput"] outHeaderContents TokenAccuracy = Just ["PartsOfSpeech"] outHeaderContents SegmentAccuracy = Just ["PartsOfSpeech"] -outHeaderContents (ProbabilisticMultiLabelFMeasure beta) = outHeaderContents (MultiLabelFMeasure beta ExactMatch) -outHeaderContents (MultiLabelFMeasure _ _) = Just ["Entities"] +outHeaderContents (ProbabilisticMultiLabelFMeasure beta) = outHeaderContents (MultiLabelFMeasure beta) +outHeaderContents (MultiLabelFMeasure _) = Just ["Entities"] outHeaderContents MultiLabelLikelihood = outHeaderContents MultiLabelLogLoss outHeaderContents MultiLabelLogLoss = Just ["Emotion"] outHeaderContents (Soft2DFMeasure _) = Just ["Rectangle"] diff --git a/src/GEval/Formatting.hs b/src/GEval/Formatting.hs index 149c1b9..39a9799 100644 --- a/src/GEval/Formatting.hs +++ b/src/GEval/Formatting.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE LambdaCase #-} + module GEval.Formatting (formatTheResult, formatSimpleResult, formatTheResultWithErrorBounds) where @@ -7,25 +9,29 @@ import Data.Conduit.Bootstrap import Text.Printf -formatTheResult :: Maybe Int -> MetricResult -> String -formatTheResult mPrecision (SimpleRun val) = formatSimpleResult mPrecision val -formatTheResult mPrecision (BootstrapResampling vals) = formatTheResultWithErrorBounds mPrecision pointEstimate (Just errorBound) +formatTheResult :: FormattingOptions -> MetricResult -> String +formatTheResult format (SimpleRun val) = formatSimpleResult format val +formatTheResult format (BootstrapResampling vals) = formatTheResultWithErrorBounds format pointEstimate (Just errorBound) where pointEstimate = (upperBound + lowerBound) / 2.0 errorBound = (upperBound - lowerBound) / 2.0 (lowerBound, upperBound) = getConfidenceBounds defaultConfidenceLevel vals -formatTheResultWithErrorBounds :: Maybe Int -> MetricValue -> Maybe MetricValue -> String -formatTheResultWithErrorBounds mPrecision pointEstimate Nothing = formatSimpleResult mPrecision pointEstimate -formatTheResultWithErrorBounds mPrecision pointEstimate (Just errorBound) = (formatSimpleResult correctedPrecision pointEstimate) +formatTheResultWithErrorBounds :: FormattingOptions -> MetricValue -> Maybe MetricValue -> String +formatTheResultWithErrorBounds format pointEstimate Nothing = formatSimpleResult format pointEstimate +formatTheResultWithErrorBounds format pointEstimate (Just errorBound) = (formatSimpleResult formatWithCorrectedPrecision pointEstimate) ++ "±" - ++ (formatSimpleResult correctedPrecision errorBound) + ++ (formatSimpleResult formatWithCorrectedPrecision errorBound) where errorBoundMagnitude = (floor (logBase 10.0 errorBound)) - 1 - correctedPrecision = Just $ selectLowerPrecision (max (-errorBoundMagnitude) 0) mPrecision + formatWithCorrectedPrecision = selectLowerPrecision (max (-errorBoundMagnitude) 0) format -formatSimpleResult :: Maybe Int -> MetricValue -> String -formatSimpleResult Nothing = show -formatSimpleResult (Just prec) = printf "%0.*f" prec +formatSimpleResult :: FormattingOptions -> MetricValue -> String +formatSimpleResult = \case + FormattingOptions (Just prec) True -> printf "%.*f" (prec-2) . (*100) + FormattingOptions (Just prec) _ -> printf "0.*f" prec + _ -> show -selectLowerPrecision :: Int -> Maybe Int -> Int -selectLowerPrecision p Nothing = p -selectLowerPrecision p (Just p') = min p p' +selectLowerPrecision :: Int -> FormattingOptions -> FormattingOptions +selectLowerPrecision p = \case + a@(FormattingOptions _ True) -> a + FormattingOptions (Just prec) _ -> FormattingOptions (Just $ min prec p) False + _ -> FormattingOptions (Just p) False diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index fdddad9..0290083 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -6,7 +6,7 @@ module GEval.OptionsParser runGEvalGetOptions, getOptions, metricReader, - precisionArgParser + formatParser ) where import Paths_geval (version) @@ -138,12 +138,15 @@ optionsParser = GEvalOptions <> help "Mark worst features when in the line-by-line mode") -precisionArgParser :: Parser Int -precisionArgParser = option auto - ( long "precision" +formatParser :: Parser FormattingOptions +formatParser = FormattingOptions + <$> (optional $ option auto ( long "precision" <> short 'p' <> metavar "NUMBER-OF-FRACTIONAL-DIGITS" - <> help "Arithmetic precision, i.e. the number of fractional digits to be shown" ) + <> help "Arithmetic precision, i.e. the number of fractional digits to be shown" )) + <*> switch ( long "show-as-percentage" + <> short '%' + <> help "Returns the result as a percentage (i.e. maximum value of 100 instead of 1)" ) specParser :: Parser GEvalSpecification specParser = GEvalSpecification @@ -191,7 +194,7 @@ specParser = GEvalSpecification <> metavar "INPUT" <> help "The name of the file with the input (applicable only for some metrics)" ) <*> ((flip fromMaybe) <$> (singletonMaybe <$> altMetricReader) <*> metricReader) - <*> optional precisionArgParser + <*> formatParser <*> (optional $ option auto ( long "tokenizer" <> short 'T' diff --git a/test/Spec.hs b/test/Spec.hs index 43b8a3d..d208fc6 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -343,16 +343,6 @@ main = hspec $ do runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615 it "labels given with probs and numbers" $ do runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666 - it "information extraction" $ do - runGEvalTest "multilabel-f1-ie" `shouldReturnAlmost` 0.1111111111 - it "information extraction with flags" $ do - runGEvalTest "multilabel-f1-ie-flags" `shouldReturnAlmost` 0.444444444444 - it "information extraction with fuzzy matching" $ do - runGEvalTest "multilabel-f1-ie-fuzzy" `shouldReturnAlmost` 0.681777777777 - it "information extraction with smart fuzzy matching" $ do - runGEvalTest "multilabel-f1-ie-fuzzy-smart" `shouldReturnAlmost` 0.598444 - it "information extraction with smart fuzzy matching hardened" $ do - runGEvalTest "multilabel-f1-ie-fuzzy-harden" `shouldReturnAlmost` 0.555555555 describe "Mean/MultiLabel-F" $ do it "simple" $ do runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5 @@ -478,7 +468,7 @@ main = hspec $ do gesExpectedFile = "expected.tsv", gesInputFile = "in.tsv", gesMetrics = [EvaluationScheme Likelihood []], - gesPrecision = Nothing, + gesFormatting = FormattingOptions Nothing False, gesTokenizer = Nothing, gesGonitoHost = Nothing, gesToken = Nothing, @@ -606,7 +596,7 @@ main = hspec $ do let spec = defaultGEvalSpecification { gesExpectedDirectory = Just tempDir, gesMetrics = [scheme], - gesPrecision = Just 4 } + gesFormatting = FormattingOptions (Just 4) False } createChallenge True tempDir spec validationChallenge tempDir spec describe "test sample outputs" $ do From 1010902baa8704f9d62f4f0c8f2c88f5dca9b07e Mon Sep 17 00:00:00 2001 From: welp Date: Mon, 13 Jul 2020 16:25:20 +0200 Subject: [PATCH 2/7] Main changes relating percentage feature --- app/Main.hs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/app/Main.hs b/app/Main.hs index 337da7f..b7337eb 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -70,7 +70,7 @@ getHeader params schemes = Just $ intercalate "\t" (Prelude.map T.unpack params formatTableEntry :: GEvalOptions -> [T.Text] -> ((SourceSpec, [MetricResult]), OutputFileParsed) -> String formatTableEntry opts paramNames ((sourceSpec, metrics), ofParsed) = intercalate "\t" ((initialColumns paramNames sourceSpec ofParsed) ++ vals) - where vals = Prelude.map (formatTheResult (gesPrecision $ geoSpec opts)) metrics + where vals = Prelude.map (formatTheResult (gesFormatting $ geoSpec opts)) metrics initialColumns :: [T.Text] -> SourceSpec -> OutputFileParsed -> [String] initialColumns [] sourceSpec ofParsed = [formatSourceSpec sourceSpec] @@ -79,35 +79,35 @@ initialColumns params sourceSpec (OutputFileParsed _ paramMap) = showTheResult' :: GEvalOptions -> [MetricResult] -> IO () -- do not show the metric if just one was given -showTheResult' opts [val] = putStrLn $ formatTheResult (gesPrecision $ geoSpec opts) val +showTheResult' opts [val] = putStrLn $ formatTheResult (gesFormatting $ geoSpec opts) val showTheResult' opts [] = do hPutStrLn stderr "no metric given, use --metric option" exitFailure showTheResult' opts vals = mapM_ putStrLn $ intercalate [""] - $ Prelude.map (formatCrossTable (gesPrecision $ geoSpec opts)) + $ Prelude.map (formatCrossTable (gesFormatting $ geoSpec opts)) $ splitIntoTablesWithValues (T.pack "metric") (T.pack "value") mapping metricLabels where mapping = LM.fromList $ zip metricLabels vals metricLabels = Prelude.map T.pack $ Prelude.map evaluationSchemeName $ gesMetrics $ geoSpec opts -formatCrossTable :: Maybe Int -> TableWithValues MetricResult -> [String] -formatCrossTable mPrecision (TableWithValues [_, _] body) = +formatCrossTable :: FormattingOptions -> TableWithValues MetricResult -> [String] +formatCrossTable format (TableWithValues [_, _] body) = -- actually we won't print metric/value header -- (1) to keep backward-compatible with the previous version -- (2) to be concise - Prelude.map (formatCrossTableLine mPrecision) body -formatCrossTable mPrecision (TableWithValues header body) = - (intercalate "\t" $ Prelude.map T.unpack header) : Prelude.map (formatCrossTableLine mPrecision) body + Prelude.map (formatCrossTableLine format) body +formatCrossTable format (TableWithValues header body) = + (intercalate "\t" $ Prelude.map T.unpack header) : Prelude.map (formatCrossTableLine format) body -formatCrossTableLine :: Maybe Int -> (T.Text, [MetricResult]) -> String -formatCrossTableLine mPrecision (rowName, values) = - intercalate "\t" ((T.unpack rowName):Prelude.map (formatTheResult mPrecision) values) +formatCrossTableLine :: FormattingOptions-> (T.Text, [MetricResult]) -> String +formatCrossTableLine format (rowName, values) = + intercalate "\t" ((T.unpack rowName):Prelude.map (formatTheResult format) values) formatSourceSpec :: SourceSpec -> String formatSourceSpec (FilePathSpec fp) = dropExtensions $ takeFileName fp formatSourceSpec spec = show spec -formatTheMetricAndResult :: Maybe Int -> (EvaluationScheme, MetricResult) -> String -formatTheMetricAndResult mPrecision (scheme, val) = (evaluationSchemeName scheme) ++ "\t" ++ (formatTheResult mPrecision val) +formatTheMetricAndResult :: FormattingOptions -> (EvaluationScheme, MetricResult) -> String +formatTheMetricAndResult format (scheme, val) = (evaluationSchemeName scheme) ++ "\t" ++ (formatTheResult format val) From d31d4320a9d92cde898a3bd1df3f484a39dc9fdb Mon Sep 17 00:00:00 2001 From: welp Date: Mon, 13 Jul 2020 17:15:01 +0200 Subject: [PATCH 3/7] incorporate changes --- src/GEval/Core.hs | 36 +++++++++++++++++++++--------------- src/GEval/CreateChallenge.hs | 33 +++++++++++++++++---------------- 2 files changed, 38 insertions(+), 31 deletions(-) diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index ebe7b8d..0fb4b16 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -113,6 +113,7 @@ import GEval.Annotation import GEval.BlackBoxDebugging import Data.Conduit.Bootstrap import GEval.DataSource +import GEval.MatchingSpecification import qualified Data.HashMap.Strict as M import qualified Data.Vector as V @@ -522,7 +523,7 @@ singleLineAsLineSource (LineInFile sourceSpec lineNo line) itemDecoder preproces -- some metrics are handled by Bootstrap due to legacy issues, -- fix on the way handleBootstrap :: Metric -> Bool -handleBootstrap (Mean (MultiLabelFMeasure _)) = True +handleBootstrap (Mean (MultiLabelFMeasure _ _)) = True handleBootstrap (Mean _) = False handleBootstrap CharMatch = False handleBootstrap (LogLossHashed _) = False @@ -567,13 +568,16 @@ gevalBootstrapOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => -> m (MetricOutput) -- ^ metric values for the output against the expected output -- for the time being hardcoded -gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta)) lsSpec = do +gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta matchingSpec)) lsSpec = do gevalRunPipeline parserSpec (trans step) finalPipeline context where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser)) context = fromSpecificationToWithoutInput lsSpec - step = itemStep SAMultiLabelFMeasure - expParser = expectedParser SAMultiLabelFMeasure - outParser = outputParser SAMultiLabelFMeasure + step = case toSing matchingSpec of + SomeSing s -> itemStep (SAMultiLabelFMeasure s) + expParser = case toSing matchingSpec of + SomeSing s -> expectedParser (SAMultiLabelFMeasure s) + outParser = case toSing matchingSpec of + SomeSing s -> outputParser (SAMultiLabelFMeasure s) finalPipeline = fixer ( CL.map (fMeasureOnCounts beta) .| (bootstrapC numberOfSamples @@ -630,10 +634,10 @@ gevalCoreOnSources (LogLossHashed nbOfBits) = helperLogLossHashed nbOfBits id gevalCoreOnSources (LikelihoodHashed nbOfBits) = helperLogLossHashed nbOfBits logLossToLikehood -gevalCoreOnSources (Mean (MultiLabelFMeasure beta)) +gevalCoreOnSources (Mean (MultiLabelFMeasure beta matchingSpec)) = gevalCoreWithoutInputOnItemTargets (Right . intoWords) (Right . getWords) - ((fMeasureOnCounts beta) . (getCounts (==))) + ((fMeasureOnCounts beta) . (getWeightedCounts (getMatchingFunctionForString matchingSpec))) averageC id noGraph @@ -661,12 +665,13 @@ gevalCoreOnSources (Mean WER) gevalCoreOnSources (Mean _) = error $ "Mean/ meta-metric defined only for MultiLabel-F1 and WER for the time being" -- only MultiLabel-F1 handled for JSONs for the time being... -gevalCoreOnSources (MultiLabelFMeasure beta) = gevalCoreWithoutInputOnItemTargets (Right . intoWords) - (Right . getWords) - (getCounts (==)) - countAgg - (fMeasureOnCounts beta) - noGraph +gevalCoreOnSources (MultiLabelFMeasure beta matchingSpec) = + gevalCoreWithoutInputOnItemTargets (Right . intoWords) + (Right . getWords) + (getWeightedCounts (getMatchingFunctionForString matchingSpec)) + countAgg + (fMeasureOnCounts beta) + noGraph where getWords (RawItemTarget t) = Prelude.map unpack $ selectByStandardThreshold $ parseIntoProbList t getWords (PartiallyParsedItemTarget ts) = Prelude.map unpack ts @@ -848,12 +853,13 @@ gevalRunPipeline' parserSpec itemStep finalPipeline context = do -continueGEvalCalculations :: (MonadIO m) => +continueGEvalCalculations :: forall m t . (MonadIO m) => SAMetric t -> Metric -> ConduitT (ItemIntermediateRepresentationType t) Void (ResourceT m) MetricOutput -continueGEvalCalculations SAMultiLabelFMeasure (MultiLabelFMeasure beta) = defineContinuation countAgg (fMeasureOnCounts beta) noGraph +continueGEvalCalculations (SAMultiLabelFMeasure matchingSpec) (MultiLabelFMeasure beta matchingSpec') + = defineContinuation countAgg (fMeasureOnCounts beta) noGraph continueGEvalCalculations SALikelihood Likelihood = defineContinuation averageC logLossToLikehood noGraph diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs index 6649e80..7fea41e 100644 --- a/src/GEval/CreateChallenge.hs +++ b/src/GEval/CreateChallenge.hs @@ -10,6 +10,7 @@ import GEval.EvaluationScheme import GEval.Common (GEvalException(..), FormattingOptions(..)) import GEval.Core (GEvalSpecification(..), configFileName, gesMainMetric, defaultTestName) import GEval.Submit (tokenFileName) +import GEval.MatchingSpecification (MatchingSpecification(ExactMatch)) import qualified System.Directory as D import Control.Conditional (whenM) @@ -334,8 +335,8 @@ character (inclusively). |] ++ (commonReadmeMDContents testName) -readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta) testName -readmeMDContents (MultiLabelFMeasure beta) testName = [i| +readmeMDContents (ProbabilisticMultiLabelFMeasure beta) testName = readmeMDContents (MultiLabelFMeasure beta ExactMatch) testName +readmeMDContents (MultiLabelFMeasure beta _) testName = [i| Tag names and their component ============================= @@ -535,8 +536,8 @@ trainContents TokenAccuracy = [hereLit|* V N I like cats trainContents SegmentAccuracy = [hereLit|Art:1-3 N:5-11 V:12-13 A:15-19 The student's smart N:1-6 N:8-10 V:12-13 A:15-18 Mary's dog is nice |] -trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta) -trainContents (MultiLabelFMeasure _) = [hereLit|I know Mr John Smith person/3,4,5 first-name/4 surname/5 +trainContents (ProbabilisticMultiLabelFMeasure beta) = trainContents (MultiLabelFMeasure beta ExactMatch) +trainContents (MultiLabelFMeasure _ _) = [hereLit|I know Mr John Smith person/3,4,5 first-name/4 surname/5 Steven bloody Brown person/1,3 first-name/1 surname/3 James and James first-name/1 firstname/3 |] @@ -608,8 +609,8 @@ Ala has a cat devInContents SegmentAccuracy = [hereLit|John is smart Mary's intelligent |] -devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta) -devInContents (MultiLabelFMeasure _) = [hereLit|Jan Kowalski is here +devInContents (ProbabilisticMultiLabelFMeasure beta) = devInContents (MultiLabelFMeasure beta ExactMatch) +devInContents (MultiLabelFMeasure _ _) = [hereLit|Jan Kowalski is here I see him Barbara |] @@ -676,8 +677,8 @@ N V * N devExpectedContents SegmentAccuracy = [hereLit|N:1-4 V:6-7 A:9-13 N:1-4 V:6-7 A:9-19 |] -devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta) -devExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,2 first-name/1 surname/2 +devExpectedContents (ProbabilisticMultiLabelFMeasure beta) = devExpectedContents (MultiLabelFMeasure beta ExactMatch) +devExpectedContents (MultiLabelFMeasure _ _) = [hereLit|person/1,2 first-name/1 surname/2 first-name/1 |] @@ -749,8 +750,8 @@ I know testInContents SegmentAccuracy = [hereLit|Mary's cat is old John is young |] -testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta) -testInContents (MultiLabelFMeasure _) = [hereLit|John bloody Smith +testInContents (ProbabilisticMultiLabelFMeasure beta) = testInContents (MultiLabelFMeasure beta ExactMatch) +testInContents (MultiLabelFMeasure _ _) = [hereLit|John bloody Smith Nobody is there I saw Marketa |] @@ -818,8 +819,8 @@ testExpectedContents TokenAccuracy = [hereLit|* V N testExpectedContents SegmentAccuracy = [hereLit|N:1-6 N:8-10 V:12-13 A:15-17 N:1-4 V:6-7 A:9-13 |] -testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta) -testExpectedContents (MultiLabelFMeasure _) = [hereLit|person/1,3 first-name/1 surname/3 +testExpectedContents (ProbabilisticMultiLabelFMeasure beta) = testExpectedContents (MultiLabelFMeasure beta ExactMatch) +testExpectedContents (MultiLabelFMeasure _ _) = [hereLit|person/1,3 first-name/1 surname/3 first-name/3 |] @@ -877,8 +878,8 @@ inHeaderContents BIOF1Labels = inHeaderContents BIOF1 inHeaderContents BIOF1 = Just ["Text"] inHeaderContents TokenAccuracy = Just ["TokenizedText"] inHeaderContents SegmentAccuracy = Just ["Segment"] -inHeaderContents (ProbabilisticMultiLabelFMeasure beta) = inHeaderContents (MultiLabelFMeasure beta) -inHeaderContents (MultiLabelFMeasure _) = Just ["Text"] +inHeaderContents (ProbabilisticMultiLabelFMeasure beta) = inHeaderContents (MultiLabelFMeasure beta ExactMatch) +inHeaderContents (MultiLabelFMeasure _ _) = Just ["Text"] inHeaderContents MultiLabelLikelihood = inHeaderContents MultiLabelLogLoss inHeaderContents MultiLabelLogLoss = Just ["Utterance"] inHeaderContents (Soft2DFMeasure _) = inHeaderContents ClippEU @@ -905,8 +906,8 @@ outHeaderContents BIOF1Labels = outHeaderContents BIOF1 outHeaderContents BIOF1 = Just ["BIOOutput"] outHeaderContents TokenAccuracy = Just ["PartsOfSpeech"] outHeaderContents SegmentAccuracy = Just ["PartsOfSpeech"] -outHeaderContents (ProbabilisticMultiLabelFMeasure beta) = outHeaderContents (MultiLabelFMeasure beta) -outHeaderContents (MultiLabelFMeasure _) = Just ["Entities"] +outHeaderContents (ProbabilisticMultiLabelFMeasure beta) = outHeaderContents (MultiLabelFMeasure beta ExactMatch) +outHeaderContents (MultiLabelFMeasure _ _) = Just ["Entities"] outHeaderContents MultiLabelLikelihood = outHeaderContents MultiLabelLogLoss outHeaderContents MultiLabelLogLoss = Just ["Emotion"] outHeaderContents (Soft2DFMeasure _) = Just ["Rectangle"] From 0d613d004efdfdd71e1f263f699af1b0b20aae91 Mon Sep 17 00:00:00 2001 From: welp Date: Mon, 13 Jul 2020 17:45:10 +0200 Subject: [PATCH 4/7] incorporate test changes --- test/Spec.hs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/Spec.hs b/test/Spec.hs index d208fc6..9b30fca 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -343,6 +343,16 @@ main = hspec $ do runGEvalTest "multilabel-f1-with-probs" `shouldReturnAlmost` 0.615384615384615 it "labels given with probs and numbers" $ do runGEvalTest "multilabel-f1-with-probs-and-numbers" `shouldReturnAlmost` 0.6666666666666 + it "information extraction" $ do + runGEvalTest "multilabel-f1-ie" `shouldReturnAlmost` 0.1111111111 + it "information extraction with flags" $ do + runGEvalTest "multilabel-f1-ie-flags" `shouldReturnAlmost` 0.444444444444 + it "information extraction with fuzzy matching" $ do + runGEvalTest "multilabel-f1-ie-fuzzy" `shouldReturnAlmost` 0.681777777777 + it "information extraction with smart fuzzy matching" $ do + runGEvalTest "multilabel-f1-ie-fuzzy-smart" `shouldReturnAlmost` 0.598444 + it "information extraction with smart fuzzy matching hardened" $ do + runGEvalTest "multilabel-f1-ie-fuzzy-harden" `shouldReturnAlmost` 0.555555555 describe "Mean/MultiLabel-F" $ do it "simple" $ do runGEvalTest "mean-multilabel-f1-simple" `shouldReturnAlmost` 0.5 From 5fc5f6ac6413dcf28e3f43141776df65e251c7cd Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Mon, 3 Aug 2020 22:05:42 +0200 Subject: [PATCH 5/7] Fix generating configs --- src/GEval/CreateChallenge.hs | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/GEval/CreateChallenge.hs b/src/GEval/CreateChallenge.hs index 7fea41e..f9e7dc6 100644 --- a/src/GEval/CreateChallenge.hs +++ b/src/GEval/CreateChallenge.hs @@ -13,6 +13,7 @@ import GEval.Submit (tokenFileName) import GEval.MatchingSpecification (MatchingSpecification(ExactMatch)) import qualified System.Directory as D import Control.Conditional (whenM) +import Data.Maybe (catMaybes) import System.IO import System.FilePath @@ -427,22 +428,25 @@ Directory structure configContents :: [EvaluationScheme] -> FormattingOptions -> String -> String -configContents schemes format testName = unwords (Prelude.map (\scheme -> ("--metric " ++ (show scheme))) schemes) ++ - (if testName /= defaultTestName - then - " --test-name " ++ testName - else - "") ++ - (precisionOpt format) ++ - inHeaderOpts ++ - outHeaderOpts - where precisionOpt (FormattingOptions m b) = maybe "" (printf "--precision %d ") m ++ bool "" "--show-as-percentage" b +configContents schemes format testName = + unwords $ catMaybes ((Prelude.map (\scheme -> (Just $ "--metric " ++ (show scheme))) schemes) + ++ [testNameOpt] + ++ (precisionOpt format) + ++ [inHeaderOpts, outHeaderOpts]) + where precisionOpt (FormattingOptions m b) = [ + maybe Nothing (Just . printf "--precision %d") m, + bool Nothing (Just "--show-as-percentage") b ] ((EvaluationScheme mainMetric _):_) = schemes + testNameOpt = if testName /= defaultTestName + then + (Just (" --test-name " ++ testName)) + else + Nothing inHeaderOpts = getHeaderOpts "in-header" inHeaderContents outHeaderOpts = getHeaderOpts "out-header" outHeaderContents getHeaderOpts opt selector = case selector mainMetric of - Just _ -> " --" ++ opt ++ " " ++ (opt <.> "tsv") - Nothing -> "" + Just _ -> Just (" --" ++ opt ++ " " ++ (opt <.> "tsv")) + Nothing -> Nothing -- Originally train content was in one file, to avoid large changes -- for the time being we are using the original function. From 787a94c0856a0e0046fbff8079ea81fd855db7f0 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Mon, 3 Aug 2020 22:30:49 +0200 Subject: [PATCH 6/7] Fix formatting precision --- src/GEval/Formatting.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/GEval/Formatting.hs b/src/GEval/Formatting.hs index 39a9799..210b4d7 100644 --- a/src/GEval/Formatting.hs +++ b/src/GEval/Formatting.hs @@ -27,7 +27,7 @@ formatTheResultWithErrorBounds format pointEstimate (Just errorBound) = (formatS formatSimpleResult :: FormattingOptions -> MetricValue -> String formatSimpleResult = \case FormattingOptions (Just prec) True -> printf "%.*f" (prec-2) . (*100) - FormattingOptions (Just prec) _ -> printf "0.*f" prec + FormattingOptions (Just prec) _ -> printf "%.*f" prec _ -> show selectLowerPrecision :: Int -> FormattingOptions -> FormattingOptions From 01294fd24f2b022237a0efff8da1d386c73e2748 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Mon, 3 Aug 2020 22:41:46 +0200 Subject: [PATCH 7/7] Fix bugs when formatting percentages --- src/GEval/Formatting.hs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/GEval/Formatting.hs b/src/GEval/Formatting.hs index 210b4d7..ea6fac3 100644 --- a/src/GEval/Formatting.hs +++ b/src/GEval/Formatting.hs @@ -26,12 +26,11 @@ formatTheResultWithErrorBounds format pointEstimate (Just errorBound) = (formatS formatSimpleResult :: FormattingOptions -> MetricValue -> String formatSimpleResult = \case - FormattingOptions (Just prec) True -> printf "%.*f" (prec-2) . (*100) + FormattingOptions (Just prec) True -> printf "%.*f" (max 0 (prec-2)) . (*100) FormattingOptions (Just prec) _ -> printf "%.*f" prec _ -> show selectLowerPrecision :: Int -> FormattingOptions -> FormattingOptions selectLowerPrecision p = \case - a@(FormattingOptions _ True) -> a - FormattingOptions (Just prec) _ -> FormattingOptions (Just $ min prec p) False - _ -> FormattingOptions (Just p) False + FormattingOptions (Just prec) showAsPercentage -> FormattingOptions (Just $ min prec p) showAsPercentage + FormattingOptions (Nothing) showAsPercentage -> FormattingOptions (Just p) showAsPercentage