From a66fa35a428f2493d605982891f9de46ef4802b1 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Sat, 1 Feb 2020 11:54:54 +0100 Subject: [PATCH] Mean/Multilabel-F works with Bootstrap --- geval.cabal | 2 +- src/GEval/Core.hs | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/geval.cabal b/geval.cabal index 4c1ea88..b5c8826 100644 --- a/geval.cabal +++ b/geval.cabal @@ -1,5 +1,5 @@ name: geval -version: 1.32.0.0 +version: 1.32.1.0 synopsis: Machine learning evaluation tools description: Please see README.md homepage: http://github.com/name/project diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 1762fe2..4a9b033 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -408,7 +408,10 @@ singleLineAsLineSource :: LineInFile -> (Text -> ItemTarget) -> (Text -> Text) - singleLineAsLineSource (LineInFile sourceSpec lineNo line) itemDecoder preprocess = LineSource (CL.sourceList [line]) itemDecoder preprocess sourceSpec lineNo +-- some metrics are not yet handled by Bootstrap due to legacy issues; +-- a fix is on the way handleBootstrap :: Metric -> Bool +handleBootstrap (Mean (MultiLabelFMeasure _)) = True handleBootstrap (Mean _) = False handleBootstrap CharMatch = False handleBootstrap (LogLossHashed _) = False @@ -458,6 +461,22 @@ gevalBootstrapOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => -> LineSource (ResourceT m) -- ^ source to read the expected output -> LineSource (ResourceT m) -- ^ source to read the output -> m (MetricOutput) -- ^ metric values for the output against the expected output + +-- for the time being hardcoded +gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta)) inputLineStream expectedLineStream outLineStream = do + gevalRunPipeline parserSpec (trans step) finalPipeline context + where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser)) + context = (WithoutInput expectedLineStream outLineStream) + step = itemStep SAMultiLabelFMeasure + expParser = 
expectedParser SAMultiLabelFMeasure + outParser = outputParser SAMultiLabelFMeasure + finalPipeline = fixer ( + CL.map (fMeasureOnCounts beta) + .| (bootstrapC numberOfSamples + $ continueGEvalCalculations SAMSE MSE)) + trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c + trans step (ParsedRecordWithoutInput x y) = step (x, y) + gevalBootstrapOnSources numberOfSamples metric inputLineStream expectedLineStream outLineStream = do case toSing $ toHelper metric of SomeSing smetric -> gevalRunPipeline parserSpec (trans step) finalPipeline context