From 41fe0d228383d936d3cf1b3c686e4be404a859da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Grali=C5=84ski?= Date: Sat, 2 Nov 2019 12:09:09 +0100 Subject: [PATCH] Make room for storing the results of bootstrap resampling --- app/Main.hs | 19 +++++++++++-------- src/GEval/Common.hs | 12 ++++++++++-- src/GEval/Core.hs | 6 +++--- src/GEval/LineByLine.hs | 2 +- src/GEval/OptionsParser.hs | 8 ++++---- src/GEval/Validation.hs | 2 +- test/Spec.hs | 11 ++++++----- 7 files changed, 36 insertions(+), 24 deletions(-) diff --git a/app/Main.hs b/app/Main.hs index 53ef2aa..2fa93fe 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -34,12 +34,12 @@ main = do Right (opts, Just results) -> showTheResult opts results Right (_, Nothing) -> return () -showTheResult :: GEvalOptions -> [(SourceSpec, [MetricValue])] -> IO () +showTheResult :: GEvalOptions -> [(SourceSpec, [MetricResult])] -> IO () showTheResult opts [(_, vals)] = showTheResult' opts vals showTheResult opts [] = error "no output given" showTheResult opts multipleResults = showTable opts multipleResults -showTable :: GEvalOptions -> [(SourceSpec, [MetricValue])] -> IO () +showTable :: GEvalOptions -> [(SourceSpec, [MetricResult])] -> IO () showTable opts multipleResults = do let params = Prelude.map (\(ss, _) -> parseParamsFromSourceSpec ss) multipleResults @@ -64,7 +64,7 @@ getHeader [] schemes = Just $ intercalate "\t" ("File name" : Prelude.map evalua getHeader params schemes = Just $ intercalate "\t" (Prelude.map T.unpack params ++ Prelude.map evaluationSchemeName schemes) -formatTableEntry :: GEvalOptions -> [T.Text] -> ((SourceSpec, [MetricValue]), OutputFileParsed) -> String +formatTableEntry :: GEvalOptions -> [T.Text] -> ((SourceSpec, [MetricResult]), OutputFileParsed) -> String formatTableEntry opts paramNames ((sourceSpec, metrics), ofParsed) = intercalate "\t" ((initialColumns paramNames sourceSpec ofParsed) ++ vals) where vals = Prelude.map (formatTheResult (gesPrecision $ geoSpec opts)) metrics @@ -73,7 +73,7 @@ initialColumns [] sourceSpec ofParsed = [formatSourceSpec sourceSpec] initialColumns params sourceSpec (OutputFileParsed _ paramMap) = Prelude.map (\p -> T.unpack $ M.findWithDefault (T.pack "") p paramMap) params -showTheResult' :: GEvalOptions -> [MetricValue] -> IO () +showTheResult' :: GEvalOptions -> [MetricResult] -> IO () -- do not show the metric if just one was given showTheResult' opts [val] = putStrLn $ formatTheResult (gesPrecision $ geoSpec opts) val showTheResult' opts [] = do @@ -85,10 +85,13 @@ formatSourceSpec :: SourceSpec -> String formatSourceSpec (FilePathSpec fp) = dropExtensions $ takeFileName fp formatSourceSpec spec = show spec -formatTheMetricAndResult :: Maybe Int -> (EvaluationScheme, MetricValue) -> String +formatTheMetricAndResult :: Maybe Int -> (EvaluationScheme, MetricResult) -> String formatTheMetricAndResult mPrecision (scheme, val) = (evaluationSchemeName scheme) ++ "\t" ++ (formatTheResult mPrecision val) -formatTheResult :: Maybe Int -> MetricValue -> String -formatTheResult Nothing = show -formatTheResult (Just prec) = printf "%0.*f" prec +formatTheResult :: Maybe Int -> MetricResult -> String +formatTheResult mPrecision (SimpleRun val) = formatSimpleResult mPrecision val + +formatSimpleResult :: Maybe Int -> MetricValue -> String +formatSimpleResult Nothing = show +formatSimpleResult (Just prec) = printf "%0.*f" prec diff --git a/src/GEval/Common.hs b/src/GEval/Common.hs index 80a1ede..a60b6e4 100644 --- a/src/GEval/Common.hs +++ b/src/GEval/Common.hs @@ -13,11 +13,19 @@ type MetricValue = Double data GraphSeries = GraphSeries [(Double, Double)] -data MetricOutput = MetricOutput MetricValue (Maybe GraphSeries) +data MetricResult = SimpleRun MetricValue | BootstrapResampling [MetricValue] -getMetricValue :: MetricOutput -> MetricValue +instance Show MetricResult where + show (SimpleRun val) = show val + +data MetricOutput = MetricOutput MetricResult (Maybe GraphSeries) + +getMetricValue :: MetricOutput -> MetricResult getMetricValue (MetricOutput v _) = v +extractSimpleRunValue :: MetricResult -> MetricValue +extractSimpleRunValue (SimpleRun v) = v + getGraphSeries :: MetricOutput -> Maybe GraphSeries getGraphSeries (MetricOutput _ gs) = gs diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 6119dd5..b7cdf0f 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -169,7 +169,7 @@ data GEvalSpecification = GEvalSpecification gesToken :: Maybe String, gesGonitoGitAnnexRemote :: Maybe String, gesReferences :: Maybe String, - gesBootstrapSampling :: Maybe Int } + gesBootstrapResampling :: Maybe Int } gesMainMetric :: GEvalSpecification -> Metric @@ -218,7 +218,7 @@ defaultGEvalSpecification = GEvalSpecification { gesToken = Nothing, gesGonitoGitAnnexRemote = Nothing, gesReferences = Nothing, - gesBootstrapSampling = Nothing } + gesBootstrapResampling = Nothing } isEmptyFile :: FilePath -> IO (Bool) isEmptyFile path = do @@ -693,7 +693,7 @@ gevalCoreGeneralized' parserSpec itemStep aggregator finalStep generateGraph con (((getZipSource $ (,) <$> ZipSource (CL.sourceList [(getFirstLineNo (Proxy :: Proxy m) context)..]) <*> (ZipSource $ recordSource context parserSpec)) .| CL.map (checkStep (Proxy :: Proxy m) itemStep)) .| CL.catMaybes .| aggregator) - return $ MetricOutput (finalStep v) (generateGraph v) + return $ MetricOutput (SimpleRun $ finalStep v) (generateGraph v) -- | A type family to handle all the evaluation "context". -- diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs index 1766638..2c121d7 100644 --- a/src/GEval/LineByLine.hs +++ b/src/GEval/LineByLine.hs @@ -461,7 +461,7 @@ gevalLineByLineSource metric mSelector preprocess inputSource expectedSource out s <- liftIO $ gevalCoreOnSingleLines metric preprocess (getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp) (getDataDecoder expectedLineSource) (LineInFile expectedSource lineNo exp) (getDataDecoder outputLineSource) (LineInFile outSource lineNo out) - return $ LineRecord inp exp out lineNo (getMetricValue s) + return $ LineRecord inp exp out lineNo (extractSimpleRunValue $ getMetricValue s) justTokenize :: Maybe Tokenizer -> IO () justTokenize Nothing = error "a tokenizer must be specified with --tokenizer option" diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index 9797123..d6878f9 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -265,14 +265,14 @@ altMetricReader = optional $ option auto <> metavar "METRIC" <> help "Alternative metric (overrides --metric option)" ) -runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricValue])])) +runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricResult])])) runGEval args = do ret <- runGEvalGetOptions args case ret of Left e -> return $ Left e Right (_, mmv) -> return $ Right mmv -runGEvalGetOptions :: [String] -> IO (Either (ParserResult GEvalOptions) (GEvalOptions, Maybe [(SourceSpec, [MetricValue])])) +runGEvalGetOptions :: [String] -> IO (Either (ParserResult GEvalOptions) (GEvalOptions, Maybe [(SourceSpec, [MetricResult])])) runGEvalGetOptions args = do optionExtractionResult <- getOptions args case optionExtractionResult of @@ -308,7 +308,7 @@ attemptToReadOptsFromConfigFile args opts = do where configFilePath = (getExpectedDirectory $ geoSpec opts) configFileName -runGEval'' :: GEvalOptions -> IO (Maybe [(SourceSpec, [MetricValue])]) +runGEval'' :: GEvalOptions -> IO (Maybe [(SourceSpec, [MetricResult])]) runGEval'' opts = runGEval''' (geoSpecialCommand opts) (geoResultOrdering opts) (geoFilter opts) @@ -322,7 +322,7 @@ runGEval''' :: Maybe GEvalSpecialCommand -> GEvalSpecification -> BlackBoxDebuggingOptions -> Maybe FilePath - -> IO (Maybe [(SourceSpec, [MetricValue])]) + -> IO (Maybe [(SourceSpec, [MetricResult])]) runGEval''' Nothing _ _ spec _ mGraphFile = do vals' <- geval spec let vals = map (\(s, val) -> (s, map getMetricValue val)) vals' diff --git a/src/GEval/Validation.hs b/src/GEval/Validation.hs index a9b89e6..43cd369 100644 --- a/src/GEval/Validation.hs +++ b/src/GEval/Validation.hs @@ -223,5 +223,5 @@ runOnTest spec testPath = do createPerfectOutputFromExpected metric expectedFile tmpOutFile [(_, [MetricOutput value _])] <- geval specificSpec let bestValue = bestPossibleValue metric - unless (bestValue =~ value) $ throw $ BestPossibleValueNotObtainedWithExpectedData bestValue value + unless (bestValue =~ (extractSimpleRunValue value)) $ throw $ BestPossibleValueNotObtainedWithExpectedData bestValue (extractSimpleRunValue value) return () diff --git a/test/Spec.hs b/test/Spec.hs index 1fccc45..7eea51c 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -87,7 +87,7 @@ main :: IO () main = hspec $ do describe "root mean square error" $ do it "simple test" $ do - [(_, ((MetricOutput val _):_))] <- geval $ defaultGEvalSpecification {gesExpectedDirectory=Just "test/rmse-simple/rmse-simple", gesOutDirectory="test/rmse-simple/rmse-simple-solution"} + [(_, ((MetricOutput (SimpleRun val) _):_))] <- geval $ defaultGEvalSpecification {gesExpectedDirectory=Just "test/rmse-simple/rmse-simple", gesOutDirectory="test/rmse-simple/rmse-simple-solution"} val `shouldBeAlmost` 0.64549722436790 describe "mean square error" $ do it "simple test with arguments" $ @@ -331,7 +331,7 @@ main = hspec $ do runGEvalTest "f1-with-preprocessing" `shouldReturnAlmost` 0.57142857142857 describe "evaluating single lines" $ do it "RMSE" $ do - (MetricOutput v _) <- gevalCoreOnSingleLines RMSE id RawItemTarget + (MetricOutput (SimpleRun v) _) <- gevalCoreOnSingleLines RMSE id RawItemTarget (LineInFile (FilePathSpec "stub1") 1 "blabla") RawItemTarget (LineInFile (FilePathSpec "stub2") 1 "3.4") @@ -439,7 +439,8 @@ main = hspec $ do gesGonitoHost = Nothing, gesToken = Nothing, gesGonitoGitAnnexRemote = Nothing, - gesReferences = Nothing } + gesReferences = Nothing, + gesBootstrapResampling = Nothing } it "simple test" $ do results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge (const Data.Conduit.List.consume) Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo", @@ -683,8 +684,8 @@ testMatchFun 'b' 1 = True testMatchFun 'c' 1 = True testMatchFun _ _ = False -extractVal :: (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricValue])])) -> IO MetricValue -extractVal (Right (Just ([(_, val:_)]))) = return val +extractVal :: (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricResult])])) -> IO MetricValue +extractVal (Right (Just ([(_, (SimpleRun val):_)]))) = return val extractVal (Right Nothing) = return $ error "no metrics???" extractVal (Right (Just [])) = return $ error "emtpy metric list???" extractVal (Left result) = do