Make room for storing the results of bootstrap resampling

This commit is contained in:
Filip Graliński 2019-11-02 12:09:09 +01:00 committed by Filip Gralinski
parent 402ed73111
commit 41fe0d2283
7 changed files with 36 additions and 24 deletions

View File

@ -34,12 +34,12 @@ main = do
Right (opts, Just results) -> showTheResult opts results Right (opts, Just results) -> showTheResult opts results
Right (_, Nothing) -> return () Right (_, Nothing) -> return ()
showTheResult :: GEvalOptions -> [(SourceSpec, [MetricValue])] -> IO () showTheResult :: GEvalOptions -> [(SourceSpec, [MetricResult])] -> IO ()
showTheResult opts [(_, vals)] = showTheResult' opts vals showTheResult opts [(_, vals)] = showTheResult' opts vals
showTheResult opts [] = error "no output given" showTheResult opts [] = error "no output given"
showTheResult opts multipleResults = showTable opts multipleResults showTheResult opts multipleResults = showTable opts multipleResults
showTable :: GEvalOptions -> [(SourceSpec, [MetricValue])] -> IO () showTable :: GEvalOptions -> [(SourceSpec, [MetricResult])] -> IO ()
showTable opts multipleResults = do showTable opts multipleResults = do
let params = Prelude.map (\(ss, _) -> parseParamsFromSourceSpec ss) multipleResults let params = Prelude.map (\(ss, _) -> parseParamsFromSourceSpec ss) multipleResults
@ -64,7 +64,7 @@ getHeader [] schemes = Just $ intercalate "\t" ("File name" : Prelude.map evalua
getHeader params schemes = Just $ intercalate "\t" (Prelude.map T.unpack params getHeader params schemes = Just $ intercalate "\t" (Prelude.map T.unpack params
++ Prelude.map evaluationSchemeName schemes) ++ Prelude.map evaluationSchemeName schemes)
formatTableEntry :: GEvalOptions -> [T.Text] -> ((SourceSpec, [MetricValue]), OutputFileParsed) -> String formatTableEntry :: GEvalOptions -> [T.Text] -> ((SourceSpec, [MetricResult]), OutputFileParsed) -> String
formatTableEntry opts paramNames ((sourceSpec, metrics), ofParsed) = intercalate "\t" ((initialColumns paramNames sourceSpec ofParsed) ++ vals) formatTableEntry opts paramNames ((sourceSpec, metrics), ofParsed) = intercalate "\t" ((initialColumns paramNames sourceSpec ofParsed) ++ vals)
where vals = Prelude.map (formatTheResult (gesPrecision $ geoSpec opts)) metrics where vals = Prelude.map (formatTheResult (gesPrecision $ geoSpec opts)) metrics
@ -73,7 +73,7 @@ initialColumns [] sourceSpec ofParsed = [formatSourceSpec sourceSpec]
initialColumns params sourceSpec (OutputFileParsed _ paramMap) = initialColumns params sourceSpec (OutputFileParsed _ paramMap) =
Prelude.map (\p -> T.unpack $ M.findWithDefault (T.pack "") p paramMap) params Prelude.map (\p -> T.unpack $ M.findWithDefault (T.pack "") p paramMap) params
showTheResult' :: GEvalOptions -> [MetricValue] -> IO () showTheResult' :: GEvalOptions -> [MetricResult] -> IO ()
-- do not show the metric if just one was given -- do not show the metric if just one was given
showTheResult' opts [val] = putStrLn $ formatTheResult (gesPrecision $ geoSpec opts) val showTheResult' opts [val] = putStrLn $ formatTheResult (gesPrecision $ geoSpec opts) val
showTheResult' opts [] = do showTheResult' opts [] = do
@ -85,10 +85,13 @@ formatSourceSpec :: SourceSpec -> String
formatSourceSpec (FilePathSpec fp) = dropExtensions $ takeFileName fp formatSourceSpec (FilePathSpec fp) = dropExtensions $ takeFileName fp
formatSourceSpec spec = show spec formatSourceSpec spec = show spec
formatTheMetricAndResult :: Maybe Int -> (EvaluationScheme, MetricValue) -> String formatTheMetricAndResult :: Maybe Int -> (EvaluationScheme, MetricResult) -> String
formatTheMetricAndResult mPrecision (scheme, val) = (evaluationSchemeName scheme) ++ "\t" ++ (formatTheResult mPrecision val) formatTheMetricAndResult mPrecision (scheme, val) = (evaluationSchemeName scheme) ++ "\t" ++ (formatTheResult mPrecision val)
formatTheResult :: Maybe Int -> MetricValue -> String formatTheResult :: Maybe Int -> MetricResult -> String
formatTheResult Nothing = show formatTheResult mPrecision (SimpleRun val) = formatSimpleResult mPrecision val
formatTheResult (Just prec) = printf "%0.*f" prec
formatSimpleResult :: Maybe Int -> MetricValue -> String
formatSimpleResult Nothing = show
formatSimpleResult (Just prec) = printf "%0.*f" prec

View File

@ -13,11 +13,19 @@ type MetricValue = Double
data GraphSeries = GraphSeries [(Double, Double)] data GraphSeries = GraphSeries [(Double, Double)]
data MetricOutput = MetricOutput MetricValue (Maybe GraphSeries) data MetricResult = SimpleRun MetricValue | BootstrapResampling [MetricValue]
getMetricValue :: MetricOutput -> MetricValue instance Show MetricResult where
show (SimpleRun val) = show val
data MetricOutput = MetricOutput MetricResult (Maybe GraphSeries)
getMetricValue :: MetricOutput -> MetricResult
getMetricValue (MetricOutput v _) = v getMetricValue (MetricOutput v _) = v
extractSimpleRunValue :: MetricResult -> MetricValue
extractSimpleRunValue (SimpleRun v) = v
getGraphSeries :: MetricOutput -> Maybe GraphSeries getGraphSeries :: MetricOutput -> Maybe GraphSeries
getGraphSeries (MetricOutput _ gs) = gs getGraphSeries (MetricOutput _ gs) = gs

View File

@ -169,7 +169,7 @@ data GEvalSpecification = GEvalSpecification
gesToken :: Maybe String, gesToken :: Maybe String,
gesGonitoGitAnnexRemote :: Maybe String, gesGonitoGitAnnexRemote :: Maybe String,
gesReferences :: Maybe String, gesReferences :: Maybe String,
gesBootstrapSampling :: Maybe Int } gesBootstrapResampling :: Maybe Int }
gesMainMetric :: GEvalSpecification -> Metric gesMainMetric :: GEvalSpecification -> Metric
@ -218,7 +218,7 @@ defaultGEvalSpecification = GEvalSpecification {
gesToken = Nothing, gesToken = Nothing,
gesGonitoGitAnnexRemote = Nothing, gesGonitoGitAnnexRemote = Nothing,
gesReferences = Nothing, gesReferences = Nothing,
gesBootstrapSampling = Nothing } gesBootstrapResampling = Nothing }
isEmptyFile :: FilePath -> IO (Bool) isEmptyFile :: FilePath -> IO (Bool)
isEmptyFile path = do isEmptyFile path = do
@ -693,7 +693,7 @@ gevalCoreGeneralized' parserSpec itemStep aggregator finalStep generateGraph con
(((getZipSource $ (,) (((getZipSource $ (,)
<$> ZipSource (CL.sourceList [(getFirstLineNo (Proxy :: Proxy m) context)..]) <$> ZipSource (CL.sourceList [(getFirstLineNo (Proxy :: Proxy m) context)..])
<*> (ZipSource $ recordSource context parserSpec)) .| CL.map (checkStep (Proxy :: Proxy m) itemStep)) .| CL.catMaybes .| aggregator) <*> (ZipSource $ recordSource context parserSpec)) .| CL.map (checkStep (Proxy :: Proxy m) itemStep)) .| CL.catMaybes .| aggregator)
return $ MetricOutput (finalStep v) (generateGraph v) return $ MetricOutput (SimpleRun $ finalStep v) (generateGraph v)
-- | A type family to handle all the evaluation "context". -- | A type family to handle all the evaluation "context".
-- --

View File

@ -461,7 +461,7 @@ gevalLineByLineSource metric mSelector preprocess inputSource expectedSource out
s <- liftIO $ gevalCoreOnSingleLines metric preprocess (getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp) s <- liftIO $ gevalCoreOnSingleLines metric preprocess (getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp)
(getDataDecoder expectedLineSource) (LineInFile expectedSource lineNo exp) (getDataDecoder expectedLineSource) (LineInFile expectedSource lineNo exp)
(getDataDecoder outputLineSource) (LineInFile outSource lineNo out) (getDataDecoder outputLineSource) (LineInFile outSource lineNo out)
return $ LineRecord inp exp out lineNo (getMetricValue s) return $ LineRecord inp exp out lineNo (extractSimpleRunValue $ getMetricValue s)
justTokenize :: Maybe Tokenizer -> IO () justTokenize :: Maybe Tokenizer -> IO ()
justTokenize Nothing = error "a tokenizer must be specified with --tokenizer option" justTokenize Nothing = error "a tokenizer must be specified with --tokenizer option"

View File

@ -265,14 +265,14 @@ altMetricReader = optional $ option auto
<> metavar "METRIC" <> metavar "METRIC"
<> help "Alternative metric (overrides --metric option)" ) <> help "Alternative metric (overrides --metric option)" )
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricValue])])) runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricResult])]))
runGEval args = do runGEval args = do
ret <- runGEvalGetOptions args ret <- runGEvalGetOptions args
case ret of case ret of
Left e -> return $ Left e Left e -> return $ Left e
Right (_, mmv) -> return $ Right mmv Right (_, mmv) -> return $ Right mmv
runGEvalGetOptions :: [String] -> IO (Either (ParserResult GEvalOptions) (GEvalOptions, Maybe [(SourceSpec, [MetricValue])])) runGEvalGetOptions :: [String] -> IO (Either (ParserResult GEvalOptions) (GEvalOptions, Maybe [(SourceSpec, [MetricResult])]))
runGEvalGetOptions args = do runGEvalGetOptions args = do
optionExtractionResult <- getOptions args optionExtractionResult <- getOptions args
case optionExtractionResult of case optionExtractionResult of
@ -308,7 +308,7 @@ attemptToReadOptsFromConfigFile args opts = do
where configFilePath = (getExpectedDirectory $ geoSpec opts) </> configFileName where configFilePath = (getExpectedDirectory $ geoSpec opts) </> configFileName
runGEval'' :: GEvalOptions -> IO (Maybe [(SourceSpec, [MetricValue])]) runGEval'' :: GEvalOptions -> IO (Maybe [(SourceSpec, [MetricResult])])
runGEval'' opts = runGEval''' (geoSpecialCommand opts) runGEval'' opts = runGEval''' (geoSpecialCommand opts)
(geoResultOrdering opts) (geoResultOrdering opts)
(geoFilter opts) (geoFilter opts)
@ -322,7 +322,7 @@ runGEval''' :: Maybe GEvalSpecialCommand
-> GEvalSpecification -> GEvalSpecification
-> BlackBoxDebuggingOptions -> BlackBoxDebuggingOptions
-> Maybe FilePath -> Maybe FilePath
-> IO (Maybe [(SourceSpec, [MetricValue])]) -> IO (Maybe [(SourceSpec, [MetricResult])])
runGEval''' Nothing _ _ spec _ mGraphFile = do runGEval''' Nothing _ _ spec _ mGraphFile = do
vals' <- geval spec vals' <- geval spec
let vals = map (\(s, val) -> (s, map getMetricValue val)) vals' let vals = map (\(s, val) -> (s, map getMetricValue val)) vals'

View File

@ -223,5 +223,5 @@ runOnTest spec testPath = do
createPerfectOutputFromExpected metric expectedFile tmpOutFile createPerfectOutputFromExpected metric expectedFile tmpOutFile
[(_, [MetricOutput value _])] <- geval specificSpec [(_, [MetricOutput value _])] <- geval specificSpec
let bestValue = bestPossibleValue metric let bestValue = bestPossibleValue metric
unless (bestValue =~ value) $ throw $ BestPossibleValueNotObtainedWithExpectedData bestValue value unless (bestValue =~ (extractSimpleRunValue value)) $ throw $ BestPossibleValueNotObtainedWithExpectedData bestValue (extractSimpleRunValue value)
return () return ()

View File

@ -87,7 +87,7 @@ main :: IO ()
main = hspec $ do main = hspec $ do
describe "root mean square error" $ do describe "root mean square error" $ do
it "simple test" $ do it "simple test" $ do
[(_, ((MetricOutput val _):_))] <- geval $ defaultGEvalSpecification {gesExpectedDirectory=Just "test/rmse-simple/rmse-simple", gesOutDirectory="test/rmse-simple/rmse-simple-solution"} [(_, ((MetricOutput (SimpleRun val) _):_))] <- geval $ defaultGEvalSpecification {gesExpectedDirectory=Just "test/rmse-simple/rmse-simple", gesOutDirectory="test/rmse-simple/rmse-simple-solution"}
val `shouldBeAlmost` 0.64549722436790 val `shouldBeAlmost` 0.64549722436790
describe "mean square error" $ do describe "mean square error" $ do
it "simple test with arguments" $ it "simple test with arguments" $
@ -331,7 +331,7 @@ main = hspec $ do
runGEvalTest "f1-with-preprocessing" `shouldReturnAlmost` 0.57142857142857 runGEvalTest "f1-with-preprocessing" `shouldReturnAlmost` 0.57142857142857
describe "evaluating single lines" $ do describe "evaluating single lines" $ do
it "RMSE" $ do it "RMSE" $ do
(MetricOutput v _) <- gevalCoreOnSingleLines RMSE id RawItemTarget (MetricOutput (SimpleRun v) _) <- gevalCoreOnSingleLines RMSE id RawItemTarget
(LineInFile (FilePathSpec "stub1") 1 "blabla") (LineInFile (FilePathSpec "stub1") 1 "blabla")
RawItemTarget RawItemTarget
(LineInFile (FilePathSpec "stub2") 1 "3.4") (LineInFile (FilePathSpec "stub2") 1 "3.4")
@ -439,7 +439,8 @@ main = hspec $ do
gesGonitoHost = Nothing, gesGonitoHost = Nothing,
gesToken = Nothing, gesToken = Nothing,
gesGonitoGitAnnexRemote = Nothing, gesGonitoGitAnnexRemote = Nothing,
gesReferences = Nothing } gesReferences = Nothing,
gesBootstrapResampling = Nothing }
it "simple test" $ do it "simple test" $ do
results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge (const Data.Conduit.List.consume) results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge (const Data.Conduit.List.consume)
Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo", Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo",
@ -683,8 +684,8 @@ testMatchFun 'b' 1 = True
testMatchFun 'c' 1 = True testMatchFun 'c' 1 = True
testMatchFun _ _ = False testMatchFun _ _ = False
extractVal :: (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricValue])])) -> IO MetricValue extractVal :: (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricResult])])) -> IO MetricValue
extractVal (Right (Just ([(_, val:_)]))) = return val extractVal (Right (Just ([(_, (SimpleRun val):_)]))) = return val
extractVal (Right Nothing) = return $ error "no metrics???" extractVal (Right Nothing) = return $ error "no metrics???"
extractVal (Right (Just [])) = return $ error "emtpy metric list???" extractVal (Right (Just [])) = return $ error "emtpy metric list???"
extractVal (Left result) = do extractVal (Left result) = do