Make room for storing the results of bootstrap resampling

This commit is contained in:
Filip Graliński 2019-11-02 12:09:09 +01:00 committed by Filip Gralinski
parent 402ed73111
commit 41fe0d2283
7 changed files with 36 additions and 24 deletions

View File

@ -34,12 +34,12 @@ main = do
Right (opts, Just results) -> showTheResult opts results
Right (_, Nothing) -> return ()
showTheResult :: GEvalOptions -> [(SourceSpec, [MetricValue])] -> IO ()
showTheResult :: GEvalOptions -> [(SourceSpec, [MetricResult])] -> IO ()
showTheResult opts [(_, vals)] = showTheResult' opts vals
showTheResult opts [] = error "no output given"
showTheResult opts multipleResults = showTable opts multipleResults
showTable :: GEvalOptions -> [(SourceSpec, [MetricValue])] -> IO ()
showTable :: GEvalOptions -> [(SourceSpec, [MetricResult])] -> IO ()
showTable opts multipleResults = do
let params = Prelude.map (\(ss, _) -> parseParamsFromSourceSpec ss) multipleResults
@ -64,7 +64,7 @@ getHeader [] schemes = Just $ intercalate "\t" ("File name" : Prelude.map evalua
getHeader params schemes = Just $ intercalate "\t" (Prelude.map T.unpack params
++ Prelude.map evaluationSchemeName schemes)
formatTableEntry :: GEvalOptions -> [T.Text] -> ((SourceSpec, [MetricValue]), OutputFileParsed) -> String
formatTableEntry :: GEvalOptions -> [T.Text] -> ((SourceSpec, [MetricResult]), OutputFileParsed) -> String
formatTableEntry opts paramNames ((sourceSpec, metrics), ofParsed) = intercalate "\t" ((initialColumns paramNames sourceSpec ofParsed) ++ vals)
where vals = Prelude.map (formatTheResult (gesPrecision $ geoSpec opts)) metrics
@ -73,7 +73,7 @@ initialColumns [] sourceSpec ofParsed = [formatSourceSpec sourceSpec]
initialColumns params sourceSpec (OutputFileParsed _ paramMap) =
Prelude.map (\p -> T.unpack $ M.findWithDefault (T.pack "") p paramMap) params
showTheResult' :: GEvalOptions -> [MetricValue] -> IO ()
showTheResult' :: GEvalOptions -> [MetricResult] -> IO ()
-- do not show the metric if just one was given
showTheResult' opts [val] = putStrLn $ formatTheResult (gesPrecision $ geoSpec opts) val
showTheResult' opts [] = do
@ -85,10 +85,13 @@ formatSourceSpec :: SourceSpec -> String
formatSourceSpec (FilePathSpec fp) = dropExtensions $ takeFileName fp
formatSourceSpec spec = show spec
formatTheMetricAndResult :: Maybe Int -> (EvaluationScheme, MetricValue) -> String
formatTheMetricAndResult :: Maybe Int -> (EvaluationScheme, MetricResult) -> String
formatTheMetricAndResult mPrecision (scheme, val) = (evaluationSchemeName scheme) ++ "\t" ++ (formatTheResult mPrecision val)
formatTheResult :: Maybe Int -> MetricValue -> String
formatTheResult Nothing = show
formatTheResult (Just prec) = printf "%0.*f" prec
formatTheResult :: Maybe Int -> MetricResult -> String
formatTheResult mPrecision (SimpleRun val) = formatSimpleResult mPrecision val
formatSimpleResult :: Maybe Int -> MetricValue -> String
formatSimpleResult Nothing = show
formatSimpleResult (Just prec) = printf "%0.*f" prec

View File

@ -13,11 +13,19 @@ type MetricValue = Double
data GraphSeries = GraphSeries [(Double, Double)]
data MetricOutput = MetricOutput MetricValue (Maybe GraphSeries)
data MetricResult = SimpleRun MetricValue | BootstrapResampling [MetricValue]
getMetricValue :: MetricOutput -> MetricValue
instance Show MetricResult where
show (SimpleRun val) = show val
data MetricOutput = MetricOutput MetricResult (Maybe GraphSeries)
getMetricValue :: MetricOutput -> MetricResult
getMetricValue (MetricOutput v _) = v
extractSimpleRunValue :: MetricResult -> MetricValue
extractSimpleRunValue (SimpleRun v) = v
getGraphSeries :: MetricOutput -> Maybe GraphSeries
getGraphSeries (MetricOutput _ gs) = gs

View File

@ -169,7 +169,7 @@ data GEvalSpecification = GEvalSpecification
gesToken :: Maybe String,
gesGonitoGitAnnexRemote :: Maybe String,
gesReferences :: Maybe String,
gesBootstrapSampling :: Maybe Int }
gesBootstrapResampling :: Maybe Int }
gesMainMetric :: GEvalSpecification -> Metric
@ -218,7 +218,7 @@ defaultGEvalSpecification = GEvalSpecification {
gesToken = Nothing,
gesGonitoGitAnnexRemote = Nothing,
gesReferences = Nothing,
gesBootstrapSampling = Nothing }
gesBootstrapResampling = Nothing }
isEmptyFile :: FilePath -> IO (Bool)
isEmptyFile path = do
@ -693,7 +693,7 @@ gevalCoreGeneralized' parserSpec itemStep aggregator finalStep generateGraph con
(((getZipSource $ (,)
<$> ZipSource (CL.sourceList [(getFirstLineNo (Proxy :: Proxy m) context)..])
<*> (ZipSource $ recordSource context parserSpec)) .| CL.map (checkStep (Proxy :: Proxy m) itemStep)) .| CL.catMaybes .| aggregator)
return $ MetricOutput (finalStep v) (generateGraph v)
return $ MetricOutput (SimpleRun $ finalStep v) (generateGraph v)
-- | A type family to handle all the evaluation "context".
--

View File

@ -461,7 +461,7 @@ gevalLineByLineSource metric mSelector preprocess inputSource expectedSource out
s <- liftIO $ gevalCoreOnSingleLines metric preprocess (getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp)
(getDataDecoder expectedLineSource) (LineInFile expectedSource lineNo exp)
(getDataDecoder outputLineSource) (LineInFile outSource lineNo out)
return $ LineRecord inp exp out lineNo (getMetricValue s)
return $ LineRecord inp exp out lineNo (extractSimpleRunValue $ getMetricValue s)
justTokenize :: Maybe Tokenizer -> IO ()
justTokenize Nothing = error "a tokenizer must be specified with --tokenizer option"

View File

@ -265,14 +265,14 @@ altMetricReader = optional $ option auto
<> metavar "METRIC"
<> help "Alternative metric (overrides --metric option)" )
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricValue])]))
runGEval :: [String] -> IO (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricResult])]))
runGEval args = do
ret <- runGEvalGetOptions args
case ret of
Left e -> return $ Left e
Right (_, mmv) -> return $ Right mmv
runGEvalGetOptions :: [String] -> IO (Either (ParserResult GEvalOptions) (GEvalOptions, Maybe [(SourceSpec, [MetricValue])]))
runGEvalGetOptions :: [String] -> IO (Either (ParserResult GEvalOptions) (GEvalOptions, Maybe [(SourceSpec, [MetricResult])]))
runGEvalGetOptions args = do
optionExtractionResult <- getOptions args
case optionExtractionResult of
@ -308,7 +308,7 @@ attemptToReadOptsFromConfigFile args opts = do
where configFilePath = (getExpectedDirectory $ geoSpec opts) </> configFileName
runGEval'' :: GEvalOptions -> IO (Maybe [(SourceSpec, [MetricValue])])
runGEval'' :: GEvalOptions -> IO (Maybe [(SourceSpec, [MetricResult])])
runGEval'' opts = runGEval''' (geoSpecialCommand opts)
(geoResultOrdering opts)
(geoFilter opts)
@ -322,7 +322,7 @@ runGEval''' :: Maybe GEvalSpecialCommand
-> GEvalSpecification
-> BlackBoxDebuggingOptions
-> Maybe FilePath
-> IO (Maybe [(SourceSpec, [MetricValue])])
-> IO (Maybe [(SourceSpec, [MetricResult])])
runGEval''' Nothing _ _ spec _ mGraphFile = do
vals' <- geval spec
let vals = map (\(s, val) -> (s, map getMetricValue val)) vals'

View File

@ -223,5 +223,5 @@ runOnTest spec testPath = do
createPerfectOutputFromExpected metric expectedFile tmpOutFile
[(_, [MetricOutput value _])] <- geval specificSpec
let bestValue = bestPossibleValue metric
unless (bestValue =~ value) $ throw $ BestPossibleValueNotObtainedWithExpectedData bestValue value
unless (bestValue =~ (extractSimpleRunValue value)) $ throw $ BestPossibleValueNotObtainedWithExpectedData bestValue (extractSimpleRunValue value)
return ()

View File

@ -87,7 +87,7 @@ main :: IO ()
main = hspec $ do
describe "root mean square error" $ do
it "simple test" $ do
[(_, ((MetricOutput val _):_))] <- geval $ defaultGEvalSpecification {gesExpectedDirectory=Just "test/rmse-simple/rmse-simple", gesOutDirectory="test/rmse-simple/rmse-simple-solution"}
[(_, ((MetricOutput (SimpleRun val) _):_))] <- geval $ defaultGEvalSpecification {gesExpectedDirectory=Just "test/rmse-simple/rmse-simple", gesOutDirectory="test/rmse-simple/rmse-simple-solution"}
val `shouldBeAlmost` 0.64549722436790
describe "mean square error" $ do
it "simple test with arguments" $
@ -331,7 +331,7 @@ main = hspec $ do
runGEvalTest "f1-with-preprocessing" `shouldReturnAlmost` 0.57142857142857
describe "evaluating single lines" $ do
it "RMSE" $ do
(MetricOutput v _) <- gevalCoreOnSingleLines RMSE id RawItemTarget
(MetricOutput (SimpleRun v) _) <- gevalCoreOnSingleLines RMSE id RawItemTarget
(LineInFile (FilePathSpec "stub1") 1 "blabla")
RawItemTarget
(LineInFile (FilePathSpec "stub2") 1 "3.4")
@ -439,7 +439,8 @@ main = hspec $ do
gesGonitoHost = Nothing,
gesToken = Nothing,
gesGonitoGitAnnexRemote = Nothing,
gesReferences = Nothing }
gesReferences = Nothing,
gesBootstrapResampling = Nothing }
it "simple test" $ do
results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge (const Data.Conduit.List.consume)
Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo",
@ -683,8 +684,8 @@ testMatchFun 'b' 1 = True
testMatchFun 'c' 1 = True
testMatchFun _ _ = False
extractVal :: (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricValue])])) -> IO MetricValue
extractVal (Right (Just ([(_, val:_)]))) = return val
extractVal :: (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricResult])])) -> IO MetricValue
extractVal (Right (Just ([(_, (SimpleRun val):_)]))) = return val
extractVal (Right Nothing) = return $ error "no metrics???"
extractVal (Right (Just [])) = return $ error "emtpy metric list???"
extractVal (Left result) = do