Add bootstrap resampling

This commit is contained in:
Filip Gralinski 2020-01-28 23:14:46 +01:00
parent 580c141a8e
commit d2a59e59a5
5 changed files with 24 additions and 12 deletions

View File

@ -17,12 +17,14 @@ import qualified Data.Map.Strict as M
import GEval.Core import GEval.Core
import GEval.EvaluationScheme import GEval.EvaluationScheme
import GEval.Common (MetricValue)
import GEval.OptionsParser import GEval.OptionsParser
import GEval.ParseParams (parseParamsFromFilePath, OutputFileParsed(..)) import GEval.ParseParams (parseParamsFromFilePath, OutputFileParsed(..))
import GEval.Common (GEvalException, MetricResult(..), MetricValue)
import GEval.Formatting (formatTheResult)
import Options.Applicative import Options.Applicative
import Data.Conduit.SmartSource import Data.Conduit.SmartSource
import Data.Conduit.Bootstrap (defaultConfidenceLevel, getConfidenceBounds)
import System.FilePath (takeFileName, dropExtensions, (-<.>)) import System.FilePath (takeFileName, dropExtensions, (-<.>))
@ -222,15 +224,17 @@ checkOrInsertEvaluation repoDir chan version out = do
Right (Left _) -> do Right (Left _) -> do
err chan "Cannot parse options, check the challenge repo" err chan "Cannot parse options, check the challenge repo"
Right (Right (_, Just [(_, [result])])) -> do Right (Right (_, Just [(_, [result])])) -> do
msg chan $ concat [ "Evaluated! Score ", (formatNonScientifically result) ] msg chan $ concat [ "Evaluated! Score ", (T.pack $ formatTheResult Nothing result) ]
time <- liftIO getCurrentTime time <- liftIO getCurrentTime
_ <- runDB $ insert $ Evaluation { _ <- runDB $ insert $ let (pointResult, errorBound) = extractResult result
evaluationTest=outTest out, in Evaluation {
evaluationChecksum=outChecksum out, evaluationTest=outTest out,
evaluationScore=Just result, evaluationChecksum=outChecksum out,
evaluationErrorMessage=Nothing, evaluationScore=Just pointResult,
evaluationStamp=time, evaluationErrorBound=errorBound,
evaluationVersion=Just version } evaluationErrorMessage=Nothing,
evaluationStamp=time,
evaluationVersion=Just version }
msg chan "Evaluation done" msg chan "Evaluation done"
Right (Right (_, Just _)) -> do Right (Right (_, Just _)) -> do
err chan "Unexpected multiple results (???)" err chan "Unexpected multiple results (???)"
@ -239,12 +243,17 @@ checkOrInsertEvaluation repoDir chan version out = do
Left exception -> do Left exception -> do
err chan $ "Evaluation failed: " ++ (T.pack $ show exception) err chan $ "Evaluation failed: " ++ (T.pack $ show exception)
-- | Split a metric result into a point value and an optional error bound.
--
-- A 'SimpleRun' carries its value through unchanged with no error bound.
-- For 'BootstrapResampling', the confidence bounds obtained from
-- 'getConfidenceBounds' at 'defaultConfidenceLevel' are reported as the
-- midpoint of the interval together with its half-width.
extractResult :: MetricResult -> (MetricValue, Maybe MetricValue)
extractResult metricResult = case metricResult of
  SimpleRun value -> (value, Nothing)
  BootstrapResampling samples ->
    let (lo, hi) = getConfidenceBounds defaultConfidenceLevel samples
        midpoint = (hi + lo) / 2.0
        halfWidth = (hi - lo) / 2.0
    in (midpoint, Just halfWidth)
rawEval :: FilePath rawEval :: FilePath
-> EvaluationScheme -> EvaluationScheme
-> FilePath -> FilePath
-> Text -> Text
-> FilePath -> FilePath
-> IO (Either GEvalException (Either (ParserResult GEvalOptions) (GEvalOptions, Maybe [(SourceSpec, [MetricValue])]))) -> IO (Either GEvalException (Either (ParserResult GEvalOptions) (GEvalOptions, Maybe [(SourceSpec, [MetricResult])])))
rawEval challengeDir metric repoDir name outF = Import.try (runGEvalGetOptions [ rawEval challengeDir metric repoDir name outF = Import.try (runGEvalGetOptions [
"--alt-metric", (show metric), "--alt-metric", (show metric),
"--expected-directory", challengeDir, "--expected-directory", challengeDir,

View File

@ -301,6 +301,7 @@ lineByLineTable (Entity testId test) theStamp = mempty
evaluationTest = testId, evaluationTest = testId,
evaluationChecksum = testChecksum test, evaluationChecksum = testChecksum test,
evaluationScore = Just score, evaluationScore = Just score,
evaluationErrorBound = Nothing,
evaluationErrorMessage = Nothing, evaluationErrorMessage = Nothing,
evaluationStamp = theStamp, evaluationStamp = theStamp,
evaluationVersion = Nothing } evaluationVersion = Nothing }

View File

@ -41,6 +41,7 @@ import Text.Regex.TDFA
import GEval.Core import GEval.Core
import GEval.EvaluationScheme import GEval.EvaluationScheme
import GEval.Formatting (formatTheResultWithErrorBounds)
import qualified Data.Vector as DV import qualified Data.Vector as DV
@ -449,7 +450,7 @@ formatTruncatedScore :: Maybe Int -> Maybe Evaluation -> Text
formatTruncatedScore Nothing e = formatFullScore e formatTruncatedScore Nothing e = formatFullScore e
formatTruncatedScore _ Nothing = formatFullScore Nothing formatTruncatedScore _ Nothing = formatFullScore Nothing
formatTruncatedScore (Just precision) (Just evaluation) = case evaluationScore evaluation of formatTruncatedScore (Just precision) (Just evaluation) = case evaluationScore evaluation of
Just score -> T.pack $ printf "%0.*f" precision score Just score -> T.pack $ formatTheResultWithErrorBounds (Just precision) score (evaluationErrorBound evaluation)
Nothing -> formatFullScore Nothing Nothing -> formatFullScore Nothing
formatScore :: Maybe Int -> Double -> Text formatScore :: Maybe Int -> Double -> Text

View File

@ -122,6 +122,7 @@ Evaluation
test TestId test TestId
checksum SHA1 checksum SHA1
score Double Maybe score Double Maybe
errorBound Double Maybe
errorMessage Text Maybe errorMessage Text Maybe
stamp UTCTime default=now() stamp UTCTime default=now()
-- Should be just SHA1 (without Maybe) - Maybe is just a legacy -- Should be just SHA1 (without Maybe) - Maybe is just a legacy

View File

@ -128,7 +128,7 @@ library
, filemanip , filemanip
, cryptohash , cryptohash
, markdown , markdown
, geval >= 1.27 && < 1.29 , geval >= 1.31.1 && < 1.32
, filepath , filepath
, yesod-table , yesod-table
, regex-tdfa , regex-tdfa