Add bootstrap resampling

This commit is contained in:
Filip Gralinski 2020-01-28 23:14:46 +01:00
parent 580c141a8e
commit d2a59e59a5
5 changed files with 24 additions and 12 deletions

View File

@ -17,12 +17,14 @@ import qualified Data.Map.Strict as M
import GEval.Core
import GEval.EvaluationScheme
import GEval.Common (MetricValue)
import GEval.OptionsParser
import GEval.ParseParams (parseParamsFromFilePath, OutputFileParsed(..))
import GEval.Common (GEvalException, MetricResult(..), MetricValue)
import GEval.Formatting (formatTheResult)
import Options.Applicative
import Data.Conduit.SmartSource
import Data.Conduit.Bootstrap (defaultConfidenceLevel, getConfidenceBounds)
import System.FilePath (takeFileName, dropExtensions, (-<.>))
@ -222,15 +224,17 @@ checkOrInsertEvaluation repoDir chan version out = do
Right (Left _) -> do
err chan "Cannot parse options, check the challenge repo"
Right (Right (_, Just [(_, [result])])) -> do
msg chan $ concat [ "Evaluated! Score ", (formatNonScientifically result) ]
msg chan $ concat [ "Evaluated! Score ", (T.pack $ formatTheResult Nothing result) ]
time <- liftIO getCurrentTime
_ <- runDB $ insert $ Evaluation {
evaluationTest=outTest out,
evaluationChecksum=outChecksum out,
evaluationScore=Just result,
evaluationErrorMessage=Nothing,
evaluationStamp=time,
evaluationVersion=Just version }
_ <- runDB $ insert $ let (pointResult, errorBound) = extractResult result
in Evaluation {
evaluationTest=outTest out,
evaluationChecksum=outChecksum out,
evaluationScore=Just pointResult,
evaluationErrorBound=errorBound,
evaluationErrorMessage=Nothing,
evaluationStamp=time,
evaluationVersion=Just version }
msg chan "Evaluation done"
Right (Right (_, Just _)) -> do
err chan "Unexpected multiple results (???)"
@ -239,12 +243,17 @@ checkOrInsertEvaluation repoDir chan version out = do
Left exception -> do
err chan $ "Evaluation failed: " ++ (T.pack $ show exception)
-- | Split a 'MetricResult' into a point value and an optional error bound.
--
-- A 'SimpleRun' value is returned as-is, with no error bound.  For
-- 'BootstrapResampling', the pair produced by 'getConfidenceBounds' at
-- 'defaultConfidenceLevel' is collapsed into its midpoint (the point value)
-- and half of its width (the error bound).
extractResult :: MetricResult -> (MetricValue, Maybe MetricValue)
extractResult metricResult = case metricResult of
  SimpleRun value -> (value, Nothing)
  BootstrapResampling samples ->
    let (lo, hi) = getConfidenceBounds defaultConfidenceLevel samples
        midpoint = (hi + lo) / 2.0
        halfWidth = (hi - lo) / 2.0
    in (midpoint, Just halfWidth)
rawEval :: FilePath
-> EvaluationScheme
-> FilePath
-> Text
-> FilePath
-> IO (Either GEvalException (Either (ParserResult GEvalOptions) (GEvalOptions, Maybe [(SourceSpec, [MetricValue])])))
-> IO (Either GEvalException (Either (ParserResult GEvalOptions) (GEvalOptions, Maybe [(SourceSpec, [MetricResult])])))
rawEval challengeDir metric repoDir name outF = Import.try (runGEvalGetOptions [
"--alt-metric", (show metric),
"--expected-directory", challengeDir,

View File

@ -301,6 +301,7 @@ lineByLineTable (Entity testId test) theStamp = mempty
evaluationTest = testId,
evaluationChecksum = testChecksum test,
evaluationScore = Just score,
evaluationErrorBound = Nothing,
evaluationErrorMessage = Nothing,
evaluationStamp = theStamp,
evaluationVersion = Nothing }

View File

@ -41,6 +41,7 @@ import Text.Regex.TDFA
import GEval.Core
import GEval.EvaluationScheme
import GEval.Formatting (formatTheResultWithErrorBounds)
import qualified Data.Vector as DV
@ -449,7 +450,7 @@ formatTruncatedScore :: Maybe Int -> Maybe Evaluation -> Text
formatTruncatedScore Nothing e = formatFullScore e
formatTruncatedScore _ Nothing = formatFullScore Nothing
formatTruncatedScore (Just precision) (Just evaluation) = case evaluationScore evaluation of
Just score -> T.pack $ printf "%0.*f" precision score
Just score -> T.pack $ formatTheResultWithErrorBounds (Just precision) score (evaluationErrorBound evaluation)
Nothing -> formatFullScore Nothing
formatScore :: Maybe Int -> Double -> Text

View File

@ -122,6 +122,7 @@ Evaluation
test TestId
checksum SHA1
score Double Maybe
errorBound Double Maybe
errorMessage Text Maybe
stamp UTCTime default=now()
-- Should be just SHA1 (without Maybe) - Maybe is just a legacy

View File

@ -128,7 +128,7 @@ library
, filemanip
, cryptohash
, markdown
, geval >= 1.27 && < 1.29
, geval >= 1.31.1 && < 1.32
, filepath
, yesod-table
, regex-tdfa