forked from filipg/gonito
Add bootstrap resampling
This commit is contained in:
parent
580c141a8e
commit
d2a59e59a5
@ -17,12 +17,14 @@ import qualified Data.Map.Strict as M
|
|||||||
|
|
||||||
import GEval.Core
|
import GEval.Core
|
||||||
import GEval.EvaluationScheme
|
import GEval.EvaluationScheme
|
||||||
import GEval.Common (MetricValue)
|
|
||||||
import GEval.OptionsParser
|
import GEval.OptionsParser
|
||||||
import GEval.ParseParams (parseParamsFromFilePath, OutputFileParsed(..))
|
import GEval.ParseParams (parseParamsFromFilePath, OutputFileParsed(..))
|
||||||
|
import GEval.Common (GEvalException, MetricResult(..), MetricValue)
|
||||||
|
import GEval.Formatting (formatTheResult)
|
||||||
|
|
||||||
import Options.Applicative
|
import Options.Applicative
|
||||||
import Data.Conduit.SmartSource
|
import Data.Conduit.SmartSource
|
||||||
|
import Data.Conduit.Bootstrap (defaultConfidenceLevel, getConfidenceBounds)
|
||||||
|
|
||||||
import System.FilePath (takeFileName, dropExtensions, (-<.>))
|
import System.FilePath (takeFileName, dropExtensions, (-<.>))
|
||||||
|
|
||||||
@ -222,15 +224,17 @@ checkOrInsertEvaluation repoDir chan version out = do
|
|||||||
Right (Left _) -> do
|
Right (Left _) -> do
|
||||||
err chan "Cannot parse options, check the challenge repo"
|
err chan "Cannot parse options, check the challenge repo"
|
||||||
Right (Right (_, Just [(_, [result])])) -> do
|
Right (Right (_, Just [(_, [result])])) -> do
|
||||||
msg chan $ concat [ "Evaluated! Score ", (formatNonScientifically result) ]
|
msg chan $ concat [ "Evaluated! Score ", (T.pack $ formatTheResult Nothing result) ]
|
||||||
time <- liftIO getCurrentTime
|
time <- liftIO getCurrentTime
|
||||||
_ <- runDB $ insert $ Evaluation {
|
_ <- runDB $ insert $ let (pointResult, errorBound) = extractResult result
|
||||||
evaluationTest=outTest out,
|
in Evaluation {
|
||||||
evaluationChecksum=outChecksum out,
|
evaluationTest=outTest out,
|
||||||
evaluationScore=Just result,
|
evaluationChecksum=outChecksum out,
|
||||||
evaluationErrorMessage=Nothing,
|
evaluationScore=Just pointResult,
|
||||||
evaluationStamp=time,
|
evaluationErrorBound=errorBound,
|
||||||
evaluationVersion=Just version }
|
evaluationErrorMessage=Nothing,
|
||||||
|
evaluationStamp=time,
|
||||||
|
evaluationVersion=Just version }
|
||||||
msg chan "Evaluation done"
|
msg chan "Evaluation done"
|
||||||
Right (Right (_, Just _)) -> do
|
Right (Right (_, Just _)) -> do
|
||||||
err chan "Unexpected multiple results (???)"
|
err chan "Unexpected multiple results (???)"
|
||||||
@ -239,12 +243,17 @@ checkOrInsertEvaluation repoDir chan version out = do
|
|||||||
Left exception -> do
|
Left exception -> do
|
||||||
err chan $ "Evaluation failed: " ++ (T.pack $ show exception)
|
err chan $ "Evaluation failed: " ++ (T.pack $ show exception)
|
||||||
|
|
||||||
|
-- | Split a 'MetricResult' into a point value and an optional error bound.
--
-- * For @SimpleRun r@ the value @r@ is returned as-is and no error bound
--   is reported ('Nothing').
-- * For @BootstrapResampling vals@ the pair @(lowerBound, upperBound)@ is
--   obtained from 'getConfidenceBounds' called with 'defaultConfidenceLevel'.
--   The point value is the midpoint of that pair and the error bound is
--   half of its width, so the result reads as @point ± bound@.
--
-- NOTE(review): the point value for the bootstrap case is the centre of the
-- returned bounds, not a statistic computed directly from @vals@ here —
-- confirm this is the intended score to store.
extractResult :: MetricResult -> (MetricValue, Maybe MetricValue)
|
||||||
|
extractResult (SimpleRun r) = (r, Nothing)
|
||||||
|
extractResult (BootstrapResampling vals) = ((upperBound + lowerBound) / 2.0, Just ((upperBound - lowerBound) / 2.0))
|
||||||
|
where (lowerBound, upperBound) = getConfidenceBounds defaultConfidenceLevel vals
|
||||||
|
|
||||||
rawEval :: FilePath
|
rawEval :: FilePath
|
||||||
-> EvaluationScheme
|
-> EvaluationScheme
|
||||||
-> FilePath
|
-> FilePath
|
||||||
-> Text
|
-> Text
|
||||||
-> FilePath
|
-> FilePath
|
||||||
-> IO (Either GEvalException (Either (ParserResult GEvalOptions) (GEvalOptions, Maybe [(SourceSpec, [MetricValue])])))
|
-> IO (Either GEvalException (Either (ParserResult GEvalOptions) (GEvalOptions, Maybe [(SourceSpec, [MetricResult])])))
|
||||||
rawEval challengeDir metric repoDir name outF = Import.try (runGEvalGetOptions [
|
rawEval challengeDir metric repoDir name outF = Import.try (runGEvalGetOptions [
|
||||||
"--alt-metric", (show metric),
|
"--alt-metric", (show metric),
|
||||||
"--expected-directory", challengeDir,
|
"--expected-directory", challengeDir,
|
||||||
|
@ -301,6 +301,7 @@ lineByLineTable (Entity testId test) theStamp = mempty
|
|||||||
evaluationTest = testId,
|
evaluationTest = testId,
|
||||||
evaluationChecksum = testChecksum test,
|
evaluationChecksum = testChecksum test,
|
||||||
evaluationScore = Just score,
|
evaluationScore = Just score,
|
||||||
|
evaluationErrorBound = Nothing,
|
||||||
evaluationErrorMessage = Nothing,
|
evaluationErrorMessage = Nothing,
|
||||||
evaluationStamp = theStamp,
|
evaluationStamp = theStamp,
|
||||||
evaluationVersion = Nothing }
|
evaluationVersion = Nothing }
|
||||||
|
@ -41,6 +41,7 @@ import Text.Regex.TDFA
|
|||||||
|
|
||||||
import GEval.Core
|
import GEval.Core
|
||||||
import GEval.EvaluationScheme
|
import GEval.EvaluationScheme
|
||||||
|
import GEval.Formatting (formatTheResultWithErrorBounds)
|
||||||
|
|
||||||
import qualified Data.Vector as DV
|
import qualified Data.Vector as DV
|
||||||
|
|
||||||
@ -449,7 +450,7 @@ formatTruncatedScore :: Maybe Int -> Maybe Evaluation -> Text
|
|||||||
formatTruncatedScore Nothing e = formatFullScore e
|
formatTruncatedScore Nothing e = formatFullScore e
|
||||||
formatTruncatedScore _ Nothing = formatFullScore Nothing
|
formatTruncatedScore _ Nothing = formatFullScore Nothing
|
||||||
formatTruncatedScore (Just precision) (Just evaluation) = case evaluationScore evaluation of
|
formatTruncatedScore (Just precision) (Just evaluation) = case evaluationScore evaluation of
|
||||||
Just score -> T.pack $ printf "%0.*f" precision score
|
Just score -> T.pack $ formatTheResultWithErrorBounds (Just precision) score (evaluationErrorBound evaluation)
|
||||||
Nothing -> formatFullScore Nothing
|
Nothing -> formatFullScore Nothing
|
||||||
|
|
||||||
formatScore :: Maybe Int -> Double -> Text
|
formatScore :: Maybe Int -> Double -> Text
|
||||||
|
@ -122,6 +122,7 @@ Evaluation
|
|||||||
test TestId
|
test TestId
|
||||||
checksum SHA1
|
checksum SHA1
|
||||||
score Double Maybe
|
score Double Maybe
|
||||||
|
errorBound Double Maybe
|
||||||
errorMessage Text Maybe
|
errorMessage Text Maybe
|
||||||
stamp UTCTime default=now()
|
stamp UTCTime default=now()
|
||||||
-- Should be just SHA1 (without Maybe) - Maybe is just a legacy
|
-- Should be just SHA1 (without Maybe) - Maybe is just a legacy
|
||||||
|
@ -128,7 +128,7 @@ library
|
|||||||
, filemanip
|
, filemanip
|
||||||
, cryptohash
|
, cryptohash
|
||||||
, markdown
|
, markdown
|
||||||
, geval >= 1.27 && < 1.29
|
, geval >= 1.31.1 && < 1.32
|
||||||
, filepath
|
, filepath
|
||||||
, yesod-table
|
, yesod-table
|
||||||
, regex-tdfa
|
, regex-tdfa
|
||||||
|
Loading…
Reference in New Issue
Block a user