Add --show-preprocessed option

This commit is contained in:
Filip Gralinski 2020-08-08 18:41:53 +02:00
parent 425c1b5102
commit 197c198a07
5 changed files with 35 additions and 19 deletions

View File

@ -28,6 +28,7 @@ module GEval.Core
defaultMetric,
getExpectedDirectory,
configFileName,
isPreprocessable,
ParsedRecord(..),
WithoutInput(..),
WithInput(..),
@ -215,7 +216,8 @@ data GEvalSpecification = GEvalSpecification
gesReferences :: Maybe String,
gesBootstrapResampling :: Maybe Int,
gesInHeader :: Maybe String,
gesOutHeader :: Maybe String }
gesOutHeader :: Maybe String,
gesShowPreprocessed :: Bool }
deriving (Show)
gesMainMetric :: GEvalSpecification -> Metric
@ -286,7 +288,8 @@ defaultGEvalSpecification = GEvalSpecification {
gesReferences = Nothing,
gesBootstrapResampling = Nothing,
gesInHeader = Nothing,
gesOutHeader = Nothing }
gesOutHeader = Nothing,
gesShowPreprocessed = False }
isEmptyFile :: FilePath -> IO (Bool)
isEmptyFile path = do
@ -412,7 +415,8 @@ checkAndGetDataSources forceInput gevalSpec = do
challengeDataSourcePreprocess = preprocess,
challengeDataSourceFilter = noFilter,
challengeDataSourceInHeader = mInHeader,
challengeDataSourceOutHeader = mOutHeader }
challengeDataSourceOutHeader = mOutHeader,
challengeDataSourceShowPreprocessed = gesShowPreprocessed gevalSpec }
return $ Prelude.map (\oss -> DataSource {
dataSourceChallengeData = chDataSource,

View File

@ -72,7 +72,10 @@ data ChallengeDataSource = ChallengeDataSource {
challengeDataSourcePreprocess :: Text -> Text,
challengeDataSourceFilter :: Filter,
challengeDataSourceInHeader :: Maybe TabularHeader,
challengeDataSourceOutHeader :: Maybe TabularHeader }
challengeDataSourceOutHeader :: Maybe TabularHeader,
-- whether the data will be shown preprocessed (not only
-- the evaluation will be done on the preprocessed data)
challengeDataSourceShowPreprocessed :: Bool }
-- | This type specifies all the data flowing into evaluation,
-- including the output data to be evaluated.

View File

@ -611,23 +611,27 @@ gevalLineByLineSource metric dataSource =
expectedLineSource = lineSourcesExpectedSource lsSpec
outputLineSource = lineSourcesOutputSource lsSpec
justLine (LineInFile _ _ l) = l
evaluateLine (lineNo, ParsedRecordWithInput inp exp out) = do
s <- liftIO $ gevalCoreOnSingleLines metric preprocess (getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp)
(getDataDecoder expectedLineSource) (LineInFile expectedSource lineNo exp)
(getDataDecoder outputLineSource) (LineInFile outSource lineNo out)
evaluateLine (lineNo, ParsedRecordWithInput inp' exp' out') = do
let inp = if shouldBePreprocessedForPresentation
then preprocess inp'
else inp'
let exp = preprocessOut exp'
let out = preprocessOut out'
s <- liftIO $ gevalCoreOnSingleLines metric
-- if also to be shown preprocessed, preprocessing
-- will be done earlier
(if shouldBePreprocessedForPresentation then id else preprocess)
(getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp)
(getDataDecoder expectedLineSource) (LineInFile expectedSource lineNo exp) (getDataDecoder outputLineSource) (LineInFile outSource lineNo out)
return $ LineRecord inp exp out lineNo (extractSimpleRunValue $ getMetricValue s)
-- preparing sources, `id` means that no preprocessing is done (to avoid double preprocessing)
outOptions = FileProcessingOptions {
fileProcessingOptionsSelector = mSelector,
fileProcessingOptionsPreprocess = id,
fileProcessingOptionsHeader = mOutHeader }
inOptions = FileProcessingOptions {
fileProcessingOptionsSelector = mSelector,
fileProcessingOptionsPreprocess = id,
fileProcessingOptionsHeader = mInHeader }
preprocessOut = if shouldBePreprocessedForPresentation && isPreprocessable metric
then preprocess
else id
challengeDataSource = dataSourceChallengeData dataSource
mSelector = challengeDataSourceSelector challengeDataSource
preprocess = challengeDataSourcePreprocess challengeDataSource
shouldBePreprocessedForPresentation = challengeDataSourceShowPreprocessed challengeDataSource
mInHeader = challengeDataSourceInHeader challengeDataSource
mOutHeader = challengeDataSourceOutHeader challengeDataSource
inputSource = challengeDataSourceInput challengeDataSource

View File

@ -238,6 +238,9 @@ specParser = GEvalSpecification
( long "out-header"
<> metavar "FILE"
<> help "One-line TSV file specifying a list of field names for output and expected files"))
<*> switch
( long "show-preprocessed"
<> help "When in --line-by-line or similar modes, not just work preprocessed data, but show them as such")
selectMetricsByName :: [String] -> [EvaluationScheme] -> [EvaluationScheme]
selectMetricsByName [] schemes = schemes

View File

@ -382,7 +382,8 @@ main = hspec $ do
gesReferences = Nothing,
gesBootstrapResampling = Nothing,
gesInHeader = Nothing,
gesOutHeader = Nothing }
gesOutHeader = Nothing,
gesShowPreprocessed = False }
it "In line-by-line mode Accuracy" $ do
results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge (const Data.Conduit.List.consume)
results `shouldBe` [
@ -554,7 +555,8 @@ main = hspec $ do
gesReferences = Nothing,
gesBootstrapResampling = Nothing,
gesInHeader = Nothing,
gesOutHeader = Nothing }
gesOutHeader = Nothing,
gesShowPreprocessed = False }
it "simple test" $ do
results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge (const Data.Conduit.List.consume)
Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo",