Add --show-preprocessed option

This commit is contained in:
Filip Gralinski 2020-08-08 18:41:53 +02:00
parent 425c1b5102
commit 197c198a07
5 changed files with 35 additions and 19 deletions

View File

@ -28,6 +28,7 @@ module GEval.Core
defaultMetric, defaultMetric,
getExpectedDirectory, getExpectedDirectory,
configFileName, configFileName,
isPreprocessable,
ParsedRecord(..), ParsedRecord(..),
WithoutInput(..), WithoutInput(..),
WithInput(..), WithInput(..),
@ -215,7 +216,8 @@ data GEvalSpecification = GEvalSpecification
gesReferences :: Maybe String, gesReferences :: Maybe String,
gesBootstrapResampling :: Maybe Int, gesBootstrapResampling :: Maybe Int,
gesInHeader :: Maybe String, gesInHeader :: Maybe String,
gesOutHeader :: Maybe String } gesOutHeader :: Maybe String,
gesShowPreprocessed :: Bool }
deriving (Show) deriving (Show)
gesMainMetric :: GEvalSpecification -> Metric gesMainMetric :: GEvalSpecification -> Metric
@ -286,7 +288,8 @@ defaultGEvalSpecification = GEvalSpecification {
gesReferences = Nothing, gesReferences = Nothing,
gesBootstrapResampling = Nothing, gesBootstrapResampling = Nothing,
gesInHeader = Nothing, gesInHeader = Nothing,
gesOutHeader = Nothing } gesOutHeader = Nothing,
gesShowPreprocessed = False }
isEmptyFile :: FilePath -> IO (Bool) isEmptyFile :: FilePath -> IO (Bool)
isEmptyFile path = do isEmptyFile path = do
@ -412,7 +415,8 @@ checkAndGetDataSources forceInput gevalSpec = do
challengeDataSourcePreprocess = preprocess, challengeDataSourcePreprocess = preprocess,
challengeDataSourceFilter = noFilter, challengeDataSourceFilter = noFilter,
challengeDataSourceInHeader = mInHeader, challengeDataSourceInHeader = mInHeader,
challengeDataSourceOutHeader = mOutHeader } challengeDataSourceOutHeader = mOutHeader,
challengeDataSourceShowPreprocessed = gesShowPreprocessed gevalSpec }
return $ Prelude.map (\oss -> DataSource { return $ Prelude.map (\oss -> DataSource {
dataSourceChallengeData = chDataSource, dataSourceChallengeData = chDataSource,

View File

@ -72,7 +72,10 @@ data ChallengeDataSource = ChallengeDataSource {
challengeDataSourcePreprocess :: Text -> Text, challengeDataSourcePreprocess :: Text -> Text,
challengeDataSourceFilter :: Filter, challengeDataSourceFilter :: Filter,
challengeDataSourceInHeader :: Maybe TabularHeader, challengeDataSourceInHeader :: Maybe TabularHeader,
challengeDataSourceOutHeader :: Maybe TabularHeader } challengeDataSourceOutHeader :: Maybe TabularHeader,
-- whether the data should also be shown in preprocessed form
-- (as opposed to preprocessing being applied only for the evaluation itself)
challengeDataSourceShowPreprocessed :: Bool }
-- | This type specifies all the data flowing into evaluation, -- | This type specifies all the data flowing into evaluation,
-- including the output data to be evaluated. -- including the output data to be evaluated.

View File

@ -611,23 +611,27 @@ gevalLineByLineSource metric dataSource =
expectedLineSource = lineSourcesExpectedSource lsSpec expectedLineSource = lineSourcesExpectedSource lsSpec
outputLineSource = lineSourcesOutputSource lsSpec outputLineSource = lineSourcesOutputSource lsSpec
justLine (LineInFile _ _ l) = l justLine (LineInFile _ _ l) = l
evaluateLine (lineNo, ParsedRecordWithInput inp exp out) = do
s <- liftIO $ gevalCoreOnSingleLines metric preprocess (getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp) evaluateLine (lineNo, ParsedRecordWithInput inp' exp' out') = do
(getDataDecoder expectedLineSource) (LineInFile expectedSource lineNo exp) let inp = if shouldBePreprocessedForPresentation
(getDataDecoder outputLineSource) (LineInFile outSource lineNo out) then preprocess inp'
else inp'
let exp = preprocessOut exp'
let out = preprocessOut out'
s <- liftIO $ gevalCoreOnSingleLines metric
-- if the data is also to be shown preprocessed, the preprocessing
-- is done earlier (in the let bindings above), so `id` is passed here instead
(if shouldBePreprocessedForPresentation then id else preprocess)
(getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp)
(getDataDecoder expectedLineSource) (LineInFile expectedSource lineNo exp) (getDataDecoder outputLineSource) (LineInFile outSource lineNo out)
return $ LineRecord inp exp out lineNo (extractSimpleRunValue $ getMetricValue s) return $ LineRecord inp exp out lineNo (extractSimpleRunValue $ getMetricValue s)
-- preparing sources, `id` means that no preprocessing is done (to avoid double preprocessing) preprocessOut = if shouldBePreprocessedForPresentation && isPreprocessable metric
outOptions = FileProcessingOptions { then preprocess
fileProcessingOptionsSelector = mSelector, else id
fileProcessingOptionsPreprocess = id,
fileProcessingOptionsHeader = mOutHeader }
inOptions = FileProcessingOptions {
fileProcessingOptionsSelector = mSelector,
fileProcessingOptionsPreprocess = id,
fileProcessingOptionsHeader = mInHeader }
challengeDataSource = dataSourceChallengeData dataSource challengeDataSource = dataSourceChallengeData dataSource
mSelector = challengeDataSourceSelector challengeDataSource mSelector = challengeDataSourceSelector challengeDataSource
preprocess = challengeDataSourcePreprocess challengeDataSource preprocess = challengeDataSourcePreprocess challengeDataSource
shouldBePreprocessedForPresentation = challengeDataSourceShowPreprocessed challengeDataSource
mInHeader = challengeDataSourceInHeader challengeDataSource mInHeader = challengeDataSourceInHeader challengeDataSource
mOutHeader = challengeDataSourceOutHeader challengeDataSource mOutHeader = challengeDataSourceOutHeader challengeDataSource
inputSource = challengeDataSourceInput challengeDataSource inputSource = challengeDataSourceInput challengeDataSource

View File

@ -238,6 +238,9 @@ specParser = GEvalSpecification
( long "out-header" ( long "out-header"
<> metavar "FILE" <> metavar "FILE"
<> help "One-line TSV file specifying a list of field names for output and expected files")) <> help "One-line TSV file specifying a list of field names for output and expected files"))
<*> switch
( long "show-preprocessed"
<> help "When in --line-by-line or similar modes, not just work preprocessed data, but show them as such")
selectMetricsByName :: [String] -> [EvaluationScheme] -> [EvaluationScheme] selectMetricsByName :: [String] -> [EvaluationScheme] -> [EvaluationScheme]
selectMetricsByName [] schemes = schemes selectMetricsByName [] schemes = schemes

View File

@ -382,7 +382,8 @@ main = hspec $ do
gesReferences = Nothing, gesReferences = Nothing,
gesBootstrapResampling = Nothing, gesBootstrapResampling = Nothing,
gesInHeader = Nothing, gesInHeader = Nothing,
gesOutHeader = Nothing } gesOutHeader = Nothing,
gesShowPreprocessed = False }
it "In line-by-line mode Accuracy" $ do it "In line-by-line mode Accuracy" $ do
results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge (const Data.Conduit.List.consume) results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge (const Data.Conduit.List.consume)
results `shouldBe` [ results `shouldBe` [
@ -554,7 +555,8 @@ main = hspec $ do
gesReferences = Nothing, gesReferences = Nothing,
gesBootstrapResampling = Nothing, gesBootstrapResampling = Nothing,
gesInHeader = Nothing, gesInHeader = Nothing,
gesOutHeader = Nothing } gesOutHeader = Nothing,
gesShowPreprocessed = False }
it "simple test" $ do it "simple test" $ do
results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge (const Data.Conduit.List.consume) results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge (const Data.Conduit.List.consume)
Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo", Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo",