From 197c198a07109634ed80f28187afd3bc7f519047 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Sat, 8 Aug 2020 18:41:53 +0200 Subject: [PATCH] Add --show-preprocessed option --- src/GEval/Core.hs | 10 +++++++--- src/GEval/DataSource.hs | 5 ++++- src/GEval/LineByLine.hs | 30 +++++++++++++++++------------- src/GEval/OptionsParser.hs | 3 +++ test/Spec.hs | 6 ++++-- 5 files changed, 35 insertions(+), 19 deletions(-) diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 74ec429..a9b0f16 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -28,6 +28,7 @@ module GEval.Core defaultMetric, getExpectedDirectory, configFileName, + isPreprocessable, ParsedRecord(..), WithoutInput(..), WithInput(..), @@ -215,7 +216,8 @@ data GEvalSpecification = GEvalSpecification gesReferences :: Maybe String, gesBootstrapResampling :: Maybe Int, gesInHeader :: Maybe String, - gesOutHeader :: Maybe String } + gesOutHeader :: Maybe String, + gesShowPreprocessed :: Bool } deriving (Show) gesMainMetric :: GEvalSpecification -> Metric @@ -286,7 +288,8 @@ defaultGEvalSpecification = GEvalSpecification { gesReferences = Nothing, gesBootstrapResampling = Nothing, gesInHeader = Nothing, - gesOutHeader = Nothing } + gesOutHeader = Nothing, + gesShowPreprocessed = False } isEmptyFile :: FilePath -> IO (Bool) isEmptyFile path = do @@ -412,7 +415,8 @@ checkAndGetDataSources forceInput gevalSpec = do challengeDataSourcePreprocess = preprocess, challengeDataSourceFilter = noFilter, challengeDataSourceInHeader = mInHeader, - challengeDataSourceOutHeader = mOutHeader } + challengeDataSourceOutHeader = mOutHeader, + challengeDataSourceShowPreprocessed = gesShowPreprocessed gevalSpec } return $ Prelude.map (\oss -> DataSource { dataSourceChallengeData = chDataSource, diff --git a/src/GEval/DataSource.hs b/src/GEval/DataSource.hs index 3d89760..22fd3db 100644 --- a/src/GEval/DataSource.hs +++ b/src/GEval/DataSource.hs @@ -72,7 +72,10 @@ data ChallengeDataSource = ChallengeDataSource { challengeDataSourcePreprocess :: Text -> Text, challengeDataSourceFilter :: Filter, challengeDataSourceInHeader :: Maybe TabularHeader, - challengeDataSourceOutHeader :: Maybe TabularHeader } + challengeDataSourceOutHeader :: Maybe TabularHeader, + -- whether the data will be shown preprocessed (not only + -- the evaluation will be done on the preprocessed data) + challengeDataSourceShowPreprocessed :: Bool } -- | This type specifies all the data flowing into evaluation, -- including the output data to be evaluated. diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs index 1326444..4f83221 100644 --- a/src/GEval/LineByLine.hs +++ b/src/GEval/LineByLine.hs @@ -611,23 +611,27 @@ gevalLineByLineSource metric dataSource = expectedLineSource = lineSourcesExpectedSource lsSpec outputLineSource = lineSourcesOutputSource lsSpec justLine (LineInFile _ _ l) = l - evaluateLine (lineNo, ParsedRecordWithInput inp exp out) = do - s <- liftIO $ gevalCoreOnSingleLines metric preprocess (getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp) - (getDataDecoder expectedLineSource) (LineInFile expectedSource lineNo exp) - (getDataDecoder outputLineSource) (LineInFile outSource lineNo out) + + evaluateLine (lineNo, ParsedRecordWithInput inp' exp' out') = do + let inp = if shouldBePreprocessedForPresentation + then preprocess inp' + else inp' + let exp = preprocessOut exp' + let out = preprocessOut out' + s <- liftIO $ gevalCoreOnSingleLines metric + -- if also to be shown preprocessed, preprocessing + -- will be done earlier + (if shouldBePreprocessedForPresentation then id else preprocess) + (getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp) + (getDataDecoder expectedLineSource) (LineInFile expectedSource lineNo exp) (getDataDecoder outputLineSource) (LineInFile outSource lineNo out) return $ LineRecord inp exp out lineNo (extractSimpleRunValue $ getMetricValue s) - -- preparing sources, `id` means that no preprocessing is done (to avoid double preprocessing) - outOptions = FileProcessingOptions { - fileProcessingOptionsSelector = mSelector, - fileProcessingOptionsPreprocess = id, - fileProcessingOptionsHeader = mOutHeader } - inOptions = FileProcessingOptions { - fileProcessingOptionsSelector = mSelector, - fileProcessingOptionsPreprocess = id, - fileProcessingOptionsHeader = mInHeader } + preprocessOut = if shouldBePreprocessedForPresentation && isPreprocessable metric + then preprocess + else id challengeDataSource = dataSourceChallengeData dataSource mSelector = challengeDataSourceSelector challengeDataSource preprocess = challengeDataSourcePreprocess challengeDataSource + shouldBePreprocessedForPresentation = challengeDataSourceShowPreprocessed challengeDataSource mInHeader = challengeDataSourceInHeader challengeDataSource mOutHeader = challengeDataSourceOutHeader challengeDataSource inputSource = challengeDataSourceInput challengeDataSource diff --git a/src/GEval/OptionsParser.hs b/src/GEval/OptionsParser.hs index 544a355..98e022c 100644 --- a/src/GEval/OptionsParser.hs +++ b/src/GEval/OptionsParser.hs @@ -238,6 +238,9 @@ specParser = GEvalSpecification ( long "out-header" <> metavar "FILE" <> help "One-line TSV file specifying a list of field names for output and expected files")) + <*> switch + ( long "show-preprocessed" + <> help "When in --line-by-line or similar modes, not just work preprocessed data, but show them as such") selectMetricsByName :: [String] -> [EvaluationScheme] -> [EvaluationScheme] selectMetricsByName [] schemes = schemes diff --git a/test/Spec.hs b/test/Spec.hs index 5d547a3..f7d81f6 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -382,7 +382,8 @@ main = hspec $ do gesReferences = Nothing, gesBootstrapResampling = Nothing, gesInHeader = Nothing, - gesOutHeader = Nothing } + gesOutHeader = Nothing, + gesShowPreprocessed = False } it "In line-by-line mode Accuracy" $ do results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge (const Data.Conduit.List.consume) results `shouldBe` [ @@ -554,7 +555,8 @@ main = hspec $ do gesReferences = Nothing, gesBootstrapResampling = Nothing, gesInHeader = Nothing, - gesOutHeader = Nothing } + gesOutHeader = Nothing, + gesShowPreprocessed = False } it "simple test" $ do results <- runLineByLineGeneralized KeepTheOriginalOrder sampleChallenge (const Data.Conduit.List.consume) Prelude.map (\(LineRecord inp _ _ _ _) -> inp) results `shouldBe` ["foo",