From fb63894db0de017bca82f1d23b882d26d9799e0a Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Mon, 7 Jun 2021 18:00:43 +0200 Subject: [PATCH] Fix filter and match combination --- src/GEval/Core.hs | 25 +++++++++++++------ src/GEval/DataSource.hs | 3 ++- src/GEval/EvaluationScheme.hs | 2 ++ src/GEval/LineByLine.hs | 9 ++++--- test/Spec.hs | 2 ++ .../test-A/out.tsv | 3 +++ .../flags-filter-and-match/config.txt | 1 + .../flags-filter-and-match/test-A/config.txt | 1 + .../test-A/expected.tsv | 3 +++ .../flags-filter-and-match/test-A/in.tsv | 3 +++ 10 files changed, 40 insertions(+), 12 deletions(-) create mode 100644 test/flags-filter-and-match/flags-filter-and-match-solution/test-A/out.tsv create mode 100644 test/flags-filter-and-match/flags-filter-and-match/config.txt create mode 100644 test/flags-filter-and-match/flags-filter-and-match/test-A/config.txt create mode 100644 test/flags-filter-and-match/flags-filter-and-match/test-A/expected.tsv create mode 100644 test/flags-filter-and-match/flags-filter-and-match/test-A/in.tsv diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index e003f72..48b4b70 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -186,6 +186,9 @@ isPreprocessable MultiLabelLogLoss = False isPreprocessable MultiLabelLikelihood = False isPreprocessable (Mean metric) = isPreprocessable metric +isInputModifiable CharMatch = True +isInputModifiable _ = False + defaultOutDirectory = "." defaultTestName = "test-A" defaultOutFile = "out.tsv" @@ -323,14 +326,15 @@ dataSourceToLineSourcesSpecification dataSource = LineSourcesSpecification { outSource = dataSourceOut dataSource outOptions = FileProcessingOptions { fileProcessingOptionsSelector = mSelector, - fileProcessingOptionsPreprocess = preprocess, + fileProcessingOptionsPreprocess = outPreprocess, fileProcessingOptionsHeader = mOutHeader } inOptions = FileProcessingOptions { fileProcessingOptionsSelector = mSelector, - fileProcessingOptionsPreprocess = preprocess, + fileProcessingOptionsPreprocess = inPreprocess, fileProcessingOptionsHeader = mInHeader } mSelector = challengeDataSourceSelector chDataSource - preprocess = challengeDataSourcePreprocess chDataSource + outPreprocess = challengeDataSourceOutPreprocess chDataSource + inPreprocess = challengeDataSourceInPreprocess chDataSource mInHeader = challengeDataSourceInHeader chDataSource mOutHeader = challengeDataSourceOutHeader chDataSource @@ -355,12 +359,18 @@ gevalOnSingleOut gevalSpec dataSource = do schemes = gesMetrics gevalSpec addSchemeSpecifics :: EvaluationScheme -> DataSource -> DataSource -addSchemeSpecifics scheme dataSource = +addSchemeSpecifics scheme@(EvaluationScheme metric _) dataSource = dataSource { dataSourceChallengeData = (dataSourceChallengeData dataSource) { challengeDataSourceFilter = getFilterForScheme (challengeDataSourceInHeader $ dataSourceChallengeData dataSource) scheme, - challengeDataSourcePreprocess = - (challengeDataSourcePreprocess $ dataSourceChallengeData dataSource) . (applyPreprocessingOperations scheme) }} + challengeDataSourceOutPreprocess = outPreprocess, + challengeDataSourceInPreprocess = inPreprocess + }} + where outPreprocess = (challengeDataSourceOutPreprocess $ dataSourceChallengeData dataSource) . (applyPreprocessingOperations scheme) + inPreprocess = (challengeDataSourceInPreprocess $ dataSourceChallengeData dataSource) . (if isInputModifiable metric + then (applyPreprocessingOperations scheme) + else id) + readHeaderFileWrapper :: Maybe FilePath -> IO (Maybe TabularHeader) readHeaderFileWrapper Nothing = return Nothing @@ -413,7 +423,8 @@ checkAndGetDataSources forceInput gevalSpec = do challengeDataSourceInput = inputSource, challengeDataSourceExpected = expectedSource, challengeDataSourceSelector = mSelector, - challengeDataSourcePreprocess = preprocess, + challengeDataSourceOutPreprocess = preprocess, + challengeDataSourceInPreprocess = preprocess, challengeDataSourceFilter = noFilter, challengeDataSourceInHeader = mInHeader, challengeDataSourceOutHeader = mOutHeader, diff --git a/src/GEval/DataSource.hs b/src/GEval/DataSource.hs index 22fd3db..309aca6 100644 --- a/src/GEval/DataSource.hs +++ b/src/GEval/DataSource.hs @@ -69,7 +69,8 @@ data ChallengeDataSource = ChallengeDataSource { challengeDataSourceInput :: SourceSpec, challengeDataSourceExpected :: SourceSpec, challengeDataSourceSelector :: Maybe Selector, - challengeDataSourcePreprocess :: Text -> Text, + challengeDataSourceOutPreprocess :: Text -> Text, + challengeDataSourceInPreprocess :: Text -> Text, challengeDataSourceFilter :: Filter, challengeDataSourceInHeader :: Maybe TabularHeader, challengeDataSourceOutHeader :: Maybe TabularHeader, diff --git a/src/GEval/EvaluationScheme.hs b/src/GEval/EvaluationScheme.hs index 9d9c6c3..18926be 100644 --- a/src/GEval/EvaluationScheme.hs +++ b/src/GEval/EvaluationScheme.hs @@ -10,6 +10,8 @@ module GEval.EvaluationScheme import GEval.Metric +import Debug.Trace + import Text.Regex.PCRE.Heavy import Text.Regex.PCRE.Light.Base (Regex(..)) import Text.Regex.PCRE.Light (compile) diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs index 07b291c..7a93da4 100644 --- a/src/GEval/LineByLine.hs +++ b/src/GEval/LineByLine.hs @@ -617,23 +617,24 @@ gevalLineByLineSource metric dataSource = evaluateLine (lineNo, ParsedRecordWithInput inp' exp' out') = do let inp = if shouldBePreprocessedForPresentation - then preprocess inp' + then inPreprocess inp' else inp' let exp = preprocessOut exp' let out = preprocessOut out' s <- liftIO $ gevalCoreOnSingleLines metric -- if also to be shown preprocessed, preprocessing -- will be done earlier - (if shouldBePreprocessedForPresentation then id else preprocess) + (if shouldBePreprocessedForPresentation then id else outPreprocess) (getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp) (getDataDecoder expectedLineSource) (LineInFile expectedSource lineNo exp) (getDataDecoder outputLineSource) (LineInFile outSource lineNo out) return $ LineRecord inp exp out lineNo (extractSimpleRunValue $ getMetricValue s) preprocessOut = if shouldBePreprocessedForPresentation && isPreprocessable metric - then preprocess + then outPreprocess else id challengeDataSource = dataSourceChallengeData dataSource mSelector = challengeDataSourceSelector challengeDataSource - preprocess = challengeDataSourcePreprocess challengeDataSource + outPreprocess = challengeDataSourceOutPreprocess challengeDataSource + inPreprocess = challengeDataSourceInPreprocess challengeDataSource shouldBePreprocessedForPresentation = challengeDataSourceShowPreprocessed challengeDataSource mInHeader = challengeDataSourceInHeader challengeDataSource mOutHeader = challengeDataSourceOutHeader challengeDataSource diff --git a/test/Spec.hs b/test/Spec.hs index c0e7392..36db0dc 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -440,6 +440,8 @@ main = hspec $ do runGEvalTest "flags-sort" `shouldReturnAlmost` 0.3 it "filtering" $ do runGEvalTest "flags-filtering" `shouldReturnAlmost` 0.25 + it "filtering and matching" $ do + runGEvalTest "flags-filter-and-match" `shouldReturnAlmost` 0.8 describe "evaluating single lines" $ do it "RMSE" $ do (MetricOutput (SimpleRun v) _) <- gevalCoreOnSingleLines RMSE id RawItemTarget diff --git a/test/flags-filter-and-match/flags-filter-and-match-solution/test-A/out.tsv b/test/flags-filter-and-match/flags-filter-and-match-solution/test-A/out.tsv new file mode 100644 index 0000000..98f7844 --- /dev/null +++ b/test/flags-filter-and-match/flags-filter-and-match-solution/test-A/out.tsv @@ -0,0 +1,3 @@ +bar baz +ss +ss diff --git a/test/flags-filter-and-match/flags-filter-and-match/config.txt b/test/flags-filter-and-match/flags-filter-and-match/config.txt new file mode 100644 index 0000000..ecdf541 --- /dev/null +++ b/test/flags-filter-and-match/flags-filter-and-match/config.txt @@ -0,0 +1 @@ +--metric MultiLabel-F1:ft<^b> diff --git a/test/flags-filter-and-match/flags-filter-and-match/test-A/config.txt b/test/flags-filter-and-match/flags-filter-and-match/test-A/config.txt new file mode 100644 index 0000000..e3e8bc3 --- /dev/null +++ b/test/flags-filter-and-match/flags-filter-and-match/test-A/config.txt @@ -0,0 +1 @@ +--MultiLabel F1 diff --git a/test/flags-filter-and-match/flags-filter-and-match/test-A/expected.tsv b/test/flags-filter-and-match/flags-filter-and-match/test-A/expected.tsv new file mode 100644 index 0000000..18241b2 --- /dev/null +++ b/test/flags-filter-and-match/flags-filter-and-match/test-A/expected.tsv @@ -0,0 +1,3 @@ +bar baz baq aaa aaa aaa aaa aaa aaa aaa +baz +aaa diff --git a/test/flags-filter-and-match/flags-filter-and-match/test-A/in.tsv b/test/flags-filter-and-match/flags-filter-and-match/test-A/in.tsv new file mode 100644 index 0000000..86e041d --- /dev/null +++ b/test/flags-filter-and-match/flags-filter-and-match/test-A/in.tsv @@ -0,0 +1,3 @@ +foo +bar +baz