diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 00d7361..194547c 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -188,6 +188,9 @@ isPreprocessable MultiLabelLikelihood = False isPreprocessable (Mean metric) = isPreprocessable metric isPreprocessable Haversine = False +isInputModifiable CharMatch = True +isInputModifiable _ = False + defaultOutDirectory = "." defaultTestName = "test-A" defaultOutFile = "out.tsv" @@ -325,14 +328,15 @@ dataSourceToLineSourcesSpecification dataSource = LineSourcesSpecification { outSource = dataSourceOut dataSource outOptions = FileProcessingOptions { fileProcessingOptionsSelector = mSelector, - fileProcessingOptionsPreprocess = preprocess, + fileProcessingOptionsPreprocess = outPreprocess, fileProcessingOptionsHeader = mOutHeader } inOptions = FileProcessingOptions { fileProcessingOptionsSelector = mSelector, - fileProcessingOptionsPreprocess = preprocess, + fileProcessingOptionsPreprocess = inPreprocess, fileProcessingOptionsHeader = mInHeader } mSelector = challengeDataSourceSelector chDataSource - preprocess = challengeDataSourcePreprocess chDataSource + outPreprocess = challengeDataSourceOutPreprocess chDataSource + inPreprocess = challengeDataSourceInPreprocess chDataSource mInHeader = challengeDataSourceInHeader chDataSource mOutHeader = challengeDataSourceOutHeader chDataSource @@ -357,12 +361,18 @@ gevalOnSingleOut gevalSpec dataSource = do schemes = gesMetrics gevalSpec addSchemeSpecifics :: EvaluationScheme -> DataSource -> DataSource -addSchemeSpecifics scheme dataSource = +addSchemeSpecifics scheme@(EvaluationScheme metric _) dataSource = dataSource { dataSourceChallengeData = (dataSourceChallengeData dataSource) { challengeDataSourceFilter = getFilterForScheme (challengeDataSourceInHeader $ dataSourceChallengeData dataSource) scheme, - challengeDataSourcePreprocess = - (challengeDataSourcePreprocess $ dataSourceChallengeData dataSource) . (applyPreprocessingOperations scheme) }} + challengeDataSourceOutPreprocess = outPreprocess, + challengeDataSourceInPreprocess = inPreprocess + }} + where outPreprocess = (challengeDataSourceOutPreprocess $ dataSourceChallengeData dataSource) . (applyPreprocessingOperations scheme) + inPreprocess = (challengeDataSourceInPreprocess $ dataSourceChallengeData dataSource) . (if isInputModifiable metric + then (applyPreprocessingOperations scheme) + else id) + readHeaderFileWrapper :: Maybe FilePath -> IO (Maybe TabularHeader) readHeaderFileWrapper Nothing = return Nothing @@ -415,7 +425,8 @@ checkAndGetDataSources forceInput gevalSpec = do challengeDataSourceInput = inputSource, challengeDataSourceExpected = expectedSource, challengeDataSourceSelector = mSelector, - challengeDataSourcePreprocess = preprocess, + challengeDataSourceOutPreprocess = preprocess, + challengeDataSourceInPreprocess = preprocess, challengeDataSourceFilter = noFilter, challengeDataSourceInHeader = mInHeader, challengeDataSourceOutHeader = mOutHeader, diff --git a/src/GEval/DataSource.hs b/src/GEval/DataSource.hs index 22fd3db..309aca6 100644 --- a/src/GEval/DataSource.hs +++ b/src/GEval/DataSource.hs @@ -69,7 +69,8 @@ data ChallengeDataSource = ChallengeDataSource { challengeDataSourceInput :: SourceSpec, challengeDataSourceExpected :: SourceSpec, challengeDataSourceSelector :: Maybe Selector, - challengeDataSourcePreprocess :: Text -> Text, + challengeDataSourceOutPreprocess :: Text -> Text, + challengeDataSourceInPreprocess :: Text -> Text, challengeDataSourceFilter :: Filter, challengeDataSourceInHeader :: Maybe TabularHeader, challengeDataSourceOutHeader :: Maybe TabularHeader, diff --git a/src/GEval/EvaluationScheme.hs b/src/GEval/EvaluationScheme.hs index 9d9c6c3..18926be 100644 --- a/src/GEval/EvaluationScheme.hs +++ b/src/GEval/EvaluationScheme.hs @@ -10,6 +10,8 @@ module GEval.EvaluationScheme import GEval.Metric +import Debug.Trace + import Text.Regex.PCRE.Heavy import Text.Regex.PCRE.Light.Base (Regex(..)) import Text.Regex.PCRE.Light (compile) diff --git a/src/GEval/LineByLine.hs b/src/GEval/LineByLine.hs index 07b291c..7a93da4 100644 --- a/src/GEval/LineByLine.hs +++ b/src/GEval/LineByLine.hs @@ -617,23 +617,24 @@ gevalLineByLineSource metric dataSource = evaluateLine (lineNo, ParsedRecordWithInput inp' exp' out') = do let inp = if shouldBePreprocessedForPresentation - then preprocess inp' + then inPreprocess inp' else inp' let exp = preprocessOut exp' let out = preprocessOut out' s <- liftIO $ gevalCoreOnSingleLines metric -- if also to be shown preprocessed, preprocessing -- will be done earlier - (if shouldBePreprocessedForPresentation then id else preprocess) + (if shouldBePreprocessedForPresentation then id else outPreprocess) (getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp) (getDataDecoder expectedLineSource) (LineInFile expectedSource lineNo exp) (getDataDecoder outputLineSource) (LineInFile outSource lineNo out) return $ LineRecord inp exp out lineNo (extractSimpleRunValue $ getMetricValue s) preprocessOut = if shouldBePreprocessedForPresentation && isPreprocessable metric - then preprocess + then outPreprocess else id challengeDataSource = dataSourceChallengeData dataSource mSelector = challengeDataSourceSelector challengeDataSource - preprocess = challengeDataSourcePreprocess challengeDataSource + outPreprocess = challengeDataSourceOutPreprocess challengeDataSource + inPreprocess = challengeDataSourceInPreprocess challengeDataSource shouldBePreprocessedForPresentation = challengeDataSourceShowPreprocessed challengeDataSource mInHeader = challengeDataSourceInHeader challengeDataSource mOutHeader = challengeDataSourceOutHeader challengeDataSource diff --git a/test/Spec.hs b/test/Spec.hs index eefc80a..b80c16b 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -443,6 +443,8 @@ main = hspec $ do runGEvalTest "flags-sort" `shouldReturnAlmost` 0.3 it "filtering" $ do runGEvalTest "flags-filtering" `shouldReturnAlmost` 0.25 + it "filtering and matching" $ do + runGEvalTest "flags-filter-and-match" `shouldReturnAlmost` 0.8 describe "evaluating single lines" $ do it "RMSE" $ do (MetricOutput (SimpleRun v) _) <- gevalCoreOnSingleLines RMSE id RawItemTarget diff --git a/test/flags-filter-and-match/flags-filter-and-match-solution/test-A/out.tsv b/test/flags-filter-and-match/flags-filter-and-match-solution/test-A/out.tsv new file mode 100644 index 0000000..98f7844 --- /dev/null +++ b/test/flags-filter-and-match/flags-filter-and-match-solution/test-A/out.tsv @@ -0,0 +1,3 @@ +bar baz +ss +ss diff --git a/test/flags-filter-and-match/flags-filter-and-match/config.txt b/test/flags-filter-and-match/flags-filter-and-match/config.txt new file mode 100644 index 0000000..ecdf541 --- /dev/null +++ b/test/flags-filter-and-match/flags-filter-and-match/config.txt @@ -0,0 +1 @@ +--metric MultiLabel-F1:ft<^b> diff --git a/test/flags-filter-and-match/flags-filter-and-match/test-A/config.txt b/test/flags-filter-and-match/flags-filter-and-match/test-A/config.txt new file mode 100644 index 0000000..e3e8bc3 --- /dev/null +++ b/test/flags-filter-and-match/flags-filter-and-match/test-A/config.txt @@ -0,0 +1 @@ +--MultiLabel F1 diff --git a/test/flags-filter-and-match/flags-filter-and-match/test-A/expected.tsv b/test/flags-filter-and-match/flags-filter-and-match/test-A/expected.tsv new file mode 100644 index 0000000..18241b2 --- /dev/null +++ b/test/flags-filter-and-match/flags-filter-and-match/test-A/expected.tsv @@ -0,0 +1,3 @@ +bar baz baq aaa aaa aaa aaa aaa aaa aaa +baz +aaa diff --git a/test/flags-filter-and-match/flags-filter-and-match/test-A/in.tsv b/test/flags-filter-and-match/flags-filter-and-match/test-A/in.tsv new file mode 100644 index 0000000..86e041d --- /dev/null +++ b/test/flags-filter-and-match/flags-filter-and-match/test-A/in.tsv @@ -0,0 +1,3 @@ +foo +bar +baz