Fix filter and match combination

This commit is contained in:
Filip Gralinski 2021-06-07 18:00:43 +02:00
parent 54547fe0f8
commit fb63894db0
10 changed files with 40 additions and 12 deletions

View File

@ -186,6 +186,9 @@ isPreprocessable MultiLabelLogLoss = False
isPreprocessable MultiLabelLikelihood = False
isPreprocessable (Mean metric) = isPreprocessable metric
-- | Tells whether a metric allows its input to be modified by the
-- preprocessing operations of an evaluation scheme. Only 'CharMatch'
-- answers 'True'; every other metric answers 'False'.
isInputModifiable metric = case metric of
  CharMatch -> True
  _ -> False
-- | Default output directory: the current directory (@"."@).
defaultOutDirectory = "."
-- | Default test name (@"test-A"@).
defaultTestName = "test-A"
-- | Default output file name (@"out.tsv"@).
defaultOutFile = "out.tsv"
@ -323,14 +326,15 @@ dataSourceToLineSourcesSpecification dataSource = LineSourcesSpecification {
outSource = dataSourceOut dataSource
outOptions = FileProcessingOptions {
fileProcessingOptionsSelector = mSelector,
fileProcessingOptionsPreprocess = preprocess,
fileProcessingOptionsPreprocess = outPreprocess,
fileProcessingOptionsHeader = mOutHeader }
inOptions = FileProcessingOptions {
fileProcessingOptionsSelector = mSelector,
fileProcessingOptionsPreprocess = preprocess,
fileProcessingOptionsPreprocess = inPreprocess,
fileProcessingOptionsHeader = mInHeader }
mSelector = challengeDataSourceSelector chDataSource
preprocess = challengeDataSourcePreprocess chDataSource
outPreprocess = challengeDataSourceOutPreprocess chDataSource
inPreprocess = challengeDataSourceInPreprocess chDataSource
mInHeader = challengeDataSourceInHeader chDataSource
mOutHeader = challengeDataSourceOutHeader chDataSource
@ -355,12 +359,18 @@ gevalOnSingleOut gevalSpec dataSource = do
schemes = gesMetrics gevalSpec
addSchemeSpecifics :: EvaluationScheme -> DataSource -> DataSource
addSchemeSpecifics scheme dataSource =
addSchemeSpecifics scheme@(EvaluationScheme metric _) dataSource =
dataSource {
dataSourceChallengeData = (dataSourceChallengeData dataSource) {
challengeDataSourceFilter = getFilterForScheme (challengeDataSourceInHeader $ dataSourceChallengeData dataSource) scheme,
challengeDataSourcePreprocess =
(challengeDataSourcePreprocess $ dataSourceChallengeData dataSource) . (applyPreprocessingOperations scheme) }}
challengeDataSourceOutPreprocess = outPreprocess,
challengeDataSourceInPreprocess = inPreprocess
}}
where outPreprocess = (challengeDataSourceOutPreprocess $ dataSourceChallengeData dataSource) . (applyPreprocessingOperations scheme)
inPreprocess = (challengeDataSourceInPreprocess $ dataSourceChallengeData dataSource) . (if isInputModifiable metric
then (applyPreprocessingOperations scheme)
else id)
readHeaderFileWrapper :: Maybe FilePath -> IO (Maybe TabularHeader)
readHeaderFileWrapper Nothing = return Nothing
@ -413,7 +423,8 @@ checkAndGetDataSources forceInput gevalSpec = do
challengeDataSourceInput = inputSource,
challengeDataSourceExpected = expectedSource,
challengeDataSourceSelector = mSelector,
challengeDataSourcePreprocess = preprocess,
challengeDataSourceOutPreprocess = preprocess,
challengeDataSourceInPreprocess = preprocess,
challengeDataSourceFilter = noFilter,
challengeDataSourceInHeader = mInHeader,
challengeDataSourceOutHeader = mOutHeader,

View File

@ -69,7 +69,8 @@ data ChallengeDataSource = ChallengeDataSource {
challengeDataSourceInput :: SourceSpec,
challengeDataSourceExpected :: SourceSpec,
challengeDataSourceSelector :: Maybe Selector,
challengeDataSourcePreprocess :: Text -> Text,
challengeDataSourceOutPreprocess :: Text -> Text,
challengeDataSourceInPreprocess :: Text -> Text,
challengeDataSourceFilter :: Filter,
challengeDataSourceInHeader :: Maybe TabularHeader,
challengeDataSourceOutHeader :: Maybe TabularHeader,

View File

@ -10,6 +10,8 @@ module GEval.EvaluationScheme
import GEval.Metric
import Debug.Trace
import Text.Regex.PCRE.Heavy
import Text.Regex.PCRE.Light.Base (Regex(..))
import Text.Regex.PCRE.Light (compile)

View File

@ -617,23 +617,24 @@ gevalLineByLineSource metric dataSource =
evaluateLine (lineNo, ParsedRecordWithInput inp' exp' out') = do
let inp = if shouldBePreprocessedForPresentation
then preprocess inp'
then inPreprocess inp'
else inp'
let exp = preprocessOut exp'
let out = preprocessOut out'
s <- liftIO $ gevalCoreOnSingleLines metric
-- if also to be shown preprocessed, preprocessing
-- will be done earlier
(if shouldBePreprocessedForPresentation then id else preprocess)
(if shouldBePreprocessedForPresentation then id else outPreprocess)
(getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp)
(getDataDecoder expectedLineSource) (LineInFile expectedSource lineNo exp) (getDataDecoder outputLineSource) (LineInFile outSource lineNo out)
return $ LineRecord inp exp out lineNo (extractSimpleRunValue $ getMetricValue s)
preprocessOut = if shouldBePreprocessedForPresentation && isPreprocessable metric
then preprocess
then outPreprocess
else id
challengeDataSource = dataSourceChallengeData dataSource
mSelector = challengeDataSourceSelector challengeDataSource
preprocess = challengeDataSourcePreprocess challengeDataSource
outPreprocess = challengeDataSourceOutPreprocess challengeDataSource
inPreprocess = challengeDataSourceInPreprocess challengeDataSource
shouldBePreprocessedForPresentation = challengeDataSourceShowPreprocessed challengeDataSource
mInHeader = challengeDataSourceInHeader challengeDataSource
mOutHeader = challengeDataSourceOutHeader challengeDataSource

View File

@ -440,6 +440,8 @@ main = hspec $ do
runGEvalTest "flags-sort" `shouldReturnAlmost` 0.3
it "filtering" $ do
runGEvalTest "flags-filtering" `shouldReturnAlmost` 0.25
it "filtering and matching" $ do
runGEvalTest "flags-filter-and-match" `shouldReturnAlmost` 0.8
describe "evaluating single lines" $ do
it "RMSE" $ do
(MetricOutput (SimpleRun v) _) <- gevalCoreOnSingleLines RMSE id RawItemTarget

View File

@ -0,0 +1,3 @@
bar baz
ss
ss
1 bar baz
2 ss
3 ss

View File

@ -0,0 +1 @@
--metric MultiLabel-F1:f<in[1]:foo>t<^b>

View File

@ -0,0 +1 @@
--MultiLabel F1

View File

@ -0,0 +1,3 @@
bar baz baq aaa aaa aaa aaa aaa aaa aaa
baz
aaa
1 bar baz baq aaa aaa aaa aaa aaa aaa aaa
2 baz
3 aaa

View File

@ -0,0 +1,3 @@
foo
bar
baz
1 foo
2 bar
3 baz