Merge branch 'filter-and-match'

This commit is contained in:
Filip Gralinski 2021-06-10 15:01:27 +02:00
commit 4eb20f5bb0
10 changed files with 40 additions and 12 deletions

View File

@ -188,6 +188,9 @@ isPreprocessable MultiLabelLikelihood = False
isPreprocessable (Mean metric) = isPreprocessable metric isPreprocessable (Mean metric) = isPreprocessable metric
isPreprocessable Haversine = False isPreprocessable Haversine = False
-- | Tells whether a metric is flagged as "input-modifiable".
-- Only 'CharMatch' yields 'True'; every other metric falls through to
-- the wildcard equation and yields 'False'.
-- NOTE(review): judging by its use in @addSchemeSpecifics@, the flag decides
-- whether a scheme's preprocessing operations are also applied to the
-- input column -- confirm against the callers.
isInputModifiable CharMatch = True
isInputModifiable _ = False
defaultOutDirectory = "." defaultOutDirectory = "."
defaultTestName = "test-A" defaultTestName = "test-A"
defaultOutFile = "out.tsv" defaultOutFile = "out.tsv"
@ -325,14 +328,15 @@ dataSourceToLineSourcesSpecification dataSource = LineSourcesSpecification {
outSource = dataSourceOut dataSource outSource = dataSourceOut dataSource
outOptions = FileProcessingOptions { outOptions = FileProcessingOptions {
fileProcessingOptionsSelector = mSelector, fileProcessingOptionsSelector = mSelector,
fileProcessingOptionsPreprocess = preprocess, fileProcessingOptionsPreprocess = outPreprocess,
fileProcessingOptionsHeader = mOutHeader } fileProcessingOptionsHeader = mOutHeader }
inOptions = FileProcessingOptions { inOptions = FileProcessingOptions {
fileProcessingOptionsSelector = mSelector, fileProcessingOptionsSelector = mSelector,
fileProcessingOptionsPreprocess = preprocess, fileProcessingOptionsPreprocess = inPreprocess,
fileProcessingOptionsHeader = mInHeader } fileProcessingOptionsHeader = mInHeader }
mSelector = challengeDataSourceSelector chDataSource mSelector = challengeDataSourceSelector chDataSource
preprocess = challengeDataSourcePreprocess chDataSource outPreprocess = challengeDataSourceOutPreprocess chDataSource
inPreprocess = challengeDataSourceInPreprocess chDataSource
mInHeader = challengeDataSourceInHeader chDataSource mInHeader = challengeDataSourceInHeader chDataSource
mOutHeader = challengeDataSourceOutHeader chDataSource mOutHeader = challengeDataSourceOutHeader chDataSource
@ -357,12 +361,18 @@ gevalOnSingleOut gevalSpec dataSource = do
schemes = gesMetrics gevalSpec schemes = gesMetrics gevalSpec
-- | Specialises a 'DataSource' for a single evaluation scheme.
--
-- The challenge data inside the data source is updated so that:
--
-- * the filter is the one returned by @getFilterForScheme@ for the
--   input header and the scheme;
-- * the preprocessing function is the already-configured preprocessing
--   composed (via '.') after @applyPreprocessingOperations scheme@, i.e. the
--   scheme's operations are applied first.
--
-- In the newer (right-hand) variant shown here preprocessing is split in two:
-- the output-side function always gets the scheme's operations, whereas the
-- input-side function gets them only when @isInputModifiable metric@ holds;
-- otherwise 'id' is composed and the input preprocessing is left as it was.
addSchemeSpecifics :: EvaluationScheme -> DataSource -> DataSource addSchemeSpecifics :: EvaluationScheme -> DataSource -> DataSource
addSchemeSpecifics scheme dataSource = addSchemeSpecifics scheme@(EvaluationScheme metric _) dataSource =
dataSource { dataSource {
dataSourceChallengeData = (dataSourceChallengeData dataSource) { dataSourceChallengeData = (dataSourceChallengeData dataSource) {
challengeDataSourceFilter = getFilterForScheme (challengeDataSourceInHeader $ dataSourceChallengeData dataSource) scheme, challengeDataSourceFilter = getFilterForScheme (challengeDataSourceInHeader $ dataSourceChallengeData dataSource) scheme,
challengeDataSourcePreprocess = challengeDataSourceOutPreprocess = outPreprocess,
(challengeDataSourcePreprocess $ dataSourceChallengeData dataSource) . (applyPreprocessingOperations scheme) }} challengeDataSourceInPreprocess = inPreprocess
}}
where outPreprocess = (challengeDataSourceOutPreprocess $ dataSourceChallengeData dataSource) . (applyPreprocessingOperations scheme)
inPreprocess = (challengeDataSourceInPreprocess $ dataSourceChallengeData dataSource) . (if isInputModifiable metric
then (applyPreprocessingOperations scheme)
else id)
readHeaderFileWrapper :: Maybe FilePath -> IO (Maybe TabularHeader) readHeaderFileWrapper :: Maybe FilePath -> IO (Maybe TabularHeader)
readHeaderFileWrapper Nothing = return Nothing readHeaderFileWrapper Nothing = return Nothing
@ -415,7 +425,8 @@ checkAndGetDataSources forceInput gevalSpec = do
challengeDataSourceInput = inputSource, challengeDataSourceInput = inputSource,
challengeDataSourceExpected = expectedSource, challengeDataSourceExpected = expectedSource,
challengeDataSourceSelector = mSelector, challengeDataSourceSelector = mSelector,
challengeDataSourcePreprocess = preprocess, challengeDataSourceOutPreprocess = preprocess,
challengeDataSourceInPreprocess = preprocess,
challengeDataSourceFilter = noFilter, challengeDataSourceFilter = noFilter,
challengeDataSourceInHeader = mInHeader, challengeDataSourceInHeader = mInHeader,
challengeDataSourceOutHeader = mOutHeader, challengeDataSourceOutHeader = mOutHeader,

View File

@ -69,7 +69,8 @@ data ChallengeDataSource = ChallengeDataSource {
challengeDataSourceInput :: SourceSpec, challengeDataSourceInput :: SourceSpec,
challengeDataSourceExpected :: SourceSpec, challengeDataSourceExpected :: SourceSpec,
challengeDataSourceSelector :: Maybe Selector, challengeDataSourceSelector :: Maybe Selector,
challengeDataSourcePreprocess :: Text -> Text, challengeDataSourceOutPreprocess :: Text -> Text,
challengeDataSourceInPreprocess :: Text -> Text,
challengeDataSourceFilter :: Filter, challengeDataSourceFilter :: Filter,
challengeDataSourceInHeader :: Maybe TabularHeader, challengeDataSourceInHeader :: Maybe TabularHeader,
challengeDataSourceOutHeader :: Maybe TabularHeader, challengeDataSourceOutHeader :: Maybe TabularHeader,

View File

@ -10,6 +10,8 @@ module GEval.EvaluationScheme
import GEval.Metric import GEval.Metric
import Debug.Trace
import Text.Regex.PCRE.Heavy import Text.Regex.PCRE.Heavy
import Text.Regex.PCRE.Light.Base (Regex(..)) import Text.Regex.PCRE.Light.Base (Regex(..))
import Text.Regex.PCRE.Light (compile) import Text.Regex.PCRE.Light (compile)

View File

@ -617,23 +617,24 @@ gevalLineByLineSource metric dataSource =
evaluateLine (lineNo, ParsedRecordWithInput inp' exp' out') = do evaluateLine (lineNo, ParsedRecordWithInput inp' exp' out') = do
let inp = if shouldBePreprocessedForPresentation let inp = if shouldBePreprocessedForPresentation
then preprocess inp' then inPreprocess inp'
else inp' else inp'
let exp = preprocessOut exp' let exp = preprocessOut exp'
let out = preprocessOut out' let out = preprocessOut out'
s <- liftIO $ gevalCoreOnSingleLines metric s <- liftIO $ gevalCoreOnSingleLines metric
-- if also to be shown preprocessed, preprocessing -- if also to be shown preprocessed, preprocessing
-- will be done earlier -- will be done earlier
(if shouldBePreprocessedForPresentation then id else preprocess) (if shouldBePreprocessedForPresentation then id else outPreprocess)
(getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp) (getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp)
(getDataDecoder expectedLineSource) (LineInFile expectedSource lineNo exp) (getDataDecoder outputLineSource) (LineInFile outSource lineNo out) (getDataDecoder expectedLineSource) (LineInFile expectedSource lineNo exp) (getDataDecoder outputLineSource) (LineInFile outSource lineNo out)
return $ LineRecord inp exp out lineNo (extractSimpleRunValue $ getMetricValue s) return $ LineRecord inp exp out lineNo (extractSimpleRunValue $ getMetricValue s)
preprocessOut = if shouldBePreprocessedForPresentation && isPreprocessable metric preprocessOut = if shouldBePreprocessedForPresentation && isPreprocessable metric
then preprocess then outPreprocess
else id else id
challengeDataSource = dataSourceChallengeData dataSource challengeDataSource = dataSourceChallengeData dataSource
mSelector = challengeDataSourceSelector challengeDataSource mSelector = challengeDataSourceSelector challengeDataSource
preprocess = challengeDataSourcePreprocess challengeDataSource outPreprocess = challengeDataSourceOutPreprocess challengeDataSource
inPreprocess = challengeDataSourceInPreprocess challengeDataSource
shouldBePreprocessedForPresentation = challengeDataSourceShowPreprocessed challengeDataSource shouldBePreprocessedForPresentation = challengeDataSourceShowPreprocessed challengeDataSource
mInHeader = challengeDataSourceInHeader challengeDataSource mInHeader = challengeDataSourceInHeader challengeDataSource
mOutHeader = challengeDataSourceOutHeader challengeDataSource mOutHeader = challengeDataSourceOutHeader challengeDataSource

View File

@ -443,6 +443,8 @@ main = hspec $ do
runGEvalTest "flags-sort" `shouldReturnAlmost` 0.3 runGEvalTest "flags-sort" `shouldReturnAlmost` 0.3
it "filtering" $ do it "filtering" $ do
runGEvalTest "flags-filtering" `shouldReturnAlmost` 0.25 runGEvalTest "flags-filtering" `shouldReturnAlmost` 0.25
it "filtering and matching" $ do
runGEvalTest "flags-filter-and-match" `shouldReturnAlmost` 0.8
describe "evaluating single lines" $ do describe "evaluating single lines" $ do
it "RMSE" $ do it "RMSE" $ do
(MetricOutput (SimpleRun v) _) <- gevalCoreOnSingleLines RMSE id RawItemTarget (MetricOutput (SimpleRun v) _) <- gevalCoreOnSingleLines RMSE id RawItemTarget

View File

@ -0,0 +1,3 @@
bar baz
ss
ss
1 bar baz
2 ss
3 ss

View File

@ -0,0 +1 @@
--metric MultiLabel-F1:f<in[1]:foo>t<^b>

View File

@ -0,0 +1 @@
--MultiLabel F1

View File

@ -0,0 +1,3 @@
bar baz baq aaa aaa aaa aaa aaa aaa aaa
baz
aaa
1 bar baz baq aaa aaa aaa aaa aaa aaa aaa
2 baz
3 aaa

View File

@ -0,0 +1,3 @@
foo
bar
baz
1 foo
2 bar
3 baz