Refactor sources

This commit is contained in:
Filip Gralinski 2020-05-13 11:34:51 +02:00
parent 015053ec87
commit da5304014e
3 changed files with 48 additions and 35 deletions

View File

@ -50,6 +50,10 @@ module GEval.Core
getInHeader, getInHeader,
getOutHeader, getOutHeader,
addPreprocessing, addPreprocessing,
LineSourcesSpecification(..),
dataSourceToLineSourcesSpecification,
fromSpecificationToWithoutInput,
fromSpecificationToWithInput
) where ) where
import Debug.Trace import Debug.Trace
@ -294,10 +298,6 @@ dataSourceToLineSourcesSpecification dataSource = LineSourcesSpecification {
mInHeader = challengeDataSourceInHeader chDataSource mInHeader = challengeDataSourceInHeader chDataSource
mOutHeader = challengeDataSourceOutHeader chDataSource mOutHeader = challengeDataSourceOutHeader chDataSource
toWithoutInput lsSpec = WithoutInput expectedSource outSource
where expectedSource = lineSourcesExpectedSource lsSpec
outSource = lineSourcesOutputSource lsSpec
geval :: GEvalSpecification -> IO [(SourceSpec, [MetricOutput])] geval :: GEvalSpecification -> IO [(SourceSpec, [MetricOutput])]
geval gevalSpec = do geval gevalSpec = do
dataSources <- checkAndGetDataSources False gevalSpec dataSources <- checkAndGetDataSources False gevalSpec
@ -562,7 +562,7 @@ gevalBootstrapOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta)) lsSpec = do gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta)) lsSpec = do
gevalRunPipeline parserSpec (trans step) finalPipeline context gevalRunPipeline parserSpec (trans step) finalPipeline context
where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser)) where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
context = (WithoutInput expectedLineStream outLineStream) context = fromSpecificationToWithoutInput lsSpec
step = itemStep SAMultiLabelFMeasure step = itemStep SAMultiLabelFMeasure
expParser = expectedParser SAMultiLabelFMeasure expParser = expectedParser SAMultiLabelFMeasure
outParser = outputParser SAMultiLabelFMeasure outParser = outputParser SAMultiLabelFMeasure
@ -572,22 +572,19 @@ gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta)) lsSpec
$ continueGEvalCalculations SAMSE MSE)) $ continueGEvalCalculations SAMSE MSE))
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
trans step (ParsedRecordWithoutInput x y) = step (x, y) trans step (ParsedRecordWithoutInput x y) = step (x, y)
expectedLineStream = lineSourcesExpectedSource lsSpec
outLineStream = lineSourcesOutputSource lsSpec
gevalBootstrapOnSources numberOfSamples metric lsSpec = do gevalBootstrapOnSources numberOfSamples metric lsSpec = do
case toSing $ toHelper metric of case toSing $ toHelper metric of
SomeSing smetric -> gevalRunPipeline parserSpec (trans step) finalPipeline context SomeSing smetric -> gevalRunPipeline parserSpec (trans step) finalPipeline context
where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser)) where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
context = (WithoutInput expectedLineStream outLineStream) context = fromSpecificationToWithoutInput lsSpec
step = itemStep smetric step = itemStep smetric
expParser = expectedParser smetric expParser = expectedParser smetric
outParser = outputParser smetric outParser = outputParser smetric
finalPipeline = fixer (bootstrapC numberOfSamples $ continueGEvalCalculations smetric metric) finalPipeline = fixer (bootstrapC numberOfSamples $ continueGEvalCalculations smetric metric)
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
trans step (ParsedRecordWithoutInput x y) = step (x, y) trans step (ParsedRecordWithoutInput x y) = step (x, y)
expectedLineStream = lineSourcesExpectedSource lsSpec
outLineStream = lineSourcesOutputSource lsSpec
fixer :: ConduitT c Void (ResourceT m) [MetricOutput] -> ConduitT c Void (ResourceT m) MetricOutput fixer :: ConduitT c Void (ResourceT m) [MetricOutput] -> ConduitT c Void (ResourceT m) MetricOutput
fixer c = do fixer c = do
@ -617,10 +614,7 @@ gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
gevalCoreOnSources CharMatch = helper gevalCoreOnSources CharMatch = helper
where where
helper lsSpec = do helper lsSpec = do
gevalCoreGeneralized (ParserSpecWithInput justUnpack justUnpack justUnpack) step countAgg (fMeasureOnCounts charMatchBeta) noGraph (WithInput inputLineSource expectedLineSource outputLineSource) gevalCoreGeneralized (ParserSpecWithInput justUnpack justUnpack justUnpack) step countAgg (fMeasureOnCounts charMatchBeta) noGraph (fromSpecificationToWithInput lsSpec)
where inputLineSource = lineSourcesInputSource lsSpec
expectedLineSource = lineSourcesExpectedSource lsSpec
outputLineSource = lineSourcesOutputSource lsSpec
step (ParsedRecordWithInput inp exp out) = getCharMatchCount inp exp out step (ParsedRecordWithInput inp exp out) = getCharMatchCount inp exp out
justUnpack = liftOp (Right . unpack) justUnpack = liftOp (Right . unpack)
@ -698,7 +692,7 @@ gevalCoreOnSourcesStandardWay metric lsSpec =
case toSing $ toHelper metric of case toSing $ toHelper metric of
SomeSing smetric -> gevalRunPipeline parserSpec (trans step) finalPipeline context SomeSing smetric -> gevalRunPipeline parserSpec (trans step) finalPipeline context
where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser)) where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
context = toWithoutInput lsSpec context = fromSpecificationToWithoutInput lsSpec
step = itemStep smetric step = itemStep smetric
expParser = expectedParser smetric expParser = expectedParser smetric
outParser = outputParser smetric outParser = outputParser smetric
@ -707,7 +701,7 @@ gevalCoreOnSourcesStandardWay metric lsSpec =
trans step (ParsedRecordWithoutInput x y) = step (x, y) trans step (ParsedRecordWithoutInput x y) = step (x, y)
helperLogLossHashed nbOfBits finalStep lsSpec = helperLogLossHashed nbOfBits finalStep lsSpec =
gevalCore''' (ParserSpecWithoutInput (liftOp (Right . id)) (liftOp tentativeParser)) (\(lineNo, (t,d)) -> calculateLogLoss nbOfBits lineNo t (parseDistributionWrapper nbOfBits lineNo d)) averageC (finalStep . negate) noGraph (toWithoutInput lsSpec) gevalCore''' (ParserSpecWithoutInput (liftOp (Right . id)) (liftOp tentativeParser)) (\(lineNo, (t,d)) -> calculateLogLoss nbOfBits lineNo t (parseDistributionWrapper nbOfBits lineNo d)) averageC (finalStep . negate) noGraph (fromSpecificationToWithoutInput lsSpec)
where -- Unfortunately, we're parsing the distribution twice. We need to where -- Unfortunately, we're parsing the distribution twice. We need to
-- tentatively parse the distribution when the line number is unknown -- tentatively parse the distribution when the line number is unknown
-- (so we just set it to 1) -- (so we just set it to 1)
@ -788,7 +782,7 @@ gevalCoreWithoutInputOnItemTargets :: (MonadUnliftIO m, MonadThrow m, MonadIO m)
-> LineSourcesSpecification (ResourceT m) -> LineSourcesSpecification (ResourceT m)
-> m (MetricOutput) -- ^ metric values for the output against the expected output -> m (MetricOutput) -- ^ metric values for the output against the expected output
gevalCoreWithoutInputOnItemTargets expParser outParser itemStep aggregator finalStep generateGraph lsSpec = gevalCoreWithoutInputOnItemTargets expParser outParser itemStep aggregator finalStep generateGraph lsSpec =
gevalCoreGeneralized (ParserSpecWithoutInput expParser outParser) (trans itemStep) aggregator finalStep generateGraph (toWithoutInput lsSpec) gevalCoreGeneralized (ParserSpecWithoutInput expParser outParser) (trans itemStep) aggregator finalStep generateGraph (fromSpecificationToWithoutInput lsSpec)
where where
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
trans step (ParsedRecordWithoutInput x y) = step (x, y) trans step (ParsedRecordWithoutInput x y) = step (x, y)
@ -958,6 +952,16 @@ defineContinuation aggregator finalStep generateGraph = do
v <- aggregator v <- aggregator
return $ MetricOutput (SimpleRun $ finalStep v) (generateGraph v) return $ MetricOutput (SimpleRun $ finalStep v) (generateGraph v)
fromSpecificationToWithoutInput lsSpec = WithoutInput expectedSource outSource
where expectedSource = lineSourcesExpectedSource lsSpec
outSource = lineSourcesOutputSource lsSpec
fromSpecificationToWithInput lsSpec = WithInput theFilter inpSource expectedSource outSource
where inpSource = lineSourcesInputSource lsSpec
expectedSource = lineSourcesExpectedSource lsSpec
outSource = lineSourcesOutputSource lsSpec
theFilter = lineSourcesFilter lsSpec
-- | A type family to handle all the evaluation "context". -- | A type family to handle all the evaluation "context".
-- --
-- This is needed as for some metrics the output and the expected metric is enough -- This is needed as for some metrics the output and the expected metric is enough
@ -1001,18 +1005,19 @@ instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (Withou
checkStepM _ (_, WrappedParsedRecordWithoutInput Done Done) = return Nothing checkStepM _ (_, WrappedParsedRecordWithoutInput Done Done) = return Nothing
data WithInput m i e o = WithInput (LineSource (ResourceT m)) (LineSource (ResourceT m)) (LineSource (ResourceT m))
getInputFilePath (WithInput (LineSource _ _ _ inputFilePath _) _ _) = inputFilePath data WithInput m i e o = WithInput Filter (LineSource (ResourceT m)) (LineSource (ResourceT m)) (LineSource (ResourceT m))
getInputFilePath (WithInput _ (LineSource _ _ _ inputFilePath _) _ _) = inputFilePath
instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithInput m i e o) m where instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithInput m i e o) m where
data ParserSpec (WithInput m i e o) = ParserSpecWithInput (ItemTarget -> Either String i) (ItemTarget -> Either String e) (ItemTarget -> Either String o) data ParserSpec (WithInput m i e o) = ParserSpecWithInput (ItemTarget -> Either String i) (ItemTarget -> Either String e) (ItemTarget -> Either String o)
data WrappedParsedRecord (WithInput m i e o) = WrappedParsedRecordWithInput (SourceItem i) (SourceItem e) (SourceItem o) data WrappedParsedRecord (WithInput m i e o) = WrappedParsedRecordWithInput (SourceItem i) (SourceItem e) (SourceItem o)
data ParsedRecord (WithInput m i e o) = ParsedRecordWithInput i e o data ParsedRecord (WithInput m i e o) = ParsedRecordWithInput i e o
getFirstLineNo _ (WithInput _ _ (LineSource _ _ _ _ lineNo)) = lineNo getFirstLineNo _ (WithInput _ _ _ (LineSource _ _ _ _ lineNo)) = lineNo
getExpectedSource (WithInput _ (LineSource _ _ _ expectedSource _) _) = expectedSource getExpectedSource (WithInput _ _ (LineSource _ _ _ expectedSource _) _) = expectedSource
getOutSource (WithInput _ _ (LineSource _ _ _ outSource _)) = outSource getOutSource (WithInput _ _ _ (LineSource _ _ _ outSource _)) = outSource
recordSource (WithInput inputLineSource expectedLineSource outLineSource) (ParserSpecWithInput inpParser expParser outParser) = getZipSource $ (\x (y,z) -> WrappedParsedRecordWithInput x y z) recordSource (WithInput _ inputLineSource expectedLineSource outLineSource) (ParserSpecWithInput inpParser expParser outParser) = getZipSource $ (\x (y,z) -> WrappedParsedRecordWithInput x y z)
<$> ZipSource (items inputLineSource inpParser) <*> (ZipSource $ getZipSource $ (,) <$> ZipSource (items inputLineSource inpParser) <*> (ZipSource $ getZipSource $ (,)
<$> ZipSource (items expectedLineSource expParser) <$> ZipSource (items expectedLineSource expParser)
<*> ZipSource (items outLineSource outParser)) <*> ZipSource (items outLineSource outParser))
@ -1036,12 +1041,15 @@ instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithIn
checkStepM _ (_, WrappedParsedRecordWithInput (Got _) Done Done) = throw TooManyLinesInInput checkStepM _ (_, WrappedParsedRecordWithInput (Got _) Done Done) = throw TooManyLinesInInput
checkStepM _ (_, WrappedParsedRecordWithInput Done Done Done) = return Nothing checkStepM _ (_, WrappedParsedRecordWithInput Done Done Done) = return Nothing
threeLineSource :: (MonadUnliftIO m, MonadIO m, MonadThrow m) => WithInput m Text Text Text -> ConduitT () (WrappedParsedRecord (WithInput m Text Text Text)) (ResourceT m) () threeLineSource :: (MonadUnliftIO m, MonadIO m, MonadThrow m) => WithInput m Text Text Text -> ConduitT () (WrappedParsedRecord (WithInput m Text Text Text)) (ResourceT m) ()
threeLineSource (WithInput inputLineSource expectedLineSource outLineSource) = getZipSource $ (\x (y,z) -> WrappedParsedRecordWithInput x y z) threeLineSource (WithInput theFilter inputLineSource expectedLineSource outLineSource) =
<$> ZipSource (linesAsItems inputLineSource) <*> (ZipSource $ getZipSource $ (,) (getZipSource $ (\x (y,z) -> (x, (y,z)))
<$> ZipSource (linesAsItems inputLineSource)
<*> (ZipSource $ getZipSource $ (,)
<$> ZipSource (linesAsItems expectedLineSource) <$> ZipSource (linesAsItems expectedLineSource)
<*> ZipSource (linesAsItems outLineSource)) <*> ZipSource (linesAsItems outLineSource)))
.| (CC.filter (applyFilterToSourceItems theFilter))
.| (CC.map (\(x, (y,z)) -> WrappedParsedRecordWithInput x y z))
averageC :: MonadResource m => ConduitT Double Void m Double averageC :: MonadResource m => ConduitT Double Void m Double
averageC = getZipSink averageC = getZipSink
@ -1064,3 +1072,7 @@ items (LineSource lineSource itemDecoder preprocess _ _) parser =
linesAsItems :: MonadResource m => LineSource m -> ConduitT () (SourceItem Text) m () linesAsItems :: MonadResource m => LineSource m -> ConduitT () (SourceItem Text) m ()
linesAsItems (LineSource lineSource _ _ _ _) = linesAsItems (LineSource lineSource _ _ _ _) =
(lineSource .| CL.map Got) >> yield Done (lineSource .| CL.map Got) >> yield Done
applyFilterToSourceItems :: Filter -> (SourceItem Text, (SourceItem Text, SourceItem Text)) -> Bool
applyFilterToSourceItems filter (Got x, (Got y, Got z)) = applyFilter filter (x, (y, z))
applyFilterToSourceItems _ special = True

View File

@ -12,14 +12,14 @@ import Data.Conduit.SmartSource
import Data.Conduit.Header import Data.Conduit.Header
import GEval.Selector import GEval.Selector
newtype Filter = Filter (Maybe (Text -> Bool)) data Filter = NoFilter | InputFilter (Text -> Bool)
noFilter :: Filter noFilter :: Filter
noFilter = Filter Nothing noFilter = NoFilter
applyFilter :: Filter -> (Text, (Text, Text)) -> Bool applyFilter :: Filter -> (Text, (Text, Text)) -> Bool
applyFilter (Filter Nothing) _ = True applyFilter NoFilter _ = True
applyFilter (Filter (Just fun)) (inp, (exp, out)) = fun inp applyFilter (InputFilter fun) (inp, (exp, out)) = fun inp
-- | This type specifies the way the challenge data (input and -- | This type specifies the way the challenge data (input and
-- expected data, but not outputs) flow into evaluation. -- expected data, but not outputs) flow into evaluation.

View File

@ -610,10 +610,11 @@ gevalLineByLineSource metric dataSource =
(getZipSource $ (,) (getZipSource $ (,)
<$> ZipSource (CL.sourceList [1..]) <$> ZipSource (CL.sourceList [1..])
<*> (ZipSource $ threeLineSource context)) .| CL.mapM (checkStepM evaluateLine) .| CL.catMaybes <*> (ZipSource $ threeLineSource context)) .| CL.mapM (checkStepM evaluateLine) .| CL.catMaybes
where context = (WithInput inputLineSource expectedLineSource outputLineSource) where context = fromSpecificationToWithInput lsSpec
inputLineSource = fileAsLineSource inputSource inOptions lsSpec = dataSourceToLineSourcesSpecification dataSource
expectedLineSource = fileAsLineSource expectedSource outOptions inputLineSource = lineSourcesInputSource lsSpec
outputLineSource = fileAsLineSource outSource outOptions expectedLineSource = lineSourcesExpectedSource lsSpec
outputLineSource = lineSourcesOutputSource lsSpec
justLine (LineInFile _ _ l) = l justLine (LineInFile _ _ l) = l
evaluateLine (lineNo, ParsedRecordWithInput inp exp out) = do evaluateLine (lineNo, ParsedRecordWithInput inp exp out) = do
s <- liftIO $ gevalCoreOnSingleLines metric preprocess (getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp) s <- liftIO $ gevalCoreOnSingleLines metric preprocess (getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp)