Refactor sources
This commit is contained in:
parent
015053ec87
commit
da5304014e
@ -50,6 +50,10 @@ module GEval.Core
|
|||||||
getInHeader,
|
getInHeader,
|
||||||
getOutHeader,
|
getOutHeader,
|
||||||
addPreprocessing,
|
addPreprocessing,
|
||||||
|
LineSourcesSpecification(..),
|
||||||
|
dataSourceToLineSourcesSpecification,
|
||||||
|
fromSpecificationToWithoutInput,
|
||||||
|
fromSpecificationToWithInput
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Debug.Trace
|
import Debug.Trace
|
||||||
@ -294,10 +298,6 @@ dataSourceToLineSourcesSpecification dataSource = LineSourcesSpecification {
|
|||||||
mInHeader = challengeDataSourceInHeader chDataSource
|
mInHeader = challengeDataSourceInHeader chDataSource
|
||||||
mOutHeader = challengeDataSourceOutHeader chDataSource
|
mOutHeader = challengeDataSourceOutHeader chDataSource
|
||||||
|
|
||||||
toWithoutInput lsSpec = WithoutInput expectedSource outSource
|
|
||||||
where expectedSource = lineSourcesExpectedSource lsSpec
|
|
||||||
outSource = lineSourcesOutputSource lsSpec
|
|
||||||
|
|
||||||
geval :: GEvalSpecification -> IO [(SourceSpec, [MetricOutput])]
|
geval :: GEvalSpecification -> IO [(SourceSpec, [MetricOutput])]
|
||||||
geval gevalSpec = do
|
geval gevalSpec = do
|
||||||
dataSources <- checkAndGetDataSources False gevalSpec
|
dataSources <- checkAndGetDataSources False gevalSpec
|
||||||
@ -562,7 +562,7 @@ gevalBootstrapOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
|
|||||||
gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta)) lsSpec = do
|
gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta)) lsSpec = do
|
||||||
gevalRunPipeline parserSpec (trans step) finalPipeline context
|
gevalRunPipeline parserSpec (trans step) finalPipeline context
|
||||||
where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
|
where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
|
||||||
context = (WithoutInput expectedLineStream outLineStream)
|
context = fromSpecificationToWithoutInput lsSpec
|
||||||
step = itemStep SAMultiLabelFMeasure
|
step = itemStep SAMultiLabelFMeasure
|
||||||
expParser = expectedParser SAMultiLabelFMeasure
|
expParser = expectedParser SAMultiLabelFMeasure
|
||||||
outParser = outputParser SAMultiLabelFMeasure
|
outParser = outputParser SAMultiLabelFMeasure
|
||||||
@ -572,22 +572,19 @@ gevalBootstrapOnSources numberOfSamples (Mean (MultiLabelFMeasure beta)) lsSpec
|
|||||||
$ continueGEvalCalculations SAMSE MSE))
|
$ continueGEvalCalculations SAMSE MSE))
|
||||||
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
|
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
|
||||||
trans step (ParsedRecordWithoutInput x y) = step (x, y)
|
trans step (ParsedRecordWithoutInput x y) = step (x, y)
|
||||||
expectedLineStream = lineSourcesExpectedSource lsSpec
|
|
||||||
outLineStream = lineSourcesOutputSource lsSpec
|
|
||||||
|
|
||||||
gevalBootstrapOnSources numberOfSamples metric lsSpec = do
|
gevalBootstrapOnSources numberOfSamples metric lsSpec = do
|
||||||
case toSing $ toHelper metric of
|
case toSing $ toHelper metric of
|
||||||
SomeSing smetric -> gevalRunPipeline parserSpec (trans step) finalPipeline context
|
SomeSing smetric -> gevalRunPipeline parserSpec (trans step) finalPipeline context
|
||||||
where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
|
where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
|
||||||
context = (WithoutInput expectedLineStream outLineStream)
|
context = fromSpecificationToWithoutInput lsSpec
|
||||||
step = itemStep smetric
|
step = itemStep smetric
|
||||||
expParser = expectedParser smetric
|
expParser = expectedParser smetric
|
||||||
outParser = outputParser smetric
|
outParser = outputParser smetric
|
||||||
finalPipeline = fixer (bootstrapC numberOfSamples $ continueGEvalCalculations smetric metric)
|
finalPipeline = fixer (bootstrapC numberOfSamples $ continueGEvalCalculations smetric metric)
|
||||||
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
|
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
|
||||||
trans step (ParsedRecordWithoutInput x y) = step (x, y)
|
trans step (ParsedRecordWithoutInput x y) = step (x, y)
|
||||||
expectedLineStream = lineSourcesExpectedSource lsSpec
|
|
||||||
outLineStream = lineSourcesOutputSource lsSpec
|
|
||||||
|
|
||||||
fixer :: ConduitT c Void (ResourceT m) [MetricOutput] -> ConduitT c Void (ResourceT m) MetricOutput
|
fixer :: ConduitT c Void (ResourceT m) [MetricOutput] -> ConduitT c Void (ResourceT m) MetricOutput
|
||||||
fixer c = do
|
fixer c = do
|
||||||
@ -617,10 +614,7 @@ gevalCoreOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
|
|||||||
gevalCoreOnSources CharMatch = helper
|
gevalCoreOnSources CharMatch = helper
|
||||||
where
|
where
|
||||||
helper lsSpec = do
|
helper lsSpec = do
|
||||||
gevalCoreGeneralized (ParserSpecWithInput justUnpack justUnpack justUnpack) step countAgg (fMeasureOnCounts charMatchBeta) noGraph (WithInput inputLineSource expectedLineSource outputLineSource)
|
gevalCoreGeneralized (ParserSpecWithInput justUnpack justUnpack justUnpack) step countAgg (fMeasureOnCounts charMatchBeta) noGraph (fromSpecificationToWithInput lsSpec)
|
||||||
where inputLineSource = lineSourcesInputSource lsSpec
|
|
||||||
expectedLineSource = lineSourcesExpectedSource lsSpec
|
|
||||||
outputLineSource = lineSourcesOutputSource lsSpec
|
|
||||||
step (ParsedRecordWithInput inp exp out) = getCharMatchCount inp exp out
|
step (ParsedRecordWithInput inp exp out) = getCharMatchCount inp exp out
|
||||||
justUnpack = liftOp (Right . unpack)
|
justUnpack = liftOp (Right . unpack)
|
||||||
|
|
||||||
@ -698,7 +692,7 @@ gevalCoreOnSourcesStandardWay metric lsSpec =
|
|||||||
case toSing $ toHelper metric of
|
case toSing $ toHelper metric of
|
||||||
SomeSing smetric -> gevalRunPipeline parserSpec (trans step) finalPipeline context
|
SomeSing smetric -> gevalRunPipeline parserSpec (trans step) finalPipeline context
|
||||||
where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
|
where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
|
||||||
context = toWithoutInput lsSpec
|
context = fromSpecificationToWithoutInput lsSpec
|
||||||
step = itemStep smetric
|
step = itemStep smetric
|
||||||
expParser = expectedParser smetric
|
expParser = expectedParser smetric
|
||||||
outParser = outputParser smetric
|
outParser = outputParser smetric
|
||||||
@ -707,7 +701,7 @@ gevalCoreOnSourcesStandardWay metric lsSpec =
|
|||||||
trans step (ParsedRecordWithoutInput x y) = step (x, y)
|
trans step (ParsedRecordWithoutInput x y) = step (x, y)
|
||||||
|
|
||||||
helperLogLossHashed nbOfBits finalStep lsSpec =
|
helperLogLossHashed nbOfBits finalStep lsSpec =
|
||||||
gevalCore''' (ParserSpecWithoutInput (liftOp (Right . id)) (liftOp tentativeParser)) (\(lineNo, (t,d)) -> calculateLogLoss nbOfBits lineNo t (parseDistributionWrapper nbOfBits lineNo d)) averageC (finalStep . negate) noGraph (toWithoutInput lsSpec)
|
gevalCore''' (ParserSpecWithoutInput (liftOp (Right . id)) (liftOp tentativeParser)) (\(lineNo, (t,d)) -> calculateLogLoss nbOfBits lineNo t (parseDistributionWrapper nbOfBits lineNo d)) averageC (finalStep . negate) noGraph (fromSpecificationToWithoutInput lsSpec)
|
||||||
where -- Unfortunately, we're parsing the distribution twice. We need to
|
where -- Unfortunately, we're parsing the distribution twice. We need to
|
||||||
-- tentatively parse the distribution when the line number is unknown
|
-- tentatively parse the distribution when the line number is unknown
|
||||||
-- (so we just set it to 1)
|
-- (so we just set it to 1)
|
||||||
@ -788,7 +782,7 @@ gevalCoreWithoutInputOnItemTargets :: (MonadUnliftIO m, MonadThrow m, MonadIO m)
|
|||||||
-> LineSourcesSpecification (ResourceT m)
|
-> LineSourcesSpecification (ResourceT m)
|
||||||
-> m (MetricOutput) -- ^ metric values for the output against the expected output
|
-> m (MetricOutput) -- ^ metric values for the output against the expected output
|
||||||
gevalCoreWithoutInputOnItemTargets expParser outParser itemStep aggregator finalStep generateGraph lsSpec =
|
gevalCoreWithoutInputOnItemTargets expParser outParser itemStep aggregator finalStep generateGraph lsSpec =
|
||||||
gevalCoreGeneralized (ParserSpecWithoutInput expParser outParser) (trans itemStep) aggregator finalStep generateGraph (toWithoutInput lsSpec)
|
gevalCoreGeneralized (ParserSpecWithoutInput expParser outParser) (trans itemStep) aggregator finalStep generateGraph (fromSpecificationToWithoutInput lsSpec)
|
||||||
where
|
where
|
||||||
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
|
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
|
||||||
trans step (ParsedRecordWithoutInput x y) = step (x, y)
|
trans step (ParsedRecordWithoutInput x y) = step (x, y)
|
||||||
@ -958,6 +952,16 @@ defineContinuation aggregator finalStep generateGraph = do
|
|||||||
v <- aggregator
|
v <- aggregator
|
||||||
return $ MetricOutput (SimpleRun $ finalStep v) (generateGraph v)
|
return $ MetricOutput (SimpleRun $ finalStep v) (generateGraph v)
|
||||||
|
|
||||||
|
fromSpecificationToWithoutInput lsSpec = WithoutInput expectedSource outSource
|
||||||
|
where expectedSource = lineSourcesExpectedSource lsSpec
|
||||||
|
outSource = lineSourcesOutputSource lsSpec
|
||||||
|
|
||||||
|
fromSpecificationToWithInput lsSpec = WithInput theFilter inpSource expectedSource outSource
|
||||||
|
where inpSource = lineSourcesInputSource lsSpec
|
||||||
|
expectedSource = lineSourcesExpectedSource lsSpec
|
||||||
|
outSource = lineSourcesOutputSource lsSpec
|
||||||
|
theFilter = lineSourcesFilter lsSpec
|
||||||
|
|
||||||
-- | A type family to handle all the evaluation "context".
|
-- | A type family to handle all the evaluation "context".
|
||||||
--
|
--
|
||||||
-- This is needed as for some metrics the output and the expected metric is enough
|
-- This is needed as for some metrics the output and the expected metric is enough
|
||||||
@ -1001,18 +1005,19 @@ instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (Withou
|
|||||||
checkStepM _ (_, WrappedParsedRecordWithoutInput Done Done) = return Nothing
|
checkStepM _ (_, WrappedParsedRecordWithoutInput Done Done) = return Nothing
|
||||||
|
|
||||||
|
|
||||||
data WithInput m i e o = WithInput (LineSource (ResourceT m)) (LineSource (ResourceT m)) (LineSource (ResourceT m))
|
|
||||||
|
|
||||||
getInputFilePath (WithInput (LineSource _ _ _ inputFilePath _) _ _) = inputFilePath
|
data WithInput m i e o = WithInput Filter (LineSource (ResourceT m)) (LineSource (ResourceT m)) (LineSource (ResourceT m))
|
||||||
|
|
||||||
|
getInputFilePath (WithInput _ (LineSource _ _ _ inputFilePath _) _ _) = inputFilePath
|
||||||
|
|
||||||
instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithInput m i e o) m where
|
instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithInput m i e o) m where
|
||||||
data ParserSpec (WithInput m i e o) = ParserSpecWithInput (ItemTarget -> Either String i) (ItemTarget -> Either String e) (ItemTarget -> Either String o)
|
data ParserSpec (WithInput m i e o) = ParserSpecWithInput (ItemTarget -> Either String i) (ItemTarget -> Either String e) (ItemTarget -> Either String o)
|
||||||
data WrappedParsedRecord (WithInput m i e o) = WrappedParsedRecordWithInput (SourceItem i) (SourceItem e) (SourceItem o)
|
data WrappedParsedRecord (WithInput m i e o) = WrappedParsedRecordWithInput (SourceItem i) (SourceItem e) (SourceItem o)
|
||||||
data ParsedRecord (WithInput m i e o) = ParsedRecordWithInput i e o
|
data ParsedRecord (WithInput m i e o) = ParsedRecordWithInput i e o
|
||||||
getFirstLineNo _ (WithInput _ _ (LineSource _ _ _ _ lineNo)) = lineNo
|
getFirstLineNo _ (WithInput _ _ _ (LineSource _ _ _ _ lineNo)) = lineNo
|
||||||
getExpectedSource (WithInput _ (LineSource _ _ _ expectedSource _) _) = expectedSource
|
getExpectedSource (WithInput _ _ (LineSource _ _ _ expectedSource _) _) = expectedSource
|
||||||
getOutSource (WithInput _ _ (LineSource _ _ _ outSource _)) = outSource
|
getOutSource (WithInput _ _ _ (LineSource _ _ _ outSource _)) = outSource
|
||||||
recordSource (WithInput inputLineSource expectedLineSource outLineSource) (ParserSpecWithInput inpParser expParser outParser) = getZipSource $ (\x (y,z) -> WrappedParsedRecordWithInput x y z)
|
recordSource (WithInput _ inputLineSource expectedLineSource outLineSource) (ParserSpecWithInput inpParser expParser outParser) = getZipSource $ (\x (y,z) -> WrappedParsedRecordWithInput x y z)
|
||||||
<$> ZipSource (items inputLineSource inpParser) <*> (ZipSource $ getZipSource $ (,)
|
<$> ZipSource (items inputLineSource inpParser) <*> (ZipSource $ getZipSource $ (,)
|
||||||
<$> ZipSource (items expectedLineSource expParser)
|
<$> ZipSource (items expectedLineSource expParser)
|
||||||
<*> ZipSource (items outLineSource outParser))
|
<*> ZipSource (items outLineSource outParser))
|
||||||
@ -1036,12 +1041,15 @@ instance (MonadUnliftIO m, MonadIO m, MonadThrow m) => EvaluationContext (WithIn
|
|||||||
checkStepM _ (_, WrappedParsedRecordWithInput (Got _) Done Done) = throw TooManyLinesInInput
|
checkStepM _ (_, WrappedParsedRecordWithInput (Got _) Done Done) = throw TooManyLinesInInput
|
||||||
checkStepM _ (_, WrappedParsedRecordWithInput Done Done Done) = return Nothing
|
checkStepM _ (_, WrappedParsedRecordWithInput Done Done Done) = return Nothing
|
||||||
|
|
||||||
|
|
||||||
threeLineSource :: (MonadUnliftIO m, MonadIO m, MonadThrow m) => WithInput m Text Text Text -> ConduitT () (WrappedParsedRecord (WithInput m Text Text Text)) (ResourceT m) ()
|
threeLineSource :: (MonadUnliftIO m, MonadIO m, MonadThrow m) => WithInput m Text Text Text -> ConduitT () (WrappedParsedRecord (WithInput m Text Text Text)) (ResourceT m) ()
|
||||||
threeLineSource (WithInput inputLineSource expectedLineSource outLineSource) = getZipSource $ (\x (y,z) -> WrappedParsedRecordWithInput x y z)
|
threeLineSource (WithInput theFilter inputLineSource expectedLineSource outLineSource) =
|
||||||
<$> ZipSource (linesAsItems inputLineSource) <*> (ZipSource $ getZipSource $ (,)
|
(getZipSource $ (\x (y,z) -> (x, (y,z)))
|
||||||
|
<$> ZipSource (linesAsItems inputLineSource)
|
||||||
|
<*> (ZipSource $ getZipSource $ (,)
|
||||||
<$> ZipSource (linesAsItems expectedLineSource)
|
<$> ZipSource (linesAsItems expectedLineSource)
|
||||||
<*> ZipSource (linesAsItems outLineSource))
|
<*> ZipSource (linesAsItems outLineSource)))
|
||||||
|
.| (CC.filter (applyFilterToSourceItems theFilter))
|
||||||
|
.| (CC.map (\(x, (y,z)) -> WrappedParsedRecordWithInput x y z))
|
||||||
|
|
||||||
averageC :: MonadResource m => ConduitT Double Void m Double
|
averageC :: MonadResource m => ConduitT Double Void m Double
|
||||||
averageC = getZipSink
|
averageC = getZipSink
|
||||||
@ -1064,3 +1072,7 @@ items (LineSource lineSource itemDecoder preprocess _ _) parser =
|
|||||||
linesAsItems :: MonadResource m => LineSource m -> ConduitT () (SourceItem Text) m ()
|
linesAsItems :: MonadResource m => LineSource m -> ConduitT () (SourceItem Text) m ()
|
||||||
linesAsItems (LineSource lineSource _ _ _ _) =
|
linesAsItems (LineSource lineSource _ _ _ _) =
|
||||||
(lineSource .| CL.map Got) >> yield Done
|
(lineSource .| CL.map Got) >> yield Done
|
||||||
|
|
||||||
|
applyFilterToSourceItems :: Filter -> (SourceItem Text, (SourceItem Text, SourceItem Text)) -> Bool
|
||||||
|
applyFilterToSourceItems filter (Got x, (Got y, Got z)) = applyFilter filter (x, (y, z))
|
||||||
|
applyFilterToSourceItems _ special = True
|
||||||
|
@ -12,14 +12,14 @@ import Data.Conduit.SmartSource
|
|||||||
import Data.Conduit.Header
|
import Data.Conduit.Header
|
||||||
import GEval.Selector
|
import GEval.Selector
|
||||||
|
|
||||||
newtype Filter = Filter (Maybe (Text -> Bool))
|
data Filter = NoFilter | InputFilter (Text -> Bool)
|
||||||
|
|
||||||
noFilter :: Filter
|
noFilter :: Filter
|
||||||
noFilter = Filter Nothing
|
noFilter = NoFilter
|
||||||
|
|
||||||
applyFilter :: Filter -> (Text, (Text, Text)) -> Bool
|
applyFilter :: Filter -> (Text, (Text, Text)) -> Bool
|
||||||
applyFilter (Filter Nothing) _ = True
|
applyFilter NoFilter _ = True
|
||||||
applyFilter (Filter (Just fun)) (inp, (exp, out)) = fun inp
|
applyFilter (InputFilter fun) (inp, (exp, out)) = fun inp
|
||||||
|
|
||||||
-- | This type specifies the way the challenge data (input and
|
-- | This type specifies the way the challenge data (input and
|
||||||
-- expected data, but not outputs) flow into evaluation.
|
-- expected data, but not outputs) flow into evaluation.
|
||||||
|
@ -610,10 +610,11 @@ gevalLineByLineSource metric dataSource =
|
|||||||
(getZipSource $ (,)
|
(getZipSource $ (,)
|
||||||
<$> ZipSource (CL.sourceList [1..])
|
<$> ZipSource (CL.sourceList [1..])
|
||||||
<*> (ZipSource $ threeLineSource context)) .| CL.mapM (checkStepM evaluateLine) .| CL.catMaybes
|
<*> (ZipSource $ threeLineSource context)) .| CL.mapM (checkStepM evaluateLine) .| CL.catMaybes
|
||||||
where context = (WithInput inputLineSource expectedLineSource outputLineSource)
|
where context = fromSpecificationToWithInput lsSpec
|
||||||
inputLineSource = fileAsLineSource inputSource inOptions
|
lsSpec = dataSourceToLineSourcesSpecification dataSource
|
||||||
expectedLineSource = fileAsLineSource expectedSource outOptions
|
inputLineSource = lineSourcesInputSource lsSpec
|
||||||
outputLineSource = fileAsLineSource outSource outOptions
|
expectedLineSource = lineSourcesExpectedSource lsSpec
|
||||||
|
outputLineSource = lineSourcesOutputSource lsSpec
|
||||||
justLine (LineInFile _ _ l) = l
|
justLine (LineInFile _ _ l) = l
|
||||||
evaluateLine (lineNo, ParsedRecordWithInput inp exp out) = do
|
evaluateLine (lineNo, ParsedRecordWithInput inp exp out) = do
|
||||||
s <- liftIO $ gevalCoreOnSingleLines metric preprocess (getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp)
|
s <- liftIO $ gevalCoreOnSingleLines metric preprocess (getDataDecoder inputLineSource) (LineInFile inputSource lineNo inp)
|
||||||
|
Loading…
Reference in New Issue
Block a user