Introduce DataSource
This commit is contained in:
parent
8f3550493c
commit
454f60812c
@ -54,6 +54,7 @@ library
|
|||||||
, Data.Conduit.Bootstrap
|
, Data.Conduit.Bootstrap
|
||||||
, GEval.Formatting
|
, GEval.Formatting
|
||||||
, Data.Conduit.Header
|
, Data.Conduit.Header
|
||||||
|
, GEval.DataSource
|
||||||
, Paths_geval
|
, Paths_geval
|
||||||
build-depends: base >= 4.7 && < 5
|
build-depends: base >= 4.7 && < 5
|
||||||
, cond
|
, cond
|
||||||
|
@ -106,6 +106,7 @@ import GEval.Selector
|
|||||||
import GEval.Annotation
|
import GEval.Annotation
|
||||||
import GEval.BlackBoxDebugging
|
import GEval.BlackBoxDebugging
|
||||||
import Data.Conduit.Bootstrap
|
import Data.Conduit.Bootstrap
|
||||||
|
import GEval.DataSource
|
||||||
|
|
||||||
import qualified Data.HashMap.Strict as M
|
import qualified Data.HashMap.Strict as M
|
||||||
import qualified Data.Vector as V
|
import qualified Data.Vector as V
|
||||||
@ -265,31 +266,44 @@ data LineSource m = LineSource (ConduitT () Text m ()) (Text -> ItemTarget) (Tex
|
|||||||
|
|
||||||
geval :: GEvalSpecification -> IO [(SourceSpec, [MetricOutput])]
|
geval :: GEvalSpecification -> IO [(SourceSpec, [MetricOutput])]
|
||||||
geval gevalSpec = do
|
geval gevalSpec = do
|
||||||
|
mInHeader <- readHeaderFileWrapper $ getInHeader gevalSpec
|
||||||
|
mOutHeader <- readHeaderFileWrapper $ getOutHeader gevalSpec
|
||||||
(inputSource, expectedSource, outSources) <- checkAndGetFiles False gevalSpec
|
(inputSource, expectedSource, outSources) <- checkAndGetFiles False gevalSpec
|
||||||
results <- Prelude.mapM (gevalOnSingleOut gevalSpec inputSource expectedSource) outSources
|
let chDataSource = ChallengeDataSource {
|
||||||
|
challengeDataSourceInput = inputSource,
|
||||||
|
challengeDataSourceExpected = expectedSource,
|
||||||
|
challengeDataSourceSelector = gesSelector gevalSpec,
|
||||||
|
challengeDataSourcePreprocess = gesPreprocess gevalSpec,
|
||||||
|
challengeDataSourceFilter = Nothing,
|
||||||
|
challengeDataSourceInHeader = mInHeader,
|
||||||
|
challengeDataSourceOutHeader = mOutHeader }
|
||||||
|
|
||||||
|
results <- Prelude.mapM (\outSource -> gevalOnSingleOut gevalSpec
|
||||||
|
DataSource {
|
||||||
|
dataSourceChallengeData = chDataSource,
|
||||||
|
dataSourceOut = outSource }) outSources
|
||||||
return $ sortBy (\a b -> (show $ fst a) `naturalComp` (show $ fst b)) results
|
return $ sortBy (\a b -> (show $ fst a) `naturalComp` (show $ fst b)) results
|
||||||
|
|
||||||
noGraph :: d -> Maybe GraphSeries
|
noGraph :: d -> Maybe GraphSeries
|
||||||
noGraph = const Nothing
|
noGraph = const Nothing
|
||||||
|
|
||||||
gevalOnSingleOut :: GEvalSpecification -> SourceSpec -> SourceSpec -> SourceSpec -> IO (SourceSpec, [MetricOutput])
|
gevalOnSingleOut :: GEvalSpecification -> DataSource -> IO (SourceSpec, [MetricOutput])
|
||||||
gevalOnSingleOut gevalSpec inputSource expectedSource outSource = do
|
gevalOnSingleOut gevalSpec dataSource = do
|
||||||
mInHeader <- readHeaderFileWrapper $ getInHeader gevalSpec
|
vals <- Prelude.mapM (\scheme ->
|
||||||
mOutHeader <- readHeaderFileWrapper $ getOutHeader gevalSpec
|
gevalCore (evaluationSchemeMetric scheme)
|
||||||
vals <- Prelude.mapM (\scheme -> gevalCore (evaluationSchemeMetric scheme)
|
(gesBootstrapResampling gevalSpec)
|
||||||
mSelector
|
(addPreprocessing (applyPreprocessingOperations scheme) dataSource))
|
||||||
(preprocess . applyPreprocessingOperations scheme)
|
schemes
|
||||||
mInHeader
|
|
||||||
mOutHeader
|
|
||||||
(gesBootstrapResampling gevalSpec)
|
|
||||||
inputSource
|
|
||||||
expectedSource
|
|
||||||
outSource) schemes
|
|
||||||
return (outSource, vals)
|
return (outSource, vals)
|
||||||
where schemes = gesMetrics gevalSpec
|
where outSource = dataSourceOut dataSource
|
||||||
preprocess = gesPreprocess gevalSpec
|
schemes = gesMetrics gevalSpec
|
||||||
mSelector = gesSelector gevalSpec
|
|
||||||
|
|
||||||
|
-- | Return a copy of the given data source in which the challenge-data
-- preprocessing function is the existing one composed after @prep@,
-- i.e. @prep@ is applied to the text first and the existing
-- preprocessing is applied to its result. All other fields are kept.
addPreprocessing :: (Text -> Text) -> DataSource -> DataSource
|
||||||
|
addPreprocessing prep dataSource =
|
||||||
|
dataSource {
|
||||||
|
dataSourceChallengeData = (dataSourceChallengeData dataSource) {
|
||||||
|
challengeDataSourcePreprocess =
|
||||||
|
(challengeDataSourcePreprocess $ dataSourceChallengeData dataSource) . prep }}
|
||||||
|
|
||||||
readHeaderFileWrapper :: Maybe FilePath -> IO (Maybe TabularHeader)
|
readHeaderFileWrapper :: Maybe FilePath -> IO (Maybe TabularHeader)
|
||||||
readHeaderFileWrapper Nothing = return Nothing
|
readHeaderFileWrapper Nothing = return Nothing
|
||||||
@ -468,16 +482,10 @@ handleBootstrap _ = True
|
|||||||
-- Throws @GEvalException@ if something was wrong in the data (e.g.
|
-- Throws @GEvalException@ if something was wrong in the data (e.g.
|
||||||
-- inconsistent number of lines in the sources).
|
-- inconsistent number of lines in the sources).
|
||||||
gevalCore :: Metric -- ^ evaluation metric
|
gevalCore :: Metric -- ^ evaluation metric
|
||||||
-> Maybe Selector -- ^ selector to be used
|
|
||||||
-> (Text -> Text) -- ^ preprocessing function (e.g. tokenization)
|
|
||||||
-> (Maybe TabularHeader) -- ^ header for input
|
|
||||||
-> (Maybe TabularHeader) -- ^ header for output/expected files
|
|
||||||
-> (Maybe Int) -- ^ number of bootstrap samples
|
-> (Maybe Int) -- ^ number of bootstrap samples
|
||||||
-> SourceSpec -- ^ source specification for the input values
|
-> DataSource
|
||||||
-> SourceSpec -- ^ source specification for the expected output
|
|
||||||
-> SourceSpec -- ^ source specification for the output
|
|
||||||
-> IO (MetricOutput) -- ^ metric value for the output against the expected output
|
-> IO (MetricOutput) -- ^ metric value for the output against the expected output
|
||||||
gevalCore metric mSelector preprocess mInHeader mOutHeader mBootstrapResampling inputSource expectedSource outSource = do
|
gevalCore metric mBootstrapResampling dataSource = do
|
||||||
whenM (isEmptyFileSource outSource) $ throwM $ EmptyOutput
|
whenM (isEmptyFileSource outSource) $ throwM $ EmptyOutput
|
||||||
go metric
|
go metric
|
||||||
(fileAsLineSource inputSource inOptions)
|
(fileAsLineSource inputSource inOptions)
|
||||||
@ -496,6 +504,16 @@ gevalCore metric mSelector preprocess mInHeader mOutHeader mBootstrapResampling
|
|||||||
fileProcessingOptionsSelector = mSelector,
|
fileProcessingOptionsSelector = mSelector,
|
||||||
fileProcessingOptionsPreprocess = preprocess,
|
fileProcessingOptionsPreprocess = preprocess,
|
||||||
fileProcessingOptionsHeader = mInHeader }
|
fileProcessingOptionsHeader = mInHeader }
|
||||||
|
challengeDataSource = dataSourceChallengeData dataSource
|
||||||
|
mSelector = challengeDataSourceSelector challengeDataSource
|
||||||
|
preprocess = challengeDataSourcePreprocess challengeDataSource
|
||||||
|
mInHeader = challengeDataSourceInHeader challengeDataSource
|
||||||
|
mOutHeader = challengeDataSourceOutHeader challengeDataSource
|
||||||
|
inputSource = challengeDataSourceInput challengeDataSource
|
||||||
|
expectedSource = challengeDataSourceExpected challengeDataSource
|
||||||
|
outSource = dataSourceOut dataSource
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
isEmptyFileSource :: SourceSpec -> IO Bool
|
isEmptyFileSource :: SourceSpec -> IO Bool
|
||||||
isEmptyFileSource (FilePathSpec filePath) = isEmptyFile filePath
|
isEmptyFileSource (FilePathSpec filePath) = isEmptyFile filePath
|
||||||
|
29
src/GEval/DataSource.hs
Normal file
29
src/GEval/DataSource.hs
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
module GEval.DataSource
|
||||||
|
(ChallengeDataSource(..),
|
||||||
|
DataSource(..))
|
||||||
|
where
|
||||||
|
|
||||||
|
import Data.Text
|
||||||
|
|
||||||
|
import Data.Conduit.SmartSource
|
||||||
|
import Data.Conduit.Header
|
||||||
|
import GEval.Selector
|
||||||
|
|
||||||
|
-- | This type specifies the way the challenge data (input and
|
||||||
|
-- expected data, but not outputs) flow into evaluation.
|
||||||
|
--
|
||||||
|
-- At some point, it is turned into a conduit for reading data.
|
||||||
|
data ChallengeDataSource = ChallengeDataSource {
|
||||||
|
challengeDataSourceInput :: SourceSpec, -- source specification for the input values
|
||||||
|
challengeDataSourceExpected :: SourceSpec, -- source specification for the expected output
|
||||||
|
challengeDataSourceSelector :: Maybe Selector, -- optional selector to be used when reading the data
|
||||||
|
challengeDataSourcePreprocess :: Text -> Text, -- preprocessing function (e.g. tokenization)
|
||||||
|
challengeDataSourceFilter :: Maybe (Text -> Bool), -- optional predicate on text; presumably for filtering items (TODO confirm against the consumer)
|
||||||
|
challengeDataSourceInHeader :: Maybe TabularHeader, -- optional header for the input
|
||||||
|
challengeDataSourceOutHeader :: Maybe TabularHeader } -- optional header for the output/expected files
|
||||||
|
|
||||||
|
-- | This type specifies all the data flowing into evaluation,
|
||||||
|
-- including the output data to be evaluated.
|
||||||
|
data DataSource = DataSource {
|
||||||
|
dataSourceChallengeData :: ChallengeDataSource, -- input/expected data together with their reading options
|
||||||
|
dataSourceOut :: SourceSpec } -- source specification for the output being evaluated
|
Loading…
Reference in New Issue
Block a user