From ae2769b7b92de5b9715bb0aaf23e0bedf1464883 Mon Sep 17 00:00:00 2001 From: Filip Gralinski Date: Sat, 25 Jan 2020 23:46:33 +0100 Subject: [PATCH] Implement bootstrap in GEval --- src/Data/Conduit/Bootstrap.hs | 4 +- src/GEval/Core.hs | 41 ++++++++++++++++--- test/Spec.hs | 3 ++ .../test-A/out.tsv | 6 +++ .../bleu-complex-bootstrap/config.txt | 1 + .../test-A/expected.tsv | 6 +++ 6 files changed, 54 insertions(+), 7 deletions(-) create mode 100644 test/bleu-complex-bootstrap/bleu-complex-bootstrap-solution/test-A/out.tsv create mode 100644 test/bleu-complex-bootstrap/bleu-complex-bootstrap/config.txt create mode 100644 test/bleu-complex-bootstrap/bleu-complex-bootstrap/test-A/expected.tsv diff --git a/src/Data/Conduit/Bootstrap.hs b/src/Data/Conduit/Bootstrap.hs index bc2ebf5..229fd78 100644 --- a/src/Data/Conduit/Bootstrap.hs +++ b/src/Data/Conduit/Bootstrap.hs @@ -10,14 +10,14 @@ import Data.Conduit import qualified Data.Conduit.List as CL import qualified Data.Conduit.Combinators as CC import Control.Monad.Trans.Resource -import Data.Vector.Unboxed +import Data.Vector import qualified Data.Vector.Generic as VG import Debug.Trace import System.Random (mkStdGen, randomRs) -bootstrapC :: (Show c, Show f, Unbox c, Monad m) => Int -> ConduitT c Void (ResourceT m) f -> ConduitT c Void (ResourceT m) [f] +bootstrapC :: Monad m => Int -> ConduitT c Void (ResourceT m) f -> ConduitT c Void (ResourceT m) [f] bootstrapC numberOfSamples final = do l <- CC.sinkList let v = fromList l diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs index 0ca1489..1fde07d 100644 --- a/src/GEval/Core.hs +++ b/src/GEval/Core.hs @@ -100,6 +100,7 @@ import Text.Tokenizer import GEval.Selector import GEval.Annotation import GEval.BlackBoxDebugging +import Data.Conduit.Bootstrap import qualified Data.HashMap.Strict as M import qualified Data.Vector as V @@ -256,6 +257,7 @@ gevalOnSingleOut gevalSpec inputSource expectedSource outSource = do vals <- Prelude.mapM (\scheme -> gevalCore (evaluationSchemeMetric scheme) mSelector (preprocess . applyPreprocessingOperations scheme) + (gesBootstrapResampling gevalSpec) inputSource expectedSource outSource) schemes @@ -412,16 +414,20 @@ singleLineAsLineSource (LineInFile sourceSpec lineNo line) itemDecoder preproces gevalCore :: Metric -- ^ evaluation metric -> Maybe Selector -- ^ selector to be used -> (Text -> Text) -- ^ preprocessing function (e.g. tokenization) + -> (Maybe Int) -- ^ number of bootstrap samples -> SourceSpec -- ^ source specification for the input values -> SourceSpec -- ^ source specification for the expected output -> SourceSpec -- ^ source specification for the output -> IO (MetricOutput) -- ^ metric value for the output against the expected output -gevalCore metric mSelector preprocess inputSource expectedSource outSource = do +gevalCore metric mSelector preprocess mBootstrapResampling inputSource expectedSource outSource = do whenM (isEmptyFileSource outSource) $ throwM $ EmptyOutput - gevalCoreOnSources metric - (fileAsLineSource inputSource mSelector preprocess) - (fileAsLineSource expectedSource mSelector preprocess) - (fileAsLineSource outSource mSelector preprocess) + go metric + (fileAsLineSource inputSource mSelector preprocess) + (fileAsLineSource expectedSource mSelector preprocess) + (fileAsLineSource outSource mSelector preprocess) + where go = case mBootstrapResampling of + Nothing -> gevalCoreOnSources + Just bootstrapResampling -> gevalBootstrapOnSources bootstrapResampling isEmptyFileSource :: SourceSpec -> IO Bool isEmptyFileSource (FilePathSpec filePath) = isEmptyFile filePath @@ -431,6 +437,31 @@ logLossToLikehood logLoss = exp (-logLoss) data LineInFile = LineInFile SourceSpec Word32 Text +gevalBootstrapOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) => + Int -- ^ number of samples + -> Metric -- ^ evaluation metric + -> LineSource (ResourceT m) -- ^ source of the input values + -> LineSource (ResourceT m) -- ^ source to read the expected output + -> LineSource (ResourceT m) -- ^ source to read the output + -> m (MetricOutput) -- ^ metric values for the output against the expected output +gevalBootstrapOnSources numberOfSamples metric inputLineStream expectedLineStream outLineStream = do + case toSing $ toHelper metric of + SomeSing smetric -> gevalRunPipeline parserSpec (trans step) finalPipeline context + where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser)) + context = (WithoutInput expectedLineStream outLineStream) + step = itemStep smetric + expParser = expectedParser smetric + outParser = outputParser smetric + finalPipeline = fixer (bootstrapC numberOfSamples $ continueGEvalCalculations smetric metric) + trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c + trans step (ParsedRecordWithoutInput x y) = step (x, y) + +fixer :: ConduitT c Void (ResourceT m) [MetricOutput] -> ConduitT c Void (ResourceT m) MetricOutput +fixer c = do + outputs <- c + let values = Prelude.map (\(MetricOutput (SimpleRun v) _) -> v) outputs + return $ MetricOutput (BootstrapResampling values) Nothing + -- | Runs evaluation for a given metric using the sources given -- for input, expected output and output. Returns the metric value. -- Throws @GEvalException@ if something was wrong in the data (e.g. diff --git a/test/Spec.hs b/test/Spec.hs index c7e7b4a..adca65f 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -116,6 +116,8 @@ main = hspec $ do runGEvalTest "bleu-empty" `shouldReturnAlmost` 0.0000 it "with tokenization" $ runGEvalTest "bleu-with-tokenization" `shouldReturnAlmost` 0.6501914150070065 + it "with bootstrap" $ + runGEvalTest "bleu-complex-bootstrap" `shouldReturnAlmost` 0.7061420723046241 describe "GLEU" $ do it "simple example" $ runGEvalTest "gleu-simple" `shouldReturnAlmost` 0.462962962962963 @@ -751,6 +753,7 @@ testMatchFun _ _ = False extractVal :: (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricResult])])) -> IO MetricValue extractVal (Right (Just ([(_, (SimpleRun val):_)]))) = return val +extractVal (Right (Just ([(_, (BootstrapResampling vals):_)]))) = return (sum vals / fromIntegral (Prelude.length vals)) extractVal (Right Nothing) = return $ error "no metrics???" extractVal (Right (Just [])) = return $ error "emtpy metric list???" extractVal (Left result) = do diff --git a/test/bleu-complex-bootstrap/bleu-complex-bootstrap-solution/test-A/out.tsv b/test/bleu-complex-bootstrap/bleu-complex-bootstrap-solution/test-A/out.tsv new file mode 100644 index 0000000..63441d0 --- /dev/null +++ b/test/bleu-complex-bootstrap/bleu-complex-bootstrap-solution/test-A/out.tsv @@ -0,0 +1,6 @@ +Ala has a white cat +It is a trap +All your base belong to us +bar +expected result +thrash diff --git a/test/bleu-complex-bootstrap/bleu-complex-bootstrap/config.txt b/test/bleu-complex-bootstrap/bleu-complex-bootstrap/config.txt new file mode 100644 index 0000000..b1ffebd --- /dev/null +++ b/test/bleu-complex-bootstrap/bleu-complex-bootstrap/config.txt @@ -0,0 +1 @@ +--metric BLEU --bootstrap-resampling 100 diff --git a/test/bleu-complex-bootstrap/bleu-complex-bootstrap/test-A/expected.tsv b/test/bleu-complex-bootstrap/bleu-complex-bootstrap/test-A/expected.tsv new file mode 100644 index 0000000..112a43a --- /dev/null +++ b/test/bleu-complex-bootstrap/bleu-complex-bootstrap/test-A/expected.tsv @@ -0,0 +1,6 @@ +Alice has a white cat Ala has a whitecat +It is a trap +All your base are belong to us +foo bar baz +the expected result result expected +foo bar foo bar baz baq