From ae2769b7b92de5b9715bb0aaf23e0bedf1464883 Mon Sep 17 00:00:00 2001
From: Filip Gralinski <filipg@amu.edu.pl>
Date: Sat, 25 Jan 2020 23:46:33 +0100
Subject: [PATCH] Implement bootstrap in GEval

---
 src/Data/Conduit/Bootstrap.hs                 |  4 +-
 src/GEval/Core.hs                             | 41 ++++++++++++++++---
 test/Spec.hs                                  |  3 ++
 .../test-A/out.tsv                            |  6 +++
 .../bleu-complex-bootstrap/config.txt         |  1 +
 .../test-A/expected.tsv                       |  6 +++
 6 files changed, 54 insertions(+), 7 deletions(-)
 create mode 100644 test/bleu-complex-bootstrap/bleu-complex-bootstrap-solution/test-A/out.tsv
 create mode 100644 test/bleu-complex-bootstrap/bleu-complex-bootstrap/config.txt
 create mode 100644 test/bleu-complex-bootstrap/bleu-complex-bootstrap/test-A/expected.tsv

diff --git a/src/Data/Conduit/Bootstrap.hs b/src/Data/Conduit/Bootstrap.hs
index bc2ebf5..229fd78 100644
--- a/src/Data/Conduit/Bootstrap.hs
+++ b/src/Data/Conduit/Bootstrap.hs
@@ -10,14 +10,14 @@ import Data.Conduit
 import qualified Data.Conduit.List as CL
 import qualified Data.Conduit.Combinators as CC
 import Control.Monad.Trans.Resource
-import Data.Vector.Unboxed
+import Data.Vector
 import qualified Data.Vector.Generic as VG
 
 import Debug.Trace
 
 import System.Random (mkStdGen, randomRs)
 
-bootstrapC :: (Show c, Show f, Unbox c, Monad m) => Int -> ConduitT c Void (ResourceT m) f -> ConduitT c Void (ResourceT m) [f]
+bootstrapC :: Monad m => Int -> ConduitT c Void (ResourceT m) f -> ConduitT c Void (ResourceT m) [f]
 bootstrapC numberOfSamples final = do
   l <- CC.sinkList
   let v = fromList l
diff --git a/src/GEval/Core.hs b/src/GEval/Core.hs
index 0ca1489..1fde07d 100644
--- a/src/GEval/Core.hs
+++ b/src/GEval/Core.hs
@@ -100,6 +100,7 @@ import Text.Tokenizer
 import GEval.Selector
 import GEval.Annotation
 import GEval.BlackBoxDebugging
+import Data.Conduit.Bootstrap
 
 import qualified Data.HashMap.Strict as M
 import qualified Data.Vector as V
@@ -256,6 +257,7 @@ gevalOnSingleOut gevalSpec inputSource expectedSource outSource = do
   vals <- Prelude.mapM (\scheme -> gevalCore (evaluationSchemeMetric scheme)
                                            mSelector
                                            (preprocess . applyPreprocessingOperations scheme)
+                                           (gesBootstrapResampling gevalSpec)
                                            inputSource
                                            expectedSource
                                            outSource) schemes
@@ -412,16 +414,20 @@ singleLineAsLineSource (LineInFile sourceSpec lineNo line) itemDecoder preproces
 gevalCore :: Metric           -- ^ evaluation metric
           -> Maybe Selector   -- ^ selector to be used
           -> (Text -> Text)    -- ^ preprocessing function (e.g. tokenization)
+          -> (Maybe Int)      -- ^ number of bootstrap samples
           -> SourceSpec       -- ^ source specification for the input values
           -> SourceSpec       -- ^ source specification for the expected output
           -> SourceSpec       -- ^ source specification for the output
           -> IO (MetricOutput) -- ^ metric value for the output against the expected output
-gevalCore metric mSelector preprocess inputSource expectedSource outSource = do
+gevalCore metric mSelector preprocess mBootstrapResampling inputSource expectedSource outSource = do
   whenM (isEmptyFileSource outSource) $ throwM $ EmptyOutput
-  gevalCoreOnSources metric
-                     (fileAsLineSource inputSource mSelector preprocess)
-                     (fileAsLineSource expectedSource mSelector preprocess)
-                     (fileAsLineSource outSource mSelector preprocess)
+  go metric
+     (fileAsLineSource inputSource mSelector preprocess)
+     (fileAsLineSource expectedSource mSelector preprocess)
+     (fileAsLineSource outSource mSelector preprocess)
+  where go = case mBootstrapResampling of
+               Nothing -> gevalCoreOnSources
+               Just bootstrapResampling -> gevalBootstrapOnSources bootstrapResampling
 
 isEmptyFileSource :: SourceSpec -> IO Bool
 isEmptyFileSource (FilePathSpec filePath) = isEmptyFile filePath
@@ -431,6 +437,31 @@ logLossToLikehood logLoss = exp (-logLoss)
 
 data LineInFile = LineInFile SourceSpec Word32 Text
 
+gevalBootstrapOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
+                          Int                         -- ^ number of samples
+                          -> Metric                    -- ^ evaluation metric
+                          -> LineSource (ResourceT m)  -- ^ source of the input values
+                          -> LineSource (ResourceT m)  -- ^ source to read the expected output
+                          -> LineSource (ResourceT m)  -- ^ source to read the output
+                          -> m (MetricOutput)           -- ^ metric values for the output against the expected output
+gevalBootstrapOnSources numberOfSamples metric inputLineStream expectedLineStream outLineStream = do
+    case toSing $ toHelper metric of
+                SomeSing smetric -> gevalRunPipeline parserSpec (trans step) finalPipeline context
+                                     where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
+                                           context = (WithoutInput expectedLineStream outLineStream)
+                                           step = itemStep smetric
+                                           expParser = expectedParser smetric
+                                           outParser = outputParser smetric
+                                           finalPipeline = fixer (bootstrapC numberOfSamples $ continueGEvalCalculations smetric metric)
+                                           trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
+                                           trans step (ParsedRecordWithoutInput x y) = step (x, y)
+
+fixer :: ConduitT c Void (ResourceT m) [MetricOutput] -> ConduitT c Void (ResourceT m) MetricOutput
+fixer c = do
+  outputs <- c
+  let values = Prelude.map (\(MetricOutput (SimpleRun v) _) -> v) outputs
+  return $ MetricOutput (BootstrapResampling values) Nothing
+
 -- | Runs evaluation for a given metric using the sources given
 -- for input, expected output and output. Returns the metric value.
 -- Throws @GEvalException@ if something was wrong in the data (e.g.
diff --git a/test/Spec.hs b/test/Spec.hs
index c7e7b4a..adca65f 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -116,6 +116,8 @@ main = hspec $ do
       runGEvalTest "bleu-empty" `shouldReturnAlmost` 0.0000
     it "with tokenization" $
       runGEvalTest "bleu-with-tokenization" `shouldReturnAlmost` 0.6501914150070065
+    it "with bootstrap" $
+      runGEvalTest "bleu-complex-bootstrap" `shouldReturnAlmost` 0.7061420723046241
   describe "GLEU" $ do
     it "simple example" $
       runGEvalTest "gleu-simple" `shouldReturnAlmost` 0.462962962962963
@@ -751,6 +753,7 @@ testMatchFun _ _ = False
 
 extractVal :: (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricResult])])) -> IO MetricValue
 extractVal (Right (Just ([(_, (SimpleRun val):_)]))) = return val
+extractVal (Right (Just ([(_, (BootstrapResampling vals):_)]))) = return (sum vals / fromIntegral (Prelude.length vals))
 extractVal (Right Nothing) = return $ error "no metrics???"
 extractVal (Right (Just [])) = return $ error "emtpy metric list???"
 extractVal (Left result) = do
diff --git a/test/bleu-complex-bootstrap/bleu-complex-bootstrap-solution/test-A/out.tsv b/test/bleu-complex-bootstrap/bleu-complex-bootstrap-solution/test-A/out.tsv
new file mode 100644
index 0000000..63441d0
--- /dev/null
+++ b/test/bleu-complex-bootstrap/bleu-complex-bootstrap-solution/test-A/out.tsv
@@ -0,0 +1,6 @@
+Ala has a white cat
+It is a trap
+All your base belong to us
+bar
+expected result
+thrash
diff --git a/test/bleu-complex-bootstrap/bleu-complex-bootstrap/config.txt b/test/bleu-complex-bootstrap/bleu-complex-bootstrap/config.txt
new file mode 100644
index 0000000..b1ffebd
--- /dev/null
+++ b/test/bleu-complex-bootstrap/bleu-complex-bootstrap/config.txt
@@ -0,0 +1 @@
+--metric BLEU --bootstrap-resampling 100
diff --git a/test/bleu-complex-bootstrap/bleu-complex-bootstrap/test-A/expected.tsv b/test/bleu-complex-bootstrap/bleu-complex-bootstrap/test-A/expected.tsv
new file mode 100644
index 0000000..112a43a
--- /dev/null
+++ b/test/bleu-complex-bootstrap/bleu-complex-bootstrap/test-A/expected.tsv
@@ -0,0 +1,6 @@
+Alice has a white cat	Ala has a whitecat
+It is a trap
+All your base are belong to us
+foo	bar	baz
+the expected result	result expected
+foo bar	foo bar baz baq