Implement bootstrap in GEval

This commit is contained in:
Filip Gralinski 2020-01-25 23:46:33 +01:00
parent deb14c6702
commit ae2769b7b9
6 changed files with 54 additions and 7 deletions

View File

@ -10,14 +10,14 @@ import Data.Conduit
import qualified Data.Conduit.List as CL import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Combinators as CC import qualified Data.Conduit.Combinators as CC
import Control.Monad.Trans.Resource import Control.Monad.Trans.Resource
import Data.Vector.Unboxed import Data.Vector
import qualified Data.Vector.Generic as VG import qualified Data.Vector.Generic as VG
import Debug.Trace import Debug.Trace
import System.Random (mkStdGen, randomRs) import System.Random (mkStdGen, randomRs)
bootstrapC :: (Show c, Show f, Unbox c, Monad m) => Int -> ConduitT c Void (ResourceT m) f -> ConduitT c Void (ResourceT m) [f] bootstrapC :: Monad m => Int -> ConduitT c Void (ResourceT m) f -> ConduitT c Void (ResourceT m) [f]
bootstrapC numberOfSamples final = do bootstrapC numberOfSamples final = do
l <- CC.sinkList l <- CC.sinkList
let v = fromList l let v = fromList l

View File

@ -100,6 +100,7 @@ import Text.Tokenizer
import GEval.Selector import GEval.Selector
import GEval.Annotation import GEval.Annotation
import GEval.BlackBoxDebugging import GEval.BlackBoxDebugging
import Data.Conduit.Bootstrap
import qualified Data.HashMap.Strict as M import qualified Data.HashMap.Strict as M
import qualified Data.Vector as V import qualified Data.Vector as V
@ -256,6 +257,7 @@ gevalOnSingleOut gevalSpec inputSource expectedSource outSource = do
vals <- Prelude.mapM (\scheme -> gevalCore (evaluationSchemeMetric scheme) vals <- Prelude.mapM (\scheme -> gevalCore (evaluationSchemeMetric scheme)
mSelector mSelector
(preprocess . applyPreprocessingOperations scheme) (preprocess . applyPreprocessingOperations scheme)
(gesBootstrapResampling gevalSpec)
inputSource inputSource
expectedSource expectedSource
outSource) schemes outSource) schemes
@ -412,16 +414,20 @@ singleLineAsLineSource (LineInFile sourceSpec lineNo line) itemDecoder preproces
gevalCore :: Metric -- ^ evaluation metric gevalCore :: Metric -- ^ evaluation metric
-> Maybe Selector -- ^ selector to be used -> Maybe Selector -- ^ selector to be used
-> (Text -> Text) -- ^ preprocessing function (e.g. tokenization) -> (Text -> Text) -- ^ preprocessing function (e.g. tokenization)
-> (Maybe Int) -- ^ number of bootstrap samples
-> SourceSpec -- ^ source specification for the input values -> SourceSpec -- ^ source specification for the input values
-> SourceSpec -- ^ source specification for the expected output -> SourceSpec -- ^ source specification for the expected output
-> SourceSpec -- ^ source specification for the output -> SourceSpec -- ^ source specification for the output
-> IO (MetricOutput) -- ^ metric value for the output against the expected output -> IO (MetricOutput) -- ^ metric value for the output against the expected output
gevalCore metric mSelector preprocess inputSource expectedSource outSource = do gevalCore metric mSelector preprocess mBootstrapResampling inputSource expectedSource outSource = do
whenM (isEmptyFileSource outSource) $ throwM $ EmptyOutput whenM (isEmptyFileSource outSource) $ throwM $ EmptyOutput
gevalCoreOnSources metric go metric
(fileAsLineSource inputSource mSelector preprocess) (fileAsLineSource inputSource mSelector preprocess)
(fileAsLineSource expectedSource mSelector preprocess) (fileAsLineSource expectedSource mSelector preprocess)
(fileAsLineSource outSource mSelector preprocess) (fileAsLineSource outSource mSelector preprocess)
where go = case mBootstrapResampling of
Nothing -> gevalCoreOnSources
Just bootstrapResampling -> gevalBootstrapOnSources bootstrapResampling
isEmptyFileSource :: SourceSpec -> IO Bool isEmptyFileSource :: SourceSpec -> IO Bool
isEmptyFileSource (FilePathSpec filePath) = isEmptyFile filePath isEmptyFileSource (FilePathSpec filePath) = isEmptyFile filePath
@ -431,6 +437,31 @@ logLossToLikehood logLoss = exp (-logLoss)
data LineInFile = LineInFile SourceSpec Word32 Text data LineInFile = LineInFile SourceSpec Word32 Text
gevalBootstrapOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
Int -- ^ number of samples
-> Metric -- ^ evaluation metric
-> LineSource (ResourceT m) -- ^ source of the input values
-> LineSource (ResourceT m) -- ^ source to read the expected output
-> LineSource (ResourceT m) -- ^ source to read the output
-> m (MetricOutput) -- ^ metric values for the output against the expected output
gevalBootstrapOnSources numberOfSamples metric inputLineStream expectedLineStream outLineStream = do
case toSing $ toHelper metric of
SomeSing smetric -> gevalRunPipeline parserSpec (trans step) finalPipeline context
where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
context = (WithoutInput expectedLineStream outLineStream)
step = itemStep smetric
expParser = expectedParser smetric
outParser = outputParser smetric
finalPipeline = fixer (bootstrapC numberOfSamples $ continueGEvalCalculations smetric metric)
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
trans step (ParsedRecordWithoutInput x y) = step (x, y)
fixer :: ConduitT c Void (ResourceT m) [MetricOutput] -> ConduitT c Void (ResourceT m) MetricOutput
fixer c = do
outputs <- c
let values = Prelude.map (\(MetricOutput (SimpleRun v) _) -> v) outputs
return $ MetricOutput (BootstrapResampling values) Nothing
-- | Runs evaluation for a given metric using the sources given -- | Runs evaluation for a given metric using the sources given
-- for input, expected output and output. Returns the metric value. -- for input, expected output and output. Returns the metric value.
-- Throws @GEvalException@ if something was wrong in the data (e.g. -- Throws @GEvalException@ if something was wrong in the data (e.g.

View File

@ -116,6 +116,8 @@ main = hspec $ do
runGEvalTest "bleu-empty" `shouldReturnAlmost` 0.0000 runGEvalTest "bleu-empty" `shouldReturnAlmost` 0.0000
it "with tokenization" $ it "with tokenization" $
runGEvalTest "bleu-with-tokenization" `shouldReturnAlmost` 0.6501914150070065 runGEvalTest "bleu-with-tokenization" `shouldReturnAlmost` 0.6501914150070065
it "with bootstrap" $
runGEvalTest "bleu-complex-bootstrap" `shouldReturnAlmost` 0.7061420723046241
describe "GLEU" $ do describe "GLEU" $ do
it "simple example" $ it "simple example" $
runGEvalTest "gleu-simple" `shouldReturnAlmost` 0.462962962962963 runGEvalTest "gleu-simple" `shouldReturnAlmost` 0.462962962962963
@ -751,6 +753,7 @@ testMatchFun _ _ = False
extractVal :: (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricResult])])) -> IO MetricValue extractVal :: (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricResult])])) -> IO MetricValue
extractVal (Right (Just ([(_, (SimpleRun val):_)]))) = return val extractVal (Right (Just ([(_, (SimpleRun val):_)]))) = return val
extractVal (Right (Just ([(_, (BootstrapResampling vals):_)]))) = return (sum vals / fromIntegral (Prelude.length vals))
extractVal (Right Nothing) = return $ error "no metrics???" extractVal (Right Nothing) = return $ error "no metrics???"
extractVal (Right (Just [])) = return $ error "emtpy metric list???" extractVal (Right (Just [])) = return $ error "emtpy metric list???"
extractVal (Left result) = do extractVal (Left result) = do

View File

@ -0,0 +1,6 @@
Ala has a white cat
It is a trap
All your base belong to us
bar
expected result
thrash
1 Ala has a white cat
2 It is a trap
3 All your base belong to us
4 bar
5 expected result
6 thrash

View File

@ -0,0 +1 @@
--metric BLEU --bootstrap-resampling 100

View File

@ -0,0 +1,6 @@
Alice has a white cat Ala has a whitecat
It is a trap
All your base are belong to us
foo bar baz
the expected result result expected
foo bar foo bar baz baq
Can't render this file because it has a wrong number of fields in line 2.