Implement bootstrap in GEval

This commit is contained in:
Filip Gralinski 2020-01-25 23:46:33 +01:00
parent deb14c6702
commit ae2769b7b9
6 changed files with 54 additions and 7 deletions

View File

@ -10,14 +10,14 @@ import Data.Conduit
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Combinators as CC
import Control.Monad.Trans.Resource
import Data.Vector.Unboxed
import Data.Vector
import qualified Data.Vector.Generic as VG
import Debug.Trace
import System.Random (mkStdGen, randomRs)
bootstrapC :: (Show c, Show f, Unbox c, Monad m) => Int -> ConduitT c Void (ResourceT m) f -> ConduitT c Void (ResourceT m) [f]
bootstrapC :: Monad m => Int -> ConduitT c Void (ResourceT m) f -> ConduitT c Void (ResourceT m) [f]
bootstrapC numberOfSamples final = do
l <- CC.sinkList
let v = fromList l

View File

@ -100,6 +100,7 @@ import Text.Tokenizer
import GEval.Selector
import GEval.Annotation
import GEval.BlackBoxDebugging
import Data.Conduit.Bootstrap
import qualified Data.HashMap.Strict as M
import qualified Data.Vector as V
@ -256,6 +257,7 @@ gevalOnSingleOut gevalSpec inputSource expectedSource outSource = do
vals <- Prelude.mapM (\scheme -> gevalCore (evaluationSchemeMetric scheme)
mSelector
(preprocess . applyPreprocessingOperations scheme)
(gesBootstrapResampling gevalSpec)
inputSource
expectedSource
outSource) schemes
@ -412,16 +414,20 @@ singleLineAsLineSource (LineInFile sourceSpec lineNo line) itemDecoder preproces
gevalCore :: Metric -- ^ evaluation metric
-> Maybe Selector -- ^ selector to be used
-> (Text -> Text) -- ^ preprocessing function (e.g. tokenization)
-> (Maybe Int) -- ^ number of bootstrap samples
-> SourceSpec -- ^ source specification for the input values
-> SourceSpec -- ^ source specification for the expected output
-> SourceSpec -- ^ source specification for the output
-> IO (MetricOutput) -- ^ metric value for the output against the expected output
gevalCore metric mSelector preprocess inputSource expectedSource outSource = do
gevalCore metric mSelector preprocess mBootstrapResampling inputSource expectedSource outSource = do
whenM (isEmptyFileSource outSource) $ throwM $ EmptyOutput
gevalCoreOnSources metric
(fileAsLineSource inputSource mSelector preprocess)
(fileAsLineSource expectedSource mSelector preprocess)
(fileAsLineSource outSource mSelector preprocess)
go metric
(fileAsLineSource inputSource mSelector preprocess)
(fileAsLineSource expectedSource mSelector preprocess)
(fileAsLineSource outSource mSelector preprocess)
where go = case mBootstrapResampling of
Nothing -> gevalCoreOnSources
Just bootstrapResampling -> gevalBootstrapOnSources bootstrapResampling
isEmptyFileSource :: SourceSpec -> IO Bool
isEmptyFileSource (FilePathSpec filePath) = isEmptyFile filePath
@ -431,6 +437,31 @@ logLossToLikehood logLoss = exp (-logLoss)
data LineInFile = LineInFile SourceSpec Word32 Text
gevalBootstrapOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
Int -- ^ number of samples
-> Metric -- ^ evaluation metric
-> LineSource (ResourceT m) -- ^ source of the input values
-> LineSource (ResourceT m) -- ^ source to read the expected output
-> LineSource (ResourceT m) -- ^ source to read the output
-> m (MetricOutput) -- ^ metric values for the output against the expected output
gevalBootstrapOnSources numberOfSamples metric inputLineStream expectedLineStream outLineStream = do
case toSing $ toHelper metric of
SomeSing smetric -> gevalRunPipeline parserSpec (trans step) finalPipeline context
where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
context = (WithoutInput expectedLineStream outLineStream)
step = itemStep smetric
expParser = expectedParser smetric
outParser = outputParser smetric
finalPipeline = fixer (bootstrapC numberOfSamples $ continueGEvalCalculations smetric metric)
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
trans step (ParsedRecordWithoutInput x y) = step (x, y)
fixer :: ConduitT c Void (ResourceT m) [MetricOutput] -> ConduitT c Void (ResourceT m) MetricOutput
fixer c = do
outputs <- c
let values = Prelude.map (\(MetricOutput (SimpleRun v) _) -> v) outputs
return $ MetricOutput (BootstrapResampling values) Nothing
-- | Runs evaluation for a given metric using the sources given
-- for input, expected output and output. Returns the metric value.
-- Throws @GEvalException@ if something was wrong in the data (e.g.

View File

@ -116,6 +116,8 @@ main = hspec $ do
runGEvalTest "bleu-empty" `shouldReturnAlmost` 0.0000
it "with tokenization" $
runGEvalTest "bleu-with-tokenization" `shouldReturnAlmost` 0.6501914150070065
it "with bootstrap" $
runGEvalTest "bleu-complex-bootstrap" `shouldReturnAlmost` 0.7061420723046241
describe "GLEU" $ do
it "simple example" $
runGEvalTest "gleu-simple" `shouldReturnAlmost` 0.462962962962963
@ -751,6 +753,7 @@ testMatchFun _ _ = False
extractVal :: (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricResult])])) -> IO MetricValue
extractVal (Right (Just ([(_, (SimpleRun val):_)]))) = return val
extractVal (Right (Just ([(_, (BootstrapResampling vals):_)]))) = return (sum vals / fromIntegral (Prelude.length vals))
extractVal (Right Nothing) = return $ error "no metrics???"
extractVal (Right (Just [])) = return $ error "emtpy metric list???"
extractVal (Left result) = do

View File

@ -0,0 +1,6 @@
Ala has a white cat
It is a trap
All your base belong to us
bar
expected result
thrash
1 Ala has a white cat
2 It is a trap
3 All your base belong to us
4 bar
5 expected result
6 thrash

View File

@ -0,0 +1 @@
--metric BLEU --bootstrap-resampling 100

View File

@ -0,0 +1,6 @@
Alice has a white cat Ala has a whitecat
It is a trap
All your base are belong to us
foo bar baz
the expected result result expected
foo bar foo bar baz baq
Can't render this file because it has a wrong number of fields in line 2.