Implement bootstrap in GEval

This commit is contained in:
Filip Gralinski 2020-01-25 23:46:33 +01:00
parent deb14c6702
commit ae2769b7b9
6 changed files with 54 additions and 7 deletions

View File

@ -10,14 +10,14 @@ import Data.Conduit
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Combinators as CC
import Control.Monad.Trans.Resource
import Data.Vector.Unboxed
import Data.Vector
import qualified Data.Vector.Generic as VG
import Debug.Trace
import System.Random (mkStdGen, randomRs)
bootstrapC :: (Show c, Show f, Unbox c, Monad m) => Int -> ConduitT c Void (ResourceT m) f -> ConduitT c Void (ResourceT m) [f]
bootstrapC :: Monad m => Int -> ConduitT c Void (ResourceT m) f -> ConduitT c Void (ResourceT m) [f]
bootstrapC numberOfSamples final = do
l <- CC.sinkList
let v = fromList l

View File

@ -100,6 +100,7 @@ import Text.Tokenizer
import GEval.Selector
import GEval.Annotation
import GEval.BlackBoxDebugging
import Data.Conduit.Bootstrap
import qualified Data.HashMap.Strict as M
import qualified Data.Vector as V
@ -256,6 +257,7 @@ gevalOnSingleOut gevalSpec inputSource expectedSource outSource = do
vals <- Prelude.mapM (\scheme -> gevalCore (evaluationSchemeMetric scheme)
mSelector
(preprocess . applyPreprocessingOperations scheme)
(gesBootstrapResampling gevalSpec)
inputSource
expectedSource
outSource) schemes
@ -412,16 +414,20 @@ singleLineAsLineSource (LineInFile sourceSpec lineNo line) itemDecoder preproces
gevalCore :: Metric -- ^ evaluation metric
-> Maybe Selector -- ^ selector to be used
-> (Text -> Text) -- ^ preprocessing function (e.g. tokenization)
-> (Maybe Int) -- ^ number of bootstrap samples
-> SourceSpec -- ^ source specification for the input values
-> SourceSpec -- ^ source specification for the expected output
-> SourceSpec -- ^ source specification for the output
-> IO (MetricOutput) -- ^ metric value for the output against the expected output
gevalCore metric mSelector preprocess inputSource expectedSource outSource = do
gevalCore metric mSelector preprocess mBootstrapResampling inputSource expectedSource outSource = do
whenM (isEmptyFileSource outSource) $ throwM $ EmptyOutput
gevalCoreOnSources metric
go metric
(fileAsLineSource inputSource mSelector preprocess)
(fileAsLineSource expectedSource mSelector preprocess)
(fileAsLineSource outSource mSelector preprocess)
where go = case mBootstrapResampling of
Nothing -> gevalCoreOnSources
Just bootstrapResampling -> gevalBootstrapOnSources bootstrapResampling
isEmptyFileSource :: SourceSpec -> IO Bool
isEmptyFileSource (FilePathSpec filePath) = isEmptyFile filePath
@ -431,6 +437,31 @@ logLossToLikehood logLoss = exp (-logLoss)
data LineInFile = LineInFile SourceSpec Word32 Text
gevalBootstrapOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
Int -- ^ number of samples
-> Metric -- ^ evaluation metric
-> LineSource (ResourceT m) -- ^ source of the input values
-> LineSource (ResourceT m) -- ^ source to read the expected output
-> LineSource (ResourceT m) -- ^ source to read the output
-> m (MetricOutput) -- ^ metric values for the output against the expected output
gevalBootstrapOnSources numberOfSamples metric inputLineStream expectedLineStream outLineStream = do
case toSing $ toHelper metric of
SomeSing smetric -> gevalRunPipeline parserSpec (trans step) finalPipeline context
where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
context = (WithoutInput expectedLineStream outLineStream)
step = itemStep smetric
expParser = expectedParser smetric
outParser = outputParser smetric
finalPipeline = fixer (bootstrapC numberOfSamples $ continueGEvalCalculations smetric metric)
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
trans step (ParsedRecordWithoutInput x y) = step (x, y)
fixer :: ConduitT c Void (ResourceT m) [MetricOutput] -> ConduitT c Void (ResourceT m) MetricOutput
fixer c = do
outputs <- c
let values = Prelude.map (\(MetricOutput (SimpleRun v) _) -> v) outputs
return $ MetricOutput (BootstrapResampling values) Nothing
-- | Runs evaluation for a given metric using the sources given
-- for input, expected output and output. Returns the metric value.
-- Throws @GEvalException@ if something was wrong in the data (e.g.

View File

@ -116,6 +116,8 @@ main = hspec $ do
runGEvalTest "bleu-empty" `shouldReturnAlmost` 0.0000
it "with tokenization" $
runGEvalTest "bleu-with-tokenization" `shouldReturnAlmost` 0.6501914150070065
it "with bootstrap" $
runGEvalTest "bleu-complex-bootstrap" `shouldReturnAlmost` 0.7061420723046241
describe "GLEU" $ do
it "simple example" $
runGEvalTest "gleu-simple" `shouldReturnAlmost` 0.462962962962963
@ -751,6 +753,7 @@ testMatchFun _ _ = False
extractVal :: (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricResult])])) -> IO MetricValue
extractVal (Right (Just ([(_, (SimpleRun val):_)]))) = return val
extractVal (Right (Just ([(_, (BootstrapResampling vals):_)]))) = return (sum vals / fromIntegral (Prelude.length vals))
extractVal (Right Nothing) = return $ error "no metrics???"
extractVal (Right (Just [])) = return $ error "emtpy metric list???"
extractVal (Left result) = do

View File

@ -0,0 +1,6 @@
Ala has a white cat
It is a trap
All your base belong to us
bar
expected result
thrash
1 Ala has a white cat
2 It is a trap
3 All your base belong to us
4 bar
5 expected result
6 thrash

View File

@ -0,0 +1 @@
--metric BLEU --bootstrap-resampling 100

View File

@ -0,0 +1,6 @@
Alice has a white cat Ala has a whitecat
It is a trap
All your base are belong to us
foo bar baz
the expected result result expected
foo bar foo bar baz baq
Can't render this file because it has a wrong number of fields in line 2.