Implement bootstrap in GEval
This commit is contained in:
parent
deb14c6702
commit
ae2769b7b9
@ -10,14 +10,14 @@ import Data.Conduit
|
|||||||
import qualified Data.Conduit.List as CL
|
import qualified Data.Conduit.List as CL
|
||||||
import qualified Data.Conduit.Combinators as CC
|
import qualified Data.Conduit.Combinators as CC
|
||||||
import Control.Monad.Trans.Resource
|
import Control.Monad.Trans.Resource
|
||||||
import Data.Vector.Unboxed
|
import Data.Vector
|
||||||
import qualified Data.Vector.Generic as VG
|
import qualified Data.Vector.Generic as VG
|
||||||
|
|
||||||
import Debug.Trace
|
import Debug.Trace
|
||||||
|
|
||||||
import System.Random (mkStdGen, randomRs)
|
import System.Random (mkStdGen, randomRs)
|
||||||
|
|
||||||
bootstrapC :: (Show c, Show f, Unbox c, Monad m) => Int -> ConduitT c Void (ResourceT m) f -> ConduitT c Void (ResourceT m) [f]
|
bootstrapC :: Monad m => Int -> ConduitT c Void (ResourceT m) f -> ConduitT c Void (ResourceT m) [f]
|
||||||
bootstrapC numberOfSamples final = do
|
bootstrapC numberOfSamples final = do
|
||||||
l <- CC.sinkList
|
l <- CC.sinkList
|
||||||
let v = fromList l
|
let v = fromList l
|
||||||
|
@ -100,6 +100,7 @@ import Text.Tokenizer
|
|||||||
import GEval.Selector
|
import GEval.Selector
|
||||||
import GEval.Annotation
|
import GEval.Annotation
|
||||||
import GEval.BlackBoxDebugging
|
import GEval.BlackBoxDebugging
|
||||||
|
import Data.Conduit.Bootstrap
|
||||||
|
|
||||||
import qualified Data.HashMap.Strict as M
|
import qualified Data.HashMap.Strict as M
|
||||||
import qualified Data.Vector as V
|
import qualified Data.Vector as V
|
||||||
@ -256,6 +257,7 @@ gevalOnSingleOut gevalSpec inputSource expectedSource outSource = do
|
|||||||
vals <- Prelude.mapM (\scheme -> gevalCore (evaluationSchemeMetric scheme)
|
vals <- Prelude.mapM (\scheme -> gevalCore (evaluationSchemeMetric scheme)
|
||||||
mSelector
|
mSelector
|
||||||
(preprocess . applyPreprocessingOperations scheme)
|
(preprocess . applyPreprocessingOperations scheme)
|
||||||
|
(gesBootstrapResampling gevalSpec)
|
||||||
inputSource
|
inputSource
|
||||||
expectedSource
|
expectedSource
|
||||||
outSource) schemes
|
outSource) schemes
|
||||||
@ -412,16 +414,20 @@ singleLineAsLineSource (LineInFile sourceSpec lineNo line) itemDecoder preproces
|
|||||||
gevalCore :: Metric -- ^ evaluation metric
|
gevalCore :: Metric -- ^ evaluation metric
|
||||||
-> Maybe Selector -- ^ selector to be used
|
-> Maybe Selector -- ^ selector to be used
|
||||||
-> (Text -> Text) -- ^ preprocessing function (e.g. tokenization)
|
-> (Text -> Text) -- ^ preprocessing function (e.g. tokenization)
|
||||||
|
-> (Maybe Int) -- ^ number of bootstrap samples
|
||||||
-> SourceSpec -- ^ source specification for the input values
|
-> SourceSpec -- ^ source specification for the input values
|
||||||
-> SourceSpec -- ^ source specification for the expected output
|
-> SourceSpec -- ^ source specification for the expected output
|
||||||
-> SourceSpec -- ^ source specification for the output
|
-> SourceSpec -- ^ source specification for the output
|
||||||
-> IO (MetricOutput) -- ^ metric value for the output against the expected output
|
-> IO (MetricOutput) -- ^ metric value for the output against the expected output
|
||||||
gevalCore metric mSelector preprocess inputSource expectedSource outSource = do
|
gevalCore metric mSelector preprocess mBootstrapResampling inputSource expectedSource outSource = do
|
||||||
whenM (isEmptyFileSource outSource) $ throwM $ EmptyOutput
|
whenM (isEmptyFileSource outSource) $ throwM $ EmptyOutput
|
||||||
gevalCoreOnSources metric
|
go metric
|
||||||
(fileAsLineSource inputSource mSelector preprocess)
|
(fileAsLineSource inputSource mSelector preprocess)
|
||||||
(fileAsLineSource expectedSource mSelector preprocess)
|
(fileAsLineSource expectedSource mSelector preprocess)
|
||||||
(fileAsLineSource outSource mSelector preprocess)
|
(fileAsLineSource outSource mSelector preprocess)
|
||||||
|
where go = case mBootstrapResampling of
|
||||||
|
Nothing -> gevalCoreOnSources
|
||||||
|
Just bootstrapResampling -> gevalBootstrapOnSources bootstrapResampling
|
||||||
|
|
||||||
isEmptyFileSource :: SourceSpec -> IO Bool
|
isEmptyFileSource :: SourceSpec -> IO Bool
|
||||||
isEmptyFileSource (FilePathSpec filePath) = isEmptyFile filePath
|
isEmptyFileSource (FilePathSpec filePath) = isEmptyFile filePath
|
||||||
@ -431,6 +437,31 @@ logLossToLikehood logLoss = exp (-logLoss)
|
|||||||
|
|
||||||
data LineInFile = LineInFile SourceSpec Word32 Text
|
data LineInFile = LineInFile SourceSpec Word32 Text
|
||||||
|
|
||||||
|
gevalBootstrapOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
|
||||||
|
Int -- ^ number of samples
|
||||||
|
-> Metric -- ^ evaluation metric
|
||||||
|
-> LineSource (ResourceT m) -- ^ source of the input values
|
||||||
|
-> LineSource (ResourceT m) -- ^ source to read the expected output
|
||||||
|
-> LineSource (ResourceT m) -- ^ source to read the output
|
||||||
|
-> m (MetricOutput) -- ^ metric values for the output against the expected output
|
||||||
|
gevalBootstrapOnSources numberOfSamples metric inputLineStream expectedLineStream outLineStream = do
|
||||||
|
case toSing $ toHelper metric of
|
||||||
|
SomeSing smetric -> gevalRunPipeline parserSpec (trans step) finalPipeline context
|
||||||
|
where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
|
||||||
|
context = (WithoutInput expectedLineStream outLineStream)
|
||||||
|
step = itemStep smetric
|
||||||
|
expParser = expectedParser smetric
|
||||||
|
outParser = outputParser smetric
|
||||||
|
finalPipeline = fixer (bootstrapC numberOfSamples $ continueGEvalCalculations smetric metric)
|
||||||
|
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
|
||||||
|
trans step (ParsedRecordWithoutInput x y) = step (x, y)
|
||||||
|
|
||||||
|
fixer :: ConduitT c Void (ResourceT m) [MetricOutput] -> ConduitT c Void (ResourceT m) MetricOutput
|
||||||
|
fixer c = do
|
||||||
|
outputs <- c
|
||||||
|
let values = Prelude.map (\(MetricOutput (SimpleRun v) _) -> v) outputs
|
||||||
|
return $ MetricOutput (BootstrapResampling values) Nothing
|
||||||
|
|
||||||
-- | Runs evaluation for a given metric using the sources given
|
-- | Runs evaluation for a given metric using the sources given
|
||||||
-- for input, expected output and output. Returns the metric value.
|
-- for input, expected output and output. Returns the metric value.
|
||||||
-- Throws @GEvalException@ if something was wrong in the data (e.g.
|
-- Throws @GEvalException@ if something was wrong in the data (e.g.
|
||||||
|
@ -116,6 +116,8 @@ main = hspec $ do
|
|||||||
runGEvalTest "bleu-empty" `shouldReturnAlmost` 0.0000
|
runGEvalTest "bleu-empty" `shouldReturnAlmost` 0.0000
|
||||||
it "with tokenization" $
|
it "with tokenization" $
|
||||||
runGEvalTest "bleu-with-tokenization" `shouldReturnAlmost` 0.6501914150070065
|
runGEvalTest "bleu-with-tokenization" `shouldReturnAlmost` 0.6501914150070065
|
||||||
|
it "with bootstrap" $
|
||||||
|
runGEvalTest "bleu-complex-bootstrap" `shouldReturnAlmost` 0.7061420723046241
|
||||||
describe "GLEU" $ do
|
describe "GLEU" $ do
|
||||||
it "simple example" $
|
it "simple example" $
|
||||||
runGEvalTest "gleu-simple" `shouldReturnAlmost` 0.462962962962963
|
runGEvalTest "gleu-simple" `shouldReturnAlmost` 0.462962962962963
|
||||||
@ -751,6 +753,7 @@ testMatchFun _ _ = False
|
|||||||
|
|
||||||
extractVal :: (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricResult])])) -> IO MetricValue
|
extractVal :: (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricResult])])) -> IO MetricValue
|
||||||
extractVal (Right (Just ([(_, (SimpleRun val):_)]))) = return val
|
extractVal (Right (Just ([(_, (SimpleRun val):_)]))) = return val
|
||||||
|
extractVal (Right (Just ([(_, (BootstrapResampling vals):_)]))) = return (sum vals / fromIntegral (Prelude.length vals))
|
||||||
extractVal (Right Nothing) = return $ error "no metrics???"
|
extractVal (Right Nothing) = return $ error "no metrics???"
|
||||||
extractVal (Right (Just [])) = return $ error "emtpy metric list???"
|
extractVal (Right (Just [])) = return $ error "emtpy metric list???"
|
||||||
extractVal (Left result) = do
|
extractVal (Left result) = do
|
||||||
|
@ -0,0 +1,6 @@
|
|||||||
|
Ala has a white cat
|
||||||
|
It is a trap
|
||||||
|
All your base belong to us
|
||||||
|
bar
|
||||||
|
expected result
|
||||||
|
thrash
|
|
@ -0,0 +1 @@
|
|||||||
|
--metric BLEU --bootstrap-resampling 100
|
@ -0,0 +1,6 @@
|
|||||||
|
Alice has a white cat Ala has a whitecat
|
||||||
|
It is a trap
|
||||||
|
All your base are belong to us
|
||||||
|
foo bar baz
|
||||||
|
the expected result result expected
|
||||||
|
foo bar foo bar baz baq
|
Can't render this file because it has a wrong number of fields in line 2.
|
Loading…
Reference in New Issue
Block a user