Implement bootstrap in GEval
This commit is contained in:
parent
deb14c6702
commit
ae2769b7b9
@ -10,14 +10,14 @@ import Data.Conduit
|
||||
import qualified Data.Conduit.List as CL
|
||||
import qualified Data.Conduit.Combinators as CC
|
||||
import Control.Monad.Trans.Resource
|
||||
import Data.Vector.Unboxed
|
||||
import Data.Vector
|
||||
import qualified Data.Vector.Generic as VG
|
||||
|
||||
import Debug.Trace
|
||||
|
||||
import System.Random (mkStdGen, randomRs)
|
||||
|
||||
bootstrapC :: (Show c, Show f, Unbox c, Monad m) => Int -> ConduitT c Void (ResourceT m) f -> ConduitT c Void (ResourceT m) [f]
|
||||
bootstrapC :: Monad m => Int -> ConduitT c Void (ResourceT m) f -> ConduitT c Void (ResourceT m) [f]
|
||||
bootstrapC numberOfSamples final = do
|
||||
l <- CC.sinkList
|
||||
let v = fromList l
|
||||
|
@ -100,6 +100,7 @@ import Text.Tokenizer
|
||||
import GEval.Selector
|
||||
import GEval.Annotation
|
||||
import GEval.BlackBoxDebugging
|
||||
import Data.Conduit.Bootstrap
|
||||
|
||||
import qualified Data.HashMap.Strict as M
|
||||
import qualified Data.Vector as V
|
||||
@ -256,6 +257,7 @@ gevalOnSingleOut gevalSpec inputSource expectedSource outSource = do
|
||||
vals <- Prelude.mapM (\scheme -> gevalCore (evaluationSchemeMetric scheme)
|
||||
mSelector
|
||||
(preprocess . applyPreprocessingOperations scheme)
|
||||
(gesBootstrapResampling gevalSpec)
|
||||
inputSource
|
||||
expectedSource
|
||||
outSource) schemes
|
||||
@ -412,16 +414,20 @@ singleLineAsLineSource (LineInFile sourceSpec lineNo line) itemDecoder preproces
|
||||
gevalCore :: Metric -- ^ evaluation metric
|
||||
-> Maybe Selector -- ^ selector to be used
|
||||
-> (Text -> Text) -- ^ preprocessing function (e.g. tokenization)
|
||||
-> (Maybe Int) -- ^ number of bootstrap samples
|
||||
-> SourceSpec -- ^ source specification for the input values
|
||||
-> SourceSpec -- ^ source specification for the expected output
|
||||
-> SourceSpec -- ^ source specification for the output
|
||||
-> IO (MetricOutput) -- ^ metric value for the output against the expected output
|
||||
gevalCore metric mSelector preprocess inputSource expectedSource outSource = do
|
||||
gevalCore metric mSelector preprocess mBootstrapResampling inputSource expectedSource outSource = do
|
||||
whenM (isEmptyFileSource outSource) $ throwM $ EmptyOutput
|
||||
gevalCoreOnSources metric
|
||||
go metric
|
||||
(fileAsLineSource inputSource mSelector preprocess)
|
||||
(fileAsLineSource expectedSource mSelector preprocess)
|
||||
(fileAsLineSource outSource mSelector preprocess)
|
||||
where go = case mBootstrapResampling of
|
||||
Nothing -> gevalCoreOnSources
|
||||
Just bootstrapResampling -> gevalBootstrapOnSources bootstrapResampling
|
||||
|
||||
isEmptyFileSource :: SourceSpec -> IO Bool
|
||||
isEmptyFileSource (FilePathSpec filePath) = isEmptyFile filePath
|
||||
@ -431,6 +437,31 @@ logLossToLikehood logLoss = exp (-logLoss)
|
||||
|
||||
data LineInFile = LineInFile SourceSpec Word32 Text
|
||||
|
||||
gevalBootstrapOnSources :: (MonadIO m, MonadThrow m, MonadUnliftIO m) =>
|
||||
Int -- ^ number of samples
|
||||
-> Metric -- ^ evaluation metric
|
||||
-> LineSource (ResourceT m) -- ^ source of the input values
|
||||
-> LineSource (ResourceT m) -- ^ source to read the expected output
|
||||
-> LineSource (ResourceT m) -- ^ source to read the output
|
||||
-> m (MetricOutput) -- ^ metric values for the output against the expected output
|
||||
gevalBootstrapOnSources numberOfSamples metric inputLineStream expectedLineStream outLineStream = do
|
||||
case toSing $ toHelper metric of
|
||||
SomeSing smetric -> gevalRunPipeline parserSpec (trans step) finalPipeline context
|
||||
where parserSpec = (ParserSpecWithoutInput (liftOp expParser) (liftOp outParser))
|
||||
context = (WithoutInput expectedLineStream outLineStream)
|
||||
step = itemStep smetric
|
||||
expParser = expectedParser smetric
|
||||
outParser = outputParser smetric
|
||||
finalPipeline = fixer (bootstrapC numberOfSamples $ continueGEvalCalculations smetric metric)
|
||||
trans :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
|
||||
trans step (ParsedRecordWithoutInput x y) = step (x, y)
|
||||
|
||||
fixer :: ConduitT c Void (ResourceT m) [MetricOutput] -> ConduitT c Void (ResourceT m) MetricOutput
|
||||
fixer c = do
|
||||
outputs <- c
|
||||
let values = Prelude.map (\(MetricOutput (SimpleRun v) _) -> v) outputs
|
||||
return $ MetricOutput (BootstrapResampling values) Nothing
|
||||
|
||||
-- | Runs evaluation for a given metric using the sources given
|
||||
-- for input, expected output and output. Returns the metric value.
|
||||
-- Throws @GEvalException@ if something was wrong in the data (e.g.
|
||||
|
@ -116,6 +116,8 @@ main = hspec $ do
|
||||
runGEvalTest "bleu-empty" `shouldReturnAlmost` 0.0000
|
||||
it "with tokenization" $
|
||||
runGEvalTest "bleu-with-tokenization" `shouldReturnAlmost` 0.6501914150070065
|
||||
it "with bootstrap" $
|
||||
runGEvalTest "bleu-complex-bootstrap" `shouldReturnAlmost` 0.7061420723046241
|
||||
describe "GLEU" $ do
|
||||
it "simple example" $
|
||||
runGEvalTest "gleu-simple" `shouldReturnAlmost` 0.462962962962963
|
||||
@ -751,6 +753,7 @@ testMatchFun _ _ = False
|
||||
|
||||
extractVal :: (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricResult])])) -> IO MetricValue
|
||||
extractVal (Right (Just ([(_, (SimpleRun val):_)]))) = return val
|
||||
extractVal (Right (Just ([(_, (BootstrapResampling vals):_)]))) = return (sum vals / fromIntegral (Prelude.length vals))
|
||||
extractVal (Right Nothing) = return $ error "no metrics???"
|
||||
extractVal (Right (Just [])) = return $ error "emtpy metric list???"
|
||||
extractVal (Left result) = do
|
||||
|
@ -0,0 +1,6 @@
|
||||
Ala has a white cat
|
||||
It is a trap
|
||||
All your base belong to us
|
||||
bar
|
||||
expected result
|
||||
thrash
|
|
@ -0,0 +1 @@
|
||||
--metric BLEU --bootstrap-resampling 100
|
@ -0,0 +1,6 @@
|
||||
Alice has a white cat Ala has a whitecat
|
||||
It is a trap
|
||||
All your base are belong to us
|
||||
foo bar baz
|
||||
the expected result result expected
|
||||
foo bar foo bar baz baq
|
Can't render this file because it has a wrong number of fields in line 2.
|
Loading…
Reference in New Issue
Block a user