Filip Gralinski 2020-01-25 23:46:33 +01:00
6 changed files with 54 additions and 7 deletions

@ -10,14 +10,14 @@ import Data.Conduit
import qualified Data.Conduit.List as CL
import qualified Data.Conduit.Combinators as CC
import Control.Monad.Trans.Resource
import Data.Vector.Unboxed
import Data.Vector
import qualified Data.Vector.Generic as VG
import Debug.Trace
import System.Random (mkStdGen, randomRs)
bootstrapC :: (Show c, Show f, Unbox c, Monad m) => Int -> ConduitT c Void (ResourceT m) f -> ConduitT c Void (ResourceT m) [f]
bootstrapC :: Monad m => Int -> ConduitT c Void (ResourceT m) f -> ConduitT c Void (ResourceT m) [f]
bootstrapC numberOfSamples final = do
l <- CC.sinkList
let v = fromList l

@ -100,6 +100,7 @@ import Text.Tokenizer
import GEval.Selector
import GEval.Annotation
import GEval.BlackBoxDebugging
import Data.Conduit.Bootstrap
import qualified Data.HashMap.Strict as M
import qualified Data.Vector as V
@ -256,6 +257,7 @@ gevalOnSingleOut gevalSpec inputSource expectedSource outSource = do
vals <- Prelude.mapM (\scheme -> gevalCore (evaluationSchemeMetric scheme)
(preprocess . applyPreprocessingOperations scheme)
(gesBootstrapResampling gevalSpec)
outSource) schemes
@ -412,16 +414,20 @@ singleLineAsLineSource (LineInFile sourceSpec lineNo line) itemDecoder preproces
-- | Evaluates an output source against the expected output for a single metric.
--
-- Throws 'EmptyOutput' when 'isEmptyFileSource' reports the output source as
-- empty. Otherwise the input, expected and output sources are each wrapped with
-- 'fileAsLineSource' (using the given selector and preprocessing function) and
-- passed either to 'gevalCoreOnSources' or, when a number of bootstrap samples
-- is supplied, to 'gevalBootstrapOnSources'.
gevalCore :: Metric -- ^ evaluation metric
-> Maybe Selector -- ^ selector to be used
-> (Text -> Text) -- ^ preprocessing function (e.g. tokenization)
-> (Maybe Int) -- ^ number of bootstrap samples
-> SourceSpec -- ^ source specification for the input values
-> SourceSpec -- ^ source specification for the expected output
-> SourceSpec -- ^ source specification for the output
-> IO (MetricOutput) -- ^ metric value for the output against the expected output
-- NOTE(review): two equation heads follow (six and seven arguments); presumably
-- only the seven-argument one, matching the signature above, is meant to remain
-- - confirm.
gevalCore metric mSelector preprocess inputSource expectedSource outSource = do
gevalCore metric mSelector preprocess mBootstrapResampling inputSource expectedSource outSource = do
-- Refuse to evaluate an empty output source.
whenM (isEmptyFileSource outSource) $ throwM $ EmptyOutput
-- NOTE(review): the direct 'gevalCoreOnSources' call and the 'go' call below
-- take the same arguments; presumably the 'go' form supersedes the direct one
-- - confirm.
gevalCoreOnSources metric
(fileAsLineSource inputSource mSelector preprocess)
(fileAsLineSource expectedSource mSelector preprocess)
(fileAsLineSource outSource mSelector preprocess)
go metric
(fileAsLineSource inputSource mSelector preprocess)
(fileAsLineSource expectedSource mSelector preprocess)
(fileAsLineSource outSource mSelector preprocess)
-- 'go' selects the evaluation routine: plain evaluation when no bootstrap
-- sample count is given, bootstrap evaluation with that count otherwise.
where go = case mBootstrapResampling of
Nothing -> gevalCoreOnSources
Just bootstrapResampling -> gevalBootstrapOnSources bootstrapResampling
isEmptyFileSource :: SourceSpec -> IO Bool
isEmptyFileSource (FilePathSpec filePath) = isEmptyFile filePath
@ -431,6 +437,31 @@ logLossToLikehood logLoss = exp (-logLoss)
data LineInFile = LineInFile SourceSpec Word32 Text
-- | Runs the evaluation pipeline for a metric with its final stage wrapped in
-- 'bootstrapC', and folds the resulting outputs into one 'MetricOutput' via
-- 'fixer'.
--
-- Only the expected and actual output streams are fed into the pipeline; the
-- input stream argument is accepted but not consulted by this function.
gevalBootstrapOnSources
  :: (MonadIO m, MonadThrow m, MonadUnliftIO m)
  => Int                       -- ^ number of samples
  -> Metric                    -- ^ evaluation metric
  -> LineSource (ResourceT m)  -- ^ source of the input values
  -> LineSource (ResourceT m)  -- ^ source to read the expected output
  -> LineSource (ResourceT m)  -- ^ source to read the output
  -> m MetricOutput            -- ^ metric values for the output against the expected output
gevalBootstrapOnSources numberOfSamples metric inputLineStream expectedLineStream outLineStream =
  case toSing (toHelper metric) of
    SomeSing smetric ->
      gevalRunPipeline parserSpec (onRecord (itemStep smetric)) finalPipeline context
      where
        -- Expected/actual streams only; there is no input stream in this context.
        context = WithoutInput expectedLineStream outLineStream
        parserSpec =
          ParserSpecWithoutInput
            (liftOp (expectedParser smetric))
            (liftOp (outputParser smetric))
        -- The metric's final calculation, run through 'bootstrapC' and then
        -- collapsed to a single output by 'fixer'.
        finalPipeline =
          fixer $ bootstrapC numberOfSamples (continueGEvalCalculations smetric metric)
        -- Adapts a function on (expected, output) pairs to parsed records.
        onRecord :: ((a, b) -> c) -> ParsedRecord (WithoutInput m a b) -> c
        onRecord f (ParsedRecordWithoutInput x y) = f (x, y)
-- | Turns a sink yielding a list of 'MetricOutput's into a sink yielding a
-- single 'MetricOutput' that carries a 'BootstrapResampling' result.
--
-- The value of every 'SimpleRun' in the list is collected in order; the second
-- field of each element is ignored and the combined output carries 'Nothing'
-- in that position.
fixer :: ConduitT c Void (ResourceT m) [MetricOutput] -> ConduitT c Void (ResourceT m) MetricOutput
fixer c = do
  outputs <- c
  -- 'outputs' is a list, so the extraction has to be mapped over it rather
  -- than applied to the list itself.
  -- NOTE(review): the lambda only matches 'SimpleRun'; any other result
  -- constructor in the list would fail the pattern match at run time.
  let values = Prelude.map (\(MetricOutput (SimpleRun v) _) -> v) outputs
  return $ MetricOutput (BootstrapResampling values) Nothing
-- | Runs evaluation for a given metric using the sources given
-- for input, expected output and output. Returns the metric value.
-- Throws @GEvalException@ if something was wrong in the data (e.g.

@ -116,6 +116,8 @@ main = hspec $ do
runGEvalTest "bleu-empty" `shouldReturnAlmost` 0.0000
it "with tokenization" $
runGEvalTest "bleu-with-tokenization" `shouldReturnAlmost` 0.6501914150070065
it "with bootstrap" $
runGEvalTest "bleu-complex-bootstrap" `shouldReturnAlmost` 0.7061420723046241
describe "GLEU" $ do
it "simple example" $
runGEvalTest "gleu-simple" `shouldReturnAlmost` 0.462962962962963
@ -751,6 +753,7 @@ testMatchFun _ _ = False
extractVal :: (Either (ParserResult GEvalOptions) (Maybe [(SourceSpec, [MetricResult])])) -> IO MetricValue
extractVal (Right (Just ([(_, (SimpleRun val):_)]))) = return val
extractVal (Right (Just ([(_, (BootstrapResampling vals):_)]))) = return (sum vals / fromIntegral (Prelude.length vals))
extractVal (Right Nothing) = return $ error "no metrics???"
extractVal (Right (Just [])) = return $ error "emtpy metric list???"
extractVal (Left result) = do

@ -0,0 +1,6 @@
Ala has a white cat
It is a trap
All your base belong to us
expected result
1 Ala has a white cat
2 It is a trap
3 All your base belong to us
4 bar
5 expected result
6 thrash

@ -0,0 +1 @@
--metric BLEU --bootstrap-resampling 100

@ -0,0 +1,6 @@
Alice has a white cat Ala has a whitecat
It is a trap
All your base are belong to us
foo bar baz
the expected result result expected
foo bar foo bar baz baq
