diff --git a/geval.cabal b/geval.cabal index 54dcd47..06c846b 100644 --- a/geval.cabal +++ b/geval.cabal @@ -51,6 +51,7 @@ library , Data.Statistics.Calibration , Data.CartesianStrings , Data.SplitIntoCrossTabs + , Data.Conduit.Bootstrap , Paths_geval build-depends: base >= 4.7 && < 5 , cond @@ -105,6 +106,7 @@ library , utf8-string , singletons , ordered-containers + , random default-language: Haskell2010 executable geval diff --git a/src/Data/Conduit/Bootstrap.hs b/src/Data/Conduit/Bootstrap.hs new file mode 100644 index 0000000..bc2ebf5 --- /dev/null +++ b/src/Data/Conduit/Bootstrap.hs @@ -0,0 +1,28 @@ +{-# LANGUAGE ScopedTypeVariables #-} + +-- Bootstrap re-sampling + +module Data.Conduit.Bootstrap + (bootstrapC) + where + +import Data.Conduit +import qualified Data.Conduit.List as CL +import qualified Data.Conduit.Combinators as CC +import Control.Monad.Trans.Resource +import Data.Vector.Unboxed +import qualified Data.Vector.Generic as VG + +import Debug.Trace + +import System.Random (mkStdGen, randomRs) + +bootstrapC :: (Show c, Show f, Unbox c, Monad m) => Int -> ConduitT c Void (ResourceT m) f -> ConduitT c Void (ResourceT m) [f] +bootstrapC numberOfSamples final = do + l <- CC.sinkList + let v = fromList l + results <- Prelude.mapM (\i -> (CC.yieldMany (resampleVector (mkStdGen i) v) .| final)) [1..numberOfSamples] + return results + +resampleVector gen v = Prelude.map (\ix -> v VG.! ix) $ Prelude.take n $ randomRs (0, n-1) gen + where n = VG.length v diff --git a/test/Spec.hs b/test/Spec.hs index 1a55a3e..c7e7b4a 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -1,5 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ScopedTypeVariables #-} import Test.Hspec @@ -29,6 +30,7 @@ import GEval.FeatureExtractor import GEval.Selector import GEval.CreateChallenge import GEval.Validation +import Data.Conduit.Bootstrap import Data.Map.Strict import Data.Conduit.List (consume) @@ -539,6 +541,16 @@ main = hspec $ do (1.5, 3.0), (3.0, 2.0), (4.0, 1.0)] + describe "bootstrap conduit" $ do + it "sanity test" $ do + let nbOfSamples = 1000 + let listChecked :: [Int] = [0..10] + + (runResourceT $ runConduit (CL.sourceList listChecked .| CC.product)) `shouldReturn` 0 + + results <- runResourceT $ runConduit (CL.sourceList listChecked .| bootstrapC nbOfSamples CC.product) + Prelude.length results `shouldBe` nbOfSamples + (Prelude.length (Prelude.filter (> 0) results)) `shouldNotBe` 0 describe "tokenizer" $ do it "simple utterance with '13a' tokenizer" $ do tokenize (Just V13a) "To be or not to be, that's the question." `shouldBe`