Add Bootstrap facilities
This commit is contained in:
parent
bfcd5aa631
commit
deb14c6702
@ -51,6 +51,7 @@ library
|
|||||||
, Data.Statistics.Calibration
|
, Data.Statistics.Calibration
|
||||||
, Data.CartesianStrings
|
, Data.CartesianStrings
|
||||||
, Data.SplitIntoCrossTabs
|
, Data.SplitIntoCrossTabs
|
||||||
|
, Data.Conduit.Bootstrap
|
||||||
, Paths_geval
|
, Paths_geval
|
||||||
build-depends: base >= 4.7 && < 5
|
build-depends: base >= 4.7 && < 5
|
||||||
, cond
|
, cond
|
||||||
@ -105,6 +106,7 @@ library
|
|||||||
, utf8-string
|
, utf8-string
|
||||||
, singletons
|
, singletons
|
||||||
, ordered-containers
|
, ordered-containers
|
||||||
|
, random
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
executable geval
|
executable geval
|
||||||
|
28
src/Data/Conduit/Bootstrap.hs
Normal file
28
src/Data/Conduit/Bootstrap.hs
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||||||
|
|
||||||
|
-- Bootstrap re-sampling
|
||||||
|
|
||||||
|
module Data.Conduit.Bootstrap
|
||||||
|
(bootstrapC)
|
||||||
|
where
|
||||||
|
|
||||||
|
import Data.Conduit
|
||||||
|
import qualified Data.Conduit.List as CL
|
||||||
|
import qualified Data.Conduit.Combinators as CC
|
||||||
|
import Control.Monad.Trans.Resource
|
||||||
|
import Data.Vector.Unboxed
|
||||||
|
import qualified Data.Vector.Generic as VG
|
||||||
|
|
||||||
|
import Debug.Trace
|
||||||
|
|
||||||
|
import System.Random (mkStdGen, randomRs)
|
||||||
|
|
||||||
|
bootstrapC :: (Show c, Show f, Unbox c, Monad m) => Int -> ConduitT c Void (ResourceT m) f -> ConduitT c Void (ResourceT m) [f]
|
||||||
|
bootstrapC numberOfSamples final = do
|
||||||
|
l <- CC.sinkList
|
||||||
|
let v = fromList l
|
||||||
|
results <- Prelude.mapM (\i -> (CC.yieldMany (resampleVector (mkStdGen i) v) .| final)) [1..numberOfSamples]
|
||||||
|
return results
|
||||||
|
|
||||||
|
resampleVector gen v = Prelude.map (\ix -> v VG.! ix) $ Prelude.take n $ randomRs (0, n-1) gen
|
||||||
|
where n = VG.length v
|
12
test/Spec.hs
12
test/Spec.hs
@ -1,5 +1,6 @@
|
|||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
{-# LANGUAGE FlexibleContexts #-}
|
{-# LANGUAGE FlexibleContexts #-}
|
||||||
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||||||
|
|
||||||
import Test.Hspec
|
import Test.Hspec
|
||||||
|
|
||||||
@ -29,6 +30,7 @@ import GEval.FeatureExtractor
|
|||||||
import GEval.Selector
|
import GEval.Selector
|
||||||
import GEval.CreateChallenge
|
import GEval.CreateChallenge
|
||||||
import GEval.Validation
|
import GEval.Validation
|
||||||
|
import Data.Conduit.Bootstrap
|
||||||
|
|
||||||
import Data.Map.Strict
|
import Data.Map.Strict
|
||||||
import Data.Conduit.List (consume)
|
import Data.Conduit.List (consume)
|
||||||
@ -539,6 +541,16 @@ main = hspec $ do
|
|||||||
(1.5, 3.0),
|
(1.5, 3.0),
|
||||||
(3.0, 2.0),
|
(3.0, 2.0),
|
||||||
(4.0, 1.0)]
|
(4.0, 1.0)]
|
||||||
|
describe "bootstrap conduit" $ do
|
||||||
|
it "sanity test" $ do
|
||||||
|
let nbOfSamples = 1000
|
||||||
|
let listChecked :: [Int] = [0..10]
|
||||||
|
|
||||||
|
(runResourceT $ runConduit (CL.sourceList listChecked .| CC.product)) `shouldReturn` 0
|
||||||
|
|
||||||
|
results <- runResourceT $ runConduit (CL.sourceList listChecked .| bootstrapC nbOfSamples CC.product)
|
||||||
|
Prelude.length results `shouldBe` nbOfSamples
|
||||||
|
(Prelude.length (Prelude.filter (> 0) results)) `shouldNotBe` 0
|
||||||
describe "tokenizer" $ do
|
describe "tokenizer" $ do
|
||||||
it "simple utterance with '13a' tokenizer" $ do
|
it "simple utterance with '13a' tokenizer" $ do
|
||||||
tokenize (Just V13a) "To be or not to be, that's the question." `shouldBe`
|
tokenize (Just V13a) "To be or not to be, that's the question." `shouldBe`
|
||||||
|
Loading…
Reference in New Issue
Block a user