Add Bootstrap facilities
This commit is contained in:
parent
bfcd5aa631
commit
deb14c6702
@ -51,6 +51,7 @@ library
|
||||
, Data.Statistics.Calibration
|
||||
, Data.CartesianStrings
|
||||
, Data.SplitIntoCrossTabs
|
||||
, Data.Conduit.Bootstrap
|
||||
, Paths_geval
|
||||
build-depends: base >= 4.7 && < 5
|
||||
, cond
|
||||
@ -105,6 +106,7 @@ library
|
||||
, utf8-string
|
||||
, singletons
|
||||
, ordered-containers
|
||||
, random
|
||||
default-language: Haskell2010
|
||||
|
||||
executable geval
|
||||
|
28
src/Data/Conduit/Bootstrap.hs
Normal file
28
src/Data/Conduit/Bootstrap.hs
Normal file
@ -0,0 +1,28 @@
|
||||
{-# LANGUAGE ScopedTypeVariables #-}
|
||||
|
||||
-- Bootstrap re-sampling
|
||||
|
||||
module Data.Conduit.Bootstrap
|
||||
(bootstrapC)
|
||||
where
|
||||
|
||||
import Data.Conduit
|
||||
import qualified Data.Conduit.List as CL
|
||||
import qualified Data.Conduit.Combinators as CC
|
||||
import Control.Monad.Trans.Resource
|
||||
import Data.Vector.Unboxed
|
||||
import qualified Data.Vector.Generic as VG
|
||||
|
||||
import Debug.Trace
|
||||
|
||||
import System.Random (mkStdGen, randomRs)
|
||||
|
||||
bootstrapC :: (Show c, Show f, Unbox c, Monad m) => Int -> ConduitT c Void (ResourceT m) f -> ConduitT c Void (ResourceT m) [f]
|
||||
bootstrapC numberOfSamples final = do
|
||||
l <- CC.sinkList
|
||||
let v = fromList l
|
||||
results <- Prelude.mapM (\i -> (CC.yieldMany (resampleVector (mkStdGen i) v) .| final)) [1..numberOfSamples]
|
||||
return results
|
||||
|
||||
resampleVector gen v = Prelude.map (\ix -> v VG.! ix) $ Prelude.take n $ randomRs (0, n-1) gen
|
||||
where n = VG.length v
|
12
test/Spec.hs
12
test/Spec.hs
@ -1,5 +1,6 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE FlexibleContexts #-}
|
||||
{-# LANGUAGE ScopedTypeVariables #-}
|
||||
|
||||
import Test.Hspec
|
||||
|
||||
@ -29,6 +30,7 @@ import GEval.FeatureExtractor
|
||||
import GEval.Selector
|
||||
import GEval.CreateChallenge
|
||||
import GEval.Validation
|
||||
import Data.Conduit.Bootstrap
|
||||
|
||||
import Data.Map.Strict
|
||||
import Data.Conduit.List (consume)
|
||||
@ -539,6 +541,16 @@ main = hspec $ do
|
||||
(1.5, 3.0),
|
||||
(3.0, 2.0),
|
||||
(4.0, 1.0)]
|
||||
describe "bootstrap conduit" $ do
|
||||
it "sanity test" $ do
|
||||
let nbOfSamples = 1000
|
||||
let listChecked :: [Int] = [0..10]
|
||||
|
||||
(runResourceT $ runConduit (CL.sourceList listChecked .| CC.product)) `shouldReturn` 0
|
||||
|
||||
results <- runResourceT $ runConduit (CL.sourceList listChecked .| bootstrapC nbOfSamples CC.product)
|
||||
Prelude.length results `shouldBe` nbOfSamples
|
||||
(Prelude.length (Prelude.filter (> 0) results)) `shouldNotBe` 0
|
||||
describe "tokenizer" $ do
|
||||
it "simple utterance with '13a' tokenizer" $ do
|
||||
tokenize (Just V13a) "To be or not to be, that's the question." `shouldBe`
|
||||
|
Loading…
Reference in New Issue
Block a user