Helper functions for confidence bounds

This commit is contained in:
Filip Gralinski 2020-01-27 21:54:34 +01:00
parent ae2769b7b9
commit 1cea36ac93
2 changed files with 19 additions and 3 deletions

View File

@ -3,7 +3,7 @@
-- Bootstrap re-sampling -- Bootstrap re-sampling
module Data.Conduit.Bootstrap module Data.Conduit.Bootstrap
(bootstrapC) (bootstrapC, getConfidenceBounds, defaultConfidenceLevel)
where where
import Data.Conduit import Data.Conduit
@ -12,8 +12,7 @@ import qualified Data.Conduit.Combinators as CC
import Control.Monad.Trans.Resource import Control.Monad.Trans.Resource
import Data.Vector import Data.Vector
import qualified Data.Vector.Generic as VG import qualified Data.Vector.Generic as VG
import Data.List (sort)
import Debug.Trace
import System.Random (mkStdGen, randomRs) import System.Random (mkStdGen, randomRs)
@ -26,3 +25,15 @@ bootstrapC numberOfSamples final = do
resampleVector gen v = Prelude.map (\ix -> v VG.! ix) $ Prelude.take n $ randomRs (0, n-1) gen resampleVector gen v = Prelude.map (\ix -> v VG.! ix) $ Prelude.take n $ randomRs (0, n-1) gen
where n = VG.length v where n = VG.length v
-- | Confidence level used when the caller does not pick one explicitly (95%).
defaultConfidenceLevel :: Double
defaultConfidenceLevel = 0.95
-- | Pick the lower and upper bound of a two-sided confidence interval from a
-- list of sample values.
--
-- The samples are sorted and the same number of values is discarded from
-- each end (roughly @(1 - confidenceLevel) / 2@ of the list per tail); the
-- smallest and the largest of the remaining values are returned as
-- @(lower, upper)@.
--
-- The number of discarded values is clamped so that at least one value
-- always remains and @lower <= upper@ holds, even when @confidenceLevel@
-- lies outside @[0, 1]@.
--
-- Calls 'error' when @samples@ is empty, because no bounds exist then.
getConfidenceBounds :: Ord a => Double -> [a] -> (a, a)
getConfidenceBounds confidenceLevel samples
  | n == 0 = error "getConfidenceBounds: empty list of samples"
  | otherwise = (samplesSorted !! toBeCut, samplesSorted !! (n - 1 - toBeCut))
  where
    n = Prelude.length samples
    samplesSorted = sort samples
    -- Slack added before flooring: e.g. @1 - 0.9@ evaluates to slightly less
    -- than 0.1 in floating point, which would otherwise cut one value too few.
    epsilon = 0.0001
    -- Unclamped number of values to drop from each tail.
    toBeCut' = floor (((1 - confidenceLevel + epsilon) * fromIntegral n) / 2) :: Int
    -- Never drop a negative number of values, and never so many that the
    -- lower index would pass the upper one.
    toBeCut = max 0 (min toBeCut' ((n - 1) `div` 2))

View File

@ -553,6 +553,11 @@ main = hspec $ do
results <- runResourceT $ runConduit (CL.sourceList listChecked .| bootstrapC nbOfSamples CC.product) results <- runResourceT $ runConduit (CL.sourceList listChecked .| bootstrapC nbOfSamples CC.product)
Prelude.length results `shouldBe` nbOfSamples Prelude.length results `shouldBe` nbOfSamples
(Prelude.length (Prelude.filter (> 0) results)) `shouldNotBe` 0 (Prelude.length (Prelude.filter (> 0) results)) `shouldNotBe` 0
it "test gettings bounds" $ do
let sample = [3.0, 11.0, 2.0, 4.0, 15.0, 12.0, 2013.5, 19.0, 17.0, -10000.0,
16.0, 13.0, 6.0, 7.0, 8.0, 5.0, 9.0, 10.0, 14.0, 18]
getConfidenceBounds defaultConfidenceLevel sample `shouldBe` (-10000.0, 2013.5)
getConfidenceBounds 0.9 sample `shouldBe` (2.0, 19.0)
describe "tokenizer" $ do describe "tokenizer" $ do
it "simple utterance with '13a' tokenizer" $ do it "simple utterance with '13a' tokenizer" $ do
tokenize (Just V13a) "To be or not to be, that's the question." `shouldBe` tokenize (Just V13a) "To be or not to be, that's the question." `shouldBe`