Helper functions for confidence bounds

This commit is contained in:
Filip Gralinski 2020-01-27 21:54:34 +01:00
parent ae2769b7b9
commit 1cea36ac93
2 changed files with 19 additions and 3 deletions

View File

@ -3,7 +3,7 @@
-- Bootstrap re-sampling
module Data.Conduit.Bootstrap
(bootstrapC)
(bootstrapC, getConfidenceBounds, defaultConfidenceLevel)
where
import Data.Conduit
@ -12,8 +12,7 @@ import qualified Data.Conduit.Combinators as CC
import Control.Monad.Trans.Resource
import Data.Vector
import qualified Data.Vector.Generic as VG
import Debug.Trace
import Data.List (sort)
import System.Random (mkStdGen, randomRs)
@ -26,3 +25,15 @@ bootstrapC numberOfSamples final = do
-- Draw one bootstrap resample from the vector v.
-- n random indices are taken from the generator gen, each in the inclusive
-- range 0 .. n-1 (n being the length of v), and the elements of v at those
-- indices are returned as a list. The same element may therefore appear
-- more than once, and the result has as many elements as v.
resampleVector gen v = Prelude.map (\ix -> v VG.! ix) $ Prelude.take n $ randomRs (0, n-1) gen
-- n is both the number of draws and the exclusive upper limit for indices.
where n = VG.length v
-- | Confidence level used when the caller does not choose one: 0.95,
-- i.e. a 95% two-sided interval when passed to 'getConfidenceBounds'.
defaultConfidenceLevel :: Double
defaultConfidenceLevel = 0.95
-- | Estimate a two-sided confidence interval from a list of samples
-- (e.g. metric values obtained by bootstrap re-sampling).
--
-- The samples are sorted and the same number of values is cut off at each
-- end, so that roughly @confidenceLevel@ of the samples stay inside the
-- returned @(lower, upper)@ pair. Both bounds are actual sample values.
--
-- * @confidenceLevel@ is expected to be in @[0, 1]@; values outside that
--   range are treated as the nearest sensible extreme (nothing cut off, or
--   everything but the middle cut off) instead of crashing or returning
--   bounds in the wrong order.
-- * @samples@ must be non-empty; an empty list raises an 'error' with an
--   explicit message, as there is no value of type @a@ to return.
getConfidenceBounds :: Ord a => Double -> [a] -> (a, a)
getConfidenceBounds confidenceLevel samples
  | Prelude.null samples = error "getConfidenceBounds: empty list of samples"
  -- Indexing is safe here: 0 <= toBeCut <= (n - 1) `div` 2 < n.
  | otherwise = (samplesSorted !! toBeCut, samplesSorted !! (n - 1 - toBeCut))
  where
    n = Prelude.length samples
    samplesSorted = sort samples
    -- Small nudge so that floating-point rounding does not push the product
    -- just below a whole number (e.g. (1 - 0.9) * 20 / 2 evaluates to
    -- slightly less than 1 and would otherwise be floored to 0).
    epsilon = 0.0001
    -- Number of samples to drop from each tail, before clamping.
    toBeCut' = floor (((1 - confidenceLevel + epsilon) * fromIntegral n) / 2)
    -- Keep at least the middle element(s): never cut a negative number of
    -- samples and never let the lower index pass the upper one.
    toBeCut = max 0 (min toBeCut' ((n - 1) `div` 2))

View File

@ -553,6 +553,11 @@ main = hspec $ do
results <- runResourceT $ runConduit (CL.sourceList listChecked .| bootstrapC nbOfSamples CC.product)
Prelude.length results `shouldBe` nbOfSamples
(Prelude.length (Prelude.filter (> 0) results)) `shouldNotBe` 0
-- Checks getConfidenceBounds on 20 values: the numbers 2..19 (shuffled)
-- plus two outliers, -10000.0 and 2013.5.
it "test gettings bounds" $ do
let sample = [3.0, 11.0, 2.0, 4.0, 15.0, 12.0, 2013.5, 19.0, 17.0, -10000.0,
16.0, 13.0, 6.0, 7.0, 8.0, 5.0, 9.0, 10.0, 14.0, 18]
-- At the default level (0.95) with 20 samples nothing is cut from either
-- tail, so the bounds are the two outliers themselves.
getConfidenceBounds defaultConfidenceLevel sample `shouldBe` (-10000.0, 2013.5)
-- At level 0.9 one sample is cut from each tail, which removes both
-- outliers and leaves the second smallest and second largest values.
getConfidenceBounds 0.9 sample `shouldBe` (2.0, 19.0)
describe "tokenizer" $ do
it "simple utterance with '13a' tokenizer" $ do
tokenize (Just V13a) "To be or not to be, that's the question." `shouldBe`