From 1cea36ac930ec32d7c5a4f307361e8879e5ddecb Mon Sep 17 00:00:00 2001
From: Filip Gralinski
Date: Mon, 27 Jan 2020 21:54:34 +0100
Subject: [PATCH] Helper functions for confidence bounds

---
Review notes (this section is not part of the commit message):
  * The line structure of this patch was restored by hand. Indentation and
    the position of blank context lines are a best guess and must be
    checked against the target files before applying (TODO confirm).
  * getConfidenceBounds now reports an empty sample list explicitly and
    clamps the number of cut elements; hunk sizes and the diffstat were
    recomputed accordingly, the blob ids on the "index" lines were not.

 src/Data/Conduit/Bootstrap.hs | 32 +++++++++++++++++++++++++++++---
 test/Spec.hs                  |  5 +++++
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/src/Data/Conduit/Bootstrap.hs b/src/Data/Conduit/Bootstrap.hs
index 229fd78..25993aa 100644
--- a/src/Data/Conduit/Bootstrap.hs
+++ b/src/Data/Conduit/Bootstrap.hs
@@ -3,7 +3,7 @@
 -- Bootstrap re-sampling
 
 module Data.Conduit.Bootstrap
-       (bootstrapC)
+       (bootstrapC, getConfidenceBounds, defaultConfidenceLevel)
        where
 
 import Data.Conduit
@@ -12,8 +12,7 @@ import qualified Data.Conduit.Combinators as CC
 import Control.Monad.Trans.Resource
 import Data.Vector
 import qualified Data.Vector.Generic as VG
-
-import Debug.Trace
+import Data.List (sort)
 
 import System.Random (mkStdGen, randomRs)
 
@@ -26,3 +25,30 @@ bootstrapC numberOfSamples final = do
 
 resampleVector gen v = Prelude.map (\ix -> v VG.! ix) $ Prelude.take n $ randomRs (0, n-1) gen
   where n = VG.length v
+
+-- | Confidence level used when the caller does not pick one (0.95).
+defaultConfidenceLevel :: Double
+defaultConfidenceLevel = 0.95
+
+-- | Confidence bounds (lower, upper) for a list of samples.
+--
+-- The samples are sorted and the same number of elements is cut off at
+-- each end, so that roughly @confidenceLevel@ of the samples lie between
+-- the two returned values.
+--
+-- The number of cut elements is clamped to @[0, (n - 1) `div` 2]@: the
+-- lower bound never exceeds the upper one, and a confidence level outside
+-- @[0, 1]@ cannot produce an out-of-range index.
+--
+-- Calls 'error' for an empty list, as there is no value to return then.
+getConfidenceBounds :: Ord a => Double -> [a] -> (a, a)
+getConfidenceBounds confidenceLevel samples
+  | n == 0 = error "getConfidenceBounds: empty list of samples"
+  | otherwise = (samplesSorted !! toBeCut, samplesSorted !! (n - 1 - toBeCut))
+  where n = Prelude.length samples
+        samplesSorted = sort samples
+        -- nudges the product upwards so that floating-point error (1 - 0.9
+        -- evaluates slightly below 0.1) does not lose an element in 'floor'
+        epsilon = 0.0001
+        toBeCut' = floor (((1 - confidenceLevel + epsilon) * fromIntegral n) / 2)
+        toBeCut = max 0 (min toBeCut' ((n - 1) `div` 2))
diff --git a/test/Spec.hs b/test/Spec.hs
index adca65f..513b6e8 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -553,6 +553,11 @@ main = hspec $ do
       results <- runResourceT $ runConduit (CL.sourceList listChecked .| bootstrapC nbOfSamples CC.product)
       Prelude.length results `shouldBe` nbOfSamples
       (Prelude.length (Prelude.filter (> 0) results)) `shouldNotBe` 0
+    it "test gettings bounds" $ do
+      let sample = [3.0, 11.0, 2.0, 4.0, 15.0, 12.0, 2013.5, 19.0, 17.0, -10000.0,
+                    16.0, 13.0, 6.0, 7.0, 8.0, 5.0, 9.0, 10.0, 14.0, 18]
+      getConfidenceBounds defaultConfidenceLevel sample `shouldBe` (-10000.0, 2013.5)
+      getConfidenceBounds 0.9 sample `shouldBe` (2.0, 19.0)
   describe "tokenizer" $ do
     it "simple utterance with '13a' tokenizer" $ do
       tokenize (Just V13a) "To be or not to be, that's the question." `shouldBe`