Implement auxiliary calibration function
This commit is contained in:
parent
19642db43f
commit
8393bec3ae
@ -41,6 +41,8 @@ library
|
|||||||
, Text.WordShape
|
, Text.WordShape
|
||||||
, Data.Statistics.Kendall
|
, Data.Statistics.Kendall
|
||||||
, GEval.Selector
|
, GEval.Selector
|
||||||
|
, Data.Statistics.Loess
|
||||||
|
, Data.Statistics.Calibration
|
||||||
, Paths_geval
|
, Paths_geval
|
||||||
build-depends: base >= 4.7 && < 5
|
build-depends: base >= 4.7 && < 5
|
||||||
, cond
|
, cond
|
||||||
@ -85,6 +87,8 @@ library
|
|||||||
, vector-algorithms
|
, vector-algorithms
|
||||||
, aeson
|
, aeson
|
||||||
, aeson-pretty
|
, aeson-pretty
|
||||||
|
, numeric-tools
|
||||||
|
, integration
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
executable geval
|
executable geval
|
||||||
|
46
src/Data/Statistics/Calibration.hs
Normal file
46
src/Data/Statistics/Calibration.hs
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
module Data.Statistics.Calibration
|
||||||
|
(calibration, softCalibration) where
|
||||||
|
|
||||||
|
import Data.Statistics.Loess(loess)
|
||||||
|
import Numeric.Tools.Integration
|
||||||
|
import Numeric.Integration.TanhSinh
|
||||||
|
import Data.List(minimum, maximum)
|
||||||
|
import qualified Data.Vector.Unboxed as DVU
|
||||||
|
|
||||||
|
minBand :: Double
|
||||||
|
minBand = 0.001
|
||||||
|
|
||||||
|
bool2Double :: Bool -> Double
|
||||||
|
bool2Double True = 1.0
|
||||||
|
bool2Double False = 0.0
|
||||||
|
|
||||||
|
mean :: [Double] -> Double
|
||||||
|
mean results = (sum results) / (fromIntegral n)
|
||||||
|
where n = length results
|
||||||
|
|
||||||
|
band :: [Double] -> Double
|
||||||
|
band xs = (maximum xs) - (minimum xs)
|
||||||
|
|
||||||
|
calibration :: [Bool] -> [Double] -> Double
|
||||||
|
calibration results probs = softCalibration results' probs
|
||||||
|
where results' = map bool2Double results
|
||||||
|
|
||||||
|
integrate :: (Double, Double) -> (Double -> Double) -> Double
|
||||||
|
integrate (a, b) fun = case simpson fun a b of
|
||||||
|
(r:_) -> result r
|
||||||
|
|
||||||
|
softCalibration :: [Double] -> [Double] -> Double
|
||||||
|
softCalibration [] [] = 1.0
|
||||||
|
softCalibration [] _ = error "too few booleans in calibration"
|
||||||
|
softCalibration _ [] = error "too few probabilities in calibration"
|
||||||
|
softCalibration results probs
|
||||||
|
| band probs < minBand = handleNarrowBand results probs
|
||||||
|
| otherwise = 1.0 - (min 1.0 (2.0 * (highest - lowest) * (integrate (lowest, highest) (\x -> abs ((loess (DVU.fromList probs) (DVU.fromList results) x) - x)))))
|
||||||
|
where lowest = minimum probs
|
||||||
|
highest = maximum probs
|
||||||
|
|
||||||
|
handleNarrowBand :: [Double] -> [Double] -> Double
|
||||||
|
handleNarrowBand results probs = 1.0 - deviation
|
||||||
|
where deviation = abs (g - t)
|
||||||
|
g = mean probs
|
||||||
|
t = mean results
|
21
src/Data/Statistics/Loess.hs
Normal file
21
src/Data/Statistics/Loess.hs
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
module Data.Statistics.Loess
|
||||||
|
(loess) where
|
||||||
|
|
||||||
|
import qualified Statistics.Matrix.Types as SMT
|
||||||
|
import Statistics.Regression (ols)
|
||||||
|
import Data.Vector.Unboxed((!), zipWith, length, (++), map)
|
||||||
|
import Statistics.Matrix(transpose)
|
||||||
|
|
||||||
|
|
||||||
|
triCube :: Double -> Double
|
||||||
|
triCube d = (1.0 - (abs d) ** 3) ** 3
|
||||||
|
|
||||||
|
loess :: SMT.Vector -> SMT.Vector -> Double -> Double
|
||||||
|
loess inputs outputs x = a * x + b
|
||||||
|
where a = params ! 1
|
||||||
|
b = params ! 0
|
||||||
|
params = ols inputMatrix scaledOutputs
|
||||||
|
weights = Data.Vector.Unboxed.map (\v -> triCube (x - v)) inputs
|
||||||
|
scaledOutputs = Data.Vector.Unboxed.zipWith (*) outputs weights
|
||||||
|
scaledInputs = Data.Vector.Unboxed.zipWith (*) inputs weights
|
||||||
|
inputMatrix = transpose (SMT.Matrix 2 (Data.Vector.Unboxed.length inputs) 1000 (weights Data.Vector.Unboxed.++ scaledInputs))
|
@ -1,5 +1,5 @@
|
|||||||
flags: {}
|
flags: {}
|
||||||
packages:
|
packages:
|
||||||
- '.'
|
- '.'
|
||||||
extra-deps: [murmur3-1.0.3,naturalcomp-0.0.3,Munkres-0.1]
|
extra-deps: [murmur3-1.0.3,naturalcomp-0.0.3,Munkres-0.1,numeric-tools-0.2.0.1]
|
||||||
resolver: lts-11.9
|
resolver: lts-11.9
|
||||||
|
28
test/Spec.hs
28
test/Spec.hs
@ -54,6 +54,10 @@ import qualified Data.Conduit.Combinators as CC
|
|||||||
import Statistics.Distribution (cumulative)
|
import Statistics.Distribution (cumulative)
|
||||||
import Statistics.Distribution.Normal (normalDistr)
|
import Statistics.Distribution.Normal (normalDistr)
|
||||||
import Data.Statistics.Kendall (kendall, kendallZ)
|
import Data.Statistics.Kendall (kendall, kendallZ)
|
||||||
|
import qualified Data.Vector.Unboxed as DVU
|
||||||
|
import qualified Statistics.Matrix.Types as SMT
|
||||||
|
import Data.Statistics.Loess (loess)
|
||||||
|
import Data.Statistics.Calibration (calibration)
|
||||||
|
|
||||||
informationRetrievalBookExample :: [(String, Int)]
|
informationRetrievalBookExample :: [(String, Int)]
|
||||||
informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3),
|
informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3),
|
||||||
@ -564,7 +568,29 @@ main = hspec $ do
|
|||||||
kendallZ (V.fromList $ Prelude.zip [12, 2, 1, 12, 2] [1, 4, 7, 1, 0]) `shouldBeAlmost` (-1.0742)
|
kendallZ (V.fromList $ Prelude.zip [12, 2, 1, 12, 2] [1, 4, 7, 1, 0]) `shouldBeAlmost` (-1.0742)
|
||||||
it "p-value" $ do
|
it "p-value" $ do
|
||||||
(2 * (cumulative (normalDistr 0.0 1.0) $ kendallZ (V.fromList $ Prelude.zip [12, 2, 1, 12, 2] [1, 4, 7, 1, 0]))) `shouldBeAlmost` 0.2827
|
(2 * (cumulative (normalDistr 0.0 1.0) $ kendallZ (V.fromList $ Prelude.zip [12, 2, 1, 12, 2] [1, 4, 7, 1, 0]))) `shouldBeAlmost` 0.2827
|
||||||
|
describe "Loess" $ do
|
||||||
|
it "simple" $ do
|
||||||
|
loess (DVU.fromList [0.2, 0.6, 1.0])
|
||||||
|
(DVU.fromList [-0.6, 0.2, 1.0])
|
||||||
|
0.4 `shouldBeAlmost` (-0.2)
|
||||||
|
describe "Calibration" $ do
|
||||||
|
it "empty list" $ do
|
||||||
|
calibration [] [] `shouldBeAlmost` 1.0
|
||||||
|
it "one element" $ do
|
||||||
|
calibration [True] [1.0] `shouldBeAlmost` 1.0
|
||||||
|
calibration [False] [0.0] `shouldBeAlmost` 1.0
|
||||||
|
calibration [True] [0.0] `shouldBeAlmost` 0.0
|
||||||
|
calibration [False] [1.0] `shouldBeAlmost` 0.0
|
||||||
|
calibration [True] [0.7] `shouldBeAlmost` 0.7
|
||||||
|
calibration [True] [0.3] `shouldBeAlmost` 0.3
|
||||||
|
calibration [False] [0.7] `shouldBeAlmost` 0.3
|
||||||
|
calibration [False] [0.3] `shouldBeAlmost` 0.7
|
||||||
|
it "perfect calibration" $ do
|
||||||
|
calibration [True, True, False] [0.5, 1.0, 0.5] `shouldBeAlmost` 1.0
|
||||||
|
it "totally wrong" $ do
|
||||||
|
calibration [True, False] [0.0, 1.0] `shouldBeAlmost` 0.0
|
||||||
|
calibration [True, False, False, True, False] [0.0, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0
|
||||||
|
calibration [False, True, True, True, True, False, False, True, False] [0.25, 0.25, 0.0, 0.25, 0.25, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0
|
||||||
|
|
||||||
checkConduitPure conduit inList expList = do
|
checkConduitPure conduit inList expList = do
|
||||||
let outList = runConduitPure $ CC.yieldMany inList .| conduit .| CC.sinkList
|
let outList = runConduitPure $ CC.yieldMany inList .| conduit .| CC.sinkList
|
||||||
|
Loading…
Reference in New Issue
Block a user