Implement auxiliary calibration function
This commit is contained in:
parent
19642db43f
commit
8393bec3ae
@ -41,6 +41,8 @@ library
|
||||
, Text.WordShape
|
||||
, Data.Statistics.Kendall
|
||||
, GEval.Selector
|
||||
, Data.Statistics.Loess
|
||||
, Data.Statistics.Calibration
|
||||
, Paths_geval
|
||||
build-depends: base >= 4.7 && < 5
|
||||
, cond
|
||||
@ -85,6 +87,8 @@ library
|
||||
, vector-algorithms
|
||||
, aeson
|
||||
, aeson-pretty
|
||||
, numeric-tools
|
||||
, integration
|
||||
default-language: Haskell2010
|
||||
|
||||
executable geval
|
||||
|
46
src/Data/Statistics/Calibration.hs
Normal file
46
src/Data/Statistics/Calibration.hs
Normal file
@ -0,0 +1,46 @@
|
||||
module Data.Statistics.Calibration
|
||||
(calibration, softCalibration) where
|
||||
|
||||
import Data.Statistics.Loess(loess)
|
||||
import Numeric.Tools.Integration
|
||||
import Numeric.Integration.TanhSinh
|
||||
import Data.List(minimum, maximum)
|
||||
import qualified Data.Vector.Unboxed as DVU
|
||||
|
||||
minBand :: Double
|
||||
minBand = 0.001
|
||||
|
||||
bool2Double :: Bool -> Double
|
||||
bool2Double True = 1.0
|
||||
bool2Double False = 0.0
|
||||
|
||||
mean :: [Double] -> Double
|
||||
mean results = (sum results) / (fromIntegral n)
|
||||
where n = length results
|
||||
|
||||
band :: [Double] -> Double
|
||||
band xs = (maximum xs) - (minimum xs)
|
||||
|
||||
calibration :: [Bool] -> [Double] -> Double
|
||||
calibration results probs = softCalibration results' probs
|
||||
where results' = map bool2Double results
|
||||
|
||||
integrate :: (Double, Double) -> (Double -> Double) -> Double
|
||||
integrate (a, b) fun = case simpson fun a b of
|
||||
(r:_) -> result r
|
||||
|
||||
softCalibration :: [Double] -> [Double] -> Double
|
||||
softCalibration [] [] = 1.0
|
||||
softCalibration [] _ = error "too few booleans in calibration"
|
||||
softCalibration _ [] = error "too few probabilities in calibration"
|
||||
softCalibration results probs
|
||||
| band probs < minBand = handleNarrowBand results probs
|
||||
| otherwise = 1.0 - (min 1.0 (2.0 * (highest - lowest) * (integrate (lowest, highest) (\x -> abs ((loess (DVU.fromList probs) (DVU.fromList results) x) - x)))))
|
||||
where lowest = minimum probs
|
||||
highest = maximum probs
|
||||
|
||||
handleNarrowBand :: [Double] -> [Double] -> Double
|
||||
handleNarrowBand results probs = 1.0 - deviation
|
||||
where deviation = abs (g - t)
|
||||
g = mean probs
|
||||
t = mean results
|
21
src/Data/Statistics/Loess.hs
Normal file
21
src/Data/Statistics/Loess.hs
Normal file
@ -0,0 +1,21 @@
|
||||
module Data.Statistics.Loess
|
||||
(loess) where
|
||||
|
||||
import qualified Statistics.Matrix.Types as SMT
|
||||
import Statistics.Regression (ols)
|
||||
import Data.Vector.Unboxed((!), zipWith, length, (++), map)
|
||||
import Statistics.Matrix(transpose)
|
||||
|
||||
|
||||
triCube :: Double -> Double
|
||||
triCube d = (1.0 - (abs d) ** 3) ** 3
|
||||
|
||||
loess :: SMT.Vector -> SMT.Vector -> Double -> Double
|
||||
loess inputs outputs x = a * x + b
|
||||
where a = params ! 1
|
||||
b = params ! 0
|
||||
params = ols inputMatrix scaledOutputs
|
||||
weights = Data.Vector.Unboxed.map (\v -> triCube (x - v)) inputs
|
||||
scaledOutputs = Data.Vector.Unboxed.zipWith (*) outputs weights
|
||||
scaledInputs = Data.Vector.Unboxed.zipWith (*) inputs weights
|
||||
inputMatrix = transpose (SMT.Matrix 2 (Data.Vector.Unboxed.length inputs) 1000 (weights Data.Vector.Unboxed.++ scaledInputs))
|
@ -1,5 +1,5 @@
|
||||
flags: {}
|
||||
packages:
|
||||
- '.'
|
||||
extra-deps: [murmur3-1.0.3,naturalcomp-0.0.3,Munkres-0.1]
|
||||
extra-deps: [murmur3-1.0.3,naturalcomp-0.0.3,Munkres-0.1,numeric-tools-0.2.0.1]
|
||||
resolver: lts-11.9
|
||||
|
28
test/Spec.hs
28
test/Spec.hs
@ -54,6 +54,10 @@ import qualified Data.Conduit.Combinators as CC
|
||||
import Statistics.Distribution (cumulative)
|
||||
import Statistics.Distribution.Normal (normalDistr)
|
||||
import Data.Statistics.Kendall (kendall, kendallZ)
|
||||
import qualified Data.Vector.Unboxed as DVU
|
||||
import qualified Statistics.Matrix.Types as SMT
|
||||
import Data.Statistics.Loess (loess)
|
||||
import Data.Statistics.Calibration (calibration)
|
||||
|
||||
informationRetrievalBookExample :: [(String, Int)]
|
||||
informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3),
|
||||
@ -564,7 +568,29 @@ main = hspec $ do
|
||||
kendallZ (V.fromList $ Prelude.zip [12, 2, 1, 12, 2] [1, 4, 7, 1, 0]) `shouldBeAlmost` (-1.0742)
|
||||
it "p-value" $ do
|
||||
(2 * (cumulative (normalDistr 0.0 1.0) $ kendallZ (V.fromList $ Prelude.zip [12, 2, 1, 12, 2] [1, 4, 7, 1, 0]))) `shouldBeAlmost` 0.2827
|
||||
|
||||
describe "Loess" $ do
|
||||
it "simple" $ do
|
||||
loess (DVU.fromList [0.2, 0.6, 1.0])
|
||||
(DVU.fromList [-0.6, 0.2, 1.0])
|
||||
0.4 `shouldBeAlmost` (-0.2)
|
||||
describe "Calibration" $ do
|
||||
it "empty list" $ do
|
||||
calibration [] [] `shouldBeAlmost` 1.0
|
||||
it "one element" $ do
|
||||
calibration [True] [1.0] `shouldBeAlmost` 1.0
|
||||
calibration [False] [0.0] `shouldBeAlmost` 1.0
|
||||
calibration [True] [0.0] `shouldBeAlmost` 0.0
|
||||
calibration [False] [1.0] `shouldBeAlmost` 0.0
|
||||
calibration [True] [0.7] `shouldBeAlmost` 0.7
|
||||
calibration [True] [0.3] `shouldBeAlmost` 0.3
|
||||
calibration [False] [0.7] `shouldBeAlmost` 0.3
|
||||
calibration [False] [0.3] `shouldBeAlmost` 0.7
|
||||
it "perfect calibration" $ do
|
||||
calibration [True, True, False] [0.5, 1.0, 0.5] `shouldBeAlmost` 1.0
|
||||
it "totally wrong" $ do
|
||||
calibration [True, False] [0.0, 1.0] `shouldBeAlmost` 0.0
|
||||
calibration [True, False, False, True, False] [0.0, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0
|
||||
calibration [False, True, True, True, True, False, False, True, False] [0.25, 0.25, 0.0, 0.25, 0.25, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0
|
||||
|
||||
checkConduitPure conduit inList expList = do
|
||||
let outList = runConduitPure $ CC.yieldMany inList .| conduit .| CC.sinkList
|
||||
|
Loading…
Reference in New Issue
Block a user