Implement auxiliary calibration function

This commit is contained in:
Filip Graliński 2019-03-11 11:42:26 +01:00
parent 19642db43f
commit 8393bec3ae
5 changed files with 99 additions and 2 deletions

View File

@ -41,6 +41,8 @@ library
, Text.WordShape , Text.WordShape
, Data.Statistics.Kendall , Data.Statistics.Kendall
, GEval.Selector , GEval.Selector
, Data.Statistics.Loess
, Data.Statistics.Calibration
, Paths_geval , Paths_geval
build-depends: base >= 4.7 && < 5 build-depends: base >= 4.7 && < 5
, cond , cond
@ -85,6 +87,8 @@ library
, vector-algorithms , vector-algorithms
, aeson , aeson
, aeson-pretty , aeson-pretty
, numeric-tools
, integration
default-language: Haskell2010 default-language: Haskell2010
executable geval executable geval

View File

@ -0,0 +1,46 @@
module Data.Statistics.Calibration
(calibration, softCalibration) where
import Data.Statistics.Loess(loess)
import Numeric.Tools.Integration
import Numeric.Integration.TanhSinh
import Data.List(minimum, maximum)
import qualified Data.Vector.Unboxed as DVU
minBand :: Double
minBand = 0.001
bool2Double :: Bool -> Double
bool2Double True = 1.0
bool2Double False = 0.0
mean :: [Double] -> Double
mean results = (sum results) / (fromIntegral n)
where n = length results
band :: [Double] -> Double
band xs = (maximum xs) - (minimum xs)
calibration :: [Bool] -> [Double] -> Double
calibration results probs = softCalibration results' probs
where results' = map bool2Double results
integrate :: (Double, Double) -> (Double -> Double) -> Double
integrate (a, b) fun = case simpson fun a b of
(r:_) -> result r
softCalibration :: [Double] -> [Double] -> Double
softCalibration [] [] = 1.0
softCalibration [] _ = error "too few booleans in calibration"
softCalibration _ [] = error "too few probabilities in calibration"
softCalibration results probs
| band probs < minBand = handleNarrowBand results probs
| otherwise = 1.0 - (min 1.0 (2.0 * (highest - lowest) * (integrate (lowest, highest) (\x -> abs ((loess (DVU.fromList probs) (DVU.fromList results) x) - x)))))
where lowest = minimum probs
highest = maximum probs
handleNarrowBand :: [Double] -> [Double] -> Double
handleNarrowBand results probs = 1.0 - deviation
where deviation = abs (g - t)
g = mean probs
t = mean results

View File

@ -0,0 +1,21 @@
module Data.Statistics.Loess
(loess) where
import qualified Statistics.Matrix.Types as SMT
import Statistics.Regression (ols)
import Data.Vector.Unboxed((!), zipWith, length, (++), map)
import Statistics.Matrix(transpose)
triCube :: Double -> Double
triCube d = (1.0 - (abs d) ** 3) ** 3
loess :: SMT.Vector -> SMT.Vector -> Double -> Double
loess inputs outputs x = a * x + b
where a = params ! 1
b = params ! 0
params = ols inputMatrix scaledOutputs
weights = Data.Vector.Unboxed.map (\v -> triCube (x - v)) inputs
scaledOutputs = Data.Vector.Unboxed.zipWith (*) outputs weights
scaledInputs = Data.Vector.Unboxed.zipWith (*) inputs weights
inputMatrix = transpose (SMT.Matrix 2 (Data.Vector.Unboxed.length inputs) 1000 (weights Data.Vector.Unboxed.++ scaledInputs))

View File

@ -1,5 +1,5 @@
flags: {} flags: {}
packages: packages:
- '.' - '.'
extra-deps: [murmur3-1.0.3,naturalcomp-0.0.3,Munkres-0.1] extra-deps: [murmur3-1.0.3,naturalcomp-0.0.3,Munkres-0.1,numeric-tools-0.2.0.1]
resolver: lts-11.9 resolver: lts-11.9

View File

@ -54,6 +54,10 @@ import qualified Data.Conduit.Combinators as CC
import Statistics.Distribution (cumulative) import Statistics.Distribution (cumulative)
import Statistics.Distribution.Normal (normalDistr) import Statistics.Distribution.Normal (normalDistr)
import Data.Statistics.Kendall (kendall, kendallZ) import Data.Statistics.Kendall (kendall, kendallZ)
import qualified Data.Vector.Unboxed as DVU
import qualified Statistics.Matrix.Types as SMT
import Data.Statistics.Loess (loess)
import Data.Statistics.Calibration (calibration)
informationRetrievalBookExample :: [(String, Int)] informationRetrievalBookExample :: [(String, Int)]
informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3), informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3),
@ -564,7 +568,29 @@ main = hspec $ do
kendallZ (V.fromList $ Prelude.zip [12, 2, 1, 12, 2] [1, 4, 7, 1, 0]) `shouldBeAlmost` (-1.0742) kendallZ (V.fromList $ Prelude.zip [12, 2, 1, 12, 2] [1, 4, 7, 1, 0]) `shouldBeAlmost` (-1.0742)
it "p-value" $ do it "p-value" $ do
(2 * (cumulative (normalDistr 0.0 1.0) $ kendallZ (V.fromList $ Prelude.zip [12, 2, 1, 12, 2] [1, 4, 7, 1, 0]))) `shouldBeAlmost` 0.2827 (2 * (cumulative (normalDistr 0.0 1.0) $ kendallZ (V.fromList $ Prelude.zip [12, 2, 1, 12, 2] [1, 4, 7, 1, 0]))) `shouldBeAlmost` 0.2827
describe "Loess" $ do
it "simple" $ do
loess (DVU.fromList [0.2, 0.6, 1.0])
(DVU.fromList [-0.6, 0.2, 1.0])
0.4 `shouldBeAlmost` (-0.2)
describe "Calibration" $ do
it "empty list" $ do
calibration [] [] `shouldBeAlmost` 1.0
it "one element" $ do
calibration [True] [1.0] `shouldBeAlmost` 1.0
calibration [False] [0.0] `shouldBeAlmost` 1.0
calibration [True] [0.0] `shouldBeAlmost` 0.0
calibration [False] [1.0] `shouldBeAlmost` 0.0
calibration [True] [0.7] `shouldBeAlmost` 0.7
calibration [True] [0.3] `shouldBeAlmost` 0.3
calibration [False] [0.7] `shouldBeAlmost` 0.3
calibration [False] [0.3] `shouldBeAlmost` 0.7
it "perfect calibration" $ do
calibration [True, True, False] [0.5, 1.0, 0.5] `shouldBeAlmost` 1.0
it "totally wrong" $ do
calibration [True, False] [0.0, 1.0] `shouldBeAlmost` 0.0
calibration [True, False, False, True, False] [0.0, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0
calibration [False, True, True, True, True, False, False, True, False] [0.25, 0.25, 0.0, 0.25, 0.25, 1.0, 1.0, 0.5, 0.5] `shouldBeAlmost` 0.0
checkConduitPure conduit inList expList = do checkConduitPure conduit inList expList = do
let outList = runConduitPure $ CC.yieldMany inList .| conduit .| CC.sinkList let outList = runConduitPure $ CC.yieldMany inList .| conduit .| CC.sinkList