Clip loess and switch to gaussian in Calibration

This commit is contained in:
Filip Gralinski 2019-03-19 20:41:06 +01:00
parent de40851b5a
commit e5220d71d8
4 changed files with 18 additions and 11 deletions

View File

@@ -1,9 +1,9 @@
module Data.Statistics.Calibration module Data.Statistics.Calibration
(calibration, softCalibration) where (calibration, softCalibration) where
import Data.Statistics.Loess(loess) import Data.Statistics.Loess (clippedLoess)
import Numeric.Integration.TanhSinh import Numeric.Integration.TanhSinh
import Data.List(minimum, maximum) import Data.List (minimum, maximum)
import qualified Data.Vector.Unboxed as DVU import qualified Data.Vector.Unboxed as DVU
minBand :: Double minBand :: Double
@@ -34,7 +34,7 @@ softCalibration [] _ = error "too few booleans in calibration"
softCalibration _ [] = error "too few probabilities in calibration" softCalibration _ [] = error "too few probabilities in calibration"
softCalibration results probs softCalibration results probs
| band probs < minBand = handleNarrowBand results probs | band probs < minBand = handleNarrowBand results probs
| otherwise = 1.0 - (min 1.0 (2.0 * (integrate (lowest, highest) (\x -> abs ((loess (DVU.fromList probs) (DVU.fromList results) x) - x))) / (highest - lowest))) | otherwise = 1.0 - (min 1.0 (2.0 * (integrate (lowest, highest) (\x -> abs ((clippedLoess (DVU.fromList probs) (DVU.fromList results) x) - x))) / (highest - lowest)))
where lowest = (minimum probs) + epsilon -- integrating loess gets crazy at edges where lowest = (minimum probs) + epsilon -- integrating loess gets crazy at edges
highest = (maximum probs) - epsilon highest = (maximum probs) - epsilon
epsilon = 0.0001 epsilon = 0.0001

View File

@@ -1,23 +1,32 @@
module Data.Statistics.Loess module Data.Statistics.Loess
(loess) where (loess, clippedLoess) where
import qualified Statistics.Matrix.Types as SMT import qualified Statistics.Matrix.Types as SMT
import Statistics.Regression (ols) import Statistics.Regression (ols)
import Data.Vector.Unboxed((!), zipWith, length, (++), map) import Data.Vector.Unboxed((!), zipWith, length, (++), map)
import Statistics.Matrix(transpose) import Statistics.Matrix(transpose)
import Statistics.Distribution.Normal (standard)
import Statistics.Distribution (density)
lambda :: Double lambda :: Double
lambda = 2.0 lambda = 8.0
triCube :: Double -> Double triCube :: Double -> Double
triCube d = (1.0 - (abs d) ** 3) ** 3 triCube d = (1.0 - (abs d) ** 3) ** 3
gaussian :: Double -> Double
gaussian = density standard
clippedLoess :: SMT.Vector -> SMT.Vector -> Double -> Double
clippedLoess inputs outputs x = min 1.0 $ max 0.0 $ loess inputs outputs x
loess :: SMT.Vector -> SMT.Vector -> Double -> Double loess :: SMT.Vector -> SMT.Vector -> Double -> Double
loess inputs outputs x = a * x + b loess inputs outputs x = a * x + b
where a = params ! 1 where a = params ! 1
b = params ! 0 b = params ! 0
params = ols inputMatrix scaledOutputs params = ols inputMatrix scaledOutputs
weights = Data.Vector.Unboxed.map (\v -> lambda * triCube (lambda * (x - v))) inputs weights = Data.Vector.Unboxed.map (\v -> lambda * gaussian (lambda * (x - v))) inputs
scaledOutputs = Data.Vector.Unboxed.zipWith (*) outputs weights scaledOutputs = Data.Vector.Unboxed.zipWith (*) outputs weights
scaledInputs = Data.Vector.Unboxed.zipWith (*) inputs weights scaledInputs = Data.Vector.Unboxed.zipWith (*) inputs weights
inputMatrix = transpose (SMT.Matrix 2 (Data.Vector.Unboxed.length inputs) 1000 (weights Data.Vector.Unboxed.++ scaledInputs)) inputMatrix = transpose (SMT.Matrix 2 (Data.Vector.Unboxed.length inputs) 1000 (weights Data.Vector.Unboxed.++ scaledInputs))

View File

@@ -102,8 +102,8 @@ import qualified Data.Vector.Unboxed as DVU
import Statistics.Correlation import Statistics.Correlation
import Data.Statistics.Calibration(softCalibration) import Data.Statistics.Calibration (softCalibration)
import Data.Statistics.Loess(loess) import Data.Statistics.Loess (clippedLoess)
import Data.Proxy import Data.Proxy
@@ -755,7 +755,7 @@ gevalCore' (ProbabilisticSoftFMeasure beta) _ = gevalCoreWithoutInput parseAnnot
probabilisticSoftAgg = CC.foldl probabilisticSoftFolder ([], [], fromInteger 0, 0) probabilisticSoftAgg = CC.foldl probabilisticSoftFolder ([], [], fromInteger 0, 0)
probabilisticSoftFolder (r1, p1, g1, e1) (r2, p2, g2, e2) = (r1 ++ r2, p1 ++ p2, g1 + g2, e1 + e2) probabilisticSoftFolder (r1, p1, g1, e1) (r2, p2, g2, e2) = (r1 ++ r2, p1 ++ p2, g1 + g2, e1 + e2)
loessGraph :: ([Double], [Double], Double, Int) -> Maybe GraphSeries loessGraph :: ([Double], [Double], Double, Int) -> Maybe GraphSeries
loessGraph (results, probs, _, _) = Just $ GraphSeries $ Prelude.map (\x -> (x, loess probs' results' x)) $ Prelude.filter (\p -> p > lowest && p < highest) $ Prelude.map (\d -> 0.01 * (fromIntegral d)) [1..99] loessGraph (results, probs, _, _) = Just $ GraphSeries $ Prelude.map (\x -> (x, clippedLoess probs' results' x)) $ Prelude.filter (\p -> p > lowest && p < highest) $ Prelude.map (\d -> 0.01 * (fromIntegral d)) [1..99]
where results' = DVU.fromList results where results' = DVU.fromList results
probs' = DVU.fromList probs probs' = DVU.fromList probs
lowest = Data.List.minimum probs lowest = Data.List.minimum probs

View File

@@ -9,8 +9,6 @@ module GEval.OptionsParser
precisionArgParser precisionArgParser
) where ) where
import Debug.Trace
import Paths_geval (version) import Paths_geval (version)
import Data.Version (showVersion) import Data.Version (showVersion)