Add numerical features

2019-01-27 20:18:39 +01:00 · 2019-01-27 20:18:39 +01:00 · abcce9bf68
commit abcce9bf68
parent d5a8908599
5 changed files with 320 additions and 21 deletions
--- a/geval.cabal
+++ b/geval.cabal
@ -1,5 +1,5 @@
 name:                geval
-version:             1.13.0.0
+version:             1.14.0.0
 synopsis:            Machine learning evaluation tools
 description:         Please see README.md
 homepage:            http://github.com/name/project
@ -39,6 +39,7 @@ library
                     , GEval.Annotation
                     , GEval.BlackBoxDebugging
                     , Text.WordShape
+                     , Data.Statistics.Kendall
                     , Paths_geval
  build-depends:       base >= 4.7 && < 5
                     , cond
@ -80,6 +81,7 @@ library
                     , MissingH
                     , array
                     , Munkres
+                     , vector-algorithms
  default-language:    Haskell2010

 executable geval
@ -117,6 +119,8 @@ test-suite geval-test
                     , directory
                     , temporary
                     , silently
+                     , vector
+                     , statistics
  ghc-options:         -threaded -rtsopts -with-rtsopts=-N
  default-language:    Haskell2010

--- a/src/Data/Statistics/Kendall.hs
+++ b/src/Data/Statistics/Kendall.hs
@ -0,0 +1,178 @@
+{-# LANGUAGE BangPatterns, CPP, FlexibleContexts, ScopedTypeVariables #-}
+-- |
+-- (Taken from http://hackage.haskell.org/package/statistics-0.15.0.0/docs/src/Statistics.Correlation.Kendall.html)
+--
+-- Module      : Statistics.Correlation.Kendall
+--
+-- Fast O(NlogN) implementation of
+-- <http://en.wikipedia.org/wiki/Kendall_tau_rank_correlation_coefficient Kendall's tau>.
+--
+-- This module implements Kendall's tau form b which allows ties in the data.
+-- This is the same formula used by other statistical packages, e.g., R, matlab.
+--
+-- > \tau = \frac{n_c - n_d}{\sqrt{(n_0 - n_1)(n_0 - n_2)}}
+--
+-- where n_0 = n(n-1)\/2, n_1 = number of pairs tied for the first quantify,
+-- n_2 = number of pairs tied for the second quantify,
+-- n_c = number of concordant pairs$, n_d = number of discordant pairs.
+
+module Data.Statistics.Kendall
+    ( kendall,
+      kendallZ
+    -- * References
+    -- $references
+    ) where
+
+import Control.Monad.ST (ST, runST)
+import Data.Bits (shiftR)
+import Data.Function (on)
+import Data.STRef
+import qualified Data.Vector.Algorithms.Intro as I
+import qualified Data.Vector.Generic as G
+import qualified Data.Vector.Generic.Mutable as GM
+
+-- | /O(nlogn)/ Compute the Kendall's tau from a vector of paired data.
+-- Return NaN when number of pairs <= 1.
+kendall :: (Ord a, Ord b, G.Vector v (a, b)) => v (a, b) -> Double
+kendall xy'
+  | G.length xy' <= 1 = 0/0
+  | otherwise  = runST $ do
+    xy <- G.thaw xy'
+    let n = GM.length xy
+    n_dRef <- newSTRef 0
+    I.sort xy
+    tieX <- numOfTiesBy ((==) `on` fst) xy
+    tieXY <- numOfTiesBy (==) xy
+    tmp <- GM.new n
+    mergeSort (compare `on` snd) xy tmp n_dRef
+    tieY <- numOfTiesBy ((==) `on` snd) xy
+    n_d <- readSTRef n_dRef
+    let n_0 = (fromIntegral n * (fromIntegral n-1)) `shiftR` 1 :: Integer
+        n_c = n_0 - n_d - tieX - tieY + tieXY
+    return $ fromIntegral (n_c - n_d) /
+             (sqrt.fromIntegral) ((n_0 - tieX) * (n_0 - tieY))
+{-# INLINE kendall #-}
+
+kendallZ :: (Ord a, Ord b, G.Vector v (a, b)) => v (a, b) -> Double
+kendallZ xy'
+  | G.length xy' <= 1 = 0/0
+  | otherwise  = runST $ do
+    xy <- G.thaw xy'
+    let n = GM.length xy
+    let vfun x = x * (x - 1) * (2*x + 5)
+    let tttfun x = x * (x - 1) * (x - 2)
+    n_dRef <- newSTRef 0
+    I.sort xy
+    tieX <- numOfTiesBy ((==) `on` fst) xy
+    tieXY <- numOfTiesBy (==) xy
+    vt <- numOfTiesByGeneralized vfun ((==) `on` fst) xy
+    tttX <- numOfTiesByGeneralized tttfun ((==) `on` fst) xy
+    tmp <- GM.new n
+    mergeSort (compare `on` snd) xy tmp n_dRef
+    tieY <- numOfTiesBy ((==) `on` snd) xy
+    vu <- numOfTiesByGeneralized vfun ((==) `on` snd) xy
+    tttY <- numOfTiesByGeneralized tttfun ((==) `on` snd) xy
+    n_d <- readSTRef n_dRef
+    let n_0 = (fromIntegral n * (fromIntegral n-1)) `shiftR` 1 :: Integer
+        n_c = n_0 - n_d - tieX - tieY + tieXY
+        v0 = vfun (fromIntegral n)
+        v1 = 2.0 * (fromIntegral tieX) * (fromIntegral tieY) / (fromIntegral (n * (n-1)))
+        v2 = (fromIntegral tttX) * (fromIntegral tttY) / (fromIntegral (9 * (tttfun n)))
+        v = (fromIntegral (v0 - vt - vu)) / 18.0 + v1 + v2
+    return $ (fromIntegral (n_c - n_d)) / sqrt v
+{-# INLINE kendallZ #-}
+
+-- calculate number of tied pairs in a sorted vector
+numOfTiesBy :: GM.MVector v a
+            => (a -> a -> Bool) -> v s a -> ST s Integer
+numOfTiesBy f xs = numOfTiesByGeneralized (\x -> (x * (x - 1)) `shiftR` 1) f xs
+
+numOfTiesByGeneralized :: GM.MVector v a
+            => (Int -> Int) -> (a -> a -> Bool) -> v s a -> ST s Integer
+numOfTiesByGeneralized op f xs = do count <- newSTRef (0::Integer)
+                                    loop count (1::Int) (0::Int)
+                                    readSTRef count
+  where
+    n = GM.length xs
+    loop c !acc !i | i >= n - 1 = modifySTRef' c (+ g acc)
+                   | otherwise = do
+                       x1 <- GM.unsafeRead xs i
+                       x2 <- GM.unsafeRead xs (i+1)
+                       if f x1 x2
+                          then loop c (acc+1) (i+1)
+                          else modifySTRef' c (+ g acc) >> loop c 1 (i+1)
+    g x = fromIntegral $ op x
+{-# INLINE numOfTiesByGeneralized #-}
+
+-- Implementation of Knight's merge sort (adapted from vector-algorithm). This
+-- function is used to count the number of discordant pairs.
+mergeSort :: GM.MVector v e
+          => (e -> e -> Ordering)
+          -> v s e
+          -> v s e
+          -> STRef s Integer
+          -> ST s ()
+mergeSort cmp src buf count = loop 0 (GM.length src - 1)
+  where
+    loop l u
+      | u == l = return ()
+      | u - l == 1 = do
+          eL <- GM.unsafeRead src l
+          eU <- GM.unsafeRead src u
+          case cmp eL eU of
+              GT -> do GM.unsafeWrite src l eU
+                       GM.unsafeWrite src u eL
+                       modifySTRef' count (+1)
+              _ -> return ()
+      | otherwise  = do
+          let mid = (u + l) `shiftR` 1
+          loop l mid
+          loop mid u
+          merge cmp (GM.unsafeSlice l (u-l+1) src) buf (mid - l) count
+{-# INLINE mergeSort #-}
+
+merge :: GM.MVector v e
+      => (e -> e -> Ordering)
+      -> v s e
+      -> v s e
+      -> Int
+      -> STRef s Integer
+      -> ST s ()
+merge cmp src buf mid count = do GM.unsafeCopy tmp lower
+                                 eTmp <- GM.unsafeRead tmp 0
+                                 eUpp <- GM.unsafeRead upper 0
+                                 loop tmp 0 eTmp upper 0 eUpp 0
+  where
+    lower = GM.unsafeSlice 0 mid src
+    upper = GM.unsafeSlice mid (GM.length src - mid) src
+    tmp = GM.unsafeSlice 0 mid buf
+    wroteHigh low iLow eLow high iHigh iIns
+      | iHigh >= GM.length high =
+          GM.unsafeCopy (GM.unsafeSlice iIns (GM.length low - iLow) src)
+                        (GM.unsafeSlice iLow (GM.length low - iLow) low)
+      | otherwise = do eHigh <- GM.unsafeRead high iHigh
+                       loop low iLow eLow high iHigh eHigh iIns
+
+    wroteLow low iLow high iHigh eHigh iIns
+      | iLow  >= GM.length low  = return ()
+      | otherwise = do eLow <- GM.unsafeRead low iLow
+                       loop low iLow eLow high iHigh eHigh iIns
+
+    loop !low !iLow !eLow !high !iHigh !eHigh !iIns = case cmp eHigh eLow of
+        LT -> do GM.unsafeWrite src iIns eHigh
+                 modifySTRef' count (+ fromIntegral (GM.length low - iLow))
+                 wroteHigh low iLow eLow high (iHigh+1) (iIns+1)
+        _  -> do GM.unsafeWrite src iIns eLow
+                 wroteLow low (iLow+1) high iHigh eHigh (iIns+1)
+{-# INLINE merge #-}
+
+#if !MIN_VERSION_base(4,6,0)
+modifySTRef' :: STRef s a -> (a -> a) -> ST s ()
+modifySTRef' = modifySTRef
+#endif
+
+-- $references
+--
+-- * William R. Knight. (1966) A computer method for calculating Kendall's Tau
+--   with ungrouped data. /Journal of the American Statistical Association/,
+--   Vol. 61, No. 314, Part 1, pp. 436-439. <http://www.jstor.org/pss/2282833>
--- a/src/GEval/FeatureExtractor.hs
+++ b/src/GEval/FeatureExtractor.hs
@ -4,6 +4,10 @@ module GEval.FeatureExtractor
  (extractFactors,
   extractFactorsFromTabbed,
   cartesianFeatures,
+   Feature(..),
+   NumericalType(..),
+   NumericalDirection(..),
+   Featuroid(..),
   LineWithFactors(..),
   LineWithPeggedFactors(..),
   PeggedFactor(..),
@ -26,15 +30,55 @@ import GEval.BlackBoxDebugging
 import GEval.Common
 import Text.Read (readMaybe)

+data Feature = UnaryFeature PeggedExistentialFactor
+               | CartesianFeature PeggedExistentialFactor PeggedExistentialFactor
+               | NumericalFeature FeatureNamespace NumericalType NumericalDirection
+                 deriving (Eq, Ord)
+
+instance Show Feature where
+  show (UnaryFeature p) = show p
+  show (CartesianFeature pA pB) = formatCartesian pA pB
+  show (NumericalFeature namespace ntype direction) = (show namespace) ++ ":" ++ (show ntype) ++ (show direction)
+
+data NumericalType = DirectValue | LengthOf
+                     deriving (Eq, Ord)
+
+instance Show NumericalType where
+  show DirectValue = "="
+  show LengthOf = "=#"
+
+data NumericalDirection = Big | Small
+                          deriving (Eq, Ord)
+
+instance Show NumericalDirection where
+  show Big = "+"
+  show Small = "-"
+
+-- | Featuroid is something between a factor and a feature, i.e. for numerical factors
+-- it's not a single value, but still without the direction.
+data Featuroid = UnaryFeaturoid PeggedExistentialFactor
+                 | CartesianFeaturoid PeggedExistentialFactor PeggedExistentialFactor
+                 | NumericalFeaturoid FeatureNamespace
+                 deriving (Eq, Ord)
+
+instance Show Featuroid where
+  show (UnaryFeaturoid p) = show p
+  show (CartesianFeaturoid pA pB) = formatCartesian pA pB
+  show (NumericalFeaturoid namespace) = (show namespace) ++ ":="
+
 data LineWithFactors = LineWithFactors Double MetricValue [Factor]
                              deriving (Eq, Ord)

+-- | A factor extracted from a single item (its input, expected output or actual output).
 data Factor = UnaryFactor PeggedFactor | CartesianFactor PeggedExistentialFactor PeggedExistentialFactor
               deriving (Eq, Ord)

 instance Show Factor where
  show (UnaryFactor factor) = show factor
-  show (CartesianFactor factorA factorB) = (show factorA) ++ "~~" ++ (show factorB)
+  show (CartesianFactor factorA factorB) = formatCartesian factorA factorB
+
+formatCartesian :: PeggedExistentialFactor -> PeggedExistentialFactor -> String
+formatCartesian factorA factorB = (show factorA) ++ "~~" ++ (show factorB)

 data LineWithPeggedFactors = LineWithPeggedFactors Double MetricValue [PeggedFactor]
                              deriving (Eq, Ord)
--- a/src/GEval/LineByLine.hs
+++ b/src/GEval/LineByLine.hs
@ -34,7 +34,9 @@ import Data.Text.Encoding
 import Data.Conduit.Rank
 import Data.Maybe (fromMaybe)

-import Data.List (sortBy, sort, concat)
+import qualified Data.Vector as V
+
+import Data.List (sortBy, sortOn, sort, concat)

 import Control.Monad.IO.Class
 import Control.Monad.Trans.Resource
@ -56,6 +58,7 @@ import System.FilePath

 import Statistics.Distribution (cumulative)
 import Statistics.Distribution.Normal (normalDistr)
+import Data.Statistics.Kendall (kendallZ)

 import qualified Data.Map.Strict as M
 import qualified Data.Set as S
@ -125,7 +128,7 @@ extractFeaturesAndPValues spec bbdo =
 data RankedFactor = RankedFactor Factor Double MetricValue
                    deriving (Show)

-data FeatureWithPValue = FeatureWithPValue Factor      -- ^ feature itself
+data FeatureWithPValue = FeatureWithPValue Feature     -- ^ feature itself
                                           Double      -- ^ p-value
                                           MetricValue -- ^ average metric value
                                           Integer     -- ^ count
@ -184,11 +187,12 @@ finalFeatures True minFreq = do

 filtreCartesian False = CC.map id
 filtreCartesian True = CC.concatMapAccum step S.empty
-   where step f@(FeatureWithPValue (UnaryFactor (PeggedFactor namespace (SimpleExistentialFactor p))) _ _ _) mp = (S.insert (PeggedExistentialFactor namespace p) mp, [f])
-         step f@(FeatureWithPValue (UnaryFactor (PeggedFactor namespace (NumericalFactor _ _))) _ _ _) mp = (mp, [f])
-         step f@(FeatureWithPValue (CartesianFactor pA pB) _ _ _) mp = (mp, if pA `S.member` mp || pB `S.member` mp
-                                                      then []
-                                                      else [f])
+   where step f@(FeatureWithPValue (UnaryFeature fac) _ _ _) mp = (S.insert fac mp, [f])
+         step f@(FeatureWithPValue (CartesianFeature pA pB) _ _ _) mp = (mp, if pA `S.member` mp || pB `S.member` mp
+                                                                             then []
+                                                                             else [f])
+         step f@(FeatureWithPValue (NumericalFeature _ _ _) _ _ _) mp = (mp, [f])
+

 peggedToUnaryLine :: LineWithPeggedFactors -> LineWithFactors
 peggedToUnaryLine (LineWithPeggedFactors rank score fs) = LineWithFactors rank score (Prelude.map UnaryFactor fs)
@ -200,33 +204,69 @@ getFeatures mTokenizer bbdo (LineRecord inLine expLine outLine _ _) =
     extractFactorsFromTabbed mTokenizer bbdo "in" inLine,
     extractFactors mTokenizer bbdo "out" outLine]

+data FeatureAggregate = ExistentialFactorAggregate Double MetricValue Integer
+                        | NumericalValueAggregate [Double] [MetricValue] [Int] [MetricValue]
+                        | LengthAggregate [Double] [MetricValue] [Int]
+
+aggreggate :: FeatureAggregate -> FeatureAggregate -> FeatureAggregate
+aggreggate (ExistentialFactorAggregate r1 s1 c1) (ExistentialFactorAggregate r2 s2 c2) =
+  ExistentialFactorAggregate (r1 + r2) (s1 + s2) (c1 + c2)
+aggreggate (NumericalValueAggregate ranks1 scores1 lengths1 values1) (NumericalValueAggregate ranks2 scores2 lengths2 values2) =
+  NumericalValueAggregate (ranks1 ++ ranks2) (scores1 ++ scores2) (lengths1 ++ lengths2) (values1 ++ values2)
+aggreggate (NumericalValueAggregate ranks1 scores1 lengths1 _) (LengthAggregate ranks2 scores2 lengths2) =
+  LengthAggregate (ranks1 ++ ranks2) (scores1 ++ scores2) (lengths1 ++ lengths2)
+aggreggate (LengthAggregate ranks1 scores1 lengths1) (NumericalValueAggregate ranks2 scores2 lengths2 _) =
+  LengthAggregate (ranks1 ++ ranks2) (scores1 ++ scores2) (lengths1 ++ lengths2)
+aggreggate (LengthAggregate ranks1 scores1 lengths1) (LengthAggregate ranks2 scores2 lengths2) =
+  LengthAggregate (ranks1 ++ ranks2) (scores1 ++ scores2) (lengths1 ++ lengths2)
+aggreggate _ _ = error "Mismatched aggregates!"
+
+initAggregate :: RankedFactor -> (Featuroid, FeatureAggregate)
+initAggregate (RankedFactor (UnaryFactor (PeggedFactor namespace (NumericalFactor Nothing l))) r s)  =
+  (NumericalFeaturoid namespace, LengthAggregate [r] [s] [l])
+initAggregate (RankedFactor (UnaryFactor (PeggedFactor namespace (NumericalFactor (Just v) l))) r s) =
+  (NumericalFeaturoid namespace, NumericalValueAggregate [r] [s] [l] [v])
+initAggregate (RankedFactor (UnaryFactor (PeggedFactor namespace (SimpleExistentialFactor f))) r s) =
+  (UnaryFeaturoid (PeggedExistentialFactor namespace f), ExistentialFactorAggregate r s 1)
+initAggregate (RankedFactor (CartesianFactor pA pB) r s) =
+  (CartesianFeaturoid pA pB, ExistentialFactorAggregate r s 1)
+
+filterAggregateByFreq :: Integer -> (Maybe Integer) -> FeatureAggregate -> Bool
+filterAggregateByFreq minFreq Nothing (ExistentialFactorAggregate _ _ c) = c >= minFreq
+filterAggregateByFreq minFreq (Just total) (ExistentialFactorAggregate _ _ c) = c >= minFreq && total - c >= minFreq
+filterAggregateByFreq _ _ _ = True
+
 uScoresCounter :: Monad m => Integer -> ConduitT RankedFactor FeatureWithPValue (StateT Integer m) ()
-uScoresCounter minFreq = CC.map (\(RankedFactor feature r score) -> (feature, (r, score, 1)))
+uScoresCounter minFreq = CC.map initAggregate
                         .| gobbleAndDo countUScores
                         .| lowerFreqFiltre
                         .| pValueCalculator minFreq
  where countUScores l =
           M.toList
-           $ M.fromListWith (\(r1, s1, c1) (r2, s2, c2) -> ((r1 + r2), (s1 + s2), (c1 + c2))) l
-        lowerFreqFiltre = CC.filter (\(_, (_, _, c)) -> c >= minFreq)
+           $ M.fromListWith aggreggate l
+        lowerFreqFiltre = CC.filter (\(_, fAgg) -> filterAggregateByFreq minFreq Nothing fAgg)

-pValueCalculator :: Monad m => Integer -> ConduitT (Factor, (Double, MetricValue, Integer)) FeatureWithPValue (StateT Integer m) ()
+pValueCalculator :: Monad m => Integer -> ConduitT (Featuroid, FeatureAggregate) FeatureWithPValue (StateT Integer m) ()
 pValueCalculator minFreq = do
  firstVal <- await
  case firstVal of
-    Just i@(_, (_, _, c)) -> do
+    Just i@(_, fAgg) -> do
      total <- lift get
-      if total - c >= minFreq
+      if filterAggregateByFreq minFreq (Just total) fAgg
        then yield $ calculatePValue total i
        else return ()
-      CC.filter (\(_, (_, _, c)) -> total - c >= minFreq) .| CC.map (calculatePValue total)
+      CC.filter (\(_, fAgg) -> filterAggregateByFreq minFreq (Just total) fAgg) .| CC.map (calculatePValue total)
    Nothing -> return ()

-calculatePValue :: Integer -> (Factor, (Double, MetricValue, Integer)) -> FeatureWithPValue
-calculatePValue total (f, (r, s, c)) = FeatureWithPValue f
-                                                         (pvalue (r - minusR c) c (total - c))
-                                                         (s / (fromIntegral c))
-                                                         c
+calculatePValue :: Integer -> (Featuroid, FeatureAggregate) -> FeatureWithPValue
+calculatePValue _ (NumericalFeaturoid namespace, NumericalValueAggregate ranks scores _ values) =
+                kendallPValueFeature namespace DirectValue ranks scores values
+calculatePValue _ (NumericalFeaturoid namespace, LengthAggregate ranks scores lens) =
+                kendallPValueFeature namespace LengthOf ranks scores lens
+calculatePValue total (f, ExistentialFactorAggregate r s c) = FeatureWithPValue (featoroidToFeature f)
+                                                                                (pvalue (r - minusR c) c (total - c))
+                                                                                (s / (fromIntegral c))
+                                                                                c
  where minusR c = (c' * (c' + 1)) / 2.0
              where c' = fromIntegral c
        -- calulating p-value from Mann–Whitney U test
@ -237,6 +277,26 @@ calculatePValue total (f, (r, s, c)) = FeatureWithPValue f
                             sigma = sqrt $ n1' * n2' * (n1' + n2' + 1) / 12
                             z = (u - mean) / sigma
                         in cumulative (normalDistr 0.0 1.0) z
+        featoroidToFeature (UnaryFeaturoid fac) = UnaryFeature fac
+        featoroidToFeature (CartesianFeaturoid facA facB) = (CartesianFeature facA facB)
+
+
+kendallPValueFeature :: Ord a => FeatureNamespace -> NumericalType -> [Double] -> [MetricValue] -> [a] -> FeatureWithPValue
+kendallPValueFeature namespace ntype ranks scores values = FeatureWithPValue (NumericalFeature namespace ntype ndirection)
+                                                                      pv
+                                                                      ((sum selectedScores) / (fromIntegral selected))
+                                                                      (fromIntegral selected)
+                     where z = kendallZ (V.fromList $ Prelude.zip ranks values)
+                           pv = 2 * (cumulative (normalDistr 0.0 1.0) (- (abs z)))
+                           ndirection = if z > 0
+                                        then Small
+                                        else Big
+                           selected = (Prelude.length scores) `div` 4
+
+                           selectedScores = Prelude.take selected $ Prelude.map snd $ turner $ sortOn fst $ Prelude.zip values scores
+                           turner = case ndirection of
+                             Small -> id
+                             Big -> Prelude.reverse


 totalCounter :: Monad m => ConduitT a a (StateT Integer m) ()
--- a/test/Spec.hs
+++ b/test/Spec.hs
@ -40,6 +40,7 @@ import Data.List (sort)
 import qualified Test.HUnit as HU

 import qualified Data.IntSet as IS
+import qualified Data.Vector as V

 import Data.Conduit.SmartSource
 import Data.Conduit.Rank
@ -49,6 +50,10 @@ import Control.Monad.Trans.Resource
 import qualified Data.Conduit.List as CL
 import qualified Data.Conduit.Combinators as CC

+import Statistics.Distribution (cumulative)
+import Statistics.Distribution.Normal (normalDistr)
+import Data.Statistics.Kendall (kendall, kendallZ)
+
 informationRetrievalBookExample :: [(String, Int)]
 informationRetrievalBookExample = [("o", 2), ("o", 2), ("d", 2), ("x", 3), ("d", 3),
                                   ("x", 1), ("o", 1), ("x", 1), ( "x", 1), ("x", 1), ("x", 1),
@ -541,6 +546,14 @@ main = hspec $ do
                      (SimpleExistentialFactor (SimpleAtomicFactor (TextFactor "tests"))),
         PeggedFactor (FeatureTabbedNamespace "in" 3)
                      (NumericalFactor Nothing 5) ]
+  describe "Kendall's tau" $ do
+    it "tau" $ do
+      kendall (V.fromList $ Prelude.zip [12, 2, 1, 12, 2] [1, 4, 7, 1, 0]) `shouldBeAlmost` (-0.47140452079103173)
+    it "z" $ do
+      kendallZ (V.fromList $ Prelude.zip [12, 2, 1, 12, 2] [1, 4, 7, 1, 0]) `shouldBeAlmost` (-1.0742)
+    it "p-value" $ do
+      (2 * (cumulative (normalDistr 0.0 1.0) $ kendallZ (V.fromList $ Prelude.zip [12, 2, 1, 12, 2] [1, 4, 7, 1, 0]))) `shouldBeAlmost` 0.2827
+

 checkConduitPure conduit inList expList = do
  let outList = runConduitPure $ CC.yieldMany inList .| conduit .| CC.sinkList