# Listing metadata from the code viewer: 174 lines, 5.1 KiB, Python.
|
import math
|
||
|
|
||
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
import pandas.util._test_decorators as td
|
||
|
|
||
|
import pandas as pd
|
||
|
from pandas import Series, isna
|
||
|
import pandas._testing as tm
|
||
|
|
||
|
|
||
|
class TestSeriesCov:
    """Tests for ``Series.cov``."""

    def test_cov(self, datetime_series):
        """Exercise ``Series.cov`` on overlapping, disjoint and all-NA inputs.

        Parameters
        ----------
        datetime_series : Series
            Fixture supplied by the test suite; it is sliced positionally
            here, so it is expected to hold more than 15 observations.
        """
        # full overlap: the covariance of a series with itself is its variance
        tm.assert_almost_equal(
            datetime_series.cov(datetime_series), datetime_series.std() ** 2
        )

        # partial overlap: only the observations present in both slices
        # (positions 5 through 14) should contribute
        tm.assert_almost_equal(
            datetime_series[:15].cov(datetime_series[5:]),
            datetime_series[5:15].std() ** 2,
        )

        # No overlap: even and odd positions share no observations
        assert np.isnan(datetime_series[::2].cov(datetime_series[1::2]))

        # all NA
        cp = datetime_series[:10].copy()
        cp[:] = np.nan
        assert isna(cp.cov(cp))

        # min_periods: the slices share fewer than the 12 observations required
        assert isna(datetime_series[:15].cov(datetime_series[5:], min_periods=12))

        # same check after both slices are reindexed onto the full index
        ts1 = datetime_series[:15].reindex(datetime_series.index)
        ts2 = datetime_series[5:].reindex(datetime_series.index)
        assert isna(ts1.cov(ts2, min_periods=12))

    @pytest.mark.parametrize("test_ddof", [None, 0, 1, 2, 3])
    def test_cov_ddof(self, test_ddof):
        """Check that ``Series.cov`` agrees with ``np.cov`` for each ``ddof``.

        Parameters
        ----------
        test_ddof : int or None
            Delta degrees of freedom forwarded to both implementations.
        """
        # GH#34611
        # Seeded generator so a failure can be reproduced with the same data.
        rng = np.random.default_rng(2)
        np_array1 = rng.random(10)
        np_array2 = rng.random(10)

        s1 = Series(np_array1)
        s2 = Series(np_array2)

        result = s1.cov(s2, ddof=test_ddof)
        # np.cov returns the 2x2 covariance matrix; the off-diagonal entry
        # is the covariance between the two arrays.
        expected = np.cov(np_array1, np_array2, ddof=test_ddof)[0, 1]
        assert math.isclose(expected, result)
|
||
|
|
||
|
|
||
|
class TestSeriesCorr:
    """Tests for ``Series.corr``."""

    @td.skip_if_no_scipy
    def test_corr(self, datetime_series):
        """Exercise the default ``Series.corr`` and compare it with SciPy.

        Parameters
        ----------
        datetime_series : Series
            Fixture supplied by the test suite; it is sliced positionally
            here, so it is expected to hold more than 15 observations.
        """
        import scipy.stats as stats

        # full overlap
        tm.assert_almost_equal(datetime_series.corr(datetime_series), 1)

        # partial overlap
        tm.assert_almost_equal(datetime_series[:15].corr(datetime_series[5:]), 1)

        # min_periods: the slices share fewer than the 12 observations required
        assert isna(datetime_series[:15].corr(datetime_series[5:], min_periods=12))

        # same check after both slices are reindexed onto the full index
        ts1 = datetime_series[:15].reindex(datetime_series.index)
        ts2 = datetime_series[5:].reindex(datetime_series.index)
        assert isna(ts1.corr(ts2, min_periods=12))

        # No overlap: even and odd positions share no observations
        assert np.isnan(datetime_series[::2].corr(datetime_series[1::2]))

        # all NA
        cp = datetime_series[:10].copy()
        cp[:] = np.nan
        assert isna(cp.corr(cp))

        # cross-check the result against scipy.stats.pearsonr
        A = tm.makeTimeSeries()
        B = tm.makeTimeSeries()
        result = A.corr(B)
        expected, _ = stats.pearsonr(A, B)
        tm.assert_almost_equal(result, expected)

    @td.skip_if_no_scipy
    def test_corr_rank(self):
        """Check the ``kendall`` and ``spearman`` methods of ``Series.corr``.

        Results are compared with SciPy and with fixed reference values
        computed in R.
        """
        import scipy.stats as stats

        # kendall and spearman
        A = tm.makeTimeSeries()
        B = tm.makeTimeSeries()
        # copy the first five values over the last five, creating ties in A
        A[-5:] = A[:5]
        result = A.corr(B, method="kendall")
        expected = stats.kendalltau(A, B)[0]
        tm.assert_almost_equal(result, expected)

        result = A.corr(B, method="spearman")
        expected = stats.spearmanr(A, B)[0]
        tm.assert_almost_equal(result, expected)

        # results from R
        A = Series(
            [
                -0.89926396,
                0.94209606,
                -1.03289164,
                -0.95445587,
                0.76910310,
                -0.06430576,
                -2.09704447,
                0.40660407,
                -0.89926396,
                0.94209606,
            ]
        )
        B = Series(
            [
                -1.01270225,
                -0.62210117,
                -1.56895827,
                0.59592943,
                -0.01680292,
                1.17258718,
                -1.06009347,
                -0.10222060,
                -0.89076239,
                0.89372375,
            ]
        )
        kexp = 0.4319297
        sexp = 0.5853767
        tm.assert_almost_equal(A.corr(B, method="kendall"), kexp)
        tm.assert_almost_equal(A.corr(B, method="spearman"), sexp)

    def test_corr_invalid_method(self):
        """Check that an unrecognised ``method`` string raises ``ValueError``."""
        # GH PR #22298
        # Seeded generator so the inputs are identical on every run.
        rng = np.random.default_rng(2)
        s1 = Series(rng.standard_normal(10))
        s2 = Series(rng.standard_normal(10))
        msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, "
        with pytest.raises(ValueError, match=msg):
            s1.corr(s2, method="____")

    def test_corr_callable_method(self, datetime_series):
        """Check that ``corr`` accepts a user-supplied callable as ``method``.

        Parameters
        ----------
        datetime_series : Series
            Fixture supplied by the test suite; it is sliced positionally
            here, so it is expected to hold more than 15 observations.
        """

        def my_corr(a, b):
            """Return 1.0 if ``a`` and ``b`` are equal element-wise, else 0.0."""
            return 1.0 if (a == b).all() else 0.0

        # simple example
        s1 = Series([1, 2, 3, 4, 5])
        s2 = Series([5, 4, 3, 2, 1])
        expected = 0
        tm.assert_almost_equal(s1.corr(s2, method=my_corr), expected)

        # full overlap
        tm.assert_almost_equal(
            datetime_series.corr(datetime_series, method=my_corr), 1.0
        )

        # partial overlap
        tm.assert_almost_equal(
            datetime_series[:15].corr(datetime_series[5:], method=my_corr), 1.0
        )

        # No overlap
        assert np.isnan(
            datetime_series[::2].corr(datetime_series[1::2], method=my_corr)
        )

        # dataframe example
        df = pd.DataFrame([s1, s2])
        expected = pd.DataFrame([{0: 1.0, 1: 0}, {0: 0, 1: 1.0}])
        tm.assert_almost_equal(df.transpose().corr(method=my_corr), expected)
|