529 lines
18 KiB
Python
529 lines
18 KiB
Python
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
from pandas import DataFrame, Index, MultiIndex, Series, isna, notna
|
||
|
import pandas._testing as tm
|
||
|
|
||
|
|
||
|
def test_expanding_corr(series):
|
||
|
A = series.dropna()
|
||
|
B = (A + np.random.randn(len(A)))[:-5]
|
||
|
|
||
|
result = A.expanding().corr(B)
|
||
|
|
||
|
rolling_result = A.rolling(window=len(A), min_periods=1).corr(B)
|
||
|
|
||
|
tm.assert_almost_equal(rolling_result, result)
|
||
|
|
||
|
|
||
|
def test_expanding_count(series):
|
||
|
result = series.expanding(min_periods=0).count()
|
||
|
tm.assert_almost_equal(
|
||
|
result, series.rolling(window=len(series), min_periods=0).count()
|
||
|
)
|
||
|
|
||
|
|
||
|
def test_expanding_quantile(series):
|
||
|
result = series.expanding().quantile(0.5)
|
||
|
|
||
|
rolling_result = series.rolling(window=len(series), min_periods=1).quantile(0.5)
|
||
|
|
||
|
tm.assert_almost_equal(result, rolling_result)
|
||
|
|
||
|
|
||
|
def test_expanding_cov(series):
|
||
|
A = series
|
||
|
B = (A + np.random.randn(len(A)))[:-5]
|
||
|
|
||
|
result = A.expanding().cov(B)
|
||
|
|
||
|
rolling_result = A.rolling(window=len(A), min_periods=1).cov(B)
|
||
|
|
||
|
tm.assert_almost_equal(rolling_result, result)
|
||
|
|
||
|
|
||
|
def test_expanding_cov_pairwise(frame):
|
||
|
result = frame.expanding().cov()
|
||
|
|
||
|
rolling_result = frame.rolling(window=len(frame), min_periods=1).cov()
|
||
|
|
||
|
tm.assert_frame_equal(result, rolling_result)
|
||
|
|
||
|
|
||
|
def test_expanding_corr_pairwise(frame):
|
||
|
result = frame.expanding().corr()
|
||
|
|
||
|
rolling_result = frame.rolling(window=len(frame), min_periods=1).corr()
|
||
|
tm.assert_frame_equal(result, rolling_result)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"func,static_comp",
|
||
|
[("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)],
|
||
|
ids=["sum", "mean", "max", "min"],
|
||
|
)
|
||
|
def test_expanding_func(func, static_comp, frame_or_series):
|
||
|
data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10))
|
||
|
result = getattr(data.expanding(min_periods=1, axis=0), func)()
|
||
|
assert isinstance(result, frame_or_series)
|
||
|
|
||
|
if frame_or_series is Series:
|
||
|
tm.assert_almost_equal(result[10], static_comp(data[:11]))
|
||
|
else:
|
||
|
tm.assert_series_equal(
|
||
|
result.iloc[10], static_comp(data[:11]), check_names=False
|
||
|
)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "func,static_comp",
    [("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)],
    ids=["sum", "mean", "max", "min"],
)
def test_expanding_min_periods(func, static_comp):
    """Check that ``min_periods`` gates where expanding reductions start.

    Parameters
    ----------
    func : str
        Name of the method called on the expanding window.
    static_comp : callable
        NumPy reducer used as the reference for the final value.
    """
    # Seeded generator so the random inputs are reproducible.
    rng = np.random.default_rng(2)
    ser = Series(rng.standard_normal(50))

    def reduce_expanding(obj, min_periods):
        # ``axis`` is not passed: it is deprecated in newer pandas and
        # row-wise windows are the default.
        return getattr(obj.expanding(min_periods=min_periods), func)()

    result = reduce_expanding(ser, 30)
    assert result[:29].isna().all()
    tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50]))

    # min_periods is working correctly
    result = reduce_expanding(ser, 15)
    assert isna(result.iloc[13])
    assert notna(result.iloc[14])

    ser2 = Series(rng.standard_normal(20))
    result = reduce_expanding(ser2, 5)
    assert isna(result[3])
    assert notna(result[4])

    # min_periods=0
    result0 = reduce_expanding(ser, 0)
    result1 = reduce_expanding(ser, 1)
    tm.assert_almost_equal(result0, result1)

    result = reduce_expanding(ser, 1)
    tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50]))
|
||
|
|
||
|
|
||
|
def test_expanding_apply(engine_and_raw, frame_or_series):
|
||
|
engine, raw = engine_and_raw
|
||
|
data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10))
|
||
|
result = data.expanding(min_periods=1).apply(
|
||
|
lambda x: x.mean(), raw=raw, engine=engine
|
||
|
)
|
||
|
assert isinstance(result, frame_or_series)
|
||
|
|
||
|
if frame_or_series is Series:
|
||
|
tm.assert_almost_equal(result[9], np.mean(data[:11]))
|
||
|
else:
|
||
|
tm.assert_series_equal(result.iloc[9], np.mean(data[:11]), check_names=False)
|
||
|
|
||
|
|
||
|
def test_expanding_min_periods_apply(engine_and_raw):
|
||
|
engine, raw = engine_and_raw
|
||
|
ser = Series(np.random.randn(50))
|
||
|
|
||
|
result = ser.expanding(min_periods=30).apply(
|
||
|
lambda x: x.mean(), raw=raw, engine=engine
|
||
|
)
|
||
|
assert result[:29].isna().all()
|
||
|
tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50]))
|
||
|
|
||
|
# min_periods is working correctly
|
||
|
result = ser.expanding(min_periods=15).apply(
|
||
|
lambda x: x.mean(), raw=raw, engine=engine
|
||
|
)
|
||
|
assert isna(result.iloc[13])
|
||
|
assert notna(result.iloc[14])
|
||
|
|
||
|
ser2 = Series(np.random.randn(20))
|
||
|
result = ser2.expanding(min_periods=5).apply(
|
||
|
lambda x: x.mean(), raw=raw, engine=engine
|
||
|
)
|
||
|
assert isna(result[3])
|
||
|
assert notna(result[4])
|
||
|
|
||
|
# min_periods=0
|
||
|
result0 = ser.expanding(min_periods=0).apply(
|
||
|
lambda x: x.mean(), raw=raw, engine=engine
|
||
|
)
|
||
|
result1 = ser.expanding(min_periods=1).apply(
|
||
|
lambda x: x.mean(), raw=raw, engine=engine
|
||
|
)
|
||
|
tm.assert_almost_equal(result0, result1)
|
||
|
|
||
|
result = ser.expanding(min_periods=1).apply(
|
||
|
lambda x: x.mean(), raw=raw, engine=engine
|
||
|
)
|
||
|
tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50]))
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
|
||
|
@pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum])
|
||
|
def test_expanding_apply_consistency_sum_nans(consistency_data, min_periods, f):
|
||
|
x, is_constant, no_nans = consistency_data
|
||
|
|
||
|
if f is np.nansum and min_periods == 0:
|
||
|
pass
|
||
|
else:
|
||
|
expanding_f_result = x.expanding(min_periods=min_periods).sum()
|
||
|
expanding_apply_f_result = x.expanding(min_periods=min_periods).apply(
|
||
|
func=f, raw=True
|
||
|
)
|
||
|
tm.assert_equal(expanding_f_result, expanding_apply_f_result)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
|
||
|
@pytest.mark.parametrize("f", [lambda v: Series(v).sum(), np.nansum, np.sum])
|
||
|
def test_expanding_apply_consistency_sum_no_nans(consistency_data, min_periods, f):
|
||
|
|
||
|
x, is_constant, no_nans = consistency_data
|
||
|
|
||
|
if no_nans:
|
||
|
if f is np.nansum and min_periods == 0:
|
||
|
pass
|
||
|
else:
|
||
|
expanding_f_result = x.expanding(min_periods=min_periods).sum()
|
||
|
expanding_apply_f_result = x.expanding(min_periods=min_periods).apply(
|
||
|
func=f, raw=True
|
||
|
)
|
||
|
tm.assert_equal(expanding_f_result, expanding_apply_f_result)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
|
||
|
@pytest.mark.parametrize("ddof", [0, 1])
|
||
|
def test_moments_consistency_var(consistency_data, min_periods, ddof):
|
||
|
x, is_constant, no_nans = consistency_data
|
||
|
|
||
|
mean_x = x.expanding(min_periods=min_periods).mean()
|
||
|
var_x = x.expanding(min_periods=min_periods).var(ddof=ddof)
|
||
|
assert not (var_x < 0).any().any()
|
||
|
|
||
|
if ddof == 0:
|
||
|
# check that biased var(x) == mean(x^2) - mean(x)^2
|
||
|
mean_x2 = (x * x).expanding(min_periods=min_periods).mean()
|
||
|
tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x))
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
|
||
|
@pytest.mark.parametrize("ddof", [0, 1])
|
||
|
def test_moments_consistency_var_constant(consistency_data, min_periods, ddof):
|
||
|
x, is_constant, no_nans = consistency_data
|
||
|
|
||
|
if is_constant:
|
||
|
count_x = x.expanding(min_periods=min_periods).count()
|
||
|
var_x = x.expanding(min_periods=min_periods).var(ddof=ddof)
|
||
|
|
||
|
# check that variance of constant series is identically 0
|
||
|
assert not (var_x > 0).any().any()
|
||
|
expected = x * np.nan
|
||
|
expected[count_x >= max(min_periods, 1)] = 0.0
|
||
|
if ddof == 1:
|
||
|
expected[count_x < 2] = np.nan
|
||
|
tm.assert_equal(var_x, expected)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
|
||
|
@pytest.mark.parametrize("ddof", [0, 1])
|
||
|
def test_expanding_consistency_std(consistency_data, min_periods, ddof):
|
||
|
x, is_constant, no_nans = consistency_data
|
||
|
|
||
|
var_x = x.expanding(min_periods=min_periods).var(ddof=ddof)
|
||
|
std_x = x.expanding(min_periods=min_periods).std(ddof=ddof)
|
||
|
assert not (var_x < 0).any().any()
|
||
|
assert not (std_x < 0).any().any()
|
||
|
|
||
|
# check that var(x) == std(x)^2
|
||
|
tm.assert_equal(var_x, std_x * std_x)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
|
||
|
@pytest.mark.parametrize("ddof", [0, 1])
|
||
|
def test_expanding_consistency_cov(consistency_data, min_periods, ddof):
|
||
|
x, is_constant, no_nans = consistency_data
|
||
|
var_x = x.expanding(min_periods=min_periods).var(ddof=ddof)
|
||
|
assert not (var_x < 0).any().any()
|
||
|
|
||
|
cov_x_x = x.expanding(min_periods=min_periods).cov(x, ddof=ddof)
|
||
|
assert not (cov_x_x < 0).any().any()
|
||
|
|
||
|
# check that var(x) == cov(x, x)
|
||
|
tm.assert_equal(var_x, cov_x_x)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
|
||
|
@pytest.mark.parametrize("ddof", [0, 1])
|
||
|
def test_expanding_consistency_series_cov_corr(consistency_data, min_periods, ddof):
|
||
|
x, is_constant, no_nans = consistency_data
|
||
|
|
||
|
if isinstance(x, Series):
|
||
|
var_x_plus_y = (x + x).expanding(min_periods=min_periods).var(ddof=ddof)
|
||
|
var_x = x.expanding(min_periods=min_periods).var(ddof=ddof)
|
||
|
var_y = x.expanding(min_periods=min_periods).var(ddof=ddof)
|
||
|
cov_x_y = x.expanding(min_periods=min_periods).cov(x, ddof=ddof)
|
||
|
# check that cov(x, y) == (var(x+y) - var(x) -
|
||
|
# var(y)) / 2
|
||
|
tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y))
|
||
|
|
||
|
# check that corr(x, y) == cov(x, y) / (std(x) *
|
||
|
# std(y))
|
||
|
corr_x_y = x.expanding(min_periods=min_periods).corr(x)
|
||
|
std_x = x.expanding(min_periods=min_periods).std(ddof=ddof)
|
||
|
std_y = x.expanding(min_periods=min_periods).std(ddof=ddof)
|
||
|
tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y))
|
||
|
|
||
|
if ddof == 0:
|
||
|
# check that biased cov(x, y) == mean(x*y) -
|
||
|
# mean(x)*mean(y)
|
||
|
mean_x = x.expanding(min_periods=min_periods).mean()
|
||
|
mean_y = x.expanding(min_periods=min_periods).mean()
|
||
|
mean_x_times_y = (x * x).expanding(min_periods=min_periods).mean()
|
||
|
tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y))
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
|
||
|
def test_expanding_consistency_mean(consistency_data, min_periods):
|
||
|
x, is_constant, no_nans = consistency_data
|
||
|
|
||
|
result = x.expanding(min_periods=min_periods).mean()
|
||
|
expected = (
|
||
|
x.expanding(min_periods=min_periods).sum()
|
||
|
/ x.expanding(min_periods=min_periods).count()
|
||
|
)
|
||
|
tm.assert_equal(result, expected.astype("float64"))
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
|
||
|
def test_expanding_consistency_constant(consistency_data, min_periods):
|
||
|
x, is_constant, no_nans = consistency_data
|
||
|
|
||
|
if is_constant:
|
||
|
count_x = x.expanding().count()
|
||
|
mean_x = x.expanding(min_periods=min_periods).mean()
|
||
|
# check that correlation of a series with itself is either 1 or NaN
|
||
|
corr_x_x = x.expanding(min_periods=min_periods).corr(x)
|
||
|
|
||
|
exp = x.max() if isinstance(x, Series) else x.max().max()
|
||
|
|
||
|
# check mean of constant series
|
||
|
expected = x * np.nan
|
||
|
expected[count_x >= max(min_periods, 1)] = exp
|
||
|
tm.assert_equal(mean_x, expected)
|
||
|
|
||
|
# check correlation of constant series with itself is NaN
|
||
|
expected[:] = np.nan
|
||
|
tm.assert_equal(corr_x_x, expected)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4])
|
||
|
def test_expanding_consistency_var_debiasing_factors(consistency_data, min_periods):
|
||
|
x, is_constant, no_nans = consistency_data
|
||
|
|
||
|
# check variance debiasing factors
|
||
|
var_unbiased_x = x.expanding(min_periods=min_periods).var()
|
||
|
var_biased_x = x.expanding(min_periods=min_periods).var(ddof=0)
|
||
|
var_debiasing_factors_x = x.expanding().count() / (
|
||
|
x.expanding().count() - 1.0
|
||
|
).replace(0.0, np.nan)
|
||
|
tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "f",
    [
        lambda x: (x.expanding(min_periods=5).cov(x, pairwise=True)),
        lambda x: (x.expanding(min_periods=5).corr(x, pairwise=True)),
    ],
)
def test_moment_functions_zero_length_pairwise(f):
    """Pairwise expanding cov/corr on zero-length frames.

    Parameters
    ----------
    f : callable
        Takes a DataFrame and returns the pairwise expanding result.
    """
    df1 = DataFrame()
    df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar"))
    df2["a"] = df2["a"].astype("float64")

    # No explicit ``columns`` here: the expected frame takes whatever empty
    # column index the installed pandas uses by default, instead of pinning
    # an object-dtype ``Index([])``.
    df1_expected = DataFrame(index=MultiIndex.from_product([df1.index, df1.columns]))
    df2_expected = DataFrame(
        index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]),
        columns=Index(["a"], name="foo"),
        dtype="float64",
    )

    df1_result = f(df1)
    tm.assert_frame_equal(df1_result, df1_expected)

    df2_result = f(df2)
    tm.assert_frame_equal(df2_result, df2_expected)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "f",
    [
        lambda x: x.expanding().count(),
        lambda x: x.expanding(min_periods=5).cov(x, pairwise=False),
        lambda x: x.expanding(min_periods=5).corr(x, pairwise=False),
        lambda x: x.expanding(min_periods=5).max(),
        lambda x: x.expanding(min_periods=5).min(),
        lambda x: x.expanding(min_periods=5).sum(),
        lambda x: x.expanding(min_periods=5).mean(),
        lambda x: x.expanding(min_periods=5).std(),
        lambda x: x.expanding(min_periods=5).var(),
        lambda x: x.expanding(min_periods=5).skew(),
        lambda x: x.expanding(min_periods=5).kurt(),
        lambda x: x.expanding(min_periods=5).quantile(0.5),
        lambda x: x.expanding(min_periods=5).median(),
        lambda x: x.expanding(min_periods=5).apply(sum, raw=False),
        lambda x: x.expanding(min_periods=5).apply(sum, raw=True),
    ],
)
def test_moment_functions_zero_length(f):
    """Expanding functions on empty inputs must return the input unchanged.

    Each input object is also used as its own expected value.
    """
    # GH 8056
    empty_series = Series(dtype=np.float64)
    empty_frame = DataFrame()
    float_frame = DataFrame(columns=["a"])
    float_frame["a"] = float_frame["a"].astype("float64")

    series_out = f(empty_series)
    tm.assert_series_equal(series_out, empty_series)

    frame_out = f(empty_frame)
    tm.assert_frame_equal(frame_out, empty_frame)

    float_frame_out = f(float_frame)
    tm.assert_frame_equal(float_frame_out, float_frame)
|
||
|
|
||
|
|
||
|
def test_expanding_apply_empty_series(engine_and_raw):
|
||
|
engine, raw = engine_and_raw
|
||
|
ser = Series([], dtype=np.float64)
|
||
|
tm.assert_series_equal(
|
||
|
ser, ser.expanding().apply(lambda x: x.mean(), raw=raw, engine=engine)
|
||
|
)
|
||
|
|
||
|
|
||
|
def test_expanding_apply_min_periods_0(engine_and_raw):
|
||
|
# GH 8080
|
||
|
engine, raw = engine_and_raw
|
||
|
s = Series([None, None, None])
|
||
|
result = s.expanding(min_periods=0).apply(lambda x: len(x), raw=raw, engine=engine)
|
||
|
expected = Series([1.0, 2.0, 3.0])
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
|
||
|
def test_expanding_cov_diff_index():
    """Expanding ``cov`` between Series whose indexes do not coincide."""
    # GH 7512
    left = Series([1, 2, 3], index=[0, 1, 2])
    right = Series([1, 3], index=[0, 2])
    expected = Series([None, None, 2.0])
    tm.assert_series_equal(left.expanding().cov(right), expected)

    # Same values as above, with the gap written as an explicit None.
    right_with_gap = Series([1, None, 3], index=[0, 1, 2])
    tm.assert_series_equal(left.expanding().cov(right_with_gap), expected)

    # Both operands are missing a different label.
    left = Series([7, 8, 10], index=[0, 1, 3])
    right = Series([7, 9, 10], index=[0, 2, 3])
    expected = Series([None, None, None, 4.5])
    tm.assert_series_equal(left.expanding().cov(right), expected)
|
||
|
|
||
|
|
||
|
def test_expanding_corr_diff_index():
    """Expanding ``corr`` between Series whose indexes do not coincide."""
    # GH 7512
    left = Series([1, 2, 3], index=[0, 1, 2])
    right = Series([1, 3], index=[0, 2])
    expected = Series([None, None, 1.0])
    tm.assert_series_equal(left.expanding().corr(right), expected)

    # Same values as above, with the gap written as an explicit None.
    right_with_gap = Series([1, None, 3], index=[0, 1, 2])
    tm.assert_series_equal(left.expanding().corr(right_with_gap), expected)

    # Both operands are missing a different label.
    left = Series([7, 8, 10], index=[0, 1, 3])
    right = Series([7, 9, 10], index=[0, 2, 3])
    expected = Series([None, None, None, 1.0])
    tm.assert_series_equal(left.expanding().corr(right), expected)
|
||
|
|
||
|
|
||
|
def test_expanding_cov_pairwise_diff_length():
    """Pairwise expanding ``cov`` between frames of different lengths.

    Every combination of a three-row or two-row left frame with a
    three-row or two-row right frame must give the same values at label 2.
    """
    # GH 7512
    left_cols = Index(["A", "B"], name="foo")
    right_cols = Index(["X", "Y"], name="foo")

    df1 = DataFrame([[1, 5], [3, 2], [3, 9]], columns=left_cols)
    df1a = DataFrame([[1, 5], [3, 9]], index=[0, 2], columns=left_cols)
    df2 = DataFrame([[5, 6], [None, None], [2, 1]], columns=right_cols)
    df2a = DataFrame([[5, 6], [2, 1]], index=[0, 2], columns=right_cols)

    # TODO: xref gh-15826
    # .loc is not preserving the names
    pairs = [(df1, df2), (df1, df2a), (df1a, df2), (df1a, df2a)]
    results = [
        left.expanding().cov(right, pairwise=True).loc[2] for left, right in pairs
    ]

    expected = DataFrame(
        [[-3.0, -6.0], [-5.0, -10.0]],
        columns=Index(["A", "B"], name="foo"),
        index=Index(["X", "Y"], name="foo"),
    )
    for result in results:
        tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
|
||
|
def test_expanding_corr_pairwise_diff_length():
    """Pairwise expanding ``corr`` between frames of different lengths.

    Every combination of a three-row or two-row left frame with a
    three-row or two-row right frame must give the same values at label 2.
    """
    # GH 7512
    df1 = DataFrame(
        [[1, 2], [3, 2], [3, 4]], columns=["A", "B"], index=Index(range(3), name="bar")
    )
    df1a = DataFrame(
        [[1, 2], [3, 4]], index=Index([0, 2], name="bar"), columns=["A", "B"]
    )
    df2 = DataFrame(
        [[5, 6], [None, None], [2, 1]],
        columns=["X", "Y"],
        index=Index(range(3), name="bar"),
    )
    df2a = DataFrame(
        [[5, 6], [2, 1]], index=Index([0, 2], name="bar"), columns=["X", "Y"]
    )

    pairs = [(df1, df2), (df1, df2a), (df1a, df2), (df1a, df2a)]
    results = [
        left.expanding().corr(right, pairwise=True).loc[2] for left, right in pairs
    ]

    expected = DataFrame(
        [[-1.0, -1.0], [-1.0, -1.0]], columns=["A", "B"], index=Index(["X", "Y"])
    )
    for result in results:
        tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
|
||
|
def test_expanding_apply_args_kwargs(engine_and_raw):
|
||
|
def mean_w_arg(x, const):
|
||
|
return np.mean(x) + const
|
||
|
|
||
|
engine, raw = engine_and_raw
|
||
|
|
||
|
df = DataFrame(np.random.rand(20, 3))
|
||
|
|
||
|
expected = df.expanding().apply(np.mean, engine=engine, raw=raw) + 20.0
|
||
|
|
||
|
result = df.expanding().apply(mean_w_arg, engine=engine, raw=raw, args=(20,))
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.expanding().apply(mean_w_arg, raw=raw, kwargs={"const": 20})
|
||
|
tm.assert_frame_equal(result, expected)
|