270 lines
8.8 KiB
Python
270 lines
8.8 KiB
Python
from datetime import datetime, timedelta
|
|
|
|
import numpy as np
|
|
import pytest
|
|
|
|
import pandas as pd
|
|
from pandas import DataFrame, Series
|
|
import pandas._testing as tm
|
|
from pandas.core.groupby.groupby import DataError
|
|
from pandas.core.groupby.grouper import Grouper
|
|
from pandas.core.indexes.datetimes import date_range
|
|
from pandas.core.indexes.period import PeriodIndex, period_range
|
|
from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range
|
|
|
|
# a fixture value can be overridden by the test parameter value. Note that the
|
|
# value of the fixture can be overridden this way even if the test doesn't use
|
|
# it directly (doesn't mention it in the function prototype).
|
|
# see https://docs.pytest.org/en/latest/fixture.html#override-a-fixture-with-direct-test-parametrization # noqa
|
|
# in this module we override the fixture values defined in conftest.py
|
|
# tuples of '_index_factory,_series_name,_index_start,_index_end'
|
|
# Each tuple is ordered as (_index_factory, _series_name, _index_start, _index_end).
# The datetime-based ranges share the same start/end points.
_RANGE_START = datetime(2005, 1, 1)
_RANGE_END = datetime(2005, 1, 10)

DATE_RANGE = (date_range, "dti", _RANGE_START, _RANGE_END)
PERIOD_RANGE = (period_range, "pi", _RANGE_START, _RANGE_END)
TIMEDELTA_RANGE = (timedelta_range, "tdi", "1 day", "10 day")
|
|
|
|
# Reusable parametrization that runs a test once per index flavour
# (date range, period range, timedelta range).
_ALL_TS_ARGNAMES = "_index_factory,_series_name,_index_start,_index_end"
_ALL_TS_ARGVALUES = [DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE]
all_ts = pytest.mark.parametrize(_ALL_TS_ARGNAMES, _ALL_TS_ARGVALUES)
|
|
|
|
|
|
@pytest.fixture
def create_index(_index_factory):
    """Fixture yielding a callable that delegates to ``_index_factory``."""

    def _build(*args, **kwargs):
        """ return the _index_factory created using the args, kwargs """
        # arguments are forwarded untouched to the parametrized factory
        index = _index_factory(*args, **kwargs)
        return index

    return _build
|
|
|
|
|
|
@pytest.mark.parametrize("freq", ["2D", "1H"])
@pytest.mark.parametrize(
    "_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE]
)
def test_asfreq(series_and_frame, freq, create_index):
    """``resample(freq).asfreq()`` must match a reindex onto a regular index."""
    obj = series_and_frame
    result = obj.resample(freq).asfreq()

    # expected: reindex onto an index spanning the first to the last label
    first, last = obj.index[0], obj.index[-1]
    expected = obj.reindex(create_index(first, last, freq=freq))

    tm.assert_almost_equal(result, expected)
|
|
|
|
|
|
@pytest.mark.parametrize(
    "_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE]
)
def test_asfreq_fill_value(series, create_index):
    """Check ``asfreq`` with and without ``fill_value`` (issue 3715)."""
    hourly = "1H"

    # Series: plain asfreq compared against a reindex onto the hourly index
    result = series.resample(hourly).asfreq()
    hourly_index = create_index(series.index[0], series.index[-1], freq=hourly)
    tm.assert_series_equal(result, series.reindex(hourly_index))

    # DataFrame with the second row blanked out: new rows get the fill value
    frame = series.to_frame("value")
    frame.iloc[1] = None
    result = frame.resample(hourly).asfreq(fill_value=4.0)
    hourly_index = create_index(frame.index[0], frame.index[-1], freq=hourly)
    expected = frame.reindex(hourly_index, fill_value=4.0)
    tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
@all_ts
def test_resample_interpolate(frame):
    """Resampler ``interpolate`` must equal ``asfreq().interpolate()`` (# 12925)."""
    via_asfreq = frame.resample("1T").asfreq().interpolate()
    direct = frame.resample("1T").interpolate()
    tm.assert_frame_equal(via_asfreq, direct)
|
|
|
|
|
|
def test_raises_on_non_datetimelike_index():
    """Resampling a frame without a datetimelike index must raise ``TypeError``."""
    expected_message = (
        "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, "
        "but got an instance of 'Index'"
    )
    # a default-constructed DataFrame has a non datetimelike index
    plain_frame = DataFrame()
    with pytest.raises(TypeError, match=expected_message):
        plain_frame.resample("A").mean()
|
|
|
|
|
|
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_empty_series(freq, empty_series, resample_method):
    """Resampling an empty series keeps it empty, with the new freq on the index.

    GH12771 & GH12868
    """
    if resample_method == "ohlc":
        pytest.skip("need to test for ohlc from GH13083")

    resampler = empty_series.resample(freq)
    result = getattr(resampler, resample_method)()

    # expected: a copy of the input whose index carries the target frequency
    expected = empty_series.copy()
    source_index = empty_series.index
    expected.index = (
        source_index.asfreq(freq=freq)
        if isinstance(source_index, PeriodIndex)
        else source_index._shallow_copy(freq=freq)
    )

    tm.assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
    tm.assert_series_equal(result, expected, check_dtype=False)
|
|
|
|
|
|
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
@pytest.mark.parametrize("resample_method", ["count", "size"])
def test_resample_count_empty_series(freq, empty_series, resample_method):
    """``count``/``size`` on an empty series give an empty int64 series (GH28427)."""
    result = getattr(empty_series.resample(freq), resample_method)()

    source_index = empty_series.index
    expected_index = (
        source_index.asfreq(freq=freq)
        if isinstance(source_index, PeriodIndex)
        else source_index._shallow_copy(freq=freq)
    )
    expected = pd.Series(
        [], dtype="int64", index=expected_index, name=empty_series.name
    )

    tm.assert_series_equal(result, expected)
|
|
|
|
|
|
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_empty_dataframe(empty_frame, freq, resample_method):
    """Resampling an empty frame returns an empty result with the new freq.

    GH13212
    """
    # count retains dimensions too
    result = getattr(empty_frame.resample(freq), resample_method)()

    if resample_method == "size":
        # GH14962
        expected = Series([], dtype=object)
    else:
        expected = empty_frame.copy()

    source_index = empty_frame.index
    if isinstance(source_index, PeriodIndex):
        new_index = source_index.asfreq(freq=freq)
    else:
        new_index = source_index._shallow_copy(freq=freq)
    expected.index = new_index

    tm.assert_index_equal(result.index, expected.index)
    assert result.index.freq == expected.index.freq
    tm.assert_almost_equal(result, expected, check_dtype=False)

    # test size for GH13212 (currently stays as df)
|
|
|
|
|
|
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_count_empty_dataframe(freq, empty_frame):
    """``count`` on an empty frame with one column gives an empty int64 frame.

    GH28427
    """
    # work on a copy so the fixture object is not modified
    df = empty_frame.copy()
    df["a"] = []

    result = df.resample(freq).count()

    source_index = df.index
    expected_index = (
        source_index.asfreq(freq=freq)
        if isinstance(source_index, PeriodIndex)
        else source_index._shallow_copy(freq=freq)
    )
    expected = pd.DataFrame({"a": []}, dtype="int64", index=expected_index)

    tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
@all_ts
@pytest.mark.parametrize("freq", ["M", "D", "H"])
def test_resample_size_empty_dataframe(freq, empty_frame):
    """``size`` on an empty frame with one column gives an empty int64 series.

    GH28427
    """
    # work on a copy so the fixture object is not modified
    df = empty_frame.copy()
    df["a"] = []

    result = df.resample(freq).size()

    source_index = df.index
    expected_index = (
        source_index.asfreq(freq=freq)
        if isinstance(source_index, PeriodIndex)
        else source_index._shallow_copy(freq=freq)
    )
    expected = pd.Series([], dtype="int64", index=expected_index)

    tm.assert_series_equal(result, expected)
|
|
|
|
|
|
@pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0))
@pytest.mark.parametrize("dtype", [float, int, object, "datetime64[ns]"])
def test_resample_empty_dtypes(index, dtype, resample_method):
    """Smoke-test every resample method on empty series of several dtypes.

    Empty series were sometimes causing a segfault (for the functions
    with Cython bounds-checking disabled) or an IndexError. We just run
    them to ensure they no longer do. (GH #10228)

    The dtypes are spelled with the builtins ``float``/``int``/``object``
    rather than the ``np.float``/``np.int``/``np.object`` aliases, which
    were plain aliases of those builtins and are deprecated in NumPy.
    """
    empty_series = Series([], index=index, dtype=dtype)
    try:
        getattr(empty_series.resample("d"), resample_method)()
    except DataError:
        # Ignore these since some combinations are invalid
        # (ex: doing mean with dtype of object)
        pass
|
|
|
|
|
|
@all_ts
@pytest.mark.parametrize("arg", ["mean", {"value": "mean"}, ["mean"]])
def test_resample_loffset_arg_type(frame, create_index, arg):
    """Check ``resample("2D", loffset="2H").agg(arg)`` for several ``arg`` forms.

    GH 13218, 15002

    The expected frame holds the mean of each consecutive pair of rows, on a
    "2D" index shifted by two hours. For a ``TimedeltaIndex`` the comparison
    is currently expected to fail (see the TODO below).
    """
    df = frame
    expected_means = [df.values[i : i + 2].mean() for i in range(0, len(df.values), 2)]
    # floor division keeps ``periods`` an integer count rather than a float
    expected_index = create_index(df.index[0], periods=len(df.index) // 2, freq="2D")

    # loffset coerces PeriodIndex to DateTimeIndex
    if isinstance(expected_index, PeriodIndex):
        expected_index = expected_index.to_timestamp()

    expected_index += timedelta(hours=2)
    expected = DataFrame({"value": expected_means}, index=expected_index)

    result_agg = df.resample("2D", loffset="2H").agg(arg)

    if isinstance(arg, list):
        # a list of aggregations yields (column, function) MultiIndex columns
        expected.columns = pd.MultiIndex.from_tuples([("value", "mean")])

    # GH 13022, 7687 - TODO: fix resample w/ TimedeltaIndex
    if isinstance(expected.index, TimedeltaIndex):
        msg = "DataFrame are different"
        with pytest.raises(AssertionError, match=msg):
            tm.assert_frame_equal(result_agg, expected)
    else:
        tm.assert_frame_equal(result_agg, expected)
|
|
|
|
|
|
@all_ts
def test_apply_to_empty_series(empty_series):
    """``apply`` on an empty series gives the same result for any function.

    GH 14313
    """
    frequencies = ("M", "D", "H")
    for freq in frequencies:
        with_constant = empty_series.resample(freq).apply(lambda x: 1)
        with_sum = empty_series.resample(freq).apply(np.sum)

        tm.assert_series_equal(with_constant, with_sum, check_dtype=False)
|
|
|
|
|
|
@all_ts
def test_resampler_is_iterable(series):
    """Iterating a resampler yields the same pairs as the matching groupby.

    GH 15314
    """
    freq = "H"
    by_grouper = series.groupby(Grouper(freq=freq, convention="start"))
    by_resample = series.resample(freq)

    # compare (key, group) pairs from both iterables side by side
    for resample_item, group_item in zip(by_resample, by_grouper):
        resample_key, resample_values = resample_item
        group_key, group_values = group_item
        assert resample_key == group_key
        tm.assert_series_equal(resample_values, group_values)
|
|
|
|
|
|
@all_ts
def test_resample_quantile(series):
    """Resampler ``quantile`` must match aggregating with ``Series.quantile``.

    GH 15023
    """
    freq = "H"
    q = 0.75

    result = series.resample(freq).quantile(q)

    # reference: per-bin quantile via agg, renamed to the series name
    aggregated = series.resample(freq).agg(lambda x: x.quantile(q))
    expected = aggregated.rename(series.name)

    tm.assert_series_equal(result, expected)
|