from datetime import datetime, timedelta import numpy as np import pytest from pandas.errors import UnsupportedFunctionCall import pandas.util._test_decorators as td import pandas as pd from pandas import DataFrame, Index, Series import pandas._testing as tm from pandas.core.window import Rolling from pandas.tests.window.common import Base class TestRolling(Base): def setup_method(self, method): self._create_data() def test_doc_string(self): df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) df df.rolling(2).sum() df.rolling(2, min_periods=1).sum() @pytest.mark.parametrize("which", ["series", "frame"]) def test_constructor(self, which): # GH 12669 o = getattr(self, which) c = o.rolling # valid c(window=2) c(window=2, min_periods=1) c(window=2, min_periods=1, center=True) c(window=2, min_periods=1, center=False) # GH 13383 with pytest.raises(ValueError): c(0) c(-1) # not valid for w in [2.0, "foo", np.array([2])]: with pytest.raises(ValueError): c(window=w) with pytest.raises(ValueError): c(window=2, min_periods=w) with pytest.raises(ValueError): c(window=2, min_periods=1, center=w) @td.skip_if_no_scipy @pytest.mark.parametrize("which", ["series", "frame"]) def test_constructor_with_win_type(self, which): # GH 13383 o = getattr(self, which) c = o.rolling with pytest.raises(ValueError): c(-1, win_type="boxcar") @pytest.mark.parametrize("window", [timedelta(days=3), pd.Timedelta(days=3)]) def test_constructor_with_timedelta_window(self, window): # GH 15440 n = 10 df = DataFrame( {"value": np.arange(n)}, index=pd.date_range("2015-12-24", periods=n, freq="D"), ) expected_data = np.append([0.0, 1.0], np.arange(3.0, 27.0, 3)) result = df.rolling(window=window).sum() expected = DataFrame( {"value": expected_data}, index=pd.date_range("2015-12-24", periods=n, freq="D"), ) tm.assert_frame_equal(result, expected) expected = df.rolling("3D").sum() tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("window", [timedelta(days=3), pd.Timedelta(days=3), "3D"]) def test_constructor_timedelta_window_and_minperiods(self, window, raw): # GH 15305 n = 10 df = DataFrame( {"value": np.arange(n)}, index=pd.date_range("2017-08-08", periods=n, freq="D"), ) expected = DataFrame( {"value": np.append([np.NaN, 1.0], np.arange(3.0, 27.0, 3))}, index=pd.date_range("2017-08-08", periods=n, freq="D"), ) result_roll_sum = df.rolling(window=window, min_periods=2).sum() result_roll_generic = df.rolling(window=window, min_periods=2).apply( sum, raw=raw ) tm.assert_frame_equal(result_roll_sum, expected) tm.assert_frame_equal(result_roll_generic, expected) @pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) def test_numpy_compat(self, method): # see gh-12811 r = Rolling(Series([2, 4, 6]), window=2) msg = "numpy operations are not valid with window objects" with pytest.raises(UnsupportedFunctionCall, match=msg): getattr(r, method)(1, 2, 3) with pytest.raises(UnsupportedFunctionCall, match=msg): getattr(r, method)(dtype=np.float64) def test_closed(self): df = DataFrame({"A": [0, 1, 2, 3, 4]}) # closed only allowed for datetimelike with pytest.raises(ValueError): df.rolling(window=3, closed="neither") @pytest.mark.parametrize("closed", ["neither", "left"]) def test_closed_empty(self, closed, arithmetic_win_operators): # GH 26005 func_name = arithmetic_win_operators ser = pd.Series( data=np.arange(5), index=pd.date_range("2000", periods=5, freq="2D") ) roll = ser.rolling("1D", closed=closed) result = getattr(roll, func_name)() expected = pd.Series([np.nan] * 5, index=ser.index) tm.assert_series_equal(result, expected) @pytest.mark.parametrize("func", ["min", "max"]) def test_closed_one_entry(self, func): # GH24718 ser = pd.Series(data=[2], index=pd.date_range("2000", periods=1)) result = getattr(ser.rolling("10D", closed="left"), func)() tm.assert_series_equal(result, pd.Series([np.nan], index=ser.index)) @pytest.mark.parametrize("func", ["min", "max"]) def test_closed_one_entry_groupby(self, func): # GH24718 ser = pd.DataFrame( data={"A": [1, 1, 2], "B": [3, 2, 1]}, index=pd.date_range("2000", periods=3), ) result = getattr( ser.groupby("A", sort=False)["B"].rolling("10D", closed="left"), func )() exp_idx = pd.MultiIndex.from_arrays( arrays=[[1, 1, 2], ser.index], names=("A", None) ) expected = pd.Series(data=[np.nan, 3, np.nan], index=exp_idx, name="B") tm.assert_series_equal(result, expected) @pytest.mark.parametrize("input_dtype", ["int", "float"]) @pytest.mark.parametrize( "func,closed,expected", [ ("min", "right", [0.0, 0, 0, 1, 2, 3, 4, 5, 6, 7]), ("min", "both", [0.0, 0, 0, 0, 1, 2, 3, 4, 5, 6]), ("min", "neither", [np.nan, 0, 0, 1, 2, 3, 4, 5, 6, 7]), ("min", "left", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, 6]), ("max", "right", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ("max", "both", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), ("max", "neither", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]), ("max", "left", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]), ], ) def test_closed_min_max_datetime(self, input_dtype, func, closed, expected): # see gh-21704 ser = pd.Series( data=np.arange(10).astype(input_dtype), index=pd.date_range("2000", periods=10), ) result = getattr(ser.rolling("3D", closed=closed), func)() expected = pd.Series(expected, index=ser.index) tm.assert_series_equal(result, expected) def test_closed_uneven(self): # see gh-21704 ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) # uneven ser = ser.drop(index=ser.index[[1, 5]]) result = ser.rolling("3D", closed="left").min() expected = pd.Series([np.nan, 0, 0, 2, 3, 4, 6, 6], index=ser.index) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( "func,closed,expected", [ ("min", "right", [np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan, np.nan]), ("min", "both", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, np.nan]), ("min", "neither", [np.nan, np.nan, 0, 1, 2, 3, 4, 5, np.nan, np.nan]), ("min", "left", [np.nan, np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan]), ("max", "right", [np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan, np.nan]), ("max", "both", [np.nan, 1, 2, 3, 4, 5, 6, 6, 6, np.nan]), ("max", "neither", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, np.nan, np.nan]), ("max", "left", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan]), ], ) def test_closed_min_max_minp(self, func, closed, expected): # see gh-21704 ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) ser[ser.index[-3:]] = np.nan result = getattr(ser.rolling("3D", min_periods=2, closed=closed), func)() expected = pd.Series(expected, index=ser.index) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( "closed,expected", [ ("right", [0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8]), ("both", [0, 0.5, 1, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), ("neither", [np.nan, 0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), ("left", [np.nan, 0, 0.5, 1, 2, 3, 4, 5, 6, 7]), ], ) def test_closed_median_quantile(self, closed, expected): # GH 26005 ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) roll = ser.rolling("3D", closed=closed) expected = pd.Series(expected, index=ser.index) result = roll.median() tm.assert_series_equal(result, expected) result = roll.quantile(0.5) tm.assert_series_equal(result, expected) @pytest.mark.parametrize("roller", ["1s", 1]) def tests_empty_df_rolling(self, roller): # GH 15819 Verifies that datetime and integer rolling windows can be # applied to empty DataFrames expected = DataFrame() result = DataFrame().rolling(roller).sum() tm.assert_frame_equal(result, expected) # Verifies that datetime and integer rolling windows can be applied to # empty DataFrames with datetime index expected = DataFrame(index=pd.DatetimeIndex([])) result = DataFrame(index=pd.DatetimeIndex([])).rolling(roller).sum() tm.assert_frame_equal(result, expected) def test_empty_window_median_quantile(self): # GH 26005 expected = pd.Series([np.nan, np.nan, np.nan]) roll = pd.Series(np.arange(3)).rolling(0) result = roll.median() tm.assert_series_equal(result, expected) result = roll.quantile(0.1) tm.assert_series_equal(result, expected) def test_missing_minp_zero(self): # https://github.com/pandas-dev/pandas/pull/18921 # minp=0 x = pd.Series([np.nan]) result = x.rolling(1, min_periods=0).sum() expected = pd.Series([0.0]) tm.assert_series_equal(result, expected) # minp=1 result = x.rolling(1, min_periods=1).sum() expected = pd.Series([np.nan]) tm.assert_series_equal(result, expected) def test_missing_minp_zero_variable(self): # https://github.com/pandas-dev/pandas/pull/18921 x = pd.Series( [np.nan] * 4, index=pd.DatetimeIndex( ["2017-01-01", "2017-01-04", "2017-01-06", "2017-01-07"] ), ) result = x.rolling(pd.Timedelta("2d"), min_periods=0).sum() expected = pd.Series(0.0, index=x.index) tm.assert_series_equal(result, expected) def test_multi_index_names(self): # GH 16789, 16825 cols = pd.MultiIndex.from_product( [["A", "B"], ["C", "D", "E"]], names=["1", "2"] ) df = DataFrame(np.ones((10, 6)), columns=cols) result = df.rolling(3).cov() tm.assert_index_equal(result.columns, df.columns) assert result.index.names == [None, "1", "2"] @pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame]) def test_iter_raises(self, klass): # https://github.com/pandas-dev/pandas/issues/11704 # Iteration over a Window obj = klass([1, 2, 3, 4]) with pytest.raises(NotImplementedError): iter(obj.rolling(2)) def test_rolling_axis_sum(self, axis_frame): # see gh-23372. df = DataFrame(np.ones((10, 20))) axis = df._get_axis_number(axis_frame) if axis == 0: expected = DataFrame({i: [np.nan] * 2 + [3.0] * 8 for i in range(20)}) else: # axis == 1 expected = DataFrame([[np.nan] * 2 + [3.0] * 18] * 10) result = df.rolling(3, axis=axis_frame).sum() tm.assert_frame_equal(result, expected) def test_rolling_axis_count(self, axis_frame): # see gh-26055 df = DataFrame({"x": range(3), "y": range(3)}) axis = df._get_axis_number(axis_frame) if axis in [0, "index"]: expected = DataFrame({"x": [1.0, 2.0, 2.0], "y": [1.0, 2.0, 2.0]}) else: expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]}) result = df.rolling(2, axis=axis_frame, min_periods=0).count() tm.assert_frame_equal(result, expected) def test_readonly_array(self): # GH-27766 arr = np.array([1, 3, np.nan, 3, 5]) arr.setflags(write=False) result = pd.Series(arr).rolling(2).mean() expected = pd.Series([np.nan, 2, np.nan, np.nan, 4]) tm.assert_series_equal(result, expected) def test_rolling_datetime(self, axis_frame, tz_naive_fixture): # GH-28192 tz = tz_naive_fixture df = pd.DataFrame( { i: [1] * 2 for i in pd.date_range("2019-8-01", "2019-08-03", freq="D", tz=tz) } ) if axis_frame in [0, "index"]: result = df.T.rolling("2D", axis=axis_frame).sum().T else: result = df.rolling("2D", axis=axis_frame).sum() expected = pd.DataFrame( { **{ i: [1.0] * 2 for i in pd.date_range("2019-8-01", periods=1, freq="D", tz=tz) }, **{ i: [2.0] * 2 for i in pd.date_range("2019-8-02", "2019-8-03", freq="D", tz=tz) }, } ) tm.assert_frame_equal(result, expected) def test_rolling_window_as_string(): # see gh-22590 date_today = datetime.now() days = pd.date_range(date_today, date_today + timedelta(365), freq="D") npr = np.random.RandomState(seed=421) data = npr.randint(1, high=100, size=len(days)) df = DataFrame({"DateCol": days, "metric": data}) df.set_index("DateCol", inplace=True) result = df.rolling(window="21D", min_periods=2, closed="left")["metric"].agg("max") expData = ( [np.nan] * 2 + [88.0] * 16 + [97.0] * 9 + [98.0] + [99.0] * 21 + [95.0] * 16 + [93.0] * 5 + [89.0] * 5 + [96.0] * 21 + [94.0] * 14 + [90.0] * 13 + [88.0] * 2 + [90.0] * 9 + [96.0] * 21 + [95.0] * 6 + [91.0] + [87.0] * 6 + [92.0] * 21 + [83.0] * 2 + [86.0] * 10 + [87.0] * 5 + [98.0] * 21 + [97.0] * 14 + [93.0] * 7 + [87.0] * 4 + [86.0] * 4 + [95.0] * 21 + [85.0] * 14 + [83.0] * 2 + [76.0] * 5 + [81.0] * 2 + [98.0] * 21 + [95.0] * 14 + [91.0] * 7 + [86.0] + [93.0] * 3 + [95.0] * 20 ) expected = Series(expData, index=Index(days, name="DateCol"), name="metric") tm.assert_series_equal(result, expected) def test_min_periods1(): # GH#6795 df = pd.DataFrame([0, 1, 2, 1, 0], columns=["a"]) result = df["a"].rolling(3, center=True, min_periods=1).max() expected = pd.Series([1.0, 2.0, 2.0, 2.0, 1.0], name="a") tm.assert_series_equal(result, expected) @pytest.mark.parametrize("constructor", [Series, DataFrame]) def test_rolling_count_with_min_periods(constructor): # GH 26996 result = constructor(range(5)).rolling(3, min_periods=3).count() expected = constructor([np.nan, np.nan, 3.0, 3.0, 3.0]) tm.assert_equal(result, expected) @pytest.mark.parametrize("constructor", [Series, DataFrame]) def test_rolling_count_default_min_periods_with_null_values(constructor): # GH 26996 values = [1, 2, 3, np.nan, 4, 5, 6] expected_counts = [1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0] result = constructor(values).rolling(3).count() expected = constructor(expected_counts) tm.assert_equal(result, expected)