from textwrap import dedent
from typing import Any, Callable, Dict, Optional, Tuple, Union

import numpy as np

from pandas._typing import FrameOrSeries
from pandas.compat.numpy import function as nv
from pandas.util._decorators import Appender, Substitution, doc

from pandas.core.window.common import _doc_template, _shared_docs
from pandas.core.window.indexers import BaseIndexer, ExpandingIndexer, GroupbyIndexer
from pandas.core.window.rolling import BaseWindowGroupby, RollingAndExpandingMixin


class Expanding(RollingAndExpandingMixin):
    """
    Provide expanding transformations.

    Parameters
    ----------
    min_periods : int, default 1
        Minimum number of observations in window required to have a value
        (otherwise result is NA).
    center : bool, default False
        Set the labels at the center of the window.
    axis : int or str, default 0

    Returns
    -------
    a Window sub-classed for the particular operation

    See Also
    --------
    rolling : Provides rolling window calculations.
    ewm : Provides exponential weighted functions.

    Notes
    -----
    By default, the result is set to the right edge of the window. This can be
    changed to the center of the window by setting ``center=True``.

    Examples
    --------
    >>> df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
    >>> df
         B
    0  0.0
    1  1.0
    2  2.0
    3  NaN
    4  4.0

    >>> df.expanding(2).sum()
         B
    0  NaN
    1  1.0
    2  3.0
    3  3.0
    4  7.0
    """

    _attributes = ["min_periods", "center", "axis"]

    def __init__(self, obj, min_periods=1, center=None, axis=0, **kwargs):
        # Extra keyword arguments are accepted for signature compatibility but
        # are intentionally not forwarded to the parent constructor.
        # NOTE(review): the class docstring documents ``center`` as defaulting
        # to False while the signature default is None -- confirm which one is
        # intended before changing either.
        super().__init__(obj=obj, min_periods=min_periods, center=center, axis=axis)

    @property
    def _constructor(self):
        # Always the plain ``Expanding`` class, even when accessed on a subclass.
        return Expanding

    def _get_window_indexer(self) -> BaseIndexer:
        """
        Return an indexer class that will compute the window start and end bounds
        """
        return ExpandingIndexer()

    def _get_cov_corr_window(
        self, other: Optional[Union[np.ndarray, FrameOrSeries]] = None, **kwargs
    ) -> int:
        """
        Get the window length over which to perform cov and corr operations.

        Parameters
        ----------
        other : object, default None
            The other object that is involved in the operation.
            Such an object is involved for operations like covariance.
            Only its presence (``is not None``) is used here.
        **kwargs
            Accepted but not used.

        Returns
        -------
        window : int
            The window length: the length of the selected axis of ``self.obj``
            (doubled when ``other`` is given), but never less than
            ``min_periods``.
        """
        n_obs = len(self.obj._get_axis(self.axis))
        # When a second object takes part, the window spans both objects.
        length = 2 * n_obs if other is not None else n_obs

        # A falsy ``min_periods`` (None or 0) must never win the ``max`` below,
        # hence the -1 sentinel.
        min_periods = self.min_periods or -1
        return max(length, min_periods)

    _agg_see_also_doc = dedent(
        """
    See Also
    --------
    pandas.DataFrame.aggregate : Similar DataFrame method.
    pandas.Series.aggregate : Similar Series method.
    """
    )

    _agg_examples_doc = dedent(
        """
    Examples
    --------
    >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
    >>> df
       A  B  C
    0  1  4  7
    1  2  5  8
    2  3  6  9

    >>> df.expanding().mean()
         A    B    C
    0  1.0  4.0  7.0
    1  1.5  4.5  7.5
    2  2.0  5.0  8.0
    """
    )

    # No docstring is added to the decorated methods below: ``doc`` and
    # ``Appender`` build the final docstring from the shared templates, and a
    # local docstring would be merged into the rendered documentation.
    @doc(
        _shared_docs["aggregate"],
        see_also=_agg_see_also_doc,
        examples=_agg_examples_doc,
        klass="Series/Dataframe",
        axis="",
    )
    def aggregate(self, func, *args, **kwargs):
        return super().aggregate(func, *args, **kwargs)

    agg = aggregate

    @Substitution(name="expanding")
    @Appender(_shared_docs["count"])
    def count(self):
        return super().count()

    @Substitution(name="expanding")
    @Appender(_shared_docs["apply"])
    def apply(
        self,
        func: Callable[..., Any],
        raw: bool = False,
        engine: Optional[str] = None,
        engine_kwargs: Optional[Dict[str, bool]] = None,
        args: Optional[Tuple[Any, ...]] = None,
        kwargs: Optional[Dict[str, Any]] = None,
    ):
        return super().apply(
            func,
            raw=raw,
            engine=engine,
            engine_kwargs=engine_kwargs,
            args=args,
            kwargs=kwargs,
        )

    @Substitution(name="expanding")
    @Appender(_shared_docs["sum"])
    def sum(self, *args, **kwargs):
        # Validate the numpy-compat arguments before delegating.
        nv.validate_expanding_func("sum", args, kwargs)
        return super().sum(*args, **kwargs)

    @Substitution(name="expanding", func_name="max")
    @Appender(_doc_template)
    @Appender(_shared_docs["max"])
    def max(self, *args, **kwargs):
        nv.validate_expanding_func("max", args, kwargs)
        return super().max(*args, **kwargs)

    @Substitution(name="expanding")
    @Appender(_shared_docs["min"])
    def min(self, *args, **kwargs):
        nv.validate_expanding_func("min", args, kwargs)
        return super().min(*args, **kwargs)

    @Substitution(name="expanding")
    @Appender(_shared_docs["mean"])
    def mean(self, *args, **kwargs):
        nv.validate_expanding_func("mean", args, kwargs)
        return super().mean(*args, **kwargs)

    @Substitution(name="expanding")
    @Appender(_shared_docs["median"])
    def median(self, **kwargs):
        return super().median(**kwargs)

    @Substitution(name="expanding", versionadded="")
    @Appender(_shared_docs["std"])
    def std(self, ddof: int = 1, *args, **kwargs):
        # ``args`` is only validated here; it is not forwarded to the parent.
        nv.validate_expanding_func("std", args, kwargs)
        return super().std(ddof=ddof, **kwargs)

    @Substitution(name="expanding", versionadded="")
    @Appender(_shared_docs["var"])
    def var(self, ddof: int = 1, *args, **kwargs):
        # ``args`` is only validated here; it is not forwarded to the parent.
        nv.validate_expanding_func("var", args, kwargs)
        return super().var(ddof=ddof, **kwargs)

    @Substitution(name="expanding")
    @Appender(_shared_docs["sem"])
    def sem(self, ddof: int = 1, *args, **kwargs):
        # NOTE(review): unlike ``std``/``var`` there is no
        # ``nv.validate_expanding_func`` call here and ``args`` is dropped
        # without validation -- confirm whether that is intentional.
        return super().sem(ddof=ddof, **kwargs)

    @Substitution(name="expanding", func_name="skew")
    @Appender(_doc_template)
    @Appender(_shared_docs["skew"])
    def skew(self, **kwargs):
        return super().skew(**kwargs)

    # Example section attached to ``kurt`` via ``Appender`` below.
    _agg_doc = dedent(
        """
    Examples
    --------

    The example below will show an expanding calculation with a window size of
    four matching the equivalent function call using `scipy.stats`.

    >>> arr = [1, 2, 3, 4, 999]
    >>> import scipy.stats
    >>> print(f"{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}")
    -1.200000
    >>> print(f"{scipy.stats.kurtosis(arr, bias=False):.6f}")
    4.999874
    >>> s = pd.Series(arr)
    >>> s.expanding(4).kurt()
    0         NaN
    1         NaN
    2         NaN
    3   -1.200000
    4    4.999874
    dtype: float64
    """
    )

    @Appender(_agg_doc)
    @Substitution(name="expanding")
    @Appender(_shared_docs["kurt"])
    def kurt(self, **kwargs):
        return super().kurt(**kwargs)

    @Substitution(name="expanding")
    @Appender(_shared_docs["quantile"])
    def quantile(self, quantile, interpolation="linear", **kwargs):
        return super().quantile(
            quantile=quantile, interpolation=interpolation, **kwargs
        )

    @Substitution(name="expanding", func_name="cov")
    @Appender(_doc_template)
    @Appender(_shared_docs["cov"])
    def cov(
        self,
        other: Optional[Union[np.ndarray, FrameOrSeries]] = None,
        pairwise: Optional[bool] = None,
        ddof: int = 1,
        **kwargs,
    ):
        return super().cov(other=other, pairwise=pairwise, ddof=ddof, **kwargs)

    @Substitution(name="expanding")
    @Appender(_shared_docs["corr"])
    def corr(
        self,
        other: Optional[Union[np.ndarray, FrameOrSeries]] = None,
        pairwise: Optional[bool] = None,
        **kwargs,
    ):
        return super().corr(other=other, pairwise=pairwise, **kwargs)


class ExpandingGroupby(BaseWindowGroupby, Expanding):
    """
    Provide an expanding groupby implementation.
    """

    def _get_window_indexer(self) -> GroupbyIndexer:
        """
        Return an indexer class that will compute the window start and end bounds

        Returns
        -------
        GroupbyIndexer
        """
        # ``ExpandingIndexer`` is passed as a class (not an instance), and
        # ``groupby_indicies`` is the keyword name used by ``GroupbyIndexer``.
        window_indexer = GroupbyIndexer(
            groupby_indicies=self._groupby.indices,
            window_indexer=ExpandingIndexer,
        )
        return window_indexer