import datetime from functools import partial from textwrap import dedent from typing import TYPE_CHECKING, Optional, Union import numpy as np from pandas._libs.tslibs import Timedelta import pandas._libs.window.aggregations as window_aggregations from pandas._typing import FrameOrSeries, TimedeltaConvertibleTypes from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, doc from pandas.core.dtypes.common import is_datetime64_ns_dtype import pandas.core.common as common from pandas.core.util.numba_ import maybe_use_numba from pandas.core.window.common import ( _doc_template, _shared_docs, flex_binary_moment, zsqrt, ) from pandas.core.window.indexers import ( BaseIndexer, ExponentialMovingWindowIndexer, GroupbyIndexer, ) from pandas.core.window.numba_ import generate_numba_groupby_ewma_func from pandas.core.window.rolling import BaseWindow, BaseWindowGroupby, dispatch if TYPE_CHECKING: from pandas import Series _bias_template = """ Parameters ---------- bias : bool, default False Use a standard estimation bias correction. *args, **kwargs Arguments and keyword arguments to be passed into func. """ def get_center_of_mass( comass: Optional[float], span: Optional[float], halflife: Optional[float], alpha: Optional[float], ) -> float: valid_count = common.count_not_none(comass, span, halflife, alpha) if valid_count > 1: raise ValueError("comass, span, halflife, and alpha are mutually exclusive") # Convert to center of mass; domain checks ensure 0 < alpha <= 1 if comass is not None: if comass < 0: raise ValueError("comass must satisfy: comass >= 0") elif span is not None: if span < 1: raise ValueError("span must satisfy: span >= 1") comass = (span - 1) / 2.0 elif halflife is not None: if halflife <= 0: raise ValueError("halflife must satisfy: halflife > 0") decay = 1 - np.exp(np.log(0.5) / halflife) comass = 1 / decay - 1 elif alpha is not None: if alpha <= 0 or alpha > 1: raise ValueError("alpha must satisfy: 0 < alpha <= 1") comass = (1.0 - alpha) / alpha else: raise ValueError("Must pass one of comass, span, halflife, or alpha") return float(comass) def wrap_result(obj: "Series", result: np.ndarray) -> "Series": """ Wrap a single 1D result. """ obj = obj._selected_obj return obj._constructor(result, obj.index, name=obj.name) class ExponentialMovingWindow(BaseWindow): r""" Provide exponential weighted (EW) functions. Available EW functions: ``mean()``, ``var()``, ``std()``, ``corr()``, ``cov()``. Exactly one parameter: ``com``, ``span``, ``halflife``, or ``alpha`` must be provided. Parameters ---------- com : float, optional Specify decay in terms of center of mass, :math:`\alpha = 1 / (1 + com)`, for :math:`com \geq 0`. span : float, optional Specify decay in terms of span, :math:`\alpha = 2 / (span + 1)`, for :math:`span \geq 1`. halflife : float, str, timedelta, optional Specify decay in terms of half-life, :math:`\alpha = 1 - \exp\left(-\ln(2) / halflife\right)`, for :math:`halflife > 0`. If ``times`` is specified, the time unit (str or timedelta) over which an observation decays to half its value. Only applicable to ``mean()`` and halflife value will not apply to the other functions. .. versionadded:: 1.1.0 alpha : float, optional Specify smoothing factor :math:`\alpha` directly, :math:`0 < \alpha \leq 1`. min_periods : int, default 0 Minimum number of observations in window required to have a value (otherwise result is NA). adjust : bool, default True Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings (viewing EWMA as a moving average). - When ``adjust=True`` (default), the EW function is calculated using weights :math:`w_i = (1 - \alpha)^i`. For example, the EW moving average of the series [:math:`x_0, x_1, ..., x_t`] would be: .. math:: y_t = \frac{x_t + (1 - \alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ... + (1 - \alpha)^t x_0}{1 + (1 - \alpha) + (1 - \alpha)^2 + ... + (1 - \alpha)^t} - When ``adjust=False``, the exponentially weighted function is calculated recursively: .. math:: \begin{split} y_0 &= x_0\\ y_t &= (1 - \alpha) y_{t-1} + \alpha x_t, \end{split} ignore_na : bool, default False Ignore missing values when calculating weights; specify ``True`` to reproduce pre-0.15.0 behavior. - When ``ignore_na=False`` (default), weights are based on absolute positions. For example, the weights of :math:`x_0` and :math:`x_2` used in calculating the final weighted average of [:math:`x_0`, None, :math:`x_2`] are :math:`(1-\alpha)^2` and :math:`1` if ``adjust=True``, and :math:`(1-\alpha)^2` and :math:`\alpha` if ``adjust=False``. - When ``ignore_na=True`` (reproducing pre-0.15.0 behavior), weights are based on relative positions. For example, the weights of :math:`x_0` and :math:`x_2` used in calculating the final weighted average of [:math:`x_0`, None, :math:`x_2`] are :math:`1-\alpha` and :math:`1` if ``adjust=True``, and :math:`1-\alpha` and :math:`\alpha` if ``adjust=False``. axis : {0, 1}, default 0 The axis to use. The value 0 identifies the rows, and 1 identifies the columns. times : str, np.ndarray, Series, default None .. versionadded:: 1.1.0 Times corresponding to the observations. Must be monotonically increasing and ``datetime64[ns]`` dtype. If str, the name of the column in the DataFrame representing the times. If 1-D array like, a sequence with the same shape as the observations. Only applicable to ``mean()``. Returns ------- DataFrame A Window sub-classed for the particular operation. See Also -------- rolling : Provides rolling window calculations. expanding : Provides expanding transformations. Notes ----- More details can be found at: :ref:`Exponentially weighted windows `. Examples -------- >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) >>> df B 0 0.0 1 1.0 2 2.0 3 NaN 4 4.0 >>> df.ewm(com=0.5).mean() B 0 0.000000 1 0.750000 2 1.615385 3 1.615385 4 3.670213 Specifying ``times`` with a timedelta ``halflife`` when computing mean. >>> times = ['2020-01-01', '2020-01-03', '2020-01-10', '2020-01-15', '2020-01-17'] >>> df.ewm(halflife='4 days', times=pd.DatetimeIndex(times)).mean() B 0 0.000000 1 0.585786 2 1.523889 3 1.523889 4 3.233686 """ _attributes = ["com", "min_periods", "adjust", "ignore_na", "axis"] def __init__( self, obj, com: Optional[float] = None, span: Optional[float] = None, halflife: Optional[Union[float, TimedeltaConvertibleTypes]] = None, alpha: Optional[float] = None, min_periods: int = 0, adjust: bool = True, ignore_na: bool = False, axis: int = 0, times: Optional[Union[str, np.ndarray, FrameOrSeries]] = None, **kwargs, ): self.obj = obj self.min_periods = max(int(min_periods), 1) self.adjust = adjust self.ignore_na = ignore_na self.axis = axis self.on = None self.center = False self.closed = None if times is not None: if isinstance(times, str): times = self._selected_obj[times] if not is_datetime64_ns_dtype(times): raise ValueError("times must be datetime64[ns] dtype.") if len(times) != len(obj): raise ValueError("times must be the same length as the object.") if not isinstance(halflife, (str, datetime.timedelta)): raise ValueError( "halflife must be a string or datetime.timedelta object" ) self.times = np.asarray(times.astype(np.int64)) self.halflife = Timedelta(halflife).value # Halflife is no longer applicable when calculating COM # But allow COM to still be calculated if the user passes other decay args if common.count_not_none(com, span, alpha) > 0: self.com = get_center_of_mass(com, span, None, alpha) else: self.com = 0.0 else: if halflife is not None and isinstance(halflife, (str, datetime.timedelta)): raise ValueError( "halflife can only be a timedelta convertible argument if " "times is not None." ) self.times = None self.halflife = None self.com = get_center_of_mass(com, span, halflife, alpha) @property def _constructor(self): return ExponentialMovingWindow def _get_window_indexer(self) -> BaseIndexer: """ Return an indexer class that will compute the window start and end bounds """ return ExponentialMovingWindowIndexer() _agg_see_also_doc = dedent( """ See Also -------- pandas.DataFrame.rolling.aggregate """ ) _agg_examples_doc = dedent( """ Examples -------- >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) >>> df A B C 0 1 4 7 1 2 5 8 2 3 6 9 >>> df.ewm(alpha=0.5).mean() A B C 0 1.000000 4.000000 7.000000 1 1.666667 4.666667 7.666667 2 2.428571 5.428571 8.428571 """ ) @doc( _shared_docs["aggregate"], see_also=_agg_see_also_doc, examples=_agg_examples_doc, klass="Series/Dataframe", axis="", ) def aggregate(self, func, *args, **kwargs): return super().aggregate(func, *args, **kwargs) agg = aggregate @Substitution(name="ewm", func_name="mean") @Appender(_doc_template) def mean(self, *args, **kwargs): """ Exponential weighted moving average. Parameters ---------- *args, **kwargs Arguments and keyword arguments to be passed into func. """ nv.validate_window_func("mean", args, kwargs) if self.times is not None: window_func = self._get_roll_func("ewma_time") window_func = partial( window_func, times=self.times, halflife=self.halflife, ) else: window_func = self._get_roll_func("ewma") window_func = partial( window_func, com=self.com, adjust=self.adjust, ignore_na=self.ignore_na, ) return self._apply(window_func) @Substitution(name="ewm", func_name="std") @Appender(_doc_template) @Appender(_bias_template) def std(self, bias: bool = False, *args, **kwargs): """ Exponential weighted moving stddev. """ nv.validate_window_func("std", args, kwargs) return zsqrt(self.var(bias=bias, **kwargs)) vol = std @Substitution(name="ewm", func_name="var") @Appender(_doc_template) @Appender(_bias_template) def var(self, bias: bool = False, *args, **kwargs): """ Exponential weighted moving variance. """ nv.validate_window_func("var", args, kwargs) window_func = self._get_roll_func("ewmcov") window_func = partial( window_func, com=self.com, adjust=self.adjust, ignore_na=self.ignore_na, bias=bias, ) def var_func(values, begin, end, min_periods): return window_func(values, begin, end, min_periods, values) return self._apply(var_func) @Substitution(name="ewm", func_name="cov") @Appender(_doc_template) def cov( self, other: Optional[Union[np.ndarray, FrameOrSeries]] = None, pairwise: Optional[bool] = None, bias: bool = False, **kwargs, ): """ Exponential weighted sample covariance. Parameters ---------- other : Series, DataFrame, or ndarray, optional If not supplied then will default to self and produce pairwise output. pairwise : bool, default None If False then only matching columns between self and other will be used and the output will be a DataFrame. If True then all pairwise combinations will be calculated and the output will be a MultiIndex DataFrame in the case of DataFrame inputs. In the case of missing elements, only complete pairwise observations will be used. bias : bool, default False Use a standard estimation bias correction. **kwargs Keyword arguments to be passed into func. """ if other is None: other = self._selected_obj # only default unset pairwise = True if pairwise is None else pairwise other = self._shallow_copy(other) def _get_cov(X, Y): X = self._shallow_copy(X) Y = self._shallow_copy(Y) cov = window_aggregations.ewmcov( X._prep_values(), np.array([0], dtype=np.int64), np.array([0], dtype=np.int64), self.min_periods, Y._prep_values(), self.com, self.adjust, self.ignore_na, bias, ) return wrap_result(X, cov) return flex_binary_moment( self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise) ) @Substitution(name="ewm", func_name="corr") @Appender(_doc_template) def corr( self, other: Optional[Union[np.ndarray, FrameOrSeries]] = None, pairwise: Optional[bool] = None, **kwargs, ): """ Exponential weighted sample correlation. Parameters ---------- other : Series, DataFrame, or ndarray, optional If not supplied then will default to self and produce pairwise output. pairwise : bool, default None If False then only matching columns between self and other will be used and the output will be a DataFrame. If True then all pairwise combinations will be calculated and the output will be a MultiIndex DataFrame in the case of DataFrame inputs. In the case of missing elements, only complete pairwise observations will be used. **kwargs Keyword arguments to be passed into func. """ if other is None: other = self._selected_obj # only default unset pairwise = True if pairwise is None else pairwise other = self._shallow_copy(other) def _get_corr(X, Y): X = self._shallow_copy(X) Y = self._shallow_copy(Y) def _cov(x, y): return window_aggregations.ewmcov( x, np.array([0], dtype=np.int64), np.array([0], dtype=np.int64), self.min_periods, y, self.com, self.adjust, self.ignore_na, 1, ) x_values = X._prep_values() y_values = Y._prep_values() with np.errstate(all="ignore"): cov = _cov(x_values, y_values) x_var = _cov(x_values, x_values) y_var = _cov(y_values, y_values) corr = cov / zsqrt(x_var * y_var) return wrap_result(X, corr) return flex_binary_moment( self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise) ) class ExponentialMovingWindowGroupby(BaseWindowGroupby, ExponentialMovingWindow): """ Provide an exponential moving window groupby implementation. """ def _get_window_indexer(self) -> GroupbyIndexer: """ Return an indexer class that will compute the window start and end bounds Returns ------- GroupbyIndexer """ window_indexer = GroupbyIndexer( groupby_indicies=self._groupby.indices, window_indexer=ExponentialMovingWindowIndexer, ) return window_indexer var = dispatch("var", bias=False) std = dispatch("std", bias=False) cov = dispatch("cov", other=None, pairwise=None, bias=False) corr = dispatch("corr", other=None, pairwise=None) def mean(self, engine=None, engine_kwargs=None): """ Parameters ---------- engine : str, default None * ``'cython'`` : Runs mean through C-extensions from cython. * ``'numba'`` : Runs mean through JIT compiled code from numba. Only available when ``raw`` is set to ``True``. * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` .. versionadded:: 1.2.0 engine_kwargs : dict, default None * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` and ``parallel`` dictionary keys. The values must either be ``True`` or ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is ``{'nopython': True, 'nogil': False, 'parallel': False}``. .. versionadded:: 1.2.0 Returns ------- Series or DataFrame Return type is determined by the caller. """ if maybe_use_numba(engine): groupby_ewma_func = generate_numba_groupby_ewma_func( engine_kwargs, self.com, self.adjust, self.ignore_na, ) return self._apply( groupby_ewma_func, numba_cache_key=(lambda x: x, "groupby_ewma"), ) elif engine in ("cython", None): if engine_kwargs is not None: raise ValueError("cython engine does not accept engine_kwargs") def f(x): x = self._shallow_copy(x, groupby=self._groupby) return x.mean() return self._groupby.apply(f) else: raise ValueError("engine must be either 'numba' or 'cython'")