817 lines
23 KiB
Python
817 lines
23 KiB
Python
from __future__ import annotations
|
|
|
|
from textwrap import dedent
|
|
from typing import (
|
|
TYPE_CHECKING,
|
|
Any,
|
|
Callable,
|
|
)
|
|
|
|
from pandas._typing import (
|
|
Axis,
|
|
QuantileInterpolation,
|
|
WindowingRankType,
|
|
)
|
|
|
|
if TYPE_CHECKING:
|
|
from pandas import DataFrame, Series
|
|
from pandas.core.generic import NDFrame
|
|
|
|
from pandas.util._decorators import doc
|
|
|
|
from pandas.core.indexers.objects import (
|
|
BaseIndexer,
|
|
ExpandingIndexer,
|
|
GroupbyIndexer,
|
|
)
|
|
from pandas.core.window.doc import (
|
|
_shared_docs,
|
|
create_section_header,
|
|
kwargs_numeric_only,
|
|
numba_notes,
|
|
template_header,
|
|
template_returns,
|
|
template_see_also,
|
|
window_agg_numba_parameters,
|
|
window_apply_parameters,
|
|
)
|
|
from pandas.core.window.rolling import (
|
|
BaseWindowGroupby,
|
|
RollingAndExpandingMixin,
|
|
)
|
|
|
|
|
|
class Expanding(RollingAndExpandingMixin):
|
|
"""
|
|
Provide expanding window calculations.
|
|
|
|
Parameters
|
|
----------
|
|
min_periods : int, default 1
|
|
Minimum number of observations in window required to have a value;
|
|
otherwise, result is ``np.nan``.
|
|
|
|
axis : int or str, default 0
|
|
If ``0`` or ``'index'``, roll across the rows.
|
|
|
|
If ``1`` or ``'columns'``, roll across the columns.
|
|
|
|
For `Series` this parameter is unused and defaults to 0.
|
|
|
|
method : str {'single', 'table'}, default 'single'
|
|
Execute the rolling operation per single column or row (``'single'``)
|
|
or over the entire object (``'table'``).
|
|
|
|
This argument is only implemented when specifying ``engine='numba'``
|
|
in the method call.
|
|
|
|
.. versionadded:: 1.3.0
|
|
|
|
Returns
|
|
-------
|
|
``Expanding`` subclass
|
|
|
|
See Also
|
|
--------
|
|
rolling : Provides rolling window calculations.
|
|
ewm : Provides exponential weighted functions.
|
|
|
|
Notes
|
|
-----
|
|
See :ref:`Windowing Operations <window.expanding>` for further usage details
|
|
and examples.
|
|
|
|
Examples
|
|
--------
|
|
>>> df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
|
|
>>> df
|
|
B
|
|
0 0.0
|
|
1 1.0
|
|
2 2.0
|
|
3 NaN
|
|
4 4.0
|
|
|
|
**min_periods**
|
|
|
|
Expanding sum with 1 vs 3 observations needed to calculate a value.
|
|
|
|
>>> df.expanding(1).sum()
|
|
B
|
|
0 0.0
|
|
1 1.0
|
|
2 3.0
|
|
3 3.0
|
|
4 7.0
|
|
>>> df.expanding(3).sum()
|
|
B
|
|
0 NaN
|
|
1 NaN
|
|
2 3.0
|
|
3 3.0
|
|
4 7.0
|
|
"""
|
|
|
|
_attributes: list[str] = ["min_periods", "axis", "method"]
|
|
|
|
def __init__(
|
|
self,
|
|
obj: NDFrame,
|
|
min_periods: int = 1,
|
|
axis: Axis = 0,
|
|
method: str = "single",
|
|
selection=None,
|
|
) -> None:
|
|
super().__init__(
|
|
obj=obj,
|
|
min_periods=min_periods,
|
|
axis=axis,
|
|
method=method,
|
|
selection=selection,
|
|
)
|
|
|
|
def _get_window_indexer(self) -> BaseIndexer:
|
|
"""
|
|
Return an indexer class that will compute the window start and end bounds
|
|
"""
|
|
return ExpandingIndexer()
|
|
|
|
@doc(
|
|
_shared_docs["aggregate"],
|
|
see_also=dedent(
|
|
"""
|
|
See Also
|
|
--------
|
|
pandas.DataFrame.aggregate : Similar DataFrame method.
|
|
pandas.Series.aggregate : Similar Series method.
|
|
"""
|
|
),
|
|
examples=dedent(
|
|
"""
|
|
Examples
|
|
--------
|
|
>>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
|
|
>>> df
|
|
A B C
|
|
0 1 4 7
|
|
1 2 5 8
|
|
2 3 6 9
|
|
|
|
>>> df.ewm(alpha=0.5).mean()
|
|
A B C
|
|
0 1.000000 4.000000 7.000000
|
|
1 1.666667 4.666667 7.666667
|
|
2 2.428571 5.428571 8.428571
|
|
"""
|
|
),
|
|
klass="Series/Dataframe",
|
|
axis="",
|
|
)
|
|
def aggregate(self, func, *args, **kwargs):
|
|
return super().aggregate(func, *args, **kwargs)
|
|
|
|
agg = aggregate
|
|
|
|
@doc(
|
|
template_header,
|
|
create_section_header("Returns"),
|
|
template_returns,
|
|
create_section_header("See Also"),
|
|
template_see_also[:-1],
|
|
window_method="expanding",
|
|
aggregation_description="count of non NaN observations",
|
|
agg_method="count",
|
|
)
|
|
def count(self, numeric_only: bool = False):
|
|
return super().count(numeric_only=numeric_only)
|
|
|
|
@doc(
|
|
template_header,
|
|
create_section_header("Parameters"),
|
|
window_apply_parameters,
|
|
create_section_header("Returns"),
|
|
template_returns,
|
|
create_section_header("See Also"),
|
|
template_see_also[:-1],
|
|
window_method="expanding",
|
|
aggregation_description="custom aggregation function",
|
|
agg_method="apply",
|
|
)
|
|
def apply(
|
|
self,
|
|
func: Callable[..., Any],
|
|
raw: bool = False,
|
|
engine: str | None = None,
|
|
engine_kwargs: dict[str, bool] | None = None,
|
|
args: tuple[Any, ...] | None = None,
|
|
kwargs: dict[str, Any] | None = None,
|
|
):
|
|
return super().apply(
|
|
func,
|
|
raw=raw,
|
|
engine=engine,
|
|
engine_kwargs=engine_kwargs,
|
|
args=args,
|
|
kwargs=kwargs,
|
|
)
|
|
|
|
@doc(
|
|
template_header,
|
|
create_section_header("Parameters"),
|
|
kwargs_numeric_only,
|
|
window_agg_numba_parameters(),
|
|
create_section_header("Returns"),
|
|
template_returns,
|
|
create_section_header("See Also"),
|
|
template_see_also,
|
|
create_section_header("Notes"),
|
|
numba_notes[:-1],
|
|
window_method="expanding",
|
|
aggregation_description="sum",
|
|
agg_method="sum",
|
|
)
|
|
def sum(
|
|
self,
|
|
numeric_only: bool = False,
|
|
engine: str | None = None,
|
|
engine_kwargs: dict[str, bool] | None = None,
|
|
):
|
|
return super().sum(
|
|
numeric_only=numeric_only,
|
|
engine=engine,
|
|
engine_kwargs=engine_kwargs,
|
|
)
|
|
|
|
@doc(
|
|
template_header,
|
|
create_section_header("Parameters"),
|
|
kwargs_numeric_only,
|
|
window_agg_numba_parameters(),
|
|
create_section_header("Returns"),
|
|
template_returns,
|
|
create_section_header("See Also"),
|
|
template_see_also,
|
|
create_section_header("Notes"),
|
|
numba_notes[:-1],
|
|
window_method="expanding",
|
|
aggregation_description="maximum",
|
|
agg_method="max",
|
|
)
|
|
def max(
|
|
self,
|
|
numeric_only: bool = False,
|
|
engine: str | None = None,
|
|
engine_kwargs: dict[str, bool] | None = None,
|
|
):
|
|
return super().max(
|
|
numeric_only=numeric_only,
|
|
engine=engine,
|
|
engine_kwargs=engine_kwargs,
|
|
)
|
|
|
|
@doc(
|
|
template_header,
|
|
create_section_header("Parameters"),
|
|
kwargs_numeric_only,
|
|
window_agg_numba_parameters(),
|
|
create_section_header("Returns"),
|
|
template_returns,
|
|
create_section_header("See Also"),
|
|
template_see_also,
|
|
create_section_header("Notes"),
|
|
numba_notes[:-1],
|
|
window_method="expanding",
|
|
aggregation_description="minimum",
|
|
agg_method="min",
|
|
)
|
|
def min(
|
|
self,
|
|
numeric_only: bool = False,
|
|
engine: str | None = None,
|
|
engine_kwargs: dict[str, bool] | None = None,
|
|
):
|
|
return super().min(
|
|
numeric_only=numeric_only,
|
|
engine=engine,
|
|
engine_kwargs=engine_kwargs,
|
|
)
|
|
|
|
@doc(
|
|
template_header,
|
|
create_section_header("Parameters"),
|
|
kwargs_numeric_only,
|
|
window_agg_numba_parameters(),
|
|
create_section_header("Returns"),
|
|
template_returns,
|
|
create_section_header("See Also"),
|
|
template_see_also,
|
|
create_section_header("Notes"),
|
|
numba_notes[:-1],
|
|
window_method="expanding",
|
|
aggregation_description="mean",
|
|
agg_method="mean",
|
|
)
|
|
def mean(
|
|
self,
|
|
numeric_only: bool = False,
|
|
engine: str | None = None,
|
|
engine_kwargs: dict[str, bool] | None = None,
|
|
):
|
|
return super().mean(
|
|
numeric_only=numeric_only,
|
|
engine=engine,
|
|
engine_kwargs=engine_kwargs,
|
|
)
|
|
|
|
@doc(
|
|
template_header,
|
|
create_section_header("Parameters"),
|
|
kwargs_numeric_only,
|
|
window_agg_numba_parameters(),
|
|
create_section_header("Returns"),
|
|
template_returns,
|
|
create_section_header("See Also"),
|
|
template_see_also,
|
|
create_section_header("Notes"),
|
|
numba_notes[:-1],
|
|
window_method="expanding",
|
|
aggregation_description="median",
|
|
agg_method="median",
|
|
)
|
|
def median(
|
|
self,
|
|
numeric_only: bool = False,
|
|
engine: str | None = None,
|
|
engine_kwargs: dict[str, bool] | None = None,
|
|
):
|
|
return super().median(
|
|
numeric_only=numeric_only,
|
|
engine=engine,
|
|
engine_kwargs=engine_kwargs,
|
|
)
|
|
|
|
@doc(
|
|
template_header,
|
|
create_section_header("Parameters"),
|
|
dedent(
|
|
"""
|
|
ddof : int, default 1
|
|
Delta Degrees of Freedom. The divisor used in calculations
|
|
is ``N - ddof``, where ``N`` represents the number of elements.\n
|
|
"""
|
|
).replace("\n", "", 1),
|
|
kwargs_numeric_only,
|
|
window_agg_numba_parameters("1.4"),
|
|
create_section_header("Returns"),
|
|
template_returns,
|
|
create_section_header("See Also"),
|
|
"numpy.std : Equivalent method for NumPy array.\n",
|
|
template_see_also,
|
|
create_section_header("Notes"),
|
|
dedent(
|
|
"""
|
|
The default ``ddof`` of 1 used in :meth:`Series.std` is different
|
|
than the default ``ddof`` of 0 in :func:`numpy.std`.
|
|
|
|
A minimum of one period is required for the rolling calculation.\n
|
|
"""
|
|
).replace("\n", "", 1),
|
|
create_section_header("Examples"),
|
|
dedent(
|
|
"""
|
|
>>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
|
|
|
|
>>> s.expanding(3).std()
|
|
0 NaN
|
|
1 NaN
|
|
2 0.577350
|
|
3 0.957427
|
|
4 0.894427
|
|
5 0.836660
|
|
6 0.786796
|
|
dtype: float64
|
|
"""
|
|
).replace("\n", "", 1),
|
|
window_method="expanding",
|
|
aggregation_description="standard deviation",
|
|
agg_method="std",
|
|
)
|
|
def std(
|
|
self,
|
|
ddof: int = 1,
|
|
numeric_only: bool = False,
|
|
engine: str | None = None,
|
|
engine_kwargs: dict[str, bool] | None = None,
|
|
):
|
|
return super().std(
|
|
ddof=ddof,
|
|
numeric_only=numeric_only,
|
|
engine=engine,
|
|
engine_kwargs=engine_kwargs,
|
|
)
|
|
|
|
@doc(
|
|
template_header,
|
|
create_section_header("Parameters"),
|
|
dedent(
|
|
"""
|
|
ddof : int, default 1
|
|
Delta Degrees of Freedom. The divisor used in calculations
|
|
is ``N - ddof``, where ``N`` represents the number of elements.\n
|
|
"""
|
|
).replace("\n", "", 1),
|
|
kwargs_numeric_only,
|
|
window_agg_numba_parameters("1.4"),
|
|
create_section_header("Returns"),
|
|
template_returns,
|
|
create_section_header("See Also"),
|
|
"numpy.var : Equivalent method for NumPy array.\n",
|
|
template_see_also,
|
|
create_section_header("Notes"),
|
|
dedent(
|
|
"""
|
|
The default ``ddof`` of 1 used in :meth:`Series.var` is different
|
|
than the default ``ddof`` of 0 in :func:`numpy.var`.
|
|
|
|
A minimum of one period is required for the rolling calculation.\n
|
|
"""
|
|
).replace("\n", "", 1),
|
|
create_section_header("Examples"),
|
|
dedent(
|
|
"""
|
|
>>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
|
|
|
|
>>> s.expanding(3).var()
|
|
0 NaN
|
|
1 NaN
|
|
2 0.333333
|
|
3 0.916667
|
|
4 0.800000
|
|
5 0.700000
|
|
6 0.619048
|
|
dtype: float64
|
|
"""
|
|
).replace("\n", "", 1),
|
|
window_method="expanding",
|
|
aggregation_description="variance",
|
|
agg_method="var",
|
|
)
|
|
def var(
|
|
self,
|
|
ddof: int = 1,
|
|
numeric_only: bool = False,
|
|
engine: str | None = None,
|
|
engine_kwargs: dict[str, bool] | None = None,
|
|
):
|
|
return super().var(
|
|
ddof=ddof,
|
|
numeric_only=numeric_only,
|
|
engine=engine,
|
|
engine_kwargs=engine_kwargs,
|
|
)
|
|
|
|
@doc(
|
|
template_header,
|
|
create_section_header("Parameters"),
|
|
dedent(
|
|
"""
|
|
ddof : int, default 1
|
|
Delta Degrees of Freedom. The divisor used in calculations
|
|
is ``N - ddof``, where ``N`` represents the number of elements.\n
|
|
"""
|
|
).replace("\n", "", 1),
|
|
kwargs_numeric_only,
|
|
create_section_header("Returns"),
|
|
template_returns,
|
|
create_section_header("See Also"),
|
|
template_see_also,
|
|
create_section_header("Notes"),
|
|
"A minimum of one period is required for the calculation.\n\n",
|
|
create_section_header("Examples"),
|
|
dedent(
|
|
"""
|
|
>>> s = pd.Series([0, 1, 2, 3])
|
|
|
|
>>> s.expanding().sem()
|
|
0 NaN
|
|
1 0.707107
|
|
2 0.707107
|
|
3 0.745356
|
|
dtype: float64
|
|
"""
|
|
).replace("\n", "", 1),
|
|
window_method="expanding",
|
|
aggregation_description="standard error of mean",
|
|
agg_method="sem",
|
|
)
|
|
def sem(self, ddof: int = 1, numeric_only: bool = False):
|
|
return super().sem(ddof=ddof, numeric_only=numeric_only)
|
|
|
|
@doc(
|
|
template_header,
|
|
create_section_header("Parameters"),
|
|
kwargs_numeric_only,
|
|
create_section_header("Returns"),
|
|
template_returns,
|
|
create_section_header("See Also"),
|
|
"scipy.stats.skew : Third moment of a probability density.\n",
|
|
template_see_also,
|
|
create_section_header("Notes"),
|
|
"A minimum of three periods is required for the rolling calculation.\n",
|
|
window_method="expanding",
|
|
aggregation_description="unbiased skewness",
|
|
agg_method="skew",
|
|
)
|
|
def skew(self, numeric_only: bool = False):
|
|
return super().skew(numeric_only=numeric_only)
|
|
|
|
@doc(
|
|
template_header,
|
|
create_section_header("Parameters"),
|
|
kwargs_numeric_only,
|
|
create_section_header("Returns"),
|
|
template_returns,
|
|
create_section_header("See Also"),
|
|
"scipy.stats.kurtosis : Reference SciPy method.\n",
|
|
template_see_also,
|
|
create_section_header("Notes"),
|
|
"A minimum of four periods is required for the calculation.\n\n",
|
|
create_section_header("Examples"),
|
|
dedent(
|
|
"""
|
|
The example below will show a rolling calculation with a window size of
|
|
four matching the equivalent function call using `scipy.stats`.
|
|
|
|
>>> arr = [1, 2, 3, 4, 999]
|
|
>>> import scipy.stats
|
|
>>> print(f"{{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}}")
|
|
-1.200000
|
|
>>> print(f"{{scipy.stats.kurtosis(arr, bias=False):.6f}}")
|
|
4.999874
|
|
>>> s = pd.Series(arr)
|
|
>>> s.expanding(4).kurt()
|
|
0 NaN
|
|
1 NaN
|
|
2 NaN
|
|
3 -1.200000
|
|
4 4.999874
|
|
dtype: float64
|
|
"""
|
|
).replace("\n", "", 1),
|
|
window_method="expanding",
|
|
aggregation_description="Fisher's definition of kurtosis without bias",
|
|
agg_method="kurt",
|
|
)
|
|
def kurt(self, numeric_only: bool = False):
|
|
return super().kurt(numeric_only=numeric_only)
|
|
|
|
@doc(
|
|
template_header,
|
|
create_section_header("Parameters"),
|
|
dedent(
|
|
"""
|
|
quantile : float
|
|
Quantile to compute. 0 <= quantile <= 1.
|
|
interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}}
|
|
This optional parameter specifies the interpolation method to use,
|
|
when the desired quantile lies between two data points `i` and `j`:
|
|
|
|
* linear: `i + (j - i) * fraction`, where `fraction` is the
|
|
fractional part of the index surrounded by `i` and `j`.
|
|
* lower: `i`.
|
|
* higher: `j`.
|
|
* nearest: `i` or `j` whichever is nearest.
|
|
* midpoint: (`i` + `j`) / 2.
|
|
"""
|
|
).replace("\n", "", 1),
|
|
kwargs_numeric_only,
|
|
create_section_header("Returns"),
|
|
template_returns,
|
|
create_section_header("See Also"),
|
|
template_see_also[:-1],
|
|
window_method="expanding",
|
|
aggregation_description="quantile",
|
|
agg_method="quantile",
|
|
)
|
|
def quantile(
|
|
self,
|
|
quantile: float,
|
|
interpolation: QuantileInterpolation = "linear",
|
|
numeric_only: bool = False,
|
|
):
|
|
return super().quantile(
|
|
quantile=quantile,
|
|
interpolation=interpolation,
|
|
numeric_only=numeric_only,
|
|
)
|
|
|
|
@doc(
|
|
template_header,
|
|
".. versionadded:: 1.4.0 \n\n",
|
|
create_section_header("Parameters"),
|
|
dedent(
|
|
"""
|
|
method : {{'average', 'min', 'max'}}, default 'average'
|
|
How to rank the group of records that have the same value (i.e. ties):
|
|
|
|
* average: average rank of the group
|
|
* min: lowest rank in the group
|
|
* max: highest rank in the group
|
|
|
|
ascending : bool, default True
|
|
Whether or not the elements should be ranked in ascending order.
|
|
pct : bool, default False
|
|
Whether or not to display the returned rankings in percentile
|
|
form.
|
|
"""
|
|
).replace("\n", "", 1),
|
|
kwargs_numeric_only,
|
|
create_section_header("Returns"),
|
|
template_returns,
|
|
create_section_header("See Also"),
|
|
template_see_also,
|
|
create_section_header("Examples"),
|
|
dedent(
|
|
"""
|
|
>>> s = pd.Series([1, 4, 2, 3, 5, 3])
|
|
>>> s.expanding().rank()
|
|
0 1.0
|
|
1 2.0
|
|
2 2.0
|
|
3 3.0
|
|
4 5.0
|
|
5 3.5
|
|
dtype: float64
|
|
|
|
>>> s.expanding().rank(method="max")
|
|
0 1.0
|
|
1 2.0
|
|
2 2.0
|
|
3 3.0
|
|
4 5.0
|
|
5 4.0
|
|
dtype: float64
|
|
|
|
>>> s.expanding().rank(method="min")
|
|
0 1.0
|
|
1 2.0
|
|
2 2.0
|
|
3 3.0
|
|
4 5.0
|
|
5 3.0
|
|
dtype: float64
|
|
"""
|
|
).replace("\n", "", 1),
|
|
window_method="expanding",
|
|
aggregation_description="rank",
|
|
agg_method="rank",
|
|
)
|
|
def rank(
|
|
self,
|
|
method: WindowingRankType = "average",
|
|
ascending: bool = True,
|
|
pct: bool = False,
|
|
numeric_only: bool = False,
|
|
):
|
|
return super().rank(
|
|
method=method,
|
|
ascending=ascending,
|
|
pct=pct,
|
|
numeric_only=numeric_only,
|
|
)
|
|
|
|
@doc(
|
|
template_header,
|
|
create_section_header("Parameters"),
|
|
dedent(
|
|
"""
|
|
other : Series or DataFrame, optional
|
|
If not supplied then will default to self and produce pairwise
|
|
output.
|
|
pairwise : bool, default None
|
|
If False then only matching columns between self and other will be
|
|
used and the output will be a DataFrame.
|
|
If True then all pairwise combinations will be calculated and the
|
|
output will be a MultiIndexed DataFrame in the case of DataFrame
|
|
inputs. In the case of missing elements, only complete pairwise
|
|
observations will be used.
|
|
ddof : int, default 1
|
|
Delta Degrees of Freedom. The divisor used in calculations
|
|
is ``N - ddof``, where ``N`` represents the number of elements.
|
|
"""
|
|
).replace("\n", "", 1),
|
|
kwargs_numeric_only,
|
|
create_section_header("Returns"),
|
|
template_returns,
|
|
create_section_header("See Also"),
|
|
template_see_also[:-1],
|
|
window_method="expanding",
|
|
aggregation_description="sample covariance",
|
|
agg_method="cov",
|
|
)
|
|
def cov(
|
|
self,
|
|
other: DataFrame | Series | None = None,
|
|
pairwise: bool | None = None,
|
|
ddof: int = 1,
|
|
numeric_only: bool = False,
|
|
):
|
|
return super().cov(
|
|
other=other,
|
|
pairwise=pairwise,
|
|
ddof=ddof,
|
|
numeric_only=numeric_only,
|
|
)
|
|
|
|
@doc(
|
|
template_header,
|
|
create_section_header("Parameters"),
|
|
dedent(
|
|
"""
|
|
other : Series or DataFrame, optional
|
|
If not supplied then will default to self and produce pairwise
|
|
output.
|
|
pairwise : bool, default None
|
|
If False then only matching columns between self and other will be
|
|
used and the output will be a DataFrame.
|
|
If True then all pairwise combinations will be calculated and the
|
|
output will be a MultiIndexed DataFrame in the case of DataFrame
|
|
inputs. In the case of missing elements, only complete pairwise
|
|
observations will be used.
|
|
"""
|
|
).replace("\n", "", 1),
|
|
kwargs_numeric_only,
|
|
create_section_header("Returns"),
|
|
template_returns,
|
|
create_section_header("See Also"),
|
|
dedent(
|
|
"""
|
|
cov : Similar method to calculate covariance.
|
|
numpy.corrcoef : NumPy Pearson's correlation calculation.
|
|
"""
|
|
).replace("\n", "", 1),
|
|
template_see_also,
|
|
create_section_header("Notes"),
|
|
dedent(
|
|
"""
|
|
This function uses Pearson's definition of correlation
|
|
(https://en.wikipedia.org/wiki/Pearson_correlation_coefficient).
|
|
|
|
When `other` is not specified, the output will be self correlation (e.g.
|
|
all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise`
|
|
set to `True`.
|
|
|
|
Function will return ``NaN`` for correlations of equal valued sequences;
|
|
this is the result of a 0/0 division error.
|
|
|
|
When `pairwise` is set to `False`, only matching columns between `self` and
|
|
`other` will be used.
|
|
|
|
When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame
|
|
with the original index on the first level, and the `other` DataFrame
|
|
columns on the second level.
|
|
|
|
In the case of missing elements, only complete pairwise observations
|
|
will be used.
|
|
"""
|
|
).replace("\n", "", 1),
|
|
window_method="expanding",
|
|
aggregation_description="correlation",
|
|
agg_method="corr",
|
|
)
|
|
def corr(
|
|
self,
|
|
other: DataFrame | Series | None = None,
|
|
pairwise: bool | None = None,
|
|
ddof: int = 1,
|
|
numeric_only: bool = False,
|
|
):
|
|
return super().corr(
|
|
other=other,
|
|
pairwise=pairwise,
|
|
ddof=ddof,
|
|
numeric_only=numeric_only,
|
|
)
|
|
|
|
|
|
class ExpandingGroupby(BaseWindowGroupby, Expanding):
|
|
"""
|
|
Provide a expanding groupby implementation.
|
|
"""
|
|
|
|
_attributes = Expanding._attributes + BaseWindowGroupby._attributes
|
|
|
|
def _get_window_indexer(self) -> GroupbyIndexer:
|
|
"""
|
|
Return an indexer class that will compute the window start and end bounds
|
|
|
|
Returns
|
|
-------
|
|
GroupbyIndexer
|
|
"""
|
|
window_indexer = GroupbyIndexer(
|
|
groupby_indices=self._grouper.indices,
|
|
window_indexer=ExpandingIndexer,
|
|
)
|
|
return window_indexer
|