143 lines
5.5 KiB
Python
143 lines
5.5 KiB
Python
|
"""
|
||
|
Test the consistency of the groupby API, both internally and with other pandas objects.
|
||
|
"""
|
||
|
|
||
|
import inspect
|
||
|
|
||
|
import pytest
|
||
|
|
||
|
from pandas import (
|
||
|
DataFrame,
|
||
|
Series,
|
||
|
)
|
||
|
from pandas.core.groupby.generic import (
|
||
|
DataFrameGroupBy,
|
||
|
SeriesGroupBy,
|
||
|
)
|
||
|
|
||
|
|
||
|
def test_frame_consistency(request, groupby_func):
|
||
|
# GH#48028
|
||
|
if groupby_func in ("first", "last"):
|
||
|
msg = "first and last are entirely different between frame and groupby"
|
||
|
request.node.add_marker(pytest.mark.xfail(reason=msg))
|
||
|
if groupby_func in ("cumcount",):
|
||
|
msg = "DataFrame has no such method"
|
||
|
request.node.add_marker(pytest.mark.xfail(reason=msg))
|
||
|
|
||
|
if groupby_func == "ngroup":
|
||
|
assert not hasattr(DataFrame, groupby_func)
|
||
|
return
|
||
|
|
||
|
frame_method = getattr(DataFrame, groupby_func)
|
||
|
gb_method = getattr(DataFrameGroupBy, groupby_func)
|
||
|
result = set(inspect.signature(gb_method).parameters)
|
||
|
if groupby_func == "size":
|
||
|
# "size" is a method on GroupBy but property on DataFrame:
|
||
|
expected = {"self"}
|
||
|
else:
|
||
|
expected = set(inspect.signature(frame_method).parameters)
|
||
|
|
||
|
# Exclude certain arguments from result and expected depending on the operation
|
||
|
# Some of these may be purposeful inconsistencies between the APIs
|
||
|
exclude_expected, exclude_result = set(), set()
|
||
|
if groupby_func in ("any", "all"):
|
||
|
exclude_expected = {"kwargs", "bool_only", "axis"}
|
||
|
elif groupby_func in ("count",):
|
||
|
exclude_expected = {"numeric_only", "axis"}
|
||
|
elif groupby_func in ("nunique",):
|
||
|
exclude_expected = {"axis"}
|
||
|
elif groupby_func in ("max", "min"):
|
||
|
exclude_expected = {"axis", "kwargs", "skipna"}
|
||
|
exclude_result = {"min_count", "engine", "engine_kwargs"}
|
||
|
elif groupby_func in ("mean", "std", "sum", "var"):
|
||
|
exclude_expected = {"axis", "kwargs", "skipna"}
|
||
|
exclude_result = {"engine", "engine_kwargs"}
|
||
|
elif groupby_func in ("median", "prod", "sem"):
|
||
|
exclude_expected = {"axis", "kwargs", "skipna"}
|
||
|
elif groupby_func in ("backfill", "bfill", "ffill", "pad"):
|
||
|
exclude_expected = {"downcast", "inplace", "axis"}
|
||
|
elif groupby_func in ("cummax", "cummin"):
|
||
|
exclude_expected = {"skipna", "args"}
|
||
|
exclude_result = {"numeric_only"}
|
||
|
elif groupby_func in ("cumprod", "cumsum"):
|
||
|
exclude_expected = {"skipna"}
|
||
|
elif groupby_func in ("pct_change",):
|
||
|
exclude_expected = {"kwargs"}
|
||
|
exclude_result = {"axis"}
|
||
|
elif groupby_func in ("rank",):
|
||
|
exclude_expected = {"numeric_only"}
|
||
|
elif groupby_func in ("quantile",):
|
||
|
exclude_expected = {"method", "axis"}
|
||
|
|
||
|
# Ensure excluded arguments are actually in the signatures
|
||
|
assert result & exclude_result == exclude_result
|
||
|
assert expected & exclude_expected == exclude_expected
|
||
|
|
||
|
result -= exclude_result
|
||
|
expected -= exclude_expected
|
||
|
assert result == expected
|
||
|
|
||
|
|
||
|
def test_series_consistency(request, groupby_func):
|
||
|
# GH#48028
|
||
|
if groupby_func in ("first", "last"):
|
||
|
msg = "first and last are entirely different between Series and groupby"
|
||
|
request.node.add_marker(pytest.mark.xfail(reason=msg))
|
||
|
if groupby_func in ("cumcount", "corrwith"):
|
||
|
msg = "Series has no such method"
|
||
|
request.node.add_marker(pytest.mark.xfail(reason=msg))
|
||
|
|
||
|
if groupby_func == "ngroup":
|
||
|
assert not hasattr(Series, groupby_func)
|
||
|
return
|
||
|
|
||
|
series_method = getattr(Series, groupby_func)
|
||
|
gb_method = getattr(SeriesGroupBy, groupby_func)
|
||
|
result = set(inspect.signature(gb_method).parameters)
|
||
|
if groupby_func == "size":
|
||
|
# "size" is a method on GroupBy but property on Series
|
||
|
expected = {"self"}
|
||
|
else:
|
||
|
expected = set(inspect.signature(series_method).parameters)
|
||
|
|
||
|
# Exclude certain arguments from result and expected depending on the operation
|
||
|
# Some of these may be purposeful inconsistencies between the APIs
|
||
|
exclude_expected, exclude_result = set(), set()
|
||
|
if groupby_func in ("any", "all"):
|
||
|
exclude_expected = {"kwargs", "bool_only", "axis"}
|
||
|
elif groupby_func in ("diff",):
|
||
|
exclude_result = {"axis"}
|
||
|
elif groupby_func in ("max", "min"):
|
||
|
exclude_expected = {"axis", "kwargs", "skipna"}
|
||
|
exclude_result = {"min_count", "engine", "engine_kwargs"}
|
||
|
elif groupby_func in ("mean", "std", "sum", "var"):
|
||
|
exclude_expected = {"axis", "kwargs", "skipna"}
|
||
|
exclude_result = {"engine", "engine_kwargs"}
|
||
|
elif groupby_func in ("median", "prod", "sem"):
|
||
|
exclude_expected = {"axis", "kwargs", "skipna"}
|
||
|
elif groupby_func in ("backfill", "bfill", "ffill", "pad"):
|
||
|
exclude_expected = {"downcast", "inplace", "axis"}
|
||
|
elif groupby_func in ("cummax", "cummin"):
|
||
|
exclude_expected = {"skipna", "args"}
|
||
|
exclude_result = {"numeric_only"}
|
||
|
elif groupby_func in ("cumprod", "cumsum"):
|
||
|
exclude_expected = {"skipna"}
|
||
|
elif groupby_func in ("pct_change",):
|
||
|
exclude_expected = {"kwargs"}
|
||
|
exclude_result = {"axis"}
|
||
|
elif groupby_func in ("rank",):
|
||
|
exclude_expected = {"numeric_only"}
|
||
|
elif groupby_func in ("idxmin", "idxmax"):
|
||
|
exclude_expected = {"args", "kwargs"}
|
||
|
elif groupby_func in ("quantile",):
|
||
|
exclude_result = {"numeric_only"}
|
||
|
|
||
|
# Ensure excluded arguments are actually in the signatures
|
||
|
assert result & exclude_result == exclude_result
|
||
|
assert expected & exclude_expected == exclude_expected
|
||
|
|
||
|
result -= exclude_result
|
||
|
expected -= exclude_expected
|
||
|
assert result == expected
|