Inzynierka/Lib/site-packages/pandas/tests/groupby/test_raises.py
2023-06-02 12:51:02 +02:00

634 lines
20 KiB
Python

# Only tests that raise an error and have no better location should go here.
# Tests for specific groupby methods should go in their respective
# test file.
import datetime
import numpy as np
import pytest
from pandas import (
Categorical,
DataFrame,
Grouper,
Series,
)
from pandas.tests.groupby import get_groupby_method_args
@pytest.fixture(
    params=[
        # a single column label
        "a",
        # a list holding one column label
        ["a"],
        # a list holding two column labels
        ["a", "b"],
        # a Grouper keyed on column "a"
        Grouper(key="a"),
        # a callable
        lambda x: x % 2,
        # a plain list of group codes, one per row
        [0, 0, 0, 1, 2, 2, 2, 3, 3],
        # the same codes as an ndarray
        np.array([0, 0, 0, 1, 2, 2, 2, 3, 3]),
        # the same codes as a mapping keyed by 0..8
        dict(enumerate([0, 0, 0, 1, 2, 2, 2, 3, 3])),
        # a Series of group codes
        Series([1, 1, 1, 1, 1, 2, 2, 2, 2]),
        # a list of two Series
        [
            Series([1, 1, 1, 1, 1, 2, 2, 2, 2]),
            Series([3, 3, 4, 4, 4, 4, 4, 3, 3]),
        ],
    ]
)
def by(request):
    """Yield, one at a time, the different ``by`` arguments passed to ``groupby``.

    Every nine-element param has one entry per row of the frames built by
    the tests in this module.
    """
    grouping_key = request.param
    return grouping_key
@pytest.fixture(params=[True, False])
def groupby_series(request):
    """Flag telling a test whether to select column ``"d"`` from the groupby.

    The tests in this module index the groupby object with ``["d"]`` when
    this flag is true and use the groupby object unchanged otherwise.
    """
    select_single_column = request.param
    return select_single_column
@pytest.mark.parametrize("how", ["method", "agg", "transform"])
def test_groupby_raises_string(how, by, groupby_series, groupby_func):
    """Check every groupby op against a frame whose column "d" holds strings.

    The table below maps each op name to ``(exception, message regex)``;
    ``(None, "")`` means the op is expected to succeed.  The op is invoked
    directly, through ``agg`` or through ``transform`` depending on ``how``.
    """
    frame_data = {
        "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
        "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
        "c": range(9),
        "d": list("xyzwtyuio"),
    }
    df = DataFrame(frame_data)
    args = get_groupby_method_args(groupby_func, df)
    gb = df.groupby(by=by)

    if groupby_series:
        gb = gb["d"]

        # After selecting a single column there is no corrwith to exercise.
        if groupby_func == "corrwith":
            assert not hasattr(gb, "corrwith")
            return

    expectations = {
        "all": (None, ""),
        "any": (None, ""),
        "bfill": (None, ""),
        "corrwith": (TypeError, "Could not convert"),
        "count": (None, ""),
        "cumcount": (None, ""),
        "cummax": (
            (NotImplementedError, TypeError),
            "(function|cummax) is not (implemented|supported) for (this|object) dtype",
        ),
        "cummin": (
            (NotImplementedError, TypeError),
            "(function|cummin) is not (implemented|supported) for (this|object) dtype",
        ),
        "cumprod": (
            (NotImplementedError, TypeError),
            "(function|cumprod) is not (implemented|supported) for (this|object) dtype",
        ),
        "cumsum": (
            (NotImplementedError, TypeError),
            "(function|cumsum) is not (implemented|supported) for (this|object) dtype",
        ),
        "diff": (TypeError, "unsupported operand type"),
        "ffill": (None, ""),
        "fillna": (None, ""),
        "first": (None, ""),
        "idxmax": (TypeError, "'argmax' not allowed for this dtype"),
        "idxmin": (TypeError, "'argmin' not allowed for this dtype"),
        "last": (None, ""),
        "max": (None, ""),
        "mean": (TypeError, "Could not convert xy?z?w?t?y?u?i?o? to numeric"),
        "median": (TypeError, "could not convert string to float"),
        "min": (None, ""),
        "ngroup": (None, ""),
        "nunique": (None, ""),
        "pct_change": (TypeError, "unsupported operand type"),
        "prod": (TypeError, "can't multiply sequence by non-int of type 'str'"),
        "quantile": (TypeError, "cannot be performed against 'object' dtypes!"),
        "rank": (None, ""),
        "sem": (ValueError, "could not convert string to float"),
        "shift": (None, ""),
        "size": (None, ""),
        "skew": (TypeError, "could not convert string to float"),
        "std": (ValueError, "could not convert string to float"),
        "sum": (None, ""),
        "var": (TypeError, "could not convert string to float"),
    }
    expected_exc, expected_msg = expectations[groupby_func]

    def run_op():
        # Dispatch on ``how``: direct method call, agg-by-name, transform-by-name.
        if how == "method":
            return getattr(gb, groupby_func)(*args)
        if how == "agg":
            return gb.agg(groupby_func, *args)
        return gb.transform(groupby_func, *args)

    if expected_exc is not None:
        with pytest.raises(expected_exc, match=expected_msg):
            run_op()
    else:
        run_op()
@pytest.mark.parametrize("how", ["agg", "transform"])
def test_groupby_raises_string_udf(how, by, groupby_series):
    """A TypeError raised inside a UDF must surface from ``agg``/``transform``.

    Uses a frame whose column "d" holds strings.
    """
    frame_data = {
        "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
        "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
        "c": range(9),
        "d": list("xyzwtyuio"),
    }
    gb = DataFrame(frame_data).groupby(by=by)

    if groupby_series:
        gb = gb["d"]

    def func(_group):
        # Always fail so the test can check the error is not swallowed.
        raise TypeError("Test error message")

    apply_udf = getattr(gb, how)
    with pytest.raises(TypeError, match="Test error message"):
        apply_udf(func)
@pytest.mark.parametrize("how", ["agg", "transform"])
@pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean])
def test_groupby_raises_string_np(how, by, groupby_series, groupby_func_np):
    """Pass NumPy reducers to ``agg``/``transform`` on a frame with a string column.

    ``np.sum`` is expected to succeed; ``np.mean`` is expected to raise.
    """
    # GH#50749
    frame_data = {
        "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
        "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
        "c": range(9),
        "d": list("xyzwtyuio"),
    }
    gb = DataFrame(frame_data).groupby(by=by)

    if groupby_series:
        gb = gb["d"]

    expectations = {
        np.sum: (None, ""),
        np.mean: (TypeError, "Could not convert xy?z?w?t?y?u?i?o? to numeric"),
    }
    expected_exc, expected_msg = expectations[groupby_func_np]

    if expected_exc is not None:
        with pytest.raises(expected_exc, match=expected_msg):
            getattr(gb, how)(groupby_func_np)
    else:
        getattr(gb, how)(groupby_func_np)
@pytest.mark.parametrize("how", ["method", "agg", "transform"])
def test_groupby_raises_datetime(how, by, groupby_series, groupby_func):
    """Check every groupby op against a frame whose column "d" is a datetime.

    The table below maps each op name to ``(exception, message regex)``;
    ``(None, "")`` means the op is expected to succeed.  The op is invoked
    directly, through ``agg`` or through ``transform`` depending on ``how``.
    """
    frame_data = {
        "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
        "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
        "c": range(9),
        # a single datetime value supplied for the whole column
        "d": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000),
    }
    df = DataFrame(frame_data)
    args = get_groupby_method_args(groupby_func, df)
    gb = df.groupby(by=by)

    if groupby_series:
        gb = gb["d"]

        # After selecting a single column there is no corrwith to exercise.
        if groupby_func == "corrwith":
            assert not hasattr(gb, "corrwith")
            return

    expectations = {
        "all": (None, ""),
        "any": (None, ""),
        "bfill": (None, ""),
        "corrwith": (TypeError, "cannot perform __mul__ with this index type"),
        "count": (None, ""),
        "cumcount": (None, ""),
        "cummax": (None, ""),
        "cummin": (None, ""),
        "cumprod": (
            TypeError,
            "datetime64 type does not support cumprod operations",
        ),
        "cumsum": (
            TypeError,
            "datetime64 type does not support cumsum operations",
        ),
        "diff": (None, ""),
        "ffill": (None, ""),
        "fillna": (None, ""),
        "first": (None, ""),
        "idxmax": (None, ""),
        "idxmin": (None, ""),
        "last": (None, ""),
        "max": (None, ""),
        "mean": (None, ""),
        "median": (None, ""),
        "min": (None, ""),
        "ngroup": (None, ""),
        "nunique": (None, ""),
        "pct_change": (
            TypeError,
            "cannot perform __truediv__ with this index type",
        ),
        "prod": (TypeError, "datetime64 type does not support prod"),
        "quantile": (None, ""),
        "rank": (None, ""),
        "sem": (None, ""),
        "shift": (None, ""),
        "size": (None, ""),
        "skew": (TypeError, r"dtype datetime64\[ns\] does not support reduction"),
        "std": (None, ""),
        "sum": (TypeError, "datetime64 type does not support sum operations"),
        "var": (None, ""),
    }
    expected_exc, expected_msg = expectations[groupby_func]

    def run_op():
        # Dispatch on ``how``: direct method call, agg-by-name, transform-by-name.
        if how == "method":
            return getattr(gb, groupby_func)(*args)
        if how == "agg":
            return gb.agg(groupby_func, *args)
        return gb.transform(groupby_func, *args)

    if expected_exc is not None:
        with pytest.raises(expected_exc, match=expected_msg):
            run_op()
    else:
        run_op()
@pytest.mark.parametrize("how", ["agg", "transform"])
def test_groupby_raises_datetime_udf(how, by, groupby_series):
    """A TypeError raised inside a UDF must surface from ``agg``/``transform``.

    Uses a frame whose column "d" is a datetime.
    """
    frame_data = {
        "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
        "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
        "c": range(9),
        "d": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000),
    }
    gb = DataFrame(frame_data).groupby(by=by)

    if groupby_series:
        gb = gb["d"]

    def func(_group):
        # Always fail so the test can check the error is not swallowed.
        raise TypeError("Test error message")

    apply_udf = getattr(gb, how)
    with pytest.raises(TypeError, match="Test error message"):
        apply_udf(func)
@pytest.mark.parametrize("how", ["agg", "transform"])
@pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean])
def test_groupby_raises_datetime_np(how, by, groupby_series, groupby_func_np):
    """Pass NumPy reducers to ``agg``/``transform`` on a frame with a datetime column.

    ``np.sum`` is expected to raise; ``np.mean`` is expected to succeed.
    """
    # GH#50749
    frame_data = {
        "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
        "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
        "c": range(9),
        "d": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000),
    }
    gb = DataFrame(frame_data).groupby(by=by)

    if groupby_series:
        gb = gb["d"]

    expectations = {
        np.sum: (TypeError, "datetime64 type does not support sum operations"),
        np.mean: (None, ""),
    }
    expected_exc, expected_msg = expectations[groupby_func_np]

    if expected_exc is not None:
        with pytest.raises(expected_exc, match=expected_msg):
            getattr(gb, how)(groupby_func_np)
    else:
        getattr(gb, how)(groupby_func_np)
@pytest.mark.parametrize("how", ["method", "agg", "transform"])
def test_groupby_raises_category(
    how, by, groupby_series, groupby_func, using_copy_on_write
):
    """Check every groupby op against a frame whose column "d" is categorical.

    The table below maps each op name to ``(exception, message regex)``;
    ``(None, "")`` means the op is expected to succeed.  The op is invoked
    directly, through ``agg`` or through ``transform`` depending on ``how``.
    """
    # GH#50749
    ordered_cat = Categorical(
        ["a", "a", "a", "a", "b", "b", "b", "b", "c"],
        categories=["a", "b", "c", "d"],
        ordered=True,
    )
    df = DataFrame(
        {
            "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
            "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
            "c": range(9),
            "d": ordered_cat,
        }
    )
    args = get_groupby_method_args(groupby_func, df)
    gb = df.groupby(by=by)

    if groupby_series:
        gb = gb["d"]

        # After selecting a single column there is no corrwith to exercise.
        if groupby_func == "corrwith":
            assert not hasattr(gb, "corrwith")
            return

    if using_copy_on_write:
        # no-op with CoW
        fillna_expected = (None, "")
    else:
        fillna_expected = (
            TypeError,
            r"Cannot setitem on a Categorical with a new category \(0\), "
            "set the categories first",
        )

    expectations = {
        "all": (None, ""),
        "any": (None, ""),
        "bfill": (None, ""),
        "corrwith": (
            TypeError,
            r"unsupported operand type\(s\) for \*: 'Categorical' and 'int'",
        ),
        "count": (None, ""),
        "cumcount": (None, ""),
        "cummax": (
            (NotImplementedError, TypeError),
            "(category type does not support cummax operations|"
            "category dtype not supported|"
            "cummax is not supported for category dtype)",
        ),
        "cummin": (
            (NotImplementedError, TypeError),
            "(category type does not support cummin operations|"
            "category dtype not supported|"
            "cummin is not supported for category dtype)",
        ),
        "cumprod": (
            (NotImplementedError, TypeError),
            "(category type does not support cumprod operations|"
            "category dtype not supported|"
            "cumprod is not supported for category dtype)",
        ),
        "cumsum": (
            (NotImplementedError, TypeError),
            "(category type does not support cumsum operations|"
            "category dtype not supported|"
            "cumsum is not supported for category dtype)",
        ),
        "diff": (
            TypeError,
            r"unsupported operand type\(s\) for -: 'Categorical' and 'Categorical'",
        ),
        "ffill": (None, ""),
        "fillna": fillna_expected,
        "first": (None, ""),
        "idxmax": (None, ""),
        "idxmin": (None, ""),
        "last": (None, ""),
        "max": (None, ""),
        "mean": (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'mean'",
        ),
        "median": (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'median'",
        ),
        "min": (None, ""),
        "ngroup": (None, ""),
        "nunique": (None, ""),
        "pct_change": (
            TypeError,
            r"unsupported operand type\(s\) for /: 'Categorical' and 'Categorical'",
        ),
        "prod": (TypeError, "category type does not support prod operations"),
        "quantile": (TypeError, "No matching signature found"),
        "rank": (None, ""),
        "sem": (ValueError, "Cannot cast object dtype to float64"),
        "shift": (None, ""),
        "size": (None, ""),
        "skew": (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'skew'",
        ),
        "std": (ValueError, "Cannot cast object dtype to float64"),
        "sum": (TypeError, "category type does not support sum operations"),
        "var": (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'var'",
        ),
    }
    expected_exc, expected_msg = expectations[groupby_func]

    def run_op():
        # Dispatch on ``how``: direct method call, agg-by-name, transform-by-name.
        if how == "method":
            return getattr(gb, groupby_func)(*args)
        if how == "agg":
            return gb.agg(groupby_func, *args)
        return gb.transform(groupby_func, *args)

    if expected_exc is not None:
        with pytest.raises(expected_exc, match=expected_msg):
            run_op()
    else:
        run_op()
@pytest.mark.parametrize("how", ["agg", "transform"])
def test_groupby_raises_category_udf(how, by, groupby_series):
    """A TypeError raised inside a UDF must surface from ``agg``/``transform``.

    Uses a frame whose column "d" is an ordered categorical.
    """
    # GH#50749
    ordered_cat = Categorical(
        ["a", "a", "a", "a", "b", "b", "b", "b", "c"],
        categories=["a", "b", "c", "d"],
        ordered=True,
    )
    frame_data = {
        "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
        "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
        "c": range(9),
        "d": ordered_cat,
    }
    gb = DataFrame(frame_data).groupby(by=by)

    if groupby_series:
        gb = gb["d"]

    def func(_group):
        # Always fail so the test can check the error is not swallowed.
        raise TypeError("Test error message")

    apply_udf = getattr(gb, how)
    with pytest.raises(TypeError, match="Test error message"):
        apply_udf(func)
@pytest.mark.parametrize("how", ["agg", "transform"])
@pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean])
def test_groupby_raises_category_np(how, by, groupby_series, groupby_func_np):
    """Pass NumPy reducers to ``agg``/``transform`` on a frame with a categorical column.

    Both ``np.sum`` and ``np.mean`` are expected to raise TypeError.
    """
    # GH#50749
    ordered_cat = Categorical(
        ["a", "a", "a", "a", "b", "b", "b", "b", "c"],
        categories=["a", "b", "c", "d"],
        ordered=True,
    )
    frame_data = {
        "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
        "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
        "c": range(9),
        "d": ordered_cat,
    }
    gb = DataFrame(frame_data).groupby(by=by)

    if groupby_series:
        gb = gb["d"]

    expectations = {
        np.sum: (TypeError, "category type does not support sum operations"),
        np.mean: (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'mean'",
        ),
    }
    expected_exc, expected_msg = expectations[groupby_func_np]

    # The no-error branch mirrors the sibling *_np tests even though both
    # entries above currently expect an exception.
    if expected_exc is not None:
        with pytest.raises(expected_exc, match=expected_msg):
            getattr(gb, how)(groupby_func_np)
    else:
        getattr(gb, how)(groupby_func_np)
@pytest.mark.parametrize("how", ["method", "agg", "transform"])
def test_groupby_raises_category_on_category(
    how, by, groupby_series, groupby_func, observed, using_copy_on_write
):
    """Check every groupby op when columns "a" and "d" are both categorical.

    The table below maps each op name to ``(exception, message regex)``;
    ``(None, "")`` means the op is expected to succeed.  The expectations for
    ``idxmax``/``idxmin`` depend on whether any group is empty, and the one
    for ``fillna`` on ``using_copy_on_write``.
    """
    # GH#50749
    cat_levels = ["a", "b", "c", "d"]
    df = DataFrame(
        {
            "a": Categorical(
                ["a", "a", "a", "a", "b", "b", "b", "b", "c"],
                categories=cat_levels,
                ordered=True,
            ),
            "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
            "c": range(9),
            "d": Categorical(
                ["a", "a", "a", "a", "b", "b", "c", "c", "c"],
                categories=cat_levels,
                ordered=True,
            ),
        }
    )
    args = get_groupby_method_args(groupby_func, df)
    gb = df.groupby(by=by, observed=observed)

    if groupby_series:
        gb = gb["d"]

        # After selecting a single column there is no corrwith to exercise.
        if groupby_func == "corrwith":
            assert not hasattr(gb, "corrwith")
            return

    empty_groups = any(members.empty for members in gb.groups.values())

    if using_copy_on_write:
        # no-op with CoW
        fillna_expected = (None, "")
    else:
        fillna_expected = (
            TypeError,
            r"Cannot setitem on a Categorical with a new category \(0\), "
            "set the categories first",
        )

    if empty_groups:
        idxmax_expected = (ValueError, "attempt to get argmax of an empty sequence")
        idxmin_expected = (ValueError, "attempt to get argmin of an empty sequence")
    else:
        idxmax_expected = (None, "")
        idxmin_expected = (None, "")

    expectations = {
        "all": (None, ""),
        "any": (None, ""),
        "bfill": (None, ""),
        "corrwith": (
            TypeError,
            r"unsupported operand type\(s\) for \*: 'Categorical' and 'int'",
        ),
        "count": (None, ""),
        "cumcount": (None, ""),
        "cummax": (
            (NotImplementedError, TypeError),
            "(cummax is not supported for category dtype|"
            "category dtype not supported|"
            "category type does not support cummax operations)",
        ),
        "cummin": (
            (NotImplementedError, TypeError),
            "(cummin is not supported for category dtype|"
            "category dtype not supported|"
            "category type does not support cummin operations)",
        ),
        "cumprod": (
            (NotImplementedError, TypeError),
            "(cumprod is not supported for category dtype|"
            "category dtype not supported|"
            "category type does not support cumprod operations)",
        ),
        "cumsum": (
            (NotImplementedError, TypeError),
            "(cumsum is not supported for category dtype|"
            "category dtype not supported|"
            "category type does not support cumsum operations)",
        ),
        "diff": (TypeError, "unsupported operand type"),
        "ffill": (None, ""),
        "fillna": fillna_expected,
        "first": (None, ""),
        "idxmax": idxmax_expected,
        "idxmin": idxmin_expected,
        "last": (None, ""),
        "max": (None, ""),
        "mean": (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'mean'",
        ),
        "median": (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'median'",
        ),
        "min": (None, ""),
        "ngroup": (None, ""),
        "nunique": (None, ""),
        "pct_change": (TypeError, "unsupported operand type"),
        "prod": (TypeError, "category type does not support prod operations"),
        "quantile": (TypeError, ""),
        "rank": (None, ""),
        "sem": (ValueError, "Cannot cast object dtype to float64"),
        "shift": (None, ""),
        "size": (None, ""),
        "skew": (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'skew'",
        ),
        "std": (ValueError, "Cannot cast object dtype to float64"),
        "sum": (TypeError, "category type does not support sum operations"),
        "var": (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'var'",
        ),
    }
    expected_exc, expected_msg = expectations[groupby_func]

    def run_op():
        # Dispatch on ``how``: direct method call, agg-by-name, transform-by-name.
        if how == "method":
            return getattr(gb, groupby_func)(*args)
        if how == "agg":
            return gb.agg(groupby_func, *args)
        return gb.transform(groupby_func, *args)

    if expected_exc is not None:
        with pytest.raises(expected_exc, match=expected_msg):
            run_op()
    else:
        run_op()
def test_subsetting_columns_axis_1_raises():
    """Selecting a column from a groupby built with ``axis=1`` must raise."""
    # GH 35443
    frame = DataFrame({"a": [1], "b": [2], "c": [3]})
    column_wise_gb = frame.groupby("a", axis=1)
    expected_msg = "Cannot subset columns when using axis=1"
    with pytest.raises(ValueError, match=expected_msg):
        column_wise_gb["b"]