projektAI/venv/Lib/site-packages/pandas/tests/groupby/aggregate/test_other.py

"""
test all other .agg behavior
"""

import datetime as dt
from functools import partial

import numpy as np
import pytest

import pandas as pd
from pandas import (
    DataFrame,
    Index,
    MultiIndex,
    PeriodIndex,
    Series,
    date_range,
    period_range,
)
import pandas._testing as tm
from pandas.core.base import SpecificationError

from pandas.io.formats.printing import pprint_thing


def test_agg_api():
    # GH 6337
    # https://stackoverflow.com/questions/21706030/pandas-groupby-agg-function-column-dtype-error
    # different api for agg when passed custom function with mixed frame

    df = DataFrame(
        {
            "data1": np.random.randn(5),
            "data2": np.random.randn(5),
            "key1": ["a", "a", "b", "b", "a"],
            "key2": ["one", "two", "one", "two", "one"],
        }
    )
    grouped = df.groupby("key1")

    def peak_to_peak(arr):
        return arr.max() - arr.min()

    expected = grouped.agg([peak_to_peak])
    expected.columns = ["data1", "data2"]
    result = grouped.agg(peak_to_peak)
    tm.assert_frame_equal(result, expected)


def test_agg_datetimes_mixed():
    data = [[1, "2012-01-01", 1.0], [2, "2012-01-02", 2.0], [3, None, 3.0]]

    df1 = DataFrame(
        {
            "key": [x[0] for x in data],
            "date": [x[1] for x in data],
            "value": [x[2] for x in data],
        }
    )

    data = [
        [
            row[0],
            (dt.datetime.strptime(row[1], "%Y-%m-%d").date() if row[1] else None),
            row[2],
        ]
        for row in data
    ]

    df2 = DataFrame(
        {
            "key": [x[0] for x in data],
            "date": [x[1] for x in data],
            "value": [x[2] for x in data],
        }
    )

    df1["weights"] = df1["value"] / df1["value"].sum()
    gb1 = df1.groupby("date").aggregate(np.sum)

    df2["weights"] = df1["value"] / df1["value"].sum()
    gb2 = df2.groupby("date").aggregate(np.sum)

    assert len(gb1) == len(gb2)


def test_agg_period_index():
    prng = period_range("2012-1-1", freq="M", periods=3)
    df = DataFrame(np.random.randn(3, 2), index=prng)
    rs = df.groupby(level=0).sum()
    assert isinstance(rs.index, PeriodIndex)

    # GH 3579
    index = period_range(start="1999-01", periods=5, freq="M")
    s1 = Series(np.random.rand(len(index)), index=index)
    s2 = Series(np.random.rand(len(index)), index=index)
    df = DataFrame.from_dict({"s1": s1, "s2": s2})
    grouped = df.groupby(df.index.month)
    list(grouped)


def test_agg_dict_parameter_cast_result_dtypes():
    # GH 12821

    df = DataFrame(
        {
            "class": ["A", "A", "B", "B", "C", "C", "D", "D"],
            "time": date_range("1/1/2011", periods=8, freq="H"),
        }
    )
    df.loc[[0, 1, 2, 5], "time"] = None

    # test for `first` function
    exp = df.loc[[0, 3, 4, 6]].set_index("class")
    grouped = df.groupby("class")
    tm.assert_frame_equal(grouped.first(), exp)
    tm.assert_frame_equal(grouped.agg("first"), exp)
    tm.assert_frame_equal(grouped.agg({"time": "first"}), exp)
    tm.assert_series_equal(grouped.time.first(), exp["time"])
    tm.assert_series_equal(grouped.time.agg("first"), exp["time"])

    # test for `last` function
    exp = df.loc[[0, 3, 4, 7]].set_index("class")
    grouped = df.groupby("class")
    tm.assert_frame_equal(grouped.last(), exp)
    tm.assert_frame_equal(grouped.agg("last"), exp)
    tm.assert_frame_equal(grouped.agg({"time": "last"}), exp)
    tm.assert_series_equal(grouped.time.last(), exp["time"])
    tm.assert_series_equal(grouped.time.agg("last"), exp["time"])

    # count
    exp = Series([2, 2, 2, 2], index=Index(list("ABCD"), name="class"), name="time")
    tm.assert_series_equal(grouped.time.agg(len), exp)
    tm.assert_series_equal(grouped.time.size(), exp)

    exp = Series([0, 1, 1, 2], index=Index(list("ABCD"), name="class"), name="time")
    tm.assert_series_equal(grouped.time.count(), exp)


def test_agg_cast_results_dtypes():
    # similar to GH12821
    # xref #11444
    u = [dt.datetime(2015, x + 1, 1) for x in range(12)]
    v = list("aaabbbbbbccd")
    df = DataFrame({"X": v, "Y": u})

    result = df.groupby("X")["Y"].agg(len)
    expected = df.groupby("X")["Y"].count()
    tm.assert_series_equal(result, expected)


def test_aggregate_float64_no_int64():
    # see gh-11199
    df = DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 2, 2, 4, 5], "c": [1, 2, 3, 4, 5]})

    expected = DataFrame({"a": [1, 2.5, 4, 5]}, index=[1, 2, 4, 5])
    expected.index.name = "b"

    result = df.groupby("b")[["a"]].mean()
    tm.assert_frame_equal(result, expected)

    expected = DataFrame({"a": [1, 2.5, 4, 5], "c": [1, 2.5, 4, 5]}, index=[1, 2, 4, 5])
    expected.index.name = "b"

    result = df.groupby("b")[["a", "c"]].mean()
    tm.assert_frame_equal(result, expected)


def test_aggregate_api_consistency():
    # GH 9052
    # make sure that the aggregates via dict
    # are consistent
    df = DataFrame(
        {
            "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
            "B": ["one", "one", "two", "two", "two", "two", "one", "two"],
            "C": np.random.randn(8) + 1.0,
            "D": np.arange(8),
        }
    )

    grouped = df.groupby(["A", "B"])
    c_mean = grouped["C"].mean()
    c_sum = grouped["C"].sum()
    d_mean = grouped["D"].mean()
    d_sum = grouped["D"].sum()

    result = grouped["D"].agg(["sum", "mean"])
    expected = pd.concat([d_sum, d_mean], axis=1)
    expected.columns = ["sum", "mean"]
    tm.assert_frame_equal(result, expected, check_like=True)

    result = grouped.agg([np.sum, np.mean])
    expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1)
    expected.columns = MultiIndex.from_product([["C", "D"], ["sum", "mean"]])
    tm.assert_frame_equal(result, expected, check_like=True)

    result = grouped[["D", "C"]].agg([np.sum, np.mean])
    expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1)
    expected.columns = MultiIndex.from_product([["D", "C"], ["sum", "mean"]])
    tm.assert_frame_equal(result, expected, check_like=True)

    result = grouped.agg({"C": "mean", "D": "sum"})
    expected = pd.concat([d_sum, c_mean], axis=1)
    tm.assert_frame_equal(result, expected, check_like=True)

    result = grouped.agg({"C": ["mean", "sum"], "D": ["mean", "sum"]})
    expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1)
    expected.columns = MultiIndex.from_product([["C", "D"], ["mean", "sum"]])

    msg = r"Column\(s\) \['r', 'r2'\] do not exist"
    with pytest.raises(SpecificationError, match=msg):
        grouped[["D", "C"]].agg({"r": np.sum, "r2": np.mean})


def test_agg_dict_renaming_deprecation():
    # 15931
    df = DataFrame({"A": [1, 1, 1, 2, 2], "B": range(5), "C": range(5)})

    msg = r"nested renamer is not supported"
    with pytest.raises(SpecificationError, match=msg):
        df.groupby("A").agg(
            {"B": {"foo": ["sum", "max"]}, "C": {"bar": ["count", "min"]}}
        )

    msg = r"Column\(s\) \['ma'\] do not exist"
    with pytest.raises(SpecificationError, match=msg):
        df.groupby("A")[["B", "C"]].agg({"ma": "max"})

    msg = r"nested renamer is not supported"
    with pytest.raises(SpecificationError, match=msg):
        df.groupby("A").B.agg({"foo": "count"})


def test_agg_compat():
    # GH 12334
    df = DataFrame(
        {
            "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
            "B": ["one", "one", "two", "two", "two", "two", "one", "two"],
            "C": np.random.randn(8) + 1.0,
            "D": np.arange(8),
        }
    )

    g = df.groupby(["A", "B"])

    msg = r"nested renamer is not supported"
    with pytest.raises(SpecificationError, match=msg):
        g["D"].agg({"C": ["sum", "std"]})

    with pytest.raises(SpecificationError, match=msg):
        g["D"].agg({"C": "sum", "D": "std"})


def test_agg_nested_dicts():
    # API change for disallowing these types of nested dicts
    df = DataFrame(
        {
            "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
            "B": ["one", "one", "two", "two", "two", "two", "one", "two"],
            "C": np.random.randn(8) + 1.0,
            "D": np.arange(8),
        }
    )

    g = df.groupby(["A", "B"])

    msg = r"nested renamer is not supported"
    with pytest.raises(SpecificationError, match=msg):
        g.aggregate({"r1": {"C": ["mean", "sum"]}, "r2": {"D": ["mean", "sum"]}})

    with pytest.raises(SpecificationError, match=msg):
        g.agg({"C": {"ra": ["mean", "std"]}, "D": {"rb": ["mean", "std"]}})

    # same name as the original column
    # GH9052
    with pytest.raises(SpecificationError, match=msg):
        g["D"].agg({"result1": np.sum, "result2": np.mean})

    with pytest.raises(SpecificationError, match=msg):
        g["D"].agg({"D": np.sum, "result2": np.mean})


def test_agg_item_by_item_raise_typeerror():
    df = DataFrame(np.random.randint(10, size=(20, 10)))

    def raiseException(df):
        pprint_thing("----------------------------------------")
        pprint_thing(df.to_string())
        raise TypeError("test")

    with pytest.raises(TypeError, match="test"):
        df.groupby(0).agg(raiseException)


def test_series_agg_multikey():
    ts = tm.makeTimeSeries()
    grouped = ts.groupby([lambda x: x.year, lambda x: x.month])

    result = grouped.agg(np.sum)
    expected = grouped.sum()
    tm.assert_series_equal(result, expected)


def test_series_agg_multi_pure_python():
    data = DataFrame(
        {
            "A": [
                "foo",
                "foo",
                "foo",
                "foo",
                "bar",
                "bar",
                "bar",
                "bar",
                "foo",
                "foo",
                "foo",
            ],
            "B": [
                "one",
                "one",
                "one",
                "two",
                "one",
                "one",
                "one",
                "two",
                "two",
                "two",
                "one",
            ],
            "C": [
                "dull",
                "dull",
                "shiny",
                "dull",
                "dull",
                "shiny",
                "shiny",
                "dull",
                "shiny",
                "shiny",
                "shiny",
            ],
            "D": np.random.randn(11),
            "E": np.random.randn(11),
            "F": np.random.randn(11),
        }
    )

    def bad(x):
        assert len(x.values.base) > 0
        return "foo"

    result = data.groupby(["A", "B"]).agg(bad)
    expected = data.groupby(["A", "B"]).agg(lambda x: "foo")
    tm.assert_frame_equal(result, expected)


def test_agg_consistency():
    # agg with ([]) and () not consistent
    # GH 6715
    def P1(a):
        return np.percentile(a.dropna(), q=1)

    df = DataFrame(
        {
            "col1": [1, 2, 3, 4],
            "col2": [10, 25, 26, 31],
            "date": [
                dt.date(2013, 2, 10),
                dt.date(2013, 2, 10),
                dt.date(2013, 2, 11),
                dt.date(2013, 2, 11),
            ],
        }
    )

    g = df.groupby("date")

    expected = g.agg([P1])
    expected.columns = expected.columns.levels[0]

    result = g.agg(P1)
    tm.assert_frame_equal(result, expected)


def test_agg_callables():
    # GH 7929
    df = DataFrame({"foo": [1, 2], "bar": [3, 4]}).astype(np.int64)

    class fn_class:
        def __call__(self, x):
            return sum(x)

    equiv_callables = [
        sum,
        np.sum,
        lambda x: sum(x),
        lambda x: x.sum(),
        partial(sum),
        fn_class(),
    ]

    expected = df.groupby("foo").agg(sum)
    for ecall in equiv_callables:
        result = df.groupby("foo").agg(ecall)
        tm.assert_frame_equal(result, expected)


def test_agg_over_numpy_arrays():
    # GH 3788
    df = DataFrame(
        [
            [1, np.array([10, 20, 30])],
            [1, np.array([40, 50, 60])],
            [2, np.array([20, 30, 40])],
        ],
        columns=["category", "arraydata"],
    )
    result = df.groupby("category").agg(sum)

    expected_data = [[np.array([50, 70, 90])], [np.array([20, 30, 40])]]
    expected_index = Index([1, 2], name="category")
    expected_column = ["arraydata"]
    expected = DataFrame(expected_data, index=expected_index, columns=expected_column)

    tm.assert_frame_equal(result, expected)


def test_agg_tzaware_non_datetime_result():
    # discussed in GH#29589, fixed in GH#29641, operating on tzaware values
    #  with function that is not dtype-preserving
    dti = pd.date_range("2012-01-01", periods=4, tz="UTC")
    df = DataFrame({"a": [0, 0, 1, 1], "b": dti})
    gb = df.groupby("a")

    # Case that _does_ preserve the dtype
    result = gb["b"].agg(lambda x: x.iloc[0])
    expected = Series(dti[::2], name="b")
    expected.index.name = "a"
    tm.assert_series_equal(result, expected)

    # Cases that do _not_ preserve the dtype
    result = gb["b"].agg(lambda x: x.iloc[0].year)
    expected = Series([2012, 2012], name="b")
    expected.index.name = "a"
    tm.assert_series_equal(result, expected)

    result = gb["b"].agg(lambda x: x.iloc[-1] - x.iloc[0])
    expected = Series([pd.Timedelta(days=1), pd.Timedelta(days=1)], name="b")
    expected.index.name = "a"
    tm.assert_series_equal(result, expected)


def test_agg_timezone_round_trip():
    # GH 15426
    ts = pd.Timestamp("2016-01-01 12:00:00", tz="US/Pacific")
    df = DataFrame({"a": 1, "b": [ts + dt.timedelta(minutes=nn) for nn in range(10)]})

    result1 = df.groupby("a")["b"].agg(np.min).iloc[0]
    result2 = df.groupby("a")["b"].agg(lambda x: np.min(x)).iloc[0]
    result3 = df.groupby("a")["b"].min().iloc[0]

    assert result1 == ts
    assert result2 == ts
    assert result3 == ts

    dates = [
        pd.Timestamp(f"2016-01-0{i:d} 12:00:00", tz="US/Pacific") for i in range(1, 5)
    ]
    df = DataFrame({"A": ["a", "b"] * 2, "B": dates})
    grouped = df.groupby("A")

    ts = df["B"].iloc[0]
    assert ts == grouped.nth(0)["B"].iloc[0]
    assert ts == grouped.head(1)["B"].iloc[0]
    assert ts == grouped.first()["B"].iloc[0]

    # GH#27110 applying iloc should return a DataFrame
    assert ts == grouped.apply(lambda x: x.iloc[0]).iloc[0, 1]

    ts = df["B"].iloc[2]
    assert ts == grouped.last()["B"].iloc[0]

    # GH#27110 applying iloc should return a DataFrame
    assert ts == grouped.apply(lambda x: x.iloc[-1]).iloc[0, 1]


def test_sum_uint64_overflow():
    # see gh-14758
    # Convert to uint64 and don't overflow
    df = DataFrame([[1, 2], [3, 4], [5, 6]], dtype=object)
    df = df + 9223372036854775807

    index = Index(
        [9223372036854775808, 9223372036854775810, 9223372036854775812], dtype=np.uint64
    )
    expected = DataFrame(
        {1: [9223372036854775809, 9223372036854775811, 9223372036854775813]},
        index=index,
    )

    expected.index.name = 0
    result = df.groupby(0).sum()
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "structure, expected",
    [
        (tuple, DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}})),
        (list, DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}})),
        (
            lambda x: tuple(x),
            DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}}),
        ),
        (
            lambda x: list(x),
            DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}}),
        ),
    ],
)
def test_agg_structs_dataframe(structure, expected):
    df = DataFrame(
        {"A": [1, 1, 1, 3, 3, 3], "B": [1, 1, 1, 4, 4, 4], "C": [1, 1, 1, 3, 4, 4]}
    )

    result = df.groupby(["A", "B"]).aggregate(structure)
    expected.index.names = ["A", "B"]
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "structure, expected",
    [
        (tuple, Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name="C")),
        (list, Series([[1, 1, 1], [3, 4, 4]], index=[1, 3], name="C")),
        (lambda x: tuple(x), Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name="C")),
        (lambda x: list(x), Series([[1, 1, 1], [3, 4, 4]], index=[1, 3], name="C")),
    ],
)
def test_agg_structs_series(structure, expected):
    # Issue #18079
    df = DataFrame(
        {"A": [1, 1, 1, 3, 3, 3], "B": [1, 1, 1, 4, 4, 4], "C": [1, 1, 1, 3, 4, 4]}
    )

    result = df.groupby("A")["C"].aggregate(structure)
    expected.index.name = "A"
    tm.assert_series_equal(result, expected)


def test_agg_category_nansum(observed):
    categories = ["a", "b", "c"]
    df = DataFrame(
        {"A": pd.Categorical(["a", "a", "b"], categories=categories), "B": [1, 2, 3]}
    )
    result = df.groupby("A", observed=observed).B.agg(np.nansum)
    expected = Series(
        [3, 3, 0],
        index=pd.CategoricalIndex(["a", "b", "c"], categories=categories, name="A"),
        name="B",
    )
    if observed:
        expected = expected[expected != 0]
    tm.assert_series_equal(result, expected)


def test_agg_list_like_func():
    # GH 18473
    df = DataFrame({"A": [str(x) for x in range(3)], "B": [str(x) for x in range(3)]})
    grouped = df.groupby("A", as_index=False, sort=False)
    result = grouped.agg({"B": lambda x: list(x)})
    expected = DataFrame(
        {"A": [str(x) for x in range(3)], "B": [[str(x)] for x in range(3)]}
    )
    tm.assert_frame_equal(result, expected)


def test_agg_lambda_with_timezone():
    # GH 23683
    df = DataFrame(
        {
            "tag": [1, 1],
            "date": [
                pd.Timestamp("2018-01-01", tz="UTC"),
                pd.Timestamp("2018-01-02", tz="UTC"),
            ],
        }
    )
    result = df.groupby("tag").agg({"date": lambda e: e.head(1)})
    expected = DataFrame(
        [pd.Timestamp("2018-01-01", tz="UTC")],
        index=Index([1], name="tag"),
        columns=["date"],
    )
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "err_cls",
    [
        NotImplementedError,
        RuntimeError,
        KeyError,
        IndexError,
        OSError,
        ValueError,
        ArithmeticError,
        AttributeError,
    ],
)
def test_groupby_agg_err_catching(err_cls):
    # make sure we suppress anything other than TypeError or AssertionError
    #  in _python_agg_general

    # Use a non-standard EA to make sure we don't go down ndarray paths
    from pandas.tests.extension.decimal.array import DecimalArray, make_data, to_decimal

    data = make_data()[:5]
    df = DataFrame(
        {"id1": [0, 0, 0, 1, 1], "id2": [0, 1, 0, 1, 1], "decimals": DecimalArray(data)}
    )

    expected = Series(to_decimal([data[0], data[3]]))

    def weird_func(x):
        # weird function that raise something other than TypeError or IndexError
        #  in _python_agg_general
        if len(x) == 0:
            raise err_cls
        return x.iloc[0]

    result = df["decimals"].groupby(df["id1"]).agg(weird_func)
    tm.assert_series_equal(result, expected, check_names=False)
Działa 2021-06-06 22:13:05 +02:00			`"""`
			`test all other .agg behavior`
			`"""`

			`import datetime as dt`
			`from functools import partial`

			`import numpy as np`
			`import pytest`

			`import pandas as pd`
			`from pandas import (`
			`DataFrame,`
			`Index,`
			`MultiIndex,`
			`PeriodIndex,`
			`Series,`
			`date_range,`
			`period_range,`
			`)`
			`import pandas._testing as tm`
			`from pandas.core.base import SpecificationError`

			`from pandas.io.formats.printing import pprint_thing`


			`def test_agg_api():`
			`# GH 6337`
			`# https://stackoverflow.com/questions/21706030/pandas-groupby-agg-function-column-dtype-error`
			`# different api for agg when passed custom function with mixed frame`

			`df = DataFrame(`
			`{`
			`"data1": np.random.randn(5),`
			`"data2": np.random.randn(5),`
			`"key1": ["a", "a", "b", "b", "a"],`
			`"key2": ["one", "two", "one", "two", "one"],`
			`}`
			`)`
			`grouped = df.groupby("key1")`

			`def peak_to_peak(arr):`
			`return arr.max() - arr.min()`

			`expected = grouped.agg([peak_to_peak])`
			`expected.columns = ["data1", "data2"]`
			`result = grouped.agg(peak_to_peak)`
			`tm.assert_frame_equal(result, expected)`


			`def test_agg_datetimes_mixed():`
			`data = [[1, "2012-01-01", 1.0], [2, "2012-01-02", 2.0], [3, None, 3.0]]`

			`df1 = DataFrame(`
			`{`
			`"key": [x[0] for x in data],`
			`"date": [x[1] for x in data],`
			`"value": [x[2] for x in data],`
			`}`
			`)`

			`data = [`
			`[`
			`row[0],`
			`(dt.datetime.strptime(row[1], "%Y-%m-%d").date() if row[1] else None),`
			`row[2],`
			`]`
			`for row in data`
			`]`

			`df2 = DataFrame(`
			`{`
			`"key": [x[0] for x in data],`
			`"date": [x[1] for x in data],`
			`"value": [x[2] for x in data],`
			`}`
			`)`

			`df1["weights"] = df1["value"] / df1["value"].sum()`
			`gb1 = df1.groupby("date").aggregate(np.sum)`

			`df2["weights"] = df1["value"] / df1["value"].sum()`
			`gb2 = df2.groupby("date").aggregate(np.sum)`

			`assert len(gb1) == len(gb2)`


			`def test_agg_period_index():`
			`prng = period_range("2012-1-1", freq="M", periods=3)`
			`df = DataFrame(np.random.randn(3, 2), index=prng)`
			`rs = df.groupby(level=0).sum()`
			`assert isinstance(rs.index, PeriodIndex)`

			`# GH 3579`
			`index = period_range(start="1999-01", periods=5, freq="M")`
			`s1 = Series(np.random.rand(len(index)), index=index)`
			`s2 = Series(np.random.rand(len(index)), index=index)`
			`df = DataFrame.from_dict({"s1": s1, "s2": s2})`
			`grouped = df.groupby(df.index.month)`
			`list(grouped)`


			`def test_agg_dict_parameter_cast_result_dtypes():`
			`# GH 12821`

			`df = DataFrame(`
			`{`
			`"class": ["A", "A", "B", "B", "C", "C", "D", "D"],`
			`"time": date_range("1/1/2011", periods=8, freq="H"),`
			`}`
			`)`
			`df.loc[[0, 1, 2, 5], "time"] = None`

			# test for `first` function
			`exp = df.loc[[0, 3, 4, 6]].set_index("class")`
			`grouped = df.groupby("class")`
			`tm.assert_frame_equal(grouped.first(), exp)`
			`tm.assert_frame_equal(grouped.agg("first"), exp)`
			`tm.assert_frame_equal(grouped.agg({"time": "first"}), exp)`
			`tm.assert_series_equal(grouped.time.first(), exp["time"])`
			`tm.assert_series_equal(grouped.time.agg("first"), exp["time"])`

			# test for `last` function
			`exp = df.loc[[0, 3, 4, 7]].set_index("class")`
			`grouped = df.groupby("class")`
			`tm.assert_frame_equal(grouped.last(), exp)`
			`tm.assert_frame_equal(grouped.agg("last"), exp)`
			`tm.assert_frame_equal(grouped.agg({"time": "last"}), exp)`
			`tm.assert_series_equal(grouped.time.last(), exp["time"])`
			`tm.assert_series_equal(grouped.time.agg("last"), exp["time"])`

			`# count`
			`exp = Series([2, 2, 2, 2], index=Index(list("ABCD"), name="class"), name="time")`
			`tm.assert_series_equal(grouped.time.agg(len), exp)`
			`tm.assert_series_equal(grouped.time.size(), exp)`

			`exp = Series([0, 1, 1, 2], index=Index(list("ABCD"), name="class"), name="time")`
			`tm.assert_series_equal(grouped.time.count(), exp)`


			`def test_agg_cast_results_dtypes():`
			`# similar to GH12821`
			`# xref #11444`
			`u = [dt.datetime(2015, x + 1, 1) for x in range(12)]`
			`v = list("aaabbbbbbccd")`
			`df = DataFrame({"X": v, "Y": u})`

			`result = df.groupby("X")["Y"].agg(len)`
			`expected = df.groupby("X")["Y"].count()`
			`tm.assert_series_equal(result, expected)`


			`def test_aggregate_float64_no_int64():`
			`# see gh-11199`
			`df = DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 2, 2, 4, 5], "c": [1, 2, 3, 4, 5]})`

			`expected = DataFrame({"a": [1, 2.5, 4, 5]}, index=[1, 2, 4, 5])`
			`expected.index.name = "b"`

			`result = df.groupby("b")[["a"]].mean()`
			`tm.assert_frame_equal(result, expected)`

			`expected = DataFrame({"a": [1, 2.5, 4, 5], "c": [1, 2.5, 4, 5]}, index=[1, 2, 4, 5])`
			`expected.index.name = "b"`

			`result = df.groupby("b")[["a", "c"]].mean()`
			`tm.assert_frame_equal(result, expected)`


			`def test_aggregate_api_consistency():`
			`# GH 9052`
			`# make sure that the aggregates via dict`
			`# are consistent`
			`df = DataFrame(`
			`{`
			`"A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],`
			`"B": ["one", "one", "two", "two", "two", "two", "one", "two"],`
			`"C": np.random.randn(8) + 1.0,`
			`"D": np.arange(8),`
			`}`
			`)`

			`grouped = df.groupby(["A", "B"])`
			`c_mean = grouped["C"].mean()`
			`c_sum = grouped["C"].sum()`
			`d_mean = grouped["D"].mean()`
			`d_sum = grouped["D"].sum()`

			`result = grouped["D"].agg(["sum", "mean"])`
			`expected = pd.concat([d_sum, d_mean], axis=1)`
			`expected.columns = ["sum", "mean"]`
			`tm.assert_frame_equal(result, expected, check_like=True)`

			`result = grouped.agg([np.sum, np.mean])`
			`expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1)`
			`expected.columns = MultiIndex.from_product([["C", "D"], ["sum", "mean"]])`
			`tm.assert_frame_equal(result, expected, check_like=True)`

			`result = grouped[["D", "C"]].agg([np.sum, np.mean])`
			`expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1)`
			`expected.columns = MultiIndex.from_product([["D", "C"], ["sum", "mean"]])`
			`tm.assert_frame_equal(result, expected, check_like=True)`

			`result = grouped.agg({"C": "mean", "D": "sum"})`
			`expected = pd.concat([d_sum, c_mean], axis=1)`
			`tm.assert_frame_equal(result, expected, check_like=True)`

			`result = grouped.agg({"C": ["mean", "sum"], "D": ["mean", "sum"]})`
			`expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1)`
			`expected.columns = MultiIndex.from_product([["C", "D"], ["mean", "sum"]])`

			`msg = r"Column\(s\) \['r', 'r2'\] do not exist"`
			`with pytest.raises(SpecificationError, match=msg):`
			`grouped[["D", "C"]].agg({"r": np.sum, "r2": np.mean})`


			`def test_agg_dict_renaming_deprecation():`
			`# 15931`
			`df = DataFrame({"A": [1, 1, 1, 2, 2], "B": range(5), "C": range(5)})`

			`msg = r"nested renamer is not supported"`
			`with pytest.raises(SpecificationError, match=msg):`
			`df.groupby("A").agg(`
			`{"B": {"foo": ["sum", "max"]}, "C": {"bar": ["count", "min"]}}`
			`)`

			`msg = r"Column\(s\) \['ma'\] do not exist"`
			`with pytest.raises(SpecificationError, match=msg):`
			`df.groupby("A")[["B", "C"]].agg({"ma": "max"})`

			`msg = r"nested renamer is not supported"`
			`with pytest.raises(SpecificationError, match=msg):`
			`df.groupby("A").B.agg({"foo": "count"})`


			`def test_agg_compat():`
			`# GH 12334`
			`df = DataFrame(`
			`{`
			`"A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],`
			`"B": ["one", "one", "two", "two", "two", "two", "one", "two"],`
			`"C": np.random.randn(8) + 1.0,`
			`"D": np.arange(8),`
			`}`
			`)`

			`g = df.groupby(["A", "B"])`

			`msg = r"nested renamer is not supported"`
			`with pytest.raises(SpecificationError, match=msg):`
			`g["D"].agg({"C": ["sum", "std"]})`

			`with pytest.raises(SpecificationError, match=msg):`
			`g["D"].agg({"C": "sum", "D": "std"})`


			`def test_agg_nested_dicts():`
			`# API change for disallowing these types of nested dicts`
			`df = DataFrame(`
			`{`
			`"A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],`
			`"B": ["one", "one", "two", "two", "two", "two", "one", "two"],`
			`"C": np.random.randn(8) + 1.0,`
			`"D": np.arange(8),`
			`}`
			`)`

			`g = df.groupby(["A", "B"])`

			`msg = r"nested renamer is not supported"`
			`with pytest.raises(SpecificationError, match=msg):`
			`g.aggregate({"r1": {"C": ["mean", "sum"]}, "r2": {"D": ["mean", "sum"]}})`

			`with pytest.raises(SpecificationError, match=msg):`
			`g.agg({"C": {"ra": ["mean", "std"]}, "D": {"rb": ["mean", "std"]}})`

			`# same name as the original column`
			`# GH9052`
			`with pytest.raises(SpecificationError, match=msg):`
			`g["D"].agg({"result1": np.sum, "result2": np.mean})`

			`with pytest.raises(SpecificationError, match=msg):`
			`g["D"].agg({"D": np.sum, "result2": np.mean})`


			`def test_agg_item_by_item_raise_typeerror():`
			`df = DataFrame(np.random.randint(10, size=(20, 10)))`

			`def raiseException(df):`
			`pprint_thing("----------------------------------------")`
			`pprint_thing(df.to_string())`
			`raise TypeError("test")`

			`with pytest.raises(TypeError, match="test"):`
			`df.groupby(0).agg(raiseException)`


			`def test_series_agg_multikey():`
			`ts = tm.makeTimeSeries()`
			`grouped = ts.groupby([lambda x: x.year, lambda x: x.month])`

			`result = grouped.agg(np.sum)`
			`expected = grouped.sum()`
			`tm.assert_series_equal(result, expected)`


			`def test_series_agg_multi_pure_python():`
			`data = DataFrame(`
			`{`
			`"A": [`
			`"foo",`
			`"foo",`
			`"foo",`
			`"foo",`
			`"bar",`
			`"bar",`
			`"bar",`
			`"bar",`
			`"foo",`
			`"foo",`
			`"foo",`
			`],`
			`"B": [`
			`"one",`
			`"one",`
			`"one",`
			`"two",`
			`"one",`
			`"one",`
			`"one",`
			`"two",`
			`"two",`
			`"two",`
			`"one",`
			`],`
			`"C": [`
			`"dull",`
			`"dull",`
			`"shiny",`
			`"dull",`
			`"dull",`
			`"shiny",`
			`"shiny",`
			`"dull",`
			`"shiny",`
			`"shiny",`
			`"shiny",`
			`],`
			`"D": np.random.randn(11),`
			`"E": np.random.randn(11),`
			`"F": np.random.randn(11),`
			`}`
			`)`

			`def bad(x):`
			`assert len(x.values.base) > 0`
			`return "foo"`

			`result = data.groupby(["A", "B"]).agg(bad)`
			`expected = data.groupby(["A", "B"]).agg(lambda x: "foo")`
			`tm.assert_frame_equal(result, expected)`


			`def test_agg_consistency():`
			`# agg with ([]) and () not consistent`
			`# GH 6715`
			`def P1(a):`
			`return np.percentile(a.dropna(), q=1)`

			`df = DataFrame(`
			`{`
			`"col1": [1, 2, 3, 4],`
			`"col2": [10, 25, 26, 31],`
			`"date": [`
			`dt.date(2013, 2, 10),`
			`dt.date(2013, 2, 10),`
			`dt.date(2013, 2, 11),`
			`dt.date(2013, 2, 11),`
			`],`
			`}`
			`)`

			`g = df.groupby("date")`

			`expected = g.agg([P1])`
			`expected.columns = expected.columns.levels[0]`

			`result = g.agg(P1)`
			`tm.assert_frame_equal(result, expected)`


			`def test_agg_callables():`
			`# GH 7929`
			`df = DataFrame({"foo": [1, 2], "bar": [3, 4]}).astype(np.int64)`

			`class fn_class:`
			`def __call__(self, x):`
			`return sum(x)`

			`equiv_callables = [`
			`sum,`
			`np.sum,`
			`lambda x: sum(x),`
			`lambda x: x.sum(),`
			`partial(sum),`
			`fn_class(),`
			`]`

			`expected = df.groupby("foo").agg(sum)`
			`for ecall in equiv_callables:`
			`result = df.groupby("foo").agg(ecall)`
			`tm.assert_frame_equal(result, expected)`


			`def test_agg_over_numpy_arrays():`
			`# GH 3788`
			`df = DataFrame(`
			`[`
			`[1, np.array([10, 20, 30])],`
			`[1, np.array([40, 50, 60])],`
			`[2, np.array([20, 30, 40])],`
			`],`
			`columns=["category", "arraydata"],`
			`)`
			`result = df.groupby("category").agg(sum)`

			`expected_data = [[np.array([50, 70, 90])], [np.array([20, 30, 40])]]`
			`expected_index = Index([1, 2], name="category")`
			`expected_column = ["arraydata"]`
			`expected = DataFrame(expected_data, index=expected_index, columns=expected_column)`

			`tm.assert_frame_equal(result, expected)`


			`def test_agg_tzaware_non_datetime_result():`
			`# discussed in GH#29589, fixed in GH#29641, operating on tzaware values`
			`# with function that is not dtype-preserving`
			`dti = pd.date_range("2012-01-01", periods=4, tz="UTC")`
			`df = DataFrame({"a": [0, 0, 1, 1], "b": dti})`
			`gb = df.groupby("a")`

			`# Case that _does_ preserve the dtype`
			`result = gb["b"].agg(lambda x: x.iloc[0])`
			`expected = Series(dti[::2], name="b")`
			`expected.index.name = "a"`
			`tm.assert_series_equal(result, expected)`

			`# Cases that do _not_ preserve the dtype`
			`result = gb["b"].agg(lambda x: x.iloc[0].year)`
			`expected = Series([2012, 2012], name="b")`
			`expected.index.name = "a"`
			`tm.assert_series_equal(result, expected)`

			`result = gb["b"].agg(lambda x: x.iloc[-1] - x.iloc[0])`
			`expected = Series([pd.Timedelta(days=1), pd.Timedelta(days=1)], name="b")`
			`expected.index.name = "a"`
			`tm.assert_series_equal(result, expected)`


			`def test_agg_timezone_round_trip():`
			`# GH 15426`
			`ts = pd.Timestamp("2016-01-01 12:00:00", tz="US/Pacific")`
			`df = DataFrame({"a": 1, "b": [ts + dt.timedelta(minutes=nn) for nn in range(10)]})`

			`result1 = df.groupby("a")["b"].agg(np.min).iloc[0]`
			`result2 = df.groupby("a")["b"].agg(lambda x: np.min(x)).iloc[0]`
			`result3 = df.groupby("a")["b"].min().iloc[0]`

			`assert result1 == ts`
			`assert result2 == ts`
			`assert result3 == ts`

			`dates = [`
			`pd.Timestamp(f"2016-01-0{i:d} 12:00:00", tz="US/Pacific") for i in range(1, 5)`
			`]`
			`df = DataFrame({"A": ["a", "b"] * 2, "B": dates})`
			`grouped = df.groupby("A")`

			`ts = df["B"].iloc[0]`
			`assert ts == grouped.nth(0)["B"].iloc[0]`
			`assert ts == grouped.head(1)["B"].iloc[0]`
			`assert ts == grouped.first()["B"].iloc[0]`

			`# GH#27110 applying iloc should return a DataFrame`
			`assert ts == grouped.apply(lambda x: x.iloc[0]).iloc[0, 1]`

			`ts = df["B"].iloc[2]`
			`assert ts == grouped.last()["B"].iloc[0]`

			`# GH#27110 applying iloc should return a DataFrame`
			`assert ts == grouped.apply(lambda x: x.iloc[-1]).iloc[0, 1]`


			`def test_sum_uint64_overflow():`
			`# see gh-14758`
			`# Convert to uint64 and don't overflow`
			`df = DataFrame([[1, 2], [3, 4], [5, 6]], dtype=object)`
			`df = df + 9223372036854775807`

			`index = Index(`
			`[9223372036854775808, 9223372036854775810, 9223372036854775812], dtype=np.uint64`
			`)`
			`expected = DataFrame(`
			`{1: [9223372036854775809, 9223372036854775811, 9223372036854775813]},`
			`index=index,`
			`)`

			`expected.index.name = 0`
			`result = df.groupby(0).sum()`
			`tm.assert_frame_equal(result, expected)`


			`@pytest.mark.parametrize(`
			`"structure, expected",`
			`[`
			`(tuple, DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}})),`
			`(list, DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}})),`
			`(`
			`lambda x: tuple(x),`
			`DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}}),`
			`),`
			`(`
			`lambda x: list(x),`
			`DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}}),`
			`),`
			`],`
			`)`
			`def test_agg_structs_dataframe(structure, expected):`
			`df = DataFrame(`
			`{"A": [1, 1, 1, 3, 3, 3], "B": [1, 1, 1, 4, 4, 4], "C": [1, 1, 1, 3, 4, 4]}`
			`)`

			`result = df.groupby(["A", "B"]).aggregate(structure)`
			`expected.index.names = ["A", "B"]`
			`tm.assert_frame_equal(result, expected)`


			`@pytest.mark.parametrize(`
			`"structure, expected",`
			`[`
			`(tuple, Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name="C")),`
			`(list, Series([[1, 1, 1], [3, 4, 4]], index=[1, 3], name="C")),`
			`(lambda x: tuple(x), Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name="C")),`
			`(lambda x: list(x), Series([[1, 1, 1], [3, 4, 4]], index=[1, 3], name="C")),`
			`],`
			`)`
			`def test_agg_structs_series(structure, expected):`
			`# Issue #18079`
			`df = DataFrame(`
			`{"A": [1, 1, 1, 3, 3, 3], "B": [1, 1, 1, 4, 4, 4], "C": [1, 1, 1, 3, 4, 4]}`
			`)`

			`result = df.groupby("A")["C"].aggregate(structure)`
			`expected.index.name = "A"`
			`tm.assert_series_equal(result, expected)`


			`def test_agg_category_nansum(observed):`
			`categories = ["a", "b", "c"]`
			`df = DataFrame(`
			`{"A": pd.Categorical(["a", "a", "b"], categories=categories), "B": [1, 2, 3]}`
			`)`
			`result = df.groupby("A", observed=observed).B.agg(np.nansum)`
			`expected = Series(`
			`[3, 3, 0],`
			`index=pd.CategoricalIndex(["a", "b", "c"], categories=categories, name="A"),`
			`name="B",`
			`)`
			`if observed:`
			`expected = expected[expected != 0]`
			`tm.assert_series_equal(result, expected)`


			`def test_agg_list_like_func():`
			`# GH 18473`
			`df = DataFrame({"A": [str(x) for x in range(3)], "B": [str(x) for x in range(3)]})`
			`grouped = df.groupby("A", as_index=False, sort=False)`
			`result = grouped.agg({"B": lambda x: list(x)})`
			`expected = DataFrame(`
			`{"A": [str(x) for x in range(3)], "B": [[str(x)] for x in range(3)]}`
			`)`
			`tm.assert_frame_equal(result, expected)`


			`def test_agg_lambda_with_timezone():`
			`# GH 23683`
			`df = DataFrame(`
			`{`
			`"tag": [1, 1],`
			`"date": [`
			`pd.Timestamp("2018-01-01", tz="UTC"),`
			`pd.Timestamp("2018-01-02", tz="UTC"),`
			`],`
			`}`
			`)`
			`result = df.groupby("tag").agg({"date": lambda e: e.head(1)})`
			`expected = DataFrame(`
			`[pd.Timestamp("2018-01-01", tz="UTC")],`
			`index=Index([1], name="tag"),`
			`columns=["date"],`
			`)`
			`tm.assert_frame_equal(result, expected)`


			`@pytest.mark.parametrize(`
			`"err_cls",`
			`[`
			`NotImplementedError,`
			`RuntimeError,`
			`KeyError,`
			`IndexError,`
			`OSError,`
			`ValueError,`
			`ArithmeticError,`
			`AttributeError,`
			`],`
			`)`
			`def test_groupby_agg_err_catching(err_cls):`
			`# make sure we suppress anything other than TypeError or AssertionError`
			`# in _python_agg_general`

			`# Use a non-standard EA to make sure we don't go down ndarray paths`
			`from pandas.tests.extension.decimal.array import DecimalArray, make_data, to_decimal`

			`data = make_data()[:5]`
			`df = DataFrame(`
			`{"id1": [0, 0, 0, 1, 1], "id2": [0, 1, 0, 1, 1], "decimals": DecimalArray(data)}`
			`)`

			`expected = Series(to_decimal([data[0], data[3]]))`

			`def weird_func(x):`
			`# weird function that raise something other than TypeError or IndexError`
			`# in _python_agg_general`
			`if len(x) == 0:`
			`raise err_cls`
			`return x.iloc[0]`

			`result = df["decimals"].groupby(df["id1"]).agg(weird_func)`
			`tm.assert_series_equal(result, expected, check_names=False)`