projektAI/venv/Lib/site-packages/pandas/tests/frame/methods/test_describe.py

import numpy as np

import pandas as pd
from pandas import Categorical, DataFrame, Series, Timestamp, date_range
import pandas._testing as tm


class TestDataFrameDescribe:
    def test_describe_bool_in_mixed_frame(self):
        df = DataFrame(
            {
                "string_data": ["a", "b", "c", "d", "e"],
                "bool_data": [True, True, False, False, False],
                "int_data": [10, 20, 30, 40, 50],
            }
        )

        # Integer data are included in .describe() output,
        # Boolean and string data are not.
        result = df.describe()
        expected = DataFrame(
            {"int_data": [5, 30, df.int_data.std(), 10, 20, 30, 40, 50]},
            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
        )
        tm.assert_frame_equal(result, expected)

        # Top value is a boolean value that is False
        result = df.describe(include=["bool"])

        expected = DataFrame(
            {"bool_data": [5, 2, False, 3]}, index=["count", "unique", "top", "freq"]
        )
        tm.assert_frame_equal(result, expected)

    def test_describe_empty_object(self):
        # GH#27183
        df = DataFrame({"A": [None, None]}, dtype=object)
        result = df.describe()
        expected = DataFrame(
            {"A": [0, 0, np.nan, np.nan]},
            dtype=object,
            index=["count", "unique", "top", "freq"],
        )
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:0].describe()
        tm.assert_frame_equal(result, expected)

    def test_describe_bool_frame(self):
        # GH#13891
        df = DataFrame(
            {
                "bool_data_1": [False, False, True, True],
                "bool_data_2": [False, True, True, True],
            }
        )
        result = df.describe()
        expected = DataFrame(
            {"bool_data_1": [4, 2, False, 2], "bool_data_2": [4, 2, True, 3]},
            index=["count", "unique", "top", "freq"],
        )
        tm.assert_frame_equal(result, expected)

        df = DataFrame(
            {
                "bool_data": [False, False, True, True, False],
                "int_data": [0, 1, 2, 3, 4],
            }
        )
        result = df.describe()
        expected = DataFrame(
            {"int_data": [5, 2, df.int_data.std(), 0, 1, 2, 3, 4]},
            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
        )
        tm.assert_frame_equal(result, expected)

        df = DataFrame(
            {"bool_data": [False, False, True, True], "str_data": ["a", "b", "c", "a"]}
        )
        result = df.describe()
        expected = DataFrame(
            {"bool_data": [4, 2, False, 2], "str_data": [4, 3, "a", 2]},
            index=["count", "unique", "top", "freq"],
        )
        tm.assert_frame_equal(result, expected)

    def test_describe_categorical(self):
        df = DataFrame({"value": np.random.randint(0, 10000, 100)})
        labels = [f"{i} - {i + 499}" for i in range(0, 10000, 500)]
        cat_labels = Categorical(labels, labels)

        df = df.sort_values(by=["value"], ascending=True)
        df["value_group"] = pd.cut(
            df.value, range(0, 10500, 500), right=False, labels=cat_labels
        )
        cat = df

        # Categoricals should not show up together with numerical columns
        result = cat.describe()
        assert len(result.columns) == 1

        # In a frame, describe() for the cat should be the same as for string
        # arrays (count, unique, top, freq)

        cat = Categorical(
            ["a", "b", "b", "b"], categories=["a", "b", "c"], ordered=True
        )
        s = Series(cat)
        result = s.describe()
        expected = Series([4, 2, "b", 3], index=["count", "unique", "top", "freq"])
        tm.assert_series_equal(result, expected)

        cat = Series(Categorical(["a", "b", "c", "c"]))
        df3 = DataFrame({"cat": cat, "s": ["a", "b", "c", "c"]})
        result = df3.describe()
        tm.assert_numpy_array_equal(result["cat"].values, result["s"].values)

    def test_describe_empty_categorical_column(self):
        # GH#26397
        # Ensure the index of an empty categorical DataFrame column
        # also contains (count, unique, top, freq)
        df = DataFrame({"empty_col": Categorical([])})
        result = df.describe()
        expected = DataFrame(
            {"empty_col": [0, 0, np.nan, np.nan]},
            index=["count", "unique", "top", "freq"],
            dtype="object",
        )
        tm.assert_frame_equal(result, expected)
        # ensure NaN, not None
        assert np.isnan(result.iloc[2, 0])
        assert np.isnan(result.iloc[3, 0])

    def test_describe_categorical_columns(self):
        # GH#11558
        columns = pd.CategoricalIndex(["int1", "int2", "obj"], ordered=True, name="XXX")
        df = DataFrame(
            {
                "int1": [10, 20, 30, 40, 50],
                "int2": [10, 20, 30, 40, 50],
                "obj": ["A", 0, None, "X", 1],
            },
            columns=columns,
        )
        result = df.describe()

        exp_columns = pd.CategoricalIndex(
            ["int1", "int2"],
            categories=["int1", "int2", "obj"],
            ordered=True,
            name="XXX",
        )
        expected = DataFrame(
            {
                "int1": [5, 30, df.int1.std(), 10, 20, 30, 40, 50],
                "int2": [5, 30, df.int2.std(), 10, 20, 30, 40, 50],
            },
            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
            columns=exp_columns,
        )

        tm.assert_frame_equal(result, expected)
        tm.assert_categorical_equal(result.columns.values, expected.columns.values)

    def test_describe_datetime_columns(self):
        columns = pd.DatetimeIndex(
            ["2011-01-01", "2011-02-01", "2011-03-01"],
            freq="MS",
            tz="US/Eastern",
            name="XXX",
        )
        df = DataFrame(
            {
                0: [10, 20, 30, 40, 50],
                1: [10, 20, 30, 40, 50],
                2: ["A", 0, None, "X", 1],
            }
        )
        df.columns = columns
        result = df.describe()

        exp_columns = pd.DatetimeIndex(
            ["2011-01-01", "2011-02-01"], freq="MS", tz="US/Eastern", name="XXX"
        )
        expected = DataFrame(
            {
                0: [5, 30, df.iloc[:, 0].std(), 10, 20, 30, 40, 50],
                1: [5, 30, df.iloc[:, 1].std(), 10, 20, 30, 40, 50],
            },
            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
        )
        expected.columns = exp_columns
        tm.assert_frame_equal(result, expected)
        assert result.columns.freq == "MS"
        assert result.columns.tz == expected.columns.tz

    def test_describe_timedelta_values(self):
        # GH#6145
        t1 = pd.timedelta_range("1 days", freq="D", periods=5)
        t2 = pd.timedelta_range("1 hours", freq="H", periods=5)
        df = DataFrame({"t1": t1, "t2": t2})

        expected = DataFrame(
            {
                "t1": [
                    5,
                    pd.Timedelta("3 days"),
                    df.iloc[:, 0].std(),
                    pd.Timedelta("1 days"),
                    pd.Timedelta("2 days"),
                    pd.Timedelta("3 days"),
                    pd.Timedelta("4 days"),
                    pd.Timedelta("5 days"),
                ],
                "t2": [
                    5,
                    pd.Timedelta("3 hours"),
                    df.iloc[:, 1].std(),
                    pd.Timedelta("1 hours"),
                    pd.Timedelta("2 hours"),
                    pd.Timedelta("3 hours"),
                    pd.Timedelta("4 hours"),
                    pd.Timedelta("5 hours"),
                ],
            },
            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
        )

        result = df.describe()
        tm.assert_frame_equal(result, expected)

        exp_repr = (
            "                              t1                         t2\n"
            "count                          5                          5\n"
            "mean             3 days 00:00:00            0 days 03:00:00\n"
            "std    1 days 13:56:50.394919273  0 days 01:34:52.099788303\n"
            "min              1 days 00:00:00            0 days 01:00:00\n"
            "25%              2 days 00:00:00            0 days 02:00:00\n"
            "50%              3 days 00:00:00            0 days 03:00:00\n"
            "75%              4 days 00:00:00            0 days 04:00:00\n"
            "max              5 days 00:00:00            0 days 05:00:00"
        )
        assert repr(result) == exp_repr

    def test_describe_tz_values(self, tz_naive_fixture):
        # GH#21332
        tz = tz_naive_fixture
        s1 = Series(range(5))
        start = Timestamp(2018, 1, 1)
        end = Timestamp(2018, 1, 5)
        s2 = Series(date_range(start, end, tz=tz))
        df = DataFrame({"s1": s1, "s2": s2})

        expected = DataFrame(
            {
                "s1": [5, 2, 0, 1, 2, 3, 4, 1.581139],
                "s2": [
                    5,
                    Timestamp(2018, 1, 3).tz_localize(tz),
                    start.tz_localize(tz),
                    s2[1],
                    s2[2],
                    s2[3],
                    end.tz_localize(tz),
                    np.nan,
                ],
            },
            index=["count", "mean", "min", "25%", "50%", "75%", "max", "std"],
        )
        result = df.describe(include="all", datetime_is_numeric=True)
        tm.assert_frame_equal(result, expected)

    def test_datetime_is_numeric_includes_datetime(self):
        df = DataFrame({"a": pd.date_range("2012", periods=3), "b": [1, 2, 3]})
        result = df.describe(datetime_is_numeric=True)
        expected = DataFrame(
            {
                "a": [
                    3,
                    Timestamp("2012-01-02"),
                    Timestamp("2012-01-01"),
                    Timestamp("2012-01-01T12:00:00"),
                    Timestamp("2012-01-02"),
                    Timestamp("2012-01-02T12:00:00"),
                    Timestamp("2012-01-03"),
                    np.nan,
                ],
                "b": [3, 2, 1, 1.5, 2, 2.5, 3, 1],
            },
            index=["count", "mean", "min", "25%", "50%", "75%", "max", "std"],
        )
        tm.assert_frame_equal(result, expected)

    def test_describe_tz_values2(self):
        tz = "CET"
        s1 = Series(range(5))
        start = Timestamp(2018, 1, 1)
        end = Timestamp(2018, 1, 5)
        s2 = Series(date_range(start, end, tz=tz))
        df = DataFrame({"s1": s1, "s2": s2})

        s1_ = s1.describe()
        s2_ = Series(
            [
                5,
                5,
                s2.value_counts().index[0],
                1,
                start.tz_localize(tz),
                end.tz_localize(tz),
            ],
            index=["count", "unique", "top", "freq", "first", "last"],
        )
        idx = [
            "count",
            "unique",
            "top",
            "freq",
            "first",
            "last",
            "mean",
            "std",
            "min",
            "25%",
            "50%",
            "75%",
            "max",
        ]
        expected = pd.concat([s1_, s2_], axis=1, keys=["s1", "s2"]).loc[idx]

        with tm.assert_produces_warning(FutureWarning):
            result = df.describe(include="all")
        tm.assert_frame_equal(result, expected)

    def test_describe_percentiles_integer_idx(self):
        # GH#26660
        df = DataFrame({"x": [1]})
        pct = np.linspace(0, 1, 10 + 1)
        result = df.describe(percentiles=pct)

        expected = DataFrame(
            {"x": [1.0, 1.0, np.NaN, 1.0, *[1.0 for _ in pct], 1.0]},
            index=[
                "count",
                "mean",
                "std",
                "min",
                "0%",
                "10%",
                "20%",
                "30%",
                "40%",
                "50%",
                "60%",
                "70%",
                "80%",
                "90%",
                "100%",
                "max",
            ],
        )
        tm.assert_frame_equal(result, expected)
Działa 2021-06-06 22:13:05 +02:00			`import numpy as np`

			`import pandas as pd`
			`from pandas import Categorical, DataFrame, Series, Timestamp, date_range`
			`import pandas._testing as tm`


			`class TestDataFrameDescribe:`
			`def test_describe_bool_in_mixed_frame(self):`
			`df = DataFrame(`
			`{`
			`"string_data": ["a", "b", "c", "d", "e"],`
			`"bool_data": [True, True, False, False, False],`
			`"int_data": [10, 20, 30, 40, 50],`
			`}`
			`)`

			`# Integer data are included in .describe() output,`
			`# Boolean and string data are not.`
			`result = df.describe()`
			`expected = DataFrame(`
			`{"int_data": [5, 30, df.int_data.std(), 10, 20, 30, 40, 50]},`
			`index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],`
			`)`
			`tm.assert_frame_equal(result, expected)`

			`# Top value is a boolean value that is False`
			`result = df.describe(include=["bool"])`

			`expected = DataFrame(`
			`{"bool_data": [5, 2, False, 3]}, index=["count", "unique", "top", "freq"]`
			`)`
			`tm.assert_frame_equal(result, expected)`

			`def test_describe_empty_object(self):`
			`# GH#27183`
			`df = DataFrame({"A": [None, None]}, dtype=object)`
			`result = df.describe()`
			`expected = DataFrame(`
			`{"A": [0, 0, np.nan, np.nan]},`
			`dtype=object,`
			`index=["count", "unique", "top", "freq"],`
			`)`
			`tm.assert_frame_equal(result, expected)`

			`result = df.iloc[:0].describe()`
			`tm.assert_frame_equal(result, expected)`

			`def test_describe_bool_frame(self):`
			`# GH#13891`
			`df = DataFrame(`
			`{`
			`"bool_data_1": [False, False, True, True],`
			`"bool_data_2": [False, True, True, True],`
			`}`
			`)`
			`result = df.describe()`
			`expected = DataFrame(`
			`{"bool_data_1": [4, 2, False, 2], "bool_data_2": [4, 2, True, 3]},`
			`index=["count", "unique", "top", "freq"],`
			`)`
			`tm.assert_frame_equal(result, expected)`

			`df = DataFrame(`
			`{`
			`"bool_data": [False, False, True, True, False],`
			`"int_data": [0, 1, 2, 3, 4],`
			`}`
			`)`
			`result = df.describe()`
			`expected = DataFrame(`
			`{"int_data": [5, 2, df.int_data.std(), 0, 1, 2, 3, 4]},`
			`index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],`
			`)`
			`tm.assert_frame_equal(result, expected)`

			`df = DataFrame(`
			`{"bool_data": [False, False, True, True], "str_data": ["a", "b", "c", "a"]}`
			`)`
			`result = df.describe()`
			`expected = DataFrame(`
			`{"bool_data": [4, 2, False, 2], "str_data": [4, 3, "a", 2]},`
			`index=["count", "unique", "top", "freq"],`
			`)`
			`tm.assert_frame_equal(result, expected)`

			`def test_describe_categorical(self):`
			`df = DataFrame({"value": np.random.randint(0, 10000, 100)})`
			`labels = [f"{i} - {i + 499}" for i in range(0, 10000, 500)]`
			`cat_labels = Categorical(labels, labels)`

			`df = df.sort_values(by=["value"], ascending=True)`
			`df["value_group"] = pd.cut(`
			`df.value, range(0, 10500, 500), right=False, labels=cat_labels`
			`)`
			`cat = df`

			`# Categoricals should not show up together with numerical columns`
			`result = cat.describe()`
			`assert len(result.columns) == 1`

			`# In a frame, describe() for the cat should be the same as for string`
			`# arrays (count, unique, top, freq)`

			`cat = Categorical(`
			`["a", "b", "b", "b"], categories=["a", "b", "c"], ordered=True`
			`)`
			`s = Series(cat)`
			`result = s.describe()`
			`expected = Series([4, 2, "b", 3], index=["count", "unique", "top", "freq"])`
			`tm.assert_series_equal(result, expected)`

			`cat = Series(Categorical(["a", "b", "c", "c"]))`
			`df3 = DataFrame({"cat": cat, "s": ["a", "b", "c", "c"]})`
			`result = df3.describe()`
			`tm.assert_numpy_array_equal(result["cat"].values, result["s"].values)`

			`def test_describe_empty_categorical_column(self):`
			`# GH#26397`
			`# Ensure the index of an empty categorical DataFrame column`
			`# also contains (count, unique, top, freq)`
			`df = DataFrame({"empty_col": Categorical([])})`
			`result = df.describe()`
			`expected = DataFrame(`
			`{"empty_col": [0, 0, np.nan, np.nan]},`
			`index=["count", "unique", "top", "freq"],`
			`dtype="object",`
			`)`
			`tm.assert_frame_equal(result, expected)`
			`# ensure NaN, not None`
			`assert np.isnan(result.iloc[2, 0])`
			`assert np.isnan(result.iloc[3, 0])`

			`def test_describe_categorical_columns(self):`
			`# GH#11558`
			`columns = pd.CategoricalIndex(["int1", "int2", "obj"], ordered=True, name="XXX")`
			`df = DataFrame(`
			`{`
			`"int1": [10, 20, 30, 40, 50],`
			`"int2": [10, 20, 30, 40, 50],`
			`"obj": ["A", 0, None, "X", 1],`
			`},`
			`columns=columns,`
			`)`
			`result = df.describe()`

			`exp_columns = pd.CategoricalIndex(`
			`["int1", "int2"],`
			`categories=["int1", "int2", "obj"],`
			`ordered=True,`
			`name="XXX",`
			`)`
			`expected = DataFrame(`
			`{`
			`"int1": [5, 30, df.int1.std(), 10, 20, 30, 40, 50],`
			`"int2": [5, 30, df.int2.std(), 10, 20, 30, 40, 50],`
			`},`
			`index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],`
			`columns=exp_columns,`
			`)`

			`tm.assert_frame_equal(result, expected)`
			`tm.assert_categorical_equal(result.columns.values, expected.columns.values)`

			`def test_describe_datetime_columns(self):`
			`columns = pd.DatetimeIndex(`
			`["2011-01-01", "2011-02-01", "2011-03-01"],`
			`freq="MS",`
			`tz="US/Eastern",`
			`name="XXX",`
			`)`
			`df = DataFrame(`
			`{`
			`0: [10, 20, 30, 40, 50],`
			`1: [10, 20, 30, 40, 50],`
			`2: ["A", 0, None, "X", 1],`
			`}`
			`)`
			`df.columns = columns`
			`result = df.describe()`

			`exp_columns = pd.DatetimeIndex(`
			`["2011-01-01", "2011-02-01"], freq="MS", tz="US/Eastern", name="XXX"`
			`)`
			`expected = DataFrame(`
			`{`
			`0: [5, 30, df.iloc[:, 0].std(), 10, 20, 30, 40, 50],`
			`1: [5, 30, df.iloc[:, 1].std(), 10, 20, 30, 40, 50],`
			`},`
			`index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],`
			`)`
			`expected.columns = exp_columns`
			`tm.assert_frame_equal(result, expected)`
			`assert result.columns.freq == "MS"`
			`assert result.columns.tz == expected.columns.tz`

			`def test_describe_timedelta_values(self):`
			`# GH#6145`
			`t1 = pd.timedelta_range("1 days", freq="D", periods=5)`
			`t2 = pd.timedelta_range("1 hours", freq="H", periods=5)`
			`df = DataFrame({"t1": t1, "t2": t2})`

			`expected = DataFrame(`
			`{`
			`"t1": [`
			`5,`
			`pd.Timedelta("3 days"),`
			`df.iloc[:, 0].std(),`
			`pd.Timedelta("1 days"),`
			`pd.Timedelta("2 days"),`
			`pd.Timedelta("3 days"),`
			`pd.Timedelta("4 days"),`
			`pd.Timedelta("5 days"),`
			`],`
			`"t2": [`
			`5,`
			`pd.Timedelta("3 hours"),`
			`df.iloc[:, 1].std(),`
			`pd.Timedelta("1 hours"),`
			`pd.Timedelta("2 hours"),`
			`pd.Timedelta("3 hours"),`
			`pd.Timedelta("4 hours"),`
			`pd.Timedelta("5 hours"),`
			`],`
			`},`
			`index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],`
			`)`

			`result = df.describe()`
			`tm.assert_frame_equal(result, expected)`

			`exp_repr = (`
			`" t1 t2\n"`
			`"count 5 5\n"`
			`"mean 3 days 00:00:00 0 days 03:00:00\n"`
			`"std 1 days 13:56:50.394919273 0 days 01:34:52.099788303\n"`
			`"min 1 days 00:00:00 0 days 01:00:00\n"`
			`"25% 2 days 00:00:00 0 days 02:00:00\n"`
			`"50% 3 days 00:00:00 0 days 03:00:00\n"`
			`"75% 4 days 00:00:00 0 days 04:00:00\n"`
			`"max 5 days 00:00:00 0 days 05:00:00"`
			`)`
			`assert repr(result) == exp_repr`

			`def test_describe_tz_values(self, tz_naive_fixture):`
			`# GH#21332`
			`tz = tz_naive_fixture`
			`s1 = Series(range(5))`
			`start = Timestamp(2018, 1, 1)`
			`end = Timestamp(2018, 1, 5)`
			`s2 = Series(date_range(start, end, tz=tz))`
			`df = DataFrame({"s1": s1, "s2": s2})`

			`expected = DataFrame(`
			`{`
			`"s1": [5, 2, 0, 1, 2, 3, 4, 1.581139],`
			`"s2": [`
			`5,`
			`Timestamp(2018, 1, 3).tz_localize(tz),`
			`start.tz_localize(tz),`
			`s2[1],`
			`s2[2],`
			`s2[3],`
			`end.tz_localize(tz),`
			`np.nan,`
			`],`
			`},`
			`index=["count", "mean", "min", "25%", "50%", "75%", "max", "std"],`
			`)`
			`result = df.describe(include="all", datetime_is_numeric=True)`
			`tm.assert_frame_equal(result, expected)`

			`def test_datetime_is_numeric_includes_datetime(self):`
			`df = DataFrame({"a": pd.date_range("2012", periods=3), "b": [1, 2, 3]})`
			`result = df.describe(datetime_is_numeric=True)`
			`expected = DataFrame(`
			`{`
			`"a": [`
			`3,`
			`Timestamp("2012-01-02"),`
			`Timestamp("2012-01-01"),`
			`Timestamp("2012-01-01T12:00:00"),`
			`Timestamp("2012-01-02"),`
			`Timestamp("2012-01-02T12:00:00"),`
			`Timestamp("2012-01-03"),`
			`np.nan,`
			`],`
			`"b": [3, 2, 1, 1.5, 2, 2.5, 3, 1],`
			`},`
			`index=["count", "mean", "min", "25%", "50%", "75%", "max", "std"],`
			`)`
			`tm.assert_frame_equal(result, expected)`

			`def test_describe_tz_values2(self):`
			`tz = "CET"`
			`s1 = Series(range(5))`
			`start = Timestamp(2018, 1, 1)`
			`end = Timestamp(2018, 1, 5)`
			`s2 = Series(date_range(start, end, tz=tz))`
			`df = DataFrame({"s1": s1, "s2": s2})`

			`s1_ = s1.describe()`
			`s2_ = Series(`
			`[`
			`5,`
			`5,`
			`s2.value_counts().index[0],`
			`1,`
			`start.tz_localize(tz),`
			`end.tz_localize(tz),`
			`],`
			`index=["count", "unique", "top", "freq", "first", "last"],`
			`)`
			`idx = [`
			`"count",`
			`"unique",`
			`"top",`
			`"freq",`
			`"first",`
			`"last",`
			`"mean",`
			`"std",`
			`"min",`
			`"25%",`
			`"50%",`
			`"75%",`
			`"max",`
			`]`
			`expected = pd.concat([s1_, s2_], axis=1, keys=["s1", "s2"]).loc[idx]`

			`with tm.assert_produces_warning(FutureWarning):`
			`result = df.describe(include="all")`
			`tm.assert_frame_equal(result, expected)`

			`def test_describe_percentiles_integer_idx(self):`
			`# GH#26660`
			`df = DataFrame({"x": [1]})`
			`pct = np.linspace(0, 1, 10 + 1)`
			`result = df.describe(percentiles=pct)`

			`expected = DataFrame(`
			`{"x": [1.0, 1.0, np.NaN, 1.0, *[1.0 for _ in pct], 1.0]},`
			`index=[`
			`"count",`
			`"mean",`
			`"std",`
			`"min",`
			`"0%",`
			`"10%",`
			`"20%",`
			`"30%",`
			`"40%",`
			`"50%",`
			`"60%",`
			`"70%",`
			`"80%",`
			`"90%",`
			`"100%",`
			`"max",`
			`],`
			`)`
			`tm.assert_frame_equal(result, expected)`