projektAI/venv/Lib/site-packages/pandas/tests/io/formats/test_info.py

from io import StringIO
import re
from string import ascii_uppercase as uppercase
import sys
import textwrap

import numpy as np
import pytest

from pandas.compat import IS64, PYPY

from pandas import (
    CategoricalIndex,
    DataFrame,
    MultiIndex,
    Series,
    date_range,
    option_context,
)


@pytest.fixture
def duplicate_columns_frame():
    """Dataframe with duplicate column names."""
    return DataFrame(np.random.randn(1500, 4), columns=["a", "a", "b", "b"])


def test_info_empty():
    df = DataFrame()
    buf = StringIO()
    df.info(buf=buf)
    result = buf.getvalue()
    expected = textwrap.dedent(
        """\
        <class 'pandas.core.frame.DataFrame'>
        Index: 0 entries
        Empty DataFrame"""
    )
    assert result == expected


def test_info_categorical_column_smoke_test():
    n = 2500
    df = DataFrame({"int64": np.random.randint(100, size=n)})
    df["category"] = Series(
        np.array(list("abcdefghij")).take(np.random.randint(0, 10, size=n))
    ).astype("category")
    df.isna()
    buf = StringIO()
    df.info(buf=buf)

    df2 = df[df["category"] == "d"]
    buf = StringIO()
    df2.info(buf=buf)


@pytest.mark.parametrize(
    "fixture_func_name",
    [
        "int_frame",
        "float_frame",
        "datetime_frame",
        "duplicate_columns_frame",
    ],
)
def test_info_smoke_test(fixture_func_name, request):
    frame = request.getfixturevalue(fixture_func_name)
    buf = StringIO()
    frame.info(buf=buf)
    result = buf.getvalue().splitlines()
    assert len(result) > 10


@pytest.mark.parametrize(
    "num_columns, max_info_columns, verbose",
    [
        (10, 100, True),
        (10, 11, True),
        (10, 10, True),
        (10, 9, False),
        (10, 1, False),
    ],
)
def test_info_default_verbose_selection(num_columns, max_info_columns, verbose):
    frame = DataFrame(np.random.randn(5, num_columns))
    with option_context("display.max_info_columns", max_info_columns):
        io_default = StringIO()
        frame.info(buf=io_default)
        result = io_default.getvalue()

        io_explicit = StringIO()
        frame.info(buf=io_explicit, verbose=verbose)
        expected = io_explicit.getvalue()

        assert result == expected


def test_info_verbose_check_header_separator_body():
    buf = StringIO()
    size = 1001
    start = 5
    frame = DataFrame(np.random.randn(3, size))
    frame.info(verbose=True, buf=buf)

    res = buf.getvalue()
    header = " #     Column  Dtype  \n---    ------  -----  "
    assert header in res

    frame.info(verbose=True, buf=buf)
    buf.seek(0)
    lines = buf.readlines()
    assert len(lines) > 0

    for i, line in enumerate(lines):
        if i >= start and i < start + size:
            line_nr = f" {i - start} "
            assert line.startswith(line_nr)


@pytest.mark.parametrize(
    "size, header_exp, separator_exp, first_line_exp, last_line_exp",
    [
        (
            4,
            " #   Column  Non-Null Count  Dtype  ",
            "---  ------  --------------  -----  ",
            " 0   0       3 non-null      float64",
            " 3   3       3 non-null      float64",
        ),
        (
            11,
            " #   Column  Non-Null Count  Dtype  ",
            "---  ------  --------------  -----  ",
            " 0   0       3 non-null      float64",
            " 10  10      3 non-null      float64",
        ),
        (
            101,
            " #    Column  Non-Null Count  Dtype  ",
            "---   ------  --------------  -----  ",
            " 0    0       3 non-null      float64",
            " 100  100     3 non-null      float64",
        ),
        (
            1001,
            " #     Column  Non-Null Count  Dtype  ",
            "---    ------  --------------  -----  ",
            " 0     0       3 non-null      float64",
            " 1000  1000    3 non-null      float64",
        ),
        (
            10001,
            " #      Column  Non-Null Count  Dtype  ",
            "---     ------  --------------  -----  ",
            " 0      0       3 non-null      float64",
            " 10000  10000   3 non-null      float64",
        ),
    ],
)
def test_info_verbose_with_counts_spacing(
    size, header_exp, separator_exp, first_line_exp, last_line_exp
):
    """Test header column, spacer, first line and last line in verbose mode."""
    frame = DataFrame(np.random.randn(3, size))
    buf = StringIO()
    frame.info(verbose=True, show_counts=True, buf=buf)
    all_lines = buf.getvalue().splitlines()
    # Here table would contain only header, separator and table lines
    # dframe repr, index summary, memory usage and dtypes are excluded
    table = all_lines[3:-2]
    header, separator, first_line, *rest, last_line = table
    assert header == header_exp
    assert separator == separator_exp
    assert first_line == first_line_exp
    assert last_line == last_line_exp


def test_info_memory():
    # https://github.com/pandas-dev/pandas/issues/21056
    df = DataFrame({"a": Series([1, 2], dtype="i8")})
    buf = StringIO()
    df.info(buf=buf)
    result = buf.getvalue()
    bytes = float(df.memory_usage().sum())
    expected = textwrap.dedent(
        f"""\
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 2 entries, 0 to 1
    Data columns (total 1 columns):
     #   Column  Non-Null Count  Dtype
    ---  ------  --------------  -----
     0   a       2 non-null      int64
    dtypes: int64(1)
    memory usage: {bytes} bytes
    """
    )
    assert result == expected


def test_info_wide():
    io = StringIO()
    df = DataFrame(np.random.randn(5, 101))
    df.info(buf=io)

    io = StringIO()
    df.info(buf=io, max_cols=101)
    result = io.getvalue()
    assert len(result.splitlines()) > 100

    expected = result
    with option_context("display.max_info_columns", 101):
        io = StringIO()
        df.info(buf=io)
        result = io.getvalue()
        assert result == expected


def test_info_duplicate_columns_shows_correct_dtypes():
    # GH11761
    io = StringIO()
    frame = DataFrame([[1, 2.0]], columns=["a", "a"])
    frame.info(buf=io)
    lines = io.getvalue().splitlines(True)
    assert " 0   a       1 non-null      int64  \n" == lines[5]
    assert " 1   a       1 non-null      float64\n" == lines[6]


def test_info_shows_column_dtypes():
    dtypes = [
        "int64",
        "float64",
        "datetime64[ns]",
        "timedelta64[ns]",
        "complex128",
        "object",
        "bool",
    ]
    data = {}
    n = 10
    for i, dtype in enumerate(dtypes):
        data[i] = np.random.randint(2, size=n).astype(dtype)
    df = DataFrame(data)
    buf = StringIO()
    df.info(buf=buf)
    res = buf.getvalue()
    header = (
        " #   Column  Non-Null Count  Dtype          \n"
        "---  ------  --------------  -----          "
    )
    assert header in res
    for i, dtype in enumerate(dtypes):
        name = f" {i:d}   {i:d}       {n:d} non-null     {dtype}"
        assert name in res


def test_info_max_cols():
    df = DataFrame(np.random.randn(10, 5))
    for len_, verbose in [(5, None), (5, False), (12, True)]:
        # For verbose always      ^ setting  ^ summarize ^ full output
        with option_context("max_info_columns", 4):
            buf = StringIO()
            df.info(buf=buf, verbose=verbose)
            res = buf.getvalue()
            assert len(res.strip().split("\n")) == len_

    for len_, verbose in [(12, None), (5, False), (12, True)]:
        # max_cols not exceeded
        with option_context("max_info_columns", 5):
            buf = StringIO()
            df.info(buf=buf, verbose=verbose)
            res = buf.getvalue()
            assert len(res.strip().split("\n")) == len_

    for len_, max_cols in [(12, 5), (5, 4)]:
        # setting truncates
        with option_context("max_info_columns", 4):
            buf = StringIO()
            df.info(buf=buf, max_cols=max_cols)
            res = buf.getvalue()
            assert len(res.strip().split("\n")) == len_

        # setting wouldn't truncate
        with option_context("max_info_columns", 5):
            buf = StringIO()
            df.info(buf=buf, max_cols=max_cols)
            res = buf.getvalue()
            assert len(res.strip().split("\n")) == len_


def test_info_memory_usage():
    # Ensure memory usage is displayed, when asserted, on the last line
    dtypes = [
        "int64",
        "float64",
        "datetime64[ns]",
        "timedelta64[ns]",
        "complex128",
        "object",
        "bool",
    ]
    data = {}
    n = 10
    for i, dtype in enumerate(dtypes):
        data[i] = np.random.randint(2, size=n).astype(dtype)
    df = DataFrame(data)
    buf = StringIO()

    # display memory usage case
    df.info(buf=buf, memory_usage=True)
    res = buf.getvalue().splitlines()
    assert "memory usage: " in res[-1]

    # do not display memory usage case
    df.info(buf=buf, memory_usage=False)
    res = buf.getvalue().splitlines()
    assert "memory usage: " not in res[-1]

    df.info(buf=buf, memory_usage=True)
    res = buf.getvalue().splitlines()

    # memory usage is a lower bound, so print it as XYZ+ MB
    assert re.match(r"memory usage: [^+]+\+", res[-1])

    df.iloc[:, :5].info(buf=buf, memory_usage=True)
    res = buf.getvalue().splitlines()

    # excluded column with object dtype, so estimate is accurate
    assert not re.match(r"memory usage: [^+]+\+", res[-1])

    # Test a DataFrame with duplicate columns
    dtypes = ["int64", "int64", "int64", "float64"]
    data = {}
    n = 100
    for i, dtype in enumerate(dtypes):
        data[i] = np.random.randint(2, size=n).astype(dtype)
    df = DataFrame(data)
    df.columns = dtypes

    df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
    df_with_object_index.info(buf=buf, memory_usage=True)
    res = buf.getvalue().splitlines()
    assert re.match(r"memory usage: [^+]+\+", res[-1])

    df_with_object_index.info(buf=buf, memory_usage="deep")
    res = buf.getvalue().splitlines()
    assert re.match(r"memory usage: [^+]+$", res[-1])

    # Ensure df size is as expected
    # (cols * rows * bytes) + index size
    df_size = df.memory_usage().sum()
    exp_size = len(dtypes) * n * 8 + df.index.nbytes
    assert df_size == exp_size

    # Ensure number of cols in memory_usage is the same as df
    size_df = np.size(df.columns.values) + 1  # index=True; default
    assert size_df == np.size(df.memory_usage())

    # assert deep works only on object
    assert df.memory_usage().sum() == df.memory_usage(deep=True).sum()

    # test for validity
    DataFrame(1, index=["a"], columns=["A"]).memory_usage(index=True)
    DataFrame(1, index=["a"], columns=["A"]).index.nbytes
    df = DataFrame(
        data=1, index=MultiIndex.from_product([["a"], range(1000)]), columns=["A"]
    )
    df.index.nbytes
    df.memory_usage(index=True)
    df.index.values.nbytes

    mem = df.memory_usage(deep=True).sum()
    assert mem > 0


@pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result")
def test_info_memory_usage_deep_not_pypy():
    df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
    assert (
        df_with_object_index.memory_usage(index=True, deep=True).sum()
        > df_with_object_index.memory_usage(index=True).sum()
    )

    df_object = DataFrame({"a": ["a"]})
    assert df_object.memory_usage(deep=True).sum() > df_object.memory_usage().sum()


@pytest.mark.skipif(not PYPY, reason="on PyPy deep=True does not change result")
def test_info_memory_usage_deep_pypy():
    df_with_object_index = DataFrame({"a": [1]}, index=["foo"])
    assert (
        df_with_object_index.memory_usage(index=True, deep=True).sum()
        == df_with_object_index.memory_usage(index=True).sum()
    )

    df_object = DataFrame({"a": ["a"]})
    assert df_object.memory_usage(deep=True).sum() == df_object.memory_usage().sum()


@pytest.mark.skipif(PYPY, reason="PyPy getsizeof() fails by design")
def test_usage_via_getsizeof():
    df = DataFrame(
        data=1, index=MultiIndex.from_product([["a"], range(1000)]), columns=["A"]
    )
    mem = df.memory_usage(deep=True).sum()
    # sys.getsizeof will call the .memory_usage with
    # deep=True, and add on some GC overhead
    diff = mem - sys.getsizeof(df)
    assert abs(diff) < 100


def test_info_memory_usage_qualified():
    buf = StringIO()
    df = DataFrame(1, columns=list("ab"), index=[1, 2, 3])
    df.info(buf=buf)
    assert "+" not in buf.getvalue()

    buf = StringIO()
    df = DataFrame(1, columns=list("ab"), index=list("ABC"))
    df.info(buf=buf)
    assert "+" in buf.getvalue()

    buf = StringIO()
    df = DataFrame(
        1, columns=list("ab"), index=MultiIndex.from_product([range(3), range(3)])
    )
    df.info(buf=buf)
    assert "+" not in buf.getvalue()

    buf = StringIO()
    df = DataFrame(
        1, columns=list("ab"), index=MultiIndex.from_product([range(3), ["foo", "bar"]])
    )
    df.info(buf=buf)
    assert "+" in buf.getvalue()


def test_info_memory_usage_bug_on_multiindex():
    # GH 14308
    # memory usage introspection should not materialize .values

    def memory_usage(f):
        return f.memory_usage(deep=True).sum()

    N = 100
    M = len(uppercase)
    index = MultiIndex.from_product(
        [list(uppercase), date_range("20160101", periods=N)],
        names=["id", "date"],
    )
    df = DataFrame({"value": np.random.randn(N * M)}, index=index)

    unstacked = df.unstack("id")
    assert df.values.nbytes == unstacked.values.nbytes
    assert memory_usage(df) > memory_usage(unstacked)

    # high upper bound
    assert memory_usage(unstacked) - memory_usage(df) < 2000


def test_info_categorical():
    # GH14298
    idx = CategoricalIndex(["a", "b"])
    df = DataFrame(np.zeros((2, 2)), index=idx, columns=idx)

    buf = StringIO()
    df.info(buf=buf)


@pytest.mark.xfail(not IS64, reason="GH 36579: fail on 32-bit system")
def test_info_int_columns():
    # GH#37245
    df = DataFrame({1: [1, 2], 2: [2, 3]}, index=["A", "B"])
    buf = StringIO()
    df.info(show_counts=True, buf=buf)
    result = buf.getvalue()
    expected = textwrap.dedent(
        """\
        <class 'pandas.core.frame.DataFrame'>
        Index: 2 entries, A to B
        Data columns (total 2 columns):
         #   Column  Non-Null Count  Dtype
        ---  ------  --------------  -----
         0   1       2 non-null      int64
         1   2       2 non-null      int64
        dtypes: int64(2)
        memory usage: 48.0+ bytes
        """
    )
    assert result == expected
Działa 2021-06-06 22:13:05 +02:00			`from io import StringIO`
			`import re`
			`from string import ascii_uppercase as uppercase`
			`import sys`
			`import textwrap`

			`import numpy as np`
			`import pytest`

			`from pandas.compat import IS64, PYPY`

			`from pandas import (`
			`CategoricalIndex,`
			`DataFrame,`
			`MultiIndex,`
			`Series,`
			`date_range,`
			`option_context,`
			`)`


			`@pytest.fixture`
			`def duplicate_columns_frame():`
			`"""Dataframe with duplicate column names."""`
			`return DataFrame(np.random.randn(1500, 4), columns=["a", "a", "b", "b"])`


			`def test_info_empty():`
			`df = DataFrame()`
			`buf = StringIO()`
			`df.info(buf=buf)`
			`result = buf.getvalue()`
			`expected = textwrap.dedent(`
			`"""\`
			`<class 'pandas.core.frame.DataFrame'>`
			`Index: 0 entries`
			`Empty DataFrame"""`
			`)`
			`assert result == expected`


			`def test_info_categorical_column_smoke_test():`
			`n = 2500`
			`df = DataFrame({"int64": np.random.randint(100, size=n)})`
			`df["category"] = Series(`
			`np.array(list("abcdefghij")).take(np.random.randint(0, 10, size=n))`
			`).astype("category")`
			`df.isna()`
			`buf = StringIO()`
			`df.info(buf=buf)`

			`df2 = df[df["category"] == "d"]`
			`buf = StringIO()`
			`df2.info(buf=buf)`


			`@pytest.mark.parametrize(`
			`"fixture_func_name",`
			`[`
			`"int_frame",`
			`"float_frame",`
			`"datetime_frame",`
			`"duplicate_columns_frame",`
			`],`
			`)`
			`def test_info_smoke_test(fixture_func_name, request):`
			`frame = request.getfixturevalue(fixture_func_name)`
			`buf = StringIO()`
			`frame.info(buf=buf)`
			`result = buf.getvalue().splitlines()`
			`assert len(result) > 10`


			`@pytest.mark.parametrize(`
			`"num_columns, max_info_columns, verbose",`
			`[`
			`(10, 100, True),`
			`(10, 11, True),`
			`(10, 10, True),`
			`(10, 9, False),`
			`(10, 1, False),`
			`],`
			`)`
			`def test_info_default_verbose_selection(num_columns, max_info_columns, verbose):`
			`frame = DataFrame(np.random.randn(5, num_columns))`
			`with option_context("display.max_info_columns", max_info_columns):`
			`io_default = StringIO()`
			`frame.info(buf=io_default)`
			`result = io_default.getvalue()`

			`io_explicit = StringIO()`
			`frame.info(buf=io_explicit, verbose=verbose)`
			`expected = io_explicit.getvalue()`

			`assert result == expected`


			`def test_info_verbose_check_header_separator_body():`
			`buf = StringIO()`
			`size = 1001`
			`start = 5`
			`frame = DataFrame(np.random.randn(3, size))`
			`frame.info(verbose=True, buf=buf)`

			`res = buf.getvalue()`
			`header = " # Column Dtype \n--- ------ ----- "`
			`assert header in res`

			`frame.info(verbose=True, buf=buf)`
			`buf.seek(0)`
			`lines = buf.readlines()`
			`assert len(lines) > 0`

			`for i, line in enumerate(lines):`
			`if i >= start and i < start + size:`
			`line_nr = f" {i - start} "`
			`assert line.startswith(line_nr)`


			`@pytest.mark.parametrize(`
			`"size, header_exp, separator_exp, first_line_exp, last_line_exp",`
			`[`
			`(`
			`4,`
			`" # Column Non-Null Count Dtype ",`
			`"--- ------ -------------- ----- ",`
			`" 0 0 3 non-null float64",`
			`" 3 3 3 non-null float64",`
			`),`
			`(`
			`11,`
			`" # Column Non-Null Count Dtype ",`
			`"--- ------ -------------- ----- ",`
			`" 0 0 3 non-null float64",`
			`" 10 10 3 non-null float64",`
			`),`
			`(`
			`101,`
			`" # Column Non-Null Count Dtype ",`
			`"--- ------ -------------- ----- ",`
			`" 0 0 3 non-null float64",`
			`" 100 100 3 non-null float64",`
			`),`
			`(`
			`1001,`
			`" # Column Non-Null Count Dtype ",`
			`"--- ------ -------------- ----- ",`
			`" 0 0 3 non-null float64",`
			`" 1000 1000 3 non-null float64",`
			`),`
			`(`
			`10001,`
			`" # Column Non-Null Count Dtype ",`
			`"--- ------ -------------- ----- ",`
			`" 0 0 3 non-null float64",`
			`" 10000 10000 3 non-null float64",`
			`),`
			`],`
			`)`
			`def test_info_verbose_with_counts_spacing(`
			`size, header_exp, separator_exp, first_line_exp, last_line_exp`
			`):`
			`"""Test header column, spacer, first line and last line in verbose mode."""`
			`frame = DataFrame(np.random.randn(3, size))`
			`buf = StringIO()`
			`frame.info(verbose=True, show_counts=True, buf=buf)`
			`all_lines = buf.getvalue().splitlines()`
			`# Here table would contain only header, separator and table lines`
			`# dframe repr, index summary, memory usage and dtypes are excluded`
			`table = all_lines[3:-2]`
			`header, separator, first_line, *rest, last_line = table`
			`assert header == header_exp`
			`assert separator == separator_exp`
			`assert first_line == first_line_exp`
			`assert last_line == last_line_exp`


			`def test_info_memory():`
			`# https://github.com/pandas-dev/pandas/issues/21056`
			`df = DataFrame({"a": Series([1, 2], dtype="i8")})`
			`buf = StringIO()`
			`df.info(buf=buf)`
			`result = buf.getvalue()`
			`bytes = float(df.memory_usage().sum())`
			`expected = textwrap.dedent(`
			`f"""\`
			`<class 'pandas.core.frame.DataFrame'>`
			`RangeIndex: 2 entries, 0 to 1`
			`Data columns (total 1 columns):`
			`# Column Non-Null Count Dtype`
			`--- ------ -------------- -----`
			`0 a 2 non-null int64`
			`dtypes: int64(1)`
			`memory usage: {bytes} bytes`
			`"""`
			`)`
			`assert result == expected`


			`def test_info_wide():`
			`io = StringIO()`
			`df = DataFrame(np.random.randn(5, 101))`
			`df.info(buf=io)`

			`io = StringIO()`
			`df.info(buf=io, max_cols=101)`
			`result = io.getvalue()`
			`assert len(result.splitlines()) > 100`

			`expected = result`
			`with option_context("display.max_info_columns", 101):`
			`io = StringIO()`
			`df.info(buf=io)`
			`result = io.getvalue()`
			`assert result == expected`


			`def test_info_duplicate_columns_shows_correct_dtypes():`
			`# GH11761`
			`io = StringIO()`
			`frame = DataFrame([[1, 2.0]], columns=["a", "a"])`
			`frame.info(buf=io)`
			`lines = io.getvalue().splitlines(True)`
			`assert " 0 a 1 non-null int64 \n" == lines[5]`
			`assert " 1 a 1 non-null float64\n" == lines[6]`


			`def test_info_shows_column_dtypes():`
			`dtypes = [`
			`"int64",`
			`"float64",`
			`"datetime64[ns]",`
			`"timedelta64[ns]",`
			`"complex128",`
			`"object",`
			`"bool",`
			`]`
			`data = {}`
			`n = 10`
			`for i, dtype in enumerate(dtypes):`
			`data[i] = np.random.randint(2, size=n).astype(dtype)`
			`df = DataFrame(data)`
			`buf = StringIO()`
			`df.info(buf=buf)`
			`res = buf.getvalue()`
			`header = (`
			`" # Column Non-Null Count Dtype \n"`
			`"--- ------ -------------- ----- "`
			`)`
			`assert header in res`
			`for i, dtype in enumerate(dtypes):`
			`name = f" {i:d} {i:d} {n:d} non-null {dtype}"`
			`assert name in res`


			`def test_info_max_cols():`
			`df = DataFrame(np.random.randn(10, 5))`
			`for len_, verbose in [(5, None), (5, False), (12, True)]:`
			`# For verbose always ^ setting ^ summarize ^ full output`
			`with option_context("max_info_columns", 4):`
			`buf = StringIO()`
			`df.info(buf=buf, verbose=verbose)`
			`res = buf.getvalue()`
			`assert len(res.strip().split("\n")) == len_`

			`for len_, verbose in [(12, None), (5, False), (12, True)]:`
			`# max_cols not exceeded`
			`with option_context("max_info_columns", 5):`
			`buf = StringIO()`
			`df.info(buf=buf, verbose=verbose)`
			`res = buf.getvalue()`
			`assert len(res.strip().split("\n")) == len_`

			`for len_, max_cols in [(12, 5), (5, 4)]:`
			`# setting truncates`
			`with option_context("max_info_columns", 4):`
			`buf = StringIO()`
			`df.info(buf=buf, max_cols=max_cols)`
			`res = buf.getvalue()`
			`assert len(res.strip().split("\n")) == len_`

			`# setting wouldn't truncate`
			`with option_context("max_info_columns", 5):`
			`buf = StringIO()`
			`df.info(buf=buf, max_cols=max_cols)`
			`res = buf.getvalue()`
			`assert len(res.strip().split("\n")) == len_`


			`def test_info_memory_usage():`
			`# Ensure memory usage is displayed, when asserted, on the last line`
			`dtypes = [`
			`"int64",`
			`"float64",`
			`"datetime64[ns]",`
			`"timedelta64[ns]",`
			`"complex128",`
			`"object",`
			`"bool",`
			`]`
			`data = {}`
			`n = 10`
			`for i, dtype in enumerate(dtypes):`
			`data[i] = np.random.randint(2, size=n).astype(dtype)`
			`df = DataFrame(data)`
			`buf = StringIO()`

			`# display memory usage case`
			`df.info(buf=buf, memory_usage=True)`
			`res = buf.getvalue().splitlines()`
			`assert "memory usage: " in res[-1]`

			`# do not display memory usage case`
			`df.info(buf=buf, memory_usage=False)`
			`res = buf.getvalue().splitlines()`
			`assert "memory usage: " not in res[-1]`

			`df.info(buf=buf, memory_usage=True)`
			`res = buf.getvalue().splitlines()`

			`# memory usage is a lower bound, so print it as XYZ+ MB`
			`assert re.match(r"memory usage: [^+]+\+", res[-1])`

			`df.iloc[:, :5].info(buf=buf, memory_usage=True)`
			`res = buf.getvalue().splitlines()`

			`# excluded column with object dtype, so estimate is accurate`
			`assert not re.match(r"memory usage: [^+]+\+", res[-1])`

			`# Test a DataFrame with duplicate columns`
			`dtypes = ["int64", "int64", "int64", "float64"]`
			`data = {}`
			`n = 100`
			`for i, dtype in enumerate(dtypes):`
			`data[i] = np.random.randint(2, size=n).astype(dtype)`
			`df = DataFrame(data)`
			`df.columns = dtypes`

			`df_with_object_index = DataFrame({"a": [1]}, index=["foo"])`
			`df_with_object_index.info(buf=buf, memory_usage=True)`
			`res = buf.getvalue().splitlines()`
			`assert re.match(r"memory usage: [^+]+\+", res[-1])`

			`df_with_object_index.info(buf=buf, memory_usage="deep")`
			`res = buf.getvalue().splitlines()`
			`assert re.match(r"memory usage: [^+]+$", res[-1])`

			`# Ensure df size is as expected`
			`# (cols * rows * bytes) + index size`
			`df_size = df.memory_usage().sum()`
			`exp_size = len(dtypes) * n * 8 + df.index.nbytes`
			`assert df_size == exp_size`

			`# Ensure number of cols in memory_usage is the same as df`
			`size_df = np.size(df.columns.values) + 1 # index=True; default`
			`assert size_df == np.size(df.memory_usage())`

			`# assert deep works only on object`
			`assert df.memory_usage().sum() == df.memory_usage(deep=True).sum()`

			`# test for validity`
			`DataFrame(1, index=["a"], columns=["A"]).memory_usage(index=True)`
			`DataFrame(1, index=["a"], columns=["A"]).index.nbytes`
			`df = DataFrame(`
			`data=1, index=MultiIndex.from_product([["a"], range(1000)]), columns=["A"]`
			`)`
			`df.index.nbytes`
			`df.memory_usage(index=True)`
			`df.index.values.nbytes`

			`mem = df.memory_usage(deep=True).sum()`
			`assert mem > 0`


			`@pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result")`
			`def test_info_memory_usage_deep_not_pypy():`
			`df_with_object_index = DataFrame({"a": [1]}, index=["foo"])`
			`assert (`
			`df_with_object_index.memory_usage(index=True, deep=True).sum()`
			`> df_with_object_index.memory_usage(index=True).sum()`
			`)`

			`df_object = DataFrame({"a": ["a"]})`
			`assert df_object.memory_usage(deep=True).sum() > df_object.memory_usage().sum()`


			`@pytest.mark.skipif(not PYPY, reason="on PyPy deep=True does not change result")`
			`def test_info_memory_usage_deep_pypy():`
			`df_with_object_index = DataFrame({"a": [1]}, index=["foo"])`
			`assert (`
			`df_with_object_index.memory_usage(index=True, deep=True).sum()`
			`== df_with_object_index.memory_usage(index=True).sum()`
			`)`

			`df_object = DataFrame({"a": ["a"]})`
			`assert df_object.memory_usage(deep=True).sum() == df_object.memory_usage().sum()`


			`@pytest.mark.skipif(PYPY, reason="PyPy getsizeof() fails by design")`
			`def test_usage_via_getsizeof():`
			`df = DataFrame(`
			`data=1, index=MultiIndex.from_product([["a"], range(1000)]), columns=["A"]`
			`)`
			`mem = df.memory_usage(deep=True).sum()`
			`# sys.getsizeof will call the .memory_usage with`
			`# deep=True, and add on some GC overhead`
			`diff = mem - sys.getsizeof(df)`
			`assert abs(diff) < 100`


			`def test_info_memory_usage_qualified():`
			`buf = StringIO()`
			`df = DataFrame(1, columns=list("ab"), index=[1, 2, 3])`
			`df.info(buf=buf)`
			`assert "+" not in buf.getvalue()`

			`buf = StringIO()`
			`df = DataFrame(1, columns=list("ab"), index=list("ABC"))`
			`df.info(buf=buf)`
			`assert "+" in buf.getvalue()`

			`buf = StringIO()`
			`df = DataFrame(`
			`1, columns=list("ab"), index=MultiIndex.from_product([range(3), range(3)])`
			`)`
			`df.info(buf=buf)`
			`assert "+" not in buf.getvalue()`

			`buf = StringIO()`
			`df = DataFrame(`
			`1, columns=list("ab"), index=MultiIndex.from_product([range(3), ["foo", "bar"]])`
			`)`
			`df.info(buf=buf)`
			`assert "+" in buf.getvalue()`


			`def test_info_memory_usage_bug_on_multiindex():`
			`# GH 14308`
			`# memory usage introspection should not materialize .values`

			`def memory_usage(f):`
			`return f.memory_usage(deep=True).sum()`

			`N = 100`
			`M = len(uppercase)`
			`index = MultiIndex.from_product(`
			`[list(uppercase), date_range("20160101", periods=N)],`
			`names=["id", "date"],`
			`)`
			`df = DataFrame({"value": np.random.randn(N * M)}, index=index)`

			`unstacked = df.unstack("id")`
			`assert df.values.nbytes == unstacked.values.nbytes`
			`assert memory_usage(df) > memory_usage(unstacked)`

			`# high upper bound`
			`assert memory_usage(unstacked) - memory_usage(df) < 2000`


			`def test_info_categorical():`
			`# GH14298`
			`idx = CategoricalIndex(["a", "b"])`
			`df = DataFrame(np.zeros((2, 2)), index=idx, columns=idx)`

			`buf = StringIO()`
			`df.info(buf=buf)`


			`@pytest.mark.xfail(not IS64, reason="GH 36579: fail on 32-bit system")`
			`def test_info_int_columns():`
			`# GH#37245`
			`df = DataFrame({1: [1, 2], 2: [2, 3]}, index=["A", "B"])`
			`buf = StringIO()`
			`df.info(show_counts=True, buf=buf)`
			`result = buf.getvalue()`
			`expected = textwrap.dedent(`
			`"""\`
			`<class 'pandas.core.frame.DataFrame'>`
			`Index: 2 entries, A to B`
			`Data columns (total 2 columns):`
			`# Column Non-Null Count Dtype`
			`--- ------ -------------- -----`
			`0 1 2 non-null int64`
			`1 2 2 non-null int64`
			`dtypes: int64(2)`
			`memory usage: 48.0+ bytes`
			`"""`
			`)`
			`assert result == expected`