Inzynierka/Lib/site-packages/pandas/tests/apply/test_frame_transform.py

import numpy as np
import pytest

from pandas import (
    DataFrame,
    MultiIndex,
    Series,
)
import pandas._testing as tm
from pandas.tests.apply.common import frame_transform_kernels
from pandas.tests.frame.common import zip_frames


def unpack_obj(obj, klass, axis):
    """
    Return ``obj`` in the form needed by a test parametrized over
    frame_or_series.

    When ``klass`` is DataFrame, ``obj`` is handed back untouched. For any
    other ``klass``, column "A" of ``obj`` is returned, and the running test
    is skipped unless ``axis`` is 0.
    """
    if klass is DataFrame:
        return obj

    column = obj["A"]
    if axis != 0:
        pytest.skip(f"Test is only for DataFrame with axis={axis}")
    return column


def test_transform_ufunc(axis, float_frame, frame_or_series):
    # GH 35964
    # transform with a NumPy ufunc must match applying the ufunc directly
    target = unpack_obj(float_frame, frame_or_series, axis)

    # build the reference result with NumPy floating-point warnings silenced
    with np.errstate(all="ignore"):
        expected = np.sqrt(target)

    # ufunc
    result = target.transform(np.sqrt, axis=axis)
    tm.assert_equal(result, expected)


@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sqrt], ["sqrt"]),
        ([np.abs, np.sqrt], ["absolute", "sqrt"]),
        (np.array([np.sqrt]), ["sqrt"]),
        (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
    ],
)
def test_transform_listlike(axis, float_frame, ops, names):
    # GH 35964
    other_axis = 1 if axis in {0, "index"} else 0
    with np.errstate(all="ignore"):
        expected = zip_frames([op(float_frame) for op in ops], axis=other_axis)
    if axis in {0, "index"}:
        expected.columns = MultiIndex.from_product([float_frame.columns, names])
    else:
        expected.index = MultiIndex.from_product([float_frame.index, names])
    result = float_frame.transform(ops, axis=axis)
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("ops", [[], np.array([])])
def test_transform_empty_listlike(float_frame, ops, frame_or_series):
    # An empty list-like of functions must be rejected with a ValueError
    target = unpack_obj(float_frame, frame_or_series, 0)
    expected_msg = "No transform functions were provided"

    with pytest.raises(ValueError, match=expected_msg):
        target.transform(ops)


@pytest.mark.parametrize("box", [dict, Series])
def test_transform_dictlike(axis, float_frame, box):
    # GH 35964
    if axis in (0, "index"):
        e = float_frame.columns[0]
        expected = float_frame[[e]].transform(np.abs)
    else:
        e = float_frame.index[0]
        expected = float_frame.iloc[[0]].transform(np.abs)
    result = float_frame.transform(box({e: np.abs}), axis=axis)
    tm.assert_frame_equal(result, expected)


def test_transform_dictlike_mixed():
    # GH 40018 - mix of lists and non-lists in values of a dictionary
    df = DataFrame({"a": [1, 2], "b": [1, 4], "c": [1, 4]})
    funcs = {"b": ["sqrt", "abs"], "c": "sqrt"}

    # expected column labels: ("b", "sqrt"), ("b", "abs"), ("c", "sqrt")
    expected_columns = MultiIndex(
        levels=[("b", "c"), ("sqrt", "abs")],
        codes=[(0, 0, 1), (0, 1, 0)],
    )
    expected = DataFrame(
        [[1.0, 1, 1.0], [2.0, 4, 2.0]],
        columns=expected_columns,
    )

    result = df.transform(funcs)
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "ops",
    [
        {},
        {"A": []},
        {"A": [], "B": "cumsum"},
        {"A": "cumsum", "B": []},
        {"A": [], "B": ["cumsum"]},
        {"A": ["cumsum"], "B": []},
    ],
)
def test_transform_empty_dictlike(float_frame, ops, frame_or_series):
    # A dict-like that is empty, or that maps any key to an empty list, must
    # be rejected with a ValueError
    target = unpack_obj(float_frame, frame_or_series, 0)
    expected_msg = "No transform functions were provided"

    with pytest.raises(ValueError, match=expected_msg):
        target.transform(ops)


@pytest.mark.parametrize("use_apply", [True, False])
def test_transform_udf(axis, float_frame, use_apply, frame_or_series):
    # GH 35964
    target = unpack_obj(float_frame, frame_or_series, axis)

    # transform uses UDF either via apply or passing the entire DataFrame
    def func(x):
        # transform is using apply iff x is not a DataFrame
        got_whole_obj = isinstance(x, frame_or_series)
        if got_whole_obj == use_apply:
            # Force transform to fallback
            raise ValueError
        return x + 1

    expected = target + 1
    result = target.transform(func, axis=axis)
    tm.assert_equal(result, expected)


# Kernels left out of the parametrization of the tests below that expect a
# TypeError on object data.
wont_fail = [
    "ffill",
    "bfill",
    "fillna",
    "pad",
    "backfill",
    "shift",
]
# Every remaining frame transform kernel, in its original order.
frame_kernels_raise = [
    kernel for kernel in frame_transform_kernels if kernel not in wont_fail
]


@pytest.mark.parametrize("op", [*frame_kernels_raise, lambda x: x + 1])
def test_transform_bad_dtype(op, frame_or_series, request):
    # GH 35964
    if op == "ngroup":
        xfail_marker = pytest.mark.xfail(
            raises=ValueError, reason="ngroup not valid for NDFrame"
        )
        request.node.add_marker(xfail_marker)

    # DataFrame that will fail on most transforms
    frame = DataFrame({"A": 3 * [object]})
    obj = tm.get_obj(frame, frame_or_series)

    error = TypeError
    patterns = (
        "not supported between instances of 'type' and 'type'",
        "unsupported operand type",
    )
    msg = "|".join(patterns)

    # the same failure is expected for every way of passing ``op``:
    # bare, in a list, as a dict value, and as a list inside a dict
    for funcs in (op, [op], {"A": op}, {"A": [op]}):
        with pytest.raises(error, match=msg):
            obj.transform(funcs)


@pytest.mark.parametrize("op", frame_kernels_raise)
def test_transform_failure_typeerror(request, op):
    # GH 35964

    if op == "ngroup":
        xfail_marker = pytest.mark.xfail(
            raises=ValueError, reason="ngroup not valid for NDFrame"
        )
        request.node.add_marker(xfail_marker)

    # Using object makes most transform kernels fail
    df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]})
    error = TypeError
    patterns = (
        "not supported between instances of 'type' and 'type'",
        "unsupported operand type",
    )
    msg = "|".join(patterns)

    # list-like and dict-like spellings that all include ``op`` on column "A"
    cases = (
        [op],
        {"A": op, "B": op},
        {"A": [op], "B": [op]},
        {"A": [op, "shift"], "B": [op]},
    )
    for funcs in cases:
        with pytest.raises(error, match=msg):
            df.transform(funcs)


def test_transform_failure_valueerror():
    # GH 40211
    def op(x):
        # raises when the total of x is below 10
        total = np.sum(np.sum(x))
        if total < 10:
            raise ValueError
        return x

    df = DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]})
    msg = "Transform function failed"

    # every list-like / dict-like spelling that applies ``op`` to column "A"
    # is expected to raise
    cases = (
        [op],
        {"A": op, "B": op},
        {"A": [op], "B": [op]},
        {"A": [op, "shift"], "B": [op]},
    )
    for funcs in cases:
        with pytest.raises(ValueError, match=msg):
            df.transform(funcs)


@pytest.mark.parametrize("use_apply", [True, False])
def test_transform_passes_args(use_apply, frame_or_series):
    # GH 35964
    # transform uses UDF either via apply or passing the entire DataFrame
    expected_args = [1, 2]
    expected_kwargs = {"c": 3}

    def f(x, a, b, c):
        # transform is using apply iff x is not a DataFrame
        if use_apply == isinstance(x, frame_or_series):
            # Force transform to fallback
            raise ValueError
        assert [a, b] == expected_args
        assert c == expected_kwargs["c"]
        return x

    frame_or_series([1]).transform(f, 0, *expected_args, **expected_kwargs)


def test_transform_empty_dataframe():
    # https://github.com/pandas-dev/pandas/issues/39636
    # transforming an empty frame, or one of its columns, returns it unchanged
    def add_ten(x):
        return x + 10

    df = DataFrame([], columns=["col1", "col2"])
    frame_result = df.transform(add_ten)
    tm.assert_frame_equal(frame_result, df)

    column = df["col1"]
    series_result = column.transform(add_ten)
    tm.assert_series_equal(series_result, column)
first commit 2023-06-02 12:51:02 +02:00			`import numpy as np`
			`import pytest`

			`from pandas import (`
			`DataFrame,`
			`MultiIndex,`
			`Series,`
			`)`
			`import pandas._testing as tm`
			`from pandas.tests.apply.common import frame_transform_kernels`
			`from pandas.tests.frame.common import zip_frames`


			`def unpack_obj(obj, klass, axis):`
			`"""`
			`Helper to ensure we have the right type of object for a test parametrized`
			`over frame_or_series.`
			`"""`
			`if klass is not DataFrame:`
			`obj = obj["A"]`
			`if axis != 0:`
			`pytest.skip(f"Test is only for DataFrame with axis={axis}")`
			`return obj`


			`def test_transform_ufunc(axis, float_frame, frame_or_series):`
			`# GH 35964`
			`obj = unpack_obj(float_frame, frame_or_series, axis)`

			`with np.errstate(all="ignore"):`
			`f_sqrt = np.sqrt(obj)`

			`# ufunc`
			`result = obj.transform(np.sqrt, axis=axis)`
			`expected = f_sqrt`
			`tm.assert_equal(result, expected)`


			`@pytest.mark.parametrize(`
			`"ops, names",`
			`[`
			`([np.sqrt], ["sqrt"]),`
			`([np.abs, np.sqrt], ["absolute", "sqrt"]),`
			`(np.array([np.sqrt]), ["sqrt"]),`
			`(np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),`
			`],`
			`)`
			`def test_transform_listlike(axis, float_frame, ops, names):`
			`# GH 35964`
			`other_axis = 1 if axis in {0, "index"} else 0`
			`with np.errstate(all="ignore"):`
			`expected = zip_frames([op(float_frame) for op in ops], axis=other_axis)`
			`if axis in {0, "index"}:`
			`expected.columns = MultiIndex.from_product([float_frame.columns, names])`
			`else:`
			`expected.index = MultiIndex.from_product([float_frame.index, names])`
			`result = float_frame.transform(ops, axis=axis)`
			`tm.assert_frame_equal(result, expected)`


			`@pytest.mark.parametrize("ops", [[], np.array([])])`
			`def test_transform_empty_listlike(float_frame, ops, frame_or_series):`
			`obj = unpack_obj(float_frame, frame_or_series, 0)`

			`with pytest.raises(ValueError, match="No transform functions were provided"):`
			`obj.transform(ops)`


			`@pytest.mark.parametrize("box", [dict, Series])`
			`def test_transform_dictlike(axis, float_frame, box):`
			`# GH 35964`
			`if axis in (0, "index"):`
			`e = float_frame.columns[0]`
			`expected = float_frame[[e]].transform(np.abs)`
			`else:`
			`e = float_frame.index[0]`
			`expected = float_frame.iloc[[0]].transform(np.abs)`
			`result = float_frame.transform(box({e: np.abs}), axis=axis)`
			`tm.assert_frame_equal(result, expected)`


			`def test_transform_dictlike_mixed():`
			`# GH 40018 - mix of lists and non-lists in values of a dictionary`
			`df = DataFrame({"a": [1, 2], "b": [1, 4], "c": [1, 4]})`
			`result = df.transform({"b": ["sqrt", "abs"], "c": "sqrt"})`
			`expected = DataFrame(`
			`[[1.0, 1, 1.0], [2.0, 4, 2.0]],`
			`columns=MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)]),`
			`)`
			`tm.assert_frame_equal(result, expected)`


			`@pytest.mark.parametrize(`
			`"ops",`
			`[`
			`{},`
			`{"A": []},`
			`{"A": [], "B": "cumsum"},`
			`{"A": "cumsum", "B": []},`
			`{"A": [], "B": ["cumsum"]},`
			`{"A": ["cumsum"], "B": []},`
			`],`
			`)`
			`def test_transform_empty_dictlike(float_frame, ops, frame_or_series):`
			`obj = unpack_obj(float_frame, frame_or_series, 0)`

			`with pytest.raises(ValueError, match="No transform functions were provided"):`
			`obj.transform(ops)`


			`@pytest.mark.parametrize("use_apply", [True, False])`
			`def test_transform_udf(axis, float_frame, use_apply, frame_or_series):`
			`# GH 35964`
			`obj = unpack_obj(float_frame, frame_or_series, axis)`

			`# transform uses UDF either via apply or passing the entire DataFrame`
			`def func(x):`
			`# transform is using apply iff x is not a DataFrame`
			`if use_apply == isinstance(x, frame_or_series):`
			`# Force transform to fallback`
			`raise ValueError`
			`return x + 1`

			`result = obj.transform(func, axis=axis)`
			`expected = obj + 1`
			`tm.assert_equal(result, expected)`


			`wont_fail = ["ffill", "bfill", "fillna", "pad", "backfill", "shift"]`
			`frame_kernels_raise = [x for x in frame_transform_kernels if x not in wont_fail]`


			`@pytest.mark.parametrize("op", [*frame_kernels_raise, lambda x: x + 1])`
			`def test_transform_bad_dtype(op, frame_or_series, request):`
			`# GH 35964`
			`if op == "ngroup":`
			`request.node.add_marker(`
			`pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")`
			`)`

			`obj = DataFrame({"A": 3 * [object]}) # DataFrame that will fail on most transforms`
			`obj = tm.get_obj(obj, frame_or_series)`
			`error = TypeError`
			`msg = "|".join(`
			`[`
			`"not supported between instances of 'type' and 'type'",`
			`"unsupported operand type",`
			`]`
			`)`

			`with pytest.raises(error, match=msg):`
			`obj.transform(op)`
			`with pytest.raises(error, match=msg):`
			`obj.transform([op])`
			`with pytest.raises(error, match=msg):`
			`obj.transform({"A": op})`
			`with pytest.raises(error, match=msg):`
			`obj.transform({"A": [op]})`


			`@pytest.mark.parametrize("op", frame_kernels_raise)`
			`def test_transform_failure_typeerror(request, op):`
			`# GH 35964`

			`if op == "ngroup":`
			`request.node.add_marker(`
			`pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")`
			`)`

			`# Using object makes most transform kernels fail`
			`df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]})`
			`error = TypeError`
			`msg = "|".join(`
			`[`
			`"not supported between instances of 'type' and 'type'",`
			`"unsupported operand type",`
			`]`
			`)`

			`with pytest.raises(error, match=msg):`
			`df.transform([op])`

			`with pytest.raises(error, match=msg):`
			`df.transform({"A": op, "B": op})`

			`with pytest.raises(error, match=msg):`
			`df.transform({"A": [op], "B": [op]})`

			`with pytest.raises(error, match=msg):`
			`df.transform({"A": [op, "shift"], "B": [op]})`


			`def test_transform_failure_valueerror():`
			`# GH 40211`
			`def op(x):`
			`if np.sum(np.sum(x)) < 10:`
			`raise ValueError`
			`return x`

			`df = DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]})`
			`msg = "Transform function failed"`

			`with pytest.raises(ValueError, match=msg):`
			`df.transform([op])`

			`with pytest.raises(ValueError, match=msg):`
			`df.transform({"A": op, "B": op})`

			`with pytest.raises(ValueError, match=msg):`
			`df.transform({"A": [op], "B": [op]})`

			`with pytest.raises(ValueError, match=msg):`
			`df.transform({"A": [op, "shift"], "B": [op]})`


			`@pytest.mark.parametrize("use_apply", [True, False])`
			`def test_transform_passes_args(use_apply, frame_or_series):`
			`# GH 35964`
			`# transform uses UDF either via apply or passing the entire DataFrame`
			`expected_args = [1, 2]`
			`expected_kwargs = {"c": 3}`

			`def f(x, a, b, c):`
			`# transform is using apply iff x is not a DataFrame`
			`if use_apply == isinstance(x, frame_or_series):`
			`# Force transform to fallback`
			`raise ValueError`
			`assert [a, b] == expected_args`
			`assert c == expected_kwargs["c"]`
			`return x`

			`frame_or_series([1]).transform(f, 0, *expected_args, **expected_kwargs)`


			`def test_transform_empty_dataframe():`
			`# https://github.com/pandas-dev/pandas/issues/39636`
			`df = DataFrame([], columns=["col1", "col2"])`
			`result = df.transform(lambda x: x + 10)`
			`tm.assert_frame_equal(result, df)`

			`result = df["col1"].transform(lambda x: x + 10)`
			`tm.assert_series_equal(result, df["col1"])`