import numpy as np
import pytest

from pandas import (
    DataFrame,
    MultiIndex,
    Series,
)
import pandas._testing as tm
from pandas.tests.apply.common import frame_transform_kernels
from pandas.tests.frame.common import zip_frames

# Regex matched against the TypeError expected when a kernel is applied to a
# column whose values are the ``object`` type itself (see the tests below).
_BAD_DTYPE_MSG = "|".join(
    [
        "not supported between instances of 'type' and 'type'",
        "unsupported operand type",
    ]
)


def unpack_obj(obj, klass, axis):
    """
    Helper to ensure we have the right type of object for a test parametrized
    over frame_or_series.

    Returns ``obj`` unchanged when ``klass`` is DataFrame. Otherwise returns
    ``obj["A"]``, and skips the running test when ``axis`` is not 0.
    """
    if klass is not DataFrame:
        obj = obj["A"]
        if axis != 0:
            pytest.skip(f"Test is only for DataFrame with axis={axis}")
    return obj


def test_transform_ufunc(axis, float_frame, frame_or_series):
    """
    transform with a NumPy ufunc gives the same result as calling the ufunc
    on the object directly.
    """
    # GH 35964
    obj = unpack_obj(float_frame, frame_or_series, axis)

    # Silence floating-point warnings from NumPy while building the expectation
    with np.errstate(all="ignore"):
        f_sqrt = np.sqrt(obj)

    # ufunc
    result = obj.transform(np.sqrt, axis=axis)
    expected = f_sqrt
    tm.assert_equal(result, expected)


@pytest.mark.parametrize(
    "ops, names",
    [
        ([np.sqrt], ["sqrt"]),
        ([np.abs, np.sqrt], ["absolute", "sqrt"]),
        (np.array([np.sqrt]), ["sqrt"]),
        (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
    ],
)
def test_transform_listlike(axis, float_frame, ops, names):
    """
    transform with a list or ndarray of functions labels the result with the
    product of the original labels and the function names.
    """
    # GH 35964
    along_index = axis in {0, "index"}
    other_axis = 1 if along_index else 0
    with np.errstate(all="ignore"):
        expected = zip_frames([op(float_frame) for op in ops], axis=other_axis)
    if along_index:
        expected.columns = MultiIndex.from_product([float_frame.columns, names])
    else:
        expected.index = MultiIndex.from_product([float_frame.index, names])
    result = float_frame.transform(ops, axis=axis)
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("ops", [[], np.array([])])
def test_transform_empty_listlike(float_frame, ops, frame_or_series):
    """
    transform raises ValueError when given an empty list-like of functions.
    """
    obj = unpack_obj(float_frame, frame_or_series, 0)

    with pytest.raises(ValueError, match="No transform functions were provided"):
        obj.transform(ops)


@pytest.mark.parametrize("box", [dict, Series])
def test_transform_dictlike(axis, float_frame, box):
    """
    transform with a dict or Series mapping one label to a function matches
    transforming only that column (or row) with the function.
    """
    # GH 35964
    if axis in {0, "index"}:
        e = float_frame.columns[0]
        expected = float_frame[[e]].transform(np.abs)
    else:
        e = float_frame.index[0]
        expected = float_frame.iloc[[0]].transform(np.abs)
    result = float_frame.transform(box({e: np.abs}), axis=axis)
    tm.assert_frame_equal(result, expected)


def test_transform_dictlike_mixed():
    """
    transform accepts a dict whose values mix lists of functions and single
    functions.
    """
    # GH 40018 - mix of lists and non-lists in values of a dictionary
    df = DataFrame({"a": [1, 2], "b": [1, 4], "c": [1, 4]})
    result = df.transform({"b": ["sqrt", "abs"], "c": "sqrt"})
    expected = DataFrame(
        [[1.0, 1, 1.0], [2.0, 4, 2.0]],
        columns=MultiIndex([("b", "c"), ("sqrt", "abs")], [(0, 0, 1), (0, 1, 0)]),
    )
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "ops",
    [
        {},
        {"A": []},
        {"A": [], "B": "cumsum"},
        {"A": "cumsum", "B": []},
        {"A": [], "B": ["cumsum"]},
        {"A": ["cumsum"], "B": []},
    ],
)
def test_transform_empty_dictlike(float_frame, ops, frame_or_series):
    """
    transform raises ValueError when the dict is empty or any of its values
    is an empty list.
    """
    obj = unpack_obj(float_frame, frame_or_series, 0)

    with pytest.raises(ValueError, match="No transform functions were provided"):
        obj.transform(ops)


@pytest.mark.parametrize("use_apply", [True, False])
def test_transform_udf(axis, float_frame, use_apply, frame_or_series):
    """
    transform with a user-defined function returns ``obj + 1`` whichever of
    the two call paths the function allows.
    """
    # GH 35964
    obj = unpack_obj(float_frame, frame_or_series, axis)

    # transform uses UDF either via apply or passing the entire DataFrame
    def func(x):
        # transform is using apply iff x is not a DataFrame
        if use_apply == isinstance(x, frame_or_series):
            # Force transform to fallback
            raise ValueError
        return x + 1

    result = obj.transform(func, axis=axis)
    expected = obj + 1
    tm.assert_equal(result, expected)


# Kernels excluded from the "must raise" parametrizations below.
wont_fail = ["ffill", "bfill", "fillna", "pad", "backfill", "shift"]
frame_kernels_raise = [x for x in frame_transform_kernels if x not in wont_fail]


@pytest.mark.parametrize("op", [*frame_kernels_raise, lambda x: x + 1])
def test_transform_bad_dtype(op, frame_or_series, request):
    """
    transform raises TypeError for every call form (bare op, list, dict,
    dict of lists) when the data cannot support the operation.
    """
    # GH 35964
    if op == "ngroup":
        request.node.add_marker(
            pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
        )

    # DataFrame that will fail on most transforms
    obj = DataFrame({"A": 3 * [object]})
    obj = tm.get_obj(obj, frame_or_series)

    with pytest.raises(TypeError, match=_BAD_DTYPE_MSG):
        obj.transform(op)
    with pytest.raises(TypeError, match=_BAD_DTYPE_MSG):
        obj.transform([op])
    with pytest.raises(TypeError, match=_BAD_DTYPE_MSG):
        obj.transform({"A": op})
    with pytest.raises(TypeError, match=_BAD_DTYPE_MSG):
        obj.transform({"A": [op]})


@pytest.mark.parametrize("op", frame_kernels_raise)
def test_transform_failure_typeerror(request, op):
    """
    transform raises TypeError for list and dict call forms when one column
    cannot support the operation, even though another column can.
    """
    # GH 35964
    if op == "ngroup":
        request.node.add_marker(
            pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
        )

    # Using object makes most transform kernels fail
    df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]})

    with pytest.raises(TypeError, match=_BAD_DTYPE_MSG):
        df.transform([op])

    with pytest.raises(TypeError, match=_BAD_DTYPE_MSG):
        df.transform({"A": op, "B": op})

    with pytest.raises(TypeError, match=_BAD_DTYPE_MSG):
        df.transform({"A": [op], "B": [op]})

    with pytest.raises(TypeError, match=_BAD_DTYPE_MSG):
        df.transform({"A": [op, "shift"], "B": [op]})


def test_transform_failure_valueerror():
    """
    transform raises ValueError("Transform function failed") for list and
    dict call forms when the function raises ValueError on one column.
    """
    # GH 40211
    def op(x):
        # Column "A" sums to 6 and trips this; column "B" sums to 1500
        if np.sum(np.sum(x)) < 10:
            raise ValueError
        return x

    df = DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]})
    msg = "Transform function failed"

    with pytest.raises(ValueError, match=msg):
        df.transform([op])

    with pytest.raises(ValueError, match=msg):
        df.transform({"A": op, "B": op})

    with pytest.raises(ValueError, match=msg):
        df.transform({"A": [op], "B": [op]})

    with pytest.raises(ValueError, match=msg):
        df.transform({"A": [op, "shift"], "B": [op]})


@pytest.mark.parametrize("use_apply", [True, False])
def test_transform_passes_args(use_apply, frame_or_series):
    """
    transform forwards extra positional and keyword arguments to the
    user-defined function.
    """
    # GH 35964
    # transform uses UDF either via apply or passing the entire DataFrame
    expected_args = [1, 2]
    expected_kwargs = {"c": 3}

    def f(x, a, b, c):
        # transform is using apply iff x is not a DataFrame
        if use_apply == isinstance(x, frame_or_series):
            # Force transform to fallback
            raise ValueError
        assert [a, b] == expected_args
        assert c == expected_kwargs["c"]
        return x

    frame_or_series([1]).transform(f, 0, *expected_args, **expected_kwargs)


def test_transform_empty_dataframe():
    """
    transform on an empty DataFrame, and on one of its columns, returns an
    object equal to the input.
    """
    # https://github.com/pandas-dev/pandas/issues/39636
    df = DataFrame([], columns=["col1", "col2"])
    result = df.transform(lambda x: x + 10)
    tm.assert_frame_equal(result, df)

    result = df["col1"].transform(lambda x: x + 10)
    tm.assert_series_equal(result, df["col1"])