Inzynierka/Lib/site-packages/pandas/tests/copy_view/test_interp_fillna.py
2023-06-02 12:51:02 +02:00

320 lines
10 KiB
Python

import numpy as np
import pytest
from pandas import (
NA,
DataFrame,
Interval,
NaT,
Series,
Timestamp,
interval_range,
)
import pandas._testing as tm
from pandas.tests.copy_view.util import get_array
@pytest.mark.parametrize("method", ["pad", "nearest", "linear"])
def test_interpolate_no_op(using_copy_on_write, method):
    """Interpolating a frame without missing values must not alter the input.

    With copy-on-write the result is expected to share its data with the
    input until the result is written to; without it the result is expected
    not to share data right after the call.
    """
    frame = DataFrame({"a": [1, 2]})
    expected = frame.copy()

    result = frame.interpolate(method=method)

    shared = np.shares_memory(get_array(result, "a"), get_array(frame, "a"))
    if using_copy_on_write:
        assert shared
    else:
        assert not shared

    # Writing into the result must not leak back into the input frame.
    result.iloc[0, 0] = 100

    if using_copy_on_write:
        assert not np.shares_memory(get_array(result, "a"), get_array(frame, "a"))
    tm.assert_frame_equal(frame, expected)
@pytest.mark.parametrize("func", ["ffill", "bfill"])
def test_interp_fill_functions(using_copy_on_write, func):
    """Check that ``ffill``/``bfill`` take the same code paths as interpolate."""
    frame = DataFrame({"a": [1, 2]})
    expected = frame.copy()

    fill = getattr(frame, func)
    result = fill()

    shared = np.shares_memory(get_array(result, "a"), get_array(frame, "a"))
    if using_copy_on_write:
        assert shared
    else:
        assert not shared

    # Writing into the result must not leak back into the input frame.
    result.iloc[0, 0] = 100

    if using_copy_on_write:
        assert not np.shares_memory(get_array(result, "a"), get_array(frame, "a"))
    tm.assert_frame_equal(frame, expected)
@pytest.mark.parametrize("func", ["ffill", "bfill"])
@pytest.mark.parametrize(
    "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
)
def test_interpolate_triggers_copy(using_copy_on_write, vals, func):
    """Filling data that contains missing values must produce unshared data."""
    frame = DataFrame({"a": vals})
    filled = getattr(frame, func)()

    shared = np.shares_memory(get_array(filled, "a"), get_array(frame, "a"))
    assert not shared

    if using_copy_on_write:
        # The result came from a copy, so no references should be recorded
        # for it.
        assert filled._mgr._has_no_reference(0)
@pytest.mark.parametrize(
    "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
)
def test_interpolate_inplace_no_reference_no_copy(using_copy_on_write, vals):
    """In-place interpolate on an unreferenced frame must reuse its data."""
    frame = DataFrame({"a": vals})
    original_values = get_array(frame, "a")

    frame.interpolate(method="linear", inplace=True)

    # The column must still be backed by the array captured before the call.
    assert np.shares_memory(original_values, get_array(frame, "a"))

    if using_copy_on_write:
        # The in-place operation must not leave any reference behind.
        assert frame._mgr._has_no_reference(0)
@pytest.mark.parametrize(
    "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
)
def test_interpolate_inplace_with_refs(using_copy_on_write, vals):
    """In-place interpolate on a frame that has a live view.

    With copy-on-write the frame must get new data and the view must keep the
    original values; otherwise the frame keeps using the original array.
    """
    # NOTE(review): the parametrized ``vals`` is not used here; the frame is
    # always built from the float data below. Confirm whether that is
    # intentional before switching to ``vals``.
    frame = DataFrame({"a": [1, np.nan, 2]})
    expected_view = frame.copy()
    original_values = get_array(frame, "a")
    view = frame[:]

    frame.interpolate(method="linear", inplace=True)

    still_shared = np.shares_memory(original_values, get_array(frame, "a"))
    if not using_copy_on_write:
        assert still_shared
        return

    # Check that copy was triggered in interpolate and that we don't
    # have any references left
    assert not still_shared
    tm.assert_frame_equal(expected_view, view)
    assert frame._mgr._has_no_reference(0)
    assert view._mgr._has_no_reference(0)
def test_interpolate_cleaned_fill_method(using_copy_on_write):
    """Check that the "method is set to None" case works correctly."""
    frame = DataFrame({"a": ["a", np.nan, "c"], "b": 1})
    expected = frame.copy()

    result = frame.interpolate(method="asfreq")

    shared = np.shares_memory(get_array(result, "a"), get_array(frame, "a"))
    if using_copy_on_write:
        assert shared
    else:
        assert not shared

    # Writing into the result must not leak back into the input frame.
    result.iloc[0, 0] = Timestamp("2021-12-31")

    if using_copy_on_write:
        assert not np.shares_memory(get_array(result, "a"), get_array(frame, "a"))
    tm.assert_frame_equal(frame, expected)
def test_interpolate_object_convert_no_op(using_copy_on_write):
    """In-place ``pad`` on an object column of strings keeps its data."""
    frame = DataFrame({"a": ["a", "b", "c"], "b": 1})
    original_values = get_array(frame, "a")

    frame.interpolate(method="pad", inplace=True)

    # NOTE(review): the comment previously at this point read "Now CoW makes
    # a copy, it should not!", yet the assertions below require the data to
    # still be shared. Confirm whether that comment is stale.
    if using_copy_on_write:
        assert frame._mgr._has_no_reference(0)
        assert np.shares_memory(original_values, get_array(frame, "a"))
def test_interpolate_object_convert_copies(using_copy_on_write):
    """In-place ``pad`` on an object column holding integers replaces its data."""
    object_ints = Series([1, 2], dtype=object)
    frame = DataFrame({"a": object_ints, "b": 1})
    original_values = get_array(frame, "a")

    frame.interpolate(method="pad", inplace=True)

    if using_copy_on_write:
        assert frame._mgr._has_no_reference(0)
        # The column is expected to be backed by a different array now.
        assert not np.shares_memory(original_values, get_array(frame, "a"))
def test_interpolate_downcast(using_copy_on_write):
    """In-place ``pad`` with ``downcast="infer"`` keeps the original data."""
    frame = DataFrame({"a": [1, np.nan, 2.5], "b": 1})
    original_values = get_array(frame, "a")

    frame.interpolate(method="pad", inplace=True, downcast="infer")

    if using_copy_on_write:
        assert frame._mgr._has_no_reference(0)
    # Checked in both modes: the column is still backed by the same array.
    assert np.shares_memory(original_values, get_array(frame, "a"))
def test_interpolate_downcast_reference_triggers_copy(using_copy_on_write):
    """In-place ``pad`` with downcast on a frame that has a live view.

    With copy-on-write the frame gets new data and the view keeps the original
    values; otherwise the view reflects the modified frame.
    """
    frame = DataFrame({"a": [1, np.nan, 2.5], "b": 1})
    expected_view = frame.copy()
    original_values = get_array(frame, "a")
    view = frame[:]

    frame.interpolate(method="pad", inplace=True, downcast="infer")

    if not using_copy_on_write:
        tm.assert_frame_equal(frame, view)
        return

    assert frame._mgr._has_no_reference(0)
    assert not np.shares_memory(original_values, get_array(frame, "a"))
    tm.assert_frame_equal(expected_view, view)
def test_fillna(using_copy_on_write):
    """``fillna`` with a scalar must leave the input frame untouched.

    Column ``b`` has nothing to fill: with copy-on-write it is expected to be
    shared between input and result, otherwise not.
    """
    frame = DataFrame({"a": [1.5, np.nan], "b": 1})
    expected = frame.copy()

    filled = frame.fillna(5.5)

    b_shared = np.shares_memory(get_array(frame, "b"), get_array(filled, "b"))
    if using_copy_on_write:
        assert b_shared
    else:
        assert not b_shared

    # Writing into the result must not leak back into the input frame.
    filled.iloc[0, 1] = 100
    tm.assert_frame_equal(expected, frame)
def test_fillna_dict(using_copy_on_write):
    """``fillna`` with a per-column dict must leave the input frame untouched.

    Only column ``a`` is named in the dict. With copy-on-write, ``b`` is
    expected to stay shared while ``a`` is not; without it, ``b`` is expected
    not to be shared.
    """
    frame = DataFrame({"a": [1.5, np.nan], "b": 1})
    expected = frame.copy()

    filled = frame.fillna({"a": 100.5})

    b_shared = np.shares_memory(get_array(frame, "b"), get_array(filled, "b"))
    if using_copy_on_write:
        assert b_shared
        assert not np.shares_memory(get_array(frame, "a"), get_array(filled, "a"))
    else:
        assert not b_shared

    # Writing into the result must not leak back into the input frame.
    filled.iloc[0, 1] = 100
    tm.assert_frame_equal(expected, frame)
@pytest.mark.parametrize("downcast", [None, False])
def test_fillna_inplace(using_copy_on_write, downcast):
    """In-place ``fillna`` on an unreferenced frame must reuse both columns."""
    frame = DataFrame({"a": [1.5, np.nan], "b": 1})
    original_a = get_array(frame, "a")
    original_b = get_array(frame, "b")

    frame.fillna(5.5, inplace=True, downcast=downcast)

    # Both columns must still be backed by the arrays captured before the call.
    assert np.shares_memory(get_array(frame, "a"), original_a)
    assert np.shares_memory(get_array(frame, "b"), original_b)

    if using_copy_on_write:
        # Neither position may report an outstanding reference afterwards.
        assert frame._mgr._has_no_reference(0)
        assert frame._mgr._has_no_reference(1)
def test_fillna_inplace_reference(using_copy_on_write):
    """In-place ``fillna`` on a frame that has a live view.

    With copy-on-write, column ``a`` gets new data, column ``b`` stays on its
    original array, and the view keeps the original values. Without it both
    columns keep their original arrays. The frame ends up filled either way.
    """
    frame = DataFrame({"a": [1.5, np.nan], "b": 1})
    expected_view = frame.copy()
    original_a = get_array(frame, "a")
    original_b = get_array(frame, "b")
    view = frame[:]

    frame.fillna(5.5, inplace=True)

    a_shared = np.shares_memory(get_array(frame, "a"), original_a)
    b_shared = np.shares_memory(get_array(frame, "b"), original_b)
    if using_copy_on_write:
        assert not a_shared
        assert b_shared
        assert view._mgr._has_no_reference(0)
        assert frame._mgr._has_no_reference(0)
        tm.assert_frame_equal(view, expected_view)
    else:
        assert a_shared
        assert b_shared

    # The fill itself must have taken effect regardless of the mode.
    expected = DataFrame({"a": [1.5, 5.5], "b": 1})
    tm.assert_frame_equal(frame, expected)
def test_fillna_interval_inplace_reference(using_copy_on_write):
    """In-place ``fillna`` on an interval Series that has a live view.

    Sharing is checked on the ``left`` values of the interval data. With
    copy-on-write the Series and its view must stop sharing and the view must
    keep the original values; otherwise they keep sharing.
    """
    ser = Series(interval_range(start=0, end=5), name="a")
    ser.iloc[1] = np.nan
    expected_view = ser.copy()
    view = ser[:]

    ser.fillna(value=Interval(left=0, right=5), inplace=True)

    ser_left = get_array(ser, "a").left.values
    view_left = get_array(view, "a").left.values
    shared = np.shares_memory(ser_left, view_left)

    if using_copy_on_write:
        assert not shared
        tm.assert_series_equal(view, expected_view)
    else:
        assert shared
def test_fillna_series_empty_arg(using_copy_on_write):
    """``fillna({})`` on a Series returns a result independent of the input.

    With copy-on-write the result is expected to share data with the input
    right after the call, otherwise not. A later write to the input must not
    show up in the result.
    """
    ser = Series([1, np.nan, 2])
    expected = ser.copy()

    result = ser.fillna({})

    shared = np.shares_memory(get_array(ser), get_array(result))
    if using_copy_on_write:
        assert shared
    else:
        assert not shared

    # Mutate the input; the result must keep the original values.
    ser.iloc[0] = 100.5
    tm.assert_series_equal(expected, result)
def test_fillna_series_empty_arg_inplace(using_copy_on_write):
    """In-place ``fillna({})`` on a Series must keep using the same data."""
    ser = Series([1, np.nan, 2])
    original_values = get_array(ser)

    ser.fillna({}, inplace=True)

    # The Series must still be backed by the array captured before the call.
    assert np.shares_memory(get_array(ser), original_values)

    if using_copy_on_write:
        assert ser._mgr._has_no_reference(0)
def test_fillna_ea_noop_shares_memory(
    using_copy_on_write, any_numeric_ea_and_arrow_dtype
):
    """``fillna`` on extension-dtype columns: only the untouched column is shared.

    Column ``a`` holds a missing value and must never be shared with the
    result. Column ``b`` has nothing to fill: with copy-on-write it is shared
    and tracked as referenced until the result is written to; without it, it
    is expected not to be shared. The input frame must stay unchanged.
    """
    frame = DataFrame(
        {"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype
    )
    expected = frame.copy()

    filled = frame.fillna(100)

    assert not np.shares_memory(get_array(frame, "a"), get_array(filled, "a"))

    b_shared = np.shares_memory(get_array(frame, "b"), get_array(filled, "b"))
    if using_copy_on_write:
        assert b_shared
        assert not filled._mgr._has_no_reference(1)
    else:
        assert not b_shared

    tm.assert_frame_equal(expected, frame)

    # Writing into column ``b`` of the result must split it from the input.
    filled.iloc[0, 1] = 100

    if using_copy_on_write:
        assert not np.shares_memory(get_array(frame, "b"), get_array(filled, "b"))
        assert filled._mgr._has_no_reference(1)
        assert frame._mgr._has_no_reference(1)
    tm.assert_frame_equal(expected, frame)
def test_fillna_inplace_ea_noop_shares_memory(
    using_copy_on_write, any_numeric_ea_and_arrow_dtype
):
    """In-place ``fillna`` on extension-dtype columns while a view is alive.

    Column ``a`` holds a missing value and must not be shared with the view
    afterwards. Column ``b`` has nothing to fill: with copy-on-write it stays
    shared and both objects report a reference for it; without it, it is
    expected not to be shared. The view must keep the original values even
    after a later write to the frame.
    """
    frame = DataFrame(
        {"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype
    )
    expected_view = frame.copy()
    view = frame[:]

    frame.fillna(100, inplace=True)

    assert not np.shares_memory(get_array(frame, "a"), get_array(view, "a"))

    b_shared = np.shares_memory(get_array(frame, "b"), get_array(view, "b"))
    if using_copy_on_write:
        assert b_shared
        assert not frame._mgr._has_no_reference(1)
        assert not view._mgr._has_no_reference(1)
    else:
        assert not b_shared

    # A write to the frame must not be visible through the view.
    frame.iloc[0, 1] = 100
    tm.assert_frame_equal(expected_view, view)