320 lines
10 KiB
Python
320 lines
10 KiB
Python
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
from pandas import (
|
||
|
NA,
|
||
|
DataFrame,
|
||
|
Interval,
|
||
|
NaT,
|
||
|
Series,
|
||
|
Timestamp,
|
||
|
interval_range,
|
||
|
)
|
||
|
import pandas._testing as tm
|
||
|
from pandas.tests.copy_view.util import get_array
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("method", ["pad", "nearest", "linear"])
def test_interpolate_no_op(using_copy_on_write, method):
    # Interpolating a frame that holds no missing values: the parent frame
    # must stay untouched, whichever interpolation method is requested.
    parent = DataFrame({"a": [1, 2]})
    snapshot = parent.copy()

    interpolated = parent.interpolate(method=method)

    # Column "a" is expected to be shared with the parent only when
    # copy-on-write is enabled; otherwise the result owns a separate buffer.
    shares_buffer = np.shares_memory(
        get_array(interpolated, "a"), get_array(parent, "a")
    )
    assert shares_buffer if using_copy_on_write else not shares_buffer

    interpolated.iloc[0, 0] = 100

    if using_copy_on_write:
        # After writing into the result, the two frames must no longer share.
        assert not np.shares_memory(
            get_array(interpolated, "a"), get_array(parent, "a")
        )
    tm.assert_frame_equal(parent, snapshot)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("func", ["ffill", "bfill"])
def test_interp_fill_functions(using_copy_on_write, func):
    # Per the original author's note, this checks that ffill/bfill take the
    # same code paths as interpolate; the assertions mirror the no-op case.
    parent = DataFrame({"a": [1, 2]})
    snapshot = parent.copy()

    fill = getattr(parent, func)
    filled = fill()

    # Sharing of column "a" is expected only with copy-on-write enabled.
    shares_buffer = np.shares_memory(get_array(filled, "a"), get_array(parent, "a"))
    assert shares_buffer if using_copy_on_write else not shares_buffer

    filled.iloc[0, 0] = 100

    if using_copy_on_write:
        # The write into the result must have separated the two buffers.
        assert not np.shares_memory(get_array(filled, "a"), get_array(parent, "a"))
    tm.assert_frame_equal(parent, snapshot)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("func", ["ffill", "bfill"])
@pytest.mark.parametrize(
    "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
)
def test_interpolate_triggers_copy(using_copy_on_write, vals, func):
    # Filling a column that actually contains a missing value: the result must
    # live in its own buffer, in both copy-on-write and legacy mode.
    parent = DataFrame({"a": vals})
    filled = getattr(parent, func)()

    parent_values = get_array(parent, "a")
    filled_values = get_array(filled, "a")
    assert not np.shares_memory(filled_values, parent_values)

    if using_copy_on_write:
        # The freshly copied block must not be tracked as referenced.
        assert filled._mgr._has_no_reference(0)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
)
def test_interpolate_inplace_no_reference_no_copy(using_copy_on_write, vals):
    # In-place interpolation on a frame nobody else references: the column
    # must keep using the buffer it had before the call.
    frame = DataFrame({"a": vals})
    buffer_before = get_array(frame, "a")

    frame.interpolate(method="linear", inplace=True)

    buffer_after = get_array(frame, "a")
    assert np.shares_memory(buffer_before, buffer_after)

    if using_copy_on_write:
        # No copy is expected here, and no reference should have been recorded.
        assert frame._mgr._has_no_reference(0)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
)
def test_interpolate_inplace_with_refs(using_copy_on_write, vals):
    # In-place interpolation while a second frame (`view`) refers to the same
    # data.
    # NOTE(review): `vals` is parametrized but never used; the frame is always
    # built from the float list below, so both parametrizations exercise the
    # same data. Confirm whether this is intentional before wiring `vals` in.
    frame = DataFrame({"a": [1, np.nan, 2]})
    snapshot = frame.copy()
    buffer_before = get_array(frame, "a")
    view = frame[:]

    frame.interpolate(method="linear", inplace=True)

    still_shared = np.shares_memory(buffer_before, get_array(frame, "a"))
    if not using_copy_on_write:
        # Legacy mode: the operation works on the original buffer.
        assert still_shared
        return

    # Copy-on-write: interpolate must have copied, the view keeps the original
    # values, and neither object may hold a reference afterwards.
    assert not still_shared
    tm.assert_frame_equal(snapshot, view)
    assert frame._mgr._has_no_reference(0)
    assert view._mgr._has_no_reference(0)
|
||
|
|
||
|
|
||
|
def test_interpolate_cleaned_fill_method(using_copy_on_write):
    # Covers what the original author calls the "method is set to None" case,
    # reached here by passing method="asfreq" on a frame with an object column.
    parent = DataFrame({"a": ["a", np.nan, "c"], "b": 1})
    snapshot = parent.copy()

    interpolated = parent.interpolate(method="asfreq")

    # Column "a" is expected to be shared only with copy-on-write enabled.
    shares_buffer = np.shares_memory(
        get_array(interpolated, "a"), get_array(parent, "a")
    )
    assert shares_buffer if using_copy_on_write else not shares_buffer

    interpolated.iloc[0, 0] = Timestamp("2021-12-31")

    if using_copy_on_write:
        # Writing into the result must have separated it from the parent.
        assert not np.shares_memory(
            get_array(interpolated, "a"), get_array(parent, "a")
        )
    tm.assert_frame_equal(parent, snapshot)
|
||
|
|
||
|
|
||
|
def test_interpolate_object_convert_no_op(using_copy_on_write):
    # In-place "pad" interpolation on an object column of strings with no
    # missing entries.
    frame = DataFrame({"a": ["a", "b", "c"], "b": 1})
    buffer_before = get_array(frame, "a")

    frame.interpolate(method="pad", inplace=True)

    # NOTE(review): the original comment here read "Now CoW makes a copy, it
    # should not!", yet the assertions require the buffer to remain shared
    # under copy-on-write. The remark looks stale; confirm and drop it.
    if using_copy_on_write:
        assert frame._mgr._has_no_reference(0)
        buffer_after = get_array(frame, "a")
        assert np.shares_memory(buffer_before, buffer_after)
|
||
|
|
||
|
|
||
|
def test_interpolate_object_convert_copies(using_copy_on_write):
    # In-place "pad" interpolation on an object-dtype column that holds
    # integers: under copy-on-write the column must end up in a new buffer.
    int_as_object = Series([1, 2], dtype=object)
    frame = DataFrame({"a": int_as_object, "b": 1})
    buffer_before = get_array(frame, "a")

    frame.interpolate(method="pad", inplace=True)

    if using_copy_on_write:
        assert frame._mgr._has_no_reference(0)
        buffer_after = get_array(frame, "a")
        assert not np.shares_memory(buffer_before, buffer_after)
|
||
|
|
||
|
|
||
|
def test_interpolate_downcast(using_copy_on_write):
    # In-place "pad" interpolation with downcast="infer" on a float column
    # whose values (2.5) cannot be represented as integers.
    frame = DataFrame({"a": [1, np.nan, 2.5], "b": 1})
    buffer_before = get_array(frame, "a")

    frame.interpolate(method="pad", inplace=True, downcast="infer")

    if using_copy_on_write:
        # The column must still use its original buffer, without references.
        assert frame._mgr._has_no_reference(0)
        buffer_after = get_array(frame, "a")
        assert np.shares_memory(buffer_before, buffer_after)
|
||
|
|
||
|
|
||
|
def test_interpolate_downcast_reference_triggers_copy(using_copy_on_write):
    # Same operation as the plain downcast test, but with a second frame
    # (`view`) created from the data before the in-place call.
    frame = DataFrame({"a": [1, np.nan, 2.5], "b": 1})
    snapshot = frame.copy()
    buffer_before = get_array(frame, "a")
    view = frame[:]

    frame.interpolate(method="pad", inplace=True, downcast="infer")

    if not using_copy_on_write:
        # Legacy mode: the view observes the in-place modification.
        tm.assert_frame_equal(frame, view)
        return

    # Copy-on-write: the frame moved to a new buffer and the view is unchanged.
    assert frame._mgr._has_no_reference(0)
    assert not np.shares_memory(buffer_before, get_array(frame, "a"))
    tm.assert_frame_equal(snapshot, view)
|
||
|
|
||
|
|
||
|
def test_fillna(using_copy_on_write):
    # fillna with a scalar: column "b" has nothing to fill, so it is expected
    # to be shared with the parent only when copy-on-write is enabled.
    parent = DataFrame({"a": [1.5, np.nan], "b": 1})
    snapshot = parent.copy()

    filled = parent.fillna(5.5)

    b_shared = np.shares_memory(get_array(parent, "b"), get_array(filled, "b"))
    assert b_shared if using_copy_on_write else not b_shared

    # Writing into the result must never leak back into the parent.
    filled.iloc[0, 1] = 100
    tm.assert_frame_equal(snapshot, parent)
|
||
|
|
||
|
|
||
|
def test_fillna_dict(using_copy_on_write):
    # fillna with a per-column mapping that names only column "a".
    parent = DataFrame({"a": [1.5, np.nan], "b": 1})
    snapshot = parent.copy()

    filled = parent.fillna({"a": 100.5})

    b_shared = np.shares_memory(get_array(parent, "b"), get_array(filled, "b"))
    if using_copy_on_write:
        # The untouched column is shared, the filled column is not.
        assert b_shared
        assert not np.shares_memory(get_array(parent, "a"), get_array(filled, "a"))
    else:
        assert not b_shared

    # Writing into the result must never leak back into the parent.
    filled.iloc[0, 1] = 100
    tm.assert_frame_equal(snapshot, parent)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("downcast", [None, False])
def test_fillna_inplace(using_copy_on_write, downcast):
    # In-place fillna on a frame without other references: both columns must
    # keep using the buffers they had before the call.
    frame = DataFrame({"a": [1.5, np.nan], "b": 1})
    buffers_before = {label: get_array(frame, label) for label in ("a", "b")}

    frame.fillna(5.5, inplace=True, downcast=downcast)

    for label, old_buffer in buffers_before.items():
        assert np.shares_memory(get_array(frame, label), old_buffer)

    if using_copy_on_write:
        # Neither block may be tracked as referenced afterwards.
        for block_index in (0, 1):
            assert frame._mgr._has_no_reference(block_index)
|
||
|
|
||
|
|
||
|
def test_fillna_inplace_reference(using_copy_on_write):
    # In-place fillna while a second frame (`view`) refers to the same data.
    frame = DataFrame({"a": [1.5, np.nan], "b": 1})
    snapshot = frame.copy()
    a_before = get_array(frame, "a")
    b_before = get_array(frame, "b")
    view = frame[:]

    frame.fillna(5.5, inplace=True)

    a_shared = np.shares_memory(get_array(frame, "a"), a_before)
    b_shared = np.shares_memory(get_array(frame, "b"), b_before)

    # Column "b" keeps its original buffer in both modes.
    assert b_shared
    if using_copy_on_write:
        # Column "a" was copied before being filled; the view is unchanged.
        assert not a_shared
        assert view._mgr._has_no_reference(0)
        assert frame._mgr._has_no_reference(0)
        tm.assert_frame_equal(view, snapshot)
    else:
        assert a_shared

    filled_expected = DataFrame({"a": [1.5, 5.5], "b": 1})
    tm.assert_frame_equal(frame, filled_expected)
|
||
|
|
||
|
|
||
|
def test_fillna_interval_inplace_reference(using_copy_on_write):
    # In-place fillna on an interval-valued Series while a second Series
    # (`view`) refers to the same data.
    ser = Series(interval_range(start=0, end=5), name="a")
    ser.iloc[1] = np.nan

    snapshot = ser.copy()
    view = ser[:]
    replacement = Interval(left=0, right=5)
    ser.fillna(value=replacement, inplace=True)

    # Compare the buffers that back the left interval bounds.
    ser_left = get_array(ser, "a").left.values
    view_left = get_array(view, "a").left.values
    lefts_shared = np.shares_memory(ser_left, view_left)

    if using_copy_on_write:
        assert not lefts_shared
        tm.assert_series_equal(view, snapshot)
    else:
        assert lefts_shared
|
||
|
|
||
|
|
||
|
def test_fillna_series_empty_arg(using_copy_on_write):
    # fillna with an empty mapping on a Series that contains a missing value.
    ser = Series([1, np.nan, 2])
    snapshot = ser.copy()

    filled = ser.fillna({})

    # The result is expected to share its buffer only under copy-on-write.
    shares_buffer = np.shares_memory(get_array(ser), get_array(filled))
    assert shares_buffer if using_copy_on_write else not shares_buffer

    # Writing into the source afterwards must leave the result unchanged.
    ser.iloc[0] = 100.5
    tm.assert_series_equal(snapshot, filled)
|
||
|
|
||
|
|
||
|
def test_fillna_series_empty_arg_inplace(using_copy_on_write):
    # In-place fillna with an empty mapping: the Series must keep using the
    # buffer it had before the call.
    ser = Series([1, np.nan, 2])
    buffer_before = get_array(ser)

    ser.fillna({}, inplace=True)

    buffer_after = get_array(ser)
    assert np.shares_memory(buffer_after, buffer_before)

    if using_copy_on_write:
        # No reference should be recorded on the single block.
        assert ser._mgr._has_no_reference(0)
|
||
|
|
||
|
|
||
|
def test_fillna_ea_noop_shares_memory(
    using_copy_on_write, any_numeric_ea_and_arrow_dtype
):
    # fillna on a frame built with the parametrized dtype: column "a" holds an
    # NA to fill, column "b" holds none.
    parent = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype)
    snapshot = parent.copy()
    filled = parent.fillna(100)

    # The column that was filled never shares memory with the parent.
    assert not np.shares_memory(get_array(parent, "a"), get_array(filled, "a"))

    b_shared = np.shares_memory(get_array(parent, "b"), get_array(filled, "b"))
    if using_copy_on_write:
        # The untouched column is shared and tracked as referenced.
        assert b_shared
        assert not filled._mgr._has_no_reference(1)
    else:
        assert not b_shared

    tm.assert_frame_equal(snapshot, parent)

    filled.iloc[0, 1] = 100
    if using_copy_on_write:
        # The write separates column "b" and clears references on both sides.
        assert not np.shares_memory(get_array(parent, "b"), get_array(filled, "b"))
        assert filled._mgr._has_no_reference(1)
        assert parent._mgr._has_no_reference(1)
    tm.assert_frame_equal(snapshot, parent)
|
||
|
|
||
|
|
||
|
def test_fillna_inplace_ea_noop_shares_memory(
    using_copy_on_write, any_numeric_ea_and_arrow_dtype
):
    # In-place fillna on a frame built with the parametrized dtype while a
    # second frame (`view`) refers to the same data.
    frame = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype)
    snapshot = frame.copy()
    view = frame[:]
    frame.fillna(100, inplace=True)

    # The column that was filled never shares memory with the view.
    assert not np.shares_memory(get_array(frame, "a"), get_array(view, "a"))

    b_shared = np.shares_memory(get_array(frame, "b"), get_array(view, "b"))
    if using_copy_on_write:
        # The untouched column stays shared, with references on both sides.
        assert b_shared
        assert not frame._mgr._has_no_reference(1)
        assert not view._mgr._has_no_reference(1)
    else:
        assert not b_shared

    # A later write into the frame must not reach the view.
    frame.iloc[0, 1] = 100
    tm.assert_frame_equal(snapshot, view)
|