648 lines
23 KiB
Python
648 lines
23 KiB
Python
|
from string import ascii_letters
|
||
|
|
||
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
from pandas.errors import (
|
||
|
SettingWithCopyError,
|
||
|
SettingWithCopyWarning,
|
||
|
)
|
||
|
import pandas.util._test_decorators as td
|
||
|
|
||
|
import pandas as pd
|
||
|
from pandas import (
|
||
|
DataFrame,
|
||
|
Index,
|
||
|
Series,
|
||
|
Timestamp,
|
||
|
date_range,
|
||
|
option_context,
|
||
|
)
|
||
|
import pandas._testing as tm
|
||
|
|
||
|
# Text expected in the SettingWithCopyError message; the tests below pass it
# as the ``match=`` pattern to ``pytest.raises(SettingWithCopyError, ...)``.
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
|
||
|
|
||
|
|
||
|
def random_text(nobs=100):
    """
    Build a single-column DataFrame of substrings of ``ascii_letters``.

    Parameters
    ----------
    nobs : int, default 100
        Number of rows to generate.

    Returns
    -------
    DataFrame
        One column named ``"letters"``; each value is a contiguous slice of
        ``ascii_letters``. The generator is seeded with ``2`` on every call,
        so the same ``nobs`` always produces the same frame.
    """
    rng = np.random.default_rng(2)
    # One pair of slice bounds per row; sorting each pair puts the smaller
    # bound first so it can be used directly as ``[start:stop]``.
    bounds = np.sort(rng.integers(len(ascii_letters), size=(nobs, 2)), axis=1)
    pieces = [ascii_letters[start:stop] for start, stop in bounds]
    return DataFrame(pieces, columns=["letters"])
|
||
|
|
||
|
|
||
|
class TestCaching:
    """
    Tests that column lookups on a DataFrame stay consistent after setitem.

    The ``using_copy_on_write`` and ``warn_copy_on_write`` arguments are not
    defined in this file (presumably pytest fixtures from a conftest --
    TODO confirm); the tests branch on them to pick the expected outcome.
    """

    def test_slice_consolidate_invalidate_item_cache(self, using_copy_on_write):
        """Chained ``df["bb"].iloc[0] = ...`` after ``df["bb"]`` was looked up (#3970)."""
        # this is chained assignment, but will 'work'
        with option_context("chained_assignment", None):
            # #3970
            df = DataFrame({"aa": np.arange(5), "bb": [2.2] * 5})

            # Creates a second float block
            df["cc"] = 0.0

            # caches a reference to the 'bb' series
            df["bb"]

            # Assignment to wrong series
            with tm.raises_chained_assignment_error():
                df["bb"].iloc[0] = 0.17
            df._clear_item_cache()
            if not using_copy_on_write:
                # without copy-on-write the write is expected to reach the parent
                tm.assert_almost_equal(df["bb"][0], 0.17)
            else:
                # under copy-on-write (this branch) the parent is expected to
                # keep its original value after the chained assignment
                tm.assert_almost_equal(df["bb"][0], 2.2)

    @pytest.mark.parametrize("do_ref", [True, False])
    def test_setitem_cache_updating(self, do_ref):
        """Enlarging via ``df.loc[7, "c"] = 1`` with or without a prior lookup (GH 5424)."""
        # GH 5424
        cont = ["one", "two", "three", "four", "five", "six", "seven"]

        df = DataFrame({"a": cont, "b": cont[3:] + cont[:3], "c": np.arange(7)})

        # ref the cache
        if do_ref:
            df.loc[0, "c"]

        # set it (label 7 is not in the original 0..6 index)
        df.loc[7, "c"] = 1

        assert df.loc[0, "c"] == 0.0
        assert df.loc[7, "c"] == 1.0

    def test_setitem_cache_updating_slices(
        self, using_copy_on_write, warn_copy_on_write
    ):
        """Accumulate into a column three ways: ``.loc`` set, chained set, ``.loc +=`` (GH 7084)."""
        # GH 7084
        # not updating cache on series setting with slices
        expected = DataFrame(
            {"A": [600, 600, 600]}, index=date_range("5/7/2014", "5/9/2014")
        )
        out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
        df = DataFrame({"C": ["A", "A", "A"], "D": [100, 200, 300]})

        # loop through df to update out
        six = Timestamp("5/7/2014")
        eix = Timestamp("5/9/2014")
        for ix, row in df.iterrows():
            out.loc[six:eix, row["C"]] = out.loc[six:eix, row["C"]] + row["D"]

        tm.assert_frame_equal(out, expected)
        tm.assert_series_equal(out["A"], expected["A"])

        # try via a chain indexing
        # this actually works
        out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
        out_original = out.copy()
        for ix, row in df.iterrows():
            v = out[row["C"]][six:eix] + row["D"]
            # the flag passed to the helper is true on the first iteration,
            # and on every iteration in either copy-on-write mode
            with tm.raises_chained_assignment_error(
                (ix == 0) or warn_copy_on_write or using_copy_on_write
            ):
                out[row["C"]][six:eix] = v

        if not using_copy_on_write:
            tm.assert_frame_equal(out, expected)
            tm.assert_series_equal(out["A"], expected["A"])
        else:
            # under copy-on-write the chained writes are expected to leave
            # ``out`` unchanged
            tm.assert_frame_equal(out, out_original)
            tm.assert_series_equal(out["A"], out_original["A"])

        out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014"))
        for ix, row in df.iterrows():
            out.loc[six:eix, row["C"]] += row["D"]

        tm.assert_frame_equal(out, expected)
        tm.assert_series_equal(out["A"], expected["A"])

    def test_altering_series_clears_parent_cache(
        self, using_copy_on_write, warn_copy_on_write
    ):
        """Enlarging a Series taken from a frame must not leave it in ``df._item_cache`` (GH 33675)."""
        # GH #33675
        df = DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"])
        ser = df["A"]

        if using_copy_on_write or warn_copy_on_write:
            assert "A" not in df._item_cache
        else:
            assert "A" in df._item_cache

        # Adding a new entry to ser swaps in a new array, so "A" needs to
        # be removed from df._item_cache
        ser["c"] = 5
        assert len(ser) == 3
        assert "A" not in df._item_cache
        assert df["A"] is not ser
        assert len(df["A"]) == 2
|
||
|
|
||
|
|
||
|
class TestChaining:
    """
    Tests for chained indexing/assignment on DataFrame and Series.

    The ``using_copy_on_write``, ``warn_copy_on_write`` and
    ``using_array_manager`` arguments are not defined in this file
    (presumably pytest fixtures from a conftest -- TODO confirm); each test
    branches on them to select the expected error, warning or result.
    """

    def test_setitem_chained_setfault(self, using_copy_on_write):
        """Chained boolean-mask and ``.iloc`` sets through attribute/column access (GH 6026, GH 6056)."""
        # GH6026
        data = ["right", "left", "left", "left", "right", "left", "timeout"]
        # ``data`` with the trailing "timeout" replaced by "none"
        mdata = ["right", "left", "left", "left", "right", "left", "none"]

        df = DataFrame({"response": np.array(data)})
        mask = df.response == "timeout"
        with tm.raises_chained_assignment_error():
            df.response[mask] = "none"
        if using_copy_on_write:
            tm.assert_frame_equal(df, DataFrame({"response": data}))
        else:
            tm.assert_frame_equal(df, DataFrame({"response": mdata}))

        # same scenario, frame built from a numpy record array
        recarray = np.rec.fromarrays([data], names=["response"])
        df = DataFrame(recarray)
        mask = df.response == "timeout"
        with tm.raises_chained_assignment_error():
            df.response[mask] = "none"
        if using_copy_on_write:
            tm.assert_frame_equal(df, DataFrame({"response": data}))
        else:
            tm.assert_frame_equal(df, DataFrame({"response": mdata}))

        # same scenario, frame with a second column
        df = DataFrame({"response": data, "response1": data})
        df_original = df.copy()
        mask = df.response == "timeout"
        with tm.raises_chained_assignment_error():
            df.response[mask] = "none"
        if using_copy_on_write:
            tm.assert_frame_equal(df, df_original)
        else:
            tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data}))

        # GH 6056
        # NOTE(review): this ``expected`` is reassigned in both branches below
        # before it is ever read.
        expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]})
        df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])})
        with tm.raises_chained_assignment_error():
            df["A"].iloc[0] = np.nan
        if using_copy_on_write:
            expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]})
        else:
            expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]})
        result = df.head()
        tm.assert_frame_equal(result, expected)

        # same check via attribute access; reuses ``expected`` chosen above
        df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])})
        with tm.raises_chained_assignment_error():
            df.A.iloc[0] = np.nan
        result = df.head()
        tm.assert_frame_equal(result, expected)

    @pytest.mark.arm_slow
    def test_detect_chained_assignment(self, using_copy_on_write):
        """``df["A"][i] = v`` on a single-dtype frame with the option set to "raise"."""
        with option_context("chained_assignment", "raise"):
            # work with the chain
            expected = DataFrame([[-5, 1], [-6, 3]], columns=list("AB"))
            df = DataFrame(
                np.arange(4).reshape(2, 2), columns=list("AB"), dtype="int64"
            )
            df_original = df.copy()
            assert df._is_copy is None

            with tm.raises_chained_assignment_error():
                df["A"][0] = -5
            with tm.raises_chained_assignment_error():
                df["A"][1] = -6
            if using_copy_on_write:
                tm.assert_frame_equal(df, df_original)
            else:
                tm.assert_frame_equal(df, expected)

    @pytest.mark.arm_slow
    def test_detect_chained_assignment_raises(
        self, using_array_manager, using_copy_on_write, warn_copy_on_write
    ):
        """``df["A"][i] = v`` on a frame with int64 and float64 columns, per mode."""
        # test with the chaining
        df = DataFrame(
            {
                "A": Series(range(2), dtype="int64"),
                "B": np.array(np.arange(2, 4), dtype=np.float64),
            }
        )
        df_original = df.copy()
        assert df._is_copy is None

        if using_copy_on_write:
            with tm.raises_chained_assignment_error():
                df["A"][0] = -5
            with tm.raises_chained_assignment_error():
                df["A"][1] = -6
            tm.assert_frame_equal(df, df_original)
        elif warn_copy_on_write:
            with tm.raises_chained_assignment_error():
                df["A"][0] = -5
            with tm.raises_chained_assignment_error():
                df["A"][1] = np.nan
        elif not using_array_manager:
            with pytest.raises(SettingWithCopyError, match=msg):
                with tm.raises_chained_assignment_error():
                    df["A"][0] = -5

            with pytest.raises(SettingWithCopyError, match=msg):
                with tm.raises_chained_assignment_error():
                    df["A"][1] = np.nan

            assert df["A"]._is_copy is None
        else:
            # INFO(ArrayManager) for ArrayManager it doesn't matter that it's
            # a mixed dataframe
            df["A"][0] = -5
            df["A"][1] = -6
            expected = DataFrame([[-5, 2], [-6, 3]], columns=list("AB"))
            expected["B"] = expected["B"].astype("float64")
            tm.assert_frame_equal(df, expected)

    @pytest.mark.arm_slow
    def test_detect_chained_assignment_fails(
        self, using_copy_on_write, warn_copy_on_write
    ):
        """``df.loc[0]["A"] = v`` on a frame with int64 and float64 columns is flagged."""
        # Using a copy (the chain), fails
        df = DataFrame(
            {
                "A": Series(range(2), dtype="int64"),
                "B": np.array(np.arange(2, 4), dtype=np.float64),
            }
        )

        if using_copy_on_write or warn_copy_on_write:
            with tm.raises_chained_assignment_error():
                df.loc[0]["A"] = -5
        else:
            with pytest.raises(SettingWithCopyError, match=msg):
                df.loc[0]["A"] = -5

    @pytest.mark.arm_slow
    def test_detect_chained_assignment_doc_example(
        self, using_copy_on_write, warn_copy_on_write
    ):
        """``df[bool_mask]["c"] = v`` is flagged."""
        # Doc example
        df = DataFrame(
            {
                "a": ["one", "one", "two", "three", "two", "one", "six"],
                "c": Series(range(7), dtype="int64"),
            }
        )
        assert df._is_copy is None

        indexer = df.a.str.startswith("o")
        if using_copy_on_write or warn_copy_on_write:
            with tm.raises_chained_assignment_error():
                df[indexer]["c"] = 42
        else:
            with pytest.raises(SettingWithCopyError, match=msg):
                df[indexer]["c"] = 42

    @pytest.mark.arm_slow
    def test_detect_chained_assignment_object_dtype(
        self, using_array_manager, using_copy_on_write, warn_copy_on_write
    ):
        """Chained sets into an object-dtype column, per mode."""
        expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]})
        df = DataFrame(
            {"A": Series(["aaa", "bbb", "ccc"], dtype=object), "B": [1, 2, 3]}
        )
        df_original = df.copy()

        # row-then-column chain: only checked outside both copy-on-write modes
        if not using_copy_on_write and not warn_copy_on_write:
            with pytest.raises(SettingWithCopyError, match=msg):
                df.loc[0]["A"] = 111

        # column-then-row chain
        if using_copy_on_write:
            with tm.raises_chained_assignment_error():
                df["A"][0] = 111
            tm.assert_frame_equal(df, df_original)
        elif warn_copy_on_write:
            with tm.raises_chained_assignment_error():
                df["A"][0] = 111
            tm.assert_frame_equal(df, expected)
        elif not using_array_manager:
            with pytest.raises(SettingWithCopyError, match=msg):
                with tm.raises_chained_assignment_error():
                    df["A"][0] = 111

            # the direct (non-chained) setter produces the expected frame
            df.loc[0, "A"] = 111
            tm.assert_frame_equal(df, expected)
        else:
            # INFO(ArrayManager) for ArrayManager it doesn't matter that it's
            # a mixed dataframe
            df["A"][0] = 111
            tm.assert_frame_equal(df, expected)

    @pytest.mark.arm_slow
    def test_detect_chained_assignment_is_copy_pickle(self):
        """Column assignment on a frame read back from a pickle must not raise (gh-5475)."""
        # gh-5475: Make sure that is_copy is picked up on reconstruction
        df = DataFrame({"A": [1, 2]})
        assert df._is_copy is None

        with tm.ensure_clean("__tmp__pickle") as path:
            df.to_pickle(path)
            df2 = pd.read_pickle(path)
            # NOTE(review): the same assignment is issued twice; presumably the
            # repeat is what gh-5475 exercised -- confirm before deduplicating.
            df2["B"] = df2["A"]
            df2["B"] = df2["A"]

    @pytest.mark.arm_slow
    def test_detect_chained_assignment_setting_entire_column(self):
        """Replacing a whole column on an explicit ``.copy()`` must not raise (gh-5597)."""
        # gh-5597: a spurious raise as we are setting the entire column here

        df = random_text(100000)

        # Always a copy
        x = df.iloc[[0, 1, 2]]
        assert x._is_copy is not None

        x = df.iloc[[0, 1, 2, 4]]
        assert x._is_copy is not None

        # Explicitly copy
        indexer = df.letters.apply(lambda x: len(x) > 10)
        df = df.loc[indexer].copy()

        assert df._is_copy is None
        df["letters"] = df["letters"].apply(str.lower)

    @pytest.mark.arm_slow
    def test_detect_chained_assignment_implicit_take(self):
        """Replacing a whole column on a boolean-``.loc`` result must not raise."""
        # Implicitly take
        df = random_text(100000)
        indexer = df.letters.apply(lambda x: len(x) > 10)
        df = df.loc[indexer]

        assert df._is_copy is not None
        df["letters"] = df["letters"].apply(str.lower)

    @pytest.mark.arm_slow
    def test_detect_chained_assignment_implicit_take2(
        self, using_copy_on_write, warn_copy_on_write
    ):
        """Track ``_is_copy`` across ``.loc[:, col] = ...`` and ``df[col] = ...``; skipped in both copy-on-write modes."""
        if using_copy_on_write or warn_copy_on_write:
            pytest.skip("_is_copy is not always set for CoW")
        # Implicitly take 2
        df = random_text(100000)
        indexer = df.letters.apply(lambda x: len(x) > 10)

        df = df.loc[indexer]
        assert df._is_copy is not None
        df.loc[:, "letters"] = df["letters"].apply(str.lower)

        # with the enforcement of #45333 in 2.0, the .loc[:, letters] setting
        # is inplace, so df._is_copy remains non-None.
        assert df._is_copy is not None

        # plain column replacement is expected to reset ``_is_copy``
        df["letters"] = df["letters"].apply(str.lower)
        assert df._is_copy is None

    @pytest.mark.arm_slow
    def test_detect_chained_assignment_str(self):
        """A single ``.loc[mask, col] = ...`` assignment must not raise."""
        df = random_text(100000)
        indexer = df.letters.apply(lambda x: len(x) > 10)
        df.loc[indexer, "letters"] = df.loc[indexer, "letters"].apply(str.lower)

    @pytest.mark.arm_slow
    def test_detect_chained_assignment_is_copy(self):
        """The result of ``dropna()`` here has ``_is_copy`` unset and accepts ``+=``."""
        # an identical take, so no copy
        df = DataFrame({"a": [1]}).dropna()
        assert df._is_copy is None
        df["a"] += 1

    @pytest.mark.arm_slow
    def test_detect_chained_assignment_sorting(self):
        """Sorting a column selected via ``iloc[:, 0]`` or ``df[0]`` gives equal results."""
        df = DataFrame(np.random.default_rng(2).standard_normal((10, 4)))
        ser = df.iloc[:, 0].sort_values()

        tm.assert_series_equal(ser, df.iloc[:, 0].sort_values())
        tm.assert_series_equal(ser, df[0].sort_values())

    @pytest.mark.arm_slow
    def test_detect_chained_assignment_false_positives(self):
        """Column reassignment before and after boolean filtering must not raise (gh-6025)."""
        # see gh-6025: false positives
        df = DataFrame({"column1": ["a", "a", "a"], "column2": [4, 8, 9]})
        # the ``str(df)`` calls render the frame between steps; results are discarded
        str(df)

        df["column1"] = df["column1"] + "b"
        str(df)

        df = df[df["column2"] != 8]
        str(df)

        df["column1"] = df["column1"] + "c"
        str(df)

    @pytest.mark.arm_slow
    def test_detect_chained_assignment_undefined_column(
        self, using_copy_on_write, warn_copy_on_write
    ):
        """``df.iloc[0:5]["group"] = v`` is flagged."""
        # from SO:
        # https://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc
        df = DataFrame(np.arange(0, 9), columns=["count"])
        df["group"] = "b"
        df_original = df.copy()

        if using_copy_on_write:
            with tm.raises_chained_assignment_error():
                df.iloc[0:5]["group"] = "a"
            tm.assert_frame_equal(df, df_original)
        elif warn_copy_on_write:
            with tm.raises_chained_assignment_error():
                df.iloc[0:5]["group"] = "a"
        else:
            with pytest.raises(SettingWithCopyError, match=msg):
                with tm.raises_chained_assignment_error():
                    df.iloc[0:5]["group"] = "a"

    @pytest.mark.arm_slow
    def test_detect_chained_assignment_changing_dtype(
        self, using_array_manager, using_copy_on_write, warn_copy_on_write
    ):
        """Chained sets of a string into string and int64 columns of a mixed frame."""
        # Mixed type setting but same dtype & changing dtype
        df = DataFrame(
            {
                "A": date_range("20130101", periods=5),
                "B": np.random.default_rng(2).standard_normal(5),
                "C": np.arange(5, dtype="int64"),
                "D": ["a", "b", "c", "d", "e"],
            }
        )
        df_original = df.copy()

        if using_copy_on_write or warn_copy_on_write:
            with tm.raises_chained_assignment_error():
                df.loc[2]["D"] = "foo"
            with tm.raises_chained_assignment_error():
                df.loc[2]["C"] = "foo"
            tm.assert_frame_equal(df, df_original)
            # writing a string into the int64 column "C"; FutureWarning is
            # passed to the helper as an additional warning class
            with tm.raises_chained_assignment_error(extra_warnings=(FutureWarning,)):
                df["C"][2] = "foo"
            if using_copy_on_write:
                tm.assert_frame_equal(df, df_original)
            else:
                assert df.loc[2, "C"] == "foo"
        else:
            with pytest.raises(SettingWithCopyError, match=msg):
                df.loc[2]["D"] = "foo"

            with pytest.raises(SettingWithCopyError, match=msg):
                df.loc[2]["C"] = "foo"

            if not using_array_manager:
                with pytest.raises(SettingWithCopyError, match=msg):
                    with tm.raises_chained_assignment_error():
                        df["C"][2] = "foo"
            else:
                # INFO(ArrayManager) for ArrayManager it doesn't matter if it's
                # changing the dtype or not
                df["C"][2] = "foo"
                assert df.loc[2, "C"] == "foo"

    def test_setting_with_copy_bug(self, using_copy_on_write, warn_copy_on_write):
        """``df[["c"]][mask] = ...`` is flagged."""
        # operating on a copy
        df = DataFrame(
            {"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]}
        )
        df_original = df.copy()
        mask = pd.isna(df.c)

        if using_copy_on_write:
            with tm.raises_chained_assignment_error():
                df[["c"]][mask] = df[["b"]][mask]
            tm.assert_frame_equal(df, df_original)
        elif warn_copy_on_write:
            with tm.raises_chained_assignment_error():
                df[["c"]][mask] = df[["b"]][mask]
        else:
            with pytest.raises(SettingWithCopyError, match=msg):
                df[["c"]][mask] = df[["b"]][mask]

    def test_setting_with_copy_bug_no_warning(self):
        """Adding a new column to ``df1[["x"]]`` must not raise (GH 8730)."""
        # invalid warning as we are returning a new object
        # GH 8730
        df1 = DataFrame({"x": Series(["a", "b", "c"]), "y": Series(["d", "e", "f"])})
        df2 = df1[["x"]]

        # this should not raise
        df2["y"] = ["g", "h", "i"]

    def test_detect_chained_assignment_warnings_errors(
        self, using_copy_on_write, warn_copy_on_write
    ):
        """The "warn" and "raise" option values yield a warning and an error respectively."""
        df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})
        if using_copy_on_write or warn_copy_on_write:
            with tm.raises_chained_assignment_error():
                df.loc[0]["A"] = 111
            # the option-based checks below are not run in these modes
            return

        with option_context("chained_assignment", "warn"):
            with tm.assert_produces_warning(SettingWithCopyWarning):
                df.loc[0]["A"] = 111

        with option_context("chained_assignment", "raise"):
            with pytest.raises(SettingWithCopyError, match=msg):
                df.loc[0]["A"] = 111

    @pytest.mark.parametrize("rhs", [3, DataFrame({0: [1, 2, 3, 4]})])
    def test_detect_chained_assignment_warning_stacklevel(
        self, rhs, using_copy_on_write, warn_copy_on_write
    ):
        """The SettingWithCopyWarning must be attributed to this file (GH 42570)."""
        # GH#42570
        df = DataFrame(np.arange(25).reshape(5, 5))
        df_original = df.copy()
        chained = df.loc[:3]
        with option_context("chained_assignment", "warn"):
            if not using_copy_on_write and not warn_copy_on_write:
                with tm.assert_produces_warning(SettingWithCopyWarning) as t:
                    chained[2] = rhs
                    # the first recorded warning must point at this test file
                    assert t[0].filename == __file__
            else:
                # INFO(CoW) no warning, and original dataframe not changed
                chained[2] = rhs
                tm.assert_frame_equal(df, df_original)

    # TODO(ArrayManager) fast_xs with array-like scalars is not yet working
    @td.skip_array_manager_not_yet_implemented
    def test_chained_getitem_with_lists(self):
        """Four ways of reading a cell that holds an ndarray return the same array (GH 6394)."""
        # GH6394
        # Regression in chained getitem indexing with embedded list-like from
        # 0.12

        df = DataFrame({"A": 5 * [np.zeros(3)], "B": 5 * [np.ones(3)]})
        expected = df["A"].iloc[2]
        result = df.loc[2, "A"]
        tm.assert_numpy_array_equal(result, expected)
        result2 = df.iloc[2]["A"]
        tm.assert_numpy_array_equal(result2, expected)
        result3 = df["A"].loc[2]
        tm.assert_numpy_array_equal(result3, expected)
        result4 = df["A"].iloc[2]
        tm.assert_numpy_array_equal(result4, expected)

    def test_cache_updating(self):
        """A row added via ``.loc`` must appear in columns fetched afterwards (GH 4939)."""
        # GH 4939, make sure to update the cache on setitem

        df = DataFrame(
            np.zeros((10, 4)),
            columns=Index(list("ABCD"), dtype=object),
        )
        df["A"]  # cache series
        df.loc["Hello Friend"] = df.iloc[0]
        assert "Hello Friend" in df["A"].index
        assert "Hello Friend" in df["B"].index

    def test_cache_updating2(self, using_copy_on_write):
        """Writing through ``df.f.values[...]`` (GH 10264)."""
        # 10264
        df = DataFrame(
            np.zeros((5, 5), dtype="int64"),
            columns=["a", "b", "c", "d", "e"],
            index=range(5),
        )
        df["f"] = 0
        df_orig = df.copy()
        if using_copy_on_write:
            # the write is expected to fail with a "read-only" ValueError and
            # leave the frame unchanged
            with pytest.raises(ValueError, match="read-only"):
                df.f.values[3] = 1
            tm.assert_frame_equal(df, df_orig)
            return

        df.f.values[3] = 1

        # second write through ``.values``; the assertions below expect 2
        df.f.values[3] = 2
        expected = DataFrame(
            np.zeros((5, 6), dtype="int64"),
            columns=["a", "b", "c", "d", "e", "f"],
            index=range(5),
        )
        expected.at[3, "f"] = 2
        tm.assert_frame_equal(df, expected)
        expected = Series([0, 0, 0, 2, 0], name="f")
        tm.assert_series_equal(df.f, expected)

    def test_iloc_setitem_chained_assignment(self, using_copy_on_write):
        """A boolean ``df.iloc[...]`` lookup between two chained sets (GH 3970)."""
        # GH#3970
        with option_context("chained_assignment", None):
            df = DataFrame({"aa": range(5), "bb": [2.2] * 5})
            df["cc"] = 0.0

            # all-True boolean indexer, one entry per row
            ck = [True] * len(df)

            with tm.raises_chained_assignment_error():
                df["bb"].iloc[0] = 0.13

            # GH#3970 this lookup used to break the chained setting to 0.15
            df.iloc[ck]

            with tm.raises_chained_assignment_error():
                df["bb"].iloc[0] = 0.15

            if not using_copy_on_write:
                assert df["bb"].iloc[0] == 0.15
            else:
                assert df["bb"].iloc[0] == 2.2

    def test_getitem_loc_assignment_slice_state(self):
        """``df["a"].loc[4] = 40`` with a new label must leave the frame unchanged (GH 13569)."""
        # GH 13569
        df = DataFrame({"a": [10, 20, 30]})
        with tm.raises_chained_assignment_error():
            df["a"].loc[4] = 40
        tm.assert_frame_equal(df, DataFrame({"a": [10, 20, 30]}))
        tm.assert_series_equal(df["a"], Series([10, 20, 30], name="a"))
|