186 lines
5.5 KiB
Python
186 lines
5.5 KiB
Python
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
from pandas import (
|
||
|
DataFrame,
|
||
|
Series,
|
||
|
date_range,
|
||
|
)
|
||
|
import pandas._testing as tm
|
||
|
from pandas.tests.copy_view.util import get_array
|
||
|
|
||
|
# -----------------------------------------------------------------------------
|
||
|
# Copy/view behaviour for accessing underlying array of Series/DataFrame
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"method",
|
||
|
[lambda ser: ser.values, lambda ser: np.asarray(ser)],
|
||
|
ids=["values", "asarray"],
|
||
|
)
|
||
|
def test_series_values(using_copy_on_write, method):
|
||
|
ser = Series([1, 2, 3], name="name")
|
||
|
ser_orig = ser.copy()
|
||
|
|
||
|
arr = method(ser)
|
||
|
|
||
|
if using_copy_on_write:
|
||
|
# .values still gives a view but is read-only
|
||
|
assert np.shares_memory(arr, get_array(ser, "name"))
|
||
|
assert arr.flags.writeable is False
|
||
|
|
||
|
# mutating series through arr therefore doesn't work
|
||
|
with pytest.raises(ValueError, match="read-only"):
|
||
|
arr[0] = 0
|
||
|
tm.assert_series_equal(ser, ser_orig)
|
||
|
|
||
|
# mutating the series itself still works
|
||
|
ser.iloc[0] = 0
|
||
|
assert ser.values[0] == 0
|
||
|
else:
|
||
|
assert arr.flags.writeable is True
|
||
|
arr[0] = 0
|
||
|
assert ser.iloc[0] == 0
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"method",
|
||
|
[lambda df: df.values, lambda df: np.asarray(df)],
|
||
|
ids=["values", "asarray"],
|
||
|
)
|
||
|
def test_dataframe_values(using_copy_on_write, using_array_manager, method):
|
||
|
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
|
||
|
df_orig = df.copy()
|
||
|
|
||
|
arr = method(df)
|
||
|
|
||
|
if using_copy_on_write:
|
||
|
# .values still gives a view but is read-only
|
||
|
assert np.shares_memory(arr, get_array(df, "a"))
|
||
|
assert arr.flags.writeable is False
|
||
|
|
||
|
# mutating series through arr therefore doesn't work
|
||
|
with pytest.raises(ValueError, match="read-only"):
|
||
|
arr[0, 0] = 0
|
||
|
tm.assert_frame_equal(df, df_orig)
|
||
|
|
||
|
# mutating the series itself still works
|
||
|
df.iloc[0, 0] = 0
|
||
|
assert df.values[0, 0] == 0
|
||
|
else:
|
||
|
assert arr.flags.writeable is True
|
||
|
arr[0, 0] = 0
|
||
|
if not using_array_manager:
|
||
|
assert df.iloc[0, 0] == 0
|
||
|
else:
|
||
|
tm.assert_frame_equal(df, df_orig)
|
||
|
|
||
|
|
||
|
def test_series_to_numpy(using_copy_on_write):
|
||
|
ser = Series([1, 2, 3], name="name")
|
||
|
ser_orig = ser.copy()
|
||
|
|
||
|
# default: copy=False, no dtype or NAs
|
||
|
arr = ser.to_numpy()
|
||
|
if using_copy_on_write:
|
||
|
# to_numpy still gives a view but is read-only
|
||
|
assert np.shares_memory(arr, get_array(ser, "name"))
|
||
|
assert arr.flags.writeable is False
|
||
|
|
||
|
# mutating series through arr therefore doesn't work
|
||
|
with pytest.raises(ValueError, match="read-only"):
|
||
|
arr[0] = 0
|
||
|
tm.assert_series_equal(ser, ser_orig)
|
||
|
|
||
|
# mutating the series itself still works
|
||
|
ser.iloc[0] = 0
|
||
|
assert ser.values[0] == 0
|
||
|
else:
|
||
|
assert arr.flags.writeable is True
|
||
|
arr[0] = 0
|
||
|
assert ser.iloc[0] == 0
|
||
|
|
||
|
# specify copy=False gives a writeable array
|
||
|
ser = Series([1, 2, 3], name="name")
|
||
|
arr = ser.to_numpy(copy=True)
|
||
|
assert not np.shares_memory(arr, get_array(ser, "name"))
|
||
|
assert arr.flags.writeable is True
|
||
|
|
||
|
# specifying a dtype that already causes a copy also gives a writeable array
|
||
|
ser = Series([1, 2, 3], name="name")
|
||
|
arr = ser.to_numpy(dtype="float64")
|
||
|
assert not np.shares_memory(arr, get_array(ser, "name"))
|
||
|
assert arr.flags.writeable is True
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("order", ["F", "C"])
|
||
|
def test_ravel_read_only(using_copy_on_write, order):
|
||
|
ser = Series([1, 2, 3])
|
||
|
arr = ser.ravel(order=order)
|
||
|
if using_copy_on_write:
|
||
|
assert arr.flags.writeable is False
|
||
|
assert np.shares_memory(get_array(ser), arr)
|
||
|
|
||
|
|
||
|
def test_series_array_ea_dtypes(using_copy_on_write):
|
||
|
ser = Series([1, 2, 3], dtype="Int64")
|
||
|
arr = np.asarray(ser, dtype="int64")
|
||
|
assert np.shares_memory(arr, get_array(ser))
|
||
|
if using_copy_on_write:
|
||
|
assert arr.flags.writeable is False
|
||
|
else:
|
||
|
assert arr.flags.writeable is True
|
||
|
|
||
|
arr = np.asarray(ser)
|
||
|
assert not np.shares_memory(arr, get_array(ser))
|
||
|
assert arr.flags.writeable is True
|
||
|
|
||
|
|
||
|
def test_dataframe_array_ea_dtypes(using_copy_on_write):
|
||
|
df = DataFrame({"a": [1, 2, 3]}, dtype="Int64")
|
||
|
arr = np.asarray(df, dtype="int64")
|
||
|
# TODO: This should be able to share memory, but we are roundtripping
|
||
|
# through object
|
||
|
assert not np.shares_memory(arr, get_array(df, "a"))
|
||
|
assert arr.flags.writeable is True
|
||
|
|
||
|
arr = np.asarray(df)
|
||
|
if using_copy_on_write:
|
||
|
# TODO(CoW): This should be True
|
||
|
assert arr.flags.writeable is False
|
||
|
else:
|
||
|
assert arr.flags.writeable is True
|
||
|
|
||
|
|
||
|
def test_dataframe_array_string_dtype(using_copy_on_write, using_array_manager):
|
||
|
df = DataFrame({"a": ["a", "b"]}, dtype="string")
|
||
|
arr = np.asarray(df)
|
||
|
if not using_array_manager:
|
||
|
assert np.shares_memory(arr, get_array(df, "a"))
|
||
|
if using_copy_on_write:
|
||
|
assert arr.flags.writeable is False
|
||
|
else:
|
||
|
assert arr.flags.writeable is True
|
||
|
|
||
|
|
||
|
def test_dataframe_multiple_numpy_dtypes():
|
||
|
df = DataFrame({"a": [1, 2, 3], "b": 1.5})
|
||
|
arr = np.asarray(df)
|
||
|
assert not np.shares_memory(arr, get_array(df, "a"))
|
||
|
assert arr.flags.writeable is True
|
||
|
|
||
|
|
||
|
def test_values_is_ea(using_copy_on_write):
|
||
|
df = DataFrame({"a": date_range("2012-01-01", periods=3)})
|
||
|
arr = np.asarray(df)
|
||
|
if using_copy_on_write:
|
||
|
assert arr.flags.writeable is False
|
||
|
else:
|
||
|
assert arr.flags.writeable is True
|
||
|
|
||
|
|
||
|
def test_empty_dataframe():
|
||
|
df = DataFrame()
|
||
|
arr = np.asarray(df)
|
||
|
assert arr.flags.writeable is True
|