467 lines
12 KiB
Python
467 lines
12 KiB
Python
import numpy as np
|
|
import pytest
|
|
|
|
from pandas.core.dtypes.common import is_integer
|
|
|
|
import pandas as pd
|
|
from pandas import Series, Timestamp, date_range, isna
|
|
import pandas._testing as tm
|
|
|
|
|
|
def test_where_unsafe_int(sint_dtype):
|
|
s = Series(np.arange(10), dtype=sint_dtype)
|
|
mask = s < 5
|
|
|
|
s[mask] = range(2, 7)
|
|
expected = Series(list(range(2, 7)) + list(range(5, 10)), dtype=sint_dtype)
|
|
|
|
tm.assert_series_equal(s, expected)
|
|
|
|
|
|
def test_where_unsafe_float(float_dtype):
|
|
s = Series(np.arange(10), dtype=float_dtype)
|
|
mask = s < 5
|
|
|
|
s[mask] = range(2, 7)
|
|
data = list(range(2, 7)) + list(range(5, 10))
|
|
expected = Series(data, dtype=float_dtype)
|
|
|
|
tm.assert_series_equal(s, expected)
|
|
|
|
|
|
@pytest.mark.parametrize(
    "dtype,expected_dtype",
    [
        (np.int8, np.float64),
        (np.int16, np.float64),
        (np.int32, np.float64),
        (np.int64, np.float64),
        (np.float32, np.float32),
        (np.float64, np.float64),
    ],
)
def test_where_unsafe_upcast(dtype, expected_dtype):
    """Fractional values assigned through a mask yield ``expected_dtype``.

    Integer inputs are expected to become float64, float inputs to keep
    their dtype. See gh-9743.
    """
    ser = Series(np.arange(10), dtype=dtype)
    fractional = [2.5, 3.5, 4.5, 5.5, 6.5]
    wanted = Series(fractional + list(range(5, 10)), dtype=expected_dtype)

    ser[ser < 5] = fractional
    tm.assert_series_equal(ser, wanted)
|
|
|
|
|
|
def test_where_unsafe():
|
|
# see gh-9731
|
|
s = Series(np.arange(10), dtype="int64")
|
|
values = [2.5, 3.5, 4.5, 5.5]
|
|
|
|
mask = s > 5
|
|
expected = Series(list(range(6)) + values, dtype="float64")
|
|
|
|
s[mask] = values
|
|
tm.assert_series_equal(s, expected)
|
|
|
|
# see gh-3235
|
|
s = Series(np.arange(10), dtype="int64")
|
|
mask = s < 5
|
|
s[mask] = range(2, 7)
|
|
expected = Series(list(range(2, 7)) + list(range(5, 10)), dtype="int64")
|
|
tm.assert_series_equal(s, expected)
|
|
assert s.dtype == expected.dtype
|
|
|
|
s = Series(np.arange(10), dtype="int64")
|
|
mask = s > 5
|
|
s[mask] = [0] * 4
|
|
expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype="int64")
|
|
tm.assert_series_equal(s, expected)
|
|
|
|
s = Series(np.arange(10))
|
|
mask = s > 5
|
|
|
|
msg = "cannot assign mismatch length to masked array"
|
|
with pytest.raises(ValueError, match=msg):
|
|
s[mask] = [5, 4, 3, 2, 1]
|
|
|
|
with pytest.raises(ValueError, match=msg):
|
|
s[mask] = [0] * 5
|
|
|
|
# dtype changes
|
|
s = Series([1, 2, 3, 4])
|
|
result = s.where(s > 2, np.nan)
|
|
expected = Series([np.nan, np.nan, 3, 4])
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
# GH 4667
|
|
# setting with None changes dtype
|
|
s = Series(range(10)).astype(float)
|
|
s[8] = None
|
|
result = s[8]
|
|
assert isna(result)
|
|
|
|
s = Series(range(10)).astype(float)
|
|
s[s > 8] = None
|
|
result = s[isna(s)]
|
|
expected = Series(np.nan, index=[9])
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
|
|
def test_where():
    """Basic ``Series.where`` behaviour, including alignment of a partial cond."""
    values = Series(np.random.randn(5))
    positive = values > 0

    # Dropping the masked-out entries matches plain boolean selection.
    kept = values.where(positive).dropna()
    selected = values[positive]
    tm.assert_series_equal(kept, selected)

    # Replacing non-positive entries by their negation gives absolute values.
    flipped = values.where(positive, -values)
    tm.assert_series_equal(flipped, values.abs())

    masked = values.where(positive)
    assert values.shape == masked.shape
    assert masked is not values

    # test alignment
    cond = Series([True, False, False, True, False], index=values.index)
    negative = -(values.abs())
    partial_cond = cond[:3]

    expected = negative[cond].reindex(negative.index[:3]).reindex(negative.index)
    tm.assert_series_equal(negative.where(partial_cond), expected)

    expected = negative.abs()
    expected.iloc[0] = negative[0]
    tm.assert_series_equal(negative.where(cond[:3], -negative), expected)
|
|
|
|
|
|
def test_where_error():
|
|
s = Series(np.random.randn(5))
|
|
cond = s > 0
|
|
|
|
msg = "Array conditional must be same shape as self"
|
|
with pytest.raises(ValueError, match=msg):
|
|
s.where(1)
|
|
with pytest.raises(ValueError, match=msg):
|
|
s.where(cond[:3].values, -s)
|
|
|
|
# GH 2745
|
|
s = Series([1, 2])
|
|
s[[True, False]] = [0, 1]
|
|
expected = Series([0, 2])
|
|
tm.assert_series_equal(s, expected)
|
|
|
|
# failures
|
|
msg = "cannot assign mismatch length to masked array"
|
|
with pytest.raises(ValueError, match=msg):
|
|
s[[True, False]] = [0, 2, 3]
|
|
msg = (
|
|
"NumPy boolean array indexing assignment cannot assign 0 input "
|
|
"values to the 1 output values where the mask is true"
|
|
)
|
|
with pytest.raises(ValueError, match=msg):
|
|
s[[True, False]] = []
|
|
|
|
|
|
@pytest.mark.parametrize("klass", [list, tuple, np.array, Series])
def test_where_array_like(klass):
    """``where`` accepts a boolean condition wrapped in any of these containers."""
    # see gh-15414
    ser = Series([1, 2, 3])
    wrapped_cond = klass([False, True, True])

    masked = ser.where(wrapped_cond)
    tm.assert_series_equal(masked, Series([np.nan, 2, 3]))
|
|
|
|
|
|
@pytest.mark.parametrize(
    "cond",
    [
        [1, 0, 1],
        Series([2, 5, 7]),
        ["True", "False", "True"],
        [Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")],
    ],
)
def test_where_invalid_input(cond):
    """Non-boolean or wrongly shaped conditions are rejected with ValueError."""
    # see gh-15414: only boolean arrays accepted
    ser = Series([1, 2, 3])

    dtype_msg = "Boolean array expected for the condition"
    with pytest.raises(ValueError, match=dtype_msg):
        ser.where(cond)

    shape_msg = "Array conditional must be same shape as self"
    with pytest.raises(ValueError, match=shape_msg):
        ser.where([True])
|
|
|
|
|
|
def test_where_ndframe_align():
    """A raw cond of the wrong length raises; the same cond as a Series aligns."""
    msg = "Array conditional must be same shape as self"
    ser = Series([1, 2, 3])

    # Each case pairs a wrong-length condition with the values expected once
    # that condition is wrapped in a Series.
    cases = [
        ([True], [1, np.nan, np.nan]),
        (np.array([False, True, False, True]), [np.nan, 2, np.nan]),
    ]
    for cond, aligned_values in cases:
        with pytest.raises(ValueError, match=msg):
            ser.where(cond)

        expected = Series(aligned_values)
        aligned = ser.where(Series(cond))
        tm.assert_series_equal(aligned, expected)
|
|
|
|
|
|
def test_where_setitem_invalid():
    """Setitem with a value of the wrong length raises for slice/list indexers."""
    # GH 2702
    # make sure correct exceptions are raised on invalid list assignment

    def length_error(indexer_kind):
        # Build the expected error text for the given indexer description.
        return (
            f"cannot set using a {indexer_kind} indexer with a "
            "different length than the value"
        )

    oversized = list(range(27))

    # slice
    letters = Series(list("abc"))
    with pytest.raises(ValueError, match=length_error("slice")):
        letters[0:3] = oversized

    letters[0:3] = list(range(3))
    tm.assert_series_equal(letters.astype(np.int64), Series([0, 1, 2]))

    # slice with step
    letters = Series(list("abcdef"))
    with pytest.raises(ValueError, match=length_error("slice")):
        letters[0:4:2] = oversized

    letters = Series(list("abcdef"))
    letters[0:4:2] = list(range(2))
    tm.assert_series_equal(letters, Series([0, "b", 1, "d", "e", "f"]))

    # neg slices
    letters = Series(list("abcdef"))
    with pytest.raises(ValueError, match=length_error("slice")):
        letters[:-1] = oversized

    letters[-3:-1] = list(range(2))
    tm.assert_series_equal(letters, Series(["a", "b", "c", 0, 1, "f"]))

    # list
    letters = Series(list("abc"))
    with pytest.raises(ValueError, match=length_error("list-like")):
        letters[[0, 1, 2]] = oversized

    letters = Series(list("abc"))
    with pytest.raises(ValueError, match=length_error("list-like")):
        letters[[0, 1, 2]] = list(range(2))

    # scalar
    letters = Series(list("abc"))
    letters[0] = list(range(10))
    tm.assert_series_equal(letters, Series([list(range(10)), "b", "c"]))
|
|
|
|
|
|
@pytest.mark.parametrize("size", range(2, 6))
|
|
@pytest.mark.parametrize(
|
|
"mask", [[True, False, False, False, False], [True, False], [False]]
|
|
)
|
|
@pytest.mark.parametrize(
|
|
"item", [2.0, np.nan, np.finfo(float).max, np.finfo(float).min]
|
|
)
|
|
# Test numpy arrays, lists and tuples as the input to be
|
|
# broadcast
|
|
@pytest.mark.parametrize(
|
|
"box", [lambda x: np.array([x]), lambda x: [x], lambda x: (x,)]
|
|
)
|
|
def test_broadcast(size, mask, item, box):
|
|
selection = np.resize(mask, size)
|
|
|
|
data = np.arange(size, dtype=float)
|
|
|
|
# Construct the expected series by taking the source
|
|
# data or item based on the selection
|
|
expected = Series(
|
|
[item if use_item else data[i] for i, use_item in enumerate(selection)]
|
|
)
|
|
|
|
s = Series(data)
|
|
s[selection] = box(item)
|
|
tm.assert_series_equal(s, expected)
|
|
|
|
s = Series(data)
|
|
result = s.where(~selection, box(item))
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
s = Series(data)
|
|
result = s.mask(selection, box(item))
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
|
|
def test_where_inplace():
    """``where(..., inplace=True)`` gives the same result as the copying form."""
    original = Series(np.random.randn(5))
    positive = original > 0

    # Without a replacement value.
    working = original.copy()
    working.where(positive, inplace=True)
    tm.assert_series_equal(working.dropna(), original[positive])
    tm.assert_series_equal(working, original.where(positive))

    # With a replacement value.
    working = original.copy()
    working.where(positive, -original, inplace=True)
    tm.assert_series_equal(working, original.where(positive, -original))
|
|
|
|
|
|
def test_where_dups():
    """``where`` and masked updates work on a Series with duplicate index labels."""
    # GH 4550
    # where crashes with dups in index
    first = Series(list(range(3)))
    second = Series(list(range(3)))
    comb = pd.concat([first, second])
    dup_index = [0, 1, 2, 0, 1, 2]

    masked = comb.where(comb < 2)
    expected = Series([0, 1, np.nan, 0, 1, np.nan], index=dup_index)
    tm.assert_series_equal(masked, expected)

    # GH 4548
    # inplace updating not working with dups
    comb[comb < 1] = 5
    expected = Series([5, 1, 2, 5, 1, 2], index=dup_index)
    tm.assert_series_equal(comb, expected)

    comb[comb < 2] += 10
    expected = Series([5, 11, 2, 5, 11, 2], index=dup_index)
    tm.assert_series_equal(comb, expected)
|
|
|
|
|
|
def test_where_numeric_with_string():
    """Filling an int Series with strings via ``where`` yields object dtype."""
    # GH 9280
    ser = Series([1, 2, 3])

    # The same checks apply to a scalar, a list and an ndarray replacement.
    replacements = ("X", ["X", "Y", "Z"], np.array(["X", "Y", "Z"]))
    for other in replacements:
        w = ser.where(ser > 1, other)

        assert not is_integer(w[0])
        assert is_integer(w[1])
        assert is_integer(w[2])
        assert isinstance(w[0], str)
        assert w.dtype == "object"
|
|
|
|
|
|
def test_where_timedelta_coerce():
    """Numeric replacements for every entry of a timedelta64 Series."""
    ser = Series([1, 2], dtype="timedelta64[ns]")
    all_false = np.array([False, False])

    # Every element is replaced, so each of these should compare equal to
    # a plain Series of tens.
    expected = Series([10, 10])
    for other in ([10, 10], 10, 10.0, [10.0, 10.0]):
        replaced = ser.where(all_false, other)
        tm.assert_series_equal(replaced, expected)

    replaced = ser.where(all_false, [10.0, np.nan])
    expected = Series([10, None], dtype="object")
    tm.assert_series_equal(replaced, expected)
|
|
|
|
|
|
def test_where_datetime_conversion():
    """Numeric replacements for a datetime Series, plus a tz-aware NaT fill."""
    ser = Series(date_range("20130102", periods=2))
    all_false = np.array([False, False])

    # Every element is replaced, so each of these should compare equal to
    # a plain Series of tens.
    expected = Series([10, 10])
    for other in ([10, 10], 10, 10.0, [10.0, 10.0]):
        replaced = ser.where(all_false, other)
        tm.assert_series_equal(replaced, expected)

    replaced = ser.where(all_false, [10.0, np.nan])
    expected = Series([10, None], dtype="object")
    tm.assert_series_equal(replaced, expected)

    # GH 15701
    timestamps = ["2016-12-31 12:00:04+00:00", "2016-12-31 12:00:04.010000+00:00"]
    ser = Series([Timestamp(stamp) for stamp in timestamps])
    replaced = ser.where(Series([False, True]))
    expected = Series([pd.NaT, ser[1]])
    tm.assert_series_equal(replaced, expected)
|
|
|
|
|
|
def test_where_dt_tz_values(tz_naive_fixture):
|
|
ser1 = Series(
|
|
pd.DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture)
|
|
)
|
|
ser2 = Series(
|
|
pd.DatetimeIndex(["20160514", "20160515", "20160516"], tz=tz_naive_fixture)
|
|
)
|
|
mask = Series([True, True, False])
|
|
result = ser1.where(mask, ser2)
|
|
exp = Series(
|
|
pd.DatetimeIndex(["20150101", "20150102", "20160516"], tz=tz_naive_fixture)
|
|
)
|
|
tm.assert_series_equal(exp, result)
|
|
|
|
|
|
def test_where_sparse():
    """``where`` on a Series backed by a SparseArray returns a sparse result."""
    # GH#17198 make sure we dont get an AttributeError for sp_index
    sparse = Series(pd.arrays.SparseArray([1, 2]))
    at_least_two = sparse >= 2

    result = sparse.where(at_least_two, 0)

    expected = Series(pd.arrays.SparseArray([0, 2]))
    tm.assert_series_equal(result, expected)
|
|
|
|
|
|
def test_where_empty_series_and_empty_cond_having_non_bool_dtypes():
    """An empty float Series with an empty list condition is returned unchanged."""
    # https://github.com/pandas-dev/pandas/issues/34592
    empty = Series([], dtype=float)
    tm.assert_series_equal(empty.where([]), empty)
|
|
|
|
|
|
@pytest.mark.parametrize("klass", [Series, pd.DataFrame])
def test_where_categorical(klass):
    """Masking out a category leaves NaN but keeps the full category list."""
    # https://github.com/pandas-dev/pandas/issues/18888
    expected_values = pd.Categorical(
        ["A", "A", "B", "B", np.nan], categories=["A", "B", "C"]
    )
    exp = klass(expected_values, dtype="category")

    obj = klass(["A", "A", "B", "B", "C"], dtype="category")
    not_c = obj != "C"
    res = obj.where(not_c)

    tm.assert_equal(exp, res)
|