848 lines
23 KiB
Python
848 lines
23 KiB
Python
""" test get/set & misc """
|
|
|
|
from datetime import timedelta
|
|
|
|
import numpy as np
|
|
import pytest
|
|
|
|
from pandas.core.dtypes.common import is_scalar
|
|
|
|
import pandas as pd
|
|
from pandas import (
|
|
Categorical,
|
|
DataFrame,
|
|
IndexSlice,
|
|
MultiIndex,
|
|
Series,
|
|
Timedelta,
|
|
Timestamp,
|
|
date_range,
|
|
period_range,
|
|
timedelta_range,
|
|
)
|
|
import pandas._testing as tm
|
|
|
|
from pandas.tseries.offsets import BDay
|
|
|
|
|
|
def test_basic_indexing():
|
|
s = Series(np.random.randn(5), index=["a", "b", "a", "a", "b"])
|
|
|
|
msg = "index 5 is out of bounds for axis 0 with size 5"
|
|
with pytest.raises(IndexError, match=msg):
|
|
s[5]
|
|
with pytest.raises(IndexError, match=msg):
|
|
s[5] = 0
|
|
|
|
with pytest.raises(KeyError, match=r"^'c'$"):
|
|
s["c"]
|
|
|
|
s = s.sort_index()
|
|
|
|
with pytest.raises(IndexError, match=msg):
|
|
s[5]
|
|
msg = r"index 5 is out of bounds for axis (0|1) with size 5|^5$"
|
|
with pytest.raises(IndexError, match=msg):
|
|
s[5] = 0
|
|
|
|
|
|
def test_basic_getitem_with_labels(datetime_series):
|
|
indices = datetime_series.index[[5, 10, 15]]
|
|
|
|
result = datetime_series[indices]
|
|
expected = datetime_series.reindex(indices)
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
result = datetime_series[indices[0] : indices[2]]
|
|
expected = datetime_series.loc[indices[0] : indices[2]]
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
# integer indexes, be careful
|
|
s = Series(np.random.randn(10), index=list(range(0, 20, 2)))
|
|
inds = [0, 2, 5, 7, 8]
|
|
arr_inds = np.array([0, 2, 5, 7, 8])
|
|
with pytest.raises(KeyError, match="with any missing labels"):
|
|
s[inds]
|
|
|
|
with pytest.raises(KeyError, match="with any missing labels"):
|
|
s[arr_inds]
|
|
|
|
# GH12089
|
|
# with tz for values
|
|
s = Series(
|
|
pd.date_range("2011-01-01", periods=3, tz="US/Eastern"), index=["a", "b", "c"]
|
|
)
|
|
expected = Timestamp("2011-01-01", tz="US/Eastern")
|
|
result = s.loc["a"]
|
|
assert result == expected
|
|
result = s.iloc[0]
|
|
assert result == expected
|
|
result = s["a"]
|
|
assert result == expected
|
|
|
|
|
|
def test_getitem_setitem_ellipsis():
|
|
s = Series(np.random.randn(10))
|
|
|
|
np.fix(s)
|
|
|
|
result = s[...]
|
|
tm.assert_series_equal(result, s)
|
|
|
|
s[...] = 5
|
|
assert (result == 5).all()
|
|
|
|
|
|
def test_getitem_get(datetime_series, string_series, object_series):
|
|
idx1 = string_series.index[5]
|
|
idx2 = object_series.index[5]
|
|
|
|
assert string_series[idx1] == string_series.get(idx1)
|
|
assert object_series[idx2] == object_series.get(idx2)
|
|
|
|
assert string_series[idx1] == string_series[5]
|
|
assert object_series[idx2] == object_series[5]
|
|
|
|
assert string_series.get(-1) == string_series.get(string_series.index[-1])
|
|
assert string_series[5] == string_series.get(string_series.index[5])
|
|
|
|
# missing
|
|
d = datetime_series.index[0] - BDay()
|
|
msg = r"Timestamp\('1999-12-31 00:00:00', freq='B'\)"
|
|
with pytest.raises(KeyError, match=msg):
|
|
datetime_series[d]
|
|
|
|
# None
|
|
# GH 5652
|
|
s1 = Series(dtype=object)
|
|
s2 = Series(dtype=object, index=list("abc"))
|
|
for s in [s1, s2]:
|
|
result = s.get(None)
|
|
assert result is None
|
|
|
|
|
|
def test_getitem_fancy(string_series, object_series):
|
|
slice1 = string_series[[1, 2, 3]]
|
|
slice2 = object_series[[1, 2, 3]]
|
|
assert string_series.index[2] == slice1.index[1]
|
|
assert object_series.index[2] == slice2.index[1]
|
|
assert string_series[2] == slice1[1]
|
|
assert object_series[2] == slice2[1]
|
|
|
|
|
|
def test_type_promotion():
|
|
# GH12599
|
|
s = Series(dtype=object)
|
|
s["a"] = Timestamp("2016-01-01")
|
|
s["b"] = 3.0
|
|
s["c"] = "foo"
|
|
expected = Series([Timestamp("2016-01-01"), 3.0, "foo"], index=["a", "b", "c"])
|
|
tm.assert_series_equal(s, expected)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"result_1, duplicate_item, expected_1",
|
|
[
|
|
[
|
|
Series({1: 12, 2: [1, 2, 2, 3]}),
|
|
Series({1: 313}),
|
|
Series({1: 12}, dtype=object),
|
|
],
|
|
[
|
|
Series({1: [1, 2, 3], 2: [1, 2, 2, 3]}),
|
|
Series({1: [1, 2, 3]}),
|
|
Series({1: [1, 2, 3]}),
|
|
],
|
|
],
|
|
)
|
|
def test_getitem_with_duplicates_indices(result_1, duplicate_item, expected_1):
|
|
# GH 17610
|
|
result = result_1.append(duplicate_item)
|
|
expected = expected_1.append(duplicate_item)
|
|
tm.assert_series_equal(result[1], expected)
|
|
assert result[2] == result_1[2]
|
|
|
|
|
|
def test_getitem_setitem_integers():
|
|
# caused bug without test
|
|
s = Series([1, 2, 3], ["a", "b", "c"])
|
|
|
|
assert s.iloc[0] == s["a"]
|
|
s.iloc[0] = 5
|
|
tm.assert_almost_equal(s["a"], 5)
|
|
|
|
|
|
def test_getitem_box_float64(datetime_series):
|
|
value = datetime_series[5]
|
|
assert isinstance(value, np.float64)
|
|
|
|
|
|
def test_series_box_timestamp():
|
|
rng = pd.date_range("20090415", "20090519", freq="B")
|
|
ser = Series(rng)
|
|
|
|
assert isinstance(ser[5], Timestamp)
|
|
|
|
rng = pd.date_range("20090415", "20090519", freq="B")
|
|
ser = Series(rng, index=rng)
|
|
assert isinstance(ser[5], Timestamp)
|
|
|
|
assert isinstance(ser.iat[5], Timestamp)
|
|
|
|
|
|
def test_series_box_timedelta():
|
|
rng = pd.timedelta_range("1 day 1 s", periods=5, freq="h")
|
|
ser = Series(rng)
|
|
assert isinstance(ser[0], Timedelta)
|
|
assert isinstance(ser.at[1], Timedelta)
|
|
assert isinstance(ser.iat[2], Timedelta)
|
|
assert isinstance(ser.loc[3], Timedelta)
|
|
assert isinstance(ser.iloc[4], Timedelta)
|
|
|
|
|
|
def test_getitem_ambiguous_keyerror():
|
|
s = Series(range(10), index=list(range(0, 20, 2)))
|
|
with pytest.raises(KeyError, match=r"^1$"):
|
|
s[1]
|
|
with pytest.raises(KeyError, match=r"^1$"):
|
|
s.loc[1]
|
|
|
|
|
|
def test_getitem_unordered_dup():
|
|
obj = Series(range(5), index=["c", "a", "a", "b", "b"])
|
|
assert is_scalar(obj["c"])
|
|
assert obj["c"] == 0
|
|
|
|
|
|
def test_getitem_dups_with_missing():
|
|
# breaks reindex, so need to use .loc internally
|
|
# GH 4246
|
|
s = Series([1, 2, 3, 4], ["foo", "bar", "foo", "bah"])
|
|
with pytest.raises(KeyError, match="with any missing labels"):
|
|
s.loc[["foo", "bar", "bah", "bam"]]
|
|
|
|
with pytest.raises(KeyError, match="with any missing labels"):
|
|
s[["foo", "bar", "bah", "bam"]]
|
|
|
|
|
|
def test_getitem_dups():
|
|
s = Series(range(5), index=["A", "A", "B", "C", "C"], dtype=np.int64)
|
|
expected = Series([3, 4], index=["C", "C"], dtype=np.int64)
|
|
result = s["C"]
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
|
|
def test_setitem_ambiguous_keyerror():
|
|
s = Series(range(10), index=list(range(0, 20, 2)))
|
|
|
|
# equivalent of an append
|
|
s2 = s.copy()
|
|
s2[1] = 5
|
|
expected = s.append(Series([5], index=[1]))
|
|
tm.assert_series_equal(s2, expected)
|
|
|
|
s2 = s.copy()
|
|
s2.loc[1] = 5
|
|
expected = s.append(Series([5], index=[1]))
|
|
tm.assert_series_equal(s2, expected)
|
|
|
|
|
|
def test_setitem(datetime_series, string_series):
|
|
datetime_series[datetime_series.index[5]] = np.NaN
|
|
datetime_series[[1, 2, 17]] = np.NaN
|
|
datetime_series[6] = np.NaN
|
|
assert np.isnan(datetime_series[6])
|
|
assert np.isnan(datetime_series[2])
|
|
datetime_series[np.isnan(datetime_series)] = 5
|
|
assert not np.isnan(datetime_series[2])
|
|
|
|
# caught this bug when writing tests
|
|
series = Series(tm.makeIntIndex(20).astype(float), index=tm.makeIntIndex(20))
|
|
|
|
series[::2] = 0
|
|
assert (series[::2] == 0).all()
|
|
|
|
# set item that's not contained
|
|
s = string_series.copy()
|
|
s["foobar"] = 1
|
|
|
|
app = Series([1], index=["foobar"], name="series")
|
|
expected = string_series.append(app)
|
|
tm.assert_series_equal(s, expected)
|
|
|
|
|
|
def test_setitem_dtypes():
|
|
# change dtypes
|
|
# GH 4463
|
|
expected = Series([np.nan, 2, 3])
|
|
|
|
s = Series([1, 2, 3])
|
|
s.iloc[0] = np.nan
|
|
tm.assert_series_equal(s, expected)
|
|
|
|
s = Series([1, 2, 3])
|
|
s.loc[0] = np.nan
|
|
tm.assert_series_equal(s, expected)
|
|
|
|
s = Series([1, 2, 3])
|
|
s[0] = np.nan
|
|
tm.assert_series_equal(s, expected)
|
|
|
|
s = Series([False])
|
|
s.loc[0] = np.nan
|
|
tm.assert_series_equal(s, Series([np.nan]))
|
|
|
|
s = Series([False, True])
|
|
s.loc[0] = np.nan
|
|
tm.assert_series_equal(s, Series([np.nan, 1.0]))
|
|
|
|
|
|
def test_setslice(datetime_series):
|
|
sl = datetime_series[5:20]
|
|
assert len(sl) == len(sl.index)
|
|
assert sl.index.is_unique is True
|
|
|
|
|
|
def test_loc_setitem_2d_to_1d_raises():
|
|
x = np.random.randn(2, 2)
|
|
y = Series(range(2))
|
|
|
|
msg = "|".join(
|
|
[
|
|
r"shape mismatch: value array of shape \(2,2\)",
|
|
r"cannot reshape array of size 4 into shape \(2,\)",
|
|
]
|
|
)
|
|
with pytest.raises(ValueError, match=msg):
|
|
y.loc[range(2)] = x
|
|
|
|
msg = r"could not broadcast input array from shape \(2,2\) into shape \(2,?\)"
|
|
with pytest.raises(ValueError, match=msg):
|
|
y.loc[:] = x
|
|
|
|
|
|
# FutureWarning from NumPy about [slice(None, 5).
|
|
@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning")
|
|
def test_basic_getitem_setitem_corner(datetime_series):
|
|
# invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2]
|
|
msg = "key of type tuple not found and not a MultiIndex"
|
|
with pytest.raises(KeyError, match=msg):
|
|
datetime_series[:, 2]
|
|
with pytest.raises(KeyError, match=msg):
|
|
datetime_series[:, 2] = 2
|
|
|
|
# weird lists. [slice(0, 5)] will work but not two slices
|
|
with tm.assert_produces_warning(FutureWarning):
|
|
# GH#31299
|
|
result = datetime_series[[slice(None, 5)]]
|
|
expected = datetime_series[:5]
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
# OK
|
|
msg = r"unhashable type(: 'slice')?"
|
|
with pytest.raises(TypeError, match=msg):
|
|
datetime_series[[5, slice(None, None)]]
|
|
with pytest.raises(TypeError, match=msg):
|
|
datetime_series[[5, slice(None, None)]] = 2
|
|
|
|
|
|
@pytest.mark.parametrize("tz", ["US/Eastern", "UTC", "Asia/Tokyo"])
|
|
def test_setitem_with_tz(tz):
|
|
orig = Series(pd.date_range("2016-01-01", freq="H", periods=3, tz=tz))
|
|
assert orig.dtype == f"datetime64[ns, {tz}]"
|
|
|
|
# scalar
|
|
s = orig.copy()
|
|
s[1] = Timestamp("2011-01-01", tz=tz)
|
|
exp = Series(
|
|
[
|
|
Timestamp("2016-01-01 00:00", tz=tz),
|
|
Timestamp("2011-01-01 00:00", tz=tz),
|
|
Timestamp("2016-01-01 02:00", tz=tz),
|
|
]
|
|
)
|
|
tm.assert_series_equal(s, exp)
|
|
|
|
s = orig.copy()
|
|
s.loc[1] = Timestamp("2011-01-01", tz=tz)
|
|
tm.assert_series_equal(s, exp)
|
|
|
|
s = orig.copy()
|
|
s.iloc[1] = Timestamp("2011-01-01", tz=tz)
|
|
tm.assert_series_equal(s, exp)
|
|
|
|
# vector
|
|
vals = Series(
|
|
[Timestamp("2011-01-01", tz=tz), Timestamp("2012-01-01", tz=tz)],
|
|
index=[1, 2],
|
|
)
|
|
assert vals.dtype == f"datetime64[ns, {tz}]"
|
|
|
|
s[[1, 2]] = vals
|
|
exp = Series(
|
|
[
|
|
Timestamp("2016-01-01 00:00", tz=tz),
|
|
Timestamp("2011-01-01 00:00", tz=tz),
|
|
Timestamp("2012-01-01 00:00", tz=tz),
|
|
]
|
|
)
|
|
tm.assert_series_equal(s, exp)
|
|
|
|
s = orig.copy()
|
|
s.loc[[1, 2]] = vals
|
|
tm.assert_series_equal(s, exp)
|
|
|
|
s = orig.copy()
|
|
s.iloc[[1, 2]] = vals
|
|
tm.assert_series_equal(s, exp)
|
|
|
|
|
|
def test_setitem_with_tz_dst():
|
|
# GH XXX TODO: fill in GH ref
|
|
tz = "US/Eastern"
|
|
orig = Series(pd.date_range("2016-11-06", freq="H", periods=3, tz=tz))
|
|
assert orig.dtype == f"datetime64[ns, {tz}]"
|
|
|
|
# scalar
|
|
s = orig.copy()
|
|
s[1] = Timestamp("2011-01-01", tz=tz)
|
|
exp = Series(
|
|
[
|
|
Timestamp("2016-11-06 00:00-04:00", tz=tz),
|
|
Timestamp("2011-01-01 00:00-05:00", tz=tz),
|
|
Timestamp("2016-11-06 01:00-05:00", tz=tz),
|
|
]
|
|
)
|
|
tm.assert_series_equal(s, exp)
|
|
|
|
s = orig.copy()
|
|
s.loc[1] = Timestamp("2011-01-01", tz=tz)
|
|
tm.assert_series_equal(s, exp)
|
|
|
|
s = orig.copy()
|
|
s.iloc[1] = Timestamp("2011-01-01", tz=tz)
|
|
tm.assert_series_equal(s, exp)
|
|
|
|
# vector
|
|
vals = Series(
|
|
[Timestamp("2011-01-01", tz=tz), Timestamp("2012-01-01", tz=tz)],
|
|
index=[1, 2],
|
|
)
|
|
assert vals.dtype == f"datetime64[ns, {tz}]"
|
|
|
|
s[[1, 2]] = vals
|
|
exp = Series(
|
|
[
|
|
Timestamp("2016-11-06 00:00", tz=tz),
|
|
Timestamp("2011-01-01 00:00", tz=tz),
|
|
Timestamp("2012-01-01 00:00", tz=tz),
|
|
]
|
|
)
|
|
tm.assert_series_equal(s, exp)
|
|
|
|
s = orig.copy()
|
|
s.loc[[1, 2]] = vals
|
|
tm.assert_series_equal(s, exp)
|
|
|
|
s = orig.copy()
|
|
s.iloc[[1, 2]] = vals
|
|
tm.assert_series_equal(s, exp)
|
|
|
|
|
|
def test_categorical_assigning_ops():
|
|
orig = Series(Categorical(["b", "b"], categories=["a", "b"]))
|
|
s = orig.copy()
|
|
s[:] = "a"
|
|
exp = Series(Categorical(["a", "a"], categories=["a", "b"]))
|
|
tm.assert_series_equal(s, exp)
|
|
|
|
s = orig.copy()
|
|
s[1] = "a"
|
|
exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
|
|
tm.assert_series_equal(s, exp)
|
|
|
|
s = orig.copy()
|
|
s[s.index > 0] = "a"
|
|
exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
|
|
tm.assert_series_equal(s, exp)
|
|
|
|
s = orig.copy()
|
|
s[[False, True]] = "a"
|
|
exp = Series(Categorical(["b", "a"], categories=["a", "b"]))
|
|
tm.assert_series_equal(s, exp)
|
|
|
|
s = orig.copy()
|
|
s.index = ["x", "y"]
|
|
s["y"] = "a"
|
|
exp = Series(Categorical(["b", "a"], categories=["a", "b"]), index=["x", "y"])
|
|
tm.assert_series_equal(s, exp)
|
|
|
|
# ensure that one can set something to np.nan
|
|
s = Series(Categorical([1, 2, 3]))
|
|
exp = Series(Categorical([1, np.nan, 3], categories=[1, 2, 3]))
|
|
s[1] = np.nan
|
|
tm.assert_series_equal(s, exp)
|
|
|
|
|
|
def test_getitem_categorical_str():
|
|
# GH#31765
|
|
ser = Series(range(5), index=Categorical(["a", "b", "c", "a", "b"]))
|
|
result = ser["a"]
|
|
expected = ser.iloc[[0, 3]]
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
# Check the intermediate steps work as expected
|
|
with tm.assert_produces_warning(FutureWarning):
|
|
result = ser.index.get_value(ser, "a")
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
|
|
def test_slice(string_series, object_series):
|
|
numSlice = string_series[10:20]
|
|
numSliceEnd = string_series[-10:]
|
|
objSlice = object_series[10:20]
|
|
|
|
assert string_series.index[9] not in numSlice.index
|
|
assert object_series.index[9] not in objSlice.index
|
|
|
|
assert len(numSlice) == len(numSlice.index)
|
|
assert string_series[numSlice.index[0]] == numSlice[numSlice.index[0]]
|
|
|
|
assert numSlice.index[1] == string_series.index[11]
|
|
assert tm.equalContents(numSliceEnd, np.array(string_series)[-10:])
|
|
|
|
# Test return view.
|
|
sl = string_series[10:20]
|
|
sl[:] = 0
|
|
|
|
assert (string_series[10:20] == 0).all()
|
|
|
|
|
|
def test_slice_can_reorder_not_uniquely_indexed():
|
|
s = Series(1, index=["a", "a", "b", "b", "c"])
|
|
s[::-1] # it works!
|
|
|
|
|
|
def test_loc_setitem(string_series):
|
|
inds = string_series.index[[3, 4, 7]]
|
|
|
|
result = string_series.copy()
|
|
result.loc[inds] = 5
|
|
|
|
expected = string_series.copy()
|
|
expected[[3, 4, 7]] = 5
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
result.iloc[5:10] = 10
|
|
expected[5:10] = 10
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
# set slice with indices
|
|
d1, d2 = string_series.index[[5, 15]]
|
|
result.loc[d1:d2] = 6
|
|
expected[5:16] = 6 # because it's inclusive
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
# set index value
|
|
string_series.loc[d1] = 4
|
|
string_series.loc[d2] = 6
|
|
assert string_series[d1] == 4
|
|
assert string_series[d2] == 6
|
|
|
|
|
|
def test_timedelta_assignment():
|
|
# GH 8209
|
|
s = Series([], dtype=object)
|
|
s.loc["B"] = timedelta(1)
|
|
tm.assert_series_equal(s, Series(Timedelta("1 days"), index=["B"]))
|
|
|
|
s = s.reindex(s.index.insert(0, "A"))
|
|
tm.assert_series_equal(s, Series([np.nan, Timedelta("1 days")], index=["A", "B"]))
|
|
|
|
s.loc["A"] = timedelta(1)
|
|
expected = Series(Timedelta("1 days"), index=["A", "B"])
|
|
tm.assert_series_equal(s, expected)
|
|
|
|
# GH 14155
|
|
s = Series(10 * [np.timedelta64(10, "m")])
|
|
s.loc[[1, 2, 3]] = np.timedelta64(20, "m")
|
|
expected = Series(10 * [np.timedelta64(10, "m")])
|
|
expected.loc[[1, 2, 3]] = Timedelta(np.timedelta64(20, "m"))
|
|
tm.assert_series_equal(s, expected)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"nat_val,should_cast",
|
|
[
|
|
(pd.NaT, True),
|
|
(np.timedelta64("NaT", "ns"), False),
|
|
(np.datetime64("NaT", "ns"), True),
|
|
],
|
|
)
|
|
@pytest.mark.parametrize("tz", [None, "UTC"])
|
|
def test_dt64_series_assign_nat(nat_val, should_cast, tz):
|
|
# some nat-like values should be cast to datetime64 when inserting
|
|
# into a datetime64 series. Others should coerce to object
|
|
# and retain their dtypes.
|
|
dti = pd.date_range("2016-01-01", periods=3, tz=tz)
|
|
base = Series(dti)
|
|
expected = Series([pd.NaT] + list(dti[1:]), dtype=dti.dtype)
|
|
if not should_cast:
|
|
expected = expected.astype(object)
|
|
|
|
ser = base.copy(deep=True)
|
|
ser[0] = nat_val
|
|
tm.assert_series_equal(ser, expected)
|
|
|
|
ser = base.copy(deep=True)
|
|
ser.loc[0] = nat_val
|
|
tm.assert_series_equal(ser, expected)
|
|
|
|
ser = base.copy(deep=True)
|
|
ser.iloc[0] = nat_val
|
|
tm.assert_series_equal(ser, expected)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"nat_val,should_cast",
|
|
[
|
|
(pd.NaT, True),
|
|
(np.timedelta64("NaT", "ns"), True),
|
|
(np.datetime64("NaT", "ns"), False),
|
|
],
|
|
)
|
|
def test_td64_series_assign_nat(nat_val, should_cast):
|
|
# some nat-like values should be cast to timedelta64 when inserting
|
|
# into a timedelta64 series. Others should coerce to object
|
|
# and retain their dtypes.
|
|
base = Series([0, 1, 2], dtype="m8[ns]")
|
|
expected = Series([pd.NaT, 1, 2], dtype="m8[ns]")
|
|
if not should_cast:
|
|
expected = expected.astype(object)
|
|
|
|
ser = base.copy(deep=True)
|
|
ser[0] = nat_val
|
|
tm.assert_series_equal(ser, expected)
|
|
|
|
ser = base.copy(deep=True)
|
|
ser.loc[0] = nat_val
|
|
tm.assert_series_equal(ser, expected)
|
|
|
|
ser = base.copy(deep=True)
|
|
ser.iloc[0] = nat_val
|
|
tm.assert_series_equal(ser, expected)
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"td",
|
|
[
|
|
Timedelta("9 days"),
|
|
Timedelta("9 days").to_timedelta64(),
|
|
Timedelta("9 days").to_pytimedelta(),
|
|
],
|
|
)
|
|
def test_append_timedelta_does_not_cast(td):
|
|
# GH#22717 inserting a Timedelta should _not_ cast to int64
|
|
expected = Series(["x", td], index=[0, "td"], dtype=object)
|
|
|
|
ser = Series(["x"])
|
|
ser["td"] = td
|
|
tm.assert_series_equal(ser, expected)
|
|
assert isinstance(ser["td"], Timedelta)
|
|
|
|
ser = Series(["x"])
|
|
ser.loc["td"] = Timedelta("9 days")
|
|
tm.assert_series_equal(ser, expected)
|
|
assert isinstance(ser["td"], Timedelta)
|
|
|
|
|
|
def test_underlying_data_conversion():
|
|
# GH 4080
|
|
df = DataFrame({c: [1, 2, 3] for c in ["a", "b", "c"]})
|
|
return_value = df.set_index(["a", "b", "c"], inplace=True)
|
|
assert return_value is None
|
|
s = Series([1], index=[(2, 2, 2)])
|
|
df["val"] = 0
|
|
df
|
|
df["val"].update(s)
|
|
|
|
expected = DataFrame(
|
|
{"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3], "val": [0, 1, 0]}
|
|
)
|
|
return_value = expected.set_index(["a", "b", "c"], inplace=True)
|
|
assert return_value is None
|
|
tm.assert_frame_equal(df, expected)
|
|
|
|
# GH 3970
|
|
# these are chained assignments as well
|
|
pd.set_option("chained_assignment", None)
|
|
df = DataFrame({"aa": range(5), "bb": [2.2] * 5})
|
|
df["cc"] = 0.0
|
|
|
|
ck = [True] * len(df)
|
|
|
|
df["bb"].iloc[0] = 0.13
|
|
|
|
# TODO: unused
|
|
df_tmp = df.iloc[ck] # noqa
|
|
|
|
df["bb"].iloc[0] = 0.15
|
|
assert df["bb"].iloc[0] == 0.15
|
|
pd.set_option("chained_assignment", "raise")
|
|
|
|
# GH 3217
|
|
df = DataFrame({"a": [1, 3], "b": [np.nan, 2]})
|
|
df["c"] = np.nan
|
|
df["c"].update(Series(["foo"], index=[0]))
|
|
|
|
expected = DataFrame({"a": [1, 3], "b": [np.nan, 2], "c": ["foo", np.nan]})
|
|
tm.assert_frame_equal(df, expected)
|
|
|
|
|
|
def test_preserve_refs(datetime_series):
|
|
seq = datetime_series[[5, 10, 15]]
|
|
seq[1] = np.NaN
|
|
assert not np.isnan(datetime_series[10])
|
|
|
|
|
|
def test_cast_on_putmask():
|
|
# GH 2746
|
|
|
|
# need to upcast
|
|
s = Series([1, 2], index=[1, 2], dtype="int64")
|
|
s[[True, False]] = Series([0], index=[1], dtype="int64")
|
|
expected = Series([0, 2], index=[1, 2], dtype="int64")
|
|
|
|
tm.assert_series_equal(s, expected)
|
|
|
|
|
|
def test_type_promote_putmask():
|
|
# GH8387: test that changing types does not break alignment
|
|
ts = Series(np.random.randn(100), index=np.arange(100, 0, -1)).round(5)
|
|
left, mask = ts.copy(), ts > 0
|
|
right = ts[mask].copy().map(str)
|
|
left[mask] = right
|
|
tm.assert_series_equal(left, ts.map(lambda t: str(t) if t > 0 else t))
|
|
|
|
s = Series([0, 1, 2, 0])
|
|
mask = s > 0
|
|
s2 = s[mask].map(str)
|
|
s[mask] = s2
|
|
tm.assert_series_equal(s, Series([0, "1", "2", 0]))
|
|
|
|
s = Series([0, "foo", "bar", 0])
|
|
mask = Series([False, True, True, False])
|
|
s2 = s[mask]
|
|
s[mask] = s2
|
|
tm.assert_series_equal(s, Series([0, "foo", "bar", 0]))
|
|
|
|
|
|
def test_multilevel_preserve_name():
|
|
index = MultiIndex(
|
|
levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
|
|
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
|
|
names=["first", "second"],
|
|
)
|
|
s = Series(np.random.randn(len(index)), index=index, name="sth")
|
|
|
|
result = s["foo"]
|
|
result2 = s.loc["foo"]
|
|
assert result.name == s.name
|
|
assert result2.name == s.name
|
|
|
|
|
|
"""
|
|
miscellaneous methods
|
|
"""
|
|
|
|
|
|
def test_uint_drop(any_int_dtype):
|
|
# see GH18311
|
|
# assigning series.loc[0] = 4 changed series.dtype to int
|
|
series = Series([1, 2, 3], dtype=any_int_dtype)
|
|
series.loc[0] = 4
|
|
expected = Series([4, 2, 3], dtype=any_int_dtype)
|
|
tm.assert_series_equal(series, expected)
|
|
|
|
|
|
def test_getitem_unrecognized_scalar():
|
|
# GH#32684 a scalar key that is not recognized by lib.is_scalar
|
|
|
|
# a series that might be produced via `frame.dtypes`
|
|
ser = Series([1, 2], index=[np.dtype("O"), np.dtype("i8")])
|
|
|
|
key = ser.index[1]
|
|
|
|
result = ser[key]
|
|
assert result == 2
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"index",
|
|
[
|
|
date_range("2014-01-01", periods=20, freq="MS"),
|
|
period_range("2014-01", periods=20, freq="M"),
|
|
timedelta_range("0", periods=20, freq="H"),
|
|
],
|
|
)
|
|
def test_slice_with_zero_step_raises(index):
|
|
ts = Series(np.arange(20), index)
|
|
|
|
with pytest.raises(ValueError, match="slice step cannot be zero"):
|
|
ts[::0]
|
|
with pytest.raises(ValueError, match="slice step cannot be zero"):
|
|
ts.loc[::0]
|
|
with pytest.raises(ValueError, match="slice step cannot be zero"):
|
|
ts.iloc[::0]
|
|
|
|
|
|
@pytest.mark.parametrize(
|
|
"index",
|
|
[
|
|
date_range("2014-01-01", periods=20, freq="MS"),
|
|
period_range("2014-01", periods=20, freq="M"),
|
|
timedelta_range("0", periods=20, freq="H"),
|
|
],
|
|
)
|
|
def test_slice_with_negative_step(index):
|
|
def assert_slices_equivalent(l_slc, i_slc):
|
|
expected = ts.iloc[i_slc]
|
|
|
|
tm.assert_series_equal(ts[l_slc], expected)
|
|
tm.assert_series_equal(ts.loc[l_slc], expected)
|
|
tm.assert_series_equal(ts.loc[l_slc], expected)
|
|
|
|
keystr1 = str(index[9])
|
|
keystr2 = str(index[13])
|
|
box = type(index[0])
|
|
|
|
ts = Series(np.arange(20), index)
|
|
SLC = IndexSlice
|
|
|
|
for key in [keystr1, box(keystr1)]:
|
|
assert_slices_equivalent(SLC[key::-1], SLC[9::-1])
|
|
assert_slices_equivalent(SLC[:key:-1], SLC[:8:-1])
|
|
|
|
for key2 in [keystr2, box(keystr2)]:
|
|
assert_slices_equivalent(SLC[key2:key:-1], SLC[13:8:-1])
|
|
assert_slices_equivalent(SLC[key:key2:-1], SLC[0:0:-1])
|
|
|
|
|
|
def test_tuple_index():
|
|
# GH 35534 - Selecting values when a Series has an Index of tuples
|
|
s = Series([1, 2], index=[("a",), ("b",)])
|
|
assert s[("a",)] == 1
|
|
assert s[("b",)] == 2
|
|
s[("b",)] = 3
|
|
assert s[("b",)] == 3
|
|
|
|
|
|
def test_frozenset_index():
|
|
# GH35747 - Selecting values when a Series has an Index of frozenset
|
|
idx0, idx1 = frozenset("a"), frozenset("b")
|
|
s = Series([1, 2], index=[idx0, idx1])
|
|
assert s[idx0] == 1
|
|
assert s[idx1] == 2
|
|
s[idx1] = 3
|
|
assert s[idx1] == 3
|