449 lines
14 KiB
Python
449 lines
14 KiB
Python
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
from pandas._config import using_pyarrow_string_dtype
|
||
|
|
||
|
import pandas.util._test_decorators as td
|
||
|
|
||
|
from pandas import (
|
||
|
NA,
|
||
|
Categorical,
|
||
|
Float64Dtype,
|
||
|
Index,
|
||
|
MultiIndex,
|
||
|
NaT,
|
||
|
Period,
|
||
|
PeriodIndex,
|
||
|
RangeIndex,
|
||
|
Series,
|
||
|
Timedelta,
|
||
|
Timestamp,
|
||
|
date_range,
|
||
|
isna,
|
||
|
)
|
||
|
import pandas._testing as tm
|
||
|
|
||
|
|
||
|
@pytest.mark.xfail(
    using_pyarrow_string_dtype(), reason="share memory doesn't work for arrow"
)
def test_reindex(datetime_series, string_series):
    # Reindexing a series onto its own index: the resulting index is expected
    # to share memory with, and be identical to, the source index.
    same = string_series.reindex(string_series.index)
    assert np.may_share_memory(string_series.index, same.index)
    assert same.index.is_(string_series.index)
    assert same.index.identical(string_series.index)

    # A slice of the string index: every value must match a direct lookup.
    str_labels = string_series.index[10:20]
    str_subset = string_series.reindex(str_labels)
    for label, value in str_subset.items():
        assert value == string_series[label]

    # Same check against a slice of the datetime index.
    dt_labels = datetime_series.index[10:20]
    dt_subset = datetime_series.reindex(dt_labels)
    for label, value in dt_subset.items():
        assert value == datetime_series[label]

    # Reindexing the datetime series with the string labels is expected to
    # yield nothing but NaN.
    unmatched = datetime_series.reindex(str_labels)
    assert np.isnan(unmatched).all()

    # This is extremely important for the Cython code to not screw up
    strided_labels = datetime_series.index[::2]
    strided_subset = datetime_series.reindex(strided_labels)
    for label, value in strided_subset.items():
        assert value == datetime_series[label]

    # With no arguments, reindex must hand back a new object.
    copied = datetime_series.reindex()
    assert copied is not datetime_series
|
||
|
|
||
|
|
||
|
def test_reindex_nan():
    # Label-based reindexing with NaN labels must agree with the equivalent
    # positional selection.
    ser = Series([2, 3, 5, 7], index=[1, 4, np.nan, 8])

    labels = [np.nan, 1, np.nan, 8, 4, np.nan]
    positions = [2, 0, 2, 3, 1, 2]
    tm.assert_series_equal(ser.reindex(labels), ser.iloc[positions])

    ser.index = ser.index.astype("object")

    # reindex coerces index.dtype to float, loc/iloc doesn't
    tm.assert_series_equal(
        ser.reindex(labels), ser.iloc[positions], check_index_type=False
    )
|
||
|
|
||
|
|
||
|
def test_reindex_series_add_nat():
    # Ten datetimes extended to fifteen rows: the dtype stays datetime64 and
    # only the five appended rows are missing.
    stamps = date_range("1/1/2000 00:00:00", periods=10, freq="10s")
    extended = Series(stamps).reindex(range(15))
    assert np.issubdtype(extended.dtype, np.dtype("M8[ns]"))

    missing = extended.isna()
    tail_missing = missing[-5:]
    head_missing = missing[:-5]
    assert tail_missing.all()
    assert not head_missing.any()
|
||
|
|
||
|
|
||
|
def test_reindex_with_datetimes():
    # Selecting by a plain list of timestamps, via reindex or via [], must
    # match the positional slice once its index freq is dropped.
    stamps = date_range("1/1/2000", periods=20)
    ts = Series(np.random.default_rng(2).standard_normal(20), index=stamps)
    wanted = list(ts.index[5:10])

    expected = ts[5:10]
    expected.index = expected.index._with_freq(None)

    via_reindex = ts.reindex(wanted)
    tm.assert_series_equal(via_reindex, expected)

    via_getitem = ts[wanted]
    tm.assert_series_equal(via_getitem, expected)
|
||
|
|
||
|
|
||
|
def test_reindex_corner(datetime_series):
    # Corner cases for Series.reindex: an empty source with a fill method,
    # a plain-list target, and an invalid fill method name.

    # corner case: pad empty series
    empty = Series(index=[])
    padded = empty.reindex(datetime_series.index, method="pad")
    # An empty source has nothing to propagate, so the result must span the
    # whole target and consist only of missing values.
    assert len(padded) == len(datetime_series)
    assert padded.isna().all()

    # pass non-Index
    reindexed = datetime_series.reindex(list(datetime_series.index))
    # The original test drops the freq before comparing; do that on a copy so
    # the shared fixture is left untouched.
    expected = datetime_series.copy()
    expected.index = expected.index._with_freq(None)
    tm.assert_series_equal(expected, reindexed)

    # bad fill method
    ts = datetime_series[::2]
    msg = (
        r"Invalid fill method\. Expecting pad \(ffill\), backfill "
        r"\(bfill\) or nearest\. Got foo"
    )
    with pytest.raises(ValueError, match=msg):
        ts.reindex(datetime_series.index, method="foo")
|
||
|
|
||
|
|
||
|
def test_reindex_pad():
    # "pad" and "ffill" are two spellings of the same fill method.
    full = Series(np.arange(10), dtype="int64")
    every_other = full[::2]

    padded = every_other.reindex(full.index, method="pad")
    ffilled = every_other.reindex(full.index, method="ffill")
    tm.assert_series_equal(padded, ffilled)

    tm.assert_series_equal(padded, Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8]))
|
||
|
|
||
|
|
||
|
def test_reindex_pad2():
|
||
|
# GH4604
|
||
|
s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"])
|
||
|
new_index = ["a", "g", "c", "f"]
|
||
|
expected = Series([1, 1, 3, 3], index=new_index)
|
||
|
|
||
|
# this changes dtype because the ffill happens after
|
||
|
result = s.reindex(new_index).ffill()
|
||
|
tm.assert_series_equal(result, expected.astype("float64"))
|
||
|
|
||
|
msg = "The 'downcast' keyword in ffill is deprecated"
|
||
|
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||
|
result = s.reindex(new_index).ffill(downcast="infer")
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
expected = Series([1, 5, 3, 5], index=new_index)
|
||
|
result = s.reindex(new_index, method="ffill")
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
|
||
|
def test_reindex_inference():
|
||
|
# inference of new dtype
|
||
|
s = Series([True, False, False, True], index=list("abcd"))
|
||
|
new_index = "agc"
|
||
|
msg = "Downcasting object dtype arrays on"
|
||
|
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||
|
result = s.reindex(list(new_index)).ffill()
|
||
|
expected = Series([True, True, False], index=list(new_index))
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
|
||
|
def test_reindex_downcasting():
|
||
|
# GH4618 shifted series downcasting
|
||
|
s = Series(False, index=range(5))
|
||
|
msg = "Downcasting object dtype arrays on"
|
||
|
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||
|
result = s.shift(1).bfill()
|
||
|
expected = Series(False, index=range(5))
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
|
||
|
def test_reindex_nearest():
    # method="nearest" with no tolerance, a scalar tolerance, and a
    # per-label tolerance.
    ser = Series(np.arange(10, dtype="int64"))
    target = [0.1, 0.9, 1.5, 2.0]

    cases = [
        ({}, Series(np.around(target).astype("int64"), target)),
        ({"tolerance": 0.2}, Series([0, 1, np.nan, 2], target)),
        (
            {"tolerance": [0.3, 0.01, 0.4, 3]},
            Series([0, np.nan, np.nan, 2], target),
        ),
    ]
    for extra_kwargs, expected in cases:
        result = ser.reindex(target, method="nearest", **extra_kwargs)
        tm.assert_series_equal(expected, result)
|
||
|
|
||
|
|
||
|
def test_reindex_int(datetime_series):
    # Integer zeros placed on every second label of the fixture.
    every_other = datetime_series[::2]
    zeros = Series(np.zeros(len(every_other), dtype=int), index=every_other.index)

    # Expanding to the full index introduces NaNs: float64 is expected.
    expanded = zeros.reindex(datetime_series.index)
    assert expanded.dtype == np.float64

    # Restricting to existing labels introduces no NaNs: int is expected.
    restricted = zeros.reindex(zeros.index[::2])
    assert restricted.dtype == np.dtype(int)
|
||
|
|
||
|
|
||
|
def test_reindex_bool(datetime_series):
    # A series other than float, int, string, or object
    every_other = datetime_series[::2]
    falses = Series(np.zeros(len(every_other), dtype=bool), index=every_other.index)

    # Expanding to the full index introduces NaNs: object dtype is expected.
    expanded = falses.reindex(datetime_series.index)
    assert expanded.dtype == np.object_

    # Restricting to existing labels introduces no NaNs: bool is expected.
    restricted = falses.reindex(falses.index[::2])
    assert restricted.dtype == np.bool_
|
||
|
|
||
|
|
||
|
def test_reindex_bool_pad(datetime_series):
    # The bool series starts at the sixth label, so after padding onto the
    # full index the first five rows are expected to remain missing.
    tail = datetime_series[5:]
    falses = Series(np.zeros(len(tail), dtype=bool), index=tail.index)

    padded = falses.reindex(datetime_series.index, method="pad")
    leading = padded[:5]
    assert isna(leading).all()
|
||
|
|
||
|
|
||
|
def test_reindex_categorical():
    categories = ["a", "b", "c"]
    ser = Series(["a", "b", "c"], dtype="category")

    # reindexing to an invalid Categorical
    dates = date_range("20000101", periods=3)
    all_missing = Categorical(
        values=[np.nan, np.nan, np.nan], categories=categories
    )
    expected = Series(all_missing, index=dates)
    tm.assert_series_equal(ser.reindex(dates), expected)

    # partial reindexing
    partial_cases = (
        ([1, 2], ["b", "c"]),
        ([2, 3], ["c", np.nan]),
    )
    for labels, values in partial_cases:
        expected = Series(
            Categorical(values=values, categories=categories), index=labels
        )
        tm.assert_series_equal(ser.reindex(labels), expected)
|
||
|
|
||
|
|
||
|
def test_reindex_astype_order_consistency():
    # GH#17444
    # Casting through category before or after the reindex must not matter.
    ser = Series([1, 2, 3], index=[2, 0, 1])
    target = [0, 1, 2]

    reindex_first = ser.reindex(target).astype("category").astype(str)
    cast_first = ser.astype("category").reindex(target).astype(str)

    tm.assert_series_equal(reindex_first, cast_first)
|
||
|
|
||
|
|
||
|
def test_reindex_fill_value():
    # Every case shifts a three-row series onto labels 1..3, so exactly one
    # new row appears at label 3.
    target = [1, 2, 3]

    def check(source, expected, **reindex_kwargs):
        # Reindex ``source`` onto ``target`` and compare with ``expected``.
        tm.assert_series_equal(source.reindex(target, **reindex_kwargs), expected)

    # -----------------------------------------------------------
    # floats
    floats = Series([1.0, 2.0, 3.0])
    check(floats, Series([2.0, 3.0, np.nan], index=target))
    check(floats, Series([2.0, 3.0, 0], index=target), fill_value=0)

    # -----------------------------------------------------------
    # ints
    ints = Series([1, 2, 3])
    check(ints, Series([2.0, 3.0, np.nan], index=target))

    # don't upcast
    filled_ints = ints.reindex(target, fill_value=0)
    assert issubclass(filled_ints.dtype.type, np.integer)
    tm.assert_series_equal(filled_ints, Series([2, 3, 0], index=target))

    # -----------------------------------------------------------
    # objects
    objects = Series([1, 2, 3], dtype=object)
    check(objects, Series([2, 3, np.nan], index=target, dtype=object))
    check(
        objects,
        Series([2, 3, "foo"], index=target, dtype=object),
        fill_value="foo",
    )

    # ------------------------------------------------------------
    # bools
    bools = Series([True, False, True])
    check(bools, Series([False, True, np.nan], index=target, dtype=object))
    check(bools, Series([False, True, False], index=target), fill_value=False)
|
||
|
|
||
|
|
||
|
@td.skip_array_manager_not_yet_implemented
@pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
@pytest.mark.parametrize("fill_value", ["string", 0, Timedelta(0)])
def test_reindex_fill_value_datetimelike_upcast(dtype, fill_value, using_array_manager):
    # https://github.com/pandas-dev/pandas/issues/42921
    is_timedelta = dtype == "timedelta64[ns]"
    if is_timedelta and fill_value == Timedelta(0):
        # use the scalar that is not compatible with the dtype for this test
        fill_value = Timestamp(0)

    target = [0, 1]
    result = Series([NaT], dtype=dtype).reindex(target, fill_value=fill_value)

    # The result is expected to be object dtype holding the fill value as-is.
    expected = Series([NaT, fill_value], index=target, dtype=object)
    tm.assert_series_equal(result, expected)
|
||
|
|
||
|
|
||
|
def test_reindex_datetimeindexes_tz_naive_and_aware():
    # GH 8306
    # Filling across a tz-aware source index and a tz-naive target must raise.
    aware = date_range("20131101", tz="America/Chicago", periods=7)
    naive = date_range("20131103", periods=10, freq="h")
    ser = Series(range(7), index=aware)

    pattern = (
        r"Cannot compare dtypes datetime64\[ns, America/Chicago\] "
        r"and datetime64\[ns\]"
    )
    with pytest.raises(TypeError, match=pattern):
        ser.reindex(naive, method="ffill")
|
||
|
|
||
|
|
||
|
def test_reindex_empty_series_tz_dtype():
    # GH 20869
    # Growing an empty tz-aware series must keep the tz-aware dtype.
    tz_dtype = "datetime64[ns, UTC]"
    grown = Series(dtype=tz_dtype).reindex([0, 1])
    tm.assert_equal(grown, Series([NaT, NaT], dtype=tz_dtype))
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "p_values, o_values, values, expected_values",
    [
        (
            [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
            [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC"), "All"],
            [1.0, 1.0],
            [1.0, 1.0, np.nan],
        ),
        (
            [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
            [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
            [1.0, 1.0],
            [1.0, 1.0],
        ),
    ],
)
def test_reindex_periodindex_with_object(p_values, o_values, values, expected_values):
    # GH#28337
    # A PeriodIndex-backed series is reindexed by an Index built from
    # ``o_values``.
    target = Index(o_values)
    ser = Series(values, index=PeriodIndex(p_values))

    expected = Series(expected_values, index=target)
    tm.assert_series_equal(ser.reindex(target), expected)
|
||
|
|
||
|
|
||
|
def test_reindex_too_many_args():
    # GH 40980
    # A second positional argument must be rejected.
    pattern = r"reindex\(\) takes from 1 to 2 positional arguments but 3 were given"
    two_rows = Series([1, 2])
    with pytest.raises(TypeError, match=pattern):
        two_rows.reindex([2, 3], False)
|
||
|
|
||
|
|
||
|
def test_reindex_double_index():
    # GH 40980
    # Passing the index both positionally and by keyword must be rejected.
    pattern = r"reindex\(\) got multiple values for argument 'index'"
    two_rows = Series([1, 2])
    with pytest.raises(TypeError, match=pattern):
        two_rows.reindex([2, 3], index=[3, 4])
|
||
|
|
||
|
|
||
|
def test_reindex_no_posargs():
    # GH 40980
    # The target may be given purely by keyword.
    flipped_labels = [1, 0]
    flipped = Series([1, 2]).reindex(index=flipped_labels)
    tm.assert_series_equal(flipped, Series([2, 1], index=flipped_labels))
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
def test_reindex_empty_with_level(values):
    # GH41170
    source_index = MultiIndex.from_arrays(values)
    ser = Series(range(len(values[0])), index=source_index, dtype="object")

    # The expected result is empty: level 0 holds only "b", and no codes
    # are present.
    expected_index = MultiIndex(levels=[["b"], values[1]], codes=[[], []])
    expected = Series(index=expected_index, dtype="object")

    result = ser.reindex(np.array(["b"]), level=0)
    tm.assert_series_equal(result, expected)
|
||
|
|
||
|
|
||
|
def test_reindex_missing_category():
    # GH#18185
    # A fill value that is not one of the categories must be rejected.
    pattern = r"Cannot setitem on a Categorical with a new category \(-1\)"
    categorical_ser = Series([1, 2, 3, 1], dtype="category")
    with pytest.raises(TypeError, match=pattern):
        categorical_ser.reindex([1, 2, 3, 4, 5], fill_value=-1)
|
||
|
|
||
|
|
||
|
def test_reindexing_with_float64_NA_log():
    # GH 47055
    reindexed = Series([1.0, NA], dtype=Float64Dtype()).reindex(range(3))

    # The array backing the masked values is expected to hold NaN in the
    # missing slots.
    backing = reindexed.values._data
    tm.assert_numpy_array_equal(backing, np.array([1, np.nan, np.nan]))

    # Taking the log must not emit any warning.
    with tm.assert_produces_warning(None):
        logged = np.log(reindexed)
        expected_log = Series([0, np.nan, np.nan], dtype=Float64Dtype())
        tm.assert_series_equal(logged, expected_log)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("dtype", ["timedelta64", "datetime64"])
def test_reindex_expand_nonnano_nat(dtype):
    # GH 53497
    # Growing a second-resolution series by one row: the expected result has
    # the same dtype with NaT in the new row.
    second_dtype = f"{dtype}[s]"
    nat_scalar = getattr(np, dtype)("nat", "s")

    ser = Series(np.array([1], dtype=second_dtype))
    expected = Series(np.array([1, nat_scalar], dtype=second_dtype))

    tm.assert_series_equal(ser.reindex(RangeIndex(2)), expected)
|