LSR/env/lib/python3.6/site-packages/pandas/tests/indexes/test_base.py

2842 lines
99 KiB
Python
Raw Normal View History

2020-06-04 17:24:47 +02:00
from collections import defaultdict
from datetime import datetime, timedelta
from io import StringIO
import math
import operator
import re
import numpy as np
import pytest
import pandas._config.config as cf
from pandas._libs.tslib import Timestamp
from pandas.compat.numpy import np_datetime64_compat
from pandas.util._test_decorators import async_mark
from pandas.core.dtypes.common import is_unsigned_integer_dtype
from pandas.core.dtypes.generic import ABCIndex
import pandas as pd
from pandas import (
CategoricalIndex,
DataFrame,
DatetimeIndex,
Float64Index,
Int64Index,
PeriodIndex,
RangeIndex,
Series,
TimedeltaIndex,
UInt64Index,
date_range,
isna,
period_range,
)
import pandas._testing as tm
from pandas.core.algorithms import safe_sort
from pandas.core.indexes.api import (
Index,
MultiIndex,
_get_combined_index,
ensure_index,
ensure_index_from_sequences,
)
from pandas.tests.indexes.common import Base
from pandas.tests.indexes.conftest import indices_dict
class TestIndex(Base):
_holder = Index
@pytest.fixture
def index(self, request):
"""
Fixture for selectively parametrizing indices_dict via indirect parametrization
(parametrize over indices_dict keys with indirect=True). Defaults to string
index if no keys are provided.
"""
key = getattr(request, "param", "string")
# copy to avoid mutation, e.g. setting .name
return indices_dict[key].copy()
def create_index(self):
return Index(list("abcde"))
def test_can_hold_identifiers(self):
index = self.create_index()
key = index[0]
assert index._can_hold_identifiers_and_holds_name(key) is True
@pytest.mark.parametrize("index", ["datetime"], indirect=True)
def test_new_axis(self, index):
with tm.assert_produces_warning(DeprecationWarning):
# GH#30588 multi-dimensional indexing deprecated
new_index = index[None, :]
assert new_index.ndim == 2
assert isinstance(new_index, np.ndarray)
@pytest.mark.parametrize("index", ["int", "uint", "float"], indirect=True)
def test_copy_and_deepcopy(self, index):
new_copy2 = index.copy(dtype=int)
assert new_copy2.dtype.kind == "i"
def test_constructor_regular(self, indices):
tm.assert_contains_all(indices, indices)
def test_constructor_casting(self, index):
# casting
arr = np.array(index)
new_index = Index(arr)
tm.assert_contains_all(arr, new_index)
tm.assert_index_equal(index, new_index)
def test_constructor_copy(self, index):
# copy
# index = self.create_index()
arr = np.array(index)
new_index = Index(arr, copy=True, name="name")
assert isinstance(new_index, Index)
assert new_index.name == "name"
tm.assert_numpy_array_equal(arr, new_index.values)
arr[0] = "SOMEBIGLONGSTRING"
assert new_index[0] != "SOMEBIGLONGSTRING"
# FIXME: dont leave commented-out
# what to do here?
# arr = np.array(5.)
# pytest.raises(Exception, arr.view, Index)
def test_constructor_corner(self):
# corner case
msg = (
r"Index\(\.\.\.\) must be called with a collection of some "
"kind, 0 was passed"
)
with pytest.raises(TypeError, match=msg):
Index(0)
@pytest.mark.parametrize("index_vals", [[("A", 1), "B"], ["B", ("A", 1)]])
def test_construction_list_mixed_tuples(self, index_vals):
# see gh-10697: if we are constructing from a mixed list of tuples,
# make sure that we are independent of the sorting order.
index = Index(index_vals)
assert isinstance(index, Index)
assert not isinstance(index, MultiIndex)
@pytest.mark.parametrize("na_value", [None, np.nan])
@pytest.mark.parametrize("vtype", [list, tuple, iter])
def test_construction_list_tuples_nan(self, na_value, vtype):
# GH 18505 : valid tuples containing NaN
values = [(1, "two"), (3.0, na_value)]
result = Index(vtype(values))
expected = MultiIndex.from_tuples(values)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("cast_as_obj", [True, False])
@pytest.mark.parametrize(
"index",
[
pd.date_range(
"2015-01-01 10:00",
freq="D",
periods=3,
tz="US/Eastern",
name="Green Eggs & Ham",
), # DTI with tz
pd.date_range("2015-01-01 10:00", freq="D", periods=3), # DTI no tz
pd.timedelta_range("1 days", freq="D", periods=3), # td
pd.period_range("2015-01-01", freq="D", periods=3), # period
],
)
def test_constructor_from_index_dtlike(self, cast_as_obj, index):
if cast_as_obj:
result = pd.Index(index.astype(object))
else:
result = pd.Index(index)
tm.assert_index_equal(result, index)
if isinstance(index, pd.DatetimeIndex):
assert result.tz == index.tz
if cast_as_obj:
# GH#23524 check that Index(dti, dtype=object) does not
# incorrectly raise ValueError, and that nanoseconds are not
# dropped
index += pd.Timedelta(nanoseconds=50)
result = pd.Index(index, dtype=object)
assert result.dtype == np.object_
assert list(result) == list(index)
@pytest.mark.parametrize(
"index,has_tz",
[
(
pd.date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
True,
), # datetimetz
(pd.timedelta_range("1 days", freq="D", periods=3), False), # td
(pd.period_range("2015-01-01", freq="D", periods=3), False), # period
],
)
def test_constructor_from_series_dtlike(self, index, has_tz):
result = pd.Index(pd.Series(index))
tm.assert_index_equal(result, index)
if has_tz:
assert result.tz == index.tz
@pytest.mark.parametrize("klass", [Index, DatetimeIndex])
def test_constructor_from_series(self, klass):
expected = DatetimeIndex(
[Timestamp("20110101"), Timestamp("20120101"), Timestamp("20130101")]
)
s = Series(
[Timestamp("20110101"), Timestamp("20120101"), Timestamp("20130101")]
)
result = klass(s)
tm.assert_index_equal(result, expected)
def test_constructor_from_series_freq(self):
# GH 6273
# create from a series, passing a freq
dts = ["1-1-1990", "2-1-1990", "3-1-1990", "4-1-1990", "5-1-1990"]
expected = DatetimeIndex(dts, freq="MS")
s = Series(pd.to_datetime(dts))
result = DatetimeIndex(s, freq="MS")
tm.assert_index_equal(result, expected)
def test_constructor_from_frame_series_freq(self):
# GH 6273
# create from a series, passing a freq
dts = ["1-1-1990", "2-1-1990", "3-1-1990", "4-1-1990", "5-1-1990"]
expected = DatetimeIndex(dts, freq="MS")
df = pd.DataFrame(np.random.rand(5, 3))
df["date"] = dts
result = DatetimeIndex(df["date"], freq="MS")
assert df["date"].dtype == object
expected.name = "date"
tm.assert_index_equal(result, expected)
expected = pd.Series(dts, name="date")
tm.assert_series_equal(df["date"], expected)
# GH 6274
# infer freq of same
freq = pd.infer_freq(df["date"])
assert freq == "MS"
@pytest.mark.parametrize(
"array",
[
np.arange(5),
np.array(["a", "b", "c"]),
date_range("2000-01-01", periods=3).values,
],
)
def test_constructor_ndarray_like(self, array):
# GH 5460#issuecomment-44474502
# it should be possible to convert any object that satisfies the numpy
# ndarray interface directly into an Index
class ArrayLike:
def __init__(self, array):
self.array = array
def __array__(self, dtype=None) -> np.ndarray:
return self.array
expected = pd.Index(array)
result = pd.Index(ArrayLike(array))
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"dtype",
[int, "int64", "int32", "int16", "int8", "uint64", "uint32", "uint16", "uint8"],
)
def test_constructor_int_dtype_float(self, dtype):
# GH 18400
if is_unsigned_integer_dtype(dtype):
index_type = UInt64Index
else:
index_type = Int64Index
expected = index_type([0, 1, 2, 3])
result = Index([0.0, 1.0, 2.0, 3.0], dtype=dtype)
tm.assert_index_equal(result, expected)
def test_constructor_int_dtype_nan(self):
# see gh-15187
data = [np.nan]
expected = Float64Index(data)
result = Index(data, dtype="float")
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("dtype", ["int64", "uint64"])
def test_constructor_int_dtype_nan_raises(self, dtype):
# see gh-15187
data = [np.nan]
msg = "cannot convert"
with pytest.raises(ValueError, match=msg):
Index(data, dtype=dtype)
def test_constructor_no_pandas_array(self):
ser = pd.Series([1, 2, 3])
result = pd.Index(ser.array)
expected = pd.Index([1, 2, 3])
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"klass,dtype,na_val",
[
(pd.Float64Index, np.float64, np.nan),
(pd.DatetimeIndex, "datetime64[ns]", pd.NaT),
],
)
def test_index_ctor_infer_nan_nat(self, klass, dtype, na_val):
# GH 13467
na_list = [na_val, na_val]
expected = klass(na_list)
assert expected.dtype == dtype
result = Index(na_list)
tm.assert_index_equal(result, expected)
result = Index(np.array(na_list))
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("pos", [0, 1])
@pytest.mark.parametrize(
"klass,dtype,ctor",
[
(pd.DatetimeIndex, "datetime64[ns]", np.datetime64("nat")),
(pd.TimedeltaIndex, "timedelta64[ns]", np.timedelta64("nat")),
],
)
def test_index_ctor_infer_nat_dt_like(self, pos, klass, dtype, ctor, nulls_fixture):
expected = klass([pd.NaT, pd.NaT])
assert expected.dtype == dtype
data = [ctor]
data.insert(pos, nulls_fixture)
result = Index(data)
tm.assert_index_equal(result, expected)
result = Index(np.array(data, dtype=object))
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("swap_objs", [True, False])
def test_index_ctor_nat_result(self, swap_objs):
# mixed np.datetime64/timedelta64 nat results in object
data = [np.datetime64("nat"), np.timedelta64("nat")]
if swap_objs:
data = data[::-1]
expected = pd.Index(data, dtype=object)
tm.assert_index_equal(Index(data), expected)
tm.assert_index_equal(Index(np.array(data, dtype=object)), expected)
def test_index_ctor_infer_periodindex(self):
xp = period_range("2012-1-1", freq="M", periods=3)
rs = Index(xp)
tm.assert_index_equal(rs, xp)
assert isinstance(rs, PeriodIndex)
@pytest.mark.parametrize(
"vals,dtype",
[
([1, 2, 3, 4, 5], "int"),
([1.1, np.nan, 2.2, 3.0], "float"),
(["A", "B", "C", np.nan], "obj"),
],
)
def test_constructor_simple_new(self, vals, dtype):
index = Index(vals, name=dtype)
result = index._simple_new(index.values, dtype)
tm.assert_index_equal(result, index)
def test_constructor_wrong_kwargs(self):
# GH #19348
with pytest.raises(TypeError, match="Unexpected keyword arguments {'foo'}"):
Index([], foo="bar")
@pytest.mark.parametrize(
"vals",
[
[1, 2, 3],
np.array([1, 2, 3]),
np.array([1, 2, 3], dtype=int),
# below should coerce
[1.0, 2.0, 3.0],
np.array([1.0, 2.0, 3.0], dtype=float),
],
)
def test_constructor_dtypes_to_int64(self, vals):
index = Index(vals, dtype=int)
assert isinstance(index, Int64Index)
@pytest.mark.parametrize(
"vals",
[
[1, 2, 3],
[1.0, 2.0, 3.0],
np.array([1.0, 2.0, 3.0]),
np.array([1, 2, 3], dtype=int),
np.array([1.0, 2.0, 3.0], dtype=float),
],
)
def test_constructor_dtypes_to_float64(self, vals):
index = Index(vals, dtype=float)
assert isinstance(index, Float64Index)
@pytest.mark.parametrize("cast_index", [True, False])
@pytest.mark.parametrize(
"vals", [[True, False, True], np.array([True, False, True], dtype=bool)]
)
def test_constructor_dtypes_to_object(self, cast_index, vals):
if cast_index:
index = Index(vals, dtype=bool)
else:
index = Index(vals)
assert isinstance(index, Index)
assert index.dtype == object
@pytest.mark.parametrize(
"vals",
[
[1, 2, 3],
np.array([1, 2, 3], dtype=int),
np.array(
[np_datetime64_compat("2011-01-01"), np_datetime64_compat("2011-01-02")]
),
[datetime(2011, 1, 1), datetime(2011, 1, 2)],
],
)
def test_constructor_dtypes_to_categorical(self, vals):
index = Index(vals, dtype="category")
assert isinstance(index, CategoricalIndex)
@pytest.mark.parametrize("cast_index", [True, False])
@pytest.mark.parametrize(
"vals",
[
Index(
np.array(
[
np_datetime64_compat("2011-01-01"),
np_datetime64_compat("2011-01-02"),
]
)
),
Index([datetime(2011, 1, 1), datetime(2011, 1, 2)]),
],
)
def test_constructor_dtypes_to_datetime(self, cast_index, vals):
if cast_index:
index = Index(vals, dtype=object)
assert isinstance(index, Index)
assert index.dtype == object
else:
index = Index(vals)
assert isinstance(index, DatetimeIndex)
@pytest.mark.parametrize("cast_index", [True, False])
@pytest.mark.parametrize(
"vals",
[
np.array([np.timedelta64(1, "D"), np.timedelta64(1, "D")]),
[timedelta(1), timedelta(1)],
],
)
def test_constructor_dtypes_to_timedelta(self, cast_index, vals):
if cast_index:
index = Index(vals, dtype=object)
assert isinstance(index, Index)
assert index.dtype == object
else:
index = Index(vals)
assert isinstance(index, TimedeltaIndex)
@pytest.mark.parametrize("attr", ["values", "asi8"])
@pytest.mark.parametrize("klass", [pd.Index, pd.DatetimeIndex])
def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass):
# Test constructing with a datetimetz dtype
# .values produces numpy datetimes, so these are considered naive
# .asi8 produces integers, so these are considered epoch timestamps
# ^the above will be true in a later version. Right now we `.view`
# the i8 values as NS_DTYPE, effectively treating them as wall times.
index = pd.date_range("2011-01-01", periods=5)
arg = getattr(index, attr)
index = index.tz_localize(tz_naive_fixture)
dtype = index.dtype
if attr == "asi8":
result = pd.DatetimeIndex(arg).tz_localize(tz_naive_fixture)
else:
result = klass(arg, tz=tz_naive_fixture)
tm.assert_index_equal(result, index)
if attr == "asi8":
result = pd.DatetimeIndex(arg).astype(dtype)
else:
result = klass(arg, dtype=dtype)
tm.assert_index_equal(result, index)
if attr == "asi8":
result = pd.DatetimeIndex(list(arg)).tz_localize(tz_naive_fixture)
else:
result = klass(list(arg), tz=tz_naive_fixture)
tm.assert_index_equal(result, index)
if attr == "asi8":
result = pd.DatetimeIndex(list(arg)).astype(dtype)
else:
result = klass(list(arg), dtype=dtype)
tm.assert_index_equal(result, index)
@pytest.mark.parametrize("attr", ["values", "asi8"])
@pytest.mark.parametrize("klass", [pd.Index, pd.TimedeltaIndex])
def test_constructor_dtypes_timedelta(self, attr, klass):
index = pd.timedelta_range("1 days", periods=5)
dtype = index.dtype
values = getattr(index, attr)
result = klass(values, dtype=dtype)
tm.assert_index_equal(result, index)
result = klass(list(values), dtype=dtype)
tm.assert_index_equal(result, index)
@pytest.mark.parametrize("value", [[], iter([]), (_ for _ in [])])
@pytest.mark.parametrize(
"klass",
[
Index,
Float64Index,
Int64Index,
UInt64Index,
CategoricalIndex,
DatetimeIndex,
TimedeltaIndex,
],
)
def test_constructor_empty(self, value, klass):
empty = klass(value)
assert isinstance(empty, klass)
assert not len(empty)
@pytest.mark.parametrize(
"empty,klass",
[
(PeriodIndex([], freq="B"), PeriodIndex),
(PeriodIndex(iter([]), freq="B"), PeriodIndex),
(PeriodIndex((_ for _ in []), freq="B"), PeriodIndex),
(RangeIndex(step=1), pd.RangeIndex),
(MultiIndex(levels=[[1, 2], ["blue", "red"]], codes=[[], []]), MultiIndex),
],
)
def test_constructor_empty_special(self, empty, klass):
assert isinstance(empty, klass)
assert not len(empty)
def test_constructor_overflow_int64(self):
# see gh-15832
msg = (
"The elements provided in the data cannot "
"all be casted to the dtype int64"
)
with pytest.raises(OverflowError, match=msg):
Index([np.iinfo(np.uint64).max - 1], dtype="int64")
@pytest.mark.xfail(reason="see GH#21311: Index doesn't enforce dtype argument")
def test_constructor_cast(self):
msg = "could not convert string to float"
with pytest.raises(ValueError, match=msg):
Index(["a", "b", "c"], dtype=float)
@pytest.mark.parametrize(
"index",
[
"datetime",
"float",
"int",
"period",
"range",
"repeats",
"timedelta",
"tuples",
"uint",
],
indirect=True,
)
def test_view_with_args(self, index):
index.view("i8")
@pytest.mark.parametrize(
"index",
[
"unicode",
"string",
pytest.param("categorical", marks=pytest.mark.xfail(reason="gh-25464")),
"bool",
"empty",
],
indirect=True,
)
def test_view_with_args_object_array_raises(self, index):
msg = "Cannot change data-type for object array"
with pytest.raises(TypeError, match=msg):
index.view("i8")
@pytest.mark.parametrize("index", ["int", "range"], indirect=True)
def test_astype(self, index):
casted = index.astype("i8")
# it works!
casted.get_loc(5)
# pass on name
index.name = "foobar"
casted = index.astype("i8")
assert casted.name == "foobar"
def test_equals_object(self):
# same
assert Index(["a", "b", "c"]).equals(Index(["a", "b", "c"]))
@pytest.mark.parametrize(
"comp", [Index(["a", "b"]), Index(["a", "b", "d"]), ["a", "b", "c"]]
)
def test_not_equals_object(self, comp):
assert not Index(["a", "b", "c"]).equals(comp)
def test_insert(self):
# GH 7256
# validate neg/pos inserts
result = Index(["b", "c", "d"])
# test 0th element
tm.assert_index_equal(Index(["a", "b", "c", "d"]), result.insert(0, "a"))
# test Nth element that follows Python list behavior
tm.assert_index_equal(Index(["b", "c", "e", "d"]), result.insert(-1, "e"))
# test loc +/- neq (0, -1)
tm.assert_index_equal(result.insert(1, "z"), result.insert(-2, "z"))
# test empty
null_index = Index([])
tm.assert_index_equal(Index(["a"]), null_index.insert(0, "a"))
def test_insert_missing(self, nulls_fixture):
# GH 22295
# test there is no mangling of NA values
expected = Index(["a", nulls_fixture, "b", "c"])
result = Index(list("abc")).insert(1, nulls_fixture)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"pos,expected",
[
(0, Index(["b", "c", "d"], name="index")),
(-1, Index(["a", "b", "c"], name="index")),
],
)
def test_delete(self, pos, expected):
index = Index(["a", "b", "c", "d"], name="index")
result = index.delete(pos)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
def test_delete_raises(self):
index = Index(["a", "b", "c", "d"], name="index")
msg = "index 5 is out of bounds for axis 0 with size 4"
with pytest.raises(IndexError, match=msg):
index.delete(5)
def test_identical(self):
# index
i1 = Index(["a", "b", "c"])
i2 = Index(["a", "b", "c"])
assert i1.identical(i2)
i1 = i1.rename("foo")
assert i1.equals(i2)
assert not i1.identical(i2)
i2 = i2.rename("foo")
assert i1.identical(i2)
i3 = Index([("a", "a"), ("a", "b"), ("b", "a")])
i4 = Index([("a", "a"), ("a", "b"), ("b", "a")], tupleize_cols=False)
assert not i3.identical(i4)
def test_is_(self):
ind = Index(range(10))
assert ind.is_(ind)
assert ind.is_(ind.view().view().view().view())
assert not ind.is_(Index(range(10)))
assert not ind.is_(ind.copy())
assert not ind.is_(ind.copy(deep=False))
assert not ind.is_(ind[:])
assert not ind.is_(np.array(range(10)))
# quasi-implementation dependent
assert ind.is_(ind.view())
ind2 = ind.view()
ind2.name = "bob"
assert ind.is_(ind2)
assert ind2.is_(ind)
# doesn't matter if Indices are *actually* views of underlying data,
assert not ind.is_(Index(ind.values))
arr = np.array(range(1, 11))
ind1 = Index(arr, copy=False)
ind2 = Index(arr, copy=False)
assert not ind1.is_(ind2)
@pytest.mark.parametrize("index", ["datetime"], indirect=True)
def test_asof(self, index):
d = index[0]
assert index.asof(d) == d
assert isna(index.asof(d - timedelta(1)))
d = index[-1]
assert index.asof(d + timedelta(1)) == d
d = index[0].to_pydatetime()
assert isinstance(index.asof(d), Timestamp)
def test_asof_datetime_partial(self):
index = pd.date_range("2010-01-01", periods=2, freq="m")
expected = Timestamp("2010-02-28")
result = index.asof("2010-02")
assert result == expected
assert not isinstance(result, Index)
def test_nanosecond_index_access(self):
s = Series([Timestamp("20130101")]).values.view("i8")[0]
r = DatetimeIndex([s + 50 + i for i in range(100)])
x = Series(np.random.randn(100), index=r)
first_value = x.asof(x.index[0])
# this does not yet work, as parsing strings is done via dateutil
# assert first_value == x['2013-01-01 00:00:00.000000050+0000']
expected_ts = np_datetime64_compat("2013-01-01 00:00:00.000000050+0000", "ns")
assert first_value == x[Timestamp(expected_ts)]
def test_booleanindex(self, index):
bool_index = np.ones(len(index), dtype=bool)
bool_index[5:30:2] = False
sub_index = index[bool_index]
for i, val in enumerate(sub_index):
assert sub_index.get_loc(val) == i
sub_index = index[list(bool_index)]
for i, val in enumerate(sub_index):
assert sub_index.get_loc(val) == i
def test_fancy(self):
index = self.create_index()
sl = index[[1, 2, 3]]
for i in sl:
assert i == sl[sl.get_loc(i)]
@pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True)
@pytest.mark.parametrize("dtype", [np.int_, np.bool_])
def test_empty_fancy(self, index, dtype):
empty_arr = np.array([], dtype=dtype)
empty_index = type(index)([])
assert index[[]].identical(empty_index)
assert index[empty_arr].identical(empty_index)
@pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True)
def test_empty_fancy_raises(self, index):
# pd.DatetimeIndex is excluded, because it overrides getitem and should
# be tested separately.
empty_farr = np.array([], dtype=np.float_)
empty_index = type(index)([])
assert index[[]].identical(empty_index)
# np.ndarray only accepts ndarray of int & bool dtypes, so should Index
msg = r"arrays used as indices must be of integer \(or boolean\) type"
with pytest.raises(IndexError, match=msg):
index[empty_farr]
@pytest.mark.parametrize("sort", [None, False])
def test_intersection(self, index, sort):
first = index[:20]
second = index[:10]
intersect = first.intersection(second, sort=sort)
if sort is None:
tm.assert_index_equal(intersect, second.sort_values())
assert tm.equalContents(intersect, second)
# Corner cases
inter = first.intersection(first, sort=sort)
assert inter is first
@pytest.mark.parametrize(
"index2,keeps_name",
[
(Index([3, 4, 5, 6, 7], name="index"), True), # preserve same name
(Index([3, 4, 5, 6, 7], name="other"), False), # drop diff names
(Index([3, 4, 5, 6, 7]), False),
],
)
@pytest.mark.parametrize("sort", [None, False])
def test_intersection_name_preservation(self, index2, keeps_name, sort):
index1 = Index([1, 2, 3, 4, 5], name="index")
expected = Index([3, 4, 5])
result = index1.intersection(index2, sort)
if keeps_name:
expected.name = "index"
assert result.name == expected.name
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"first_name,second_name,expected_name",
[("A", "A", "A"), ("A", "B", None), (None, "B", None)],
)
@pytest.mark.parametrize("sort", [None, False])
def test_intersection_name_preservation2(
self, index, first_name, second_name, expected_name, sort
):
first = index[5:20]
second = index[:10]
first.name = first_name
second.name = second_name
intersect = first.intersection(second, sort=sort)
assert intersect.name == expected_name
@pytest.mark.parametrize(
"index2,keeps_name",
[
(Index([4, 7, 6, 5, 3], name="index"), True),
(Index([4, 7, 6, 5, 3], name="other"), False),
],
)
@pytest.mark.parametrize("sort", [None, False])
def test_intersection_monotonic(self, index2, keeps_name, sort):
index1 = Index([5, 3, 2, 4, 1], name="index")
expected = Index([5, 3, 4])
if keeps_name:
expected.name = "index"
result = index1.intersection(index2, sort=sort)
if sort is None:
expected = expected.sort_values()
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"index2,expected_arr",
[(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B", "A"])],
)
@pytest.mark.parametrize("sort", [None, False])
def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort):
# non-monotonic non-unique
index1 = Index(["A", "B", "A", "C"])
expected = Index(expected_arr, dtype="object")
result = index1.intersection(index2, sort=sort)
if sort is None:
expected = expected.sort_values()
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("sort", [None, False])
def test_intersect_str_dates(self, sort):
dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]
i1 = Index(dt_dates, dtype=object)
i2 = Index(["aa"], dtype=object)
result = i2.intersection(i1, sort=sort)
assert len(result) == 0
def test_intersect_nosort(self):
result = pd.Index(["c", "b", "a"]).intersection(["b", "a"])
expected = pd.Index(["b", "a"])
tm.assert_index_equal(result, expected)
def test_intersection_equal_sort(self):
idx = pd.Index(["c", "a", "b"])
tm.assert_index_equal(idx.intersection(idx, sort=False), idx)
tm.assert_index_equal(idx.intersection(idx, sort=None), idx)
@pytest.mark.xfail(reason="Not implemented")
def test_intersection_equal_sort_true(self):
# TODO decide on True behaviour
idx = pd.Index(["c", "a", "b"])
sorted_ = pd.Index(["a", "b", "c"])
tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_)
@pytest.mark.parametrize("sort", [None, False])
def test_chained_union(self, sort):
# Chained unions handles names correctly
i1 = Index([1, 2], name="i1")
i2 = Index([5, 6], name="i2")
i3 = Index([3, 4], name="i3")
union = i1.union(i2.union(i3, sort=sort), sort=sort)
expected = i1.union(i2, sort=sort).union(i3, sort=sort)
tm.assert_index_equal(union, expected)
j1 = Index([1, 2], name="j1")
j2 = Index([], name="j2")
j3 = Index([], name="j3")
union = j1.union(j2.union(j3, sort=sort), sort=sort)
expected = j1.union(j2, sort=sort).union(j3, sort=sort)
tm.assert_index_equal(union, expected)
@pytest.mark.parametrize("sort", [None, False])
def test_union(self, index, sort):
first = index[5:20]
second = index[:10]
everything = index[:20]
union = first.union(second, sort=sort)
if sort is None:
tm.assert_index_equal(union, everything.sort_values())
assert tm.equalContents(union, everything)
@pytest.mark.parametrize("slice_", [slice(None), slice(0)])
def test_union_sort_other_special(self, slice_):
# https://github.com/pandas-dev/pandas/issues/24959
idx = pd.Index([1, 0, 2])
# default, sort=None
other = idx[slice_]
tm.assert_index_equal(idx.union(other), idx)
tm.assert_index_equal(other.union(idx), idx)
# sort=False
tm.assert_index_equal(idx.union(other, sort=False), idx)
@pytest.mark.xfail(reason="Not implemented")
@pytest.mark.parametrize("slice_", [slice(None), slice(0)])
def test_union_sort_special_true(self, slice_):
# TODO decide on True behaviour
# sort=True
idx = pd.Index([1, 0, 2])
# default, sort=None
other = idx[slice_]
result = idx.union(other, sort=True)
expected = pd.Index([0, 1, 2])
tm.assert_index_equal(result, expected)
def test_union_sort_other_incomparable(self):
# https://github.com/pandas-dev/pandas/issues/24959
idx = pd.Index([1, pd.Timestamp("2000")])
# default (sort=None)
with tm.assert_produces_warning(RuntimeWarning):
result = idx.union(idx[:1])
tm.assert_index_equal(result, idx)
# sort=None
with tm.assert_produces_warning(RuntimeWarning):
result = idx.union(idx[:1], sort=None)
tm.assert_index_equal(result, idx)
# sort=False
result = idx.union(idx[:1], sort=False)
tm.assert_index_equal(result, idx)
@pytest.mark.xfail(reason="Not implemented")
def test_union_sort_other_incomparable_true(self):
# TODO decide on True behaviour
# sort=True
idx = pd.Index([1, pd.Timestamp("2000")])
with pytest.raises(TypeError, match=".*"):
idx.union(idx[:1], sort=True)
@pytest.mark.parametrize("klass", [np.array, Series, list])
@pytest.mark.parametrize("sort", [None, False])
def test_union_from_iterables(self, index, klass, sort):
# GH 10149
first = index[5:20]
second = index[:10]
everything = index[:20]
case = klass(second.values)
result = first.union(case, sort=sort)
if sort is None:
tm.assert_index_equal(result, everything.sort_values())
assert tm.equalContents(result, everything)
@pytest.mark.parametrize("sort", [None, False])
def test_union_identity(self, index, sort):
first = index[5:20]
union = first.union(first, sort=sort)
# i.e. identity is not preserved when sort is True
assert (union is first) is (not sort)
# This should no longer be the same object, since [] is not consistent,
# both objects will be recast to dtype('O')
union = first.union([], sort=sort)
assert (union is first) is (not sort)
union = Index([]).union(first, sort=sort)
assert (union is first) is (not sort)
@pytest.mark.parametrize("first_list", [list("ba"), list()])
@pytest.mark.parametrize("second_list", [list("ab"), list()])
@pytest.mark.parametrize(
"first_name, second_name, expected_name",
[("A", "B", None), (None, "B", None), ("A", None, None)],
)
@pytest.mark.parametrize("sort", [None, False])
def test_union_name_preservation(
self, first_list, second_list, first_name, second_name, expected_name, sort
):
first = Index(first_list, name=first_name)
second = Index(second_list, name=second_name)
union = first.union(second, sort=sort)
vals = set(first_list).union(second_list)
if sort is None and len(first_list) > 0 and len(second_list) > 0:
expected = Index(sorted(vals), name=expected_name)
tm.assert_index_equal(union, expected)
else:
expected = Index(vals, name=expected_name)
assert tm.equalContents(union, expected)
@pytest.mark.parametrize("sort", [None, False])
def test_union_dt_as_obj(self, sort):
# TODO: Replace with fixturesult
index = self.create_index()
date_index = pd.date_range("2019-01-01", periods=10)
first_cat = index.union(date_index)
second_cat = index.union(index)
if date_index.dtype == np.object_:
appended = np.append(index, date_index)
else:
appended = np.append(index, date_index.astype("O"))
assert tm.equalContents(first_cat, appended)
assert tm.equalContents(second_cat, index)
tm.assert_contains_all(index, first_cat)
tm.assert_contains_all(index, second_cat)
tm.assert_contains_all(date_index, first_cat)
@pytest.mark.parametrize(
"method", ["union", "intersection", "difference", "symmetric_difference"]
)
def test_setops_disallow_true(self, method):
idx1 = pd.Index(["a", "b"])
idx2 = pd.Index(["b", "c"])
with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
getattr(idx1, method)(idx2, sort=True)
def test_map_identity_mapping(self, indices):
# GH 12766
tm.assert_index_equal(indices, indices.map(lambda x: x))
def test_map_with_tuples(self):
# GH 12766
# Test that returning a single tuple from an Index
# returns an Index.
index = tm.makeIntIndex(3)
result = tm.makeIntIndex(3).map(lambda x: (x,))
expected = Index([(i,) for i in index])
tm.assert_index_equal(result, expected)
# Test that returning a tuple from a map of a single index
# returns a MultiIndex object.
result = index.map(lambda x: (x, x == 1))
expected = MultiIndex.from_tuples([(i, i == 1) for i in index])
tm.assert_index_equal(result, expected)
def test_map_with_tuples_mi(self):
# Test that returning a single object from a MultiIndex
# returns an Index.
first_level = ["foo", "bar", "baz"]
multi_index = MultiIndex.from_tuples(zip(first_level, [1, 2, 3]))
reduced_index = multi_index.map(lambda x: x[0])
tm.assert_index_equal(reduced_index, Index(first_level))
@pytest.mark.parametrize(
"attr", ["makeDateIndex", "makePeriodIndex", "makeTimedeltaIndex"]
)
def test_map_tseries_indices_return_index(self, attr):
index = getattr(tm, attr)(10)
expected = Index([1] * 10)
result = index.map(lambda x: 1)
tm.assert_index_equal(expected, result)
def test_map_tseries_indices_accsr_return_index(self):
date_index = tm.makeDateIndex(24, freq="h", name="hourly")
expected = Index(range(24), name="hourly")
tm.assert_index_equal(expected, date_index.map(lambda x: x.hour))
@pytest.mark.parametrize(
"mapper",
[
lambda values, index: {i: e for e, i in zip(values, index)},
lambda values, index: pd.Series(values, index),
],
)
def test_map_dictlike_simple(self, mapper):
# GH 12756
expected = Index(["foo", "bar", "baz"])
index = tm.makeIntIndex(3)
result = index.map(mapper(expected.values, index))
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"mapper",
[
lambda values, index: {i: e for e, i in zip(values, index)},
lambda values, index: pd.Series(values, index),
],
)
def test_map_dictlike(self, indices, mapper):
# GH 12756
if isinstance(indices, CategoricalIndex):
# Tested in test_categorical
return
elif not indices.is_unique:
# Cannot map duplicated index
return
if indices.empty:
# to match proper result coercion for uints
expected = Index([])
else:
expected = Index(np.arange(len(indices), 0, -1))
result = indices.map(mapper(expected, indices))
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"mapper",
[Series(["foo", 2.0, "baz"], index=[0, 2, -1]), {0: "foo", 2: 2.0, -1: "baz"}],
)
def test_map_with_non_function_missing_values(self, mapper):
# GH 12756
expected = Index([2.0, np.nan, "foo"])
result = Index([2, 1, 0]).map(mapper)
tm.assert_index_equal(expected, result)
def test_map_na_exclusion(self):
index = Index([1.5, np.nan, 3, np.nan, 5])
result = index.map(lambda x: x * 2, na_action="ignore")
expected = index * 2
tm.assert_index_equal(result, expected)
def test_map_defaultdict(self):
index = Index([1, 2, 3])
default_dict = defaultdict(lambda: "blank")
default_dict[1] = "stuff"
result = index.map(default_dict)
expected = Index(["stuff", "blank", "blank"])
tm.assert_index_equal(result, expected)
def test_append_multiple(self):
index = Index(["a", "b", "c", "d", "e", "f"])
foos = [index[:2], index[2:4], index[4:]]
result = foos[0].append(foos[1:])
tm.assert_index_equal(result, index)
# empty
result = index.append([])
tm.assert_index_equal(result, index)
@pytest.mark.parametrize("name,expected", [("foo", "foo"), ("bar", None)])
def test_append_empty_preserve_name(self, name, expected):
left = Index([], name="foo")
right = Index([1, 2, 3], name=name)
result = left.append(right)
assert result.name == expected
@pytest.mark.parametrize("second_name,expected", [(None, None), ("name", "name")])
@pytest.mark.parametrize("sort", [None, False])
def test_difference_name_preservation(self, index, second_name, expected, sort):
first = index[5:20]
second = index[:10]
answer = index[10:20]
first.name = "name"
second.name = second_name
result = first.difference(second, sort=sort)
assert tm.equalContents(result, answer)
if expected is None:
assert result.name is None
else:
assert result.name == expected
@pytest.mark.parametrize("sort", [None, False])
def test_difference_empty_arg(self, index, sort):
first = index[5:20]
first.name == "name"
result = first.difference([], sort)
assert tm.equalContents(result, first)
assert result.name == first.name
@pytest.mark.parametrize("sort", [None, False])
def test_difference_identity(self, index, sort):
first = index[5:20]
first.name == "name"
result = first.difference(first, sort)
assert len(result) == 0
assert result.name == first.name
@pytest.mark.parametrize("sort", [None, False])
def test_difference_sort(self, index, sort):
first = index[5:20]
second = index[:10]
result = first.difference(second, sort)
expected = index[10:20]
if sort is None:
expected = expected.sort_values()
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("sort", [None, False])
def test_symmetric_difference(self, sort):
# smoke
index1 = Index([5, 2, 3, 4], name="index1")
index2 = Index([2, 3, 4, 1])
result = index1.symmetric_difference(index2, sort=sort)
expected = Index([5, 1])
assert tm.equalContents(result, expected)
assert result.name is None
if sort is None:
expected = expected.sort_values()
tm.assert_index_equal(result, expected)
# __xor__ syntax
expected = index1 ^ index2
assert tm.equalContents(result, expected)
assert result.name is None
@pytest.mark.parametrize("opname", ["difference", "symmetric_difference"])
def test_difference_incomparable(self, opname):
a = pd.Index([3, pd.Timestamp("2000"), 1])
b = pd.Index([2, pd.Timestamp("1999"), 1])
op = operator.methodcaller(opname, b)
# sort=None, the default
result = op(a)
expected = pd.Index([3, pd.Timestamp("2000"), 2, pd.Timestamp("1999")])
if opname == "difference":
expected = expected[:2]
tm.assert_index_equal(result, expected)
# sort=False
op = operator.methodcaller(opname, b, sort=False)
result = op(a)
tm.assert_index_equal(result, expected)
@pytest.mark.xfail(reason="Not implemented")
@pytest.mark.parametrize("opname", ["difference", "symmetric_difference"])
def test_difference_incomparable_true(self, opname):
# TODO decide on True behaviour
# # sort=True, raises
a = pd.Index([3, pd.Timestamp("2000"), 1])
b = pd.Index([2, pd.Timestamp("1999"), 1])
op = operator.methodcaller(opname, b, sort=True)
with pytest.raises(TypeError, match="Cannot compare"):
op(a)
@pytest.mark.parametrize("sort", [None, False])
def test_symmetric_difference_mi(self, sort):
index1 = MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3]))
index2 = MultiIndex.from_tuples([("foo", 1), ("bar", 3)])
result = index1.symmetric_difference(index2, sort=sort)
expected = MultiIndex.from_tuples([("bar", 2), ("baz", 3), ("bar", 3)])
if sort is None:
expected = expected.sort_values()
tm.assert_index_equal(result, expected)
assert tm.equalContents(result, expected)
@pytest.mark.parametrize(
"index2,expected",
[
(Index([0, 1, np.nan]), Index([2.0, 3.0, 0.0])),
(Index([0, 1]), Index([np.nan, 2.0, 3.0, 0.0])),
],
)
@pytest.mark.parametrize("sort", [None, False])
def test_symmetric_difference_missing(self, index2, expected, sort):
# GH 13514 change: {nan} - {nan} == {}
# (GH 6444, sorting of nans, is no longer an issue)
index1 = Index([1, np.nan, 2, 3])
result = index1.symmetric_difference(index2, sort=sort)
if sort is None:
expected = expected.sort_values()
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("sort", [None, False])
def test_symmetric_difference_non_index(self, sort):
index1 = Index([1, 2, 3, 4], name="index1")
index2 = np.array([2, 3, 4, 5])
expected = Index([1, 5])
result = index1.symmetric_difference(index2, sort=sort)
assert tm.equalContents(result, expected)
assert result.name == "index1"
result = index1.symmetric_difference(index2, result_name="new_name", sort=sort)
assert tm.equalContents(result, expected)
assert result.name == "new_name"
@pytest.mark.parametrize("sort", [None, False])
def test_difference_type(self, indices, sort):
# GH 20040
# If taking difference of a set and itself, it
# needs to preserve the type of the index
if not indices.is_unique:
return
result = indices.difference(indices, sort=sort)
expected = indices.drop(indices)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("sort", [None, False])
def test_intersection_difference(self, indices, sort):
# GH 20040
# Test that the intersection of an index with an
# empty index produces the same index as the difference
# of an index with itself. Test for all types
if not indices.is_unique:
return
inter = indices.intersection(indices.drop(indices))
diff = indices.difference(indices, sort=sort)
tm.assert_index_equal(inter, diff)
@pytest.mark.parametrize(
"index, expected",
[
("string", False),
("bool", False),
("categorical", False),
("int", True),
("datetime", False),
("float", True),
],
indirect=["index"],
)
def test_is_numeric(self, index, expected):
assert index.is_numeric() is expected
@pytest.mark.parametrize(
"index, expected",
[
("string", True),
("bool", True),
("categorical", False),
("int", False),
("datetime", False),
("float", False),
],
indirect=["index"],
)
def test_is_object(self, index, expected):
assert index.is_object() is expected
@pytest.mark.parametrize(
"index, expected",
[
("string", False),
("bool", False),
("categorical", False),
("int", False),
("datetime", True),
("float", False),
],
indirect=["index"],
)
def test_is_all_dates(self, index, expected):
assert index.is_all_dates is expected
def test_summary(self, indices):
self._check_method_works(Index._summary, indices)
def test_summary_bug(self):
# GH3869`
ind = Index(["{other}%s", "~:{range}:0"], name="A")
result = ind._summary()
# shouldn't be formatted accidentally.
assert "~:{range}:0" in result
assert "{other}%s" in result
def test_format(self, indices):
self._check_method_works(Index.format, indices)
def test_format_bug(self):
# GH 14626
# windows has different precision on datetime.datetime.now (it doesn't
# include us since the default for Timestamp shows these but Index
# formatting does not we are skipping)
now = datetime.now()
if not str(now).endswith("000"):
index = Index([now])
formatted = index.format()
expected = [str(index[0])]
assert formatted == expected
Index([]).format()
@pytest.mark.parametrize("vals", [[1, 2.0 + 3.0j, 4.0], ["a", "b", "c"]])
def test_format_missing(self, vals, nulls_fixture):
# 2845
vals = list(vals) # Copy for each iteration
vals.append(nulls_fixture)
index = Index(vals)
formatted = index.format()
expected = [str(index[0]), str(index[1]), str(index[2]), "NaN"]
assert formatted == expected
assert index[3] is nulls_fixture
def test_format_with_name_time_info(self):
# bug I fixed 12/20/2011
dates = date_range("2011-01-01 04:00:00", periods=10, name="something")
formatted = dates.format(name=True)
assert formatted[0] == "something"
def test_format_datetime_with_time(self):
t = Index([datetime(2012, 2, 7), datetime(2012, 2, 7, 23)])
result = t.format()
expected = ["2012-02-07 00:00:00", "2012-02-07 23:00:00"]
assert len(result) == 2
assert result == expected
@pytest.mark.parametrize("op", ["any", "all"])
def test_logical_compat(self, op):
index = self.create_index()
assert getattr(index, op)() == getattr(index.values, op)()
def _check_method_works(self, method, index):
method(index)
def test_get_indexer(self):
index1 = Index([1, 2, 3, 4, 5])
index2 = Index([2, 4, 6])
r1 = index1.get_indexer(index2)
e1 = np.array([1, 3, -1], dtype=np.intp)
tm.assert_almost_equal(r1, e1)
@pytest.mark.parametrize("reverse", [True, False])
@pytest.mark.parametrize(
"expected,method",
[
(np.array([-1, 0, 0, 1, 1], dtype=np.intp), "pad"),
(np.array([-1, 0, 0, 1, 1], dtype=np.intp), "ffill"),
(np.array([0, 0, 1, 1, 2], dtype=np.intp), "backfill"),
(np.array([0, 0, 1, 1, 2], dtype=np.intp), "bfill"),
],
)
def test_get_indexer_methods(self, reverse, expected, method):
index1 = Index([1, 2, 3, 4, 5])
index2 = Index([2, 4, 6])
if reverse:
index1 = index1[::-1]
expected = expected[::-1]
result = index2.get_indexer(index1, method=method)
tm.assert_almost_equal(result, expected)
def test_get_indexer_invalid(self):
# GH10411
index = Index(np.arange(10))
with pytest.raises(ValueError, match="tolerance argument"):
index.get_indexer([1, 0], tolerance=1)
with pytest.raises(ValueError, match="limit argument"):
index.get_indexer([1, 0], limit=1)
@pytest.mark.parametrize(
"method, tolerance, indexer, expected",
[
("pad", None, [0, 5, 9], [0, 5, 9]),
("backfill", None, [0, 5, 9], [0, 5, 9]),
("nearest", None, [0, 5, 9], [0, 5, 9]),
("pad", 0, [0, 5, 9], [0, 5, 9]),
("backfill", 0, [0, 5, 9], [0, 5, 9]),
("nearest", 0, [0, 5, 9], [0, 5, 9]),
("pad", None, [0.2, 1.8, 8.5], [0, 1, 8]),
("backfill", None, [0.2, 1.8, 8.5], [1, 2, 9]),
("nearest", None, [0.2, 1.8, 8.5], [0, 2, 9]),
("pad", 1, [0.2, 1.8, 8.5], [0, 1, 8]),
("backfill", 1, [0.2, 1.8, 8.5], [1, 2, 9]),
("nearest", 1, [0.2, 1.8, 8.5], [0, 2, 9]),
("pad", 0.2, [0.2, 1.8, 8.5], [0, -1, -1]),
("backfill", 0.2, [0.2, 1.8, 8.5], [-1, 2, -1]),
("nearest", 0.2, [0.2, 1.8, 8.5], [0, 2, -1]),
],
)
def test_get_indexer_nearest(self, method, tolerance, indexer, expected):
index = Index(np.arange(10))
actual = index.get_indexer(indexer, method=method, tolerance=tolerance)
tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp))
@pytest.mark.parametrize("listtype", [list, tuple, Series, np.array])
@pytest.mark.parametrize(
"tolerance, expected",
list(
zip(
[[0.3, 0.3, 0.1], [0.2, 0.1, 0.1], [0.1, 0.5, 0.5]],
[[0, 2, -1], [0, -1, -1], [-1, 2, 9]],
)
),
)
def test_get_indexer_nearest_listlike_tolerance(
self, tolerance, expected, listtype
):
index = Index(np.arange(10))
actual = index.get_indexer(
[0.2, 1.8, 8.5], method="nearest", tolerance=listtype(tolerance)
)
tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp))
def test_get_indexer_nearest_error(self):
index = Index(np.arange(10))
with pytest.raises(ValueError, match="limit argument"):
index.get_indexer([1, 0], method="nearest", limit=1)
with pytest.raises(ValueError, match="tolerance size must match"):
index.get_indexer([1, 0], method="nearest", tolerance=[1, 2, 3])
@pytest.mark.parametrize(
"method,expected",
[("pad", [8, 7, 0]), ("backfill", [9, 8, 1]), ("nearest", [9, 7, 0])],
)
def test_get_indexer_nearest_decreasing(self, method, expected):
index = Index(np.arange(10))[::-1]
actual = index.get_indexer([0, 5, 9], method=method)
tm.assert_numpy_array_equal(actual, np.array([9, 4, 0], dtype=np.intp))
actual = index.get_indexer([0.2, 1.8, 8.5], method=method)
tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp))
@pytest.mark.parametrize(
"method,expected",
[
("pad", np.array([-1, 0, 1, 1], dtype=np.intp)),
("backfill", np.array([0, 0, 1, -1], dtype=np.intp)),
],
)
def test_get_indexer_strings(self, method, expected):
index = pd.Index(["b", "c"])
actual = index.get_indexer(["a", "b", "c", "d"], method=method)
tm.assert_numpy_array_equal(actual, expected)
def test_get_indexer_strings_raises(self):
index = pd.Index(["b", "c"])
msg = r"unsupported operand type\(s\) for -: 'str' and 'str'"
with pytest.raises(TypeError, match=msg):
index.get_indexer(["a", "b", "c", "d"], method="nearest")
with pytest.raises(TypeError, match=msg):
index.get_indexer(["a", "b", "c", "d"], method="pad", tolerance=2)
with pytest.raises(TypeError, match=msg):
index.get_indexer(
["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2]
)
@pytest.mark.parametrize("idx_class", [Int64Index, RangeIndex, Float64Index])
def test_get_indexer_numeric_index_boolean_target(self, idx_class):
# GH 16877
numeric_index = idx_class(RangeIndex((4)))
result = numeric_index.get_indexer([True, False, True])
expected = np.array([-1, -1, -1], dtype=np.intp)
tm.assert_numpy_array_equal(result, expected)
def test_get_indexer_with_NA_values(
self, unique_nulls_fixture, unique_nulls_fixture2
):
# GH 22332
# check pairwise, that no pair of na values
# is mangled
if unique_nulls_fixture is unique_nulls_fixture2:
return # skip it, values are not unique
arr = np.array([unique_nulls_fixture, unique_nulls_fixture2], dtype=np.object)
index = pd.Index(arr, dtype=np.object)
result = index.get_indexer(
[unique_nulls_fixture, unique_nulls_fixture2, "Unknown"]
)
expected = np.array([0, 1, -1], dtype=np.intp)
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"])
def test_get_loc(self, method):
index = pd.Index([0, 1, 2])
assert index.get_loc(1, method=method) == 1
if method:
assert index.get_loc(1, method=method, tolerance=0) == 1
@pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"])
def test_get_loc_raises_bad_label(self, method):
index = pd.Index([0, 1, 2])
if method:
msg = "not supported between"
else:
msg = "invalid key"
with pytest.raises(TypeError, match=msg):
index.get_loc([1, 2], method=method)
@pytest.mark.parametrize(
"method,loc", [("pad", 1), ("backfill", 2), ("nearest", 1)]
)
def test_get_loc_tolerance(self, method, loc):
index = pd.Index([0, 1, 2])
assert index.get_loc(1.1, method) == loc
assert index.get_loc(1.1, method, tolerance=1) == loc
@pytest.mark.parametrize("method", ["pad", "backfill", "nearest"])
def test_get_loc_outside_tolerance_raises(self, method):
index = pd.Index([0, 1, 2])
with pytest.raises(KeyError, match="1.1"):
index.get_loc(1.1, method, tolerance=0.05)
def test_get_loc_bad_tolerance_raises(self):
index = pd.Index([0, 1, 2])
with pytest.raises(ValueError, match="must be numeric"):
index.get_loc(1.1, "nearest", tolerance="invalid")
def test_get_loc_tolerance_no_method_raises(self):
index = pd.Index([0, 1, 2])
with pytest.raises(ValueError, match="tolerance .* valid if"):
index.get_loc(1.1, tolerance=1)
def test_get_loc_raises_missized_tolerance(self):
index = pd.Index([0, 1, 2])
with pytest.raises(ValueError, match="tolerance size must match"):
index.get_loc(1.1, "nearest", tolerance=[1, 1])
def test_get_loc_raises_object_nearest(self):
index = pd.Index(["a", "c"])
with pytest.raises(TypeError, match="unsupported operand type"):
index.get_loc("a", method="nearest")
def test_get_loc_raises_object_tolerance(self):
index = pd.Index(["a", "c"])
with pytest.raises(TypeError, match="unsupported operand type"):
index.get_loc("a", method="pad", tolerance="invalid")
@pytest.mark.parametrize("dtype", [int, float])
def test_slice_locs(self, dtype):
index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype))
n = len(index)
assert index.slice_locs(start=2) == (2, n)
assert index.slice_locs(start=3) == (3, n)
assert index.slice_locs(3, 8) == (3, 6)
assert index.slice_locs(5, 10) == (3, n)
assert index.slice_locs(end=8) == (0, 6)
assert index.slice_locs(end=9) == (0, 7)
# reversed
index2 = index[::-1]
assert index2.slice_locs(8, 2) == (2, 6)
assert index2.slice_locs(7, 3) == (2, 5)
@pytest.mark.parametrize("dtype", [int, float])
def test_slice_float_locs(self, dtype):
index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype))
n = len(index)
assert index.slice_locs(5.0, 10.0) == (3, n)
assert index.slice_locs(4.5, 10.5) == (3, 8)
index2 = index[::-1]
assert index2.slice_locs(8.5, 1.5) == (2, 6)
assert index2.slice_locs(10.5, -1) == (0, n)
def test_slice_locs_dup(self):
index = Index(["a", "a", "b", "c", "d", "d"])
assert index.slice_locs("a", "d") == (0, 6)
assert index.slice_locs(end="d") == (0, 6)
assert index.slice_locs("a", "c") == (0, 4)
assert index.slice_locs("b", "d") == (2, 6)
index2 = index[::-1]
assert index2.slice_locs("d", "a") == (0, 6)
assert index2.slice_locs(end="a") == (0, 6)
assert index2.slice_locs("d", "b") == (0, 4)
assert index2.slice_locs("c", "a") == (2, 6)
@pytest.mark.parametrize("dtype", [int, float])
def test_slice_locs_dup_numeric(self, dtype):
index = Index(np.array([10, 12, 12, 14], dtype=dtype))
assert index.slice_locs(12, 12) == (1, 3)
assert index.slice_locs(11, 13) == (1, 3)
index2 = index[::-1]
assert index2.slice_locs(12, 12) == (1, 3)
assert index2.slice_locs(13, 11) == (1, 3)
def test_slice_locs_na(self):
index = Index([np.nan, 1, 2])
assert index.slice_locs(1) == (1, 3)
assert index.slice_locs(np.nan) == (0, 3)
index = Index([0, np.nan, np.nan, 1, 2])
assert index.slice_locs(np.nan) == (1, 5)
def test_slice_locs_na_raises(self):
index = Index([np.nan, 1, 2])
with pytest.raises(KeyError, match=""):
index.slice_locs(start=1.5)
with pytest.raises(KeyError, match=""):
index.slice_locs(end=1.5)
@pytest.mark.parametrize(
"in_slice,expected",
[
(pd.IndexSlice[::-1], "yxdcb"),
(pd.IndexSlice["b":"y":-1], ""), # type: ignore
(pd.IndexSlice["b"::-1], "b"), # type: ignore
(pd.IndexSlice[:"b":-1], "yxdcb"), # type: ignore
(pd.IndexSlice[:"y":-1], "y"), # type: ignore
(pd.IndexSlice["y"::-1], "yxdcb"), # type: ignore
(pd.IndexSlice["y"::-4], "yb"), # type: ignore
# absent labels
(pd.IndexSlice[:"a":-1], "yxdcb"), # type: ignore
(pd.IndexSlice[:"a":-2], "ydb"), # type: ignore
(pd.IndexSlice["z"::-1], "yxdcb"), # type: ignore
(pd.IndexSlice["z"::-3], "yc"), # type: ignore
(pd.IndexSlice["m"::-1], "dcb"), # type: ignore
(pd.IndexSlice[:"m":-1], "yx"), # type: ignore
(pd.IndexSlice["a":"a":-1], ""), # type: ignore
(pd.IndexSlice["z":"z":-1], ""), # type: ignore
(pd.IndexSlice["m":"m":-1], ""), # type: ignore
],
)
def test_slice_locs_negative_step(self, in_slice, expected):
index = Index(list("bcdxy"))
s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step)
result = index[s_start : s_stop : in_slice.step]
expected = pd.Index(list(expected))
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True)
def test_drop_by_str_label(self, index):
n = len(index)
drop = index[list(range(5, 10))]
dropped = index.drop(drop)
expected = index[list(range(5)) + list(range(10, n))]
tm.assert_index_equal(dropped, expected)
dropped = index.drop(index[0])
expected = index[1:]
tm.assert_index_equal(dropped, expected)
@pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True)
@pytest.mark.parametrize("keys", [["foo", "bar"], ["1", "bar"]])
def test_drop_by_str_label_raises_missing_keys(self, index, keys):
with pytest.raises(KeyError, match=""):
index.drop(keys)
@pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True)
def test_drop_by_str_label_errors_ignore(self, index):
n = len(index)
drop = index[list(range(5, 10))]
mixed = drop.tolist() + ["foo"]
dropped = index.drop(mixed, errors="ignore")
expected = index[list(range(5)) + list(range(10, n))]
tm.assert_index_equal(dropped, expected)
dropped = index.drop(["foo", "bar"], errors="ignore")
expected = index[list(range(n))]
tm.assert_index_equal(dropped, expected)
def test_drop_by_numeric_label_loc(self):
# TODO: Parametrize numeric and str tests after self.strIndex fixture
index = Index([1, 2, 3])
dropped = index.drop(1)
expected = Index([2, 3])
tm.assert_index_equal(dropped, expected)
def test_drop_by_numeric_label_raises_missing_keys(self):
index = Index([1, 2, 3])
with pytest.raises(KeyError, match=""):
index.drop([3, 4])
@pytest.mark.parametrize(
"key,expected", [(4, Index([1, 2, 3])), ([3, 4, 5], Index([1, 2]))]
)
def test_drop_by_numeric_label_errors_ignore(self, key, expected):
index = Index([1, 2, 3])
dropped = index.drop(key, errors="ignore")
tm.assert_index_equal(dropped, expected)
@pytest.mark.parametrize(
"values",
[["a", "b", ("c", "d")], ["a", ("c", "d"), "b"], [("c", "d"), "a", "b"]],
)
@pytest.mark.parametrize("to_drop", [[("c", "d"), "a"], ["a", ("c", "d")]])
def test_drop_tuple(self, values, to_drop):
# GH 18304
index = pd.Index(values)
expected = pd.Index(["b"])
result = index.drop(to_drop)
tm.assert_index_equal(result, expected)
removed = index.drop(to_drop[0])
for drop_me in to_drop[1], [to_drop[1]]:
result = removed.drop(drop_me)
tm.assert_index_equal(result, expected)
removed = index.drop(to_drop[1])
msg = fr"\"\[{re.escape(to_drop[1].__repr__())}\] not found in axis\""
for drop_me in to_drop[1], [to_drop[1]]:
with pytest.raises(KeyError, match=msg):
removed.drop(drop_me)
@pytest.mark.parametrize(
"method,expected,sort",
[
(
"intersection",
np.array(
[(1, "A"), (2, "A"), (1, "B"), (2, "B")],
dtype=[("num", int), ("let", "a1")],
),
False,
),
(
"intersection",
np.array(
[(1, "A"), (1, "B"), (2, "A"), (2, "B")],
dtype=[("num", int), ("let", "a1")],
),
None,
),
(
"union",
np.array(
[(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")],
dtype=[("num", int), ("let", "a1")],
),
None,
),
],
)
def test_tuple_union_bug(self, method, expected, sort):
index1 = Index(
np.array(
[(1, "A"), (2, "A"), (1, "B"), (2, "B")],
dtype=[("num", int), ("let", "a1")],
)
)
index2 = Index(
np.array(
[(1, "A"), (2, "A"), (1, "B"), (2, "B"), (1, "C"), (2, "C")],
dtype=[("num", int), ("let", "a1")],
)
)
result = getattr(index1, method)(index2, sort=sort)
assert result.ndim == 1
expected = Index(expected)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"attr",
[
"is_monotonic_increasing",
"is_monotonic_decreasing",
"_is_strictly_monotonic_increasing",
"_is_strictly_monotonic_decreasing",
],
)
def test_is_monotonic_incomparable(self, attr):
index = Index([5, datetime.now(), 7])
assert not getattr(index, attr)
def test_set_value_deprecated(self):
# GH 28621
idx = self.create_index()
arr = np.array([1, 2, 3])
with tm.assert_produces_warning(FutureWarning):
idx.set_value(arr, idx[1], 80)
assert arr[1] == 80
@pytest.mark.parametrize(
"index", ["string", "int", "datetime", "timedelta"], indirect=True
)
def test_get_value(self, index):
# TODO: Remove function? GH 19728
values = np.random.randn(100)
value = index[67]
tm.assert_almost_equal(index.get_value(values, value), values[67])
@pytest.mark.parametrize("values", [["foo", "bar", "quux"], {"foo", "bar", "quux"}])
@pytest.mark.parametrize(
"index,expected",
[
(Index(["qux", "baz", "foo", "bar"]), np.array([False, False, True, True])),
(Index([]), np.array([], dtype=bool)), # empty
],
)
def test_isin(self, values, index, expected):
result = index.isin(values)
tm.assert_numpy_array_equal(result, expected)
def test_isin_nan_common_object(self, nulls_fixture, nulls_fixture2):
# Test cartesian product of null fixtures and ensure that we don't
# mangle the various types (save a corner case with PyPy)
# all nans are the same
if (
isinstance(nulls_fixture, float)
and isinstance(nulls_fixture2, float)
and math.isnan(nulls_fixture)
and math.isnan(nulls_fixture2)
):
tm.assert_numpy_array_equal(
Index(["a", nulls_fixture]).isin([nulls_fixture2]),
np.array([False, True]),
)
elif nulls_fixture is nulls_fixture2: # should preserve NA type
tm.assert_numpy_array_equal(
Index(["a", nulls_fixture]).isin([nulls_fixture2]),
np.array([False, True]),
)
else:
tm.assert_numpy_array_equal(
Index(["a", nulls_fixture]).isin([nulls_fixture2]),
np.array([False, False]),
)
def test_isin_nan_common_float64(self, nulls_fixture):
if nulls_fixture is pd.NaT:
pytest.skip("pd.NaT not compatible with Float64Index")
# Float64Index overrides isin, so must be checked separately
tm.assert_numpy_array_equal(
Float64Index([1.0, nulls_fixture]).isin([np.nan]), np.array([False, True])
)
# we cannot compare NaT with NaN
tm.assert_numpy_array_equal(
Float64Index([1.0, nulls_fixture]).isin([pd.NaT]), np.array([False, False])
)
@pytest.mark.parametrize("level", [0, -1])
@pytest.mark.parametrize(
"index",
[
Index(["qux", "baz", "foo", "bar"]),
# Float64Index overrides isin, so must be checked separately
Float64Index([1.0, 2.0, 3.0, 4.0]),
],
)
def test_isin_level_kwarg(self, level, index):
values = index.tolist()[-2:] + ["nonexisting"]
expected = np.array([False, False, True, True])
tm.assert_numpy_array_equal(expected, index.isin(values, level=level))
index.name = "foobar"
tm.assert_numpy_array_equal(expected, index.isin(values, level="foobar"))
@pytest.mark.parametrize("level", [2, 10, -3])
def test_isin_level_kwarg_bad_level_raises(self, level, indices):
index = indices
with pytest.raises(IndexError, match="Too many levels"):
index.isin([], level=level)
@pytest.mark.parametrize("label", [1.0, "foobar", "xyzzy", np.nan])
def test_isin_level_kwarg_bad_label_raises(self, label, indices):
index = indices
if isinstance(index, MultiIndex):
index = index.rename(["foo", "bar"])
msg = f"'Level {label} not found'"
else:
index = index.rename("foo")
msg = fr"Requested level \({label}\) does not match index name \(foo\)"
with pytest.raises(KeyError, match=msg):
index.isin([], level=label)
@pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])])
def test_isin_empty(self, empty):
# see gh-16991
index = Index(["a", "b"])
expected = np.array([False, False])
result = index.isin(empty)
tm.assert_numpy_array_equal(expected, result)
@pytest.mark.parametrize(
"values",
[
[1, 2, 3, 4],
[1.0, 2.0, 3.0, 4.0],
[True, True, True, True],
["foo", "bar", "baz", "qux"],
pd.date_range("2018-01-01", freq="D", periods=4),
],
)
def test_boolean_cmp(self, values):
index = Index(values)
result = index == values
expected = np.array([True, True, True, True], dtype=bool)
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize("name,level", [(None, 0), ("a", "a")])
def test_get_level_values(self, index, name, level):
expected = index.copy()
if name:
expected.name = name
result = expected.get_level_values(level)
tm.assert_index_equal(result, expected)
def test_slice_keep_name(self):
index = Index(["a", "b"], name="asdf")
assert index.name == index[1:].name
@pytest.mark.parametrize(
"index", ["unicode", "string", "datetime", "int", "float"], indirect=True
)
def test_join_self(self, index, join_type):
joined = index.join(index, how=join_type)
assert index is joined
@pytest.mark.parametrize("method", ["strip", "rstrip", "lstrip"])
def test_str_attribute(self, method):
# GH9068
index = Index([" jack", "jill ", " jesse ", "frank"])
expected = Index([getattr(str, method)(x) for x in index.values])
result = getattr(index.str, method)()
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"index",
[
Index(range(5)),
tm.makeDateIndex(10),
MultiIndex.from_tuples([("foo", "1"), ("bar", "3")]),
period_range(start="2000", end="2010", freq="A"),
],
)
def test_str_attribute_raises(self, index):
with pytest.raises(AttributeError, match="only use .str accessor"):
index.str.repeat(2)
@pytest.mark.parametrize(
"expand,expected",
[
(None, Index([["a", "b", "c"], ["d", "e"], ["f"]])),
(False, Index([["a", "b", "c"], ["d", "e"], ["f"]])),
(
True,
MultiIndex.from_tuples(
[("a", "b", "c"), ("d", "e", np.nan), ("f", np.nan, np.nan)]
),
),
],
)
def test_str_split(self, expand, expected):
index = Index(["a b c", "d e", "f"])
if expand is not None:
result = index.str.split(expand=expand)
else:
result = index.str.split()
tm.assert_index_equal(result, expected)
def test_str_bool_return(self):
# test boolean case, should return np.array instead of boolean Index
index = Index(["a1", "a2", "b1", "b2"])
result = index.str.startswith("a")
expected = np.array([True, True, False, False])
tm.assert_numpy_array_equal(result, expected)
assert isinstance(result, np.ndarray)
def test_str_bool_series_indexing(self):
index = Index(["a1", "a2", "b1", "b2"])
s = Series(range(4), index=index)
result = s[s.index.str.startswith("a")]
expected = Series(range(2), index=["a1", "a2"])
tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
"index,expected", [(Index(list("abcd")), True), (Index(range(4)), False)]
)
def test_tab_completion(self, index, expected):
# GH 9910
result = "str" in dir(index)
assert result == expected
def test_indexing_doesnt_change_class(self):
index = Index([1, 2, 3, "a", "b", "c"])
assert index[1:3].identical(pd.Index([2, 3], dtype=np.object_))
assert index[[0, 1]].identical(pd.Index([1, 2], dtype=np.object_))
def test_outer_join_sort(self):
left_index = Index(np.random.permutation(15))
right_index = tm.makeDateIndex(10)
with tm.assert_produces_warning(RuntimeWarning):
result = left_index.join(right_index, how="outer")
# right_index in this case because DatetimeIndex has join precedence
# over Int64Index
with tm.assert_produces_warning(RuntimeWarning):
expected = right_index.astype(object).union(left_index.astype(object))
tm.assert_index_equal(result, expected)
def test_nan_first_take_datetime(self):
index = Index([pd.NaT, Timestamp("20130101"), Timestamp("20130102")])
result = index.take([-1, 0, 1])
expected = Index([index[-1], index[0], index[1]])
tm.assert_index_equal(result, expected)
def test_take_fill_value(self):
# GH 12631
index = pd.Index(list("ABC"), name="xxx")
result = index.take(np.array([1, 0, -1]))
expected = pd.Index(list("BAC"), name="xxx")
tm.assert_index_equal(result, expected)
# fill_value
result = index.take(np.array([1, 0, -1]), fill_value=True)
expected = pd.Index(["B", "A", np.nan], name="xxx")
tm.assert_index_equal(result, expected)
# allow_fill=False
result = index.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
expected = pd.Index(["B", "A", "C"], name="xxx")
tm.assert_index_equal(result, expected)
def test_take_fill_value_none_raises(self):
index = pd.Index(list("ABC"), name="xxx")
msg = (
"When allow_fill=True and fill_value is not None, "
"all indices must be >= -1"
)
with pytest.raises(ValueError, match=msg):
index.take(np.array([1, 0, -2]), fill_value=True)
with pytest.raises(ValueError, match=msg):
index.take(np.array([1, 0, -5]), fill_value=True)
def test_take_bad_bounds_raises(self):
index = pd.Index(list("ABC"), name="xxx")
with pytest.raises(IndexError, match="out of bounds"):
index.take(np.array([1, -5]))
@pytest.mark.parametrize("name", [None, "foobar"])
@pytest.mark.parametrize(
"labels",
[
[],
np.array([]),
["A", "B", "C"],
["C", "B", "A"],
np.array(["A", "B", "C"]),
np.array(["C", "B", "A"]),
# Must preserve name even if dtype changes
pd.date_range("20130101", periods=3).values,
pd.date_range("20130101", periods=3).tolist(),
],
)
def test_reindex_preserves_name_if_target_is_list_or_ndarray(self, name, labels):
# GH6552
index = pd.Index([0, 1, 2])
index.name = name
assert index.reindex(labels)[0].name == name
@pytest.mark.parametrize("labels", [[], np.array([]), np.array([], dtype=np.int64)])
def test_reindex_preserves_type_if_target_is_empty_list_or_array(self, labels):
# GH7774
index = pd.Index(list("abc"))
assert index.reindex(labels)[0].dtype.type == np.object_
@pytest.mark.parametrize(
"labels,dtype",
[
(pd.Int64Index([]), np.int64),
(pd.Float64Index([]), np.float64),
(pd.DatetimeIndex([]), np.datetime64),
],
)
def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self, labels, dtype):
# GH7774
index = pd.Index(list("abc"))
assert index.reindex(labels)[0].dtype.type == dtype
def test_reindex_no_type_preserve_target_empty_mi(self):
index = pd.Index(list("abc"))
result = index.reindex(
pd.MultiIndex([pd.Int64Index([]), pd.Float64Index([])], [[], []])
)[0]
assert result.levels[0].dtype.type == np.int64
assert result.levels[1].dtype.type == np.float64
def test_groupby(self):
index = Index(range(5))
result = index.groupby(np.array([1, 1, 2, 2, 2]))
expected = {1: pd.Index([0, 1]), 2: pd.Index([2, 3, 4])}
tm.assert_dict_equal(result, expected)
@pytest.mark.parametrize(
"mi,expected",
[
(MultiIndex.from_tuples([(1, 2), (4, 5)]), np.array([True, True])),
(MultiIndex.from_tuples([(1, 2), (4, 6)]), np.array([True, False])),
],
)
def test_equals_op_multiindex(self, mi, expected):
# GH9785
# test comparisons of multiindex
df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1])
result = df.index == mi
tm.assert_numpy_array_equal(result, expected)
def test_equals_op_multiindex_identify(self):
df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1])
result = df.index == df.index
expected = np.array([True, True])
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize(
"index",
[
MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)]),
Index(["foo", "bar", "baz"]),
],
)
def test_equals_op_mismatched_multiindex_raises(self, index):
df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1])
with pytest.raises(ValueError, match="Lengths must match"):
df.index == index
def test_equals_op_index_vs_mi_same_length(self):
mi = MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)])
index = Index(["foo", "bar", "baz"])
result = mi == index
expected = np.array([False, False, False])
tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize("dt_conv", [pd.to_datetime, pd.to_timedelta])
def test_dt_conversion_preserves_name(self, dt_conv):
# GH 10875
index = pd.Index(["01:02:03", "01:02:04"], name="label")
assert index.name == dt_conv(index).name
@pytest.mark.parametrize(
"index,expected",
[
# ASCII
# short
(
pd.Index(["a", "bb", "ccc"]),
"""Index(['a', 'bb', 'ccc'], dtype='object')""",
),
# multiple lines
(
pd.Index(["a", "bb", "ccc"] * 10),
"""\
Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc',
'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc',
'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
dtype='object')""",
),
# truncated
(
pd.Index(["a", "bb", "ccc"] * 100),
"""\
Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
...
'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
dtype='object', length=300)""",
),
# Non-ASCII
# short
(
pd.Index(["", "いい", "ううう"]),
"""Index(['', 'いい', 'ううう'], dtype='object')""",
),
# multiple lines
(
pd.Index(["", "いい", "ううう"] * 10),
(
"Index(['', 'いい', 'ううう', '', 'いい', 'ううう', "
"'', 'いい', 'ううう', '', 'いい', 'ううう',\n"
" '', 'いい', 'ううう', '', 'いい', 'ううう', "
"'', 'いい', 'ううう', '', 'いい', 'ううう',\n"
" '', 'いい', 'ううう', '', 'いい', "
"'ううう'],\n"
" dtype='object')"
),
),
# truncated
(
pd.Index(["", "いい", "ううう"] * 100),
(
"Index(['', 'いい', 'ううう', '', 'いい', 'ううう', "
"'', 'いい', 'ううう', '',\n"
" ...\n"
" 'ううう', '', 'いい', 'ううう', '', 'いい', "
"'ううう', '', 'いい', 'ううう'],\n"
" dtype='object', length=300)"
),
),
],
)
def test_string_index_repr(self, index, expected):
result = repr(index)
assert result == expected
@pytest.mark.parametrize(
"index,expected",
[
# short
(
pd.Index(["", "いい", "ううう"]),
("Index(['', 'いい', 'ううう'], dtype='object')"),
),
# multiple lines
(
pd.Index(["", "いい", "ううう"] * 10),
(
"Index(['', 'いい', 'ううう', '', 'いい', "
"'ううう', '', 'いい', 'ううう',\n"
" '', 'いい', 'ううう', '', 'いい', "
"'ううう', '', 'いい', 'ううう',\n"
" '', 'いい', 'ううう', '', 'いい', "
"'ううう', '', 'いい', 'ううう',\n"
" '', 'いい', 'ううう'],\n"
" dtype='object')"
""
),
),
# truncated
(
pd.Index(["", "いい", "ううう"] * 100),
(
"Index(['', 'いい', 'ううう', '', 'いい', "
"'ううう', '', 'いい', 'ううう',\n"
" '',\n"
" ...\n"
" 'ううう', '', 'いい', 'ううう', '', "
"'いい', 'ううう', '', 'いい',\n"
" 'ううう'],\n"
" dtype='object', length=300)"
),
),
],
)
def test_string_index_repr_with_unicode_option(self, index, expected):
# Enable Unicode option -----------------------------------------
with cf.option_context("display.unicode.east_asian_width", True):
result = repr(index)
assert result == expected
def test_cached_properties_not_settable(self):
index = pd.Index([1, 2, 3])
with pytest.raises(AttributeError, match="Can't set attribute"):
index.is_unique = False
@async_mark()
async def test_tab_complete_warning(self, ip):
# https://github.com/pandas-dev/pandas/issues/16409
pytest.importorskip("IPython", minversion="6.0.0")
from IPython.core.completer import provisionalcompleter
code = "import pandas as pd; idx = pd.Index([1, 2])"
await ip.run_code(code)
# GH 31324 newer jedi version raises Deprecation warning
import jedi
if jedi.__version__ < "0.16.0":
warning = tm.assert_produces_warning(None)
else:
warning = tm.assert_produces_warning(
DeprecationWarning, check_stacklevel=False
)
with warning:
with provisionalcompleter("ignore"):
list(ip.Completer.completions("idx.", 4))
def test_contains_method_removed(self, indices):
# GH#30103 method removed for all types except IntervalIndex
if isinstance(indices, pd.IntervalIndex):
indices.contains(1)
else:
with pytest.raises(AttributeError):
indices.contains(1)
class TestMixedIntIndex(Base):
# Mostly the tests from common.py for which the results differ
# in py2 and py3 because ints and strings are uncomparable in py3
# (GH 13514)
_holder = Index
@pytest.fixture(params=[[0, "a", 1, "b", 2, "c"]], ids=["mixedIndex"])
def indices(self, request):
return Index(request.param)
def create_index(self):
return Index([0, "a", 1, "b", 2, "c"])
def test_argsort(self):
index = self.create_index()
with pytest.raises(TypeError, match="'>|<' not supported"):
index.argsort()
def test_numpy_argsort(self):
index = self.create_index()
with pytest.raises(TypeError, match="'>|<' not supported"):
np.argsort(index)
def test_copy_name(self):
# Check that "name" argument passed at initialization is honoured
# GH12309
index = self.create_index()
first = type(index)(index, copy=True, name="mario")
second = type(first)(first, copy=False)
# Even though "copy=False", we want a new object.
assert first is not second
tm.assert_index_equal(first, second)
assert first.name == "mario"
assert second.name == "mario"
s1 = Series(2, index=first)
s2 = Series(3, index=second[:-1])
s3 = s1 * s2
assert s3.index.name == "mario"
def test_copy_name2(self):
# Check that adding a "name" parameter to the copy is honored
# GH14302
index = pd.Index([1, 2], name="MyName")
index1 = index.copy()
tm.assert_index_equal(index, index1)
index2 = index.copy(name="NewName")
tm.assert_index_equal(index, index2, check_names=False)
assert index.name == "MyName"
assert index2.name == "NewName"
index3 = index.copy(names=["NewName"])
tm.assert_index_equal(index, index3, check_names=False)
assert index.name == "MyName"
assert index.names == ["MyName"]
assert index3.name == "NewName"
assert index3.names == ["NewName"]
def test_union_base(self):
index = self.create_index()
first = index[3:]
second = index[:5]
result = first.union(second)
expected = Index([0, 1, 2, "a", "b", "c"])
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("klass", [np.array, Series, list])
def test_union_different_type_base(self, klass):
# GH 10149
index = self.create_index()
first = index[3:]
second = index[:5]
result = first.union(klass(second.values))
assert tm.equalContents(result, index)
def test_unique_na(self):
idx = pd.Index([2, np.nan, 2, 1], name="my_index")
expected = pd.Index([2, np.nan, 1], name="my_index")
result = idx.unique()
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("sort", [None, False])
def test_intersection_base(self, sort):
# (same results for py2 and py3 but sortedness not tested elsewhere)
index = self.create_index()
first = index[:5]
second = index[:3]
expected = Index([0, 1, "a"]) if sort is None else Index([0, "a", 1])
result = first.intersection(second, sort=sort)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("klass", [np.array, Series, list])
@pytest.mark.parametrize("sort", [None, False])
def test_intersection_different_type_base(self, klass, sort):
# GH 10149
index = self.create_index()
first = index[:5]
second = index[:3]
result = first.intersection(klass(second.values), sort=sort)
assert tm.equalContents(result, second)
@pytest.mark.parametrize("sort", [None, False])
def test_difference_base(self, sort):
# (same results for py2 and py3 but sortedness not tested elsewhere)
index = self.create_index()
first = index[:4]
second = index[3:]
result = first.difference(second, sort)
expected = Index([0, "a", 1])
if sort is None:
expected = Index(safe_sort(expected))
tm.assert_index_equal(result, expected)
def test_symmetric_difference(self):
# (same results for py2 and py3 but sortedness not tested elsewhere)
index = self.create_index()
first = index[:4]
second = index[3:]
result = first.symmetric_difference(second)
expected = Index([0, 1, 2, "a", "c"])
tm.assert_index_equal(result, expected)
def test_logical_compat(self):
index = self.create_index()
assert index.all() == index.values.all()
assert index.any() == index.values.any()
@pytest.mark.parametrize("how", ["any", "all"])
@pytest.mark.parametrize("dtype", [None, object, "category"])
@pytest.mark.parametrize(
"vals,expected",
[
([1, 2, 3], [1, 2, 3]),
([1.0, 2.0, 3.0], [1.0, 2.0, 3.0]),
([1.0, 2.0, np.nan, 3.0], [1.0, 2.0, 3.0]),
(["A", "B", "C"], ["A", "B", "C"]),
(["A", np.nan, "B", "C"], ["A", "B", "C"]),
],
)
def test_dropna(self, how, dtype, vals, expected):
# GH 6194
index = pd.Index(vals, dtype=dtype)
result = index.dropna(how=how)
expected = pd.Index(expected, dtype=dtype)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("how", ["any", "all"])
@pytest.mark.parametrize(
"index,expected",
[
(
pd.DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"]),
pd.DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"]),
),
(
pd.DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03", pd.NaT]),
pd.DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"]),
),
(
pd.TimedeltaIndex(["1 days", "2 days", "3 days"]),
pd.TimedeltaIndex(["1 days", "2 days", "3 days"]),
),
(
pd.TimedeltaIndex([pd.NaT, "1 days", "2 days", "3 days", pd.NaT]),
pd.TimedeltaIndex(["1 days", "2 days", "3 days"]),
),
(
pd.PeriodIndex(["2012-02", "2012-04", "2012-05"], freq="M"),
pd.PeriodIndex(["2012-02", "2012-04", "2012-05"], freq="M"),
),
(
pd.PeriodIndex(["2012-02", "2012-04", "NaT", "2012-05"], freq="M"),
pd.PeriodIndex(["2012-02", "2012-04", "2012-05"], freq="M"),
),
],
)
def test_dropna_dt_like(self, how, index, expected):
result = index.dropna(how=how)
tm.assert_index_equal(result, expected)
def test_dropna_invalid_how_raises(self):
msg = "invalid how option: xxx"
with pytest.raises(ValueError, match=msg):
pd.Index([1, 2, 3]).dropna(how="xxx")
def test_get_combined_index(self):
result = _get_combined_index([])
expected = Index([])
tm.assert_index_equal(result, expected)
def test_repeat(self):
repeats = 2
index = pd.Index([1, 2, 3])
expected = pd.Index([1, 1, 2, 2, 3, 3])
result = index.repeat(repeats)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"index",
[
pd.Index([np.nan]),
pd.Index([np.nan, 1]),
pd.Index([1, 2, np.nan]),
pd.Index(["a", "b", np.nan]),
pd.to_datetime(["NaT"]),
pd.to_datetime(["NaT", "2000-01-01"]),
pd.to_datetime(["2000-01-01", "NaT", "2000-01-02"]),
pd.to_timedelta(["1 day", "NaT"]),
],
)
def test_is_monotonic_na(self, index):
assert index.is_monotonic_increasing is False
assert index.is_monotonic_decreasing is False
assert index._is_strictly_monotonic_increasing is False
assert index._is_strictly_monotonic_decreasing is False
def test_repr_summary(self):
with cf.option_context("display.max_seq_items", 10):
result = repr(pd.Index(np.arange(1000)))
assert len(result) < 200
assert "..." in result
@pytest.mark.parametrize("klass", [Series, DataFrame])
def test_int_name_format(self, klass):
index = Index(["a", "b", "c"], name=0)
result = klass(list(range(3)), index=index)
assert "0" in repr(result)
def test_print_unicode_columns(self):
df = pd.DataFrame({"\u05d0": [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]})
repr(df.columns) # should not raise UnicodeDecodeError
def test_str_to_bytes_raises(self):
# GH 26447
index = Index([str(x) for x in range(10)])
msg = "^'str' object cannot be interpreted as an integer$"
with pytest.raises(TypeError, match=msg):
bytes(index)
def test_intersect_str_dates(self):
dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]
index1 = Index(dt_dates, dtype=object)
index2 = Index(["aa"], dtype=object)
result = index2.intersection(index1)
expected = Index([], dtype=object)
tm.assert_index_equal(result, expected)
def test_index_repr_bool_nan(self):
# GH32146
arr = Index([True, False, np.nan], dtype=object)
exp1 = arr.format()
out1 = ["True", "False", "NaN"]
assert out1 == exp1
exp2 = repr(arr)
out2 = "Index([True, False, nan], dtype='object')"
assert out2 == exp2
class TestIndexUtils:
@pytest.mark.parametrize(
"data, names, expected",
[
([[1, 2, 3]], None, Index([1, 2, 3])),
([[1, 2, 3]], ["name"], Index([1, 2, 3], name="name")),
(
[["a", "a"], ["c", "d"]],
None,
MultiIndex([["a"], ["c", "d"]], [[0, 0], [0, 1]]),
),
(
[["a", "a"], ["c", "d"]],
["L1", "L2"],
MultiIndex([["a"], ["c", "d"]], [[0, 0], [0, 1]], names=["L1", "L2"]),
),
],
)
def test_ensure_index_from_sequences(self, data, names, expected):
result = ensure_index_from_sequences(data, names)
tm.assert_index_equal(result, expected)
def test_ensure_index_mixed_closed_intervals(self):
# GH27172
intervals = [
pd.Interval(0, 1, closed="left"),
pd.Interval(1, 2, closed="right"),
pd.Interval(2, 3, closed="neither"),
pd.Interval(3, 4, closed="both"),
]
result = ensure_index(intervals)
expected = Index(intervals, dtype=object)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize(
"opname",
[
"eq",
"ne",
"le",
"lt",
"ge",
"gt",
"add",
"radd",
"sub",
"rsub",
"mul",
"rmul",
"truediv",
"rtruediv",
"floordiv",
"rfloordiv",
"pow",
"rpow",
"mod",
"divmod",
],
)
def test_generated_op_names(opname, indices):
if isinstance(indices, ABCIndex) and opname == "rsub":
# pd.Index.__rsub__ does not exist; though the method does exist
# for subclasses. see GH#19723
return
opname = f"__{opname}__"
method = getattr(indices, opname)
assert method.__name__ == opname
@pytest.mark.parametrize("index_maker", tm.index_subclass_makers_generator())
def test_index_subclass_constructor_wrong_kwargs(index_maker):
# GH #19348
with pytest.raises(TypeError, match="unexpected keyword argument"):
index_maker(foo="bar")
def test_deprecated_fastpath():
msg = "[Uu]nexpected keyword argument"
with pytest.raises(TypeError, match=msg):
pd.Index(np.array(["a", "b"], dtype=object), name="test", fastpath=True)
with pytest.raises(TypeError, match=msg):
pd.Int64Index(np.array([1, 2, 3], dtype="int64"), name="test", fastpath=True)
with pytest.raises(TypeError, match=msg):
pd.RangeIndex(0, 5, 2, name="test", fastpath=True)
with pytest.raises(TypeError, match=msg):
pd.CategoricalIndex(["a", "b", "c"], name="test", fastpath=True)
def test_shape_of_invalid_index():
# Currently, it is possible to create "invalid" index objects backed by
# a multi-dimensional array (see https://github.com/pandas-dev/pandas/issues/27125
# about this). However, as long as this is not solved in general,this test ensures
# that the returned shape is consistent with this underlying array for
# compat with matplotlib (see https://github.com/pandas-dev/pandas/issues/27775)
idx = pd.Index([0, 1, 2, 3])
with tm.assert_produces_warning(DeprecationWarning):
# GH#30588 multi-dimensional indexing deprecated
assert idx[:, None].shape == (4, 1)
def test_validate_1d_input():
# GH#27125 check that we do not have >1-dimensional input
msg = "Index data must be 1-dimensional"
arr = np.arange(8).reshape(2, 2, 2)
with pytest.raises(ValueError, match=msg):
pd.Index(arr)
with pytest.raises(ValueError, match=msg):
pd.Float64Index(arr.astype(np.float64))
with pytest.raises(ValueError, match=msg):
pd.Int64Index(arr.astype(np.int64))
with pytest.raises(ValueError, match=msg):
pd.UInt64Index(arr.astype(np.uint64))
df = pd.DataFrame(arr.reshape(4, 2))
with pytest.raises(ValueError, match=msg):
pd.Index(df)
# GH#13601 trying to assign a multi-dimensional array to an index is not
# allowed
ser = pd.Series(0, range(4))
with pytest.raises(ValueError, match=msg):
ser.index = np.array([[2, 3]] * 4)