512 lines
18 KiB
Python
512 lines
18 KiB
Python
|
"""
|
||
|
Collection of tests asserting things that should be true for
|
||
|
any index subclass except for MultiIndex. Makes use of the `index_flat`
|
||
|
fixture defined in pandas/conftest.py.
|
||
|
"""
|
||
|
from copy import (
|
||
|
copy,
|
||
|
deepcopy,
|
||
|
)
|
||
|
import re
|
||
|
|
||
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
from pandas.compat import IS64
|
||
|
from pandas.compat.numpy import np_version_gte1p25
|
||
|
|
||
|
from pandas.core.dtypes.common import (
|
||
|
is_integer_dtype,
|
||
|
is_numeric_dtype,
|
||
|
)
|
||
|
|
||
|
import pandas as pd
|
||
|
from pandas import (
|
||
|
CategoricalIndex,
|
||
|
MultiIndex,
|
||
|
PeriodIndex,
|
||
|
RangeIndex,
|
||
|
)
|
||
|
import pandas._testing as tm
|
||
|
|
||
|
|
||
|
class TestCommon:
|
||
|
@pytest.mark.parametrize("name", [None, "new_name"])
|
||
|
def test_to_frame(self, name, index_flat, using_copy_on_write):
|
||
|
# see GH#15230, GH#22580
|
||
|
idx = index_flat
|
||
|
|
||
|
if name:
|
||
|
idx_name = name
|
||
|
else:
|
||
|
idx_name = idx.name or 0
|
||
|
|
||
|
df = idx.to_frame(name=idx_name)
|
||
|
|
||
|
assert df.index is idx
|
||
|
assert len(df.columns) == 1
|
||
|
assert df.columns[0] == idx_name
|
||
|
if not using_copy_on_write:
|
||
|
assert df[idx_name].values is not idx.values
|
||
|
|
||
|
df = idx.to_frame(index=False, name=idx_name)
|
||
|
assert df.index is not idx
|
||
|
|
||
|
def test_droplevel(self, index_flat):
|
||
|
# GH 21115
|
||
|
# MultiIndex is tested separately in test_multi.py
|
||
|
index = index_flat
|
||
|
|
||
|
assert index.droplevel([]).equals(index)
|
||
|
|
||
|
for level in [index.name, [index.name]]:
|
||
|
if isinstance(index.name, tuple) and level is index.name:
|
||
|
# GH 21121 : droplevel with tuple name
|
||
|
continue
|
||
|
msg = (
|
||
|
"Cannot remove 1 levels from an index with 1 levels: at least one "
|
||
|
"level must be left."
|
||
|
)
|
||
|
with pytest.raises(ValueError, match=msg):
|
||
|
index.droplevel(level)
|
||
|
|
||
|
for level in "wrong", ["wrong"]:
|
||
|
with pytest.raises(
|
||
|
KeyError,
|
||
|
match=r"'Requested level \(wrong\) does not match index name \(None\)'",
|
||
|
):
|
||
|
index.droplevel(level)
|
||
|
|
||
|
def test_constructor_non_hashable_name(self, index_flat):
|
||
|
# GH 20527
|
||
|
index = index_flat
|
||
|
|
||
|
message = "Index.name must be a hashable type"
|
||
|
renamed = [["1"]]
|
||
|
|
||
|
# With .rename()
|
||
|
with pytest.raises(TypeError, match=message):
|
||
|
index.rename(name=renamed)
|
||
|
|
||
|
# With .set_names()
|
||
|
with pytest.raises(TypeError, match=message):
|
||
|
index.set_names(names=renamed)
|
||
|
|
||
|
def test_constructor_unwraps_index(self, index_flat):
|
||
|
a = index_flat
|
||
|
# Passing dtype is necessary for Index([True, False], dtype=object)
|
||
|
# case.
|
||
|
b = type(a)(a, dtype=a.dtype)
|
||
|
tm.assert_equal(a._data, b._data)
|
||
|
|
||
|
def test_to_flat_index(self, index_flat):
|
||
|
# 22866
|
||
|
index = index_flat
|
||
|
|
||
|
result = index.to_flat_index()
|
||
|
tm.assert_index_equal(result, index)
|
||
|
|
||
|
def test_set_name_methods(self, index_flat):
|
||
|
# MultiIndex tested separately
|
||
|
index = index_flat
|
||
|
new_name = "This is the new name for this index"
|
||
|
|
||
|
original_name = index.name
|
||
|
new_ind = index.set_names([new_name])
|
||
|
assert new_ind.name == new_name
|
||
|
assert index.name == original_name
|
||
|
res = index.rename(new_name, inplace=True)
|
||
|
|
||
|
# should return None
|
||
|
assert res is None
|
||
|
assert index.name == new_name
|
||
|
assert index.names == [new_name]
|
||
|
with pytest.raises(ValueError, match="Level must be None"):
|
||
|
index.set_names("a", level=0)
|
||
|
|
||
|
# rename in place just leaves tuples and other containers alone
|
||
|
name = ("A", "B")
|
||
|
index.rename(name, inplace=True)
|
||
|
assert index.name == name
|
||
|
assert index.names == [name]
|
||
|
|
||
|
@pytest.mark.xfail
|
||
|
def test_set_names_single_label_no_level(self, index_flat):
|
||
|
with pytest.raises(TypeError, match="list-like"):
|
||
|
# should still fail even if it would be the right length
|
||
|
index_flat.set_names("a")
|
||
|
|
||
|
def test_copy_and_deepcopy(self, index_flat):
|
||
|
index = index_flat
|
||
|
|
||
|
for func in (copy, deepcopy):
|
||
|
idx_copy = func(index)
|
||
|
assert idx_copy is not index
|
||
|
assert idx_copy.equals(index)
|
||
|
|
||
|
new_copy = index.copy(deep=True, name="banana")
|
||
|
assert new_copy.name == "banana"
|
||
|
|
||
|
def test_copy_name(self, index_flat):
|
||
|
# GH#12309: Check that the "name" argument
|
||
|
# passed at initialization is honored.
|
||
|
index = index_flat
|
||
|
|
||
|
first = type(index)(index, copy=True, name="mario")
|
||
|
second = type(first)(first, copy=False)
|
||
|
|
||
|
# Even though "copy=False", we want a new object.
|
||
|
assert first is not second
|
||
|
tm.assert_index_equal(first, second)
|
||
|
|
||
|
# Not using tm.assert_index_equal() since names differ.
|
||
|
assert index.equals(first)
|
||
|
|
||
|
assert first.name == "mario"
|
||
|
assert second.name == "mario"
|
||
|
|
||
|
# TODO: belongs in series arithmetic tests?
|
||
|
s1 = pd.Series(2, index=first)
|
||
|
s2 = pd.Series(3, index=second[:-1])
|
||
|
# See GH#13365
|
||
|
s3 = s1 * s2
|
||
|
assert s3.index.name == "mario"
|
||
|
|
||
|
def test_copy_name2(self, index_flat):
|
||
|
# GH#35592
|
||
|
index = index_flat
|
||
|
|
||
|
assert index.copy(name="mario").name == "mario"
|
||
|
|
||
|
with pytest.raises(ValueError, match="Length of new names must be 1, got 2"):
|
||
|
index.copy(name=["mario", "luigi"])
|
||
|
|
||
|
msg = f"{type(index).__name__}.name must be a hashable type"
|
||
|
with pytest.raises(TypeError, match=msg):
|
||
|
index.copy(name=[["mario"]])
|
||
|
|
||
|
def test_unique_level(self, index_flat):
|
||
|
# don't test a MultiIndex here (as its tested separated)
|
||
|
index = index_flat
|
||
|
|
||
|
# GH 17896
|
||
|
expected = index.drop_duplicates()
|
||
|
for level in [0, index.name, None]:
|
||
|
result = index.unique(level=level)
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
msg = "Too many levels: Index has only 1 level, not 4"
|
||
|
with pytest.raises(IndexError, match=msg):
|
||
|
index.unique(level=3)
|
||
|
|
||
|
msg = (
|
||
|
rf"Requested level \(wrong\) does not match index name "
|
||
|
rf"\({re.escape(index.name.__repr__())}\)"
|
||
|
)
|
||
|
with pytest.raises(KeyError, match=msg):
|
||
|
index.unique(level="wrong")
|
||
|
|
||
|
def test_unique(self, index_flat):
|
||
|
# MultiIndex tested separately
|
||
|
index = index_flat
|
||
|
if not len(index):
|
||
|
pytest.skip("Skip check for empty Index and MultiIndex")
|
||
|
|
||
|
idx = index[[0] * 5]
|
||
|
idx_unique = index[[0]]
|
||
|
|
||
|
# We test against `idx_unique`, so first we make sure it's unique
|
||
|
# and doesn't contain nans.
|
||
|
assert idx_unique.is_unique is True
|
||
|
try:
|
||
|
assert idx_unique.hasnans is False
|
||
|
except NotImplementedError:
|
||
|
pass
|
||
|
|
||
|
result = idx.unique()
|
||
|
tm.assert_index_equal(result, idx_unique)
|
||
|
|
||
|
# nans:
|
||
|
if not index._can_hold_na:
|
||
|
pytest.skip("Skip na-check if index cannot hold na")
|
||
|
|
||
|
vals = index._values[[0] * 5]
|
||
|
vals[0] = np.nan
|
||
|
|
||
|
vals_unique = vals[:2]
|
||
|
idx_nan = index._shallow_copy(vals)
|
||
|
idx_unique_nan = index._shallow_copy(vals_unique)
|
||
|
assert idx_unique_nan.is_unique is True
|
||
|
|
||
|
assert idx_nan.dtype == index.dtype
|
||
|
assert idx_unique_nan.dtype == index.dtype
|
||
|
|
||
|
expected = idx_unique_nan
|
||
|
for pos, i in enumerate([idx_nan, idx_unique_nan]):
|
||
|
result = i.unique()
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
@pytest.mark.filterwarnings("ignore:Period with BDay freq:FutureWarning")
|
||
|
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
|
||
|
def test_searchsorted_monotonic(self, index_flat, request):
|
||
|
# GH17271
|
||
|
index = index_flat
|
||
|
# not implemented for tuple searches in MultiIndex
|
||
|
# or Intervals searches in IntervalIndex
|
||
|
if isinstance(index, pd.IntervalIndex):
|
||
|
mark = pytest.mark.xfail(
|
||
|
reason="IntervalIndex.searchsorted does not support Interval arg",
|
||
|
raises=NotImplementedError,
|
||
|
)
|
||
|
request.applymarker(mark)
|
||
|
|
||
|
# nothing to test if the index is empty
|
||
|
if index.empty:
|
||
|
pytest.skip("Skip check for empty Index")
|
||
|
value = index[0]
|
||
|
|
||
|
# determine the expected results (handle dupes for 'right')
|
||
|
expected_left, expected_right = 0, (index == value).argmin()
|
||
|
if expected_right == 0:
|
||
|
# all values are the same, expected_right should be length
|
||
|
expected_right = len(index)
|
||
|
|
||
|
# test _searchsorted_monotonic in all cases
|
||
|
# test searchsorted only for increasing
|
||
|
if index.is_monotonic_increasing:
|
||
|
ssm_left = index._searchsorted_monotonic(value, side="left")
|
||
|
assert expected_left == ssm_left
|
||
|
|
||
|
ssm_right = index._searchsorted_monotonic(value, side="right")
|
||
|
assert expected_right == ssm_right
|
||
|
|
||
|
ss_left = index.searchsorted(value, side="left")
|
||
|
assert expected_left == ss_left
|
||
|
|
||
|
ss_right = index.searchsorted(value, side="right")
|
||
|
assert expected_right == ss_right
|
||
|
|
||
|
elif index.is_monotonic_decreasing:
|
||
|
ssm_left = index._searchsorted_monotonic(value, side="left")
|
||
|
assert expected_left == ssm_left
|
||
|
|
||
|
ssm_right = index._searchsorted_monotonic(value, side="right")
|
||
|
assert expected_right == ssm_right
|
||
|
else:
|
||
|
# non-monotonic should raise.
|
||
|
msg = "index must be monotonic increasing or decreasing"
|
||
|
with pytest.raises(ValueError, match=msg):
|
||
|
index._searchsorted_monotonic(value, side="left")
|
||
|
|
||
|
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
|
||
|
def test_drop_duplicates(self, index_flat, keep):
|
||
|
# MultiIndex is tested separately
|
||
|
index = index_flat
|
||
|
if isinstance(index, RangeIndex):
|
||
|
pytest.skip(
|
||
|
"RangeIndex is tested in test_drop_duplicates_no_duplicates "
|
||
|
"as it cannot hold duplicates"
|
||
|
)
|
||
|
if len(index) == 0:
|
||
|
pytest.skip(
|
||
|
"empty index is tested in test_drop_duplicates_no_duplicates "
|
||
|
"as it cannot hold duplicates"
|
||
|
)
|
||
|
|
||
|
# make unique index
|
||
|
holder = type(index)
|
||
|
unique_values = list(set(index))
|
||
|
dtype = index.dtype if is_numeric_dtype(index) else None
|
||
|
unique_idx = holder(unique_values, dtype=dtype)
|
||
|
|
||
|
# make duplicated index
|
||
|
n = len(unique_idx)
|
||
|
duplicated_selection = np.random.default_rng(2).choice(n, int(n * 1.5))
|
||
|
idx = holder(unique_idx.values[duplicated_selection])
|
||
|
|
||
|
# Series.duplicated is tested separately
|
||
|
expected_duplicated = (
|
||
|
pd.Series(duplicated_selection).duplicated(keep=keep).values
|
||
|
)
|
||
|
tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected_duplicated)
|
||
|
|
||
|
# Series.drop_duplicates is tested separately
|
||
|
expected_dropped = holder(pd.Series(idx).drop_duplicates(keep=keep))
|
||
|
tm.assert_index_equal(idx.drop_duplicates(keep=keep), expected_dropped)
|
||
|
|
||
|
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
|
||
|
def test_drop_duplicates_no_duplicates(self, index_flat):
|
||
|
# MultiIndex is tested separately
|
||
|
index = index_flat
|
||
|
|
||
|
# make unique index
|
||
|
if isinstance(index, RangeIndex):
|
||
|
# RangeIndex cannot have duplicates
|
||
|
unique_idx = index
|
||
|
else:
|
||
|
holder = type(index)
|
||
|
unique_values = list(set(index))
|
||
|
dtype = index.dtype if is_numeric_dtype(index) else None
|
||
|
unique_idx = holder(unique_values, dtype=dtype)
|
||
|
|
||
|
# check on unique index
|
||
|
expected_duplicated = np.array([False] * len(unique_idx), dtype="bool")
|
||
|
tm.assert_numpy_array_equal(unique_idx.duplicated(), expected_duplicated)
|
||
|
result_dropped = unique_idx.drop_duplicates()
|
||
|
tm.assert_index_equal(result_dropped, unique_idx)
|
||
|
# validate shallow copy
|
||
|
assert result_dropped is not unique_idx
|
||
|
|
||
|
def test_drop_duplicates_inplace(self, index):
|
||
|
msg = r"drop_duplicates\(\) got an unexpected keyword argument"
|
||
|
with pytest.raises(TypeError, match=msg):
|
||
|
index.drop_duplicates(inplace=True)
|
||
|
|
||
|
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
|
||
|
def test_has_duplicates(self, index_flat):
|
||
|
# MultiIndex tested separately in:
|
||
|
# tests/indexes/multi/test_unique_and_duplicates.
|
||
|
index = index_flat
|
||
|
holder = type(index)
|
||
|
if not len(index) or isinstance(index, RangeIndex):
|
||
|
# MultiIndex tested separately in:
|
||
|
# tests/indexes/multi/test_unique_and_duplicates.
|
||
|
# RangeIndex is unique by definition.
|
||
|
pytest.skip("Skip check for empty Index, MultiIndex, and RangeIndex")
|
||
|
|
||
|
idx = holder([index[0]] * 5)
|
||
|
assert idx.is_unique is False
|
||
|
assert idx.has_duplicates is True
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"dtype",
|
||
|
["int64", "uint64", "float64", "category", "datetime64[ns]", "timedelta64[ns]"],
|
||
|
)
|
||
|
def test_astype_preserves_name(self, index, dtype):
|
||
|
# https://github.com/pandas-dev/pandas/issues/32013
|
||
|
if isinstance(index, MultiIndex):
|
||
|
index.names = ["idx" + str(i) for i in range(index.nlevels)]
|
||
|
else:
|
||
|
index.name = "idx"
|
||
|
|
||
|
warn = None
|
||
|
if index.dtype.kind == "c" and dtype in ["float64", "int64", "uint64"]:
|
||
|
# imaginary components discarded
|
||
|
if np_version_gte1p25:
|
||
|
warn = np.exceptions.ComplexWarning
|
||
|
else:
|
||
|
warn = np.ComplexWarning
|
||
|
|
||
|
is_pyarrow_str = str(index.dtype) == "string[pyarrow]" and dtype == "category"
|
||
|
try:
|
||
|
# Some of these conversions cannot succeed so we use a try / except
|
||
|
with tm.assert_produces_warning(
|
||
|
warn,
|
||
|
raise_on_extra_warnings=is_pyarrow_str,
|
||
|
check_stacklevel=False,
|
||
|
):
|
||
|
result = index.astype(dtype)
|
||
|
except (ValueError, TypeError, NotImplementedError, SystemError):
|
||
|
return
|
||
|
|
||
|
if isinstance(index, MultiIndex):
|
||
|
assert result.names == index.names
|
||
|
else:
|
||
|
assert result.name == index.name
|
||
|
|
||
|
def test_hasnans_isnans(self, index_flat):
|
||
|
# GH#11343, added tests for hasnans / isnans
|
||
|
index = index_flat
|
||
|
|
||
|
# cases in indices doesn't include NaN
|
||
|
idx = index.copy(deep=True)
|
||
|
expected = np.array([False] * len(idx), dtype=bool)
|
||
|
tm.assert_numpy_array_equal(idx._isnan, expected)
|
||
|
assert idx.hasnans is False
|
||
|
|
||
|
idx = index.copy(deep=True)
|
||
|
values = idx._values
|
||
|
|
||
|
if len(index) == 0:
|
||
|
return
|
||
|
elif is_integer_dtype(index.dtype):
|
||
|
return
|
||
|
elif index.dtype == bool:
|
||
|
# values[1] = np.nan below casts to True!
|
||
|
return
|
||
|
|
||
|
values[1] = np.nan
|
||
|
|
||
|
idx = type(index)(values)
|
||
|
|
||
|
expected = np.array([False] * len(idx), dtype=bool)
|
||
|
expected[1] = True
|
||
|
tm.assert_numpy_array_equal(idx._isnan, expected)
|
||
|
assert idx.hasnans is True
|
||
|
|
||
|
|
||
|
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
|
||
|
@pytest.mark.parametrize("na_position", [None, "middle"])
|
||
|
def test_sort_values_invalid_na_position(index_with_missing, na_position):
|
||
|
with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
|
||
|
index_with_missing.sort_values(na_position=na_position)
|
||
|
|
||
|
|
||
|
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
|
||
|
@pytest.mark.parametrize("na_position", ["first", "last"])
|
||
|
def test_sort_values_with_missing(index_with_missing, na_position, request):
|
||
|
# GH 35584. Test that sort_values works with missing values,
|
||
|
# sort non-missing and place missing according to na_position
|
||
|
|
||
|
if isinstance(index_with_missing, CategoricalIndex):
|
||
|
request.applymarker(
|
||
|
pytest.mark.xfail(
|
||
|
reason="missing value sorting order not well-defined", strict=False
|
||
|
)
|
||
|
)
|
||
|
|
||
|
missing_count = np.sum(index_with_missing.isna())
|
||
|
not_na_vals = index_with_missing[index_with_missing.notna()].values
|
||
|
sorted_values = np.sort(not_na_vals)
|
||
|
if na_position == "first":
|
||
|
sorted_values = np.concatenate([[None] * missing_count, sorted_values])
|
||
|
else:
|
||
|
sorted_values = np.concatenate([sorted_values, [None] * missing_count])
|
||
|
|
||
|
# Explicitly pass dtype needed for Index backed by EA e.g. IntegerArray
|
||
|
expected = type(index_with_missing)(sorted_values, dtype=index_with_missing.dtype)
|
||
|
|
||
|
result = index_with_missing.sort_values(na_position=na_position)
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
|
||
|
def test_ndarray_compat_properties(index):
|
||
|
if isinstance(index, PeriodIndex) and not IS64:
|
||
|
pytest.skip("Overflow")
|
||
|
idx = index
|
||
|
assert idx.T.equals(idx)
|
||
|
assert idx.transpose().equals(idx)
|
||
|
|
||
|
values = idx.values
|
||
|
|
||
|
assert idx.shape == values.shape
|
||
|
assert idx.ndim == values.ndim
|
||
|
assert idx.size == values.size
|
||
|
|
||
|
if not isinstance(index, (RangeIndex, MultiIndex)):
|
||
|
# These two are not backed by an ndarray
|
||
|
assert idx.nbytes == values.nbytes
|
||
|
|
||
|
# test for validity
|
||
|
idx.nbytes
|
||
|
idx.values.nbytes
|
||
|
|
||
|
|
||
|
def test_compare_read_only_array():
|
||
|
# GH#57130
|
||
|
arr = np.array([], dtype=object)
|
||
|
arr.flags.writeable = False
|
||
|
idx = pd.Index(arr)
|
||
|
result = idx > 69
|
||
|
assert result.dtype == bool
|