""" Collection of tests asserting things that should be true for any index subclass. Makes use of the `indices` fixture defined in pandas/tests/indexes/conftest.py. """ import re import numpy as np import pytest from pandas._libs.tslibs import iNaT from pandas.core.dtypes.common import is_period_dtype, needs_i8_conversion import pandas as pd from pandas import ( CategoricalIndex, DatetimeIndex, Int64Index, MultiIndex, PeriodIndex, RangeIndex, TimedeltaIndex, UInt64Index, ) import pandas._testing as tm class TestCommon: def test_droplevel(self, index): # GH 21115 if isinstance(index, MultiIndex): # Tested separately in test_multi.py return assert index.droplevel([]).equals(index) for level in index.name, [index.name]: if isinstance(index.name, tuple) and level is index.name: # GH 21121 : droplevel with tuple name continue with pytest.raises(ValueError): index.droplevel(level) for level in "wrong", ["wrong"]: with pytest.raises( KeyError, match=r"'Requested level \(wrong\) does not match index name \(None\)'", ): index.droplevel(level) def test_constructor_non_hashable_name(self, index): # GH 20527 if isinstance(index, MultiIndex): pytest.skip("multiindex handled in test_multi.py") message = "Index.name must be a hashable type" renamed = [["1"]] # With .rename() with pytest.raises(TypeError, match=message): index.rename(name=renamed) # With .set_names() with pytest.raises(TypeError, match=message): index.set_names(names=renamed) def test_constructor_unwraps_index(self, index): if isinstance(index, pd.MultiIndex): raise pytest.skip("MultiIndex has no ._data") a = index b = type(a)(a) tm.assert_equal(a._data, b._data) @pytest.mark.parametrize("itm", [101, "no_int"]) # FutureWarning from non-tuple sequence of nd indexing @pytest.mark.filterwarnings("ignore::FutureWarning") def test_getitem_error(self, index, itm): with pytest.raises(IndexError): index[itm] def test_to_flat_index(self, index): # 22866 if isinstance(index, MultiIndex): pytest.skip("Separate expectation for MultiIndex") result = index.to_flat_index() tm.assert_index_equal(result, index) def test_set_name_methods(self, index): new_name = "This is the new name for this index" # don't tests a MultiIndex here (as its tested separated) if isinstance(index, MultiIndex): pytest.skip("Skip check for MultiIndex") original_name = index.name new_ind = index.set_names([new_name]) assert new_ind.name == new_name assert index.name == original_name res = index.rename(new_name, inplace=True) # should return None assert res is None assert index.name == new_name assert index.names == [new_name] # FIXME: dont leave commented-out # with pytest.raises(TypeError, match="list-like"): # # should still fail even if it would be the right length # ind.set_names("a") with pytest.raises(ValueError, match="Level must be None"): index.set_names("a", level=0) # rename in place just leaves tuples and other containers alone name = ("A", "B") index.rename(name, inplace=True) assert index.name == name assert index.names == [name] def test_copy_and_deepcopy(self, index): from copy import copy, deepcopy if isinstance(index, MultiIndex): pytest.skip("Skip check for MultiIndex") for func in (copy, deepcopy): idx_copy = func(index) assert idx_copy is not index assert idx_copy.equals(index) new_copy = index.copy(deep=True, name="banana") assert new_copy.name == "banana" def test_unique(self, index): # don't test a MultiIndex here (as its tested separated) # don't test a CategoricalIndex because categories change (GH 18291) if isinstance(index, (MultiIndex, CategoricalIndex)): pytest.skip("Skip check for MultiIndex/CategoricalIndex") # GH 17896 expected = index.drop_duplicates() for level in 0, index.name, None: result = index.unique(level=level) tm.assert_index_equal(result, expected) msg = "Too many levels: Index has only 1 level, not 4" with pytest.raises(IndexError, match=msg): index.unique(level=3) msg = ( fr"Requested level \(wrong\) does not match index name " fr"\({re.escape(index.name.__repr__())}\)" ) with pytest.raises(KeyError, match=msg): index.unique(level="wrong") def test_get_unique_index(self, index): # MultiIndex tested separately if not len(index) or isinstance(index, MultiIndex): pytest.skip("Skip check for empty Index and MultiIndex") idx = index[[0] * 5] idx_unique = index[[0]] # We test against `idx_unique`, so first we make sure it's unique # and doesn't contain nans. assert idx_unique.is_unique is True try: assert idx_unique.hasnans is False except NotImplementedError: pass for dropna in [False, True]: result = idx._get_unique_index(dropna=dropna) tm.assert_index_equal(result, idx_unique) # nans: if not index._can_hold_na: pytest.skip("Skip na-check if index cannot hold na") if is_period_dtype(index.dtype): vals = index[[0] * 5]._data vals[0] = pd.NaT elif needs_i8_conversion(index.dtype): vals = index.asi8[[0] * 5] vals[0] = iNaT else: vals = index.values[[0] * 5] vals[0] = np.nan vals_unique = vals[:2] if index.dtype.kind in ["m", "M"]: # i.e. needs_i8_conversion but not period_dtype, as above vals = type(index._data)._simple_new(vals, dtype=index.dtype) vals_unique = type(index._data)._simple_new(vals_unique, dtype=index.dtype) idx_nan = index._shallow_copy(vals) idx_unique_nan = index._shallow_copy(vals_unique) assert idx_unique_nan.is_unique is True assert idx_nan.dtype == index.dtype assert idx_unique_nan.dtype == index.dtype for dropna, expected in zip([False, True], [idx_unique_nan, idx_unique]): for i in [idx_nan, idx_unique_nan]: result = i._get_unique_index(dropna=dropna) tm.assert_index_equal(result, expected) def test_view(self, index): assert index.view().name == index.name def test_searchsorted_monotonic(self, index): # GH17271 # not implemented for tuple searches in MultiIndex # or Intervals searches in IntervalIndex if isinstance(index, (MultiIndex, pd.IntervalIndex)): pytest.skip("Skip check for MultiIndex/IntervalIndex") # nothing to test if the index is empty if index.empty: pytest.skip("Skip check for empty Index") value = index[0] # determine the expected results (handle dupes for 'right') expected_left, expected_right = 0, (index == value).argmin() if expected_right == 0: # all values are the same, expected_right should be length expected_right = len(index) # test _searchsorted_monotonic in all cases # test searchsorted only for increasing if index.is_monotonic_increasing: ssm_left = index._searchsorted_monotonic(value, side="left") assert expected_left == ssm_left ssm_right = index._searchsorted_monotonic(value, side="right") assert expected_right == ssm_right ss_left = index.searchsorted(value, side="left") assert expected_left == ss_left ss_right = index.searchsorted(value, side="right") assert expected_right == ss_right elif index.is_monotonic_decreasing: ssm_left = index._searchsorted_monotonic(value, side="left") assert expected_left == ssm_left ssm_right = index._searchsorted_monotonic(value, side="right") assert expected_right == ssm_right else: # non-monotonic should raise. with pytest.raises(ValueError): index._searchsorted_monotonic(value, side="left") def test_pickle(self, index): original_name, index.name = index.name, "foo" unpickled = tm.round_trip_pickle(index) assert index.equals(unpickled) index.name = original_name def test_drop_duplicates(self, index, keep): if isinstance(index, MultiIndex): pytest.skip("MultiIndex is tested separately") if isinstance(index, RangeIndex): pytest.skip( "RangeIndex is tested in test_drop_duplicates_no_duplicates " "as it cannot hold duplicates" ) if len(index) == 0: pytest.skip( "empty index is tested in test_drop_duplicates_no_duplicates " "as it cannot hold duplicates" ) # make unique index holder = type(index) unique_values = list(set(index)) unique_idx = holder(unique_values) # make duplicated index n = len(unique_idx) duplicated_selection = np.random.choice(n, int(n * 1.5)) idx = holder(unique_idx.values[duplicated_selection]) # Series.duplicated is tested separately expected_duplicated = ( pd.Series(duplicated_selection).duplicated(keep=keep).values ) tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected_duplicated) # Series.drop_duplicates is tested separately expected_dropped = holder(pd.Series(idx).drop_duplicates(keep=keep)) tm.assert_index_equal(idx.drop_duplicates(keep=keep), expected_dropped) def test_drop_duplicates_no_duplicates(self, index): if isinstance(index, MultiIndex): pytest.skip("MultiIndex is tested separately") # make unique index if isinstance(index, RangeIndex): # RangeIndex cannot have duplicates unique_idx = index else: holder = type(index) unique_values = list(set(index)) unique_idx = holder(unique_values) # check on unique index expected_duplicated = np.array([False] * len(unique_idx), dtype="bool") tm.assert_numpy_array_equal(unique_idx.duplicated(), expected_duplicated) result_dropped = unique_idx.drop_duplicates() tm.assert_index_equal(result_dropped, unique_idx) # validate shallow copy assert result_dropped is not unique_idx def test_drop_duplicates_inplace(self, index): msg = r"drop_duplicates\(\) got an unexpected keyword argument" with pytest.raises(TypeError, match=msg): index.drop_duplicates(inplace=True) def test_has_duplicates(self, index): holder = type(index) if not len(index) or isinstance(index, (MultiIndex, RangeIndex)): # MultiIndex tested separately in: # tests/indexes/multi/test_unique_and_duplicates. # RangeIndex is unique by definition. pytest.skip("Skip check for empty Index, MultiIndex, and RangeIndex") idx = holder([index[0]] * 5) assert idx.is_unique is False assert idx.has_duplicates is True @pytest.mark.parametrize( "dtype", ["int64", "uint64", "float64", "category", "datetime64[ns]", "timedelta64[ns]"], ) def test_astype_preserves_name(self, index, dtype): # https://github.com/pandas-dev/pandas/issues/32013 if isinstance(index, MultiIndex): index.names = ["idx" + str(i) for i in range(index.nlevels)] else: index.name = "idx" try: # Some of these conversions cannot succeed so we use a try / except result = index.astype(dtype) except (ValueError, TypeError, NotImplementedError, SystemError): return if isinstance(index, MultiIndex): assert result.names == index.names else: assert result.name == index.name def test_ravel_deprecation(self, index): # GH#19956 ravel returning ndarray is deprecated with tm.assert_produces_warning(FutureWarning): index.ravel() def test_asi8_deprecation(self, index): # GH#37877 if isinstance( index, (Int64Index, UInt64Index, DatetimeIndex, TimedeltaIndex, PeriodIndex) ): warn = None else: warn = FutureWarning with tm.assert_produces_warning(warn): index.asi8 @pytest.mark.parametrize("na_position", [None, "middle"]) def test_sort_values_invalid_na_position(index_with_missing, na_position): if isinstance(index_with_missing, (CategoricalIndex, MultiIndex)): pytest.xfail("missing value sorting order not defined for index type") if na_position not in ["first", "last"]: with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"): index_with_missing.sort_values(na_position=na_position) @pytest.mark.parametrize("na_position", ["first", "last"]) def test_sort_values_with_missing(index_with_missing, na_position): # GH 35584. Test that sort_values works with missing values, # sort non-missing and place missing according to na_position if isinstance(index_with_missing, (CategoricalIndex, MultiIndex)): pytest.xfail("missing value sorting order not defined for index type") missing_count = np.sum(index_with_missing.isna()) not_na_vals = index_with_missing[index_with_missing.notna()].values sorted_values = np.sort(not_na_vals) if na_position == "first": sorted_values = np.concatenate([[None] * missing_count, sorted_values]) else: sorted_values = np.concatenate([sorted_values, [None] * missing_count]) expected = type(index_with_missing)(sorted_values) result = index_with_missing.sort_values(na_position=na_position) tm.assert_index_equal(result, expected)