Inzynierka/Lib/site-packages/pandas/tests/indexes/test_indexing.py

"""
test_indexing tests the following Index methods:
    __getitem__
    get_loc
    get_value
    __contains__
    take
    where
    get_indexer
    get_indexer_for
    slice_locs
    asof_locs

The corresponding tests.indexes.[index_type].test_indexing files
contain tests for the corresponding methods specific to those Index subclasses.
"""
import numpy as np
import pytest

from pandas.errors import InvalidIndexError

from pandas.core.dtypes.common import (
    is_float_dtype,
    is_scalar,
)

from pandas import (
    NA,
    DatetimeIndex,
    Index,
    IntervalIndex,
    MultiIndex,
    NaT,
    PeriodIndex,
    TimedeltaIndex,
)
import pandas._testing as tm


class TestTake:
    """Checks of ``Index.take`` run against the shared ``index`` fixture."""

    def test_take_invalid_kwargs(self, index):
        # An unknown keyword is a TypeError; ``out`` and ``mode`` are rejected
        # with a ValueError.
        indices = [1, 2]

        msg = r"take\(\) got an unexpected keyword argument 'foo'"
        with pytest.raises(TypeError, match=msg):
            index.take(indices, foo=2)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            index.take(indices, out=indices)

        msg = "the 'mode' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            index.take(indices, mode="clip")

    def test_take(self, index):
        # ``take`` with a list of positions must agree with ``__getitem__``
        # called with the same list.
        indexer = [4, 3, 0, 2]
        if len(index) < 5:
            # Position 4 would be out of bounds. Report the case as skipped
            # instead of letting it silently count as a pass.
            pytest.skip("Test doesn't make sense since not enough elements")

        result = index.take(indexer)
        expected = index[indexer]
        assert result.equals(expected)

        if not isinstance(index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
            # GH 10791
            msg = r"'(.*Index)' object has no attribute 'freq'"
            with pytest.raises(AttributeError, match=msg):
                index.freq

    def test_take_indexer_type(self):
        # GH#42875
        # A scalar passed as ``indices`` must raise TypeError.
        integer_index = Index([0, 1, 2, 3])
        scalar_index = 1
        msg = "Expected indices to be array-like"
        with pytest.raises(TypeError, match=msg):
            integer_index.take(scalar_index)

    def test_take_minus1_without_fill(self, index):
        # -1 does not get treated as NA unless allow_fill=True is passed
        if len(index) == 0:
            # There is no element to take from an empty index; skip visibly
            # rather than returning and being counted as a pass.
            pytest.skip("Test doesn't make sense for empty index")

        result = index.take([0, 0, -1])

        # Without fill, -1 is an ordinary "last element" position.
        expected = index.take([0, 0, len(index) - 1])
        tm.assert_index_equal(result, expected)


class TestContains:
    """Membership (``in`` / ``not in``) checks for ``Index``."""

    @pytest.mark.parametrize(
        "index,val",
        [
            (Index([0, 1, 2]), 2),
            (Index([0, 1, "2"]), "2"),
            (Index([0, 1, 2, np.inf, 4]), 4),
            (Index([0, 1, 2, np.nan, 4]), 4),
            (Index([0, 1, 2, np.inf]), np.inf),
            (Index([0, 1, 2, np.nan]), np.nan),
        ],
    )
    def test_index_contains(self, index, val):
        # Each value is present in its paired index.
        assert val in index

    @pytest.mark.parametrize(
        "index,val",
        [
            (Index([0, 1, 2]), "2"),
            (Index([0, 1, "2"]), 2),
            (Index([0, 1, 2, np.inf]), 4),
            (Index([0, 1, 2, np.nan]), 4),
            (Index([0, 1, 2, np.inf]), np.nan),
            (Index([0, 1, 2, np.nan]), np.inf),
            # Checking if np.inf in int64 Index should not cause an OverflowError
            # Related to GH 16957
            (Index([0, 1, 2], dtype=np.int64), np.inf),
            (Index([0, 1, 2], dtype=np.int64), np.nan),
            (Index([0, 1, 2], dtype=np.uint64), np.inf),
            (Index([0, 1, 2], dtype=np.uint64), np.nan),
        ],
    )
    def test_index_not_contains(self, index, val):
        # Each value is absent from its paired index.
        assert val not in index

    @pytest.mark.parametrize(
        "index,val",
        [
            (Index([0, 1, "2"]), 0),
            (Index([0, 1, "2"]), "2"),
        ],
    )
    def test_mixed_index_contains(self, index, val):
        # GH#19860
        assert val in index

    @pytest.mark.parametrize(
        "index,val",
        [
            (Index([0, 1, "2"]), "1"),
            (Index([0, 1, "2"]), 2),
        ],
    )
    def test_mixed_index_not_contains(self, index, val):
        # GH#19860
        assert val not in index

    def test_contains_with_float_index(self, any_real_numpy_dtype):
        # GH#22085
        # Float dtypes get fractional data, every other dtype gets whole numbers.
        if is_float_dtype(any_real_numpy_dtype):
            values = [0.1, 1.1, 2.2, 3.3]
        else:
            values = [0, 1, 2, 3]
        idx = Index(values, dtype=any_real_numpy_dtype)

        # 1.1 is only in the fractional data; 1.0 and 1 only in the whole numbers.
        holds_floats = is_float_dtype(idx.dtype)
        assert (1.1 in idx) == holds_floats
        assert (1.0 in idx) != holds_floats
        assert (1 in idx) != holds_floats

    def test_contains_requires_hashable_raises(self, index):
        if isinstance(index, MultiIndex):
            return  # TODO: do we want this to raise?

        with pytest.raises(TypeError, match="unhashable type: 'list'"):
            [] in index

        # Several message variants are accepted for the engine-level check.
        engine_messages = [
            r"unhashable type: 'dict'",
            r"must be real number, not dict",
            r"an integer is required",
            r"\{\}",
            r"pandas\._libs\.interval\.IntervalTree' is not iterable",
        ]
        with pytest.raises(TypeError, match="|".join(engine_messages)):
            {} in index._engine


class TestGetLoc:
    """``Index.get_loc`` behaviour for keys that are not ordinary scalar labels."""

    def test_get_loc_non_hashable(self, index):
        # MultiIndex and Index raise TypeError, others InvalidIndexError
        unhashable_key = slice(0, 1)
        with pytest.raises((TypeError, InvalidIndexError), match="slice"):
            index.get_loc(unhashable_key)

    def test_get_loc_non_scalar_hashable(self, index):
        # GH52877
        from enum import Enum

        class E(Enum):
            X1 = "x1"

        # The enum member is the hashable, non-scalar key under test.
        assert not is_scalar(E.X1)

        # TODO: make these more consistent?
        invalid_index_types = (
            DatetimeIndex,
            TimedeltaIndex,
            PeriodIndex,
            IntervalIndex,
        )
        if isinstance(index, invalid_index_types):
            exc, msg = InvalidIndexError, "E.X1"
        else:
            exc, msg = KeyError, "<E.X1: 'x1'>"

        with pytest.raises(exc, match=msg):
            index.get_loc(E.X1)

    def test_get_loc_generator(self, index):
        # TODO: make these more consistent?
        invalid_index_types = (
            DatetimeIndex,
            TimedeltaIndex,
            PeriodIndex,
            IntervalIndex,
            MultiIndex,
        )
        exc = InvalidIndexError if isinstance(index, invalid_index_types) else KeyError

        with pytest.raises(exc, match="generator object"):
            # MultiIndex specifically checks for generator; others for scalar
            index.get_loc(x for x in range(5))

    def test_get_loc_masked_duplicated_na(self):
        # GH#48411
        # With NA duplicated, get_loc(NA) yields a boolean mask of its positions.
        masked = Index([1, 2, NA, NA], dtype="Int64")
        na_mask = np.array([False, False, True, True])
        tm.assert_numpy_array_equal(masked.get_loc(NA), na_mask)


class TestGetIndexer:
    """Checks of ``get_indexer``, ``get_indexer_non_unique`` and ``get_indexer_for``."""

    def test_get_indexer_base(self, index):
        # An index treated as unique maps onto itself as 0..n-1; otherwise
        # ``get_indexer`` must refuse with InvalidIndexError.
        if index._index_as_unique:
            expected = np.arange(index.size, dtype=np.intp)
            actual = index.get_indexer(index)
            tm.assert_numpy_array_equal(expected, actual)
        else:
            msg = "Reindexing only valid with uniquely valued Index objects"
            with pytest.raises(InvalidIndexError, match=msg):
                index.get_indexer(index)

        # An unknown ``method`` is rejected in both cases.
        with pytest.raises(ValueError, match="Invalid fill method"):
            index.get_indexer(index, method="invalid")

    def test_get_indexer_consistency(self, index):
        # See GH#16819

        if index._index_as_unique:
            indexer = index.get_indexer(index[0:2])
            assert isinstance(indexer, np.ndarray)
            assert indexer.dtype == np.intp
        else:
            msg = "Reindexing only valid with uniquely valued Index objects"
            with pytest.raises(InvalidIndexError, match=msg):
                index.get_indexer(index[0:2])

        # The non-unique variant is exercised for every index.
        indexer, _ = index.get_indexer_non_unique(index[0:2])
        assert isinstance(indexer, np.ndarray)
        assert indexer.dtype == np.intp

    def test_get_indexer_masked_duplicated_na(self):
        # GH#48411
        idx = Index([1, 2, NA, NA], dtype="Int64")
        result = idx.get_indexer_for(Index([1, NA], dtype="Int64"))
        # Pin the dtype to np.intp rather than copying it from ``result``, so
        # the comparison also checks the dtype, as the other tests here do.
        expected = np.array([0, 2, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)


class TestConvertSliceIndexer:
    """Checks of the private ``Index._convert_slice_indexer`` helper."""

    def test_convert_almost_null_slice(self, index):
        # slice with None at both ends, but not step
        almost_null = slice(None, None, "foo")

        # IntervalIndex raises a different exception type and message from
        # every other index.
        if isinstance(index, IntervalIndex):
            exc = ValueError
            msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
        else:
            exc = TypeError
            msg = "'>=' not supported between instances of 'str' and 'int'"

        with pytest.raises(exc, match=msg):
            index._convert_slice_indexer(almost_null, "loc")


class TestPutmask:
    """Checks of ``Index.putmask`` argument validation."""

    def test_putmask_with_wrong_mask(self, index):
        # GH#18368
        if not len(index):
            # ``index[0]`` below needs at least one element; skip visibly
            # rather than returning and being counted as a pass.
            pytest.skip("Test doesn't make sense for empty index")

        fill = index[0]

        msg = "putmask: mask and data must be the same size"
        # Mask one element too long.
        with pytest.raises(ValueError, match=msg):
            index.putmask(np.ones(len(index) + 1, np.bool_), fill)

        # Mask one element too short.
        with pytest.raises(ValueError, match=msg):
            index.putmask(np.ones(len(index) - 1, np.bool_), fill)

        # A string passed as the mask.
        with pytest.raises(ValueError, match=msg):
            index.putmask("foo", fill)


@pytest.mark.parametrize(
    "idx",
    [
        Index([1, 2, 3]),
        Index([0.1, 0.2, 0.3]),
        Index(["a", "b", "c"]),
    ],
)
def test_getitem_deprecated_float(idx):
    # https://github.com/pandas-dev/pandas/issues/34191
    # ``__getitem__`` with the float key 1.0 must raise IndexError.
    float_key = 1.0
    with pytest.raises(
        IndexError, match="Indexing with a float is no longer supported"
    ):
        idx[float_key]


@pytest.mark.parametrize(
    "idx,target,expected",
    [
        (
            [np.nan, "var1", np.nan],
            [np.nan],
            np.array([0, 2], dtype=np.intp),
        ),
        (
            [np.nan, "var1", np.nan],
            [np.nan, "var1"],
            np.array([0, 2, 1], dtype=np.intp),
        ),
        (
            np.array([np.nan, "var1", np.nan], dtype=object),
            [np.nan],
            np.array([0, 2], dtype=np.intp),
        ),
        (
            DatetimeIndex(["2020-08-05", NaT, NaT]),
            [NaT],
            np.array([1, 2], dtype=np.intp),
        ),
        (
            ["a", "b", "a", np.nan],
            [np.nan],
            np.array([3], dtype=np.intp),
        ),
        (
            np.array(["b", np.nan, float("NaN"), "b"], dtype=object),
            Index([np.nan], dtype=object),
            np.array([1, 2], dtype=np.intp),
        ),
    ],
)
def test_get_indexer_non_unique_multiple_nans(idx, target, expected):
    # GH 35392
    # ``expected`` lists the position of every match for each target label.
    positions = Index(idx).get_indexer_for(target)
    tm.assert_numpy_array_equal(positions, expected)


def test_get_indexer_non_unique_nans_in_object_dtype_target(nulls_fixture):
    # A null inside an object-dtype target is reported as missing (-1) when
    # looked up in an index built from floats; the integer 1 still matches 1.0.
    float_index = Index([1.0, 2.0])
    object_target = Index([1, nulls_fixture], dtype="object")

    found, missing = float_index.get_indexer_non_unique(object_target)

    expected_found = np.array([0, -1], dtype=np.intp)
    expected_missing = np.array([1], dtype=np.intp)
    tm.assert_numpy_array_equal(found, expected_found)
    tm.assert_numpy_array_equal(missing, expected_missing)
first commit 2023-06-02 12:51:02 +02:00			`"""`
			`test_indexing tests the following Index methods:`
			`__getitem__`
			`get_loc`
			`get_value`
			`__contains__`
			`take`
			`where`
			`get_indexer`
			`get_indexer_for`
			`slice_locs`
			`asof_locs`

			`The corresponding tests.indexes.[index_type].test_indexing files`
			`contain tests for the corresponding methods specific to those Index subclasses.`
			`"""`
			`import numpy as np`
			`import pytest`

			`from pandas.errors import InvalidIndexError`

			`from pandas.core.dtypes.common import (`
			`is_float_dtype,`
			`is_scalar,`
			`)`

			`from pandas import (`
			`NA,`
			`DatetimeIndex,`
			`Index,`
			`IntervalIndex,`
			`MultiIndex,`
			`NaT,`
			`PeriodIndex,`
			`TimedeltaIndex,`
			`)`
			`import pandas._testing as tm`


			`class TestTake:`
			`def test_take_invalid_kwargs(self, index):`
			`indices = [1, 2]`

			`msg = r"take\(\) got an unexpected keyword argument 'foo'"`
			`with pytest.raises(TypeError, match=msg):`
			`index.take(indices, foo=2)`

			`msg = "the 'out' parameter is not supported"`
			`with pytest.raises(ValueError, match=msg):`
			`index.take(indices, out=indices)`

			`msg = "the 'mode' parameter is not supported"`
			`with pytest.raises(ValueError, match=msg):`
			`index.take(indices, mode="clip")`

			`def test_take(self, index):`
			`indexer = [4, 3, 0, 2]`
			`if len(index) < 5:`
			`# not enough elements; ignore`
			`return`

			`result = index.take(indexer)`
			`expected = index[indexer]`
			`assert result.equals(expected)`

			`if not isinstance(index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)):`
			`# GH 10791`
			`msg = r"'(.*Index)' object has no attribute 'freq'"`
			`with pytest.raises(AttributeError, match=msg):`
			`index.freq`

			`def test_take_indexer_type(self):`
			`# GH#42875`
			`integer_index = Index([0, 1, 2, 3])`
			`scalar_index = 1`
			`msg = "Expected indices to be array-like"`
			`with pytest.raises(TypeError, match=msg):`
			`integer_index.take(scalar_index)`

			`def test_take_minus1_without_fill(self, index):`
			`# -1 does not get treated as NA unless allow_fill=True is passed`
			`if len(index) == 0:`
			`# Test is not applicable`
			`return`

			`result = index.take([0, 0, -1])`

			`expected = index.take([0, 0, len(index) - 1])`
			`tm.assert_index_equal(result, expected)`


			`class TestContains:`
			`@pytest.mark.parametrize(`
			`"index,val",`
			`[`
			`(Index([0, 1, 2]), 2),`
			`(Index([0, 1, "2"]), "2"),`
			`(Index([0, 1, 2, np.inf, 4]), 4),`
			`(Index([0, 1, 2, np.nan, 4]), 4),`
			`(Index([0, 1, 2, np.inf]), np.inf),`
			`(Index([0, 1, 2, np.nan]), np.nan),`
			`],`
			`)`
			`def test_index_contains(self, index, val):`
			`assert val in index`

			`@pytest.mark.parametrize(`
			`"index,val",`
			`[`
			`(Index([0, 1, 2]), "2"),`
			`(Index([0, 1, "2"]), 2),`
			`(Index([0, 1, 2, np.inf]), 4),`
			`(Index([0, 1, 2, np.nan]), 4),`
			`(Index([0, 1, 2, np.inf]), np.nan),`
			`(Index([0, 1, 2, np.nan]), np.inf),`
			`# Checking if np.inf in int64 Index should not cause an OverflowError`
			`# Related to GH 16957`
			`(Index([0, 1, 2], dtype=np.int64), np.inf),`
			`(Index([0, 1, 2], dtype=np.int64), np.nan),`
			`(Index([0, 1, 2], dtype=np.uint64), np.inf),`
			`(Index([0, 1, 2], dtype=np.uint64), np.nan),`
			`],`
			`)`
			`def test_index_not_contains(self, index, val):`
			`assert val not in index`

			`@pytest.mark.parametrize(`
			`"index,val", [(Index([0, 1, "2"]), 0), (Index([0, 1, "2"]), "2")]`
			`)`
			`def test_mixed_index_contains(self, index, val):`
			`# GH#19860`
			`assert val in index`

			`@pytest.mark.parametrize(`
			`"index,val", [(Index([0, 1, "2"]), "1"), (Index([0, 1, "2"]), 2)]`
			`)`
			`def test_mixed_index_not_contains(self, index, val):`
			`# GH#19860`
			`assert val not in index`

			`def test_contains_with_float_index(self, any_real_numpy_dtype):`
			`# GH#22085`
			`dtype = any_real_numpy_dtype`
			`data = [0, 1, 2, 3] if not is_float_dtype(dtype) else [0.1, 1.1, 2.2, 3.3]`
			`index = Index(data, dtype=dtype)`

			`if not is_float_dtype(index.dtype):`
			`assert 1.1 not in index`
			`assert 1.0 in index`
			`assert 1 in index`
			`else:`
			`assert 1.1 in index`
			`assert 1.0 not in index`
			`assert 1 not in index`

			`def test_contains_requires_hashable_raises(self, index):`
			`if isinstance(index, MultiIndex):`
			`return # TODO: do we want this to raise?`

			`msg = "unhashable type: 'list'"`
			`with pytest.raises(TypeError, match=msg):`
			`[] in index`

			`msg = "\|".join(`
			`[`
			`r"unhashable type: 'dict'",`
			`r"must be real number, not dict",`
			`r"an integer is required",`
			`r"\{\}",`
			`r"pandas\._libs\.interval\.IntervalTree' is not iterable",`
			`]`
			`)`
			`with pytest.raises(TypeError, match=msg):`
			`{} in index._engine`


			`class TestGetLoc:`
			`def test_get_loc_non_hashable(self, index):`
			`# MultiIndex and Index raise TypeError, others InvalidIndexError`

			`with pytest.raises((TypeError, InvalidIndexError), match="slice"):`
			`index.get_loc(slice(0, 1))`

			`def test_get_loc_non_scalar_hashable(self, index):`
			`# GH52877`
			`from enum import Enum`

			`class E(Enum):`
			`X1 = "x1"`

			`assert not is_scalar(E.X1)`

			`exc = KeyError`
			`msg = "<E.X1: 'x1'>"`
			`if isinstance(`
			`index,`
			`(`
			`DatetimeIndex,`
			`TimedeltaIndex,`
			`PeriodIndex,`
			`IntervalIndex,`
			`),`
			`):`
			`# TODO: make these more consistent?`
			`exc = InvalidIndexError`
			`msg = "E.X1"`
			`with pytest.raises(exc, match=msg):`
			`index.get_loc(E.X1)`

			`def test_get_loc_generator(self, index):`
			`exc = KeyError`
			`if isinstance(`
			`index,`
			`(`
			`DatetimeIndex,`
			`TimedeltaIndex,`
			`PeriodIndex,`
			`IntervalIndex,`
			`MultiIndex,`
			`),`
			`):`
			`# TODO: make these more consistent?`
			`exc = InvalidIndexError`
			`with pytest.raises(exc, match="generator object"):`
			`# MultiIndex specifically checks for generator; others for scalar`
			`index.get_loc(x for x in range(5))`

			`def test_get_loc_masked_duplicated_na(self):`
			`# GH#48411`
			`idx = Index([1, 2, NA, NA], dtype="Int64")`
			`result = idx.get_loc(NA)`
			`expected = np.array([False, False, True, True])`
			`tm.assert_numpy_array_equal(result, expected)`


			`class TestGetIndexer:`
			`def test_get_indexer_base(self, index):`
			`if index._index_as_unique:`
			`expected = np.arange(index.size, dtype=np.intp)`
			`actual = index.get_indexer(index)`
			`tm.assert_numpy_array_equal(expected, actual)`
			`else:`
			`msg = "Reindexing only valid with uniquely valued Index objects"`
			`with pytest.raises(InvalidIndexError, match=msg):`
			`index.get_indexer(index)`

			`with pytest.raises(ValueError, match="Invalid fill method"):`
			`index.get_indexer(index, method="invalid")`

			`def test_get_indexer_consistency(self, index):`
			`# See GH#16819`

			`if index._index_as_unique:`
			`indexer = index.get_indexer(index[0:2])`
			`assert isinstance(indexer, np.ndarray)`
			`assert indexer.dtype == np.intp`
			`else:`
			`msg = "Reindexing only valid with uniquely valued Index objects"`
			`with pytest.raises(InvalidIndexError, match=msg):`
			`index.get_indexer(index[0:2])`

			`indexer, _ = index.get_indexer_non_unique(index[0:2])`
			`assert isinstance(indexer, np.ndarray)`
			`assert indexer.dtype == np.intp`

			`def test_get_indexer_masked_duplicated_na(self):`
			`# GH#48411`
			`idx = Index([1, 2, NA, NA], dtype="Int64")`
			`result = idx.get_indexer_for(Index([1, NA], dtype="Int64"))`
			`expected = np.array([0, 2, 3], dtype=result.dtype)`
			`tm.assert_numpy_array_equal(result, expected)`


			`class TestConvertSliceIndexer:`
			`def test_convert_almost_null_slice(self, index):`
			`# slice with None at both ends, but not step`

			`key = slice(None, None, "foo")`

			`if isinstance(index, IntervalIndex):`
			`msg = "label-based slicing with step!=1 is not supported for IntervalIndex"`
			`with pytest.raises(ValueError, match=msg):`
			`index._convert_slice_indexer(key, "loc")`
			`else:`
			`msg = "'>=' not supported between instances of 'str' and 'int'"`
			`with pytest.raises(TypeError, match=msg):`
			`index._convert_slice_indexer(key, "loc")`


			`class TestPutmask:`
			`def test_putmask_with_wrong_mask(self, index):`
			`# GH#18368`
			`if not len(index):`
			`return`

			`fill = index[0]`

			`msg = "putmask: mask and data must be the same size"`
			`with pytest.raises(ValueError, match=msg):`
			`index.putmask(np.ones(len(index) + 1, np.bool_), fill)`

			`with pytest.raises(ValueError, match=msg):`
			`index.putmask(np.ones(len(index) - 1, np.bool_), fill)`

			`with pytest.raises(ValueError, match=msg):`
			`index.putmask("foo", fill)`


			`@pytest.mark.parametrize(`
			`"idx", [Index([1, 2, 3]), Index([0.1, 0.2, 0.3]), Index(["a", "b", "c"])]`
			`)`
			`def test_getitem_deprecated_float(idx):`
			`# https://github.com/pandas-dev/pandas/issues/34191`

			`msg = "Indexing with a float is no longer supported"`
			`with pytest.raises(IndexError, match=msg):`
			`idx[1.0]`


			`@pytest.mark.parametrize(`
			`"idx,target,expected",`
			`[`
			`([np.nan, "var1", np.nan], [np.nan], np.array([0, 2], dtype=np.intp)),`
			`(`
			`[np.nan, "var1", np.nan],`
			`[np.nan, "var1"],`
			`np.array([0, 2, 1], dtype=np.intp),`
			`),`
			`(`
			`np.array([np.nan, "var1", np.nan], dtype=object),`
			`[np.nan],`
			`np.array([0, 2], dtype=np.intp),`
			`),`
			`(`
			`DatetimeIndex(["2020-08-05", NaT, NaT]),`
			`[NaT],`
			`np.array([1, 2], dtype=np.intp),`
			`),`
			`(["a", "b", "a", np.nan], [np.nan], np.array([3], dtype=np.intp)),`
			`(`
			`np.array(["b", np.nan, float("NaN"), "b"], dtype=object),`
			`Index([np.nan], dtype=object),`
			`np.array([1, 2], dtype=np.intp),`
			`),`
			`],`
			`)`
			`def test_get_indexer_non_unique_multiple_nans(idx, target, expected):`
			`# GH 35392`
			`axis = Index(idx)`
			`actual = axis.get_indexer_for(target)`
			`tm.assert_numpy_array_equal(actual, expected)`


			`def test_get_indexer_non_unique_nans_in_object_dtype_target(nulls_fixture):`
			`idx = Index([1.0, 2.0])`
			`target = Index([1, nulls_fixture], dtype="object")`

			`result_idx, result_missing = idx.get_indexer_non_unique(target)`
			`tm.assert_numpy_array_equal(result_idx, np.array([0, -1], dtype=np.intp))`
			`tm.assert_numpy_array_equal(result_missing, np.array([1], dtype=np.intp))`