Inzynierka/Lib/site-packages/pandas/tests/indexes/object/test_indexing.py

190 lines
7.6 KiB
Python
Raw Normal View History

2023-06-02 12:51:02 +02:00
from decimal import Decimal
import numpy as np
import pytest
from pandas._libs.missing import is_matching_na
import pandas as pd
from pandas import Index
import pandas._testing as tm
class TestGetIndexer:
    """Checks of ``Index.get_indexer`` against string and NA labels."""

    @pytest.mark.parametrize(
        "method,expected",
        [
            ("pad", np.array([-1, 0, 1, 1], dtype=np.intp)),
            ("backfill", np.array([0, 0, 1, -1], dtype=np.intp)),
        ],
    )
    def test_get_indexer_strings(self, method, expected):
        """``pad``/``backfill`` lookups of string labels give the expected positions."""
        positions = Index(list("bc")).get_indexer(list("abcd"), method=method)
        tm.assert_numpy_array_equal(positions, expected)

    def test_get_indexer_strings_raises(self):
        """``nearest`` and ``tolerance`` lookups on string labels raise TypeError."""
        idx = Index(list("bc"))
        msg = r"unsupported operand type\(s\) for -: 'str' and 'str'"

        # Every keyword combination below must fail with the same message.
        failing_kwargs = (
            {"method": "nearest"},
            {"method": "pad", "tolerance": 2},
            {"method": "pad", "tolerance": [2, 2, 2, 2]},
        )
        for kwargs in failing_kwargs:
            with pytest.raises(TypeError, match=msg):
                idx.get_indexer(list("abcd"), **kwargs)

    def test_get_indexer_with_NA_values(
        self, unique_nulls_fixture, unique_nulls_fixture2
    ):
        """Two different NA objects are located at their own positions."""
        # GH#22332
        # Checked pairwise: no pair of NA values may be mangled.
        first_na, second_na = unique_nulls_fixture, unique_nulls_fixture2
        if first_na is second_na:
            # Same object on both sides: the values are not unique, so there
            # is nothing to check for this combination.
            return

        na_pair = [first_na, second_na]
        idx = Index(np.array(na_pair, dtype=object), dtype=object)
        positions = idx.get_indexer(na_pair + ["Unknown"])
        tm.assert_numpy_array_equal(positions, np.array([0, 1, -1], dtype=np.intp))
class TestGetIndexerNonUnique:
    """Checks of ``Index.get_indexer_non_unique`` with NA-like labels."""

    def test_get_indexer_non_unique_nas(self, nulls_fixture):
        """An NA label is found at every position holding a matching NA."""
        no_missing = np.array([], dtype=np.intp)

        def check(labels, expected_positions):
            # Look the fixture up in an Index built from ``labels`` and
            # require the given positions with nothing reported missing.
            found, missing = Index(labels).get_indexer_non_unique([nulls_fixture])
            tm.assert_numpy_array_equal(
                found, np.array(expected_positions, dtype=np.intp)
            )
            tm.assert_numpy_array_equal(missing, no_missing)

        # even though this isn't non-unique, this should still work
        check(["a", "b", nulls_fixture], [2])

        # actually non-unique
        check(["a", nulls_fixture, "b", nulls_fixture], [1, 3])

        # matching-but-not-identical nans: pick the first constructor whose
        # freshly built NaN matches the fixture (float is tried before Decimal).
        make_nan = next(
            (
                ctor
                for ctor in (float, Decimal)
                if is_matching_na(nulls_fixture, ctor("NaN"))
            ),
            None,
        )
        if make_nan is not None:
            check(["a", make_nan("NaN"), "b", make_nan("NaN")], [1, 3])

    @pytest.mark.filterwarnings("ignore:elementwise comp:DeprecationWarning")
    def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2):
        """A numpy NaT is found only at positions holding a matching NaT."""
        no_missing = np.array([], dtype=np.intp)

        if is_matching_na(np_nat_fixture, np_nat_fixture2):
            # matching-but-not-identical nats; the copies ensure the nats
            # are different objects
            labels = ["2021-10-02", np_nat_fixture.copy(), np_nat_fixture2.copy()]
            expected_positions = [1, 2]
        else:
            # dt64nat vs td64nat
            try:
                _ = np_nat_fixture == np_nat_fixture2
            except (TypeError, OverflowError):
                # Numpy will raise on uncomparable types, like
                # np.datetime64('NaT', 'Y') and np.datetime64('NaT', 'ps')
                # https://github.com/numpy/numpy/issues/22762
                return
            labels = [
                "2021-10-02",
                np_nat_fixture,
                np_nat_fixture2,
                np_nat_fixture,
                np_nat_fixture2,
            ]
            expected_positions = [1, 3]

        idx = Index(np.array(labels, dtype=object), dtype=object)
        # pass as index to prevent target from being casted to DatetimeIndex
        target = Index([np_nat_fixture], dtype=object)
        found, missing = idx.get_indexer_non_unique(target)

        tm.assert_numpy_array_equal(
            found, np.array(expected_positions, dtype=np.intp)
        )
        tm.assert_numpy_array_equal(missing, no_missing)
class TestSliceLocs:
    """Checks of ``Index.slice_locs`` on object-dtype indexes."""

    @pytest.mark.parametrize(
        "in_slice,expected",
        [
            # Plain ``slice`` objects, i.e. what ``pd.IndexSlice[start:stop:step]``
            # evaluates to for the same bounds.
            (slice(None, None, -1), "yxdcb"),
            (slice("b", "y", -1), ""),
            (slice("b", None, -1), "b"),
            (slice(None, "b", -1), "yxdcb"),
            (slice(None, "y", -1), "y"),
            (slice("y", None, -1), "yxdcb"),
            (slice("y", None, -4), "yb"),
            # absent labels
            (slice(None, "a", -1), "yxdcb"),
            (slice(None, "a", -2), "ydb"),
            (slice("z", None, -1), "yxdcb"),
            (slice("z", None, -3), "yc"),
            (slice("m", None, -1), "dcb"),
            (slice(None, "m", -1), "yx"),
            (slice("a", "a", -1), ""),
            (slice("z", "z", -1), ""),
            (slice("m", "m", -1), ""),
        ],
    )
    def test_slice_locs_negative_step(self, in_slice, expected):
        """Positions from ``slice_locs`` with a negative step select ``expected``."""
        letters = Index(["b", "c", "d", "x", "y"])
        step = in_slice.step

        # Translate the label bounds to positions, then slice positionally
        # with the same step.
        bounds = letters.slice_locs(in_slice.start, in_slice.stop, step)
        sliced = letters[slice(*bounds, step)]

        tm.assert_index_equal(sliced, Index(list(expected)))

    def test_slice_locs_dup(self):
        """Duplicate boundary labels are covered in full, in either order."""
        ascending = Index(list("aabcdd"))
        ascending_cases = (
            (("a", "d"), {}, (0, 6)),
            ((), {"end": "d"}, (0, 6)),
            (("a", "c"), {}, (0, 4)),
            (("b", "d"), {}, (2, 6)),
        )
        for args, kwargs, expected in ascending_cases:
            assert ascending.slice_locs(*args, **kwargs) == expected

        # Same labels in reverse order.
        descending = ascending[::-1]
        descending_cases = (
            (("d", "a"), {}, (0, 6)),
            ((), {"end": "a"}, (0, 6)),
            (("d", "b"), {}, (0, 4)),
            (("c", "a"), {}, (2, 6)),
        )
        for args, kwargs, expected in descending_cases:
            assert descending.slice_locs(*args, **kwargs) == expected