projektAI/venv/Lib/site-packages/pandas/tests/indexes/multi/test_indexing.py
2021-06-06 22:13:05 +02:00

850 lines
30 KiB
Python

from datetime import timedelta
import numpy as np
import pytest
from pandas.errors import InvalidIndexError, PerformanceWarning
import pandas as pd
from pandas import Categorical, Index, MultiIndex, date_range
import pandas._testing as tm
class TestSliceLocs:
def test_slice_locs_partial(self, idx):
sorted_idx, _ = idx.sortlevel(0)
result = sorted_idx.slice_locs(("foo", "two"), ("qux", "one"))
assert result == (1, 5)
result = sorted_idx.slice_locs(None, ("qux", "one"))
assert result == (0, 5)
result = sorted_idx.slice_locs(("foo", "two"), None)
assert result == (1, len(sorted_idx))
result = sorted_idx.slice_locs("bar", "baz")
assert result == (2, 4)
def test_slice_locs(self):
df = tm.makeTimeDataFrame()
stacked = df.stack()
idx = stacked.index
slob = slice(*idx.slice_locs(df.index[5], df.index[15]))
sliced = stacked[slob]
expected = df[5:16].stack()
tm.assert_almost_equal(sliced.values, expected.values)
slob = slice(
*idx.slice_locs(
df.index[5] + timedelta(seconds=30),
df.index[15] - timedelta(seconds=30),
)
)
sliced = stacked[slob]
expected = df[6:15].stack()
tm.assert_almost_equal(sliced.values, expected.values)
def test_slice_locs_with_type_mismatch(self):
df = tm.makeTimeDataFrame()
stacked = df.stack()
idx = stacked.index
with pytest.raises(TypeError, match="^Level type mismatch"):
idx.slice_locs((1, 3))
with pytest.raises(TypeError, match="^Level type mismatch"):
idx.slice_locs(df.index[5] + timedelta(seconds=30), (5, 2))
df = tm.makeCustomDataframe(5, 5)
stacked = df.stack()
idx = stacked.index
with pytest.raises(TypeError, match="^Level type mismatch"):
idx.slice_locs(timedelta(seconds=30))
# TODO: Try creating a UnicodeDecodeError in exception message
with pytest.raises(TypeError, match="^Level type mismatch"):
idx.slice_locs(df.index[1], (16, "a"))
def test_slice_locs_not_sorted(self):
index = MultiIndex(
levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))],
codes=[
np.array([0, 0, 1, 2, 2, 2, 3, 3]),
np.array([0, 1, 0, 0, 0, 1, 0, 1]),
np.array([1, 0, 1, 1, 0, 0, 1, 0]),
],
)
msg = "[Kk]ey length.*greater than MultiIndex lexsort depth"
with pytest.raises(KeyError, match=msg):
index.slice_locs((1, 0, 1), (2, 1, 0))
# works
sorted_index, _ = index.sortlevel(0)
# should there be a test case here???
sorted_index.slice_locs((1, 0, 1), (2, 1, 0))
def test_slice_locs_not_contained(self):
# some searchsorted action
index = MultiIndex(
levels=[[0, 2, 4, 6], [0, 2, 4]],
codes=[[0, 0, 0, 1, 1, 2, 3, 3, 3], [0, 1, 2, 1, 2, 2, 0, 1, 2]],
)
result = index.slice_locs((1, 0), (5, 2))
assert result == (3, 6)
result = index.slice_locs(1, 5)
assert result == (3, 6)
result = index.slice_locs((2, 2), (5, 2))
assert result == (3, 6)
result = index.slice_locs(2, 5)
assert result == (3, 6)
result = index.slice_locs((1, 0), (6, 3))
assert result == (3, 8)
result = index.slice_locs(-1, 10)
assert result == (0, len(index))
@pytest.mark.parametrize(
"index_arr,expected,start_idx,end_idx",
[
([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, None),
([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, "b"),
([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, ("b", "e")),
([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), None),
([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), "c"),
([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), ("c", "e")),
],
)
def test_slice_locs_with_missing_value(
self, index_arr, expected, start_idx, end_idx
):
# issue 19132
idx = MultiIndex.from_arrays(index_arr)
result = idx.slice_locs(start=start_idx, end=end_idx)
assert result == expected
def test_putmask_with_wrong_mask(idx):
# GH18368
msg = "putmask: mask and data must be the same size"
with pytest.raises(ValueError, match=msg):
idx.putmask(np.ones(len(idx) + 1, np.bool_), 1)
with pytest.raises(ValueError, match=msg):
idx.putmask(np.ones(len(idx) - 1, np.bool_), 1)
with pytest.raises(ValueError, match=msg):
idx.putmask("foo", 1)
class TestGetIndexer:
def test_get_indexer(self):
major_axis = Index(np.arange(4))
minor_axis = Index(np.arange(2))
major_codes = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp)
minor_codes = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp)
index = MultiIndex(
levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
)
idx1 = index[:5]
idx2 = index[[1, 3, 5]]
r1 = idx1.get_indexer(idx2)
tm.assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp))
r1 = idx2.get_indexer(idx1, method="pad")
e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp)
tm.assert_almost_equal(r1, e1)
r2 = idx2.get_indexer(idx1[::-1], method="pad")
tm.assert_almost_equal(r2, e1[::-1])
rffill1 = idx2.get_indexer(idx1, method="ffill")
tm.assert_almost_equal(r1, rffill1)
r1 = idx2.get_indexer(idx1, method="backfill")
e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp)
tm.assert_almost_equal(r1, e1)
r2 = idx2.get_indexer(idx1[::-1], method="backfill")
tm.assert_almost_equal(r2, e1[::-1])
rbfill1 = idx2.get_indexer(idx1, method="bfill")
tm.assert_almost_equal(r1, rbfill1)
# pass non-MultiIndex
r1 = idx1.get_indexer(idx2.values)
rexp1 = idx1.get_indexer(idx2)
tm.assert_almost_equal(r1, rexp1)
r1 = idx1.get_indexer([1, 2, 3])
assert (r1 == [-1, -1, -1]).all()
# create index with duplicates
idx1 = Index(list(range(10)) + list(range(10)))
idx2 = Index(list(range(20)))
msg = "Reindexing only valid with uniquely valued Index objects"
with pytest.raises(InvalidIndexError, match=msg):
idx1.get_indexer(idx2)
def test_get_indexer_nearest(self):
midx = MultiIndex.from_tuples([("a", 1), ("b", 2)])
msg = (
"method='nearest' not implemented yet for MultiIndex; "
"see GitHub issue 9365"
)
with pytest.raises(NotImplementedError, match=msg):
midx.get_indexer(["a"], method="nearest")
msg = "tolerance not implemented yet for MultiIndex"
with pytest.raises(NotImplementedError, match=msg):
midx.get_indexer(["a"], method="pad", tolerance=2)
def test_get_indexer_categorical_time(self):
# https://github.com/pandas-dev/pandas/issues/21390
midx = MultiIndex.from_product(
[
Categorical(["a", "b", "c"]),
Categorical(date_range("2012-01-01", periods=3, freq="H")),
]
)
result = midx.get_indexer(midx)
tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp))
@pytest.mark.parametrize(
"index_arr,labels,expected",
[
(
[[1, np.nan, 2], [3, 4, 5]],
[1, np.nan, 2],
np.array([-1, -1, -1], dtype=np.intp),
),
([[1, np.nan, 2], [3, 4, 5]], [(np.nan, 4)], np.array([1], dtype=np.intp)),
([[1, 2, 3], [np.nan, 4, 5]], [(1, np.nan)], np.array([0], dtype=np.intp)),
(
[[1, 2, 3], [np.nan, 4, 5]],
[np.nan, 4, 5],
np.array([-1, -1, -1], dtype=np.intp),
),
],
)
def test_get_indexer_with_missing_value(self, index_arr, labels, expected):
# issue 19132
idx = MultiIndex.from_arrays(index_arr)
result = idx.get_indexer(labels)
tm.assert_numpy_array_equal(result, expected)
def test_get_indexer_methods(self):
# https://github.com/pandas-dev/pandas/issues/29896
# test getting an indexer for another index with different methods
# confirms that getting an indexer without a filling method, getting an
# indexer and backfilling, and getting an indexer and padding all behave
# correctly in the case where all of the target values fall in between
# several levels in the MultiIndex into which they are getting an indexer
#
# visually, the MultiIndexes used in this test are:
# mult_idx_1:
# 0: -1 0
# 1: 2
# 2: 3
# 3: 4
# 4: 0 0
# 5: 2
# 6: 3
# 7: 4
# 8: 1 0
# 9: 2
# 10: 3
# 11: 4
#
# mult_idx_2:
# 0: 0 1
# 1: 3
# 2: 4
mult_idx_1 = MultiIndex.from_product([[-1, 0, 1], [0, 2, 3, 4]])
mult_idx_2 = MultiIndex.from_product([[0], [1, 3, 4]])
indexer = mult_idx_1.get_indexer(mult_idx_2)
expected = np.array([-1, 6, 7], dtype=indexer.dtype)
tm.assert_almost_equal(expected, indexer)
backfill_indexer = mult_idx_1.get_indexer(mult_idx_2, method="backfill")
expected = np.array([5, 6, 7], dtype=backfill_indexer.dtype)
tm.assert_almost_equal(expected, backfill_indexer)
# ensure the legacy "bfill" option functions identically to "backfill"
backfill_indexer = mult_idx_1.get_indexer(mult_idx_2, method="bfill")
expected = np.array([5, 6, 7], dtype=backfill_indexer.dtype)
tm.assert_almost_equal(expected, backfill_indexer)
pad_indexer = mult_idx_1.get_indexer(mult_idx_2, method="pad")
expected = np.array([4, 6, 7], dtype=pad_indexer.dtype)
tm.assert_almost_equal(expected, pad_indexer)
# ensure the legacy "ffill" option functions identically to "pad"
pad_indexer = mult_idx_1.get_indexer(mult_idx_2, method="ffill")
expected = np.array([4, 6, 7], dtype=pad_indexer.dtype)
tm.assert_almost_equal(expected, pad_indexer)
def test_get_indexer_three_or_more_levels(self):
# https://github.com/pandas-dev/pandas/issues/29896
# tests get_indexer() on MultiIndexes with 3+ levels
# visually, these are
# mult_idx_1:
# 0: 1 2 5
# 1: 7
# 2: 4 5
# 3: 7
# 4: 6 5
# 5: 7
# 6: 3 2 5
# 7: 7
# 8: 4 5
# 9: 7
# 10: 6 5
# 11: 7
#
# mult_idx_2:
# 0: 1 1 8
# 1: 1 5 9
# 2: 1 6 7
# 3: 2 1 6
# 4: 2 7 6
# 5: 2 7 8
# 6: 3 6 8
mult_idx_1 = MultiIndex.from_product([[1, 3], [2, 4, 6], [5, 7]])
mult_idx_2 = MultiIndex.from_tuples(
[
(1, 1, 8),
(1, 5, 9),
(1, 6, 7),
(2, 1, 6),
(2, 7, 7),
(2, 7, 8),
(3, 6, 8),
]
)
# sanity check
assert mult_idx_1.is_monotonic
assert mult_idx_1.is_unique
assert mult_idx_2.is_monotonic
assert mult_idx_2.is_unique
# show the relationships between the two
assert mult_idx_2[0] < mult_idx_1[0]
assert mult_idx_1[3] < mult_idx_2[1] < mult_idx_1[4]
assert mult_idx_1[5] == mult_idx_2[2]
assert mult_idx_1[5] < mult_idx_2[3] < mult_idx_1[6]
assert mult_idx_1[5] < mult_idx_2[4] < mult_idx_1[6]
assert mult_idx_1[5] < mult_idx_2[5] < mult_idx_1[6]
assert mult_idx_1[-1] < mult_idx_2[6]
indexer_no_fill = mult_idx_1.get_indexer(mult_idx_2)
expected = np.array([-1, -1, 5, -1, -1, -1, -1], dtype=indexer_no_fill.dtype)
tm.assert_almost_equal(expected, indexer_no_fill)
# test with backfilling
indexer_backfilled = mult_idx_1.get_indexer(mult_idx_2, method="backfill")
expected = np.array([0, 4, 5, 6, 6, 6, -1], dtype=indexer_backfilled.dtype)
tm.assert_almost_equal(expected, indexer_backfilled)
# now, the same thing, but forward-filled (aka "padded")
indexer_padded = mult_idx_1.get_indexer(mult_idx_2, method="pad")
expected = np.array([-1, 3, 5, 5, 5, 5, 11], dtype=indexer_padded.dtype)
tm.assert_almost_equal(expected, indexer_padded)
# now, do the indexing in the other direction
assert mult_idx_2[0] < mult_idx_1[0] < mult_idx_2[1]
assert mult_idx_2[0] < mult_idx_1[1] < mult_idx_2[1]
assert mult_idx_2[0] < mult_idx_1[2] < mult_idx_2[1]
assert mult_idx_2[0] < mult_idx_1[3] < mult_idx_2[1]
assert mult_idx_2[1] < mult_idx_1[4] < mult_idx_2[2]
assert mult_idx_2[2] == mult_idx_1[5]
assert mult_idx_2[5] < mult_idx_1[6] < mult_idx_2[6]
assert mult_idx_2[5] < mult_idx_1[7] < mult_idx_2[6]
assert mult_idx_2[5] < mult_idx_1[8] < mult_idx_2[6]
assert mult_idx_2[5] < mult_idx_1[9] < mult_idx_2[6]
assert mult_idx_2[5] < mult_idx_1[10] < mult_idx_2[6]
assert mult_idx_2[5] < mult_idx_1[11] < mult_idx_2[6]
indexer = mult_idx_2.get_indexer(mult_idx_1)
expected = np.array(
[-1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1], dtype=indexer.dtype
)
tm.assert_almost_equal(expected, indexer)
backfill_indexer = mult_idx_2.get_indexer(mult_idx_1, method="bfill")
expected = np.array(
[1, 1, 1, 1, 2, 2, 6, 6, 6, 6, 6, 6], dtype=backfill_indexer.dtype
)
tm.assert_almost_equal(expected, backfill_indexer)
pad_indexer = mult_idx_2.get_indexer(mult_idx_1, method="pad")
expected = np.array(
[0, 0, 0, 0, 1, 2, 5, 5, 5, 5, 5, 5], dtype=pad_indexer.dtype
)
tm.assert_almost_equal(expected, pad_indexer)
def test_get_indexer_crossing_levels(self):
# https://github.com/pandas-dev/pandas/issues/29896
# tests a corner case with get_indexer() with MultiIndexes where, when we
# need to "carry" across levels, proper tuple ordering is respected
#
# the MultiIndexes used in this test, visually, are:
# mult_idx_1:
# 0: 1 1 1 1
# 1: 2
# 2: 2 1
# 3: 2
# 4: 1 2 1 1
# 5: 2
# 6: 2 1
# 7: 2
# 8: 2 1 1 1
# 9: 2
# 10: 2 1
# 11: 2
# 12: 2 2 1 1
# 13: 2
# 14: 2 1
# 15: 2
#
# mult_idx_2:
# 0: 1 3 2 2
# 1: 2 3 2 2
mult_idx_1 = MultiIndex.from_product([[1, 2]] * 4)
mult_idx_2 = MultiIndex.from_tuples([(1, 3, 2, 2), (2, 3, 2, 2)])
# show the tuple orderings, which get_indexer() should respect
assert mult_idx_1[7] < mult_idx_2[0] < mult_idx_1[8]
assert mult_idx_1[-1] < mult_idx_2[1]
indexer = mult_idx_1.get_indexer(mult_idx_2)
expected = np.array([-1, -1], dtype=indexer.dtype)
tm.assert_almost_equal(expected, indexer)
backfill_indexer = mult_idx_1.get_indexer(mult_idx_2, method="bfill")
expected = np.array([8, -1], dtype=backfill_indexer.dtype)
tm.assert_almost_equal(expected, backfill_indexer)
pad_indexer = mult_idx_1.get_indexer(mult_idx_2, method="ffill")
expected = np.array([7, 15], dtype=pad_indexer.dtype)
tm.assert_almost_equal(expected, pad_indexer)
def test_getitem(idx):
# scalar
assert idx[2] == ("bar", "one")
# slice
result = idx[2:5]
expected = idx[[2, 3, 4]]
assert result.equals(expected)
# boolean
result = idx[[True, False, True, False, True, True]]
result2 = idx[np.array([True, False, True, False, True, True])]
expected = idx[[0, 2, 4, 5]]
assert result.equals(expected)
assert result2.equals(expected)
def test_getitem_group_select(idx):
sorted_idx, _ = idx.sortlevel(0)
assert sorted_idx.get_loc("baz") == slice(3, 4)
assert sorted_idx.get_loc("foo") == slice(0, 2)
@pytest.mark.parametrize("ind1", [[True] * 5, Index([True] * 5)])
@pytest.mark.parametrize(
"ind2",
[[True, False, True, False, False], Index([True, False, True, False, False])],
)
def test_getitem_bool_index_all(ind1, ind2):
# GH#22533
idx = MultiIndex.from_tuples([(10, 1), (20, 2), (30, 3), (40, 4), (50, 5)])
tm.assert_index_equal(idx[ind1], idx)
expected = MultiIndex.from_tuples([(10, 1), (30, 3)])
tm.assert_index_equal(idx[ind2], expected)
@pytest.mark.parametrize("ind1", [[True], Index([True])])
@pytest.mark.parametrize("ind2", [[False], Index([False])])
def test_getitem_bool_index_single(ind1, ind2):
# GH#22533
idx = MultiIndex.from_tuples([(10, 1)])
tm.assert_index_equal(idx[ind1], idx)
expected = MultiIndex(
levels=[np.array([], dtype=np.int64), np.array([], dtype=np.int64)],
codes=[[], []],
)
tm.assert_index_equal(idx[ind2], expected)
class TestGetLoc:
def test_get_loc(self, idx):
assert idx.get_loc(("foo", "two")) == 1
assert idx.get_loc(("baz", "two")) == 3
with pytest.raises(KeyError, match=r"^10$"):
idx.get_loc(("bar", "two"))
with pytest.raises(KeyError, match=r"^'quux'$"):
idx.get_loc("quux")
msg = "only the default get_loc method is currently supported for MultiIndex"
with pytest.raises(NotImplementedError, match=msg):
idx.get_loc("foo", method="nearest")
# 3 levels
index = MultiIndex(
levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))],
codes=[
np.array([0, 0, 1, 2, 2, 2, 3, 3]),
np.array([0, 1, 0, 0, 0, 1, 0, 1]),
np.array([1, 0, 1, 1, 0, 0, 1, 0]),
],
)
with pytest.raises(KeyError, match=r"^\(1, 1\)$"):
index.get_loc((1, 1))
assert index.get_loc((2, 0)) == slice(3, 5)
def test_get_loc_duplicates(self):
index = Index([2, 2, 2, 2])
result = index.get_loc(2)
expected = slice(0, 4)
assert result == expected
index = Index(["c", "a", "a", "b", "b"])
rs = index.get_loc("c")
xp = 0
assert rs == xp
with pytest.raises(KeyError):
index.get_loc(2)
def test_get_loc_level(self):
index = MultiIndex(
levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))],
codes=[
np.array([0, 0, 1, 2, 2, 2, 3, 3]),
np.array([0, 1, 0, 0, 0, 1, 0, 1]),
np.array([1, 0, 1, 1, 0, 0, 1, 0]),
],
)
loc, new_index = index.get_loc_level((0, 1))
expected = slice(1, 2)
exp_index = index[expected].droplevel(0).droplevel(0)
assert loc == expected
assert new_index.equals(exp_index)
loc, new_index = index.get_loc_level((0, 1, 0))
expected = 1
assert loc == expected
assert new_index is None
with pytest.raises(KeyError, match=r"^\(2, 2\)$"):
index.get_loc_level((2, 2))
# GH 22221: unused label
with pytest.raises(KeyError, match=r"^2$"):
index.drop(2).get_loc_level(2)
# Unused label on unsorted level:
with pytest.raises(KeyError, match=r"^2$"):
index.drop(1, level=2).get_loc_level(2, level=2)
index = MultiIndex(
levels=[[2000], list(range(4))],
codes=[np.array([0, 0, 0, 0]), np.array([0, 1, 2, 3])],
)
result, new_index = index.get_loc_level((2000, slice(None, None)))
expected = slice(None, None)
assert result == expected
assert new_index.equals(index.droplevel(0))
@pytest.mark.parametrize("dtype1", [int, float, bool, str])
@pytest.mark.parametrize("dtype2", [int, float, bool, str])
def test_get_loc_multiple_dtypes(self, dtype1, dtype2):
# GH 18520
levels = [np.array([0, 1]).astype(dtype1), np.array([0, 1]).astype(dtype2)]
idx = MultiIndex.from_product(levels)
assert idx.get_loc(idx[2]) == 2
@pytest.mark.parametrize("level", [0, 1])
@pytest.mark.parametrize("dtypes", [[int, float], [float, int]])
def test_get_loc_implicit_cast(self, level, dtypes):
# GH 18818, GH 15994 : as flat index, cast int to float and vice-versa
levels = [["a", "b"], ["c", "d"]]
key = ["b", "d"]
lev_dtype, key_dtype = dtypes
levels[level] = np.array([0, 1], dtype=lev_dtype)
key[level] = key_dtype(1)
idx = MultiIndex.from_product(levels)
assert idx.get_loc(tuple(key)) == 3
def test_get_loc_cast_bool(self):
# GH 19086 : int is casted to bool, but not vice-versa
levels = [[False, True], np.arange(2, dtype="int64")]
idx = MultiIndex.from_product(levels)
assert idx.get_loc((0, 1)) == 1
assert idx.get_loc((1, 0)) == 2
with pytest.raises(KeyError, match=r"^\(False, True\)$"):
idx.get_loc((False, True))
with pytest.raises(KeyError, match=r"^\(True, False\)$"):
idx.get_loc((True, False))
@pytest.mark.parametrize("level", [0, 1])
def test_get_loc_nan(self, level, nulls_fixture):
# GH 18485 : NaN in MultiIndex
levels = [["a", "b"], ["c", "d"]]
key = ["b", "d"]
levels[level] = np.array([0, nulls_fixture], dtype=type(nulls_fixture))
key[level] = nulls_fixture
idx = MultiIndex.from_product(levels)
assert idx.get_loc(tuple(key)) == 3
def test_get_loc_missing_nan(self):
# GH 8569
idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]])
assert isinstance(idx.get_loc(1), slice)
with pytest.raises(KeyError, match=r"^3$"):
idx.get_loc(3)
with pytest.raises(KeyError, match=r"^nan$"):
idx.get_loc(np.nan)
with pytest.raises(TypeError, match="unhashable type: 'list'"):
# listlike/non-hashable raises TypeError
idx.get_loc([np.nan])
def test_get_loc_with_values_including_missing_values(self):
# issue 19132
idx = MultiIndex.from_product([[np.nan, 1]] * 2)
expected = slice(0, 2, None)
assert idx.get_loc(np.nan) == expected
idx = MultiIndex.from_arrays([[np.nan, 1, 2, np.nan]])
expected = np.array([True, False, False, True])
tm.assert_numpy_array_equal(idx.get_loc(np.nan), expected)
idx = MultiIndex.from_product([[np.nan, 1]] * 3)
expected = slice(2, 4, None)
assert idx.get_loc((np.nan, 1)) == expected
def test_get_loc_duplicates2(self):
# TODO: de-duplicate with test_get_loc_duplicates above?
index = MultiIndex(
levels=[["D", "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]],
codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
names=["tag", "day"],
)
assert index.get_loc("D") == slice(0, 3)
def test_get_loc_past_lexsort_depth(self):
# GH#30053
idx = MultiIndex(
levels=[["a"], [0, 7], [1]],
codes=[[0, 0], [1, 0], [0, 0]],
names=["x", "y", "z"],
sortorder=0,
)
key = ("a", 7)
with tm.assert_produces_warning(PerformanceWarning):
# PerformanceWarning: indexing past lexsort depth may impact performance
result = idx.get_loc(key)
assert result == slice(0, 1, None)
def test_multiindex_get_loc_list_raises(self):
# GH#35878
idx = MultiIndex.from_tuples([("a", 1), ("b", 2)])
msg = "unhashable type"
with pytest.raises(TypeError, match=msg):
idx.get_loc([])
class TestWhere:
def test_where(self):
i = MultiIndex.from_tuples([("A", 1), ("A", 2)])
msg = r"\.where is not supported for MultiIndex operations"
with pytest.raises(NotImplementedError, match=msg):
i.where(True)
@pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series])
def test_where_array_like(self, klass):
i = MultiIndex.from_tuples([("A", 1), ("A", 2)])
cond = [False, True]
msg = r"\.where is not supported for MultiIndex operations"
with pytest.raises(NotImplementedError, match=msg):
i.where(klass(cond))
class TestContains:
def test_contains_top_level(self):
midx = MultiIndex.from_product([["A", "B"], [1, 2]])
assert "A" in midx
assert "A" not in midx._engine
def test_contains_with_nat(self):
# MI with a NaT
mi = MultiIndex(
levels=[["C"], date_range("2012-01-01", periods=5)],
codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]],
names=[None, "B"],
)
assert ("C", pd.Timestamp("2012-01-01")) in mi
for val in mi.values:
assert val in mi
def test_contains(self, idx):
assert ("foo", "two") in idx
assert ("bar", "two") not in idx
assert None not in idx
def test_contains_with_missing_value(self):
# GH#19132
idx = MultiIndex.from_arrays([[1, np.nan, 2]])
assert np.nan in idx
idx = MultiIndex.from_arrays([[1, 2], [np.nan, 3]])
assert np.nan not in idx
assert (1, np.nan) in idx
def test_multiindex_contains_dropped(self):
# GH#19027
# test that dropped MultiIndex levels are not in the MultiIndex
# despite continuing to be in the MultiIndex's levels
idx = MultiIndex.from_product([[1, 2], [3, 4]])
assert 2 in idx
idx = idx.drop(2)
# drop implementation keeps 2 in the levels
assert 2 in idx.levels[0]
# but it should no longer be in the index itself
assert 2 not in idx
# also applies to strings
idx = MultiIndex.from_product([["a", "b"], ["c", "d"]])
assert "a" in idx
idx = idx.drop("a")
assert "a" in idx.levels[0]
assert "a" not in idx
def test_contains_td64_level(self):
# GH#24570
tx = pd.timedelta_range("09:30:00", "16:00:00", freq="30 min")
idx = MultiIndex.from_arrays([tx, np.arange(len(tx))])
assert tx[0] in idx
assert "element_not_exit" not in idx
assert "0 day 09:30:00" in idx
@pytest.mark.slow
def test_large_mi_contains(self):
# GH#10645
result = MultiIndex.from_arrays([range(10 ** 6), range(10 ** 6)])
assert not (10 ** 6, 0) in result
def test_timestamp_multiindex_indexer():
# https://github.com/pandas-dev/pandas/issues/26944
idx = MultiIndex.from_product(
[
date_range("2019-01-01T00:15:33", periods=100, freq="H", name="date"),
["x"],
[3],
]
)
df = pd.DataFrame({"foo": np.arange(len(idx))}, idx)
result = df.loc[pd.IndexSlice["2019-1-2":, "x", :], "foo"]
qidx = MultiIndex.from_product(
[
date_range(
start="2019-01-02T00:15:33",
end="2019-01-05T03:15:33",
freq="H",
name="date",
),
["x"],
[3],
]
)
should_be = pd.Series(data=np.arange(24, len(qidx) + 24), index=qidx, name="foo")
tm.assert_series_equal(result, should_be)
@pytest.mark.parametrize(
"index_arr,expected,target,algo",
[
([[np.nan, "a", "b"], ["c", "d", "e"]], 0, np.nan, "left"),
([[np.nan, "a", "b"], ["c", "d", "e"]], 1, (np.nan, "c"), "right"),
([["a", "b", "c"], ["d", np.nan, "d"]], 1, ("b", np.nan), "left"),
],
)
def test_get_slice_bound_with_missing_value(index_arr, expected, target, algo):
# issue 19132
idx = MultiIndex.from_arrays(index_arr)
result = idx.get_slice_bound(target, side=algo, kind="loc")
assert result == expected
@pytest.mark.parametrize(
"index_arr,expected,start_idx,end_idx",
[
([[np.nan, 1, 2], [3, 4, 5]], slice(0, 2, None), np.nan, 1),
([[np.nan, 1, 2], [3, 4, 5]], slice(0, 3, None), np.nan, (2, 5)),
([[1, 2, 3], [4, np.nan, 5]], slice(1, 3, None), (2, np.nan), 3),
([[1, 2, 3], [4, np.nan, 5]], slice(1, 3, None), (2, np.nan), (3, 5)),
],
)
def test_slice_indexer_with_missing_value(index_arr, expected, start_idx, end_idx):
# issue 19132
idx = MultiIndex.from_arrays(index_arr)
result = idx.slice_indexer(start=start_idx, end=end_idx)
assert result == expected
def test_pyint_engine():
# GH#18519 : when combinations of codes cannot be represented in 64
# bits, the index underlying the MultiIndex engine works with Python
# integers, rather than uint64.
N = 5
keys = [
tuple(arr)
for arr in [
[0] * 10 * N,
[1] * 10 * N,
[2] * 10 * N,
[np.nan] * N + [2] * 9 * N,
[0] * N + [2] * 9 * N,
[np.nan] * N + [2] * 8 * N + [0] * N,
]
]
# Each level contains 4 elements (including NaN), so it is represented
# in 2 bits, for a total of 2*N*10 = 100 > 64 bits. If we were using a
# 64 bit engine and truncating the first levels, the fourth and fifth
# keys would collide; if truncating the last levels, the fifth and
# sixth; if rotating bits rather than shifting, the third and fifth.
for idx in range(len(keys)):
index = MultiIndex.from_tuples(keys)
assert index.get_loc(keys[idx]) == idx
expected = np.arange(idx + 1, dtype=np.intp)
result = index.get_indexer([keys[i] for i in expected])
tm.assert_numpy_array_equal(result, expected)
# With missing key:
idces = range(len(keys))
expected = np.array([-1] + list(idces), dtype=np.intp)
missing = tuple([0, 1] * 5 * N)
result = index.get_indexer([missing] + [keys[i] for i in idces])
tm.assert_numpy_array_equal(result, expected)