149 lines
4.7 KiB
Python
149 lines
4.7 KiB
Python
import numpy as np
|
|
import pytest
|
|
|
|
from pandas import (
|
|
DataFrame,
|
|
IndexSlice,
|
|
MultiIndex,
|
|
date_range,
|
|
)
|
|
import pandas._testing as tm
|
|
|
|
|
|
@pytest.fixture
def df():
    """Single-column frame on a (timestamp, letter) MultiIndex.

    Five 12-hourly timestamps crossed with three letters give 15 rows whose
    ``c1`` values run from 0 to 14 in index order.
    """
    # Layout of the returned frame:
    #
    #                             c1
    # 2016-01-01 00:00:00 a        0
    #                     b        1
    #                     c        2
    # 2016-01-01 12:00:00 a        3
    #                     b        4
    #                     c        5
    # 2016-01-02 00:00:00 a        6
    #                     b        7
    #                     c        8
    # 2016-01-02 12:00:00 a        9
    #                     b       10
    #                     c       11
    # 2016-01-03 00:00:00 a       12
    #                     b       13
    #                     c       14
    #
    # NOTE(review): the uppercase "H" alias is deprecated in pandas >= 2.2 in
    # favour of "h" -- confirm against the pandas version this file targets.
    stamps = date_range("2016-01-01", "2016-01-03", freq="12H")
    letters = ["a", "b", "c"]
    index = MultiIndex.from_product([stamps, letters])
    return DataFrame({"c1": range(15)}, index=index)
|
|
|
|
|
|
def test_partial_string_matching_single_index(df):
|
|
# partial string matching on a single index
|
|
for df_swap in [df.swaplevel(), df.swaplevel(0), df.swaplevel(0, 1)]:
|
|
df_swap = df_swap.sort_index()
|
|
just_a = df_swap.loc["a"]
|
|
result = just_a.loc["2016-01-01"]
|
|
expected = df.loc[IndexSlice[:, "a"], :].iloc[0:2]
|
|
expected.index = expected.index.droplevel(1)
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
def test_get_loc_partial_timestamp_multiindex(df):
|
|
mi = df.index
|
|
key = ("2016-01-01", "a")
|
|
loc = mi.get_loc(key)
|
|
|
|
expected = np.zeros(len(mi), dtype=bool)
|
|
expected[[0, 3]] = True
|
|
tm.assert_numpy_array_equal(loc, expected)
|
|
|
|
key2 = ("2016-01-02", "a")
|
|
loc2 = mi.get_loc(key2)
|
|
expected2 = np.zeros(len(mi), dtype=bool)
|
|
expected2[[6, 9]] = True
|
|
tm.assert_numpy_array_equal(loc2, expected2)
|
|
|
|
key3 = ("2016-01", "a")
|
|
loc3 = mi.get_loc(key3)
|
|
expected3 = np.zeros(len(mi), dtype=bool)
|
|
expected3[mi.get_level_values(1).get_loc("a")] = True
|
|
tm.assert_numpy_array_equal(loc3, expected3)
|
|
|
|
key4 = ("2016", "a")
|
|
loc4 = mi.get_loc(key4)
|
|
expected4 = expected3
|
|
tm.assert_numpy_array_equal(loc4, expected4)
|
|
|
|
# non-monotonic
|
|
taker = np.arange(len(mi), dtype=np.intp)
|
|
taker[::2] = taker[::-2]
|
|
mi2 = mi.take(taker)
|
|
loc5 = mi2.get_loc(key)
|
|
expected5 = np.zeros(len(mi2), dtype=bool)
|
|
expected5[[3, 14]] = True
|
|
tm.assert_numpy_array_equal(loc5, expected5)
|
|
|
|
|
|
def test_partial_string_timestamp_multiindex(df):
    """Partial-string datetime selection through ``.loc`` on a MultiIndex.

    GH10331

    Covers ``IndexSlice`` selections and plain string keys against both the
    fixture frame (datetime level first) and a swapped, sorted copy (datetime
    level second), and checks that a date string applied to the swapped
    frame's first level raises ``KeyError``.
    """
    df_swap = df.swaplevel(0, 1).sort_index()
    SLC = IndexSlice

    # indexing with IndexSlice
    result = df.loc[SLC["2016-01-01":"2016-02-01", :], :]
    expected = df
    tm.assert_frame_equal(result, expected)

    # match on secondary index
    result = df_swap.loc[SLC[:, "2016-01-01":"2016-01-01"], :]
    expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]]
    tm.assert_frame_equal(result, expected)

    # partial string match on year only
    result = df.loc["2016"]
    expected = df
    tm.assert_frame_equal(result, expected)

    # partial string match on date
    result = df.loc["2016-01-01"]
    expected = df.iloc[0:6]
    tm.assert_frame_equal(result, expected)

    # partial string match on date and hour, from middle
    result = df.loc["2016-01-02 12"]
    # hourly resolution, same as index.levels[0], so we are _not_ slicing on
    # that level, so that level gets dropped
    expected = df.iloc[9:12].droplevel(0)
    tm.assert_frame_equal(result, expected)

    # partial string match on secondary index
    result = df_swap.loc[SLC[:, "2016-01-02"], :]
    expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]]
    tm.assert_frame_equal(result, expected)

    # tuple selector with partial string match on date
    # "2016-01-01" has daily resolution, so _is_ a slice on the first level.
    # The expected frame keeps the datetime level and drops the letter level.
    result = df.loc[("2016-01-01", "a"), :]
    expected = df.iloc[[0, 3]].droplevel(1)
    tm.assert_frame_equal(result, expected)

    # Slicing date on first level should break (of course) bc the DTI is the
    # second level on df_swap
    with pytest.raises(KeyError, match="'2016-01-01'"):
        df_swap.loc["2016-01-01"]
|
|
|
|
|
|
def test_partial_string_timestamp_multiindex_str_key_raises(df):
    """``df[<date string>]`` on a MultiIndex-ed frame must raise ``KeyError``.

    Even though this syntax works on a single index, this is somewhat
    ambiguous and we don't want to extend this behavior forward to work in
    multi-indexes. This would amount to selecting a scalar from a column.
    """
    date_key = "2016-01-01"
    expected_message = "'2016-01-01'"
    with pytest.raises(KeyError, match=expected_message):
        df[date_key]
|
|
|
|
|
|
def test_partial_string_timestamp_multiindex_daily_resolution(df):
|
|
# GH12685 (partial string with daily resolution or below)
|
|
result = df.loc[IndexSlice["2013-03":"2013-03", :], :]
|
|
expected = df.iloc[118:180]
|
|
tm.assert_frame_equal(result, expected)
|