310 lines
12 KiB
Python
310 lines
12 KiB
Python
import numpy as np
|
|
import pytest
|
|
|
|
import pandas as pd
|
|
from pandas import Index, NaT, PeriodIndex, Series
|
|
import pandas._testing as tm
|
|
|
|
|
|
class TestPeriodIndexOps:
|
|
@pytest.mark.parametrize(
|
|
"freq,expected",
|
|
[
|
|
("A", "year"),
|
|
("Q", "quarter"),
|
|
("M", "month"),
|
|
("D", "day"),
|
|
("H", "hour"),
|
|
("T", "minute"),
|
|
("S", "second"),
|
|
("L", "millisecond"),
|
|
("U", "microsecond"),
|
|
],
|
|
)
|
|
def test_resolution(self, freq, expected):
|
|
idx = pd.period_range(start="2013-04-01", periods=30, freq=freq)
|
|
assert idx.resolution == expected
|
|
|
|
def test_value_counts_unique(self):
|
|
# GH 7735
|
|
idx = pd.period_range("2011-01-01 09:00", freq="H", periods=10)
|
|
# create repeated values, 'n'th element is repeated by n+1 times
|
|
idx = PeriodIndex(np.repeat(idx._values, range(1, len(idx) + 1)), freq="H")
|
|
|
|
exp_idx = PeriodIndex(
|
|
[
|
|
"2011-01-01 18:00",
|
|
"2011-01-01 17:00",
|
|
"2011-01-01 16:00",
|
|
"2011-01-01 15:00",
|
|
"2011-01-01 14:00",
|
|
"2011-01-01 13:00",
|
|
"2011-01-01 12:00",
|
|
"2011-01-01 11:00",
|
|
"2011-01-01 10:00",
|
|
"2011-01-01 09:00",
|
|
],
|
|
freq="H",
|
|
)
|
|
expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
|
|
|
|
for obj in [idx, Series(idx)]:
|
|
tm.assert_series_equal(obj.value_counts(), expected)
|
|
|
|
expected = pd.period_range("2011-01-01 09:00", freq="H", periods=10)
|
|
tm.assert_index_equal(idx.unique(), expected)
|
|
|
|
idx = PeriodIndex(
|
|
[
|
|
"2013-01-01 09:00",
|
|
"2013-01-01 09:00",
|
|
"2013-01-01 09:00",
|
|
"2013-01-01 08:00",
|
|
"2013-01-01 08:00",
|
|
NaT,
|
|
],
|
|
freq="H",
|
|
)
|
|
|
|
exp_idx = PeriodIndex(["2013-01-01 09:00", "2013-01-01 08:00"], freq="H")
|
|
expected = Series([3, 2], index=exp_idx)
|
|
|
|
for obj in [idx, Series(idx)]:
|
|
tm.assert_series_equal(obj.value_counts(), expected)
|
|
|
|
exp_idx = PeriodIndex(["2013-01-01 09:00", "2013-01-01 08:00", NaT], freq="H")
|
|
expected = Series([3, 2, 1], index=exp_idx)
|
|
|
|
for obj in [idx, Series(idx)]:
|
|
tm.assert_series_equal(obj.value_counts(dropna=False), expected)
|
|
|
|
tm.assert_index_equal(idx.unique(), exp_idx)
|
|
|
|
@pytest.mark.parametrize("freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"])
|
|
def test_drop_duplicates_metadata(self, freq):
|
|
# GH 10115
|
|
idx = pd.period_range("2011-01-01", periods=10, freq=freq, name="idx")
|
|
result = idx.drop_duplicates()
|
|
tm.assert_index_equal(idx, result)
|
|
assert idx.freq == result.freq
|
|
|
|
idx_dup = idx.append(idx) # freq will not be reset
|
|
result = idx_dup.drop_duplicates()
|
|
tm.assert_index_equal(idx, result)
|
|
assert idx.freq == result.freq
|
|
|
|
@pytest.mark.parametrize("freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"])
|
|
@pytest.mark.parametrize(
|
|
"keep, expected, index",
|
|
[
|
|
("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)),
|
|
("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)),
|
|
(
|
|
False,
|
|
np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
|
|
np.arange(5, 10),
|
|
),
|
|
],
|
|
)
|
|
def test_drop_duplicates(self, freq, keep, expected, index):
|
|
# to check Index/Series compat
|
|
idx = pd.period_range("2011-01-01", periods=10, freq=freq, name="idx")
|
|
idx = idx.append(idx[:5])
|
|
|
|
tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
|
|
expected = idx[~expected]
|
|
|
|
result = idx.drop_duplicates(keep=keep)
|
|
tm.assert_index_equal(result, expected)
|
|
|
|
result = Series(idx).drop_duplicates(keep=keep)
|
|
tm.assert_series_equal(result, Series(expected, index=index))
|
|
|
|
def test_order_compat(self):
|
|
def _check_freq(index, expected_index):
|
|
if isinstance(index, PeriodIndex):
|
|
assert index.freq == expected_index.freq
|
|
|
|
pidx = PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="A")
|
|
# for compatibility check
|
|
iidx = Index([2011, 2012, 2013], name="idx")
|
|
for idx in [pidx, iidx]:
|
|
ordered = idx.sort_values()
|
|
tm.assert_index_equal(ordered, idx)
|
|
_check_freq(ordered, idx)
|
|
|
|
ordered = idx.sort_values(ascending=False)
|
|
tm.assert_index_equal(ordered, idx[::-1])
|
|
_check_freq(ordered, idx[::-1])
|
|
|
|
ordered, indexer = idx.sort_values(return_indexer=True)
|
|
tm.assert_index_equal(ordered, idx)
|
|
tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
|
|
_check_freq(ordered, idx)
|
|
|
|
ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
|
|
tm.assert_index_equal(ordered, idx[::-1])
|
|
tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False)
|
|
_check_freq(ordered, idx[::-1])
|
|
|
|
pidx = PeriodIndex(
|
|
["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A"
|
|
)
|
|
pexpected = PeriodIndex(
|
|
["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="A"
|
|
)
|
|
# for compatibility check
|
|
iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx")
|
|
iexpected = Index([2011, 2011, 2012, 2013, 2015], name="idx")
|
|
for idx, expected in [(pidx, pexpected), (iidx, iexpected)]:
|
|
ordered = idx.sort_values()
|
|
tm.assert_index_equal(ordered, expected)
|
|
_check_freq(ordered, idx)
|
|
|
|
ordered = idx.sort_values(ascending=False)
|
|
tm.assert_index_equal(ordered, expected[::-1])
|
|
_check_freq(ordered, idx)
|
|
|
|
ordered, indexer = idx.sort_values(return_indexer=True)
|
|
tm.assert_index_equal(ordered, expected)
|
|
|
|
exp = np.array([0, 4, 3, 1, 2])
|
|
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
|
|
_check_freq(ordered, idx)
|
|
|
|
ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
|
|
tm.assert_index_equal(ordered, expected[::-1])
|
|
_check_freq(ordered, idx)
|
|
|
|
pidx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D")
|
|
|
|
result = pidx.sort_values(na_position="first")
|
|
expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D")
|
|
tm.assert_index_equal(result, expected)
|
|
assert result.freq == "D"
|
|
|
|
result = pidx.sort_values(ascending=False)
|
|
expected = PeriodIndex(["2013", "2011", "2011", "NaT"], name="pidx", freq="D")
|
|
tm.assert_index_equal(result, expected)
|
|
assert result.freq == "D"
|
|
|
|
def test_order(self):
|
|
for freq in ["D", "2D", "4D"]:
|
|
idx = PeriodIndex(
|
|
["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx"
|
|
)
|
|
|
|
ordered = idx.sort_values()
|
|
tm.assert_index_equal(ordered, idx)
|
|
assert ordered.freq == idx.freq
|
|
|
|
ordered = idx.sort_values(ascending=False)
|
|
expected = idx[::-1]
|
|
tm.assert_index_equal(ordered, expected)
|
|
assert ordered.freq == expected.freq
|
|
assert ordered.freq == freq
|
|
|
|
ordered, indexer = idx.sort_values(return_indexer=True)
|
|
tm.assert_index_equal(ordered, idx)
|
|
tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
|
|
assert ordered.freq == idx.freq
|
|
assert ordered.freq == freq
|
|
|
|
ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
|
|
expected = idx[::-1]
|
|
tm.assert_index_equal(ordered, expected)
|
|
tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False)
|
|
assert ordered.freq == expected.freq
|
|
assert ordered.freq == freq
|
|
|
|
idx1 = PeriodIndex(
|
|
["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
|
|
freq="D",
|
|
name="idx1",
|
|
)
|
|
exp1 = PeriodIndex(
|
|
["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
|
|
freq="D",
|
|
name="idx1",
|
|
)
|
|
|
|
idx2 = PeriodIndex(
|
|
["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
|
|
freq="D",
|
|
name="idx2",
|
|
)
|
|
exp2 = PeriodIndex(
|
|
["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
|
|
freq="D",
|
|
name="idx2",
|
|
)
|
|
|
|
idx3 = PeriodIndex(
|
|
[NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT], freq="D", name="idx3"
|
|
)
|
|
exp3 = PeriodIndex(
|
|
[NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"], freq="D", name="idx3"
|
|
)
|
|
|
|
for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]:
|
|
ordered = idx.sort_values(na_position="first")
|
|
tm.assert_index_equal(ordered, expected)
|
|
assert ordered.freq == "D"
|
|
|
|
ordered = idx.sort_values(ascending=False)
|
|
tm.assert_index_equal(ordered, expected[::-1])
|
|
assert ordered.freq == "D"
|
|
|
|
ordered, indexer = idx.sort_values(return_indexer=True, na_position="first")
|
|
tm.assert_index_equal(ordered, expected)
|
|
|
|
exp = np.array([0, 4, 3, 1, 2])
|
|
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
|
|
assert ordered.freq == "D"
|
|
|
|
ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
|
|
tm.assert_index_equal(ordered, expected[::-1])
|
|
|
|
exp = np.array([2, 1, 3, 0, 4])
|
|
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
|
|
assert ordered.freq == "D"
|
|
|
|
def test_nat(self):
|
|
assert PeriodIndex._na_value is NaT
|
|
assert PeriodIndex([], freq="M")._na_value is NaT
|
|
|
|
idx = PeriodIndex(["2011-01-01", "2011-01-02"], freq="D")
|
|
assert idx._can_hold_na
|
|
|
|
tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
|
|
assert idx.hasnans is False
|
|
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))
|
|
|
|
idx = PeriodIndex(["2011-01-01", "NaT"], freq="D")
|
|
assert idx._can_hold_na
|
|
|
|
tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
|
|
assert idx.hasnans is True
|
|
tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))
|
|
|
|
def test_freq_setter_deprecated(self):
|
|
# GH 20678
|
|
idx = pd.period_range("2018Q1", periods=4, freq="Q")
|
|
|
|
# no warning for getter
|
|
with tm.assert_produces_warning(None):
|
|
idx.freq
|
|
|
|
# warning for setter
|
|
with pytest.raises(AttributeError, match="can't set attribute"):
|
|
idx.freq = pd.offsets.Day()
|
|
|
|
|
|
def test_order_stability_compat():
    """Descending ``sort_values`` yields the same indexer for both index types.

    A PeriodIndex and an integer Index holding the same years, with one
    repeated value, are sorted in descending order and the returned indexers
    are compared.
    """
    # GH 35922. sort_values is stable both for normal and datetime-like Index
    period_index = PeriodIndex(
        ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A"
    )
    int_index = Index([2011, 2013, 2015, 2012, 2011], name="idx")

    # Both calls share the same options; only the indexers are compared.
    sort_options = {"return_indexer": True, "ascending": False}
    _, period_indexer = period_index.sort_values(**sort_options)
    _, int_indexer = int_index.sort_values(**sort_options)

    tm.assert_numpy_array_equal(period_indexer, int_indexer)
|