import numpy as np import pytest import pandas as pd from pandas import Index, NaT, PeriodIndex, Series import pandas._testing as tm class TestPeriodIndexOps: @pytest.mark.parametrize( "freq,expected", [ ("A", "year"), ("Q", "quarter"), ("M", "month"), ("D", "day"), ("H", "hour"), ("T", "minute"), ("S", "second"), ("L", "millisecond"), ("U", "microsecond"), ], ) def test_resolution(self, freq, expected): idx = pd.period_range(start="2013-04-01", periods=30, freq=freq) assert idx.resolution == expected def test_value_counts_unique(self): # GH 7735 idx = pd.period_range("2011-01-01 09:00", freq="H", periods=10) # create repeated values, 'n'th element is repeated by n+1 times idx = PeriodIndex(np.repeat(idx._values, range(1, len(idx) + 1)), freq="H") exp_idx = PeriodIndex( [ "2011-01-01 18:00", "2011-01-01 17:00", "2011-01-01 16:00", "2011-01-01 15:00", "2011-01-01 14:00", "2011-01-01 13:00", "2011-01-01 12:00", "2011-01-01 11:00", "2011-01-01 10:00", "2011-01-01 09:00", ], freq="H", ) expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64") for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(), expected) expected = pd.period_range("2011-01-01 09:00", freq="H", periods=10) tm.assert_index_equal(idx.unique(), expected) idx = PeriodIndex( [ "2013-01-01 09:00", "2013-01-01 09:00", "2013-01-01 09:00", "2013-01-01 08:00", "2013-01-01 08:00", NaT, ], freq="H", ) exp_idx = PeriodIndex(["2013-01-01 09:00", "2013-01-01 08:00"], freq="H") expected = Series([3, 2], index=exp_idx) for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(), expected) exp_idx = PeriodIndex(["2013-01-01 09:00", "2013-01-01 08:00", NaT], freq="H") expected = Series([3, 2, 1], index=exp_idx) for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(dropna=False), expected) tm.assert_index_equal(idx.unique(), exp_idx) @pytest.mark.parametrize("freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"]) def test_drop_duplicates_metadata(self, freq): # GH 10115 idx = pd.period_range("2011-01-01", periods=10, freq=freq, name="idx") result = idx.drop_duplicates() tm.assert_index_equal(idx, result) assert idx.freq == result.freq idx_dup = idx.append(idx) # freq will not be reset result = idx_dup.drop_duplicates() tm.assert_index_equal(idx, result) assert idx.freq == result.freq @pytest.mark.parametrize("freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"]) @pytest.mark.parametrize( "keep, expected, index", [ ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), ( False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), np.arange(5, 10), ), ], ) def test_drop_duplicates(self, freq, keep, expected, index): # to check Index/Series compat idx = pd.period_range("2011-01-01", periods=10, freq=freq, name="idx") idx = idx.append(idx[:5]) tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) expected = idx[~expected] result = idx.drop_duplicates(keep=keep) tm.assert_index_equal(result, expected) result = Series(idx).drop_duplicates(keep=keep) tm.assert_series_equal(result, Series(expected, index=index)) def test_order_compat(self): def _check_freq(index, expected_index): if isinstance(index, PeriodIndex): assert index.freq == expected_index.freq pidx = PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="A") # for compatibility check iidx = Index([2011, 2012, 2013], name="idx") for idx in [pidx, iidx]: ordered = idx.sort_values() tm.assert_index_equal(ordered, idx) _check_freq(ordered, idx) ordered = idx.sort_values(ascending=False) tm.assert_index_equal(ordered, idx[::-1]) _check_freq(ordered, idx[::-1]) ordered, indexer = idx.sort_values(return_indexer=True) tm.assert_index_equal(ordered, idx) tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) _check_freq(ordered, idx) ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) tm.assert_index_equal(ordered, idx[::-1]) tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False) _check_freq(ordered, idx[::-1]) pidx = PeriodIndex( ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A" ) pexpected = PeriodIndex( ["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="A" ) # for compatibility check iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx") iexpected = Index([2011, 2011, 2012, 2013, 2015], name="idx") for idx, expected in [(pidx, pexpected), (iidx, iexpected)]: ordered = idx.sort_values() tm.assert_index_equal(ordered, expected) _check_freq(ordered, idx) ordered = idx.sort_values(ascending=False) tm.assert_index_equal(ordered, expected[::-1]) _check_freq(ordered, idx) ordered, indexer = idx.sort_values(return_indexer=True) tm.assert_index_equal(ordered, expected) exp = np.array([0, 4, 3, 1, 2]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) _check_freq(ordered, idx) ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) tm.assert_index_equal(ordered, expected[::-1]) _check_freq(ordered, idx) pidx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D") result = pidx.sort_values(na_position="first") expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D") tm.assert_index_equal(result, expected) assert result.freq == "D" result = pidx.sort_values(ascending=False) expected = PeriodIndex(["2013", "2011", "2011", "NaT"], name="pidx", freq="D") tm.assert_index_equal(result, expected) assert result.freq == "D" def test_order(self): for freq in ["D", "2D", "4D"]: idx = PeriodIndex( ["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx" ) ordered = idx.sort_values() tm.assert_index_equal(ordered, idx) assert ordered.freq == idx.freq ordered = idx.sort_values(ascending=False) expected = idx[::-1] tm.assert_index_equal(ordered, expected) assert ordered.freq == expected.freq assert ordered.freq == freq ordered, indexer = idx.sort_values(return_indexer=True) tm.assert_index_equal(ordered, idx) tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) assert ordered.freq == idx.freq assert ordered.freq == freq ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) expected = idx[::-1] tm.assert_index_equal(ordered, expected) tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False) assert ordered.freq == expected.freq assert ordered.freq == freq idx1 = PeriodIndex( ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], freq="D", name="idx1", ) exp1 = PeriodIndex( ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], freq="D", name="idx1", ) idx2 = PeriodIndex( ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], freq="D", name="idx2", ) exp2 = PeriodIndex( ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], freq="D", name="idx2", ) idx3 = PeriodIndex( [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT], freq="D", name="idx3" ) exp3 = PeriodIndex( [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"], freq="D", name="idx3" ) for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]: ordered = idx.sort_values(na_position="first") tm.assert_index_equal(ordered, expected) assert ordered.freq == "D" ordered = idx.sort_values(ascending=False) tm.assert_index_equal(ordered, expected[::-1]) assert ordered.freq == "D" ordered, indexer = idx.sort_values(return_indexer=True, na_position="first") tm.assert_index_equal(ordered, expected) exp = np.array([0, 4, 3, 1, 2]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq == "D" ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) tm.assert_index_equal(ordered, expected[::-1]) exp = np.array([2, 1, 3, 0, 4]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq == "D" def test_nat(self): assert PeriodIndex._na_value is NaT assert PeriodIndex([], freq="M")._na_value is NaT idx = PeriodIndex(["2011-01-01", "2011-01-02"], freq="D") assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) assert idx.hasnans is False tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp)) idx = PeriodIndex(["2011-01-01", "NaT"], freq="D") assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) assert idx.hasnans is True tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp)) def test_freq_setter_deprecated(self): # GH 20678 idx = pd.period_range("2018Q1", periods=4, freq="Q") # no warning for getter with tm.assert_produces_warning(None): idx.freq # warning for setter with pytest.raises(AttributeError, match="can't set attribute"): idx.freq = pd.offsets.Day() def test_order_stability_compat(): # GH 35922. sort_values is stable both for normal and datetime-like Index pidx = PeriodIndex(["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A") iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx") ordered1, indexer1 = pidx.sort_values(return_indexer=True, ascending=False) ordered2, indexer2 = iidx.sort_values(return_indexer=True, ascending=False) tm.assert_numpy_array_equal(indexer1, indexer2)