import numpy as np import pytest from pandas._libs.tslibs.period import IncompatibleFrequency import pandas.util._test_decorators as td import pandas as pd from pandas import ( DataFrame, DatetimeIndex, Index, NaT, Period, PeriodIndex, Series, date_range, offsets, period_range, ) import pandas._testing as tm from ..datetimelike import DatetimeLike class TestPeriodIndex(DatetimeLike): _holder = PeriodIndex @pytest.fixture( params=[ tm.makePeriodIndex(10), period_range("20130101", periods=10, freq="D")[::-1], ], ids=["index_inc", "index_dec"], ) def index(self, request): return request.param def create_index(self) -> PeriodIndex: return period_range("20130101", periods=5, freq="D") def test_pickle_compat_construction(self): pass @pytest.mark.parametrize("freq", ["D", "M", "A"]) def test_pickle_round_trip(self, freq): idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.NaN], freq=freq) result = tm.round_trip_pickle(idx) tm.assert_index_equal(result, idx) def test_where(self): # This is handled in test_indexing pass @pytest.mark.parametrize("use_numpy", [True, False]) @pytest.mark.parametrize( "index", [ period_range("2000-01-01", periods=3, freq="D"), period_range("2001-01-01", periods=3, freq="2D"), PeriodIndex(["2001-01", "NaT", "2003-01"], freq="M"), ], ) def test_repeat_freqstr(self, index, use_numpy): # GH10183 expected = PeriodIndex([p for p in index for _ in range(3)]) result = np.repeat(index, 3) if use_numpy else index.repeat(3) tm.assert_index_equal(result, expected) assert result.freqstr == index.freqstr def test_no_millisecond_field(self): msg = "type object 'DatetimeIndex' has no attribute 'millisecond'" with pytest.raises(AttributeError, match=msg): DatetimeIndex.millisecond msg = "'DatetimeIndex' object has no attribute 'millisecond'" with pytest.raises(AttributeError, match=msg): DatetimeIndex([]).millisecond def test_make_time_series(self): index = period_range(freq="A", start="1/1/2001", end="12/1/2009") series = Series(1, index=index) assert isinstance(series, Series) def test_shallow_copy_empty(self): # GH13067 idx = PeriodIndex([], freq="M") result = idx._shallow_copy() expected = idx tm.assert_index_equal(result, expected) def test_shallow_copy_disallow_i8(self): # GH-24391 pi = period_range("2018-01-01", periods=3, freq="2D") with pytest.raises(AssertionError, match="ndarray"): pi._shallow_copy(pi.asi8) def test_shallow_copy_requires_disallow_period_index(self): pi = period_range("2018-01-01", periods=3, freq="2D") with pytest.raises(AssertionError, match="PeriodIndex"): pi._shallow_copy(pi) def test_view_asi8(self): idx = PeriodIndex([], freq="M") exp = np.array([], dtype=np.int64) tm.assert_numpy_array_equal(idx.view("i8"), exp) tm.assert_numpy_array_equal(idx.asi8, exp) idx = PeriodIndex(["2011-01", NaT], freq="M") exp = np.array([492, -9223372036854775808], dtype=np.int64) tm.assert_numpy_array_equal(idx.view("i8"), exp) tm.assert_numpy_array_equal(idx.asi8, exp) exp = np.array([14975, -9223372036854775808], dtype=np.int64) idx = PeriodIndex(["2011-01-01", NaT], freq="D") tm.assert_numpy_array_equal(idx.view("i8"), exp) tm.assert_numpy_array_equal(idx.asi8, exp) def test_values(self): idx = PeriodIndex([], freq="M") exp = np.array([], dtype=object) tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.to_numpy(), exp) exp = np.array([], dtype=np.int64) tm.assert_numpy_array_equal(idx.asi8, exp) idx = PeriodIndex(["2011-01", NaT], freq="M") exp = np.array([Period("2011-01", freq="M"), NaT], dtype=object) tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.to_numpy(), exp) exp = np.array([492, -9223372036854775808], dtype=np.int64) tm.assert_numpy_array_equal(idx.asi8, exp) idx = PeriodIndex(["2011-01-01", NaT], freq="D") exp = np.array([Period("2011-01-01", freq="D"), NaT], dtype=object) tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.to_numpy(), exp) exp = np.array([14975, -9223372036854775808], dtype=np.int64) tm.assert_numpy_array_equal(idx.asi8, exp) def test_period_index_length(self): pi = period_range(freq="A", start="1/1/2001", end="12/1/2009") assert len(pi) == 9 pi = period_range(freq="Q", start="1/1/2001", end="12/1/2009") assert len(pi) == 4 * 9 pi = period_range(freq="M", start="1/1/2001", end="12/1/2009") assert len(pi) == 12 * 9 start = Period("02-Apr-2005", "B") i1 = period_range(start=start, periods=20) assert len(i1) == 20 assert i1.freq == start.freq assert i1[0] == start end_intv = Period("2006-12-31", "W") i1 = period_range(end=end_intv, periods=10) assert len(i1) == 10 assert i1.freq == end_intv.freq assert i1[-1] == end_intv end_intv = Period("2006-12-31", "1w") i2 = period_range(end=end_intv, periods=10) assert len(i1) == len(i2) assert (i1 == i2).all() assert i1.freq == i2.freq msg = "start and end must have same freq" with pytest.raises(ValueError, match=msg): period_range(start=start, end=end_intv) end_intv = Period("2005-05-01", "B") i1 = period_range(start=start, end=end_intv) msg = ( "Of the three parameters: start, end, and periods, exactly two " "must be specified" ) with pytest.raises(ValueError, match=msg): period_range(start=start) # infer freq from first element i2 = PeriodIndex([end_intv, Period("2005-05-05", "B")]) assert len(i2) == 2 assert i2[0] == end_intv i2 = PeriodIndex(np.array([end_intv, Period("2005-05-05", "B")])) assert len(i2) == 2 assert i2[0] == end_intv # Mixed freq should fail vals = [end_intv, Period("2006-12-31", "w")] msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)" with pytest.raises(IncompatibleFrequency, match=msg): PeriodIndex(vals) vals = np.array(vals) with pytest.raises(ValueError, match=msg): PeriodIndex(vals) def test_fields(self): # year, month, day, hour, minute # second, weekofyear, week, dayofweek, weekday, dayofyear, quarter # qyear pi = period_range(freq="A", start="1/1/2001", end="12/1/2005") self._check_all_fields(pi) pi = period_range(freq="Q", start="1/1/2001", end="12/1/2002") self._check_all_fields(pi) pi = period_range(freq="M", start="1/1/2001", end="1/1/2002") self._check_all_fields(pi) pi = period_range(freq="D", start="12/1/2001", end="6/1/2001") self._check_all_fields(pi) pi = period_range(freq="B", start="12/1/2001", end="6/1/2001") self._check_all_fields(pi) pi = period_range(freq="H", start="12/31/2001", end="1/1/2002 23:00") self._check_all_fields(pi) pi = period_range(freq="Min", start="12/31/2001", end="1/1/2002 00:20") self._check_all_fields(pi) pi = period_range( freq="S", start="12/31/2001 00:00:00", end="12/31/2001 00:05:00" ) self._check_all_fields(pi) end_intv = Period("2006-12-31", "W") i1 = period_range(end=end_intv, periods=10) self._check_all_fields(i1) def _check_all_fields(self, periodindex): fields = [ "year", "month", "day", "hour", "minute", "second", "weekofyear", "week", "dayofweek", "day_of_week", "dayofyear", "day_of_year", "quarter", "qyear", "days_in_month", ] periods = list(periodindex) s = Series(periodindex) for field in fields: field_idx = getattr(periodindex, field) assert len(periodindex) == len(field_idx) for x, val in zip(periods, field_idx): assert getattr(x, field) == val if len(s) == 0: continue field_s = getattr(s.dt, field) assert len(periodindex) == len(field_s) for x, val in zip(periods, field_s): assert getattr(x, field) == val def test_period_set_index_reindex(self): # GH 6631 df = DataFrame(np.random.random(6)) idx1 = period_range("2011/01/01", periods=6, freq="M") idx2 = period_range("2013", periods=6, freq="A") df = df.set_index(idx1) tm.assert_index_equal(df.index, idx1) df = df.set_index(idx2) tm.assert_index_equal(df.index, idx2) @pytest.mark.parametrize( "p_values, o_values, values, expected_values", [ ( [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")], [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC"), "All"], [1.0, 1.0], [1.0, 1.0, np.nan], ), ( [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")], [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")], [1.0, 1.0], [1.0, 1.0], ), ], ) def test_period_reindex_with_object( self, p_values, o_values, values, expected_values ): # GH 28337 period_index = PeriodIndex(p_values) object_index = Index(o_values) s = Series(values, index=period_index) result = s.reindex(object_index) expected = Series(expected_values, index=object_index) tm.assert_series_equal(result, expected) def test_is_(self): create_index = lambda: period_range(freq="A", start="1/1/2001", end="12/1/2009") index = create_index() assert index.is_(index) assert not index.is_(create_index()) assert index.is_(index.view()) assert index.is_(index.view().view().view().view().view()) assert index.view().is_(index) ind2 = index.view() index.name = "Apple" assert ind2.is_(index) assert not index.is_(index[:]) assert not index.is_(index.asfreq("M")) assert not index.is_(index.asfreq("A")) assert not index.is_(index - 2) assert not index.is_(index - 0) def test_periods_number_check(self): msg = ( "Of the three parameters: start, end, and periods, exactly two " "must be specified" ) with pytest.raises(ValueError, match=msg): period_range("2011-1-1", "2012-1-1", "B") def test_index_duplicate_periods(self): # monotonic idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") ts = Series(np.random.randn(len(idx)), index=idx) result = ts["2007"] expected = ts[1:3] tm.assert_series_equal(result, expected) result[:] = 1 assert (ts[1:3] == 1).all() # not monotonic idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN") ts = Series(np.random.randn(len(idx)), index=idx) result = ts["2007"] expected = ts[idx == "2007"] tm.assert_series_equal(result, expected) def test_index_unique(self): idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") expected = PeriodIndex([2000, 2007, 2009], freq="A-JUN") tm.assert_index_equal(idx.unique(), expected) assert idx.nunique() == 3 idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN", tz="US/Eastern") expected = PeriodIndex([2000, 2007, 2009], freq="A-JUN", tz="US/Eastern") tm.assert_index_equal(idx.unique(), expected) assert idx.nunique() == 3 def test_shift(self): # This is tested in test_arithmetic pass @td.skip_if_32bit def test_ndarray_compat_properties(self): super().test_ndarray_compat_properties() def test_negative_ordinals(self): Period(ordinal=-1000, freq="A") Period(ordinal=0, freq="A") idx1 = PeriodIndex(ordinal=[-1, 0, 1], freq="A") idx2 = PeriodIndex(ordinal=np.array([-1, 0, 1]), freq="A") tm.assert_index_equal(idx1, idx2) def test_pindex_fieldaccessor_nat(self): idx = PeriodIndex( ["2011-01", "2011-02", "NaT", "2012-03", "2012-04"], freq="D", name="name" ) exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64, name="name") tm.assert_index_equal(idx.year, exp) exp = Index([1, 2, -1, 3, 4], dtype=np.int64, name="name") tm.assert_index_equal(idx.month, exp) def test_pindex_qaccess(self): pi = PeriodIndex(["2Q05", "3Q05", "4Q05", "1Q06", "2Q06"], freq="Q") s = Series(np.random.rand(len(pi)), index=pi).cumsum() # Todo: fix these accessors! assert s["05Q4"] == s[2] def test_pindex_multiples(self): expected = PeriodIndex( ["2011-01", "2011-03", "2011-05", "2011-07", "2011-09", "2011-11"], freq="2M", ) pi = period_range(start="1/1/11", end="12/31/11", freq="2M") tm.assert_index_equal(pi, expected) assert pi.freq == offsets.MonthEnd(2) assert pi.freqstr == "2M" pi = period_range(start="1/1/11", periods=6, freq="2M") tm.assert_index_equal(pi, expected) assert pi.freq == offsets.MonthEnd(2) assert pi.freqstr == "2M" def test_iteration(self): index = period_range(start="1/1/10", periods=4, freq="B") result = list(index) assert isinstance(result[0], Period) assert result[0].freq == index.freq def test_is_full(self): index = PeriodIndex([2005, 2007, 2009], freq="A") assert not index.is_full index = PeriodIndex([2005, 2006, 2007], freq="A") assert index.is_full index = PeriodIndex([2005, 2005, 2007], freq="A") assert not index.is_full index = PeriodIndex([2005, 2005, 2006], freq="A") assert index.is_full index = PeriodIndex([2006, 2005, 2005], freq="A") with pytest.raises(ValueError, match="Index is not monotonic"): index.is_full assert index[:0].is_full def test_with_multi_index(self): # #1705 index = date_range("1/1/2012", periods=4, freq="12H") index_as_arrays = [index.to_period(freq="D"), index.hour] s = Series([0, 1, 2, 3], index_as_arrays) assert isinstance(s.index.levels[0], PeriodIndex) assert isinstance(s.index.values[0][0], Period) def test_convert_array_of_periods(self): rng = period_range("1/1/2000", periods=20, freq="D") periods = list(rng) result = Index(periods) assert isinstance(result, PeriodIndex) def test_append_concat(self): # #1815 d1 = date_range("12/31/1990", "12/31/1999", freq="A-DEC") d2 = date_range("12/31/2000", "12/31/2009", freq="A-DEC") s1 = Series(np.random.randn(10), d1) s2 = Series(np.random.randn(10), d2) s1 = s1.to_period() s2 = s2.to_period() # drops index result = pd.concat([s1, s2]) assert isinstance(result.index, PeriodIndex) assert result.index[0] == s1.index[0] def test_pickle_freq(self): # GH2891 prng = period_range("1/1/2011", "1/1/2012", freq="M") new_prng = tm.round_trip_pickle(prng) assert new_prng.freq == offsets.MonthEnd() assert new_prng.freqstr == "M" def test_map(self): # test_map_dictlike generally tests index = PeriodIndex([2005, 2007, 2009], freq="A") result = index.map(lambda x: x.ordinal) exp = Index([x.ordinal for x in index]) tm.assert_index_equal(result, exp) def test_insert(self): # GH 18295 (test missing) expected = PeriodIndex(["2017Q1", NaT, "2017Q2", "2017Q3", "2017Q4"], freq="Q") for na in (np.nan, NaT, None): result = period_range("2017Q1", periods=4, freq="Q").insert(1, na) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "msg, key", [ (r"Period\('2019', 'A-DEC'\), 'foo', 'bar'", (Period(2019), "foo", "bar")), (r"Period\('2019', 'A-DEC'\), 'y1', 'bar'", (Period(2019), "y1", "bar")), (r"Period\('2019', 'A-DEC'\), 'foo', 'z1'", (Period(2019), "foo", "z1")), ( r"Period\('2018', 'A-DEC'\), Period\('2016', 'A-DEC'\), 'bar'", (Period(2018), Period(2016), "bar"), ), (r"Period\('2018', 'A-DEC'\), 'foo', 'y1'", (Period(2018), "foo", "y1")), ( r"Period\('2017', 'A-DEC'\), 'foo', Period\('2015', 'A-DEC'\)", (Period(2017), "foo", Period(2015)), ), (r"Period\('2017', 'A-DEC'\), 'z1', 'bar'", (Period(2017), "z1", "bar")), ], ) def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key): # issue 20684 """ parse_time_string return parameter if type not matched. PeriodIndex.get_loc takes returned value from parse_time_string as a tuple. If first argument is Period and a tuple has 3 items, process go on not raise exception """ df = DataFrame( { "A": [Period(2019), "x1", "x2"], "B": [Period(2018), Period(2016), "y1"], "C": [Period(2017), "z1", Period(2015)], "V1": [1, 2, 3], "V2": [10, 20, 30], } ).set_index(["A", "B", "C"]) with pytest.raises(KeyError, match=msg): df.loc[key] def test_format_empty(self): # GH35712 empty_idx = self._holder([], freq="A") assert empty_idx.format() == [] assert empty_idx.format(name=True) == [""] def test_maybe_convert_timedelta(): pi = PeriodIndex(["2000", "2001"], freq="D") offset = offsets.Day(2) assert pi._maybe_convert_timedelta(offset) == 2 assert pi._maybe_convert_timedelta(2) == 2 offset = offsets.BusinessDay() msg = r"Input has different freq=B from PeriodIndex\(freq=D\)" with pytest.raises(ValueError, match=msg): pi._maybe_convert_timedelta(offset) def test_is_monotonic_with_nat(): # GH#31437 # PeriodIndex.is_monotonic should behave analogously to DatetimeIndex, # in particular never be monotonic when we have NaT dti = date_range("2016-01-01", periods=3) pi = dti.to_period("D") tdi = Index(dti.view("timedelta64[ns]")) for obj in [pi, pi._engine, dti, dti._engine, tdi, tdi._engine]: if isinstance(obj, Index): # i.e. not Engines assert obj.is_monotonic assert obj.is_monotonic_increasing assert not obj.is_monotonic_decreasing assert obj.is_unique dti1 = dti.insert(0, NaT) pi1 = dti1.to_period("D") tdi1 = Index(dti1.view("timedelta64[ns]")) for obj in [pi1, pi1._engine, dti1, dti1._engine, tdi1, tdi1._engine]: if isinstance(obj, Index): # i.e. not Engines assert not obj.is_monotonic assert not obj.is_monotonic_increasing assert not obj.is_monotonic_decreasing assert obj.is_unique dti2 = dti.insert(3, NaT) pi2 = dti2.to_period("H") tdi2 = Index(dti2.view("timedelta64[ns]")) for obj in [pi2, pi2._engine, dti2, dti2._engine, tdi2, tdi2._engine]: if isinstance(obj, Index): # i.e. not Engines assert not obj.is_monotonic assert not obj.is_monotonic_increasing assert not obj.is_monotonic_decreasing assert obj.is_unique @pytest.mark.parametrize("array", [True, False]) def test_dunder_array(array): obj = PeriodIndex(["2000-01-01", "2001-01-01"], freq="D") if array: obj = obj._data expected = np.array([obj[0], obj[1]], dtype=object) result = np.array(obj) tm.assert_numpy_array_equal(result, expected) result = np.asarray(obj) tm.assert_numpy_array_equal(result, expected) expected = obj.asi8 for dtype in ["i8", "int64", np.int64]: result = np.array(obj, dtype=dtype) tm.assert_numpy_array_equal(result, expected) result = np.asarray(obj, dtype=dtype) tm.assert_numpy_array_equal(result, expected) for dtype in ["float64", "int32", "uint64"]: msg = "argument must be" with pytest.raises(TypeError, match=msg): np.array(obj, dtype=dtype) with pytest.raises(TypeError, match=msg): np.array(obj, dtype=getattr(np, dtype))