from __future__ import annotations

from datetime import (
    datetime,
    timedelta,
)
from typing import Hashable

import numpy as np

from pandas._libs import index as libindex
from pandas._libs.tslibs import (
    BaseOffset,
    NaT,
    Period,
    Resolution,
    Tick,
)
from pandas._typing import (
    Dtype,
    DtypeObj,
    npt,
)
from pandas.util._decorators import (
    cache_readonly,
    doc,
)

from pandas.core.dtypes.common import is_integer
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.missing import is_valid_na_for_dtype

from pandas.core.arrays.period import (
    PeriodArray,
    period_array,
    raise_on_incompatible,
    validate_dtype_freq,
)
import pandas.core.common as com
import pandas.core.indexes.base as ibase
from pandas.core.indexes.base import maybe_extract_name
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
from pandas.core.indexes.datetimes import (
    DatetimeIndex,
    Index,
)
from pandas.core.indexes.extension import inherit_names

# Docstring substitution values: start from the base Index kwargs and
# override the description of acceptable `target` arguments.
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update({"target_klass": "PeriodIndex or list of Periods"})
# Substitution values passed to ``@doc`` templates shared with PeriodArray.
_shared_doc_kwargs = {
    "klass": "PeriodArray",
}

# --- Period index sketch


def _new_PeriodIndex(cls, **d):
    """
    Rebuild an index of type ``cls`` from keyword state (unpickling helper).

    Parameters
    ----------
    cls : type
        Class to construct; must provide ``_simple_new``.
    **d
        Must contain ``"data"``. May contain ``"freq"``, which is consumed
        only when the data has dtype ``int64``. All remaining entries are
        forwarded to the constructor that ends up being used.

    Returns
    -------
    Instance of ``cls``.
    """
    # GH13277 for unpickling
    values = d.pop("data")
    if values.dtype == "int64":
        # Integer data is wrapped directly in a PeriodArray with the stored
        # freq and handed to the fast-path constructor.
        freq = d.pop("freq", None)
        values = PeriodArray(values, freq=freq)
        return cls._simple_new(values, **d)
    else:
        # Anything else goes through the regular constructor; note that
        # "freq", if present, stays in ``d`` and is forwarded as a keyword.
        return cls(values, **d)


@inherit_names(
    ["strftime", "start_time", "end_time"] + PeriodArray._field_ops,
    PeriodArray,
    wrap=True,
)
@inherit_names(["is_leap_year", "_format_native_types"], PeriodArray)
class PeriodIndex(DatetimeIndexOpsMixin):
    """
    Immutable ndarray holding ordinal values indicating regular periods in time.

    Index keys are boxed to Period objects which carry the metadata (e.g.,
    frequency information).

    Parameters
    ----------
    data : array-like (1d int np.ndarray or PeriodArray), optional
        Optional period-like data to construct index with.
    ordinal : array-like of int, optional
        Integer ordinals to build the index from. Only used when ``data``
        is not given; ignored otherwise.
    copy : bool
        Make a copy of input ndarray.
    freq : str or period object, optional
        One of pandas period strings or corresponding objects.
    year : int, array, or Series, default None
    month : int, array, or Series, default None
    quarter : int, array, or Series, default None
    day : int, array, or Series, default None
    hour : int, array, or Series, default None
    minute : int, array, or Series, default None
    second : int, array, or Series, default None
    dtype : str or PeriodDtype, default None
    name : Hashable, default None
        Name of the resulting index.

    Attributes
    ----------
    day
    dayofweek
    day_of_week
    dayofyear
    day_of_year
    days_in_month
    daysinmonth
    end_time
    freq
    freqstr
    hour
    is_leap_year
    minute
    month
    quarter
    qyear
    second
    start_time
    week
    weekday
    weekofyear
    year

    Methods
    -------
    asfreq
    strftime
    to_timestamp

    See Also
    --------
    Index : The base pandas Index type.
    Period : Represents a period of time.
    DatetimeIndex : Index with datetime64 data.
    TimedeltaIndex : Index of timedelta64 data.
    period_range : Create a fixed-frequency PeriodIndex.

    Examples
    --------
    >>> idx = pd.PeriodIndex(year=[2000, 2002], quarter=[1, 3])
    >>> idx
    PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]')
    """

    _typ = "periodindex"

    # Type declarations for attributes supplied by the backing array/mixin.
    _data: PeriodArray
    freq: BaseOffset
    dtype: PeriodDtype

    _data_cls = PeriodArray
    _supports_partial_string_indexing = True

    @property
    def _engine_type(self) -> type[libindex.PeriodEngine]:
        # Lookup engine class used for this index type.
        return libindex.PeriodEngine

    @cache_readonly
    def _resolution_obj(self) -> Resolution:
        # for compat with DatetimeIndex
        return self.dtype._resolution_obj

    # --------------------------------------------------------------------
    # methods that dispatch to array and wrap result in Index
    # These are defined here instead of via inherit_names for mypy

    # NOTE: the methods below are decorated with ``@doc``; their docstrings
    # are generated from the PeriodArray counterparts, so they are annotated
    # with comments only.

    @doc(
        PeriodArray.asfreq,
        other="pandas.arrays.PeriodArray",
        other_name="PeriodArray",
        **_shared_doc_kwargs,
    )
    def asfreq(self, freq=None, how: str = "E") -> PeriodIndex:
        # Convert on the underlying array, then re-wrap keeping our name.
        arr = self._data.asfreq(freq, how)
        return type(self)._simple_new(arr, name=self.name)

    @doc(PeriodArray.to_timestamp)
    def to_timestamp(self, freq=None, how: str = "start") -> DatetimeIndex:
        # Convert on the underlying array; the result is wrapped in a
        # DatetimeIndex (not a PeriodIndex) carrying the same name.
        arr = self._data.to_timestamp(freq, how)
        return DatetimeIndex._simple_new(arr, name=self.name)

    @property
    @doc(PeriodArray.hour.fget)
    def hour(self) -> Index:
        # Array-level field wrapped in an Index that keeps our name.
        return Index(self._data.hour, name=self.name)

    @property
    @doc(PeriodArray.minute.fget)
    def minute(self) -> Index:
        # Array-level field wrapped in an Index that keeps our name.
        return Index(self._data.minute, name=self.name)

    @property
    @doc(PeriodArray.second.fget)
    def second(self) -> Index:
        # Array-level field wrapped in an Index that keeps our name.
        return Index(self._data.second, name=self.name)

    # ------------------------------------------------------------------------
    # Index Constructors

    def __new__(
        cls,
        data=None,
        ordinal=None,
        freq=None,
        dtype: Dtype | None = None,
        copy: bool = False,
        name: Hashable = None,
        **fields,
    ) -> PeriodIndex:
        # Only these keyword names are accepted through ``**fields``.
        valid_field_set = {
            "year",
            "month",
            "day",
            "quarter",
            "hour",
            "minute",
            "second",
        }

        # When not copying from an Index/Series, carry the source's
        # ``_references`` through to ``_simple_new``.
        # NOTE(review): presumably for copy-on-write tracking -- confirm.
        refs = None
        if not copy and isinstance(data, (Index, ABCSeries)):
            refs = data._references

        if not set(fields).issubset(valid_field_set):
            # Report one of the offending names (set order, so which one is
            # reported is arbitrary when several are invalid).
            argument = list(set(fields) - valid_field_set)[0]
            raise TypeError(f"__new__() got an unexpected keyword argument {argument}")

        name = maybe_extract_name(name, data, cls)

        if data is None and ordinal is None:
            # range-based.
            if not fields:
                # test_pickle_compat_construction
                cls._raise_scalar_data_error(None)

            data, freq2 = PeriodArray._generate_range(None, None, None, freq, fields)
            # PeriodArray._generate range does validation that fields is
            # empty when really using the range-based constructor.
            freq = freq2

            data = PeriodArray(data, freq=freq)
        else:
            freq = validate_dtype_freq(dtype, freq)

            # PeriodIndex allow PeriodIndex(period_index, freq=different)
            # Let's not encourage that kind of behavior in PeriodArray.

            if freq and isinstance(data, cls) and data.freq != freq:
                # TODO: We can do some of these with no-copy / coercion?
                # e.g. D -> 2D seems to be OK
                data = data.asfreq(freq)

            if data is None and ordinal is not None:
                # we strangely ignore `ordinal` if data is passed.
                ordinal = np.asarray(ordinal, dtype=np.int64)
                data = PeriodArray(ordinal, freq=freq)
            else:
                # don't pass copy here, since we copy later.
                data = period_array(data=data, freq=freq)

        if copy:
            data = data.copy()

        return cls._simple_new(data, name=name, refs=refs)

    # ------------------------------------------------------------------------
    # Data

    @property
    def values(self) -> np.ndarray:
        # Materialized as an object-dtype ndarray rather than the int64
        # representation.
        return np.asarray(self, dtype=object)

    def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]:
        """
        Convert timedelta-like input to an integer multiple of self.freq

        Parameters
        ----------
        other : timedelta, np.timedelta64, DateOffset, int, np.ndarray

        Returns
        -------
        converted : int, np.ndarray[int64]

        Raises
        ------
        IncompatibleFrequency : if the input cannot be written as a multiple
            of self.freq.  Note IncompatibleFrequency subclasses ValueError.
        """
        if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)):
            if isinstance(self.freq, Tick):
                # _check_timedeltalike_freq_compat will raise if incompatible
                delta = self._data._check_timedeltalike_freq_compat(other)
                return delta
            # Timedelta-like input with a non-Tick freq falls through to the
            # final raise at the bottom of this method.
        elif isinstance(other, BaseOffset):
            if other.base == self.freq.base:
                return other.n

            # raise_on_incompatible builds the exception; we raise it here.
            raise raise_on_incompatible(self, other)
        elif is_integer(other):
            # integer is passed to .shift via
            # _add_datetimelike_methods basically
            # but ufunc may pass integer to _add_delta
            return other

        # raise when input doesn't have freq
        raise raise_on_incompatible(self, None)

    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        """
        Can we compare values of the given dtype to our own?

        Returns False for any non-PeriodDtype; otherwise True only when both
        the period dtype code and the multiple ``n`` of the two freqs match.
        """
        if not isinstance(dtype, PeriodDtype):
            return False
        # For the subset of DateOffsets that can be a dtype.freq, it
        # suffices (and is much faster) to compare the dtype_code rather than
        # the freq itself.
        # See also: PeriodDtype.__eq__
        freq = dtype.freq
        own_freq = self.freq
        return (
            freq._period_dtype_code
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            == own_freq._period_dtype_code  # type: ignore[attr-defined]
            and freq.n == own_freq.n
        )

    # ------------------------------------------------------------------------
    # Index Methods

    def asof_locs(self, where: Index, mask: npt.NDArray[np.bool_]) -> np.ndarray:
        """
        Dispatch to the parent ``asof_locs`` after coercing ``where``.

        Parameters
        ----------
        where : DatetimeIndex or PeriodIndex
            Array of timestamps. A DatetimeIndex is converted to a
            PeriodIndex with this index's freq before dispatching.
        mask : np.ndarray[bool]
            Array of booleans where data is not NA.

        Returns
        -------
        np.ndarray

        Raises
        ------
        TypeError
            If ``where`` is neither a DatetimeIndex nor a PeriodIndex.
        """
        if isinstance(where, DatetimeIndex):
            where = PeriodIndex(where._values, freq=self.freq)
        elif not isinstance(where, PeriodIndex):
            raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex")

        return super().asof_locs(where, mask)

    @property
    def is_full(self) -> bool:
        """
        Returns True if this PeriodIndex is range-like in that all Periods
        between start and end are present, in order.

        An empty index is considered full.

        Raises
        ------
        ValueError
            If the index is not monotonic increasing.
        """
        if len(self) == 0:
            return True
        if not self.is_monotonic_increasing:
            raise ValueError("Index is not monotonic")
        values = self.asi8
        # Every consecutive difference must be below 2; note that repeated
        # values (difference 0) also satisfy this check.
        return bool(((values[1:] - values[:-1]) < 2).all())

    @property
    def inferred_type(self) -> str:
        # b/c data is represented as ints make sure we can't have ambiguous
        # indexing
        return "period"

    # ------------------------------------------------------------------------
    # Indexing Methods

    def _convert_tolerance(self, tolerance, target):
        # Returned tolerance must be in dtype/units so that
        #  `|self._get_engine_target() - target._engine_target()| <= tolerance`
        #  is meaningful.  Since PeriodIndex returns int64 for engine_target,
        #  we may need to convert timedelta64 tolerance to int64.
        tolerance = super()._convert_tolerance(tolerance, target)

        # Conversion is only attempted when target shares our dtype;
        # otherwise the parent's result is returned unchanged.
        if self.dtype == target.dtype:
            # convert tolerance to i8
            tolerance = self._maybe_convert_timedelta(tolerance)

        return tolerance

    def get_loc(self, key):
        """
        Get integer location for requested label.

        Parameters
        ----------
        key : Period, NaT, str, or datetime
            String or datetime key must be parsable as Period.

        Returns
        -------
        loc : int or ndarray[int64]

        Raises
        ------
        KeyError
            Key is not present in the index.
        TypeError
            If key is listlike or otherwise not hashable.
        """
        # Keep the caller's key so the final KeyError reports what was
        # actually passed, not the converted Period/NaT.
        orig_key = key

        self._check_indexing_error(key)

        if is_valid_na_for_dtype(key, self.dtype):
            key = NaT

        elif isinstance(key, str):
            try:
                parsed, reso = self._parse_with_reso(key)
            except ValueError as err:
                # A string with invalid format
                raise KeyError(f"Cannot interpret '{key}' as period") from err

            if self._can_partial_date_slice(reso):
                # Partial-string match: return the slice result directly.
                try:
                    return self._partial_date_slice(reso, parsed)
                except KeyError as err:
                    raise KeyError(key) from err

            if reso == self._resolution_obj:
                # the reso < self._resolution_obj case goes
                #  through _get_string_slice
                key = self._cast_partial_indexing_scalar(parsed)
            else:
                raise KeyError(key)

        elif isinstance(key, Period):
            # Raises KeyError if the key's freq does not match ours.
            self._disallow_mismatched_indexing(key)

        elif isinstance(key, datetime):
            key = self._cast_partial_indexing_scalar(key)

        else:
            # in particular integer, which Period constructor would cast to string
            raise KeyError(key)

        try:
            return Index.get_loc(self, key)
        except KeyError as err:
            raise KeyError(orig_key) from err

    def _disallow_mismatched_indexing(self, key: Period) -> None:
        """
        Raise KeyError if ``key``'s freq does not match this index's freq.

        Two freqs are treated as matching when both their multiple ``n`` and
        their period dtype code are equal.
        """
        sfreq = self.freq
        kfreq = key.freq
        if not (
            sfreq.n == kfreq.n
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            and sfreq._period_dtype_code  # type: ignore[attr-defined]
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            == kfreq._period_dtype_code  # type: ignore[attr-defined]
        ):
            # GH#42247 For the subset of DateOffsets that can be Period freqs,
            #  checking these two attributes is sufficient to check equality,
            #  and much more performant than `self.freq == key.freq`
            raise KeyError(key)

    def _cast_partial_indexing_scalar(self, label: datetime) -> Period:
        """
        Convert ``label`` to a Period with this index's freq.

        Raises
        ------
        KeyError
            If Period construction fails with a ValueError.
        """
        try:
            period = Period(label, freq=self.freq)
        except ValueError as err:
            # we cannot construct the Period
            raise KeyError(label) from err
        return period

    @doc(DatetimeIndexOpsMixin._maybe_cast_slice_bound)
    def _maybe_cast_slice_bound(self, label, side: str):
        # datetime labels are first converted to a Period at our freq; all
        # other labels are passed to the parent implementation unchanged.
        if isinstance(label, datetime):
            label = self._cast_partial_indexing_scalar(label)

        return super()._maybe_cast_slice_bound(label, side)

    def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
        # Build a Period at the parsed string's resolution, then express its
        # start and end at this index's freq; returned as (start, end).
        iv = Period(parsed, freq=reso.attr_abbrev)
        return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end"))

    @doc(DatetimeIndexOpsMixin.shift)
    def shift(self, periods: int = 1, freq=None):
        # `freq` exists only for signature compatibility with the mixin;
        # any non-None value is rejected.
        if freq is not None:
            raise TypeError(
                f"`freq` argument is not supported for {type(self).__name__}.shift"
            )
        # Shifting is implemented as integer addition on the index.
        return self + periods


def period_range(
    start=None, end=None, periods: int | None = None, freq=None, name=None
) -> PeriodIndex:
    """
    Return a fixed frequency PeriodIndex.

    The day (calendar) is the default frequency.

    Parameters
    ----------
    start : str or period-like, default None
        Left bound for generating periods.
    end : str or period-like, default None
        Right bound for generating periods.
    periods : int, default None
        Number of periods to generate.
    freq : str or DateOffset, optional
        Frequency alias. By default the freq is taken from `start` or `end`
        if those are Period objects. Otherwise, the default is ``"D"`` for
        daily frequency.
    name : str, default None
        Name of the resulting PeriodIndex.

    Returns
    -------
    PeriodIndex

    Raises
    ------
    ValueError
        If the number of non-None arguments among ``start``, ``end`` and
        ``periods`` is not exactly two.

    Notes
    -----
    Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
    must be specified.

    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')
    PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
             '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
             '2018-01'],
            dtype='period[M]')

    If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor
    endpoints for a ``PeriodIndex`` with frequency matching that of the
    ``period_range`` constructor.

    >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'),
    ...                 end=pd.Period('2017Q2', freq='Q'), freq='M')
    PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'],
                dtype='period[M]')
    """
    if com.count_not_none(start, end, periods) != 2:
        raise ValueError(
            "Of the three parameters: start, end, and periods, "
            "exactly two must be specified"
        )
    # Fall back to daily frequency only when neither bound is a Period
    # (otherwise freq=None is passed on to _generate_range).
    if freq is None and (not isinstance(start, Period) and not isinstance(end, Period)):
        freq = "D"

    data, freq = PeriodArray._generate_range(start, end, periods, freq, fields={})
    data = PeriodArray(data, freq=freq)
    return PeriodIndex(data, name=name)