from __future__ import annotations

from datetime import (
    datetime,
    timedelta,
)
from typing import Hashable

import numpy as np

from pandas._libs import index as libindex
from pandas._libs.tslibs import (
    BaseOffset,
    NaT,
    Period,
    Resolution,
    Tick,
)
from pandas._typing import (
    Dtype,
    DtypeObj,
    npt,
)
from pandas.util._decorators import (
    cache_readonly,
    doc,
)

from pandas.core.dtypes.common import is_integer
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.missing import is_valid_na_for_dtype

from pandas.core.arrays.period import (
    PeriodArray,
    period_array,
    raise_on_incompatible,
    validate_dtype_freq,
)
import pandas.core.common as com
import pandas.core.indexes.base as ibase
from pandas.core.indexes.base import maybe_extract_name
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
from pandas.core.indexes.datetimes import (
    DatetimeIndex,
    Index,
)
from pandas.core.indexes.extension import inherit_names

_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update({"target_klass": "PeriodIndex or list of Periods"})
_shared_doc_kwargs = {
    "klass": "PeriodArray",
}

# --- Period index sketch


def _new_PeriodIndex(cls, **d):
    # GH13277 for unpickling
    values = d.pop("data")
    if values.dtype == "int64":
        freq = d.pop("freq", None)
        values = PeriodArray(values, freq=freq)
        return cls._simple_new(values, **d)
    else:
        return cls(values, **d)
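
# Note (comment added for clarity, not in the original source): _new_PeriodIndex
# is the reconstruction hook used when unpickling a PeriodIndex, so a pickle
# round trip is expected to preserve the frequency metadata, e.g. (illustrative):
#
#   >>> import pickle
#   >>> idx = pd.period_range("2023-01", periods=3, freq="M")
#   >>> pickle.loads(pickle.dumps(idx)).equals(idx)
#   True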


@inherit_names(
    ["strftime", "start_time", "end_time"] + PeriodArray._field_ops,
    PeriodArray,
    wrap=True,
)
@inherit_names(["is_leap_year", "_format_native_types"], PeriodArray)
class PeriodIndex(DatetimeIndexOpsMixin):
    """
    Immutable ndarray holding ordinal values indicating regular periods in time.

    Index keys are boxed to Period objects which carry the metadata (e.g.,
    frequency information).

    Parameters
    ----------
    data : array-like (1d int np.ndarray or PeriodArray), optional
        Optional period-like data to construct index with.
    copy : bool
        Make a copy of input ndarray.
    freq : str or period object, optional
        One of pandas period strings or corresponding objects.
    year : int, array, or Series, default None
    month : int, array, or Series, default None
    quarter : int, array, or Series, default None
    day : int, array, or Series, default None
    hour : int, array, or Series, default None
    minute : int, array, or Series, default None
    second : int, array, or Series, default None
    dtype : str or PeriodDtype, default None

    Attributes
    ----------
    day
    dayofweek
    day_of_week
    dayofyear
    day_of_year
    days_in_month
    daysinmonth
    end_time
    freq
    freqstr
    hour
    is_leap_year
    minute
    month
    quarter
    qyear
    second
    start_time
    week
    weekday
    weekofyear
    year

    Methods
    -------
    asfreq
    strftime
    to_timestamp

    See Also
    --------
    Index : The base pandas Index type.
    Period : Represents a period of time.
    DatetimeIndex : Index with datetime64 data.
    TimedeltaIndex : Index of timedelta64 data.
    period_range : Create a fixed-frequency PeriodIndex.

    Examples
    --------
    >>> idx = pd.PeriodIndex(year=[2000, 2002], quarter=[1, 3])
    >>> idx
    PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]')
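
    A PeriodIndex can also be constructed from period-like strings with an
    explicit frequency (illustrative example added for clarity; output shown
    is indicative):

    >>> pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M")
    PeriodIndex(['2023-01', '2023-02', '2023-03'], dtype='period[M]')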
    """

    _typ = "periodindex"

    _data: PeriodArray
    freq: BaseOffset
    dtype: PeriodDtype

    _data_cls = PeriodArray
    _supports_partial_string_indexing = True

    @property
    def _engine_type(self) -> type[libindex.PeriodEngine]:
        return libindex.PeriodEngine

    @cache_readonly
    def _resolution_obj(self) -> Resolution:
        # for compat with DatetimeIndex
        return self.dtype._resolution_obj

    # --------------------------------------------------------------------
    # methods that dispatch to array and wrap result in Index
    # These are defined here instead of via inherit_names for mypy

    @doc(
        PeriodArray.asfreq,
        other="pandas.arrays.PeriodArray",
        other_name="PeriodArray",
        **_shared_doc_kwargs,
    )
    def asfreq(self, freq=None, how: str = "E") -> PeriodIndex:
        arr = self._data.asfreq(freq, how)
        return type(self)._simple_new(arr, name=self.name)
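
    # Illustrative usage sketch (comment added for clarity, not in the original
    # source); exact reprs may vary by pandas version:
    #
    #   >>> idx = pd.period_range("2023", periods=1, freq="Y")
    #   >>> idx.asfreq("M", how="end")
    #   PeriodIndex(['2023-12'], dtype='period[M]')
    #   >>> idx.asfreq("M", how="start")
    #   PeriodIndex(['2023-01'], dtype='period[M]')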

    @doc(PeriodArray.to_timestamp)
    def to_timestamp(self, freq=None, how: str = "start") -> DatetimeIndex:
        arr = self._data.to_timestamp(freq, how)
        return DatetimeIndex._simple_new(arr, name=self.name)
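
    # Illustrative sketch (comment added for clarity, not in the original
    # source): converting a monthly PeriodIndex to a DatetimeIndex anchored at
    # the period start; output indicative:
    #
    #   >>> pidx = pd.period_range("2023-01", periods=2, freq="M")
    #   >>> pidx.to_timestamp()
    #   DatetimeIndex(['2023-01-01', '2023-02-01'], dtype='datetime64[ns]', freq='MS')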

    @property
    @doc(PeriodArray.hour.fget)
    def hour(self) -> Index:
        return Index(self._data.hour, name=self.name)

    @property
    @doc(PeriodArray.minute.fget)
    def minute(self) -> Index:
        return Index(self._data.minute, name=self.name)

    @property
    @doc(PeriodArray.second.fget)
    def second(self) -> Index:
        return Index(self._data.second, name=self.name)

    # ------------------------------------------------------------------------
    # Index Constructors

    def __new__(
        cls,
        data=None,
        ordinal=None,
        freq=None,
        dtype: Dtype | None = None,
        copy: bool = False,
        name: Hashable = None,
        **fields,
    ) -> PeriodIndex:
        valid_field_set = {
            "year",
            "month",
            "day",
            "quarter",
            "hour",
            "minute",
            "second",
        }

        refs = None
        if not copy and isinstance(data, (Index, ABCSeries)):
            refs = data._references

        if not set(fields).issubset(valid_field_set):
            argument = list(set(fields) - valid_field_set)[0]
            raise TypeError(f"__new__() got an unexpected keyword argument {argument}")

        name = maybe_extract_name(name, data, cls)

        if data is None and ordinal is None:
            # range-based.
            if not fields:
                # test_pickle_compat_construction
                cls._raise_scalar_data_error(None)

            data, freq2 = PeriodArray._generate_range(None, None, None, freq, fields)
            # PeriodArray._generate_range does validation that fields is
            # empty when really using the range-based constructor.
            freq = freq2

            data = PeriodArray(data, freq=freq)
        else:
            freq = validate_dtype_freq(dtype, freq)

            # PeriodIndex allows PeriodIndex(period_index, freq=different)
            # Let's not encourage that kind of behavior in PeriodArray.

            if freq and isinstance(data, cls) and data.freq != freq:
                # TODO: We can do some of these with no-copy / coercion?
                # e.g. D -> 2D seems to be OK
                data = data.asfreq(freq)

            if data is None and ordinal is not None:
                # we strangely ignore `ordinal` if data is passed.
                ordinal = np.asarray(ordinal, dtype=np.int64)
                data = PeriodArray(ordinal, freq=freq)
            else:
                # don't pass copy here, since we copy later.
                data = period_array(data=data, freq=freq)

        if copy:
            data = data.copy()

        return cls._simple_new(data, name=name, refs=refs)
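
    # Constructor paths, summarized with illustrative calls (comment added for
    # clarity, not in the original source):
    #
    #   field-based:   pd.PeriodIndex(year=[2000, 2002], quarter=[1, 3])
    #   ordinal-based: pd.PeriodIndex(ordinal=[0, 1, 2], freq="D")  # days since epoch
    #   data-based:    pd.PeriodIndex(["2023-01", "2023-02"], freq="M")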

    # ------------------------------------------------------------------------
    # Data

    @property
    def values(self) -> np.ndarray:
        return np.asarray(self, dtype=object)

    def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]:
        """
        Convert timedelta-like input to an integer multiple of self.freq

        Parameters
        ----------
        other : timedelta, np.timedelta64, DateOffset, int, np.ndarray

        Returns
        -------
        converted : int, np.ndarray[int64]

        Raises
        ------
        IncompatibleFrequency : if the input cannot be written as a multiple
            of self.freq. Note IncompatibleFrequency subclasses ValueError.
        """
        if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)):
            if isinstance(self.freq, Tick):
                # _check_timedeltalike_freq_compat will raise if incompatible
                delta = self._data._check_timedeltalike_freq_compat(other)
                return delta
        elif isinstance(other, BaseOffset):
            if other.base == self.freq.base:
                return other.n

            raise raise_on_incompatible(self, other)
        elif is_integer(other):
            # integer is passed to .shift via
            # _add_datetimelike_methods basically
            # but ufunc may pass integer to _add_delta
            return other

        # raise when input doesn't have freq
        raise raise_on_incompatible(self, None)
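
    # Illustrative behaviour sketch (comment added for clarity, not in the
    # original source), assuming a daily-frequency index:
    #
    #   >>> idx = pd.period_range("2023-01-01", periods=3, freq="D")
    #   >>> idx._maybe_convert_timedelta(timedelta(days=3))
    #   3
    #   >>> idx._maybe_convert_timedelta(pd.offsets.Day(2))
    #   2
    #
    # A timedelta(hours=1) cannot be written as a whole number of days and is
    # expected to raise IncompatibleFrequency.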

    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        """
        Can we compare values of the given dtype to our own?
        """
        if not isinstance(dtype, PeriodDtype):
            return False
        # For the subset of DateOffsets that can be a dtype.freq, it
        # suffices (and is much faster) to compare the dtype_code rather than
        # the freq itself.
        # See also: PeriodDtype.__eq__
        freq = dtype.freq
        own_freq = self.freq
        return (
            freq._period_dtype_code
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            == own_freq._period_dtype_code  # type: ignore[attr-defined]
            and freq.n == own_freq.n
        )
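
    # Illustrative sketch (comment added for clarity, not in the original
    # source): a monthly index is comparable to another period[M] dtype but not
    # to period[D], and never to non-period dtypes:
    #
    #   >>> idx = pd.period_range("2023-01", periods=2, freq="M")
    #   >>> idx._is_comparable_dtype(pd.PeriodDtype("M"))
    #   True
    #   >>> idx._is_comparable_dtype(pd.PeriodDtype("D"))
    #   False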

    # ------------------------------------------------------------------------
    # Index Methods

    def asof_locs(self, where: Index, mask: npt.NDArray[np.bool_]) -> np.ndarray:
        """
        where : array of timestamps
        mask : np.ndarray[bool]
            Array of booleans where data is not NA.
        """
        if isinstance(where, DatetimeIndex):
            where = PeriodIndex(where._values, freq=self.freq)
        elif not isinstance(where, PeriodIndex):
            raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex")

        return super().asof_locs(where, mask)

    @property
    def is_full(self) -> bool:
        """
        Returns True if this PeriodIndex is range-like in that all Periods
        between start and end are present, in order.
        """
        if len(self) == 0:
            return True
        if not self.is_monotonic_increasing:
            raise ValueError("Index is not monotonic")
        values = self.asi8
        return bool(((values[1:] - values[:-1]) < 2).all())
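
    # Illustrative examples (comment added for clarity, not in the original
    # source):
    #
    #   >>> pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M").is_full
    #   True
    #   >>> pd.PeriodIndex(["2023-01", "2023-03"], freq="M").is_full
    #   False  # 2023-02 is missing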

    @property
    def inferred_type(self) -> str:
        # b/c data is represented as ints make sure we can't have ambiguous
        # indexing
        return "period"

    # ------------------------------------------------------------------------
    # Indexing Methods

    def _convert_tolerance(self, tolerance, target):
        # Returned tolerance must be in dtype/units so that
        #  `|self._get_engine_target() - target._engine_target()| <= tolerance`
        #  is meaningful. Since PeriodIndex returns int64 for engine_target,
        #  we may need to convert timedelta64 tolerance to int64.
        tolerance = super()._convert_tolerance(tolerance, target)

        if self.dtype == target.dtype:
            # convert tolerance to i8
            tolerance = self._maybe_convert_timedelta(tolerance)

        return tolerance

    def get_loc(self, key):
        """
        Get integer location for requested label.

        Parameters
        ----------
        key : Period, NaT, str, or datetime
            String or datetime key must be parsable as Period.

        Returns
        -------
        loc : int or ndarray[int64]

        Raises
        ------
        KeyError
            Key is not present in the index.
        TypeError
            If key is listlike or otherwise not hashable.
        """
        orig_key = key

        self._check_indexing_error(key)

        if is_valid_na_for_dtype(key, self.dtype):
            key = NaT

        elif isinstance(key, str):
            try:
                parsed, reso = self._parse_with_reso(key)
            except ValueError as err:
                # A string with invalid format
                raise KeyError(f"Cannot interpret '{key}' as period") from err

            if self._can_partial_date_slice(reso):
                try:
                    return self._partial_date_slice(reso, parsed)
                except KeyError as err:
                    raise KeyError(key) from err

            if reso == self._resolution_obj:
                # the reso < self._resolution_obj case goes
                #  through _get_string_slice
                key = self._cast_partial_indexing_scalar(parsed)
            else:
                raise KeyError(key)

        elif isinstance(key, Period):
            self._disallow_mismatched_indexing(key)

        elif isinstance(key, datetime):
            key = self._cast_partial_indexing_scalar(key)

        else:
            # in particular integer, which Period constructor would cast to string
            raise KeyError(key)

        try:
            return Index.get_loc(self, key)
        except KeyError as err:
            raise KeyError(orig_key) from err
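
    # Illustrative examples (comment added for clarity, not in the original
    # source); string keys are parsed to a Period at the index's resolution:
    #
    #   >>> idx = pd.PeriodIndex(["2023-01", "2023-02", "2023-03"], freq="M")
    #   >>> idx.get_loc("2023-02")
    #   1
    #   >>> idx.get_loc(pd.Period("2023-02", freq="M"))
    #   1
    #   >>> idx.get_loc("2023-04")   # missing label -> KeyError('2023-04')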

    def _disallow_mismatched_indexing(self, key: Period) -> None:
        sfreq = self.freq
        kfreq = key.freq
        if not (
            sfreq.n == kfreq.n
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            and sfreq._period_dtype_code  # type: ignore[attr-defined]
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            == kfreq._period_dtype_code  # type: ignore[attr-defined]
        ):
            # GH#42247 For the subset of DateOffsets that can be Period freqs,
            #  checking these two attributes is sufficient to check equality,
            #  and much more performant than `self.freq == key.freq`
            raise KeyError(key)

    def _cast_partial_indexing_scalar(self, label: datetime) -> Period:
        try:
            period = Period(label, freq=self.freq)
        except ValueError as err:
            # we cannot construct the Period
            raise KeyError(label) from err
        return period

    @doc(DatetimeIndexOpsMixin._maybe_cast_slice_bound)
    def _maybe_cast_slice_bound(self, label, side: str):
        if isinstance(label, datetime):
            label = self._cast_partial_indexing_scalar(label)

        return super()._maybe_cast_slice_bound(label, side)

    def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
        iv = Period(parsed, freq=reso.attr_abbrev)
        return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end"))

    @doc(DatetimeIndexOpsMixin.shift)
    def shift(self, periods: int = 1, freq=None):
        if freq is not None:
            raise TypeError(
                f"`freq` argument is not supported for {type(self).__name__}.shift"
            )
        return self + periods
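
    # Illustrative sketch (comment added for clarity, not in the original
    # source): shifting a monthly index moves each element by whole periods, so
    #
    #   >>> pd.period_range("2023-01", periods=2, freq="M").shift(2)
    #
    # is expected to give PeriodIndex(['2023-03', '2023-04'], dtype='period[M]'),
    # while passing `freq=` raises TypeError.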


def period_range(
    start=None, end=None, periods: int | None = None, freq=None, name=None
) -> PeriodIndex:
    """
    Return a fixed frequency PeriodIndex.

    The day (calendar) is the default frequency.

    Parameters
    ----------
    start : str or period-like, default None
        Left bound for generating periods.
    end : str or period-like, default None
        Right bound for generating periods.
    periods : int, default None
        Number of periods to generate.
    freq : str or DateOffset, optional
        Frequency alias. By default the freq is taken from `start` or `end`
        if those are Period objects. Otherwise, the default is ``"D"`` for
        daily frequency.
    name : str, default None
        Name of the resulting PeriodIndex.

    Returns
    -------
    PeriodIndex

    Notes
    -----
    Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
    must be specified.

    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')
    PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
                 '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
                 '2018-01'],
                dtype='period[M]')

    If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor
    endpoints for a ``PeriodIndex`` with frequency matching that of the
    ``period_range`` constructor.

    >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'),
    ...                 end=pd.Period('2017Q2', freq='Q'), freq='M')
    PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'],
                dtype='period[M]')
    """
    if com.count_not_none(start, end, periods) != 2:
        raise ValueError(
            "Of the three parameters: start, end, and periods, "
            "exactly two must be specified"
        )
    if freq is None and (not isinstance(start, Period) and not isinstance(end, Period)):
        freq = "D"

    data, freq = PeriodArray._generate_range(start, end, periods, freq, fields={})
    data = PeriodArray(data, freq=freq)
    return PeriodIndex(data, name=name)
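

# ---------------------------------------------------------------------------
# Minimal usage sketch (added for illustration; not part of the original
# module). Run as a script to exercise period_range and basic PeriodIndex
# behaviour; printed reprs are indicative and may vary across pandas versions.
if __name__ == "__main__":
    import pandas as pd

    # Exactly two of start/end/periods must be given; freq defaults to daily
    # ("D") only when neither bound is a Period object.
    idx = pd.period_range(start="2023-01", periods=3, freq="M")
    print(idx)                          # PeriodIndex(['2023-01', '2023-02', '2023-03'], ...)
    print(idx.get_loc("2023-02"))       # 1
    print(idx.is_full)                  # True
    print(idx.asfreq("D", how="end"))   # last day of each month
    print(idx.to_timestamp())           # DatetimeIndex of month starts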