""" Base and utility classes for tseries type pandas objects. """ from datetime import datetime from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Type, TypeVar, Union, cast import numpy as np from pandas._libs import NaT, Timedelta, iNaT, join as libjoin, lib from pandas._libs.tslibs import BaseOffset, Resolution, Tick from pandas._typing import Callable, Label from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, cache_readonly, doc from pandas.core.dtypes.common import ( is_bool_dtype, is_categorical_dtype, is_dtype_equal, is_integer, is_list_like, is_period_dtype, is_scalar, ) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ABCSeries from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin import pandas.core.common as com import pandas.core.indexes.base as ibase from pandas.core.indexes.base import Index, _index_shared_docs from pandas.core.indexes.extension import ( NDArrayBackedExtensionIndex, inherit_names, make_wrapped_arith_op, ) from pandas.core.indexes.numeric import Int64Index from pandas.core.tools.timedeltas import to_timedelta if TYPE_CHECKING: from pandas import CategoricalIndex _index_doc_kwargs = dict(ibase._index_doc_kwargs) _T = TypeVar("_T", bound="DatetimeIndexOpsMixin") def _join_i8_wrapper(joinf, with_indexers: bool = True): """ Create the join wrapper methods. """ # error: 'staticmethod' used with a non-method @staticmethod # type: ignore[misc] def wrapper(left, right): # Note: these only get called with left.dtype == right.dtype if isinstance( left, (np.ndarray, DatetimeIndexOpsMixin, ABCSeries, DatetimeLikeArrayMixin) ): left = left.view("i8") if isinstance( right, (np.ndarray, DatetimeIndexOpsMixin, ABCSeries, DatetimeLikeArrayMixin), ): right = right.view("i8") results = joinf(left, right) if with_indexers: # dtype should be timedelta64[ns] for TimedeltaIndex # and datetime64[ns] for DatetimeIndex dtype = cast(np.dtype, left.dtype).base join_index, left_indexer, right_indexer = results join_index = join_index.view(dtype) return join_index, left_indexer, right_indexer return results return wrapper @inherit_names( ["inferred_freq", "_resolution_obj", "resolution"], DatetimeLikeArrayMixin, cache=True, ) @inherit_names(["mean", "asi8", "freq", "freqstr"], DatetimeLikeArrayMixin) class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex): """ Common ops mixin to support a unified interface datetimelike Index. """ _can_hold_strings = False _data: Union[DatetimeArray, TimedeltaArray, PeriodArray] _data_cls: Union[Type[DatetimeArray], Type[TimedeltaArray], Type[PeriodArray]] freq: Optional[BaseOffset] freqstr: Optional[str] _resolution_obj: Resolution _bool_ops: List[str] = [] _field_ops: List[str] = [] # error: "Callable[[Any], Any]" has no attribute "fget" hasnans = cache_readonly( DatetimeLikeArrayMixin._hasnans.fget # type: ignore[attr-defined] ) _hasnans = hasnans # for index / array -agnostic code @classmethod def _simple_new( cls, values: Union[DatetimeArray, TimedeltaArray, PeriodArray], name: Label = None, ): assert isinstance(values, cls._data_cls), type(values) result = object.__new__(cls) result._data = values result._name = name result._cache = {} # For groupby perf. See note in indexes/base about _index_data result._index_data = values._data result._reset_identity() return result @property def _is_all_dates(self) -> bool: return True # ------------------------------------------------------------------------ # Abstract data attributes @property def values(self) -> np.ndarray: # Note: PeriodArray overrides this to return an ndarray of objects. return self._data._data def __array_wrap__(self, result, context=None): """ Gets called after a ufunc and other functions. """ result = lib.item_from_zerodim(result) if is_bool_dtype(result) or lib.is_scalar(result): return result attrs = self._get_attributes_dict() if not is_period_dtype(self.dtype) and attrs["freq"]: # no need to infer if freq is None attrs["freq"] = "infer" return Index(result, **attrs) # ------------------------------------------------------------------------ def equals(self, other: object) -> bool: """ Determines if two Index objects contain the same elements. """ if self.is_(other): return True if not isinstance(other, Index): return False elif other.dtype.kind in ["f", "i", "u", "c"]: return False elif not isinstance(other, type(self)): should_try = False inferrable = self._data._infer_matches if other.dtype == object: should_try = other.inferred_type in inferrable elif is_categorical_dtype(other.dtype): other = cast("CategoricalIndex", other) should_try = other.categories.inferred_type in inferrable if should_try: try: other = type(self)(other) except (ValueError, TypeError, OverflowError): # e.g. # ValueError -> cannot parse str entry, or OutOfBoundsDatetime # TypeError -> trying to convert IntervalIndex to DatetimeIndex # OverflowError -> Index([very_large_timedeltas]) return False if not is_dtype_equal(self.dtype, other.dtype): # have different timezone return False return np.array_equal(self.asi8, other.asi8) @Appender(Index.__contains__.__doc__) def __contains__(self, key: Any) -> bool: hash(key) try: res = self.get_loc(key) except (KeyError, TypeError, ValueError): return False return bool( is_scalar(res) or isinstance(res, slice) or (is_list_like(res) and len(res)) ) @Appender(_index_shared_docs["take"] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take((), kwargs) indices = np.asarray(indices, dtype=np.intp) maybe_slice = lib.maybe_indices_to_slice(indices, len(self)) result = NDArrayBackedExtensionIndex.take( self, indices, axis, allow_fill, fill_value, **kwargs ) if isinstance(maybe_slice, slice): freq = self._data._get_getitem_freq(maybe_slice) result._data._freq = freq return result _can_hold_na = True _na_value = NaT """The expected NA value to use with this index.""" def _convert_tolerance(self, tolerance, target): tolerance = np.asarray(to_timedelta(tolerance).to_numpy()) if target.size != tolerance.size and tolerance.size > 1: raise ValueError("list-like tolerance size must match target index size") return tolerance def tolist(self) -> List: """ Return a list of the underlying data. """ return list(self.astype(object)) def min(self, axis=None, skipna=True, *args, **kwargs): """ Return the minimum value of the Index or minimum along an axis. See Also -------- numpy.ndarray.min Series.min : Return the minimum value in a Series. """ nv.validate_min(args, kwargs) nv.validate_minmax_axis(axis) if not len(self): return self._na_value i8 = self.asi8 if len(i8) and self.is_monotonic_increasing: # quick check if i8[0] != iNaT: return self._data._box_func(i8[0]) if self.hasnans: if not skipna: return self._na_value i8 = i8[~self._isnan] if not len(i8): return self._na_value min_stamp = i8.min() return self._data._box_func(min_stamp) def argmin(self, axis=None, skipna=True, *args, **kwargs): """ Returns the indices of the minimum values along an axis. See `numpy.ndarray.argmin` for more information on the `axis` parameter. See Also -------- numpy.ndarray.argmin """ nv.validate_argmin(args, kwargs) nv.validate_minmax_axis(axis) i8 = self.asi8 if self.hasnans: mask = self._isnan if mask.all() or not skipna: return -1 i8 = i8.copy() i8[mask] = np.iinfo("int64").max return i8.argmin() def max(self, axis=None, skipna=True, *args, **kwargs): """ Return the maximum value of the Index or maximum along an axis. See Also -------- numpy.ndarray.max Series.max : Return the maximum value in a Series. """ nv.validate_max(args, kwargs) nv.validate_minmax_axis(axis) if not len(self): return self._na_value i8 = self.asi8 if len(i8) and self.is_monotonic: # quick check if i8[-1] != iNaT: return self._data._box_func(i8[-1]) if self.hasnans: if not skipna: return self._na_value i8 = i8[~self._isnan] if not len(i8): return self._na_value max_stamp = i8.max() return self._data._box_func(max_stamp) def argmax(self, axis=None, skipna=True, *args, **kwargs): """ Returns the indices of the maximum values along an axis. See `numpy.ndarray.argmax` for more information on the `axis` parameter. See Also -------- numpy.ndarray.argmax """ nv.validate_argmax(args, kwargs) nv.validate_minmax_axis(axis) i8 = self.asi8 if self.hasnans: mask = self._isnan if mask.all() or not skipna: return -1 i8 = i8.copy() i8[mask] = 0 return i8.argmax() # -------------------------------------------------------------------- # Rendering Methods def format( self, name: bool = False, formatter: Optional[Callable] = None, na_rep: str = "NaT", date_format: Optional[str] = None, ) -> List[str]: """ Render a string representation of the Index. """ header = [] if name: header.append( ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n")) if self.name is not None else "" ) if formatter is not None: return header + list(self.map(formatter)) return self._format_with_header(header, na_rep=na_rep, date_format=date_format) def _format_with_header( self, header: List[str], na_rep: str = "NaT", date_format: Optional[str] = None ) -> List[str]: return header + list( self._format_native_types(na_rep=na_rep, date_format=date_format) ) @property def _formatter_func(self): return self._data._formatter() def _format_attrs(self): """ Return a list of tuples of the (attr,formatted_value). """ attrs = super()._format_attrs() for attrib in self._attributes: if attrib == "freq": freq = self.freqstr if freq is not None: freq = repr(freq) attrs.append(("freq", freq)) return attrs def _summary(self, name=None) -> str: """ Return a summarized representation. Parameters ---------- name : str Name to use in the summary representation. Returns ------- str Summarized representation of the index. """ formatter = self._formatter_func if len(self) > 0: index_summary = f", {formatter(self[0])} to {formatter(self[-1])}" else: index_summary = "" if name is None: name = type(self).__name__ result = f"{name}: {len(self)} entries{index_summary}" if self.freq: result += f"\nFreq: {self.freqstr}" # display as values, not quoted result = result.replace("'", "") return result # -------------------------------------------------------------------- # Indexing Methods def _validate_partial_date_slice(self, reso: Resolution): raise NotImplementedError def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime): raise NotImplementedError def _partial_date_slice( self, reso: Resolution, parsed: datetime, ): """ Parameters ---------- reso : Resolution parsed : datetime Returns ------- slice or ndarray[intp] """ self._validate_partial_date_slice(reso) t1, t2 = self._parsed_string_to_bounds(reso, parsed) vals = self._data._ndarray unbox = self._data._unbox if self.is_monotonic_increasing: if len(self) and ( (t1 < self[0] and t2 < self[0]) or (t1 > self[-1] and t2 > self[-1]) ): # we are out of range raise KeyError # TODO: does this depend on being monotonic _increasing_? # a monotonic (sorted) series can be sliced left = vals.searchsorted(unbox(t1), side="left") right = vals.searchsorted(unbox(t2), side="right") return slice(left, right) else: lhs_mask = vals >= unbox(t1) rhs_mask = vals <= unbox(t2) # try to find the dates return (lhs_mask & rhs_mask).nonzero()[0] # -------------------------------------------------------------------- # Arithmetic Methods __add__ = make_wrapped_arith_op("__add__") __sub__ = make_wrapped_arith_op("__sub__") __radd__ = make_wrapped_arith_op("__radd__") __rsub__ = make_wrapped_arith_op("__rsub__") __pow__ = make_wrapped_arith_op("__pow__") __rpow__ = make_wrapped_arith_op("__rpow__") __mul__ = make_wrapped_arith_op("__mul__") __rmul__ = make_wrapped_arith_op("__rmul__") __floordiv__ = make_wrapped_arith_op("__floordiv__") __rfloordiv__ = make_wrapped_arith_op("__rfloordiv__") __mod__ = make_wrapped_arith_op("__mod__") __rmod__ = make_wrapped_arith_op("__rmod__") __divmod__ = make_wrapped_arith_op("__divmod__") __rdivmod__ = make_wrapped_arith_op("__rdivmod__") __truediv__ = make_wrapped_arith_op("__truediv__") __rtruediv__ = make_wrapped_arith_op("__rtruediv__") def shift(self, periods=1, freq=None): """ Shift index by desired number of time frequency increments. This method is for shifting the values of datetime-like indexes by a specified time increment a given number of times. Parameters ---------- periods : int, default 1 Number of periods (or increments) to shift by, can be positive or negative. .. versionchanged:: 0.24.0 freq : pandas.DateOffset, pandas.Timedelta or string, optional Frequency increment to shift by. If None, the index is shifted by its own `freq` attribute. Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc. Returns ------- pandas.DatetimeIndex Shifted index. See Also -------- Index.shift : Shift values of Index. PeriodIndex.shift : Shift values of PeriodIndex. """ arr = self._data.view() arr._freq = self.freq result = arr._time_shift(periods, freq=freq) return type(self)(result, name=self.name) # -------------------------------------------------------------------- # List-like Methods def _get_delete_freq(self, loc: int): """ Find the `freq` for self.delete(loc). """ freq = None if is_period_dtype(self.dtype): freq = self.freq elif self.freq is not None: if is_integer(loc): if loc in (0, -len(self), -1, len(self) - 1): freq = self.freq else: if is_list_like(loc): loc = lib.maybe_indices_to_slice( np.asarray(loc, dtype=np.intp), len(self) ) if isinstance(loc, slice) and loc.step in (1, None): if loc.start in (0, None) or loc.stop in (len(self), None): freq = self.freq return freq def _get_insert_freq(self, loc, item): """ Find the `freq` for self.insert(loc, item). """ value = self._data._validate_scalar(item) item = self._data._box_func(value) freq = None if is_period_dtype(self.dtype): freq = self.freq elif self.freq is not None: # freq can be preserved on edge cases if self.size: if item is NaT: pass elif (loc == 0 or loc == -len(self)) and item + self.freq == self[0]: freq = self.freq elif (loc == len(self)) and item - self.freq == self[-1]: freq = self.freq else: # Adding a single item to an empty index may preserve freq if self.freq.is_on_offset(item): freq = self.freq return freq @doc(NDArrayBackedExtensionIndex.delete) def delete(self, loc): result = super().delete(loc) result._data._freq = self._get_delete_freq(loc) return result @doc(NDArrayBackedExtensionIndex.insert) def insert(self, loc: int, item): result = super().insert(loc, item) result._data._freq = self._get_insert_freq(loc, item) return result # -------------------------------------------------------------------- # Join/Set Methods def _can_union_without_object_cast(self, other) -> bool: return is_dtype_equal(self.dtype, other.dtype) def _get_join_freq(self, other): """ Get the freq to attach to the result of a join operation. """ if is_period_dtype(self.dtype): freq = self.freq else: self = cast(DatetimeTimedeltaMixin, self) freq = self.freq if self._can_fast_union(other) else None return freq def _wrap_joined_index(self, joined: np.ndarray, other): assert other.dtype == self.dtype, (other.dtype, self.dtype) result = super()._wrap_joined_index(joined, other) result._data._freq = self._get_join_freq(other) return result @doc(Index._convert_arr_indexer) def _convert_arr_indexer(self, keyarr): try: return self._data._validate_listlike(keyarr, allow_object=True) except (ValueError, TypeError): return com.asarray_tuplesafe(keyarr) class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, Int64Index): """ Mixin class for methods shared by DatetimeIndex and TimedeltaIndex, but not PeriodIndex """ # Compat for frequency inference, see GH#23789 _is_monotonic_increasing = Index.is_monotonic_increasing _is_monotonic_decreasing = Index.is_monotonic_decreasing _is_unique = Index.is_unique def _with_freq(self, freq): arr = self._data._with_freq(freq) return type(self)._simple_new(arr, name=self.name) @property def _has_complex_internals(self) -> bool: # used to avoid libreduction code paths, which raise or require conversion return False def is_type_compatible(self, kind: str) -> bool: return kind in self._data._infer_matches # -------------------------------------------------------------------- # Set Operation Methods @Appender(Index.difference.__doc__) def difference(self, other, sort=None): new_idx = super().difference(other, sort=sort)._with_freq(None) return new_idx def intersection(self, other, sort=False): """ Specialized intersection for DatetimeIndex/TimedeltaIndex. May be much faster than Index.intersection Parameters ---------- other : Same type as self or array-like sort : False or None, default False Sort the resulting index if possible. .. versionadded:: 0.24.0 .. versionchanged:: 0.24.1 Changed the default to ``False`` to match the behaviour from before 0.24.0. .. versionchanged:: 0.25.0 The `sort` keyword is added Returns ------- y : Index or same type as self """ self._validate_sort_keyword(sort) self._assert_can_do_setop(other) other, _ = self._convert_can_do_setop(other) if self.equals(other): if self.has_duplicates: return self.unique()._get_reconciled_name_object(other) return self._get_reconciled_name_object(other) return self._intersection(other, sort=sort) def _intersection(self, other: Index, sort=False) -> Index: """ intersection specialized to the case with matching dtypes. """ if len(self) == 0: return self.copy()._get_reconciled_name_object(other) if len(other) == 0: return other.copy()._get_reconciled_name_object(self) if not isinstance(other, type(self)): result = Index.intersection(self, other, sort=sort) return result elif not self._can_fast_intersect(other): result = Index._intersection(self, other, sort=sort) # We need to invalidate the freq because Index._intersection # uses _shallow_copy on a view of self._data, which will preserve # self.freq if we're not careful. result = self._wrap_setop_result(other, result) return result._with_freq(None)._with_freq("infer") # to make our life easier, "sort" the two ranges if self[0] <= other[0]: left, right = self, other else: left, right = other, self # after sorting, the intersection always starts with the right index # and ends with the index of which the last elements is smallest end = min(left[-1], right[-1]) start = right[0] if end < start: result = self[:0] else: lslice = slice(*left.slice_locs(start, end)) left_chunk = left._values[lslice] # error: Argument 1 to "_simple_new" of "DatetimeIndexOpsMixin" has # incompatible type "Union[ExtensionArray, Any]"; expected # "Union[DatetimeArray, TimedeltaArray, PeriodArray]" [arg-type] result = type(self)._simple_new(left_chunk) # type: ignore[arg-type] return self._wrap_setop_result(other, result) def _can_fast_intersect(self: _T, other: _T) -> bool: if self.freq is None: return False elif other.freq != self.freq: return False elif not self.is_monotonic_increasing: # Because freq is not None, we must then be monotonic decreasing return False elif self.freq.is_anchored(): # this along with matching freqs ensure that we "line up", # so intersection will preserve freq return True elif isinstance(self.freq, Tick): # We "line up" if and only if the difference between two of our points # is a multiple of our freq diff = self[0] - other[0] remainder = diff % self.freq.delta return remainder == Timedelta(0) return True def _can_fast_union(self: _T, other: _T) -> bool: # Assumes that type(self) == type(other), as per the annotation # The ability to fast_union also implies that `freq` should be # retained on union. if not isinstance(other, type(self)): return False freq = self.freq if freq is None or freq != other.freq: return False if not self.is_monotonic_increasing: # Because freq is not None, we must then be monotonic decreasing # TODO: do union on the reversed indexes? return False if len(self) == 0 or len(other) == 0: return True # to make our life easier, "sort" the two ranges if self[0] <= other[0]: left, right = self, other else: left, right = other, self right_start = right[0] left_end = left[-1] # Only need to "adjoin", not overlap return (right_start == left_end + freq) or right_start in left def _fast_union(self, other, sort=None): if len(other) == 0: return self.view(type(self)) if len(self) == 0: return other.view(type(self)) # to make our life easier, "sort" the two ranges if self[0] <= other[0]: left, right = self, other elif sort is False: # TDIs are not in the "correct" order and we don't want # to sort but want to remove overlaps left, right = self, other left_start = left[0] loc = right.searchsorted(left_start, side="left") right_chunk = right._values[:loc] dates = concat_compat((left._values, right_chunk)) # With sort being False, we can't infer that result.freq == self.freq # TODO: no tests rely on the _with_freq("infer"); needed? result = self._shallow_copy(dates)._with_freq("infer") return result else: left, right = other, self left_end = left[-1] right_end = right[-1] # concatenate if left_end < right_end: loc = right.searchsorted(left_end, side="right") right_chunk = right._values[loc:] dates = concat_compat([left._values, right_chunk]) # The can_fast_union check ensures that the result.freq # should match self.freq dates = type(self._data)(dates, freq=self.freq) result = type(self)._simple_new(dates) return result else: return left def _union(self, other, sort): if not len(other) or self.equals(other) or not len(self): return super()._union(other, sort=sort) # We are called by `union`, which is responsible for this validation assert isinstance(other, type(self)) this, other = self._maybe_utc_convert(other) if this._can_fast_union(other): result = this._fast_union(other, sort=sort) if sort is None: # In the case where sort is None, _can_fast_union # implies that result.freq should match self.freq assert result.freq == self.freq, (result.freq, self.freq) elif result.freq is None: # TODO: no tests rely on this; needed? result = result._with_freq("infer") return result else: i8self = Int64Index._simple_new(self.asi8) i8other = Int64Index._simple_new(other.asi8) i8result = i8self._union(i8other, sort=sort) # pandas\core\indexes\datetimelike.py:887: error: Unexpected # keyword argument "freq" for "DatetimeTimedeltaMixin" [call-arg] result = type(self)( i8result, dtype=self.dtype, freq="infer" # type: ignore[call-arg] ) return result # -------------------------------------------------------------------- # Join Methods _join_precedence = 10 _inner_indexer = _join_i8_wrapper(libjoin.inner_join_indexer) _outer_indexer = _join_i8_wrapper(libjoin.outer_join_indexer) _left_indexer = _join_i8_wrapper(libjoin.left_join_indexer) _left_indexer_unique = _join_i8_wrapper( libjoin.left_join_indexer_unique, with_indexers=False ) def join( self, other, how: str = "left", level=None, return_indexers=False, sort=False ): """ See Index.join """ pself, pother = self._maybe_promote(other) if pself is not self or pother is not other: return pself.join( pother, how=how, level=level, return_indexers=return_indexers, sort=sort ) this, other = self._maybe_utc_convert(other) return Index.join( this, other, how=how, level=level, return_indexers=return_indexers, sort=sort, ) def _maybe_utc_convert(self: _T, other: Index) -> Tuple[_T, Index]: # Overridden by DatetimeIndex return self, other # -------------------------------------------------------------------- # List-Like Methods @Appender(DatetimeIndexOpsMixin.insert.__doc__) def insert(self, loc, item): if isinstance(item, str): # TODO: Why are strings special? # TODO: Should we attempt _scalar_from_string? return self.astype(object).insert(loc, item) return DatetimeIndexOpsMixin.insert(self, loc, item)