918 lines
30 KiB
Python
918 lines
30 KiB
Python
"""
|
|
Base and utility classes for tseries type pandas objects.
|
|
"""
|
|
from datetime import datetime
|
|
from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Type, TypeVar, Union, cast
|
|
|
|
import numpy as np
|
|
|
|
from pandas._libs import NaT, Timedelta, iNaT, join as libjoin, lib
|
|
from pandas._libs.tslibs import BaseOffset, Resolution, Tick
|
|
from pandas._typing import Callable, Label
|
|
from pandas.compat.numpy import function as nv
|
|
from pandas.util._decorators import Appender, cache_readonly, doc
|
|
|
|
from pandas.core.dtypes.common import (
|
|
is_bool_dtype,
|
|
is_categorical_dtype,
|
|
is_dtype_equal,
|
|
is_integer,
|
|
is_list_like,
|
|
is_period_dtype,
|
|
is_scalar,
|
|
)
|
|
from pandas.core.dtypes.concat import concat_compat
|
|
from pandas.core.dtypes.generic import ABCSeries
|
|
|
|
from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
|
|
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
|
|
import pandas.core.common as com
|
|
import pandas.core.indexes.base as ibase
|
|
from pandas.core.indexes.base import Index, _index_shared_docs
|
|
from pandas.core.indexes.extension import (
|
|
NDArrayBackedExtensionIndex,
|
|
inherit_names,
|
|
make_wrapped_arith_op,
|
|
)
|
|
from pandas.core.indexes.numeric import Int64Index
|
|
from pandas.core.tools.timedeltas import to_timedelta
|
|
|
|
if TYPE_CHECKING:
|
|
from pandas import CategoricalIndex
|
|
|
|
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
|
|
|
|
_T = TypeVar("_T", bound="DatetimeIndexOpsMixin")
|
|
|
|
|
|
def _join_i8_wrapper(joinf, with_indexers: bool = True):
    """
    Build a staticmethod that runs ``joinf`` on int64 views of its operands.

    Parameters
    ----------
    joinf : callable
        Join routine called as ``joinf(left, right)``.
    with_indexers : bool, default True
        If True, ``joinf`` is expected to return a 3-tuple
        ``(join_index, left_indexer, right_indexer)`` and ``join_index`` is
        re-viewed before being returned. If False, the result of ``joinf``
        is returned untouched.

    Returns
    -------
    staticmethod
    """

    def _as_i8(obj):
        # Only these array-likes are re-viewed; anything else passes through.
        # The tuple is built at call time because DatetimeIndexOpsMixin is
        # defined further down in this module.
        viewable = (
            np.ndarray,
            DatetimeIndexOpsMixin,
            ABCSeries,
            DatetimeLikeArrayMixin,
        )
        return obj.view("i8") if isinstance(obj, viewable) else obj

    # error: 'staticmethod' used with a non-method
    @staticmethod  # type: ignore[misc]
    def wrapper(left, right):
        # Note: these only get called with left.dtype == right.dtype
        left, right = _as_i8(left), _as_i8(right)

        results = joinf(left, right)
        if not with_indexers:
            return results

        # NOTE(review): ``left`` may already have been re-viewed as i8 above,
        #  in which case this is the int64 dtype rather than the
        #  datetime64[ns]/timedelta64[ns] dtype one might expect -- confirm.
        target_dtype = cast(np.dtype, left.dtype).base

        join_index, left_indexer, right_indexer = results
        return join_index.view(target_dtype), left_indexer, right_indexer

    return wrapper
|
|
|
|
|
|
# The names listed below are supplied through ``inherit_names`` from
# DatetimeLikeArrayMixin; the first group is requested with ``cache=True``.
@inherit_names(
    ["inferred_freq", "_resolution_obj", "resolution"],
    DatetimeLikeArrayMixin,
    cache=True,
)
@inherit_names(["mean", "asi8", "freq", "freqstr"], DatetimeLikeArrayMixin)
class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex):
    """
    Common ops mixin to support a unified interface datetimelike Index.
    """

    _can_hold_strings = False

    # Backing array instance and the array class accepted by ``_simple_new``.
    _data: Union[DatetimeArray, TimedeltaArray, PeriodArray]
    _data_cls: Union[Type[DatetimeArray], Type[TimedeltaArray], Type[PeriodArray]]

    # Declared for type checkers; the attributes themselves come from the
    # ``inherit_names`` decorators above.
    freq: Optional[BaseOffset]
    freqstr: Optional[str]
    _resolution_obj: Resolution

    # Empty on this mixin.
    # NOTE(review): presumably filled in by concrete index classes -- confirm.
    _bool_ops: List[str] = []
    _field_ops: List[str] = []

    # Reuse the array-level ``_hasnans`` getter, cached per index instance.
    # error: "Callable[[Any], Any]" has no attribute "fget"
    hasnans = cache_readonly(
        DatetimeLikeArrayMixin._hasnans.fget  # type: ignore[attr-defined]
    )
    _hasnans = hasnans  # for index / array -agnostic code
|
|
|
|
@classmethod
def _simple_new(
    cls,
    values: Union[DatetimeArray, TimedeltaArray, PeriodArray],
    name: Label = None,
):
    """
    Wrap an already-constructed datetime-like array in a new index.

    ``values`` must be an instance of ``cls._data_cls``; the instance is
    created with ``object.__new__`` and its attributes are set directly.
    """
    assert isinstance(values, cls._data_cls), type(values)

    new_index = object.__new__(cls)
    new_index._data = values
    new_index._name = name
    new_index._cache = {}

    # For groupby perf. See note in indexes/base about _index_data
    new_index._index_data = values._data

    new_index._reset_identity()
    return new_index
|
|
|
|
@property
def _is_all_dates(self) -> bool:
    """
    Always True for this mixin.
    """
    return True
|
|
|
|
# ------------------------------------------------------------------------
|
|
# Abstract data attributes
|
|
|
|
@property
def values(self) -> np.ndarray:
    """
    Return the ``_data`` attribute of the backing array.
    """
    # Note: PeriodArray overrides this to return an ndarray of objects.
    backing_array = self._data
    return backing_array._data
|
|
|
|
def __array_wrap__(self, result, context=None):
    """
    Post-process the output of a ufunc (or similar) applied to this index.

    Boolean results and scalars are returned as-is; anything else is wrapped
    in an ``Index`` carrying this index's attributes.
    """
    unboxed = lib.item_from_zerodim(result)
    if is_bool_dtype(unboxed):
        return unboxed
    if lib.is_scalar(unboxed):
        return unboxed

    index_kwargs = self._get_attributes_dict()
    if not is_period_dtype(self.dtype):
        if index_kwargs["freq"]:
            # no need to infer if freq is None
            index_kwargs["freq"] = "infer"
    return Index(unboxed, **index_kwargs)
|
|
|
|
# ------------------------------------------------------------------------
|
|
|
|
def equals(self, other: object) -> bool:
    """
    Determine whether ``other`` is an Index holding the same elements.

    Object-dtype and categorical indexes whose inferred type matches this
    index's array type are first converted to ``type(self)``; a failed
    conversion means "not equal".
    """
    if self.is_(other):
        return True
    if not isinstance(other, Index):
        return False
    if other.dtype.kind in ["f", "i", "u", "c"]:
        return False

    if not isinstance(other, type(self)):
        matching_kinds = self._data._infer_matches
        if other.dtype == object:
            convertible = other.inferred_type in matching_kinds
        elif is_categorical_dtype(other.dtype):
            other = cast("CategoricalIndex", other)
            convertible = other.categories.inferred_type in matching_kinds
        else:
            convertible = False

        if convertible:
            try:
                other = type(self)(other)
            except (ValueError, TypeError, OverflowError):
                # e.g.
                #  ValueError -> cannot parse str entry, or OutOfBoundsDatetime
                #  TypeError  -> trying to convert IntervalIndex to DatetimeIndex
                #  OverflowError -> Index([very_large_timedeltas])
                return False

    same_dtype = is_dtype_equal(self.dtype, other.dtype)
    if not same_dtype:
        # have different timezone
        return False

    return np.array_equal(self.asi8, other.asi8)
|
|
|
|
@Appender(Index.__contains__.__doc__)
def __contains__(self, key: Any) -> bool:
    # No docstring here on purpose: Appender supplies it from Index.__contains__.
    # hash() is called only for its side effect of raising on unhashable keys.
    hash(key)
    try:
        loc = self.get_loc(key)
    except (KeyError, TypeError, ValueError):
        return False

    # A scalar position or a slice counts as a hit; a list-like result only
    # counts when it is non-empty.
    if is_scalar(loc) or isinstance(loc, slice):
        return True
    return bool(is_list_like(loc) and len(loc))
|
|
|
|
@Appender(_index_shared_docs["take"] % _index_doc_kwargs)
def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
    # No docstring here on purpose: Appender supplies the shared "take" docs.
    nv.validate_take((), kwargs)
    positions = np.asarray(indices, dtype=np.intp)

    # Computed before taking, to learn whether the request is really a slice.
    as_slice = lib.maybe_indices_to_slice(positions, len(self))

    taken = NDArrayBackedExtensionIndex.take(
        self, positions, axis, allow_fill, fill_value, **kwargs
    )
    if not isinstance(as_slice, slice):
        return taken

    # For a slice-equivalent take, set freq to what slicing would produce.
    taken._data._freq = self._data._get_getitem_freq(as_slice)
    return taken
|
|
|
|
# This mixin declares that it can hold missing values ...
_can_hold_na = True

# ... and that NaT is the value used to represent them.
_na_value = NaT
"""The expected NA value to use with this index."""
|
|
|
|
def _convert_tolerance(self, tolerance, target):
    """
    Convert ``tolerance`` to a NumPy array of timedeltas.

    Raises
    ------
    ValueError
        If the converted tolerance has more than one element and its size
        differs from ``target.size``.
    """
    as_timedelta = to_timedelta(tolerance)
    converted = np.asarray(as_timedelta.to_numpy())

    size_mismatch = target.size != converted.size
    if size_mismatch and converted.size > 1:
        raise ValueError("list-like tolerance size must match target index size")
    return converted
|
|
|
|
def tolist(self) -> List:
    """
    Return the index elements as a list, after casting to object dtype.
    """
    as_objects = self.astype(object)
    return list(as_objects)
|
|
|
|
def min(self, axis=None, skipna=True, *args, **kwargs):
    """
    Return the minimum value of the Index or minimum along
    an axis.

    Returns ``self._na_value`` for an empty index, when no non-missing
    values remain, or when missing values are present and ``skipna`` is
    False.

    See Also
    --------
    numpy.ndarray.min
    Series.min : Return the minimum value in a Series.
    """
    nv.validate_min(args, kwargs)
    nv.validate_minmax_axis(axis)

    if len(self) == 0:
        return self._na_value

    ordinals = self.asi8

    if len(ordinals) and self.is_monotonic_increasing:
        # quick check: in a sorted index the first element is the minimum
        first = ordinals[0]
        if first != iNaT:
            return self._data._box_func(first)

    if self.hasnans:
        if not skipna:
            return self._na_value
        ordinals = ordinals[~self._isnan]

    if len(ordinals) == 0:
        return self._na_value

    return self._data._box_func(ordinals.min())
|
|
|
|
def argmin(self, axis=None, skipna=True, *args, **kwargs):
    """
    Returns the indices of the minimum values along an axis.

    See `numpy.ndarray.argmin` for more information on the
    `axis` parameter.

    Returns -1 when every element is missing, or when missing values are
    present and ``skipna`` is False.

    See Also
    --------
    numpy.ndarray.argmin
    """
    nv.validate_argmin(args, kwargs)
    nv.validate_minmax_axis(axis)

    ordinals = self.asi8
    if not self.hasnans:
        return ordinals.argmin()

    na_mask = self._isnan
    if na_mask.all() or not skipna:
        return -1

    # Work on a copy and push missing slots to the largest int64 so they
    # can never be selected as the minimum.
    filled = ordinals.copy()
    filled[na_mask] = np.iinfo("int64").max
    return filled.argmin()
|
|
|
|
def max(self, axis=None, skipna=True, *args, **kwargs):
    """
    Return the maximum value of the Index or maximum along
    an axis.

    Returns ``self._na_value`` for an empty index, when no non-missing
    values remain, or when missing values are present and ``skipna`` is
    False.

    See Also
    --------
    numpy.ndarray.max
    Series.max : Return the maximum value in a Series.
    """
    nv.validate_max(args, kwargs)
    nv.validate_minmax_axis(axis)

    if not len(self):
        return self._na_value

    i8 = self.asi8

    # Use the explicit ``is_monotonic_increasing`` (as ``min`` does) rather
    # than the ``is_monotonic`` alias.
    if len(i8) and self.is_monotonic_increasing:
        # quick check: in a sorted index the last element is the maximum
        if i8[-1] != iNaT:
            return self._data._box_func(i8[-1])

    if self.hasnans:
        if not skipna:
            return self._na_value
        i8 = i8[~self._isnan]

    if not len(i8):
        return self._na_value

    max_stamp = i8.max()
    return self._data._box_func(max_stamp)
|
|
|
|
def argmax(self, axis=None, skipna=True, *args, **kwargs):
    """
    Returns the indices of the maximum values along an axis.

    See `numpy.ndarray.argmax` for more information on the
    `axis` parameter.

    Returns -1 when every element is missing, or when missing values are
    present and ``skipna`` is False.

    See Also
    --------
    numpy.ndarray.argmax
    """
    nv.validate_argmax(args, kwargs)
    nv.validate_minmax_axis(axis)

    i8 = self.asi8
    if self.hasnans:
        mask = self._isnan
        if mask.all() or not skipna:
            return -1
        i8 = i8.copy()
        # Missing slots must never win. Filling with 0 would beat every
        # valid value when all of them are negative (pre-epoch timestamps,
        # negative timedeltas), so use the smallest int64 instead -- the
        # mirror image of what ``argmin`` does with the largest int64.
        i8[mask] = np.iinfo("int64").min
    return i8.argmax()
|
|
|
|
# --------------------------------------------------------------------
|
|
# Rendering Methods
|
|
|
|
def format(
    self,
    name: bool = False,
    formatter: Optional[Callable] = None,
    na_rep: str = "NaT",
    date_format: Optional[str] = None,
) -> List[str]:
    """
    Render a string representation of the Index.

    When ``name`` is true the first entry is the pretty-printed index name
    (an empty string if the name is None). When ``formatter`` is given it is
    mapped over the index; otherwise the native formatting is used with
    ``na_rep`` and ``date_format``.
    """
    header = []
    if name:
        if self.name is None:
            title = ""
        else:
            title = ibase.pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
        header.append(title)

    if formatter is None:
        return self._format_with_header(
            header, na_rep=na_rep, date_format=date_format
        )

    return header + list(self.map(formatter))
|
|
|
|
def _format_with_header(
    self, header: List[str], na_rep: str = "NaT", date_format: Optional[str] = None
) -> List[str]:
    """
    Return ``header`` followed by the natively formatted index values.
    """
    rendered = self._format_native_types(na_rep=na_rep, date_format=date_format)
    return header + list(rendered)
|
|
|
|
@property
def _formatter_func(self):
    """
    Return the formatter produced by the backing array's ``_formatter()``.
    """
    backing_array = self._data
    return backing_array._formatter()
|
|
|
|
def _format_attrs(self):
    """
    Return a list of ``(attr, formatted_value)`` tuples.

    Extends the parent's list with a ``"freq"`` entry (the repr of
    ``freqstr``, or None) when ``"freq"`` is among ``self._attributes``.
    """
    attrs = super()._format_attrs()
    for attrib in self._attributes:
        if attrib != "freq":
            continue
        freqstr = self.freqstr
        shown = None if freqstr is None else repr(freqstr)
        attrs.append(("freq", shown))
    return attrs
|
|
|
|
def _summary(self, name=None) -> str:
    """
    Return a summarized representation.

    Parameters
    ----------
    name : str, optional
        Name to use in the summary representation; defaults to the class
        name.

    Returns
    -------
    str
        Summarized representation of the index, with single quotes removed.
    """
    fmt = self._formatter_func
    span = f", {fmt(self[0])} to {fmt(self[-1])}" if len(self) > 0 else ""

    label = type(self).__name__ if name is None else name
    summary = f"{label}: {len(self)} entries{span}"
    if self.freq:
        summary += f"\nFreq: {self.freqstr}"

    # display as values, not quoted
    return summary.replace("'", "")
|
|
|
|
# --------------------------------------------------------------------
|
|
# Indexing Methods
|
|
|
|
def _validate_partial_date_slice(self, reso: Resolution):
    """
    Not implemented on this mixin; always raises NotImplementedError.

    ``_partial_date_slice`` calls this first, so classes using that method
    presumably provide their own implementation.
    """
    raise NotImplementedError
|
|
|
|
def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
    """
    Not implemented on this mixin; always raises NotImplementedError.

    ``_partial_date_slice`` unpacks the result into a pair of bounds, so
    classes using that method presumably provide their own implementation.
    """
    raise NotImplementedError
|
|
|
|
def _partial_date_slice(
    self,
    reso: Resolution,
    parsed: datetime,
):
    """
    Locate the entries falling within the bounds implied by ``parsed``.

    Parameters
    ----------
    reso : Resolution
    parsed : datetime

    Returns
    -------
    slice or ndarray[intp]
        A slice when the index is monotonic increasing, otherwise the
        positions of the matching entries.

    Raises
    ------
    KeyError
        If the index is monotonic increasing, non-empty, and both bounds
        lie entirely before the first or entirely after the last element.
    """
    self._validate_partial_date_slice(reso)

    lower, upper = self._parsed_string_to_bounds(reso, parsed)
    values = self._data._ndarray
    unbox = self._data._unbox

    if not self.is_monotonic_increasing:
        # unsorted: fall back to a boolean mask and report the hits
        in_bounds = (values >= unbox(lower)) & (values <= unbox(upper))
        return in_bounds.nonzero()[0]

    if len(self):
        entirely_before = lower < self[0] and upper < self[0]
        if entirely_before or (lower > self[-1] and upper > self[-1]):
            # we are out of range
            raise KeyError

    # TODO: does this depend on being monotonic _increasing_?

    # a monotonic (sorted) series can be sliced
    start = values.searchsorted(unbox(lower), side="left")
    stop = values.searchsorted(unbox(upper), side="right")
    return slice(start, stop)
|
|
|
|
# --------------------------------------------------------------------
|
|
# Arithmetic Methods
|
|
|
|
# Every arithmetic dunder (forward and reflected) is generated by
# ``make_wrapped_arith_op`` from the operator's own name.
__add__ = make_wrapped_arith_op("__add__")
__sub__ = make_wrapped_arith_op("__sub__")
__radd__ = make_wrapped_arith_op("__radd__")
__rsub__ = make_wrapped_arith_op("__rsub__")
__pow__ = make_wrapped_arith_op("__pow__")
__rpow__ = make_wrapped_arith_op("__rpow__")
__mul__ = make_wrapped_arith_op("__mul__")
__rmul__ = make_wrapped_arith_op("__rmul__")
__floordiv__ = make_wrapped_arith_op("__floordiv__")
__rfloordiv__ = make_wrapped_arith_op("__rfloordiv__")
__mod__ = make_wrapped_arith_op("__mod__")
__rmod__ = make_wrapped_arith_op("__rmod__")
__divmod__ = make_wrapped_arith_op("__divmod__")
__rdivmod__ = make_wrapped_arith_op("__rdivmod__")
__truediv__ = make_wrapped_arith_op("__truediv__")
__rtruediv__ = make_wrapped_arith_op("__rtruediv__")
|
|
|
|
def shift(self, periods=1, freq=None):
    """
    Shift index by desired number of time frequency increments.

    This method is for shifting the values of datetime-like indexes
    by a specified time increment a given number of times.

    Parameters
    ----------
    periods : int, default 1
        Number of periods (or increments) to shift by,
        can be positive or negative.

        .. versionchanged:: 0.24.0

    freq : pandas.DateOffset, pandas.Timedelta or string, optional
        Frequency increment to shift by.
        If None, the index is shifted by its own `freq` attribute.
        Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.

    Returns
    -------
    pandas.DatetimeIndex
        Shifted index.

    See Also
    --------
    Index.shift : Shift values of Index.
    PeriodIndex.shift : Shift values of PeriodIndex.
    """
    # Shift a view of the backing array so that assigning ``_freq`` does
    # not touch ``self._data`` itself.
    working = self._data.view()
    working._freq = self.freq
    shifted = working._time_shift(periods, freq=freq)
    return type(self)(shifted, name=self.name)
|
|
|
|
# --------------------------------------------------------------------
|
|
# List-like Methods
|
|
|
|
def _get_delete_freq(self, loc: int):
    """
    Find the `freq` for self.delete(loc).

    Period dtype always keeps its freq. Otherwise the freq is kept only
    when the deletion removes elements from one end of the index: an
    integer ``loc`` at the first or last position, or a step-1 slice (or
    list-like convertible to one) touching the start or the end.
    """
    if is_period_dtype(self.dtype):
        return self.freq
    if self.freq is None:
        return None

    if is_integer(loc):
        edge_positions = (0, -len(self), -1, len(self) - 1)
        return self.freq if loc in edge_positions else None

    if is_list_like(loc):
        loc = lib.maybe_indices_to_slice(np.asarray(loc, dtype=np.intp), len(self))

    if not (isinstance(loc, slice) and loc.step in (1, None)):
        return None
    if loc.start in (0, None) or loc.stop in (len(self), None):
        return self.freq
    return None
|
|
|
|
def _get_insert_freq(self, loc, item):
    """
    Find the `freq` for self.insert(loc, item).

    ``item`` is validated and boxed by the backing array first. Period
    dtype always keeps its freq; otherwise the freq survives only when the
    new item extends the index by exactly one step at either end, or when
    the index is empty and the item lies on the offset.
    """
    boxed = self._data._box_func(self._data._validate_scalar(item))

    if is_period_dtype(self.dtype):
        return self.freq
    if self.freq is None:
        return None

    if not self.size:
        # Adding a single item to an empty index may preserve freq
        return self.freq if self.freq.is_on_offset(boxed) else None

    # freq can be preserved on edge cases
    if boxed is NaT:
        return None
    if (loc == 0 or loc == -len(self)) and boxed + self.freq == self[0]:
        return self.freq
    if (loc == len(self)) and boxed - self.freq == self[-1]:
        return self.freq
    return None
|
|
|
|
@doc(NDArrayBackedExtensionIndex.delete)
def delete(self, loc):
    # No docstring here on purpose: ``doc`` supplies it from the parent.
    # Delete via the parent, then set the freq that survives the deletion.
    trimmed = super().delete(loc)
    surviving_freq = self._get_delete_freq(loc)
    trimmed._data._freq = surviving_freq
    return trimmed
|
|
|
|
@doc(NDArrayBackedExtensionIndex.insert)
def insert(self, loc: int, item):
    # No docstring here on purpose: ``doc`` supplies it from the parent.
    # Insert via the parent, then set the freq that survives the insertion.
    extended = super().insert(loc, item)

    surviving_freq = self._get_insert_freq(loc, item)
    extended._data._freq = surviving_freq
    return extended
|
|
|
|
# --------------------------------------------------------------------
|
|
# Join/Set Methods
|
|
|
|
def _can_union_without_object_cast(self, other) -> bool:
    """
    Return whether ``other`` has the same dtype as this index.
    """
    dtypes_match = is_dtype_equal(self.dtype, other.dtype)
    return dtypes_match
|
|
|
|
def _get_join_freq(self, other):
    """
    Get the freq to attach to the result of a join operation.

    Period dtype keeps its freq; otherwise the freq is kept only if a fast
    union with ``other`` is possible.
    """
    if is_period_dtype(self.dtype):
        return self.freq

    # Typing-only narrowing: non-period indexes are DatetimeTimedeltaMixin.
    self = cast(DatetimeTimedeltaMixin, self)
    if self._can_fast_union(other):
        return self.freq
    return None
|
|
|
|
def _wrap_joined_index(self, joined: np.ndarray, other):
    """
    Wrap joined values via the parent class and set the join freq on them.

    ``other`` must have the same dtype as this index.
    """
    assert other.dtype == self.dtype, (other.dtype, self.dtype)

    wrapped = super()._wrap_joined_index(joined, other)
    join_freq = self._get_join_freq(other)
    wrapped._data._freq = join_freq
    return wrapped
|
|
|
|
@doc(Index._convert_arr_indexer)
def _convert_arr_indexer(self, keyarr):
    # No docstring here on purpose: ``doc`` supplies it from Index.
    # Prefer the backing array's own list-like validation; if it rejects
    # the keys, fall back to a plain tuple-safe array conversion.
    try:
        validated = self._data._validate_listlike(keyarr, allow_object=True)
    except (ValueError, TypeError):
        return com.asarray_tuplesafe(keyarr)
    return validated
|
|
|
|
|
|
class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, Int64Index):
    """
    Mixin class for methods shared by DatetimeIndex and TimedeltaIndex,
    but not PeriodIndex
    """

    # Compat for frequency inference, see GH#23789
    # Underscore-prefixed aliases of the corresponding Index attributes.
    _is_monotonic_increasing = Index.is_monotonic_increasing
    _is_monotonic_decreasing = Index.is_monotonic_decreasing
    _is_unique = Index.is_unique
|
|
|
|
def _with_freq(self, freq):
    """
    Return a new index of the same type and name whose backing array is
    ``self._data._with_freq(freq)``.
    """
    refreshed = self._data._with_freq(freq)
    index_cls = type(self)
    return index_cls._simple_new(refreshed, name=self.name)
|
|
|
|
@property
def _has_complex_internals(self) -> bool:
    """
    Always False for this mixin.
    """
    # used to avoid libreduction code paths, which raise or require conversion
    return False
|
|
|
|
def is_type_compatible(self, kind: str) -> bool:
    """
    Return whether ``kind`` is among the backing array's ``_infer_matches``.
    """
    accepted_kinds = self._data._infer_matches
    return kind in accepted_kinds
|
|
|
|
# --------------------------------------------------------------------
|
|
# Set Operation Methods
|
|
|
|
@Appender(Index.difference.__doc__)
def difference(self, other, sort=None):
    # No docstring here on purpose: Appender supplies it from Index.difference.
    # The parent's result is returned with its freq reset to None.
    diffed = super().difference(other, sort=sort)
    return diffed._with_freq(None)
|
|
|
|
def intersection(self, other, sort=False):
    """
    Specialized intersection for DatetimeIndex/TimedeltaIndex.

    May be much faster than Index.intersection

    Parameters
    ----------
    other : Same type as self or array-like
    sort : False or None, default False
        Sort the resulting index if possible.

        .. versionadded:: 0.24.0

        .. versionchanged:: 0.24.1

           Changed the default to ``False`` to match the behaviour
           from before 0.24.0.

        .. versionchanged:: 0.25.0

           The `sort` keyword is added

    Returns
    -------
    y : Index or same type as self
    """
    self._validate_sort_keyword(sort)
    self._assert_can_do_setop(other)
    other, _ = self._convert_can_do_setop(other)

    if not self.equals(other):
        return self._intersection(other, sort=sort)

    # Equal operands: the answer is self (de-duplicated if needed), with
    # the name reconciled against ``other``.
    base = self.unique() if self.has_duplicates else self
    return base._get_reconciled_name_object(other)
|
|
|
|
def _intersection(self, other: Index, sort=False) -> Index:
    """
    intersection specialized to the case with matching dtypes.

    Empty operands and operands of a different type are handled first;
    operands that cannot use the fast path go through ``Index._intersection``
    with the freq re-inferred; otherwise the overlap is sliced directly out
    of the operand that starts first.
    """
    if len(self) == 0:
        return self.copy()._get_reconciled_name_object(other)
    if len(other) == 0:
        return other.copy()._get_reconciled_name_object(self)

    if not isinstance(other, type(self)):
        return Index.intersection(self, other, sort=sort)

    if not self._can_fast_intersect(other):
        generic = Index._intersection(self, other, sort=sort)
        # We need to invalidate the freq because Index._intersection
        #  uses _shallow_copy on a view of self._data, which will preserve
        #  self.freq if we're not careful.
        generic = self._wrap_setop_result(other, generic)
        return generic._with_freq(None)._with_freq("infer")

    # to make our life easier, "sort" the two ranges
    left, right = (self, other) if self[0] <= other[0] else (other, self)

    # after sorting, the intersection always starts with the right index
    # and ends with the index of which the last elements is smallest
    end = min(left[-1], right[-1])
    start = right[0]

    if end < start:
        overlap = self[:0]
    else:
        bounds = slice(*left.slice_locs(start, end))
        chunk = left._values[bounds]
        # error: Argument 1 to "_simple_new" of "DatetimeIndexOpsMixin" has
        # incompatible type "Union[ExtensionArray, Any]"; expected
        # "Union[DatetimeArray, TimedeltaArray, PeriodArray]"  [arg-type]
        overlap = type(self)._simple_new(chunk)  # type: ignore[arg-type]

    return self._wrap_setop_result(other, overlap)
|
|
|
|
def _can_fast_intersect(self: _T, other: _T) -> bool:
    """
    Return whether the slicing-based intersection may be used with ``other``.

    Requires a non-None freq equal to ``other.freq`` and a monotonic
    increasing ``self``; for Tick freqs the two indexes must additionally
    be offset by a whole multiple of the freq.
    """
    if self.freq is None or other.freq != self.freq:
        return False

    if not self.is_monotonic_increasing:
        # Because freq is not None, we must then be monotonic decreasing
        return False

    if self.freq.is_anchored():
        # this along with matching freqs ensure that we "line up",
        #  so intersection will preserve freq
        return True

    if isinstance(self.freq, Tick):
        # We "line up" if and only if the difference between two of our points
        #  is a multiple of our freq
        offset = self[0] - other[0]
        return offset % self.freq.delta == Timedelta(0)

    return True
|
|
|
|
def _can_fast_union(self: _T, other: _T) -> bool:
    """
    Return whether ``self`` and ``other`` can be unioned by concatenation.

    Requires the same type, a shared non-None freq and a monotonic
    increasing ``self``. Empty operands always qualify; otherwise the later
    index must start exactly one freq step after the earlier one ends, or
    at a value contained in the earlier one.
    """
    # Assumes that type(self) == type(other), as per the annotation
    # The ability to fast_union also implies that `freq` should be
    #  retained on union.
    if not isinstance(other, type(self)):
        return False

    step = self.freq
    if step is None or step != other.freq:
        return False

    if not self.is_monotonic_increasing:
        # Because freq is not None, we must then be monotonic decreasing
        # TODO: do union on the reversed indexes?
        return False

    if len(self) == 0 or len(other) == 0:
        return True

    # to make our life easier, "sort" the two ranges
    earlier, later = (self, other) if self[0] <= other[0] else (other, self)

    later_start = later[0]
    earlier_end = earlier[-1]

    # Only need to "adjoin", not overlap
    if later_start == earlier_end + step:
        return True
    return later_start in earlier
|
|
|
|
def _fast_union(self, other, sort=None):
    """
    Union by concatenation.

    When ``other`` starts before ``self`` and ``sort`` is False, the part of
    ``other`` preceding ``self``'s first element is appended after ``self``
    and the freq is re-inferred. Otherwise the operand that starts first is
    extended with the tail of the other one, or returned as-is when it
    already reaches at least as far.
    """
    if len(other) == 0:
        return self.view(type(self))

    if len(self) == 0:
        return other.view(type(self))

    # to make our life easier, "sort" the two ranges
    if self[0] <= other[0]:
        left, right = self, other
    elif sort is not False:
        left, right = other, self
    else:
        # TDIs are not in the "correct" order and we don't want
        #  to sort but want to remove overlaps
        cut = other.searchsorted(self[0], side="left")
        leading = other._values[:cut]
        combined = concat_compat((self._values, leading))
        # With sort being False, we can't infer that result.freq == self.freq
        # TODO: no tests rely on the _with_freq("infer"); needed?
        return self._shallow_copy(combined)._with_freq("infer")

    left_end = left[-1]
    right_end = right[-1]

    if not (left_end < right_end):
        return left

    # concatenate
    cut = right.searchsorted(left_end, side="right")
    trailing = right._values[cut:]
    combined = concat_compat([left._values, trailing])
    # The can_fast_union check ensures that the result.freq
    #  should match self.freq
    combined = type(self._data)(combined, freq=self.freq)
    return type(self)._simple_new(combined)
|
|
|
|
def _union(self, other, sort):
    """
    Union with ``other``, using the concatenation fast path when possible.

    Empty or equal operands are delegated to the parent. Otherwise either
    ``_fast_union`` is used, or the int64 values are unioned through
    ``Int64Index`` and rebuilt with an inferred freq.
    """
    if not len(other) or self.equals(other) or not len(self):
        return super()._union(other, sort=sort)

    # We are called by `union`, which is responsible for this validation
    assert isinstance(other, type(self))

    this, other = self._maybe_utc_convert(other)

    if not this._can_fast_union(other):
        left_i8 = Int64Index._simple_new(self.asi8)
        right_i8 = Int64Index._simple_new(other.asi8)
        merged_i8 = left_i8._union(right_i8, sort=sort)
        # pandas\core\indexes\datetimelike.py:887: error: Unexpected
        # keyword argument "freq" for "DatetimeTimedeltaMixin"  [call-arg]
        return type(self)(
            merged_i8, dtype=self.dtype, freq="infer"  # type: ignore[call-arg]
        )

    merged = this._fast_union(other, sort=sort)
    if sort is None:
        # In the case where sort is None, _can_fast_union
        #  implies that result.freq should match self.freq
        assert merged.freq == self.freq, (merged.freq, self.freq)
    elif merged.freq is None:
        # TODO: no tests rely on this; needed?
        merged = merged._with_freq("infer")
    return merged
|
|
|
|
# --------------------------------------------------------------------
|
|
# Join Methods
|
|
# NOTE(review): presumably consulted by Index.join when choosing which
# operand drives the join -- confirm in indexes/base.
_join_precedence = 10

# libjoin routines wrapped by ``_join_i8_wrapper`` so that they operate on
# int64 views of their inputs. ``left_join_indexer_unique`` is wrapped with
# ``with_indexers=False``, so its result is returned as-is.
_inner_indexer = _join_i8_wrapper(libjoin.inner_join_indexer)
_outer_indexer = _join_i8_wrapper(libjoin.outer_join_indexer)
_left_indexer = _join_i8_wrapper(libjoin.left_join_indexer)
_left_indexer_unique = _join_i8_wrapper(
    libjoin.left_join_indexer_unique, with_indexers=False
)
|
|
|
|
def join(
    self, other, how: str = "left", level=None, return_indexers=False, sort=False
):
    """
    See Index.join

    If ``_maybe_promote`` changes either operand, the join is restarted on
    the promoted pair; otherwise both operands go through
    ``_maybe_utc_convert`` and are handed to ``Index.join``.
    """
    join_kwargs = dict(
        how=how, level=level, return_indexers=return_indexers, sort=sort
    )

    pself, pother = self._maybe_promote(other)
    promoted = pself is not self or pother is not other
    if promoted:
        return pself.join(pother, **join_kwargs)

    this, other = self._maybe_utc_convert(other)
    return Index.join(this, other, **join_kwargs)
|
|
|
|
def _maybe_utc_convert(self: _T, other: Index) -> Tuple[_T, Index]:
    """
    Return ``(self, other)`` unchanged; this base version does no conversion.
    """
    # Overridden by DatetimeIndex
    return self, other
|
|
|
|
# --------------------------------------------------------------------
|
|
# List-Like Methods
|
|
|
|
@Appender(DatetimeIndexOpsMixin.insert.__doc__)
def insert(self, loc, item):
    # No docstring here on purpose: Appender supplies it from
    # DatetimeIndexOpsMixin.insert.
    if not isinstance(item, str):
        return DatetimeIndexOpsMixin.insert(self, loc, item)

    # Strings are inserted into an object-dtype copy of the index.
    # TODO: Why are strings special?
    # TODO: Should we attempt _scalar_from_string?
    as_objects = self.astype(object)
    return as_objects.insert(loc, item)
|