403 lines
12 KiB
Python
403 lines
12 KiB
Python
"""
Shared methods for Index subclasses backed by ExtensionArray.
"""
|
|
from typing import List, Optional, TypeVar
|
|
|
|
import numpy as np
|
|
|
|
from pandas._libs import lib
|
|
from pandas._typing import Label
|
|
from pandas.compat.numpy import function as nv
|
|
from pandas.errors import AbstractMethodError
|
|
from pandas.util._decorators import cache_readonly, doc
|
|
|
|
from pandas.core.dtypes.common import is_dtype_equal, is_object_dtype
|
|
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
|
|
|
|
from pandas.core.arrays import ExtensionArray
|
|
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
|
|
from pandas.core.indexers import deprecate_ndim_indexing
|
|
from pandas.core.indexes.base import Index
|
|
from pandas.core.ops import get_op_result_name
|
|
|
|
# Type variable bound to NDArrayBackedExtensionIndex; it annotates ``self``,
# ``other`` and the return value of
# NDArrayBackedExtensionIndex._wrap_joined_index.
_T = TypeVar("_T", bound="NDArrayBackedExtensionIndex")
|
|
|
|
|
|
def inherit_from_data(name: str, delegate, cache: bool = False, wrap: bool = False):
    """
    Build a class-level alias that forwards ``name`` to ``self._data``.

    Parameters
    ----------
    name : str
        Name of the attribute to look up on ``delegate``.
    delegate : class
        Class whose attribute is being aliased (the EA parent).
    cache : bool, default False
        If True and the attribute is a property, expose it as a
        cache_readonly. ``wrap`` is not applied on that path.
    wrap : bool, default False
        If True, results of non-cached properties and of methods are
        wrapped in an Index.

    Returns
    -------
    attribute, method, property, or cache_readonly
    """
    target = getattr(delegate, name)

    def _finalize(owner, value):
        # Shared post-processing for the property-getter and method paths.
        if not wrap:
            return value
        if isinstance(value, type(owner._data)):
            # Same array type as the backing data: rebuild the same Index class.
            return type(owner)._simple_new(value, name=owner.name)
        if isinstance(value, ABCDataFrame):
            return value.set_index(owner)
        return Index(value, name=owner.name)

    if isinstance(target, property):
        if cache:

            def cached(self):
                return getattr(self._data, name)

            cached.__name__ = name
            cached.__doc__ = target.__doc__
            return cache_readonly(cached)

        def fget(self):
            return _finalize(self, getattr(self._data, name))

        def fset(self, value):
            setattr(self._data, name, value)

        fget.__name__ = name
        fget.__doc__ = target.__doc__
        return property(fget, fset)

    if not callable(target):
        # Plain (non-property, non-callable) attribute: hand it back untouched.
        return target

    def method(self, *args, **kwargs):
        return _finalize(self, target(self._data, *args, **kwargs))

    method.__name__ = name
    method.__doc__ = target.__doc__
    return method
|
|
|
|
|
|
def inherit_names(names: List[str], delegate, cache: bool = False, wrap: bool = False):
    """
    Class decorator that pins the listed attributes of ``delegate`` onto the
    decorated class, building each one with ``inherit_from_data``.

    Parameters
    ----------
    names : List[str]
        Attribute names to alias.
    delegate : class
        Class the attributes are looked up on.
    cache : bool, default False
        Forwarded to ``inherit_from_data``.
    wrap : bool, default False
        Whether to wrap the inherited result in an Index.
    """

    def wrapper(cls):
        for attr_name in names:
            alias = inherit_from_data(attr_name, delegate, cache=cache, wrap=wrap)
            setattr(cls, attr_name, alias)
        # Return the same class so this works as a decorator.
        return cls

    return wrapper
|
|
|
|
|
|
def _make_wrapped_comparison_op(opname: str):
    """
    Build a comparison method that forwards to ``self._data.<opname>``.

    The returned function has its ``__name__`` set to ``opname``.
    """

    def wrapper(self, other):
        # The arrays defer to Series for comparison ops but the indexes
        # don't, so a Series operand is reduced to its values first.
        operand = other._values if isinstance(other, ABCSeries) else other
        operand = _maybe_unwrap_index(operand)

        array_op = getattr(self._data, opname)
        return array_op(operand)

    wrapper.__name__ = opname
    return wrapper
|
|
|
|
|
|
def make_wrapped_arith_op(opname: str):
    """
    Build an arithmetic method that forwards to ``self._data.<opname>`` and
    post-processes the outcome with ``_wrap_arithmetic_op``.

    The returned function has its ``__name__`` set to ``opname``.
    """

    def method(self, other):
        is_object_index_subclass = (
            isinstance(other, Index)
            and is_object_dtype(other.dtype)
            and type(other) is not Index
        )
        if is_object_index_subclass:
            # We return NotImplemented for object-dtype index *subclasses* so
            # they have a chance to implement ops before we unwrap them.
            # See https://github.com/pandas-dev/pandas/issues/31109
            return NotImplemented

        array_op = getattr(self._data, opname)
        raw = array_op(_maybe_unwrap_index(other))
        return _wrap_arithmetic_op(self, other, raw)

    method.__name__ = opname
    return method
|
|
|
|
|
|
def _wrap_arithmetic_op(self, other, result):
|
|
if result is NotImplemented:
|
|
return NotImplemented
|
|
|
|
if isinstance(result, tuple):
|
|
# divmod, rdivmod
|
|
assert len(result) == 2
|
|
return (
|
|
_wrap_arithmetic_op(self, other, result[0]),
|
|
_wrap_arithmetic_op(self, other, result[1]),
|
|
)
|
|
|
|
if not isinstance(result, Index):
|
|
# Index.__new__ will choose appropriate subclass for dtype
|
|
result = Index(result)
|
|
|
|
res_name = get_op_result_name(self, other)
|
|
result.name = res_name
|
|
return result
|
|
|
|
|
|
def _maybe_unwrap_index(obj):
|
|
"""
|
|
If operating against another Index object, we need to unwrap the underlying
|
|
data before deferring to the DatetimeArray/TimedeltaArray/PeriodArray
|
|
implementation, otherwise we will incorrectly return NotImplemented.
|
|
|
|
Parameters
|
|
----------
|
|
obj : object
|
|
|
|
Returns
|
|
-------
|
|
unwrapped object
|
|
"""
|
|
if isinstance(obj, Index):
|
|
return obj._data
|
|
return obj
|
|
|
|
|
|
class ExtensionIndex(Index):
    """
    Index subclass for indexes backed by ExtensionArray.
    """

    # The base class already passes through to _data:
    #  size, __len__, dtype

    _data: ExtensionArray

    # Comparison operators dispatch to the corresponding method on ``_data``.
    __eq__ = _make_wrapped_comparison_op("__eq__")
    __ne__ = _make_wrapped_comparison_op("__ne__")
    __lt__ = _make_wrapped_comparison_op("__lt__")
    __gt__ = _make_wrapped_comparison_op("__gt__")
    __le__ = _make_wrapped_comparison_op("__le__")
    __ge__ = _make_wrapped_comparison_op("__ge__")

    @doc(Index._shallow_copy)
    def _shallow_copy(
        self, values: Optional[ExtensionArray] = None, name: Label = lib.no_default
    ):
        name = self.name if name is lib.no_default else name

        if values is not None:
            return self._simple_new(values, name=name)

        # No new values: reuse the backing array and hand the existing
        # ``_cache`` object to the new instance.
        result = self._simple_new(self._data, name=name)
        result._cache = self._cache
        return result

    @property
    def _has_complex_internals(self) -> bool:
        # used to avoid libreduction code paths, which raise or require conversion
        return True

    # ---------------------------------------------------------------------
    # NDarray-Like Methods

    def __getitem__(self, key):
        result = self._data[key]
        if isinstance(result, type(self._data)):
            if result.ndim == 1:
                # 1-D result of the same array type: wrap it in the same class.
                return type(self)(result, name=self.name)
            # Unpack to ndarray for MPL compat
            # pandas\core\indexes\extension.py:220: error: "ExtensionArray" has
            # no attribute "_data"  [attr-defined]
            result = result._data  # type: ignore[attr-defined]

        # Includes cases where we get a 2D ndarray back for MPL compat
        deprecate_ndim_indexing(result)
        return result

    def searchsorted(self, value, side="left", sorter=None) -> np.ndarray:
        # overriding IndexOpsMixin improves performance GH#38083
        return self._data.searchsorted(value, side=side, sorter=sorter)

    # ---------------------------------------------------------------------

    def _check_indexing_method(self, method):
        """
        Raise if we have a get_indexer `method` that is not supported or valid.

        Raises
        ------
        NotImplementedError
            If ``method`` is one of the recognised fill methods.
        ValueError
            If ``method`` is neither None nor a recognised fill method.
        """
        # GH#37871 for now this is only for IntervalIndex and CategoricalIndex
        if method is None:
            return

        if method in ["bfill", "backfill", "pad", "ffill", "nearest"]:
            raise NotImplementedError(
                f"method {method} not yet implemented for {type(self).__name__}"
            )

        raise ValueError("Invalid fill method")

    def _get_engine_target(self) -> np.ndarray:
        return np.asarray(self._data)

    def repeat(self, repeats, axis=None):
        # Validate the numpy-compat ``axis`` argument, then delegate to the array.
        nv.validate_repeat((), {"axis": axis})
        result = self._data.repeat(repeats, axis=axis)
        return type(self)._simple_new(result, name=self.name)

    def insert(self, loc: int, item):
        # ExtensionIndex subclasses must override Index.insert
        raise AbstractMethodError(self)

    def _get_unique_index(self, dropna=False):
        # Already unique and nothing to drop: no new object is needed.
        if self.is_unique and not dropna:
            return self

        result = self._data.unique()
        if dropna and self.hasnans:
            result = result[~result.isna()]
        return self._shallow_copy(result)

    @doc(Index.map)
    def map(self, mapper, na_action=None):
        # Try to run function on index first, and then on elements of index
        # Especially important for group-by functionality
        try:
            result = mapper(self)

            # Try to use this result if we can
            if isinstance(result, np.ndarray):
                result = Index(result)

            if not isinstance(result, Index):
                # Raised inside the ``try`` on purpose: it triggers the
                # element-wise fallback below.
                raise TypeError("The map function must return an Index object")
            return result
        except Exception:
            # Element-wise fallback on the object-dtype index. ``na_action`` is
            # forwarded so the documented parameter is honoured on this path
            # instead of being silently dropped.
            return self.astype(object).map(mapper, na_action=na_action)

    @doc(Index.astype)
    def astype(self, dtype, copy=True):
        if is_dtype_equal(self.dtype, dtype) and copy is False:
            # Ensure that self.astype(self.dtype) is self
            return self

        new_values = self._data.astype(dtype, copy=copy)

        # pass copy=False because any copying will be done in the
        #  _data.astype call above
        return Index(new_values, dtype=new_values.dtype, name=self.name, copy=False)

    @cache_readonly
    def _isnan(self) -> np.ndarray:
        return self._data.isna()

    @doc(Index.equals)
    def equals(self, other) -> bool:
        # Dispatch to the ExtensionArray's .equals method.
        if self.is_(other):
            return True

        if not isinstance(other, type(self)):
            return False

        return self._data.equals(other._data)
|
|
|
|
|
|
class NDArrayBackedExtensionIndex(ExtensionIndex):
    """
    Index subclass for indexes backed by NDArrayBackedExtensionArray.
    """

    _data: NDArrayBackedExtensionArray

    def _get_engine_target(self) -> np.ndarray:
        # Use the array's ``_ndarray`` attribute directly.
        backing = self._data
        return backing._ndarray

    def delete(self, loc):
        """
        Make new Index with passed location(-s) deleted

        Returns
        -------
        new_index : Index
        """
        remaining = np.delete(self._data._ndarray, loc)
        trimmed = self._data._from_backing_data(remaining)
        return type(self)._simple_new(trimmed, name=self.name)

    def insert(self, loc: int, item):
        """
        Make new Index inserting new item at location. Follows
        Python list.append semantics for negative values.

        Parameters
        ----------
        loc : int
        item : object

        Returns
        -------
        new_index : Index

        Raises
        ------
        ValueError if the item is not valid for this dtype.
        """
        data = self._data
        # Validate/convert the item before touching the backing values.
        code = data._validate_scalar(item)

        head = data._ndarray[:loc]
        tail = data._ndarray[loc:]
        combined = np.concatenate((head, [code], tail))

        inserted = data._from_backing_data(combined)
        return type(self)._simple_new(inserted, name=self.name)

    @doc(Index.where)
    def where(self, cond, other=None):
        replaced = self._data.where(cond, other)
        return type(self)._simple_new(replaced, name=self.name)

    def putmask(self, mask, value):
        # Work on a copy so the backing array of ``self`` is left untouched.
        updated = self._data.copy()
        try:
            updated.putmask(mask, value)
        except (TypeError, ValueError):
            # The array rejected the value: retry on the object-dtype index.
            return self.astype(object).putmask(mask, value)
        else:
            return type(self)._simple_new(updated, name=self.name)

    def _wrap_joined_index(self: _T, joined: np.ndarray, other: _T) -> _T:
        # Name the result from both operands, then rebuild the array from the
        # joined backing values.
        result_name = get_op_result_name(self, other)
        rebuilt = self._data._from_backing_data(joined)
        return type(self)._simple_new(rebuilt, name=result_name)
|