projektAI/venv/Lib/site-packages/pandas/core/indexes/extension.py

403 lines
12 KiB
Python
Raw Normal View History

2021-06-06 22:13:05 +02:00
"""
Shared methods for Index subclasses backed by ExtensionArray.
"""
from typing import List, Optional, TypeVar
import numpy as np
from pandas._libs import lib
from pandas._typing import Label
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import cache_readonly, doc
from pandas.core.dtypes.common import is_dtype_equal, is_object_dtype
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
from pandas.core.arrays import ExtensionArray
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
from pandas.core.indexers import deprecate_ndim_indexing
from pandas.core.indexes.base import Index
from pandas.core.ops import get_op_result_name
_T = TypeVar("_T", bound="NDArrayBackedExtensionIndex")
def inherit_from_data(name: str, delegate, cache: bool = False, wrap: bool = False):
    """
    Build an alias on an Index for an attribute of its underlying ExtensionArray.

    Parameters
    ----------
    name : str
        Name of the attribute to look up on ``delegate``.
    delegate : class
        Class on which ``name`` is looked up.
    cache : bool, default False
        If True and the attribute is a property, return a ``cache_readonly``
        that reads the attribute from ``self._data``. ``wrap`` is not applied
        on this path.
    wrap : bool, default False
        Whether to wrap the inherited result in an Index.

    Returns
    -------
    attribute, method, property, or cache_readonly
    """
    delegated = getattr(delegate, name)

    def _finalize(owner, value):
        # Shared by the property getter and the method alias: hand the raw
        # value back untouched unless wrapping was requested.
        if not wrap:
            return value
        if isinstance(value, type(owner._data)):
            # Same array type as the index's own data -> same Index type.
            return type(owner)._simple_new(value, name=owner.name)
        if isinstance(value, ABCDataFrame):
            return value.set_index(owner)
        return Index(value, name=owner.name)

    if isinstance(delegated, property):
        if cache:

            def cached(self):
                return getattr(self._data, name)

            cached.__name__ = name
            cached.__doc__ = delegated.__doc__
            return cache_readonly(cached)

        def fget(self):
            return _finalize(self, getattr(self._data, name))

        def fset(self, value):
            setattr(self._data, name, value)

        fget.__name__ = name
        fget.__doc__ = delegated.__doc__
        return property(fget, fset)

    if not callable(delegated):
        # just a normal attribute, no wrapping
        return delegated

    def method(self, *args, **kwargs):
        return _finalize(self, delegated(self._data, *args, **kwargs))

    method.__name__ = name
    method.__doc__ = delegated.__doc__
    return method
def inherit_names(names: List[str], delegate, cache: bool = False, wrap: bool = False):
    """
    Class decorator that pins attributes of an ExtensionArray onto an Index subclass.

    Parameters
    ----------
    names : List[str]
        Attribute names to alias; each is built with ``inherit_from_data``.
    delegate : class
        Class the attributes are looked up on.
    cache : bool, default False
        Forwarded to ``inherit_from_data``.
    wrap : bool, default False
        Whether to wrap the inherited result in an Index.

    Returns
    -------
    callable
        Decorator that sets the aliases on the class and returns that class.
    """

    def wrapper(cls):
        for attr_name in names:
            setattr(
                cls,
                attr_name,
                inherit_from_data(attr_name, delegate, cache=cache, wrap=wrap),
            )
        return cls

    return wrapper
def _make_wrapped_comparison_op(opname: str):
    """
    Build a comparison method named ``opname`` that dispatches to ``._data``.
    """

    def wrapper(self, other):
        # The arrays defer to Series for comparison ops but the indexes
        # don't, so a Series operand is replaced by its values first.
        rhs = other._values if isinstance(other, ABCSeries) else other
        rhs = _maybe_unwrap_index(rhs)
        return getattr(self._data, opname)(rhs)

    wrapper.__name__ = opname
    return wrapper
def make_wrapped_arith_op(opname: str):
    """
    Build an arithmetic method named ``opname`` that dispatches to ``._data``.

    The generated method calls the same-named method of ``self._data`` with
    the (possibly unwrapped) operand and passes the outcome through
    ``_wrap_arithmetic_op``.
    """

    def method(self, other):
        is_object_index_subclass = (
            isinstance(other, Index)
            and is_object_dtype(other.dtype)
            and type(other) is not Index
        )
        if is_object_index_subclass:
            # We return NotImplemented for object-dtype index *subclasses* so
            # they have a chance to implement ops before we unwrap them.
            # See https://github.com/pandas-dev/pandas/issues/31109
            return NotImplemented

        array_op = getattr(self._data, opname)
        raw = array_op(_maybe_unwrap_index(other))
        return _wrap_arithmetic_op(self, other, raw)

    method.__name__ = opname
    return method
def _wrap_arithmetic_op(self, other, result):
    """
    Wrap the raw result of an arithmetic operation in a named Index.

    Parameters
    ----------
    self : Index
        Left operand; used together with ``other`` to pick the result name.
    other : object
        Right operand of the operation.
    result : object
        Raw outcome. ``NotImplemented`` is passed through unchanged and a
        2-tuple is wrapped element by element.

    Returns
    -------
    Index, tuple of Index, or NotImplemented
    """
    if result is NotImplemented:
        return NotImplemented

    if isinstance(result, tuple):
        # divmod, rdivmod
        assert len(result) == 2
        first, second = result
        return (
            _wrap_arithmetic_op(self, other, first),
            _wrap_arithmetic_op(self, other, second),
        )

    # Index.__new__ will choose appropriate subclass for dtype
    wrapped = result if isinstance(result, Index) else Index(result)
    wrapped.name = get_op_result_name(self, other)
    return wrapped
def _maybe_unwrap_index(obj):
    """
    Return the underlying data of an Index, or the object itself otherwise.

    If operating against another Index object, we need to unwrap the underlying
    data before deferring to the DatetimeArray/TimedeltaArray/PeriodArray
    implementation, otherwise we will incorrectly return NotImplemented.

    Parameters
    ----------
    obj : object

    Returns
    -------
    unwrapped object
        ``obj._data`` when ``obj`` is an Index, else ``obj`` unchanged.
    """
    return obj._data if isinstance(obj, Index) else obj
class ExtensionIndex(Index):
    """
    Index subclass for indexes backed by ExtensionArray.
    """

    # The base class already passes through to _data:
    #  size, __len__, dtype

    _data: ExtensionArray

    # Comparisons are dispatched to the comparison methods of ``_data``.
    __eq__ = _make_wrapped_comparison_op("__eq__")
    __ne__ = _make_wrapped_comparison_op("__ne__")
    __lt__ = _make_wrapped_comparison_op("__lt__")
    __gt__ = _make_wrapped_comparison_op("__gt__")
    __le__ = _make_wrapped_comparison_op("__le__")
    __ge__ = _make_wrapped_comparison_op("__ge__")

    @doc(Index._shallow_copy)
    def _shallow_copy(
        self, values: Optional[ExtensionArray] = None, name: Label = lib.no_default
    ):
        name = self.name if name is lib.no_default else name

        if values is not None:
            return self._simple_new(values, name=name)

        # Same data as the original, so the copy reuses the original's cache.
        result = self._simple_new(self._data, name=name)
        result._cache = self._cache
        return result

    @property
    def _has_complex_internals(self) -> bool:
        # used to avoid libreduction code paths, which raise or require conversion
        return True

    # ---------------------------------------------------------------------
    # NDarray-Like Methods

    def __getitem__(self, key):
        """
        Index into ``_data``.

        A 1-dimensional result of the same array type is re-wrapped in this
        Index type; a higher-dimensional result of that type is unpacked via
        its ``_data`` attribute. Anything else is returned after being passed
        to ``deprecate_ndim_indexing``.
        """
        result = self._data[key]
        if isinstance(result, type(self._data)):
            if result.ndim == 1:
                return type(self)(result, name=self.name)
            # Unpack to ndarray for MPL compat
            # pandas\core\indexes\extension.py:220: error: "ExtensionArray" has
            # no attribute "_data"  [attr-defined]
            result = result._data  # type: ignore[attr-defined]

        # Includes cases where we get a 2D ndarray back for MPL compat
        deprecate_ndim_indexing(result)
        return result

    def searchsorted(self, value, side="left", sorter=None) -> np.ndarray:
        # overriding IndexOpsMixin improves performance GH#38083
        return self._data.searchsorted(value, side=side, sorter=sorter)

    # ---------------------------------------------------------------------

    def _check_indexing_method(self, method):
        """
        Raise if we have a get_indexer `method` that is not supported or valid.

        Raises
        ------
        NotImplementedError
            If ``method`` is one of the known fill methods.
        ValueError
            If ``method`` is neither None nor a known fill method.
        """
        # GH#37871 for now this is only for IntervalIndex and CategoricalIndex
        if method is None:
            return

        if method in ["bfill", "backfill", "pad", "ffill", "nearest"]:
            raise NotImplementedError(
                f"method {method} not yet implemented for {type(self).__name__}"
            )

        raise ValueError("Invalid fill method")

    def _get_engine_target(self) -> np.ndarray:
        return np.asarray(self._data)

    def repeat(self, repeats, axis=None):
        """
        Repeat the elements of ``_data`` and wrap the result in this Index type.

        ``axis`` is validated with ``nv.validate_repeat`` before dispatching.
        """
        nv.validate_repeat((), {"axis": axis})
        result = self._data.repeat(repeats, axis=axis)
        return type(self)._simple_new(result, name=self.name)

    def insert(self, loc: int, item):
        # ExtensionIndex subclasses must override Index.insert
        raise AbstractMethodError(self)

    def _get_unique_index(self, dropna=False):
        """
        Return an index of the unique values, optionally without missing ones.

        ``self`` is returned as-is when it is already unique and ``dropna`` is
        False.
        """
        if self.is_unique and not dropna:
            return self

        result = self._data.unique()
        if dropna and self.hasnans:
            result = result[~result.isna()]
        return self._shallow_copy(result)

    @doc(Index.map)
    def map(self, mapper, na_action=None):
        # Try to run function on index first, and then on elements of index
        # Especially important for group-by functionality
        try:
            result = mapper(self)

            # Try to use this result if we can
            if isinstance(result, np.ndarray):
                result = Index(result)

            if not isinstance(result, Index):
                raise TypeError("The map function must return an Index object")
            return result
        except Exception:
            # Deliberate best-effort: any failure of the whole-index attempt
            # (including the TypeError raised just above) falls back to
            # mapping over an object-dtype Index. ``na_action`` is forwarded
            # so that it is honoured on this path rather than dropped.
            return self.astype(object).map(mapper, na_action=na_action)

    @doc(Index.astype)
    def astype(self, dtype, copy=True):
        if is_dtype_equal(self.dtype, dtype) and copy is False:
            # Ensure that self.astype(self.dtype) is self
            return self

        new_values = self._data.astype(dtype, copy=copy)

        # pass copy=False because any copying will be done in the
        #  _data.astype call above
        return Index(new_values, dtype=new_values.dtype, name=self.name, copy=False)

    @cache_readonly
    def _isnan(self) -> np.ndarray:
        return self._data.isna()

    @doc(Index.equals)
    def equals(self, other) -> bool:
        # Dispatch to the ExtensionArray's .equals method.
        if self.is_(other):
            return True

        if not isinstance(other, type(self)):
            return False

        return self._data.equals(other._data)
class NDArrayBackedExtensionIndex(ExtensionIndex):
    """
    Index subclass for indexes backed by NDArrayBackedExtensionArray.
    """

    _data: NDArrayBackedExtensionArray

    def _get_engine_target(self) -> np.ndarray:
        # The backing ndarray of ``_data`` is handed over directly.
        return self._data._ndarray

    def delete(self, loc):
        """
        Make new Index with passed location(-s) deleted

        Returns
        -------
        new_index : Index
        """
        remaining = np.delete(self._data._ndarray, loc)
        rebuilt = self._data._from_backing_data(remaining)
        return type(self)._simple_new(rebuilt, name=self.name)

    def insert(self, loc: int, item):
        """
        Make new Index inserting new item at location. Follows
        Python list.append semantics for negative values.

        Parameters
        ----------
        loc : int
        item : object

        Returns
        -------
        new_index : Index

        Raises
        ------
        ValueError if the item is not valid for this dtype.
        """
        data = self._data
        # Validate first so that nothing is built for an unsupported item.
        code = data._validate_scalar(item)

        backing = data._ndarray
        pieces = (backing[:loc], [code], backing[loc:])
        rebuilt = data._from_backing_data(np.concatenate(pieces))
        return type(self)._simple_new(rebuilt, name=self.name)

    @doc(Index.where)
    def where(self, cond, other=None):
        return type(self)._simple_new(self._data.where(cond, other), name=self.name)

    def putmask(self, mask, value):
        """
        Return a new index with ``value`` set where ``mask`` applies.

        The assignment is made on a copy of ``_data``. If it raises TypeError
        or ValueError, the operation is retried on an object-dtype cast of
        this index instead.
        """
        updated = self._data.copy()
        try:
            updated.putmask(mask, value)
        except (TypeError, ValueError):
            return self.astype(object).putmask(mask, value)
        else:
            return type(self)._simple_new(updated, name=self.name)

    def _wrap_joined_index(self: _T, joined: np.ndarray, other: _T) -> _T:
        # ``joined`` holds backing values; rebuild the array around them and
        # name the result after both operands.
        result_name = get_op_result_name(self, other)
        rebuilt = self._data._from_backing_data(joined)
        return type(self)._simple_new(rebuilt, name=result_name)