from copy import copy as copy_func
from datetime import datetime
from itertools import zip_longest
import operator
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    FrozenSet,
    Hashable,
    List,
    NewType,
    Optional,
    Sequence,
    Set,
    Tuple,
    TypeVar,
    Union,
    cast,
)
import warnings

import numpy as np

from pandas._libs import algos as libalgos, index as libindex, lib
import pandas._libs.join as libjoin
from pandas._libs.lib import is_datetime_array, no_default
from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime, Timestamp
from pandas._libs.tslibs.timezones import tz_compare
from pandas._typing import AnyArrayLike, Dtype, DtypeObj, Label, Shape, final
from pandas.compat.numpy import function as nv
from pandas.errors import DuplicateLabelError, InvalidIndexError
from pandas.util._decorators import Appender, cache_readonly, doc

from pandas.core.dtypes.cast import (
    find_common_type,
    maybe_cast_to_integer_array,
    validate_numeric_casting,
)
from pandas.core.dtypes.common import (
    ensure_int64,
    ensure_object,
    ensure_platform_int,
    is_bool_dtype,
    is_categorical_dtype,
    is_datetime64_any_dtype,
    is_dtype_equal,
    is_extension_array_dtype,
    is_float,
    is_float_dtype,
    is_hashable,
    is_integer,
    is_integer_dtype,
    is_interval_dtype,
    is_iterator,
    is_list_like,
    is_object_dtype,
    is_period_dtype,
    is_scalar,
    is_signed_integer_dtype,
    is_timedelta64_dtype,
    is_unsigned_integer_dtype,
    needs_i8_conversion,
    pandas_dtype,
    validate_all_hashable,
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.generic import (
    ABCDatetimeIndex,
    ABCMultiIndex,
    ABCPandasArray,
    ABCPeriodIndex,
    ABCSeries,
    ABCTimedeltaIndex,
)
from pandas.core.dtypes.missing import array_equivalent, isna

from pandas.core import missing, ops
from pandas.core.accessor import CachedAccessor
import pandas.core.algorithms as algos
from pandas.core.arrays import Categorical, ExtensionArray
from pandas.core.arrays.datetimes import tz_to_dtype, validate_tz_from_dtype
from pandas.core.base import IndexOpsMixin, PandasObject
import pandas.core.common as com
from pandas.core.construction import extract_array
from pandas.core.indexers import deprecate_ndim_indexing
from pandas.core.indexes.frozen import FrozenList
from pandas.core.ops import get_op_result_name
from pandas.core.ops.invalid import make_invalid_op
from pandas.core.sorting import ensure_key_mapped, nargsort
from pandas.core.strings import StringMethods

from pandas.io.formats.printing import (
    PrettyDict,
    default_pprint,
    format_object_attrs,
    format_object_summary,
    pprint_thing,
)

if TYPE_CHECKING:
    from pandas import MultiIndex, RangeIndex, Series


__all__ = ["Index"]

_unsortable_types = frozenset(("mixed", "mixed-integer"))

_index_doc_kwargs = {
    "klass": "Index",
    "inplace": "",
    "target_klass": "Index",
    "raises_section": "",
    "unique": "Index",
    "duplicated": "np.ndarray",
}
_index_shared_docs = {}
str_t = str


_o_dtype = np.dtype(object)


_Identity = NewType("_Identity", object)


def _new_Index(cls, d):
    """
    This is called upon unpickling, rather than the default which doesn't
    have arguments and breaks __new__.
    """
    # required for backward compat, because PI can't be instantiated with
    # ordinals through __new__ GH #13277
    if issubclass(cls, ABCPeriodIndex):
        from pandas.core.indexes.period import _new_PeriodIndex

        return _new_PeriodIndex(cls, **d)

    if issubclass(cls, ABCMultiIndex):
        if "labels" in d and "codes" not in d:
            # GH#23752 "labels" kwarg has been replaced with "codes"
            d["codes"] = d.pop("labels")

        return cls.__new__(cls, **d)


_IndexT = TypeVar("_IndexT", bound="Index")


class Index(IndexOpsMixin, PandasObject):
    """
    Immutable sequence used for indexing and alignment. The basic object
    storing axis labels for all pandas objects.

    Parameters
    ----------
    data : array-like (1-dimensional)
    dtype : NumPy dtype (default: object)
        If dtype is None, we find the dtype that best fits the data.
        If an actual dtype is provided, we coerce to that dtype if it's safe.
        Otherwise, an error will be raised.
    copy : bool
        Make a copy of input ndarray.
    name : object
        Name to be stored in the index.
    tupleize_cols : bool (default: True)
        When True, attempt to create a MultiIndex if possible.

    See Also
    --------
    RangeIndex : Index implementing a monotonic integer range.
    CategoricalIndex : Index of :class:`Categorical` s.
    MultiIndex : A multi-level, or hierarchical Index.
    IntervalIndex : An Index of :class:`Interval` s.
    DatetimeIndex : Index of datetime64 data.
    TimedeltaIndex : Index of timedelta64 data.
    PeriodIndex : Index of Period data.
    Int64Index : A special case of :class:`Index` with purely integer labels.
    UInt64Index : A special case of :class:`Index` with purely unsigned integer labels.
    Float64Index : A special case of :class:`Index` with purely float labels.

    Notes
    -----
    An Index instance can **only** contain hashable objects.

    Examples
    --------
    >>> pd.Index([1, 2, 3])
    Int64Index([1, 2, 3], dtype='int64')

    >>> pd.Index(list('abc'))
    Index(['a', 'b', 'c'], dtype='object')
    """

    # tolist is not actually deprecated, just suppressed in the __dir__
    _hidden_attrs: FrozenSet[str] = (
        PandasObject._hidden_attrs
        | IndexOpsMixin._hidden_attrs
        | frozenset(["contains", "set_value"])
    )

    # To hand over control to subclasses
    _join_precedence = 1

    # Cython methods; see github.com/cython/cython/issues/2647
    # for why we need to wrap these instead of making them class attributes
    # Moreover, cython will choose the appropriate-dtyped sub-function
    # given the dtypes of the passed arguments
    def _left_indexer_unique(self, left, right):
        return libjoin.left_join_indexer_unique(left, right)

    def _left_indexer(self, left, right):
        return libjoin.left_join_indexer(left, right)

    def _inner_indexer(self, left, right):
        return libjoin.inner_join_indexer(left, right)

    def _outer_indexer(self, left, right):
        return libjoin.outer_join_indexer(left, right)

    _typ = "index"
    _data: Union[ExtensionArray, np.ndarray]
    _id: Optional[_Identity] = None
    _name: Label = None
    # MultiIndex.levels previously allowed setting the index name. We
    # don't allow this anymore, and raise if it happens rather than
    # failing silently.
    _no_setting_name: bool = False
    _comparables = ["name"]
    _attributes = ["name"]
    _is_numeric_dtype = False
    _can_hold_na = True
    _can_hold_strings = True

    # would we like our indexing holder to defer to us
    _defer_to_indexing = False

    _engine_type = libindex.ObjectEngine
    # whether we support partial string indexing. Overridden
    # in DatetimeIndex and PeriodIndex
    _supports_partial_string_indexing = False

    _accessors = {"str"}

    str = CachedAccessor("str", StringMethods)

    # --------------------------------------------------------------------
    # Constructors

    def __new__(
        cls, data=None, dtype=None, copy=False, name=None, tupleize_cols=True, **kwargs
    ) -> "Index":

        from pandas.core.indexes.range import RangeIndex

        name = maybe_extract_name(name, data, cls)

        if dtype is not None:
            dtype = pandas_dtype(dtype)
        if "tz" in kwargs:
            tz = kwargs.pop("tz")
            validate_tz_from_dtype(dtype, tz)
            dtype = tz_to_dtype(tz)

        if isinstance(data, ABCPandasArray):
            # ensure users don't accidentally put a PandasArray in an index.
            data = data.to_numpy()

        data_dtype = getattr(data, "dtype", None)

        # range
        if isinstance(data, RangeIndex):
            return RangeIndex(start=data, copy=copy, dtype=dtype, name=name)
        elif isinstance(data, range):
            return RangeIndex.from_range(data, dtype=dtype, name=name)

        # categorical
        elif is_categorical_dtype(data_dtype) or is_categorical_dtype(dtype):
            # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
            from pandas.core.indexes.category import CategoricalIndex

            return _maybe_asobject(dtype, CategoricalIndex, data, copy, name, **kwargs)

        # interval
        elif is_interval_dtype(data_dtype) or is_interval_dtype(dtype):
            # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
            from pandas.core.indexes.interval import IntervalIndex

            return _maybe_asobject(dtype, IntervalIndex, data, copy, name, **kwargs)

        elif is_datetime64_any_dtype(data_dtype) or is_datetime64_any_dtype(dtype):
            # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
            from pandas import DatetimeIndex

            return _maybe_asobject(dtype, DatetimeIndex, data, copy, name, **kwargs)

        elif is_timedelta64_dtype(data_dtype) or is_timedelta64_dtype(dtype):
            # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
            from pandas import TimedeltaIndex

            return _maybe_asobject(dtype, TimedeltaIndex, data, copy, name, **kwargs)

        elif is_period_dtype(data_dtype) or is_period_dtype(dtype):
            # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
            from pandas import PeriodIndex

            return _maybe_asobject(dtype, PeriodIndex, data, copy, name, **kwargs)

        # extension dtype
        elif is_extension_array_dtype(data_dtype) or is_extension_array_dtype(dtype):
            if not (dtype is None or is_object_dtype(dtype)):
                # coerce to the provided dtype
                ea_cls = dtype.construct_array_type()
                data = ea_cls._from_sequence(data, dtype=dtype, copy=False)
            else:
                data = np.asarray(data, dtype=object)

                # coerce to the object dtype
                data = data.astype(object)
            return Index(data, dtype=object, copy=copy, name=name, **kwargs)

        # index-like
        elif isinstance(data, (np.ndarray, Index, ABCSeries)):
            # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
            from pandas.core.indexes.numeric import (
                Float64Index,
                Int64Index,
                UInt64Index,
            )

            if dtype is not None:
                # we need to avoid having numpy coerce
                # things that look like ints/floats to ints unless
                # they are actually ints, e.g. '0' and 0.0
                # should not be coerced
                # GH 11836
                data = _maybe_cast_with_dtype(data, dtype, copy)
                dtype = data.dtype  # TODO: maybe not for object?

            # maybe coerce to a sub-class
            if is_signed_integer_dtype(data.dtype):
                return Int64Index(data, copy=copy, dtype=dtype, name=name)
            elif is_unsigned_integer_dtype(data.dtype):
                return UInt64Index(data, copy=copy, dtype=dtype, name=name)
            elif is_float_dtype(data.dtype):
                return Float64Index(data, copy=copy, dtype=dtype, name=name)
            elif issubclass(data.dtype.type, bool) or is_bool_dtype(data):
                subarr = data.astype("object")
            else:
                subarr = com.asarray_tuplesafe(data, dtype=object)

            # asarray_tuplesafe does not always copy underlying data,
            # so need to make sure that this happens
            if copy:
                subarr = subarr.copy()

            if dtype is None:
                new_data, new_dtype = _maybe_cast_data_without_dtype(subarr)
                if new_dtype is not None:
                    return cls(
                        new_data, dtype=new_dtype, copy=False, name=name, **kwargs
                    )

            if kwargs:
                raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}")
            if subarr.ndim > 1:
                # GH#13601, GH#20285, GH#27125
                raise ValueError("Index data must be 1-dimensional")
            return cls._simple_new(subarr, name)

        elif data is None or is_scalar(data):
            raise cls._scalar_data_error(data)
        elif hasattr(data, "__array__"):
            return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs)
        else:
            if tupleize_cols and is_list_like(data):
                # GH21470: convert iterable to list before determining if empty
                if is_iterator(data):
                    data = list(data)

                if data and all(isinstance(e, tuple) for e in data):
                    # we must be all tuples, otherwise don't construct
                    # 10697
                    from pandas.core.indexes.multi import MultiIndex

                    return MultiIndex.from_tuples(
                        data, names=name or kwargs.get("names")
                    )
            # other iterable of some kind
            subarr = com.asarray_tuplesafe(data, dtype=object)
            return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)
"""
|
||
|
NOTE for new Index creation:
|
||
|
|
||
|
- _simple_new: It returns new Index with the same type as the caller.
|
||
|
All metadata (such as name) must be provided by caller's responsibility.
|
||
|
Using _shallow_copy is recommended because it fills these metadata
|
||
|
otherwise specified.
|
||
|
|
||
|
- _shallow_copy: It returns new Index with the same type (using
|
||
|
_simple_new), but fills caller's metadata otherwise specified. Passed
|
||
|
kwargs will overwrite corresponding metadata.
|
||
|
|
||
|
See each method's docstring.
|
||
|
"""
|
||
|
|
||
|
@property
|
||
|
def asi8(self):
|
||
|
"""
|
||
|
Integer representation of the values.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
ndarray
|
||
|
An ndarray with int64 dtype.
|
||
|
"""
|
||
|
warnings.warn(
|
||
|
"Index.asi8 is deprecated and will be removed in a future version",
|
||
|
FutureWarning,
|
||
|
stacklevel=2,
|
||
|
)
|
||
|
return None
|
||
|
|
||
|

    @classmethod
    def _simple_new(cls, values, name: Label = None):
        """
        We require that we have a dtype compat for the values. If we are passed
        a non-dtype compat, then coerce using the constructor.

        Must be careful not to recurse.
        """
        assert isinstance(values, np.ndarray), type(values)

        result = object.__new__(cls)
        result._data = values
        # _index_data is a (temporary?) fix to ensure that the direct data
        # manipulation we do in `_libs/reduction.pyx` continues to work.
        # We need access to the actual ndarray, since we're messing with
        # data buffers and strides.
        result._index_data = values
        result._name = name
        result._cache = {}
        result._reset_identity()

        return result

    @cache_readonly
    def _constructor(self):
        return type(self)

    @final
    def _maybe_check_unique(self):
        """
        Check that an Index has no duplicates.

        This is typically only called via
        `NDFrame.flags.allows_duplicate_labels.setter` when it's set to
        True (duplicates aren't allowed).

        Raises
        ------
        DuplicateLabelError
            When the index is not unique.
        """
        if not self.is_unique:
            msg = """Index has duplicates."""
            duplicates = self._format_duplicate_message()
            msg += f"\n{duplicates}"

            raise DuplicateLabelError(msg)

    @final
    def _format_duplicate_message(self):
        """
        Construct the DataFrame for a DuplicateLabelError.

        This returns a DataFrame indicating the labels and positions
        of duplicates in an index. This should only be called when it's
        already known that duplicates are present.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'a'])
        >>> idx._format_duplicate_message()
            positions
        label
        a        [0, 2]
        """
        from pandas import Series

        duplicates = self[self.duplicated(keep="first")].unique()
        assert len(duplicates)

        out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates]
        if self.nlevels == 1:
            out = out.rename_axis("label")
        return out.to_frame(name="positions")

    # --------------------------------------------------------------------
    # Index Internals Methods

    @final
    def _get_attributes_dict(self):
        """
        Return an attributes dict for my class.
        """
        return {k: getattr(self, k, None) for k in self._attributes}

    def _shallow_copy(self, values=None, name: Label = no_default):
        """
        Create a new Index with the same class as the caller, without copying
        the data. Object attributes are reused, with any passed-in attributes
        taking precedence.

        *this is an internal non-public method*

        Parameters
        ----------
        values : the values to create the new Index, optional
        name : Label, defaults to self.name
        """
        name = self.name if name is no_default else name

        if values is not None:
            return self._simple_new(values, name=name)

        result = self._simple_new(self._values, name=name)
        result._cache = self._cache
        return result

    @final
    def is_(self, other) -> bool:
        """
        More flexible, faster check like ``is`` but that works through views.

        Note: this is *not* the same as ``Index.identical()``, which checks
        that metadata is also the same.

        Parameters
        ----------
        other : object
            Other object to compare against.

        Returns
        -------
        bool
            True if both have same underlying data, False otherwise.

        See Also
        --------
        Index.identical : Works like ``Index.is_`` but also checks metadata.
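
        Examples
        --------
        >>> idx1 = pd.Index(['1', '2', '3'])
        >>> idx1.is_(idx1.view())
        True
        >>> idx1.is_(idx1.copy())
        False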
        """
        if self is other:
            return True
        elif not hasattr(other, "_id"):
            return False
        elif self._id is None or other._id is None:
            return False
        else:
            return self._id is other._id

    @final
    def _reset_identity(self) -> None:
        """
        Initializes or resets ``_id`` attribute with new object.
        """
        self._id = _Identity(object())

    @final
    def _cleanup(self):
        self._engine.clear_mapping()

    @cache_readonly
    def _engine(self):
        # property, for now, slow to look up

        # to avoid a reference cycle, bind `target_values` to a local variable, so
        # `self` is not passed into the lambda.
        target_values = self._get_engine_target()
        return self._engine_type(lambda: target_values, len(self))

    @cache_readonly
    def _dir_additions_for_owner(self) -> Set[str_t]:
        """
        Add the string-like labels to the owner dataframe/series dir output.

        If this is a MultiIndex, its first-level values are used.
        """
        return {
            c
            for c in self.unique(level=0)[:100]
            if isinstance(c, str) and c.isidentifier()
        }

    # --------------------------------------------------------------------
    # Array-Like Methods

    # ndarray compat
    def __len__(self) -> int:
        """
        Return the length of the Index.
        """
        return len(self._data)

    def __array__(self, dtype=None) -> np.ndarray:
        """
        The array interface, return my values.
        """
        return np.asarray(self._data, dtype=dtype)

    def __array_wrap__(self, result, context=None):
        """
        Gets called after a ufunc and other functions.
        """
        result = lib.item_from_zerodim(result)
        if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1:
            return result

        attrs = self._get_attributes_dict()
        return Index(result, **attrs)

    @cache_readonly
    def dtype(self):
        """
        Return the dtype object of the underlying data.
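
        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> idx.dtype
        dtype('int64')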
        """
        return self._data.dtype

    @final
    def ravel(self, order="C"):
        """
        Return an ndarray of the flattened values of the underlying data.

        Returns
        -------
        numpy.ndarray
            Flattened array.

        See Also
        --------
        numpy.ndarray.ravel : Return a flattened array.
        """
        warnings.warn(
            "Index.ravel returning ndarray is deprecated; in a future version "
            "this will return a view on self.",
            FutureWarning,
            stacklevel=2,
        )
        values = self._get_engine_target()
        return values.ravel(order=order)

    def view(self, cls=None):

        # we need to see if we are subclassing an
        # index type here
        if cls is not None and not hasattr(cls, "_typ"):
            result = self._data.view(cls)
        else:
            result = self._shallow_copy()
        if isinstance(result, Index):
            result._id = self._id
        return result

    def astype(self, dtype, copy=True):
        """
        Create an Index with values cast to dtypes.

        The class of a new Index is determined by dtype. When conversion is
        impossible, a TypeError exception is raised.

        Parameters
        ----------
        dtype : numpy dtype or pandas type
            Note that any signed integer `dtype` is treated as ``'int64'``,
            and any unsigned integer `dtype` is treated as ``'uint64'``,
            regardless of the size.
        copy : bool, default True
            By default, astype always returns a newly allocated object.
            If copy is set to False and internal requirements on dtype are
            satisfied, the original data is used to create a new Index
            or the original Index is returned.

        Returns
        -------
        Index
            Index with values cast to specified dtype.
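
        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> idx.astype('float64')
        Float64Index([1.0, 2.0, 3.0], dtype='float64')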
        """
        if dtype is not None:
            dtype = pandas_dtype(dtype)

        if is_dtype_equal(self.dtype, dtype):
            return self.copy() if copy else self

        elif is_categorical_dtype(dtype):
            from pandas.core.indexes.category import CategoricalIndex

            return CategoricalIndex(
                self._values, name=self.name, dtype=dtype, copy=copy
            )

        elif is_extension_array_dtype(dtype):
            return Index(np.asarray(self), name=self.name, dtype=dtype, copy=copy)

        try:
            casted = self._values.astype(dtype, copy=copy)
        except (TypeError, ValueError) as err:
            raise TypeError(
                f"Cannot cast {type(self).__name__} to dtype {dtype}"
            ) from err
        return Index(casted, name=self.name, dtype=dtype)

    _index_shared_docs[
        "take"
    ] = """
    Return a new %(klass)s of the values selected by the indices.

    For internal compatibility with numpy arrays.

    Parameters
    ----------
    indices : list
        Indices to be taken.
    axis : int, optional
        The axis over which to select values, always 0.
    allow_fill : bool, default True
    fill_value : bool, default None
        If allow_fill=True and fill_value is not None, indices specified by
        -1 are regarded as NA. If Index doesn't hold NA, raise ValueError.

    Returns
    -------
    numpy.ndarray
        Elements of given indices.

    See Also
    --------
    numpy.ndarray.take: Return an array formed from the
        elements of a at the given indices.
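
    Examples
    --------
    >>> idx = pd.Index(['a', 'b', 'c'])
    >>> idx.take([2, 0, 1])
    Index(['c', 'a', 'b'], dtype='object')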
    """

    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
        if kwargs:
            nv.validate_take((), kwargs)
        indices = ensure_platform_int(indices)
        allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices)

        # Note: we discard fill_value and use self._na_value, only relevant
        # in the case where allow_fill is True and fill_value is not None
        taken = algos.take(
            self._values, indices, allow_fill=allow_fill, fill_value=self._na_value
        )
        return self._shallow_copy(taken)

    def _maybe_disallow_fill(self, allow_fill: bool, fill_value, indices) -> bool:
        """
        We only use pandas-style take when allow_fill is True _and_
        fill_value is not None.
        """
        if allow_fill and fill_value is not None:
            # only fill if we are passing a non-None fill_value
            if self._can_hold_na:
                if (indices < -1).any():
                    raise ValueError(
                        "When allow_fill=True and fill_value is not None, "
                        "all indices must be >= -1"
                    )
            else:
                cls_name = type(self).__name__
                raise ValueError(
                    f"Unable to fill values because {cls_name} cannot contain NA"
                )
        else:
            allow_fill = False
        return allow_fill

    _index_shared_docs[
        "repeat"
    ] = """
    Repeat elements of a %(klass)s.

    Returns a new %(klass)s where each element of the current %(klass)s
    is repeated consecutively a given number of times.

    Parameters
    ----------
    repeats : int or array of ints
        The number of repetitions for each element. This should be a
        non-negative integer. Repeating 0 times will return an empty
        %(klass)s.
    axis : None
        Must be ``None``. Has no effect but is accepted for compatibility
        with numpy.

    Returns
    -------
    repeated_index : %(klass)s
        Newly created %(klass)s with repeated elements.

    See Also
    --------
    Series.repeat : Equivalent function for Series.
    numpy.repeat : Similar method for :class:`numpy.ndarray`.

    Examples
    --------
    >>> idx = pd.Index(['a', 'b', 'c'])
    >>> idx
    Index(['a', 'b', 'c'], dtype='object')
    >>> idx.repeat(2)
    Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
    >>> idx.repeat([1, 2, 3])
    Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
    """

    @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
    def repeat(self, repeats, axis=None):
        repeats = ensure_platform_int(repeats)
        nv.validate_repeat((), {"axis": axis})
        return self._shallow_copy(self._values.repeat(repeats))

    # --------------------------------------------------------------------
    # Copying Methods

    def copy(
        self: _IndexT,
        name: Optional[Label] = None,
        deep: bool = False,
        dtype: Optional[Dtype] = None,
        names: Optional[Sequence[Label]] = None,
    ) -> _IndexT:
        """
        Make a copy of this object.

        Name and dtype set those attributes on the new object.

        Parameters
        ----------
        name : Label, optional
            Set name for new object.
        deep : bool, default False
        dtype : numpy dtype or pandas type, optional
            Set dtype for new object.

            .. deprecated:: 1.2.0
                use ``astype`` method instead.
        names : list-like, optional
            Kept for compatibility with MultiIndex. Should not be used.

        Returns
        -------
        Index
            Index refers to a new object which is a copy of this object.

        Notes
        -----
        In most cases, there should be no functional difference from using
        ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
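
        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> new_idx = idx.copy()
        >>> idx is new_idx
        False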
        """
        name = self._validate_names(name=name, names=names, deep=deep)[0]
        if deep:
            new_index = self._shallow_copy(self._data.copy(), name=name)
        else:
            new_index = self._shallow_copy(name=name)

        if dtype:
            warnings.warn(
                "parameter dtype is deprecated and will be removed in a future "
                "version. Use the astype method instead.",
                FutureWarning,
                stacklevel=2,
            )
            new_index = new_index.astype(dtype)
        return new_index

    @final
    def __copy__(self, **kwargs):
        return self.copy(**kwargs)

    @final
    def __deepcopy__(self, memo=None):
        """
        Parameters
        ----------
        memo, default None
            Standard signature. Unused
        """
        return self.copy(deep=True)

    # --------------------------------------------------------------------
    # Rendering Methods

    def __repr__(self) -> str_t:
        """
        Return a string representation for this object.
        """
        klass_name = type(self).__name__
        data = self._format_data()
        attrs = self._format_attrs()
        space = self._format_space()
        attrs_str = [f"{k}={v}" for k, v in attrs]
        prepr = f",{space}".join(attrs_str)

        # no data provided, just attributes
        if data is None:
            data = ""

        res = f"{klass_name}({data}{prepr})"

        return res

    def _format_space(self) -> str_t:

        # using space here controls if the attributes
        # are line separated or not (the default)

        # max_seq_items = get_option('display.max_seq_items')
        # if len(self) > max_seq_items:
        #     space = "\n%s" % (' ' * (len(klass) + 1))
        return " "

    @property
    def _formatter_func(self):
        """
        Return the formatter function.
        """
        return default_pprint

    def _format_data(self, name=None) -> str_t:
        """
        Return the formatted data as a unicode string.
        """
        # do we want to justify (only do so for non-objects)
        is_justify = True

        if self.inferred_type == "string":
            is_justify = False
        elif self.inferred_type == "categorical":
            # error: "Index" has no attribute "categories"
            if is_object_dtype(self.categories):  # type: ignore[attr-defined]
                is_justify = False

        return format_object_summary(
            self, self._formatter_func, is_justify=is_justify, name=name
        )

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr,formatted_value).
        """
        return format_object_attrs(self)

    def _mpl_repr(self):
        # how to represent ourselves to matplotlib
        return self.values

    def format(
        self,
        name: bool = False,
        formatter: Optional[Callable] = None,
        na_rep: str_t = "NaN",
    ) -> List[str_t]:
        """
        Render a string representation of the Index.
        """
        header = []
        if name:
            header.append(
                pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
                if self.name is not None
                else ""
            )

        if formatter is not None:
            return header + list(self.map(formatter))

        return self._format_with_header(header, na_rep=na_rep)

    def _format_with_header(
        self, header: List[str_t], na_rep: str_t = "NaN"
    ) -> List[str_t]:
        from pandas.io.formats.format import format_array

        values = self._values

        if is_object_dtype(values.dtype):
            values = lib.maybe_convert_objects(values, safe=1)

        if is_object_dtype(values.dtype):
            result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]

            # could have nans
            mask = isna(values)
            if mask.any():
                result_arr = np.array(result)
                result_arr[mask] = na_rep
                result = result_arr.tolist()
        else:
            result = trim_front(format_array(values, None, justify="left"))
        return header + result

    def to_native_types(self, slicer=None, **kwargs):
        """
        Format specified values of `self` and return them.

        .. deprecated:: 1.2.0

        Parameters
        ----------
        slicer : int, array-like
            An indexer into `self` that specifies which values
            are used in the formatting process.
        kwargs : dict
            Options for specifying how the values should be formatted.
            These options include the following:

            1) na_rep : str
                The value that serves as a placeholder for NULL values
            2) quoting : bool or None
                Whether or not there are quoted values in `self`
            3) date_format : str
                The format used to represent date-like values.

        Returns
        -------
        numpy.ndarray
            Formatted values.
        """
        warnings.warn(
            "The 'to_native_types' method is deprecated and will be removed in "
            "a future version. Use 'astype(str)' instead.",
            FutureWarning,
            stacklevel=2,
        )
        values = self
        if slicer is not None:
            values = values[slicer]
        return values._format_native_types(**kwargs)

    def _format_native_types(self, na_rep="", quoting=None, **kwargs):
        """
        Actually format specific types of the index.
        """
        mask = isna(self)
        if not self.is_object() and not quoting:
            values = np.asarray(self).astype(str)
        else:
            values = np.array(self, dtype=object, copy=True)

        values[mask] = na_rep
        return values

    def _summary(self, name=None) -> str_t:
        """
        Return a summarized representation.

        Parameters
        ----------
        name : str
            Name to use in the summary representation.

        Returns
        -------
        String with a summarized representation of the index
        """
        if len(self) > 0:
            head = self[0]
            if hasattr(head, "format") and not isinstance(head, str):
                head = head.format()
            tail = self[-1]
            if hasattr(tail, "format") and not isinstance(tail, str):
                tail = tail.format()
            index_summary = f", {head} to {tail}"
        else:
            index_summary = ""

        if name is None:
            name = type(self).__name__
        return f"{name}: {len(self)} entries{index_summary}"

    # --------------------------------------------------------------------
    # Conversion Methods

    def to_flat_index(self):
        """
        Identity method.

        .. versionadded:: 0.24.0

        This is implemented for compatibility with subclass implementations
        when chaining.

        Returns
        -------
        pd.Index
            Caller.

        See Also
        --------
        MultiIndex.to_flat_index : Subclass implementation.
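
        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> idx.to_flat_index() is idx
        True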
        """
        return self

    def to_series(self, index=None, name=None):
        """
        Create a Series with both index and values equal to the index keys.

        Useful with map for returning an indexer based on an index.

        Parameters
        ----------
        index : Index, optional
            Index of resulting Series. If None, defaults to original index.
        name : str, optional
            Name of resulting Series. If None, defaults to name of original
            index.

        Returns
        -------
        Series
            The dtype will be based on the type of the Index values.

        See Also
        --------
        Index.to_frame : Convert an Index to a DataFrame.
        Series.to_frame : Convert Series to DataFrame.

        Examples
        --------
        >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')

        By default, the original Index and original name is reused.

        >>> idx.to_series()
        animal
        Ant      Ant
        Bear    Bear
        Cow      Cow
        Name: animal, dtype: object

        To enforce a new Index, specify new labels to ``index``:

        >>> idx.to_series(index=[0, 1, 2])
        0     Ant
        1    Bear
        2     Cow
        Name: animal, dtype: object

        To override the name of the resulting column, specify `name`:

        >>> idx.to_series(name='zoo')
        animal
        Ant      Ant
        Bear    Bear
        Cow      Cow
        Name: zoo, dtype: object
        """
        from pandas import Series

        if index is None:
            index = self._shallow_copy()
        if name is None:
            name = self.name

        return Series(self.values.copy(), index=index, name=name)

    def to_frame(self, index: bool = True, name=None):
        """
        Create a DataFrame with a column containing the Index.

        .. versionadded:: 0.24.0

        Parameters
        ----------
        index : bool, default True
            Set the index of the returned DataFrame as the original Index.

        name : object, default None
            The passed name should substitute for the index name (if it has
            one).

        Returns
        -------
        DataFrame
            DataFrame containing the original Index data.

        See Also
        --------
        Index.to_series : Convert an Index to a Series.
        Series.to_frame : Convert Series to DataFrame.

        Examples
        --------
        >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')
        >>> idx.to_frame()
               animal
        animal
        Ant       Ant
        Bear     Bear
        Cow       Cow

        By default, the original Index is reused. To enforce a new Index:

        >>> idx.to_frame(index=False)
          animal
        0    Ant
        1   Bear
        2    Cow

        To override the name of the resulting column, specify `name`:

        >>> idx.to_frame(index=False, name='zoo')
            zoo
        0    Ant
        1   Bear
        2    Cow
        """
        from pandas import DataFrame

        if name is None:
            name = self.name or 0
        result = DataFrame({name: self._values.copy()})

        if index:
            result.index = self
        return result

    # --------------------------------------------------------------------
    # Name-Centric Methods

    @property
    def name(self):
        """
        Return Index or MultiIndex name.
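
        Examples
        --------
        >>> idx = pd.Index([1, 2, 3], name='x')
        >>> idx.name
        'x'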
        """
        return self._name

    @name.setter
    def name(self, value):
        if self._no_setting_name:
            # Used in MultiIndex.levels to avoid silently ignoring name updates.
            raise RuntimeError(
                "Cannot set name on a level of a MultiIndex. Use "
                "'MultiIndex.set_names' instead."
            )
        maybe_extract_name(value, None, type(self))
        self._name = value

    @final
    def _validate_names(self, name=None, names=None, deep: bool = False) -> List[Label]:
        """
        Handles the quirks of having a singular 'name' parameter for general
        Index and plural 'names' parameter for MultiIndex.
        """
        from copy import deepcopy

        if names is not None and name is not None:
            raise TypeError("Can only provide one of `names` and `name`")
        elif names is None and name is None:
            new_names = deepcopy(self.names) if deep else self.names
        elif names is not None:
            if not is_list_like(names):
                raise TypeError("Must pass list-like as `names`.")
            new_names = names
        elif not is_list_like(name):
            new_names = [name]
        else:
            new_names = name

        if len(new_names) != len(self.names):
            raise ValueError(
                f"Length of new names must be {len(self.names)}, got {len(new_names)}"
            )

        # All items in 'new_names' need to be hashable
        validate_all_hashable(*new_names, error_name=f"{type(self).__name__}.name")

        return new_names

    def _get_names(self):
        return FrozenList((self.name,))

    def _set_names(self, values, level=None):
        """
        Set new names on index. Each name has to be a hashable type.

        Parameters
        ----------
        values : str or sequence
            name(s) to set
        level : int, level name, or sequence of int/level names (default None)
            If the index is a MultiIndex (hierarchical), level(s) to set (None
            for all levels). Otherwise level must be None.

        Raises
        ------
        TypeError if each name is not hashable.
        """
        if not is_list_like(values):
            raise ValueError("Names must be a list-like")
        if len(values) != 1:
            raise ValueError(f"Length of new names must be 1, got {len(values)}")

        # GH 20527
        # All items in 'name' need to be hashable:
        validate_all_hashable(*values, error_name=f"{type(self).__name__}.name")

        self._name = values[0]

    names = property(fset=_set_names, fget=_get_names)

    @final
    def set_names(self, names, level=None, inplace: bool = False):
        """
        Set Index or MultiIndex name.

        Able to set new names partially and by level.

        Parameters
        ----------
        names : label or list of label
            Name(s) to set.
        level : int, label or list of int or label, optional
            If the index is a MultiIndex, level(s) to set (None for all
            levels). Otherwise level must be None.
        inplace : bool, default False
            Modifies the object directly, instead of creating a new Index or
            MultiIndex.

        Returns
        -------
        Index or None
            The same type as the caller or None if ``inplace=True``.

        See Also
        --------
        Index.rename : Able to set new names without level.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3, 4])
        >>> idx
        Int64Index([1, 2, 3, 4], dtype='int64')
        >>> idx.set_names('quarter')
        Int64Index([1, 2, 3, 4], dtype='int64', name='quarter')

        >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
        ...                                   [2018, 2019]])
        >>> idx
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   )
        >>> idx.set_names(['kind', 'year'], inplace=True)
        >>> idx
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['kind', 'year'])
        >>> idx.set_names('species', level=0)
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['species', 'year'])
        """
        if level is not None and not isinstance(self, ABCMultiIndex):
            raise ValueError("Level must be None for non-MultiIndex")

        if level is not None and not is_list_like(level) and is_list_like(names):
            raise TypeError("Names must be a string when a single level is provided.")

        if not is_list_like(names) and level is None and self.nlevels > 1:
            raise TypeError("Must pass list-like as `names`.")

        if not is_list_like(names):
            names = [names]
        if level is not None and not is_list_like(level):
            level = [level]

        if inplace:
            idx = self
        else:
            idx = self._shallow_copy()
        idx._set_names(names, level=level)
        if not inplace:
            return idx

    def rename(self, name, inplace=False):
        """
        Alter Index or MultiIndex name.

        Able to set new names without level. Defaults to returning new index.
        Length of names must match number of levels in MultiIndex.

        Parameters
        ----------
        name : label or list of labels
            Name(s) to set.
        inplace : bool, default False
            Modifies the object directly, instead of creating a new Index or
            MultiIndex.

        Returns
        -------
        Index or None
            The same type as the caller or None if ``inplace=True``.

        See Also
        --------
        Index.set_names : Able to set new names partially and by level.

        Examples
        --------
        >>> idx = pd.Index(['A', 'C', 'A', 'B'], name='score')
        >>> idx.rename('grade')
        Index(['A', 'C', 'A', 'B'], dtype='object', name='grade')

        >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
        ...                                   [2018, 2019]],
        ...                                  names=['kind', 'year'])
        >>> idx
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['kind', 'year'])
        >>> idx.rename(['species', 'year'])
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['species', 'year'])
        >>> idx.rename('species')
        Traceback (most recent call last):
        TypeError: Must pass list-like as `names`.
        """
        return self.set_names([name], inplace=inplace)

    # --------------------------------------------------------------------
    # Level-Centric Methods

    @property
    def nlevels(self) -> int:
        """
        Number of levels.
        """
        return 1

    def _sort_levels_monotonic(self):
        """
        Compat with MultiIndex.
        """
        return self

    @final
    def _validate_index_level(self, level):
        """
        Validate index level.

        For single-level Index getting level number is a no-op, but some
        verification must be done like in MultiIndex.
        """
        if isinstance(level, int):
            if level < 0 and level != -1:
                raise IndexError(
                    "Too many levels: Index has only 1 level, "
                    f"{level} is not a valid level number"
                )
            elif level > 0:
                raise IndexError(
                    f"Too many levels: Index has only 1 level, not {level + 1}"
                )
        elif level != self.name:
            raise KeyError(
                f"Requested level ({level}) does not match index name ({self.name})"
            )

    def _get_level_number(self, level) -> int:
        self._validate_index_level(level)
        return 0

    def sortlevel(self, level=None, ascending=True, sort_remaining=None):
        """
        For internal compatibility with the Index API.

        Sort the Index. This is for compat with MultiIndex.

        Parameters
        ----------
        ascending : bool, default True
            False to sort in descending order.

        level, sort_remaining are compat parameters

        Returns
        -------
        Index
        """
        if not isinstance(ascending, (list, bool)):
            raise TypeError(
                "ascending must be a single bool value or "
                "a list of bool values of length 1"
            )

        if isinstance(ascending, list):
            if len(ascending) != 1:
                raise TypeError("ascending must be a list of bool values of length 1")
            ascending = ascending[0]

        if not isinstance(ascending, bool):
            raise TypeError("ascending must be a bool value")

        return self.sort_values(return_indexer=True, ascending=ascending)

    def _get_level_values(self, level):
        """
        Return an Index of values for requested level.

        This is primarily useful to get an individual level of values from a
        MultiIndex, but is provided on Index as well for compatibility.

        Parameters
        ----------
        level : int or str
            It is either the integer position or the name of the level.

        Returns
        -------
        Index
            Calling object, as there is only one level in the Index.

        See Also
        --------
        MultiIndex.get_level_values : Get values for a level of a MultiIndex.

        Notes
        -----
        For Index, level should be 0, since there is only one level.

        Examples
        --------
        >>> idx = pd.Index(list('abc'))
        >>> idx
        Index(['a', 'b', 'c'], dtype='object')

        Get level values by supplying `level` as integer:

        >>> idx.get_level_values(0)
        Index(['a', 'b', 'c'], dtype='object')
        """
        self._validate_index_level(level)
        return self

    get_level_values = _get_level_values

    @final
    def droplevel(self, level=0):
        """
        Return index with requested level(s) removed.

        If resulting index has only 1 level left, the result will be
        of Index type, not MultiIndex.

        Parameters
        ----------
        level : int, str, or list-like, default 0
            If a string is given, must be the name of a level.
            If list-like, elements must be names or indexes of levels.

        Returns
        -------
        Index or MultiIndex

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays(
        ...     [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
        >>> mi
        MultiIndex([(1, 3, 5),
                    (2, 4, 6)],
                   names=['x', 'y', 'z'])

        >>> mi.droplevel()
        MultiIndex([(3, 5),
                    (4, 6)],
                   names=['y', 'z'])

        >>> mi.droplevel(2)
        MultiIndex([(1, 3),
                    (2, 4)],
                   names=['x', 'y'])

        >>> mi.droplevel('z')
        MultiIndex([(1, 3),
                    (2, 4)],
                   names=['x', 'y'])

        >>> mi.droplevel(['x', 'y'])
        Int64Index([5, 6], dtype='int64', name='z')
        """
        if not isinstance(level, (tuple, list)):
            level = [level]

        levnums = sorted(self._get_level_number(lev) for lev in level)[::-1]

        return self._drop_level_numbers(levnums)

    def _drop_level_numbers(self, levnums: List[int]):
        """
        Drop MultiIndex levels by level _number_, not name.
        """

        if len(levnums) == 0:
            return self
        if len(levnums) >= self.nlevels:
            raise ValueError(
                f"Cannot remove {len(levnums)} levels from an index with "
                f"{self.nlevels} levels: at least one level must be left."
            )
        # The two checks above guarantee that here self is a MultiIndex
        self = cast("MultiIndex", self)

        new_levels = list(self.levels)
        new_codes = list(self.codes)
        new_names = list(self.names)

        for i in levnums:
            new_levels.pop(i)
            new_codes.pop(i)
            new_names.pop(i)

        if len(new_levels) == 1:

            # set nan if needed
            mask = new_codes[0] == -1
            result = new_levels[0].take(new_codes[0])
            if mask.any():
                result = result.putmask(mask, np.nan)

            result._name = new_names[0]
            return result
        else:
            from pandas.core.indexes.multi import MultiIndex

            return MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=new_names,
                verify_integrity=False,
            )

    def _get_grouper_for_level(self, mapper, level=None):
        """
        Get index grouper corresponding to an index level.

        Parameters
        ----------
        mapper : Group mapping function or None
            Function mapping index values to groups.
        level : int or None
            Index level.

        Returns
        -------
        grouper : Index
            Index of values to group on.
        labels : ndarray of int or None
            Array of locations in level_index.
        uniques : Index or None
            Index of unique values for level.
        """
        assert level is None or level == 0
        if mapper is None:
            grouper = self
        else:
            grouper = self.map(mapper)

        return grouper, None, None

    # --------------------------------------------------------------------
    # Introspection Methods

    @final
    @property
    def is_monotonic(self) -> bool:
        """
        Alias for is_monotonic_increasing.
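
        Examples
        --------
        >>> Index([1, 2, 3]).is_monotonic
        True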
        """
        return self.is_monotonic_increasing

    @property
    def is_monotonic_increasing(self) -> bool:
        """
        Return if the index is monotonic increasing (only equal or
        increasing values).

        Examples
        --------
        >>> Index([1, 2, 3]).is_monotonic_increasing
        True
        >>> Index([1, 2, 2]).is_monotonic_increasing
        True
        >>> Index([1, 3, 2]).is_monotonic_increasing
        False
        """
        return self._engine.is_monotonic_increasing

    @property
    def is_monotonic_decreasing(self) -> bool:
        """
        Return if the index is monotonic decreasing (only equal or
        decreasing values).

        Examples
        --------
        >>> Index([3, 2, 1]).is_monotonic_decreasing
        True
        >>> Index([3, 2, 2]).is_monotonic_decreasing
        True
        >>> Index([3, 1, 2]).is_monotonic_decreasing
        False
        """
        return self._engine.is_monotonic_decreasing

    @property
    def _is_strictly_monotonic_increasing(self) -> bool:
        """
        Return if the index is strictly monotonic increasing
        (only increasing values).

        Examples
        --------
        >>> Index([1, 2, 3])._is_strictly_monotonic_increasing
        True
        >>> Index([1, 2, 2])._is_strictly_monotonic_increasing
        False
        >>> Index([1, 3, 2])._is_strictly_monotonic_increasing
        False
        """
        return self.is_unique and self.is_monotonic_increasing

    @property
    def _is_strictly_monotonic_decreasing(self) -> bool:
        """
        Return if the index is strictly monotonic decreasing
        (only decreasing values).

        Examples
        --------
        >>> Index([3, 2, 1])._is_strictly_monotonic_decreasing
        True
        >>> Index([3, 2, 2])._is_strictly_monotonic_decreasing
        False
        >>> Index([3, 1, 2])._is_strictly_monotonic_decreasing
        False
        """
        return self.is_unique and self.is_monotonic_decreasing

    @cache_readonly
    def is_unique(self) -> bool:
        """
        Return if the index has unique values.
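
        Examples
        --------
        >>> idx = pd.Index([1, 5, 7, 7])
        >>> idx.is_unique
        False

        >>> idx = pd.Index([1, 5, 7])
        >>> idx.is_unique
        True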
|
||
|
"""
|
||
|
return self._engine.is_unique
|
||
|
|
||
|
@property
|
||
|
def has_duplicates(self) -> bool:
|
||
|
"""
|
||
|
Check if the Index has duplicate values.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
bool
|
||
|
Whether or not the Index has duplicate values.
|
||
|
|
||
|
Examples
|
||
|
--------
|
||
|
>>> idx = pd.Index([1, 5, 7, 7])
|
||
|
>>> idx.has_duplicates
|
||
|
True
|
||
|
|
||
|
>>> idx = pd.Index([1, 5, 7])
|
||
|
>>> idx.has_duplicates
|
||
|
False
|
||
|
|
||
|
>>> idx = pd.Index(["Watermelon", "Orange", "Apple",
|
||
|
... "Watermelon"]).astype("category")
|
||
|
>>> idx.has_duplicates
|
||
|
True
|
||
|
|
||
|
>>> idx = pd.Index(["Orange", "Apple",
|
||
|
... "Watermelon"]).astype("category")
|
||
|
>>> idx.has_duplicates
|
||
|
False
|
||
|
"""
|
||
|
return not self.is_unique
|
||
|
|
||
|
    @final
    def is_boolean(self) -> bool:
        """
        Check if the Index only consists of booleans.

        Returns
        -------
        bool
            Whether or not the Index only consists of booleans.

        See Also
        --------
        is_integer : Check if the Index only consists of integers.
        is_floating : Check if the Index is a floating type.
        is_numeric : Check if the Index only consists of numeric data.
        is_object : Check if the Index is of the object dtype.
        is_categorical : Check if the Index holds categorical data.
        is_interval : Check if the Index holds Interval objects.
        is_mixed : Check if the Index holds data with mixed data types.

        Examples
        --------
        >>> idx = pd.Index([True, False, True])
        >>> idx.is_boolean()
        True

        >>> idx = pd.Index(["True", "False", "True"])
        >>> idx.is_boolean()
        False

        >>> idx = pd.Index([True, False, "True"])
        >>> idx.is_boolean()
        False
        """
        return self.inferred_type in ["boolean"]

    @final
    def is_integer(self) -> bool:
        """
        Check if the Index only consists of integers.

        Returns
        -------
        bool
            Whether or not the Index only consists of integers.

        See Also
        --------
        is_boolean : Check if the Index only consists of booleans.
        is_floating : Check if the Index is a floating type.
        is_numeric : Check if the Index only consists of numeric data.
        is_object : Check if the Index is of the object dtype.
        is_categorical : Check if the Index holds categorical data.
        is_interval : Check if the Index holds Interval objects.
        is_mixed : Check if the Index holds data with mixed data types.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3, 4])
        >>> idx.is_integer()
        True

        >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
        >>> idx.is_integer()
        False

        >>> idx = pd.Index(["Apple", "Mango", "Watermelon"])
        >>> idx.is_integer()
        False
        """
        return self.inferred_type in ["integer"]

    @final
    def is_floating(self) -> bool:
        """
        Check if the Index is a floating type.

        The Index may consist of only floats, NaNs, or a mix of floats,
        integers, or NaNs.

        Returns
        -------
        bool
            Whether or not the Index only consists of floats, NaNs, or
            a mix of floats, integers, or NaNs.

        See Also
        --------
        is_boolean : Check if the Index only consists of booleans.
        is_integer : Check if the Index only consists of integers.
        is_numeric : Check if the Index only consists of numeric data.
        is_object : Check if the Index is of the object dtype.
        is_categorical : Check if the Index holds categorical data.
        is_interval : Check if the Index holds Interval objects.
        is_mixed : Check if the Index holds data with mixed data types.

        Examples
        --------
        >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
        >>> idx.is_floating()
        True

        >>> idx = pd.Index([1.0, 2.0, np.nan, 4.0])
        >>> idx.is_floating()
        True

        >>> idx = pd.Index([1, 2, 3, 4, np.nan])
        >>> idx.is_floating()
        True

        >>> idx = pd.Index([1, 2, 3, 4])
        >>> idx.is_floating()
        False
        """
        return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"]

    @final
    def is_numeric(self) -> bool:
        """
        Check if the Index only consists of numeric data.

        Returns
        -------
        bool
            Whether or not the Index only consists of numeric data.

        See Also
        --------
        is_boolean : Check if the Index only consists of booleans.
        is_integer : Check if the Index only consists of integers.
        is_floating : Check if the Index is a floating type.
        is_object : Check if the Index is of the object dtype.
        is_categorical : Check if the Index holds categorical data.
        is_interval : Check if the Index holds Interval objects.
        is_mixed : Check if the Index holds data with mixed data types.

        Examples
        --------
        >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
        >>> idx.is_numeric()
        True

        >>> idx = pd.Index([1, 2, 3, 4.0])
        >>> idx.is_numeric()
        True

        >>> idx = pd.Index([1, 2, 3, 4])
        >>> idx.is_numeric()
        True

        >>> idx = pd.Index([1, 2, 3, 4.0, np.nan])
        >>> idx.is_numeric()
        True

        >>> idx = pd.Index([1, 2, 3, 4.0, np.nan, "Apple"])
        >>> idx.is_numeric()
        False
        """
        return self.inferred_type in ["integer", "floating"]

    @final
    def is_object(self) -> bool:
        """
        Check if the Index is of the object dtype.

        Returns
        -------
        bool
            Whether or not the Index is of the object dtype.

        See Also
        --------
        is_boolean : Check if the Index only consists of booleans.
        is_integer : Check if the Index only consists of integers.
        is_floating : Check if the Index is a floating type.
        is_numeric : Check if the Index only consists of numeric data.
        is_categorical : Check if the Index holds categorical data.
        is_interval : Check if the Index holds Interval objects.
        is_mixed : Check if the Index holds data with mixed data types.

        Examples
        --------
        >>> idx = pd.Index(["Apple", "Mango", "Watermelon"])
        >>> idx.is_object()
        True

        >>> idx = pd.Index(["Apple", "Mango", 2.0])
        >>> idx.is_object()
        True

        >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
        ...                 "Watermelon"]).astype("category")
        >>> idx.is_object()
        False

        >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
        >>> idx.is_object()
        False
        """
        return is_object_dtype(self.dtype)

    @final
    def is_categorical(self) -> bool:
        """
        Check if the Index holds categorical data.

        Returns
        -------
        bool
            True if the Index is categorical.

        See Also
        --------
        CategoricalIndex : Index for categorical data.
        is_boolean : Check if the Index only consists of booleans.
        is_integer : Check if the Index only consists of integers.
        is_floating : Check if the Index is a floating type.
        is_numeric : Check if the Index only consists of numeric data.
        is_object : Check if the Index is of the object dtype.
        is_interval : Check if the Index holds Interval objects.
        is_mixed : Check if the Index holds data with mixed data types.

        Examples
        --------
        >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
        ...                 "Watermelon"]).astype("category")
        >>> idx.is_categorical()
        True

        >>> idx = pd.Index([1, 3, 5, 7])
        >>> idx.is_categorical()
        False

        >>> s = pd.Series(["Peter", "Victor", "Elisabeth", "Mar"])
        >>> s
        0        Peter
        1       Victor
        2    Elisabeth
        3          Mar
        dtype: object
        >>> s.index.is_categorical()
        False
        """
        return self.inferred_type in ["categorical"]

    @final
    def is_interval(self) -> bool:
        """
        Check if the Index holds Interval objects.

        Returns
        -------
        bool
            Whether or not the Index holds Interval objects.

        See Also
        --------
        IntervalIndex : Index for Interval objects.
        is_boolean : Check if the Index only consists of booleans.
        is_integer : Check if the Index only consists of integers.
        is_floating : Check if the Index is a floating type.
        is_numeric : Check if the Index only consists of numeric data.
        is_object : Check if the Index is of the object dtype.
        is_categorical : Check if the Index holds categorical data.
        is_mixed : Check if the Index holds data with mixed data types.

        Examples
        --------
        >>> idx = pd.Index([pd.Interval(left=0, right=5),
        ...                 pd.Interval(left=5, right=10)])
        >>> idx.is_interval()
        True

        >>> idx = pd.Index([1, 3, 5, 7])
        >>> idx.is_interval()
        False
        """
        return self.inferred_type in ["interval"]

    @final
    def is_mixed(self) -> bool:
        """
        Check if the Index holds data with mixed data types.

        Returns
        -------
        bool
            Whether or not the Index holds data with mixed data types.

        See Also
        --------
        is_boolean : Check if the Index only consists of booleans.
        is_integer : Check if the Index only consists of integers.
        is_floating : Check if the Index is a floating type.
        is_numeric : Check if the Index only consists of numeric data.
        is_object : Check if the Index is of the object dtype.
        is_categorical : Check if the Index holds categorical data.
        is_interval : Check if the Index holds Interval objects.

        Examples
        --------
        >>> idx = pd.Index(['a', np.nan, 'b'])
        >>> idx.is_mixed()
        True

        >>> idx = pd.Index([1.0, 2.0, 3.0, 5.0])
        >>> idx.is_mixed()
        False
        """
        warnings.warn(
            "Index.is_mixed is deprecated and will be removed in a future version. "
            "Check index.inferred_type directly instead.",
            FutureWarning,
            stacklevel=2,
        )
        return self.inferred_type in ["mixed"]

    @final
    def holds_integer(self) -> bool:
        """
        Whether the type is an integer type.
        """
        return self.inferred_type in ["integer", "mixed-integer"]

    @cache_readonly
    def inferred_type(self) -> str_t:
        """
        Return a string of the type inferred from the values.
        """
        return lib.infer_dtype(self._values, skipna=False)

    @cache_readonly
    def _is_all_dates(self) -> bool:
        """
        Whether or not the index values only consist of dates.
        """
        return is_datetime_array(ensure_object(self._values))

    @cache_readonly
    def is_all_dates(self):
        """
        Whether or not the index values only consist of dates.
        """
        warnings.warn(
            "Index.is_all_dates is deprecated, will be removed in a future version. "
            "check index.inferred_type instead",
            FutureWarning,
            stacklevel=2,
        )
        return self._is_all_dates

    # --------------------------------------------------------------------
    # Pickle Methods

    def __reduce__(self):
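        # On unpickling, the module-level ``_new_Index`` helper is called
        # with the concrete Index subclass and this dict (the underlying
        # data plus attributes such as ``name``) to rebuild the object.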
d = {"data": self._data}
|
||
|
d.update(self._get_attributes_dict())
|
||
|
return _new_Index, (type(self), d), None
|
||
|
|
||
|
    # --------------------------------------------------------------------
    # Null Handling Methods

    _na_value = np.nan
    """The expected NA value to use with this index."""

    @cache_readonly
    def _isnan(self):
        """
        Return if each value is NaN.
        """
        if self._can_hold_na:
            return isna(self)
        else:
            # we should not reach this branch if callers check hasnans beforehand
            values = np.empty(len(self), dtype=np.bool_)
            values.fill(False)
            return values

    @cache_readonly
    @final
    def _nan_idxs(self):
        if self._can_hold_na:
            return self._isnan.nonzero()[0]
        else:
            return np.array([], dtype=np.int64)

    @cache_readonly
    def hasnans(self) -> bool:
        """
        Return True if there are any NaNs; enables various performance speedups.
        """
        if self._can_hold_na:
            return bool(self._isnan.any())
        else:
            return False

    @final
    def isna(self):
        """
        Detect missing values.

        Return a boolean same-sized object indicating if the values are NA.
        NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`pd.NaT`, get
        mapped to ``True`` values.
        Everything else gets mapped to ``False`` values. Characters such as
        empty strings `''` or :attr:`numpy.inf` are not considered NA values
        (unless you set ``pandas.options.mode.use_inf_as_na = True``).

        Returns
        -------
        numpy.ndarray
            A boolean array of whether my values are NA.

        See Also
        --------
        Index.notna : Boolean inverse of isna.
        Index.dropna : Omit entries with missing values.
        isna : Top-level isna.
        Series.isna : Detect missing values in Series object.

        Examples
        --------
        Show which entries in a pandas.Index are NA. The result is an
        array.

        >>> idx = pd.Index([5.2, 6.0, np.NaN])
        >>> idx
        Float64Index([5.2, 6.0, nan], dtype='float64')
        >>> idx.isna()
        array([False, False,  True])

        Empty strings are not considered NA values. None is considered an NA
        value.

        >>> idx = pd.Index(['black', '', 'red', None])
        >>> idx
        Index(['black', '', 'red', None], dtype='object')
        >>> idx.isna()
        array([False, False, False,  True])

        For datetimes, `NaT` (Not a Time) is considered as an NA value.

        >>> idx = pd.DatetimeIndex([pd.Timestamp('1940-04-25'),
        ...                         pd.Timestamp(''), None, pd.NaT])
        >>> idx
        DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'],
                      dtype='datetime64[ns]', freq=None)
        >>> idx.isna()
        array([False,  True,  True,  True])
        """
        return self._isnan

    isnull = isna

    @final
    def notna(self):
        """
        Detect existing (non-missing) values.

        Return a boolean same-sized object indicating if the values are not NA.
        Non-missing values get mapped to ``True``. Characters such as empty
        strings ``''`` or :attr:`numpy.inf` are not considered NA values
        (unless you set ``pandas.options.mode.use_inf_as_na = True``).
        NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False``
        values.

        Returns
        -------
        numpy.ndarray
            Boolean array to indicate which entries are not NA.

        See Also
        --------
        Index.notnull : Alias of notna.
        Index.isna : Inverse of notna.
        notna : Top-level notna.

        Examples
        --------
        Show which entries in an Index are not NA. The result is an
        array.

        >>> idx = pd.Index([5.2, 6.0, np.NaN])
        >>> idx
        Float64Index([5.2, 6.0, nan], dtype='float64')
        >>> idx.notna()
        array([ True,  True, False])

        Empty strings are not considered NA values. None is considered an NA
        value.

        >>> idx = pd.Index(['black', '', 'red', None])
        >>> idx
        Index(['black', '', 'red', None], dtype='object')
        >>> idx.notna()
        array([ True,  True,  True, False])
        """
        return ~self.isna()

    notnull = notna

    def fillna(self, value=None, downcast=None):
        """
        Fill NA/NaN values with the specified value.

        Parameters
        ----------
        value : scalar
            Scalar value to use to fill holes (e.g. 0).
            This value cannot be list-like.
        downcast : dict, default None
            A dict of item->dtype of what to downcast if possible,
            or the string 'infer' which will try to downcast to an appropriate
            equal type (e.g. float64 to int64 if possible).

        Returns
        -------
        Index

        See Also
        --------
        DataFrame.fillna : Fill NaN values of a DataFrame.
        Series.fillna : Fill NaN values of a Series.
        """
        value = self._require_scalar(value)
        if self.hasnans:
            result = self.putmask(self._isnan, value)
            if downcast is None:
                # no need to preserve metadata other than name, because
                # the result cannot have a freq if it contained NaNs
                return Index(result, name=self.name)
        return self._shallow_copy()

def dropna(self, how="any"):
|
||
|
"""
|
||
|
Return Index without NA/NaN values.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
how : {'any', 'all'}, default 'any'
|
||
|
If the Index is a MultiIndex, drop the value when any or all levels
|
||
|
are NaN.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
Index
|
||
|
"""
|
||
|
if how not in ("any", "all"):
|
||
|
raise ValueError(f"invalid how option: {how}")
|
||
|
|
||
|
if self.hasnans:
|
||
|
return self._shallow_copy(self._values[~self._isnan])
|
||
|
return self._shallow_copy()
|
||
|
|
||
|
    # --------------------------------------------------------------------
    # Uniqueness Methods

    def unique(self, level=None):
        """
        Return unique values in the index.

        Unique values are returned in order of appearance; this does NOT sort.

        Parameters
        ----------
        level : int or str, optional, default None
            Only return values from specified level (for MultiIndex).

        Returns
        -------
        Index without duplicates

        See Also
        --------
        unique : Numpy array of unique values in that column.
        Series.unique : Return unique values of Series object.
        """
        if level is not None:
            self._validate_index_level(level)

        if self.is_unique:
            return self._shallow_copy()

        result = super().unique()
        return self._shallow_copy(result)

    @final
    def drop_duplicates(self, keep="first"):
        """
        Return Index with duplicate values removed.

        Parameters
        ----------
        keep : {'first', 'last', ``False``}, default 'first'
            - 'first' : Drop duplicates except for the first occurrence.
            - 'last' : Drop duplicates except for the last occurrence.
            - ``False`` : Drop all duplicates.

        Returns
        -------
        deduplicated : Index

        See Also
        --------
        Series.drop_duplicates : Equivalent method on Series.
        DataFrame.drop_duplicates : Equivalent method on DataFrame.
        Index.duplicated : Related method on Index, indicating duplicate
            Index values.

        Examples
        --------
        Generate a pandas.Index with duplicate values.

        >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'])

        The `keep` parameter controls which duplicate values are removed.
        The value 'first' keeps the first occurrence for each
        set of duplicated entries. The default value of keep is 'first'.

        >>> idx.drop_duplicates(keep='first')
        Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object')

        The value 'last' keeps the last occurrence for each set of duplicated
        entries.

        >>> idx.drop_duplicates(keep='last')
        Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object')

        The value ``False`` discards all sets of duplicated entries.

        >>> idx.drop_duplicates(keep=False)
        Index(['cow', 'beetle', 'hippo'], dtype='object')
        """
        if self.is_unique:
            return self._shallow_copy()

        return super().drop_duplicates(keep=keep)

def duplicated(self, keep="first"):
|
||
|
"""
|
||
|
Indicate duplicate index values.
|
||
|
|
||
|
Duplicated values are indicated as ``True`` values in the resulting
|
||
|
array. Either all duplicates, all except the first, or all except the
|
||
|
last occurrence of duplicates can be indicated.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
keep : {'first', 'last', False}, default 'first'
|
||
|
The value or values in a set of duplicates to mark as missing.
|
||
|
|
||
|
- 'first' : Mark duplicates as ``True`` except for the first
|
||
|
occurrence.
|
||
|
- 'last' : Mark duplicates as ``True`` except for the last
|
||
|
occurrence.
|
||
|
- ``False`` : Mark all duplicates as ``True``.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
numpy.ndarray
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
Series.duplicated : Equivalent method on pandas.Series.
|
||
|
DataFrame.duplicated : Equivalent method on pandas.DataFrame.
|
||
|
Index.drop_duplicates : Remove duplicate values from Index.
|
||
|
|
||
|
Examples
|
||
|
--------
|
||
|
By default, for each set of duplicated values, the first occurrence is
|
||
|
set to False and all others to True:
|
||
|
|
||
|
>>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama'])
|
||
|
>>> idx.duplicated()
|
||
|
array([False, False, True, False, True])
|
||
|
|
||
|
which is equivalent to
|
||
|
|
||
|
>>> idx.duplicated(keep='first')
|
||
|
array([False, False, True, False, True])
|
||
|
|
||
|
By using 'last', the last occurrence of each set of duplicated values
|
||
|
is set on False and all others on True:
|
||
|
|
||
|
>>> idx.duplicated(keep='last')
|
||
|
array([ True, False, True, False, False])
|
||
|
|
||
|
By setting keep on ``False``, all duplicates are True:
|
||
|
|
||
|
>>> idx.duplicated(keep=False)
|
||
|
array([ True, False, True, False, True])
|
||
|
"""
|
||
|
if self.is_unique:
|
||
|
# fastpath available bc we are immutable
|
||
|
return np.zeros(len(self), dtype=bool)
|
||
|
return super().duplicated(keep=keep)
|
||
|
|
||
|
    def _get_unique_index(self, dropna: bool = False):
        """
        Returns an index containing unique values.

        Parameters
        ----------
        dropna : bool, default False
            If True, NaN values are dropped.

        Returns
        -------
        uniques : index
        """
        if self.is_unique and not dropna:
            return self

        if not self.is_unique:
            values = self.unique()
            if not isinstance(self, ABCMultiIndex):
                # extract an array to pass to _shallow_copy
                values = values._data
        else:
            values = self._values

        if dropna and not isinstance(self, ABCMultiIndex):
            # isna not defined for MultiIndex
            if self.hasnans:
                values = values[~isna(values)]

        return self._shallow_copy(values)

    # --------------------------------------------------------------------
    # Arithmetic & Logical Methods

    def __iadd__(self, other):
        # alias for __add__
        return self + other

    @final
    def __and__(self, other):
        warnings.warn(
            "Index.__and__ operating as a set operation is deprecated, "
            "in the future this will be a logical operation matching "
            "Series.__and__. Use index.intersection(other) instead",
            FutureWarning,
            stacklevel=2,
        )
        return self.intersection(other)

    @final
    def __or__(self, other):
        warnings.warn(
            "Index.__or__ operating as a set operation is deprecated, "
            "in the future this will be a logical operation matching "
            "Series.__or__. Use index.union(other) instead",
            FutureWarning,
            stacklevel=2,
        )
        return self.union(other)

    @final
    def __xor__(self, other):
        warnings.warn(
            "Index.__xor__ operating as a set operation is deprecated, "
            "in the future this will be a logical operation matching "
            "Series.__xor__. Use index.symmetric_difference(other) instead",
            FutureWarning,
            stacklevel=2,
        )
        return self.symmetric_difference(other)

    @final
    def __nonzero__(self):
        raise ValueError(
            f"The truth value of a {type(self).__name__} is ambiguous. "
            "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
        )

    __bool__ = __nonzero__

    # --------------------------------------------------------------------
    # Set Operation Methods

    @final
    def _get_reconciled_name_object(self, other):
        """
        If the result of a set operation will be self, return self,
        unless the name changes, in which case make a shallow copy of self.
        """
        name = get_op_result_name(self, other)
        if self.name != name:
            return self.rename(name)
        return self

    @final
    def _union_incompatible_dtypes(self, other, sort):
        """
        Casts this and other index to object dtype to allow the formation
        of a union between incompatible types.

        Parameters
        ----------
        other : Index or array-like
        sort : False or None, default False
            Whether to sort the resulting index.

            * False : do not sort the result.
            * None : sort the result, except when `self` and `other` are equal
              or when the values cannot be compared.

        Returns
        -------
        Index
        """
        this = self.astype(object, copy=False)
        # cast to Index for when `other` is list-like
        other = Index(other).astype(object, copy=False)
        return Index.union(this, other, sort=sort).astype(object, copy=False)

    def _can_union_without_object_cast(self, other) -> bool:
        """
        Check whether this and the other dtype are compatible with each
        other, i.e. whether a union can be formed between them without
        needing to cast to dtype object.

        Parameters
        ----------
        other : Index or array-like

        Returns
        -------
        bool
        """
        return type(self) is type(other) and is_dtype_equal(self.dtype, other.dtype)

    @final
    def _validate_sort_keyword(self, sort):
        if sort not in [None, False]:
            raise ValueError(
                "The 'sort' keyword only takes the values of "
                f"None or False; {sort} was passed."
            )

    def union(self, other, sort=None):
        """
        Form the union of two Index objects.

        If the Index objects are incompatible, both Index objects will be
        cast to dtype('object') first.

            .. versionchanged:: 0.25.0

        Parameters
        ----------
        other : Index or array-like
        sort : bool or None, default None
            Whether to sort the resulting Index.

            * None : Sort the result, except when

              1. `self` and `other` are equal.
              2. `self` or `other` has length 0.
              3. Some values in `self` or `other` cannot be compared.
                 A RuntimeWarning is issued in this case.

            * False : do not sort the result.

            .. versionadded:: 0.24.0

            .. versionchanged:: 0.24.1

               Changed the default value from ``True`` to ``None``
               (without change in behaviour).

        Returns
        -------
        union : Index

        Examples
        --------
        Union matching dtypes

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.union(idx2)
        Int64Index([1, 2, 3, 4, 5, 6], dtype='int64')

        Union mismatched dtypes

        >>> idx1 = pd.Index(['a', 'b', 'c', 'd'])
        >>> idx2 = pd.Index([1, 2, 3, 4])
        >>> idx1.union(idx2)
        Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name = self._convert_can_do_setop(other)

        if not self._can_union_without_object_cast(other):
            return self._union_incompatible_dtypes(other, sort=sort)

        result = self._union(other, sort=sort)

        return self._wrap_setop_result(other, result)

    def _union(self, other, sort):
        """
        Specific union logic should go here. In subclasses, union behavior
        should be overwritten here rather than in `self.union`.

        Parameters
        ----------
        other : Index or array-like
        sort : False or None, default False
            Whether to sort the resulting index.

            * False : do not sort the result.
            * None : sort the result, except when `self` and `other` are equal
              or when the values cannot be compared.

        Returns
        -------
        Index
        """
        if not len(other) or self.equals(other):
            return self

        if not len(self):
            return other

        # TODO(EA): setops-refactor, clean all this up
        lvals = self._values
        rvals = other._values

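        # Two strategies: when both sides are monotonic and sort is None,
        # merge the already-sorted values directly via _outer_indexer;
        # otherwise find the values of ``other`` missing from ``self``,
        # append them, and sort afterwards only if sort is None.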
        if sort is None and self.is_monotonic and other.is_monotonic:
            try:
                result = self._outer_indexer(lvals, rvals)[0]
            except TypeError:
                # incomparable objects
                result = list(lvals)

                # worth making this faster? a very unusual case
                value_set = set(lvals)
                result.extend([x for x in rvals if x not in value_set])
                result = Index(result)._values  # do type inference here
        else:
            # find indexes of things in "other" that are not in "self"
            if self.is_unique:
                indexer = self.get_indexer(other)
                indexer = (indexer == -1).nonzero()[0]
            else:
                indexer = algos.unique1d(self.get_indexer_non_unique(other)[1])

            if len(indexer) > 0:
                other_diff = algos.take_nd(rvals, indexer, allow_fill=False)
                result = concat_compat((lvals, other_diff))
            else:
                result = lvals

            if sort is None:
                try:
                    result = algos.safe_sort(result)
                except TypeError as err:
                    warnings.warn(
                        f"{err}, sort order is undefined for incomparable objects",
                        RuntimeWarning,
                        stacklevel=3,
                    )

        return result

    @final
    def _wrap_setop_result(self, other, result):
        if isinstance(self, (ABCDatetimeIndex, ABCTimedeltaIndex)) and isinstance(
            result, np.ndarray
        ):
            result = type(self._data)._simple_new(result, dtype=self.dtype)
        elif is_categorical_dtype(self.dtype) and isinstance(result, np.ndarray):
            result = Categorical(result, dtype=self.dtype)

        name = get_op_result_name(self, other)
        if isinstance(result, Index):
            if result.name != name:
                return result.rename(name)
            return result
        else:
            return self._shallow_copy(result, name=name)

    # TODO: standardize return type of non-union setops type(self vs other)
    def intersection(self, other, sort=False):
        """
        Form the intersection of two Index objects.

        This returns a new Index with elements common to the index and `other`.

        Parameters
        ----------
        other : Index or array-like
        sort : False or None, default False
            Whether to sort the resulting index.

            * False : do not sort the result.
            * None : sort the result, except when `self` and `other` are equal
              or when the values cannot be compared.

            .. versionadded:: 0.24.0

            .. versionchanged:: 0.24.1

               Changed the default from ``True`` to ``False``, to match
               the behaviour of 0.23.4 and earlier.

        Returns
        -------
        intersection : Index

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.intersection(idx2)
        Int64Index([3, 4], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, _ = self._convert_can_do_setop(other)

        if self.equals(other):
            if self.has_duplicates:
                return self.unique()._get_reconciled_name_object(other)
            return self._get_reconciled_name_object(other)

        if not is_dtype_equal(self.dtype, other.dtype):
            dtype = find_common_type([self.dtype, other.dtype])
            this = self.astype(dtype, copy=False)
            other = other.astype(dtype, copy=False)
            return this.intersection(other, sort=sort)

        result = self._intersection(other, sort=sort)
        return self._wrap_setop_result(other, result)

    def _intersection(self, other, sort=False):
        """
        Intersection specialized to the case with matching dtypes.
        """
        # TODO(EA): setops-refactor, clean all this up
        lvals = self._values
        rvals = other._values

        if self.is_monotonic and other.is_monotonic:
            try:
                result = self._inner_indexer(lvals, rvals)[0]
            except TypeError:
                pass
            else:
                return algos.unique1d(result)

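        # Fallback for non-monotonic or incomparable values: match the
        # positions of ``lvals`` within ``other`` via get_indexer, switching
        # to get_indexer_non_unique when ``other`` is not unique or the
        # frequencies are incompatible.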
        try:
            indexer = Index(rvals).get_indexer(lvals)
            indexer = indexer.take((indexer != -1).nonzero()[0])
        except (InvalidIndexError, IncompatibleFrequency):
            # InvalidIndexError raised by get_indexer if non-unique
            # IncompatibleFrequency raised by PeriodIndex.get_indexer
            indexer = algos.unique1d(Index(rvals).get_indexer_non_unique(lvals)[0])
            indexer = indexer[indexer != -1]

        result = other.take(indexer).unique()._values

        if sort is None:
            result = algos.safe_sort(result)

        # Intersection has to be unique
        assert Index(result).is_unique

        return result

    def difference(self, other, sort=None):
        """
        Return a new Index with elements of index not in `other`.

        This is the set difference of two Index objects.

        Parameters
        ----------
        other : Index or array-like
        sort : False or None, default None
            Whether to sort the resulting index. By default, pandas attempts
            to sort the values, catching any TypeError raised when comparing
            incomparable elements.

            * None : Attempt to sort the result, but catch any TypeErrors
              from comparing incomparable elements.
            * False : Do not sort the result.

            .. versionadded:: 0.24.0

            .. versionchanged:: 0.24.1

               Changed the default value from ``True`` to ``None``
               (without change in behaviour).

        Returns
        -------
        difference : Index

        Examples
        --------
        >>> idx1 = pd.Index([2, 1, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.difference(idx2)
        Int64Index([1, 2], dtype='int64')
        >>> idx1.difference(idx2, sort=False)
        Int64Index([2, 1], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name = self._convert_can_do_setop(other)

        if self.equals(other):
            return self[:0].rename(result_name)

        result = self._difference(other, sort=sort)
        return self._wrap_setop_result(other, result)

    def _difference(self, other, sort):
        this = self._get_unique_index()

        indexer = this.get_indexer(other)
        indexer = indexer.take((indexer != -1).nonzero()[0])

        label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True)
        the_diff = this._values.take(label_diff)
        if sort is None:
            try:
                the_diff = algos.safe_sort(the_diff)
            except TypeError:
                pass

        return the_diff

    def symmetric_difference(self, other, result_name=None, sort=None):
        """
        Compute the symmetric difference of two Index objects.

        Parameters
        ----------
        other : Index or array-like
        result_name : str
        sort : False or None, default None
            Whether to sort the resulting index. By default, pandas attempts
            to sort the values, catching any TypeError raised when comparing
            incomparable elements.

            * None : Attempt to sort the result, but catch any TypeErrors
              from comparing incomparable elements.
            * False : Do not sort the result.

            .. versionadded:: 0.24.0

            .. versionchanged:: 0.24.1

               Changed the default value from ``True`` to ``None``
               (without change in behaviour).

        Returns
        -------
        symmetric_difference : Index

        Notes
        -----
        ``symmetric_difference`` contains elements that appear in either
        ``idx1`` or ``idx2`` but not both. Equivalent to the Index created by
        ``idx1.difference(idx2) | idx2.difference(idx1)`` with duplicates
        dropped.

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([2, 3, 4, 5])
        >>> idx1.symmetric_difference(idx2)
        Int64Index([1, 5], dtype='int64')

        You can also use the ``^`` operator:

        >>> idx1 ^ idx2
        Int64Index([1, 5], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name_update = self._convert_can_do_setop(other)
        if result_name is None:
            result_name = result_name_update

        this = self._get_unique_index()
        other = other._get_unique_index()
        indexer = this.get_indexer(other)

        # {this} minus {other}
        common_indexer = indexer.take((indexer != -1).nonzero()[0])
        left_indexer = np.setdiff1d(
            np.arange(this.size), common_indexer, assume_unique=True
        )
        left_diff = this._values.take(left_indexer)

        # {other} minus {this}
        right_indexer = (indexer == -1).nonzero()[0]
        right_diff = other._values.take(right_indexer)

        the_diff = concat_compat([left_diff, right_diff])
        if sort is None:
            try:
                the_diff = algos.safe_sort(the_diff)
            except TypeError:
                pass

        return Index(the_diff, dtype=self.dtype, name=result_name)

    def _assert_can_do_setop(self, other):
        if not is_list_like(other):
            raise TypeError("Input must be Index or array-like")
        return True

    def _convert_can_do_setop(self, other):
        if not isinstance(other, Index):
            other = Index(other, name=self.name)
            result_name = self.name
        else:
            result_name = get_op_result_name(self, other)
        return other, result_name

    # --------------------------------------------------------------------
    # Indexing Methods

    def get_loc(self, key, method=None, tolerance=None):
        """
        Get integer location, slice or boolean mask for requested label.

        Parameters
        ----------
        key : label
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match.
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.
        tolerance : int or float, optional
            Maximum distance from index value for inexact matches. The value of
            the index at the matching location must satisfy the equation
            ``abs(index[loc] - key) <= tolerance``.

        Returns
        -------
        loc : int if unique index, slice if monotonic index, else mask

        Examples
        --------
        >>> unique_index = pd.Index(list('abc'))
        >>> unique_index.get_loc('b')
        1

        >>> monotonic_index = pd.Index(list('abbc'))
        >>> monotonic_index.get_loc('b')
        slice(1, 3, None)

        >>> non_monotonic_index = pd.Index(list('abcb'))
        >>> non_monotonic_index.get_loc('b')
        array([False,  True, False,  True])
        """
        if method is None:
            if tolerance is not None:
                raise ValueError(
                    "tolerance argument only valid if using pad, "
                    "backfill or nearest lookups"
                )
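            # _maybe_cast_indexer may convert the key to a type the engine
            # can look up consistently (e.g. an integral float key to int);
            # a KeyError from the engine is re-raised with the original key.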
            casted_key = self._maybe_cast_indexer(key)
            try:
                return self._engine.get_loc(casted_key)
            except KeyError as err:
                raise KeyError(key) from err

        if tolerance is not None:
            tolerance = self._convert_tolerance(tolerance, np.asarray(key))

        indexer = self.get_indexer([key], method=method, tolerance=tolerance)
        if indexer.ndim > 1 or indexer.size > 1:
            raise TypeError("get_loc requires scalar valued input")
        loc = indexer.item()
        if loc == -1:
            raise KeyError(key)
        return loc

    _index_shared_docs[
        "get_indexer"
    ] = """
        Compute indexer and mask for new index given the current index. The
        indexer should be then used as an input to ndarray.take to align the
        current data to the new index.

        Parameters
        ----------
        target : %(target_klass)s
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match.
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.
        limit : int, optional
            Maximum number of consecutive labels in ``target`` to match for
            inexact matches.
        tolerance : optional
            Maximum distance between original and new labels for inexact
            matches. The values of the index at the matching locations must
            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

            Tolerance may be a scalar value, which applies the same tolerance
            to all values, or list-like, which applies variable tolerance per
            element. List-like includes list, tuple, array, Series, and must be
            the same size as the index and its dtype must exactly match the
            index's type.

        Returns
        -------
        indexer : ndarray of int
            Integers from 0 to n - 1 indicating that the index at these
            positions matches the corresponding target values. Missing values
            in the target are marked by -1.
        %(raises_section)s
        Examples
        --------
        >>> index = pd.Index(['c', 'a', 'b'])
        >>> index.get_indexer(['a', 'b', 'x'])
        array([ 1,  2, -1])

        Notice that the return value is an array of locations in ``index``
        and ``x`` is marked by -1, as it is not in ``index``.
        """

@Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs)
|
||
|
def get_indexer(
|
||
|
self, target, method=None, limit=None, tolerance=None
|
||
|
) -> np.ndarray:
|
||
|
method = missing.clean_reindex_fill_method(method)
|
||
|
target = ensure_index(target)
|
||
|
if tolerance is not None:
|
||
|
tolerance = self._convert_tolerance(tolerance, target)
|
||
|
|
||
|
# Treat boolean labels passed to a numeric index as not found. Without
|
||
|
# this fix False and True would be treated as 0 and 1 respectively.
|
||
|
# (GH #16877)
|
||
|
if target.is_boolean() and self.is_numeric():
|
||
|
return ensure_platform_int(np.repeat(-1, target.size))
|
||
|
|
||
|
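        # _maybe_promote may return a promoted (self, target) pair, e.g.
        # cast to a common datetime-like or object dtype; when either side
        # changed, re-dispatch on the promoted objects.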
        pself, ptarget = self._maybe_promote(target)
        if pself is not self or ptarget is not target:
            return pself.get_indexer(
                ptarget, method=method, limit=limit, tolerance=tolerance
            )

        if not is_dtype_equal(self.dtype, target.dtype):
            this = self.astype(object)
            target = target.astype(object)
            return this.get_indexer(
                target, method=method, limit=limit, tolerance=tolerance
            )

        if not self.is_unique:
            raise InvalidIndexError(
                "Reindexing only valid with uniquely valued Index objects"
            )

        if method == "pad" or method == "backfill":
            indexer = self._get_fill_indexer(target, method, limit, tolerance)
        elif method == "nearest":
            indexer = self._get_nearest_indexer(target, limit, tolerance)
        else:
            if tolerance is not None:
                raise ValueError(
                    "tolerance argument only valid if doing pad, "
                    "backfill or nearest reindexing"
                )
            if limit is not None:
                raise ValueError(
                    "limit argument only valid if doing pad, "
                    "backfill or nearest reindexing"
                )

            indexer = self._engine.get_indexer(target._get_engine_target())

        return ensure_platform_int(indexer)

    def _convert_tolerance(self, tolerance, target):
        # override this method on subclasses
        tolerance = np.asarray(tolerance)
        if target.size != tolerance.size and tolerance.size > 1:
            raise ValueError("list-like tolerance size must match target index size")
        return tolerance

    @final
    def _get_fill_indexer(
        self, target: "Index", method: str_t, limit=None, tolerance=None
    ) -> np.ndarray:
        target_values = target._get_engine_target()

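        # Fast path: when both the index and the target are monotonic
        # increasing, the engine's pad/backfill indexers can be used
        # directly; otherwise fall back to the searchsorted-based helper.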
        if self.is_monotonic_increasing and target.is_monotonic_increasing:
            engine_method = (
                self._engine.get_pad_indexer
                if method == "pad"
                else self._engine.get_backfill_indexer
            )
            indexer = engine_method(target_values, limit)
        else:
            indexer = self._get_fill_indexer_searchsorted(target, method, limit)
        if tolerance is not None and len(self):
            indexer = self._filter_indexer_tolerance(target_values, indexer, tolerance)
        return indexer

    @final
    def _get_fill_indexer_searchsorted(
        self, target: "Index", method: str_t, limit=None
    ) -> np.ndarray:
        """
        Fallback pad/backfill get_indexer that works for monotonic decreasing
        indexes and non-monotonic targets.
        """
        if limit is not None:
            raise ValueError(
                f"limit argument for {repr(method)} method only well-defined "
                "if index and target are monotonic"
            )

        side = "left" if method == "pad" else "right"

        # find exact matches first (this simplifies the algorithm)
        indexer = self.get_indexer(target)
        nonexact = indexer == -1
        indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], side)
        if side == "left":
            # searchsorted returns "indices into a sorted array such that,
            # if the corresponding elements in v were inserted before the
            # indices, the order of a would be preserved".
            # Thus, we need to subtract 1 to find values to the left.
            indexer[nonexact] -= 1
            # This also mapped not found values (values of 0 from
            # np.searchsorted) to -1, which conveniently is also our
            # sentinel for missing values
        else:
            # Mark indices to the right of the largest value as not found
            indexer[indexer == len(self)] = -1
        return indexer

    @final
    def _get_nearest_indexer(self, target: "Index", limit, tolerance) -> np.ndarray:
        """
        Get the indexer for the nearest index labels; requires an index with
        values that can be subtracted from each other (e.g., not strings or
        tuples).
        """
        if not len(self):
            return self._get_fill_indexer(target, "pad")

        left_indexer = self.get_indexer(target, "pad", limit=limit)
        right_indexer = self.get_indexer(target, "backfill", limit=limit)

        target_values = target._values
        # error: Unsupported left operand type for - ("ExtensionArray")
        left_distances = np.abs(
            self._values[left_indexer] - target_values  # type: ignore[operator]
        )
        # error: Unsupported left operand type for - ("ExtensionArray")
        right_distances = np.abs(
            self._values[right_indexer] - target_values  # type: ignore[operator]
        )

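        # Tie-breaking: with a monotonic increasing index, strict ``lt``
        # lets ties fall through to the backfill (right) candidate, i.e.
        # the larger index value, as documented for ``get_loc``.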
        op = operator.lt if self.is_monotonic_increasing else operator.le
        indexer = np.where(
            op(left_distances, right_distances) | (right_indexer == -1),
            left_indexer,
            right_indexer,
        )
        if tolerance is not None:
            indexer = self._filter_indexer_tolerance(target_values, indexer, tolerance)
        return indexer

    @final
    def _filter_indexer_tolerance(
        self,
        target: Union["Index", np.ndarray, ExtensionArray],
        indexer: np.ndarray,
        tolerance,
    ) -> np.ndarray:
        # error: Unsupported left operand type for - ("ExtensionArray")
        distance = abs(self._values[indexer] - target)  # type: ignore[operator]
        indexer = np.where(distance <= tolerance, indexer, -1)
        return indexer

    # --------------------------------------------------------------------
    # Indexer Conversion Methods

    def _get_partial_string_timestamp_match_key(self, key):
        """
        Translate any partial string timestamp matches in key, returning the
        new key.

        Only relevant for MultiIndex.
        """
        # GH#10331
        return key

    @final
    def _validate_positional_slice(self, key: slice):
        """
        For positional indexing, a slice must have either int or None
        for each of start, stop, and step.
        """
        self._validate_indexer("positional", key.start, "iloc")
        self._validate_indexer("positional", key.stop, "iloc")
        self._validate_indexer("positional", key.step, "iloc")

    def _convert_slice_indexer(self, key: slice, kind: str_t):
        """
        Convert a slice indexer.

        By definition, these are labels unless 'iloc' is passed in.
        Floats are not allowed as the start, step, or stop of the slice.

        Parameters
        ----------
        key : label of the slice bound
        kind : {'loc', 'getitem'}
        """
        assert kind in ["loc", "getitem"], kind

        # potentially cast the bounds to integers
        start, stop, step = key.start, key.stop, key.step

        # figure out if this is a positional indexer
        def is_int(v):
            return v is None or is_integer(v)

        is_index_slice = is_int(start) and is_int(stop) and is_int(step)
        is_positional = is_index_slice and not (
            self.is_integer() or self.is_categorical()
        )

        if kind == "getitem":
            # called from the getitem slicers, validate that we are in fact
            # integers
            if self.is_integer() or is_index_slice:
                self._validate_indexer("slice", key.start, "getitem")
                self._validate_indexer("slice", key.stop, "getitem")
                self._validate_indexer("slice", key.step, "getitem")
                return key

        # convert the slice to an indexer here

        # if we are mixed and have integers
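        # An integer slice may still refer to existing labels; if both
        # bounds resolve via get_loc, treat the slice as label-based
        # rather than positional.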
        if is_positional:
            try:
                # Validate start & stop
                if start is not None:
                    self.get_loc(start)
                if stop is not None:
                    self.get_loc(stop)
                is_positional = False
            except KeyError:
                pass

        if com.is_null_slice(key):
            # It doesn't matter if we are positional or label based
            indexer = key
        elif is_positional:
            if kind == "loc":
                # GH#16121, GH#24612, GH#31810
                warnings.warn(
                    "Slicing a positional slice with .loc is not supported, "
                    "and will raise TypeError in a future version. "
                    "Use .loc with labels or .iloc with positions instead.",
                    FutureWarning,
                    stacklevel=6,
                )
            indexer = key
        else:
            indexer = self.slice_indexer(start, stop, step, kind=kind)

        return indexer

    def _convert_listlike_indexer(self, keyarr):
        """
        Parameters
        ----------
        keyarr : list-like
            Indexer to convert.

        Returns
        -------
        indexer : numpy.ndarray or None
            Return an ndarray or None if cannot convert.
        keyarr : numpy.ndarray
            Return tuple-safe keys.
        """
        if not isinstance(keyarr, Index):
            keyarr = self._convert_arr_indexer(keyarr)

        indexer = self._convert_list_indexer(keyarr)
        return indexer, keyarr

    def _convert_arr_indexer(self, keyarr):
        """
        Convert an array-like indexer to the appropriate dtype.

        Parameters
        ----------
        keyarr : array-like
            Indexer to convert.

        Returns
        -------
        converted_keyarr : array-like
        """
        keyarr = com.asarray_tuplesafe(keyarr)
        return keyarr

    def _convert_list_indexer(self, keyarr):
        """
        Convert a list-like indexer to the appropriate dtype.

        Parameters
        ----------
        keyarr : Index (or sub-class)
            Indexer to convert.

        Returns
        -------
        positional indexer or None
        """
        return None

    @final
    def _invalid_indexer(self, form: str_t, key) -> TypeError:
        """
        Consistent invalid indexer message.
        """
        return TypeError(
            f"cannot do {form} indexing on {type(self).__name__} with these "
            f"indexers [{key}] of type {type(key).__name__}"
        )

    # --------------------------------------------------------------------
    # Reindex Methods

    @final
    def _can_reindex(self, indexer):
        """
        Check if we are allowing reindexing with this particular indexer.

        Parameters
        ----------
        indexer : an integer indexer

        Raises
        ------
        ValueError
            If the index contains duplicate values (reindexing from a
            duplicate axis is not allowed).
        """
        # trying to reindex on an axis with duplicates
        if not self._index_as_unique and len(indexer):
            raise ValueError("cannot reindex from a duplicate axis")

    def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
        """
        Create index with target's values.

        Parameters
        ----------
        target : an iterable

        Returns
        -------
        new_index : pd.Index
            Resulting index.
        indexer : np.ndarray or None
            Indices of output values in original index.
        """
        # GH6552: preserve names when reindexing to non-named target
        # (i.e. neither Index nor Series).
        preserve_names = not hasattr(target, "name")

        # GH7774: preserve dtype/tz if target is empty and not an Index.
        target = ensure_has_len(target)  # target may be an iterator

        if not isinstance(target, Index) and len(target) == 0:
            target = self[:0]
        else:
            target = ensure_index(target)

        if level is not None:
            if method is not None:
                raise TypeError("Fill method not supported if level passed")
            _, indexer, _ = self._join_level(
                target, level, how="right", return_indexers=True
            )
        else:
            if self.equals(target):
                indexer = None
            else:
                if self._index_as_unique:
                    indexer = self.get_indexer(
                        target, method=method, limit=limit, tolerance=tolerance
                    )
                else:
                    if method is not None or limit is not None:
                        raise ValueError(
                            "cannot reindex a non-unique index "
                            "with a method or limit"
                        )
                    indexer, missing = self.get_indexer_non_unique(target)

        if preserve_names and target.nlevels == 1 and target.name != self.name:
            target = target.copy()
            target.name = self.name

        return target, indexer

    def _reindex_non_unique(self, target):
        """
        Create a new index with target's values (move/add/delete values as
        necessary). For use with a non-unique Index and a possibly non-unique
        target.

        Parameters
        ----------
        target : an iterable

        Returns
        -------
        new_index : pd.Index
            Resulting index.
        indexer : np.ndarray or None
            Indices of output values in original index.
        """
        target = ensure_index(target)
        if len(target) == 0:
            # GH#13691
            return self[:0], np.array([], dtype=np.intp), None

        indexer, missing = self.get_indexer_non_unique(target)
        check = indexer != -1
        new_labels = self.take(indexer[check])
        new_indexer = None

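        # When some targets were not found, rebuild the labels as an object
        # array interleaving found and missing labels, and (depending on
        # whether ``target`` is unique) derive an indexer with -1 marking
        # positions that had no match in the original index.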
        if len(missing):
            length = np.arange(len(indexer))

            missing = ensure_platform_int(missing)
            missing_labels = target.take(missing)
            missing_indexer = ensure_int64(length[~check])
            cur_labels = self.take(indexer[check]).values
            cur_indexer = ensure_int64(length[check])

            new_labels = np.empty((len(indexer),), dtype=object)
            new_labels[cur_indexer] = cur_labels
            new_labels[missing_indexer] = missing_labels

            # a unique indexer
            if target.is_unique:
                # see GH5553, make sure we use the right indexer
                new_indexer = np.arange(len(indexer))
                new_indexer[cur_indexer] = np.arange(len(cur_labels))
                new_indexer[missing_indexer] = -1

            # we have a non_unique selector, need to use the original
            # indexer here
            else:
                # need to retake to have the same size as the indexer
                indexer[~check] = -1

                # reset the new indexer to account for the new size
                new_indexer = np.arange(len(self.take(indexer)))
                new_indexer[~check] = -1

        if isinstance(self, ABCMultiIndex):
            new_index = type(self).from_tuples(new_labels, names=self.names)
        else:
            new_index = Index(new_labels, name=self.name)
        return new_index, indexer, new_indexer

    # --------------------------------------------------------------------
    # Join Methods

    def join(self, other, how="left", level=None, return_indexers=False, sort=False):
        """
        Compute join_index and indexers to conform data
        structures to the new index.

        Parameters
        ----------
        other : Index
        how : {'left', 'right', 'inner', 'outer'}
        level : int or level name, default None
        return_indexers : bool, default False
        sort : bool, default False
            Sort the join keys lexicographically in the result Index. If False,
            the order of the join keys depends on the join type (how keyword).

        Returns
        -------
        join_index, (left_indexer, right_indexer)
        """
        other = ensure_index(other)
        self_is_mi = isinstance(self, ABCMultiIndex)
        other_is_mi = isinstance(other, ABCMultiIndex)

        # try to figure out the join level
        # GH3662
        if level is None and (self_is_mi or other_is_mi):
            # have the same levels/names so a simple join
            if self.names == other.names:
                pass
            else:
                return self._join_multi(other, how=how, return_indexers=return_indexers)

        # join on the level
        if level is not None and (self_is_mi or other_is_mi):
            return self._join_level(
                other, level, how=how, return_indexers=return_indexers
            )

        if len(other) == 0 and how in ("left", "outer"):
            join_index = self._shallow_copy()
            if return_indexers:
                rindexer = np.repeat(-1, len(join_index))
                return join_index, None, rindexer
            else:
                return join_index

        if len(self) == 0 and how in ("right", "outer"):
            join_index = other._shallow_copy()
            if return_indexers:
                lindexer = np.repeat(-1, len(join_index))
                return join_index, lindexer, None
            else:
                return join_index

if self._join_precedence < other._join_precedence:
|
||
|
how = {"right": "left", "left": "right"}.get(how, how)
|
||
|
result = other.join(
|
||
|
self, how=how, level=level, return_indexers=return_indexers
|
||
|
)
|
||
|
if return_indexers:
|
||
|
x, y, z = result
|
||
|
result = x, z, y
|
||
|
return result
|
||
|
|
||
|
if not is_dtype_equal(self.dtype, other.dtype):
|
||
|
this = self.astype("O")
|
||
|
other = other.astype("O")
|
||
|
return this.join(other, how=how, return_indexers=return_indexers)
|
||
|
|
||
|
_validate_join_method(how)
|
||
|
|
||
|
if not self.is_unique and not other.is_unique:
|
||
|
return self._join_non_unique(
|
||
|
other, how=how, return_indexers=return_indexers
|
||
|
)
|
||
|
elif not self.is_unique or not other.is_unique:
|
||
|
if self.is_monotonic and other.is_monotonic:
|
||
|
return self._join_monotonic(
|
||
|
other, how=how, return_indexers=return_indexers
|
||
|
)
|
||
|
else:
|
||
|
return self._join_non_unique(
|
||
|
other, how=how, return_indexers=return_indexers
|
||
|
)
|
||
|
elif self.is_monotonic and other.is_monotonic:
|
||
|
try:
|
||
|
return self._join_monotonic(
|
||
|
other, how=how, return_indexers=return_indexers
|
||
|
)
|
||
|
except TypeError:
|
||
|
pass
|
||
|
|
||
|
if how == "left":
|
||
|
join_index = self
|
||
|
elif how == "right":
|
||
|
join_index = other
|
||
|
elif how == "inner":
|
||
|
# TODO: sort=False here for backwards compat. It may
|
||
|
# be better to use the sort parameter passed into join
|
||
|
join_index = self.intersection(other, sort=False)
|
||
|
elif how == "outer":
|
||
|
# TODO: sort=True here for backwards compat. It may
|
||
|
# be better to use the sort parameter passed into join
|
||
|
join_index = self.union(other)
|
||
|
|
||
|
if sort:
|
||
|
join_index = join_index.sort_values()
|
||
|
|
||
|
if return_indexers:
|
||
|
if join_index is self:
|
||
|
lindexer = None
|
||
|
else:
|
||
|
lindexer = self.get_indexer(join_index)
|
||
|
if join_index is other:
|
||
|
rindexer = None
|
||
|
else:
|
||
|
rindexer = other.get_indexer(join_index)
|
||
|
return join_index, lindexer, rindexer
|
||
|
else:
|
||
|
return join_index
|
||
|
|
||
|
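    # Illustrative sketch, not part of pandas itself: joining two monotonic,
    # unique indexes takes the fast ``_join_monotonic`` path above. Expected
    # to behave roughly like:
    #
    # >>> left = pd.Index([1, 2, 3, 4])
    # >>> right = pd.Index([3, 4, 5, 6])
    # >>> left.join(right, how="inner", return_indexers=True)
    # (Int64Index([3, 4], dtype='int64'), array([2, 3]), array([0, 1]))
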
    @final
    def _join_multi(self, other, how, return_indexers=True):
        from pandas.core.indexes.multi import MultiIndex
        from pandas.core.reshape.merge import restore_dropped_levels_multijoin

        # figure out join names
        self_names_list = list(com.not_none(*self.names))
        other_names_list = list(com.not_none(*other.names))
        self_names_order = self_names_list.index
        other_names_order = other_names_list.index
        self_names = set(self_names_list)
        other_names = set(other_names_list)
        overlap = self_names & other_names

        # need at least 1 in common
        if not overlap:
            raise ValueError("cannot join with no overlapping index names")

        if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):

            # Drop the non-matching levels from left and right respectively
            ldrop_names = sorted(self_names - overlap, key=self_names_order)
            rdrop_names = sorted(other_names - overlap, key=other_names_order)

            # if only the order differs
            if not len(ldrop_names + rdrop_names):
                self_jnlevels = self
                other_jnlevels = other.reorder_levels(self.names)
            else:
                self_jnlevels = self.droplevel(ldrop_names)
                other_jnlevels = other.droplevel(rdrop_names)

            # Join left and right
            # Join on same leveled multi-index frames is supported
            join_idx, lidx, ridx = self_jnlevels.join(
                other_jnlevels, how, return_indexers=True
            )

            # Restore the dropped levels
            # Returned index level order is
            # common levels, ldrop_names, rdrop_names
            dropped_names = ldrop_names + rdrop_names

            levels, codes, names = restore_dropped_levels_multijoin(
                self, other, dropped_names, join_idx, lidx, ridx
            )

            # Re-create the multi-index
            multi_join_idx = MultiIndex(
                levels=levels, codes=codes, names=names, verify_integrity=False
            )

            multi_join_idx = multi_join_idx.remove_unused_levels()

            if return_indexers:
                return multi_join_idx, lidx, ridx
            else:
                return multi_join_idx

        jl = list(overlap)[0]

        # Case where only one index is multi
        # make the indices into mi's that match
        flip_order = False
        if isinstance(self, MultiIndex):
            self, other = other, self
            flip_order = True
            # flip if join method is right or left
            how = {"right": "left", "left": "right"}.get(how, how)

        level = other.names.index(jl)
        result = self._join_level(
            other, level, how=how, return_indexers=return_indexers
        )

        if flip_order:
            if isinstance(result, tuple):
                return result[0], result[2], result[1]
        return result

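    # Illustrative sketch, not part of pandas itself: when exactly one side is
    # a MultiIndex, ``_join_multi`` keys the join on the single overlapping
    # level name and delegates to ``_join_level``. Roughly:
    #
    # >>> midx = pd.MultiIndex.from_product(
    # ...     [["x", "y"], [1, 2]], names=["k", "n"]
    # ... )
    # >>> flat = pd.Index(["x", "y"], name="k")
    # >>> midx.join(flat, how="inner")  # aligns on the shared "k" level
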
    @final
    def _join_non_unique(self, other, how="left", return_indexers=False):
        from pandas.core.reshape.merge import get_join_indexers

        # We only get here if dtypes match
        assert self.dtype == other.dtype

        lvalues = self._get_engine_target()
        rvalues = other._get_engine_target()

        left_idx, right_idx = get_join_indexers(
            [lvalues], [rvalues], how=how, sort=True
        )

        left_idx = ensure_platform_int(left_idx)
        right_idx = ensure_platform_int(right_idx)

        join_index = np.asarray(lvalues.take(left_idx))
        mask = left_idx == -1
        np.putmask(join_index, mask, rvalues.take(right_idx))

        join_index = self._wrap_joined_index(join_index, other)

        if return_indexers:
            return join_index, left_idx, right_idx
        else:
            return join_index

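    # Illustrative numpy sketch, not part of pandas itself, of the fill step
    # above: positions where the left indexer is -1 take their label from the
    # right side instead.
    #
    # >>> import numpy as np
    # >>> lvalues = np.array(["a", "b", "c"], dtype=object)
    # >>> rvalues = np.array(["b", "c", "d"], dtype=object)
    # >>> left_idx, right_idx = np.array([1, 2, -1]), np.array([0, 1, 2])
    # >>> joined = lvalues.take(left_idx)
    # >>> np.putmask(joined, left_idx == -1, rvalues.take(right_idx))
    # >>> joined
    # array(['b', 'c', 'd'], dtype=object)
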
    @final
    def _join_level(
        self, other, level, how="left", return_indexers=False, keep_order=True
    ):
        """
        The join method *only* affects the level of the resulting
        MultiIndex. Otherwise it just exactly aligns the Index data to the
        labels of the level in the MultiIndex.

        If ``keep_order == True``, the order of the data indexed by the
        MultiIndex will not be changed; otherwise, it will tie out
        with `other`.
        """
        from pandas.core.indexes.multi import MultiIndex

        def _get_leaf_sorter(labels):
            """
            Returns sorter for the innermost level while preserving the
            order of higher levels.
            """
            if labels[0].size == 0:
                return np.empty(0, dtype="int64")

            if len(labels) == 1:
                lab = ensure_int64(labels[0])
                sorter, _ = libalgos.groupsort_indexer(lab, 1 + lab.max())
                return sorter

            # find indexers of beginning of each set of
            # same-key labels w.r.t all but last level
            tic = labels[0][:-1] != labels[0][1:]
            for lab in labels[1:-1]:
                tic |= lab[:-1] != lab[1:]

            starts = np.hstack(([True], tic, [True])).nonzero()[0]
            lab = ensure_int64(labels[-1])
            return lib.get_level_sorter(lab, ensure_int64(starts))

        if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
            raise TypeError("Join on level between two MultiIndex objects is ambiguous")

        left, right = self, other

        flip_order = not isinstance(self, MultiIndex)
        if flip_order:
            left, right = right, left
            how = {"right": "left", "left": "right"}.get(how, how)

        assert isinstance(left, MultiIndex)

        level = left._get_level_number(level)
        old_level = left.levels[level]

        if not right.is_unique:
            raise NotImplementedError(
                "Index._join_level on non-unique index is not implemented"
            )

        new_level, left_lev_indexer, right_lev_indexer = old_level.join(
            right, how=how, return_indexers=True
        )

        if left_lev_indexer is None:
            if keep_order or len(left) == 0:
                left_indexer = None
                join_index = left
            else:  # sort the leaves
                left_indexer = _get_leaf_sorter(left.codes[: level + 1])
                join_index = left[left_indexer]

        else:
            left_lev_indexer = ensure_int64(left_lev_indexer)
            rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level))
            old_codes = left.codes[level]
            new_lev_codes = algos.take_nd(
                rev_indexer, old_codes[old_codes != -1], allow_fill=False
            )

            new_codes = list(left.codes)
            new_codes[level] = new_lev_codes

            new_levels = list(left.levels)
            new_levels[level] = new_level

            if keep_order:  # just drop missing values. o.w. keep order
                left_indexer = np.arange(len(left), dtype=np.intp)
                mask = new_lev_codes != -1
                if not mask.all():
                    new_codes = [lab[mask] for lab in new_codes]
                    left_indexer = left_indexer[mask]

            else:  # tie out the order with other
                if level == 0:  # outer most level, take the fast route
                    ngroups = 1 + new_lev_codes.max()
                    left_indexer, counts = libalgos.groupsort_indexer(
                        new_lev_codes, ngroups
                    )

                    # missing values are placed first; drop them!
                    left_indexer = left_indexer[counts[0] :]
                    new_codes = [lab[left_indexer] for lab in new_codes]

                else:  # sort the leaves
                    mask = new_lev_codes != -1
                    mask_all = mask.all()
                    if not mask_all:
                        new_codes = [lab[mask] for lab in new_codes]

                    left_indexer = _get_leaf_sorter(new_codes[: level + 1])
                    new_codes = [lab[left_indexer] for lab in new_codes]

                    # left_indexers are w.r.t masked frame.
                    # reverse to original frame!
                    if not mask_all:
                        left_indexer = mask.nonzero()[0][left_indexer]

            join_index = MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=left.names,
                verify_integrity=False,
            )

        if right_lev_indexer is not None:
            right_indexer = algos.take_nd(
                right_lev_indexer, join_index.codes[level], allow_fill=False
            )
        else:
            right_indexer = join_index.codes[level]

        if flip_order:
            left_indexer, right_indexer = right_indexer, left_indexer

        if return_indexers:
            left_indexer = (
                None if left_indexer is None else ensure_platform_int(left_indexer)
            )
            right_indexer = (
                None if right_indexer is None else ensure_platform_int(right_indexer)
            )
            return join_index, left_indexer, right_indexer
        else:
            return join_index

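    # Illustrative numpy sketch, not part of pandas itself: for a single
    # level, the leaf sorter above amounts to a stable argsort of the integer
    # level codes, which groups equal keys without reordering ties.
    #
    # >>> import numpy as np
    # >>> codes = np.array([2, 0, 2, 1, 0])
    # >>> np.argsort(codes, kind="stable")
    # array([1, 4, 3, 0, 2])
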
    @final
    def _join_monotonic(self, other, how="left", return_indexers=False):
        # We only get here with matching dtypes
        assert other.dtype == self.dtype

        if self.equals(other):
            ret_index = other if how == "right" else self
            if return_indexers:
                return ret_index, None, None
            else:
                return ret_index

        sv = self._get_engine_target()
        ov = other._get_engine_target()

        if self.is_unique and other.is_unique:
            # We can perform much better than the general case
            if how == "left":
                join_index = self
                lidx = None
                ridx = self._left_indexer_unique(sv, ov)
            elif how == "right":
                join_index = other
                lidx = self._left_indexer_unique(ov, sv)
                ridx = None
            elif how == "inner":
                join_index, lidx, ridx = self._inner_indexer(sv, ov)
                join_index = self._wrap_joined_index(join_index, other)
            elif how == "outer":
                join_index, lidx, ridx = self._outer_indexer(sv, ov)
                join_index = self._wrap_joined_index(join_index, other)
        else:
            if how == "left":
                join_index, lidx, ridx = self._left_indexer(sv, ov)
            elif how == "right":
                join_index, ridx, lidx = self._left_indexer(ov, sv)
            elif how == "inner":
                join_index, lidx, ridx = self._inner_indexer(sv, ov)
            elif how == "outer":
                join_index, lidx, ridx = self._outer_indexer(sv, ov)
            join_index = self._wrap_joined_index(join_index, other)

        if return_indexers:
            lidx = None if lidx is None else ensure_platform_int(lidx)
            ridx = None if ridx is None else ensure_platform_int(ridx)
            return join_index, lidx, ridx
        else:
            return join_index

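    # Illustrative numpy sketch, not part of pandas itself: for unique,
    # monotonic values, a left-join indexer can be computed with searchsorted;
    # this mirrors what the fast path above delegates to in cython.
    #
    # >>> import numpy as np
    # >>> left, right = np.array([1, 3, 5]), np.array([3, 4, 5])
    # >>> pos = np.searchsorted(right, left)
    # >>> found = (pos < len(right)) & (right[np.minimum(pos, len(right) - 1)] == left)
    # >>> np.where(found, pos, -1)   # position of each left label in `right`
    # array([-1,  0,  2])
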
    def _wrap_joined_index(
        self: _IndexT, joined: np.ndarray, other: _IndexT
    ) -> _IndexT:
        assert other.dtype == self.dtype

        if isinstance(self, ABCMultiIndex):
            name = self.names if self.names == other.names else None
        else:
            name = get_op_result_name(self, other)
        return self._constructor(joined, name=name)

    # --------------------------------------------------------------------
    # Uncategorized Methods

    @property
    def values(self) -> np.ndarray:
        """
        Return an array representing the data in the Index.

        .. warning::

           We recommend using :attr:`Index.array` or
           :meth:`Index.to_numpy`, depending on whether you need
           a reference to the underlying data or a NumPy array.

        Returns
        -------
        array: numpy.ndarray or ExtensionArray

        See Also
        --------
        Index.array : Reference to the underlying data.
        Index.to_numpy : A NumPy array representing the underlying data.
        """
        return self._data.view(np.ndarray)

    @cache_readonly
    @doc(IndexOpsMixin.array)
    def array(self) -> ExtensionArray:
        array = self._data
        if isinstance(array, np.ndarray):
            from pandas.core.arrays.numpy_ import PandasArray

            array = PandasArray(array)
        return array

    @property
    def _values(self) -> Union[ExtensionArray, np.ndarray]:
        """
        The best array representation.

        This is an ndarray or ExtensionArray.

        ``_values`` are consistent between ``Series`` and ``Index``.

        It may differ from the public ``.values`` attribute.

        index             | values          | _values       |
        ----------------- | --------------- | ------------- |
        Index             | ndarray         | ndarray       |
        CategoricalIndex  | Categorical     | Categorical   |
        DatetimeIndex     | ndarray[M8ns]   | DatetimeArray |
        DatetimeIndex[tz] | ndarray[M8ns]   | DatetimeArray |
        PeriodIndex       | ndarray[object] | PeriodArray   |
        IntervalIndex     | IntervalArray   | IntervalArray |

        See Also
        --------
        values : Values
        """
        return self._data

    def _get_engine_target(self) -> np.ndarray:
        """
        Get the ndarray that we can pass to the IndexEngine constructor.
        """
        return self._values

    @doc(IndexOpsMixin.memory_usage)
    def memory_usage(self, deep: bool = False) -> int:
        result = super().memory_usage(deep=deep)

        # include our engine hashtable
        result += self._engine.sizeof(deep=deep)
        return result

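    # Illustrative sketch, not part of pandas itself: ``.values`` hands back a
    # plain ndarray view, while ``.array`` wraps the same data in an
    # ExtensionArray. Expected to behave roughly like:
    #
    # >>> idx = pd.Index([1, 2, 3])
    # >>> type(idx.values).__name__, type(idx.array).__name__
    # ('ndarray', 'PandasArray')
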
    def where(self, cond, other=None):
        """
        Replace values where the condition is False.

        The replacement is taken from other.

        Parameters
        ----------
        cond : bool array-like with the same length as self
            Condition to select the values on.
        other : scalar, or array-like, default None
            Replacement if the condition is False.

        Returns
        -------
        pandas.Index
            A copy of self with values replaced from other
            where the condition is False.

        See Also
        --------
        Series.where : Same method for Series.
        DataFrame.where : Same method for DataFrame.

        Examples
        --------
        >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
        >>> idx
        Index(['car', 'bike', 'train', 'tractor'], dtype='object')
        >>> idx.where(idx.isin(['car', 'train']), 'other')
        Index(['car', 'other', 'train', 'other'], dtype='object')
        """
        if other is None:
            other = self._na_value

        values = self.values

        try:
            self._validate_fill_value(other)
        except (ValueError, TypeError):
            return self.astype(object).where(cond, other)

        values = np.where(cond, values, other)

        return Index(values, name=self.name)

    # construction helpers
    @final
    @classmethod
    def _scalar_data_error(cls, data):
        # We return the TypeError so that we can raise it from the constructor
        # in order to keep mypy happy
        return TypeError(
            f"{cls.__name__}(...) must be called with a collection of some "
            f"kind, {repr(data)} was passed"
        )

    @final
    @classmethod
    def _string_data_error(cls, data):
        raise TypeError(
            "String dtype not supported, you may need "
            "to explicitly cast to a numeric type"
        )

    @final
    def _coerce_scalar_to_index(self, item):
        """
        We need to coerce a scalar to something compatible with our index type.

        Parameters
        ----------
        item : scalar item to coerce
        """
        dtype = self.dtype

        if self._is_numeric_dtype and isna(item):
            # We can't coerce to the numeric dtype of "self" (unless
            # it's float) if there are NaN values in our output.
            dtype = None

        return Index([item], dtype=dtype, **self._get_attributes_dict())

    def _validate_fill_value(self, value):
        """
        Check if the value can be inserted into our array, and convert
        it to an appropriate native type if necessary.
        """
        return value

    @final
    def _require_scalar(self, value):
        """
        Check that this is a scalar value that we can use for setitem-like
        operations without changing dtype.
        """
        if not is_scalar(value):
            raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}")
        return value

    @property
    def _has_complex_internals(self) -> bool:
        """
        Indicates if an index is not directly backed by a numpy array
        """
        # used to avoid libreduction code paths, which raise or require conversion
        return False

    def _is_memory_usage_qualified(self) -> bool:
        """
        Return a boolean if we need a qualified .info display.
        """
        return self.is_object()

    def is_type_compatible(self, kind: str_t) -> bool:
        """
        Whether the index type is compatible with the provided type.
        """
        return kind == self.inferred_type

    def __contains__(self, key: Any) -> bool:
        """
        Return a boolean indicating whether the provided key is in the index.

        Parameters
        ----------
        key : label
            The key to check if it is present in the index.

        Returns
        -------
        bool
            True if the key is in the index, False otherwise.

        Raises
        ------
        TypeError
            If the key is not hashable.

        See Also
        --------
        Index.isin : Returns an ndarray of boolean dtype indicating whether the
            list-like key is in the index.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3, 4])
        >>> idx
        Int64Index([1, 2, 3, 4], dtype='int64')

        >>> 2 in idx
        True
        >>> 6 in idx
        False
        """
        hash(key)
        try:
            return key in self._engine
        except (OverflowError, TypeError, ValueError):
            return False

    @final
    def __hash__(self):
        raise TypeError(f"unhashable type: {repr(type(self).__name__)}")

    @final
    def __setitem__(self, key, value):
        raise TypeError("Index does not support mutable operations")

    def __getitem__(self, key):
        """
        Override numpy.ndarray's __getitem__ method to work as desired.

        This function adds lists and Series as valid boolean indexers
        (ndarrays only support ndarray with dtype=bool).

        If resulting ndim != 1, plain ndarray is returned instead of
        corresponding `Index` subclass.
        """
        # There's no custom logic to be implemented in __getslice__, so it's
        # not overloaded intentionally.
        getitem = self._data.__getitem__
        promote = self._shallow_copy

        if is_scalar(key):
            key = com.cast_scalar_indexer(key, warn_float=True)
            return getitem(key)

        if isinstance(key, slice):
            # This case is separated from the conditional above to avoid
            # pessimization of basic indexing.
            return promote(getitem(key))

        if com.is_bool_indexer(key):
            key = np.asarray(key, dtype=bool)

        result = getitem(key)
        if not is_scalar(result):
            if np.ndim(result) > 1:
                deprecate_ndim_indexing(result)
                return result
            return promote(result)
        else:
            return result

    @final
    def _can_hold_identifiers_and_holds_name(self, name) -> bool:
        """
        Faster check for ``name in self`` when we know `name` is a Python
        identifier (e.g. in NDFrame.__getattr__, which hits this to support
        . key lookup). For indexes that can't hold identifiers (everything
        but object & categorical) we just return False.

        https://github.com/pandas-dev/pandas/issues/19764
        """
        if self.is_object() or self.is_categorical():
            return name in self
        return False

    def append(self, other):
        """
        Append a collection of Index objects together.

        Parameters
        ----------
        other : Index or list/tuple of indices

        Returns
        -------
        appended : Index
        """
        to_concat = [self]

        if isinstance(other, (list, tuple)):
            to_concat = to_concat + list(other)
        else:
            to_concat.append(other)

        for obj in to_concat:
            if not isinstance(obj, Index):
                raise TypeError("all inputs must be Index")

        names = {obj.name for obj in to_concat}
        name = None if len(names) > 1 else self.name

        return self._concat(to_concat, name)

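    # Illustrative sketch, not part of pandas itself: appending keeps a shared
    # name and drops it on mismatch. Expected to behave roughly like:
    #
    # >>> pd.Index([1, 2], name="n").append(pd.Index([3], name="n"))
    # Int64Index([1, 2, 3], dtype='int64', name='n')
    # >>> pd.Index([1, 2], name="n").append(pd.Index([3], name="m")).name is None
    # True
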
    def _concat(self, to_concat: List["Index"], name: Label) -> "Index":
        """
        Concatenate multiple Index objects.
        """
        to_concat_vals = [x._values for x in to_concat]

        result = concat_compat(to_concat_vals)
        return Index(result, name=name)

    def putmask(self, mask, value):
        """
        Return a new Index of the values set with the mask.

        Returns
        -------
        Index

        See Also
        --------
        numpy.ndarray.putmask : Changes elements of an array
            based on conditional and input values.
        """
        values = self._values.copy()
        try:
            converted = self._validate_fill_value(value)
        except (ValueError, TypeError) as err:
            if is_object_dtype(self):
                raise err

            # coerces to object
            return self.astype(object).putmask(mask, value)

        np.putmask(values, mask, converted)
        return self._shallow_copy(values)

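    # Illustrative sketch, not part of pandas itself:
    #
    # >>> idx = pd.Index([1, 2, 3])
    # >>> idx.putmask([True, False, True], 9)
    # Int64Index([9, 2, 9], dtype='int64')
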
    def equals(self, other: object) -> bool:
        """
        Determine if two Index objects are equal.

        The things that are being compared are:

        * The elements inside the Index object.
        * The order of the elements inside the Index object.

        Parameters
        ----------
        other : Any
            The other object to compare against.

        Returns
        -------
        bool
            True if "other" is an Index and it has the same elements and order
            as the calling index; False otherwise.

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3])
        >>> idx1
        Int64Index([1, 2, 3], dtype='int64')
        >>> idx1.equals(pd.Index([1, 2, 3]))
        True

        The elements inside are compared

        >>> idx2 = pd.Index(["1", "2", "3"])
        >>> idx2
        Index(['1', '2', '3'], dtype='object')

        >>> idx1.equals(idx2)
        False

        The order is compared

        >>> ascending_idx = pd.Index([1, 2, 3])
        >>> ascending_idx
        Int64Index([1, 2, 3], dtype='int64')
        >>> descending_idx = pd.Index([3, 2, 1])
        >>> descending_idx
        Int64Index([3, 2, 1], dtype='int64')
        >>> ascending_idx.equals(descending_idx)
        False

        The dtype is *not* compared

        >>> int64_idx = pd.Int64Index([1, 2, 3])
        >>> int64_idx
        Int64Index([1, 2, 3], dtype='int64')
        >>> uint64_idx = pd.UInt64Index([1, 2, 3])
        >>> uint64_idx
        UInt64Index([1, 2, 3], dtype='uint64')
        >>> int64_idx.equals(uint64_idx)
        True
        """
        if self.is_(other):
            return True

        if not isinstance(other, Index):
            return False

        if is_object_dtype(self.dtype) and not is_object_dtype(other.dtype):
            # if other is not object, use other's logic for coercion
            return other.equals(self)

        if isinstance(other, ABCMultiIndex):
            # d-level MultiIndex can equal d-tuple Index
            return other.equals(self)

        if is_extension_array_dtype(other.dtype):
            # All EA-backed Index subclasses override equals
            return other.equals(self)

        return array_equivalent(self._values, other._values)

    @final
    def identical(self, other) -> bool:
        """
        Similar to equals, but checks that object attributes and types are also equal.

        Returns
        -------
        bool
            True if two Index objects have equal elements and the same type,
            otherwise False.
        """
        return (
            self.equals(other)
            and all(
                getattr(self, c, None) == getattr(other, c, None)
                for c in self._comparables
            )
            and type(self) == type(other)
        )

    @final
    def asof(self, label):
        """
        Return the label from the index, or, if not present, the previous one.

        Assuming that the index is sorted, return the passed index label if it
        is in the index, or return the previous index label if the passed one
        is not in the index.

        Parameters
        ----------
        label : object
            The label up to which the method returns the latest index label.

        Returns
        -------
        object
            The passed label if it is in the index. The previous label if the
            passed label is not in the sorted index or `NaN` if there is no
            such label.

        See Also
        --------
        Series.asof : Return the latest value in a Series up to the
            passed index.
        merge_asof : Perform an asof merge (similar to left join but it
            matches on nearest key rather than equal key).
        Index.get_loc : An `asof` is a thin wrapper around `get_loc`
            with method='pad'.

        Examples
        --------
        `Index.asof` returns the latest index label up to the passed label.

        >>> idx = pd.Index(['2013-12-31', '2014-01-02', '2014-01-03'])
        >>> idx.asof('2014-01-01')
        '2013-12-31'

        If the label is in the index, the method returns the passed label.

        >>> idx.asof('2014-01-02')
        '2014-01-02'

        If all of the labels in the index are later than the passed label,
        NaN is returned.

        >>> idx.asof('1999-01-02')
        nan

        If the index is not sorted, an error is raised.

        >>> idx_not_sorted = pd.Index(['2013-12-31', '2015-01-02',
        ...                            '2014-01-03'])
        >>> idx_not_sorted.asof('2013-12-31')
        Traceback (most recent call last):
        ValueError: index must be monotonic increasing or decreasing
        """
        try:
            loc = self.get_loc(label, method="pad")
        except KeyError:
            return self._na_value
        else:
            if isinstance(loc, slice):
                loc = loc.indices(len(self))[-1]
            return self[loc]

    def asof_locs(self, where: "Index", mask) -> np.ndarray:
        """
        Return the locations (indices) of labels in the index.

        As in the `asof` function, if the label (a particular entry in
        `where`) is not in the index, the latest index label up to the
        passed label is chosen and its index returned.

        If all of the labels in the index are later than a label in `where`,
        -1 is returned.

        `mask` is used to ignore NA values in the index during calculation.

        Parameters
        ----------
        where : Index
            An Index consisting of an array of timestamps.
        mask : array-like
            Array of booleans denoting where values in the original
            data are not NA.

        Returns
        -------
        numpy.ndarray
            An array of locations (indices) of the labels from the Index
            which correspond to the return values of the `asof` function
            for every element in `where`.
        """
        locs = self._values[mask].searchsorted(where._values, side="right")
        locs = np.where(locs > 0, locs - 1, 0)

        result = np.arange(len(self))[mask].take(locs)

        # TODO: overload return type of ExtensionArray.__getitem__
        first_value = cast(Any, self._values[mask.argmax()])
        result[(locs == 0) & (where._values < first_value)] = -1

        return result

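    # Illustrative numpy sketch, not part of pandas itself, of the lookup
    # above: ``searchsorted(..., side="right") - 1`` gives, for each query,
    # the position of the last label <= the query.
    #
    # >>> import numpy as np
    # >>> labels = np.array([10, 20, 30])
    # >>> where = np.array([5, 20, 25])
    # >>> np.maximum(np.searchsorted(labels, where, side="right") - 1, -1)
    # array([-1,  1,  1])
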
    @final
    def sort_values(
        self,
        return_indexer: bool = False,
        ascending: bool = True,
        na_position: str_t = "last",
        key: Optional[Callable] = None,
    ):
        """
        Return a sorted copy of the index.

        Return a sorted copy of the index, and optionally return the indices
        that sorted the index itself.

        Parameters
        ----------
        return_indexer : bool, default False
            Should the indices that would sort the index be returned.
        ascending : bool, default True
            Should the index values be sorted in an ascending order.
        na_position : {'first' or 'last'}, default 'last'
            Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
            the end.

            .. versionadded:: 1.2.0

        key : callable, optional
            If not None, apply the key function to the index values
            before sorting. This is similar to the `key` argument in the
            builtin :meth:`sorted` function, with the notable difference that
            this `key` function should be *vectorized*. It should expect an
            ``Index`` and return an ``Index`` of the same shape.

            .. versionadded:: 1.1.0

        Returns
        -------
        sorted_index : pandas.Index
            Sorted copy of the index.
        indexer : numpy.ndarray, optional
            The indices that the index itself was sorted by.

        See Also
        --------
        Series.sort_values : Sort values of a Series.
        DataFrame.sort_values : Sort values in a DataFrame.

        Examples
        --------
        >>> idx = pd.Index([10, 100, 1, 1000])
        >>> idx
        Int64Index([10, 100, 1, 1000], dtype='int64')

        Sort values in ascending order (default behavior).

        >>> idx.sort_values()
        Int64Index([1, 10, 100, 1000], dtype='int64')

        Sort values in descending order, and also get the indices `idx` was
        sorted by.

        >>> idx.sort_values(ascending=False, return_indexer=True)
        (Int64Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2]))
        """
        idx = ensure_key_mapped(self, key)

        # GH 35584. Sort missing values according to na_position kwarg
        # ignore na_position for MultiIndex
        if not isinstance(self, ABCMultiIndex):
            _as = nargsort(
                items=idx, ascending=ascending, na_position=na_position, key=key
            )
        else:
            _as = idx.argsort()
            if not ascending:
                _as = _as[::-1]

        sorted_index = self.take(_as)

        if return_indexer:
            return sorted_index, _as
        else:
            return sorted_index

    @final
    def sort(self, *args, **kwargs):
        """
        Use sort_values instead.
        """
        raise TypeError("cannot sort an Index object in-place, use sort_values instead")

    def shift(self, periods=1, freq=None):
        """
        Shift index by desired number of time frequency increments.

        This method is for shifting the values of datetime-like indexes
        by a specified time increment a given number of times.

        Parameters
        ----------
        periods : int, default 1
            Number of periods (or increments) to shift by,
            can be positive or negative.
        freq : pandas.DateOffset, pandas.Timedelta or str, optional
            Frequency increment to shift by.
            If None, the index is shifted by its own `freq` attribute.
            Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.

        Returns
        -------
        pandas.Index
            Shifted index.

        See Also
        --------
        Series.shift : Shift values of Series.

        Notes
        -----
        This method is only implemented for datetime-like index classes,
        i.e., DatetimeIndex, PeriodIndex and TimedeltaIndex.

        Examples
        --------
        Put the first 5 month starts of 2011 into an index.

        >>> month_starts = pd.date_range('1/1/2011', periods=5, freq='MS')
        >>> month_starts
        DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01', '2011-04-01',
                       '2011-05-01'],
                      dtype='datetime64[ns]', freq='MS')

        Shift the index by 10 days.

        >>> month_starts.shift(10, freq='D')
        DatetimeIndex(['2011-01-11', '2011-02-11', '2011-03-11', '2011-04-11',
                       '2011-05-11'],
                      dtype='datetime64[ns]', freq=None)

        The default value of `freq` is the `freq` attribute of the index,
        which is 'MS' (month start) in this example.

        >>> month_starts.shift(10)
        DatetimeIndex(['2011-11-01', '2011-12-01', '2012-01-01', '2012-02-01',
                       '2012-03-01'],
                      dtype='datetime64[ns]', freq='MS')
        """
        raise NotImplementedError(
            "This method is only implemented for DatetimeIndex, PeriodIndex and "
            f"TimedeltaIndex; Got type {type(self).__name__}"
        )

    def argsort(self, *args, **kwargs) -> np.ndarray:
        """
        Return the integer indices that would sort the index.

        Parameters
        ----------
        *args
            Passed to `numpy.ndarray.argsort`.
        **kwargs
            Passed to `numpy.ndarray.argsort`.

        Returns
        -------
        numpy.ndarray
            Integer indices that would sort the index if used as
            an indexer.

        See Also
        --------
        numpy.argsort : Similar method for NumPy arrays.
        Index.sort_values : Return sorted copy of Index.

        Examples
        --------
        >>> idx = pd.Index(['b', 'a', 'd', 'c'])
        >>> idx
        Index(['b', 'a', 'd', 'c'], dtype='object')

        >>> order = idx.argsort()
        >>> order
        array([1, 0, 3, 2])

        >>> idx[order]
        Index(['a', 'b', 'c', 'd'], dtype='object')
        """
        if needs_i8_conversion(self.dtype):
            # TODO: these do not match the underlying EA argsort methods GH#37863
            return self.asi8.argsort(*args, **kwargs)

        # This works for either ndarray or EA; it is overridden
        # by RangeIndex and MultiIndex
        return self._data.argsort(*args, **kwargs)

    @final
    def get_value(self, series: "Series", key):
        """
        Fast lookup of value from 1-dimensional ndarray.

        Only use this if you know what you're doing.

        Returns
        -------
        scalar or Series
        """
        warnings.warn(
            "get_value is deprecated and will be removed in a future version. "
            "Use Series[key] instead",
            FutureWarning,
            stacklevel=2,
        )

        self._check_indexing_error(key)

        try:
            # GH 20882, 21257
            # First try to convert the key to a location
            # If that fails, raise a KeyError if an integer
            # index, otherwise, see if key is an integer, and
            # try that
            loc = self.get_loc(key)
        except KeyError:
            if not self._should_fallback_to_positional():
                raise
            elif is_integer(key):
                # If the Index cannot hold integer, then this is unambiguously
                # a locational lookup.
                loc = key
            else:
                raise

        return self._get_values_for_loc(series, loc, key)

    def _check_indexing_error(self, key):
        if not is_scalar(key):
            # if key is not a scalar, directly raise an error (the code below
            # would convert to numpy arrays and raise later any way) - GH29926
            raise InvalidIndexError(key)

    def _should_fallback_to_positional(self) -> bool:
        """
        Should an integer key be treated as positional?
        """
        if self.holds_integer() or self.is_boolean():
            return False
        return True

    def _get_values_for_loc(self, series: "Series", loc, key):
        """
        Do a positional lookup on the given Series, returning either a scalar
        or a Series.

        Assumes that `series.index is self`

        key is included for MultiIndex compat.
        """
        if is_integer(loc):
            return series._values[loc]

        return series.iloc[loc]

    @final
    def set_value(self, arr, key, value):
        """
        Fast lookup of value from 1-dimensional ndarray.

        .. deprecated:: 1.0

        Notes
        -----
        Only use this if you know what you're doing.
        """
        warnings.warn(
            (
                "The 'set_value' method is deprecated, and "
                "will be removed in a future version."
            ),
            FutureWarning,
            stacklevel=2,
        )
        loc = self._engine.get_loc(key)
        validate_numeric_casting(arr.dtype, value)
        arr[loc] = value

    _index_shared_docs[
        "get_indexer_non_unique"
    ] = """
        Compute indexer and mask for new index given the current index. The
        indexer should then be used as an input to ndarray.take to align the
        current data to the new index.

        Parameters
        ----------
        target : %(target_klass)s

        Returns
        -------
        indexer : ndarray of int
            Integers from 0 to n - 1 indicating that the index at these
            positions matches the corresponding target values. Missing values
            in the target are marked by -1.
        missing : ndarray of int
            An indexer into the target of the values not found.
            These correspond to the -1 in the indexer array.
        """

    @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
    def get_indexer_non_unique(self, target):
        target = ensure_index(target)

        if target.is_boolean() and self.is_numeric():
            # Treat boolean labels passed to a numeric index as not found. Without
            # this fix False and True would be treated as 0 and 1 respectively.
            # (GH #16877)
            return self._get_indexer_non_comparable(target, method=None, unique=False)

        pself, ptarget = self._maybe_promote(target)
        if pself is not self or ptarget is not target:
            return pself.get_indexer_non_unique(ptarget)

        if not self._should_compare(target):
            return self._get_indexer_non_comparable(target, method=None, unique=False)

        if not is_dtype_equal(self.dtype, target.dtype):
            # TODO: if object, could use infer_dtype to pre-empt costly
            #  conversion if still non-comparable?
            dtype = find_common_type([self.dtype, target.dtype])
            if (
                dtype.kind in ["i", "u"]
                and is_categorical_dtype(target.dtype)
                and target.hasnans
            ):
                # FIXME: find_common_type incorrect with Categorical GH#38240
                # FIXME: some cases where float64 cast can be lossy?
                dtype = np.dtype(np.float64)

            this = self.astype(dtype, copy=False)
            that = target.astype(dtype, copy=False)
            return this.get_indexer_non_unique(that)

        if is_categorical_dtype(target.dtype):
            tgt_values = np.asarray(target)
        else:
            tgt_values = target._get_engine_target()

        indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
        return ensure_platform_int(indexer), missing

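    # Illustrative sketch, not part of pandas itself: expected to behave
    # roughly like:
    #
    # >>> idx = pd.Index(["a", "b", "a"])
    # >>> indexer, missing = idx.get_indexer_non_unique(["a", "x"])
    # >>> indexer   # every position matching each target label, -1 if absent
    # array([ 0,  2, -1])
    # >>> missing   # positions *in the target* that were not found
    # array([1])
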
    @final
    def get_indexer_for(self, target, **kwargs):
        """
        Guaranteed return of an indexer even when non-unique.

        This dispatches to get_indexer or get_indexer_non_unique
        as appropriate.

        Returns
        -------
        numpy.ndarray
            List of indices.
        """
        if self._index_as_unique:
            return self.get_indexer(target, **kwargs)
        indexer, _ = self.get_indexer_non_unique(target)
        return indexer

    def _get_indexer_non_comparable(self, target: "Index", method, unique: bool = True):
        """
        Called from get_indexer or get_indexer_non_unique when the target
        is of a non-comparable dtype.

        For get_indexer lookups with method=None, get_indexer is an _equality_
        check, so non-comparable dtypes mean we will always have no matches.

        For get_indexer lookups with a method, get_indexer is an _inequality_
        check, so non-comparable dtypes mean we will always raise TypeError.

        Parameters
        ----------
        target : Index
        method : str or None
        unique : bool, default True
            * True if called from get_indexer.
            * False if called from get_indexer_non_unique.

        Raises
        ------
        TypeError
            If doing an inequality check, i.e. method is not None.
        """
        if method is not None:
            other = unpack_nested_dtype(target)
            raise TypeError(f"Cannot compare dtypes {self.dtype} and {other.dtype}")

        no_matches = -1 * np.ones(target.shape, dtype=np.intp)
        if unique:
            # This is for get_indexer
            return no_matches
        else:
            # This is for get_indexer_non_unique
            missing = np.arange(len(target), dtype=np.intp)
            return no_matches, missing

    @property
    def _index_as_unique(self):
        """
        Whether we should treat this as unique for the sake of
        get_indexer vs get_indexer_non_unique.

        For IntervalIndex compat.
        """
        return self.is_unique

    @final
    def _maybe_promote(self, other: "Index"):
        """
        When dealing with an object-dtype Index and a non-object Index, see
        if we can upcast the object-dtype one to improve performance.
        """

        if self.inferred_type == "date" and isinstance(other, ABCDatetimeIndex):
            try:
                return type(other)(self), other
            except OutOfBoundsDatetime:
                return self, other
        elif self.inferred_type == "timedelta" and isinstance(other, ABCTimedeltaIndex):
            # TODO: we don't have tests that get here
            return type(other)(self), other
        elif self.inferred_type == "boolean":
            if not is_object_dtype(self.dtype):
                return self.astype("object"), other.astype("object")

        if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype):
            # Reverse op so we don't need to re-implement on the subclasses
            other, self = other._maybe_promote(self)

        return self, other

    def _should_compare(self, other: "Index") -> bool:
        """
        Check if `self == other` can ever have non-False entries.
        """
        other = unpack_nested_dtype(other)
        dtype = other.dtype
        return self._is_comparable_dtype(dtype) or is_object_dtype(dtype)

    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        """
        Can we compare values of the given dtype to our own?
        """
        return True

    @final
    def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]:
        """
        Group the index labels by a given array of values.

        Parameters
        ----------
        values : array
            Values used to determine the groups.

        Returns
        -------
        dict
            {group name -> group labels}
        """
        # TODO: if we are a MultiIndex, we can do better
        #  than converting to tuples
        if isinstance(values, ABCMultiIndex):
            values = values._values
        values = Categorical(values)
        result = values._reverse_indexer()

        # map to the label
        result = {k: self.take(v) for k, v in result.items()}

        return PrettyDict(result)

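    # Illustrative sketch, not part of pandas itself: expected to behave
    # roughly like:
    #
    # >>> idx = pd.Index(["a", "b", "c"])
    # >>> idx.groupby(np.array([1, 2, 1]))
    # {1: Index(['a', 'c'], dtype='object'), 2: Index(['b'], dtype='object')}
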
    def map(self, mapper, na_action=None):
        """
        Map values using input correspondence (a dict, Series, or function).

        Parameters
        ----------
        mapper : function, dict, or Series
            Mapping correspondence.
        na_action : {None, 'ignore'}
            If 'ignore', propagate NA values, without passing them to the
            mapping correspondence.

        Returns
        -------
        applied : Union[Index, MultiIndex], inferred
            The output of the mapping function applied to the index.
            If the function returns a tuple with more than one element
            a MultiIndex will be returned.
        """
        from pandas.core.indexes.multi import MultiIndex

        new_values = super()._map_values(mapper, na_action=na_action)

        attributes = self._get_attributes_dict()

        # we can return a MultiIndex
        if new_values.size and isinstance(new_values[0], tuple):
            if isinstance(self, MultiIndex):
                names = self.names
            elif attributes.get("name"):
                names = [attributes.get("name")] * len(new_values[0])
            else:
                names = None
            return MultiIndex.from_tuples(new_values, names=names)

        attributes["copy"] = False
        if not new_values.size:
            # empty
            attributes["dtype"] = self.dtype

        return Index(new_values, **attributes)

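    # Illustrative sketch, not part of pandas itself: expected to behave
    # roughly like:
    #
    # >>> pd.Index([1, 2, 3]).map(lambda x: x * 10)
    # Int64Index([10, 20, 30], dtype='int64')
    # >>> pd.Index(["a", "b"]).map(lambda x: (x, x.upper()))  # tuples -> MultiIndex
    # MultiIndex([('a', 'A'),
    #             ('b', 'B')],
    #            )
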
    # TODO: De-duplicate with map, xref GH#32349
    @final
    def _transform_index(self, func, level=None) -> "Index":
        """
        Apply function to all values found in index.

        This includes transforming multiindex entries separately.
        Only apply function to one level of the MultiIndex if level is specified.
        """
        if isinstance(self, ABCMultiIndex):
            if level is not None:
                items = [
                    tuple(func(y) if i == level else y for i, y in enumerate(x))
                    for x in self
                ]
            else:
                items = [tuple(func(y) for y in x) for x in self]
            return type(self).from_tuples(items, names=self.names)
        else:
            items = [func(x) for x in self]
            return Index(items, name=self.name, tupleize_cols=False)

    def isin(self, values, level=None):
        """
        Return a boolean array where the index values are in `values`.

        Compute boolean array of whether each index value is found in the
        passed set of values. The length of the returned boolean array matches
        the length of the index.

        Parameters
        ----------
        values : set or list-like
            Sought values.
        level : str or int, optional
            Name or position of the index level to use (if the index is a
            `MultiIndex`).

        Returns
        -------
        is_contained : ndarray
            NumPy array of boolean values.

        See Also
        --------
        Series.isin : Same for Series.
        DataFrame.isin : Same method for DataFrames.

        Notes
        -----
        In the case of `MultiIndex` you must either specify `values` as a
        list-like object containing tuples that are the same length as the
        number of levels, or specify `level`. Otherwise it will raise a
        ``ValueError``.

        If `level` is specified:

        - if it is the name of one *and only one* index level, use that level;
        - otherwise it should be a number indicating level position.

        Examples
        --------
        >>> idx = pd.Index([1,2,3])
        >>> idx
        Int64Index([1, 2, 3], dtype='int64')

        Check whether each index value is in a list of values.

        >>> idx.isin([1, 4])
        array([ True, False, False])

        >>> midx = pd.MultiIndex.from_arrays([[1,2,3],
        ...                                  ['red', 'blue', 'green']],
        ...                                  names=('number', 'color'))
        >>> midx
        MultiIndex([(1,   'red'),
                    (2,  'blue'),
                    (3, 'green')],
                   names=['number', 'color'])

        Check whether the strings in the 'color' level of the MultiIndex
        are in a list of colors.

        >>> midx.isin(['red', 'orange', 'yellow'], level='color')
        array([ True, False, False])

        To check across the levels of a MultiIndex, pass a list of tuples:

        >>> midx.isin([(1, 'red'), (3, 'red')])
        array([ True, False, False])

        For a DatetimeIndex, string values in `values` are converted to
        Timestamps.

        >>> dates = ['2000-03-11', '2000-03-12', '2000-03-13']
        >>> dti = pd.to_datetime(dates)
        >>> dti
        DatetimeIndex(['2000-03-11', '2000-03-12', '2000-03-13'],
        dtype='datetime64[ns]', freq=None)

        >>> dti.isin(['2000-03-11'])
        array([ True, False, False])
        """
        if level is not None:
            self._validate_index_level(level)
        return algos.isin(self._values, values)

    def _get_string_slice(self, key: str_t):
        # this is for partial string indexing,
        # overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex
        raise NotImplementedError

    def slice_indexer(
        self,
        start: Optional[Label] = None,
        end: Optional[Label] = None,
        step: Optional[int] = None,
        kind: Optional[str_t] = None,
    ) -> slice:
        """
        Compute the slice indexer for input labels and step.

        Index needs to be ordered and unique.

        Parameters
        ----------
        start : label, default None
            If None, defaults to the beginning.
        end : label, default None
            If None, defaults to the end.
        step : int, default None
        kind : str, default None

        Returns
        -------
        indexer : slice

        Raises
        ------
        KeyError : If key does not exist, or key is not unique and index is
            not ordered.

        Notes
        -----
        This function assumes that the data is sorted, so use at your own peril

        Examples
        --------
        This is a method on all index types. For example you can do:

        >>> idx = pd.Index(list('abcd'))
        >>> idx.slice_indexer(start='b', end='c')
        slice(1, 3, None)

        >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')])
        >>> idx.slice_indexer(start='b', end=('c', 'g'))
        slice(1, 3, None)
        """
        start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind)

        # return a slice
        if not is_scalar(start_slice):
            raise AssertionError("Start slice bound is non-scalar")
        if not is_scalar(end_slice):
            raise AssertionError("End slice bound is non-scalar")

        return slice(start_slice, end_slice, step)

    def _maybe_cast_indexer(self, key):
        """
        If we have a float key and are not a floating index, then try to cast
        to an int if equivalent.
        """
        if not self.is_floating():
            return com.cast_scalar_indexer(key)
        return key

    @final
    def _validate_indexer(self, form: str_t, key, kind: str_t):
        """
        If we are a positional indexer, validate that we have an appropriately
        typed bound (it must be an integer).
        """
        assert kind in ["getitem", "iloc"]

        if key is None:
            pass
        elif is_integer(key):
            pass
        else:
            raise self._invalid_indexer(form, key)

    def _maybe_cast_slice_bound(self, label, side: str_t, kind):
        """
        This function should be overloaded in subclasses that allow non-trivial
        casting on label-slice bounds, e.g. datetime-like indices allowing
        strings containing formatted datetimes.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}
        kind : {'loc', 'getitem'} or None

        Returns
        -------
        label : object

        Notes
        -----
        Value of `side` parameter should be validated in caller.
        """
        assert kind in ["loc", "getitem", None]

        # We are a plain index here (subclasses override this method if they
        # wish to have special treatment for floats/ints, e.g. Float64Index and
        # datetimelike Indexes):
        # reject float/int labels if the index does not contain them
        if (is_float(label) or is_integer(label)) and label not in self.values:
            raise self._invalid_indexer("slice", label)

        return label

    def _searchsorted_monotonic(self, label, side="left"):
        if self.is_monotonic_increasing:
            return self.searchsorted(label, side=side)
        elif self.is_monotonic_decreasing:
            # np.searchsorted expects ascending sort order, have to reverse
            # everything for it to work (element ordering, search side and
            # resulting value).
            pos = self[::-1].searchsorted(
                label, side="right" if side == "left" else "left"
            )
            return len(self) - pos

        raise ValueError("index must be monotonic increasing or decreasing")

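    # Illustrative numpy sketch, not part of pandas itself, of the decreasing
    # branch above: reverse the data, flip the search side, then map the
    # position back with ``len - pos``.
    #
    # >>> import numpy as np
    # >>> data = np.array([30, 20, 10])          # monotonic decreasing
    # >>> pos = data[::-1].searchsorted(20, side="right")  # side flipped from "left"
    # >>> len(data) - pos                        # leftmost slot for 20 in `data`
    # 1
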
def get_slice_bound(self, label, side: str_t, kind) -> int:
|
||
|
"""
|
||
|
Calculate slice bound that corresponds to given label.
|
||
|
|
||
|
Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
|
||
|
of given label.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
label : object
|
||
|
side : {'left', 'right'}
|
||
|
kind : {'loc', 'getitem'} or None
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
int
|
||
|
Index of label.
|
||
|
"""
|
||
|
assert kind in ["loc", "getitem", None]
|
||
|
|
||
|
if side not in ("left", "right"):
|
||
|
raise ValueError(
|
||
|
"Invalid value for side kwarg, must be either "
|
||
|
f"'left' or 'right': {side}"
|
||
|
)
|
||
|
|
||
|
original_label = label
|
||
|
|
||
|
# For datetime indices label may be a string that has to be converted
|
||
|
# to datetime boundary according to its resolution.
|
||
|
label = self._maybe_cast_slice_bound(label, side, kind)
|
||
|
|
||
|
# we need to look up the label
|
||
|
try:
|
||
|
slc = self.get_loc(label)
|
||
|
except KeyError as err:
|
||
|
try:
|
||
|
return self._searchsorted_monotonic(label, side)
|
||
|
except ValueError:
|
||
|
# raise the original KeyError
|
||
|
raise err
|
||
|
|
||
|
if isinstance(slc, np.ndarray):
|
||
|
# get_loc may return a boolean array or an array of indices, which
|
||
|
# is OK as long as they are representable by a slice.
|
||
|
if is_bool_dtype(slc):
|
||
|
slc = lib.maybe_booleans_to_slice(slc.view("u1"))
|
||
|
else:
|
||
|
slc = lib.maybe_indices_to_slice(
|
||
|
slc.astype(np.intp, copy=False), len(self)
|
||
|
)
|
||
|
if isinstance(slc, np.ndarray):
|
||
|
raise KeyError(
|
||
|
f"Cannot get {side} slice bound for non-unique "
|
||
|
f"label: {repr(original_label)}"
|
||
|
)
|
||
|
|
||
|
if isinstance(slc, slice):
|
||
|
if side == "left":
|
||
|
return slc.start
|
||
|
else:
|
||
|
return slc.stop
|
||
|
else:
|
||
|
if side == "right":
|
||
|
return slc + 1
|
||
|
else:
|
||
|
return slc
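
    # Behavior sketch (editor's illustration, not part of the original
    # source), assuming ``import pandas as pd``: on a monotonic index with
    # repeated labels, the two sides bracket the run of duplicates as a
    # half-open positional interval [1, 3):
    #
    #   >>> idx = pd.Index(['a', 'b', 'b', 'c'])
    #   >>> idx.get_slice_bound('b', 'left', 'loc')
    #   1
    #   >>> idx.get_slice_bound('b', 'right', 'loc')
    #   3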

    def slice_locs(self, start=None, end=None, step=None, kind=None):
        """
        Compute slice locations for input labels.

        Parameters
        ----------
        start : label, default None
            If None, defaults to the beginning.
        end : label, default None
            If None, defaults to the end.
        step : int, default None
            If None, defaults to 1.
        kind : {'loc', 'getitem'} or None

        Returns
        -------
        start, end : int

        See Also
        --------
        Index.get_loc : Get location for a single label.

        Notes
        -----
        This method only works if the index is monotonic or unique.

        Examples
        --------
        >>> idx = pd.Index(list('abcd'))
        >>> idx.slice_locs(start='b', end='c')
        (1, 3)
        """
        inc = step is None or step >= 0

        if not inc:
            # If it's a reverse slice, temporarily swap bounds.
            start, end = end, start

        # GH 16785: If start and end happen to be date strings with UTC offsets
        # attempt to parse and check that the offsets are the same
        if isinstance(start, (str, datetime)) and isinstance(end, (str, datetime)):
            try:
                ts_start = Timestamp(start)
                ts_end = Timestamp(end)
            except (ValueError, TypeError):
                pass
            else:
                if not tz_compare(ts_start.tzinfo, ts_end.tzinfo):
                    raise ValueError("Both dates must have the same UTC offset")

        start_slice = None
        if start is not None:
            start_slice = self.get_slice_bound(start, "left", kind)
        if start_slice is None:
            start_slice = 0

        end_slice = None
        if end is not None:
            end_slice = self.get_slice_bound(end, "right", kind)
        if end_slice is None:
            end_slice = len(self)

        if not inc:
            # Bounds at this moment are swapped, swap them back and shift by 1.
            #
            # slice_locs('B', 'A', step=-1): s='B', e='A'
            #
            #              s='A'                 e='B'
            # AFTER SWAP:    |                     |
            #                v ------------------> V
            #           -----------------------------------
            #           | | |A|A|A|A| | | | | |B|B| | | | |
            #           -----------------------------------
            #              ^ <------------------ ^
            # SHOULD BE:   |                     |
            #           end=s-1                start=e-1
            #
            end_slice, start_slice = start_slice - 1, end_slice - 1

            # i == -1 triggers ``len(self) + i`` selection that points to the
            # last element, not before-the-first one; subtracting len(self)
            # compensates for that.
            if end_slice == -1:
                end_slice -= len(self)
            if start_slice == -1:
                start_slice -= len(self)

        return start_slice, end_slice
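
    # Editor's sketch (illustrative, not part of the original source): for a
    # reverse slice, the swapped-and-shifted bounds above compose with a
    # negative-step slice, assuming ``import pandas as pd``:
    #
    #   >>> idx = pd.Index(list('abcd'))
    #   >>> idx.slice_locs(start='c', end='b', step=-1)
    #   (2, 0)
    #
    # so idx[2:0:-1] yields ['c', 'b'], walking backwards from 'c' to 'b'.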

    def delete(self, loc):
        """
        Make new Index with passed location(-s) deleted.

        Parameters
        ----------
        loc : int or list of int
            Location of item(-s) which will be deleted.
            Use a list of locations to delete more than one value at the same time.

        Returns
        -------
        Index
            New Index with passed location(-s) deleted.

        See Also
        --------
        numpy.delete : Delete rows or columns from a NumPy array (ndarray).

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx.delete(1)
        Index(['a', 'c'], dtype='object')

        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx.delete([0, 2])
        Index(['b'], dtype='object')
        """
        return self._shallow_copy(np.delete(self._data, loc))

    def insert(self, loc: int, item):
        """
        Make new Index inserting new item at location.

        Follows Python list.append semantics for negative values.

        Parameters
        ----------
        loc : int
        item : object

        Returns
        -------
        new_index : Index
        """
        # Note: this method is overridden by all ExtensionIndex subclasses,
        # so self is never backed by an EA.
        arr = np.asarray(self)
        item = self._coerce_scalar_to_index(item)._values
        idx = np.concatenate((arr[:loc], item, arr[loc:]))
        return Index(idx, name=self.name)
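
    # Editor's sketch (illustrative, not part of the original source),
    # assuming ``import pandas as pd``:
    #
    #   >>> idx = pd.Index(['a', 'b', 'c'])
    #   >>> idx.insert(1, 'x')
    #   Index(['a', 'x', 'b', 'c'], dtype='object')
    #   >>> idx.insert(-1, 'x')
    #   Index(['a', 'b', 'x', 'c'], dtype='object')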

    def drop(self, labels, errors: str_t = "raise"):
        """
        Make new Index with passed list of labels deleted.

        Parameters
        ----------
        labels : array-like
        errors : {'ignore', 'raise'}, default 'raise'
            If 'ignore', suppress the error and only existing labels are
            dropped.

        Returns
        -------
        dropped : Index

        Raises
        ------
        KeyError
            If not all of the labels are found in the selected axis.
        """
        arr_dtype = "object" if self.dtype == "object" else None
        labels = com.index_labels_to_array(labels, dtype=arr_dtype)
        indexer = self.get_indexer_for(labels)
        mask = indexer == -1
        if mask.any():
            if errors != "ignore":
                raise KeyError(f"{labels[mask]} not found in axis")
            indexer = indexer[~mask]
        return self.delete(indexer)
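
    # Editor's sketch (illustrative, not part of the original source),
    # assuming ``import pandas as pd``:
    #
    #   >>> idx = pd.Index(['a', 'b', 'c'])
    #   >>> idx.drop(['a', 'c'])
    #   Index(['b'], dtype='object')
    #   >>> idx.drop(['a', 'z'], errors='ignore')
    #   Index(['b', 'c'], dtype='object')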

    # --------------------------------------------------------------------
    # Generated Arithmetic, Comparison, and Unary Methods

    def _cmp_method(self, other, op):
        """
        Wrapper used to dispatch comparison operations.
        """
        if self.is_(other):
            # fastpath
            if op in {operator.eq, operator.le, operator.ge}:
                arr = np.ones(len(self), dtype=bool)
                if self._can_hold_na and not isinstance(self, ABCMultiIndex):
                    # TODO: should set MultiIndex._can_hold_na = False?
                    arr[self.isna()] = False
                return arr
            elif op in {operator.ne, operator.lt, operator.gt}:
                return np.zeros(len(self), dtype=bool)

        if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)):
            if len(self) != len(other):
                raise ValueError("Lengths must match to compare")

        if not isinstance(other, ABCMultiIndex):
            other = extract_array(other, extract_numpy=True)
        else:
            other = np.asarray(other)

        if is_object_dtype(self.dtype) and isinstance(other, ExtensionArray):
            # e.g. PeriodArray, Categorical
            with np.errstate(all="ignore"):
                result = op(self._values, other)

        elif is_object_dtype(self.dtype) and not isinstance(self, ABCMultiIndex):
            # don't pass MultiIndex
            with np.errstate(all="ignore"):
                result = ops.comp_method_OBJECT_ARRAY(op, self._values, other)

        elif is_interval_dtype(self.dtype):
            with np.errstate(all="ignore"):
                result = op(self._values, np.asarray(other))

        else:
            with np.errstate(all="ignore"):
                result = ops.comparison_op(self._values, other, op)

        return result
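
    # Editor's sketch (illustrative, not part of the original source): the
    # identity fastpath above still reports NaN != NaN, assuming
    # ``import numpy as np`` and ``import pandas as pd``:
    #
    #   >>> idx = pd.Index([1.0, np.nan])
    #   >>> idx == idx
    #   array([ True, False])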

    def _arith_method(self, other, op):
        """
        Wrapper used to dispatch arithmetic operations.
        """
        from pandas import Series

        result = op(Series(self), other)
        if isinstance(result, tuple):
            return (Index(result[0]), Index(result[1]))
        return Index(result)
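
    # Editor's sketch (illustrative, not part of the original source):
    # arithmetic round-trips through Series, so ops that return a pair (like
    # divmod) come back as a tuple of Indexes, assuming ``import pandas as pd``:
    #
    #   >>> pd.Index([5, 7]) + 1
    #   Int64Index([6, 8], dtype='int64')
    #   >>> divmod(pd.Index([5, 7]), 3)
    #   (Int64Index([1, 2], dtype='int64'), Int64Index([2, 1], dtype='int64'))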

    def _unary_method(self, op):
        result = op(self._values)
        return Index(result, name=self.name)

    def __abs__(self):
        return self._unary_method(operator.abs)

    def __neg__(self):
        return self._unary_method(operator.neg)

    def __pos__(self):
        return self._unary_method(operator.pos)

    def __inv__(self):
        # TODO: why not operator.inv?
        # TODO: __inv__ vs __invert__?
        return self._unary_method(lambda x: -x)

    def any(self, *args, **kwargs):
        """
        Return whether any element is Truthy.

        Parameters
        ----------
        *args
            These parameters will be passed to numpy.any.
        **kwargs
            These parameters will be passed to numpy.any.

        Returns
        -------
        any : bool or array_like (if axis is specified)
            A single element array_like may be converted to bool.

        See Also
        --------
        Index.all : Return whether all elements are True.
        Series.all : Return whether all elements are True.

        Notes
        -----
        Not a Number (NaN), positive infinity and negative infinity
        evaluate to True because these are not equal to zero.

        Examples
        --------
        >>> index = pd.Index([0, 1, 2])
        >>> index.any()
        True

        >>> index = pd.Index([0, 0, 0])
        >>> index.any()
        False
        """
        # FIXME: docstr inaccurate, args/kwargs not passed
        self._maybe_disable_logical_methods("any")
        return np.any(self.values)

    def all(self, *args, **kwargs):
        """
        Return whether all elements are Truthy.

        Parameters
        ----------
        *args
            These parameters will be passed to numpy.all.
        **kwargs
            These parameters will be passed to numpy.all.

        Returns
        -------
        all : bool or array_like (if axis is specified)
            A single element array_like may be converted to bool.

        See Also
        --------
        Index.any : Return whether any element in an Index is True.
        Series.any : Return whether any element in a Series is True.
        Series.all : Return whether all elements in a Series are True.

        Notes
        -----
        Not a Number (NaN), positive infinity and negative infinity
        evaluate to True because these are not equal to zero.

        Examples
        --------
        **all**

        True, because nonzero integers are considered True.

        >>> pd.Index([1, 2, 3]).all()
        True

        False, because ``0`` is considered False.

        >>> pd.Index([0, 1, 2]).all()
        False

        **any**

        True, because ``1`` is considered True.

        >>> pd.Index([0, 0, 1]).any()
        True

        False, because ``0`` is considered False.

        >>> pd.Index([0, 0, 0]).any()
        False
        """
        # FIXME: docstr inaccurate, args/kwargs not passed
        self._maybe_disable_logical_methods("all")
        return np.all(self.values)

    @final
    def _maybe_disable_logical_methods(self, opname: str_t):
        """
        Raise if this Index subclass does not support any or all.
        """
        if (
            isinstance(self, ABCMultiIndex)
            or needs_i8_conversion(self.dtype)
            or is_interval_dtype(self.dtype)
            or is_categorical_dtype(self.dtype)
            or is_float_dtype(self.dtype)
        ):
            # This call will raise
            make_invalid_op(opname)(self)

    @property
    def shape(self) -> Shape:
        """
        Return a tuple of the shape of the underlying data.
        """
        # not using "(len(self), )" to return "correct" shape if the values
        # consist of a >1 D array (see GH-27775)
        # overridden in MultiIndex.shape to avoid materializing the values
        return self._values.shape


def ensure_index_from_sequences(sequences, names=None):
    """
    Construct an index from sequences of data.

    A single sequence returns an Index. Multiple sequences return a
    MultiIndex.

    Parameters
    ----------
    sequences : sequence of sequences
    names : sequence of str

    Returns
    -------
    index : Index or MultiIndex

    Examples
    --------
    >>> ensure_index_from_sequences([[1, 2, 3]], names=["name"])
    Int64Index([1, 2, 3], dtype='int64', name='name')

    >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"])
    MultiIndex([('a', 'a'),
                ('a', 'b')],
               names=['L1', 'L2'])

    See Also
    --------
    ensure_index
    """
    from pandas.core.indexes.multi import MultiIndex

    if len(sequences) == 1:
        if names is not None:
            names = names[0]
        return Index(sequences[0], name=names)
    else:
        return MultiIndex.from_arrays(sequences, names=names)


def ensure_index(
    index_like: Union[AnyArrayLike, Sequence], copy: bool = False
) -> Index:
    """
    Ensure that we have an index from some index-like object.

    Parameters
    ----------
    index_like : sequence
        An Index or other sequence
    copy : bool, default False

    Returns
    -------
    index : Index or MultiIndex

    See Also
    --------
    ensure_index_from_sequences

    Examples
    --------
    >>> ensure_index(['a', 'b'])
    Index(['a', 'b'], dtype='object')

    >>> ensure_index([('a', 'a'), ('b', 'c')])
    Index([('a', 'a'), ('b', 'c')], dtype='object')

    >>> ensure_index([['a', 'a'], ['b', 'c']])
    MultiIndex([('a', 'b'),
                ('a', 'c')],
               )
    """
    if isinstance(index_like, Index):
        if copy:
            index_like = index_like.copy()
        return index_like
    if hasattr(index_like, "name"):
        # https://github.com/python/mypy/issues/1424
        # error: Item "ExtensionArray" of "Union[ExtensionArray,
        # Sequence[Any]]" has no attribute "name" [union-attr]
        # error: Item "Sequence[Any]" of "Union[ExtensionArray, Sequence[Any]]"
        # has no attribute "name" [union-attr]
        # error: "Sequence[Any]" has no attribute "name" [attr-defined]
        # error: Item "Sequence[Any]" of "Union[Series, Sequence[Any]]" has no
        # attribute "name" [union-attr]
        # error: Item "Sequence[Any]" of "Union[Any, Sequence[Any]]" has no
        # attribute "name" [union-attr]
        name = index_like.name  # type: ignore[union-attr, attr-defined]
        return Index(index_like, name=name, copy=copy)

    if is_iterator(index_like):
        index_like = list(index_like)

    # must check for exactly list here because of strict type
    # check in clean_index_list
    if isinstance(index_like, list):
        if type(index_like) != list:
            index_like = list(index_like)

        converted, all_arrays = lib.clean_index_list(index_like)

        if len(converted) > 0 and all_arrays:
            from pandas.core.indexes.multi import MultiIndex

            return MultiIndex.from_arrays(converted)
        else:
            if isinstance(converted, np.ndarray) and converted.dtype == np.int64:
                # Check for overflows if we should actually be uint64
                # xref GH#35481
                alt = np.asarray(index_like)
                if alt.dtype == np.uint64:
                    converted = alt

            index_like = converted
    else:
        # clean_index_list does the equivalent of copying
        # so only need to do this if not list instance
        if copy:
            index_like = copy_func(index_like)

    return Index(index_like)


def ensure_has_len(seq):
    """
    If seq is an iterator, put its values into a list.
    """
    try:
        len(seq)
    except TypeError:
        return list(seq)
    else:
        return seq


def trim_front(strings: List[str]) -> List[str]:
    """
    Trims leading spaces that are shared by all strings.
    """
    trimmed = strings
    # Use startswith so an empty string ends the loop instead of raising
    # IndexError on x[0].
    while len(strings) > 0 and all(x.startswith(" ") for x in trimmed):
        trimmed = [x[1:] for x in trimmed]
    return trimmed
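
# Editor's sketch (illustrative, not part of the original source): only the
# leading spaces shared by every string are removed:
#
#   >>> trim_front(["  a", "  b"])
#   ['a', 'b']
#   >>> trim_front([" a", "  b"])
#   ['a', ' b']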


def _validate_join_method(method: str):
    if method not in ["left", "right", "inner", "outer"]:
        raise ValueError(f"do not recognize join method {method}")


def default_index(n: int) -> "RangeIndex":
    from pandas.core.indexes.range import RangeIndex

    return RangeIndex(0, n, name=None)


def maybe_extract_name(name, obj, cls) -> Label:
    """
    If no name is passed, then extract it from data, validating hashability.
    """
    if name is None and isinstance(obj, (Index, ABCSeries)):
        # Note we don't just check for "name" attribute since that would
        # pick up e.g. dtype.name
        name = obj.name

    # GH#29069
    if not is_hashable(name):
        raise TypeError(f"{cls.__name__}.name must be a hashable type")

    return name
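
# Editor's sketch (illustrative, not part of the original source), assuming
# ``import pandas as pd``:
#
#   >>> maybe_extract_name(None, pd.Series([1], name="x"), Index)
#   'x'
#   >>> maybe_extract_name("y", pd.Series([1], name="x"), Index)
#   'y'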


def _maybe_cast_with_dtype(data: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
    """
    If a dtype is passed, cast to the closest matching dtype that is supported
    by Index.

    Parameters
    ----------
    data : np.ndarray
    dtype : np.dtype
    copy : bool

    Returns
    -------
    np.ndarray
    """
    # We need to avoid having numpy coerce things that look like ints/floats
    # to ints unless they actually are ints, e.g. '0' and 0.0 should not be
    # coerced.
    # GH 11836
    if is_integer_dtype(dtype):
        inferred = lib.infer_dtype(data, skipna=False)
        if inferred == "integer":
            data = maybe_cast_to_integer_array(data, dtype, copy=copy)
        elif inferred in ["floating", "mixed-integer-float"]:
            if isna(data).any():
                raise ValueError("cannot convert float NaN to integer")

            if inferred == "mixed-integer-float":
                data = maybe_cast_to_integer_array(data, dtype)

            # If we are actually all equal to integers,
            # then coerce to integer.
            try:
                data = _try_convert_to_int_array(data, copy, dtype)
            except ValueError:
                data = np.array(data, dtype=np.float64, copy=copy)

        elif inferred == "string":
            pass
        else:
            data = data.astype(dtype)
    elif is_float_dtype(dtype):
        inferred = lib.infer_dtype(data, skipna=False)
        if inferred == "string":
            pass
        else:
            data = data.astype(dtype)
    else:
        data = np.array(data, dtype=dtype, copy=copy)

    return data
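
# Editor's sketch (illustrative, not part of the original source): float data
# that is losslessly representable as integers is coerced when an integer
# dtype is requested, assuming ``import numpy as np``:
#
#   >>> _maybe_cast_with_dtype(np.array([1.0, 2.0]), np.dtype("int64"), False)
#   array([1, 2])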


def _maybe_cast_data_without_dtype(subarr):
    """
    If we have an arraylike input but no passed dtype, try to infer
    a supported dtype.

    Parameters
    ----------
    subarr : np.ndarray, Index, or Series

    Returns
    -------
    converted : np.ndarray or ExtensionArray
    dtype : np.dtype or ExtensionDtype
    """
    # Runtime import needed bc IntervalArray imports Index
    from pandas.core.arrays import (
        DatetimeArray,
        IntervalArray,
        PeriodArray,
        TimedeltaArray,
    )

    inferred = lib.infer_dtype(subarr, skipna=False)

    if inferred == "integer":
        try:
            data = _try_convert_to_int_array(subarr, False, None)
            return data, data.dtype
        except ValueError:
            pass

        return subarr, object

    elif inferred in ["floating", "mixed-integer-float", "integer-na"]:
        # TODO: Returns IntegerArray for integer-na case in the future
        return subarr, np.float64

    elif inferred == "interval":
        try:
            data = IntervalArray._from_sequence(subarr, copy=False)
            return data, data.dtype
        except ValueError:
            # GH27172: mixed closed Intervals --> object dtype
            pass
    elif inferred == "boolean":
        # don't support boolean explicitly ATM
        pass
    elif inferred != "string":
        if inferred.startswith("datetime"):
            try:
                data = DatetimeArray._from_sequence(subarr, copy=False)
                return data, data.dtype
            except (ValueError, OutOfBoundsDatetime):
                # GH 27011
                # If we have mixed timezones, just send it
                # down the base constructor
                pass

        elif inferred.startswith("timedelta"):
            data = TimedeltaArray._from_sequence(subarr, copy=False)
            return data, data.dtype
        elif inferred == "period":
            try:
                data = PeriodArray._from_sequence(subarr)
                return data, data.dtype
            except IncompatibleFrequency:
                pass

    return subarr, subarr.dtype


def _try_convert_to_int_array(
    data: np.ndarray, copy: bool, dtype: np.dtype
) -> np.ndarray:
    """
    Attempt to convert an array of data into an integer array.

    Parameters
    ----------
    data : np.ndarray
        The data to convert.
    copy : bool
        Whether to copy the data or not.
    dtype : np.dtype

    Returns
    -------
    int_array : data converted to either an ndarray[int64] or ndarray[uint64]

    Raises
    ------
    ValueError
        If the conversion was not successful.
    """
    if not is_unsigned_integer_dtype(dtype):
        # skip int64 conversion attempt if uint-like dtype is passed, as
        # this could return Int64Index when UInt64Index is what's desired
        try:
            res = data.astype("i8", copy=False)
            if (res == data).all():
                return res  # TODO: might still need to copy
        except (OverflowError, TypeError, ValueError):
            pass

    # Conversion to int64 failed (possibly due to overflow) or was skipped,
    # so let's try now with uint64.
    try:
        res = data.astype("u8", copy=False)
        if (res == data).all():
            return res  # TODO: might still need to copy
    except (OverflowError, TypeError, ValueError):
        pass

    raise ValueError
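
# Editor's sketch (illustrative, not part of the original source), assuming
# ``import numpy as np``: values that fit int64 come back as int64, while
# values that only fit uint64 fall through to the second attempt:
#
#   >>> _try_convert_to_int_array(np.array([1.0, 2.0]), False, None)
#   array([1, 2])
#   >>> _try_convert_to_int_array(np.array([2 ** 63]), False, None).dtype
#   dtype('uint64')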


def _maybe_asobject(dtype, klass, data, copy: bool, name: Label, **kwargs):
    """
    If an object dtype was specified, create the non-object Index
    and then convert it to object.

    Parameters
    ----------
    dtype : np.dtype, ExtensionDtype, str
    klass : Index subclass
    data : list-like
    copy : bool
    name : hashable
    **kwargs

    Returns
    -------
    Index

    Notes
    -----
    We assume that calling .astype(object) on this klass will make a copy.
    """
    # GH#23524 passing `dtype=object` to DatetimeIndex is invalid,
    # will raise in the case where `data` is already tz-aware. So
    # we leave it out of this step and cast to object-dtype after
    # the DatetimeIndex construction.

    if is_dtype_equal(_o_dtype, dtype):
        # Note we can pass copy=False because the .astype below
        # will always make a copy
        index = klass(data, copy=False, name=name, **kwargs)
        return index.astype(object)

    return klass(data, dtype=dtype, copy=copy, name=name, **kwargs)
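
# Editor's sketch (illustrative, not part of the original source), assuming
# ``import numpy as np`` and ``import pandas as pd``: requesting object dtype
# builds the specialized index first, then casts:
#
#   >>> _maybe_asobject(np.dtype(object), pd.DatetimeIndex,
#   ...                 ["2020-01-01"], False, None)
#   Index([2020-01-01 00:00:00], dtype='object')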


def get_unanimous_names(*indexes: Index) -> Tuple[Label, ...]:
    """
    Return common name if all indices agree, otherwise None (level-by-level).

    Parameters
    ----------
    indexes : list of Index objects

    Returns
    -------
    tuple
        A tuple representing the unanimous 'names' found.
    """
    name_tups = [tuple(i.names) for i in indexes]
    name_sets = [{*ns} for ns in zip_longest(*name_tups)]
    names = tuple(ns.pop() if len(ns) == 1 else None for ns in name_sets)
    return names
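
# Editor's sketch (illustrative, not part of the original source), assuming
# ``import pandas as pd``:
#
#   >>> get_unanimous_names(pd.Index([1], name="a"), pd.Index([2], name="a"))
#   ('a',)
#   >>> get_unanimous_names(pd.Index([1], name="a"), pd.Index([2], name="b"))
#   (None,)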


def unpack_nested_dtype(other: Index) -> Index:
    """
    When checking if our dtype is comparable with another, we need
    to unpack CategoricalDtype to look at its categories.dtype.

    Parameters
    ----------
    other : Index

    Returns
    -------
    Index
    """
    dtype = other.dtype
    if is_categorical_dtype(dtype):
        # If there is ever a SparseIndex, this could get dispatched
        # here too.
        return dtype.categories
    return other
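
# Editor's sketch (illustrative, not part of the original source), assuming
# ``import pandas as pd``: a CategoricalIndex is unpacked to its categories,
# while any other index is returned unchanged:
#
#   >>> ci = pd.CategoricalIndex(["a", "b"])
#   >>> unpack_nested_dtype(ci)
#   Index(['a', 'b'], dtype='object')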