""" Methods that can be shared by many array-like classes or subclasses: Series Index ExtensionArray """ from __future__ import annotations import operator from typing import Any import numpy as np from pandas._libs import lib from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op from pandas.core.dtypes.generic import ABCNDFrame from pandas.core import roperator from pandas.core.construction import extract_array from pandas.core.ops.common import unpack_zerodim_and_defer REDUCTION_ALIASES = { "maximum": "max", "minimum": "min", "add": "sum", "multiply": "prod", } class OpsMixin: # ------------------------------------------------------------- # Comparisons def _cmp_method(self, other, op): return NotImplemented @unpack_zerodim_and_defer("__eq__") def __eq__(self, other): return self._cmp_method(other, operator.eq) @unpack_zerodim_and_defer("__ne__") def __ne__(self, other): return self._cmp_method(other, operator.ne) @unpack_zerodim_and_defer("__lt__") def __lt__(self, other): return self._cmp_method(other, operator.lt) @unpack_zerodim_and_defer("__le__") def __le__(self, other): return self._cmp_method(other, operator.le) @unpack_zerodim_and_defer("__gt__") def __gt__(self, other): return self._cmp_method(other, operator.gt) @unpack_zerodim_and_defer("__ge__") def __ge__(self, other): return self._cmp_method(other, operator.ge) # ------------------------------------------------------------- # Logical Methods def _logical_method(self, other, op): return NotImplemented @unpack_zerodim_and_defer("__and__") def __and__(self, other): return self._logical_method(other, operator.and_) @unpack_zerodim_and_defer("__rand__") def __rand__(self, other): return self._logical_method(other, roperator.rand_) @unpack_zerodim_and_defer("__or__") def __or__(self, other): return self._logical_method(other, operator.or_) @unpack_zerodim_and_defer("__ror__") def __ror__(self, other): return self._logical_method(other, roperator.ror_) @unpack_zerodim_and_defer("__xor__") def __xor__(self, other): return self._logical_method(other, operator.xor) @unpack_zerodim_and_defer("__rxor__") def __rxor__(self, other): return self._logical_method(other, roperator.rxor) # ------------------------------------------------------------- # Arithmetic Methods def _arith_method(self, other, op): return NotImplemented @unpack_zerodim_and_defer("__add__") def __add__(self, other): """ Get Addition of DataFrame and other, column-wise. Equivalent to ``DataFrame.add(other)``. Parameters ---------- other : scalar, sequence, Series, dict or DataFrame Object to be added to the DataFrame. Returns ------- DataFrame The result of adding ``other`` to DataFrame. See Also -------- DataFrame.add : Add a DataFrame and another object, with option for index- or column-oriented addition. Examples -------- >>> df = pd.DataFrame({'height': [1.5, 2.6], 'weight': [500, 800]}, ... index=['elk', 'moose']) >>> df height weight elk 1.5 500 moose 2.6 800 Adding a scalar affects all rows and columns. >>> df[['height', 'weight']] + 1.5 height weight elk 3.0 501.5 moose 4.1 801.5 Each element of a list is added to a column of the DataFrame, in order. >>> df[['height', 'weight']] + [0.5, 1.5] height weight elk 2.0 501.5 moose 3.1 801.5 Keys of a dictionary are aligned to the DataFrame, based on column names; each value in the dictionary is added to the corresponding column. >>> df[['height', 'weight']] + {'height': 0.5, 'weight': 1.5} height weight elk 2.0 501.5 moose 3.1 801.5 When `other` is a :class:`Series`, the index of `other` is aligned with the columns of the DataFrame. >>> s1 = pd.Series([0.5, 1.5], index=['weight', 'height']) >>> df[['height', 'weight']] + s1 height weight elk 3.0 500.5 moose 4.1 800.5 Even when the index of `other` is the same as the index of the DataFrame, the :class:`Series` will not be reoriented. If index-wise alignment is desired, :meth:`DataFrame.add` should be used with `axis='index'`. >>> s2 = pd.Series([0.5, 1.5], index=['elk', 'moose']) >>> df[['height', 'weight']] + s2 elk height moose weight elk NaN NaN NaN NaN moose NaN NaN NaN NaN >>> df[['height', 'weight']].add(s2, axis='index') height weight elk 2.0 500.5 moose 4.1 801.5 When `other` is a :class:`DataFrame`, both columns names and the index are aligned. >>> other = pd.DataFrame({'height': [0.2, 0.4, 0.6]}, ... index=['elk', 'moose', 'deer']) >>> df[['height', 'weight']] + other height weight deer NaN NaN elk 1.7 NaN moose 3.0 NaN """ return self._arith_method(other, operator.add) @unpack_zerodim_and_defer("__radd__") def __radd__(self, other): return self._arith_method(other, roperator.radd) @unpack_zerodim_and_defer("__sub__") def __sub__(self, other): return self._arith_method(other, operator.sub) @unpack_zerodim_and_defer("__rsub__") def __rsub__(self, other): return self._arith_method(other, roperator.rsub) @unpack_zerodim_and_defer("__mul__") def __mul__(self, other): return self._arith_method(other, operator.mul) @unpack_zerodim_and_defer("__rmul__") def __rmul__(self, other): return self._arith_method(other, roperator.rmul) @unpack_zerodim_and_defer("__truediv__") def __truediv__(self, other): return self._arith_method(other, operator.truediv) @unpack_zerodim_and_defer("__rtruediv__") def __rtruediv__(self, other): return self._arith_method(other, roperator.rtruediv) @unpack_zerodim_and_defer("__floordiv__") def __floordiv__(self, other): return self._arith_method(other, operator.floordiv) @unpack_zerodim_and_defer("__rfloordiv") def __rfloordiv__(self, other): return self._arith_method(other, roperator.rfloordiv) @unpack_zerodim_and_defer("__mod__") def __mod__(self, other): return self._arith_method(other, operator.mod) @unpack_zerodim_and_defer("__rmod__") def __rmod__(self, other): return self._arith_method(other, roperator.rmod) @unpack_zerodim_and_defer("__divmod__") def __divmod__(self, other): return self._arith_method(other, divmod) @unpack_zerodim_and_defer("__rdivmod__") def __rdivmod__(self, other): return self._arith_method(other, roperator.rdivmod) @unpack_zerodim_and_defer("__pow__") def __pow__(self, other): return self._arith_method(other, operator.pow) @unpack_zerodim_and_defer("__rpow__") def __rpow__(self, other): return self._arith_method(other, roperator.rpow) # ----------------------------------------------------------------------------- # Helpers to implement __array_ufunc__ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any): """ Compatibility with numpy ufuncs. See also -------- numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__ """ from pandas.core.frame import ( DataFrame, Series, ) from pandas.core.generic import NDFrame from pandas.core.internals import BlockManager cls = type(self) kwargs = _standardize_out_kwarg(**kwargs) # for binary ops, use our custom dunder methods result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs) if result is not NotImplemented: return result # Determine if we should defer. no_defer = ( np.ndarray.__array_ufunc__, cls.__array_ufunc__, ) for item in inputs: higher_priority = ( hasattr(item, "__array_priority__") and item.__array_priority__ > self.__array_priority__ ) has_array_ufunc = ( hasattr(item, "__array_ufunc__") and type(item).__array_ufunc__ not in no_defer and not isinstance(item, self._HANDLED_TYPES) ) if higher_priority or has_array_ufunc: return NotImplemented # align all the inputs. types = tuple(type(x) for x in inputs) alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)] if len(alignable) > 1: # This triggers alignment. # At the moment, there aren't any ufuncs with more than two inputs # so this ends up just being x1.index | x2.index, but we write # it to handle *args. set_types = set(types) if len(set_types) > 1 and {DataFrame, Series}.issubset(set_types): # We currently don't handle ufunc(DataFrame, Series) # well. Previously this raised an internal ValueError. We might # support it someday, so raise a NotImplementedError. raise NotImplementedError( f"Cannot apply ufunc {ufunc} to mixed DataFrame and Series inputs." ) axes = self.axes for obj in alignable[1:]: # this relies on the fact that we aren't handling mixed # series / frame ufuncs. for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)): axes[i] = ax1.union(ax2) reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes)) inputs = tuple( x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x for x, t in zip(inputs, types) ) else: reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes)) if self.ndim == 1: names = [getattr(x, "name") for x in inputs if hasattr(x, "name")] name = names[0] if len(set(names)) == 1 else None reconstruct_kwargs = {"name": name} else: reconstruct_kwargs = {} def reconstruct(result): if ufunc.nout > 1: # np.modf, np.frexp, np.divmod return tuple(_reconstruct(x) for x in result) return _reconstruct(result) def _reconstruct(result): if lib.is_scalar(result): return result if result.ndim != self.ndim: if method == "outer": raise NotImplementedError return result if isinstance(result, BlockManager): # we went through BlockManager.apply e.g. np.sqrt result = self._constructor(result, **reconstruct_kwargs, copy=False) else: # we converted an array, lost our axes result = self._constructor( result, **reconstruct_axes, **reconstruct_kwargs, copy=False ) # TODO: When we support multiple values in __finalize__, this # should pass alignable to `__finalize__` instead of self. # Then `np.add(a, b)` would consider attrs from both a and b # when a and b are NDFrames. if len(alignable) == 1: result = result.__finalize__(self) return result if "out" in kwargs: # e.g. test_multiindex_get_loc result = dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs) return reconstruct(result) if method == "reduce": # e.g. test.series.test_ufunc.test_reduce result = dispatch_reduction_ufunc(self, ufunc, method, *inputs, **kwargs) if result is not NotImplemented: return result # We still get here with kwargs `axis` for e.g. np.maximum.accumulate # and `dtype` and `keepdims` for np.ptp if self.ndim > 1 and (len(inputs) > 1 or ufunc.nout > 1): # Just give up on preserving types in the complex case. # In theory we could preserve them for them. # * nout>1 is doable if BlockManager.apply took nout and # returned a Tuple[BlockManager]. # * len(inputs) > 1 is doable when we know that we have # aligned blocks / dtypes. # e.g. my_ufunc, modf, logaddexp, heaviside, subtract, add inputs = tuple(np.asarray(x) for x in inputs) # Note: we can't use default_array_ufunc here bc reindexing means # that `self` may not be among `inputs` result = getattr(ufunc, method)(*inputs, **kwargs) elif self.ndim == 1: # ufunc(series, ...) inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs) result = getattr(ufunc, method)(*inputs, **kwargs) else: # ufunc(dataframe) if method == "__call__" and not kwargs: # for np.(..) calls # kwargs cannot necessarily be handled block-by-block, so only # take this path if there are no kwargs mgr = inputs[0]._mgr result = mgr.apply(getattr(ufunc, method)) else: # otherwise specific ufunc methods (eg np..accumulate(..)) # Those can have an axis keyword and thus can't be called block-by-block result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs) # e.g. np.negative (only one reached), with "where" and "out" in kwargs result = reconstruct(result) return result def _standardize_out_kwarg(**kwargs) -> dict: """ If kwargs contain "out1" and "out2", replace that with a tuple "out" np.divmod, np.modf, np.frexp can have either `out=(out1, out2)` or `out1=out1, out2=out2)` """ if "out" not in kwargs and "out1" in kwargs and "out2" in kwargs: out1 = kwargs.pop("out1") out2 = kwargs.pop("out2") out = (out1, out2) kwargs["out"] = out return kwargs def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): """ If we have an `out` keyword, then call the ufunc without `out` and then set the result into the given `out`. """ # Note: we assume _standardize_out_kwarg has already been called. out = kwargs.pop("out") where = kwargs.pop("where", None) result = getattr(ufunc, method)(*inputs, **kwargs) if result is NotImplemented: return NotImplemented if isinstance(result, tuple): # i.e. np.divmod, np.modf, np.frexp if not isinstance(out, tuple) or len(out) != len(result): raise NotImplementedError for arr, res in zip(out, result): _assign_where(arr, res, where) return out if isinstance(out, tuple): if len(out) == 1: out = out[0] else: raise NotImplementedError _assign_where(out, result, where) return out def _assign_where(out, result, where) -> None: """ Set a ufunc result into 'out', masking with a 'where' argument if necessary. """ if where is None: # no 'where' arg passed to ufunc out[:] = result else: np.putmask(out, where, result) def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): """ Fallback to the behavior we would get if we did not define __array_ufunc__. Notes ----- We are assuming that `self` is among `inputs`. """ if not any(x is self for x in inputs): raise NotImplementedError new_inputs = [x if x is not self else np.asarray(x) for x in inputs] return getattr(ufunc, method)(*new_inputs, **kwargs) def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): """ Dispatch ufunc reductions to self's reduction methods. """ assert method == "reduce" if len(inputs) != 1 or inputs[0] is not self: return NotImplemented if ufunc.__name__ not in REDUCTION_ALIASES: return NotImplemented method_name = REDUCTION_ALIASES[ufunc.__name__] # NB: we are assuming that min/max represent minimum/maximum methods, # which would not be accurate for e.g. Timestamp.min if not hasattr(self, method_name): return NotImplemented if self.ndim > 1: if isinstance(self, ABCNDFrame): # TODO: test cases where this doesn't hold, i.e. 2D DTA/TDA kwargs["numeric_only"] = False if "axis" not in kwargs: # For DataFrame reductions we don't want the default axis=0 # Note: np.min is not a ufunc, but uses array_function_dispatch, # so calls DataFrame.min (without ever getting here) with the np.min # default of axis=None, which DataFrame.min catches and changes to axis=0. # np.minimum.reduce(df) gets here bc axis is not in kwargs, # so we set axis=0 to match the behaviorof np.minimum.reduce(df.values) kwargs["axis"] = 0 # By default, numpy's reductions do not skip NaNs, so we have to # pass skipna=False return getattr(self, method_name)(skipna=False, **kwargs)