projektAI/venv/Lib/site-packages/pandas/core/ops/__init__.py
2021-06-06 22:13:05 +02:00

457 lines
14 KiB
Python

"""
Arithmetic operations for PandasObjects
This is not a public API.
"""
import operator
from typing import TYPE_CHECKING, Optional, Set
import warnings
import numpy as np
from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op # noqa:F401
from pandas._typing import Level
from pandas.util._decorators import Appender
from pandas.core.dtypes.common import is_array_like, is_list_like
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
from pandas.core.dtypes.missing import isna
from pandas.core import algorithms
from pandas.core.ops.array_ops import ( # noqa:F401
arithmetic_op,
comp_method_OBJECT_ARRAY,
comparison_op,
get_array_op,
logical_op,
)
from pandas.core.ops.common import ( # noqa:F401
get_op_result_name,
unpack_zerodim_and_defer,
)
from pandas.core.ops.docstrings import (
_flex_comp_doc_FRAME,
_op_descriptions,
make_flex_doc,
)
from pandas.core.ops.invalid import invalid_comparison # noqa:F401
from pandas.core.ops.mask_ops import kleene_and, kleene_or, kleene_xor # noqa: F401
from pandas.core.ops.methods import add_flex_arithmetic_methods # noqa:F401
from pandas.core.ops.roperator import ( # noqa:F401
radd,
rand_,
rdiv,
rdivmod,
rfloordiv,
rmod,
rmul,
ror_,
rpow,
rsub,
rtruediv,
rxor,
)
if TYPE_CHECKING:
from pandas import DataFrame, Series
# -----------------------------------------------------------------------------
# constants
ARITHMETIC_BINOPS: Set[str] = {
"add",
"sub",
"mul",
"pow",
"mod",
"floordiv",
"truediv",
"divmod",
"radd",
"rsub",
"rmul",
"rpow",
"rmod",
"rfloordiv",
"rtruediv",
"rdivmod",
}
COMPARISON_BINOPS: Set[str] = {"eq", "ne", "lt", "gt", "le", "ge"}
# -----------------------------------------------------------------------------
# Masking NA values and fallbacks for operations numpy does not support
def fill_binop(left, right, fill_value):
"""
If a non-None fill_value is given, replace null entries in left and right
with this value, but only in positions where _one_ of left/right is null,
not both.
Parameters
----------
left : array-like
right : array-like
fill_value : object
Returns
-------
left : array-like
right : array-like
Notes
-----
Makes copies if fill_value is not None and NAs are present.
"""
if fill_value is not None:
left_mask = isna(left)
right_mask = isna(right)
# one but not both
mask = left_mask ^ right_mask
if left_mask.any():
# Avoid making a copy if we can
left = left.copy()
left[left_mask & mask] = fill_value
if right_mask.any():
# Avoid making a copy if we can
right = right.copy()
right[right_mask & mask] = fill_value
return left, right
# -----------------------------------------------------------------------------
# Series
def align_method_SERIES(left: "Series", right, align_asobject: bool = False):
""" align lhs and rhs Series """
# ToDo: Different from align_method_FRAME, list, tuple and ndarray
# are not coerced here
# because Series has inconsistencies described in #13637
if isinstance(right, ABCSeries):
# avoid repeated alignment
if not left.index.equals(right.index):
if align_asobject:
# to keep original value's dtype for bool ops
left = left.astype(object)
right = right.astype(object)
left, right = left.align(right, copy=False)
return left, right
def flex_method_SERIES(op):
name = op.__name__.strip("_")
doc = make_flex_doc(name, "series")
@Appender(doc)
def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
# validate axis
if axis is not None:
self._get_axis_number(axis)
res_name = get_op_result_name(self, other)
if isinstance(other, ABCSeries):
return self._binop(other, op, level=level, fill_value=fill_value)
elif isinstance(other, (np.ndarray, list, tuple)):
if len(other) != len(self):
raise ValueError("Lengths must be equal")
other = self._constructor(other, self.index)
result = self._binop(other, op, level=level, fill_value=fill_value)
result.name = res_name
return result
else:
if fill_value is not None:
self = self.fillna(fill_value)
return op(self, other)
flex_wrapper.__name__ = name
return flex_wrapper
# -----------------------------------------------------------------------------
# DataFrame
def align_method_FRAME(
left, right, axis, flex: Optional[bool] = False, level: Level = None
):
"""
Convert rhs to meet lhs dims if input is list, tuple or np.ndarray.
Parameters
----------
left : DataFrame
right : Any
axis: int, str, or None
flex: bool or None, default False
Whether this is a flex op, in which case we reindex.
None indicates not to check for alignment.
level : int or level name, default None
Returns
-------
left : DataFrame
right : Any
"""
def to_series(right):
msg = "Unable to coerce to Series, length must be {req_len}: given {given_len}"
if axis is not None and left._get_axis_name(axis) == "index":
if len(left.index) != len(right):
raise ValueError(
msg.format(req_len=len(left.index), given_len=len(right))
)
right = left._constructor_sliced(right, index=left.index)
else:
if len(left.columns) != len(right):
raise ValueError(
msg.format(req_len=len(left.columns), given_len=len(right))
)
right = left._constructor_sliced(right, index=left.columns)
return right
if isinstance(right, np.ndarray):
if right.ndim == 1:
right = to_series(right)
elif right.ndim == 2:
if right.shape == left.shape:
right = left._constructor(right, index=left.index, columns=left.columns)
elif right.shape[0] == left.shape[0] and right.shape[1] == 1:
# Broadcast across columns
right = np.broadcast_to(right, left.shape)
right = left._constructor(right, index=left.index, columns=left.columns)
elif right.shape[1] == left.shape[1] and right.shape[0] == 1:
# Broadcast along rows
right = to_series(right[0, :])
else:
raise ValueError(
"Unable to coerce to DataFrame, shape "
f"must be {left.shape}: given {right.shape}"
)
elif right.ndim > 2:
raise ValueError(
"Unable to coerce to Series/DataFrame, "
f"dimension must be <= 2: {right.shape}"
)
elif is_list_like(right) and not isinstance(right, (ABCSeries, ABCDataFrame)):
# GH 36702. Raise when attempting arithmetic with list of array-like.
if any(is_array_like(el) for el in right):
raise ValueError(
f"Unable to coerce list of {type(right[0])} to Series/DataFrame"
)
# GH17901
right = to_series(right)
if flex is not None and isinstance(right, ABCDataFrame):
if not left._indexed_same(right):
if flex:
left, right = left.align(right, join="outer", level=level, copy=False)
else:
raise ValueError(
"Can only compare identically-labeled DataFrame objects"
)
elif isinstance(right, ABCSeries):
# axis=1 is default for DataFrame-with-Series op
axis = left._get_axis_number(axis) if axis is not None else 1
if not flex:
if not left.axes[axis].equals(right.index):
warnings.warn(
"Automatic reindexing on DataFrame vs Series comparisons "
"is deprecated and will raise ValueError in a future version. "
"Do `left, right = left.align(right, axis=1, copy=False)` "
"before e.g. `left == right`",
FutureWarning,
stacklevel=5,
)
left, right = left.align(
right, join="outer", axis=axis, level=level, copy=False
)
right = _maybe_align_series_as_frame(left, right, axis)
return left, right
def should_reindex_frame_op(
left: "DataFrame", right, op, axis, default_axis, fill_value, level
) -> bool:
"""
Check if this is an operation between DataFrames that will need to reindex.
"""
assert isinstance(left, ABCDataFrame)
if op is operator.pow or op is rpow:
# GH#32685 pow has special semantics for operating with null values
return False
if not isinstance(right, ABCDataFrame):
return False
if fill_value is None and level is None and axis is default_axis:
# TODO: any other cases we should handle here?
# Intersection is always unique so we have to check the unique columns
left_uniques = left.columns.unique()
right_uniques = right.columns.unique()
cols = left_uniques.intersection(right_uniques)
if len(cols) and not (cols.equals(left_uniques) and cols.equals(right_uniques)):
# TODO: is there a shortcut available when len(cols) == 0?
return True
return False
def frame_arith_method_with_reindex(
left: "DataFrame", right: "DataFrame", op
) -> "DataFrame":
"""
For DataFrame-with-DataFrame operations that require reindexing,
operate only on shared columns, then reindex.
Parameters
----------
left : DataFrame
right : DataFrame
op : binary operator
Returns
-------
DataFrame
"""
# GH#31623, only operate on shared columns
cols, lcols, rcols = left.columns.join(
right.columns, how="inner", level=None, return_indexers=True
)
new_left = left.iloc[:, lcols]
new_right = right.iloc[:, rcols]
result = op(new_left, new_right)
# Do the join on the columns instead of using align_method_FRAME
# to avoid constructing two potentially large/sparse DataFrames
join_columns, _, _ = left.columns.join(
right.columns, how="outer", level=None, return_indexers=True
)
if result.columns.has_duplicates:
# Avoid reindexing with a duplicate axis.
# https://github.com/pandas-dev/pandas/issues/35194
indexer, _ = result.columns.get_indexer_non_unique(join_columns)
indexer = algorithms.unique1d(indexer)
result = result._reindex_with_indexers(
{1: [join_columns, indexer]}, allow_dups=True
)
else:
result = result.reindex(join_columns, axis=1)
return result
def _maybe_align_series_as_frame(frame: "DataFrame", series: "Series", axis: int):
"""
If the Series operand is not EA-dtype, we can broadcast to 2D and operate
blockwise.
"""
rvalues = series._values
if not isinstance(rvalues, np.ndarray):
# TODO(EA2D): no need to special-case with 2D EAs
if rvalues.dtype == "datetime64[ns]" or rvalues.dtype == "timedelta64[ns]":
# We can losslessly+cheaply cast to ndarray
rvalues = np.asarray(rvalues)
else:
return series
if axis == 0:
rvalues = rvalues.reshape(-1, 1)
else:
rvalues = rvalues.reshape(1, -1)
rvalues = np.broadcast_to(rvalues, frame.shape)
return type(frame)(rvalues, index=frame.index, columns=frame.columns)
def flex_arith_method_FRAME(op):
op_name = op.__name__.strip("_")
default_axis = "columns"
na_op = get_array_op(op)
doc = make_flex_doc(op_name, "dataframe")
@Appender(doc)
def f(self, other, axis=default_axis, level=None, fill_value=None):
if should_reindex_frame_op(
self, other, op, axis, default_axis, fill_value, level
):
return frame_arith_method_with_reindex(self, other, op)
if isinstance(other, ABCSeries) and fill_value is not None:
# TODO: We could allow this in cases where we end up going
# through the DataFrame path
raise NotImplementedError(f"fill_value {fill_value} not supported.")
axis = self._get_axis_number(axis) if axis is not None else 1
self, other = align_method_FRAME(self, other, axis, flex=True, level=level)
if isinstance(other, ABCDataFrame):
# Another DataFrame
new_data = self._combine_frame(other, na_op, fill_value)
elif isinstance(other, ABCSeries):
new_data = self._dispatch_frame_op(other, op, axis=axis)
else:
# in this case we always have `np.ndim(other) == 0`
if fill_value is not None:
self = self.fillna(fill_value)
new_data = self._dispatch_frame_op(other, op)
return self._construct_result(new_data)
f.__name__ = op_name
return f
def flex_comp_method_FRAME(op):
op_name = op.__name__.strip("_")
default_axis = "columns" # because we are "flex"
doc = _flex_comp_doc_FRAME.format(
op_name=op_name, desc=_op_descriptions[op_name]["desc"]
)
@Appender(doc)
def f(self, other, axis=default_axis, level=None):
axis = self._get_axis_number(axis) if axis is not None else 1
self, other = align_method_FRAME(self, other, axis, flex=True, level=level)
new_data = self._dispatch_frame_op(other, op, axis=axis)
return self._construct_result(new_data)
f.__name__ = op_name
return f