projektAI/venv/Lib/site-packages/pandas/core/internals/blocks.py
2021-06-06 22:13:05 +02:00

2894 lines
92 KiB
Python

from datetime import datetime, timedelta
import inspect
import re
from typing import TYPE_CHECKING, Any, List, Optional, Type, Union, cast
import warnings
import numpy as np
from pandas._libs import NaT, algos as libalgos, internals as libinternals, lib, writers
from pandas._libs.internals import BlockPlacement
from pandas._libs.tslibs import conversion
from pandas._libs.tslibs.timezones import tz_compare
from pandas._typing import ArrayLike, Scalar, Shape
from pandas.util._validators import validate_bool_kwarg
from pandas.core.dtypes.cast import (
astype_nansafe,
convert_scalar_for_putitemlike,
find_common_type,
infer_dtype_from,
infer_dtype_from_scalar,
maybe_box_datetimelike,
maybe_downcast_numeric,
maybe_downcast_to_dtype,
maybe_infer_dtype_type,
maybe_promote,
maybe_upcast,
soft_convert_objects,
)
from pandas.core.dtypes.common import (
DT64NS_DTYPE,
TD64NS_DTYPE,
is_bool_dtype,
is_categorical_dtype,
is_datetime64_any_dtype,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_dtype_equal,
is_extension_array_dtype,
is_float,
is_float_dtype,
is_integer,
is_integer_dtype,
is_interval_dtype,
is_list_like,
is_object_dtype,
is_period_dtype,
is_re,
is_re_compilable,
is_sparse,
is_timedelta64_dtype,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCIndexClass,
ABCPandasArray,
ABCSeries,
)
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, isna_compat
import pandas.core.algorithms as algos
from pandas.core.array_algos.replace import compare_or_regex_search, replace_regex
from pandas.core.array_algos.transforms import shift
from pandas.core.arrays import (
Categorical,
DatetimeArray,
ExtensionArray,
PandasArray,
PandasDtype,
TimedeltaArray,
)
from pandas.core.base import PandasObject
import pandas.core.common as com
from pandas.core.construction import extract_array
from pandas.core.indexers import (
check_setitem_lengths,
is_empty_indexer,
is_scalar_indexer,
)
import pandas.core.missing as missing
from pandas.core.nanops import nanpercentile
if TYPE_CHECKING:
from pandas import Index
class Block(PandasObject):
"""
Canonical n-dimensional unit of homogeneous dtype contained in a pandas
data structure.

Index-ignorant; let the container take care of that.
"""
# The wrapped data: either a NumPy array or an ExtensionArray.
values: Union[np.ndarray, ExtensionArray]
# Slots for the three attributes assigned by `_simple_new` and `__init__`.
__slots__ = ["_mgr_locs", "values", "ndim"]
# Kind flags; every one is False on this base class.
# NOTE(review): presumably overridden by dtype-specific subclasses - confirm.
is_numeric = False
is_float = False
is_integer = False
is_complex = False
is_datetime = False
is_datetimetz = False
is_timedelta = False
is_bool = False
is_object = False
is_extension = False
# When False, `fillna` and `interpolate` return the block (or a copy) unchanged.
_can_hold_na = False
# First component of `_consolidate_key`.
_can_consolidate = True
# Gates the dimension checks in `__init__`, `_check_ndim` and `getitem_block`.
_validate_ndim = True
@classmethod
def _simple_new(
cls, values: ArrayLike, placement: BlockPlacement, ndim: int
) -> "Block":
"""
Fastpath constructor, does *no* validation
"""
obj = object.__new__(cls)
obj.ndim = ndim
obj.values = values
obj._mgr_locs = placement
return obj
def __init__(self, values, placement, ndim: int):
"""
Parameters
----------
values : np.ndarray or ExtensionArray
placement : BlockPlacement (or castable)
ndim : int
1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame
"""
# TODO(EA2D): ndim will be unnecessary with 2D EAs
self.ndim = self._check_ndim(values, ndim)
self.mgr_locs = placement
self.values = self._maybe_coerce_values(values)
if self._validate_ndim and self.ndim and len(self.mgr_locs) != len(self.values):
raise ValueError(
f"Wrong number of items passed {len(self.values)}, "
f"placement implies {len(self.mgr_locs)}"
)
def _maybe_coerce_values(self, values):
"""
Ensure we have correctly-typed values.
Parameters
----------
values : np.ndarray, ExtensionArray, Index
Returns
-------
np.ndarray or ExtensionArray
"""
return values
def _check_ndim(self, values, ndim):
"""
ndim inference and validation.
Infers ndim from 'values' if not provided to __init__.
Validates that values.ndim and ndim are consistent if and only if
the class variable '_validate_ndim' is True.
Parameters
----------
values : array-like
ndim : int or None
Returns
-------
ndim : int
Raises
------
ValueError : the number of dimensions do not match
"""
if ndim is None:
ndim = values.ndim
if self._validate_ndim and values.ndim != ndim:
raise ValueError(
"Wrong number of dimensions. "
f"values.ndim != ndim [{values.ndim} != {ndim}]"
)
return ndim
@property
def _holder(self):
"""
The array-like that can hold the underlying values.
None for 'Block', overridden by subclasses that don't
use an ndarray.
"""
return None
@property
def _consolidate_key(self):
return self._can_consolidate, self.dtype.name
@property
def is_view(self) -> bool:
""" return a boolean if I am possibly a view """
values = self.values
values = cast(np.ndarray, values)
return values.base is not None
@property
def is_categorical(self) -> bool:
return self._holder is Categorical
@property
def is_datelike(self) -> bool:
""" return True if I am a non-datelike """
return self.is_datetime or self.is_timedelta
def external_values(self):
"""
The array that Series.values returns (public attribute).
This has some historical constraints, and is overridden in block
subclasses to return the correct array (e.g. period returns
object ndarray and datetimetz a datetime64[ns] ndarray instead of
proper extension array).
"""
return self.values
def internal_values(self):
"""
The array that Series._values returns (internal values).
"""
return self.values
def array_values(self) -> ExtensionArray:
"""
The array that Series.array returns. Always an ExtensionArray.
"""
return PandasArray(self.values)
def get_values(self, dtype=None):
"""
return an internal format, currently just the ndarray
this is often overridden to handle to_dense like operations
"""
if is_object_dtype(dtype):
return self.values.astype(object)
return self.values
def get_block_values_for_json(self) -> np.ndarray:
"""
This is used in the JSON C code.
"""
# TODO(EA2D): reshape will be unnecessary with 2D EAs
return np.asarray(self.values).reshape(self.shape)
@property
def fill_value(self):
return np.nan
@property
def mgr_locs(self):
return self._mgr_locs
@mgr_locs.setter
def mgr_locs(self, new_mgr_locs):
if not isinstance(new_mgr_locs, libinternals.BlockPlacement):
new_mgr_locs = libinternals.BlockPlacement(new_mgr_locs)
self._mgr_locs = new_mgr_locs
def make_block(self, values, placement=None) -> "Block":
    """
    Create a new block from ``values``, letting the module-level
    ``make_block`` pick the block type.

    Parameters
    ----------
    values : array-like
    placement : optional
        Defaults to this block's ``mgr_locs``.

    Returns
    -------
    Block
        Built with this block's ``ndim``.
    """
    locs = self.mgr_locs if placement is None else placement
    if self.is_extension:
        # Extension blocks pass the values through _block_shape first.
        values = _block_shape(values, ndim=self.ndim)
    return make_block(values, placement=locs, ndim=self.ndim)
def make_block_same_class(self, values, placement=None, ndim=None):
""" Wrap given values in a block of same type as self. """
if placement is None:
placement = self.mgr_locs
if ndim is None:
ndim = self.ndim
return type(self)(values, placement=placement, ndim=ndim)
def __repr__(self) -> str:
# don't want to print out all of the items here
name = type(self).__name__
if self.ndim == 1:
result = f"{name}: {len(self)} dtype: {self.dtype}"
else:
shape = " x ".join(str(s) for s in self.shape)
result = f"{name}: {self.mgr_locs.indexer}, {shape}, dtype: {self.dtype}"
return result
def __len__(self) -> int:
return len(self.values)
def __getstate__(self):
return self.mgr_locs.indexer, self.values
def __setstate__(self, state):
self.mgr_locs = libinternals.BlockPlacement(state[0])
self.values = state[1]
self.ndim = self.values.ndim
def _slice(self, slicer):
""" return a slice of my values """
return self.values[slicer]
def getitem_block(self, slicer, new_mgr_locs=None):
"""
Perform __getitem__-like, return result as block.
As of now, only supports slices that preserve dimensionality.
"""
if new_mgr_locs is None:
axis0_slicer = slicer[0] if isinstance(slicer, tuple) else slicer
new_mgr_locs = self.mgr_locs[axis0_slicer]
elif not isinstance(new_mgr_locs, BlockPlacement):
new_mgr_locs = BlockPlacement(new_mgr_locs)
new_values = self._slice(slicer)
if self._validate_ndim and new_values.ndim != self.ndim:
raise ValueError("Only same dim slicing is allowed")
return type(self)._simple_new(new_values, new_mgr_locs, self.ndim)
@property
def shape(self):
return self.values.shape
@property
def dtype(self):
return self.values.dtype
def iget(self, i):
return self.values[i]
def set_inplace(self, locs, values):
"""
Modify block values in-place with new item value.
Notes
-----
`set` never creates a new array or new Block, whereas `setitem` _may_
create a new array and always creates a new Block.
"""
self.values[locs] = values
def delete(self, loc) -> None:
"""
Delete given loc(-s) from block in-place.
"""
self.values = np.delete(self.values, loc, 0)
self.mgr_locs = self.mgr_locs.delete(loc)
def apply(self, func, **kwargs) -> List["Block"]:
"""
apply the function to my values; return a block if we are not
one
"""
with np.errstate(all="ignore"):
result = func(self.values, **kwargs)
return self._split_op_result(result)
def reduce(self, func, ignore_failures: bool = False) -> List["Block"]:
# We will apply the function and reshape the result into a single-row
# Block with the same mgr_locs; squeezing will be done at a higher level
assert self.ndim == 2
try:
result = func(self.values)
except (TypeError, NotImplementedError):
if ignore_failures:
return []
raise
if np.ndim(result) == 0:
# TODO(EA2D): special case not needed with 2D EAs
res_values = np.array([[result]])
else:
res_values = result.reshape(-1, 1)
nb = self.make_block(res_values)
return [nb]
def _split_op_result(self, result) -> List["Block"]:
# See also: split_and_operate
if is_extension_array_dtype(result) and result.ndim > 1:
# TODO(EA2D): unnecessary with 2D EAs
# if we get a 2D ExtensionArray, we need to split it into 1D pieces
nbs = []
for i, loc in enumerate(self.mgr_locs):
vals = result[i]
block = self.make_block(values=vals, placement=[loc])
nbs.append(block)
return nbs
if not isinstance(result, Block):
result = self.make_block(result)
return [result]
def fillna(
    self, value, limit=None, inplace: bool = False, downcast=None
) -> List["Block"]:
    """
    Fill missing entries of the block with ``value``.

    If the block cannot hold ``value`` and there is something to fill,
    the block is coerced to a compatible dtype column-by-column and the
    fill is retried on the coerced block.

    Parameters
    ----------
    value : object
    limit : int, optional
        Cap on fills along the last axis; mask entries whose running
        count exceeds it are cleared.
    inplace : bool, default False
    downcast : optional
        Forwarded to ``_maybe_downcast`` on the direct-fill path.

    Returns
    -------
    List[Block]
    """
    inplace = validate_bool_kwarg(inplace, "inplace")

    na_mask = _extract_bool_array(isna(self.values))
    if limit is not None:
        limit = libalgos.validate_limit(None, limit=limit)
        na_mask[na_mask.cumsum(self.ndim - 1) > limit] = False

    if not self._can_hold_na:
        return [self] if inplace else [self.copy()]

    if self._can_hold_element(value):
        target = self if inplace else self.copy()
        target._putmask_simple(na_mask, value)
        # TODO: should be nb._maybe_downcast?
        return self._maybe_downcast([target], downcast)

    # we can't process the value, but nothing to do
    if not na_mask.any():
        return [self] if inplace else [self.copy()]

    # operate column-by-column
    def fill_column(mask, val, idx):
        coerced = self.coerce_to_target_dtype(value)

        # slice out our block
        if idx is not None:
            # i.e. self.ndim == 2
            coerced = coerced.getitem_block(slice(idx, idx + 1))
        return coerced.fillna(value, limit=limit, inplace=inplace, downcast=None)

    return self.split_and_operate(None, fill_column, inplace)
def _split(self) -> List["Block"]:
"""
Split a block into a list of single-column blocks.
"""
assert self.ndim == 2
new_blocks = []
for i, ref_loc in enumerate(self.mgr_locs):
vals = self.values[slice(i, i + 1)]
nb = self.make_block(vals, [ref_loc])
new_blocks.append(nb)
return new_blocks
def split_and_operate(
self, mask, f, inplace: bool, ignore_failures: bool = False
) -> List["Block"]:
"""
split the block per-column, and apply the callable f
per-column, return a new block for each. Handle
masking which will not change a block unless needed.
Parameters
----------
mask : 2-d boolean mask, or None
When None, an all-True mask of ``self.shape`` is used, so ``f`` is
applied to every column.
f : callable accepting (1d-mask, 1d values, indexer)
May return either new values or a one-element list holding a Block.
The indexer is None for 1-D blocks and the column position otherwise.
inplace : bool
Only consulted for columns ``f`` is not applied to: when False those
values are copied.
ignore_failures : bool, default False
If True, columns of a 2-D block for which ``f`` raises TypeError are
left out of the result instead of propagating the error.
Returns
-------
list of blocks
"""
if mask is None:
mask = np.broadcast_to(True, shape=self.shape)
new_values = self.values
# Helper: normalise whatever `f` produced into a Block at `ref_loc`.
def make_a_block(nv, ref_loc):
if isinstance(nv, list):
# `f` already produced a block; it must be exactly one.
assert len(nv) == 1, nv
assert isinstance(nv[0], Block)
block = nv[0]
else:
# Put back the dimension that was taken from it and make
# a block out of the result.
nv = _block_shape(nv, ndim=self.ndim)
block = self.make_block(values=nv, placement=ref_loc)
return block
# ndim == 1
if self.ndim == 1:
if mask.any():
nv = f(mask, new_values, None)
else:
nv = new_values if inplace else new_values.copy()
block = make_a_block(nv, self.mgr_locs)
return [block]
# ndim > 1
new_blocks = []
for i, ref_loc in enumerate(self.mgr_locs):
m = mask[i]
v = new_values[i]
# need a new block
# (an empty column also goes through `f`)
if m.any() or m.size == 0:
# Apply our function; we may ignore_failures if this is a
# reduction that is dropping nuisance columns GH#37827
try:
nv = f(m, v, i)
except TypeError:
if ignore_failures:
continue
else:
raise
else:
nv = v if inplace else v.copy()
block = make_a_block(nv, [ref_loc])
new_blocks.append(block)
return new_blocks
def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"]:
# no need to downcast our float
# unless indicated
if downcast is None and (self.is_float or self.is_datelike):
return blocks
return extend_blocks([b.downcast(downcast) for b in blocks])
def downcast(self, dtypes=None) -> List["Block"]:
""" try to downcast each item to the dict of dtypes if present """
# turn it off completely
if dtypes is False:
return [self]
values = self.values
if self.ndim == 1:
# try to cast all non-floats here
if dtypes is None:
dtypes = "infer"
nv = maybe_downcast_to_dtype(values, dtypes)
return [self.make_block(nv)]
# ndim > 1
if dtypes is None:
return [self]
if not (dtypes == "infer" or isinstance(dtypes, dict)):
raise ValueError(
"downcast must have a dictionary or 'infer' as its argument"
)
elif dtypes != "infer":
raise AssertionError("dtypes as dict is not supported yet")
# operate column-by-column
# this is expensive as it splits the blocks items-by-item
def f(mask, val, idx):
val = maybe_downcast_to_dtype(val, dtype="infer")
return val
return self.split_and_operate(None, f, False)
def astype(self, dtype, copy: bool = False, errors: str = "raise"):
"""
Coerce to the new dtype.
Parameters
----------
dtype : str, dtype convertible
copy : bool, default False
copy if indicated
errors : str, {'raise', 'ignore'}, default 'raise'
- ``raise`` : allow exceptions to be raised
- ``ignore`` : suppress exceptions. On error return original object
Returns
-------
Block
Raises
------
ValueError
If ``errors`` is not one of 'raise' or 'ignore'.
TypeError
If ``dtype`` is an ExtensionDtype class rather than an instance, or
if both blocks are numeric and the converted block's shape differs.
"""
errors_legal_values = ("raise", "ignore")
if errors not in errors_legal_values:
invalid_arg = (
"Expected value of kwarg 'errors' to be one of "
f"{list(errors_legal_values)}. Supplied value is '{errors}'"
)
raise ValueError(invalid_arg)
if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype):
msg = (
f"Expected an instance of {dtype.__name__}, "
"but got the class instead. Try instantiating 'dtype'."
)
raise TypeError(msg)
if dtype is not None:
dtype = pandas_dtype(dtype)
# may need to convert to categorical
if is_categorical_dtype(dtype):
if is_categorical_dtype(self.values.dtype):
# GH 10696/18593: update an existing categorical efficiently
return self.make_block(self.values.astype(dtype, copy=copy))
return self.make_block(Categorical(self.values, dtype=dtype))
# NOTE(review): second normalisation; only differs from the guarded call
# above when dtype is None - confirm whether it is still needed.
dtype = pandas_dtype(dtype)
# astype processing
# Same dtype: nothing to convert, optionally copy.
if is_dtype_equal(self.dtype, dtype):
if copy:
return self.copy()
return self
# force the copy here
if self.is_extension:
try:
values = self.values.astype(dtype)
except (ValueError, TypeError):
if errors == "ignore":
values = self.values
else:
raise
else:
if issubclass(dtype.type, str):
# use native type formatting for datetime/tz/timedelta
if self.is_datelike:
values = self.to_native_types().values
# astype formatting
else:
# Because we have neither is_extension nor is_datelike,
# self.values already has the correct shape
values = self.values
else:
values = self.get_values(dtype=dtype)
# _astype_nansafe works fine with 1-d only
vals1d = values.ravel()
try:
values = astype_nansafe(vals1d, dtype, copy=True)
except (ValueError, TypeError):
# e.g. astype_nansafe can fail on object-dtype of strings
# trying to convert to float
if errors == "raise":
raise
# errors == "ignore": hand back the unconverted block (or a copy).
newb = self.copy() if copy else self
return newb
# TODO(EA2D): special case not needed with 2D EAs
# Restore the block's shape after the 1-d conversion above.
if isinstance(values, np.ndarray):
values = values.reshape(self.shape)
newb = self.make_block(values)
if newb.is_numeric and self.is_numeric:
if newb.shape != self.shape:
raise TypeError(
f"cannot set astype for copy = [{copy}] for dtype "
f"({self.dtype.name} [{self.shape}]) to different shape "
f"({newb.dtype.name} [{newb.shape}])"
)
return newb
def convert(
self,
copy: bool = True,
datetime: bool = True,
numeric: bool = True,
timedelta: bool = True,
) -> List["Block"]:
"""
attempt to coerce any object types to better types return a copy
of the block (if copy = True) by definition we are not an ObjectBlock
here!
"""
return [self.copy()] if copy else [self]
def _can_hold_element(self, element: Any) -> bool:
""" require the same dtype as ourselves """
dtype = self.values.dtype.type
tipo = maybe_infer_dtype_type(element)
if tipo is not None:
return issubclass(tipo.type, dtype)
return isinstance(element, dtype)
def should_store(self, value: ArrayLike) -> bool:
"""
Should we set self.values[indexer] = value inplace or do we need to cast?
Parameters
----------
value : np.ndarray or ExtensionArray
Returns
-------
bool
"""
return is_dtype_equal(value.dtype, self.dtype)
def to_native_types(self, na_rep="nan", quoting=None, **kwargs):
    """
    Convert to our native types format.

    Parameters
    ----------
    na_rep : str, default "nan"
        Written at every position where the values are missing.
    quoting : optional
        When truthy, values are converted to object dtype rather than
        to strings.
    **kwargs
        Accepted but not used by this implementation.

    Returns
    -------
    Block
    """
    values = self.values
    mask = isna(values)
    itemsize = writers.word_len(na_rep)

    if self.is_object or quoting or not itemsize:
        values = np.array(values, dtype="object")
    else:
        values = values.astype(str)
        chars_per_item = values.dtype.itemsize / np.dtype("U1").itemsize
        if chars_per_item < itemsize:
            # enlarge for the na_rep
            values = values.astype(f"<U{itemsize}")

    values[mask] = na_rep
    return self.make_block(values)
# block actions #
def copy(self, deep: bool = True):
""" copy constructor """
values = self.values
if deep:
values = values.copy()
return self.make_block_same_class(values, ndim=self.ndim)
def replace(
self,
to_replace,
value,
inplace: bool = False,
regex: bool = False,
) -> List["Block"]:
"""
replace the to_replace value with value, possible to create new
blocks here this is just a call to putmask. regex is not used here.
It is used in ObjectBlocks. It is here for API compatibility.
Parameters
----------
to_replace : object
Value to look for in the block.
value : object
Replacement value.
inplace : bool, default False
regex : bool, default False
Only forwarded to the recursive call on the object-cast block.
Returns
-------
List[Block]
"""
inplace = validate_bool_kwarg(inplace, "inplace")
# Kept so the recursive call below receives the caller's value, not the
# converted scalar.
original_to_replace = to_replace
if not self._can_hold_element(to_replace):
# We cannot hold `to_replace`, so we know immediately that
# replacing it is a no-op.
# Note: If to_replace were a list, NDFrame.replace would call
# replace_list instead of replace.
return [self] if inplace else [self.copy()]
values = self.values
if lib.is_scalar(to_replace) and isinstance(values, np.ndarray):
# The only non-DatetimeLike class that also has a non-trivial
# try_coerce_args is ObjectBlock, but that overrides replace,
# so does not get here.
to_replace = convert_scalar_for_putitemlike(to_replace, values.dtype)
mask = missing.mask_missing(values, to_replace)
if not mask.any():
# Note: we get here with test_replace_extension_other incorrectly
# bc _can_hold_element is incorrect.
return [self] if inplace else [self.copy()]
if not self._can_hold_element(value):
# The replacement does not fit our dtype: cast to object and retry there.
# inplace=True is safe for the retry because `blk` is not `self`
# (self.astype(object) returns self only when the dtype is already object).
blk = self.astype(object)
return blk.replace(
to_replace=original_to_replace,
value=value,
inplace=True,
regex=regex,
)
blk = self if inplace else self.copy()
blk._putmask_simple(mask, value)
blocks = blk.convert(numeric=False, copy=not inplace)
return blocks
def _replace_regex(
self,
to_replace,
value,
inplace: bool = False,
convert: bool = True,
mask=None,
) -> List["Block"]:
"""
Replace elements by the given value.
Parameters
----------
to_replace : object or pattern
Scalar to replace or regular expression to match.
value : object
Replacement object.
inplace : bool, default False
Perform inplace modification.
convert : bool, default True
If true, try to coerce any object types to better types.
mask : array-like of bool, optional
True indicate corresponding element is ignored.
Returns
-------
List[Block]
"""
if not self._can_hold_element(to_replace):
# i.e. only ObjectBlock, but could in principle include a
# String ExtensionBlock
return [self] if inplace else [self.copy()]
rx = re.compile(to_replace)
new_values = self.values if inplace else self.values.copy()
replace_regex(new_values, rx, value, mask)
block = self.make_block(new_values)
if convert:
nbs = block.convert(numeric=False)
else:
nbs = [block]
return nbs
def _replace_list(
self,
src_list: List[Any],
dest_list: List[Any],
inplace: bool = False,
regex: bool = False,
) -> List["Block"]:
"""
See BlockManager._replace_list docstring.
Replace each value of ``src_list`` with the value at the same position in
``dest_list``. Pairs whose source value this block cannot hold are
skipped. All masks are computed against the original values before any
replacement is applied.
Returns
-------
List[Block]
"""
# Exclude anything that we know we won't contain
pairs = [
(x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
]
if not len(pairs):
# shortcut, nothing to replace
return [self] if inplace else [self.copy()]
# Index of the last pair; conversion is attempted only on that iteration.
src_len = len(pairs) - 1
def comp(s: Scalar, mask: np.ndarray, regex: bool = False) -> np.ndarray:
"""
Generate a bool array by perform an equality check, or perform
an element-wise regular expression matching
"""
if isna(s):
# `mask` marks the non-NA positions, so its inverse matches NA.
return ~mask
s = maybe_box_datetimelike(s)
return compare_or_regex_search(self.values, s, regex, mask)
if self.is_object:
# Calculate the mask once, prior to the call of comp
# in order to avoid repeating the same computations
mask = ~isna(self.values)
masks = [comp(s[0], mask, regex) for s in pairs]
else:
# GH#38086 faster if we know we dont need to check for regex
masks = [missing.mask_missing(self.values, s[0]) for s in pairs]
masks = [_extract_bool_array(x) for x in masks]
# Working list of result blocks; starts as this block (or a copy).
rb = [self if inplace else self.copy()]
for i, (src, dest) in enumerate(pairs):
convert = i == src_len # only convert once at the end
new_rb: List["Block"] = []
# GH-39338: _replace_coerce can split a block into
# single-column blocks, so track the index so we know
# where to index into the mask
for blk_num, blk in enumerate(rb):
if len(rb) == 1:
m = masks[i]
else:
mib = masks[i]
assert not isinstance(mib, bool)
# One row of the mask per split block.
m = mib[blk_num : blk_num + 1]
result = blk._replace_coerce(
to_replace=src,
value=dest,
mask=m,
inplace=inplace,
regex=regex,
)
if convert and blk.is_object:
result = extend_blocks(
[b.convert(numeric=False, copy=True) for b in result]
)
new_rb.extend(result)
rb = new_rb
return rb
def setitem(self, indexer, value):
"""
Attempt self.values[indexer] = value, possibly creating a new array.
Parameters
----------
indexer : tuple, list-like, array-like, slice
The subset of self.values to set
value : object
The value being set
Returns
-------
Block
Raises
------
ValueError
If ``indexer`` is an ndarray with more dimensions than the block.
Notes
-----
`indexer` is a direct slice/positional indexer. `value` must
be a compatible shape.
If the block cannot hold ``value``, the block is cast to a dtype that can
and ``setitem`` is retried on the cast block.
"""
# 2-D blocks are transposed before the assignment and transposed back after.
transpose = self.ndim == 2
if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim:
raise ValueError(f"Cannot set values with ndim > {self.ndim}")
# coerce None values, if appropriate
if value is None:
if self.is_numeric:
value = np.nan
# coerce if block dtype can store value
values = self.values
if self._can_hold_element(value):
# We only get here for non-Extension Blocks, so _try_coerce_args
# is only relevant for DatetimeBlock and TimedeltaBlock
if lib.is_scalar(value):
value = convert_scalar_for_putitemlike(value, values.dtype)
else:
# current dtype cannot store value, coerce to common dtype
if hasattr(value, "dtype"):
dtype = value.dtype
elif lib.is_scalar(value) and not isna(value):
dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=True)
else:
# e.g. we are bool dtype and value is nan
# TODO: watch out for case with listlike value and scalar/empty indexer
dtype, _ = maybe_promote(np.array(value).dtype)
# NOTE(review): indentation is lost in this copy; this return presumably
# belongs to the innermost `else` only - confirm.
return self.astype(dtype).setitem(indexer, value)
dtype = find_common_type([values.dtype, dtype])
assert not is_dtype_equal(self.dtype, dtype)
# otherwise should have _can_hold_element
return self.astype(dtype).setitem(indexer, value)
# value must be storable at this moment
if is_extension_array_dtype(getattr(value, "dtype", None)):
# We need to be careful not to allow through strings that
# can be parsed to EADtypes
is_ea_value = True
arr_value = value
else:
is_ea_value = False
arr_value = np.array(value)
if transpose:
values = values.T
# length checking
check_setitem_lengths(indexer, value, values)
# True when `value` has the same first-axis length and total size as `values`.
exact_match = (
len(arr_value.shape)
and arr_value.shape[0] == values.shape[0]
and arr_value.size == values.size
)
if is_empty_indexer(indexer, arr_value):
# GH#8669 empty indexers
pass
elif is_scalar_indexer(indexer, self.ndim):
# setting a single element for each dim and with a rhs that could
# be e.g. a list; see GH#6043
values[indexer] = value
elif exact_match and is_categorical_dtype(arr_value.dtype):
# GH25495 - If the current dtype is not categorical,
# we need to create a new categorical block
values[indexer] = value
return self.make_block(Categorical(self.values, dtype=arr_value.dtype))
elif exact_match and is_ea_value:
# GH#32395 if we're going to replace the values entirely, just
# substitute in the new array
return self.make_block(arr_value)
# if we are an exact match (ex-broadcasting),
# then use the resultant dtype
elif exact_match:
# We are setting _all_ of the array's values, so can cast to new dtype
values[indexer] = value
values = values.astype(arr_value.dtype, copy=False)
# set
else:
values[indexer] = value
if transpose:
values = values.T
block = self.make_block(values)
return block
def _putmask_simple(self, mask: np.ndarray, value: Any):
"""
Like putmask but
a) we do not cast on failure
b) we do not handle repeating or truncating like numpy.
Parameters
----------
mask : np.ndarray[bool]
We assume _extract_bool_array has already been called.
value : Any
We assume self._can_hold_element(value)
"""
values = self.values
if lib.is_scalar(value) and isinstance(values, np.ndarray):
value = convert_scalar_for_putitemlike(value, values.dtype)
if self.is_extension or (self.is_object and not lib.is_scalar(value)):
# GH#19266 using np.putmask gives unexpected results with listlike value
if is_list_like(value) and len(value) == len(values):
values[mask] = value[mask]
else:
values[mask] = value
else:
# GH#37833 np.putmask is more performant than __setitem__
np.putmask(values, mask, value)
def putmask(
self, mask, new, inplace: bool = False, axis: int = 0, transpose: bool = False
) -> List["Block"]:
"""
putmask the data to the block; it is possible that we may create a
new dtype of block
Return the resulting block(s).
Two paths exist: if the block can hold ``new``, the values are updated
with ``np.putmask`` / ``np.place``; otherwise, when the mask selects
anything, the work is delegated column-by-column to ``_putmask_smart``
through ``split_and_operate``.
Parameters
----------
mask : np.ndarray[bool], SparseArray[bool], or BooleanArray
new : a ndarray/object
inplace : bool, default False
Perform inplace modification.
axis : int
transpose : bool, default False
Set to True if self is stored with axes reversed.
Returns
-------
List[Block]
Raises
------
ValueError
If ``new`` is a 1-d list-like whose length matches neither the number
of masked positions, the mask's last dimension, nor 1.
"""
mask = _extract_bool_array(mask)
assert not isinstance(new, (ABCIndexClass, ABCSeries, ABCDataFrame))
new_values = self.values # delay copy if possible.
# if we are passed a scalar None, convert it here
if not is_list_like(new) and isna(new) and not self.is_object:
# FIXME: make sure we have compatible NA
new = self.fill_value
if self._can_hold_element(new):
# We only get here for non-Extension Blocks, so _try_coerce_args
# is only relevant for DatetimeBlock and TimedeltaBlock
if lib.is_scalar(new):
new = convert_scalar_for_putitemlike(new, self.values.dtype)
if transpose:
new_values = new_values.T
# If the default repeat behavior in np.putmask would go in the
# wrong direction, then explicitly repeat and reshape new instead
if getattr(new, "ndim", 0) >= 1:
if self.ndim - 1 == new.ndim and axis == 1:
new = np.repeat(new, new_values.shape[-1]).reshape(self.shape)
new = new.astype(new_values.dtype)
# Copy only if we are still looking at the block's own array.
if new_values is self.values and not inplace:
new_values = new_values.copy()
# we require exact matches between the len of the
# values we are setting (or is compat). np.putmask
# doesn't check this and will simply truncate / pad
# the output, but we want sane error messages
#
# TODO: this prob needs some better checking
# for 2D cases
if (
is_list_like(new)
and np.any(mask[mask])
and getattr(new, "ndim", 1) == 1
):
if mask[mask].shape[-1] == len(new):
# GH 30567
# If length of ``new`` is less than the length of ``new_values``,
# `np.putmask` would first repeat the ``new`` array and then
# assign the masked values hence produces incorrect result.
# `np.place` on the other hand uses the ``new`` values at it is
# to place in the masked locations of ``new_values``
np.place(new_values, mask, new)
elif mask.shape[-1] == len(new) or len(new) == 1:
np.putmask(new_values, mask, new)
else:
raise ValueError("cannot assign mismatch length to masked array")
else:
np.putmask(new_values, mask, new)
# maybe upcast me
elif mask.any():
if transpose:
mask = mask.T
if isinstance(new, np.ndarray):
new = new.T
axis = new_values.ndim - axis - 1
# Pseudo-broadcast
if getattr(new, "ndim", 0) >= 1:
if self.ndim - 1 == new.ndim:
new_shape = list(new.shape)
new_shape.insert(axis, 1)
new = new.reshape(tuple(new_shape))
# operate column-by-column
def f(mask, val, idx):
if idx is None:
# ndim==1 case.
n = new
else:
if isinstance(new, np.ndarray):
# Wrap around when `new` has fewer rows than the block.
n = np.squeeze(new[idx % new.shape[0]])
else:
n = np.array(new)
# type of the new block
dtype, _ = maybe_promote(n.dtype)
# we need to explicitly astype here to make a copy
n = n.astype(dtype)
nv = _putmask_smart(val, mask, n)
return nv
new_blocks = self.split_and_operate(mask, f, inplace)
return new_blocks
if inplace:
return [self]
if transpose:
# NOTE(review): `new_values` is assigned from self.values at the top and
# never set to None in this method, so this branch looks unreachable
# unless self.values itself is None - confirm before removing.
if new_values is None:
new_values = self.values if inplace else self.values.copy()
new_values = new_values.T
return [self.make_block(new_values)]
def coerce_to_target_dtype(self, other):
"""
coerce the current block to a dtype compat for other
we will return a block, possibly object, and not raise
we can also safely try to coerce to the same dtype
and will receive the same block
"""
# if we cannot then coerce to object
dtype, _ = infer_dtype_from(other, pandas_dtype=True)
if is_dtype_equal(self.dtype, dtype):
return self
if self.is_bool or is_object_dtype(dtype) or is_bool_dtype(dtype):
# we don't upcast to bool
return self.astype(object)
elif (self.is_float or self.is_complex) and (
is_integer_dtype(dtype) or is_float_dtype(dtype)
):
# don't coerce float/complex to int
return self
elif self.is_datetime or is_datetime64_any_dtype(dtype):
# The is_dtype_equal check above ensures that at most one of
# these two conditions hold, so we must cast to object.
return self.astype(object)
elif self.is_timedelta or is_timedelta64_dtype(dtype):
# The is_dtype_equal check above ensures that at most one of
# these two conditions hold, so we must cast to object.
return self.astype(object)
try:
return self.astype(dtype)
except (ValueError, TypeError, OverflowError):
return self.astype(object)
def interpolate(
self,
method: str = "pad",
axis: int = 0,
index: Optional["Index"] = None,
inplace: bool = False,
limit: Optional[int] = None,
limit_direction: str = "forward",
limit_area: Optional[str] = None,
fill_value: Optional[Any] = None,
coerce: bool = False,
downcast: Optional[str] = None,
**kwargs,
):
inplace = validate_bool_kwarg(inplace, "inplace")
if not self._can_hold_na:
# If there are no NAs, then interpolate is a no-op
return self if inplace else self.copy()
# a fill na type method
try:
m = missing.clean_fill_method(method)
except ValueError:
m = None
if m is not None:
if fill_value is not None:
# similar to validate_fillna_kwargs
raise ValueError("Cannot pass both fill_value and method")
return self._interpolate_with_fill(
method=m,
axis=axis,
inplace=inplace,
limit=limit,
limit_area=limit_area,
downcast=downcast,
)
# validate the interp method
m = missing.clean_interp_method(method, **kwargs)
assert index is not None # for mypy
return self._interpolate(
method=m,
index=index,
axis=axis,
limit=limit,
limit_direction=limit_direction,
limit_area=limit_area,
fill_value=fill_value,
inplace=inplace,
downcast=downcast,
**kwargs,
)
def _interpolate_with_fill(
    self,
    method: str = "pad",
    axis: int = 0,
    inplace: bool = False,
    limit: Optional[int] = None,
    limit_area: Optional[str] = None,
    downcast: Optional[str] = None,
) -> List["Block"]:
    """
    Fill missing values via ``missing.interpolate_2d``.

    Operates on ``self.values`` directly when ``inplace`` is True and on a
    copy otherwise. The filled values are wrapped with
    ``make_block_same_class`` and passed through ``_maybe_downcast``.

    The caller must already have checked ``self._can_hold_na``.
    """
    inplace = validate_bool_kwarg(inplace, "inplace")

    assert self._can_hold_na  # checked by caller

    target = self.values if inplace else self.values.copy()
    filled = missing.interpolate_2d(
        target,
        method=method,
        axis=axis,
        limit=limit,
        limit_area=limit_area,
    )

    new_block = self.make_block_same_class(filled, ndim=self.ndim)
    return self._maybe_downcast([new_block], downcast)
def _interpolate(
    self,
    method: str,
    index: "Index",
    fill_value: Optional[Any] = None,
    axis: int = 0,
    limit: Optional[int] = None,
    limit_direction: str = "forward",
    limit_area: Optional[str] = None,
    inplace: bool = False,
    downcast: Optional[str] = None,
    **kwargs,
) -> List["Block"]:
    """
    Interpolate each 1-d slice along ``axis`` with ``missing.interpolate_1d``.

    Float blocks are interpolated as-is and integer blocks are first cast
    to float64. Any other block is returned unchanged as ``[self]``.

    Raises
    ------
    ValueError
        If ``method`` is "krogh", "piecewise_polynomial" or "pchip" and
        ``index.is_monotonic`` is False.
    """
    inplace = validate_bool_kwarg(inplace, "inplace")
    arr = self.values if inplace else self.values.copy()

    # only deal with floats (integers are upcast first)
    if not self.is_float:
        if self.is_integer:
            arr = arr.astype(np.float64)
        else:
            return [self]

    if fill_value is None:
        fill_value = self.fill_value

    needs_monotonic_index = method in ("krogh", "piecewise_polynomial", "pchip")
    if needs_monotonic_index and not index.is_monotonic:
        raise ValueError(
            f"{method} interpolation requires that the index be monotonic."
        )

    def interp_1d_slice(yvalues: np.ndarray) -> np.ndarray:
        # Interpolate a single 1-d slice and hand it back.
        # should the axis argument be handled below in apply_along_axis?
        # i.e. not an arg to missing.interpolate_1d
        return missing.interpolate_1d(
            xvalues=index,
            yvalues=yvalues,
            method=method,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            bounds_error=False,
            **kwargs,
        )

    # Every 1-d slice along ``axis`` is interpolated independently.
    interp_values = np.apply_along_axis(interp_1d_slice, axis, arr)

    new_block = self.make_block_same_class(interp_values)
    return self._maybe_downcast([new_block], downcast)
def take_nd(self, indexer, axis: int, new_mgr_locs=None, fill_value=lib.no_default):
    """
    Take values according to indexer and return them as a block.

    When ``fill_value`` is left at ``lib.no_default``, the block's own
    ``fill_value`` is used and ``allow_fill`` is False; otherwise the given
    value is used with ``allow_fill=True``.

    The result is built with ``make_block_same_class`` when the dtype of the
    taken values equals ``self.dtype`` and with ``make_block`` otherwise.
    """
    # algos.take_nd dispatches for DatetimeTZBlock, CategoricalBlock
    # so need to preserve types
    # sparse is treated like an ndarray, but needs .get_values() shaping
    values = self.values

    allow_fill = fill_value is not lib.no_default
    if not allow_fill:
        fill_value = self.fill_value

    taken = algos.take_nd(
        values, indexer, axis=axis, allow_fill=allow_fill, fill_value=fill_value
    )

    # Called from three places in managers, all of which satisfy
    # this assertion
    assert axis != 0 or new_mgr_locs is not None
    if new_mgr_locs is None:
        new_mgr_locs = self.mgr_locs

    if is_dtype_equal(taken.dtype, self.dtype):
        return self.make_block_same_class(taken, new_mgr_locs)
    return self.make_block(taken, new_mgr_locs)
def diff(self, n: int, axis: int = 1) -> List["Block"]:
    """
    Return a one-element list holding the block of differenced values.

    The values come from ``algos.diff`` on ``self.values`` with ``n``
    periods along ``axis``.
    """
    differenced = algos.diff(self.values, n, axis=axis, stacklevel=7)
    result_block = self.make_block(values=differenced)
    return [result_block]
def shift(self, periods: int, axis: int = 0, fill_value=None):
    """
    Shift the block's values by ``periods`` along ``axis``.

    ``maybe_upcast`` is applied first and may replace both the values and
    the fill value (e.g. integer to float). The shifted result is returned
    as a one-element list of blocks.
    """
    # convert integer to float if necessary. need to do a lot more than
    # that, handle boolean etc also
    upcast_values, fill_value = maybe_upcast(self.values, fill_value)

    shifted = shift(upcast_values, periods, axis, fill_value)
    return [self.make_block(shifted)]
def where(
    self, other, cond, errors="raise", try_cast: bool = False, axis: int = 0
) -> List["Block"]:
    """
    Combine the block's values with ``other`` according to ``cond``.

    When ``cond`` is True everywhere the values are simply copied.
    Otherwise the result comes from ``expressions.where(cond, values, other)``
    (presumably ``numpy.where`` semantics: keep values where ``cond`` is
    True, take ``other`` elsewhere -- confirm against that helper).

    If the block cannot hold ``other``, it is first coerced with
    ``coerce_to_target_dtype`` and ``where`` is re-run on the coerced block.

    Parameters
    ----------
    other : a ndarray/object
        Must not be an Index, Series or DataFrame (asserted).
    cond : np.ndarray[bool], SparseArray[bool], or BooleanArray
    errors : str, {'raise', 'ignore'}, default 'raise'
        - ``raise`` : allow exceptions to be raised
        - ``ignore`` : suppress exceptions. On error return original object

        Only validated here and forwarded to the recursive call.
    try_cast: bool, default False
        Only forwarded to the recursive call.
    axis : int, default 0

    Returns
    -------
    List[Block]

    Raises
    ------
    ValueError
        If ``cond`` has no ``shape`` attribute.
    """
    import pandas.core.computation.expressions as expressions

    cond = _extract_bool_array(cond)
    assert not isinstance(other, (ABCIndexClass, ABCSeries, ABCDataFrame))

    assert errors in ["raise", "ignore"]
    # 2-D blocks are operated on in transposed orientation
    transpose = self.ndim == 2

    values = self.values
    # keep the caller's ``other`` so the recursive call below sees it
    # before any reshape
    orig_other = other
    if transpose:
        values = values.T

    # If the default broadcasting would go in the wrong direction, then
    # explicitly reshape other instead
    if getattr(other, "ndim", 0) >= 1:
        if values.ndim - 1 == other.ndim and axis == 1:
            other = other.reshape(tuple(other.shape + (1,)))
        elif transpose and values.ndim == self.ndim - 1:
            # TODO(EA2D): not necessary with 2D EAs
            cond = cond.T

    if not hasattr(cond, "shape"):
        raise ValueError("where must have a condition that is ndarray like")

    if cond.ravel("K").all():
        # nothing is replaced
        result = values.copy()
    else:
        # see if we can operate on the entire block, or need item-by-item
        # or if we are a single block (ndim == 1)
        if (
            (self.is_integer or self.is_bool)
            and lib.is_float(other)
            and np.isnan(other)
        ):
            # GH#3733 special case to avoid object-dtype casting
            # and go through numexpr path instead.
            # In integer case, np.where will cast to floats
            pass
        elif not self._can_hold_element(other):
            # we cannot coerce, return a compat dtype
            # we are explicitly ignoring errors
            block = self.coerce_to_target_dtype(other)
            blocks = block.where(
                orig_other, cond, errors=errors, try_cast=try_cast, axis=axis
            )
            return self._maybe_downcast(blocks, "infer")

        # same NaN special case as above: leave ``other`` untouched there
        if not (
            (self.is_integer or self.is_bool)
            and lib.is_float(other)
            and np.isnan(other)
        ):
            # convert datetime to datetime64, timedelta to timedelta64
            other = convert_scalar_for_putitemlike(other, values.dtype)

        # By the time we get here, we should have all Series/Index
        # args extracted to ndarray
        result = expressions.where(cond, values, other)

    if self._can_hold_na or self.ndim == 1:

        if transpose:
            # undo the transpose applied to ``values`` above
            result = result.T

        return [self.make_block(result)]

    # might need to separate out blocks
    axis = cond.ndim - 1
    cond = cond.swapaxes(axis, 0)
    # one flag per slice along the swapped axis: True when cond holds
    # everywhere in that slice
    mask = np.array([cond[i].all() for i in range(cond.shape[0])], dtype=bool)

    result_blocks: List["Block"] = []
    # first the slices where cond is all-True, then the remaining slices
    for m in [mask, ~mask]:
        if m.any():
            result = cast(np.ndarray, result)  # EABlock overrides where
            taken = result.take(m.nonzero()[0], axis=axis)
            r = maybe_downcast_numeric(taken, self.dtype)
            nb = self.make_block(r.T, placement=self.mgr_locs[m])
            result_blocks.append(nb)

    return result_blocks
def _unstack(self, unstacker, fill_value, new_placement):
    """
    Return a list of unstacked blocks of self

    Parameters
    ----------
    unstacker : reshape._Unstacker
    fill_value : int
        Forwarded to ``unstacker.get_new_values``.
    new_placement
        Placement of the unstacked columns; filtered by the returned mask.

    Returns
    -------
    blocks : list of Block
        New blocks of unstacked values.
    mask : array_like of bool
        The mask of columns of `blocks` we should keep.
    """
    unstacked, raw_mask = unstacker.get_new_values(
        self.values.T, fill_value=fill_value
    )

    keep = raw_mask.any(0)
    # TODO: in all tests we have mask.all(); can we rely on that?

    kept_values = unstacked.T[keep]
    kept_placement = new_placement[keep]

    return [make_block(kept_values, placement=kept_placement)], keep
def quantile(self, qs, interpolation="linear", axis: int = 0):
    """
    Compute the quantiles of the block's values.

    Parameters
    ----------
    qs: a scalar or list of the quantiles to be computed
    interpolation: type of interpolation, default 'linear'
    axis: axis to compute, default 0

    Returns
    -------
    Block
    """
    # We should always have ndim == 2 because Series dispatches to DataFrame
    assert self.ndim == 2

    values = self.get_values()
    is_empty = values.shape[axis] == 0

    scalar_requested = not is_list_like(qs)
    if scalar_requested:
        # wrap so the code below only deals with list-likes; unwrapped later
        qs = [qs]

    if not is_empty:
        # asarray needed for Sparse, see GH#24600
        na_mask = np.asarray(isna(values))
        percentiles = nanpercentile(
            values,
            np.array(qs) * 100,
            axis=axis,
            na_value=self.fill_value,
            mask=na_mask,
            ndim=values.ndim,
            interpolation=interpolation,
        )
        result = np.array(percentiles, copy=False).T
    else:
        # nothing to reduce: build a len(values) x len(qs) array of fill values
        fill_row = np.array([self.fill_value] * len(qs))
        result = np.repeat(fill_row, len(values)).reshape(len(values), len(qs))

    if scalar_requested and not lib.is_scalar(result):
        # result could be scalar in case with is_empty and self.ndim == 1
        assert result.shape[-1] == 1, result.shape
        result = lib.item_from_zerodim(result[..., 0])

    return make_block(
        result, placement=np.arange(len(result)), ndim=np.ndim(result)
    )
def _replace_coerce(
    self,
    to_replace,
    value,
    mask: np.ndarray,
    inplace: bool = True,
    regex: bool = False,
) -> List["Block"]:
    """
    Replace the entries selected by a boolean mask with another value,
    coercing the block's dtype if needed.

    Parameters
    ----------
    to_replace : object or pattern
        Scalar to replace or regular expression to match.
    value : object
        Replacement object.
    mask : np.ndarray[bool]
        Passed to ``_putmask_simple`` (non-regex path) or ``_replace_regex``
        (regex path). Presumably True marks the positions to replace --
        confirm against those helpers.
    inplace : bool, default True
        Perform inplace modification.
    regex : bool, default False
        If true, perform regular expression substitution.

    Returns
    -------
    List[Block]
        ``[self]`` when ``mask`` has no True entry.
    """
    if not mask.any():
        return [self]

    if regex:
        if _should_use_regex(regex, to_replace):
            return self._replace_regex(
                to_replace,
                value,
                inplace=inplace,
                convert=False,
                mask=mask,
            )
        return self.replace(to_replace, value, inplace=inplace, regex=False)

    coerced = self.coerce_to_target_dtype(value)
    if coerced is self and not inplace:
        # no coercion happened; copy so the caller's block is not mutated
        coerced = coerced.copy()
    coerced._putmask_simple(mask, value)
    return [coerced]
class ExtensionBlock(Block):
    """
    Block for holding extension types.

    Notes
    -----
    This holds all 3rd-party extension array types. It's also the immediate
    parent class for our internal extension types' blocks, CategoricalBlock.

    ExtensionArrays are limited to 1-D.
    """

    _can_consolidate = False
    _validate_ndim = False
    is_extension = True

    values: ExtensionArray

    def __init__(self, values, placement, ndim: int):
        """
        Initialize a non-consolidatable block.

        'ndim' may be inferred from 'placement': when ``ndim`` is None it is
        set to 2 if ``placement`` has exactly one entry and to 1 otherwise.

        This will continue to call __init__ for the other base
        classes mixed in with this Mixin.

        Raises
        ------
        AssertionError
            If the block ends up 2-D but ``mgr_locs`` does not have exactly
            one entry.
        """
        # Placement must be converted to BlockPlacement so that we can check
        # its length
        if not isinstance(placement, libinternals.BlockPlacement):
            placement = libinternals.BlockPlacement(placement)

        # Maybe infer ndim from placement
        if ndim is None:
            if len(placement) != 1:
                ndim = 1
            else:
                ndim = 2
        super().__init__(values, placement, ndim=ndim)

        if self.ndim == 2 and len(self.mgr_locs) != 1:
            # TODO(EA2D): check unnecessary with 2D EAs
            raise AssertionError("block.size != values.size")

    @property
    def shape(self):
        """
        Shape of the block: ``(len(values),)`` for a 1-D block, otherwise
        ``(len(mgr_locs), len(values))``.
        """
        # TODO(EA2D): override unnecessary with 2D EAs
        if self.ndim == 1:
            return (len(self.values),)
        return len(self.mgr_locs), len(self.values)

    def iget(self, col):
        """
        Return the values selected by ``col``.

        For a 2-D block ``col`` may be a ``(col, loc)`` tuple; the column
        part must be 0 or a null slice. Otherwise ``col`` must equal 0 and
        the whole array is returned.

        Raises
        ------
        IndexError
            If a column other than 0 is requested.
        NotImplementedError
            If the column part is a slice other than ``slice(None)``.
        """

        if self.ndim == 2 and isinstance(col, tuple):
            # TODO(EA2D): unnecessary with 2D EAs
            col, loc = col
            if not com.is_null_slice(col) and col != 0:
                raise IndexError(f"{self} only contains one item")
            elif isinstance(col, slice):
                if col != slice(None):
                    raise NotImplementedError(col)
                # list indexer: the result stays array-like
                return self.values[[loc]]
            return self.values[loc]
        else:
            if col != 0:
                raise IndexError(f"{self} only contains one item")
            return self.values

    def set_inplace(self, locs, values):
        """
        Replace ``self.values`` with ``values``; ``locs`` must be ``[0]``.
        """
        # NB: This is a misnomer, is supposed to be inplace but is not,
        # see GH#33457
        assert locs.tolist() == [0]
        self.values = values

    def putmask(
        self, mask, new, inplace: bool = False, axis: int = 0, transpose: bool = False
    ) -> List["Block"]:
        """
        See Block.putmask.__doc__
        """
        inplace = validate_bool_kwarg(inplace, "inplace")

        mask = _extract_bool_array(mask)

        new_values = self.values if inplace else self.values.copy()

        # array-like ``new`` of matching length: keep only the masked entries
        if isinstance(new, (np.ndarray, ExtensionArray)) and len(new) == len(mask):
            new = new[mask]

        mask = safe_reshape(mask, new_values.shape)

        new_values[mask] = new
        return [self.make_block(values=new_values)]

    def _maybe_coerce_values(self, values):
        """
        Unbox to an extension array.

        This will unbox an ExtensionArray stored in an Index or Series.
        ExtensionArrays pass through. No dtype coercion is done.

        Parameters
        ----------
        values : Index, Series, ExtensionArray

        Returns
        -------
        ExtensionArray
        """
        return extract_array(values)

    @property
    def _holder(self):
        """Array class of the stored values."""
        # For extension blocks, the holder is values-dependent.
        return type(self.values)

    @property
    def fill_value(self):
        """The ``na_value`` of the values' dtype."""
        # Used in reindex_indexer
        return self.values.dtype.na_value

    @property
    def _can_hold_na(self):
        """Delegates to the holder class's ``_can_hold_na``."""
        # The default ExtensionArray._can_hold_na is True
        return self._holder._can_hold_na

    @property
    def is_view(self) -> bool:
        """Extension arrays are never treated as views."""
        return False

    @property
    def is_numeric(self):
        """Delegates to the dtype's ``_is_numeric``."""
        return self.values.dtype._is_numeric

    def setitem(self, indexer, value):
        """
        Attempt self.values[indexer] = value, possibly creating a new array.

        This differs from Block.setitem by not allowing setitem to change
        the dtype of the Block, except that a value rejected by
        ``_can_hold_element`` is set on an object-dtype copy of the block.

        Parameters
        ----------
        indexer : tuple, list-like, array-like, slice
            The subset of self.values to set
        value : object
            The value being set

        Returns
        -------
        Block

        Notes
        -----
        `indexer` is a direct slice/positional indexer. `value` must
        be a compatible shape.
        """
        if not self._can_hold_element(value):
            # This is only relevant for DatetimeTZBlock, which has a
            # non-trivial `_can_hold_element`.
            # https://github.com/pandas-dev/pandas/issues/24020
            # Need a dedicated setitem until GH#24020 (type promotion in setitem
            # for extension arrays) is designed and implemented.
            return self.astype(object).setitem(indexer, value)

        if isinstance(indexer, tuple):
            # TODO(EA2D): not needed with 2D EAs
            # we are always 1-D
            indexer = indexer[0]

        check_setitem_lengths(indexer, value, self.values)
        self.values[indexer] = value
        return self

    def get_values(self, dtype=None):
        """
        Return the values as an ndarray reshaped to ``self.shape``.

        ``dtype`` is accepted but not read here.
        """
        # ExtensionArrays must be iterable, so this works.
        # TODO(EA2D): reshape not needed with 2D EAs
        return np.asarray(self.values).reshape(self.shape)

    def array_values(self) -> ExtensionArray:
        """Return the stored ExtensionArray itself."""
        return self.values

    def to_native_types(self, na_rep="nan", quoting=None, **kwargs):
        """override to use ExtensionArray astype for the conversion"""
        values = self.values
        mask = isna(values)

        values = np.asarray(values.astype(object))
        # missing entries are written out as ``na_rep``
        values[mask] = na_rep

        # TODO(EA2D): reshape not needed with 2D EAs
        # we are expected to return a 2-d ndarray
        return self.make_block(values)

    def take_nd(
        self, indexer, axis: int = 0, new_mgr_locs=None, fill_value=lib.no_default
    ):
        """
        Take values according to indexer and return them as a block.

        ``fill_value`` left at ``lib.no_default`` is passed on as None;
        ``allow_fill`` is always True.
        """
        if fill_value is lib.no_default:
            fill_value = None

        # TODO(EA2D): special case not needed with 2D EAs
        # axis doesn't matter; we are really a single-dim object
        # but are passed the axis depending on the calling routing
        # if its REALLY axis 0, then this will be a reindex and not a take
        new_values = self.values.take(indexer, fill_value=fill_value, allow_fill=True)

        # Called from three places in managers, all of which satisfy
        # this assertion
        assert not (self.ndim == 1 and new_mgr_locs is None)
        if new_mgr_locs is None:
            new_mgr_locs = self.mgr_locs

        return self.make_block_same_class(new_values, new_mgr_locs)

    def _can_hold_element(self, element: Any) -> bool:
        """Always True here; subclasses may be stricter."""
        # TODO: We may need to think about pushing this onto the array.
        # We're doing the same as CategoricalBlock here.
        return True

    def _slice(self, slicer):
        """
        Return a slice of my values.

        Parameters
        ----------
        slicer : slice, ndarray[int], or a tuple of these
            Valid (non-reducing) indexer for self.values.

        Returns
        -------
        np.ndarray or ExtensionArray

        Raises
        ------
        AssertionError
            If a 2-tuple slicer's first entry is not a slice, or selects
            none of ``mgr_locs``.
        """
        # return same dims as we currently have

        if not isinstance(slicer, tuple) and self.ndim == 2:
            # reached via getitem_block via _slice_take_blocks_ax0
            # TODO(EA2D): wont be necessary with 2D EAs
            slicer = (slicer, slice(None))

        if isinstance(slicer, tuple) and len(slicer) == 2:
            first = slicer[0]
            if not isinstance(first, slice):
                raise AssertionError(
                    "invalid slicing for a 1-ndim ExtensionArray", first
                )
            # GH#32959 only full-slicers along fake-dim0 are valid
            # TODO(EA2D): wont be necessary with 2D EAs
            new_locs = self.mgr_locs[first]
            if len(new_locs):
                # effectively slice(None)
                slicer = slicer[1]
            else:
                raise AssertionError(
                    "invalid slicing for a 1-ndim ExtensionArray", slicer
                )

        return self.values[slicer]

    def fillna(self, value, limit=None, inplace=False, downcast=None):
        """
        Fill missing values through the array's own ``fillna``.

        ``downcast`` is accepted but not read here.
        """
        values = self.values if inplace else self.values.copy()
        values = values.fillna(value=value, limit=limit)
        return [
            self.make_block_same_class(
                values=values, placement=self.mgr_locs, ndim=self.ndim
            )
        ]

    def interpolate(
        self, method="pad", axis=0, inplace=False, limit=None, fill_value=None, **kwargs
    ):
        """
        Fill missing values through the array's own ``fillna``.

        ``axis`` and ``**kwargs`` are accepted but not read here.
        NOTE(review): returns a single block rather than a list, unlike
        ``fillna`` above -- confirm callers accept both.
        """

        values = self.values if inplace else self.values.copy()
        return self.make_block_same_class(
            values=values.fillna(value=fill_value, method=method, limit=limit),
            placement=self.mgr_locs,
        )

    def diff(self, n: int, axis: int = 1) -> List["Block"]:
        """
        Difference the values, mapping the 2-D ``axis`` onto the 1-D array.
        """
        if axis == 0 and n != 0:
            # n==0 case will be a no-op so let it fall through
            # Since we only have one column, the result will be all-NA.
            # Create this result by shifting along axis=0 past the length of
            # our values.
            return super().diff(len(self.values), axis=0)
        if axis == 1:
            # TODO(EA2D): unnecessary with 2D EAs
            # we are by definition 1D.
            axis = 0
        return super().diff(n, axis)

    def shift(
        self, periods: int, axis: int = 0, fill_value: Any = None
    ) -> List["ExtensionBlock"]:
        """
        Shift the block by `periods`.

        Dispatches to underlying ExtensionArray and re-boxes in an
        ExtensionBlock.
        """
        return [
            self.make_block_same_class(
                self.values.shift(periods=periods, fill_value=fill_value),
                placement=self.mgr_locs,
                ndim=self.ndim,
            )
        ]

    def where(
        self, other, cond, errors="raise", try_cast: bool = False, axis: int = 0
    ) -> List["Block"]:
        """
        Return a block with the entries where ``cond`` is False replaced
        by ``other``.

        ``errors``, ``try_cast`` and ``axis`` are accepted but not read here.
        """

        cond = _extract_bool_array(cond)
        assert not isinstance(other, (ABCIndexClass, ABCSeries, ABCDataFrame))

        if isinstance(other, np.ndarray) and other.ndim == 2:
            # TODO(EA2D): unnecessary with 2D EAs
            assert other.shape[1] == 1
            other = other[:, 0]

        if isinstance(cond, np.ndarray) and cond.ndim == 2:
            # TODO(EA2D): unnecessary with 2D EAs
            assert cond.shape[1] == 1
            cond = cond[:, 0]

        if lib.is_scalar(other) and isna(other):
            # The default `other` for Series / Frame is np.nan
            # we want to replace that with the correct NA value
            # for the type
            other = self.dtype.na_value

        if is_sparse(self.values):
            # TODO(SparseArray.__setitem__): remove this if condition
            # We need to re-infer the type of the data after doing the
            # where, for cases where the subtypes don't match
            dtype = None
        else:
            dtype = self.dtype

        result = self.values.copy()
        # positions to overwrite: those where cond is False
        icond = ~cond
        if lib.is_scalar(other):
            set_other = other
        else:
            set_other = other[icond]
        try:
            result[icond] = set_other
        except (NotImplementedError, TypeError):
            # NotImplementedError for class not implementing `__setitem__`
            # TypeError for SparseArray, which implements just to raise
            # a TypeError
            result = self._holder._from_sequence(
                np.where(cond, self.values, other), dtype=dtype
            )

        return [self.make_block_same_class(result, placement=self.mgr_locs)]

    def _unstack(self, unstacker, fill_value, new_placement):
        """
        ExtensionArray-safe unstack.

        Returns ``(blocks, mask)``: one block per pair of unstacked column
        and entry of ``new_placement``, plus the boolean column mask.
        """
        # We override the base-class _unstack, which unstacks directly on the
        # values of the array. For EA-backed blocks, this would require
        # converting to a 2-D ndarray of objects.
        # Instead, we unstack an ndarray of integer positions, followed by
        # a `take` on the actual values.
        n_rows = self.shape[-1]
        dummy_arr = np.arange(n_rows)

        # -1 marks positions to be filled by the take below
        new_values, mask = unstacker.get_new_values(dummy_arr, fill_value=-1)
        mask = mask.any(0)
        # TODO: in all tests we have mask.all(); can we rely on that?

        blocks = [
            self.make_block_same_class(
                self.values.take(indices, allow_fill=True, fill_value=fill_value),
                [place],
            )
            for indices, place in zip(new_values.T, new_placement)
        ]
        return blocks, mask
class ObjectValuesExtensionBlock(ExtensionBlock):
    """
    Block providing backwards-compatibility for `.values`.

    Used by PeriodArray and IntervalArray to ensure that
    Series[T].values is an ndarray of objects.
    """

    def external_values(self):
        """Return the values cast to object dtype."""
        boxed = self.values.astype(object)
        return boxed

    def _can_hold_element(self, element: Any) -> bool:
        """
        Whether ``element`` fits this block's dtype.

        True for a valid NaT for the dtype and for an empty list. Otherwise
        the inferred dtype's type (or the element itself when no dtype can
        be inferred) must be an instance/subclass of ``self.dtype.type``.
        """
        is_empty_list = isinstance(element, list) and len(element) == 0
        if is_valid_nat_for_dtype(element, self.dtype) or is_empty_list:
            return True

        tipo = maybe_infer_dtype_type(element)
        scalar_type = self.dtype.type
        if tipo is None:
            return isinstance(element, scalar_type)
        return issubclass(tipo.type, scalar_type)
class NumericBlock(Block):
    """
    Common base for the numeric block classes.

    Sets ``is_numeric`` to True and defaults ``_can_hold_na`` to True;
    subclasses may override either flag.
    """

    __slots__ = ()
    is_numeric = True
    _can_hold_na = True
class FloatBlock(NumericBlock):
    """Numeric block flagged ``is_float``."""

    __slots__ = ()
    is_float = True

    def _can_hold_element(self, element: Any) -> bool:
        """
        Whether ``element`` is float- or integer-like (never timedelta64,
        and for plain scalars never bool).
        """
        tipo = maybe_infer_dtype_type(element)
        if tipo is None:
            # No dtype could be inferred: judge the element itself.
            is_real = isinstance(element, (float, int, np.floating, np.int_))
            return is_real and not isinstance(
                element,
                (bool, np.bool_, np.timedelta64),
            )

        is_real_dtype = issubclass(tipo.type, (np.floating, np.integer))
        return is_real_dtype and not issubclass(tipo.type, np.timedelta64)

    def to_native_types(
        self, na_rep="", float_format=None, decimal=".", quoting=None, **kwargs
    ):
        """ convert to our native types format """
        values = self.values

        if float_format is not None or decimal != ".":
            from pandas.io.formats.format import FloatArrayFormatter

            formatter = FloatArrayFormatter(
                values,
                na_rep=na_rep,
                float_format=float_format,
                decimal=decimal,
                quoting=quoting,
                fixed_width=False,
            )
            return self.make_block(formatter.get_result_as_array())

        # see gh-13418: no special formatting is desired at the
        # output (important for appropriate 'quoting' behaviour),
        # so do not pass it through the FloatArrayFormatter
        na_mask = isna(values)
        if quoting:
            values = np.array(values, dtype="object")
        else:
            values = values.astype(str)

        values[na_mask] = na_rep
        return self.make_block(values)
class ComplexBlock(NumericBlock):
    """Numeric block flagged ``is_complex``."""

    __slots__ = ()
    is_complex = True

    def _can_hold_element(self, element: Any) -> bool:
        """
        Whether ``element`` is float-, integer- or complex-like.

        timedelta64 is rejected explicitly, matching FloatBlock and
        IntBlock. For plain scalars, bool is rejected as well.
        """
        tipo = maybe_infer_dtype_type(element)
        if tipo is not None:
            # np.timedelta64 is a subclass of np.signedinteger, so the
            # np.integer check alone would let timedelta64 through.
            return issubclass(
                tipo.type, (np.floating, np.integer, np.complexfloating)
            ) and not issubclass(tipo.type, np.timedelta64)
        # np.floating rather than the np.float_ alias, which NumPy 2.0 removed
        return isinstance(
            element, (float, int, complex, np.floating, np.int_)
        ) and not isinstance(element, (bool, np.bool_, np.timedelta64))
class IntBlock(NumericBlock):
    """Numeric block flagged ``is_integer``; cannot hold NA."""

    __slots__ = ()
    is_integer = True
    _can_hold_na = False

    def _can_hold_element(self, element: Any) -> bool:
        """
        Whether ``element`` is integer-like and fits this block.

        With an inferred dtype: it must be an integer dtype (not
        timedelta64) whose itemsize does not exceed the block's. Without
        one: the element must be an integer, or a float with an integral
        value.
        """
        tipo = maybe_infer_dtype_type(element)
        if tipo is None:
            # We have not inferred an integer from the dtype
            # check if we have a builtin int or a float equal to an int
            if is_integer(element):
                return True
            return is_float(element) and element.is_integer()

        is_int_dtype = issubclass(tipo.type, np.integer) and not issubclass(
            tipo.type, np.timedelta64
        )
        return is_int_dtype and self.dtype.itemsize >= tipo.itemsize
class DatetimeLikeBlockMixin(Block):
    """Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock."""

    @property
    def _holder(self):
        """Array class used to wrap ``self.values``."""
        return DatetimeArray

    @property
    def fill_value(self):
        """Nanosecond-resolution datetime64 NaT."""
        return np.datetime64("NaT", "ns")

    def get_values(self, dtype=None):
        """
        Return ``self.values``, or, for an object ``dtype``, the values
        boxed through the holder class and cast to object.
        """
        if not is_object_dtype(dtype):
            return self.values

        # DTA/TDA constructor and astype can handle 2D
        boxed = self._holder(self.values)
        return boxed.astype(object)

    def internal_values(self):
        """Same as ``array_values``."""
        # Override to return DatetimeArray and TimedeltaArray
        return self.array_values()

    def array_values(self):
        """Wrap ``self.values`` with the holder's ``_simple_new``."""
        holder = self._holder
        return holder._simple_new(self.values)

    def iget(self, key):
        """Index into the holder-wrapped values reshaped to ``self.shape``."""
        # GH#31649 we need to wrap scalars in Timestamp/Timedelta
        # TODO(EA2D): this can be removed if we ever have 2D EA
        wrapped = self.array_values()
        return wrapped.reshape(self.shape)[key]

    def diff(self, n: int, axis: int = 0) -> List["Block"]:
        """
        1st discrete difference.

        Parameters
        ----------
        n : int
            Number of periods to diff.
        axis : int, default 0
            Axis to diff upon.

        Returns
        -------
        A list with a new TimeDeltaBlock.

        Notes
        -----
        The arguments here are mimicking shift so they are called correctly
        by apply.
        """
        # TODO(EA2D): reshape not necessary with 2D EAs
        arr = self.array_values().reshape(self.shape)

        shifted = arr.shift(n, axis=axis)
        delta = arr - shifted
        return [
            TimeDeltaBlock(delta, placement=self.mgr_locs.indexer, ndim=self.ndim)
        ]

    def shift(self, periods, axis=0, fill_value=None):
        """Shift the holder-wrapped values and re-box them in a same-class block."""
        # TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs
        wrapped = self.array_values()
        shifted = wrapped.shift(periods, fill_value=fill_value, axis=axis)
        return self.make_block_same_class(shifted)

    def to_native_types(self, na_rep="NaT", **kwargs):
        """ convert to our native types format """
        wrapped = self.array_values()

        formatted = wrapped._format_native_types(na_rep=na_rep, **kwargs)
        return self.make_block(formatted)
class DatetimeBlock(DatetimeLikeBlockMixin):
    """Datetime-like block flagged ``is_datetime``."""

    __slots__ = ()
    is_datetime = True

    @property
    def _can_hold_na(self):
        """Always True."""
        return True

    def _maybe_coerce_values(self, values):
        """
        Input validation for values passed to __init__. Ensure that
        we have datetime64ns, coercing if necessary.

        Parameters
        ----------
        values : array-like
            Must be convertible to datetime64

        Returns
        -------
        values : ndarray[datetime64ns]

        Overridden by DatetimeTZBlock.
        """
        coerced = values
        if coerced.dtype != DT64NS_DTYPE:
            coerced = conversion.ensure_datetime64ns(coerced)

        if isinstance(coerced, DatetimeArray):
            # unwrap to the backing ndarray
            coerced = coerced._data

        assert isinstance(coerced, np.ndarray), type(coerced)
        return coerced

    def astype(self, dtype, copy: bool = False, errors: str = "raise"):
        """
        Cast to ``dtype``.

        A tz-aware datetime64 target is handled here by re-wrapping the
        values' int64 view in a DatetimeArray; every other target is
        delegated to the parent class.
        """
        dtype = pandas_dtype(dtype)

        if not is_datetime64tz_dtype(dtype):
            # delegate
            return super().astype(dtype=dtype, copy=copy, errors=errors)

        # we were passed a datetime64[ns, tz]
        naive = self.values
        if copy:
            # this should be the only copy
            naive = naive.copy()

        aware = DatetimeArray._simple_new(naive.view("i8"), dtype=dtype)
        return self.make_block(aware)

    def _can_hold_element(self, element: Any) -> bool:
        """
        Whether ``element`` can be stored without changing the block's dtype.
        """
        tipo = maybe_infer_dtype_type(element)

        if tipo is None:
            if element is NaT:
                return True
            if isinstance(element, datetime):
                if self.is_datetimetz:
                    return tz_compare(element.tzinfo, self.dtype.tz)
                return element.tzinfo is None
            return is_valid_nat_for_dtype(element, self.dtype)

        if isinstance(element, list) and len(element) == 0:
            # Following DatetimeArray._validate_setitem_value
            # convention, we treat this as object-dtype
            # (even though tipo is float64)
            return True

        if self.is_datetimetz:
            # require exact match, since non-nano does not exist
            return is_dtype_equal(tipo, self.dtype) or is_valid_nat_for_dtype(
                element, self.dtype
            )

        # GH#27419 if we get a non-nano datetime64 object
        return is_datetime64_dtype(tipo)

    def set_inplace(self, locs, values):
        """
        Write ``values``, converted to datetime64[ns] without a forced copy,
        into ``self.values`` at ``locs``.
        """
        self.values[locs] = conversion.ensure_datetime64ns(values, copy=False)
class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
    """Datetime64 block whose values carry a timezone (``tz`` attribute)."""

    values: DatetimeArray

    __slots__ = ()
    is_datetimetz = True
    is_extension = True

    # Pick explicitly which parent's implementation is used for each of
    # these, rather than relying on the method resolution order.
    internal_values = Block.internal_values
    _can_hold_element = DatetimeBlock._can_hold_element
    to_native_types = DatetimeBlock.to_native_types
    diff = DatetimeBlock.diff
    fill_value = np.datetime64("NaT", "ns")
    array_values = ExtensionBlock.array_values

    @property
    def _holder(self):
        """Array class used for the stored values."""
        return DatetimeArray

    def _maybe_coerce_values(self, values):
        """
        Input validation for values passed to __init__. Ensure that
        we have datetime64TZ, coercing if necessary.

        Parameters
        ----------
        values : array-like
            Must be convertible to datetime64

        Returns
        -------
        values : DatetimeArray

        Raises
        ------
        ValueError
            If the resulting array has no timezone.
        """
        if not isinstance(values, self._holder):
            values = self._holder(values)

        if values.tz is None:
            raise ValueError("cannot create a DatetimeTZBlock without a tz")

        return values

    @property
    def is_view(self) -> bool:
        """ return a boolean if I am possibly a view """
        # check the ndarray values of the DatetimeIndex values
        return self.values._data.base is not None

    def get_values(self, dtype=None):
        """
        Returns an ndarray of values.

        Parameters
        ----------
        dtype : np.dtype
            Only `object`-like dtypes are respected here (not sure
            why).

        Returns
        -------
        values : ndarray
            When ``dtype=object``, then an object-dtype ndarray of
            boxed values is returned. Otherwise, an M8[ns] ndarray
            is returned.
            NOTE(review): the comment in ``external_values`` says
            ``np.asarray(self.values)`` yields an object-dtype ndarray of
            Timestamps -- confirm the dtype of the non-object case.

            DatetimeArray is always 1-d. ``get_values`` will reshape
            the return value to be the same dimensionality as the
            block.
        """
        values = self.values
        if is_object_dtype(dtype):
            values = values.astype(object)

        # TODO(EA2D): reshape unnecessary with 2D EAs
        # Ensure that our shape is correct for DataFrame.
        # ExtensionArrays are always 1-D, even in a DataFrame when
        # the analogous NumPy-backed column would be a 2-D ndarray.
        return np.asarray(values).reshape(self.shape)

    def external_values(self):
        """Return the values as an ndarray after casting to datetime64[ns]."""
        # NB: this is different from np.asarray(self.values), since that
        # return an object-dtype ndarray of Timestamps.
        return np.asarray(self.values.astype("datetime64[ns]", copy=False))

    def fillna(self, value, limit=None, inplace=False, downcast=None):
        """
        Fill missing values; falls back to an object-dtype block when
        ``value`` cannot be held by this block.
        """
        # We support filling a DatetimeTZ with a `value` whose timezone
        # is different by coercing to object.
        if self._can_hold_element(value):
            return super().fillna(value, limit, inplace, downcast)

        # different timezones, or a non-tz
        return self.astype(object).fillna(
            value, limit=limit, inplace=inplace, downcast=downcast
        )

    def quantile(self, qs, interpolation="linear", axis=0):
        """
        Compute quantiles on the M8[ns] view of the values, then re-wrap the
        result with this block's dtype.
        """
        naive = self.values.view("M8[ns]")

        # TODO(EA2D): kludge for 2D block with 1D values
        naive = naive.reshape(self.shape)

        blk = self.make_block(naive)
        res_blk = blk.quantile(qs, interpolation=interpolation, axis=axis)

        # TODO(EA2D): ravel is kludge for 2D block with 1D values, assumes column-like
        aware = self._holder(res_blk.values.ravel(), dtype=self.dtype)
        return self.make_block_same_class(aware, ndim=res_blk.ndim)

    def _check_ndim(self, values, ndim):
        """
        ndim inference and validation.

        This is overridden by the DatetimeTZBlock to check the case of 2D
        data (values.ndim == 2), which should only be allowed if ndim is
        also 2.
        The case of 1D array is still allowed with both ndim of 1 or 2, as
        is the case for other EAs. Therefore, we are only checking
        `values.ndim > ndim` instead of `values.ndim != ndim` as for
        consolidated blocks.

        Raises
        ------
        ValueError
            If ``values.ndim`` is greater than ``ndim``.
        """
        if ndim is None:
            ndim = values.ndim

        if values.ndim > ndim:
            raise ValueError(
                "Wrong number of dimensions. "
                f"values.ndim != ndim [{values.ndim} != {ndim}]"
            )
        return ndim
class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock):
    """Datetime-like block flagged ``is_timedelta``; wraps values in TimedeltaArray."""

    __slots__ = ()
    is_timedelta = True
    _can_hold_na = True
    is_numeric = False
    fill_value = np.timedelta64("NaT", "ns")

    def _maybe_coerce_values(self, values):
        """
        Ensure the values are a timedelta64[ns] ndarray.

        Raises
        ------
        TypeError
            If the dtype is not timedelta64[ns] and not of kind "m".
        """
        coerced = values
        if coerced.dtype != TD64NS_DTYPE:
            # non-nano we will convert to nano
            if coerced.dtype.kind != "m":
                # caller is responsible for ensuring timedelta64 dtype
                raise TypeError(coerced.dtype)  # pragma: no cover

            coerced = TimedeltaArray._from_sequence(coerced)._data

        if isinstance(coerced, TimedeltaArray):
            # unwrap to the backing ndarray
            coerced = coerced._data

        assert isinstance(coerced, np.ndarray), type(coerced)
        return coerced

    @property
    def _holder(self):
        """Array class used to wrap ``self.values``."""
        return TimedeltaArray

    def _can_hold_element(self, element: Any) -> bool:
        """
        Whether ``element`` is timedelta-like or a valid NaT for the dtype.
        """
        tipo = maybe_infer_dtype_type(element)
        if tipo is not None:
            return issubclass(tipo.type, np.timedelta64)

        if element is NaT or isinstance(element, (timedelta, np.timedelta64)):
            return True
        return is_valid_nat_for_dtype(element, self.dtype)

    def fillna(self, value, **kwargs):
        """
        Fill missing values via the parent implementation.

        Raises
        ------
        TypeError
            If ``value`` is an integer.
        """
        # TODO(EA2D): if we operated on array_values, TDA.fillna would handle
        # raising here.
        if not is_integer(value):
            return super().fillna(value, **kwargs)

        # Deprecation GH#24694, GH#19233
        raise TypeError(
            "Passing integers to fillna for timedelta64[ns] dtype is no "
            "longer supported. To obtain the old behavior, pass "
            "`pd.Timedelta(seconds=n)` instead."
        )
class BoolBlock(NumericBlock):
    """Numeric block flagged ``is_bool``; cannot hold NA."""

    __slots__ = ()
    is_bool = True
    _can_hold_na = False

    def _can_hold_element(self, element: Any) -> bool:
        """Whether ``element`` is boolean, by inferred dtype or by instance."""
        tipo = maybe_infer_dtype_type(element)
        if tipo is None:
            return isinstance(element, (bool, np.bool_))
        return issubclass(tipo.type, np.bool_)
class ObjectBlock(Block):
    """Block flagged ``is_object``; accepts any element."""

    __slots__ = ()
    is_object = True
    _can_hold_na = True

    def _maybe_coerce_values(self, values):
        """Convert str-typed input to an object ndarray; pass others through."""
        if not issubclass(values.dtype.type, str):
            return values
        return np.array(values, dtype=object)

    @property
    def is_bool(self):
        """
        we can be a bool if we have only bool values but are of type
        object
        """
        flat = self.values.ravel("K")
        return lib.is_bool_array(flat)

    def reduce(self, func, ignore_failures: bool = False) -> List[Block]:
        """
        For object-dtype, we operate column-wise.

        With more than one row in ``self.values`` the work is handed to
        ``split_and_operate``; otherwise ``func`` is applied directly and a
        ``TypeError`` yields ``[]`` when ``ignore_failures`` is True.
        """
        assert self.ndim == 2

        arr = self.values
        if len(arr) > 1:
            # split_and_operate expects func with signature (mask, values, inplace)
            def apply_to_piece(mask, values, inplace):
                piece = values.reshape(1, -1) if values.ndim == 1 else values
                return func(piece)

            return self.split_and_operate(
                None, apply_to_piece, False, ignore_failures=ignore_failures
            )

        try:
            reduced = func(arr)
        except TypeError:
            if ignore_failures:
                return []
            raise

        assert isinstance(reduced, np.ndarray)
        assert reduced.ndim == 1

        return [self.make_block_same_class(reduced.reshape(1, -1))]

    def convert(
        self,
        copy: bool = True,
        datetime: bool = True,
        numeric: bool = True,
        timedelta: bool = True,
    ) -> List["Block"]:
        """
        Try to cast the object values to better types with
        ``soft_convert_objects`` and return the resulting block(s).
        """

        # operate column-by-column
        def convert_piece(mask, val, idx):
            original_shape = val.shape
            converted = soft_convert_objects(
                val.ravel(),
                datetime=datetime,
                numeric=numeric,
                timedelta=timedelta,
                copy=copy,
            )
            if isinstance(converted, np.ndarray):
                # TODO(EA2D): allow EA once reshape is supported
                converted = converted.reshape(original_shape)
            return converted

        if self.ndim == 2:
            return self.split_and_operate(None, convert_piece, False)

        flat = self.values.ravel()
        return [self.make_block(convert_piece(None, flat, None))]

    def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"]:
        """
        Return ``blocks`` untouched when ``downcast`` is given; otherwise
        run ``convert(datetime=True, numeric=False)`` on each and flatten.
        """
        if downcast is not None:
            return blocks

        # split and convert the blocks
        converted = [b.convert(datetime=True, numeric=False) for b in blocks]
        return extend_blocks(converted)

    def _can_hold_element(self, element: Any) -> bool:
        """Always True."""
        return True

    def replace(
        self,
        to_replace,
        value,
        inplace: bool = False,
        regex: bool = False,
    ) -> List["Block"]:
        """
        Replace ``to_replace`` with ``value``, using ``_replace_regex`` when
        ``_should_use_regex`` says so and the parent ``replace`` otherwise.
        """
        # Note: the checks we do in NDFrame.replace ensure we never get
        # here with listlike to_replace or value, as those cases
        # go through _replace_list
        if _should_use_regex(regex, to_replace):
            return self._replace_regex(to_replace, value, inplace=inplace)
        return super().replace(to_replace, value, inplace=inplace, regex=False)
def _should_use_regex(regex: bool, to_replace: Any) -> bool:
    """
    Decide whether `to_replace` should be handled as a regular expression.

    A compiled pattern switches regex handling on regardless of `regex`.
    Regex handling is then kept only if `to_replace` is compilable and its
    pattern is not the empty string.
    """
    use_regex = True if is_re(to_replace) else regex
    use_regex = use_regex and is_re_compilable(to_replace)

    # Don't use regex if the pattern is empty.
    return use_regex and re.compile(to_replace).pattern != ""
class CategoricalBlock(ExtensionBlock):
    """
    Extension block with replace logic that delegates to the stored
    values' own ``replace`` method.
    """

    __slots__ = ()

    def _replace_list(
        self,
        src_list: List[Any],
        dest_list: List[Any],
        inplace: bool = False,
        regex: bool = False,
    ) -> List["Block"]:
        """
        Replace each entry of `src_list` with its counterpart in `dest_list`.

        When `dest_list` has exactly one distinct value, this collapses to a
        single ``replace`` call with that value.
        """
        distinct_targets = algos.unique(dest_list)
        if len(distinct_targets) != 1:
            return super()._replace_list(src_list, dest_list, inplace, regex)

        # We likely got here by tiling value inside NDFrame.replace,
        # so un-tile here
        return self.replace(src_list, dest_list[0], inplace, regex)

    def replace(
        self,
        to_replace,
        value,
        inplace: bool = False,
        regex: bool = False,
    ) -> List["Block"]:
        """
        Replace `to_replace` with `value` on this block, or on a copy when
        `inplace` is False, and return it in a one-element list.

        `regex` is accepted but not used here.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")
        if inplace:
            target = self
        else:
            target = self.copy()
        target.values.replace(to_replace, value, inplace=True)
        return [target]
# -----------------------------------------------------------------
# Constructor Helpers
def get_block_type(values, dtype=None):
    """
    Pick the Block subclass that matches the given values and dtype.

    Parameters
    ----------
    values : ndarray-like
    dtype : numpy or pandas dtype
        Falls back to ``values.dtype`` when not given.

    Returns
    -------
    cls : class, subclass of Block
    """
    dtype = dtype or values.dtype
    vtype = dtype.type

    # The order of these checks matters; the first match wins.
    if is_sparse(dtype):
        # Need this first(ish) so that Sparse[datetime] is sparse
        return ExtensionBlock
    if is_categorical_dtype(values.dtype):
        return CategoricalBlock
    if issubclass(vtype, np.datetime64):
        assert not is_datetime64tz_dtype(values.dtype)
        return DatetimeBlock
    if is_datetime64tz_dtype(values.dtype):
        return DatetimeTZBlock
    if is_interval_dtype(dtype) or is_period_dtype(dtype):
        return ObjectValuesExtensionBlock
    if is_extension_array_dtype(values.dtype):
        # Note: need to be sure PandasArray is unwrapped before we get here
        return ExtensionBlock
    if issubclass(vtype, np.floating):
        return FloatBlock
    if issubclass(vtype, np.timedelta64):
        # Must be tested ahead of the plain np.integer check below.
        assert issubclass(vtype, np.integer)
        return TimeDeltaBlock
    if issubclass(vtype, np.complexfloating):
        return ComplexBlock
    if issubclass(vtype, np.integer):
        return IntBlock

    return BoolBlock if dtype == np.bool_ else ObjectBlock
def make_block(values, placement, klass=None, ndim=None, dtype=None):
    """
    Build a Block around `values`.

    When `klass` is not supplied it is looked up with ``get_block_type``.
    PandasArray values and PandasDtype dtypes are unwrapped to their numpy
    counterparts first.
    """
    # Ensure that we don't allow PandasArray / PandasDtype in internals.
    # For now, blocks should be backed by ndarrays when possible.
    if isinstance(values, ABCPandasArray):
        unwrapped = values.to_numpy()
        # TODO(EA2D): special case not needed with 2D EAs
        values = np.atleast_2d(unwrapped) if ndim and ndim > 1 else unwrapped

    if isinstance(dtype, PandasDtype):
        dtype = dtype.numpy_dtype

    if klass is None:
        klass = get_block_type(values, dtype or values.dtype)
    elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values.dtype):
        # TODO: This is no longer hit internally; does it need to be retained
        # for e.g. pyarrow?
        values = DatetimeArray._simple_new(values, dtype=dtype)

    return klass(values, ndim=ndim, placement=placement)
# -----------------------------------------------------------------
def extend_blocks(result, blocks=None):
    """
    Add `result` to `blocks` and return the list.

    `result` is either a single Block or a list whose items are Blocks or
    lists of Blocks; inner lists are flattened by one level. A new list is
    started when `blocks` is None, otherwise `blocks` is extended in place.
    """
    collected = [] if blocks is None else blocks

    if not isinstance(result, list):
        assert isinstance(result, Block), type(result)
        collected.append(result)
        return collected

    for item in result:
        if isinstance(item, list):
            collected.extend(item)
        else:
            collected.append(item)
    return collected
def _block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike:
    """
    Prepend a length-1 axis when `values` has fewer than `ndim` dimensions.

    Values with an extension dtype are returned unchanged.
    """
    lacks_dims = values.ndim < ndim
    if lacks_dims and not is_extension_array_dtype(values.dtype):
        # TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023
        # block.shape is incorrect for "2D" ExtensionArrays
        # We can't, and don't need to, reshape.
        # error: "ExtensionArray" has no attribute "reshape"
        values = values.reshape((1, *values.shape))  # type: ignore[attr-defined]
    return values
def safe_reshape(arr, new_shape: Shape):
    """
    Reshape `arr` to `new_shape` unless it has an extension dtype (see gh-13012).

    1) A Series is first replaced by its ``_values``.
    2) Data with an extension dtype is returned without reshaping.
    3) Anything else is passed through ``np.asarray`` and reshaped.

    Parameters
    ----------
    arr : array-like, object to be reshaped
    new_shape : int or tuple of ints, the new shape
    """
    data = arr._values if isinstance(arr, ABCSeries) else arr

    if is_extension_array_dtype(data.dtype):
        return data

    # Note: this will include TimedeltaArray and tz-naive DatetimeArray
    # TODO(EA2D): special case will be unnecessary with 2D EAs
    return np.asarray(data).reshape(new_shape)
def _putmask_smart(v: np.ndarray, mask: np.ndarray, n) -> np.ndarray:
    """
    Put `n` into `v` where `mask` is set, trying to preserve dtype if possible.

    Parameters
    ----------
    v : np.ndarray
        `values`. Updated in-place on the same-dtype-kind path below;
        the other paths write into a copy or an ``astype`` result instead.
    mask : np.ndarray[bool]
        Applies to both sides (array like).
    n : `new values` either scalar or an array like aligned with `values`

    Returns
    -------
    values : ndarray with updated values
        this *may* be a copy of the original

    See Also
    --------
    ndarray.putmask
    """
    # we cannot use np.asarray() here as we cannot have conversions
    # that numpy does when numeric are mixed with strings

    # n should be the length of the mask or a scalar here
    if not is_list_like(n):
        # broadcast a scalar to one entry per mask element
        n = np.repeat(n, len(mask))

    # see if we are only masking values that if putted
    # will work in the current dtype
    try:
        nn = n[mask]
    except TypeError:
        # TypeError: only integer scalar arrays can be converted to a scalar index
        pass
    else:
        # make sure that we have a nullable type
        # if we have nulls
        # NOTE(review): nn[0] raises IndexError when mask selects nothing and
        # only TypeError is handled above -- presumably callers guarantee
        # mask.any(); confirm.
        if not isna_compat(v, nn[0]):
            pass
        elif not (is_float_dtype(nn.dtype) or is_integer_dtype(nn.dtype)):
            # only compare integers/floats
            pass
        elif not (is_float_dtype(v.dtype) or is_integer_dtype(v.dtype)):
            # only compare integers/floats
            pass
        else:
            # we ignore ComplexWarning here
            with warnings.catch_warnings(record=True):
                warnings.simplefilter("ignore", np.ComplexWarning)
                nn_at = nn.astype(v.dtype)

            # casting to v's dtype must leave every selected value unchanged
            comp = nn == nn_at
            if is_list_like(comp) and comp.all():
                # write into a copy so that `v` itself is left untouched
                nv = v.copy()
                nv[mask] = nn_at
                return nv

    n = np.asarray(n)

    def _putmask_preserve(nv, n):
        # Assign into `nv` in place: first the masked subset of `n`, and if
        # that indexing/assignment fails, `n` as a whole.
        try:
            nv[mask] = n[mask]
        except (IndexError, ValueError):
            nv[mask] = n
        return nv

    # preserves dtype if possible
    if v.dtype.kind == n.dtype.kind:
        # same dtype kind: `v` is modified in place and returned
        return _putmask_preserve(v, n)

    # change the dtype if needed
    dtype, _ = maybe_promote(n.dtype)

    # astype yields a new array, so the caller's `v` is not modified here
    v = v.astype(dtype)

    return _putmask_preserve(v, n)
def _extract_bool_array(mask: ArrayLike) -> np.ndarray:
    """
    Return `mask` as an ndarray[bool], converting ExtensionArray inputs
    (e.g. SparseArray or BooleanArray) with missing entries mapped to False.
    """
    if not isinstance(mask, ExtensionArray):
        as_ndarray = mask
    else:
        # We could have BooleanArray, Sparse[bool], ...
        # Except for BooleanArray, this is equivalent to just
        # np.asarray(mask, dtype=bool)
        as_ndarray = mask.to_numpy(dtype=bool, na_value=False)

    assert isinstance(as_ndarray, np.ndarray), type(as_ndarray)
    assert as_ndarray.dtype == bool, as_ndarray.dtype
    return as_ndarray