3RNN/Lib/site-packages/pandas/core/internals/blocks.py
2024-05-26 19:49:15 +02:00

2851 lines
96 KiB
Python

from __future__ import annotations
from functools import wraps
import inspect
import re
from typing import (
TYPE_CHECKING,
Any,
Callable,
Literal,
cast,
final,
)
import warnings
import weakref
import numpy as np
from pandas._config import (
get_option,
using_copy_on_write,
warn_copy_on_write,
)
from pandas._libs import (
NaT,
internals as libinternals,
lib,
)
from pandas._libs.internals import (
BlockPlacement,
BlockValuesRefs,
)
from pandas._libs.missing import NA
from pandas._typing import (
ArrayLike,
AxisInt,
DtypeBackend,
DtypeObj,
F,
FillnaOptions,
IgnoreRaise,
InterpolateOptions,
QuantileInterpolation,
Self,
Shape,
npt,
)
from pandas.errors import AbstractMethodError
from pandas.util._decorators import cache_readonly
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import validate_bool_kwarg
from pandas.core.dtypes.astype import (
astype_array_safe,
astype_is_view,
)
from pandas.core.dtypes.cast import (
LossySetitemError,
can_hold_element,
convert_dtypes,
find_result_type,
maybe_downcast_to_dtype,
np_can_hold_element,
)
from pandas.core.dtypes.common import (
is_1d_only_ea_dtype,
is_float_dtype,
is_integer_dtype,
is_list_like,
is_scalar,
is_string_dtype,
)
from pandas.core.dtypes.dtypes import (
DatetimeTZDtype,
ExtensionDtype,
IntervalDtype,
NumpyEADtype,
PeriodDtype,
)
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCIndex,
ABCNumpyExtensionArray,
ABCSeries,
)
from pandas.core.dtypes.missing import (
is_valid_na_for_dtype,
isna,
na_value_for_dtype,
)
from pandas.core import missing
import pandas.core.algorithms as algos
from pandas.core.array_algos.putmask import (
extract_bool_array,
putmask_inplace,
putmask_without_repeat,
setitem_datetimelike_compat,
validate_putmask,
)
from pandas.core.array_algos.quantile import quantile_compat
from pandas.core.array_algos.replace import (
compare_or_regex_search,
replace_regex,
should_use_regex,
)
from pandas.core.array_algos.transforms import shift
from pandas.core.arrays import (
Categorical,
DatetimeArray,
ExtensionArray,
IntervalArray,
NumpyExtensionArray,
PeriodArray,
TimedeltaArray,
)
from pandas.core.base import PandasObject
import pandas.core.common as com
from pandas.core.computation import expressions
from pandas.core.construction import (
ensure_wrapped_if_datetimelike,
extract_array,
)
from pandas.core.indexers import check_setitem_lengths
from pandas.core.indexes.base import get_values_for_csv
if TYPE_CHECKING:
from collections.abc import (
Iterable,
Sequence,
)
from pandas.core.api import Index
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
# Numpy object dtype, built once at import time so that methods below can
# compare dtypes against it directly.
# comparison is faster than is_object_dtype
_dtype_obj = np.dtype("object")
# FutureWarning text used by the replace-style methods in this module when an
# in-place operation touches values for which ``refs.has_reference()`` is True.
COW_WARNING_GENERAL_MSG = """\
Setting a value on a view: behaviour will change in pandas 3.0.
You are mutating a Series or DataFrame object, and currently this mutation will
also have effect on other Series or DataFrame objects that share data with this
object. In pandas 3.0 (with Copy-on-Write), updating one Series or DataFrame object
will never modify another.
"""
# Warning text describing the setitem case (a Series extracted from a
# DataFrame column being modified).
COW_WARNING_SETITEM_MSG = """\
Setting a value on a view: behaviour will change in pandas 3.0.
Currently, the mutation will also have effect on the object that shares data
with this object. For example, when setting a value in a Series that was
extracted from a column of a DataFrame, that DataFrame will also be updated:
ser = df["col"]
ser[0] = 0 <--- in pandas 2, this also updates `df`
In pandas 3.0 (with Copy-on-Write), updating one Series/DataFrame will never
modify another, and thus in the example above, `df` will not be changed.
"""
def maybe_split(meth: F) -> F:
    """
    Decorate a Block method so that multi-column blocks are handled
    column-by-column.

    The wrapped method is called directly when the block is 1D or holds a
    single column (``shape[0] == 1``); otherwise the call is routed through
    ``self.split_and_operate`` with the original method as the function.
    """

    @wraps(meth)
    def newfunc(self, *args, **kwargs) -> list[Block]:
        multi_column = self.ndim != 1 and self.shape[0] != 1
        if multi_column:
            # Split and operate column-by-column
            return self.split_and_operate(meth, *args, **kwargs)
        return meth(self, *args, **kwargs)

    return cast(F, newfunc)
class Block(PandasObject, libinternals.Block):
    """
    Canonical n-dimensional unit of homogeneous dtype contained in a pandas
    data structure.

    Index-ignorant; let the container take care of that.
    """

    # Array holding the block's data: a numpy array or an ExtensionArray.
    values: np.ndarray | ExtensionArray
    # Number of dimensions of the block.
    ndim: int
    # Reference tracker; methods of this class call ``refs.has_reference()``
    # before deciding whether values may be modified in place.
    refs: BlockValuesRefs
    # Declared for type checkers only.
    # NOTE(review): presumably provided by libinternals.Block - confirm.
    __init__: Callable
    # This class adds no slots of its own.
    __slots__ = ()
    # Class-level default for the numeric flag.
    is_numeric = False
@final
@cache_readonly
def _validate_ndim(self) -> bool:
    """
    Whether dimension validation applies to this block.

    True for blocks that can hold 2D values, which for now means any dtype
    that is not an ExtensionDtype, plus DatetimeTZDtype.
    """
    dtype = self.dtype
    if isinstance(dtype, DatetimeTZDtype):
        return True
    return not isinstance(dtype, ExtensionDtype)
@final
@cache_readonly
def is_object(self) -> bool:
    """Whether the dtype of ``self.values`` equals the numpy object dtype."""
    values_dtype = self.values.dtype
    return values_dtype == _dtype_obj
@final
@cache_readonly
def is_extension(self) -> bool:
return not lib.is_np_dtype(self.values.dtype)
@final
@cache_readonly
def _can_consolidate(self) -> bool:
    """Whether this block is eligible for consolidation: True exactly when it is not an extension block."""
    # We _could_ consolidate for DatetimeTZDtype but don't for now.
    if self.is_extension:
        return False
    return True
@final
@cache_readonly
def _consolidate_key(self):
    """Return the ``(_can_consolidate, dtype name)`` pair for this block."""
    consolidatable = self._can_consolidate
    dtype_name = self.dtype.name
    return (consolidatable, dtype_name)
@final
@cache_readonly
def _can_hold_na(self) -> bool:
    """
    Whether NA values can be stored in this Block.

    For numpy dtypes this is False for kinds "i", "u" and "b" and True
    otherwise; for any other dtype the dtype's own ``_can_hold_na`` is used.
    """
    dtype = self.dtype
    if not isinstance(dtype, np.dtype):
        return dtype._can_hold_na
    return dtype.kind not in "iub"
@final
@property
def is_bool(self) -> bool:
    """
    Whether the dtype of ``self.values`` equals the numpy bool dtype.

    Only the dtype is compared; the contents of the values are not inspected.
    """
    bool_dtype = np.dtype(bool)
    return self.values.dtype == bool_dtype
@final
def external_values(self):
    """Return the result of the module-level ``external_values`` helper applied to ``self.values``."""
    underlying = self.values
    return external_values(underlying)
@final
@cache_readonly
def fill_value(self):
    """NA value for this block's dtype, from ``na_value_for_dtype`` with ``compat=False``."""
    # Used in reindex_indexer
    block_dtype = self.dtype
    return na_value_for_dtype(block_dtype, compat=False)
@final
def _standardize_fill_value(self, value):
    """
    Replace a valid NA ``value`` with this block's own ``fill_value``.

    The substitution only happens for non-object blocks and only when
    ``is_valid_na_for_dtype`` accepts ``value`` for this dtype; in every
    other case ``value`` is returned unchanged.
    """
    # if we are passed a scalar None, convert it here
    non_object = self.dtype != _dtype_obj
    if non_object and is_valid_na_for_dtype(value, self.dtype):
        return self.fill_value
    return value
@property
def mgr_locs(self) -> BlockPlacement:
    """The BlockPlacement stored on this block as ``_mgr_locs``."""
    placement = self._mgr_locs
    return placement


@mgr_locs.setter
def mgr_locs(self, new_mgr_locs: BlockPlacement) -> None:
    """Store ``new_mgr_locs`` as this block's ``_mgr_locs``."""
    self._mgr_locs = new_mgr_locs
@final
def make_block(
    self,
    values,
    placement: BlockPlacement | None = None,
    refs: BlockValuesRefs | None = None,
) -> Block:
    """
    Build a new block from ``values`` via ``new_block``.

    Anything not specified is taken from this block: the placement defaults
    to ``self._mgr_locs`` and the new block gets ``self.ndim``. For extension
    blocks the values are first passed through ``ensure_block_shape``.
    """
    target_placement = self._mgr_locs if placement is None else placement
    if self.is_extension:
        block_values = ensure_block_shape(values, ndim=self.ndim)
    else:
        block_values = values
    return new_block(
        block_values, placement=target_placement, ndim=self.ndim, refs=refs
    )
@final
def make_block_same_class(
    self,
    values,
    placement: BlockPlacement | None = None,
    refs: BlockValuesRefs | None = None,
) -> Self:
    """
    Wrap ``values`` in a block of the same type as ``self``.

    The placement defaults to ``self._mgr_locs``; ``self.ndim`` is reused.
    """
    # Pre-2.0 we called ensure_wrapped_if_datetimelike because fastparquet
    # relied on it, as of 2.0 the caller is responsible for this.
    target_placement = self._mgr_locs if placement is None else placement

    # We assume maybe_coerce_values has already been called
    block_cls = type(self)
    return block_cls(values, placement=target_placement, ndim=self.ndim, refs=refs)
@final
def __repr__(self) -> str:
    """
    Short description of the block: class name, length (1D) or placement
    indexer and shape (otherwise), and dtype. The items themselves are not
    printed.
    """
    cls_name = type(self).__name__
    if self.ndim == 1:
        return f"{cls_name}: {len(self)} dtype: {self.dtype}"
    dims = " x ".join(map(str, self.shape))
    return f"{cls_name}: {self.mgr_locs.indexer}, {dims}, dtype: {self.dtype}"
@final
def __len__(self) -> int:
    """Length of ``self.values``."""
    values = self.values
    return len(values)
@final
def slice_block_columns(self, slc: slice) -> Self:
    """
    Perform a __getitem__-like slice and return the result as a block.

    Both the placement and the values are sliced with ``slc``; the new block
    is of the same type and keeps ``self.refs``.
    """
    placement = self._mgr_locs[slc]
    sliced = self._slice(slc)
    return type(self)(sliced, placement, self.ndim, refs=self.refs)
@final
def take_block_columns(self, indices: npt.NDArray[np.intp]) -> Self:
    """
    Perform a __getitem__-like selection with ``indices`` and return the
    result as a block of the same type, created with ``refs=None``.

    Only supports selections that preserve dimensionality.
    """
    # Note: only called from internals.concat, and we can verify that this
    # never happens with 1-column blocks, i.e. never for ExtensionBlock.
    placement = self._mgr_locs[indices]
    taken = self._slice(indices)
    block_cls = type(self)
    return block_cls(taken, placement, self.ndim, refs=None)
@final
def getitem_block_columns(
    self, slicer: slice, new_mgr_locs: BlockPlacement, ref_inplace_op: bool = False
) -> Self:
    """
    Perform a __getitem__-like slice and return the result as a block placed
    at ``new_mgr_locs``.

    Only supports slices that preserve dimensionality. ``self.refs`` is
    passed on unless ``ref_inplace_op`` is True and ``self.refs`` reports no
    reference, in which case the new block is created with ``refs=None``.
    """
    sliced = self._slice(slicer)
    if ref_inplace_op and not self.refs.has_reference():
        refs = None
    else:
        refs = self.refs
    return type(self)(sliced, new_mgr_locs, self.ndim, refs=refs)
@final
def _can_hold_element(self, element: Any) -> bool:
    """
    Whether ``element`` can be held by ``self.values``, as decided by
    ``can_hold_element`` after unwrapping ``element`` with ``extract_array``.
    """
    unwrapped = extract_array(element, extract_numpy=True)
    return can_hold_element(self.values, unwrapped)
@final
def should_store(self, value: ArrayLike) -> bool:
    """
    Decide whether ``value`` can be set into ``self.values`` inplace, or
    whether a cast is needed first.

    Parameters
    ----------
    value : np.ndarray or ExtensionArray

    Returns
    -------
    bool
        True when ``value.dtype`` equals this block's dtype.
    """
    incoming_dtype = value.dtype
    return incoming_dtype == self.dtype
# ---------------------------------------------------------------------
# Apply/Reduce and Helpers
@final
def apply(self, func, **kwargs) -> list[Block]:
    """
    Apply ``func`` to ``self.values`` and wrap the outcome in block(s).

    The raw result is passed through ``maybe_coerce_values`` and then handed
    to ``_split_op_result``, which produces the returned list of blocks.
    """
    raw = func(self.values, **kwargs)
    coerced = maybe_coerce_values(raw)
    return self._split_op_result(coerced)
@final
def reduce(self, func) -> list[Block]:
    """
    Apply the reduction ``func`` to ``self.values`` and return the result as
    a one-element list holding a block with the same mgr_locs.

    When the values are not 1D the result is reshaped to ``(-1, 1)``;
    squeezing is done at a higher level.
    """
    assert self.ndim == 2

    reduced = func(self.values)
    res_values = reduced if self.values.ndim == 1 else reduced.reshape(-1, 1)
    return [self.make_block(res_values)]
@final
def _split_op_result(self, result: ArrayLike) -> list[Block]:
    """
    Wrap the result of an operation in a list of blocks.

    A result with more than one dimension and an ExtensionDtype is split
    into one block per entry of ``self._mgr_locs``; anything else becomes a
    single block via ``make_block``.
    """
    # See also: split_and_operate
    needs_split = result.ndim > 1 and isinstance(result.dtype, ExtensionDtype)
    if not needs_split:
        return [self.make_block(result)]

    # TODO(EA2D): unnecessary with 2D EAs
    # if we get a 2D ExtensionArray, we need to split it into 1D pieces
    pieces = []
    for pos, loc in enumerate(self._mgr_locs):
        if is_1d_only_ea_dtype(result.dtype):
            piece = result[pos]
        else:
            piece = result[pos : pos + 1]
        pieces.append(self.make_block(values=piece, placement=BlockPlacement(loc)))
    return pieces
@final
def _split(self) -> list[Block]:
    """
    Split a 2D block into a list of single-column blocks.

    Each new block is of the same type as ``self``, holds the one-row slice
    of ``self.values`` at its position, is placed at the corresponding entry
    of ``self._mgr_locs`` and is given ``self.refs``.
    """
    assert self.ndim == 2

    block_cls = type(self)
    return [
        block_cls(
            self.values[pos : pos + 1],
            placement=BlockPlacement(loc),
            ndim=2,
            refs=self.refs,
        )
        for pos, loc in enumerate(self._mgr_locs)
    ]
@final
def split_and_operate(self, func, *args, **kwargs) -> list[Block]:
    """
    Split the block into single-column blocks and apply ``func`` to each.

    Parameters
    ----------
    func : Block method
        Called as ``func(column_block, *args, **kwargs)``; must return an
        iterable of blocks.
    *args
    **kwargs

    Returns
    -------
    List[Block]
        The blocks returned for every column, concatenated in order.
    """
    assert self.ndim == 2 and self.shape[0] != 1

    return [
        res_blk
        for column_blk in self._split()
        for res_blk in func(column_blk, *args, **kwargs)
    ]
# ---------------------------------------------------------------------
# Up/Down-casting
@final
def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
    """
    Cast this block to a dtype that can also hold ``other``.

    The target dtype is computed by ``find_result_type`` from the dtype of
    ``self.values`` and ``other``; the cast is done with
    ``self.astype(new_dtype, copy=False)``.

    Parameters
    ----------
    other : object
        Value the resulting block must be able to hold.
    warn_on_upcast : bool, default False
        If True, emit a FutureWarning about setting an item of incompatible
        dtype. The warning is suppressed for the two integer/NA special
        cases handled below.

    Returns
    -------
    Block

    Raises
    ------
    AssertionError
        If the computed dtype equals the current dtype, i.e. there is
        nothing to coerce to.
    """
    new_dtype = find_result_type(self.values.dtype, other)
    if new_dtype == self.dtype:
        # GH#52927 avoid RecursionError
        raise AssertionError(
            "Something has gone wrong, please report a bug at "
            "https://github.com/pandas-dev/pandas/issues"
        )

    # In a future version of pandas, the default will be that
    # setting `nan` into an integer series won't raise.
    if (
        # Case 1: a scalar NA (but not NaT / a NaT-valued datetime64 or
        # timedelta64) going into integer values: do not warn.
        is_scalar(other)
        and is_integer_dtype(self.values.dtype)
        and isna(other)
        and other is not NaT
        and not (
            isinstance(other, (np.datetime64, np.timedelta64)) and np.isnat(other)
        )
    ):
        warn_on_upcast = False
    elif (
        # Case 2: a 1D float ndarray for which ``lib.has_only_ints_or_nan``
        # is True going into integer values: do not warn.
        isinstance(other, np.ndarray)
        and other.ndim == 1
        and is_integer_dtype(self.values.dtype)
        and is_float_dtype(other.dtype)
        and lib.has_only_ints_or_nan(other)
    ):
        warn_on_upcast = False

    if warn_on_upcast:
        warnings.warn(
            f"Setting an item of incompatible dtype is deprecated "
            "and will raise an error in a future version of pandas. "
            f"Value '{other}' has dtype incompatible with {self.values.dtype}, "
            "please explicitly cast to a compatible dtype first.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
    # NOTE(review): this repeats the check made right after new_dtype is
    # computed (against self.values.dtype instead of self.dtype); it looks
    # redundant unless the cached self.dtype can differ from
    # self.values.dtype - confirm before removing.
    if self.values.dtype == new_dtype:
        raise AssertionError(
            f"Did not expect new dtype {new_dtype} to equal self.dtype "
            f"{self.values.dtype}. Please report a bug at "
            "https://github.com/pandas-dev/pandas/issues."
        )
    return self.astype(new_dtype, copy=False)
@final
def _maybe_downcast(
    self,
    blocks: list[Block],
    downcast,
    using_cow: bool,
    caller: str,
) -> list[Block]:
    """
    Optionally downcast ``blocks`` produced by an operation on this block.

    Parameters
    ----------
    blocks : list[Block]
        Result blocks to post-process.
    downcast : object
        ``False`` disables downcasting entirely. ``None`` disables it for
        non-object blocks. Any other value is forwarded to ``_downcast_2d``.
    using_cow : bool
        Forwarded to ``convert`` / ``_downcast_2d``.
    caller : str
        Name of the calling operation; the code special-cases ``"fillna"``
        and ``"where"`` to decide on early returns and deprecation warnings.

    Returns
    -------
    list[Block]
        Either ``blocks`` unchanged or the converted/downcast blocks.
    """
    if downcast is False:
        return blocks

    if self.dtype == _dtype_obj:
        # TODO: does it matter that self.dtype might not match blocks[i].dtype?
        # GH#44241 We downcast regardless of the argument;
        # respecting 'downcast=None' may be worthwhile at some point,
        # but ATM it breaks too much existing code.
        # split and convert the blocks

        # With the future option set, fillna no longer infers object dtypes.
        if caller == "fillna" and get_option("future.no_silent_downcasting"):
            return blocks

        nbs = extend_blocks(
            [blk.convert(using_cow=using_cow, copy=not using_cow) for blk in blocks]
        )
        if caller == "fillna":
            # Warn only when the conversion actually changed the number of
            # blocks or any dtype.
            if len(nbs) != len(blocks) or not all(
                x.dtype == y.dtype for x, y in zip(nbs, blocks)
            ):
                # GH#54261
                warnings.warn(
                    "Downcasting object dtype arrays on .fillna, .ffill, .bfill "
                    "is deprecated and will change in a future version. "
                    "Call result.infer_objects(copy=False) instead. "
                    "To opt-in to the future "
                    "behavior, set "
                    "`pd.set_option('future.no_silent_downcasting', True)`",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )

        return nbs

    elif downcast is None:
        return blocks
    elif caller == "where" and get_option("future.no_silent_downcasting") is True:
        return blocks
    else:
        nbs = extend_blocks([b._downcast_2d(downcast, using_cow) for b in blocks])

    # When _maybe_downcast is called with caller="where", it is either
    # a) with downcast=False, which is a no-op (the desired future behavior)
    # b) with downcast="infer", which is _not_ passed by the user.
    # In the latter case the future behavior is to stop doing inference,
    # so we issue a warning if and only if some inference occurred.
    if caller == "where":
        # GH#53656
        if len(blocks) != len(nbs) or any(
            left.dtype != right.dtype for left, right in zip(blocks, nbs)
        ):
            # In this case _maybe_downcast was _not_ a no-op, so the behavior
            # will change, so we issue a warning.
            warnings.warn(
                "Downcasting behavior in Series and DataFrame methods 'where', "
                "'mask', and 'clip' is deprecated. In a future "
                "version this will not infer object dtypes or cast all-round "
                "floats to integers. Instead call "
                "result.infer_objects(copy=False) for object inference, "
                "or cast round floats explicitly. To opt-in to the future "
                "behavior, set "
                "`pd.set_option('future.no_silent_downcasting', True)`",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

    return nbs
@final
@maybe_split
def _downcast_2d(self, dtype, using_cow: bool = False) -> list[Block]:
    """
    Downcast specialized to the 2D case, post-validation.

    Kept as a separate method so that ``maybe_split`` can be applied. The
    values go through ``maybe_downcast_to_dtype`` and ``maybe_coerce_values``;
    ``self.refs`` is only carried over when the resulting array is the very
    same object as ``self.values``.
    """
    downcasted = maybe_coerce_values(
        maybe_downcast_to_dtype(self.values, dtype=dtype)
    )
    if downcasted is self.values:
        refs = self.refs
    else:
        refs = None
    return [self.make_block(downcasted, refs=refs)]
@final
def convert(
    self,
    *,
    copy: bool = True,
    using_cow: bool = False,
) -> list[Block]:
    """
    Attempt to coerce any object types to better types.

    Parameters
    ----------
    copy : bool, default True
        Whether to return copies instead of ``self`` / shared values when
        nothing is converted.
    using_cow : bool, default False
        When True, no-op paths return a shallow copy
        (``self.copy(deep=False)``) instead of ``self`` or a deep copy.

    Returns
    -------
    list[Block]
    """
    if not self.is_object:
        # Nothing to infer for non-object blocks.
        if not copy and using_cow:
            return [self.copy(deep=False)]
        return [self.copy()] if copy else [self]

    if self.ndim != 1 and self.shape[0] != 1:
        # Multi-column block: convert each column separately.
        blocks = self.split_and_operate(
            Block.convert, copy=copy, using_cow=using_cow
        )
        if all(blk.dtype.kind == "O" for blk in blocks):
            # Avoid fragmenting the block if convert is a no-op
            if using_cow:
                return [self.copy(deep=False)]
            return [self.copy()] if copy else [self]
        return blocks

    values = self.values
    if values.ndim == 2:
        # the check above ensures we only get here with values.shape[0] == 1,
        # avoid doing .ravel as that might make a copy
        values = values[0]

    res_values = lib.maybe_convert_objects(
        values,  # type: ignore[arg-type]
        convert_non_numeric=True,
    )
    refs = None
    # ``res_values is values`` means no conversion took place: either copy
    # explicitly or keep sharing and carry the refs over.
    if copy and res_values is values:
        res_values = values.copy()
    elif res_values is values:
        refs = self.refs

    res_values = ensure_block_shape(res_values, self.ndim)
    res_values = maybe_coerce_values(res_values)
    return [self.make_block(res_values, refs=refs)]
def convert_dtypes(
    self,
    copy: bool,
    using_cow: bool,
    infer_objects: bool = True,
    convert_string: bool = True,
    convert_integer: bool = True,
    convert_boolean: bool = True,
    convert_floating: bool = True,
    dtype_backend: DtypeBackend = "numpy_nullable",
) -> list[Block]:
    """
    Convert this block to the dtypes chosen by the ``convert_dtypes`` helper.

    Parameters
    ----------
    copy : bool
        Passed as ``deep`` to ``Block.copy`` and as ``copy`` to ``astype``.
    using_cow : bool
        Forwarded to ``convert`` when object inference runs.
    infer_objects : bool, default True
        If True and this is an object block, run ``convert`` first.
    convert_string, convert_integer, convert_boolean, convert_floating : bool
        Forwarded to the ``convert_dtypes`` helper. If all four are False the
        (possibly inferred) blocks are returned as copies without casting.
    dtype_backend : DtypeBackend, default "numpy_nullable"
        Forwarded to the ``convert_dtypes`` helper.

    Returns
    -------
    list[Block]
    """
    if infer_objects and self.is_object:
        blks = self.convert(copy=False, using_cow=using_cow)
    else:
        blks = [self]

    if not any(
        [convert_floating, convert_integer, convert_boolean, convert_string]
    ):
        return [b.copy(deep=copy) for b in blks]

    rbs = []
    for blk in blks:
        # Determine dtype column by column
        # NOTE(review): the single-column test below reads self.shape, not
        # blk.shape, and the no-change test further down compares against
        # self.dtype, not blk.dtype - confirm this is intended.
        sub_blks = [blk] if blk.ndim == 1 or self.shape[0] == 1 else blk._split()
        dtypes = [
            convert_dtypes(
                b.values,
                convert_string,
                convert_integer,
                convert_boolean,
                convert_floating,
                infer_objects,
                dtype_backend,
            )
            for b in sub_blks
        ]
        if all(dtype == self.dtype for dtype in dtypes):
            # Avoid block splitting if no dtype changes
            rbs.append(blk.copy(deep=copy))
            continue

        for dtype, b in zip(dtypes, sub_blks):
            rbs.append(b.astype(dtype=dtype, copy=copy, squeeze=b.ndim != 1))
    return rbs
# ---------------------------------------------------------------------
# Array-Like Methods
@final
@cache_readonly
def dtype(self) -> DtypeObj:
    """The dtype of ``self.values``."""
    values = self.values
    return values.dtype
@final
def astype(
    self,
    dtype: DtypeObj,
    copy: bool = False,
    errors: IgnoreRaise = "raise",
    using_cow: bool = False,
    squeeze: bool = False,
) -> Block:
    """
    Cast the block's values to ``dtype`` and return them as a new block.

    Parameters
    ----------
    dtype : np.dtype or ExtensionDtype
    copy : bool, default False
        copy if indicated
    errors : str, {'raise', 'ignore'}, default 'raise'
        - ``raise`` : allow exceptions to be raised
        - ``ignore`` : suppress exceptions. On error return original object
    using_cow: bool, default False
        Signaling if copy on write copy logic is used.
    squeeze : bool, default False
        squeeze values to ndim=1 if only one column is given

    Returns
    -------
    Block

    Raises
    ------
    ValueError
        If squeezing is requested for 2D values with more than one column.
    TypeError
        If the new block's shape differs from this block's shape.
    """
    source = self.values
    wants_squeeze = squeeze and source.ndim == 2 and is_1d_only_ea_dtype(dtype)
    if wants_squeeze:
        if source.shape[0] != 1:
            raise ValueError("Can not squeeze with more than one column.")
        source = source[0, :]  # type: ignore[call-overload]

    casted = maybe_coerce_values(
        astype_array_safe(source, dtype, copy=copy, errors=errors)
    )

    # Only keep tracking references when the cast may have produced a view.
    may_share = using_cow or not copy
    if may_share and astype_is_view(source.dtype, casted.dtype):
        refs = self.refs
    else:
        refs = None

    result = self.make_block(casted, refs=refs)
    if result.shape == self.shape:
        return result
    raise TypeError(
        f"cannot set astype for copy = [{copy}] for dtype "
        f"({self.dtype.name} [{self.shape}]) to different shape "
        f"({result.dtype.name} [{result.shape}])"
    )
@final
def get_values_for_csv(
    self, *, float_format, date_format, decimal, na_rep: str = "nan", quoting=None
) -> Block:
    """
    Format ``self.values`` for CSV output and wrap the result in a new block.

    All keyword arguments are forwarded to the module-level
    ``get_values_for_csv`` helper.
    """
    formatting = {
        "na_rep": na_rep,
        "quoting": quoting,
        "float_format": float_format,
        "date_format": date_format,
        "decimal": decimal,
    }
    formatted = get_values_for_csv(self.values, **formatting)
    return self.make_block(formatted)
@final
def copy(self, deep: bool = True) -> Self:
    """
    Copy constructor.

    With ``deep=True`` the values are copied and the new block is created
    with ``refs=None``; otherwise the new block shares ``self.values`` and
    ``self.refs``.
    """
    new_refs: BlockValuesRefs | None
    if deep:
        new_values, new_refs = self.values.copy(), None
    else:
        new_values, new_refs = self.values, self.refs
    block_cls = type(self)
    return block_cls(
        new_values, placement=self._mgr_locs, ndim=self.ndim, refs=new_refs
    )
# ---------------------------------------------------------------------
# Copy-on-Write Helpers
@final
def _maybe_copy(self, using_cow: bool, inplace: bool) -> Self:
    """
    Return the block an operation should write into.

    Not inplace: a copy of ``self``. Inplace without ``using_cow``: ``self``
    itself. Inplace with ``using_cow``: a copy that is deep only when
    ``self.refs`` reports a reference.
    """
    if not inplace:
        return self.copy()
    if not using_cow:
        return self
    return self.copy(deep=self.refs.has_reference())
@final
def _get_refs_and_copy(self, using_cow: bool, inplace: bool):
    """
    Decide whether an operation must copy and which refs it may keep.

    Returns
    -------
    tuple
        ``(copy, refs)``: ``(True, None)`` when not inplace, or when inplace
        under ``using_cow`` while ``self.refs`` reports a reference;
        ``(False, self.refs)`` otherwise.
    """
    if not inplace:
        return True, None
    if using_cow and self.refs.has_reference():
        return True, None
    return False, self.refs
# ---------------------------------------------------------------------
# Replace
@final
def replace(
    self,
    to_replace,
    value,
    inplace: bool = False,
    # mask may be pre-computed if we're called from replace_list
    mask: npt.NDArray[np.bool_] | None = None,
    using_cow: bool = False,
    already_warned=None,
) -> list[Block]:
    """
    Replace occurrences of ``to_replace`` with ``value``.

    May create new blocks; the actual replacement is a ``putmask_inplace``
    call on the block returned by ``_maybe_copy``.

    Parameters
    ----------
    to_replace : object
        Scalar to look for.
    value : object
        Replacement value.
    inplace : bool, default False
        Whether ``self`` may be modified and returned.
    mask : np.ndarray[bool], optional
        Pre-computed positions to replace; computed with
        ``missing.mask_missing`` when not given.
    using_cow : bool, default False
        Whether Copy-on-Write copy logic is used.
    already_warned : object, optional
        Object with a ``warned_already`` attribute, used to emit the
        Copy-on-Write warning at most once.

    Returns
    -------
    list[Block]
    """
    # Note: the checks we do in NDFrame.replace ensure we never get
    # here with listlike to_replace or value, as those cases
    # go through replace_list
    values = self.values

    if isinstance(values, Categorical):
        # TODO: avoid special-casing
        # GH49404
        blk = self._maybe_copy(using_cow, inplace)
        values = cast(Categorical, blk.values)
        values._replace(to_replace=to_replace, value=value, inplace=True)
        return [blk]

    if not self._can_hold_element(to_replace):
        # We cannot hold `to_replace`, so we know immediately that
        # replacing it is a no-op.
        # Note: If to_replace were a list, NDFrame.replace would call
        # replace_list instead of replace.
        if using_cow:
            return [self.copy(deep=False)]
        else:
            return [self] if inplace else [self.copy()]

    if mask is None:
        mask = missing.mask_missing(values, to_replace)
    if not mask.any():
        # Note: we get here with test_replace_extension_other incorrectly
        # bc _can_hold_element is incorrect.
        if using_cow:
            return [self.copy(deep=False)]
        else:
            return [self] if inplace else [self.copy()]

    elif self._can_hold_element(value):
        # TODO(CoW): Maybe split here as well into columns where mask has True
        # and rest?
        blk = self._maybe_copy(using_cow, inplace)
        putmask_inplace(blk.values, mask, value)
        if (
            inplace
            and warn_copy_on_write()
            and already_warned is not None
            and not already_warned.warned_already
        ):
            if self.refs.has_reference():
                warnings.warn(
                    COW_WARNING_GENERAL_MSG,
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                already_warned.warned_already = True

        if not (self.is_object and value is None):
            # if the user *explicitly* gave None, we keep None, otherwise
            # may downcast to NaN
            if get_option("future.no_silent_downcasting") is True:
                blocks = [blk]
            else:
                blocks = blk.convert(copy=False, using_cow=using_cow)
                # Warn only when convert changed the block count or dtype.
                if len(blocks) > 1 or blocks[0].dtype != blk.dtype:
                    warnings.warn(
                        # GH#54710
                        "Downcasting behavior in `replace` is deprecated and "
                        "will be removed in a future version. To retain the old "
                        "behavior, explicitly call "
                        "`result.infer_objects(copy=False)`. "
                        "To opt-in to the future "
                        "behavior, set "
                        "`pd.set_option('future.no_silent_downcasting', True)`",
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )
        else:
            blocks = [blk]
        return blocks

    elif self.ndim == 1 or self.shape[0] == 1:
        # Single column that cannot hold ``value``: cast first, then replace
        # in place on the freshly cast block.
        if value is None or value is NA:
            blk = self.astype(np.dtype(object))
        else:
            blk = self.coerce_to_target_dtype(value)
        return blk.replace(
            to_replace=to_replace,
            value=value,
            inplace=True,
            mask=mask,
        )

    else:
        # split so that we only upcast where necessary
        blocks = []
        for i, nb in enumerate(self._split()):
            blocks.extend(
                type(self).replace(
                    nb,
                    to_replace=to_replace,
                    value=value,
                    inplace=True,
                    mask=mask[i : i + 1],
                    using_cow=using_cow,
                )
            )
        return blocks
@final
def _replace_regex(
    self,
    to_replace,
    value,
    inplace: bool = False,
    mask=None,
    using_cow: bool = False,
    already_warned=None,
) -> list[Block]:
    """
    Replace elements by the given value.

    Parameters
    ----------
    to_replace : object or pattern
        Scalar to replace or regular expression to match.
    value : object
        Replacement object.
    inplace : bool, default False
        Perform inplace modification.
    mask : array-like of bool, optional
        Forwarded unchanged to ``replace_regex``.
        NOTE(review): the previous wording said True marks elements that are
        ignored - confirm against ``replace_regex``.
    using_cow: bool, default False
        Specifying if copy on write is enabled.
    already_warned : object, optional
        Object with a ``warned_already`` attribute, used to emit the
        Copy-on-Write warning at most once.

    Returns
    -------
    List[Block]
    """
    if not self._can_hold_element(to_replace):
        # i.e. only if self.is_object is True, but could in principle include a
        # String ExtensionBlock
        if using_cow:
            return [self.copy(deep=False)]
        return [self] if inplace else [self.copy()]

    rx = re.compile(to_replace)

    block = self._maybe_copy(using_cow, inplace)

    replace_regex(block.values, rx, value, mask)

    if (
        inplace
        and warn_copy_on_write()
        and already_warned is not None
        and not already_warned.warned_already
    ):
        if self.refs.has_reference():
            warnings.warn(
                COW_WARNING_GENERAL_MSG,
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            already_warned.warned_already = True

    nbs = block.convert(copy=False, using_cow=using_cow)
    opt = get_option("future.no_silent_downcasting")
    # Warn only when convert changed the block count or dtype and the future
    # option is not set.
    if (len(nbs) > 1 or nbs[0].dtype != block.dtype) and not opt:
        warnings.warn(
            # GH#54710
            "Downcasting behavior in `replace` is deprecated and "
            "will be removed in a future version. To retain the old "
            "behavior, explicitly call `result.infer_objects(copy=False)`. "
            "To opt-in to the future "
            "behavior, set "
            "`pd.set_option('future.no_silent_downcasting', True)`",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
    return nbs
@final
def replace_list(
    self,
    src_list: Iterable[Any],
    dest_list: Sequence[Any],
    inplace: bool = False,
    regex: bool = False,
    using_cow: bool = False,
    already_warned=None,
) -> list[Block]:
    """
    See BlockManager.replace_list docstring.

    Replaces each ``src_list[i]`` with ``dest_list[i]`` by calling
    ``_replace_coerce`` once per pair on the current list of result blocks.
    Pairs whose source value this block cannot hold are skipped up front.
    """
    values = self.values

    if isinstance(values, Categorical):
        # TODO: avoid special-casing
        # GH49404
        blk = self._maybe_copy(using_cow, inplace)
        values = cast(Categorical, blk.values)
        values._replace(to_replace=src_list, value=dest_list, inplace=True)
        return [blk]

    # Exclude anything that we know we won't contain
    pairs = [
        (x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
    ]
    if not len(pairs):
        if using_cow:
            return [self.copy(deep=False)]
        # shortcut, nothing to replace
        return [self] if inplace else [self.copy()]

    # Index of the last pair; dtype conversion only happens on that step.
    src_len = len(pairs) - 1

    if is_string_dtype(values.dtype):
        # Calculate the mask once, prior to the call of comp
        # in order to avoid repeating the same computations
        na_mask = ~isna(values)
        masks: Iterable[npt.NDArray[np.bool_]] = (
            extract_bool_array(
                cast(
                    ArrayLike,
                    compare_or_regex_search(
                        values, s[0], regex=regex, mask=na_mask
                    ),
                )
            )
            for s in pairs
        )
    else:
        # GH#38086 faster if we know we dont need to check for regex
        masks = (missing.mask_missing(values, s[0]) for s in pairs)
    # Materialize if inplace = True, since the masks can change
    # as we replace
    if inplace:
        masks = list(masks)

    if using_cow:
        # Don't set up refs here, otherwise we will think that we have
        # references when we check again later
        rb = [self]
    else:
        rb = [self if inplace else self.copy()]

    if (
        inplace
        and warn_copy_on_write()
        and already_warned is not None
        and not already_warned.warned_already
    ):
        if self.refs.has_reference():
            warnings.warn(
                COW_WARNING_GENERAL_MSG,
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            already_warned.warned_already = True

    opt = get_option("future.no_silent_downcasting")
    for i, ((src, dest), mask) in enumerate(zip(pairs, masks)):
        convert = i == src_len  # only convert once at the end
        new_rb: list[Block] = []

        # GH-39338: _replace_coerce can split a block into
        # single-column blocks, so track the index so we know
        # where to index into the mask
        for blk_num, blk in enumerate(rb):
            if len(rb) == 1:
                m = mask
            else:
                mib = mask
                assert not isinstance(mib, bool)
                m = mib[blk_num : blk_num + 1]

            # error: Argument "mask" to "_replace_coerce" of "Block" has
            # incompatible type "Union[ExtensionArray, ndarray[Any, Any], bool]";
            # expected "ndarray[Any, dtype[bool_]]"
            result = blk._replace_coerce(
                to_replace=src,
                value=dest,
                mask=m,
                inplace=inplace,
                regex=regex,
                using_cow=using_cow,
            )

            if using_cow and i != src_len:
                # This is ugly, but we have to get rid of intermediate refs
                # that did not go out of scope yet, otherwise we will trigger
                # many unnecessary copies
                for b in result:
                    ref = weakref.ref(b)
                    b.refs.referenced_blocks.pop(
                        b.refs.referenced_blocks.index(ref)
                    )

            if (
                not opt
                and convert
                and blk.is_object
                and not all(x is None for x in dest_list)
            ):
                # GH#44498 avoid unwanted cast-back
                nbs = []
                for res_blk in result:
                    converted = res_blk.convert(
                        copy=True and not using_cow, using_cow=using_cow
                    )
                    # Warn only when convert changed the block count or dtype.
                    if len(converted) > 1 or converted[0].dtype != res_blk.dtype:
                        warnings.warn(
                            # GH#54710
                            "Downcasting behavior in `replace` is deprecated "
                            "and will be removed in a future version. To "
                            "retain the old behavior, explicitly call "
                            "`result.infer_objects(copy=False)`. "
                            "To opt-in to the future "
                            "behavior, set "
                            "`pd.set_option('future.no_silent_downcasting', True)`",
                            FutureWarning,
                            stacklevel=find_stack_level(),
                        )
                    nbs.extend(converted)
                result = nbs
            new_rb.extend(result)
        rb = new_rb
    return rb
@final
def _replace_coerce(
    self,
    to_replace,
    value,
    mask: npt.NDArray[np.bool_],
    inplace: bool = True,
    regex: bool = False,
    using_cow: bool = False,
) -> list[Block]:
    """
    Replace value corresponding to the given boolean array with another
    value.

    Parameters
    ----------
    to_replace : object or pattern
        Scalar to replace or regular expression to match.
    value : object
        Replacement object.
    mask : np.ndarray[bool]
        True marks the positions that are replaced; when ``value`` is None
        and no position is True, nothing is modified.
    inplace : bool, default True
        Perform inplace modification.
    regex : bool, default False
        If true, perform regular expression substitution.
    using_cow : bool, default False
        Whether Copy-on-Write copy logic is used. Forwarded to both the
        regex and the non-regex replacement paths.

    Returns
    -------
    List[Block]
    """
    if should_use_regex(regex, to_replace):
        # Forward using_cow so the regex path applies the same copy rules
        # as the non-regex path below; without it an inplace regex replace
        # would write into values that are still shared.
        return self._replace_regex(
            to_replace,
            value,
            inplace=inplace,
            mask=mask,
            using_cow=using_cow,
        )
    else:
        if value is None:
            # gh-45601, gh-45836, gh-46634
            if mask.any():
                # Record the reference state before astype, which may add one.
                has_ref = self.refs.has_reference()
                nb = self.astype(np.dtype(object), copy=False, using_cow=using_cow)
                if (nb is self or using_cow) and not inplace:
                    nb = nb.copy()
                elif inplace and has_ref and nb.refs.has_reference() and using_cow:
                    # no copy in astype and we had refs before
                    nb = nb.copy()
                putmask_inplace(nb.values, mask, value)
                return [nb]
            if using_cow:
                return [self]
            return [self] if inplace else [self.copy()]
        return self.replace(
            to_replace=to_replace,
            value=value,
            inplace=inplace,
            mask=mask,
            using_cow=using_cow,
        )
# ---------------------------------------------------------------------
# 2D Methods - Shared by NumpyBlock and NDArrayBackedExtensionBlock
# but not ExtensionBlock
def _maybe_squeeze_arg(self, arg: np.ndarray) -> np.ndarray:
    """
    Hook for compatibility with 1D-only ExtensionArrays.

    This implementation hands ``arg`` back untouched.
    """
    unchanged = arg
    return unchanged
def _unwrap_setitem_indexer(self, indexer):
    """
    Hook for compatibility with 1D-only ExtensionArrays.

    This implementation hands ``indexer`` back untouched.
    """
    unchanged = indexer
    return unchanged
# NB: this cannot be made cache_readonly because in mgr.set_values we pin
# new .values that can have different shape GH#42631
@property
def shape(self) -> Shape:
    """Shape of ``self.values``, read afresh on every access."""
    values = self.values
    return values.shape
def iget(self, i: int | tuple[int, int] | tuple[slice, int]) -> np.ndarray:
    """Index ``self.values`` with ``i`` and return the result."""
    # In the case where we have a tuple[slice, int], the slice will always
    # be slice(None)
    # Note: only reached with self.ndim == 2
    values = self.values
    # Invalid index type "Union[int, Tuple[int, int], Tuple[slice, int]]"
    # for "Union[ndarray[Any, Any], ExtensionArray]"; expected type
    # "Union[int, integer[Any]]"
    return values[i]  # type: ignore[index]
def _slice(
    self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp]
) -> ArrayLike:
    """Return ``self.values`` indexed with ``slicer``."""
    values = self.values
    return values[slicer]
def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None:
    """
    Write ``values`` into ``self.values`` at ``locs``.

    If copy=True, ``self.values`` is first replaced by a copy of itself and
    the write goes into that copy (for Copy-on-Write).

    Notes
    -----
    `set_inplace` never creates a new Block, whereas `setitem` _may_ create
    a new array and always creates a new Block.

    Caller is responsible for checking values.dtype == self.dtype.
    """
    if copy:
        fresh = self.values.copy()
        self.values = fresh
    target = self.values
    target[locs] = values
@final
def take_nd(
    self,
    indexer: npt.NDArray[np.intp],
    axis: AxisInt,
    new_mgr_locs: BlockPlacement | None = None,
    fill_value=lib.no_default,
) -> Block:
    """
    Take values according to indexer and return them as a block.

    Filling is only allowed when ``fill_value`` is passed explicitly; when it
    is left at its default, ``self.fill_value`` is used with
    ``allow_fill=False``. The result is a block of the same class if the
    dtype is unchanged and a block built by ``make_block`` otherwise.
    """
    allow_fill = fill_value is not lib.no_default
    if not allow_fill:
        fill_value = self.fill_value

    # Note: algos.take_nd has upcast logic similar to coerce_to_target_dtype
    taken = algos.take_nd(
        self.values, indexer, axis=axis, allow_fill=allow_fill, fill_value=fill_value
    )

    # Called from three places in managers, all of which satisfy
    # these assertions
    if isinstance(self, ExtensionBlock):
        # NB: in this case, the 'axis' kwarg will be ignored in the
        # algos.take_nd call above.
        assert not (self.ndim == 1 and new_mgr_locs is None)
    assert not (axis == 0 and new_mgr_locs is None)

    placement = self._mgr_locs if new_mgr_locs is None else new_mgr_locs

    if taken.dtype != self.dtype:
        builder = self.make_block
    else:
        builder = self.make_block_same_class
    return builder(taken, placement)
def _unstack(
    self,
    unstacker,
    fill_value,
    new_placement: npt.NDArray[np.intp],
    needs_masking: npt.NDArray[np.bool_],
):
    """
    Return a list of unstacked blocks of self

    Parameters
    ----------
    unstacker : reshape._Unstacker
    fill_value : object
        Forwarded to ``unstacker.get_new_values``.
    new_placement : np.ndarray[np.intp]
    needs_masking : np.ndarray[bool]
        Not referenced by this implementation.

    Returns
    -------
    blocks : list of Block
        New blocks of unstacked values.
    mask : array-like of bool
        The mask of columns of `blocks` we should keep.
    """
    unstacked, full_mask = unstacker.get_new_values(
        self.values.T, fill_value=fill_value
    )

    keep = full_mask.any(0)
    # TODO: in all tests we have mask.all(); can we rely on that?

    # Note: filtering both the values and the placement by ``keep`` ensures
    #  mask.sum() == sum(len(nb.mgr_locs) for nb in blocks)
    #  which the calling function needs in order to pass verify_integrity=False
    #  to the BlockManager constructor
    kept_values = unstacked.T[keep]
    kept_placement = BlockPlacement(new_placement[keep])

    return [new_block_2d(kept_values, placement=kept_placement)], keep
# ---------------------------------------------------------------------
def setitem(self, indexer, value, using_cow: bool = False) -> Block:
    """
    Attempt self.values[indexer] = value, possibly creating a new array.

    If the current dtype cannot hold ``value`` without loss, the block is
    first coerced to a dtype that can and the assignment is retried on the
    coerced block.

    Parameters
    ----------
    indexer : tuple, list-like, array-like, slice, int
        The subset of self.values to set
    value : object
        The value being set
    using_cow: bool, default False
        Signaling if CoW is used.

    Returns
    -------
    Block
        Either ``self`` (possibly rebound to the result of ``_maybe_copy``)
        or the coerced block when an upcast was needed.

    Raises
    ------
    ValueError
        "setting an array element with a sequence." when the assignment
        fails and the casted value is list-like.

    Notes
    -----
    `indexer` is a direct slice/positional indexer. `value` must
    be a compatible shape.
    """

    value = self._standardize_fill_value(value)

    values = cast(np.ndarray, self.values)
    if self.ndim == 2:
        # work on the transposed values for 2D blocks
        values = values.T

    # length checking
    check_setitem_lengths(indexer, value, values)

    if self.dtype != _dtype_obj:
        # GH48933: extract_array would convert a pd.Series value to np.ndarray
        value = extract_array(value, extract_numpy=True)
    try:
        casted = np_can_hold_element(values.dtype, value)
    except LossySetitemError:
        # current dtype cannot store value, coerce to common dtype
        nb = self.coerce_to_target_dtype(value, warn_on_upcast=True)
        return nb.setitem(indexer, value)
    else:
        if self.dtype == _dtype_obj:
            # TODO: avoid having to construct values[indexer]
            vi = values[indexer]
            if lib.is_list_like(vi):
                # checking lib.is_scalar here fails on
                #  test_iloc_setitem_custom_object
                casted = setitem_datetimelike_compat(values, len(vi), casted)

        # _maybe_copy may hand back a different block; re-read the values
        #  from whatever block we end up writing into.
        self = self._maybe_copy(using_cow, inplace=True)
        values = cast(np.ndarray, self.values.T)
        if isinstance(casted, np.ndarray) and casted.ndim == 1 and len(casted) == 1:
            # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615
            casted = casted[0, ...]

        try:
            values[indexer] = casted
        except (TypeError, ValueError) as err:
            if is_list_like(casted):
                raise ValueError(
                    "setting an array element with a sequence."
                ) from err
            raise
    return self
def putmask(
    self, mask, new, using_cow: bool = False, already_warned=None
) -> list[Block]:
    """
    putmask the data to the block; it is possible that we may create a
    new dtype of block

    Return the resulting block(s).

    Parameters
    ----------
    mask : np.ndarray[bool], SparseArray[bool], or BooleanArray
    new : a ndarray/object
        ``lib.no_default`` is replaced by ``self.fill_value``.
    using_cow: bool, default False
    already_warned : object or None, default None
        If given, its ``warned_already`` attribute is consulted (and set to
        True after warning) so the Copy-on-Write FutureWarning is issued
        at most once.

    Returns
    -------
    List[Block]
    """
    # keep the un-validated mask around for the per-column split below
    orig_mask = mask
    values = cast(np.ndarray, self.values)
    mask, noop = validate_putmask(values.T, mask)
    assert not isinstance(new, (ABCIndex, ABCSeries, ABCDataFrame))

    if new is lib.no_default:
        new = self.fill_value

    new = self._standardize_fill_value(new)
    new = extract_array(new, extract_numpy=True)

    if noop:
        # nothing to set
        if using_cow:
            return [self.copy(deep=False)]
        return [self]

    if (
        warn_copy_on_write()
        and already_warned is not None
        and not already_warned.warned_already
    ):
        if self.refs.has_reference():
            warnings.warn(
                COW_WARNING_GENERAL_MSG,
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            already_warned.warned_already = True

    try:
        casted = np_can_hold_element(values.dtype, new)

        self = self._maybe_copy(using_cow, inplace=True)
        values = cast(np.ndarray, self.values)

        putmask_without_repeat(values.T, mask, casted)
        return [self]
    except LossySetitemError:
        # the current dtype cannot hold ``new`` losslessly
        if self.ndim == 1 or self.shape[0] == 1:
            # no need to split columns

            if not is_list_like(new):
                # using just new[indexer] can't save us the need to cast
                return self.coerce_to_target_dtype(
                    new, warn_on_upcast=True
                ).putmask(mask, new)
            else:
                indexer = mask.nonzero()[0]
                nb = self.setitem(indexer, new[indexer], using_cow=using_cow)
                return [nb]

        else:
            # process each sub-block from _split() separately, so that
            #  coercion is decided per sub-block
            is_array = isinstance(new, np.ndarray)

            res_blocks = []
            nbs = self._split()
            for i, nb in enumerate(nbs):
                n = new
                if is_array:
                    # we have a different value per-column
                    n = new[:, i : i + 1]

                submask = orig_mask[:, i : i + 1]
                rbs = nb.putmask(submask, n, using_cow=using_cow)
                res_blocks.extend(rbs)
            return res_blocks
def where(
    self, other, cond, _downcast: str | bool = "infer", using_cow: bool = False
) -> list[Block]:
    """
    evaluate the block; return result block(s) from the result

    Entries where ``cond`` is True keep their current value; the remaining
    entries are taken from ``other``.

    Parameters
    ----------
    other : a ndarray/object
        ``lib.no_default`` is replaced by ``self.fill_value``.
    cond : np.ndarray[bool], SparseArray[bool], or BooleanArray
    _downcast : str or bool, default "infer"
        Private because we only specify it when calling from fillna.
    using_cow : bool, default False
        When True, the no-op path returns a shallow copy instead of a
        full copy.

    Returns
    -------
    List[Block]
    """
    assert cond.ndim == self.ndim
    assert not isinstance(other, (ABCIndex, ABCSeries, ABCDataFrame))

    transpose = self.ndim == 2

    cond = extract_bool_array(cond)

    # EABlocks override where
    values = cast(np.ndarray, self.values)
    # keep the caller's ``other`` for the retry on the coerced block
    orig_other = other
    if transpose:
        values = values.T

    # icond marks the positions to be replaced (where cond is False)
    icond, noop = validate_putmask(values, ~cond)
    if noop:
        # GH-39595: Always return a copy; short-circuit up/downcasting
        if using_cow:
            return [self.copy(deep=False)]
        return [self.copy()]

    if other is lib.no_default:
        other = self.fill_value

    other = self._standardize_fill_value(other)

    try:
        # try/except here is equivalent to a self._can_hold_element check,
        #  but this gets us back 'casted' which we will reuse below;
        #  without using 'casted', expressions.where may do unwanted upcasts.
        casted = np_can_hold_element(values.dtype, other)
    except (ValueError, TypeError, LossySetitemError):
        # we cannot coerce, return a compat dtype

        if self.ndim == 1 or self.shape[0] == 1:
            # no need to split columns

            block = self.coerce_to_target_dtype(other)
            blocks = block.where(orig_other, cond, using_cow=using_cow)
            return self._maybe_downcast(
                blocks, downcast=_downcast, using_cow=using_cow, caller="where"
            )

        else:
            # since _maybe_downcast would split blocks anyway, we
            #  can avoid some potential upcast/downcast by splitting
            #  on the front end.
            is_array = isinstance(other, (np.ndarray, ExtensionArray))

            res_blocks = []
            nbs = self._split()
            for i, nb in enumerate(nbs):
                oth = other
                if is_array:
                    # we have a different value per-column
                    oth = other[:, i : i + 1]

                submask = cond[:, i : i + 1]
                rbs = nb.where(
                    oth, submask, _downcast=_downcast, using_cow=using_cow
                )
                res_blocks.extend(rbs)
            return res_blocks

    else:
        other = casted
        alt = setitem_datetimelike_compat(values, icond.sum(), other)
        if alt is not other:
            # the compat helper substituted a different object for ``other``
            if is_list_like(other) and len(other) < len(values):
                # call np.where with other to get the appropriate ValueError
                np.where(~icond, values, other)
                raise NotImplementedError(
                    "This should not be reached; call to np.where above is "
                    "expected to raise ValueError. Please report a bug at "
                    "github.com/pandas-dev/pandas"
                )
            result = values.copy()
            np.putmask(result, icond, alt)
        else:
            # By the time we get here, we should have all Series/Index
            #  args extracted to ndarray
            if (
                is_list_like(other)
                and not isinstance(other, np.ndarray)
                and len(other) == self.shape[-1]
            ):
                # If we don't do this broadcasting here, then expressions.where
                #  will broadcast a 1D other to be row-like instead of
                #  column-like.
                other = np.array(other).reshape(values.shape)
                # If lengths don't match (or len(other)==1), we will raise
                #  inside expressions.where, see test_series_where

            # Note: expressions.where may upcast.
            result = expressions.where(~icond, values, other)
            # The np_can_hold_element check _should_ ensure that we always
            #  have result.dtype == self.dtype here.

        if transpose:
            # undo the transpose applied to ``values`` above
            result = result.T

        return [self.make_block(result)]
def fillna(
    self,
    value,
    limit: int | None = None,
    inplace: bool = False,
    downcast=None,
    using_cow: bool = False,
    already_warned=None,
) -> list[Block]:
    """
    fillna on the block with the value. If we fail, then convert to
    block to hold objects instead and try again

    Parameters
    ----------
    value : object
        The fill value, forwarded to ``putmask`` (inplace) or ``where``.
    limit : int or None, default None
        If given, NA positions whose cumulative count along the last axis
        exceeds ``limit`` are dropped from the mask and left unfilled.
    inplace : bool, default False
        Validated with ``validate_bool_kwarg``. Selects ``putmask`` (True)
        or ``where`` (False) for the actual fill.
    downcast : object, default None
        Forwarded to ``_maybe_downcast``.
    using_cow : bool, default False
    already_warned : object or None, default None
        Forwarded to ``putmask`` on the inplace path.

    Returns
    -------
    List[Block]
    """
    # Caller is responsible for validating limit; if int it is strictly positive
    inplace = validate_bool_kwarg(inplace, "inplace")

    if not self._can_hold_na:
        # can short-circuit the isna call
        # (``mask`` is deliberately left undefined; the noop branch below
        #  always returns before it would be used)
        noop = True
    else:
        mask = isna(self.values)
        mask, noop = validate_putmask(self.values, mask)

    if noop:
        # we can't process the value, but nothing to do
        if inplace:
            if using_cow:
                return [self.copy(deep=False)]
            # Arbitrarily imposing the convention that we ignore downcast
            #  on no-op when inplace=True
            return [self]
        else:
            # GH#45423 consistent downcasting on no-ops.
            nb = self.copy(deep=not using_cow)
            nbs = nb._maybe_downcast(
                [nb], downcast=downcast, using_cow=using_cow, caller="fillna"
            )
            return nbs

    if limit is not None:
        # un-mark every NA beyond the first ``limit`` along the last axis
        mask[mask.cumsum(self.ndim - 1) > limit] = False

    if inplace:
        nbs = self.putmask(
            mask.T, value, using_cow=using_cow, already_warned=already_warned
        )
    else:
        # without _downcast, we would break
        #  test_fillna_dtype_conversion_equiv_replace
        nbs = self.where(value, ~mask.T, _downcast=False)

    # Note: blk._maybe_downcast vs self._maybe_downcast(nbs)
    #  makes a difference bc blk may have object dtype, which has
    #  different behavior in _maybe_downcast.
    return extend_blocks(
        [
            blk._maybe_downcast(
                [blk], downcast=downcast, using_cow=using_cow, caller="fillna"
            )
            for blk in nbs
        ]
    )
def pad_or_backfill(
    self,
    *,
    method: FillnaOptions,
    axis: AxisInt = 0,
    inplace: bool = False,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    downcast: Literal["infer"] | None = None,
    using_cow: bool = False,
    already_warned=None,
) -> list[Block]:
    """
    Fill missing values with ``method`` by dispatching to
    ``self.array_values._pad_or_backfill``.

    Parameters
    ----------
    method : FillnaOptions
    axis : AxisInt, default 0
        When 1, the values are transposed before the fill and transposed
        back afterwards.
    inplace : bool, default False
    limit : int or None, default None
    limit_area : {"inside", "outside"} or None, default None
    downcast : {"infer"} or None, default None
        Forwarded to ``_maybe_downcast``.
    using_cow : bool, default False
    already_warned : object or None, default None
        If given, its ``warned_already`` attribute is consulted (and set to
        True after warning) so the Copy-on-Write FutureWarning is issued
        at most once.

    Returns
    -------
    List[Block]
    """
    if not self._can_hold_na:
        # If there are no NAs, then interpolate is a no-op
        # (the same reasoning applies to pad/backfill here)
        if using_cow:
            return [self.copy(deep=False)]
        return [self] if inplace else [self.copy()]

    copy, refs = self._get_refs_and_copy(using_cow, inplace)

    # Dispatch to the NumpyExtensionArray method.
    # We know self.array_values is a NumpyExtensionArray bc EABlock overrides
    vals = cast(NumpyExtensionArray, self.array_values)
    if axis == 1:
        vals = vals.T
    new_values = vals._pad_or_backfill(
        method=method,
        limit=limit,
        limit_area=limit_area,
        copy=copy,
    )
    # Only warn when no copy was requested, i.e. copy is False
    if (
        not copy
        and warn_copy_on_write()
        and already_warned is not None
        and not already_warned.warned_already
    ):
        if self.refs.has_reference():
            warnings.warn(
                COW_WARNING_GENERAL_MSG,
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            already_warned.warned_already = True
    if axis == 1:
        # undo the transpose applied before filling
        new_values = new_values.T

    data = extract_array(new_values, extract_numpy=True)

    nb = self.make_block_same_class(data, refs=refs)
    return nb._maybe_downcast([nb], downcast, using_cow, caller="fillna")
@final
def interpolate(
    self,
    *,
    method: InterpolateOptions,
    index: Index,
    inplace: bool = False,
    limit: int | None = None,
    limit_direction: Literal["forward", "backward", "both"] = "forward",
    limit_area: Literal["inside", "outside"] | None = None,
    downcast: Literal["infer"] | None = None,
    using_cow: bool = False,
    already_warned=None,
    **kwargs,
) -> list[Block]:
    """
    Interpolate missing values by dispatching to
    ``self.array_values.interpolate`` along the last axis.

    Blocks that cannot hold NA, and object-dtype blocks, are returned
    without interpolation (as ``self``, a copy, or a shallow copy depending
    on ``inplace`` / ``using_cow``).

    Parameters
    ----------
    method : InterpolateOptions
    index : Index
    inplace : bool, default False
        Validated with ``validate_bool_kwarg``.
    limit : int or None, default None
    limit_direction : {"forward", "backward", "both"}, default "forward"
    limit_area : {"inside", "outside"} or None, default None
    downcast : {"infer"} or None, default None
        Forwarded to ``_maybe_downcast``.
    using_cow : bool, default False
    already_warned : object or None, default None
        If given, its ``warned_already`` attribute is consulted (and set to
        True after warning) so the Copy-on-Write FutureWarning is issued
        at most once.
    **kwargs
        Forwarded to ``self.array_values.interpolate``.

    Returns
    -------
    List[Block]
    """
    inplace = validate_bool_kwarg(inplace, "inplace")
    # error: Non-overlapping equality check [...]
    if method == "asfreq":  # type: ignore[comparison-overlap]
        # clean_fill_method used to allow this
        missing.clean_fill_method(method)

    if not self._can_hold_na:
        # If there are no NAs, then interpolate is a no-op
        if using_cow:
            return [self.copy(deep=False)]
        return [self] if inplace else [self.copy()]

    # TODO(3.0): this case will not be reachable once GH#53638 is enforced
    if self.dtype == _dtype_obj:
        # only deal with floats
        # bc we already checked that can_hold_na, we don't have int dtype here
        # test_interp_basic checks that we make a copy here
        if using_cow:
            return [self.copy(deep=False)]
        return [self] if inplace else [self.copy()]

    copy, refs = self._get_refs_and_copy(using_cow, inplace)

    # Dispatch to the EA method.
    new_values = self.array_values.interpolate(
        method=method,
        axis=self.ndim - 1,
        index=index,
        limit=limit,
        limit_direction=limit_direction,
        limit_area=limit_area,
        copy=copy,
        **kwargs,
    )
    data = extract_array(new_values, extract_numpy=True)

    # Only warn when no copy was requested, i.e. copy is False
    if (
        not copy
        and warn_copy_on_write()
        and already_warned is not None
        and not already_warned.warned_already
    ):
        if self.refs.has_reference():
            warnings.warn(
                COW_WARNING_GENERAL_MSG,
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            already_warned.warned_already = True

    nb = self.make_block_same_class(data, refs=refs)
    return nb._maybe_downcast([nb], downcast, using_cow, caller="interpolate")
@final
def diff(self, n: int) -> list[Block]:
    """Difference the values with ``algos.diff`` and return them as a block."""
    # only reached with ndim == 2
    # TODO(EA2D): transpose will be unnecessary with 2D EAs
    transposed = self.values.T
    differenced = algos.diff(transposed, n, axis=0)
    return [self.make_block(values=differenced.T)]
def shift(self, periods: int, fill_value: Any = None) -> list[Block]:
    """
    Shift the values by ``periods`` along the last axis.

    If the current dtype cannot hold ``fill_value`` losslessly, the block is
    coerced to a dtype that can and the shift is retried on that block.

    Raises
    ------
    ValueError
        If ``fill_value`` is not a scalar and the block is not object dtype.
    """
    # convert integer to float if necessary. need to do a lot more than
    # that, handle boolean etc also
    axis = self.ndim - 1

    # Note: periods is never 0 here, as that is handled at the top of
    #  NDFrame.shift.  If that ever changes, we can do a check for periods=0
    #  and possibly avoid coercing.

    if not lib.is_scalar(fill_value) and self.dtype != _dtype_obj:
        # with object dtype there is nothing to promote, and the user can
        #  pass pretty much any weird fill_value they like
        # see test_shift_object_non_scalar_fill
        raise ValueError("fill_value must be a scalar")

    fill_value = self._standardize_fill_value(fill_value)

    try:
        # error: Argument 1 to "np_can_hold_element" has incompatible type
        # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]"
        casted = np_can_hold_element(
            self.dtype, fill_value  # type: ignore[arg-type]
        )
    except LossySetitemError:
        coerced = self.coerce_to_target_dtype(fill_value)
        return coerced.shift(periods, fill_value=fill_value)

    # fill_value fits in the current dtype: shift the raw ndarray directly
    raw = cast(np.ndarray, self.values)
    shifted = shift(raw, periods, axis, casted)
    return [self.make_block_same_class(shifted)]
@final
def quantile(
    self,
    qs: Index,  # with dtype float64
    interpolation: QuantileInterpolation = "linear",
) -> Block:
    """
    Compute the quantiles of the block's values.

    Parameters
    ----------
    qs : Index
        The quantiles to be computed in float64.
    interpolation : str, default 'linear'
        Type of interpolation.

    Returns
    -------
    Block
    """
    # We should always have ndim == 2 because Series dispatches to DataFrame
    assert self.ndim == 2
    assert is_list_like(qs)  # caller is responsible for this

    raw = quantile_compat(self.values, np.asarray(qs._values), interpolation)
    # ensure_block_shape needed for cases where we start with EA and result
    #  is ndarray, e.g. IntegerArray, SparseArray
    reshaped = ensure_block_shape(raw, ndim=2)
    return new_block_2d(reshaped, placement=self._mgr_locs)
@final
def round(self, decimals: int, using_cow: bool = False) -> Self:
    """
    Rounds the values.
    If the block is not of an integer or float dtype, nothing happens.
    This is consistent with DataFrame.round behavior.
    (Note: Series.round would raise)

    Parameters
    ----------
    decimals: int,
        Number of decimal places to round to.
        Caller is responsible for validating this
    using_cow: bool,
        Whether Copy on Write is enabled right now
    """
    if not (self.is_numeric and not self.is_bool):
        # nothing to round: hand back a copy (shallow under CoW)
        return self.copy(deep=not using_cow)

    # TODO: round only defined on BaseMaskedArray
    # Series also does this, so would need to fix both places
    # error: Item "ExtensionArray" of "Union[ndarray[Any, Any], ExtensionArray]"
    # has no attribute "round"
    rounded = self.values.round(decimals)  # type: ignore[union-attr]

    if rounded is not self.values:
        # a fresh array came back; it shares nothing with this block
        return self.make_block_same_class(rounded, refs=None)

    if using_cow:
        # same array handed back: keep tracking the shared references
        return self.make_block_same_class(rounded, refs=self.refs)

    # Normally would need to do this before, but
    # numpy only returns same array when round operation
    # is no-op
    # https://github.com/numpy/numpy/blob/486878b37fc7439a3b2b87747f50db9b62fea8eb/numpy/core/src/multiarray/calculation.c#L625-L636
    return self.make_block_same_class(rounded.copy(), refs=None)
# ---------------------------------------------------------------------
# Abstract Methods Overridden By EABackedBlock and NumpyBlock
def delete(self, loc) -> list[Block]:
    """Deletes the locs from the block.

    We split the block to avoid copying the underlying data. We create new
    blocks for every connected segment of the initial block that is not deleted.
    The new blocks point to the initial array.

    Parameters
    ----------
    loc : int or list-like of int
        Position(s) to delete; a non-list-like value is wrapped in a list.
        NOTE(review): the 2D loop below appears to assume ``loc`` is sorted
        ascending and non-negative -- confirm against callers.

    Returns
    -------
    list of Block
        For ``ndim == 1``, a single block built from ``np.delete`` of the
        values. Otherwise one block per run of rows that is kept.

    Raises
    ------
    IndexError
        For 2D blocks, if the largest entry of ``loc`` is not smaller than
        ``self.values.shape[0]``.
    """
    if not is_list_like(loc):
        loc = [loc]

    if self.ndim == 1:
        values = cast(np.ndarray, self.values)
        values = np.delete(values, loc)
        mgr_locs = self._mgr_locs.delete(loc)
        return [type(self)(values, placement=mgr_locs, ndim=self.ndim)]

    if np.max(loc) >= self.values.shape[0]:
        raise IndexError

    # Add one out-of-bounds indexer as maximum to collect
    # all columns after our last indexer if any
    loc = np.concatenate([loc, [self.values.shape[0]]])
    mgr_locs_arr = self._mgr_locs.as_array
    new_blocks: list[Block] = []

    previous_loc = -1
    # TODO(CoW): This is tricky, if parent block goes out of scope
    # all split blocks are referencing each other even though they
    # don't share data
    refs = self.refs if self.refs.has_reference() else None
    for idx in loc:
        if idx == previous_loc + 1:
            # There is no column between current and last idx
            pass
        else:
            # Everything strictly between the previous deleted position and
            # this one is kept as a single sliced block.
            # No overload variant of "__getitem__" of "ExtensionArray" matches
            # argument type "Tuple[slice, slice]"
            values = self.values[previous_loc + 1 : idx, :]  # type: ignore[call-overload]
            locs = mgr_locs_arr[previous_loc + 1 : idx]
            nb = type(self)(
                values, placement=BlockPlacement(locs), ndim=self.ndim, refs=refs
            )
            new_blocks.append(nb)

        previous_loc = idx

    return new_blocks
@property
def is_view(self) -> bool:
    """
    Whether this block is possibly a view.

    Abstract here: always raises AbstractMethodError.
    """
    raise AbstractMethodError(self)
@property
def array_values(self) -> ExtensionArray:
    """
    The array that Series.array returns. Always an ExtensionArray.

    Abstract here: always raises AbstractMethodError.
    """
    raise AbstractMethodError(self)
def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
    """
    Return the values in an internal format, currently just the ndarray.

    This is often overridden to handle to_dense like operations.
    Abstract here: always raises AbstractMethodError.
    """
    raise AbstractMethodError(self)
class EABackedBlock(Block):
    """
    Mixin for Block subclasses backed by ExtensionArray.

    The overrides here dispatch to methods of the underlying
    ExtensionArray (``shift``, ``_where``, ``_putmask``, ``delete``,
    ``_pad_or_backfill``) rather than operating on a raw ndarray.
    """

    values: ExtensionArray

    @final
    def shift(self, periods: int, fill_value: Any = None) -> list[Block]:
        """
        Shift the block by `periods`.

        Dispatches to underlying ExtensionArray and re-boxes in an
        ExtensionBlock.
        """
        # Transpose since EA.shift is always along axis=0, while we want to shift
        #  along rows.
        new_values = self.values.T.shift(periods=periods, fill_value=fill_value).T
        return [self.make_block_same_class(new_values)]

    @final
    def setitem(self, indexer, value, using_cow: bool = False):
        """
        Attempt self.values[indexer] = value, possibly creating a new array.

        This differs from Block.setitem by not allowing setitem to change
        the dtype of the Block.

        Parameters
        ----------
        indexer : tuple, list-like, array-like, slice, int
            The subset of self.values to set
        value : object
            The value being set
        using_cow: bool, default False
            Signaling if CoW is used.
            Not referenced in the body of this implementation.

        Returns
        -------
        Block

        Notes
        -----
        `indexer` is a direct slice/positional indexer. `value` must
        be a compatible shape.
        """
        # keep the caller's arguments for the retry on a coerced block
        orig_indexer = indexer
        orig_value = value

        indexer = self._unwrap_setitem_indexer(indexer)
        value = self._maybe_squeeze_arg(value)

        values = self.values
        if values.ndim == 2:
            # TODO(GH#45419): string[pyarrow] tests break if we transpose
            #  unconditionally
            values = values.T
        check_setitem_lengths(indexer, value, values)

        try:
            values[indexer] = value
        except (ValueError, TypeError):
            if isinstance(self.dtype, IntervalDtype):
                # see TestSetitemFloatIntervalWithIntIntervalValues
                nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True)
                return nb.setitem(orig_indexer, orig_value)

            elif isinstance(self, NDArrayBackedExtensionBlock):
                nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True)
                return nb.setitem(orig_indexer, orig_value)

            else:
                raise

        else:
            return self

    @final
    def where(
        self, other, cond, _downcast: str | bool = "infer", using_cow: bool = False
    ) -> list[Block]:
        """
        Keep values where ``cond`` is True and take ``other`` elsewhere, by
        dispatching to ``self.values.T._where``.

        On ValueError/TypeError, Interval-dtype blocks and
        NDArrayBackedExtensionBlock instances are coerced and retried (for
        1D or single-row blocks); multi-row blocks are split and processed
        per sub-block; anything else re-raises.
        """
        # _downcast private bc we only specify it when calling from fillna
        arr = self.values.T

        cond = extract_bool_array(cond)

        # keep the un-squeezed arguments for the fallback paths below
        orig_other = other
        orig_cond = cond
        other = self._maybe_squeeze_arg(other)
        cond = self._maybe_squeeze_arg(cond)

        if other is lib.no_default:
            other = self.fill_value

        icond, noop = validate_putmask(arr, ~cond)
        if noop:
            # GH#44181, GH#45135
            # Avoid a) raising for Interval/PeriodDtype and b) unnecessary object upcast
            if using_cow:
                return [self.copy(deep=False)]
            return [self.copy()]

        try:
            res_values = arr._where(cond, other).T
        except (ValueError, TypeError):
            if self.ndim == 1 or self.shape[0] == 1:
                if isinstance(self.dtype, IntervalDtype):
                    # TestSetitemFloatIntervalWithIntIntervalValues
                    blk = self.coerce_to_target_dtype(orig_other)
                    nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
                    return self._maybe_downcast(
                        nbs, downcast=_downcast, using_cow=using_cow, caller="where"
                    )

                elif isinstance(self, NDArrayBackedExtensionBlock):
                    # NB: not (yet) the same as
                    #  isinstance(values, NDArrayBackedExtensionArray)
                    blk = self.coerce_to_target_dtype(orig_other)
                    nbs = blk.where(orig_other, orig_cond, using_cow=using_cow)
                    return self._maybe_downcast(
                        nbs, downcast=_downcast, using_cow=using_cow, caller="where"
                    )

                else:
                    raise

            else:
                # Same pattern we use in Block.putmask
                is_array = isinstance(orig_other, (np.ndarray, ExtensionArray))

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    n = orig_other
                    if is_array:
                        # we have a different value per-column
                        n = orig_other[:, i : i + 1]

                    submask = orig_cond[:, i : i + 1]
                    rbs = nb.where(n, submask, using_cow=using_cow)
                    res_blocks.extend(rbs)
                return res_blocks

        nb = self.make_block_same_class(res_values)
        return [nb]

    @final
    def putmask(
        self, mask, new, using_cow: bool = False, already_warned=None
    ) -> list[Block]:
        """
        See Block.putmask.__doc__
        """
        mask = extract_bool_array(mask)
        if new is lib.no_default:
            new = self.fill_value

        # keep the un-squeezed arguments for the fallback paths below
        orig_new = new
        orig_mask = mask
        new = self._maybe_squeeze_arg(new)
        mask = self._maybe_squeeze_arg(mask)

        if not mask.any():
            # nothing to set
            if using_cow:
                return [self.copy(deep=False)]
            return [self]

        if (
            warn_copy_on_write()
            and already_warned is not None
            and not already_warned.warned_already
        ):
            if self.refs.has_reference():
                warnings.warn(
                    COW_WARNING_GENERAL_MSG,
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                already_warned.warned_already = True

        self = self._maybe_copy(using_cow, inplace=True)

        values = self.values
        if values.ndim == 2:
            values = values.T

        try:
            # Caller is responsible for ensuring matching lengths
            values._putmask(mask, new)
        except (TypeError, ValueError):
            if self.ndim == 1 or self.shape[0] == 1:
                if isinstance(self.dtype, IntervalDtype):
                    # Discussion about what we want to support in the general
                    #  case GH#39584
                    blk = self.coerce_to_target_dtype(orig_new, warn_on_upcast=True)
                    return blk.putmask(orig_mask, orig_new)

                elif isinstance(self, NDArrayBackedExtensionBlock):
                    # NB: not (yet) the same as
                    #  isinstance(values, NDArrayBackedExtensionArray)
                    blk = self.coerce_to_target_dtype(orig_new, warn_on_upcast=True)
                    return blk.putmask(orig_mask, orig_new)

                else:
                    raise

            else:
                # Same pattern we use in Block.putmask
                is_array = isinstance(orig_new, (np.ndarray, ExtensionArray))

                res_blocks = []
                nbs = self._split()
                for i, nb in enumerate(nbs):
                    n = orig_new
                    if is_array:
                        # we have a different value per-column
                        n = orig_new[:, i : i + 1]

                    submask = orig_mask[:, i : i + 1]
                    rbs = nb.putmask(submask, n)
                    res_blocks.extend(rbs)
                return res_blocks

        return [self]

    @final
    def delete(self, loc) -> list[Block]:
        """
        Delete ``loc`` from the block.

        1D blocks dispatch to ``self.values.delete``; a 2D block whose
        values are 1D returns an empty list; everything else defers to
        ``Block.delete``.
        """
        # This will be unnecessary if/when __array_function__ is implemented
        if self.ndim == 1:
            values = self.values.delete(loc)
            mgr_locs = self._mgr_locs.delete(loc)
            return [type(self)(values, placement=mgr_locs, ndim=self.ndim)]
        elif self.values.ndim == 1:
            # We get here through to_stata
            return []
        return super().delete(loc)

    @final
    @cache_readonly
    def array_values(self) -> ExtensionArray:
        """The underlying ExtensionArray, i.e. ``self.values`` itself."""
        return self.values

    @final
    def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
        """
        return object dtype as boxed values, such as Timestamps/Timedelta
        """
        values: ArrayLike = self.values
        if dtype == _dtype_obj:
            values = values.astype(object)
        # TODO(EA2D): reshape not needed with 2D EAs
        return np.asarray(values).reshape(self.shape)

    @final
    def pad_or_backfill(
        self,
        *,
        method: FillnaOptions,
        axis: AxisInt = 0,
        inplace: bool = False,
        limit: int | None = None,
        limit_area: Literal["inside", "outside"] | None = None,
        downcast: Literal["infer"] | None = None,
        using_cow: bool = False,
        already_warned=None,
    ) -> list[Block]:
        """
        Fill missing values by dispatching to ``self.values._pad_or_backfill``.

        ``limit_area`` is only forwarded when the array's
        ``_pad_or_backfill`` signature accepts it; otherwise a non-None
        ``limit_area`` raises NotImplementedError.

        ``inplace``, ``downcast``, ``using_cow`` and ``already_warned`` are
        accepted but not referenced in this implementation.
        """
        values = self.values

        kwargs: dict[str, Any] = {"method": method, "limit": limit}
        # inspect the array's method so we only pass keywords it supports
        if "limit_area" in inspect.signature(values._pad_or_backfill).parameters:
            kwargs["limit_area"] = limit_area
        elif limit_area is not None:
            # NOTE(review): "ExtnsionArray" below is a misspelling of
            #  "ExtensionArray" in the runtime message; left untouched here.
            raise NotImplementedError(
                f"{type(values).__name__} does not implement limit_area "
                "(added in pandas 2.2). 3rd-party ExtnsionArray authors "
                "need to add this argument to _pad_or_backfill."
            )

        if values.ndim == 2 and axis == 1:
            # NDArrayBackedExtensionArray.fillna assumes axis=0
            new_values = values.T._pad_or_backfill(**kwargs).T
        else:
            new_values = values._pad_or_backfill(**kwargs)
        return [self.make_block_same_class(new_values)]
class ExtensionBlock(EABackedBlock):
"""
Block for holding extension types.
Notes
-----
This holds all 3rd-party extension array types. It's also the immediate
parent class for our internal extension types' blocks.
ExtensionArrays are limited to 1-D.
"""
values: ExtensionArray
def fillna(
    self,
    value,
    limit: int | None = None,
    inplace: bool = False,
    downcast=None,
    using_cow: bool = False,
    already_warned=None,
) -> list[Block]:
    """
    Fill NA values by dispatching to ``self.values.fillna``.

    Interval-dtype blocks defer to the parent class implementation. Under
    Copy-on-Write, a block that can hold NA but currently has none reuses
    its values and references without calling ``fillna`` on the array.

    Parameters
    ----------
    value : object
    limit : int or None, default None
    inplace : bool, default False
    downcast : object, default None
        Forwarded to ``_maybe_downcast``.
    using_cow : bool, default False
    already_warned : object or None, default None
        If given, its ``warned_already`` attribute is consulted (and set to
        True after warning) so the Copy-on-Write FutureWarning is issued
        at most once.

    Returns
    -------
    List[Block]
    """
    if isinstance(self.dtype, IntervalDtype):
        # Block.fillna handles coercion (test_fillna_interval)
        return super().fillna(
            value=value,
            limit=limit,
            inplace=inplace,
            downcast=downcast,
            using_cow=using_cow,
            already_warned=already_warned,
        )
    if using_cow and self._can_hold_na and not self.values._hasna:
        # nothing to fill: share the existing values and their references
        refs = self.refs
        new_values = self.values
    else:
        copy, refs = self._get_refs_and_copy(using_cow, inplace)

        try:
            new_values = self.values.fillna(
                value=value, method=None, limit=limit, copy=copy
            )
        except TypeError:
            # 3rd party EA that has not implemented copy keyword yet
            refs = None
            new_values = self.values.fillna(value=value, method=None, limit=limit)
            # issue the warning *after* retrying, in case the TypeError
            #  was caused by an invalid fill_value
            warnings.warn(
                # GH#53278
                "ExtensionArray.fillna added a 'copy' keyword in pandas "
                "2.1.0. In a future version, ExtensionArray subclasses will "
                "need to implement this keyword or an exception will be "
                "raised. In the interim, the keyword is ignored by "
                f"{type(self.values).__name__}.",
                DeprecationWarning,
                stacklevel=find_stack_level(),
            )
        else:
            # Only warn when no copy was requested, i.e. copy is False
            if (
                not copy
                and warn_copy_on_write()
                and already_warned is not None
                and not already_warned.warned_already
            ):
                if self.refs.has_reference():
                    warnings.warn(
                        COW_WARNING_GENERAL_MSG,
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )
                    already_warned.warned_already = True

    nb = self.make_block_same_class(new_values, refs=refs)
    return nb._maybe_downcast([nb], downcast, using_cow=using_cow, caller="fillna")
@cache_readonly
def shape(self) -> Shape:
    """
    Shape of the block: ``(len(values),)`` when 1D, otherwise
    ``(len(_mgr_locs), len(values))``.
    """
    # TODO(EA2D): override unnecessary with 2D EAs
    if self.ndim != 1:
        return len(self._mgr_locs), len(self.values)
    return (len(self.values),)
def iget(self, i: int | tuple[int, int] | tuple[slice, int]):
    """
    Return the values for item ``i``, or one row / a one-row slice of them.

    A non-tuple ``i`` must be 0 and returns ``self.values``. A tuple
    ``(col, loc)`` must have ``col`` equal to 0 or a null slice: with an
    integer ``col`` the element at ``loc`` is returned, with a slice the
    length-1 slice ``values[loc : loc + 1]`` is returned.

    Raises
    ------
    IndexError
        If an item other than the first is requested.
    """
    # In the case where we have a tuple[slice, int], the slice will always
    #  be slice(None)
    # We _could_ make the annotation more specific, but mypy would
    #  complain about override mismatch:
    #  Literal[0] | tuple[Literal[0], int] | tuple[slice, int]

    # Note: only reached with self.ndim == 2

    if not isinstance(i, tuple):
        if i != 0:
            raise IndexError(f"{self} only contains one item")
        return self.values

    # TODO(EA2D): unnecessary with 2D EAs
    col, loc = i
    if not com.is_null_slice(col) and col != 0:
        raise IndexError(f"{self} only contains one item")

    if not isinstance(col, slice):
        return self.values[loc]

    # the is_null_slice check above assures that col is slice(None)
    #  so what we want is a view on all our columns and row loc
    if loc < 0:
        loc += len(self.values)
    # Note: loc:loc+1 vs [[loc]] makes a difference when called
    #  from fast_xs because we want to get a view back.
    return self.values[loc : loc + 1]
def set_inplace(self, locs, values: ArrayLike, copy: bool = False) -> None:
    """
    Overwrite all of ``self.values`` with ``values``.

    ``locs`` is not used for the assignment. When ``copy`` is True,
    ``self.values`` is first replaced by a copy of itself.
    """
    # When an ndarray, we should have locs.tolist() == [0]
    # When a BlockPlacement we should have list(locs) == [0]
    if copy:
        duplicated = self.values.copy()
        self.values = duplicated
    self.values[:] = values
def _maybe_squeeze_arg(self, arg):
    """
    Drop the trailing length-1 axis from ``arg`` when it has exactly one
    more dimension than ``self.values``, e.g. a (N, 1) ndarray becomes (N,).

    A single-column DataFrame is reduced to that column's values. Anything
    else is returned unchanged.
    """
    # e.g. if we are passed a 2D mask for putmask
    is_array = isinstance(arg, (np.ndarray, ExtensionArray))
    if is_array and arg.ndim == self.values.ndim + 1:
        # TODO(EA2D): unnecessary with 2D EAs
        assert arg.shape[1] == 1
        # error: No overload variant of "__getitem__" of "ExtensionArray"
        # matches argument type "Tuple[slice, int]"
        return arg[:, 0]  # type: ignore[call-overload]

    if isinstance(arg, ABCDataFrame):
        # 2022-01-06 only reached for setitem
        # TODO: should we avoid getting here with DataFrame?
        assert arg.shape[1] == 1
        return arg._ixs(0, axis=1)._values

    return arg
def _unwrap_setitem_indexer(self, indexer):
    """
    Adapt a 2D-indexer to our 1D values.

    This is intended for 'setitem', not 'iget' or '_slice'.

    Only 2-tuples are adapted, and only when the second element addresses
    the single column (a 2D array of zeros, integer 0, a null slice, or a
    list-like whose first entry is 0); the row part is returned. Any other
    input is returned unchanged.

    Raises
    ------
    NotImplementedError
        If a 2-tuple indexer does not match any of the supported forms.
    """
    # TODO: ATM this doesn't work for iget/_slice, can we change that?

    if isinstance(indexer, tuple) and len(indexer) == 2:
        # TODO(EA2D): not needed with 2D EAs
        #  Should never have length > 2.  Caller is responsible for checking.
        #  Length 1 is reached via setitem_single_block and setitem_single_column
        #  each of which pass indexer=(pi,)
        if all(isinstance(x, np.ndarray) and x.ndim == 2 for x in indexer):
            # GH#44703 went through indexing.maybe_convert_ix
            first, second = indexer
            if not (
                second.size == 1 and (second == 0).all() and first.shape[1] == 1
            ):
                raise NotImplementedError(
                    "This should not be reached. Please report a bug at "
                    "github.com/pandas-dev/pandas/"
                )
            # keep only the row positions, flattened to 1D
            indexer = first[:, 0]

        elif lib.is_integer(indexer[1]) and indexer[1] == 0:
            # reached via setitem_single_block passing the whole indexer
            indexer = indexer[0]

        elif com.is_null_slice(indexer[1]):
            indexer = indexer[0]

        elif is_list_like(indexer[1]) and indexer[1][0] == 0:
            # NOTE(review): only the first entry of the list-like is checked
            indexer = indexer[0]

        else:
            raise NotImplementedError(
                "This should not be reached. Please report a bug at "
                "github.com/pandas-dev/pandas/"
            )
    return indexer
@property
def is_view(self) -> bool:
    """Always False: extension arrays are never treated as views."""
    return False
# error: Cannot override writeable attribute with read-only property
@cache_readonly
def is_numeric(self) -> bool:  # type: ignore[override]
    """Whether the dtype of the values reports itself as numeric."""
    dtype = self.values.dtype
    return dtype._is_numeric
def _slice(
self, slicer: slice | npt.NDArray[np.bool_] | npt.NDArray[np.intp]
) -> ExtensionArray:
"""
Return a slice of my values.
Parameters
----------
slicer : slice, ndarray[int], or ndarray[bool]
Valid (non-reducing) indexer for self.values.
Returns
-------
ExtensionArray
"""
# Notes: ndarray[bool] is only reachable when via get_rows_with_mask, which
# is only for Series, i.e. self.ndim == 1.
# return same dims as we currently have
if self.ndim == 2:
# reached via getitem_block via _slice_take_blocks_ax0
# TODO(EA2D): won't be necessary with 2D EAs
if not isinstance(slicer, slice):
raise AssertionError(
"invalid slicing for a 1-ndim ExtensionArray", slicer
)
# GH#32959 only full-slicers along fake-dim0 are valid
# TODO(EA2D): won't be necessary with 2D EAs
# range(1) instead of self._mgr_locs to avoid exception on [::-1]
# see test_iloc_getitem_slice_negative_step_ea_block
new_locs = range(1)[slicer]
if not len(new_locs):
raise AssertionError(
"invalid slicing for a 1-ndim ExtensionArray", slicer
)
slicer = slice(None)
return self.values[slicer]
@final
def slice_block_rows(self, slicer: slice) -> Self:
    """
    Build a block of our own type from a row-slice of our values.

    The new block is constructed with our ``_mgr_locs``, ``ndim`` and
    ``refs`` passed through unchanged.

    Parameters
    ----------
    slicer : slice
        Applied directly to ``self.values``.

    Returns
    -------
    Self
    """
    # GH#42787 in principle this is equivalent to values[..., slicer], but we
    #  don't require subclasses of ExtensionArray to support that form (for now).
    sliced = self.values[slicer]
    block_cls = type(self)
    return block_cls(sliced, self._mgr_locs, ndim=self.ndim, refs=self.refs)
def _unstack(
self,
unstacker,
fill_value,
new_placement: npt.NDArray[np.intp],
needs_masking: npt.NDArray[np.bool_],
):
# ExtensionArray-safe unstack.
# We override Block._unstack, which unstacks directly on the
# values of the array. For EA-backed blocks, this would require
# converting to a 2-D ndarray of objects.
# Instead, we unstack an ndarray of integer positions, followed by
# a `take` on the actual values.
# Caller is responsible for ensuring self.shape[-1] == len(unstacker.index)
new_values, mask = unstacker.arange_result
# Note: these next two lines ensure that
# mask.sum() == sum(len(nb.mgr_locs) for nb in blocks)
# which the calling function needs in order to pass verify_integrity=False
# to the BlockManager constructor
new_values = new_values.T[mask]
new_placement = new_placement[mask]
# needs_masking[i] calculated once in BlockManager.unstack tells
# us if there are any -1s in the relevant indices. When False,
# that allows us to go through a faster path in 'take', among
# other things avoiding e.g. Categorical._validate_scalar.
blocks = [
# TODO: could cast to object depending on fill_value?
type(self)(
self.values.take(
indices, allow_fill=needs_masking[i], fill_value=fill_value
),
BlockPlacement(place),
ndim=2,
)
for i, (indices, place) in enumerate(zip(new_values, new_placement))
]
return blocks, mask
class NumpyBlock(Block):
    """Block whose ``values`` are a numpy ndarray."""

    values: np.ndarray
    __slots__ = ()

    @property
    def is_view(self) -> bool:
        """Whether our ndarray is possibly a view (it has a ``base``)."""
        base = self.values.base
        return base is not None

    @property
    def array_values(self) -> ExtensionArray:
        """Our ndarray wrapped in a NumpyExtensionArray."""
        wrapped = NumpyExtensionArray(self.values)
        return wrapped

    def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
        """
        Return our ndarray, cast to ``_dtype_obj`` when that dtype is requested.

        Any other ``dtype`` (including None) returns ``self.values`` as-is.
        """
        values = self.values
        return values.astype(_dtype_obj) if dtype == _dtype_obj else values

    @cache_readonly
    def is_numeric(self) -> bool:  # type: ignore[override]
        """Whether our dtype kind is float, complex, int, uint or bool."""
        return self.values.dtype.kind in "fciub"
class NumericBlock(NumpyBlock):
    """NumpyBlock subclass that adds no behavior of its own."""

    # This Block type only exists for backwards-compatibility.
    # TODO(3.0): delete and remove deprecation in __init__.py.
    __slots__ = ()
class ObjectBlock(NumpyBlock):
    """NumpyBlock subclass that adds no behavior of its own."""

    # This Block type only exists for backwards-compatibility.
    # TODO(3.0): delete and remove deprecation in __init__.py.
    __slots__ = ()
class NDArrayBackedExtensionBlock(EABackedBlock):
    """
    Block backed by an NDArrayBackedExtensionArray
    """

    values: NDArrayBackedExtensionArray

    @property
    def is_view(self) -> bool:
        """Whether the ndarray backing our values is possibly a view."""
        # Look through the extension array at the ndarray it wraps.
        backing = self.values._ndarray
        return backing.base is not None
class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
    """Block for datetime64[ns], timedelta64[ns]."""

    __slots__ = ()
    # Fixed class-level value: these blocks are never reported as numeric.
    is_numeric = False
    values: DatetimeArray | TimedeltaArray
class DatetimeTZBlock(DatetimeLikeBlock):
    """Datetime64 block whose values carry a tz attribute."""

    values: DatetimeArray

    __slots__ = ()
# -----------------------------------------------------------------
# Constructor Helpers
def maybe_coerce_values(values: ArrayLike) -> ArrayLike:
    """
    Input validation for values passed to __init__.

    ndarrays are passed through ``ensure_wrapped_if_datetimelike``, and a
    result with a str dtype is converted to object dtype.  Any
    DatetimeArray/TimedeltaArray has its ``freq`` dropped.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray

    Returns
    -------
    values : np.ndarray or ExtensionArray
    """
    # Caller is responsible for ensuring NumpyExtensionArray is already extracted.
    if isinstance(values, np.ndarray):
        wrapped = ensure_wrapped_if_datetimelike(values)
        has_str_dtype = issubclass(wrapped.dtype.type, str)
        values = np.array(wrapped, dtype=object) if has_str_dtype else wrapped

    if isinstance(values, (DatetimeArray, TimedeltaArray)):
        if values.freq is not None:
            # freq is only stored in DatetimeIndex/TimedeltaIndex,
            #  not in Series/DataFrame
            values = values._with_freq(None)

    return values
def get_block_type(dtype: DtypeObj) -> type[Block]:
    """
    Find the appropriate Block subclass to use for the given dtype.

    Parameters
    ----------
    dtype : numpy or pandas dtype

    Returns
    -------
    cls : class, subclass of Block
    """
    # The more specific dtype classes are tested before the generic
    #  ExtensionDtype check.
    if isinstance(dtype, DatetimeTZDtype):
        return DatetimeTZBlock
    if isinstance(dtype, PeriodDtype):
        return NDArrayBackedExtensionBlock
    if isinstance(dtype, ExtensionDtype):
        # Note: need to be sure NumpyExtensionArray is unwrapped
        #  before we get here
        return ExtensionBlock

    # We use kind checks because it is much more performant
    #  than is_foo_dtype
    return DatetimeLikeBlock if dtype.kind in "Mm" else NumpyBlock
def new_block_2d(
    values: ArrayLike, placement: BlockPlacement, refs: BlockValuesRefs | None = None
):
    """
    Construct a 2D block; ``new_block`` specialized to ``ndim=2``.

    The caller is responsible for ensuring that:

    - ``placement`` is already a BlockPlacement
    - check_ndim/ensure_block_shape have already been checked
    """
    # The block class is looked up from the dtype of the values as passed
    #  in, before they go through maybe_coerce_values.
    block_cls = get_block_type(values.dtype)
    coerced = maybe_coerce_values(values)
    return block_cls(coerced, ndim=2, placement=placement, refs=refs)
def new_block(
    values,
    placement: BlockPlacement,
    *,
    ndim: int,
    refs: BlockValuesRefs | None = None,
) -> Block:
    """
    Construct a block of the class that ``get_block_type`` picks for ``values``.

    The caller is responsible for ensuring that:

    - values is NOT a NumpyExtensionArray
    - check_ndim/ensure_block_shape have already been checked
    - maybe_coerce_values was already called, or is unnecessary
    """
    block_cls = get_block_type(values.dtype)
    block = block_cls(values, ndim=ndim, placement=placement, refs=refs)
    return block
def check_ndim(values, placement: BlockPlacement, ndim: int) -> None:
    """
    ndim inference and validation.

    Validates that values.ndim and ndim are consistent.
    Validates that len(values) and len(placement) are consistent.

    Parameters
    ----------
    values : array-like
    placement : BlockPlacement
    ndim : int

    Raises
    ------
    ValueError : the number of dimensions do not match
    """
    values_ndim = values.ndim
    if values_ndim > ndim:
        # Check for both np.ndarray and ExtensionArray
        raise ValueError(
            "Wrong number of dimensions. "
            f"values.ndim > ndim [{values_ndim} > {ndim}]"
        )

    if is_1d_only_ea_dtype(values.dtype):
        # TODO(EA2D): special case unnecessary with 2D EAs
        # A 1D-only array in a 2D block must occupy exactly one position.
        if ndim == 2 and len(placement) != 1:
            raise ValueError("need to split")
        return

    # TODO(EA2D): special case not needed with 2D EAs
    if values_ndim != ndim:
        raise ValueError(
            "Wrong number of dimensions. "
            f"values.ndim != ndim [{values_ndim} != {ndim}]"
        )

    n_locs = len(placement)
    n_items = len(values)
    if n_locs != n_items:
        raise ValueError(
            f"Wrong number of items passed {n_items}, "
            f"placement implies {n_locs}"
        )
def extract_pandas_array(
values: ArrayLike, dtype: DtypeObj | None, ndim: int
) -> tuple[ArrayLike, DtypeObj | None]:
"""
Ensure that we don't allow NumpyExtensionArray / NumpyEADtype in internals.
"""
# For now, blocks should be backed by ndarrays when possible.
if isinstance(values, ABCNumpyExtensionArray):
values = values.to_numpy()
if ndim and ndim > 1:
# TODO(EA2D): special case not needed with 2D EAs
values = np.atleast_2d(values)
if isinstance(dtype, NumpyEADtype):
dtype = dtype.numpy_dtype
return values, dtype
# -----------------------------------------------------------------
def extend_blocks(result, blocks=None) -> list[Block]:
    """
    Add ``result`` to ``blocks`` and return the extended list.

    ``blocks`` is modified in place when given; a fresh list is started
    when it is None.  If ``result`` is a list, each element is appended,
    except that elements which are themselves lists are flattened one
    level.  Otherwise ``result`` must be a single Block.
    """
    collected = [] if blocks is None else blocks

    if not isinstance(result, list):
        assert isinstance(result, Block), type(result)
        collected.append(result)
        return collected

    for item in result:
        if isinstance(item, list):
            collected.extend(item)
        else:
            collected.append(item)
    return collected
def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike:
    """
    Reshape if possible to have values.ndim == ndim.

    Values that already have at least ``ndim`` dimensions, and values with
    a 1D-only extension dtype, are returned unchanged; anything else is
    reshaped to a single row.
    """
    # TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023
    # block.shape is incorrect for "2D" ExtensionArrays
    # For 1D-only EA dtypes we can't, and don't need to, reshape.
    if values.ndim < ndim and not is_1d_only_ea_dtype(values.dtype):
        reshapeable = cast("np.ndarray | DatetimeArray | TimedeltaArray", values)
        return reshapeable.reshape(1, -1)
    return values
def external_values(values: ArrayLike) -> ArrayLike:
    """
    The array that Series.values returns (public attribute).

    For historical reasons this is not always the stored array itself:
    PeriodArray and IntervalArray are converted to object dtype, and
    DatetimeArray/TimedeltaArray are replaced by their underlying ndarray
    (so datetimetz gives a datetime64 ndarray rather than the extension
    array).  When Copy-on-Write is enabled, an ndarray result is returned
    as a read-only view.
    """
    if isinstance(values, (PeriodArray, IntervalArray)):
        return values.astype(object)

    if isinstance(values, (DatetimeArray, TimedeltaArray)):
        # NB: for datetime64tz this is different from np.asarray(values), since
        #  that returns an object-dtype ndarray of Timestamps.
        # Avoid raising in .astype in casting from dt64tz to dt64
        values = values._ndarray

    if not (isinstance(values, np.ndarray) and using_copy_on_write()):
        # TODO(CoW) we should also mark our ExtensionArrays as read-only
        return values

    # Hand out a view so that clearing the writeable flag does not touch
    #  the flags of the array we were given.
    readonly = values.view()
    readonly.flags.writeable = False
    return readonly