projektAI/venv/Lib/site-packages/pandas/core/dtypes/common.py
2021-06-06 22:13:05 +02:00

1817 lines
46 KiB
Python

"""
Common type operations.
"""
from typing import Any, Callable, Union
import warnings
import numpy as np
from pandas._libs import Interval, Period, algos
from pandas._libs.tslibs import conversion
from pandas._typing import ArrayLike, DtypeObj, Optional
from pandas.core.dtypes.base import registry
from pandas.core.dtypes.dtypes import (
CategoricalDtype,
DatetimeTZDtype,
ExtensionDtype,
IntervalDtype,
PeriodDtype,
)
from pandas.core.dtypes.generic import ABCCategorical, ABCIndexClass
from pandas.core.dtypes.inference import ( # noqa:F401
is_array_like,
is_bool,
is_complex,
is_dataclass,
is_decimal,
is_dict_like,
is_file_like,
is_float,
is_hashable,
is_integer,
is_interval,
is_iterator,
is_list_like,
is_named_tuple,
is_nested_list_like,
is_number,
is_re,
is_re_compilable,
is_scalar,
is_sequence,
)
# Canonical NumPy dtype names for the object dtype and every fixed-width
# signed/unsigned integer dtype, collected in a set for fast membership tests.
POSSIBLY_CAST_DTYPES = {
    np.dtype(code).name
    for code in (
        "O",
        "int8",
        "uint8",
        "int16",
        "uint16",
        "int32",
        "uint32",
        "int64",
        "uint64",
    )
}
# Aliases of the datetime64[ns] / timedelta64[ns] dtype objects defined in
# pandas._libs.tslibs.conversion; the *_ns_dtype checks in this module
# compare against them.
DT64NS_DTYPE = conversion.DT64NS_DTYPE
TD64NS_DTYPE = conversion.TD64NS_DTYPE
# NumPy dtype object for 64-bit signed integers.
INT64_DTYPE = np.dtype(np.int64)
# Cache slot for scipy's ``issparse``. It starts as None and is filled in on
# the first call to ``is_scipy_sparse`` so that scipy is not imported when
# this module is imported (keeps import time down).
_is_scipy_sparse = None
# Module-level aliases of the float casting helpers in pandas._libs.algos.
ensure_float64 = algos.ensure_float64
ensure_float32 = algos.ensure_float32
def ensure_float(arr):
    """
    Cast an array to a float dtype when its dtype allows it.

    Parameters
    ----------
    arr : array-like
        The array whose data type we want to enforce as float.

    Returns
    -------
    float_arr
        For extension arrays, the result of ``to_numpy`` with missing values
        replaced by ``np.nan`` (using the dtype's own ``numpy_dtype`` when it
        is already a float dtype, ``float64`` otherwise). For NumPy integer
        and boolean arrays, the array cast with ``astype(float)``. Any other
        input is returned unchanged.
    """
    dtype = arr.dtype
    if is_extension_array_dtype(dtype):
        # Keep the precision of float extension dtypes; everything else
        # goes through float64.
        target = dtype.numpy_dtype if is_float_dtype(dtype) else "float64"
        return arr.to_numpy(dtype=target, na_value=np.nan)
    if issubclass(dtype.type, (np.integer, np.bool_)):
        return arr.astype(float)
    return arr
# Module-level aliases of the integer / platform-int / object casting helpers
# implemented in pandas._libs.algos.
ensure_uint64 = algos.ensure_uint64
ensure_int64 = algos.ensure_int64
ensure_int32 = algos.ensure_int32
ensure_int16 = algos.ensure_int16
ensure_int8 = algos.ensure_int8
ensure_platform_int = algos.ensure_platform_int
ensure_object = algos.ensure_object
def ensure_str(value: Union[bytes, Any]) -> str:
    """
    Return ``value`` as a ``str``.

    ``str`` instances are returned unchanged, ``bytes`` are decoded as UTF-8,
    and every other object is converted with ``str()``.
    """
    if isinstance(value, str):
        return value
    if isinstance(value, bytes):
        return value.decode("utf-8")
    return str(value)
def ensure_int_or_float(arr: ArrayLike, copy: bool = False) -> np.ndarray:
    """
    Cast an array to int64 when that is safe, otherwise fall back.

    A safe cast to ``int64`` is tried first, then a safe cast to ``uint64``.
    If neither is possible (for example because of overflow or a
    non-integer dtype), the array is converted to ``float64`` instead.

    Parameters
    ----------
    arr : array-like
        The array whose data type we want to enforce.
    copy : bool
        Whether to copy the original array or reuse it in place,
        if possible.

    Returns
    -------
    out_arr
        The input array cast as int64 if possible without overflow,
        otherwise as uint64, otherwise as float64.

    Notes
    -----
    If the array is explicitly of type uint64 the type will remain
    unchanged.
    """
    # TODO: GH27506 potential bug with ExtensionArrays
    for integer_dtype in ("int64", "uint64"):
        try:
            # error: Unexpected keyword argument "casting" for "astype"
            return arr.astype(  # type: ignore[call-arg]
                integer_dtype, copy=copy, casting="safe"
            )
        except TypeError:
            # Not safely representable in this integer dtype; try the next.
            continue

    if is_extension_array_dtype(arr.dtype):
        return arr.to_numpy(dtype="float64", na_value=np.nan)
    return arr.astype("float64", copy=copy)
def ensure_python_int(value: Union[int, np.integer]) -> int:
    """
    Ensure that a value is a python int.

    Parameters
    ----------
    value : int or numpy.integer

    Returns
    -------
    int

    Raises
    ------
    TypeError
        If the value is not a scalar, cannot be converted with ``int()``,
        or does not compare equal to its integer conversion (e.g. ``1.5``).
    """
    if not is_scalar(value):
        raise TypeError(
            f"Value needs to be a scalar value, was type {type(value).__name__}"
        )
    try:
        new_value = int(value)
        # Explicit comparison rather than ``assert``: assertions are removed
        # under ``python -O``, which would let lossy conversions through.
        is_lossless = bool(new_value == value)
    except (TypeError, ValueError) as err:
        raise TypeError(f"Wrong type {type(value)} for value {value}") from err
    if not is_lossless:
        raise TypeError(f"Wrong type {type(value)} for value {value}")
    return new_value
def classes(*klasses) -> Callable:
    """
    Build a predicate that tests whether a type is a subclass of ``klasses``.
    """

    def is_subclass_of_klasses(tipo):
        return issubclass(tipo, klasses)

    return is_subclass_of_klasses
def classes_and_not_datetimelike(*klasses) -> Callable:
    """
    Build a predicate that tests whether a type is a subclass of ``klasses``
    while not being a subclass of ``np.datetime64`` or ``np.timedelta64``.
    """

    def is_matching_non_datetimelike(tipo):
        if not issubclass(tipo, klasses):
            return False
        return not issubclass(tipo, (np.datetime64, np.timedelta64))

    return is_matching_non_datetimelike
def is_object_dtype(arr_or_dtype) -> bool:
    """
    Check whether an array-like or dtype is of the object dtype.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array-like or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of the object dtype.

    Examples
    --------
    >>> is_object_dtype(object)
    True
    >>> is_object_dtype(int)
    False
    >>> is_object_dtype(np.array([], dtype=object))
    True
    >>> is_object_dtype(np.array([], dtype=int))
    False
    >>> is_object_dtype([1, 2, 3])
    False
    """
    is_object_type = classes(np.object_)
    return _is_dtype_type(arr_or_dtype, is_object_type)
def is_sparse(arr) -> bool:
    """
    Check whether an array-like is a pandas sparse array.

    The check looks at the ``dtype`` attribute of ``arr`` (or at ``arr``
    itself when it has no such attribute) and returns True only when that
    is a pandas ``SparseDtype``; other kinds of sparse containers, such as
    scipy sparse matrices, return False.

    Parameters
    ----------
    arr : array-like
        Array-like to check.

    Returns
    -------
    bool
        Whether or not the array-like is a pandas sparse array.

    Examples
    --------
    Returns `True` if the parameter is a 1-D pandas sparse array.

    >>> is_sparse(pd.arrays.SparseArray([0, 0, 1, 0]))
    True
    >>> is_sparse(pd.Series(pd.arrays.SparseArray([0, 0, 1, 0])))
    True

    Returns `False` if the parameter is not sparse.

    >>> is_sparse(np.array([0, 0, 1, 0]))
    False
    >>> is_sparse(pd.Series([0, 1, 0, 0]))
    False

    Returns `False` if the parameter is not a pandas sparse array.

    >>> from scipy.sparse import bsr_matrix
    >>> is_sparse(bsr_matrix([0, 1, 0, 0]))
    False
    """
    # Imported locally rather than at module level.
    from pandas.core.arrays.sparse import SparseDtype

    return isinstance(getattr(arr, "dtype", arr), SparseDtype)
def is_scipy_sparse(arr) -> bool:
    """
    Check whether an array-like is a scipy.sparse.spmatrix instance.

    Parameters
    ----------
    arr : array-like
        The array-like to check.

    Returns
    -------
    boolean
        Whether or not the array-like is a scipy.sparse.spmatrix instance.

    Notes
    -----
    If scipy is not installed, this function will always return False.

    Examples
    --------
    >>> from scipy.sparse import bsr_matrix
    >>> is_scipy_sparse(bsr_matrix([1, 2, 3]))
    True
    >>> is_scipy_sparse(pd.arrays.SparseArray([1, 2, 3]))
    False
    """
    global _is_scipy_sparse

    if _is_scipy_sparse is None:
        # First call: resolve the checker once and cache it in the module
        # global so scipy is only imported on demand.
        try:
            from scipy.sparse import issparse as checker
        except ImportError:
            checker = lambda _: False
        _is_scipy_sparse = checker

    assert _is_scipy_sparse is not None
    return _is_scipy_sparse(arr)
def is_categorical(arr) -> bool:
    """
    Check whether an array-like is a Categorical instance.

    Deprecated: emits a ``FutureWarning`` on every call; use
    ``is_categorical_dtype`` instead.

    Parameters
    ----------
    arr : array-like
        The array-like to check.

    Returns
    -------
    boolean
        Whether or not the array-like is of a Categorical instance.

    Examples
    --------
    >>> is_categorical([1, 2, 3])
    False

    Categoricals, Series Categoricals, and CategoricalIndex will return True.

    >>> cat = pd.Categorical([1, 2, 3])
    >>> is_categorical(cat)
    True
    >>> is_categorical(pd.Series(cat))
    True
    >>> is_categorical(pd.CategoricalIndex([1, 2, 3]))
    True
    """
    message = (
        "is_categorical is deprecated and will be removed in a future version. "
        "Use is_categorical_dtype instead"
    )
    warnings.warn(message, FutureWarning, stacklevel=2)
    if isinstance(arr, ABCCategorical):
        return True
    return is_categorical_dtype(arr)
def is_datetime64_dtype(arr_or_dtype) -> bool:
    """
    Check whether an array-like or dtype is of the datetime64 dtype.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array-like or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of the datetime64 dtype.

    Examples
    --------
    >>> is_datetime64_dtype(object)
    False
    >>> is_datetime64_dtype(np.datetime64)
    True
    >>> is_datetime64_dtype(np.array([], dtype=int))
    False
    >>> is_datetime64_dtype(np.array([], dtype=np.datetime64))
    True
    >>> is_datetime64_dtype([1, 2, 3])
    False
    """
    if not isinstance(arr_or_dtype, np.dtype):
        return _is_dtype_type(arr_or_dtype, classes(np.datetime64))
    # GH#33400 fastpath for dtype object
    return arr_or_dtype.kind == "M"
def is_datetime64tz_dtype(arr_or_dtype) -> bool:
    """
    Check whether an array-like or dtype is of a DatetimeTZDtype dtype.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array-like or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of a DatetimeTZDtype dtype.

    Examples
    --------
    >>> is_datetime64tz_dtype(object)
    False
    >>> is_datetime64tz_dtype([1, 2, 3])
    False
    >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3]))  # tz-naive
    False
    >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern"))
    True

    >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern")
    >>> s = pd.Series([], dtype=dtype)
    >>> is_datetime64tz_dtype(dtype)
    True
    >>> is_datetime64tz_dtype(s)
    True
    """
    if arr_or_dtype is None:
        return False
    if isinstance(arr_or_dtype, ExtensionDtype):
        # GH#33400 fastpath for dtype object
        return arr_or_dtype.kind == "M"
    return DatetimeTZDtype.is_dtype(arr_or_dtype)
def is_timedelta64_dtype(arr_or_dtype) -> bool:
    """
    Check whether an array-like or dtype is of the timedelta64 dtype.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array-like or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of the timedelta64 dtype.

    Examples
    --------
    >>> is_timedelta64_dtype(object)
    False
    >>> is_timedelta64_dtype(np.timedelta64)
    True
    >>> is_timedelta64_dtype([1, 2, 3])
    False
    >>> is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]"))
    True
    >>> is_timedelta64_dtype('0 days')
    False
    """
    if not isinstance(arr_or_dtype, np.dtype):
        return _is_dtype_type(arr_or_dtype, classes(np.timedelta64))
    # GH#33400 fastpath for dtype object
    return arr_or_dtype.kind == "m"
def is_period_dtype(arr_or_dtype) -> bool:
    """
    Check whether an array-like or dtype is of the Period dtype.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array-like or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of the Period dtype.

    Examples
    --------
    >>> is_period_dtype(object)
    False
    >>> is_period_dtype(PeriodDtype(freq="D"))
    True
    >>> is_period_dtype([1, 2, 3])
    False
    >>> is_period_dtype(pd.Period("2017-01-01"))
    False
    >>> is_period_dtype(pd.PeriodIndex([], freq="A"))
    True
    """
    if arr_or_dtype is None:
        return False
    if isinstance(arr_or_dtype, ExtensionDtype):
        # GH#33400 fastpath for dtype object
        return arr_or_dtype.type is Period
    return PeriodDtype.is_dtype(arr_or_dtype)
def is_interval_dtype(arr_or_dtype) -> bool:
    """
    Check whether an array-like or dtype is of the Interval dtype.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array-like or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of the Interval dtype.

    Examples
    --------
    >>> is_interval_dtype(object)
    False
    >>> is_interval_dtype(IntervalDtype())
    True
    >>> is_interval_dtype([1, 2, 3])
    False

    >>> interval = pd.Interval(1, 2, closed="right")
    >>> is_interval_dtype(interval)
    False
    >>> is_interval_dtype(pd.IntervalIndex([interval]))
    True
    """
    if arr_or_dtype is None:
        return False
    if isinstance(arr_or_dtype, ExtensionDtype):
        # GH#33400 fastpath for dtype object
        return arr_or_dtype.type is Interval
    return IntervalDtype.is_dtype(arr_or_dtype)
def is_categorical_dtype(arr_or_dtype) -> bool:
    """
    Check whether an array-like or dtype is of the Categorical dtype.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array-like or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of the Categorical dtype.

    Examples
    --------
    >>> is_categorical_dtype(object)
    False
    >>> is_categorical_dtype(CategoricalDtype())
    True
    >>> is_categorical_dtype([1, 2, 3])
    False
    >>> is_categorical_dtype(pd.Categorical([1, 2, 3]))
    True
    >>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3]))
    True
    """
    if arr_or_dtype is None:
        return False
    if isinstance(arr_or_dtype, ExtensionDtype):
        # GH#33400 fastpath for dtype object
        return arr_or_dtype.name == "category"
    return CategoricalDtype.is_dtype(arr_or_dtype)
def is_string_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of the string dtype.

    A dtype counts as a string dtype when its ``kind`` is one of ``"O"``,
    ``"S"`` or ``"U"`` and it is not a period, interval or categorical
    dtype.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of the string dtype.

    Examples
    --------
    >>> is_string_dtype(str)
    True
    >>> is_string_dtype(object)
    True
    >>> is_string_dtype(int)
    False

    >>> is_string_dtype(np.array(['a', 'b']))
    True
    >>> is_string_dtype(pd.Series([1, 2]))
    False
    """
    # TODO: gh-15585: consider making the checks stricter.

    def is_excluded_dtype(dtype) -> bool:
        """
        These have kind = "O" but aren't string dtypes so need to be
        explicitly excluded.
        """
        for excluded_check in (
            is_period_dtype,
            is_interval_dtype,
            is_categorical_dtype,
        ):
            if excluded_check(dtype):
                return True
        return False

    def condition(dtype) -> bool:
        if dtype.kind not in ("O", "S", "U"):
            return False
        return not is_excluded_dtype(dtype)

    return _is_dtype(arr_or_dtype, condition)
def is_dtype_equal(source, target) -> bool:
    """
    Check if two dtypes are equal.

    Both arguments are normalised with ``get_dtype`` before comparison.
    A ``TypeError`` or ``AttributeError`` raised while normalising or
    comparing is treated as "not equal".

    Parameters
    ----------
    source : The first dtype to compare
    target : The second dtype to compare

    Returns
    -------
    boolean
        Whether or not the two dtypes are equal.

    Examples
    --------
    >>> is_dtype_equal(int, float)
    False
    >>> is_dtype_equal("int", int)
    True
    >>> is_dtype_equal(object, "category")
    False
    >>> is_dtype_equal(CategoricalDtype(), "category")
    True
    >>> is_dtype_equal(DatetimeTZDtype(tz="UTC"), "datetime64")
    False
    """
    try:
        left = get_dtype(source)
        right = get_dtype(target)
        return left == right
    except (TypeError, AttributeError):
        # invalid comparison
        # object == category will hit this
        return False
def is_any_int_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of an integer dtype.

    In this function, timedelta64 instances are also considered "any-integer"
    type objects and will return True.

    This function is internal and should not be exposed in the public API.

    .. versionchanged:: 0.24.0

       The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also
       considered as integer by this function.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of an integer dtype.

    Examples
    --------
    >>> is_any_int_dtype(str)
    False
    >>> is_any_int_dtype(int)
    True
    >>> is_any_int_dtype(float)
    False
    >>> is_any_int_dtype(np.uint64)
    True
    >>> is_any_int_dtype(np.datetime64)
    False
    >>> is_any_int_dtype(np.timedelta64)
    True
    >>> is_any_int_dtype(np.array(['a', 'b']))
    False
    >>> is_any_int_dtype(pd.Series([1, 2]))
    True
    >>> is_any_int_dtype(np.array([], dtype=np.timedelta64))
    True
    >>> is_any_int_dtype(pd.Index([1, 2.]))  # float
    False
    """
    is_int_or_timedelta = classes(np.integer, np.timedelta64)
    return _is_dtype_type(arr_or_dtype, is_int_or_timedelta)
def is_integer_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of an integer dtype.

    Unlike in `is_any_int_dtype`, timedelta64 instances will return False.

    .. versionchanged:: 0.24.0

       The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also
       considered as integer by this function.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of an integer dtype and
        not an instance of timedelta64.

    Examples
    --------
    >>> is_integer_dtype(str)
    False
    >>> is_integer_dtype(int)
    True
    >>> is_integer_dtype(float)
    False
    >>> is_integer_dtype(np.uint64)
    True
    >>> is_integer_dtype('int8')
    True
    >>> is_integer_dtype('Int8')
    True
    >>> is_integer_dtype(pd.Int8Dtype)
    True
    >>> is_integer_dtype(np.datetime64)
    False
    >>> is_integer_dtype(np.timedelta64)
    False
    >>> is_integer_dtype(np.array(['a', 'b']))
    False
    >>> is_integer_dtype(pd.Series([1, 2]))
    True
    >>> is_integer_dtype(np.array([], dtype=np.timedelta64))
    False
    >>> is_integer_dtype(pd.Index([1, 2.]))  # float
    False
    """
    is_plain_integer = classes_and_not_datetimelike(np.integer)
    return _is_dtype_type(arr_or_dtype, is_plain_integer)
def is_signed_integer_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of a signed integer dtype.

    Unlike in `is_any_int_dtype`, timedelta64 instances will return False.

    .. versionchanged:: 0.24.0

       The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also
       considered as integer by this function.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a signed integer dtype
        and not an instance of timedelta64.

    Examples
    --------
    >>> is_signed_integer_dtype(str)
    False
    >>> is_signed_integer_dtype(int)
    True
    >>> is_signed_integer_dtype(float)
    False
    >>> is_signed_integer_dtype(np.uint64)  # unsigned
    False
    >>> is_signed_integer_dtype('int8')
    True
    >>> is_signed_integer_dtype('Int8')
    True
    >>> is_signed_integer_dtype(pd.Int8Dtype)
    True
    >>> is_signed_integer_dtype(np.datetime64)
    False
    >>> is_signed_integer_dtype(np.timedelta64)
    False
    >>> is_signed_integer_dtype(np.array(['a', 'b']))
    False
    >>> is_signed_integer_dtype(pd.Series([1, 2]))
    True
    >>> is_signed_integer_dtype(np.array([], dtype=np.timedelta64))
    False
    >>> is_signed_integer_dtype(pd.Index([1, 2.]))  # float
    False
    >>> is_signed_integer_dtype(np.array([1, 2], dtype=np.uint32))  # unsigned
    False
    """
    is_signed = classes_and_not_datetimelike(np.signedinteger)
    return _is_dtype_type(arr_or_dtype, is_signed)
def is_unsigned_integer_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of an unsigned integer dtype.

    .. versionchanged:: 0.24.0

       The nullable Integer dtypes (e.g. pandas.UInt64Dtype) are also
       considered as integer by this function.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of an unsigned integer dtype.

    Examples
    --------
    >>> is_unsigned_integer_dtype(str)
    False
    >>> is_unsigned_integer_dtype(int)  # signed
    False
    >>> is_unsigned_integer_dtype(float)
    False
    >>> is_unsigned_integer_dtype(np.uint64)
    True
    >>> is_unsigned_integer_dtype('uint8')
    True
    >>> is_unsigned_integer_dtype('UInt8')
    True
    >>> is_unsigned_integer_dtype(pd.UInt8Dtype)
    True
    >>> is_unsigned_integer_dtype(np.array(['a', 'b']))
    False
    >>> is_unsigned_integer_dtype(pd.Series([1, 2]))  # signed
    False
    >>> is_unsigned_integer_dtype(pd.Index([1, 2.]))  # float
    False
    >>> is_unsigned_integer_dtype(np.array([1, 2], dtype=np.uint32))
    True
    """
    is_unsigned = classes_and_not_datetimelike(np.unsignedinteger)
    return _is_dtype_type(arr_or_dtype, is_unsigned)
def is_int64_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of the int64 dtype.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of the int64 dtype.

    Notes
    -----
    Depending on system architecture, the return value of
    `is_int64_dtype(int)` will be True if the OS uses 64-bit integers and
    False if the OS uses 32-bit integers.

    Examples
    --------
    >>> is_int64_dtype(str)
    False
    >>> is_int64_dtype(np.int32)
    False
    >>> is_int64_dtype(np.int64)
    True
    >>> is_int64_dtype('int8')
    False
    >>> is_int64_dtype('Int8')
    False
    >>> is_int64_dtype(pd.Int64Dtype)
    True
    >>> is_int64_dtype(float)
    False
    >>> is_int64_dtype(np.uint64)  # unsigned
    False
    >>> is_int64_dtype(np.array(['a', 'b']))
    False
    >>> is_int64_dtype(np.array([1, 2], dtype=np.int64))
    True
    >>> is_int64_dtype(pd.Index([1, 2.]))  # float
    False
    >>> is_int64_dtype(np.array([1, 2], dtype=np.uint32))  # unsigned
    False
    """
    is_int64_type = classes(np.int64)
    return _is_dtype_type(arr_or_dtype, is_int64_type)
def is_datetime64_any_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of the datetime64 dtype,
    either tz-naive or tz-aware.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array or dtype to check.

    Returns
    -------
    bool
        Whether or not the array or dtype is of the datetime64 dtype.

    Examples
    --------
    >>> is_datetime64_any_dtype(str)
    False
    >>> is_datetime64_any_dtype(int)
    False
    >>> is_datetime64_any_dtype(np.datetime64)  # can be tz-naive
    True
    >>> is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern"))
    True
    >>> is_datetime64_any_dtype(np.array(['a', 'b']))
    False
    >>> is_datetime64_any_dtype(np.array([1, 2]))
    False
    >>> is_datetime64_any_dtype(np.array([], dtype="datetime64[ns]"))
    True
    >>> is_datetime64_any_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]"))
    True
    """
    if arr_or_dtype is None:
        return False
    if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)):
        # GH#33400 fastpath for dtype object
        return arr_or_dtype.kind == "M"
    if is_datetime64_dtype(arr_or_dtype):
        return True
    return is_datetime64tz_dtype(arr_or_dtype)
def is_datetime64_ns_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of the datetime64[ns] dtype.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array or dtype to check.

    Returns
    -------
    bool
        Whether or not the array or dtype is of the datetime64[ns] dtype.

    Examples
    --------
    >>> is_datetime64_ns_dtype(str)
    False
    >>> is_datetime64_ns_dtype(int)
    False
    >>> is_datetime64_ns_dtype(np.datetime64)  # no unit
    False
    >>> is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern"))
    True
    >>> is_datetime64_ns_dtype(np.array(['a', 'b']))
    False
    >>> is_datetime64_ns_dtype(np.array([1, 2]))
    False
    >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64"))  # no unit
    False
    >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64[ps]"))  # wrong unit
    False
    >>> is_datetime64_ns_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]"))
    True
    """
    if arr_or_dtype is None:
        return False

    try:
        tipo = get_dtype(arr_or_dtype)
    except TypeError:
        # Only tz-aware inputs get a second attempt, via their ``.dtype``.
        if not is_datetime64tz_dtype(arr_or_dtype):
            return False
        tipo = get_dtype(arr_or_dtype.dtype)

    is_exact_match = tipo == DT64NS_DTYPE
    return is_exact_match or getattr(tipo, "base", None) == DT64NS_DTYPE
def is_timedelta64_ns_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of the timedelta64[ns] dtype.

    This is a very specific dtype, so generic ones like `np.timedelta64`
    will return False if passed into this function.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of the timedelta64[ns] dtype.

    Examples
    --------
    >>> is_timedelta64_ns_dtype(np.dtype('m8[ns]'))
    True
    >>> is_timedelta64_ns_dtype(np.dtype('m8[ps]'))  # Wrong frequency
    False
    >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]'))
    True
    >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64))
    False
    """

    def is_td64ns(dtype):
        return dtype == TD64NS_DTYPE

    return _is_dtype(arr_or_dtype, is_td64ns)
def is_datetime_or_timedelta_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of
    a timedelta64 or datetime64 dtype.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a timedelta64,
        or datetime64 dtype.

    Examples
    --------
    >>> is_datetime_or_timedelta_dtype(str)
    False
    >>> is_datetime_or_timedelta_dtype(int)
    False
    >>> is_datetime_or_timedelta_dtype(np.datetime64)
    True
    >>> is_datetime_or_timedelta_dtype(np.timedelta64)
    True
    >>> is_datetime_or_timedelta_dtype(np.array(['a', 'b']))
    False
    >>> is_datetime_or_timedelta_dtype(pd.Series([1, 2]))
    False
    >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.timedelta64))
    True
    >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64))
    True
    """
    is_dt_or_td = classes(np.datetime64, np.timedelta64)
    return _is_dtype_type(arr_or_dtype, is_dt_or_td)
# This exists to silence numpy deprecation warnings, see GH#29553
def is_numeric_v_string_like(a, b):
    """
    Check if we are comparing a string-like object to a numeric ndarray.

    NumPy doesn't like to compare such objects, especially numeric arrays
    and scalar string-likes.

    Parameters
    ----------
    a : array-like, scalar
        The first object to check.
    b : array-like, scalar
        The second object to check.

    Returns
    -------
    boolean
        Whether we are comparing a string-like object to a numeric array.

    Examples
    --------
    >>> is_numeric_v_string_like(1, 1)
    False
    >>> is_numeric_v_string_like("foo", "foo")
    False
    >>> is_numeric_v_string_like(1, "foo")  # non-array numeric
    False
    >>> is_numeric_v_string_like(np.array([1]), "foo")
    True
    >>> is_numeric_v_string_like("foo", np.array([1]))  # symmetric check
    True
    >>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"]))
    True
    >>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2]))
    True
    >>> is_numeric_v_string_like(np.array([1]), np.array([2]))
    False
    >>> is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"]))
    False
    """

    def describe(obj):
        # (numeric ndarray?, string ndarray?, scalar str?) for one operand.
        is_array = isinstance(obj, np.ndarray)
        numeric_array = is_array and is_numeric_dtype(obj)
        string_array = is_array and is_string_like_dtype(obj)
        scalar_string = not is_array and isinstance(obj, str)
        return numeric_array, string_array, scalar_string

    a_numeric, a_string_array, a_scalar_string = describe(a)
    b_numeric, b_string_array, b_scalar_string = describe(b)

    # One side must be a numeric ndarray and the other any string-like.
    a_is_string_like = a_scalar_string or a_string_array
    b_is_string_like = b_scalar_string or b_string_array
    return (a_numeric and b_is_string_like) or (b_numeric and a_is_string_like)
# This exists to silence numpy deprecation warnings, see GH#29553
def is_datetimelike_v_numeric(a, b):
    """
    Check if we are comparing a datetime-like object to a numeric object.

    By "numeric," we mean an object that is either of an int or float dtype.

    Parameters
    ----------
    a : array-like, scalar
        The first object to check.
    b : array-like, scalar
        The second object to check.

    Returns
    -------
    boolean
        Whether we are comparing a datetime-like to a numeric object.

    Examples
    --------
    >>> from datetime import datetime
    >>> dt = np.datetime64(datetime(2017, 1, 1))

    >>> is_datetimelike_v_numeric(1, 1)
    False
    >>> is_datetimelike_v_numeric(dt, dt)
    False
    >>> is_datetimelike_v_numeric(1, dt)
    True
    >>> is_datetimelike_v_numeric(dt, 1)  # symmetric check
    True
    >>> is_datetimelike_v_numeric(np.array([dt]), 1)
    True
    >>> is_datetimelike_v_numeric(np.array([1]), dt)
    True
    >>> is_datetimelike_v_numeric(np.array([dt]), np.array([1]))
    True
    >>> is_datetimelike_v_numeric(np.array([1]), np.array([2]))
    False
    >>> is_datetimelike_v_numeric(np.array([dt]), np.array([dt]))
    False
    """

    def has_numeric_dtype(obj):
        """
        Check if an object has a numeric dtype (i.e. integer or float).
        """
        return is_integer_dtype(obj) or is_float_dtype(obj)

    # Operands without a ``dtype`` attribute are wrapped in an ndarray so
    # the dtype checks below can inspect them.
    left = a if hasattr(a, "dtype") else np.asarray(a)
    right = b if hasattr(b, "dtype") else np.asarray(b)

    if needs_i8_conversion(left) and has_numeric_dtype(right):
        return True
    return needs_i8_conversion(right) and has_numeric_dtype(left)
def needs_i8_conversion(arr_or_dtype) -> bool:
    """
    Check whether the array or dtype should be converted to int64.

    An array-like or dtype "needs" such a conversion if the array-like
    or dtype is of a datetime-like dtype (datetime64, timedelta64,
    tz-aware datetime or period).

    Parameters
    ----------
    arr_or_dtype : array-like
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype should be converted to int64.

    Examples
    --------
    >>> needs_i8_conversion(str)
    False
    >>> needs_i8_conversion(np.int64)
    False
    >>> needs_i8_conversion(np.datetime64)
    True
    >>> needs_i8_conversion(np.array(['a', 'b']))
    False
    >>> needs_i8_conversion(pd.Series([1, 2]))
    False
    >>> needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]"))
    True
    >>> needs_i8_conversion(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern"))
    True
    """
    if arr_or_dtype is None:
        return False

    if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)):
        # fastpath: a dtype object can be inspected directly
        if arr_or_dtype.kind in ("m", "M"):
            return True
        return arr_or_dtype.type is Period

    datetimelike_checks = (
        is_datetime_or_timedelta_dtype,
        is_datetime64tz_dtype,
        is_period_dtype,
    )
    return any(check(arr_or_dtype) for check in datetimelike_checks)
def is_numeric_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of a numeric dtype.

    Boolean dtypes count as numeric; datetime64 and timedelta64 do not.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a numeric dtype.

    Examples
    --------
    >>> is_numeric_dtype(str)
    False
    >>> is_numeric_dtype(int)
    True
    >>> is_numeric_dtype(float)
    True
    >>> is_numeric_dtype(np.uint64)
    True
    >>> is_numeric_dtype(np.datetime64)
    False
    >>> is_numeric_dtype(np.timedelta64)
    False
    >>> is_numeric_dtype(np.array(['a', 'b']))
    False
    >>> is_numeric_dtype(pd.Series([1, 2]))
    True
    >>> is_numeric_dtype(pd.Index([1, 2.]))
    True
    >>> is_numeric_dtype(np.array([], dtype=np.timedelta64))
    False
    """
    is_number_or_bool = classes_and_not_datetimelike(np.number, np.bool_)
    return _is_dtype_type(arr_or_dtype, is_number_or_bool)
def is_string_like_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of a string-like dtype.

    Unlike `is_string_dtype`, the object dtype is excluded because it
    is a mixed dtype.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of the string dtype.

    Examples
    --------
    >>> is_string_like_dtype(str)
    True
    >>> is_string_like_dtype(object)
    False
    >>> is_string_like_dtype(np.array(['a', 'b']))
    True
    >>> is_string_like_dtype(pd.Series([1, 2]))
    False
    """

    def has_string_kind(dtype):
        # "S" and "U" are the NumPy kinds accepted here; "O" is not.
        return dtype.kind in ("S", "U")

    return _is_dtype(arr_or_dtype, has_string_kind)
def is_float_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of a float dtype.

    This function is internal and should not be exposed in the public API.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a float dtype.

    Examples
    --------
    >>> is_float_dtype(str)
    False
    >>> is_float_dtype(int)
    False
    >>> is_float_dtype(float)
    True
    >>> is_float_dtype(np.array(['a', 'b']))
    False
    >>> is_float_dtype(pd.Series([1, 2]))
    False
    >>> is_float_dtype(pd.Index([1, 2.]))
    True
    """
    is_floating = classes(np.floating)
    return _is_dtype_type(arr_or_dtype, is_floating)
def is_bool_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of a boolean dtype.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a boolean dtype. ``None``
        and inputs whose dtype cannot be resolved return False.

    Notes
    -----
    An ExtensionArray is considered boolean when the ``_is_boolean``
    attribute is set to True.

    Examples
    --------
    >>> is_bool_dtype(str)
    False
    >>> is_bool_dtype(int)
    False
    >>> is_bool_dtype(bool)
    True
    >>> is_bool_dtype(np.bool_)
    True
    >>> is_bool_dtype(np.array(['a', 'b']))
    False
    >>> is_bool_dtype(pd.Series([1, 2]))
    False
    >>> is_bool_dtype(np.array([True, False]))
    True
    >>> is_bool_dtype(pd.Categorical([True, False]))
    True
    >>> is_bool_dtype(pd.arrays.SparseArray([True, False]))
    True
    """
    if arr_or_dtype is None:
        return False
    try:
        dtype = get_dtype(arr_or_dtype)
    except (TypeError, ValueError):
        return False

    if isinstance(arr_or_dtype, CategoricalDtype):
        # Judge a categorical dtype by its categories, which then go
        # through the Index branch below.
        arr_or_dtype = arr_or_dtype.categories

    if isinstance(arr_or_dtype, ABCIndexClass):
        # TODO(jreback)
        # we don't have a boolean Index class
        # so its object, we need to infer to
        # guess this
        # ``is_object`` is a method: it has to be called, otherwise the
        # bound method is always truthy and the guard never applies.
        return arr_or_dtype.is_object() and arr_or_dtype.inferred_type == "boolean"
    elif is_extension_array_dtype(arr_or_dtype):
        return getattr(dtype, "_is_boolean", False)

    return issubclass(dtype.type, np.bool_)
def is_extension_type(arr) -> bool:
    """
    Check whether an array-like is of a pandas extension class instance.

    .. deprecated:: 1.0.0
        Use ``is_extension_array_dtype`` instead.

    Extension classes include categoricals, pandas sparse objects (i.e.
    classes represented within the pandas library and not ones external
    to it like scipy sparse matrices), and datetime-like arrays.

    Parameters
    ----------
    arr : array-like
        The array-like to check.

    Returns
    -------
    boolean
        Whether or not the array-like is of a pandas extension class instance.

    Examples
    --------
    >>> is_extension_type([1, 2, 3])
    False
    >>> is_extension_type(np.array([1, 2, 3]))
    False

    >>> cat = pd.Categorical([1, 2, 3])
    >>> is_extension_type(cat)
    True
    >>> is_extension_type(pd.Series(cat))
    True
    >>> is_extension_type(pd.arrays.SparseArray([1, 2, 3]))
    True
    >>> from scipy.sparse import bsr_matrix
    >>> is_extension_type(bsr_matrix([1, 2, 3]))
    False
    >>> is_extension_type(pd.DatetimeIndex([1, 2, 3]))
    False
    >>> is_extension_type(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern"))
    True

    >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern")
    >>> s = pd.Series([], dtype=dtype)
    >>> is_extension_type(s)
    True
    """
    warnings.warn(
        "'is_extension_type' is deprecated and will be removed in a future "
        "version. Use 'is_extension_array_dtype' instead.",
        FutureWarning,
        stacklevel=2,
    )

    # Checked in order; the first match wins.
    extension_checks = (is_categorical_dtype, is_sparse, is_datetime64tz_dtype)
    return any(check(arr) for check in extension_checks)
def is_extension_array_dtype(arr_or_dtype) -> bool:
    """
    Check if an object is a pandas extension array type.

    See the :ref:`User Guide <extending.extension-types>` for more.

    Parameters
    ----------
    arr_or_dtype : object
        For array-like input, the ``.dtype`` attribute will
        be extracted.

    Returns
    -------
    bool
        Whether the `arr_or_dtype` is an extension array type.

    Notes
    -----
    The object (or its ``.dtype``) qualifies when it is an
    ``ExtensionDtype`` instance, or when the extension dtype registry
    returns a match for it. Third-party libraries may implement arrays
    or types satisfying this interface as well.

    Examples
    --------
    >>> from pandas.api.types import is_extension_array_dtype
    >>> arr = pd.Categorical(['a', 'b'])
    >>> is_extension_array_dtype(arr)
    True
    >>> is_extension_array_dtype(arr.dtype)
    True

    >>> arr = np.array(['a', 'b'])
    >>> is_extension_array_dtype(arr.dtype)
    False
    """
    # Objects without a ``dtype`` attribute are treated as the dtype itself.
    candidate = getattr(arr_or_dtype, "dtype", arr_or_dtype)
    if isinstance(candidate, ExtensionDtype):
        return True
    # Otherwise ask the registry whether it can resolve the candidate.
    return registry.find(candidate) is not None
def is_complex_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of a complex dtype.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a complex dtype.

    Examples
    --------
    >>> is_complex_dtype(str)
    False
    >>> is_complex_dtype(int)
    False
    >>> is_complex_dtype(np.complex128)
    True
    >>> is_complex_dtype(np.array(['a', 'b']))
    False
    >>> is_complex_dtype(pd.Series([1, 2]))
    False
    >>> is_complex_dtype(np.array([1 + 1j, 5]))
    True
    """
    # Condition produced by ``classes`` for numpy's complex-floating hierarchy.
    complex_condition = classes(np.complexfloating)
    return _is_dtype_type(arr_or_dtype, complex_condition)
def _is_dtype(arr_or_dtype, condition) -> bool:
    """
    Evaluate ``condition`` against the dtype extracted from ``arr_or_dtype``.

    Parameters
    ----------
    arr_or_dtype : array-like, str, np.dtype, or ExtensionArrayType
        The array-like or dtype object whose dtype we want to extract.
    condition : callable[Union[np.dtype, ExtensionDtype]]
        Called with the extracted dtype; its result is returned as-is.

    Returns
    -------
    bool
        False when ``arr_or_dtype`` is None or when no dtype can be
        extracted from it; otherwise the result of ``condition``.
    """
    if arr_or_dtype is not None:
        try:
            resolved = get_dtype(arr_or_dtype)
        except (TypeError, ValueError, UnicodeEncodeError):
            # Not interpretable as a dtype: fall through to False.
            pass
        else:
            # ``condition`` runs outside the ``try`` so its own errors propagate.
            return condition(resolved)
    return False
def get_dtype(arr_or_dtype) -> DtypeObj:
    """
    Get the dtype instance associated with an array or dtype object.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array-like or dtype object whose dtype we want to extract.

    Returns
    -------
    obj_dtype : The dtype instance extracted from the
                passed in array or dtype object.

    Raises
    ------
    TypeError : The passed in object is None.
    """
    if arr_or_dtype is None:
        raise TypeError("Cannot deduce dtype from null object")

    # Fast paths: a numpy dtype is returned untouched, a type object is
    # handed straight to ``np.dtype``.
    if isinstance(arr_or_dtype, np.dtype):
        return arr_or_dtype
    if isinstance(arr_or_dtype, type):
        return np.dtype(arr_or_dtype)

    # Array-likes contribute their ``dtype`` attribute; everything else is
    # passed to ``pandas_dtype`` unchanged.
    dtype_like = arr_or_dtype
    if hasattr(dtype_like, "dtype"):
        dtype_like = dtype_like.dtype
    return pandas_dtype(dtype_like)
def _is_dtype_type(arr_or_dtype, condition) -> bool:
    """
    Evaluate ``condition`` against the scalar type behind ``arr_or_dtype``.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array-like or dtype object whose dtype we want to extract.
    condition : callable[Union[np.dtype, ExtensionDtypeType]]
        Called with the ``.type`` of the resolved dtype, or with
        ``type(None)`` when the input is None, a list-like without a
        ``dtype`` attribute, or a scalar that is not a valid dtype.

    Returns
    -------
    bool : if the condition is satisfied for the arr_or_dtype
    """
    none_type = type(None)

    if arr_or_dtype is None:
        return condition(none_type)

    # Fast paths for numpy dtypes and type objects.
    if isinstance(arr_or_dtype, np.dtype):
        return condition(arr_or_dtype.type)
    if isinstance(arr_or_dtype, type):
        # ExtensionDtype subclasses are replaced by their ``type`` attribute
        # before being handed to ``np.dtype``.
        target = (
            arr_or_dtype.type
            if issubclass(arr_or_dtype, ExtensionDtype)
            else arr_or_dtype
        )
        return condition(np.dtype(target).type)

    if hasattr(arr_or_dtype, "dtype"):
        # Array-like: work with its dtype from here on.
        arr_or_dtype = arr_or_dtype.dtype
    elif is_list_like(arr_or_dtype):
        # A list-like without a dtype cannot possibly be a dtype.
        return condition(none_type)

    try:
        resolved_type = pandas_dtype(arr_or_dtype).type
    except (TypeError, ValueError, UnicodeEncodeError):
        if is_scalar(arr_or_dtype):
            return condition(none_type)
        return False

    return condition(resolved_type)
def infer_dtype_from_object(dtype):
    """
    Get a numpy dtype.type-style object for a dtype object.

    This method also includes handling of the datetime64[ns] and
    datetime64[ns, TZ] objects.

    Parameters
    ----------
    dtype : dtype, type
        The dtype object whose numpy dtype.type-style
        object we want to extract.

    Returns
    -------
    dtype_object : The extracted numpy dtype.type-style object.

    Raises
    ------
    NotImplementedError : The string "period" was passed and could not be
        resolved to a dtype beforehand.
    """
    if isinstance(dtype, type) and issubclass(dtype, np.generic):
        # Already a numpy scalar type: nothing to extract.
        return dtype

    if isinstance(dtype, (np.dtype, ExtensionDtype)):
        # A dtype instance. Validation only matters for date-like dtypes, so
        # the TypeError raised for anything else is ignored; a ValueError
        # from the validator still propagates.
        try:
            _validate_date_like_dtype(dtype)
        except TypeError:
            pass
        return dtype.type

    # Try to normalise the input; keep it unchanged when that fails.
    try:
        dtype = pandas_dtype(dtype)
    except TypeError:
        pass

    if is_extension_array_dtype(dtype):
        return dtype.type

    if isinstance(dtype, str):
        # TODO(jreback)
        # should deprecate these
        if dtype in ("datetimetz", "datetime64tz"):
            return DatetimeTZDtype.type
        if dtype == "period":
            raise NotImplementedError

        if dtype in ("datetime", "timedelta"):
            # Map the short aliases onto numpy's attribute names.
            dtype = dtype + "64"

        try:
            return infer_dtype_from_object(getattr(np, dtype))
        except (AttributeError, TypeError):
            # Per the original author's notes: AttributeError covers names
            # that are valid dtypes but not numpy attributes, TypeError
            # covers the float16 type code 'e'. Either way, fall through to
            # the generic ``np.dtype`` conversion below.
            pass

    return infer_dtype_from_object(np.dtype(dtype))
def _validate_date_like_dtype(dtype) -> None:
    """
    Check whether the dtype is a date-like dtype. Raises an error if invalid.

    Parameters
    ----------
    dtype : dtype, type
        The dtype to check.

    Raises
    ------
    TypeError : The dtype could not be casted to a date-like dtype.
    ValueError : The dtype is an illegal date-like dtype (e.g. the
                 frequency provided is too specific)
    """
    try:
        metadata = np.datetime_data(dtype)
    except ValueError as err:
        # Report a ValueError from numpy as a TypeError, keeping the cause.
        raise TypeError(err) from err

    # The first metadata entry is the unit; only "generic" and "ns" pass.
    unit = metadata[0]
    if unit in ("generic", "ns"):
        return

    raise ValueError(
        f"{repr(dtype.name)} is too specific of a frequency, "
        f"try passing {repr(dtype.type.__name__)}"
    )
def validate_all_hashable(*args, error_name: Optional[str] = None) -> None:
    """
    Return None if all args are hashable, else raise a TypeError.

    Parameters
    ----------
    *args
        Arguments to validate.
    error_name : str, optional
        The name to use in the error message. When it is missing or
        empty, a generic message is used instead.

    Raises
    ------
    TypeError : If an argument is not hashable

    Returns
    -------
    None
    """
    if all(map(is_hashable, args)):
        return

    message = (
        f"{error_name} must be a hashable type"
        if error_name
        else "All elements must be hashable"
    )
    raise TypeError(message)
def pandas_dtype(dtype) -> DtypeObj:
    """
    Convert input into a pandas only dtype object or a numpy dtype object.

    Parameters
    ----------
    dtype : object to be converted

    Returns
    -------
    np.dtype or a pandas dtype

    Raises
    ------
    TypeError if not a dtype
    """
    # Short-circuits: arrays yield their dtype, dtype instances pass through.
    if isinstance(dtype, np.ndarray):
        return dtype.dtype
    if isinstance(dtype, (np.dtype, ExtensionDtype)):
        return dtype

    # Registered extension types take precedence over numpy.
    registered = registry.find(dtype)
    if registered is not None:
        return registered

    # Fall back to numpy, raising a consistent TypeError on failure.
    try:
        numpy_dtype = np.dtype(dtype)
    except SyntaxError as err:
        # np.dtype uses `eval` which can raise SyntaxError
        raise TypeError(f"data type '{dtype}' not understood") from err

    # Invalid inputs (such as pd.Timestamp) come back from np.dtype as object
    # dtype, and so do the legitimate spellings of "object". Only the latter
    # are accepted. Hashability is checked first to avoid
    # errors/DeprecationWarning when `dtype` is an array.
    explicitly_object = is_hashable(dtype) and dtype in [
        object,
        np.object_,
        "object",
        "O",
    ]
    if numpy_dtype.kind == "O" and not explicitly_object:
        raise TypeError(f"dtype '{dtype}' not understood")

    return numpy_dtype