""" Common type operations. """ from typing import Any, Callable, Union import warnings import numpy as np from pandas._libs import Interval, Period, algos from pandas._libs.tslibs import conversion from pandas._typing import ArrayLike, DtypeObj, Optional from pandas.core.dtypes.base import registry from pandas.core.dtypes.dtypes import ( CategoricalDtype, DatetimeTZDtype, ExtensionDtype, IntervalDtype, PeriodDtype, ) from pandas.core.dtypes.generic import ABCCategorical, ABCIndexClass from pandas.core.dtypes.inference import ( # noqa:F401 is_array_like, is_bool, is_complex, is_dataclass, is_decimal, is_dict_like, is_file_like, is_float, is_hashable, is_integer, is_interval, is_iterator, is_list_like, is_named_tuple, is_nested_list_like, is_number, is_re, is_re_compilable, is_scalar, is_sequence, ) POSSIBLY_CAST_DTYPES = { np.dtype(t).name for t in [ "O", "int8", "uint8", "int16", "uint16", "int32", "uint32", "int64", "uint64", ] } DT64NS_DTYPE = conversion.DT64NS_DTYPE TD64NS_DTYPE = conversion.TD64NS_DTYPE INT64_DTYPE = np.dtype(np.int64) # oh the troubles to reduce import time _is_scipy_sparse = None ensure_float64 = algos.ensure_float64 ensure_float32 = algos.ensure_float32 def ensure_float(arr): """ Ensure that an array object has a float dtype if possible. Parameters ---------- arr : array-like The array whose data type we want to enforce as float. Returns ------- float_arr : The original array cast to the float dtype if possible. Otherwise, the original array is returned. """ if is_extension_array_dtype(arr.dtype): if is_float_dtype(arr.dtype): arr = arr.to_numpy(dtype=arr.dtype.numpy_dtype, na_value=np.nan) else: arr = arr.to_numpy(dtype="float64", na_value=np.nan) elif issubclass(arr.dtype.type, (np.integer, np.bool_)): arr = arr.astype(float) return arr ensure_uint64 = algos.ensure_uint64 ensure_int64 = algos.ensure_int64 ensure_int32 = algos.ensure_int32 ensure_int16 = algos.ensure_int16 ensure_int8 = algos.ensure_int8 ensure_platform_int = algos.ensure_platform_int ensure_object = algos.ensure_object def ensure_str(value: Union[bytes, Any]) -> str: """ Ensure that bytes and non-strings get converted into ``str`` objects. """ if isinstance(value, bytes): value = value.decode("utf-8") elif not isinstance(value, str): value = str(value) return value def ensure_int_or_float(arr: ArrayLike, copy: bool = False) -> np.ndarray: """ Ensure that an dtype array of some integer dtype has an int64 dtype if possible. If it's not possible, potentially because of overflow, convert the array to float64 instead. Parameters ---------- arr : array-like The array whose data type we want to enforce. copy: bool Whether to copy the original array or reuse it in place, if possible. Returns ------- out_arr : The input array cast as int64 if possible without overflow. Otherwise the input array cast to float64. Notes ----- If the array is explicitly of type uint64 the type will remain unchanged. """ # TODO: GH27506 potential bug with ExtensionArrays try: # error: Unexpected keyword argument "casting" for "astype" return arr.astype("int64", copy=copy, casting="safe") # type: ignore[call-arg] except TypeError: pass try: # error: Unexpected keyword argument "casting" for "astype" return arr.astype("uint64", copy=copy, casting="safe") # type: ignore[call-arg] except TypeError: if is_extension_array_dtype(arr.dtype): return arr.to_numpy(dtype="float64", na_value=np.nan) return arr.astype("float64", copy=copy) def ensure_python_int(value: Union[int, np.integer]) -> int: """ Ensure that a value is a python int. Parameters ---------- value: int or numpy.integer Returns ------- int Raises ------ TypeError: if the value isn't an int or can't be converted to one. """ if not is_scalar(value): raise TypeError( f"Value needs to be a scalar value, was type {type(value).__name__}" ) try: new_value = int(value) assert new_value == value except (TypeError, ValueError, AssertionError) as err: raise TypeError(f"Wrong type {type(value)} for value {value}") from err return new_value def classes(*klasses) -> Callable: """ evaluate if the tipo is a subclass of the klasses """ return lambda tipo: issubclass(tipo, klasses) def classes_and_not_datetimelike(*klasses) -> Callable: """ evaluate if the tipo is a subclass of the klasses and not a datetimelike """ return lambda tipo: ( issubclass(tipo, klasses) and not issubclass(tipo, (np.datetime64, np.timedelta64)) ) def is_object_dtype(arr_or_dtype) -> bool: """ Check whether an array-like or dtype is of the object dtype. Parameters ---------- arr_or_dtype : array-like The array-like or dtype to check. Returns ------- boolean Whether or not the array-like or dtype is of the object dtype. Examples -------- >>> is_object_dtype(object) True >>> is_object_dtype(int) False >>> is_object_dtype(np.array([], dtype=object)) True >>> is_object_dtype(np.array([], dtype=int)) False >>> is_object_dtype([1, 2, 3]) False """ return _is_dtype_type(arr_or_dtype, classes(np.object_)) def is_sparse(arr) -> bool: """ Check whether an array-like is a 1-D pandas sparse array. Check that the one-dimensional array-like is a pandas sparse array. Returns True if it is a pandas sparse array, not another type of sparse array. Parameters ---------- arr : array-like Array-like to check. Returns ------- bool Whether or not the array-like is a pandas sparse array. Examples -------- Returns `True` if the parameter is a 1-D pandas sparse array. >>> is_sparse(pd.arrays.SparseArray([0, 0, 1, 0])) True >>> is_sparse(pd.Series(pd.arrays.SparseArray([0, 0, 1, 0]))) True Returns `False` if the parameter is not sparse. >>> is_sparse(np.array([0, 0, 1, 0])) False >>> is_sparse(pd.Series([0, 1, 0, 0])) False Returns `False` if the parameter is not a pandas sparse array. >>> from scipy.sparse import bsr_matrix >>> is_sparse(bsr_matrix([0, 1, 0, 0])) False Returns `False` if the parameter has more than one dimension. """ from pandas.core.arrays.sparse import SparseDtype dtype = getattr(arr, "dtype", arr) return isinstance(dtype, SparseDtype) def is_scipy_sparse(arr) -> bool: """ Check whether an array-like is a scipy.sparse.spmatrix instance. Parameters ---------- arr : array-like The array-like to check. Returns ------- boolean Whether or not the array-like is a scipy.sparse.spmatrix instance. Notes ----- If scipy is not installed, this function will always return False. Examples -------- >>> from scipy.sparse import bsr_matrix >>> is_scipy_sparse(bsr_matrix([1, 2, 3])) True >>> is_scipy_sparse(pd.arrays.SparseArray([1, 2, 3])) False """ global _is_scipy_sparse if _is_scipy_sparse is None: try: from scipy.sparse import issparse as _is_scipy_sparse except ImportError: _is_scipy_sparse = lambda _: False assert _is_scipy_sparse is not None return _is_scipy_sparse(arr) def is_categorical(arr) -> bool: """ Check whether an array-like is a Categorical instance. Parameters ---------- arr : array-like The array-like to check. Returns ------- boolean Whether or not the array-like is of a Categorical instance. Examples -------- >>> is_categorical([1, 2, 3]) False Categoricals, Series Categoricals, and CategoricalIndex will return True. >>> cat = pd.Categorical([1, 2, 3]) >>> is_categorical(cat) True >>> is_categorical(pd.Series(cat)) True >>> is_categorical(pd.CategoricalIndex([1, 2, 3])) True """ warnings.warn( "is_categorical is deprecated and will be removed in a future version. " "Use is_categorical_dtype instead", FutureWarning, stacklevel=2, ) return isinstance(arr, ABCCategorical) or is_categorical_dtype(arr) def is_datetime64_dtype(arr_or_dtype) -> bool: """ Check whether an array-like or dtype is of the datetime64 dtype. Parameters ---------- arr_or_dtype : array-like The array-like or dtype to check. Returns ------- boolean Whether or not the array-like or dtype is of the datetime64 dtype. Examples -------- >>> is_datetime64_dtype(object) False >>> is_datetime64_dtype(np.datetime64) True >>> is_datetime64_dtype(np.array([], dtype=int)) False >>> is_datetime64_dtype(np.array([], dtype=np.datetime64)) True >>> is_datetime64_dtype([1, 2, 3]) False """ if isinstance(arr_or_dtype, np.dtype): # GH#33400 fastpath for dtype object return arr_or_dtype.kind == "M" return _is_dtype_type(arr_or_dtype, classes(np.datetime64)) def is_datetime64tz_dtype(arr_or_dtype) -> bool: """ Check whether an array-like or dtype is of a DatetimeTZDtype dtype. Parameters ---------- arr_or_dtype : array-like The array-like or dtype to check. Returns ------- boolean Whether or not the array-like or dtype is of a DatetimeTZDtype dtype. Examples -------- >>> is_datetime64tz_dtype(object) False >>> is_datetime64tz_dtype([1, 2, 3]) False >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3])) # tz-naive False >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) True >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") >>> s = pd.Series([], dtype=dtype) >>> is_datetime64tz_dtype(dtype) True >>> is_datetime64tz_dtype(s) True """ if isinstance(arr_or_dtype, ExtensionDtype): # GH#33400 fastpath for dtype object return arr_or_dtype.kind == "M" if arr_or_dtype is None: return False return DatetimeTZDtype.is_dtype(arr_or_dtype) def is_timedelta64_dtype(arr_or_dtype) -> bool: """ Check whether an array-like or dtype is of the timedelta64 dtype. Parameters ---------- arr_or_dtype : array-like The array-like or dtype to check. Returns ------- boolean Whether or not the array-like or dtype is of the timedelta64 dtype. Examples -------- >>> is_timedelta64_dtype(object) False >>> is_timedelta64_dtype(np.timedelta64) True >>> is_timedelta64_dtype([1, 2, 3]) False >>> is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]")) True >>> is_timedelta64_dtype('0 days') False """ if isinstance(arr_or_dtype, np.dtype): # GH#33400 fastpath for dtype object return arr_or_dtype.kind == "m" return _is_dtype_type(arr_or_dtype, classes(np.timedelta64)) def is_period_dtype(arr_or_dtype) -> bool: """ Check whether an array-like or dtype is of the Period dtype. Parameters ---------- arr_or_dtype : array-like The array-like or dtype to check. Returns ------- boolean Whether or not the array-like or dtype is of the Period dtype. Examples -------- >>> is_period_dtype(object) False >>> is_period_dtype(PeriodDtype(freq="D")) True >>> is_period_dtype([1, 2, 3]) False >>> is_period_dtype(pd.Period("2017-01-01")) False >>> is_period_dtype(pd.PeriodIndex([], freq="A")) True """ if isinstance(arr_or_dtype, ExtensionDtype): # GH#33400 fastpath for dtype object return arr_or_dtype.type is Period if arr_or_dtype is None: return False return PeriodDtype.is_dtype(arr_or_dtype) def is_interval_dtype(arr_or_dtype) -> bool: """ Check whether an array-like or dtype is of the Interval dtype. Parameters ---------- arr_or_dtype : array-like The array-like or dtype to check. Returns ------- boolean Whether or not the array-like or dtype is of the Interval dtype. Examples -------- >>> is_interval_dtype(object) False >>> is_interval_dtype(IntervalDtype()) True >>> is_interval_dtype([1, 2, 3]) False >>> >>> interval = pd.Interval(1, 2, closed="right") >>> is_interval_dtype(interval) False >>> is_interval_dtype(pd.IntervalIndex([interval])) True """ if isinstance(arr_or_dtype, ExtensionDtype): # GH#33400 fastpath for dtype object return arr_or_dtype.type is Interval if arr_or_dtype is None: return False return IntervalDtype.is_dtype(arr_or_dtype) def is_categorical_dtype(arr_or_dtype) -> bool: """ Check whether an array-like or dtype is of the Categorical dtype. Parameters ---------- arr_or_dtype : array-like The array-like or dtype to check. Returns ------- boolean Whether or not the array-like or dtype is of the Categorical dtype. Examples -------- >>> is_categorical_dtype(object) False >>> is_categorical_dtype(CategoricalDtype()) True >>> is_categorical_dtype([1, 2, 3]) False >>> is_categorical_dtype(pd.Categorical([1, 2, 3])) True >>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3])) True """ if isinstance(arr_or_dtype, ExtensionDtype): # GH#33400 fastpath for dtype object return arr_or_dtype.name == "category" if arr_or_dtype is None: return False return CategoricalDtype.is_dtype(arr_or_dtype) def is_string_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of the string dtype. Parameters ---------- arr_or_dtype : array-like The array or dtype to check. Returns ------- boolean Whether or not the array or dtype is of the string dtype. Examples -------- >>> is_string_dtype(str) True >>> is_string_dtype(object) True >>> is_string_dtype(int) False >>> >>> is_string_dtype(np.array(['a', 'b'])) True >>> is_string_dtype(pd.Series([1, 2])) False """ # TODO: gh-15585: consider making the checks stricter. def condition(dtype) -> bool: return dtype.kind in ("O", "S", "U") and not is_excluded_dtype(dtype) def is_excluded_dtype(dtype) -> bool: """ These have kind = "O" but aren't string dtypes so need to be explicitly excluded """ is_excluded_checks = (is_period_dtype, is_interval_dtype, is_categorical_dtype) return any(is_excluded(dtype) for is_excluded in is_excluded_checks) return _is_dtype(arr_or_dtype, condition) def is_dtype_equal(source, target) -> bool: """ Check if two dtypes are equal. Parameters ---------- source : The first dtype to compare target : The second dtype to compare Returns ------- boolean Whether or not the two dtypes are equal. Examples -------- >>> is_dtype_equal(int, float) False >>> is_dtype_equal("int", int) True >>> is_dtype_equal(object, "category") False >>> is_dtype_equal(CategoricalDtype(), "category") True >>> is_dtype_equal(DatetimeTZDtype(tz="UTC"), "datetime64") False """ try: source = get_dtype(source) target = get_dtype(target) return source == target except (TypeError, AttributeError): # invalid comparison # object == category will hit this return False def is_any_int_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of an integer dtype. In this function, timedelta64 instances are also considered "any-integer" type objects and will return True. This function is internal and should not be exposed in the public API. .. versionchanged:: 0.24.0 The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered as integer by this function. Parameters ---------- arr_or_dtype : array-like The array or dtype to check. Returns ------- boolean Whether or not the array or dtype is of an integer dtype. Examples -------- >>> is_any_int_dtype(str) False >>> is_any_int_dtype(int) True >>> is_any_int_dtype(float) False >>> is_any_int_dtype(np.uint64) True >>> is_any_int_dtype(np.datetime64) False >>> is_any_int_dtype(np.timedelta64) True >>> is_any_int_dtype(np.array(['a', 'b'])) False >>> is_any_int_dtype(pd.Series([1, 2])) True >>> is_any_int_dtype(np.array([], dtype=np.timedelta64)) True >>> is_any_int_dtype(pd.Index([1, 2.])) # float False """ return _is_dtype_type(arr_or_dtype, classes(np.integer, np.timedelta64)) def is_integer_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of an integer dtype. Unlike in `in_any_int_dtype`, timedelta64 instances will return False. .. versionchanged:: 0.24.0 The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered as integer by this function. Parameters ---------- arr_or_dtype : array-like The array or dtype to check. Returns ------- boolean Whether or not the array or dtype is of an integer dtype and not an instance of timedelta64. Examples -------- >>> is_integer_dtype(str) False >>> is_integer_dtype(int) True >>> is_integer_dtype(float) False >>> is_integer_dtype(np.uint64) True >>> is_integer_dtype('int8') True >>> is_integer_dtype('Int8') True >>> is_integer_dtype(pd.Int8Dtype) True >>> is_integer_dtype(np.datetime64) False >>> is_integer_dtype(np.timedelta64) False >>> is_integer_dtype(np.array(['a', 'b'])) False >>> is_integer_dtype(pd.Series([1, 2])) True >>> is_integer_dtype(np.array([], dtype=np.timedelta64)) False >>> is_integer_dtype(pd.Index([1, 2.])) # float False """ return _is_dtype_type(arr_or_dtype, classes_and_not_datetimelike(np.integer)) def is_signed_integer_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of a signed integer dtype. Unlike in `in_any_int_dtype`, timedelta64 instances will return False. .. versionchanged:: 0.24.0 The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered as integer by this function. Parameters ---------- arr_or_dtype : array-like The array or dtype to check. Returns ------- boolean Whether or not the array or dtype is of a signed integer dtype and not an instance of timedelta64. Examples -------- >>> is_signed_integer_dtype(str) False >>> is_signed_integer_dtype(int) True >>> is_signed_integer_dtype(float) False >>> is_signed_integer_dtype(np.uint64) # unsigned False >>> is_signed_integer_dtype('int8') True >>> is_signed_integer_dtype('Int8') True >>> is_signed_integer_dtype(pd.Int8Dtype) True >>> is_signed_integer_dtype(np.datetime64) False >>> is_signed_integer_dtype(np.timedelta64) False >>> is_signed_integer_dtype(np.array(['a', 'b'])) False >>> is_signed_integer_dtype(pd.Series([1, 2])) True >>> is_signed_integer_dtype(np.array([], dtype=np.timedelta64)) False >>> is_signed_integer_dtype(pd.Index([1, 2.])) # float False >>> is_signed_integer_dtype(np.array([1, 2], dtype=np.uint32)) # unsigned False """ return _is_dtype_type(arr_or_dtype, classes_and_not_datetimelike(np.signedinteger)) def is_unsigned_integer_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of an unsigned integer dtype. .. versionchanged:: 0.24.0 The nullable Integer dtypes (e.g. pandas.UInt64Dtype) are also considered as integer by this function. Parameters ---------- arr_or_dtype : array-like The array or dtype to check. Returns ------- boolean Whether or not the array or dtype is of an unsigned integer dtype. Examples -------- >>> is_unsigned_integer_dtype(str) False >>> is_unsigned_integer_dtype(int) # signed False >>> is_unsigned_integer_dtype(float) False >>> is_unsigned_integer_dtype(np.uint64) True >>> is_unsigned_integer_dtype('uint8') True >>> is_unsigned_integer_dtype('UInt8') True >>> is_unsigned_integer_dtype(pd.UInt8Dtype) True >>> is_unsigned_integer_dtype(np.array(['a', 'b'])) False >>> is_unsigned_integer_dtype(pd.Series([1, 2])) # signed False >>> is_unsigned_integer_dtype(pd.Index([1, 2.])) # float False >>> is_unsigned_integer_dtype(np.array([1, 2], dtype=np.uint32)) True """ return _is_dtype_type( arr_or_dtype, classes_and_not_datetimelike(np.unsignedinteger) ) def is_int64_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of the int64 dtype. Parameters ---------- arr_or_dtype : array-like The array or dtype to check. Returns ------- boolean Whether or not the array or dtype is of the int64 dtype. Notes ----- Depending on system architecture, the return value of `is_int64_dtype( int)` will be True if the OS uses 64-bit integers and False if the OS uses 32-bit integers. Examples -------- >>> is_int64_dtype(str) False >>> is_int64_dtype(np.int32) False >>> is_int64_dtype(np.int64) True >>> is_int64_dtype('int8') False >>> is_int64_dtype('Int8') False >>> is_int64_dtype(pd.Int64Dtype) True >>> is_int64_dtype(float) False >>> is_int64_dtype(np.uint64) # unsigned False >>> is_int64_dtype(np.array(['a', 'b'])) False >>> is_int64_dtype(np.array([1, 2], dtype=np.int64)) True >>> is_int64_dtype(pd.Index([1, 2.])) # float False >>> is_int64_dtype(np.array([1, 2], dtype=np.uint32)) # unsigned False """ return _is_dtype_type(arr_or_dtype, classes(np.int64)) def is_datetime64_any_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of the datetime64 dtype. Parameters ---------- arr_or_dtype : array-like The array or dtype to check. Returns ------- bool Whether or not the array or dtype is of the datetime64 dtype. Examples -------- >>> is_datetime64_any_dtype(str) False >>> is_datetime64_any_dtype(int) False >>> is_datetime64_any_dtype(np.datetime64) # can be tz-naive True >>> is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern")) True >>> is_datetime64_any_dtype(np.array(['a', 'b'])) False >>> is_datetime64_any_dtype(np.array([1, 2])) False >>> is_datetime64_any_dtype(np.array([], dtype="datetime64[ns]")) True >>> is_datetime64_any_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]")) True """ if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)): # GH#33400 fastpath for dtype object return arr_or_dtype.kind == "M" if arr_or_dtype is None: return False return is_datetime64_dtype(arr_or_dtype) or is_datetime64tz_dtype(arr_or_dtype) def is_datetime64_ns_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of the datetime64[ns] dtype. Parameters ---------- arr_or_dtype : array-like The array or dtype to check. Returns ------- bool Whether or not the array or dtype is of the datetime64[ns] dtype. Examples -------- >>> is_datetime64_ns_dtype(str) False >>> is_datetime64_ns_dtype(int) False >>> is_datetime64_ns_dtype(np.datetime64) # no unit False >>> is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern")) True >>> is_datetime64_ns_dtype(np.array(['a', 'b'])) False >>> is_datetime64_ns_dtype(np.array([1, 2])) False >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64")) # no unit False >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64[ps]")) # wrong unit False >>> is_datetime64_ns_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]")) True """ if arr_or_dtype is None: return False try: tipo = get_dtype(arr_or_dtype) except TypeError: if is_datetime64tz_dtype(arr_or_dtype): tipo = get_dtype(arr_or_dtype.dtype) else: return False return tipo == DT64NS_DTYPE or getattr(tipo, "base", None) == DT64NS_DTYPE def is_timedelta64_ns_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of the timedelta64[ns] dtype. This is a very specific dtype, so generic ones like `np.timedelta64` will return False if passed into this function. Parameters ---------- arr_or_dtype : array-like The array or dtype to check. Returns ------- boolean Whether or not the array or dtype is of the timedelta64[ns] dtype. Examples -------- >>> is_timedelta64_ns_dtype(np.dtype('m8[ns]')) True >>> is_timedelta64_ns_dtype(np.dtype('m8[ps]')) # Wrong frequency False >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]')) True >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64)) False """ return _is_dtype(arr_or_dtype, lambda dtype: dtype == TD64NS_DTYPE) def is_datetime_or_timedelta_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of a timedelta64 or datetime64 dtype. Parameters ---------- arr_or_dtype : array-like The array or dtype to check. Returns ------- boolean Whether or not the array or dtype is of a timedelta64, or datetime64 dtype. Examples -------- >>> is_datetime_or_timedelta_dtype(str) False >>> is_datetime_or_timedelta_dtype(int) False >>> is_datetime_or_timedelta_dtype(np.datetime64) True >>> is_datetime_or_timedelta_dtype(np.timedelta64) True >>> is_datetime_or_timedelta_dtype(np.array(['a', 'b'])) False >>> is_datetime_or_timedelta_dtype(pd.Series([1, 2])) False >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.timedelta64)) True >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64)) True """ return _is_dtype_type(arr_or_dtype, classes(np.datetime64, np.timedelta64)) # This exists to silence numpy deprecation warnings, see GH#29553 def is_numeric_v_string_like(a, b): """ Check if we are comparing a string-like object to a numeric ndarray. NumPy doesn't like to compare such objects, especially numeric arrays and scalar string-likes. Parameters ---------- a : array-like, scalar The first object to check. b : array-like, scalar The second object to check. Returns ------- boolean Whether we return a comparing a string-like object to a numeric array. Examples -------- >>> is_numeric_v_string_like(1, 1) False >>> is_numeric_v_string_like("foo", "foo") False >>> is_numeric_v_string_like(1, "foo") # non-array numeric False >>> is_numeric_v_string_like(np.array([1]), "foo") True >>> is_numeric_v_string_like("foo", np.array([1])) # symmetric check True >>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) True >>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) True >>> is_numeric_v_string_like(np.array([1]), np.array([2])) False >>> is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) False """ is_a_array = isinstance(a, np.ndarray) is_b_array = isinstance(b, np.ndarray) is_a_numeric_array = is_a_array and is_numeric_dtype(a) is_b_numeric_array = is_b_array and is_numeric_dtype(b) is_a_string_array = is_a_array and is_string_like_dtype(a) is_b_string_array = is_b_array and is_string_like_dtype(b) is_a_scalar_string_like = not is_a_array and isinstance(a, str) is_b_scalar_string_like = not is_b_array and isinstance(b, str) return ( (is_a_numeric_array and is_b_scalar_string_like) or (is_b_numeric_array and is_a_scalar_string_like) or (is_a_numeric_array and is_b_string_array) or (is_b_numeric_array and is_a_string_array) ) # This exists to silence numpy deprecation warnings, see GH#29553 def is_datetimelike_v_numeric(a, b): """ Check if we are comparing a datetime-like object to a numeric object. By "numeric," we mean an object that is either of an int or float dtype. Parameters ---------- a : array-like, scalar The first object to check. b : array-like, scalar The second object to check. Returns ------- boolean Whether we return a comparing a datetime-like to a numeric object. Examples -------- >>> from datetime import datetime >>> dt = np.datetime64(datetime(2017, 1, 1)) >>> >>> is_datetimelike_v_numeric(1, 1) False >>> is_datetimelike_v_numeric(dt, dt) False >>> is_datetimelike_v_numeric(1, dt) True >>> is_datetimelike_v_numeric(dt, 1) # symmetric check True >>> is_datetimelike_v_numeric(np.array([dt]), 1) True >>> is_datetimelike_v_numeric(np.array([1]), dt) True >>> is_datetimelike_v_numeric(np.array([dt]), np.array([1])) True >>> is_datetimelike_v_numeric(np.array([1]), np.array([2])) False >>> is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) False """ if not hasattr(a, "dtype"): a = np.asarray(a) if not hasattr(b, "dtype"): b = np.asarray(b) def is_numeric(x): """ Check if an object has a numeric dtype (i.e. integer or float). """ return is_integer_dtype(x) or is_float_dtype(x) return (needs_i8_conversion(a) and is_numeric(b)) or ( needs_i8_conversion(b) and is_numeric(a) ) def needs_i8_conversion(arr_or_dtype) -> bool: """ Check whether the array or dtype should be converted to int64. An array-like or dtype "needs" such a conversion if the array-like or dtype is of a datetime-like dtype Parameters ---------- arr_or_dtype : array-like The array or dtype to check. Returns ------- boolean Whether or not the array or dtype should be converted to int64. Examples -------- >>> needs_i8_conversion(str) False >>> needs_i8_conversion(np.int64) False >>> needs_i8_conversion(np.datetime64) True >>> needs_i8_conversion(np.array(['a', 'b'])) False >>> needs_i8_conversion(pd.Series([1, 2])) False >>> needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]")) True >>> needs_i8_conversion(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) True """ if arr_or_dtype is None: return False if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)): # fastpath dtype = arr_or_dtype return dtype.kind in ["m", "M"] or dtype.type is Period return ( is_datetime_or_timedelta_dtype(arr_or_dtype) or is_datetime64tz_dtype(arr_or_dtype) or is_period_dtype(arr_or_dtype) ) def is_numeric_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of a numeric dtype. Parameters ---------- arr_or_dtype : array-like The array or dtype to check. Returns ------- boolean Whether or not the array or dtype is of a numeric dtype. Examples -------- >>> is_numeric_dtype(str) False >>> is_numeric_dtype(int) True >>> is_numeric_dtype(float) True >>> is_numeric_dtype(np.uint64) True >>> is_numeric_dtype(np.datetime64) False >>> is_numeric_dtype(np.timedelta64) False >>> is_numeric_dtype(np.array(['a', 'b'])) False >>> is_numeric_dtype(pd.Series([1, 2])) True >>> is_numeric_dtype(pd.Index([1, 2.])) True >>> is_numeric_dtype(np.array([], dtype=np.timedelta64)) False """ return _is_dtype_type( arr_or_dtype, classes_and_not_datetimelike(np.number, np.bool_) ) def is_string_like_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of a string-like dtype. Unlike `is_string_dtype`, the object dtype is excluded because it is a mixed dtype. Parameters ---------- arr_or_dtype : array-like The array or dtype to check. Returns ------- boolean Whether or not the array or dtype is of the string dtype. Examples -------- >>> is_string_like_dtype(str) True >>> is_string_like_dtype(object) False >>> is_string_like_dtype(np.array(['a', 'b'])) True >>> is_string_like_dtype(pd.Series([1, 2])) False """ return _is_dtype(arr_or_dtype, lambda dtype: dtype.kind in ("S", "U")) def is_float_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of a float dtype. This function is internal and should not be exposed in the public API. Parameters ---------- arr_or_dtype : array-like The array or dtype to check. Returns ------- boolean Whether or not the array or dtype is of a float dtype. Examples -------- >>> is_float_dtype(str) False >>> is_float_dtype(int) False >>> is_float_dtype(float) True >>> is_float_dtype(np.array(['a', 'b'])) False >>> is_float_dtype(pd.Series([1, 2])) False >>> is_float_dtype(pd.Index([1, 2.])) True """ return _is_dtype_type(arr_or_dtype, classes(np.floating)) def is_bool_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of a boolean dtype. Parameters ---------- arr_or_dtype : array-like The array or dtype to check. Returns ------- boolean Whether or not the array or dtype is of a boolean dtype. Notes ----- An ExtensionArray is considered boolean when the ``_is_boolean`` attribute is set to True. Examples -------- >>> is_bool_dtype(str) False >>> is_bool_dtype(int) False >>> is_bool_dtype(bool) True >>> is_bool_dtype(np.bool_) True >>> is_bool_dtype(np.array(['a', 'b'])) False >>> is_bool_dtype(pd.Series([1, 2])) False >>> is_bool_dtype(np.array([True, False])) True >>> is_bool_dtype(pd.Categorical([True, False])) True >>> is_bool_dtype(pd.arrays.SparseArray([True, False])) True """ if arr_or_dtype is None: return False try: dtype = get_dtype(arr_or_dtype) except (TypeError, ValueError): return False if isinstance(arr_or_dtype, CategoricalDtype): arr_or_dtype = arr_or_dtype.categories # now we use the special definition for Index if isinstance(arr_or_dtype, ABCIndexClass): # TODO(jreback) # we don't have a boolean Index class # so its object, we need to infer to # guess this return arr_or_dtype.is_object and arr_or_dtype.inferred_type == "boolean" elif is_extension_array_dtype(arr_or_dtype): return getattr(dtype, "_is_boolean", False) return issubclass(dtype.type, np.bool_) def is_extension_type(arr) -> bool: """ Check whether an array-like is of a pandas extension class instance. .. deprecated:: 1.0.0 Use ``is_extension_array_dtype`` instead. Extension classes include categoricals, pandas sparse objects (i.e. classes represented within the pandas library and not ones external to it like scipy sparse matrices), and datetime-like arrays. Parameters ---------- arr : array-like The array-like to check. Returns ------- boolean Whether or not the array-like is of a pandas extension class instance. Examples -------- >>> is_extension_type([1, 2, 3]) False >>> is_extension_type(np.array([1, 2, 3])) False >>> >>> cat = pd.Categorical([1, 2, 3]) >>> >>> is_extension_type(cat) True >>> is_extension_type(pd.Series(cat)) True >>> is_extension_type(pd.arrays.SparseArray([1, 2, 3])) True >>> from scipy.sparse import bsr_matrix >>> is_extension_type(bsr_matrix([1, 2, 3])) False >>> is_extension_type(pd.DatetimeIndex([1, 2, 3])) False >>> is_extension_type(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) True >>> >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") >>> s = pd.Series([], dtype=dtype) >>> is_extension_type(s) True """ warnings.warn( "'is_extension_type' is deprecated and will be removed in a future " "version. Use 'is_extension_array_dtype' instead.", FutureWarning, stacklevel=2, ) if is_categorical_dtype(arr): return True elif is_sparse(arr): return True elif is_datetime64tz_dtype(arr): return True return False def is_extension_array_dtype(arr_or_dtype) -> bool: """ Check if an object is a pandas extension array type. See the :ref:`Use Guide ` for more. Parameters ---------- arr_or_dtype : object For array-like input, the ``.dtype`` attribute will be extracted. Returns ------- bool Whether the `arr_or_dtype` is an extension array type. Notes ----- This checks whether an object implements the pandas extension array interface. In pandas, this includes: * Categorical * Sparse * Interval * Period * DatetimeArray * TimedeltaArray Third-party libraries may implement arrays or types satisfying this interface as well. Examples -------- >>> from pandas.api.types import is_extension_array_dtype >>> arr = pd.Categorical(['a', 'b']) >>> is_extension_array_dtype(arr) True >>> is_extension_array_dtype(arr.dtype) True >>> arr = np.array(['a', 'b']) >>> is_extension_array_dtype(arr.dtype) False """ dtype = getattr(arr_or_dtype, "dtype", arr_or_dtype) return isinstance(dtype, ExtensionDtype) or registry.find(dtype) is not None def is_complex_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of a complex dtype. Parameters ---------- arr_or_dtype : array-like The array or dtype to check. Returns ------- boolean Whether or not the array or dtype is of a complex dtype. Examples -------- >>> is_complex_dtype(str) False >>> is_complex_dtype(int) False >>> is_complex_dtype(np.complex_) True >>> is_complex_dtype(np.array(['a', 'b'])) False >>> is_complex_dtype(pd.Series([1, 2])) False >>> is_complex_dtype(np.array([1 + 1j, 5])) True """ return _is_dtype_type(arr_or_dtype, classes(np.complexfloating)) def _is_dtype(arr_or_dtype, condition) -> bool: """ Return a boolean if the condition is satisfied for the arr_or_dtype. Parameters ---------- arr_or_dtype : array-like, str, np.dtype, or ExtensionArrayType The array-like or dtype object whose dtype we want to extract. condition : callable[Union[np.dtype, ExtensionDtype]] Returns ------- bool """ if arr_or_dtype is None: return False try: dtype = get_dtype(arr_or_dtype) except (TypeError, ValueError, UnicodeEncodeError): return False return condition(dtype) def get_dtype(arr_or_dtype) -> DtypeObj: """ Get the dtype instance associated with an array or dtype object. Parameters ---------- arr_or_dtype : array-like The array-like or dtype object whose dtype we want to extract. Returns ------- obj_dtype : The extract dtype instance from the passed in array or dtype object. Raises ------ TypeError : The passed in object is None. """ if arr_or_dtype is None: raise TypeError("Cannot deduce dtype from null object") # fastpath elif isinstance(arr_or_dtype, np.dtype): return arr_or_dtype elif isinstance(arr_or_dtype, type): return np.dtype(arr_or_dtype) # if we have an array-like elif hasattr(arr_or_dtype, "dtype"): arr_or_dtype = arr_or_dtype.dtype return pandas_dtype(arr_or_dtype) def _is_dtype_type(arr_or_dtype, condition) -> bool: """ Return a boolean if the condition is satisfied for the arr_or_dtype. Parameters ---------- arr_or_dtype : array-like The array-like or dtype object whose dtype we want to extract. condition : callable[Union[np.dtype, ExtensionDtypeType]] Returns ------- bool : if the condition is satisfied for the arr_or_dtype """ if arr_or_dtype is None: return condition(type(None)) # fastpath if isinstance(arr_or_dtype, np.dtype): return condition(arr_or_dtype.type) elif isinstance(arr_or_dtype, type): if issubclass(arr_or_dtype, ExtensionDtype): arr_or_dtype = arr_or_dtype.type return condition(np.dtype(arr_or_dtype).type) # if we have an array-like if hasattr(arr_or_dtype, "dtype"): arr_or_dtype = arr_or_dtype.dtype # we are not possibly a dtype elif is_list_like(arr_or_dtype): return condition(type(None)) try: tipo = pandas_dtype(arr_or_dtype).type except (TypeError, ValueError, UnicodeEncodeError): if is_scalar(arr_or_dtype): return condition(type(None)) return False return condition(tipo) def infer_dtype_from_object(dtype): """ Get a numpy dtype.type-style object for a dtype object. This methods also includes handling of the datetime64[ns] and datetime64[ns, TZ] objects. If no dtype can be found, we return ``object``. Parameters ---------- dtype : dtype, type The dtype object whose numpy dtype.type-style object we want to extract. Returns ------- dtype_object : The extracted numpy dtype.type-style object. """ if isinstance(dtype, type) and issubclass(dtype, np.generic): # Type object from a dtype return dtype elif isinstance(dtype, (np.dtype, ExtensionDtype)): # dtype object try: _validate_date_like_dtype(dtype) except TypeError: # Should still pass if we don't have a date-like pass return dtype.type try: dtype = pandas_dtype(dtype) except TypeError: pass if is_extension_array_dtype(dtype): return dtype.type elif isinstance(dtype, str): # TODO(jreback) # should deprecate these if dtype in ["datetimetz", "datetime64tz"]: return DatetimeTZDtype.type elif dtype in ["period"]: raise NotImplementedError if dtype == "datetime" or dtype == "timedelta": dtype += "64" try: return infer_dtype_from_object(getattr(np, dtype)) except (AttributeError, TypeError): # Handles cases like get_dtype(int) i.e., # Python objects that are valid dtypes # (unlike user-defined types, in general) # # TypeError handles the float16 type code of 'e' # further handle internal types pass return infer_dtype_from_object(np.dtype(dtype)) def _validate_date_like_dtype(dtype) -> None: """ Check whether the dtype is a date-like dtype. Raises an error if invalid. Parameters ---------- dtype : dtype, type The dtype to check. Raises ------ TypeError : The dtype could not be casted to a date-like dtype. ValueError : The dtype is an illegal date-like dtype (e.g. the frequency provided is too specific) """ try: typ = np.datetime_data(dtype)[0] except ValueError as e: raise TypeError(e) from e if typ != "generic" and typ != "ns": raise ValueError( f"{repr(dtype.name)} is too specific of a frequency, " f"try passing {repr(dtype.type.__name__)}" ) def validate_all_hashable(*args, error_name: Optional[str] = None) -> None: """ Return None if all args are hashable, else raise a TypeError. Parameters ---------- *args Arguments to validate. error_name : str, optional The name to use if error Raises ------ TypeError : If an argument is not hashable Returns ------- None """ if not all(is_hashable(arg) for arg in args): if error_name: raise TypeError(f"{error_name} must be a hashable type") else: raise TypeError("All elements must be hashable") def pandas_dtype(dtype) -> DtypeObj: """ Convert input into a pandas only dtype object or a numpy dtype object. Parameters ---------- dtype : object to be converted Returns ------- np.dtype or a pandas dtype Raises ------ TypeError if not a dtype """ # short-circuit if isinstance(dtype, np.ndarray): return dtype.dtype elif isinstance(dtype, (np.dtype, ExtensionDtype)): return dtype # registered extension types result = registry.find(dtype) if result is not None: return result # try a numpy dtype # raise a consistent TypeError if failed try: npdtype = np.dtype(dtype) except SyntaxError as err: # np.dtype uses `eval` which can raise SyntaxError raise TypeError(f"data type '{dtype}' not understood") from err # Any invalid dtype (such as pd.Timestamp) should raise an error. # np.dtype(invalid_type).kind = 0 for such objects. However, this will # also catch some valid dtypes such as object, np.object_ and 'object' # which we safeguard against by catching them earlier and returning # np.dtype(valid_dtype) before this condition is evaluated. if is_hashable(dtype) and dtype in [object, np.object_, "object", "O"]: # check hashability to avoid errors/DeprecationWarning when we get # here and `dtype` is an array return npdtype elif npdtype.kind == "O": raise TypeError(f"dtype '{dtype}' not understood") return npdtype