766 lines
24 KiB
Python
766 lines
24 KiB
Python
|
from datetime import datetime
|
||
|
from typing import List
|
||
|
|
||
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
import pandas.util._test_decorators as td
|
||
|
|
||
|
from pandas.core.dtypes.cast import astype_nansafe
|
||
|
import pandas.core.dtypes.common as com
|
||
|
from pandas.core.dtypes.dtypes import (
|
||
|
CategoricalDtype,
|
||
|
CategoricalDtypeType,
|
||
|
DatetimeTZDtype,
|
||
|
IntervalDtype,
|
||
|
PeriodDtype,
|
||
|
)
|
||
|
from pandas.core.dtypes.missing import isna
|
||
|
|
||
|
import pandas as pd
|
||
|
import pandas._testing as tm
|
||
|
from pandas.arrays import SparseArray
|
||
|
|
||
|
|
||
|
# EA & Actual Dtypes
|
||
|
def to_ea_dtypes(dtypes):
    """
    Map string dtype names to the matching attributes of the ``pd`` namespace.

    Each entry has "Dtype" appended and is then looked up on ``pd``.
    """
    attribute_names = (name + "Dtype" for name in dtypes)
    return [getattr(pd, attribute) for attribute in attribute_names]
|
||
|
|
||
|
|
||
|
def to_numpy_dtypes(dtypes):
    """
    Map string dtype names to the matching attributes of the ``np`` namespace.

    Entries that are not strings are skipped.
    """
    string_names = filter(lambda entry: isinstance(entry, str), dtypes)
    return [getattr(np, name) for name in string_names]
|
||
|
|
||
|
|
||
|
class TestPandasDtype:
    """Checks of ``com.pandas_dtype`` for invalid, numpy and extension inputs."""

    # Passing invalid dtype, both as a string or object, must raise TypeError
    # Per issue GH15520
    @pytest.mark.parametrize("box", [pd.Timestamp, "pd.Timestamp", list])
    def test_invalid_dtype_error(self, box):
        """Inputs that are not dtypes raise TypeError matching "not understood"."""
        expected_msg = "not understood"
        with pytest.raises(TypeError, match=expected_msg):
            com.pandas_dtype(box)

    @pytest.mark.parametrize(
        "dtype",
        [
            object,
            "float64",
            np.object_,
            np.dtype("object"),
            "O",
            np.float64,
            float,
            np.dtype("float64"),
        ],
    )
    def test_pandas_dtype_valid(self, dtype):
        """The converted dtype compares equal to the input it was built from."""
        converted = com.pandas_dtype(dtype)
        assert converted == dtype

    @pytest.mark.parametrize(
        "dtype", ["M8[ns]", "m8[ns]", "object", "float64", "int64"]
    )
    def test_numpy_dtype(self, dtype):
        """String aliases convert to the same dtype as ``np.dtype`` gives."""
        expected = np.dtype(dtype)
        assert com.pandas_dtype(dtype) == expected

    def test_numpy_string_dtype(self):
        """"U" and "S" convert to the numpy dtypes of the same code."""
        # do not parse freq-like string as period dtype
        for code in ("U", "S"):
            assert com.pandas_dtype(code) == np.dtype(code)

    @pytest.mark.parametrize(
        "dtype",
        [
            "datetime64[ns, US/Eastern]",
            "datetime64[ns, Asia/Tokyo]",
            "datetime64[ns, UTC]",
            # GH#33885 check that the M8 alias is understood
            "M8[ns, US/Eastern]",
            "M8[ns, Asia/Tokyo]",
            "M8[ns, UTC]",
        ],
    )
    def test_datetimetz_dtype(self, dtype):
        """tz-aware strings match DatetimeTZDtype and the string itself."""
        converted = com.pandas_dtype(dtype)
        assert converted == DatetimeTZDtype.construct_from_string(dtype)
        assert converted == dtype

    def test_categorical_dtype(self):
        """The "category" string converts to a default CategoricalDtype."""
        expected = CategoricalDtype()
        assert com.pandas_dtype("category") == expected

    @pytest.mark.parametrize(
        "dtype",
        [
            "period[D]",
            "period[3M]",
            "period[U]",
            "Period[D]",
            "Period[3M]",
            "Period[U]",
        ],
    )
    def test_period_dtype(self, dtype):
        """Period strings convert to the very same object PeriodDtype returns."""
        expected = PeriodDtype(dtype)
        converted = com.pandas_dtype(dtype)
        assert converted is expected
        assert converted == expected
        assert converted == dtype
|
||
|
|
||
|
|
||
|
# One representative dtype per family, keyed by a short family name.
dtypes = dict(
    datetime_tz=com.pandas_dtype("datetime64[ns, US/Eastern]"),
    datetime=com.pandas_dtype("datetime64[ns]"),
    timedelta=com.pandas_dtype("timedelta64[ns]"),
    period=PeriodDtype("D"),
    integer=np.dtype(np.int64),
    float=np.dtype(np.float64),
    object=np.dtype(object),
    category=com.pandas_dtype("category"),
)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("name1,dtype1", list(dtypes.items()), ids=str)
@pytest.mark.parametrize("name2,dtype2", list(dtypes.items()), ids=str)
def test_dtype_equal(name1, dtype1, name2, dtype2):
    """Each dtype in ``dtypes`` equals itself and no differently named entry."""
    # match equal to self, but not equal to other
    assert com.is_dtype_equal(dtype1, dtype1)
    if name1 == name2:
        return
    assert not com.is_dtype_equal(dtype1, dtype2)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "dtype1,dtype2",
    [
        (np.int8, np.int64),
        (np.int16, np.int64),
        (np.int32, np.int64),
        (np.float32, np.float64),
        (PeriodDtype("D"), PeriodDtype("2D")),  # PeriodType
        (
            com.pandas_dtype("datetime64[ns, US/Eastern]"),
            com.pandas_dtype("datetime64[ns, CET]"),
        ),  # Datetime
        (None, None),  # gh-15941: no exception should be raised.
    ],
)
def test_dtype_equal_strict(dtype1, dtype2):
    """Each listed pair is reported as not equal by ``is_dtype_equal``."""
    considered_equal = com.is_dtype_equal(dtype1, dtype2)
    assert not considered_equal
|
||
|
|
||
|
|
||
|
def get_is_dtype_funcs():
    """
    Collect the attributes of pandas.core.dtypes.common whose names
    start with 'is_' and end with 'dtype'.

    Returns
    -------
    list
        The attribute objects, in the order ``dir(com)`` lists their names.
    """
    return [
        getattr(com, attr_name)
        for attr_name in dir(com)
        if attr_name.startswith("is_") and attr_name.endswith("dtype")
    ]
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("func", get_is_dtype_funcs(), ids=lambda x: x.__name__)
def test_get_dtype_error_catch(func):
    """Every collected ``is_*dtype`` function returns a falsy value for None."""
    # see gh-15941
    #
    # No exception should be raised.
    outcome = func(None)
    assert not outcome
|
||
|
|
||
|
|
||
|
def test_is_object():
    """is_object_dtype is true for object inputs and false for int/list ones."""
    for object_like in (object, np.array([], dtype=object)):
        assert com.is_object_dtype(object_like)

    for other in (int, np.array([], dtype=int), [1, 2, 3]):
        assert not com.is_object_dtype(other)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)]
)
def test_is_sparse(check_scipy):
    """is_sparse accepts SparseArray and rejects ndarray and scipy matrices."""
    sparse_values = SparseArray([1, 2, 3])
    dense_values = np.array([1, 2, 3])

    assert com.is_sparse(sparse_values)
    assert not com.is_sparse(dense_values)

    if not check_scipy:
        return

    import scipy.sparse

    assert not com.is_sparse(scipy.sparse.bsr_matrix([1, 2, 3]))
|
||
|
|
||
|
|
||
|
@td.skip_if_no_scipy
def test_is_scipy_sparse():
    """is_scipy_sparse accepts a scipy bsr_matrix and rejects SparseArray."""
    from scipy.sparse import bsr_matrix

    scipy_matrix = bsr_matrix([1, 2, 3])
    assert com.is_scipy_sparse(scipy_matrix)

    pandas_sparse = SparseArray([1, 2, 3])
    assert not com.is_scipy_sparse(pandas_sparse)
|
||
|
|
||
|
|
||
|
def test_is_categorical():
|
||
|
cat = pd.Categorical([1, 2, 3])
|
||
|
with tm.assert_produces_warning(FutureWarning):
|
||
|
assert com.is_categorical(cat)
|
||
|
assert com.is_categorical(pd.Series(cat))
|
||
|
assert com.is_categorical(pd.CategoricalIndex([1, 2, 3]))
|
||
|
|
||
|
assert not com.is_categorical([1, 2, 3])
|
||
|
|
||
|
|
||
|
def test_is_categorical_deprecation():
|
||
|
# GH#33385
|
||
|
with tm.assert_produces_warning(FutureWarning):
|
||
|
com.is_categorical([1, 2, 3])
|
||
|
|
||
|
|
||
|
def test_is_datetime64_dtype():
    """is_datetime64_dtype is true only for the datetime64 type and arrays."""
    rejected = (object, [1, 2, 3], np.array([], dtype=int))
    for candidate in rejected:
        assert not com.is_datetime64_dtype(candidate)

    accepted = (np.datetime64, np.array([], dtype=np.datetime64))
    for candidate in accepted:
        assert com.is_datetime64_dtype(candidate)
|
||
|
|
||
|
|
||
|
def test_is_datetime64tz_dtype():
    """is_datetime64tz_dtype is true only for the tz-aware DatetimeIndex."""
    naive_index = pd.DatetimeIndex([1, 2, 3])
    for candidate in (object, [1, 2, 3], naive_index):
        assert not com.is_datetime64tz_dtype(candidate)

    aware_index = pd.DatetimeIndex(["2000"], tz="US/Eastern")
    assert com.is_datetime64tz_dtype(aware_index)
|
||
|
|
||
|
|
||
|
def test_is_timedelta64_dtype():
    """is_timedelta64_dtype rejects non-timedelta inputs, including strings."""
    rejected = (
        object,
        None,
        [1, 2, 3],
        np.array([], dtype=np.datetime64),
        "0 days",
        "0 days 00:00:00",
        ["0 days 00:00:00"],
        "NO DATE",
    )
    for candidate in rejected:
        assert not com.is_timedelta64_dtype(candidate)

    accepted = (
        np.timedelta64,
        pd.Series([], dtype="timedelta64[ns]"),
        pd.to_timedelta(["0 days", "1 days"]),
    )
    for candidate in accepted:
        assert com.is_timedelta64_dtype(candidate)
|
||
|
|
||
|
|
||
|
def test_is_period_dtype():
    """is_period_dtype accepts PeriodDtype/PeriodIndex, not a Period scalar."""
    rejected = (object, [1, 2, 3], pd.Period("2017-01-01"))
    for candidate in rejected:
        assert not com.is_period_dtype(candidate)

    accepted = (PeriodDtype(freq="D"), pd.PeriodIndex([], freq="A"))
    for candidate in accepted:
        assert com.is_period_dtype(candidate)
|
||
|
|
||
|
|
||
|
def test_is_interval_dtype():
    """is_interval_dtype accepts IntervalDtype/IntervalIndex, not an Interval."""
    for candidate in (object, [1, 2, 3]):
        assert not com.is_interval_dtype(candidate)

    assert com.is_interval_dtype(IntervalDtype())

    interval = pd.Interval(1, 2, closed="right")
    scalar_result = com.is_interval_dtype(interval)
    index_result = com.is_interval_dtype(pd.IntervalIndex([interval]))
    assert not scalar_result
    assert index_result
|
||
|
|
||
|
|
||
|
def test_is_categorical_dtype():
    """is_categorical_dtype accepts categorical dtype/containers only."""
    for candidate in (object, [1, 2, 3]):
        assert not com.is_categorical_dtype(candidate)

    accepted = (
        CategoricalDtype(),
        pd.Categorical([1, 2, 3]),
        pd.CategoricalIndex([1, 2, 3]),
    )
    for candidate in accepted:
        assert com.is_categorical_dtype(candidate)
|
||
|
|
||
|
|
||
|
def test_is_string_dtype():
    """is_string_dtype rejects integer inputs and accepts str/object/string."""
    for candidate in (int, pd.Series([1, 2])):
        assert not com.is_string_dtype(candidate)

    accepted = (
        str,
        object,
        np.array(["a", "b"]),
        pd.StringDtype(),
        pd.array(["a", "b"], dtype="string"),
    )
    for candidate in accepted:
        assert com.is_string_dtype(candidate)
|
||
|
|
||
|
|
||
|
# Starts empty; it is the first operand of the parameter list built for
# test_is_integer_dtype.
integer_dtypes: List = []
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "dtype",
    [
        *integer_dtypes,
        pd.Series([1, 2]),
        *tm.ALL_INT_DTYPES,
        *to_numpy_dtypes(tm.ALL_INT_DTYPES),
        *tm.ALL_EA_INT_DTYPES,
        *to_ea_dtypes(tm.ALL_EA_INT_DTYPES),
    ],
)
def test_is_integer_dtype(dtype):
    """Every listed integer dtype is recognised by ``is_integer_dtype``."""
    recognised = com.is_integer_dtype(dtype)
    assert recognised
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "dtype",
    [
        str,
        float,
        np.datetime64,
        np.timedelta64,
        pd.Index([1, 2.0]),
        np.array(["a", "b"]),
        np.array([], dtype=np.timedelta64),
    ],
)
def test_is_not_integer_dtype(dtype):
    """None of the listed inputs is recognised by ``is_integer_dtype``."""
    recognised = com.is_integer_dtype(dtype)
    assert not recognised
|
||
|
|
||
|
|
||
|
# Starts empty; it is the first operand of the parameter list built for
# test_is_signed_integer_dtype.
signed_integer_dtypes: List = []
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "dtype",
    signed_integer_dtypes
    + [pd.Series([1, 2])]
    + tm.SIGNED_INT_DTYPES
    + to_numpy_dtypes(tm.SIGNED_INT_DTYPES)
    + tm.SIGNED_EA_INT_DTYPES
    + to_ea_dtypes(tm.SIGNED_EA_INT_DTYPES),
)
def test_is_signed_integer_dtype(dtype):
    """
    Every listed signed integer dtype is recognised as signed.

    The parameters combine the signed numpy and extension-array dtype lists
    from ``pandas._testing`` (as given and converted through
    ``to_numpy_dtypes`` / ``to_ea_dtypes``) plus a Series built from Python
    ints.
    """
    # Assert the signed-specific predicate: the generic is_integer_dtype
    # check would also pass for unsigned inputs and so would not exercise
    # the function this test is named after.
    assert com.is_signed_integer_dtype(dtype)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "dtype",
    [
        str,
        float,
        np.datetime64,
        np.timedelta64,
        pd.Index([1, 2.0]),
        np.array(["a", "b"]),
        np.array([], dtype=np.timedelta64),
        *tm.UNSIGNED_INT_DTYPES,
        *to_numpy_dtypes(tm.UNSIGNED_INT_DTYPES),
        *tm.UNSIGNED_EA_INT_DTYPES,
        *to_ea_dtypes(tm.UNSIGNED_EA_INT_DTYPES),
    ],
)
def test_is_not_signed_integer_dtype(dtype):
    """Non-integer and unsigned inputs are not recognised as signed integers."""
    recognised = com.is_signed_integer_dtype(dtype)
    assert not recognised
|
||
|
|
||
|
|
||
|
# Starts empty; it is the first operand of the parameter list built for
# test_is_unsigned_integer_dtype.
unsigned_integer_dtypes: List = []
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "dtype",
    [
        *unsigned_integer_dtypes,
        pd.Series([1, 2], dtype=np.uint32),
        *tm.UNSIGNED_INT_DTYPES,
        *to_numpy_dtypes(tm.UNSIGNED_INT_DTYPES),
        *tm.UNSIGNED_EA_INT_DTYPES,
        *to_ea_dtypes(tm.UNSIGNED_EA_INT_DTYPES),
    ],
)
def test_is_unsigned_integer_dtype(dtype):
    """Every listed unsigned dtype is recognised as an unsigned integer."""
    recognised = com.is_unsigned_integer_dtype(dtype)
    assert recognised
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "dtype",
    [
        str,
        float,
        np.datetime64,
        np.timedelta64,
        pd.Index([1, 2.0]),
        np.array(["a", "b"]),
        np.array([], dtype=np.timedelta64),
        *tm.SIGNED_INT_DTYPES,
        *to_numpy_dtypes(tm.SIGNED_INT_DTYPES),
        *tm.SIGNED_EA_INT_DTYPES,
        *to_ea_dtypes(tm.SIGNED_EA_INT_DTYPES),
    ],
)
def test_is_not_unsigned_integer_dtype(dtype):
    """Non-integer and signed inputs are not recognised as unsigned integers."""
    recognised = com.is_unsigned_integer_dtype(dtype)
    assert not recognised
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "dtype", [np.int64, np.array([1, 2], dtype=np.int64), "Int64", pd.Int64Dtype]
)
def test_is_int64_dtype(dtype):
    """64-bit signed integer inputs are recognised by ``is_int64_dtype``."""
    recognised = com.is_int64_dtype(dtype)
    assert recognised
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "dtype",
    [
        str,
        float,
        np.int32,
        np.uint64,
        pd.Index([1, 2.0]),
        np.array(["a", "b"]),
        np.array([1, 2], dtype=np.uint32),
        "int8",
        "Int8",
        pd.Int8Dtype,
    ],
)
def test_is_not_int64_dtype(dtype):
    """None of the listed inputs is recognised by ``is_int64_dtype``."""
    recognised = com.is_int64_dtype(dtype)
    assert not recognised
|
||
|
|
||
|
|
||
|
def test_is_datetime64_any_dtype():
    """is_datetime64_any_dtype accepts naive and tz-aware datetime inputs."""
    rejected = (int, str, np.array([1, 2]), np.array(["a", "b"]))
    for candidate in rejected:
        assert not com.is_datetime64_any_dtype(candidate)

    accepted = (
        np.datetime64,
        np.array([], dtype=np.datetime64),
        DatetimeTZDtype("ns", "US/Eastern"),
        pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]"),
    )
    for candidate in accepted:
        assert com.is_datetime64_any_dtype(candidate)
|
||
|
|
||
|
|
||
|
def test_is_datetime64_ns_dtype():
    """is_datetime64_ns_dtype accepts only the nanosecond datetime inputs."""
    rejected = (
        int,
        str,
        np.datetime64,
        np.array([1, 2]),
        np.array(["a", "b"]),
        np.array([], dtype=np.datetime64),
        # This datetime array has the wrong unit (ps instead of ns)
        np.array([], dtype="datetime64[ps]"),
    )
    for candidate in rejected:
        assert not com.is_datetime64_ns_dtype(candidate)

    accepted = (
        DatetimeTZDtype("ns", "US/Eastern"),
        pd.DatetimeIndex([1, 2, 3], dtype=np.dtype("datetime64[ns]")),
    )
    for candidate in accepted:
        assert com.is_datetime64_ns_dtype(candidate)
|
||
|
|
||
|
|
||
|
def test_is_timedelta64_ns_dtype():
    """is_timedelta64_ns_dtype accepts only the "m8[ns]" dtype and arrays."""
    rejected = (np.dtype("m8[ps]"), np.array([1, 2], dtype=np.timedelta64))
    for candidate in rejected:
        assert not com.is_timedelta64_ns_dtype(candidate)

    accepted = (np.dtype("m8[ns]"), np.array([1, 2], dtype="m8[ns]"))
    for candidate in accepted:
        assert com.is_timedelta64_ns_dtype(candidate)
|
||
|
|
||
|
|
||
|
def test_is_datetime_or_timedelta_dtype():
    """Only numpy datetime64/timedelta64 types and arrays are accepted."""
    rejected = (
        int,
        str,
        pd.Series([1, 2]),
        np.array(["a", "b"]),
        # TODO(jreback), this is slightly suspect
        DatetimeTZDtype("ns", "US/Eastern"),
    )
    for candidate in rejected:
        assert not com.is_datetime_or_timedelta_dtype(candidate)

    accepted = (
        np.datetime64,
        np.timedelta64,
        np.array([], dtype=np.timedelta64),
        np.array([], dtype=np.datetime64),
    )
    for candidate in accepted:
        assert com.is_datetime_or_timedelta_dtype(candidate)
|
||
|
|
||
|
|
||
|
def test_is_numeric_v_string_like():
|
||
|
assert not com.is_numeric_v_string_like(1, 1)
|
||
|
assert not com.is_numeric_v_string_like(1, "foo")
|
||
|
assert not com.is_numeric_v_string_like("foo", "foo")
|
||
|
assert not com.is_numeric_v_string_like(np.array([1]), np.array([2]))
|
||
|
assert not com.is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"]))
|
||
|
|
||
|
assert com.is_numeric_v_string_like(np.array([1]), "foo")
|
||
|
assert com.is_numeric_v_string_like("foo", np.array([1]))
|
||
|
assert com.is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"]))
|
||
|
assert com.is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2]))
|
||
|
|
||
|
|
||
|
def test_is_datetimelike_v_numeric():
|
||
|
dt = np.datetime64(datetime(2017, 1, 1))
|
||
|
|
||
|
assert not com.is_datetimelike_v_numeric(1, 1)
|
||
|
assert not com.is_datetimelike_v_numeric(dt, dt)
|
||
|
assert not com.is_datetimelike_v_numeric(np.array([1]), np.array([2]))
|
||
|
assert not com.is_datetimelike_v_numeric(np.array([dt]), np.array([dt]))
|
||
|
|
||
|
assert com.is_datetimelike_v_numeric(1, dt)
|
||
|
assert com.is_datetimelike_v_numeric(1, dt)
|
||
|
assert com.is_datetimelike_v_numeric(np.array([dt]), 1)
|
||
|
assert com.is_datetimelike_v_numeric(np.array([1]), dt)
|
||
|
assert com.is_datetimelike_v_numeric(np.array([dt]), np.array([1]))
|
||
|
|
||
|
|
||
|
def test_needs_i8_conversion():
    """needs_i8_conversion is true for datetime/timedelta inputs only."""
    rejected = (str, np.int64, pd.Series([1, 2]), np.array(["a", "b"]))
    for candidate in rejected:
        assert not com.needs_i8_conversion(candidate)

    accepted = (
        np.datetime64,
        pd.Series([], dtype="timedelta64[ns]"),
        pd.DatetimeIndex(["2000"], tz="US/Eastern"),
    )
    for candidate in accepted:
        assert com.needs_i8_conversion(candidate)
|
||
|
|
||
|
|
||
|
def test_is_numeric_dtype():
    """is_numeric_dtype accepts int/float/uint inputs, not str or datetimes."""
    rejected = (
        str,
        np.datetime64,
        np.timedelta64,
        np.array(["a", "b"]),
        np.array([], dtype=np.timedelta64),
    )
    for candidate in rejected:
        assert not com.is_numeric_dtype(candidate)

    accepted = (int, float, np.uint64, pd.Series([1, 2]), pd.Index([1, 2.0]))
    for candidate in accepted:
        assert com.is_numeric_dtype(candidate)
|
||
|
|
||
|
|
||
|
def test_is_string_like_dtype():
|
||
|
assert not com.is_string_like_dtype(object)
|
||
|
assert not com.is_string_like_dtype(pd.Series([1, 2]))
|
||
|
|
||
|
assert com.is_string_like_dtype(str)
|
||
|
assert com.is_string_like_dtype(np.array(["a", "b"]))
|
||
|
|
||
|
|
||
|
def test_is_float_dtype():
    """is_float_dtype accepts float and a float-valued Index only."""
    rejected = (str, int, pd.Series([1, 2]), np.array(["a", "b"]))
    for candidate in rejected:
        assert not com.is_float_dtype(candidate)

    for candidate in (float, pd.Index([1, 2.0])):
        assert com.is_float_dtype(candidate)
|
||
|
|
||
|
|
||
|
def test_is_bool_dtype():
    """is_bool_dtype accepts numpy and nullable boolean inputs only."""
    rejected = (
        int,
        str,
        pd.Series([1, 2]),
        np.array(["a", "b"]),
        pd.Index(["a", "b"]),
        "Int64",
    )
    for candidate in rejected:
        assert not com.is_bool_dtype(candidate)

    numpy_backed = (
        bool,
        np.bool_,
        np.array([True, False]),
        pd.Index([True, False]),
    )
    for candidate in numpy_backed:
        assert com.is_bool_dtype(candidate)

    nullable = (
        pd.BooleanDtype(),
        pd.array([True, False, None], dtype="boolean"),
        "boolean",
    )
    for candidate in nullable:
        assert com.is_bool_dtype(candidate)
|
||
|
|
||
|
|
||
|
def test_is_bool_dtype_numpy_error():
    """The string "0 - Name" is reported as not a bool dtype."""
    # GH39010
    outcome = com.is_bool_dtype("0 - Name")
    assert not outcome
|
||
|
|
||
|
|
||
|
@pytest.mark.filterwarnings("ignore:'is_extension_type' is deprecated:FutureWarning")
@pytest.mark.parametrize(
    "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)]
)
def test_is_extension_type(check_scipy):
    """is_extension_type accepts categorical, sparse and tz-aware inputs."""
    rejected = ([1, 2, 3], np.array([1, 2, 3]), pd.DatetimeIndex([1, 2, 3]))
    for candidate in rejected:
        assert not com.is_extension_type(candidate)

    cat = pd.Categorical([1, 2, 3])
    accepted = (
        cat,
        pd.Series(cat),
        SparseArray([1, 2, 3]),
        pd.DatetimeIndex(["2000"], tz="US/Eastern"),
    )
    for candidate in accepted:
        assert com.is_extension_type(candidate)

    tz_dtype = DatetimeTZDtype("ns", tz="US/Eastern")
    tz_series = pd.Series([], dtype=tz_dtype)
    assert com.is_extension_type(tz_series)

    if not check_scipy:
        return

    import scipy.sparse

    assert not com.is_extension_type(scipy.sparse.bsr_matrix([1, 2, 3]))
|
||
|
|
||
|
|
||
|
def test_is_extension_type_deprecation():
|
||
|
with tm.assert_produces_warning(FutureWarning):
|
||
|
com.is_extension_type([1, 2, 3])
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)]
)
def test_is_extension_array_dtype(check_scipy):
    """is_extension_array_dtype accepts categorical, sparse and tz-aware inputs."""
    rejected = ([1, 2, 3], np.array([1, 2, 3]), pd.DatetimeIndex([1, 2, 3]))
    for candidate in rejected:
        assert not com.is_extension_array_dtype(candidate)

    cat = pd.Categorical([1, 2, 3])
    accepted = (
        cat,
        pd.Series(cat),
        SparseArray([1, 2, 3]),
        pd.DatetimeIndex(["2000"], tz="US/Eastern"),
    )
    for candidate in accepted:
        assert com.is_extension_array_dtype(candidate)

    tz_dtype = DatetimeTZDtype("ns", tz="US/Eastern")
    tz_series = pd.Series([], dtype=tz_dtype)
    assert com.is_extension_array_dtype(tz_series)

    if not check_scipy:
        return

    import scipy.sparse

    assert not com.is_extension_array_dtype(scipy.sparse.bsr_matrix([1, 2, 3]))
|
||
|
|
||
|
|
||
|
def test_is_complex_dtype():
    """is_complex_dtype accepts complex types and arrays only."""
    rejected = (int, str, pd.Series([1, 2]), np.array(["a", "b"]))
    for candidate in rejected:
        assert not com.is_complex_dtype(candidate)

    accepted = (np.complex_, complex, np.array([1 + 1j, 5]))
    for candidate in accepted:
        assert com.is_complex_dtype(candidate)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "input_param,result",
    [
        (int, np.dtype(int)),
        ("int32", np.dtype("int32")),
        (float, np.dtype(float)),
        ("float64", np.dtype("float64")),
        (np.dtype("float64"), np.dtype("float64")),
        (str, np.dtype(str)),
        (pd.Series([1, 2], dtype=np.dtype("int16")), np.dtype("int16")),
        (pd.Series(["a", "b"]), np.dtype(object)),
        (pd.Index([1, 2]), np.dtype("int64")),
        (pd.Index(["a", "b"]), np.dtype(object)),
        ("category", "category"),
        (pd.Categorical(["a", "b"]).dtype, CategoricalDtype(["a", "b"])),
        (pd.Categorical(["a", "b"]), CategoricalDtype(["a", "b"])),
        (pd.CategoricalIndex(["a", "b"]).dtype, CategoricalDtype(["a", "b"])),
        (pd.CategoricalIndex(["a", "b"]), CategoricalDtype(["a", "b"])),
        (CategoricalDtype(), CategoricalDtype()),
        (CategoricalDtype(["a", "b"]), CategoricalDtype()),
        (pd.DatetimeIndex([1, 2]), np.dtype("=M8[ns]")),
        (pd.DatetimeIndex([1, 2]).dtype, np.dtype("=M8[ns]")),
        ("<M8[ns]", np.dtype("<M8[ns]")),
        ("datetime64[ns, Europe/London]", DatetimeTZDtype("ns", "Europe/London")),
        (PeriodDtype(freq="D"), PeriodDtype(freq="D")),
        ("period[D]", PeriodDtype(freq="D")),
        (IntervalDtype(), IntervalDtype()),
    ],
)
def test_get_dtype(input_param, result):
    """``com.get_dtype`` of each input compares equal to the paired dtype."""
    resolved = com.get_dtype(input_param)
    assert resolved == result
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "input_param,expected_error_message",
    [
        (None, "Cannot deduce dtype from null object"),
        (1, "data type not understood"),
        (1.2, "data type not understood"),
        # numpy dev changed from double-quotes to single quotes
        ("random string", "data type [\"']random string[\"'] not understood"),
        (pd.DataFrame([1, 2]), "data type not understood"),
    ],
)
def test_get_dtype_fails(input_param, expected_error_message):
    """``com.get_dtype`` raises TypeError for inputs that are not dtype-like."""
    # python objects
    # 2020-02-02 npdev changed error message
    newer_message = f"|Cannot interpret '{input_param}' as a data type"
    pattern = expected_error_message + newer_message
    with pytest.raises(TypeError, match=pattern):
        com.get_dtype(input_param)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "input_param,result",
    [
        (int, np.dtype(int).type),
        ("int32", np.int32),
        (float, np.dtype(float).type),
        ("float64", np.float64),
        (np.dtype("float64"), np.float64),
        (str, np.dtype(str).type),
        (pd.Series([1, 2], dtype=np.dtype("int16")), np.int16),
        (pd.Series(["a", "b"]), np.object_),
        (pd.Index([1, 2], dtype="int64"), np.int64),
        (pd.Index(["a", "b"]), np.object_),
        ("category", CategoricalDtypeType),
        (pd.Categorical(["a", "b"]).dtype, CategoricalDtypeType),
        (pd.Categorical(["a", "b"]), CategoricalDtypeType),
        (pd.CategoricalIndex(["a", "b"]).dtype, CategoricalDtypeType),
        (pd.CategoricalIndex(["a", "b"]), CategoricalDtypeType),
        (pd.DatetimeIndex([1, 2]), np.datetime64),
        (pd.DatetimeIndex([1, 2]).dtype, np.datetime64),
        ("<M8[ns]", np.datetime64),
        (pd.DatetimeIndex(["2000"], tz="Europe/London"), pd.Timestamp),
        (pd.DatetimeIndex(["2000"], tz="Europe/London").dtype, pd.Timestamp),
        ("datetime64[ns, Europe/London]", pd.Timestamp),
        (PeriodDtype(freq="D"), pd.Period),
        ("period[D]", pd.Period),
        (IntervalDtype(), pd.Interval),
        (None, type(None)),
        (1, type(None)),
        (1.2, type(None)),
        (pd.DataFrame([1, 2]), type(None)),  # composite dtype
    ],
)
def test__is_dtype_type(input_param, result):
    """The type ``com._is_dtype_type`` passes to its condition equals ``result``."""

    def matches_expected(tipo):
        return tipo == result

    assert com._is_dtype_type(input_param, matches_expected)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("val", [np.datetime64("NaT"), np.timedelta64("NaT")])
@pytest.mark.parametrize("typ", [np.int64])
def test_astype_nansafe(val, typ):
    """Casting an array holding NaT to an integer dtype raises ValueError."""
    nat_array = np.array([val])

    with pytest.raises(ValueError, match="Cannot convert NaT values to integer"):
        astype_nansafe(nat_array, dtype=typ)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("from_type", [np.datetime64, np.timedelta64])
@pytest.mark.parametrize(
    "to_type",
    [
        np.uint8,
        np.uint16,
        np.uint32,
        np.int8,
        np.int16,
        np.int32,
        np.float16,
        np.float32,
    ],
)
def test_astype_datetime64_bad_dtype_raises(from_type, to_type):
    """Casting datetime/timedelta arrays to the listed dtypes raises TypeError."""
    source = np.array([from_type("2018")])
    expected_msg = "cannot astype"

    with pytest.raises(TypeError, match=expected_msg):
        astype_nansafe(source, dtype=to_type)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("from_type", [np.datetime64, np.timedelta64])
def test_astype_object_preserves_datetime_na(from_type):
    """A NaT element is still reported as NA after casting to object."""
    nat_array = np.array([from_type("NaT")])
    as_object = astype_nansafe(nat_array, dtype="object")

    na_mask = isna(as_object)
    assert na_mask[0]
|
||
|
|
||
|
|
||
|
def test_validate_allhashable():
    """validate_all_hashable returns None for hashables and raises for a list."""
    outcome = com.validate_all_hashable(1, "a")
    assert outcome is None

    default_msg = "All elements must be hashable"
    with pytest.raises(TypeError, match=default_msg):
        com.validate_all_hashable([])

    named_msg = "list must be a hashable type"
    with pytest.raises(TypeError, match=named_msg):
        com.validate_all_hashable([], error_name="list")
|