1518 lines
51 KiB
Python
1518 lines
51 KiB
Python
![]() |
"""
These test the public routines exposed in types/common.py
related to inference and not otherwise tested in types/test_common.py

"""
|
||
|
import collections
|
||
|
from collections import namedtuple
|
||
|
from datetime import date, datetime, time, timedelta
|
||
|
from decimal import Decimal
|
||
|
from fractions import Fraction
|
||
|
from io import StringIO
|
||
|
from numbers import Number
|
||
|
import re
|
||
|
|
||
|
import numpy as np
|
||
|
import pytest
|
||
|
import pytz
|
||
|
|
||
|
from pandas._libs import lib, missing as libmissing
|
||
|
import pandas.util._test_decorators as td
|
||
|
|
||
|
from pandas.core.dtypes import inference
|
||
|
from pandas.core.dtypes.common import (
|
||
|
ensure_int32,
|
||
|
is_bool,
|
||
|
is_datetime64_any_dtype,
|
||
|
is_datetime64_dtype,
|
||
|
is_datetime64_ns_dtype,
|
||
|
is_datetime64tz_dtype,
|
||
|
is_float,
|
||
|
is_integer,
|
||
|
is_number,
|
||
|
is_scalar,
|
||
|
is_scipy_sparse,
|
||
|
is_timedelta64_dtype,
|
||
|
is_timedelta64_ns_dtype,
|
||
|
)
|
||
|
|
||
|
import pandas as pd
|
||
|
from pandas import (
|
||
|
Categorical,
|
||
|
DataFrame,
|
||
|
DateOffset,
|
||
|
DatetimeIndex,
|
||
|
Index,
|
||
|
Interval,
|
||
|
Period,
|
||
|
PeriodIndex,
|
||
|
Series,
|
||
|
Timedelta,
|
||
|
TimedeltaIndex,
|
||
|
Timestamp,
|
||
|
)
|
||
|
import pandas._testing as tm
|
||
|
from pandas.core.arrays import IntegerArray
|
||
|
|
||
|
|
||
|
@pytest.fixture(params=[True, False], ids=str)
def coerce(request):
    """Parametrized fixture yielding ``True`` and then ``False``."""
    flag = request.param
    return flag
|
||
|
|
||
|
|
||
|
# Objects exercised by the list-like tests. Every entry is a tuple of
# (object, expected result, test ID). Sets carry the marker "set" instead of a
# bool so that each test can decide how sets should be treated.
ll_params = [
    ([1], True, "list"),
    ([], True, "list-empty"),
    ((1,), True, "tuple"),
    ((), True, "tuple-empty"),
    ({"a": 1}, True, "dict"),
    ({}, True, "dict-empty"),
    ({"a", 1}, "set", "set"),
    (set(), "set", "set-empty"),
    (frozenset({"a", 1}), "set", "frozenset"),
    (frozenset(), "set", "frozenset-empty"),
    (iter([1, 2]), True, "iterator"),
    (iter([]), True, "iterator-empty"),
    ((x for x in [1, 2]), True, "generator"),
    ((_ for _ in []), True, "generator-empty"),
    (Series([1]), True, "Series"),
    (Series([], dtype=object), True, "Series-empty"),
    (Series(["a"]).str, True, "StringMethods"),
    (Series([], dtype="O").str, True, "StringMethods-empty"),
    (Index([1]), True, "Index"),
    (Index([]), True, "Index-empty"),
    (DataFrame([[1]]), True, "DataFrame"),
    (DataFrame(), True, "DataFrame-empty"),
    (np.ndarray((2,) * 1), True, "ndarray-1d"),
    (np.array([]), True, "ndarray-1d-empty"),
    (np.ndarray((2,) * 2), True, "ndarray-2d"),
    (np.array([[]]), True, "ndarray-2d-empty"),
    (np.ndarray((2,) * 3), True, "ndarray-3d"),
    (np.array([[[]]]), True, "ndarray-3d-empty"),
    (np.ndarray((2,) * 4), True, "ndarray-4d"),
    (np.array([[[[]]]]), True, "ndarray-4d-empty"),
    (np.array(2), False, "ndarray-0d"),
    (1, False, "int"),
    (b"123", False, "bytes"),
    (b"", False, "bytes-empty"),
    ("123", False, "string"),
    ("", False, "string-empty"),
    (str, False, "string-type"),
    (object(), False, "object"),
    (np.nan, False, "NaN"),
    (None, False, "None"),
]
# Transpose the table into three parallel tuples.
objs, expected, ids = zip(*ll_params)
|
||
|
|
||
|
|
||
|
@pytest.fixture(params=zip(objs, expected), ids=ids)
def maybe_list_like(request):
    """Yield one ``(object, expected)`` pair per entry of ``ll_params``."""
    pair = request.param
    return pair
|
||
|
|
||
|
|
||
|
def test_is_list_like(maybe_list_like):
    """With default arguments, entries marked "set" are expected to pass."""
    obj, expected = maybe_list_like
    if expected == "set":
        expected = True
    assert inference.is_list_like(obj) == expected
|
||
|
|
||
|
|
||
|
def test_is_list_like_disallow_sets(maybe_list_like):
    """With ``allow_sets=False``, entries marked "set" are expected to fail."""
    obj, expected = maybe_list_like
    if expected == "set":
        expected = False
    assert inference.is_list_like(obj, allow_sets=False) == expected
|
||
|
|
||
|
|
||
|
def test_is_list_like_recursion():
    """Calling ``is_list_like`` at unbounded depth must raise RecursionError."""
    # GH 33721
    # interpreter would crash with SIGABRT
    def recurse_forever():
        inference.is_list_like([])
        recurse_forever()

    with pytest.raises(RecursionError):
        recurse_forever()
|
||
|
|
||
|
|
||
|
def test_is_sequence():
    """Tuples and lists are sequences; strings, types and ``A()`` are not."""

    class A:
        def __getitem__(self):
            return 1

    check = inference.is_sequence

    for seq in ((1, 2), [1, 2]):
        assert check(seq)

    for non_seq in ("abcd", np.int64, A()):
        assert not check(non_seq)
|
||
|
|
||
|
|
||
|
def test_is_array_like():
    """Series, ndarray, Index and a list subclass with ``dtype`` are array-like.

    Plain lists, tuples, strings and ints are not.
    """

    class DtypeList(list):
        dtype = "special"

    check = inference.is_array_like

    array_likes = [
        Series([], dtype=object),
        Series([1, 2]),
        np.array(["a", "b"]),
        Index(["2016-01-01"]),
        DtypeList(),
    ]
    for candidate in array_likes:
        assert check(candidate)

    for candidate in ([1, 2, 3], (), "foo", 123):
        assert not check(candidate)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"inner",
|
||
|
[
|
||
|
[],
|
||
|
[1],
|
||
|
(1,),
|
||
|
(1, 2),
|
||
|
{"a": 1},
|
||
|
{1, "a"},
|
||
|
Series([1]),
|
||
|
Series([], dtype=object),
|
||
|
Series(["a"]).str,
|
||
|
(x for x in range(5)),
|
||
|
],
|
||
|
)
|
||
|
@pytest.mark.parametrize("outer", [list, Series, np.array, tuple])
|
||
|
def test_is_nested_list_like_passes(inner, outer):
|
||
|
result = outer([inner for _ in range(5)])
|
||
|
assert inference.is_list_like(result)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "obj",
    [
        "abc",
        [],
        [1],
        (1,),
        ["a"],
        "a",
        {"a"},
        [1, 2, 3],
        Series([1]),
        DataFrame({"A": [1]}),
        ([1, 2] for _ in range(5)),
    ],
)
def test_is_nested_list_like_fails(obj):
    """None of the parametrized objects is reported as nested list-like."""
    is_nested = inference.is_nested_list_like(obj)
    assert not is_nested
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("ll", [{}, {"A": 1}, Series([1]), collections.defaultdict()])
def test_is_dict_like_passes(ll):
    """Dict, Series and defaultdict instances are dict-like."""
    dict_like = inference.is_dict_like(ll)
    assert dict_like
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "ll",
    [
        "1",
        1,
        [1, 2],
        (1, 2),
        range(2),
        Index([1]),
        dict,
        collections.defaultdict,
        Series,
    ],
)
def test_is_dict_like_fails(ll):
    """Scalars, sequences, an Index and bare types are not dict-like."""
    dict_like = inference.is_dict_like(ll)
    assert not dict_like
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("has_keys", [True, False])
@pytest.mark.parametrize("has_getitem", [True, False])
@pytest.mark.parametrize("has_contains", [True, False])
def test_is_dict_like_duck_type(has_keys, has_getitem, has_contains):
    """A duck type is dict-like only when it defines all three hooks.

    Each flag decides whether ``keys``, ``__getitem__`` or ``__contains__``
    is defined on the throwaway class.
    """

    class DictLike:
        def __init__(self, d):
            self.d = d

        if has_keys:

            def keys(self):
                return self.d.keys()

        if has_getitem:

            def __getitem__(self, key):
                return self.d.__getitem__(key)

        if has_contains:

            def __contains__(self, key) -> bool:
                return self.d.__contains__(key)

    expected = has_keys and has_getitem and has_contains
    instance = DictLike({1: 2})

    assert inference.is_dict_like(instance) is expected
|
||
|
|
||
|
|
||
|
def test_is_file_like():
    """Check ``is_file_like`` on a mock class that gains attributes in steps."""

    class MockFile:
        pass

    check = inference.is_file_like

    assert check(StringIO("data"))

    # No read / write attributes
    # No iterator attributes
    assert not check(MockFile())

    # Write attribute but not an iterator
    MockFile.write = lambda self: 0
    assert not check(MockFile())

    # gh-16530: Valid iterator just means we have the
    # __iter__ attribute for our purposes.
    MockFile.__iter__ = lambda self: self

    # Valid write-only file
    assert check(MockFile())

    # Valid read-only file
    del MockFile.write
    MockFile.read = lambda self: 0
    assert check(MockFile())

    # Iterator but no read / write attributes
    assert not check([1, 2, 3])
|
||
|
|
||
|
|
||
|
# Named-tuple type used to build the positive case below.
test_tuple = collections.namedtuple("Test", ["a", "b", "c"])


@pytest.mark.parametrize("ll", [test_tuple(1, 2, 3)])
def test_is_names_tuple_passes(ll):
    """An instance of a ``namedtuple`` type is a named tuple."""
    named = inference.is_named_tuple(ll)
    assert named
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("ll", [(1, 2, 3), "a", Series({"pi": 3.14})])
def test_is_names_tuple_fails(ll):
    """A plain tuple, a string and a Series are not named tuples."""
    named = inference.is_named_tuple(ll)
    assert not named
|
||
|
|
||
|
|
||
|
def test_is_hashable():
    """Check ``is_hashable`` on hashable, unhashable and falsely-hashable objects."""

    # all new-style classes are hashable by default
    class HashableClass:
        pass

    class UnhashableClass1:
        __hash__ = None

    class UnhashableClass2:
        def __hash__(self):
            raise TypeError("Not hashable")

    hashable = (1, 3.14, np.float64(3.14), "a", (), (1,), HashableClass())
    not_hashable = ([], UnhashableClass1())
    abc_hashable_not_really_hashable = (([],), UnhashableClass2())

    assert all(inference.is_hashable(obj) for obj in hashable)
    assert not any(inference.is_hashable(obj) for obj in not_hashable)
    assert not any(
        inference.is_hashable(obj) for obj in abc_hashable_not_really_hashable
    )

    # numpy.array is no longer collections.abc.Hashable as of
    # https://github.com/numpy/numpy/pull/5326, just test
    # is_hashable()
    assert not inference.is_hashable(np.array([]))
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("ll", [re.compile("ad")])
def test_is_re_passes(ll):
    """A compiled pattern is recognised by ``is_re``."""
    compiled = inference.is_re(ll)
    assert compiled
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("ll", ["x", 2, 3, object()])
def test_is_re_fails(ll):
    """Strings, ints and plain objects are rejected by ``is_re``."""
    compiled = inference.is_re(ll)
    assert not compiled
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "ll", [r"a", "x", r"asdf", re.compile("adsf"), r"\u2233\s*", re.compile(r"")]
)
def test_is_recompilable_passes(ll):
    """Pattern strings and compiled patterns are regex-compilable."""
    compilable = inference.is_re_compilable(ll)
    assert compilable
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("ll", [1, [], object()])
def test_is_recompilable_fails(ll):
    """An int, a list and a plain object are not regex-compilable."""
    compilable = inference.is_re_compilable(ll)
    assert not compilable
|
||
|
|
||
|
|
||
|
class TestInference:
    """Tests for the inf-scalar checks and the ``lib.maybe_convert_*`` helpers."""

    @pytest.mark.parametrize(
        "arr",
        [
            np.array(list("abc"), dtype="S1"),
            np.array(list("abc"), dtype="S1").astype(object),
            [b"a", np.nan, b"c"],
        ],
    )
    def test_infer_dtype_bytes(self, arr):
        """Byte-string inputs infer as "bytes" when NaN entries are skipped."""
        result = lib.infer_dtype(arr, skipna=True)
        assert result == "bytes"

    @pytest.mark.parametrize(
        "value, expected",
        [
            (float("inf"), True),
            (np.inf, True),
            (-np.inf, False),
            (1, False),
            ("a", False),
        ],
    )
    def test_isposinf_scalar(self, value, expected):
        """Only positive infinity is reported as positive infinity."""
        # GH 11352
        result = libmissing.isposinf_scalar(value)
        assert result is expected

    @pytest.mark.parametrize(
        "value, expected",
        [
            (float("-inf"), True),
            (-np.inf, True),
            (np.inf, False),
            (1, False),
            ("a", False),
        ],
    )
    def test_isneginf_scalar(self, value, expected):
        """Only negative infinity is reported as negative infinity."""
        result = libmissing.isneginf_scalar(value)
        assert result is expected

    @pytest.mark.parametrize("coerce_numeric", [True, False])
    @pytest.mark.parametrize(
        "infinity", ["inf", "inF", "iNf", "Inf", "iNF", "InF", "INf", "INF"]
    )
    @pytest.mark.parametrize("prefix", ["", "-", "+"])
    def test_maybe_convert_numeric_infinities(self, coerce_numeric, infinity, prefix):
        """Any casing of "inf", with an optional sign, converts to +/-inf."""
        # see gh-13274
        result = lib.maybe_convert_numeric(
            np.array([prefix + infinity], dtype=object),
            na_values={"", "NULL", "nan"},
            coerce_numeric=coerce_numeric,
        )
        expected = np.array([np.inf if prefix in ["", "+"] else -np.inf])
        tm.assert_numpy_array_equal(result, expected)

    def test_maybe_convert_numeric_infinities_raises(self):
        """A string that merely contains "inf" raises when not coercing."""
        msg = "Unable to parse string"
        with pytest.raises(ValueError, match=msg):
            lib.maybe_convert_numeric(
                np.array(["foo_inf"], dtype=object),
                na_values={"", "NULL", "nan"},
                coerce_numeric=False,
            )

    def test_maybe_convert_numeric_post_floatify_nan(self, coerce):
        """Strings equal to an NA value after float parsing become NaN."""
        # see gh-13314
        data = np.array(["1.200", "-999.000", "4.500"], dtype=object)
        expected = np.array([1.2, np.nan, 4.5], dtype=np.float64)
        nan_values = {-999, -999.0}

        out = lib.maybe_convert_numeric(data, nan_values, coerce)
        tm.assert_numpy_array_equal(out, expected)

    def test_convert_infs(self):
        """All-"inf" and all-"-inf" string arrays convert to float64."""
        arr = np.array(["inf", "inf", "inf"], dtype="O")
        result = lib.maybe_convert_numeric(arr, set(), False)
        assert result.dtype == np.float64

        arr = np.array(["-inf", "-inf", "-inf"], dtype="O")
        result = lib.maybe_convert_numeric(arr, set(), False)
        assert result.dtype == np.float64

    def test_scientific_no_exponent(self):
        """Mantissa-only scientific strings such as "42E" coerce to NaN."""
        # See PR 12215
        arr = np.array(["42E", "2E", "99e", "6e"], dtype="O")
        result = lib.maybe_convert_numeric(arr, set(), False, True)
        assert np.all(np.isnan(result))

    def test_convert_non_hashable(self):
        """Unhashable elements are coerced to NaN instead of raising."""
        # GH13324
        # make sure that we are handing non-hashables
        arr = np.array([[10.0, 2], 1.0, "apple"], dtype=object)
        result = lib.maybe_convert_numeric(arr, set(), False, True)
        tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan]))

    def test_convert_numeric_uint64(self):
        """2**63 as int, str or np.uint64 converts to a uint64 array."""
        arr = np.array([2 ** 63], dtype=object)
        exp = np.array([2 ** 63], dtype=np.uint64)
        tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp)

        arr = np.array([str(2 ** 63)], dtype=object)
        exp = np.array([2 ** 63], dtype=np.uint64)
        tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp)

        arr = np.array([np.uint64(2 ** 63)], dtype=object)
        exp = np.array([2 ** 63], dtype=np.uint64)
        tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp)

    @pytest.mark.parametrize(
        "arr",
        [
            np.array([2 ** 63, np.nan], dtype=object),
            np.array([str(2 ** 63), np.nan], dtype=object),
            np.array([np.nan, 2 ** 63], dtype=object),
            np.array([np.nan, str(2 ** 63)], dtype=object),
        ],
    )
    def test_convert_numeric_uint64_nan(self, coerce, arr):
        """uint64-range values mixed with NaN become float only when coercing."""
        expected = arr.astype(float) if coerce else arr.copy()
        result = lib.maybe_convert_numeric(arr, set(), coerce_numeric=coerce)
        tm.assert_almost_equal(result, expected)

    def test_convert_numeric_uint64_nan_values(self, coerce):
        """A uint64-range NA value becomes NaN only when coercing."""
        arr = np.array([2 ** 63, 2 ** 63 + 1], dtype=object)
        na_values = {2 ** 63}

        expected = (
            np.array([np.nan, 2 ** 63 + 1], dtype=float) if coerce else arr.copy()
        )
        result = lib.maybe_convert_numeric(arr, na_values, coerce_numeric=coerce)
        tm.assert_almost_equal(result, expected)

    @pytest.mark.parametrize(
        "case",
        [
            np.array([2 ** 63, -1], dtype=object),
            np.array([str(2 ** 63), -1], dtype=object),
            np.array([str(2 ** 63), str(-1)], dtype=object),
            np.array([-1, 2 ** 63], dtype=object),
            np.array([-1, str(2 ** 63)], dtype=object),
            np.array([str(-1), str(2 ** 63)], dtype=object),
        ],
    )
    def test_convert_numeric_int64_uint64(self, case, coerce):
        """Negative and uint64-range values together become float only when coercing."""
        expected = case.astype(float) if coerce else case.copy()
        result = lib.maybe_convert_numeric(case, set(), coerce_numeric=coerce)
        tm.assert_almost_equal(result, expected)

    def test_convert_numeric_string_uint64(self):
        """The literal string "uint64" coerces to NaN."""
        # GH32394
        result = lib.maybe_convert_numeric(
            np.array(["uint64"], dtype=object), set(), coerce_numeric=True
        )
        assert np.isnan(result)

    @pytest.mark.parametrize("value", [-(2 ** 63) - 1, 2 ** 64])
    def test_convert_int_overflow(self, value):
        """Integers outside the int64/uint64 range stay in an object array."""
        # see gh-18584
        arr = np.array([value], dtype=object)
        result = lib.maybe_convert_objects(arr)
        tm.assert_numpy_array_equal(arr, result)

    def test_maybe_convert_objects_uint64(self):
        """Check int64 / uint64 / object results for integer object arrays."""
        # see gh-4471
        arr = np.array([2 ** 63], dtype=object)
        exp = np.array([2 ** 63], dtype=np.uint64)
        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)

        # NumPy bug: can't compare uint64 to int64, as that
        # results in both casting to float64, so we should
        # make sure that this function is robust against it
        arr = np.array([np.uint64(2 ** 63)], dtype=object)
        exp = np.array([2 ** 63], dtype=np.uint64)
        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)

        arr = np.array([2, -1], dtype=object)
        exp = np.array([2, -1], dtype=np.int64)
        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)

        arr = np.array([2 ** 63, -1], dtype=object)
        exp = np.array([2 ** 63, -1], dtype=object)
        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)

    def test_maybe_convert_objects_datetime(self):
        """Check datetime/timedelta conversion for three mixed object arrays."""
        # GH27438
        arr = np.array(
            [np.datetime64("2000-01-01"), np.timedelta64(1, "s")], dtype=object
        )
        exp = arr.copy()
        out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1)
        tm.assert_numpy_array_equal(out, exp)

        arr = np.array([pd.NaT, np.timedelta64(1, "s")], dtype=object)
        exp = np.array([np.timedelta64("NaT"), np.timedelta64(1, "s")], dtype="m8[ns]")
        out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1)
        tm.assert_numpy_array_equal(out, exp)

        arr = np.array([np.timedelta64(1, "s"), np.nan], dtype=object)
        exp = arr.copy()
        out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1)
        tm.assert_numpy_array_equal(out, exp)

    @pytest.mark.parametrize(
        "exp",
        [
            IntegerArray(np.array([2, 0], dtype="i8"), np.array([False, True])),
            IntegerArray(np.array([2, 0], dtype="int64"), np.array([False, True])),
        ],
    )
    def test_maybe_convert_objects_nullable_integer(self, exp):
        """An int plus NaN converts to a masked ``IntegerArray`` on request."""
        # GH27335
        # Use ``np.nan``: the ``np.NaN`` alias was removed in NumPy 2.0 and the
        # rest of this module already spells it ``np.nan``.
        arr = np.array([2, np.nan], dtype=object)
        result = lib.maybe_convert_objects(arr, convert_to_nullable_integer=1)

        tm.assert_extension_array_equal(result, exp)

    def test_maybe_convert_objects_bool_nan(self):
        """Booleans mixed with NaN stay in an object array under ``safe=1``."""
        # GH32146
        ind = Index([True, False, np.nan], dtype=object)
        exp = np.array([True, False, np.nan], dtype=object)
        out = lib.maybe_convert_objects(ind.values, safe=1)
        tm.assert_numpy_array_equal(out, exp)

    def test_mixed_dtypes_remain_object_array(self):
        """A tz-aware datetime next to an int is returned unchanged."""
        # GH14956
        array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], dtype=object)
        result = lib.maybe_convert_objects(array, convert_datetime=1)
        tm.assert_numpy_array_equal(result, array)
|
||
|
|
||
|
|
||
|
class TestTypeInference:
|
||
|
|
||
|
# Placeholder type for tests that need an arbitrary Python object
class Dummy:
    pass
|
||
|
|
||
|
def test_inferred_dtype_fixture(self, any_skipna_inferred_dtype):
    """The fixture's declared dtype matches what ``infer_dtype`` reports."""
    # see pandas/conftest.py
    inferred_dtype, values = any_skipna_inferred_dtype

    # make sure the inferred dtype of the fixture is as requested
    actual = lib.infer_dtype(values, skipna=True)
    assert inferred_dtype == actual
|
||
|
|
||
|
@pytest.mark.parametrize("skipna", [True, False])
|
||
|
def test_length_zero(self, skipna):
|
||
|
result = lib.infer_dtype(np.array([], dtype="i4"), skipna=skipna)
|
||
|
assert result == "integer"
|
||
|
|
||
|
result = lib.infer_dtype([], skipna=skipna)
|
||
|
assert result == "empty"
|
||
|
|
||
|
# GH 18004
|
||
|
arr = np.array([np.array([], dtype=object), np.array([], dtype=object)])
|
||
|
result = lib.infer_dtype(arr, skipna=skipna)
|
||
|
assert result == "empty"
|
||
|
|
||
|
def test_integers(self):
|
||
|
arr = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype="O")
|
||
|
result = lib.infer_dtype(arr, skipna=True)
|
||
|
assert result == "integer"
|
||
|
|
||
|
arr = np.array([1, 2, 3, np.int64(4), np.int32(5), "foo"], dtype="O")
|
||
|
result = lib.infer_dtype(arr, skipna=True)
|
||
|
assert result == "mixed-integer"
|
||
|
|
||
|
arr = np.array([1, 2, 3, 4, 5], dtype="i4")
|
||
|
result = lib.infer_dtype(arr, skipna=True)
|
||
|
assert result == "integer"
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"arr, skipna",
|
||
|
[
|
||
|
(np.array([1, 2, np.nan, np.nan, 3], dtype="O"), False),
|
||
|
(np.array([1, 2, np.nan, np.nan, 3], dtype="O"), True),
|
||
|
(np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"), False),
|
||
|
(np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"), True),
|
||
|
],
|
||
|
)
|
||
|
def test_integer_na(self, arr, skipna):
|
||
|
# GH 27392
|
||
|
result = lib.infer_dtype(arr, skipna=skipna)
|
||
|
expected = "integer" if skipna else "integer-na"
|
||
|
assert result == expected
|
||
|
|
||
|
def test_infer_dtype_skipna_default(self):
|
||
|
# infer_dtype `skipna` default deprecated in GH#24050,
|
||
|
# changed to True in GH#29876
|
||
|
arr = np.array([1, 2, 3, np.nan], dtype=object)
|
||
|
|
||
|
result = lib.infer_dtype(arr)
|
||
|
assert result == "integer"
|
||
|
|
||
|
def test_bools(self):
|
||
|
arr = np.array([True, False, True, True, True], dtype="O")
|
||
|
result = lib.infer_dtype(arr, skipna=True)
|
||
|
assert result == "boolean"
|
||
|
|
||
|
arr = np.array([np.bool_(True), np.bool_(False)], dtype="O")
|
||
|
result = lib.infer_dtype(arr, skipna=True)
|
||
|
assert result == "boolean"
|
||
|
|
||
|
arr = np.array([True, False, True, "foo"], dtype="O")
|
||
|
result = lib.infer_dtype(arr, skipna=True)
|
||
|
assert result == "mixed"
|
||
|
|
||
|
arr = np.array([True, False, True], dtype=bool)
|
||
|
result = lib.infer_dtype(arr, skipna=True)
|
||
|
assert result == "boolean"
|
||
|
|
||
|
arr = np.array([True, np.nan, False], dtype="O")
|
||
|
result = lib.infer_dtype(arr, skipna=True)
|
||
|
assert result == "boolean"
|
||
|
|
||
|
result = lib.infer_dtype(arr, skipna=False)
|
||
|
assert result == "mixed"
|
||
|
|
||
|
def test_floats(self):
|
||
|
arr = np.array([1.0, 2.0, 3.0, np.float64(4), np.float32(5)], dtype="O")
|
||
|
result = lib.infer_dtype(arr, skipna=True)
|
||
|
assert result == "floating"
|
||
|
|
||
|
arr = np.array([1, 2, 3, np.float64(4), np.float32(5), "foo"], dtype="O")
|
||
|
result = lib.infer_dtype(arr, skipna=True)
|
||
|
assert result == "mixed-integer"
|
||
|
|
||
|
arr = np.array([1, 2, 3, 4, 5], dtype="f4")
|
||
|
result = lib.infer_dtype(arr, skipna=True)
|
||
|
assert result == "floating"
|
||
|
|
||
|
arr = np.array([1, 2, 3, 4, 5], dtype="f8")
|
||
|
result = lib.infer_dtype(arr, skipna=True)
|
||
|
assert result == "floating"
|
||
|
|
||
|
def test_decimals(self):
|
||
|
# GH15690
|
||
|
arr = np.array([Decimal(1), Decimal(2), Decimal(3)])
|
||
|
result = lib.infer_dtype(arr, skipna=True)
|
||
|
assert result == "decimal"
|
||
|
|
||
|
arr = np.array([1.0, 2.0, Decimal(3)])
|
||
|
result = lib.infer_dtype(arr, skipna=True)
|
||
|
assert result == "mixed"
|
||
|
|
||
|
result = lib.infer_dtype(arr[::-1], skipna=True)
|
||
|
assert result == "mixed"
|
||
|
|
||
|
arr = np.array([Decimal(1), Decimal("NaN"), Decimal(3)])
|
||
|
result = lib.infer_dtype(arr, skipna=True)
|
||
|
assert result == "decimal"
|
||
|
|
||
|
arr = np.array([Decimal(1), np.nan, Decimal(3)], dtype="O")
|
||
|
result = lib.infer_dtype(arr, skipna=True)
|
||
|
assert result == "decimal"
|
||
|
|
||
|
# complex is compatible with nan, so skipna has no effect
|
||
|
@pytest.mark.parametrize("skipna", [True, False])
|
||
|
def test_complex(self, skipna):
|
||
|
# gets cast to complex on array construction
|
||
|
arr = np.array([1.0, 2.0, 1 + 1j])
|
||
|
result = lib.infer_dtype(arr, skipna=skipna)
|
||
|
assert result == "complex"
|
||
|
|
||
|
arr = np.array([1.0, 2.0, 1 + 1j], dtype="O")
|
||
|
result = lib.infer_dtype(arr, skipna=skipna)
|
||
|
assert result == "mixed"
|
||
|
|
||
|
result = lib.infer_dtype(arr[::-1], skipna=skipna)
|
||
|
assert result == "mixed"
|
||
|
|
||
|
# gets cast to complex on array construction
|
||
|
arr = np.array([1, np.nan, 1 + 1j])
|
||
|
result = lib.infer_dtype(arr, skipna=skipna)
|
||
|
assert result == "complex"
|
||
|
|
||
|
arr = np.array([1.0, np.nan, 1 + 1j], dtype="O")
|
||
|
result = lib.infer_dtype(arr, skipna=skipna)
|
||
|
assert result == "mixed"
|
||
|
|
||
|
# complex with nans stays complex
|
||
|
arr = np.array([1 + 1j, np.nan, 3 + 3j], dtype="O")
|
||
|
result = lib.infer_dtype(arr, skipna=skipna)
|
||
|
assert result == "complex"
|
||
|
|
||
|
# test smaller complex dtype; will pass through _try_infer_map fastpath
|
||
|
arr = np.array([1 + 1j, np.nan, 3 + 3j], dtype=np.complex64)
|
||
|
result = lib.infer_dtype(arr, skipna=skipna)
|
||
|
assert result == "complex"
|
||
|
|
||
|
def test_string(self):
|
||
|
pass
|
||
|
|
||
|
def test_unicode(self):
|
||
|
arr = ["a", np.nan, "c"]
|
||
|
result = lib.infer_dtype(arr, skipna=False)
|
||
|
# This currently returns "mixed", but it's not clear that's optimal.
|
||
|
# This could also return "string" or "mixed-string"
|
||
|
assert result == "mixed"
|
||
|
|
||
|
arr = ["a", np.nan, "c"]
|
||
|
result = lib.infer_dtype(arr, skipna=True)
|
||
|
assert result == "string"
|
||
|
|
||
|
arr = ["a", "c"]
|
||
|
result = lib.infer_dtype(arr, skipna=False)
|
||
|
assert result == "string"
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"dtype, missing, skipna, expected",
|
||
|
[
|
||
|
(float, np.nan, False, "floating"),
|
||
|
(float, np.nan, True, "floating"),
|
||
|
(object, np.nan, False, "floating"),
|
||
|
(object, np.nan, True, "empty"),
|
||
|
(object, None, False, "mixed"),
|
||
|
(object, None, True, "empty"),
|
||
|
],
|
||
|
)
|
||
|
@pytest.mark.parametrize("box", [pd.Series, np.array])
|
||
|
def test_object_empty(self, box, missing, dtype, skipna, expected):
|
||
|
# GH 23421
|
||
|
arr = box([missing, missing], dtype=dtype)
|
||
|
|
||
|
result = lib.infer_dtype(arr, skipna=skipna)
|
||
|
assert result == expected
|
||
|
|
||
|
def test_datetime(self):
|
||
|
|
||
|
dates = [datetime(2012, 1, x) for x in range(1, 20)]
|
||
|
index = Index(dates)
|
||
|
assert index.inferred_type == "datetime64"
|
||
|
|
||
|
def test_infer_dtype_datetime64(self):
|
||
|
arr = np.array(
|
||
|
[np.datetime64("2011-01-01"), np.datetime64("2011-01-01")], dtype=object
|
||
|
)
|
||
|
assert lib.infer_dtype(arr, skipna=True) == "datetime64"
|
||
|
|
||
|
@pytest.mark.parametrize("na_value", [pd.NaT, np.nan])
|
||
|
def test_infer_dtype_datetime64_with_na(self, na_value):
|
||
|
# starts with nan
|
||
|
arr = np.array([na_value, np.datetime64("2011-01-02")])
|
||
|
assert lib.infer_dtype(arr, skipna=True) == "datetime64"
|
||
|
|
||
|
arr = np.array([na_value, np.datetime64("2011-01-02"), na_value])
|
||
|
assert lib.infer_dtype(arr, skipna=True) == "datetime64"
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"arr",
|
||
|
[
|
||
|
np.array(
|
||
|
[np.timedelta64("nat"), np.datetime64("2011-01-02")], dtype=object
|
||
|
),
|
||
|
np.array(
|
||
|
[np.datetime64("2011-01-02"), np.timedelta64("nat")], dtype=object
|
||
|
),
|
||
|
np.array([np.datetime64("2011-01-01"), Timestamp("2011-01-02")]),
|
||
|
np.array([Timestamp("2011-01-02"), np.datetime64("2011-01-01")]),
|
||
|
np.array([np.nan, Timestamp("2011-01-02"), 1.1]),
|
||
|
np.array([np.nan, "2011-01-01", Timestamp("2011-01-02")]),
|
||
|
np.array([np.datetime64("nat"), np.timedelta64(1, "D")], dtype=object),
|
||
|
np.array([np.timedelta64(1, "D"), np.datetime64("nat")], dtype=object),
|
||
|
],
|
||
|
)
|
||
|
def test_infer_datetimelike_dtype_mixed(self, arr):
|
||
|
assert lib.infer_dtype(arr, skipna=False) == "mixed"
|
||
|
|
||
|
def test_infer_dtype_mixed_integer(self):
|
||
|
arr = np.array([np.nan, Timestamp("2011-01-02"), 1])
|
||
|
assert lib.infer_dtype(arr, skipna=True) == "mixed-integer"
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"arr",
|
||
|
[
|
||
|
np.array([Timestamp("2011-01-01"), Timestamp("2011-01-02")]),
|
||
|
np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)]),
|
||
|
np.array([datetime(2011, 1, 1), Timestamp("2011-01-02")]),
|
||
|
],
|
||
|
)
|
||
|
def test_infer_dtype_datetime(self, arr):
|
||
|
assert lib.infer_dtype(arr, skipna=True) == "datetime"
|
||
|
|
||
|
@pytest.mark.parametrize("na_value", [pd.NaT, np.nan])
|
||
|
@pytest.mark.parametrize(
|
||
|
"time_stamp", [Timestamp("2011-01-01"), datetime(2011, 1, 1)]
|
||
|
)
|
||
|
def test_infer_dtype_datetime_with_na(self, na_value, time_stamp):
|
||
|
# starts with nan
|
||
|
arr = np.array([na_value, time_stamp])
|
||
|
assert lib.infer_dtype(arr, skipna=True) == "datetime"
|
||
|
|
||
|
arr = np.array([na_value, time_stamp, na_value])
|
||
|
assert lib.infer_dtype(arr, skipna=True) == "datetime"
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"arr",
|
||
|
[
|
||
|
np.array([Timedelta("1 days"), Timedelta("2 days")]),
|
||
|
np.array([np.timedelta64(1, "D"), np.timedelta64(2, "D")], dtype=object),
|
||
|
np.array([timedelta(1), timedelta(2)]),
|
||
|
],
|
||
|
)
|
||
|
def test_infer_dtype_timedelta(self, arr):
|
||
|
assert lib.infer_dtype(arr, skipna=True) == "timedelta"
|
||
|
|
||
|
@pytest.mark.parametrize("na_value", [pd.NaT, np.nan])
|
||
|
@pytest.mark.parametrize(
|
||
|
"delta", [Timedelta("1 days"), np.timedelta64(1, "D"), timedelta(1)]
|
||
|
)
|
||
|
def test_infer_dtype_timedelta_with_na(self, na_value, delta):
|
||
|
# starts with nan
|
||
|
arr = np.array([na_value, delta])
|
||
|
assert lib.infer_dtype(arr, skipna=True) == "timedelta"
|
||
|
|
||
|
arr = np.array([na_value, delta, na_value])
|
||
|
assert lib.infer_dtype(arr, skipna=True) == "timedelta"
|
||
|
|
||
|
def test_infer_dtype_period(self):
|
||
|
# GH 13664
|
||
|
arr = np.array([Period("2011-01", freq="D"), Period("2011-02", freq="D")])
|
||
|
assert lib.infer_dtype(arr, skipna=True) == "period"
|
||
|
|
||
|
arr = np.array([Period("2011-01", freq="D"), Period("2011-02", freq="M")])
|
||
|
assert lib.infer_dtype(arr, skipna=True) == "period"
|
||
|
|
||
|
def test_infer_dtype_period_mixed(self):
|
||
|
arr = np.array(
|
||
|
[Period("2011-01", freq="M"), np.datetime64("nat")], dtype=object
|
||
|
)
|
||
|
assert lib.infer_dtype(arr, skipna=False) == "mixed"
|
||
|
|
||
|
arr = np.array(
|
||
|
[np.datetime64("nat"), Period("2011-01", freq="M")], dtype=object
|
||
|
)
|
||
|
assert lib.infer_dtype(arr, skipna=False) == "mixed"
|
||
|
|
||
|
@pytest.mark.parametrize("na_value", [pd.NaT, np.nan])
|
||
|
def test_infer_dtype_period_with_na(self, na_value):
|
||
|
# starts with nan
|
||
|
arr = np.array([na_value, Period("2011-01", freq="D")])
|
||
|
assert lib.infer_dtype(arr, skipna=True) == "period"
|
||
|
|
||
|
arr = np.array([na_value, Period("2011-01", freq="D"), na_value])
|
||
|
assert lib.infer_dtype(arr, skipna=True) == "period"
|
||
|
|
||
|
@pytest.mark.parametrize(
    "data",
    [
        # plain ``datetime.datetime`` objects
        [datetime(2017, 6, 12, 19, 30), datetime(2017, 3, 11, 1, 15)],
        # naive Timestamps
        [Timestamp("20170612"), Timestamp("20170311")],
        # Timestamps sharing one timezone
        [
            Timestamp("20170612", tz="US/Eastern"),
            Timestamp("20170311", tz="US/Eastern"),
        ],
        # a ``date`` followed by a tz-aware Timestamp
        [date(2017, 6, 12), Timestamp("20170311", tz="US/Eastern")],
        # numpy datetime64 scalars
        [np.datetime64("2017-06-12"), np.datetime64("2017-03-11")],
        # numpy datetime64 mixed with ``datetime.datetime``
        [np.datetime64("2017-06-12"), datetime(2017, 3, 11, 1, 15)],
    ],
)
def test_infer_datetimelike_array_datetime(self, data):
    """Every parametrized list must be inferred as ``"datetime"``."""
    inferred = lib.infer_datetimelike_array(data)
    assert inferred == "datetime"
|
||
|
|
||
|
@pytest.mark.parametrize(
    "data",
    [
        # NOTE: ``timedelta`` takes (days, seconds, microseconds) positionally
        [timedelta(2017, 6, 12), timedelta(2017, 3, 11)],
        # a ``timedelta`` followed by a ``date``
        [timedelta(2017, 6, 12), date(2017, 3, 11)],
        # numpy timedelta64 scalars with different units
        [np.timedelta64(2017, "D"), np.timedelta64(6, "s")],
        # numpy timedelta64 mixed with ``datetime.timedelta``
        [np.timedelta64(2017, "D"), timedelta(2017, 3, 11)],
    ],
)
def test_infer_datetimelike_array_timedelta(self, data):
    """Every parametrized list must be inferred as ``"timedelta"``."""
    inferred = lib.infer_datetimelike_array(data)
    assert inferred == "timedelta"
|
||
|
|
||
|
def test_infer_datetimelike_array_date(self):
    """A list holding only ``datetime.date`` objects is inferred as ``"date"``."""
    june, march = date(2017, 6, 12), date(2017, 3, 11)
    inferred = lib.infer_datetimelike_array([june, march])
    assert inferred == "date"
|
||
|
|
||
|
@pytest.mark.parametrize(
    "data",
    [
        # strings that merely look like dates
        ["2017-06-12", "2017-03-11"],
        # integers and floats
        [20170612, 20170311],
        [20170612.5, 20170311.8],
        # instances of the ``Dummy`` helper defined elsewhere in this file
        [Dummy(), Dummy()],
        # naive and tz-aware Timestamps together
        [Timestamp("20170612"), Timestamp("20170311", tz="US/Eastern")],
        # a Timestamp next to an integer
        [Timestamp("20170612"), 20170311],
        # a timedelta next to a tz-aware Timestamp
        [timedelta(2017, 6, 12), Timestamp("20170311", tz="US/Eastern")],
    ],
)
def test_infer_datetimelike_array_mixed(self, data):
    """Every parametrized list must be inferred as ``"mixed"``."""
    inferred = lib.infer_datetimelike_array(data)
    assert inferred == "mixed"
|
||
|
|
||
|
@pytest.mark.parametrize(
    "first, expected",
    [
        [[None], "mixed"],
        [[np.nan], "mixed"],
        [[pd.NaT], "nat"],
        [[datetime(2017, 6, 12, 19, 30), pd.NaT], "datetime"],
        [[np.datetime64("2017-06-12"), pd.NaT], "datetime"],
        [[date(2017, 6, 12), pd.NaT], "date"],
        [[timedelta(2017, 6, 12), pd.NaT], "timedelta"],
        [[np.timedelta64(2017, "D"), pd.NaT], "timedelta"],
    ],
)
@pytest.mark.parametrize("second", [None, np.nan])
def test_infer_datetimelike_array_nan_nat_like(self, first, second, expected):
    """
    Check ``lib.infer_datetimelike_array`` when a trailing None/NaN is added.

    Parameters
    ----------
    first : list
        Leading values of the input list.
    second : None or float
        Missing-value marker placed after ``first``.
    expected : str
        Label that ``lib.infer_datetimelike_array`` must return.
    """
    # Build a new list instead of calling ``first.append(second)``: pytest
    # passes the very same parametrized list object to every ``second``
    # variant, so mutating it in place would leak the appended value into
    # the following test cases.
    values = [*first, second]
    assert lib.infer_datetimelike_array(values) == expected
|
||
|
|
||
|
def test_infer_dtype_all_nan_nat_like(self):
|
||
|
arr = np.array([np.nan, np.nan])
|
||
|
assert lib.infer_dtype(arr, skipna=True) == "floating"
|
||
|
|
||
|
# nan and None mix are result in mixed
|
||
|
arr = np.array([np.nan, np.nan, None])
|
||
|
assert lib.infer_dtype(arr, skipna=True) == "empty"
|
||
|
assert lib.infer_dtype(arr, skipna=False) == "mixed"
|
||
|
|
||
|
arr = np.array([None, np.nan, np.nan])
|
||
|
assert lib.infer_dtype(arr, skipna=True) == "empty"
|
||
|
assert lib.infer_dtype(arr, skipna=False) == "mixed"
|
||
|
|
||
|
# pd.NaT
|
||
|
arr = np.array([pd.NaT])
|
||
|
assert lib.infer_dtype(arr, skipna=False) == "datetime"
|
||
|
|
||
|
arr = np.array([pd.NaT, np.nan])
|
||
|
assert lib.infer_dtype(arr, skipna=False) == "datetime"
|
||
|
|
||
|
arr = np.array([np.nan, pd.NaT])
|
||
|
assert lib.infer_dtype(arr, skipna=False) == "datetime"
|
||
|
|
||
|
arr = np.array([np.nan, pd.NaT, np.nan])
|
||
|
assert lib.infer_dtype(arr, skipna=False) == "datetime"
|
||
|
|
||
|
arr = np.array([None, pd.NaT, None])
|
||
|
assert lib.infer_dtype(arr, skipna=False) == "datetime"
|
||
|
|
||
|
# np.datetime64(nat)
|
||
|
arr = np.array([np.datetime64("nat")])
|
||
|
assert lib.infer_dtype(arr, skipna=False) == "datetime64"
|
||
|
|
||
|
for n in [np.nan, pd.NaT, None]:
|
||
|
arr = np.array([n, np.datetime64("nat"), n])
|
||
|
assert lib.infer_dtype(arr, skipna=False) == "datetime64"
|
||
|
|
||
|
arr = np.array([pd.NaT, n, np.datetime64("nat"), n])
|
||
|
assert lib.infer_dtype(arr, skipna=False) == "datetime64"
|
||
|
|
||
|
arr = np.array([np.timedelta64("nat")], dtype=object)
|
||
|
assert lib.infer_dtype(arr, skipna=False) == "timedelta"
|
||
|
|
||
|
for n in [np.nan, pd.NaT, None]:
|
||
|
arr = np.array([n, np.timedelta64("nat"), n])
|
||
|
assert lib.infer_dtype(arr, skipna=False) == "timedelta"
|
||
|
|
||
|
arr = np.array([pd.NaT, n, np.timedelta64("nat"), n])
|
||
|
assert lib.infer_dtype(arr, skipna=False) == "timedelta"
|
||
|
|
||
|
# datetime / timedelta mixed
|
||
|
arr = np.array([pd.NaT, np.datetime64("nat"), np.timedelta64("nat"), np.nan])
|
||
|
assert lib.infer_dtype(arr, skipna=False) == "mixed"
|
||
|
|
||
|
arr = np.array([np.timedelta64("nat"), np.datetime64("nat")], dtype=object)
|
||
|
assert lib.infer_dtype(arr, skipna=False) == "mixed"
|
||
|
|
||
|
def test_is_datetimelike_array_all_nan_nat_like(self):
|
||
|
arr = np.array([np.nan, pd.NaT, np.datetime64("nat")])
|
||
|
assert lib.is_datetime_array(arr)
|
||
|
assert lib.is_datetime64_array(arr)
|
||
|
assert not lib.is_timedelta_or_timedelta64_array(arr)
|
||
|
|
||
|
arr = np.array([np.nan, pd.NaT, np.timedelta64("nat")])
|
||
|
assert not lib.is_datetime_array(arr)
|
||
|
assert not lib.is_datetime64_array(arr)
|
||
|
assert lib.is_timedelta_or_timedelta64_array(arr)
|
||
|
|
||
|
arr = np.array([np.nan, pd.NaT, np.datetime64("nat"), np.timedelta64("nat")])
|
||
|
assert not lib.is_datetime_array(arr)
|
||
|
assert not lib.is_datetime64_array(arr)
|
||
|
assert not lib.is_timedelta_or_timedelta64_array(arr)
|
||
|
|
||
|
arr = np.array([np.nan, pd.NaT])
|
||
|
assert lib.is_datetime_array(arr)
|
||
|
assert lib.is_datetime64_array(arr)
|
||
|
assert lib.is_timedelta_or_timedelta64_array(arr)
|
||
|
|
||
|
arr = np.array([np.nan, np.nan], dtype=object)
|
||
|
assert not lib.is_datetime_array(arr)
|
||
|
assert not lib.is_datetime64_array(arr)
|
||
|
assert not lib.is_timedelta_or_timedelta64_array(arr)
|
||
|
|
||
|
assert lib.is_datetime_with_singletz_array(
|
||
|
np.array(
|
||
|
[
|
||
|
Timestamp("20130101", tz="US/Eastern"),
|
||
|
Timestamp("20130102", tz="US/Eastern"),
|
||
|
],
|
||
|
dtype=object,
|
||
|
)
|
||
|
)
|
||
|
assert not lib.is_datetime_with_singletz_array(
|
||
|
np.array(
|
||
|
[
|
||
|
Timestamp("20130101", tz="US/Eastern"),
|
||
|
Timestamp("20130102", tz="CET"),
|
||
|
],
|
||
|
dtype=object,
|
||
|
)
|
||
|
)
|
||
|
|
||
|
@pytest.mark.parametrize(
    "func",
    [
        "is_datetime_array",
        "is_datetime64_array",
        "is_bool_array",
        "is_timedelta_or_timedelta64_array",
        "is_date_array",
        "is_time_array",
        "is_interval_array",
        "is_period_array",
    ],
)
def test_other_dtypes_for_array(self, func):
    """The named ``lib`` checker must reject string and integer arrays."""
    checker = getattr(lib, func)
    for values in (["foo", "bar"], [1, 2]):
        assert not checker(np.array(values))
|
||
|
|
||
|
def test_date(self):
|
||
|
|
||
|
dates = [date(2012, 1, day) for day in range(1, 20)]
|
||
|
index = Index(dates)
|
||
|
assert index.inferred_type == "date"
|
||
|
|
||
|
dates = [date(2012, 1, day) for day in range(1, 20)] + [np.nan]
|
||
|
result = lib.infer_dtype(dates, skipna=False)
|
||
|
assert result == "mixed"
|
||
|
|
||
|
result = lib.infer_dtype(dates, skipna=True)
|
||
|
assert result == "date"
|
||
|
|
||
|
@pytest.mark.parametrize(
    "values",
    [
        # date and Timestamp, in both orders
        [date(2020, 1, 1), Timestamp("2020-01-01")],
        [Timestamp("2020-01-01"), date(2020, 1, 1)],
        # date and NaT, in both orders
        [date(2020, 1, 1), pd.NaT],
        [pd.NaT, date(2020, 1, 1)],
    ],
)
@pytest.mark.parametrize("skipna", [True, False])
def test_infer_dtype_date_order_invariant(self, values, skipna):
    """The inferred label is ``"date"`` regardless of element order."""
    # https://github.com/pandas-dev/pandas/issues/33741
    inferred = lib.infer_dtype(values, skipna=skipna)
    assert inferred == "date"
|
||
|
|
||
|
def test_is_numeric_array(self):
|
||
|
|
||
|
assert lib.is_float_array(np.array([1, 2.0]))
|
||
|
assert lib.is_float_array(np.array([1, 2.0, np.nan]))
|
||
|
assert not lib.is_float_array(np.array([1, 2]))
|
||
|
|
||
|
assert lib.is_integer_array(np.array([1, 2]))
|
||
|
assert not lib.is_integer_array(np.array([1, 2.0]))
|
||
|
|
||
|
def test_is_string_array(self):
    """Check ``lib.is_string_array`` with and without missing values."""
    assert lib.is_string_array(np.array(["foo", "bar"]))

    # pd.NA is only accepted when NA values are skipped
    with_na = ["foo", "bar", pd.NA]
    assert not lib.is_string_array(np.array(with_na, dtype=object), skipna=False)
    assert lib.is_string_array(np.array(with_na, dtype=object), skipna=True)

    # NaN is not valid for string array, just NA
    with_nan = np.array(["foo", "bar", np.nan], dtype=object)
    assert not lib.is_string_array(with_nan, skipna=True)

    assert not lib.is_string_array(np.array([1, 2]))
|
||
|
|
||
|
def test_to_object_array_tuples(self):
|
||
|
r = (5, 6)
|
||
|
values = [r]
|
||
|
lib.to_object_array_tuples(values)
|
||
|
|
||
|
# make sure record array works
|
||
|
record = namedtuple("record", "x y")
|
||
|
r = record(5, 6)
|
||
|
values = [r]
|
||
|
lib.to_object_array_tuples(values)
|
||
|
|
||
|
def test_object(self):
|
||
|
|
||
|
# GH 7431
|
||
|
# cannot infer more than this as only a single element
|
||
|
arr = np.array([None], dtype="O")
|
||
|
result = lib.infer_dtype(arr, skipna=False)
|
||
|
assert result == "mixed"
|
||
|
result = lib.infer_dtype(arr, skipna=True)
|
||
|
assert result == "empty"
|
||
|
|
||
|
def test_to_object_array_width(self):
|
||
|
# see gh-13320
|
||
|
rows = [[1, 2, 3], [4, 5, 6]]
|
||
|
|
||
|
expected = np.array(rows, dtype=object)
|
||
|
out = lib.to_object_array(rows)
|
||
|
tm.assert_numpy_array_equal(out, expected)
|
||
|
|
||
|
expected = np.array(rows, dtype=object)
|
||
|
out = lib.to_object_array(rows, min_width=1)
|
||
|
tm.assert_numpy_array_equal(out, expected)
|
||
|
|
||
|
expected = np.array(
|
||
|
[[1, 2, 3, None, None], [4, 5, 6, None, None]], dtype=object
|
||
|
)
|
||
|
out = lib.to_object_array(rows, min_width=5)
|
||
|
tm.assert_numpy_array_equal(out, expected)
|
||
|
|
||
|
def test_is_period(self):
|
||
|
assert lib.is_period(Period("2011-01", freq="M"))
|
||
|
assert not lib.is_period(PeriodIndex(["2011-01"], freq="M"))
|
||
|
assert not lib.is_period(Timestamp("2011-01"))
|
||
|
assert not lib.is_period(1)
|
||
|
assert not lib.is_period(np.nan)
|
||
|
|
||
|
def test_categorical(self):
|
||
|
|
||
|
# GH 8974
|
||
|
arr = Categorical(list("abc"))
|
||
|
result = lib.infer_dtype(arr, skipna=True)
|
||
|
assert result == "categorical"
|
||
|
|
||
|
result = lib.infer_dtype(Series(arr), skipna=True)
|
||
|
assert result == "categorical"
|
||
|
|
||
|
arr = Categorical(list("abc"), categories=["cegfab"], ordered=True)
|
||
|
result = lib.infer_dtype(arr, skipna=True)
|
||
|
assert result == "categorical"
|
||
|
|
||
|
result = lib.infer_dtype(Series(arr), skipna=True)
|
||
|
assert result == "categorical"
|
||
|
|
||
|
def test_interval(self):
|
||
|
idx = pd.IntervalIndex.from_breaks(range(5), closed="both")
|
||
|
inferred = lib.infer_dtype(idx, skipna=False)
|
||
|
assert inferred == "interval"
|
||
|
|
||
|
inferred = lib.infer_dtype(idx._data, skipna=False)
|
||
|
assert inferred == "interval"
|
||
|
|
||
|
inferred = lib.infer_dtype(Series(idx), skipna=False)
|
||
|
assert inferred == "interval"
|
||
|
|
||
|
@pytest.mark.parametrize("klass", [pd.array, pd.Series])
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("data", [["a", "b", "c"], ["a", "b", pd.NA]])
def test_string_dtype(self, data, skipna, klass):
    """Data built with ``dtype="string"`` is inferred as ``"string"``."""
    # StringArray
    strings = klass(data, dtype="string")
    assert lib.infer_dtype(strings, skipna=skipna) == "string"
|
||
|
|
||
|
@pytest.mark.parametrize("klass", [pd.array, pd.Series])
@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("data", [[True, False, True], [True, False, pd.NA]])
def test_boolean_dtype(self, data, skipna, klass):
    """Data built with ``dtype="boolean"`` is inferred as ``"boolean"``."""
    # BooleanArray
    booleans = klass(data, dtype="boolean")
    assert lib.infer_dtype(booleans, skipna=skipna) == "boolean"
|
||
|
|
||
|
|
||
|
class TestNumberScalar:
|
||
|
def test_is_number(self):
|
||
|
|
||
|
assert is_number(True)
|
||
|
assert is_number(1)
|
||
|
assert is_number(1.1)
|
||
|
assert is_number(1 + 3j)
|
||
|
assert is_number(np.int64(1))
|
||
|
assert is_number(np.float64(1.1))
|
||
|
assert is_number(np.complex128(1 + 3j))
|
||
|
assert is_number(np.nan)
|
||
|
|
||
|
assert not is_number(None)
|
||
|
assert not is_number("x")
|
||
|
assert not is_number(datetime(2011, 1, 1))
|
||
|
assert not is_number(np.datetime64("2011-01-01"))
|
||
|
assert not is_number(Timestamp("2011-01-01"))
|
||
|
assert not is_number(Timestamp("2011-01-01", tz="US/Eastern"))
|
||
|
assert not is_number(timedelta(1000))
|
||
|
assert not is_number(Timedelta("1 days"))
|
||
|
|
||
|
# questionable
|
||
|
assert not is_number(np.bool_(False))
|
||
|
assert is_number(np.timedelta64(1, "D"))
|
||
|
|
||
|
def test_is_bool(self):
|
||
|
assert is_bool(True)
|
||
|
assert is_bool(False)
|
||
|
assert is_bool(np.bool_(False))
|
||
|
|
||
|
assert not is_bool(1)
|
||
|
assert not is_bool(1.1)
|
||
|
assert not is_bool(1 + 3j)
|
||
|
assert not is_bool(np.int64(1))
|
||
|
assert not is_bool(np.float64(1.1))
|
||
|
assert not is_bool(np.complex128(1 + 3j))
|
||
|
assert not is_bool(np.nan)
|
||
|
assert not is_bool(None)
|
||
|
assert not is_bool("x")
|
||
|
assert not is_bool(datetime(2011, 1, 1))
|
||
|
assert not is_bool(np.datetime64("2011-01-01"))
|
||
|
assert not is_bool(Timestamp("2011-01-01"))
|
||
|
assert not is_bool(Timestamp("2011-01-01", tz="US/Eastern"))
|
||
|
assert not is_bool(timedelta(1000))
|
||
|
assert not is_bool(np.timedelta64(1, "D"))
|
||
|
assert not is_bool(Timedelta("1 days"))
|
||
|
|
||
|
def test_is_integer(self):
|
||
|
assert is_integer(1)
|
||
|
assert is_integer(np.int64(1))
|
||
|
|
||
|
assert not is_integer(True)
|
||
|
assert not is_integer(1.1)
|
||
|
assert not is_integer(1 + 3j)
|
||
|
assert not is_integer(False)
|
||
|
assert not is_integer(np.bool_(False))
|
||
|
assert not is_integer(np.float64(1.1))
|
||
|
assert not is_integer(np.complex128(1 + 3j))
|
||
|
assert not is_integer(np.nan)
|
||
|
assert not is_integer(None)
|
||
|
assert not is_integer("x")
|
||
|
assert not is_integer(datetime(2011, 1, 1))
|
||
|
assert not is_integer(np.datetime64("2011-01-01"))
|
||
|
assert not is_integer(Timestamp("2011-01-01"))
|
||
|
assert not is_integer(Timestamp("2011-01-01", tz="US/Eastern"))
|
||
|
assert not is_integer(timedelta(1000))
|
||
|
assert not is_integer(Timedelta("1 days"))
|
||
|
assert not is_integer(np.timedelta64(1, "D"))
|
||
|
|
||
|
def test_is_float(self):
|
||
|
assert is_float(1.1)
|
||
|
assert is_float(np.float64(1.1))
|
||
|
assert is_float(np.nan)
|
||
|
|
||
|
assert not is_float(True)
|
||
|
assert not is_float(1)
|
||
|
assert not is_float(1 + 3j)
|
||
|
assert not is_float(False)
|
||
|
assert not is_float(np.bool_(False))
|
||
|
assert not is_float(np.int64(1))
|
||
|
assert not is_float(np.complex128(1 + 3j))
|
||
|
assert not is_float(None)
|
||
|
assert not is_float("x")
|
||
|
assert not is_float(datetime(2011, 1, 1))
|
||
|
assert not is_float(np.datetime64("2011-01-01"))
|
||
|
assert not is_float(Timestamp("2011-01-01"))
|
||
|
assert not is_float(Timestamp("2011-01-01", tz="US/Eastern"))
|
||
|
assert not is_float(timedelta(1000))
|
||
|
assert not is_float(np.timedelta64(1, "D"))
|
||
|
assert not is_float(Timedelta("1 days"))
|
||
|
|
||
|
def test_is_datetime_dtypes(self):
|
||
|
|
||
|
ts = pd.date_range("20130101", periods=3)
|
||
|
tsa = pd.date_range("20130101", periods=3, tz="US/Eastern")
|
||
|
|
||
|
assert is_datetime64_dtype("datetime64")
|
||
|
assert is_datetime64_dtype("datetime64[ns]")
|
||
|
assert is_datetime64_dtype(ts)
|
||
|
assert not is_datetime64_dtype(tsa)
|
||
|
|
||
|
assert not is_datetime64_ns_dtype("datetime64")
|
||
|
assert is_datetime64_ns_dtype("datetime64[ns]")
|
||
|
assert is_datetime64_ns_dtype(ts)
|
||
|
assert is_datetime64_ns_dtype(tsa)
|
||
|
|
||
|
assert is_datetime64_any_dtype("datetime64")
|
||
|
assert is_datetime64_any_dtype("datetime64[ns]")
|
||
|
assert is_datetime64_any_dtype(ts)
|
||
|
assert is_datetime64_any_dtype(tsa)
|
||
|
|
||
|
assert not is_datetime64tz_dtype("datetime64")
|
||
|
assert not is_datetime64tz_dtype("datetime64[ns]")
|
||
|
assert not is_datetime64tz_dtype(ts)
|
||
|
assert is_datetime64tz_dtype(tsa)
|
||
|
|
||
|
for tz in ["US/Eastern", "UTC"]:
|
||
|
dtype = f"datetime64[ns, {tz}]"
|
||
|
assert not is_datetime64_dtype(dtype)
|
||
|
assert is_datetime64tz_dtype(dtype)
|
||
|
assert is_datetime64_ns_dtype(dtype)
|
||
|
assert is_datetime64_any_dtype(dtype)
|
||
|
|
||
|
def test_is_timedelta(self):
|
||
|
assert is_timedelta64_dtype("timedelta64")
|
||
|
assert is_timedelta64_dtype("timedelta64[ns]")
|
||
|
assert not is_timedelta64_ns_dtype("timedelta64")
|
||
|
assert is_timedelta64_ns_dtype("timedelta64[ns]")
|
||
|
|
||
|
tdi = TimedeltaIndex([1e14, 2e14], dtype="timedelta64[ns]")
|
||
|
assert is_timedelta64_dtype(tdi)
|
||
|
assert is_timedelta64_ns_dtype(tdi)
|
||
|
assert is_timedelta64_ns_dtype(tdi.astype("timedelta64[ns]"))
|
||
|
|
||
|
# Conversion to Int64Index:
|
||
|
assert not is_timedelta64_ns_dtype(tdi.astype("timedelta64"))
|
||
|
assert not is_timedelta64_ns_dtype(tdi.astype("timedelta64[h]"))
|
||
|
|
||
|
|
||
|
class TestIsScalar:
    """Tests for ``is_scalar`` across builtin, NumPy and pandas objects."""

    def test_is_scalar_builtin_scalars(self):
        """Builtin and stdlib scalar values are scalars."""
        scalars = [
            None,
            True,
            False,
            Fraction(),
            0.0,
            1,
            complex(2),
            float("NaN"),
            np.nan,
            "foobar",
            b"foobar",
            datetime(2014, 1, 1),
            date(2014, 1, 1),
            time(12, 0),
            timedelta(hours=1),
            pd.NaT,
            pd.NA,
        ]
        for value in scalars:
            assert is_scalar(value)

    def test_is_scalar_builtin_nonscalars(self):
        """Builtin containers, slices and ``Ellipsis`` are not scalars."""
        non_scalars = [{}, [], [1], (), (1,), slice(None), Ellipsis]
        for value in non_scalars:
            assert not is_scalar(value)

    def test_is_scalar_numpy_array_scalars(self):
        """NumPy scalar types are scalars."""
        # ``np.unicode_`` is not listed separately: it is only an alias of
        # ``np.str_`` and no longer exists in NumPy 2.0.
        numpy_scalars = [
            np.int64(1),
            np.float64(1.0),
            np.int32(1),
            np.complex64(2),
            np.object_("foobar"),
            np.str_("foobar"),
            np.bytes_(b"foobar"),
            np.datetime64("2014-01-01"),
            np.timedelta64(1, "h"),
        ]
        for value in numpy_scalars:
            assert is_scalar(value)

    def test_is_scalar_numpy_zerodim_arrays(self):
        """Zero-dim arrays are not scalars, but their unwrapped item is."""
        zerodim_arrays = [
            np.array(1),
            np.array("foobar"),
            np.array(np.datetime64("2014-01-01")),
            np.array(np.timedelta64(1, "h")),
            np.array(np.datetime64("NaT")),
        ]
        for zerodim in zerodim_arrays:
            assert not is_scalar(zerodim)
            assert is_scalar(lib.item_from_zerodim(zerodim))

    @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
    def test_is_scalar_numpy_arrays(self):
        """Empty arrays and ``np.matrix`` are not scalars."""
        for container in (np.array([]), np.array([[]]), np.matrix("1; 2")):
            assert not is_scalar(container)

    def test_is_scalar_pandas_scalars(self):
        """pandas scalar objects, including offsets, are scalars."""
        pandas_scalars = [
            Timestamp("2014-01-01"),
            Timedelta(hours=1),
            Period("2014-01-01"),
            Interval(left=0, right=1),
            DateOffset(days=1),
            pd.offsets.Minute(3),
        ]
        for value in pandas_scalars:
            assert is_scalar(value)

    def test_is_scalar_pandas_containers(self):
        """pandas containers and extension arrays are not scalars."""
        containers = [
            Series(dtype=object),
            Series([1]),
            DataFrame(),
            DataFrame([[1]]),
            Index([]),
            Index([1]),
            Categorical([]),
            DatetimeIndex([])._data,
            TimedeltaIndex([])._data,
            DatetimeIndex([])._data.to_period("D"),
            pd.array([1, 2, 3]),
        ]
        for container in containers:
            assert not is_scalar(container)

    def test_is_scalar_number(self):
        """An instance of a concrete ``Number`` subclass is a scalar."""
        # Number() is not recognized by PyNumber_Check, so by extension
        # is not recognized by is_scalar, but instances of non-abstract
        # subclasses are.

        class Numeric(Number):
            def __init__(self, value):
                self.value = value

            def __int__(self):
                return self.value

        num = Numeric(1)
        assert is_scalar(num)
|
||
|
|
||
|
|
||
|
def test_datetimeindex_from_empty_datetime64_array():
    """An empty datetime64 array of any tested unit gives an empty index."""
    for unit in ("ms", "us", "ns"):
        empty = np.array([], dtype=f"datetime64[{unit}]")
        assert len(DatetimeIndex(empty)) == 0
|
||
|
|
||
|
|
||
|
def test_nan_to_nat_conversions():
    """Assigning NaN into a Timestamp column must read back as ``pd.NaT``."""
    frame = DataFrame(
        {"A": np.asarray(range(10), dtype="float64"), "B": Timestamp("20010101")}
    )

    # NaN written through the whole-row iloc slice
    frame.iloc[3:6, :] = np.nan
    assert frame.loc[4, "B"] is pd.NaT

    # NaN written through a slice of a copy of the column
    column = frame["B"].copy()
    column[8:9] = np.nan
    assert column[8] is pd.NaT
|
||
|
|
||
|
|
||
|
@td.skip_if_no_scipy
@pytest.mark.filterwarnings("ignore::PendingDeprecationWarning")
def test_is_scipy_sparse(spmatrix):
    """``is_scipy_sparse`` accepts the ``spmatrix`` fixture output, not ndarrays."""
    sparse = spmatrix([[0, 1]])
    assert is_scipy_sparse(sparse)

    dense = np.array([1])
    assert not is_scipy_sparse(dense)
|
||
|
|
||
|
|
||
|
def test_ensure_int32():
    """``ensure_int32`` returns int32 data for both int32 and int64 input."""
    for source_dtype in (np.int32, np.int64):
        converted = ensure_int32(np.arange(10, dtype=source_dtype))
        assert converted.dtype == np.int32
|