183 lines
7.2 KiB
Python
183 lines
7.2 KiB
Python
![]() |
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||
|
|
||
|
from pandas import Categorical, CategoricalIndex, Index, Series, Timestamp
|
||
|
import pandas._testing as tm
|
||
|
|
||
|
|
||
|
class TestCategoricalDtypes:
|
||
|
def test_is_dtype_equal_deprecated(self):
|
||
|
# GH#37545
|
||
|
c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False)
|
||
|
|
||
|
with tm.assert_produces_warning(FutureWarning):
|
||
|
c1.is_dtype_equal(c1)
|
||
|
|
||
|
def test_categories_match_up_to_permutation(self):
|
||
|
|
||
|
# test dtype comparisons between cats
|
||
|
|
||
|
c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False)
|
||
|
c2 = Categorical(list("aabca"), categories=list("cab"), ordered=False)
|
||
|
c3 = Categorical(list("aabca"), categories=list("cab"), ordered=True)
|
||
|
assert c1._categories_match_up_to_permutation(c1)
|
||
|
assert c2._categories_match_up_to_permutation(c2)
|
||
|
assert c3._categories_match_up_to_permutation(c3)
|
||
|
assert c1._categories_match_up_to_permutation(c2)
|
||
|
assert not c1._categories_match_up_to_permutation(c3)
|
||
|
assert not c1._categories_match_up_to_permutation(Index(list("aabca")))
|
||
|
assert not c1._categories_match_up_to_permutation(c1.astype(object))
|
||
|
assert c1._categories_match_up_to_permutation(CategoricalIndex(c1))
|
||
|
assert c1._categories_match_up_to_permutation(
|
||
|
CategoricalIndex(c1, categories=list("cab"))
|
||
|
)
|
||
|
assert not c1._categories_match_up_to_permutation(
|
||
|
CategoricalIndex(c1, ordered=True)
|
||
|
)
|
||
|
|
||
|
# GH 16659
|
||
|
s1 = Series(c1)
|
||
|
s2 = Series(c2)
|
||
|
s3 = Series(c3)
|
||
|
assert c1._categories_match_up_to_permutation(s1)
|
||
|
assert c2._categories_match_up_to_permutation(s2)
|
||
|
assert c3._categories_match_up_to_permutation(s3)
|
||
|
assert c1._categories_match_up_to_permutation(s2)
|
||
|
assert not c1._categories_match_up_to_permutation(s3)
|
||
|
assert not c1._categories_match_up_to_permutation(s1.astype(object))
|
||
|
|
||
|
def test_set_dtype_same(self):
|
||
|
c = Categorical(["a", "b", "c"])
|
||
|
result = c._set_dtype(CategoricalDtype(["a", "b", "c"]))
|
||
|
tm.assert_categorical_equal(result, c)
|
||
|
|
||
|
def test_set_dtype_new_categories(self):
|
||
|
c = Categorical(["a", "b", "c"])
|
||
|
result = c._set_dtype(CategoricalDtype(list("abcd")))
|
||
|
tm.assert_numpy_array_equal(result.codes, c.codes)
|
||
|
tm.assert_index_equal(result.dtype.categories, Index(list("abcd")))
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"values, categories, new_categories",
|
||
|
[
|
||
|
# No NaNs, same cats, same order
|
||
|
(["a", "b", "a"], ["a", "b"], ["a", "b"]),
|
||
|
# No NaNs, same cats, different order
|
||
|
(["a", "b", "a"], ["a", "b"], ["b", "a"]),
|
||
|
# Same, unsorted
|
||
|
(["b", "a", "a"], ["a", "b"], ["a", "b"]),
|
||
|
# No NaNs, same cats, different order
|
||
|
(["b", "a", "a"], ["a", "b"], ["b", "a"]),
|
||
|
# NaNs
|
||
|
(["a", "b", "c"], ["a", "b"], ["a", "b"]),
|
||
|
(["a", "b", "c"], ["a", "b"], ["b", "a"]),
|
||
|
(["b", "a", "c"], ["a", "b"], ["a", "b"]),
|
||
|
(["b", "a", "c"], ["a", "b"], ["a", "b"]),
|
||
|
# Introduce NaNs
|
||
|
(["a", "b", "c"], ["a", "b"], ["a"]),
|
||
|
(["a", "b", "c"], ["a", "b"], ["b"]),
|
||
|
(["b", "a", "c"], ["a", "b"], ["a"]),
|
||
|
(["b", "a", "c"], ["a", "b"], ["a"]),
|
||
|
# No overlap
|
||
|
(["a", "b", "c"], ["a", "b"], ["d", "e"]),
|
||
|
],
|
||
|
)
|
||
|
@pytest.mark.parametrize("ordered", [True, False])
|
||
|
def test_set_dtype_many(self, values, categories, new_categories, ordered):
|
||
|
c = Categorical(values, categories)
|
||
|
expected = Categorical(values, new_categories, ordered)
|
||
|
result = c._set_dtype(expected.dtype)
|
||
|
tm.assert_categorical_equal(result, expected)
|
||
|
|
||
|
def test_set_dtype_no_overlap(self):
|
||
|
c = Categorical(["a", "b", "c"], ["d", "e"])
|
||
|
result = c._set_dtype(CategoricalDtype(["a", "b"]))
|
||
|
expected = Categorical([None, None, None], categories=["a", "b"])
|
||
|
tm.assert_categorical_equal(result, expected)
|
||
|
|
||
|
def test_codes_dtypes(self):
|
||
|
|
||
|
# GH 8453
|
||
|
result = Categorical(["foo", "bar", "baz"])
|
||
|
assert result.codes.dtype == "int8"
|
||
|
|
||
|
result = Categorical([f"foo{i:05d}" for i in range(400)])
|
||
|
assert result.codes.dtype == "int16"
|
||
|
|
||
|
result = Categorical([f"foo{i:05d}" for i in range(40000)])
|
||
|
assert result.codes.dtype == "int32"
|
||
|
|
||
|
# adding cats
|
||
|
result = Categorical(["foo", "bar", "baz"])
|
||
|
assert result.codes.dtype == "int8"
|
||
|
result = result.add_categories([f"foo{i:05d}" for i in range(400)])
|
||
|
assert result.codes.dtype == "int16"
|
||
|
|
||
|
# removing cats
|
||
|
result = result.remove_categories([f"foo{i:05d}" for i in range(300)])
|
||
|
assert result.codes.dtype == "int8"
|
||
|
|
||
|
@pytest.mark.parametrize("ordered", [True, False])
|
||
|
def test_astype(self, ordered):
|
||
|
# string
|
||
|
cat = Categorical(list("abbaaccc"), ordered=ordered)
|
||
|
result = cat.astype(object)
|
||
|
expected = np.array(cat)
|
||
|
tm.assert_numpy_array_equal(result, expected)
|
||
|
|
||
|
msg = r"Cannot cast object dtype to <class 'float'>"
|
||
|
with pytest.raises(ValueError, match=msg):
|
||
|
cat.astype(float)
|
||
|
|
||
|
# numeric
|
||
|
cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered)
|
||
|
result = cat.astype(object)
|
||
|
expected = np.array(cat, dtype=object)
|
||
|
tm.assert_numpy_array_equal(result, expected)
|
||
|
|
||
|
result = cat.astype(int)
|
||
|
expected = np.array(cat, dtype="int")
|
||
|
tm.assert_numpy_array_equal(result, expected)
|
||
|
|
||
|
result = cat.astype(float)
|
||
|
expected = np.array(cat, dtype=float)
|
||
|
tm.assert_numpy_array_equal(result, expected)
|
||
|
|
||
|
@pytest.mark.parametrize("dtype_ordered", [True, False])
|
||
|
@pytest.mark.parametrize("cat_ordered", [True, False])
|
||
|
def test_astype_category(self, dtype_ordered, cat_ordered):
|
||
|
# GH 10696/18593
|
||
|
data = list("abcaacbab")
|
||
|
cat = Categorical(data, categories=list("bac"), ordered=cat_ordered)
|
||
|
|
||
|
# standard categories
|
||
|
dtype = CategoricalDtype(ordered=dtype_ordered)
|
||
|
result = cat.astype(dtype)
|
||
|
expected = Categorical(data, categories=cat.categories, ordered=dtype_ordered)
|
||
|
tm.assert_categorical_equal(result, expected)
|
||
|
|
||
|
# non-standard categories
|
||
|
dtype = CategoricalDtype(list("adc"), dtype_ordered)
|
||
|
result = cat.astype(dtype)
|
||
|
expected = Categorical(data, dtype=dtype)
|
||
|
tm.assert_categorical_equal(result, expected)
|
||
|
|
||
|
if dtype_ordered is False:
|
||
|
# dtype='category' can't specify ordered, so only test once
|
||
|
result = cat.astype("category")
|
||
|
expected = cat
|
||
|
tm.assert_categorical_equal(result, expected)
|
||
|
|
||
|
def test_iter_python_types(self):
|
||
|
# GH-19909
|
||
|
cat = Categorical([1, 2])
|
||
|
assert isinstance(list(cat)[0], int)
|
||
|
assert isinstance(cat.tolist()[0], int)
|
||
|
|
||
|
def test_iter_python_types_datetime(self):
|
||
|
cat = Categorical([Timestamp("2017-01-01"), Timestamp("2017-01-02")])
|
||
|
assert isinstance(list(cat)[0], Timestamp)
|
||
|
assert isinstance(cat.tolist()[0], Timestamp)
|