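# File: projektAI/venv/Lib/site-packages/pandas/tests/series/test_dtypes.py
# Web file-viewer header (not part of the module): 141 lines, 5.4 KiB, Python
# Viewer links: Raw / Normal View / History
# Timestamp shown by the viewer: 2021-06-06 22:13:05 +02:00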
import numpy as np
import pytest
from pandas.core.dtypes.dtypes import CategoricalDtype
import pandas as pd
from pandas import Categorical, DataFrame, Series
import pandas._testing as tm
class TestSeriesDtypes:
def test_dtype(self, datetime_series):
assert datetime_series.dtype == np.dtype("float64")
assert datetime_series.dtypes == np.dtype("float64")
def test_astype_from_categorical(self):
items = ["a", "b", "c", "a"]
s = Series(items)
exp = Series(Categorical(items))
res = s.astype("category")
tm.assert_series_equal(res, exp)
items = [1, 2, 3, 1]
s = Series(items)
exp = Series(Categorical(items))
res = s.astype("category")
tm.assert_series_equal(res, exp)
df = DataFrame({"cats": [1, 2, 3, 4, 5, 6], "vals": [1, 2, 3, 4, 5, 6]})
cats = Categorical([1, 2, 3, 4, 5, 6])
exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]})
df["cats"] = df["cats"].astype("category")
tm.assert_frame_equal(exp_df, df)
df = DataFrame(
{"cats": ["a", "b", "b", "a", "a", "d"], "vals": [1, 2, 3, 4, 5, 6]}
)
cats = Categorical(["a", "b", "b", "a", "a", "d"])
exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]})
df["cats"] = df["cats"].astype("category")
tm.assert_frame_equal(exp_df, df)
# with keywords
lst = ["a", "b", "c", "a"]
s = Series(lst)
exp = Series(Categorical(lst, ordered=True))
res = s.astype(CategoricalDtype(None, ordered=True))
tm.assert_series_equal(res, exp)
exp = Series(Categorical(lst, categories=list("abcdef"), ordered=True))
res = s.astype(CategoricalDtype(list("abcdef"), ordered=True))
tm.assert_series_equal(res, exp)
def test_astype_categorical_to_other(self):
cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)])
ser = Series(np.random.RandomState(0).randint(0, 10000, 100)).sort_values()
ser = pd.cut(ser, range(0, 10500, 500), right=False, labels=cat)
expected = ser
tm.assert_series_equal(ser.astype("category"), expected)
tm.assert_series_equal(ser.astype(CategoricalDtype()), expected)
msg = r"Cannot cast object dtype to float64"
with pytest.raises(ValueError, match=msg):
ser.astype("float64")
cat = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]))
exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"])
tm.assert_series_equal(cat.astype("str"), exp)
s2 = Series(Categorical(["1", "2", "3", "4"]))
exp2 = Series([1, 2, 3, 4]).astype("int")
tm.assert_series_equal(s2.astype("int"), exp2)
# object don't sort correctly, so just compare that we have the same
# values
def cmp(a, b):
tm.assert_almost_equal(np.sort(np.unique(a)), np.sort(np.unique(b)))
expected = Series(np.array(ser.values), name="value_group")
cmp(ser.astype("object"), expected)
cmp(ser.astype(np.object_), expected)
# array conversion
tm.assert_almost_equal(np.array(ser), np.array(ser.values))
tm.assert_series_equal(ser.astype("category"), ser)
tm.assert_series_equal(ser.astype(CategoricalDtype()), ser)
roundtrip_expected = ser.cat.set_categories(
ser.cat.categories.sort_values()
).cat.remove_unused_categories()
result = ser.astype("object").astype("category")
tm.assert_series_equal(result, roundtrip_expected)
result = ser.astype("object").astype(CategoricalDtype())
tm.assert_series_equal(result, roundtrip_expected)
def test_astype_categorical_invalid_conversions(self):
# invalid conversion (these are NOT a dtype)
cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)])
ser = Series(np.random.RandomState(0).randint(0, 10000, 100)).sort_values()
ser = pd.cut(ser, range(0, 10500, 500), right=False, labels=cat)
msg = (
"dtype '<class 'pandas.core.arrays.categorical.Categorical'>' "
"not understood"
)
with pytest.raises(TypeError, match=msg):
ser.astype(Categorical)
with pytest.raises(TypeError, match=msg):
ser.astype("object").astype(Categorical)
def test_categorical_astype_to_int(self, any_int_or_nullable_int_dtype):
# GH 39402
df = DataFrame(data={"col1": pd.array([2.0, 1.0, 3.0])})
df.col1 = df.col1.astype("category")
df.col1 = df.col1.astype(any_int_or_nullable_int_dtype)
expected = DataFrame(
{"col1": pd.array([2, 1, 3], dtype=any_int_or_nullable_int_dtype)}
)
tm.assert_frame_equal(df, expected)
def test_series_to_categorical(self):
# see gh-16524: test conversion of Series to Categorical
series = Series(["a", "b", "c"])
result = Series(series, dtype="category")
expected = Series(["a", "b", "c"], dtype="category")
tm.assert_series_equal(result, expected)
def test_reindex_astype_order_consistency(self):
# GH 17444
s = Series([1, 2, 3], index=[2, 0, 1])
new_index = [0, 1, 2]
temp_dtype = "category"
new_dtype = str
s1 = s.reindex(new_index).astype(temp_dtype).astype(new_dtype)
s2 = s.astype(temp_dtype).reindex(new_index).astype(new_dtype)
tm.assert_series_equal(s1, s2)