projektAI/venv/Lib/site-packages/pandas/tests/series/methods/test_convert_dtypes.py
2021-06-06 22:13:05 +02:00

208 lines
5.7 KiB
Python

from itertools import product
import numpy as np
import pytest
from pandas.core.dtypes.common import is_interval_dtype
import pandas as pd
import pandas._testing as tm
# Each test case consists of a tuple with the data and dtype to create the
# test Series, the default dtype for the expected result (which is valid
# for most cases), and the specific cases where the result deviates from
# this default. Those overrides are defined as a dict with (keyword, val) as
# dictionary key. In case of multiple items, the last override takes precendence.
test_cases = [
(
# data
[1, 2, 3],
# original dtype
np.dtype("int32"),
# default expected dtype
"Int32",
# exceptions on expected dtype
{("convert_integer", False): np.dtype("int32")},
),
(
[1, 2, 3],
np.dtype("int64"),
"Int64",
{("convert_integer", False): np.dtype("int64")},
),
(
["x", "y", "z"],
np.dtype("O"),
pd.StringDtype(),
{("convert_string", False): np.dtype("O")},
),
(
[True, False, np.nan],
np.dtype("O"),
pd.BooleanDtype(),
{("convert_boolean", False): np.dtype("O")},
),
(
["h", "i", np.nan],
np.dtype("O"),
pd.StringDtype(),
{("convert_string", False): np.dtype("O")},
),
( # GH32117
["h", "i", 1],
np.dtype("O"),
np.dtype("O"),
{},
),
(
[10, np.nan, 20],
np.dtype("float"),
"Int64",
{
("convert_integer", False, "convert_floating", True): "Float64",
("convert_integer", False, "convert_floating", False): np.dtype("float"),
},
),
(
[np.nan, 100.5, 200],
np.dtype("float"),
"Float64",
{("convert_floating", False): np.dtype("float")},
),
(
[3, 4, 5],
"Int8",
"Int8",
{},
),
(
[[1, 2], [3, 4], [5]],
None,
np.dtype("O"),
{},
),
(
[4, 5, 6],
np.dtype("uint32"),
"UInt32",
{("convert_integer", False): np.dtype("uint32")},
),
(
[-10, 12, 13],
np.dtype("i1"),
"Int8",
{("convert_integer", False): np.dtype("i1")},
),
(
[1.2, 1.3],
np.dtype("float32"),
"Float32",
{("convert_floating", False): np.dtype("float32")},
),
(
[1, 2.0],
object,
"Int64",
{
("convert_integer", False): "Float64",
("convert_integer", False, "convert_floating", False): np.dtype("float"),
("infer_objects", False): np.dtype("object"),
},
),
(
[1, 2.5],
object,
"Float64",
{
("convert_floating", False): np.dtype("float"),
("infer_objects", False): np.dtype("object"),
},
),
(["a", "b"], pd.CategoricalDtype(), pd.CategoricalDtype(), {}),
(
pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]),
pd.DatetimeTZDtype(tz="UTC"),
pd.DatetimeTZDtype(tz="UTC"),
{},
),
(
pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]),
"datetime64[ns]",
np.dtype("datetime64[ns]"),
{},
),
(
pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]),
object,
np.dtype("datetime64[ns]"),
{("infer_objects", False): np.dtype("object")},
),
(pd.period_range("1/1/2011", freq="M", periods=3), None, pd.PeriodDtype("M"), {}),
(
pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]),
None,
pd.IntervalDtype("int64"),
{},
),
]
class TestSeriesConvertDtypes:
@pytest.mark.parametrize(
"data, maindtype, expected_default, expected_other",
test_cases,
)
@pytest.mark.parametrize("params", product(*[(True, False)] * 5))
def test_convert_dtypes(
self, data, maindtype, params, expected_default, expected_other
):
if maindtype is not None:
series = pd.Series(data, dtype=maindtype)
else:
series = pd.Series(data)
result = series.convert_dtypes(*params)
param_names = [
"infer_objects",
"convert_string",
"convert_integer",
"convert_boolean",
"convert_floating",
]
params_dict = dict(zip(param_names, params))
expected_dtype = expected_default
for spec, dtype in expected_other.items():
if all(params_dict[key] is val for key, val in zip(spec[::2], spec[1::2])):
expected_dtype = dtype
expected = pd.Series(data, dtype=expected_dtype)
tm.assert_series_equal(result, expected)
# Test that it is a copy
copy = series.copy(deep=True)
if is_interval_dtype(result.dtype) and result.dtype.subtype.kind in ["i", "u"]:
msg = "Cannot set float NaN to integer-backed IntervalArray"
with pytest.raises(ValueError, match=msg):
result[result.notna()] = np.nan
else:
result[result.notna()] = np.nan
# Make sure original not changed
tm.assert_series_equal(series, copy)
def test_convert_string_dtype(self):
# https://github.com/pandas-dev/pandas/issues/31731 -> converting columns
# that are already string dtype
df = pd.DataFrame(
{"A": ["a", "b", pd.NA], "B": ["ä", "ö", "ü"]}, dtype="string"
)
result = df.convert_dtypes()
tm.assert_frame_equal(df, result)
def test_convert_bool_dtype(self):
# GH32287
df = pd.DataFrame({"A": pd.array([True])})
tm.assert_frame_equal(df, df.convert_dtypes())