LSR/env/lib/python3.6/site-packages/pandas/tests/series/test_internals.py
2020-06-04 17:24:47 +02:00

245 lines
8.7 KiB
Python

from datetime import datetime
import numpy as np
import pytest
import pandas as pd
from pandas import NaT, Series, Timestamp
import pandas._testing as tm
from pandas.core.internals.blocks import IntBlock
class TestSeriesInternals:
# GH 10265
def test_convert(self):
# Tests: All to nans, coerce, true
# Test coercion returns correct type
s = Series(["a", "b", "c"])
results = s._convert(datetime=True, coerce=True)
expected = Series([NaT] * 3)
tm.assert_series_equal(results, expected)
results = s._convert(numeric=True, coerce=True)
expected = Series([np.nan] * 3)
tm.assert_series_equal(results, expected)
expected = Series([NaT] * 3, dtype=np.dtype("m8[ns]"))
results = s._convert(timedelta=True, coerce=True)
tm.assert_series_equal(results, expected)
dt = datetime(2001, 1, 1, 0, 0)
td = dt - datetime(2000, 1, 1, 0, 0)
# Test coercion with mixed types
s = Series(["a", "3.1415", dt, td])
results = s._convert(datetime=True, coerce=True)
expected = Series([NaT, NaT, dt, NaT])
tm.assert_series_equal(results, expected)
results = s._convert(numeric=True, coerce=True)
expected = Series([np.nan, 3.1415, np.nan, np.nan])
tm.assert_series_equal(results, expected)
results = s._convert(timedelta=True, coerce=True)
expected = Series([NaT, NaT, NaT, td], dtype=np.dtype("m8[ns]"))
tm.assert_series_equal(results, expected)
# Test standard conversion returns original
results = s._convert(datetime=True)
tm.assert_series_equal(results, s)
results = s._convert(numeric=True)
expected = Series([np.nan, 3.1415, np.nan, np.nan])
tm.assert_series_equal(results, expected)
results = s._convert(timedelta=True)
tm.assert_series_equal(results, s)
# test pass-through and non-conversion when other types selected
s = Series(["1.0", "2.0", "3.0"])
results = s._convert(datetime=True, numeric=True, timedelta=True)
expected = Series([1.0, 2.0, 3.0])
tm.assert_series_equal(results, expected)
results = s._convert(True, False, True)
tm.assert_series_equal(results, s)
s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)], dtype="O")
results = s._convert(datetime=True, numeric=True, timedelta=True)
expected = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)])
tm.assert_series_equal(results, expected)
results = s._convert(datetime=False, numeric=True, timedelta=True)
tm.assert_series_equal(results, s)
td = datetime(2001, 1, 1, 0, 0) - datetime(2000, 1, 1, 0, 0)
s = Series([td, td], dtype="O")
results = s._convert(datetime=True, numeric=True, timedelta=True)
expected = Series([td, td])
tm.assert_series_equal(results, expected)
results = s._convert(True, True, False)
tm.assert_series_equal(results, s)
s = Series([1.0, 2, 3], index=["a", "b", "c"])
result = s._convert(numeric=True)
tm.assert_series_equal(result, s)
# force numeric conversion
r = s.copy().astype("O")
r["a"] = "1"
result = r._convert(numeric=True)
tm.assert_series_equal(result, s)
r = s.copy().astype("O")
r["a"] = "1."
result = r._convert(numeric=True)
tm.assert_series_equal(result, s)
r = s.copy().astype("O")
r["a"] = "garbled"
result = r._convert(numeric=True)
expected = s.copy()
expected["a"] = np.nan
tm.assert_series_equal(result, expected)
# GH 4119, not converting a mixed type (e.g.floats and object)
s = Series([1, "na", 3, 4])
result = s._convert(datetime=True, numeric=True)
expected = Series([1, np.nan, 3, 4])
tm.assert_series_equal(result, expected)
s = Series([1, "", 3, 4])
result = s._convert(datetime=True, numeric=True)
tm.assert_series_equal(result, expected)
# dates
s = Series(
[
datetime(2001, 1, 1, 0, 0),
datetime(2001, 1, 2, 0, 0),
datetime(2001, 1, 3, 0, 0),
]
)
s2 = Series(
[
datetime(2001, 1, 1, 0, 0),
datetime(2001, 1, 2, 0, 0),
datetime(2001, 1, 3, 0, 0),
"foo",
1.0,
1,
Timestamp("20010104"),
"20010105",
],
dtype="O",
)
result = s._convert(datetime=True)
expected = Series(
[Timestamp("20010101"), Timestamp("20010102"), Timestamp("20010103")],
dtype="M8[ns]",
)
tm.assert_series_equal(result, expected)
result = s._convert(datetime=True, coerce=True)
tm.assert_series_equal(result, expected)
expected = Series(
[
Timestamp("20010101"),
Timestamp("20010102"),
Timestamp("20010103"),
NaT,
NaT,
NaT,
Timestamp("20010104"),
Timestamp("20010105"),
],
dtype="M8[ns]",
)
result = s2._convert(datetime=True, numeric=False, timedelta=False, coerce=True)
tm.assert_series_equal(result, expected)
result = s2._convert(datetime=True, coerce=True)
tm.assert_series_equal(result, expected)
s = Series(["foo", "bar", 1, 1.0], dtype="O")
result = s._convert(datetime=True, coerce=True)
expected = Series([NaT] * 2 + [Timestamp(1)] * 2)
tm.assert_series_equal(result, expected)
# preserver if non-object
s = Series([1], dtype="float32")
result = s._convert(datetime=True, coerce=True)
tm.assert_series_equal(result, s)
# r = s.copy()
# r[0] = np.nan
# result = r._convert(convert_dates=True,convert_numeric=False)
# assert result.dtype == 'M8[ns]'
# dateutil parses some single letters into today's value as a date
expected = Series([NaT])
for x in "abcdefghijklmnopqrstuvwxyz":
s = Series([x])
result = s._convert(datetime=True, coerce=True)
tm.assert_series_equal(result, expected)
s = Series([x.upper()])
result = s._convert(datetime=True, coerce=True)
tm.assert_series_equal(result, expected)
def test_convert_no_arg_error(self):
s = Series(["1.0", "2"])
msg = r"At least one of datetime, numeric or timedelta must be True\."
with pytest.raises(ValueError, match=msg):
s._convert()
def test_convert_preserve_bool(self):
s = Series([1, True, 3, 5], dtype=object)
r = s._convert(datetime=True, numeric=True)
e = Series([1, 1, 3, 5], dtype="i8")
tm.assert_series_equal(r, e)
def test_convert_preserve_all_bool(self):
s = Series([False, True, False, False], dtype=object)
r = s._convert(datetime=True, numeric=True)
e = Series([False, True, False, False], dtype=bool)
tm.assert_series_equal(r, e)
def test_constructor_no_pandas_array(self):
ser = pd.Series([1, 2, 3])
result = pd.Series(ser.array)
tm.assert_series_equal(ser, result)
assert isinstance(result._data.blocks[0], IntBlock)
def test_astype_no_pandas_dtype(self):
# https://github.com/pandas-dev/pandas/pull/24866
ser = pd.Series([1, 2], dtype="int64")
# Don't have PandasDtype in the public API, so we use `.array.dtype`,
# which is a PandasDtype.
result = ser.astype(ser.array.dtype)
tm.assert_series_equal(result, ser)
def test_from_array(self):
result = pd.Series(pd.array(["1H", "2H"], dtype="timedelta64[ns]"))
assert result._data.blocks[0].is_extension is False
result = pd.Series(pd.array(["2015"], dtype="datetime64[ns]"))
assert result._data.blocks[0].is_extension is False
def test_from_list_dtype(self):
result = pd.Series(["1H", "2H"], dtype="timedelta64[ns]")
assert result._data.blocks[0].is_extension is False
result = pd.Series(["2015"], dtype="datetime64[ns]")
assert result._data.blocks[0].is_extension is False
def test_hasnans_unchached_for_series():
# GH#19700
idx = pd.Index([0, 1])
assert idx.hasnans is False
assert "hasnans" in idx._cache
ser = idx.to_series()
assert ser.hasnans is False
assert not hasattr(ser, "_cache")
ser.iloc[-1] = np.nan
assert ser.hasnans is True
assert Series.hasnans.__doc__ == pd.Index.hasnans.__doc__