# 3RNN/Lib/site-packages/pandas/tests/series/test_formats.py
#
# 578 lines
# 17 KiB
# Python
# Raw Permalink Normal View History
#
# 2024-05-26 19:49:15 +02:00
from datetime import (
datetime,
timedelta,
)
import numpy as np
import pytest
from pandas._config import using_pyarrow_string_dtype
import pandas as pd
from pandas import (
Categorical,
DataFrame,
Index,
Series,
date_range,
option_context,
period_range,
timedelta_range,
)
import pandas._testing as tm
class TestSeriesRepr:
def test_multilevel_name_print_0(self):
# GH#55415 None does not get printed, but 0 does
# (matching DataFrame and flat index behavior)
mi = pd.MultiIndex.from_product([range(2, 3), range(3, 4)], names=[0, None])
ser = Series(1.5, index=mi)
res = repr(ser)
expected = "0 \n2 3 1.5\ndtype: float64"
assert res == expected
def test_multilevel_name_print(self, lexsorted_two_level_string_multiindex):
index = lexsorted_two_level_string_multiindex
ser = Series(range(len(index)), index=index, name="sth")
expected = [
"first second",
"foo one 0",
" two 1",
" three 2",
"bar one 3",
" two 4",
"baz two 5",
" three 6",
"qux one 7",
" two 8",
" three 9",
"Name: sth, dtype: int64",
]
expected = "\n".join(expected)
assert repr(ser) == expected
def test_small_name_printing(self):
# Test small Series.
s = Series([0, 1, 2])
s.name = "test"
assert "Name: test" in repr(s)
s.name = None
assert "Name:" not in repr(s)
def test_big_name_printing(self):
# Test big Series (diff code path).
s = Series(range(1000))
s.name = "test"
assert "Name: test" in repr(s)
s.name = None
assert "Name:" not in repr(s)
def test_empty_name_printing(self):
s = Series(index=date_range("20010101", "20020101"), name="test", dtype=object)
assert "Name: test" in repr(s)
@pytest.mark.parametrize("args", [(), (0, -1)])
def test_float_range(self, args):
str(
Series(
np.random.default_rng(2).standard_normal(1000),
index=np.arange(1000, *args),
)
)
def test_empty_object(self):
# empty
str(Series(dtype=object))
def test_string(self, string_series):
str(string_series)
str(string_series.astype(int))
# with NaNs
string_series[5:7] = np.nan
str(string_series)
def test_object(self, object_series):
str(object_series)
def test_datetime(self, datetime_series):
str(datetime_series)
# with Nones
ots = datetime_series.astype("O")
ots[::2] = None
repr(ots)
@pytest.mark.parametrize(
"name",
[
"",
1,
1.2,
"foo",
"\u03B1\u03B2\u03B3",
"loooooooooooooooooooooooooooooooooooooooooooooooooooong",
("foo", "bar", "baz"),
(1, 2),
("foo", 1, 2.3),
("\u03B1", "\u03B2", "\u03B3"),
("\u03B1", "bar"),
],
)
def test_various_names(self, name, string_series):
# various names
string_series.name = name
repr(string_series)
def test_tuple_name(self):
biggie = Series(
np.random.default_rng(2).standard_normal(1000),
index=np.arange(1000),
name=("foo", "bar", "baz"),
)
repr(biggie)
@pytest.mark.parametrize("arg", [100, 1001])
def test_tidy_repr_name_0(self, arg):
# tidy repr
ser = Series(np.random.default_rng(2).standard_normal(arg), name=0)
rep_str = repr(ser)
assert "Name: 0" in rep_str
@pytest.mark.xfail(
using_pyarrow_string_dtype(), reason="TODO: investigate why this is failing"
)
def test_newline(self):
ser = Series(["a\n\r\tb"], name="a\n\r\td", index=["a\n\r\tf"])
assert "\t" not in repr(ser)
assert "\r" not in repr(ser)
assert "a\n" not in repr(ser)
@pytest.mark.parametrize(
"name, expected",
[
["foo", "Series([], Name: foo, dtype: int64)"],
[None, "Series([], dtype: int64)"],
],
)
def test_empty_int64(self, name, expected):
# with empty series (#4651)
s = Series([], dtype=np.int64, name=name)
assert repr(s) == expected
def test_repr_bool_fails(self, capsys):
s = Series(
[
DataFrame(np.random.default_rng(2).standard_normal((2, 2)))
for i in range(5)
]
)
# It works (with no Cython exception barf)!
repr(s)
captured = capsys.readouterr()
assert captured.err == ""
def test_repr_name_iterable_indexable(self):
s = Series([1, 2, 3], name=np.int64(3))
# it works!
repr(s)
s.name = ("\u05d0",) * 2
repr(s)
def test_repr_max_rows(self):
# GH 6863
with option_context("display.max_rows", None):
str(Series(range(1001))) # should not raise exception
def test_unicode_string_with_unicode(self):
df = Series(["\u05d0"], name="\u05d1")
str(df)
ser = Series(["\u03c3"] * 10)
repr(ser)
ser2 = Series(["\u05d0"] * 1000)
ser2.name = "title1"
repr(ser2)
def test_str_to_bytes_raises(self):
# GH 26447
df = Series(["abc"], name="abc")
msg = "^'str' object cannot be interpreted as an integer$"
with pytest.raises(TypeError, match=msg):
bytes(df)
def test_timeseries_repr_object_dtype(self):
index = Index(
[datetime(2000, 1, 1) + timedelta(i) for i in range(1000)], dtype=object
)
ts = Series(np.random.default_rng(2).standard_normal(len(index)), index)
repr(ts)
ts = Series(
np.arange(20, dtype=np.float64), index=date_range("2020-01-01", periods=20)
)
assert repr(ts).splitlines()[-1].startswith("Freq:")
ts2 = ts.iloc[np.random.default_rng(2).integers(0, len(ts) - 1, 400)]
repr(ts2).splitlines()[-1]
def test_latex_repr(self):
pytest.importorskip("jinja2") # uses Styler implementation
result = r"""\begin{tabular}{ll}
\toprule
& 0 \\
\midrule
0 & $\alpha$ \\
1 & b \\
2 & c \\
\bottomrule
\end{tabular}
"""
with option_context(
"styler.format.escape", None, "styler.render.repr", "latex"
):
s = Series([r"$\alpha$", "b", "c"])
assert result == s._repr_latex_()
assert s._repr_latex_() is None
def test_index_repr_in_frame_with_nan(self):
# see gh-25061
i = Index([1, np.nan])
s = Series([1, 2], index=i)
exp = """1.0 1\nNaN 2\ndtype: int64"""
assert repr(s) == exp
def test_format_pre_1900_dates(self):
rng = date_range("1/1/1850", "1/1/1950", freq="YE-DEC")
msg = "DatetimeIndex.format is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
rng.format()
ts = Series(1, index=rng)
repr(ts)
def test_series_repr_nat(self):
series = Series([0, 1000, 2000, pd.NaT._value], dtype="M8[ns]")
result = repr(series)
expected = (
"0 1970-01-01 00:00:00.000000\n"
"1 1970-01-01 00:00:00.000001\n"
"2 1970-01-01 00:00:00.000002\n"
"3 NaT\n"
"dtype: datetime64[ns]"
)
assert result == expected
def test_float_repr(self):
# GH#35603
# check float format when cast to object
ser = Series([1.0]).astype(object)
expected = "0 1.0\ndtype: object"
assert repr(ser) == expected
def test_different_null_objects(self):
# GH#45263
ser = Series([1, 2, 3, 4], [True, None, np.nan, pd.NaT])
result = repr(ser)
expected = "True 1\nNone 2\nNaN 3\nNaT 4\ndtype: int64"
assert result == expected
class TestCategoricalRepr:
def test_categorical_repr_unicode(self):
# see gh-21002
class County:
name = "San Sebastián"
state = "PR"
def __repr__(self) -> str:
return self.name + ", " + self.state
cat = Categorical([County() for _ in range(61)])
idx = Index(cat)
ser = idx.to_series()
repr(ser)
str(ser)
def test_categorical_repr(self, using_infer_string):
a = Series(Categorical([1, 2, 3, 4]))
exp = (
"0 1\n1 2\n2 3\n3 4\n"
"dtype: category\nCategories (4, int64): [1, 2, 3, 4]"
)
assert exp == a.__str__()
a = Series(Categorical(["a", "b"] * 25))
if using_infer_string:
exp = (
"0 a\n1 b\n"
" ..\n"
"48 a\n49 b\n"
"Length: 50, dtype: category\nCategories (2, string): [a, b]"
)
else:
exp = (
"0 a\n1 b\n"
" ..\n"
"48 a\n49 b\n"
"Length: 50, dtype: category\nCategories (2, object): ['a', 'b']"
)
with option_context("display.max_rows", 5):
assert exp == repr(a)
levs = list("abcdefghijklmnopqrstuvwxyz")
a = Series(Categorical(["a", "b"], categories=levs, ordered=True))
if using_infer_string:
exp = (
"0 a\n1 b\n"
"dtype: category\n"
"Categories (26, string): [a < b < c < d ... w < x < y < z]"
)
else:
exp = (
"0 a\n1 b\n"
"dtype: category\n"
"Categories (26, object): ['a' < 'b' < 'c' < 'd' ... "
"'w' < 'x' < 'y' < 'z']"
)
assert exp == a.__str__()
def test_categorical_series_repr(self):
s = Series(Categorical([1, 2, 3]))
exp = """0 1
1 2
2 3
dtype: category
Categories (3, int64): [1, 2, 3]"""
assert repr(s) == exp
s = Series(Categorical(np.arange(10)))
exp = f"""0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
dtype: category
Categories (10, {np.dtype(int)}): [0, 1, 2, 3, ..., 6, 7, 8, 9]"""
assert repr(s) == exp
def test_categorical_series_repr_ordered(self):
s = Series(Categorical([1, 2, 3], ordered=True))
exp = """0 1
1 2
2 3
dtype: category
Categories (3, int64): [1 < 2 < 3]"""
assert repr(s) == exp
s = Series(Categorical(np.arange(10), ordered=True))
exp = f"""0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
dtype: category
Categories (10, {np.dtype(int)}): [0 < 1 < 2 < 3 ... 6 < 7 < 8 < 9]"""
assert repr(s) == exp
def test_categorical_series_repr_datetime(self):
idx = date_range("2011-01-01 09:00", freq="h", periods=5)
s = Series(Categorical(idx))
exp = """0 2011-01-01 09:00:00
1 2011-01-01 10:00:00
2 2011-01-01 11:00:00
3 2011-01-01 12:00:00
4 2011-01-01 13:00:00
dtype: category
Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00,
2011-01-01 12:00:00, 2011-01-01 13:00:00]""" # noqa: E501
assert repr(s) == exp
idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern")
s = Series(Categorical(idx))
exp = """0 2011-01-01 09:00:00-05:00
1 2011-01-01 10:00:00-05:00
2 2011-01-01 11:00:00-05:00
3 2011-01-01 12:00:00-05:00
4 2011-01-01 13:00:00-05:00
dtype: category
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,
2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,
2011-01-01 13:00:00-05:00]""" # noqa: E501
assert repr(s) == exp
def test_categorical_series_repr_datetime_ordered(self):
idx = date_range("2011-01-01 09:00", freq="h", periods=5)
s = Series(Categorical(idx, ordered=True))
exp = """0 2011-01-01 09:00:00
1 2011-01-01 10:00:00
2 2011-01-01 11:00:00
3 2011-01-01 12:00:00
4 2011-01-01 13:00:00
dtype: category
Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa: E501
assert repr(s) == exp
idx = date_range("2011-01-01 09:00", freq="h", periods=5, tz="US/Eastern")
s = Series(Categorical(idx, ordered=True))
exp = """0 2011-01-01 09:00:00-05:00
1 2011-01-01 10:00:00-05:00
2 2011-01-01 11:00:00-05:00
3 2011-01-01 12:00:00-05:00
4 2011-01-01 13:00:00-05:00
dtype: category
Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
2011-01-01 13:00:00-05:00]""" # noqa: E501
assert repr(s) == exp
def test_categorical_series_repr_period(self):
idx = period_range("2011-01-01 09:00", freq="h", periods=5)
s = Series(Categorical(idx))
exp = """0 2011-01-01 09:00
1 2011-01-01 10:00
2 2011-01-01 11:00
3 2011-01-01 12:00
4 2011-01-01 13:00
dtype: category
Categories (5, period[h]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
2011-01-01 13:00]""" # noqa: E501
assert repr(s) == exp
idx = period_range("2011-01", freq="M", periods=5)
s = Series(Categorical(idx))
exp = """0 2011-01
1 2011-02
2 2011-03
3 2011-04
4 2011-05
dtype: category
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
assert repr(s) == exp
def test_categorical_series_repr_period_ordered(self):
idx = period_range("2011-01-01 09:00", freq="h", periods=5)
s = Series(Categorical(idx, ordered=True))
exp = """0 2011-01-01 09:00
1 2011-01-01 10:00
2 2011-01-01 11:00
3 2011-01-01 12:00
4 2011-01-01 13:00
dtype: category
Categories (5, period[h]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
2011-01-01 13:00]""" # noqa: E501
assert repr(s) == exp
idx = period_range("2011-01", freq="M", periods=5)
s = Series(Categorical(idx, ordered=True))
exp = """0 2011-01
1 2011-02
2 2011-03
3 2011-04
4 2011-05
dtype: category
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
assert repr(s) == exp
def test_categorical_series_repr_timedelta(self):
idx = timedelta_range("1 days", periods=5)
s = Series(Categorical(idx))
exp = """0 1 days
1 2 days
2 3 days
3 4 days
4 5 days
dtype: category
Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]"""
assert repr(s) == exp
idx = timedelta_range("1 hours", periods=10)
s = Series(Categorical(idx))
exp = """0 0 days 01:00:00
1 1 days 01:00:00
2 2 days 01:00:00
3 3 days 01:00:00
4 4 days 01:00:00
5 5 days 01:00:00
6 6 days 01:00:00
7 7 days 01:00:00
8 8 days 01:00:00
9 9 days 01:00:00
dtype: category
Categories (10, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00,
8 days 01:00:00, 9 days 01:00:00]""" # noqa: E501
assert repr(s) == exp
def test_categorical_series_repr_timedelta_ordered(self):
idx = timedelta_range("1 days", periods=5)
s = Series(Categorical(idx, ordered=True))
exp = """0 1 days
1 2 days
2 3 days
3 4 days
4 5 days
dtype: category
Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]"""
assert repr(s) == exp
idx = timedelta_range("1 hours", periods=10)
s = Series(Categorical(idx, ordered=True))
exp = """0 0 days 01:00:00
1 1 days 01:00:00
2 2 days 01:00:00
3 3 days 01:00:00
4 4 days 01:00:00
5 5 days 01:00:00
6 6 days 01:00:00
7 7 days 01:00:00
8 8 days 01:00:00
9 9 days 01:00:00
dtype: category
Categories (10, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
3 days 01:00:00 ... 6 days 01:00:00 < 7 days 01:00:00 <
8 days 01:00:00 < 9 days 01:00:00]""" # noqa: E501
assert repr(s) == exp