351 lines
13 KiB
Python
351 lines
13 KiB
Python
from contextlib import nullcontext
|
|
from datetime import (
|
|
datetime,
|
|
time,
|
|
)
|
|
import locale
|
|
|
|
import numpy as np
|
|
import pytest
|
|
|
|
import pandas as pd
|
|
from pandas import (
|
|
PeriodIndex,
|
|
Series,
|
|
)
|
|
import pandas._testing as tm
|
|
|
|
|
|
def get_local_am_pm():
    """
    Look up how the active locale spells the ``%p`` (AM/PM) directive.

    Returns
    -------
    tuple of str
        ``(am, pm)`` as produced by ``strftime("%p")`` for 01:00 and 13:00.
    """
    morning, afternoon = time(1), time(13)
    return morning.strftime("%p"), afternoon.strftime("%p")
|
|
|
|
|
|
def test_get_values_for_csv():
|
|
index = PeriodIndex(["2017-01-01", "2017-01-02", "2017-01-03"], freq="D")
|
|
|
|
# First, with no arguments.
|
|
expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype=object)
|
|
|
|
result = index._get_values_for_csv()
|
|
tm.assert_numpy_array_equal(result, expected)
|
|
|
|
# No NaN values, so na_rep has no effect
|
|
result = index._get_values_for_csv(na_rep="pandas")
|
|
tm.assert_numpy_array_equal(result, expected)
|
|
|
|
# Make sure date formatting works
|
|
expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype=object)
|
|
|
|
result = index._get_values_for_csv(date_format="%m-%Y-%d")
|
|
tm.assert_numpy_array_equal(result, expected)
|
|
|
|
# NULL object handling should work
|
|
index = PeriodIndex(["2017-01-01", pd.NaT, "2017-01-03"], freq="D")
|
|
expected = np.array(["2017-01-01", "NaT", "2017-01-03"], dtype=object)
|
|
|
|
result = index._get_values_for_csv(na_rep="NaT")
|
|
tm.assert_numpy_array_equal(result, expected)
|
|
|
|
expected = np.array(["2017-01-01", "pandas", "2017-01-03"], dtype=object)
|
|
|
|
result = index._get_values_for_csv(na_rep="pandas")
|
|
tm.assert_numpy_array_equal(result, expected)
|
|
|
|
|
|
class TestPeriodIndexRendering:
    """
    Tests for the textual renderings of a ``PeriodIndex``.

    Covers the deprecated ``format`` method on an empty index, ``__repr__`` /
    ``__str__`` of the index, ``repr`` of a ``Series`` built from the index,
    and the private ``_summary`` helper.
    """

    def test_format_empty(self):
        """``format`` on an empty index warns and yields no entries."""
        # GH#35712
        empty_idx = PeriodIndex([], freq="Y")
        msg = r"PeriodIndex\.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            assert empty_idx.format() == []
        with tm.assert_produces_warning(FutureWarning, match=msg):
            assert empty_idx.format(name=True) == [""]

    @pytest.mark.parametrize("method", ["__repr__", "__str__"])
    def test_representation(self, method):
        """``__repr__`` and ``__str__`` list the values followed by the dtype."""
        # GH#7601
        # Each index sits next to its expected text so the pairs cannot drift.
        cases = [
            (PeriodIndex([], freq="D"), "PeriodIndex([], dtype='period[D]')"),
            (
                PeriodIndex(["2011-01-01"], freq="D"),
                "PeriodIndex(['2011-01-01'], dtype='period[D]')",
            ),
            (
                PeriodIndex(["2011-01-01", "2011-01-02"], freq="D"),
                "PeriodIndex(['2011-01-01', '2011-01-02'], dtype='period[D]')",
            ),
            (
                PeriodIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D"),
                "PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], "
                "dtype='period[D]')",
            ),
            (
                PeriodIndex(["2011", "2012", "2013"], freq="Y"),
                "PeriodIndex(['2011', '2012', '2013'], dtype='period[Y-DEC]')",
            ),
            (
                PeriodIndex(["2011-01-01 09:00", "2012-02-01 10:00", "NaT"], freq="h"),
                "PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], "
                "dtype='period[h]')",
            ),
            (
                pd.period_range("2013Q1", periods=1, freq="Q"),
                "PeriodIndex(['2013Q1'], dtype='period[Q-DEC]')",
            ),
            (
                pd.period_range("2013Q1", periods=2, freq="Q"),
                "PeriodIndex(['2013Q1', '2013Q2'], dtype='period[Q-DEC]')",
            ),
            (
                pd.period_range("2013Q1", periods=3, freq="Q"),
                "PeriodIndex(['2013Q1', '2013Q2', '2013Q3'], dtype='period[Q-DEC]')",
            ),
            (
                PeriodIndex(["2011-01-01", "2011-02-01"], freq="3D"),
                "PeriodIndex(['2011-01-01', '2011-02-01'], dtype='period[3D]')",
            ),
        ]

        for idx, expected in cases:
            result = getattr(idx, method)()
            assert result == expected

    # TODO: These are Series.__repr__ tests
    def test_representation_to_series(self):
        """``repr(Series(idx))`` shows one padded row per period plus the dtype."""
        # GH#10971
        # The expected text is spelled with explicit "\n" and explicit padding:
        # the position label, four spaces, then the value right-aligned to the
        # widest entry (which is why "NaT" is pushed to the right below).
        cases = [
            (PeriodIndex([], freq="D"), "Series([], dtype: period[D])"),
            (
                PeriodIndex(["2011-01-01"], freq="D"),
                "0    2011-01-01\n"
                "dtype: period[D]",
            ),
            (
                PeriodIndex(["2011-01-01", "2011-01-02"], freq="D"),
                "0    2011-01-01\n"
                "1    2011-01-02\n"
                "dtype: period[D]",
            ),
            (
                PeriodIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D"),
                "0    2011-01-01\n"
                "1    2011-01-02\n"
                "2    2011-01-03\n"
                "dtype: period[D]",
            ),
            (
                PeriodIndex(["2011", "2012", "2013"], freq="Y"),
                "0    2011\n"
                "1    2012\n"
                "2    2013\n"
                "dtype: period[Y-DEC]",
            ),
            (
                PeriodIndex(["2011-01-01 09:00", "2012-02-01 10:00", "NaT"], freq="h"),
                "0    2011-01-01 09:00\n"
                "1    2012-02-01 10:00\n"
                "2                 NaT\n"
                "dtype: period[h]",
            ),
            (
                pd.period_range("2013Q1", periods=1, freq="Q"),
                "0    2013Q1\n"
                "dtype: period[Q-DEC]",
            ),
            (
                pd.period_range("2013Q1", periods=2, freq="Q"),
                "0    2013Q1\n"
                "1    2013Q2\n"
                "dtype: period[Q-DEC]",
            ),
            (
                pd.period_range("2013Q1", periods=3, freq="Q"),
                "0    2013Q1\n"
                "1    2013Q2\n"
                "2    2013Q3\n"
                "dtype: period[Q-DEC]",
            ),
        ]

        for idx, expected in cases:
            result = repr(Series(idx))
            assert result == expected

    def test_summary(self):
        """``_summary`` reports the entry count, first/last value and the freq."""
        # GH#9116
        cases = [
            (
                PeriodIndex([], freq="D"),
                "PeriodIndex: 0 entries\n"
                "Freq: D",
            ),
            (
                PeriodIndex(["2011-01-01"], freq="D"),
                "PeriodIndex: 1 entries, 2011-01-01 to 2011-01-01\n"
                "Freq: D",
            ),
            (
                PeriodIndex(["2011-01-01", "2011-01-02"], freq="D"),
                "PeriodIndex: 2 entries, 2011-01-01 to 2011-01-02\n"
                "Freq: D",
            ),
            (
                PeriodIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D"),
                "PeriodIndex: 3 entries, 2011-01-01 to 2011-01-03\n"
                "Freq: D",
            ),
            (
                PeriodIndex(["2011", "2012", "2013"], freq="Y"),
                "PeriodIndex: 3 entries, 2011 to 2013\n"
                "Freq: Y-DEC",
            ),
            (
                PeriodIndex(["2011-01-01 09:00", "2012-02-01 10:00", "NaT"], freq="h"),
                "PeriodIndex: 3 entries, 2011-01-01 09:00 to NaT\n"
                "Freq: h",
            ),
            (
                pd.period_range("2013Q1", periods=1, freq="Q"),
                "PeriodIndex: 1 entries, 2013Q1 to 2013Q1\n"
                "Freq: Q-DEC",
            ),
            (
                pd.period_range("2013Q1", periods=2, freq="Q"),
                "PeriodIndex: 2 entries, 2013Q1 to 2013Q2\n"
                "Freq: Q-DEC",
            ),
            (
                pd.period_range("2013Q1", periods=3, freq="Q"),
                "PeriodIndex: 3 entries, 2013Q1 to 2013Q3\n"
                "Freq: Q-DEC",
            ),
        ]

        for idx, expected in cases:
            result = idx._summary()
            assert result == expected
|
|
|
|
|
|
class TestPeriodIndexFormat:
    """Tests for ``PeriodIndex.format`` and ``strftime``-based period formatting."""

    def test_period_format_and_strftime_default(self):
        """Default ``format`` output matches ``strftime(None)`` apart from NaT."""
        deprecation = "PeriodIndex.format is deprecated"

        hourly = PeriodIndex([datetime(2003, 1, 1, 12), None], freq="h")
        with tm.assert_produces_warning(FutureWarning, match=deprecation):
            rendered = hourly.format()

        # The default rendering of an hourly period stops at the minutes.
        assert rendered[0] == "2003-01-01 12:00"
        assert rendered[1] == "NaT"
        # ``strftime(None)`` gives the same text for a valid period ...
        assert rendered[0] == hourly.strftime(None)[0]
        # ... but reports the missing one as NaN instead of "NaT".
        assert hourly.strftime(None)[1] is np.nan

        # Repeat the comparison at nanosecond frequency.
        nanos = pd.period_range("2003-01-01 12:01:01.123456789", periods=2, freq="ns")
        with tm.assert_produces_warning(FutureWarning, match=deprecation):
            rendered = nanos.format()
        assert (rendered == nanos.strftime(None)).all()
        assert rendered[0] == "2003-01-01 12:01:01.123456789"
        assert rendered[1] == "2003-01-01 12:01:01.123456790"

    def test_period_custom(self):
        """The custom ``%l``, ``%u`` and ``%n`` directives render sub-second parts."""
        # GH#46252 custom formatting directives %l (ms) and %u (us)
        deprecation = "PeriodIndex.format is deprecated"
        pattern = "%y %I:%M:%S (ms=%l us=%u ns=%n)"

        # (range start, frequency, expected first entry, expected second entry)
        scenarios = [
            # 3 digits
            (
                "2003-01-01 12:01:01.123",
                "ms",
                "03 12:01:01 (ms=123 us=123000 ns=123000000)",
                "03 12:01:01 (ms=124 us=124000 ns=124000000)",
            ),
            # 6 digits
            (
                "2003-01-01 12:01:01.123456",
                "us",
                "03 12:01:01 (ms=123 us=123456 ns=123456000)",
                "03 12:01:01 (ms=123 us=123457 ns=123457000)",
            ),
            # 9 digits
            (
                "2003-01-01 12:01:01.123456789",
                "ns",
                "03 12:01:01 (ms=123 us=123456 ns=123456789)",
                "03 12:01:01 (ms=123 us=123456 ns=123456790)",
            ),
        ]

        for start, freq, want_first, want_second in scenarios:
            periods = pd.period_range(start, periods=2, freq=freq)
            with tm.assert_produces_warning(FutureWarning, match=deprecation):
                rendered = periods.format(date_format=pattern)
            assert rendered[0] == want_first
            assert rendered[1] == want_second

    def test_period_tz(self):
        """Periods built from tz-aware datetimes keep the wall time of that tz."""
        # Formatting periods created from a datetime with timezone.
        deprecation = r"PeriodIndex\.format is deprecated"
        tz_dropped = "will drop timezone"

        # This timestamp is in 2013 in Europe/Paris but is 2012 in UTC
        utc_stamps = pd.to_datetime(["2013-01-01 00:00:00+01:00"], utc=True)

        # Converting to a period loses the timezone information.
        # Since tz is currently set as utc, we'll see 2012
        with tm.assert_produces_warning(UserWarning, match=tz_dropped):
            from_utc = utc_stamps.to_period(freq="h")
        with tm.assert_produces_warning(FutureWarning, match=deprecation):
            assert from_utc.format()[0] == "2012-12-31 23:00"

        # If tz is currently set as paris before conversion, we'll see 2013
        paris_stamps = utc_stamps.tz_convert("Europe/Paris")
        with tm.assert_produces_warning(UserWarning, match=tz_dropped):
            from_paris = paris_stamps.to_period(freq="h")
        with tm.assert_produces_warning(FutureWarning, match=deprecation):
            assert from_paris.format()[0] == "2013-01-01 00:00"

    @pytest.mark.parametrize(
        "locale_str",
        [
            pytest.param(None, id=str(locale.getlocale())),
            "it_IT.utf8",
            "it_IT",  # Note: encoding will be 'ISO8859-1'
            "zh_CN.utf8",
            "zh_CN",  # Note: encoding will be 'gb2312'
        ],
    )
    def test_period_non_ascii_fmt(self, locale_str):
        """A non-ASCII character in the format string survives formatting."""
        # GH#46468 non-ascii char in input format string leads to wrong output

        # Skip if locale cannot be set
        usable = locale_str is None or tm.can_set_locale(locale_str, locale.LC_ALL)
        if not usable:
            pytest.skip(f"Skipping as locale '{locale_str}' cannot be set on host.")

        # Change locale temporarily for this test.
        locale_ctx = (
            tm.set_locale(locale_str, locale.LC_ALL) if locale_str else nullcontext()
        )
        with locale_ctx:
            # Scalar
            scalar = pd.Period("2018-03-11 13:00", freq="h")
            assert scalar.strftime("%y é") == "18 é"

            # Index
            periods = pd.period_range("2003-01-01 01:00:00", periods=2, freq="12h")
            deprecation = "PeriodIndex.format is deprecated"
            with tm.assert_produces_warning(FutureWarning, match=deprecation):
                rendered = periods.format(date_format="%y é")
            assert rendered[0] == "03 é"
            assert rendered[1] == "03 é"

    @pytest.mark.parametrize(
        "locale_str",
        [
            pytest.param(None, id=str(locale.getlocale())),
            "it_IT.utf8",
            "it_IT",  # Note: encoding will be 'ISO8859-1'
            "zh_CN.utf8",
            "zh_CN",  # Note: encoding will be 'gb2312'
        ],
    )
    def test_period_custom_locale_directive(self, locale_str):
        """The locale-dependent ``%p`` directive matches the stdlib's output."""
        # GH#46319 locale-specific directive leads to non-utf8 c strftime char* result

        # Skip if locale cannot be set
        usable = locale_str is None or tm.can_set_locale(locale_str, locale.LC_ALL)
        if not usable:
            pytest.skip(f"Skipping as locale '{locale_str}' cannot be set on host.")

        # Change locale temporarily for this test.
        locale_ctx = (
            tm.set_locale(locale_str, locale.LC_ALL) if locale_str else nullcontext()
        )
        with locale_ctx:
            # Get locale-specific reference
            am_local, pm_local = get_local_am_pm()

            # Scalar
            scalar = pd.Period("2018-03-11 13:00", freq="h")
            assert scalar.strftime("%p") == pm_local

            # Index
            periods = pd.period_range("2003-01-01 01:00:00", periods=2, freq="12h")
            deprecation = "PeriodIndex.format is deprecated"
            with tm.assert_produces_warning(FutureWarning, match=deprecation):
                rendered = periods.format(date_format="%y %I:%M:%S%p")
            assert rendered[0] == f"03 01:00:00{am_local}"
            assert rendered[1] == f"03 01:00:00{pm_local}"
|