projektAI/venv/Lib/site-packages/pandas/tests/io/pytables/test_timezones.py

462 lines
14 KiB
Python
Raw Normal View History

2021-06-06 22:13:05 +02:00
import datetime
import numpy as np
import pytest
import pandas.util._test_decorators as td
import pandas as pd
from pandas import DataFrame, DatetimeIndex, Series, Timestamp, date_range
import pandas._testing as tm
from pandas.tests.io.pytables.common import (
_maybe_remove,
ensure_clean_path,
ensure_clean_store,
)
def _compare_with_tz(a, b):
tm.assert_frame_equal(a, b)
# compare the zones on each element
for c in a.columns:
for i in a.index:
a_e = a.loc[i, c]
b_e = b.loc[i, c]
if not (a_e == b_e and a_e.tz == b_e.tz):
raise AssertionError(f"invalid tz comparison [{a_e}] [{b_e}]")
def test_append_with_timezones_dateutil(setup_path):
    """
    Append and read back frames whose columns / index use dateutil zones.

    Covers: a tz-aware data column, a tz-aware ``where`` selection, columns
    mixing DST and STD dates, the ``ValueError`` raised when an appended
    column's tz conflicts with the stored one, and a tz-aware index written
    with both ``put`` and ``append``.
    """
    from datetime import timedelta

    # use maybe_get_tz instead of dateutil.tz.gettz to handle the windows
    # filename issues.
    from pandas._libs.tslibs.timezones import maybe_get_tz

    def gettz(name):
        return maybe_get_tz("dateutil/" + name)

    # as columns
    with ensure_clean_store(setup_path) as store:

        _maybe_remove(store, "df_tz")
        df = DataFrame(
            {
                "A": [
                    Timestamp("20130102 2:00:00", tz=gettz("US/Eastern"))
                    + timedelta(hours=1) * i
                    for i in range(5)
                ]
            }
        )

        store.append("df_tz", df, data_columns=["A"])
        result = store["df_tz"]
        _compare_with_tz(result, df)
        tm.assert_frame_equal(result, df)

        # select with tz aware
        # NOTE: the where string refers to the local name ``df``, so the
        # frame must stay bound to that name in this function.
        expected = df[df.A >= df.A[3]]
        result = store.select("df_tz", where="A>=df.A[3]")
        _compare_with_tz(result, expected)

        # ensure we include dates in DST and STD time here.
        _maybe_remove(store, "df_tz")
        df = DataFrame(
            {
                "A": Timestamp("20130102", tz=gettz("US/Eastern")),
                "B": Timestamp("20130603", tz=gettz("US/Eastern")),
            },
            index=range(5),
        )
        store.append("df_tz", df)
        result = store["df_tz"]
        _compare_with_tz(result, df)
        tm.assert_frame_equal(result, df)

        df = DataFrame(
            {
                "A": Timestamp("20130102", tz=gettz("US/Eastern")),
                "B": Timestamp("20130102", tz=gettz("EET")),
            },
            index=range(5),
        )

        msg = (
            r"invalid info for \[values_block_1\] for \[tz\], "
            r"existing_value \[dateutil/.*US/Eastern\] "
            r"conflicts with new value \[dateutil/.*EET\]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_tz", df)

        # this is ok
        _maybe_remove(store, "df_tz")
        store.append("df_tz", df, data_columns=["A", "B"])
        result = store["df_tz"]
        _compare_with_tz(result, df)
        tm.assert_frame_equal(result, df)

        # can't append with diff timezone
        df = DataFrame(
            {
                "A": Timestamp("20130102", tz=gettz("US/Eastern")),
                "B": Timestamp("20130102", tz=gettz("CET")),
            },
            index=range(5),
        )

        msg = (
            r"invalid info for \[B\] for \[tz\], "
            r"existing_value \[dateutil/.*EET\] "
            r"conflicts with new value \[dateutil/.*CET\]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_tz", df)

    # as index
    with ensure_clean_store(setup_path) as store:

        dti = date_range("2000-1-1", periods=3, freq="H", tz=gettz("US/Eastern"))
        dti = dti._with_freq(None)  # freq doesnt round-trip

        # GH 4098 example
        df = DataFrame({"A": Series(range(3), index=dti)})

        _maybe_remove(store, "df")
        store.put("df", df)
        result = store.select("df")
        tm.assert_frame_equal(result, df)

        _maybe_remove(store, "df")
        store.append("df", df)
        result = store.select("df")
        tm.assert_frame_equal(result, df)
def test_append_with_timezones_pytz(setup_path):
    """
    Append and read back frames whose columns / index use string-named zones.

    Covers: a tz-aware data column, a tz-aware ``where`` selection, columns
    mixing DST and STD dates, the ``ValueError`` raised when an appended
    column's tz conflicts with the stored one, and a tz-aware index written
    with both ``put`` and ``append``.
    """
    from datetime import timedelta

    # as columns
    with ensure_clean_store(setup_path) as store:

        _maybe_remove(store, "df_tz")
        df = DataFrame(
            {
                "A": [
                    Timestamp("20130102 2:00:00", tz="US/Eastern")
                    + timedelta(hours=1) * i
                    for i in range(5)
                ]
            }
        )
        store.append("df_tz", df, data_columns=["A"])
        result = store["df_tz"]
        _compare_with_tz(result, df)
        tm.assert_frame_equal(result, df)

        # select with tz aware
        # NOTE: the where string refers to the local name ``df``, so the
        # frame must stay bound to that name in this function.
        _compare_with_tz(store.select("df_tz", where="A>=df.A[3]"), df[df.A >= df.A[3]])

        _maybe_remove(store, "df_tz")
        # ensure we include dates in DST and STD time here.
        df = DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="US/Eastern"),
            },
            index=range(5),
        )
        store.append("df_tz", df)
        result = store["df_tz"]
        _compare_with_tz(result, df)
        tm.assert_frame_equal(result, df)

        df = DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130102", tz="EET"),
            },
            index=range(5),
        )

        msg = (
            r"invalid info for \[values_block_1\] for \[tz\], "
            r"existing_value \[US/Eastern\] conflicts with new value \[EET\]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_tz", df)

        # this is ok
        _maybe_remove(store, "df_tz")
        store.append("df_tz", df, data_columns=["A", "B"])
        result = store["df_tz"]
        _compare_with_tz(result, df)
        tm.assert_frame_equal(result, df)

        # can't append with diff timezone
        df = DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130102", tz="CET"),
            },
            index=range(5),
        )

        msg = (
            r"invalid info for \[B\] for \[tz\], "
            r"existing_value \[EET\] conflicts with new value \[CET\]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_tz", df)

    # as index
    with ensure_clean_store(setup_path) as store:

        dti = date_range("2000-1-1", periods=3, freq="H", tz="US/Eastern")
        dti = dti._with_freq(None)  # freq doesnt round-trip

        # GH 4098 example
        df = DataFrame({"A": Series(range(3), index=dti)})

        _maybe_remove(store, "df")
        store.put("df", df)
        result = store.select("df")
        tm.assert_frame_equal(result, df)

        _maybe_remove(store, "df")
        store.append("df", df)
        result = store.select("df")
        tm.assert_frame_equal(result, df)
def test_roundtrip_tz_aware_index(setup_path):
    """
    Store a frame with a tz-aware index in fixed format and read it back.

    Also checks that the first index entry read back has the expected
    integer ``value``.
    """
    # GH 17618
    stamp = Timestamp("2000-01-01 01:00:00", tz="US/Eastern")
    df = DataFrame(data=[0], index=[stamp])

    with ensure_clean_store(setup_path) as store:
        store.put("frame", df, format="fixed")
        recons = store["frame"]
        tm.assert_frame_equal(recons, df)
    assert recons.index[0].value == 946706400000000000
def test_store_index_name_with_tz(setup_path):
    """
    Store a frame with a named, UTC-localized index in table format.

    The frame read back must equal the one written.
    """
    # GH 13884
    df = DataFrame({"A": [1, 2]})
    df.index = DatetimeIndex([1234567890123456787, 1234567890123456788])
    df.index = df.index.tz_localize("UTC")
    df.index.name = "foo"

    with ensure_clean_store(setup_path) as store:
        store.put("frame", df, format="table")
        recons = store["frame"]
        tm.assert_frame_equal(recons, df)
def test_tseries_select_index_column(setup_path):
    """
    Check the tz of the index column returned by ``select_column``.

    Runs the same append / ``select_column("frame", "index")`` sequence for
    a naive range, a UTC range and a US/Eastern range, asserting each time
    that the tz read back matches the tz of the range that was stored.
    """
    # GH7777
    # selecting a UTC datetimeindex column did
    # not preserve UTC tzinfo set before storing

    # check that no tz still works
    rng = date_range("1/1/2000", "1/30/2000")
    frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

    with ensure_clean_store(setup_path) as store:
        store.append("frame", frame)
        result = store.select_column("frame", "index")
        assert rng.tz == DatetimeIndex(result.values).tz

    # check utc
    rng = date_range("1/1/2000", "1/30/2000", tz="UTC")
    frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

    with ensure_clean_store(setup_path) as store:
        store.append("frame", frame)
        result = store.select_column("frame", "index")
        assert rng.tz == result.dt.tz

    # double check non-utc
    rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
    frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

    with ensure_clean_store(setup_path) as store:
        store.append("frame", frame)
        result = store.select_column("frame", "index")
        assert rng.tz == result.dt.tz
def test_timezones_fixed_format_frame_non_empty(setup_path):
    """
    Round-trip non-empty frames with tz-aware data via ``store[key]``.

    First with a tz-aware index only, then with tz-aware, naive and
    CET-converted datetime columns alongside an integer column.
    """
    with ensure_clean_store(setup_path) as store:

        # index
        rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
        rng = rng._with_freq(None)  # freq doesnt round-trip
        df = DataFrame(np.random.randn(len(rng), 4), index=rng)
        store["df"] = df
        result = store["df"]
        tm.assert_frame_equal(result, df)

        # as data
        # GH11411
        _maybe_remove(store, "df")
        df = DataFrame(
            {
                "A": rng,
                "B": rng.tz_convert("UTC").tz_localize(None),
                "C": rng.tz_convert("CET"),
                "D": range(len(rng)),
            },
            index=rng,
        )
        store["df"] = df
        result = store["df"]
        tm.assert_frame_equal(result, df)
def test_timezones_fixed_format_frame_empty(setup_path, tz_aware_fixture):
    """Round-trip a frame holding one empty tz-aware column via ``store[key]``."""
    # GH 20594

    dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)

    with ensure_clean_store(setup_path) as store:
        s = Series(dtype=dtype)
        df = DataFrame({"A": s})
        store["df"] = df
        result = store["df"]
        tm.assert_frame_equal(result, df)
def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture):
    """Round-trip a one-element tz-aware Series via ``store[key]``."""
    # GH 20594

    dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)

    with ensure_clean_store(setup_path) as store:
        s = Series([0], dtype=dtype)
        store["s"] = s
        result = store["s"]
        tm.assert_series_equal(result, s)
def test_timezones_fixed_format_series_empty(setup_path, tz_aware_fixture):
    """Round-trip an empty tz-aware Series via ``store[key]``."""
    # GH 20594

    dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)

    with ensure_clean_store(setup_path) as store:
        s = Series(dtype=dtype)
        store["s"] = s
        result = store["s"]
        tm.assert_series_equal(result, s)
def test_fixed_offset_tz(setup_path):
    """
    Round-trip a frame whose index was built from ``-07:00`` offset strings.

    The index read back must equal the original and carry the same tz.
    """
    rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00")
    frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

    with ensure_clean_store(setup_path) as store:
        store["frame"] = frame
        recons = store["frame"]
        tm.assert_index_equal(recons.index, rng)
        assert rng.tz == recons.index.tz
@td.skip_if_windows
def test_store_timezone(setup_path):
    """
    Round-trip a frame indexed by ``datetime.date`` objects.

    Done once with no timezone override, then written under
    ``tm.set_timezone("EST5EDT")`` and read back under
    ``tm.set_timezone("CST6CDT")``; both reads must equal the frame written.
    """
    # GH2852
    # issue storing datetime.date with a timezone as it resets when read
    # back in a new timezone

    # original method
    with ensure_clean_store(setup_path) as store:

        today = datetime.date(2013, 9, 10)
        df = DataFrame([1, 2, 3], index=[today, today, today])
        store["obj1"] = df
        result = store["obj1"]
        tm.assert_frame_equal(result, df)

    # with tz setting
    with ensure_clean_store(setup_path) as store:

        with tm.set_timezone("EST5EDT"):
            today = datetime.date(2013, 9, 10)
            df = DataFrame([1, 2, 3], index=[today, today, today])
            store["obj1"] = df

        with tm.set_timezone("CST6CDT"):
            result = store["obj1"]

        tm.assert_frame_equal(result, df)
def test_legacy_datetimetz_object(datapath, setup_path):
    """
    Read ``datetimetz_object.h5`` and compare key ``"df"`` to the expected frame.

    The expected frame has a US/Eastern column and a CET column.
    """
    # legacy from < 0.17.0
    # 8260
    expected = DataFrame(
        {
            "A": Timestamp("20130102", tz="US/Eastern"),
            "B": Timestamp("20130603", tz="CET"),
        },
        index=range(5),
    )
    with ensure_clean_store(
        datapath("io", "data", "legacy_hdf", "datetimetz_object.h5"), mode="r"
    ) as store:
        result = store["df"]
        tm.assert_frame_equal(result, expected)
def test_dst_transitions(setup_path):
    """
    Append and select frames whose index and a column span 2013-10-27 in London.

    Run for the hourly range itself and for the same range shifted by ten
    minutes; each selected frame must equal the one appended.
    """
    # make sure we are not failing on transitions
    with ensure_clean_store(setup_path) as store:
        times = pd.date_range(
            "2013-10-26 23:00",
            "2013-10-27 01:00",
            tz="Europe/London",
            freq="H",
            ambiguous="infer",
        )
        times = times._with_freq(None)  # freq doesnt round-trip

        for idx in [times, times + pd.Timedelta("10min")]:
            _maybe_remove(store, "df")
            df = DataFrame({"A": range(len(idx)), "B": idx}, index=idx)
            store.append("df", df)
            result = store.select("df")
            tm.assert_frame_equal(result, df)
def test_read_with_where_tz_aware_index(setup_path):
    """
    Read a table with a ``where`` filter on a UTC level of a MultiIndex.

    Every stored date is later than the filter bound, so the frame read
    back must equal the full frame that was written.
    """
    # GH 11926
    periods = 10
    dts = pd.date_range("20151201", periods=periods, freq="D", tz="UTC")
    mi = pd.MultiIndex.from_arrays([dts, range(periods)], names=["DATE", "NO"])
    expected = DataFrame({"MYCOL": 0}, index=mi)

    key = "mykey"
    with ensure_clean_path(setup_path) as path:
        # write and close the store before reading the file back by path
        with pd.HDFStore(path) as store:
            store.append(key, expected, format="table", append=True)
        result = pd.read_hdf(path, key, where="DATE > 20151130")
        tm.assert_frame_equal(result, expected)
def test_py2_created_with_datetimez(datapath, setup_path):
    """
    Read ``gh26443.h5`` and compare key ``"key"`` to the expected frame.

    The expected frame has a single row indexed by an America/New_York
    timestamp.
    """
    # The test HDF5 file was created in Python 2, but could not be read in
    # Python 3.
    #
    # GH26443
    index = [Timestamp("2019-01-01T18:00").tz_localize("America/New_York")]
    expected = DataFrame({"data": 123}, index=index)
    with ensure_clean_store(
        datapath("io", "data", "legacy_hdf", "gh26443.h5"), mode="r"
    ) as store:
        result = store["key"]
        tm.assert_frame_equal(result, expected)