449 lines
14 KiB
Cython
449 lines
14 KiB
Cython
|
from datetime import (
|
||
|
timedelta,
|
||
|
timezone,
|
||
|
)
|
||
|
|
||
|
from pandas.compat._optional import import_optional_dependency
|
||
|
|
||
|
try:
|
||
|
# py39+
|
||
|
import zoneinfo
|
||
|
from zoneinfo import ZoneInfo
|
||
|
except ImportError:
|
||
|
zoneinfo = None
|
||
|
ZoneInfo = None
|
||
|
|
||
|
from cpython.datetime cimport (
|
||
|
datetime,
|
||
|
timedelta,
|
||
|
tzinfo,
|
||
|
)
|
||
|
|
||
|
# dateutil compat
|
||
|
|
||
|
from dateutil.tz import (
|
||
|
gettz as dateutil_gettz,
|
||
|
tzfile as _dateutil_tzfile,
|
||
|
tzlocal as _dateutil_tzlocal,
|
||
|
tzutc as _dateutil_tzutc,
|
||
|
)
|
||
|
import numpy as np
|
||
|
import pytz
|
||
|
from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo
|
||
|
|
||
|
cimport numpy as cnp
|
||
|
from numpy cimport int64_t
|
||
|
|
||
|
cnp.import_array()
|
||
|
|
||
|
# ----------------------------------------------------------------------
|
||
|
from pandas._libs.tslibs.util cimport (
|
||
|
get_nat,
|
||
|
is_integer_object,
|
||
|
)
|
||
|
|
||
|
|
||
|
cdef int64_t NPY_NAT = get_nat()
|
||
|
cdef tzinfo utc_stdlib = timezone.utc
|
||
|
cdef tzinfo utc_pytz = pytz.utc
|
||
|
cdef tzinfo utc_dateutil_str = dateutil_gettz("UTC") # NB: *not* the same as tzutc()
|
||
|
|
||
|
cdef tzinfo utc_zoneinfo = None
|
||
|
|
||
|
|
||
|
# ----------------------------------------------------------------------
|
||
|
|
||
|
cdef bint is_utc_zoneinfo(tzinfo tz):
|
||
|
# Workaround for cases with missing tzdata
|
||
|
# https://github.com/pandas-dev/pandas/pull/46425#discussion_r830633025
|
||
|
if tz is None or zoneinfo is None:
|
||
|
return False
|
||
|
|
||
|
global utc_zoneinfo
|
||
|
if utc_zoneinfo is None:
|
||
|
try:
|
||
|
utc_zoneinfo = ZoneInfo("UTC")
|
||
|
except zoneinfo.ZoneInfoNotFoundError:
|
||
|
return False
|
||
|
# Warn if tzdata is too old, even if there is a system tzdata to alert
|
||
|
# users about the mismatch between local/system tzdata
|
||
|
import_optional_dependency("tzdata", errors="warn", min_version="2022.1")
|
||
|
|
||
|
return tz is utc_zoneinfo
|
||
|
|
||
|
|
||
|
cpdef inline bint is_utc(tzinfo tz):
|
||
|
return (
|
||
|
tz is utc_pytz
|
||
|
or tz is utc_stdlib
|
||
|
or isinstance(tz, _dateutil_tzutc)
|
||
|
or tz is utc_dateutil_str
|
||
|
or is_utc_zoneinfo(tz)
|
||
|
)
|
||
|
|
||
|
|
||
|
cdef bint is_zoneinfo(tzinfo tz):
|
||
|
if ZoneInfo is None:
|
||
|
return False
|
||
|
return isinstance(tz, ZoneInfo)
|
||
|
|
||
|
|
||
|
cdef bint is_tzlocal(tzinfo tz):
|
||
|
return isinstance(tz, _dateutil_tzlocal)
|
||
|
|
||
|
|
||
|
cdef bint treat_tz_as_pytz(tzinfo tz):
|
||
|
return (hasattr(tz, "_utc_transition_times") and
|
||
|
hasattr(tz, "_transition_info"))
|
||
|
|
||
|
|
||
|
cdef bint treat_tz_as_dateutil(tzinfo tz):
|
||
|
return hasattr(tz, "_trans_list") and hasattr(tz, "_trans_idx")
|
||
|
|
||
|
|
||
|
# Returns str or tzinfo object
|
||
|
cpdef inline object get_timezone(tzinfo tz):
|
||
|
"""
|
||
|
We need to do several things here:
|
||
|
1) Distinguish between pytz and dateutil timezones
|
||
|
2) Not be over-specific (e.g. US/Eastern with/without DST is same *zone*
|
||
|
but a different tz object)
|
||
|
3) Provide something to serialize when we're storing a datetime object
|
||
|
in pytables.
|
||
|
|
||
|
We return a string prefaced with dateutil if it's a dateutil tz, else just
|
||
|
the tz name. It needs to be a string so that we can serialize it with
|
||
|
UJSON/pytables. maybe_get_tz (below) is the inverse of this process.
|
||
|
"""
|
||
|
if tz is None:
|
||
|
raise TypeError("tz argument cannot be None")
|
||
|
if is_utc(tz):
|
||
|
return tz
|
||
|
else:
|
||
|
if treat_tz_as_dateutil(tz):
|
||
|
if ".tar.gz" in tz._filename:
|
||
|
raise ValueError(
|
||
|
"Bad tz filename. Dateutil on python 3 on windows has a "
|
||
|
"bug which causes tzfile._filename to be the same for all "
|
||
|
"timezone files. Please construct dateutil timezones "
|
||
|
'implicitly by passing a string like "dateutil/Europe'
|
||
|
'/London" when you construct your pandas objects instead '
|
||
|
"of passing a timezone object. See "
|
||
|
"https://github.com/pandas-dev/pandas/pull/7362")
|
||
|
return "dateutil/" + tz._filename
|
||
|
else:
|
||
|
# tz is a pytz timezone or unknown.
|
||
|
try:
|
||
|
zone = tz.zone
|
||
|
if zone is None:
|
||
|
return tz
|
||
|
return zone
|
||
|
except AttributeError:
|
||
|
return tz
|
||
|
|
||
|
|
||
|
cpdef inline tzinfo maybe_get_tz(object tz):
|
||
|
"""
|
||
|
(Maybe) Construct a timezone object from a string. If tz is a string, use
|
||
|
it to construct a timezone object. Otherwise, just return tz.
|
||
|
"""
|
||
|
if isinstance(tz, str):
|
||
|
if tz == "tzlocal()":
|
||
|
tz = _dateutil_tzlocal()
|
||
|
elif tz.startswith("dateutil/"):
|
||
|
zone = tz[9:]
|
||
|
tz = dateutil_gettz(zone)
|
||
|
# On Python 3 on Windows, the filename is not always set correctly.
|
||
|
if isinstance(tz, _dateutil_tzfile) and ".tar.gz" in tz._filename:
|
||
|
tz._filename = zone
|
||
|
elif tz[0] in {"-", "+"}:
|
||
|
hours = int(tz[0:3])
|
||
|
minutes = int(tz[0] + tz[4:6])
|
||
|
tz = timezone(timedelta(hours=hours, minutes=minutes))
|
||
|
elif tz[0:4] in {"UTC-", "UTC+"}:
|
||
|
hours = int(tz[3:6])
|
||
|
minutes = int(tz[3] + tz[7:9])
|
||
|
tz = timezone(timedelta(hours=hours, minutes=minutes))
|
||
|
elif tz == "UTC" or tz == "utc":
|
||
|
tz = utc_stdlib
|
||
|
else:
|
||
|
tz = pytz.timezone(tz)
|
||
|
elif is_integer_object(tz):
|
||
|
tz = timezone(timedelta(seconds=tz))
|
||
|
elif isinstance(tz, tzinfo):
|
||
|
pass
|
||
|
elif tz is None:
|
||
|
pass
|
||
|
else:
|
||
|
raise TypeError(type(tz))
|
||
|
return tz
|
||
|
|
||
|
|
||
|
def _p_tz_cache_key(tz: tzinfo):
|
||
|
"""
|
||
|
Python interface for cache function to facilitate testing.
|
||
|
"""
|
||
|
return tz_cache_key(tz)
|
||
|
|
||
|
|
||
|
# Timezone data caches, key is the pytz string or dateutil file name.
|
||
|
dst_cache = {}
|
||
|
|
||
|
|
||
|
cdef object tz_cache_key(tzinfo tz):
|
||
|
"""
|
||
|
Return the key in the cache for the timezone info object or None
|
||
|
if unknown.
|
||
|
|
||
|
The key is currently the tz string for pytz timezones, the filename for
|
||
|
dateutil timezones.
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
This cannot just be the hash of a timezone object. Unfortunately, the
|
||
|
hashes of two dateutil tz objects which represent the same timezone are
|
||
|
not equal (even though the tz objects will compare equal and represent
|
||
|
the same tz file). Also, pytz objects are not always hashable so we use
|
||
|
str(tz) instead.
|
||
|
"""
|
||
|
if isinstance(tz, _pytz_BaseTzInfo):
|
||
|
return tz.zone
|
||
|
elif isinstance(tz, _dateutil_tzfile):
|
||
|
if ".tar.gz" in tz._filename:
|
||
|
raise ValueError("Bad tz filename. Dateutil on python 3 on "
|
||
|
"windows has a bug which causes tzfile._filename "
|
||
|
"to be the same for all timezone files. Please "
|
||
|
"construct dateutil timezones implicitly by "
|
||
|
'passing a string like "dateutil/Europe/London" '
|
||
|
"when you construct your pandas objects instead "
|
||
|
"of passing a timezone object. See "
|
||
|
"https://github.com/pandas-dev/pandas/pull/7362")
|
||
|
return "dateutil" + tz._filename
|
||
|
else:
|
||
|
return None
|
||
|
|
||
|
|
||
|
# ----------------------------------------------------------------------
|
||
|
# UTC Offsets
|
||
|
|
||
|
|
||
|
cdef timedelta get_utcoffset(tzinfo tz, datetime obj):
|
||
|
try:
|
||
|
return tz._utcoffset
|
||
|
except AttributeError:
|
||
|
return tz.utcoffset(obj)
|
||
|
|
||
|
|
||
|
cpdef inline bint is_fixed_offset(tzinfo tz):
|
||
|
if treat_tz_as_dateutil(tz):
|
||
|
if len(tz._trans_idx) == 0 and len(tz._trans_list) == 0:
|
||
|
return 1
|
||
|
else:
|
||
|
return 0
|
||
|
elif treat_tz_as_pytz(tz):
|
||
|
if (len(tz._transition_info) == 0
|
||
|
and len(tz._utc_transition_times) == 0):
|
||
|
return 1
|
||
|
else:
|
||
|
return 0
|
||
|
elif is_zoneinfo(tz):
|
||
|
return 0
|
||
|
# This also implicitly accepts datetime.timezone objects which are
|
||
|
# considered fixed
|
||
|
return 1
|
||
|
|
||
|
|
||
|
cdef object _get_utc_trans_times_from_dateutil_tz(tzinfo tz):
|
||
|
"""
|
||
|
Transition times in dateutil timezones are stored in local non-dst
|
||
|
time. This code converts them to UTC. It's the reverse of the code
|
||
|
in dateutil.tz.tzfile.__init__.
|
||
|
"""
|
||
|
new_trans = list(tz._trans_list)
|
||
|
last_std_offset = 0
|
||
|
for i, (trans, tti) in enumerate(zip(tz._trans_list, tz._trans_idx)):
|
||
|
if not tti.isdst:
|
||
|
last_std_offset = tti.offset
|
||
|
new_trans[i] = trans - last_std_offset
|
||
|
return new_trans
|
||
|
|
||
|
|
||
|
cdef int64_t[::1] unbox_utcoffsets(object transinfo):
|
||
|
cdef:
|
||
|
Py_ssize_t i, sz
|
||
|
int64_t[::1] arr
|
||
|
|
||
|
sz = len(transinfo)
|
||
|
arr = np.empty(sz, dtype="i8")
|
||
|
|
||
|
for i in range(sz):
|
||
|
arr[i] = int(transinfo[i][0].total_seconds()) * 1_000_000_000
|
||
|
|
||
|
return arr
|
||
|
|
||
|
|
||
|
# ----------------------------------------------------------------------
|
||
|
# Daylight Savings
|
||
|
|
||
|
|
||
|
cdef object get_dst_info(tzinfo tz):
|
||
|
"""
|
||
|
Returns
|
||
|
-------
|
||
|
ndarray[int64_t]
|
||
|
Nanosecond UTC times of DST transitions.
|
||
|
ndarray[int64_t]
|
||
|
Nanosecond UTC offsets corresponding to DST transitions.
|
||
|
str
|
||
|
Describing the type of tzinfo object.
|
||
|
"""
|
||
|
cache_key = tz_cache_key(tz)
|
||
|
if cache_key is None:
|
||
|
# e.g. pytz.FixedOffset, matplotlib.dates._UTC,
|
||
|
# psycopg2.tz.FixedOffsetTimezone
|
||
|
num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000
|
||
|
# If we have e.g. ZoneInfo here, the get_utcoffset call will return None,
|
||
|
# so the total_seconds() call will raise AttributeError.
|
||
|
return (np.array([NPY_NAT + 1], dtype=np.int64),
|
||
|
np.array([num], dtype=np.int64),
|
||
|
"unknown")
|
||
|
|
||
|
if cache_key not in dst_cache:
|
||
|
if treat_tz_as_pytz(tz):
|
||
|
trans = np.array(tz._utc_transition_times, dtype="M8[ns]")
|
||
|
trans = trans.view("i8")
|
||
|
if tz._utc_transition_times[0].year == 1:
|
||
|
trans[0] = NPY_NAT + 1
|
||
|
deltas = unbox_utcoffsets(tz._transition_info)
|
||
|
typ = "pytz"
|
||
|
|
||
|
elif treat_tz_as_dateutil(tz):
|
||
|
if len(tz._trans_list):
|
||
|
# get utc trans times
|
||
|
trans_list = _get_utc_trans_times_from_dateutil_tz(tz)
|
||
|
trans = np.hstack([
|
||
|
np.array([0], dtype="M8[s]"), # place holder for 1st item
|
||
|
np.array(trans_list, dtype="M8[s]")]).astype(
|
||
|
"M8[ns]") # all trans listed
|
||
|
trans = trans.view("i8")
|
||
|
trans[0] = NPY_NAT + 1
|
||
|
|
||
|
# deltas
|
||
|
deltas = np.array([v.offset for v in (
|
||
|
tz._ttinfo_before,) + tz._trans_idx], dtype="i8")
|
||
|
deltas *= 1_000_000_000
|
||
|
typ = "dateutil"
|
||
|
|
||
|
elif is_fixed_offset(tz):
|
||
|
trans = np.array([NPY_NAT + 1], dtype=np.int64)
|
||
|
deltas = np.array([tz._ttinfo_std.offset],
|
||
|
dtype="i8") * 1_000_000_000
|
||
|
typ = "fixed"
|
||
|
else:
|
||
|
# 2018-07-12 this is not reached in the tests, and this case
|
||
|
# is not handled in any of the functions that call
|
||
|
# get_dst_info. If this case _were_ hit the calling
|
||
|
# functions would then hit an IndexError because they assume
|
||
|
# `deltas` is non-empty.
|
||
|
# (under the just-deleted code that returned empty arrays)
|
||
|
raise AssertionError("dateutil tzinfo is not a FixedOffset "
|
||
|
"and has an empty `_trans_list`.", tz)
|
||
|
else:
|
||
|
# static tzinfo, we can get here with pytz.StaticTZInfo
|
||
|
# which are not caught by treat_tz_as_pytz
|
||
|
trans = np.array([NPY_NAT + 1], dtype=np.int64)
|
||
|
num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000
|
||
|
deltas = np.array([num], dtype=np.int64)
|
||
|
typ = "static"
|
||
|
|
||
|
dst_cache[cache_key] = (trans, deltas, typ)
|
||
|
|
||
|
return dst_cache[cache_key]
|
||
|
|
||
|
|
||
|
def infer_tzinfo(datetime start, datetime end):
|
||
|
if start is not None and end is not None:
|
||
|
tz = start.tzinfo
|
||
|
if not tz_compare(tz, end.tzinfo):
|
||
|
raise AssertionError(f"Inputs must both have the same timezone, "
|
||
|
f"{tz} != {end.tzinfo}")
|
||
|
elif start is not None:
|
||
|
tz = start.tzinfo
|
||
|
elif end is not None:
|
||
|
tz = end.tzinfo
|
||
|
else:
|
||
|
tz = None
|
||
|
return tz
|
||
|
|
||
|
|
||
|
cpdef bint tz_compare(tzinfo start, tzinfo end):
|
||
|
"""
|
||
|
Compare string representations of timezones
|
||
|
|
||
|
The same timezone can be represented as different instances of
|
||
|
timezones. For example
|
||
|
`<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>` and
|
||
|
`<DstTzInfo 'Europe/Paris' CET+1:00:00 STD>` are essentially same
|
||
|
timezones but aren't evaluated such, but the string representation
|
||
|
for both of these is `'Europe/Paris'`.
|
||
|
|
||
|
This exists only to add a notion of equality to pytz-style zones
|
||
|
that is compatible with the notion of equality expected of tzinfo
|
||
|
subclasses.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
start : tzinfo
|
||
|
end : tzinfo
|
||
|
|
||
|
Returns:
|
||
|
-------
|
||
|
bool
|
||
|
"""
|
||
|
# GH 18523
|
||
|
if is_utc(start):
|
||
|
# GH#38851 consider pytz/dateutil/stdlib UTCs as equivalent
|
||
|
return is_utc(end)
|
||
|
elif is_utc(end):
|
||
|
# Ensure we don't treat tzlocal as equal to UTC when running in UTC
|
||
|
return False
|
||
|
elif start is None or end is None:
|
||
|
return start is None and end is None
|
||
|
return get_timezone(start) == get_timezone(end)
|
||
|
|
||
|
|
||
|
def tz_standardize(tz: tzinfo) -> tzinfo:
|
||
|
"""
|
||
|
If the passed tz is a pytz timezone object, "normalize" it to the a
|
||
|
consistent version
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
tz : tzinfo
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
tzinfo
|
||
|
|
||
|
Examples
|
||
|
--------
|
||
|
>>> from datetime import datetime
|
||
|
>>> from pytz import timezone
|
||
|
>>> tz = timezone('US/Pacific').normalize(
|
||
|
... datetime(2014, 1, 1, tzinfo=pytz.utc)
|
||
|
... ).tzinfo
|
||
|
>>> tz
|
||
|
<DstTzInfo 'US/Pacific' PST-1 day, 16:00:00 STD>
|
||
|
>>> tz_standardize(tz)
|
||
|
<DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>
|
||
|
|
||
|
>>> tz = timezone('US/Pacific')
|
||
|
>>> tz
|
||
|
<DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>
|
||
|
>>> tz_standardize(tz)
|
||
|
<DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>
|
||
|
"""
|
||
|
if treat_tz_as_pytz(tz):
|
||
|
return pytz.timezone(str(tz))
|
||
|
return tz
|