import numpy as np

cimport numpy as cnp
from libc.math cimport log10
from numpy cimport (
    int32_t,
    int64_t,
)

cnp.import_array()

# stdlib datetime imports

from datetime import timezone

from cpython.datetime cimport (
    PyDate_Check,
    PyDateTime_Check,
    datetime,
    import_datetime,
    time,
    timedelta,
    tzinfo,
)

import_datetime()

from pandas._libs.tslibs.base cimport ABCTimestamp
from pandas._libs.tslibs.dtypes cimport (
    abbrev_to_npy_unit,
    get_supported_reso,
    periods_per_second,
)
from pandas._libs.tslibs.np_datetime cimport (
    NPY_DATETIMEUNIT,
    NPY_FR_ns,
    NPY_FR_us,
    check_dts_bounds,
    convert_reso,
    get_datetime64_unit,
    get_datetime64_value,
    get_implementation_bounds,
    npy_datetime,
    npy_datetimestruct,
    npy_datetimestruct_to_datetime,
    pandas_datetime_to_datetimestruct,
    pydatetime_to_dt64,
    pydatetime_to_dtstruct,
    string_to_dts,
)

from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime

from pandas._libs.tslibs.nattype cimport (
    NPY_NAT,
    c_NaT as NaT,
    c_nat_strings as nat_strings,
)
from pandas._libs.tslibs.parsing cimport parse_datetime_string
from pandas._libs.tslibs.timestamps cimport _Timestamp
from pandas._libs.tslibs.timezones cimport (
    get_utcoffset,
    is_utc,
    tz_compare,
)
from pandas._libs.tslibs.tzconversion cimport (
    Localizer,
    tz_localize_to_utc_single,
)
from pandas._libs.tslibs.util cimport (
    is_datetime64_object,
    is_float_object,
    is_integer_object,
)

# ----------------------------------------------------------------------
# Constants

DT64NS_DTYPE = np.dtype("M8[ns]")
TD64NS_DTYPE = np.dtype("m8[ns]")


# ----------------------------------------------------------------------
# Unit Conversion Helpers

cdef int64_t cast_from_unit(
    object ts,
    str unit,
    NPY_DATETIMEUNIT out_reso=NPY_FR_ns
) except? -1:
    """
    Convert a number expressed in ``unit`` to an int64 count of ``out_reso``
    periods, rounding the fractional part of a float to our precision, p.

    Parameters
    ----------
    ts : int, float, or None
        If None, the multiplier for ``unit`` is returned.
    unit : str
    out_reso : NPY_DATETIMEUNIT, default NPY_FR_ns

    Returns
    -------
    int64_t

    Raises
    ------
    ValueError
        If ``unit`` is "Y" or "M" and ``ts`` is a non-round float.
    OutOfBoundsDatetime
        If the conversion overflows int64.
    """
    cdef:
        int64_t m
        int p

    m, p = precision_from_unit(unit, out_reso)

    # just give me the unit back
    if ts is None:
        return m

    if unit in ["Y", "M"]:
        if is_float_object(ts) and not ts.is_integer():
            # GH#47267 it is clear that 2 "M" corresponds to 1970-02-01,
            #  but not clear what 2.5 "M" corresponds to, so we will
            #  disallow that case.
            raise ValueError(
                f"Conversion of non-round float with unit={unit} "
                "is ambiguous"
            )
        # GH#47266 go through np.datetime64 to avoid weird results e.g. with "Y"
        #  and 150 we'd get 2120-01-01 09:00:00
        if is_float_object(ts):
            ts = int(ts)
        dt64obj = np.datetime64(ts, unit)
        return get_datetime64_nanos(dt64obj, out_reso)

    # cast the unit, multiply base/frac separately
    # to avoid precision issues from float -> int
    try:
        # The C cast yields the integral part and raises OverflowError when
        #  ts does not fit in an int64; without it frac below is always zero.
        base = <int64_t>ts
    except OverflowError as err:
        raise OutOfBoundsDatetime(
            f"cannot convert input {ts} with the unit '{unit}'"
        ) from err

    frac = ts - base
    if p:
        frac = round(frac, p)

    try:
        # base is a Python int here, so base * m cannot wrap silently; the
        #  casts back to int64 are what surface an overflow.
        return <int64_t>(base * m) + <int64_t>(frac * m)
    except OverflowError as err:
        raise OutOfBoundsDatetime(
            f"cannot convert input {ts} with the unit '{unit}'"
        ) from err


cpdef inline (int64_t, int) precision_from_unit(
    str unit,
    NPY_DATETIMEUNIT out_reso=NPY_DATETIMEUNIT.NPY_FR_ns,
):
    """
    Return the number of ``out_reso`` periods in one ``unit``, plus the
    precision to which the fractional part should be rounded.

    Parameters
    ----------
    unit : str
    out_reso : NPY_DATETIMEUNIT, default NPY_FR_ns

    Returns
    -------
    (int64_t, int)
        The multiplier ``m`` and the rounding precision ``p``.

    Raises
    ------
    ValueError
        If ``unit`` maps to a resolution that is not handled here.

    Notes
    -----
    The caller is responsible for ensuring that the default value of "ns"
    takes the place of None.
    """
    cdef:
        int64_t m
        int64_t multiplier
        int p
        NPY_DATETIMEUNIT reso = abbrev_to_npy_unit(unit)

    multiplier = periods_per_second(out_reso)

    if reso == NPY_DATETIMEUNIT.NPY_FR_Y:
        # each 400 years we have 97 leap years, for an average of 97/400=.2425
        #  extra days each year. We get 31556952 by writing
        #  3600*24*365.2425=31556952
        m = multiplier * 31556952
    elif reso == NPY_DATETIMEUNIT.NPY_FR_M:
        # 2629746 comes from dividing the "Y" case by 12.
        m = multiplier * 2629746
    elif reso == NPY_DATETIMEUNIT.NPY_FR_W:
        m = multiplier * 3600 * 24 * 7
    elif reso == NPY_DATETIMEUNIT.NPY_FR_D:
        m = multiplier * 3600 * 24
    elif reso == NPY_DATETIMEUNIT.NPY_FR_h:
        m = multiplier * 3600
    elif reso == NPY_DATETIMEUNIT.NPY_FR_m:
        m = multiplier * 60
    elif reso == NPY_DATETIMEUNIT.NPY_FR_s:
        m = multiplier
    elif reso == NPY_DATETIMEUNIT.NPY_FR_ms:
        m = multiplier // 1_000
    elif reso == NPY_DATETIMEUNIT.NPY_FR_us:
        m = multiplier // 1_000_000
    elif reso == NPY_DATETIMEUNIT.NPY_FR_ns or reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
        m = multiplier // 1_000_000_000
    else:
        raise ValueError(f"cannot cast unit {unit}")
    # log10 returns a C double; truncate explicitly to the int precision.
    p = <int>log10(m)  # number of digits in 'm' minus 1
    return m, p


cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1:
    """
    Extract the value and unit from a np.datetime64 object, then convert the
    value to the requested resolution ``reso`` if necessary.

    NaT is passed through unchanged as NPY_NAT.
    """
    cdef:
        npy_datetimestruct dts
        NPY_DATETIMEUNIT unit
        npy_datetime ival

    ival = get_datetime64_value(val)
    if ival == NPY_NAT:
        return NPY_NAT

    unit = get_datetime64_unit(val)

    if unit != reso:
        # Round-trip through the struct so the bounds can be checked at the
        #  target resolution before re-encoding.
        pandas_datetime_to_datetimestruct(ival, unit, &dts)
        check_dts_bounds(&dts, reso)
        ival = npy_datetimestruct_to_datetime(reso, &dts)

    return ival


# ----------------------------------------------------------------------
# _TSObject Conversion

# lightweight C object to hold datetime & int64 pair
cdef class _TSObject:
    # cdef:
    #    npy_datetimestruct dts      # npy_datetimestruct
    #    int64_t value               # numpy dt64
    #    tzinfo tzinfo
    #    bint fold
    #    NPY_DATETIMEUNIT creso

    def __cinit__(self):
        # GH 25057. As per PEP 495, set fold to 0 by default
        self.fold = 0
        self.creso = NPY_FR_ns  # default value

    cdef int64_t ensure_reso(self, NPY_DATETIMEUNIT creso, str val=None) except? -1:
        """
        Convert ``self.value`` in place to resolution ``creso`` and return it.

        Parameters
        ----------
        creso : NPY_DATETIMEUNIT
        val : str or None
            If given, included in the message of the OutOfBoundsDatetime
            raised when the conversion overflows.
        """
        if self.creso != creso:
            try:
                self.value = convert_reso(self.value, self.creso, creso, False)
            except OverflowError as err:
                if val is not None:
                    raise OutOfBoundsDatetime(
                        f"Out of bounds nanosecond timestamp: {val}"
                    ) from err
                raise OutOfBoundsDatetime from err

            self.creso = creso
        return self.value


cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit,
                                   bint dayfirst, bint yearfirst, int32_t nanos=0):
    """
    Extract datetime and int64 from any of:
        - np.int64 (with unit providing a possible modifier)
        - np.datetime64
        - a float (with unit providing a possible modifier)
        - python int or long object (with unit providing a possible modifier)
        - iso8601 string object
        - python datetime object
        - another timestamp object

    Raises
    ------
    OutOfBoundsDatetime : ts cannot be converted within implementation bounds
    ValueError : ts is a Period
    TypeError : ts is of an unsupported type
    """
    cdef:
        _TSObject obj
        NPY_DATETIMEUNIT in_reso, reso

    obj = _TSObject()

    if isinstance(ts, str):
        return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst)

    if ts is None or ts is NaT:
        obj.value = NPY_NAT
    elif is_datetime64_object(ts):
        reso = get_supported_reso(get_datetime64_unit(ts))
        obj.creso = reso
        obj.value = get_datetime64_nanos(ts, reso)
        if obj.value != NPY_NAT:
            pandas_datetime_to_datetimestruct(obj.value, reso, &obj.dts)
    elif is_integer_object(ts):
        try:
            # The C cast raises OverflowError for integers outside int64.
            ts = <int64_t>ts
        except OverflowError as err:
            # GH#26651 re-raise as OutOfBoundsDatetime
            raise OutOfBoundsDatetime(
                f"Out of bounds nanosecond timestamp {ts}"
            ) from err
        if ts == NPY_NAT:
            obj.value = NPY_NAT
        else:
            if unit is None:
                unit = "ns"
            in_reso = abbrev_to_npy_unit(unit)
            reso = get_supported_reso(in_reso)
            ts = cast_from_unit(ts, unit, reso)
            obj.value = ts
            obj.creso = reso
            pandas_datetime_to_datetimestruct(ts, reso, &obj.dts)
    elif is_float_object(ts):
        # ts != ts is the NaN check
        if ts != ts or ts == NPY_NAT:
            obj.value = NPY_NAT
        else:
            ts = cast_from_unit(ts, unit)
            obj.value = ts
            pandas_datetime_to_datetimestruct(ts, NPY_FR_ns, &obj.dts)
    elif PyDateTime_Check(ts):
        if nanos == 0:
            if isinstance(ts, ABCTimestamp):
                reso = abbrev_to_npy_unit(ts.unit)  # TODO: faster way to do this?
            else:
                # TODO: what if user explicitly passes nanos=0?
                reso = NPY_FR_us
        else:
            reso = NPY_FR_ns
        return convert_datetime_to_tsobject(ts, tz, nanos, reso=reso)
    elif PyDate_Check(ts):
        # Keep the converter same as PyDateTime's
        # For date object we give the lowest supported resolution, i.e. "s"
        ts = datetime.combine(ts, time())
        return convert_datetime_to_tsobject(
            ts, tz, nanos=0, reso=NPY_DATETIMEUNIT.NPY_FR_s
        )
    else:
        from .period import Period
        if isinstance(ts, Period):
            raise ValueError("Cannot convert Period to Timestamp "
                             "unambiguously. Use to_timestamp")
        raise TypeError(f"Cannot convert input [{ts}] of type {type(ts)} to "
                        f"Timestamp")

    maybe_localize_tso(obj, tz, obj.creso)
    return obj


cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso):
    """
    Localize ``obj`` to ``tz`` (when one is given) and then validate that a
    non-NaT result is within bounds for ``reso``.
    """
    if tz is not None:
        _localize_tso(obj, tz, reso)

    if obj.value != NPY_NAT:
        # check_overflows needs to run after _localize_tso
        check_dts_bounds(&obj.dts, reso)
        check_overflows(obj, reso)


cdef _TSObject convert_datetime_to_tsobject(
    datetime ts,
    tzinfo tz,
    int32_t nanos=0,
    NPY_DATETIMEUNIT reso=NPY_FR_ns,
):
    """
    Convert a datetime (or Timestamp) input `ts`, along with optional timezone
    object `tz` to a _TSObject.

    The optional argument `nanos` allows for cases where datetime input
    needs to be supplemented with higher-precision information.

    Parameters
    ----------
    ts : datetime or Timestamp
        Value to be converted to _TSObject
    tz : tzinfo or None
        timezone for the timezone-aware output
    nanos : int32_t, default is 0
        nanoseconds supplement the precision of the datetime input ts
    reso : NPY_DATETIMEUNIT, default NPY_FR_ns

    Returns
    -------
    obj : _TSObject
    """
    cdef:
        _TSObject obj = _TSObject()
        int64_t pps

    obj.creso = reso
    obj.fold = ts.fold
    if tz is not None:

        if ts.tzinfo is not None:
            # Convert the current timezone to the passed timezone
            ts = ts.astimezone(tz)
            pydatetime_to_dtstruct(ts, &obj.dts)
            obj.tzinfo = ts.tzinfo
        elif not is_utc(tz):
            ts = _localize_pydatetime(ts, tz)
            pydatetime_to_dtstruct(ts, &obj.dts)
            obj.tzinfo = ts.tzinfo
        else:
            # UTC
            pydatetime_to_dtstruct(ts, &obj.dts)
            obj.tzinfo = tz
    else:
        pydatetime_to_dtstruct(ts, &obj.dts)
        obj.tzinfo = ts.tzinfo

    if isinstance(ts, ABCTimestamp):
        obj.dts.ps = ts.nanosecond * 1000

    if nanos:
        # An explicit nanos argument takes precedence over ts.nanosecond.
        obj.dts.ps = nanos * 1000

    obj.value = npy_datetimestruct_to_datetime(reso, &obj.dts)

    if obj.tzinfo is not None and not is_utc(obj.tzinfo):
        # obj.dts holds wall time; subtract the UTC offset to get the UTC value.
        offset = get_utcoffset(obj.tzinfo, ts)
        pps = periods_per_second(reso)
        obj.value -= int(offset.total_seconds() * pps)

    check_dts_bounds(&obj.dts, reso)
    check_overflows(obj, reso)
    return obj


cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts,
                                                int tzoffset, tzinfo tz=None,
                                                NPY_DATETIMEUNIT reso=NPY_FR_ns):
    """
    Convert a datetimestruct `dts`, along with initial timezone offset
    `tzoffset` to a _TSObject (with timezone object `tz` - optional).

    Parameters
    ----------
    dts : npy_datetimestruct
    tzoffset : int
        Offset from UTC, in minutes.
    tz : tzinfo or None
        timezone for the timezone-aware output.
    reso : NPY_DATETIMEUNIT, default NPY_FR_ns

    Returns
    -------
    obj : _TSObject
    """
    cdef:
        _TSObject obj = _TSObject()
        int64_t value  # numpy dt64
        datetime dt
        Py_ssize_t pos

    value = npy_datetimestruct_to_datetime(reso, &dts)
    obj.dts = dts
    obj.tzinfo = timezone(timedelta(minutes=tzoffset))
    obj.value = tz_localize_to_utc_single(
        value, obj.tzinfo, ambiguous=None, nonexistent=None, creso=reso
    )
    obj.creso = reso
    if tz is None:
        check_overflows(obj, reso)
        return obj

    cdef:
        Localizer info = Localizer(tz, reso)

    # Infer fold from offset-adjusted obj.value
    # see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute
    if info.use_utc:
        pass
    elif info.use_tzlocal:
        info.utc_val_to_local_val(obj.value, &pos, &obj.fold)
    elif info.use_dst and not info.use_pytz:
        # i.e. dateutil
        info.utc_val_to_local_val(obj.value, &pos, &obj.fold)

    # Keep the converter same as PyDateTime's
    dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day,
                  obj.dts.hour, obj.dts.min, obj.dts.sec,
                  obj.dts.us, obj.tzinfo, fold=obj.fold)
    obj = convert_datetime_to_tsobject(
        dt, tz, nanos=obj.dts.ps // 1000)
    obj.ensure_reso(reso)  # TODO: more performant to get reso right up front?
    return obj


cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, str unit,
                                       bint dayfirst=False,
                                       bint yearfirst=False):
    """
    Convert a string input `ts`, along with optional timezone object`tz`
    to a _TSObject.

    The optional arguments `dayfirst` and `yearfirst` are passed to the
    dateutil parser.

    Parameters
    ----------
    ts : str
        Value to be converted to _TSObject
    tz : tzinfo or None
        timezone for the timezone-aware output
    unit : str or None
    dayfirst : bool, default False
        When parsing an ambiguous date string, interpret e.g. "3/4/1975" as
        April 3, as opposed to the standard US interpretation March 4.
    yearfirst : bool, default False
        When parsing an ambiguous date string, interpret e.g. "01/05/09"
        as "May 9, 2001", as opposed to the default "Jan 5, 2009"

    Returns
    -------
    obj : _TSObject
    """
    cdef:
        npy_datetimestruct dts
        int out_local = 0, out_tzoffset = 0, string_to_dts_failed
        datetime dt
        int64_t ival
        NPY_DATETIMEUNIT out_bestunit, reso

    if len(ts) == 0 or ts in nat_strings:
        obj = _TSObject()
        obj.value = NPY_NAT
        obj.tzinfo = tz
        return obj
    elif ts == "now":
        # Issue 9000, we short-circuit rather than going
        # into np_datetime_strings which returns utc
        dt = datetime.now(tz)
    elif ts == "today":
        # Issue 9000, we short-circuit rather than going
        # into np_datetime_strings which returns a normalized datetime
        dt = datetime.now(tz)
        # equiv: datetime.today().replace(tzinfo=tz)
    else:
        string_to_dts_failed = string_to_dts(
            ts, &dts, &out_bestunit, &out_local,
            &out_tzoffset, False
        )
        if not string_to_dts_failed:
            reso = get_supported_reso(out_bestunit)
            check_dts_bounds(&dts, reso)
            if out_local == 1:
                return _create_tsobject_tz_using_offset(
                    dts, out_tzoffset, tz, reso
                )
            else:
                ival = npy_datetimestruct_to_datetime(reso, &dts)
                if tz is not None:
                    # shift for _localize_tso
                    ival = tz_localize_to_utc_single(
                        ival, tz, ambiguous="raise", nonexistent=None, creso=reso
                    )
                obj = _TSObject()
                obj.dts = dts
                obj.value = ival
                obj.creso = reso
                maybe_localize_tso(obj, tz, obj.creso)
                return obj

        # string_to_dts could not handle the string; fall back to the
        #  general-purpose parser.
        dt = parse_datetime_string(
            ts, dayfirst=dayfirst, yearfirst=yearfirst, out_bestunit=&out_bestunit
        )
        reso = get_supported_reso(out_bestunit)
        return convert_datetime_to_tsobject(dt, tz, nanos=0, reso=reso)

    return convert_datetime_to_tsobject(dt, tz)


cdef check_overflows(_TSObject obj, NPY_DATETIMEUNIT reso=NPY_FR_ns):
    """
    Check that we haven't silently overflowed in timezone conversion

    Parameters
    ----------
    obj : _TSObject
    reso : NPY_DATETIMEUNIT, default NPY_FR_ns

    Returns
    -------
    None

    Raises
    ------
    OutOfBoundsDatetime
    """
    # GH#12677
    cdef:
        npy_datetimestruct lb, ub

    get_implementation_bounds(reso, &lb, &ub)

    # In the boundary years the sign of obj.value must agree with the side of
    #  the range obj.dts sits on; a mismatch means the int64 wrapped around.
    if obj.dts.year == lb.year:
        if not (obj.value < 0):
            from pandas._libs.tslibs.timestamps import Timestamp
            fmt = (f"{obj.dts.year}-{obj.dts.month:02d}-{obj.dts.day:02d} "
                   f"{obj.dts.hour:02d}:{obj.dts.min:02d}:{obj.dts.sec:02d}")
            raise OutOfBoundsDatetime(
                f"Converting {fmt} underflows past {Timestamp.min}"
            )
    elif obj.dts.year == ub.year:
        if not (obj.value > 0):
            from pandas._libs.tslibs.timestamps import Timestamp
            fmt = (f"{obj.dts.year}-{obj.dts.month:02d}-{obj.dts.day:02d} "
                   f"{obj.dts.hour:02d}:{obj.dts.min:02d}:{obj.dts.sec:02d}")
            raise OutOfBoundsDatetime(
                f"Converting {fmt} overflows past {Timestamp.max}"
            )

# ----------------------------------------------------------------------
# Localization

cdef void _localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso):
    """
    Given the UTC timestamp in obj.value (at resolution ``reso``), find the
    wall-clock representation of that timestamp in the given timezone.

    Parameters
    ----------
    obj : _TSObject
    tz : tzinfo
    reso : NPY_DATETIMEUNIT

    Returns
    -------
    None

    Notes
    -----
    Sets obj.tzinfo inplace, alters obj.dts inplace.
    """
    cdef:
        int64_t local_val
        Py_ssize_t outpos = -1
        Localizer info = Localizer(tz, reso)

    assert obj.tzinfo is None

    if info.use_utc:
        pass
    elif obj.value == NPY_NAT:
        pass
    else:
        local_val = info.utc_val_to_local_val(obj.value, &outpos, &obj.fold)

        if info.use_pytz:
            # infer we went through a pytz path, will have outpos!=-1
            tz = tz._tzinfos[tz._transition_info[outpos]]

        pandas_datetime_to_datetimestruct(local_val, reso, &obj.dts)

    obj.tzinfo = tz


cdef datetime _localize_pydatetime(datetime dt, tzinfo tz):
    """
    Take a datetime/Timestamp in UTC and localizes to timezone tz.

    NB: Unlike the public version, this treats datetime and Timestamp objects
        identically, i.e. discards nanos from Timestamps.
        It also assumes that the `tz` input is not None.
    """
    try:
        # datetime.replace with pytz may be incorrect result
        return tz.localize(dt)
    except AttributeError:
        # tz has no ``localize`` method, so attach it directly
        return dt.replace(tzinfo=tz)


cpdef inline datetime localize_pydatetime(datetime dt, tzinfo tz):
    """
    Take a datetime/Timestamp in UTC and localizes to timezone tz.

    Parameters
    ----------
    dt : datetime or Timestamp
    tz : tzinfo or None

    Returns
    -------
    localized : datetime or Timestamp
    """
    if tz is None:
        return dt
    elif isinstance(dt, ABCTimestamp):
        return dt.tz_localize(tz)
    return _localize_pydatetime(dt, tz)


cdef tzinfo convert_timezone(
    tzinfo tz_in,
    tzinfo tz_out,
    bint found_naive,
    bint found_tz,
    bint utc_convert,
):
    """
    Validate that ``tz_in`` can be converted/localized to ``tz_out``.

    Parameters
    ----------
    tz_in : tzinfo or None
        Timezone info of element being processed.
    tz_out : tzinfo or None
        Timezone info of output.
    found_naive : bool
        Whether a timezone-naive element has been found so far.
    found_tz : bool
        Whether a timezone-aware element has been found so far.
    utc_convert : bool
        Whether to convert/localize to UTC.

    Returns
    -------
    tz_info
        Timezone info of output.

    Raises
    ------
    ValueError
        If ``tz_in`` can't be converted/localized to ``tz_out``.
    """
    if tz_in is not None:
        if utc_convert:
            pass
        elif found_naive:
            raise ValueError("Tz-aware datetime.datetime "
                             "cannot be converted to "
                             "datetime64 unless utc=True")
        elif tz_out is not None and not tz_compare(tz_out, tz_in):
            raise ValueError("Tz-aware datetime.datetime "
                             "cannot be converted to "
                             "datetime64 unless utc=True")
        else:
            tz_out = tz_in
    else:
        if found_tz and not utc_convert:
            raise ValueError("Cannot mix tz-aware with "
                             "tz-naive values")
    return tz_out


cdef int64_t parse_pydatetime(
    datetime val,
    npy_datetimestruct *dts,
    bint utc_convert,
) except? -1:
    """
    Convert pydatetime to datetime64.

    Parameters
    ----------
    val : datetime
        Element being processed.
    dts : *npy_datetimestruct
        Needed to use in pydatetime_to_dt64, which writes to it.
    utc_convert : bool
        Whether to convert/localize to UTC. Kept for interface
        compatibility; tz-aware input is converted the same way for either
        value.

    Returns
    -------
    int64_t
        The value at nanosecond resolution.

    Raises
    ------
    OutOfBoundsDatetime
    """
    cdef:
        _TSObject _ts
        int64_t result

    if val.tzinfo is not None:
        # The utc_convert and non-utc_convert paths were identical, so they
        #  are handled by a single branch.
        _ts = convert_datetime_to_tsobject(val, None)
        _ts.ensure_reso(NPY_FR_ns)
        result = _ts.value
    else:
        if isinstance(val, _Timestamp):
            result = val.as_unit("ns")._value
        else:
            result = pydatetime_to_dt64(val, dts)
            check_dts_bounds(dts)
    return result