Inzynierka/Lib/site-packages/pandas/_libs/tslibs/strptime.pyx

"""Strptime-related classes and functions.

TimeRE, _calc_julian_from_U_or_W are vendored
from the standard library, see
https://github.com/python/cpython/blob/main/Lib/_strptime.py
The original module-level docstring follows.

Strptime-related classes and functions.
CLASSES:
    LocaleTime -- Discovers and stores locale-specific time information
    TimeRE -- Creates regexes for pattern matching a string of text containing
                time information
FUNCTIONS:
    _getlang -- Figure out what language is being used for the locale
    strptime -- Calculates the time struct represented by the passed-in string
"""
from datetime import timezone

from cpython.datetime cimport (
    PyDate_Check,
    PyDateTime_Check,
    date,
    import_datetime,
    timedelta,
    tzinfo,
)
from _strptime import (
    TimeRE as _TimeRE,
    _getlang,
)
from _strptime import LocaleTime  # no-cython-lint

import_datetime()

from _thread import allocate_lock as _thread_allocate_lock
import re

import numpy as np
import pytz

cimport numpy as cnp
from numpy cimport (
    int64_t,
    ndarray,
)

from pandas._libs.missing cimport checknull_with_nat_and_na
from pandas._libs.tslibs.conversion cimport (
    convert_timezone,
    get_datetime64_nanos,
)
from pandas._libs.tslibs.nattype cimport (
    NPY_NAT,
    c_nat_strings as nat_strings,
)
from pandas._libs.tslibs.np_datetime cimport (
    NPY_DATETIMEUNIT,
    NPY_FR_ns,
    check_dts_bounds,
    npy_datetimestruct,
    npy_datetimestruct_to_datetime,
    pydate_to_dt64,
    pydatetime_to_dt64,
    string_to_dts,
)
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
from pandas._libs.tslibs.timestamps cimport _Timestamp
from pandas._libs.util cimport (
    is_datetime64_object,
    is_float_object,
    is_integer_object,
)

from pandas._libs.tslibs.timestamps import Timestamp

cnp.import_array()

cdef bint format_is_iso(f: str):
    """
    Report whether a strptime format belongs to the ISO 8601 family that the
    C parser can handle.

    Accepted formats look like YYYY-MM-DDTHH:MM:SS, optionally followed by
    fractional seconds and/or a UTC offset. The date separator may be one of
    several characters (or absent) but has to be used consistently.
    The format "%Y%m" is explicitly rejected even though it fits the pattern.
    """
    # Explicitly excluded formats are rejected before any pattern matching
    if f in ("%Y%m",):
        return False

    pattern = re.compile(
        r"""
        ^                     # start of string
        %Y                    # Year
        (?:([-/ \\.]?)%m      # month with or without separators
        (?: \1%d              # day with same separator as for year-month
        (?:[ T]%H             # hour with separator
        (?:\:%M               # minute with separator
        (?:\:%S               # second with separator
        (?:%z|\.%f(?:%z)?     # timezone or fractional second
        )?)?)?)?)?)?          # optional
        $                     # end of string
        """,
        re.VERBOSE,
    )
    return pattern.match(f) is not None


def _test_format_is_iso(f: str) -> bool:
    """Expose the cdef ``format_is_iso`` check to Python; only used in testing."""
    is_iso = format_is_iso(f)
    return is_iso


cdef bint parse_today_now(str val, int64_t* iresult, bint utc):
    """
    Handle the special strings "now" and "today".

    When ``val`` is one of them, the corresponding timestamp is written to
    ``iresult[0]`` and True is returned; otherwise nothing is written and
    False is returned.
    """
    # Callers run this check late because it only catches relatively rare cases.

    # The Timestamp constructors used here naturally have microsecond
    # resolution, so the stored value is multiplied by 1000 to get nanoseconds.
    if val == "today":
        iresult[0] = Timestamp.today()._value * 1000
        return True

    if val != "now":
        return False

    if utc:
        ts = Timestamp.utcnow()
    else:
        # GH#18705 make sure to_datetime("now") matches Timestamp("now")
        # Note using Timestamp.now() is faster than Timestamp("now")
        ts = Timestamp.now()
    iresult[0] = ts._value * 1000
    return True

# Maps the name of a regex capture group (the strptime directive letter) to
# the integer code that array_strptime dispatches on while walking the groups
# of a match. Code 18 ("p") has no branch of its own in that dispatch: the
# AM/PM value is read while handling "I".
cdef dict _parse_code_table = {"y": 0,
                               "Y": 1,
                               "m": 2,
                               "B": 3,
                               "b": 4,
                               "d": 5,
                               "H": 6,
                               "I": 7,
                               "M": 8,
                               "S": 9,
                               "f": 10,
                               "A": 11,
                               "a": 12,
                               "w": 13,
                               "j": 14,
                               "U": 15,
                               "W": 16,
                               "Z": 17,
                               "p": 18,  # an additional key, only with I
                               "z": 19,
                               "G": 20,
                               "V": 21,
                               "u": 22}


def array_strptime(
    ndarray[object] values,
    str fmt,
    bint exact=True,
    errors="raise",
    bint utc=False,
):
    """
    Parse an object array into datetime64[ns] values using a strptime format.

    Parameters
    ----------
    values : ndarray[object]
        Elements to parse. Strings are parsed with ``fmt``; empty strings,
        NaT strings and null-like values become NaT; datetime, date and
        datetime64 objects are converted directly; integers/floats that are
        NaN or equal to NPY_NAT become NaT; anything else is converted with
        ``str()`` and then parsed.
    fmt : str
        strptime-style format string, or the literal ``"ISO8601"``.
    exact : bool, default True
        If True the format must match from the start of the string and no
        unconverted data may remain; if False the format is searched for
        anywhere in the string.
    errors : {'raise', 'ignore', 'coerce'}, default 'raise'
        What to do when an element fails to parse: re-raise the exception
        ('raise'), store NaT for that element ('coerce'), or stop and return
        the input unchanged ('ignore').
    utc : bool, default False
        Forwarded to ``convert_timezone`` for datetime inputs and to
        ``parse_today_now`` for the string "now".

    Returns
    -------
    tuple
        ``(result, timezones)`` where ``result`` is an ndarray of dtype
        ``M8[ns]`` and ``timezones`` is the object array backing
        ``result_timezone``, holding the timezone recorded for each element.
        With ``errors='ignore'`` and a failing element, ``(values, [])`` is
        returned instead.

    Raises
    ------
    ValueError
        If ``fmt`` contains an invalid directive or an unsupported
        combination of directives, or (with ``errors='raise'``) if an element
        cannot be parsed.
    OutOfBoundsDatetime
        Re-raised with ``errors='raise'`` when it occurs while processing an
        element.
    """

    cdef:
        Py_ssize_t i, n = len(values)
        npy_datetimestruct dts
        int64_t[::1] iresult
        object[::1] result_timezone
        int year, month, day, minute, hour, second, weekday, julian
        int week_of_year, week_of_year_start, parse_code, ordinal
        int iso_week, iso_year
        int64_t us, ns
        object val, group_key, ampm, found, tz
        bint is_raise = errors=="raise"
        bint is_ignore = errors=="ignore"
        bint is_coerce = errors=="coerce"
        bint found_naive = False
        bint found_tz = False
        tzinfo tz_out = None
        bint iso_format = format_is_iso(fmt)
        NPY_DATETIMEUNIT out_bestunit
        int out_local = 0, out_tzoffset = 0
        bint string_to_dts_succeeded = 0

    assert is_raise or is_ignore or is_coerce

    # Reject directive combinations that cannot be resolved to a date.
    # NOTE(review): the elif branches hang off the %W/%U check, so they are
    # only evaluated when neither %W nor %U appears in fmt.
    if "%W" in fmt or "%U" in fmt:
        if "%Y" not in fmt and "%y" not in fmt:
            raise ValueError("Cannot use '%W' or '%U' without day and year")
        if "%A" not in fmt and "%a" not in fmt and "%w" not in fmt:
            raise ValueError("Cannot use '%W' or '%U' without day and year")
    elif "%Z" in fmt and "%z" in fmt:
        raise ValueError("Cannot parse both %Z and %z")
    elif "%j" in fmt and "%G" in fmt:
        raise ValueError("Day of the year directive '%j' is not "
                         "compatible with ISO year directive '%G'. "
                         "Use '%Y' instead.")
    elif "%G" in fmt and (
        "%V" not in fmt
        or not (
            "%A" in fmt
            or "%a" in fmt
            or "%w" in fmt
            or "%u" in fmt
        )
    ):
        raise ValueError("ISO year directive '%G' must be used with "
                         "the ISO week directive '%V' and a weekday "
                         "directive '%A', '%a', '%w', or '%u'.")
    elif "%V" in fmt and "%Y" in fmt:
        raise ValueError("ISO week directive '%V' is incompatible with "
                         "the year directive '%Y'. Use the ISO year "
                         "'%G' instead.")
    elif "%V" in fmt and (
        "%G" not in fmt
        or not (
            "%A" in fmt
            or "%a" in fmt
            or "%w" in fmt
            or "%u" in fmt
        )
    ):
        raise ValueError("ISO week directive '%V' must be used with "
                         "the ISO year directive '%G' and a weekday "
                         "directive '%A', '%a', '%w', or '%u'.")

    # Fetch (or build and cache) the compiled regex for fmt. Every access to
    # the module-level caches happens while holding _cache_lock.
    global _TimeRE_cache, _regex_cache
    with _cache_lock:
        # The locale language differs from the one the cache was built for:
        # rebuild the TimeRE and drop every regex compiled from the old one.
        if _getlang() != _TimeRE_cache.locale_time.lang:
            _TimeRE_cache = TimeRE()
            _regex_cache.clear()
        if len(_regex_cache) > _CACHE_MAX_SIZE:
            _regex_cache.clear()
        locale_time = _TimeRE_cache.locale_time
        format_regex = _regex_cache.get(fmt)
        if not format_regex:
            try:
                format_regex = _TimeRE_cache.compile(fmt)
            # KeyError raised when a bad format is found; can be specified as
            # \\, in which case it was a stray % but with a space after it
            except KeyError, err:
                bad_directive = err.args[0]
                if bad_directive == "\\":
                    bad_directive = "%"
                del err
                raise ValueError(f"'{bad_directive}' is a bad directive "
                                 f"in format '{fmt}'")
            # IndexError only occurs when the format string is "%"
            except IndexError:
                raise ValueError(f"stray % in format '{fmt}'")
            _regex_cache[fmt] = format_regex

    # iresult is an int64 view of result, so values written through iresult
    # end up in the M8[ns] array that is returned.
    result = np.empty(n, dtype="M8[ns]")
    iresult = result.view("i8")
    result_timezone = np.empty(n, dtype="object")

    # Zero the sub-second fields up front; dts.as is not assigned again
    # directly in this function.
    dts.us = dts.ps = dts.as = 0

    for i in range(n):
        val = values[i]
        try:
            # Non-string inputs are either converted directly (and the loop
            # continues) or stringified; strings that are not NaT-like fall
            # through to the parsing code below.
            if isinstance(val, str):
                if len(val) == 0 or val in nat_strings:
                    iresult[i] = NPY_NAT
                    continue
            elif checknull_with_nat_and_na(val):
                iresult[i] = NPY_NAT
                continue
            elif PyDateTime_Check(val):
                if val.tzinfo is not None:
                    found_tz = True
                else:
                    found_naive = True
                tz_out = convert_timezone(
                    val.tzinfo,
                    tz_out,
                    found_naive,
                    found_tz,
                    utc,
                )
                # The stored value is timezone-naive; the original tzinfo is
                # recorded separately in result_timezone.
                if isinstance(val, _Timestamp):
                    iresult[i] = val.tz_localize(None).as_unit("ns")._value
                else:
                    iresult[i] = pydatetime_to_dt64(val.replace(tzinfo=None), &dts)
                    check_dts_bounds(&dts)
                result_timezone[i] = val.tzinfo
                continue
            elif PyDate_Check(val):
                iresult[i] = pydate_to_dt64(val, &dts)
                check_dts_bounds(&dts)
                continue
            elif is_datetime64_object(val):
                iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
                continue
            elif (
                    (is_integer_object(val) or is_float_object(val))
                    and (val != val or val == NPY_NAT)
            ):
                # NaN (val != val) or the NaT sentinel given as a number
                iresult[i] = NPY_NAT
                continue
            else:
                val = str(val)

            # Fast path: let string_to_dts try the ISO 8601 parse. A falsy
            # return value is treated as success, hence the `not`.
            if fmt == "ISO8601":
                string_to_dts_succeeded = not string_to_dts(
                    val, &dts, &out_bestunit, &out_local,
                    &out_tzoffset, False, None, False
                )
            elif iso_format:
                string_to_dts_succeeded = not string_to_dts(
                    val, &dts, &out_bestunit, &out_local,
                    &out_tzoffset, False, fmt, exact
                )
            if string_to_dts_succeeded:
                # No error reported by string_to_dts, pick back up
                # where we left off
                value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
                if out_local == 1:
                    # out_tzoffset is interpreted as a number of minutes and
                    # turned into a fixed-offset timezone; the out-parameters
                    # are then reset for the next element.
                    tz = timezone(timedelta(minutes=out_tzoffset))
                    result_timezone[i] = tz
                    out_local = 0
                    out_tzoffset = 0
                iresult[i] = value
                check_dts_bounds(&dts)
                continue

            if parse_today_now(val, &iresult[i], utc):
                continue

            # Some ISO formats can't be parsed by string_to_dts
            # For example, 6-digit YYYYMD. So, if there's an error, and a format
            # was specified, then try the string-matching code below. If the format
            # specified was 'ISO8601', then we need to error, because
            # only string_to_dts handles mixed ISO8601 formats.
            if not string_to_dts_succeeded and fmt == "ISO8601":
                raise ValueError(f"Time data {val} is not ISO8601 format")

            # exact matching
            if exact:
                found = format_regex.match(val)
                if not found:
                    raise ValueError(
                        f"time data \"{val}\" doesn't match format \"{fmt}\""
                    )
                if len(val) != found.end():
                    raise ValueError(
                        "unconverted data remains when parsing with "
                        f"format \"{fmt}\": \"{val[found.end():]}\""
                    )

            # search
            else:
                found = format_regex.search(val)
                if not found:
                    raise ValueError(
                        f"time data \"{val}\" doesn't match format \"{fmt}\""
                    )

            # Defaults used for every field the format does not provide
            iso_year = -1
            year = 1900
            month = day = 1
            hour = minute = second = ns = us = 0
            tz = None
            # Default to -1 to signify that values not known; not critical to have,
            # though
            iso_week = week_of_year = -1
            week_of_year_start = -1
            # weekday and julian defaulted to -1 so as to signal need to calculate
            # values
            weekday = julian = -1
            found_dict = found.groupdict()
            # Dispatch on the integer code of each named group in the match
            for group_key in found_dict.iterkeys():
                # Directives not explicitly handled below:
                #   c, x, X
                #      handled by making out of other directives
                #   U, W
                #      worthless without day of the week
                parse_code = _parse_code_table[group_key]

                if parse_code == 0:
                    year = int(found_dict["y"])
                    # Open Group specification for strptime() states that a %y
                    # value in the range of [00, 68] is in the century 2000, while
                    # [69,99] is in the century 1900
                    if year <= 68:
                        year += 2000
                    else:
                        year += 1900
                elif parse_code == 1:
                    year = int(found_dict["Y"])
                elif parse_code == 2:
                    month = int(found_dict["m"])
                # elif group_key == 'B':
                elif parse_code == 3:
                    month = locale_time.f_month.index(found_dict["B"].lower())
                # elif group_key == 'b':
                elif parse_code == 4:
                    month = locale_time.a_month.index(found_dict["b"].lower())
                # elif group_key == 'd':
                elif parse_code == 5:
                    day = int(found_dict["d"])
                # elif group_key == 'H':
                elif parse_code == 6:
                    hour = int(found_dict["H"])
                elif parse_code == 7:
                    hour = int(found_dict["I"])
                    ampm = found_dict.get("p", "").lower()
                    # If there was no AM/PM indicator, we'll treat this like AM
                    if ampm in ("", locale_time.am_pm[0]):
                        # We're in AM so the hour is correct unless we're
                        # looking at 12 midnight.
                        # 12 midnight == 12 AM == hour 0
                        if hour == 12:
                            hour = 0
                    elif ampm == locale_time.am_pm[1]:
                        # We're in PM so we need to add 12 to the hour unless
                        # we're looking at 12 noon.
                        # 12 noon == 12 PM == hour 12
                        if hour != 12:
                            hour += 12
                elif parse_code == 8:
                    minute = int(found_dict["M"])
                elif parse_code == 9:
                    second = int(found_dict["S"])
                elif parse_code == 10:
                    s = found_dict["f"]
                    # Pad to always return nanoseconds
                    s += "0" * (9 - len(s))
                    # Split the 9-digit value into whole microseconds (us)
                    # and the remaining nanoseconds (ns)
                    us = long(s)
                    ns = us % 1000
                    us = us // 1000
                elif parse_code == 11:
                    weekday = locale_time.f_weekday.index(found_dict["A"].lower())
                elif parse_code == 12:
                    weekday = locale_time.a_weekday.index(found_dict["a"].lower())
                elif parse_code == 13:
                    # %w counts from Sunday == 0; convert to Monday == 0
                    weekday = int(found_dict["w"])
                    if weekday == 0:
                        weekday = 6
                    else:
                        weekday -= 1
                elif parse_code == 14:
                    julian = int(found_dict["j"])
                elif parse_code == 15 or parse_code == 16:
                    week_of_year = int(found_dict[group_key])
                    if group_key == "U":
                        # U starts week on Sunday.
                        week_of_year_start = 6
                    else:
                        # W starts week on Monday.
                        week_of_year_start = 0
                elif parse_code == 17:
                    tz = pytz.timezone(found_dict["Z"])
                elif parse_code == 19:
                    tz = parse_timezone_directive(found_dict["z"])
                elif parse_code == 20:
                    iso_year = int(found_dict["G"])
                elif parse_code == 21:
                    iso_week = int(found_dict["V"])
                elif parse_code == 22:
                    # %u counts from Monday == 1; convert to Monday == 0
                    weekday = int(found_dict["u"])
                    weekday -= 1

            # If we know the wk of the year and what day of that wk, we can figure
            # out the Julian day of the year.
            if julian == -1 and weekday != -1:
                if week_of_year != -1:
                    week_starts_Mon = week_of_year_start == 0
                    julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
                                                      week_starts_Mon)
                elif iso_year != -1 and iso_week != -1:
                    # weekday is Monday == 0 here; the helper takes the ISO
                    # weekday (Monday == 1), hence the + 1
                    year, julian = _calc_julian_from_V(iso_year, iso_week,
                                                       weekday + 1)
            # Cannot pre-calculate date() since can change in Julian
            # calculation and thus could have different value for the day of the wk
            # calculation.
            if julian == -1:
                # Need to add 1 to result since first day of the year is 1, not
                # 0.
                ordinal = date(year, month, day).toordinal()
                julian = ordinal - date(year, 1, 1).toordinal() + 1
            else:
                # Assume that if they bothered to include Julian day it will
                # be accurate.
                datetime_result = date.fromordinal(
                    (julian - 1) + date(year, 1, 1).toordinal())
                year = datetime_result.year
                month = datetime_result.month
                day = datetime_result.day
            # NOTE(review): weekday is not read again after this assignment
            # within this function.
            if weekday == -1:
                weekday = date(year, month, day).weekday()

            dts.year = year
            dts.month = month
            dts.day = day
            dts.hour = hour
            dts.min = minute
            dts.sec = second
            dts.us = us
            # The leftover nanoseconds are stored in the picosecond field
            dts.ps = ns * 1000

            iresult[i] = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
            check_dts_bounds(&dts)

            result_timezone[i] = tz

        except (ValueError, OutOfBoundsDatetime) as ex:
            # Append the element position and usage hints to the message
            # before deciding how the failure is handled.
            ex.args = (
                f"{str(ex)}, at position {i}. You might want to try:\n"
                "    - passing `format` if your strings have a consistent format;\n"
                "    - passing `format='ISO8601'` if your strings are "
                "all ISO8601 but not necessarily in exactly the same format;\n"
                "    - passing `format='mixed'`, and the format will be "
                "inferred for each element individually. "
                "You might want to use `dayfirst` alongside this.",
            )
            if is_coerce:
                iresult[i] = NPY_NAT
                continue
            elif is_raise:
                raise
            # errors="ignore": stop at the first failure and hand back the
            # input array unchanged together with an empty list.
            return values, []

    return result, result_timezone.base


class TimeRE(_TimeRE):
    """
    Handle conversion from format directives to regexes.

    Creates regexes for pattern matching a string of text containing
    time information
    """

    def __init__(self, locale_time=None):
        """
        Create keys/values.

        Order of execution is important for dependency reasons.
        """
        self._Z = None
        super().__init__(locale_time=locale_time)
        # GH 48767: Overrides for cpython's TimeRE
        #  1) Parse up to nanos instead of micros
        self.update({"f": r"(?P<f>[0-9]{1,9})"}),

    def __getitem__(self, key):
        if key == "Z":
            # lazy computation
            if self._Z is None:
                self._Z = self.__seqToRE(pytz.all_timezones, "Z")
            # Note: handling Z is the key difference vs using the stdlib
            # _strptime.TimeRE. test_to_datetime_parse_tzname_or_tzoffset with
            # fmt='%Y-%m-%d %H:%M:%S %Z' fails with the stdlib version.
            return self._Z
        return super().__getitem__(key)


# Lock guarding the two module-level caches below; array_strptime holds it
# while reading or rebuilding them.
_cache_lock = _thread_allocate_lock()
# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock
# first!
# TimeRE instance used to compile formats; array_strptime replaces it when
# the locale language reported by _getlang() changes.
_TimeRE_cache = TimeRE()
# array_strptime clears _regex_cache once it holds MORE than this many
# entries; the check runs before a new regex is inserted.
_CACHE_MAX_SIZE = 5  # Max number of regexes stored in _regex_cache
# Maps a format string to the regex compiled from it.
_regex_cache = {}


cdef int _calc_julian_from_U_or_W(int year, int week_of_year,
                                  int day_of_week, int week_starts_Mon):
    """
    Compute the day of the year from a year, a %U/%W week number and a
    weekday.

    Parameters
    ----------
    year : int
        the year
    week_of_year : int
        week number taken from format %U or %W (0 denotes the partial week
        before the first full week)
    day_of_week : int
        weekday with Monday == 0 and Sunday == 6
    week_starts_Mon : int
        non-zero when weeks start on Monday (%W), zero when they start on
        Sunday (%U)

    Returns
    -------
    int
        converted julian day (1 == January 1st)
    """

    cdef:
        int jan1_weekday, partial_week_days

    jan1_weekday = date(year, 1, 1).weekday()
    # For Sunday-based weeks (%U), rotate both weekdays so that Sunday is
    # day 0; the rest of the arithmetic then works for either directive.
    if not week_starts_Mon:
        jan1_weekday = (jan1_weekday + 1) % 7
        day_of_week = (day_of_week + 1) % 7

    # Week 0 is the partial week that exists when the year does not begin on
    # the first day of a week: offset directly from January 1st.
    if week_of_year == 0:
        return 1 + day_of_week - jan1_weekday

    # Otherwise skip the days of week 0, then the full weeks before the
    # requested one.
    partial_week_days = (7 - jan1_weekday) % 7
    return 1 + partial_week_days + (7 * (week_of_year - 1)) + day_of_week


cdef (int, int) _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday):
    """
    Convert an ISO 8601 (year, week, weekday) triple into a calendar year and
    the day of that year.

    ISO weeks start on Mondays, with week 01 being the week containing 4 Jan.
    ISO week days range from 1 (Monday) to 7 (Sunday).

    Parameters
    ----------
    iso_year : int
        the year taken from format %G
    iso_week : int
        the week taken from format %V
    iso_weekday : int
        weekday taken from format %u

    Returns
    -------
    (int, int)
        the calendar year the date falls in and the day of that year
        (1 == January 1st)
    """

    cdef:
        int jan4_shift, day_of_year

    jan4_shift = date(iso_year, 1, 4).isoweekday() + 3
    day_of_year = (iso_week * 7) + iso_weekday - jan4_shift
    if day_of_year >= 1:
        return iso_year, day_of_year

    # A value below 1 means the date belongs to the previous calendar year:
    # re-express it relative to January 1st of that year.
    day_of_year = (
        day_of_year
        + date(iso_year, 1, 1).toordinal()
        - date(iso_year - 1, 1, 1).toordinal()
    )
    return iso_year - 1, day_of_year


cdef tzinfo parse_timezone_directive(str z):
    """
    Turn the text matched by the '%z' directive into a datetime.timezone.

    Parameters
    ----------
    z : string of the UTC offset
        Either "Z" or a signed offset such as "+0100", "+01:00" or
        "+01:00:30.5".

    Returns
    -------
    datetime.timezone

    Raises
    ------
    ValueError
        If a colon separates hours and minutes but not minutes and seconds.

    Notes
    -----
    This is essentially similar to the cpython implementation
    https://github.com/python/cpython/blob/master/Lib/_strptime.py#L457-L479

    Seconds and fractional seconds only enter the result through integer
    division, so the returned offset has whole-minute resolution.
    """

    cdef:
        int hh, mm, ss, n_pad, frac_us
        int offset_minutes
        object frac_digits, frac_padding

    if z == "Z":
        return timezone(timedelta(0))

    # Remove the optional ":" separators so the fields sit at fixed positions
    if z[3] == ":":
        z = z[:3] + z[4:]
        if len(z) > 5:
            if z[5] != ":":
                raise ValueError(f"Inconsistent use of : in {z}")
            z = z[:5] + z[6:]

    hh = int(z[1:3])
    mm = int(z[3:5])
    ss = int(z[5:7] or 0)

    # Pad to always return microseconds.
    frac_digits = z[8:]
    n_pad = 6 - len(frac_digits)
    frac_padding = "0" * n_pad
    frac_us = int(frac_digits + frac_padding)

    offset_minutes = (hh * 60) + mm + (ss // 60) + (frac_us // 60_000_000)
    if z.startswith("-"):
        offset_minutes = -offset_minutes
    return timezone(timedelta(minutes=offset_minutes))
first commit 2023-06-02 12:51:02 +02:00			`"""Strptime-related classes and functions.`

			`TimeRE, _calc_julian_from_U_or_W are vendored`
			`from the standard library, see`
			`https://github.com/python/cpython/blob/main/Lib/_strptime.py`
			`The original module-level docstring follows.`

			`Strptime-related classes and functions.`
			`CLASSES:`
			`LocaleTime -- Discovers and stores locale-specific time information`
			`TimeRE -- Creates regexes for pattern matching a string of text containing`
			`time information`
			`FUNCTIONS:`
			`_getlang -- Figure out what language is being used for the locale`
			`strptime -- Calculates the time struct represented by the passed-in string`
			`"""`
			`from datetime import timezone`

			`from cpython.datetime cimport (`
			`PyDate_Check,`
			`PyDateTime_Check,`
			`date,`
			`import_datetime,`
			`timedelta,`
			`tzinfo,`
			`)`
			`from _strptime import (`
			`TimeRE as _TimeRE,`
			`_getlang,`
			`)`
			`from _strptime import LocaleTime # no-cython-lint`

			`import_datetime()`

			`from _thread import allocate_lock as _thread_allocate_lock`
			`import re`

			`import numpy as np`
			`import pytz`

			`cimport numpy as cnp`
			`from numpy cimport (`
			`int64_t,`
			`ndarray,`
			`)`

			`from pandas._libs.missing cimport checknull_with_nat_and_na`
			`from pandas._libs.tslibs.conversion cimport (`
			`convert_timezone,`
			`get_datetime64_nanos,`
			`)`
			`from pandas._libs.tslibs.nattype cimport (`
			`NPY_NAT,`
			`c_nat_strings as nat_strings,`
			`)`
			`from pandas._libs.tslibs.np_datetime cimport (`
			`NPY_DATETIMEUNIT,`
			`NPY_FR_ns,`
			`check_dts_bounds,`
			`npy_datetimestruct,`
			`npy_datetimestruct_to_datetime,`
			`pydate_to_dt64,`
			`pydatetime_to_dt64,`
			`string_to_dts,`
			`)`
			`from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime`
			`from pandas._libs.tslibs.timestamps cimport _Timestamp`
			`from pandas._libs.util cimport (`
			`is_datetime64_object,`
			`is_float_object,`
			`is_integer_object,`
			`)`

			`from pandas._libs.tslibs.timestamps import Timestamp`

			`cnp.import_array()`

			`cdef bint format_is_iso(f: str):`
			`"""`
			`Does format match the iso8601 set that can be handled by the C parser?`
			`Generally of form YYYY-MM-DDTHH:MM:SS - date separator can be different`
			`but must be consistent. Leading 0s in dates and times are optional.`
			`"""`
			`iso_regex = re.compile(`
			`r"""`
			`^ # start of string`
			`%Y # Year`
			`(?:([-/ \\.]?)%m # month with or without separators`
			`(?: \1%d # day with same separator as for year-month`
			`(?:[ T]%H # hour with separator`
			`(?:\:%M # minute with separator`
			`(?:\:%S # second with separator`
			`(?:%z\|\.%f(?:%z)? # timezone or fractional second`
			`)?)?)?)?)?)? # optional`
			`$ # end of string`
			`""",`
			`re.VERBOSE,`
			`)`
			`excluded_formats = ["%Y%m"]`
			`return re.match(iso_regex, f) is not None and f not in excluded_formats`


			`def _test_format_is_iso(f: str) -> bool:`
			`"""Only used in testing."""`
			`return format_is_iso(f)`


			`cdef bint parse_today_now(str val, int64_t* iresult, bint utc):`
			`# We delay this check for as long as possible`
			`# because it catches relatively rare cases`

			`# Multiply by 1000 to convert to nanos, since these methods naturally have`
			`# microsecond resolution`
			`if val == "now":`
			`if utc:`
			`iresult[0] = Timestamp.utcnow()._value * 1000`
			`else:`
			`# GH#18705 make sure to_datetime("now") matches Timestamp("now")`
			`# Note using Timestamp.now() is faster than Timestamp("now")`
			`iresult[0] = Timestamp.now()._value * 1000`
			`return True`
			`elif val == "today":`
			`iresult[0] = Timestamp.today()._value * 1000`
			`return True`
			`return False`

			`cdef dict _parse_code_table = {"y": 0,`
			`"Y": 1,`
			`"m": 2,`
			`"B": 3,`
			`"b": 4,`
			`"d": 5,`
			`"H": 6,`
			`"I": 7,`
			`"M": 8,`
			`"S": 9,`
			`"f": 10,`
			`"A": 11,`
			`"a": 12,`
			`"w": 13,`
			`"j": 14,`
			`"U": 15,`
			`"W": 16,`
			`"Z": 17,`
			`"p": 18, # an additional key, only with I`
			`"z": 19,`
			`"G": 20,`
			`"V": 21,`
			`"u": 22}`


			`def array_strptime(`
			`ndarray[object] values,`
			`str fmt,`
			`bint exact=True,`
			`errors="raise",`
			`bint utc=False,`
			`):`
			`"""`
			`Calculates the datetime structs represented by the passed array of strings`

			`Parameters`
			`----------`
			`values : ndarray of string-like objects`
			`fmt : string-like regex`
			`exact : matches must be exact if True, search if False`
			`errors : string specifying error handling, {'raise', 'ignore', 'coerce'}`
			`"""`

			`cdef:`
			`Py_ssize_t i, n = len(values)`
			`npy_datetimestruct dts`
			`int64_t[::1] iresult`
			`object[::1] result_timezone`
			`int year, month, day, minute, hour, second, weekday, julian`
			`int week_of_year, week_of_year_start, parse_code, ordinal`
			`int iso_week, iso_year`
			`int64_t us, ns`
			`object val, group_key, ampm, found, tz`
			`bint is_raise = errors=="raise"`
			`bint is_ignore = errors=="ignore"`
			`bint is_coerce = errors=="coerce"`
			`bint found_naive = False`
			`bint found_tz = False`
			`tzinfo tz_out = None`
			`bint iso_format = format_is_iso(fmt)`
			`NPY_DATETIMEUNIT out_bestunit`
			`int out_local = 0, out_tzoffset = 0`
			`bint string_to_dts_succeeded = 0`

			`assert is_raise or is_ignore or is_coerce`

			`if "%W" in fmt or "%U" in fmt:`
			`if "%Y" not in fmt and "%y" not in fmt:`
			`raise ValueError("Cannot use '%W' or '%U' without day and year")`
			`if "%A" not in fmt and "%a" not in fmt and "%w" not in fmt:`
			`raise ValueError("Cannot use '%W' or '%U' without day and year")`
			`elif "%Z" in fmt and "%z" in fmt:`
			`raise ValueError("Cannot parse both %Z and %z")`
			`elif "%j" in fmt and "%G" in fmt:`
			`raise ValueError("Day of the year directive '%j' is not "`
			`"compatible with ISO year directive '%G'. "`
			`"Use '%Y' instead.")`
			`elif "%G" in fmt and (`
			`"%V" not in fmt`
			`or not (`
			`"%A" in fmt`
			`or "%a" in fmt`
			`or "%w" in fmt`
			`or "%u" in fmt`
			`)`
			`):`
			`raise ValueError("ISO year directive '%G' must be used with "`
			`"the ISO week directive '%V' and a weekday "`
			`"directive '%A', '%a', '%w', or '%u'.")`
			`elif "%V" in fmt and "%Y" in fmt:`
			`raise ValueError("ISO week directive '%V' is incompatible with "`
			`"the year directive '%Y'. Use the ISO year "`
			`"'%G' instead.")`
			`elif "%V" in fmt and (`
			`"%G" not in fmt`
			`or not (`
			`"%A" in fmt`
			`or "%a" in fmt`
			`or "%w" in fmt`
			`or "%u" in fmt`
			`)`
			`):`
			`raise ValueError("ISO week directive '%V' must be used with "`
			`"the ISO year directive '%G' and a weekday "`
			`"directive '%A', '%a', '%w', or '%u'.")`

			`global _TimeRE_cache, _regex_cache`
			`with _cache_lock:`
			`if _getlang() != _TimeRE_cache.locale_time.lang:`
			`_TimeRE_cache = TimeRE()`
			`_regex_cache.clear()`
			`if len(_regex_cache) > _CACHE_MAX_SIZE:`
			`_regex_cache.clear()`
			`locale_time = _TimeRE_cache.locale_time`
			`format_regex = _regex_cache.get(fmt)`
			`if not format_regex:`
			`try:`
			`format_regex = _TimeRE_cache.compile(fmt)`
			`# KeyError raised when a bad format is found; can be specified as`
			`# \\, in which case it was a stray % but with a space after it`
			`except KeyError, err:`
			`bad_directive = err.args[0]`
			`if bad_directive == "\\":`
			`bad_directive = "%"`
			`del err`
			`raise ValueError(f"'{bad_directive}' is a bad directive "`
			`f"in format '{fmt}'")`
			`# IndexError only occurs when the format string is "%"`
			`except IndexError:`
			`raise ValueError(f"stray % in format '{fmt}'")`
			`_regex_cache[fmt] = format_regex`

			`result = np.empty(n, dtype="M8[ns]")`
			`iresult = result.view("i8")`
			`result_timezone = np.empty(n, dtype="object")`

			`dts.us = dts.ps = dts.as = 0`

			`for i in range(n):`
			`val = values[i]`
			`try:`
			`if isinstance(val, str):`
			`if len(val) == 0 or val in nat_strings:`
			`iresult[i] = NPY_NAT`
			`continue`
			`elif checknull_with_nat_and_na(val):`
			`iresult[i] = NPY_NAT`
			`continue`
			`elif PyDateTime_Check(val):`
			`if val.tzinfo is not None:`
			`found_tz = True`
			`else:`
			`found_naive = True`
			`tz_out = convert_timezone(`
			`val.tzinfo,`
			`tz_out,`
			`found_naive,`
			`found_tz,`
			`utc,`
			`)`
			`if isinstance(val, _Timestamp):`
			`iresult[i] = val.tz_localize(None).as_unit("ns")._value`
			`else:`
			`iresult[i] = pydatetime_to_dt64(val.replace(tzinfo=None), &dts)`
			`check_dts_bounds(&dts)`
			`result_timezone[i] = val.tzinfo`
			`continue`
			`elif PyDate_Check(val):`
			`iresult[i] = pydate_to_dt64(val, &dts)`
			`check_dts_bounds(&dts)`
			`continue`
			`elif is_datetime64_object(val):`
			`iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)`
			`continue`
			`elif (`
			`(is_integer_object(val) or is_float_object(val))`
			`and (val != val or val == NPY_NAT)`
			`):`
			`iresult[i] = NPY_NAT`
			`continue`
			`else:`
			`val = str(val)`

			`if fmt == "ISO8601":`
			`string_to_dts_succeeded = not string_to_dts(`
			`val, &dts, &out_bestunit, &out_local,`
			`&out_tzoffset, False, None, False`
			`)`
			`elif iso_format:`
			`string_to_dts_succeeded = not string_to_dts(`
			`val, &dts, &out_bestunit, &out_local,`
			`&out_tzoffset, False, fmt, exact`
			`)`
			`if string_to_dts_succeeded:`
			`# No error reported by string_to_dts, pick back up`
			`# where we left off`
			`value = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)`
			`if out_local == 1:`
			`# Store the out_tzoffset in seconds`
			`# since we store the total_seconds of`
			`# dateutil.tz.tzoffset objects`
			`tz = timezone(timedelta(minutes=out_tzoffset))`
			`result_timezone[i] = tz`
			`out_local = 0`
			`out_tzoffset = 0`
			`iresult[i] = value`
			`check_dts_bounds(&dts)`
			`continue`

			`if parse_today_now(val, &iresult[i], utc):`
			`continue`

			`# Some ISO formats can't be parsed by string_to_dts`
			`# For example, 6-digit YYYYMD. So, if there's an error, and a format`
			`# was specified, then try the string-matching code below. If the format`
			`# specified was 'ISO8601', then we need to error, because`
			`# only string_to_dts handles mixed ISO8601 formats.`
			`if not string_to_dts_succeeded and fmt == "ISO8601":`
			`raise ValueError(f"Time data {val} is not ISO8601 format")`

			`# exact matching`
			`if exact:`
			`found = format_regex.match(val)`
			`if not found:`
			`raise ValueError(`
			`f"time data \"{val}\" doesn't match format \"{fmt}\""`
			`)`
			`if len(val) != found.end():`
			`raise ValueError(`
			`"unconverted data remains when parsing with "`
			`f"format \"{fmt}\": \"{val[found.end():]}\""`
			`)`

			`# search`
			`else:`
			`found = format_regex.search(val)`
			`if not found:`
			`raise ValueError(`
			`f"time data \"{val}\" doesn't match format \"{fmt}\""`
			`)`

			`iso_year = -1`
			`year = 1900`
			`month = day = 1`
			`hour = minute = second = ns = us = 0`
			`tz = None`
			`# Default to -1 to signify that values not known; not critical to have,`
			`# though`
			`iso_week = week_of_year = -1`
			`week_of_year_start = -1`
			`# weekday and julian defaulted to -1 so as to signal need to calculate`
			`# values`
			`weekday = julian = -1`
			`found_dict = found.groupdict()`
			`for group_key in found_dict.iterkeys():`
			`# Directives not explicitly handled below:`
			`# c, x, X`
			`# handled by making out of other directives`
			`# U, W`
			`# worthless without day of the week`
			`parse_code = _parse_code_table[group_key]`

			`if parse_code == 0:`
			`year = int(found_dict["y"])`
			`# Open Group specification for strptime() states that a %y`
			`# value in the range of [00, 68] is in the century 2000, while`
			`# [69,99] is in the century 1900`
			`if year <= 68:`
			`year += 2000`
			`else:`
			`year += 1900`
			`elif parse_code == 1:`
			`year = int(found_dict["Y"])`
			`elif parse_code == 2:`
			`month = int(found_dict["m"])`
			`# elif group_key == 'B':`
			`elif parse_code == 3:`
			`month = locale_time.f_month.index(found_dict["B"].lower())`
			`# elif group_key == 'b':`
			`elif parse_code == 4:`
			`month = locale_time.a_month.index(found_dict["b"].lower())`
			`# elif group_key == 'd':`
			`elif parse_code == 5:`
			`day = int(found_dict["d"])`
			`# elif group_key == 'H':`
			`elif parse_code == 6:`
			`hour = int(found_dict["H"])`
			`elif parse_code == 7:`
			`hour = int(found_dict["I"])`
			`ampm = found_dict.get("p", "").lower()`
			`# If there was no AM/PM indicator, we'll treat this like AM`
			`if ampm in ("", locale_time.am_pm[0]):`
			`# We're in AM so the hour is correct unless we're`
			`# looking at 12 midnight.`
			`# 12 midnight == 12 AM == hour 0`
			`if hour == 12:`
			`hour = 0`
			`elif ampm == locale_time.am_pm[1]:`
			`# We're in PM so we need to add 12 to the hour unless`
			`# we're looking at 12 noon.`
			`# 12 noon == 12 PM == hour 12`
			`if hour != 12:`
			`hour += 12`
			`elif parse_code == 8:`
			`minute = int(found_dict["M"])`
			`elif parse_code == 9:`
			`second = int(found_dict["S"])`
			`elif parse_code == 10:`
			`s = found_dict["f"]`
			`# Pad to always return nanoseconds`
			`s += "0" * (9 - len(s))`
			`us = long(s)`
			`ns = us % 1000`
			`us = us // 1000`
			`elif parse_code == 11:`
			`weekday = locale_time.f_weekday.index(found_dict["A"].lower())`
			`elif parse_code == 12:`
			`weekday = locale_time.a_weekday.index(found_dict["a"].lower())`
			`elif parse_code == 13:`
			`weekday = int(found_dict["w"])`
			`if weekday == 0:`
			`weekday = 6`
			`else:`
			`weekday -= 1`
			`elif parse_code == 14:`
			`julian = int(found_dict["j"])`
			`elif parse_code == 15 or parse_code == 16:`
			`week_of_year = int(found_dict[group_key])`
			`if group_key == "U":`
			`# U starts week on Sunday.`
			`week_of_year_start = 6`
			`else:`
			`# W starts week on Monday.`
			`week_of_year_start = 0`
			`elif parse_code == 17:`
			`tz = pytz.timezone(found_dict["Z"])`
			`elif parse_code == 19:`
			`tz = parse_timezone_directive(found_dict["z"])`
			`elif parse_code == 20:`
			`iso_year = int(found_dict["G"])`
			`elif parse_code == 21:`
			`iso_week = int(found_dict["V"])`
			`elif parse_code == 22:`
			`weekday = int(found_dict["u"])`
			`weekday -= 1`

			`# If we know the wk of the year and what day of that wk, we can figure`
			`# out the Julian day of the year.`
			`if julian == -1 and weekday != -1:`
			`if week_of_year != -1:`
			`week_starts_Mon = week_of_year_start == 0`
			`julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,`
			`week_starts_Mon)`
			`elif iso_year != -1 and iso_week != -1:`
			`year, julian = _calc_julian_from_V(iso_year, iso_week,`
			`weekday + 1)`
			`# Cannot pre-calculate date() since can change in Julian`
			`# calculation and thus could have different value for the day of the wk`
			`# calculation.`
			`if julian == -1:`
			`# Need to add 1 to result since first day of the year is 1, not`
			`# 0.`
			`ordinal = date(year, month, day).toordinal()`
			`julian = ordinal - date(year, 1, 1).toordinal() + 1`
			`else:`
			`# Assume that if they bothered to include Julian day it will`
			`# be accurate.`
			`datetime_result = date.fromordinal(`
			`(julian - 1) + date(year, 1, 1).toordinal())`
			`year = datetime_result.year`
			`month = datetime_result.month`
			`day = datetime_result.day`
			`if weekday == -1:`
			`weekday = date(year, month, day).weekday()`

			`dts.year = year`
			`dts.month = month`
			`dts.day = day`
			`dts.hour = hour`
			`dts.min = minute`
			`dts.sec = second`
			`dts.us = us`
			`dts.ps = ns * 1000`

			`iresult[i] = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)`
			`check_dts_bounds(&dts)`

			`result_timezone[i] = tz`

			`except (ValueError, OutOfBoundsDatetime) as ex:`
			`ex.args = (`
			`f"{str(ex)}, at position {i}. You might want to try:\n"`
			" - passing `format` if your strings have a consistent format;\n"
			" - passing `format='ISO8601'` if your strings are "
			`"all ISO8601 but not necessarily in exactly the same format;\n"`
			" - passing `format='mixed'`, and the format will be "
			`"inferred for each element individually. "`
			"You might want to use `dayfirst` alongside this.",
			`)`
			`if is_coerce:`
			`iresult[i] = NPY_NAT`
			`continue`
			`elif is_raise:`
			`raise`
			`return values, []`

			`return result, result_timezone.base`


			class TimeRE(_TimeRE):
			    """
			    Translate strptime format directives into regular expressions.

			    Extends the standard library's ``_strptime.TimeRE`` so that ``%f``
			    accepts up to nine digits and ``%Z`` is matched against
			    ``pytz.all_timezones``.
			    """

			    def __init__(self, locale_time=None):
			        """
			        Build the directive-to-regex mapping.

			        The ``_Z`` slot is initialised before the parent constructor is
			        invoked; keep that order.
			        """
			        # Cache slot for the %Z pattern, filled on first lookup.
			        self._Z = None
			        super().__init__(locale_time=locale_time)
			        # GH 48767: override cpython's TimeRE so fractional seconds are
			        # parsed up to nanoseconds rather than microseconds.
			        self["f"] = r"(?P<f>[0-9]{1,9})"

			    def __getitem__(self, key):
			        if key != "Z":
			            return super().__getitem__(key)
			        # Handling Z here is the key difference vs using the stdlib
			        # _strptime.TimeRE: test_to_datetime_parse_tzname_or_tzoffset with
			        # fmt='%Y-%m-%d %H:%M:%S %Z' fails with the stdlib version.
			        if self._Z is None:
			            # Built lazily. The double-underscore name mangles to
			            # _TimeRE__seqToRE, i.e. the parent's private helper, because
			            # the parent class is also called TimeRE.
			            self._Z = self.__seqToRE(pytz.all_timezones, "Z")
			        return self._Z


			# Serialises access to the two caches defined below: never modify
			# _TimeRE_cache or _regex_cache without holding this lock.
			_cache_lock = _thread_allocate_lock()
			# Once _regex_cache holds more than this many compiled regexes it is emptied.
			_CACHE_MAX_SIZE = 5
			_regex_cache = {}
			_TimeRE_cache = TimeRE()


			cdef int _calc_julian_from_U_or_W(int year, int week_of_year,
			                                  int day_of_week, int week_starts_Mon):
			    """
			    Convert a (%U or %W week number, weekday) pair into a day of the year.

			    Parameters
			    ----------
			    year : int
			        Calendar year the week number refers to.
			    week_of_year : int
			        Week number parsed from %U or %W; 0 denotes the partial week that
			        precedes the first full week of the year.
			    day_of_week : int
			        Weekday with Monday as 0 and Sunday as 6.
			    week_starts_Mon : int
			        Non-zero when weeks begin on Monday (%W), zero when they begin on
			        Sunday (%U).

			    Returns
			    -------
			    int
			        Day of the year, counting 1 January as 1. The value can be zero
			        or negative when the requested weekday of week 0 falls before
			        1 January.
			    """

			    cdef:
			        int jan1_weekday, partial_week_days

			    jan1_weekday = date(year, 1, 1).weekday()
			    if not week_starts_Mon:
			        # %U counts weeks from Sunday: rotate both weekdays so that
			        # Sunday becomes day 0 of the week.
			        jan1_weekday = (jan1_weekday + 1) % 7
			        day_of_week = (day_of_week + 1) % 7

			    if week_of_year == 0:
			        # Week 0 exists only when the year does not begin on the first
			        # day of the week; offsets are relative to 1 January itself.
			        return 1 + day_of_week - jan1_weekday

			    # Days belonging to week 0, i.e. before the first full week starts.
			    partial_week_days = (7 - jan1_weekday) % 7
			    return 1 + partial_week_days + 7 * (week_of_year - 1) + day_of_week


			cdef (int, int) _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday):
			    """
			    Convert an ISO 8601 (year, week, weekday) triple into a calendar year
			    and a day of that year.

			    ISO weeks start on Mondays, with week 01 being the week containing 4 Jan.
			    ISO week days range from 1 (Monday) to 7 (Sunday).

			    Parameters
			    ----------
			    iso_year : int
			        Year taken from format %G.
			    iso_week : int
			        Week taken from format %V.
			    iso_weekday : int
			        Weekday taken from format %u.

			    Returns
			    -------
			    (int, int)
			        Calendar year and day of that year (1 January is 1). When the
			        ISO date lies before 1 January of ``iso_year`` the preceding
			        calendar year is returned. The day is not clamped at the other
			        end, so it may exceed the length of the year.
			    """

			    cdef:
			        int jan4_offset, day_of_year

			    # 4 January always lies in ISO week 1; this offset aligns week
			    # arithmetic with the calendar.
			    jan4_offset = date(iso_year, 1, 4).isoweekday() + 3
			    day_of_year = iso_week * 7 + iso_weekday - jan4_offset
			    if day_of_year >= 1:
			        return iso_year, day_of_year

			    # Zero or negative: the date belongs to the previous calendar year, so
			    # re-express it relative to that year's 1 January.
			    day_of_year += (
			        date(iso_year, 1, 1).toordinal() - date(iso_year - 1, 1, 1).toordinal()
			    )
			    return iso_year - 1, day_of_year


			cdef tzinfo parse_timezone_directive(str z):
			    """
			    Parse the '%z' directive and return a datetime.timezone object.

			    Parameters
			    ----------
			    z : str
			        UTC offset text: either the literal "Z" or a signed offset such as
			        "+0530", "+05:30", "+053000" or "+05:30:00", optionally followed by
			        a fractional-second part.

			    Returns
			    -------
			    datetime.timezone
			        Fixed-offset timezone. The offset is assembled from whole minutes;
			        seconds and fractions only contribute the full minutes they
			        contain.

			    Raises
			    ------
			    ValueError
			        If a colon separates hours from minutes but not minutes from
			        seconds.

			    Notes
			    -----
			    This is essentially similar to the cpython implementation
			    https://github.com/python/cpython/blob/master/Lib/_strptime.py#L457-L479
			    """

			    cdef:
			        int hours, minutes, seconds, microseconds
			        int total_minutes
			        object raw_offset, fraction

			    if z == "Z":
			        return timezone(timedelta(0))

			    # Keep the text as received: z is rewritten below, and the error
			    # message must show what was actually passed in.
			    raw_offset = z
			    if z[3] == ":":
			        z = z[:3] + z[4:]
			        if len(z) > 5:
			            if z[5] != ":":
			                raise ValueError(f"Inconsistent use of : in {raw_offset}")
			            z = z[:5] + z[6:]
			    hours = int(z[1:3])
			    minutes = int(z[3:5])
			    seconds = int(z[5:7] or 0)

			    # Right-pad the fractional digits so they always read as microseconds.
			    fraction = z[8:]
			    microseconds = int(fraction.ljust(6, "0"))

			    total_minutes = ((hours * 60) + minutes + (seconds // 60) +
			                     (microseconds // 60_000_000))
			    if z.startswith("-"):
			        total_minutes = -total_minutes
			    return timezone(timedelta(minutes=total_minutes))