Machine learning

LixayTF 2023-09-20 19:46:58 +02:00
parent a27721b391
commit b56730eef8
10570 changed files with 2372494 additions and 0 deletions


@@ -0,0 +1,222 @@
# don't import any costly modules
import sys
import os
is_pypy = '__pypy__' in sys.builtin_module_names
def warn_distutils_present():
if 'distutils' not in sys.modules:
return
if is_pypy and sys.version_info < (3, 7):
# PyPy for 3.6 unconditionally imports distutils, so bypass the warning
# https://foss.heptapod.net/pypy/pypy/-/blob/be829135bc0d758997b3566062999ee8b23872b4/lib-python/3/site.py#L250
return
import warnings
warnings.warn(
"Distutils was imported before Setuptools, but importing Setuptools "
"also replaces the `distutils` module in `sys.modules`. This may lead "
"to undesirable behaviors or errors. To avoid these issues, avoid "
"using distutils directly, ensure that setuptools is installed in the "
"traditional way (e.g. not an editable install), and/or make sure "
"that setuptools is always imported before distutils."
)
def clear_distutils():
if 'distutils' not in sys.modules:
return
import warnings
warnings.warn("Setuptools is replacing distutils.")
mods = [
name
for name in sys.modules
if name == "distutils" or name.startswith("distutils.")
]
for name in mods:
del sys.modules[name]
def enabled():
"""
Allow selection of distutils by environment variable.
"""
which = os.environ.get('SETUPTOOLS_USE_DISTUTILS', 'local')
return which == 'local'
def ensure_local_distutils():
import importlib
clear_distutils()
# With the DistutilsMetaFinder in place,
# perform an import to cause distutils to be
# loaded from setuptools._distutils. Ref #2906.
with shim():
importlib.import_module('distutils')
# check that submodules load as expected
core = importlib.import_module('distutils.core')
assert '_distutils' in core.__file__, core.__file__
assert 'setuptools._distutils.log' not in sys.modules
def do_override():
"""
Ensure that the local copy of distutils is preferred over stdlib.
See https://github.com/pypa/setuptools/issues/417#issuecomment-392298401
for more motivation.
"""
if enabled():
warn_distutils_present()
ensure_local_distutils()
class _TrivialRe:
def __init__(self, *patterns):
self._patterns = patterns
def match(self, string):
return all(pat in string for pat in self._patterns)
class DistutilsMetaFinder:
def find_spec(self, fullname, path, target=None):
# optimization: only consider top level modules and those
# found in the CPython test suite.
if path is not None and not fullname.startswith('test.'):
return
method_name = 'spec_for_{fullname}'.format(**locals())
method = getattr(self, method_name, lambda: None)
return method()
def spec_for_distutils(self):
if self.is_cpython():
return
import importlib
import importlib.abc
import importlib.util
try:
mod = importlib.import_module('setuptools._distutils')
except Exception:
# There are a couple of cases where setuptools._distutils
# may not be present:
# - An older Setuptools without a local distutils is
# taking precedence. Ref #2957.
# - Path manipulation during sitecustomize removes
# setuptools from the path but only after the hook
# has been loaded. Ref #2980.
# In either case, fall back to stdlib behavior.
return
class DistutilsLoader(importlib.abc.Loader):
def create_module(self, spec):
mod.__name__ = 'distutils'
return mod
def exec_module(self, module):
pass
return importlib.util.spec_from_loader(
'distutils', DistutilsLoader(), origin=mod.__file__
)
@staticmethod
def is_cpython():
"""
Suppress supplying distutils for CPython (build and tests).
Ref #2965 and #3007.
"""
return os.path.isfile('pybuilddir.txt')
def spec_for_pip(self):
"""
Ensure stdlib distutils when running under pip.
See pypa/pip#8761 for rationale.
"""
if self.pip_imported_during_build():
return
clear_distutils()
self.spec_for_distutils = lambda: None
@classmethod
def pip_imported_during_build(cls):
"""
Detect if pip is being imported in a build script. Ref #2355.
"""
import traceback
return any(
cls.frame_file_is_setup(frame) for frame, line in traceback.walk_stack(None)
)
@staticmethod
def frame_file_is_setup(frame):
"""
Return True if the indicated frame suggests a setup.py file.
"""
# some frames may not have __file__ (#2940)
return frame.f_globals.get('__file__', '').endswith('setup.py')
def spec_for_sensitive_tests(self):
"""
Ensure stdlib distutils when running select tests under CPython.
python/cpython#91169
"""
clear_distutils()
self.spec_for_distutils = lambda: None
sensitive_tests = (
[
'test.test_distutils',
'test.test_peg_generator',
'test.test_importlib',
]
if sys.version_info < (3, 10)
else [
'test.test_distutils',
]
)
for name in DistutilsMetaFinder.sensitive_tests:
setattr(
DistutilsMetaFinder,
f'spec_for_{name}',
DistutilsMetaFinder.spec_for_sensitive_tests,
)
DISTUTILS_FINDER = DistutilsMetaFinder()
def add_shim():
DISTUTILS_FINDER in sys.meta_path or insert_shim()
class shim:
def __enter__(self):
insert_shim()
def __exit__(self, exc, value, tb):
remove_shim()
def insert_shim():
sys.meta_path.insert(0, DISTUTILS_FINDER)
def remove_shim():
try:
sys.meta_path.remove(DISTUTILS_FINDER)
except ValueError:
pass
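For context (not part of the committed file): DistutilsMetaFinder works because any object with a find_spec method placed on sys.meta_path is consulted before the regular import machinery. A minimal sketch of that hook, with a hypothetical finder name:

# Illustrative sketch only -- NoisyFinder is hypothetical, not part of this commit.
import sys
import importlib
import importlib.abc


class NoisyFinder(importlib.abc.MetaPathFinder):
    def find_spec(self, fullname, path, target=None):
        print("import requested:", fullname)
        return None  # returning None defers to the next finder on sys.meta_path


sys.meta_path.insert(0, NoisyFinder())   # same insertion point insert_shim() uses
importlib.import_module("colorsys")      # prints the message unless already cached in sys.modules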


@@ -0,0 +1 @@
__import__('_distutils_hack').do_override()
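This one-line .pth file works because site.py executes any line in a .pth file that begins with "import" at interpreter startup; other lines are treated as additional sys.path entries. A simplified sketch of that behaviour (the real site.addpackage also checks that paths exist and de-duplicates them):

# Simplified sketch of site.py's handling of a single .pth line (assumption:
# error handling and path de-duplication omitted).
import os
import sys


def process_pth_line(line, sitedir):
    line = line.rstrip()
    if not line or line.startswith('#'):
        return
    if line.startswith(("import ", "import\t")):
        exec(line)   # e.g. __import__('_distutils_hack').do_override()
    else:
        sys.path.append(os.path.join(sitedir, line))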


@@ -0,0 +1 @@
import _virtualenv


@@ -0,0 +1,130 @@
"""Patches that are applied at runtime to the virtual environment"""
# -*- coding: utf-8 -*-
import os
import sys
VIRTUALENV_PATCH_FILE = os.path.join(__file__)
def patch_dist(dist):
"""
Distutils allows the user to configure some arguments via a configuration file:
https://docs.python.org/3/install/index.html#distutils-configuration-files
Some of these arguments, though, don't make sense in the context of a virtual environment, so let's fix them up.
"""
# we cannot allow some install config as that would get packages installed outside of the virtual environment
old_parse_config_files = dist.Distribution.parse_config_files
def parse_config_files(self, *args, **kwargs):
result = old_parse_config_files(self, *args, **kwargs)
install = self.get_option_dict("install")
if "prefix" in install: # the prefix governs where to install the libraries
install["prefix"] = VIRTUALENV_PATCH_FILE, os.path.abspath(sys.prefix)
for base in ("purelib", "platlib", "headers", "scripts", "data"):
key = "install_{}".format(base)
if key in install: # do not allow global configs to hijack venv paths
install.pop(key, None)
return result
dist.Distribution.parse_config_files = parse_config_files
# Import hook that patches some modules to ignore configuration values that break package installation in case
# of virtual environments.
_DISTUTILS_PATCH = "distutils.dist", "setuptools.dist"
if sys.version_info > (3, 4):
# https://docs.python.org/3/library/importlib.html#setting-up-an-importer
class _Finder:
"""A meta path finder that allows patching the imported distutils modules"""
fullname = None
# lock[0] is a threading.Lock(), but it is initialized lazily to avoid importing threading very early at
# startup, because some gevent-based applications need to be the first to import threading themselves.
# See https://github.com/pypa/virtualenv/issues/1895 for details.
lock = []
def find_spec(self, fullname, path, target=None): # noqa: U100
if fullname in _DISTUTILS_PATCH and self.fullname is None:
# initialize lock[0] lazily
if len(self.lock) == 0:
import threading
lock = threading.Lock()
# Two threads T1 and T2 may run into find_spec simultaneously, both observe .lock as empty,
# and both reach this initialization. However, because list.append() is atomic under the GIL,
# only one of the threads will "win" and put the lock - the one that every thread will then
# use - into .lock[0].
# https://docs.python.org/3/faq/library.html#what-kinds-of-global-value-mutation-are-thread-safe
self.lock.append(lock)
from functools import partial
from importlib.util import find_spec
with self.lock[0]:
self.fullname = fullname
try:
spec = find_spec(fullname, path)
if spec is not None:
# https://www.python.org/dev/peps/pep-0451/#how-loading-will-work
is_new_api = hasattr(spec.loader, "exec_module")
func_name = "exec_module" if is_new_api else "load_module"
old = getattr(spec.loader, func_name)
func = self.exec_module if is_new_api else self.load_module
if old is not func:
try:
setattr(spec.loader, func_name, partial(func, old))
except AttributeError:
pass  # C-extension loaders are read-only (e.g. zipimporter before Python 3.7)
return spec
finally:
self.fullname = None
@staticmethod
def exec_module(old, module):
old(module)
if module.__name__ in _DISTUTILS_PATCH:
patch_dist(module)
@staticmethod
def load_module(old, name):
module = old(name)
if module.__name__ in _DISTUTILS_PATCH:
patch_dist(module)
return module
sys.meta_path.insert(0, _Finder())
else:
# https://www.python.org/dev/peps/pep-0302/
from imp import find_module
from pkgutil import ImpImporter, ImpLoader
class _VirtualenvImporter(object, ImpImporter):
def __init__(self, path=None):
object.__init__(self)
ImpImporter.__init__(self, path)
def find_module(self, fullname, path=None):
if fullname in _DISTUTILS_PATCH:
try:
return _VirtualenvLoader(fullname, *find_module(fullname.split(".")[-1], path))
except ImportError:
pass
return None
class _VirtualenvLoader(object, ImpLoader):
def __init__(self, fullname, file, filename, etc):
object.__init__(self)
ImpLoader.__init__(self, fullname, file, filename, etc)
def load_module(self, fullname):
module = super(_VirtualenvLoader, self).load_module(fullname)
patch_dist(module)
module.__loader__ = None # distlib fallback
return module
sys.meta_path.append(_VirtualenvImporter())
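A reduced sketch (not part of the commit) of the technique _Finder uses above: let the normal machinery produce the spec, then wrap the loader's exec_module with functools.partial so the freshly executed module can be patched. The target module name here is a hypothetical stand-in for _DISTUTILS_PATCH:

# Sketch of the exec_module-wrapping pattern; "json.tool" stands in for the
# distutils/setuptools modules patched above.
import sys
from functools import partial
from importlib.util import find_spec


class _PatchingFinder:
    _targets = {"json.tool"}
    _busy = False  # naive re-entrancy guard (the real code uses self.fullname plus a lock)

    def find_spec(self, fullname, path, target=None):
        if fullname not in self._targets or _PatchingFinder._busy:
            return None
        _PatchingFinder._busy = True
        try:
            spec = find_spec(fullname)
        finally:
            _PatchingFinder._busy = False
        if spec is not None and hasattr(spec.loader, "exec_module"):
            spec.loader.exec_module = partial(self._exec, spec.loader.exec_module)
        return spec

    @staticmethod
    def _exec(old, module):
        old(module)             # execute the module normally first
        module.PATCHED = True   # stand-in for patch_dist(module)


sys.meta_path.insert(0, _PatchingFinder())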


@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
try:
from ._version import version as __version__
except ImportError:
__version__ = 'unknown'
__all__ = ['easter', 'parser', 'relativedelta', 'rrule', 'tz',
'utils', 'zoneinfo']


@@ -0,0 +1,43 @@
"""
Common code used in multiple modules.
"""
class weekday(object):
__slots__ = ["weekday", "n"]
def __init__(self, weekday, n=None):
self.weekday = weekday
self.n = n
def __call__(self, n):
if n == self.n:
return self
else:
return self.__class__(self.weekday, n)
def __eq__(self, other):
try:
if self.weekday != other.weekday or self.n != other.n:
return False
except AttributeError:
return False
return True
def __hash__(self):
return hash((
self.weekday,
self.n,
))
def __ne__(self, other):
return not (self == other)
def __repr__(self):
s = ("MO", "TU", "WE", "TH", "FR", "SA", "SU")[self.weekday]
if not self.n:
return s
else:
return "%s(%+d)" % (s, self.n)
# vim:ts=4:sw=4:et
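Usage note (illustrative, not part of the commit): these weekday instances are what dateutil re-exports as MO..SU, and calling one attaches an ordinal used by relativedelta:

# Usage sketch for the weekday helper defined above.
import datetime
from dateutil.relativedelta import relativedelta, MO

MO(+2)                  # weekday(0, 2), repr "MO(+2)": "the 2nd Monday" in relativedelta terms
MO(+2) == MO(2)         # True -- equality compares (weekday, n)

# Second Monday of September 2023: pin day=1, then jump to the 2nd Monday on or after it.
datetime.date(2023, 9, 1) + relativedelta(day=1, weekday=MO(+2))
# -> datetime.date(2023, 9, 11)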


@@ -0,0 +1,5 @@
# coding: utf-8
# file generated by setuptools_scm
# don't change, don't track in version control
version = '2.8.2'
version_tuple = (2, 8, 2)


@@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-
"""
This module offers a generic Easter computing method for any given year, using
Western, Orthodox or Julian algorithms.
"""
import datetime
__all__ = ["easter", "EASTER_JULIAN", "EASTER_ORTHODOX", "EASTER_WESTERN"]
EASTER_JULIAN = 1
EASTER_ORTHODOX = 2
EASTER_WESTERN = 3
def easter(year, method=EASTER_WESTERN):
"""
This method was ported from the work done by GM Arts,
on top of the algorithm by Claus Tondering, which was
based in part on the algorithm of Ouding (1940), as
quoted in "Explanatory Supplement to the Astronomical
Almanac", P. Kenneth Seidelmann, editor.
This algorithm implements three different Easter
calculation methods:
1. Original calculation in Julian calendar, valid in
dates after 326 AD
2. Original method, with date converted to Gregorian
calendar, valid in years 1583 to 4099
3. Revised method, in Gregorian calendar, valid in
years 1583 to 4099 as well
These methods are represented by the constants:
* ``EASTER_JULIAN = 1``
* ``EASTER_ORTHODOX = 2``
* ``EASTER_WESTERN = 3``
The default method is method 3.
More about the algorithm may be found at:
`GM Arts: Easter Algorithms <http://www.gmarts.org/index.php?go=415>`_
and
`The Calendar FAQ: Easter <https://www.tondering.dk/claus/cal/easter.php>`_
"""
if not (1 <= method <= 3):
raise ValueError("invalid method")
# g - Golden year - 1
# c - Century
# h - (23 - Epact) mod 30
# i - Number of days from March 21 to Paschal Full Moon
# j - Weekday for PFM (0=Sunday, etc)
# p - Number of days from March 21 to Sunday on or before PFM
# (-6 to 28 methods 1 & 3, to 56 for method 2)
# e - Extra days to add for method 2 (converting Julian
# date to Gregorian date)
y = year
g = y % 19
e = 0
if method < 3:
# Old method
i = (19*g + 15) % 30
j = (y + y//4 + i) % 7
if method == 2:
# Extra dates to convert Julian to Gregorian date
e = 10
if y > 1600:
e = e + y//100 - 16 - (y//100 - 16)//4
else:
# New method
c = y//100
h = (c - c//4 - (8*c + 13)//25 + 19*g + 15) % 30
i = h - (h//28)*(1 - (h//28)*(29//(h + 1))*((21 - g)//11))
j = (y + y//4 + i + 2 - c + c//4) % 7
# p can be from -6 to 56 corresponding to dates 22 March to 23 May
# (later dates apply to method 2, although 23 May never actually occurs)
p = i - j + e
d = 1 + (p + 27 + (p + 6)//40) % 31
m = 3 + (p + 26)//30
return datetime.date(int(y), int(m), int(d))
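Quick usage example (illustrative, not part of the commit):

# Usage sketch for easter() defined above.
from dateutil.easter import easter, EASTER_ORTHODOX

easter(2024)                           # datetime.date(2024, 3, 31) -- Western method (default)
easter(2024, method=EASTER_ORTHODOX)   # Orthodox date, converted to the Gregorian calendar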


@@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
from ._parser import parse, parser, parserinfo, ParserError
from ._parser import DEFAULTPARSER, DEFAULTTZPARSER
from ._parser import UnknownTimezoneWarning
from ._parser import __doc__
from .isoparser import isoparser, isoparse
__all__ = ['parse', 'parser', 'parserinfo',
'isoparse', 'isoparser',
'ParserError',
'UnknownTimezoneWarning']
###
# Deprecate portions of the private interface so that downstream code that
# is improperly relying on it is given *some* notice.
def __deprecated_private_func(f):
from functools import wraps
import warnings
msg = ('{name} is a private function and may break without warning; '
'it will be moved and/or renamed in future versions.')
msg = msg.format(name=f.__name__)
@wraps(f)
def deprecated_func(*args, **kwargs):
warnings.warn(msg, DeprecationWarning)
return f(*args, **kwargs)
return deprecated_func
def __deprecate_private_class(c):
import warnings
msg = ('{name} is a private class and may break without warning; '
'it will be moved and/or renamed in future versions.')
msg = msg.format(name=c.__name__)
class private_class(c):
__doc__ = c.__doc__
def __init__(self, *args, **kwargs):
warnings.warn(msg, DeprecationWarning)
super(private_class, self).__init__(*args, **kwargs)
private_class.__name__ = c.__name__
return private_class
from ._parser import _timelex, _resultbase
from ._parser import _tzparser, _parsetz
_timelex = __deprecate_private_class(_timelex)
_tzparser = __deprecate_private_class(_tzparser)
_resultbase = __deprecate_private_class(_resultbase)
_parsetz = __deprecated_private_func(_parsetz)
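The two wrappers above follow a standard deprecation-shim pattern: the public behaviour is unchanged, but a DeprecationWarning fires on use. A self-contained sketch of the same pattern applied to a hypothetical helper:

# Sketch of the deprecation-wrapper pattern; _legacy_helper is hypothetical.
import warnings
from functools import wraps


def deprecated_private_func(f):
    msg = '{name} is a private function and may break without warning.'.format(name=f.__name__)

    @wraps(f)
    def wrapper(*args, **kwargs):
        warnings.warn(msg, DeprecationWarning)
        return f(*args, **kwargs)
    return wrapper


@deprecated_private_func
def _legacy_helper(x):
    return x * 2


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    assert _legacy_helper(3) == 6                             # behaviour preserved
assert issubclass(caught[-1].category, DeprecationWarning)    # warning emitted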

File diff suppressed because it is too large.


@@ -0,0 +1,416 @@
# -*- coding: utf-8 -*-
"""
This module offers a parser for ISO-8601 strings
It is intended to support all valid date, time and datetime formats per the
ISO-8601 specification.
.. versionadded:: 2.7.0
"""
from datetime import datetime, timedelta, time, date
import calendar
from dateutil import tz
from functools import wraps
import re
import six
__all__ = ["isoparse", "isoparser"]
def _takes_ascii(f):
@wraps(f)
def func(self, str_in, *args, **kwargs):
# If it's a stream, read the whole thing
str_in = getattr(str_in, 'read', lambda: str_in)()
# If it's unicode, turn it into bytes, since ISO-8601 only covers ASCII
if isinstance(str_in, six.text_type):
# ASCII is the same in UTF-8
try:
str_in = str_in.encode('ascii')
except UnicodeEncodeError as e:
msg = 'ISO-8601 strings should contain only ASCII characters'
six.raise_from(ValueError(msg), e)
return f(self, str_in, *args, **kwargs)
return func
class isoparser(object):
def __init__(self, sep=None):
"""
:param sep:
A single character that separates date and time portions. If
``None``, the parser will accept any single character.
For strict ISO-8601 adherence, pass ``'T'``.
"""
if sep is not None:
if (len(sep) != 1 or ord(sep) >= 128 or sep in '0123456789'):
raise ValueError('Separator must be a single, non-numeric ' +
'ASCII character')
sep = sep.encode('ascii')
self._sep = sep
@_takes_ascii
def isoparse(self, dt_str):
"""
Parse an ISO-8601 datetime string into a :class:`datetime.datetime`.
An ISO-8601 datetime string consists of a date portion, followed
optionally by a time portion - the date and time portions are separated
by a single character separator, which is ``T`` in the official
standard. Incomplete date formats (such as ``YYYY-MM``) may *not* be
combined with a time portion.
Supported date formats are:
Common:
- ``YYYY``
- ``YYYY-MM`` or ``YYYYMM``
- ``YYYY-MM-DD`` or ``YYYYMMDD``
Uncommon:
- ``YYYY-Www`` or ``YYYYWww`` - ISO week (day defaults to Monday)
- ``YYYY-Www-D`` or ``YYYYWwwD`` - ISO week and day
The ISO week and day numbering follows the same logic as
:func:`datetime.date.isocalendar`.
Supported time formats are:
- ``hh``
- ``hh:mm`` or ``hhmm``
- ``hh:mm:ss`` or ``hhmmss``
- ``hh:mm:ss.ssssss`` (Up to 6 sub-second digits)
Midnight is a special case for `hh`, as the standard supports both
00:00 and 24:00 as a representation. The decimal separator can be
either a dot or a comma.
.. caution::
Support for fractional components other than seconds is part of the
ISO-8601 standard, but is not currently implemented in this parser.
Supported time zone offset formats are:
- `Z` (UTC)
- `±HH:MM`
- `±HHMM`
- `±HH`
Offsets will be represented as :class:`dateutil.tz.tzoffset` objects,
with the exception of UTC, which will be represented as
:class:`dateutil.tz.tzutc`. Time zone offsets equivalent to UTC (such
as `+00:00`) will also be represented as :class:`dateutil.tz.tzutc`.
:param dt_str:
A string or stream containing only an ISO-8601 datetime string
:return:
Returns a :class:`datetime.datetime` representing the string.
Unspecified components default to their lowest value.
.. warning::
As of version 2.7.0, the strictness of the parser should not be
considered a stable part of the contract. Any valid ISO-8601 string
that parses correctly with the default settings will continue to
parse correctly in future versions, but invalid strings that
currently fail (e.g. ``2017-01-01T00:00+00:00:00``) are not
guaranteed to continue failing in future versions if they encode
a valid date.
.. versionadded:: 2.7.0
"""
components, pos = self._parse_isodate(dt_str)
if len(dt_str) > pos:
if self._sep is None or dt_str[pos:pos + 1] == self._sep:
components += self._parse_isotime(dt_str[pos + 1:])
else:
raise ValueError('String contains unknown ISO components')
if len(components) > 3 and components[3] == 24:
components[3] = 0
return datetime(*components) + timedelta(days=1)
return datetime(*components)
@_takes_ascii
def parse_isodate(self, datestr):
"""
Parse the date portion of an ISO string.
:param datestr:
The string portion of an ISO string, without a separator
:return:
Returns a :class:`datetime.date` object
"""
components, pos = self._parse_isodate(datestr)
if pos < len(datestr):
raise ValueError('String contains unknown ISO ' +
'components: {!r}'.format(datestr.decode('ascii')))
return date(*components)
@_takes_ascii
def parse_isotime(self, timestr):
"""
Parse the time portion of an ISO string.
:param timestr:
The time portion of an ISO string, without a separator
:return:
Returns a :class:`datetime.time` object
"""
components = self._parse_isotime(timestr)
if components[0] == 24:
components[0] = 0
return time(*components)
@_takes_ascii
def parse_tzstr(self, tzstr, zero_as_utc=True):
"""
Parse a valid ISO time zone string.
See :func:`isoparser.isoparse` for details on supported formats.
:param tzstr:
A string representing an ISO time zone offset
:param zero_as_utc:
Whether to return :class:`dateutil.tz.tzutc` for zero-offset zones
:return:
Returns :class:`dateutil.tz.tzoffset` for offsets and
:class:`dateutil.tz.tzutc` for ``Z`` and (if ``zero_as_utc`` is
specified) offsets equivalent to UTC.
"""
return self._parse_tzstr(tzstr, zero_as_utc=zero_as_utc)
# Constants
_DATE_SEP = b'-'
_TIME_SEP = b':'
_FRACTION_REGEX = re.compile(b'[\\.,]([0-9]+)')
def _parse_isodate(self, dt_str):
try:
return self._parse_isodate_common(dt_str)
except ValueError:
return self._parse_isodate_uncommon(dt_str)
def _parse_isodate_common(self, dt_str):
len_str = len(dt_str)
components = [1, 1, 1]
if len_str < 4:
raise ValueError('ISO string too short')
# Year
components[0] = int(dt_str[0:4])
pos = 4
if pos >= len_str:
return components, pos
has_sep = dt_str[pos:pos + 1] == self._DATE_SEP
if has_sep:
pos += 1
# Month
if len_str - pos < 2:
raise ValueError('Invalid common month')
components[1] = int(dt_str[pos:pos + 2])
pos += 2
if pos >= len_str:
if has_sep:
return components, pos
else:
raise ValueError('Invalid ISO format')
if has_sep:
if dt_str[pos:pos + 1] != self._DATE_SEP:
raise ValueError('Invalid separator in ISO string')
pos += 1
# Day
if len_str - pos < 2:
raise ValueError('Invalid common day')
components[2] = int(dt_str[pos:pos + 2])
return components, pos + 2
def _parse_isodate_uncommon(self, dt_str):
if len(dt_str) < 4:
raise ValueError('ISO string too short')
# All ISO formats start with the year
year = int(dt_str[0:4])
has_sep = dt_str[4:5] == self._DATE_SEP
pos = 4 + has_sep # Skip '-' if it's there
if dt_str[pos:pos + 1] == b'W':
# YYYY-?Www-?D?
pos += 1
weekno = int(dt_str[pos:pos + 2])
pos += 2
dayno = 1
if len(dt_str) > pos:
if (dt_str[pos:pos + 1] == self._DATE_SEP) != has_sep:
raise ValueError('Inconsistent use of dash separator')
pos += has_sep
dayno = int(dt_str[pos:pos + 1])
pos += 1
base_date = self._calculate_weekdate(year, weekno, dayno)
else:
# YYYYDDD or YYYY-DDD
if len(dt_str) - pos < 3:
raise ValueError('Invalid ordinal day')
ordinal_day = int(dt_str[pos:pos + 3])
pos += 3
if ordinal_day < 1 or ordinal_day > (365 + calendar.isleap(year)):
raise ValueError('Invalid ordinal day' +
' {} for year {}'.format(ordinal_day, year))
base_date = date(year, 1, 1) + timedelta(days=ordinal_day - 1)
components = [base_date.year, base_date.month, base_date.day]
return components, pos
def _calculate_weekdate(self, year, week, day):
"""
Calculate the date corresponding to the given ISO year, week, and day.
This function is effectively the inverse of
:func:`datetime.date.isocalendar`.
:param year:
The year in the ISO calendar
:param week:
The week in the ISO calendar - range is [1, 53]
:param day:
The day in the ISO calendar - range is [1 (MON), 7 (SUN)]
:return:
Returns a :class:`datetime.date`
"""
if not 0 < week < 54:
raise ValueError('Invalid week: {}'.format(week))
if not 0 < day < 8: # Range is 1-7
raise ValueError('Invalid weekday: {}'.format(day))
# Get week 1 for the specific year:
jan_4 = date(year, 1, 4) # Week 1 always has January 4th in it
week_1 = jan_4 - timedelta(days=jan_4.isocalendar()[2] - 1)
# Now add the specific number of weeks and days to get what we want
week_offset = (week - 1) * 7 + (day - 1)
return week_1 + timedelta(days=week_offset)
def _parse_isotime(self, timestr):
len_str = len(timestr)
components = [0, 0, 0, 0, None]
pos = 0
comp = -1
if len_str < 2:
raise ValueError('ISO time too short')
has_sep = False
while pos < len_str and comp < 5:
comp += 1
if timestr[pos:pos + 1] in b'-+Zz':
# Detect time zone boundary
components[-1] = self._parse_tzstr(timestr[pos:])
pos = len_str
break
if comp == 1 and timestr[pos:pos+1] == self._TIME_SEP:
has_sep = True
pos += 1
elif comp == 2 and has_sep:
if timestr[pos:pos+1] != self._TIME_SEP:
raise ValueError('Inconsistent use of colon separator')
pos += 1
if comp < 3:
# Hour, minute, second
components[comp] = int(timestr[pos:pos + 2])
pos += 2
if comp == 3:
# Fraction of a second
frac = self._FRACTION_REGEX.match(timestr[pos:])
if not frac:
continue
us_str = frac.group(1)[:6] # Truncate to microseconds
components[comp] = int(us_str) * 10**(6 - len(us_str))
pos += len(frac.group())
if pos < len_str:
raise ValueError('Unused components in ISO string')
if components[0] == 24:
# Standard supports 00:00 and 24:00 as representations of midnight
if any(component != 0 for component in components[1:4]):
raise ValueError('Hour may only be 24 at 24:00:00.000')
return components
def _parse_tzstr(self, tzstr, zero_as_utc=True):
if tzstr == b'Z' or tzstr == b'z':
return tz.UTC
if len(tzstr) not in {3, 5, 6}:
raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters')
if tzstr[0:1] == b'-':
mult = -1
elif tzstr[0:1] == b'+':
mult = 1
else:
raise ValueError('Time zone offset requires sign')
hours = int(tzstr[1:3])
if len(tzstr) == 3:
minutes = 0
else:
minutes = int(tzstr[(4 if tzstr[3:4] == self._TIME_SEP else 3):])
if zero_as_utc and hours == 0 and minutes == 0:
return tz.UTC
else:
if minutes > 59:
raise ValueError('Invalid minutes in time zone offset')
if hours > 23:
raise ValueError('Invalid hours in time zone offset')
return tz.tzoffset(None, mult * (hours * 60 + minutes) * 60)
DEFAULT_ISOPARSER = isoparser()
isoparse = DEFAULT_ISOPARSER.isoparse
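A few illustrative calls (not part of the commit) showing what the formats documented above parse to:

# Usage sketch for isoparse() defined above.
from dateutil.parser import isoparse

isoparse('2018-04-09')                   # datetime.datetime(2018, 4, 9, 0, 0)
isoparse('2018-W15-1')                   # ISO week date -> datetime.datetime(2018, 4, 9, 0, 0)
isoparse('2018-04-09T13:37:00+00:00')    # aware datetime with tzinfo=tzutc()
isoparse('2018-04-09T24:00:00')          # 24:00 rolls over -> datetime.datetime(2018, 4, 10, 0, 0)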


@@ -0,0 +1,599 @@
# -*- coding: utf-8 -*-
import datetime
import calendar
import operator
from math import copysign
from six import integer_types
from warnings import warn
from ._common import weekday
MO, TU, WE, TH, FR, SA, SU = weekdays = tuple(weekday(x) for x in range(7))
__all__ = ["relativedelta", "MO", "TU", "WE", "TH", "FR", "SA", "SU"]
class relativedelta(object):
"""
The relativedelta type is designed to be applied to an existing datetime and
can either replace specific components of that datetime or represent an
interval of time.
It is based on the specification of the excellent work done by M.-A. Lemburg
in his
`mx.DateTime <https://www.egenix.com/products/python/mxBase/mxDateTime/>`_ extension.
However, notice that this type does *NOT* implement the same algorithm as
his work. Do *NOT* expect it to behave like mx.DateTime's counterpart.
There are two different ways to build a relativedelta instance. The
first one is passing it two date/datetime classes::
relativedelta(datetime1, datetime2)
The second one is passing it any number of the following keyword arguments::
relativedelta(arg1=x,arg2=y,arg3=z...)
year, month, day, hour, minute, second, microsecond:
Absolute information (argument is singular); adding or subtracting a
relativedelta with absolute information does not perform an arithmetic
operation, but rather REPLACES the corresponding value in the
original datetime with the value(s) in relativedelta.
years, months, weeks, days, hours, minutes, seconds, microseconds:
Relative information, may be negative (argument is plural); adding
or subtracting a relativedelta with relative information performs
the corresponding arithmetic operation on the original datetime value
with the information in the relativedelta.
weekday:
One of the weekday instances (MO, TU, etc) available in the
relativedelta module. These instances may receive a parameter N,
specifying the Nth weekday, which could be positive or negative
(like MO(+1) or MO(-2)). Not specifying it is the same as specifying
+1. You can also use an integer, where 0=MO. This argument is always
relative e.g. if the calculated date is already Monday, using MO(1)
or MO(-1) won't change the day. To effectively make it absolute, use
it in combination with the day argument (e.g. day=1, MO(1) for first
Monday of the month).
leapdays:
Will add the given days to the date found, if the year is a leap
year and the date found is after February 28.
yearday, nlyearday:
Set the yearday or the non-leap year day (jump leap days).
These are converted to day/month/leapdays information.
There are relative and absolute forms of the keyword
arguments. The plural is relative, and the singular is
absolute. For each argument in the order below, the absolute form
is applied first (by setting each attribute to that value) and
then the relative form (by adding the value to the attribute).
The order of attributes considered when this relativedelta is
added to a datetime is:
1. Year
2. Month
3. Day
4. Hours
5. Minutes
6. Seconds
7. Microseconds
Finally, weekday is applied, using the rule described above.
For example
>>> from datetime import datetime
>>> from dateutil.relativedelta import relativedelta, MO
>>> dt = datetime(2018, 4, 9, 13, 37, 0)
>>> delta = relativedelta(hours=25, day=1, weekday=MO(1))
>>> dt + delta
datetime.datetime(2018, 4, 2, 14, 37)
First, the day is set to 1 (the first of the month); then 25 hours
are added, to get to the 2nd day and 14th hour; finally, the
weekday is applied, but since the 2nd is already a Monday there is
no effect.
"""
def __init__(self, dt1=None, dt2=None,
years=0, months=0, days=0, leapdays=0, weeks=0,
hours=0, minutes=0, seconds=0, microseconds=0,
year=None, month=None, day=None, weekday=None,
yearday=None, nlyearday=None,
hour=None, minute=None, second=None, microsecond=None):
if dt1 and dt2:
# datetime is a subclass of date. So both must be date
if not (isinstance(dt1, datetime.date) and
isinstance(dt2, datetime.date)):
raise TypeError("relativedelta only diffs datetime/date")
# We allow two dates, or two datetimes, so we coerce them to be
# of the same type
if (isinstance(dt1, datetime.datetime) !=
isinstance(dt2, datetime.datetime)):
if not isinstance(dt1, datetime.datetime):
dt1 = datetime.datetime.fromordinal(dt1.toordinal())
elif not isinstance(dt2, datetime.datetime):
dt2 = datetime.datetime.fromordinal(dt2.toordinal())
self.years = 0
self.months = 0
self.days = 0
self.leapdays = 0
self.hours = 0
self.minutes = 0
self.seconds = 0
self.microseconds = 0
self.year = None
self.month = None
self.day = None
self.weekday = None
self.hour = None
self.minute = None
self.second = None
self.microsecond = None
self._has_time = 0
# Get year / month delta between the two
months = (dt1.year - dt2.year) * 12 + (dt1.month - dt2.month)
self._set_months(months)
# Remove the year/month delta so the timedelta is just well-defined
# time units (seconds, days and microseconds)
dtm = self.__radd__(dt2)
# If we've overshot our target, make an adjustment
if dt1 < dt2:
compare = operator.gt
increment = 1
else:
compare = operator.lt
increment = -1
while compare(dt1, dtm):
months += increment
self._set_months(months)
dtm = self.__radd__(dt2)
# Get the timedelta between the "months-adjusted" date and dt1
delta = dt1 - dtm
self.seconds = delta.seconds + delta.days * 86400
self.microseconds = delta.microseconds
else:
# Check for non-integer values in integer-only quantities
if any(x is not None and x != int(x) for x in (years, months)):
raise ValueError("Non-integer years and months are "
"ambiguous and not currently supported.")
# Relative information
self.years = int(years)
self.months = int(months)
self.days = days + weeks * 7
self.leapdays = leapdays
self.hours = hours
self.minutes = minutes
self.seconds = seconds
self.microseconds = microseconds
# Absolute information
self.year = year
self.month = month
self.day = day
self.hour = hour
self.minute = minute
self.second = second
self.microsecond = microsecond
if any(x is not None and int(x) != x
for x in (year, month, day, hour,
minute, second, microsecond)):
# For now we'll deprecate floats - later it'll be an error.
warn("Non-integer value passed as absolute information. " +
"This is not a well-defined condition and will raise " +
"errors in future versions.", DeprecationWarning)
if isinstance(weekday, integer_types):
self.weekday = weekdays[weekday]
else:
self.weekday = weekday
yday = 0
if nlyearday:
yday = nlyearday
elif yearday:
yday = yearday
if yearday > 59:
self.leapdays = -1
if yday:
ydayidx = [31, 59, 90, 120, 151, 181, 212,
243, 273, 304, 334, 366]
for idx, ydays in enumerate(ydayidx):
if yday <= ydays:
self.month = idx+1
if idx == 0:
self.day = yday
else:
self.day = yday-ydayidx[idx-1]
break
else:
raise ValueError("invalid year day (%d)" % yday)
self._fix()
def _fix(self):
if abs(self.microseconds) > 999999:
s = _sign(self.microseconds)
div, mod = divmod(self.microseconds * s, 1000000)
self.microseconds = mod * s
self.seconds += div * s
if abs(self.seconds) > 59:
s = _sign(self.seconds)
div, mod = divmod(self.seconds * s, 60)
self.seconds = mod * s
self.minutes += div * s
if abs(self.minutes) > 59:
s = _sign(self.minutes)
div, mod = divmod(self.minutes * s, 60)
self.minutes = mod * s
self.hours += div * s
if abs(self.hours) > 23:
s = _sign(self.hours)
div, mod = divmod(self.hours * s, 24)
self.hours = mod * s
self.days += div * s
if abs(self.months) > 11:
s = _sign(self.months)
div, mod = divmod(self.months * s, 12)
self.months = mod * s
self.years += div * s
if (self.hours or self.minutes or self.seconds or self.microseconds
or self.hour is not None or self.minute is not None or
self.second is not None or self.microsecond is not None):
self._has_time = 1
else:
self._has_time = 0
@property
def weeks(self):
return int(self.days / 7.0)
@weeks.setter
def weeks(self, value):
self.days = self.days - (self.weeks * 7) + value * 7
def _set_months(self, months):
self.months = months
if abs(self.months) > 11:
s = _sign(self.months)
div, mod = divmod(self.months * s, 12)
self.months = mod * s
self.years = div * s
else:
self.years = 0
def normalized(self):
"""
Return a version of this object represented entirely using integer
values for the relative attributes.
>>> relativedelta(days=1.5, hours=2).normalized()
relativedelta(days=+1, hours=+14)
:return:
Returns a :class:`dateutil.relativedelta.relativedelta` object.
"""
# Cascade remainders down (rounding each to roughly nearest microsecond)
days = int(self.days)
hours_f = round(self.hours + 24 * (self.days - days), 11)
hours = int(hours_f)
minutes_f = round(self.minutes + 60 * (hours_f - hours), 10)
minutes = int(minutes_f)
seconds_f = round(self.seconds + 60 * (minutes_f - minutes), 8)
seconds = int(seconds_f)
microseconds = round(self.microseconds + 1e6 * (seconds_f - seconds))
# Constructor carries overflow back up with call to _fix()
return self.__class__(years=self.years, months=self.months,
days=days, hours=hours, minutes=minutes,
seconds=seconds, microseconds=microseconds,
leapdays=self.leapdays, year=self.year,
month=self.month, day=self.day,
weekday=self.weekday, hour=self.hour,
minute=self.minute, second=self.second,
microsecond=self.microsecond)
def __add__(self, other):
if isinstance(other, relativedelta):
return self.__class__(years=other.years + self.years,
months=other.months + self.months,
days=other.days + self.days,
hours=other.hours + self.hours,
minutes=other.minutes + self.minutes,
seconds=other.seconds + self.seconds,
microseconds=(other.microseconds +
self.microseconds),
leapdays=other.leapdays or self.leapdays,
year=(other.year if other.year is not None
else self.year),
month=(other.month if other.month is not None
else self.month),
day=(other.day if other.day is not None
else self.day),
weekday=(other.weekday if other.weekday is not None
else self.weekday),
hour=(other.hour if other.hour is not None
else self.hour),
minute=(other.minute if other.minute is not None
else self.minute),
second=(other.second if other.second is not None
else self.second),
microsecond=(other.microsecond if other.microsecond
is not None else
self.microsecond))
if isinstance(other, datetime.timedelta):
return self.__class__(years=self.years,
months=self.months,
days=self.days + other.days,
hours=self.hours,
minutes=self.minutes,
seconds=self.seconds + other.seconds,
microseconds=self.microseconds + other.microseconds,
leapdays=self.leapdays,
year=self.year,
month=self.month,
day=self.day,
weekday=self.weekday,
hour=self.hour,
minute=self.minute,
second=self.second,
microsecond=self.microsecond)
if not isinstance(other, datetime.date):
return NotImplemented
elif self._has_time and not isinstance(other, datetime.datetime):
other = datetime.datetime.fromordinal(other.toordinal())
year = (self.year or other.year)+self.years
month = self.month or other.month
if self.months:
assert 1 <= abs(self.months) <= 12
month += self.months
if month > 12:
year += 1
month -= 12
elif month < 1:
year -= 1
month += 12
day = min(calendar.monthrange(year, month)[1],
self.day or other.day)
repl = {"year": year, "month": month, "day": day}
for attr in ["hour", "minute", "second", "microsecond"]:
value = getattr(self, attr)
if value is not None:
repl[attr] = value
days = self.days
if self.leapdays and month > 2 and calendar.isleap(year):
days += self.leapdays
ret = (other.replace(**repl)
+ datetime.timedelta(days=days,
hours=self.hours,
minutes=self.minutes,
seconds=self.seconds,
microseconds=self.microseconds))
if self.weekday:
weekday, nth = self.weekday.weekday, self.weekday.n or 1
jumpdays = (abs(nth) - 1) * 7
if nth > 0:
jumpdays += (7 - ret.weekday() + weekday) % 7
else:
jumpdays += (ret.weekday() - weekday) % 7
jumpdays *= -1
ret += datetime.timedelta(days=jumpdays)
return ret
def __radd__(self, other):
return self.__add__(other)
def __rsub__(self, other):
return self.__neg__().__radd__(other)
def __sub__(self, other):
if not isinstance(other, relativedelta):
return NotImplemented # In case the other object defines __rsub__
return self.__class__(years=self.years - other.years,
months=self.months - other.months,
days=self.days - other.days,
hours=self.hours - other.hours,
minutes=self.minutes - other.minutes,
seconds=self.seconds - other.seconds,
microseconds=self.microseconds - other.microseconds,
leapdays=self.leapdays or other.leapdays,
year=(self.year if self.year is not None
else other.year),
month=(self.month if self.month is not None else
other.month),
day=(self.day if self.day is not None else
other.day),
weekday=(self.weekday if self.weekday is not None else
other.weekday),
hour=(self.hour if self.hour is not None else
other.hour),
minute=(self.minute if self.minute is not None else
other.minute),
second=(self.second if self.second is not None else
other.second),
microsecond=(self.microsecond if self.microsecond
is not None else
other.microsecond))
def __abs__(self):
return self.__class__(years=abs(self.years),
months=abs(self.months),
days=abs(self.days),
hours=abs(self.hours),
minutes=abs(self.minutes),
seconds=abs(self.seconds),
microseconds=abs(self.microseconds),
leapdays=self.leapdays,
year=self.year,
month=self.month,
day=self.day,
weekday=self.weekday,
hour=self.hour,
minute=self.minute,
second=self.second,
microsecond=self.microsecond)
def __neg__(self):
return self.__class__(years=-self.years,
months=-self.months,
days=-self.days,
hours=-self.hours,
minutes=-self.minutes,
seconds=-self.seconds,
microseconds=-self.microseconds,
leapdays=self.leapdays,
year=self.year,
month=self.month,
day=self.day,
weekday=self.weekday,
hour=self.hour,
minute=self.minute,
second=self.second,
microsecond=self.microsecond)
def __bool__(self):
return not (not self.years and
not self.months and
not self.days and
not self.hours and
not self.minutes and
not self.seconds and
not self.microseconds and
not self.leapdays and
self.year is None and
self.month is None and
self.day is None and
self.weekday is None and
self.hour is None and
self.minute is None and
self.second is None and
self.microsecond is None)
# Compatibility with Python 2.x
__nonzero__ = __bool__
def __mul__(self, other):
try:
f = float(other)
except TypeError:
return NotImplemented
return self.__class__(years=int(self.years * f),
months=int(self.months * f),
days=int(self.days * f),
hours=int(self.hours * f),
minutes=int(self.minutes * f),
seconds=int(self.seconds * f),
microseconds=int(self.microseconds * f),
leapdays=self.leapdays,
year=self.year,
month=self.month,
day=self.day,
weekday=self.weekday,
hour=self.hour,
minute=self.minute,
second=self.second,
microsecond=self.microsecond)
__rmul__ = __mul__
def __eq__(self, other):
if not isinstance(other, relativedelta):
return NotImplemented
if self.weekday or other.weekday:
if not self.weekday or not other.weekday:
return False
if self.weekday.weekday != other.weekday.weekday:
return False
n1, n2 = self.weekday.n, other.weekday.n
if n1 != n2 and not ((not n1 or n1 == 1) and (not n2 or n2 == 1)):
return False
return (self.years == other.years and
self.months == other.months and
self.days == other.days and
self.hours == other.hours and
self.minutes == other.minutes and
self.seconds == other.seconds and
self.microseconds == other.microseconds and
self.leapdays == other.leapdays and
self.year == other.year and
self.month == other.month and
self.day == other.day and
self.hour == other.hour and
self.minute == other.minute and
self.second == other.second and
self.microsecond == other.microsecond)
def __hash__(self):
return hash((
self.weekday,
self.years,
self.months,
self.days,
self.hours,
self.minutes,
self.seconds,
self.microseconds,
self.leapdays,
self.year,
self.month,
self.day,
self.hour,
self.minute,
self.second,
self.microsecond,
))
def __ne__(self, other):
return not self.__eq__(other)
def __div__(self, other):
try:
reciprocal = 1 / float(other)
except TypeError:
return NotImplemented
return self.__mul__(reciprocal)
__truediv__ = __div__
def __repr__(self):
l = []
for attr in ["years", "months", "days", "leapdays",
"hours", "minutes", "seconds", "microseconds"]:
value = getattr(self, attr)
if value:
l.append("{attr}={value:+g}".format(attr=attr, value=value))
for attr in ["year", "month", "day", "weekday",
"hour", "minute", "second", "microsecond"]:
value = getattr(self, attr)
if value is not None:
l.append("{attr}={value}".format(attr=attr, value=repr(value)))
return "{classname}({attrs})".format(classname=self.__class__.__name__,
attrs=", ".join(l))
def _sign(x):
return int(copysign(1, x))
# vim:ts=4:sw=4:et
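A few illustrative calls (not part of the commit) showing the replace vs. interval behaviour described in the class docstring:

# Usage sketch for relativedelta defined above.
from datetime import date
from dateutil.relativedelta import relativedelta

date(2003, 9, 17) + relativedelta(months=+1, weeks=+1)   # date(2003, 10, 24)
date(2000, 2, 29) + relativedelta(years=+1)              # day clamped -> date(2001, 2, 28)
relativedelta(date(2003, 10, 24), date(2003, 9, 17))     # relativedelta(months=+1, days=+7)
relativedelta(days=1.5, hours=2).normalized()            # relativedelta(days=+1, hours=+14)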

File diff suppressed because it is too large.


@@ -0,0 +1,12 @@
# -*- coding: utf-8 -*-
from .tz import *
from .tz import __doc__
__all__ = ["tzutc", "tzoffset", "tzlocal", "tzfile", "tzrange",
"tzstr", "tzical", "tzwin", "tzwinlocal", "gettz",
"enfold", "datetime_ambiguous", "datetime_exists",
"resolve_imaginary", "UTC", "DeprecatedTzFormatWarning"]
class DeprecatedTzFormatWarning(Warning):
"""Warning raised when time zones are parsed from deprecated formats."""


@@ -0,0 +1,419 @@
from six import PY2
from functools import wraps
from datetime import datetime, timedelta, tzinfo
ZERO = timedelta(0)
__all__ = ['tzname_in_python2', 'enfold']
def tzname_in_python2(namefunc):
"""Change unicode output into bytestrings in Python 2
tzname() API changed in Python 3. It used to return bytes, but was changed
to unicode strings
"""
if PY2:
@wraps(namefunc)
def adjust_encoding(*args, **kwargs):
name = namefunc(*args, **kwargs)
if name is not None:
name = name.encode()
return name
return adjust_encoding
else:
return namefunc
# The following is adapted from Alexander Belopolsky's tz library
# https://github.com/abalkin/tz
if hasattr(datetime, 'fold'):
# Python 3.6+ provides the fold attribute natively (PEP 495)
def enfold(dt, fold=1):
"""
Provides a unified interface for assigning the ``fold`` attribute to
datetimes both before and after the implementation of PEP-495.
:param fold:
The value for the ``fold`` attribute in the returned datetime. This
should be either 0 or 1.
:return:
Returns an object for which ``getattr(dt, 'fold', 0)`` returns
``fold`` for all versions of Python. In versions prior to
Python 3.6, this is a ``_DatetimeWithFold`` object, which is a
subclass of :py:class:`datetime.datetime` with the ``fold``
attribute added, if ``fold`` is 1.
.. versionadded:: 2.6.0
"""
return dt.replace(fold=fold)
else:
class _DatetimeWithFold(datetime):
"""
This is a class designed to provide a PEP 495-compliant interface for
Python versions before 3.6. It is used only for dates in a fold, so
the ``fold`` attribute is fixed at ``1``.
.. versionadded:: 2.6.0
"""
__slots__ = ()
def replace(self, *args, **kwargs):
"""
Return a datetime with the same attributes, except for those
attributes given new values by whichever keyword arguments are
specified. Note that tzinfo=None can be specified to create a naive
datetime from an aware datetime with no conversion of date and time
data.
This is reimplemented in ``_DatetimeWithFold`` because pypy3 will
return a ``datetime.datetime`` even if ``fold`` is unchanged.
"""
argnames = (
'year', 'month', 'day', 'hour', 'minute', 'second',
'microsecond', 'tzinfo'
)
for arg, argname in zip(args, argnames):
if argname in kwargs:
raise TypeError('Duplicate argument: {}'.format(argname))
kwargs[argname] = arg
for argname in argnames:
if argname not in kwargs:
kwargs[argname] = getattr(self, argname)
dt_class = self.__class__ if kwargs.get('fold', 1) else datetime
return dt_class(**kwargs)
@property
def fold(self):
return 1
def enfold(dt, fold=1):
"""
Provides a unified interface for assigning the ``fold`` attribute to
datetimes both before and after the implementation of PEP-495.
:param fold:
The value for the ``fold`` attribute in the returned datetime. This
should be either 0 or 1.
:return:
Returns an object for which ``getattr(dt, 'fold', 0)`` returns
``fold`` for all versions of Python. In versions prior to
Python 3.6, this is a ``_DatetimeWithFold`` object, which is a
subclass of :py:class:`datetime.datetime` with the ``fold``
attribute added, if ``fold`` is 1.
.. versionadded:: 2.6.0
"""
if getattr(dt, 'fold', 0) == fold:
return dt
args = dt.timetuple()[:6]
args += (dt.microsecond, dt.tzinfo)
if fold:
return _DatetimeWithFold(*args)
else:
return datetime(*args)
def _validate_fromutc_inputs(f):
"""
The CPython version of ``fromutc`` checks that the input is a ``datetime``
object and that ``self`` is attached as its ``tzinfo``.
"""
@wraps(f)
def fromutc(self, dt):
if not isinstance(dt, datetime):
raise TypeError("fromutc() requires a datetime argument")
if dt.tzinfo is not self:
raise ValueError("dt.tzinfo is not self")
return f(self, dt)
return fromutc
class _tzinfo(tzinfo):
"""
Base class for all ``dateutil`` ``tzinfo`` objects.
"""
def is_ambiguous(self, dt):
"""
Whether or not the "wall time" of a given datetime is ambiguous in this
zone.
:param dt:
A :py:class:`datetime.datetime`, naive or time zone aware.
:return:
Returns ``True`` if ambiguous, ``False`` otherwise.
.. versionadded:: 2.6.0
"""
dt = dt.replace(tzinfo=self)
wall_0 = enfold(dt, fold=0)
wall_1 = enfold(dt, fold=1)
same_offset = wall_0.utcoffset() == wall_1.utcoffset()
same_dt = wall_0.replace(tzinfo=None) == wall_1.replace(tzinfo=None)
return same_dt and not same_offset
def _fold_status(self, dt_utc, dt_wall):
"""
Determine the fold status of a "wall" datetime, given a representation
of the same datetime as a (naive) UTC datetime. This is calculated based
on the assumption that ``dt.utcoffset() - dt.dst()`` is constant for all
datetimes, and that this offset is the actual number of hours separating
``dt_utc`` and ``dt_wall``.
:param dt_utc:
Representation of the datetime as UTC
:param dt_wall:
Representation of the datetime as "wall time". This parameter must
either have a `fold` attribute or have a fold-naive
:class:`datetime.tzinfo` attached, otherwise the calculation may
fail.
"""
if self.is_ambiguous(dt_wall):
delta_wall = dt_wall - dt_utc
_fold = int(delta_wall == (dt_utc.utcoffset() - dt_utc.dst()))
else:
_fold = 0
return _fold
def _fold(self, dt):
return getattr(dt, 'fold', 0)
def _fromutc(self, dt):
"""
Given a timezone-aware datetime in a given timezone, calculates a
timezone-aware datetime in a new timezone.
Since this is the one time that we *know* we have an unambiguous
datetime object, we take this opportunity to determine whether the
datetime is ambiguous and in a "fold" state (e.g. if it's the first
occurrence, chronologically, of the ambiguous datetime).
:param dt:
A timezone-aware :class:`datetime.datetime` object.
"""
# Re-implement the algorithm from Python's datetime.py
dtoff = dt.utcoffset()
if dtoff is None:
raise ValueError("fromutc() requires a non-None utcoffset() "
"result")
# The original datetime.py code assumes that `dst()` defaults to
# zero during ambiguous times. PEP 495 inverts this presumption, so
# for pre-PEP 495 versions of python, we need to tweak the algorithm.
dtdst = dt.dst()
if dtdst is None:
raise ValueError("fromutc() requires a non-None dst() result")
delta = dtoff - dtdst
dt += delta
# Set fold=1 so we can default to being in the fold for
# ambiguous dates.
dtdst = enfold(dt, fold=1).dst()
if dtdst is None:
raise ValueError("fromutc(): dt.dst gave inconsistent "
"results; cannot convert")
return dt + dtdst
@_validate_fromutc_inputs
def fromutc(self, dt):
"""
Given a timezone-aware datetime in a given timezone, calculates a
timezone-aware datetime in a new timezone.
Since this is the one time that we *know* we have an unambiguous
datetime object, we take this opportunity to determine whether the
datetime is ambiguous and in a "fold" state (e.g. if it's the first
occurrence, chronologically, of the ambiguous datetime).
:param dt:
A timezone-aware :class:`datetime.datetime` object.
"""
dt_wall = self._fromutc(dt)
# Calculate the fold status given the two datetimes.
_fold = self._fold_status(dt, dt_wall)
# Set the default fold value for ambiguous dates
return enfold(dt_wall, fold=_fold)
class tzrangebase(_tzinfo):
"""
This is an abstract base class for time zones represented by an annual
transition into and out of DST. Child classes should implement the following
methods:
* ``__init__(self, *args, **kwargs)``
* ``transitions(self, year)`` - this is expected to return a tuple of
datetimes representing the DST on and off transitions in standard
time.
A fully initialized ``tzrangebase`` subclass should also provide the
following attributes:
* ``hasdst``: Boolean whether or not the zone uses DST.
* ``_dst_offset`` / ``_std_offset``: :class:`datetime.timedelta` objects
representing the respective UTC offsets.
* ``_dst_abbr`` / ``_std_abbr``: Strings representing the timezone short
abbreviations in DST and STD, respectively.
* ``_hasdst``: Whether or not the zone has DST.
.. versionadded:: 2.6.0
"""
def __init__(self):
raise NotImplementedError('tzrangebase is an abstract base class')
def utcoffset(self, dt):
isdst = self._isdst(dt)
if isdst is None:
return None
elif isdst:
return self._dst_offset
else:
return self._std_offset
def dst(self, dt):
isdst = self._isdst(dt)
if isdst is None:
return None
elif isdst:
return self._dst_base_offset
else:
return ZERO
@tzname_in_python2
def tzname(self, dt):
if self._isdst(dt):
return self._dst_abbr
else:
return self._std_abbr
def fromutc(self, dt):
""" Given a datetime in UTC, return local time """
if not isinstance(dt, datetime):
raise TypeError("fromutc() requires a datetime argument")
if dt.tzinfo is not self:
raise ValueError("dt.tzinfo is not self")
# Get transitions - if there are none, fixed offset
transitions = self.transitions(dt.year)
if transitions is None:
return dt + self.utcoffset(dt)
# Get the transition times in UTC
dston, dstoff = transitions
dston -= self._std_offset
dstoff -= self._std_offset
utc_transitions = (dston, dstoff)
dt_utc = dt.replace(tzinfo=None)
isdst = self._naive_isdst(dt_utc, utc_transitions)
if isdst:
dt_wall = dt + self._dst_offset
else:
dt_wall = dt + self._std_offset
_fold = int(not isdst and self.is_ambiguous(dt_wall))
return enfold(dt_wall, fold=_fold)
def is_ambiguous(self, dt):
"""
Whether or not the "wall time" of a given datetime is ambiguous in this
zone.
:param dt:
A :py:class:`datetime.datetime`, naive or time zone aware.
:return:
Returns ``True`` if ambiguous, ``False`` otherwise.
.. versionadded:: 2.6.0
"""
if not self.hasdst:
return False
start, end = self.transitions(dt.year)
dt = dt.replace(tzinfo=None)
return (end <= dt < end + self._dst_base_offset)
def _isdst(self, dt):
if not self.hasdst:
return False
elif dt is None:
return None
transitions = self.transitions(dt.year)
if transitions is None:
return False
dt = dt.replace(tzinfo=None)
isdst = self._naive_isdst(dt, transitions)
# Handle ambiguous dates
if not isdst and self.is_ambiguous(dt):
return not self._fold(dt)
else:
return isdst
def _naive_isdst(self, dt, transitions):
dston, dstoff = transitions
dt = dt.replace(tzinfo=None)
if dston < dstoff:
isdst = dston <= dt < dstoff
else:
isdst = not dstoff <= dt < dston
return isdst
@property
def _dst_base_offset(self):
return self._dst_offset - self._std_offset
__hash__ = None
def __ne__(self, other):
return not (self == other)
def __repr__(self):
return "%s(...)" % self.__class__.__name__
__reduce__ = object.__reduce__
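A sketch (not part of the commit) of what the enfold/fold machinery above is for: at a DST-end transition the same wall time occurs twice, and fold selects which occurrence is meant. It assumes the system time zone database provides America/New_York:

# Fold/ambiguity sketch; assumes tz data for America/New_York is available.
from datetime import datetime
from dateutil import tz

eastern = tz.gettz('America/New_York')
ambiguous = datetime(2023, 11, 5, 1, 30, tzinfo=eastern)   # DST ended 2023-11-05 at 02:00 EDT

tz.datetime_ambiguous(ambiguous)           # True: 01:30 happened twice that morning
ambiguous.utcoffset()                      # first occurrence (EDT, UTC-04:00), since fold=0
tz.enfold(ambiguous, fold=1).utcoffset()   # second occurrence (EST, UTC-05:00)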


@@ -0,0 +1,80 @@
from datetime import timedelta
import weakref
from collections import OrderedDict
from six.moves import _thread
class _TzSingleton(type):
def __init__(cls, *args, **kwargs):
cls.__instance = None
super(_TzSingleton, cls).__init__(*args, **kwargs)
def __call__(cls):
if cls.__instance is None:
cls.__instance = super(_TzSingleton, cls).__call__()
return cls.__instance
class _TzFactory(type):
def instance(cls, *args, **kwargs):
"""Alternate constructor that returns a fresh instance"""
return type.__call__(cls, *args, **kwargs)
class _TzOffsetFactory(_TzFactory):
def __init__(cls, *args, **kwargs):
cls.__instances = weakref.WeakValueDictionary()
cls.__strong_cache = OrderedDict()
cls.__strong_cache_size = 8
cls._cache_lock = _thread.allocate_lock()
def __call__(cls, name, offset):
if isinstance(offset, timedelta):
key = (name, offset.total_seconds())
else:
key = (name, offset)
instance = cls.__instances.get(key, None)
if instance is None:
instance = cls.__instances.setdefault(key,
cls.instance(name, offset))
# This lock may not be necessary in Python 3. See GH issue #901
with cls._cache_lock:
cls.__strong_cache[key] = cls.__strong_cache.pop(key, instance)
# Remove an item if the strong cache is overpopulated
if len(cls.__strong_cache) > cls.__strong_cache_size:
cls.__strong_cache.popitem(last=False)
return instance
class _TzStrFactory(_TzFactory):
def __init__(cls, *args, **kwargs):
cls.__instances = weakref.WeakValueDictionary()
cls.__strong_cache = OrderedDict()
cls.__strong_cache_size = 8
cls.__cache_lock = _thread.allocate_lock()
def __call__(cls, s, posix_offset=False):
key = (s, posix_offset)
instance = cls.__instances.get(key, None)
if instance is None:
instance = cls.__instances.setdefault(key,
cls.instance(s, posix_offset))
# This lock may not be necessary in Python 3. See GH issue #901
with cls.__cache_lock:
cls.__strong_cache[key] = cls.__strong_cache.pop(key, instance)
# Remove an item if the strong cache is overpopulated
if len(cls.__strong_cache) > cls.__strong_cache_size:
cls.__strong_cache.popitem(last=False)
return instance
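The factories above combine a WeakValueDictionary (identical zones share one instance while anything still references it) with a small OrderedDict used as a strong LRU cache (recently used instances survive even with no outside references). A generic sketch of that caching pattern, with hypothetical names:

# Sketch of the weak-cache plus bounded strong-cache pattern; CachedThing is hypothetical.
import weakref
from collections import OrderedDict
from threading import Lock


class CachedThing(object):
    _instances = weakref.WeakValueDictionary()
    _strong_cache = OrderedDict()
    _strong_cache_size = 8
    _lock = Lock()

    def __new__(cls, key):
        obj = cls._instances.get(key)
        if obj is None:
            obj = super(CachedThing, cls).__new__(cls)
            obj.key = key
            obj = cls._instances.setdefault(key, obj)    # lose any creation race gracefully
        with cls._lock:
            cls._strong_cache[key] = cls._strong_cache.pop(key, obj)   # move to MRU end
            if len(cls._strong_cache) > cls._strong_cache_size:
                cls._strong_cache.popitem(last=False)                  # evict the LRU entry
        return obj


assert CachedThing('UTC') is CachedThing('UTC')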

File diff suppressed because it is too large.


@@ -0,0 +1,370 @@
# -*- coding: utf-8 -*-
"""
This module provides an interface to the native time zone data on Windows,
including :py:class:`datetime.tzinfo` implementations.
Attempting to import this module on a non-Windows platform will raise an
:py:obj:`ImportError`.
"""
# This code was originally contributed by Jeffrey Harris.
import datetime
import struct
from six.moves import winreg
from six import text_type
try:
import ctypes
from ctypes import wintypes
except ValueError:
# ValueError is raised on non-Windows systems for some horrible reason.
raise ImportError("Running tzwin on non-Windows system")
from ._common import tzrangebase
__all__ = ["tzwin", "tzwinlocal", "tzres"]
ONEWEEK = datetime.timedelta(7)
TZKEYNAMENT = r"SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones"
TZKEYNAME9X = r"SOFTWARE\Microsoft\Windows\CurrentVersion\Time Zones"
TZLOCALKEYNAME = r"SYSTEM\CurrentControlSet\Control\TimeZoneInformation"
def _settzkeyname():
handle = winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE)
try:
winreg.OpenKey(handle, TZKEYNAMENT).Close()
TZKEYNAME = TZKEYNAMENT
except WindowsError:
TZKEYNAME = TZKEYNAME9X
handle.Close()
return TZKEYNAME
TZKEYNAME = _settzkeyname()
class tzres(object):
"""
Class for accessing ``tzres.dll``, which contains timezone name related
resources.
.. versionadded:: 2.5.0
"""
p_wchar = ctypes.POINTER(wintypes.WCHAR) # Pointer to a wide char
def __init__(self, tzres_loc='tzres.dll'):
# Load the user32 DLL so we can load strings from tzres
user32 = ctypes.WinDLL('user32')
# Specify the LoadStringW function
user32.LoadStringW.argtypes = (wintypes.HINSTANCE,
wintypes.UINT,
wintypes.LPWSTR,
ctypes.c_int)
self.LoadStringW = user32.LoadStringW
self._tzres = ctypes.WinDLL(tzres_loc)
self.tzres_loc = tzres_loc
def load_name(self, offset):
"""
Load a timezone name from a DLL offset (integer).
>>> from dateutil.tzwin import tzres
>>> tzr = tzres()
>>> print(tzr.load_name(112))
'Eastern Standard Time'
:param offset:
A positive integer value referring to a string from the tzres dll.
.. note::
Offsets found in the registry are generally of the form
``@tzres.dll,-114``. The offset in this case is 114, not -114.
"""
resource = self.p_wchar()
lpBuffer = ctypes.cast(ctypes.byref(resource), wintypes.LPWSTR)
nchar = self.LoadStringW(self._tzres._handle, offset, lpBuffer, 0)
return resource[:nchar]
def name_from_string(self, tzname_str):
"""
Parse strings as returned from the Windows registry into the time zone
name as defined in the registry.
>>> from dateutil.tzwin import tzres
>>> tzr = tzres()
>>> print(tzr.name_from_string('@tzres.dll,-251'))
'Dateline Daylight Time'
>>> print(tzr.name_from_string('Eastern Standard Time'))
'Eastern Standard Time'
:param tzname_str:
A timezone name string as returned from a Windows registry key.
:return:
Returns the localized timezone string from tzres.dll if the string
is of the form `@tzres.dll,-offset`, else returns the input string.
"""
if not tzname_str.startswith('@'):
return tzname_str
name_splt = tzname_str.split(',-')
try:
offset = int(name_splt[1])
except:
raise ValueError("Malformed timezone string.")
return self.load_name(offset)
class tzwinbase(tzrangebase):
"""tzinfo class based on win32's timezones available in the registry."""
def __init__(self):
raise NotImplementedError('tzwinbase is an abstract base class')
def __eq__(self, other):
# Compare on all relevant dimensions, including name.
if not isinstance(other, tzwinbase):
return NotImplemented
return (self._std_offset == other._std_offset and
self._dst_offset == other._dst_offset and
self._stddayofweek == other._stddayofweek and
self._dstdayofweek == other._dstdayofweek and
self._stdweeknumber == other._stdweeknumber and
self._dstweeknumber == other._dstweeknumber and
self._stdhour == other._stdhour and
self._dsthour == other._dsthour and
self._stdminute == other._stdminute and
self._dstminute == other._dstminute and
self._std_abbr == other._std_abbr and
self._dst_abbr == other._dst_abbr)
@staticmethod
def list():
"""Return a list of all time zones known to the system."""
with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as handle:
with winreg.OpenKey(handle, TZKEYNAME) as tzkey:
result = [winreg.EnumKey(tzkey, i)
for i in range(winreg.QueryInfoKey(tzkey)[0])]
return result
def display(self):
"""
Return the display name of the time zone.
"""
return self._display
def transitions(self, year):
"""
For a given year, get the DST on and off transition times, expressed
always on the standard time side. For zones with no transitions, this
function returns ``None``.
:param year:
The year whose transitions you would like to query.
:return:
Returns a :class:`tuple` of :class:`datetime.datetime` objects,
``(dston, dstoff)`` for zones with an annual DST transition, or
``None`` for fixed offset zones.
"""
if not self.hasdst:
return None
dston = picknthweekday(year, self._dstmonth, self._dstdayofweek,
self._dsthour, self._dstminute,
self._dstweeknumber)
dstoff = picknthweekday(year, self._stdmonth, self._stddayofweek,
self._stdhour, self._stdminute,
self._stdweeknumber)
# Ambiguous dates default to the STD side
dstoff -= self._dst_base_offset
return dston, dstoff
def _get_hasdst(self):
return self._dstmonth != 0
@property
def _dst_base_offset(self):
return self._dst_base_offset_
class tzwin(tzwinbase):
"""
Time zone object created from the zone info in the Windows registry
These are similar to :py:class:`dateutil.tz.tzrange` objects in that
the time zone data is provided in the format of a single offset rule
for either 0 or 2 time zone transitions per year.
:param: name
The name of a Windows time zone key, e.g. "Eastern Standard Time".
The full list of keys can be retrieved with :func:`tzwin.list`.
"""
def __init__(self, name):
self._name = name
with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as handle:
tzkeyname = text_type("{kn}\\{name}").format(kn=TZKEYNAME, name=name)
with winreg.OpenKey(handle, tzkeyname) as tzkey:
keydict = valuestodict(tzkey)
self._std_abbr = keydict["Std"]
self._dst_abbr = keydict["Dlt"]
self._display = keydict["Display"]
# See http://ww_winreg.jsiinc.com/SUBA/tip0300/rh0398.htm
tup = struct.unpack("=3l16h", keydict["TZI"])
stdoffset = -tup[0]-tup[1] # Bias + StandardBias * -1
dstoffset = stdoffset-tup[2] # + DaylightBias * -1
self._std_offset = datetime.timedelta(minutes=stdoffset)
self._dst_offset = datetime.timedelta(minutes=dstoffset)
# for the meaning see the win32 TIME_ZONE_INFORMATION structure docs
# http://msdn.microsoft.com/en-us/library/windows/desktop/ms725481(v=vs.85).aspx
(self._stdmonth,
self._stddayofweek, # Sunday = 0
self._stdweeknumber, # Last = 5
self._stdhour,
self._stdminute) = tup[4:9]
(self._dstmonth,
self._dstdayofweek, # Sunday = 0
self._dstweeknumber, # Last = 5
self._dsthour,
self._dstminute) = tup[12:17]
self._dst_base_offset_ = self._dst_offset - self._std_offset
self.hasdst = self._get_hasdst()
def __repr__(self):
return "tzwin(%s)" % repr(self._name)
def __reduce__(self):
return (self.__class__, (self._name,))
class tzwinlocal(tzwinbase):
"""
Class representing the local time zone information in the Windows registry
While :class:`dateutil.tz.tzlocal` makes system calls (via the :mod:`time`
module) to retrieve time zone information, ``tzwinlocal`` retrieves the
rules directly from the Windows registry and creates an object like
:class:`dateutil.tz.tzwin`.
Because Windows does not have an equivalent of :func:`time.tzset`, on
Windows, :class:`dateutil.tz.tzlocal` instances will always reflect the
time zone settings *at the time that the process was started*, meaning
changes to the machine's time zone settings during the run of a program
on Windows will **not** be reflected by :class:`dateutil.tz.tzlocal`.
Because ``tzwinlocal`` reads the registry directly, it is unaffected by
this issue.
"""
def __init__(self):
with winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE) as handle:
with winreg.OpenKey(handle, TZLOCALKEYNAME) as tzlocalkey:
keydict = valuestodict(tzlocalkey)
self._std_abbr = keydict["StandardName"]
self._dst_abbr = keydict["DaylightName"]
try:
tzkeyname = text_type('{kn}\\{sn}').format(kn=TZKEYNAME,
sn=self._std_abbr)
with winreg.OpenKey(handle, tzkeyname) as tzkey:
_keydict = valuestodict(tzkey)
self._display = _keydict["Display"]
except OSError:
self._display = None
stdoffset = -keydict["Bias"]-keydict["StandardBias"]
dstoffset = stdoffset-keydict["DaylightBias"]
self._std_offset = datetime.timedelta(minutes=stdoffset)
self._dst_offset = datetime.timedelta(minutes=dstoffset)
# For reasons unclear, in this particular key, the day of week has been
# moved to the END of the SYSTEMTIME structure.
tup = struct.unpack("=8h", keydict["StandardStart"])
(self._stdmonth,
self._stdweeknumber, # Last = 5
self._stdhour,
self._stdminute) = tup[1:5]
self._stddayofweek = tup[7]
tup = struct.unpack("=8h", keydict["DaylightStart"])
(self._dstmonth,
self._dstweeknumber, # Last = 5
self._dsthour,
self._dstminute) = tup[1:5]
self._dstdayofweek = tup[7]
self._dst_base_offset_ = self._dst_offset - self._std_offset
self.hasdst = self._get_hasdst()
def __repr__(self):
return "tzwinlocal()"
def __str__(self):
# str will return the standard name, not the daylight name.
return "tzwinlocal(%s)" % repr(self._std_abbr)
def __reduce__(self):
return (self.__class__, ())
def picknthweekday(year, month, dayofweek, hour, minute, whichweek):
""" dayofweek == 0 means Sunday, whichweek 5 means last instance """
first = datetime.datetime(year, month, 1, hour, minute)
# This will work if dayofweek is ISO weekday (1-7) or Microsoft-style (0-6),
# Because 7 % 7 = 0
weekdayone = first.replace(day=((dayofweek - first.isoweekday()) % 7) + 1)
wd = weekdayone + ((whichweek - 1) * ONEWEEK)
if (wd.month != month):
wd -= ONEWEEK
return wd
def valuestodict(key):
"""Convert a registry key's values to a dictionary."""
dout = {}
size = winreg.QueryInfoKey(key)[1]
tz_res = None
for i in range(size):
key_name, value, dtype = winreg.EnumValue(key, i)
if dtype == winreg.REG_DWORD or dtype == winreg.REG_DWORD_LITTLE_ENDIAN:
# If it's a DWORD (32-bit integer), it's stored as unsigned - convert
# that to a proper signed integer
if value & (1 << 31):
value = value - (1 << 32)
elif dtype == winreg.REG_SZ:
# If it's a reference to the tzres DLL, load the actual string
if value.startswith('@tzres'):
tz_res = tz_res or tzres()
value = tz_res.name_from_string(value)
value = value.rstrip('\x00') # Remove trailing nulls
dout[key_name] = value
return dout
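As a rough usage sketch for the module above (Windows only; the zone key name is just an example that is normally present in the registry):

import datetime
from dateutil.tz import tzwin, tzwinlocal

print(tzwin.list()[:5])                      # registry key names, e.g. "Eastern Standard Time"
eastern = tzwin("Eastern Standard Time")
print(eastern.transitions(2023))             # (dston, dstoff) datetimes, or None

now = datetime.datetime.now(tzwinlocal())    # local rules read straight from the registry
print(now.tzname(), now.utcoffset())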

View File

@ -0,0 +1,2 @@
# tzwin has moved to dateutil.tz.win
from .tz.win import *

View File

@ -0,0 +1,71 @@
# -*- coding: utf-8 -*-
"""
This module offers general convenience and utility functions for dealing with
datetimes.
.. versionadded:: 2.7.0
"""
from __future__ import unicode_literals
from datetime import datetime, time
def today(tzinfo=None):
"""
Returns a :py:class:`datetime` representing the current day at midnight
:param tzinfo:
The time zone to attach (also used to determine the current day).
:return:
A :py:class:`datetime.datetime` object representing the current day
at midnight.
"""
dt = datetime.now(tzinfo)
return datetime.combine(dt.date(), time(0, tzinfo=tzinfo))
def default_tzinfo(dt, tzinfo):
"""
Sets the ``tzinfo`` parameter on naive datetimes only
This is useful for example when you are provided a datetime that may have
either an implicit or explicit time zone, such as when parsing a time zone
string.
.. doctest::
>>> from dateutil.tz import tzoffset
>>> from dateutil.parser import parse
>>> from dateutil.utils import default_tzinfo
>>> dflt_tz = tzoffset("EST", -18000)
>>> print(default_tzinfo(parse('2014-01-01 12:30 UTC'), dflt_tz))
2014-01-01 12:30:00+00:00
>>> print(default_tzinfo(parse('2014-01-01 12:30'), dflt_tz))
2014-01-01 12:30:00-05:00
:param dt:
The datetime on which to replace the time zone
:param tzinfo:
The :py:class:`datetime.tzinfo` subclass instance to assign to
``dt`` if (and only if) it is naive.
:return:
Returns an aware :py:class:`datetime.datetime`.
"""
if dt.tzinfo is not None:
return dt
else:
return dt.replace(tzinfo=tzinfo)
def within_delta(dt1, dt2, delta):
"""
Useful for comparing two datetimes that may have a negligible difference
to be considered equal.
"""
delta = abs(delta)
difference = dt1 - dt2
return -delta <= difference <= delta
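A brief usage sketch of the three helpers above (the zone name is chosen only for illustration):

from datetime import datetime, timedelta
from dateutil import tz
from dateutil.utils import default_tzinfo, today, within_delta

print(today(tz.UTC))                                      # current UTC day at midnight
naive = datetime(2023, 9, 20, 12, 30)
aware = default_tzinfo(naive, tz.gettz("Europe/Berlin"))  # attached only because the input is naive
print(aware.isoformat())
print(within_delta(naive, naive + timedelta(seconds=3), timedelta(seconds=5)))  # True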

View File

@ -0,0 +1,167 @@
# -*- coding: utf-8 -*-
import warnings
import json
from tarfile import TarFile
from pkgutil import get_data
from io import BytesIO
from dateutil.tz import tzfile as _tzfile
__all__ = ["get_zonefile_instance", "gettz", "gettz_db_metadata"]
ZONEFILENAME = "dateutil-zoneinfo.tar.gz"
METADATA_FN = 'METADATA'
class tzfile(_tzfile):
def __reduce__(self):
return (gettz, (self._filename,))
def getzoneinfofile_stream():
try:
return BytesIO(get_data(__name__, ZONEFILENAME))
except IOError as e: # TODO switch to FileNotFoundError?
warnings.warn("I/O error({0}): {1}".format(e.errno, e.strerror))
return None
class ZoneInfoFile(object):
def __init__(self, zonefile_stream=None):
if zonefile_stream is not None:
with TarFile.open(fileobj=zonefile_stream) as tf:
self.zones = {zf.name: tzfile(tf.extractfile(zf), filename=zf.name)
for zf in tf.getmembers()
if zf.isfile() and zf.name != METADATA_FN}
# Deal with links: they will point to their parent object, so less
# memory is wasted.
links = {zl.name: self.zones[zl.linkname]
for zl in tf.getmembers() if
zl.islnk() or zl.issym()}
self.zones.update(links)
try:
metadata_json = tf.extractfile(tf.getmember(METADATA_FN))
metadata_str = metadata_json.read().decode('UTF-8')
self.metadata = json.loads(metadata_str)
except KeyError:
# no metadata in tar file
self.metadata = None
else:
self.zones = {}
self.metadata = None
def get(self, name, default=None):
"""
Wrapper for :func:`ZoneInfoFile.zones.get`. This is a convenience method
for retrieving zones from the zone dictionary.
:param name:
The name of the zone to retrieve. (Generally IANA zone names)
:param default:
The value to return in the event of a missing key.
.. versionadded:: 2.6.0
"""
return self.zones.get(name, default)
# The current API has gettz as a module function, although in fact it taps into
# a stateful class. So as a workaround for now, without changing the API, we
# will create a new "global" class instance the first time a user requests a
# timezone. Ugly, but adheres to the api.
#
# TODO: Remove after deprecation period.
_CLASS_ZONE_INSTANCE = []
def get_zonefile_instance(new_instance=False):
"""
This is a convenience function which provides a :class:`ZoneInfoFile`
instance using the data provided by the ``dateutil`` package. By default, it
caches a single instance of the ZoneInfoFile object and returns that.
:param new_instance:
If ``True``, a new instance of :class:`ZoneInfoFile` is instantiated and
used as the cached instance for the next call. Otherwise, new instances
are created only as necessary.
:return:
Returns a :class:`ZoneInfoFile` object.
.. versionadded:: 2.6
"""
if new_instance:
zif = None
else:
zif = getattr(get_zonefile_instance, '_cached_instance', None)
if zif is None:
zif = ZoneInfoFile(getzoneinfofile_stream())
get_zonefile_instance._cached_instance = zif
return zif
def gettz(name):
"""
This retrieves a time zone from the local zoneinfo tarball that is packaged
with dateutil.
:param name:
An IANA-style time zone name, as found in the zoneinfo file.
:return:
Returns a :class:`dateutil.tz.tzfile` time zone object.
.. warning::
It is generally inadvisable to use this function, and it is only
provided for API compatibility with earlier versions. This is *not*
equivalent to ``dateutil.tz.gettz()``, which selects an appropriate
time zone based on the inputs, favoring system zoneinfo. This is ONLY
for accessing the dateutil-specific zoneinfo (which may be out of
date compared to the system zoneinfo).
.. deprecated:: 2.6
If you need to use a specific zoneinfofile over the system zoneinfo,
instantiate a :class:`dateutil.zoneinfo.ZoneInfoFile` object and call
:func:`dateutil.zoneinfo.ZoneInfoFile.get(name)` instead.
Use :func:`get_zonefile_instance` to retrieve an instance of the
dateutil-provided zoneinfo.
"""
warnings.warn("zoneinfo.gettz() will be removed in future versions, "
"to use the dateutil-provided zoneinfo files, instantiate a "
"ZoneInfoFile object and use ZoneInfoFile.zones.get() "
"instead. See the documentation for details.",
DeprecationWarning)
if len(_CLASS_ZONE_INSTANCE) == 0:
_CLASS_ZONE_INSTANCE.append(ZoneInfoFile(getzoneinfofile_stream()))
return _CLASS_ZONE_INSTANCE[0].zones.get(name)
def gettz_db_metadata():
""" Get the zonefile metadata
See `zonefile_metadata`_
:returns:
A dictionary with the database metadata
.. deprecated:: 2.6
See deprecation warning in :func:`zoneinfo.gettz`. To get metadata,
query the attribute ``zoneinfo.ZoneInfoFile.metadata``.
"""
warnings.warn("zoneinfo.gettz_db_metadata() will be removed in future "
"versions, to use the dateutil-provided zoneinfo files, "
"ZoneInfoFile object and query the 'metadata' attribute "
"instead. See the documentation for details.",
DeprecationWarning)
if len(_CLASS_ZONE_INSTANCE) == 0:
_CLASS_ZONE_INSTANCE.append(ZoneInfoFile(getzoneinfofile_stream()))
return _CLASS_ZONE_INSTANCE[0].metadata
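Given the deprecation notes above, a minimal sketch of the recommended route through get_zonefile_instance (zone name for illustration only):

from dateutil.zoneinfo import get_zonefile_instance

zif = get_zonefile_instance()         # cached ZoneInfoFile built from the bundled tarball
nyc = zif.get("America/New_York")     # a tzfile instance, or None if the key is missing
print(sorted(zif.zones)[:3])          # available IANA-style zone names
print(zif.metadata)                   # tzdata metadata, if present in the tarball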

View File

@ -0,0 +1,75 @@
import logging
import os
import tempfile
import shutil
import json
from subprocess import check_call, check_output
from tarfile import TarFile
from dateutil.zoneinfo import METADATA_FN, ZONEFILENAME
def rebuild(filename, tag=None, format="gz", zonegroups=[], metadata=None):
"""Rebuild the internal timezone info in dateutil/zoneinfo/zoneinfo*tar*
filename is the timezone tarball from ``ftp.iana.org/tz``.
"""
tmpdir = tempfile.mkdtemp()
zonedir = os.path.join(tmpdir, "zoneinfo")
moduledir = os.path.dirname(__file__)
try:
with TarFile.open(filename) as tf:
for name in zonegroups:
tf.extract(name, tmpdir)
filepaths = [os.path.join(tmpdir, n) for n in zonegroups]
_run_zic(zonedir, filepaths)
# write metadata file
with open(os.path.join(zonedir, METADATA_FN), 'w') as f:
json.dump(metadata, f, indent=4, sort_keys=True)
target = os.path.join(moduledir, ZONEFILENAME)
with TarFile.open(target, "w:%s" % format) as tf:
for entry in os.listdir(zonedir):
entrypath = os.path.join(zonedir, entry)
tf.add(entrypath, entry)
finally:
shutil.rmtree(tmpdir)
def _run_zic(zonedir, filepaths):
"""Calls the ``zic`` compiler in a compatible way to get a "fat" binary.
Recent versions of ``zic`` default to ``-b slim``, while older versions
don't even have the ``-b`` option (but default to "fat" binaries). The
current version of dateutil does not support Version 2+ TZif files, which
causes problems when used in conjunction with "slim" binaries, so this
function is used to ensure that we always get a "fat" binary.
"""
try:
help_text = check_output(["zic", "--help"])
except OSError as e:
_print_on_nosuchfile(e)
raise
if b"-b " in help_text:
bloat_args = ["-b", "fat"]
else:
bloat_args = []
check_call(["zic"] + bloat_args + ["-d", zonedir] + filepaths)
def _print_on_nosuchfile(e):
"""Print helpful troubleshooting message
e is an exception raised by subprocess.check_call()
"""
if e.errno == 2:
logging.error(
"Could not find zic. Perhaps you need to install "
"libc-bin or some other package that provides it, "
"or it's not in your PATH?")

View File

@ -0,0 +1 @@
import os; var = 'SETUPTOOLS_USE_DISTUTILS'; enabled = os.environ.get(var, 'local') == 'local'; enabled and __import__('_distutils_hack').add_shim();
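Because site.py executes any .pth line that begins with "import", the one-liner above is deliberately compressed. Unrolled into ordinary statements it is roughly equivalent to the following sketch (a readability aid, not part of the shipped file):

import os

var = 'SETUPTOOLS_USE_DISTUTILS'
enabled = os.environ.get(var, 'local') == 'local'
if enabled:
    # Install setuptools' meta path finder so that importing `distutils`
    # resolves to the bundled setuptools._distutils copy.
    __import__('_distutils_hack').add_shim()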

View File

@ -0,0 +1,29 @@
BSD 3-Clause License
Copyright (c) 2008-2021, The joblib developers.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,167 @@
Metadata-Version: 2.1
Name: joblib
Version: 1.2.0
Summary: Lightweight pipelining with Python functions
Home-page: https://joblib.readthedocs.io
Author: Gael Varoquaux
Author-email: gael.varoquaux@normalesup.org
License: BSD
Project-URL: Source, https://github.com/joblib/joblib
Platform: any
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Console
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Science/Research
Classifier: Intended Audience :: Education
Classifier: License :: OSI Approved :: BSD License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Topic :: Scientific/Engineering
Classifier: Topic :: Utilities
Classifier: Topic :: Software Development :: Libraries
Requires-Python: >=3.7
Description-Content-Type: text/x-rst
License-File: LICENSE.txt
|PyPi| |Azure| |ReadTheDocs| |Codecov|
.. |PyPi| image:: https://badge.fury.io/py/joblib.svg
:target: https://badge.fury.io/py/joblib
:alt: Joblib version
.. |Azure| image:: https://dev.azure.com/joblib/joblib/_apis/build/status/joblib.joblib?branchName=master
:target: https://dev.azure.com/joblib/joblib/_build?definitionId=3&_a=summary&branchFilter=40
:alt: Azure CI status
.. |ReadTheDocs| image:: https://readthedocs.org/projects/joblib/badge/?version=latest
:target: https://joblib.readthedocs.io/en/latest/?badge=latest
:alt: Documentation Status
.. |Codecov| image:: https://codecov.io/gh/joblib/joblib/branch/master/graph/badge.svg
:target: https://codecov.io/gh/joblib/joblib
:alt: Codecov coverage
The homepage of joblib with user documentation is located on:
https://joblib.readthedocs.io
Getting the latest code
=======================
To get the latest code using git, simply type::
git clone git://github.com/joblib/joblib.git
If you don't have git installed, you can download a zip or tarball
of the latest code: http://github.com/joblib/joblib/archives/master
Installing
==========
You can use `pip` to install joblib::
pip install joblib
from any directory or::
python setup.py install
from the source directory.
Dependencies
============
- Joblib has no mandatory dependencies besides Python (supported versions are
3.7+).
- Joblib has an optional dependency on Numpy (at least version 1.6.1) for array
manipulation.
- Joblib includes its own vendored copy of
`loky <https://github.com/tomMoral/loky>`_ for process management.
- Joblib can efficiently dump and load numpy arrays but does not require numpy
to be installed.
- Joblib has an optional dependency on
`python-lz4 <https://pypi.python.org/pypi/lz4>`_ as a faster alternative to
zlib and gzip for compressed serialization.
- Joblib has an optional dependency on psutil to mitigate memory leaks in
parallel worker processes.
- Some examples require external dependencies such as pandas. See the
instructions in the `Building the docs`_ section for details.
Workflow to contribute
======================
To contribute to joblib, first create an account on `github
<http://github.com/>`_. Once this is done, fork the `joblib repository
<http://github.com/joblib/joblib>`_ to have your own repository,
clone it using 'git clone' on the computers where you want to work. Make
your changes in your clone, push them to your github account, test them
on several computers, and when you are happy with them, send a pull
request to the main repository.
Running the test suite
======================
To run the test suite, you need the pytest (version >= 3) and coverage modules.
Run the test suite using::
pytest joblib
from the root of the project.
Building the docs
=================
To build the docs you need to have sphinx (>=1.4) and some dependencies
installed::
pip install -U -r .readthedocs-requirements.txt
The docs can then be built with the following command::
make doc
The html docs are located in the ``doc/_build/html`` directory.
Making a source tarball
=======================
To create a source tarball, e.g. for packaging or distributing, run the
following command::
python setup.py sdist
The tarball will be created in the `dist` directory. This command will
compile the docs, and the resulting tarball can be installed with
no dependencies other than the Python standard library. You will need
setuptools and sphinx.
Making a release and uploading it to PyPI
=========================================
This command is only run by the project manager to make a release and
upload it to PyPI::
python setup.py sdist bdist_wheel
twine upload dist/*
Note that the documentation should automatically get updated at each git
push. If that is not the case, try building the docs locally and resolve
any doc build errors (in particular when running the examples).
Updating the changelog
======================
Changes are listed in the CHANGES.rst file. They must be updated manually,
but the following git command may be used to generate the lines::
git log --abbrev-commit --date=short --no-merges --sparse

View File

@ -0,0 +1,229 @@
joblib-1.2.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
joblib-1.2.0.dist-info/LICENSE.txt,sha256=QmEpEcGHLF5LQ_auDo7llGfNNQMyJBz3LOkGQCZPrmo,1527
joblib-1.2.0.dist-info/METADATA,sha256=8WqQd0iOL12Xi46VHDpPINbmuwBKJAbVZhm431qnTjc,5297
joblib-1.2.0.dist-info/RECORD,,
joblib-1.2.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
joblib-1.2.0.dist-info/top_level.txt,sha256=P0LsoZ45gBL7ckL4lqQt7tdbrHD4xlVYhffmhHeeT_U,7
joblib/__init__.py,sha256=CXOHJmaK9uQ_RyLc4X-FusXf_NDOwd9gIfkE13qA1yM,4995
joblib/__pycache__/__init__.cpython-311.pyc,,
joblib/__pycache__/_cloudpickle_wrapper.cpython-311.pyc,,
joblib/__pycache__/_dask.cpython-311.pyc,,
joblib/__pycache__/_deprecated_format_stack.cpython-311.pyc,,
joblib/__pycache__/_deprecated_my_exceptions.cpython-311.pyc,,
joblib/__pycache__/_memmapping_reducer.cpython-311.pyc,,
joblib/__pycache__/_multiprocessing_helpers.cpython-311.pyc,,
joblib/__pycache__/_parallel_backends.cpython-311.pyc,,
joblib/__pycache__/_store_backends.cpython-311.pyc,,
joblib/__pycache__/_utils.cpython-311.pyc,,
joblib/__pycache__/backports.cpython-311.pyc,,
joblib/__pycache__/compressor.cpython-311.pyc,,
joblib/__pycache__/disk.cpython-311.pyc,,
joblib/__pycache__/executor.cpython-311.pyc,,
joblib/__pycache__/format_stack.cpython-311.pyc,,
joblib/__pycache__/func_inspect.cpython-311.pyc,,
joblib/__pycache__/hashing.cpython-311.pyc,,
joblib/__pycache__/logger.cpython-311.pyc,,
joblib/__pycache__/memory.cpython-311.pyc,,
joblib/__pycache__/my_exceptions.cpython-311.pyc,,
joblib/__pycache__/numpy_pickle.cpython-311.pyc,,
joblib/__pycache__/numpy_pickle_compat.cpython-311.pyc,,
joblib/__pycache__/numpy_pickle_utils.cpython-311.pyc,,
joblib/__pycache__/parallel.cpython-311.pyc,,
joblib/__pycache__/pool.cpython-311.pyc,,
joblib/__pycache__/testing.cpython-311.pyc,,
joblib/_cloudpickle_wrapper.py,sha256=fUx9s5qv2nMDi0xhIT7ttYJd6fGG8aYdBJbdXM-cBmc,376
joblib/_dask.py,sha256=Tj4GRYMAucsTblVr0HYNDg0tPFgZcjP6zA55jyPj6NM,13006
joblib/_deprecated_format_stack.py,sha256=_pPLwMH6hydhpPCNxRRTCkbcsKu-SdM0H-cxn5X-EDE,14505
joblib/_deprecated_my_exceptions.py,sha256=q8QyZCf_sFBM6PUKaebZMTdIIyx8yeBuOfbyxZX7mA0,4134
joblib/_memmapping_reducer.py,sha256=axuV2_zUxk7k7iJIWPXM9JXxh20_n3wHwrye1BfmmHE,28277
joblib/_multiprocessing_helpers.py,sha256=t7wIXfrLfzqFXjOeOYs4JP45tptxmYm5_yE8ylIRbR8,1925
joblib/_parallel_backends.py,sha256=1ui21a49B-xcu-l1dpzdzytb0sWKsiI0otiIir8o1hU,25438
joblib/_store_backends.py,sha256=uH1_HSM4C28QM-mCR2Tlq5XQZpwQP7In5kxcWfX9ZO0,14434
joblib/_utils.py,sha256=1Z5H3H2D0GVNMgQlCusSziMGHKHOl0V3um1tDLj-jY0,1116
joblib/backports.py,sha256=SLHgXOABF9JiHan3RmI_jIIvkiWEyOoGecg8pIA7z1o,6082
joblib/compressor.py,sha256=1u7lSlHAji0DMvH2qsxTol1dGzDNTWOsLLCFZuSq1AU,19769
joblib/disk.py,sha256=PxUC63dBG2O1GriL1SLskHUjz7XzR-y6rqKEJHEY0jA,4389
joblib/executor.py,sha256=wF4pTwot1wRzGLqozbQ8m46SN40RSUFVMLxprqxG2AY,5316
joblib/externals/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
joblib/externals/__pycache__/__init__.cpython-311.pyc,,
joblib/externals/cloudpickle/__init__.py,sha256=AjJuqYtfWZSP5igeJIFvAYsNy6kHgLen58Q4w-ZyWH4,292
joblib/externals/cloudpickle/__pycache__/__init__.cpython-311.pyc,,
joblib/externals/cloudpickle/__pycache__/cloudpickle.cpython-311.pyc,,
joblib/externals/cloudpickle/__pycache__/cloudpickle_fast.cpython-311.pyc,,
joblib/externals/cloudpickle/__pycache__/compat.cpython-311.pyc,,
joblib/externals/cloudpickle/cloudpickle.py,sha256=tdSUqMaT1OQcSairpMCPqWn6npVpcdjLuTbhUjD11a8,35137
joblib/externals/cloudpickle/cloudpickle_fast.py,sha256=BXEnvuGqN1rakNfxpmk0zEbZdQLqttn3n2QCO410mRk,34114
joblib/externals/cloudpickle/compat.py,sha256=ITODUx0U_b7chpS47_Siqx7YP1rMYUpJH6B3rBANmWY,508
joblib/externals/loky/__init__.py,sha256=V1JyKhbeXyZ4ThFoXVIvC3GQDc9FOXN4VHV6KV69xs0,1085
joblib/externals/loky/__pycache__/__init__.cpython-311.pyc,,
joblib/externals/loky/__pycache__/_base.cpython-311.pyc,,
joblib/externals/loky/__pycache__/cloudpickle_wrapper.cpython-311.pyc,,
joblib/externals/loky/__pycache__/initializers.cpython-311.pyc,,
joblib/externals/loky/__pycache__/process_executor.cpython-311.pyc,,
joblib/externals/loky/__pycache__/reusable_executor.cpython-311.pyc,,
joblib/externals/loky/_base.py,sha256=Jd_A_xtbMztXEbsD5I8J1-UUv1M2BGFyrPMMnd9mA-Q,1057
joblib/externals/loky/backend/__init__.py,sha256=7jTmGTrtjxnQF6rqX_xOd3odn22K-zbe4_Of5yZ6dTc,312
joblib/externals/loky/backend/__pycache__/__init__.cpython-311.pyc,,
joblib/externals/loky/backend/__pycache__/_posix_reduction.cpython-311.pyc,,
joblib/externals/loky/backend/__pycache__/_win_reduction.cpython-311.pyc,,
joblib/externals/loky/backend/__pycache__/context.cpython-311.pyc,,
joblib/externals/loky/backend/__pycache__/fork_exec.cpython-311.pyc,,
joblib/externals/loky/backend/__pycache__/popen_loky_posix.cpython-311.pyc,,
joblib/externals/loky/backend/__pycache__/popen_loky_win32.cpython-311.pyc,,
joblib/externals/loky/backend/__pycache__/process.cpython-311.pyc,,
joblib/externals/loky/backend/__pycache__/queues.cpython-311.pyc,,
joblib/externals/loky/backend/__pycache__/reduction.cpython-311.pyc,,
joblib/externals/loky/backend/__pycache__/resource_tracker.cpython-311.pyc,,
joblib/externals/loky/backend/__pycache__/spawn.cpython-311.pyc,,
joblib/externals/loky/backend/__pycache__/synchronize.cpython-311.pyc,,
joblib/externals/loky/backend/__pycache__/utils.cpython-311.pyc,,
joblib/externals/loky/backend/_posix_reduction.py,sha256=FTCvX2f5IfKX5obn5tr61xP6TkN4E68L-mBhWr8tpn4,1801
joblib/externals/loky/backend/_win_reduction.py,sha256=F6hddzf7ZPUj1O4OYJbZ12pJmBrvGZ3mDaispPa-iqo,2042
joblib/externals/loky/backend/context.py,sha256=BIRF8utNPHzb0EbjplFzlL0YYa87bPKN2vbFE-NQxl4,11433
joblib/externals/loky/backend/fork_exec.py,sha256=rBuA8fdO5TVaQBeV7TFrLri3VhLzseyKnBc04fB904A,1185
joblib/externals/loky/backend/popen_loky_posix.py,sha256=9wOtvORNSP11KQKa8jRSVfQHVvel7lUtCWGRPEkMhTk,5556
joblib/externals/loky/backend/popen_loky_win32.py,sha256=nll8E81wMpglVazStwo7k3OybdDqOItKA2L84VEr4ag,5052
joblib/externals/loky/backend/process.py,sha256=LgGKNvSIN7uA-piZ3vdEWpJPgGepU73cRiacA9VqdG0,1798
joblib/externals/loky/backend/queues.py,sha256=29UGM2u12PvXCB6c-Gkakmc_jlzdYzF0MoXlIileY6Q,6801
joblib/externals/loky/backend/reduction.py,sha256=6IAslYWdqqotcVSh-6caV-Hy142BaqRQedR7pCYmvtU,7062
joblib/externals/loky/backend/resource_tracker.py,sha256=Z6dqTP3SyAwtzVDEVbXeAwQj1BKkijhO1uSGst397ug,14359
joblib/externals/loky/backend/spawn.py,sha256=mydRcNAvzdJt6f1v2T-ORXzF5DlmNKDYTnHukCTLzqE,8857
joblib/externals/loky/backend/synchronize.py,sha256=CvVpKTTlaKhH0ED573XF9ZrBGau4eBjn3KQGYSCkOfU,11719
joblib/externals/loky/backend/utils.py,sha256=-bzkl3uj8sr9pFzxUbL6_-f7W-ekC4P10Im1JxXwBcc,5738
joblib/externals/loky/cloudpickle_wrapper.py,sha256=gUDfxIQ7sm_stiGym1lP5SP9xFbJnUbVZiIG_ypKE9s,3578
joblib/externals/loky/initializers.py,sha256=XHjfNLD9Y5ruzrpg9geluDPxs4C-kn4s9q1iUGSBlGA,2539
joblib/externals/loky/process_executor.py,sha256=XVSS0A3WcsweWfnaBnAVtStMtu-1X9y9MYeE-adaCkA,48885
joblib/externals/loky/reusable_executor.py,sha256=_y61oYui2GMqq5XnOScrwH5Z82lFBgUI6nMnYMGnKBg,10028
joblib/format_stack.py,sha256=I5WFLlnyYhrYEtZWJXtVhv70B7AwWV_pL4ElvgnIW1o,1045
joblib/func_inspect.py,sha256=sS-Nc7Xe8SJXtRta8ILhzOCF2KG9TE_UGe5V9qTQIIg,13945
joblib/hashing.py,sha256=NO90wKt1boeyAM8RlD0HtNwJHVsGLvKdVj9oRGpi8_Y,10537
joblib/logger.py,sha256=xjTDhqjpQU8DjIqrL35IiZ1z7HJ-fgbVc-7Ijcii3Eg,5129
joblib/memory.py,sha256=j5nTnYMzVDcQOtQYE68Oc_HAu_JY9bF8VXOysHfFvUQ,40609
joblib/my_exceptions.py,sha256=-TkSV9Uy6kVSOSm870CQFYHIbIM6CJKlQtrrM-XRDAw,962
joblib/numpy_pickle.py,sha256=58LBcb1HIvEoew4yCwPl18dvSnCA7F0zLTacOUimN7E,26869
joblib/numpy_pickle_compat.py,sha256=eRRsub4aOB0fPmkC5L-JoKKs6Nv7hFs9ZwQmDmbANQg,8547
joblib/numpy_pickle_utils.py,sha256=Ema4jJslsCG_HDIAtnjT7wDNw09Uv-5367N02SN3lAM,8705
joblib/parallel.py,sha256=_4fVQXQ2mlnJeAaIJQHGNw3w32nw5Rj9pfW-FVi9bX4,48569
joblib/pool.py,sha256=JuG9uuE3KJwR8gJZIGzwJqObJD0xFuUhQP2IYvgQab0,14334
joblib/test/__init__.py,sha256=bkIwY5OneyPcRn2VuzQlIFdtW5Cwo1mUJ7IfSztDO9c,73
joblib/test/__pycache__/__init__.cpython-311.pyc,,
joblib/test/__pycache__/common.cpython-311.pyc,,
joblib/test/__pycache__/test_backports.cpython-311.pyc,,
joblib/test/__pycache__/test_cloudpickle_wrapper.cpython-311.pyc,,
joblib/test/__pycache__/test_dask.cpython-311.pyc,,
joblib/test/__pycache__/test_deprecated_objects.cpython-311.pyc,,
joblib/test/__pycache__/test_disk.cpython-311.pyc,,
joblib/test/__pycache__/test_format_stack.cpython-311.pyc,,
joblib/test/__pycache__/test_func_inspect.cpython-311.pyc,,
joblib/test/__pycache__/test_func_inspect_special_encoding.cpython-311.pyc,,
joblib/test/__pycache__/test_hashing.cpython-311.pyc,,
joblib/test/__pycache__/test_init.cpython-311.pyc,,
joblib/test/__pycache__/test_logger.cpython-311.pyc,,
joblib/test/__pycache__/test_memmapping.cpython-311.pyc,,
joblib/test/__pycache__/test_memory.cpython-311.pyc,,
joblib/test/__pycache__/test_missing_multiprocessing.cpython-311.pyc,,
joblib/test/__pycache__/test_module.cpython-311.pyc,,
joblib/test/__pycache__/test_my_exceptions.cpython-311.pyc,,
joblib/test/__pycache__/test_numpy_pickle.cpython-311.pyc,,
joblib/test/__pycache__/test_numpy_pickle_compat.cpython-311.pyc,,
joblib/test/__pycache__/test_numpy_pickle_utils.cpython-311.pyc,,
joblib/test/__pycache__/test_parallel.cpython-311.pyc,,
joblib/test/__pycache__/test_store_backends.cpython-311.pyc,,
joblib/test/__pycache__/test_testing.cpython-311.pyc,,
joblib/test/__pycache__/test_utils.cpython-311.pyc,,
joblib/test/__pycache__/testutils.cpython-311.pyc,,
joblib/test/common.py,sha256=gWDIvGl8Ns6vPTBvItuSFoEWigI0EPXSoPkSvqSM4zM,3283
joblib/test/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
joblib/test/data/__pycache__/__init__.cpython-311.pyc,,
joblib/test/data/__pycache__/create_numpy_pickle.cpython-311.pyc,,
joblib/test/data/create_numpy_pickle.py,sha256=9Gm6_T-pqe7LGTo0xbz3e1KjIhqKKb6A7ImSAlY2RyM,3459
joblib/test/data/joblib_0.10.0_compressed_pickle_py27_np16.gz,sha256=QYRH6Q2DSGVorjCSqWCxjTWCMOJKyew4Nl2qmfQVvQ8,769
joblib/test/data/joblib_0.10.0_compressed_pickle_py27_np17.gz,sha256=ofTozM_KlPJa50TR8FCwc09mMmO6OO0GQhgUBLNIsXs,757
joblib/test/data/joblib_0.10.0_compressed_pickle_py33_np18.gz,sha256=2eIVeA-XjOaT5IEQ6tI2UuHG3hwhiRciMmkBmPcIh4g,792
joblib/test/data/joblib_0.10.0_compressed_pickle_py34_np19.gz,sha256=Gr2z_1tVWDH1H3_wCVHmakknf8KqeHKT8Yz4d1vmUCM,794
joblib/test/data/joblib_0.10.0_compressed_pickle_py35_np19.gz,sha256=pWw_xuDbOkECqu1KGf1OFU7s2VbzC2v5F5iXhE7TwB4,790
joblib/test/data/joblib_0.10.0_pickle_py27_np17.pkl,sha256=icRQjj374B-AHk5znxre0T9oWUHokoHIBQ8MqKo8l-U,986
joblib/test/data/joblib_0.10.0_pickle_py27_np17.pkl.bz2,sha256=oYQVIyMiUxyRgWSuBBSOvCWKzToA-kUpcoQWdV4UoV4,997
joblib/test/data/joblib_0.10.0_pickle_py27_np17.pkl.gzip,sha256=Jpv3iGcDgKTv-O4nZsUreIbUK7qnt2cugZ-VMgNeEDQ,798
joblib/test/data/joblib_0.10.0_pickle_py27_np17.pkl.lzma,sha256=c0wu0x8pPv4BcStj7pE61rZpf68FLG_pNzQZ4e82zH8,660
joblib/test/data/joblib_0.10.0_pickle_py27_np17.pkl.xz,sha256=77FG1FDG0GHQav-1bxc4Tn9ky6ubUW_MbE0_iGmz5wc,712
joblib/test/data/joblib_0.10.0_pickle_py33_np18.pkl,sha256=4GTC7s_cWNVShERn2nvVbspZYJgyK_0man4TEqvdVzU,1068
joblib/test/data/joblib_0.10.0_pickle_py33_np18.pkl.bz2,sha256=6G1vbs_iYmz2kYJ6w4qB1k7D67UnxUMus0S4SWeBtFo,1000
joblib/test/data/joblib_0.10.0_pickle_py33_np18.pkl.gzip,sha256=tlRUWeJS1BXmcwtLNSNK9L0hDHekFl07CqWxTShinmY,831
joblib/test/data/joblib_0.10.0_pickle_py33_np18.pkl.lzma,sha256=CorPwnfv3rR5hjNtJI01-sEBMOnkSxNlRVaWTszMopA,694
joblib/test/data/joblib_0.10.0_pickle_py33_np18.pkl.xz,sha256=Dppj3MffOKsKETeptEtDaxPOv6MA6xnbpK5LzlDQ-oE,752
joblib/test/data/joblib_0.10.0_pickle_py34_np19.pkl,sha256=HL5Fb1uR9aPLjjhoOPJ2wwM1Qyo1FCZoYYd2HVw0Fos,1068
joblib/test/data/joblib_0.10.0_pickle_py34_np19.pkl.bz2,sha256=Pyr2fqZnwfUxXdyrBr-kRwBYY8HA_Yi7fgSguKy5pUs,1021
joblib/test/data/joblib_0.10.0_pickle_py34_np19.pkl.gzip,sha256=os8NJjQI9FhnlZM-Ay9dX_Uo35gZnoJCgQSIVvcBPfE,831
joblib/test/data/joblib_0.10.0_pickle_py34_np19.pkl.lzma,sha256=Q_0y43qU7_GqAabJ8y3PWVhOisurnCAq3GzuCu04V58,697
joblib/test/data/joblib_0.10.0_pickle_py34_np19.pkl.xz,sha256=BNfmiQfpeLVpdfkwlJK4hJ5Cpgl0vreVyekyc5d_PNM,752
joblib/test/data/joblib_0.10.0_pickle_py35_np19.pkl,sha256=l7nvLolhBDIdPFznOz3lBHiMOPBPCMi1bXop1tFSCpY,1068
joblib/test/data/joblib_0.10.0_pickle_py35_np19.pkl.bz2,sha256=pqGpuIS-ZU4uP8mkglHs8MaSDiVcPy7l3XHYJSppRgY,1005
joblib/test/data/joblib_0.10.0_pickle_py35_np19.pkl.gzip,sha256=YRFXE6LEb6qK72yPqnXdqQVY8Ts8xKUS9PWQKhLxWvk,833
joblib/test/data/joblib_0.10.0_pickle_py35_np19.pkl.lzma,sha256=Bf7gCUeTuTjCkbcIdyZYz69irblX4SAVQEzxCnMQhNU,701
joblib/test/data/joblib_0.10.0_pickle_py35_np19.pkl.xz,sha256=As8w2LGWwwNmKy3QNdKljK63Yq46gjRf_RJ0lh5_WqA,752
joblib/test/data/joblib_0.11.0_compressed_pickle_py36_np111.gz,sha256=1WrnXDqDoNEPYOZX1Q5Wr2463b8vVV6fw4Wm5S4bMt4,800
joblib/test/data/joblib_0.11.0_pickle_py36_np111.pkl,sha256=XmsOFxeC1f1aYdGETclG6yfF9rLoB11DayOAhDMULrw,1068
joblib/test/data/joblib_0.11.0_pickle_py36_np111.pkl.bz2,sha256=vI2yWb50LKL_NgZyd_XkoD5teIg93uI42mWnx9ee-AQ,991
joblib/test/data/joblib_0.11.0_pickle_py36_np111.pkl.gzip,sha256=1WrnXDqDoNEPYOZX1Q5Wr2463b8vVV6fw4Wm5S4bMt4,800
joblib/test/data/joblib_0.11.0_pickle_py36_np111.pkl.lzma,sha256=IWA0JlZG2ur53HgTUDl1m7q79dcVq6b0VOq33gKoJU0,715
joblib/test/data/joblib_0.11.0_pickle_py36_np111.pkl.xz,sha256=3Xh_NbMZdBjYx7ynfJ3Fyke28izSRSSzzNB0z5D4k9Y,752
joblib/test/data/joblib_0.8.4_compressed_pickle_py27_np17.gz,sha256=Sp-ZT7i6pj5on2gbptszu7RarzJpOmHJ67UKOmCPQMg,659
joblib/test/data/joblib_0.9.2_compressed_pickle_py27_np16.gz,sha256=NLtDrvo2XIH0KvUUAvhOqMeoXEjGW0IuTk_osu5XiDw,658
joblib/test/data/joblib_0.9.2_compressed_pickle_py27_np17.gz,sha256=NLtDrvo2XIH0KvUUAvhOqMeoXEjGW0IuTk_osu5XiDw,658
joblib/test/data/joblib_0.9.2_compressed_pickle_py34_np19.gz,sha256=nzO9iiGkG3KbBdrF3usOho8higkrDj_lmICUzxZyF_Y,673
joblib/test/data/joblib_0.9.2_compressed_pickle_py35_np19.gz,sha256=nzO9iiGkG3KbBdrF3usOho8higkrDj_lmICUzxZyF_Y,673
joblib/test/data/joblib_0.9.2_pickle_py27_np16.pkl,sha256=naijdk2xIeKdIa3mfJw0JlmOdtiN6uRM1yOJg6-M73M,670
joblib/test/data/joblib_0.9.2_pickle_py27_np16.pkl_01.npy,sha256=DvvX2c5-7DpuCg20HnleA5bMo9awN9rWxhtGSEPSiAk,120
joblib/test/data/joblib_0.9.2_pickle_py27_np16.pkl_02.npy,sha256=HBzzbLeB-8whuVO7CgtF3wktoOrg52WILlljzNcBBbE,120
joblib/test/data/joblib_0.9.2_pickle_py27_np16.pkl_03.npy,sha256=oMRa4qKJhBy-uiRDt-uqOzHAqencxzKUrKVynaAJJAU,236
joblib/test/data/joblib_0.9.2_pickle_py27_np16.pkl_04.npy,sha256=PsviRClLqT4IR5sWwbmpQR41af9mDtBFncodJBOB3wU,104
joblib/test/data/joblib_0.9.2_pickle_py27_np17.pkl,sha256=LynX8dLOygfxDfFywOgm7wgWOhSxLG7z-oDsU6X83Dw,670
joblib/test/data/joblib_0.9.2_pickle_py27_np17.pkl_01.npy,sha256=DvvX2c5-7DpuCg20HnleA5bMo9awN9rWxhtGSEPSiAk,120
joblib/test/data/joblib_0.9.2_pickle_py27_np17.pkl_02.npy,sha256=HBzzbLeB-8whuVO7CgtF3wktoOrg52WILlljzNcBBbE,120
joblib/test/data/joblib_0.9.2_pickle_py27_np17.pkl_03.npy,sha256=oMRa4qKJhBy-uiRDt-uqOzHAqencxzKUrKVynaAJJAU,236
joblib/test/data/joblib_0.9.2_pickle_py27_np17.pkl_04.npy,sha256=PsviRClLqT4IR5sWwbmpQR41af9mDtBFncodJBOB3wU,104
joblib/test/data/joblib_0.9.2_pickle_py33_np18.pkl,sha256=w9TLxpDTzp5TI6cU6lRvMsAasXEChcQgGE9s30sm_CU,691
joblib/test/data/joblib_0.9.2_pickle_py33_np18.pkl_01.npy,sha256=DvvX2c5-7DpuCg20HnleA5bMo9awN9rWxhtGSEPSiAk,120
joblib/test/data/joblib_0.9.2_pickle_py33_np18.pkl_02.npy,sha256=HBzzbLeB-8whuVO7CgtF3wktoOrg52WILlljzNcBBbE,120
joblib/test/data/joblib_0.9.2_pickle_py33_np18.pkl_03.npy,sha256=jt6aZKUrJdfbMJUJVsl47As5MrfRSs1avGMhbmS6vec,307
joblib/test/data/joblib_0.9.2_pickle_py33_np18.pkl_04.npy,sha256=PsviRClLqT4IR5sWwbmpQR41af9mDtBFncodJBOB3wU,104
joblib/test/data/joblib_0.9.2_pickle_py34_np19.pkl,sha256=ilOBAOaulLFvKrD32S1NfnpiK-LfzA9rC3O2I7xROuI,691
joblib/test/data/joblib_0.9.2_pickle_py34_np19.pkl_01.npy,sha256=DvvX2c5-7DpuCg20HnleA5bMo9awN9rWxhtGSEPSiAk,120
joblib/test/data/joblib_0.9.2_pickle_py34_np19.pkl_02.npy,sha256=HBzzbLeB-8whuVO7CgtF3wktoOrg52WILlljzNcBBbE,120
joblib/test/data/joblib_0.9.2_pickle_py34_np19.pkl_03.npy,sha256=jt6aZKUrJdfbMJUJVsl47As5MrfRSs1avGMhbmS6vec,307
joblib/test/data/joblib_0.9.2_pickle_py34_np19.pkl_04.npy,sha256=PsviRClLqT4IR5sWwbmpQR41af9mDtBFncodJBOB3wU,104
joblib/test/data/joblib_0.9.2_pickle_py35_np19.pkl,sha256=WfDVIqKcMzzh1gSAshIfzBoIpdLdZQuG79yYf5kfpOo,691
joblib/test/data/joblib_0.9.2_pickle_py35_np19.pkl_01.npy,sha256=DvvX2c5-7DpuCg20HnleA5bMo9awN9rWxhtGSEPSiAk,120
joblib/test/data/joblib_0.9.2_pickle_py35_np19.pkl_02.npy,sha256=HBzzbLeB-8whuVO7CgtF3wktoOrg52WILlljzNcBBbE,120
joblib/test/data/joblib_0.9.2_pickle_py35_np19.pkl_03.npy,sha256=jt6aZKUrJdfbMJUJVsl47As5MrfRSs1avGMhbmS6vec,307
joblib/test/data/joblib_0.9.2_pickle_py35_np19.pkl_04.npy,sha256=PsviRClLqT4IR5sWwbmpQR41af9mDtBFncodJBOB3wU,104
joblib/test/data/joblib_0.9.4.dev0_compressed_cache_size_pickle_py35_np19.gz,sha256=8jYfWJsx0oY2J-3LlmEigK5cClnJSW2J2rfeSTZw-Ts,802
joblib/test/data/joblib_0.9.4.dev0_compressed_cache_size_pickle_py35_np19.gz_01.npy.z,sha256=YT9VvT3sEl2uWlOyvH2CkyE9Sok4od9O3kWtgeuUUqE,43
joblib/test/data/joblib_0.9.4.dev0_compressed_cache_size_pickle_py35_np19.gz_02.npy.z,sha256=txA5RDI0PRuiU_UNKY8pGp-zQgQQ9vaVvMi60hOPaVs,43
joblib/test/data/joblib_0.9.4.dev0_compressed_cache_size_pickle_py35_np19.gz_03.npy.z,sha256=d3AwICvU2MpSNjh2aPIsdJeGZLlDjANAF1Soa6uM0Po,37
joblib/test/test_backports.py,sha256=Y9bhGa6H-K_FgLkDyXaSHzpaWk148Rjn8R9IKCKdy-k,1175
joblib/test/test_cloudpickle_wrapper.py,sha256=GNaOG8SygvHfWNdhV5nirM3uo4F-5OB3yuGlg7xtrSY,749
joblib/test/test_dask.py,sha256=LmHYrjNKa8v7I3hnEV1nmSzVoG6Ip5gOcBBrxlZDy0M,18004
joblib/test/test_deprecated_objects.py,sha256=r5Y_Bcomk-CXvwCDlNrIN7eqXytVeQ4h_sMPinkPkrE,1077
joblib/test/test_disk.py,sha256=wJd1o9nLzqEjLqxxkgB9S7-UcKjHPQ8qK5l0czcNp0o,2205
joblib/test/test_format_stack.py,sha256=wTtjRlp0edNv7_NzxZU6DAVJQoebL-lnGsUEMwVZXpM,4250
joblib/test/test_func_inspect.py,sha256=KDvLtRziyA_r6GiNEh05_zhlx9ntJW7abSuQ6cqJajU,8935
joblib/test/test_func_inspect_special_encoding.py,sha256=oHbMTPOK3XI0YVoS0GsouJ-GfM_neP4GOIJC-TKnNgU,146
joblib/test/test_hashing.py,sha256=bW7F8gLQwDpCOFRCAWlDx1iyGjQ3BDEYBol3vpEztaE,16078
joblib/test/test_init.py,sha256=bgNF-9CIJl1MFNA75LBWOaiNtvduVfuvglz_u9Tt8Uc,422
joblib/test/test_logger.py,sha256=WjxDzpRmdwj_Uyt2R-S3DFRT9CGTk7G2DWucU3sqbu8,985
joblib/test/test_memmapping.py,sha256=l7uIhxgbcLh1WepMdB2IBEamZ_RisbVK38BQtdGPB1o,42512
joblib/test/test_memory.py,sha256=sgoBhXWgZjWTb3GiV21RAn34ehpku5aaZjzWR6338Ow,43864
joblib/test/test_missing_multiprocessing.py,sha256=oeneMgi6iUVIbkfazGvXmOp6fqa7ok9uhj902Qjs_nk,1123
joblib/test/test_module.py,sha256=HTHQSgnwa-1blkvhL_oVBO5yYdw8IrQTIJCK_QCsMtM,1936
joblib/test/test_my_exceptions.py,sha256=4_1xlIwbgEt6_bqaQ8lGyTfUAV7RhLMRRbQKIyYOTA8,2066
joblib/test/test_numpy_pickle.py,sha256=dzt4F53-ANNEr0jWqCeppePIaHp7uh8vuCu7rx9-CtE,42357
joblib/test/test_numpy_pickle_compat.py,sha256=C5OiaFrqmxYD57fr_LpmItd6OOZPeOMfo9RVr6ZZIkk,624
joblib/test/test_numpy_pickle_utils.py,sha256=PJVVgr-v3so9oAf9LblASRCpt-wXAo19FvsUpw-fZjI,421
joblib/test/test_parallel.py,sha256=WAnr1vHXnEN8muI-txTOXZ2AMv8hWVOoTxKqCgputhg,61231
joblib/test/test_store_backends.py,sha256=fZh0_E5Rj5VTJ_UzH3autHpWwEaWQvWTiQB8felVAN4,1942
joblib/test/test_testing.py,sha256=I-EkdKHWdHu8m5fo2NnyB0AqR8zAOJ01WKKvyZYRneY,2467
joblib/test/test_utils.py,sha256=L6aBHiZrF6TUFbPGmQXGyNMnoAy5pEhOJoH5CAG_6CU,584
joblib/test/testutils.py,sha256=6a7zVJm1kg6M-t4CH9tz8A6rMdC6ZY9sU6wBB8C7Zzo,251
joblib/testing.py,sha256=JqQDWEkN76ZPmt65EswiERMofFre_JUFzBJgfmw2k2w,2260

View File

@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.37.1)
Root-Is-Purelib: true
Tag: py3-none-any

View File

@ -0,0 +1,138 @@
"""Joblib is a set of tools to provide **lightweight pipelining in
Python**. In particular:
1. transparent disk-caching of functions and lazy re-evaluation
(memoize pattern)
2. easy, simple parallel computing
Joblib is optimized to be **fast** and **robust** on large
data in particular and has specific optimizations for `numpy` arrays. It is
**BSD-licensed**.
==================== ===============================================
**Documentation:** https://joblib.readthedocs.io
**Download:** https://pypi.python.org/pypi/joblib#downloads
**Source code:** https://github.com/joblib/joblib
**Report issues:** https://github.com/joblib/joblib/issues
==================== ===============================================
Vision
--------
The vision is to provide tools to easily achieve better performance and
reproducibility when working with long running jobs.
* **Avoid computing the same thing twice**: code is often rerun again and
again, for instance when prototyping computational-heavy jobs (as in
scientific development), but hand-crafted solutions to alleviate this
issue are error-prone and often lead to unreproducible results.
* **Persist to disk transparently**: efficiently persisting
arbitrary objects containing large data is hard. Using
joblib's caching mechanism avoids hand-written persistence and
implicitly links the file on disk to the execution context of
the original Python object. As a result, joblib's persistence is
good for resuming an application status or computational job, eg
after a crash.
Joblib addresses these problems while **leaving your code and your flow
control as unmodified as possible** (no framework, no new paradigms).
Main features
------------------
1) **Transparent and fast disk-caching of output value:** a memoize or
make-like functionality for Python functions that works well for
arbitrary Python objects, including very large numpy arrays. Separate
persistence and flow-execution logic from domain logic or algorithmic
code by writing the operations as a set of steps with well-defined
inputs and outputs: Python functions. Joblib can save their
computation to disk and rerun it only if necessary::
>>> from joblib import Memory
>>> cachedir = 'your_cache_dir_goes_here'
>>> mem = Memory(cachedir)
>>> import numpy as np
>>> a = np.vander(np.arange(3)).astype(float)
>>> square = mem.cache(np.square)
>>> b = square(a) # doctest: +ELLIPSIS
________________________________________________________________________________
[Memory] Calling square...
square(array([[0., 0., 1.],
[1., 1., 1.],
[4., 2., 1.]]))
___________________________________________________________square - 0...s, 0.0min
>>> c = square(a)
>>> # The above call did not trigger an evaluation
2) **Embarrassingly parallel helper:** to make it easy to write readable
parallel code and debug it quickly::
>>> from joblib import Parallel, delayed
>>> from math import sqrt
>>> Parallel(n_jobs=1)(delayed(sqrt)(i**2) for i in range(10))
[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
3) **Fast compressed Persistence**: a replacement for pickle to work
efficiently on Python objects containing large data (
*joblib.dump* & *joblib.load* ).
..
>>> import shutil ; shutil.rmtree(cachedir)
"""
# PEP0440 compatible formatted version, see:
# https://www.python.org/dev/peps/pep-0440/
#
# Generic release markers:
# X.Y
# X.Y.Z # For bugfix releases
#
# Admissible pre-release markers:
# X.YaN # Alpha release
# X.YbN # Beta release
# X.YrcN # Release Candidate
# X.Y # Final release
#
# Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
#
__version__ = '1.2.0'
import os
from .memory import Memory, MemorizedResult, register_store_backend
from .logger import PrintTime
from .logger import Logger
from .hashing import hash
from .numpy_pickle import dump
from .numpy_pickle import load
from .compressor import register_compressor
from .parallel import Parallel
from .parallel import delayed
from .parallel import cpu_count
from .parallel import register_parallel_backend
from .parallel import parallel_backend
from .parallel import effective_n_jobs
from ._cloudpickle_wrapper import wrap_non_picklable_objects
__all__ = ['Memory', 'MemorizedResult', 'PrintTime', 'Logger', 'hash', 'dump',
'load', 'Parallel', 'delayed', 'cpu_count', 'effective_n_jobs',
'register_parallel_backend', 'parallel_backend',
'register_store_backend', 'register_compressor',
'wrap_non_picklable_objects']
# Workaround issue discovered in intel-openmp 2019.5:
# https://github.com/ContinuumIO/anaconda-issues/issues/11294
os.environ.setdefault("KMP_INIT_AT_FORK", "FALSE")

View File

@ -0,0 +1,17 @@
"""
Small shim of loky's cloudpickle_wrapper to avoid failure when
multiprocessing is not available.
"""
from ._multiprocessing_helpers import mp
def my_wrap_non_picklable_objects(obj, keep_wrapper=True):
return obj
if mp is None:
wrap_non_picklable_objects = my_wrap_non_picklable_objects
else:
from .externals.loky import wrap_non_picklable_objects # noqa
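A short, hedged sketch of how the re-exported wrapper is typically used (the lambda is deliberately not picklable by the standard pickle module):

from joblib import Parallel, delayed, wrap_non_picklable_objects

double = wrap_non_picklable_objects(lambda x: 2 * x)   # serialized via cloudpickle
print(Parallel(n_jobs=2)(delayed(double)(i) for i in range(5)))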

View File

@ -0,0 +1,368 @@
from __future__ import print_function, division, absolute_import
import asyncio
import concurrent.futures
import contextlib
import time
from uuid import uuid4
import weakref
from .parallel import AutoBatchingMixin, ParallelBackendBase, BatchedCalls
from .parallel import parallel_backend
try:
import dask
import distributed
except ImportError:
dask = None
distributed = None
if dask is not None and distributed is not None:
from dask.utils import funcname, itemgetter
from dask.sizeof import sizeof
from dask.distributed import (
Client,
as_completed,
get_client,
secede,
rejoin,
get_worker
)
from distributed.utils import thread_state
try:
# asyncio.TimeoutError, Python3-only error thrown by recent versions of
# distributed
from distributed.utils import TimeoutError as _TimeoutError
except ImportError:
from tornado.gen import TimeoutError as _TimeoutError
def is_weakrefable(obj):
try:
weakref.ref(obj)
return True
except TypeError:
return False
class _WeakKeyDictionary:
"""A variant of weakref.WeakKeyDictionary for unhashable objects.
This datastructure is used to store futures for broadcasted data objects
such as large numpy arrays or pandas dataframes that are not hashable and
therefore cannot be used as keys of traditional python dicts.
Furthermore, using a dict with id(array) as key is not safe because
Python is likely to reuse the id of recently collected arrays.
"""
def __init__(self):
self._data = {}
def __getitem__(self, obj):
ref, val = self._data[id(obj)]
if ref() is not obj:
# In case of a race condition with on_destroy.
raise KeyError(obj)
return val
def __setitem__(self, obj, value):
key = id(obj)
try:
ref, _ = self._data[key]
if ref() is not obj:
# In case of race condition with on_destroy.
raise KeyError(obj)
except KeyError:
# Insert the new entry in the mapping along with a weakref
# callback to automatically delete the entry from the mapping
# as soon as the object used as key is garbage collected.
def on_destroy(_):
del self._data[key]
ref = weakref.ref(obj, on_destroy)
self._data[key] = ref, value
def __len__(self):
return len(self._data)
def clear(self):
self._data.clear()
def _funcname(x):
try:
if isinstance(x, list):
x = x[0][0]
except Exception:
pass
return funcname(x)
def _make_tasks_summary(tasks):
    """Summarize a list of (func, args, kwargs) function calls."""
unique_funcs = {func for func, args, kwargs in tasks}
if len(unique_funcs) == 1:
mixed = False
else:
mixed = True
return len(tasks), mixed, _funcname(tasks)
class Batch:
"""dask-compatible wrapper that executes a batch of tasks"""
def __init__(self, tasks):
# collect some metadata from the tasks to ease Batch calls
# introspection when debugging
self._num_tasks, self._mixed, self._funcname = _make_tasks_summary(
tasks
)
def __call__(self, tasks=None):
results = []
with parallel_backend('dask'):
for func, args, kwargs in tasks:
results.append(func(*args, **kwargs))
return results
def __repr__(self):
descr = f"batch_of_{self._funcname}_{self._num_tasks}_calls"
if self._mixed:
descr = "mixed_" + descr
return descr
def _joblib_probe_task():
# Noop used by the joblib connector to probe when workers are ready.
pass
class DaskDistributedBackend(AutoBatchingMixin, ParallelBackendBase):
MIN_IDEAL_BATCH_DURATION = 0.2
MAX_IDEAL_BATCH_DURATION = 1.0
supports_timeout = True
def __init__(self, scheduler_host=None, scatter=None,
client=None, loop=None, wait_for_workers_timeout=10,
**submit_kwargs):
super().__init__()
if distributed is None:
msg = ("You are trying to use 'dask' as a joblib parallel backend "
"but dask is not installed. Please install dask "
"to fix this error.")
raise ValueError(msg)
if client is None:
if scheduler_host:
client = Client(scheduler_host, loop=loop,
set_as_default=False)
else:
try:
client = get_client()
except ValueError as e:
msg = ("To use Joblib with Dask first create a Dask Client"
"\n\n"
" from dask.distributed import Client\n"
" client = Client()\n"
"or\n"
" client = Client('scheduler-address:8786')")
raise ValueError(msg) from e
self.client = client
if scatter is not None and not isinstance(scatter, (list, tuple)):
raise TypeError("scatter must be a list/tuple, got "
"`%s`" % type(scatter).__name__)
if scatter is not None and len(scatter) > 0:
# Keep a reference to the scattered data to keep the ids the same
self._scatter = list(scatter)
scattered = self.client.scatter(scatter, broadcast=True)
self.data_futures = {id(x): f for x, f in zip(scatter, scattered)}
else:
self._scatter = []
self.data_futures = {}
self.wait_for_workers_timeout = wait_for_workers_timeout
self.submit_kwargs = submit_kwargs
self.waiting_futures = as_completed(
[],
loop=client.loop,
with_results=True,
raise_errors=False
)
self._results = {}
self._callbacks = {}
async def _collect(self):
while self._continue:
async for future, result in self.waiting_futures:
cf_future = self._results.pop(future)
callback = self._callbacks.pop(future)
if future.status == "error":
typ, exc, tb = result
cf_future.set_exception(exc)
else:
cf_future.set_result(result)
callback(result)
await asyncio.sleep(0.01)
def __reduce__(self):
return (DaskDistributedBackend, ())
def get_nested_backend(self):
return DaskDistributedBackend(client=self.client), -1
def configure(self, n_jobs=1, parallel=None, **backend_args):
self.parallel = parallel
return self.effective_n_jobs(n_jobs)
def start_call(self):
self._continue = True
self.client.loop.add_callback(self._collect)
self.call_data_futures = _WeakKeyDictionary()
def stop_call(self):
# The explicit call to clear is required to break a cycling reference
# to the futures.
self._continue = False
# wait for the future collection routine (self._backend._collect) to
# finish in order to limit asyncio warnings due to aborting _collect
# during a following backend termination call
time.sleep(0.01)
self.call_data_futures.clear()
def effective_n_jobs(self, n_jobs):
effective_n_jobs = sum(self.client.ncores().values())
if effective_n_jobs != 0 or not self.wait_for_workers_timeout:
return effective_n_jobs
# If there is no worker, schedule a probe task to wait for the workers
# to come up and be available. If the dask cluster is in adaptive mode,
# this task might cause the cluster to provision some workers.
try:
self.client.submit(_joblib_probe_task).result(
timeout=self.wait_for_workers_timeout)
except _TimeoutError as e:
error_msg = (
"DaskDistributedBackend has no worker after {} seconds. "
"Make sure that workers are started and can properly connect "
"to the scheduler and increase the joblib/dask connection "
"timeout with:\n\n"
"parallel_backend('dask', wait_for_workers_timeout={})"
).format(self.wait_for_workers_timeout,
max(10, 2 * self.wait_for_workers_timeout))
raise TimeoutError(error_msg) from e
return sum(self.client.ncores().values())
async def _to_func_args(self, func):
itemgetters = dict()
# Futures that are dynamically generated during a single call to
# Parallel.__call__.
call_data_futures = getattr(self, 'call_data_futures', None)
async def maybe_to_futures(args):
out = []
for arg in args:
arg_id = id(arg)
if arg_id in itemgetters:
out.append(itemgetters[arg_id])
continue
f = self.data_futures.get(arg_id, None)
if f is None and call_data_futures is not None:
try:
f = await call_data_futures[arg]
except KeyError:
pass
if f is None:
if is_weakrefable(arg) and sizeof(arg) > 1e3:
# Automatically scatter large objects to some of
# the workers to avoid duplicated data transfers.
# Rely on automated inter-worker data stealing if
# more workers need to reuse this data
# concurrently.
# set hash=False - nested scatter calls (i.e
# calling client.scatter inside a dask worker)
# using hash=True often raise CancelledError,
# see dask/distributed#3703
_coro = self.client.scatter(
arg,
asynchronous=True,
hash=False
)
# Centralize the scattering of identical arguments
# between concurrent apply_async callbacks by
# exposing the running coroutine in
# call_data_futures before it completes.
t = asyncio.Task(_coro)
call_data_futures[arg] = t
f = await t
if f is not None:
out.append(f)
else:
out.append(arg)
return out
tasks = []
for f, args, kwargs in func.items:
args = list(await maybe_to_futures(args))
kwargs = dict(zip(kwargs.keys(),
await maybe_to_futures(kwargs.values())))
tasks.append((f, args, kwargs))
return (Batch(tasks), tasks)
def apply_async(self, func, callback=None):
cf_future = concurrent.futures.Future()
cf_future.get = cf_future.result # achieve AsyncResult API
async def f(func, callback):
batch, tasks = await self._to_func_args(func)
key = f'{repr(batch)}-{uuid4().hex}'
dask_future = self.client.submit(
batch, tasks=tasks, key=key, **self.submit_kwargs
)
self.waiting_futures.add(dask_future)
self._callbacks[dask_future] = callback
self._results[dask_future] = cf_future
self.client.loop.add_callback(f, func, callback)
return cf_future
def abort_everything(self, ensure_ready=True):
""" Tell the client to cancel any task submitted via this instance
joblib.Parallel will never access those results
"""
with self.waiting_futures.lock:
self.waiting_futures.futures.clear()
while not self.waiting_futures.queue.empty():
self.waiting_futures.queue.get()
@contextlib.contextmanager
def retrieval_context(self):
"""Override ParallelBackendBase.retrieval_context to avoid deadlocks.
This removes the thread from the worker's thread pool (using 'secede').
Seceding avoids deadlock in nested parallelism settings.
"""
# See 'joblib.Parallel.__call__' and 'joblib.Parallel.retrieve' for how
# this is used.
if hasattr(thread_state, 'execution_state'):
# we are in a worker. Secede to avoid deadlock.
secede()
yield
if hasattr(thread_state, 'execution_state'):
rejoin()
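# A minimal usage sketch (illustrative only, not part of this module): the
# backend above is selected through joblib's public parallel_backend API once
# a dask.distributed Client is running. Client, Parallel, delayed and
# parallel_backend are the standard dask/joblib entry points; the toy
# square() function below is a hypothetical example.
#
#     from distributed import Client
#     from joblib import Parallel, delayed, parallel_backend
#
#     def square(x):
#         return x ** 2
#
#     client = Client(processes=False)  # local, in-process workers
#     with parallel_backend('dask', wait_for_workers_timeout=10):
#         results = Parallel()(delayed(square)(i) for i in range(8))
#     # results == [0, 1, 4, 9, 16, 25, 36, 49]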

View File

@ -0,0 +1,397 @@
"""
Represent an exception with a lot of information.
Provides 2 useful functions:
format_exc: format an exception into a complete traceback, with full
debugging instructions.
format_outer_frames: format the current position in the call stack.
Adapted from IPython's VerboseTB.
"""
# Authors: Gael Varoquaux < gael dot varoquaux at normalesup dot org >
# Nathaniel Gray <n8gray@caltech.edu>
# Fernando Perez <fperez@colorado.edu>
# Copyright: 2010, Gael Varoquaux
# 2001-2004, Fernando Perez
# 2001 Nathaniel Gray
# License: BSD 3 clause
# flake8: noqa
import inspect
import keyword
import linecache
import os
import pydoc
import sys
import time
import tokenize
import traceback
INDENT = ' ' * 8
###############################################################################
# some internal-use functions
def safe_repr(value):
"""Hopefully pretty robust repr equivalent."""
# this is pretty horrible but should always return *something*
try:
return pydoc.text.repr(value)
except KeyboardInterrupt:
raise
except:
try:
return repr(value)
except KeyboardInterrupt:
raise
except:
try:
# all still in an except block so we catch
# getattr raising
name = getattr(value, '__name__', None)
if name:
# ick, recursion
return safe_repr(name)
klass = getattr(value, '__class__', None)
if klass:
return '%s instance' % safe_repr(klass)
except KeyboardInterrupt:
raise
except:
return 'UNRECOVERABLE REPR FAILURE'
def eq_repr(value, repr=safe_repr):
return '=%s' % repr(value)
###############################################################################
def uniq_stable(elems):
"""uniq_stable(elems) -> list
Return, from an iterable, a list of all the unique elements in the input,
maintaining the order in which they first appear.
A naive solution to this problem which just makes a dictionary with the
elements as keys fails to respect the stability condition, since
dictionaries are unsorted by nature.
Note: All elements in the input must be hashable.
"""
unique = []
unique_set = set()
for nn in elems:
if nn not in unique_set:
unique.append(nn)
unique_set.add(nn)
return unique
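# For instance (illustrative): uniq_stable(['b', 'a', 'b', 'c', 'a'])
# returns ['b', 'a', 'c'], whereas building a plain set would lose the order
# in which the elements first appeared.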
###############################################################################
def fix_frame_records_filenames(records):
"""Try to fix the filenames in each record from inspect.getinnerframes().
Particularly, modules loaded from within zip files have useless filenames
attached to their code object, and inspect.getinnerframes() just uses it.
"""
fixed_records = []
for frame, filename, line_no, func_name, lines, index in records:
# Look inside the frame's globals dictionary for __file__, which should
# be better.
better_fn = frame.f_globals.get('__file__', None)
if isinstance(better_fn, str):
# Check the type just in case someone did something weird with
# __file__. It might also be None if the error occurred during
# import.
filename = better_fn
fixed_records.append((frame, filename, line_no, func_name, lines,
index))
return fixed_records
def _fixed_getframes(etb, context=1, tb_offset=0):
LNUM_POS, LINES_POS, INDEX_POS = 2, 4, 5
records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))
# If the error is at the console, don't build any context, since it would
# otherwise produce 5 blank lines printed out (there is no file at the
# console)
rec_check = records[tb_offset:]
try:
rname = rec_check[0][1]
if rname == '<ipython console>' or rname.endswith('<string>'):
return rec_check
except IndexError:
pass
aux = traceback.extract_tb(etb)
assert len(records) == len(aux)
for i, (file, lnum, _, _) in enumerate(aux):
maybe_start = lnum - 1 - context // 2
start = max(maybe_start, 0)
end = start + context
lines = linecache.getlines(file)[start:end]
buf = list(records[i])
buf[LNUM_POS] = lnum
buf[INDEX_POS] = lnum - 1 - start
buf[LINES_POS] = lines
records[i] = tuple(buf)
return records[tb_offset:]
def _format_traceback_lines(lnum, index, lines, lvals=None):
numbers_width = 7
res = []
i = lnum - index
for line in lines:
if i == lnum:
# This is the line with the error
pad = numbers_width - len(str(i))
if pad >= 3:
marker = '-' * (pad - 3) + '-> '
elif pad == 2:
marker = '> '
elif pad == 1:
marker = '>'
else:
marker = ''
num = marker + str(i)
else:
num = '%*s' % (numbers_width, i)
line = '%s %s' % (num, line)
res.append(line)
if lvals and i == lnum:
res.append(lvals + '\n')
i = i + 1
return res
def format_records(records): # , print_globals=False):
# Loop over all records printing context and info
frames = []
abspath = os.path.abspath
for frame, file, lnum, func, lines, index in records:
try:
file = file and abspath(file) or '?'
except OSError:
# if file is '<console>' or something not in the filesystem,
# the abspath call will throw an OSError. Just ignore it and
# keep the original file string.
pass
if file.endswith('.pyc'):
file = file[:-4] + '.py'
link = file
args, varargs, varkw, locals = inspect.getargvalues(frame)
if func == '?':
call = ''
else:
# Decide whether to include variable details or not
try:
call = 'in %s%s' % (func, inspect.formatargvalues(args,
varargs, varkw, locals,
formatvalue=eq_repr))
except KeyError:
# Very odd crash from inspect.formatargvalues(). The
# scenario under which it appeared was a call to
# view(array,scale) in NumTut.view.view(), where scale had
# been defined as a scalar (it should be a tuple). Somehow
# inspect messes up resolving the argument list of view()
# and barfs out. At some point I should dig into this one
# and file a bug report about it.
print("\nJoblib's exception reporting continues...\n")
call = 'in %s(***failed resolving arguments***)' % func
# Initialize a list of names on the current line, which the
# tokenizer below will populate.
names = []
def tokeneater(token_type, token, start, end, line):
"""Stateful tokeneater which builds dotted names.
The list of names it appends to (from the enclosing scope) can
contain repeated composite names. This is unavoidable, since
there is no way to disambiguate partial dotted structures until
the full list is known. The caller is responsible for pruning
the final list of duplicates before using it."""
# build composite names
if token == '.':
try:
names[-1] += '.'
# store state so the next token is added for x.y.z names
tokeneater.name_cont = True
return
except IndexError:
pass
if token_type == tokenize.NAME and token not in keyword.kwlist:
if tokeneater.name_cont:
# Dotted names
names[-1] += token
tokeneater.name_cont = False
else:
# Regular new names. We append everything, the caller
# will be responsible for pruning the list later. It's
# very tricky to try to prune as we go, b/c composite
# names can fool us. The pruning at the end is easy
# to do (or the caller can print a list with repeated
# names if so desired).
names.append(token)
elif token_type == tokenize.NEWLINE:
raise IndexError
# we need to store a bit of state in the tokenizer to build
# dotted names
tokeneater.name_cont = False
def linereader(file=file, lnum=[lnum], getline=linecache.getline):
line = getline(file, lnum[0])
lnum[0] += 1
return line
# Build the list of names on this line of code where the exception
# occurred.
try:
# This builds the names list in-place by capturing it from the
# enclosing scope.
for token in tokenize.generate_tokens(linereader):
tokeneater(*token)
except (IndexError, UnicodeDecodeError, SyntaxError):
# signals exit of tokenizer
# SyntaxError can happen when trying to tokenize
# a compiled (e.g. .so or .pyd) extension
pass
except tokenize.TokenError as msg:
_m = ("An unexpected error occurred while tokenizing input file %s\n"
"The following traceback may be corrupted or invalid\n"
"The error message is: %s\n" % (file, msg))
print(_m)
# prune names list of duplicates, but keep the right order
unique_names = uniq_stable(names)
# Start loop over vars
lvals = []
for name_full in unique_names:
name_base = name_full.split('.', 1)[0]
if name_base in frame.f_code.co_varnames:
if name_base in locals.keys():
try:
value = safe_repr(eval(name_full, locals))
except:
value = "undefined"
else:
value = "undefined"
name = name_full
lvals.append('%s = %s' % (name, value))
#elif print_globals:
# if frame.f_globals.has_key(name_base):
# try:
# value = safe_repr(eval(name_full,frame.f_globals))
# except:
# value = "undefined"
# else:
# value = "undefined"
# name = 'global %s' % name_full
# lvals.append('%s = %s' % (name,value))
if lvals:
lvals = '%s%s' % (INDENT, ('\n%s' % INDENT).join(lvals))
else:
lvals = ''
level = '%s\n%s %s\n' % (75 * '.', link, call)
if index is None:
frames.append(level)
else:
frames.append('%s%s' % (level, ''.join(
_format_traceback_lines(lnum, index, lines, lvals))))
return frames
###############################################################################
def format_exc(etype, evalue, etb, context=5, tb_offset=0):
""" Return a nice text document describing the traceback.
Parameters
----------
etype, evalue, etb: as returned by sys.exc_info
context: number of lines of the source file to display
tb_offset: the number of stack frames not to use (0 = use all)
"""
# some locals
try:
etype = etype.__name__
except AttributeError:
pass
# Header with the exception type, python version, and date
pyver = 'Python ' + sys.version.split()[0] + ': ' + sys.executable
date = time.ctime(time.time())
pid = 'PID: %i' % os.getpid()
head = '%s%s%s\n%s%s%s' % (
etype, ' ' * (75 - len(str(etype)) - len(date)),
date, pid, ' ' * (75 - len(str(pid)) - len(pyver)),
pyver)
# Drop topmost frames if requested
records = _fixed_getframes(etb, context, tb_offset)
# Get (safely) a string form of the exception info
try:
etype_str, evalue_str = map(str, (etype, evalue))
except BaseException:
# User exception is improperly defined.
etype, evalue = str, sys.exc_info()[:2]
etype_str, evalue_str = map(str, (etype, evalue))
# ... and format it
exception = ['%s: %s' % (etype_str, evalue_str)]
frames = format_records(records)
return '%s\n%s\n%s' % (head, '\n'.join(frames), ''.join(exception[0]))
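# A small illustration (assumption: called from an except block, which is how
# this helper is meant to be used) of turning the current exception into a
# verbose report:
#
#     import sys
#     try:
#         1 / 0
#     except Exception:
#         etype, evalue, etb = sys.exc_info()
#         print(format_exc(etype, evalue, etb, context=3))
#     # prints a header (exception type, Python version, PID, date), one
#     # block per frame with source context and local variables, and finally
#     # "ZeroDivisionError: division by zero"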
###############################################################################
def format_outer_frames(context=5, stack_start=None, stack_end=None,
ignore_ipython=True):
LNUM_POS, LINES_POS, INDEX_POS = 2, 4, 5
records = inspect.getouterframes(inspect.currentframe())
output = list()
for i, (frame, filename, line_no, func_name, lines, index) \
in enumerate(records):
# Look inside the frame's globals dictionary for __file__, which should
# be better.
better_fn = frame.f_globals.get('__file__', None)
if isinstance(better_fn, str):
# Check the type just in case someone did something weird with
# __file__. It might also be None if the error occurred during
# import.
filename = better_fn
if filename.endswith('.pyc'):
filename = filename[:-4] + '.py'
if ignore_ipython:
# Hack to avoid printing the internals of IPython
if (os.path.basename(filename) in ('iplib.py', 'py3compat.py')
and func_name in ('execfile', 'safe_execfile', 'runcode')):
break
maybe_start = line_no - 1 - context // 2
start = max(maybe_start, 0)
end = start + context
lines = linecache.getlines(filename)[start:end]
buf = list(records[i])
buf[LNUM_POS] = line_no
buf[INDEX_POS] = line_no - 1 - start
buf[LINES_POS] = lines
output.append(tuple(buf))
return '\n'.join(format_records(output[stack_end:stack_start:-1]))

View File

@ -0,0 +1,115 @@
"""
Exceptions
This module is deprecated and will be removed in joblib 0.16.
"""
# Author: Gael Varoquaux < gael dot varoquaux at normalesup dot org >
# Copyright: 2010, Gael Varoquaux
# License: BSD 3 clause
class JoblibException(Exception):
"""A simple exception with an error message that you can get to."""
def __init__(self, *args):
# We need to implement __init__ so that it is picked in the
# multiple inheritance hierarchy in the class created in
# _mk_exception. Note: in Python 2, if you implement __init__
# in your exception class you need to set .args correctly,
# otherwise you can dump an exception instance with pickle but
# not load it (at load time an empty .args will be passed to
# the constructor). Also we want to be explicit and not use
# 'super' here. Using 'super' can cause a sibling class method
# to be called and we have no control the sibling class method
# constructor signature in the exception returned by
# _mk_exception.
Exception.__init__(self, *args)
def __repr__(self):
if hasattr(self, 'args') and len(self.args) > 0:
message = self.args[0]
else:
message = ''
name = self.__class__.__name__
return '%s\n%s\n%s\n%s' % (name, 75 * '_', message, 75 * '_')
__str__ = __repr__
class TransportableException(JoblibException):
"""An exception containing all the info to wrap an original
exception and recreate it.
"""
def __init__(self, message, etype):
# The next line sets the .args correctly. This is needed to
# make the exception loadable with pickle
JoblibException.__init__(self, message, etype)
self.message = message
self.etype = etype
def unwrap(self, context_message=""):
report = """\
%s
---------------------------------------------------------------------------
Joblib worker traceback:
---------------------------------------------------------------------------
%s""" % (context_message, self.message)
# Unwrap the exception to a JoblibException
exception_type = _mk_exception(self.etype)[0]
return exception_type(report)
_exception_mapping = dict()
def _mk_exception(exception, name=None):
if issubclass(exception, JoblibException):
# No need to recursively wrap JoblibException
return exception, exception.__name__
# Create an exception inheriting from both JoblibException
# and that exception
if name is None:
name = exception.__name__
this_name = 'Joblib%s' % name
if this_name in _exception_mapping:
# Avoid creating the same exception twice
this_exception = _exception_mapping[this_name]
else:
if exception is Exception:
# JoblibException is already a subclass of Exception. No
# need to use multiple inheritance
return JoblibException, this_name
try:
this_exception = type(
this_name, (JoblibException, exception), {})
_exception_mapping[this_name] = this_exception
except TypeError:
# This happens if "Cannot create a consistent method
# resolution order", e.g. because 'exception' is a
# subclass of JoblibException or 'exception' is not an
# acceptable base class
this_exception = JoblibException
return this_exception, this_name
def _mk_common_exceptions():
namespace = dict()
import builtins as _builtin_exceptions
common_exceptions = filter(
lambda x: x.endswith('Error'),
dir(_builtin_exceptions))
for name in common_exceptions:
obj = getattr(_builtin_exceptions, name)
if isinstance(obj, type) and issubclass(obj, BaseException):
this_obj, this_name = _mk_exception(obj, name=name)
namespace[this_name] = this_obj
return namespace
# Updating module locals so that the exceptions pickle right. AFAIK this
# works only at module-creation time
locals().update(_mk_common_exceptions())
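# A short illustration of the wrapping performed above (the names are the
# ones generated by _mk_common_exceptions, nothing hypothetical): the derived
# classes inherit from both JoblibException and the original builtin, so a
# caller can catch either type.
#
#     JoblibValueError, name = _mk_exception(ValueError)
#     assert name == 'JoblibValueError'
#     assert issubclass(JoblibValueError, JoblibException)
#     assert issubclass(JoblibValueError, ValueError)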

View File

@ -0,0 +1,668 @@
"""
Reducer using memory mapping for numpy arrays
"""
# Author: Thomas Moreau <thomas.moreau.2010@gmail.com>
# Copyright: 2017, Thomas Moreau
# License: BSD 3 clause
from mmap import mmap
import errno
import os
import stat
import threading
import atexit
import tempfile
import time
import warnings
import weakref
from uuid import uuid4
from multiprocessing import util
from pickle import whichmodule, loads, dumps, HIGHEST_PROTOCOL, PicklingError
try:
WindowsError
except NameError:
WindowsError = type(None)
try:
import numpy as np
from numpy.lib.stride_tricks import as_strided
except ImportError:
np = None
from .numpy_pickle import dump, load, load_temporary_memmap
from .backports import make_memmap
from .disk import delete_folder
from .externals.loky.backend import resource_tracker
# Some systems have a ramdisk mounted by default, we can use it instead of /tmp
# as the default folder to dump big arrays to share with subprocesses.
SYSTEM_SHARED_MEM_FS = '/dev/shm'
# Minimal number of bytes available on SYSTEM_SHARED_MEM_FS to consider using
# it as the default folder to dump big arrays to share with subprocesses.
SYSTEM_SHARED_MEM_FS_MIN_SIZE = int(2e9)
# Folder and file permissions to chmod temporary files generated by the
# memmapping pool. Only the owner of the Python process can access the
# temporary files and folder.
FOLDER_PERMISSIONS = stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR
FILE_PERMISSIONS = stat.S_IRUSR | stat.S_IWUSR
# Set used in joblib workers, referencing the filenames of temporary memmaps
# created by joblib to speed up data communication. In child processes, we add
# a finalizer to these memmaps that sends a maybe_unlink call to the
# resource_tracker, in order to free main memory as fast as possible.
JOBLIB_MMAPS = set()
def _log_and_unlink(filename):
from .externals.loky.backend.resource_tracker import _resource_tracker
util.debug(
"[FINALIZER CALL] object mapping to {} about to be deleted,"
" decrementing the refcount of the file (pid: {})".format(
os.path.basename(filename), os.getpid()))
_resource_tracker.maybe_unlink(filename, "file")
def add_maybe_unlink_finalizer(memmap):
util.debug(
"[FINALIZER ADD] adding finalizer to {} (id {}, filename {}, pid {})"
"".format(type(memmap), id(memmap), os.path.basename(memmap.filename),
os.getpid()))
weakref.finalize(memmap, _log_and_unlink, memmap.filename)
def unlink_file(filename):
"""Wrapper around os.unlink with a retry mechanism.
The retry mechanism has been implemented primarily to overcome a race
condition happening during the finalizer of a np.memmap: when a process
holding the last reference to a mmap-backed np.memmap/np.array is about to
delete this array (and close the reference), it sends a maybe_unlink
request to the resource_tracker. This request can be processed faster than
it takes for the last reference of the memmap to be closed, yielding (on
Windows) a PermissionError in the resource_tracker loop.
"""
NUM_RETRIES = 10
for retry_no in range(1, NUM_RETRIES + 1):
try:
os.unlink(filename)
break
except PermissionError:
util.debug(
'[ResourceTracker] tried to unlink {}, got '
'PermissionError'.format(filename)
)
if retry_no == NUM_RETRIES:
raise
else:
time.sleep(.2)
except FileNotFoundError:
# In case of a race condition when deleting the temporary folder,
# avoid noisy FileNotFoundError exception in the resource tracker.
pass
resource_tracker._CLEANUP_FUNCS['file'] = unlink_file
class _WeakArrayKeyMap:
"""A variant of weakref.WeakKeyDictionary for unhashable numpy arrays.
This datastructure will be used with numpy arrays as obj keys, therefore we
do not use the __get__ / __set__ methods to avoid any conflict with the
numpy fancy indexing syntax.
"""
def __init__(self):
self._data = {}
def get(self, obj):
ref, val = self._data[id(obj)]
if ref() is not obj:
# In case of race condition with on_destroy: could never be
# triggered by the joblib tests with CPython.
raise KeyError(obj)
return val
def set(self, obj, value):
key = id(obj)
try:
ref, _ = self._data[key]
if ref() is not obj:
# In case of race condition with on_destroy: could never be
# triggered by the joblib tests with CPython.
raise KeyError(obj)
except KeyError:
# Insert the new entry in the mapping along with a weakref
# callback to automatically delete the entry from the mapping
# as soon as the object used as key is garbage collected.
def on_destroy(_):
del self._data[key]
ref = weakref.ref(obj, on_destroy)
self._data[key] = ref, value
def __getstate__(self):
raise PicklingError("_WeakArrayKeyMap is not pickleable")
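# A minimal sketch of the intended usage (assumes numpy is importable): keys
# are matched on object identity, and entries disappear once the key array is
# garbage collected.
#
#     a = np.ones(10)
#     m = _WeakArrayKeyMap()
#     m.set(a, 'some-basename.pkl')
#     assert m.get(a) == 'some-basename.pkl'
#     del a   # the weakref callback drops the stale entry from m._data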
###############################################################################
# Support for efficient transient pickling of numpy data structures
def _get_backing_memmap(a):
"""Recursively look up the original np.memmap instance base if any."""
b = getattr(a, 'base', None)
if b is None:
# TODO: check scipy sparse datastructure if scipy is installed
# neither a nor any of its ancestors has a memmap base
return None
elif isinstance(b, mmap):
# a is already a real memmap instance.
return a
else:
# Recursive exploration of the base ancestry
return _get_backing_memmap(b)
def _get_temp_dir(pool_folder_name, temp_folder=None):
"""Get the full path to a subfolder inside the temporary folder.
Parameters
----------
pool_folder_name : str
Sub-folder name used for the serialization of a pool instance.
temp_folder: str, optional
Folder to be used by the pool for memmapping large arrays
for sharing memory with worker processes. If None, this will try in
order:
- a folder pointed by the JOBLIB_TEMP_FOLDER environment
variable,
- /dev/shm if the folder exists and is writable: this is a
RAMdisk filesystem available by default on modern Linux
distributions,
- the default system temporary folder that can be
overridden with TMP, TMPDIR or TEMP environment
variables, typically /tmp under Unix operating systems.
Returns
-------
pool_folder : str
full path to the temporary folder
use_shared_mem : bool
whether the temporary folder is written to the system shared memory
folder or some other temporary folder.
"""
use_shared_mem = False
if temp_folder is None:
temp_folder = os.environ.get('JOBLIB_TEMP_FOLDER', None)
if temp_folder is None:
if os.path.exists(SYSTEM_SHARED_MEM_FS):
try:
shm_stats = os.statvfs(SYSTEM_SHARED_MEM_FS)
available_nbytes = shm_stats.f_bsize * shm_stats.f_bavail
if available_nbytes > SYSTEM_SHARED_MEM_FS_MIN_SIZE:
# Try to see if we have write access to the shared mem
# folder only if it is reasonably large (that is 2GB or
# more).
temp_folder = SYSTEM_SHARED_MEM_FS
pool_folder = os.path.join(temp_folder, pool_folder_name)
if not os.path.exists(pool_folder):
os.makedirs(pool_folder)
use_shared_mem = True
except (IOError, OSError):
# Missing rights in the /dev/shm partition, fallback to regular
# temp folder.
temp_folder = None
if temp_folder is None:
# Fallback to the default tmp folder, typically /tmp
temp_folder = tempfile.gettempdir()
temp_folder = os.path.abspath(os.path.expanduser(temp_folder))
pool_folder = os.path.join(temp_folder, pool_folder_name)
return pool_folder, use_shared_mem
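# For example (illustrative; the subfolder name below is arbitrary and the
# result depends on JOBLIB_TEMP_FOLDER, /dev/shm availability and platform):
#
#     folder, on_shm = _get_temp_dir('joblib_memmapping_folder_example')
#     # -> ('/dev/shm/joblib_memmapping_folder_example', True) on a Linux box
#     #    with a large enough ramdisk, otherwise something like
#     #    ('/tmp/joblib_memmapping_folder_example', False)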
def has_shareable_memory(a):
"""Return True if a is backed by some mmap buffer directly or not."""
return _get_backing_memmap(a) is not None
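# Illustration (assumes numpy; the temporary file path is hypothetical):
#
#     m = np.memmap('/tmp/example.mmap', dtype='float64', shape=(3,), mode='w+')
#     has_shareable_memory(m)           # True: backed by an mmap buffer
#     has_shareable_memory(m[1:])       # True: views keep the memmap base
#     has_shareable_memory(np.ones(3))  # False: plain in-memory array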
def _strided_from_memmap(filename, dtype, mode, offset, order, shape, strides,
total_buffer_len, unlink_on_gc_collect):
"""Reconstruct an array view on a memory mapped file."""
if mode == 'w+':
# Do not zero the original data when unpickling
mode = 'r+'
if strides is None:
# Simple, contiguous memmap
return make_memmap(
filename, dtype=dtype, shape=shape, mode=mode, offset=offset,
order=order, unlink_on_gc_collect=unlink_on_gc_collect
)
else:
# For non-contiguous data, memmap the total enclosing buffer and then
# extract the non-contiguous view with the stride-tricks API
base = make_memmap(
filename, dtype=dtype, shape=total_buffer_len, offset=offset,
mode=mode, order=order, unlink_on_gc_collect=unlink_on_gc_collect
)
return as_strided(base, shape=shape, strides=strides)
def _reduce_memmap_backed(a, m):
"""Pickling reduction for memmap backed arrays.
a is expected to be an instance of np.ndarray (or np.memmap)
m is expected to be an instance of np.memmap on the top of the ``base``
attribute ancestry of a. ``m.base`` should be the real python mmap object.
"""
# offset that comes from the striding differences between a and m
util.debug('[MEMMAP REDUCE] reducing a memmap-backed array '
'(shape, {}, pid: {})'.format(a.shape, os.getpid()))
a_start, a_end = np.byte_bounds(a)
m_start = np.byte_bounds(m)[0]
offset = a_start - m_start
# offset from the backing memmap
offset += m.offset
if m.flags['F_CONTIGUOUS']:
order = 'F'
else:
# The backing memmap buffer is necessarily contiguous hence C if not
# Fortran
order = 'C'
if a.flags['F_CONTIGUOUS'] or a.flags['C_CONTIGUOUS']:
# If the array is a contiguous view, no need to pass the strides
strides = None
total_buffer_len = None
else:
# Compute the total number of items to map from which the strided
# view will be extracted.
strides = a.strides
total_buffer_len = (a_end - a_start) // a.itemsize
return (_strided_from_memmap,
(m.filename, a.dtype, m.mode, offset, order, a.shape, strides,
total_buffer_len, False))
def reduce_array_memmap_backward(a):
"""reduce a np.array or a np.memmap from a child process"""
m = _get_backing_memmap(a)
if isinstance(m, np.memmap) and m.filename not in JOBLIB_MMAPS:
# if a is backed by a memmaped file, reconstruct a using the
# memmaped file.
return _reduce_memmap_backed(a, m)
else:
# a is either a regular (not memmap-backed) numpy array, or an array
# backed by a shared temporary file created by joblib. In the latter
# case, in order to limit the lifespan of these temporary files, we
# serialize the memmap as a regular numpy array, and decref the
# file backing the memmap (done implicitly in a previously registered
# finalizer, see ``unlink_on_gc_collect`` for more details)
return (
loads, (dumps(np.asarray(a), protocol=HIGHEST_PROTOCOL), )
)
class ArrayMemmapForwardReducer(object):
"""Reducer callable to dump large arrays to memmap files.
Parameters
----------
max_nbytes: int
Threshold to trigger memmapping of large arrays to files created in
a folder.
temp_folder_resolver: callable
A callable in charge of resolving a temporary folder name where files
for backing memmapped arrays are created.
mmap_mode: 'r', 'r+' or 'c'
Mode for the created memmap datastructure. See the documentation of
numpy.memmap for more details. Note: 'w+' is coerced to 'r+'
automatically to avoid zeroing the data on unpickling.
verbose: int, optional, 0 by default
If verbose > 0, memmap creations are logged.
If verbose > 1, both memmap creations, reuse and array pickling are
logged.
prewarm: bool, optional, False by default.
Force a read on the newly memmapped array to make sure that the OS pre-caches it in
memory. This can be useful to avoid concurrent disk access when the
same data array is passed to different worker processes.
"""
def __init__(self, max_nbytes, temp_folder_resolver, mmap_mode,
unlink_on_gc_collect, verbose=0, prewarm=True):
self._max_nbytes = max_nbytes
self._temp_folder_resolver = temp_folder_resolver
self._mmap_mode = mmap_mode
self.verbose = int(verbose)
if prewarm == "auto":
self._prewarm = not self._temp_folder.startswith(
SYSTEM_SHARED_MEM_FS
)
else:
self._prewarm = prewarm
self._memmaped_arrays = _WeakArrayKeyMap()
self._temporary_memmaped_filenames = set()
self._unlink_on_gc_collect = unlink_on_gc_collect
@property
def _temp_folder(self):
return self._temp_folder_resolver()
def __reduce__(self):
# The ArrayMemmapForwardReducer is passed to the children processes: it
# needs to be pickled but the _WeakArrayKeyMap needs to be skipped as
# it's only guaranteed to be consistent with the parent process memory
# garbage collection.
# Although this reducer is pickled, it is not needed in its destination
# process (child processes), as we only use this reducer to send
# memmaps from the parent process to the child processes. For this
# reason, we can afford to skip the resolver (which would otherwise
# be unpicklable) and pass it as None instead.
args = (self._max_nbytes, None, self._mmap_mode,
self._unlink_on_gc_collect)
kwargs = {
'verbose': self.verbose,
'prewarm': self._prewarm,
}
return ArrayMemmapForwardReducer, args, kwargs
def __call__(self, a):
m = _get_backing_memmap(a)
if m is not None and isinstance(m, np.memmap):
# a is already backed by a memmap file, let's reuse it directly
return _reduce_memmap_backed(a, m)
if (not a.dtype.hasobject and self._max_nbytes is not None and
a.nbytes > self._max_nbytes):
# check that the folder exists (lazily create the pool temp folder
# if required)
try:
os.makedirs(self._temp_folder)
os.chmod(self._temp_folder, FOLDER_PERMISSIONS)
except OSError as e:
if e.errno != errno.EEXIST:
raise e
try:
basename = self._memmaped_arrays.get(a)
except KeyError:
# Generate a new unique random filename. The process and thread
# ids are only useful for debugging purpose and to make it
# easier to cleanup orphaned files in case of hard process
# kill (e.g. by "kill -9" or segfault).
basename = "{}-{}-{}.pkl".format(
os.getpid(), id(threading.current_thread()), uuid4().hex)
self._memmaped_arrays.set(a, basename)
filename = os.path.join(self._temp_folder, basename)
# In case the same array with the same content is passed several
# times to the pool subprocess children, serialize it only once
is_new_memmap = filename not in self._temporary_memmaped_filenames
# add the memmap to the list of temporary memmaps created by joblib
self._temporary_memmaped_filenames.add(filename)
if self._unlink_on_gc_collect:
# Bump reference count of the memmap by 1 to account for
# shared usage of the memmap by a child process. The
# corresponding decref call will be executed upon calling
# resource_tracker.maybe_unlink, registered as a finalizer in
# the child.
# the incref/decref calls here are only possible when the child
# and the parent share the same resource_tracker. It is not the
# case for the multiprocessing backend, but it does not matter
# because unlinking a memmap from a child process is only
# useful to control the memory usage of long-lasting child
# processes, while the multiprocessing-based pools terminate
# their workers at the end of a map() call.
resource_tracker.register(filename, "file")
if is_new_memmap:
# Incref each temporary memmap created by joblib one extra
# time. This means that these memmaps will only be deleted
# once an extra maybe_unlink() is called, which is done once
# all the jobs have completed (or been canceled) in the
# Parallel._terminate_backend() method.
resource_tracker.register(filename, "file")
if not os.path.exists(filename):
util.debug(
"[ARRAY DUMP] Pickling new array (shape={}, dtype={}) "
"creating a new memmap at {}".format(
a.shape, a.dtype, filename))
for dumped_filename in dump(a, filename):
os.chmod(dumped_filename, FILE_PERMISSIONS)
if self._prewarm:
# Warm up the data by accessing it. This operation ensures
# that the disk accesses required to create the memmapping
# file are performed in the reducing process and avoids
# concurrent memmap creation in multiple child
# processes.
load(filename, mmap_mode=self._mmap_mode).max()
else:
util.debug(
"[ARRAY DUMP] Pickling known array (shape={}, dtype={}) "
"reusing memmap file: {}".format(
a.shape, a.dtype, os.path.basename(filename)))
# The worker process will use joblib.load to memmap the data
return (
(load_temporary_memmap, (filename, self._mmap_mode,
self._unlink_on_gc_collect))
)
else:
# do not convert a into memmap, let pickler do its usual copy with
# the default system pickler
util.debug(
'[ARRAY DUMP] Pickling array (NO MEMMAPPING) (shape={}, '
' dtype={}).'.format(a.shape, a.dtype))
return (loads, (dumps(a, protocol=HIGHEST_PROTOCOL),))
def get_memmapping_reducers(
forward_reducers=None, backward_reducers=None,
temp_folder_resolver=None, max_nbytes=1e6, mmap_mode='r', verbose=0,
prewarm=False, unlink_on_gc_collect=True, **kwargs):
"""Construct a pair of memmapping reducer linked to a tmpdir.
This function manages the creation and the clean-up of the temporary folders
underlying the memory maps and should be used to get the reducers necessary
to construct a joblib pool or executor.
"""
if forward_reducers is None:
forward_reducers = dict()
if backward_reducers is None:
backward_reducers = dict()
if np is not None:
# Register smart numpy.ndarray reducers that detect memmap-backed
# arrays and that are also able to dump large in-memory arrays over
# the max_nbytes threshold to memmap files
forward_reduce_ndarray = ArrayMemmapForwardReducer(
max_nbytes, temp_folder_resolver, mmap_mode, unlink_on_gc_collect,
verbose, prewarm=prewarm)
forward_reducers[np.ndarray] = forward_reduce_ndarray
forward_reducers[np.memmap] = forward_reduce_ndarray
# Communication from the child processes to the parent process always
# pickles in-memory numpy.ndarray objects without dumping them as
# memmaps, to avoid confusing the caller and making it tricky to
# collect the temporary folder
backward_reducers[np.ndarray] = reduce_array_memmap_backward
backward_reducers[np.memmap] = reduce_array_memmap_backward
return forward_reducers, backward_reducers
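# A minimal sketch of how the reducers are obtained (illustrative, assuming
# numpy is installed; the lambda resolver and folder path are stand-ins, in
# joblib the resolver is provided by a TemporaryResourcesManager):
#
#     fwd, bwd = get_memmapping_reducers(
#         temp_folder_resolver=lambda: '/tmp/joblib_example_folder',
#         max_nbytes=int(1e6), mmap_mode='r')
#     # fwd maps np.ndarray / np.memmap to an ArrayMemmapForwardReducer and
#     # bwd maps them to reduce_array_memmap_backward; both dicts are then
#     # plugged into the loky / multiprocessing pickler machinery.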
class TemporaryResourcesManager(object):
"""Stateful object able to manage temporary folder and pickles
It exposes:
- a per-context folder name resolving API that memmap-based reducers will
rely on to know where to pickle the temporary memmaps
- a temporary file/folder management API that internally uses the
resource_tracker.
"""
def __init__(self, temp_folder_root=None, context_id=None):
self._current_temp_folder = None
self._temp_folder_root = temp_folder_root
self._use_shared_mem = None
self._cached_temp_folders = dict()
self._id = uuid4().hex
self._finalizers = {}
if context_id is None:
# It would be safer to not assign a default context id (less silent
# bugs), but doing this while maintaining backward compatibility
# with the previous, context-unaware version of get_memmapping_executor
# exposes too many low-level details.
context_id = uuid4().hex
self.set_current_context(context_id)
def set_current_context(self, context_id):
self._current_context_id = context_id
self.register_new_context(context_id)
def register_new_context(self, context_id):
# Prepare a sub-folder name specific to a context (usually a unique id
# generated by each instance of the Parallel class). Do not create it in
# advance to spare FS write access if no array is to be dumped.
if context_id in self._cached_temp_folders:
return
else:
# During its lifecycle, one Parallel object can have several
# executors associated to it (for instance, if a loky worker raises
# an exception, joblib shuts down the executor and instantly
# recreates a new one before raising the error - see
# ``ensure_ready``). Because we don't want two executors tied to
# the same Parallel object (and thus the same context id) to
# register/use/delete the same folder, we also add an id specific
# to the current Manager (and thus specific to its associated
# executor) to the folder name.
new_folder_name = (
"joblib_memmapping_folder_{}_{}_{}".format(
os.getpid(), self._id, context_id)
)
new_folder_path, _ = _get_temp_dir(
new_folder_name, self._temp_folder_root
)
self.register_folder_finalizer(new_folder_path, context_id)
self._cached_temp_folders[context_id] = new_folder_path
def resolve_temp_folder_name(self):
"""Return a folder name specific to the currently activated context"""
return self._cached_temp_folders[self._current_context_id]
def _unregister_context(self, context_id=None):
if context_id is None:
for context_id in list(self._cached_temp_folders):
self._unregister_context(context_id)
else:
temp_folder = self._cached_temp_folders[context_id]
finalizer = self._finalizers[context_id]
resource_tracker.unregister(temp_folder, "folder")
atexit.unregister(finalizer)
self._cached_temp_folders.pop(context_id)
self._finalizers.pop(context_id)
# resource management API
def register_folder_finalizer(self, pool_subfolder, context_id):
# Register the garbage collector at program exit in case the caller forgets
# to call terminate explicitly: note we do not pass any reference to
# ensure that this callback won't prevent garbage collection of
# parallel instance and related file handler resources such as POSIX
# semaphores and pipes
pool_module_name = whichmodule(delete_folder, 'delete_folder')
resource_tracker.register(pool_subfolder, "folder")
def _cleanup():
# In some cases the Python runtime seems to set delete_folder to
# None just before exiting when accessing the delete_folder
# function from the closure namespace. So instead we reimport
# the delete_folder function explicitly.
# https://github.com/joblib/joblib/issues/328
# We cannot just use from 'joblib.pool import delete_folder'
# because joblib should only use relative imports to allow
# easy vendoring.
delete_folder = __import__(
pool_module_name, fromlist=['delete_folder']).delete_folder
try:
delete_folder(pool_subfolder, allow_non_empty=True)
resource_tracker.unregister(pool_subfolder, "folder")
except OSError:
warnings.warn("Failed to delete temporary folder: {}"
.format(pool_subfolder))
self._finalizers[context_id] = atexit.register(_cleanup)
def _unlink_temporary_resources(self, context_id=None):
"""Unlink temporary resources created by a process-based pool"""
if context_id is None:
# iterate over a copy of the cache keys because
# unlink_temporary_resources further deletes an entry in this
# cache
for context_id in self._cached_temp_folders.copy():
self._unlink_temporary_resources(context_id)
else:
temp_folder = self._cached_temp_folders[context_id]
if os.path.exists(temp_folder):
for filename in os.listdir(temp_folder):
resource_tracker.maybe_unlink(
os.path.join(temp_folder, filename), "file"
)
self._try_delete_folder(
allow_non_empty=False, context_id=context_id
)
def _unregister_temporary_resources(self, context_id=None):
"""Unregister temporary resources created by a process-based pool"""
if context_id is None:
for context_id in self._cached_temp_folders:
self._unregister_temporary_resources(context_id)
else:
temp_folder = self._cached_temp_folders[context_id]
if os.path.exists(temp_folder):
for filename in os.listdir(temp_folder):
resource_tracker.unregister(
os.path.join(temp_folder, filename), "file"
)
def _try_delete_folder(self, allow_non_empty, context_id=None):
if context_id is None:
# ditto
for context_id in self._cached_temp_folders.copy():
self._try_delete_folder(
allow_non_empty=allow_non_empty, context_id=context_id
)
else:
temp_folder = self._cached_temp_folders[context_id]
try:
delete_folder(
temp_folder, allow_non_empty=allow_non_empty
)
# Now that this folder is deleted, we can forget about it
self._unregister_context(context_id)
except OSError:
# Temporary folder cannot be deleted right now. No need to
# handle it though, as this folder will be cleaned up by an
# atexit finalizer registered by the memmapping_reducer.
pass

View File

@ -0,0 +1,53 @@
"""Helper module to factorize the conditional multiprocessing import logic
We use a distinct module to simplify import statements and avoid introducing
circular dependencies (for instance for the assert_spawning name).
"""
import os
import warnings
# Obtain the possible configuration from the environment, assuming 1 (on)
# by default; a value of 0 disables multiprocessing by setting mp to None.
# Setting a value that is not a valid integer should fail instructively.
mp = int(os.environ.get('JOBLIB_MULTIPROCESSING', 1)) or None
if mp:
try:
import multiprocessing as mp
import _multiprocessing # noqa
except ImportError:
mp = None
# 2nd stage: validate that locking is available on the system and
# issue a warning if not
if mp is not None:
try:
# try to create a named semaphore using SemLock to make sure they are
# available on this platform. We use the low-level object
# _multiprocessing.SemLock to avoid spawning a resource tracker on
# Unix systems or changing the default backend.
import tempfile
from _multiprocessing import SemLock
_rand = tempfile._RandomNameSequence()
for i in range(100):
try:
name = '/joblib-{}-{}' .format(
os.getpid(), next(_rand))
_sem = SemLock(0, 0, 1, name=name, unlink=True)
del _sem # cleanup
break
except FileExistsError as e: # pragma: no cover
if i >= 99:
raise FileExistsError(
'cannot find name for semaphore') from e
except (FileExistsError, AttributeError, ImportError, OSError) as e:
mp = None
warnings.warn('%s. joblib will operate in serial mode' % (e,))
# 3rd stage: backward compat for the assert_spawning helper
if mp is not None:
from multiprocessing.context import assert_spawning
else:
assert_spawning = None
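# Downstream joblib modules typically guard their process-based code paths on
# the ``mp`` attribute exposed here, for instance (illustrative):
#
#     from joblib._multiprocessing_helpers import mp
#     if mp is not None:
#         from multiprocessing.pool import ThreadPool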

View File

@ -0,0 +1,653 @@
"""
Backends for embarrassingly parallel code.
"""
import gc
import os
import warnings
import threading
import functools
import contextlib
from abc import ABCMeta, abstractmethod
from .my_exceptions import WorkerInterrupt
from ._multiprocessing_helpers import mp
if mp is not None:
from .pool import MemmappingPool
from multiprocessing.pool import ThreadPool
from .executor import get_memmapping_executor
# Compat between concurrent.futures and multiprocessing TimeoutError
from multiprocessing import TimeoutError
from concurrent.futures._base import TimeoutError as CfTimeoutError
from .externals.loky import process_executor, cpu_count
class ParallelBackendBase(metaclass=ABCMeta):
"""Helper abc which defines all methods a ParallelBackend must implement"""
supports_timeout = False
supports_inner_max_num_threads = False
nesting_level = None
def __init__(self, nesting_level=None, inner_max_num_threads=None,
**kwargs):
super().__init__(**kwargs)
self.nesting_level = nesting_level
self.inner_max_num_threads = inner_max_num_threads
MAX_NUM_THREADS_VARS = [
'OMP_NUM_THREADS', 'OPENBLAS_NUM_THREADS', 'MKL_NUM_THREADS',
'BLIS_NUM_THREADS', 'VECLIB_MAXIMUM_THREADS', 'NUMBA_NUM_THREADS',
'NUMEXPR_NUM_THREADS',
]
TBB_ENABLE_IPC_VAR = "ENABLE_IPC"
@abstractmethod
def effective_n_jobs(self, n_jobs):
"""Determine the number of jobs that can actually run in parallel
n_jobs is the number of workers requested by the caller. Passing
n_jobs=-1 means requesting all available workers, for instance matching
the number of CPU cores on the worker host(s).
This method should return a guesstimate of the number of workers that
can actually perform work concurrently. The primary use case is to make
it possible for the caller to know in how many chunks to slice the
work.
In general working on larger data chunks is more efficient (less
scheduling overhead and better use of CPU cache prefetching heuristics)
as long as all the workers have enough work to do.
"""
@abstractmethod
def apply_async(self, func, callback=None):
"""Schedule a func to be run"""
def configure(self, n_jobs=1, parallel=None, prefer=None, require=None,
**backend_args):
"""Reconfigure the backend and return the number of workers.
This makes it possible to reuse an existing backend instance for
successive independent calls to Parallel with different parameters.
"""
self.parallel = parallel
return self.effective_n_jobs(n_jobs)
def start_call(self):
"""Call-back method called at the beginning of a Parallel call"""
def stop_call(self):
"""Call-back method called at the end of a Parallel call"""
def terminate(self):
"""Shutdown the workers and free the shared memory."""
def compute_batch_size(self):
"""Determine the optimal batch size"""
return 1
def batch_completed(self, batch_size, duration):
"""Callback indicate how long it took to run a batch"""
def get_exceptions(self):
"""List of exception types to be captured."""
return []
def abort_everything(self, ensure_ready=True):
"""Abort any running tasks
This is called when an exception has been raised while executing a task,
and all the remaining tasks will be ignored and can therefore be
aborted to spare computation resources.
If ensure_ready is True, the backend should be left in an operating
state as future tasks might be re-submitted via that same backend
instance.
If ensure_ready is False, the implementer of this method can decide
to leave the backend in a closed / terminated state as no new tasks
are expected to be submitted to this backend.
Setting ensure_ready to False is an optimization that can be leveraged
when aborting tasks via killing processes from a local process pool
managed by the backend itself: if we expect no new tasks, there is no
point in re-creating new workers.
"""
# Does nothing by default: to be overridden in subclasses when
# canceling tasks is possible.
pass
def get_nested_backend(self):
"""Backend instance to be used by nested Parallel calls.
By default a thread-based backend is used for the first level of
nesting. Beyond that, switch to the sequential backend to avoid spawning
too many threads on the host.
"""
nesting_level = getattr(self, 'nesting_level', 0) + 1
if nesting_level > 1:
return SequentialBackend(nesting_level=nesting_level), None
else:
return ThreadingBackend(nesting_level=nesting_level), None
@contextlib.contextmanager
def retrieval_context(self):
"""Context manager to manage an execution context.
Calls to Parallel.retrieve will be made inside this context.
By default, this does nothing. It may be useful for subclasses to
handle nested parallelism. In particular, it may be required to avoid
deadlocks if a backend manages a fixed number of workers, when those
workers may be asked to do nested Parallel calls. Without
'retrieval_context' this could lead to deadlock, as all the workers
managed by the backend may be "busy" waiting for the nested parallel
calls to finish, but the backend has no free workers to execute those
tasks.
"""
yield
def _prepare_worker_env(self, n_jobs):
"""Return environment variables limiting threadpools in external libs.
This function returns a dict containing the environment variables to pass
when creating a pool of processes. These environment variables limit the
number of threads to `n_threads` for the OpenMP, MKL, Accelerate and
OpenBLAS libraries in the child processes.
"""
explicit_n_threads = self.inner_max_num_threads
default_n_threads = str(max(cpu_count() // n_jobs, 1))
# Set the inner environment variables to self.inner_max_num_threads if
# it is given. Else, default to cpu_count // n_jobs unless the variable
# is already present in the parent process environment.
env = {}
for var in self.MAX_NUM_THREADS_VARS:
if explicit_n_threads is None:
var_value = os.environ.get(var, None)
if var_value is None:
var_value = default_n_threads
else:
var_value = str(explicit_n_threads)
env[var] = var_value
if self.TBB_ENABLE_IPC_VAR not in os.environ:
# To avoid over-subscription when using TBB, let the TBB schedulers
# use Inter Process Communication to coordinate:
env[self.TBB_ENABLE_IPC_VAR] = "1"
return env
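# For instance, on an 8-core host with n_jobs=4 and no explicit
# inner_max_num_threads, every variable in MAX_NUM_THREADS_VARS that is
# absent from the parent environment is exported as "2"
# (cpu_count() // n_jobs), and ENABLE_IPC is set to "1" for TBB if it is
# not already set.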
@staticmethod
def in_main_thread():
return isinstance(threading.current_thread(), threading._MainThread)
class SequentialBackend(ParallelBackendBase):
"""A ParallelBackend which will execute all batches sequentially.
Does not use/create any threading objects, and hence has minimal
overhead. Used when n_jobs == 1.
"""
uses_threads = True
supports_sharedmem = True
def effective_n_jobs(self, n_jobs):
"""Determine the number of jobs which are going to run in parallel"""
if n_jobs == 0:
raise ValueError('n_jobs == 0 in Parallel has no meaning')
return 1
def apply_async(self, func, callback=None):
"""Schedule a func to be run"""
result = ImmediateResult(func)
if callback:
callback(result)
return result
def get_nested_backend(self):
# import is not top level to avoid cyclic import errors.
from .parallel import get_active_backend
# SequentialBackend should not change the nesting level, the
# default backend, or the number of jobs. Just return the current one.
return get_active_backend()
class PoolManagerMixin(object):
"""A helper class for managing pool of workers."""
_pool = None
def effective_n_jobs(self, n_jobs):
"""Determine the number of jobs which are going to run in parallel"""
if n_jobs == 0:
raise ValueError('n_jobs == 0 in Parallel has no meaning')
elif mp is None or n_jobs is None:
# multiprocessing is not available or disabled, fallback
# to sequential mode
return 1
elif n_jobs < 0:
n_jobs = max(cpu_count() + 1 + n_jobs, 1)
return n_jobs
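# Worked example: on an 8-core host, n_jobs=-1 yields
# max(8 + 1 - 1, 1) = 8 workers and n_jobs=-2 yields 7; any
# n_jobs <= -(cpu_count() + 1) degrades gracefully to a single worker.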
def terminate(self):
"""Shutdown the process or thread pool"""
if self._pool is not None:
self._pool.close()
self._pool.terminate() # terminate does a join()
self._pool = None
def _get_pool(self):
"""Used by apply_async to make it possible to implement lazy init"""
return self._pool
def apply_async(self, func, callback=None):
"""Schedule a func to be run"""
return self._get_pool().apply_async(
SafeFunction(func), callback=callback)
def abort_everything(self, ensure_ready=True):
"""Shutdown the pool and restart a new one with the same parameters"""
self.terminate()
if ensure_ready:
self.configure(n_jobs=self.parallel.n_jobs, parallel=self.parallel,
**self.parallel._backend_args)
class AutoBatchingMixin(object):
"""A helper class for automagically batching jobs."""
# In seconds, should be big enough to hide multiprocessing dispatching
# overhead.
# This setting was found by running benchmarks/bench_auto_batching.py
# with various parameters on various platforms.
MIN_IDEAL_BATCH_DURATION = .2
# Should not be too high to avoid stragglers: long jobs running alone
# on a single worker while other workers have no work to process any more.
MAX_IDEAL_BATCH_DURATION = 2
# Batching counters default values
_DEFAULT_EFFECTIVE_BATCH_SIZE = 1
_DEFAULT_SMOOTHED_BATCH_DURATION = 0.0
def __init__(self, **kwargs):
super().__init__(**kwargs)
self._effective_batch_size = self._DEFAULT_EFFECTIVE_BATCH_SIZE
self._smoothed_batch_duration = self._DEFAULT_SMOOTHED_BATCH_DURATION
def compute_batch_size(self):
"""Determine the optimal batch size"""
old_batch_size = self._effective_batch_size
batch_duration = self._smoothed_batch_duration
if (batch_duration > 0 and
batch_duration < self.MIN_IDEAL_BATCH_DURATION):
# The current batch size is too small: the duration of the
# processing of a batch of tasks is not large enough to hide
# the scheduling overhead.
ideal_batch_size = int(old_batch_size *
self.MIN_IDEAL_BATCH_DURATION /
batch_duration)
# Multiply by two to limit oscillations between min and max.
ideal_batch_size *= 2
# don't increase the batch size too fast to limit huge batch sizes
# potentially leading to starving workers
batch_size = min(2 * old_batch_size, ideal_batch_size)
batch_size = max(batch_size, 1)
self._effective_batch_size = batch_size
if self.parallel.verbose >= 10:
self.parallel._print(
"Batch computation too fast (%.4fs.) "
"Setting batch_size=%d.", (batch_duration, batch_size))
elif (batch_duration > self.MAX_IDEAL_BATCH_DURATION and
old_batch_size >= 2):
# The current batch size is too big. If we schedule overly long
# running batches some CPUs might wait with nothing left to do
# while a couple of CPUs are left processing a few long running
# batches. Better reduce the batch size a bit to limit the
# likelihood of scheduling such stragglers.
# decrease the batch size quickly to limit potential starving
ideal_batch_size = int(
old_batch_size * self.MIN_IDEAL_BATCH_DURATION / batch_duration
)
# Multiply by two to limit oscillations between min and max.
batch_size = max(2 * ideal_batch_size, 1)
self._effective_batch_size = batch_size
if self.parallel.verbose >= 10:
self.parallel._print(
"Batch computation too slow (%.4fs.) "
"Setting batch_size=%d.", (batch_duration, batch_size))
else:
# No batch size adjustment
batch_size = old_batch_size
if batch_size != old_batch_size:
# Reset estimation of the smoothed mean batch duration: this
# estimate is updated in the multiprocessing apply_async
# CallBack as long as the batch_size is constant. Therefore
# we need to reset the estimate whenever we re-tune the batch
# size.
self._smoothed_batch_duration = \
self._DEFAULT_SMOOTHED_BATCH_DURATION
return batch_size
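# Worked example: with MIN_IDEAL_BATCH_DURATION = 0.2, a batch of size 4
# measured at 0.01s gives ideal_batch_size = int(4 * 0.2 / 0.01) * 2 = 160,
# capped to min(2 * 4, 160) = 8, so subsequent batches dispatch 8 tasks
# at a time.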
def batch_completed(self, batch_size, duration):
"""Callback indicate how long it took to run a batch"""
if batch_size == self._effective_batch_size:
# Update the smoothed streaming estimate of the duration of a batch
# from dispatch to completion
old_duration = self._smoothed_batch_duration
if old_duration == self._DEFAULT_SMOOTHED_BATCH_DURATION:
# First record of duration for this batch size after the last
# reset.
new_duration = duration
else:
# Update the exponentially weighted average of the duration of
# batch for the current effective size.
new_duration = 0.8 * old_duration + 0.2 * duration
self._smoothed_batch_duration = new_duration
def reset_batch_stats(self):
"""Reset batch statistics to default values.
This avoids interference with future jobs.
"""
self._effective_batch_size = self._DEFAULT_EFFECTIVE_BATCH_SIZE
self._smoothed_batch_duration = self._DEFAULT_SMOOTHED_BATCH_DURATION
class ThreadingBackend(PoolManagerMixin, ParallelBackendBase):
"""A ParallelBackend which will use a thread pool to execute batches in.
This is a low-overhead backend but it suffers from the Python Global
Interpreter Lock if the called function relies a lot on Python objects.
Mostly useful when the execution bottleneck is a compiled extension that
explicitly releases the GIL (for instance a Cython loop wrapped in a "with
nogil" block or an expensive call to a library such as NumPy).
The actual thread pool is lazily initialized: its construction is
delayed until the first call to apply_async.
ThreadingBackend is used as the default backend for nested calls.
"""
supports_timeout = True
uses_threads = True
supports_sharedmem = True
def configure(self, n_jobs=1, parallel=None, **backend_args):
"""Build a process or thread pool and return the number of workers"""
n_jobs = self.effective_n_jobs(n_jobs)
if n_jobs == 1:
# Avoid unnecessary overhead and use sequential backend instead.
raise FallbackToBackend(
SequentialBackend(nesting_level=self.nesting_level))
self.parallel = parallel
self._n_jobs = n_jobs
return n_jobs
def _get_pool(self):
"""Lazily initialize the thread pool
The actual pool of worker threads is only initialized at the first
call to apply_async.
"""
if self._pool is None:
self._pool = ThreadPool(self._n_jobs)
return self._pool
class MultiprocessingBackend(PoolManagerMixin, AutoBatchingMixin,
ParallelBackendBase):
"""A ParallelBackend which will use a multiprocessing.Pool.
Will introduce some communication and memory overhead when exchanging
input and output data with the worker Python processes.
However, does not suffer from the Python Global Interpreter Lock.
"""
supports_timeout = True
def effective_n_jobs(self, n_jobs):
"""Determine the number of jobs which are going to run in parallel.
This also checks if we are attempting to create a nested parallel
loop.
"""
if mp is None:
return 1
if mp.current_process().daemon:
# Daemonic processes cannot have children
if n_jobs != 1:
if inside_dask_worker():
msg = (
"Inside a Dask worker with daemon=True, "
"setting n_jobs=1.\nPossible work-arounds:\n"
"- dask.config.set("
"{'distributed.worker.daemon': False})"
"- set the environment variable "
"DASK_DISTRIBUTED__WORKER__DAEMON=False\n"
"before creating your Dask cluster."
)
else:
msg = (
'Multiprocessing-backed parallel loops '
'cannot be nested, setting n_jobs=1'
)
warnings.warn(msg, stacklevel=3)
return 1
if process_executor._CURRENT_DEPTH > 0:
# Mixing loky and multiprocessing in nested loop is not supported
if n_jobs != 1:
warnings.warn(
'Multiprocessing-backed parallel loops cannot be nested,'
' below loky, setting n_jobs=1',
stacklevel=3)
return 1
elif not (self.in_main_thread() or self.nesting_level == 0):
# Prevent posix fork inside non-main posix threads
if n_jobs != 1:
warnings.warn(
'Multiprocessing-backed parallel loops cannot be nested'
' below threads, setting n_jobs=1',
stacklevel=3)
return 1
return super(MultiprocessingBackend, self).effective_n_jobs(n_jobs)
def configure(self, n_jobs=1, parallel=None, prefer=None, require=None,
**memmappingpool_args):
"""Build a process or thread pool and return the number of workers"""
n_jobs = self.effective_n_jobs(n_jobs)
if n_jobs == 1:
raise FallbackToBackend(
SequentialBackend(nesting_level=self.nesting_level))
# Make sure to free as much memory as possible before forking
gc.collect()
self._pool = MemmappingPool(n_jobs, **memmappingpool_args)
self.parallel = parallel
return n_jobs
def terminate(self):
"""Shutdown the process or thread pool"""
super(MultiprocessingBackend, self).terminate()
self.reset_batch_stats()
class LokyBackend(AutoBatchingMixin, ParallelBackendBase):
"""Managing pool of workers with loky instead of multiprocessing."""
supports_timeout = True
supports_inner_max_num_threads = True
def configure(self, n_jobs=1, parallel=None, prefer=None, require=None,
idle_worker_timeout=300, **memmappingexecutor_args):
"""Build a process executor and return the number of workers"""
n_jobs = self.effective_n_jobs(n_jobs)
if n_jobs == 1:
raise FallbackToBackend(
SequentialBackend(nesting_level=self.nesting_level))
self._workers = get_memmapping_executor(
n_jobs, timeout=idle_worker_timeout,
env=self._prepare_worker_env(n_jobs=n_jobs),
context_id=parallel._id, **memmappingexecutor_args)
self.parallel = parallel
return n_jobs
def effective_n_jobs(self, n_jobs):
"""Determine the number of jobs which are going to run in parallel"""
if n_jobs == 0:
raise ValueError('n_jobs == 0 in Parallel has no meaning')
elif mp is None or n_jobs is None:
# multiprocessing is not available or disabled, fallback
# to sequential mode
return 1
elif mp.current_process().daemon:
# Daemonic processes cannot have children
if n_jobs != 1:
if inside_dask_worker():
msg = (
"Inside a Dask worker with daemon=True, "
"setting n_jobs=1.\nPossible work-arounds:\n"
"- dask.config.set("
"{'distributed.worker.daemon': False})\n"
"- set the environment variable "
"DASK_DISTRIBUTED__WORKER__DAEMON=False\n"
"before creating your Dask cluster."
)
else:
msg = (
                        'Loky-backed parallel loops cannot be called in a'
                        ' multiprocessing context, setting n_jobs=1'
)
warnings.warn(msg, stacklevel=3)
return 1
elif not (self.in_main_thread() or self.nesting_level == 0):
            # Prevent posix fork inside non-main posix threads
if n_jobs != 1:
warnings.warn(
'Loky-backed parallel loops cannot be nested below '
'threads, setting n_jobs=1',
stacklevel=3)
return 1
elif n_jobs < 0:
n_jobs = max(cpu_count() + 1 + n_jobs, 1)
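            # e.g. with 8 CPUs: n_jobs=-1 -> 8 workers, n_jobs=-2 -> 7 workers.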
return n_jobs
def apply_async(self, func, callback=None):
"""Schedule a func to be run"""
future = self._workers.submit(SafeFunction(func))
future.get = functools.partial(self.wrap_future_result, future)
if callback is not None:
future.add_done_callback(callback)
return future
@staticmethod
def wrap_future_result(future, timeout=None):
"""Wrapper for Future.result to implement the same behaviour as
AsyncResults.get from multiprocessing."""
try:
return future.result(timeout=timeout)
except CfTimeoutError as e:
raise TimeoutError from e
def terminate(self):
if self._workers is not None:
# Don't terminate the workers as we want to reuse them in later
# calls, but cleanup the temporary resources that the Parallel call
# created. This 'hack' requires a private, low-level operation.
self._workers._temp_folder_manager._unlink_temporary_resources(
context_id=self.parallel._id
)
self._workers = None
self.reset_batch_stats()
def abort_everything(self, ensure_ready=True):
"""Shutdown the workers and restart a new one with the same parameters
"""
self._workers.terminate(kill_workers=True)
self._workers = None
if ensure_ready:
self.configure(n_jobs=self.parallel.n_jobs, parallel=self.parallel)
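# A minimal sketch (not part of the original module), assuming the optional
# `dask`/`distributed` packages are installed: how the work-around quoted in
# the daemon warnings above could be applied before creating a Dask cluster.
def _disable_dask_worker_daemon():
    import dask
    # Disable daemonic workers so process-based joblib backends can fork/spawn.
    dask.config.set({'distributed.worker.daemon': False})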
class ImmediateResult(object):
def __init__(self, batch):
# Don't delay the application, to avoid keeping the input
# arguments in memory
self.results = batch()
def get(self):
return self.results
class SafeFunction(object):
"""Wrapper that handles the serialization of exception tracebacks.
TODO python2_drop: check whether SafeFunction is still needed since we
dropped support for Python 2. If not needed anymore it should be
deprecated.
If an exception is triggered when calling the inner function, a copy of
the full traceback is captured to make it possible to serialize
it so that it can be rendered in a different Python process.
"""
def __init__(self, func):
self.func = func
def __call__(self, *args, **kwargs):
try:
return self.func(*args, **kwargs)
except KeyboardInterrupt as e:
# We capture the KeyboardInterrupt and reraise it as
# something different, as multiprocessing does not
# interrupt processing for a KeyboardInterrupt
raise WorkerInterrupt() from e
except BaseException:
# Rely on Python 3 built-in Remote Traceback reporting
raise
class FallbackToBackend(Exception):
"""Raised when configuration should fallback to another backend"""
def __init__(self, backend):
self.backend = backend
def inside_dask_worker():
"""Check whether the current function is executed inside a Dask worker.
"""
# This function can not be in joblib._dask because there would be a
# circular import:
# _dask imports _parallel_backend that imports _dask ...
try:
from distributed import get_worker
except ImportError:
return False
try:
get_worker()
return True
except ValueError:
return False

View File

@ -0,0 +1,414 @@
"""Storage providers backends for Memory caching."""
import re
import os
import os.path
import datetime
import json
import shutil
import warnings
import collections
import operator
import threading
from abc import ABCMeta, abstractmethod
from .backports import concurrency_safe_rename
from .disk import mkdirp, memstr_to_bytes, rm_subdirs
from . import numpy_pickle
CacheItemInfo = collections.namedtuple('CacheItemInfo',
'path size last_access')
def concurrency_safe_write(object_to_write, filename, write_func):
"""Writes an object into a unique file in a concurrency-safe way."""
thread_id = id(threading.current_thread())
temporary_filename = '{}.thread-{}-pid-{}'.format(
filename, thread_id, os.getpid())
write_func(object_to_write, temporary_filename)
return temporary_filename
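# Note: the caller is expected to atomically rename the returned temporary file
# into place afterwards (see StoreBackendMixin._concurrency_safe_write below).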
class StoreBackendBase(metaclass=ABCMeta):
"""Helper Abstract Base Class which defines all methods that
a StorageBackend must implement."""
location = None
@abstractmethod
def _open_item(self, f, mode):
"""Opens an item on the store and return a file-like object.
This method is private and only used by the StoreBackendMixin object.
Parameters
----------
f: a file-like object
The file-like object where an item is stored and retrieved
mode: string, optional
            The mode in which the file-like object is opened. Allowed values
            are 'rb' and 'wb'.
Returns
-------
a file-like object
"""
@abstractmethod
def _item_exists(self, location):
"""Checks if an item location exists in the store.
This method is private and only used by the StoreBackendMixin object.
Parameters
----------
location: string
The location of an item. On a filesystem, this corresponds to the
absolute path, including the filename, of a file.
Returns
-------
True if the item exists, False otherwise
"""
@abstractmethod
def _move_item(self, src, dst):
"""Moves an item from src to dst in the store.
This method is private and only used by the StoreBackendMixin object.
Parameters
----------
src: string
The source location of an item
dst: string
The destination location of an item
"""
@abstractmethod
def create_location(self, location):
"""Creates a location on the store.
Parameters
----------
location: string
The location in the store. On a filesystem, this corresponds to a
directory.
"""
@abstractmethod
def clear_location(self, location):
"""Clears a location on the store.
Parameters
----------
location: string
The location in the store. On a filesystem, this corresponds to a
directory or a filename absolute path
"""
@abstractmethod
def get_items(self):
"""Returns the whole list of items available in the store.
Returns
-------
        The list of items identified by their ids (e.g. filename in a
filesystem).
"""
@abstractmethod
def configure(self, location, verbose=0, backend_options=dict()):
"""Configures the store.
Parameters
----------
location: string
The base location used by the store. On a filesystem, this
corresponds to a directory.
verbose: int
The level of verbosity of the store
backend_options: dict
Contains a dictionary of named parameters used to configure the
store backend.
"""
class StoreBackendMixin(object):
"""Class providing all logic for managing the store in a generic way.
The StoreBackend subclass has to implement 3 methods: create_location,
clear_location and configure. The StoreBackend also has to provide
a private _open_item, _item_exists and _move_item methods. The _open_item
method has to have the same signature as the builtin open and return a
file-like object.
"""
def load_item(self, path, verbose=1, msg=None):
"""Load an item from the store given its path as a list of
strings."""
full_path = os.path.join(self.location, *path)
if verbose > 1:
if verbose < 10:
print('{0}...'.format(msg))
else:
print('{0} from {1}'.format(msg, full_path))
mmap_mode = (None if not hasattr(self, 'mmap_mode')
else self.mmap_mode)
filename = os.path.join(full_path, 'output.pkl')
if not self._item_exists(filename):
raise KeyError("Non-existing item (may have been "
"cleared).\nFile %s does not exist" % filename)
# file-like object cannot be used when mmap_mode is set
if mmap_mode is None:
with self._open_item(filename, "rb") as f:
item = numpy_pickle.load(f)
else:
item = numpy_pickle.load(filename, mmap_mode=mmap_mode)
return item
def dump_item(self, path, item, verbose=1):
"""Dump an item in the store at the path given as a list of
strings."""
try:
item_path = os.path.join(self.location, *path)
if not self._item_exists(item_path):
self.create_location(item_path)
filename = os.path.join(item_path, 'output.pkl')
if verbose > 10:
print('Persisting in %s' % item_path)
def write_func(to_write, dest_filename):
with self._open_item(dest_filename, "wb") as f:
numpy_pickle.dump(to_write, f,
compress=self.compress)
self._concurrency_safe_write(item, filename, write_func)
        except:  # noqa: E722
            # Race condition in the creation of the directory.
            pass
def clear_item(self, path):
"""Clear the item at the path, given as a list of strings."""
item_path = os.path.join(self.location, *path)
if self._item_exists(item_path):
self.clear_location(item_path)
def contains_item(self, path):
"""Check if there is an item at the path, given as a list of
strings"""
item_path = os.path.join(self.location, *path)
filename = os.path.join(item_path, 'output.pkl')
return self._item_exists(filename)
def get_item_info(self, path):
"""Return information about item."""
return {'location': os.path.join(self.location,
*path)}
def get_metadata(self, path):
"""Return actual metadata of an item."""
try:
item_path = os.path.join(self.location, *path)
filename = os.path.join(item_path, 'metadata.json')
with self._open_item(filename, 'rb') as f:
return json.loads(f.read().decode('utf-8'))
except: # noqa: E722
return {}
def store_metadata(self, path, metadata):
"""Store metadata of a computation."""
try:
item_path = os.path.join(self.location, *path)
self.create_location(item_path)
filename = os.path.join(item_path, 'metadata.json')
def write_func(to_write, dest_filename):
with self._open_item(dest_filename, "wb") as f:
f.write(json.dumps(to_write).encode('utf-8'))
self._concurrency_safe_write(metadata, filename, write_func)
except: # noqa: E722
pass
def contains_path(self, path):
"""Check cached function is available in store."""
func_path = os.path.join(self.location, *path)
        return self._item_exists(func_path)
def clear_path(self, path):
"""Clear all items with a common path in the store."""
func_path = os.path.join(self.location, *path)
if self._item_exists(func_path):
self.clear_location(func_path)
def store_cached_func_code(self, path, func_code=None):
"""Store the code of the cached function."""
func_path = os.path.join(self.location, *path)
if not self._item_exists(func_path):
self.create_location(func_path)
if func_code is not None:
filename = os.path.join(func_path, "func_code.py")
with self._open_item(filename, 'wb') as f:
f.write(func_code.encode('utf-8'))
def get_cached_func_code(self, path):
"""Store the code of the cached function."""
path += ['func_code.py', ]
filename = os.path.join(self.location, *path)
try:
with self._open_item(filename, 'rb') as f:
return f.read().decode('utf-8')
except: # noqa: E722
raise
def get_cached_func_info(self, path):
"""Return information related to the cached function if it exists."""
return {'location': os.path.join(self.location, *path)}
def clear(self):
"""Clear the whole store content."""
self.clear_location(self.location)
def reduce_store_size(self, bytes_limit):
"""Reduce store size to keep it under the given bytes limit."""
items_to_delete = self._get_items_to_delete(bytes_limit)
for item in items_to_delete:
if self.verbose > 10:
print('Deleting item {0}'.format(item))
try:
self.clear_location(item.path)
except OSError:
# Even with ignore_errors=True shutil.rmtree can raise OSError
# with:
# [Errno 116] Stale file handle if another process has deleted
# the folder already.
pass
def _get_items_to_delete(self, bytes_limit):
"""Get items to delete to keep the store under a size limit."""
if isinstance(bytes_limit, str):
bytes_limit = memstr_to_bytes(bytes_limit)
items = self.get_items()
size = sum(item.size for item in items)
to_delete_size = size - bytes_limit
if to_delete_size < 0:
return []
# We want to delete first the cache items that were accessed a
# long time ago
items.sort(key=operator.attrgetter('last_access'))
items_to_delete = []
size_so_far = 0
for item in items:
if size_so_far > to_delete_size:
break
items_to_delete.append(item)
size_so_far += item.size
return items_to_delete
def _concurrency_safe_write(self, to_write, filename, write_func):
"""Writes an object into a file in a concurrency-safe way."""
temporary_filename = concurrency_safe_write(to_write,
filename, write_func)
self._move_item(temporary_filename, filename)
def __repr__(self):
"""Printable representation of the store location."""
return '{class_name}(location="{location}")'.format(
class_name=self.__class__.__name__, location=self.location)
class FileSystemStoreBackend(StoreBackendBase, StoreBackendMixin):
"""A StoreBackend used with local or network file systems."""
_open_item = staticmethod(open)
_item_exists = staticmethod(os.path.exists)
_move_item = staticmethod(concurrency_safe_rename)
def clear_location(self, location):
"""Delete location on store."""
if (location == self.location):
rm_subdirs(location)
else:
shutil.rmtree(location, ignore_errors=True)
def create_location(self, location):
"""Create object location on store"""
mkdirp(location)
def get_items(self):
"""Returns the whole list of items available in the store."""
items = []
for dirpath, _, filenames in os.walk(self.location):
is_cache_hash_dir = re.match('[a-f0-9]{32}',
os.path.basename(dirpath))
if is_cache_hash_dir:
output_filename = os.path.join(dirpath, 'output.pkl')
try:
last_access = os.path.getatime(output_filename)
except OSError:
try:
last_access = os.path.getatime(dirpath)
except OSError:
# The directory has already been deleted
continue
last_access = datetime.datetime.fromtimestamp(last_access)
try:
full_filenames = [os.path.join(dirpath, fn)
for fn in filenames]
dirsize = sum(os.path.getsize(fn)
for fn in full_filenames)
except OSError:
# Either output_filename or one of the files in
# dirpath does not exist any more. We assume this
# directory is being cleaned by another process already
continue
items.append(CacheItemInfo(dirpath, dirsize,
last_access))
return items
def configure(self, location, verbose=1, backend_options=None):
"""Configure the store backend.
For this backend, valid store options are 'compress' and 'mmap_mode'
"""
if backend_options is None:
backend_options = {}
# setup location directory
self.location = location
if not os.path.exists(self.location):
mkdirp(self.location)
# item can be stored compressed for faster I/O
self.compress = backend_options.get('compress', False)
# FileSystemStoreBackend can be used with mmap_mode options under
# certain conditions.
mmap_mode = backend_options.get('mmap_mode')
if self.compress and mmap_mode is not None:
warnings.warn('Compressed items cannot be memmapped in a '
'filesystem store. Option will be ignored.',
stacklevel=2)
self.mmap_mode = mmap_mode
self.verbose = verbose
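# A minimal usage sketch (not part of the original module); `cache_dir` is a
# hypothetical writable directory and the two-element path mimics the
# func-name/args-hash layout used by the cache.
def _filesystem_store_example(cache_dir):
    backend = FileSystemStoreBackend()
    backend.configure(cache_dir, verbose=0)
    path = ['my_func', '0123456789abcdef0123456789abcdef']
    backend.dump_item(path, {'answer': 42})
    assert backend.contains_item(path)
    # load_item reads back the pickled object stored under output.pkl.
    return backend.load_item(path, verbose=0)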

View File

@ -0,0 +1,44 @@
# Adapted from https://stackoverflow.com/a/9558001/2536294
import ast
import operator as op
# supported operators
operators = {
ast.Add: op.add,
ast.Sub: op.sub,
ast.Mult: op.mul,
ast.Div: op.truediv,
ast.FloorDiv: op.floordiv,
ast.Mod: op.mod,
ast.Pow: op.pow,
ast.USub: op.neg,
}
def eval_expr(expr):
"""
>>> eval_expr('2*6')
12
>>> eval_expr('2**6')
64
>>> eval_expr('1 + 2*3**(4) / (6 + -7)')
-161.0
"""
try:
return eval_(ast.parse(expr, mode="eval").body)
except (TypeError, SyntaxError, KeyError) as e:
raise ValueError(
f"{expr!r} is not a valid or supported arithmetic expression."
) from e
def eval_(node):
if isinstance(node, ast.Num): # <number>
return node.n
elif isinstance(node, ast.BinOp): # <left> <operator> <right>
return operators[type(node.op)](eval_(node.left), eval_(node.right))
elif isinstance(node, ast.UnaryOp): # <operator> <operand> e.g., -1
return operators[type(node.op)](eval_(node.operand))
else:
raise TypeError(node)

View File

@ -0,0 +1,190 @@
"""
Backports of fixes for joblib dependencies
"""
import os
import re
import time
from os.path import basename
from multiprocessing import util
# Prior to joblib 1.2, joblib used to import LooseVersion from
# distutils.version. This import had a side-effect with setuptools that was
# implicitly required in sklearn.show_versions() to work without raising an
# exception for scikit-learn 1.0 and earlier. This has been fixed in
# scikit-learn 1.1 (not yet released at the time of writing), see:
# https://github.com/scikit-learn/scikit-learn/issues/22614
#
# To avoid unnecessary disruption for users who might update to joblib 1.2
# prior to a release of scikit-learn that includes the fix, let's keep on
# importing distutils here. TODO: Remove this for a future release of joblib,
# e.g. 6 months after the release of scikit-learn 1.1.
import distutils # noqa
class Version:
"""Backport from deprecated distutils
We maintain this backport to avoid introducing a new dependency on
`packaging`.
    We might re-explore this choice in the future if all major Python projects
introduce a dependency on packaging anyway.
"""
def __init__(self, vstring=None):
if vstring:
self.parse(vstring)
def __repr__(self):
return "%s ('%s')" % (self.__class__.__name__, str(self))
def __eq__(self, other):
c = self._cmp(other)
if c is NotImplemented:
return c
return c == 0
def __lt__(self, other):
c = self._cmp(other)
if c is NotImplemented:
return c
return c < 0
def __le__(self, other):
c = self._cmp(other)
if c is NotImplemented:
return c
return c <= 0
def __gt__(self, other):
c = self._cmp(other)
if c is NotImplemented:
return c
return c > 0
def __ge__(self, other):
c = self._cmp(other)
if c is NotImplemented:
return c
return c >= 0
class LooseVersion(Version):
"""Backport from deprecated distutils
We maintain this backport to avoid introducing a new dependency on
`packaging`.
    We might re-explore this choice in the future if all major Python projects
introduce a dependency on packaging anyway.
"""
component_re = re.compile(r'(\d+ | [a-z]+ | \.)', re.VERBOSE)
def __init__(self, vstring=None):
if vstring:
self.parse(vstring)
def parse(self, vstring):
# I've given up on thinking I can reconstruct the version string
# from the parsed tuple -- so I just store the string here for
# use by __str__
self.vstring = vstring
components = [x for x in self.component_re.split(vstring)
if x and x != '.']
for i, obj in enumerate(components):
try:
components[i] = int(obj)
except ValueError:
pass
self.version = components
def __str__(self):
return self.vstring
def __repr__(self):
return "LooseVersion ('%s')" % str(self)
def _cmp(self, other):
if isinstance(other, str):
other = LooseVersion(other)
elif not isinstance(other, LooseVersion):
return NotImplemented
if self.version == other.version:
return 0
if self.version < other.version:
return -1
if self.version > other.version:
return 1
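# Illustrative examples (not part of the original module): components are
# compared numerically when possible, so '1.9' sorts before '1.13', and plain
# strings on the right-hand side are converted by _cmp.
def _looseversion_examples():
    assert LooseVersion('1.9') < LooseVersion('1.13')
    assert LooseVersion('1.13.0') >= '1.13'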
try:
import numpy as np
def make_memmap(filename, dtype='uint8', mode='r+', offset=0,
shape=None, order='C', unlink_on_gc_collect=False):
"""Custom memmap constructor compatible with numpy.memmap.
This function:
        - is a backport of the numpy memmap offset fix (see
          https://github.com/numpy/numpy/pull/8443 for more details;
          the numpy fix is available starting with numpy 1.13)
- adds ``unlink_on_gc_collect``, which specifies explicitly whether
the process re-constructing the memmap owns a reference to the
underlying file. If set to True, it adds a finalizer to the
newly-created memmap that sends a maybe_unlink request for the
memmaped file to resource_tracker.
"""
util.debug(
"[MEMMAP READ] creating a memmap (shape {}, filename {}, "
"pid {})".format(shape, basename(filename), os.getpid())
)
mm = np.memmap(filename, dtype=dtype, mode=mode, offset=offset,
shape=shape, order=order)
if LooseVersion(np.__version__) < '1.13':
mm.offset = offset
if unlink_on_gc_collect:
from ._memmapping_reducer import add_maybe_unlink_finalizer
add_maybe_unlink_finalizer(mm)
return mm
except ImportError:
def make_memmap(filename, dtype='uint8', mode='r+', offset=0,
shape=None, order='C', unlink_on_gc_collect=False):
raise NotImplementedError(
"'joblib.backports.make_memmap' should not be used "
'if numpy is not installed.')
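# A minimal usage sketch (not part of the original module), assuming numpy is
# installed and `path` points to a writable location: build a small
# file-backed array, then re-open it read-only through make_memmap.
def _make_memmap_example(path):
    import numpy as np
    arr = np.memmap(path, dtype='float64', mode='w+', shape=(4,))
    arr[:] = 1.0
    arr.flush()
    mm = make_memmap(path, dtype='float64', mode='r', shape=(4,))
    return float(mm.sum())  # -> 4.0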
if os.name == 'nt':
# https://github.com/joblib/joblib/issues/540
access_denied_errors = (5, 13)
from os import replace
def concurrency_safe_rename(src, dst):
"""Renames ``src`` into ``dst`` overwriting ``dst`` if it exists.
On Windows os.replace can yield permission errors if executed by two
different processes.
"""
max_sleep_time = 1
total_sleep_time = 0
sleep_time = 0.001
while total_sleep_time < max_sleep_time:
try:
replace(src, dst)
break
except Exception as exc:
if getattr(exc, 'winerror', None) in access_denied_errors:
time.sleep(sleep_time)
total_sleep_time += sleep_time
sleep_time *= 2
else:
raise
else:
raise
else:
from os import replace as concurrency_safe_rename # noqa

View File

@ -0,0 +1,570 @@
"""Classes and functions for managing compressors."""
import io
import zlib
from joblib.backports import LooseVersion
try:
from threading import RLock
except ImportError:
from dummy_threading import RLock
try:
import bz2
except ImportError:
bz2 = None
try:
import lz4
from lz4.frame import LZ4FrameFile
except ImportError:
lz4 = None
try:
import lzma
except ImportError:
lzma = None
LZ4_NOT_INSTALLED_ERROR = ('LZ4 is not installed. Install it with pip: '
'https://python-lz4.readthedocs.io/')
# Registered compressors
_COMPRESSORS = {}
# Magic numbers of supported compression file formats.
_ZFILE_PREFIX = b'ZF' # used with pickle files created before 0.9.3.
_ZLIB_PREFIX = b'\x78'
_GZIP_PREFIX = b'\x1f\x8b'
_BZ2_PREFIX = b'BZ'
_XZ_PREFIX = b'\xfd\x37\x7a\x58\x5a'
_LZMA_PREFIX = b'\x5d\x00'
_LZ4_PREFIX = b'\x04\x22\x4D\x18'
def register_compressor(compressor_name, compressor,
force=False):
"""Register a new compressor.
Parameters
    ----------
    compressor_name: str
        The name of the compressor.
    compressor: CompressorWrapper
        An instance of a 'CompressorWrapper'.
    force: bool, default=False
        If True, overwrite a compressor already registered under the same
        name.
"""
global _COMPRESSORS
if not isinstance(compressor_name, str):
raise ValueError("Compressor name should be a string, "
"'{}' given.".format(compressor_name))
if not isinstance(compressor, CompressorWrapper):
raise ValueError("Compressor should implement the CompressorWrapper "
"interface, '{}' given.".format(compressor))
if (compressor.fileobj_factory is not None and
(not hasattr(compressor.fileobj_factory, 'read') or
not hasattr(compressor.fileobj_factory, 'write') or
not hasattr(compressor.fileobj_factory, 'seek') or
not hasattr(compressor.fileobj_factory, 'tell'))):
raise ValueError("Compressor 'fileobj_factory' attribute should "
"implement the file object interface, '{}' given."
.format(compressor.fileobj_factory))
if compressor_name in _COMPRESSORS and not force:
raise ValueError("Compressor '{}' already registered."
.format(compressor_name))
_COMPRESSORS[compressor_name] = compressor
class CompressorWrapper():
"""A wrapper around a compressor file object.
Attributes
----------
obj: a file-like object
The object must implement the buffer interface and will be used
internally to compress/decompress the data.
prefix: bytestring
A bytestring corresponding to the magic number that identifies the
file format associated to the compressor.
extension: str
The file extension used to automatically select this compressor during
a dump to a file.
"""
def __init__(self, obj, prefix=b'', extension=''):
self.fileobj_factory = obj
self.prefix = prefix
self.extension = extension
def compressor_file(self, fileobj, compresslevel=None):
"""Returns an instance of a compressor file object."""
if compresslevel is None:
return self.fileobj_factory(fileobj, 'wb')
else:
return self.fileobj_factory(fileobj, 'wb',
compresslevel=compresslevel)
def decompressor_file(self, fileobj):
"""Returns an instance of a decompressor file object."""
return self.fileobj_factory(fileobj, 'rb')
class BZ2CompressorWrapper(CompressorWrapper):
prefix = _BZ2_PREFIX
extension = '.bz2'
def __init__(self):
if bz2 is not None:
self.fileobj_factory = bz2.BZ2File
else:
self.fileobj_factory = None
def _check_versions(self):
if bz2 is None:
raise ValueError('bz2 module is not compiled on your python '
'standard library.')
def compressor_file(self, fileobj, compresslevel=None):
"""Returns an instance of a compressor file object."""
self._check_versions()
if compresslevel is None:
return self.fileobj_factory(fileobj, 'wb')
else:
return self.fileobj_factory(fileobj, 'wb',
compresslevel=compresslevel)
def decompressor_file(self, fileobj):
"""Returns an instance of a decompressor file object."""
self._check_versions()
fileobj = self.fileobj_factory(fileobj, 'rb')
return fileobj
class LZMACompressorWrapper(CompressorWrapper):
prefix = _LZMA_PREFIX
extension = '.lzma'
_lzma_format_name = 'FORMAT_ALONE'
def __init__(self):
if lzma is not None:
self.fileobj_factory = lzma.LZMAFile
self._lzma_format = getattr(lzma, self._lzma_format_name)
else:
self.fileobj_factory = None
def _check_versions(self):
if lzma is None:
raise ValueError('lzma module is not compiled on your python '
'standard library.')
def compressor_file(self, fileobj, compresslevel=None):
"""Returns an instance of a compressor file object."""
if compresslevel is None:
return self.fileobj_factory(fileobj, 'wb',
format=self._lzma_format)
else:
return self.fileobj_factory(fileobj, 'wb',
format=self._lzma_format,
preset=compresslevel)
def decompressor_file(self, fileobj):
"""Returns an instance of a decompressor file object."""
return lzma.LZMAFile(fileobj, 'rb')
class XZCompressorWrapper(LZMACompressorWrapper):
prefix = _XZ_PREFIX
extension = '.xz'
_lzma_format_name = 'FORMAT_XZ'
class LZ4CompressorWrapper(CompressorWrapper):
prefix = _LZ4_PREFIX
extension = '.lz4'
def __init__(self):
if lz4 is not None:
self.fileobj_factory = LZ4FrameFile
else:
self.fileobj_factory = None
def _check_versions(self):
if lz4 is None:
raise ValueError(LZ4_NOT_INSTALLED_ERROR)
lz4_version = lz4.__version__
if lz4_version.startswith("v"):
lz4_version = lz4_version[1:]
if LooseVersion(lz4_version) < LooseVersion('0.19'):
raise ValueError(LZ4_NOT_INSTALLED_ERROR)
def compressor_file(self, fileobj, compresslevel=None):
"""Returns an instance of a compressor file object."""
self._check_versions()
if compresslevel is None:
return self.fileobj_factory(fileobj, 'wb')
else:
return self.fileobj_factory(fileobj, 'wb',
compression_level=compresslevel)
def decompressor_file(self, fileobj):
"""Returns an instance of a decompressor file object."""
self._check_versions()
return self.fileobj_factory(fileobj, 'rb')
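# Illustrative sketch (not part of the original module): register a wrapper
# defined above under an extra, hypothetical name. `force=True` would be
# required to overwrite a name that is already registered.
def _register_lz4_alias():
    register_compressor('lz4-frame', LZ4CompressorWrapper())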
###############################################################################
# base file compression/decompression object definition
_MODE_CLOSED = 0
_MODE_READ = 1
_MODE_READ_EOF = 2
_MODE_WRITE = 3
_BUFFER_SIZE = 8192
class BinaryZlibFile(io.BufferedIOBase):
"""A file object providing transparent zlib (de)compression.
    TODO python2_drop: is it still needed since we dropped Python 2 support? A
BinaryZlibFile can act as a wrapper for an existing file object, or refer
directly to a named file on disk.
Note that BinaryZlibFile provides only a *binary* file interface: data read
is returned as bytes, and data to be written should be given as bytes.
This object is an adaptation of the BZ2File object and is compatible with
versions of python >= 2.7.
If filename is a str or bytes object, it gives the name
of the file to be opened. Otherwise, it should be a file object,
which will be used to read or write the compressed data.
mode can be 'rb' for reading (default) or 'wb' for (over)writing
If mode is 'wb', compresslevel can be a number between 1
and 9 specifying the level of compression: 1 produces the least
compression, and 9 produces the most compression. 3 is the default.
"""
wbits = zlib.MAX_WBITS
def __init__(self, filename, mode="rb", compresslevel=3):
# This lock must be recursive, so that BufferedIOBase's
# readline(), readlines() and writelines() don't deadlock.
self._lock = RLock()
self._fp = None
self._closefp = False
self._mode = _MODE_CLOSED
self._pos = 0
self._size = -1
self.compresslevel = compresslevel
if not isinstance(compresslevel, int) or not (1 <= compresslevel <= 9):
raise ValueError("'compresslevel' must be an integer "
"between 1 and 9. You provided 'compresslevel={}'"
.format(compresslevel))
if mode == "rb":
self._mode = _MODE_READ
self._decompressor = zlib.decompressobj(self.wbits)
self._buffer = b""
self._buffer_offset = 0
elif mode == "wb":
self._mode = _MODE_WRITE
self._compressor = zlib.compressobj(self.compresslevel,
zlib.DEFLATED, self.wbits,
zlib.DEF_MEM_LEVEL, 0)
else:
raise ValueError("Invalid mode: %r" % (mode,))
if isinstance(filename, str):
self._fp = io.open(filename, mode)
self._closefp = True
elif hasattr(filename, "read") or hasattr(filename, "write"):
self._fp = filename
else:
raise TypeError("filename must be a str or bytes object, "
"or a file")
def close(self):
"""Flush and close the file.
May be called more than once without error. Once the file is
closed, any other operation on it will raise a ValueError.
"""
with self._lock:
if self._mode == _MODE_CLOSED:
return
try:
if self._mode in (_MODE_READ, _MODE_READ_EOF):
self._decompressor = None
elif self._mode == _MODE_WRITE:
self._fp.write(self._compressor.flush())
self._compressor = None
finally:
try:
if self._closefp:
self._fp.close()
finally:
self._fp = None
self._closefp = False
self._mode = _MODE_CLOSED
self._buffer = b""
self._buffer_offset = 0
@property
def closed(self):
"""True if this file is closed."""
return self._mode == _MODE_CLOSED
def fileno(self):
"""Return the file descriptor for the underlying file."""
self._check_not_closed()
return self._fp.fileno()
def seekable(self):
"""Return whether the file supports seeking."""
return self.readable() and self._fp.seekable()
def readable(self):
"""Return whether the file was opened for reading."""
self._check_not_closed()
return self._mode in (_MODE_READ, _MODE_READ_EOF)
def writable(self):
"""Return whether the file was opened for writing."""
self._check_not_closed()
return self._mode == _MODE_WRITE
# Mode-checking helper functions.
def _check_not_closed(self):
if self.closed:
fname = getattr(self._fp, 'name', None)
msg = "I/O operation on closed file"
if fname is not None:
msg += " {}".format(fname)
msg += "."
raise ValueError(msg)
def _check_can_read(self):
if self._mode not in (_MODE_READ, _MODE_READ_EOF):
self._check_not_closed()
raise io.UnsupportedOperation("File not open for reading")
def _check_can_write(self):
if self._mode != _MODE_WRITE:
self._check_not_closed()
raise io.UnsupportedOperation("File not open for writing")
def _check_can_seek(self):
if self._mode not in (_MODE_READ, _MODE_READ_EOF):
self._check_not_closed()
raise io.UnsupportedOperation("Seeking is only supported "
"on files open for reading")
if not self._fp.seekable():
raise io.UnsupportedOperation("The underlying file object "
"does not support seeking")
# Fill the readahead buffer if it is empty. Returns False on EOF.
def _fill_buffer(self):
if self._mode == _MODE_READ_EOF:
return False
# Depending on the input data, our call to the decompressor may not
# return any data. In this case, try again after reading another block.
while self._buffer_offset == len(self._buffer):
try:
rawblock = (self._decompressor.unused_data or
self._fp.read(_BUFFER_SIZE))
if not rawblock:
raise EOFError
except EOFError:
# End-of-stream marker and end of file. We're good.
self._mode = _MODE_READ_EOF
self._size = self._pos
return False
else:
self._buffer = self._decompressor.decompress(rawblock)
self._buffer_offset = 0
return True
# Read data until EOF.
# If return_data is false, consume the data without returning it.
def _read_all(self, return_data=True):
# The loop assumes that _buffer_offset is 0. Ensure that this is true.
self._buffer = self._buffer[self._buffer_offset:]
self._buffer_offset = 0
blocks = []
while self._fill_buffer():
if return_data:
blocks.append(self._buffer)
self._pos += len(self._buffer)
self._buffer = b""
if return_data:
return b"".join(blocks)
# Read a block of up to n bytes.
# If return_data is false, consume the data without returning it.
def _read_block(self, n_bytes, return_data=True):
# If we have enough data buffered, return immediately.
end = self._buffer_offset + n_bytes
if end <= len(self._buffer):
data = self._buffer[self._buffer_offset: end]
self._buffer_offset = end
self._pos += len(data)
return data if return_data else None
# The loop assumes that _buffer_offset is 0. Ensure that this is true.
self._buffer = self._buffer[self._buffer_offset:]
self._buffer_offset = 0
blocks = []
while n_bytes > 0 and self._fill_buffer():
if n_bytes < len(self._buffer):
data = self._buffer[:n_bytes]
self._buffer_offset = n_bytes
else:
data = self._buffer
self._buffer = b""
if return_data:
blocks.append(data)
self._pos += len(data)
n_bytes -= len(data)
if return_data:
return b"".join(blocks)
def read(self, size=-1):
"""Read up to size uncompressed bytes from the file.
If size is negative or omitted, read until EOF is reached.
Returns b'' if the file is already at EOF.
"""
with self._lock:
self._check_can_read()
if size == 0:
return b""
elif size < 0:
return self._read_all()
else:
return self._read_block(size)
def readinto(self, b):
"""Read up to len(b) bytes into b.
Returns the number of bytes read (0 for EOF).
"""
with self._lock:
return io.BufferedIOBase.readinto(self, b)
def write(self, data):
"""Write a byte string to the file.
Returns the number of uncompressed bytes written, which is
always len(data). Note that due to buffering, the file on disk
may not reflect the data written until close() is called.
"""
with self._lock:
self._check_can_write()
# Convert data type if called by io.BufferedWriter.
if isinstance(data, memoryview):
data = data.tobytes()
compressed = self._compressor.compress(data)
self._fp.write(compressed)
self._pos += len(data)
return len(data)
# Rewind the file to the beginning of the data stream.
def _rewind(self):
self._fp.seek(0, 0)
self._mode = _MODE_READ
self._pos = 0
self._decompressor = zlib.decompressobj(self.wbits)
self._buffer = b""
self._buffer_offset = 0
def seek(self, offset, whence=0):
"""Change the file position.
The new position is specified by offset, relative to the
position indicated by whence. Values for whence are:
0: start of stream (default); offset must not be negative
1: current stream position
2: end of stream; offset must not be positive
Returns the new file position.
Note that seeking is emulated, so depending on the parameters,
this operation may be extremely slow.
"""
with self._lock:
self._check_can_seek()
# Recalculate offset as an absolute file position.
if whence == 0:
pass
elif whence == 1:
offset = self._pos + offset
elif whence == 2:
# Seeking relative to EOF - we need to know the file's size.
if self._size < 0:
self._read_all(return_data=False)
offset = self._size + offset
else:
raise ValueError("Invalid value for whence: %s" % (whence,))
# Make it so that offset is the number of bytes to skip forward.
if offset < self._pos:
self._rewind()
else:
offset -= self._pos
# Read and discard data until we reach the desired position.
self._read_block(offset, return_data=False)
return self._pos
def tell(self):
"""Return the current file position."""
with self._lock:
self._check_not_closed()
return self._pos
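# A minimal round-trip sketch (not part of the original module); `path` is a
# hypothetical writable file location and only bytes payloads are accepted.
def _binary_zlib_file_example(path):
    with BinaryZlibFile(path, 'wb', compresslevel=3) as f:
        f.write(b'hello world')
    with BinaryZlibFile(path, 'rb') as f:
        return f.read()  # -> b'hello world'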
class ZlibCompressorWrapper(CompressorWrapper):
def __init__(self):
CompressorWrapper.__init__(self, obj=BinaryZlibFile,
prefix=_ZLIB_PREFIX, extension='.z')
class BinaryGzipFile(BinaryZlibFile):
"""A file object providing transparent gzip (de)compression.
If filename is a str or bytes object, it gives the name
of the file to be opened. Otherwise, it should be a file object,
which will be used to read or write the compressed data.
mode can be 'rb' for reading (default) or 'wb' for (over)writing
If mode is 'wb', compresslevel can be a number between 1
and 9 specifying the level of compression: 1 produces the least
compression, and 9 produces the most compression. 3 is the default.
"""
wbits = 31 # zlib compressor/decompressor wbits value for gzip format.
class GzipCompressorWrapper(CompressorWrapper):
def __init__(self):
CompressorWrapper.__init__(self, obj=BinaryGzipFile,
prefix=_GZIP_PREFIX, extension='.gz')

View File

@ -0,0 +1,136 @@
"""
Disk management utilities.
"""
# Authors: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
# Lars Buitinck
# Copyright (c) 2010 Gael Varoquaux
# License: BSD Style, 3 clauses.
import os
import sys
import time
import errno
import shutil
from multiprocessing import util
try:
WindowsError
except NameError:
WindowsError = OSError
def disk_used(path):
""" Return the disk usage in a directory."""
size = 0
for file in os.listdir(path) + ['.']:
stat = os.stat(os.path.join(path, file))
if hasattr(stat, 'st_blocks'):
size += stat.st_blocks * 512
else:
            # on some platforms st_blocks is not available (e.g., Windows)
# approximate by rounding to next multiple of 512
size += (stat.st_size // 512 + 1) * 512
# We need to convert to int to avoid having longs on some systems (we
    # don't want longs to avoid problems with SQLite)
return int(size / 1024.)
def memstr_to_bytes(text):
""" Convert a memory text to its value in bytes.
"""
kilo = 1024
units = dict(K=kilo, M=kilo ** 2, G=kilo ** 3)
try:
size = int(units[text[-1]] * float(text[:-1]))
except (KeyError, ValueError) as e:
raise ValueError(
"Invalid literal for size give: %s (type %s) should be "
"alike '10G', '500M', '50K'." % (text, type(text))) from e
return size
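# For example, memstr_to_bytes('100M') == 100 * 1024 ** 2 and
# memstr_to_bytes('2G') == 2 * 1024 ** 3; the unit suffix must be one of
# 'K', 'M' or 'G'.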
def mkdirp(d):
"""Ensure directory d exists (like mkdir -p on Unix)
No guarantee that the directory is writable.
"""
try:
os.makedirs(d)
except OSError as e:
if e.errno != errno.EEXIST:
raise
# If an rmtree operation fails in rm_subdirs, wait for this much time (in secs),
# then retry up to RM_SUBDIRS_N_RETRY times. If it still fails, raise the
# exception. This mechanism ensures that the sub-process gc has the time to
# collect and close the memmaps before we fail.
RM_SUBDIRS_RETRY_TIME = 0.1
RM_SUBDIRS_N_RETRY = 10
def rm_subdirs(path, onerror=None):
"""Remove all subdirectories in this path.
The directory indicated by `path` is left in place, and its subdirectories
are erased.
If onerror is set, it is called to handle the error with arguments (func,
path, exc_info) where func is os.listdir, os.remove, or os.rmdir;
path is the argument to that function that caused it to fail; and
exc_info is a tuple returned by sys.exc_info(). If onerror is None,
an exception is raised.
"""
# NOTE this code is adapted from the one in shutil.rmtree, and is
# just as fast
names = []
try:
names = os.listdir(path)
except os.error:
if onerror is not None:
onerror(os.listdir, path, sys.exc_info())
else:
raise
for name in names:
fullname = os.path.join(path, name)
delete_folder(fullname, onerror=onerror)
def delete_folder(folder_path, onerror=None, allow_non_empty=True):
"""Utility function to cleanup a temporary folder if it still exists."""
if os.path.isdir(folder_path):
if onerror is not None:
shutil.rmtree(folder_path, False, onerror)
else:
# allow the rmtree to fail once, wait and re-try.
# if the error is raised again, fail
err_count = 0
while True:
files = os.listdir(folder_path)
try:
if len(files) == 0 or allow_non_empty:
shutil.rmtree(
folder_path, ignore_errors=False, onerror=None
)
util.debug(
"Successfully deleted {}".format(folder_path))
break
else:
raise OSError(
"Expected empty folder {} but got {} "
"files.".format(folder_path, len(files))
)
except (OSError, WindowsError):
err_count += 1
if err_count > RM_SUBDIRS_N_RETRY:
                        # the folder cannot be deleted right now. It may be
# because some temporary files have not been deleted
# yet.
raise
time.sleep(RM_SUBDIRS_RETRY_TIME)

View File

@ -0,0 +1,120 @@
"""Utility function to construct a loky.ReusableExecutor with custom pickler.
This module provides efficient ways of working with data stored in
shared memory with numpy.memmap arrays without inducing any memory
copy between the parent and child processes.
"""
# Author: Thomas Moreau <thomas.moreau.2010@gmail.com>
# Copyright: 2017, Thomas Moreau
# License: BSD 3 clause
from ._memmapping_reducer import get_memmapping_reducers
from ._memmapping_reducer import TemporaryResourcesManager
from .externals.loky.reusable_executor import _ReusablePoolExecutor
_executor_args = None
def get_memmapping_executor(n_jobs, **kwargs):
return MemmappingExecutor.get_memmapping_executor(n_jobs, **kwargs)
class MemmappingExecutor(_ReusablePoolExecutor):
@classmethod
def get_memmapping_executor(cls, n_jobs, timeout=300, initializer=None,
initargs=(), env=None, temp_folder=None,
context_id=None, **backend_args):
"""Factory for ReusableExecutor with automatic memmapping for large numpy
arrays.
"""
global _executor_args
# Check if we can reuse the executor here instead of deferring the test
        # to loky as the reducers are objects that change at each call.
executor_args = backend_args.copy()
executor_args.update(env if env else {})
executor_args.update(dict(
timeout=timeout, initializer=initializer, initargs=initargs))
reuse = _executor_args is None or _executor_args == executor_args
_executor_args = executor_args
manager = TemporaryResourcesManager(temp_folder)
        # Reducers access the temporary folder in which to store temporary
        # pickles through a call to manager.resolve_temp_folder_name. Resolving
        # the folder name dynamically makes it possible to use different
        # folders across calls of the same reusable executor.
job_reducers, result_reducers = get_memmapping_reducers(
unlink_on_gc_collect=True,
temp_folder_resolver=manager.resolve_temp_folder_name,
**backend_args)
_executor, executor_is_reused = super().get_reusable_executor(
n_jobs, job_reducers=job_reducers, result_reducers=result_reducers,
reuse=reuse, timeout=timeout, initializer=initializer,
initargs=initargs, env=env
)
if not executor_is_reused:
# Only set a _temp_folder_manager for new executors. Reused
            # executors already have a _temp_folder_manager that must not
# be re-assigned like that because it is referenced in various
# places in the reducing machinery of the executor.
_executor._temp_folder_manager = manager
if context_id is not None:
# Only register the specified context once we know which manager
# the current executor is using, in order to not register an atexit
# finalizer twice for the same folder.
_executor._temp_folder_manager.register_new_context(context_id)
return _executor
def terminate(self, kill_workers=False):
self.shutdown(kill_workers=kill_workers)
if kill_workers:
# When workers are killed in such a brutal manner, they cannot
# execute the finalizer of their shared memmaps. The refcount of
# those memmaps may be off by an unknown number, so instead of
# decref'ing them, we delete the whole temporary folder, and
# unregister them. There is no risk of PermissionError at folder
            # deletion because at this point, all child processes are
# dead, so all references to temporary memmaps are closed.
# unregister temporary resources from all contexts
with self._submit_resize_lock:
self._temp_folder_manager._unregister_temporary_resources()
self._temp_folder_manager._try_delete_folder(
allow_non_empty=True
)
else:
self._temp_folder_manager._unlink_temporary_resources()
self._temp_folder_manager._try_delete_folder(allow_non_empty=True)
@property
def _temp_folder(self):
        # Legacy property used in tests. Could be removed if we refactored the
        # memmapping tests. SHOULD ONLY BE USED IN TESTS!
        # We cache this property because it is called late in the tests - at
        # this point, all contexts have been unregistered, and
# resolve_temp_folder_name raises an error.
if getattr(self, '_cached_temp_folder', None) is not None:
return self._cached_temp_folder
else:
self._cached_temp_folder = self._temp_folder_manager.resolve_temp_folder_name() # noqa
return self._cached_temp_folder
class _TestingMemmappingExecutor(MemmappingExecutor):
"""Wrapper around ReusableExecutor to ease memmapping testing with Pool
and Executor. This is only for testing purposes.
"""
def apply_async(self, func, args):
"""Schedule a func to be run"""
future = self.submit(func, *args)
future.get = future.result
return future
def map(self, f, *args):
return list(super().map(f, *args))
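# A minimal usage sketch (not part of the original module): the returned
# executor exposes the concurrent.futures-style API inherited from loky's
# _ReusablePoolExecutor, so results come back through Future.result().
def _memmapping_executor_example():
    executor = get_memmapping_executor(2, timeout=60)
    future = executor.submit(sum, [1, 2, 3])
    return future.result()  # -> 6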

View File

@ -0,0 +1,8 @@
from .cloudpickle import * # noqa
from .cloudpickle_fast import CloudPickler, dumps, dump # noqa
# Conform to the convention used by python serialization libraries, which
# expose their Pickler subclass at top-level under the "Pickler" name.
Pickler = CloudPickler
__version__ = '2.2.0'

Some files were not shown because too many files have changed in this diff.