"""Pickler class to extend the standard pickle.Pickler functionality
|
|
|
|
The main objective is to make it natural to perform distributed computing on
|
|
clusters (such as PySpark, Dask, Ray...) with interactively defined code
|
|
(functions, classes, ...) written in notebooks or console.
|
|
|
|
In particular this pickler adds the following features:
|
|
- serialize interactively-defined or locally-defined functions, classes,
|
|
enums, typevars, lambdas and nested functions to compiled byte code;
|
|
- deal with some other non-serializable objects in an ad-hoc manner where
|
|
applicable.
|
|
|
|
This pickler is therefore meant to be used for the communication between short
|
|
lived Python processes running the same version of Python and libraries. In
|
|
particular, it is not meant to be used for long term storage of Python objects.
|
|
|
|
It does not include an unpickler, as standard Python unpickling suffices.
|
|
|
|
This module was extracted from the `cloud` package, developed by `PiCloud, Inc.
|
|
<https://web.archive.org/web/20140626004012/http://www.picloud.com/>`_.
|
|
|
|
Copyright (c) 2012-now, CloudPickle developers and contributors.
|
|
Copyright (c) 2012, Regents of the University of California.
|
|
Copyright (c) 2009 `PiCloud, Inc. <https://web.archive.org/web/20140626004012/http://www.picloud.com/>`_.
|
|
All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions
|
|
are met:
|
|
* Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
* Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions and the following disclaimer in the
|
|
documentation and/or other materials provided with the distribution.
|
|
* Neither the name of the University of California, Berkeley nor the
|
|
names of its contributors may be used to endorse or promote
|
|
products derived from this software without specific prior written
|
|
permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
|
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
"""

import _collections_abc
from collections import ChainMap, OrderedDict
import abc
import builtins
import copyreg
import dataclasses
import dis
from enum import Enum
import io
import itertools
import logging
import opcode
import pickle
from pickle import _getattribute
import platform
import struct
import sys
import threading
import types
import typing
import uuid
import warnings
import weakref

# The following import is required to be imported in the cloudpickle
# namespace to be able to load pickle files generated with older versions of
# cloudpickle. See: tests/test_backward_compat.py
from types import CellType  # noqa: F401


# cloudpickle is meant for inter process communication: we expect all
# communicating processes to run the same Python version hence we favor
# communication speed over compatibility:
DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL

# Names of modules whose resources should be treated as dynamic.
_PICKLE_BY_VALUE_MODULES = set()

# Track the provenance of reconstructed dynamic classes to make it possible to
# reconstruct instances from the matching singleton class definition when
# appropriate and preserve the usual "isinstance" semantics of Python objects.
_DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary()
_DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary()
_DYNAMIC_CLASS_TRACKER_LOCK = threading.Lock()

PYPY = platform.python_implementation() == "PyPy"

builtin_code_type = None
if PYPY:
    # builtin-code objects only exist in pypy
    builtin_code_type = type(float.__new__.__code__)

_extract_code_globals_cache = weakref.WeakKeyDictionary()


def _get_or_create_tracker_id(class_def):
    with _DYNAMIC_CLASS_TRACKER_LOCK:
        class_tracker_id = _DYNAMIC_CLASS_TRACKER_BY_CLASS.get(class_def)
        if class_tracker_id is None:
            class_tracker_id = uuid.uuid4().hex
            _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id
            _DYNAMIC_CLASS_TRACKER_BY_ID[class_tracker_id] = class_def
    return class_tracker_id


def _lookup_class_or_track(class_tracker_id, class_def):
    if class_tracker_id is not None:
        with _DYNAMIC_CLASS_TRACKER_LOCK:
            class_def = _DYNAMIC_CLASS_TRACKER_BY_ID.setdefault(
                class_tracker_id, class_def
            )
            _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id
    return class_def


def register_pickle_by_value(module):
    """Register a module to make its functions and classes picklable by value.

    By default, functions and classes that are attributes of an importable
    module are pickled by reference, that is, relying on re-importing
    the attribute from the module at load time.

    If `register_pickle_by_value(module)` is called, all its functions and
    classes are subsequently pickled by value, meaning that they can
    be loaded in Python processes where the module is not importable.

    This is especially useful when developing a module in a distributed
    execution environment: restarting the client Python process with the new
    source code is enough: there is no need to re-install the new version
    of the module on all the worker nodes nor to restart the workers.

    Note: this feature is considered experimental. See the cloudpickle
    README.md file for more details and limitations.
    """
    if not isinstance(module, types.ModuleType):
        raise ValueError(f"Input should be a module object, got {str(module)} instead")
    # In the future, cloudpickle may need a way to access any module registered
    # for pickling by value in order to introspect relative imports inside
    # functions pickled by value. (see
    # https://github.com/cloudpipe/cloudpickle/pull/417#issuecomment-873684633).
    # This access can be ensured by checking that module is present in
    # sys.modules at registering time and assuming that it will still be in
    # there when accessed during pickling. Another alternative would be to
    # store a weakref to the module. Even though cloudpickle does not implement
    # this introspection yet, in order to avoid a possible breaking change
    # later, we still enforce the presence of module inside sys.modules.
    if module.__name__ not in sys.modules:
        raise ValueError(
            f"{module} was not imported correctly, have you used an "
            "`import` statement to access it?"
        )
    _PICKLE_BY_VALUE_MODULES.add(module.__name__)
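

# Illustrative sketch (not used by cloudpickle itself): registering a module
# toggles its presence in the pickle-by-value registry. The demo module name
# below is hypothetical; any module reachable through sys.modules works.
def _example_register_pickle_by_value():  # pragma: no cover
    demo = types.ModuleType("_cloudpickle_demo_module")
    # The module must be present in sys.modules to qualify for registration.
    sys.modules[demo.__name__] = demo
    try:
        register_pickle_by_value(demo)
        assert demo.__name__ in list_registry_pickle_by_value()
        unregister_pickle_by_value(demo)
        assert demo.__name__ not in list_registry_pickle_by_value()
    finally:
        del sys.modules[demo.__name__]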


def unregister_pickle_by_value(module):
    """Unregister that the input module should be pickled by value."""
    if not isinstance(module, types.ModuleType):
        raise ValueError(f"Input should be a module object, got {str(module)} instead")
    if module.__name__ not in _PICKLE_BY_VALUE_MODULES:
        raise ValueError(f"{module} is not registered for pickle by value")
    else:
        _PICKLE_BY_VALUE_MODULES.remove(module.__name__)


def list_registry_pickle_by_value():
    return _PICKLE_BY_VALUE_MODULES.copy()


def _is_registered_pickle_by_value(module):
    module_name = module.__name__
    if module_name in _PICKLE_BY_VALUE_MODULES:
        return True
    while True:
        parent_name = module_name.rsplit(".", 1)[0]
        if parent_name == module_name:
            break
        if parent_name in _PICKLE_BY_VALUE_MODULES:
            return True
        module_name = parent_name
    return False


def _whichmodule(obj, name):
    """Find the module an object belongs to.

    This function differs from ``pickle.whichmodule`` in two ways:
    - it does not mangle the cases where obj's module is __main__ and obj was
      not found in any module.
    - Errors arising during module introspection are ignored, as those errors
      are considered unwanted side effects.
    """
    module_name = getattr(obj, "__module__", None)

    if module_name is not None:
        return module_name
    # Protect the iteration by using a copy of sys.modules against dynamic
    # modules that trigger imports of other modules upon calls to getattr or
    # other threads importing at the same time.
    for module_name, module in sys.modules.copy().items():
        # Some modules such as coverage can inject non-module objects inside
        # sys.modules
        if (
            module_name == "__main__"
            or module is None
            or not isinstance(module, types.ModuleType)
        ):
            continue
        try:
            if _getattribute(module, name)[0] is obj:
                return module_name
        except Exception:
            pass
    return None


def _should_pickle_by_reference(obj, name=None):
    """Test whether a function or a class should be pickled by reference.

    Pickling by reference means that the object (typically a function or a
    class) is an attribute of a module that is assumed to be importable in the
    target Python environment. Loading will therefore rely on importing the
    module and then calling `getattr` on it to access the function or class.

    Pickling by reference is the only option to pickle functions and classes
    in the standard library. In cloudpickle the alternative option is to
    pickle by value (for instance for interactively or locally defined
    functions and classes or for attributes of modules that have been
    explicitly registered to be pickled by value).
    """
    if isinstance(obj, types.FunctionType) or issubclass(type(obj), type):
        module_and_name = _lookup_module_and_qualname(obj, name=name)
        if module_and_name is None:
            return False
        module, name = module_and_name
        return not _is_registered_pickle_by_value(module)

    elif isinstance(obj, types.ModuleType):
        # We assume that sys.modules is primarily used as a cache mechanism for
        # the Python import machinery. Checking if a module has been added to
        # sys.modules is therefore a cheap and simple heuristic to tell us
        # whether we can assume that a given module could be imported by name
        # in another Python process.
        if _is_registered_pickle_by_value(obj):
            return False
        return obj.__name__ in sys.modules
    else:
        raise TypeError(
            "cannot check importability of {} instances".format(type(obj).__name__)
        )
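

# Illustrative sketch: importable module attributes are pickled by reference,
# while locally defined objects are not. `itertools.chain` is just a
# convenient importable example.
def _example_should_pickle_by_reference():  # pragma: no cover
    assert _should_pickle_by_reference(itertools.chain)

    def locally_defined():
        pass

    # Not reachable as a module attribute, so it must be pickled by value.
    assert not _should_pickle_by_reference(locally_defined)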


def _lookup_module_and_qualname(obj, name=None):
    if name is None:
        name = getattr(obj, "__qualname__", None)
    if name is None:  # pragma: no cover
        # This used to be needed for Python 2.7 support but is probably not
        # needed anymore. However we keep the __name__ introspection in case
        # users of cloudpickle rely on this old behavior for unknown reasons.
        name = getattr(obj, "__name__", None)

    module_name = _whichmodule(obj, name)

    if module_name is None:
        # In this case, obj.__module__ is None AND obj was not found in any
        # imported module. obj is thus treated as dynamic.
        return None

    if module_name == "__main__":
        return None

    # Note: if module_name is in sys.modules, the corresponding module is
    # assumed importable at unpickling time. See #357
    module = sys.modules.get(module_name, None)
    if module is None:
        # The main reason why obj's module would not be imported is that this
        # module has been dynamically created, using for example
        # types.ModuleType. The other possibility is that module was removed
        # from sys.modules after obj was created/imported. But this case is not
        # supported, as the standard pickle does not support it either.
        return None

    try:
        obj2, parent = _getattribute(module, name)
    except AttributeError:
        # obj was not found inside the module it points to
        return None
    if obj2 is not obj:
        return None
    return module, name


def _extract_code_globals(co):
    """Find all globals names read or written to by codeblock co."""
    out_names = _extract_code_globals_cache.get(co)
    if out_names is None:
        # We use a dict with None values instead of a set to get a
        # deterministic order and avoid introducing non-deterministic pickle
        # bytes as a result.
        out_names = {name: None for name in _walk_global_ops(co)}

        # Declaring a function inside another one using the "def ..." syntax
        # generates a constant code object corresponding to the nested
        # function's body. As the nested function may itself need global
        # variables, we need to introspect its code, extract its globals (look
        # for code objects in its co_consts attribute) and add the result to
        # code_globals.
        if co.co_consts:
            for const in co.co_consts:
                if isinstance(const, types.CodeType):
                    out_names.update(_extract_code_globals(const))

        _extract_code_globals_cache[co] = out_names

    return out_names
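

# Illustrative sketch: only names accessed through global-scope opcodes are
# reported; arguments and locals are not. The free name `some_global` is
# hypothetical and would only be resolved if the function were called.
def _example_extract_code_globals():  # pragma: no cover
    def f(x):
        return len(x) + some_global  # noqa: F821

    assert set(_extract_code_globals(f.__code__)) == {"len", "some_global"}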


def _find_imported_submodules(code, top_level_dependencies):
    """Find currently imported submodules used by a function.

    Submodules used by a function need to be detected and referenced for the
    function to work correctly at depickling time. Because submodules can be
    referenced as attribute of their parent package (``package.submodule``), we
    need a special introspection technique that does not rely on GLOBAL-related
    opcodes to find references of them in a code object.

    Example:
    ```
    import concurrent.futures
    import cloudpickle
    def func():
        x = concurrent.futures.ThreadPoolExecutor
    if __name__ == '__main__':
        cloudpickle.dumps(func)
    ```
    The globals extracted by cloudpickle in the function's state include the
    concurrent package, but not its submodule (here, concurrent.futures), which
    is the module used by func. _find_imported_submodules will detect the usage
    of concurrent.futures. Saving this module alongside func will ensure
    that calling func once depickled does not fail due to concurrent.futures
    not being imported.
    """

    subimports = []
    # check if any known dependency is an imported package
    for x in top_level_dependencies:
        if (
            isinstance(x, types.ModuleType)
            and hasattr(x, "__package__")
            and x.__package__
        ):
            # check if the package has any currently loaded sub-imports
            prefix = x.__name__ + "."
            # A concurrent thread could mutate sys.modules,
            # make sure we iterate over a copy to avoid exceptions
            for name in list(sys.modules):
                # Older versions of pytest will add a "None" module to
                # sys.modules.
                if name is not None and name.startswith(prefix):
                    # check whether the function can address the sub-module
                    tokens = set(name[len(prefix) :].split("."))
                    if not tokens - set(code.co_names):
                        subimports.append(sys.modules[name])
    return subimports


# relevant opcodes
STORE_GLOBAL = opcode.opmap["STORE_GLOBAL"]
DELETE_GLOBAL = opcode.opmap["DELETE_GLOBAL"]
LOAD_GLOBAL = opcode.opmap["LOAD_GLOBAL"]
GLOBAL_OPS = (STORE_GLOBAL, DELETE_GLOBAL, LOAD_GLOBAL)
HAVE_ARGUMENT = dis.HAVE_ARGUMENT
EXTENDED_ARG = dis.EXTENDED_ARG


_BUILTIN_TYPE_NAMES = {}
for k, v in types.__dict__.items():
    if type(v) is type:
        _BUILTIN_TYPE_NAMES[v] = k


def _builtin_type(name):
    if name == "ClassType":  # pragma: no cover
        # Backward compat to load pickle files generated with cloudpickle
        # < 1.3 even if loading pickle files from older versions is not
        # officially supported.
        return type
    return getattr(types, name)


def _walk_global_ops(code):
    """Yield referenced name for global-referencing instructions in code."""
    for instr in dis.get_instructions(code):
        op = instr.opcode
        if op in GLOBAL_OPS:
            yield instr.argval


def _extract_class_dict(cls):
    """Retrieve a copy of the dict of a class without the inherited methods."""
    clsdict = dict(cls.__dict__)  # copy dict proxy to a dict
    if len(cls.__bases__) == 1:
        inherited_dict = cls.__bases__[0].__dict__
    else:
        inherited_dict = {}
        for base in reversed(cls.__bases__):
            inherited_dict.update(base.__dict__)
    to_remove = []
    for name, value in clsdict.items():
        try:
            base_value = inherited_dict[name]
            if value is base_value:
                to_remove.append(name)
        except KeyError:
            pass
    for name in to_remove:
        clsdict.pop(name)
    return clsdict
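

# Illustrative sketch: entries whose value is identical to the inherited one
# are filtered out, so only attributes genuinely defined on the class itself
# remain. `Base` and `Derived` are hypothetical demo classes.
def _example_extract_class_dict():  # pragma: no cover
    class Base:
        def method(self):
            return 1

    class Derived(Base):
        attribute = 2
        method = Base.method  # the very same object as the inherited value

    clsdict = _extract_class_dict(Derived)
    assert clsdict["attribute"] == 2
    assert "method" not in clsdict  # identical to Base's entry, hence dropped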


def is_tornado_coroutine(func):
    """Return whether `func` is a Tornado coroutine function.

    Running coroutines are not supported.
    """
    warnings.warn(
        "is_tornado_coroutine is deprecated in cloudpickle 3.0 and will be "
        "removed in cloudpickle 4.0. Use tornado.gen.is_coroutine_function "
        "directly instead.",
        category=DeprecationWarning,
    )
    if "tornado.gen" not in sys.modules:
        return False
    gen = sys.modules["tornado.gen"]
    if not hasattr(gen, "is_coroutine_function"):
        # Tornado version is too old
        return False
    return gen.is_coroutine_function(func)


def subimport(name):
    # We cannot simply do `return __import__(name)`: indeed, if ``name`` is
    # the name of a submodule, __import__ will return the top-level root module
    # of this submodule. For instance, __import__('os.path') returns the `os`
    # module.
    __import__(name)
    return sys.modules[name]
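

# Illustrative sketch of the distinction made in the comment above:
# `__import__` returns the top-level package, `subimport` the submodule.
def _example_subimport():  # pragma: no cover
    import os.path

    assert __import__("os.path") is os
    assert subimport("os.path") is os.path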


def dynamic_subimport(name, vars):
    mod = types.ModuleType(name)
    mod.__dict__.update(vars)
    mod.__dict__["__builtins__"] = builtins.__dict__
    return mod


def _get_cell_contents(cell):
    try:
        return cell.cell_contents
    except ValueError:
        # Handle empty cells explicitly with a sentinel value.
        return _empty_cell_value


def instance(cls):
    """Create a new instance of a class.

    Parameters
    ----------
    cls : type
        The class to create an instance of.

    Returns
    -------
    instance : cls
        A new instance of ``cls``.
    """
    return cls()


@instance
class _empty_cell_value:
    """Sentinel for empty closures."""

    @classmethod
    def __reduce__(cls):
        return cls.__name__


def _make_function(code, globals, name, argdefs, closure):
    # Setting __builtins__ in globals is needed for nogil CPython.
    globals["__builtins__"] = __builtins__
    return types.FunctionType(code, globals, name, argdefs, closure)


def _make_empty_cell():
    if False:
        # trick the compiler into creating an empty cell in our lambda
        cell = None
        raise AssertionError("this route should not be executed")

    return (lambda: cell).__closure__[0]


def _make_cell(value=_empty_cell_value):
    cell = _make_empty_cell()
    if value is not _empty_cell_value:
        cell.cell_contents = value
    return cell
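

# Illustrative sketch: a freshly made cell is empty (reading it raises
# ValueError) until a value is written into it.
def _example_make_cell():  # pragma: no cover
    empty = _make_empty_cell()
    try:
        empty.cell_contents
        raise AssertionError("an empty cell should raise ValueError on read")
    except ValueError:
        pass

    assert _make_cell(42).cell_contents == 42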


def _make_skeleton_class(
    type_constructor, name, bases, type_kwargs, class_tracker_id, extra
):
    """Build dynamic class with an empty __dict__ to be filled once memoized

    If class_tracker_id is not None, try to lookup an existing class definition
    matching that id. If none is found, track a newly reconstructed class
    definition under that id so that other instances stemming from the same
    class id will also reuse this class definition.

    The "extra" variable is meant to be a dict (or None) that can be used for
    forward compatibility should the need arise.
    """
    skeleton_class = types.new_class(
        name, bases, {"metaclass": type_constructor}, lambda ns: ns.update(type_kwargs)
    )
    return _lookup_class_or_track(class_tracker_id, skeleton_class)
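

# Illustrative sketch: two reconstructions sharing the same (hypothetical)
# tracker id resolve to a single class object, which is what preserves
# isinstance semantics across repeated unpickling.
def _example_make_skeleton_class():  # pragma: no cover
    tracker_id = uuid.uuid4().hex
    cls_a = _make_skeleton_class(type, "Demo", (object,), {}, tracker_id, None)
    cls_b = _make_skeleton_class(type, "Demo", (object,), {}, tracker_id, None)
    assert cls_a is cls_b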


def _make_skeleton_enum(
    bases, name, qualname, members, module, class_tracker_id, extra
):
    """Build dynamic enum with an empty __dict__ to be filled once memoized

    The creation of the enum class is inspired by the code of
    EnumMeta._create_.

    If class_tracker_id is not None, try to lookup an existing enum definition
    matching that id. If none is found, track a newly reconstructed enum
    definition under that id so that other instances stemming from the same
    class id will also reuse this enum definition.

    The "extra" variable is meant to be a dict (or None) that can be used for
    forward compatibility should the need arise.
    """
    # enums always inherit from their base Enum class at the last position in
    # the list of base classes:
    enum_base = bases[-1]
    metacls = enum_base.__class__
    classdict = metacls.__prepare__(name, bases)

    for member_name, member_value in members.items():
        classdict[member_name] = member_value
    enum_class = metacls.__new__(metacls, name, bases, classdict)
    enum_class.__module__ = module
    enum_class.__qualname__ = qualname

    return _lookup_class_or_track(class_tracker_id, enum_class)


def _make_typevar(name, bound, constraints, covariant, contravariant, class_tracker_id):
    tv = typing.TypeVar(
        name,
        *constraints,
        bound=bound,
        covariant=covariant,
        contravariant=contravariant,
    )
    return _lookup_class_or_track(class_tracker_id, tv)


def _decompose_typevar(obj):
    return (
        obj.__name__,
        obj.__bound__,
        obj.__constraints__,
        obj.__covariant__,
        obj.__contravariant__,
        _get_or_create_tracker_id(obj),
    )


def _typevar_reduce(obj):
    # TypeVar instances require the module information, which is why we
    # are not using _should_pickle_by_reference directly.
    module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__)

    if module_and_name is None:
        return (_make_typevar, _decompose_typevar(obj))
    elif _is_registered_pickle_by_value(module_and_name[0]):
        return (_make_typevar, _decompose_typevar(obj))

    return (getattr, module_and_name)


def _get_bases(typ):
    if "__orig_bases__" in getattr(typ, "__dict__", {}):
        # For generic types (see PEP 560)
        # Note that simply checking `hasattr(typ, '__orig_bases__')` is not
        # correct. Subclasses of a fully-parameterized generic class do not
        # have `__orig_bases__` defined, but `hasattr(typ, '__orig_bases__')`
        # will return True because it's defined in the base class.
        bases_attr = "__orig_bases__"
    else:
        # For regular class objects
        bases_attr = "__bases__"
    return getattr(typ, bases_attr)


def _make_dict_keys(obj, is_ordered=False):
    if is_ordered:
        return OrderedDict.fromkeys(obj).keys()
    else:
        return dict.fromkeys(obj).keys()


def _make_dict_values(obj, is_ordered=False):
    if is_ordered:
        return OrderedDict((i, _) for i, _ in enumerate(obj)).values()
    else:
        return {i: _ for i, _ in enumerate(obj)}.values()


def _make_dict_items(obj, is_ordered=False):
    if is_ordered:
        return OrderedDict(obj).items()
    else:
        return obj.items()


# COLLECTION OF OBJECTS __getnewargs__-LIKE METHODS
# -------------------------------------------------


def _class_getnewargs(obj):
    type_kwargs = {}
    if "__module__" in obj.__dict__:
        type_kwargs["__module__"] = obj.__module__

    __dict__ = obj.__dict__.get("__dict__", None)
    if isinstance(__dict__, property):
        type_kwargs["__dict__"] = __dict__

    return (
        type(obj),
        obj.__name__,
        _get_bases(obj),
        type_kwargs,
        _get_or_create_tracker_id(obj),
        None,
    )


def _enum_getnewargs(obj):
    members = {e.name: e.value for e in obj}
    return (
        obj.__bases__,
        obj.__name__,
        obj.__qualname__,
        members,
        obj.__module__,
        _get_or_create_tracker_id(obj),
        None,
    )


# COLLECTION OF OBJECTS RECONSTRUCTORS
# ------------------------------------
def _file_reconstructor(retval):
    return retval


# COLLECTION OF OBJECTS STATE GETTERS
# -----------------------------------


def _function_getstate(func):
    # - Put func's dynamic attributes (stored in func.__dict__) in state. These
    #   attributes will be restored at unpickling time using
    #   f.__dict__.update(state)
    # - Put func's members into slotstate. Such attributes will be restored at
    #   unpickling time by iterating over slotstate and calling setattr(func,
    #   slotname, slotvalue)
    slotstate = {
        "__name__": func.__name__,
        "__qualname__": func.__qualname__,
        "__annotations__": func.__annotations__,
        "__kwdefaults__": func.__kwdefaults__,
        "__defaults__": func.__defaults__,
        "__module__": func.__module__,
        "__doc__": func.__doc__,
        "__closure__": func.__closure__,
    }

    f_globals_ref = _extract_code_globals(func.__code__)
    f_globals = {k: func.__globals__[k] for k in f_globals_ref if k in func.__globals__}

    if func.__closure__ is not None:
        closure_values = list(map(_get_cell_contents, func.__closure__))
    else:
        closure_values = ()

    # Extract currently-imported submodules used by func. Storing these modules
    # in the "_cloudpickle_submodules" attribute of the object's state will
    # trigger the side effect of importing these modules at unpickling time
    # (which is necessary for func to work correctly once depickled).
    slotstate["_cloudpickle_submodules"] = _find_imported_submodules(
        func.__code__, itertools.chain(f_globals.values(), closure_values)
    )
    slotstate["__globals__"] = f_globals

    state = func.__dict__
    return state, slotstate
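

# Illustrative sketch: the slotstate only carries the globals that the
# function's code actually references, here the module-level DEFAULT_PROTOCOL
# constant.
def _example_function_getstate():  # pragma: no cover
    def f():
        return DEFAULT_PROTOCOL

    state, slotstate = _function_getstate(f)
    assert slotstate["__globals__"] == {"DEFAULT_PROTOCOL": DEFAULT_PROTOCOL}
    assert slotstate["__qualname__"] == f.__qualname__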


def _class_getstate(obj):
    clsdict = _extract_class_dict(obj)
    clsdict.pop("__weakref__", None)

    if issubclass(type(obj), abc.ABCMeta):
        # If obj is an instance of an ABCMeta subclass, don't pickle the
        # cache/negative caches populated during isinstance/issubclass
        # checks, but pickle the list of registered subclasses of obj.
        clsdict.pop("_abc_cache", None)
        clsdict.pop("_abc_negative_cache", None)
        clsdict.pop("_abc_negative_cache_version", None)
        registry = clsdict.pop("_abc_registry", None)
        if registry is None:
            # The abc caches and registered subclasses of a
            # class are bundled into the single _abc_impl attribute
            clsdict.pop("_abc_impl", None)
            (registry, _, _, _) = abc._get_dump(obj)

            clsdict["_abc_impl"] = [subclass_weakref() for subclass_weakref in registry]
        else:
            # In the above if clause, registry is a set of weakrefs -- in
            # this case, registry is a WeakSet
            clsdict["_abc_impl"] = [type_ for type_ in registry]

    if "__slots__" in clsdict:
        # pickle string length optimization: member descriptors of obj are
        # created automatically from obj's __slots__ attribute, no need to
        # save them in obj's state
        if isinstance(obj.__slots__, str):
            clsdict.pop(obj.__slots__)
        else:
            for k in obj.__slots__:
                clsdict.pop(k, None)

    clsdict.pop("__dict__", None)  # unpicklable property object

    return (clsdict, {})


def _enum_getstate(obj):
    clsdict, slotstate = _class_getstate(obj)

    members = {e.name: e.value for e in obj}
    # Cleanup the clsdict that will be passed to _make_skeleton_enum:
    # Those attributes are already handled by the metaclass.
    for attrname in [
        "_generate_next_value_",
        "_member_names_",
        "_member_map_",
        "_member_type_",
        "_value2member_map_",
    ]:
        clsdict.pop(attrname, None)
    for member in members:
        clsdict.pop(member)
        # Special handling of Enum subclasses
    return clsdict, slotstate


# COLLECTIONS OF OBJECTS REDUCERS
# -------------------------------
# A reducer is a function taking a single argument (obj), and that returns a
# tuple with all the necessary data to re-construct obj. Apart from a few
# exceptions (list, dict, bytes, int, etc.), a reducer is necessary to
# correctly pickle an object.
# While many built-in objects (Exception objects, instances of the "object"
# class, etc.) ship with their own built-in reducer (invoked using
# obj.__reduce__), some do not. The following methods were created to "fill
# these holes".


def _code_reduce(obj):
    """code object reducer."""
    # If you are not sure about the order of arguments, take a look at help
    # of the specific type from types, for example:
    # >>> from types import CodeType
    # >>> help(CodeType)
    if hasattr(obj, "co_exceptiontable"):
        # Python 3.11 and later: there are some new attributes
        # related to the enhanced exceptions.
        args = (
            obj.co_argcount,
            obj.co_posonlyargcount,
            obj.co_kwonlyargcount,
            obj.co_nlocals,
            obj.co_stacksize,
            obj.co_flags,
            obj.co_code,
            obj.co_consts,
            obj.co_names,
            obj.co_varnames,
            obj.co_filename,
            obj.co_name,
            obj.co_qualname,
            obj.co_firstlineno,
            obj.co_linetable,
            obj.co_exceptiontable,
            obj.co_freevars,
            obj.co_cellvars,
        )
    elif hasattr(obj, "co_linetable"):
        # Python 3.10: obj.co_lnotab is deprecated and the constructor
        # expects obj.co_linetable instead (3.11+ is handled above).
        args = (
            obj.co_argcount,
            obj.co_posonlyargcount,
            obj.co_kwonlyargcount,
            obj.co_nlocals,
            obj.co_stacksize,
            obj.co_flags,
            obj.co_code,
            obj.co_consts,
            obj.co_names,
            obj.co_varnames,
            obj.co_filename,
            obj.co_name,
            obj.co_firstlineno,
            obj.co_linetable,
            obj.co_freevars,
            obj.co_cellvars,
        )
    elif hasattr(obj, "co_nmeta"):  # pragma: no cover
        # "nogil" Python: modified attributes from 3.9
        args = (
            obj.co_argcount,
            obj.co_posonlyargcount,
            obj.co_kwonlyargcount,
            obj.co_nlocals,
            obj.co_framesize,
            obj.co_ndefaultargs,
            obj.co_nmeta,
            obj.co_flags,
            obj.co_code,
            obj.co_consts,
            obj.co_varnames,
            obj.co_filename,
            obj.co_name,
            obj.co_firstlineno,
            obj.co_lnotab,
            obj.co_exc_handlers,
            obj.co_jump_table,
            obj.co_freevars,
            obj.co_cellvars,
            obj.co_free2reg,
            obj.co_cell2reg,
        )
    else:
        # Backward compat for 3.8 and 3.9
        args = (
            obj.co_argcount,
            obj.co_posonlyargcount,
            obj.co_kwonlyargcount,
            obj.co_nlocals,
            obj.co_stacksize,
            obj.co_flags,
            obj.co_code,
            obj.co_consts,
            obj.co_names,
            obj.co_varnames,
            obj.co_filename,
            obj.co_name,
            obj.co_firstlineno,
            obj.co_lnotab,
            obj.co_freevars,
            obj.co_cellvars,
        )
    return types.CodeType, args
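

# Illustrative sketch: the reducer's output can rebuild an equivalent code
# object (the branch taken always matches the running interpreter), which
# behaves identically once wrapped in a function again.
def _example_code_reduce():  # pragma: no cover
    def f(x):
        return x + 1

    reconstructor, args = _code_reduce(f.__code__)
    g = types.FunctionType(reconstructor(*args), {}, "g")
    assert g(41) == 42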


def _cell_reduce(obj):
    """Cell (containing values of a function's free variables) reducer."""
    try:
        obj.cell_contents
    except ValueError:  # cell is empty
        return _make_empty_cell, ()
    else:
        return _make_cell, (obj.cell_contents,)


def _classmethod_reduce(obj):
    orig_func = obj.__func__
    return type(obj), (orig_func,)


def _file_reduce(obj):
    """Save a file."""
    import io

    if not hasattr(obj, "name") or not hasattr(obj, "mode"):
        raise pickle.PicklingError(
            "Cannot pickle files that do not map to an actual file"
        )
    if obj is sys.stdout:
        return getattr, (sys, "stdout")
    if obj is sys.stderr:
        return getattr, (sys, "stderr")
    if obj is sys.stdin:
        raise pickle.PicklingError("Cannot pickle standard input")
    if obj.closed:
        raise pickle.PicklingError("Cannot pickle closed files")
    if hasattr(obj, "isatty") and obj.isatty():
        raise pickle.PicklingError("Cannot pickle files that map to tty objects")
    if "r" not in obj.mode and "+" not in obj.mode:
        raise pickle.PicklingError(
            "Cannot pickle files that are not opened for reading: %s" % obj.mode
        )

    name = obj.name

    retval = io.StringIO()

    try:
        # Read the whole file
        curloc = obj.tell()
        obj.seek(0)
        contents = obj.read()
        obj.seek(curloc)
    except OSError as e:
        raise pickle.PicklingError(
            "Cannot pickle file %s as it cannot be read" % name
        ) from e
    retval.write(contents)
    retval.seek(curloc)

    retval.name = name
    return _file_reconstructor, (retval,)


def _getset_descriptor_reduce(obj):
    return getattr, (obj.__objclass__, obj.__name__)


def _mappingproxy_reduce(obj):
    return types.MappingProxyType, (dict(obj),)


def _memoryview_reduce(obj):
    return bytes, (obj.tobytes(),)


def _module_reduce(obj):
    if _should_pickle_by_reference(obj):
        return subimport, (obj.__name__,)
    else:
        # Some external libraries can populate the "__builtins__" entry of a
        # module's `__dict__` with unpicklable objects (see #316). For that
        # reason, we do not attempt to pickle the "__builtins__" entry, and
        # restore a default value for it at unpickling time.
        state = obj.__dict__.copy()
        state.pop("__builtins__", None)
        return dynamic_subimport, (obj.__name__, state)


def _method_reduce(obj):
    return (types.MethodType, (obj.__func__, obj.__self__))


def _logger_reduce(obj):
    return logging.getLogger, (obj.name,)


def _root_logger_reduce(obj):
    return logging.getLogger, ()


def _property_reduce(obj):
    return property, (obj.fget, obj.fset, obj.fdel, obj.__doc__)


def _weakset_reduce(obj):
    return weakref.WeakSet, (list(obj),)


def _dynamic_class_reduce(obj):
    """Save a class that can't be referenced as a module attribute.

    This method is used to serialize classes that are defined inside
    functions, or that otherwise can't be serialized as attribute lookups
    from importable modules.
    """
    if Enum is not None and issubclass(obj, Enum):
        return (
            _make_skeleton_enum,
            _enum_getnewargs(obj),
            _enum_getstate(obj),
            None,
            None,
            _class_setstate,
        )
    else:
        return (
            _make_skeleton_class,
            _class_getnewargs(obj),
            _class_getstate(obj),
            None,
            None,
            _class_setstate,
        )


def _class_reduce(obj):
    """Select the reducer depending on the dynamic nature of the class obj."""
    if obj is type(None):  # noqa
        return type, (None,)
    elif obj is type(Ellipsis):
        return type, (Ellipsis,)
    elif obj is type(NotImplemented):
        return type, (NotImplemented,)
    elif obj in _BUILTIN_TYPE_NAMES:
        return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],)
    elif not _should_pickle_by_reference(obj):
        return _dynamic_class_reduce(obj)
    return NotImplemented


def _dict_keys_reduce(obj):
    # Safer not to ship the full dict as sending the rest might
    # be unintended and could potentially cause leaking of
    # sensitive information
    return _make_dict_keys, (list(obj),)


def _dict_values_reduce(obj):
    # Safer not to ship the full dict as sending the rest might
    # be unintended and could potentially cause leaking of
    # sensitive information
    return _make_dict_values, (list(obj),)


def _dict_items_reduce(obj):
    return _make_dict_items, (dict(obj),)


def _odict_keys_reduce(obj):
    # Safer not to ship the full dict as sending the rest might
    # be unintended and could potentially cause leaking of
    # sensitive information
    return _make_dict_keys, (list(obj), True)


def _odict_values_reduce(obj):
    # Safer not to ship the full dict as sending the rest might
    # be unintended and could potentially cause leaking of
    # sensitive information
    return _make_dict_values, (list(obj), True)


def _odict_items_reduce(obj):
    return _make_dict_items, (dict(obj), True)


def _dataclass_field_base_reduce(obj):
    return _get_dataclass_field_type_sentinel, (obj.name,)


# COLLECTIONS OF OBJECTS STATE SETTERS
# ------------------------------------
# state setters are called at unpickling time, once the object is created and
# it has to be updated to how it was at pickling time.


def _function_setstate(obj, state):
    """Update the state of a dynamic function.

    As __closure__ and __globals__ are readonly attributes of a function, we
    cannot rely on the native setstate routine of pickle.load_build, that calls
    setattr on items of the slotstate. Instead, we have to modify them inplace.
    """
    state, slotstate = state
    obj.__dict__.update(state)

    obj_globals = slotstate.pop("__globals__")
    obj_closure = slotstate.pop("__closure__")
    # _cloudpickle_submodules is a set of submodules that must be loaded for
    # the pickled function to work correctly at unpickling time. Now that these
    # submodules are depickled (hence imported), they can be removed from the
    # object's state (the object state only served as a reference holder to
    # these submodules)
    slotstate.pop("_cloudpickle_submodules")

    obj.__globals__.update(obj_globals)
    obj.__globals__["__builtins__"] = __builtins__

    if obj_closure is not None:
        for i, cell in enumerate(obj_closure):
            try:
                value = cell.cell_contents
            except ValueError:  # cell is empty
                continue
            obj.__closure__[i].cell_contents = value

    for k, v in slotstate.items():
        setattr(obj, k, v)


def _class_setstate(obj, state):
    state, slotstate = state
    registry = None
    for attrname, attr in state.items():
        if attrname == "_abc_impl":
            registry = attr
        else:
            setattr(obj, attrname, attr)
    if registry is not None:
        for subclass in registry:
            obj.register(subclass)

    return obj


# COLLECTION OF DATACLASS UTILITIES
# ---------------------------------
# There are some internal sentinel values whose identity must be preserved when
# unpickling dataclass fields. Each sentinel value has a unique name that we can
# use to retrieve its identity at unpickling time.


_DATACLASSE_FIELD_TYPE_SENTINELS = {
    dataclasses._FIELD.name: dataclasses._FIELD,
    dataclasses._FIELD_CLASSVAR.name: dataclasses._FIELD_CLASSVAR,
    dataclasses._FIELD_INITVAR.name: dataclasses._FIELD_INITVAR,
}


def _get_dataclass_field_type_sentinel(name):
    return _DATACLASSE_FIELD_TYPE_SENTINELS[name]


class Pickler(pickle.Pickler):
    # set of reducers defined and used by cloudpickle (private)
    _dispatch_table = {}
    _dispatch_table[classmethod] = _classmethod_reduce
    _dispatch_table[io.TextIOWrapper] = _file_reduce
    _dispatch_table[logging.Logger] = _logger_reduce
    _dispatch_table[logging.RootLogger] = _root_logger_reduce
    _dispatch_table[memoryview] = _memoryview_reduce
    _dispatch_table[property] = _property_reduce
    _dispatch_table[staticmethod] = _classmethod_reduce
    _dispatch_table[CellType] = _cell_reduce
    _dispatch_table[types.CodeType] = _code_reduce
    _dispatch_table[types.GetSetDescriptorType] = _getset_descriptor_reduce
    _dispatch_table[types.ModuleType] = _module_reduce
    _dispatch_table[types.MethodType] = _method_reduce
    _dispatch_table[types.MappingProxyType] = _mappingproxy_reduce
    _dispatch_table[weakref.WeakSet] = _weakset_reduce
    _dispatch_table[typing.TypeVar] = _typevar_reduce
    _dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce
    _dispatch_table[_collections_abc.dict_values] = _dict_values_reduce
    _dispatch_table[_collections_abc.dict_items] = _dict_items_reduce
    _dispatch_table[type(OrderedDict().keys())] = _odict_keys_reduce
    _dispatch_table[type(OrderedDict().values())] = _odict_values_reduce
    _dispatch_table[type(OrderedDict().items())] = _odict_items_reduce
    _dispatch_table[abc.abstractmethod] = _classmethod_reduce
    _dispatch_table[abc.abstractclassmethod] = _classmethod_reduce
    _dispatch_table[abc.abstractstaticmethod] = _classmethod_reduce
    _dispatch_table[abc.abstractproperty] = _property_reduce
    _dispatch_table[dataclasses._FIELD_BASE] = _dataclass_field_base_reduce

    dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table)

    # function reducers are defined as instance methods of cloudpickle.Pickler
    # objects, as they rely on a cloudpickle.Pickler attribute (globals_ref)
    def _dynamic_function_reduce(self, func):
        """Reduce a function that is not pickleable via attribute lookup."""
        newargs = self._function_getnewargs(func)
        state = _function_getstate(func)
        return (_make_function, newargs, state, None, None, _function_setstate)

    def _function_reduce(self, obj):
        """Reducer for function objects.

        If obj is a top-level attribute of a file-backed module, this reducer
        returns NotImplemented, making the cloudpickle.Pickler fall back to
        traditional pickle.Pickler routines to save obj. Otherwise, it reduces
        obj using a custom cloudpickle reducer designed specifically to handle
        dynamic functions.
        """
        if _should_pickle_by_reference(obj):
            return NotImplemented
        else:
            return self._dynamic_function_reduce(obj)

    def _function_getnewargs(self, func):
        code = func.__code__

        # base_globals represents the future global namespace of func at
        # unpickling time. Looking it up and storing it in
        # cloudpickle.Pickler.globals_ref allow functions sharing the same
        # globals at pickling time to also share them once unpickled, at one
        # condition: since globals_ref is an attribute of a cloudpickle.Pickler
        # instance, and that a new cloudpickle.Pickler is created each time
        # cloudpickle.dump or cloudpickle.dumps is called, functions also need
        # to be saved within the same invocation of
        # cloudpickle.dump/cloudpickle.dumps (for example:
        # cloudpickle.dumps([f1, f2])). There is no such limitation when using
        # cloudpickle.Pickler.dump, as long as the multiple invocations are
        # bound to the same cloudpickle.Pickler instance.
        base_globals = self.globals_ref.setdefault(id(func.__globals__), {})

        if base_globals == {}:
            # Add module attributes used to resolve relative imports
            # instructions inside func.
            for k in ["__package__", "__name__", "__path__", "__file__"]:
                if k in func.__globals__:
                    base_globals[k] = func.__globals__[k]

        # Do not bind the free variables before the function is created to
        # avoid infinite recursion.
        if func.__closure__ is None:
            closure = None
        else:
            closure = tuple(_make_empty_cell() for _ in range(len(code.co_freevars)))

        return code, base_globals, None, None, closure

    def dump(self, obj):
        try:
            return super().dump(obj)
        except RuntimeError as e:
            if len(e.args) > 0 and "recursion" in e.args[0]:
                msg = "Could not pickle object as excessively deep recursion required."
                raise pickle.PicklingError(msg) from e
            else:
                raise

    def __init__(self, file, protocol=None, buffer_callback=None):
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
        super().__init__(file, protocol=protocol, buffer_callback=buffer_callback)
        # map functions __globals__ attribute ids, to ensure that functions
        # sharing the same global namespace at pickling time also share
        # their global namespace at unpickling time.
        self.globals_ref = {}
        self.proto = int(protocol)

    if not PYPY:
        # pickle.Pickler is the C implementation of the CPython pickler and
        # therefore we rely on the reducer_override method to customize the
        # pickler behavior.

        # `cloudpickle.Pickler.dispatch` is only left for backward
        # compatibility - note that when using protocol 5,
        # `cloudpickle.Pickler.dispatch` is not an extension of
        # `pickle._Pickler.dispatch` dictionary, because `cloudpickle.Pickler`
        # subclasses the C-implemented `pickle.Pickler`, which does not expose
        # a `dispatch` attribute. Earlier versions of `cloudpickle.Pickler`
        # used `cloudpickle.Pickler.dispatch` as a class-level attribute
        # storing all reducers implemented by cloudpickle, but the attribute
        # name was not a great choice because it would collide with a
        # similarly named attribute in the pure-Python `pickle._Pickler`
        # implementation in the standard library.
        dispatch = dispatch_table

        # Implementation of the reducer_override callback, in order to
        # efficiently serialize dynamic functions and classes by subclassing
        # the C-implemented `pickle.Pickler`.
        # TODO: decorrelate reducer_override (which is tied to CPython's
        # implementation - would it make sense to backport it to pypy?) and
        # pickle's protocol 5, which is implementation agnostic. Currently, the
        # availability of both notions coincide on CPython's pickle, but it may
        # not be the case anymore when pypy implements protocol 5.

        def reducer_override(self, obj):
            """Type-agnostic reducing callback for function and classes.

            For performance reasons, subclasses of the C `pickle.Pickler` class
            cannot register custom reducers for functions and classes in the
            dispatch_table attribute. Reducers for such types must instead be
            implemented via the special `reducer_override` method.

            Note that this method will be called for any object except a few
            builtin-types (int, lists, dicts etc.), which differs from reducers
            in the Pickler's dispatch_table, each of them being invoked for
            objects of a specific type only.

            This property comes in handy for classes: although most classes are
            instances of the ``type`` metaclass, some of them can be instances
            of other custom metaclasses (such as enum.EnumMeta for example). In
            particular, the metaclass will likely not be known in advance, and
            thus cannot be special-cased using an entry in the dispatch_table.
            reducer_override, among other things, allows us to register a
            reducer that will be called for any class, independently of its
            type.

            Notes:

            * reducer_override has the priority over dispatch_table-registered
              reducers.
            * reducer_override can be used to fix other limitations of
              cloudpickle for other types that suffered from type-specific
              reducers, such as Exceptions. See
              https://github.com/cloudpipe/cloudpickle/issues/248
            """
            t = type(obj)
            try:
                is_anyclass = issubclass(t, type)
            except TypeError:  # t is not a class (old Boost; see SF #502085)
                is_anyclass = False

            if is_anyclass:
                return _class_reduce(obj)
            elif isinstance(obj, types.FunctionType):
                return self._function_reduce(obj)
            else:
                # fallback to save_global, including the Pickler's
                # dispatch_table
                return NotImplemented

    else:
        # When reducer_override is not available, hack the pure-Python
        # Pickler's types.FunctionType and type savers. Note: the type saver
        # must override Pickler.save_global, because pickle.py contains a
        # hard-coded call to save_global when pickling meta-classes.
        dispatch = pickle.Pickler.dispatch.copy()

        def _save_reduce_pickle5(
            self,
            func,
            args,
            state=None,
            listitems=None,
            dictitems=None,
            state_setter=None,
            obj=None,
        ):
            save = self.save
            write = self.write
            self.save_reduce(
                func,
                args,
                state=None,
                listitems=listitems,
                dictitems=dictitems,
                obj=obj,
            )
            # backport of the Python 3.8 state_setter pickle operations
            save(state_setter)
            save(obj)  # simple BINGET opcode as obj is already memoized.
            save(state)
            write(pickle.TUPLE2)
            # Trigger a state_setter(obj, state) function call.
            write(pickle.REDUCE)
            # The purpose of state_setter is to carry out an
            # inplace modification of obj. We do not care about what the
            # method might return, so its output is eventually removed from
            # the stack.
            write(pickle.POP)

        def save_global(self, obj, name=None, pack=struct.pack):
            """Main dispatch method.

            The name of this method is somewhat misleading: all types get
            dispatched here.
            """
            if obj is type(None):  # noqa
                return self.save_reduce(type, (None,), obj=obj)
            elif obj is type(Ellipsis):
                return self.save_reduce(type, (Ellipsis,), obj=obj)
            elif obj is type(NotImplemented):
                return self.save_reduce(type, (NotImplemented,), obj=obj)
            elif obj in _BUILTIN_TYPE_NAMES:
                return self.save_reduce(
                    _builtin_type, (_BUILTIN_TYPE_NAMES[obj],), obj=obj
                )

            if name is not None:
                super().save_global(obj, name=name)
            elif not _should_pickle_by_reference(obj, name=name):
                self._save_reduce_pickle5(*_dynamic_class_reduce(obj), obj=obj)
            else:
                super().save_global(obj, name=name)

        dispatch[type] = save_global

        def save_function(self, obj, name=None):
            """Registered with the dispatch to handle all function types.

            Determines what kind of function obj is (e.g. lambda, defined at
            interactive prompt, etc.) and handles the pickling appropriately.
            """
            if _should_pickle_by_reference(obj, name=name):
                return super().save_global(obj, name=name)
            elif PYPY and isinstance(obj.__code__, builtin_code_type):
                return self.save_pypy_builtin_func(obj)
            else:
                return self._save_reduce_pickle5(
                    *self._dynamic_function_reduce(obj), obj=obj
                )

        def save_pypy_builtin_func(self, obj):
            """Save pypy equivalent of builtin functions.

            PyPy does not have the concept of builtin-functions. Instead,
            builtin-functions are simple function instances, but with a
            builtin-code attribute.
            Most of the time, builtin functions should be pickled by attribute.
            But PyPy has flaky support for __qualname__, so some builtin
            functions such as float.__new__ will be classified as dynamic. For
            this reason only, we created this special routine. Because
            builtin-functions are not expected to have closure or globals,
            there is no additional hack (compared to the one already implemented
            in pickle) to protect ourselves from reference cycles. A simple
            (reconstructor, newargs, obj.__dict__) tuple is passed to
            save_reduce. Note also that PyPy improved their support for
            __qualname__ in v3.6, so this routine should be removed when
            cloudpickle supports only PyPy 3.6 and later.
            """
            rv = (
                types.FunctionType,
                (obj.__code__, {}, obj.__name__, obj.__defaults__, obj.__closure__),
                obj.__dict__,
            )
            self.save_reduce(*rv, obj=obj)

        dispatch[types.FunctionType] = save_function


# Shorthands similar to pickle.dump/pickle.dumps


def dump(obj, file, protocol=None, buffer_callback=None):
    """Serialize obj as bytes streamed into file

    protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to
    pickle.HIGHEST_PROTOCOL. This setting favors maximum communication
    speed between processes running the same Python version.

    Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure
    compatibility with older versions of Python (although this is not always
    guaranteed to work because cloudpickle relies on some internal
    implementation details that can change from one Python version to the
    next).
    """
    Pickler(file, protocol=protocol, buffer_callback=buffer_callback).dump(obj)


def dumps(obj, protocol=None, buffer_callback=None):
    """Serialize obj as a string of bytes allocated in memory

    protocol defaults to cloudpickle.DEFAULT_PROTOCOL which is an alias to
    pickle.HIGHEST_PROTOCOL. This setting favors maximum communication
    speed between processes running the same Python version.

    Set protocol=pickle.DEFAULT_PROTOCOL instead if you need to ensure
    compatibility with older versions of Python (although this is not always
    guaranteed to work because cloudpickle relies on some internal
    implementation details that can change from one Python version to the
    next).
    """
    with io.BytesIO() as file:
        cp = Pickler(file, protocol=protocol, buffer_callback=buffer_callback)
        cp.dump(obj)
        return file.getvalue()
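

# Illustrative round trip: a lambda cannot be pickled by the standard
# library's pickle, but cloudpickle serializes it by value.
def _example_dumps_roundtrip():  # pragma: no cover
    add_one = lambda x: x + 1  # noqa: E731
    payload = dumps(add_one)
    clone = loads(payload)
    assert clone(41) == 42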


# Include pickles unloading functions in this namespace for convenience.
load, loads = pickle.load, pickle.loads

# Backward compat alias.
CloudPickler = Pickler