Inzynierka/Lib/site-packages/scipy/_lib/_util.py

712 lines
24 KiB
Python
Raw Normal View History

2023-06-02 12:51:02 +02:00
from contextlib import contextmanager
import functools
import operator
import warnings
import numbers
from collections import namedtuple
import inspect
import math
from typing import (
Optional,
Union,
TYPE_CHECKING,
TypeVar,
)
import numpy as np
IntNumber = Union[int, np.integer]
DecimalNumber = Union[float, np.floating, np.integer]
# Since Generator was introduced in numpy 1.17, the following condition is needed for
# backward compatibility
if TYPE_CHECKING:
SeedType = Optional[Union[IntNumber, np.random.Generator,
np.random.RandomState]]
GeneratorType = TypeVar("GeneratorType", bound=Union[np.random.Generator,
np.random.RandomState])
try:
from numpy.random import Generator as Generator
except ImportError:
class Generator(): # type: ignore[no-redef]
pass
def _lazywhere(cond, arrays, f, fillvalue=None, f2=None):
"""
np.where(cond, x, fillvalue) always evaluates x even where cond is False.
This one only evaluates f(arr1[cond], arr2[cond], ...).
Examples
--------
>>> import numpy as np
>>> a, b = np.array([1, 2, 3, 4]), np.array([5, 6, 7, 8])
>>> def f(a, b):
... return a*b
>>> _lazywhere(a > 2, (a, b), f, np.nan)
array([ nan, nan, 21., 32.])
Notice, it assumes that all `arrays` are of the same shape, or can be
broadcasted together.
"""
cond = np.asarray(cond)
if fillvalue is None:
if f2 is None:
raise ValueError("One of (fillvalue, f2) must be given.")
else:
fillvalue = np.nan
else:
if f2 is not None:
raise ValueError("Only one of (fillvalue, f2) can be given.")
args = np.broadcast_arrays(cond, *arrays)
cond, arrays = args[0], args[1:]
temp = tuple(np.extract(cond, arr) for arr in arrays)
tcode = np.mintypecode([a.dtype.char for a in arrays])
out = np.full(np.shape(arrays[0]), fill_value=fillvalue, dtype=tcode)
np.place(out, cond, f(*temp))
if f2 is not None:
temp = tuple(np.extract(~cond, arr) for arr in arrays)
np.place(out, ~cond, f2(*temp))
return out
def _lazyselect(condlist, choicelist, arrays, default=0):
"""
Mimic `np.select(condlist, choicelist)`.
Notice, it assumes that all `arrays` are of the same shape or can be
broadcasted together.
All functions in `choicelist` must accept array arguments in the order
given in `arrays` and must return an array of the same shape as broadcasted
`arrays`.
Examples
--------
>>> import numpy as np
>>> x = np.arange(6)
>>> np.select([x <3, x > 3], [x**2, x**3], default=0)
array([ 0, 1, 4, 0, 64, 125])
>>> _lazyselect([x < 3, x > 3], [lambda x: x**2, lambda x: x**3], (x,))
array([ 0., 1., 4., 0., 64., 125.])
>>> a = -np.ones_like(x)
>>> _lazyselect([x < 3, x > 3],
... [lambda x, a: x**2, lambda x, a: a * x**3],
... (x, a), default=np.nan)
array([ 0., 1., 4., nan, -64., -125.])
"""
arrays = np.broadcast_arrays(*arrays)
tcode = np.mintypecode([a.dtype.char for a in arrays])
out = np.full(np.shape(arrays[0]), fill_value=default, dtype=tcode)
for func, cond in zip(choicelist, condlist):
if np.all(cond is False):
continue
cond, _ = np.broadcast_arrays(cond, arrays[0])
temp = tuple(np.extract(cond, arr) for arr in arrays)
np.place(out, cond, func(*temp))
return out
def _aligned_zeros(shape, dtype=float, order="C", align=None):
"""Allocate a new ndarray with aligned memory.
Primary use case for this currently is working around a f2py issue
in NumPy 1.9.1, where dtype.alignment is such that np.zeros() does
not necessarily create arrays aligned up to it.
"""
dtype = np.dtype(dtype)
if align is None:
align = dtype.alignment
if not hasattr(shape, '__len__'):
shape = (shape,)
size = functools.reduce(operator.mul, shape) * dtype.itemsize
buf = np.empty(size + align + 1, np.uint8)
offset = buf.__array_interface__['data'][0] % align
if offset != 0:
offset = align - offset
# Note: slices producing 0-size arrays do not necessarily change
# data pointer --- so we use and allocate size+1
buf = buf[offset:offset+size+1][:-1]
data = np.ndarray(shape, dtype, buf, order=order)
data.fill(0)
return data
def _prune_array(array):
"""Return an array equivalent to the input array. If the input
array is a view of a much larger array, copy its contents to a
newly allocated array. Otherwise, return the input unchanged.
"""
if array.base is not None and array.size < array.base.size // 2:
return array.copy()
return array
def prod(iterable):
"""
Product of a sequence of numbers.
Faster than np.prod for short lists like array shapes, and does
not overflow if using Python integers.
"""
product = 1
for x in iterable:
product *= x
return product
def float_factorial(n: int) -> float:
"""Compute the factorial and return as a float
Returns infinity when result is too large for a double
"""
return float(math.factorial(n)) if n < 171 else np.inf
# copy-pasted from scikit-learn utils/validation.py
# change this to scipy.stats._qmc.check_random_state once numpy 1.16 is dropped
def check_random_state(seed):
"""Turn `seed` into a `np.random.RandomState` instance.
Parameters
----------
seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
If `seed` is None (or `np.random`), the `numpy.random.RandomState`
singleton is used.
If `seed` is an int, a new ``RandomState`` instance is used,
seeded with `seed`.
If `seed` is already a ``Generator`` or ``RandomState`` instance then
that instance is used.
Returns
-------
seed : {`numpy.random.Generator`, `numpy.random.RandomState`}
Random number generator.
"""
if seed is None or seed is np.random:
return np.random.mtrand._rand
if isinstance(seed, (numbers.Integral, np.integer)):
return np.random.RandomState(seed)
if isinstance(seed, (np.random.RandomState, np.random.Generator)):
return seed
raise ValueError('%r cannot be used to seed a numpy.random.RandomState'
' instance' % seed)
def _asarray_validated(a, check_finite=True,
sparse_ok=False, objects_ok=False, mask_ok=False,
as_inexact=False):
"""
Helper function for SciPy argument validation.
Many SciPy linear algebra functions do support arbitrary array-like
input arguments. Examples of commonly unsupported inputs include
matrices containing inf/nan, sparse matrix representations, and
matrices with complicated elements.
Parameters
----------
a : array_like
The array-like input.
check_finite : bool, optional
Whether to check that the input matrices contain only finite numbers.
Disabling may give a performance gain, but may result in problems
(crashes, non-termination) if the inputs do contain infinities or NaNs.
Default: True
sparse_ok : bool, optional
True if scipy sparse matrices are allowed.
objects_ok : bool, optional
True if arrays with dype('O') are allowed.
mask_ok : bool, optional
True if masked arrays are allowed.
as_inexact : bool, optional
True to convert the input array to a np.inexact dtype.
Returns
-------
ret : ndarray
The converted validated array.
"""
if not sparse_ok:
import scipy.sparse
if scipy.sparse.issparse(a):
msg = ('Sparse matrices are not supported by this function. '
'Perhaps one of the scipy.sparse.linalg functions '
'would work instead.')
raise ValueError(msg)
if not mask_ok:
if np.ma.isMaskedArray(a):
raise ValueError('masked arrays are not supported')
toarray = np.asarray_chkfinite if check_finite else np.asarray
a = toarray(a)
if not objects_ok:
if a.dtype is np.dtype('O'):
raise ValueError('object arrays are not supported')
if as_inexact:
if not np.issubdtype(a.dtype, np.inexact):
a = toarray(a, dtype=np.float_)
return a
def _validate_int(k, name, minimum=None):
"""
Validate a scalar integer.
This functon can be used to validate an argument to a function
that expects the value to be an integer. It uses `operator.index`
to validate the value (so, for example, k=2.0 results in a
TypeError).
Parameters
----------
k : int
The value to be validated.
name : str
The name of the parameter.
minimum : int, optional
An optional lower bound.
"""
try:
k = operator.index(k)
except TypeError:
raise TypeError(f'{name} must be an integer.') from None
if minimum is not None and k < minimum:
raise ValueError(f'{name} must be an integer not less '
f'than {minimum}') from None
return k
# Add a replacement for inspect.getfullargspec()/
# The version below is borrowed from Django,
# https://github.com/django/django/pull/4846.
# Note an inconsistency between inspect.getfullargspec(func) and
# inspect.signature(func). If `func` is a bound method, the latter does *not*
# list `self` as a first argument, while the former *does*.
# Hence, cook up a common ground replacement: `getfullargspec_no_self` which
# mimics `inspect.getfullargspec` but does not list `self`.
#
# This way, the caller code does not need to know whether it uses a legacy
# .getfullargspec or a bright and shiny .signature.
FullArgSpec = namedtuple('FullArgSpec',
['args', 'varargs', 'varkw', 'defaults',
'kwonlyargs', 'kwonlydefaults', 'annotations'])
def getfullargspec_no_self(func):
"""inspect.getfullargspec replacement using inspect.signature.
If func is a bound method, do not list the 'self' parameter.
Parameters
----------
func : callable
A callable to inspect
Returns
-------
fullargspec : FullArgSpec(args, varargs, varkw, defaults, kwonlyargs,
kwonlydefaults, annotations)
NOTE: if the first argument of `func` is self, it is *not*, I repeat
*not*, included in fullargspec.args.
This is done for consistency between inspect.getargspec() under
Python 2.x, and inspect.signature() under Python 3.x.
"""
sig = inspect.signature(func)
args = [
p.name for p in sig.parameters.values()
if p.kind in [inspect.Parameter.POSITIONAL_OR_KEYWORD,
inspect.Parameter.POSITIONAL_ONLY]
]
varargs = [
p.name for p in sig.parameters.values()
if p.kind == inspect.Parameter.VAR_POSITIONAL
]
varargs = varargs[0] if varargs else None
varkw = [
p.name for p in sig.parameters.values()
if p.kind == inspect.Parameter.VAR_KEYWORD
]
varkw = varkw[0] if varkw else None
defaults = tuple(
p.default for p in sig.parameters.values()
if (p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD and
p.default is not p.empty)
) or None
kwonlyargs = [
p.name for p in sig.parameters.values()
if p.kind == inspect.Parameter.KEYWORD_ONLY
]
kwdefaults = {p.name: p.default for p in sig.parameters.values()
if p.kind == inspect.Parameter.KEYWORD_ONLY and
p.default is not p.empty}
annotations = {p.name: p.annotation for p in sig.parameters.values()
if p.annotation is not p.empty}
return FullArgSpec(args, varargs, varkw, defaults, kwonlyargs,
kwdefaults or None, annotations)
class _FunctionWrapper:
"""
Object to wrap user's function, allowing picklability
"""
def __init__(self, f, args):
self.f = f
self.args = [] if args is None else args
def __call__(self, x):
return self.f(x, *self.args)
class MapWrapper:
"""
Parallelisation wrapper for working with map-like callables, such as
`multiprocessing.Pool.map`.
Parameters
----------
pool : int or map-like callable
If `pool` is an integer, then it specifies the number of threads to
use for parallelization. If ``int(pool) == 1``, then no parallel
processing is used and the map builtin is used.
If ``pool == -1``, then the pool will utilize all available CPUs.
If `pool` is a map-like callable that follows the same
calling sequence as the built-in map function, then this callable is
used for parallelization.
"""
def __init__(self, pool=1):
self.pool = None
self._mapfunc = map
self._own_pool = False
if callable(pool):
self.pool = pool
self._mapfunc = self.pool
else:
from multiprocessing import Pool
# user supplies a number
if int(pool) == -1:
# use as many processors as possible
self.pool = Pool()
self._mapfunc = self.pool.map
self._own_pool = True
elif int(pool) == 1:
pass
elif int(pool) > 1:
# use the number of processors requested
self.pool = Pool(processes=int(pool))
self._mapfunc = self.pool.map
self._own_pool = True
else:
raise RuntimeError("Number of workers specified must be -1,"
" an int >= 1, or an object with a 'map' "
"method")
def __enter__(self):
return self
def terminate(self):
if self._own_pool:
self.pool.terminate()
def join(self):
if self._own_pool:
self.pool.join()
def close(self):
if self._own_pool:
self.pool.close()
def __exit__(self, exc_type, exc_value, traceback):
if self._own_pool:
self.pool.close()
self.pool.terminate()
def __call__(self, func, iterable):
# only accept one iterable because that's all Pool.map accepts
try:
return self._mapfunc(func, iterable)
except TypeError as e:
# wrong number of arguments
raise TypeError("The map-like callable must be of the"
" form f(func, iterable)") from e
def rng_integers(gen, low, high=None, size=None, dtype='int64',
endpoint=False):
"""
Return random integers from low (inclusive) to high (exclusive), or if
endpoint=True, low (inclusive) to high (inclusive). Replaces
`RandomState.randint` (with endpoint=False) and
`RandomState.random_integers` (with endpoint=True).
Return random integers from the "discrete uniform" distribution of the
specified dtype. If high is None (the default), then results are from
0 to low.
Parameters
----------
gen : {None, np.random.RandomState, np.random.Generator}
Random number generator. If None, then the np.random.RandomState
singleton is used.
low : int or array-like of ints
Lowest (signed) integers to be drawn from the distribution (unless
high=None, in which case this parameter is 0 and this value is used
for high).
high : int or array-like of ints
If provided, one above the largest (signed) integer to be drawn from
the distribution (see above for behavior if high=None). If array-like,
must contain integer values.
size : array-like of ints, optional
Output shape. If the given shape is, e.g., (m, n, k), then m * n * k
samples are drawn. Default is None, in which case a single value is
returned.
dtype : {str, dtype}, optional
Desired dtype of the result. All dtypes are determined by their name,
i.e., 'int64', 'int', etc, so byteorder is not available and a specific
precision may have different C types depending on the platform.
The default value is np.int_.
endpoint : bool, optional
If True, sample from the interval [low, high] instead of the default
[low, high) Defaults to False.
Returns
-------
out: int or ndarray of ints
size-shaped array of random integers from the appropriate distribution,
or a single such random int if size not provided.
"""
if isinstance(gen, Generator):
return gen.integers(low, high=high, size=size, dtype=dtype,
endpoint=endpoint)
else:
if gen is None:
# default is RandomState singleton used by np.random.
gen = np.random.mtrand._rand
if endpoint:
# inclusive of endpoint
# remember that low and high can be arrays, so don't modify in
# place
if high is None:
return gen.randint(low + 1, size=size, dtype=dtype)
if high is not None:
return gen.randint(low, high=high + 1, size=size, dtype=dtype)
# exclusive
return gen.randint(low, high=high, size=size, dtype=dtype)
@contextmanager
def _fixed_default_rng(seed=1638083107694713882823079058616272161):
"""Context with a fixed np.random.default_rng seed."""
orig_fun = np.random.default_rng
np.random.default_rng = lambda seed=seed: orig_fun(seed)
try:
yield
finally:
np.random.default_rng = orig_fun
def _argmin(a, keepdims=False, axis=None):
"""
argmin with a `keepdims` parameter.
See https://github.com/numpy/numpy/issues/8710
If axis is not None, a.shape[axis] must be greater than 0.
"""
res = np.argmin(a, axis=axis)
if keepdims and axis is not None:
res = np.expand_dims(res, axis=axis)
return res
def _first_nonnan(a, axis):
"""
Return the first non-nan value along the given axis.
If a slice is all nan, nan is returned for that slice.
The shape of the return value corresponds to ``keepdims=True``.
Examples
--------
>>> import numpy as np
>>> nan = np.nan
>>> a = np.array([[ 3., 3., nan, 3.],
[ 1., nan, 2., 4.],
[nan, nan, 9., -1.],
[nan, 5., 4., 3.],
[ 2., 2., 2., 2.],
[nan, nan, nan, nan]])
>>> _first_nonnan(a, axis=0)
array([[3., 3., 2., 3.]])
>>> _first_nonnan(a, axis=1)
array([[ 3.],
[ 1.],
[ 9.],
[ 5.],
[ 2.],
[nan]])
"""
k = _argmin(np.isnan(a), axis=axis, keepdims=True)
return np.take_along_axis(a, k, axis=axis)
def _nan_allsame(a, axis, keepdims=False):
"""
Determine if the values along an axis are all the same.
nan values are ignored.
`a` must be a numpy array.
`axis` is assumed to be normalized; that is, 0 <= axis < a.ndim.
For an axis of length 0, the result is True. That is, we adopt the
convention that ``allsame([])`` is True. (There are no values in the
input that are different.)
`True` is returned for slices that are all nan--not because all the
values are the same, but because this is equivalent to ``allsame([])``.
Examples
--------
>>> import numpy as np
>>> a = np.array([[ 3., 3., nan, 3.],
[ 1., nan, 2., 4.],
[nan, nan, 9., -1.],
[nan, 5., 4., 3.],
[ 2., 2., 2., 2.],
[nan, nan, nan, nan]])
>>> _nan_allsame(a, axis=1, keepdims=True)
array([[ True],
[False],
[False],
[False],
[ True],
[ True]])
"""
if axis is None:
if a.size == 0:
return True
a = a.ravel()
axis = 0
else:
shp = a.shape
if shp[axis] == 0:
shp = shp[:axis] + (1,)*keepdims + shp[axis + 1:]
return np.full(shp, fill_value=True, dtype=bool)
a0 = _first_nonnan(a, axis=axis)
return ((a0 == a) | np.isnan(a)).all(axis=axis, keepdims=keepdims)
def _contains_nan(a, nan_policy='propagate', use_summation=True):
if not isinstance(a, np.ndarray):
use_summation = False # some array_likes ignore nans (e.g. pandas)
policies = ['propagate', 'raise', 'omit']
if nan_policy not in policies:
raise ValueError("nan_policy must be one of {%s}" %
', '.join("'%s'" % s for s in policies))
if np.issubdtype(a.dtype, np.inexact):
# The summation method avoids creating a (potentially huge) array.
if use_summation:
with np.errstate(invalid='ignore', over='ignore'):
contains_nan = np.isnan(np.sum(a))
else:
contains_nan = np.isnan(a).any()
elif np.issubdtype(a.dtype, object):
contains_nan = False
for el in a.ravel():
# isnan doesn't work on non-numeric elements
if np.issubdtype(type(el), np.number) and np.isnan(el):
contains_nan = True
break
else:
# Only `object` and `inexact` arrays can have NaNs
contains_nan = False
if contains_nan and nan_policy == 'raise':
raise ValueError("The input contains nan values")
return contains_nan, nan_policy
def _rename_parameter(old_name, new_name, dep_version=None):
"""
Generate decorator for backward-compatible keyword renaming.
Apply the decorator generated by `_rename_parameter` to functions with a
recently renamed parameter to maintain backward-compatibility.
After decoration, the function behaves as follows:
If only the new parameter is passed into the function, behave as usual.
If only the old parameter is passed into the function (as a keyword), raise
a DeprecationWarning if `dep_version` is provided, and behave as usual
otherwise.
If both old and new parameters are passed into the function, raise a
DeprecationWarning if `dep_version` is provided, and raise the appropriate
TypeError (function got multiple values for argument).
Parameters
----------
old_name : str
Old name of parameter
new_name : str
New name of parameter
dep_version : str, optional
Version of SciPy in which old parameter was deprecated in the format
'X.Y.Z'. If supplied, the deprecation message will indicate that
support for the old parameter will be removed in version 'X.Y+2.Z'
Notes
-----
Untested with functions that accept *args. Probably won't work as written.
"""
def decorator(fun):
@functools.wraps(fun)
def wrapper(*args, **kwargs):
if old_name in kwargs:
if dep_version:
end_version = dep_version.split('.')
end_version[1] = str(int(end_version[1]) + 2)
end_version = '.'.join(end_version)
message = (f"Use of keyword argument `{old_name}` is "
f"deprecated and replaced by `{new_name}`. "
f"Support for `{old_name}` will be removed "
f"in SciPy {end_version}.")
warnings.warn(message, DeprecationWarning, stacklevel=2)
if new_name in kwargs:
message = (f"{fun.__name__}() got multiple values for "
f"argument now known as `{new_name}`")
raise TypeError(message)
kwargs[new_name] = kwargs.pop(old_name)
return fun(*args, **kwargs)
return wrapper
return decorator
def _rng_spawn(rng, n_children):
# spawns independent RNGs from a parent RNG
bg = rng._bit_generator
ss = bg._seed_seq
child_rngs = [np.random.Generator(type(bg)(child_ss))
for child_ss in ss.spawn(n_children)]
return child_rngs