617 lines
22 KiB
Python
617 lines
22 KiB
Python
import numpy as np
|
|
import scipy.sparse as sps
|
|
from ._numdiff import approx_derivative, group_columns
|
|
from ._hessian_update_strategy import HessianUpdateStrategy
|
|
from scipy.sparse.linalg import LinearOperator
|
|
|
|
|
|
FD_METHODS = ('2-point', '3-point', 'cs')
|
|
|
|
|
|
class ScalarFunction:
|
|
"""Scalar function and its derivatives.
|
|
|
|
This class defines a scalar function F: R^n->R and methods for
|
|
computing or approximating its first and second derivatives.
|
|
|
|
Parameters
|
|
----------
|
|
fun : callable
|
|
evaluates the scalar function. Must be of the form ``fun(x, *args)``,
|
|
where ``x`` is the argument in the form of a 1-D array and ``args`` is
|
|
a tuple of any additional fixed parameters needed to completely specify
|
|
the function. Should return a scalar.
|
|
x0 : array-like
|
|
Provides an initial set of variables for evaluating fun. Array of real
|
|
elements of size (n,), where 'n' is the number of independent
|
|
variables.
|
|
args : tuple, optional
|
|
Any additional fixed parameters needed to completely specify the scalar
|
|
function.
|
|
grad : {callable, '2-point', '3-point', 'cs'}
|
|
Method for computing the gradient vector.
|
|
If it is a callable, it should be a function that returns the gradient
|
|
vector:
|
|
|
|
``grad(x, *args) -> array_like, shape (n,)``
|
|
|
|
where ``x`` is an array with shape (n,) and ``args`` is a tuple with
|
|
the fixed parameters.
|
|
Alternatively, the keywords {'2-point', '3-point', 'cs'} can be used
|
|
to select a finite difference scheme for numerical estimation of the
|
|
gradient with a relative step size. These finite difference schemes
|
|
obey any specified `bounds`.
|
|
hess : {callable, '2-point', '3-point', 'cs', HessianUpdateStrategy}
|
|
Method for computing the Hessian matrix. If it is callable, it should
|
|
return the Hessian matrix:
|
|
|
|
``hess(x, *args) -> {LinearOperator, spmatrix, array}, (n, n)``
|
|
|
|
where x is a (n,) ndarray and `args` is a tuple with the fixed
|
|
parameters. Alternatively, the keywords {'2-point', '3-point', 'cs'}
|
|
select a finite difference scheme for numerical estimation. Or, objects
|
|
implementing `HessianUpdateStrategy` interface can be used to
|
|
approximate the Hessian.
|
|
Whenever the gradient is estimated via finite-differences, the Hessian
|
|
cannot be estimated with options {'2-point', '3-point', 'cs'} and needs
|
|
to be estimated using one of the quasi-Newton strategies.
|
|
finite_diff_rel_step : None or array_like
|
|
Relative step size to use. The absolute step size is computed as
|
|
``h = finite_diff_rel_step * sign(x0) * max(1, abs(x0))``, possibly
|
|
adjusted to fit into the bounds. For ``method='3-point'`` the sign
|
|
of `h` is ignored. If None then finite_diff_rel_step is selected
|
|
automatically,
|
|
finite_diff_bounds : tuple of array_like
|
|
Lower and upper bounds on independent variables. Defaults to no bounds,
|
|
(-np.inf, np.inf). Each bound must match the size of `x0` or be a
|
|
scalar, in the latter case the bound will be the same for all
|
|
variables. Use it to limit the range of function evaluation.
|
|
epsilon : None or array_like, optional
|
|
Absolute step size to use, possibly adjusted to fit into the bounds.
|
|
For ``method='3-point'`` the sign of `epsilon` is ignored. By default
|
|
relative steps are used, only if ``epsilon is not None`` are absolute
|
|
steps used.
|
|
|
|
Notes
|
|
-----
|
|
This class implements a memoization logic. There are methods `fun`,
|
|
`grad`, hess` and corresponding attributes `f`, `g` and `H`. The following
|
|
things should be considered:
|
|
|
|
1. Use only public methods `fun`, `grad` and `hess`.
|
|
2. After one of the methods is called, the corresponding attribute
|
|
will be set. However, a subsequent call with a different argument
|
|
of *any* of the methods may overwrite the attribute.
|
|
"""
|
|
def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step,
|
|
finite_diff_bounds, epsilon=None):
|
|
if not callable(grad) and grad not in FD_METHODS:
|
|
raise ValueError(
|
|
f"`grad` must be either callable or one of {FD_METHODS}."
|
|
)
|
|
|
|
if not (callable(hess) or hess in FD_METHODS
|
|
or isinstance(hess, HessianUpdateStrategy)):
|
|
raise ValueError(
|
|
f"`hess` must be either callable, HessianUpdateStrategy"
|
|
f" or one of {FD_METHODS}."
|
|
)
|
|
|
|
if grad in FD_METHODS and hess in FD_METHODS:
|
|
raise ValueError("Whenever the gradient is estimated via "
|
|
"finite-differences, we require the Hessian "
|
|
"to be estimated using one of the "
|
|
"quasi-Newton strategies.")
|
|
|
|
# the astype call ensures that self.x is a copy of x0
|
|
self.x = np.atleast_1d(x0).astype(float)
|
|
self.n = self.x.size
|
|
self.nfev = 0
|
|
self.ngev = 0
|
|
self.nhev = 0
|
|
self.f_updated = False
|
|
self.g_updated = False
|
|
self.H_updated = False
|
|
|
|
self._lowest_x = None
|
|
self._lowest_f = np.inf
|
|
|
|
finite_diff_options = {}
|
|
if grad in FD_METHODS:
|
|
finite_diff_options["method"] = grad
|
|
finite_diff_options["rel_step"] = finite_diff_rel_step
|
|
finite_diff_options["abs_step"] = epsilon
|
|
finite_diff_options["bounds"] = finite_diff_bounds
|
|
if hess in FD_METHODS:
|
|
finite_diff_options["method"] = hess
|
|
finite_diff_options["rel_step"] = finite_diff_rel_step
|
|
finite_diff_options["abs_step"] = epsilon
|
|
finite_diff_options["as_linear_operator"] = True
|
|
|
|
# Function evaluation
|
|
def fun_wrapped(x):
|
|
self.nfev += 1
|
|
# Send a copy because the user may overwrite it.
|
|
# Overwriting results in undefined behaviour because
|
|
# fun(self.x) will change self.x, with the two no longer linked.
|
|
fx = fun(np.copy(x), *args)
|
|
# Make sure the function returns a true scalar
|
|
if not np.isscalar(fx):
|
|
try:
|
|
fx = np.asarray(fx).item()
|
|
except (TypeError, ValueError) as e:
|
|
raise ValueError(
|
|
"The user-provided objective function "
|
|
"must return a scalar value."
|
|
) from e
|
|
|
|
if fx < self._lowest_f:
|
|
self._lowest_x = x
|
|
self._lowest_f = fx
|
|
|
|
return fx
|
|
|
|
def update_fun():
|
|
self.f = fun_wrapped(self.x)
|
|
|
|
self._update_fun_impl = update_fun
|
|
self._update_fun()
|
|
|
|
# Gradient evaluation
|
|
if callable(grad):
|
|
def grad_wrapped(x):
|
|
self.ngev += 1
|
|
return np.atleast_1d(grad(np.copy(x), *args))
|
|
|
|
def update_grad():
|
|
self.g = grad_wrapped(self.x)
|
|
|
|
elif grad in FD_METHODS:
|
|
def update_grad():
|
|
self._update_fun()
|
|
self.ngev += 1
|
|
self.g = approx_derivative(fun_wrapped, self.x, f0=self.f,
|
|
**finite_diff_options)
|
|
|
|
self._update_grad_impl = update_grad
|
|
self._update_grad()
|
|
|
|
# Hessian Evaluation
|
|
if callable(hess):
|
|
self.H = hess(np.copy(x0), *args)
|
|
self.H_updated = True
|
|
self.nhev += 1
|
|
|
|
if sps.issparse(self.H):
|
|
def hess_wrapped(x):
|
|
self.nhev += 1
|
|
return sps.csr_matrix(hess(np.copy(x), *args))
|
|
self.H = sps.csr_matrix(self.H)
|
|
|
|
elif isinstance(self.H, LinearOperator):
|
|
def hess_wrapped(x):
|
|
self.nhev += 1
|
|
return hess(np.copy(x), *args)
|
|
|
|
else:
|
|
def hess_wrapped(x):
|
|
self.nhev += 1
|
|
return np.atleast_2d(np.asarray(hess(np.copy(x), *args)))
|
|
self.H = np.atleast_2d(np.asarray(self.H))
|
|
|
|
def update_hess():
|
|
self.H = hess_wrapped(self.x)
|
|
|
|
elif hess in FD_METHODS:
|
|
def update_hess():
|
|
self._update_grad()
|
|
self.H = approx_derivative(grad_wrapped, self.x, f0=self.g,
|
|
**finite_diff_options)
|
|
return self.H
|
|
|
|
update_hess()
|
|
self.H_updated = True
|
|
elif isinstance(hess, HessianUpdateStrategy):
|
|
self.H = hess
|
|
self.H.initialize(self.n, 'hess')
|
|
self.H_updated = True
|
|
self.x_prev = None
|
|
self.g_prev = None
|
|
|
|
def update_hess():
|
|
self._update_grad()
|
|
self.H.update(self.x - self.x_prev, self.g - self.g_prev)
|
|
|
|
self._update_hess_impl = update_hess
|
|
|
|
if isinstance(hess, HessianUpdateStrategy):
|
|
def update_x(x):
|
|
self._update_grad()
|
|
self.x_prev = self.x
|
|
self.g_prev = self.g
|
|
# ensure that self.x is a copy of x. Don't store a reference
|
|
# otherwise the memoization doesn't work properly.
|
|
self.x = np.atleast_1d(x).astype(float)
|
|
self.f_updated = False
|
|
self.g_updated = False
|
|
self.H_updated = False
|
|
self._update_hess()
|
|
else:
|
|
def update_x(x):
|
|
# ensure that self.x is a copy of x. Don't store a reference
|
|
# otherwise the memoization doesn't work properly.
|
|
self.x = np.atleast_1d(x).astype(float)
|
|
self.f_updated = False
|
|
self.g_updated = False
|
|
self.H_updated = False
|
|
self._update_x_impl = update_x
|
|
|
|
def _update_fun(self):
|
|
if not self.f_updated:
|
|
self._update_fun_impl()
|
|
self.f_updated = True
|
|
|
|
def _update_grad(self):
|
|
if not self.g_updated:
|
|
self._update_grad_impl()
|
|
self.g_updated = True
|
|
|
|
def _update_hess(self):
|
|
if not self.H_updated:
|
|
self._update_hess_impl()
|
|
self.H_updated = True
|
|
|
|
def fun(self, x):
|
|
if not np.array_equal(x, self.x):
|
|
self._update_x_impl(x)
|
|
self._update_fun()
|
|
return self.f
|
|
|
|
def grad(self, x):
|
|
if not np.array_equal(x, self.x):
|
|
self._update_x_impl(x)
|
|
self._update_grad()
|
|
return self.g
|
|
|
|
def hess(self, x):
|
|
if not np.array_equal(x, self.x):
|
|
self._update_x_impl(x)
|
|
self._update_hess()
|
|
return self.H
|
|
|
|
def fun_and_grad(self, x):
|
|
if not np.array_equal(x, self.x):
|
|
self._update_x_impl(x)
|
|
self._update_fun()
|
|
self._update_grad()
|
|
return self.f, self.g
|
|
|
|
|
|
class VectorFunction:
|
|
"""Vector function and its derivatives.
|
|
|
|
This class defines a vector function F: R^n->R^m and methods for
|
|
computing or approximating its first and second derivatives.
|
|
|
|
Notes
|
|
-----
|
|
This class implements a memoization logic. There are methods `fun`,
|
|
`jac`, hess` and corresponding attributes `f`, `J` and `H`. The following
|
|
things should be considered:
|
|
|
|
1. Use only public methods `fun`, `jac` and `hess`.
|
|
2. After one of the methods is called, the corresponding attribute
|
|
will be set. However, a subsequent call with a different argument
|
|
of *any* of the methods may overwrite the attribute.
|
|
"""
|
|
def __init__(self, fun, x0, jac, hess,
|
|
finite_diff_rel_step, finite_diff_jac_sparsity,
|
|
finite_diff_bounds, sparse_jacobian):
|
|
if not callable(jac) and jac not in FD_METHODS:
|
|
raise ValueError("`jac` must be either callable or one of {}."
|
|
.format(FD_METHODS))
|
|
|
|
if not (callable(hess) or hess in FD_METHODS
|
|
or isinstance(hess, HessianUpdateStrategy)):
|
|
raise ValueError("`hess` must be either callable,"
|
|
"HessianUpdateStrategy or one of {}."
|
|
.format(FD_METHODS))
|
|
|
|
if jac in FD_METHODS and hess in FD_METHODS:
|
|
raise ValueError("Whenever the Jacobian is estimated via "
|
|
"finite-differences, we require the Hessian to "
|
|
"be estimated using one of the quasi-Newton "
|
|
"strategies.")
|
|
|
|
self.x = np.atleast_1d(x0).astype(float)
|
|
self.n = self.x.size
|
|
self.nfev = 0
|
|
self.njev = 0
|
|
self.nhev = 0
|
|
self.f_updated = False
|
|
self.J_updated = False
|
|
self.H_updated = False
|
|
|
|
finite_diff_options = {}
|
|
if jac in FD_METHODS:
|
|
finite_diff_options["method"] = jac
|
|
finite_diff_options["rel_step"] = finite_diff_rel_step
|
|
if finite_diff_jac_sparsity is not None:
|
|
sparsity_groups = group_columns(finite_diff_jac_sparsity)
|
|
finite_diff_options["sparsity"] = (finite_diff_jac_sparsity,
|
|
sparsity_groups)
|
|
finite_diff_options["bounds"] = finite_diff_bounds
|
|
self.x_diff = np.copy(self.x)
|
|
if hess in FD_METHODS:
|
|
finite_diff_options["method"] = hess
|
|
finite_diff_options["rel_step"] = finite_diff_rel_step
|
|
finite_diff_options["as_linear_operator"] = True
|
|
self.x_diff = np.copy(self.x)
|
|
if jac in FD_METHODS and hess in FD_METHODS:
|
|
raise ValueError("Whenever the Jacobian is estimated via "
|
|
"finite-differences, we require the Hessian to "
|
|
"be estimated using one of the quasi-Newton "
|
|
"strategies.")
|
|
|
|
# Function evaluation
|
|
def fun_wrapped(x):
|
|
self.nfev += 1
|
|
return np.atleast_1d(fun(x))
|
|
|
|
def update_fun():
|
|
self.f = fun_wrapped(self.x)
|
|
|
|
self._update_fun_impl = update_fun
|
|
update_fun()
|
|
|
|
self.v = np.zeros_like(self.f)
|
|
self.m = self.v.size
|
|
|
|
# Jacobian Evaluation
|
|
if callable(jac):
|
|
self.J = jac(self.x)
|
|
self.J_updated = True
|
|
self.njev += 1
|
|
|
|
if (sparse_jacobian or
|
|
sparse_jacobian is None and sps.issparse(self.J)):
|
|
def jac_wrapped(x):
|
|
self.njev += 1
|
|
return sps.csr_matrix(jac(x))
|
|
self.J = sps.csr_matrix(self.J)
|
|
self.sparse_jacobian = True
|
|
|
|
elif sps.issparse(self.J):
|
|
def jac_wrapped(x):
|
|
self.njev += 1
|
|
return jac(x).toarray()
|
|
self.J = self.J.toarray()
|
|
self.sparse_jacobian = False
|
|
|
|
else:
|
|
def jac_wrapped(x):
|
|
self.njev += 1
|
|
return np.atleast_2d(jac(x))
|
|
self.J = np.atleast_2d(self.J)
|
|
self.sparse_jacobian = False
|
|
|
|
def update_jac():
|
|
self.J = jac_wrapped(self.x)
|
|
|
|
elif jac in FD_METHODS:
|
|
self.J = approx_derivative(fun_wrapped, self.x, f0=self.f,
|
|
**finite_diff_options)
|
|
self.J_updated = True
|
|
|
|
if (sparse_jacobian or
|
|
sparse_jacobian is None and sps.issparse(self.J)):
|
|
def update_jac():
|
|
self._update_fun()
|
|
self.J = sps.csr_matrix(
|
|
approx_derivative(fun_wrapped, self.x, f0=self.f,
|
|
**finite_diff_options))
|
|
self.J = sps.csr_matrix(self.J)
|
|
self.sparse_jacobian = True
|
|
|
|
elif sps.issparse(self.J):
|
|
def update_jac():
|
|
self._update_fun()
|
|
self.J = approx_derivative(fun_wrapped, self.x, f0=self.f,
|
|
**finite_diff_options).toarray()
|
|
self.J = self.J.toarray()
|
|
self.sparse_jacobian = False
|
|
|
|
else:
|
|
def update_jac():
|
|
self._update_fun()
|
|
self.J = np.atleast_2d(
|
|
approx_derivative(fun_wrapped, self.x, f0=self.f,
|
|
**finite_diff_options))
|
|
self.J = np.atleast_2d(self.J)
|
|
self.sparse_jacobian = False
|
|
|
|
self._update_jac_impl = update_jac
|
|
|
|
# Define Hessian
|
|
if callable(hess):
|
|
self.H = hess(self.x, self.v)
|
|
self.H_updated = True
|
|
self.nhev += 1
|
|
|
|
if sps.issparse(self.H):
|
|
def hess_wrapped(x, v):
|
|
self.nhev += 1
|
|
return sps.csr_matrix(hess(x, v))
|
|
self.H = sps.csr_matrix(self.H)
|
|
|
|
elif isinstance(self.H, LinearOperator):
|
|
def hess_wrapped(x, v):
|
|
self.nhev += 1
|
|
return hess(x, v)
|
|
|
|
else:
|
|
def hess_wrapped(x, v):
|
|
self.nhev += 1
|
|
return np.atleast_2d(np.asarray(hess(x, v)))
|
|
self.H = np.atleast_2d(np.asarray(self.H))
|
|
|
|
def update_hess():
|
|
self.H = hess_wrapped(self.x, self.v)
|
|
elif hess in FD_METHODS:
|
|
def jac_dot_v(x, v):
|
|
return jac_wrapped(x).T.dot(v)
|
|
|
|
def update_hess():
|
|
self._update_jac()
|
|
self.H = approx_derivative(jac_dot_v, self.x,
|
|
f0=self.J.T.dot(self.v),
|
|
args=(self.v,),
|
|
**finite_diff_options)
|
|
update_hess()
|
|
self.H_updated = True
|
|
elif isinstance(hess, HessianUpdateStrategy):
|
|
self.H = hess
|
|
self.H.initialize(self.n, 'hess')
|
|
self.H_updated = True
|
|
self.x_prev = None
|
|
self.J_prev = None
|
|
|
|
def update_hess():
|
|
self._update_jac()
|
|
# When v is updated before x was updated, then x_prev and
|
|
# J_prev are None and we need this check.
|
|
if self.x_prev is not None and self.J_prev is not None:
|
|
delta_x = self.x - self.x_prev
|
|
delta_g = self.J.T.dot(self.v) - self.J_prev.T.dot(self.v)
|
|
self.H.update(delta_x, delta_g)
|
|
|
|
self._update_hess_impl = update_hess
|
|
|
|
if isinstance(hess, HessianUpdateStrategy):
|
|
def update_x(x):
|
|
self._update_jac()
|
|
self.x_prev = self.x
|
|
self.J_prev = self.J
|
|
self.x = np.atleast_1d(x).astype(float)
|
|
self.f_updated = False
|
|
self.J_updated = False
|
|
self.H_updated = False
|
|
self._update_hess()
|
|
else:
|
|
def update_x(x):
|
|
self.x = np.atleast_1d(x).astype(float)
|
|
self.f_updated = False
|
|
self.J_updated = False
|
|
self.H_updated = False
|
|
|
|
self._update_x_impl = update_x
|
|
|
|
def _update_v(self, v):
|
|
if not np.array_equal(v, self.v):
|
|
self.v = v
|
|
self.H_updated = False
|
|
|
|
def _update_x(self, x):
|
|
if not np.array_equal(x, self.x):
|
|
self._update_x_impl(x)
|
|
|
|
def _update_fun(self):
|
|
if not self.f_updated:
|
|
self._update_fun_impl()
|
|
self.f_updated = True
|
|
|
|
def _update_jac(self):
|
|
if not self.J_updated:
|
|
self._update_jac_impl()
|
|
self.J_updated = True
|
|
|
|
def _update_hess(self):
|
|
if not self.H_updated:
|
|
self._update_hess_impl()
|
|
self.H_updated = True
|
|
|
|
def fun(self, x):
|
|
self._update_x(x)
|
|
self._update_fun()
|
|
return self.f
|
|
|
|
def jac(self, x):
|
|
self._update_x(x)
|
|
self._update_jac()
|
|
return self.J
|
|
|
|
def hess(self, x, v):
|
|
# v should be updated before x.
|
|
self._update_v(v)
|
|
self._update_x(x)
|
|
self._update_hess()
|
|
return self.H
|
|
|
|
|
|
class LinearVectorFunction:
|
|
"""Linear vector function and its derivatives.
|
|
|
|
Defines a linear function F = A x, where x is N-D vector and
|
|
A is m-by-n matrix. The Jacobian is constant and equals to A. The Hessian
|
|
is identically zero and it is returned as a csr matrix.
|
|
"""
|
|
def __init__(self, A, x0, sparse_jacobian):
|
|
if sparse_jacobian or sparse_jacobian is None and sps.issparse(A):
|
|
self.J = sps.csr_matrix(A)
|
|
self.sparse_jacobian = True
|
|
elif sps.issparse(A):
|
|
self.J = A.toarray()
|
|
self.sparse_jacobian = False
|
|
else:
|
|
# np.asarray makes sure A is ndarray and not matrix
|
|
self.J = np.atleast_2d(np.asarray(A))
|
|
self.sparse_jacobian = False
|
|
|
|
self.m, self.n = self.J.shape
|
|
|
|
self.x = np.atleast_1d(x0).astype(float)
|
|
self.f = self.J.dot(self.x)
|
|
self.f_updated = True
|
|
|
|
self.v = np.zeros(self.m, dtype=float)
|
|
self.H = sps.csr_matrix((self.n, self.n))
|
|
|
|
def _update_x(self, x):
|
|
if not np.array_equal(x, self.x):
|
|
self.x = np.atleast_1d(x).astype(float)
|
|
self.f_updated = False
|
|
|
|
def fun(self, x):
|
|
self._update_x(x)
|
|
if not self.f_updated:
|
|
self.f = self.J.dot(x)
|
|
self.f_updated = True
|
|
return self.f
|
|
|
|
def jac(self, x):
|
|
self._update_x(x)
|
|
return self.J
|
|
|
|
def hess(self, x, v):
|
|
self._update_x(x)
|
|
self.v = v
|
|
return self.H
|
|
|
|
|
|
class IdentityVectorFunction(LinearVectorFunction):
|
|
"""Identity vector function and its derivatives.
|
|
|
|
The Jacobian is the identity matrix, returned as a dense array when
|
|
`sparse_jacobian=False` and as a csr matrix otherwise. The Hessian is
|
|
identically zero and it is returned as a csr matrix.
|
|
"""
|
|
def __init__(self, x0, sparse_jacobian):
|
|
n = len(x0)
|
|
if sparse_jacobian or sparse_jacobian is None:
|
|
A = sps.eye(n, format='csr')
|
|
sparse_jacobian = True
|
|
else:
|
|
A = np.eye(n)
|
|
sparse_jacobian = False
|
|
super().__init__(A, x0, sparse_jacobian)
|