1122 lines
37 KiB
Python
1122 lines
37 KiB
Python
"""Orthogonal matching pursuit algorithms"""
|
|
|
|
# Author: Vlad Niculae
|
|
#
|
|
# License: BSD 3 clause
|
|
|
|
import warnings
|
|
from math import sqrt
|
|
from numbers import Integral, Real
|
|
|
|
import numpy as np
|
|
from scipy import linalg
|
|
from scipy.linalg.lapack import get_lapack_funcs
|
|
|
|
from ..base import MultiOutputMixin, RegressorMixin, _fit_context
|
|
from ..model_selection import check_cv
|
|
from ..utils import Bunch, as_float_array, check_array
|
|
from ..utils._param_validation import Interval, StrOptions, validate_params
|
|
from ..utils.metadata_routing import (
|
|
MetadataRouter,
|
|
MethodMapping,
|
|
_raise_for_params,
|
|
_routing_enabled,
|
|
process_routing,
|
|
)
|
|
from ..utils.parallel import Parallel, delayed
|
|
from ._base import LinearModel, _pre_fit
|
|
|
|
# Message of the RuntimeWarning emitted when the greedy selection has to stop
# before reaching the requested sparsity level or tolerance.
premature = (
    "Orthogonal matching pursuit ended prematurely "
    "due to linear dependence in the dictionary. "
    "The requested precision might not have been met."
)
|
|
|
|
|
|
def _cholesky_omp(X, y, n_nonzero_coefs, tol=None, copy_X=True, return_path=False):
|
|
"""Orthogonal Matching Pursuit step using the Cholesky decomposition.
|
|
|
|
Parameters
|
|
----------
|
|
X : ndarray of shape (n_samples, n_features)
|
|
Input dictionary. Columns are assumed to have unit norm.
|
|
|
|
y : ndarray of shape (n_samples,)
|
|
Input targets.
|
|
|
|
n_nonzero_coefs : int
|
|
Targeted number of non-zero elements.
|
|
|
|
tol : float, default=None
|
|
Targeted squared error, if not None overrides n_nonzero_coefs.
|
|
|
|
copy_X : bool, default=True
|
|
Whether the design matrix X must be copied by the algorithm. A false
|
|
value is only helpful if X is already Fortran-ordered, otherwise a
|
|
copy is made anyway.
|
|
|
|
return_path : bool, default=False
|
|
Whether to return every value of the nonzero coefficients along the
|
|
forward path. Useful for cross-validation.
|
|
|
|
Returns
|
|
-------
|
|
gamma : ndarray of shape (n_nonzero_coefs,)
|
|
Non-zero elements of the solution.
|
|
|
|
idx : ndarray of shape (n_nonzero_coefs,)
|
|
Indices of the positions of the elements in gamma within the solution
|
|
vector.
|
|
|
|
coef : ndarray of shape (n_features, n_nonzero_coefs)
|
|
The first k values of column k correspond to the coefficient value
|
|
for the active features at that step. The lower left triangle contains
|
|
garbage. Only returned if ``return_path=True``.
|
|
|
|
n_active : int
|
|
Number of active features at convergence.
|
|
"""
|
|
if copy_X:
|
|
X = X.copy("F")
|
|
else: # even if we are allowed to overwrite, still copy it if bad order
|
|
X = np.asfortranarray(X)
|
|
|
|
min_float = np.finfo(X.dtype).eps
|
|
nrm2, swap = linalg.get_blas_funcs(("nrm2", "swap"), (X,))
|
|
(potrs,) = get_lapack_funcs(("potrs",), (X,))
|
|
|
|
alpha = np.dot(X.T, y)
|
|
residual = y
|
|
gamma = np.empty(0)
|
|
n_active = 0
|
|
indices = np.arange(X.shape[1]) # keeping track of swapping
|
|
|
|
max_features = X.shape[1] if tol is not None else n_nonzero_coefs
|
|
|
|
L = np.empty((max_features, max_features), dtype=X.dtype)
|
|
|
|
if return_path:
|
|
coefs = np.empty_like(L)
|
|
|
|
while True:
|
|
lam = np.argmax(np.abs(np.dot(X.T, residual)))
|
|
if lam < n_active or alpha[lam] ** 2 < min_float:
|
|
# atom already selected or inner product too small
|
|
warnings.warn(premature, RuntimeWarning, stacklevel=2)
|
|
break
|
|
|
|
if n_active > 0:
|
|
# Updates the Cholesky decomposition of X' X
|
|
L[n_active, :n_active] = np.dot(X[:, :n_active].T, X[:, lam])
|
|
linalg.solve_triangular(
|
|
L[:n_active, :n_active],
|
|
L[n_active, :n_active],
|
|
trans=0,
|
|
lower=1,
|
|
overwrite_b=True,
|
|
check_finite=False,
|
|
)
|
|
v = nrm2(L[n_active, :n_active]) ** 2
|
|
Lkk = linalg.norm(X[:, lam]) ** 2 - v
|
|
if Lkk <= min_float: # selected atoms are dependent
|
|
warnings.warn(premature, RuntimeWarning, stacklevel=2)
|
|
break
|
|
L[n_active, n_active] = sqrt(Lkk)
|
|
else:
|
|
L[0, 0] = linalg.norm(X[:, lam])
|
|
|
|
X.T[n_active], X.T[lam] = swap(X.T[n_active], X.T[lam])
|
|
alpha[n_active], alpha[lam] = alpha[lam], alpha[n_active]
|
|
indices[n_active], indices[lam] = indices[lam], indices[n_active]
|
|
n_active += 1
|
|
|
|
# solves LL'x = X'y as a composition of two triangular systems
|
|
gamma, _ = potrs(
|
|
L[:n_active, :n_active], alpha[:n_active], lower=True, overwrite_b=False
|
|
)
|
|
|
|
if return_path:
|
|
coefs[:n_active, n_active - 1] = gamma
|
|
residual = y - np.dot(X[:, :n_active], gamma)
|
|
if tol is not None and nrm2(residual) ** 2 <= tol:
|
|
break
|
|
elif n_active == max_features:
|
|
break
|
|
|
|
if return_path:
|
|
return gamma, indices[:n_active], coefs[:, :n_active], n_active
|
|
else:
|
|
return gamma, indices[:n_active], n_active
|
|
|
|
|
|
def _gram_omp(
|
|
Gram,
|
|
Xy,
|
|
n_nonzero_coefs,
|
|
tol_0=None,
|
|
tol=None,
|
|
copy_Gram=True,
|
|
copy_Xy=True,
|
|
return_path=False,
|
|
):
|
|
"""Orthogonal Matching Pursuit step on a precomputed Gram matrix.
|
|
|
|
This function uses the Cholesky decomposition method.
|
|
|
|
Parameters
|
|
----------
|
|
Gram : ndarray of shape (n_features, n_features)
|
|
Gram matrix of the input data matrix.
|
|
|
|
Xy : ndarray of shape (n_features,)
|
|
Input targets.
|
|
|
|
n_nonzero_coefs : int
|
|
Targeted number of non-zero elements.
|
|
|
|
tol_0 : float, default=None
|
|
Squared norm of y, required if tol is not None.
|
|
|
|
tol : float, default=None
|
|
Targeted squared error, if not None overrides n_nonzero_coefs.
|
|
|
|
copy_Gram : bool, default=True
|
|
Whether the gram matrix must be copied by the algorithm. A false
|
|
value is only helpful if it is already Fortran-ordered, otherwise a
|
|
copy is made anyway.
|
|
|
|
copy_Xy : bool, default=True
|
|
Whether the covariance vector Xy must be copied by the algorithm.
|
|
If False, it may be overwritten.
|
|
|
|
return_path : bool, default=False
|
|
Whether to return every value of the nonzero coefficients along the
|
|
forward path. Useful for cross-validation.
|
|
|
|
Returns
|
|
-------
|
|
gamma : ndarray of shape (n_nonzero_coefs,)
|
|
Non-zero elements of the solution.
|
|
|
|
idx : ndarray of shape (n_nonzero_coefs,)
|
|
Indices of the positions of the elements in gamma within the solution
|
|
vector.
|
|
|
|
coefs : ndarray of shape (n_features, n_nonzero_coefs)
|
|
The first k values of column k correspond to the coefficient value
|
|
for the active features at that step. The lower left triangle contains
|
|
garbage. Only returned if ``return_path=True``.
|
|
|
|
n_active : int
|
|
Number of active features at convergence.
|
|
"""
|
|
Gram = Gram.copy("F") if copy_Gram else np.asfortranarray(Gram)
|
|
|
|
if copy_Xy or not Xy.flags.writeable:
|
|
Xy = Xy.copy()
|
|
|
|
min_float = np.finfo(Gram.dtype).eps
|
|
nrm2, swap = linalg.get_blas_funcs(("nrm2", "swap"), (Gram,))
|
|
(potrs,) = get_lapack_funcs(("potrs",), (Gram,))
|
|
|
|
indices = np.arange(len(Gram)) # keeping track of swapping
|
|
alpha = Xy
|
|
tol_curr = tol_0
|
|
delta = 0
|
|
gamma = np.empty(0)
|
|
n_active = 0
|
|
|
|
max_features = len(Gram) if tol is not None else n_nonzero_coefs
|
|
|
|
L = np.empty((max_features, max_features), dtype=Gram.dtype)
|
|
|
|
L[0, 0] = 1.0
|
|
if return_path:
|
|
coefs = np.empty_like(L)
|
|
|
|
while True:
|
|
lam = np.argmax(np.abs(alpha))
|
|
if lam < n_active or alpha[lam] ** 2 < min_float:
|
|
# selected same atom twice, or inner product too small
|
|
warnings.warn(premature, RuntimeWarning, stacklevel=3)
|
|
break
|
|
if n_active > 0:
|
|
L[n_active, :n_active] = Gram[lam, :n_active]
|
|
linalg.solve_triangular(
|
|
L[:n_active, :n_active],
|
|
L[n_active, :n_active],
|
|
trans=0,
|
|
lower=1,
|
|
overwrite_b=True,
|
|
check_finite=False,
|
|
)
|
|
v = nrm2(L[n_active, :n_active]) ** 2
|
|
Lkk = Gram[lam, lam] - v
|
|
if Lkk <= min_float: # selected atoms are dependent
|
|
warnings.warn(premature, RuntimeWarning, stacklevel=3)
|
|
break
|
|
L[n_active, n_active] = sqrt(Lkk)
|
|
else:
|
|
L[0, 0] = sqrt(Gram[lam, lam])
|
|
|
|
Gram[n_active], Gram[lam] = swap(Gram[n_active], Gram[lam])
|
|
Gram.T[n_active], Gram.T[lam] = swap(Gram.T[n_active], Gram.T[lam])
|
|
indices[n_active], indices[lam] = indices[lam], indices[n_active]
|
|
Xy[n_active], Xy[lam] = Xy[lam], Xy[n_active]
|
|
n_active += 1
|
|
# solves LL'x = X'y as a composition of two triangular systems
|
|
gamma, _ = potrs(
|
|
L[:n_active, :n_active], Xy[:n_active], lower=True, overwrite_b=False
|
|
)
|
|
if return_path:
|
|
coefs[:n_active, n_active - 1] = gamma
|
|
beta = np.dot(Gram[:, :n_active], gamma)
|
|
alpha = Xy - beta
|
|
if tol is not None:
|
|
tol_curr += delta
|
|
delta = np.inner(gamma, beta[:n_active])
|
|
tol_curr -= delta
|
|
if abs(tol_curr) <= tol:
|
|
break
|
|
elif n_active == max_features:
|
|
break
|
|
|
|
if return_path:
|
|
return gamma, indices[:n_active], coefs[:, :n_active], n_active
|
|
else:
|
|
return gamma, indices[:n_active], n_active
|
|
|
|
|
|
@validate_params(
    {
        "X": ["array-like"],
        "y": [np.ndarray],
        "n_nonzero_coefs": [Interval(Integral, 1, None, closed="left"), None],
        "tol": [Interval(Real, 0, None, closed="left"), None],
        "precompute": ["boolean", StrOptions({"auto"})],
        "copy_X": ["boolean"],
        "return_path": ["boolean"],
        "return_n_iter": ["boolean"],
    },
    prefer_skip_nested_validation=True,
)
def orthogonal_mp(
    X,
    y,
    *,
    n_nonzero_coefs=None,
    tol=None,
    precompute=False,
    copy_X=True,
    return_path=False,
    return_n_iter=False,
):
    r"""Orthogonal Matching Pursuit (OMP).

    Solves n_targets Orthogonal Matching Pursuit problems.
    An instance of the problem has the form:

    When parametrized by the number of non-zero coefficients using
    `n_nonzero_coefs`:
    argmin ||y - X\gamma||^2 subject to ||\gamma||_0 <= n_{nonzero coefs}

    When parametrized by error using the parameter `tol`:
    argmin ||\gamma||_0 subject to ||y - X\gamma||^2 <= tol

    Read more in the :ref:`User Guide <omp>`.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Input data. Columns are assumed to have unit norm.

    y : ndarray of shape (n_samples,) or (n_samples, n_targets)
        Input targets.

    n_nonzero_coefs : int, default=None
        Desired number of non-zero entries in the solution. If None (by
        default) this value is set to 10% of n_features, but at least 1.

    tol : float, default=None
        Maximum squared norm of the residual. If not None, overrides n_nonzero_coefs.

    precompute : 'auto' or bool, default=False
        Whether to perform precomputations. Improves performance when n_targets
        or n_samples is very large.

    copy_X : bool, default=True
        Whether the design matrix X must be copied by the algorithm. A false
        value is only helpful if X is already Fortran-ordered, otherwise a
        copy is made anyway.

    return_path : bool, default=False
        Whether to return every value of the nonzero coefficients along the
        forward path. Useful for cross-validation.

    return_n_iter : bool, default=False
        Whether or not to return the number of iterations.

    Returns
    -------
    coef : ndarray of shape (n_features,) or (n_features, n_targets)
        Coefficients of the OMP solution. If `return_path=True`, this contains
        the whole coefficient path. In this case its shape is
        (n_features, n_features) or (n_features, n_targets, n_features) and
        iterating over the last axis generates coefficients in increasing order
        of active features.

    n_iters : array-like or int
        Number of active features across every target. Returned only if
        `return_n_iter` is set to True.

    Raises
    ------
    ValueError
        If `tol` is None and `n_nonzero_coefs` exceeds the number of features.

    See Also
    --------
    OrthogonalMatchingPursuit : Orthogonal Matching Pursuit model.
    orthogonal_mp_gram : Solve OMP problems using Gram matrix and the product X.T * y.
    lars_path : Compute Least Angle Regression or Lasso path using LARS algorithm.
    sklearn.decomposition.sparse_encode : Sparse coding.

    Notes
    -----
    Orthogonal matching pursuit was introduced in S. Mallat, Z. Zhang,
    Matching pursuits with time-frequency dictionaries, IEEE Transactions on
    Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.
    (https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf)

    This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,
    M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal
    Matching Pursuit Technical Report - CS Technion, April 2008.
    https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf

    Examples
    --------
    >>> from sklearn.datasets import make_regression
    >>> from sklearn.linear_model import orthogonal_mp
    >>> X, y = make_regression(noise=4, random_state=0)
    >>> coef = orthogonal_mp(X, y)
    >>> coef.shape
    (100,)
    >>> X[:1,] @ coef
    array([-78.68...])
    """
    X = check_array(X, order="F", copy=copy_X)
    # X has been validated (and copied if requested) above, so the solver
    # does not need to copy it again for a single target.
    copy_X = False
    if y.ndim == 1:
        y = y.reshape(-1, 1)
    y = check_array(y)
    if y.shape[1] > 1:  # subsequent targets will be affected
        copy_X = True
    if n_nonzero_coefs is None and tol is None:
        # default for n_nonzero_coefs is 0.1 * n_features
        # but at least one.
        n_nonzero_coefs = max(int(0.1 * X.shape[1]), 1)
    if tol is None and n_nonzero_coefs > X.shape[1]:
        raise ValueError(
            "The number of atoms cannot be more than the number of features"
        )
    if precompute == "auto":
        precompute = X.shape[0] > X.shape[1]
    if precompute:
        G = np.dot(X.T, X)
        G = np.asfortranarray(G)
        Xy = np.dot(X.T, y)
        if tol is not None:
            norms_squared = np.sum((y**2), axis=0)
        else:
            norms_squared = None
        # Forward ``return_n_iter`` as well, so that the shape of the return
        # value does not depend on which solver is used.
        return orthogonal_mp_gram(
            G,
            Xy,
            n_nonzero_coefs=n_nonzero_coefs,
            tol=tol,
            norms_squared=norms_squared,
            copy_Gram=copy_X,
            copy_Xy=False,
            return_path=return_path,
            return_n_iter=return_n_iter,
        )

    if return_path:
        coef = np.zeros((X.shape[1], y.shape[1], X.shape[1]))
    else:
        coef = np.zeros((X.shape[1], y.shape[1]))
    n_iters = []

    for k in range(y.shape[1]):
        out = _cholesky_omp(
            X, y[:, k], n_nonzero_coefs, tol, copy_X=copy_X, return_path=return_path
        )
        if return_path:
            _, idx, coefs, n_iter = out
            # Trim the path axis to the number of steps actually taken.
            coef = coef[:, :, : len(idx)]
            for n_active, x in enumerate(coefs.T):
                coef[idx[: n_active + 1], k, n_active] = x[: n_active + 1]
        else:
            x, idx, n_iter = out
            coef[idx, k] = x
        n_iters.append(n_iter)

    if y.shape[1] == 1:
        n_iters = n_iters[0]

    if return_n_iter:
        return np.squeeze(coef), n_iters
    else:
        return np.squeeze(coef)
|
|
|
|
|
|
@validate_params(
    {
        "Gram": ["array-like"],
        "Xy": ["array-like"],
        "n_nonzero_coefs": [Interval(Integral, 0, None, closed="neither"), None],
        "tol": [Interval(Real, 0, None, closed="left"), None],
        "norms_squared": ["array-like", None],
        "copy_Gram": ["boolean"],
        "copy_Xy": ["boolean"],
        "return_path": ["boolean"],
        "return_n_iter": ["boolean"],
    },
    prefer_skip_nested_validation=True,
)
def orthogonal_mp_gram(
    Gram,
    Xy,
    *,
    n_nonzero_coefs=None,
    tol=None,
    norms_squared=None,
    copy_Gram=True,
    copy_Xy=True,
    return_path=False,
    return_n_iter=False,
):
    """Gram Orthogonal Matching Pursuit (OMP).

    Solves n_targets Orthogonal Matching Pursuit problems using only
    the Gram matrix X.T * X and the product X.T * y.

    Read more in the :ref:`User Guide <omp>`.

    Parameters
    ----------
    Gram : array-like of shape (n_features, n_features)
        Gram matrix of the input data: `X.T * X`.

    Xy : array-like of shape (n_features,) or (n_features, n_targets)
        Input targets multiplied by `X`: `X.T * y`.

    n_nonzero_coefs : int, default=None
        Desired number of non-zero entries in the solution. If `None` (by
        default) this value is set to 10% of n_features, but at least 1.

    tol : float, default=None
        Maximum squared norm of the residual. If not `None`,
        overrides `n_nonzero_coefs`.

    norms_squared : array-like of shape (n_targets,), default=None
        Squared L2 norms of the lines of `y`. Required if `tol` is not None.

    copy_Gram : bool, default=True
        Whether the gram matrix must be copied by the algorithm. A `False`
        value is only helpful if it is already Fortran-ordered, otherwise a
        copy is made anyway.

    copy_Xy : bool, default=True
        Whether the covariance vector `Xy` must be copied by the algorithm.
        If `False`, it may be overwritten.

    return_path : bool, default=False
        Whether to return every value of the nonzero coefficients along the
        forward path. Useful for cross-validation.

    return_n_iter : bool, default=False
        Whether or not to return the number of iterations.

    Returns
    -------
    coef : ndarray of shape (n_features,) or (n_features, n_targets)
        Coefficients of the OMP solution. If `return_path=True`, this contains
        the whole coefficient path. In this case its shape is
        `(n_features, n_features)` or `(n_features, n_targets, n_features)` and
        iterating over the last axis yields coefficients in increasing order
        of active features.

    n_iters : list or int
        Number of active features across every target. Returned only if
        `return_n_iter` is set to True.

    Raises
    ------
    ValueError
        If `tol` is given without `norms_squared`, if `tol` is negative, or if
        `tol` is None and `n_nonzero_coefs` is not in `[1, n_features]`.

    See Also
    --------
    OrthogonalMatchingPursuit : Orthogonal Matching Pursuit model (OMP).
    orthogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems.
    lars_path : Compute Least Angle Regression or Lasso path using
        LARS algorithm.
    sklearn.decomposition.sparse_encode : Generic sparse coding.
        Each column of the result is the solution to a Lasso problem.

    Notes
    -----
    Orthogonal matching pursuit was introduced in S. Mallat, Z. Zhang,
    Matching pursuits with time-frequency dictionaries, IEEE Transactions on
    Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.
    (https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf)

    This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,
    M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal
    Matching Pursuit Technical Report - CS Technion, April 2008.
    https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf

    Examples
    --------
    >>> from sklearn.datasets import make_regression
    >>> from sklearn.linear_model import orthogonal_mp_gram
    >>> X, y = make_regression(noise=4, random_state=0)
    >>> coef = orthogonal_mp_gram(X.T @ X, X.T @ y)
    >>> coef.shape
    (100,)
    >>> X[:1,] @ coef
    array([-78.68...])
    """
    Gram = check_array(Gram, order="F", copy=copy_Gram)
    Xy = np.asarray(Xy)

    # This must be checked before ``norms_squared`` is wrapped in a list for
    # 1-D ``Xy`` below, otherwise a missing value would go unnoticed here and
    # only fail later inside the solver.
    if tol is not None and norms_squared is None:
        raise ValueError(
            "Gram OMP needs the precomputed norms in order "
            "to evaluate the error sum of squares."
        )

    if Xy.ndim > 1 and Xy.shape[1] > 1:
        # or subsequent target will be affected
        copy_Gram = True
    if Xy.ndim == 1:
        Xy = Xy[:, np.newaxis]
        if tol is not None:
            # Single target: make ``norms_squared[k]`` valid in the loop below.
            norms_squared = [norms_squared]
    if copy_Xy or not Xy.flags.writeable:
        # Make the copy once instead of many times in _gram_omp itself.
        Xy = Xy.copy()

    if n_nonzero_coefs is None and tol is None:
        # default for n_nonzero_coefs is 0.1 * n_features
        # but at least one.
        n_nonzero_coefs = max(int(0.1 * len(Gram)), 1)
    if tol is not None and tol < 0:
        raise ValueError("Epsilon cannot be negative")
    if tol is None and n_nonzero_coefs <= 0:
        raise ValueError("The number of atoms must be positive")
    if tol is None and n_nonzero_coefs > len(Gram):
        raise ValueError(
            "The number of atoms cannot be more than the number of features"
        )

    if return_path:
        coef = np.zeros((len(Gram), Xy.shape[1], len(Gram)), dtype=Gram.dtype)
    else:
        coef = np.zeros((len(Gram), Xy.shape[1]), dtype=Gram.dtype)

    n_iters = []
    for k in range(Xy.shape[1]):
        out = _gram_omp(
            Gram,
            Xy[:, k],
            n_nonzero_coefs,
            norms_squared[k] if tol is not None else None,
            tol,
            copy_Gram=copy_Gram,
            copy_Xy=False,
            return_path=return_path,
        )
        if return_path:
            _, idx, coefs, n_iter = out
            # Trim the path axis to the number of steps actually taken.
            coef = coef[:, :, : len(idx)]
            for n_active, x in enumerate(coefs.T):
                coef[idx[: n_active + 1], k, n_active] = x[: n_active + 1]
        else:
            x, idx, n_iter = out
            coef[idx, k] = x
        n_iters.append(n_iter)

    if Xy.shape[1] == 1:
        n_iters = n_iters[0]

    if return_n_iter:
        return np.squeeze(coef), n_iters
    else:
        return np.squeeze(coef)
|
|
|
|
|
|
class OrthogonalMatchingPursuit(MultiOutputMixin, RegressorMixin, LinearModel):
    """Orthogonal Matching Pursuit model (OMP).

    Read more in the :ref:`User Guide <omp>`.

    Parameters
    ----------
    n_nonzero_coefs : int, default=None
        Desired number of non-zero entries in the solution. Ignored if `tol`
        is set. When both `n_nonzero_coefs` and `tol` are `None`, the larger
        of 10% of `n_features` and 1 is used.

    tol : float, default=None
        Maximum squared norm of the residual. If not None, overrides
        n_nonzero_coefs.

    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to false, no intercept will be used in calculations
        (i.e. data is expected to be centered).

    precompute : 'auto' or bool, default='auto'
        Whether to use a precomputed Gram and Xy matrix to speed up
        calculations. Improves performance when :term:`n_targets` or
        :term:`n_samples` is very large. Note that if you already have such
        matrices, you can pass them directly to the fit method.

    Attributes
    ----------
    coef_ : ndarray of shape (n_features,) or (n_targets, n_features)
        Parameter vector (w in the formula).

    intercept_ : float or ndarray of shape (n_targets,)
        Independent term in decision function.

    n_iter_ : int or array-like
        Number of active features across every target.

    n_nonzero_coefs_ : int or None
        The number of non-zero coefficients in the solution or `None` when
        `tol` is set. If `n_nonzero_coefs` is None and `tol` is None this
        value is either set to 10% of `n_features` or 1, whichever is greater.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    orthogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems.
    orthogonal_mp_gram : Solves n_targets Orthogonal Matching Pursuit
        problems using only the Gram matrix X.T * X and the product X.T * y.
    lars_path : Compute Least Angle Regression or Lasso path using LARS algorithm.
    Lars : Least Angle Regression model a.k.a. LAR.
    LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.
    sklearn.decomposition.sparse_encode : Generic sparse coding.
        Each column of the result is the solution to a Lasso problem.
    OrthogonalMatchingPursuitCV : Cross-validated
        Orthogonal Matching Pursuit model (OMP).

    Notes
    -----
    Orthogonal matching pursuit was introduced in S. Mallat, Z. Zhang,
    Matching pursuits with time-frequency dictionaries, IEEE Transactions on
    Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.
    (https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf)

    This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,
    M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal
    Matching Pursuit Technical Report - CS Technion, April 2008.
    https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf

    Examples
    --------
    >>> from sklearn.linear_model import OrthogonalMatchingPursuit
    >>> from sklearn.datasets import make_regression
    >>> X, y = make_regression(noise=4, random_state=0)
    >>> reg = OrthogonalMatchingPursuit().fit(X, y)
    >>> reg.score(X, y)
    0.9991...
    >>> reg.predict(X[:1,])
    array([-78.3854...])
    """

    _parameter_constraints: dict = {
        "n_nonzero_coefs": [Interval(Integral, 1, None, closed="left"), None],
        "tol": [Interval(Real, 0, None, closed="left"), None],
        "fit_intercept": ["boolean"],
        "precompute": [StrOptions({"auto"}), "boolean"],
    }

    def __init__(
        self,
        *,
        n_nonzero_coefs=None,
        tol=None,
        fit_intercept=True,
        precompute="auto",
    ):
        self.n_nonzero_coefs = n_nonzero_coefs
        self.tol = tol
        self.fit_intercept = fit_intercept
        self.precompute = precompute

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y):
        """Fit the model using X, y as training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values. Will be cast to X's dtype if necessary.

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        X, y = self._validate_data(X, y, multi_output=True, y_numeric=True)
        n_features = X.shape[1]

        X, y, X_offset, y_offset, X_scale, Gram, Xy = _pre_fit(
            X, y, None, self.precompute, self.fit_intercept, copy=True
        )

        # The solvers are always given a 2-D target.
        if y.ndim == 1:
            y = y[:, np.newaxis]

        # Resolve the sparsity level actually used; a tolerance takes
        # precedence over any requested number of coefficients.
        if self.tol is not None:
            self.n_nonzero_coefs_ = None
        elif self.n_nonzero_coefs is None:
            # default for n_nonzero_coefs is 0.1 * n_features
            # but at least one.
            self.n_nonzero_coefs_ = max(int(0.1 * n_features), 1)
        else:
            self.n_nonzero_coefs_ = self.n_nonzero_coefs

        if Gram is not False:
            # A Gram matrix is available: use the Gram-based solver.
            if self.tol is not None:
                norms_sq = np.sum(y**2, axis=0)
            else:
                norms_sq = None

            coef_, self.n_iter_ = orthogonal_mp_gram(
                Gram,
                Xy=Xy,
                n_nonzero_coefs=self.n_nonzero_coefs_,
                tol=self.tol,
                norms_squared=norms_sq,
                copy_Gram=True,
                copy_Xy=True,
                return_n_iter=True,
            )
        else:
            coef_, self.n_iter_ = orthogonal_mp(
                X,
                y,
                n_nonzero_coefs=self.n_nonzero_coefs_,
                tol=self.tol,
                precompute=False,
                copy_X=True,
                return_n_iter=True,
            )

        self.coef_ = coef_.T
        self._set_intercept(X_offset, y_offset, X_scale)
        return self
|
|
|
|
|
|
def _omp_path_residues(
    X_train,
    y_train,
    X_test,
    y_test,
    copy=True,
    fit_intercept=True,
    max_iter=100,
):
    """Compute the residues on left-out data along a full OMP path.

    The OMP coefficient path is fitted on the training split with
    `orthogonal_mp` and each step of the path is evaluated on the test split.

    Parameters
    ----------
    X_train : ndarray of shape (n_samples, n_features)
        The data to fit the OMP path on.

    y_train : ndarray of shape (n_samples)
        The target variable to fit the OMP path on.

    X_test : ndarray of shape (n_samples, n_features)
        The data to compute the residues on.

    y_test : ndarray of shape (n_samples)
        The target variable to compute the residues on.

    copy : bool, default=True
        Whether X_train, X_test, y_train and y_test should be copied. If
        False, they may be overwritten.

    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to false, no intercept will be used in calculations
        (i.e. data is expected to be centered).

    max_iter : int, default=100
        Maximum numbers of iterations to perform, therefore maximum features
        to include. 100 by default.

    Returns
    -------
    residues : ndarray of shape (n_samples, max_features)
        Residues of the prediction on the test data.
    """
    if copy:
        X_train, y_train, X_test, y_test = (
            arr.copy() for arr in (X_train, y_train, X_test, y_test)
        )

    if fit_intercept:
        # Centre both splits with statistics of the training split only.
        X_mean = X_train.mean(axis=0)
        y_mean = y_train.mean(axis=0)

        X_train -= X_mean
        X_test -= X_mean

        y_train = as_float_array(y_train, copy=False)
        y_train -= y_mean
        y_test = as_float_array(y_test, copy=False)
        y_test -= y_mean

    path = orthogonal_mp(
        X_train,
        y_train,
        n_nonzero_coefs=max_iter,
        tol=None,
        precompute=False,
        copy_X=False,
        return_path=True,
    )
    # A 1-D result is promoted to a single-column 2-D array.
    if path.ndim == 1:
        path = path[:, np.newaxis]

    predictions = np.dot(path.T, X_test.T)
    return predictions - y_test
|
|
|
|
|
|
class OrthogonalMatchingPursuitCV(RegressorMixin, LinearModel):
|
|
"""Cross-validated Orthogonal Matching Pursuit model (OMP).
|
|
|
|
See glossary entry for :term:`cross-validation estimator`.
|
|
|
|
Read more in the :ref:`User Guide <omp>`.
|
|
|
|
Parameters
|
|
----------
|
|
copy : bool, default=True
|
|
Whether the design matrix X must be copied by the algorithm. A false
|
|
value is only helpful if X is already Fortran-ordered, otherwise a
|
|
copy is made anyway.
|
|
|
|
fit_intercept : bool, default=True
|
|
Whether to calculate the intercept for this model. If set
|
|
to false, no intercept will be used in calculations
|
|
(i.e. data is expected to be centered).
|
|
|
|
max_iter : int, default=None
|
|
Maximum numbers of iterations to perform, therefore maximum features
|
|
to include. 10% of ``n_features`` but at least 5 if available.
|
|
|
|
cv : int, cross-validation generator or iterable, default=None
|
|
Determines the cross-validation splitting strategy.
|
|
Possible inputs for cv are:
|
|
|
|
- None, to use the default 5-fold cross-validation,
|
|
- integer, to specify the number of folds.
|
|
- :term:`CV splitter`,
|
|
- An iterable yielding (train, test) splits as arrays of indices.
|
|
|
|
For integer/None inputs, :class:`~sklearn.model_selection.KFold` is used.
|
|
|
|
Refer :ref:`User Guide <cross_validation>` for the various
|
|
cross-validation strategies that can be used here.
|
|
|
|
.. versionchanged:: 0.22
|
|
``cv`` default value if None changed from 3-fold to 5-fold.
|
|
|
|
n_jobs : int, default=None
|
|
Number of CPUs to use during the cross validation.
|
|
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
|
|
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
|
|
for more details.
|
|
|
|
verbose : bool or int, default=False
|
|
Sets the verbosity amount.
|
|
|
|
Attributes
|
|
----------
|
|
intercept_ : float or ndarray of shape (n_targets,)
|
|
Independent term in decision function.
|
|
|
|
coef_ : ndarray of shape (n_features,) or (n_targets, n_features)
|
|
Parameter vector (w in the problem formulation).
|
|
|
|
n_nonzero_coefs_ : int
|
|
Estimated number of non-zero coefficients giving the best mean squared
|
|
error over the cross-validation folds.
|
|
|
|
n_iter_ : int or array-like
|
|
Number of active features across every target for the model refit with
|
|
the best hyperparameters got by cross-validating across all folds.
|
|
|
|
n_features_in_ : int
|
|
Number of features seen during :term:`fit`.
|
|
|
|
.. versionadded:: 0.24
|
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
|
Names of features seen during :term:`fit`. Defined only when `X`
|
|
has feature names that are all strings.
|
|
|
|
.. versionadded:: 1.0
|
|
|
|
See Also
|
|
--------
|
|
orthogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems.
|
|
orthogonal_mp_gram : Solves n_targets Orthogonal Matching Pursuit
|
|
problems using only the Gram matrix X.T * X and the product X.T * y.
|
|
lars_path : Compute Least Angle Regression or Lasso path using LARS algorithm.
|
|
Lars : Least Angle Regression model a.k.a. LAR.
|
|
LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.
|
|
OrthogonalMatchingPursuit : Orthogonal Matching Pursuit model (OMP).
|
|
LarsCV : Cross-validated Least Angle Regression model.
|
|
LassoLarsCV : Cross-validated Lasso model fit with Least Angle Regression.
|
|
sklearn.decomposition.sparse_encode : Generic sparse coding.
|
|
Each column of the result is the solution to a Lasso problem.
|
|
|
|
Notes
|
|
-----
|
|
In `fit`, once the optimal number of non-zero coefficients is found through
|
|
cross-validation, the model is fit again using the entire training set.
|
|
|
|
Examples
|
|
--------
|
|
>>> from sklearn.linear_model import OrthogonalMatchingPursuitCV
|
|
>>> from sklearn.datasets import make_regression
|
|
>>> X, y = make_regression(n_features=100, n_informative=10,
|
|
... noise=4, random_state=0)
|
|
>>> reg = OrthogonalMatchingPursuitCV(cv=5).fit(X, y)
|
|
>>> reg.score(X, y)
|
|
0.9991...
|
|
>>> reg.n_nonzero_coefs_
|
|
10
|
|
>>> reg.predict(X[:1,])
|
|
array([-78.3854...])
|
|
"""
|
|
|
|
_parameter_constraints: dict = {
|
|
"copy": ["boolean"],
|
|
"fit_intercept": ["boolean"],
|
|
"max_iter": [Interval(Integral, 0, None, closed="left"), None],
|
|
"cv": ["cv_object"],
|
|
"n_jobs": [Integral, None],
|
|
"verbose": ["verbose"],
|
|
}
|
|
|
|
def __init__(
    self,
    *,
    copy=True,
    fit_intercept=True,
    max_iter=None,
    cv=None,
    n_jobs=None,
    verbose=False,
):
    # The constructor only records the hyper-parameters verbatim (all of
    # them keyword-only); nothing is validated or computed here.

    # Settings forwarded to the per-fold path computation in `fit`.
    self.copy = copy
    self.fit_intercept = fit_intercept
    self.max_iter = max_iter

    # Cross-validation strategy and parallel execution settings.
    self.cv = cv
    self.n_jobs = n_jobs
    self.verbose = verbose
|
|
|
|
@_fit_context(prefer_skip_nested_validation=True)
def fit(self, X, y, **fit_params):
    """Fit the model using X, y as training data.

    The number of non-zero coefficients is selected by cross-validation,
    then a final model is refit on the whole of `X` and `y`.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training data.

    y : array-like of shape (n_samples,)
        Target values. Will be cast to X's dtype if necessary.

    **fit_params : dict
        Parameters to pass to the underlying splitter.

        .. versionadded:: 1.4
            Only available if `enable_metadata_routing=True`,
            which can be set by using
            ``sklearn.set_config(enable_metadata_routing=True)``.
            See :ref:`Metadata Routing User Guide <metadata_routing>` for
            more details.

    Returns
    -------
    self : object
        Returns an instance of self.
    """
    _raise_for_params(fit_params, self, "fit")

    X, y = self._validate_data(X, y, y_numeric=True, ensure_min_features=2)
    X = as_float_array(X, copy=False, force_all_finite=False)
    cv = check_cv(self.cv, classifier=False)

    if _routing_enabled():
        routed_params = process_routing(self, "fit", **fit_params)
    else:
        # TODO(SLEP6): remove when metadata routing cannot be disabled.
        routed_params = Bunch()
        routed_params.splitter = Bunch(split={})

    # A falsy `max_iter` (None or 0) selects the default cap: 10% of the
    # features, but at least 5, and never more than the number of features.
    n_features = X.shape[1]
    if self.max_iter:
        max_iter = self.max_iter
    else:
        max_iter = min(max(int(0.1 * n_features), 5), n_features)

    # One path computation per cross-validation split, dispatched via joblib.
    run_in_parallel = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)
    path_job = delayed(_omp_path_residues)
    cv_paths = run_in_parallel(
        path_job(
            X[train],
            y[train],
            X[test],
            y[test],
            self.copy,
            self.fit_intercept,
            max_iter,
        )
        for train, test in cv.split(X, **routed_params.splitter.split)
    )

    # Folds may yield paths of different lengths; truncate every fold to the
    # shortest one so the per-step errors can be stacked and averaged.
    shortest_path = min(fold.shape[0] for fold in cv_paths)
    per_fold_mse = [
        (fold[:shortest_path] ** 2).mean(axis=1) for fold in cv_paths
    ]
    mse_folds = np.array(per_fold_mse)

    # Row index i along the path is mapped to i + 1 non-zero coefficients.
    best_n_nonzero_coefs = np.argmin(mse_folds.mean(axis=0)) + 1
    self.n_nonzero_coefs_ = best_n_nonzero_coefs

    # Refit on the full data with the selected sparsity level and expose the
    # fitted attributes of that final model on this estimator.
    omp = OrthogonalMatchingPursuit(
        n_nonzero_coefs=best_n_nonzero_coefs,
        fit_intercept=self.fit_intercept,
    ).fit(X, y)

    self.coef_ = omp.coef_
    self.intercept_ = omp.intercept_
    self.n_iter_ = omp.n_iter_
    return self
|
|
|
|
def get_metadata_routing(self):
    """Get metadata routing of this object.

    Please check :ref:`User Guide <metadata_routing>` on how the routing
    mechanism works.

    .. versionadded:: 1.4

    Returns
    -------
    routing : MetadataRouter
        A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating
        routing information.
    """
    # Metadata given to `fit` is routed to the `split` method of the
    # cross-validation splitter stored in `self.cv`.
    fit_to_split = MethodMapping().add(caller="fit", callee="split")
    owner_name = self.__class__.__name__
    return MetadataRouter(owner=owner_name).add(
        splitter=self.cv,
        method_mapping=fit_to_split,
    )
|