"""Orthogonal matching pursuit algorithms""" # Author: Vlad Niculae # # License: BSD 3 clause import warnings from math import sqrt from numbers import Integral, Real import numpy as np from scipy import linalg from scipy.linalg.lapack import get_lapack_funcs from ..base import MultiOutputMixin, RegressorMixin, _fit_context from ..model_selection import check_cv from ..utils import Bunch, as_float_array, check_array from ..utils._param_validation import Interval, StrOptions, validate_params from ..utils.metadata_routing import ( MetadataRouter, MethodMapping, _raise_for_params, _routing_enabled, process_routing, ) from ..utils.parallel import Parallel, delayed from ._base import LinearModel, _pre_fit premature = ( "Orthogonal matching pursuit ended prematurely due to linear" " dependence in the dictionary. The requested precision might" " not have been met." ) def _cholesky_omp(X, y, n_nonzero_coefs, tol=None, copy_X=True, return_path=False): """Orthogonal Matching Pursuit step using the Cholesky decomposition. Parameters ---------- X : ndarray of shape (n_samples, n_features) Input dictionary. Columns are assumed to have unit norm. y : ndarray of shape (n_samples,) Input targets. n_nonzero_coefs : int Targeted number of non-zero elements. tol : float, default=None Targeted squared error, if not None overrides n_nonzero_coefs. copy_X : bool, default=True Whether the design matrix X must be copied by the algorithm. A false value is only helpful if X is already Fortran-ordered, otherwise a copy is made anyway. return_path : bool, default=False Whether to return every value of the nonzero coefficients along the forward path. Useful for cross-validation. Returns ------- gamma : ndarray of shape (n_nonzero_coefs,) Non-zero elements of the solution. idx : ndarray of shape (n_nonzero_coefs,) Indices of the positions of the elements in gamma within the solution vector. coef : ndarray of shape (n_features, n_nonzero_coefs) The first k values of column k correspond to the coefficient value for the active features at that step. The lower left triangle contains garbage. Only returned if ``return_path=True``. n_active : int Number of active features at convergence. """ if copy_X: X = X.copy("F") else: # even if we are allowed to overwrite, still copy it if bad order X = np.asfortranarray(X) min_float = np.finfo(X.dtype).eps nrm2, swap = linalg.get_blas_funcs(("nrm2", "swap"), (X,)) (potrs,) = get_lapack_funcs(("potrs",), (X,)) alpha = np.dot(X.T, y) residual = y gamma = np.empty(0) n_active = 0 indices = np.arange(X.shape[1]) # keeping track of swapping max_features = X.shape[1] if tol is not None else n_nonzero_coefs L = np.empty((max_features, max_features), dtype=X.dtype) if return_path: coefs = np.empty_like(L) while True: lam = np.argmax(np.abs(np.dot(X.T, residual))) if lam < n_active or alpha[lam] ** 2 < min_float: # atom already selected or inner product too small warnings.warn(premature, RuntimeWarning, stacklevel=2) break if n_active > 0: # Updates the Cholesky decomposition of X' X L[n_active, :n_active] = np.dot(X[:, :n_active].T, X[:, lam]) linalg.solve_triangular( L[:n_active, :n_active], L[n_active, :n_active], trans=0, lower=1, overwrite_b=True, check_finite=False, ) v = nrm2(L[n_active, :n_active]) ** 2 Lkk = linalg.norm(X[:, lam]) ** 2 - v if Lkk <= min_float: # selected atoms are dependent warnings.warn(premature, RuntimeWarning, stacklevel=2) break L[n_active, n_active] = sqrt(Lkk) else: L[0, 0] = linalg.norm(X[:, lam]) X.T[n_active], X.T[lam] = swap(X.T[n_active], X.T[lam]) alpha[n_active], alpha[lam] = alpha[lam], alpha[n_active] indices[n_active], indices[lam] = indices[lam], indices[n_active] n_active += 1 # solves LL'x = X'y as a composition of two triangular systems gamma, _ = potrs( L[:n_active, :n_active], alpha[:n_active], lower=True, overwrite_b=False ) if return_path: coefs[:n_active, n_active - 1] = gamma residual = y - np.dot(X[:, :n_active], gamma) if tol is not None and nrm2(residual) ** 2 <= tol: break elif n_active == max_features: break if return_path: return gamma, indices[:n_active], coefs[:, :n_active], n_active else: return gamma, indices[:n_active], n_active def _gram_omp( Gram, Xy, n_nonzero_coefs, tol_0=None, tol=None, copy_Gram=True, copy_Xy=True, return_path=False, ): """Orthogonal Matching Pursuit step on a precomputed Gram matrix. This function uses the Cholesky decomposition method. Parameters ---------- Gram : ndarray of shape (n_features, n_features) Gram matrix of the input data matrix. Xy : ndarray of shape (n_features,) Input targets. n_nonzero_coefs : int Targeted number of non-zero elements. tol_0 : float, default=None Squared norm of y, required if tol is not None. tol : float, default=None Targeted squared error, if not None overrides n_nonzero_coefs. copy_Gram : bool, default=True Whether the gram matrix must be copied by the algorithm. A false value is only helpful if it is already Fortran-ordered, otherwise a copy is made anyway. copy_Xy : bool, default=True Whether the covariance vector Xy must be copied by the algorithm. If False, it may be overwritten. return_path : bool, default=False Whether to return every value of the nonzero coefficients along the forward path. Useful for cross-validation. Returns ------- gamma : ndarray of shape (n_nonzero_coefs,) Non-zero elements of the solution. idx : ndarray of shape (n_nonzero_coefs,) Indices of the positions of the elements in gamma within the solution vector. coefs : ndarray of shape (n_features, n_nonzero_coefs) The first k values of column k correspond to the coefficient value for the active features at that step. The lower left triangle contains garbage. Only returned if ``return_path=True``. n_active : int Number of active features at convergence. """ Gram = Gram.copy("F") if copy_Gram else np.asfortranarray(Gram) if copy_Xy or not Xy.flags.writeable: Xy = Xy.copy() min_float = np.finfo(Gram.dtype).eps nrm2, swap = linalg.get_blas_funcs(("nrm2", "swap"), (Gram,)) (potrs,) = get_lapack_funcs(("potrs",), (Gram,)) indices = np.arange(len(Gram)) # keeping track of swapping alpha = Xy tol_curr = tol_0 delta = 0 gamma = np.empty(0) n_active = 0 max_features = len(Gram) if tol is not None else n_nonzero_coefs L = np.empty((max_features, max_features), dtype=Gram.dtype) L[0, 0] = 1.0 if return_path: coefs = np.empty_like(L) while True: lam = np.argmax(np.abs(alpha)) if lam < n_active or alpha[lam] ** 2 < min_float: # selected same atom twice, or inner product too small warnings.warn(premature, RuntimeWarning, stacklevel=3) break if n_active > 0: L[n_active, :n_active] = Gram[lam, :n_active] linalg.solve_triangular( L[:n_active, :n_active], L[n_active, :n_active], trans=0, lower=1, overwrite_b=True, check_finite=False, ) v = nrm2(L[n_active, :n_active]) ** 2 Lkk = Gram[lam, lam] - v if Lkk <= min_float: # selected atoms are dependent warnings.warn(premature, RuntimeWarning, stacklevel=3) break L[n_active, n_active] = sqrt(Lkk) else: L[0, 0] = sqrt(Gram[lam, lam]) Gram[n_active], Gram[lam] = swap(Gram[n_active], Gram[lam]) Gram.T[n_active], Gram.T[lam] = swap(Gram.T[n_active], Gram.T[lam]) indices[n_active], indices[lam] = indices[lam], indices[n_active] Xy[n_active], Xy[lam] = Xy[lam], Xy[n_active] n_active += 1 # solves LL'x = X'y as a composition of two triangular systems gamma, _ = potrs( L[:n_active, :n_active], Xy[:n_active], lower=True, overwrite_b=False ) if return_path: coefs[:n_active, n_active - 1] = gamma beta = np.dot(Gram[:, :n_active], gamma) alpha = Xy - beta if tol is not None: tol_curr += delta delta = np.inner(gamma, beta[:n_active]) tol_curr -= delta if abs(tol_curr) <= tol: break elif n_active == max_features: break if return_path: return gamma, indices[:n_active], coefs[:, :n_active], n_active else: return gamma, indices[:n_active], n_active @validate_params( { "X": ["array-like"], "y": [np.ndarray], "n_nonzero_coefs": [Interval(Integral, 1, None, closed="left"), None], "tol": [Interval(Real, 0, None, closed="left"), None], "precompute": ["boolean", StrOptions({"auto"})], "copy_X": ["boolean"], "return_path": ["boolean"], "return_n_iter": ["boolean"], }, prefer_skip_nested_validation=True, ) def orthogonal_mp( X, y, *, n_nonzero_coefs=None, tol=None, precompute=False, copy_X=True, return_path=False, return_n_iter=False, ): r"""Orthogonal Matching Pursuit (OMP). Solves n_targets Orthogonal Matching Pursuit problems. An instance of the problem has the form: When parametrized by the number of non-zero coefficients using `n_nonzero_coefs`: argmin ||y - X\gamma||^2 subject to ||\gamma||_0 <= n_{nonzero coefs} When parametrized by error using the parameter `tol`: argmin ||\gamma||_0 subject to ||y - X\gamma||^2 <= tol Read more in the :ref:`User Guide `. Parameters ---------- X : array-like of shape (n_samples, n_features) Input data. Columns are assumed to have unit norm. y : ndarray of shape (n_samples,) or (n_samples, n_targets) Input targets. n_nonzero_coefs : int, default=None Desired number of non-zero entries in the solution. If None (by default) this value is set to 10% of n_features. tol : float, default=None Maximum squared norm of the residual. If not None, overrides n_nonzero_coefs. precompute : 'auto' or bool, default=False Whether to perform precomputations. Improves performance when n_targets or n_samples is very large. copy_X : bool, default=True Whether the design matrix X must be copied by the algorithm. A false value is only helpful if X is already Fortran-ordered, otherwise a copy is made anyway. return_path : bool, default=False Whether to return every value of the nonzero coefficients along the forward path. Useful for cross-validation. return_n_iter : bool, default=False Whether or not to return the number of iterations. Returns ------- coef : ndarray of shape (n_features,) or (n_features, n_targets) Coefficients of the OMP solution. If `return_path=True`, this contains the whole coefficient path. In this case its shape is (n_features, n_features) or (n_features, n_targets, n_features) and iterating over the last axis generates coefficients in increasing order of active features. n_iters : array-like or int Number of active features across every target. Returned only if `return_n_iter` is set to True. See Also -------- OrthogonalMatchingPursuit : Orthogonal Matching Pursuit model. orthogonal_mp_gram : Solve OMP problems using Gram matrix and the product X.T * y. lars_path : Compute Least Angle Regression or Lasso path using LARS algorithm. sklearn.decomposition.sparse_encode : Sparse coding. Notes ----- Orthogonal matching pursuit was introduced in S. Mallat, Z. Zhang, Matching pursuits with time-frequency dictionaries, IEEE Transactions on Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415. (https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf) This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad, M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal Matching Pursuit Technical Report - CS Technion, April 2008. https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf Examples -------- >>> from sklearn.datasets import make_regression >>> from sklearn.linear_model import orthogonal_mp >>> X, y = make_regression(noise=4, random_state=0) >>> coef = orthogonal_mp(X, y) >>> coef.shape (100,) >>> X[:1,] @ coef array([-78.68...]) """ X = check_array(X, order="F", copy=copy_X) copy_X = False if y.ndim == 1: y = y.reshape(-1, 1) y = check_array(y) if y.shape[1] > 1: # subsequent targets will be affected copy_X = True if n_nonzero_coefs is None and tol is None: # default for n_nonzero_coefs is 0.1 * n_features # but at least one. n_nonzero_coefs = max(int(0.1 * X.shape[1]), 1) if tol is None and n_nonzero_coefs > X.shape[1]: raise ValueError( "The number of atoms cannot be more than the number of features" ) if precompute == "auto": precompute = X.shape[0] > X.shape[1] if precompute: G = np.dot(X.T, X) G = np.asfortranarray(G) Xy = np.dot(X.T, y) if tol is not None: norms_squared = np.sum((y**2), axis=0) else: norms_squared = None return orthogonal_mp_gram( G, Xy, n_nonzero_coefs=n_nonzero_coefs, tol=tol, norms_squared=norms_squared, copy_Gram=copy_X, copy_Xy=False, return_path=return_path, ) if return_path: coef = np.zeros((X.shape[1], y.shape[1], X.shape[1])) else: coef = np.zeros((X.shape[1], y.shape[1])) n_iters = [] for k in range(y.shape[1]): out = _cholesky_omp( X, y[:, k], n_nonzero_coefs, tol, copy_X=copy_X, return_path=return_path ) if return_path: _, idx, coefs, n_iter = out coef = coef[:, :, : len(idx)] for n_active, x in enumerate(coefs.T): coef[idx[: n_active + 1], k, n_active] = x[: n_active + 1] else: x, idx, n_iter = out coef[idx, k] = x n_iters.append(n_iter) if y.shape[1] == 1: n_iters = n_iters[0] if return_n_iter: return np.squeeze(coef), n_iters else: return np.squeeze(coef) @validate_params( { "Gram": ["array-like"], "Xy": ["array-like"], "n_nonzero_coefs": [Interval(Integral, 0, None, closed="neither"), None], "tol": [Interval(Real, 0, None, closed="left"), None], "norms_squared": ["array-like", None], "copy_Gram": ["boolean"], "copy_Xy": ["boolean"], "return_path": ["boolean"], "return_n_iter": ["boolean"], }, prefer_skip_nested_validation=True, ) def orthogonal_mp_gram( Gram, Xy, *, n_nonzero_coefs=None, tol=None, norms_squared=None, copy_Gram=True, copy_Xy=True, return_path=False, return_n_iter=False, ): """Gram Orthogonal Matching Pursuit (OMP). Solves n_targets Orthogonal Matching Pursuit problems using only the Gram matrix X.T * X and the product X.T * y. Read more in the :ref:`User Guide `. Parameters ---------- Gram : array-like of shape (n_features, n_features) Gram matrix of the input data: `X.T * X`. Xy : array-like of shape (n_features,) or (n_features, n_targets) Input targets multiplied by `X`: `X.T * y`. n_nonzero_coefs : int, default=None Desired number of non-zero entries in the solution. If `None` (by default) this value is set to 10% of n_features. tol : float, default=None Maximum squared norm of the residual. If not `None`, overrides `n_nonzero_coefs`. norms_squared : array-like of shape (n_targets,), default=None Squared L2 norms of the lines of `y`. Required if `tol` is not None. copy_Gram : bool, default=True Whether the gram matrix must be copied by the algorithm. A `False` value is only helpful if it is already Fortran-ordered, otherwise a copy is made anyway. copy_Xy : bool, default=True Whether the covariance vector `Xy` must be copied by the algorithm. If `False`, it may be overwritten. return_path : bool, default=False Whether to return every value of the nonzero coefficients along the forward path. Useful for cross-validation. return_n_iter : bool, default=False Whether or not to return the number of iterations. Returns ------- coef : ndarray of shape (n_features,) or (n_features, n_targets) Coefficients of the OMP solution. If `return_path=True`, this contains the whole coefficient path. In this case its shape is `(n_features, n_features)` or `(n_features, n_targets, n_features)` and iterating over the last axis yields coefficients in increasing order of active features. n_iters : list or int Number of active features across every target. Returned only if `return_n_iter` is set to True. See Also -------- OrthogonalMatchingPursuit : Orthogonal Matching Pursuit model (OMP). orthogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems. lars_path : Compute Least Angle Regression or Lasso path using LARS algorithm. sklearn.decomposition.sparse_encode : Generic sparse coding. Each column of the result is the solution to a Lasso problem. Notes ----- Orthogonal matching pursuit was introduced in G. Mallat, Z. Zhang, Matching pursuits with time-frequency dictionaries, IEEE Transactions on Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415. (https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf) This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad, M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal Matching Pursuit Technical Report - CS Technion, April 2008. https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf Examples -------- >>> from sklearn.datasets import make_regression >>> from sklearn.linear_model import orthogonal_mp_gram >>> X, y = make_regression(noise=4, random_state=0) >>> coef = orthogonal_mp_gram(X.T @ X, X.T @ y) >>> coef.shape (100,) >>> X[:1,] @ coef array([-78.68...]) """ Gram = check_array(Gram, order="F", copy=copy_Gram) Xy = np.asarray(Xy) if Xy.ndim > 1 and Xy.shape[1] > 1: # or subsequent target will be affected copy_Gram = True if Xy.ndim == 1: Xy = Xy[:, np.newaxis] if tol is not None: norms_squared = [norms_squared] if copy_Xy or not Xy.flags.writeable: # Make the copy once instead of many times in _gram_omp itself. Xy = Xy.copy() if n_nonzero_coefs is None and tol is None: n_nonzero_coefs = int(0.1 * len(Gram)) if tol is not None and norms_squared is None: raise ValueError( "Gram OMP needs the precomputed norms in order " "to evaluate the error sum of squares." ) if tol is not None and tol < 0: raise ValueError("Epsilon cannot be negative") if tol is None and n_nonzero_coefs <= 0: raise ValueError("The number of atoms must be positive") if tol is None and n_nonzero_coefs > len(Gram): raise ValueError( "The number of atoms cannot be more than the number of features" ) if return_path: coef = np.zeros((len(Gram), Xy.shape[1], len(Gram)), dtype=Gram.dtype) else: coef = np.zeros((len(Gram), Xy.shape[1]), dtype=Gram.dtype) n_iters = [] for k in range(Xy.shape[1]): out = _gram_omp( Gram, Xy[:, k], n_nonzero_coefs, norms_squared[k] if tol is not None else None, tol, copy_Gram=copy_Gram, copy_Xy=False, return_path=return_path, ) if return_path: _, idx, coefs, n_iter = out coef = coef[:, :, : len(idx)] for n_active, x in enumerate(coefs.T): coef[idx[: n_active + 1], k, n_active] = x[: n_active + 1] else: x, idx, n_iter = out coef[idx, k] = x n_iters.append(n_iter) if Xy.shape[1] == 1: n_iters = n_iters[0] if return_n_iter: return np.squeeze(coef), n_iters else: return np.squeeze(coef) class OrthogonalMatchingPursuit(MultiOutputMixin, RegressorMixin, LinearModel): """Orthogonal Matching Pursuit model (OMP). Read more in the :ref:`User Guide `. Parameters ---------- n_nonzero_coefs : int, default=None Desired number of non-zero entries in the solution. Ignored if `tol` is set. When `None` and `tol` is also `None`, this value is either set to 10% of `n_features` or 1, whichever is greater. tol : float, default=None Maximum squared norm of the residual. If not None, overrides n_nonzero_coefs. fit_intercept : bool, default=True Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered). precompute : 'auto' or bool, default='auto' Whether to use a precomputed Gram and Xy matrix to speed up calculations. Improves performance when :term:`n_targets` or :term:`n_samples` is very large. Note that if you already have such matrices, you can pass them directly to the fit method. Attributes ---------- coef_ : ndarray of shape (n_features,) or (n_targets, n_features) Parameter vector (w in the formula). intercept_ : float or ndarray of shape (n_targets,) Independent term in decision function. n_iter_ : int or array-like Number of active features across every target. n_nonzero_coefs_ : int or None The number of non-zero coefficients in the solution or `None` when `tol` is set. If `n_nonzero_coefs` is None and `tol` is None this value is either set to 10% of `n_features` or 1, whichever is greater. n_features_in_ : int Number of features seen during :term:`fit`. .. versionadded:: 0.24 feature_names_in_ : ndarray of shape (`n_features_in_`,) Names of features seen during :term:`fit`. Defined only when `X` has feature names that are all strings. .. versionadded:: 1.0 See Also -------- orthogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems. orthogonal_mp_gram : Solves n_targets Orthogonal Matching Pursuit problems using only the Gram matrix X.T * X and the product X.T * y. lars_path : Compute Least Angle Regression or Lasso path using LARS algorithm. Lars : Least Angle Regression model a.k.a. LAR. LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars. sklearn.decomposition.sparse_encode : Generic sparse coding. Each column of the result is the solution to a Lasso problem. OrthogonalMatchingPursuitCV : Cross-validated Orthogonal Matching Pursuit model (OMP). Notes ----- Orthogonal matching pursuit was introduced in G. Mallat, Z. Zhang, Matching pursuits with time-frequency dictionaries, IEEE Transactions on Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415. (https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf) This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad, M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal Matching Pursuit Technical Report - CS Technion, April 2008. https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf Examples -------- >>> from sklearn.linear_model import OrthogonalMatchingPursuit >>> from sklearn.datasets import make_regression >>> X, y = make_regression(noise=4, random_state=0) >>> reg = OrthogonalMatchingPursuit().fit(X, y) >>> reg.score(X, y) 0.9991... >>> reg.predict(X[:1,]) array([-78.3854...]) """ _parameter_constraints: dict = { "n_nonzero_coefs": [Interval(Integral, 1, None, closed="left"), None], "tol": [Interval(Real, 0, None, closed="left"), None], "fit_intercept": ["boolean"], "precompute": [StrOptions({"auto"}), "boolean"], } def __init__( self, *, n_nonzero_coefs=None, tol=None, fit_intercept=True, precompute="auto", ): self.n_nonzero_coefs = n_nonzero_coefs self.tol = tol self.fit_intercept = fit_intercept self.precompute = precompute @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y): """Fit the model using X, y as training data. Parameters ---------- X : array-like of shape (n_samples, n_features) Training data. y : array-like of shape (n_samples,) or (n_samples, n_targets) Target values. Will be cast to X's dtype if necessary. Returns ------- self : object Returns an instance of self. """ X, y = self._validate_data(X, y, multi_output=True, y_numeric=True) n_features = X.shape[1] X, y, X_offset, y_offset, X_scale, Gram, Xy = _pre_fit( X, y, None, self.precompute, self.fit_intercept, copy=True ) if y.ndim == 1: y = y[:, np.newaxis] if self.n_nonzero_coefs is None and self.tol is None: # default for n_nonzero_coefs is 0.1 * n_features # but at least one. self.n_nonzero_coefs_ = max(int(0.1 * n_features), 1) elif self.tol is not None: self.n_nonzero_coefs_ = None else: self.n_nonzero_coefs_ = self.n_nonzero_coefs if Gram is False: coef_, self.n_iter_ = orthogonal_mp( X, y, n_nonzero_coefs=self.n_nonzero_coefs_, tol=self.tol, precompute=False, copy_X=True, return_n_iter=True, ) else: norms_sq = np.sum(y**2, axis=0) if self.tol is not None else None coef_, self.n_iter_ = orthogonal_mp_gram( Gram, Xy=Xy, n_nonzero_coefs=self.n_nonzero_coefs_, tol=self.tol, norms_squared=norms_sq, copy_Gram=True, copy_Xy=True, return_n_iter=True, ) self.coef_ = coef_.T self._set_intercept(X_offset, y_offset, X_scale) return self def _omp_path_residues( X_train, y_train, X_test, y_test, copy=True, fit_intercept=True, max_iter=100, ): """Compute the residues on left-out data for a full LARS path. Parameters ---------- X_train : ndarray of shape (n_samples, n_features) The data to fit the LARS on. y_train : ndarray of shape (n_samples) The target variable to fit LARS on. X_test : ndarray of shape (n_samples, n_features) The data to compute the residues on. y_test : ndarray of shape (n_samples) The target variable to compute the residues on. copy : bool, default=True Whether X_train, X_test, y_train and y_test should be copied. If False, they may be overwritten. fit_intercept : bool, default=True Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered). max_iter : int, default=100 Maximum numbers of iterations to perform, therefore maximum features to include. 100 by default. Returns ------- residues : ndarray of shape (n_samples, max_features) Residues of the prediction on the test data. """ if copy: X_train = X_train.copy() y_train = y_train.copy() X_test = X_test.copy() y_test = y_test.copy() if fit_intercept: X_mean = X_train.mean(axis=0) X_train -= X_mean X_test -= X_mean y_mean = y_train.mean(axis=0) y_train = as_float_array(y_train, copy=False) y_train -= y_mean y_test = as_float_array(y_test, copy=False) y_test -= y_mean coefs = orthogonal_mp( X_train, y_train, n_nonzero_coefs=max_iter, tol=None, precompute=False, copy_X=False, return_path=True, ) if coefs.ndim == 1: coefs = coefs[:, np.newaxis] return np.dot(coefs.T, X_test.T) - y_test class OrthogonalMatchingPursuitCV(RegressorMixin, LinearModel): """Cross-validated Orthogonal Matching Pursuit model (OMP). See glossary entry for :term:`cross-validation estimator`. Read more in the :ref:`User Guide `. Parameters ---------- copy : bool, default=True Whether the design matrix X must be copied by the algorithm. A false value is only helpful if X is already Fortran-ordered, otherwise a copy is made anyway. fit_intercept : bool, default=True Whether to calculate the intercept for this model. If set to false, no intercept will be used in calculations (i.e. data is expected to be centered). max_iter : int, default=None Maximum numbers of iterations to perform, therefore maximum features to include. 10% of ``n_features`` but at least 5 if available. cv : int, cross-validation generator or iterable, default=None Determines the cross-validation splitting strategy. Possible inputs for cv are: - None, to use the default 5-fold cross-validation, - integer, to specify the number of folds. - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, :class:`~sklearn.model_selection.KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. .. versionchanged:: 0.22 ``cv`` default value if None changed from 3-fold to 5-fold. n_jobs : int, default=None Number of CPUs to use during the cross validation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. verbose : bool or int, default=False Sets the verbosity amount. Attributes ---------- intercept_ : float or ndarray of shape (n_targets,) Independent term in decision function. coef_ : ndarray of shape (n_features,) or (n_targets, n_features) Parameter vector (w in the problem formulation). n_nonzero_coefs_ : int Estimated number of non-zero coefficients giving the best mean squared error over the cross-validation folds. n_iter_ : int or array-like Number of active features across every target for the model refit with the best hyperparameters got by cross-validating across all folds. n_features_in_ : int Number of features seen during :term:`fit`. .. versionadded:: 0.24 feature_names_in_ : ndarray of shape (`n_features_in_`,) Names of features seen during :term:`fit`. Defined only when `X` has feature names that are all strings. .. versionadded:: 1.0 See Also -------- orthogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems. orthogonal_mp_gram : Solves n_targets Orthogonal Matching Pursuit problems using only the Gram matrix X.T * X and the product X.T * y. lars_path : Compute Least Angle Regression or Lasso path using LARS algorithm. Lars : Least Angle Regression model a.k.a. LAR. LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars. OrthogonalMatchingPursuit : Orthogonal Matching Pursuit model (OMP). LarsCV : Cross-validated Least Angle Regression model. LassoLarsCV : Cross-validated Lasso model fit with Least Angle Regression. sklearn.decomposition.sparse_encode : Generic sparse coding. Each column of the result is the solution to a Lasso problem. Notes ----- In `fit`, once the optimal number of non-zero coefficients is found through cross-validation, the model is fit again using the entire training set. Examples -------- >>> from sklearn.linear_model import OrthogonalMatchingPursuitCV >>> from sklearn.datasets import make_regression >>> X, y = make_regression(n_features=100, n_informative=10, ... noise=4, random_state=0) >>> reg = OrthogonalMatchingPursuitCV(cv=5).fit(X, y) >>> reg.score(X, y) 0.9991... >>> reg.n_nonzero_coefs_ 10 >>> reg.predict(X[:1,]) array([-78.3854...]) """ _parameter_constraints: dict = { "copy": ["boolean"], "fit_intercept": ["boolean"], "max_iter": [Interval(Integral, 0, None, closed="left"), None], "cv": ["cv_object"], "n_jobs": [Integral, None], "verbose": ["verbose"], } def __init__( self, *, copy=True, fit_intercept=True, max_iter=None, cv=None, n_jobs=None, verbose=False, ): self.copy = copy self.fit_intercept = fit_intercept self.max_iter = max_iter self.cv = cv self.n_jobs = n_jobs self.verbose = verbose @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, **fit_params): """Fit the model using X, y as training data. Parameters ---------- X : array-like of shape (n_samples, n_features) Training data. y : array-like of shape (n_samples,) Target values. Will be cast to X's dtype if necessary. **fit_params : dict Parameters to pass to the underlying splitter. .. versionadded:: 1.4 Only available if `enable_metadata_routing=True`, which can be set by using ``sklearn.set_config(enable_metadata_routing=True)``. See :ref:`Metadata Routing User Guide ` for more details. Returns ------- self : object Returns an instance of self. """ _raise_for_params(fit_params, self, "fit") X, y = self._validate_data(X, y, y_numeric=True, ensure_min_features=2) X = as_float_array(X, copy=False, force_all_finite=False) cv = check_cv(self.cv, classifier=False) if _routing_enabled(): routed_params = process_routing(self, "fit", **fit_params) else: # TODO(SLEP6): remove when metadata routing cannot be disabled. routed_params = Bunch() routed_params.splitter = Bunch(split={}) max_iter = ( min(max(int(0.1 * X.shape[1]), 5), X.shape[1]) if not self.max_iter else self.max_iter ) cv_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( delayed(_omp_path_residues)( X[train], y[train], X[test], y[test], self.copy, self.fit_intercept, max_iter, ) for train, test in cv.split(X, **routed_params.splitter.split) ) min_early_stop = min(fold.shape[0] for fold in cv_paths) mse_folds = np.array( [(fold[:min_early_stop] ** 2).mean(axis=1) for fold in cv_paths] ) best_n_nonzero_coefs = np.argmin(mse_folds.mean(axis=0)) + 1 self.n_nonzero_coefs_ = best_n_nonzero_coefs omp = OrthogonalMatchingPursuit( n_nonzero_coefs=best_n_nonzero_coefs, fit_intercept=self.fit_intercept, ).fit(X, y) self.coef_ = omp.coef_ self.intercept_ = omp.intercept_ self.n_iter_ = omp.n_iter_ return self def get_metadata_routing(self): """Get metadata routing of this object. Please check :ref:`User Guide ` on how the routing mechanism works. .. versionadded:: 1.4 Returns ------- routing : MetadataRouter A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating routing information. """ router = MetadataRouter(owner=self.__class__.__name__).add( splitter=self.cv, method_mapping=MethodMapping().add(caller="fit", callee="split"), ) return router