""" The :mod:`sklearn.pls` module implements Partial Least Squares (PLS). """ # Author: Edouard Duchesnay # License: BSD 3 clause import warnings from abc import ABCMeta, abstractmethod from numbers import Integral, Real import numpy as np from scipy.linalg import svd from ..base import ( BaseEstimator, ClassNamePrefixFeaturesOutMixin, MultiOutputMixin, RegressorMixin, TransformerMixin, _fit_context, ) from ..exceptions import ConvergenceWarning from ..utils import check_array, check_consistent_length from ..utils._param_validation import Interval, StrOptions from ..utils.extmath import svd_flip from ..utils.fixes import parse_version, sp_version from ..utils.validation import FLOAT_DTYPES, check_is_fitted __all__ = ["PLSCanonical", "PLSRegression", "PLSSVD"] if sp_version >= parse_version("1.7"): # Starting in scipy 1.7 pinv2 was deprecated in favor of pinv. # pinv now uses the svd to compute the pseudo-inverse. from scipy.linalg import pinv as pinv2 else: from scipy.linalg import pinv2 def _pinv2_old(a): # Used previous scipy pinv2 that was updated in: # https://github.com/scipy/scipy/pull/10067 # We can not set `cond` or `rcond` for pinv2 in scipy >= 1.3 to keep the # same behavior of pinv2 for scipy < 1.3, because the condition used to # determine the rank is dependent on the output of svd. u, s, vh = svd(a, full_matrices=False, check_finite=False) t = u.dtype.char.lower() factor = {"f": 1e3, "d": 1e6} cond = np.max(s) * factor[t] * np.finfo(t).eps rank = np.sum(s > cond) u = u[:, :rank] u /= s[:rank] return np.transpose(np.conjugate(np.dot(u, vh[:rank]))) def _get_first_singular_vectors_power_method( X, Y, mode="A", max_iter=500, tol=1e-06, norm_y_weights=False ): """Return the first left and right singular vectors of X'Y. Provides an alternative to the svd(X'Y) and uses the power method instead. With norm_y_weights to True and in mode A, this corresponds to the algorithm section 11.3 of the Wegelin's review, except this starts at the "update saliences" part. """ eps = np.finfo(X.dtype).eps try: y_score = next(col for col in Y.T if np.any(np.abs(col) > eps)) except StopIteration as e: raise StopIteration("y residual is constant") from e x_weights_old = 100 # init to big value for first convergence check if mode == "B": # Precompute pseudo inverse matrices # Basically: X_pinv = (X.T X)^-1 X.T # Which requires inverting a (n_features, n_features) matrix. # As a result, and as detailed in the Wegelin's review, CCA (i.e. mode # B) will be unstable if n_features > n_samples or n_targets > # n_samples X_pinv, Y_pinv = _pinv2_old(X), _pinv2_old(Y) for i in range(max_iter): if mode == "B": x_weights = np.dot(X_pinv, y_score) else: x_weights = np.dot(X.T, y_score) / np.dot(y_score, y_score) x_weights /= np.sqrt(np.dot(x_weights, x_weights)) + eps x_score = np.dot(X, x_weights) if mode == "B": y_weights = np.dot(Y_pinv, x_score) else: y_weights = np.dot(Y.T, x_score) / np.dot(x_score.T, x_score) if norm_y_weights: y_weights /= np.sqrt(np.dot(y_weights, y_weights)) + eps y_score = np.dot(Y, y_weights) / (np.dot(y_weights, y_weights) + eps) x_weights_diff = x_weights - x_weights_old if np.dot(x_weights_diff, x_weights_diff) < tol or Y.shape[1] == 1: break x_weights_old = x_weights n_iter = i + 1 if n_iter == max_iter: warnings.warn("Maximum number of iterations reached", ConvergenceWarning) return x_weights, y_weights, n_iter def _get_first_singular_vectors_svd(X, Y): """Return the first left and right singular vectors of X'Y. Here the whole SVD is computed. 
""" C = np.dot(X.T, Y) U, _, Vt = svd(C, full_matrices=False) return U[:, 0], Vt[0, :] def _center_scale_xy(X, Y, scale=True): """Center X, Y and scale if the scale parameter==True Returns ------- X, Y, x_mean, y_mean, x_std, y_std """ # center x_mean = X.mean(axis=0) X -= x_mean y_mean = Y.mean(axis=0) Y -= y_mean # scale if scale: x_std = X.std(axis=0, ddof=1) x_std[x_std == 0.0] = 1.0 X /= x_std y_std = Y.std(axis=0, ddof=1) y_std[y_std == 0.0] = 1.0 Y /= y_std else: x_std = np.ones(X.shape[1]) y_std = np.ones(Y.shape[1]) return X, Y, x_mean, y_mean, x_std, y_std def _svd_flip_1d(u, v): """Same as svd_flip but works on 1d arrays, and is inplace""" # svd_flip would force us to convert to 2d array and would also return 2d # arrays. We don't want that. biggest_abs_val_idx = np.argmax(np.abs(u)) sign = np.sign(u[biggest_abs_val_idx]) u *= sign v *= sign # TODO(1.7): Remove def _deprecate_Y_when_optional(y, Y): if Y is not None: warnings.warn( "`Y` is deprecated in 1.5 and will be removed in 1.7. Use `y` instead.", FutureWarning, ) if y is not None: raise ValueError( "Cannot use both `y` and `Y`. Use only `y` as `Y` is deprecated." ) return Y return y # TODO(1.7): Remove def _deprecate_Y_when_required(y, Y): if y is None and Y is None: raise ValueError("y is required.") return _deprecate_Y_when_optional(y, Y) class _PLS( ClassNamePrefixFeaturesOutMixin, TransformerMixin, RegressorMixin, MultiOutputMixin, BaseEstimator, metaclass=ABCMeta, ): """Partial Least Squares (PLS) This class implements the generic PLS algorithm. Main ref: Wegelin, a survey of Partial Least Squares (PLS) methods, with emphasis on the two-block case https://stat.uw.edu/sites/default/files/files/reports/2000/tr371.pdf """ _parameter_constraints: dict = { "n_components": [Interval(Integral, 1, None, closed="left")], "scale": ["boolean"], "deflation_mode": [StrOptions({"regression", "canonical"})], "mode": [StrOptions({"A", "B"})], "algorithm": [StrOptions({"svd", "nipals"})], "max_iter": [Interval(Integral, 1, None, closed="left")], "tol": [Interval(Real, 0, None, closed="left")], "copy": ["boolean"], } @abstractmethod def __init__( self, n_components=2, *, scale=True, deflation_mode="regression", mode="A", algorithm="nipals", max_iter=500, tol=1e-06, copy=True, ): self.n_components = n_components self.deflation_mode = deflation_mode self.mode = mode self.scale = scale self.algorithm = algorithm self.max_iter = max_iter self.tol = tol self.copy = copy @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None, Y=None): """Fit model to data. Parameters ---------- X : array-like of shape (n_samples, n_features) Training vectors, where `n_samples` is the number of samples and `n_features` is the number of predictors. y : array-like of shape (n_samples,) or (n_samples, n_targets) Target vectors, where `n_samples` is the number of samples and `n_targets` is the number of response variables. Y : array-like of shape (n_samples,) or (n_samples, n_targets) Target vectors, where `n_samples` is the number of samples and `n_targets` is the number of response variables. .. deprecated:: 1.5 `Y` is deprecated in 1.5 and will be removed in 1.7. Use `y` instead. Returns ------- self : object Fitted model. 
""" y = _deprecate_Y_when_required(y, Y) check_consistent_length(X, y) X = self._validate_data( X, dtype=np.float64, copy=self.copy, ensure_min_samples=2 ) y = check_array( y, input_name="y", dtype=np.float64, copy=self.copy, ensure_2d=False ) if y.ndim == 1: self._predict_1d = True y = y.reshape(-1, 1) else: self._predict_1d = False n = X.shape[0] p = X.shape[1] q = y.shape[1] n_components = self.n_components # With PLSRegression n_components is bounded by the rank of (X.T X) see # Wegelin page 25. With CCA and PLSCanonical, n_components is bounded # by the rank of X and the rank of Y: see Wegelin page 12 rank_upper_bound = p if self.deflation_mode == "regression" else min(n, p, q) if n_components > rank_upper_bound: raise ValueError( f"`n_components` upper bound is {rank_upper_bound}. " f"Got {n_components} instead. Reduce `n_components`." ) self._norm_y_weights = self.deflation_mode == "canonical" # 1.1 norm_y_weights = self._norm_y_weights # Scale (in place) Xk, yk, self._x_mean, self._y_mean, self._x_std, self._y_std = _center_scale_xy( X, y, self.scale ) self.x_weights_ = np.zeros((p, n_components)) # U self.y_weights_ = np.zeros((q, n_components)) # V self._x_scores = np.zeros((n, n_components)) # Xi self._y_scores = np.zeros((n, n_components)) # Omega self.x_loadings_ = np.zeros((p, n_components)) # Gamma self.y_loadings_ = np.zeros((q, n_components)) # Delta self.n_iter_ = [] # This whole thing corresponds to the algorithm in section 4.1 of the # review from Wegelin. See above for a notation mapping from code to # paper. y_eps = np.finfo(yk.dtype).eps for k in range(n_components): # Find first left and right singular vectors of the X.T.dot(Y) # cross-covariance matrix. if self.algorithm == "nipals": # Replace columns that are all close to zero with zeros yk_mask = np.all(np.abs(yk) < 10 * y_eps, axis=0) yk[:, yk_mask] = 0.0 try: ( x_weights, y_weights, n_iter_, ) = _get_first_singular_vectors_power_method( Xk, yk, mode=self.mode, max_iter=self.max_iter, tol=self.tol, norm_y_weights=norm_y_weights, ) except StopIteration as e: if str(e) != "y residual is constant": raise warnings.warn(f"y residual is constant at iteration {k}") break self.n_iter_.append(n_iter_) elif self.algorithm == "svd": x_weights, y_weights = _get_first_singular_vectors_svd(Xk, yk) # inplace sign flip for consistency across solvers and archs _svd_flip_1d(x_weights, y_weights) # compute scores, i.e. the projections of X and Y x_scores = np.dot(Xk, x_weights) if norm_y_weights: y_ss = 1 else: y_ss = np.dot(y_weights, y_weights) y_scores = np.dot(yk, y_weights) / y_ss # Deflation: subtract rank-one approx to obtain Xk+1 and Yk+1 x_loadings = np.dot(x_scores, Xk) / np.dot(x_scores, x_scores) Xk -= np.outer(x_scores, x_loadings) if self.deflation_mode == "canonical": # regress Yk on y_score y_loadings = np.dot(y_scores, yk) / np.dot(y_scores, y_scores) yk -= np.outer(y_scores, y_loadings) if self.deflation_mode == "regression": # regress Yk on x_score y_loadings = np.dot(x_scores, yk) / np.dot(x_scores, x_scores) yk -= np.outer(x_scores, y_loadings) self.x_weights_[:, k] = x_weights self.y_weights_[:, k] = y_weights self._x_scores[:, k] = x_scores self._y_scores[:, k] = y_scores self.x_loadings_[:, k] = x_loadings self.y_loadings_[:, k] = y_loadings # X was approximated as Xi . Gamma.T + X_(R+1) # Xi . Gamma.T is a sum of n_components rank-1 matrices. X_(R+1) is # whatever is left to fully reconstruct X, and can be 0 if X is of rank # n_components. # Similarly, y was approximated as Omega . 

    def transform(self, X, y=None, Y=None, copy=True):
        """Apply the dimension reduction.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to transform.

        y : array-like of shape (n_samples, n_targets), default=None
            Target vectors.

        Y : array-like of shape (n_samples, n_targets), default=None
            Target vectors.

            .. deprecated:: 1.5
               `Y` is deprecated in 1.5 and will be removed in 1.7. Use `y` instead.

        copy : bool, default=True
            Whether to copy `X` and `Y`, or perform in-place normalization.

        Returns
        -------
        x_scores, y_scores : array-like or tuple of array-like
            Return `x_scores` if `y` is not given, `(x_scores, y_scores)` otherwise.
        """
        y = _deprecate_Y_when_optional(y, Y)

        check_is_fitted(self)
        X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES, reset=False)
        # Normalize
        X -= self._x_mean
        X /= self._x_std
        # Apply rotation
        x_scores = np.dot(X, self.x_rotations_)
        if y is not None:
            y = check_array(
                y, input_name="y", ensure_2d=False, copy=copy, dtype=FLOAT_DTYPES
            )
            if y.ndim == 1:
                y = y.reshape(-1, 1)
            y -= self._y_mean
            y /= self._y_std
            y_scores = np.dot(y, self.y_rotations_)
            return x_scores, y_scores

        return x_scores

    def inverse_transform(self, X, y=None, Y=None):
        """Transform data back to its original space.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_components)
            New data, where `n_samples` is the number of samples
            and `n_components` is the number of pls components.

        y : array-like of shape (n_samples,) or (n_samples, n_components)
            New target, where `n_samples` is the number of samples
            and `n_components` is the number of pls components.

        Y : array-like of shape (n_samples, n_components)
            New target, where `n_samples` is the number of samples
            and `n_components` is the number of pls components.

            .. deprecated:: 1.5
               `Y` is deprecated in 1.5 and will be removed in 1.7. Use `y` instead.

        Returns
        -------
        X_reconstructed : ndarray of shape (n_samples, n_features)
            Return the reconstructed `X` data.

        y_reconstructed : ndarray of shape (n_samples, n_targets)
            Return the reconstructed `y` target. Only returned when `y` is given.

        Notes
        -----
        This transformation will only be exact if `n_components=n_features`.
        """
        y = _deprecate_Y_when_optional(y, Y)

        check_is_fitted(self)
        X = check_array(X, input_name="X", dtype=FLOAT_DTYPES)
        # From pls space to original space
        X_reconstructed = np.matmul(X, self.x_loadings_.T)
        # Denormalize
        X_reconstructed *= self._x_std
        X_reconstructed += self._x_mean

        if y is not None:
            y = check_array(y, input_name="y", dtype=FLOAT_DTYPES)
            # From pls space to original space
            y_reconstructed = np.matmul(y, self.y_loadings_.T)
            # Denormalize
            y_reconstructed *= self._y_std
            y_reconstructed += self._y_mean
            return X_reconstructed, y_reconstructed

        return X_reconstructed
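
    # Illustrative sketch (not part of the library): as stated in the
    # `inverse_transform` notes above, transform followed by inverse_transform
    # only recovers X exactly when n_components == n_features. `X`, `y` and
    # `est` are placeholder names for user-provided data.
    #
    #     est = PLSRegression(n_components=X.shape[1]).fit(X, y)
    #     X_back = est.inverse_transform(est.transform(X))
    #     # np.allclose(X_back, X) is then expected to hold.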

    def predict(self, X, copy=True):
        """Predict targets of given samples.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples.

        copy : bool, default=True
            Whether to copy `X` and `Y`, or perform in-place normalization.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,) or (n_samples, n_targets)
            Returns predicted values.

        Notes
        -----
        This call requires the estimation of a matrix of shape
        `(n_features, n_targets)`, which may be an issue in high dimensional
        space.
        """
        check_is_fitted(self)
        X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES, reset=False)
        # Only center X but do not scale it since the coefficients are already scaled
        X -= self._x_mean
        Ypred = X @ self.coef_.T + self.intercept_
        return Ypred.ravel() if self._predict_1d else Ypred

    def fit_transform(self, X, y=None):
        """Learn and apply the dimension reduction on the train data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vectors, where `n_samples` is the number of samples and
            `n_features` is the number of predictors.

        y : array-like of shape (n_samples, n_targets), default=None
            Target vectors, where `n_samples` is the number of samples and
            `n_targets` is the number of response variables.

        Returns
        -------
        out : ndarray of shape (n_samples, n_components)
            Return `x_scores` if `y` is not given, `(x_scores, y_scores)` otherwise.
        """
        return self.fit(X, y).transform(X, y)

    def _more_tags(self):
        return {"poor_score": True, "requires_y": False}
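
# Illustrative sketch (not part of the library): `_PLS.predict` above reduces
# to an affine map, so predictions can be reproduced manually from the fitted
# attributes (`_x_mean` is private). `est`, `X` and `y` are placeholder names.
#
#     est = PLSRegression(n_components=2).fit(X, y)
#     manual = (X - est._x_mean) @ est.coef_.T + est.intercept_
#     # np.allclose(manual, est.predict(X)) is expected to hold.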


class PLSRegression(_PLS):
    """PLS regression.

    PLSRegression is also known as PLS2 or PLS1, depending on the number of
    targets.

    For a comparison between other cross decomposition algorithms, see
    :ref:`sphx_glr_auto_examples_cross_decomposition_plot_compare_cross_decomposition.py`.

    Read more in the :ref:`User Guide <cross_decomposition>`.

    .. versionadded:: 0.8

    Parameters
    ----------
    n_components : int, default=2
        Number of components to keep. Should be in `[1, n_features]`.

    scale : bool, default=True
        Whether to scale `X` and `Y`.

    max_iter : int, default=500
        The maximum number of iterations of the power method when
        `algorithm='nipals'`. Ignored otherwise.

    tol : float, default=1e-06
        The tolerance used as convergence criteria in the power method: the
        algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less
        than `tol`, where `u` corresponds to the left singular vector.

    copy : bool, default=True
        Whether to copy `X` and `Y` in :term:`fit` before applying centering,
        and potentially scaling. If `False`, these operations will be done
        inplace, modifying both arrays.

    Attributes
    ----------
    x_weights_ : ndarray of shape (n_features, n_components)
        The left singular vectors of the cross-covariance matrices of each
        iteration.

    y_weights_ : ndarray of shape (n_targets, n_components)
        The right singular vectors of the cross-covariance matrices of each
        iteration.

    x_loadings_ : ndarray of shape (n_features, n_components)
        The loadings of `X`.

    y_loadings_ : ndarray of shape (n_targets, n_components)
        The loadings of `Y`.

    x_scores_ : ndarray of shape (n_samples, n_components)
        The transformed training samples.

    y_scores_ : ndarray of shape (n_samples, n_components)
        The transformed training targets.

    x_rotations_ : ndarray of shape (n_features, n_components)
        The projection matrix used to transform `X`.

    y_rotations_ : ndarray of shape (n_targets, n_components)
        The projection matrix used to transform `Y`.

    coef_ : ndarray of shape (n_targets, n_features)
        The coefficients of the linear model such that `Y` is approximated as
        `Y = X @ coef_.T + intercept_`.

    intercept_ : ndarray of shape (n_targets,)
        The intercepts of the linear model such that `Y` is approximated as
        `Y = X @ coef_.T + intercept_`.

        .. versionadded:: 1.1

    n_iter_ : list of shape (n_components,)
        Number of iterations of the power method, for each component.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    PLSCanonical : Partial Least Squares transformer and regressor.

    Examples
    --------
    >>> from sklearn.cross_decomposition import PLSRegression
    >>> X = [[0., 0., 1.], [1., 0., 0.], [2., 2., 2.], [2., 5., 4.]]
    >>> y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
    >>> pls2 = PLSRegression(n_components=2)
    >>> pls2.fit(X, y)
    PLSRegression()
    >>> Y_pred = pls2.predict(X)

    For a comparison between PLS Regression and :class:`~sklearn.decomposition.PCA`,
    see :ref:`sphx_glr_auto_examples_cross_decomposition_plot_pcr_vs_pls.py`.
    """

    _parameter_constraints: dict = {**_PLS._parameter_constraints}
    for param in ("deflation_mode", "mode", "algorithm"):
        _parameter_constraints.pop(param)

    # This implementation provides the same results that 3 PLS packages
    # provided in the R language (R-project):
    #     - "mixOmics" with function pls(X, Y, mode = "regression")
    #     - "plspm " with function plsreg2(X, Y)
    #     - "pls" with function oscorespls.fit(X, Y)

    def __init__(
        self, n_components=2, *, scale=True, max_iter=500, tol=1e-06, copy=True
    ):
        super().__init__(
            n_components=n_components,
            scale=scale,
            deflation_mode="regression",
            mode="A",
            algorithm="nipals",
            max_iter=max_iter,
            tol=tol,
            copy=copy,
        )

    def fit(self, X, y=None, Y=None):
        """Fit model to data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vectors, where `n_samples` is the number of samples and
            `n_features` is the number of predictors.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target vectors, where `n_samples` is the number of samples and
            `n_targets` is the number of response variables.

        Y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target vectors, where `n_samples` is the number of samples and
            `n_targets` is the number of response variables.

            .. deprecated:: 1.5
               `Y` is deprecated in 1.5 and will be removed in 1.7. Use `y` instead.

        Returns
        -------
        self : object
            Fitted model.
        """
        y = _deprecate_Y_when_required(y, Y)
        super().fit(X, y)
        # expose the fitted attributes `x_scores_` and `y_scores_`
        self.x_scores_ = self._x_scores
        self.y_scores_ = self._y_scores
        return self


class PLSCanonical(_PLS):
    """Partial Least Squares transformer and regressor.

    For a comparison between other cross decomposition algorithms, see
    :ref:`sphx_glr_auto_examples_cross_decomposition_plot_compare_cross_decomposition.py`.

    Read more in the :ref:`User Guide <cross_decomposition>`.

    .. versionadded:: 0.8

    Parameters
    ----------
    n_components : int, default=2
        Number of components to keep. Should be in `[1, min(n_samples,
        n_features, n_targets)]`.

    scale : bool, default=True
        Whether to scale `X` and `Y`.

    algorithm : {'nipals', 'svd'}, default='nipals'
        The algorithm used to estimate the first singular vectors of the
        cross-covariance matrix. 'nipals' uses the power method while 'svd'
        will compute the whole SVD.

    max_iter : int, default=500
        The maximum number of iterations of the power method when
        `algorithm='nipals'`. Ignored otherwise.

    tol : float, default=1e-06
        The tolerance used as convergence criteria in the power method: the
        algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less
        than `tol`, where `u` corresponds to the left singular vector.

    copy : bool, default=True
        Whether to copy `X` and `Y` in fit before applying centering, and
        potentially scaling. If False, these operations will be done inplace,
        modifying both arrays.

    Attributes
    ----------
    x_weights_ : ndarray of shape (n_features, n_components)
        The left singular vectors of the cross-covariance matrices of each
        iteration.

    y_weights_ : ndarray of shape (n_targets, n_components)
        The right singular vectors of the cross-covariance matrices of each
        iteration.

    x_loadings_ : ndarray of shape (n_features, n_components)
        The loadings of `X`.

    y_loadings_ : ndarray of shape (n_targets, n_components)
        The loadings of `Y`.

    x_rotations_ : ndarray of shape (n_features, n_components)
        The projection matrix used to transform `X`.

    y_rotations_ : ndarray of shape (n_targets, n_components)
        The projection matrix used to transform `Y`.

    coef_ : ndarray of shape (n_targets, n_features)
        The coefficients of the linear model such that `Y` is approximated as
        `Y = X @ coef_.T + intercept_`.

    intercept_ : ndarray of shape (n_targets,)
        The intercepts of the linear model such that `Y` is approximated as
        `Y = X @ coef_.T + intercept_`.

        .. versionadded:: 1.1

    n_iter_ : list of shape (n_components,)
        Number of iterations of the power method, for each component. Empty
        if `algorithm='svd'`.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    CCA : Canonical Correlation Analysis.
    PLSSVD : Partial Least Square SVD.

    Examples
    --------
    >>> from sklearn.cross_decomposition import PLSCanonical
    >>> X = [[0., 0., 1.], [1., 0., 0.], [2., 2., 2.], [2., 5., 4.]]
    >>> y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
    >>> plsca = PLSCanonical(n_components=2)
    >>> plsca.fit(X, y)
    PLSCanonical()
    >>> X_c, y_c = plsca.transform(X, y)
    """

    _parameter_constraints: dict = {**_PLS._parameter_constraints}
    for param in ("deflation_mode", "mode"):
        _parameter_constraints.pop(param)

    # This implementation provides the same results that the "plspm" package
    # provided in the R language (R-project), using the function plsca(X, Y).
    # Results are equal or collinear with the function
    # ``pls(..., mode = "canonical")`` of the "mixOmics" package. The
    # difference lies in the fact that the mixOmics implementation does not
    # exactly implement the Wold algorithm since it does not normalize
    # y_weights to one.

    def __init__(
        self,
        n_components=2,
        *,
        scale=True,
        algorithm="nipals",
        max_iter=500,
        tol=1e-06,
        copy=True,
    ):
        super().__init__(
            n_components=n_components,
            scale=scale,
            deflation_mode="canonical",
            mode="A",
            algorithm=algorithm,
            max_iter=max_iter,
            tol=tol,
            copy=copy,
        )


class CCA(_PLS):
    """Canonical Correlation Analysis, also known as "Mode B" PLS.

    For a comparison between other cross decomposition algorithms, see
    :ref:`sphx_glr_auto_examples_cross_decomposition_plot_compare_cross_decomposition.py`.

    Read more in the :ref:`User Guide <cross_decomposition>`.

    Parameters
    ----------
    n_components : int, default=2
        Number of components to keep. Should be in `[1, min(n_samples,
        n_features, n_targets)]`.

    scale : bool, default=True
        Whether to scale `X` and `Y`.

    max_iter : int, default=500
        The maximum number of iterations of the power method.

    tol : float, default=1e-06
        The tolerance used as convergence criteria in the power method: the
        algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less
        than `tol`, where `u` corresponds to the left singular vector.

    copy : bool, default=True
        Whether to copy `X` and `Y` in fit before applying centering, and
        potentially scaling. If False, these operations will be done inplace,
        modifying both arrays.

    Attributes
    ----------
    x_weights_ : ndarray of shape (n_features, n_components)
        The left singular vectors of the cross-covariance matrices of each
        iteration.

    y_weights_ : ndarray of shape (n_targets, n_components)
        The right singular vectors of the cross-covariance matrices of each
        iteration.

    x_loadings_ : ndarray of shape (n_features, n_components)
        The loadings of `X`.

    y_loadings_ : ndarray of shape (n_targets, n_components)
        The loadings of `Y`.

    x_rotations_ : ndarray of shape (n_features, n_components)
        The projection matrix used to transform `X`.

    y_rotations_ : ndarray of shape (n_targets, n_components)
        The projection matrix used to transform `Y`.

    coef_ : ndarray of shape (n_targets, n_features)
        The coefficients of the linear model such that `Y` is approximated as
        `Y = X @ coef_.T + intercept_`.

    intercept_ : ndarray of shape (n_targets,)
        The intercepts of the linear model such that `Y` is approximated as
        `Y = X @ coef_.T + intercept_`.

        .. versionadded:: 1.1

    n_iter_ : list of shape (n_components,)
        Number of iterations of the power method, for each component.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    PLSCanonical : Partial Least Squares transformer and regressor.
    PLSSVD : Partial Least Square SVD.

    Examples
    --------
    >>> from sklearn.cross_decomposition import CCA
    >>> X = [[0., 0., 1.], [1., 0., 0.], [2., 2., 2.], [3., 5., 4.]]
    >>> y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
    >>> cca = CCA(n_components=1)
    >>> cca.fit(X, y)
    CCA(n_components=1)
    >>> X_c, Y_c = cca.transform(X, y)
    """

    _parameter_constraints: dict = {**_PLS._parameter_constraints}
    for param in ("deflation_mode", "mode", "algorithm"):
        _parameter_constraints.pop(param)

    def __init__(
        self, n_components=2, *, scale=True, max_iter=500, tol=1e-06, copy=True
    ):
        super().__init__(
            n_components=n_components,
            scale=scale,
            deflation_mode="canonical",
            mode="B",
            algorithm="nipals",
            max_iter=max_iter,
            tol=tol,
            copy=copy,
        )
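
# Illustrative sketch (not part of the library): CCA uses mode "B", which
# relies on pseudo-inverses of (n_features, n_features) and
# (n_targets, n_targets) matrices (see the comment in
# `_get_first_singular_vectors_power_method`), so it is best suited to the
# n_samples > n_features regime, e.g.:
#
#     rng = np.random.default_rng(0)
#     X = rng.standard_normal((100, 5))
#     y = X[:, :2] + 0.1 * rng.standard_normal((100, 2))
#     cca = CCA(n_components=2).fit(X, y)
#     X_c, y_c = cca.transform(X, y)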


class PLSSVD(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
    """Partial Least Square SVD.

    This transformer simply performs a SVD on the cross-covariance matrix
    `X'Y`. It is able to project both the training data `X` and the targets
    `Y`. The training data `X` is projected on the left singular vectors,
    while the targets are projected on the right singular vectors.

    Read more in the :ref:`User Guide <cross_decomposition>`.

    .. versionadded:: 0.8

    Parameters
    ----------
    n_components : int, default=2
        The number of components to keep. Should be in `[1,
        min(n_samples, n_features, n_targets)]`.

    scale : bool, default=True
        Whether to scale `X` and `Y`.

    copy : bool, default=True
        Whether to copy `X` and `Y` in fit before applying centering, and
        potentially scaling. If `False`, these operations will be done
        inplace, modifying both arrays.

    Attributes
    ----------
    x_weights_ : ndarray of shape (n_features, n_components)
        The left singular vectors of the SVD of the cross-covariance matrix.
        Used to project `X` in :meth:`transform`.

    y_weights_ : ndarray of shape (n_targets, n_components)
        The right singular vectors of the SVD of the cross-covariance matrix.
        Used to project `y` in :meth:`transform`.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    PLSCanonical : Partial Least Squares transformer and regressor.
    CCA : Canonical Correlation Analysis.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.cross_decomposition import PLSSVD
    >>> X = np.array([[0., 0., 1.],
    ...               [1., 0., 0.],
    ...               [2., 2., 2.],
    ...               [2., 5., 4.]])
    >>> y = np.array([[0.1, -0.2],
    ...               [0.9, 1.1],
    ...               [6.2, 5.9],
    ...               [11.9, 12.3]])
    >>> pls = PLSSVD(n_components=2).fit(X, y)
    >>> X_c, y_c = pls.transform(X, y)
    >>> X_c.shape, y_c.shape
    ((4, 2), (4, 2))
    """

    _parameter_constraints: dict = {
        "n_components": [Interval(Integral, 1, None, closed="left")],
        "scale": ["boolean"],
        "copy": ["boolean"],
    }

    def __init__(self, n_components=2, *, scale=True, copy=True):
        self.n_components = n_components
        self.scale = scale
        self.copy = copy

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None, Y=None):
        """Fit model to data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Targets.

        Y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Targets.

            .. deprecated:: 1.5
               `Y` is deprecated in 1.5 and will be removed in 1.7. Use `y` instead.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        y = _deprecate_Y_when_required(y, Y)

        check_consistent_length(X, y)
        X = self._validate_data(
            X, dtype=np.float64, copy=self.copy, ensure_min_samples=2
        )
        y = check_array(
            y, input_name="y", dtype=np.float64, copy=self.copy, ensure_2d=False
        )
        if y.ndim == 1:
            y = y.reshape(-1, 1)

        # we'll compute the SVD of the cross-covariance matrix = X.T.dot(y)
        # This matrix rank is at most min(n_samples, n_features, n_targets) so
        # n_components cannot be bigger than that.
        n_components = self.n_components
        rank_upper_bound = min(X.shape[0], X.shape[1], y.shape[1])
        if n_components > rank_upper_bound:
            raise ValueError(
                f"`n_components` upper bound is {rank_upper_bound}. "
                f"Got {n_components} instead. Reduce `n_components`."
            )

        X, y, self._x_mean, self._y_mean, self._x_std, self._y_std = _center_scale_xy(
            X, y, self.scale
        )

        # Compute SVD of cross-covariance matrix
        C = np.dot(X.T, y)
        U, s, Vt = svd(C, full_matrices=False)
        U = U[:, :n_components]
        Vt = Vt[:n_components]
        U, Vt = svd_flip(U, Vt)
        V = Vt.T

        self.x_weights_ = U
        self.y_weights_ = V
        self._n_features_out = self.x_weights_.shape[1]
        return self
""" y = _deprecate_Y_when_optional(y, Y) check_is_fitted(self) X = self._validate_data(X, dtype=np.float64, reset=False) Xr = (X - self._x_mean) / self._x_std x_scores = np.dot(Xr, self.x_weights_) if y is not None: y = check_array(y, input_name="y", ensure_2d=False, dtype=np.float64) if y.ndim == 1: y = y.reshape(-1, 1) yr = (y - self._y_mean) / self._y_std y_scores = np.dot(yr, self.y_weights_) return x_scores, y_scores return x_scores def fit_transform(self, X, y=None): """Learn and apply the dimensionality reduction. Parameters ---------- X : array-like of shape (n_samples, n_features) Training samples. y : array-like of shape (n_samples,) or (n_samples, n_targets), \ default=None Targets. Returns ------- out : array-like or tuple of array-like The transformed data `X_transformed` if `Y is not None`, `(X_transformed, Y_transformed)` otherwise. """ return self.fit(X, y).transform(X, y)