Traktor/myenv/Lib/site-packages/sklearn/cross_decomposition/_pls.py
2024-05-23 01:57:24 +02:00

1154 lines
38 KiB
Python

"""
The :mod:`sklearn.cross_decomposition` module implements Partial Least Squares (PLS).
"""
# Author: Edouard Duchesnay <edouard.duchesnay@cea.fr>
# License: BSD 3 clause
import warnings
from abc import ABCMeta, abstractmethod
from numbers import Integral, Real
import numpy as np
from scipy.linalg import svd
from ..base import (
BaseEstimator,
ClassNamePrefixFeaturesOutMixin,
MultiOutputMixin,
RegressorMixin,
TransformerMixin,
_fit_context,
)
from ..exceptions import ConvergenceWarning
from ..utils import check_array, check_consistent_length
from ..utils._param_validation import Interval, StrOptions
from ..utils.extmath import svd_flip
from ..utils.fixes import parse_version, sp_version
from ..utils.validation import FLOAT_DTYPES, check_is_fitted
# Public estimators defined in this module. `CCA` is listed with its sibling
# estimators so that star-imports and API tooling expose it as well.
__all__ = ["CCA", "PLSCanonical", "PLSRegression", "PLSSVD"]
if sp_version >= parse_version("1.7"):
# Starting in scipy 1.7 pinv2 was deprecated in favor of pinv.
# pinv now uses the svd to compute the pseudo-inverse.
from scipy.linalg import pinv as pinv2
else:
from scipy.linalg import pinv2
def _pinv2_old(a):
# Used previous scipy pinv2 that was updated in:
# https://github.com/scipy/scipy/pull/10067
# We can not set `cond` or `rcond` for pinv2 in scipy >= 1.3 to keep the
# same behavior of pinv2 for scipy < 1.3, because the condition used to
# determine the rank is dependent on the output of svd.
u, s, vh = svd(a, full_matrices=False, check_finite=False)
t = u.dtype.char.lower()
factor = {"f": 1e3, "d": 1e6}
cond = np.max(s) * factor[t] * np.finfo(t).eps
rank = np.sum(s > cond)
u = u[:, :rank]
u /= s[:rank]
return np.transpose(np.conjugate(np.dot(u, vh[:rank])))
def _get_first_singular_vectors_power_method(
X, Y, mode="A", max_iter=500, tol=1e-06, norm_y_weights=False
):
"""Return the first left and right singular vectors of X'Y.
Provides an alternative to the svd(X'Y) and uses the power method instead.
With norm_y_weights to True and in mode A, this corresponds to the
algorithm section 11.3 of the Wegelin's review, except this starts at the
"update saliences" part.
"""
eps = np.finfo(X.dtype).eps
try:
y_score = next(col for col in Y.T if np.any(np.abs(col) > eps))
except StopIteration as e:
raise StopIteration("y residual is constant") from e
x_weights_old = 100 # init to big value for first convergence check
if mode == "B":
# Precompute pseudo inverse matrices
# Basically: X_pinv = (X.T X)^-1 X.T
# Which requires inverting a (n_features, n_features) matrix.
# As a result, and as detailed in the Wegelin's review, CCA (i.e. mode
# B) will be unstable if n_features > n_samples or n_targets >
# n_samples
X_pinv, Y_pinv = _pinv2_old(X), _pinv2_old(Y)
for i in range(max_iter):
if mode == "B":
x_weights = np.dot(X_pinv, y_score)
else:
x_weights = np.dot(X.T, y_score) / np.dot(y_score, y_score)
x_weights /= np.sqrt(np.dot(x_weights, x_weights)) + eps
x_score = np.dot(X, x_weights)
if mode == "B":
y_weights = np.dot(Y_pinv, x_score)
else:
y_weights = np.dot(Y.T, x_score) / np.dot(x_score.T, x_score)
if norm_y_weights:
y_weights /= np.sqrt(np.dot(y_weights, y_weights)) + eps
y_score = np.dot(Y, y_weights) / (np.dot(y_weights, y_weights) + eps)
x_weights_diff = x_weights - x_weights_old
if np.dot(x_weights_diff, x_weights_diff) < tol or Y.shape[1] == 1:
break
x_weights_old = x_weights
n_iter = i + 1
if n_iter == max_iter:
warnings.warn("Maximum number of iterations reached", ConvergenceWarning)
return x_weights, y_weights, n_iter
def _get_first_singular_vectors_svd(X, Y):
"""Return the first left and right singular vectors of X'Y.
Here the whole SVD is computed.
"""
C = np.dot(X.T, Y)
U, _, Vt = svd(C, full_matrices=False)
return U[:, 0], Vt[0, :]
def _center_scale_xy(X, Y, scale=True):
"""Center X, Y and scale if the scale parameter==True
Returns
-------
X, Y, x_mean, y_mean, x_std, y_std
"""
# center
x_mean = X.mean(axis=0)
X -= x_mean
y_mean = Y.mean(axis=0)
Y -= y_mean
# scale
if scale:
x_std = X.std(axis=0, ddof=1)
x_std[x_std == 0.0] = 1.0
X /= x_std
y_std = Y.std(axis=0, ddof=1)
y_std[y_std == 0.0] = 1.0
Y /= y_std
else:
x_std = np.ones(X.shape[1])
y_std = np.ones(Y.shape[1])
return X, Y, x_mean, y_mean, x_std, y_std
def _svd_flip_1d(u, v):
"""Same as svd_flip but works on 1d arrays, and is inplace"""
# svd_flip would force us to convert to 2d array and would also return 2d
# arrays. We don't want that.
biggest_abs_val_idx = np.argmax(np.abs(u))
sign = np.sign(u[biggest_abs_val_idx])
u *= sign
v *= sign
# TODO(1.7): Remove
def _deprecate_Y_when_optional(y, Y):
if Y is not None:
warnings.warn(
"`Y` is deprecated in 1.5 and will be removed in 1.7. Use `y` instead.",
FutureWarning,
)
if y is not None:
raise ValueError(
"Cannot use both `y` and `Y`. Use only `y` as `Y` is deprecated."
)
return Y
return y
# TODO(1.7): Remove
def _deprecate_Y_when_required(y, Y):
if y is None and Y is None:
raise ValueError("y is required.")
return _deprecate_Y_when_optional(y, Y)
class _PLS(
    ClassNamePrefixFeaturesOutMixin,
    TransformerMixin,
    RegressorMixin,
    MultiOutputMixin,
    BaseEstimator,
    metaclass=ABCMeta,
):
    """Generic Partial Least Squares (PLS) estimator.

    Abstract base class holding the PLS algorithm shared by the concrete
    estimators of this module.

    Main ref: Wegelin, a survey of Partial Least Squares (PLS) methods,
    with emphasis on the two-block case
    https://stat.uw.edu/sites/default/files/files/reports/2000/tr371.pdf
    """

    _parameter_constraints: dict = {
        "n_components": [Interval(Integral, 1, None, closed="left")],
        "scale": ["boolean"],
        "deflation_mode": [StrOptions({"regression", "canonical"})],
        "mode": [StrOptions({"A", "B"})],
        "algorithm": [StrOptions({"svd", "nipals"})],
        "max_iter": [Interval(Integral, 1, None, closed="left")],
        "tol": [Interval(Real, 0, None, closed="left")],
        "copy": ["boolean"],
    }

    @abstractmethod
    def __init__(
        self,
        n_components=2,
        *,
        scale=True,
        deflation_mode="regression",
        mode="A",
        algorithm="nipals",
        max_iter=500,
        tol=1e-06,
        copy=True,
    ):
        # Hyper-parameters are stored as given, without any processing.
        self.n_components = n_components
        self.scale = scale
        self.deflation_mode = deflation_mode
        self.mode = mode
        self.algorithm = algorithm
        self.max_iter = max_iter
        self.tol = tol
        self.copy = copy

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None, Y=None):
        """Fit the PLS model on training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vectors, with `n_samples` samples and `n_features`
            predictors.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target vectors, with `n_samples` samples and `n_targets`
            response variables.

        Y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target vectors, with `n_samples` samples and `n_targets`
            response variables.

            .. deprecated:: 1.5
               `Y` is deprecated in 1.5 and will be removed in 1.7. Use `y` instead.

        Returns
        -------
        self : object
            Fitted model.
        """
        y = _deprecate_Y_when_required(y, Y)

        check_consistent_length(X, y)
        X = self._validate_data(
            X, dtype=np.float64, copy=self.copy, ensure_min_samples=2
        )
        y = check_array(
            y, input_name="y", dtype=np.float64, copy=self.copy, ensure_2d=False
        )
        # Remember whether the targets were 1d so `predict` can ravel its output.
        self._predict_1d = y.ndim == 1
        if self._predict_1d:
            y = y.reshape(-1, 1)

        n_samples = X.shape[0]
        n_features = X.shape[1]
        n_targets = y.shape[1]

        n_components = self.n_components
        # With PLSRegression n_components is bounded by the rank of (X.T X) see
        # Wegelin page 25. With CCA and PLSCanonical, n_components is bounded
        # by the rank of X and the rank of Y: see Wegelin page 12
        if self.deflation_mode == "regression":
            rank_upper_bound = n_features
        else:
            rank_upper_bound = min(n_samples, n_features, n_targets)
        if n_components > rank_upper_bound:
            raise ValueError(
                f"`n_components` upper bound is {rank_upper_bound}. "
                f"Got {n_components} instead. Reduce `n_components`."
            )

        self._norm_y_weights = self.deflation_mode == "canonical"  # 1.1
        norm_y_weights = self._norm_y_weights

        # Center and optionally scale; X and y are modified in place.
        Xk, yk, self._x_mean, self._y_mean, self._x_std, self._y_std = _center_scale_xy(
            X, y, self.scale
        )

        # The trailing comments give the matching symbols of Wegelin's review.
        self.x_weights_ = np.zeros((n_features, n_components))  # U
        self.y_weights_ = np.zeros((n_targets, n_components))  # V
        self._x_scores = np.zeros((n_samples, n_components))  # Xi
        self._y_scores = np.zeros((n_samples, n_components))  # Omega
        self.x_loadings_ = np.zeros((n_features, n_components))  # Gamma
        self.y_loadings_ = np.zeros((n_targets, n_components))  # Delta
        self.n_iter_ = []

        # The loop below follows the algorithm of section 4.1 of Wegelin's
        # review.
        y_eps = np.finfo(yk.dtype).eps
        for k in range(n_components):
            # Leading singular vectors of the Xk.T.dot(yk) cross-covariance.
            if self.algorithm == "nipals":
                # Columns of yk that are entirely close to zero are zeroed out.
                near_zero_cols = np.all(np.abs(yk) < 10 * y_eps, axis=0)
                yk[:, near_zero_cols] = 0.0

                try:
                    (
                        x_weights,
                        y_weights,
                        n_iter_,
                    ) = _get_first_singular_vectors_power_method(
                        Xk,
                        yk,
                        mode=self.mode,
                        max_iter=self.max_iter,
                        tol=self.tol,
                        norm_y_weights=norm_y_weights,
                    )
                except StopIteration as exc:
                    if str(exc) == "y residual is constant":
                        warnings.warn(f"y residual is constant at iteration {k}")
                        break
                    raise

                self.n_iter_.append(n_iter_)

            elif self.algorithm == "svd":
                x_weights, y_weights = _get_first_singular_vectors_svd(Xk, yk)

            # inplace sign flip for consistency across solvers and archs
            _svd_flip_1d(x_weights, y_weights)

            # Scores are the projections of the residuals on the weights.
            x_scores = np.dot(Xk, x_weights)
            y_ss = 1 if norm_y_weights else np.dot(y_weights, y_weights)
            y_scores = np.dot(yk, y_weights) / y_ss

            # Deflation: remove the rank-one approximation to get Xk+1, yk+1.
            x_loadings = np.dot(x_scores, Xk) / np.dot(x_scores, x_scores)
            Xk -= np.outer(x_scores, x_loadings)

            if self.deflation_mode == "canonical":
                # yk is regressed on its own score.
                y_loadings = np.dot(y_scores, yk) / np.dot(y_scores, y_scores)
                yk -= np.outer(y_scores, y_loadings)
            if self.deflation_mode == "regression":
                # yk is regressed on the X score.
                y_loadings = np.dot(x_scores, yk) / np.dot(x_scores, x_scores)
                yk -= np.outer(x_scores, y_loadings)

            # Store the k-th component.
            self.x_weights_[:, k] = x_weights
            self.y_weights_[:, k] = y_weights
            self._x_scores[:, k] = x_scores
            self._y_scores[:, k] = y_scores
            self.x_loadings_[:, k] = x_loadings
            self.y_loadings_[:, k] = y_loadings

        # X was approximated as Xi . Gamma.T + X_(R+1)
        # Xi . Gamma.T is a sum of n_components rank-1 matrices. X_(R+1) is
        # whatever is left to fully reconstruct X, and can be 0 if X is of rank
        # n_components.
        # Similarly, y was approximated as Omega . Delta.T + y_(R+1)

        # Compute transformation matrices (rotations_). See User Guide.
        x_cross = np.dot(self.x_loadings_.T, self.x_weights_)
        self.x_rotations_ = np.dot(
            self.x_weights_,
            pinv2(x_cross, check_finite=False),
        )
        y_cross = np.dot(self.y_loadings_.T, self.y_weights_)
        self.y_rotations_ = np.dot(
            self.y_weights_,
            pinv2(y_cross, check_finite=False),
        )

        scaled_coef = np.dot(self.x_rotations_, self.y_loadings_.T)
        self.coef_ = (scaled_coef * self._y_std).T / self._x_std
        self.intercept_ = self._y_mean
        self._n_features_out = self.x_rotations_.shape[1]
        return self

    def transform(self, X, y=None, Y=None, copy=True):
        """Project data onto the fitted components.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to transform.

        y : array-like of shape (n_samples, n_targets), default=None
            Target vectors.

        Y : array-like of shape (n_samples, n_targets), default=None
            Target vectors.

            .. deprecated:: 1.5
               `Y` is deprecated in 1.5 and will be removed in 1.7. Use `y` instead.

        copy : bool, default=True
            Whether to copy `X` and `y`, or perform in-place normalization.

        Returns
        -------
        x_scores, y_scores : array-like or tuple of array-like
            `x_scores` alone when no target is given, `(x_scores, y_scores)`
            otherwise.
        """
        y = _deprecate_Y_when_optional(y, Y)

        check_is_fitted(self)
        X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES, reset=False)
        # Apply the centering and scaling learned in `fit`, then rotate.
        X -= self._x_mean
        X /= self._x_std
        x_scores = np.dot(X, self.x_rotations_)
        if y is None:
            return x_scores

        y = check_array(
            y, input_name="y", ensure_2d=False, copy=copy, dtype=FLOAT_DTYPES
        )
        if y.ndim == 1:
            y = y.reshape(-1, 1)
        y -= self._y_mean
        y /= self._y_std
        y_scores = np.dot(y, self.y_rotations_)
        return x_scores, y_scores

    def inverse_transform(self, X, y=None, Y=None):
        """Map scores back to the original feature (and target) space.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_components)
            New data, with `n_samples` samples and `n_components` pls
            components.

        y : array-like of shape (n_samples,) or (n_samples, n_components)
            New target, with `n_samples` samples and `n_components` pls
            components.

        Y : array-like of shape (n_samples, n_components)
            New target, with `n_samples` samples and `n_components` pls
            components.

            .. deprecated:: 1.5
               `Y` is deprecated in 1.5 and will be removed in 1.7. Use `y` instead.

        Returns
        -------
        X_reconstructed : ndarray of shape (n_samples, n_features)
            Return the reconstructed `X` data.

        y_reconstructed : ndarray of shape (n_samples, n_targets)
            Return the reconstructed `y` target. Only returned when `y` is given.

        Notes
        -----
        This transformation will only be exact if `n_components=n_features`.
        """
        y = _deprecate_Y_when_optional(y, Y)

        check_is_fitted(self)
        X = check_array(X, input_name="X", dtype=FLOAT_DTYPES)
        # Back from pls space to the original space, then undo normalization.
        X_reconstructed = np.matmul(X, self.x_loadings_.T)
        X_reconstructed *= self._x_std
        X_reconstructed += self._x_mean
        if y is None:
            return X_reconstructed

        y = check_array(y, input_name="y", dtype=FLOAT_DTYPES)
        y_reconstructed = np.matmul(y, self.y_loadings_.T)
        y_reconstructed *= self._y_std
        y_reconstructed += self._y_mean
        return X_reconstructed, y_reconstructed

    def predict(self, X, copy=True):
        """Predict the targets of the given samples.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples.

        copy : bool, default=True
            Whether to copy `X`, or perform in-place centering.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,) or (n_samples, n_targets)
            Returns predicted values.

        Notes
        -----
        This call requires the estimation of a matrix of shape
        `(n_features, n_targets)`, which may be an issue in high dimensional
        space.
        """
        check_is_fitted(self)
        X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES, reset=False)
        # X is centered but not scaled: `coef_` already includes the scaling.
        X -= self._x_mean
        Ypred = X @ self.coef_.T + self.intercept_
        if self._predict_1d:
            return Ypred.ravel()
        return Ypred

    def fit_transform(self, X, y=None):
        """Fit the model, then transform the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vectors, with `n_samples` samples and `n_features`
            predictors.

        y : array-like of shape (n_samples, n_targets), default=None
            Target vectors, with `n_samples` samples and `n_targets`
            response variables.

        Returns
        -------
        out : ndarray of shape (n_samples, n_components) or tuple of ndarray
            The value returned by `transform(X, y)` on the fitted model.
        """
        fitted = self.fit(X, y)
        return fitted.transform(X, y)

    def _more_tags(self):
        return {"poor_score": True, "requires_y": False}
class PLSRegression(_PLS):
    """PLS regression.

    Depending on the number of targets, PLSRegression is also known as PLS2
    or PLS1.

    For a comparison between other cross decomposition algorithms, see
    :ref:`sphx_glr_auto_examples_cross_decomposition_plot_compare_cross_decomposition.py`.

    Read more in the :ref:`User Guide <cross_decomposition>`.

    .. versionadded:: 0.8

    Parameters
    ----------
    n_components : int, default=2
        Number of components to keep. Should be in `[1, n_features]`.

    scale : bool, default=True
        Whether to scale `X` and `Y`.

    max_iter : int, default=500
        The maximum number of iterations of the power method when
        `algorithm='nipals'`. Ignored otherwise.

    tol : float, default=1e-06
        The tolerance used as convergence criteria in the power method: the
        algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less
        than `tol`, where `u` corresponds to the left singular vector.

    copy : bool, default=True
        Whether to copy `X` and `Y` in :term:`fit` before applying centering,
        and potentially scaling. If `False`, these operations will be done
        inplace, modifying both arrays.

    Attributes
    ----------
    x_weights_ : ndarray of shape (n_features, n_components)
        The left singular vectors of the cross-covariance matrices of each
        iteration.

    y_weights_ : ndarray of shape (n_targets, n_components)
        The right singular vectors of the cross-covariance matrices of each
        iteration.

    x_loadings_ : ndarray of shape (n_features, n_components)
        The loadings of `X`.

    y_loadings_ : ndarray of shape (n_targets, n_components)
        The loadings of `Y`.

    x_scores_ : ndarray of shape (n_samples, n_components)
        The transformed training samples.

    y_scores_ : ndarray of shape (n_samples, n_components)
        The transformed training targets.

    x_rotations_ : ndarray of shape (n_features, n_components)
        The projection matrix used to transform `X`.

    y_rotations_ : ndarray of shape (n_targets, n_components)
        The projection matrix used to transform `Y`.

    coef_ : ndarray of shape (n_targets, n_features)
        The coefficients of the linear model such that `Y` is approximated as
        `Y = X @ coef_.T + intercept_`.

    intercept_ : ndarray of shape (n_targets,)
        The intercepts of the linear model such that `Y` is approximated as
        `Y = X @ coef_.T + intercept_`.

        .. versionadded:: 1.1

    n_iter_ : list of shape (n_components,)
        Number of iterations of the power method, for each
        component.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    PLSCanonical : Partial Least Squares transformer and regressor.

    Examples
    --------
    >>> from sklearn.cross_decomposition import PLSRegression
    >>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]
    >>> y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
    >>> pls2 = PLSRegression(n_components=2)
    >>> pls2.fit(X, y)
    PLSRegression()
    >>> Y_pred = pls2.predict(X)

    For a comparison between PLS Regression and :class:`~sklearn.decomposition.PCA`, see
    :ref:`sphx_glr_auto_examples_cross_decomposition_plot_pcr_vs_pls.py`.
    """

    # Start from the generic constraints and drop the parameters that this
    # estimator hard-codes in `__init__`.
    _parameter_constraints: dict = dict(_PLS._parameter_constraints)
    for param in ("deflation_mode", "mode", "algorithm"):
        del _parameter_constraints[param]

    # This implementation provides the same results that 3 PLS packages
    # provided in the R language (R-project):
    #     - "mixOmics" with function pls(X, Y, mode = "regression")
    #     - "plspm " with function plsreg2(X, Y)
    #     - "pls" with function oscorespls.fit(X, Y)

    def __init__(
        self, n_components=2, *, scale=True, max_iter=500, tol=1e-06, copy=True
    ):
        super().__init__(
            n_components=n_components,
            scale=scale,
            max_iter=max_iter,
            tol=tol,
            copy=copy,
            # Fixed settings that define PLS regression.
            deflation_mode="regression",
            mode="A",
            algorithm="nipals",
        )

    def fit(self, X, y=None, Y=None):
        """Fit the PLS regression model on training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vectors, with `n_samples` samples and `n_features`
            predictors.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target vectors, with `n_samples` samples and `n_targets`
            response variables.

        Y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target vectors, with `n_samples` samples and `n_targets`
            response variables.

            .. deprecated:: 1.5
               `Y` is deprecated in 1.5 and will be removed in 1.7. Use `y` instead.

        Returns
        -------
        self : object
            Fitted model.
        """
        y = _deprecate_Y_when_required(y, Y)

        super().fit(X, y)
        # Make the training scores available as public fitted attributes.
        self.x_scores_, self.y_scores_ = self._x_scores, self._y_scores
        return self
class PLSCanonical(_PLS):
    """Partial Least Squares transformer and regressor.

    For a comparison between other cross decomposition algorithms, see
    :ref:`sphx_glr_auto_examples_cross_decomposition_plot_compare_cross_decomposition.py`.

    Read more in the :ref:`User Guide <cross_decomposition>`.

    .. versionadded:: 0.8

    Parameters
    ----------
    n_components : int, default=2
        Number of components to keep. Should be in `[1, min(n_samples,
        n_features, n_targets)]`.

    scale : bool, default=True
        Whether to scale `X` and `Y`.

    algorithm : {'nipals', 'svd'}, default='nipals'
        The algorithm used to estimate the first singular vectors of the
        cross-covariance matrix. 'nipals' uses the power method while 'svd'
        will compute the whole SVD.

    max_iter : int, default=500
        The maximum number of iterations of the power method when
        `algorithm='nipals'`. Ignored otherwise.

    tol : float, default=1e-06
        The tolerance used as convergence criteria in the power method: the
        algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less
        than `tol`, where `u` corresponds to the left singular vector.

    copy : bool, default=True
        Whether to copy `X` and `Y` in fit before applying centering, and
        potentially scaling. If False, these operations will be done inplace,
        modifying both arrays.

    Attributes
    ----------
    x_weights_ : ndarray of shape (n_features, n_components)
        The left singular vectors of the cross-covariance matrices of each
        iteration.

    y_weights_ : ndarray of shape (n_targets, n_components)
        The right singular vectors of the cross-covariance matrices of each
        iteration.

    x_loadings_ : ndarray of shape (n_features, n_components)
        The loadings of `X`.

    y_loadings_ : ndarray of shape (n_targets, n_components)
        The loadings of `Y`.

    x_rotations_ : ndarray of shape (n_features, n_components)
        The projection matrix used to transform `X`.

    y_rotations_ : ndarray of shape (n_targets, n_components)
        The projection matrix used to transform `Y`.

    coef_ : ndarray of shape (n_targets, n_features)
        The coefficients of the linear model such that `Y` is approximated as
        `Y = X @ coef_.T + intercept_`.

    intercept_ : ndarray of shape (n_targets,)
        The intercepts of the linear model such that `Y` is approximated as
        `Y = X @ coef_.T + intercept_`.

        .. versionadded:: 1.1

    n_iter_ : list of shape (n_components,)
        Number of iterations of the power method, for each
        component. Empty if `algorithm='svd'`.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    CCA : Canonical Correlation Analysis.
    PLSSVD : Partial Least Square SVD.

    Examples
    --------
    >>> from sklearn.cross_decomposition import PLSCanonical
    >>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]
    >>> y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
    >>> plsca = PLSCanonical(n_components=2)
    >>> plsca.fit(X, y)
    PLSCanonical()
    >>> X_c, y_c = plsca.transform(X, y)
    """

    # Start from the generic constraints and drop the parameters that this
    # estimator hard-codes in `__init__`.
    _parameter_constraints: dict = dict(_PLS._parameter_constraints)
    for param in ("deflation_mode", "mode"):
        del _parameter_constraints[param]

    # This implementation provides the same results that the "plspm" package
    # provided in the R language (R-project), using the function plsca(X, Y).
    # Results are equal or collinear with the function
    # ``pls(..., mode = "canonical")`` of the "mixOmics" package. The
    # difference relies in the fact that mixOmics implementation does not
    # exactly implement the Wold algorithm since it does not normalize
    # y_weights to one.

    def __init__(
        self,
        n_components=2,
        *,
        scale=True,
        algorithm="nipals",
        max_iter=500,
        tol=1e-06,
        copy=True,
    ):
        super().__init__(
            n_components=n_components,
            scale=scale,
            algorithm=algorithm,
            max_iter=max_iter,
            tol=tol,
            copy=copy,
            # Fixed settings that define canonical PLS.
            deflation_mode="canonical",
            mode="A",
        )
class CCA(_PLS):
    """Canonical Correlation Analysis, also known as "Mode B" PLS.

    For a comparison between other cross decomposition algorithms, see
    :ref:`sphx_glr_auto_examples_cross_decomposition_plot_compare_cross_decomposition.py`.

    Read more in the :ref:`User Guide <cross_decomposition>`.

    Parameters
    ----------
    n_components : int, default=2
        Number of components to keep. Should be in `[1, min(n_samples,
        n_features, n_targets)]`.

    scale : bool, default=True
        Whether to scale `X` and `Y`.

    max_iter : int, default=500
        The maximum number of iterations of the power method.

    tol : float, default=1e-06
        The tolerance used as convergence criteria in the power method: the
        algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less
        than `tol`, where `u` corresponds to the left singular vector.

    copy : bool, default=True
        Whether to copy `X` and `Y` in fit before applying centering, and
        potentially scaling. If False, these operations will be done inplace,
        modifying both arrays.

    Attributes
    ----------
    x_weights_ : ndarray of shape (n_features, n_components)
        The left singular vectors of the cross-covariance matrices of each
        iteration.

    y_weights_ : ndarray of shape (n_targets, n_components)
        The right singular vectors of the cross-covariance matrices of each
        iteration.

    x_loadings_ : ndarray of shape (n_features, n_components)
        The loadings of `X`.

    y_loadings_ : ndarray of shape (n_targets, n_components)
        The loadings of `Y`.

    x_rotations_ : ndarray of shape (n_features, n_components)
        The projection matrix used to transform `X`.

    y_rotations_ : ndarray of shape (n_targets, n_components)
        The projection matrix used to transform `Y`.

    coef_ : ndarray of shape (n_targets, n_features)
        The coefficients of the linear model such that `Y` is approximated as
        `Y = X @ coef_.T + intercept_`.

    intercept_ : ndarray of shape (n_targets,)
        The intercepts of the linear model such that `Y` is approximated as
        `Y = X @ coef_.T + intercept_`.

        .. versionadded:: 1.1

    n_iter_ : list of shape (n_components,)
        Number of iterations of the power method, for each
        component.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    PLSCanonical : Partial Least Squares transformer and regressor.
    PLSSVD : Partial Least Square SVD.

    Examples
    --------
    >>> from sklearn.cross_decomposition import CCA
    >>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [3.,5.,4.]]
    >>> y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
    >>> cca = CCA(n_components=1)
    >>> cca.fit(X, y)
    CCA(n_components=1)
    >>> X_c, Y_c = cca.transform(X, y)
    """

    # Start from the generic constraints and drop the parameters that this
    # estimator hard-codes in `__init__`.
    _parameter_constraints: dict = dict(_PLS._parameter_constraints)
    for param in ("deflation_mode", "mode", "algorithm"):
        del _parameter_constraints[param]

    def __init__(
        self, n_components=2, *, scale=True, max_iter=500, tol=1e-06, copy=True
    ):
        super().__init__(
            n_components=n_components,
            scale=scale,
            max_iter=max_iter,
            tol=tol,
            copy=copy,
            # Fixed settings that define CCA ("Mode B" PLS).
            deflation_mode="canonical",
            mode="B",
            algorithm="nipals",
        )
class PLSSVD(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
    """Partial Least Square SVD.

    This transformer simply performs a SVD on the cross-covariance matrix
    `X'Y`. It is able to project both the training data `X` and the targets
    `Y`. The training data `X` is projected on the left singular vectors, while
    the targets are projected on the right singular vectors.

    Read more in the :ref:`User Guide <cross_decomposition>`.

    .. versionadded:: 0.8

    Parameters
    ----------
    n_components : int, default=2
        The number of components to keep. Should be in `[1,
        min(n_samples, n_features, n_targets)]`.

    scale : bool, default=True
        Whether to scale `X` and `Y`.

    copy : bool, default=True
        Whether to copy `X` and `Y` in fit before applying centering, and
        potentially scaling. If `False`, these operations will be done inplace,
        modifying both arrays.

    Attributes
    ----------
    x_weights_ : ndarray of shape (n_features, n_components)
        The left singular vectors of the SVD of the cross-covariance matrix.
        Used to project `X` in :meth:`transform`.

    y_weights_ : ndarray of (n_targets, n_components)
        The right singular vectors of the SVD of the cross-covariance matrix.
        Used to project `Y` in :meth:`transform`.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    PLSCanonical : Partial Least Squares transformer and regressor.
    CCA : Canonical Correlation Analysis.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.cross_decomposition import PLSSVD
    >>> X = np.array([[0., 0., 1.],
    ...               [1., 0., 0.],
    ...               [2., 2., 2.],
    ...               [2., 5., 4.]])
    >>> y = np.array([[0.1, -0.2],
    ...               [0.9, 1.1],
    ...               [6.2, 5.9],
    ...               [11.9, 12.3]])
    >>> pls = PLSSVD(n_components=2).fit(X, y)
    >>> X_c, y_c = pls.transform(X, y)
    >>> X_c.shape, y_c.shape
    ((4, 2), (4, 2))
    """

    _parameter_constraints: dict = {
        "n_components": [Interval(Integral, 1, None, closed="left")],
        "scale": ["boolean"],
        "copy": ["boolean"],
    }

    def __init__(self, n_components=2, *, scale=True, copy=True):
        self.n_components = n_components
        self.scale = scale
        self.copy = copy

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None, Y=None):
        """Fit model to data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Targets.

        Y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Targets.

            .. deprecated:: 1.5
               `Y` is deprecated in 1.5 and will be removed in 1.7. Use `y` instead.

        Returns
        -------
        self : object
            Fitted estimator.

        Raises
        ------
        ValueError
            If neither `y` nor `Y` is given, if both are given, or if
            `n_components` exceeds `min(n_samples, n_features, n_targets)`.
        """
        y = _deprecate_Y_when_required(y, Y)
        check_consistent_length(X, y)
        X = self._validate_data(
            X, dtype=np.float64, copy=self.copy, ensure_min_samples=2
        )
        y = check_array(
            y, input_name="y", dtype=np.float64, copy=self.copy, ensure_2d=False
        )
        if y.ndim == 1:
            y = y.reshape(-1, 1)

        # we'll compute the SVD of the cross-covariance matrix = X.T.dot(y)
        # This matrix rank is at most min(n_samples, n_features, n_targets) so
        # n_components cannot be bigger than that.
        n_components = self.n_components
        rank_upper_bound = min(X.shape[0], X.shape[1], y.shape[1])
        if n_components > rank_upper_bound:
            raise ValueError(
                f"`n_components` upper bound is {rank_upper_bound}. "
                f"Got {n_components} instead. Reduce `n_components`."
            )

        X, y, self._x_mean, self._y_mean, self._x_std, self._y_std = _center_scale_xy(
            X, y, self.scale
        )

        # Compute SVD of cross-covariance matrix; the singular values
        # themselves are not needed, only the singular vectors.
        C = np.dot(X.T, y)
        U, _, Vt = svd(C, full_matrices=False)
        U = U[:, :n_components]
        Vt = Vt[:n_components]
        U, Vt = svd_flip(U, Vt)
        V = Vt.T

        self.x_weights_ = U
        self.y_weights_ = V
        self._n_features_out = self.x_weights_.shape[1]
        return self

    def transform(self, X, y=None, Y=None):
        """
        Apply the dimensionality reduction.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to be transformed.

        y : array-like of shape (n_samples,) or (n_samples, n_targets), \
                default=None
            Targets.

        Y : array-like of shape (n_samples,) or (n_samples, n_targets), \
                default=None
            Targets.

            .. deprecated:: 1.5
               `Y` is deprecated in 1.5 and will be removed in 1.7. Use `y` instead.

        Returns
        -------
        x_scores : array-like or tuple of array-like
            The transformed data `X_transformed` if `y is None`,
            `(X_transformed, Y_transformed)` otherwise.
        """
        y = _deprecate_Y_when_optional(y, Y)
        check_is_fitted(self)
        X = self._validate_data(X, dtype=np.float64, reset=False)
        # Apply the centering/scaling learned in `fit` without modifying X.
        Xr = (X - self._x_mean) / self._x_std
        x_scores = np.dot(Xr, self.x_weights_)
        if y is not None:
            y = check_array(y, input_name="y", ensure_2d=False, dtype=np.float64)
            if y.ndim == 1:
                y = y.reshape(-1, 1)
            yr = (y - self._y_mean) / self._y_std
            y_scores = np.dot(yr, self.y_weights_)
            return x_scores, y_scores
        return x_scores

    def fit_transform(self, X, y=None):
        """Learn and apply the dimensionality reduction.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.

        y : array-like of shape (n_samples,) or (n_samples, n_targets), \
                default=None
            Targets.

        Returns
        -------
        out : array-like or tuple of array-like
            The transformed data `X_transformed` if `y is None`,
            `(X_transformed, Y_transformed)` otherwise.
        """
        return self.fit(X, y).transform(X, y)