1154 lines
38 KiB
Python
1154 lines
38 KiB
Python
"""
|
|
The :mod:`sklearn.pls` module implements Partial Least Squares (PLS).
|
|
"""
|
|
|
|
# Author: Edouard Duchesnay <edouard.duchesnay@cea.fr>
|
|
# License: BSD 3 clause
|
|
|
|
import warnings
|
|
from abc import ABCMeta, abstractmethod
|
|
from numbers import Integral, Real
|
|
|
|
import numpy as np
|
|
from scipy.linalg import svd
|
|
|
|
from ..base import (
|
|
BaseEstimator,
|
|
ClassNamePrefixFeaturesOutMixin,
|
|
MultiOutputMixin,
|
|
RegressorMixin,
|
|
TransformerMixin,
|
|
_fit_context,
|
|
)
|
|
from ..exceptions import ConvergenceWarning
|
|
from ..utils import check_array, check_consistent_length
|
|
from ..utils._param_validation import Interval, StrOptions
|
|
from ..utils.extmath import svd_flip
|
|
from ..utils.fixes import parse_version, sp_version
|
|
from ..utils.validation import FLOAT_DTYPES, check_is_fitted
|
|
|
|
__all__ = ["PLSCanonical", "PLSRegression", "PLSSVD"]
|
|
|
|
|
|
if sp_version >= parse_version("1.7"):
|
|
# Starting in scipy 1.7 pinv2 was deprecated in favor of pinv.
|
|
# pinv now uses the svd to compute the pseudo-inverse.
|
|
from scipy.linalg import pinv as pinv2
|
|
else:
|
|
from scipy.linalg import pinv2
|
|
|
|
|
|
def _pinv2_old(a):
|
|
# Used previous scipy pinv2 that was updated in:
|
|
# https://github.com/scipy/scipy/pull/10067
|
|
# We can not set `cond` or `rcond` for pinv2 in scipy >= 1.3 to keep the
|
|
# same behavior of pinv2 for scipy < 1.3, because the condition used to
|
|
# determine the rank is dependent on the output of svd.
|
|
u, s, vh = svd(a, full_matrices=False, check_finite=False)
|
|
|
|
t = u.dtype.char.lower()
|
|
factor = {"f": 1e3, "d": 1e6}
|
|
cond = np.max(s) * factor[t] * np.finfo(t).eps
|
|
rank = np.sum(s > cond)
|
|
|
|
u = u[:, :rank]
|
|
u /= s[:rank]
|
|
return np.transpose(np.conjugate(np.dot(u, vh[:rank])))
|
|
|
|
|
|
def _get_first_singular_vectors_power_method(
|
|
X, Y, mode="A", max_iter=500, tol=1e-06, norm_y_weights=False
|
|
):
|
|
"""Return the first left and right singular vectors of X'Y.
|
|
|
|
Provides an alternative to the svd(X'Y) and uses the power method instead.
|
|
With norm_y_weights to True and in mode A, this corresponds to the
|
|
algorithm section 11.3 of the Wegelin's review, except this starts at the
|
|
"update saliences" part.
|
|
"""
|
|
|
|
eps = np.finfo(X.dtype).eps
|
|
try:
|
|
y_score = next(col for col in Y.T if np.any(np.abs(col) > eps))
|
|
except StopIteration as e:
|
|
raise StopIteration("y residual is constant") from e
|
|
|
|
x_weights_old = 100 # init to big value for first convergence check
|
|
|
|
if mode == "B":
|
|
# Precompute pseudo inverse matrices
|
|
# Basically: X_pinv = (X.T X)^-1 X.T
|
|
# Which requires inverting a (n_features, n_features) matrix.
|
|
# As a result, and as detailed in the Wegelin's review, CCA (i.e. mode
|
|
# B) will be unstable if n_features > n_samples or n_targets >
|
|
# n_samples
|
|
X_pinv, Y_pinv = _pinv2_old(X), _pinv2_old(Y)
|
|
|
|
for i in range(max_iter):
|
|
if mode == "B":
|
|
x_weights = np.dot(X_pinv, y_score)
|
|
else:
|
|
x_weights = np.dot(X.T, y_score) / np.dot(y_score, y_score)
|
|
|
|
x_weights /= np.sqrt(np.dot(x_weights, x_weights)) + eps
|
|
x_score = np.dot(X, x_weights)
|
|
|
|
if mode == "B":
|
|
y_weights = np.dot(Y_pinv, x_score)
|
|
else:
|
|
y_weights = np.dot(Y.T, x_score) / np.dot(x_score.T, x_score)
|
|
|
|
if norm_y_weights:
|
|
y_weights /= np.sqrt(np.dot(y_weights, y_weights)) + eps
|
|
|
|
y_score = np.dot(Y, y_weights) / (np.dot(y_weights, y_weights) + eps)
|
|
|
|
x_weights_diff = x_weights - x_weights_old
|
|
if np.dot(x_weights_diff, x_weights_diff) < tol or Y.shape[1] == 1:
|
|
break
|
|
x_weights_old = x_weights
|
|
|
|
n_iter = i + 1
|
|
if n_iter == max_iter:
|
|
warnings.warn("Maximum number of iterations reached", ConvergenceWarning)
|
|
|
|
return x_weights, y_weights, n_iter
|
|
|
|
|
|
def _get_first_singular_vectors_svd(X, Y):
|
|
"""Return the first left and right singular vectors of X'Y.
|
|
|
|
Here the whole SVD is computed.
|
|
"""
|
|
C = np.dot(X.T, Y)
|
|
U, _, Vt = svd(C, full_matrices=False)
|
|
return U[:, 0], Vt[0, :]
|
|
|
|
|
|
def _center_scale_xy(X, Y, scale=True):
|
|
"""Center X, Y and scale if the scale parameter==True
|
|
|
|
Returns
|
|
-------
|
|
X, Y, x_mean, y_mean, x_std, y_std
|
|
"""
|
|
# center
|
|
x_mean = X.mean(axis=0)
|
|
X -= x_mean
|
|
y_mean = Y.mean(axis=0)
|
|
Y -= y_mean
|
|
# scale
|
|
if scale:
|
|
x_std = X.std(axis=0, ddof=1)
|
|
x_std[x_std == 0.0] = 1.0
|
|
X /= x_std
|
|
y_std = Y.std(axis=0, ddof=1)
|
|
y_std[y_std == 0.0] = 1.0
|
|
Y /= y_std
|
|
else:
|
|
x_std = np.ones(X.shape[1])
|
|
y_std = np.ones(Y.shape[1])
|
|
return X, Y, x_mean, y_mean, x_std, y_std
|
|
|
|
|
|
def _svd_flip_1d(u, v):
|
|
"""Same as svd_flip but works on 1d arrays, and is inplace"""
|
|
# svd_flip would force us to convert to 2d array and would also return 2d
|
|
# arrays. We don't want that.
|
|
biggest_abs_val_idx = np.argmax(np.abs(u))
|
|
sign = np.sign(u[biggest_abs_val_idx])
|
|
u *= sign
|
|
v *= sign
|
|
|
|
|
|
# TODO(1.7): Remove
|
|
def _deprecate_Y_when_optional(y, Y):
|
|
if Y is not None:
|
|
warnings.warn(
|
|
"`Y` is deprecated in 1.5 and will be removed in 1.7. Use `y` instead.",
|
|
FutureWarning,
|
|
)
|
|
if y is not None:
|
|
raise ValueError(
|
|
"Cannot use both `y` and `Y`. Use only `y` as `Y` is deprecated."
|
|
)
|
|
return Y
|
|
return y
|
|
|
|
|
|
# TODO(1.7): Remove
|
|
def _deprecate_Y_when_required(y, Y):
|
|
if y is None and Y is None:
|
|
raise ValueError("y is required.")
|
|
return _deprecate_Y_when_optional(y, Y)
|
|
|
|
|
|
class _PLS(
|
|
ClassNamePrefixFeaturesOutMixin,
|
|
TransformerMixin,
|
|
RegressorMixin,
|
|
MultiOutputMixin,
|
|
BaseEstimator,
|
|
metaclass=ABCMeta,
|
|
):
|
|
"""Partial Least Squares (PLS)
|
|
|
|
This class implements the generic PLS algorithm.
|
|
|
|
Main ref: Wegelin, a survey of Partial Least Squares (PLS) methods,
|
|
with emphasis on the two-block case
|
|
https://stat.uw.edu/sites/default/files/files/reports/2000/tr371.pdf
|
|
"""
|
|
|
|
_parameter_constraints: dict = {
|
|
"n_components": [Interval(Integral, 1, None, closed="left")],
|
|
"scale": ["boolean"],
|
|
"deflation_mode": [StrOptions({"regression", "canonical"})],
|
|
"mode": [StrOptions({"A", "B"})],
|
|
"algorithm": [StrOptions({"svd", "nipals"})],
|
|
"max_iter": [Interval(Integral, 1, None, closed="left")],
|
|
"tol": [Interval(Real, 0, None, closed="left")],
|
|
"copy": ["boolean"],
|
|
}
|
|
|
|
@abstractmethod
|
|
def __init__(
|
|
self,
|
|
n_components=2,
|
|
*,
|
|
scale=True,
|
|
deflation_mode="regression",
|
|
mode="A",
|
|
algorithm="nipals",
|
|
max_iter=500,
|
|
tol=1e-06,
|
|
copy=True,
|
|
):
|
|
self.n_components = n_components
|
|
self.deflation_mode = deflation_mode
|
|
self.mode = mode
|
|
self.scale = scale
|
|
self.algorithm = algorithm
|
|
self.max_iter = max_iter
|
|
self.tol = tol
|
|
self.copy = copy
|
|
|
|
@_fit_context(prefer_skip_nested_validation=True)
|
|
def fit(self, X, y=None, Y=None):
|
|
"""Fit model to data.
|
|
|
|
Parameters
|
|
----------
|
|
X : array-like of shape (n_samples, n_features)
|
|
Training vectors, where `n_samples` is the number of samples and
|
|
`n_features` is the number of predictors.
|
|
|
|
y : array-like of shape (n_samples,) or (n_samples, n_targets)
|
|
Target vectors, where `n_samples` is the number of samples and
|
|
`n_targets` is the number of response variables.
|
|
|
|
Y : array-like of shape (n_samples,) or (n_samples, n_targets)
|
|
Target vectors, where `n_samples` is the number of samples and
|
|
`n_targets` is the number of response variables.
|
|
|
|
.. deprecated:: 1.5
|
|
`Y` is deprecated in 1.5 and will be removed in 1.7. Use `y` instead.
|
|
|
|
Returns
|
|
-------
|
|
self : object
|
|
Fitted model.
|
|
"""
|
|
y = _deprecate_Y_when_required(y, Y)
|
|
|
|
check_consistent_length(X, y)
|
|
X = self._validate_data(
|
|
X, dtype=np.float64, copy=self.copy, ensure_min_samples=2
|
|
)
|
|
y = check_array(
|
|
y, input_name="y", dtype=np.float64, copy=self.copy, ensure_2d=False
|
|
)
|
|
if y.ndim == 1:
|
|
self._predict_1d = True
|
|
y = y.reshape(-1, 1)
|
|
else:
|
|
self._predict_1d = False
|
|
|
|
n = X.shape[0]
|
|
p = X.shape[1]
|
|
q = y.shape[1]
|
|
|
|
n_components = self.n_components
|
|
# With PLSRegression n_components is bounded by the rank of (X.T X) see
|
|
# Wegelin page 25. With CCA and PLSCanonical, n_components is bounded
|
|
# by the rank of X and the rank of Y: see Wegelin page 12
|
|
rank_upper_bound = p if self.deflation_mode == "regression" else min(n, p, q)
|
|
if n_components > rank_upper_bound:
|
|
raise ValueError(
|
|
f"`n_components` upper bound is {rank_upper_bound}. "
|
|
f"Got {n_components} instead. Reduce `n_components`."
|
|
)
|
|
|
|
self._norm_y_weights = self.deflation_mode == "canonical" # 1.1
|
|
norm_y_weights = self._norm_y_weights
|
|
|
|
# Scale (in place)
|
|
Xk, yk, self._x_mean, self._y_mean, self._x_std, self._y_std = _center_scale_xy(
|
|
X, y, self.scale
|
|
)
|
|
|
|
self.x_weights_ = np.zeros((p, n_components)) # U
|
|
self.y_weights_ = np.zeros((q, n_components)) # V
|
|
self._x_scores = np.zeros((n, n_components)) # Xi
|
|
self._y_scores = np.zeros((n, n_components)) # Omega
|
|
self.x_loadings_ = np.zeros((p, n_components)) # Gamma
|
|
self.y_loadings_ = np.zeros((q, n_components)) # Delta
|
|
self.n_iter_ = []
|
|
|
|
# This whole thing corresponds to the algorithm in section 4.1 of the
|
|
# review from Wegelin. See above for a notation mapping from code to
|
|
# paper.
|
|
y_eps = np.finfo(yk.dtype).eps
|
|
for k in range(n_components):
|
|
# Find first left and right singular vectors of the X.T.dot(Y)
|
|
# cross-covariance matrix.
|
|
if self.algorithm == "nipals":
|
|
# Replace columns that are all close to zero with zeros
|
|
yk_mask = np.all(np.abs(yk) < 10 * y_eps, axis=0)
|
|
yk[:, yk_mask] = 0.0
|
|
|
|
try:
|
|
(
|
|
x_weights,
|
|
y_weights,
|
|
n_iter_,
|
|
) = _get_first_singular_vectors_power_method(
|
|
Xk,
|
|
yk,
|
|
mode=self.mode,
|
|
max_iter=self.max_iter,
|
|
tol=self.tol,
|
|
norm_y_weights=norm_y_weights,
|
|
)
|
|
except StopIteration as e:
|
|
if str(e) != "y residual is constant":
|
|
raise
|
|
warnings.warn(f"y residual is constant at iteration {k}")
|
|
break
|
|
|
|
self.n_iter_.append(n_iter_)
|
|
|
|
elif self.algorithm == "svd":
|
|
x_weights, y_weights = _get_first_singular_vectors_svd(Xk, yk)
|
|
|
|
# inplace sign flip for consistency across solvers and archs
|
|
_svd_flip_1d(x_weights, y_weights)
|
|
|
|
# compute scores, i.e. the projections of X and Y
|
|
x_scores = np.dot(Xk, x_weights)
|
|
if norm_y_weights:
|
|
y_ss = 1
|
|
else:
|
|
y_ss = np.dot(y_weights, y_weights)
|
|
y_scores = np.dot(yk, y_weights) / y_ss
|
|
|
|
# Deflation: subtract rank-one approx to obtain Xk+1 and Yk+1
|
|
x_loadings = np.dot(x_scores, Xk) / np.dot(x_scores, x_scores)
|
|
Xk -= np.outer(x_scores, x_loadings)
|
|
|
|
if self.deflation_mode == "canonical":
|
|
# regress Yk on y_score
|
|
y_loadings = np.dot(y_scores, yk) / np.dot(y_scores, y_scores)
|
|
yk -= np.outer(y_scores, y_loadings)
|
|
if self.deflation_mode == "regression":
|
|
# regress Yk on x_score
|
|
y_loadings = np.dot(x_scores, yk) / np.dot(x_scores, x_scores)
|
|
yk -= np.outer(x_scores, y_loadings)
|
|
|
|
self.x_weights_[:, k] = x_weights
|
|
self.y_weights_[:, k] = y_weights
|
|
self._x_scores[:, k] = x_scores
|
|
self._y_scores[:, k] = y_scores
|
|
self.x_loadings_[:, k] = x_loadings
|
|
self.y_loadings_[:, k] = y_loadings
|
|
|
|
# X was approximated as Xi . Gamma.T + X_(R+1)
|
|
# Xi . Gamma.T is a sum of n_components rank-1 matrices. X_(R+1) is
|
|
# whatever is left to fully reconstruct X, and can be 0 if X is of rank
|
|
# n_components.
|
|
# Similarly, y was approximated as Omega . Delta.T + y_(R+1)
|
|
|
|
# Compute transformation matrices (rotations_). See User Guide.
|
|
self.x_rotations_ = np.dot(
|
|
self.x_weights_,
|
|
pinv2(np.dot(self.x_loadings_.T, self.x_weights_), check_finite=False),
|
|
)
|
|
self.y_rotations_ = np.dot(
|
|
self.y_weights_,
|
|
pinv2(np.dot(self.y_loadings_.T, self.y_weights_), check_finite=False),
|
|
)
|
|
self.coef_ = np.dot(self.x_rotations_, self.y_loadings_.T)
|
|
self.coef_ = (self.coef_ * self._y_std).T / self._x_std
|
|
self.intercept_ = self._y_mean
|
|
self._n_features_out = self.x_rotations_.shape[1]
|
|
return self
|
|
|
|
def transform(self, X, y=None, Y=None, copy=True):
|
|
"""Apply the dimension reduction.
|
|
|
|
Parameters
|
|
----------
|
|
X : array-like of shape (n_samples, n_features)
|
|
Samples to transform.
|
|
|
|
y : array-like of shape (n_samples, n_targets), default=None
|
|
Target vectors.
|
|
|
|
Y : array-like of shape (n_samples, n_targets), default=None
|
|
Target vectors.
|
|
|
|
.. deprecated:: 1.5
|
|
`Y` is deprecated in 1.5 and will be removed in 1.7. Use `y` instead.
|
|
|
|
copy : bool, default=True
|
|
Whether to copy `X` and `Y`, or perform in-place normalization.
|
|
|
|
Returns
|
|
-------
|
|
x_scores, y_scores : array-like or tuple of array-like
|
|
Return `x_scores` if `Y` is not given, `(x_scores, y_scores)` otherwise.
|
|
"""
|
|
y = _deprecate_Y_when_optional(y, Y)
|
|
|
|
check_is_fitted(self)
|
|
X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES, reset=False)
|
|
# Normalize
|
|
X -= self._x_mean
|
|
X /= self._x_std
|
|
# Apply rotation
|
|
x_scores = np.dot(X, self.x_rotations_)
|
|
if y is not None:
|
|
y = check_array(
|
|
y, input_name="y", ensure_2d=False, copy=copy, dtype=FLOAT_DTYPES
|
|
)
|
|
if y.ndim == 1:
|
|
y = y.reshape(-1, 1)
|
|
y -= self._y_mean
|
|
y /= self._y_std
|
|
y_scores = np.dot(y, self.y_rotations_)
|
|
return x_scores, y_scores
|
|
|
|
return x_scores
|
|
|
|
def inverse_transform(self, X, y=None, Y=None):
|
|
"""Transform data back to its original space.
|
|
|
|
Parameters
|
|
----------
|
|
X : array-like of shape (n_samples, n_components)
|
|
New data, where `n_samples` is the number of samples
|
|
and `n_components` is the number of pls components.
|
|
|
|
y : array-like of shape (n_samples,) or (n_samples, n_components)
|
|
New target, where `n_samples` is the number of samples
|
|
and `n_components` is the number of pls components.
|
|
|
|
Y : array-like of shape (n_samples, n_components)
|
|
New target, where `n_samples` is the number of samples
|
|
and `n_components` is the number of pls components.
|
|
|
|
.. deprecated:: 1.5
|
|
`Y` is deprecated in 1.5 and will be removed in 1.7. Use `y` instead.
|
|
|
|
Returns
|
|
-------
|
|
X_reconstructed : ndarray of shape (n_samples, n_features)
|
|
Return the reconstructed `X` data.
|
|
|
|
y_reconstructed : ndarray of shape (n_samples, n_targets)
|
|
Return the reconstructed `X` target. Only returned when `y` is given.
|
|
|
|
Notes
|
|
-----
|
|
This transformation will only be exact if `n_components=n_features`.
|
|
"""
|
|
y = _deprecate_Y_when_optional(y, Y)
|
|
|
|
check_is_fitted(self)
|
|
X = check_array(X, input_name="X", dtype=FLOAT_DTYPES)
|
|
# From pls space to original space
|
|
X_reconstructed = np.matmul(X, self.x_loadings_.T)
|
|
# Denormalize
|
|
X_reconstructed *= self._x_std
|
|
X_reconstructed += self._x_mean
|
|
|
|
if y is not None:
|
|
y = check_array(y, input_name="y", dtype=FLOAT_DTYPES)
|
|
# From pls space to original space
|
|
y_reconstructed = np.matmul(y, self.y_loadings_.T)
|
|
# Denormalize
|
|
y_reconstructed *= self._y_std
|
|
y_reconstructed += self._y_mean
|
|
return X_reconstructed, y_reconstructed
|
|
|
|
return X_reconstructed
|
|
|
|
def predict(self, X, copy=True):
|
|
"""Predict targets of given samples.
|
|
|
|
Parameters
|
|
----------
|
|
X : array-like of shape (n_samples, n_features)
|
|
Samples.
|
|
|
|
copy : bool, default=True
|
|
Whether to copy `X` and `Y`, or perform in-place normalization.
|
|
|
|
Returns
|
|
-------
|
|
y_pred : ndarray of shape (n_samples,) or (n_samples, n_targets)
|
|
Returns predicted values.
|
|
|
|
Notes
|
|
-----
|
|
This call requires the estimation of a matrix of shape
|
|
`(n_features, n_targets)`, which may be an issue in high dimensional
|
|
space.
|
|
"""
|
|
check_is_fitted(self)
|
|
X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES, reset=False)
|
|
# Only center X but do not scale it since the coefficients are already scaled
|
|
X -= self._x_mean
|
|
Ypred = X @ self.coef_.T + self.intercept_
|
|
return Ypred.ravel() if self._predict_1d else Ypred
|
|
|
|
def fit_transform(self, X, y=None):
|
|
"""Learn and apply the dimension reduction on the train data.
|
|
|
|
Parameters
|
|
----------
|
|
X : array-like of shape (n_samples, n_features)
|
|
Training vectors, where `n_samples` is the number of samples and
|
|
`n_features` is the number of predictors.
|
|
|
|
y : array-like of shape (n_samples, n_targets), default=None
|
|
Target vectors, where `n_samples` is the number of samples and
|
|
`n_targets` is the number of response variables.
|
|
|
|
Returns
|
|
-------
|
|
self : ndarray of shape (n_samples, n_components)
|
|
Return `x_scores` if `Y` is not given, `(x_scores, y_scores)` otherwise.
|
|
"""
|
|
return self.fit(X, y).transform(X, y)
|
|
|
|
def _more_tags(self):
|
|
return {"poor_score": True, "requires_y": False}
|
|
|
|
|
|
class PLSRegression(_PLS):
|
|
"""PLS regression.
|
|
|
|
PLSRegression is also known as PLS2 or PLS1, depending on the number of
|
|
targets.
|
|
|
|
For a comparison between other cross decomposition algorithms, see
|
|
:ref:`sphx_glr_auto_examples_cross_decomposition_plot_compare_cross_decomposition.py`.
|
|
|
|
Read more in the :ref:`User Guide <cross_decomposition>`.
|
|
|
|
.. versionadded:: 0.8
|
|
|
|
Parameters
|
|
----------
|
|
n_components : int, default=2
|
|
Number of components to keep. Should be in `[1, n_features]`.
|
|
|
|
scale : bool, default=True
|
|
Whether to scale `X` and `Y`.
|
|
|
|
max_iter : int, default=500
|
|
The maximum number of iterations of the power method when
|
|
`algorithm='nipals'`. Ignored otherwise.
|
|
|
|
tol : float, default=1e-06
|
|
The tolerance used as convergence criteria in the power method: the
|
|
algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less
|
|
than `tol`, where `u` corresponds to the left singular vector.
|
|
|
|
copy : bool, default=True
|
|
Whether to copy `X` and `Y` in :term:`fit` before applying centering,
|
|
and potentially scaling. If `False`, these operations will be done
|
|
inplace, modifying both arrays.
|
|
|
|
Attributes
|
|
----------
|
|
x_weights_ : ndarray of shape (n_features, n_components)
|
|
The left singular vectors of the cross-covariance matrices of each
|
|
iteration.
|
|
|
|
y_weights_ : ndarray of shape (n_targets, n_components)
|
|
The right singular vectors of the cross-covariance matrices of each
|
|
iteration.
|
|
|
|
x_loadings_ : ndarray of shape (n_features, n_components)
|
|
The loadings of `X`.
|
|
|
|
y_loadings_ : ndarray of shape (n_targets, n_components)
|
|
The loadings of `Y`.
|
|
|
|
x_scores_ : ndarray of shape (n_samples, n_components)
|
|
The transformed training samples.
|
|
|
|
y_scores_ : ndarray of shape (n_samples, n_components)
|
|
The transformed training targets.
|
|
|
|
x_rotations_ : ndarray of shape (n_features, n_components)
|
|
The projection matrix used to transform `X`.
|
|
|
|
y_rotations_ : ndarray of shape (n_targets, n_components)
|
|
The projection matrix used to transform `Y`.
|
|
|
|
coef_ : ndarray of shape (n_target, n_features)
|
|
The coefficients of the linear model such that `Y` is approximated as
|
|
`Y = X @ coef_.T + intercept_`.
|
|
|
|
intercept_ : ndarray of shape (n_targets,)
|
|
The intercepts of the linear model such that `Y` is approximated as
|
|
`Y = X @ coef_.T + intercept_`.
|
|
|
|
.. versionadded:: 1.1
|
|
|
|
n_iter_ : list of shape (n_components,)
|
|
Number of iterations of the power method, for each
|
|
component.
|
|
|
|
n_features_in_ : int
|
|
Number of features seen during :term:`fit`.
|
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
|
Names of features seen during :term:`fit`. Defined only when `X`
|
|
has feature names that are all strings.
|
|
|
|
.. versionadded:: 1.0
|
|
|
|
See Also
|
|
--------
|
|
PLSCanonical : Partial Least Squares transformer and regressor.
|
|
|
|
Examples
|
|
--------
|
|
>>> from sklearn.cross_decomposition import PLSRegression
|
|
>>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]
|
|
>>> y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
|
|
>>> pls2 = PLSRegression(n_components=2)
|
|
>>> pls2.fit(X, y)
|
|
PLSRegression()
|
|
>>> Y_pred = pls2.predict(X)
|
|
|
|
For a comparison between PLS Regression and :class:`~sklearn.decomposition.PCA`, see
|
|
:ref:`sphx_glr_auto_examples_cross_decomposition_plot_pcr_vs_pls.py`.
|
|
"""
|
|
|
|
_parameter_constraints: dict = {**_PLS._parameter_constraints}
|
|
for param in ("deflation_mode", "mode", "algorithm"):
|
|
_parameter_constraints.pop(param)
|
|
|
|
# This implementation provides the same results that 3 PLS packages
|
|
# provided in the R language (R-project):
|
|
# - "mixOmics" with function pls(X, Y, mode = "regression")
|
|
# - "plspm " with function plsreg2(X, Y)
|
|
# - "pls" with function oscorespls.fit(X, Y)
|
|
|
|
def __init__(
|
|
self, n_components=2, *, scale=True, max_iter=500, tol=1e-06, copy=True
|
|
):
|
|
super().__init__(
|
|
n_components=n_components,
|
|
scale=scale,
|
|
deflation_mode="regression",
|
|
mode="A",
|
|
algorithm="nipals",
|
|
max_iter=max_iter,
|
|
tol=tol,
|
|
copy=copy,
|
|
)
|
|
|
|
def fit(self, X, y=None, Y=None):
|
|
"""Fit model to data.
|
|
|
|
Parameters
|
|
----------
|
|
X : array-like of shape (n_samples, n_features)
|
|
Training vectors, where `n_samples` is the number of samples and
|
|
`n_features` is the number of predictors.
|
|
|
|
y : array-like of shape (n_samples,) or (n_samples, n_targets)
|
|
Target vectors, where `n_samples` is the number of samples and
|
|
`n_targets` is the number of response variables.
|
|
|
|
Y : array-like of shape (n_samples,) or (n_samples, n_targets)
|
|
Target vectors, where `n_samples` is the number of samples and
|
|
`n_targets` is the number of response variables.
|
|
|
|
.. deprecated:: 1.5
|
|
`Y` is deprecated in 1.5 and will be removed in 1.7. Use `y` instead.
|
|
|
|
Returns
|
|
-------
|
|
self : object
|
|
Fitted model.
|
|
"""
|
|
y = _deprecate_Y_when_required(y, Y)
|
|
|
|
super().fit(X, y)
|
|
# expose the fitted attributes `x_scores_` and `y_scores_`
|
|
self.x_scores_ = self._x_scores
|
|
self.y_scores_ = self._y_scores
|
|
return self
|
|
|
|
|
|
class PLSCanonical(_PLS):
|
|
"""Partial Least Squares transformer and regressor.
|
|
|
|
For a comparison between other cross decomposition algorithms, see
|
|
:ref:`sphx_glr_auto_examples_cross_decomposition_plot_compare_cross_decomposition.py`.
|
|
|
|
Read more in the :ref:`User Guide <cross_decomposition>`.
|
|
|
|
.. versionadded:: 0.8
|
|
|
|
Parameters
|
|
----------
|
|
n_components : int, default=2
|
|
Number of components to keep. Should be in `[1, min(n_samples,
|
|
n_features, n_targets)]`.
|
|
|
|
scale : bool, default=True
|
|
Whether to scale `X` and `Y`.
|
|
|
|
algorithm : {'nipals', 'svd'}, default='nipals'
|
|
The algorithm used to estimate the first singular vectors of the
|
|
cross-covariance matrix. 'nipals' uses the power method while 'svd'
|
|
will compute the whole SVD.
|
|
|
|
max_iter : int, default=500
|
|
The maximum number of iterations of the power method when
|
|
`algorithm='nipals'`. Ignored otherwise.
|
|
|
|
tol : float, default=1e-06
|
|
The tolerance used as convergence criteria in the power method: the
|
|
algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less
|
|
than `tol`, where `u` corresponds to the left singular vector.
|
|
|
|
copy : bool, default=True
|
|
Whether to copy `X` and `Y` in fit before applying centering, and
|
|
potentially scaling. If False, these operations will be done inplace,
|
|
modifying both arrays.
|
|
|
|
Attributes
|
|
----------
|
|
x_weights_ : ndarray of shape (n_features, n_components)
|
|
The left singular vectors of the cross-covariance matrices of each
|
|
iteration.
|
|
|
|
y_weights_ : ndarray of shape (n_targets, n_components)
|
|
The right singular vectors of the cross-covariance matrices of each
|
|
iteration.
|
|
|
|
x_loadings_ : ndarray of shape (n_features, n_components)
|
|
The loadings of `X`.
|
|
|
|
y_loadings_ : ndarray of shape (n_targets, n_components)
|
|
The loadings of `Y`.
|
|
|
|
x_rotations_ : ndarray of shape (n_features, n_components)
|
|
The projection matrix used to transform `X`.
|
|
|
|
y_rotations_ : ndarray of shape (n_targets, n_components)
|
|
The projection matrix used to transform `Y`.
|
|
|
|
coef_ : ndarray of shape (n_targets, n_features)
|
|
The coefficients of the linear model such that `Y` is approximated as
|
|
`Y = X @ coef_.T + intercept_`.
|
|
|
|
intercept_ : ndarray of shape (n_targets,)
|
|
The intercepts of the linear model such that `Y` is approximated as
|
|
`Y = X @ coef_.T + intercept_`.
|
|
|
|
.. versionadded:: 1.1
|
|
|
|
n_iter_ : list of shape (n_components,)
|
|
Number of iterations of the power method, for each
|
|
component. Empty if `algorithm='svd'`.
|
|
|
|
n_features_in_ : int
|
|
Number of features seen during :term:`fit`.
|
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
|
Names of features seen during :term:`fit`. Defined only when `X`
|
|
has feature names that are all strings.
|
|
|
|
.. versionadded:: 1.0
|
|
|
|
See Also
|
|
--------
|
|
CCA : Canonical Correlation Analysis.
|
|
PLSSVD : Partial Least Square SVD.
|
|
|
|
Examples
|
|
--------
|
|
>>> from sklearn.cross_decomposition import PLSCanonical
|
|
>>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]
|
|
>>> y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
|
|
>>> plsca = PLSCanonical(n_components=2)
|
|
>>> plsca.fit(X, y)
|
|
PLSCanonical()
|
|
>>> X_c, y_c = plsca.transform(X, y)
|
|
"""
|
|
|
|
_parameter_constraints: dict = {**_PLS._parameter_constraints}
|
|
for param in ("deflation_mode", "mode"):
|
|
_parameter_constraints.pop(param)
|
|
|
|
# This implementation provides the same results that the "plspm" package
|
|
# provided in the R language (R-project), using the function plsca(X, Y).
|
|
# Results are equal or collinear with the function
|
|
# ``pls(..., mode = "canonical")`` of the "mixOmics" package. The
|
|
# difference relies in the fact that mixOmics implementation does not
|
|
# exactly implement the Wold algorithm since it does not normalize
|
|
# y_weights to one.
|
|
|
|
def __init__(
|
|
self,
|
|
n_components=2,
|
|
*,
|
|
scale=True,
|
|
algorithm="nipals",
|
|
max_iter=500,
|
|
tol=1e-06,
|
|
copy=True,
|
|
):
|
|
super().__init__(
|
|
n_components=n_components,
|
|
scale=scale,
|
|
deflation_mode="canonical",
|
|
mode="A",
|
|
algorithm=algorithm,
|
|
max_iter=max_iter,
|
|
tol=tol,
|
|
copy=copy,
|
|
)
|
|
|
|
|
|
class CCA(_PLS):
|
|
"""Canonical Correlation Analysis, also known as "Mode B" PLS.
|
|
|
|
For a comparison between other cross decomposition algorithms, see
|
|
:ref:`sphx_glr_auto_examples_cross_decomposition_plot_compare_cross_decomposition.py`.
|
|
|
|
Read more in the :ref:`User Guide <cross_decomposition>`.
|
|
|
|
Parameters
|
|
----------
|
|
n_components : int, default=2
|
|
Number of components to keep. Should be in `[1, min(n_samples,
|
|
n_features, n_targets)]`.
|
|
|
|
scale : bool, default=True
|
|
Whether to scale `X` and `Y`.
|
|
|
|
max_iter : int, default=500
|
|
The maximum number of iterations of the power method.
|
|
|
|
tol : float, default=1e-06
|
|
The tolerance used as convergence criteria in the power method: the
|
|
algorithm stops whenever the squared norm of `u_i - u_{i-1}` is less
|
|
than `tol`, where `u` corresponds to the left singular vector.
|
|
|
|
copy : bool, default=True
|
|
Whether to copy `X` and `Y` in fit before applying centering, and
|
|
potentially scaling. If False, these operations will be done inplace,
|
|
modifying both arrays.
|
|
|
|
Attributes
|
|
----------
|
|
x_weights_ : ndarray of shape (n_features, n_components)
|
|
The left singular vectors of the cross-covariance matrices of each
|
|
iteration.
|
|
|
|
y_weights_ : ndarray of shape (n_targets, n_components)
|
|
The right singular vectors of the cross-covariance matrices of each
|
|
iteration.
|
|
|
|
x_loadings_ : ndarray of shape (n_features, n_components)
|
|
The loadings of `X`.
|
|
|
|
y_loadings_ : ndarray of shape (n_targets, n_components)
|
|
The loadings of `Y`.
|
|
|
|
x_rotations_ : ndarray of shape (n_features, n_components)
|
|
The projection matrix used to transform `X`.
|
|
|
|
y_rotations_ : ndarray of shape (n_targets, n_components)
|
|
The projection matrix used to transform `Y`.
|
|
|
|
coef_ : ndarray of shape (n_targets, n_features)
|
|
The coefficients of the linear model such that `Y` is approximated as
|
|
`Y = X @ coef_.T + intercept_`.
|
|
|
|
intercept_ : ndarray of shape (n_targets,)
|
|
The intercepts of the linear model such that `Y` is approximated as
|
|
`Y = X @ coef_.T + intercept_`.
|
|
|
|
.. versionadded:: 1.1
|
|
|
|
n_iter_ : list of shape (n_components,)
|
|
Number of iterations of the power method, for each
|
|
component.
|
|
|
|
n_features_in_ : int
|
|
Number of features seen during :term:`fit`.
|
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
|
Names of features seen during :term:`fit`. Defined only when `X`
|
|
has feature names that are all strings.
|
|
|
|
.. versionadded:: 1.0
|
|
|
|
See Also
|
|
--------
|
|
PLSCanonical : Partial Least Squares transformer and regressor.
|
|
PLSSVD : Partial Least Square SVD.
|
|
|
|
Examples
|
|
--------
|
|
>>> from sklearn.cross_decomposition import CCA
|
|
>>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [3.,5.,4.]]
|
|
>>> y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
|
|
>>> cca = CCA(n_components=1)
|
|
>>> cca.fit(X, y)
|
|
CCA(n_components=1)
|
|
>>> X_c, Y_c = cca.transform(X, y)
|
|
"""
|
|
|
|
_parameter_constraints: dict = {**_PLS._parameter_constraints}
|
|
for param in ("deflation_mode", "mode", "algorithm"):
|
|
_parameter_constraints.pop(param)
|
|
|
|
def __init__(
|
|
self, n_components=2, *, scale=True, max_iter=500, tol=1e-06, copy=True
|
|
):
|
|
super().__init__(
|
|
n_components=n_components,
|
|
scale=scale,
|
|
deflation_mode="canonical",
|
|
mode="B",
|
|
algorithm="nipals",
|
|
max_iter=max_iter,
|
|
tol=tol,
|
|
copy=copy,
|
|
)
|
|
|
|
|
|
class PLSSVD(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
|
|
"""Partial Least Square SVD.
|
|
|
|
This transformer simply performs a SVD on the cross-covariance matrix
|
|
`X'Y`. It is able to project both the training data `X` and the targets
|
|
`Y`. The training data `X` is projected on the left singular vectors, while
|
|
the targets are projected on the right singular vectors.
|
|
|
|
Read more in the :ref:`User Guide <cross_decomposition>`.
|
|
|
|
.. versionadded:: 0.8
|
|
|
|
Parameters
|
|
----------
|
|
n_components : int, default=2
|
|
The number of components to keep. Should be in `[1,
|
|
min(n_samples, n_features, n_targets)]`.
|
|
|
|
scale : bool, default=True
|
|
Whether to scale `X` and `Y`.
|
|
|
|
copy : bool, default=True
|
|
Whether to copy `X` and `Y` in fit before applying centering, and
|
|
potentially scaling. If `False`, these operations will be done inplace,
|
|
modifying both arrays.
|
|
|
|
Attributes
|
|
----------
|
|
x_weights_ : ndarray of shape (n_features, n_components)
|
|
The left singular vectors of the SVD of the cross-covariance matrix.
|
|
Used to project `X` in :meth:`transform`.
|
|
|
|
y_weights_ : ndarray of (n_targets, n_components)
|
|
The right singular vectors of the SVD of the cross-covariance matrix.
|
|
Used to project `X` in :meth:`transform`.
|
|
|
|
n_features_in_ : int
|
|
Number of features seen during :term:`fit`.
|
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
|
Names of features seen during :term:`fit`. Defined only when `X`
|
|
has feature names that are all strings.
|
|
|
|
.. versionadded:: 1.0
|
|
|
|
See Also
|
|
--------
|
|
PLSCanonical : Partial Least Squares transformer and regressor.
|
|
CCA : Canonical Correlation Analysis.
|
|
|
|
Examples
|
|
--------
|
|
>>> import numpy as np
|
|
>>> from sklearn.cross_decomposition import PLSSVD
|
|
>>> X = np.array([[0., 0., 1.],
|
|
... [1., 0., 0.],
|
|
... [2., 2., 2.],
|
|
... [2., 5., 4.]])
|
|
>>> y = np.array([[0.1, -0.2],
|
|
... [0.9, 1.1],
|
|
... [6.2, 5.9],
|
|
... [11.9, 12.3]])
|
|
>>> pls = PLSSVD(n_components=2).fit(X, y)
|
|
>>> X_c, y_c = pls.transform(X, y)
|
|
>>> X_c.shape, y_c.shape
|
|
((4, 2), (4, 2))
|
|
"""
|
|
|
|
_parameter_constraints: dict = {
|
|
"n_components": [Interval(Integral, 1, None, closed="left")],
|
|
"scale": ["boolean"],
|
|
"copy": ["boolean"],
|
|
}
|
|
|
|
def __init__(self, n_components=2, *, scale=True, copy=True):
|
|
self.n_components = n_components
|
|
self.scale = scale
|
|
self.copy = copy
|
|
|
|
@_fit_context(prefer_skip_nested_validation=True)
|
|
def fit(self, X, y=None, Y=None):
|
|
"""Fit model to data.
|
|
|
|
Parameters
|
|
----------
|
|
X : array-like of shape (n_samples, n_features)
|
|
Training samples.
|
|
|
|
y : array-like of shape (n_samples,) or (n_samples, n_targets)
|
|
Targets.
|
|
|
|
Y : array-like of shape (n_samples,) or (n_samples, n_targets)
|
|
Targets.
|
|
|
|
.. deprecated:: 1.5
|
|
`Y` is deprecated in 1.5 and will be removed in 1.7. Use `y` instead.
|
|
|
|
Returns
|
|
-------
|
|
self : object
|
|
Fitted estimator.
|
|
"""
|
|
y = _deprecate_Y_when_required(y, Y)
|
|
check_consistent_length(X, y)
|
|
X = self._validate_data(
|
|
X, dtype=np.float64, copy=self.copy, ensure_min_samples=2
|
|
)
|
|
y = check_array(
|
|
y, input_name="y", dtype=np.float64, copy=self.copy, ensure_2d=False
|
|
)
|
|
if y.ndim == 1:
|
|
y = y.reshape(-1, 1)
|
|
|
|
# we'll compute the SVD of the cross-covariance matrix = X.T.dot(y)
|
|
# This matrix rank is at most min(n_samples, n_features, n_targets) so
|
|
# n_components cannot be bigger than that.
|
|
n_components = self.n_components
|
|
rank_upper_bound = min(X.shape[0], X.shape[1], y.shape[1])
|
|
if n_components > rank_upper_bound:
|
|
raise ValueError(
|
|
f"`n_components` upper bound is {rank_upper_bound}. "
|
|
f"Got {n_components} instead. Reduce `n_components`."
|
|
)
|
|
|
|
X, y, self._x_mean, self._y_mean, self._x_std, self._y_std = _center_scale_xy(
|
|
X, y, self.scale
|
|
)
|
|
|
|
# Compute SVD of cross-covariance matrix
|
|
C = np.dot(X.T, y)
|
|
U, s, Vt = svd(C, full_matrices=False)
|
|
U = U[:, :n_components]
|
|
Vt = Vt[:n_components]
|
|
U, Vt = svd_flip(U, Vt)
|
|
V = Vt.T
|
|
|
|
self.x_weights_ = U
|
|
self.y_weights_ = V
|
|
self._n_features_out = self.x_weights_.shape[1]
|
|
return self
|
|
|
|
def transform(self, X, y=None, Y=None):
|
|
"""
|
|
Apply the dimensionality reduction.
|
|
|
|
Parameters
|
|
----------
|
|
X : array-like of shape (n_samples, n_features)
|
|
Samples to be transformed.
|
|
|
|
y : array-like of shape (n_samples,) or (n_samples, n_targets), \
|
|
default=None
|
|
Targets.
|
|
|
|
Y : array-like of shape (n_samples,) or (n_samples, n_targets), \
|
|
default=None
|
|
Targets.
|
|
|
|
.. deprecated:: 1.5
|
|
`Y` is deprecated in 1.5 and will be removed in 1.7. Use `y` instead.
|
|
|
|
Returns
|
|
-------
|
|
x_scores : array-like or tuple of array-like
|
|
The transformed data `X_transformed` if `Y is not None`,
|
|
`(X_transformed, Y_transformed)` otherwise.
|
|
"""
|
|
y = _deprecate_Y_when_optional(y, Y)
|
|
check_is_fitted(self)
|
|
X = self._validate_data(X, dtype=np.float64, reset=False)
|
|
Xr = (X - self._x_mean) / self._x_std
|
|
x_scores = np.dot(Xr, self.x_weights_)
|
|
if y is not None:
|
|
y = check_array(y, input_name="y", ensure_2d=False, dtype=np.float64)
|
|
if y.ndim == 1:
|
|
y = y.reshape(-1, 1)
|
|
yr = (y - self._y_mean) / self._y_std
|
|
y_scores = np.dot(yr, self.y_weights_)
|
|
return x_scores, y_scores
|
|
return x_scores
|
|
|
|
def fit_transform(self, X, y=None):
|
|
"""Learn and apply the dimensionality reduction.
|
|
|
|
Parameters
|
|
----------
|
|
X : array-like of shape (n_samples, n_features)
|
|
Training samples.
|
|
|
|
y : array-like of shape (n_samples,) or (n_samples, n_targets), \
|
|
default=None
|
|
Targets.
|
|
|
|
Returns
|
|
-------
|
|
out : array-like or tuple of array-like
|
|
The transformed data `X_transformed` if `Y is not None`,
|
|
`(X_transformed, Y_transformed)` otherwise.
|
|
"""
|
|
return self.fit(X, y).transform(X, y)
|