projektAI/venv/Lib/site-packages/sklearn/kernel_ridge.py

211 lines
8.3 KiB
Python
Raw Normal View History

2021-06-06 22:13:05 +02:00
"""Module :mod:`sklearn.kernel_ridge` implements kernel ridge regression."""
# Authors: Mathieu Blondel <mathieu@mblondel.org>
# Jan Hendrik Metzen <jhm@informatik.uni-bremen.de>
# License: BSD 3 clause
import numpy as np
from .base import BaseEstimator, RegressorMixin, MultiOutputMixin
from .metrics.pairwise import pairwise_kernels
from .linear_model._ridge import _solve_cholesky_kernel
from .utils.validation import check_is_fitted, _check_sample_weight
from .utils.validation import _deprecate_positional_args
from .utils.deprecation import deprecated
class KernelRidge(MultiOutputMixin, RegressorMixin, BaseEstimator):
"""Kernel ridge regression.
Kernel ridge regression (KRR) combines ridge regression (linear least
squares with l2-norm regularization) with the kernel trick. It thus
learns a linear function in the space induced by the respective kernel and
the data. For non-linear kernels, this corresponds to a non-linear
function in the original space.
The form of the model learned by KRR is identical to support vector
regression (SVR). However, different loss functions are used: KRR uses
squared error loss while support vector regression uses epsilon-insensitive
loss, both combined with l2 regularization. In contrast to SVR, fitting a
KRR model can be done in closed-form and is typically faster for
medium-sized datasets. On the other hand, the learned model is non-sparse
and thus slower than SVR, which learns a sparse model for epsilon > 0, at
prediction-time.
This estimator has built-in support for multi-variate regression
(i.e., when y is a 2d-array of shape [n_samples, n_targets]).
Read more in the :ref:`User Guide <kernel_ridge>`.
Parameters
----------
alpha : float or array-like of shape (n_targets,), default=1.0
Regularization strength; must be a positive float. Regularization
improves the conditioning of the problem and reduces the variance of
the estimates. Larger values specify stronger regularization.
Alpha corresponds to ``1 / (2C)`` in other linear models such as
:class:`~sklearn.linear_model.LogisticRegression` or
:class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are
assumed to be specific to the targets. Hence they must correspond in
number. See :ref:`ridge_regression` for formula.
kernel : string or callable, default="linear"
Kernel mapping used internally. This parameter is directly passed to
:class:`~sklearn.metrics.pairwise.pairwise_kernel`.
If `kernel` is a string, it must be one of the metrics
in `pairwise.PAIRWISE_KERNEL_FUNCTIONS`.
If `kernel` is "precomputed", X is assumed to be a kernel matrix.
Alternatively, if `kernel` is a callable function, it is called on
each pair of instances (rows) and the resulting value recorded. The
callable should take two rows from X as input and return the
corresponding kernel value as a single number. This means that
callables from :mod:`sklearn.metrics.pairwise` are not allowed, as
they operate on matrices, not single samples. Use the string
identifying the kernel instead.
gamma : float, default=None
Gamma parameter for the RBF, laplacian, polynomial, exponential chi2
and sigmoid kernels. Interpretation of the default value is left to
the kernel; see the documentation for sklearn.metrics.pairwise.
Ignored by other kernels.
degree : float, default=3
Degree of the polynomial kernel. Ignored by other kernels.
coef0 : float, default=1
Zero coefficient for polynomial and sigmoid kernels.
Ignored by other kernels.
kernel_params : mapping of string to any, default=None
Additional parameters (keyword arguments) for kernel function passed
as callable object.
Attributes
----------
dual_coef_ : ndarray of shape (n_samples,) or (n_samples, n_targets)
Representation of weight vector(s) in kernel space
X_fit_ : {ndarray, sparse matrix} of shape (n_samples, n_features)
Training data, which is also required for prediction. If
kernel == "precomputed" this is instead the precomputed
training matrix, of shape (n_samples, n_samples).
References
----------
* Kevin P. Murphy
"Machine Learning: A Probabilistic Perspective", The MIT Press
chapter 14.4.3, pp. 492-493
See Also
--------
sklearn.linear_model.Ridge : Linear ridge regression.
sklearn.svm.SVR : Support Vector Regression implemented using libsvm.
Examples
--------
>>> from sklearn.kernel_ridge import KernelRidge
>>> import numpy as np
>>> n_samples, n_features = 10, 5
>>> rng = np.random.RandomState(0)
>>> y = rng.randn(n_samples)
>>> X = rng.randn(n_samples, n_features)
>>> clf = KernelRidge(alpha=1.0)
>>> clf.fit(X, y)
KernelRidge(alpha=1.0)
"""
@_deprecate_positional_args
def __init__(self, alpha=1, *, kernel="linear", gamma=None, degree=3,
coef0=1, kernel_params=None):
self.alpha = alpha
self.kernel = kernel
self.gamma = gamma
self.degree = degree
self.coef0 = coef0
self.kernel_params = kernel_params
def _get_kernel(self, X, Y=None):
if callable(self.kernel):
params = self.kernel_params or {}
else:
params = {"gamma": self.gamma,
"degree": self.degree,
"coef0": self.coef0}
return pairwise_kernels(X, Y, metric=self.kernel,
filter_params=True, **params)
def _more_tags(self):
return {'pairwise': self.kernel == 'precomputed'}
# TODO: Remove in 1.1
# mypy error: Decorated property not supported
@deprecated("Attribute _pairwise was deprecated in " # type: ignore
"version 0.24 and will be removed in 1.1 (renaming of 0.26).")
@property
def _pairwise(self):
return self.kernel == "precomputed"
def fit(self, X, y, sample_weight=None):
"""Fit Kernel Ridge regression model
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Training data. If kernel == "precomputed" this is instead
a precomputed kernel matrix, of shape (n_samples, n_samples).
y : array-like of shape (n_samples,) or (n_samples, n_targets)
Target values
sample_weight : float or array-like of shape (n_samples,), default=None
Individual weights for each sample, ignored if None is passed.
Returns
-------
self : returns an instance of self.
"""
# Convert data
X, y = self._validate_data(X, y, accept_sparse=("csr", "csc"),
multi_output=True, y_numeric=True)
if sample_weight is not None and not isinstance(sample_weight, float):
sample_weight = _check_sample_weight(sample_weight, X)
K = self._get_kernel(X)
alpha = np.atleast_1d(self.alpha)
ravel = False
if len(y.shape) == 1:
y = y.reshape(-1, 1)
ravel = True
copy = self.kernel == "precomputed"
self.dual_coef_ = _solve_cholesky_kernel(K, y, alpha,
sample_weight,
copy)
if ravel:
self.dual_coef_ = self.dual_coef_.ravel()
self.X_fit_ = X
return self
def predict(self, X):
"""Predict using the kernel ridge model
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Samples. If kernel == "precomputed" this is instead a
precomputed kernel matrix, shape = [n_samples,
n_samples_fitted], where n_samples_fitted is the number of
samples used in the fitting for this estimator.
Returns
-------
C : ndarray of shape (n_samples,) or (n_samples, n_targets)
Returns predicted values.
"""
check_is_fitted(self)
X = self._validate_data(X, accept_sparse=("csr", "csc"), reset=False)
K = self._get_kernel(X, self.X_fit_)
return np.dot(K, self.dual_coef_)