1807 lines
62 KiB
Python
1807 lines
62 KiB
Python
from numbers import Integral, Real
|
|
|
|
import numpy as np
|
|
|
|
from ._base import _fit_liblinear, BaseSVC, BaseLibSVM
|
|
from ..base import BaseEstimator, RegressorMixin, OutlierMixin
|
|
from ..linear_model._base import LinearClassifierMixin, SparseCoefMixin, LinearModel
|
|
from ..utils import deprecated
|
|
from ..utils.validation import _num_samples
|
|
from ..utils.multiclass import check_classification_targets
|
|
from ..utils._param_validation import Interval, StrOptions
|
|
|
|
|
|
class LinearSVC(LinearClassifierMixin, SparseCoefMixin, BaseEstimator):
    """Linear Support Vector Classification.

    Similar to SVC with parameter kernel='linear', but implemented in terms of
    liblinear rather than libsvm, so it has more flexibility in the choice of
    penalties and loss functions and should scale better to large numbers of
    samples.

    This class supports both dense and sparse input and the multiclass support
    is handled according to a one-vs-the-rest scheme.

    Read more in the :ref:`User Guide <svm_classification>`.

    Parameters
    ----------
    penalty : {'l1', 'l2'}, default='l2'
        Specifies the norm used in the penalization. The 'l2'
        penalty is the standard used in SVC. The 'l1' leads to ``coef_``
        vectors that are sparse.

    loss : {'hinge', 'squared_hinge'}, default='squared_hinge'
        Specifies the loss function. 'hinge' is the standard SVM loss
        (used e.g. by the SVC class) while 'squared_hinge' is the
        square of the hinge loss. The combination of ``penalty='l1'``
        and ``loss='hinge'`` is not supported.

    dual : bool, default=True
        Select the algorithm to either solve the dual or primal
        optimization problem. Prefer dual=False when n_samples > n_features.

    tol : float, default=1e-4
        Tolerance for stopping criteria.

    C : float, default=1.0
        Regularization parameter. The strength of the regularization is
        inversely proportional to C. Must be strictly positive.

    multi_class : {'ovr', 'crammer_singer'}, default='ovr'
        Determines the multi-class strategy if `y` contains more than
        two classes.
        ``"ovr"`` trains n_classes one-vs-rest classifiers, while
        ``"crammer_singer"`` optimizes a joint objective over all classes.
        While `crammer_singer` is interesting from a theoretical perspective
        as it is consistent, it is seldom used in practice as it rarely leads
        to better accuracy and is more expensive to compute.
        If ``"crammer_singer"`` is chosen, the options loss, penalty and dual
        will be ignored.

    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to false, no intercept will be used in calculations
        (i.e. data is expected to be already centered).

    intercept_scaling : float, default=1.0
        When self.fit_intercept is True, instance vector x becomes
        ``[x, self.intercept_scaling]``,
        i.e. a "synthetic" feature with constant value equals to
        intercept_scaling is appended to the instance vector.
        The intercept becomes intercept_scaling * synthetic feature weight
        Note! the synthetic feature weight is subject to l1/l2 regularization
        as all other features.
        To lessen the effect of regularization on synthetic feature weight
        (and therefore on the intercept) intercept_scaling has to be increased.

    class_weight : dict or 'balanced', default=None
        Set the parameter C of class i to ``class_weight[i]*C`` for
        SVC. If not given, all classes are supposed to have
        weight one.
        The "balanced" mode uses the values of y to automatically adjust
        weights inversely proportional to class frequencies in the input data
        as ``n_samples / (n_classes * np.bincount(y))``.

    verbose : int, default=0
        Enable verbose output. Note that this setting takes advantage of a
        per-process runtime setting in liblinear that, if enabled, may not work
        properly in a multithreaded context.

    random_state : int, RandomState instance or None, default=None
        Controls the pseudo random number generation for shuffling the data for
        the dual coordinate descent (if ``dual=True``). When ``dual=False`` the
        underlying implementation of :class:`LinearSVC` is not random and
        ``random_state`` has no effect on the results.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    max_iter : int, default=1000
        The maximum number of iterations to be run.

    Attributes
    ----------
    coef_ : ndarray of shape (1, n_features) if n_classes == 2 \
            else (n_classes, n_features)
        Weights assigned to the features (coefficients in the primal
        problem).

        ``coef_`` is a readonly property derived from ``raw_coef_`` that
        follows the internal memory layout of liblinear.

    intercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,)
        Constants in decision function.

    classes_ : ndarray of shape (n_classes,)
        The unique classes labels.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    n_iter_ : int
        Maximum number of iterations run across all classes.

    See Also
    --------
    SVC : Implementation of Support Vector Machine classifier using libsvm:
        the kernel can be non-linear but its SMO algorithm does not
        scale to large number of samples as LinearSVC does.

        Furthermore SVC multi-class mode is implemented using one
        vs one scheme while LinearSVC uses one vs the rest. It is
        possible to implement one vs the rest with SVC by using the
        :class:`~sklearn.multiclass.OneVsRestClassifier` wrapper.

        Finally SVC can fit dense data without memory copy if the input
        is C-contiguous. Sparse data will still incur memory copy though.

    sklearn.linear_model.SGDClassifier : SGDClassifier can optimize the same
        cost function as LinearSVC
        by adjusting the penalty and loss parameters. In addition it requires
        less memory, allows incremental (online) learning, and implements
        various loss functions and regularization regimes.

    Notes
    -----
    The underlying C implementation uses a random number generator to
    select features when fitting the model. It is thus not uncommon
    to have slightly different results for the same input data. If
    that happens, try with a smaller ``tol`` parameter.

    The underlying implementation, liblinear, uses a sparse internal
    representation for the data that will incur a memory copy.

    Predict output may not match that of standalone liblinear in certain
    cases. See :ref:`differences from liblinear <liblinear_differences>`
    in the narrative documentation.

    References
    ----------
    `LIBLINEAR: A Library for Large Linear Classification
    <https://www.csie.ntu.edu.tw/~cjlin/liblinear/>`__

    Examples
    --------
    >>> from sklearn.svm import LinearSVC
    >>> from sklearn.pipeline import make_pipeline
    >>> from sklearn.preprocessing import StandardScaler
    >>> from sklearn.datasets import make_classification
    >>> X, y = make_classification(n_features=4, random_state=0)
    >>> clf = make_pipeline(StandardScaler(),
    ...                     LinearSVC(random_state=0, tol=1e-5))
    >>> clf.fit(X, y)
    Pipeline(steps=[('standardscaler', StandardScaler()),
                    ('linearsvc', LinearSVC(random_state=0, tol=1e-05))])

    >>> print(clf.named_steps['linearsvc'].coef_)
    [[0.141... 0.526... 0.679... 0.493...]]

    >>> print(clf.named_steps['linearsvc'].intercept_)
    [0.1693...]
    >>> print(clf.predict([[0, 0, 0, 0]]))
    [1]
    """

    # Declarative constraints checked by ``self._validate_params()`` in `fit`.
    _parameter_constraints: dict = {
        "penalty": [StrOptions({"l1", "l2"})],
        "loss": [StrOptions({"hinge", "squared_hinge"})],
        "dual": ["boolean"],
        "tol": [Interval(Real, 0.0, None, closed="neither")],
        "C": [Interval(Real, 0.0, None, closed="neither")],
        "multi_class": [StrOptions({"ovr", "crammer_singer"})],
        "fit_intercept": ["boolean"],
        "intercept_scaling": [Interval(Real, 0, None, closed="neither")],
        "class_weight": [None, dict, StrOptions({"balanced"})],
        "verbose": ["verbose"],
        "random_state": ["random_state"],
        "max_iter": [Interval(Integral, 0, None, closed="left")],
    }

    def __init__(
        self,
        penalty="l2",
        loss="squared_hinge",
        *,
        dual=True,
        tol=1e-4,
        C=1.0,
        multi_class="ovr",
        fit_intercept=True,
        intercept_scaling=1,
        class_weight=None,
        verbose=0,
        random_state=None,
        max_iter=1000,
    ):
        # Parameters are stored untouched; they are validated in `fit`.
        self.penalty = penalty
        self.loss = loss
        self.dual = dual
        self.tol = tol
        self.C = C
        self.multi_class = multi_class
        self.fit_intercept = fit_intercept
        self.intercept_scaling = intercept_scaling
        self.class_weight = class_weight
        self.verbose = verbose
        self.random_state = random_state
        self.max_iter = max_iter

    def fit(self, X, y, sample_weight=None):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples,)
            Target vector relative to X.

        sample_weight : array-like of shape (n_samples,), default=None
            Array of weights that are assigned to individual
            samples. If not provided,
            then each sample is given unit weight.

            .. versionadded:: 0.18

        Returns
        -------
        self : object
            An instance of the estimator.
        """
        self._validate_params()

        # The input is requested as float64, C-ordered, CSR when sparse, and
        # without large (64-bit) sparse indices.
        X, y = self._validate_data(
            X,
            y,
            accept_sparse="csr",
            dtype=np.float64,
            order="C",
            accept_large_sparse=False,
        )
        check_classification_targets(y)
        self.classes_ = np.unique(y)

        self.coef_, self.intercept_, n_iter_ = _fit_liblinear(
            X,
            y,
            self.C,
            self.fit_intercept,
            self.intercept_scaling,
            self.class_weight,
            self.penalty,
            self.dual,
            self.verbose,
            self.max_iter,
            self.tol,
            self.random_state,
            self.multi_class,
            self.loss,
            sample_weight=sample_weight,
        )
        # Backward compatibility: _fit_liblinear is used both by LinearSVC/R
        # and LogisticRegression but LogisticRegression sets a structured
        # `n_iter_` attribute with information about the underlying OvR fits
        # while LinearSVC/R only reports the maximum value.
        self.n_iter_ = n_iter_.max().item()

        if self.multi_class == "crammer_singer" and len(self.classes_) == 2:
            # Collapse the two per-class rows into a single row (second row
            # minus first row) so that `coef_` has the documented binary
            # shape (1, n_features); the intercept is reduced the same way.
            self.coef_ = (self.coef_[1] - self.coef_[0]).reshape(1, -1)
            if self.fit_intercept:
                intercept = self.intercept_[1] - self.intercept_[0]
                self.intercept_ = np.array([intercept])

        return self

    def _more_tags(self):
        """Return estimator tags listing `check_sample_weights_invariance`
        under `_xfail_checks`, together with the reason."""
        return {
            "_xfail_checks": {
                "check_sample_weights_invariance": (
                    "zero sample_weight is not equivalent to removing samples"
                ),
            }
        }
|
|
|
|
|
|
class LinearSVR(RegressorMixin, LinearModel):
    """Linear Support Vector Regression.

    Similar to SVR with parameter kernel='linear', but implemented in terms of
    liblinear rather than libsvm, so it has more flexibility in the choice of
    penalties and loss functions and should scale better to large numbers of
    samples.

    This class supports both dense and sparse input.

    Read more in the :ref:`User Guide <svm_regression>`.

    .. versionadded:: 0.16

    Parameters
    ----------
    epsilon : float, default=0.0
        Epsilon parameter in the epsilon-insensitive loss function. Note
        that the value of this parameter depends on the scale of the target
        variable y. If unsure, set ``epsilon=0``.

    tol : float, default=1e-4
        Tolerance for stopping criteria.

    C : float, default=1.0
        Regularization parameter. The strength of the regularization is
        inversely proportional to C. Must be strictly positive.

    loss : {'epsilon_insensitive', 'squared_epsilon_insensitive'}, \
            default='epsilon_insensitive'
        Specifies the loss function. The epsilon-insensitive loss
        (standard SVR) is the L1 loss, while the squared epsilon-insensitive
        loss ('squared_epsilon_insensitive') is the L2 loss.

    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to false, no intercept will be used in calculations
        (i.e. data is expected to be already centered).

    intercept_scaling : float, default=1.0
        When self.fit_intercept is True, instance vector x becomes
        [x, self.intercept_scaling],
        i.e. a "synthetic" feature with constant value equals to
        intercept_scaling is appended to the instance vector.
        The intercept becomes intercept_scaling * synthetic feature weight
        Note! the synthetic feature weight is subject to l1/l2 regularization
        as all other features.
        To lessen the effect of regularization on synthetic feature weight
        (and therefore on the intercept) intercept_scaling has to be increased.

    dual : bool, default=True
        Select the algorithm to either solve the dual or primal
        optimization problem. Prefer dual=False when n_samples > n_features.

    verbose : int, default=0
        Enable verbose output. Note that this setting takes advantage of a
        per-process runtime setting in liblinear that, if enabled, may not work
        properly in a multithreaded context.

    random_state : int, RandomState instance or None, default=None
        Controls the pseudo random number generation for shuffling the data.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    max_iter : int, default=1000
        The maximum number of iterations to be run.

    Attributes
    ----------
    coef_ : ndarray of shape (n_features,)
        Weights assigned to the features (coefficients in the primal
        problem).

        `coef_` is a readonly property derived from `raw_coef_` that
        follows the internal memory layout of liblinear.

    intercept_ : ndarray of shape (1,)
        Constants in decision function.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    n_iter_ : int
        Maximum number of iterations run.

    See Also
    --------
    LinearSVC : Implementation of Support Vector Machine classifier using the
        same library as this class (liblinear).

    SVR : Implementation of Support Vector Machine regression using libsvm:
        the kernel can be non-linear but its SMO algorithm does not
        scale to large number of samples as LinearSVR does.

    sklearn.linear_model.SGDRegressor : SGDRegressor can optimize the same cost
        function as LinearSVR
        by adjusting the penalty and loss parameters. In addition it requires
        less memory, allows incremental (online) learning, and implements
        various loss functions and regularization regimes.

    Examples
    --------
    >>> from sklearn.svm import LinearSVR
    >>> from sklearn.pipeline import make_pipeline
    >>> from sklearn.preprocessing import StandardScaler
    >>> from sklearn.datasets import make_regression
    >>> X, y = make_regression(n_features=4, random_state=0)
    >>> regr = make_pipeline(StandardScaler(),
    ...                      LinearSVR(random_state=0, tol=1e-5))
    >>> regr.fit(X, y)
    Pipeline(steps=[('standardscaler', StandardScaler()),
                    ('linearsvr', LinearSVR(random_state=0, tol=1e-05))])

    >>> print(regr.named_steps['linearsvr'].coef_)
    [18.582... 27.023... 44.357... 64.522...]
    >>> print(regr.named_steps['linearsvr'].intercept_)
    [-4...]
    >>> print(regr.predict([[0, 0, 0, 0]]))
    [-2.384...]
    """

    # Declarative constraints checked by ``self._validate_params()`` in `fit`.
    _parameter_constraints: dict = {
        "epsilon": [Real],
        "tol": [Interval(Real, 0.0, None, closed="neither")],
        "C": [Interval(Real, 0.0, None, closed="neither")],
        "loss": [StrOptions({"epsilon_insensitive", "squared_epsilon_insensitive"})],
        "fit_intercept": ["boolean"],
        "intercept_scaling": [Interval(Real, 0, None, closed="neither")],
        "dual": ["boolean"],
        "verbose": ["verbose"],
        "random_state": ["random_state"],
        "max_iter": [Interval(Integral, 0, None, closed="left")],
    }

    def __init__(
        self,
        *,
        epsilon=0.0,
        tol=1e-4,
        C=1.0,
        loss="epsilon_insensitive",
        fit_intercept=True,
        intercept_scaling=1.0,
        dual=True,
        verbose=0,
        random_state=None,
        max_iter=1000,
    ):
        # Parameters are stored untouched; they are validated in `fit`.
        self.epsilon = epsilon
        self.tol = tol
        self.C = C
        self.loss = loss
        self.fit_intercept = fit_intercept
        self.intercept_scaling = intercept_scaling
        self.dual = dual
        self.verbose = verbose
        self.random_state = random_state
        self.max_iter = max_iter

    def fit(self, X, y, sample_weight=None):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples,)
            Target vector relative to X.

        sample_weight : array-like of shape (n_samples,), default=None
            Array of weights that are assigned to individual
            samples. If not provided,
            then each sample is given unit weight.

            .. versionadded:: 0.18

        Returns
        -------
        self : object
            An instance of the estimator.
        """
        self._validate_params()

        # The input is requested as float64, C-ordered, CSR when sparse, and
        # without large (64-bit) sparse indices.
        X, y = self._validate_data(
            X,
            y,
            accept_sparse="csr",
            dtype=np.float64,
            order="C",
            accept_large_sparse=False,
        )
        penalty = "l2"  # SVR only accepts l2 penalty
        self.coef_, self.intercept_, n_iter_ = _fit_liblinear(
            X,
            y,
            self.C,
            self.fit_intercept,
            self.intercept_scaling,
            None,  # class_weight slot: not applicable to regression
            penalty,
            self.dual,
            self.verbose,
            self.max_iter,
            self.tol,
            self.random_state,
            loss=self.loss,
            epsilon=self.epsilon,
            sample_weight=sample_weight,
        )
        # Expose the coefficients as a flat 1-D vector.
        self.coef_ = self.coef_.ravel()
        # Backward compatibility: _fit_liblinear is used both by LinearSVC/R
        # and LogisticRegression but LogisticRegression sets a structured
        # `n_iter_` attribute with information about the underlying OvR fits
        # while LinearSVC/R only reports the maximum value.
        self.n_iter_ = n_iter_.max().item()

        return self

    def _more_tags(self):
        """Return estimator tags listing `check_sample_weights_invariance`
        under `_xfail_checks`, together with the reason."""
        return {
            "_xfail_checks": {
                "check_sample_weights_invariance": (
                    "zero sample_weight is not equivalent to removing samples"
                ),
            }
        }
|
|
|
|
|
|
class SVC(BaseSVC):
    """C-Support Vector Classification.

    The implementation is based on libsvm. The fit time scales at least
    quadratically with the number of samples and may be impractical
    beyond tens of thousands of samples. For large datasets
    consider using :class:`~sklearn.svm.LinearSVC` or
    :class:`~sklearn.linear_model.SGDClassifier` instead, possibly after a
    :class:`~sklearn.kernel_approximation.Nystroem` transformer or
    other :ref:`kernel_approximation`.

    The multiclass support is handled according to a one-vs-one scheme.

    For details on the precise mathematical formulation of the provided
    kernel functions and how `gamma`, `coef0` and `degree` affect each
    other, see the corresponding section in the narrative documentation:
    :ref:`svm_kernels`.

    Read more in the :ref:`User Guide <svm_classification>`.

    Parameters
    ----------
    C : float, default=1.0
        Regularization parameter. The strength of the regularization is
        inversely proportional to C. Must be strictly positive. The penalty
        is a squared l2 penalty.

    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable,  \
        default='rbf'
        Specifies the kernel type to be used in the algorithm.
        If none is given, 'rbf' will be used. If a callable is given it is
        used to pre-compute the kernel matrix from data matrices; that matrix
        should be an array of shape ``(n_samples, n_samples)``.

    degree : int, default=3
        Degree of the polynomial kernel function ('poly').
        Must be non-negative. Ignored by all other kernels.

    gamma : {'scale', 'auto'} or float, default='scale'
        Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.

        - if ``gamma='scale'`` (default) is passed then it uses
          1 / (n_features * X.var()) as value of gamma,
        - if 'auto', uses 1 / n_features
        - if float, must be non-negative.

        .. versionchanged:: 0.22
           The default value of ``gamma`` changed from 'auto' to 'scale'.

    coef0 : float, default=0.0
        Independent term in kernel function.
        It is only significant in 'poly' and 'sigmoid'.

    shrinking : bool, default=True
        Whether to use the shrinking heuristic.
        See the :ref:`User Guide <shrinking_svm>`.

    probability : bool, default=False
        Whether to enable probability estimates. This must be enabled prior
        to calling `fit`, will slow down that method as it internally uses
        5-fold cross-validation, and `predict_proba` may be inconsistent with
        `predict`. Read more in the :ref:`User Guide <scores_probabilities>`.

    tol : float, default=1e-3
        Tolerance for stopping criterion.

    cache_size : float, default=200
        Specify the size of the kernel cache (in MB).

    class_weight : dict or 'balanced', default=None
        Set the parameter C of class i to class_weight[i]*C for
        SVC. If not given, all classes are supposed to have
        weight one.
        The "balanced" mode uses the values of y to automatically adjust
        weights inversely proportional to class frequencies in the input data
        as ``n_samples / (n_classes * np.bincount(y))``.

    verbose : bool, default=False
        Enable verbose output. Note that this setting takes advantage of a
        per-process runtime setting in libsvm that, if enabled, may not work
        properly in a multithreaded context.

    max_iter : int, default=-1
        Hard limit on iterations within solver, or -1 for no limit.

    decision_function_shape : {'ovo', 'ovr'}, default='ovr'
        Whether to return a one-vs-rest ('ovr') decision function of shape
        (n_samples, n_classes) as all other classifiers, or the original
        one-vs-one ('ovo') decision function of libsvm which has shape
        (n_samples, n_classes * (n_classes - 1) / 2). However, note that
        internally, one-vs-one ('ovo') is always used as a multi-class strategy
        to train models; an ovr matrix is only constructed from the ovo matrix.
        The parameter is ignored for binary classification.

        .. versionchanged:: 0.19
            decision_function_shape is 'ovr' by default.

        .. versionadded:: 0.17
           *decision_function_shape='ovr'* is recommended.

        .. versionchanged:: 0.17
           Deprecated *decision_function_shape='ovo' and None*.

    break_ties : bool, default=False
        If true, ``decision_function_shape='ovr'``, and number of classes > 2,
        :term:`predict` will break ties according to the confidence values of
        :term:`decision_function`; otherwise the first class among the tied
        classes is returned. Please note that breaking ties comes at a
        relatively high computational cost compared to a simple predict.

        .. versionadded:: 0.22

    random_state : int, RandomState instance or None, default=None
        Controls the pseudo random number generation for shuffling the data for
        probability estimates. Ignored when `probability` is False.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Attributes
    ----------
    class_weight_ : ndarray of shape (n_classes,)
        Multipliers of parameter C for each class.
        Computed based on the ``class_weight`` parameter.

    classes_ : ndarray of shape (n_classes,)
        The classes labels.

    coef_ : ndarray of shape (n_classes * (n_classes - 1) / 2, n_features)
        Weights assigned to the features (coefficients in the primal
        problem). This is only available in the case of a linear kernel.

        `coef_` is a readonly property derived from `dual_coef_` and
        `support_vectors_`.

    dual_coef_ : ndarray of shape (n_classes - 1, n_SV)
        Dual coefficients of the support vector in the decision
        function (see :ref:`sgd_mathematical_formulation`), multiplied by
        their targets.
        For multiclass, coefficient for all 1-vs-1 classifiers.
        The layout of the coefficients in the multiclass case is somewhat
        non-trivial. See the :ref:`multi-class section of the User Guide
        <svm_multi_class>` for details.

    fit_status_ : int
        0 if correctly fitted, 1 otherwise (will raise warning)

    intercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)
        Constants in decision function.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    n_iter_ : ndarray of shape (n_classes * (n_classes - 1) // 2,)
        Number of iterations run by the optimization routine to fit the model.
        The shape of this attribute depends on the number of models optimized
        which in turn depends on the number of classes.

        .. versionadded:: 1.1

    support_ : ndarray of shape (n_SV,)
        Indices of support vectors.

    support_vectors_ : ndarray of shape (n_SV, n_features)
        Support vectors.

    n_support_ : ndarray of shape (n_classes,), dtype=int32
        Number of support vectors for each class.

    probA_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)
    probB_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)
        If `probability=True`, it corresponds to the parameters learned in
        Platt scaling to produce probability estimates from decision values.
        If `probability=False`, it's an empty array. Platt scaling uses the
        logistic function
        ``1 / (1 + exp(decision_value * probA_ + probB_))``
        where ``probA_`` and ``probB_`` are learned from the dataset [2]_. For
        more information on the multiclass case and training procedure see
        section 8 of [1]_.

    shape_fit_ : tuple of int of shape (n_dimensions_of_X,)
        Array dimensions of training vector ``X``.

    See Also
    --------
    SVR : Support Vector Machine for Regression implemented using libsvm.

    LinearSVC : Scalable Linear Support Vector Machine for classification
        implemented using liblinear. Check the See Also section of
        LinearSVC for more comparison element.

    References
    ----------
    .. [1] `LIBSVM: A Library for Support Vector Machines
        <http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_

    .. [2] `Platt, John (1999). "Probabilistic Outputs for Support Vector
        Machines and Comparisons to Regularized Likelihood Methods"
        <https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393>`_

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.pipeline import make_pipeline
    >>> from sklearn.preprocessing import StandardScaler
    >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
    >>> y = np.array([1, 1, 2, 2])
    >>> from sklearn.svm import SVC
    >>> clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
    >>> clf.fit(X, y)
    Pipeline(steps=[('standardscaler', StandardScaler()),
                    ('svc', SVC(gamma='auto'))])

    >>> print(clf.predict([[-0.8, -1]]))
    [1]
    """

    # NOTE(review): presumably read by the base class to select the libsvm
    # formulation -- confirm against BaseSVC / BaseLibSVM.
    _impl = "c_svc"

    def __init__(
        self,
        *,
        C=1.0,
        kernel="rbf",
        degree=3,
        gamma="scale",
        coef0=0.0,
        shrinking=True,
        probability=False,
        tol=1e-3,
        cache_size=200,
        class_weight=None,
        verbose=False,
        max_iter=-1,
        decision_function_shape="ovr",
        break_ties=False,
        random_state=None,
    ):
        # All public parameters are forwarded unchanged to the base class.
        # `nu` is not a parameter of this estimator, so a fixed 0.0 is passed.
        super().__init__(
            kernel=kernel,
            degree=degree,
            gamma=gamma,
            coef0=coef0,
            tol=tol,
            C=C,
            nu=0.0,
            shrinking=shrinking,
            probability=probability,
            cache_size=cache_size,
            class_weight=class_weight,
            verbose=verbose,
            max_iter=max_iter,
            decision_function_shape=decision_function_shape,
            break_ties=break_ties,
            random_state=random_state,
        )

    def _more_tags(self):
        """Return estimator tags listing `check_sample_weights_invariance`
        under `_xfail_checks`, together with the reason."""
        return {
            "_xfail_checks": {
                "check_sample_weights_invariance": (
                    "zero sample_weight is not equivalent to removing samples"
                ),
            }
        }
|
|
|
|
|
|
class NuSVC(BaseSVC):
|
|
"""Nu-Support Vector Classification.
|
|
|
|
Similar to SVC but uses a parameter to control the number of support
|
|
vectors.
|
|
|
|
The implementation is based on libsvm.
|
|
|
|
Read more in the :ref:`User Guide <svm_classification>`.
|
|
|
|
Parameters
|
|
----------
|
|
nu : float, default=0.5
|
|
An upper bound on the fraction of margin errors (see :ref:`User Guide
|
|
<nu_svc>`) and a lower bound of the fraction of support vectors.
|
|
Should be in the interval (0, 1].
|
|
|
|
kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, \
|
|
default='rbf'
|
|
Specifies the kernel type to be used in the algorithm.
|
|
If none is given, 'rbf' will be used. If a callable is given it is
|
|
used to precompute the kernel matrix.
|
|
|
|
degree : int, default=3
|
|
Degree of the polynomial kernel function ('poly').
|
|
Must be non-negative. Ignored by all other kernels.
|
|
|
|
gamma : {'scale', 'auto'} or float, default='scale'
|
|
Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
|
|
|
|
- if ``gamma='scale'`` (default) is passed then it uses
|
|
1 / (n_features * X.var()) as value of gamma,
|
|
- if 'auto', uses 1 / n_features
|
|
- if float, must be non-negative.
|
|
|
|
.. versionchanged:: 0.22
|
|
The default value of ``gamma`` changed from 'auto' to 'scale'.
|
|
|
|
coef0 : float, default=0.0
|
|
Independent term in kernel function.
|
|
It is only significant in 'poly' and 'sigmoid'.
|
|
|
|
shrinking : bool, default=True
|
|
Whether to use the shrinking heuristic.
|
|
See the :ref:`User Guide <shrinking_svm>`.
|
|
|
|
probability : bool, default=False
|
|
Whether to enable probability estimates. This must be enabled prior
|
|
to calling `fit`, will slow down that method as it internally uses
|
|
5-fold cross-validation, and `predict_proba` may be inconsistent with
|
|
`predict`. Read more in the :ref:`User Guide <scores_probabilities>`.
|
|
|
|
tol : float, default=1e-3
|
|
Tolerance for stopping criterion.
|
|
|
|
cache_size : float, default=200
|
|
Specify the size of the kernel cache (in MB).
|
|
|
|
class_weight : {dict, 'balanced'}, default=None
|
|
Set the parameter C of class i to class_weight[i]*C for
|
|
SVC. If not given, all classes are supposed to have
|
|
weight one. The "balanced" mode uses the values of y to automatically
|
|
adjust weights inversely proportional to class frequencies as
|
|
``n_samples / (n_classes * np.bincount(y))``.
|
|
|
|
verbose : bool, default=False
|
|
Enable verbose output. Note that this setting takes advantage of a
|
|
per-process runtime setting in libsvm that, if enabled, may not work
|
|
properly in a multithreaded context.
|
|
|
|
max_iter : int, default=-1
|
|
Hard limit on iterations within solver, or -1 for no limit.
|
|
|
|
decision_function_shape : {'ovo', 'ovr'}, default='ovr'
|
|
Whether to return a one-vs-rest ('ovr') decision function of shape
|
|
(n_samples, n_classes) as all other classifiers, or the original
|
|
one-vs-one ('ovo') decision function of libsvm which has shape
|
|
(n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one
|
|
('ovo') is always used as multi-class strategy. The parameter is
|
|
ignored for binary classification.
|
|
|
|
.. versionchanged:: 0.19
|
|
decision_function_shape is 'ovr' by default.
|
|
|
|
.. versionadded:: 0.17
|
|
*decision_function_shape='ovr'* is recommended.
|
|
|
|
.. versionchanged:: 0.17
|
|
Deprecated *decision_function_shape='ovo' and None*.
|
|
|
|
break_ties : bool, default=False
|
|
If true, ``decision_function_shape='ovr'``, and number of classes > 2,
|
|
:term:`predict` will break ties according to the confidence values of
|
|
:term:`decision_function`; otherwise the first class among the tied
|
|
classes is returned. Please note that breaking ties comes at a
|
|
relatively high computational cost compared to a simple predict.
|
|
|
|
.. versionadded:: 0.22
|
|
|
|
random_state : int, RandomState instance or None, default=None
|
|
Controls the pseudo random number generation for shuffling the data for
|
|
probability estimates. Ignored when `probability` is False.
|
|
Pass an int for reproducible output across multiple function calls.
|
|
See :term:`Glossary <random_state>`.
|
|
|
|
Attributes
|
|
----------
|
|
class_weight_ : ndarray of shape (n_classes,)
|
|
Multipliers of parameter C of each class.
|
|
Computed based on the ``class_weight`` parameter.
|
|
|
|
classes_ : ndarray of shape (n_classes,)
|
|
The unique classes labels.
|
|
|
|
coef_ : ndarray of shape (n_classes * (n_classes -1) / 2, n_features)
|
|
Weights assigned to the features (coefficients in the primal
|
|
problem). This is only available in the case of a linear kernel.
|
|
|
|
`coef_` is readonly property derived from `dual_coef_` and
|
|
`support_vectors_`.
|
|
|
|
dual_coef_ : ndarray of shape (n_classes - 1, n_SV)
|
|
Dual coefficients of the support vector in the decision
|
|
function (see :ref:`sgd_mathematical_formulation`), multiplied by
|
|
their targets.
|
|
For multiclass, coefficient for all 1-vs-1 classifiers.
|
|
The layout of the coefficients in the multiclass case is somewhat
|
|
non-trivial. See the :ref:`multi-class section of the User Guide
|
|
<svm_multi_class>` for details.
|
|
|
|
fit_status_ : int
|
|
0 if correctly fitted, 1 if the algorithm did not converge.
|
|
|
|
intercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)
|
|
Constants in decision function.
|
|
|
|
n_features_in_ : int
|
|
Number of features seen during :term:`fit`.
|
|
|
|
.. versionadded:: 0.24
|
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
|
Names of features seen during :term:`fit`. Defined only when `X`
|
|
has feature names that are all strings.
|
|
|
|
.. versionadded:: 1.0
|
|
|
|
n_iter_ : ndarray of shape (n_classes * (n_classes - 1) // 2,)
|
|
Number of iterations run by the optimization routine to fit the model.
|
|
The shape of this attribute depends on the number of models optimized
|
|
which in turn depends on the number of classes.
|
|
|
|
.. versionadded:: 1.1
|
|
|
|
support_ : ndarray of shape (n_SV,)
|
|
Indices of support vectors.
|
|
|
|
support_vectors_ : ndarray of shape (n_SV, n_features)
|
|
Support vectors.
|
|
|
|
n_support_ : ndarray of shape (n_classes,), dtype=int32
|
|
Number of support vectors for each class.
|
|
|
|
fit_status_ : int
|
|
0 if correctly fitted, 1 if the algorithm did not converge.
|
|
|
|
probA_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)
|
|
probB_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)
|
|
If `probability=True`, it corresponds to the parameters learned in
|
|
Platt scaling to produce probability estimates from decision values.
|
|
If `probability=False`, it's an empty array. Platt scaling uses the
|
|
logistic function
|
|
``1 / (1 + exp(decision_value * probA_ + probB_))``
|
|
where ``probA_`` and ``probB_`` are learned from the dataset [2]_. For
|
|
more information on the multiclass case and training procedure see
|
|
section 8 of [1]_.
|
|
|
|
shape_fit_ : tuple of int of shape (n_dimensions_of_X,)
|
|
Array dimensions of training vector ``X``.
|
|
|
|
See Also
|
|
--------
|
|
SVC : Support Vector Machine for classification using libsvm.
|
|
|
|
LinearSVC : Scalable linear Support Vector Machine for classification using
|
|
liblinear.
|
|
|
|
References
|
|
----------
|
|
.. [1] `LIBSVM: A Library for Support Vector Machines
|
|
<http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_
|
|
|
|
.. [2] `Platt, John (1999). "Probabilistic Outputs for Support Vector
|
|
Machines and Comparisons to Regularized Likelihood Methods"
|
|
<https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393>`_
|
|
|
|
Examples
|
|
--------
|
|
>>> import numpy as np
|
|
>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
|
|
>>> y = np.array([1, 1, 2, 2])
|
|
>>> from sklearn.pipeline import make_pipeline
|
|
>>> from sklearn.preprocessing import StandardScaler
|
|
>>> from sklearn.svm import NuSVC
|
|
>>> clf = make_pipeline(StandardScaler(), NuSVC())
|
|
>>> clf.fit(X, y)
|
|
Pipeline(steps=[('standardscaler', StandardScaler()), ('nusvc', NuSVC())])
|
|
>>> print(clf.predict([[-0.8, -1]]))
|
|
[1]
|
|
"""
|
|
|
|
# Implementation identifier for this estimator (presumably selects the libsvm
# "nu_svc" solver in the base class -- TODO confirm against BaseLibSVM).
_impl = "nu_svc"

# Copy the constraints inherited from BaseSVC, then specialise them:
# ``nu`` must be a real number in the half-open interval (0, 1], and ``C`` is
# dropped because NuSVC has no ``C`` parameter (``__init__`` forwards C=0.0).
_parameter_constraints: dict = dict(BaseSVC._parameter_constraints)
_parameter_constraints["nu"] = [Interval(Real, 0.0, 1.0, closed="right")]
del _parameter_constraints["C"]
|
|
|
|
def __init__(
    self,
    *,
    nu=0.5,
    kernel="rbf",
    degree=3,
    gamma="scale",
    coef0=0.0,
    shrinking=True,
    probability=False,
    tol=1e-3,
    cache_size=200,
    class_weight=None,
    verbose=False,
    max_iter=-1,
    decision_function_shape="ovr",
    break_ties=False,
    random_state=None,
):
    # All user-facing parameters are forwarded unchanged to the base class.
    # The only fixed value is ``C``: this estimator is parameterised by
    # ``nu`` instead, so a constant 0.0 is passed for ``C``.
    super().__init__(
        # Regularisation.
        nu=nu,
        C=0.0,
        # Kernel configuration.
        kernel=kernel,
        degree=degree,
        gamma=gamma,
        coef0=coef0,
        # Solver behaviour.
        tol=tol,
        shrinking=shrinking,
        cache_size=cache_size,
        max_iter=max_iter,
        verbose=verbose,
        # Classification-specific options.
        probability=probability,
        class_weight=class_weight,
        decision_function_shape=decision_function_shape,
        break_ties=break_ties,
        random_state=random_state,
    )
|
|
|
|
def _more_tags(self):
|
|
return {
|
|
"_xfail_checks": {
|
|
"check_methods_subset_invariance": (
|
|
"fails for the decision_function method"
|
|
),
|
|
"check_class_weight_classifiers": "class_weight is ignored.",
|
|
"check_sample_weights_invariance": (
|
|
"zero sample_weight is not equivalent to removing samples"
|
|
),
|
|
"check_classifiers_one_label_sample_weights": (
|
|
"specified nu is infeasible for the fit."
|
|
),
|
|
}
|
|
}
|
|
|
|
|
|
class SVR(RegressorMixin, BaseLibSVM):
    """Epsilon-Support Vector Regression.

    The free parameters in the model are C and epsilon.

    The implementation is based on libsvm. The fit time complexity
    is more than quadratic with the number of samples which makes it hard
    to scale to datasets with more than a couple of 10000 samples. For large
    datasets consider using :class:`~sklearn.svm.LinearSVR` or
    :class:`~sklearn.linear_model.SGDRegressor` instead, possibly after a
    :class:`~sklearn.kernel_approximation.Nystroem` transformer or
    other :ref:`kernel_approximation`.

    Read more in the :ref:`User Guide <svm_regression>`.

    Parameters
    ----------
    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, \
        default='rbf'
        Specifies the kernel type to be used in the algorithm.
        If none is given, 'rbf' will be used. If a callable is given it is
        used to precompute the kernel matrix.

    degree : int, default=3
        Degree of the polynomial kernel function ('poly').
        Must be non-negative. Ignored by all other kernels.

    gamma : {'scale', 'auto'} or float, default='scale'
        Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.

        - if ``gamma='scale'`` (default) is passed then it uses
          1 / (n_features * X.var()) as value of gamma,
        - if 'auto', uses 1 / n_features
        - if float, must be non-negative.

        .. versionchanged:: 0.22
           The default value of ``gamma`` changed from 'auto' to 'scale'.

    coef0 : float, default=0.0
        Independent term in kernel function.
        It is only significant in 'poly' and 'sigmoid'.

    tol : float, default=1e-3
        Tolerance for stopping criterion.

    C : float, default=1.0
        Regularization parameter. The strength of the regularization is
        inversely proportional to C. Must be strictly positive.
        The penalty is a squared l2 penalty.

    epsilon : float, default=0.1
        Epsilon in the epsilon-SVR model. It specifies the epsilon-tube
        within which no penalty is associated in the training loss function
        with points predicted within a distance epsilon from the actual
        value. Must be non-negative.

    shrinking : bool, default=True
        Whether to use the shrinking heuristic.
        See the :ref:`User Guide <shrinking_svm>`.

    cache_size : float, default=200
        Specify the size of the kernel cache (in MB).

    verbose : bool, default=False
        Enable verbose output. Note that this setting takes advantage of a
        per-process runtime setting in libsvm that, if enabled, may not work
        properly in a multithreaded context.

    max_iter : int, default=-1
        Hard limit on iterations within solver, or -1 for no limit.

    Attributes
    ----------
    class_weight_ : ndarray of shape (n_classes,)
        Multipliers of parameter C for each class.
        Computed based on the ``class_weight`` parameter.

        .. deprecated:: 1.2
            `class_weight_` was deprecated in version 1.2 and will be removed in 1.4.

    coef_ : ndarray of shape (1, n_features)
        Weights assigned to the features (coefficients in the primal
        problem). This is only available in the case of a linear kernel.

        `coef_` is readonly property derived from `dual_coef_` and
        `support_vectors_`.

    dual_coef_ : ndarray of shape (1, n_SV)
        Coefficients of the support vector in the decision function.

    fit_status_ : int
        0 if correctly fitted, 1 otherwise (will raise warning)

    intercept_ : ndarray of shape (1,)
        Constants in decision function.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    n_iter_ : int
        Number of iterations run by the optimization routine to fit the model.

        .. versionadded:: 1.1

    n_support_ : ndarray of shape (1,), dtype=int32
        Number of support vectors.

    shape_fit_ : tuple of int of shape (n_dimensions_of_X,)
        Array dimensions of training vector ``X``.

    support_ : ndarray of shape (n_SV,)
        Indices of support vectors.

    support_vectors_ : ndarray of shape (n_SV, n_features)
        Support vectors.

    See Also
    --------
    NuSVR : Support Vector Machine for regression implemented using libsvm
        using a parameter to control the number of support vectors.

    LinearSVR : Scalable Linear Support Vector Machine for regression
        implemented using liblinear.

    References
    ----------
    .. [1] `LIBSVM: A Library for Support Vector Machines
        <http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_

    .. [2] `Platt, John (1999). "Probabilistic Outputs for Support Vector
        Machines and Comparisons to Regularized Likelihood Methods"
        <https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393>`_

    Examples
    --------
    >>> from sklearn.svm import SVR
    >>> from sklearn.pipeline import make_pipeline
    >>> from sklearn.preprocessing import StandardScaler
    >>> import numpy as np
    >>> n_samples, n_features = 10, 5
    >>> rng = np.random.RandomState(0)
    >>> y = rng.randn(n_samples)
    >>> X = rng.randn(n_samples, n_features)
    >>> regr = make_pipeline(StandardScaler(), SVR(C=1.0, epsilon=0.2))
    >>> regr.fit(X, y)
    Pipeline(steps=[('standardscaler', StandardScaler()),
                    ('svr', SVR(epsilon=0.2))])
    """

    # Implementation identifier for this estimator (presumably selects the
    # libsvm "epsilon_svr" solver in the base class -- TODO confirm).
    _impl = "epsilon_svr"

    # Start from the base-class constraints and drop the parameters that this
    # estimator does not expose in ``__init__``.
    _parameter_constraints: dict = {**BaseLibSVM._parameter_constraints}
    for unused_param in ["class_weight", "nu", "probability", "random_state"]:
        _parameter_constraints.pop(unused_param)
    # A loop at class scope binds its variable in the class namespace; remove
    # it so it does not linger as an accidental ``SVR.unused_param`` attribute.
    del unused_param

    def __init__(
        self,
        *,
        kernel="rbf",
        degree=3,
        gamma="scale",
        coef0=0.0,
        tol=1e-3,
        C=1.0,
        epsilon=0.1,
        shrinking=True,
        cache_size=200,
        verbose=False,
        max_iter=-1,
    ):
        # ``nu``, ``probability``, ``class_weight`` and ``random_state`` are not
        # parameters of this estimator; fixed placeholder values are forwarded
        # for them to the shared base-class constructor.
        super().__init__(
            kernel=kernel,
            degree=degree,
            gamma=gamma,
            coef0=coef0,
            tol=tol,
            C=C,
            nu=0.0,
            epsilon=epsilon,
            verbose=verbose,
            shrinking=shrinking,
            probability=False,
            cache_size=cache_size,
            class_weight=None,
            max_iter=max_iter,
            random_state=None,
        )

    # TODO(1.4): Remove
    @deprecated(  # type: ignore
        "Attribute `class_weight_` was deprecated in version 1.2 and will be removed in"
        " 1.4."
    )
    @property
    def class_weight_(self):
        # Deprecated attribute: always an empty array for this estimator.
        return np.empty(0)

    def _more_tags(self):
        # Register ``check_sample_weights_invariance`` as an expected failure,
        # together with the reason.
        return {
            "_xfail_checks": {
                "check_sample_weights_invariance": (
                    "zero sample_weight is not equivalent to removing samples"
                ),
            }
        }
|
|
|
|
|
|
class NuSVR(RegressorMixin, BaseLibSVM):
    """Nu Support Vector Regression.

    Similar to NuSVC, for regression, uses a parameter nu to control
    the number of support vectors. However, unlike NuSVC, where nu
    replaces C, here nu replaces the parameter epsilon of epsilon-SVR.

    The implementation is based on libsvm.

    Read more in the :ref:`User Guide <svm_regression>`.

    Parameters
    ----------
    nu : float, default=0.5
        An upper bound on the fraction of training errors and a lower bound of
        the fraction of support vectors. Should be in the interval (0, 1]. By
        default 0.5 will be taken.

    C : float, default=1.0
        Penalty parameter C of the error term.

    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, \
        default='rbf'
        Specifies the kernel type to be used in the algorithm.
        If none is given, 'rbf' will be used. If a callable is given it is
        used to precompute the kernel matrix.

    degree : int, default=3
        Degree of the polynomial kernel function ('poly').
        Must be non-negative. Ignored by all other kernels.

    gamma : {'scale', 'auto'} or float, default='scale'
        Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.

        - if ``gamma='scale'`` (default) is passed then it uses
          1 / (n_features * X.var()) as value of gamma,
        - if 'auto', uses 1 / n_features
        - if float, must be non-negative.

        .. versionchanged:: 0.22
           The default value of ``gamma`` changed from 'auto' to 'scale'.

    coef0 : float, default=0.0
        Independent term in kernel function.
        It is only significant in 'poly' and 'sigmoid'.

    shrinking : bool, default=True
        Whether to use the shrinking heuristic.
        See the :ref:`User Guide <shrinking_svm>`.

    tol : float, default=1e-3
        Tolerance for stopping criterion.

    cache_size : float, default=200
        Specify the size of the kernel cache (in MB).

    verbose : bool, default=False
        Enable verbose output. Note that this setting takes advantage of a
        per-process runtime setting in libsvm that, if enabled, may not work
        properly in a multithreaded context.

    max_iter : int, default=-1
        Hard limit on iterations within solver, or -1 for no limit.

    Attributes
    ----------
    class_weight_ : ndarray of shape (n_classes,)
        Multipliers of parameter C for each class.
        Computed based on the ``class_weight`` parameter.

        .. deprecated:: 1.2
            `class_weight_` was deprecated in version 1.2 and will be removed in 1.4.

    coef_ : ndarray of shape (1, n_features)
        Weights assigned to the features (coefficients in the primal
        problem). This is only available in the case of a linear kernel.

        `coef_` is readonly property derived from `dual_coef_` and
        `support_vectors_`.

    dual_coef_ : ndarray of shape (1, n_SV)
        Coefficients of the support vector in the decision function.

    fit_status_ : int
        0 if correctly fitted, 1 otherwise (will raise warning)

    intercept_ : ndarray of shape (1,)
        Constants in decision function.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    n_iter_ : int
        Number of iterations run by the optimization routine to fit the model.

        .. versionadded:: 1.1

    n_support_ : ndarray of shape (1,), dtype=int32
        Number of support vectors.

    shape_fit_ : tuple of int of shape (n_dimensions_of_X,)
        Array dimensions of training vector ``X``.

    support_ : ndarray of shape (n_SV,)
        Indices of support vectors.

    support_vectors_ : ndarray of shape (n_SV, n_features)
        Support vectors.

    See Also
    --------
    NuSVC : Support Vector Machine for classification implemented with libsvm
        with a parameter to control the number of support vectors.

    SVR : Epsilon Support Vector Machine for regression implemented with
        libsvm.

    References
    ----------
    .. [1] `LIBSVM: A Library for Support Vector Machines
        <http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_

    .. [2] `Platt, John (1999). "Probabilistic Outputs for Support Vector
        Machines and Comparisons to Regularized Likelihood Methods"
        <https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393>`_

    Examples
    --------
    >>> from sklearn.svm import NuSVR
    >>> from sklearn.pipeline import make_pipeline
    >>> from sklearn.preprocessing import StandardScaler
    >>> import numpy as np
    >>> n_samples, n_features = 10, 5
    >>> np.random.seed(0)
    >>> y = np.random.randn(n_samples)
    >>> X = np.random.randn(n_samples, n_features)
    >>> regr = make_pipeline(StandardScaler(), NuSVR(C=1.0, nu=0.1))
    >>> regr.fit(X, y)
    Pipeline(steps=[('standardscaler', StandardScaler()),
                    ('nusvr', NuSVR(nu=0.1))])
    """

    # Implementation identifier for this estimator (presumably selects the
    # libsvm "nu_svr" solver in the base class -- TODO confirm).
    _impl = "nu_svr"

    # Start from the base-class constraints and drop the parameters that this
    # estimator does not expose in ``__init__``.
    _parameter_constraints: dict = {**BaseLibSVM._parameter_constraints}
    for unused_param in ["class_weight", "epsilon", "probability", "random_state"]:
        _parameter_constraints.pop(unused_param)
    # A loop at class scope binds its variable in the class namespace; remove
    # it so it does not linger as an accidental ``NuSVR.unused_param`` attribute.
    del unused_param

    def __init__(
        self,
        *,
        nu=0.5,
        C=1.0,
        kernel="rbf",
        degree=3,
        gamma="scale",
        coef0=0.0,
        shrinking=True,
        tol=1e-3,
        cache_size=200,
        verbose=False,
        max_iter=-1,
    ):
        # ``epsilon``, ``probability``, ``class_weight`` and ``random_state``
        # are not parameters of this estimator; fixed placeholder values are
        # forwarded for them to the shared base-class constructor.
        super().__init__(
            kernel=kernel,
            degree=degree,
            gamma=gamma,
            coef0=coef0,
            tol=tol,
            C=C,
            nu=nu,
            epsilon=0.0,
            shrinking=shrinking,
            probability=False,
            cache_size=cache_size,
            class_weight=None,
            verbose=verbose,
            max_iter=max_iter,
            random_state=None,
        )

    # TODO(1.4): Remove
    @deprecated(  # type: ignore
        "Attribute `class_weight_` was deprecated in version 1.2 and will be removed in"
        " 1.4."
    )
    @property
    def class_weight_(self):
        # Deprecated attribute: always an empty array for this estimator.
        return np.empty(0)

    def _more_tags(self):
        # Register ``check_sample_weights_invariance`` as an expected failure,
        # together with the reason.
        return {
            "_xfail_checks": {
                "check_sample_weights_invariance": (
                    "zero sample_weight is not equivalent to removing samples"
                ),
            }
        }
|
|
|
|
|
|
class OneClassSVM(OutlierMixin, BaseLibSVM):
    """Unsupervised Outlier Detection.

    Estimate the support of a high-dimensional distribution.

    The implementation is based on libsvm.

    Read more in the :ref:`User Guide <outlier_detection>`.

    Parameters
    ----------
    kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, \
        default='rbf'
        Specifies the kernel type to be used in the algorithm.
        If none is given, 'rbf' will be used. If a callable is given it is
        used to precompute the kernel matrix.

    degree : int, default=3
        Degree of the polynomial kernel function ('poly').
        Must be non-negative. Ignored by all other kernels.

    gamma : {'scale', 'auto'} or float, default='scale'
        Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.

        - if ``gamma='scale'`` (default) is passed then it uses
          1 / (n_features * X.var()) as value of gamma,
        - if 'auto', uses 1 / n_features
        - if float, must be non-negative.

        .. versionchanged:: 0.22
           The default value of ``gamma`` changed from 'auto' to 'scale'.

    coef0 : float, default=0.0
        Independent term in kernel function.
        It is only significant in 'poly' and 'sigmoid'.

    tol : float, default=1e-3
        Tolerance for stopping criterion.

    nu : float, default=0.5
        An upper bound on the fraction of training
        errors and a lower bound of the fraction of support
        vectors. Should be in the interval (0, 1]. By default 0.5
        will be taken.

    shrinking : bool, default=True
        Whether to use the shrinking heuristic.
        See the :ref:`User Guide <shrinking_svm>`.

    cache_size : float, default=200
        Specify the size of the kernel cache (in MB).

    verbose : bool, default=False
        Enable verbose output. Note that this setting takes advantage of a
        per-process runtime setting in libsvm that, if enabled, may not work
        properly in a multithreaded context.

    max_iter : int, default=-1
        Hard limit on iterations within solver, or -1 for no limit.

    Attributes
    ----------
    class_weight_ : ndarray of shape (n_classes,)
        Multipliers of parameter C for each class.
        Computed based on the ``class_weight`` parameter.

        .. deprecated:: 1.2
            `class_weight_` was deprecated in version 1.2 and will be removed in 1.4.

    coef_ : ndarray of shape (1, n_features)
        Weights assigned to the features (coefficients in the primal
        problem). This is only available in the case of a linear kernel.

        `coef_` is readonly property derived from `dual_coef_` and
        `support_vectors_`.

    dual_coef_ : ndarray of shape (1, n_SV)
        Coefficients of the support vectors in the decision function.

    fit_status_ : int
        0 if correctly fitted, 1 otherwise (will raise warning)

    intercept_ : ndarray of shape (1,)
        Constant in the decision function.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    n_iter_ : int
        Number of iterations run by the optimization routine to fit the model.

        .. versionadded:: 1.1

    n_support_ : ndarray of shape (n_classes,), dtype=int32
        Number of support vectors for each class.

    offset_ : float
        Offset used to define the decision function from the raw scores.
        We have the relation: decision_function = score_samples - `offset_`.
        The offset is the opposite of `intercept_` and is provided for
        consistency with other outlier detection algorithms.

        .. versionadded:: 0.20

    shape_fit_ : tuple of int of shape (n_dimensions_of_X,)
        Array dimensions of training vector ``X``.

    support_ : ndarray of shape (n_SV,)
        Indices of support vectors.

    support_vectors_ : ndarray of shape (n_SV, n_features)
        Support vectors.

    See Also
    --------
    sklearn.linear_model.SGDOneClassSVM : Solves linear One-Class SVM using
        Stochastic Gradient Descent.
    sklearn.neighbors.LocalOutlierFactor : Unsupervised Outlier Detection using
        Local Outlier Factor (LOF).
    sklearn.ensemble.IsolationForest : Isolation Forest Algorithm.

    Examples
    --------
    >>> from sklearn.svm import OneClassSVM
    >>> X = [[0], [0.44], [0.45], [0.46], [1]]
    >>> clf = OneClassSVM(gamma='auto').fit(X)
    >>> clf.predict(X)
    array([-1,  1,  1,  1, -1])
    >>> clf.score_samples(X)
    array([1.7798..., 2.0547..., 2.0556..., 2.0561..., 1.7332...])
    """

    # Implementation identifier for this estimator (presumably selects the
    # libsvm "one_class" solver in the base class -- TODO confirm).
    _impl = "one_class"

    # Start from the base-class constraints and drop the parameters that this
    # estimator does not expose in ``__init__``.
    _parameter_constraints: dict = {**BaseLibSVM._parameter_constraints}
    for unused_param in ["C", "class_weight", "epsilon", "probability", "random_state"]:
        _parameter_constraints.pop(unused_param)
    # A loop at class scope binds its variable in the class namespace; remove
    # it so it does not linger as an accidental class attribute.
    del unused_param

    def __init__(
        self,
        *,
        kernel="rbf",
        degree=3,
        gamma="scale",
        coef0=0.0,
        tol=1e-3,
        nu=0.5,
        shrinking=True,
        cache_size=200,
        verbose=False,
        max_iter=-1,
    ):
        # Forward by keyword, like the sibling estimators in this module, so
        # the call does not silently depend on the positional order of the
        # base-class constructor. ``C``, ``epsilon``, ``probability``,
        # ``class_weight`` and ``random_state`` are not parameters of this
        # estimator and receive fixed placeholder values.
        super().__init__(
            kernel=kernel,
            degree=degree,
            gamma=gamma,
            coef0=coef0,
            tol=tol,
            C=0.0,
            nu=nu,
            epsilon=0.0,
            shrinking=shrinking,
            probability=False,
            cache_size=cache_size,
            class_weight=None,
            verbose=verbose,
            max_iter=max_iter,
            random_state=None,
        )

    # TODO(1.4): Remove
    @deprecated(  # type: ignore
        "Attribute `class_weight_` was deprecated in version 1.2 and will be removed in"
        " 1.4."
    )
    @property
    def class_weight_(self):
        # Deprecated attribute: always an empty array for this estimator.
        return np.empty(0)

    def fit(self, X, y=None, sample_weight=None):
        """Detect the soft boundary of the set of samples X.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Set of samples, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        sample_weight : array-like of shape (n_samples,), default=None
            Per-sample weights. Rescale C per sample. Higher weights
            force the classifier to put more emphasis on these points.

        Returns
        -------
        self : object
            Fitted estimator.

        Notes
        -----
        If X is not a C-ordered contiguous array it is copied.
        """
        # ``y`` is ignored: the base-class ``fit`` is given a vector of ones,
        # one entry per sample, as the target.
        super().fit(X, np.ones(_num_samples(X)), sample_weight=sample_weight)
        # The offset is the negated fitted intercept.
        self.offset_ = -self._intercept_
        return self

    def decision_function(self, X):
        """Signed distance to the separating hyperplane.

        Signed distance is positive for an inlier and negative for an outlier.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The data matrix.

        Returns
        -------
        dec : ndarray of shape (n_samples,)
            Returns the decision function of the samples.
        """
        # Flatten the base-class result to one value per sample.
        return self._decision_function(X).ravel()

    def score_samples(self, X):
        """Raw scoring function of the samples.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The data matrix.

        Returns
        -------
        score_samples : ndarray of shape (n_samples,)
            Returns the (unshifted) scoring function of the samples.
        """
        # Undo the shift applied by the decision function.
        return self.decision_function(X) + self.offset_

    def predict(self, X):
        """Perform classification on samples in X.

        For a one-class model, +1 or -1 is returned.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features) or \
                (n_samples_test, n_samples_train)
            For kernel="precomputed", the expected shape of X is
            (n_samples_test, n_samples_train).

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Class labels for samples in X.
        """
        y = super().predict(X)
        # Return the labels as a platform-integer array.
        return np.asarray(y, dtype=np.intp)

    def _more_tags(self):
        # Register ``check_sample_weights_invariance`` as an expected failure,
        # together with the reason.
        return {
            "_xfail_checks": {
                "check_sample_weights_invariance": (
                    "zero sample_weight is not equivalent to removing samples"
                ),
            }
        }
|