# Authors: Alexandre Gramfort <alexandre.gramfort@inria.fr>
#          Vincent Michel <vincent.michel@inria.fr>
#          Gilles Louppe <g.louppe@gmail.com>
#
# License: BSD 3 clause

"""Recursive feature elimination for feature ranking"""

import warnings
from numbers import Integral

import numpy as np
from joblib import effective_n_jobs

from ..base import BaseEstimator, MetaEstimatorMixin, _fit_context, clone, is_classifier
from ..metrics import check_scoring
from ..model_selection import check_cv
from ..model_selection._validation import _score
from ..utils._param_validation import HasMethods, Interval, RealNotInt
from ..utils.metadata_routing import (
    _raise_for_unsupported_routing,
    _RoutingNotSupportedMixin,
)
from ..utils.metaestimators import _safe_split, available_if
from ..utils.parallel import Parallel, delayed
from ..utils.validation import check_is_fitted
from ._base import SelectorMixin, _get_feature_importances


def _rfe_single_fit(rfe, estimator, X, y, train, test, scorer):
    """Return the score and n_features per step for a fit across one fold."""
    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    rfe._fit(
        X_train,
        y_train,
        lambda estimator, features: _score(
            # TODO(SLEP6): pass score_params here
            estimator,
            X_test[:, features],
            y_test,
            scorer,
            score_params=None,
        ),
    )

    return rfe.step_scores_, rfe.step_n_features_
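
# Illustrative note (not part of the library): for a fold with 10 features,
# step=1 and an inner RFE with n_features_to_select=1, the pair returned by
# `_rfe_single_fit` is (step_scores_, step_n_features_) with
# step_n_features_ == [10, 9, ..., 2, 1], and step_scores_ holding the
# test-fold score of the estimator refit at each of those feature counts.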


def _estimator_has(attr):
    """Check if we can delegate a method to the underlying estimator.

    First, we check the fitted `estimator_` if available, otherwise we check the
    unfitted `estimator`. We raise the original `AttributeError` if `attr` does
    not exist. This function is used together with `available_if`.
    """

    def check(self):
        if hasattr(self, "estimator_"):
            getattr(self.estimator_, attr)
        else:
            getattr(self.estimator, attr)

        return True

    return check
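
# Illustrative sketch of the `available_if` pattern used below: decorating a
# method with `available_if(_estimator_has("predict"))` makes it visible via
# `hasattr` only when the wrapped estimator itself exposes `predict`, e.g.
#
#   hasattr(RFE(SVR(kernel="linear")), "predict")  # True
#   hasattr(RFE(PCA()), "predict")                 # False, PCA has no predict
#
# PCA stands in here purely as an example of an estimator without `predict`;
# it is not otherwise a sensible RFE base estimator.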


class RFE(_RoutingNotSupportedMixin, SelectorMixin, MetaEstimatorMixin, BaseEstimator):
    """Feature ranking with recursive feature elimination.

    Given an external estimator that assigns weights to features (e.g., the
    coefficients of a linear model), the goal of recursive feature elimination
    (RFE) is to select features by recursively considering smaller and smaller
    sets of features. First, the estimator is trained on the initial set of
    features and the importance of each feature is obtained either through
    any specific attribute or callable.
    Then, the least important features are pruned from the current set of
    features. That procedure is recursively repeated on the pruned set until
    the desired number of features to select is eventually reached.

    Read more in the :ref:`User Guide <rfe>`.

    Parameters
    ----------
    estimator : ``Estimator`` instance
        A supervised learning estimator with a ``fit`` method that provides
        information about feature importance
        (e.g. `coef_`, `feature_importances_`).

    n_features_to_select : int or float, default=None
        The number of features to select. If `None`, half of the features are
        selected. If integer, the parameter is the absolute number of features
        to select. If float between 0 and 1, it is the fraction of features to
        select.

        .. versionchanged:: 0.24
           Added float values for fractions.

    step : int or float, default=1
        If greater than or equal to 1, then ``step`` corresponds to the
        (integer) number of features to remove at each iteration.
        If within (0.0, 1.0), then ``step`` corresponds to the percentage
        (rounded down) of features to remove at each iteration.

    verbose : int, default=0
        Controls verbosity of output.

    importance_getter : str or callable, default='auto'
        If 'auto', uses the feature importance either through a `coef_`
        or `feature_importances_` attribute of the estimator.

        Also accepts a string that specifies an attribute name/path
        for extracting feature importance (implemented with `attrgetter`).
        For example, give `regressor_.coef_` in case of
        :class:`~sklearn.compose.TransformedTargetRegressor` or
        `named_steps.clf.feature_importances_` in case of
        :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.

        If `callable`, overrides the default feature importance getter.
        The callable is passed the fitted estimator and it should
        return importance for each feature.

        .. versionadded:: 0.24

    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,)
        The class labels. Only available when `estimator` is a classifier.

    estimator_ : ``Estimator`` instance
        The fitted estimator used to select features.

    n_features_ : int
        The number of selected features.

    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    ranking_ : ndarray of shape (n_features,)
        The feature ranking, such that ``ranking_[i]`` corresponds to the
        ranking position of the i-th feature. Selected (i.e., estimated
        best) features are assigned rank 1.

    support_ : ndarray of shape (n_features,)
        The mask of selected features.

    See Also
    --------
    RFECV : Recursive feature elimination with built-in cross-validated
        selection of the best number of features.
    SelectFromModel : Feature selection based on thresholds of importance
        weights.
    SequentialFeatureSelector : Sequential cross-validation based feature
        selection. Does not rely on importance weights.

    Notes
    -----
    Allows NaN/Inf in the input if the underlying estimator does as well.

    References
    ----------
    .. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., "Gene selection
           for cancer classification using support vector machines",
           Mach. Learn., 46(1-3), 389--422, 2002.

    Examples
    --------
    The following example shows how to retrieve the 5 most informative
    features in the Friedman #1 dataset.

    >>> from sklearn.datasets import make_friedman1
    >>> from sklearn.feature_selection import RFE
    >>> from sklearn.svm import SVR
    >>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
    >>> estimator = SVR(kernel="linear")
    >>> selector = RFE(estimator, n_features_to_select=5, step=1)
    >>> selector = selector.fit(X, y)
    >>> selector.support_
    array([ True,  True,  True,  True,  True, False, False, False, False,
           False])
    >>> selector.ranking_
    array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])
    """

    _parameter_constraints: dict = {
        "estimator": [HasMethods(["fit"])],
        "n_features_to_select": [
            None,
            Interval(RealNotInt, 0, 1, closed="right"),
            Interval(Integral, 0, None, closed="neither"),
        ],
        "step": [
            Interval(Integral, 0, None, closed="neither"),
            Interval(RealNotInt, 0, 1, closed="neither"),
        ],
        "verbose": ["verbose"],
        "importance_getter": [str, callable],
    }
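
    # Illustrative reading of the constraints above (values hypothetical):
    # `n_features_to_select=0.5` is accepted as a fraction (RealNotInt in
    # (0, 1]), `n_features_to_select=3` as an absolute count (Integral > 0),
    # while `n_features_to_select=0` or `-1` fails validation at fit time.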

    def __init__(
        self,
        estimator,
        *,
        n_features_to_select=None,
        step=1,
        verbose=0,
        importance_getter="auto",
    ):
        self.estimator = estimator
        self.n_features_to_select = n_features_to_select
        self.step = step
        self.importance_getter = importance_getter
        self.verbose = verbose

    @property
    def _estimator_type(self):
        return self.estimator._estimator_type

    @property
    def classes_(self):
        """Class labels available when `estimator` is a classifier.

        Returns
        -------
        ndarray of shape (n_classes,)
        """
        return self.estimator_.classes_

    @_fit_context(
        # RFE.estimator is not validated yet
        prefer_skip_nested_validation=False
    )
    def fit(self, X, y, **fit_params):
        """Fit the RFE model and then the underlying estimator on the selected features.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples.

        y : array-like of shape (n_samples,)
            The target values.

        **fit_params : dict
            Additional parameters passed to the `fit` method of the underlying
            estimator.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        _raise_for_unsupported_routing(self, "fit", **fit_params)
        return self._fit(X, y, **fit_params)
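
    # Illustrative use of ``fit_params`` (a sketch; assumes the wrapped
    # estimator accepts ``sample_weight``):
    #
    #   rfe = RFE(LogisticRegression(), n_features_to_select=3)
    #   rfe.fit(X, y, sample_weight=w)
    #
    # Each keyword is forwarded verbatim to every internal ``estimator.fit``
    # call made during the elimination loop in ``_fit`` below.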

    def _fit(self, X, y, step_score=None, **fit_params):
        # Parameter step_score controls the calculation of self.step_scores_
        # step_score is not exposed to users and is used when implementing RFECV
        # self.step_scores_ will not be calculated when calling _fit through fit

        X, y = self._validate_data(
            X,
            y,
            accept_sparse="csc",
            ensure_min_features=2,
            force_all_finite=False,
            multi_output=True,
        )

        # Initialization
        n_features = X.shape[1]
        if self.n_features_to_select is None:
            n_features_to_select = n_features // 2
        elif isinstance(self.n_features_to_select, Integral):  # int
            n_features_to_select = self.n_features_to_select
            if n_features_to_select > n_features:
                warnings.warn(
                    (
                        f"Found {n_features_to_select=} > {n_features=}. There will be"
                        " no feature selection and all features will be kept."
                    ),
                    UserWarning,
                )
        else:  # float
            n_features_to_select = int(n_features * self.n_features_to_select)

        if 0.0 < self.step < 1.0:
            step = int(max(1, self.step * n_features))
        else:
            step = int(self.step)

        support_ = np.ones(n_features, dtype=bool)
        ranking_ = np.ones(n_features, dtype=int)

        if step_score:
            self.step_n_features_ = []
            self.step_scores_ = []

        # Elimination
        while np.sum(support_) > n_features_to_select:
            # Remaining features
            features = np.arange(n_features)[support_]

            # Rank the remaining features
            estimator = clone(self.estimator)
            if self.verbose > 0:
                print("Fitting estimator with %d features." % np.sum(support_))

            estimator.fit(X[:, features], y, **fit_params)

            # Get importances and rank them
            importances = _get_feature_importances(
                estimator,
                self.importance_getter,
                transform_func="square",
            )
            ranks = np.argsort(importances)

            # for sparse case ranks is matrix
            ranks = np.ravel(ranks)

            # Eliminate the worst features
            threshold = min(step, np.sum(support_) - n_features_to_select)
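            # Worked example (illustrative): with n_features=10, step=3 and
            # n_features_to_select=5, the first pass removes min(3, 10 - 5) = 3
            # features and the second removes min(3, 7 - 5) = 2, so the final
            # iteration removes fewer than ``step`` features in order to land
            # exactly on the requested count.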

            # Compute step score on the previous selection iteration
            # because 'estimator' must use features
            # that have not been eliminated yet
            if step_score:
                self.step_n_features_.append(len(features))
                self.step_scores_.append(step_score(estimator, features))
            support_[features[ranks][:threshold]] = False
            ranking_[np.logical_not(support_)] += 1

        # Set final attributes
        features = np.arange(n_features)[support_]
        self.estimator_ = clone(self.estimator)
        self.estimator_.fit(X[:, features], y, **fit_params)

        # Compute step score when only n_features_to_select features left
        if step_score:
            self.step_n_features_.append(len(features))
            self.step_scores_.append(step_score(self.estimator_, features))
        self.n_features_ = support_.sum()
        self.support_ = support_
        self.ranking_ = ranking_

        return self
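
    # Ranking semantics (illustrative): with 4 features, step=1 and
    # n_features_to_select=2, the feature dropped first ends with rank 3,
    # the one dropped second with rank 2, and the two survivors share rank 1,
    # because every already-eliminated feature is incremented again on each
    # later pass through the loop above.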

    @available_if(_estimator_has("predict"))
    def predict(self, X):
        """Reduce X to the selected features and predict using the estimator.

        Parameters
        ----------
        X : array of shape [n_samples, n_features]
            The input samples.

        Returns
        -------
        y : array of shape [n_samples]
            The predicted target values.
        """
        check_is_fitted(self)
        return self.estimator_.predict(self.transform(X))

    @available_if(_estimator_has("score"))
    def score(self, X, y, **fit_params):
        """Reduce X to the selected features and return the score of the estimator.

        Parameters
        ----------
        X : array of shape [n_samples, n_features]
            The input samples.

        y : array of shape [n_samples]
            The target values.

        **fit_params : dict
            Parameters to pass to the `score` method of the underlying
            estimator.

            .. versionadded:: 1.0

        Returns
        -------
        score : float
            Score of the underlying base estimator computed with the selected
            features returned by `rfe.transform(X)` and `y`.
        """
        check_is_fitted(self)
        return self.estimator_.score(self.transform(X), y, **fit_params)

    def _get_support_mask(self):
        check_is_fitted(self)
        return self.support_

    @available_if(_estimator_has("decision_function"))
    def decision_function(self, X):
        """Compute the decision function of ``X``.

        Parameters
        ----------
        X : {array-like or sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, it will be converted to
            ``dtype=np.float32`` and if a sparse matrix is provided
            to a sparse ``csr_matrix``.

        Returns
        -------
        score : array, shape = [n_samples, n_classes] or [n_samples]
            The decision function of the input samples. The order of the
            classes corresponds to that in the attribute :term:`classes_`.
            Regression and binary classification produce an array of shape
            [n_samples].
        """
        check_is_fitted(self)
        return self.estimator_.decision_function(self.transform(X))

    @available_if(_estimator_has("predict_proba"))
    def predict_proba(self, X):
        """Predict class probabilities for X.

        Parameters
        ----------
        X : {array-like or sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, it will be converted to
            ``dtype=np.float32`` and if a sparse matrix is provided
            to a sparse ``csr_matrix``.

        Returns
        -------
        p : array of shape (n_samples, n_classes)
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute :term:`classes_`.
        """
        check_is_fitted(self)
        return self.estimator_.predict_proba(self.transform(X))

    @available_if(_estimator_has("predict_log_proba"))
    def predict_log_proba(self, X):
        """Predict class log-probabilities for X.

        Parameters
        ----------
        X : array of shape [n_samples, n_features]
            The input samples.

        Returns
        -------
        p : array of shape (n_samples, n_classes)
            The class log-probabilities of the input samples. The order of the
            classes corresponds to that in the attribute :term:`classes_`.
        """
        check_is_fitted(self)
        return self.estimator_.predict_log_proba(self.transform(X))

    def _more_tags(self):
        tags = {
            "poor_score": True,
            "requires_y": True,
            "allow_nan": True,
        }

        # Adjust allow_nan if estimator explicitly defines `allow_nan`.
        if hasattr(self.estimator, "_get_tags"):
            tags["allow_nan"] = self.estimator._get_tags()["allow_nan"]

        return tags


class RFECV(RFE):
    """Recursive feature elimination with cross-validation to select features.

    The number of features selected is tuned automatically by fitting an :class:`RFE`
    selector on the different cross-validation splits (provided by the `cv` parameter).
    The performance of the :class:`RFE` selector is evaluated using `scorer` for
    different numbers of selected features and aggregated together. Finally, the scores
    are averaged across folds and the number of features selected is set to the number
    of features that maximizes the cross-validation score.

    See glossary entry for :term:`cross-validation estimator`.

    Read more in the :ref:`User Guide <rfe>`.

    Parameters
    ----------
    estimator : ``Estimator`` instance
        A supervised learning estimator with a ``fit`` method that provides
        information about feature importance either through a ``coef_``
        attribute or through a ``feature_importances_`` attribute.

    step : int or float, default=1
        If greater than or equal to 1, then ``step`` corresponds to the
        (integer) number of features to remove at each iteration.
        If within (0.0, 1.0), then ``step`` corresponds to the percentage
        (rounded down) of features to remove at each iteration.
        Note that the last iteration may remove fewer than ``step`` features in
        order to reach ``min_features_to_select``.

    min_features_to_select : int, default=1
        The minimum number of features to be selected. This number of features
        will always be scored, even if the difference between the original
        feature count and ``min_features_to_select`` isn't divisible by
        ``step``.

        .. versionadded:: 0.20

    cv : int, cross-validation generator or an iterable, default=None
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 5-fold cross-validation,
        - integer, to specify the number of folds,
        - :term:`CV splitter`,
        - An iterable yielding (train, test) splits as arrays of indices.

        For integer/None inputs, if the estimator is a classifier and ``y`` is
        either binary or multiclass,
        :class:`~sklearn.model_selection.StratifiedKFold` is used. In all
        other cases, :class:`~sklearn.model_selection.KFold` is used.

        Refer to the :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

        .. versionchanged:: 0.22
            ``cv`` default value of None changed from 3-fold to 5-fold.

    scoring : str, callable or None, default=None
        A string (see model evaluation documentation) or
        a scorer callable object / function with signature
        ``scorer(estimator, X, y)``.

    verbose : int, default=0
        Controls verbosity of output.

    n_jobs : int or None, default=None
        Number of cores to run in parallel while fitting across folds.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

        .. versionadded:: 0.18

    importance_getter : str or callable, default='auto'
        If 'auto', uses the feature importance either through a `coef_`
        or `feature_importances_` attribute of the estimator.

        Also accepts a string that specifies an attribute name/path
        for extracting feature importance.
        For example, give `regressor_.coef_` in case of
        :class:`~sklearn.compose.TransformedTargetRegressor` or
        `named_steps.clf.feature_importances_` in case of
        :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.

        If `callable`, overrides the default feature importance getter.
        The callable is passed the fitted estimator and it should
        return importance for each feature.

        .. versionadded:: 0.24

    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,)
        The class labels. Only available when `estimator` is a classifier.

    estimator_ : ``Estimator`` instance
        The fitted estimator used to select features.

    cv_results_ : dict of ndarrays
        All arrays (values of the dictionary) are sorted in ascending order
        by the number of features used (i.e., the first element of the array
        represents the models that used the least number of features, while the
        last element represents the models that used all available features).
        This dictionary contains the following keys:

        split(k)_test_score : ndarray of shape (n_subsets_of_features,)
            The cross-validation scores across the (k)th fold.

        mean_test_score : ndarray of shape (n_subsets_of_features,)
            Mean of scores over the folds.

        std_test_score : ndarray of shape (n_subsets_of_features,)
            Standard deviation of scores over the folds.

        n_features : ndarray of shape (n_subsets_of_features,)
            Number of features used at each step.

        .. versionadded:: 1.0

    n_features_ : int
        The number of selected features with cross-validation.

    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    ranking_ : ndarray of shape (n_features,)
        The feature ranking, such that `ranking_[i]` corresponds to the
        ranking position of the i-th feature. Selected (i.e., estimated
        best) features are assigned rank 1.

    support_ : ndarray of shape (n_features,)
        The mask of selected features.

    See Also
    --------
    RFE : Recursive feature elimination.

    Notes
    -----
    The size of all values in ``cv_results_`` is equal to
    ``ceil((n_features - min_features_to_select) / step) + 1``,
    where step is the number of features removed at each iteration.

    Allows NaN/Inf in the input if the underlying estimator does as well.

    References
    ----------
    .. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., "Gene selection
           for cancer classification using support vector machines",
           Mach. Learn., 46(1-3), 389--422, 2002.

    Examples
    --------
    The following example shows how to retrieve the 5 informative features
    (not known a priori) in the Friedman #1 dataset.

    >>> from sklearn.datasets import make_friedman1
    >>> from sklearn.feature_selection import RFECV
    >>> from sklearn.svm import SVR
    >>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
    >>> estimator = SVR(kernel="linear")
    >>> selector = RFECV(estimator, step=1, cv=5)
    >>> selector = selector.fit(X, y)
    >>> selector.support_
    array([ True,  True,  True,  True,  True, False, False, False, False,
           False])
    >>> selector.ranking_
    array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])
    """

    _parameter_constraints: dict = {
        **RFE._parameter_constraints,
        "min_features_to_select": [Interval(Integral, 0, None, closed="neither")],
        "cv": ["cv_object"],
        "scoring": [None, str, callable],
        "n_jobs": [None, Integral],
    }
    _parameter_constraints.pop("n_features_to_select")

    def __init__(
        self,
        estimator,
        *,
        step=1,
        min_features_to_select=1,
        cv=None,
        scoring=None,
        verbose=0,
        n_jobs=None,
        importance_getter="auto",
    ):
        self.estimator = estimator
        self.step = step
        self.importance_getter = importance_getter
        self.cv = cv
        self.scoring = scoring
        self.verbose = verbose
        self.n_jobs = n_jobs
        self.min_features_to_select = min_features_to_select

    @_fit_context(
        # RFECV.estimator is not validated yet
        prefer_skip_nested_validation=False
    )
    def fit(self, X, y, groups=None):
        """Fit the RFE model and automatically tune the number of selected features.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples and
            `n_features` is the total number of features.

        y : array-like of shape (n_samples,)
            Target values (integers for classification, real numbers for
            regression).

        groups : array-like of shape (n_samples,) or None, default=None
            Group labels for the samples used while splitting the dataset into
            train/test set. Only used in conjunction with a "Group" :term:`cv`
            instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).

            .. versionadded:: 0.20

        Returns
        -------
        self : object
            Fitted estimator.
        """
        _raise_for_unsupported_routing(self, "fit", groups=groups)
        X, y = self._validate_data(
            X,
            y,
            accept_sparse="csr",
            ensure_min_features=2,
            force_all_finite=False,
            multi_output=True,
        )

        # Initialization
        cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator))
        scorer = check_scoring(self.estimator, scoring=self.scoring)

        # Build an RFE object, which will evaluate and score each possible
        # feature count, down to self.min_features_to_select
        n_features = X.shape[1]
        if self.min_features_to_select > n_features:
            warnings.warn(
                (
                    f"Found min_features_to_select={self.min_features_to_select} > "
                    f"{n_features=}. There will be no feature selection and all "
                    "features will be kept."
                ),
                UserWarning,
            )
        rfe = RFE(
            estimator=self.estimator,
            n_features_to_select=min(self.min_features_to_select, n_features),
            importance_getter=self.importance_getter,
            step=self.step,
            verbose=self.verbose,
        )

        # Determine the number of subsets of features by fitting across
        # the train folds and choosing the "features_to_select" parameter
        # that gives the least averaged error across all folds.

        # Note that joblib raises a non-picklable error for bound methods
        # even if n_jobs is set to 1 with the default multiprocessing
        # backend.
        # This branching is done to ensure that user code that sets n_jobs
        # to 1 and provides bound methods as scorers is not broken by the
        # addition of the n_jobs parameter in version 0.18.

        if effective_n_jobs(self.n_jobs) == 1:
            parallel, func = list, _rfe_single_fit
        else:
            parallel = Parallel(n_jobs=self.n_jobs)
            func = delayed(_rfe_single_fit)

        scores_features = parallel(
            func(rfe, self.estimator, X, y, train, test, scorer)
            for train, test in cv.split(X, y, groups)
        )
        scores, step_n_features = zip(*scores_features)

        step_n_features_rev = np.array(step_n_features[0])[::-1]
        scores = np.array(scores)

        # Reverse order such that lowest number of features is selected in case of tie.
        scores_sum_rev = np.sum(scores, axis=0)[::-1]
        n_features_to_select = step_n_features_rev[np.argmax(scores_sum_rev)]
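        # Tie-breaking, worked through (illustrative): with per-count summed
        # scores [0.8, 0.9, 0.9] for feature counts [3, 2, 1], the reversed
        # view is [0.9, 0.9, 0.8] over counts [1, 2, 3]; np.argmax returns
        # the first maximum, so the tie resolves to the smaller count, 1.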

        # Re-execute an elimination with best_k over the whole set
        rfe = RFE(
            estimator=self.estimator,
            n_features_to_select=n_features_to_select,
            step=self.step,
            importance_getter=self.importance_getter,
            verbose=self.verbose,
        )

        rfe.fit(X, y)

        # Set final attributes
        self.support_ = rfe.support_
        self.n_features_ = rfe.n_features_
        self.ranking_ = rfe.ranking_
        self.estimator_ = clone(self.estimator)
        self.estimator_.fit(self._transform(X), y)

        # reverse to stay consistent with before
        scores_rev = scores[:, ::-1]
        self.cv_results_ = {
            "mean_test_score": np.mean(scores_rev, axis=0),
            "std_test_score": np.std(scores_rev, axis=0),
            **{f"split{i}_test_score": scores_rev[i] for i in range(scores.shape[0])},
            "n_features": step_n_features_rev,
        }
        return self
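

# Illustrative usage of ``cv_results_`` (a sketch, not part of the library):
# after fitting an RFECV ``selector``, the chosen feature count can be
# cross-checked against the averaged fold scores.
#
#   import numpy as np
#   best = selector.cv_results_["n_features"][
#       np.argmax(selector.cv_results_["mean_test_score"])
#   ]
#   assert best == selector.n_features_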