Intelegentny_Pszczelarz/.venv/Lib/site-packages/sklearn/feature_selection/_sequential.py

"""
Sequential feature selection
"""
from numbers import Integral, Real

import numpy as np

import warnings

from ._base import SelectorMixin
from ..base import BaseEstimator, MetaEstimatorMixin, clone
from ..utils._param_validation import HasMethods, Hidden, Interval, StrOptions
from ..utils._tags import _safe_tags
from ..utils.validation import check_is_fitted
from ..model_selection import cross_val_score
from ..metrics import get_scorer_names


class SequentialFeatureSelector(SelectorMixin, MetaEstimatorMixin, BaseEstimator):
    """Transformer that performs Sequential Feature Selection.

    This Sequential Feature Selector adds (forward selection) or
    removes (backward selection) features to form a feature subset in a
    greedy fashion. At each stage, this estimator chooses the best feature to
    add or remove based on the cross-validation score of an estimator. In
    the case of unsupervised learning, this Sequential Feature Selector
    looks only at the features (X), not the desired outputs (y).

    Read more in the :ref:`User Guide <sequential_feature_selection>`.

    .. versionadded:: 0.24

    Parameters
    ----------
    estimator : estimator instance
        An unfitted estimator.

    n_features_to_select : "auto", int or float, default='warn'
        If `"auto"`, the behaviour depends on the `tol` parameter:

        - if `tol` is not `None`, then features are selected until the score
          improvement does not exceed `tol`.
        - otherwise, half of the features are selected.

        If integer, the parameter is the absolute number of features to select.
        If float between 0 and 1, it is the fraction of features to select.

        .. versionadded:: 1.1
           The option `"auto"` was added in version 1.1.

        .. deprecated:: 1.1
           The default changed from `None` to `"warn"` in 1.1 and will become
           `"auto"` in 1.3. `None` and `'warn'` will be removed in 1.3.
           To keep the same behaviour as `None`, set
           `n_features_to_select="auto"` and `tol=None`.

    tol : float, default=None
        If the score is not incremented by at least `tol` between two
        consecutive feature additions or removals, stop adding or removing.

        `tol` can be negative when removing features using `direction="backward"`.
        It can be useful to reduce the number of features at the cost of a small
        decrease in the score.

        `tol` is enabled only when `n_features_to_select` is `"auto"`.

        .. versionadded:: 1.1

    direction : {'forward', 'backward'}, default='forward'
        Whether to perform forward selection or backward selection.

    scoring : str or callable, default=None
        A single str (see :ref:`scoring_parameter`) or a callable
        (see :ref:`scoring`) to evaluate the predictions on the test set.

        NOTE that when using a custom scorer, it should return a single
        value.

        If None, the estimator's score method is used.

    cv : int, cross-validation generator or an iterable, default=5
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 5-fold cross validation,
        - integer, to specify the number of folds in a `(Stratified)KFold`,
        - :term:`CV splitter`,
        - An iterable yielding (train, test) splits as arrays of indices.

        For integer/None inputs, if the estimator is a classifier and ``y`` is
        either binary or multiclass, :class:`StratifiedKFold` is used. In all
        other cases, :class:`KFold` is used. These splitters are instantiated
        with `shuffle=False` so the splits will be the same across calls.

        Refer :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

    n_jobs : int, default=None
        Number of jobs to run in parallel. When evaluating a new feature to
        add or remove, the cross-validation procedure is parallel over the
        folds.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    Attributes
    ----------
    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    n_features_to_select_ : int
        The number of features that were selected.

    support_ : ndarray of shape (n_features,), dtype=bool
        The mask of selected features.

    See Also
    --------
    GenericUnivariateSelect : Univariate feature selector with configurable
        strategy.
    RFE : Recursive feature elimination based on importance weights.
    RFECV : Recursive feature elimination based on importance weights, with
        automatic selection of the number of features.
    SelectFromModel : Feature selection based on thresholds of importance
        weights.

    Examples
    --------
    >>> from sklearn.feature_selection import SequentialFeatureSelector
    >>> from sklearn.neighbors import KNeighborsClassifier
    >>> from sklearn.datasets import load_iris
    >>> X, y = load_iris(return_X_y=True)
    >>> knn = KNeighborsClassifier(n_neighbors=3)
    >>> sfs = SequentialFeatureSelector(knn, n_features_to_select=3)
    >>> sfs.fit(X, y)
    SequentialFeatureSelector(estimator=KNeighborsClassifier(n_neighbors=3),
                              n_features_to_select=3)
    >>> sfs.get_support()
    array([ True, False,  True,  True])
    >>> sfs.transform(X).shape
    (150, 3)
    """

    # Declarative constraints checked by `_validate_params()` at the start of
    # `fit`. `None` and "warn" for `n_features_to_select` are deprecated values
    # kept only for backward compatibility.
    _parameter_constraints: dict = {
        "estimator": [HasMethods(["fit"])],
        "n_features_to_select": [
            StrOptions({"auto", "warn"}, deprecated={"warn"}),
            Interval(Real, 0, 1, closed="right"),
            Interval(Integral, 0, None, closed="neither"),
            Hidden(None),
        ],
        "tol": [None, Interval(Real, None, None, closed="neither")],
        "direction": [StrOptions({"forward", "backward"})],
        "scoring": [None, StrOptions(set(get_scorer_names())), callable],
        "cv": ["cv_object"],
        "n_jobs": [None, Integral],
    }

    def __init__(
        self,
        estimator,
        *,
        n_features_to_select="warn",
        tol=None,
        direction="forward",
        scoring=None,
        cv=5,
        n_jobs=None,
    ):
        # Parameters are stored verbatim; all validation happens in `fit`.
        self.estimator = estimator
        self.n_features_to_select = n_features_to_select
        self.tol = tol
        self.direction = direction
        self.scoring = scoring
        self.cv = cv
        self.n_jobs = n_jobs

    def fit(self, X, y=None):
        """Learn the features to select from X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vectors, where `n_samples` is the number of samples and
            `n_features` is the number of predictors.

        y : array-like of shape (n_samples,), default=None
            Target values. This parameter may be ignored for
            unsupervised learning.

        Returns
        -------
        self : object
            Returns the instance itself.

        Raises
        ------
        ValueError
            If `tol` is set while `n_features_to_select` is `"warn"` or
            `None`, if an integer `n_features_to_select` is not in
            `[1, n_features - 1]`, or if `tol` is negative while
            `direction="forward"`.
        """
        self._validate_params()

        # FIXME: to be removed in 1.3
        if self.n_features_to_select in ("warn", None):
            # for backwards compatibility
            warnings.warn(
                "Leaving `n_features_to_select` to "
                "None is deprecated in 1.0 and will become 'auto' "
                "in 1.3. To keep the same behaviour as with None "
                "(i.e. select half of the features) and avoid "
                "this warning, you should manually set "
                "`n_features_to_select='auto'` and set tol=None "
                "when creating an instance.",
                FutureWarning,
            )

        tags = self._get_tags()
        X = self._validate_data(
            X,
            accept_sparse="csc",
            ensure_min_features=2,
            force_all_finite=not tags.get("allow_nan", True),
        )
        n_features = X.shape[1]

        # FIXME: to be fixed in 1.3
        error_msg = (
            "n_features_to_select must be either 'auto', 'warn', "
            "None, an integer in [1, n_features - 1] "
            "representing the absolute "
            "number of features, or a float in (0, 1] "
            "representing a percentage of features to "
            f"select. Got {self.n_features_to_select}"
        )
        if self.n_features_to_select in ("warn", None):
            if self.tol is not None:
                raise ValueError("tol is only enabled if `n_features_to_select='auto'`")
            self.n_features_to_select_ = n_features // 2
        elif self.n_features_to_select == "auto":
            if self.tol is not None:
                # With auto feature selection, `n_features_to_select_` will be updated
                # to `support_.sum()` after features are selected. Until then it
                # only serves as the maximum number of iterations.
                self.n_features_to_select_ = n_features - 1
            else:
                self.n_features_to_select_ = n_features // 2
        elif isinstance(self.n_features_to_select, Integral):
            if not 0 < self.n_features_to_select < n_features:
                raise ValueError(error_msg)
            self.n_features_to_select_ = self.n_features_to_select
        elif isinstance(self.n_features_to_select, Real):
            self.n_features_to_select_ = int(n_features * self.n_features_to_select)

        if self.tol is not None and self.tol < 0 and self.direction == "forward":
            raise ValueError("tol must be positive when doing forward selection")

        cloned_estimator = clone(self.estimator)

        # the current mask corresponds to the set of features:
        # - that we have already *selected* if we do forward selection
        # - that we have already *excluded* if we do backward selection
        current_mask = np.zeros(shape=n_features, dtype=bool)

        is_auto_select = self.tol is not None and self.n_features_to_select == "auto"
        if is_auto_select or self.direction == "forward":
            # Forward selection adds exactly `n_features_to_select_` features.
            # In tol-driven auto mode the value is an iteration cap that
            # applies to both directions.
            n_iterations = self.n_features_to_select_
        else:
            # Backward selection with a fixed target removes features until
            # `n_features_to_select_` remain. This includes "auto" with
            # `tol=None`, so that it keeps `n_features // 2` features exactly
            # like forward selection and the deprecated `None` setting do.
            n_iterations = n_features - self.n_features_to_select_

        # Starting from -inf makes the first score difference +inf, so the
        # first candidate always passes the `tol` check below.
        old_score = -np.inf
        for _ in range(n_iterations):
            new_feature_idx, new_score = self._get_best_new_feature_score(
                cloned_estimator, X, y, current_mask
            )
            if is_auto_select and ((new_score - old_score) < self.tol):
                break

            old_score = new_score
            current_mask[new_feature_idx] = True

        if self.direction == "backward":
            # The mask tracked excluded features; flip it to get the kept ones.
            current_mask = ~current_mask

        self.support_ = current_mask
        self.n_features_to_select_ = self.support_.sum()

        return self

    def _get_best_new_feature_score(self, estimator, X, y, current_mask):
        # Return the best new feature and its score to add to the current_mask,
        # i.e. return the best new feature and its score to add (resp. remove)
        # when doing forward selection (resp. backward selection).
        # This method only ranks the candidates; `fit` decides whether the
        # returned feature is actually added to the mask (it compares the
        # score gain against `tol` in auto mode).
        candidate_feature_indices = np.flatnonzero(~current_mask)
        scores = {}
        for feature_idx in candidate_feature_indices:
            candidate_mask = current_mask.copy()
            candidate_mask[feature_idx] = True
            if self.direction == "backward":
                # In backward mode the mask marks excluded features, so the
                # columns to evaluate are the complement.
                candidate_mask = ~candidate_mask
            X_new = X[:, candidate_mask]
            scores[feature_idx] = cross_val_score(
                estimator,
                X_new,
                y,
                cv=self.cv,
                scoring=self.scoring,
                n_jobs=self.n_jobs,
            ).mean()
        # On ties, `max` keeps the first (lowest-index) candidate.
        new_feature_idx = max(scores, key=scores.get)
        return new_feature_idx, scores[new_feature_idx]

    def _get_support_mask(self):
        # Return the boolean mask of selected features computed by `fit`.
        check_is_fitted(self)
        return self.support_

    def _more_tags(self):
        # NaN support is delegated to the wrapped estimator's own tag.
        return {
            "allow_nan": _safe_tags(self.estimator, key="allow_nan"),
        }
feature: "ANN commit 2" 2023-06-19 00:49:18 +02:00			`"""`
			`Sequential feature selection`
			`"""`
			`from numbers import Integral, Real`

			`import numpy as np`

			`import warnings`

			`from ._base import SelectorMixin`
			`from ..base import BaseEstimator, MetaEstimatorMixin, clone`
			`from ..utils._param_validation import HasMethods, Hidden, Interval, StrOptions`
			`from ..utils._tags import _safe_tags`
			`from ..utils.validation import check_is_fitted`
			`from ..model_selection import cross_val_score`
			`from ..metrics import get_scorer_names`


			`class SequentialFeatureSelector(SelectorMixin, MetaEstimatorMixin, BaseEstimator):`
			`"""Transformer that performs Sequential Feature Selection.`

			`This Sequential Feature Selector adds (forward selection) or`
			`removes (backward selection) features to form a feature subset in a`
			`greedy fashion. At each stage, this estimator chooses the best feature to`
			`add or remove based on the cross-validation score of an estimator. In`
			`the case of unsupervised learning, this Sequential Feature Selector`
			`looks only at the features (X), not the desired outputs (y).`

			Read more in the :ref:`User Guide <sequential_feature_selection>`.

			`.. versionadded:: 0.24`

			`Parameters`
			`----------`
			`estimator : estimator instance`
			`An unfitted estimator.`

			`n_features_to_select : "auto", int or float, default='warn'`
			If `"auto"`, the behaviour depends on the `tol` parameter:

			- if `tol` is not `None`, then features are selected until the score
			improvement does not exceed `tol`.
			`- otherwise, half of the features are selected.`

			`If integer, the parameter is the absolute number of features to select.`
			`If float between 0 and 1, it is the fraction of features to select.`

			`.. versionadded:: 1.1`
			The option `"auto"` was added in version 1.1.

			`.. deprecated:: 1.1`
			The default changed from `None` to `"warn"` in 1.1 and will become
			`"auto"` in 1.3. `None` and `'warn'` will be removed in 1.3.
			To keep the same behaviour as `None`, set
			`n_features_to_select="auto" and `tol=None`.

			`tol : float, default=None`
			If the score is not incremented by at least `tol` between two
			`consecutive feature additions or removals, stop adding or removing.`

			`tol` can be negative when removing features using `direction="backward"`.
			`It can be useful to reduce the number of features at the cost of a small`
			`decrease in the score.`

			`tol` is enabled only when `n_features_to_select` is `"auto"`.

			`.. versionadded:: 1.1`

			`direction : {'forward', 'backward'}, default='forward'`
			`Whether to perform forward selection or backward selection.`

			`scoring : str or callable, default=None`
			A single str (see :ref:`scoring_parameter`) or a callable
			(see :ref:`scoring`) to evaluate the predictions on the test set.

			`NOTE that when using a custom scorer, it should return a single`
			`value.`

			`If None, the estimator's score method is used.`

			`cv : int, cross-validation generator or an iterable, default=None`
			`Determines the cross-validation splitting strategy.`
			`Possible inputs for cv are:`

			`- None, to use the default 5-fold cross validation,`
			- integer, to specify the number of folds in a `(Stratified)KFold`,
			- :term:`CV splitter`,
			`- An iterable yielding (train, test) splits as arrays of indices.`

			For integer/None inputs, if the estimator is a classifier and ``y`` is
			either binary or multiclass, :class:`StratifiedKFold` is used. In all
			other cases, :class:`KFold` is used. These splitters are instantiated
			with `shuffle=False` so the splits will be the same across calls.

			Refer :ref:`User Guide <cross_validation>` for the various
			`cross-validation strategies that can be used here.`

			`n_jobs : int, default=None`
			`Number of jobs to run in parallel. When evaluating a new feature to`
			`add or remove, the cross-validation procedure is parallel over the`
			`folds.`
			``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
			``-1`` means using all processors. See :term:`Glossary <n_jobs>`
			`for more details.`

			`Attributes`
			`----------`
			`n_features_in_ : int`
			Number of features seen during :term:`fit`. Only defined if the
			`underlying estimator exposes such an attribute when fit.`

			`.. versionadded:: 0.24`

			feature_names_in_ : ndarray of shape (`n_features_in_`,)
			Names of features seen during :term:`fit`. Defined only when `X`
			`has feature names that are all strings.`

			`.. versionadded:: 1.0`

			`n_features_to_select_ : int`
			`The number of features that were selected.`

			`support_ : ndarray of shape (n_features,), dtype=bool`
			`The mask of selected features.`

			`See Also`
			`--------`
			`GenericUnivariateSelect : Univariate feature selector with configurable`
			`strategy.`
			`RFE : Recursive feature elimination based on importance weights.`
			`RFECV : Recursive feature elimination based on importance weights, with`
			`automatic selection of the number of features.`
			`SelectFromModel : Feature selection based on thresholds of importance`
			`weights.`

			`Examples`
			`--------`
			`>>> from sklearn.feature_selection import SequentialFeatureSelector`
			`>>> from sklearn.neighbors import KNeighborsClassifier`
			`>>> from sklearn.datasets import load_iris`
			`>>> X, y = load_iris(return_X_y=True)`
			`>>> knn = KNeighborsClassifier(n_neighbors=3)`
			`>>> sfs = SequentialFeatureSelector(knn, n_features_to_select=3)`
			`>>> sfs.fit(X, y)`
			`SequentialFeatureSelector(estimator=KNeighborsClassifier(n_neighbors=3),`
			`n_features_to_select=3)`
			`>>> sfs.get_support()`
			`array([ True, False, True, True])`
			`>>> sfs.transform(X).shape`
			`(150, 3)`
			`"""`

			`_parameter_constraints: dict = {`
			`"estimator": [HasMethods(["fit"])],`
			`"n_features_to_select": [`
			`StrOptions({"auto", "warn"}, deprecated={"warn"}),`
			`Interval(Real, 0, 1, closed="right"),`
			`Interval(Integral, 0, None, closed="neither"),`
			`Hidden(None),`
			`],`
			`"tol": [None, Interval(Real, None, None, closed="neither")],`
			`"direction": [StrOptions({"forward", "backward"})],`
			`"scoring": [None, StrOptions(set(get_scorer_names())), callable],`
			`"cv": ["cv_object"],`
			`"n_jobs": [None, Integral],`
			`}`

			`def __init__(`
			`self,`
			`estimator,`
			`*,`
			`n_features_to_select="warn",`
			`tol=None,`
			`direction="forward",`
			`scoring=None,`
			`cv=5,`
			`n_jobs=None,`
			`):`

			`self.estimator = estimator`
			`self.n_features_to_select = n_features_to_select`
			`self.tol = tol`
			`self.direction = direction`
			`self.scoring = scoring`
			`self.cv = cv`
			`self.n_jobs = n_jobs`

			`def fit(self, X, y=None):`
			`"""Learn the features to select from X.`

			`Parameters`
			`----------`
			`X : array-like of shape (n_samples, n_features)`
			Training vectors, where `n_samples` is the number of samples and
			`n_features` is the number of predictors.

			`y : array-like of shape (n_samples,), default=None`
			`Target values. This parameter may be ignored for`
			`unsupervised learning.`

			`Returns`
			`-------`
			`self : object`
			`Returns the instance itself.`
			`"""`
			`self._validate_params()`

			`# FIXME: to be removed in 1.3`
			`if self.n_features_to_select in ("warn", None):`
			`# for backwards compatibility`
			`warnings.warn(`
			"Leaving `n_features_to_select` to "
			`"None is deprecated in 1.0 and will become 'auto' "`
			`"in 1.3. To keep the same behaviour as with None "`
			`"(i.e. select half of the features) and avoid "`
			`"this warning, you should manually set "`
			"`n_features_to_select='auto'` and set tol=None "
			`"when creating an instance.",`
			`FutureWarning,`
			`)`

			`tags = self._get_tags()`
			`X = self._validate_data(`
			`X,`
			`accept_sparse="csc",`
			`ensure_min_features=2,`
			`force_all_finite=not tags.get("allow_nan", True),`
			`)`
			`n_features = X.shape[1]`

			`# FIXME: to be fixed in 1.3`
			`error_msg = (`
			`"n_features_to_select must be either 'auto', 'warn', "`
			`"None, an integer in [1, n_features - 1] "`
			`"representing the absolute "`
			`"number of features, or a float in (0, 1] "`
			`"representing a percentage of features to "`
			`f"select. Got {self.n_features_to_select}"`
			`)`
			`if self.n_features_to_select in ("warn", None):`
			`if self.tol is not None:`
			raise ValueError("tol is only enabled if `n_features_to_select='auto'`")
			`self.n_features_to_select_ = n_features // 2`
			`elif self.n_features_to_select == "auto":`
			`if self.tol is not None:`
			# With auto feature selection, `n_features_to_select_` will be updated
			# to `support_.sum()` after features are selected.
			`self.n_features_to_select_ = n_features - 1`
			`else:`
			`self.n_features_to_select_ = n_features // 2`
			`elif isinstance(self.n_features_to_select, Integral):`
			`if not 0 < self.n_features_to_select < n_features:`
			`raise ValueError(error_msg)`
			`self.n_features_to_select_ = self.n_features_to_select`
			`elif isinstance(self.n_features_to_select, Real):`
			`self.n_features_to_select_ = int(n_features * self.n_features_to_select)`

			`if self.tol is not None and self.tol < 0 and self.direction == "forward":`
			`raise ValueError("tol must be positive when doing forward selection")`

			`cloned_estimator = clone(self.estimator)`

			`# the current mask corresponds to the set of features:`
			`# - that we have already selected if we do forward selection`
			`# - that we have already excluded if we do backward selection`
			`current_mask = np.zeros(shape=n_features, dtype=bool)`
			`n_iterations = (`
			`self.n_features_to_select_`
			`if self.n_features_to_select == "auto" or self.direction == "forward"`
			`else n_features - self.n_features_to_select_`
			`)`

			`old_score = -np.inf`
			`is_auto_select = self.tol is not None and self.n_features_to_select == "auto"`
			`for _ in range(n_iterations):`
			`new_feature_idx, new_score = self._get_best_new_feature_score(`
			`cloned_estimator, X, y, current_mask`
			`)`
			`if is_auto_select and ((new_score - old_score) < self.tol):`
			`break`

			`old_score = new_score`
			`current_mask[new_feature_idx] = True`

			`if self.direction == "backward":`
			`current_mask = ~current_mask`

			`self.support_ = current_mask`
			`self.n_features_to_select_ = self.support_.sum()`

			`return self`

			`def _get_best_new_feature_score(self, estimator, X, y, current_mask):`
			`# Return the best new feature and its score to add to the current_mask,`
			`# i.e. return the best new feature and its score to add (resp. remove)`
			`# when doing forward selection (resp. backward selection).`
			`# Feature will be added if the current score and past score are greater`
			`# than tol when n_feature is auto,`
			`candidate_feature_indices = np.flatnonzero(~current_mask)`
			`scores = {}`
			`for feature_idx in candidate_feature_indices:`
			`candidate_mask = current_mask.copy()`
			`candidate_mask[feature_idx] = True`
			`if self.direction == "backward":`
			`candidate_mask = ~candidate_mask`
			`X_new = X[:, candidate_mask]`
			`scores[feature_idx] = cross_val_score(`
			`estimator,`
			`X_new,`
			`y,`
			`cv=self.cv,`
			`scoring=self.scoring,`
			`n_jobs=self.n_jobs,`
			`).mean()`
			`new_feature_idx = max(scores, key=lambda feature_idx: scores[feature_idx])`
			`return new_feature_idx, scores[new_feature_idx]`

			`def _get_support_mask(self):`
			`check_is_fitted(self)`
			`return self.support_`

			`def _more_tags(self):`
			`return {`
			`"allow_nan": _safe_tags(self.estimator, key="allow_nan"),`
			`}`