projektAI/venv/Lib/site-packages/sklearn/feature_selection/_base.py

# -*- coding: utf-8 -*-
"""Generic feature selection mixin"""

# Authors: G. Varoquaux, A. Gramfort, L. Buitinck, J. Nothman
# License: BSD 3 clause

from abc import ABCMeta, abstractmethod
from warnings import warn
from operator import attrgetter

import numpy as np
from scipy.sparse import issparse, csc_matrix

from ..base import TransformerMixin
from ..utils import (
    check_array,
    safe_mask,
    safe_sqr,
)
from ..utils._tags import _safe_tags


class SelectorMixin(TransformerMixin, metaclass=ABCMeta):
    """
    Transformer mixin that performs feature selection given a support mask

    This mixin provides a feature selector implementation with `transform` and
    `inverse_transform` functionality given an implementation of
    `_get_support_mask`.
    """

    def get_support(self, indices=False):
        """
        Get a mask, or integer index, of the features selected

        Parameters
        ----------
        indices : bool, default=False
            If True, the return value will be an array of integers, rather
            than a boolean mask.

        Returns
        -------
        support : array
            An index that selects the retained features from a feature vector.
            If `indices` is False, this is a boolean array of shape
            [# input features], in which an element is True iff its
            corresponding feature is selected for retention. If `indices` is
            True, this is an integer array of shape [# output features] whose
            values are indices into the input feature vector.
        """
        mask = self._get_support_mask()
        return mask if not indices else np.where(mask)[0]

    @abstractmethod
    def _get_support_mask(self):
        """
        Get the boolean mask indicating which features are selected

        Returns
        -------
        support : boolean array of shape [# input features]
            An element is True iff its corresponding feature is selected for
            retention.
        """

    def transform(self, X):
        """Reduce X to the selected features.

        Parameters
        ----------
        X : array of shape [n_samples, n_features]
            The input samples.

        Returns
        -------
        X_r : array of shape [n_samples, n_selected_features]
            The input samples with only the selected features. When no
            feature is selected, an empty array of shape [n_samples, 0]
            with the same dtype as the validated input is returned and a
            ``UserWarning`` is emitted.

        Raises
        ------
        ValueError
            If at least one feature is selected and the number of columns
            of `X` differs from the length of the support mask.
        """
        # note: we use _safe_tags instead of _get_tags because this is a
        # public Mixin.
        X = check_array(
            X,
            dtype=None,
            accept_sparse="csr",
            force_all_finite=not _safe_tags(self, key="allow_nan"),
        )
        mask = self.get_support()
        if not mask.any():
            warn("No features were selected: either the data is"
                 " too noisy or the selection test too strict.",
                 UserWarning)
            # Preserve the input dtype (as `inverse_transform` does) instead
            # of silently falling back to numpy's default float64.
            return np.empty((X.shape[0], 0), dtype=X.dtype)
        if len(mask) != X.shape[1]:
            raise ValueError("X has a different shape than during fitting.")
        return X[:, safe_mask(X, mask)]

    def inverse_transform(self, X):
        """
        Reverse the transformation operation

        Parameters
        ----------
        X : array of shape [n_samples, n_selected_features]
            The input samples.

        Returns
        -------
        X_r : array of shape [n_samples, n_original_features]
            `X` with columns of zeros inserted where features would have
            been removed by :meth:`transform`. Sparse input yields a
            ``csc_matrix``; other input yields a dense array with the dtype
            of the validated `X`.

        Raises
        ------
        ValueError
            If the number of columns of `X` differs from the number of
            selected features.
        """
        if issparse(X):
            X = X.tocsc()
            # insert additional entries in indptr:
            # e.g. if transform changed indptr from [0 2 6 7] to [0 2 3]
            # col_nonzeros here will be [2 0 1] so indptr becomes [0 2 2 3]
            # The dense code path below is reused on the per-column
            # non-zero counts, which also validates the number of columns.
            it = self.inverse_transform(np.diff(X.indptr).reshape(1, -1))
            col_nonzeros = it.ravel()
            indptr = np.concatenate([[0], np.cumsum(col_nonzeros)])
            Xt = csc_matrix((X.data, X.indices, indptr),
                            shape=(X.shape[0], len(indptr) - 1), dtype=X.dtype)
            return Xt

        support = self.get_support()
        X = check_array(X, dtype=None)
        # `X.shape[1]` is read here, so X has at least two dimensions from
        # this point on; no separate 1-D promotion is needed afterwards.
        if support.sum() != X.shape[1]:
            raise ValueError("X has a different shape than during fitting.")

        # Start from zeros and copy the kept columns back into place.
        Xt = np.zeros((X.shape[0], support.size), dtype=X.dtype)
        Xt[:, support] = X
        return Xt


def _get_feature_importances(estimator, getter, transform_func=None,
                             norm_order=1):
    """
    Retrieve and aggregate (ndim > 1)  the feature importances
    from an estimator. Also optionally applies transformation.

    Parameters
    ----------
    estimator : estimator
        A scikit-learn estimator from which we want to get the feature
        importances.

    getter : "auto", str or callable
        An attribute or a callable to get the feature importance. If `"auto"`,
        `estimator` is expected to expose `coef_` or `feature_importances`.

    transform_func : {"norm", "square"}, default=None
        The transform to apply to the feature importances. By default (`None`)
        no transformation is applied.

    norm_order : int, default=1
        The norm order to apply when `transform_func="norm"`. Only applied
        when `importances.ndim > 1`.

    Returns
    -------
    importances : ndarray of shape (n_features,)
        The features importances, optionally transformed.
    """
    if isinstance(getter, str):
        if getter == 'auto':
            if hasattr(estimator, 'coef_'):
                getter = attrgetter('coef_')
            elif hasattr(estimator, 'feature_importances_'):
                getter = attrgetter('feature_importances_')
            else:
                raise ValueError(
                    f"when `importance_getter=='auto'`, the underlying "
                    f"estimator {estimator.__class__.__name__} should have "
                    f"`coef_` or `feature_importances_` attribute. Either "
                    f"pass a fitted estimator to feature selector or call fit "
                    f"before calling transform."
                )
        else:
            getter = attrgetter(getter)
    elif not callable(getter):
        raise ValueError(
            '`importance_getter` has to be a string or `callable`'
        )
    importances = getter(estimator)

    if transform_func is None:
        return importances
    elif transform_func == "norm":
        if importances.ndim == 1:
            importances = np.abs(importances)
        else:
            importances = np.linalg.norm(importances, axis=0,
                                         ord=norm_order)
    elif transform_func == "square":
        if importances.ndim == 1:
            importances = safe_sqr(importances)
        else:
            importances = safe_sqr(importances).sum(axis=0)
    else:
        raise ValueError("Valid values for `transform_func` are " +
                         "None, 'norm' and 'square'. Those two " +
                         "transformation are only supported now")

    return importances
Działa 2021-06-06 22:13:05 +02:00			`# -*- coding: utf-8 -*-`
			`"""Generic feature selection mixin"""`

			`# Authors: G. Varoquaux, A. Gramfort, L. Buitinck, J. Nothman`
			`# License: BSD 3 clause`

			`from abc import ABCMeta, abstractmethod`
			`from warnings import warn`
			`from operator import attrgetter`

			`import numpy as np`
			`from scipy.sparse import issparse, csc_matrix`

			`from ..base import TransformerMixin`
			`from ..utils import (`
			`check_array,`
			`safe_mask,`
			`safe_sqr,`
			`)`
			`from ..utils._tags import _safe_tags`


			`class SelectorMixin(TransformerMixin, metaclass=ABCMeta):`
			`"""`
			`Transformer mixin that performs feature selection given a support mask`

			This mixin provides a feature selector implementation with `transform` and
			`inverse_transform` functionality given an implementation of
			`_get_support_mask`.
			`"""`

			`def get_support(self, indices=False):`
			`"""`
			`Get a mask, or integer index, of the features selected`

			`Parameters`
			`----------`
			`indices : bool, default=False`
			`If True, the return value will be an array of integers, rather`
			`than a boolean mask.`

			`Returns`
			`-------`
			`support : array`
			`An index that selects the retained features from a feature vector.`
			If `indices` is False, this is a boolean array of shape
			`[# input features], in which an element is True iff its`
			corresponding feature is selected for retention. If `indices` is
			`True, this is an integer array of shape [# output features] whose`
			`values are indices into the input feature vector.`
			`"""`
			`mask = self._get_support_mask()`
			`return mask if not indices else np.where(mask)[0]`

			`@abstractmethod`
			`def _get_support_mask(self):`
			`"""`
			`Get the boolean mask indicating which features are selected`

			`Returns`
			`-------`
			`support : boolean array of shape [# input features]`
			`An element is True iff its corresponding feature is selected for`
			`retention.`
			`"""`

			`def transform(self, X):`
			`"""Reduce X to the selected features.`

			`Parameters`
			`----------`
			`X : array of shape [n_samples, n_features]`
			`The input samples.`

			`Returns`
			`-------`
			`X_r : array of shape [n_samples, n_selected_features]`
			`The input samples with only the selected features.`
			`"""`
			`# note: we use _safe_tags instead of _get_tags because this is a`
			`# public Mixin.`
			`X = check_array(`
			`X,`
			`dtype=None,`
			`accept_sparse="csr",`
			`force_all_finite=not _safe_tags(self, key="allow_nan"),`
			`)`
			`mask = self.get_support()`
			`if not mask.any():`
			`warn("No features were selected: either the data is"`
			`" too noisy or the selection test too strict.",`
			`UserWarning)`
			`return np.empty(0).reshape((X.shape[0], 0))`
			`if len(mask) != X.shape[1]:`
			`raise ValueError("X has a different shape than during fitting.")`
			`return X[:, safe_mask(X, mask)]`

			`def inverse_transform(self, X):`
			`"""`
			`Reverse the transformation operation`

			`Parameters`
			`----------`
			`X : array of shape [n_samples, n_selected_features]`
			`The input samples.`

			`Returns`
			`-------`
			`X_r : array of shape [n_samples, n_original_features]`
			`X` with columns of zeros inserted where features would have
			been removed by :meth:`transform`.
			`"""`
			`if issparse(X):`
			`X = X.tocsc()`
			`# insert additional entries in indptr:`
			`# e.g. if transform changed indptr from [0 2 6 7] to [0 2 3]`
			`# col_nonzeros here will be [2 0 1] so indptr becomes [0 2 2 3]`
			`it = self.inverse_transform(np.diff(X.indptr).reshape(1, -1))`
			`col_nonzeros = it.ravel()`
			`indptr = np.concatenate([[0], np.cumsum(col_nonzeros)])`
			`Xt = csc_matrix((X.data, X.indices, indptr),`
			`shape=(X.shape[0], len(indptr) - 1), dtype=X.dtype)`
			`return Xt`

			`support = self.get_support()`
			`X = check_array(X, dtype=None)`
			`if support.sum() != X.shape[1]:`
			`raise ValueError("X has a different shape than during fitting.")`

			`if X.ndim == 1:`
			`X = X[None, :]`
			`Xt = np.zeros((X.shape[0], support.size), dtype=X.dtype)`
			`Xt[:, support] = X`
			`return Xt`


			`def _get_feature_importances(estimator, getter, transform_func=None,`
			`norm_order=1):`
			`"""`
			`Retrieve and aggregate (ndim > 1) the feature importances`
			`from an estimator. Also optionally applies transformation.`

			`Parameters`
			`----------`
			`estimator : estimator`
			`A scikit-learn estimator from which we want to get the feature`
			`importances.`

			`getter : "auto", str or callable`
			An attribute or a callable to get the feature importance. If `"auto"`,
			`estimator` is expected to expose `coef_` or `feature_importances_`.

			`transform_func : {"norm", "square"}, default=None`
			The transform to apply to the feature importances. By default (`None`)
			`no transformation is applied.`

			`norm_order : int, default=1`
			The norm order to apply when `transform_func="norm"`. Only applied
			when `importances.ndim > 1`.

			`Returns`
			`-------`
			`importances : ndarray of shape (n_features,)`
			`The features importances, optionally transformed.`
			`"""`
			`if isinstance(getter, str):`
			`if getter == 'auto':`
			`if hasattr(estimator, 'coef_'):`
			`getter = attrgetter('coef_')`
			`elif hasattr(estimator, 'feature_importances_'):`
			`getter = attrgetter('feature_importances_')`
			`else:`
			`raise ValueError(`
			f"when `importance_getter=='auto'`, the underlying "
			`f"estimator {estimator.__class__.__name__} should have "`
			f"`coef_` or `feature_importances_` attribute. Either "
			`f"pass a fitted estimator to feature selector or call fit "`
			`f"before calling transform."`
			`)`
			`else:`
			`getter = attrgetter(getter)`
			`elif not callable(getter):`
			`raise ValueError(`
			'`importance_getter` has to be a string or `callable`'
			`)`
			`importances = getter(estimator)`

			`if transform_func is None:`
			`return importances`
			`elif transform_func == "norm":`
			`if importances.ndim == 1:`
			`importances = np.abs(importances)`
			`else:`
			`importances = np.linalg.norm(importances, axis=0,`
			`ord=norm_order)`
			`elif transform_func == "square":`
			`if importances.ndim == 1:`
			`importances = safe_sqr(importances)`
			`else:`
			`importances = safe_sqr(importances).sum(axis=0)`
			`else:`
			raise ValueError("Valid values for `transform_func` are " +
			`"None, 'norm' and 'square'. Those two " +`
			`"transformation are only supported now")`

			`return importances`