projektAI/venv/Lib/site-packages/mlxtend/evaluate/feature_importance.py

# Sebastian Raschka 2014-2020
# mlxtend Machine Learning Library Extensions
#
# Feature Importance Estimation Through Permutation
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause

import numpy as np


def feature_importance_permutation(X, y, predict_method,
                                   metric, num_rounds=1, seed=None):
    """Feature importance estimation via permutation importance

    The importance of a feature is the drop in score (baseline score
    minus the score after shuffling that feature's column) observed when
    the values of that column are randomly permuted.

    Parameters
    ----------

    X : NumPy array, shape = [n_samples, n_features]
        Dataset, where n_samples is the number of samples and
        n_features is the number of features. Each column is shuffled
        in place temporarily and restored afterwards (also if the
        prediction or scoring function raises), so the array must be
        writable.

    y : NumPy array, shape = [n_samples]
        Target values.

    predict_method : prediction function
        A callable function that predicts the target values
        from X.

    metric : str, callable
        The metric for evaluating the feature importance through
        permutation. The string 'accuracy' is recommended for
        classifiers and the string 'r2' is recommended for
        regressors. Optionally, a custom scoring function
        (e.g., `metric=scoring_func`) that accepts two arguments,
        y_true and y_pred, which have similar shape to the `y` array.

    num_rounds : int (default=1)
        Number of rounds the feature columns are permuted to
        compute the permutation importance. Must be at least 1.
        NumPy integer scalars are accepted as well.

    seed : int or None (default=None)
        Random seed for permuting the feature columns.

    Returns
    ---------

    mean_importance_vals, all_importance_vals : NumPy arrays.
      The first array, mean_importance_vals has shape [n_features, ] and
      contains the importance values for all features, averaged over
      all rounds.
      The shape of the second array is [n_features, num_rounds] and contains
      the feature importance for each repetition. If num_rounds=1,
      it contains the same values as the first array, mean_importance_vals.

    Raises
    ---------

    ValueError
        If num_rounds is not an integer, if num_rounds is smaller than 1,
        or if metric is neither 'r2', 'accuracy', nor a callable.

    Examples
    -----------
    For usage examples, please see
    http://rasbt.github.io/mlxtend/user_guide/evaluate/feature_importance_permutation/

    """

    # Accept NumPy integer scalars (e.g. np.int64) in addition to int.
    if not isinstance(num_rounds, (int, np.integer)):
        raise ValueError('num_rounds must be an integer.')
    if num_rounds < 1:
        raise ValueError('num_rounds must be greater than or equal to 1.')

    if not (metric in ('r2', 'accuracy') or hasattr(metric, '__call__')):
        raise ValueError('metric must be either "r2", "accuracy", '
                         'or a function with signature func(y_true, y_pred).')

    if metric == 'r2':
        def score_func(y_true, y_pred):
            # Coefficient of determination: 1 - SS_res / SS_tot
            sum_of_squares = np.sum(np.square(y_true - y_pred))
            res_sum_of_squares = np.sum(np.square(y_true - y_true.mean()))
            r2_score = 1. - (sum_of_squares / res_sum_of_squares)
            return r2_score

    elif metric == 'accuracy':
        def score_func(y_true, y_pred):
            # Fraction of predictions that match the targets exactly
            return np.mean(y_true == y_pred)

    else:
        score_func = metric

    rng = np.random.RandomState(seed)

    n_features = X.shape[1]
    mean_importance_vals = np.zeros(n_features)
    all_importance_vals = np.zeros((n_features, num_rounds))

    # Reference score on the unpermuted data
    baseline = score_func(y, predict_method(X))

    for round_idx in range(num_rounds):
        for col_idx in range(n_features):
            save_col = X[:, col_idx].copy()
            try:
                # Shuffle the column view in place to avoid copying X
                rng.shuffle(X[:, col_idx])
                new_score = score_func(y, predict_method(X))
            finally:
                # Always restore the caller's data, even if the
                # prediction or scoring function raised
                X[:, col_idx] = save_col
            importance = baseline - new_score
            mean_importance_vals[col_idx] += importance
            all_importance_vals[col_idx, round_idx] = importance
    mean_importance_vals /= num_rounds

    return mean_importance_vals, all_importance_vals
Działa 2021-06-06 22:13:05 +02:00			`# Sebastian Raschka 2014-2020`
			`# mlxtend Machine Learning Library Extensions`
			`#`
			`# Feature Importance Estimation Through Permutation`
			`# Author: Sebastian Raschka <sebastianraschka.com>`
			`#`
			`# License: BSD 3 clause`

			`import numpy as np`


			`def feature_importance_permutation(X, y, predict_method,`
			`metric, num_rounds=1, seed=None):`
			`"""Feature importance imputation via permutation importance`

			`Parameters`
			`----------`

			`X : NumPy array, shape = [n_samples, n_features]`
			`Dataset, where n_samples is the number of samples and`
			`n_features is the number of features.`

			`y : NumPy array, shape = [n_samples]`
			`Target values.`

			`predict_method : prediction function`
			`A callable function that predicts the target values`
			`from X.`

			`metric : str, callable`
			`The metric for evaluating the feature importance through`
			`permutation. By default, the strings 'accuracy' is`
			`recommended for classifiers and the string 'r2' is`
			`recommended for regressors. Optionally, a custom`
			scoring function (e.g., `metric=scoring_func`) that
			`accepts two arguments, y_true and y_pred, which have`
			similar shape to the `y` array.

			`num_rounds : int (default=1)`
			`Number of rounds the feature columns are permuted to`
			`compute the permutation importance.`

			`seed : int or None (default=None)`
			`Random seed for permuting the feature columns.`

			`Returns`
			`---------`

			`mean_importance_vals, all_importance_vals : NumPy arrays.`
			`The first array, mean_importance_vals has shape [n_features, ] and`
			`contains the importance values for all features.`
			`The shape of the second array is [n_features, num_rounds] and contains`
			`the feature importance for each repetition. If num_rounds=1,`
			`it contains the same values as the first array, mean_importance_vals.`

			`Examples`
			`-----------`
			`For usage examples, please see`
			`http://rasbt.github.io/mlxtend/user_guide/evaluate/feature_importance_permutation/`

			`"""`

			`if not isinstance(num_rounds, int):`
			`raise ValueError('num_rounds must be an integer.')`
			`if num_rounds < 1:`
			`raise ValueError('num_rounds must be greater than 1.')`

			`if not (metric in ('r2', 'accuracy') or hasattr(metric, '__call__')):`
			`raise ValueError('metric must be either "r2", "accuracy", '`
			`'or a function with signature func(y_true, y_pred).')`

			`if metric == 'r2':`
			`def score_func(y_true, y_pred):`
			`sum_of_squares = np.sum(np.square(y_true - y_pred))`
			`res_sum_of_squares = np.sum(np.square(y_true - y_true.mean()))`
			`r2_score = 1. - (sum_of_squares / res_sum_of_squares)`
			`return r2_score`

			`elif metric == 'accuracy':`
			`def score_func(y_true, y_pred):`
			`return np.mean(y_true == y_pred)`

			`else:`
			`score_func = metric`

			`rng = np.random.RandomState(seed)`

			`mean_importance_vals = np.zeros(X.shape[1])`
			`all_importance_vals = np.zeros((X.shape[1], num_rounds))`

			`baseline = score_func(y, predict_method(X))`

			`for round_idx in range(num_rounds):`
			`for col_idx in range(X.shape[1]):`
			`save_col = X[:, col_idx].copy()`
			`rng.shuffle(X[:, col_idx])`
			`new_score = score_func(y, predict_method(X))`
			`X[:, col_idx] = save_col`
			`importance = baseline - new_score`
			`mean_importance_vals[col_idx] += importance`
			`all_importance_vals[col_idx, round_idx] = importance`
			`mean_importance_vals /= num_rounds`

			`return mean_importance_vals, all_importance_vals`