projektAI/venv/Lib/site-packages/mlxtend/evaluate/ttest.py

# Sebastian Raschka 2014-2020
# mlxtend Machine Learning Library Extensions
#
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause

import numpy as np
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import get_scorer


def paired_ttest_resampled(estimator1, estimator2, X, y,
                           num_rounds=30, test_size=0.3,
                           scoring=None,
                           random_seed=None):
    """
    Implements the resampled paired t test procedure
    to compare the performance of two models
    (also called k-hold-out paired t test).

    In each round the data is split into a training and a test set,
    both estimators are fit on the same training set and scored on the
    same test set, and the score difference (estimator1 - estimator2)
    is recorded. The t statistic is computed from these differences.

    Parameters
    ----------
    estimator1 : scikit-learn classifier or regressor

    estimator2 : scikit-learn classifier or regressor

    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape = [n_samples]
        Target values.

    num_rounds : int (default: 30)
        Number of resampling iterations
        (i.e., train/test splits). Must be at least 2, because the
        test uses ``num_rounds - 1`` degrees of freedom.

    test_size : float or int (default: 0.3)
        If float, should be between 0.0 and 1.0 and
        represent the proportion of the dataset to use
        as a test set.
        If int, represents the absolute number of test examples.

    scoring : str, callable, or None (default: None)
        If None (default), uses 'accuracy' for sklearn classifiers
        and 'r2' for sklearn regressors (decided by inspecting
        ``estimator1._estimator_type`` only).
        If str, uses a sklearn scoring metric string identifier, for example
        {accuracy, f1, precision, recall, roc_auc} for classifiers,
        {'mean_absolute_error', 'mean_squared_error'/'neg_mean_squared_error',
        'median_absolute_error', 'r2'} for regressors.
        If a callable object or function is provided, it has to be conform with
        sklearn's signature ``scorer(estimator, X, y)``; see
        http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html
        for more information.

    random_seed : int or None (default: None)
        Random seed for creating the test/train splits.

    Returns
    ----------
    t : float
        The t-statistic

    pvalue : float
        Two-tailed p-value.
        If the chosen significance level is larger
        than the p-value, we reject the null hypothesis
        and accept that there are significant differences
        in the two compared models.

    Raises
    ----------
    ValueError
        If `test_size` is neither an int nor a float, or if
        `num_rounds` is smaller than 2.

    AttributeError
        If `scoring` is None and `estimator1` is neither a
        classifier nor a regressor.

    Examples
    -----------
    For usage examples, please see
    http://rasbt.github.io/mlxtend/user_guide/evaluate/paired_ttest_resampled/

    """
    if (not isinstance(test_size, int) and not isinstance(test_size, float)):
        raise ValueError('test_size must be of '
                         'type int or float. Got %s.' % type(test_size))

    # With fewer than two rounds the sample variance of the differences
    # (and thus the t statistic) is undefined; fail early instead of
    # silently returning NaN after fitting the models.
    if num_rounds < 2:
        raise ValueError('num_rounds must be at least 2. '
                         'Got %s.' % (num_rounds,))

    rng = np.random.RandomState(random_seed)

    if scoring is None:
        if estimator1._estimator_type == 'classifier':
            scoring = 'accuracy'
        elif estimator1._estimator_type == 'regressor':
            scoring = 'r2'
        else:
            raise AttributeError('Estimator must '
                                 'be a Classifier or Regressor.')
    if isinstance(scoring, str):
        scorer = get_scorer(scoring)
    else:
        scorer = scoring

    score_diff = []
    for _ in range(num_rounds):

        # Draw a fresh seed per round so every split differs while the
        # whole sequence stays reproducible for a given random_seed.
        randint = rng.randint(low=0, high=32767)

        X_train, X_test, y_train, y_test = \
            train_test_split(X, y, test_size=test_size,
                             random_state=randint)

        estimator1.fit(X_train, y_train)
        estimator2.fit(X_train, y_train)

        est1_score = scorer(estimator1, X_test, y_test)
        est2_score = scorer(estimator2, X_test, y_test)
        score_diff.append(est1_score - est2_score)

    avg_diff = np.mean(score_diff)

    # t = mean(diff) * sqrt(n) / sample standard deviation of the diffs
    numerator = avg_diff * np.sqrt(num_rounds)
    denominator = np.sqrt(sum((diff - avg_diff)**2 for diff in score_diff)
                          / (num_rounds - 1))
    t_stat = numerator / denominator

    # Two-tailed p-value from the survival function of |t|.
    pvalue = stats.t.sf(np.abs(t_stat), num_rounds - 1)*2.
    return float(t_stat), float(pvalue)


def paired_ttest_kfold_cv(estimator1, estimator2, X, y,
                          cv=10,
                          scoring=None,
                          shuffle=False,
                          random_seed=None):
    """
    Implements the k-fold paired t test procedure
    to compare the performance of two models.

    For each of the `cv` folds, both estimators are fit on the same
    training portion and scored on the same held-out fold; the t
    statistic is computed from the per-fold score differences
    (estimator1 - estimator2).

    Parameters
    ----------
    estimator1 : scikit-learn classifier or regressor

    estimator2 : scikit-learn classifier or regressor

    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples and
        n_features is the number of features.
        Rows are selected via ``X[index_array]``, so the object must
        support indexing with an integer index array.

    y : array-like, shape = [n_samples]
        Target values. Indexed the same way as `X`.

    cv : int (default: 10)
        Number of splits and iteration for the
        cross-validation procedure

    scoring : str, callable, or None (default: None)
        If None (default), uses 'accuracy' for sklearn classifiers
        and 'r2' for sklearn regressors (decided by inspecting
        ``estimator1._estimator_type`` only).
        If str, uses a sklearn scoring metric string identifier, for example
        {accuracy, f1, precision, recall, roc_auc} for classifiers,
        {'mean_absolute_error', 'mean_squared_error'/'neg_mean_squared_error',
        'median_absolute_error', 'r2'} for regressors.
        If a callable object or function is provided, it has to be conform with
        sklearn's signature ``scorer(estimator, X, y)``; see
        http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html
        for more information.

    shuffle : bool (default: False)
        Whether to shuffle the dataset for generating
        the k-fold splits.

    random_seed : int or None (default: None)
        Random seed for shuffling the dataset
        for generating the k-fold splits.
        Ignored if shuffle=False.

    Returns
    ----------
    t : float
        The t-statistic

    pvalue : float
        Two-tailed p-value.
        If the chosen significance level is larger
        than the p-value, we reject the null hypothesis
        and accept that there are significant differences
        in the two compared models.

    Raises
    ----------
    AttributeError
        If `scoring` is None and `estimator1` is neither a
        classifier nor a regressor.

    Examples
    -----------
    For usage examples, please see
    http://rasbt.github.io/mlxtend/user_guide/evaluate/paired_ttest_kfold_cv/

    """

    # The seed only matters when shuffling. Newer scikit-learn releases
    # raise a ValueError when random_state is set while shuffle=False,
    # so drop it in that case to honor "Ignored if shuffle=False".
    kf = KFold(n_splits=cv,
               random_state=random_seed if shuffle else None,
               shuffle=shuffle)

    if scoring is None:
        if estimator1._estimator_type == 'classifier':
            scoring = 'accuracy'
        elif estimator1._estimator_type == 'regressor':
            scoring = 'r2'
        else:
            raise AttributeError('Estimator must '
                                 'be a Classifier or Regressor.')
    if isinstance(scoring, str):
        scorer = get_scorer(scoring)
    else:
        scorer = scoring

    score_diff = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        estimator1.fit(X_train, y_train)
        estimator2.fit(X_train, y_train)

        est1_score = scorer(estimator1, X_test, y_test)
        est2_score = scorer(estimator2, X_test, y_test)
        score_diff.append(est1_score - est2_score)

    avg_diff = np.mean(score_diff)

    # t = mean(diff) * sqrt(k) / sample standard deviation of the diffs
    numerator = avg_diff * np.sqrt(cv)
    denominator = np.sqrt(sum((diff - avg_diff)**2 for diff in score_diff)
                          / (cv - 1))
    t_stat = numerator / denominator

    # Two-tailed p-value from the survival function of |t|.
    pvalue = stats.t.sf(np.abs(t_stat), cv - 1)*2.
    return float(t_stat), float(pvalue)


def paired_ttest_5x2cv(estimator1, estimator2, X, y,
                       scoring=None,
                       random_seed=None):
    """
    Compare the performance of two models with the
    5x2cv paired t test proposed by Dietterich (1998).

    The data is split 50/50 five times. In every repetition both
    estimators are fit on one half and scored on the other, then the
    halves swap roles. The statistic divides the very first score
    difference (estimator1 - estimator2) by the square root of the
    averaged per-repetition variance estimates.

    Parameters
    ----------
    estimator1 : scikit-learn classifier or regressor

    estimator2 : scikit-learn classifier or regressor

    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape = [n_samples]
        Target values.

    scoring : str, callable, or None (default: None)
        If None (default), 'accuracy' is used when estimator1 is an
        sklearn classifier and 'r2' when it is an sklearn regressor.
        If str, it is treated as a sklearn scoring metric string
        identifier, for example
        {accuracy, f1, precision, recall, roc_auc} for classifiers,
        {'mean_absolute_error', 'mean_squared_error'/'neg_mean_squared_error',
        'median_absolute_error', 'r2'} for regressors.
        A callable must follow sklearn's scorer signature
        ``scorer(estimator, X, y)``; see
        http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html
        for more information.

    random_seed : int or None (default: None)
        Random seed for creating the test/train splits.

    Returns
    ----------
    t : float
        The t-statistic

    pvalue : float
        Two-tailed p-value (5 degrees of freedom).
        If the chosen significance level is larger
        than the p-value, we reject the null hypothesis
        and accept that there are significant differences
        in the two compared models.

    Examples
    -----------
    For usage examples, please see
    http://rasbt.github.io/mlxtend/user_guide/evaluate/paired_ttest_5x2cv/

    """
    rng = np.random.RandomState(random_seed)

    # Resolve the default metric from the type of the first estimator.
    if scoring is None:
        estimator_kind = estimator1._estimator_type
        if estimator_kind == 'classifier':
            scoring = 'accuracy'
        elif estimator_kind == 'regressor':
            scoring = 'r2'
        else:
            raise AttributeError('Estimator must '
                                 'be a Classifier or Regressor.')
    scorer = get_scorer(scoring) if isinstance(scoring, str) else scoring

    def fit_and_compare(X_fit, X_eval, y_fit, y_eval):
        # Train both models on the same half, then score both on the
        # other half and report estimator1's score minus estimator2's.
        for model in (estimator1, estimator2):
            model.fit(X_fit, y_fit)
        score_a = scorer(estimator1, X_eval, y_eval)
        score_b = scorer(estimator2, X_eval, y_eval)
        return score_a - score_b

    total_variance = 0.
    leading_diff = None

    for round_idx in range(5):
        # One fresh seed per repetition keeps the five splits distinct
        # yet reproducible for a fixed random_seed.
        split_seed = rng.randint(low=0, high=32767)
        half_a_X, half_b_X, half_a_y, half_b_y = train_test_split(
            X, y, test_size=0.5, random_state=split_seed)

        diff_ab = fit_and_compare(half_a_X, half_b_X, half_a_y, half_b_y)
        diff_ba = fit_and_compare(half_b_X, half_a_X, half_b_y, half_a_y)

        round_mean = (diff_ab + diff_ba) / 2.
        round_variance = ((diff_ab - round_mean)**2 +
                          (diff_ba - round_mean)**2)
        total_variance += round_variance

        # Only the first difference of the first repetition enters
        # the numerator of the statistic.
        if round_idx == 0:
            leading_diff = diff_ab

    t_stat = leading_diff / np.sqrt(1/5. * total_variance)

    pvalue = stats.t.sf(np.abs(t_stat), 5)*2.
    return float(t_stat), float(pvalue)
Działa 2021-06-06 22:13:05 +02:00			`# Sebastian Raschka 2014-2020`
			`# mlxtend Machine Learning Library Extensions`
			`#`
			`# Author: Sebastian Raschka <sebastianraschka.com>`
			`#`
			`# License: BSD 3 clause`

			`import numpy as np`
			`from scipy import stats`
			`from sklearn.model_selection import train_test_split`
			`from sklearn.model_selection import KFold`
			`from sklearn.metrics import get_scorer`


			`def paired_ttest_resampled(estimator1, estimator2, X, y,`
			`num_rounds=30, test_size=0.3,`
			`scoring=None,`
			`random_seed=None):`
			`"""`
			`Implements the resampled paired t test procedure`
			`to compare the performance of two models`
			`(also called k-hold-out paired t test).`

			`Parameters`
			`----------`
			`estimator1 : scikit-learn classifier or regressor`

			`estimator2 : scikit-learn classifier or regressor`

			`X : {array-like, sparse matrix}, shape = [n_samples, n_features]`
			`Training vectors, where n_samples is the number of samples and`
			`n_features is the number of features.`

			`y : array-like, shape = [n_samples]`
			`Target values.`

			`num_rounds : int (default: 30)`
			`Number of resampling iterations`
			`(i.e., train/test splits)`

			`test_size : float or int (default: 0.3)`
			`If float, should be between 0.0 and 1.0 and`
			`represent the proportion of the dataset to use`
			`as a test set.`
			`If int, represents the absolute number of test examples.`

			`scoring : str, callable, or None (default: None)`
			`If None (default), uses 'accuracy' for sklearn classifiers`
			`and 'r2' for sklearn regressors.`
			`If str, uses a sklearn scoring metric string identifier, for example`
			`{accuracy, f1, precision, recall, roc_auc} for classifiers,`
			`{'mean_absolute_error', 'mean_squared_error'/'neg_mean_squared_error',`
			`'median_absolute_error', 'r2'} for regressors.`
			`If a callable object or function is provided, it has to be conform with`
			sklearn's signature ``scorer(estimator, X, y)``; see
			`http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html`
			`for more information.`

			`random_seed : int or None (default: None)`
			`Random seed for creating the test/train splits.`

			`Returns`
			`----------`
			`t : float`
			`The t-statistic`

			`pvalue : float`
			`Two-tailed p-value.`
			`If the chosen significance level is larger`
			`than the p-value, we reject the null hypothesis`
			`and accept that there are significant differences`
			`in the two compared models.`

			`Examples`
			`-----------`
			`For usage examples, please see`
			`http://rasbt.github.io/mlxtend/user_guide/evaluate/paired_ttest_resampled/`

			`"""`
			`if (not isinstance(test_size, int) and not isinstance(test_size, float)):`
			`raise ValueError('train_size must be of '`
			`'type int or float. Got %s.' % type(test_size))`

			`rng = np.random.RandomState(random_seed)`

			`if scoring is None:`
			`if estimator1._estimator_type == 'classifier':`
			`scoring = 'accuracy'`
			`elif estimator1._estimator_type == 'regressor':`
			`scoring = 'r2'`
			`else:`
			`raise AttributeError('Estimator must '`
			`'be a Classifier or Regressor.')`
			`if isinstance(scoring, str):`
			`scorer = get_scorer(scoring)`
			`else:`
			`scorer = scoring`

			`score_diff = []`
			`for i in range(num_rounds):`

			`randint = rng.randint(low=0, high=32767)`

			`X_train, X_test, y_train, y_test = \`
			`train_test_split(X, y, test_size=test_size,`
			`random_state=randint)`

			`estimator1.fit(X_train, y_train)`
			`estimator2.fit(X_train, y_train)`

			`est1_score = scorer(estimator1, X_test, y_test)`
			`est2_score = scorer(estimator2, X_test, y_test)`
			`score_diff.append(est1_score - est2_score)`

			`avg_diff = np.mean(score_diff)`

			`numerator = avg_diff * np.sqrt(num_rounds)`
			`denominator = np.sqrt(sum([(diff - avg_diff)**2 for diff in score_diff])`
			`/ (num_rounds - 1))`
			`t_stat = numerator / denominator`

			`pvalue = stats.t.sf(np.abs(t_stat), num_rounds - 1)*2.`
			`return float(t_stat), float(pvalue)`


			`def paired_ttest_kfold_cv(estimator1, estimator2, X, y,`
			`cv=10,`
			`scoring=None,`
			`shuffle=False,`
			`random_seed=None):`
			`"""`
			`Implements the k-fold paired t test procedure`
			`to compare the performance of two models.`

			`Parameters`
			`----------`
			`estimator1 : scikit-learn classifier or regressor`

			`estimator2 : scikit-learn classifier or regressor`

			`X : {array-like, sparse matrix}, shape = [n_samples, n_features]`
			`Training vectors, where n_samples is the number of samples and`
			`n_features is the number of features.`

			`y : array-like, shape = [n_samples]`
			`Target values.`

			`cv : int (default: 10)`
			`Number of splits and iteration for the`
			`cross-validation procedure`

			`scoring : str, callable, or None (default: None)`
			`If None (default), uses 'accuracy' for sklearn classifiers`
			`and 'r2' for sklearn regressors.`
			`If str, uses a sklearn scoring metric string identifier, for example`
			`{accuracy, f1, precision, recall, roc_auc} for classifiers,`
			`{'mean_absolute_error', 'mean_squared_error'/'neg_mean_squared_error',`
			`'median_absolute_error', 'r2'} for regressors.`
			`If a callable object or function is provided, it has to be conform with`
			sklearn's signature ``scorer(estimator, X, y)``; see
			`http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html`
			`for more information.`

			`shuffle : bool (default: False)`
			`Whether to shuffle the dataset for generating`
			`the k-fold splits.`

			`random_seed : int or None (default: None)`
			`Random seed for shuffling the dataset`
			`for generating the k-fold splits.`
			`Ignored if shuffle=False.`

			`Returns`
			`----------`
			`t : float`
			`The t-statistic`

			`pvalue : float`
			`Two-tailed p-value.`
			`If the chosen significance level is larger`
			`than the p-value, we reject the null hypothesis`
			`and accept that there are significant differences`
			`in the two compared models.`

			`Examples`
			`-----------`
			`For usage examples, please see`
			`http://rasbt.github.io/mlxtend/user_guide/evaluate/paired_ttest_kfold_cv/`

			`"""`

			`kf = KFold(n_splits=cv, random_state=random_seed, shuffle=shuffle)`

			`if scoring is None:`
			`if estimator1._estimator_type == 'classifier':`
			`scoring = 'accuracy'`
			`elif estimator1._estimator_type == 'regressor':`
			`scoring = 'r2'`
			`else:`
			`raise AttributeError('Estimator must '`
			`'be a Classifier or Regressor.')`
			`if isinstance(scoring, str):`
			`scorer = get_scorer(scoring)`
			`else:`
			`scorer = scoring`

			`score_diff = []`

			`for train_index, test_index in kf.split(X):`
			`X_train, X_test = X[train_index], X[test_index]`
			`y_train, y_test = y[train_index], y[test_index]`

			`estimator1.fit(X_train, y_train)`
			`estimator2.fit(X_train, y_train)`

			`est1_score = scorer(estimator1, X_test, y_test)`
			`est2_score = scorer(estimator2, X_test, y_test)`
			`score_diff.append(est1_score - est2_score)`

			`avg_diff = np.mean(score_diff)`

			`numerator = avg_diff * np.sqrt(cv)`
			`denominator = np.sqrt(sum([(diff - avg_diff)**2 for diff in score_diff])`
			`/ (cv - 1))`
			`t_stat = numerator / denominator`

			`pvalue = stats.t.sf(np.abs(t_stat), cv - 1)*2.`
			`return float(t_stat), float(pvalue)`


			`def paired_ttest_5x2cv(estimator1, estimator2, X, y,`
			`scoring=None,`
			`random_seed=None):`
			`"""`
			`Implements the 5x2cv paired t test proposed`
			`by Dietterich (1998)`
			`to compare the performance of two models.`

			`Parameters`
			`----------`
			`estimator1 : scikit-learn classifier or regressor`

			`estimator2 : scikit-learn classifier or regressor`

			`X : {array-like, sparse matrix}, shape = [n_samples, n_features]`
			`Training vectors, where n_samples is the number of samples and`
			`n_features is the number of features.`

			`y : array-like, shape = [n_samples]`
			`Target values.`

			`scoring : str, callable, or None (default: None)`
			`If None (default), uses 'accuracy' for sklearn classifiers`
			`and 'r2' for sklearn regressors.`
			`If str, uses a sklearn scoring metric string identifier, for example`
			`{accuracy, f1, precision, recall, roc_auc} for classifiers,`
			`{'mean_absolute_error', 'mean_squared_error'/'neg_mean_squared_error',`
			`'median_absolute_error', 'r2'} for regressors.`
			`If a callable object or function is provided, it has to be conform with`
			sklearn's signature ``scorer(estimator, X, y)``; see
			`http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html`
			`for more information.`

			`random_seed : int or None (default: None)`
			`Random seed for creating the test/train splits.`

			`Returns`
			`----------`
			`t : float`
			`The t-statistic`

			`pvalue : float`
			`Two-tailed p-value.`
			`If the chosen significance level is larger`
			`than the p-value, we reject the null hypothesis`
			`and accept that there are significant differences`
			`in the two compared models.`

			`Examples`
			`-----------`
			`For usage examples, please see`
			`http://rasbt.github.io/mlxtend/user_guide/evaluate/paired_ttest_5x2cv/`

			`"""`
			`rng = np.random.RandomState(random_seed)`

			`if scoring is None:`
			`if estimator1._estimator_type == 'classifier':`
			`scoring = 'accuracy'`
			`elif estimator1._estimator_type == 'regressor':`
			`scoring = 'r2'`
			`else:`
			`raise AttributeError('Estimator must '`
			`'be a Classifier or Regressor.')`
			`if isinstance(scoring, str):`
			`scorer = get_scorer(scoring)`
			`else:`
			`scorer = scoring`

			`variance_sum = 0.`
			`first_diff = None`

			`def score_diff(X_1, X_2, y_1, y_2):`

			`estimator1.fit(X_1, y_1)`
			`estimator2.fit(X_1, y_1)`
			`est1_score = scorer(estimator1, X_2, y_2)`
			`est2_score = scorer(estimator2, X_2, y_2)`
			`score_diff = est1_score - est2_score`
			`return score_diff`

			`for i in range(5):`

			`randint = rng.randint(low=0, high=32767)`
			`X_1, X_2, y_1, y_2 = \`
			`train_test_split(X, y, test_size=0.5,`
			`random_state=randint)`

			`score_diff_1 = score_diff(X_1, X_2, y_1, y_2)`
			`score_diff_2 = score_diff(X_2, X_1, y_2, y_1)`
			`score_mean = (score_diff_1 + score_diff_2) / 2.`
			`score_var = ((score_diff_1 - score_mean)**2 +`
			`(score_diff_2 - score_mean)**2)`
			`variance_sum += score_var`
			`if first_diff is None:`
			`first_diff = score_diff_1`

			`numerator = first_diff`
			`denominator = np.sqrt(1/5. * variance_sum)`
			`t_stat = numerator / denominator`

			`pvalue = stats.t.sf(np.abs(t_stat), 5)*2.`
			`return float(t_stat), float(pvalue)`