projektAI/venv/Lib/site-packages/mlxtend/evaluate/mcnemar.py

# Sebastian Raschka 2014-2020
# mlxtend Machine Learning Library Extensions
#
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause

import numpy as np
import scipy.stats
from itertools import combinations


def mcnemar_table(y_target, y_model1, y_model2):
    """
    Compute a 2x2 contigency table for McNemar's test.

    Parameters
    -----------
    y_target : array-like, shape=[n_samples]
        True class labels as 1D NumPy array.
    y_model1 : array-like, shape=[n_samples]
        Predicted class labels from model as 1D NumPy array.
    y_model2 : array-like, shape=[n_samples]
        Predicted class labels from model 2 as 1D NumPy array.

    Returns
    ----------
    tb : array-like, shape=[2, 2]
       2x2 contingency table with the following contents:
       a: tb[0, 0]: # of samples that both models predicted correctly
       b: tb[0, 1]: # of samples that model 1 got right and model 2 got wrong
       c: tb[1, 0]: # of samples that model 2 got right and model 1 got wrong
       d: tb[1, 1]: # of samples that both models predicted incorrectly

    Examples
    -----------
    For usage examples, please see
    http://rasbt.github.io/mlxtend/user_guide/evaluate/mcnemar_table/

    """
    for ary in (y_target, y_model1, y_model2):
        if len(ary.shape) != 1:
            raise ValueError('One or more input arrays are not 1-dimensional.')

    if y_target.shape[0] != y_model1.shape[0]:
        raise ValueError('y_target and y_model1 contain a different number'
                         ' of elements.')

    if y_target.shape[0] != y_model2.shape[0]:
        raise ValueError('y_target and y_model2 contain a different number'
                         ' of elements.')

    m1_vs_true = (y_target == y_model1).astype(int)
    m2_vs_true = (y_target == y_model2).astype(int)

    plus_true = m1_vs_true + m2_vs_true
    minus_true = m1_vs_true - m2_vs_true

    tb = np.zeros((2, 2), dtype=int)

    tb[0, 0] = np.sum(plus_true == 2)
    tb[0, 1] = np.sum(minus_true == 1)
    tb[1, 0] = np.sum(minus_true == -1)
    tb[1, 1] = np.sum(plus_true == 0)

    return tb


def mcnemar_tables(y_target, *y_model_predictions):
    """
    Compute multiple 2x2 contigency tables for McNemar's
    test or Cochran's Q test.

    Parameters
    -----------
    y_target : array-like, shape=[n_samples]
        True class labels as 1D NumPy array.

    y_model_predictions : array-like, shape=[n_samples]
        Predicted class labels for a model.

    Returns
    ----------

    tables : dict
        Dictionary of NumPy arrays with shape=[2, 2]. Each dictionary
        key names the two models to be compared based on the order the
        models were passed as `*y_model_predictions`. The number of
        dictionary entries is equal to the number of pairwise combinations
        between the m models, i.e., "m choose 2."

        For example the following target array (containing the true labels)
        and 3 models

        - y_true = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
        - y_mod0 = np.array([0, 1, 0, 0, 0, 1, 1, 0, 0, 0])
        - y_mod0 = np.array([0, 0, 1, 1, 0, 1, 1, 0, 0, 0])
        - y_mod0 = np.array([0, 1, 1, 1, 0, 1, 0, 0, 0, 0])

        would result in the following dictionary:


        {'model_0 vs model_1': array([[ 4.,  1.],
                                      [ 2.,  3.]]),
         'model_0 vs model_2': array([[ 3.,  0.],
                                      [ 3.,  4.]]),
         'model_1 vs model_2': array([[ 3.,  0.],
                                      [ 2.,  5.]])}

        Each array is structured in the following way:

        - tb[0, 0]: # of samples that both models predicted correctly
        - tb[0, 1]: # of samples that model a got right and model b got wrong
        - tb[1, 0]: # of samples that model b got right and model a got wrong
        - tb[1, 1]: # of samples that both models predicted incorrectly

    Examples
    -----------

    For usage examples, please see
    http://rasbt.github.io/mlxtend/user_guide/evaluate/mcnemar_tables/

    """
    model_lens = set()
    y_model_predictions = list(y_model_predictions)
    for ary in ([y_target] + y_model_predictions):
        if len(ary.shape) != 1:
            raise ValueError('One or more input arrays are not 1-dimensional.')
        model_lens.add(ary.shape[0])

    if len(model_lens) > 1:
        raise ValueError('Each prediction array must have the '
                         'same number of samples.')

    num_models = len(y_model_predictions)

    if num_models < 2:
        raise ValueError('Provide at least 2 model prediction arrays.')

    tables = {}

    for comb in combinations(range(num_models), 2):

        tb = np.zeros((2, 2))
        model1_vs_true = (y_target == y_model_predictions[comb[0]]).astype(int)
        model2_vs_true = (y_target == y_model_predictions[comb[1]]).astype(int)
        plus_true = model1_vs_true + model2_vs_true
        minus_true = model1_vs_true - model2_vs_true

        tb[0, 0] = np.sum(plus_true == 2)
        tb[0, 1] = np.sum(minus_true == 1)
        tb[1, 0] = np.sum(minus_true == -1)
        tb[1, 1] = np.sum(plus_true == 0)

        name_str = 'model_%s vs model_%s' % (comb[0], comb[1])
        tables[name_str] = tb

    return tables


def mcnemar(ary, corrected=True, exact=False):
    """
    McNemar test for paired nominal data

    Parameters
    -----------
    ary : array-like, shape=[2, 2]
        2 x 2 contigency table (as returned by evaluate.mcnemar_table),
        where
        a: ary[0, 0]: # of samples that both models predicted correctly
        b: ary[0, 1]: # of samples that model 1 got right and model 2 got wrong
        c: ary[1, 0]: # of samples that model 2 got right and model 1 got wrong
        d: aryCell [1, 1]: # of samples that both models predicted incorrectly
    corrected : array-like, shape=[n_samples] (default: True)
        Uses Edward's continuity correction for chi-squared if `True`
    exact : bool, (default: False)
        If `True`, uses an exact binomial test comparing b to
        a binomial distribution with n = b + c and p = 0.5.
        It is highly recommended to use `exact=True` for sample sizes < 25
        since chi-squared is not well-approximated
        by the chi-squared distribution!

    Returns
    -----------
    chi2, p : float or None, float
        Returns the chi-squared value and the p-value;
        if `exact=True` (default: `False`), `chi2` is `None`

    Examples
    -----------

    For usage examples, please see
    http://rasbt.github.io/mlxtend/user_guide/evaluate/mcnemar/

    """

    if not ary.shape == (2, 2):
        raise ValueError('Input array must be a 2x2 array.')

    b = ary[0, 1]
    c = ary[1, 0]
    n = b + c

    if not exact:
        if corrected:
            chi2 = (abs(ary[0, 1] - ary[1, 0]) - 1.0)**2 / float(n)
        else:
            chi2 = (ary[0, 1] - ary[1, 0])**2 / float(n)
        p = scipy.stats.distributions.chi2.sf(chi2, 1)

    else:
        chi2 = min(b, c)
        p = min(scipy.stats.binom.cdf(chi2, b + c, .5) * 2., 1.)

        # this is equivalent to the following code:
        #
        #    p = 0
        #    for i in range(max(b, c), b+c+1):
        #        p += scipy.special.binom(b+c, i) * 0.5**i * (1-0.5)**((b+c)-i)
        #    p = 2*p

    return chi2, p
Działa 2021-06-06 22:13:05 +02:00			`# Sebastian Raschka 2014-2020`
			`# mlxtend Machine Learning Library Extensions`
			`#`
			`# Author: Sebastian Raschka <sebastianraschka.com>`
			`#`
			`# License: BSD 3 clause`

			`import numpy as np`
			`import scipy.stats`
			`from itertools import combinations`


			`def mcnemar_table(y_target, y_model1, y_model2):`
			`"""`
			`Compute a 2x2 contigency table for McNemar's test.`

			`Parameters`
			`-----------`
			`y_target : array-like, shape=[n_samples]`
			`True class labels as 1D NumPy array.`
			`y_model1 : array-like, shape=[n_samples]`
			`Predicted class labels from model as 1D NumPy array.`
			`y_model2 : array-like, shape=[n_samples]`
			`Predicted class labels from model 2 as 1D NumPy array.`

			`Returns`
			`----------`
			`tb : array-like, shape=[2, 2]`
			`2x2 contingency table with the following contents:`
			`a: tb[0, 0]: # of samples that both models predicted correctly`
			`b: tb[0, 1]: # of samples that model 1 got right and model 2 got wrong`
			`c: tb[1, 0]: # of samples that model 2 got right and model 1 got wrong`
			`d: tb[1, 1]: # of samples that both models predicted incorrectly`

			`Examples`
			`-----------`
			`For usage examples, please see`
			`http://rasbt.github.io/mlxtend/user_guide/evaluate/mcnemar_table/`

			`"""`
			`for ary in (y_target, y_model1, y_model2):`
			`if len(ary.shape) != 1:`
			`raise ValueError('One or more input arrays are not 1-dimensional.')`

			`if y_target.shape[0] != y_model1.shape[0]:`
			`raise ValueError('y_target and y_model1 contain a different number'`
			`' of elements.')`

			`if y_target.shape[0] != y_model2.shape[0]:`
			`raise ValueError('y_target and y_model2 contain a different number'`
			`' of elements.')`

			`m1_vs_true = (y_target == y_model1).astype(int)`
			`m2_vs_true = (y_target == y_model2).astype(int)`

			`plus_true = m1_vs_true + m2_vs_true`
			`minus_true = m1_vs_true - m2_vs_true`

			`tb = np.zeros((2, 2), dtype=int)`

			`tb[0, 0] = np.sum(plus_true == 2)`
			`tb[0, 1] = np.sum(minus_true == 1)`
			`tb[1, 0] = np.sum(minus_true == -1)`
			`tb[1, 1] = np.sum(plus_true == 0)`

			`return tb`


			`def mcnemar_tables(y_target, *y_model_predictions):`
			`"""`
			`Compute multiple 2x2 contigency tables for McNemar's`
			`test or Cochran's Q test.`

			`Parameters`
			`-----------`
			`y_target : array-like, shape=[n_samples]`
			`True class labels as 1D NumPy array.`

			`y_model_predictions : array-like, shape=[n_samples]`
			`Predicted class labels for a model.`

			`Returns`
			`----------`

			`tables : dict`
			`Dictionary of NumPy arrays with shape=[2, 2]. Each dictionary`
			`key names the two models to be compared based on the order the`
			models were passed as `*y_model_predictions`. The number of
			`dictionary entries is equal to the number of pairwise combinations`
			`between the m models, i.e., "m choose 2."`

			`For example the following target array (containing the true labels)`
			`and 3 models`

			`- y_true = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])`
			`- y_mod0 = np.array([0, 1, 0, 0, 0, 1, 1, 0, 0, 0])`
			`- y_mod0 = np.array([0, 0, 1, 1, 0, 1, 1, 0, 0, 0])`
			`- y_mod0 = np.array([0, 1, 1, 1, 0, 1, 0, 0, 0, 0])`

			`would result in the following dictionary:`


			`{'model_0 vs model_1': array([[ 4., 1.],`
			`[ 2., 3.]]),`
			`'model_0 vs model_2': array([[ 3., 0.],`
			`[ 3., 4.]]),`
			`'model_1 vs model_2': array([[ 3., 0.],`
			`[ 2., 5.]])}`

			`Each array is structured in the following way:`

			`- tb[0, 0]: # of samples that both models predicted correctly`
			`- tb[0, 1]: # of samples that model a got right and model b got wrong`
			`- tb[1, 0]: # of samples that model b got right and model a got wrong`
			`- tb[1, 1]: # of samples that both models predicted incorrectly`

			`Examples`
			`-----------`

			`For usage examples, please see`
			`http://rasbt.github.io/mlxtend/user_guide/evaluate/mcnemar_tables/`

			`"""`
			`model_lens = set()`
			`y_model_predictions = list(y_model_predictions)`
			`for ary in ([y_target] + y_model_predictions):`
			`if len(ary.shape) != 1:`
			`raise ValueError('One or more input arrays are not 1-dimensional.')`
			`model_lens.add(ary.shape[0])`

			`if len(model_lens) > 1:`
			`raise ValueError('Each prediction array must have the '`
			`'same number of samples.')`

			`num_models = len(y_model_predictions)`

			`if num_models < 2:`
			`raise ValueError('Provide at least 2 model prediction arrays.')`

			`tables = {}`

			`for comb in combinations(range(num_models), 2):`

			`tb = np.zeros((2, 2))`
			`model1_vs_true = (y_target == y_model_predictions[comb[0]]).astype(int)`
			`model2_vs_true = (y_target == y_model_predictions[comb[1]]).astype(int)`
			`plus_true = model1_vs_true + model2_vs_true`
			`minus_true = model1_vs_true - model2_vs_true`

			`tb[0, 0] = np.sum(plus_true == 2)`
			`tb[0, 1] = np.sum(minus_true == 1)`
			`tb[1, 0] = np.sum(minus_true == -1)`
			`tb[1, 1] = np.sum(plus_true == 0)`

			`name_str = 'model_%s vs model_%s' % (comb[0], comb[1])`
			`tables[name_str] = tb`

			`return tables`


			`def mcnemar(ary, corrected=True, exact=False):`
			`"""`
			`McNemar test for paired nominal data`

			`Parameters`
			`-----------`
			`ary : array-like, shape=[2, 2]`
			`2 x 2 contigency table (as returned by evaluate.mcnemar_table),`
			`where`
			`a: ary[0, 0]: # of samples that both models predicted correctly`
			`b: ary[0, 1]: # of samples that model 1 got right and model 2 got wrong`
			`c: ary[1, 0]: # of samples that model 2 got right and model 1 got wrong`
			`d: aryCell [1, 1]: # of samples that both models predicted incorrectly`
			`corrected : array-like, shape=[n_samples] (default: True)`
			Uses Edward's continuity correction for chi-squared if `True`
			`exact : bool, (default: False)`
			If `True`, uses an exact binomial test comparing b to
			`a binomial distribution with n = b + c and p = 0.5.`
			It is highly recommended to use `exact=True` for sample sizes < 25
			`since chi-squared is not well-approximated`
			`by the chi-squared distribution!`

			`Returns`
			`-----------`
			`chi2, p : float or None, float`
			`Returns the chi-squared value and the p-value;`
			if `exact=True` (default: `False`), `chi2` is `None`

			`Examples`
			`-----------`

			`For usage examples, please see`
			`http://rasbt.github.io/mlxtend/user_guide/evaluate/mcnemar/`

			`"""`

			`if not ary.shape == (2, 2):`
			`raise ValueError('Input array must be a 2x2 array.')`

			`b = ary[0, 1]`
			`c = ary[1, 0]`
			`n = b + c`

			`if not exact:`
			`if corrected:`
			`chi2 = (abs(ary[0, 1] - ary[1, 0]) - 1.0)**2 / float(n)`
			`else:`
			`chi2 = (ary[0, 1] - ary[1, 0])**2 / float(n)`
			`p = scipy.stats.distributions.chi2.sf(chi2, 1)`

			`else:`
			`chi2 = min(b, c)`
			`p = min(scipy.stats.binom.cdf(chi2, b + c, .5) * 2., 1.)`

			`# this is equivalent to the following code:`
			`#`
			`# p = 0`
			`# for i in range(max(b, c), b+c+1):`
			`# p += scipy.special.binom(b+c, i) * 0.5*i (1-0.5)**((b+c)-i)`
			`# p = 2*p`

			`return chi2, p`